#46 Resolve issue #45: TOCTOU race on port
Merged 6 years ago by stefw. Opened 6 years ago by cevich.
cevich/standard-test-roles fix_toctou  into  master

@@ -88,11 +88,9 @@ 

                  variables[subject] = vars

      return { "localhost": { "hosts": hosts, "vars": { } }, "subjects": { "hosts": hosts, "vars": { } }, "_meta": { "hostvars": variables } }

  

- def host(image):

-     null = open(os.devnull, 'w')

  

-     # No name specified, find a port to use

-     for port in range(2222, 5000):

+ def start_qemu(image, cloudinit, log, portrange=(2222, 5555)):

+     for port in xrange(*portrange):

          sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)

          sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)

          try:
@@ -106,6 +104,16 @@ 

      else:

          raise RuntimeError("unable to find free local port to map SSH to")

  

+     return subprocess.Popen(["/usr/bin/qemu-system-x86_64", "-m", "1024", image,

+         "-enable-kvm", "-snapshot", "-cdrom", cloudinit,

+         "-net", "nic,model=virtio", "-net", "user,hostfwd=tcp:127.0.0.3:{0}-:22".format(port),

+         "-device", "isa-serial,chardev=pts2", "-chardev", "file,id=pts2,path=" + log,

+         "-display", "none"], stdout=open(os.devnull, 'w')), port

+ 

+ 

+ def host(image):

+     null = open(os.devnull, 'w')

+ 

      try:

          tty = os.open("/dev/tty", os.O_WRONLY)

          os.dup2(tty, 2)
@@ -148,39 +156,47 @@ 

          if exc.errno != errno.EEXIST or not os.path.isdir(artifacts):

              raise

      log = os.path.join(artifacts, "{0}.log".format(os.path.basename(image)))

-     proc = subprocess.Popen(["/usr/bin/qemu-system-x86_64", "-m", "1024", image,

-         "-enable-kvm", "-snapshot", "-cdrom", cloudinit,

- 	"-net", "nic,model=virtio", "-net", "user,hostfwd=tcp:127.0.0.3:{0}-:22".format(port),

- 	"-device", "isa-serial,chardev=pts2", "-chardev", "file,id=pts2,path=" + log,

- 	"-display", "none"], stdout=null)

- 

-     # The variables

-     variables = {

-         "ansible_ssh_port": "{0}".format(port),

-         "ansible_ssh_host": "127.0.0.3",

-         "ansible_ssh_user": "root",

-         "ansible_ssh_pass": "foobar",

-         "ansible_ssh_private_key_file": identity,

-         "ansible_ssh_common_args": "-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"

-     }

- 

-     # wait for ssh to come up

-     args = " ".join([ "{0}='{1}'".format(*item) for item in variables.items() ])

-     inventory = os.path.join(directory, "inventory")

-     with open(inventory, "w") as f:

-         f.write("{0} {1}\n".format(image, args))

- 

-     ping = [

-         "/usr/bin/ansible",

-         "--inventory",

-         inventory,

-         image,

-         "--module-name",

-         "ping"

-     ]

- 

-     for tries in range(0, 30):

+ 

+     proc = None  # for failure detection

+     cpe = None  # for exception scoping

+     for tries in xrange(0, 5):

          try:

+             proc, port = start_qemu(image, cloudinit, log)

+             break

+         except subprocess.CalledProcessError as cpe:

+             time.sleep(1)

+             continue

+     if proc is None:

+         raise RuntimeError("Could not launch VM for qcow2 image"

+                            " '{0}':{1}".format(image, cpe.output))

+ 

+     for tries in xrange(0, 30):

+         try:

+             # The variables

+             variables = {

+                 "ansible_ssh_port": "{0}".format(port),

+                 "ansible_ssh_host": "127.0.0.3",

+                 "ansible_ssh_user": "root",

+                 "ansible_ssh_pass": "foobar",

+                 "ansible_ssh_private_key_file": identity,

+                 "ansible_ssh_common_args": "-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"

+             }

+ 

+             # wait for ssh to come up

+             args = " ".join([ "{0}='{1}'".format(*item) for item in variables.items() ])

+             inventory = os.path.join(directory, "inventory")

+             with open(inventory, "w") as f:

+                 f.write("{0} {1}\n".format(image, args))

+ 

+             ping = [

+                 "/usr/bin/ansible",

+                 "--inventory",

+                 inventory,

+                 image,

+                 "--module-name",

+                 "ping"

+             ]

+ 

              (pid, ret) = os.waitpid(proc.pid, os.WNOHANG)

              if pid != 0:

                  raise RuntimeError("qemu failed to launch qcow2 image: {0}".format(image))

In the host() function, an available port is located,
bound, then released, well before starting the qemu
process. However, during the interval, another process
could easily grab the port, causing an exception to be raised.

Avoid this situation by wrapping both the port search and
qemu process launch, inside a retry loop. If retries are
exceeded, display the error from the qemu process to aid
in debugging.

Signed-off-by: Chris Evich cevich@redhat.com

This looks good to me. Tested with concurrent inventory usage fighting for ports (even though that's not really something that's supported by the spec).

Pull-Request has been merged by stefw

6 years ago
Metadata