#26 inventory: Enable containers and VMs to be diagnosed by setting TEST_DEBUG environment variable
Merged 6 years ago by merlinm. Opened 6 years ago by merlinm.
Unknown source inventory-diagnose  into  master

@@ -13,6 +13,7 @@

  import tempfile

  import time

  import traceback

+ import distutils.util

  

  def main(argv):

      parser = argparse.ArgumentParser(description="Inventory for a container image in a registry")
@@ -58,6 +59,12 @@

      directory = tempfile.mkdtemp(prefix="inventory-docker")

      cidfile = os.path.join(directory, "cid")

  

+     # Determine if container should be kept available for diagnosis after completion

+     try:

+         diagnose = distutils.util.strtobool(os.getenv("TEST_DEBUG", "0"))

+     except ValueError:

+         diagnose = 0

+ 

      sys.stderr.write("Launching Docker container for {0}\n".format(image))

  

      # And launch the actual container
@@ -129,6 +136,17 @@

          except OSError:

              break # Either of the processes no longer exist

  

+     if diagnose:

+         sys.stderr.write("\n")

+         sys.stderr.write("DIAGNOSE: docker exec -it {0} /bin/bash\n".format(name))

+         sys.stderr.write("DIAGNOSE: kill {0} # when finished\n".format(os.getpid()))

+ 

+         def _signal_handler(*args):

+             sys.stderr.write("\nDIAGNOSE ending...\n")

+ 

+         signal.signal(signal.SIGTERM, _signal_handler)

+         signal.pause()

+ 

      # Dump the container logs

      try:

          os.makedirs(artifacts)

@@ -13,6 +13,7 @@

  import tempfile

  import time

  import traceback

+ import distutils.util

  

  IDENTITY = """

  -----BEGIN RSA PRIVATE KEY-----
@@ -131,6 +132,12 @@

                             "-volid", "cidata", "-joliet", "-rock", "-quiet",

                             "-output", cloudinit, userdata, metadata], stdout=null)

  

+     # Determine if virtual machine should be kept available for diagnosis after completion

+     try:

+         diagnose = distutils.util.strtobool(os.getenv("TEST_DEBUG", "0"))

+     except ValueError:

+         diagnose = 0

+ 

      sys.stderr.write("Launching virtual machine for {0}\n".format(image))

  

      # And launch the actual VM
@@ -214,6 +221,17 @@

          except OSError:

              break # Either of the processes no longer exist

  

+     if diagnose:

+         sys.stderr.write("\n")

+         sys.stderr.write("DIAGNOSE: ssh -p {0} -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null root@{1} # password: {2}\n".format(port, "127.0.0.3", "foobar"))

+         sys.stderr.write("DIAGNOSE: kill {0} # when finished\n".format(os.getpid()))

+ 

+         def _signal_handler(*args):

+             sys.stderr.write("\nDIAGNOSE ending...\n")

+ 

+         signal.signal(signal.SIGTERM, _signal_handler)

+         signal.pause()

+ 

      # Kill the qemu process

      try:

          os.kill(proc.pid, signal.SIGTERM)

The inventory scripts do a very good job of cleaning up the containers and VMs when finished. Too good, in fact—which made it impossible to diagnose things going wrong inside the containers/VMs. With this PR, when the playbook finishes, if the environment variable TEST_DEBUG (named TEST_DIAGNOSE_INVENTORY in the first revision of this PR) is set to a true value, the inventory scripts will write diagnostic information to stderr and wait for an explicit signal (SIGTERM, e.g. via `kill`) before cleaning up.

For example:

...
TASK [standard-test-beakerlib : Check the results] ************************************************************************************************
changed: [/home/merlinm/Fedora-Atomic-26-20170707.1.x86_64.qcow2]

PLAY RECAP ****************************************************************************************************************************************
/home/merlinm/Fedora-Atomic-26-20170707.1.x86_64.qcow2 : ok=25   changed=13   unreachable=0    failed=0   

$ 
DIAGNOSE: ssh -p 2222 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null root@127.0.0.3 # password: foobar
DIAGNOSE: kill 5438 # when finished
$ ssh -p 2222 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null root@127.0.0.3
Warning: Permanently added '[127.0.0.3]:2222' (ECDSA) to the list of known hosts.
root@127.0.0.3's password: 
[root@localhost ~]# 
...
[root@localhost ~]# exit
$ sudo kill 5438
DIAGNOSE ending...
qemu-system-x86_64: terminating on signal 15 from pid 5438
...
PLAY RECAP ****************************************************************************************************************************************
d683e25ad3b1d11a0a9752efa5013935c195ed9949380824d5cdf326afb89970 : ok=35   changed=24   unreachable=0    failed=1   

$ 
DIAGNOSE: docker exec -it d683e25ad3b1d11a0a9752efa5013935c195ed9949380824d5cdf326afb89970 /bin/bash
DIAGNOSE: kill 17558 # when finished
$ docker exec -it d683e25ad3b1d11a0a9752efa5013935c195ed9949380824d5cdf326afb89970 /bin/bash
[root@d683e25ad3b1 /]# 
...
[root@d683e25ad3b1 /]# exit
$ sudo kill 17558
DIAGNOSE ending...

I like this in principle, but I'd prefer a larger knob: one that will become the go-to for developers, maintainers, and testers writing and debugging tests.

What do you think about a $TEST_DEBUG or $TEST_DIAGNOSE variable that turns on this, and later other more verbose diagnostics?

1 new commit added

  • inventory: rename environment variable to trigger container/VM diagnosis from TEST_DIAGNOSE_INVENTORY to TEST_DEBUG
6 years ago

Pull-Request has been merged by merlinm

6 years ago