#52 Retry to stop instance when host is busy
Merged 6 years ago by lbrabec. Opened 6 years ago by lbrabec.

@@ -56,3 +56,9 @@ 

  # Desired size, in GiB of instance disks. 0 leaves disk capacity

  # identical to source image

  #DISK_SIZE = 0

+ 

+ # Number of retries when stopping an instance fails (host is busy)

+ #STOP_RETRIES = 3

+ 

+ # Waiting time between stop retries, in seconds

+ #STOP_RETRY_WAIT = 1

file modified
+6
@@ -132,6 +132,12 @@ 

      # identical to source image

      DISK_SIZE = 0

  

+     # Number of retries when stopping an instance fails (host is busy)

+     STOP_RETRIES = 3

+ 

+     # Waiting time between stop retries, in seconds

+     STOP_RETRY_WAIT = 1

+ 

      def merge_object(self, obj):

          '''Overwrites default values with values from a python object which have

          names containing all upper case letters.

file modified
+21 -3
@@ -448,7 +448,8 @@ 

      def stop(self):

          """Stop the instance

  

-         :raises TestcloudInstanceError: if the instance does not exist

+         :raises TestcloudInstanceError: if the instance does not exist or

+                                         if unable to stop the instance (host is busy)

          """

  

          log.debug("stopping instance {}.".format(self.name))
@@ -462,8 +463,25 @@ 

              log.debug('Instance already shut off, not stopping: {}'.format(self.name))

              return

  

-         # stop (destroy) the vm

-         self._get_domain().destroy()

+         retries = config_data.STOP_RETRIES

+ 

+         while retries > 0:

+             try:

+                 # stop (destroy) the vm

+                 self._get_domain().destroy()

+                 return

+             except libvirt.libvirtError as e:

+                 if e.get_error_code() == libvirt.VIR_ERR_SYSTEM_ERROR:

+                     # host is busy, see https://bugzilla.redhat.com/1205647#c13

+                     log.warn("Host is busy, retrying to stop the instance {}".format(self.name))

+                 else:

+                     raise TestcloudInstanceError('Error while stopping instance {}: {}'

+                                                  .format(self.name, e))

+ 

+             retries -= 1

+             time.sleep(config_data.STOP_RETRY_WAIT)

+ 

+         raise TestcloudInstanceError("Unable to stop instance {}.".format(self.name))

  

      def remove(self, autostop=True):

          """Remove an already stopped instance

Fix for #51

If the error raised by libvirt is VIR_ERR_SYSTEM_ERROR (38, general system call failure), testcloud will retry stopping the instance.

Tested with patched libvirt.

It'd be nice if this were configurable instead of hard-coded

1 new commit added

  • configurable number of retries and wait time
6 years ago

Let's use log.warn and add instance ID.

This should probably get wrapped as TestcloudInstanceError('Error while stopping instance {name}: {error}')

rebased onto 91db0df

6 years ago

Pull-Request has been merged by lbrabec

6 years ago