[haizea-commit] r444 - in trunk/src/haizea/resourcemanager: . enact/opennebula

haizea-commit at mailman.cs.uchicago.edu haizea-commit at mailman.cs.uchicago.edu
Wed Jul 23 13:19:09 CDT 2008


Author: borja
Date: 2008-07-23 13:19:09 -0500 (Wed, 23 Jul 2008)
New Revision: 444

Modified:
   trunk/src/haizea/resourcemanager/enact/opennebula/vm.py
   trunk/src/haizea/resourcemanager/resourcepool.py
   trunk/src/haizea/resourcemanager/rm.py
Log:
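- Catch the exception raised when an OpenNebula enactment command fails; log the error and call cancel_lease (currently a stub)
- Return both the exit status and the command output from run_command, and include them in the exception message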

Modified: trunk/src/haizea/resourcemanager/enact/opennebula/vm.py
===================================================================
--- trunk/src/haizea/resourcemanager/enact/opennebula/vm.py	2008-07-23 18:16:58 UTC (rev 443)
+++ trunk/src/haizea/resourcemanager/enact/opennebula/vm.py	2008-07-23 18:19:09 UTC (rev 444)
@@ -30,11 +30,11 @@
         self.conn.row_factory = sqlite.Row
 
         
-    def runCommand(self, cmd):
+    def run_command(self, cmd):
         self.logger.debug("Running command: %s" % cmd, constants.ONE)
         (status, output) = commands.getstatusoutput(cmd)
         self.logger.debug("Returned status=%i, output='%s'" % (status, output), constants.ONE)
-        return status
+        return status, output
 
     def start(self, action):
         for vnode in action.vnodes:
@@ -49,44 +49,44 @@
                          % (action.lease_haizea_id, vnode, hostID, image, cpu, memory), constants.ONE)
 
             cmd = "%s deploy %i %i" % (self.onevm, vmid, hostID)
-            status = self.runCommand(cmd)
+            status, output = self.run_command(cmd)
             if status == 0:
                 self.logger.debug("Command returned succesfully.", constants.ONE)
             else:
-                raise Exception, "Error when running onevm deploy"
+                raise Exception, "Error when running onevm deploy (status=%i, output='%s')" % (status, output)
             
     def stop(self, action):
         for vnode in action.vnodes:
             # Unpack action
             vmid = action.vnodes[vnode].enactment_info
             cmd = "%s shutdown %i" % (self.onevm, vmid)
-            status = self.runCommand(cmd)
+            status, output = self.run_command(cmd)
             if status == 0:
                 self.logger.debug("Command returned succesfully.", constants.ONE)
             else:
-                raise Exception, "Error when running onevm shutdown"
+                raise Exception, "Error when running onevm shutdown (status=%i, output='%s')" % (status, output)
 
     def suspend(self, action):
         for vnode in action.vnodes:
             # Unpack action
             vmid = action.vnodes[vnode].enactment_info
             cmd = "%s suspend %i" % (self.onevm, vmid)
-            status = self.runCommand(cmd)
+            status, output = self.run_command(cmd)
             if status == 0:
                 self.logger.debug("Command returned succesfully.", constants.ONE)
             else:
-                raise Exception, "Error when running onevm suspend"
+                raise Exception, "Error when running onevm suspend (status=%i, output='%s')" % (status, output)
         
     def resume(self, action):
         for vnode in action.vnodes:
             # Unpack action
             vmid = action.vnodes[vnode].enactment_info
             cmd = "%s resume %i" % (self.onevm, vmid)
-            status = self.runCommand(cmd)
+            status, output = self.run_command(cmd)
             if status == 0:
                 self.logger.debug("Command returned succesfully.", constants.ONE)
             else:
-                raise Exception, "Error when running onevm resume"
+                raise Exception, "Error when running onevm resume (status=%i, output='%s')" % (status, output)
 
     def verifySuspend(self, action):
         # TODO: Do a single query

Modified: trunk/src/haizea/resourcemanager/resourcepool.py
===================================================================
--- trunk/src/haizea/resourcemanager/resourcepool.py	2008-07-23 18:16:58 UTC (rev 443)
+++ trunk/src/haizea/resourcemanager/resourcepool.py	2008-07-23 18:19:09 UTC (rev 444)
@@ -114,12 +114,20 @@
             startAction.vnodes[vnode].diskimage = taintedImage.filename
             startAction.vnodes[vnode].resources = rr.resources_in_pnode[pnode]
 
-        self.vm.start(startAction)
+        try:
+            self.vm.start(startAction)
+        except Exception, msg:
+            self.rm.logger.error("Enactment of start VM failed: %s" % msg, constants.RM)
+            self.rm.cancel_lease(lease)
         
     def stopVMs(self, lease, rr):
         stopAction = actions.VMEnactmentStopAction()
         stopAction.fromRR(rr)
-        self.vm.stop(stopAction)
+        try:
+            self.vm.stop(stopAction)
+        except Exception, msg:
+            self.rm.logger.error("Enactment of end VM failed: %s" % msg, constants.RM)
+            self.rm.cancel_lease(lease)
         
     def transferFiles(self):
         pass
@@ -187,12 +195,16 @@
         pass
     
     def suspendVMs(self, lease, rr):
-        suspendAction = actions.VMEnactmentStopAction()
+        suspendAction = actions.VMEnactmentSuspendAction()
         suspendAction.fromRR(rr)
-        self.vm.suspend(suspendAction)
+        try:
+            self.vm.suspend(suspendAction)
+        except Exception, msg:
+            self.rm.logger.error("Enactment of suspend VM failed: %s" % msg, constants.RM)
+            self.rm.cancel_lease(lease)
     
     def verifySuspend(self, lease, rr):
-        verifySuspendAction = actions.VMEnactmentStopAction()
+        verifySuspendAction = actions.VMEnactmentConfirmSuspendAction()
         verifySuspendAction.fromRR(rr)
         self.vm.verifySuspend(verifySuspendAction)
     
@@ -203,12 +215,16 @@
     #    pass
     
     def resumeVMs(self, lease, rr):
-        resumeAction = actions.VMEnactmentStopAction()
+        resumeAction = actions.VMEnactmentResumeAction()
         resumeAction.fromRR(rr)
-        self.vm.resume(resumeAction)
+        try:
+            self.vm.resume(resumeAction)
+        except Exception, msg:
+            self.rm.logger.error("Enactment of resume VM failed: %s" % msg, constants.RM)
+            self.rm.cancel_lease(lease)
     
     def verifyResume(self, lease, rr):
-        verifyResumeAction = actions.VMEnactmentStopAction()
+        verifyResumeAction = actions.VMEnactmentConfirmResumeAction()
         verifyResumeAction.fromRR(rr)
         self.vm.verifyResume(verifyResumeAction)    
         

Modified: trunk/src/haizea/resourcemanager/rm.py
===================================================================
--- trunk/src/haizea/resourcemanager/rm.py	2008-07-23 18:16:58 UTC (rev 443)
+++ trunk/src/haizea/resourcemanager/rm.py	2008-07-23 18:19:09 UTC (rev 444)
@@ -254,6 +254,20 @@
         rr    -- Resource reservations where the premature end happened"""
         self.scheduler.notify_premature_end_vm(lease, rr)
 
+
+    def cancel_lease(self, lease):
+        """Cancels a lease.
+        
+        TODO: Right now, a lease only gets cancelled if an enactment action
+        fails. If this happens, Haizea will continue running, but won't clean
+        up the lease (so, in effect, it will think resources are still being
+        used, and any future enactment actions for that lease will also fail)
+        
+        Arguments:
+        lease -- Lease to cancel
+        """
+        pass
+
             
 class Clock(object):
     """Base class for the resource manager's clock.
@@ -532,7 +546,7 @@
         The clock keeps on tickin' until a SIGINT signal (Ctrl-C if running in the
         foreground) or a SIGTERM signal is received.
         """
-        self.rm.logger.status("Starting simulated clock", constants.CLOCK)
+        self.rm.logger.status("Starting clock", constants.CLOCK)
         self.rm.stats.start(self.get_start_time())
         
         signal.signal(signal.SIGINT, self.signalhandler_gracefulstop)
@@ -553,13 +567,20 @@
             # Next schedulable time
             self.nextschedulable = roundDateTime(self.lastwakeup + self.non_sched)
             
-            # Next wakeup time
-            self.nextperiodicwakeup = roundDateTime(self.lastwakeup + self.quantum)
-
             # Wake up the resource manager
             self.rm.process_reservations(self.lastwakeup)
+            # TODO: Compute nextschedulable here, before processing requests
             self.rm.process_requests(self.nextschedulable)
             
+            # Next wakeup time
+            time_now = now()
+            if self.lastwakeup + self.quantum <= time_now:
+                quantums = (time_now - self.lastwakeup) / self.quantum
+                quantums = int(ceil(quantums)) * self.quantum
+                self.nextperiodicwakeup = roundDateTime(self.lastwakeup + quantums)
+            else:
+                self.nextperiodicwakeup = roundDateTime(self.lastwakeup + self.quantum)
+            
             # Determine if there's anything to do before the next wakeup time
             nextchangepoint = self.rm.get_next_changepoint()
             if nextchangepoint != None and nextchangepoint <= self.nextperiodicwakeup:



More information about the Haizea-commit mailing list