[haizea-commit] r444 - in trunk/src/haizea/resourcemanager: . enact/opennebula
haizea-commit at mailman.cs.uchicago.edu
haizea-commit at mailman.cs.uchicago.edu
Wed Jul 23 13:19:09 CDT 2008
Author: borja
Date: 2008-07-23 13:19:09 -0500 (Wed, 23 Jul 2008)
New Revision: 444
Modified:
trunk/src/haizea/resourcemanager/enact/opennebula/vm.py
trunk/src/haizea/resourcemanager/resourcepool.py
trunk/src/haizea/resourcemanager/rm.py
Log:
- Catch the exception raised when an OpenNebula enactment command fails (log the error and cancel the lease)
- Include the command's exit status and output in the exception message
Modified: trunk/src/haizea/resourcemanager/enact/opennebula/vm.py
===================================================================
--- trunk/src/haizea/resourcemanager/enact/opennebula/vm.py 2008-07-23 18:16:58 UTC (rev 443)
+++ trunk/src/haizea/resourcemanager/enact/opennebula/vm.py 2008-07-23 18:19:09 UTC (rev 444)
@@ -30,11 +30,11 @@
self.conn.row_factory = sqlite.Row
- def runCommand(self, cmd):
+ def run_command(self, cmd):
self.logger.debug("Running command: %s" % cmd, constants.ONE)
(status, output) = commands.getstatusoutput(cmd)
self.logger.debug("Returned status=%i, output='%s'" % (status, output), constants.ONE)
- return status
+ return status, output
def start(self, action):
for vnode in action.vnodes:
@@ -49,44 +49,44 @@
% (action.lease_haizea_id, vnode, hostID, image, cpu, memory), constants.ONE)
cmd = "%s deploy %i %i" % (self.onevm, vmid, hostID)
- status = self.runCommand(cmd)
+ status, output = self.run_command(cmd)
if status == 0:
self.logger.debug("Command returned succesfully.", constants.ONE)
else:
- raise Exception, "Error when running onevm deploy"
+ raise Exception, "Error when running onevm deploy (status=%i, output='%s')" % (status, output)
def stop(self, action):
for vnode in action.vnodes:
# Unpack action
vmid = action.vnodes[vnode].enactment_info
cmd = "%s shutdown %i" % (self.onevm, vmid)
- status = self.runCommand(cmd)
+ status, output = self.run_command(cmd)
if status == 0:
self.logger.debug("Command returned succesfully.", constants.ONE)
else:
- raise Exception, "Error when running onevm shutdown"
+ raise Exception, "Error when running onevm shutdown (status=%i, output='%s')" % (status, output)
def suspend(self, action):
for vnode in action.vnodes:
# Unpack action
vmid = action.vnodes[vnode].enactment_info
cmd = "%s suspend %i" % (self.onevm, vmid)
- status = self.runCommand(cmd)
+ status, output = self.run_command(cmd)
if status == 0:
self.logger.debug("Command returned succesfully.", constants.ONE)
else:
- raise Exception, "Error when running onevm suspend"
+ raise Exception, "Error when running onevm suspend (status=%i, output='%s')" % (status, output)
def resume(self, action):
for vnode in action.vnodes:
# Unpack action
vmid = action.vnodes[vnode].enactment_info
cmd = "%s resume %i" % (self.onevm, vmid)
- status = self.runCommand(cmd)
+ status, output = self.run_command(cmd)
if status == 0:
self.logger.debug("Command returned succesfully.", constants.ONE)
else:
- raise Exception, "Error when running onevm resume"
+ raise Exception, "Error when running onevm resume (status=%i, output='%s')" % (status, output)
def verifySuspend(self, action):
# TODO: Do a single query
Modified: trunk/src/haizea/resourcemanager/resourcepool.py
===================================================================
--- trunk/src/haizea/resourcemanager/resourcepool.py 2008-07-23 18:16:58 UTC (rev 443)
+++ trunk/src/haizea/resourcemanager/resourcepool.py 2008-07-23 18:19:09 UTC (rev 444)
@@ -114,12 +114,20 @@
startAction.vnodes[vnode].diskimage = taintedImage.filename
startAction.vnodes[vnode].resources = rr.resources_in_pnode[pnode]
- self.vm.start(startAction)
+ try:
+ self.vm.start(startAction)
+ except Exception, msg:
+ self.rm.logger.error("Enactment of start VM failed: %s" % msg, constants.RM)
+ self.rm.cancel_lease(lease)
def stopVMs(self, lease, rr):
stopAction = actions.VMEnactmentStopAction()
stopAction.fromRR(rr)
- self.vm.stop(stopAction)
+ try:
+ self.vm.stop(stopAction)
+ except Exception, msg:
+ self.rm.logger.error("Enactment of end VM failed: %s" % msg, constants.RM)
+ self.rm.cancel_lease(lease)
def transferFiles(self):
pass
@@ -187,12 +195,16 @@
pass
def suspendVMs(self, lease, rr):
- suspendAction = actions.VMEnactmentStopAction()
+ suspendAction = actions.VMEnactmentSuspendAction()
suspendAction.fromRR(rr)
- self.vm.suspend(suspendAction)
+ try:
+ self.vm.suspend(suspendAction)
+ except Exception, msg:
+ self.rm.logger.error("Enactment of suspend VM failed: %s" % msg, constants.RM)
+ self.rm.cancel_lease(lease)
def verifySuspend(self, lease, rr):
- verifySuspendAction = actions.VMEnactmentStopAction()
+ verifySuspendAction = actions.VMEnactmentConfirmSuspendAction()
verifySuspendAction.fromRR(rr)
self.vm.verifySuspend(verifySuspendAction)
@@ -203,12 +215,16 @@
# pass
def resumeVMs(self, lease, rr):
- resumeAction = actions.VMEnactmentStopAction()
+ resumeAction = actions.VMEnactmentResumeAction()
resumeAction.fromRR(rr)
- self.vm.resume(resumeAction)
+ try:
+ self.vm.resume(resumeAction)
+ except Exception, msg:
+ self.rm.logger.error("Enactment of resume VM failed: %s" % msg, constants.RM)
+ self.rm.cancel_lease(lease)
def verifyResume(self, lease, rr):
- verifyResumeAction = actions.VMEnactmentStopAction()
+ verifyResumeAction = actions.VMEnactmentConfirmResumeAction()
verifyResumeAction.fromRR(rr)
self.vm.verifyResume(verifyResumeAction)
Modified: trunk/src/haizea/resourcemanager/rm.py
===================================================================
--- trunk/src/haizea/resourcemanager/rm.py 2008-07-23 18:16:58 UTC (rev 443)
+++ trunk/src/haizea/resourcemanager/rm.py 2008-07-23 18:19:09 UTC (rev 444)
@@ -254,6 +254,20 @@
rr -- Resource reservations where the premature end happened"""
self.scheduler.notify_premature_end_vm(lease, rr)
+
+ def cancel_lease(self, lease):
+ """Cancels a lease.
+
+ TODO: Right now, a lease only gets cancelled if an enactment action
+ fails. If this happens, Haizea will continue running, but won't clean
+ up the lease (so, in effect, it will think resources are still being
+ used, and any future enactment actions for that lease will also fail)
+
+ Arguments:
+ lease -- Lease to cancel
+ """
+ pass
+
class Clock(object):
"""Base class for the resource manager's clock.
@@ -532,7 +546,7 @@
The clock keeps on tickin' until a SIGINT signal (Ctrl-C if running in the
foreground) or a SIGTERM signal is received.
"""
- self.rm.logger.status("Starting simulated clock", constants.CLOCK)
+ self.rm.logger.status("Starting clock", constants.CLOCK)
self.rm.stats.start(self.get_start_time())
signal.signal(signal.SIGINT, self.signalhandler_gracefulstop)
@@ -553,13 +567,20 @@
# Next schedulable time
self.nextschedulable = roundDateTime(self.lastwakeup + self.non_sched)
- # Next wakeup time
- self.nextperiodicwakeup = roundDateTime(self.lastwakeup + self.quantum)
-
# Wake up the resource manager
self.rm.process_reservations(self.lastwakeup)
+ # TODO: Compute nextschedulable here, before processing requests
self.rm.process_requests(self.nextschedulable)
+ # Next wakeup time
+ time_now = now()
+ if self.lastwakeup + self.quantum <= time_now:
+ quantums = (time_now - self.lastwakeup) / self.quantum
+ quantums = int(ceil(quantums)) * self.quantum
+ self.nextperiodicwakeup = roundDateTime(self.lastwakeup + quantums)
+ else:
+ self.nextperiodicwakeup = roundDateTime(self.lastwakeup + self.quantum)
+
# Determine if there's anything to do before the next wakeup time
nextchangepoint = self.rm.get_next_changepoint()
if nextchangepoint != None and nextchangepoint <= self.nextperiodicwakeup:
More information about the Haizea-commit
mailing list