[haizea-commit] r494 - in trunk/src/haizea: common resourcemanager

haizea-commit at mailman.cs.uchicago.edu
Mon Sep 15 04:00:23 CDT 2008


Author: borja
Date: 2008-09-15 04:00:23 -0500 (Mon, 15 Sep 2008)
New Revision: 494

Modified:
   trunk/src/haizea/common/constants.py
   trunk/src/haizea/resourcemanager/scheduler.py
Log:
Refactored new suspend/resume code: factored the common logic of __schedule_resumption and __schedule_suspension into a shared __compute_susprem_times method.
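
The shared method walks per-vnode operation times either forward from a
resume point or backward from a suspend deadline. A minimal standalone sketch
of that pattern under global exclusion (illustrative names, not the actual
Haizea API; in the real scheduler op_time comes from
__compute_suspend_resume_time and the tuples also carry the physical node):

    DIRECTION_FORWARD = 0
    DIRECTION_BACKWARD = 1

    def compute_times(vnodes, time, direction, op_time):
        # Serialize one suspend/resume operation per vnode, yielding
        # (start, end, vnode) windows chained one after another.
        times = []
        t = time
        for vnode in vnodes:
            t_prev = t
            if direction == DIRECTION_FORWARD:
                t += op_time
                times.append((t_prev, t, vnode))
            else:  # DIRECTION_BACKWARD
                t -= op_time
                times.append((t, t_prev, vnode))
        return times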

Modified: trunk/src/haizea/common/constants.py
===================================================================
--- trunk/src/haizea/common/constants.py	2008-09-12 17:23:15 UTC (rev 493)
+++ trunk/src/haizea/common/constants.py	2008-09-15 09:00:23 UTC (rev 494)
@@ -83,15 +83,13 @@
 EQUAL = 0
 WORSE = 1
 
+DIRECTION_FORWARD = 0
+DIRECTION_BACKWARD = 1
+        
 CACHESIZE_UNLIMITED = -1
 
-DOING_IDLE=0
-DOING_TRANSFER=1
-DOING_VM_SUSPEND=99
-DOING_VM_RUN=100
-DOING_VM_RESUME=101
-DOING_TRANSFER_NOVM=666
 
+
 ENACT_PACKAGE="haizea.resourcemanager.enact"
 
 COUNTER_ARACCEPTED="Accepted AR"
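
For a sense of the two directions (illustrative numbers, not Haizea
defaults): with a 1024 MB memory image and a suspend/resume rate of 32 MB/s,
each operation takes 32 s. Resumptions are scheduled forward from the resume
point t, giving windows [t, t+32], [t+32, t+64], ...; suspensions are
scheduled backward from the deadline d, giving [d-32, d], [d-64, d-32], ...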

Modified: trunk/src/haizea/resourcemanager/scheduler.py
===================================================================
--- trunk/src/haizea/resourcemanager/scheduler.py	2008-09-12 17:23:15 UTC (rev 493)
+++ trunk/src/haizea/resourcemanager/scheduler.py	2008-09-15 09:00:23 UTC (rev 494)
@@ -783,42 +783,31 @@
                     break        
                 
         return start, end, canfit
+    
+    def __compute_susprem_times(self, vmrr, time, direction, exclusion, rate):
+        times = [] # (start, end, pnode, vnodes)
 
-    # TODO: There is a LOT of common code between __schedule_resumption and 
-    # __schedule_suspension. This has to be factored out.
-    # Also, we need to "consolidate" RRs when doing local exclusion.
-
-    def __schedule_resumption(self, vmrr, resume_at):
-        from haizea.resourcemanager.rm import ResourceManager
-        config = ResourceManager.get_singleton().config
-        resm_exclusion = config.get("suspendresume-exclusion")        
-        rate = self.resourcepool.info.get_suspendresume_rate()
-
-        if resume_at < vmrr.start or resume_at > vmrr.end:
-            raise SchedException, "Tried to schedule a resumption at %s, which is outside the VMRR's duration (%s-%s)" % (resume_at, vmrr.start, vmrr.end)
-
-        resume_rrs = []
-        if resm_exclusion == constants.SUSPRES_EXCLUSION_GLOBAL:
-            # Global exclusion (which represents, e.g., reading the memory image files
-            # from a global file system) meaning no two suspensions can happen at the same
-            # time in the entire resource pool.
-            start = resume_at
+        if exclusion == constants.SUSPRES_EXCLUSION_GLOBAL:
+            # Global exclusion (which represents, e.g., reading/writing the memory image files
+            # from a global file system) means no two suspensions/resumptions can happen at
+            # the same time in the entire resource pool.
+            
+            t = time
+            t_prev = None
+                
             for (vnode,pnode) in vmrr.nodes.items():
                 mem = vmrr.lease.requested_resources.get_by_type(constants.RES_MEM)
-                single_resm_time = self.__compute_suspend_resume_time(mem, rate)
-                end = start + single_resm_time
-
-                r = ds.ResourceTuple.create_empty()
-                r.set_by_type(constants.RES_MEM, mem)
-                r.set_by_type(constants.RES_DISK, mem)
-                resmres = {pnode: r}
-                vnodes = [vnode]
-                resmrr = ds.ResumptionResourceReservation(vmrr.lease, start, end, resmres, vnodes, vmrr)
-                resmrr.state = ResourceReservation.STATE_SCHEDULED
-                resume_rrs.append(resmrr)
+                op_time = self.__compute_suspend_resume_time(mem, rate)
+                t_prev = t
                 
-                start = end
-        elif resm_exclusion == constants.SUSPRES_EXCLUSION_LOCAL:
+                if direction == constants.DIRECTION_FORWARD:
+                    t += op_time
+                    times.append((t_prev, t, pnode, [vnode]))
+                elif direction == constants.DIRECTION_BACKWARD:
+                    t -= op_time
+                    times.append((t, t_prev, pnode, [vnode]))
+
+        elif exclusion == constants.SUSPRES_EXCLUSION_LOCAL:
             # Local exclusion (which represents, e.g., reading/writing the memory image files
             # from a local file system) means no two suspensions/resumptions can happen at the
             # same time in the same physical node.
@@ -826,24 +815,48 @@
             for (vnode,pnode) in vmrr.nodes.items():
                 vnodes_in_pnode.setdefault(pnode, []).append(vnode)
             for pnode in vnodes_in_pnode:
-                start = resume_at
+                t = time
+                t_prev = None
                 for vnode in vnodes_in_pnode[pnode]:
                     mem = vmrr.lease.requested_resources.get_by_type(constants.RES_MEM)
-                    single_resm_time = self.__compute_suspend_resume_time(mem, rate)
-                    end = start + single_resm_time
-    
-                    r = ds.ResourceTuple.create_empty()
-                    r.set_by_type(constants.RES_MEM, mem)
-                    r.set_by_type(constants.RES_DISK, mem)
-                    resmres = {pnode: r}
-                    vnodes = [vnode]
-                    resmrr = ds.ResumptionResourceReservation(vmrr.lease, start, end, resmres, vnodes, vmrr)
-                    resmrr.state = ResourceReservation.STATE_SCHEDULED
-                    resume_rrs.append(resmrr)
+                    op_time = self.__compute_suspend_resume_time(mem, rate)
+                    t_prev = t
+                    
+                    if direction == constants.DIRECTION_FORWARD:
+                        t += op_time
+                        times.append((t_prev, t, pnode, [vnode]))
+                    elif direction == constants.DIRECTION_BACKWARD:
+                        t -= op_time
+                        times.append((t, t_prev, pnode, [vnode]))
+            # TODO: "consolidate" times (i.e., figure out what operations can be grouped
+            # into a single RR). This will not be an issue when running on real hardware,
+            # but it might impact simulation performance.
+        
+        return times
+
+
+    def __schedule_resumption(self, vmrr, resume_at):
+        from haizea.resourcemanager.rm import ResourceManager
+        config = ResourceManager.get_singleton().config
+        resm_exclusion = config.get("suspendresume-exclusion")        
+        rate = self.resourcepool.info.get_suspendresume_rate()
+
+        if resume_at < vmrr.start or resume_at > vmrr.end:
+            raise SchedException, "Tried to schedule a resumption at %s, which is outside the VMRR's duration (%s-%s)" % (resume_at, vmrr.start, vmrr.end)
+
+        times = self.__compute_susprem_times(vmrr, resume_at, constants.DIRECTION_FORWARD, resm_exclusion, rate)
+        resume_rrs = []
+        for (start, end, pnode, vnodes) in times:
+            r = ds.ResourceTuple.create_empty()
+            mem = vmrr.lease.requested_resources.get_by_type(constants.RES_MEM)
+            r.set_by_type(constants.RES_MEM, mem)
+            r.set_by_type(constants.RES_DISK, mem)
+            resmres = {pnode: r}
+            resmrr = ds.ResumptionResourceReservation(vmrr.lease, start, end, resmres, vnodes, vmrr)
+            resmrr.state = ResourceReservation.STATE_SCHEDULED
+            resume_rrs.append(resmrr)
                 
-                    start = end
-                
-            resume_rrs.sort(key=attrgetter("start"))
+        resume_rrs.sort(key=attrgetter("start"))
             
         resm_end = resume_rrs[-1].end
         if resm_end > vmrr.end:
@@ -863,55 +876,19 @@
         if suspend_by < vmrr.start or suspend_by > vmrr.end:
             raise SchedException, "Tried to schedule a suspension by %s, which is outside the VMRR's duration (%s-%s)" % (suspend_by, vmrr.start, vmrr.end)
 
+        times = self.__compute_susprem_times(vmrr, suspend_by, constants.DIRECTION_BACKWARD, susp_exclusion, rate)
         suspend_rrs = []
-        if susp_exclusion == constants.SUSPRES_EXCLUSION_GLOBAL:
-            # Global exclusion (which represents, e.g., saving the memory image files
-            # to a global file system) means no two suspensions can happen at the same
-            # time in the entire resource pool.
-            end = suspend_by
-            for (vnode,pnode) in vmrr.nodes.items():
-                mem = vmrr.lease.requested_resources.get_by_type(constants.RES_MEM)
-                single_susp_time = self.__compute_suspend_resume_time(mem, rate)
-                start = end - single_susp_time
-
-                r = ds.ResourceTuple.create_empty()
-                r.set_by_type(constants.RES_MEM, mem)
-                r.set_by_type(constants.RES_DISK, mem)
-                suspres = {pnode: r}
-                vnodes = [vnode]
-                susprr = ds.SuspensionResourceReservation(vmrr.lease, start, end, suspres, vnodes, vmrr)
-                susprr.state = ResourceReservation.STATE_SCHEDULED
-                suspend_rrs.append(susprr)
+        for (start, end, pnode, vnodes) in times:
+            r = ds.ResourceTuple.create_empty()
+            mem = vmrr.lease.requested_resources.get_by_type(constants.RES_MEM)
+            r.set_by_type(constants.RES_MEM, mem)
+            r.set_by_type(constants.RES_DISK, mem)
+            suspres = {pnode: r}
+            susprr = ds.SuspensionResourceReservation(vmrr.lease, start, end, suspres, vnodes, vmrr)
+            susprr.state = ResourceReservation.STATE_SCHEDULED
+            suspend_rrs.append(susprr)
                 
-                end = start
-                
-            suspend_rrs.reverse()
-        elif susp_exclusion == constants.SUSPRES_EXCLUSION_LOCAL:
-            # Local exclusion (which represents, e.g., saving the memory image files
-            # to a local file system) means no two suspensions can happen at the same
-            # time in the same physical node.
-            vnodes_in_pnode = {}
-            for (vnode,pnode) in vmrr.nodes.items():
-                vnodes_in_pnode.setdefault(pnode, []).append(vnode)
-            for pnode in vnodes_in_pnode:
-                end = suspend_by
-                for vnode in vnodes_in_pnode[pnode]:
-                    mem = vmrr.lease.requested_resources.get_by_type(constants.RES_MEM)
-                    single_susp_time = self.__compute_suspend_resume_time(mem, rate)
-                    start = end - single_susp_time
-    
-                    r = ds.ResourceTuple.create_empty()
-                    r.set_by_type(constants.RES_MEM, mem)
-                    r.set_by_type(constants.RES_DISK, mem)
-                    suspres = {pnode: r}
-                    vnodes = [vnode]
-                    susprr = ds.SuspensionResourceReservation(vmrr.lease, start, end, suspres, vnodes, vmrr)
-                    susprr.state = ResourceReservation.STATE_SCHEDULED
-                    suspend_rrs.append(susprr)
-                    
-                    end = start
-                
-            suspend_rrs.sort(key=attrgetter("start"))
+        suspend_rrs.sort(key=attrgetter("start"))
             
         susp_start = suspend_rrs[0].start
         if susp_start < vmrr.start:
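
Under local exclusion the same walk runs independently per physical node, so
operations on different pnodes may overlap while operations on the same pnode
are serialized. A standalone sketch of that grouping (illustrative names, not
the actual Haizea API):

    def compute_local_times(nodes, time, op_time):
        # nodes maps vnode -> pnode; chain operations within each pnode only.
        vnodes_in_pnode = {}
        for vnode, pnode in nodes.items():
            vnodes_in_pnode.setdefault(pnode, []).append(vnode)
        times = []
        for pnode, vnodes in vnodes_in_pnode.items():
            t = time  # every pnode starts its own chain at the same time
            for vnode in vnodes:
                times.append((t, t + op_time, pnode, [vnode]))
                t += op_time
        return times

    # E.g. nodes = {1: "p1", 2: "p1", 3: "p2"}, time = 0, op_time = 30
    # gives p1: (0, 30), (30, 60) and p2: (0, 30); windows on different
    # pnodes overlap, windows on the same pnode do not.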


