[haizea-commit] r537 - in branches: . TP1.3-scheduler-refactoring/haizea/resourcemanager TP1.3-scheduler-refactoring/haizea/resourcemanager/deployment TP1.3-scheduler-refactoring/haizea/resourcemanager/enact TP1.3-scheduler-refactoring/haizea/traces
haizea-commit at mailman.cs.uchicago.edu
Mon Oct 20 12:09:51 CDT 2008
Author: borja
Date: 2008-10-20 11:50:12 -0500 (Mon, 20 Oct 2008)
New Revision: 537
Added:
branches/TP1.3-scheduler-refactoring/
branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/configfile.py
branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/datastruct.py
branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/deployment/imagetransfer.py
branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/enact/opennebula.py
branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/enact/simulated.py
branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/resourcepool.py
branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/rm.py
branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/rpcserver.py
branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/scheduler.py
branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/slottable.py
branches/TP1.3-scheduler-refactoring/haizea/traces/readers.py
Removed:
branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/configfile.py
branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/datastruct.py
branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/deployment/imagetransfer.py
branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/enact/opennebula.py
branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/enact/simulated.py
branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/resourcepool.py
branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/rm.py
branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/rpcserver.py
branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/scheduler.py
branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/slottable.py
branches/TP1.3-scheduler-refactoring/haizea/traces/readers.py
Modified:
branches/TP1.3-scheduler-refactoring/haizea/traces/generators.py
Log:
Refactoring the Scheduler class into smaller components is going to require extensive changes. Creating a branch for this.
Copied: branches/TP1.3-scheduler-refactoring (from rev 501, trunk/src)
Deleted: branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/configfile.py
===================================================================
--- trunk/src/haizea/resourcemanager/configfile.py 2008-09-16 10:43:48 UTC (rev 501)
+++ branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/configfile.py 2008-10-20 16:50:12 UTC (rev 537)
@@ -1,721 +0,0 @@
-# -------------------------------------------------------------------------- #
-# Copyright 2006-2008, University of Chicago #
-# Copyright 2008, Distributed Systems Architecture Group, Universidad #
-# Complutense de Madrid (dsa-research.org) #
-# #
-# Licensed under the Apache License, Version 2.0 (the "License"); you may #
-# not use this file except in compliance with the License. You may obtain #
-# a copy of the License at #
-# #
-# http://www.apache.org/licenses/LICENSE-2.0 #
-# #
-# Unless required by applicable law or agreed to in writing, software #
-# distributed under the License is distributed on an "AS IS" BASIS, #
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
-# See the License for the specific language governing permissions and #
-# limitations under the License. #
-# -------------------------------------------------------------------------- #
-
-from haizea.common.config import ConfigException, Section, Option, Config, OPTTYPE_INT, OPTTYPE_FLOAT, OPTTYPE_STRING, OPTTYPE_BOOLEAN, OPTTYPE_DATETIME, OPTTYPE_TIMEDELTA
-from haizea.common.utils import generate_config_name
-import haizea.common.constants as constants
-import os.path
-import sys
-from mx.DateTime import TimeDelta
-import ConfigParser
-
-class HaizeaConfig(Config):
-
- sections = []
-
- # ============================= #
- # #
- # GENERAL OPTIONS #
- # #
- # ============================= #
-
- general = Section("general", required=True,
- doc = "This section is used for general options affecting Haizea as a whole.")
- general.options = \
- [
- Option(name = "loglevel",
- getter = "loglevel",
- type = OPTTYPE_STRING,
- required = False,
- default = "INFO",
- valid = ["STATUS","INFO","DEBUG","VDEBUG"],
- doc = """
- Controls the level (and amount) of
- log messages. Valid values are:
-
- - STATUS: Only print status messages
-                        - INFO: Slightly more verbose than STATUS
- - DEBUG: Prints information useful for debugging the scheduler.
- - VDEBUG: Prints very verbose information
- on the scheduler's internal data structures. Use only
- for short runs.
- """),
-
- Option(name = "logfile",
- getter = "logfile",
- type = OPTTYPE_STRING,
- required = False,
- default = "/var/tmp/haizea.log",
- doc = """
- When running Haizea as a daemon, this option specifies the file
- that log messages should be written to.
- """),
-
- Option(name = "mode",
- getter = "mode",
- type = OPTTYPE_STRING,
- required = True,
- valid = ["simulated","opennebula"],
- doc = """
- Sets the mode the scheduler will run in.
- Currently the only valid values are "simulated" and
- "opennebula". The "simulated" mode expects lease
- requests to be provided through a trace file, and
- all enactment is simulated. The "opennebula" mode
- interacts with the OpenNebula virtual infrastructure
- manager (http://www.opennebula.org/) to obtain lease
- requests and to do enactment on physical resources.
- """),
-
- Option(name = "lease-preparation",
- getter = "lease-preparation",
- type = OPTTYPE_STRING,
- required = False,
- default = constants.DEPLOYMENT_UNMANAGED,
- valid = [constants.DEPLOYMENT_UNMANAGED,
- constants.DEPLOYMENT_PREDEPLOY,
- constants.DEPLOYMENT_TRANSFER],
- doc = """
- Sets how the scheduler will handle the
- preparation overhead of leases. Valid values are:
-
- - unmanaged: The scheduler can assume that there
- is no deployment overhead, or that some
- other entity is taking care of it (e.g., one
- of the enactment backends)
- - predeployed-images: The scheduler can assume that
- all required disk images are predeployed on the
- physical nodes. This is different from "unmanaged"
- because the scheduler may still have to handle
- making local copies of the predeployed images before
- a lease can start.
- - imagetransfer: A disk image has to be transferred
- from a repository node before the lease can start.
- """),
-
- Option(name = "datafile",
- getter = "datafile",
- type = OPTTYPE_STRING,
- required = False,
- default = None,
- doc = """
- This is the file where statistics on
- the scheduler's run will be saved to (waiting time of leases,
- utilization data, etc.). If omitted, no data will be saved.
- """),
-
- Option(name = "attributes",
- getter = "attributes",
- type = OPTTYPE_STRING,
- required = False,
- doc = """
- This option is used internally by Haizea when using
- multiconfiguration files. See the multiconfiguration
- documentation for more details.
- """)
- ]
-
- sections.append(general)
-
- # ============================= #
- # #
- # SCHEDULING OPTIONS #
- # #
- # ============================= #
-
- scheduling = Section("scheduling", required=True,
- doc = "The options in this section control how Haizea schedules leases.")
- scheduling.options = \
- [
- Option(name = "wakeup-interval",
- getter = "wakeup-interval",
- type = OPTTYPE_TIMEDELTA,
- required = False,
- default = TimeDelta(seconds=60),
- doc = """
- Interval at which Haizea will wake up
- to manage resources and process pending requests.
- This option is not used when using a simulated clock,
- since the clock will skip directly to the time where an
- event is happening.
- """),
-
- Option(name = "backfilling",
- getter = "backfilling",
- type = OPTTYPE_STRING,
- required = False,
- default = None,
- valid = [constants.BACKFILLING_OFF,
- constants.BACKFILLING_AGGRESSIVE,
- constants.BACKFILLING_CONSERVATIVE,
- constants.BACKFILLING_INTERMEDIATE],
- doc = """
- Backfilling algorithm to use. Valid values are:
-
- - off: don't do backfilling
- - aggressive: at most 1 reservation in the future
- - conservative: unlimited reservations in the future
- - intermediate: N reservations in the future (N is specified
- in the backfilling-reservations option)
- """),
-
- Option(name = "backfilling-reservations",
- getter = "backfilling-reservations",
- type = OPTTYPE_INT,
- required = False,
- required_if = [(("scheduling","backfilling"),constants.BACKFILLING_INTERMEDIATE)],
- doc = """
- Number of future reservations to allow when
- using the "intermediate" backfilling option.
- """),
-
- Option(name = "suspension",
- getter = "suspension",
- type = OPTTYPE_STRING,
- required = True,
- valid = [constants.SUSPENSION_NONE,
- constants.SUSPENSION_SERIAL,
- constants.SUSPENSION_ALL],
- doc = """
- Specifies what can be suspended. Valid values are:
-
- - none: suspension is never allowed
- - serial-only: only 1-node leases can be suspended
- - all: any lease can be suspended
- """),
-
- Option(name = "suspendresume-exclusion",
- getter = "suspendresume-exclusion",
- type = OPTTYPE_STRING,
- required = False,
- default = constants.SUSPRES_EXCLUSION_LOCAL,
- valid = [constants.SUSPRES_EXCLUSION_LOCAL,
- constants.SUSPRES_EXCLUSION_GLOBAL],
- doc = """
- Documentation
- """),
-
- Option(name = "scheduling-threshold-factor",
- getter = "scheduling-threshold-factor",
- type = OPTTYPE_INT,
- required = False,
- default = 1,
- doc = """
- Documentation
- """),
-
- Option(name = "force-scheduling-threshold",
- getter = "force-scheduling-threshold",
- type = OPTTYPE_TIMEDELTA,
- required = False,
- doc = """
- Documentation
- """),
-
- Option(name = "migration",
- getter = "migration",
- type = OPTTYPE_BOOLEAN,
- required = True,
- doc = """
- Specifies whether leases can be migrated from one
- physical node to another. Valid values are "True" or "False"
- """),
-
- Option(name = "what-to-migrate",
- getter = "what-to-migrate",
- type = OPTTYPE_STRING,
- required = False,
- required_if = [(("scheduling","migration"),True)],
- default = constants.MIGRATE_NONE,
- valid = [constants.MIGRATE_NONE,
- constants.MIGRATE_MEM,
- constants.MIGRATE_MEMDISK],
- doc = """
- Specifies what data has to be moved around when
- migrating a lease. Valid values are:
-
- - nothing: migration can be performed without transferring any
- files.
- - mem: only the memory must be transferred
- - mem+disk: both the memory and the VM disk image must be
- transferred
- """),
-
- Option(name = "non-schedulable-interval",
- getter = "non-schedulable-interval",
- type = OPTTYPE_TIMEDELTA,
- required = False,
- default = TimeDelta(seconds=10),
- doc = """
- The minimum amount of time that must pass between
- when a request is scheduled to when it can actually start.
- The default should be good for most configurations, but
- may need to be increased if you're dealing with exceptionally
- high loads.
- """)
-
- ]
- sections.append(scheduling)
-
- # ============================= #
- # #
- # SIMULATION OPTIONS #
- # #
- # ============================= #
-
- simulation = Section("simulation", required=False,
- required_if = [(("general","mode"),"simulated")],
- doc = "This section is used to specify options when Haizea runs in simulation" )
- simulation.options = \
- [
- Option(name = "clock",
- getter = "clock",
- type = OPTTYPE_STRING,
- required = False,
- default = constants.CLOCK_REAL,
- valid = [constants.CLOCK_REAL,
- constants.CLOCK_SIMULATED],
- doc = """
- Type of clock to use in simulation:
-
- - "simulated": A simulated clock that fastforwards through
- time. Can only use the tracefile request
- frontend
- - "real": A real clock is used, but simulated resources and
- enactment actions are used. Can only use the RPC
- request frontend.
- """),
-
- Option(name = "starttime",
- getter = "starttime",
- type = OPTTYPE_DATETIME,
- required = False,
- required_if = [(("simulation","clock"),constants.CLOCK_SIMULATED)],
- doc = """
- Time at which simulated clock will start.
- """),
-
- Option(name = "nodes",
- getter = "simul.nodes",
- type = OPTTYPE_INT,
- required = True,
- doc = """
- Number of nodes in the simulated cluster
- """) ,
-
- Option(name = "resources",
- getter = "simul.resources",
- type = OPTTYPE_STRING,
- required = True,
- doc = """
- Resources in each node. Five types of resources
- are recognized right now:
-
- - CPU: Number of processors per node
- - Mem: Memory (in MB)
- - Net (in): Inbound network bandwidth (in Mbps)
- - Net (out): Outbound network bandwidth (in Mbps)
- - Disk: Disk space in MB (not counting space for disk cache)
- """),
-
- Option(name = "imagetransfer-bandwidth",
- getter = "imagetransfer-bandwidth",
- type = OPTTYPE_INT,
- required = True,
- doc = """
- Bandwidth (in Mbps) available for image transfers.
- This would correspond to the outbound network bandwidth of the
- node where the images are stored.
- """),
-
- Option(name = "suspendresume-rate",
- getter = "simul.suspendresume-rate",
- type = OPTTYPE_FLOAT,
- required = True,
- doc = """
- Rate at which VMs are assumed to suspend (in MB of
- memory per second)
- """),
-
- Option(name = "stop-when",
- getter = "stop-when",
- type = OPTTYPE_STRING,
- required = False,
- default = constants.STOPWHEN_ALLDONE,
- valid = [constants.STOPWHEN_ALLDONE,
- constants.STOPWHEN_BESUBMITTED,
- constants.STOPWHEN_BEDONE],
- doc = """
- When using the simulated clock, this specifies when the
- simulation must end. Valid options are:
-
- - all-leases-done: All requested leases have been completed
- and there are no queued/pending requests.
- - besteffort-submitted: When all best-effort leases have been
- submitted.
- - besteffort-done: When all best-effort leases have been
- completed.
- """),
-
- Option(name = "status-message-interval",
- getter = "status-message-interval",
- type = OPTTYPE_INT,
- required = False,
- default = None,
- doc = """
- If specified, the simulated clock will print a status
- message with some basic statistics. This is useful to keep track
- of long simulations. The interval is specified in minutes.
- """)
-
- ]
- sections.append(simulation)
-
-
- # ============================= #
- # #
- # DEPLOYMENT OPTIONS #
- # (w/ image transfers) #
- # #
- # ============================= #
-
- imgtransfer = Section("deploy-imagetransfer", required=False,
- required_if = [(("general","lease-deployment"),"imagetransfer")],
- doc = """
- When lease deployment with disk image transfers is selected,
- this section is used to control image deployment parameters.""")
- imgtransfer.options = \
- [
- Option(name = "transfer-mechanism",
- getter = "transfer-mechanism",
- type = OPTTYPE_STRING,
- required = True,
- valid = [constants.TRANSFER_UNICAST,
- constants.TRANSFER_MULTICAST],
- doc = """
- Specifies how disk images are transferred. Valid values are:
-
- - unicast: A disk image can be transferred to just one node at a time
- (NOTE: Not currently supported)
- - multicast: A disk image can be multicast to multiple nodes at
- the same time.
- """),
-
- Option(name = "avoid-redundant-transfers",
- getter = "avoid-redundant-transfers",
- type = OPTTYPE_BOOLEAN,
- required = False,
- default = True,
- doc = """
- Specifies whether the scheduler should take steps to
- detect and avoid redundant transfers (e.g., if two leases are
- scheduled on the same node, and they both require the same disk
- image, don't transfer the image twice; allow one to "piggyback"
- on the other). There is generally no reason to set this option
- to False.
- """),
-
- Option(name = "force-imagetransfer-time",
- getter = "force-imagetransfer-time",
- type = OPTTYPE_TIMEDELTA,
- required = False,
- doc = """
- Documentation
- """),
-
- Option(name = "diskimage-reuse",
- getter = "diskimage-reuse",
- type = OPTTYPE_STRING,
- required = False,
- required_if = None,
- default = constants.REUSE_NONE,
- valid = [constants.REUSE_NONE,
- constants.REUSE_IMAGECACHES],
- doc = """
- Specifies whether disk image caches should be created
- on the nodes, so the scheduler can reduce the number of transfers
- by reusing images. Valid values are:
-
- - none: No image reuse
- - image-caches: Use image caching algorithm described in Haizea
- publications
- """),
-
- Option(name = "diskimage-cache-size",
- getter = "diskimage-cache-size",
- type = OPTTYPE_INT,
- required = False,
- required_if = [(("deploy-imagetransfer","diskimage-reuse"),True)],
- doc = """
- Specifies the size (in MB) of the disk image cache on
- each physical node.
- """)
- ]
- sections.append(imgtransfer)
-
- # ============================= #
- # #
- # TRACEFILE OPTIONS #
- # #
- # ============================= #
-
- tracefile = Section("tracefile", required=False,
- doc="""
- When reading in requests from a tracefile, this section is used
- to specify the tracefile and other parameters.""")
- tracefile.options = \
- [
- Option(name = "tracefile",
- getter = "tracefile",
- type = OPTTYPE_STRING,
- required = True,
- doc = """
- Path to tracefile to use.
- """),
-
- Option(name = "imagefile",
- getter = "imagefile",
- type = OPTTYPE_STRING,
- required = False,
- doc = """
- Path to list of images to append to lease requests.
- If omitted, the images in the tracefile are used.
- """),
-
- Option(name = "injectionfile",
- getter = "injectionfile",
- type = OPTTYPE_STRING,
- required = False,
- doc = """
- Path to file with leases to "inject" into the tracefile.
- """),
-
- Option(name = "add-overhead",
- getter = "add-overhead",
- type = OPTTYPE_STRING,
- required = False,
- default = constants.RUNTIMEOVERHEAD_NONE,
- valid = [constants.RUNTIMEOVERHEAD_NONE,
- constants.RUNTIMEOVERHEAD_ALL,
- constants.RUNTIMEOVERHEAD_BE],
- doc = """
- Documentation
- """),
-
- Option(name = "bootshutdown-overhead",
- getter = "bootshutdown-overhead",
- type = OPTTYPE_TIMEDELTA,
- required = False,
- default = TimeDelta(seconds=0),
- doc = """
-                     Specifies how many seconds will be allotted to
- boot and shutdown of the lease.
- """),
-
- Option(name = "runtime-slowdown-overhead",
- getter = "runtime-slowdown-overhead",
- type = OPTTYPE_FLOAT,
- required = False,
- default = 0,
- doc = """
- Adds a runtime overhead (in %) to the lease duration.
- """)
-
- ]
- sections.append(tracefile)
-
- # ============================= #
- # #
- # OPENNEBULA OPTIONS #
- # #
- # ============================= #
-
- opennebula = Section("opennebula", required=False,
- required_if = [(("general","mode"),"opennebula")],
- doc = """
- This section is used to specify OpenNebula parameters,
- necessary when using Haizea as an OpenNebula scheduling backend.""")
- opennebula.options = \
- [
- Option(name = "db",
- getter = "one.db",
- type = OPTTYPE_STRING,
- required = True,
- doc = """
- Location of OpenNebula database.
- """),
-
- Option(name = "onevm",
- getter = "onevm",
- type = OPTTYPE_STRING,
- required = True,
- doc = """
- Location of OpenNebula "onevm" command.
- """),
-
- Option(name = "suspendresume-rate-estimate",
- getter = "one.suspendresume-rate-estimate",
- type = OPTTYPE_FLOAT,
- required = False,
- default = 32,
- doc = """
- Rate at which VMs are estimated to suspend (in MB of
- memory per second)
- """),
-
- Option(name = "stop-when-no-more-leases",
- getter = "stop-when-no-more-leases",
- type = OPTTYPE_BOOLEAN,
- required = False,
- default = False,
- doc = """
- This option is useful for testing and running experiments.
- If set to True, Haizea will stop when there are no more leases
-                     to process (which allows you to run Haizea+OpenNebula unattended,
- and count on it stopping when there are no more leases to process).
- For now, this only makes sense if you're seeding Haizea with requests from
- the start (otherwise, it will start and immediately stop).
- """),
-
- Option(name = "dry-run",
- getter = "dry-run",
- type = OPTTYPE_BOOLEAN,
- required = False,
- default = False,
- doc = """
- This option is useful for testing.
- If set to True, Haizea will fast-forward through time (note that this is
-                     different than using the simulated clock, which has to be used with a tracefile;
-                     with a Haizea/OpenNebula dry run, you will have to seed OpenNebula with requests
- before starting Haizea). You will generally want to set stop-when-no-more-leases
- when doing a dry-run.
-
- IMPORTANT: Haizea will still send out enactment commands to OpenNebula. Make
- sure you replace onevm with a dummy command that does nothing (or that reacts
- in some way you want to test; e.g., by emulating a deployment failure, etc.)
- """),
-
- ]
- sections.append(opennebula)
-
- def __init__(self, config):
- Config.__init__(self, config, self.sections)
-
- self.attrs = {}
- if self._options["attributes"] != None:
- self.attrs = {}
- attrs = self._options["attributes"].split(";")
- for attr in attrs:
- (k,v) = attr.split("=")
- self.attrs[k] = v
-
- def get_attr(self, attr):
- return self.attrs[attr]
-
- def get_attrs(self):
- return self.attrs.keys()
-
-
-class HaizeaMultiConfig(Config):
-
- MULTI_SEC = "multi"
- COMMON_SEC = "common"
- TRACEDIR_OPT = "tracedir"
- TRACEFILES_OPT = "tracefiles"
- INJDIR_OPT = "injectiondir"
- INJFILES_OPT = "injectionfiles"
- DATADIR_OPT = "datadir"
-
- def __init__(self, config):
- # TODO: Define "multi" section as a Section object
- Config.__init__(self, config, [])
-
- def get_profiles(self):
- sections = set([s.split(":")[0] for s in self.config.sections()])
- # Remove multi and common sections
- sections.difference_update([self.COMMON_SEC, self.MULTI_SEC])
- return list(sections)
-
- def get_trace_files(self):
- dir = self.config.get(self.MULTI_SEC, self.TRACEDIR_OPT)
- traces = self.config.get(self.MULTI_SEC, self.TRACEFILES_OPT).split()
- return [dir + "/" + t for t in traces]
-
- def get_inject_files(self):
- dir = self.config.get(self.MULTI_SEC, self.INJDIR_OPT)
- inj = self.config.get(self.MULTI_SEC, self.INJFILES_OPT).split()
- inj = [dir + "/" + i for i in inj]
- inj.append(None)
- return inj
-
- def get_configs(self):
- profiles = self.get_profiles()
- tracefiles = self.get_trace_files()
- injectfiles = self.get_inject_files()
-
- configs = []
- for profile in profiles:
- for tracefile in tracefiles:
- for injectfile in injectfiles:
- profileconfig = ConfigParser.ConfigParser()
- commonsections = [s for s in self.config.sections() if s.startswith("common:")]
- profilesections = [s for s in self.config.sections() if s.startswith(profile +":")]
- sections = commonsections + profilesections
- for s in sections:
- s_noprefix = s.split(":")[1]
- items = self.config.items(s)
- if not profileconfig.has_section(s_noprefix):
- profileconfig.add_section(s_noprefix)
- for item in items:
- profileconfig.set(s_noprefix, item[0], item[1])
-
- # The tracefile section may have not been created
- if not profileconfig.has_section("tracefile"):
- profileconfig.add_section("tracefile")
-
- # Add tracefile option
- profileconfig.set("tracefile", "tracefile", tracefile)
-
- # Add injected file option
- if injectfile == None:
- inj = "None"
- else:
- inj = injectfile
- profileconfig.set("tracefile", "injectionfile", inj)
-
- # Add datafile option
- datadir = self.config.get(self.MULTI_SEC, self.DATADIR_OPT)
- datafilename = generate_config_name(profile, tracefile, injectfile)
- datafile = datadir + "/" + datafilename + ".dat"
- profileconfig.set("general", "datafile", datafile)
-
- # Set "attributes" option (only used internally)
- attrs = {"profile":profile}
- # TODO: Load additional attributes from trace/injfiles
- attrs_str = ",".join(["%s=%s" % (k,v) for (k,v) in attrs.items()])
- profileconfig.set("general", "attributes", attrs_str)
-
- try:
- c = HaizeaConfig(profileconfig)
- except ConfigException, msg:
- print >> sys.stderr, "Error in configuration file:"
- print >> sys.stderr, msg
- exit(1)
- configs.append(c)
-
- return configs
\ No newline at end of file
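
The configfile.py module listed above declares every configuration section and option programmatically; at runtime Haizea hands a plain ConfigParser object to HaizeaConfig, which validates it against those declarations. The short sketch below is illustrative only and is not part of this changeset: the option values, the starttime format, and the resources string syntax are assumptions chosen to satisfy the required options, not values taken from the repository.

# Illustrative sketch (not part of this changeset): parse a minimal
# "simulated" configuration and hand it to HaizeaConfig, mirroring what
# HaizeaMultiConfig.get_configs() does with each generated profile config.
# All option values, and the "starttime"/"resources" formats, are assumed.
import ConfigParser
import StringIO

from haizea.resourcemanager.configfile import HaizeaConfig

SAMPLE_CONF = """
[general]
mode: simulated

[scheduling]
suspension: all
migration: False

[simulation]
clock: simulated
starttime: 2008-10-20 00:00:00
nodes: 4
resources: CPU,1;Mem,1024;Net (in),100;Net (out),100;Disk,20000
imagetransfer-bandwidth: 100
suspendresume-rate: 32

[tracefile]
tracefile: /path/to/trace.lwf
"""

cp = ConfigParser.ConfigParser()
cp.readfp(StringIO.StringIO(SAMPLE_CONF))
config = HaizeaConfig(cp)   # raises ConfigException if an option is invalid
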
Copied: branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/configfile.py (from rev 520, trunk/src/haizea/resourcemanager/configfile.py)
===================================================================
--- branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/configfile.py (rev 0)
+++ branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/configfile.py 2008-10-20 16:50:12 UTC (rev 537)
@@ -0,0 +1,742 @@
+# -------------------------------------------------------------------------- #
+# Copyright 2006-2008, University of Chicago #
+# Copyright 2008, Distributed Systems Architecture Group, Universidad #
+# Complutense de Madrid (dsa-research.org) #
+# #
+# Licensed under the Apache License, Version 2.0 (the "License"); you may #
+# not use this file except in compliance with the License. You may obtain #
+# a copy of the License at #
+# #
+# http://www.apache.org/licenses/LICENSE-2.0 #
+# #
+# Unless required by applicable law or agreed to in writing, software #
+# distributed under the License is distributed on an "AS IS" BASIS, #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
+# See the License for the specific language governing permissions and #
+# limitations under the License. #
+# -------------------------------------------------------------------------- #
+
+from haizea.common.config import ConfigException, Section, Option, Config, OPTTYPE_INT, OPTTYPE_FLOAT, OPTTYPE_STRING, OPTTYPE_BOOLEAN, OPTTYPE_DATETIME, OPTTYPE_TIMEDELTA
+from haizea.common.utils import generate_config_name
+import haizea.common.constants as constants
+import os.path
+import sys
+from mx.DateTime import TimeDelta
+import ConfigParser
+
+class HaizeaConfig(Config):
+
+ sections = []
+
+ # ============================= #
+ # #
+ # GENERAL OPTIONS #
+ # #
+ # ============================= #
+
+ general = Section("general", required=True,
+ doc = "This section is used for general options affecting Haizea as a whole.")
+ general.options = \
+ [
+ Option(name = "loglevel",
+ getter = "loglevel",
+ type = OPTTYPE_STRING,
+ required = False,
+ default = "INFO",
+ valid = ["STATUS","INFO","DEBUG","VDEBUG"],
+ doc = """
+ Controls the level (and amount) of
+ log messages. Valid values are:
+
+ - STATUS: Only print status messages
+                        - INFO: Slightly more verbose than STATUS
+ - DEBUG: Prints information useful for debugging the scheduler.
+ - VDEBUG: Prints very verbose information
+ on the scheduler's internal data structures. Use only
+ for short runs.
+ """),
+
+ Option(name = "logfile",
+ getter = "logfile",
+ type = OPTTYPE_STRING,
+ required = False,
+ default = "/var/tmp/haizea.log",
+ doc = """
+ When running Haizea as a daemon, this option specifies the file
+ that log messages should be written to.
+ """),
+
+ Option(name = "mode",
+ getter = "mode",
+ type = OPTTYPE_STRING,
+ required = True,
+ valid = ["simulated","opennebula"],
+ doc = """
+ Sets the mode the scheduler will run in.
+ Currently the only valid values are "simulated" and
+ "opennebula". The "simulated" mode expects lease
+ requests to be provided through a trace file, and
+ all enactment is simulated. The "opennebula" mode
+ interacts with the OpenNebula virtual infrastructure
+ manager (http://www.opennebula.org/) to obtain lease
+ requests and to do enactment on physical resources.
+ """),
+
+ Option(name = "lease-preparation",
+ getter = "lease-preparation",
+ type = OPTTYPE_STRING,
+ required = False,
+ default = constants.DEPLOYMENT_UNMANAGED,
+ valid = [constants.DEPLOYMENT_UNMANAGED,
+ constants.DEPLOYMENT_TRANSFER],
+ doc = """
+ Sets how the scheduler will handle the
+ preparation overhead of leases. Valid values are:
+
+ - unmanaged: The scheduler can assume that there
+ is no deployment overhead, or that some
+ other entity is taking care of it (e.g., one
+ of the enactment backends)
+ - imagetransfer: A disk image has to be transferred
+ from a repository node before the lease can start.
+ """),
+
+ Option(name = "datafile",
+ getter = "datafile",
+ type = OPTTYPE_STRING,
+ required = False,
+ default = None,
+ doc = """
+ This is the file where statistics on
+ the scheduler's run will be saved to (waiting time of leases,
+ utilization data, etc.). If omitted, no data will be saved.
+ """),
+
+ Option(name = "attributes",
+ getter = "attributes",
+ type = OPTTYPE_STRING,
+ required = False,
+ doc = """
+ This option is used internally by Haizea when using
+ multiconfiguration files. See the multiconfiguration
+ documentation for more details.
+ """)
+ ]
+
+ sections.append(general)
+
+ # ============================= #
+ # #
+ # SCHEDULING OPTIONS #
+ # #
+ # ============================= #
+
+ scheduling = Section("scheduling", required=True,
+ doc = "The options in this section control how Haizea schedules leases.")
+ scheduling.options = \
+ [
+ Option(name = "wakeup-interval",
+ getter = "wakeup-interval",
+ type = OPTTYPE_TIMEDELTA,
+ required = False,
+ default = TimeDelta(seconds=60),
+ doc = """
+ Interval at which Haizea will wake up
+ to manage resources and process pending requests.
+ This option is not used when using a simulated clock,
+ since the clock will skip directly to the time where an
+ event is happening.
+ """),
+
+ Option(name = "backfilling",
+ getter = "backfilling",
+ type = OPTTYPE_STRING,
+ required = False,
+ default = None,
+ valid = [constants.BACKFILLING_OFF,
+ constants.BACKFILLING_AGGRESSIVE,
+ constants.BACKFILLING_CONSERVATIVE,
+ constants.BACKFILLING_INTERMEDIATE],
+ doc = """
+ Backfilling algorithm to use. Valid values are:
+
+ - off: don't do backfilling
+ - aggressive: at most 1 reservation in the future
+ - conservative: unlimited reservations in the future
+ - intermediate: N reservations in the future (N is specified
+ in the backfilling-reservations option)
+ """),
+
+ Option(name = "backfilling-reservations",
+ getter = "backfilling-reservations",
+ type = OPTTYPE_INT,
+ required = False,
+ required_if = [(("scheduling","backfilling"),constants.BACKFILLING_INTERMEDIATE)],
+ doc = """
+ Number of future reservations to allow when
+ using the "intermediate" backfilling option.
+ """),
+
+ Option(name = "suspension",
+ getter = "suspension",
+ type = OPTTYPE_STRING,
+ required = True,
+ valid = [constants.SUSPENSION_NONE,
+ constants.SUSPENSION_SERIAL,
+ constants.SUSPENSION_ALL],
+ doc = """
+ Specifies what can be suspended. Valid values are:
+
+ - none: suspension is never allowed
+ - serial-only: only 1-node leases can be suspended
+ - all: any lease can be suspended
+ """),
+
+ Option(name = "suspendresume-exclusion",
+ getter = "suspendresume-exclusion",
+ type = OPTTYPE_STRING,
+ required = False,
+ default = constants.SUSPRES_EXCLUSION_LOCAL,
+ valid = [constants.SUSPRES_EXCLUSION_LOCAL,
+ constants.SUSPRES_EXCLUSION_GLOBAL],
+ doc = """
+ When suspending or resuming a VM, the VM's memory is dumped to a
+ file on disk. To correctly estimate the time required to suspend
+ a lease with multiple VMs, Haizea makes sure that no two
+ suspensions/resumptions happen at the same time (e.g., if eight
+ memory files were being saved at the same time to disk, the disk's
+ performance would be reduced in a way that is not as easy to estimate
+ as if only one file were being saved at a time).
+
+ Depending on whether the files are being saved to/read from a global
+ or local filesystem, this exclusion can be either global or local.
+ """),
+
+ Option(name = "scheduling-threshold-factor",
+ getter = "scheduling-threshold-factor",
+ type = OPTTYPE_INT,
+ required = False,
+ default = 1,
+ doc = """
+ To avoid thrashing, Haizea will not schedule a lease unless all overheads
+ can be correctly scheduled (which includes image transfers, suspensions, etc.).
+ However, this can still result in situations where a lease is prepared,
+ and then immediately suspended because of a blocking lease in the future.
+ The scheduling threshold factor can be used to specify that a lease must
+ not be scheduled unless it is guaranteed to run for a minimum amount of
+ time (the rationale behind this is that you ideally don't want leases
+ to be scheduled if they're not going to be active for at least as much time
+ as was spent in overheads).
+
+ The default value is 1, meaning that the lease will be active for at least
+ as much time T as was spent on overheads (e.g., if preparing the lease requires
+ 60 seconds, and we know that it will have to be suspended, requiring 30 seconds,
+                     Haizea won't schedule the lease unless it can run for at least 90 seconds).
+                     In other words, a scheduling factor of F requires a minimum duration of
+ F*T. A value of 0 could lead to thrashing, since Haizea could end up with
+ situations where a lease starts and immediately gets suspended.
+ """),
+
+ Option(name = "force-scheduling-threshold",
+ getter = "force-scheduling-threshold",
+ type = OPTTYPE_TIMEDELTA,
+ required = False,
+ doc = """
+ This option can be used to force a specific scheduling threshold time
+ to be used, instead of calculating one based on overheads.
+ """),
+
+ Option(name = "migration",
+ getter = "migration",
+ type = OPTTYPE_BOOLEAN,
+ required = True,
+ doc = """
+ Specifies whether leases can be migrated from one
+ physical node to another. Valid values are "True" or "False"
+ """),
+
+ Option(name = "what-to-migrate",
+ getter = "what-to-migrate",
+ type = OPTTYPE_STRING,
+ required = False,
+ required_if = [(("scheduling","migration"),True)],
+ default = constants.MIGRATE_NONE,
+ valid = [constants.MIGRATE_NONE,
+ constants.MIGRATE_MEM,
+ constants.MIGRATE_MEMDISK],
+ doc = """
+ Specifies what data has to be moved around when
+ migrating a lease. Valid values are:
+
+ - nothing: migration can be performed without transferring any
+ files.
+ - mem: only the memory must be transferred
+ - mem+disk: both the memory and the VM disk image must be
+ transferred
+ """),
+
+ Option(name = "non-schedulable-interval",
+ getter = "non-schedulable-interval",
+ type = OPTTYPE_TIMEDELTA,
+ required = False,
+ default = TimeDelta(seconds=10),
+ doc = """
+ The minimum amount of time that must pass between
+                     when a request is scheduled and when it can actually start.
+ The default should be good for most configurations, but
+ may need to be increased if you're dealing with exceptionally
+ high loads.
+ """)
+
+ ]
+ sections.append(scheduling)
+
+ # ============================= #
+ # #
+ # SIMULATION OPTIONS #
+ # #
+ # ============================= #
+
+ simulation = Section("simulation", required=False,
+ required_if = [(("general","mode"),"simulated")],
+ doc = "This section is used to specify options when Haizea runs in simulation" )
+ simulation.options = \
+ [
+ Option(name = "clock",
+ getter = "clock",
+ type = OPTTYPE_STRING,
+ required = False,
+ default = constants.CLOCK_REAL,
+ valid = [constants.CLOCK_REAL,
+ constants.CLOCK_SIMULATED],
+ doc = """
+ Type of clock to use in simulation:
+
+                        - simulated: A simulated clock that fast-forwards through
+ time. Can only use the tracefile request
+ frontend
+ - real: A real clock is used, but simulated resources and
+ enactment actions are used. Can only use the RPC
+ request frontend.
+ """),
+
+ Option(name = "starttime",
+ getter = "starttime",
+ type = OPTTYPE_DATETIME,
+ required = False,
+ required_if = [(("simulation","clock"),constants.CLOCK_SIMULATED)],
+ doc = """
+ Time at which simulated clock will start.
+ """),
+
+ Option(name = "nodes",
+ getter = "simul.nodes",
+ type = OPTTYPE_INT,
+ required = True,
+ doc = """
+ Number of nodes in the simulated cluster
+ """) ,
+
+ Option(name = "resources",
+ getter = "simul.resources",
+ type = OPTTYPE_STRING,
+ required = True,
+ doc = """
+ Resources in each node. Five types of resources
+ are recognized right now:
+
+ - CPU: Number of processors per node
+ - Mem: Memory (in MB)
+ - Net (in): Inbound network bandwidth (in Mbps)
+ - Net (out): Outbound network bandwidth (in Mbps)
+ - Disk: Disk space in MB (not counting space for disk cache)
+ """),
+
+ Option(name = "imagetransfer-bandwidth",
+ getter = "imagetransfer-bandwidth",
+ type = OPTTYPE_INT,
+ required = True,
+ doc = """
+ Bandwidth (in Mbps) available for image transfers.
+ This would correspond to the outbound network bandwidth of the
+ node where the images are stored.
+ """),
+
+ Option(name = "suspendresume-rate",
+ getter = "simul.suspendresume-rate",
+ type = OPTTYPE_FLOAT,
+ required = True,
+ doc = """
+ Rate at which VMs are assumed to suspend (in MB of
+ memory per second)
+ """),
+
+ Option(name = "stop-when",
+ getter = "stop-when",
+ type = OPTTYPE_STRING,
+ required = False,
+ default = constants.STOPWHEN_ALLDONE,
+ valid = [constants.STOPWHEN_ALLDONE,
+ constants.STOPWHEN_BESUBMITTED,
+ constants.STOPWHEN_BEDONE],
+ doc = """
+ When using the simulated clock, this specifies when the
+ simulation must end. Valid options are:
+
+ - all-leases-done: All requested leases have been completed
+ and there are no queued/pending requests.
+ - besteffort-submitted: When all best-effort leases have been
+ submitted.
+ - besteffort-done: When all best-effort leases have been
+ completed.
+ """),
+
+ Option(name = "status-message-interval",
+ getter = "status-message-interval",
+ type = OPTTYPE_INT,
+ required = False,
+ default = None,
+ doc = """
+ If specified, the simulated clock will print a status
+ message with some basic statistics. This is useful to keep track
+ of long simulations. The interval is specified in minutes.
+ """)
+
+ ]
+ sections.append(simulation)
+
+
+ # ============================= #
+ # #
+ # DEPLOYMENT OPTIONS #
+ # (w/ image transfers) #
+ # #
+ # ============================= #
+
+ imgtransfer = Section("deploy-imagetransfer", required=False,
+ required_if = [(("general","lease-deployment"),"imagetransfer")],
+ doc = """
+ When lease deployment with disk image transfers is selected,
+ this section is used to control image deployment parameters.""")
+ imgtransfer.options = \
+ [
+ Option(name = "transfer-mechanism",
+ getter = "transfer-mechanism",
+ type = OPTTYPE_STRING,
+ required = True,
+ valid = [constants.TRANSFER_UNICAST,
+ constants.TRANSFER_MULTICAST],
+ doc = """
+ Specifies how disk images are transferred. Valid values are:
+
+ - unicast: A disk image can be transferred to just one node at a time
+ (NOTE: Not currently supported)
+ - multicast: A disk image can be multicast to multiple nodes at
+ the same time.
+ """),
+
+ Option(name = "avoid-redundant-transfers",
+ getter = "avoid-redundant-transfers",
+ type = OPTTYPE_BOOLEAN,
+ required = False,
+ default = True,
+ doc = """
+ Specifies whether the scheduler should take steps to
+ detect and avoid redundant transfers (e.g., if two leases are
+ scheduled on the same node, and they both require the same disk
+ image, don't transfer the image twice; allow one to "piggyback"
+ on the other). There is generally no reason to set this option
+ to False.
+ """),
+
+ Option(name = "force-imagetransfer-time",
+ getter = "force-imagetransfer-time",
+ type = OPTTYPE_TIMEDELTA,
+ required = False,
+ doc = """
+ Forces the image transfer time to a specific amount.
+                     This option is intended for testing purposes.
+ """),
+
+ Option(name = "diskimage-reuse",
+ getter = "diskimage-reuse",
+ type = OPTTYPE_STRING,
+ required = False,
+ required_if = None,
+ default = constants.REUSE_NONE,
+ valid = [constants.REUSE_NONE,
+ constants.REUSE_IMAGECACHES],
+ doc = """
+ Specifies whether disk image caches should be created
+ on the nodes, so the scheduler can reduce the number of transfers
+ by reusing images. Valid values are:
+
+ - none: No image reuse
+ - image-caches: Use image caching algorithm described in Haizea
+ publications
+ """),
+
+ Option(name = "diskimage-cache-size",
+ getter = "diskimage-cache-size",
+ type = OPTTYPE_INT,
+ required = False,
+ required_if = [(("deploy-imagetransfer","diskimage-reuse"),True)],
+ doc = """
+ Specifies the size (in MB) of the disk image cache on
+ each physical node.
+ """)
+ ]
+ sections.append(imgtransfer)
+
+ # ============================= #
+ # #
+ # TRACEFILE OPTIONS #
+ # #
+ # ============================= #
+
+ tracefile = Section("tracefile", required=False,
+ doc="""
+ When reading in requests from a tracefile, this section is used
+ to specify the tracefile and other parameters.""")
+ tracefile.options = \
+ [
+ Option(name = "tracefile",
+ getter = "tracefile",
+ type = OPTTYPE_STRING,
+ required = True,
+ doc = """
+ Path to tracefile to use.
+ """),
+
+ Option(name = "imagefile",
+ getter = "imagefile",
+ type = OPTTYPE_STRING,
+ required = False,
+ doc = """
+ Path to list of images to append to lease requests.
+ If omitted, the images in the tracefile are used.
+ """),
+
+ Option(name = "injectionfile",
+ getter = "injectionfile",
+ type = OPTTYPE_STRING,
+ required = False,
+ doc = """
+ Path to file with leases to "inject" into the tracefile.
+ """),
+
+ Option(name = "runtime-slowdown-overhead",
+ getter = "runtime-slowdown-overhead",
+ type = OPTTYPE_FLOAT,
+ required = False,
+ default = 0,
+ doc = """
+ Adds a runtime overhead (in %) to the lease duration.
+ """),
+
+ Option(name = "add-overhead",
+ getter = "add-overhead",
+ type = OPTTYPE_STRING,
+ required = False,
+ default = constants.RUNTIMEOVERHEAD_NONE,
+ valid = [constants.RUNTIMEOVERHEAD_NONE,
+ constants.RUNTIMEOVERHEAD_ALL,
+ constants.RUNTIMEOVERHEAD_BE],
+ doc = """
+                     Specifies which leases will have a runtime overhead added:
+
+                     - none: No runtime overhead is added.
+ - besteffort: Add only to best-effort leases
+ - all: Add runtime overhead to all leases
+ """),
+
+ Option(name = "bootshutdown-overhead",
+ getter = "bootshutdown-overhead",
+ type = OPTTYPE_TIMEDELTA,
+ required = False,
+ default = TimeDelta(seconds=0),
+ doc = """
+                     Specifies how many seconds will be allotted to
+ boot and shutdown of the lease.
+ """)
+
+ ]
+ sections.append(tracefile)
+
+ # ============================= #
+ # #
+ # OPENNEBULA OPTIONS #
+ # #
+ # ============================= #
+
+ opennebula = Section("opennebula", required=False,
+ required_if = [(("general","mode"),"opennebula")],
+ doc = """
+ This section is used to specify OpenNebula parameters,
+ necessary when using Haizea as an OpenNebula scheduling backend.""")
+ opennebula.options = \
+ [
+ Option(name = "db",
+ getter = "one.db",
+ type = OPTTYPE_STRING,
+ required = True,
+ doc = """
+ Location of OpenNebula database.
+ """),
+
+ Option(name = "onevm",
+ getter = "onevm",
+ type = OPTTYPE_STRING,
+ required = True,
+ doc = """
+ Location of OpenNebula "onevm" command.
+ """),
+
+ Option(name = "suspendresume-rate-estimate",
+ getter = "one.suspendresume-rate-estimate",
+ type = OPTTYPE_FLOAT,
+ required = False,
+ default = 32,
+ doc = """
+ Rate at which VMs are estimated to suspend (in MB of
+ memory per second)
+ """),
+
+ Option(name = "stop-when-no-more-leases",
+ getter = "stop-when-no-more-leases",
+ type = OPTTYPE_BOOLEAN,
+ required = False,
+ default = False,
+ doc = """
+ This option is useful for testing and running experiments.
+ If set to True, Haizea will stop when there are no more leases
+                     to process (which allows you to run Haizea and OpenNebula unattended,
+ and count on it stopping when there are no more leases to process).
+ For now, this only makes sense if you're seeding Haizea with requests from
+ the start (otherwise, it will start and immediately stop).
+ """),
+
+ Option(name = "dry-run",
+ getter = "dry-run",
+ type = OPTTYPE_BOOLEAN,
+ required = False,
+ default = False,
+ doc = """
+ This option is useful for testing.
+ If set to True, Haizea will fast-forward through time (note that this is
+                     different than using the simulated clock, which has to be used with a tracefile;
+                     with a Haizea/OpenNebula dry run, you will have to seed OpenNebula with requests
+ before starting Haizea). You will generally want to set stop-when-no-more-leases
+ when doing a dry-run.
+
+ IMPORTANT: Haizea will still send out enactment commands to OpenNebula. Make
+ sure you replace onevm with a dummy command that does nothing (or that reacts
+ in some way you want to test; e.g., by emulating a deployment failure, etc.)
+ """),
+
+ ]
+ sections.append(opennebula)
+
+ def __init__(self, config):
+ Config.__init__(self, config, self.sections)
+
+ self.attrs = {}
+ if self._options["attributes"] != None:
+ self.attrs = {}
+ attrs = self._options["attributes"].split(";")
+ for attr in attrs:
+ (k,v) = attr.split("=")
+ self.attrs[k] = v
+
+ def get_attr(self, attr):
+ return self.attrs[attr]
+
+ def get_attrs(self):
+ return self.attrs.keys()
+
+
+class HaizeaMultiConfig(Config):
+
+ MULTI_SEC = "multi"
+ COMMON_SEC = "common"
+ TRACEDIR_OPT = "tracedir"
+ TRACEFILES_OPT = "tracefiles"
+ INJDIR_OPT = "injectiondir"
+ INJFILES_OPT = "injectionfiles"
+ DATADIR_OPT = "datadir"
+
+ def __init__(self, config):
+ # TODO: Define "multi" section as a Section object
+ Config.__init__(self, config, [])
+
+ def get_profiles(self):
+ sections = set([s.split(":")[0] for s in self.config.sections()])
+ # Remove multi and common sections
+ sections.difference_update([self.COMMON_SEC, self.MULTI_SEC])
+ return list(sections)
+
+ def get_trace_files(self):
+ dir = self.config.get(self.MULTI_SEC, self.TRACEDIR_OPT)
+ traces = self.config.get(self.MULTI_SEC, self.TRACEFILES_OPT).split()
+ return [dir + "/" + t for t in traces]
+
+ def get_inject_files(self):
+ dir = self.config.get(self.MULTI_SEC, self.INJDIR_OPT)
+ inj = self.config.get(self.MULTI_SEC, self.INJFILES_OPT).split()
+ inj = [dir + "/" + i for i in inj]
+ inj.append(None)
+ return inj
+
+ def get_configs(self):
+ profiles = self.get_profiles()
+ tracefiles = self.get_trace_files()
+ injectfiles = self.get_inject_files()
+
+ configs = []
+ for profile in profiles:
+ for tracefile in tracefiles:
+ for injectfile in injectfiles:
+ profileconfig = ConfigParser.ConfigParser()
+ commonsections = [s for s in self.config.sections() if s.startswith("common:")]
+ profilesections = [s for s in self.config.sections() if s.startswith(profile +":")]
+ sections = commonsections + profilesections
+ for s in sections:
+ s_noprefix = s.split(":")[1]
+ items = self.config.items(s)
+ if not profileconfig.has_section(s_noprefix):
+ profileconfig.add_section(s_noprefix)
+ for item in items:
+ profileconfig.set(s_noprefix, item[0], item[1])
+
+ # The tracefile section may have not been created
+ if not profileconfig.has_section("tracefile"):
+ profileconfig.add_section("tracefile")
+
+ # Add tracefile option
+ profileconfig.set("tracefile", "tracefile", tracefile)
+
+ # Add injected file option
+ if injectfile != None:
+ profileconfig.set("tracefile", "injectionfile", injectfile)
+
+ # Add datafile option
+ datadir = self.config.get(self.MULTI_SEC, self.DATADIR_OPT)
+ datafilename = generate_config_name(profile, tracefile, injectfile)
+ datafile = datadir + "/" + datafilename + ".dat"
+ profileconfig.set("general", "datafile", datafile)
+
+ # Set "attributes" option (only used internally)
+ attrs = {"profile":profile}
+ # TODO: Load additional attributes from trace/injfiles
+ attrs_str = ",".join(["%s=%s" % (k,v) for (k,v) in attrs.items()])
+ profileconfig.set("general", "attributes", attrs_str)
+
+ try:
+ c = HaizeaConfig(profileconfig)
+ except ConfigException, msg:
+ print >> sys.stderr, "Error in configuration file:"
+ print >> sys.stderr, msg
+ exit(1)
+ configs.append(c)
+
+ return configs
\ No newline at end of file
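
The HaizeaMultiConfig class that closes the listing above reads a multiconfiguration file in which ordinary section names are prefixed with a profile name (or "common"), plus a [multi] section naming the trace, injection, and data locations; get_configs() then crosses every profile with every trace and injection file. A minimal sketch of that layout follows; it is not part of this changeset, the paths and profile names are made up, and the profile sections are trimmed (a complete file would also carry the required [general], [scheduling], and [simulation] options).

# Illustrative sketch (not part of this changeset): a minimal
# multiconfiguration layout queried through HaizeaMultiConfig.
# Paths, profile names, and option values are made-up examples.
import ConfigParser
import StringIO

from haizea.resourcemanager.configfile import HaizeaMultiConfig

MULTI_CONF = """
[multi]
tracedir: /traces
tracefiles: trace1.lwf trace2.lwf
injectiondir: /injections
injectionfiles: inj1.lwf
datadir: /results

[common:general]
mode: simulated

[nobackfill:scheduling]
backfilling: off

[aggressive:scheduling]
backfilling: aggressive
"""

cp = ConfigParser.ConfigParser()
cp.readfp(StringIO.StringIO(MULTI_CONF))
multi = HaizeaMultiConfig(cp)

print multi.get_profiles()      # e.g. ['nobackfill', 'aggressive']
print multi.get_trace_files()   # ['/traces/trace1.lwf', '/traces/trace2.lwf']
print multi.get_inject_files()  # ['/injections/inj1.lwf', None]
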
Deleted: branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/datastruct.py
===================================================================
--- trunk/src/haizea/resourcemanager/datastruct.py 2008-09-16 10:43:48 UTC (rev 501)
+++ branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/datastruct.py 2008-10-20 16:50:12 UTC (rev 537)
@@ -1,647 +0,0 @@
-# -------------------------------------------------------------------------- #
-# Copyright 2006-2008, University of Chicago #
-# Copyright 2008, Distributed Systems Architecture Group, Universidad #
-# Complutense de Madrid (dsa-research.org) #
-# #
-# Licensed under the Apache License, Version 2.0 (the "License"); you may #
-# not use this file except in compliance with the License. You may obtain #
-# a copy of the License at #
-# #
-# http://www.apache.org/licenses/LICENSE-2.0 #
-# #
-# Unless required by applicable law or agreed to in writing, software #
-# distributed under the License is distributed on an "AS IS" BASIS, #
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
-# See the License for the specific language governing permissions and #
-# limitations under the License. #
-# -------------------------------------------------------------------------- #
-
-"""This module provides the fundamental data structures (besides the slot table,
-which is in a module of its own) used by Haizea. The module provides four types
-of structures:
-
-* Lease data structures
- * Lease: Base class for leases
- * ARLease: Advance reservation lease
- * BestEffortLease: Best-effort lease
- * ImmediateLease: Immediate lease
-* Resource reservation (RR) structures:
- * ResourceReservationBase: Base class for RRs in the slot table
- * VMResourceReservation: RR representing one or more VMs
- * SuspensionResourceReservation: RR representing a lease suspension
- * ResumptionResourceReservation: RR representing a lease resumption
-* Lease containers
- * Queue: Your run-of-the-mill queue
- * LeaseTable: Provides easy access to leases in the system
-* Miscellaneous structures
- * ResourceTuple: A tuple representing a resource usage or requirement
- * Timestamp: A wrapper around requested/scheduled/actual timestamps
- * Duration: A wrapper around requested/accumulated/actual durations
-"""
-
-from haizea.common.constants import RES_MEM, MIGRATE_NONE, MIGRATE_MEM, MIGRATE_MEMDISK, LOGLEVEL_VDEBUG
-from haizea.common.utils import round_datetime_delta, get_lease_id, pretty_nodemap, estimate_transfer_time, xmlrpc_marshall_singlevalue
-
-from operator import attrgetter
-from mx.DateTime import TimeDelta
-from math import floor
-
-import logging
-
-
-#-------------------------------------------------------------------#
-# #
-# LEASE DATA STRUCTURES #
-# #
-#-------------------------------------------------------------------#
-
-
-class Lease(object):
- # Lease states
- STATE_NEW = 0
- STATE_PENDING = 1
- STATE_REJECTED = 2
- STATE_SCHEDULED = 3
- STATE_QUEUED = 4
- STATE_CANCELLED = 5
- STATE_PREPARING = 6
- STATE_READY = 7
- STATE_ACTIVE = 8
- STATE_SUSPENDING = 9
- STATE_SUSPENDED = 10
- STATE_MIGRATING = 11
- STATE_RESUMING = 12
- STATE_RESUMED_READY = 13
- STATE_DONE = 14
- STATE_FAIL = 15
-
- state_str = {STATE_NEW : "New",
- STATE_PENDING : "Pending",
- STATE_REJECTED : "Rejected",
- STATE_SCHEDULED : "Scheduled",
- STATE_QUEUED : "Queued",
- STATE_CANCELLED : "Cancelled",
- STATE_PREPARING : "Preparing",
- STATE_READY : "Ready",
- STATE_ACTIVE : "Active",
- STATE_SUSPENDING : "Suspending",
- STATE_SUSPENDED : "Suspended",
- STATE_MIGRATING : "Migrating",
- STATE_RESUMING : "Resuming",
- STATE_RESUMED_READY: "Resumed-Ready",
- STATE_DONE : "Done",
- STATE_FAIL : "Fail"}
-
- def __init__(self, submit_time, start, duration, diskimage_id,
- diskimage_size, numnodes, requested_resources, preemptible):
- # Lease ID (read only)
- self.id = get_lease_id()
-
- # Request attributes (read only)
- self.submit_time = submit_time
- self.start = start
- self.duration = duration
- self.end = None
- self.diskimage_id = diskimage_id
- self.diskimage_size = diskimage_size
- # TODO: The following assumes homogeneous nodes. Should be modified
- # to account for heterogeneous nodes.
- self.numnodes = numnodes
- self.requested_resources = requested_resources
- self.preemptible = preemptible
-
- # Bookkeeping attributes
- # (keep track of the lease's state, resource reservations, etc.)
- self.state = Lease.STATE_NEW
- self.diskimagemap = {}
- self.memimagemap = {}
- self.deployment_rrs = []
- self.vm_rrs = []
-
- # Enactment information. Should only be manipulated by enactment module
- self.enactment_info = None
- self.vnode_enactment_info = dict([(n+1, None) for n in range(numnodes)])
-
- self.logger = logging.getLogger("LEASES")
-
- def print_contents(self, loglevel=LOGLEVEL_VDEBUG):
- self.logger.log(loglevel, "Lease ID : %i" % self.id)
- self.logger.log(loglevel, "Submission time: %s" % self.submit_time)
- self.logger.log(loglevel, "Duration : %s" % self.duration)
- self.logger.log(loglevel, "State : %s" % Lease.state_str[self.state])
- self.logger.log(loglevel, "Disk image : %s" % self.diskimage_id)
- self.logger.log(loglevel, "Disk image size: %s" % self.diskimage_size)
- self.logger.log(loglevel, "Num nodes : %s" % self.numnodes)
- self.logger.log(loglevel, "Resource req : %s" % self.requested_resources)
- self.logger.log(loglevel, "Disk image map : %s" % pretty_nodemap(self.diskimagemap))
- self.logger.log(loglevel, "Mem image map : %s" % pretty_nodemap(self.memimagemap))
-
- def print_rrs(self, loglevel=LOGLEVEL_VDEBUG):
- self.logger.log(loglevel, "RESOURCE RESERVATIONS")
- self.logger.log(loglevel, "~~~~~~~~~~~~~~~~~~~~~")
- for r in self.vm_rrs:
- r.print_contents(loglevel)
- self.logger.log(loglevel, "##")
-
- def get_endtime(self):
- vmrr = self.get_last_vmrr()
- return vmrr.end
-
- def append_vmrr(self, vmrr):
- self.vm_rrs.append(vmrr)
-
- def get_last_vmrr(self):
- return self.vm_rrs[-1]
-
- def update_vmrr(self, rrold, rrnew):
- self.vm_rrs[self.vm_rrs.index(rrold)] = rrnew
-
- def remove_vmrr(self, vmrr):
- if not vmrr in self.vm_rrs:
-            raise Exception, "Tried to remove a VM RR not contained in this lease"
- else:
- self.vm_rrs.remove(vmrr)
-
- def clear_rrs(self):
- self.deployment_rrs = []
- self.vm_rrs = []
-
- def add_boot_overhead(self, t):
- self.duration.incr(t)
-
- def add_runtime_overhead(self, percent):
- self.duration.incr_by_percent(percent)
-
- def xmlrpc_marshall(self):
- # Convert to something we can send through XMLRPC
- l = {}
- l["id"] = self.id
- l["submit_time"] = xmlrpc_marshall_singlevalue(self.submit_time)
- l["start_req"] = xmlrpc_marshall_singlevalue(self.start.requested)
- l["start_sched"] = xmlrpc_marshall_singlevalue(self.start.scheduled)
- l["start_actual"] = xmlrpc_marshall_singlevalue(self.start.actual)
- l["duration_req"] = xmlrpc_marshall_singlevalue(self.duration.requested)
- l["duration_acc"] = xmlrpc_marshall_singlevalue(self.duration.accumulated)
- l["duration_actual"] = xmlrpc_marshall_singlevalue(self.duration.actual)
- l["end"] = xmlrpc_marshall_singlevalue(self.end)
- l["diskimage_id"] = self.diskimage_id
- l["diskimage_size"] = self.diskimage_size
- l["numnodes"] = self.numnodes
- l["resources"] = `self.requested_resources`
- l["preemptible"] = self.preemptible
- l["state"] = self.state
- l["vm_rrs"] = [vmrr.xmlrpc_marshall() for vmrr in self.vm_rrs]
-
- return l
-
-
-class ARLease(Lease):
- def __init__(self, submit_time, start, duration, diskimage_id,
- diskimage_size, numnodes, resreq, preemptible,
- # AR-specific parameters:
- realdur = None):
- start = Timestamp(start)
- duration = Duration(duration)
- duration.known = realdur # ONLY for simulation
- Lease.__init__(self, submit_time, start, duration, diskimage_id,
- diskimage_size, numnodes, resreq, preemptible)
-
- def print_contents(self, loglevel=LOGLEVEL_VDEBUG):
- self.logger.log(loglevel, "__________________________________________________")
- Lease.print_contents(self, loglevel)
- self.logger.log(loglevel, "Type : AR")
- self.logger.log(loglevel, "Start time : %s" % self.start)
- self.print_rrs(loglevel)
- self.logger.log(loglevel, "--------------------------------------------------")
-
- def xmlrpc_marshall(self):
- l = Lease.xmlrpc_marshall(self)
- l["type"] = "AR"
- return l
-
-
-class BestEffortLease(Lease):
- def __init__(self, submit_time, duration, diskimage_id,
- diskimage_size, numnodes, resreq, preemptible,
- # BE-specific parameters:
- realdur = None):
- start = Timestamp(None) # i.e., start on a best-effort basis
- duration = Duration(duration)
- duration.known = realdur # ONLY for simulation
- # When the images will be available
- self.imagesavail = None
- Lease.__init__(self, submit_time, start, duration, diskimage_id,
- diskimage_size, numnodes, resreq, preemptible)
-
- def print_contents(self, loglevel=LOGLEVEL_VDEBUG):
- self.logger.log(loglevel, "__________________________________________________")
- Lease.print_contents(self, loglevel)
- self.logger.log(loglevel, "Type : BEST-EFFORT")
- self.logger.log(loglevel, "Images Avail @ : %s" % self.imagesavail)
- self.print_rrs(loglevel)
- self.logger.log(loglevel, "--------------------------------------------------")
-
- def get_waiting_time(self):
- return self.start.actual - self.submit_time
-
- def get_slowdown(self, bound=10):
- time_on_dedicated = self.duration.original
- time_on_loaded = self.end - self.submit_time
- bound = TimeDelta(seconds=bound)
- if time_on_dedicated < bound:
- time_on_dedicated = bound
- return time_on_loaded / time_on_dedicated
-
- def xmlrpc_marshall(self):
- l = Lease.xmlrpc_marshall(self)
- l["type"] = "BE"
- return l
-
-
-class ImmediateLease(Lease):
- def __init__(self, submit_time, duration, diskimage_id,
- diskimage_size, numnodes, resreq, preemptible,
- # Immediate-specific parameters:
- realdur = None):
- start = Timestamp(None) # i.e., start on a best-effort basis
- duration = Duration(duration)
- duration.known = realdur # ONLY for simulation
- Lease.__init__(self, submit_time, start, duration, diskimage_id,
- diskimage_size, numnodes, resreq, preemptible)
-
- def print_contents(self, loglevel=LOGLEVEL_VDEBUG):
- self.logger.log(loglevel, "__________________________________________________")
- Lease.print_contents(self, loglevel)
- self.logger.log(loglevel, "Type : IMMEDIATE")
- self.print_rrs(loglevel)
- self.logger.log(loglevel, "--------------------------------------------------")
-
- def xmlrpc_marshall(self):
- l = Lease.xmlrpc_marshall(self)
- l["type"] = "IM"
- return l
-
-
-#-------------------------------------------------------------------#
-# #
-# RESOURCE RESERVATION #
-# DATA STRUCTURES #
-# #
-#-------------------------------------------------------------------#
-
-
-class ResourceReservation(object):
-
- # Resource reservation states
- STATE_SCHEDULED = 0
- STATE_ACTIVE = 1
- STATE_DONE = 2
-
- state_str = {STATE_SCHEDULED : "Scheduled",
- STATE_ACTIVE : "Active",
- STATE_DONE : "Done"}
-
- def __init__(self, lease, start, end, res):
- self.lease = lease
- self.start = start
- self.end = end
- self.state = None
- self.resources_in_pnode = res
- self.logger = logging.getLogger("LEASES")
-
- def print_contents(self, loglevel=LOGLEVEL_VDEBUG):
- self.logger.log(loglevel, "Start : %s" % self.start)
- self.logger.log(loglevel, "End : %s" % self.end)
- self.logger.log(loglevel, "State : %s" % ResourceReservation.state_str[self.state])
- self.logger.log(loglevel, "Resources : \n %s" % "\n ".join(["N%i: %s" %(i, x) for i, x in self.resources_in_pnode.items()]))
-
- def xmlrpc_marshall(self):
- # Convert to something we can send through XMLRPC
- rr = {}
- rr["start"] = xmlrpc_marshall_singlevalue(self.start)
- rr["end"] = xmlrpc_marshall_singlevalue(self.end)
- rr["state"] = self.state
- return rr
-
-class VMResourceReservation(ResourceReservation):
- def __init__(self, lease, start, end, nodes, res, backfill_reservation):
- ResourceReservation.__init__(self, lease, start, end, res)
- self.nodes = nodes
- self.backfill_reservation = backfill_reservation
- self.resm_rrs = []
- self.susp_rrs = []
-
- # ONLY for simulation
- self.__update_prematureend()
-
- def update_start(self, time):
- self.start = time
- # ONLY for simulation
- self.__update_prematureend()
-
- def update_end(self, time):
- self.end = time
- # ONLY for simulation
- self.__update_prematureend()
-
- # ONLY for simulation
- def __update_prematureend(self):
- if self.lease.duration.known != None:
- remdur = self.lease.duration.get_remaining_known_duration()
- rrdur = self.end - self.start
- if remdur < rrdur:
- self.prematureend = self.start + remdur
- else:
- self.prematureend = None
- else:
- self.prematureend = None
-
- def is_suspending(self):
- return len(self.susp_rrs) > 0
-
- def print_contents(self, loglevel=LOGLEVEL_VDEBUG):
- for resmrr in self.resm_rrs:
- resmrr.print_contents(loglevel)
- self.logger.log(loglevel, "--")
- self.logger.log(loglevel, "Type : VM")
- self.logger.log(loglevel, "Nodes : %s" % pretty_nodemap(self.nodes))
- if self.prematureend != None:
- self.logger.log(loglevel, "Premature end : %s" % self.prematureend)
- ResourceReservation.print_contents(self, loglevel)
- for susprr in self.susp_rrs:
- self.logger.log(loglevel, "--")
- susprr.print_contents(loglevel)
-
- def is_preemptible(self):
- return self.lease.preemptible
-
- def xmlrpc_marshall(self):
- rr = ResourceReservation.xmlrpc_marshall(self)
- rr["type"] = "VM"
- rr["nodes"] = self.nodes.items()
- return rr
-
-
-class SuspensionResourceReservation(ResourceReservation):
- def __init__(self, lease, start, end, res, vnodes, vmrr):
- ResourceReservation.__init__(self, lease, start, end, res)
- self.vmrr = vmrr
- self.vnodes = vnodes
-
- def print_contents(self, loglevel=LOGLEVEL_VDEBUG):
- self.logger.log(loglevel, "Type : SUSPEND")
- ResourceReservation.print_contents(self, loglevel)
-
- def is_first(self):
- return (self == self.vmrr.susp_rrs[0])
-
- def is_last(self):
- return (self == self.vmrr.susp_rrs[-1])
-
- # TODO: Suspension RRs should be preemptible, but preempting a suspension RR
- # has wider implications (with a non-trivial handling). For now, we leave them
- # as non-preemptible, since the probability of preempting a suspension RR is slim.
- def is_preemptible(self):
- return False
-
- def xmlrpc_marshall(self):
- rr = ResourceReservation.xmlrpc_marshall(self)
- rr["type"] = "SUSP"
- return rr
-
-class ResumptionResourceReservation(ResourceReservation):
- def __init__(self, lease, start, end, res, vnodes, vmrr):
- ResourceReservation.__init__(self, lease, start, end, res)
- self.vmrr = vmrr
- self.vnodes = vnodes
-
- def print_contents(self, loglevel=LOGLEVEL_VDEBUG):
- ResourceReservation.print_contents(self, loglevel)
- self.logger.log(loglevel, "Type : RESUME")
-
- def is_first(self):
- return (self == self.vmrr.resm_rrs[0])
-
- def is_last(self):
- return (self == self.vmrr.resm_rrs[-1])
-
- # TODO: Resumption RRs should be preemptible, but preempting a resumption RR
- # has wider implications (with a non-trivial handling). For now, we leave them
- # as non-preemptible, since the probability of preempting a resumption RR is slim.
- def is_preemptible(self):
- return False
-
- def xmlrpc_marshall(self):
- rr = ResourceReservation.xmlrpc_marshall(self)
- rr["type"] = "RESM"
- return rr
-
-class MigrationResourceReservation(ResourceReservation):
- def __init__(self, lease, start, end, res, vmrr):
- ResourceReservation.__init__(self, lease, start, end, res)
- self.vmrr = vmrr
-
-#-------------------------------------------------------------------#
-# #
-# LEASE CONTAINERS #
-# #
-#-------------------------------------------------------------------#
-
-class Queue(object):
- def __init__(self, scheduler):
- self.scheduler = scheduler
- self.__q = []
-
- def is_empty(self):
- return len(self.__q)==0
-
- def enqueue(self, r):
- self.__q.append(r)
-
- def dequeue(self):
- return self.__q.pop(0)
-
- def enqueue_in_order(self, r):
- self.__q.append(r)
- self.__q.sort(key=attrgetter("submit_time"))
-
- def length(self):
- return len(self.__q)
-
- def has_lease(self, lease_id):
- return (1 == len([l for l in self.__q if l.id == lease_id]))
-
- def get_lease(self, lease_id):
- return [l for l in self.__q if l.id == lease_id][0]
-
- def remove_lease(self, lease):
- self.__q.remove(lease)
-
- def __iter__(self):
- return iter(self.__q)
-
-class LeaseTable(object):
- def __init__(self, scheduler):
- self.scheduler = scheduler
- self.entries = {}
-
- def has_lease(self, lease_id):
- return self.entries.has_key(lease_id)
-
- def get_lease(self, lease_id):
- return self.entries[lease_id]
-
- def is_empty(self):
- return len(self.entries)==0
-
- def remove(self, lease):
- del self.entries[lease.id]
-
- def add(self, lease):
- self.entries[lease.id] = lease
-
- def get_leases(self, type=None):
- if type==None:
- return self.entries.values()
- else:
- return [e for e in self.entries.values() if isinstance(e, type)]
-
- def get_leases_by_state(self, state):
- return [e for e in self.entries.values() if e.state == state]
-
-
- # TODO: Should be moved to slottable module
- def getNextLeasesScheduledInNodes(self, time, nodes):
- nodes = set(nodes)
- leases = []
- earliestEndTime = {}
- for l in self.entries.values():
- start = l.rr[-1].start
- nodes2 = set(l.rr[-1].nodes.values())
- if len(nodes & nodes2) > 0 and start > time:
- leases.append(l)
- end = l.rr[-1].end
- for n in nodes2:
- if not earliestEndTime.has_key(n):
- earliestEndTime[n] = end
- else:
- if end < earliestEndTime[n]:
- earliestEndTime[n] = end
- leases2 = set()
- for n in nodes:
- if earliestEndTime.has_key(n):
- end = earliestEndTime[n]
- l = [l for l in leases if n in l.rr[-1].nodes.values() and l.rr[-1].start < end]
- leases2.update(l)
- return list(leases2)
-
-#-------------------------------------------------------------------#
-# #
-# MISCELLANEOUS DATA STRUCTURES CONTAINERS #
-# #
-#-------------------------------------------------------------------#
-
-class ResourceTuple(object):
- def __init__(self, res):
- self._res = res
-
- @classmethod
- def from_list(cls, l):
- return cls(l[:])
-
- @classmethod
- def copy(cls, rt):
- return cls(rt._res[:])
-
- @classmethod
- def set_resource_types(cls, resourcetypes):
- cls.type2pos = dict([(x[0], i) for i, x in enumerate(resourcetypes)])
- cls.descriptions = dict([(i, x[2]) for i, x in enumerate(resourcetypes)])
- cls.tuplelength = len(resourcetypes)
-
- @classmethod
- def create_empty(cls):
- return cls([0 for x in range(cls.tuplelength)])
-
- def fits_in(self, res2):
- fits = True
- for i in xrange(len(self._res)):
- if self._res[i] > res2._res[i]:
- fits = False
- break
- return fits
-
- def get_num_fits_in(self, res2):
- canfit = 10000 # Arbitrarily large
- for i in xrange(len(self._res)):
- if self._res[i] != 0:
- f = res2._res[i] / self._res[i]
- if f < canfit:
- canfit = f
- return int(floor(canfit))
-
- def decr(self, res2):
- for slottype in xrange(len(self._res)):
- self._res[slottype] -= res2._res[slottype]
-
- def incr(self, res2):
- for slottype in xrange(len(self._res)):
- self._res[slottype] += res2._res[slottype]
-
- def get_by_type(self, resourcetype):
- return self._res[self.type2pos[resourcetype]]
-
- def set_by_type(self, resourcetype, value):
- self._res[self.type2pos[resourcetype]] = value
-
- def is_zero_or_less(self):
- return sum([v for v in self._res]) <= 0
-
- def __repr__(self):
- r=""
- for i, x in enumerate(self._res):
- r += "%s:%.2f " % (self.descriptions[i], x)
- return r
-
-class Timestamp(object):
- def __init__(self, requested):
- self.requested = requested
- self.scheduled = None
- self.actual = None
-
- def __repr__(self):
- return "REQ: %s | SCH: %s | ACT: %s" % (self.requested, self.scheduled, self.actual)
-
-class Duration(object):
- def __init__(self, requested, known=None):
- self.original = requested
- self.requested = requested
- self.accumulated = TimeDelta()
- self.actual = None
- # The following is ONLY used in simulation
- self.known = known
-
- def incr(self, t):
- self.requested += t
- if self.known != None:
- self.known += t
-
- def incr_by_percent(self, pct):
- factor = 1 + float(pct)/100
- self.requested = round_datetime_delta(self.requested * factor)
- if self.known != None:
- self.requested = round_datetime_delta(self.known * factor)
-
- def accumulate_duration(self, t):
- self.accumulated += t
-
- def get_remaining_duration(self):
- return self.requested - self.accumulated
-
- # ONLY in simulation
- def get_remaining_known_duration(self):
- return self.known - self.accumulated
-
- def __repr__(self):
- return "REQ: %s | ACC: %s | ACT: %s | KNW: %s" % (self.requested, self.accumulated, self.actual, self.known)
-
Copied: branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/datastruct.py (from rev 506, trunk/src/haizea/resourcemanager/datastruct.py)
===================================================================
--- branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/datastruct.py (rev 0)
+++ branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/datastruct.py 2008-10-20 16:50:12 UTC (rev 537)
@@ -0,0 +1,630 @@
+# -------------------------------------------------------------------------- #
+# Copyright 2006-2008, University of Chicago #
+# Copyright 2008, Distributed Systems Architecture Group, Universidad #
+# Complutense de Madrid (dsa-research.org) #
+# #
+# Licensed under the Apache License, Version 2.0 (the "License"); you may #
+# not use this file except in compliance with the License. You may obtain #
+# a copy of the License at #
+# #
+# http://www.apache.org/licenses/LICENSE-2.0 #
+# #
+# Unless required by applicable law or agreed to in writing, software #
+# distributed under the License is distributed on an "AS IS" BASIS, #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
+# See the License for the specific language governing permissions and #
+# limitations under the License. #
+# -------------------------------------------------------------------------- #
+
+"""This module provides the fundamental data structures (besides the slot table,
+which is in a module of its own) used by Haizea. The module provides four types
+of structures:
+
+* Lease data structures
+ * Lease: Base class for leases
+ * ARLease: Advance reservation lease
+ * BestEffortLease: Best-effort lease
+ * ImmediateLease: Immediate lease
+* Resource reservation (RR) structures:
+    * ResourceReservation: Base class for RRs in the slot table
+ * VMResourceReservation: RR representing one or more VMs
+ * SuspensionResourceReservation: RR representing a lease suspension
+ * ResumptionResourceReservation: RR representing a lease resumption
+* Lease containers
+ * Queue: Your run-of-the-mill queue
+ * LeaseTable: Provides easy access to leases in the system
+* Miscellaneous structures
+ * ResourceTuple: A tuple representing a resource usage or requirement
+ * Timestamp: A wrapper around requested/scheduled/actual timestamps
+ * Duration: A wrapper around requested/accumulated/actual durations
+"""
+
+from haizea.common.constants import RES_MEM, MIGRATE_NONE, MIGRATE_MEM, MIGRATE_MEMDISK, LOGLEVEL_VDEBUG
+from haizea.common.utils import round_datetime_delta, get_lease_id, pretty_nodemap, estimate_transfer_time, xmlrpc_marshall_singlevalue
+
+from operator import attrgetter
+from mx.DateTime import TimeDelta
+from math import floor
+
+import logging
+
+
+#-------------------------------------------------------------------#
+# #
+# LEASE DATA STRUCTURES #
+# #
+#-------------------------------------------------------------------#
+
+
+class Lease(object):
+ # Lease states
+ STATE_NEW = 0
+ STATE_PENDING = 1
+ STATE_REJECTED = 2
+ STATE_SCHEDULED = 3
+ STATE_QUEUED = 4
+ STATE_CANCELLED = 5
+ STATE_PREPARING = 6
+ STATE_READY = 7
+ STATE_ACTIVE = 8
+ STATE_SUSPENDING = 9
+ STATE_SUSPENDED = 10
+ STATE_MIGRATING = 11
+ STATE_RESUMING = 12
+ STATE_RESUMED_READY = 13
+ STATE_DONE = 14
+ STATE_FAIL = 15
+
+ state_str = {STATE_NEW : "New",
+ STATE_PENDING : "Pending",
+ STATE_REJECTED : "Rejected",
+ STATE_SCHEDULED : "Scheduled",
+ STATE_QUEUED : "Queued",
+ STATE_CANCELLED : "Cancelled",
+ STATE_PREPARING : "Preparing",
+ STATE_READY : "Ready",
+ STATE_ACTIVE : "Active",
+ STATE_SUSPENDING : "Suspending",
+ STATE_SUSPENDED : "Suspended",
+ STATE_MIGRATING : "Migrating",
+ STATE_RESUMING : "Resuming",
+ STATE_RESUMED_READY: "Resumed-Ready",
+ STATE_DONE : "Done",
+ STATE_FAIL : "Fail"}
+
+ def __init__(self, submit_time, start, duration, diskimage_id,
+ diskimage_size, numnodes, requested_resources, preemptible):
+ # Lease ID (read only)
+ self.id = get_lease_id()
+
+ # Request attributes (read only)
+ self.submit_time = submit_time
+ self.start = start
+ self.duration = duration
+ self.end = None
+ self.diskimage_id = diskimage_id
+ self.diskimage_size = diskimage_size
+ # TODO: The following assumes homogeneous nodes. Should be modified
+ # to account for heterogeneous nodes.
+ self.numnodes = numnodes
+ self.requested_resources = requested_resources
+ self.preemptible = preemptible
+
+ # Bookkeeping attributes
+ # (keep track of the lease's state, resource reservations, etc.)
+ self.state = Lease.STATE_NEW
+ self.diskimagemap = {}
+ self.memimagemap = {}
+ self.deployment_rrs = []
+ self.vm_rrs = []
+
+ # Enactment information. Should only be manipulated by enactment module
+ self.enactment_info = None
+ self.vnode_enactment_info = dict([(n+1, None) for n in range(numnodes)])
+
+ self.logger = logging.getLogger("LEASES")
+
+ def print_contents(self, loglevel=LOGLEVEL_VDEBUG):
+ self.logger.log(loglevel, "Lease ID : %i" % self.id)
+ self.logger.log(loglevel, "Submission time: %s" % self.submit_time)
+ self.logger.log(loglevel, "Duration : %s" % self.duration)
+ self.logger.log(loglevel, "State : %s" % Lease.state_str[self.state])
+ self.logger.log(loglevel, "Disk image : %s" % self.diskimage_id)
+ self.logger.log(loglevel, "Disk image size: %s" % self.diskimage_size)
+ self.logger.log(loglevel, "Num nodes : %s" % self.numnodes)
+ self.logger.log(loglevel, "Resource req : %s" % self.requested_resources)
+ self.logger.log(loglevel, "Disk image map : %s" % pretty_nodemap(self.diskimagemap))
+ self.logger.log(loglevel, "Mem image map : %s" % pretty_nodemap(self.memimagemap))
+
+ def print_rrs(self, loglevel=LOGLEVEL_VDEBUG):
+ if len(self.deployment_rrs) > 0:
+ self.logger.log(loglevel, "DEPLOYMENT RESOURCE RESERVATIONS")
+ self.logger.log(loglevel, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
+ for r in self.deployment_rrs:
+ r.print_contents(loglevel)
+ self.logger.log(loglevel, "##")
+ self.logger.log(loglevel, "VM RESOURCE RESERVATIONS")
+ self.logger.log(loglevel, "~~~~~~~~~~~~~~~~~~~~~~~~")
+ for r in self.vm_rrs:
+ r.print_contents(loglevel)
+ self.logger.log(loglevel, "##")
+
+ def get_endtime(self):
+ vmrr = self.get_last_vmrr()
+ return vmrr.end
+
+ def append_vmrr(self, vmrr):
+ self.vm_rrs.append(vmrr)
+
+ def append_deployrr(self, vmrr):
+ self.deployment_rrs.append(vmrr)
+
+ def get_last_vmrr(self):
+ return self.vm_rrs[-1]
+
+ def update_vmrr(self, rrold, rrnew):
+ self.vm_rrs[self.vm_rrs.index(rrold)] = rrnew
+
+ def remove_vmrr(self, vmrr):
+ if not vmrr in self.vm_rrs:
+ raise Exception, "Tried to remove an VM RR not contained in this lease"
+ else:
+ self.vm_rrs.remove(vmrr)
+
+ def clear_rrs(self):
+ self.deployment_rrs = []
+ self.vm_rrs = []
+
+ def add_boot_overhead(self, t):
+ self.duration.incr(t)
+
+ def add_runtime_overhead(self, percent):
+ self.duration.incr_by_percent(percent)
+
+ def xmlrpc_marshall(self):
+ # Convert to something we can send through XMLRPC
+ l = {}
+ l["id"] = self.id
+ l["submit_time"] = xmlrpc_marshall_singlevalue(self.submit_time)
+ l["start_req"] = xmlrpc_marshall_singlevalue(self.start.requested)
+ l["start_sched"] = xmlrpc_marshall_singlevalue(self.start.scheduled)
+ l["start_actual"] = xmlrpc_marshall_singlevalue(self.start.actual)
+ l["duration_req"] = xmlrpc_marshall_singlevalue(self.duration.requested)
+ l["duration_acc"] = xmlrpc_marshall_singlevalue(self.duration.accumulated)
+ l["duration_actual"] = xmlrpc_marshall_singlevalue(self.duration.actual)
+ l["end"] = xmlrpc_marshall_singlevalue(self.end)
+ l["diskimage_id"] = self.diskimage_id
+ l["diskimage_size"] = self.diskimage_size
+ l["numnodes"] = self.numnodes
+ l["resources"] = `self.requested_resources`
+ l["preemptible"] = self.preemptible
+ l["state"] = self.state
+ l["vm_rrs"] = [vmrr.xmlrpc_marshall() for vmrr in self.vm_rrs]
+
+ return l
+
+
+class ARLease(Lease):
+ def __init__(self, submit_time, start, duration, diskimage_id,
+ diskimage_size, numnodes, resreq, preemptible,
+ # AR-specific parameters:
+ realdur = None):
+ start = Timestamp(start)
+ duration = Duration(duration)
+ duration.known = realdur # ONLY for simulation
+ Lease.__init__(self, submit_time, start, duration, diskimage_id,
+ diskimage_size, numnodes, resreq, preemptible)
+
+ def print_contents(self, loglevel=LOGLEVEL_VDEBUG):
+ self.logger.log(loglevel, "__________________________________________________")
+ Lease.print_contents(self, loglevel)
+ self.logger.log(loglevel, "Type : AR")
+ self.logger.log(loglevel, "Start time : %s" % self.start)
+ self.print_rrs(loglevel)
+ self.logger.log(loglevel, "--------------------------------------------------")
+
+ def xmlrpc_marshall(self):
+ l = Lease.xmlrpc_marshall(self)
+ l["type"] = "AR"
+ return l
+
+
+class BestEffortLease(Lease):
+ def __init__(self, submit_time, duration, diskimage_id,
+ diskimage_size, numnodes, resreq, preemptible,
+ # BE-specific parameters:
+ realdur = None):
+ start = Timestamp(None) # i.e., start on a best-effort basis
+ duration = Duration(duration)
+ duration.known = realdur # ONLY for simulation
+ # When the images will be available
+ self.imagesavail = None
+ Lease.__init__(self, submit_time, start, duration, diskimage_id,
+ diskimage_size, numnodes, resreq, preemptible)
+
+ def print_contents(self, loglevel=LOGLEVEL_VDEBUG):
+ self.logger.log(loglevel, "__________________________________________________")
+ Lease.print_contents(self, loglevel)
+ self.logger.log(loglevel, "Type : BEST-EFFORT")
+ self.logger.log(loglevel, "Images Avail @ : %s" % self.imagesavail)
+ self.print_rrs(loglevel)
+ self.logger.log(loglevel, "--------------------------------------------------")
+
+ def get_waiting_time(self):
+ return self.start.actual - self.submit_time
+
+ def get_slowdown(self, bound=10):
+ time_on_dedicated = self.duration.original
+ time_on_loaded = self.end - self.submit_time
+ bound = TimeDelta(seconds=bound)
+ if time_on_dedicated < bound:
+ time_on_dedicated = bound
+ return time_on_loaded / time_on_dedicated
+
+ def xmlrpc_marshall(self):
+ l = Lease.xmlrpc_marshall(self)
+ l["type"] = "BE"
+ return l
+
+
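
The bounded slowdown computed by get_slowdown above is easiest to see with concrete (hypothetical) numbers; the sketch below just restates the method's arithmetic:

    from mx.DateTime import DateTime, TimeDelta

    submit_time       = DateTime(2008, 10, 20, 12, 0, 0)   # lease submitted at noon...
    end               = DateTime(2008, 10, 20, 14, 0, 0)   # ...and finally done two hours later
    time_on_dedicated = TimeDelta(hours=1)                  # duration.original: one hour of actual work
    time_on_loaded    = end - submit_time

    bound = TimeDelta(seconds=10)      # keeps very short leases from reporting huge slowdowns
    if time_on_dedicated < bound:
        time_on_dedicated = bound
    slowdown = time_on_loaded / time_on_dedicated           # -> 2.0
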
+class ImmediateLease(Lease):
+ def __init__(self, submit_time, duration, diskimage_id,
+ diskimage_size, numnodes, resreq, preemptible,
+ # Immediate-specific parameters:
+ realdur = None):
+ start = Timestamp(None) # i.e., start on a best-effort basis
+ duration = Duration(duration)
+ duration.known = realdur # ONLY for simulation
+ Lease.__init__(self, submit_time, start, duration, diskimage_id,
+ diskimage_size, numnodes, resreq, preemptible)
+
+ def print_contents(self, loglevel=LOGLEVEL_VDEBUG):
+ self.logger.log(loglevel, "__________________________________________________")
+ Lease.print_contents(self, loglevel)
+ self.logger.log(loglevel, "Type : IMMEDIATE")
+ self.print_rrs(loglevel)
+ self.logger.log(loglevel, "--------------------------------------------------")
+
+ def xmlrpc_marshall(self):
+ l = Lease.xmlrpc_marshall(self)
+ l["type"] = "IM"
+ return l
+
+
+#-------------------------------------------------------------------#
+# #
+# RESOURCE RESERVATION #
+# DATA STRUCTURES #
+# #
+#-------------------------------------------------------------------#
+
+
+class ResourceReservation(object):
+
+ # Resource reservation states
+ STATE_SCHEDULED = 0
+ STATE_ACTIVE = 1
+ STATE_DONE = 2
+
+ state_str = {STATE_SCHEDULED : "Scheduled",
+ STATE_ACTIVE : "Active",
+ STATE_DONE : "Done"}
+
+ def __init__(self, lease, start, end, res):
+ self.lease = lease
+ self.start = start
+ self.end = end
+ self.state = None
+ self.resources_in_pnode = res
+ self.logger = logging.getLogger("LEASES")
+
+ def print_contents(self, loglevel=LOGLEVEL_VDEBUG):
+ self.logger.log(loglevel, "Start : %s" % self.start)
+ self.logger.log(loglevel, "End : %s" % self.end)
+ self.logger.log(loglevel, "State : %s" % ResourceReservation.state_str[self.state])
+ self.logger.log(loglevel, "Resources : \n %s" % "\n ".join(["N%i: %s" %(i, x) for i, x in self.resources_in_pnode.items()]))
+
+ def xmlrpc_marshall(self):
+ # Convert to something we can send through XMLRPC
+ rr = {}
+ rr["start"] = xmlrpc_marshall_singlevalue(self.start)
+ rr["end"] = xmlrpc_marshall_singlevalue(self.end)
+ rr["state"] = self.state
+ return rr
+
+class VMResourceReservation(ResourceReservation):
+ def __init__(self, lease, start, end, nodes, res, backfill_reservation):
+ ResourceReservation.__init__(self, lease, start, end, res)
+ self.nodes = nodes
+ self.backfill_reservation = backfill_reservation
+ self.resm_rrs = []
+ self.susp_rrs = []
+
+ # ONLY for simulation
+ self.__update_prematureend()
+
+ def update_start(self, time):
+ self.start = time
+ # ONLY for simulation
+ self.__update_prematureend()
+
+ def update_end(self, time):
+ self.end = time
+ # ONLY for simulation
+ self.__update_prematureend()
+
+ # ONLY for simulation
+ def __update_prematureend(self):
+ if self.lease.duration.known != None:
+ remdur = self.lease.duration.get_remaining_known_duration()
+ rrdur = self.end - self.start
+ if remdur < rrdur:
+ self.prematureend = self.start + remdur
+ else:
+ self.prematureend = None
+ else:
+ self.prematureend = None
+
+ def is_suspending(self):
+ return len(self.susp_rrs) > 0
+
+ def print_contents(self, loglevel=LOGLEVEL_VDEBUG):
+ for resmrr in self.resm_rrs:
+ resmrr.print_contents(loglevel)
+ self.logger.log(loglevel, "--")
+ self.logger.log(loglevel, "Type : VM")
+ self.logger.log(loglevel, "Nodes : %s" % pretty_nodemap(self.nodes))
+ if self.prematureend != None:
+ self.logger.log(loglevel, "Premature end : %s" % self.prematureend)
+ ResourceReservation.print_contents(self, loglevel)
+ for susprr in self.susp_rrs:
+ self.logger.log(loglevel, "--")
+ susprr.print_contents(loglevel)
+
+ def is_preemptible(self):
+ return self.lease.preemptible
+
+ def xmlrpc_marshall(self):
+ rr = ResourceReservation.xmlrpc_marshall(self)
+ rr["type"] = "VM"
+ rr["nodes"] = self.nodes.items()
+ return rr
+
+
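
The simulation-only "premature end" above is plain arithmetic: if the remaining known duration is shorter than the reservation itself, the VMs will finish early. A sketch with hypothetical times, mirroring __update_prematureend:

    from mx.DateTime import DateTime, TimeDelta

    start  = DateTime(2008, 10, 20, 13, 0, 0)
    end    = start + TimeDelta(hours=1)     # the VM RR is scheduled for a full hour
    remdur = TimeDelta(minutes=20)          # what get_remaining_known_duration() would return
    rrdur  = end - start

    if remdur < rrdur:
        prematureend = start + remdur       # -> 13:20, where the simulator will end the lease
    else:
        prematureend = None
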
+class SuspensionResourceReservation(ResourceReservation):
+ def __init__(self, lease, start, end, res, vnodes, vmrr):
+ ResourceReservation.__init__(self, lease, start, end, res)
+ self.vmrr = vmrr
+ self.vnodes = vnodes
+
+ def print_contents(self, loglevel=LOGLEVEL_VDEBUG):
+ self.logger.log(loglevel, "Type : SUSPEND")
+ ResourceReservation.print_contents(self, loglevel)
+
+ def is_first(self):
+ return (self == self.vmrr.susp_rrs[0])
+
+ def is_last(self):
+ return (self == self.vmrr.susp_rrs[-1])
+
+ # TODO: Suspension RRs should be preemptible, but preempting a suspension RR
+ # has wider implications (with a non-trivial handling). For now, we leave them
+ # as non-preemptible, since the probability of preempting a suspension RR is slim.
+ def is_preemptible(self):
+ return False
+
+ def xmlrpc_marshall(self):
+ rr = ResourceReservation.xmlrpc_marshall(self)
+ rr["type"] = "SUSP"
+ return rr
+
+class ResumptionResourceReservation(ResourceReservation):
+ def __init__(self, lease, start, end, res, vnodes, vmrr):
+ ResourceReservation.__init__(self, lease, start, end, res)
+ self.vmrr = vmrr
+ self.vnodes = vnodes
+
+ def print_contents(self, loglevel=LOGLEVEL_VDEBUG):
+ ResourceReservation.print_contents(self, loglevel)
+ self.logger.log(loglevel, "Type : RESUME")
+
+ def is_first(self):
+ return (self == self.vmrr.resm_rrs[0])
+
+ def is_last(self):
+ return (self == self.vmrr.resm_rrs[-1])
+
+ # TODO: Resumption RRs should be preemptible, but preempting a resumption RR
+ # has wider implications (with a non-trivial handling). For now, we leave them
+ # as non-preemptible, since the probability of preempting a resumption RR is slim.
+ def is_preemptible(self):
+ return False
+
+ def xmlrpc_marshall(self):
+ rr = ResourceReservation.xmlrpc_marshall(self)
+ rr["type"] = "RESM"
+ return rr
+
+class MigrationResourceReservation(ResourceReservation):
+ def __init__(self, lease, start, end, res, vmrr):
+ ResourceReservation.__init__(self, lease, start, end, res)
+ self.vmrr = vmrr
+
+#-------------------------------------------------------------------#
+# #
+# LEASE CONTAINERS #
+# #
+#-------------------------------------------------------------------#
+
+class Queue(object):
+ def __init__(self, scheduler):
+ self.scheduler = scheduler
+ self.__q = []
+
+ def is_empty(self):
+ return len(self.__q)==0
+
+ def enqueue(self, r):
+ self.__q.append(r)
+
+ def dequeue(self):
+ return self.__q.pop(0)
+
+ def enqueue_in_order(self, r):
+ self.__q.append(r)
+ self.__q.sort(key=attrgetter("submit_time"))
+
+ def length(self):
+ return len(self.__q)
+
+ def has_lease(self, lease_id):
+ return (1 == len([l for l in self.__q if l.id == lease_id]))
+
+ def get_lease(self, lease_id):
+ return [l for l in self.__q if l.id == lease_id][0]
+
+ def remove_lease(self, lease):
+ self.__q.remove(lease)
+
+ def __iter__(self):
+ return iter(self.__q)
+
+class LeaseTable(object):
+ def __init__(self, scheduler):
+ self.scheduler = scheduler
+ self.entries = {}
+
+ def has_lease(self, lease_id):
+ return self.entries.has_key(lease_id)
+
+ def get_lease(self, lease_id):
+ return self.entries[lease_id]
+
+ def is_empty(self):
+ return len(self.entries)==0
+
+ def remove(self, lease):
+ del self.entries[lease.id]
+
+ def add(self, lease):
+ self.entries[lease.id] = lease
+
+ def get_leases(self, type=None):
+ if type==None:
+ return self.entries.values()
+ else:
+ return [e for e in self.entries.values() if isinstance(e, type)]
+
+ def get_leases_by_state(self, state):
+ return [e for e in self.entries.values() if e.state == state]
+
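
Continuing the sketch given after the module docstring, the LeaseTable offers keyed and filtered access to the same lease objects:

    from haizea.resourcemanager.datastruct import LeaseTable, BestEffortLease, Lease

    table = LeaseTable(scheduler = None)    # as with Queue, the scheduler reference is only stored
    table.add(lease)                        # `lease` as built in the earlier sketch
    table.has_lease(lease.id)                     # -> True
    table.get_leases(type=BestEffortLease)        # -> [lease]
    table.get_leases_by_state(Lease.STATE_NEW)    # -> [lease]; new leases start in STATE_NEW
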
+#-------------------------------------------------------------------#
+# #
+# MISCELLANEOUS DATA STRUCTURES CONTAINERS #
+# #
+#-------------------------------------------------------------------#
+
+class ResourceTuple(object):
+ def __init__(self, res):
+ self._res = res
+
+ @classmethod
+ def from_list(cls, l):
+ return cls(l[:])
+
+ @classmethod
+ def copy(cls, rt):
+ return cls(rt._res[:])
+
+ @classmethod
+ def set_resource_types(cls, resourcetypes):
+ cls.type2pos = dict([(x[0], i) for i, x in enumerate(resourcetypes)])
+ cls.descriptions = dict([(i, x[2]) for i, x in enumerate(resourcetypes)])
+ cls.tuplelength = len(resourcetypes)
+
+ @classmethod
+ def create_empty(cls):
+ return cls([0 for x in range(cls.tuplelength)])
+
+ def fits_in(self, res2):
+ fits = True
+ for i in xrange(len(self._res)):
+ if self._res[i] > res2._res[i]:
+ fits = False
+ break
+ return fits
+
+ def get_num_fits_in(self, res2):
+ canfit = 10000 # Arbitrarily large
+ for i in xrange(len(self._res)):
+ if self._res[i] != 0:
+ f = res2._res[i] / self._res[i]
+ if f < canfit:
+ canfit = f
+ return int(floor(canfit))
+
+ def decr(self, res2):
+ for slottype in xrange(len(self._res)):
+ self._res[slottype] -= res2._res[slottype]
+
+ def incr(self, res2):
+ for slottype in xrange(len(self._res)):
+ self._res[slottype] += res2._res[slottype]
+
+ def get_by_type(self, resourcetype):
+ return self._res[self.type2pos[resourcetype]]
+
+ def set_by_type(self, resourcetype, value):
+ self._res[self.type2pos[resourcetype]] = value
+
+ def is_zero_or_less(self):
+ return sum([v for v in self._res]) <= 0
+
+ def __repr__(self):
+ r=""
+ for i, x in enumerate(self._res):
+ r += "%s:%.2f " % (self.descriptions[i], x)
+ return r
+
+class Timestamp(object):
+ def __init__(self, requested):
+ self.requested = requested
+ self.scheduled = None
+ self.actual = None
+
+ def __repr__(self):
+ return "REQ: %s | SCH: %s | ACT: %s" % (self.requested, self.scheduled, self.actual)
+
+class Duration(object):
+ def __init__(self, requested, known=None):
+ self.original = requested
+ self.requested = requested
+ self.accumulated = TimeDelta()
+ self.actual = None
+ # The following is ONLY used in simulation
+ self.known = known
+
+ def incr(self, t):
+ self.requested += t
+ if self.known != None:
+ self.known += t
+
+ def incr_by_percent(self, pct):
+ factor = 1 + float(pct)/100
+ self.requested = round_datetime_delta(self.requested * factor)
+ if self.known != None:
+            self.known = round_datetime_delta(self.known * factor)
+
+ def accumulate_duration(self, t):
+ self.accumulated += t
+
+ def get_remaining_duration(self):
+ return self.requested - self.accumulated
+
+ # ONLY in simulation
+ def get_remaining_known_duration(self):
+ return self.known - self.accumulated
+
+ def __repr__(self):
+ return "REQ: %s | ACC: %s | ACT: %s | KNW: %s" % (self.requested, self.accumulated, self.actual, self.known)
+
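
The Duration bookkeeping drives both scheduling and the simulation-only early termination; a short sketch of the accounting with hypothetical values:

    from mx.DateTime import TimeDelta
    from haizea.resourcemanager.datastruct import Duration

    d = Duration(requested = TimeDelta(hours=1), known = TimeDelta(minutes=30))
    d.incr(TimeDelta(minutes=5))                 # e.g. add_boot_overhead: requested and known both grow
    d.accumulate_duration(TimeDelta(minutes=10)) # ten minutes of the lease have already run
    d.get_remaining_duration()                   # -> 55 minutes still owed to the lease
    d.get_remaining_known_duration()             # -> 25 minutes it will actually run (simulation only)
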
Deleted: branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/deployment/imagetransfer.py
===================================================================
--- trunk/src/haizea/resourcemanager/deployment/imagetransfer.py 2008-09-16 10:43:48 UTC (rev 501)
+++ branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/deployment/imagetransfer.py 2008-10-20 16:50:12 UTC (rev 537)
@@ -1,545 +0,0 @@
-# -------------------------------------------------------------------------- #
-# Copyright 2006-2008, University of Chicago #
-# Copyright 2008, Distributed Systems Architecture Group, Universidad #
-# Complutense de Madrid (dsa-research.org) #
-# #
-# Licensed under the Apache License, Version 2.0 (the "License"); you may #
-# not use this file except in compliance with the License. You may obtain #
-# a copy of the License at #
-# #
-# http://www.apache.org/licenses/LICENSE-2.0 #
-# #
-# Unless required by applicable law or agreed to in writing, software #
-# distributed under the License is distributed on an "AS IS" BASIS, #
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
-# See the License for the specific language governing permissions and #
-# limitations under the License. #
-# -------------------------------------------------------------------------- #
-
-import haizea.common.constants as constants
-import haizea.resourcemanager.datastruct as ds
-from haizea.resourcemanager.deployment import DeploymentScheduler, DeploymentSchedException
-from haizea.resourcemanager.datastruct import ResourceReservation, Lease, ARLease, BestEffortLease
-from haizea.resourcemanager.scheduler import ReservationEventHandler
-from haizea.common.utils import estimate_transfer_time, get_config
-
-import copy
-
-class ImageTransferDeploymentScheduler(DeploymentScheduler):
- def __init__(self, slottable, resourcepool, deployment_enact):
- DeploymentScheduler.__init__(self, slottable, resourcepool, deployment_enact)
-
- # TODO: The following two should be merged into
- # something like this:
- # self.image_node = self.deployment_enact.get_image_node()
- self.fifo_node = self.deployment_enact.get_fifo_node()
- self.edf_node = self.deployment_enact.get_edf_node()
-
- self.transfers_edf = []
- self.transfers_fifo = []
- self.completed_transfers = []
-
- config = get_config()
- self.reusealg = config.get("diskimage-reuse")
- if self.reusealg == constants.REUSE_IMAGECACHES:
- self.maxcachesize = config.get("diskimage-cache-size")
- else:
- self.maxcachesize = None
-
- self.imagenode_bandwidth = self.deployment_enact.get_bandwidth()
-
- self.handlers ={}
- self.handlers[FileTransferResourceReservation] = ReservationEventHandler(
- on_start = ImageTransferDeploymentScheduler.handle_start_filetransfer,
- on_end = ImageTransferDeploymentScheduler.handle_end_filetransfer)
-
- def schedule(self, lease, vmrr, nexttime):
- if isinstance(lease, ARLease):
- self.schedule_for_ar(lease, vmrr, nexttime)
- elif isinstance(lease, BestEffortLease):
- self.schedule_for_besteffort(lease, vmrr, nexttime)
-
- def cancel_deployment(self, lease):
- if isinstance(lease, BestEffortLease):
- self.__remove_from_fifo_transfers(lease.id)
-
- def schedule_for_ar(self, lease, vmrr, nexttime):
- config = get_config()
- mechanism = config.get("transfer-mechanism")
- reusealg = config.get("diskimage-reuse")
- avoidredundant = config.get("avoid-redundant-transfers")
-
- lease.state = Lease.STATE_SCHEDULED
-
- if avoidredundant:
- pass # TODO
-
- musttransfer = {}
- mustpool = {}
- nodeassignment = vmrr.nodes
- start = lease.start.requested
- end = lease.start.requested + lease.duration.requested
- for (vnode, pnode) in nodeassignment.items():
- lease_id = lease.id
- self.logger.debug("Scheduling image transfer of '%s' from vnode %i to physnode %i" % (lease.diskimage_id, vnode, pnode))
-
- if reusealg == constants.REUSE_IMAGECACHES:
- if self.resourcepool.exists_reusable_image(pnode, lease.diskimage_id, start):
- self.logger.debug("No need to schedule an image transfer (reusing an image in pool)")
- mustpool[vnode] = pnode
- else:
- self.logger.debug("Need to schedule a transfer.")
- musttransfer[vnode] = pnode
- else:
- self.logger.debug("Need to schedule a transfer.")
- musttransfer[vnode] = pnode
-
- if len(musttransfer) == 0:
- lease.state = Lease.STATE_READY
- else:
- if mechanism == constants.TRANSFER_UNICAST:
- # Dictionary of transfer RRs. Key is the physical node where
- # the image is being transferred to
- transferRRs = {}
- for vnode, pnode in musttransfer:
- if transferRRs.has_key(pnode):
- # We've already scheduled a transfer to this node. Reuse it.
- self.logger.debug("No need to schedule an image transfer (reusing an existing transfer)")
- transferRR = transferRRs[pnode]
- transferRR.piggyback(lease_id, vnode, pnode, end)
- else:
- filetransfer = self.schedule_imagetransfer_edf(lease, {vnode:pnode}, nexttime)
- transferRRs[pnode] = filetransfer
- lease.appendRR(filetransfer)
- elif mechanism == constants.TRANSFER_MULTICAST:
- filetransfer = self.schedule_imagetransfer_edf(lease, musttransfer, nexttime)
- lease.append_rr(filetransfer)
-
- # No chance of scheduling exception at this point. It's safe
- # to add entries to the pools
- if reusealg == constants.REUSE_IMAGECACHES:
- for (vnode, pnode) in mustpool.items():
- self.resourcepool.add_mapping_to_existing_reusable_image(pnode, lease.diskimage_id, lease.id, vnode, start)
-
- def schedule_for_besteffort(self, lease, vmrr, nexttime):
- config = get_config()
- mechanism = config.get("transfer-mechanism")
- reusealg = config.get("diskimage-reuse")
- avoidredundant = config.get("avoid-redundant-transfers")
- earliest = self.find_earliest_starting_times(lease, nexttime)
- lease.state = Lease.STATE_SCHEDULED
- transferRRs = []
- musttransfer = {}
- piggybacking = []
- for (vnode, pnode) in vmrr.nodes.items():
- reqtransfer = earliest[pnode][1]
- if reqtransfer == constants.REQTRANSFER_COWPOOL:
- # Add to pool
- self.logger.debug("Reusing image for V%i->P%i." % (vnode, pnode))
- self.resourcepool.add_mapping_to_existing_reusable_image(pnode, lease.diskimage_id, lease.id, vnode, vmrr.end)
- elif reqtransfer == constants.REQTRANSFER_PIGGYBACK:
- # We can piggyback on an existing transfer
- transferRR = earliest[pnode][2]
- transferRR.piggyback(lease.id, vnode, pnode)
- self.logger.debug("Piggybacking transfer for V%i->P%i on existing transfer in lease %i." % (vnode, pnode, transferRR.lease.id))
- piggybacking.append(transferRR)
- else:
- # Transfer
- musttransfer[vnode] = pnode
- self.logger.debug("Must transfer V%i->P%i." % (vnode, pnode))
- if len(musttransfer)>0:
- transferRRs = self.schedule_imagetransfer_fifo(lease, musttransfer, nexttime)
- endtransfer = transferRRs[-1].end
- lease.imagesavail = endtransfer
- else:
- # TODO: Not strictly correct. Should mark the lease
- # as deployed when piggybacked transfers have concluded
- lease.state = Lease.STATE_READY
- if len(piggybacking) > 0:
- endtimes = [t.end for t in piggybacking]
- if len(musttransfer) > 0:
- endtimes.append(endtransfer)
- lease.imagesavail = max(endtimes)
- if len(musttransfer)==0 and len(piggybacking)==0:
- lease.state = Lease.STATE_READY
- lease.imagesavail = nexttime
- for rr in transferRRs:
- lease.append_rr(rr)
-
-
- def find_earliest_starting_times(self, lease_req, nexttime):
- nodIDs = [n.nod_id for n in self.resourcepool.get_nodes()]
- config = get_config()
- mechanism = config.get("transfer-mechanism")
- reusealg = config.get("diskimage-reuse")
- avoidredundant = config.get("avoid-redundant-transfers")
-
- # Figure out starting time assuming we have to transfer the image
- nextfifo = self.get_next_fifo_transfer_time(nexttime)
-
- imgTransferTime=self.estimate_image_transfer_time(lease_req, self.imagenode_bandwidth)
-
- # Find worst-case earliest start time
- if lease_req.numnodes == 1:
- startTime = nextfifo + imgTransferTime
- earliest = dict([(node, [startTime, constants.REQTRANSFER_YES]) for node in nodIDs])
- else:
- # Unlike the previous case, we may have to find a new start time
- # for all the nodes.
- if mechanism == constants.TRANSFER_UNICAST:
- pass
- # TODO: If transferring each image individually, this will
- # make determining what images can be reused more complicated.
- if mechanism == constants.TRANSFER_MULTICAST:
- startTime = nextfifo + imgTransferTime
- earliest = dict([(node, [startTime, constants.REQTRANSFER_YES]) for node in nodIDs]) # TODO: Take into account reusable images
-
- # Check if we can reuse images
- if reusealg==constants.REUSE_IMAGECACHES:
- nodeswithimg = self.resourcepool.get_nodes_with_reusable_image(lease_req.diskimage_id)
- for node in nodeswithimg:
- earliest[node] = [nexttime, constants.REQTRANSFER_COWPOOL]
-
-
- # Check if we can avoid redundant transfers
- if avoidredundant:
- if mechanism == constants.TRANSFER_UNICAST:
- pass
- # TODO
- if mechanism == constants.TRANSFER_MULTICAST:
- # We can only piggyback on transfers that haven't started yet
- transfers = [t for t in self.transfers_fifo if t.state == ResourceReservation.STATE_SCHEDULED]
- for t in transfers:
- if t.file == lease_req.diskImageID:
- startTime = t.end
- if startTime > nexttime:
- for n in earliest:
- if startTime < earliest[n]:
- earliest[n] = [startTime, constants.REQTRANSFER_PIGGYBACK, t]
-
- return earliest
-
- def schedule_imagetransfer_edf(self, req, vnodes, nexttime):
- # Estimate image transfer time
- bandwidth = self.deployment_enact.get_bandwidth()
- imgTransferTime=self.estimate_image_transfer_time(req, bandwidth)
-
- # Determine start time
- activetransfers = [t for t in self.transfers_edf if t.state == ResourceReservation.STATE_ACTIVE]
- if len(activetransfers) > 0:
- startTime = activetransfers[-1].end
- else:
- startTime = nexttime
-
- transfermap = dict([(copy.copy(t), t) for t in self.transfers_edf if t.state == ResourceReservation.STATE_SCHEDULED])
- newtransfers = transfermap.keys()
-
- res = {}
- resimgnode = ds.ResourceTuple.create_empty()
- resimgnode.set_by_type(constants.RES_NETOUT, bandwidth)
- resnode = ds.ResourceTuple.create_empty()
- resnode.set_by_type(constants.RES_NETIN, bandwidth)
- res[self.edf_node.nod_id] = resimgnode
- for n in vnodes.values():
- res[n] = resnode
-
- newtransfer = FileTransferResourceReservation(req, res)
- newtransfer.deadline = req.start.requested
- newtransfer.state = ResourceReservation.STATE_SCHEDULED
- newtransfer.file = req.diskimage_id
- for vnode, pnode in vnodes.items():
- newtransfer.piggyback(req.id, vnode, pnode)
- newtransfers.append(newtransfer)
-
- def comparedates(x, y):
- dx=x.deadline
- dy=y.deadline
- if dx>dy:
- return 1
- elif dx==dy:
- # If deadlines are equal, we break the tie by order of arrival
- # (currently, we just check if this is the new transfer)
- if x == newtransfer:
- return 1
- elif y == newtransfer:
- return -1
- else:
- return 0
- else:
- return -1
-
- # Order transfers by deadline
- newtransfers.sort(comparedates)
-
- # Compute start times and make sure that deadlines are met
- fits = True
- for t in newtransfers:
- if t == newtransfer:
- duration = imgTransferTime
- else:
- duration = t.end - t.start
-
- t.start = startTime
- t.end = startTime + duration
- if t.end > t.deadline:
- fits = False
- break
- startTime = t.end
-
- if not fits:
- raise DeploymentSchedException, "Adding this VW results in an unfeasible image transfer schedule."
-
- # Push image transfers as close as possible to their deadlines.
- feasibleEndTime=newtransfers[-1].deadline
- for t in reversed(newtransfers):
- if t == newtransfer:
- duration = imgTransferTime
- else:
- duration = t.end - t.start
-
- newEndTime=min([t.deadline, feasibleEndTime])
- t.end=newEndTime
- newStartTime=newEndTime-duration
- t.start=newStartTime
- feasibleEndTime=newStartTime
-
- # Make changes
- for t in newtransfers:
- if t == newtransfer:
- self.slottable.addReservation(t)
- self.transfers_edf.append(t)
- else:
- tOld = transfermap[t]
- self.transfers_edf.remove(tOld)
- self.transfers_edf.append(t)
- self.slottable.updateReservationWithKeyChange(tOld, t)
-
- return newtransfer
-
- def schedule_imagetransfer_fifo(self, req, reqtransfers, nexttime):
- # Estimate image transfer time
- bandwidth = self.imagenode_bandwidth
- imgTransferTime=self.estimate_image_transfer_time(req, bandwidth)
- config = get_config()
- mechanism = config.get("transfer-mechanism")
- startTime = self.get_next_fifo_transfer_time(nexttime)
-
- newtransfers = []
-
- if mechanism == constants.TRANSFER_UNICAST:
- pass
- # TODO: If transferring each image individually, this will
- # make determining what images can be reused more complicated.
- if mechanism == constants.TRANSFER_MULTICAST:
- # Time to transfer is imagesize / bandwidth, regardless of
- # number of nodes
- res = {}
- resimgnode = ds.ResourceTuple.create_empty()
- resimgnode.set_by_type(constants.RES_NETOUT, bandwidth)
- resnode = ds.ResourceTuple.create_empty()
- resnode.set_by_type(constants.RES_NETIN, bandwidth)
- res[self.fifo_node.nod_id] = resimgnode
- for n in reqtransfers.values():
- res[n] = resnode
- newtransfer = FileTransferResourceReservation(req, res)
- newtransfer.start = startTime
- newtransfer.end = startTime+imgTransferTime
- newtransfer.deadline = None
- newtransfer.state = ResourceReservation.STATE_SCHEDULED
- newtransfer.file = req.diskimage_id
- for vnode in reqtransfers:
- physnode = reqtransfers[vnode]
- newtransfer.piggyback(req.id, vnode, physnode)
- self.slottable.addReservation(newtransfer)
- newtransfers.append(newtransfer)
-
- self.transfers_fifo += newtransfers
-
- return newtransfers
-
- def estimate_image_transfer_time(self, lease, bandwidth):
- from haizea.resourcemanager.rm import ResourceManager
- config = ResourceManager.get_singleton().config
- forceTransferTime = config.get("force-imagetransfer-time")
- if forceTransferTime != None:
- return forceTransferTime
- else:
- return estimate_transfer_time(lease.diskimage_size, bandwidth)
-
- def get_next_fifo_transfer_time(self, nexttime):
- transfers = [t for t in self.transfers_fifo if t.state != ResourceReservation.STATE_DONE]
- if len(transfers) > 0:
- startTime = transfers[-1].end
- else:
- startTime = nexttime
- return startTime
-
- def __remove_from_fifo_transfers(self, lease_id):
- transfers = [t for t in self.transfers_fifo if t.state != ResourceReservation.STATE_DONE]
- toremove = []
- for t in transfers:
- for pnode in t.transfers:
- leases = [l for l, v in t.transfers[pnode]]
- if lease_id in leases:
- newtransfers = [(l, v) for l, v in t.transfers[pnode] if l!=lease_id]
- t.transfers[pnode] = newtransfers
- # Check if the transfer has to be cancelled
- a = sum([len(l) for l in t.transfers.values()])
- if a == 0:
- t.lease.removeRR(t)
- self.slottable.removeReservation(t)
- toremove.append(t)
- for t in toremove:
- self.transfers_fifo.remove(t)
-
- @staticmethod
- def handle_start_filetransfer(sched, lease, rr):
- sched.rm.logger.debug("LEASE-%i Start of handleStartFileTransfer" % lease.id)
- lease.print_contents()
- if lease.state == Lease.STATE_SCHEDULED or lease.state == Lease.STATE_READY:
- lease.state = Lease.STATE_PREPARING
- rr.state = ResourceReservation.STATE_ACTIVE
- # TODO: Enactment
- lease.print_contents()
- sched.logger.debug("LEASE-%i End of handleStartFileTransfer" % lease.id)
- sched.logger.info("Starting image transfer for lease %i" % (lease.id))
-
- @staticmethod
- def handle_end_filetransfer(sched, lease, rr):
- sched.rm.logger.debug("LEASE-%i Start of handleEndFileTransfer" % lease.id)
- lease.print_contents()
- if lease.state == Lease.STATE_PREPARING:
- lease.state = Lease.STATE_READY
- rr.state = ResourceReservation.STATE_DONE
- for physnode in rr.transfers:
- vnodes = rr.transfers[physnode]
-
- # Update VM Image maps
- for lease_id, v in vnodes:
- lease = sched.scheduledleases.get_lease(lease_id)
- lease.vmimagemap[v] = physnode
-
- # Find out timeout of image. It will be the latest end time of all the
- # leases being used by that image.
- leases = [l for (l, v) in vnodes]
- maxend=None
- for lease_id in leases:
- l = sched.scheduledleases.get_lease(lease_id)
- end = lease.get_endtime()
- if maxend==None or end>maxend:
- maxend=end
- # TODO: ENACTMENT: Verify the image was transferred correctly
- sched.deployment.add_diskimages(physnode, rr.file, lease.diskimage_size, vnodes, timeout=maxend)
-
- lease.print_contents()
- sched.rm.logger.debug("LEASE-%i End of handleEndFileTransfer" % lease.id)
- sched.rm.logger.info("Completed image transfer for lease %i" % (lease.id))
-
- def add_diskimages(self, pnode_id, diskimage_id, diskimage_size, vnodes, timeout):
- self.logger.debug("Adding image for leases=%s in nod_id=%i" % (vnodes, pnode_id))
-
- pnode = self.resourcepool.get_node(pnode_id)
-
- if self.reusealg == constants.REUSE_NONE:
- for (lease_id, vnode) in vnodes:
- self.resourcepool.add_diskimage(pnode_id, diskimage_id, diskimage_size, lease_id, vnode)
- elif self.reusealg == constants.REUSE_IMAGECACHES:
- # Sometimes we might find that the image is already deployed
- # (although unused). In that case, don't add another copy to
- # the pool. Just "reactivate" it.
- if pnode.exists_reusable_image(diskimage_id):
- for (lease_id, vnode) in vnodes:
- pnode.add_mapping_to_existing_reusable_image(diskimage_id, lease_id, vnode, timeout)
- else:
- if self.maxcachesize == constants.CACHESIZE_UNLIMITED:
- can_add_to_cache = True
- else:
- # We may have to remove images from the cache
- cachesize = pnode.get_reusable_images_size()
- reqsize = cachesize + diskimage_size
- if reqsize > self.maxcachesize:
- # Have to shrink cache
- desiredsize = self.maxcachesize - diskimage_size
- self.logger.debug("Adding the image would make the size of pool in node %i = %iMB. Will try to bring it down to %i" % (pnode_id, reqsize, desiredsize))
- pnode.print_files()
- success = pnode.purge_downto(self.maxcachesize)
- if not success:
- can_add_to_cache = False
- else:
- can_add_to_cache = True
- else:
- can_add_to_cache = True
-
- if can_add_to_cache:
- self.resourcepool.add_reusable_image(pnode_id, diskimage_id, diskimage_size, vnodes, timeout)
- else:
- # This just means we couldn't add the image
- # to the pool. We will have to create disk images to be used
- # only by these leases
- self.logger.debug("Unable to add to pool. Must create individual disk images instead.")
- for (lease_id, vnode) in vnodes:
- self.resourcepool.add_diskimage(pnode_id, diskimage_id, diskimage_size, lease_id, vnode)
-
- pnode.print_files()
-
-
- def check(self, lease, vmrr):
- # Check that all the required disk images are available.
- # Note that it is the enactment module's responsibility to
- # mark an image as correctly deployed. The check we do here
- # is (1) to catch scheduling errors (i.e., the image transfer
- # was not scheduled) and (2) to create disk images if
- # we can reuse a reusable image in the node'.
- # TODO: However, we're assuming CoW, which means the enactment
- # must support it too. If we can't assume CoW, we would have to
- # make a copy of the master image (which takes time), and should
- # be scheduled.
-
- for (vnode, pnode_id) in vmrr.nodes.items():
- pnode = self.resourcepool.get_node(pnode_id)
-
- diskimage = pnode.get_diskimage(lease.id, vnode, lease.diskimage_id)
- if self.reusealg == constants.REUSE_NONE:
- if diskimage == None:
- raise Exception, "ERROR: No image for L%iV%i is on node %i" % (lease.id, vnode, pnode)
- elif self.reusealg == constants.REUSE_IMAGECACHES:
- reusable_image = pnode.get_reusable_image(lease.diskimage_id, lease_id=lease.id, vnode=vnode)
- if reusable_image == None:
- # Not necessarily an error. Maybe the pool was full, and
- # we had to fall back on creating a tainted image right
- # when the image was transferred. We have to check this.
- if diskimage == None:
- raise Exception, "ERROR: Image for L%iV%i is not in pool on node %i, and there is no tainted image" % (lease.id, vnode, pnode_id)
- else:
- # Create tainted image
- self.resourcepool.add_diskimage(pnode_id, lease.diskimage_id, lease.diskimage_size, lease.id, vnode)
- # ENACTMENT
- # self.storage.createCopyFromCache(pnode, lease.diskImageSize)
-
- def cleanup(self, lease, vmrr):
- for vnode, pnode in lease.vmimagemap.items():
- self.resourcepool.remove_diskimage(pnode, lease.id, vnode)
-
-class FileTransferResourceReservation(ResourceReservation):
- def __init__(self, lease, res, start=None, end=None):
- ResourceReservation.__init__(self, lease, start, end, res)
- self.deadline = None
- self.file = None
- # Dictionary of physnode -> [ (lease_id, vnode)* ]
- self.transfers = {}
-
- def print_contents(self, loglevel="VDEBUG"):
- ResourceReservation.print_contents(self, loglevel)
- self.logger.log(loglevel, "Type : FILE TRANSFER")
- self.logger.log(loglevel, "Deadline : %s" % self.deadline)
- self.logger.log(loglevel, "File : %s" % self.file)
- self.logger.log(loglevel, "Transfers : %s" % self.transfers)
-
- def piggyback(self, lease_id, vnode, physnode):
- if self.transfers.has_key(physnode):
- self.transfers[physnode].append((lease_id, vnode))
- else:
- self.transfers[physnode] = [(lease_id, vnode)]
-
- def is_preemptible(self):
- return False
\ No newline at end of file
Copied: branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/deployment/imagetransfer.py (from rev 532, trunk/src/haizea/resourcemanager/deployment/imagetransfer.py)
===================================================================
--- branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/deployment/imagetransfer.py (rev 0)
+++ branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/deployment/imagetransfer.py 2008-10-20 16:50:12 UTC (rev 537)
@@ -0,0 +1,551 @@
+# -------------------------------------------------------------------------- #
+# Copyright 2006-2008, University of Chicago #
+# Copyright 2008, Distributed Systems Architecture Group, Universidad #
+# Complutense de Madrid (dsa-research.org) #
+# #
+# Licensed under the Apache License, Version 2.0 (the "License"); you may #
+# not use this file except in compliance with the License. You may obtain #
+# a copy of the License at #
+# #
+# http://www.apache.org/licenses/LICENSE-2.0 #
+# #
+# Unless required by applicable law or agreed to in writing, software #
+# distributed under the License is distributed on an "AS IS" BASIS, #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
+# See the License for the specific language governing permissions and #
+# limitations under the License. #
+# -------------------------------------------------------------------------- #
+
+import haizea.common.constants as constants
+import haizea.resourcemanager.datastruct as ds
+from haizea.resourcemanager.deployment import DeploymentScheduler, DeploymentSchedException
+from haizea.resourcemanager.datastruct import ResourceReservation, Lease, ARLease, BestEffortLease
+from haizea.resourcemanager.scheduler import ReservationEventHandler
+from haizea.common.utils import estimate_transfer_time, get_config
+
+import copy
+
+class ImageTransferDeploymentScheduler(DeploymentScheduler):
+ def __init__(self, slottable, resourcepool, deployment_enact):
+ DeploymentScheduler.__init__(self, slottable, resourcepool, deployment_enact)
+
+ # TODO: The following two should be merged into
+ # something like this:
+ # self.image_node = self.deployment_enact.get_image_node()
+ self.fifo_node = self.deployment_enact.get_fifo_node()
+ self.edf_node = self.deployment_enact.get_edf_node()
+
+ self.transfers_edf = []
+ self.transfers_fifo = []
+ self.completed_transfers = []
+
+ config = get_config()
+ self.reusealg = config.get("diskimage-reuse")
+ if self.reusealg == constants.REUSE_IMAGECACHES:
+ self.maxcachesize = config.get("diskimage-cache-size")
+ else:
+ self.maxcachesize = None
+
+ self.imagenode_bandwidth = self.deployment_enact.get_bandwidth()
+
+ self.handlers ={}
+ self.handlers[FileTransferResourceReservation] = ReservationEventHandler(
+ on_start = ImageTransferDeploymentScheduler.handle_start_filetransfer,
+ on_end = ImageTransferDeploymentScheduler.handle_end_filetransfer)
+
+ def schedule(self, lease, vmrr, nexttime):
+ if isinstance(lease, ARLease):
+ self.schedule_for_ar(lease, vmrr, nexttime)
+ elif isinstance(lease, BestEffortLease):
+ self.schedule_for_besteffort(lease, vmrr, nexttime)
+
+ def cancel_deployment(self, lease):
+ if isinstance(lease, BestEffortLease):
+ self.__remove_from_fifo_transfers(lease.id)
+
+ def schedule_for_ar(self, lease, vmrr, nexttime):
+ config = get_config()
+ mechanism = config.get("transfer-mechanism")
+ reusealg = config.get("diskimage-reuse")
+ avoidredundant = config.get("avoid-redundant-transfers")
+
+ lease.state = Lease.STATE_SCHEDULED
+
+ if avoidredundant:
+ pass # TODO
+
+ musttransfer = {}
+ mustpool = {}
+ nodeassignment = vmrr.nodes
+ start = lease.start.requested
+ end = lease.start.requested + lease.duration.requested
+ for (vnode, pnode) in nodeassignment.items():
+ lease_id = lease.id
+ self.logger.debug("Scheduling image transfer of '%s' from vnode %i to physnode %i" % (lease.diskimage_id, vnode, pnode))
+
+ if reusealg == constants.REUSE_IMAGECACHES:
+ if self.resourcepool.exists_reusable_image(pnode, lease.diskimage_id, start):
+ self.logger.debug("No need to schedule an image transfer (reusing an image in pool)")
+ mustpool[vnode] = pnode
+ else:
+ self.logger.debug("Need to schedule a transfer.")
+ musttransfer[vnode] = pnode
+ else:
+ self.logger.debug("Need to schedule a transfer.")
+ musttransfer[vnode] = pnode
+
+ if len(musttransfer) == 0:
+ lease.state = Lease.STATE_READY
+ else:
+ if mechanism == constants.TRANSFER_UNICAST:
+ # Dictionary of transfer RRs. Key is the physical node where
+ # the image is being transferred to
+ transferRRs = {}
+ for vnode, pnode in musttransfer.items():
+ if transferRRs.has_key(pnode):
+ # We've already scheduled a transfer to this node. Reuse it.
+ self.logger.debug("No need to schedule an image transfer (reusing an existing transfer)")
+ transferRR = transferRRs[pnode]
+ transferRR.piggyback(lease.id, vnode, pnode)
+ else:
+ filetransfer = self.schedule_imagetransfer_edf(lease, {vnode:pnode}, nexttime)
+ transferRRs[pnode] = filetransfer
+ lease.append_deployrr(filetransfer)
+ elif mechanism == constants.TRANSFER_MULTICAST:
+ try:
+ filetransfer = self.schedule_imagetransfer_edf(lease, musttransfer, nexttime)
+ lease.append_deployrr(filetransfer)
+ except DeploymentSchedException, msg:
+ raise
+
+ # No chance of scheduling exception at this point. It's safe
+ # to add entries to the pools
+ if reusealg == constants.REUSE_IMAGECACHES:
+ for (vnode, pnode) in mustpool.items():
+ self.resourcepool.add_mapping_to_existing_reusable_image(pnode, lease.diskimage_id, lease.id, vnode, start)
+
+ def schedule_for_besteffort(self, lease, vmrr, nexttime):
+ config = get_config()
+ mechanism = config.get("transfer-mechanism")
+ reusealg = config.get("diskimage-reuse")
+ avoidredundant = config.get("avoid-redundant-transfers")
+ earliest = self.find_earliest_starting_times(lease, nexttime)
+ lease.state = Lease.STATE_SCHEDULED
+ transferRRs = []
+ musttransfer = {}
+ piggybacking = []
+ for (vnode, pnode) in vmrr.nodes.items():
+ reqtransfer = earliest[pnode][1]
+ if reqtransfer == constants.REQTRANSFER_COWPOOL:
+ # Add to pool
+ self.logger.debug("Reusing image for V%i->P%i." % (vnode, pnode))
+ self.resourcepool.add_mapping_to_existing_reusable_image(pnode, lease.diskimage_id, lease.id, vnode, vmrr.end)
+ elif reqtransfer == constants.REQTRANSFER_PIGGYBACK:
+ # We can piggyback on an existing transfer
+ transferRR = earliest[pnode][2]
+ transferRR.piggyback(lease.id, vnode, pnode)
+ self.logger.debug("Piggybacking transfer for V%i->P%i on existing transfer in lease %i." % (vnode, pnode, transferRR.lease.id))
+ piggybacking.append(transferRR)
+ else:
+ # Transfer
+ musttransfer[vnode] = pnode
+ self.logger.debug("Must transfer V%i->P%i." % (vnode, pnode))
+ if len(musttransfer)>0:
+ transferRRs = self.schedule_imagetransfer_fifo(lease, musttransfer, nexttime)
+ endtransfer = transferRRs[-1].end
+ lease.imagesavail = endtransfer
+ else:
+ # TODO: Not strictly correct. Should mark the lease
+ # as deployed when piggybacked transfers have concluded
+ lease.state = Lease.STATE_READY
+ if len(piggybacking) > 0:
+ endtimes = [t.end for t in piggybacking]
+ if len(musttransfer) > 0:
+ endtimes.append(endtransfer)
+ lease.imagesavail = max(endtimes)
+ if len(musttransfer)==0 and len(piggybacking)==0:
+ lease.state = Lease.STATE_READY
+ lease.imagesavail = nexttime
+ for rr in transferRRs:
+ lease.append_deployrr(rr)
+
+
+ def find_earliest_starting_times(self, lease_req, nexttime):
+ nodIDs = [n.nod_id for n in self.resourcepool.get_nodes()]
+ config = get_config()
+ mechanism = config.get("transfer-mechanism")
+ reusealg = config.get("diskimage-reuse")
+ avoidredundant = config.get("avoid-redundant-transfers")
+
+ # Figure out starting time assuming we have to transfer the image
+ nextfifo = self.get_next_fifo_transfer_time(nexttime)
+
+ imgTransferTime=self.estimate_image_transfer_time(lease_req, self.imagenode_bandwidth)
+
+ # Find worst-case earliest start time
+ if lease_req.numnodes == 1:
+ startTime = nextfifo + imgTransferTime
+ earliest = dict([(node, [startTime, constants.REQTRANSFER_YES]) for node in nodIDs])
+ else:
+ # Unlike the previous case, we may have to find a new start time
+ # for all the nodes.
+ if mechanism == constants.TRANSFER_UNICAST:
+ pass
+ # TODO: If transferring each image individually, this will
+ # make determining what images can be reused more complicated.
+ if mechanism == constants.TRANSFER_MULTICAST:
+ startTime = nextfifo + imgTransferTime
+ earliest = dict([(node, [startTime, constants.REQTRANSFER_YES]) for node in nodIDs]) # TODO: Take into account reusable images
+
+ # Check if we can reuse images
+ if reusealg==constants.REUSE_IMAGECACHES:
+ nodeswithimg = self.resourcepool.get_nodes_with_reusable_image(lease_req.diskimage_id)
+ for node in nodeswithimg:
+ earliest[node] = [nexttime, constants.REQTRANSFER_COWPOOL]
+
+
+ # Check if we can avoid redundant transfers
+ if avoidredundant:
+ if mechanism == constants.TRANSFER_UNICAST:
+ pass
+ # TODO
+ if mechanism == constants.TRANSFER_MULTICAST:
+ # We can only piggyback on transfers that haven't started yet
+ transfers = [t for t in self.transfers_fifo if t.state == ResourceReservation.STATE_SCHEDULED]
+ for t in transfers:
+ if t.file == lease_req.diskimage_id:
+ startTime = t.end
+ if startTime > nexttime:
+ for n in earliest:
+ if startTime < earliest[n][0]:
+ earliest[n] = [startTime, constants.REQTRANSFER_PIGGYBACK, t]
+
+ return earliest
+
+ def schedule_imagetransfer_edf(self, req, vnodes, nexttime):
+ # Estimate image transfer time
+ bandwidth = self.deployment_enact.get_bandwidth()
+ imgTransferTime=self.estimate_image_transfer_time(req, bandwidth)
+
+ # Determine start time
+ activetransfers = [t for t in self.transfers_edf if t.state == ResourceReservation.STATE_ACTIVE]
+ if len(activetransfers) > 0:
+ startTime = activetransfers[-1].end
+ else:
+ startTime = nexttime
+
+ # TODO: Only save a copy of start/end times, not the whole RR
+ transfermap = dict([(copy.copy(t), t) for t in self.transfers_edf if t.state == ResourceReservation.STATE_SCHEDULED])
+ newtransfers = transfermap.keys()
+
+ res = {}
+ resimgnode = ds.ResourceTuple.create_empty()
+ resimgnode.set_by_type(constants.RES_NETOUT, bandwidth)
+ resnode = ds.ResourceTuple.create_empty()
+ resnode.set_by_type(constants.RES_NETIN, bandwidth)
+ res[self.edf_node.nod_id] = resimgnode
+ for n in vnodes.values():
+ res[n] = resnode
+
+ newtransfer = FileTransferResourceReservation(req, res)
+ newtransfer.deadline = req.start.requested
+ newtransfer.state = ResourceReservation.STATE_SCHEDULED
+ newtransfer.file = req.diskimage_id
+ for vnode, pnode in vnodes.items():
+ newtransfer.piggyback(req.id, vnode, pnode)
+ newtransfers.append(newtransfer)
+
+ def comparedates(x, y):
+ dx=x.deadline
+ dy=y.deadline
+ if dx>dy:
+ return 1
+ elif dx==dy:
+ # If deadlines are equal, we break the tie by order of arrival
+ # (currently, we just check if this is the new transfer)
+ if x == newtransfer:
+ return 1
+ elif y == newtransfer:
+ return -1
+ else:
+ return 0
+ else:
+ return -1
+
+ # Order transfers by deadline
+ newtransfers.sort(comparedates)
+
+ # Compute start times and make sure that deadlines are met
+ fits = True
+ for t in newtransfers:
+ if t == newtransfer:
+ duration = imgTransferTime
+ else:
+ duration = t.end - t.start
+
+ t.start = startTime
+ t.end = startTime + duration
+ if t.end > t.deadline:
+ fits = False
+ break
+ startTime = t.end
+
+ if not fits:
+ raise DeploymentSchedException, "Adding this lease results in an unfeasible image transfer schedule."
+
+ # Push image transfers as close as possible to their deadlines.
+ feasibleEndTime=newtransfers[-1].deadline
+ for t in reversed(newtransfers):
+ if t == newtransfer:
+ duration = imgTransferTime
+ else:
+ duration = t.end - t.start
+
+ newEndTime=min([t.deadline, feasibleEndTime])
+ t.end=newEndTime
+ newStartTime=newEndTime-duration
+ t.start=newStartTime
+ feasibleEndTime=newStartTime
+
+ # Make changes
+ for new_t in newtransfers:
+ if new_t == newtransfer:
+ self.slottable.addReservation(new_t)
+ self.transfers_edf.append(new_t)
+ else:
+ t_original = transfermap[new_t]
+ old_start = t_original.start
+ old_end = t_original.end
+ t_original.start = new_t.start
+ t_original.end = new_t.end
+ self.slottable.update_reservation_with_key_change(t_original, old_start, old_end)
+
+ return newtransfer
+
+ def schedule_imagetransfer_fifo(self, req, reqtransfers, nexttime):
+ # Estimate image transfer time
+ bandwidth = self.imagenode_bandwidth
+ imgTransferTime=self.estimate_image_transfer_time(req, bandwidth)
+ config = get_config()
+ mechanism = config.get("transfer-mechanism")
+ startTime = self.get_next_fifo_transfer_time(nexttime)
+
+ newtransfers = []
+
+ if mechanism == constants.TRANSFER_UNICAST:
+ pass
+ # TODO: If transferring each image individually, this will
+ # make determining what images can be reused more complicated.
+ if mechanism == constants.TRANSFER_MULTICAST:
+ # Time to transfer is imagesize / bandwidth, regardless of
+ # number of nodes
+ res = {}
+ resimgnode = ds.ResourceTuple.create_empty()
+ resimgnode.set_by_type(constants.RES_NETOUT, bandwidth)
+ resnode = ds.ResourceTuple.create_empty()
+ resnode.set_by_type(constants.RES_NETIN, bandwidth)
+ res[self.fifo_node.nod_id] = resimgnode
+ for n in reqtransfers.values():
+ res[n] = resnode
+ newtransfer = FileTransferResourceReservation(req, res)
+ newtransfer.start = startTime
+ newtransfer.end = startTime+imgTransferTime
+ newtransfer.deadline = None
+ newtransfer.state = ResourceReservation.STATE_SCHEDULED
+ newtransfer.file = req.diskimage_id
+ for vnode in reqtransfers:
+ physnode = reqtransfers[vnode]
+ newtransfer.piggyback(req.id, vnode, physnode)
+ self.slottable.addReservation(newtransfer)
+ newtransfers.append(newtransfer)
+
+ self.transfers_fifo += newtransfers
+
+ return newtransfers
+
+ def estimate_image_transfer_time(self, lease, bandwidth):
+ from haizea.resourcemanager.rm import ResourceManager
+ config = ResourceManager.get_singleton().config
+ forceTransferTime = config.get("force-imagetransfer-time")
+ if forceTransferTime != None:
+ return forceTransferTime
+ else:
+ return estimate_transfer_time(lease.diskimage_size, bandwidth)
+
+ def get_next_fifo_transfer_time(self, nexttime):
+ transfers = [t for t in self.transfers_fifo if t.state != ResourceReservation.STATE_DONE]
+ if len(transfers) > 0:
+ startTime = transfers[-1].end
+ else:
+ startTime = nexttime
+ return startTime
+
+ def __remove_from_fifo_transfers(self, lease_id):
+ transfers = [t for t in self.transfers_fifo if t.state != ResourceReservation.STATE_DONE]
+ toremove = []
+ for t in transfers:
+ for pnode in t.transfers:
+ leases = [l for l, v in t.transfers[pnode]]
+ if lease_id in leases:
+ newtransfers = [(l, v) for l, v in t.transfers[pnode] if l!=lease_id]
+ t.transfers[pnode] = newtransfers
+ # Check if the transfer has to be cancelled
+ a = sum([len(l) for l in t.transfers.values()])
+ if a == 0:
+ t.lease.removeRR(t)
+ self.slottable.removeReservation(t)
+ toremove.append(t)
+ for t in toremove:
+ self.transfers_fifo.remove(t)
+
+ @staticmethod
+ def handle_start_filetransfer(sched, lease, rr):
+ sched.logger.debug("LEASE-%i Start of handleStartFileTransfer" % lease.id)
+ lease.print_contents()
+ if lease.state == Lease.STATE_SCHEDULED or lease.state == Lease.STATE_READY:
+ lease.state = Lease.STATE_PREPARING
+ rr.state = ResourceReservation.STATE_ACTIVE
+ # TODO: Enactment
+ lease.print_contents()
+ sched.logger.debug("LEASE-%i End of handleStartFileTransfer" % lease.id)
+ sched.logger.info("Starting image transfer for lease %i" % (lease.id))
+
+ @staticmethod
+ def handle_end_filetransfer(sched, lease, rr):
+ sched.logger.debug("LEASE-%i Start of handleEndFileTransfer" % lease.id)
+ lease.print_contents()
+ if lease.state == Lease.STATE_PREPARING:
+ lease.state = Lease.STATE_READY
+ rr.state = ResourceReservation.STATE_DONE
+ for physnode in rr.transfers:
+ vnodes = rr.transfers[physnode]
+
+ # Update VM Image maps
+ for lease_id, v in vnodes:
+ lease = sched.leases.get_lease(lease_id)
+ lease.diskimagemap[v] = physnode
+
+ # Find out the timeout of the image. It will be the latest end time of
+ # all the leases using that image.
+ leases = [l for (l, v) in vnodes]
+ maxend=None
+ for lease_id in leases:
+ l = sched.leases.get_lease(lease_id)
+ end = l.get_endtime()
+ if maxend==None or end>maxend:
+ maxend=end
+ # TODO: ENACTMENT: Verify the image was transferred correctly
+ sched.deployment_scheduler.add_diskimages(physnode, rr.file, lease.diskimage_size, vnodes, timeout=maxend)
+
+ lease.print_contents()
+ sched.logger.debug("LEASE-%i End of handleEndFileTransfer" % lease.id)
+ sched.logger.info("Completed image transfer for lease %i" % (lease.id))
+
+ def add_diskimages(self, pnode_id, diskimage_id, diskimage_size, vnodes, timeout):
+ self.logger.debug("Adding image for leases=%s in nod_id=%i" % (vnodes, pnode_id))
+
+ pnode = self.resourcepool.get_node(pnode_id)
+
+ if self.reusealg == constants.REUSE_NONE:
+ for (lease_id, vnode) in vnodes:
+ self.resourcepool.add_diskimage(pnode_id, diskimage_id, diskimage_size, lease_id, vnode)
+ elif self.reusealg == constants.REUSE_IMAGECACHES:
+ # Sometimes we might find that the image is already deployed
+ # (although unused). In that case, don't add another copy to
+ # the pool. Just "reactivate" it.
+ if pnode.exists_reusable_image(diskimage_id):
+ for (lease_id, vnode) in vnodes:
+ pnode.add_mapping_to_existing_reusable_image(diskimage_id, lease_id, vnode, timeout)
+ else:
+ if self.maxcachesize == constants.CACHESIZE_UNLIMITED:
+ can_add_to_cache = True
+ else:
+ # We may have to remove images from the cache
+ cachesize = pnode.get_reusable_images_size()
+ reqsize = cachesize + diskimage_size
+ if reqsize > self.maxcachesize:
+ # Have to shrink cache
+ desiredsize = self.maxcachesize - diskimage_size
+ self.logger.debug("Adding the image would make the size of pool in node %i = %iMB. Will try to bring it down to %i" % (pnode_id, reqsize, desiredsize))
+ pnode.print_files()
+ success = pnode.purge_downto(desiredsize)
+ if not success:
+ can_add_to_cache = False
+ else:
+ can_add_to_cache = True
+ else:
+ can_add_to_cache = True
+
+ if can_add_to_cache:
+ self.resourcepool.add_reusable_image(pnode_id, diskimage_id, diskimage_size, vnodes, timeout)
+ else:
+ # This just means we couldn't add the image
+ # to the pool. We will have to create disk images to be used
+ # only by these leases
+ self.logger.debug("Unable to add to pool. Must create individual disk images instead.")
+ for (lease_id, vnode) in vnodes:
+ self.resourcepool.add_diskimage(pnode_id, diskimage_id, diskimage_size, lease_id, vnode)
+
+ pnode.print_files()
+
+
+ def check(self, lease, vmrr):
+ # Check that all the required disk images are available.
+ # Note that it is the enactment module's responsibility to
+ # mark an image as correctly deployed. The check we do here
+ # is (1) to catch scheduling errors (i.e., the image transfer
+ # was not scheduled) and (2) to create disk images if
+ # we can reuse a reusable image in the node.
+ # TODO: However, we're assuming CoW, which means the enactment
+ # must support it too. If we can't assume CoW, we would have to
+ # make a copy of the master image, which takes time and would
+ # therefore have to be scheduled as well.
+
+ for (vnode, pnode_id) in vmrr.nodes.items():
+ pnode = self.resourcepool.get_node(pnode_id)
+
+ diskimage = pnode.get_diskimage(lease.id, vnode, lease.diskimage_id)
+ if self.reusealg == constants.REUSE_NONE:
+ if diskimage == None:
+ raise Exception, "ERROR: No image for L%iV%i is on node %i" % (lease.id, vnode, pnode)
+ elif self.reusealg == constants.REUSE_IMAGECACHES:
+ reusable_image = pnode.get_reusable_image(lease.diskimage_id, lease_id=lease.id, vnode=vnode)
+ if reusable_image == None:
+ # Not necessarily an error. Maybe the pool was full, and
+ # we had to fall back on creating a tainted image right
+ # when the image was transferred. We have to check this.
+ if diskimage == None:
+ raise Exception, "ERROR: Image for L%iV%i is not in pool on node %i, and there is no tainted image" % (lease.id, vnode, pnode_id)
+ else:
+ # Create tainted image
+ self.resourcepool.add_diskimage(pnode_id, lease.diskimage_id, lease.diskimage_size, lease.id, vnode)
+ # ENACTMENT
+ # self.storage.createCopyFromCache(pnode, lease.diskImageSize)
+
+ def cleanup(self, lease, vmrr):
+ for vnode, pnode in lease.diskimagemap.items():
+ self.resourcepool.remove_diskimage(pnode, lease.id, vnode)
+
+class FileTransferResourceReservation(ResourceReservation):
+ def __init__(self, lease, res, start=None, end=None):
+ ResourceReservation.__init__(self, lease, start, end, res)
+ self.deadline = None
+ self.file = None
+ # Dictionary of physnode -> [ (lease_id, vnode)* ]
+ self.transfers = {}
+
+ def print_contents(self, loglevel="VDEBUG"):
+ ResourceReservation.print_contents(self, loglevel)
+ self.logger.log(loglevel, "Type : FILE TRANSFER")
+ self.logger.log(loglevel, "Deadline : %s" % self.deadline)
+ self.logger.log(loglevel, "File : %s" % self.file)
+ self.logger.log(loglevel, "Transfers : %s" % self.transfers)
+
+ def piggyback(self, lease_id, vnode, physnode):
+ if self.transfers.has_key(physnode):
+ self.transfers[physnode].append((lease_id, vnode))
+ else:
+ self.transfers[physnode] = [(lease_id, vnode)]
+
+ def is_preemptible(self):
+ return False
\ No newline at end of file
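
The core of schedule_imagetransfer_edf above is a two-pass EDF placement: sort the pending transfers by deadline, lay them out back-to-back from the current start time to check feasibility, then walk the list in reverse and push each transfer as late as its deadline (or the following transfer) allows. A minimal standalone sketch of that idea, on plain (duration, deadline) records rather than reservation objects; the names and values below are illustrative, not part of the committed code:

    # Sketch: EDF forward feasibility pass + backward "push to deadline" pass.
    def edf_place(transfers, start):
        # transfers: list of dicts with numeric 'duration' and 'deadline'
        transfers.sort(key=lambda t: t['deadline'])
        current = start
        for t in transfers:                       # forward pass: check deadlines
            t['start'], t['end'] = current, current + t['duration']
            if t['end'] > t['deadline']:
                raise Exception("infeasible transfer schedule")
            current = t['end']
        feasible_end = transfers[-1]['deadline']
        for t in reversed(transfers):             # backward pass: delay transfers
            t['end'] = min(t['deadline'], feasible_end)
            t['start'] = t['end'] - t['duration']
            feasible_end = t['start']
        return transfers

    # e.g. edf_place([{'duration': 10, 'deadline': 40},
    #                 {'duration': 5,  'deadline': 20}], start=0)
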
Deleted: branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/enact/opennebula.py
===================================================================
--- trunk/src/haizea/resourcemanager/enact/opennebula.py 2008-09-16 10:43:48 UTC (rev 501)
+++ branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/enact/opennebula.py 2008-10-20 16:50:12 UTC (rev 537)
@@ -1,188 +0,0 @@
-# -------------------------------------------------------------------------- #
-# Copyright 2006-2008, University of Chicago #
-# Copyright 2008, Distributed Systems Architecture Group, Universidad #
-# Complutense de Madrid (dsa-research.org) #
-# #
-# Licensed under the Apache License, Version 2.0 (the "License"); you may #
-# not use this file except in compliance with the License. You may obtain #
-# a copy of the License at #
-# #
-# http://www.apache.org/licenses/LICENSE-2.0 #
-# #
-# Unless required by applicable law or agreed to in writing, software #
-# distributed under the License is distributed on an "AS IS" BASIS, #
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
-# See the License for the specific language governing permissions and #
-# limitations under the License. #
-# -------------------------------------------------------------------------- #
-
-from haizea.resourcemanager.resourcepool import Node
-from haizea.resourcemanager.enact import ResourcePoolInfo, VMEnactment
-from haizea.common.utils import get_config
-import haizea.common.constants as constants
-import haizea.resourcemanager.datastruct as ds
-from pysqlite2 import dbapi2 as sqlite
-import logging
-import commands
-
-class OpenNebulaResourcePoolInfo(ResourcePoolInfo):
- ONEATTR2HAIZEA = { "TOTALCPU": constants.RES_CPU,
- "TOTALMEMORY": constants.RES_MEM }
-
- def __init__(self):
- ResourcePoolInfo.__init__(self)
- config = get_config()
- self.logger = logging.getLogger("ENACT.ONE.INFO")
- self.suspendresumerate = config.get("one.suspendresume-rate-estimate")
-
- # Get information about nodes from DB
- conn = sqlite.connect(config.get("one.db"))
- conn.row_factory = sqlite.Row
-
- self.nodes = []
- cur = conn.cursor()
- cur.execute("select hid, host_name from hostpool where state != 4")
- hosts = cur.fetchall()
- for (i, host) in enumerate(hosts):
- nod_id = i+1
- enactID = int(host["hid"])
- hostname = host["host_name"]
- capacity = ds.ResourceTuple.create_empty()
- capacity.set_by_type(constants.RES_DISK, 80000) # OpenNebula currently doesn't provide this
- capacity.set_by_type(constants.RES_NETIN, 100) # OpenNebula currently doesn't provide this
- capacity.set_by_type(constants.RES_NETOUT, 100) # OpenNebula currently doesn't provide this
- cur.execute("select name, value from host_attributes where id=%i" % enactID)
- attrs = cur.fetchall()
- for attr in attrs:
- name = attr["name"]
- if OpenNebulaResourcePoolInfo.ONEATTR2HAIZEA.has_key(name):
- capacity.set_by_type(OpenNebulaResourcePoolInfo.ONEATTR2HAIZEA[name], int(attr["value"]))
- capacity.set_by_type(constants.RES_CPU, capacity.get_by_type(constants.RES_CPU) / 100.0)
- capacity.set_by_type(constants.RES_MEM, capacity.get_by_type(constants.RES_MEM) / 1024.0)
- node = Node(nod_id, hostname, capacity)
- node.enactment_info = int(enactID)
- self.nodes.append(node)
-
- self.logger.info("Fetched %i nodes from ONE db" % len(self.nodes))
- for n in self.nodes:
- self.logger.debug("%i %s %s" % (n.nod_id, n.hostname, n.capacity))
-
- def get_nodes(self):
- return self.nodes
-
- def get_resource_types(self):
- return [(constants.RES_CPU, constants.RESTYPE_FLOAT, "CPU"),
- (constants.RES_MEM, constants.RESTYPE_INT, "Mem"),
- (constants.RES_DISK, constants.RESTYPE_INT, "Disk"),
- (constants.RES_NETIN, constants.RESTYPE_INT, "Net (in)"),
- (constants.RES_NETOUT, constants.RESTYPE_INT, "Net (out)")]
-
- def get_suspendresume_rate(self):
- return self.suspendresumerate
-
- def get_bandwidth(self):
- return 0
-
-class OpenNebulaVMEnactment(VMEnactment):
- def __init__(self):
- VMEnactment.__init__(self)
- self.logger = logging.getLogger("ENACT.ONE.VM")
-
- self.onevm = get_config().get("onevm")
-
- self.conn = sqlite.connect(get_config().get("one.db"))
- self.conn.row_factory = sqlite.Row
-
-
- def run_command(self, cmd):
- self.logger.debug("Running command: %s" % cmd)
- (status, output) = commands.getstatusoutput(cmd)
- self.logger.debug("Returned status=%i, output='%s'" % (status, output))
- return status, output
-
- def start(self, action):
- for vnode in action.vnodes:
- # Unpack action
- vmid = action.vnodes[vnode].enactment_info
- hostID = action.vnodes[vnode].pnode
- image = action.vnodes[vnode].diskimage
- cpu = action.vnodes[vnode].resources.get_by_type(constants.RES_CPU)
- memory = action.vnodes[vnode].resources.get_by_type(constants.RES_MEM)
-
- self.logger.debug("Received request to start VM for L%iV%i on host %i, image=%s, cpu=%i, mem=%i"
- % (action.lease_haizea_id, vnode, hostID, image, cpu, memory))
-
- cmd = "%s deploy %i %i" % (self.onevm, vmid, hostID)
- status, output = self.run_command(cmd)
- if status == 0:
- self.logger.debug("Command returned succesfully.")
- else:
- raise Exception, "Error when running onevm deploy (status=%i, output='%s')" % (status, output)
-
- def stop(self, action):
- for vnode in action.vnodes:
- # Unpack action
- vmid = action.vnodes[vnode].enactment_info
- cmd = "%s shutdown %i" % (self.onevm, vmid)
- status, output = self.run_command(cmd)
- if status == 0:
- self.logger.debug("Command returned succesfully.")
- else:
- raise Exception, "Error when running onevm shutdown (status=%i, output='%s')" % (status, output)
-
- def suspend(self, action):
- for vnode in action.vnodes:
- # Unpack action
- vmid = action.vnodes[vnode].enactment_info
- cmd = "%s suspend %i" % (self.onevm, vmid)
- status, output = self.run_command(cmd)
- if status == 0:
- self.logger.debug("Command returned succesfully.")
- else:
- raise Exception, "Error when running onevm suspend (status=%i, output='%s')" % (status, output)
-
- def resume(self, action):
- for vnode in action.vnodes:
- # Unpack action
- vmid = action.vnodes[vnode].enactment_info
- cmd = "%s resume %i" % (self.onevm, vmid)
- status, output = self.run_command(cmd)
- if status == 0:
- self.logger.debug("Command returned succesfully.")
- else:
- raise Exception, "Error when running onevm resume (status=%i, output='%s')" % (status, output)
-
- def verify_suspend(self, action):
- # TODO: Do a single query
- result = 0
- for vnode in action.vnodes:
- # Unpack action
- vmid = action.vnodes[vnode].enactment_info
- cur = self.conn.cursor()
- cur.execute("select state from vmpool where oid = %i" % vmid)
- onevm = cur.fetchone()
- state = onevm["state"]
- if state == 5:
- self.logger.debug("Suspend of L%iV%i correct." % (action.lease_haizea_id, vnode))
- else:
- self.logger.warning("ONE did not complete suspend of L%iV%i on time. State is %i" % (action.lease_haizea_id, vnode, state))
- result = 1
- return result
-
- def verify_resume(self, action):
- # TODO: Do a single query
- result = 0
- for vnode in action.vnodes:
- # Unpack action
- vmid = action.vnodes[vnode].enactment_info
- cur = self.conn.cursor()
- cur.execute("select state from vmpool where oid = %i" % vmid)
- onevm = cur.fetchone()
- state = onevm["state"]
- if state == 3:
- self.logger.debug("Resume of L%iV%i correct." % (action.lease_haizea_id, vnode))
- else:
- self.logger.warning("ONE did not complete resume of L%iV%i on time. State is %i" % (action.lease_haizea_id, vnode, state))
- result = 1
- return result
-
Copied: branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/enact/opennebula.py (from rev 507, trunk/src/haizea/resourcemanager/enact/opennebula.py)
===================================================================
--- branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/enact/opennebula.py (rev 0)
+++ branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/enact/opennebula.py 2008-10-20 16:50:12 UTC (rev 537)
@@ -0,0 +1,197 @@
+# -------------------------------------------------------------------------- #
+# Copyright 2006-2008, University of Chicago #
+# Copyright 2008, Distributed Systems Architecture Group, Universidad #
+# Complutense de Madrid (dsa-research.org) #
+# #
+# Licensed under the Apache License, Version 2.0 (the "License"); you may #
+# not use this file except in compliance with the License. You may obtain #
+# a copy of the License at #
+# #
+# http://www.apache.org/licenses/LICENSE-2.0 #
+# #
+# Unless required by applicable law or agreed to in writing, software #
+# distributed under the License is distributed on an "AS IS" BASIS, #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
+# See the License for the specific language governing permissions and #
+# limitations under the License. #
+# -------------------------------------------------------------------------- #
+
+from haizea.resourcemanager.resourcepool import Node
+from haizea.resourcemanager.enact import ResourcePoolInfo, VMEnactment, DeploymentEnactment
+from haizea.common.utils import get_config
+import haizea.common.constants as constants
+import haizea.resourcemanager.datastruct as ds
+from pysqlite2 import dbapi2 as sqlite
+import logging
+import commands
+
+class OpenNebulaResourcePoolInfo(ResourcePoolInfo):
+ ONEATTR2HAIZEA = { "TOTALCPU": constants.RES_CPU,
+ "TOTALMEMORY": constants.RES_MEM }
+
+ def __init__(self):
+ ResourcePoolInfo.__init__(self)
+ config = get_config()
+ self.logger = logging.getLogger("ENACT.ONE.INFO")
+ self.suspendresumerate = config.get("one.suspendresume-rate-estimate")
+
+ # Get information about nodes from DB
+ conn = sqlite.connect(config.get("one.db"))
+ conn.row_factory = sqlite.Row
+
+ self.nodes = []
+ cur = conn.cursor()
+ cur.execute("select hid, host_name from hostpool where state != 4")
+ hosts = cur.fetchall()
+ for (i, host) in enumerate(hosts):
+ nod_id = i+1
+ enactID = int(host["hid"])
+ hostname = host["host_name"]
+ capacity = ds.ResourceTuple.create_empty()
+ capacity.set_by_type(constants.RES_DISK, 80000) # OpenNebula currently doesn't provide this
+ capacity.set_by_type(constants.RES_NETIN, 100) # OpenNebula currently doesn't provide this
+ capacity.set_by_type(constants.RES_NETOUT, 100) # OpenNebula currently doesn't provide this
+ cur.execute("select name, value from host_attributes where id=%i" % enactID)
+ attrs = cur.fetchall()
+ for attr in attrs:
+ name = attr["name"]
+ if OpenNebulaResourcePoolInfo.ONEATTR2HAIZEA.has_key(name):
+ capacity.set_by_type(OpenNebulaResourcePoolInfo.ONEATTR2HAIZEA[name], int(attr["value"]))
+ capacity.set_by_type(constants.RES_CPU, capacity.get_by_type(constants.RES_CPU) / 100.0)
+ capacity.set_by_type(constants.RES_MEM, capacity.get_by_type(constants.RES_MEM) / 1024.0)
+ node = Node(nod_id, hostname, capacity)
+ node.enactment_info = int(enactID)
+ self.nodes.append(node)
+
+ self.logger.info("Fetched %i nodes from ONE db" % len(self.nodes))
+ for n in self.nodes:
+ self.logger.debug("%i %s %s" % (n.nod_id, n.hostname, n.capacity))
+
+ def get_nodes(self):
+ return self.nodes
+
+ def get_resource_types(self):
+ return [(constants.RES_CPU, constants.RESTYPE_FLOAT, "CPU"),
+ (constants.RES_MEM, constants.RESTYPE_INT, "Mem"),
+ (constants.RES_DISK, constants.RESTYPE_INT, "Disk"),
+ (constants.RES_NETIN, constants.RESTYPE_INT, "Net (in)"),
+ (constants.RES_NETOUT, constants.RESTYPE_INT, "Net (out)")]
+
+ def get_suspendresume_rate(self):
+ return self.suspendresumerate
+
+ def get_bandwidth(self):
+ return 0
+
+class OpenNebulaVMEnactment(VMEnactment):
+ def __init__(self):
+ VMEnactment.__init__(self)
+ self.logger = logging.getLogger("ENACT.ONE.VM")
+
+ self.onevm = get_config().get("onevm")
+
+ self.conn = sqlite.connect(get_config().get("one.db"))
+ self.conn.row_factory = sqlite.Row
+
+
+ def run_command(self, cmd):
+ self.logger.debug("Running command: %s" % cmd)
+ (status, output) = commands.getstatusoutput(cmd)
+ self.logger.debug("Returned status=%i, output='%s'" % (status, output))
+ return status, output
+
+ def start(self, action):
+ for vnode in action.vnodes:
+ # Unpack action
+ vmid = action.vnodes[vnode].enactment_info
+ hostID = action.vnodes[vnode].pnode
+ image = action.vnodes[vnode].diskimage
+ cpu = action.vnodes[vnode].resources.get_by_type(constants.RES_CPU)
+ memory = action.vnodes[vnode].resources.get_by_type(constants.RES_MEM)
+
+ self.logger.debug("Received request to start VM for L%iV%i on host %i, image=%s, cpu=%i, mem=%i"
+ % (action.lease_haizea_id, vnode, hostID, image, cpu, memory))
+
+ cmd = "%s deploy %i %i" % (self.onevm, vmid, hostID)
+ status, output = self.run_command(cmd)
+ if status == 0:
+ self.logger.debug("Command returned succesfully.")
+ else:
+ raise Exception, "Error when running onevm deploy (status=%i, output='%s')" % (status, output)
+
+ def stop(self, action):
+ for vnode in action.vnodes:
+ # Unpack action
+ vmid = action.vnodes[vnode].enactment_info
+ cmd = "%s shutdown %i" % (self.onevm, vmid)
+ status, output = self.run_command(cmd)
+ if status == 0:
+ self.logger.debug("Command returned succesfully.")
+ else:
+ raise Exception, "Error when running onevm shutdown (status=%i, output='%s')" % (status, output)
+
+ def suspend(self, action):
+ for vnode in action.vnodes:
+ # Unpack action
+ vmid = action.vnodes[vnode].enactment_info
+ cmd = "%s suspend %i" % (self.onevm, vmid)
+ status, output = self.run_command(cmd)
+ if status == 0:
+ self.logger.debug("Command returned succesfully.")
+ else:
+ raise Exception, "Error when running onevm suspend (status=%i, output='%s')" % (status, output)
+
+ def resume(self, action):
+ for vnode in action.vnodes:
+ # Unpack action
+ vmid = action.vnodes[vnode].enactment_info
+ cmd = "%s resume %i" % (self.onevm, vmid)
+ status, output = self.run_command(cmd)
+ if status == 0:
+ self.logger.debug("Command returned succesfully.")
+ else:
+ raise Exception, "Error when running onevm resume (status=%i, output='%s')" % (status, output)
+
+ def verify_suspend(self, action):
+ # TODO: Do a single query
+ result = 0
+ for vnode in action.vnodes:
+ # Unpack action
+ vmid = action.vnodes[vnode].enactment_info
+ cur = self.conn.cursor()
+ cur.execute("select state from vmpool where oid = %i" % vmid)
+ onevm = cur.fetchone()
+ state = onevm["state"]
+ if state == 5:
+ self.logger.debug("Suspend of L%iV%i correct." % (action.lease_haizea_id, vnode))
+ else:
+ self.logger.warning("ONE did not complete suspend of L%iV%i on time. State is %i" % (action.lease_haizea_id, vnode, state))
+ result = 1
+ return result
+
+ def verify_resume(self, action):
+ # TODO: Do a single query
+ result = 0
+ for vnode in action.vnodes:
+ # Unpack action
+ vmid = action.vnodes[vnode].enactment_info
+ cur = self.conn.cursor()
+ cur.execute("select state from vmpool where oid = %i" % vmid)
+ onevm = cur.fetchone()
+ state = onevm["state"]
+ if state == 3:
+ self.logger.debug("Resume of L%iV%i correct." % (action.lease_haizea_id, vnode))
+ else:
+ self.logger.warning("ONE did not complete resume of L%iV%i on time. State is %i" % (action.lease_haizea_id, vnode, state))
+ result = 1
+ return result
+
+class OpenNebulaDummyDeploymentEnactment(DeploymentEnactment):
+ def __init__(self):
+ DeploymentEnactment.__init__(self)
+
+ def get_aux_nodes(self):
+ return []
+
+ def resolve_to_file(self, lease_id, vnode, diskimage_id):
+ return "/var/haizea/images/%s-L%iV%i" % (diskimage_id, lease_id, vnode)
\ No newline at end of file
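
OpenNebulaVMEnactment above drives every lifecycle operation through the onevm command-line tool and treats a non-zero exit status as an enactment failure. A minimal sketch of that pattern in isolation; the onevm path and VM id below are illustrative, not taken from the commit:

    # Sketch: shell out to onevm and fail on a non-zero exit status,
    # mirroring run_command()/start()/stop() above.
    import commands

    def run_onevm(onevm, subcommand, vmid):
        cmd = "%s %s %i" % (onevm, subcommand, vmid)
        status, output = commands.getstatusoutput(cmd)
        if status != 0:
            raise Exception("onevm %s failed (status=%i, output='%s')"
                            % (subcommand, status, output))
        return output

    # e.g. run_onevm("/usr/bin/onevm", "suspend", 42)   (hypothetical values)
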
Deleted: branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/enact/simulated.py
===================================================================
--- trunk/src/haizea/resourcemanager/enact/simulated.py 2008-09-16 10:43:48 UTC (rev 501)
+++ branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/enact/simulated.py 2008-10-20 16:50:12 UTC (rev 537)
@@ -1,134 +0,0 @@
-# -------------------------------------------------------------------------- #
-# Copyright 2006-2008, University of Chicago #
-# Copyright 2008, Distributed Systems Architecture Group, Universidad #
-# Complutense de Madrid (dsa-research.org) #
-# #
-# Licensed under the Apache License, Version 2.0 (the "License"); you may #
-# not use this file except in compliance with the License. You may obtain #
-# a copy of the License at #
-# #
-# http://www.apache.org/licenses/LICENSE-2.0 #
-# #
-# Unless required by applicable law or agreed to in writing, software #
-# distributed under the License is distributed on an "AS IS" BASIS, #
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
-# See the License for the specific language governing permissions and #
-# limitations under the License. #
-# -------------------------------------------------------------------------- #
-
-from haizea.resourcemanager.resourcepool import Node
-from haizea.resourcemanager.enact import ResourcePoolInfo, VMEnactment, DeploymentEnactment
-import haizea.common.constants as constants
-from haizea.common.utils import get_config
-import haizea.resourcemanager.datastruct as ds
-import logging
-
-class SimulatedResourcePoolInfo(ResourcePoolInfo):
- def __init__(self):
- ResourcePoolInfo.__init__(self)
- self.logger = logging.getLogger("ENACT.SIMUL.INFO")
- config = get_config()
- self.suspendresumerate = config.get("simul.suspendresume-rate")
-
- numnodes = config.get("simul.nodes")
-
- capacity = self.parse_resources_string(config.get("simul.resources"))
-
- self.nodes = [Node(i+1, "simul-%i" % (i+1), capacity) for i in range(numnodes)]
- for n in self.nodes:
- n.enactment_info = n.nod_id
-
- def get_nodes(self):
- return self.nodes
-
- def get_resource_types(self):
- return [(constants.RES_CPU, constants.RESTYPE_FLOAT, "CPU"),
- (constants.RES_MEM, constants.RESTYPE_INT, "Mem"),
- (constants.RES_DISK, constants.RESTYPE_INT, "Disk"),
- (constants.RES_NETIN, constants.RESTYPE_INT, "Net (in)"),
- (constants.RES_NETOUT, constants.RESTYPE_INT, "Net (out)")]
-
- def parse_resources_string(self, resources):
- resources = resources.split(";")
- desc2type = dict([(x[2], x[0]) for x in self.get_resource_types()])
- capacity=ds.ResourceTuple.create_empty()
- for r in resources:
- resourcename = r.split(",")[0]
- resourcecapacity = r.split(",")[1]
- capacity.set_by_type(desc2type[resourcename], int(resourcecapacity))
- return capacity
-
- def get_suspendresume_rate(self):
- return self.suspendresumerate
-
- def get_migration_bandwidth(self):
- return 100 # TODO: Get from config file
-
-class SimulatedVMEnactment(VMEnactment):
- def __init__(self):
- VMEnactment.__init__(self)
- self.logger = logging.getLogger("ENACT.SIMUL.VM")
-
- def start(self, action):
- for vnode in action.vnodes:
- # Unpack action
- pnode = action.vnodes[vnode].pnode
- image = action.vnodes[vnode].diskimage
- cpu = action.vnodes[vnode].resources.get_by_type(constants.RES_CPU)
- memory = action.vnodes[vnode].resources.get_by_type(constants.RES_MEM)
- print (action.lease_haizea_id, vnode, pnode, image, cpu, memory)
- self.logger.debug("Received request to start VM for L%iV%i on host %i, image=%s, cpu=%i, mem=%i"
- % (action.lease_haizea_id, vnode, pnode, image, cpu, memory))
-
- def stop(self, action):
- for vnode in action.vnodes:
- self.logger.debug("Received request to stop VM for L%iV%i"
- % (action.lease_haizea_id, vnode))
-
- def suspend(self, action):
- for vnode in action.vnodes:
- self.logger.debug("Received request to suspend VM for L%iV%i"
- % (action.lease_haizea_id, vnode))
-
- def resume(self, action):
- for vnode in action.vnodes:
- self.logger.debug("Received request to resume VM for L%iV%i"
- % (action.lease_haizea_id, vnode))
-
- def verify_suspend(self, action):
- return 0
-
- def verify_resume(self, action):
- return 0
-
-class SimulatedDeploymentEnactment(DeploymentEnactment):
- def __init__(self):
- DeploymentEnactment.__init__(self)
- self.logger = logging.getLogger("ENACT.SIMUL.INFO")
- config = get_config()
-
- self.bandwidth = config.get("imagetransfer-bandwidth")
-
- # Image repository nodes
- numnodes = config.get("simul.nodes")
-
- imgcapacity = ds.ResourceTuple.create_empty()
- imgcapacity.set_by_type(constants.RES_NETOUT, self.bandwidth)
-
- self.fifo_node = Node(numnodes+1, "FIFOnode", imgcapacity)
- self.edf_node = Node(numnodes+2, "EDFnode", imgcapacity)
-
- def get_edf_node(self):
- return self.edf_node
-
- def get_fifo_node(self):
- return self.fifo_node
-
- def get_aux_nodes(self):
- return [self.edf_node, self.fifo_node]
-
- def get_bandwidth(self):
- return self.bandwidth
-
- def resolve_to_file(self, lease_id, vnode, diskimage_id):
- return "/var/haizea/images/%s-L%iV%i" % (diskimage_id, lease_id, vnode)
\ No newline at end of file
Copied: branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/enact/simulated.py (from rev 508, trunk/src/haizea/resourcemanager/enact/simulated.py)
===================================================================
--- branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/enact/simulated.py (rev 0)
+++ branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/enact/simulated.py 2008-10-20 16:50:12 UTC (rev 537)
@@ -0,0 +1,133 @@
+# -------------------------------------------------------------------------- #
+# Copyright 2006-2008, University of Chicago #
+# Copyright 2008, Distributed Systems Architecture Group, Universidad #
+# Complutense de Madrid (dsa-research.org) #
+# #
+# Licensed under the Apache License, Version 2.0 (the "License"); you may #
+# not use this file except in compliance with the License. You may obtain #
+# a copy of the License at #
+# #
+# http://www.apache.org/licenses/LICENSE-2.0 #
+# #
+# Unless required by applicable law or agreed to in writing, software #
+# distributed under the License is distributed on an "AS IS" BASIS, #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
+# See the License for the specific language governing permissions and #
+# limitations under the License. #
+# -------------------------------------------------------------------------- #
+
+from haizea.resourcemanager.resourcepool import Node
+from haizea.resourcemanager.enact import ResourcePoolInfo, VMEnactment, DeploymentEnactment
+import haizea.common.constants as constants
+from haizea.common.utils import get_config
+import haizea.resourcemanager.datastruct as ds
+import logging
+
+class SimulatedResourcePoolInfo(ResourcePoolInfo):
+ def __init__(self):
+ ResourcePoolInfo.__init__(self)
+ self.logger = logging.getLogger("ENACT.SIMUL.INFO")
+ config = get_config()
+ self.suspendresumerate = config.get("simul.suspendresume-rate")
+
+ numnodes = config.get("simul.nodes")
+
+ capacity = self.parse_resources_string(config.get("simul.resources"))
+
+ self.nodes = [Node(i+1, "simul-%i" % (i+1), capacity) for i in range(numnodes)]
+ for n in self.nodes:
+ n.enactment_info = n.nod_id
+
+ def get_nodes(self):
+ return self.nodes
+
+ def get_resource_types(self):
+ return [(constants.RES_CPU, constants.RESTYPE_FLOAT, "CPU"),
+ (constants.RES_MEM, constants.RESTYPE_INT, "Mem"),
+ (constants.RES_DISK, constants.RESTYPE_INT, "Disk"),
+ (constants.RES_NETIN, constants.RESTYPE_INT, "Net (in)"),
+ (constants.RES_NETOUT, constants.RESTYPE_INT, "Net (out)")]
+
+ def parse_resources_string(self, resources):
+ resources = resources.split(";")
+ desc2type = dict([(x[2], x[0]) for x in self.get_resource_types()])
+ capacity=ds.ResourceTuple.create_empty()
+ for r in resources:
+ resourcename = r.split(",")[0]
+ resourcecapacity = r.split(",")[1]
+ capacity.set_by_type(desc2type[resourcename], int(resourcecapacity))
+ return capacity
+
+ def get_suspendresume_rate(self):
+ return self.suspendresumerate
+
+ def get_migration_bandwidth(self):
+ return 100 # TODO: Get from config file
+
+class SimulatedVMEnactment(VMEnactment):
+ def __init__(self):
+ VMEnactment.__init__(self)
+ self.logger = logging.getLogger("ENACT.SIMUL.VM")
+
+ def start(self, action):
+ for vnode in action.vnodes:
+ # Unpack action
+ pnode = action.vnodes[vnode].pnode
+ image = action.vnodes[vnode].diskimage
+ cpu = action.vnodes[vnode].resources.get_by_type(constants.RES_CPU)
+ memory = action.vnodes[vnode].resources.get_by_type(constants.RES_MEM)
+ self.logger.debug("Received request to start VM for L%iV%i on host %i, image=%s, cpu=%i, mem=%i"
+ % (action.lease_haizea_id, vnode, pnode, image, cpu, memory))
+
+ def stop(self, action):
+ for vnode in action.vnodes:
+ self.logger.debug("Received request to stop VM for L%iV%i"
+ % (action.lease_haizea_id, vnode))
+
+ def suspend(self, action):
+ for vnode in action.vnodes:
+ self.logger.debug("Received request to suspend VM for L%iV%i"
+ % (action.lease_haizea_id, vnode))
+
+ def resume(self, action):
+ for vnode in action.vnodes:
+ self.logger.debug("Received request to resume VM for L%iV%i"
+ % (action.lease_haizea_id, vnode))
+
+ def verify_suspend(self, action):
+ return 0
+
+ def verify_resume(self, action):
+ return 0
+
+class SimulatedDeploymentEnactment(DeploymentEnactment):
+ def __init__(self):
+ DeploymentEnactment.__init__(self)
+ self.logger = logging.getLogger("ENACT.SIMUL.INFO")
+ config = get_config()
+
+ self.bandwidth = config.get("imagetransfer-bandwidth")
+
+ # Image repository nodes
+ numnodes = config.get("simul.nodes")
+
+ imgcapacity = ds.ResourceTuple.create_empty()
+ imgcapacity.set_by_type(constants.RES_NETOUT, self.bandwidth)
+
+ self.fifo_node = Node(numnodes+1, "FIFOnode", imgcapacity)
+ self.edf_node = Node(numnodes+2, "EDFnode", imgcapacity)
+
+ def get_edf_node(self):
+ return self.edf_node
+
+ def get_fifo_node(self):
+ return self.fifo_node
+
+ def get_aux_nodes(self):
+ return [self.edf_node, self.fifo_node]
+
+ def get_bandwidth(self):
+ return self.bandwidth
+
+ def resolve_to_file(self, lease_id, vnode, diskimage_id):
+ return "/var/haizea/images/%s-L%iV%i" % (diskimage_id, lease_id, vnode)
\ No newline at end of file
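
parse_resources_string above expects the simul.resources option to be a semicolon-separated list of "description,amount" pairs, keyed by the resource descriptions returned by get_resource_types(). A small illustration of that format and of the parse it implies; the specific amounts are made up for the example:

    # Sketch: the "simul.resources" format consumed by parse_resources_string.
    resources = "CPU,100;Mem,1024;Disk,20000;Net (in),100;Net (out),100"
    parsed = dict([(r.split(",")[0], int(r.split(",")[1]))
                   for r in resources.split(";")])
    # parsed == {'CPU': 100, 'Mem': 1024, 'Disk': 20000,
    #            'Net (in)': 100, 'Net (out)': 100}
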
Deleted: branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/resourcepool.py
===================================================================
--- trunk/src/haizea/resourcemanager/resourcepool.py 2008-09-16 10:43:48 UTC (rev 501)
+++ branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/resourcepool.py 2008-10-20 16:50:12 UTC (rev 537)
@@ -1,425 +0,0 @@
-# -------------------------------------------------------------------------- #
-# Copyright 2006-2008, University of Chicago #
-# Copyright 2008, Distributed Systems Architecture Group, Universidad #
-# Complutense de Madrid (dsa-research.org) #
-# #
-# Licensed under the Apache License, Version 2.0 (the "License"); you may #
-# not use this file except in compliance with the License. You may obtain #
-# a copy of the License at #
-# #
-# http://www.apache.org/licenses/LICENSE-2.0 #
-# #
-# Unless required by applicable law or agreed to in writing, software #
-# distributed under the License is distributed on an "AS IS" BASIS, #
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
-# See the License for the specific language governing permissions and #
-# limitations under the License. #
-# -------------------------------------------------------------------------- #
-
-from haizea.common.utils import vnodemapstr, get_accounting
-import haizea.common.constants as constants
-import haizea.resourcemanager.enact.actions as actions
-import logging
-
-class FailedEnactmentException(Exception):
- pass
-
-class ResourcePool(object):
- def __init__(self, info_enact, vm_enact, deploy_enact):
- self.logger = logging.getLogger("RPOOL")
-
- self.info = info_enact
- self.vm = vm_enact
- # TODO: Ideally, deployment enactment shouldn't be here, specially since
- # it already "hangs" below the deployment modules. For now,
- # it does no harm, though.
- self.deployment = deploy_enact
-
- self.nodes = self.info.get_nodes()
-
- def start_vms(self, lease, rr):
- start_action = actions.VMEnactmentStartAction()
- start_action.from_rr(rr)
-
- for (vnode, pnode) in rr.nodes.items():
- node = self.get_node(pnode)
- diskimage = node.get_diskimage(lease.id, vnode, lease.diskimage_id)
- start_action.vnodes[vnode].pnode = node.enactment_info
- start_action.vnodes[vnode].diskimage = diskimage.filename
- start_action.vnodes[vnode].resources = rr.resources_in_pnode[pnode]
-
- try:
- self.vm.start(start_action)
- except Exception, msg:
- self.logger.error("Enactment of start VM failed: %s" % msg)
- raise FailedEnactmentException()
-
- def stop_vms(self, lease, rr):
- stop_action = actions.VMEnactmentStopAction()
- stop_action.from_rr(rr)
- try:
- self.vm.stop(stop_action)
- except Exception, msg:
- self.logger.error("Enactment of end VM failed: %s" % msg)
- raise FailedEnactmentException()
-
- def suspend_vms(self, lease, rr):
- # Add memory image files
- for vnode in rr.vnodes:
- pnode = rr.vmrr.nodes[vnode]
- self.add_ramfile(pnode, lease.id, vnode, lease.requested_resources.get_by_type(constants.RES_MEM))
-
- # Enact suspend
- suspend_action = actions.VMEnactmentSuspendAction()
- suspend_action.from_rr(rr)
- try:
- self.vm.suspend(suspend_action)
- except Exception, msg:
- self.logger.error("Enactment of suspend VM failed: %s" % msg)
- raise FailedEnactmentException()
-
- def verify_suspend(self, lease, rr):
- verify_suspend_action = actions.VMEnactmentConfirmSuspendAction()
- verify_suspend_action.from_rr(rr)
- self.vm.verify_suspend(verify_suspend_action)
-
- def resume_vms(self, lease, rr):
- # Remove memory image files
- for vnode in rr.vnodes:
- pnode = rr.vmrr.nodes[vnode]
- self.remove_ramfile(pnode, lease.id, vnode)
-
- # Enact resume
- resume_action = actions.VMEnactmentResumeAction()
- resume_action.from_rr(rr)
- try:
- self.vm.resume(resume_action)
- except Exception, msg:
- self.logger.error("Enactment of resume VM failed: %s" % msg)
- raise FailedEnactmentException()
-
- def verify_resume(self, lease, rr):
- verify_resume_action = actions.VMEnactmentConfirmResumeAction()
- verify_resume_action.from_rr(rr)
- self.vm.verify_resume(verify_resume_action)
-
- def get_nodes(self):
- return self.nodes
-
- # An auxiliary node is a host whose resources are going to be scheduled, but
- # where no VMs are actually going to run. For example, a disk image repository node.
- def get_aux_nodes(self):
- # TODO: We're only asking the deployment enactment module for auxiliary nodes.
- # There might be a scenario where the info enactment module also reports
- # auxiliary nodes.
- return self.deployment.get_aux_nodes()
-
- def get_num_nodes(self):
- return len(self.nodes)
-
- def get_node(self, nod_id):
- return self.nodes[nod_id-1]
-
- def add_diskimage(self, pnode, diskimage_id, imagesize, lease_id, vnode):
- self.logger.debug("Adding disk image for L%iV%i in pnode=%i" % (lease_id, vnode, pnode))
-
- self.logger.vdebug("Files BEFORE:")
- self.get_node(pnode).print_files()
-
- imagefile = self.deployment.resolve_to_file(lease_id, vnode, diskimage_id)
- img = DiskImageFile(imagefile, imagesize, lease_id, vnode, diskimage_id)
- self.get_node(pnode).add_file(img)
-
- self.logger.vdebug("Files AFTER:")
- self.get_node(pnode).print_files()
-
- get_accounting().append_stat(constants.COUNTER_DISKUSAGE, self.get_max_disk_usage())
- return img
-
- def remove_diskimage(self, pnode, lease, vnode):
- node = self.get_node(pnode)
- node.print_files()
-
- self.logger.debug("Removing disk image for L%iV%i in node %i" % (lease, vnode, pnode))
- node.remove_diskimage(lease, vnode)
-
- node.print_files()
-
- get_accounting().append_stat(constants.COUNTER_DISKUSAGE, self.get_max_disk_usage())
-
- def add_ramfile(self, pnode, lease_id, vnode, size):
- node = self.get_node(pnode)
- self.logger.debug("Adding RAM file for L%iV%i in node %i" % (lease_id, vnode, pnode))
- node.print_files()
- f = RAMImageFile("RAM_L%iV%i" % (lease_id, vnode), size, lease_id, vnode)
- node.add_file(f)
- node.print_files()
- get_accounting().append_stat(constants.COUNTER_DISKUSAGE, self.get_max_disk_usage())
-
- def remove_ramfile(self, pnode, lease_id, vnode):
- node = self.get_node(pnode)
- self.logger.debug("Removing RAM file for L%iV%i in node %i" % (lease_id, vnode, pnode))
- node.print_files()
- node.remove_ramfile(lease_id, vnode)
- node.print_files()
- get_accounting().append_stat(constants.COUNTER_DISKUSAGE, self.get_max_disk_usage())
-
- def get_max_disk_usage(self):
- return max([n.get_disk_usage() for n in self.nodes])
-
-class Node(object):
- def __init__(self, nod_id, hostname, capacity):
- self.logger = logging.getLogger("RESOURCEPOOL")
- self.nod_id = nod_id
- self.hostname = hostname
- self.capacity = capacity
- self.files = []
-
- # enactment-specific information
- self.enactment_info = None
-
- def get_capacity(self):
- return self.capacity
-
- def add_file(self, f):
- self.files.append(f)
-
- def get_diskimage(self, lease_id, vnode, diskimage_id):
- image = [f for f in self.files if isinstance(f, DiskImageFile) and
- f.diskimage_id == diskimage_id and
- f.lease_id == lease_id and
- f.vnode == vnode]
- if len(image) == 0:
- return None
- elif len(image) == 1:
- return image[0]
- elif len(image) > 1:
- self.logger.warning("More than one tainted image for L%iV%i on node %i" % (lease_id, vnode, self.nod_id))
- return image[0]
-
- def remove_diskimage(self, lease_id, vnode):
- image = [f for f in self.files if isinstance(f, DiskImageFile) and
- f.lease_id == lease_id and
- f.vnode == vnode]
- if len(image) > 0:
- image = image[0]
- self.files.remove(image)
-
- def remove_ramfile(self, lease_id, vnode):
- ramfile = [f for f in self.files if isinstance(f, RAMImageFile) and f.lease_id==lease_id and f.vnode==vnode]
- if len(ramfile) > 0:
- ramfile = ramfile[0]
- self.files.remove(ramfile)
-
-
- def get_disk_usage(self):
- return sum([f.filesize for f in self.files])
-
-
- def get_diskimages(self):
- return [f for f in self.files if isinstance(f, DiskImageFile)]
-
- def print_files(self):
- images = ""
- if len(self.files) > 0:
- images = ", ".join([str(img) for img in self.files])
- self.logger.vdebug("Node %i files: %iMB %s" % (self.nod_id, self.get_disk_usage(), images))
-
- def xmlrpc_marshall(self):
- # Convert to something we can send through XMLRPC
- h = {}
- h["id"] = self.nod_id
- h["hostname"] = self.hostname
- h["cpu"] = self.capacity.get_by_type(constants.RES_CPU)
- h["mem"] = self.capacity.get_by_type(constants.RES_MEM)
-
- return h
-
-
-
-class File(object):
- def __init__(self, filename, filesize):
- self.filename = filename
- self.filesize = filesize
-
-class DiskImageFile(File):
- def __init__(self, filename, filesize, lease_id, vnode, diskimage_id):
- File.__init__(self, filename, filesize)
- self.lease_id = lease_id
- self.vnode = vnode
- self.diskimage_id = diskimage_id
-
- def __str__(self):
- return "(DISK L%iv%i %s %s)" % (self.lease_id, self.vnode, self.diskimage_id, self.filename)
-
-
-class RAMImageFile(File):
- def __init__(self, filename, filesize, lease_id, vnode):
- File.__init__(self, filename, filesize)
- self.lease_id = lease_id
- self.vnode = vnode
-
- def __str__(self):
- return "(RAM L%iv%i %s)" % (self.lease_id, self.vnode, self.filename)
-
-class ResourcePoolWithReusableImages(ResourcePool):
- def __init__(self, scheduler):
- ResourcePool.__init__(self, scheduler)
-
- self.nodes = [NodeWithReusableImages.from_node(n) for n in self.nodes]
-
- def add_reusable_image(self, pnode, diskimage_id, imagesize, mappings, timeout):
- self.logger.debug("Adding reusable image for %s in pnode=%i" % (mappings, pnode))
-
- self.logger.vdebug("Files BEFORE:")
- self.get_node(pnode).print_files()
-
- imagefile = "reusable-%s" % diskimage_id
- img = ReusableDiskImageFile(imagefile, imagesize, diskimage_id, timeout)
- for (lease_id, vnode) in mappings:
- img.add_mapping(lease_id, vnode)
-
- self.get_node(pnode).add_reusable_image(img)
-
- self.logger.vdebug("Files AFTER:")
- self.get_node(pnode).print_files()
-
- get_accounting().append_stat(constants.COUNTER_DISKUSAGE, self.get_max_disk_usage())
- return img
-
- def add_mapping_to_existing_reusable_image(self, pnode_id, diskimage_id, lease_id, vnode, timeout):
- self.get_node(pnode_id).add_mapping_to_existing_reusable_image(diskimage_id, lease_id, vnode, timeout)
-
- def remove_diskimage(self, pnode_id, lease, vnode):
- ResourcePool.remove_diskimage(self, pnode_id, lease, vnode)
- self.logger.debug("Removing cached images for L%iV%i in node %i" % (lease, vnode, pnode_id))
- for img in self.get_node(pnode_id).get_reusable_images():
- if (lease, vnode) in img.mappings:
- img.mappings.remove((lease, vnode))
- self.get_node(pnode_id).print_files()
- # Keep image around, even if it isn't going to be used
- # by any VMs. It might be reused later on.
- # It will be purged if space has to be made available
- # for other images
-
- def get_nodes_with_reusable_image(self, diskimage_id, after = None):
- return [n.nod_id for n in self.nodes if n.exists_reusable_image(diskimage_id, after=after)]
-
- def exists_reusable_image(self, pnode_id, diskimage_id, after):
- return self.get_node(pnode_id).exists_reusable_image(diskimage_id, after = after)
-
-
-class NodeWithReusableImages(Node):
- def __init__(self, nod_id, hostname, capacity):
- Node.__init__(self, nod_id, hostname, capacity)
- self.reusable_images = []
-
- @classmethod
- def from_node(cls, n):
- node = cls(n.nod_id, n.hostname, n.capacity)
- node.enactment_info = n.enactment_info
- return node
-
- def add_reusable_image(self, f):
- self.reusable_images.append(f)
-
- def add_mapping_to_existing_reusable_image(self, diskimage_id, lease_id, vnode, timeout):
- for f in self.reusable_images:
- if f.diskimage_id == diskimage_id:
- f.add_mapping(lease_id, vnode)
- f.update_timeout(timeout)
- break # Ugh
- self.print_files()
-
- def get_reusable_image(self, diskimage_id, after = None, lease_id=None, vnode=None):
- images = [i for i in self.reusable_images if i.diskimage_id == diskimage_id]
- if after != None:
- images = [i for i in images if i.timeout >= after]
- if lease_id != None and vnode != None:
- images = [i for i in images if i.has_mapping(lease_id, vnode)]
- if len(images)>0:
- return images[0]
- else:
- return None
-
- def exists_reusable_image(self, imagefile, after = None, lease_id=None, vnode=None):
- entry = self.get_reusable_image(imagefile, after = after, lease_id=lease_id, vnode=vnode)
- if entry == None:
- return False
- else:
- return True
-
- def get_reusable_images(self):
- return self.reusable_images
-
- def get_reusable_images_size(self):
- return sum([f.filesize for f in self.reusable_images])
-
- def purge_oldest_unused_image(self):
- unused = [img for img in self.reusable_images if not img.has_mappings()]
- if len(unused) == 0:
- return 0
- else:
- i = iter(unused)
- oldest = i.next()
- for img in i:
- if img.timeout < oldest.timeout:
- oldest = img
- self.reusable_images.remove(oldest)
- return 1
-
- def purge_downto(self, target):
- done = False
- while not done:
- removed = self.purge_oldest_unused_image()
- if removed==0:
- done = True
- success = False
- elif removed == 1:
- if self.get_reusable_images_size() <= target:
- done = True
- success = True
- return success
-
- def print_files(self):
- Node.print_files(self)
- images = ""
- if len(self.reusable_images) > 0:
- images = ", ".join([str(img) for img in self.reusable_images])
- self.logger.vdebug("Node %i reusable images: %iMB %s" % (self.nod_id, self.get_reusable_images_size(), images))
-
-class ReusableDiskImageFile(File):
- def __init__(self, filename, filesize, diskimage_id, timeout):
- File.__init__(self, filename, filesize)
- self.diskimage_id = diskimage_id
- self.mappings = set([])
- self.timeout = timeout
-
- def add_mapping(self, lease_id, vnode):
- self.mappings.add((lease_id, vnode))
-
- def has_mapping(self, lease_id, vnode):
- return (lease_id, vnode) in self.mappings
-
- def has_mappings(self):
- return len(self.mappings) > 0
-
- def update_timeout(self, timeout):
- if timeout > self.timeout:
- self.timeout = timeout
-
- def is_expired(self, curTime):
- if self.timeout == None:
- return False
- elif self.timeout <= curTime:
- return True
- else:
- return False
-
- def __str__(self):
- if self.timeout == None:
- timeout = "NOTIMEOUT"
- else:
- timeout = self.timeout
- return "(REUSABLE %s %s %s %s)" % (vnodemapstr(self.mappings), self.diskimage_id, str(timeout), self.filename)
-
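The NodeWithReusableImages code above (purge_oldest_unused_image and purge_downto) boils down to a simple cache-eviction policy: reusable images that are no longer mapped to any (lease, vnode) pair are evicted oldest-timeout-first until disk usage drops to a target. Below is a minimal standalone sketch of that policy; CachedImage and ImageCache are hypothetical stand-ins, not Haizea classes.

# Sketch of the purge policy implemented by NodeWithReusableImages above.
# CachedImage and ImageCache are hypothetical stand-ins, not Haizea classes.

class CachedImage(object):
    def __init__(self, diskimage_id, filesize, timeout):
        self.diskimage_id = diskimage_id
        self.filesize = filesize       # size in MB, as in the classes above
        self.timeout = timeout         # latest time the image must be kept
        self.mappings = set()          # (lease_id, vnode) pairs still using it

class ImageCache(object):
    def __init__(self):
        self.images = []

    def total_size(self):
        return sum(img.filesize for img in self.images)

    def purge_downto(self, target):
        # Evict unused images, oldest timeout first, until the cache
        # occupies at most 'target' MB. Returns True if that was possible.
        while self.total_size() > target:
            unused = [img for img in self.images if not img.mappings]
            if not unused:
                return False           # everything left is still mapped
            oldest = min(unused, key=lambda img: img.timeout)
            self.images.remove(oldest)
        return True

cache = ImageCache()
cache.images = [CachedImage("imgA", 1024, 10),
                CachedImage("imgB", 2048, 5),
                CachedImage("imgC", 1024, 20)]
cache.images[2].mappings.add((1, 1))     # imgC is still mapped to L1V1
print(cache.purge_downto(2048))          # True: imgB (oldest unused) is evicted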
Copied: branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/resourcepool.py (from rev 504, trunk/src/haizea/resourcemanager/resourcepool.py)
===================================================================
--- branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/resourcepool.py (rev 0)
+++ branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/resourcepool.py 2008-10-20 16:50:12 UTC (rev 537)
@@ -0,0 +1,425 @@
+# -------------------------------------------------------------------------- #
+# Copyright 2006-2008, University of Chicago #
+# Copyright 2008, Distributed Systems Architecture Group, Universidad #
+# Complutense de Madrid (dsa-research.org) #
+# #
+# Licensed under the Apache License, Version 2.0 (the "License"); you may #
+# not use this file except in compliance with the License. You may obtain #
+# a copy of the License at #
+# #
+# http://www.apache.org/licenses/LICENSE-2.0 #
+# #
+# Unless required by applicable law or agreed to in writing, software #
+# distributed under the License is distributed on an "AS IS" BASIS, #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
+# See the License for the specific language governing permissions and #
+# limitations under the License. #
+# -------------------------------------------------------------------------- #
+
+from haizea.common.utils import vnodemapstr, get_accounting
+import haizea.common.constants as constants
+import haizea.resourcemanager.enact.actions as actions
+import logging
+
+class FailedEnactmentException(Exception):
+ pass
+
+class ResourcePool(object):
+ def __init__(self, info_enact, vm_enact, deploy_enact):
+ self.logger = logging.getLogger("RPOOL")
+
+ self.info = info_enact
+ self.vm = vm_enact
+ # TODO: Ideally, deployment enactment shouldn't be here, especially since
+ # it already "hangs" below the deployment modules. For now,
+ # it does no harm, though.
+ self.deployment = deploy_enact
+
+ self.nodes = self.info.get_nodes()
+
+ def start_vms(self, lease, rr):
+ start_action = actions.VMEnactmentStartAction()
+ start_action.from_rr(rr)
+
+ for (vnode, pnode) in rr.nodes.items():
+ node = self.get_node(pnode)
+ diskimage = node.get_diskimage(lease.id, vnode, lease.diskimage_id)
+ start_action.vnodes[vnode].pnode = node.enactment_info
+ start_action.vnodes[vnode].diskimage = diskimage.filename
+ start_action.vnodes[vnode].resources = rr.resources_in_pnode[pnode]
+
+ try:
+ self.vm.start(start_action)
+ except Exception, msg:
+ self.logger.error("Enactment of start VM failed: %s" % msg)
+ raise FailedEnactmentException()
+
+ def stop_vms(self, lease, rr):
+ stop_action = actions.VMEnactmentStopAction()
+ stop_action.from_rr(rr)
+ try:
+ self.vm.stop(stop_action)
+ except Exception, msg:
+ self.logger.error("Enactment of end VM failed: %s" % msg)
+ raise FailedEnactmentException()
+
+ def suspend_vms(self, lease, rr):
+ # Add memory image files
+ for vnode in rr.vnodes:
+ pnode = rr.vmrr.nodes[vnode]
+ self.add_ramfile(pnode, lease.id, vnode, lease.requested_resources.get_by_type(constants.RES_MEM))
+
+ # Enact suspend
+ suspend_action = actions.VMEnactmentSuspendAction()
+ suspend_action.from_rr(rr)
+ try:
+ self.vm.suspend(suspend_action)
+ except Exception, msg:
+ self.logger.error("Enactment of suspend VM failed: %s" % msg)
+ raise FailedEnactmentException()
+
+ def verify_suspend(self, lease, rr):
+ verify_suspend_action = actions.VMEnactmentConfirmSuspendAction()
+ verify_suspend_action.from_rr(rr)
+ self.vm.verify_suspend(verify_suspend_action)
+
+ def resume_vms(self, lease, rr):
+ # Remove memory image files
+ for vnode in rr.vnodes:
+ pnode = rr.vmrr.nodes[vnode]
+ self.remove_ramfile(pnode, lease.id, vnode)
+
+ # Enact resume
+ resume_action = actions.VMEnactmentResumeAction()
+ resume_action.from_rr(rr)
+ try:
+ self.vm.resume(resume_action)
+ except Exception, msg:
+ self.logger.error("Enactment of resume VM failed: %s" % msg)
+ raise FailedEnactmentException()
+
+ def verify_resume(self, lease, rr):
+ verify_resume_action = actions.VMEnactmentConfirmResumeAction()
+ verify_resume_action.from_rr(rr)
+ self.vm.verify_resume(verify_resume_action)
+
+ def get_nodes(self):
+ return self.nodes
+
+ # An auxiliary node is a host whose resources are going to be scheduled, but
+ # where no VMs are actually going to run. For example, a disk image repository node.
+ def get_aux_nodes(self):
+ # TODO: We're only asking the deployment enactment module for auxiliary nodes.
+ # There might be a scenario where the info enactment module also reports
+ # auxiliary nodes.
+ return self.deployment.get_aux_nodes()
+
+ def get_num_nodes(self):
+ return len(self.nodes)
+
+ def get_node(self, nod_id):
+ return self.nodes[nod_id-1]
+
+ def add_diskimage(self, pnode, diskimage_id, imagesize, lease_id, vnode):
+ self.logger.debug("Adding disk image for L%iV%i in pnode=%i" % (lease_id, vnode, pnode))
+
+ self.logger.vdebug("Files BEFORE:")
+ self.get_node(pnode).print_files()
+
+ imagefile = self.deployment.resolve_to_file(lease_id, vnode, diskimage_id)
+ img = DiskImageFile(imagefile, imagesize, lease_id, vnode, diskimage_id)
+ self.get_node(pnode).add_file(img)
+
+ self.logger.vdebug("Files AFTER:")
+ self.get_node(pnode).print_files()
+
+ get_accounting().append_stat(constants.COUNTER_DISKUSAGE, self.get_max_disk_usage())
+ return img
+
+ def remove_diskimage(self, pnode, lease, vnode):
+ node = self.get_node(pnode)
+ node.print_files()
+
+ self.logger.debug("Removing disk image for L%iV%i in node %i" % (lease, vnode, pnode))
+ node.remove_diskimage(lease, vnode)
+
+ node.print_files()
+
+ get_accounting().append_stat(constants.COUNTER_DISKUSAGE, self.get_max_disk_usage())
+
+ def add_ramfile(self, pnode, lease_id, vnode, size):
+ node = self.get_node(pnode)
+ self.logger.debug("Adding RAM file for L%iV%i in node %i" % (lease_id, vnode, pnode))
+ node.print_files()
+ f = RAMImageFile("RAM_L%iV%i" % (lease_id, vnode), size, lease_id, vnode)
+ node.add_file(f)
+ node.print_files()
+ get_accounting().append_stat(constants.COUNTER_DISKUSAGE, self.get_max_disk_usage())
+
+ def remove_ramfile(self, pnode, lease_id, vnode):
+ node = self.get_node(pnode)
+ self.logger.debug("Removing RAM file for L%iV%i in node %i" % (lease_id, vnode, pnode))
+ node.print_files()
+ node.remove_ramfile(lease_id, vnode)
+ node.print_files()
+ get_accounting().append_stat(constants.COUNTER_DISKUSAGE, self.get_max_disk_usage())
+
+ def get_max_disk_usage(self):
+ return max([n.get_disk_usage() for n in self.nodes])
+
+class Node(object):
+ def __init__(self, nod_id, hostname, capacity):
+ self.logger = logging.getLogger("RESOURCEPOOL")
+ self.nod_id = nod_id
+ self.hostname = hostname
+ self.capacity = capacity
+ self.files = []
+
+ # enactment-specific information
+ self.enactment_info = None
+
+ def get_capacity(self):
+ return self.capacity
+
+ def add_file(self, f):
+ self.files.append(f)
+
+ def get_diskimage(self, lease_id, vnode, diskimage_id):
+ image = [f for f in self.files if isinstance(f, DiskImageFile) and
+ f.diskimage_id == diskimage_id and
+ f.lease_id == lease_id and
+ f.vnode == vnode]
+ if len(image) == 0:
+ return None
+ elif len(image) == 1:
+ return image[0]
+ elif len(image) > 1:
+ self.logger.warning("More than one tainted image for L%iV%i on node %i" % (lease_id, vnode, self.nod_id))
+ return image[0]
+
+ def remove_diskimage(self, lease_id, vnode):
+ image = [f for f in self.files if isinstance(f, DiskImageFile) and
+ f.lease_id == lease_id and
+ f.vnode == vnode]
+ if len(image) > 0:
+ image = image[0]
+ self.files.remove(image)
+
+ def remove_ramfile(self, lease_id, vnode):
+ ramfile = [f for f in self.files if isinstance(f, RAMImageFile) and f.lease_id==lease_id and f.vnode==vnode]
+ if len(ramfile) > 0:
+ ramfile = ramfile[0]
+ self.files.remove(ramfile)
+
+
+ def get_disk_usage(self):
+ return sum([f.filesize for f in self.files])
+
+
+ def get_diskimages(self):
+ return [f for f in self.files if isinstance(f, DiskImageFile)]
+
+ def print_files(self):
+ images = ""
+ if len(self.files) > 0:
+ images = ", ".join([str(img) for img in self.files])
+ self.logger.vdebug("Node %i files: %iMB %s" % (self.nod_id, self.get_disk_usage(), images))
+
+ def xmlrpc_marshall(self):
+ # Convert to something we can send through XMLRPC
+ h = {}
+ h["id"] = self.nod_id
+ h["hostname"] = self.hostname
+ h["cpu"] = self.capacity.get_by_type(constants.RES_CPU)
+ h["mem"] = self.capacity.get_by_type(constants.RES_MEM)
+
+ return h
+
+
+
+class File(object):
+ def __init__(self, filename, filesize):
+ self.filename = filename
+ self.filesize = filesize
+
+class DiskImageFile(File):
+ def __init__(self, filename, filesize, lease_id, vnode, diskimage_id):
+ File.__init__(self, filename, filesize)
+ self.lease_id = lease_id
+ self.vnode = vnode
+ self.diskimage_id = diskimage_id
+
+ def __str__(self):
+ return "(DISK L%iv%i %s %s)" % (self.lease_id, self.vnode, self.diskimage_id, self.filename)
+
+
+class RAMImageFile(File):
+ def __init__(self, filename, filesize, lease_id, vnode):
+ File.__init__(self, filename, filesize)
+ self.lease_id = lease_id
+ self.vnode = vnode
+
+ def __str__(self):
+ return "(RAM L%iv%i %s)" % (self.lease_id, self.vnode, self.filename)
+
+class ResourcePoolWithReusableImages(ResourcePool):
+ def __init__(self, info_enact, vm_enact, deploy_enact):
+ ResourcePool.__init__(self, info_enact, vm_enact, deploy_enact)
+
+ self.nodes = [NodeWithReusableImages.from_node(n) for n in self.nodes]
+
+ def add_reusable_image(self, pnode, diskimage_id, imagesize, mappings, timeout):
+ self.logger.debug("Adding reusable image for %s in pnode=%i" % (mappings, pnode))
+
+ self.logger.vdebug("Files BEFORE:")
+ self.get_node(pnode).print_files()
+
+ imagefile = "reusable-%s" % diskimage_id
+ img = ReusableDiskImageFile(imagefile, imagesize, diskimage_id, timeout)
+ for (lease_id, vnode) in mappings:
+ img.add_mapping(lease_id, vnode)
+
+ self.get_node(pnode).add_reusable_image(img)
+
+ self.logger.vdebug("Files AFTER:")
+ self.get_node(pnode).print_files()
+
+ get_accounting().append_stat(constants.COUNTER_DISKUSAGE, self.get_max_disk_usage())
+ return img
+
+ def add_mapping_to_existing_reusable_image(self, pnode_id, diskimage_id, lease_id, vnode, timeout):
+ self.get_node(pnode_id).add_mapping_to_existing_reusable_image(diskimage_id, lease_id, vnode, timeout)
+
+ def remove_diskimage(self, pnode_id, lease, vnode):
+ ResourcePool.remove_diskimage(self, pnode_id, lease, vnode)
+ self.logger.debug("Removing cached images for L%iV%i in node %i" % (lease, vnode, pnode_id))
+ for img in self.get_node(pnode_id).get_reusable_images():
+ if (lease, vnode) in img.mappings:
+ img.mappings.remove((lease, vnode))
+ self.get_node(pnode_id).print_files()
+ # Keep image around, even if it isn't going to be used
+ # by any VMs. It might be reused later on.
+ # It will be purged if space has to be made available
+ # for other images
+
+ def get_nodes_with_reusable_image(self, diskimage_id, after = None):
+ return [n.nod_id for n in self.nodes if n.exists_reusable_image(diskimage_id, after=after)]
+
+ def exists_reusable_image(self, pnode_id, diskimage_id, after):
+ return self.get_node(pnode_id).exists_reusable_image(diskimage_id, after = after)
+
+
+class NodeWithReusableImages(Node):
+ def __init__(self, nod_id, hostname, capacity):
+ Node.__init__(self, nod_id, hostname, capacity)
+ self.reusable_images = []
+
+ @classmethod
+ def from_node(cls, n):
+ node = cls(n.nod_id, n.hostname, n.capacity)
+ node.enactment_info = n.enactment_info
+ return node
+
+ def add_reusable_image(self, f):
+ self.reusable_images.append(f)
+
+ def add_mapping_to_existing_reusable_image(self, diskimage_id, lease_id, vnode, timeout):
+ for f in self.reusable_images:
+ if f.diskimage_id == diskimage_id:
+ f.add_mapping(lease_id, vnode)
+ f.update_timeout(timeout)
+ break # Ugh
+ self.print_files()
+
+ def get_reusable_image(self, diskimage_id, after = None, lease_id=None, vnode=None):
+ images = [i for i in self.reusable_images if i.diskimage_id == diskimage_id]
+ if after != None:
+ images = [i for i in images if i.timeout >= after]
+ if lease_id != None and vnode != None:
+ images = [i for i in images if i.has_mapping(lease_id, vnode)]
+ if len(images)>0:
+ return images[0]
+ else:
+ return None
+
+ def exists_reusable_image(self, imagefile, after = None, lease_id=None, vnode=None):
+ entry = self.get_reusable_image(imagefile, after = after, lease_id=lease_id, vnode=vnode)
+ if entry == None:
+ return False
+ else:
+ return True
+
+ def get_reusable_images(self):
+ return self.reusable_images
+
+ def get_reusable_images_size(self):
+ return sum([f.filesize for f in self.reusable_images])
+
+ def purge_oldest_unused_image(self):
+ unused = [img for img in self.reusable_images if not img.has_mappings()]
+ if len(unused) == 0:
+ return 0
+ else:
+ i = iter(unused)
+ oldest = i.next()
+ for img in i:
+ if img.timeout < oldest.timeout:
+ oldest = img
+ self.reusable_images.remove(oldest)
+ return 1
+
+ def purge_downto(self, target):
+ done = False
+ while not done:
+ removed = self.purge_oldest_unused_image()
+ if removed==0:
+ done = True
+ success = False
+ elif removed == 1:
+ if self.get_reusable_images_size() <= target:
+ done = True
+ success = True
+ return success
+
+ def print_files(self):
+ Node.print_files(self)
+ images = ""
+ if len(self.reusable_images) > 0:
+ images = ", ".join([str(img) for img in self.reusable_images])
+ self.logger.vdebug("Node %i reusable images: %iMB %s" % (self.nod_id, self.get_reusable_images_size(), images))
+
+class ReusableDiskImageFile(File):
+ def __init__(self, filename, filesize, diskimage_id, timeout):
+ File.__init__(self, filename, filesize)
+ self.diskimage_id = diskimage_id
+ self.mappings = set([])
+ self.timeout = timeout
+
+ def add_mapping(self, lease_id, vnode):
+ self.mappings.add((lease_id, vnode))
+
+ def has_mapping(self, lease_id, vnode):
+ return (lease_id, vnode) in self.mappings
+
+ def has_mappings(self):
+ return len(self.mappings) > 0
+
+ def update_timeout(self, timeout):
+ if timeout > self.timeout:
+ self.timeout = timeout
+
+ def is_expired(self, curTime):
+ if self.timeout == None:
+ return False
+ elif self.timeout <= curTime:
+ return True
+ else:
+ return False
+
+ def __str__(self):
+ if self.timeout == None:
+ timeout = "NOTIMEOUT"
+ else:
+ timeout = self.timeout
+ return "(REUSABLE %s %s %s %s)" % (vnodemapstr(self.mappings), self.diskimage_id, str(timeout), self.filename)
+
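A side note on the suspend/resume path in ResourcePool above: suspend_vms records a RAM image file per virtual node, sized to the lease's memory allocation, on the physical node hosting that vnode, and resume_vms removes it again, so the COUNTER_DISKUSAGE accounting also reflects suspended VMs. A rough sketch of that bookkeeping follows, with simplified, hypothetical types (the dict-based node_files structure and function names are not part of Haizea).

# Sketch of the RAM-file bookkeeping done by suspend_vms/resume_vms above.
# The dict-based 'node_files' structure and function names are hypothetical.

class RAMFile(object):
    def __init__(self, lease_id, vnode, size):
        self.filename = "RAM_L%iV%i" % (lease_id, vnode)   # same naming as above
        self.filesize = size                               # MB of memory to save

def suspend(node_files, vnode_to_pnode, lease_id, mem):
    # node_files: {pnode: [files]}; vnode_to_pnode: {vnode: pnode}
    for vnode, pnode in vnode_to_pnode.items():
        node_files[pnode].append(RAMFile(lease_id, vnode, mem))

def resume(node_files, vnode_to_pnode, lease_id):
    # Drop the memory images created at suspend time.
    for vnode, pnode in vnode_to_pnode.items():
        name = "RAM_L%iV%i" % (lease_id, vnode)
        node_files[pnode] = [f for f in node_files[pnode] if f.filename != name]

node_files = {1: [], 2: []}
suspend(node_files, {1: 1, 2: 2}, 7, 1024)
print(sum(f.filesize for files in node_files.values() for f in files))  # 2048
resume(node_files, {1: 1, 2: 2}, 7)
print(sum(f.filesize for files in node_files.values() for f in files))  # 0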
Deleted: branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/rm.py
===================================================================
--- trunk/src/haizea/resourcemanager/rm.py 2008-09-16 10:43:48 UTC (rev 501)
+++ branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/rm.py 2008-10-20 16:50:12 UTC (rev 537)
@@ -1,802 +0,0 @@
-# -------------------------------------------------------------------------- #
-# Copyright 2006-2008, University of Chicago #
-# Copyright 2008, Distributed Systems Architecture Group, Universidad #
-# Complutense de Madrid (dsa-research.org) #
-# #
-# Licensed under the Apache License, Version 2.0 (the "License"); you may #
-# not use this file except in compliance with the License. You may obtain #
-# a copy of the License at #
-# #
-# http://www.apache.org/licenses/LICENSE-2.0 #
-# #
-# Unless required by applicable law or agreed to in writing, software #
-# distributed under the License is distributed on an "AS IS" BASIS, #
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
-# See the License for the specific language governing permissions and #
-# limitations under the License. #
-# -------------------------------------------------------------------------- #
-
-"""The rm (resource manager) module is the root of Haizea. If you want to
-see where the ball starts rolling, look at the following two functions:
-
-* rm.ResourceManager.__init__()
-* rm.ResourceManager.start()
-
-This module provides the following classes:
-
-* ResourceManager: The resource manager itself. Pretty much everything else
- is contained in this class.
-* Clock: A base class for the resource manager's clock.
-* SimulatedClock: A clock for simulations.
-* RealClock: A clock that advances in realtime.
-"""
-
-import haizea.resourcemanager.accounting as accounting
-import haizea.common.constants as constants
-import haizea.resourcemanager.enact as enact
-from haizea.resourcemanager.deployment.unmanaged import UnmanagedDeploymentScheduler
-from haizea.resourcemanager.deployment.imagetransfer import ImageTransferDeploymentScheduler
-from haizea.resourcemanager.enact.opennebula import OpenNebulaResourcePoolInfo, OpenNebulaVMEnactment
-from haizea.resourcemanager.enact.simulated import SimulatedResourcePoolInfo, SimulatedVMEnactment, SimulatedDeploymentEnactment
-from haizea.resourcemanager.frontends.tracefile import TracefileFrontend
-from haizea.resourcemanager.frontends.opennebula import OpenNebulaFrontend
-from haizea.resourcemanager.frontends.rpc import RPCFrontend
-from haizea.resourcemanager.datastruct import Lease, ARLease, BestEffortLease, ImmediateLease, ResourceTuple
-from haizea.resourcemanager.scheduler import Scheduler
-from haizea.resourcemanager.slottable import SlotTable
-from haizea.resourcemanager.resourcepool import ResourcePool, ResourcePoolWithReusableImages
-from haizea.resourcemanager.rpcserver import RPCServer
-from haizea.common.utils import abstract, round_datetime, Singleton
-
-import operator
-import logging
-import signal
-import sys, os
-from time import sleep
-from math import ceil
-from mx.DateTime import now, TimeDelta
-
-DAEMON_STDOUT = DAEMON_STDIN = "/dev/null"
-DAEMON_STDERR = "/var/tmp/haizea.err"
-DEFAULT_LOGFILE = "/var/tmp/haizea.log"
-
-class ResourceManager(Singleton):
- """The resource manager
-
- This class is the root of Haizea. Pretty much everything else (scheduler,
- enactment modules, etc.) is contained in this class. The ResourceManager
- class is meant to be a singleton.
-
- """
-
- def __init__(self, config, daemon=False, pidfile=None):
- """Initializes the resource manager.
-
- Arguments:
- config -- a populated instance of haizea.common.config.RMConfig
- daemon -- True if Haizea must run as a daemon, False if it must
- run in the foreground
- pidfile -- When running as a daemon, file to save pid to
- """
- self.config = config
-
- # Create the RM components
-
- mode = config.get("mode")
- clock = config.get("clock")
-
- self.daemon = daemon
- self.pidfile = pidfile
-
- if mode == "simulated":
- self.init_simulated_mode()
- elif mode == "opennebula":
- self.init_opennebula_mode()
-
- # Statistics collection
- self.accounting = accounting.AccountingDataCollection(self, self.config.get("datafile"))
-
- self.logger = logging.getLogger("RM")
-
- def init_simulated_mode(self):
- """Initializes the resource manager in simulated mode
-
- """
-
- # Simulations always run in the foreground
- self.daemon = False
-
- self.init_logging()
-
- # The clock
- if self.clock == constants.CLOCK_SIMULATED:
- starttime = self.config.get("starttime")
- self.clock = SimulatedClock(self, starttime)
- self.rpc_server = None
- elif self.clock == constants.CLOCK_REAL:
- wakeup_interval = self.config.get("wakeup-interval")
- non_sched = self.config.get("non-schedulable-interval")
- self.clock = RealClock(self, wakeup_interval, non_sched)
- self.rpc_server = RPCServer(self)
-
- # Enactment modules
- info_enact = SimulatedResourcePoolInfo()
- vm_enact = SimulatedVMEnactment()
- deploy_enact = SimulatedDeploymentEnactment()
-
- # Resource pool
- if self.config.get("diskimage-reuse") == constants.REUSE_IMAGECACHES:
- resourcepool = ResourcePoolWithReusableImages(info_enact, vm_enact, deploy_enact)
- else:
- resourcepool = ResourcePool(info_enact, vm_enact, deploy_enact)
-
- # Slot table
- slottable = SlotTable()
-
- # Deployment scheduler
- deploy_type = self.config.get("lease-preparation")
- if deploy_type == constants.DEPLOYMENT_UNMANAGED:
- deployment_scheduler = UnmanagedDeploymentScheduler(slottable, resourcepool, deploy_enact)
- elif deploy_type == constants.DEPLOYMENT_TRANSFER:
- deployment_scheduler = ImageTransferDeploymentScheduler(slottable, resourcepool, deploy_enact)
-
- # Scheduler
- self.scheduler = Scheduler(self, slottable, resourcepool, deployment_scheduler)
-
- # TODO: Having the slot table contained in the deployment scheduler, and also
- # in the "main" scheduler (which itself contains the same slot table) is far
- # from ideal, although this is mostly a consequence of the Scheduler class
- # being in need of some serious refactoring. This will be fixed (see Scheduler
- # class comments for more details)
-
- # Lease request frontends
- if self.clock == constants.CLOCK_SIMULATED:
- # In pure simulation, we can only use the tracefile frontend
- self.frontends = [TracefileFrontend(self, self.clock.get_start_time())]
- elif self.clock == constants.CLOCK_REAL:
- # In simulation with a real clock, only the RPC frontend can be used
- self.frontends = [RPCFrontend(self)]
-
- def init_opennebula_mode(self):
- """Initializes the resource manager in OpenNebula mode
-
- """
- self.init_logging()
-
- # The clock
- wakeup_interval = self.config.get("wakeup-interval")
- non_sched = self.config.get("non-schedulable-interval")
- dry_run = self.config.get("dry-run")
- fastforward = dry_run
- self.clock = RealClock(self, wakeup_interval, non_sched, fastforward)
-
- # RPC server
- if dry_run:
- # No need for an RPC server when doing a dry run
- self.rpc_server = None
- else:
- self.rpc_server = RPCServer(self)
-
- # Enactment modules
- info_enact = OpenNebulaResourcePoolInfo()
- vm_enact = OpenNebulaVMEnactment()
- # No deployment in OpenNebula. Using simulated one for now.
- deploy_enact = SimulatedDeploymentEnactment()
-
- # Slot table
- slottable = SlotTable()
-
- # Resource pool
- resourcepool = ResourcePool(info_enact, vm_enact, deploy_enact)
-
- # Deployment module
- deployment = UnmanagedDeploymentScheduler(slottable, resourcepool, deploy_enact)
-
- # Scheduler
- self.scheduler = Scheduler(slottable, resourcepool, deployment)
-
- # TODO: Having the slot table contained in the deployment scheduler, and also
- # in the "main" scheduler (which itself contains the same slot table) is far
- # from ideal, although this is mostly a consequence of the Scheduler class
- # being in need of some serious refactoring. This will be fixed (see Scheduler
- # class comments for more details)
-
- # Lease request frontends
- self.frontends = [OpenNebulaFrontend(self)]
-
-
- def init_logging(self):
- """Initializes logging
-
- """
-
- from haizea.resourcemanager.log import HaizeaLogger
- logger = logging.getLogger("")
- if self.daemon:
- handler = logging.FileHandler(self.config.get("logfile"))
- else:
- handler = logging.StreamHandler()
- formatter = logging.Formatter('[%(haizeatime)s] %(name)-7s %(message)s')
- handler.setFormatter(formatter)
- logger.addHandler(handler)
- level = logging.getLevelName(self.config.get("loglevel"))
- logger.setLevel(level)
- logging.setLoggerClass(HaizeaLogger)
-
-
- def daemonize(self):
- """Daemonizes the Haizea process.
-
- Based on code in: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66012
-
- """
- # First fork
- try:
- pid = os.fork()
- if pid > 0:
- # Exit first parent
- sys.exit(0)
- except OSError, e:
- sys.stderr.write("Failed to daemonize Haizea: (%d) %s\n" % (e.errno, e.strerror))
- sys.exit(1)
-
- # Decouple from parent environment.
- os.chdir(".")
- os.umask(0)
- os.setsid()
-
- # Second fork
- try:
- pid = os.fork()
- if pid > 0:
- # Exit second parent.
- sys.exit(0)
- except OSError, e:
- sys.stderr.write("Failed to daemonize Haizea: (%d) %s\n" % (e.errno, e.strerror))
- sys.exit(2)
-
- # Open file descriptors and print start message
- si = file(DAEMON_STDIN, 'r')
- so = file(DAEMON_STDOUT, 'a+')
- se = file(DAEMON_STDERR, 'a+', 0)
- pid = os.getpid()
- sys.stderr.write("\nStarted Haizea daemon with pid %i\n\n" % pid)
- sys.stderr.flush()
- file(self.pidfile,'w+').write("%i\n" % pid)
-
- # Redirect standard file descriptors.
- os.close(sys.stdin.fileno())
- os.close(sys.stdout.fileno())
- os.close(sys.stderr.fileno())
- os.dup2(si.fileno(), sys.stdin.fileno())
- os.dup2(so.fileno(), sys.stdout.fileno())
- os.dup2(se.fileno(), sys.stderr.fileno())
-
- def start(self):
- """Starts the resource manager"""
- self.logger.info("Starting resource manager")
-
- # Create counters to keep track of interesting data.
- self.accounting.create_counter(constants.COUNTER_ARACCEPTED, constants.AVERAGE_NONE)
- self.accounting.create_counter(constants.COUNTER_ARREJECTED, constants.AVERAGE_NONE)
- self.accounting.create_counter(constants.COUNTER_IMACCEPTED, constants.AVERAGE_NONE)
- self.accounting.create_counter(constants.COUNTER_IMREJECTED, constants.AVERAGE_NONE)
- self.accounting.create_counter(constants.COUNTER_BESTEFFORTCOMPLETED, constants.AVERAGE_NONE)
- self.accounting.create_counter(constants.COUNTER_QUEUESIZE, constants.AVERAGE_TIMEWEIGHTED)
- self.accounting.create_counter(constants.COUNTER_DISKUSAGE, constants.AVERAGE_NONE)
- self.accounting.create_counter(constants.COUNTER_CPUUTILIZATION, constants.AVERAGE_TIMEWEIGHTED)
-
- if self.daemon:
- self.daemonize()
- if self.rpc_server:
- self.rpc_server.start()
- # Start the clock
- self.clock.run()
-
- def stop(self):
- """Stops the resource manager"""
-
- self.logger.status("Stopping resource manager")
-
- # Stop collecting data (this finalizes counters)
- self.accounting.stop()
-
- # TODO: When gracefully stopping mid-scheduling, we need to figure out what to
- # do with leases that are still running.
-
- self.logger.status(" Completed best-effort leases: %i" % self.accounting.data.counters[constants.COUNTER_BESTEFFORTCOMPLETED])
- self.logger.status(" Accepted AR leases: %i" % self.accounting.data.counters[constants.COUNTER_ARACCEPTED])
- self.logger.status(" Rejected AR leases: %i" % self.accounting.data.counters[constants.COUNTER_ARREJECTED])
-
- # In debug mode, dump the lease descriptors.
- for lease in self.scheduler.completedleases.entries.values():
- lease.print_contents()
-
- # Write all collected data to disk
- self.accounting.save_to_disk()
-
- def process_requests(self, nexttime):
- """Process any new requests in the request frontend
-
- Checks the request frontend to see if there are any new requests that
- have to be processed. AR leases are sent directly to the scheduler.
- Best-effort leases are queued.
-
- Arguments:
- nexttime -- The next time at which the scheduler can allocate resources.
- This is meant to be provided by the clock simply as a sanity
- measure when running in real time (to avoid scheduling something
- "now" only to have "now" be in the past once the scheduling
- function returns). I.e., nexttime has nothing to do with whether
- there are resources available at that time or not.
-
- """
-
- # Get requests from frontend
- requests = []
- for frontend in self.frontends:
- requests += frontend.get_accumulated_requests()
- requests.sort(key=operator.attrgetter("submit_time"))
-
- for req in requests:
- self.scheduler.request_lease(req)
-
- # Run the scheduling function.
- try:
- self.scheduler.schedule(nexttime)
- except Exception, msg:
- # Exit if something goes horribly wrong
- self.logger.error("Exception in scheduling function. Dumping state..." )
- self.print_stats(logging.getLevelName("ERROR"), verbose=True)
- raise
-
- def process_reservations(self, time):
- """Process reservations starting/stopping at specified time"""
-
- # The scheduler takes care of this.
- try:
- self.scheduler.process_reservations(time)
- except Exception, msg:
- # Exit if something goes horribly wrong
- self.logger.error("Exception when processing reservations. Dumping state..." )
- self.print_stats(logging.getLevelName("ERROR"), verbose=True)
- raise
-
-
- def print_stats(self, loglevel, verbose=False):
- """Print some basic statistics in the log
-
- Arguments:
- loglevel -- log level at which to print stats
- verbose -- if True, will print the lease descriptor of all the scheduled
- and queued leases.
- """
-
- # Print clock stats and the next changepoint in slot table
- self.clock.print_stats(loglevel)
- self.logger.log(loglevel, "Next change point (in slot table): %s" % self.get_next_changepoint())
-
- # Print descriptors of scheduled leases
- scheduled = self.scheduler.leases.entries.keys()
- self.logger.log(loglevel, "Scheduled requests: %i" % len(scheduled))
- if verbose and len(scheduled)>0:
- self.logger.log(loglevel, "vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv")
- for k in scheduled:
- lease = self.scheduler.leases.get_lease(k)
- lease.print_contents(loglevel=loglevel)
- self.logger.log(loglevel, "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^")
-
- # Print queue size and descriptors of queued leases
- self.logger.log(loglevel, "Queue size: %i" % self.scheduler.queue.length())
- if verbose and self.scheduler.queue.length()>0:
- self.logger.log(loglevel, "vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv")
- for lease in self.scheduler.queue:
- lease.print_contents(loglevel=loglevel)
- self.logger.log(loglevel, "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^")
-
- def get_next_changepoint(self):
- """Return next changepoint in the slot table"""
- return self.scheduler.slottable.peekNextChangePoint(self.clock.get_time())
-
- def exists_leases_in_rm(self):
- """Return True if there are any leases still "in the system" """
- return self.scheduler.exists_scheduled_leases() or not self.scheduler.is_queue_empty()
-
- # TODO: Add more events. This is pending on actually getting interesting
- # events in OpenNebula 1.2. For now, the only event is a prematurely
- # ending VM.
- def notify_event(self, lease_id, event):
- try:
- self.scheduler.notify_event(lease_id, event)
- except Exception, msg:
- # Exit if something goes horribly wrong
- self.logger.error("Exception when notifying an event for lease %i. Dumping state..." % lease_id )
- self.print_stats(logging.getLevelName("ERROR"), verbose=True)
- raise
-
- def cancel_lease(self, lease_id):
- """Cancels a lease.
-
- Arguments:
- lease_id -- ID of the lease to cancel
- """
- try:
- self.scheduler.cancel_lease(lease_id)
- except Exception, msg:
- # Exit if something goes horribly wrong
- self.logger.error("Exception when canceling lease %i. Dumping state..." % lease_id)
- self.print_stats(logging.getLevelName("ERROR"), verbose=True)
- raise
-
-
-class Clock(object):
- """Base class for the resource manager's clock.
-
- The clock is in charge of periodically waking the resource manager so it
- will process new requests and handle existing reservations. This is a
- base class defining abstract methods.
-
- """
- def __init__(self, rm):
- self.rm = rm
-
- def get_time(self):
- """Return the current time"""
- return abstract()
-
- def get_start_time(self):
- """Return the time at which the clock started ticking"""
- return abstract()
-
- def get_next_schedulable_time(self):
- """Return the next time at which resources could be scheduled.
-
- The "next schedulable time" server sanity measure when running
- in real time (to avoid scheduling something "now" to actually
- have "now" be in the past once the scheduling function returns.
- i.e., the "next schedulable time" has nothing to do with whether
- there are resources available at that time or not.
- """
- return abstract()
-
- def run(self):
- """Start and run the clock. This function is, in effect,
- the main loop of the resource manager."""
- return abstract()
-
- def print_stats(self, loglevel):
- """Print some basic statistics about the clock on the log
-
- Arguments:
- loglevel -- log level at which statistics should be printed.
- """
- return abstract()
-
-
-class SimulatedClock(Clock):
- """Simulates the passage of time... really fast.
-
- The simulated clock steps through time to produce an ideal schedule.
- See the run() function for a description of how time is incremented
- exactly in the simulated clock.
-
- """
-
- def __init__(self, rm, starttime):
- """Initialize the simulated clock, starting at the provided starttime"""
- Clock.__init__(self, rm)
- self.starttime = starttime
- self.time = starttime
- self.logger = logging.getLogger("CLOCK")
- self.statusinterval = self.rm.config.get("status-message-interval")
-
- def get_time(self):
- """See docstring in base Clock class."""
- return self.time
-
- def get_start_time(self):
- """See docstring in base Clock class."""
- return self.starttime
-
- def get_next_schedulable_time(self):
- """See docstring in base Clock class."""
- return self.time
-
- def run(self):
- """Runs the simulated clock through time.
-
- The clock starts at the provided start time. At each point in time,
- it wakes up the resource manager and then skips to the next time
- where "something" is happening (see __get_next_time for a more
- rigorous description of this).
-
- The clock stops when there is nothing left to do (no pending or
- queued requests, and no future reservations).
-
- The simulated clock can only work in conjunction with the
- tracefile request frontend.
- """
- self.logger.status("Starting simulated clock")
- self.rm.accounting.start(self.get_start_time())
- prevstatustime = self.time
- done = False
- # Main loop
- while not done:
- # Check to see if there are any leases which are ending prematurely.
- # Note that this is unique to simulation.
- prematureends = self.rm.scheduler.slottable.getPrematurelyEndingRes(self.time)
-
- # Notify the resource manager about the premature ends
- for rr in prematureends:
- self.rm.notify_event(rr.lease.id, constants.EVENT_END_VM)
-
- # Process reservations starting/stopping at the current time and
- # check if there are any new requests.
- self.rm.process_reservations(self.time)
- self.rm.process_requests(self.time)
-
- # Since processing requests may have resulted in new reservations
- # starting now, we process reservations again.
- self.rm.process_reservations(self.time)
-
- # Print a status message
- if self.statusinterval != None and (self.time - prevstatustime).minutes >= self.statusinterval:
- self.__print_status()
- prevstatustime = self.time
-
- # Skip to next point in time.
- self.time, done = self.__get_next_time()
-
- # Stop the resource manager
- self.logger.status("Stopping simulated clock")
- self.rm.stop()
-
- def print_stats(self, loglevel):
- """See docstring in base Clock class."""
- pass
-
- def __print_status(self):
- """Prints status summary."""
- self.logger.status("STATUS ---Begin---")
- self.logger.status("STATUS Completed best-effort leases: %i" % self.rm.accounting.data.counters[constants.COUNTER_BESTEFFORTCOMPLETED])
- self.logger.status("STATUS Queue size: %i" % self.rm.accounting.data.counters[constants.COUNTER_QUEUESIZE])
- self.logger.status("STATUS Best-effort reservations: %i" % self.rm.scheduler.numbesteffortres)
- self.logger.status("STATUS Accepted AR leases: %i" % self.rm.accounting.data.counters[constants.COUNTER_ARACCEPTED])
- self.logger.status("STATUS Rejected AR leases: %i" % self.rm.accounting.data.counters[constants.COUNTER_ARREJECTED])
- self.logger.status("STATUS ----End----")
-
- def __get_next_time(self):
- """Determines what is the next point in time to skip to.
-
- At a given point in time, the next time is the earliest of the following:
- * The arrival of the next lease request
- * The start or end of a reservation (a "changepoint" in the slot table)
- * A premature end of a lease
- """
- done = False
-
- # Determine candidate next times
- tracefrontend = self.__get_trace_frontend()
- nextchangepoint = self.rm.get_next_changepoint()
- nextprematureend = self.rm.scheduler.slottable.getNextPrematureEnd(self.time)
- nextreqtime = tracefrontend.get_next_request_time()
- self.logger.debug("Next change point (in slot table): %s" % nextchangepoint)
- self.logger.debug("Next request time: %s" % nextreqtime)
- self.logger.debug("Next premature end: %s" % nextprematureend)
-
- # The previous time is now
- prevtime = self.time
-
- # We initialize the next time to now too, to detect if
- # we've been unable to determine what the next time is.
- newtime = self.time
-
- # Find the earliest of the three, accounting for None values
- if nextchangepoint != None and nextreqtime == None:
- newtime = nextchangepoint
- elif nextchangepoint == None and nextreqtime != None:
- newtime = nextreqtime
- elif nextchangepoint != None and nextreqtime != None:
- newtime = min(nextchangepoint, nextreqtime)
-
- if nextprematureend != None:
- newtime = min(nextprematureend, newtime)
-
- if nextchangepoint == newtime:
- # Note that, above, we just "peeked" the next changepoint in the slottable.
- # If it turns out we're skipping to that point in time, then we need to
- # "get" it (this is because changepoints in the slottable are cached to
- # minimize access to the slottable. This optimization turned out to
- # be more trouble than it's worth and will probably be removed sometime
- # soon.
- newtime = self.rm.scheduler.slottable.getNextChangePoint(newtime)
-
- # If there are no more leases in the system, and no more pending requests,
- # then we're done.
- if not self.rm.exists_leases_in_rm() and not tracefrontend.exists_more_requests():
- done = True
-
- # We can also be done if we've specified that we want to stop when
- # the best-effort requests are all done or when they've all been submitted.
- stopwhen = self.rm.config.get("stop-when")
- besteffort = self.rm.scheduler.leases.get_leases(type = BestEffortLease)
- pendingbesteffort = [r for r in tracefrontend.requests if isinstance(r, BestEffortLease)]
- if stopwhen == constants.STOPWHEN_BEDONE:
- if self.rm.scheduler.isQueueEmpty() and len(besteffort) + len(pendingbesteffort) == 0:
- done = True
- elif stopwhen == constants.STOPWHEN_BESUBMITTED:
- if len(pendingbesteffort) == 0:
- done = True
-
- # If we didn't arrive at a new time, and we're not done, we've fallen into
- # an infinite loop. This is A Bad Thing(tm).
- if newtime == prevtime and done != True:
- self.logger.error("Simulated clock has fallen into an infinite loop. Dumping state..." )
- self.rm.print_stats(logging.getLevelName("ERROR"), verbose=True)
- raise Exception, "Simulated clock has fallen into an infinite loop."
-
- return newtime, done
-
- def __get_trace_frontend(self):
- """Gets the tracefile frontend from the resource manager"""
- frontends = self.rm.frontends
- tracef = [f for f in frontends if isinstance(f, TracefileFrontend)]
- if len(tracef) != 1:
- raise Exception, "The simulated clock can only work with a tracefile request frontend."
- else:
- return tracef[0]
-
-
-class RealClock(Clock):
- """A realtime clock.
-
- The real clock wakes up periodically to, in turn, tell the resource manager
- to wake up. The real clock can also be run in a "fastforward" mode for
- debugging purposes (however, unlike the simulated clock, the clock will
- always skip a fixed amount of time into the future).
- """
- def __init__(self, rm, quantum, non_sched, fastforward = False):
- """Initializes the real clock.
-
- Arguments:
- rm -- the resource manager
- quantum -- interval between clock wakeups
- non_sched -- interval after each wakeup during which no resources
- can be scheduled (the "non-schedulable interval")
- fastforward -- if True, the clock won't actually sleep
- for the duration of the quantum."""
- Clock.__init__(self, rm)
- self.fastforward = fastforward
- if not self.fastforward:
- self.lastwakeup = None
- else:
- self.lastwakeup = round_datetime(now())
- self.logger = logging.getLogger("CLOCK")
- self.starttime = self.get_time()
- self.nextschedulable = None
- self.nextperiodicwakeup = None
- self.quantum = TimeDelta(seconds=quantum)
- self.non_sched = TimeDelta(seconds=non_sched)
-
- def get_time(self):
- """See docstring in base Clock class."""
- if not self.fastforward:
- return now()
- else:
- return self.lastwakeup
-
- def get_start_time(self):
- """See docstring in base Clock class."""
- return self.starttime
-
- def get_next_schedulable_time(self):
- """See docstring in base Clock class."""
- return self.nextschedulable
-
- def run(self):
- """Runs the real clock through time.
-
- The clock starts when run() is called. In each iteration of the main loop
- it will do the following:
- - Wake up the resource manager
- - Determine if there will be anything to do before the next
- time the clock will wake up (after the quantum has passed). Note
- that this information is readily available on the slot table.
- If so, set next-wakeup-time to (now + time until slot table
- event). Otherwise, set it to (now + quantum)
- - Sleep until next-wake-up-time
-
- The clock keeps on tickin' until a SIGINT signal (Ctrl-C if running in the
- foreground) or a SIGTERM signal is received.
- """
- self.logger.status("Starting clock")
- self.rm.accounting.start(self.get_start_time())
-
- signal.signal(signal.SIGINT, self.signalhandler_gracefulstop)
- signal.signal(signal.SIGTERM, self.signalhandler_gracefulstop)
-
- done = False
- # Main loop
- while not done:
- self.logger.status("Waking up to manage resources")
-
- # Save the waking time. We want to use a consistent time in the
- # resource manager operations (if we use now(), we'll get a different
- # time every time)
- if not self.fastforward:
- self.lastwakeup = round_datetime(self.get_time())
- self.logger.status("Wake-up time recorded as %s" % self.lastwakeup)
-
- # Next schedulable time
- self.nextschedulable = round_datetime(self.lastwakeup + self.non_sched)
-
- # Wake up the resource manager
- self.rm.process_reservations(self.lastwakeup)
- # TODO: Compute nextschedulable here, before processing requests
- self.rm.process_requests(self.nextschedulable)
-
- # Next wakeup time
- time_now = now()
- if self.lastwakeup + self.quantum <= time_now:
- quantums = (time_now - self.lastwakeup) / self.quantum
- quantums = int(ceil(quantums)) * self.quantum
- self.nextperiodicwakeup = round_datetime(self.lastwakeup + quantums)
- else:
- self.nextperiodicwakeup = round_datetime(self.lastwakeup + self.quantum)
-
- # Determine if there's anything to do before the next wakeup time
- nextchangepoint = self.rm.get_next_changepoint()
- if nextchangepoint != None and nextchangepoint <= self.nextperiodicwakeup:
- # We need to wake up earlier to handle a slot table event
- nextwakeup = nextchangepoint
- self.rm.scheduler.slottable.getNextChangePoint(self.lastwakeup)
- self.logger.status("Going back to sleep. Waking up at %s to handle slot table event." % nextwakeup)
- else:
- # Nothing to do before waking up
- nextwakeup = self.nextperiodicwakeup
- self.logger.status("Going back to sleep. Waking up at %s to see if something interesting has happened by then." % nextwakeup)
-
- # The only exit condition from the real clock is if the stop_when_no_more_leases
- # is set to True, and there's no more work left to do.
- stop_when_no_more_leases = self.rm.config.get("stop-when-no-more-leases")
- if stop_when_no_more_leases and not self.rm.exists_leases_in_rm():
- done = True
-
- # Sleep
- if not done:
- if not self.fastforward:
- sleep((nextwakeup - now()).seconds)
- else:
- self.lastwakeup = nextwakeup
-
- # Stop the resource manager
- self.logger.status("Stopping real clock")
- self.rm.stop()
-
- def print_stats(self, loglevel):
- """See docstring in base Clock class."""
- pass
-
- def signalhandler_gracefulstop(self, signum, frame):
- """Handler for SIGTERM and SIGINT. Allows Haizea to stop gracefully."""
- sigstr = ""
- if signum == signal.SIGTERM:
- sigstr = " (SIGTERM)"
- elif signum == signal.SIGINT:
- sigstr = " (SIGINT)"
- self.logger.status("Received signal %i%s" %(signum, sigstr))
- self.logger.status("Stopping gracefully...")
- self.rm.stop()
- sys.exit()
-
-if __name__ == "__main__":
- from haizea.resourcemanager.configfile import HaizeaConfig
- from haizea.common.config import ConfigException
- CONFIGFILE = "../../../etc/suspendresume.conf"
- try:
- CONFIG = HaizeaConfig.from_file(CONFIGFILE)
- except ConfigException, msg:
- print >> sys.stderr, "Error in configuration file:"
- print >> sys.stderr, msg
- exit(1)
- from haizea.resourcemanager.rm import ResourceManager
- RM = ResourceManager(CONFIG)
- RM.start()
\ No newline at end of file
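The core of SimulatedClock.__get_next_time above is a small rule: skip to the earliest of the next slot-table changepoint, the next lease request arrival, and the next premature VM end, ignoring whichever of those is None. A condensed sketch of that rule, using plain numbers in place of mx.DateTime timestamps (the next_simulated_time helper is hypothetical, not Haizea code):

# Sketch of the time-skipping rule in SimulatedClock.__get_next_time above.
# Plain numbers stand in for mx.DateTime timestamps; the helper is hypothetical.

def next_simulated_time(changepoint, next_request, premature_end):
    candidates = [t for t in (changepoint, next_request, premature_end)
                  if t is not None]
    if not candidates:
        return None          # nothing left to skip to
    return min(candidates)   # earliest upcoming event wins

print(next_simulated_time(130, 125, None))    # 125: a lease request arrives first
print(next_simulated_time(140, None, 135))    # 135: a VM ends prematurely first
print(next_simulated_time(None, None, None))  # None: no more events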
Copied: branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/rm.py (from rev 508, trunk/src/haizea/resourcemanager/rm.py)
===================================================================
--- branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/rm.py (rev 0)
+++ branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/rm.py 2008-10-20 16:50:12 UTC (rev 537)
@@ -0,0 +1,816 @@
+# -------------------------------------------------------------------------- #
+# Copyright 2006-2008, University of Chicago #
+# Copyright 2008, Distributed Systems Architecture Group, Universidad #
+# Complutense de Madrid (dsa-research.org) #
+# #
+# Licensed under the Apache License, Version 2.0 (the "License"); you may #
+# not use this file except in compliance with the License. You may obtain #
+# a copy of the License at #
+# #
+# http://www.apache.org/licenses/LICENSE-2.0 #
+# #
+# Unless required by applicable law or agreed to in writing, software #
+# distributed under the License is distributed on an "AS IS" BASIS, #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
+# See the License for the specific language governing permissions and #
+# limitations under the License. #
+# -------------------------------------------------------------------------- #
+
+"""The rm (resource manager) module is the root of Haizea. If you want to
+see where the ball starts rolling, look at the following two functions:
+
+* rm.ResourceManager.__init__()
+* rm.ResourceManager.start()
+
+This module provides the following classes:
+
+* ResourceManager: The resource manager itself. Pretty much everything else
+ is contained in this class.
+* Clock: A base class for the resource manager's clock.
+* SimulatedClock: A clock for simulations.
+* RealClock: A clock that advances in realtime.
+"""
+
+import haizea.resourcemanager.accounting as accounting
+import haizea.common.constants as constants
+import haizea.resourcemanager.enact as enact
+from haizea.resourcemanager.deployment.unmanaged import UnmanagedDeploymentScheduler
+from haizea.resourcemanager.deployment.imagetransfer import ImageTransferDeploymentScheduler
+from haizea.resourcemanager.enact.opennebula import OpenNebulaResourcePoolInfo, OpenNebulaVMEnactment, OpenNebulaDummyDeploymentEnactment
+from haizea.resourcemanager.enact.simulated import SimulatedResourcePoolInfo, SimulatedVMEnactment, SimulatedDeploymentEnactment
+from haizea.resourcemanager.frontends.tracefile import TracefileFrontend
+from haizea.resourcemanager.frontends.opennebula import OpenNebulaFrontend
+from haizea.resourcemanager.frontends.rpc import RPCFrontend
+from haizea.resourcemanager.datastruct import Lease, ARLease, BestEffortLease, ImmediateLease, ResourceTuple
+from haizea.resourcemanager.scheduler import Scheduler
+from haizea.resourcemanager.slottable import SlotTable
+from haizea.resourcemanager.resourcepool import ResourcePool, ResourcePoolWithReusableImages
+from haizea.resourcemanager.rpcserver import RPCServer
+from haizea.common.utils import abstract, round_datetime, Singleton
+
+import operator
+import logging
+import signal
+import sys, os
+from time import sleep
+from math import ceil
+from mx.DateTime import now, TimeDelta
+
+DAEMON_STDOUT = DAEMON_STDIN = "/dev/null"
+DAEMON_STDERR = "/var/tmp/haizea.err"
+DEFAULT_LOGFILE = "/var/tmp/haizea.log"
+
+class ResourceManager(Singleton):
+ """The resource manager
+
+ This class is the root of Haizea. Pretty much everything else (scheduler,
+ enactment modules, etc.) is contained in this class. The ResourceManager
+ class is meant to be a singleton.
+
+ """
+
+ def __init__(self, config, daemon=False, pidfile=None):
+ """Initializes the resource manager.
+
+ Arguments:
+ config -- a populated instance of haizea.common.config.RMConfig
+ daemon -- True if Haizea must run as a daemon, False if it must
+ run in the foreground
+ pidfile -- When running as a daemon, file to save pid to
+ """
+ self.config = config
+
+ # Create the RM components
+
+ mode = config.get("mode")
+
+ self.daemon = daemon
+ self.pidfile = pidfile
+
+ if mode == "simulated":
+ self.init_simulated_mode()
+ elif mode == "opennebula":
+ self.init_opennebula_mode()
+
+ # Statistics collection
+ self.accounting = accounting.AccountingDataCollection(self, self.config.get("datafile"))
+
+ self.logger = logging.getLogger("RM")
+
+ def init_simulated_mode(self):
+ """Initializes the resource manager in simulated mode
+
+ """
+
+ # Simulated-time simulations always run in the foreground
+ clock = self.config.get("clock")
+ if clock == constants.CLOCK_SIMULATED:
+ self.daemon = False
+
+ self.init_logging()
+
+ if clock == constants.CLOCK_SIMULATED:
+ starttime = self.config.get("starttime")
+ self.clock = SimulatedClock(self, starttime)
+ self.rpc_server = None
+ elif clock == constants.CLOCK_REAL:
+ wakeup_interval = self.config.get("wakeup-interval")
+ non_sched = self.config.get("non-schedulable-interval")
+ self.clock = RealClock(self, wakeup_interval, non_sched)
+ self.rpc_server = RPCServer(self)
+
+ # Enactment modules
+ info_enact = SimulatedResourcePoolInfo()
+ vm_enact = SimulatedVMEnactment()
+ deploy_enact = SimulatedDeploymentEnactment()
+
+ # Resource pool
+ deploy_type = self.config.get("lease-preparation")
+ if deploy_type == constants.DEPLOYMENT_TRANSFER:
+ if self.config.get("diskimage-reuse") == constants.REUSE_IMAGECACHES:
+ resourcepool = ResourcePoolWithReusableImages(info_enact, vm_enact, deploy_enact)
+ else:
+ resourcepool = ResourcePool(info_enact, vm_enact, deploy_enact)
+ else:
+ resourcepool = ResourcePool(info_enact, vm_enact, deploy_enact)
+
+ # Slot table
+ slottable = SlotTable()
+
+ # Deployment scheduler
+
+ if deploy_type == constants.DEPLOYMENT_UNMANAGED:
+ deployment_scheduler = UnmanagedDeploymentScheduler(slottable, resourcepool, deploy_enact)
+ elif deploy_type == constants.DEPLOYMENT_TRANSFER:
+ deployment_scheduler = ImageTransferDeploymentScheduler(slottable, resourcepool, deploy_enact)
+
+ # Scheduler
+ self.scheduler = Scheduler(slottable, resourcepool, deployment_scheduler)
+
+ # TODO: Having the slot table contained in the deployment scheduler, and also
+ # in the "main" scheduler (which itself contains the same slot table) is far
+ # from ideal, although this is mostly a consequence of the Scheduler class
+ # being in need of some serious refactoring. This will be fixed (see Scheduler
+ # class comments for more details)
+
+ # Lease request frontends
+ if clock == constants.CLOCK_SIMULATED:
+ # In pure simulation, we can only use the tracefile frontend
+ self.frontends = [TracefileFrontend(self, self.clock.get_start_time())]
+ elif clock == constants.CLOCK_REAL:
+ # In simulation with a real clock, only the RPC frontend can be used
+ self.frontends = [RPCFrontend(self)]
+
+ def init_opennebula_mode(self):
+ """Initializes the resource manager in OpenNebula mode
+
+ """
+ self.init_logging()
+
+ # The clock
+ wakeup_interval = self.config.get("wakeup-interval")
+ non_sched = self.config.get("non-schedulable-interval")
+ dry_run = self.config.get("dry-run")
+ fastforward = dry_run
+ self.clock = RealClock(self, wakeup_interval, non_sched, fastforward)
+
+ # RPC server
+ if dry_run:
+ # No need for an RPC server when doing a dry run
+ self.rpc_server = None
+ else:
+ self.rpc_server = RPCServer(self)
+
+ # Enactment modules
+ info_enact = OpenNebulaResourcePoolInfo()
+ vm_enact = OpenNebulaVMEnactment()
+ # No deployment in OpenNebula. Using dummy one for now.
+ deploy_enact = OpenNebulaDummyDeploymentEnactment()
+
+ # Slot table
+ slottable = SlotTable()
+
+ # Resource pool
+ resourcepool = ResourcePool(info_enact, vm_enact, deploy_enact)
+
+ # Deployment module
+ deployment = UnmanagedDeploymentScheduler(slottable, resourcepool, deploy_enact)
+
+ # Scheduler
+ self.scheduler = Scheduler(slottable, resourcepool, deployment)
+
+ # TODO: Having the slot table contained in the deployment scheduler, and also
+ # in the "main" scheduler (which itself contains the same slot table) is far
+ # from ideal, although this is mostly a consequence of the Scheduler class
+ # being in need of some serious refactoring. This will be fixed (see Scheduler
+ # class comments for more details)
+
+ # Lease request frontends
+ self.frontends = [OpenNebulaFrontend(self)]
+
+
+ def init_logging(self):
+ """Initializes logging
+
+ """
+
+ from haizea.resourcemanager.log import HaizeaLogger
+ logger = logging.getLogger("")
+ if self.daemon:
+ handler = logging.FileHandler(self.config.get("logfile"))
+ else:
+ handler = logging.StreamHandler()
+ formatter = logging.Formatter('[%(haizeatime)s] %(name)-7s %(message)s')
+ handler.setFormatter(formatter)
+ logger.addHandler(handler)
+ level = logging.getLevelName(self.config.get("loglevel"))
+ logger.setLevel(level)
+ logging.setLoggerClass(HaizeaLogger)
+
+
+ def daemonize(self):
+ """Daemonizes the Haizea process.
+
+ Based on code in: http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/66012
+
+ """
+ # First fork
+ try:
+ pid = os.fork()
+ if pid > 0:
+ # Exit first parent
+ sys.exit(0)
+ except OSError, e:
+ sys.stderr.write("Failed to daemonize Haizea: (%d) %s\n" % (e.errno, e.strerror))
+ sys.exit(1)
+
+ # Decouple from parent environment.
+ os.chdir(".")
+ os.umask(0)
+ os.setsid()
+
+ # Second fork
+ try:
+ pid = os.fork()
+ if pid > 0:
+ # Exit second parent.
+ sys.exit(0)
+ except OSError, e:
+ sys.stderr.write("Failed to daemonize Haizea: (%d) %s\n" % (e.errno, e.strerror))
+ sys.exit(2)
+
+ # Open file descriptors and print start message
+ si = file(DAEMON_STDIN, 'r')
+ so = file(DAEMON_STDOUT, 'a+')
+ se = file(DAEMON_STDERR, 'a+', 0)
+ pid = os.getpid()
+ sys.stderr.write("\nStarted Haizea daemon with pid %i\n\n" % pid)
+ sys.stderr.flush()
+ file(self.pidfile,'w+').write("%i\n" % pid)
+
+ # Redirect standard file descriptors.
+ os.close(sys.stdin.fileno())
+ os.close(sys.stdout.fileno())
+ os.close(sys.stderr.fileno())
+ os.dup2(si.fileno(), sys.stdin.fileno())
+ os.dup2(so.fileno(), sys.stdout.fileno())
+ os.dup2(se.fileno(), sys.stderr.fileno())
+
+ def start(self):
+ """Starts the resource manager"""
+ self.logger.info("Starting resource manager")
+
+ # Create counters to keep track of interesting data.
+ self.accounting.create_counter(constants.COUNTER_ARACCEPTED, constants.AVERAGE_NONE)
+ self.accounting.create_counter(constants.COUNTER_ARREJECTED, constants.AVERAGE_NONE)
+ self.accounting.create_counter(constants.COUNTER_IMACCEPTED, constants.AVERAGE_NONE)
+ self.accounting.create_counter(constants.COUNTER_IMREJECTED, constants.AVERAGE_NONE)
+ self.accounting.create_counter(constants.COUNTER_BESTEFFORTCOMPLETED, constants.AVERAGE_NONE)
+ self.accounting.create_counter(constants.COUNTER_QUEUESIZE, constants.AVERAGE_TIMEWEIGHTED)
+ self.accounting.create_counter(constants.COUNTER_DISKUSAGE, constants.AVERAGE_NONE)
+ self.accounting.create_counter(constants.COUNTER_CPUUTILIZATION, constants.AVERAGE_TIMEWEIGHTED)
+
+ if self.daemon:
+ self.daemonize()
+ if self.rpc_server:
+ self.rpc_server.start()
+ # Start the clock
+ self.clock.run()
+
+ def stop(self):
+ """Stops the resource manager"""
+
+ self.logger.status("Stopping resource manager")
+
+ # Stop collecting data (this finalizes counters)
+ self.accounting.stop()
+
+ # TODO: When gracefully stopping mid-scheduling, we need to figure out what to
+ # do with leases that are still running.
+
+ self.logger.status(" Completed best-effort leases: %i" % self.accounting.data.counters[constants.COUNTER_BESTEFFORTCOMPLETED])
+ self.logger.status(" Accepted AR leases: %i" % self.accounting.data.counters[constants.COUNTER_ARACCEPTED])
+ self.logger.status(" Rejected AR leases: %i" % self.accounting.data.counters[constants.COUNTER_ARREJECTED])
+
+ # In debug mode, dump the lease descriptors.
+ for lease in self.scheduler.completedleases.entries.values():
+ lease.print_contents()
+
+ # Write all collected data to disk
+ self.accounting.save_to_disk()
+
+ # Stop RPC server
+ if self.rpc_server != None:
+ self.rpc_server.stop()
+
+ def process_requests(self, nexttime):
+ """Process any new requests in the request frontend
+
+ Checks the request frontend to see if there are any new requests that
+        have to be processed. AR leases are sent directly to the scheduler.
+ Best-effort leases are queued.
+
+ Arguments:
+        nexttime -- The next time at which the scheduler can allocate resources.
+                    This is meant to be provided by the clock simply as a sanity
+                    measure when running in real time (to avoid scheduling something
+                    "now" only to have "now" already be in the past by the time the
+                    scheduling function returns). That is, nexttime has nothing to do
+                    with whether there are resources available at that time or not.
+
+ """
+
+ # Get requests from frontend
+ requests = []
+ for frontend in self.frontends:
+ requests += frontend.get_accumulated_requests()
+ requests.sort(key=operator.attrgetter("submit_time"))
+
+ for req in requests:
+ self.scheduler.request_lease(req)
+
+ # Run the scheduling function.
+ try:
+ self.scheduler.schedule(nexttime)
+ except Exception, msg:
+ # Exit if something goes horribly wrong
+ self.logger.error("Exception in scheduling function. Dumping state..." )
+ self.print_stats(logging.getLevelName("ERROR"), verbose=True)
+ raise
+
+ def process_reservations(self, time):
+ """Process reservations starting/stopping at specified time"""
+
+ # The scheduler takes care of this.
+ try:
+ self.scheduler.process_reservations(time)
+ except Exception, msg:
+ # Exit if something goes horribly wrong
+ self.logger.error("Exception when processing reservations. Dumping state..." )
+ self.print_stats(logging.getLevelName("ERROR"), verbose=True)
+ raise
+
+
+ def print_stats(self, loglevel, verbose=False):
+ """Print some basic statistics in the log
+
+ Arguments:
+ loglevel -- log level at which to print stats
+ verbose -- if True, will print the lease descriptor of all the scheduled
+ and queued leases.
+ """
+
+ # Print clock stats and the next changepoint in slot table
+ self.clock.print_stats(loglevel)
+ self.logger.log(loglevel, "Next change point (in slot table): %s" % self.get_next_changepoint())
+
+ # Print descriptors of scheduled leases
+ scheduled = self.scheduler.leases.entries.keys()
+ self.logger.log(loglevel, "Scheduled requests: %i" % len(scheduled))
+ if verbose and len(scheduled)>0:
+ self.logger.log(loglevel, "vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv")
+ for k in scheduled:
+ lease = self.scheduler.leases.get_lease(k)
+ lease.print_contents(loglevel=loglevel)
+ self.logger.log(loglevel, "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^")
+
+ # Print queue size and descriptors of queued leases
+ self.logger.log(loglevel, "Queue size: %i" % self.scheduler.queue.length())
+ if verbose and self.scheduler.queue.length()>0:
+ self.logger.log(loglevel, "vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv")
+ for lease in self.scheduler.queue:
+ lease.print_contents(loglevel=loglevel)
+ self.logger.log(loglevel, "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^")
+
+ def get_next_changepoint(self):
+ """Return next changepoint in the slot table"""
+ return self.scheduler.slottable.peekNextChangePoint(self.clock.get_time())
+
+ def exists_leases_in_rm(self):
+ """Return True if there are any leases still "in the system" """
+ return self.scheduler.exists_scheduled_leases() or not self.scheduler.is_queue_empty()
+
+ # TODO: Add more events. This is pending on actually getting interesting
+ # events in OpenNebula 1.2. For now, the only event is a prematurely
+ # ending VM.
+ def notify_event(self, lease_id, event):
+ try:
+ self.scheduler.notify_event(lease_id, event)
+ except Exception, msg:
+ # Exit if something goes horribly wrong
+ self.logger.error("Exception when notifying an event for lease %i. Dumping state..." % lease_id )
+ self.print_stats(logging.getLevelName("ERROR"), verbose=True)
+ raise
+
+ def cancel_lease(self, lease_id):
+ """Cancels a lease.
+
+ Arguments:
+        lease_id -- ID of the lease to cancel
+ """
+ try:
+ self.scheduler.cancel_lease(lease_id)
+ except Exception, msg:
+ # Exit if something goes horribly wrong
+ self.logger.error("Exception when canceling lease %i. Dumping state..." % lease_id)
+ self.print_stats(logging.getLevelName("ERROR"), verbose=True)
+ raise
+
+
+class Clock(object):
+ """Base class for the resource manager's clock.
+
+ The clock is in charge of periodically waking the resource manager so it
+ will process new requests and handle existing reservations. This is a
+ base class defining abstract methods.
+
+ """
+ def __init__(self, rm):
+ self.rm = rm
+
+ def get_time(self):
+ """Return the current time"""
+ return abstract()
+
+ def get_start_time(self):
+ """Return the time at which the clock started ticking"""
+ return abstract()
+
+ def get_next_schedulable_time(self):
+ """Return the next time at which resources could be scheduled.
+
+        The "next schedulable time" serves as a sanity measure when running
+        in real time (to avoid scheduling something "now" only to have "now"
+        already be in the past by the time the scheduling function returns).
+        That is, the "next schedulable time" has nothing to do with whether
+        there are resources available at that time or not.
+ """
+ return abstract()
+
+ def run(self):
+ """Start and run the clock. This function is, in effect,
+ the main loop of the resource manager."""
+ return abstract()
+
+ def print_stats(self, loglevel):
+ """Print some basic statistics about the clock on the log
+
+ Arguments:
+ loglevel -- log level at which statistics should be printed.
+ """
+ return abstract()
+
+
+class SimulatedClock(Clock):
+ """Simulates the passage of time... really fast.
+
+ The simulated clock steps through time to produce an ideal schedule.
+    See the run() function for a description of exactly how time is
+    incremented in the simulated clock.
+
+ """
+
+ def __init__(self, rm, starttime):
+ """Initialize the simulated clock, starting at the provided starttime"""
+ Clock.__init__(self, rm)
+ self.starttime = starttime
+ self.time = starttime
+ self.logger = logging.getLogger("CLOCK")
+ self.statusinterval = self.rm.config.get("status-message-interval")
+
+ def get_time(self):
+ """See docstring in base Clock class."""
+ return self.time
+
+ def get_start_time(self):
+ """See docstring in base Clock class."""
+ return self.starttime
+
+ def get_next_schedulable_time(self):
+ """See docstring in base Clock class."""
+ return self.time
+
+ def run(self):
+ """Runs the simulated clock through time.
+
+ The clock starts at the provided start time. At each point in time,
+ it wakes up the resource manager and then skips to the next time
+ where "something" is happening (see __get_next_time for a more
+ rigorous description of this).
+
+        The clock stops when there is nothing left to do (no pending or
+        queued requests, and no future reservations).
+
+ The simulated clock can only work in conjunction with the
+ tracefile request frontend.
+ """
+ self.logger.status("Starting simulated clock")
+ self.rm.accounting.start(self.get_start_time())
+ prevstatustime = self.time
+ done = False
+ # Main loop
+ while not done:
+ # Check to see if there are any leases which are ending prematurely.
+ # Note that this is unique to simulation.
+ prematureends = self.rm.scheduler.slottable.getPrematurelyEndingRes(self.time)
+
+ # Notify the resource manager about the premature ends
+ for rr in prematureends:
+ self.rm.notify_event(rr.lease.id, constants.EVENT_END_VM)
+
+ # Process reservations starting/stopping at the current time and
+ # check if there are any new requests.
+ self.rm.process_reservations(self.time)
+ self.rm.process_requests(self.time)
+
+ # Since processing requests may have resulted in new reservations
+ # starting now, we process reservations again.
+ self.rm.process_reservations(self.time)
+
+ # Print a status message
+ if self.statusinterval != None and (self.time - prevstatustime).minutes >= self.statusinterval:
+ self.__print_status()
+ prevstatustime = self.time
+
+ # Skip to next point in time.
+ self.time, done = self.__get_next_time()
+
+ # Stop the resource manager
+ self.logger.status("Stopping simulated clock")
+ self.rm.stop()
+
+ def print_stats(self, loglevel):
+ """See docstring in base Clock class."""
+ pass
+
+ def __print_status(self):
+ """Prints status summary."""
+ self.logger.status("STATUS ---Begin---")
+ self.logger.status("STATUS Completed best-effort leases: %i" % self.rm.accounting.data.counters[constants.COUNTER_BESTEFFORTCOMPLETED])
+ self.logger.status("STATUS Queue size: %i" % self.rm.accounting.data.counters[constants.COUNTER_QUEUESIZE])
+ self.logger.status("STATUS Best-effort reservations: %i" % self.rm.scheduler.numbesteffortres)
+ self.logger.status("STATUS Accepted AR leases: %i" % self.rm.accounting.data.counters[constants.COUNTER_ARACCEPTED])
+ self.logger.status("STATUS Rejected AR leases: %i" % self.rm.accounting.data.counters[constants.COUNTER_ARREJECTED])
+ self.logger.status("STATUS ----End----")
+
+ def __get_next_time(self):
+ """Determines what is the next point in time to skip to.
+
+ At a given point in time, the next time is the earliest of the following:
+ * The arrival of the next lease request
+ * The start or end of a reservation (a "changepoint" in the slot table)
+ * A premature end of a lease
+ """
+ done = False
+
+ # Determine candidate next times
+ tracefrontend = self.__get_trace_frontend()
+ nextchangepoint = self.rm.get_next_changepoint()
+ nextprematureend = self.rm.scheduler.slottable.getNextPrematureEnd(self.time)
+ nextreqtime = tracefrontend.get_next_request_time()
+ self.logger.debug("Next change point (in slot table): %s" % nextchangepoint)
+ self.logger.debug("Next request time: %s" % nextreqtime)
+ self.logger.debug("Next premature end: %s" % nextprematureend)
+
+ # The previous time is now
+ prevtime = self.time
+
+ # We initialize the next time to now too, to detect if
+ # we've been unable to determine what the next time is.
+ newtime = self.time
+
+ # Find the earliest of the three, accounting for None values
+ if nextchangepoint != None and nextreqtime == None:
+ newtime = nextchangepoint
+ elif nextchangepoint == None and nextreqtime != None:
+ newtime = nextreqtime
+ elif nextchangepoint != None and nextreqtime != None:
+ newtime = min(nextchangepoint, nextreqtime)
+
+ if nextprematureend != None:
+ newtime = min(nextprematureend, newtime)
+
+ if nextchangepoint == newtime:
+ # Note that, above, we just "peeked" the next changepoint in the slottable.
+ # If it turns out we're skipping to that point in time, then we need to
+ # "get" it (this is because changepoints in the slottable are cached to
+                # minimize access to the slottable). This optimization turned out to
+ # be more trouble than it's worth and will probably be removed sometime
+ # soon.
+ newtime = self.rm.scheduler.slottable.getNextChangePoint(newtime)
+
+        # If there are no more leases in the system, and no more pending requests,
+ # then we're done.
+ if not self.rm.exists_leases_in_rm() and not tracefrontend.exists_more_requests():
+ done = True
+
+ # We can also be done if we've specified that we want to stop when
+ # the best-effort requests are all done or when they've all been submitted.
+ stopwhen = self.rm.config.get("stop-when")
+ besteffort = self.rm.scheduler.leases.get_leases(type = BestEffortLease)
+ pendingbesteffort = [r for r in tracefrontend.requests if isinstance(r, BestEffortLease)]
+ if stopwhen == constants.STOPWHEN_BEDONE:
+            if self.rm.scheduler.is_queue_empty() and len(besteffort) + len(pendingbesteffort) == 0:
+ done = True
+ elif stopwhen == constants.STOPWHEN_BESUBMITTED:
+ if len(pendingbesteffort) == 0:
+ done = True
+
+ # If we didn't arrive at a new time, and we're not done, we've fallen into
+ # an infinite loop. This is A Bad Thing(tm).
+ if newtime == prevtime and done != True:
+ self.logger.error("Simulated clock has fallen into an infinite loop. Dumping state..." )
+ self.rm.print_stats(logging.getLevelName("ERROR"), verbose=True)
+ raise Exception, "Simulated clock has fallen into an infinite loop."
+
+ return newtime, done
+
+ def __get_trace_frontend(self):
+ """Gets the tracefile frontend from the resource manager"""
+ frontends = self.rm.frontends
+ tracef = [f for f in frontends if isinstance(f, TracefileFrontend)]
+ if len(tracef) != 1:
+ raise Exception, "The simulated clock can only work with a tracefile request frontend."
+ else:
+ return tracef[0]
+
+
+class RealClock(Clock):
+ """A realtime clock.
+
+ The real clock wakes up periodically to, in turn, tell the resource manager
+ to wake up. The real clock can also be run in a "fastforward" mode for
+ debugging purposes (however, unlike the simulated clock, the clock will
+ always skip a fixed amount of time into the future).
+ """
+ def __init__(self, rm, quantum, non_sched, fastforward = False):
+ """Initializes the real clock.
+
+ Arguments:
+ rm -- the resource manager
+        quantum -- interval between clock wakeups
+        non_sched -- time (in seconds) after each wakeup during which new
+                     reservations cannot be scheduled (used to compute the
+                     next schedulable time)
+ fastforward -- if True, the clock won't actually sleep
+ for the duration of the quantum."""
+ Clock.__init__(self, rm)
+ self.fastforward = fastforward
+ if not self.fastforward:
+ self.lastwakeup = None
+ else:
+ self.lastwakeup = round_datetime(now())
+ self.logger = logging.getLogger("CLOCK")
+ self.starttime = self.get_time()
+ self.nextschedulable = None
+ self.nextperiodicwakeup = None
+ self.quantum = TimeDelta(seconds=quantum)
+ self.non_sched = TimeDelta(seconds=non_sched)
+
+ def get_time(self):
+ """See docstring in base Clock class."""
+ if not self.fastforward:
+ return now()
+ else:
+ return self.lastwakeup
+
+ def get_start_time(self):
+ """See docstring in base Clock class."""
+ return self.starttime
+
+ def get_next_schedulable_time(self):
+ """See docstring in base Clock class."""
+ return self.nextschedulable
+
+ def run(self):
+ """Runs the real clock through time.
+
+ The clock starts when run() is called. In each iteration of the main loop
+ it will do the following:
+ - Wake up the resource manager
+          - Determine whether anything will happen before the next time
+            the clock is due to wake up (i.e., after the quantum has passed);
+            this information is readily available in the slot table. If so,
+            set the next wakeup time to (now + time until slot table event).
+            Otherwise, set it to (now + quantum).
+ - Sleep until next-wake-up-time
+
+ The clock keeps on tickin' until a SIGINT signal (Ctrl-C if running in the
+ foreground) or a SIGTERM signal is received.
+ """
+ self.logger.status("Starting clock")
+ self.rm.accounting.start(self.get_start_time())
+
+ signal.signal(signal.SIGINT, self.signalhandler_gracefulstop)
+ signal.signal(signal.SIGTERM, self.signalhandler_gracefulstop)
+
+ done = False
+ # Main loop
+ while not done:
+ self.logger.status("Waking up to manage resources")
+
+ # Save the waking time. We want to use a consistent time in the
+ # resource manager operations (if we use now(), we'll get a different
+ # time every time)
+ if not self.fastforward:
+ self.lastwakeup = round_datetime(self.get_time())
+ self.logger.status("Wake-up time recorded as %s" % self.lastwakeup)
+
+ # Next schedulable time
+ self.nextschedulable = round_datetime(self.lastwakeup + self.non_sched)
+
+ # Wake up the resource manager
+ self.rm.process_reservations(self.lastwakeup)
+ # TODO: Compute nextschedulable here, before processing requests
+ self.rm.process_requests(self.nextschedulable)
+
+ # Next wakeup time
+ time_now = now()
+ if self.lastwakeup + self.quantum <= time_now:
+ quantums = (time_now - self.lastwakeup) / self.quantum
+ quantums = int(ceil(quantums)) * self.quantum
+ self.nextperiodicwakeup = round_datetime(self.lastwakeup + quantums)
+ else:
+ self.nextperiodicwakeup = round_datetime(self.lastwakeup + self.quantum)
+
+ # Determine if there's anything to do before the next wakeup time
+ nextchangepoint = self.rm.get_next_changepoint()
+ if nextchangepoint != None and nextchangepoint <= self.nextperiodicwakeup:
+ # We need to wake up earlier to handle a slot table event
+ nextwakeup = nextchangepoint
+ self.rm.scheduler.slottable.getNextChangePoint(self.lastwakeup)
+ self.logger.status("Going back to sleep. Waking up at %s to handle slot table event." % nextwakeup)
+ else:
+ # Nothing to do before waking up
+ nextwakeup = self.nextperiodicwakeup
+ self.logger.status("Going back to sleep. Waking up at %s to see if something interesting has happened by then." % nextwakeup)
+
+            # The only exit condition from the real clock is when the
+            # "stop-when-no-more-leases" option is set to True and there is no more
+            # work left to do.
+            # TODO: This first if is a kludge. Other components should only interact
+            # with options through the configfile's get method. The "stop-when-no-more-leases"
+ # option is currently OpenNebula-specific (while the real clock isn't; it can
+ # be used by both the simulator and the OpenNebula mode). This has to be
+ # fixed.
+ if self.rm.config._options.has_key("stop-when-no-more-leases"):
+ stop_when_no_more_leases = self.rm.config.get("stop-when-no-more-leases")
+ if stop_when_no_more_leases and not self.rm.exists_leases_in_rm():
+ done = True
+
+ # Sleep
+ if not done:
+ if not self.fastforward:
+ sleep((nextwakeup - now()).seconds)
+ else:
+ self.lastwakeup = nextwakeup
+
+ # Stop the resource manager
+ self.logger.status("Stopping real clock")
+ self.rm.stop()
+
+ def print_stats(self, loglevel):
+ """See docstring in base Clock class."""
+ pass
+
+ def signalhandler_gracefulstop(self, signum, frame):
+ """Handler for SIGTERM and SIGINT. Allows Haizea to stop gracefully."""
+ sigstr = ""
+ if signum == signal.SIGTERM:
+ sigstr = " (SIGTERM)"
+ elif signum == signal.SIGINT:
+ sigstr = " (SIGINT)"
+ self.logger.status("Received signal %i%s" %(signum, sigstr))
+ self.logger.status("Stopping gracefully...")
+ self.rm.stop()
+ sys.exit()
+
+if __name__ == "__main__":
+ from haizea.resourcemanager.configfile import HaizeaConfig
+ from haizea.common.config import ConfigException
+ CONFIGFILE = "../../../etc/sample_trace.conf"
+ try:
+ CONFIG = HaizeaConfig.from_file(CONFIGFILE)
+ except ConfigException, msg:
+ print >> sys.stderr, "Error in configuration file:"
+ print >> sys.stderr, msg
+ exit(1)
+ from haizea.resourcemanager.rm import ResourceManager
+ RM = ResourceManager(CONFIG)
+ RM.start()
\ No newline at end of file
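
The two least obvious pieces of clock logic in rm.py above -- the quantum catch-up arithmetic in RealClock.run() and the None-aware choice of the next simulation time in SimulatedClock.__get_next_time() -- can be illustrated with the small standalone sketch below. It is illustrative only: the helper names (_seconds, next_periodic_wakeup, earliest) are made up for this sketch, and it substitutes the standard library's datetime for mx.DateTime and omits round_datetime(), so it approximates rather than reproduces the code in the diff.

from datetime import datetime, timedelta
from math import ceil

def _seconds(delta):
    # timedelta -> seconds as a float (total_seconds() is avoided for
    # compatibility with older Python 2 versions)
    return delta.days * 86400 + delta.seconds + delta.microseconds / 1000000.0

def next_periodic_wakeup(lastwakeup, quantum, time_now):
    # Mirrors the arithmetic in RealClock.run(): if handling requests overran
    # one or more quanta, round up to the next whole quantum after lastwakeup;
    # otherwise simply wake up one quantum after lastwakeup.
    if lastwakeup + quantum <= time_now:
        quantums = int(ceil(_seconds(time_now - lastwakeup) / _seconds(quantum)))
        return lastwakeup + quantums * quantum
    else:
        return lastwakeup + quantum

def earliest(*candidates):
    # None-aware minimum, mirroring how SimulatedClock.__get_next_time() picks
    # the earliest of the next changepoint, the next request arrival and the
    # next premature end (any of which may be None).
    times = [t for t in candidates if t is not None]
    if len(times) == 0:
        return None
    return min(times)

if __name__ == "__main__":
    q = timedelta(seconds=10)
    last = datetime(2008, 10, 20, 12, 0, 0)
    # Request processing took 27 seconds, so we skip straight to last + 30s
    print next_periodic_wakeup(last, q, last + timedelta(seconds=27))
    print earliest(None, last + q, last + 2 * q)
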
Deleted: branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/rpcserver.py
===================================================================
--- trunk/src/haizea/resourcemanager/rpcserver.py 2008-09-16 10:43:48 UTC (rev 501)
+++ branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/rpcserver.py 2008-10-20 16:50:12 UTC (rev 537)
@@ -1,72 +0,0 @@
-# -------------------------------------------------------------------------- #
-# Copyright 2006-2008, University of Chicago #
-# Copyright 2008, Distributed Systems Architecture Group, Universidad #
-# Complutense de Madrid (dsa-research.org) #
-# #
-# Licensed under the Apache License, Version 2.0 (the "License"); you may #
-# not use this file except in compliance with the License. You may obtain #
-# a copy of the License at #
-# #
-# http://www.apache.org/licenses/LICENSE-2.0 #
-# #
-# Unless required by applicable law or agreed to in writing, software #
-# distributed under the License is distributed on an "AS IS" BASIS, #
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
-# See the License for the specific language governing permissions and #
-# limitations under the License. #
-# -------------------------------------------------------------------------- #
-
-import threading
-import logging
-from SimpleXMLRPCServer import SimpleXMLRPCServer
-
-DEFAULT_HAIZEA_PORT = 42493
-
-class RPCServer(object):
- def __init__(self, rm):
- self.rm = rm
- self.logger = logging.getLogger("RPCSERVER")
- self.port = DEFAULT_HAIZEA_PORT
- self.server = SimpleXMLRPCServer(("localhost", self.port), allow_none=True)
- self.register_rpc(self.test_func)
- self.register_rpc(self.cancel_lease)
- self.register_rpc(self.get_leases)
- self.register_rpc(self.get_lease)
- self.register_rpc(self.get_queue)
- self.register_rpc(self.get_hosts)
- self.register_rpc(self.notify_event)
-
- def start(self):
- # Start the XML-RPC server
- server_thread = threading.Thread( target = self.serve )
- server_thread.start()
-
- def register_rpc(self, func):
- self.server.register_function(func)
-
- def serve(self):
- self.logger.info("RPC server started on port %i" % self.port)
- self.server.serve_forever()
-
- def test_func(self):
- self.logger.info("Test RPC function called")
- return 0
-
- def cancel_lease(self, lease_id):
- self.rm.cancel_lease(lease_id)
- return 0
-
- def get_leases(self):
- return [l.xmlrpc_marshall() for l in self.rm.scheduler.scheduledleases.get_leases()]
-
- def get_lease(self, lease_id):
- return 0
-
- def get_queue(self):
- return [l.xmlrpc_marshall() for l in self.rm.scheduler.queue]
-
- def get_hosts(self):
- return [h.xmlrpc_marshall() for h in self.rm.scheduler.resourcepool.nodes]
-
- def notify_event(self, lease_id, enactment_id, event):
- pass
\ No newline at end of file
Copied: branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/rpcserver.py (from rev 508, trunk/src/haizea/resourcemanager/rpcserver.py)
===================================================================
--- branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/rpcserver.py (rev 0)
+++ branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/rpcserver.py 2008-10-20 16:50:12 UTC (rev 537)
@@ -0,0 +1,86 @@
+# -------------------------------------------------------------------------- #
+# Copyright 2006-2008, University of Chicago #
+# Copyright 2008, Distributed Systems Architecture Group, Universidad #
+# Complutense de Madrid (dsa-research.org) #
+# #
+# Licensed under the Apache License, Version 2.0 (the "License"); you may #
+# not use this file except in compliance with the License. You may obtain #
+# a copy of the License at #
+# #
+# http://www.apache.org/licenses/LICENSE-2.0 #
+# #
+# Unless required by applicable law or agreed to in writing, software #
+# distributed under the License is distributed on an "AS IS" BASIS, #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
+# See the License for the specific language governing permissions and #
+# limitations under the License. #
+# -------------------------------------------------------------------------- #
+
+import threading
+import logging
+from SimpleXMLRPCServer import SimpleXMLRPCServer
+
+DEFAULT_HAIZEA_PORT = 42493
+
+class StoppableSimpleXMLRPCServer(SimpleXMLRPCServer):
+ allow_reuse_address = True
+
+ def serve_forever(self):
+ self.run = True
+ while self.run:
+ self.handle_request()
+
+ def stop(self):
+ self.run = False
+
+class RPCServer(object):
+ def __init__(self, rm):
+ self.rm = rm
+ self.logger = logging.getLogger("RPCSERVER")
+ self.port = DEFAULT_HAIZEA_PORT
+ self.server = StoppableSimpleXMLRPCServer(("localhost", self.port), allow_none=True)
+ self.register_rpc(self.test_func)
+ self.register_rpc(self.cancel_lease)
+ self.register_rpc(self.get_leases)
+ self.register_rpc(self.get_lease)
+ self.register_rpc(self.get_queue)
+ self.register_rpc(self.get_hosts)
+ self.register_rpc(self.notify_event)
+
+ def start(self):
+ # Start the XML-RPC server
+ server_thread = threading.Thread( target = self.serve )
+ server_thread.start()
+
+ def stop(self):
+ self.server.stop()
+
+ def register_rpc(self, func):
+ self.server.register_function(func)
+
+ def serve(self):
+ self.logger.info("RPC server started on port %i" % self.port)
+ self.server.serve_forever()
+
+ def test_func(self):
+ self.logger.info("Test RPC function called")
+ return 0
+
+ def cancel_lease(self, lease_id):
+ self.rm.cancel_lease(lease_id)
+ return 0
+
+ def get_leases(self):
+ return [l.xmlrpc_marshall() for l in self.rm.scheduler.leases.get_leases()]
+
+ def get_lease(self, lease_id):
+ return 0
+
+ def get_queue(self):
+ return [l.xmlrpc_marshall() for l in self.rm.scheduler.queue]
+
+ def get_hosts(self):
+ return [h.xmlrpc_marshall() for h in self.rm.scheduler.resourcepool.nodes]
+
+ def notify_event(self, lease_id, enactment_id, event):
+ pass
\ No newline at end of file
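
The XML-RPC functions registered above can be exercised from Python's standard xmlrpclib module. The sketch below is illustrative only: it assumes Haizea is listening on localhost at DEFAULT_HAIZEA_PORT (42493), and the HAIZEA_URI constant and main() wrapper are made up for the example; the method names themselves (test_func, get_queue, get_hosts, get_leases) come from rpcserver.py above.

import xmlrpclib

HAIZEA_URI = "http://localhost:42493"  # DEFAULT_HAIZEA_PORT in rpcserver.py

def main():
    # allow_none=True matches the allow_none setting on the server side
    rpc = xmlrpclib.ServerProxy(HAIZEA_URI, allow_none=True)
    print rpc.test_func()    # logs a message server-side and returns 0
    print rpc.get_queue()    # marshalled queued leases
    print rpc.get_hosts()    # marshalled resource pool nodes
    for lease in rpc.get_leases():
        print lease          # marshalled scheduled/active leases

if __name__ == "__main__":
    main()
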
Deleted: branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/scheduler.py
===================================================================
--- trunk/src/haizea/resourcemanager/scheduler.py 2008-09-16 10:43:48 UTC (rev 501)
+++ branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/scheduler.py 2008-10-20 16:50:12 UTC (rev 537)
@@ -1,1471 +0,0 @@
-# -------------------------------------------------------------------------- #
-# Copyright 2006-2008, University of Chicago #
-# Copyright 2008, Distributed Systems Architecture Group, Universidad #
-# Complutense de Madrid (dsa-research.org) #
-# #
-# Licensed under the Apache License, Version 2.0 (the "License"); you may #
-# not use this file except in compliance with the License. You may obtain #
-# a copy of the License at #
-# #
-# http://www.apache.org/licenses/LICENSE-2.0 #
-# #
-# Unless required by applicable law or agreed to in writing, software #
-# distributed under the License is distributed on an "AS IS" BASIS, #
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
-# See the License for the specific language governing permissions and #
-# limitations under the License. #
-# -------------------------------------------------------------------------- #
-
-
-"""This module provides the main classes for Haizea's scheduler, particularly
-the Scheduler class. The deployment scheduling code (everything that has to be
-done to prepare a lease) happens in the modules inside the
-haizea.resourcemanager.deployment package.
-
-This module provides the following classes:
-
-* SchedException: A scheduling exception
-* ReservationEventHandler: A simple wrapper class
-* Scheduler: Do I really need to spell this one out for you?
-
-TODO: The Scheduler class is in need of some serious refactoring. The likely outcome is
-that it will be divided into two classes: LeaseScheduler, which handles top-level
-lease constructs and doesn't interact with the slot table, and VMScheduler, which
-actually schedules the VMs. The slot table would be contained in VMScheduler and
-in the lease preparation scheduler. In turn, these two would be contained in
-LeaseScheduler.
-"""
-
-import haizea.resourcemanager.datastruct as ds
-import haizea.common.constants as constants
-from haizea.common.utils import round_datetime_delta, round_datetime, estimate_transfer_time, get_config, get_accounting, get_clock
-from haizea.resourcemanager.slottable import SlotTable, SlotFittingException
-from haizea.resourcemanager.datastruct import Lease, ARLease, BestEffortLease, ImmediateLease, ResourceReservation, VMResourceReservation
-from haizea.resourcemanager.resourcepool import ResourcePool, ResourcePoolWithReusableImages
-from operator import attrgetter, itemgetter
-from mx.DateTime import TimeDelta
-
-import logging
-
-class SchedException(Exception):
- """A simple exception class used for scheduling exceptions"""
- pass
-
-class NotSchedulableException(Exception):
- """A simple exception class used when a lease cannot be scheduled
-
- This exception must be raised when a lease cannot be scheduled
- (this is not necessarily an error condition, but the scheduler will
- have to react to it)
- """
- pass
-
-class CriticalSchedException(Exception):
- """A simple exception class used for critical scheduling exceptions
-
- This exception must be raised when a non-recoverable error happens
- (e.g., when there are unexplained inconsistencies in the schedule,
- typically resulting from a code error)
- """
- pass
-
-
-class ReservationEventHandler(object):
- """A wrapper for reservation event handlers.
-
- Reservations (in the slot table) can start and they can end. This class
- provides a convenient wrapper around the event handlers for these two
- events (see Scheduler.__register_handler for details on event handlers)
- """
- def __init__(self, on_start, on_end):
- self.on_start = on_start
- self.on_end = on_end
-
-class Scheduler(object):
- """The Haizea Scheduler
-
- Public methods:
- schedule -- The scheduling function
- process_reservations -- Processes starting/ending reservations at a given time
- enqueue -- Queues a best-effort request
- is_queue_empty -- Is the queue empty?
- exists_scheduled_leases -- Are there any leases scheduled?
-
- Private methods:
- __schedule_ar_lease -- Schedules an AR lease
- __schedule_besteffort_lease -- Schedules a best-effort lease
- __preempt -- Preempts a lease
- __reevaluate_schedule -- Reevaluate the schedule (used after resources become
- unexpectedly unavailable)
- _handle_* -- Reservation event handlers
-
- """
- def __init__(self, slottable, resourcepool, deployment_scheduler):
- self.slottable = slottable
- self.resourcepool = resourcepool
- self.deployment_scheduler = deployment_scheduler
- self.logger = logging.getLogger("SCHED")
-
- self.queue = ds.Queue(self)
- self.leases = ds.LeaseTable(self)
- self.completedleases = ds.LeaseTable(self)
-
- for n in self.resourcepool.get_nodes() + self.resourcepool.get_aux_nodes():
- self.slottable.add_node(n)
-
- self.handlers = {}
-
- self.register_handler(type = ds.VMResourceReservation,
- on_start = Scheduler._handle_start_vm,
- on_end = Scheduler._handle_end_vm)
-
- self.register_handler(type = ds.SuspensionResourceReservation,
- on_start = Scheduler._handle_start_suspend,
- on_end = Scheduler._handle_end_suspend)
-
- self.register_handler(type = ds.ResumptionResourceReservation,
- on_start = Scheduler._handle_start_resume,
- on_end = Scheduler._handle_end_resume)
-
- for (type, handler) in self.deployment_scheduler.handlers.items():
- self.handlers[type] = handler
-
- backfilling = get_config().get("backfilling")
- if backfilling == constants.BACKFILLING_OFF:
- self.maxres = 0
- elif backfilling == constants.BACKFILLING_AGGRESSIVE:
- self.maxres = 1
- elif backfilling == constants.BACKFILLING_CONSERVATIVE:
- self.maxres = 1000000 # Arbitrarily large
- elif backfilling == constants.BACKFILLING_INTERMEDIATE:
- self.maxres = get_config().get("backfilling-reservations")
-
- self.numbesteffortres = 0
-
- def schedule(self, nexttime):
- pending_leases = self.leases.get_leases_by_state(Lease.STATE_PENDING)
- ar_leases = [req for req in pending_leases if isinstance(req, ARLease)]
- im_leases = [req for req in pending_leases if isinstance(req, ImmediateLease)]
- be_leases = [req for req in pending_leases if isinstance(req, BestEffortLease)]
-
- # Queue best-effort requests
- for lease in be_leases:
- self.enqueue(lease)
-
- # Process immediate requests
- for lease_req in im_leases:
- self.__process_im_request(lease_req, nexttime)
-
- # Process AR requests
- for lease_req in ar_leases:
- self.__process_ar_request(lease_req, nexttime)
-
- # Process best-effort requests
- self.__process_queue(nexttime)
-
-
- def process_reservations(self, nowtime):
- starting = self.slottable.get_reservations_starting_at(nowtime)
- ending = self.slottable.get_reservations_ending_at(nowtime)
- for rr in ending:
- self._handle_end_rr(rr.lease, rr)
- self.handlers[type(rr)].on_end(self, rr.lease, rr)
-
- for rr in starting:
- self.handlers[type(rr)].on_start(self, rr.lease, rr)
-
- util = self.slottable.getUtilization(nowtime)
- get_accounting().append_stat(constants.COUNTER_CPUUTILIZATION, util)
-
- def register_handler(self, type, on_start, on_end):
- handler = ReservationEventHandler(on_start=on_start, on_end=on_end)
- self.handlers[type] = handler
-
- def enqueue(self, lease_req):
- """Queues a best-effort lease request"""
- get_accounting().incr_counter(constants.COUNTER_QUEUESIZE, lease_req.id)
- lease_req.state = Lease.STATE_QUEUED
- self.queue.enqueue(lease_req)
- self.logger.info("Received (and queueing) best-effort lease request #%i, %i nodes for %s." % (lease_req.id, lease_req.numnodes, lease_req.duration.requested))
-
- def request_lease(self, lease):
- """
- Request a lease. At this point, it is simply marked as "Pending" and,
- next time the scheduling function is called, the fate of the
- lease will be determined (right now, AR+IM leases get scheduled
- right away, and best-effort leases get placed on a queue)
- """
- lease.state = Lease.STATE_PENDING
- self.leases.add(lease)
-
- def is_queue_empty(self):
-        """Return True if the queue is empty, False otherwise"""
- return self.queue.is_empty()
-
-
- def exists_scheduled_leases(self):
- """Return True if there are any leases scheduled in the future"""
- return not self.slottable.is_empty()
-
- def cancel_lease(self, lease_id):
- """Cancels a lease.
-
- Arguments:
- lease_id -- ID of lease to cancel
- """
- time = get_clock().get_time()
-
- self.logger.info("Cancelling lease %i..." % lease_id)
- if self.leases.has_lease(lease_id):
- # The lease is either running, or scheduled to run
- lease = self.leases.get_lease(lease_id)
-
- if lease.state == Lease.STATE_ACTIVE:
- self.logger.info("Lease %i is active. Stopping active reservation..." % lease_id)
- rr = lease.get_active_reservations(time)[0]
- if isinstance(rr, VMResourceReservation):
- self._handle_unscheduled_end_vm(lease, rr, enact=True)
- # TODO: Handle cancelations in middle of suspensions and
- # resumptions
- elif lease.state in [Lease.STATE_SCHEDULED, Lease.STATE_READY]:
- self.logger.info("Lease %i is scheduled. Cancelling reservations." % lease_id)
- rrs = lease.get_scheduled_reservations()
- for r in rrs:
- lease.remove_rr(r)
- self.slottable.removeReservation(r)
- lease.state = Lease.STATE_CANCELLED
- self.completedleases.add(lease)
- self.leases.remove(lease)
- elif self.queue.has_lease(lease_id):
- # The lease is in the queue, waiting to be scheduled.
- # Cancelling is as simple as removing it from the queue
- self.logger.info("Lease %i is in the queue. Removing..." % lease_id)
- l = self.queue.get_lease(lease_id)
- self.queue.remove_lease(lease)
-
- def fail_lease(self, lease_id):
- """Transitions a lease to a failed state, and does any necessary cleaning up
-
- TODO: For now, just use the cancelling algorithm
-
- Arguments:
- lease -- Lease to fail
- """
- try:
- raise
- self.cancel_lease(lease_id)
- except Exception, msg:
- # Exit if something goes horribly wrong
- raise CriticalSchedException()
-
- def notify_event(self, lease_id, event):
- time = get_clock().get_time()
- if event == constants.EVENT_END_VM:
- lease = self.leases.get_lease(lease_id)
- rr = lease.get_active_reservations(time)[0]
- self._handle_unscheduled_end_vm(lease, rr, enact=False)
-
-
- def __process_ar_request(self, lease_req, nexttime):
- self.logger.info("Received AR lease request #%i, %i nodes from %s to %s." % (lease_req.id, lease_req.numnodes, lease_req.start.requested, lease_req.start.requested + lease_req.duration.requested))
- self.logger.debug(" Start : %s" % lease_req.start)
- self.logger.debug(" Duration: %s" % lease_req.duration)
- self.logger.debug(" ResReq : %s" % lease_req.requested_resources)
-
- accepted = False
- try:
- self.__schedule_ar_lease(lease_req, avoidpreempt=True, nexttime=nexttime)
- self.leases.add(lease_req)
- get_accounting().incr_counter(constants.COUNTER_ARACCEPTED, lease_req.id)
- accepted = True
- except SchedException, msg:
- # Our first try avoided preemption, try again
- # without avoiding preemption.
- # TODO: Roll this into the exact slot fitting algorithm
- try:
- self.logger.debug("LEASE-%i Scheduling exception: %s" % (lease_req.id, msg))
- self.logger.debug("LEASE-%i Trying again without avoiding preemption" % lease_req.id)
- self.__schedule_ar_lease(lease_req, nexttime, avoidpreempt=False)
- self.leases.add(lease_req)
- get_accounting().incr_counter(constants.COUNTER_ARACCEPTED, lease_req.id)
- accepted = True
- except SchedException, msg:
- get_accounting().incr_counter(constants.COUNTER_ARREJECTED, lease_req.id)
- self.logger.debug("LEASE-%i Scheduling exception: %s" % (lease_req.id, msg))
-
- if accepted:
- self.logger.info("AR lease request #%i has been accepted." % lease_req.id)
- else:
- self.logger.info("AR lease request #%i has been rejected." % lease_req.id)
-
-
- def __process_queue(self, nexttime):
- done = False
- newqueue = ds.Queue(self)
- while not done and not self.is_queue_empty():
- if self.numbesteffortres == self.maxres and self.slottable.isFull(nexttime):
- self.logger.debug("Used up all reservations and slot table is full. Skipping rest of queue.")
- done = True
- else:
- lease_req = self.queue.dequeue()
- try:
- self.logger.info("Next request in the queue is lease %i. Attempting to schedule..." % lease_req.id)
- self.logger.debug(" Duration: %s" % lease_req.duration)
- self.logger.debug(" ResReq : %s" % lease_req.requested_resources)
- self.__schedule_besteffort_lease(lease_req, nexttime)
- self.leases.add(lease_req)
- get_accounting().decr_counter(constants.COUNTER_QUEUESIZE, lease_req.id)
- except SchedException, msg:
- # Put back on queue
- newqueue.enqueue(lease_req)
- self.logger.debug("LEASE-%i Scheduling exception: %s" % (lease_req.id, msg))
- self.logger.info("Lease %i could not be scheduled at this time." % lease_req.id)
- if not self.is_backfilling():
- done = True
-
- for lease in self.queue:
- newqueue.enqueue(lease)
-
- self.queue = newqueue
-
-
- def __process_im_request(self, lease_req, nexttime):
- self.logger.info("Received immediate lease request #%i (%i nodes)" % (lease_req.id, lease_req.numnodes))
- self.logger.debug(" Duration: %s" % lease_req.duration)
- self.logger.debug(" ResReq : %s" % lease_req.requested_resources)
-
- try:
- self.__schedule_immediate_lease(lease_req, nexttime=nexttime)
- self.leases.add(lease_req)
- get_accounting().incr_counter(constants.COUNTER_IMACCEPTED, lease_req.id)
- self.logger.info("Immediate lease request #%i has been accepted." % lease_req.id)
- except SchedException, msg:
- get_accounting().incr_counter(constants.COUNTER_IMREJECTED, lease_req.id)
- self.logger.debug("LEASE-%i Scheduling exception: %s" % (lease_req.id, msg))
-
-
- def __schedule_ar_lease(self, lease_req, nexttime, avoidpreempt=True):
- start = lease_req.start.requested
- end = lease_req.start.requested + lease_req.duration.requested
- try:
- (nodeassignment, res, preemptions) = self.__fit_exact(lease_req, preemptible=False, canpreempt=True, avoidpreempt=avoidpreempt)
-
- if len(preemptions) > 0:
- leases = self.__find_preemptable_leases(preemptions, start, end)
- self.logger.info("Must preempt leases %s to make room for AR lease #%i" % ([l.id for l in leases], lease_req.id))
- for lease in leases:
- self.__preempt(lease, preemption_time=start)
-
- # Create VM resource reservations
- vmrr = ds.VMResourceReservation(lease_req, start, end, nodeassignment, res, False)
- vmrr.state = ResourceReservation.STATE_SCHEDULED
-
- # Schedule deployment overhead
- self.deployment_scheduler.schedule(lease_req, vmrr, nexttime)
-
- # Commit reservation to slot table
- # (we don't do this until the very end because the deployment overhead
- # scheduling could still throw an exception)
- lease_req.append_vmrr(vmrr)
- self.slottable.addReservation(vmrr)
- except SlotFittingException, msg:
- raise SchedException, "The requested AR lease is infeasible. Reason: %s" % msg
-
-
- def __schedule_besteffort_lease(self, lease, nexttime):
- try:
- # Schedule the VMs
- canreserve = self.__can_reserve_besteffort_in_future()
- (vmrr, in_future) = self.__fit_asap(lease, nexttime, allow_reservation_in_future = canreserve)
-
- # Schedule deployment
- if lease.state != Lease.STATE_SUSPENDED:
- self.deployment_scheduler.schedule(lease, vmrr, nexttime)
- else:
- # TODO: schedule migrations
- pass
-
- # At this point, the lease is feasible.
- # Commit changes by adding RRs to lease and to slot table
-
- # Add resource reservations to lease
- # TODO: deployment
- # TODO: migrations
- lease.append_vmrr(vmrr)
-
-
- # Add resource reservations to slottable
-
- # TODO: deployment
-
- # TODO: migrations
-
- # Resumptions (if any)
- for resmrr in vmrr.resm_rrs:
- self.slottable.addReservation(resmrr)
-
- # VM
- self.slottable.addReservation(vmrr)
-
- # Suspensions (if any)
- for susprr in vmrr.susp_rrs:
- self.slottable.addReservation(susprr)
-
- if in_future:
- self.numbesteffortres += 1
-
- lease.print_contents()
-
- except SchedException, msg:
- raise SchedException, "The requested best-effort lease is infeasible. Reason: %s" % msg
-
-
-
-
- def __schedule_immediate_lease(self, req, nexttime):
- try:
- (resmrr, vmrr, susprr, reservation) = self.__fit_asap(req, nexttime, allow_reservation_in_future=False)
- # Schedule deployment
- self.deployment_scheduler.schedule(req, vmrr, nexttime)
-
- req.append_rr(vmrr)
- self.slottable.addReservation(vmrr)
-
- req.print_contents()
- except SlotFittingException, msg:
- raise SchedException, "The requested immediate lease is infeasible. Reason: %s" % msg
-
- def __fit_exact(self, leasereq, preemptible=False, canpreempt=True, avoidpreempt=True):
- lease_id = leasereq.id
- start = leasereq.start.requested
- end = leasereq.start.requested + leasereq.duration.requested
- diskImageID = leasereq.diskimage_id
- numnodes = leasereq.numnodes
- resreq = leasereq.requested_resources
-
- availabilitywindow = self.slottable.availabilitywindow
-
- availabilitywindow.initWindow(start, resreq, canpreempt=canpreempt)
- availabilitywindow.printContents(withpreemption = False)
- availabilitywindow.printContents(withpreemption = True)
-
- mustpreempt = False
- unfeasiblewithoutpreemption = False
-
- fitatstart = availabilitywindow.fitAtStart(canpreempt = False)
- if fitatstart < numnodes:
- if not canpreempt:
- raise SlotFittingException, "Not enough resources in specified interval"
- else:
- unfeasiblewithoutpreemption = True
- feasibleend, canfitnopreempt = availabilitywindow.findPhysNodesForVMs(numnodes, end, strictend=True, canpreempt = False)
- fitatend = sum([n for n in canfitnopreempt.values()])
- if fitatend < numnodes:
- if not canpreempt:
- raise SlotFittingException, "Not enough resources in specified interval"
- else:
- unfeasiblewithoutpreemption = True
-
- canfitpreempt = None
- if canpreempt:
- fitatstart = availabilitywindow.fitAtStart(canpreempt = True)
- if fitatstart < numnodes:
- raise SlotFittingException, "Not enough resources in specified interval"
- feasibleendpreempt, canfitpreempt = availabilitywindow.findPhysNodesForVMs(numnodes, end, strictend=True, canpreempt = True)
- fitatend = sum([n for n in canfitpreempt.values()])
- if fitatend < numnodes:
- raise SlotFittingException, "Not enough resources in specified interval"
- else:
- if unfeasiblewithoutpreemption:
- mustpreempt = True
- else:
- mustpreempt = False
-
- # At this point we know if the lease is feasible, and if
- # will require preemption.
- if not mustpreempt:
- self.logger.debug("The VM reservations for this lease are feasible without preemption.")
- else:
- self.logger.debug("The VM reservations for this lease are feasible but will require preemption.")
-
- # merge canfitnopreempt and canfitpreempt
- canfit = {}
- for node in canfitnopreempt:
- vnodes = canfitnopreempt[node]
- canfit[node] = [vnodes, vnodes]
- for node in canfitpreempt:
- vnodes = canfitpreempt[node]
- if canfit.has_key(node):
- canfit[node][1] = vnodes
- else:
- canfit[node] = [0, vnodes]
-
- orderednodes = self.__choose_nodes(canfit, start, canpreempt, avoidpreempt)
-
- self.logger.debug("Node ordering: %s" % orderednodes)
-
- # vnode -> pnode
- nodeassignment = {}
-
- # pnode -> resourcetuple
- res = {}
-
- # physnode -> how many vnodes
- preemptions = {}
-
- vnode = 1
- if avoidpreempt:
- # First pass, without preemption
- for physnode in orderednodes:
- canfitinnode = canfit[physnode][0]
- for i in range(1, canfitinnode+1):
- nodeassignment[vnode] = physnode
- if res.has_key(physnode):
- res[physnode].incr(resreq)
- else:
- res[physnode] = ds.ResourceTuple.copy(resreq)
- canfit[physnode][0] -= 1
- canfit[physnode][1] -= 1
- vnode += 1
- if vnode > numnodes:
- break
- if vnode > numnodes:
- break
-
- # Second pass, with preemption
- if mustpreempt or not avoidpreempt:
- for physnode in orderednodes:
- canfitinnode = canfit[physnode][1]
- for i in range(1, canfitinnode+1):
- nodeassignment[vnode] = physnode
- if res.has_key(physnode):
- res[physnode].incr(resreq)
- else:
- res[physnode] = ds.ResourceTuple.copy(resreq)
- canfit[physnode][1] -= 1
- vnode += 1
- # Check if this will actually result in a preemption
- if canfit[physnode][0] == 0:
- if preemptions.has_key(physnode):
- preemptions[physnode].incr(resreq)
- else:
- preemptions[physnode] = ds.ResourceTuple.copy(resreq)
- else:
- canfit[physnode][0] -= 1
- if vnode > numnodes:
- break
- if vnode > numnodes:
- break
-
- if vnode <= numnodes:
-            raise SchedException, "Availability window indicated that the request was feasible, but it could not be fit"
-
- return nodeassignment, res, preemptions
-
- def __fit_asap(self, lease, nexttime, allow_reservation_in_future = False):
- lease_id = lease.id
- remaining_duration = lease.duration.get_remaining_duration()
- numnodes = lease.numnodes
- requested_resources = lease.requested_resources
- preemptible = lease.preemptible
- mustresume = (lease.state == Lease.STATE_SUSPENDED)
- susptype = get_config().get("suspension")
- if susptype == constants.SUSPENSION_NONE or (susptype == constants.SUSPENSION_SERIAL and lease.numnodes == 1):
- suspendable = False
- else:
- suspendable = True
-
- # Determine earliest start time in each node
- if lease.state == Lease.STATE_QUEUED:
- # Figure out earliest start times based on
- # image schedule and reusable images
- earliest = self.deployment_scheduler.find_earliest_starting_times(lease, nexttime)
- elif lease.state == Lease.STATE_SUSPENDED:
- # No need to transfer images from repository
- # (only intra-node transfer)
- earliest = dict([(node+1, [nexttime, constants.REQTRANSFER_NO, None]) for node in range(lease.numnodes)])
-
-
- canmigrate = get_config().get("migration")
-
- #
- # STEP 1: FIGURE OUT THE MINIMUM DURATION
- #
-
- min_duration = self.__compute_scheduling_threshold(lease)
-
-
- #
- # STEP 2: FIND THE CHANGEPOINTS
- #
-
- # Find the changepoints, and the nodes we can use at each changepoint
- # Nodes may not be available at a changepoint because images
- # cannot be transferred at that time.
- if not mustresume:
- cps = [(node, e[0]) for node, e in earliest.items()]
- cps.sort(key=itemgetter(1))
- curcp = None
- changepoints = []
- nodes = []
- for node, time in cps:
- nodes.append(node)
- if time != curcp:
- changepoints.append([time, nodes[:]])
- curcp = time
- else:
- changepoints[-1][1] = nodes[:]
- else:
- if not canmigrate:
- vmrr = lease.get_last_vmrr()
- curnodes = set(vmrr.nodes.values())
- else:
- curnodes=None
- # If we have to resume this lease, make sure that
- # we have enough time to transfer the images.
- migratetime = self.__estimate_migration_time(lease)
- earliesttransfer = get_clock().get_time() + migratetime
-
- for n in earliest:
- earliest[n][0] = max(earliest[n][0], earliesttransfer)
-
- changepoints = list(set([x[0] for x in earliest.values()]))
- changepoints.sort()
- changepoints = [(x, curnodes) for x in changepoints]
-
- # If we can make reservations in the future,
- # we also consider future changepoints
- # (otherwise, we only allow the VMs to start "now", accounting
- # for the fact that vm images will have to be deployed)
- if allow_reservation_in_future:
- futurecp = self.slottable.findChangePointsAfter(changepoints[-1][0])
- futurecp = [(p,None) for p in futurecp]
- else:
- futurecp = []
-
-
-
- #
- # STEP 3: SLOT FITTING
- #
-
- # If resuming, we also have to allocate enough for the resumption
- if mustresume:
- duration = remaining_duration + self.__estimate_resume_time(lease)
- else:
- duration = remaining_duration
-
-
- # First, assuming we can't make reservations in the future
- start, end, canfit = self.__find_fit_at_points(
- changepoints,
- numnodes,
- requested_resources,
- duration,
- suspendable,
- min_duration)
-
- if start == None:
- if not allow_reservation_in_future:
- # We did not find a suitable starting time. This can happen
- # if we're unable to make future reservations
- raise SchedException, "Could not find enough resources for this request"
- else:
- mustsuspend = (end - start) < duration
- if mustsuspend and not suspendable:
- if not allow_reservation_in_future:
- raise SchedException, "Scheduling this lease would require preempting it, which is not allowed"
- else:
- start = None # No satisfactory start time
-
- # If we haven't been able to fit the lease, check if we can
- # reserve it in the future
- if start == None and allow_reservation_in_future:
- start, end, canfit = self.__find_fit_at_points(
- futurecp,
- numnodes,
- requested_resources,
- duration,
- suspendable,
- min_duration
- )
-
-
- if start in [p[0] for p in futurecp]:
- reservation = True
- else:
- reservation = False
-
-
- #
- # STEP 4: FINAL SLOT FITTING
- #
- # At this point, we know the lease fits, but we have to map it to
- # specific physical nodes.
-
- # Sort physical nodes
- physnodes = canfit.keys()
- if mustresume:
- # If we're resuming, we prefer resuming in the nodes we're already
- # deployed in, to minimize the number of transfers.
- vmrr = lease.get_last_vmrr()
- nodes = set(vmrr.nodes.values())
- availnodes = set(physnodes)
- deplnodes = availnodes.intersection(nodes)
- notdeplnodes = availnodes.difference(nodes)
- physnodes = list(deplnodes) + list(notdeplnodes)
- else:
- physnodes.sort() # Arbitrary, prioritize nodes, as in exact
-
- # Map to physical nodes
- mappings = {}
- res = {}
- vmnode = 1
- while vmnode <= numnodes:
- for n in physnodes:
- if canfit[n]>0:
- canfit[n] -= 1
- mappings[vmnode] = n
- if res.has_key(n):
- res[n].incr(requested_resources)
- else:
- res[n] = ds.ResourceTuple.copy(requested_resources)
- vmnode += 1
- break
-
-
- vmrr = ds.VMResourceReservation(lease, start, end, mappings, res, reservation)
- vmrr.state = ResourceReservation.STATE_SCHEDULED
-
- if mustresume:
- self.__schedule_resumption(vmrr, start)
-
- mustsuspend = (vmrr.end - vmrr.start) < remaining_duration
- if mustsuspend:
- self.__schedule_suspension(vmrr, end)
-
- # Compensate for any overestimation
- if (vmrr.end - vmrr.start) > remaining_duration:
- vmrr.end = vmrr.start + remaining_duration
-
- susp_str = res_str = ""
- if mustresume:
- res_str = " (resuming)"
- if mustsuspend:
- susp_str = " (suspending)"
- self.logger.info("Lease #%i has been scheduled on nodes %s from %s%s to %s%s" % (lease.id, mappings.values(), start, res_str, end, susp_str))
-
- return vmrr, reservation
-
- def __find_fit_at_points(self, changepoints, numnodes, resources, duration, suspendable, min_duration):
- start = None
- end = None
- canfit = None
- availabilitywindow = self.slottable.availabilitywindow
-
-
- for p in changepoints:
- availabilitywindow.initWindow(p[0], resources, p[1], canpreempt = False)
- availabilitywindow.printContents()
-
- if availabilitywindow.fitAtStart() >= numnodes:
- start=p[0]
- maxend = start + duration
- end, canfit = availabilitywindow.findPhysNodesForVMs(numnodes, maxend)
-
- self.logger.debug("This lease can be scheduled from %s to %s" % (start, end))
-
- if end < maxend:
- self.logger.debug("This lease will require suspension (maxend = %s)" % (maxend))
-
- if not suspendable:
- pass
- # If we can't suspend, this fit is no good, and we have to keep looking
- else:
- # If we can suspend, we still have to check if the lease will
- # be able to run for the specified minimum duration
- if end-start > min_duration:
- break # We found a fit; stop looking
- else:
- self.logger.debug("This starting time does not allow for the requested minimum duration (%s < %s)" % (end-start, min_duration))
- # Set start back to None, to indicate that we haven't
- # found a satisfactory start time
- start = None
- else:
- # We've found a satisfactory starting time
- break
-
- return start, end, canfit
-
- def __compute_susprem_times(self, vmrr, time, direction, exclusion, rate):
- times = [] # (start, end, pnode, vnodes)
-
- if exclusion == constants.SUSPRES_EXCLUSION_GLOBAL:
- # Global exclusion (which represents, e.g., reading/writing the memory image files
- # from a global file system) meaning no two suspensions/resumptions can happen at
- # the same time in the entire resource pool.
-
- t = time
- t_prev = None
-
- for (vnode,pnode) in vmrr.nodes.items():
- mem = vmrr.lease.requested_resources.get_by_type(constants.RES_MEM)
- op_time = self.__compute_suspend_resume_time(mem, rate)
- t_prev = t
-
- if direction == constants.DIRECTION_FORWARD:
- t += op_time
- times.append((t_prev, t, pnode, [vnode]))
- elif direction == constants.DIRECTION_BACKWARD:
- t -= op_time
- times.append((t, t_prev, pnode, [vnode]))
-
- elif exclusion == constants.SUSPRES_EXCLUSION_LOCAL:
- # Local exclusion (which represents, e.g., reading the memory image files
- # from a local file system) means no two resumptions can happen at the same
- # time in the same physical node.
- vnodes_in_pnode = {}
- for (vnode,pnode) in vmrr.nodes.items():
- vnodes_in_pnode.setdefault(pnode, []).append(vnode)
- for pnode in vnodes_in_pnode:
- t = time
- t_prev = None
- for vnode in vnodes_in_pnode[pnode]:
- mem = vmrr.lease.requested_resources.get_by_type(constants.RES_MEM)
- op_time = self.__compute_suspend_resume_time(mem, rate)
- t_prev = t
-
- if direction == constants.DIRECTION_FORWARD:
- t += op_time
- times.append((t_prev, t, pnode, [vnode]))
- elif direction == constants.DIRECTION_BACKWARD:
- t -= op_time
- times.append((t, t_prev, pnode, [vnode]))
- # TODO: "consolidate" times (i.e., figure out what operations can be grouped
- # into a single RR. This will not be an issue when running with real hardware,
- # but might impact simulation performance.
-
- return times
-
-
- def __schedule_resumption(self, vmrr, resume_at):
- from haizea.resourcemanager.rm import ResourceManager
- config = ResourceManager.get_singleton().config
- resm_exclusion = config.get("suspendresume-exclusion")
- rate = self.resourcepool.info.get_suspendresume_rate()
-
- if resume_at < vmrr.start or resume_at > vmrr.end:
- raise SchedException, "Tried to schedule a resumption at %s, which is outside the VMRR's duration (%s-%s)" % (resume_at, vmrr.start, vmrr.end)
-
- times = self.__compute_susprem_times(vmrr, resume_at, constants.DIRECTION_FORWARD, resm_exclusion, rate)
- resume_rrs = []
- for (start, end, pnode, vnodes) in times:
- r = ds.ResourceTuple.create_empty()
- mem = vmrr.lease.requested_resources.get_by_type(constants.RES_MEM)
- r.set_by_type(constants.RES_MEM, mem)
- r.set_by_type(constants.RES_DISK, mem)
- resmres = {pnode: r}
- resmrr = ds.ResumptionResourceReservation(vmrr.lease, start, end, resmres, vnodes, vmrr)
- resmrr.state = ResourceReservation.STATE_SCHEDULED
- resume_rrs.append(resmrr)
-
- resume_rrs.sort(key=attrgetter("start"))
-
- resm_end = resume_rrs[-1].end
- if resm_end > vmrr.end:
- raise SchedException, "Determined resumption would end at %s, after the VMRR's end (%s) -- Resume time not being properly estimated?" % (resm_end, vmrr.end)
-
- vmrr.update_start(resm_end)
- for resmrr in resume_rrs:
- vmrr.resm_rrs.append(resmrr)
-
-
- def __schedule_suspension(self, vmrr, suspend_by):
- from haizea.resourcemanager.rm import ResourceManager
- config = ResourceManager.get_singleton().config
- susp_exclusion = config.get("suspendresume-exclusion")
- rate = self.resourcepool.info.get_suspendresume_rate()
-
- if suspend_by < vmrr.start or suspend_by > vmrr.end:
- raise SchedException, "Tried to schedule a suspension by %s, which is outside the VMRR's duration (%s-%s)" % (suspend_by, vmrr.start, vmrr.end)
-
- times = self.__compute_susprem_times(vmrr, suspend_by, constants.DIRECTION_BACKWARD, susp_exclusion, rate)
- suspend_rrs = []
- for (start, end, pnode, vnodes) in times:
- r = ds.ResourceTuple.create_empty()
- mem = vmrr.lease.requested_resources.get_by_type(constants.RES_MEM)
- r.set_by_type(constants.RES_MEM, mem)
- r.set_by_type(constants.RES_DISK, mem)
- suspres = {pnode: r}
- susprr = ds.SuspensionResourceReservation(vmrr.lease, start, end, suspres, vnodes, vmrr)
- susprr.state = ResourceReservation.STATE_SCHEDULED
- suspend_rrs.append(susprr)
-
- suspend_rrs.sort(key=attrgetter("start"))
-
- susp_start = suspend_rrs[0].start
- if susp_start < vmrr.start:
- raise SchedException, "Determined suspension should start at %s, before the VMRR's start (%s) -- Suspend time not being properly estimated?" % (susp_start, vmrr.start)
-
- vmrr.update_end(susp_start)
- for susprr in suspend_rrs:
- vmrr.susp_rrs.append(susprr)
-
- def __compute_suspend_resume_time(self, mem, rate):
- time = float(mem) / rate
- time = round_datetime_delta(TimeDelta(seconds = time))
- return time
-
- def __estimate_suspend_resume_time(self, lease):
- from haizea.resourcemanager.rm import ResourceManager
- config = ResourceManager.get_singleton().config
- susp_exclusion = config.get("suspendresume-exclusion")
- rate = self.resourcepool.info.get_suspendresume_rate()
- mem = lease.requested_resources.get_by_type(constants.RES_MEM)
- if susp_exclusion == constants.SUSPRES_EXCLUSION_GLOBAL:
- return lease.numnodes * self.__compute_suspend_resume_time(mem, rate)
- elif susp_exclusion == constants.SUSPRES_EXCLUSION_LOCAL:
- # Overestimating
- return lease.numnodes * self.__compute_suspend_resume_time(mem, rate)
-
- def __estimate_suspend_time(self, lease):
- return self.__estimate_suspend_resume_time(lease)
-
- def __estimate_resume_time(self, lease):
- return self.__estimate_suspend_resume_time(lease)
-
-
- def __estimate_migration_time(self, lease):
- from haizea.resourcemanager.rm import ResourceManager
- config = ResourceManager.get_singleton().config
- whattomigrate = config.get("what-to-migrate")
- bandwidth = self.resourcepool.info.get_migration_bandwidth()
- if whattomigrate == constants.MIGRATE_NONE:
- return TimeDelta(seconds=0)
- else:
- if whattomigrate == constants.MIGRATE_MEM:
- mbtotransfer = lease.requested_resources.get_by_type(constants.RES_MEM)
- elif whattomigrate == constants.MIGRATE_MEMDISK:
- mbtotransfer = lease.diskimage_size + lease.requested_resources.get_by_type(constants.RES_MEM)
- return estimate_transfer_time(mbtotransfer, bandwidth)
-
- # TODO: Take into account other things like boot overhead, migration overhead, etc.
- def __compute_scheduling_threshold(self, lease):
- from haizea.resourcemanager.rm import ResourceManager
- config = ResourceManager.get_singleton().config
- threshold = config.get("force-scheduling-threshold")
- if threshold != None:
- # If there is a hard-coded threshold, use that
- return threshold
- else:
- factor = config.get("scheduling-threshold-factor")
- susp_overhead = self.__estimate_suspend_time(lease)
- safe_duration = susp_overhead
-
- if lease.state == Lease.STATE_SUSPENDED:
- resm_overhead = self.__estimate_resume_time(lease)
- safe_duration += resm_overhead
-
- # TODO: Incorporate other overheads into the minimum duration
- min_duration = safe_duration
-
- # At the very least, we want to allocate enough time for the
- # safe duration (otherwise, we'll end up with incorrect schedules,
- # where a lease is scheduled to suspend, but isn't even allocated
- # enough time to suspend).
- # The factor is assumed to be non-negative: a factor of 0
- # means we only allocate enough time for potential suspend/resume
- # operations, while a factor of 1 means the lease will get as much
- # running time as is spent on the runtime overheads involved in setting
- # it up.
- threshold = safe_duration + (min_duration * factor)
- return threshold
-
- def __choose_nodes(self, canfit, start, canpreempt, avoidpreempt):
- # TODO2: Choose appropriate prioritizing function based on a
- # config file, instead of hardcoding it
- #
- # TODO3: Basing decisions only on CPU allocations. This is ok for now,
- # since the memory allocation is proportional to the CPU allocation.
- # Later on we need to come up with some sort of weighted average.
-
- nodes = canfit.keys()
-
- # TODO: The deployment module should just provide a list of nodes
- # it prefers
- nodeswithimg=[]
- #self.lease_deployment_type = get_config().get("lease-preparation")
- #if self.lease_deployment_type == constants.DEPLOYMENT_TRANSFER:
- # reusealg = get_config().get("diskimage-reuse")
- # if reusealg==constants.REUSE_IMAGECACHES:
- # nodeswithimg = self.resourcepool.getNodesWithImgInPool(diskImageID, start)
-
- # Compares node x and node y.
- # Returns "x is ??? than y" (???=BETTER/WORSE/EQUAL)
- def comparenodes(x, y):
- hasimgX = x in nodeswithimg
- hasimgY = y in nodeswithimg
-
- # First comparison: A node with no preemptible VMs is preferable
- # to one with preemptible VMs (i.e. we want to avoid preempting)
- canfitnopreemptionX = canfit[x][0]
- canfitpreemptionX = canfit[x][1]
- hasPreemptibleX = canfitpreemptionX > canfitnopreemptionX
-
- canfitnopreemptionY = canfit[y][0]
- canfitpreemptionY = canfit[y][1]
- hasPreemptibleY = canfitpreemptionY > canfitnopreemptionY
-
- # TODO: Factor out common code
- if avoidpreempt:
- if hasPreemptibleX and not hasPreemptibleY:
- return constants.WORSE
- elif not hasPreemptibleX and hasPreemptibleY:
- return constants.BETTER
- elif not hasPreemptibleX and not hasPreemptibleY:
- if hasimgX and not hasimgY:
- return constants.BETTER
- elif not hasimgX and hasimgY:
- return constants.WORSE
- else:
- if canfitnopreemptionX > canfitnopreemptionY: return constants.BETTER
- elif canfitnopreemptionX < canfitnopreemptionY: return constants.WORSE
- else: return constants.EQUAL
- elif hasPreemptibleX and hasPreemptibleY:
- # If both have (some) preemptible resources, we prefer those
- # that involve the fewest preemptions
- preemptX = canfitpreemptionX - canfitnopreemptionX
- preemptY = canfitpreemptionY - canfitnopreemptionY
- if preemptX < preemptY:
- return constants.BETTER
- elif preemptX > preemptY:
- return constants.WORSE
- else:
- if hasimgX and not hasimgY: return constants.BETTER
- elif not hasimgX and hasimgY: return constants.WORSE
- else: return constants.EQUAL
- elif not avoidpreempt:
- # First criterion: can we reuse the image?
- if hasimgX and not hasimgY:
- return constants.BETTER
- elif not hasimgX and hasimgY:
- return constants.WORSE
- else:
- # Now we just want to avoid preemption
- if hasPreemptibleX and not hasPreemptibleY:
- return constants.WORSE
- elif not hasPreemptibleX and hasPreemptibleY:
- return constants.BETTER
- elif hasPreemptibleX and hasPreemptibleY:
- # If both have (some) preemptible resources, we prefer those
- # that involve the fewest preemptions
- preemptX = canfitpreemptionX - canfitnopreemptionX
- preemptY = canfitpreemptionY - canfitnopreemptionY
- if preemptX < preemptY:
- return constants.BETTER
- elif preemptX > preemptY:
- return constants.WORSE
- else:
- if hasimgX and not hasimgY: return constants.BETTER
- elif not hasimgX and hasimgY: return constants.WORSE
- else: return constants.EQUAL
- else:
- return constants.EQUAL
-
- # Order nodes
- nodes.sort(comparenodes)
- return nodes
-
- def __find_preemptable_leases(self, mustpreempt, startTime, endTime):
- def comparepreemptability(rrX, rrY):
- if rrX.lease.submit_time > rrY.lease.submit_time:
- return constants.BETTER
- elif rrX.lease.submit_time < rrY.lease.submit_time:
- return constants.WORSE
- else:
- return constants.EQUAL
-
- def preemptedEnough(amountToPreempt):
- for node in amountToPreempt:
- if not amountToPreempt[node].is_zero_or_less():
- return False
- return True
-
- # Get allocations at the specified time
- atstart = set()
- atmiddle = set()
- nodes = set(mustpreempt.keys())
-
- reservationsAtStart = self.slottable.getReservationsAt(startTime)
- reservationsAtStart = [r for r in reservationsAtStart if r.is_preemptible()
- and len(set(r.resources_in_pnode.keys()) & nodes)>0]
-
- reservationsAtMiddle = self.slottable.get_reservations_starting_between(startTime, endTime)
- reservationsAtMiddle = [r for r in reservationsAtMiddle if r.is_preemptible()
- and len(set(r.resources_in_pnode.keys()) & nodes)>0]
-
- reservationsAtStart.sort(comparepreemptability)
- reservationsAtMiddle.sort(comparepreemptability)
-
- amountToPreempt = {}
- for n in mustpreempt:
- amountToPreempt[n] = ds.ResourceTuple.copy(mustpreempt[n])
-
- # First step: CHOOSE RESOURCES TO PREEMPT AT START OF RESERVATION
- for r in reservationsAtStart:
- # The following will really only come into play when we have
- # multiple VMs per node
- mustpreemptres = False
- for n in r.resources_in_pnode.keys():
- # Don't need to preempt if we've already preempted all
- # the needed resources in node n
- if amountToPreempt.has_key(n) and not amountToPreempt[n].is_zero_or_less():
- amountToPreempt[n].decr(r.resources_in_pnode[n])
- mustpreemptres = True
- if mustpreemptres:
- atstart.add(r)
- if preemptedEnough(amountToPreempt):
- break
-
- # Second step: CHOOSE RESOURCES TO PREEMPT DURING RESERVATION
- if len(reservationsAtMiddle)>0:
- changepoints = set()
- for r in reservationsAtMiddle:
- changepoints.add(r.start)
- changepoints = list(changepoints)
- changepoints.sort()
-
- for cp in changepoints:
- amountToPreempt = {}
- for n in mustpreempt:
- amountToPreempt[n] = ds.ResourceTuple.copy(mustpreempt[n])
- reservations = [r for r in reservationsAtMiddle
- if r.start <= cp and cp < r.end]
- for r in reservations:
- mustpreemptres = False
- for n in r.resources_in_pnode.keys():
- if amountToPreempt.has_key(n) and not amountToPreempt[n].is_zero_or_less():
- amountToPreempt[n].decr(r.resources_in_pnode[n])
- mustpreemptres = True
- if mustpreemptres:
- atmiddle.add(r)
- if preemptedEnough(amountToPreempt):
- break
-
- self.logger.debug("Preempting leases (at start of reservation): %s" % [r.lease.id for r in atstart])
- self.logger.debug("Preempting leases (in middle of reservation): %s" % [r.lease.id for r in atmiddle])
-
- leases = [r.lease for r in atstart|atmiddle]
-
- return leases
-
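The selection loop above can be restated as a small standalone sketch. It uses a single scalar amount per node instead of ResourceTuple objects and ignores the distinction between reservations preempted at the start and in the middle of the interval; the idea is the same: sort the candidates so the most recently submitted leases are preempted first, then subtract their per-node resources until enough has been freed on every node.

def pick_preemptable(candidates, must_preempt):
    """candidates: list of (submit_time, lease_id, {node: units}) tuples.
    must_preempt: {node: units that must be freed on that node}.
    Returns the lease ids chosen for preemption."""
    remaining = dict(must_preempt)
    chosen = []
    # Latest-submitted leases are the "cheapest" to preempt, so try them first.
    for submit_time, lease_id, per_node in sorted(candidates,
                                                  key=lambda c: c[0],
                                                  reverse=True):
        helps = False
        for node, units in per_node.items():
            if remaining.get(node, 0) > 0:
                remaining[node] -= units
                helps = True
        if helps:
            chosen.append(lease_id)
        if all(v <= 0 for v in remaining.values()):
            break
    return chosen

# Need to free 2 units on node 1 and 1 unit on node 2.
print(pick_preemptable([(100, "A", {1: 1}), (200, "B", {1: 1, 2: 1}),
                        (150, "C", {3: 4})],
                       {1: 2, 2: 1}))   # -> ['B', 'A']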
- def __preempt(self, lease, preemption_time):
- self.logger.info("Preempting lease #%i..." % (lease.id))
- self.logger.vdebug("Lease before preemption:")
- lease.print_contents()
- vmrr = lease.get_last_vmrr()
- suspendresumerate = self.resourcepool.info.get_suspendresume_rate()
-
- if vmrr.state == ResourceReservation.STATE_SCHEDULED and vmrr.start >= preemption_time:
- self.logger.info("... lease #%i has been cancelled and requeued." % lease.id)
- self.logger.debug("Lease was set to start in the middle of the preempting lease.")
- lease.state = Lease.STATE_PENDING
- if vmrr.backfill_reservation == True:
- self.numbesteffortres -= 1
- lease.remove_rr(vmrr)
- self.slottable.removeReservation(vmrr)
- # if susprr != None:
- # lease.remove_rr(susprr)
- # self.slottable.removeReservation(susprr)
- for vnode, pnode in lease.vmimagemap.items():
- self.resourcepool.remove_diskimage(pnode, lease.id, vnode)
- self.deployment_scheduler.cancel_deployment(lease)
- lease.vmimagemap = {}
- # TODO: Change state back to queued
- self.queue.enqueue_in_order(lease)
- get_accounting().incr_counter(constants.COUNTER_QUEUESIZE, lease.id)
- else:
- susptype = get_config().get("suspension")
- timebeforesuspend = preemption_time - vmrr.start
- # TODO: Determine if it is in fact the initial VMRR or not. Right now
- # we conservatively overestimate
- canmigrate = get_config().get("migration")
- suspendthreshold = lease.get_suspend_threshold(initial=False, suspendrate=suspendresumerate, migrating=canmigrate)
- # We can't suspend if we're under the suspend threshold
- suspendable = timebeforesuspend >= suspendthreshold
- if suspendable and (susptype == constants.SUSPENSION_ALL or (lease.numnodes == 1 and susptype == constants.SUSPENSION_SERIAL)):
- self.logger.info("... lease #%i will be suspended at %s." % (lease.id, preemption_time))
- # Careful: VMRR update,etc. will have to be done here
- self.__schedule_suspension(lease, preemption_time)
- else:
- self.logger.info("... lease #%i has been cancelled and requeued (cannot be suspended)" % lease.id)
- lease.state = Lease.STATE_PENDING
- if vmrr.backfill_reservation == True:
- self.numbesteffortres -= 1
- lease.remove_rr(vmrr)
- self.slottable.removeReservation(vmrr)
- #if susprr != None:
- # lease.remove_rr(susprr)
- # self.slottable.removeReservation(susprr)
- if lease.state == Lease.STATE_SUSPENDED:
- resmrr = lease.prev_rr(vmrr)
- lease.remove_rr(resmrr)
- self.slottable.removeReservation(resmrr)
- for vnode, pnode in lease.vmimagemap.items():
- self.resourcepool.remove_diskimage(pnode, lease.id, vnode)
- self.deployment_scheduler.cancel_deployment(lease)
- lease.vmimagemap = {}
- # TODO: Change state back to queued
- self.queue.enqueue_in_order(lease)
- get_accounting().incr_counter(constants.COUNTER_QUEUESIZE, lease.id)
- self.logger.vdebug("Lease after preemption:")
- lease.print_contents()
-
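The suspend-or-requeue decision in __preempt above reduces to a couple of comparisons. The sketch below restates it with plain datetimes and a single boolean for the suspension policy; the real code additionally distinguishes SUSPENSION_ALL from SUSPENSION_SERIAL and checks the reservation state.

from datetime import datetime, timedelta

def preemption_action(vmrr_start, preemption_time, suspend_threshold,
                      suspension_allowed):
    """Return "requeue" or "suspend" for a lease preempted at preemption_time."""
    if vmrr_start >= preemption_time:
        # The lease had not started yet: cancel it and put it back on the queue.
        return "requeue"
    time_before_suspend = preemption_time - vmrr_start
    if suspension_allowed and time_before_suspend >= suspend_threshold:
        return "suspend"
    return "requeue"

now = datetime(2008, 10, 20, 12, 0)
print(preemption_action(now - timedelta(minutes=30), now,
                        timedelta(minutes=5), True))    # -> suspend
print(preemption_action(now + timedelta(minutes=10), now,
                        timedelta(minutes=5), True))    # -> requeue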
- def __reevaluate_schedule(self, endinglease, nodes, nexttime, checkedleases):
- self.logger.debug("Reevaluating schedule. Checking for leases scheduled in nodes %s after %s" %(nodes, nexttime))
- leases = []
- # TODO: "getNextLeasesScheduledInNodes" has to be moved to the slot table
- #leases = self.scheduledleases.getNextLeasesScheduledInNodes(nexttime, nodes)
- leases = [l for l in leases if isinstance(l, ds.BestEffortLease) and not l in checkedleases]
- for lease in leases:
- self.logger.debug("Found lease %i" % lease.id)
- lease.print_contents()
- # Earliest time can't be earlier than time when images will be
- # available in node
- earliest = max(nexttime, lease.imagesavail)
- self.__slideback(lease, earliest)
- checkedleases.append(lease)
- #for l in leases:
- # vmrr, susprr = l.getLastVMRR()
- # self.reevaluateSchedule(l, vmrr.nodes.values(), vmrr.end, checkedleases)
-
- def __slideback(self, lease, earliest):
- pass
-# (vmrr, susprr) = lease.get_last_vmrr()
-# vmrrnew = copy.copy(vmrr)
-# nodes = vmrrnew.nodes.values()
-# if lease.state == Lease.LEASE_STATE_SUSPENDED:
-# resmrr = lease.prev_rr(vmrr)
-# originalstart = resmrr.start
-# else:
-# resmrr = None
-# originalstart = vmrrnew.start
-# cp = self.findChangePointsAfter(after=earliest, until=originalstart, nodes=nodes)
-# cp = [earliest] + cp
-# newstart = None
-# for p in cp:
-# self.availabilitywindow.initWindow(p, lease.requested_resources, canpreempt=False)
-# self.availabilitywindow.printContents()
-# if self.availabilitywindow.fitAtStart(nodes=nodes) >= lease.numnodes:
-# (end, canfit) = self.availabilitywindow.findPhysNodesForVMs(lease.numnodes, originalstart)
-# if end == originalstart and set(nodes) <= set(canfit.keys()):
-# self.logger.debug("Can slide back to %s" % p)
-# newstart = p
-# break
-# if newstart == None:
-# # Can't slide back. Leave as is.
-# pass
-# else:
-# diff = originalstart - newstart
-# if resmrr != None:
-# resmrrnew = copy.copy(resmrr)
-# resmrrnew.start -= diff
-# resmrrnew.end -= diff
-# self.updateReservationWithKeyChange(resmrr, resmrrnew)
-# vmrrnew.start -= diff
-#
-# # If the lease was going to be suspended, check to see if
-# # we don't need to suspend any more.
-# remdur = lease.duration.get_remaining_duration()
-# if susprr != None and vmrrnew.end - newstart >= remdur:
-# vmrrnew.end = vmrrnew.start + remdur
-# #vmrrnew.oncomplete = constants.ONCOMPLETE_ENDLEASE
-# lease.remove_rr(susprr)
-# self.removeReservation(susprr)
-# else:
-# vmrrnew.end -= diff
-# # ONLY for simulation
-# if vmrrnew.prematureend != None:
-# vmrrnew.prematureend -= diff
-# self.updateReservationWithKeyChange(vmrr, vmrrnew)
-# self.dirty()
-# self.logger.vdebug("New lease descriptor (after slideback):")
-# lease.print_contents()
-
-
-
- #-------------------------------------------------------------------#
- # #
- # SLOT TABLE EVENT HANDLERS #
- # #
- #-------------------------------------------------------------------#
-
- def _handle_start_vm(self, l, rr):
- self.logger.debug("LEASE-%i Start of handleStartVM" % l.id)
- l.print_contents()
- if l.state == Lease.STATE_READY:
- l.state = Lease.STATE_ACTIVE
- rr.state = ResourceReservation.STATE_ACTIVE
- now_time = get_clock().get_time()
- l.start.actual = now_time
-
- try:
- self.deployment_scheduler.check(l, rr)
- self.resourcepool.start_vms(l, rr)
- # The next two lines have to be moved somewhere more
- # appropriate inside the resourcepool module
- for (vnode, pnode) in rr.nodes.items():
- l.diskimagemap[vnode] = pnode
- except Exception, e:
- self.logger.error("ERROR when starting VMs.")
- raise
- elif l.state == Lease.STATE_RESUMED_READY:
- l.state = Lease.STATE_ACTIVE
- rr.state = ResourceReservation.STATE_ACTIVE
- # No enactment to do here, since all the suspend/resume actions are
- # handled during the suspend/resume RRs
- l.print_contents()
- self.logger.debug("LEASE-%i End of handleStartVM" % l.id)
- self.logger.info("Started VMs for lease %i on nodes %s" % (l.id, rr.nodes.values()))
-
- # TODO: Replace enact with a saner way of handling leases that have failed or
- # ended prematurely.
- # Possibly factor out the "clean up" code to a separate function
- def _handle_end_vm(self, l, rr, enact=True):
- self.logger.debug("LEASE-%i Start of handleEndVM" % l.id)
- self.logger.vdebug("LEASE-%i Before:" % l.id)
- l.print_contents()
- now_time = round_datetime(get_clock().get_time())
- diff = now_time - rr.start
- l.duration.accumulate_duration(diff)
- rr.state = ResourceReservation.STATE_DONE
- if not rr.is_suspending():
- self.resourcepool.stop_vms(l, rr)
- l.state = Lease.STATE_DONE
- l.duration.actual = l.duration.accumulated
- l.end = now_time
- self.completedleases.add(l)
- self.leases.remove(l)
- if isinstance(l, ds.BestEffortLease):
- get_accounting().incr_counter(constants.COUNTER_BESTEFFORTCOMPLETED, l.id)
-
- if isinstance(l, ds.BestEffortLease):
- if rr.backfill_reservation == True:
- self.numbesteffortres -= 1
- self.logger.vdebug("LEASE-%i After:" % l.id)
- l.print_contents()
- self.logger.debug("LEASE-%i End of handleEndVM" % l.id)
- self.logger.info("Stopped VMs for lease %i on nodes %s" % (l.id, rr.nodes.values()))
-
- def _handle_unscheduled_end_vm(self, l, rr, enact=False):
- self.logger.info("LEASE-%i The VM has ended prematurely." % l.id)
- self._handle_end_rr(l, rr)
- if rr.is_suspending():
- rrs = l.next_rrs(rr)
- for r in rrs:
- l.remove_rr(r)
- self.slottable.removeReservation(r)
- rr.end = get_clock().get_time()
- self._handle_end_vm(l, rr, enact=enact)
- nexttime = get_clock().get_next_schedulable_time()
- if self.is_backfilling():
- # We need to reevaluate the schedule to see if there are any future
- # reservations that we can slide back.
- self.__reevaluate_schedule(l, rr.nodes.values(), nexttime, [])
-
- def _handle_start_suspend(self, l, rr):
- self.logger.debug("LEASE-%i Start of handleStartSuspend" % l.id)
- l.print_contents()
- rr.state = ResourceReservation.STATE_ACTIVE
- self.resourcepool.suspend_vms(l, rr)
- for vnode in rr.vnodes:
- pnode = rr.vmrr.nodes[vnode]
- l.memimagemap[vnode] = pnode
- if rr.is_first():
- l.state = Lease.STATE_SUSPENDING
- l.print_contents()
- self.logger.info("Suspending lease %i..." % (l.id))
- self.logger.debug("LEASE-%i End of handleStartSuspend" % l.id)
-
- def _handle_end_suspend(self, l, rr):
- self.logger.debug("LEASE-%i Start of handleEndSuspend" % l.id)
- l.print_contents()
- # TODO: React to incomplete suspend
- self.resourcepool.verify_suspend(l, rr)
- rr.state = ResourceReservation.STATE_DONE
- if rr.is_last():
- l.state = Lease.STATE_SUSPENDED
- self.__enqueue_in_order(l)
- l.print_contents()
- self.logger.debug("LEASE-%i End of handleEndSuspend" % l.id)
- self.logger.info("Lease %i suspended." % (l.id))
-
- def _handle_start_resume(self, l, rr):
- self.logger.debug("LEASE-%i Start of handleStartResume" % l.id)
- l.print_contents()
- self.resourcepool.resume_vms(l, rr)
- rr.state = ResourceReservation.STATE_ACTIVE
- if rr.is_first():
- l.state = Lease.STATE_RESUMING
- l.print_contents()
- self.logger.info("Resuming lease %i..." % (l.id))
- self.logger.debug("LEASE-%i End of handleStartResume" % l.id)
-
- def _handle_end_resume(self, l, rr):
- self.logger.debug("LEASE-%i Start of handleEndResume" % l.id)
- l.print_contents()
- # TODO: React to incomplete resume
- self.resourcepool.verify_resume(l, rr)
- rr.state = ResourceReservation.STATE_DONE
- if rr.is_last():
- l.state = Lease.STATE_RESUMED_READY
- self.logger.info("Resumed lease %i" % (l.id))
- for vnode, pnode in rr.vmrr.nodes.items():
- self.resourcepool.remove_ramfile(pnode, l.id, vnode)
- l.print_contents()
- self.logger.debug("LEASE-%i End of handleEndResume" % l.id)
-
- def _handle_start_migrate(self, l, rr):
- self.logger.debug("LEASE-%i Start of handleStartMigrate" % l.id)
- l.print_contents()
-
- l.print_contents()
- self.logger.debug("LEASE-%i End of handleStartMigrate" % l.id)
- self.logger.info("Migrating lease %i..." % (l.id))
-
- def _handle_end_migrate(self, l, rr):
- self.logger.debug("LEASE-%i Start of handleEndMigrate" % l.id)
- l.print_contents()
-
-# if lease.state == Lease.STATE_SUSPENDED:
-# # Update VM image mappings, since we might be resuming
-# # in different nodes.
-# for vnode, pnode in lease.vmimagemap.items():
-# self.resourcepool.remove_diskimage(pnode, lease.id, vnode)
-# lease.vmimagemap = vmrr.nodes
-# for vnode, pnode in lease.vmimagemap.items():
-# self.resourcepool.add_diskimage(pnode, lease.diskimage_id, lease.diskimage_size, lease.id, vnode)
-#
-# # Update RAM file mappings
-# for vnode, pnode in lease.memimagemap.items():
-# self.resourcepool.remove_ramfile(pnode, lease.id, vnode)
-# for vnode, pnode in vmrr.nodes.items():
-# self.resourcepool.add_ramfile(pnode, lease.id, vnode, lease.requested_resources.get_by_type(constants.RES_MEM))
-# lease.memimagemap[vnode] = pnode
-
- l.print_contents()
- self.logger.debug("LEASE-%i End of handleEndMigrate" % l.id)
- self.logger.info("Migrated lease %i..." % (l.id))
-
- def _handle_end_rr(self, l, rr):
- self.slottable.removeReservation(rr)
-
- def __enqueue_in_order(self, lease):
- get_accounting().incr_counter(constants.COUNTER_QUEUESIZE, lease.id)
- self.queue.enqueue_in_order(lease)
-
- def __can_reserve_besteffort_in_future(self):
- return self.numbesteffortres < self.maxres
-
- def is_backfilling(self):
- return self.maxres > 0
Copied: branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/scheduler.py (from rev 519, trunk/src/haizea/resourcemanager/scheduler.py)
===================================================================
--- branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/scheduler.py (rev 0)
+++ branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/scheduler.py 2008-10-20 16:50:12 UTC (rev 537)
@@ -0,0 +1,1473 @@
+# -------------------------------------------------------------------------- #
+# Copyright 2006-2008, University of Chicago #
+# Copyright 2008, Distributed Systems Architecture Group, Universidad #
+# Complutense de Madrid (dsa-research.org) #
+# #
+# Licensed under the Apache License, Version 2.0 (the "License"); you may #
+# not use this file except in compliance with the License. You may obtain #
+# a copy of the License at #
+# #
+# http://www.apache.org/licenses/LICENSE-2.0 #
+# #
+# Unless required by applicable law or agreed to in writing, software #
+# distributed under the License is distributed on an "AS IS" BASIS, #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
+# See the License for the specific language governing permissions and #
+# limitations under the License. #
+# -------------------------------------------------------------------------- #
+
+
+"""This module provides the main classes for Haizea's scheduler, particularly
+the Scheduler class. The deployment scheduling code (everything that has to be
+done to prepare a lease) happens in the modules inside the
+haizea.resourcemanager.deployment package.
+
+This module provides the following classes:
+
+* SchedException: A scheduling exception
+* ReservationEventHandler: A simple wrapper class
+* Scheduler: Do I really need to spell this one out for you?
+
+TODO: The Scheduler class is in need of some serious refactoring. The likely outcome is
+that it will be divided into two classes: LeaseScheduler, which handles top-level
+lease constructs and doesn't interact with the slot table, and VMScheduler, which
+actually schedules the VMs. The slot table would be contained in VMScheduler and
+in the lease preparation scheduler. In turn, these two would be contained in
+LeaseScheduler.
+"""
+
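For illustration only, a rough skeleton of the split described in the TODO above could look like the following; the method names are placeholders and are not part of this commit.

class VMScheduler(object):
    """Owns the slot table and decides where and when VMs actually run."""
    def __init__(self, slottable, resourcepool):
        self.slottable = slottable
        self.resourcepool = resourcepool

    def schedule_vms(self, lease, nexttime):
        raise NotImplementedError

class LeaseScheduler(object):
    """Handles top-level lease constructs (admission, queueing, preemption
    policy) and delegates all slot table work to the contained schedulers."""
    def __init__(self, vm_scheduler, preparation_scheduler):
        self.vm_scheduler = vm_scheduler
        self.preparation_scheduler = preparation_scheduler

    def request_lease(self, lease):
        raise NotImplementedError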
+import haizea.resourcemanager.datastruct as ds
+import haizea.common.constants as constants
+from haizea.common.utils import round_datetime_delta, round_datetime, estimate_transfer_time, get_config, get_accounting, get_clock
+from haizea.resourcemanager.slottable import SlotTable, SlotFittingException
+from haizea.resourcemanager.datastruct import Lease, ARLease, BestEffortLease, ImmediateLease, ResourceReservation, VMResourceReservation
+from haizea.resourcemanager.resourcepool import ResourcePool, ResourcePoolWithReusableImages
+from operator import attrgetter, itemgetter
+from mx.DateTime import TimeDelta
+
+import logging
+
+class SchedException(Exception):
+ """A simple exception class used for scheduling exceptions"""
+ pass
+
+class NotSchedulableException(Exception):
+ """A simple exception class used when a lease cannot be scheduled
+
+ This exception must be raised when a lease cannot be scheduled
+ (this is not necessarily an error condition, but the scheduler will
+ have to react to it)
+ """
+ pass
+
+class CriticalSchedException(Exception):
+ """A simple exception class used for critical scheduling exceptions
+
+ This exception must be raised when a non-recoverable error happens
+ (e.g., when there are unexplained inconsistencies in the schedule,
+ typically resulting from a code error)
+ """
+ pass
+
+
+class ReservationEventHandler(object):
+ """A wrapper for reservation event handlers.
+
+ Reservations (in the slot table) can start and they can end. This class
+ provides a convenient wrapper around the event handlers for these two
+ events (see Scheduler.__register_handler for details on event handlers)
+ """
+ def __init__(self, on_start, on_end):
+ self.on_start = on_start
+ self.on_end = on_end
+
+class Scheduler(object):
+ """The Haizea Scheduler
+
+ Public methods:
+ schedule -- The scheduling function
+ process_reservations -- Processes starting/ending reservations at a given time
+ enqueue -- Queues a best-effort request
+ is_queue_empty -- Is the queue empty?
+ exists_scheduled_leases -- Are there any leases scheduled?
+
+ Private methods:
+ __schedule_ar_lease -- Schedules an AR lease
+ __schedule_besteffort_lease -- Schedules a best-effort lease
+ __preempt -- Preempts a lease
+ __reevaluate_schedule -- Reevaluate the schedule (used after resources become
+ unexpectedly unavailable)
+ _handle_* -- Reservation event handlers
+
+ """
+ def __init__(self, slottable, resourcepool, deployment_scheduler):
+ self.slottable = slottable
+ self.resourcepool = resourcepool
+ self.deployment_scheduler = deployment_scheduler
+ self.logger = logging.getLogger("SCHED")
+
+ self.queue = ds.Queue(self)
+ self.leases = ds.LeaseTable(self)
+ self.completedleases = ds.LeaseTable(self)
+
+ for n in self.resourcepool.get_nodes() + self.resourcepool.get_aux_nodes():
+ self.slottable.add_node(n)
+
+ self.handlers = {}
+
+ self.register_handler(type = ds.VMResourceReservation,
+ on_start = Scheduler._handle_start_vm,
+ on_end = Scheduler._handle_end_vm)
+
+ self.register_handler(type = ds.SuspensionResourceReservation,
+ on_start = Scheduler._handle_start_suspend,
+ on_end = Scheduler._handle_end_suspend)
+
+ self.register_handler(type = ds.ResumptionResourceReservation,
+ on_start = Scheduler._handle_start_resume,
+ on_end = Scheduler._handle_end_resume)
+
+ for (type, handler) in self.deployment_scheduler.handlers.items():
+ self.handlers[type] = handler
+
+ backfilling = get_config().get("backfilling")
+ if backfilling == constants.BACKFILLING_OFF:
+ self.maxres = 0
+ elif backfilling == constants.BACKFILLING_AGGRESSIVE:
+ self.maxres = 1
+ elif backfilling == constants.BACKFILLING_CONSERVATIVE:
+ self.maxres = 1000000 # Arbitrarily large
+ elif backfilling == constants.BACKFILLING_INTERMEDIATE:
+ self.maxres = get_config().get("backfilling-reservations")
+
+ self.numbesteffortres = 0
+
+ def schedule(self, nexttime):
+ pending_leases = self.leases.get_leases_by_state(Lease.STATE_PENDING)
+ ar_leases = [req for req in pending_leases if isinstance(req, ARLease)]
+ im_leases = [req for req in pending_leases if isinstance(req, ImmediateLease)]
+ be_leases = [req for req in pending_leases if isinstance(req, BestEffortLease)]
+
+ # Queue best-effort requests
+ for lease in be_leases:
+ self.enqueue(lease)
+
+ # Process immediate requests
+ for lease_req in im_leases:
+ self.__process_im_request(lease_req, nexttime)
+
+ # Process AR requests
+ for lease_req in ar_leases:
+ self.__process_ar_request(lease_req, nexttime)
+
+ # Process best-effort requests
+ self.__process_queue(nexttime)
+
+
+ def process_reservations(self, nowtime):
+ starting = self.slottable.get_reservations_starting_at(nowtime)
+ starting = [res for res in starting if res.state == ResourceReservation.STATE_SCHEDULED]
+ ending = self.slottable.get_reservations_ending_at(nowtime)
+ ending = [res for res in ending if res.state == ResourceReservation.STATE_ACTIVE]
+ for rr in ending:
+ self._handle_end_rr(rr.lease, rr)
+ self.handlers[type(rr)].on_end(self, rr.lease, rr)
+
+ for rr in starting:
+ self.handlers[type(rr)].on_start(self, rr.lease, rr)
+
+ util = self.slottable.getUtilization(nowtime)
+ get_accounting().append_stat(constants.COUNTER_CPUUTILIZATION, util)
+
+ def register_handler(self, type, on_start, on_end):
+ handler = ReservationEventHandler(on_start=on_start, on_end=on_end)
+ self.handlers[type] = handler
+
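The registration and dispatch pattern used by register_handler and process_reservations can be shown with a toy, self-contained example; the classes below are stand-ins rather than Haizea's own.

class EventHandler(object):
    def __init__(self, on_start, on_end):
        self.on_start = on_start
        self.on_end = on_end

class VMReservation(object):
    pass

class ToyScheduler(object):
    def __init__(self):
        self.handlers = {}

    def register_handler(self, rr_type, on_start, on_end):
        self.handlers[rr_type] = EventHandler(on_start, on_end)

    def _start_vm(self, rr):
        print("starting %r" % rr)

    def _end_vm(self, rr):
        print("ending %r" % rr)

    def dispatch_start(self, rr):
        # type(rr) selects the handler, exactly as process_reservations does.
        self.handlers[type(rr)].on_start(self, rr)

sched = ToyScheduler()
sched.register_handler(VMReservation, ToyScheduler._start_vm, ToyScheduler._end_vm)
sched.dispatch_start(VMReservation())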
+ def enqueue(self, lease_req):
+ """Queues a best-effort lease request"""
+ get_accounting().incr_counter(constants.COUNTER_QUEUESIZE, lease_req.id)
+ lease_req.state = Lease.STATE_QUEUED
+ self.queue.enqueue(lease_req)
+ self.logger.info("Received (and queueing) best-effort lease request #%i, %i nodes for %s." % (lease_req.id, lease_req.numnodes, lease_req.duration.requested))
+
+ def request_lease(self, lease):
+ """
+ Request a lease. At this point, it is simply marked as "Pending" and,
+ next time the scheduling function is called, the fate of the
+ lease will be determined (right now, AR+IM leases get scheduled
+ right away, and best-effort leases get placed on a queue)
+ """
+ lease.state = Lease.STATE_PENDING
+ self.leases.add(lease)
+
+ def is_queue_empty(self):
+ """Return True if the queue is empty, False otherwise"""
+ return self.queue.is_empty()
+
+
+ def exists_scheduled_leases(self):
+ """Return True if there are any leases scheduled in the future"""
+ return not self.slottable.is_empty()
+
+ def cancel_lease(self, lease_id):
+ """Cancels a lease.
+
+ Arguments:
+ lease_id -- ID of lease to cancel
+ """
+ time = get_clock().get_time()
+
+ self.logger.info("Cancelling lease %i..." % lease_id)
+ if self.leases.has_lease(lease_id):
+ # The lease is either running, or scheduled to run
+ lease = self.leases.get_lease(lease_id)
+
+ if lease.state == Lease.STATE_ACTIVE:
+ self.logger.info("Lease %i is active. Stopping active reservation..." % lease_id)
+ rr = lease.get_active_reservations(time)[0]
+ if isinstance(rr, VMResourceReservation):
+ self._handle_unscheduled_end_vm(lease, rr, enact=True)
+ # TODO: Handle cancellations in middle of suspensions and
+ # resumptions
+ elif lease.state in [Lease.STATE_SCHEDULED, Lease.STATE_READY]:
+ self.logger.info("Lease %i is scheduled. Cancelling reservations." % lease_id)
+ rrs = lease.get_scheduled_reservations()
+ for r in rrs:
+ lease.remove_rr(r)
+ self.slottable.removeReservation(r)
+ lease.state = Lease.STATE_CANCELLED
+ self.completedleases.add(lease)
+ self.leases.remove(lease)
+ elif self.queue.has_lease(lease_id):
+ # The lease is in the queue, waiting to be scheduled.
+ # Cancelling is as simple as removing it from the queue
+ self.logger.info("Lease %i is in the queue. Removing..." % lease_id)
+ l = self.queue.get_lease(lease_id)
+ self.queue.remove_lease(l)
+
+ def fail_lease(self, lease_id):
+ """Transitions a lease to a failed state, and does any necessary cleaning up
+
+ TODO: For now, just use the cancelling algorithm
+
+ Arguments:
+ lease -- Lease to fail
+ """
+ try:
+ self.cancel_lease(lease_id)
+ except Exception, msg:
+ # Exit if something goes horribly wrong
+ raise CriticalSchedException()
+
+ def notify_event(self, lease_id, event):
+ time = get_clock().get_time()
+ if event == constants.EVENT_END_VM:
+ lease = self.leases.get_lease(lease_id)
+ vmrr = lease.get_last_vmrr()
+ self._handle_unscheduled_end_vm(lease, vmrr, enact=False)
+
+
+ def __process_ar_request(self, lease_req, nexttime):
+ self.logger.info("Received AR lease request #%i, %i nodes from %s to %s." % (lease_req.id, lease_req.numnodes, lease_req.start.requested, lease_req.start.requested + lease_req.duration.requested))
+ self.logger.debug(" Start : %s" % lease_req.start)
+ self.logger.debug(" Duration: %s" % lease_req.duration)
+ self.logger.debug(" ResReq : %s" % lease_req.requested_resources)
+
+ accepted = False
+ try:
+ self.__schedule_ar_lease(lease_req, avoidpreempt=True, nexttime=nexttime)
+ self.leases.add(lease_req)
+ get_accounting().incr_counter(constants.COUNTER_ARACCEPTED, lease_req.id)
+ accepted = True
+ except SchedException, msg:
+ # Our first try avoided preemption, try again
+ # without avoiding preemption.
+ # TODO: Roll this into the exact slot fitting algorithm
+ try:
+ self.logger.debug("LEASE-%i Scheduling exception: %s" % (lease_req.id, msg))
+ self.logger.debug("LEASE-%i Trying again without avoiding preemption" % lease_req.id)
+ self.__schedule_ar_lease(lease_req, nexttime, avoidpreempt=False)
+ self.leases.add(lease_req)
+ get_accounting().incr_counter(constants.COUNTER_ARACCEPTED, lease_req.id)
+ accepted = True
+ except SchedException, msg:
+ get_accounting().incr_counter(constants.COUNTER_ARREJECTED, lease_req.id)
+ self.logger.debug("LEASE-%i Scheduling exception: %s" % (lease_req.id, msg))
+
+ if accepted:
+ self.logger.info("AR lease request #%i has been accepted." % lease_req.id)
+ else:
+ self.logger.info("AR lease request #%i has been rejected." % lease_req.id)
+
+
+ def __process_queue(self, nexttime):
+ done = False
+ newqueue = ds.Queue(self)
+ while not done and not self.is_queue_empty():
+ if self.numbesteffortres == self.maxres and self.slottable.isFull(nexttime):
+ self.logger.debug("Used up all reservations and slot table is full. Skipping rest of queue.")
+ done = True
+ else:
+ lease_req = self.queue.dequeue()
+ try:
+ self.logger.info("Next request in the queue is lease %i. Attempting to schedule..." % lease_req.id)
+ self.logger.debug(" Duration: %s" % lease_req.duration)
+ self.logger.debug(" ResReq : %s" % lease_req.requested_resources)
+ self.__schedule_besteffort_lease(lease_req, nexttime)
+ self.leases.add(lease_req)
+ get_accounting().decr_counter(constants.COUNTER_QUEUESIZE, lease_req.id)
+ except SchedException, msg:
+ # Put back on queue
+ newqueue.enqueue(lease_req)
+ self.logger.debug("LEASE-%i Scheduling exception: %s" % (lease_req.id, msg))
+ self.logger.info("Lease %i could not be scheduled at this time." % lease_req.id)
+ if not self.is_backfilling():
+ done = True
+
+ for lease in self.queue:
+ newqueue.enqueue(lease)
+
+ self.queue = newqueue
+
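A simplified sketch of the queue policy above, leaving out the used-up-reservations and slot-table-full check: requests are tried in order, failures keep their original position, and if backfilling is disabled the loop stops at the first request that cannot be scheduled.

def process_queue(queue, try_schedule, backfilling):
    leftover = []
    while queue:
        req = queue.pop(0)
        if try_schedule(req):
            continue
        leftover.append(req)
        if not backfilling:
            break
    return leftover + queue   # unscheduled requests, original order preserved

print(process_queue([1, 2, 3, 4], lambda r: r != 2, backfilling=False))  # -> [2, 3, 4]
print(process_queue([1, 2, 3, 4], lambda r: r != 2, backfilling=True))   # -> [2]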
+
+ def __process_im_request(self, lease_req, nexttime):
+ self.logger.info("Received immediate lease request #%i (%i nodes)" % (lease_req.id, lease_req.numnodes))
+ self.logger.debug(" Duration: %s" % lease_req.duration)
+ self.logger.debug(" ResReq : %s" % lease_req.requested_resources)
+
+ try:
+ self.__schedule_immediate_lease(lease_req, nexttime=nexttime)
+ self.leases.add(lease_req)
+ get_accounting().incr_counter(constants.COUNTER_IMACCEPTED, lease_req.id)
+ self.logger.info("Immediate lease request #%i has been accepted." % lease_req.id)
+ except SchedException, msg:
+ get_accounting().incr_counter(constants.COUNTER_IMREJECTED, lease_req.id)
+ self.logger.debug("LEASE-%i Scheduling exception: %s" % (lease_req.id, msg))
+
+
+ def __schedule_ar_lease(self, lease_req, nexttime, avoidpreempt=True):
+ start = lease_req.start.requested
+ end = lease_req.start.requested + lease_req.duration.requested
+ try:
+ (nodeassignment, res, preemptions) = self.__fit_exact(lease_req, preemptible=False, canpreempt=True, avoidpreempt=avoidpreempt)
+
+ if len(preemptions) > 0:
+ leases = self.__find_preemptable_leases(preemptions, start, end)
+ self.logger.info("Must preempt leases %s to make room for AR lease #%i" % ([l.id for l in leases], lease_req.id))
+ for lease in leases:
+ self.__preempt(lease, preemption_time=start)
+
+ # Create VM resource reservations
+ vmrr = ds.VMResourceReservation(lease_req, start, end, nodeassignment, res, False)
+ vmrr.state = ResourceReservation.STATE_SCHEDULED
+
+ # Schedule deployment overhead
+ self.deployment_scheduler.schedule(lease_req, vmrr, nexttime)
+
+ # Commit reservation to slot table
+ # (we don't do this until the very end because the deployment overhead
+ # scheduling could still throw an exception)
+ lease_req.append_vmrr(vmrr)
+ self.slottable.addReservation(vmrr)
+ except Exception, msg:
+ raise SchedException, "The requested AR lease is infeasible. Reason: %s" % msg
+
+
+ def __schedule_besteffort_lease(self, lease, nexttime):
+ try:
+ # Schedule the VMs
+ canreserve = self.__can_reserve_besteffort_in_future()
+ (vmrr, in_future) = self.__fit_asap(lease, nexttime, allow_reservation_in_future = canreserve)
+
+ # Schedule deployment
+ if lease.state != Lease.STATE_SUSPENDED:
+ self.deployment_scheduler.schedule(lease, vmrr, nexttime)
+ else:
+ # TODO: schedule migrations
+ pass
+
+ # At this point, the lease is feasible.
+ # Commit changes by adding RRs to lease and to slot table
+
+ # Add resource reservations to lease
+ # TODO: deployment
+ # TODO: migrations
+ lease.append_vmrr(vmrr)
+
+
+ # Add resource reservations to slottable
+
+ # TODO: deployment
+
+ # TODO: migrations
+
+ # Resumptions (if any)
+ for resmrr in vmrr.resm_rrs:
+ self.slottable.addReservation(resmrr)
+
+ # VM
+ self.slottable.addReservation(vmrr)
+
+ # Suspensions (if any)
+ for susprr in vmrr.susp_rrs:
+ self.slottable.addReservation(susprr)
+
+ if in_future:
+ self.numbesteffortres += 1
+
+ lease.print_contents()
+
+ except SchedException, msg:
+ raise SchedException, "The requested best-effort lease is infeasible. Reason: %s" % msg
+
+
+
+
+ def __schedule_immediate_lease(self, req, nexttime):
+ try:
+ (vmrr, reservation) = self.__fit_asap(req, nexttime, allow_reservation_in_future=False)
+ # Schedule deployment
+ self.deployment_scheduler.schedule(req, vmrr, nexttime)
+
+ req.append_rr(vmrr)
+ self.slottable.addReservation(vmrr)
+
+ req.print_contents()
+ except SlotFittingException, msg:
+ raise SchedException, "The requested immediate lease is infeasible. Reason: %s" % msg
+
+ def __fit_exact(self, leasereq, preemptible=False, canpreempt=True, avoidpreempt=True):
+ lease_id = leasereq.id
+ start = leasereq.start.requested
+ end = leasereq.start.requested + leasereq.duration.requested
+ diskImageID = leasereq.diskimage_id
+ numnodes = leasereq.numnodes
+ resreq = leasereq.requested_resources
+
+ availabilitywindow = self.slottable.availabilitywindow
+
+ availabilitywindow.initWindow(start, resreq, canpreempt=canpreempt)
+ availabilitywindow.printContents(withpreemption = False)
+ availabilitywindow.printContents(withpreemption = True)
+
+ mustpreempt = False
+ unfeasiblewithoutpreemption = False
+
+ fitatstart = availabilitywindow.fitAtStart(canpreempt = False)
+ if fitatstart < numnodes:
+ if not canpreempt:
+ raise SlotFittingException, "Not enough resources in specified interval"
+ else:
+ unfeasiblewithoutpreemption = True
+ feasibleend, canfitnopreempt = availabilitywindow.findPhysNodesForVMs(numnodes, end, strictend=True, canpreempt = False)
+ fitatend = sum([n for n in canfitnopreempt.values()])
+ if fitatend < numnodes:
+ if not canpreempt:
+ raise SlotFittingException, "Not enough resources in specified interval"
+ else:
+ unfeasiblewithoutpreemption = True
+
+ canfitpreempt = None
+ if canpreempt:
+ fitatstart = availabilitywindow.fitAtStart(canpreempt = True)
+ if fitatstart < numnodes:
+ raise SlotFittingException, "Not enough resources in specified interval"
+ feasibleendpreempt, canfitpreempt = availabilitywindow.findPhysNodesForVMs(numnodes, end, strictend=True, canpreempt = True)
+ fitatend = sum([n for n in canfitpreempt.values()])
+ if fitatend < numnodes:
+ raise SlotFittingException, "Not enough resources in specified interval"
+ else:
+ if unfeasiblewithoutpreemption:
+ mustpreempt = True
+ else:
+ mustpreempt = False
+
+ # At this point we know if the lease is feasible, and whether it
+ # will require preemption.
+ if not mustpreempt:
+ self.logger.debug("The VM reservations for this lease are feasible without preemption.")
+ else:
+ self.logger.debug("The VM reservations for this lease are feasible but will require preemption.")
+
+ # merge canfitnopreempt and canfitpreempt
+ canfit = {}
+ for node in canfitnopreempt:
+ vnodes = canfitnopreempt[node]
+ canfit[node] = [vnodes, vnodes]
+ for node in canfitpreempt:
+ vnodes = canfitpreempt[node]
+ if canfit.has_key(node):
+ canfit[node][1] = vnodes
+ else:
+ canfit[node] = [0, vnodes]
+
+ orderednodes = self.__choose_nodes(canfit, start, canpreempt, avoidpreempt)
+
+ self.logger.debug("Node ordering: %s" % orderednodes)
+
+ # vnode -> pnode
+ nodeassignment = {}
+
+ # pnode -> resourcetuple
+ res = {}
+
+ # physnode -> how many vnodes
+ preemptions = {}
+
+ vnode = 1
+ if avoidpreempt:
+ # First pass, without preemption
+ for physnode in orderednodes:
+ canfitinnode = canfit[physnode][0]
+ for i in range(1, canfitinnode+1):
+ nodeassignment[vnode] = physnode
+ if res.has_key(physnode):
+ res[physnode].incr(resreq)
+ else:
+ res[physnode] = ds.ResourceTuple.copy(resreq)
+ canfit[physnode][0] -= 1
+ canfit[physnode][1] -= 1
+ vnode += 1
+ if vnode > numnodes:
+ break
+ if vnode > numnodes:
+ break
+
+ # Second pass, with preemption
+ if mustpreempt or not avoidpreempt:
+ for physnode in orderednodes:
+ canfitinnode = canfit[physnode][1]
+ for i in range(1, canfitinnode+1):
+ nodeassignment[vnode] = physnode
+ if res.has_key(physnode):
+ res[physnode].incr(resreq)
+ else:
+ res[physnode] = ds.ResourceTuple.copy(resreq)
+ canfit[physnode][1] -= 1
+ vnode += 1
+ # Check if this will actually result in a preemption
+ if canfit[physnode][0] == 0:
+ if preemptions.has_key(physnode):
+ preemptions[physnode].incr(resreq)
+ else:
+ preemptions[physnode] = ds.ResourceTuple.copy(resreq)
+ else:
+ canfit[physnode][0] -= 1
+ if vnode > numnodes:
+ break
+ if vnode > numnodes:
+ break
+
+ if vnode <= numnodes:
+ raise SchedException, "Availability window indicated that the request was feasible, but could not fit it"
+
+ return nodeassignment, res, preemptions
+
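The two assignment passes above can be condensed into a standalone sketch: scalar slot counts per node, one loop per pass, and no avoidpreempt/mustpreempt flags. Like the original, it mutates the canfit bookkeeping in place.

def assign_vnodes(numnodes, ordered_nodes, canfit):
    """canfit: {pnode: [fits without preemption, fits with preemption]}.
    Returns ({vnode: pnode}, set of pnodes where preemption is needed)."""
    mapping, needs_preemption = {}, set()
    vnode = 1
    for preempting in (False, True):              # pass 1, then pass 2
        for pnode in ordered_nodes:
            slot = 1 if preempting else 0
            while vnode <= numnodes and canfit[pnode][slot] > 0:
                mapping[vnode] = pnode
                canfit[pnode][slot] -= 1
                if not preempting:
                    canfit[pnode][1] -= 1         # a free slot also counts as a preemptible one
                elif canfit[pnode][0] == 0:
                    needs_preemption.add(pnode)   # this placement displaces someone
                else:
                    canfit[pnode][0] -= 1
                vnode += 1
        if vnode > numnodes:
            break
    return mapping, needs_preemption

# Three vnodes: 'a' fits one without preemption, the other placements preempt.
print(assign_vnodes(3, ["a", "b"], {"a": [1, 2], "b": [0, 2]}))
# -> mapping {1: 'a', 2: 'a', 3: 'b'}; preemption needed on both 'a' and 'b'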
+ def __fit_asap(self, lease, nexttime, allow_reservation_in_future = False):
+ lease_id = lease.id
+ remaining_duration = lease.duration.get_remaining_duration()
+ numnodes = lease.numnodes
+ requested_resources = lease.requested_resources
+ preemptible = lease.preemptible
+ mustresume = (lease.state == Lease.STATE_SUSPENDED)
+ susptype = get_config().get("suspension")
+ if susptype == constants.SUSPENSION_NONE or (susptype == constants.SUSPENSION_SERIAL and lease.numnodes == 1):
+ suspendable = False
+ else:
+ suspendable = True
+
+ # Determine earliest start time in each node
+ if lease.state == Lease.STATE_QUEUED:
+ # Figure out earliest start times based on
+ # image schedule and reusable images
+ earliest = self.deployment_scheduler.find_earliest_starting_times(lease, nexttime)
+ elif lease.state == Lease.STATE_SUSPENDED:
+ # No need to transfer images from repository
+ # (only intra-node transfer)
+ earliest = dict([(node+1, [nexttime, constants.REQTRANSFER_NO, None]) for node in range(lease.numnodes)])
+
+
+ canmigrate = get_config().get("migration")
+
+ #
+ # STEP 1: FIGURE OUT THE MINIMUM DURATION
+ #
+
+ min_duration = self.__compute_scheduling_threshold(lease)
+
+
+ #
+ # STEP 2: FIND THE CHANGEPOINTS
+ #
+
+ # Find the changepoints, and the nodes we can use at each changepoint
+ # Nodes may not be available at a changepoint because images
+ # cannot be transferred at that time.
+ if not mustresume:
+ cps = [(node, e[0]) for node, e in earliest.items()]
+ cps.sort(key=itemgetter(1))
+ curcp = None
+ changepoints = []
+ nodes = []
+ for node, time in cps:
+ nodes.append(node)
+ if time != curcp:
+ changepoints.append([time, nodes[:]])
+ curcp = time
+ else:
+ changepoints[-1][1] = nodes[:]
+ else:
+ if not canmigrate:
+ vmrr = lease.get_last_vmrr()
+ curnodes = set(vmrr.nodes.values())
+ else:
+ curnodes=None
+ # If we have to resume this lease, make sure that
+ # we have enough time to transfer the images.
+ migratetime = self.__estimate_migration_time(lease)
+ earliesttransfer = get_clock().get_time() + migratetime
+
+ for n in earliest:
+ earliest[n][0] = max(earliest[n][0], earliesttransfer)
+
+ changepoints = list(set([x[0] for x in earliest.values()]))
+ changepoints.sort()
+ changepoints = [(x, curnodes) for x in changepoints]
+
+ # If we can make reservations in the future,
+ # we also consider future changepoints
+ # (otherwise, we only allow the VMs to start "now", accounting
+ # for the fact that vm images will have to be deployed)
+ if allow_reservation_in_future:
+ futurecp = self.slottable.findChangePointsAfter(changepoints[-1][0])
+ futurecp = [(p,None) for p in futurecp]
+ else:
+ futurecp = []
+
+
+
+ #
+ # STEP 3: SLOT FITTING
+ #
+
+ # If resuming, we also have to allocate enough for the resumption
+ if mustresume:
+ duration = remaining_duration + self.__estimate_resume_time(lease)
+ else:
+ duration = remaining_duration
+
+
+ # First, assuming we can't make reservations in the future
+ start, end, canfit = self.__find_fit_at_points(
+ changepoints,
+ numnodes,
+ requested_resources,
+ duration,
+ suspendable,
+ min_duration)
+
+ if start == None:
+ if not allow_reservation_in_future:
+ # We did not find a suitable starting time. This can happen
+ # if we're unable to make future reservations
+ raise SchedException, "Could not find enough resources for this request"
+ else:
+ mustsuspend = (end - start) < duration
+ if mustsuspend and not suspendable:
+ if not allow_reservation_in_future:
+ raise SchedException, "Scheduling this lease would require preempting it, which is not allowed"
+ else:
+ start = None # No satisfactory start time
+
+ # If we haven't been able to fit the lease, check if we can
+ # reserve it in the future
+ if start == None and allow_reservation_in_future:
+ start, end, canfit = self.__find_fit_at_points(
+ futurecp,
+ numnodes,
+ requested_resources,
+ duration,
+ suspendable,
+ min_duration
+ )
+
+
+ if start in [p[0] for p in futurecp]:
+ reservation = True
+ else:
+ reservation = False
+
+
+ #
+ # STEP 4: FINAL SLOT FITTING
+ #
+ # At this point, we know the lease fits, but we have to map it to
+ # specific physical nodes.
+
+ # Sort physical nodes
+ physnodes = canfit.keys()
+ if mustresume:
+ # If we're resuming, we prefer resuming in the nodes we're already
+ # deployed in, to minimize the number of transfers.
+ vmrr = lease.get_last_vmrr()
+ nodes = set(vmrr.nodes.values())
+ availnodes = set(physnodes)
+ deplnodes = availnodes.intersection(nodes)
+ notdeplnodes = availnodes.difference(nodes)
+ physnodes = list(deplnodes) + list(notdeplnodes)
+ else:
+ physnodes.sort() # Arbitrary order for now; TODO: prioritize nodes as in __fit_exact
+
+ # Map to physical nodes
+ mappings = {}
+ res = {}
+ vmnode = 1
+ while vmnode <= numnodes:
+ for n in physnodes:
+ if canfit[n]>0:
+ canfit[n] -= 1
+ mappings[vmnode] = n
+ if res.has_key(n):
+ res[n].incr(requested_resources)
+ else:
+ res[n] = ds.ResourceTuple.copy(requested_resources)
+ vmnode += 1
+ break
+
+
+ vmrr = ds.VMResourceReservation(lease, start, end, mappings, res, reservation)
+ vmrr.state = ResourceReservation.STATE_SCHEDULED
+
+ if mustresume:
+ self.__schedule_resumption(vmrr, start)
+
+ mustsuspend = (vmrr.end - vmrr.start) < remaining_duration
+ if mustsuspend:
+ self.__schedule_suspension(vmrr, end)
+
+ # Compensate for any overestimation
+ if (vmrr.end - vmrr.start) > remaining_duration:
+ vmrr.end = vmrr.start + remaining_duration
+
+ susp_str = res_str = ""
+ if mustresume:
+ res_str = " (resuming)"
+ if mustsuspend:
+ susp_str = " (suspending)"
+ self.logger.info("Lease #%i has been scheduled on nodes %s from %s%s to %s%s" % (lease.id, mappings.values(), start, res_str, end, susp_str))
+
+ return vmrr, reservation
+
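STEP 2 above (grouping physical nodes by their earliest usable time) can be reproduced on its own. Here earliest is reduced to a plain node-to-time dictionary rather than the [time, REQTRANSFER, ...] lists the real code carries around.

def build_changepoints(earliest):
    """earliest: {node: earliest start time}. Returns [(time, [nodes]), ...]
    sorted by time, with the node list accumulating as time advances."""
    changepoints = []
    nodes_so_far = []
    for node, time in sorted(earliest.items(), key=lambda item: item[1]):
        nodes_so_far.append(node)
        if changepoints and changepoints[-1][0] == time:
            changepoints[-1] = (time, list(nodes_so_far))
        else:
            changepoints.append((time, list(nodes_so_far)))
    return changepoints

print(build_changepoints({1: 0, 2: 0, 3: 5, 4: 10}))
# -> [(0, [1, 2]), (5, [1, 2, 3]), (10, [1, 2, 3, 4])]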
+ def __find_fit_at_points(self, changepoints, numnodes, resources, duration, suspendable, min_duration):
+ start = None
+ end = None
+ canfit = None
+ availabilitywindow = self.slottable.availabilitywindow
+
+
+ for p in changepoints:
+ availabilitywindow.initWindow(p[0], resources, p[1], canpreempt = False)
+ availabilitywindow.printContents()
+
+ if availabilitywindow.fitAtStart() >= numnodes:
+ start=p[0]
+ maxend = start + duration
+ end, canfit = availabilitywindow.findPhysNodesForVMs(numnodes, maxend)
+
+ self.logger.debug("This lease can be scheduled from %s to %s" % (start, end))
+
+ if end < maxend:
+ self.logger.debug("This lease will require suspension (maxend = %s)" % (maxend))
+
+ if not suspendable:
+ pass
+ # If we can't suspend, this fit is no good, and we have to keep looking
+ else:
+ # If we can suspend, we still have to check if the lease will
+ # be able to run for the specified minimum duration
+ if end-start > min_duration:
+ break # We found a fit; stop looking
+ else:
+ self.logger.debug("This starting time does not allow for the requested minimum duration (%s < %s)" % (end-start, min_duration))
+ # Set start back to None, to indicate that we haven't
+ # found a satisfactory start time
+ start = None
+ else:
+ # We've found a satisfactory starting time
+ break
+
+ return start, end, canfit
+
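A much simplified restatement of the scan above, with the whole availability window collapsed into a single window_end function instead of the fitAtStart/findPhysNodesForVMs calls: walk the changepoints in order and accept the first start time that either fits the full duration or, if suspension is allowed, at least the minimum duration.

def find_fit(changepoints, duration, min_duration, suspendable, window_end):
    """changepoints: sorted candidate start times.
    window_end(t): how far a lease starting at t could run uninterrupted."""
    for start in changepoints:
        end = min(window_end(start), start + duration)
        if end - start >= duration:
            return start, end             # fits entirely, no suspension needed
        if suspendable and end - start > min_duration:
            return start, end             # partial fit, will be suspended later
    return None, None

# Toy availability: nothing is free before t=10, everything after.
window_end = lambda t: t + (0 if t < 10 else 100)
print(find_fit([0, 5, 10, 20], duration=30, min_duration=5,
               suspendable=True, window_end=window_end))   # -> (10, 40)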
+ def __compute_susprem_times(self, vmrr, time, direction, exclusion, rate):
+ times = [] # (start, end, pnode, vnodes)
+
+ if exclusion == constants.SUSPRES_EXCLUSION_GLOBAL:
+ # Global exclusion (which represents, e.g., reading/writing the memory image files
+ # from a global file system) meaning no two suspensions/resumptions can happen at
+ # the same time in the entire resource pool.
+
+ t = time
+ t_prev = None
+
+ for (vnode,pnode) in vmrr.nodes.items():
+ mem = vmrr.lease.requested_resources.get_by_type(constants.RES_MEM)
+ op_time = self.__compute_suspend_resume_time(mem, rate)
+ t_prev = t
+
+ if direction == constants.DIRECTION_FORWARD:
+ t += op_time
+ times.append((t_prev, t, pnode, [vnode]))
+ elif direction == constants.DIRECTION_BACKWARD:
+ t -= op_time
+ times.append((t, t_prev, pnode, [vnode]))
+
+ elif exclusion == constants.SUSPRES_EXCLUSION_LOCAL:
+ # Local exclusion (which represents, e.g., reading the memory image files
+ # from a local file system) means no two resumptions can happen at the same
+ # time in the same physical node.
+ vnodes_in_pnode = {}
+ for (vnode,pnode) in vmrr.nodes.items():
+ vnodes_in_pnode.setdefault(pnode, []).append(vnode)
+ for pnode in vnodes_in_pnode:
+ t = time
+ t_prev = None
+ for vnode in vnodes_in_pnode[pnode]:
+ mem = vmrr.lease.requested_resources.get_by_type(constants.RES_MEM)
+ op_time = self.__compute_suspend_resume_time(mem, rate)
+ t_prev = t
+
+ if direction == constants.DIRECTION_FORWARD:
+ t += op_time
+ times.append((t_prev, t, pnode, [vnode]))
+ elif direction == constants.DIRECTION_BACKWARD:
+ t -= op_time
+ times.append((t, t_prev, pnode, [vnode]))
+ # TODO: "consolidate" times (i.e., figure out what operations can be grouped
+ # into a single RR). This will not be an issue when running with real hardware,
+ # but might impact simulation performance.
+
+ return times
+
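The effect of the two exclusion modes can be seen in a standalone sketch: with global exclusion every suspend/resume operation is serialized across the whole pool, while with local exclusion only operations on the same physical node are serialized, so different nodes may overlap. The sketch covers the forward (resumption) direction only and uses datetime instead of mx.DateTime.

from datetime import datetime, timedelta

def op_time(mem_mb, rate_mb_per_s):
    # Same formula as __compute_suspend_resume_time: memory size over rate.
    return timedelta(seconds=float(mem_mb) / rate_mb_per_s)

def resume_times(vnode_to_pnode, mem_mb, rate, start, exclusion):
    """Returns a list of (start, end, pnode, vnode) tuples."""
    times = []
    if exclusion == "global":
        t = start                          # one operation at a time, pool-wide
        for vnode, pnode in sorted(vnode_to_pnode.items()):
            times.append((t, t + op_time(mem_mb, rate), pnode, vnode))
            t += op_time(mem_mb, rate)
    else:                                  # "local": serialize per physical node
        next_free = {}
        for vnode, pnode in sorted(vnode_to_pnode.items()):
            t = next_free.get(pnode, start)
            times.append((t, t + op_time(mem_mb, rate), pnode, vnode))
            next_free[pnode] = t + op_time(mem_mb, rate)
    return times

start = datetime(2008, 10, 20, 12, 0)
# Two VMs on pnode p1, one on p2; 1024 MB at 32 MB/s is 32 s per operation.
for row in resume_times({1: "p1", 2: "p1", 3: "p2"}, 1024, 32, start, "local"):
    print(row)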
+
+ def __schedule_resumption(self, vmrr, resume_at):
+ from haizea.resourcemanager.rm import ResourceManager
+ config = ResourceManager.get_singleton().config
+ resm_exclusion = config.get("suspendresume-exclusion")
+ rate = self.resourcepool.info.get_suspendresume_rate()
+
+ if resume_at < vmrr.start or resume_at > vmrr.end:
+ raise SchedException, "Tried to schedule a resumption at %s, which is outside the VMRR's duration (%s-%s)" % (resume_at, vmrr.start, vmrr.end)
+
+ times = self.__compute_susprem_times(vmrr, resume_at, constants.DIRECTION_FORWARD, resm_exclusion, rate)
+ resume_rrs = []
+ for (start, end, pnode, vnodes) in times:
+ r = ds.ResourceTuple.create_empty()
+ mem = vmrr.lease.requested_resources.get_by_type(constants.RES_MEM)
+ r.set_by_type(constants.RES_MEM, mem)
+ r.set_by_type(constants.RES_DISK, mem)
+ resmres = {pnode: r}
+ resmrr = ds.ResumptionResourceReservation(vmrr.lease, start, end, resmres, vnodes, vmrr)
+ resmrr.state = ResourceReservation.STATE_SCHEDULED
+ resume_rrs.append(resmrr)
+
+ resume_rrs.sort(key=attrgetter("start"))
+
+ resm_end = resume_rrs[-1].end
+ if resm_end > vmrr.end:
+ raise SchedException, "Determined resumption would end at %s, after the VMRR's end (%s) -- Resume time not being properly estimated?" % (resm_end, vmrr.end)
+
+ vmrr.update_start(resm_end)
+ for resmrr in resume_rrs:
+ vmrr.resm_rrs.append(resmrr)
+
+
+ def __schedule_suspension(self, vmrr, suspend_by):
+ from haizea.resourcemanager.rm import ResourceManager
+ config = ResourceManager.get_singleton().config
+ susp_exclusion = config.get("suspendresume-exclusion")
+ rate = self.resourcepool.info.get_suspendresume_rate()
+
+ if suspend_by < vmrr.start or suspend_by > vmrr.end:
+ raise SchedException, "Tried to schedule a suspension by %s, which is outside the VMRR's duration (%s-%s)" % (suspend_by, vmrr.start, vmrr.end)
+
+ times = self.__compute_susprem_times(vmrr, suspend_by, constants.DIRECTION_BACKWARD, susp_exclusion, rate)
+ suspend_rrs = []
+ for (start, end, pnode, vnodes) in times:
+ r = ds.ResourceTuple.create_empty()
+ mem = vmrr.lease.requested_resources.get_by_type(constants.RES_MEM)
+ r.set_by_type(constants.RES_MEM, mem)
+ r.set_by_type(constants.RES_DISK, mem)
+ suspres = {pnode: r}
+ susprr = ds.SuspensionResourceReservation(vmrr.lease, start, end, suspres, vnodes, vmrr)
+ susprr.state = ResourceReservation.STATE_SCHEDULED
+ suspend_rrs.append(susprr)
+
+ suspend_rrs.sort(key=attrgetter("start"))
+
+ susp_start = suspend_rrs[0].start
+ if susp_start < vmrr.start:
+ raise SchedException, "Determined suspension should start at %s, before the VMRR's start (%s) -- Suspend time not being properly estimated?" % (susp_start, vmrr.start)
+
+ vmrr.update_end(susp_start)
+
+ # If we're already suspending, remove previous susprrs
+ if vmrr.is_suspending():
+ for susprr in vmrr.susp_rrs:
+ self.slottable.removeReservation(susprr)
+ vmrr.susp_rrs = []
+
+ for susprr in suspend_rrs:
+ vmrr.susp_rrs.append(susprr)
+
+ def __compute_suspend_resume_time(self, mem, rate):
+ time = float(mem) / rate
+ time = round_datetime_delta(TimeDelta(seconds = time))
+ return time
+
+ def __estimate_suspend_resume_time(self, lease):
+ from haizea.resourcemanager.rm import ResourceManager
+ config = ResourceManager.get_singleton().config
+ susp_exclusion = config.get("suspendresume-exclusion")
+ rate = self.resourcepool.info.get_suspendresume_rate()
+ mem = lease.requested_resources.get_by_type(constants.RES_MEM)
+ if susp_exclusion == constants.SUSPRES_EXCLUSION_GLOBAL:
+ return lease.numnodes * self.__compute_suspend_resume_time(mem, rate)
+ elif susp_exclusion == constants.SUSPRES_EXCLUSION_LOCAL:
+ # Overestimating
+ return lease.numnodes * self.__compute_suspend_resume_time(mem, rate)
+
+ def __estimate_suspend_time(self, lease):
+ return self.__estimate_suspend_resume_time(lease)
+
+ def __estimate_resume_time(self, lease):
+ return self.__estimate_suspend_resume_time(lease)
+
+
+ def __estimate_migration_time(self, lease):
+ from haizea.resourcemanager.rm import ResourceManager
+ config = ResourceManager.get_singleton().config
+ whattomigrate = config.get("what-to-migrate")
+ bandwidth = self.resourcepool.info.get_migration_bandwidth()
+ if whattomigrate == constants.MIGRATE_NONE:
+ return TimeDelta(seconds=0)
+ else:
+ if whattomigrate == constants.MIGRATE_MEM:
+ mbtotransfer = lease.requested_resources.get_by_type(constants.RES_MEM)
+ elif whattomigrate == constants.MIGRATE_MEMDISK:
+ mbtotransfer = lease.diskimage_size + lease.requested_resources.get_by_type(constants.RES_MEM)
+ return estimate_transfer_time(mbtotransfer, bandwidth)
+
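A standalone sketch of the estimate above, assuming estimate_transfer_time is simply the amount to transfer divided by the bandwidth (the real helper may also round the result).

from datetime import timedelta

def estimate_migration_time(what, mem_mb, disk_mb, bandwidth_mb_per_s):
    # Mirrors the branches above: migrate nothing, memory only, or memory + disk.
    if what == "nothing":
        return timedelta(seconds=0)
    mb = mem_mb if what == "mem" else mem_mb + disk_mb
    return timedelta(seconds=float(mb) / bandwidth_mb_per_s)

print(estimate_migration_time("mem+disk", mem_mb=1024, disk_mb=4096,
                              bandwidth_mb_per_s=100))   # -> 0:00:51.200000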
+ # TODO: Take into account other things like boot overhead, migration overhead, etc.
+ def __compute_scheduling_threshold(self, lease):
+ from haizea.resourcemanager.rm import ResourceManager
+ config = ResourceManager.get_singleton().config
+ threshold = config.get("force-scheduling-threshold")
+ if threshold != None:
+ # If there is a hard-coded threshold, use that
+ return threshold
+ else:
+ factor = config.get("scheduling-threshold-factor")
+ susp_overhead = self.__estimate_suspend_time(lease)
+ safe_duration = susp_overhead
+
+ if lease.state == Lease.STATE_SUSPENDED:
+ resm_overhead = self.__estimate_resume_time(lease)
+ safe_duration += resm_overhead
+
+ # TODO: Incorporate other overheads into the minimum duration
+ min_duration = safe_duration
+
+ # At the very least, we want to allocate enough time for the
+ # safe duration (otherwise, we'll end up with incorrect schedules,
+ # where a lease is scheduled to suspend, but isn't even allocated
+ # enough time to suspend).
+ # The factor is assumed to be non-negative: a factor of 0
+ # means we only allocate enough time for potential suspend/resume
+ # operations, while a factor of 1 means the lease will get as much
+ # running time as is spent on the runtime overheads involved in setting
+ # it up.
+ threshold = safe_duration + (min_duration * factor)
+ return threshold
+
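As a worked example, using the same numnodes * (mem / rate) overestimate as __estimate_suspend_resume_time: four 1024 MB nodes at 32 MB/s take about 128 s to suspend, so with a factor of 1 the threshold comes to 256 s.

from datetime import timedelta

def scheduling_threshold(suspend_overhead, resume_overhead, suspended, factor):
    # Safe duration: enough time to suspend (plus resume, if already suspended).
    safe = suspend_overhead + (resume_overhead if suspended else timedelta(0))
    min_duration = safe        # other overheads not incorporated yet (see TODO)
    return safe + min_duration * factor

print(scheduling_threshold(timedelta(seconds=128), timedelta(seconds=128),
                           suspended=False, factor=1))   # -> 0:04:16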
+ def __choose_nodes(self, canfit, start, canpreempt, avoidpreempt):
+ # TODO2: Choose appropriate prioritizing function based on a
+        # config file, instead of hardcoding it.
+ #
+ # TODO3: Basing decisions only on CPU allocations. This is ok for now,
+ # since the memory allocation is proportional to the CPU allocation.
+        # Later on we need to come up with some sort of weighted average.
+
+ nodes = canfit.keys()
+
+ # TODO: The deployment module should just provide a list of nodes
+ # it prefers
+ nodeswithimg=[]
+ #self.lease_deployment_type = get_config().get("lease-preparation")
+ #if self.lease_deployment_type == constants.DEPLOYMENT_TRANSFER:
+ # reusealg = get_config().get("diskimage-reuse")
+ # if reusealg==constants.REUSE_IMAGECACHES:
+ # nodeswithimg = self.resourcepool.getNodesWithImgInPool(diskImageID, start)
+
+ # Compares node x and node y.
+ # Returns "x is ??? than y" (???=BETTER/WORSE/EQUAL)
+ def comparenodes(x, y):
+ hasimgX = x in nodeswithimg
+ hasimgY = y in nodeswithimg
+
+            # First comparison: A node with no preemptible VMs is preferable
+ # to one with preemptible VMs (i.e. we want to avoid preempting)
+ canfitnopreemptionX = canfit[x][0]
+ canfitpreemptionX = canfit[x][1]
+ hasPreemptibleX = canfitpreemptionX > canfitnopreemptionX
+
+ canfitnopreemptionY = canfit[y][0]
+ canfitpreemptionY = canfit[y][1]
+ hasPreemptibleY = canfitpreemptionY > canfitnopreemptionY
+
+ # TODO: Factor out common code
+ if avoidpreempt:
+ if hasPreemptibleX and not hasPreemptibleY:
+ return constants.WORSE
+ elif not hasPreemptibleX and hasPreemptibleY:
+ return constants.BETTER
+ elif not hasPreemptibleX and not hasPreemptibleY:
+ if hasimgX and not hasimgY:
+ return constants.BETTER
+ elif not hasimgX and hasimgY:
+ return constants.WORSE
+ else:
+ if canfitnopreemptionX > canfitnopreemptionY: return constants.BETTER
+ elif canfitnopreemptionX < canfitnopreemptionY: return constants.WORSE
+ else: return constants.EQUAL
+ elif hasPreemptibleX and hasPreemptibleY:
+ # If both have (some) preemptible resources, we prefer those
+                    # that involve the fewest preemptions
+ preemptX = canfitpreemptionX - canfitnopreemptionX
+ preemptY = canfitpreemptionY - canfitnopreemptionY
+ if preemptX < preemptY:
+ return constants.BETTER
+ elif preemptX > preemptY:
+ return constants.WORSE
+ else:
+ if hasimgX and not hasimgY: return constants.BETTER
+ elif not hasimgX and hasimgY: return constants.WORSE
+ else: return constants.EQUAL
+ elif not avoidpreempt:
+                # First criterion: can we reuse the image?
+ if hasimgX and not hasimgY:
+ return constants.BETTER
+ elif not hasimgX and hasimgY:
+ return constants.WORSE
+ else:
+ # Now we just want to avoid preemption
+ if hasPreemptibleX and not hasPreemptibleY:
+ return constants.WORSE
+ elif not hasPreemptibleX and hasPreemptibleY:
+ return constants.BETTER
+ elif hasPreemptibleX and hasPreemptibleY:
+ # If both have (some) preemptible resources, we prefer those
+                        # that involve the fewest preemptions
+ preemptX = canfitpreemptionX - canfitnopreemptionX
+ preemptY = canfitpreemptionY - canfitnopreemptionY
+ if preemptX < preemptY:
+ return constants.BETTER
+ elif preemptX > preemptY:
+ return constants.WORSE
+ else:
+ if hasimgX and not hasimgY: return constants.BETTER
+ elif not hasimgX and hasimgY: return constants.WORSE
+ else: return constants.EQUAL
+ else:
+ return constants.EQUAL
+
+ # Order nodes
+ nodes.sort(comparenodes)
+ return nodes
+
+ def __find_preemptable_leases(self, mustpreempt, startTime, endTime):
+ def comparepreemptability(rrX, rrY):
+ if rrX.lease.submit_time > rrY.lease.submit_time:
+ return constants.BETTER
+ elif rrX.lease.submit_time < rrY.lease.submit_time:
+ return constants.WORSE
+ else:
+ return constants.EQUAL
+
+ def preemptedEnough(amountToPreempt):
+ for node in amountToPreempt:
+ if not amountToPreempt[node].is_zero_or_less():
+ return False
+ return True
+
+ # Get allocations at the specified time
+ atstart = set()
+ atmiddle = set()
+ nodes = set(mustpreempt.keys())
+
+ reservationsAtStart = self.slottable.getReservationsAt(startTime)
+ reservationsAtStart = [r for r in reservationsAtStart if r.is_preemptible()
+ and len(set(r.resources_in_pnode.keys()) & nodes)>0]
+
+ reservationsAtMiddle = self.slottable.get_reservations_starting_between(startTime, endTime)
+ reservationsAtMiddle = [r for r in reservationsAtMiddle if r.is_preemptible()
+ and len(set(r.resources_in_pnode.keys()) & nodes)>0]
+
+ reservationsAtStart.sort(comparepreemptability)
+ reservationsAtMiddle.sort(comparepreemptability)
+
+ amountToPreempt = {}
+ for n in mustpreempt:
+ amountToPreempt[n] = ds.ResourceTuple.copy(mustpreempt[n])
+
+ # First step: CHOOSE RESOURCES TO PREEMPT AT START OF RESERVATION
+ for r in reservationsAtStart:
+ # The following will really only come into play when we have
+ # multiple VMs per node
+ mustpreemptres = False
+ for n in r.resources_in_pnode.keys():
+ # Don't need to preempt if we've already preempted all
+ # the needed resources in node n
+ if amountToPreempt.has_key(n) and not amountToPreempt[n].is_zero_or_less():
+ amountToPreempt[n].decr(r.resources_in_pnode[n])
+ mustpreemptres = True
+ if mustpreemptres:
+ atstart.add(r)
+ if preemptedEnough(amountToPreempt):
+ break
+
+ # Second step: CHOOSE RESOURCES TO PREEMPT DURING RESERVATION
+ if len(reservationsAtMiddle)>0:
+ changepoints = set()
+ for r in reservationsAtMiddle:
+ changepoints.add(r.start)
+ changepoints = list(changepoints)
+ changepoints.sort()
+
+ for cp in changepoints:
+ amountToPreempt = {}
+ for n in mustpreempt:
+ amountToPreempt[n] = ds.ResourceTuple.copy(mustpreempt[n])
+ reservations = [r for r in reservationsAtMiddle
+ if r.start <= cp and cp < r.end]
+ for r in reservations:
+ mustpreemptres = False
+ for n in r.resources_in_pnode.keys():
+ if amountToPreempt.has_key(n) and not amountToPreempt[n].is_zero_or_less():
+ amountToPreempt[n].decr(r.resources_in_pnode[n])
+ mustpreemptres = True
+ if mustpreemptres:
+ atmiddle.add(r)
+ if preemptedEnough(amountToPreempt):
+ break
+
+ self.logger.debug("Preempting leases (at start of reservation): %s" % [r.lease.id for r in atstart])
+ self.logger.debug("Preempting leases (in middle of reservation): %s" % [r.lease.id for r in atmiddle])
+
+ leases = [r.lease for r in atstart|atmiddle]
+
+ return leases
+
+ def __preempt(self, lease, preemption_time):
+
+ self.logger.info("Preempting lease #%i..." % (lease.id))
+ self.logger.vdebug("Lease before preemption:")
+ lease.print_contents()
+ vmrr = lease.get_last_vmrr()
+
+ if vmrr.state == ResourceReservation.STATE_SCHEDULED and vmrr.start >= preemption_time:
+ self.logger.debug("Lease was set to start in the middle of the preempting lease.")
+ must_cancel_and_requeue = True
+ else:
+ susptype = get_config().get("suspension")
+ if susptype == constants.SUSPENSION_NONE:
+ must_cancel_and_requeue = True
+ else:
+ time_until_suspend = preemption_time - vmrr.start
+ min_duration = self.__compute_scheduling_threshold(lease)
+ can_suspend = time_until_suspend >= min_duration
+ if not can_suspend:
+ self.logger.debug("Suspending the lease does not meet scheduling threshold.")
+ must_cancel_and_requeue = True
+ else:
+ if lease.numnodes > 1 and susptype == constants.SUSPENSION_SERIAL:
+ self.logger.debug("Can't suspend lease because only suspension of single-node leases is allowed.")
+ must_cancel_and_requeue = True
+ else:
+ self.logger.debug("Lease can be suspended")
+ must_cancel_and_requeue = False
+
+ if must_cancel_and_requeue:
+ self.logger.info("... lease #%i has been cancelled and requeued." % lease.id)
+ if vmrr.backfill_reservation == True:
+ self.numbesteffortres -= 1
+ if vmrr.is_suspending():
+ for susprr in vmrr.susp_rrs:
+ self.slottable.removeReservation(susprr)
+ lease.remove_vmrr(vmrr)
+ self.slottable.removeReservation(vmrr)
+ for vnode, pnode in lease.diskimagemap.items():
+ self.resourcepool.remove_diskimage(pnode, lease.id, vnode)
+ self.deployment_scheduler.cancel_deployment(lease)
+ lease.diskimagemap = {}
+ lease.state = Lease.STATE_QUEUED
+ self.__enqueue_in_order(lease)
+ get_accounting().incr_counter(constants.COUNTER_QUEUESIZE, lease.id)
+ else:
+ self.logger.info("... lease #%i will be suspended at %s." % (lease.id, preemption_time))
+ # Save original start and end time of the vmrr
+ old_start = vmrr.start
+ old_end = vmrr.end
+ self.__schedule_suspension(vmrr, preemption_time)
+ self.slottable.update_reservation_with_key_change(vmrr, old_start, old_end)
+ for susprr in vmrr.susp_rrs:
+ self.slottable.addReservation(susprr)
+
+
+ self.logger.vdebug("Lease after preemption:")
+ lease.print_contents()
+
+ def __reevaluate_schedule(self, endinglease, nodes, nexttime, checkedleases):
+ self.logger.debug("Reevaluating schedule. Checking for leases scheduled in nodes %s after %s" %(nodes, nexttime))
+ leases = []
+ vmrrs = self.slottable.get_next_reservations_in_nodes(nexttime, nodes, rr_type=VMResourceReservation, immediately_next=True)
+ leases = set([rr.lease for rr in vmrrs])
+ leases = [l for l in leases if isinstance(l, ds.BestEffortLease) and not l in checkedleases]
+        for lease in leases:
+            self.logger.debug("Found lease %i" % lease.id)
+            lease.print_contents()
+            # The earliest time can't be earlier than the time when the
+            # images will be available on the node
+            earliest = max(nexttime, lease.imagesavail)
+            self.__slideback(lease, earliest)
+            checkedleases.append(lease)
+ #for l in leases:
+ # vmrr, susprr = l.getLastVMRR()
+ # self.reevaluateSchedule(l, vmrr.nodes.values(), vmrr.end, checkedleases)
+
+ def __slideback(self, lease, earliest):
+ vmrr = lease.get_last_vmrr()
+ # Save original start and end time of the vmrr
+ old_start = vmrr.start
+ old_end = vmrr.end
+ nodes = vmrr.nodes.values()
+ if lease.state == Lease.STATE_SUSPENDED:
+ originalstart = vmrr.resm_rrs[0].start
+ else:
+ originalstart = vmrr.start
+ cp = self.slottable.findChangePointsAfter(after=earliest, until=originalstart, nodes=nodes)
+ cp = [earliest] + cp
+ newstart = None
+ for p in cp:
+ self.slottable.availabilitywindow.initWindow(p, lease.requested_resources, canpreempt=False)
+ self.slottable.availabilitywindow.printContents()
+ if self.slottable.availabilitywindow.fitAtStart(nodes=nodes) >= lease.numnodes:
+ (end, canfit) = self.slottable.availabilitywindow.findPhysNodesForVMs(lease.numnodes, originalstart)
+ if end == originalstart and set(nodes) <= set(canfit.keys()):
+ self.logger.debug("Can slide back to %s" % p)
+ newstart = p
+ break
+ if newstart == None:
+ # Can't slide back. Leave as is.
+ pass
+ else:
+ diff = originalstart - newstart
+ if lease.state == Lease.STATE_SUSPENDED:
+ for resmrr in vmrr.resm_rrs:
+ resmrr_old_start = resmrr.start
+ resmrr_old_end = resmrr.end
+ resmrr.start -= diff
+ resmrr.end -= diff
+ self.slottable.update_reservation_with_key_change(resmrr, resmrr_old_start, resmrr_old_end)
+ vmrr.update_start(vmrr.start - diff)
+
+            # If the lease was going to be suspended, check whether it
+            # still needs to be suspended at all.
+ remdur = lease.duration.get_remaining_duration()
+ if vmrr.is_suspending() and vmrr.end - newstart >= remdur:
+ vmrr.update_end(vmrr.start + remdur)
+ for susprr in vmrr.susp_rrs:
+ self.slottable.removeReservation(susprr)
+ vmrr.susp_rrs = []
+ else:
+ vmrr.update_end(vmrr.end - diff)
+
+ self.slottable.update_reservation_with_key_change(vmrr, old_start, old_end)
+ self.logger.vdebug("New lease descriptor (after slideback):")
+ lease.print_contents()
+
+
+
+ #-------------------------------------------------------------------#
+ # #
+ # SLOT TABLE EVENT HANDLERS #
+ # #
+ #-------------------------------------------------------------------#
+
+ def _handle_start_vm(self, l, rr):
+ self.logger.debug("LEASE-%i Start of handleStartVM" % l.id)
+ l.print_contents()
+ if l.state == Lease.STATE_READY:
+ l.state = Lease.STATE_ACTIVE
+ rr.state = ResourceReservation.STATE_ACTIVE
+ now_time = get_clock().get_time()
+ l.start.actual = now_time
+
+ try:
+ self.deployment_scheduler.check(l, rr)
+ self.resourcepool.start_vms(l, rr)
+ # The next two lines have to be moved somewhere more
+ # appropriate inside the resourcepool module
+ for (vnode, pnode) in rr.nodes.items():
+ l.diskimagemap[vnode] = pnode
+ except Exception, e:
+ self.logger.error("ERROR when starting VMs.")
+ raise
+ elif l.state == Lease.STATE_RESUMED_READY:
+ l.state = Lease.STATE_ACTIVE
+ rr.state = ResourceReservation.STATE_ACTIVE
+ # No enactment to do here, since all the suspend/resume actions are
+ # handled during the suspend/resume RRs
+ l.print_contents()
+ self.logger.debug("LEASE-%i End of handleStartVM" % l.id)
+ self.logger.info("Started VMs for lease %i on nodes %s" % (l.id, rr.nodes.values()))
+
+ # TODO: Replace enact with a saner way of handling leases that have failed or
+ # ended prematurely.
+ # Possibly factor out the "clean up" code to a separate function
+ def _handle_end_vm(self, l, rr, enact=True):
+ self.logger.debug("LEASE-%i Start of handleEndVM" % l.id)
+ self.logger.vdebug("LEASE-%i Before:" % l.id)
+ l.print_contents()
+ now_time = round_datetime(get_clock().get_time())
+ diff = now_time - rr.start
+ l.duration.accumulate_duration(diff)
+ rr.state = ResourceReservation.STATE_DONE
+ if not rr.is_suspending():
+ self.resourcepool.stop_vms(l, rr)
+ l.state = Lease.STATE_DONE
+ l.duration.actual = l.duration.accumulated
+ l.end = now_time
+ self.completedleases.add(l)
+ self.leases.remove(l)
+ if isinstance(l, ds.BestEffortLease):
+ get_accounting().incr_counter(constants.COUNTER_BESTEFFORTCOMPLETED, l.id)
+
+ if isinstance(l, ds.BestEffortLease):
+ if rr.backfill_reservation == True:
+ self.numbesteffortres -= 1
+ self.logger.vdebug("LEASE-%i After:" % l.id)
+ l.print_contents()
+ self.logger.debug("LEASE-%i End of handleEndVM" % l.id)
+ self.logger.info("Stopped VMs for lease %i on nodes %s" % (l.id, rr.nodes.values()))
+
+ def _handle_unscheduled_end_vm(self, l, vmrr, enact=False):
+ self.logger.info("LEASE-%i The VM has ended prematurely." % l.id)
+ self._handle_end_rr(l, vmrr)
+ if vmrr.is_suspending():
+ for susprr in vmrr.susp_rrs:
+ self.slottable.removeReservation(susprr)
+ vmrr.end = get_clock().get_time()
+ self._handle_end_vm(l, vmrr, enact=enact)
+ nexttime = get_clock().get_next_schedulable_time()
+ if self.is_backfilling():
+ # We need to reevaluate the schedule to see if there are any future
+ # reservations that we can slide back.
+ self.__reevaluate_schedule(l, vmrr.nodes.values(), nexttime, [])
+
+ def _handle_start_suspend(self, l, rr):
+ self.logger.debug("LEASE-%i Start of handleStartSuspend" % l.id)
+ l.print_contents()
+ rr.state = ResourceReservation.STATE_ACTIVE
+ self.resourcepool.suspend_vms(l, rr)
+ for vnode in rr.vnodes:
+ pnode = rr.vmrr.nodes[vnode]
+ l.memimagemap[vnode] = pnode
+ if rr.is_first():
+ l.state = Lease.STATE_SUSPENDING
+ l.print_contents()
+ self.logger.info("Suspending lease %i..." % (l.id))
+ self.logger.debug("LEASE-%i End of handleStartSuspend" % l.id)
+
+ def _handle_end_suspend(self, l, rr):
+ self.logger.debug("LEASE-%i Start of handleEndSuspend" % l.id)
+ l.print_contents()
+ # TODO: React to incomplete suspend
+ self.resourcepool.verify_suspend(l, rr)
+ rr.state = ResourceReservation.STATE_DONE
+ if rr.is_last():
+ l.state = Lease.STATE_SUSPENDED
+ self.__enqueue_in_order(l)
+ l.print_contents()
+ self.logger.debug("LEASE-%i End of handleEndSuspend" % l.id)
+ self.logger.info("Lease %i suspended." % (l.id))
+
+ def _handle_start_resume(self, l, rr):
+ self.logger.debug("LEASE-%i Start of handleStartResume" % l.id)
+ l.print_contents()
+ self.resourcepool.resume_vms(l, rr)
+ rr.state = ResourceReservation.STATE_ACTIVE
+ if rr.is_first():
+ l.state = Lease.STATE_RESUMING
+ l.print_contents()
+ self.logger.info("Resuming lease %i..." % (l.id))
+ self.logger.debug("LEASE-%i End of handleStartResume" % l.id)
+
+ def _handle_end_resume(self, l, rr):
+ self.logger.debug("LEASE-%i Start of handleEndResume" % l.id)
+ l.print_contents()
+ # TODO: React to incomplete resume
+ self.resourcepool.verify_resume(l, rr)
+ rr.state = ResourceReservation.STATE_DONE
+ if rr.is_last():
+ l.state = Lease.STATE_RESUMED_READY
+ self.logger.info("Resumed lease %i" % (l.id))
+ for vnode, pnode in rr.vmrr.nodes.items():
+ self.resourcepool.remove_ramfile(pnode, l.id, vnode)
+ l.print_contents()
+ self.logger.debug("LEASE-%i End of handleEndResume" % l.id)
+
+ def _handle_start_migrate(self, l, rr):
+ self.logger.debug("LEASE-%i Start of handleStartMigrate" % l.id)
+ l.print_contents()
+
+ l.print_contents()
+ self.logger.debug("LEASE-%i End of handleStartMigrate" % l.id)
+ self.logger.info("Migrating lease %i..." % (l.id))
+
+ def _handle_end_migrate(self, l, rr):
+ self.logger.debug("LEASE-%i Start of handleEndMigrate" % l.id)
+ l.print_contents()
+
+# if lease.state == Lease.STATE_SUSPENDED:
+# # Update VM image mappings, since we might be resuming
+# # in different nodes.
+# for vnode, pnode in lease.vmimagemap.items():
+# self.resourcepool.remove_diskimage(pnode, lease.id, vnode)
+# lease.vmimagemap = vmrr.nodes
+# for vnode, pnode in lease.vmimagemap.items():
+# self.resourcepool.add_diskimage(pnode, lease.diskimage_id, lease.diskimage_size, lease.id, vnode)
+#
+# # Update RAM file mappings
+# for vnode, pnode in lease.memimagemap.items():
+# self.resourcepool.remove_ramfile(pnode, lease.id, vnode)
+# for vnode, pnode in vmrr.nodes.items():
+# self.resourcepool.add_ramfile(pnode, lease.id, vnode, lease.requested_resources.get_by_type(constants.RES_MEM))
+# lease.memimagemap[vnode] = pnode
+
+ l.print_contents()
+ self.logger.debug("LEASE-%i End of handleEndMigrate" % l.id)
+ self.logger.info("Migrated lease %i..." % (l.id))
+
+ def _handle_end_rr(self, l, rr):
+ self.slottable.removeReservation(rr)
+
+ def __enqueue_in_order(self, lease):
+ get_accounting().incr_counter(constants.COUNTER_QUEUESIZE, lease.id)
+ self.queue.enqueue_in_order(lease)
+
+ def __can_reserve_besteffort_in_future(self):
+ return self.numbesteffortres < self.maxres
+
+ def is_backfilling(self):
+ return self.maxres > 0
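
For reference, here is a minimal standalone sketch of the threshold arithmetic implemented by __estimate_suspend_resume_time() and __compute_scheduling_threshold() above. The helper names and the sample numbers are illustrative only and are not part of Haizea.

    def estimate_suspend_resume_time(mem_mb, rate_mb_per_s, numnodes):
        # As in the scheduler code above: numnodes * (mem / rate), used for
        # both the global and the (overestimated) local exclusion cases.
        return numnodes * (float(mem_mb) / rate_mb_per_s)

    def scheduling_threshold(mem_mb, rate_mb_per_s, numnodes, factor, suspended=False):
        susp_overhead = estimate_suspend_resume_time(mem_mb, rate_mb_per_s, numnodes)
        safe_duration = susp_overhead
        if suspended:
            # A suspended lease also needs time to resume before it can suspend again.
            safe_duration += estimate_suspend_resume_time(mem_mb, rate_mb_per_s, numnodes)
        min_duration = safe_duration
        # factor = 0: only enough time for the suspend (and resume) overhead;
        # factor = 1: as much useful running time as overhead.
        return safe_duration + (min_duration * factor)

    # Example: 1024 MB of memory, a 32 MB/s suspend rate, 2 nodes, factor 1:
    # overhead = 2 * (1024 / 32) = 64 s, threshold = 64 + 64 * 1 = 128 s
    print(scheduling_threshold(1024, 32, 2, 1))   # -> 128.0
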
Deleted: branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/slottable.py
===================================================================
--- trunk/src/haizea/resourcemanager/slottable.py 2008-09-16 10:43:48 UTC (rev 501)
+++ branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/slottable.py 2008-10-20 16:50:12 UTC (rev 537)
@@ -1,458 +0,0 @@
-# -------------------------------------------------------------------------- #
-# Copyright 2006-2008, University of Chicago #
-# Copyright 2008, Distributed Systems Architecture Group, Universidad #
-# Complutense de Madrid (dsa-research.org) #
-# #
-# Licensed under the Apache License, Version 2.0 (the "License"); you may #
-# not use this file except in compliance with the License. You may obtain #
-# a copy of the License at #
-# #
-# http://www.apache.org/licenses/LICENSE-2.0 #
-# #
-# Unless required by applicable law or agreed to in writing, software #
-# distributed under the License is distributed on an "AS IS" BASIS, #
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
-# See the License for the specific language governing permissions and #
-# limitations under the License. #
-# -------------------------------------------------------------------------- #
-
-from mx.DateTime import ISO, TimeDelta
-from operator import attrgetter, itemgetter
-import haizea.common.constants as constants
-import haizea.resourcemanager.datastruct as ds
-import bisect
-import copy
-import logging
-
-class SlotFittingException(Exception):
- pass
-
-class CriticalSlotFittingException(Exception):
- pass
-
-
-class Node(object):
- def __init__(self, capacity, capacitywithpreemption, resourcepoolnode):
- self.capacity = ds.ResourceTuple.copy(capacity)
- self.capacitywithpreemption = ds.ResourceTuple.copy(capacitywithpreemption)
- self.resourcepoolnode = resourcepoolnode
-
- @classmethod
- def from_resourcepool_node(cls, node):
- capacity = node.get_capacity()
- return cls(capacity, capacity, node)
-
-class NodeList(object):
- def __init__(self):
- self.nodelist = []
-
- def add(self, node):
- self.nodelist.append(node)
-
- def __getitem__(self, n):
- return self.nodelist[n-1]
-
- def copy(self):
- nodelist = NodeList()
- for n in self.nodelist:
- nodelist.add(Node(n.capacity, n.capacitywithpreemption, n.resourcepoolnode))
- return nodelist
-
- def toPairList(self, onlynodes=None):
- nodelist = []
- for i, n in enumerate(self.nodelist):
- if onlynodes == None or (onlynodes != None and i+1 in onlynodes):
- nodelist.append((i+1,Node(n.capacity, n.capacitywithpreemption, n.resourcepoolnode)))
- return nodelist
-
- def toDict(self):
- nodelist = self.copy()
- return dict([(i+1, v) for i, v in enumerate(nodelist)])
-
-class KeyValueWrapper(object):
- def __init__(self, key, value):
- self.key = key
- self.value = value
-
- def __cmp__(self, other):
- return cmp(self.key, other.key)
-
-class SlotTable(object):
- def __init__(self):
- self.logger = logging.getLogger("SLOT")
- self.nodes = NodeList()
- self.reservations = []
- self.reservationsByStart = []
- self.reservationsByEnd = []
- self.availabilitycache = {}
- self.changepointcache = None
-
- self.availabilitywindow = AvailabilityWindow(self)
-
- def add_node(self, resourcepoolnode):
- self.nodes.add(Node.from_resourcepool_node(resourcepoolnode))
-
- def is_empty(self):
- return (len(self.reservationsByStart) == 0)
-
- def dirty(self):
- # You're a dirty, dirty slot table and you should be
- # ashamed of having outdated caches!
- self.availabilitycache = {}
- self.changepointcache = None
-
- def getAvailabilityCacheMiss(self, time):
- nodes = self.nodes.copy()
- reservations = self.getReservationsAt(time)
- # Find how much resources are available on each node
- for r in reservations:
- for node in r.resources_in_pnode:
- nodes[node].capacity.decr(r.resources_in_pnode[node])
- if not r.is_preemptible():
- nodes[node].capacitywithpreemption.decr(r.resources_in_pnode[node])
-
- self.availabilitycache[time] = nodes
-
- def getAvailability(self, time, resreq=None, onlynodes=None):
- if not self.availabilitycache.has_key(time):
- self.getAvailabilityCacheMiss(time)
- # Cache miss
-
- if onlynodes != None:
- onlynodes = set(onlynodes)
-
- nodes = self.availabilitycache[time].toPairList(onlynodes)
- #nodes = {}
- #for n in self.availabilitycache[time]:
- # nodes[n] = Node(self.availabilitycache[time][n].capacity.res, self.availabilitycache[time][n].capacitywithpreemption.res)
-
- # Keep only those nodes with enough resources
- if resreq != None:
- newnodes = []
- for i, node in nodes:
- if not resreq.fits_in(node.capacity) and not resreq.fits_in(node.capacitywithpreemption):
- pass
- else:
- newnodes.append((i, node))
- nodes = newnodes
-
- return dict(nodes)
-
- def getUtilization(self, time, restype=constants.RES_CPU):
- nodes = self.getAvailability(time)
- total = sum([n.capacity.get_by_type(restype) for n in self.nodes.nodelist])
- avail = sum([n.capacity.get_by_type(restype) for n in nodes.values()])
- return 1.0 - (float(avail)/total)
-
- def getReservationsAt(self, time):
- item = KeyValueWrapper(time, None)
- startpos = bisect.bisect_right(self.reservationsByStart, item)
- bystart = set([x.value for x in self.reservationsByStart[:startpos]])
- endpos = bisect.bisect_right(self.reservationsByEnd, item)
- byend = set([x.value for x in self.reservationsByEnd[endpos:]])
- res = bystart & byend
- return list(res)
-
- def get_reservations_starting_between(self, start, end):
- startitem = KeyValueWrapper(start, None)
- enditem = KeyValueWrapper(end, None)
- startpos = bisect.bisect_left(self.reservationsByStart, startitem)
- endpos = bisect.bisect_right(self.reservationsByStart, enditem)
- res = [x.value for x in self.reservationsByStart[startpos:endpos]]
- return res
-
- def get_reservations_ending_between(self, start, end):
- startitem = KeyValueWrapper(start, None)
- enditem = KeyValueWrapper(end, None)
- startpos = bisect.bisect_left(self.reservationsByEnd, startitem)
- endpos = bisect.bisect_right(self.reservationsByEnd, enditem)
- res = [x.value for x in self.reservationsByStart[startpos:endpos]]
- return res
-
- def get_reservations_starting_at(self, time):
- return self.get_reservations_starting_between(time, time)
-
- def get_reservations_ending_at(self, time):
- return self.get_reservations_ending_between(time, time)
-
- # ONLY for simulation
- def getNextPrematureEnd(self, after):
- # Inefficient, but ok since this query seldom happens
- res = [i.value for i in self.reservationsByEnd if isinstance(i.value, ds.VMResourceReservation) and i.value.prematureend > after]
- if len(res) > 0:
- prematureends = [r.prematureend for r in res]
- prematureends.sort()
- return prematureends[0]
- else:
- return None
-
- # ONLY for simulation
- def getPrematurelyEndingRes(self, t):
- return [i.value for i in self.reservationsByEnd if isinstance(i.value, ds.VMResourceReservation) and i.value.prematureend == t]
-
-
- def getReservationsWithChangePointsAfter(self, after):
- item = KeyValueWrapper(after, None)
- startpos = bisect.bisect_right(self.reservationsByStart, item)
- bystart = set([x.value for x in self.reservationsByStart[:startpos]])
- endpos = bisect.bisect_right(self.reservationsByEnd, item)
- byend = set([x.value for x in self.reservationsByEnd[endpos:]])
- res = bystart | byend
- return list(res)
-
- def addReservation(self, rr):
- startitem = KeyValueWrapper(rr.start, rr)
- enditem = KeyValueWrapper(rr.end, rr)
- bisect.insort(self.reservationsByStart, startitem)
- bisect.insort(self.reservationsByEnd, enditem)
- self.dirty()
-
- # If the slot table keys are not modified (start / end time)
- # Just remove and reinsert.
- def updateReservation(self, rr):
- # TODO: Might be more efficient to resort lists
- self.removeReservation(rr)
- self.addReservation(rr)
- self.dirty()
-
- # If the slot table keys are modified (start and/or end time)
- # provide the old reservation (so we can remove it using
- # the original keys) and also the new reservation
- def updateReservationWithKeyChange(self, rrold, rrnew):
- # TODO: Might be more efficient to resort lists
- self.removeReservation(rrold)
- self.addReservation(rrnew)
- rrold.lease.replace_rr(rrold, rrnew)
- self.dirty()
-
-
- def getIndexOfReservation(self, rlist, rr, key):
- item = KeyValueWrapper(key, None)
- pos = bisect.bisect_left(rlist, item)
- found = False
- while not found:
- if rlist[pos].value == rr:
- found = True
- else:
- pos += 1
- return pos
-
- def removeReservation(self, rr, start=None, end=None):
- if start == None:
- start = rr.start
- if end == None:
- end = rr.start
- posstart = self.getIndexOfReservation(self.reservationsByStart, rr, start)
- posend = self.getIndexOfReservation(self.reservationsByEnd, rr, end)
- self.reservationsByStart.pop(posstart)
- self.reservationsByEnd.pop(posend)
- self.dirty()
-
-
- def findChangePointsAfter(self, after, until=None, nodes=None):
- changepoints = set()
- res = self.getReservationsWithChangePointsAfter(after)
- for rr in res:
- if nodes == None or (nodes != None and len(set(rr.resources_in_pnode.keys()) & set(nodes)) > 0):
- if rr.start > after:
- changepoints.add(rr.start)
- if rr.end > after:
- changepoints.add(rr.end)
- changepoints = list(changepoints)
- if until != None:
- changepoints = [c for c in changepoints if c < until]
- changepoints.sort()
- return changepoints
-
- def peekNextChangePoint(self, time):
- if self.changepointcache == None:
- # Cache is empty
- changepoints = self.findChangePointsAfter(time)
- changepoints.reverse()
- self.changepointcache = changepoints
- if len(self.changepointcache) == 0:
- return None
- else:
- return self.changepointcache[-1]
-
- def getNextChangePoint(self, time):
- p = self.peekNextChangePoint(time)
- if p != None:
- self.changepointcache.pop()
- return p
-
- def isFull(self, time):
- nodes = self.getAvailability(time)
- avail = sum([node.capacity.get_by_type(constants.RES_CPU) for node in nodes.values()])
- return (avail == 0)
-
-
-
-class AvailEntry(object):
- def __init__(self, time, avail, availpreempt, resreq):
- self.time = time
- self.avail = avail
- self.availpreempt = availpreempt
-
- if avail == None and availpreempt == None:
- self.canfit = 0
- self.canfitpreempt = 0
- else:
- self.canfit = resreq.get_num_fits_in(avail)
- if availpreempt == None:
- self.canfitpreempt = 0
- else:
- self.canfitpreempt = resreq.get_num_fits_in(availpreempt)
-
- def getCanfit(self, canpreempt):
- if canpreempt:
- return self.canfitpreempt
- else:
- return self.canfit
-
-
-class AvailabilityWindow(object):
- def __init__(self, slottable):
- self.slottable = slottable
- self.logger = logging.getLogger("SLOTTABLE.WIN")
- self.time = None
- self.resreq = None
- self.onlynodes = None
- self.avail = None
-
- # Create avail structure
- def initWindow(self, time, resreq, onlynodes = None, canpreempt=False):
- self.time = time
- self.resreq = resreq
- self.onlynodes = onlynodes
-
- self.avail = {}
-
- # Availability at initial time
- availatstart = self.slottable.getAvailability(self.time, self.resreq, self.onlynodes)
-
- for node in availatstart:
- capacity = availatstart[node].capacity
- if canpreempt:
- capacitywithpreemption = availatstart[node].capacitywithpreemption
- else:
- capacitywithpreemption = None
- self.avail[node] = [AvailEntry(self.time, capacity, capacitywithpreemption, self.resreq)]
-
- # Determine the availability at the subsequent change points
- nodes = set(availatstart.keys())
- changepoints = self.slottable.findChangePointsAfter(self.time, nodes=self.avail.keys())
- for p in changepoints:
- availatpoint = self.slottable.getAvailability(p, self.resreq, nodes)
- newnodes = set(availatpoint.keys())
-
- # Add entries for nodes that have no resources available
- # (for, at least, one VM)
- fullnodes = nodes - newnodes
- for node in fullnodes:
- self.avail[node].append(AvailEntry(p, None, None, None))
- nodes.remove(node)
-
- # For the rest, only interested if the available resources
- # Decrease in the window
- for node in newnodes:
- capacity = availatpoint[node].capacity
- fits = self.resreq.get_num_fits_in(capacity)
- if canpreempt:
- capacitywithpreemption = availatpoint[node].capacitywithpreemption
- fitswithpreemption = self.resreq.get_num_fits_in(capacitywithpreemption)
- prevavail = self.avail[node][-1]
- if not canpreempt and prevavail.getCanfit(canpreempt=False) > fits:
- self.avail[node].append(AvailEntry(p, capacity, capacitywithpreemption, self.resreq))
- elif canpreempt and (prevavail.getCanfit(canpreempt=False) > fits or prevavail.getCanfit(canpreempt=True) > fitswithpreemption):
- self.avail[node].append(AvailEntry(p, capacity, capacitywithpreemption, self.resreq))
-
-
- def fitAtStart(self, nodes = None, canpreempt = False):
- if nodes != None:
- avail = [v for (k, v) in self.avail.items() if k in nodes]
- else:
- avail = self.avail.values()
- if canpreempt:
- return sum([e[0].canfitpreempt for e in avail])
- else:
- return sum([e[0].canfit for e in avail])
-
- # TODO: Also return the amount of resources that would have to be
- # preempted in each physnode
- def findPhysNodesForVMs(self, numnodes, maxend, strictend=False, canpreempt=False):
- # Returns the physical nodes that can run all VMs, and the
- # time at which the VMs must end
- canfit = dict([(n, v[0].getCanfit(canpreempt)) for (n, v) in self.avail.items()])
- entries = []
- for n in self.avail.keys():
- entries += [(n, e) for e in self.avail[n][1:]]
- getTime = lambda x: x[1].time
- entries.sort(key=getTime)
- if strictend:
- end = None
- else:
- end = maxend
- for e in entries:
- physnode = e[0]
- entry = e[1]
-
- if entry.time >= maxend:
- # Can run to its maximum duration
- break
- else:
- diff = canfit[physnode] - entry.getCanfit(canpreempt)
- totalcanfit = sum([n for n in canfit.values()]) - diff
- if totalcanfit < numnodes and not strictend:
- # Not enough resources. Must end here
- end = entry.time
- break
- else:
- # Update canfit
- canfit[physnode] = entry.getCanfit(canpreempt)
-
- # Filter out nodes where we can't fit any vms
- canfit = dict([(n, v) for (n, v) in canfit.items() if v > 0])
-
- return end, canfit
-
-
- def printContents(self, nodes = None, withpreemption = False):
- if self.logger.getEffectiveLevel() == constants.LOGLEVEL_VDEBUG:
- if nodes == None:
- physnodes = self.avail.keys()
- else:
- physnodes = [k for k in self.avail.keys() if k in nodes]
- physnodes.sort()
- if withpreemption:
- p = "(with preemption)"
- else:
- p = "(without preemption)"
- self.logger.vdebug("AVAILABILITY WINDOW (time=%s, nodes=%s) %s"%(self.time, nodes, p))
- for n in physnodes:
- contents = "Node %i --- " % n
- for x in self.avail[n]:
- contents += "[ %s " % x.time
- contents += "{ "
- if x.avail == None and x.availpreempt == None:
- contents += "END "
- else:
- if withpreemption:
- res = x.availpreempt
- canfit = x.canfitpreempt
- else:
- res = x.avail
- canfit = x.canfit
- contents += "%s" % res
- contents += "} (Fits: %i) ] " % canfit
- self.logger.vdebug(contents)
-
-
-
-
-
-
-
-
-
-
\ No newline at end of file
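
As a side note on the SlotTable class (the rev 501 copy deleted above, and the rev 518 copy added below): reservations are kept in two lists sorted by start and by end time, each entry wrapped in a KeyValueWrapper so that bisect can keep the lists ordered, and getReservationsAt() intersects the "started at or before t" prefix of one list with the "ends after t" suffix of the other. A self-contained sketch of that indexing technique, using plain tuples as stand-in reservations (illustrative only, not Haizea code):

    import bisect

    class KeyValueWrapper(object):
        # Same idea as the class in slottable.py: order wrappers by key only.
        def __init__(self, key, value):
            self.key = key
            self.value = value
        def __lt__(self, other):
            # Counterpart of the __cmp__ above; bisect only needs "<".
            return self.key < other.key

    by_start = []
    by_end = []

    def add_reservation(rr):
        start, end, _name = rr
        bisect.insort(by_start, KeyValueWrapper(start, rr))
        bisect.insort(by_end, KeyValueWrapper(end, rr))

    def reservations_at(time):
        # Active at 'time' means start <= time (a prefix of by_start) and
        # end > time (a suffix of by_end); the intersection is the active set.
        probe = KeyValueWrapper(time, None)
        startpos = bisect.bisect_right(by_start, probe)
        endpos = bisect.bisect_right(by_end, probe)
        bystart = set(w.value for w in by_start[:startpos])
        byend = set(w.value for w in by_end[endpos:])
        return bystart & byend

    add_reservation((0, 10, "A"))
    add_reservation((5, 15, "B"))
    print(sorted(r[2] for r in reservations_at(7)))    # -> ['A', 'B']
    print(sorted(r[2] for r in reservations_at(12)))   # -> ['B']
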
Copied: branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/slottable.py (from rev 518, trunk/src/haizea/resourcemanager/slottable.py)
===================================================================
--- branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/slottable.py (rev 0)
+++ branches/TP1.3-scheduler-refactoring/haizea/resourcemanager/slottable.py 2008-10-20 16:50:12 UTC (rev 537)
@@ -0,0 +1,495 @@
+# -------------------------------------------------------------------------- #
+# Copyright 2006-2008, University of Chicago #
+# Copyright 2008, Distributed Systems Architecture Group, Universidad #
+# Complutense de Madrid (dsa-research.org) #
+# #
+# Licensed under the Apache License, Version 2.0 (the "License"); you may #
+# not use this file except in compliance with the License. You may obtain #
+# a copy of the License at #
+# #
+# http://www.apache.org/licenses/LICENSE-2.0 #
+# #
+# Unless required by applicable law or agreed to in writing, software #
+# distributed under the License is distributed on an "AS IS" BASIS, #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
+# See the License for the specific language governing permissions and #
+# limitations under the License. #
+# -------------------------------------------------------------------------- #
+
+from mx.DateTime import ISO, TimeDelta
+from operator import attrgetter, itemgetter
+import haizea.common.constants as constants
+import haizea.resourcemanager.datastruct as ds
+import bisect
+import copy
+import logging
+
+class SlotFittingException(Exception):
+ pass
+
+class CriticalSlotFittingException(Exception):
+ pass
+
+
+class Node(object):
+ def __init__(self, capacity, capacitywithpreemption, resourcepoolnode):
+ self.capacity = ds.ResourceTuple.copy(capacity)
+ self.capacitywithpreemption = ds.ResourceTuple.copy(capacitywithpreemption)
+ self.resourcepoolnode = resourcepoolnode
+
+ @classmethod
+ def from_resourcepool_node(cls, node):
+ capacity = node.get_capacity()
+ return cls(capacity, capacity, node)
+
+class NodeList(object):
+ def __init__(self):
+ self.nodelist = []
+
+ def add(self, node):
+ self.nodelist.append(node)
+
+ def __getitem__(self, n):
+ return self.nodelist[n-1]
+
+ def copy(self):
+ nodelist = NodeList()
+ for n in self.nodelist:
+ nodelist.add(Node(n.capacity, n.capacitywithpreemption, n.resourcepoolnode))
+ return nodelist
+
+ def toPairList(self, onlynodes=None):
+ nodelist = []
+ for i, n in enumerate(self.nodelist):
+ if onlynodes == None or (onlynodes != None and i+1 in onlynodes):
+ nodelist.append((i+1,Node(n.capacity, n.capacitywithpreemption, n.resourcepoolnode)))
+ return nodelist
+
+ def toDict(self):
+ nodelist = self.copy()
+ return dict([(i+1, v) for i, v in enumerate(nodelist)])
+
+class KeyValueWrapper(object):
+ def __init__(self, key, value):
+ self.key = key
+ self.value = value
+
+ def __cmp__(self, other):
+ return cmp(self.key, other.key)
+
+class SlotTable(object):
+ def __init__(self):
+ self.logger = logging.getLogger("SLOT")
+ self.nodes = NodeList()
+ self.reservations = []
+ self.reservationsByStart = []
+ self.reservationsByEnd = []
+ self.availabilitycache = {}
+ self.changepointcache = None
+
+ self.availabilitywindow = AvailabilityWindow(self)
+
+ def add_node(self, resourcepoolnode):
+ self.nodes.add(Node.from_resourcepool_node(resourcepoolnode))
+
+ def is_empty(self):
+ return (len(self.reservationsByStart) == 0)
+
+ def dirty(self):
+ # You're a dirty, dirty slot table and you should be
+ # ashamed of having outdated caches!
+ self.availabilitycache = {}
+ self.changepointcache = None
+
+ def getAvailabilityCacheMiss(self, time):
+ nodes = self.nodes.copy()
+ reservations = self.getReservationsAt(time)
+        # Find how much of each resource is available on each node
+ for r in reservations:
+ for node in r.resources_in_pnode:
+ nodes[node].capacity.decr(r.resources_in_pnode[node])
+ if not r.is_preemptible():
+ nodes[node].capacitywithpreemption.decr(r.resources_in_pnode[node])
+
+ self.availabilitycache[time] = nodes
+
+ def getAvailability(self, time, resreq=None, onlynodes=None):
+ if not self.availabilitycache.has_key(time):
+ self.getAvailabilityCacheMiss(time)
+ # Cache miss
+
+ if onlynodes != None:
+ onlynodes = set(onlynodes)
+
+ nodes = self.availabilitycache[time].toPairList(onlynodes)
+ #nodes = {}
+ #for n in self.availabilitycache[time]:
+ # nodes[n] = Node(self.availabilitycache[time][n].capacity.res, self.availabilitycache[time][n].capacitywithpreemption.res)
+
+ # Keep only those nodes with enough resources
+ if resreq != None:
+ newnodes = []
+ for i, node in nodes:
+ if not resreq.fits_in(node.capacity) and not resreq.fits_in(node.capacitywithpreemption):
+ pass
+ else:
+ newnodes.append((i, node))
+ nodes = newnodes
+
+ return dict(nodes)
+
+ def getUtilization(self, time, restype=constants.RES_CPU):
+ nodes = self.getAvailability(time)
+ total = sum([n.capacity.get_by_type(restype) for n in self.nodes.nodelist])
+ avail = sum([n.capacity.get_by_type(restype) for n in nodes.values()])
+ return 1.0 - (float(avail)/total)
+
+ def getReservationsAt(self, time):
+ item = KeyValueWrapper(time, None)
+ startpos = bisect.bisect_right(self.reservationsByStart, item)
+ bystart = set([x.value for x in self.reservationsByStart[:startpos]])
+ endpos = bisect.bisect_right(self.reservationsByEnd, item)
+ byend = set([x.value for x in self.reservationsByEnd[endpos:]])
+ res = bystart & byend
+ return list(res)
+
+ def get_reservations_starting_between(self, start, end):
+ startitem = KeyValueWrapper(start, None)
+ enditem = KeyValueWrapper(end, None)
+ startpos = bisect.bisect_left(self.reservationsByStart, startitem)
+ endpos = bisect.bisect_right(self.reservationsByStart, enditem)
+ res = [x.value for x in self.reservationsByStart[startpos:endpos]]
+ return res
+
+ def get_reservations_starting_after(self, start):
+ startitem = KeyValueWrapper(start, None)
+ startpos = bisect.bisect_left(self.reservationsByStart, startitem)
+ res = [x.value for x in self.reservationsByStart[startpos:]]
+ return res
+
+ def get_reservations_ending_between(self, start, end):
+ startitem = KeyValueWrapper(start, None)
+ enditem = KeyValueWrapper(end, None)
+ startpos = bisect.bisect_left(self.reservationsByEnd, startitem)
+ endpos = bisect.bisect_right(self.reservationsByEnd, enditem)
+ res = [x.value for x in self.reservationsByEnd[startpos:endpos]]
+ return res
+
+ def get_reservations_starting_at(self, time):
+ return self.get_reservations_starting_between(time, time)
+
+ def get_reservations_ending_at(self, time):
+ return self.get_reservations_ending_between(time, time)
+
+ # ONLY for simulation
+ def getNextPrematureEnd(self, after):
+ # Inefficient, but ok since this query seldom happens
+ res = [i.value for i in self.reservationsByEnd if isinstance(i.value, ds.VMResourceReservation) and i.value.prematureend > after]
+ if len(res) > 0:
+ prematureends = [r.prematureend for r in res]
+ prematureends.sort()
+ return prematureends[0]
+ else:
+ return None
+
+ # ONLY for simulation
+ def getPrematurelyEndingRes(self, t):
+ return [i.value for i in self.reservationsByEnd if isinstance(i.value, ds.VMResourceReservation) and i.value.prematureend == t]
+
+
+ def getReservationsWithChangePointsAfter(self, after):
+ item = KeyValueWrapper(after, None)
+ startpos = bisect.bisect_right(self.reservationsByStart, item)
+ bystart = set([x.value for x in self.reservationsByStart[:startpos]])
+ endpos = bisect.bisect_right(self.reservationsByEnd, item)
+ byend = set([x.value for x in self.reservationsByEnd[endpos:]])
+ res = bystart | byend
+ return list(res)
+
+ def addReservation(self, rr):
+ startitem = KeyValueWrapper(rr.start, rr)
+ enditem = KeyValueWrapper(rr.end, rr)
+ bisect.insort(self.reservationsByStart, startitem)
+ bisect.insort(self.reservationsByEnd, enditem)
+ self.dirty()
+
+    # If the slot table keys are not modified (start / end time),
+    # just remove and reinsert.
+ def updateReservation(self, rr):
+ # TODO: Might be more efficient to resort lists
+ self.removeReservation(rr)
+ self.addReservation(rr)
+ self.dirty()
+
+    # If the slot table keys are modified (start and/or end time),
+    # provide the old keys (so the stale entries can be removed using
+    # them) along with the updated reservation.
+ def update_reservation_with_key_change(self, rr, old_start, old_end):
+ # TODO: Might be more efficient to resort lists
+ self.removeReservation(rr, old_start, old_end)
+ self.addReservation(rr)
+ self.dirty()
+
+
+ def getIndexOfReservation(self, rlist, rr, key):
+ item = KeyValueWrapper(key, None)
+ pos = bisect.bisect_left(rlist, item)
+ found = False
+ while not found:
+ if rlist[pos].value == rr:
+ found = True
+ else:
+ pos += 1
+ return pos
+
+ def removeReservation(self, rr, start=None, end=None):
+ if start == None:
+ start = rr.start
+ if end == None:
+ end = rr.start
+ posstart = self.getIndexOfReservation(self.reservationsByStart, rr, start)
+ posend = self.getIndexOfReservation(self.reservationsByEnd, rr, end)
+ self.reservationsByStart.pop(posstart)
+ self.reservationsByEnd.pop(posend)
+ self.dirty()
+
+
+ def findChangePointsAfter(self, after, until=None, nodes=None):
+ changepoints = set()
+ res = self.getReservationsWithChangePointsAfter(after)
+ for rr in res:
+ if nodes == None or (nodes != None and len(set(rr.resources_in_pnode.keys()) & set(nodes)) > 0):
+ if rr.start > after:
+ changepoints.add(rr.start)
+ if rr.end > after:
+ changepoints.add(rr.end)
+ changepoints = list(changepoints)
+ if until != None:
+ changepoints = [c for c in changepoints if c < until]
+ changepoints.sort()
+ return changepoints
+
+ def peekNextChangePoint(self, time):
+ if self.changepointcache == None:
+ # Cache is empty
+ changepoints = self.findChangePointsAfter(time)
+ changepoints.reverse()
+ self.changepointcache = changepoints
+ if len(self.changepointcache) == 0:
+ return None
+ else:
+ return self.changepointcache[-1]
+
+ def getNextChangePoint(self, time):
+ p = self.peekNextChangePoint(time)
+ if p != None:
+ self.changepointcache.pop()
+ return p
+
+ def isFull(self, time):
+ nodes = self.getAvailability(time)
+ avail = sum([node.capacity.get_by_type(constants.RES_CPU) for node in nodes.values()])
+ return (avail == 0)
+
+ def get_next_reservations_in_nodes(self, time, nodes, rr_type=None, immediately_next = False):
+ nodes = set(nodes)
+ rrs_in_nodes = []
+ earliest_end_time = {}
+ rrs = self.get_reservations_starting_after(time)
+ if rr_type != None:
+ rrs = [rr for rr in rrs if isinstance(rr, rr_type)]
+
+ # Filter the RRs by nodes
+        for rr in rrs:
+ rr_nodes = set(rr.resources_in_pnode.keys())
+ if len(nodes & rr_nodes) > 0:
+ rrs_in_nodes.append(rr)
+ end = rr.end
+ for n in rr_nodes:
+ if not earliest_end_time.has_key(n):
+ earliest_end_time[n] = end
+ else:
+ if end < earliest_end_time[n]:
+ earliest_end_time[n] = end
+
+ if immediately_next:
+ # We only want to include the ones that are immediately
+ # next.
+ rr_nodes_excl = set()
+ for n in nodes:
+ if earliest_end_time.has_key(n):
+ end = earliest_end_time[n]
+ rrs = [rr for rr in rrs_in_nodes if n in rr.resources_in_pnode.keys() and rr.start < end]
+ rr_nodes_excl.update(rrs)
+ rrs_in_nodes = list(rr_nodes_excl)
+
+ return rrs_in_nodes
+
+class AvailEntry(object):
+ def __init__(self, time, avail, availpreempt, resreq):
+ self.time = time
+ self.avail = avail
+ self.availpreempt = availpreempt
+
+ if avail == None and availpreempt == None:
+ self.canfit = 0
+ self.canfitpreempt = 0
+ else:
+ self.canfit = resreq.get_num_fits_in(avail)
+ if availpreempt == None:
+ self.canfitpreempt = 0
+ else:
+ self.canfitpreempt = resreq.get_num_fits_in(availpreempt)
+
+ def getCanfit(self, canpreempt):
+ if canpreempt:
+ return self.canfitpreempt
+ else:
+ return self.canfit
+
+
+class AvailabilityWindow(object):
+ def __init__(self, slottable):
+ self.slottable = slottable
+ self.logger = logging.getLogger("SLOTTABLE.WIN")
+ self.time = None
+ self.resreq = None
+ self.onlynodes = None
+ self.avail = None
+
+ # Create avail structure
+ def initWindow(self, time, resreq, onlynodes = None, canpreempt=False):
+ self.time = time
+ self.resreq = resreq
+ self.onlynodes = onlynodes
+
+ self.avail = {}
+
+ # Availability at initial time
+ availatstart = self.slottable.getAvailability(self.time, self.resreq, self.onlynodes)
+
+ for node in availatstart:
+ capacity = availatstart[node].capacity
+ if canpreempt:
+ capacitywithpreemption = availatstart[node].capacitywithpreemption
+ else:
+ capacitywithpreemption = None
+ self.avail[node] = [AvailEntry(self.time, capacity, capacitywithpreemption, self.resreq)]
+
+ # Determine the availability at the subsequent change points
+ nodes = set(availatstart.keys())
+ changepoints = self.slottable.findChangePointsAfter(self.time, nodes=self.avail.keys())
+ for p in changepoints:
+ availatpoint = self.slottable.getAvailability(p, self.resreq, nodes)
+ newnodes = set(availatpoint.keys())
+
+            # Add entries for nodes that no longer have resources available
+            # (not even for one VM)
+ fullnodes = nodes - newnodes
+ for node in fullnodes:
+ self.avail[node].append(AvailEntry(p, None, None, None))
+ nodes.remove(node)
+
+            # For the rest, we are only interested in whether the available
+            # resources decrease within the window
+ for node in newnodes:
+ capacity = availatpoint[node].capacity
+ fits = self.resreq.get_num_fits_in(capacity)
+ if canpreempt:
+ capacitywithpreemption = availatpoint[node].capacitywithpreemption
+ fitswithpreemption = self.resreq.get_num_fits_in(capacitywithpreemption)
+ prevavail = self.avail[node][-1]
+ if not canpreempt and prevavail.getCanfit(canpreempt=False) > fits:
+ self.avail[node].append(AvailEntry(p, capacity, capacitywithpreemption, self.resreq))
+ elif canpreempt and (prevavail.getCanfit(canpreempt=False) > fits or prevavail.getCanfit(canpreempt=True) > fitswithpreemption):
+ self.avail[node].append(AvailEntry(p, capacity, capacitywithpreemption, self.resreq))
+
+
+ def fitAtStart(self, nodes = None, canpreempt = False):
+ if nodes != None:
+ avail = [v for (k, v) in self.avail.items() if k in nodes]
+ else:
+ avail = self.avail.values()
+ if canpreempt:
+ return sum([e[0].canfitpreempt for e in avail])
+ else:
+ return sum([e[0].canfit for e in avail])
+
+ # TODO: Also return the amount of resources that would have to be
+ # preempted in each physnode
+ def findPhysNodesForVMs(self, numnodes, maxend, strictend=False, canpreempt=False):
+ # Returns the physical nodes that can run all VMs, and the
+ # time at which the VMs must end
+ canfit = dict([(n, v[0].getCanfit(canpreempt)) for (n, v) in self.avail.items()])
+ entries = []
+ for n in self.avail.keys():
+ entries += [(n, e) for e in self.avail[n][1:]]
+ getTime = lambda x: x[1].time
+ entries.sort(key=getTime)
+ if strictend:
+ end = None
+ else:
+ end = maxend
+ for e in entries:
+ physnode = e[0]
+ entry = e[1]
+
+ if entry.time >= maxend:
+ # Can run to its maximum duration
+ break
+ else:
+ diff = canfit[physnode] - entry.getCanfit(canpreempt)
+ totalcanfit = sum([n for n in canfit.values()]) - diff
+ if totalcanfit < numnodes and not strictend:
+ # Not enough resources. Must end here
+ end = entry.time
+ break
+ else:
+ # Update canfit
+ canfit[physnode] = entry.getCanfit(canpreempt)
+
+ # Filter out nodes where we can't fit any vms
+ canfit = dict([(n, v) for (n, v) in canfit.items() if v > 0])
+
+ return end, canfit
+
+
+ def printContents(self, nodes = None, withpreemption = False):
+ if self.logger.getEffectiveLevel() == constants.LOGLEVEL_VDEBUG:
+ if nodes == None:
+ physnodes = self.avail.keys()
+ else:
+ physnodes = [k for k in self.avail.keys() if k in nodes]
+ physnodes.sort()
+ if withpreemption:
+ p = "(with preemption)"
+ else:
+ p = "(without preemption)"
+ self.logger.vdebug("AVAILABILITY WINDOW (time=%s, nodes=%s) %s"%(self.time, nodes, p))
+ for n in physnodes:
+ contents = "Node %i --- " % n
+ for x in self.avail[n]:
+ contents += "[ %s " % x.time
+ contents += "{ "
+ if x.avail == None and x.availpreempt == None:
+ contents += "END "
+ else:
+ if withpreemption:
+ res = x.availpreempt
+ canfit = x.canfitpreempt
+ else:
+ res = x.avail
+ canfit = x.canfit
+ contents += "%s" % res
+ contents += "} (Fits: %i) ] " % canfit
+ self.logger.vdebug(contents)
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
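
The AvailabilityWindow above reduces slot fitting to a per-node count of how many VMs fit: AvailEntry.canfit is resreq.get_num_fits_in(avail), and fitAtStart() sums those counts over the nodes of interest. A toy sketch of that accounting, with resources reduced to plain CPU counts (the function names are illustrative, not Haizea's API):

    def num_fits_in(request_cpus, avail_cpus):
        # Stand-in for ResourceTuple.get_num_fits_in(): how many copies of the
        # per-VM request fit into a node's available capacity.
        return int(avail_cpus // request_cpus)

    def fit_at_start(avail_by_node, request_cpus, nodes=None):
        # Stand-in for AvailabilityWindow.fitAtStart(): total number of VMs
        # that could start right now, optionally restricted to a node subset.
        if nodes is not None:
            avail_by_node = dict((n, a) for (n, a) in avail_by_node.items() if n in nodes)
        return sum(num_fits_in(request_cpus, a) for a in avail_by_node.values())

    # Two nodes with 3 and 1 free CPUs, each VM requesting 1 CPU:
    print(fit_at_start({1: 3, 2: 1}, 1))              # -> 4
    print(fit_at_start({1: 3, 2: 1}, 1, nodes=[1]))   # -> 3
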
Modified: branches/TP1.3-scheduler-refactoring/haizea/traces/generators.py
===================================================================
--- trunk/src/haizea/traces/generators.py 2008-09-16 10:43:48 UTC (rev 501)
+++ branches/TP1.3-scheduler-refactoring/haizea/traces/generators.py 2008-10-20 16:50:12 UTC (rev 537)
@@ -15,87 +15,88 @@
# See the License for the specific language governing permissions and #
# limitations under the License. #
# -------------------------------------------------------------------------- #
+pass
-from haizea.common.config import TraceConfig, ImageConfig
-from haizea.traces.formats import LWF, LWFEntry
-
-def generateTrace(config, file, guaranteeAvg = False):
- tracedur = config.getTraceDuration()
-
- print config.intervaldist.getAvg()
-
- avgnumreq = tracedur / config.intervaldist.getAvg()
- idealaccumdur = avgnumreq * config.durationdist.getAvg() * config.numnodesdist.getAvg()
-
- print avgnumreq
- print config.durationdist.getAvg()
- print config.numnodesdist.getAvg()
- print idealaccumdur
-
- good = False
- deadlineavg = config.deadlinedist.get()
-
- while not good:
- entries = []
- time = - deadlineavg
- accumdur = 0
- while time + deadlineavg + config.durationdist.getAvg() < tracedur:
- entry = LWFEntry()
- entry.reqTime = time
- entry.startTime = time + config.deadlinedist.get()
- entry.duration = config.durationdist.get()
- entry.realDuration = entry.duration
- entry.numNodes = config.numnodesdist.get()
- entry.CPU = 1
- entry.mem = 1024
- entry.disk = 0
- entry.vmImage = "NONE.img"
- entry.vmImageSize = 600
- accumdur += entry.duration * entry.numNodes
- entries.append(entry)
-
- interval = config.intervaldist.get()
- time += interval
-
- if not guaranteeAvg:
- good = True
- else:
- dev = abs((accumdur / idealaccumdur) - 1)
- if dev < 0.01:
- print "Deviation is satisfactory: %.3f" % dev
- good = True
- else:
- print "Deviation is too big: %.3f. Generating again." % dev
-
- for e in entries:
- if e.reqTime < 0:
- e.reqTime = 0
-
- lwf = LWF(entries)
- lwf.toFile(file)
-
-
-def generateImages(config, file):
- f = open(file, "w")
-
- # Write image sizes
- for i in config.images:
- print >>f, "%s %i" % (i, config.sizedist.get())
-
- print >>f, "#"
-
- l = config.getFileLength()
- for i in xrange(l):
- print >>f, config.imagedist.get()
-
- f.close()
-
-
-if __name__ == "__main__":
- configfile="../configfiles/images.conf"
- imagefile="../traces/examples/generated.images"
-
-
- config = ImageConfig.fromFile(configfile)
-
- generateImages(config, imagefile)
\ No newline at end of file
+#from haizea.common.config import TraceConfig, ImageConfig
+#from haizea.traces.formats import LWF, LWFEntry
+#
+#def generateTrace(config, file, guaranteeAvg = False):
+# tracedur = config.getTraceDuration()
+#
+# print config.intervaldist.getAvg()
+#
+# avgnumreq = tracedur / config.intervaldist.getAvg()
+# idealaccumdur = avgnumreq * config.durationdist.getAvg() * config.numnodesdist.getAvg()
+#
+# print avgnumreq
+# print config.durationdist.getAvg()
+# print config.numnodesdist.getAvg()
+# print idealaccumdur
+#
+# good = False
+# deadlineavg = config.deadlinedist.get()
+#
+# while not good:
+# entries = []
+# time = - deadlineavg
+# accumdur = 0
+# while time + deadlineavg + config.durationdist.getAvg() < tracedur:
+# entry = LWFEntry()
+# entry.reqTime = time
+# entry.startTime = time + config.deadlinedist.get()
+# entry.duration = config.durationdist.get()
+# entry.realDuration = entry.duration
+# entry.numNodes = config.numnodesdist.get()
+# entry.CPU = 1
+# entry.mem = 1024
+# entry.disk = 0
+# entry.vmImage = "NONE.img"
+# entry.vmImageSize = 600
+# accumdur += entry.duration * entry.numNodes
+# entries.append(entry)
+#
+# interval = config.intervaldist.get()
+# time += interval
+#
+# if not guaranteeAvg:
+# good = True
+# else:
+# dev = abs((accumdur / idealaccumdur) - 1)
+# if dev < 0.01:
+# print "Deviation is satisfactory: %.3f" % dev
+# good = True
+# else:
+# print "Deviation is too big: %.3f. Generating again." % dev
+#
+# for e in entries:
+# if e.reqTime < 0:
+# e.reqTime = 0
+#
+# lwf = LWF(entries)
+# lwf.toFile(file)
+#
+#
+#def generateImages(config, file):
+# f = open(file, "w")
+#
+# # Write image sizes
+# for i in config.images:
+# print >>f, "%s %i" % (i, config.sizedist.get())
+#
+# print >>f, "#"
+#
+# l = config.getFileLength()
+# for i in xrange(l):
+# print >>f, config.imagedist.get()
+#
+# f.close()
+#
+#
+#if __name__ == "__main__":
+# configfile="../configfiles/images.conf"
+# imagefile="../traces/examples/generated.images"
+#
+#
+# config = ImageConfig.fromFile(configfile)
+#
+# generateImages(config, imagefile)
\ No newline at end of file
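
The generator being disabled above worked by regenerating an entire trace until its accumulated duration came within 1% of the ideal average (the "dev < 0.01" check in generateTrace when guaranteeAvg is set). A self-contained toy version of that accept/regenerate loop, with made-up distributions standing in for intervaldist, durationdist and numnodesdist:

    import random

    def generate_until_close(ideal_accum_dur, sample_trace, tolerance=0.01):
        # Keep generating until the accumulated node-seconds deviate from the
        # ideal by less than the tolerance, mirroring the guaranteeAvg loop.
        while True:
            trace = sample_trace()
            accumdur = sum(duration * numnodes for (duration, numnodes) in trace)
            dev = abs((accumdur / float(ideal_accum_dur)) - 1)
            if dev < tolerance:
                return trace, dev

    # Toy stand-ins for the interval/duration/numnodes distributions:
    sample = lambda: [(random.uniform(50, 150), random.randint(1, 4)) for _ in range(200)]
    ideal = 200 * 100 * 2.5    # entries * mean duration * mean node count
    trace, dev = generate_until_close(ideal, sample)
    print("Deviation is satisfactory: %.3f" % dev)
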
Deleted: branches/TP1.3-scheduler-refactoring/haizea/traces/readers.py
===================================================================
--- trunk/src/haizea/traces/readers.py 2008-09-16 10:43:48 UTC (rev 501)
+++ branches/TP1.3-scheduler-refactoring/haizea/traces/readers.py 2008-10-20 16:50:12 UTC (rev 537)
@@ -1,106 +0,0 @@
-# -------------------------------------------------------------------------- #
-# Copyright 2006-2008, University of Chicago #
-# Copyright 2008, Distributed Systems Architecture Group, Universidad #
-# Complutense de Madrid (dsa-research.org) #
-# #
-# Licensed under the Apache License, Version 2.0 (the "License"); you may #
-# not use this file except in compliance with the License. You may obtain #
-# a copy of the License at #
-# #
-# http://www.apache.org/licenses/LICENSE-2.0 #
-# #
-# Unless required by applicable law or agreed to in writing, software #
-# distributed under the License is distributed on an "AS IS" BASIS, #
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
-# See the License for the specific language governing permissions and #
-# limitations under the License. #
-# -------------------------------------------------------------------------- #
-
-from mx.DateTime import TimeDelta
-from haizea.resourcemanager.datastruct import ARLease, BestEffortLease, ResourceTuple
-import haizea.common.constants as constants
-import haizea.traces.formats as formats
-
-def SWF(tracefile, config):
- file = open (tracefile, "r")
- requests = []
- inittime = config.get("starttime")
- for line in file:
- if line[0]!=';':
- req = None
- fields = line.split()
- reqtime = float(fields[8])
- runtime = int(fields[3]) # 3: RunTime
- waittime = int(fields[2])
- status = int(fields[10])
-
- if reqtime > 0:
- tSubmit = int(fields[1]) # 1: Submission time
- tSubmit = inittime + TimeDelta(seconds=tSubmit)
- vmimage = "NOIMAGE"
- vmimagesize = 600 # Arbitrary
- numnodes = int(fields[7]) # 7: reqNProcs
- resreq = ResourceTuple.create_empty()
- resreq.set_by_type(constants.RES_CPU, 1) # One CPU per VM, should be configurable
- resreq.set_by_type(constants.RES_MEM, 1024) # Should be configurable
- resreq.set_by_type(constants.RES_DISK, vmimagesize + 0) # Should be configurable
- maxdur = TimeDelta(seconds=reqtime)
- if runtime < 0 and status==5:
- # This is a job that got cancelled while waiting in the queue
- realdur = maxdur
- maxqueuetime = tSubmit + TimeDelta(seconds=waittime)
- else:
- if runtime == 0:
- runtime = 1 # Runtime of 0 is <0.5 rounded down.
- realdur = TimeDelta(seconds=runtime) # 3: RunTime
- maxqueuetime = None
- if realdur > maxdur:
- realdur = maxdur
- preemptible = True
- req = BestEffortLease(tSubmit, maxdur, vmimage, vmimagesize, numnodes, resreq, preemptible, realdur)
- req.state = constants.LEASE_STATE_PENDING
- requests.append(req)
- return requests
-
-def IMG(imgfile):
- file = open (imgfile, "r")
- imagesizes = {}
- images = []
- state = 0 # 0 -> Reading image sizes 1 -> Reading image sequence
- for line in file:
- if line[0]=='#':
- state = 1
- elif state == 0:
- image, size = line.split()
- imagesizes[image] = int(size)
- elif state == 1:
- images.append(line.strip())
- return imagesizes, images
-
-def LWF(tracefile, inittime):
- file = formats.LWF.fromFile(tracefile)
- requests = []
- for entry in file.entries:
- tSubmit = inittime + TimeDelta(seconds=entry.reqTime)
- if entry.startTime == -1:
- tStart = None
- else:
- tStart = inittime + TimeDelta(seconds=entry.startTime)
- duration = TimeDelta(seconds=entry.duration)
- realduration = TimeDelta(seconds=entry.realDuration)
- vmimage = entry.vmImage
- vmimagesize = entry.vmImageSize
- numnodes = entry.numNodes
- resreq = ResourceTuple.create_empty()
- resreq.set_by_type(constants.RES_CPU, entry.CPU)
- resreq.set_by_type(constants.RES_MEM, entry.mem)
- resreq.set_by_type(constants.RES_DISK, vmimagesize + entry.disk)
- if tStart == None:
- preemptible = True
- req = BestEffortLease(tSubmit, duration, vmimage, vmimagesize, numnodes, resreq, preemptible, realduration)
- else:
- preemptible = False
- req = ARLease(tSubmit, tStart, duration, vmimage, vmimagesize, numnodes, resreq, preemptible, realduration)
- req.state = constants.LEASE_STATE_PENDING
- requests.append(req)
- return requests
Copied: branches/TP1.3-scheduler-refactoring/haizea/traces/readers.py (from rev 504, trunk/src/haizea/traces/readers.py)
===================================================================
--- branches/TP1.3-scheduler-refactoring/haizea/traces/readers.py (rev 0)
+++ branches/TP1.3-scheduler-refactoring/haizea/traces/readers.py 2008-10-20 16:50:12 UTC (rev 537)
@@ -0,0 +1,106 @@
+# -------------------------------------------------------------------------- #
+# Copyright 2006-2008, University of Chicago #
+# Copyright 2008, Distributed Systems Architecture Group, Universidad #
+# Complutense de Madrid (dsa-research.org) #
+# #
+# Licensed under the Apache License, Version 2.0 (the "License"); you may #
+# not use this file except in compliance with the License. You may obtain #
+# a copy of the License at #
+# #
+# http://www.apache.org/licenses/LICENSE-2.0 #
+# #
+# Unless required by applicable law or agreed to in writing, software #
+# distributed under the License is distributed on an "AS IS" BASIS, #
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
+# See the License for the specific language governing permissions and #
+# limitations under the License. #
+# -------------------------------------------------------------------------- #
+
+from mx.DateTime import TimeDelta
+from haizea.resourcemanager.datastruct import Lease, ARLease, BestEffortLease, ResourceTuple
+import haizea.common.constants as constants
+import haizea.traces.formats as formats
+
+def SWF(tracefile, config):
+ file = open (tracefile, "r")
+ requests = []
+ inittime = config.get("starttime")
+ for line in file:
+ if line[0]!=';':
+ req = None
+ fields = line.split()
+ reqtime = float(fields[8])
+ runtime = int(fields[3]) # 3: RunTime
+ waittime = int(fields[2])
+ status = int(fields[10])
+
+ if reqtime > 0:
+ tSubmit = int(fields[1]) # 1: Submission time
+ tSubmit = inittime + TimeDelta(seconds=tSubmit)
+ vmimage = "NOIMAGE"
+ vmimagesize = 600 # Arbitrary
+ numnodes = int(fields[7]) # 7: reqNProcs
+ resreq = ResourceTuple.create_empty()
+ resreq.set_by_type(constants.RES_CPU, 1) # One CPU per VM, should be configurable
+ resreq.set_by_type(constants.RES_MEM, 1024) # Should be configurable
+ resreq.set_by_type(constants.RES_DISK, vmimagesize + 0) # Should be configurable
+ maxdur = TimeDelta(seconds=reqtime)
+ if runtime < 0 and status==5:
+ # This is a job that got cancelled while waiting in the queue
+ realdur = maxdur
+ maxqueuetime = tSubmit + TimeDelta(seconds=waittime)
+ else:
+ if runtime == 0:
+ runtime = 1 # Runtime of 0 is <0.5 rounded down.
+ realdur = TimeDelta(seconds=runtime) # 3: RunTime
+ maxqueuetime = None
+ if realdur > maxdur:
+ realdur = maxdur
+ preemptible = True
+ req = BestEffortLease(tSubmit, maxdur, vmimage, vmimagesize, numnodes, resreq, preemptible, realdur)
+ req.state = Lease.STATE_NEW
+ requests.append(req)
+ return requests
+
+def IMG(imgfile):
+ file = open (imgfile, "r")
+ imagesizes = {}
+ images = []
+ state = 0 # 0 -> Reading image sizes 1 -> Reading image sequence
+ for line in file:
+ if line[0]=='#':
+ state = 1
+ elif state == 0:
+ image, size = line.split()
+ imagesizes[image] = int(size)
+ elif state == 1:
+ images.append(line.strip())
+ return imagesizes, images
+
+def LWF(tracefile, inittime):
+ file = formats.LWF.fromFile(tracefile)
+ requests = []
+ for entry in file.entries:
+ tSubmit = inittime + TimeDelta(seconds=entry.reqTime)
+ if entry.startTime == -1:
+ tStart = None
+ else:
+ tStart = inittime + TimeDelta(seconds=entry.startTime)
+ duration = TimeDelta(seconds=entry.duration)
+ realduration = TimeDelta(seconds=entry.realDuration)
+ vmimage = entry.vmImage
+ vmimagesize = entry.vmImageSize
+ numnodes = entry.numNodes
+ resreq = ResourceTuple.create_empty()
+ resreq.set_by_type(constants.RES_CPU, entry.CPU)
+ resreq.set_by_type(constants.RES_MEM, entry.mem)
+ resreq.set_by_type(constants.RES_DISK, vmimagesize + entry.disk)
+ if tStart == None:
+ preemptible = True
+ req = BestEffortLease(tSubmit, duration, vmimage, vmimagesize, numnodes, resreq, preemptible, realduration)
+ else:
+ preemptible = False
+ req = ARLease(tSubmit, tStart, duration, vmimage, vmimagesize, numnodes, resreq, preemptible, realduration)
+ req.state = Lease.STATE_NEW
+ requests.append(req)
+ return requests
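
For reference, a hypothetical usage sketch of the reader copied above; it is not part of this commit. The trace path and the start time are assumptions for illustration, and it assumes haizea and mx.DateTime are importable and that the file is a valid LWF trace.

from mx.DateTime import DateTime
import haizea.traces.readers as readers

inittime = DateTime(2008, 10, 20, 0, 0, 0)   # assumed simulation start time
requests = readers.LWF("../traces/examples/sample.lwf", inittime)

# Each request is a BestEffortLease (no start time in the trace entry) or an
# ARLease (explicit start time), already marked Lease.STATE_NEW as above.
print len(requests)
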