/***********************************************************************/ /* PROGRAM: sched12.cpp */ /* LANGUAGE: C++ */ /* AUTHOR: Rob Hill 10 March 2001 */ /* PURPOSE: Main program for optimizing scheduler and simulation */ /* framework for embedded processor simulation of image */ /* algebra algorithms using FPGAs, SIMDs, etc. */ /* NEEDS: All files listed in INCLUDE directives below */ /***********************************************************************/ /* IMPORTANT NOTE: In the comments below, the word "operation" refers */ /* interchangeably to an operation, or to the variable*/ /* that stores the result of an operation. */ /* EXAMPLE: If b = ITXGCN(a,t), i.e., image "a" is */ /* convolved with template "t", then we refer to */ /* that particular convolution as "b" and to the */ /* TYPE of the operation as "ITXCGN" (fixed-point */ /* generalized image-template convolution. */ /***********************************************************************/ #include /* C++ I/O library */ #include /* C++ file I/O library */ #include /* C software engr/debugging library */ #include /* C++ string class library */ #include /* C++ SGI/STL extension (hash tables) */ #include /* C standard library */ #include /* C++ list class library */ #include /* C++ set (sorted list) class library */ #include /* C++ standard algorithms library */ #include "opdict.h" /* C++ and AIM operand dictionary */ #include "opnames.h" /* C++ and AIM operand label converter */ #include /* C system clock interface library */ #include /* C limits (MAX_INT,...) header */ #include "global.h" /* List of global variables from MOCs */ int granularity = GRANULARITY; /* Set global simulation granularity */ /* in simulation clock ticks per */ /* simulated second */ class opdict optypes; /* Table of global variables that tell */ /* the execution time and defaults */ /* for each simulated operation. */ //#include "ploptree.h" /* Defines the class for an operation */ bool verbose = false; /* Default to non-verbose mode */ bool report = true; /* Default to report mode */ /* oppcmp - Functor that applies a partial order based on priority */ /* for op pointers */ struct oppcmp { bool operator()(const op *a,const op *b) const { return ((*a) < (*b)); /* Dereference and explicitly call the */ /* less-than op (comparison) */ } }; /* order_opp - Function that applies lexicographical order to all */ /* operators - names are ensured to be unique in "opnames" */ bool order_opp(op *a, op *b) { return a->name < b->name; } /* dump_edges - Visitor class that is used to create an HTML/Java */ /* to display a flexible tree in a Java applet */ class dump_edges :public visitor { ostream &os; /* output stream to dump edges into */ bool prefix_comma; /* state variable to accomodate "," */ virtual void accept(op *o) { /* acceptor called by every tree node */ if(o->lhs) { /* if has left child */ if(prefix_comma) os << ','; /* os << o->name << '-' << o->lhs->name; /* output edge */ prefix_comma = true; /* will need comma soon */ } if(o->rhs) { /* if has right child */ if(prefix_comma) os << ','; /* if we need comma */ os << o->name << '-' << o->rhs->name; /* output edge */ prefix_comma = true; /* will need comma soon */ } } public: /* constructor for dump_edges */ dump_edges(ostream &ostr) : os(ostr),prefix_comma(false) {} /* operator() - Accepts the root of the tree you want to traverse */ /* (i.e., that you want to recurse over) and it invokes */ /* visit_children to traverse the tree in pre-order */ void operator()(op *o) { prefix_comma = false; /* in case operator() called many times */ o->visit_children(this); /* invoke visit_children */ } }; /* reset - Visitor class that reset the time left for an operation */ /* and marks the operation as unscheduled */ struct reset :public visitor { virtual void accept(op *o) { o->reset(); } void operator () (op *o) { o->visit_children(this); } /* perform tree traversal */ }; /* area - Visitor class that calculates the total amount of processing */ /* time for all operations, irrespective of any devices. In */ /* Version 12, this merely counts the number of operations. */ /* Note that "area" may visit a node multiple times. */ struct area :public visitor { int a; area() : a(0) {} virtual void accept(op *o) {a += o->want_clock();} /* proc'g time/op */ int operator() (op *o) { o->visit_children(this); return a;} }; /* dag_area - Visitor class that calculates the total amount of time */ /* per operation, but visits each node only once. */ struct dag_area :public visitor { int a; /* amount of processing required */ set s; /* set of op pointers visited in tree */ dag_area() : a(0) {} /* dag_area - method construction */ virtual void accept(op *o) { /* acceptor function for the tree (dag) */ if(s.find(o) == s.end()) { /* if we've not seen operation o before */ a += o->want_clock(); /* then add cost of o to accumulator a */ s.insert(o); /* and insert o into set s (visited ops)*/ } } int operator() (op *o) { o->visit_children(this); return a;} }; /* set_pri_default - Visitor class that assigns the basic priority for */ /* an operation o based on its descendants */ struct set_pri_default :public visitor { virtual void accept(op *o) { o->set_pri(); } /* sets priority of o */ /* to its proc'g time */ void operator () (op *o) { o->visit_children(this); o->propogate_pri(0); } /* propogate_pri adds priority of o to */ /* each of its children recursively */ }; /* set_pri_reconfig - Visitor class that assigns the base priority for */ /* an operation o based on its reconfig time. If it is a */ /* good candidate for reconfiguration, a higher priority is */ /* assigned. */ struct set_pri_reconfig :public visitor { virtual void accept(op *o) { o->set_pri(); /* sets basic priority to current area */ if(o->good_reconfig()) o->inc_pri(1000); /* boost priority by 1k*/ } void operator () (op *o) { o->visit_children(this); o->propogate_pri(0); } }; /* inc_pri - Visitor class that increments the priority for o and all */ /* of o's descendants by an amount "pri". */ struct inc_pri :public visitor { int pri; virtual void accept(op *o) { o->inc_pri(pri); } void operator () (op *o,int p) { pri = p; o->visit_children(this); } }; /* dump_expr - Visitor class that calls printop recursively to list */ /* all the operations in the tree whose root is o. This */ /* routine is invoked in the command file "sch.out" by */ /* the command "! dump ", where denotes */ /* the name of the root operation of the subtree that */ /* is listed. */ struct dump_expr :public visitor { ostream &os; /* set output stream */ virtual void accept(op *o) {o->printop(os);os<visit_children(this);} }; /* devlist - Forward declaration of the device container (the class */ /* that manages all the simulated devices). */ class devlist; class readyset :public visitor { /* forms a set of current ops ordered*/ /* by priority */ public: set rset; /* ready set definition (name = rset)*/ public: virtual void accept(op *o) { /* acceptor called for each tree op */ if(o->ready() && /* if o is ready for processing and */ !o->scheduled) rset.insert(o); /* o hasn't been scheduled, */ /* then o is added to rset */ } set & get(op *o) { /* entry point for "devlist" */ rset.clear(); /* empty the rset */ o->visit_children(this); /* traverse tree of which o is root */ return rset; /* return the rset from the traversal*/ } void print_set(ostream &os=cout) { /* lists operations in rset */ bool want_comma = false; /* if card(rset) = 1, don't use "," */ os << '['; /* start output with open bracket */ for(set::iterator i=rset.begin();i!=rset.end();i++) { if(want_comma) os << ','; /* for each element in list, */ os << (*i)->name; /* print its name (comma-delimited)*/ want_comma = true; /* card(rset) > 1, we need comma */ } os << ']'; /* output a closed bracked (end rset)*/ } friend devlist; /* devlist can see private members */ }; /* Find all operations that have low or minimal reconfiguration times */ class good_reconfigset :public readyset { virtual void accept(op *o) { if(o->good_reconfig()) rset.insert(o); } }; int inf_cpu(op *o) { /* Simulates running op "o" with an */ int wall_time = 0; /* infinite number of CPUs */ int min_tick = INT_MAX; /* Smallest time for dvc state change*/ int cpu_req = 0; /* Number of CPUs required to eval o */ readyset rs; /* Currently ready ops to be exec'd */ assert(o != NULL); /* Make sure that o is non-null */ reset()(o); /* Initializes all ops exec time */ while(!o->done()) { /* While the root op not completed.. */ rs.get(o); /* Load all ready ops to ready set */ /* cpu_reg = size(largest ready set) */ if(rs.rset.size() > cpu_req) cpu_req = rs.rset.size(); for(set::iterator i = rs.rset.begin(); i!=rs.rset.end(); ++i) { /* Loop over ready set */ /* Check to see which operation in ready set wants minimum exe time */ if((*i)->want_clock() < min_tick) min_tick = (*i)->want_clock(); } assert(min_tick > 0); /* Make sure that min_tick > 0 */ /* Loop over all operations in ready set and process each operation by */ /* advancing the simulation clock min_tick for each operation in "rs" */ for(set::iterator i = rs.rset.begin(); i!=rs.rset.end(); ++i) { (*i)->process(min_tick); } wall_time += min_tick; } /* If the report flag is true, then print cpu_req to "cout" */ if(report) { cout << cpu_req << " CPU's required for perfect schedule" << endl; } return wall_time; /* Return time that ops need to exec */ } void print_readyset(op *o) { /* Dump operations in readyset */ readyset r; r.get(o); /* Put all ready ops from op to r */ r.print_set(cout); /* Print readyset r to cout */ } struct cap { /* Capability definition for device */ /* Set "scale" as time in seconds it takes operation to complete */ float scale; int ctime; /* Reconfiguration time */ cap() {}; /* Constructor */ cap(float s,int c) : scale(s),ctime(c) {} }; class dev { /* Device class definition */ protected: vector captbl; /* Capabilities of a given device */ string name; /* Device name */ op *o; /* Operation device currently execs */ int idle,active,time_left; /* "idle" = how long device idle */ /* "active" = how long device active */ /* "time_left" = amount of processing*/ /* time to finish reconfig or "o" */ int config; /* Current op device can perform */ /* without being reconfigured */ vector hist; /* Operations device has performed */ public: /* Constructor for device class */ dev(const string &n,float speed=1.0) : captbl(optypes.num_defs(),cap(speed,0)) ,name(n),o(0),idle(0),active(0), time_left(0),config(-1),hist() {} dev(istream &in) : o(0), idle(0),active(0),time_left(0),config(-1),hist() { set_from_stream(in); } /* "cost_for(t)" returns the capability table entry for op type "t" */ int cost_for(int t) {return (int) captbl[t].scale;} /* "set_from_stream" reads in device definition from file "istream" */ /* and parses the definition, putting parameters into capability */ /* table, also naming the device */ void set_from_stream(istream &in) { bool got_cap; /* Flag to show capability read ok */ captbl.clear(); /* Clear capability table */ captbl.resize(optypes.num_defs(),cap(0.0,0)); /* Alloc space */ in >> name; /* Get device name from input stream */ do { /* Read a capability from input strm */ got_cap = readcap(in); /* Set flag that we have capability */ } while(got_cap && !in.eof()); /* Check for end-of-file */ } bool readcap(istream &in) { /* Read capability from input stream */ string type; /* Type of op device can compute */ float scale; /* Device exe time for op in seconds */ int ctime; /* Reconfig time for op on device */ in >> type; /* Get device type from input stream */ if(type == "}") return false; /* If end of device definition found */ in >> scale >> ctime; /* ..load the scale and config time */ /* There exists a device type called "*", which is a wildcard that */ /* supports the definition of devices that can compute all types of */ /* operations of interest to the algorithm/hardware simulation. For */ /* example, "*" could denote a general-purpose CPU, but not a special */ /* purpose ALU (e.g., integer addition and multiplication only) */ /* For every entry in the capability table (each entry denotes a type */ /* of operation that a device can compute) that does not currently have*/ /* a definition, give it the definition associated with the operation */ /* type "*", i.e., *'s scale and configuration time. */ if(type == "*") { /* Detect device type "*" */ for(int i=0;iget_type()); } /* Determine if current device can compute operation of type "t" */ bool has_cap(int t) { return captbl[t].scale != 0.0; /* Check to see if scale is nonzero*/ } /* Attempt to assign operation "opp" to the current device. */ bool schedule(op *opp) { /* If the operation is not ready, we can't schedule it. */ if(!opp->ready()) return false; /* If the device is not configured for "opp", then attempt reconfig. */ if(config != opp->get_type()) { /* If the device can't be configured for "opp", then fail. */ if(!has_cap(opp->get_type())) return false; /* Coerce the current device to wait "ctime" (reconfig time) cycles */ /* before it begins processing. */ time_left = captbl[opp->get_type()].ctime; /* If the device does not require reconfiguration to compute "opp", */ /* then start computing "opp" on the current device. */ if(time_left == 0) config = opp->get_type(); } o = opp; /* Assign new current operation */ o->scheduled = true; /* Tag operation as scheduled */ o->scale(captbl[o->get_type()].scale); /* Get exe time for "o" */ hist.push_back(o->name); /* Record "o" assigned to device */ return true; /* Indicate successful scheduling */ } /* unschedule(): Remove current operation from device */ void unschedule() { if(o) { /* If we have an operation, */ o->scheduled = false; /* mark as unscheduled */ o=0; /* forget about operation "o" */ hist.pop_back(); /* by removing "o" from history */ } } /* working_on(): Determine if the current device is executing "opp" */ bool working_on(const op *opp) const { /* Device still needs processing time and has current operation "opp" */ return (!want_work() && opp == o); } /* time_after_rconfig(): Advance the simulated device until its */ /* reconfiguration is completed, or until current exe time is used up. */ int time_after_reconfig(int time) { if(time_left != 0) { /* If some reconfig/exe time left */ if(time > time_left) { /* If simulator has provided more */ /* simulation time than time_left */ assert(o); /* then we must have an operation */ time -= time_left; /* remove reconfig time from current */ /* time slice alloted by simulator */ time_left = 0; /* no longer need to reconfigure */ config = o->get_type(); /* Indicate device was reconfig'd */ return time; /* Return remaining time to simulator */ } else { /* Otherwise, if time is insufficient */ time_left -= time; /* for reconfig, then do as much as */ return 0; /* possible, and indicate that */ /* reconfig is incomplete */ } } else { return time; /* Return time to the simulator */ } } /* process() - simulates amount of time devices uses to compute the */ /* current operation "o", or reconfiguration, or idle */ int process(int time) { int work_time=time_after_reconfig(time); /* max procg time for "o" */ int itime = time - work_time; /* compute idle time */ if(o && o->ready()) { /* if we have an op and it's ready */ int scaled = work_time; /* time available given device speed */ if(scaled == 0) scaled = 1; /* ensure process always executed */ itime += o->process(scaled); /* incr idle time by unused time */ if(verbose) { /* if we want extensive reporting */ cout << name << " spent " << time << " on " << o->name << " (" << itime << " wasted, "<< scaled << " work done)" << endl; /* then output to report */ } } else { /* if no op or if "o" is not ready */ itime = time; /* then idle for entire time slice */ if(verbose) { /* if we want extensive reporting */ cout << name << " idle for " << time << endl; /* output report */ } } idle += itime; /* increment member var "idle" for dvc*/ active += time-itime; /* increment member var active time */ return time - itime; /* return time device was active */ } /* want_clock() - Determine amount of time before next state change */ int want_clock() const { if(time_left) { /* if reconfiguration not completed */ return time_left; /* then return remaining reconfig time*/ } else if(o==0) { /* if we have no operation */ return 0; /* then return 0 since no time needed */ } else { /* otherwise we have an operation */ return (int)(o->want_clock()); /* return time "o" req's on dvc*/ } } /* "<" - Compare two devices to see how long each device will take to */ /* change state */ bool operator < (const dev &d) const { /* return true if current dvc*/ return want_clock() < d.want_clock(); /* req's less time than "d" */ } /* is_idle() - Check to see if current device is idle */ bool is_idle() const {return want_clock() == 0;} /* want_work() - Check to see if the current device should have a new */ /* operation assigned to it */ bool want_work() const {return o==0 || o->done();} const string& get_name() const {return name;} /* reset_stats() - Clears history for the current device */ void reset_stats() { active = idle = 0; hist.clear(); } /* reset() - Detaches the current operation "o" from, and resets the */ /* status of, the current device */ void reset() { if(o) o->scheduled = false; /* if we have an operation, then mark*/ o = 0; config = -1; /* operation ("o") as unscheduled */ reset_stats(); /* and reset device status */ } /* dump_hist() - Output a list of operations performed on current dvc */ void dump_hist(ostream &os=cout) { os << name << ":["; /* start a format sequence with "[" */ if(!hist.empty()) { /* if current device has history */ for(int i=0;i report stream */ } /* specify iterator to the first element of the ready set */ set::iterator o = rs.rset.begin(); /* get random number from random number generator */ int rnum = rand()*rs.rset.size()/RAND_MAX; int i = 0; /* get "rnum"-th element in readyset */ while(o != rs.rset.end() && i report stream */ } int min_reconfig = INT_MAX; /* sentinel value for reconfig time */ set::iterator biggest = rs.rset.begin(); while(biggest != rs.rset.end() && (*biggest)->scheduled) ++biggest; if(biggest != rs.rset.end()){ /* setup forward iterator to find the */ return d.schedule(*biggest); /* lowest priority op and schedule */ } else { /* it on device "d" */ if(verbose) cout << "could not schedule " << d.get_name() << endl; return false; /* otherwise op not scheduled */ } } }; /* schedule_biggest - Another type of scheduling algorithm that imple- */ /* ments highest-priority-first scheduling strategy */ class schedule_biggest :public scheduler { virtual void prioritize(op *o) { if(report) cout << "pure priority order schedule" << endl; default_setup(o); } virtual /* pick - Given a set of operations to choose from, and a device to */ /* which the operations are to be mapped, we pick an operation */ /* based on the result returned by the random number generator. */ bool pick(readyset &rs,dev &d){ if(verbose) { /* if verbose mode indicated */ rs.print_set(cout); /* then print the ready set */ cout << endl; /* EOL -> report stream */ } int min_reconfig = INT_MAX; /* sentinel value for reconfig time */ set::reverse_iterator biggest = rs.rset.rbegin(); while(biggest != rs.rset.rend() && (*biggest)->scheduled) ++biggest; if(biggest != rs.rset.rend()){ /* reverse iterator to find highest */ return d.schedule(*biggest); /* priority op to schedule on "d" */ } else { /* if no op found, then no schedule */ if(verbose) cout << "could not schedule " << d.get_name() << endl; return false; } } }; /* schedule_biggest_min_reconfig - Another type of scheduling algorithm */ /* that implements lowest-reconfiguration-time-first scheduling */ class schedule_biggest_min_reconfig :public scheduler { virtual void prioritize(op *o) { if(report) cout << "min reconfig normal priority order schedule" << endl; default_setup(o); } /* pick - Chooses an operation "o" to be scheduled on device "d" from */ /* readyset. This finds the highest priority operation that */ /* does not require reconfiguration. */ virtual bool pick(readyset &rs,dev &d) { if(d.want_clock() == 0) { int min_reconfig = INT_MAX; set::reverse_iterator min_r = rs.rset.rend(); for(set::reverse_iterator biggest = rs.rset.rbegin(); biggest != rs.rset.rend();++biggest) { /* iterate backwards */ if((*biggest)->scheduled) continue; /* if not scheduled */ if(d.config_cost(*biggest)::reverse_iterator min_r = rs.rset.rend(); for(set::reverse_iterator biggest = rs.rset.rbegin(); biggest != rs.rset.rend();++biggest) { /* get highest priority*/ if((*biggest)->scheduled) continue; if(d.config_cost(*biggest) devs; public: void add_dev(dev &d) { /* add new device to devlist */ devs.push_back(d); } int process(int t) { /* advance simulation clock t ticks */ for(int i=0;i::iterator i=r.rset.begin();i!=r.rset.end();i++) { for(int j=0;jget_type()))get_type()); min_op = *i; /* get op that "i" points to */ min_dev = j; /* get the j-th device */ } } } } if(min < INT_MAX) { /* if we have found min-cost device */ devs[min_dev].schedule(min_op); /* then schedule op on device */ return true; } else return false; } /* ssched - Primary scheduling procedure that schedules, and simulates */ /* all scheduled, operations to their completion. In other */ /* words, "ssched" simulates evaluation of an expression. */ int ssched(op *o,scheduler *s) { int wall_time =0; /* initialize wall time to zero */ int want,tstep = INT_MAX; /* sentinel values for want,timestep */ s->prioritize(o); /* prioritize ops by scheduler's system*/ reset(); /* reset devices to default parameters */ int t_area = area()(o); /* total amount of time for subtree(o) */ int d_area = dag_area()(o); /* total time for subtree(o) as a DAG */ while(!o->done()) { /* while operation "o" not completed */ while(pick_min_dev(o,s)); /* ensure every device scheduled */ for(int i=0;i &order,op *root) { reset::reset()(root); /* reset all operations from "root" op */ reset(); /* reset all devices */ int next=0; /* index of the next operation */ bool idle=true; /* initialize idle flag */ int want,tstep,wall_time=0; /* initialize timekeeping variables */ while(!root->done()) { /* while expression not yet evaluated */ idle=true; /* initialize idle flag */ for(int i=0;i 10 ops or devices */ /* with workstations having <= 1GHz CPU clock.) */ int exaustive_sched(op *o) { vector ops; /* Input vector of operations */ vector best_dev; /* Lowest-cost sequence of devices */ int last_time,min_time=INT_MAX; /* timekeeping variables */ int p=0; /* number of trials attempted thus far */ if(o) o->pack_vec(ops); /* packs subtree(o) into a vector */ sort(ops.begin(),ops.end()); /* sort ops in vector on memory location*/ /* sort devices on order_dev, which is the predicate function that imposes*/ /* a strict partial order on the devices according to device name - this */ /* implements a lexicographical ordering of the devices by name */ sort(devs.begin(),devs.end(),order_dev); do { /* outer scheduling loop - devices */ do { /* inner scheduling loop - operations */ ++p; /* increment number of attempts */ /* return simulated computation time for the current permutation of ops */ last_time=inorder(ops,o); if(last_time>0 && last_time < min_time) { /* if time is new min.*/ min_time = last_time; /* then set new minimum time */ best_dev=devs; /* and save this device ordering */ if(verbose) { /* if verbose, tell the report about it */ cout<<" New min :" << min_time << endl; for(int i=0;iname << " ";} cout << endl; } } } while(next_permutation(ops.begin(),ops.end())); } while(next_permutation(devs.begin(),devs.end(),order_dev)); if(report) { /* report result of exhaustive schedule */ cout << p << " trials "<< ops.size() << " operations " << endl; for(int i=0;i" << endl; os << "" << endl; os << ""; } /* find_seed - Search for a seed that generates a useful random scheduler */ /* i.e., one that produces the shortest completion time for */ /* subtree(o). */ int find_seed(op *o,devlist &dl,int trials,int start=1) { schedule_random s(1); /* start at seed = 1 */ int last,minseed = INT_MAX; /* seed variables */ bool oldrept = report,oldverb = verbose; report=false;verbose=false; for(int i=0;i> name; /* Initialize the parser loop */ do { /* Loop for parsing file sch.out */ if(name == "/*") { /* Start of a comment */ while(name != "*/") { /* Loop through until end-of-comment */ in >> name; /* Get a token */ if(in.eof() && name != "*/") { /* Check for end-of-file */ cerr << "expected */ in " << infilename << endl; exit(2); /* Complain if EOF found before cmt-end */ } } } switch(name[0]) { /* Switching on the first char of token */ case '{' : /* Open device definition */ dl.add_dev(*(new dev(in))); /* Construct device from input stream */ break; /* End of case stmt */ /* Syntax: "$