diff -urN linux-2.4.20-ac2/Makefile linux-2.4.20-ac2-ctx16/Makefile --- linux-2.4.20-ac2/Makefile Fri Feb 21 00:51:46 2003 +++ linux-2.4.20-ac2-ctx16/Makefile Wed Feb 19 22:29:33 2003 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 4 SUBLEVEL = 20 -EXTRAVERSION = -ac2 +EXTRAVERSION = -ac2-ctx16 KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) Binary files linux-2.4.20-ac2/arch/i386/boot/bzImage.ok and linux-2.4.20-ac2-ctx16/arch/i386/boot/bzImage.ok differ diff -urN linux-2.4.20-ac2/fs/proc/array.c linux-2.4.20-ac2-ctx16/fs/proc/array.c --- linux-2.4.20-ac2/fs/proc/array.c Fri Feb 21 00:51:47 2003 +++ linux-2.4.20-ac2-ctx16/fs/proc/array.c Thu Feb 20 22:09:00 2003 @@ -309,16 +309,25 @@ } *buffer++ = ']'; *buffer++ = '\n'; - buffer += sprintf (buffer,"ctxticks: %d %ld %d\n" ,atomic_read(&task->s_info->ticks),task->counter ,task->s_info->refcount); + buffer += sprintf (buffer,"ctxsched: %d/%d = (FR: %d per %d; ago %d)\n" + "prio: %d (static prio = %d)\n" + "ctxnproc: %d\n" + ,task->s_info->tokens + ,task->s_info->tokens_max + ,task->s_info->tokens_fr + ,task->s_info->tokens_div + ,(jiffies - task->s_info->tokens_jfy) + ,task->prio + ,task->static_prio + ,task->s_info->refcount); + buffer += sprintf (buffer, "tokens_left: %d\n", tokens_left(task)); buffer += sprintf (buffer,"ctxflags: %d\n" ,task->s_info->flags); buffer += sprintf (buffer,"initpid: %d\n" ,task->s_info->initpid); }else{ buffer += sprintf (buffer,"s_context: %d\n",task->s_context); - buffer += sprintf (buffer,"ctxticks: none\n"); + buffer += sprintf (buffer,"ctxsched: none\n"); buffer += sprintf (buffer,"ctxflags: none\n"); buffer += sprintf (buffer,"initpid: none\n"); } diff -urN linux-2.4.20-ac2/include/linux/sched.h linux-2.4.20-ac2-ctx16/include/linux/sched.h --- linux-2.4.20-ac2/include/linux/sched.h Fri Feb 21 00:51:48 2003 +++ linux-2.4.20-ac2-ctx16/include/linux/sched.h Fri Feb 21 00:58:42 2003 @@ -174,12 +174,25 @@ * user-space. 
This allows kernel threads to set their * priority to a value higher than any user task. Note: * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO. + * + * Illustrative Example Priority Ranges: + * 0 .. 70 - Real Time kernel threads/head-room + * 71 .. 80 - nice -20 .. -11 tasks with bonuses + * 81 .. 90 - nice -10 .. -1 tasks with bonuses + * 91 .. 99 - nice 0 tasks with bonuses + * 100 - nice 0 with no bonuses or penalties + * 101 .. 109 - nice 0 tasks with penalties + * 110 .. 119 - nice 0 tasks with harsh penalties + * 120 .. 129 - nice +1 .. +10 tasks with harsh penalties + * 130 .. 139 - nice +11 .. +19 tasks with harsh penalties + * 149 - idle thread */ -#define MAX_USER_RT_PRIO 100 +#define MAX_USER_RT_PRIO 80 #define MAX_RT_PRIO MAX_USER_RT_PRIO +#define MAX_PRIO_USER 120 +#define MAX_PRIO (MAX_PRIO_USER + 29) -#define MAX_PRIO (MAX_RT_PRIO + 40) /* * The maximum RT priority is configurable. If the resulting @@ -339,10 +352,15 @@ char nodename[65]; char domainname[65]; int flags; /* S_CTX_INFO_xxx */ - atomic_t ticks; /* Number of ticks used by all process */ - /* in the s_context */ int initpid; /* PID of the logical process 1 of the */ /* of the context */ + int tokens; /* number of CPU tokens in this context */ + int tokens_fr; /* Fill rate: add X tokens... 
*/ + int tokens_div; /* Divisor: per Y jiffies */ + int tokens_max; /* Limit: no more than N tokens */ + unsigned long tokens_jfy; /* add an integral multiple of Y to this */ + spinlock_t tokens_lock; /* lock for this structure */ + void *data1; void *data2; void *data3; @@ -560,8 +578,8 @@ addr_limit: KERNEL_DS, \ exec_domain: &default_exec_domain, \ lock_depth: -1, \ - prio: MAX_PRIO-20, \ - static_prio: MAX_PRIO-20, \ + prio: MAX_USER_RT_PRIO+20, \ + static_prio: MAX_USER_RT_PRIO+20, \ policy: SCHED_OTHER, \ cpus_allowed: -1, \ mm: NULL, \ @@ -1019,6 +1037,56 @@ void sys_release_ip_info (struct iproot_info *); void sys_assign_ip_info (struct iproot_info *); void sys_alloc_ip_info (void); + +/* scheduling stuff */ +int tokens_left(struct task_struct *); +int tokens_recalc(struct task_struct *); + +/* update the token allocation for a process */ +static inline void tokens_alloc(struct task_struct *tsk) +{ + if (tsk->s_info && (tsk->s_info->flags & S_CTX_INFO_SCHED)) + { + spin_lock(&tsk->s_info->tokens_lock); + + tokens_recalc(tsk); + + /* else if (eat && tsk->s_info->tokens > 0) + { + tsk->s_info->tokens--; + } */ + spin_unlock(&tsk->s_info->tokens_lock); + } +} + +/* eat a token, only allocating more if we run out. 
+ + Return: + 1 if a token was eaten, + 0 if the process has no tokens left + -1 if tokens do not apply +*/ +static inline int eat_token(struct task_struct *tsk) +{ + int eaten = -1; + if (tsk->s_info && (tsk->s_info->flags & S_CTX_INFO_SCHED) ) + { + spin_lock(&tsk->s_info->tokens_lock); + if (tsk->s_info->tokens || tokens_recalc(tsk)) + { + tsk->s_info->tokens--; + eaten = 1; + } + else + { + eaten = 0; + } + spin_unlock(&tsk->s_info->tokens_lock); + } + return eaten; +} + + static inline void clear_need_resched(void) { diff -urN linux-2.4.20-ac2/kernel/sched.c linux-2.4.20-ac2-ctx16/kernel/sched.c --- linux-2.4.20-ac2/kernel/sched.c Fri Feb 21 00:51:48 2003 +++ linux-2.4.20-ac2-ctx16/kernel/sched.c Fri Feb 21 01:07:13 2003 @@ -43,7 +43,7 @@ */ #define USER_PRIO(p) ((p)-MAX_RT_PRIO) #define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio) -#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO)) +#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO_USER)) /* * These are the 'tuning knobs' of the scheduler: @@ -51,13 +51,17 @@ * Minimum timeslice is 10 msecs, default timeslice is 150 msecs, * maximum timeslice is 300 msecs. Timeslices get refilled after * they expire. + * + * Note: when processes start getting to really low priorities they + * will quickly hit the MIN_TIMESLICE. */ #define MIN_TIMESLICE ( 10 * HZ / 1000) #define MAX_TIMESLICE (300 * HZ / 1000) #define CHILD_PENALTY 95 #define PARENT_PENALTY 100 #define EXIT_WEIGHT 3 -#define PRIO_BONUS_RATIO 25 +#define PRIO_BONUS_RATIO 20 +#define VAVAVOOM_RATIO 50 #define INTERACTIVE_DELTA 2 #define MAX_SLEEP_AVG (2*HZ) #define STARVATION_LIMIT (2*HZ) @@ -90,15 +94,10 @@ * too hard. 
*/ -#define SCALE(v1,v1_max,v2_max) \ - (v1) * (v2_max) / (v1_max) - -#define DELTA(p) \ - (SCALE(TASK_NICE(p), 40, MAX_USER_PRIO*PRIO_BONUS_RATIO/100) + \ - INTERACTIVE_DELTA) - +/* Note: the algorithm here has changed, but the above table hasn't */ #define TASK_INTERACTIVE(p) \ - ((p)->prio <= (p)->static_prio - DELTA(p)) + ((p)->sleep_avg > MAX_SLEEP_AVG/10 + \ + TASK_USER_PRIO(p)*MAX_SLEEP_AVG / MAX_USER_PRIO) /* * BASE_TIMESLICE scales user-nice values [ -20 ... 19 ] @@ -109,14 +108,20 @@ * priority thread gets MIN_TIMESLICE worth of execution time. * * task_timeslice() is the interface that is used by the scheduler. + * + * IMHO this should be on a logarithmic rather than geometric scale. */ #define BASE_TIMESLICE(p) (MIN_TIMESLICE + \ ((MAX_TIMESLICE - MIN_TIMESLICE) * \ - (MAX_PRIO-1-(p)->static_prio)/(MAX_USER_PRIO - 1))) + (MAX_PRIO_USER-1-(p)->prio)/(MAX_PRIO_USER - 1))) static inline unsigned int task_timeslice(task_t *p) { - return BASE_TIMESLICE(p); + if (p->prio >= MAX_PRIO_USER) { + return MIN_TIMESLICE; + } else { + return BASE_TIMESLICE(p); + } } /* @@ -238,23 +243,52 @@ * priority but is modified by bonuses/penalties. * * We scale the actual sleep average [0 .... MAX_SLEEP_AVG] - * into the -5 ... 0 ... +5 bonus/penalty range. + * into a -4 ... 0 ... +4 bonus/penalty range. * - * We use 25% of the full 0...39 priority range so that: + * Additionally, we scale another amount based on the number of CPU + * tokens currently held by the s_context, if the process is part of + * an s_context (and the appropriate SCHED flag is set). This ranges + * from -5 ... 0 ... +15. + * + * So, the total bonus is -9 .. 0 .. +19 + * + * We use ~ 50% of the full 0...39 priority range so that: * * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs. - * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks. + * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks, unless + * that security context is far exceeding its CPU allocation. 
* * Both properties are important to certain workloads. */ static inline int effective_prio(task_t *p) { - int bonus, prio; + int bonus, prio, vavavoom, max; bonus = MAX_USER_PRIO*PRIO_BONUS_RATIO*p->sleep_avg/MAX_SLEEP_AVG/100 - MAX_USER_PRIO*PRIO_BONUS_RATIO/100/2; - prio = p->static_prio - bonus; + /* lots of tokens = lots of vavavoom + no tokens = no vavavoom */ + if ((vavavoom = tokens_left(p)) >= 0) + { + max = p->s_info->tokens_max; + vavavoom = max - vavavoom; + max = max * max; + vavavoom = MAX_USER_PRIO * VAVAVOOM_RATIO / 100 + * (vavavoom*vavavoom - (max >> 2)) / max; + + /* alternative, geometric mapping + vavavoom = -( MAX_USER_PRIO*VAVAVOOM_RATIO/100 * vavavoom + / p->s_info->tokens_max - + MAX_USER_PRIO*VAVAVOOM_RATIO/100/2 );*/ + } else + { + vavavoom = 0; + } + /* vavavoom = ( MAX_USER_PRIO*VAVAVOOM_RATIO/100*tokens_left(p) - + MAX_USER_PRIO*VAVAVOOM_RATIO/100/2 ); */ + + prio = p->static_prio - bonus + vavavoom; if (prio < MAX_RT_PRIO) prio = MAX_RT_PRIO; if (prio > MAX_PRIO-1) @@ -263,6 +297,56 @@ } /* + * tokens_left - return the number of tokens in the process' + * s_context's cpu token bucket, allocating more tokens + * if necessary. + * + * Returns -1 if the process does not have a bucket. + */ +int tokens_left(struct task_struct *tsk) +{ + + if (tsk->s_info && (tsk->s_info->flags & S_CTX_INFO_SCHED)) + { + /* see if any more tokens are due */ + tokens_alloc(tsk); + + return tsk->s_info->tokens; + } + else + { + return -1; + } +} + +/* + * tokens_recalc - recalculate the s_context's scheduling tokens + * + * a spinlock on tsk->s_info->tokens_lock must be acquired before + * calling this function. 
+ */ +int tokens_recalc(struct task_struct *tsk) +{ + unsigned long diff; + + diff = jiffies - tsk->s_info->tokens_jfy; + + if (diff < tsk->s_info->tokens_div) + { + return 0; + } + + diff = ( diff / tsk->s_info->tokens_div ); + + tsk->s_info->tokens += diff * tsk->s_info->tokens_fr; + tsk->s_info->tokens_jfy += diff * tsk->s_info->tokens_div; + if (tsk->s_info->tokens > tsk->s_info->tokens_max) + tsk->s_info->tokens = tsk->s_info->tokens_max; + + return 1; +} + +/* * activate_task - move a task to the runqueue. * * Also update all the scheduling statistics stuff. (sleep average @@ -865,6 +949,7 @@ */ if (p->sleep_avg) p->sleep_avg--; + if (!--p->time_slice) { dequeue_task(p, rq->active); set_tsk_need_resched(p); diff -urN linux-2.4.20-ac2/kernel/sys.c linux-2.4.20-ac2-ctx16/kernel/sys.c --- linux-2.4.20-ac2/kernel/sys.c Fri Feb 21 00:51:48 2003 +++ linux-2.4.20-ac2-ctx16/kernel/sys.c Thu Feb 20 23:11:54 2003 @@ -1073,7 +1073,15 @@ // printk ("new s_info %d\n",current->pid); s_info->s_context[0] = current->s_context; s_info->refcount = 1; - atomic_set (&s_info->ticks,current->counter); + + /* scheduling; hard code as constants for now */ + s_info->tokens_fr = 1; + s_info->tokens_div = 4; + s_info->tokens = HZ * 5; + s_info->tokens_max = HZ * 10; + s_info->tokens_jfy = jiffies; + s_info->tokens_lock = SPIN_LOCK_UNLOCKED; + s_info->flags = 0; s_info->initpid = 0; down_read (&uts_sem); diff -urN linux-2.4.20-ac2/kernel/timer.c linux-2.4.20-ac2-ctx16/kernel/timer.c --- linux-2.4.20-ac2/kernel/timer.c Fri Feb 21 00:51:48 2003 +++ linux-2.4.20-ac2-ctx16/kernel/timer.c Wed Feb 19 22:31:02 2003 @@ -599,11 +599,8 @@ struct task_struct *p = current; int cpu = smp_processor_id(), system = user_tick ^ 1; - if (p->s_info != NULL - && (p->s_info->flags & S_CTX_INFO_SCHED)!=0){ - // atomic_sub (ticks*p->s_info->refcount, &p->s_info->ticks); - atomic_dec (&p->s_info->ticks); - } + eat_token(p); + update_one_process(p, user_tick, system, cpu); scheduler_tick(user_tick, system); }