diff -urN linux-2.4.21-pre4-ac6-ctx16-rough/Makefile linux-2.4.21-pre4-ac6-ctx16/Makefile --- linux-2.4.21-pre4-ac6-ctx16-rough/Makefile Tue Feb 25 06:30:14 2003 +++ linux-2.4.21-pre4-ac6-ctx16/Makefile Tue Feb 25 06:25:19 2003 @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 4 SUBLEVEL = 21 -EXTRAVERSION = -pre4-ac6 +EXTRAVERSION = -pre4-ac6-ctx16 KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) diff -urN linux-2.4.21-pre4-ac6-ctx16-rough/fs/jfs/xattr.c linux-2.4.21-pre4-ac6-ctx16/fs/jfs/xattr.c --- linux-2.4.21-pre4-ac6-ctx16-rough/fs/jfs/xattr.c Fri Nov 29 12:53:15 2002 +++ linux-2.4.21-pre4-ac6-ctx16/fs/jfs/xattr.c Mon Feb 24 20:00:23 2003 @@ -646,7 +646,7 @@ if (IS_RDONLY(inode)) return -EROFS; - if (IS_IMMUTABLE(inode) || IS_APPEND(inode) || S_ISLNK(inode->i_mode)) + if (IS_IMMUTABLE_FILE(inode) || IS_APPEND(inode) || S_ISLNK(inode->i_mode)) return -EPERM; if((strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) != 0) && diff -urN linux-2.4.21-pre4-ac6-ctx16-rough/fs/proc/array.c linux-2.4.21-pre4-ac6-ctx16/fs/proc/array.c --- linux-2.4.21-pre4-ac6-ctx16-rough/fs/proc/array.c Tue Feb 25 06:30:14 2003 +++ linux-2.4.21-pre4-ac6-ctx16/fs/proc/array.c Mon Feb 24 19:34:37 2003 @@ -309,16 +309,25 @@ } *buffer++ = ']'; *buffer++ = '\n'; - buffer += sprintf (buffer,"ctxticks: %d %ld %d\n" - ,atomic_read(&task->s_info->ticks),task->counter - ,task->s_info->refcount); + buffer += sprintf (buffer,"ctxsched: %d/%d = (FR: %d per %d; ago %lu)\n" + "prio: %d (static prio = %d)\n" + "ctxnproc: %d\n" + ,task->s_info->tokens + ,task->s_info->tokens_max + ,task->s_info->tokens_fr + ,task->s_info->tokens_div + ,(jiffies - task->s_info->tokens_jfy) + ,task->prio + ,task->static_prio + ,task->s_info->refcount); + buffer += sprintf (buffer, "tokens_left: %d\n", tokens_left(task)); buffer += sprintf (buffer,"ctxflags: %d\n" ,task->s_info->flags); buffer += sprintf (buffer,"initpid: %d\n" ,task->s_info->initpid); }else{ buffer += sprintf (buffer,"s_context: 
%d\n",task->s_context); - buffer += sprintf (buffer,"ctxticks: none\n"); + buffer += sprintf (buffer,"ctxsched: none\n"); buffer += sprintf (buffer,"ctxflags: none\n"); buffer += sprintf (buffer,"initpid: none\n"); } diff -urN linux-2.4.21-pre4-ac6-ctx16-rough/include/linux/sched.h linux-2.4.21-pre4-ac6-ctx16/include/linux/sched.h --- linux-2.4.21-pre4-ac6-ctx16-rough/include/linux/sched.h Tue Feb 25 06:30:14 2003 +++ linux-2.4.21-pre4-ac6-ctx16/include/linux/sched.h Mon Feb 24 22:16:36 2003 @@ -175,12 +175,25 @@ * user-space. This allows kernel threads to set their * priority to a value higher than any user task. Note: * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO. + * + * Illustrative Example Priority Ranges: + * 0 .. 70 - Real Time kernel threads/head-room + * 71 .. 80 - nice -20 .. -11 tasks with bonuses + * 81 .. 90 - nice -10 .. -1 tasks with bonuses + * 91 .. 99 - nice 0 tasks with bonuses + * 100 - nice 0 with no bonuses or penalties + * 101 .. 109 - nice 0 tasks with penalties + * 110 .. 119 - nice 0 tasks with harsh penalties + * 120 .. 129 - nice +1 .. +10 tasks with harsh penalties + * 130 .. 139 - nice +11 .. +19 tasks with harsh penalties + * 149 - idle thread */ -#define MAX_USER_RT_PRIO 100 +#define MAX_USER_RT_PRIO 80 #define MAX_RT_PRIO MAX_USER_RT_PRIO +#define MAX_PRIO_USER 120 /* task_timeslice() hands out MIN_TIMESLICE at or above this prio */ +#define MAX_PRIO (MAX_PRIO_USER + 29) -#define MAX_PRIO (MAX_RT_PRIO + 40) /* * The maximum RT priority is configurable. If the resulting @@ -339,10 +352,15 @@ char nodename[65]; char domainname[65]; int flags; /* S_CTX_INFO_xxx */ - atomic_t ticks; /* Number of ticks used by all process */ - /* in the s_context */ int initpid; /* PID of the logical process 1 of the */ /* of the context */ + int tokens; /* number of CPU tokens in this context */ + int tokens_fr; /* Fill rate: add X tokens... 
*/ + int tokens_div; /* Divisor: per Y jiffies */ + int tokens_max; /* Limit: no more than N tokens */ + unsigned long tokens_jfy; /* jiffies stamp of the last refill; advanced in integral multiples of Y */ + spinlock_t tokens_lock; /* lock for this structure */ + void *data1; void *data2; void *data3; @@ -562,8 +580,8 @@ addr_limit: KERNEL_DS, \ exec_domain: &default_exec_domain, \ lock_depth: -1, \ - prio: MAX_PRIO-20, \ - static_prio: MAX_PRIO-20, \ + prio: MAX_USER_RT_PRIO+20, \ + static_prio: MAX_USER_RT_PRIO+20, \ policy: SCHED_OTHER, \ cpus_allowed: ~0UL, \ mm: NULL, \ @@ -1021,6 +1039,56 @@ void sys_release_ip_info (struct iproot_info *); void sys_assign_ip_info (struct iproot_info *); void sys_alloc_ip_info (void); + +/* scheduling stuff */ +int tokens_left(struct task_struct *); +int tokens_recalc(struct task_struct *); + +/* update the token allocation for a process; no-op unless S_CTX_INFO_SCHED is set on its s_info */ +static inline void tokens_alloc(struct task_struct *tsk) +{ + unsigned long irqflags; + + if (tsk->s_info && (tsk->s_info->flags & S_CTX_INFO_SCHED)) + { + /* + * eat_token() takes this lock from the kernel/timer.c tick path, + * so interrupts must be off while it is held here. + */ + spin_lock_irqsave(&tsk->s_info->tokens_lock, irqflags); + tokens_recalc(tsk); + spin_unlock_irqrestore(&tsk->s_info->tokens_lock, irqflags); + } +} + +/* eat a token, only allocating more if we run out. 
+ + Return: + 1 if a token was eaten, + 0 if the process has no tokens left + -1 if tokens do not apply +*/ +static inline int eat_token(struct task_struct *tsk) +{ + int eaten = -1; /* stays -1 when there is no s_info or S_CTX_INFO_SCHED is clear */ + if (tsk->s_info && (tsk->s_info->flags & S_CTX_INFO_SCHED) ) + { + spin_lock(&tsk->s_info->tokens_lock); + if (tsk->s_info->tokens || tokens_recalc(tsk)) /* refill is attempted only when the bucket reads zero */ + { + tsk->s_info->tokens--; + eaten = 1; + } + else + { + eaten = 0; + } + spin_unlock(&tsk->s_info->tokens_lock); + } + return eaten; +} + + static inline void clear_need_resched(void) { diff -urN linux-2.4.21-pre4-ac6-ctx16-rough/kernel/exit.c linux-2.4.21-pre4-ac6-ctx16/kernel/exit.c --- linux-2.4.21-pre4-ac6-ctx16-rough/kernel/exit.c Tue Feb 25 06:30:15 2003 +++ linux-2.4.21-pre4-ac6-ctx16/kernel/exit.c Mon Feb 24 22:14:23 2003 @@ -42,6 +42,8 @@ current->cmaj_flt += p->maj_flt + p->cmaj_flt; current->cnswap += p->nswap + p->cnswap; sched_exit(p); + sys_release_s_info(p); + sys_release_ip_info(p->ip_info); p->pid = 0; free_task_struct(p); } @@ -66,8 +68,6 @@ break; } if (p->pid == pgrp) - sys_release_s_info(p); - sys_release_ip_info(p->ip_info); fallback = p->session; } read_unlock(&tasklist_lock); diff -urN linux-2.4.21-pre4-ac6-ctx16-rough/kernel/sched.c linux-2.4.21-pre4-ac6-ctx16/kernel/sched.c --- linux-2.4.21-pre4-ac6-ctx16-rough/kernel/sched.c Tue Feb 25 06:30:15 2003 +++ linux-2.4.21-pre4-ac6-ctx16/kernel/sched.c Mon Feb 24 19:34:37 2003 @@ -43,7 +43,7 @@ */ #define USER_PRIO(p) ((p)-MAX_RT_PRIO) #define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio) -#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO)) +#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO_USER)) /* * These are the 'tuning knobs' of the scheduler: @@ -51,13 +51,17 @@ * Minimum timeslice is 10 msecs, default timeslice is 150 msecs, * maximum timeslice is 300 msecs. Timeslices get refilled after * they expire. + * + * Note: when processes start getting to really low priorities they + * will quickly hit the MIN_TIMESLICE. 
*/ #define MIN_TIMESLICE ( 10 * HZ / 1000) #define MAX_TIMESLICE (300 * HZ / 1000) #define CHILD_PENALTY 95 #define PARENT_PENALTY 100 #define EXIT_WEIGHT 3 -#define PRIO_BONUS_RATIO 25 +#define PRIO_BONUS_RATIO 20 +#define VAVAVOOM_RATIO 50 #define INTERACTIVE_DELTA 2 #define MAX_SLEEP_AVG (2*HZ) #define STARVATION_LIMIT (2*HZ) @@ -90,15 +94,10 @@ * too hard. */ -#define SCALE(v1,v1_max,v2_max) \ - (v1) * (v2_max) / (v1_max) - -#define DELTA(p) \ - (SCALE(TASK_NICE(p), 40, MAX_USER_PRIO*PRIO_BONUS_RATIO/100) + \ - INTERACTIVE_DELTA) - +/* Note: the algorithm here has changed, but the above table hasn't. Integer-only on purpose: no floating point in the kernel, and no subtraction from sleep_avg so an unsigned value cannot wrap. */ #define TASK_INTERACTIVE(p) \ - ((p)->prio <= (p)->static_prio - DELTA(p)) + ((p)->sleep_avg > MAX_SLEEP_AVG/10 + \ + TASK_USER_PRIO(p)*MAX_SLEEP_AVG / MAX_USER_PRIO) /* * BASE_TIMESLICE scales user-nice values [ -20 ... 19 ] @@ -109,14 +108,20 @@ * priority thread gets MIN_TIMESLICE worth of execution time. * * task_timeslice() is the interface that is used by the scheduler. + * + * IMHO this should be on a logarithmic rather than geometric scale. */ #define BASE_TIMESLICE(p) (MIN_TIMESLICE + \ ((MAX_TIMESLICE - MIN_TIMESLICE) * \ - (MAX_PRIO-1-(p)->static_prio)/(MAX_USER_PRIO - 1))) + (MAX_PRIO_USER-1-(p)->prio)/(MAX_PRIO_USER - 1))) static inline unsigned int task_timeslice(task_t *p) { - return BASE_TIMESLICE(p); + if (p->prio >= MAX_PRIO_USER) { + return MIN_TIMESLICE; + } else { + return BASE_TIMESLICE(p); + } } /* @@ -238,23 +243,52 @@ * priority but is modified by bonuses/penalties. * * We scale the actual sleep average [0 .... MAX_SLEEP_AVG] - * into the -5 ... 0 ... +5 bonus/penalty range. + * into a -4 ... 0 ... +4 bonus/penalty range. * - * We use 25% of the full 0...39 priority range so that: + * Additionally, we scale another amount based on the number of CPU + * tokens currently held by the s_context, if the process is part of + * an s_context (and the appropriate SCHED flag is set). This ranges + * from -5 ... 0 ... +15. 
+ * + * So, the total bonus is -9 .. 0 .. +19 + * + * We use ~ 50% of the full 0...39 priority range so that: * * 1) nice +19 interactive tasks do not preempt nice 0 CPU hogs. - * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks. + * 2) nice -20 CPU hogs do not get preempted by nice 0 tasks, unless + * that security context is far exceeding its CPU allocation. * * Both properties are important to certain workloads. */ static inline int effective_prio(task_t *p) { - int bonus, prio; + int bonus, prio, vavavoom, max; bonus = MAX_USER_PRIO*PRIO_BONUS_RATIO*p->sleep_avg/MAX_SLEEP_AVG/100 - MAX_USER_PRIO*PRIO_BONUS_RATIO/100/2; - prio = p->static_prio - bonus; + /* lots of tokens = lots of vavavoom + no tokens = no vavavoom; a bucket with tokens_max <= 0 gets no adjustment, which avoids dividing by zero below */ + if ((vavavoom = tokens_left(p)) >= 0 && p->s_info->tokens_max > 0) + { + max = p->s_info->tokens_max; + vavavoom = max - vavavoom; + max = max * max; + vavavoom = MAX_USER_PRIO * VAVAVOOM_RATIO / 100 + * (vavavoom*vavavoom - (max >> 2)) / max; + + /* alternative, geometric mapping + vavavoom = -( MAX_USER_PRIO*VAVAVOOM_RATIO/100 * vavavoom + / p->s_info->tokens_max - + MAX_USER_PRIO*VAVAVOOM_RATIO/100/2 );*/ + } else + { + vavavoom = 0; + } + /* vavavoom = ( MAX_USER_PRIO*VAVAVOOM_RATIO/100*tokens_left(p) - + MAX_USER_PRIO*VAVAVOOM_RATIO/100/2 ); */ + + prio = p->static_prio - bonus + vavavoom; if (prio < MAX_RT_PRIO) prio = MAX_RT_PRIO; if (prio > MAX_PRIO-1) @@ -263,6 +297,56 @@ } /* + * tokens_left - return the number of tokens in the process' + * s_context's cpu token bucket, allocating more tokens + * if necessary. + * + * Returns -1 if the process does not have a bucket. 
+ */ +int tokens_left(struct task_struct *tsk) +{ + + if (tsk->s_info && (tsk->s_info->flags & S_CTX_INFO_SCHED)) + { + /* see if any more tokens are due */ + tokens_alloc(tsk); + + return tsk->s_info->tokens; + } + else + { + return -1; + } +} + +/* + * tokens_recalc - recalculate the s_context's scheduling tokens + * + * a spinlock on tsk->s_info->tokens_lock must be acquired before + * calling this function. Returns 1 if tokens were added, 0 if no refill was due (or tokens_div is not positive). + */ +int tokens_recalc(struct task_struct *tsk) +{ + unsigned long diff; + + diff = jiffies - tsk->s_info->tokens_jfy; + + if (tsk->s_info->tokens_div <= 0 || diff < (unsigned long)tsk->s_info->tokens_div) + { + return 0; + } + + diff = ( diff / tsk->s_info->tokens_div ); + + tsk->s_info->tokens += diff * tsk->s_info->tokens_fr; + tsk->s_info->tokens_jfy += diff * tsk->s_info->tokens_div; + if (tsk->s_info->tokens > tsk->s_info->tokens_max) + tsk->s_info->tokens = tsk->s_info->tokens_max; + + return 1; +} + +/* * activate_task - move a task to the runqueue. * * Also update all the scheduling statistics stuff. (sleep average @@ -865,6 +949,7 @@ */ if (p->sleep_avg) p->sleep_avg--; + if (!--p->time_slice) { dequeue_task(p, rq->active); set_tsk_need_resched(p); diff -urN linux-2.4.21-pre4-ac6-ctx16-rough/kernel/sys.c linux-2.4.21-pre4-ac6-ctx16/kernel/sys.c --- linux-2.4.21-pre4-ac6-ctx16-rough/kernel/sys.c Tue Feb 25 06:30:15 2003 +++ linux-2.4.21-pre4-ac6-ctx16/kernel/sys.c Mon Feb 24 22:42:07 2003 @@ -1063,9 +1063,11 @@ if (p->s_info != NULL){ p->s_info->refcount--; if (p->s_info->refcount == 0){ - // printk ("vfree s_info %d\n",p->pid); + //printk ("vfree s_info %d (%x)\n",p->pid,p->s_info); vfree (p->s_info); p->s_info = NULL; + //} else { + //printk ("-- s_info %d (%x)\n",p->pid,p->s_info); } } up_write (&uts_sem); @@ -1076,7 +1078,10 @@ void sys_assign_s_info (struct task_struct *p) { down_write (&uts_sem); - if (p->s_info != NULL) p->s_info->refcount++; + if (p->s_info != NULL) { + p->s_info->refcount++; + // printk("++ s_info %d (%x)\n", p->pid,p->s_info); + } up_write (&uts_sem); } @@ -1089,10 
+1094,18 @@ struct context_info *s_info = vmalloc(sizeof(struct context_info)); if (s_info != NULL){ memset (s_info,0,sizeof(*s_info)); - // printk ("new s_info %d\n",current->pid); + //printk ("new s_info %d (%x)\n",current->pid, s_info); s_info->s_context[0] = current->s_context; s_info->refcount = 1; - atomic_set (&s_info->ticks,current->counter); + + /* scheduling; hard code as constants for now: refill 1 token per 4 jiffies, start half full, capacity HZ*10 tokens */ + s_info->tokens_fr = 1; + s_info->tokens_div = 4; + s_info->tokens = HZ * 5; + s_info->tokens_max = HZ * 10; + s_info->tokens_jfy = jiffies; + spin_lock_init(&s_info->tokens_lock); + s_info->flags = 0; s_info->initpid = 0; down_read (&uts_sem); @@ -1119,8 +1132,10 @@ down_write (&uts_sem); ip_info->refcount--; if (ip_info->refcount == 0){ - // printk ("vfree s_info %d\n",p->pid); + //printk ("vfree ip_info %d (%x)\n",current->pid, ip_info); vfree (ip_info); + //} else { + //printk ("-- ip_info %d (%x)\n",current->pid, ip_info); } up_write (&uts_sem); } @@ -1133,6 +1148,7 @@ if (ip_info != NULL){ down_write (&uts_sem); ip_info->refcount++; + //printk ("++ ip_info %d (%x)\n",current->pid, ip_info); up_write (&uts_sem); } } @@ -1144,7 +1160,7 @@ void sys_alloc_ip_info() { struct iproot_info *ip_info = vmalloc(sizeof(struct iproot_info)); - // printk ("new s_info %d\n",current->pid); + //printk ("new ip_info %d (%x)\n",current->pid, ip_info); memset (ip_info,0,sizeof(*ip_info)); ip_info->refcount = 1; sys_release_ip_info (current->ip_info); diff -urN linux-2.4.21-pre4-ac6-ctx16-rough/kernel/timer.c linux-2.4.21-pre4-ac6-ctx16/kernel/timer.c --- linux-2.4.21-pre4-ac6-ctx16-rough/kernel/timer.c Tue Feb 25 06:30:15 2003 +++ linux-2.4.21-pre4-ac6-ctx16/kernel/timer.c Mon Feb 24 19:34:37 2003 @@ -599,11 +599,8 @@ struct task_struct *p = current; int cpu = smp_processor_id(), system = user_tick ^ 1; - if (p->s_info != NULL - && (p->s_info->flags & S_CTX_INFO_SCHED)!=0){ - // atomic_sub (ticks*p->s_info->refcount, &p->s_info->ticks); - atomic_dec (&p->s_info->ticks); - } + 
eat_token(p); /* charge this tick to the task's context token bucket; eat_token() does nothing unless S_CTX_INFO_SCHED is set */ + update_one_process(p, user_tick, system, cpu); scheduler_tick(user_tick, system); } diff -urN linux-2.4.21-pre4-ac6-ctx16-rough/net/ipv4/tcp_ipv4.c linux-2.4.21-pre4-ac6-ctx16/net/ipv4/tcp_ipv4.c --- linux-2.4.21-pre4-ac6-ctx16-rough/net/ipv4/tcp_ipv4.c Tue Feb 25 06:30:15 2003 +++ linux-2.4.21-pre4-ac6-ctx16/net/ipv4/tcp_ipv4.c Mon Feb 24 19:19:50 2003 @@ -513,11 +513,11 @@ #else score = 1; #endif - if(rcv_saddr) { - if (rcv_saddr != daddr) - continue; + if (tcp_addr_in_list(rcv_saddr,daddr,sk->ip_info)){ score+=2; - } + }else{ + continue; + } if (sk->bound_dev_if) { if (sk->bound_dev_if != dif) continue; diff -urN linux-2.4.21-pre4-ac6-ctx16-rough/net/ipv4/udp.c linux-2.4.21-pre4-ac6-ctx16/net/ipv4/udp.c --- linux-2.4.21-pre4-ac6-ctx16-rough/net/ipv4/udp.c Tue Feb 25 06:30:15 2003 +++ linux-2.4.21-pre4-ac6-ctx16/net/ipv4/udp.c Mon Feb 24 20:00:47 2003 @@ -233,17 +233,17 @@ struct sock *sk, *result = NULL; unsigned short hnum = ntohs(dport); int badness = -1; + int score; for(sk = udp_hash[hnum & (UDP_HTABLE_SIZE - 1)]; sk != NULL; sk = sk->next) { if(sk->num == hnum && !ipv6_only_sock(sk)) { - int score; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) score = sk->family == PF_INET ? 1 : 0; #else score = 1; }else if (sk->ip_info != NULL){ if (udp_in_list (sk->ip_info,daddr)){ - score++; + score+=2; }else{ continue; }