| File: | kern/kern_sched.c |
| Warning: | line 774, column 29 The left operand of '&' is a garbage value |
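The flagged expression is the read of cs->cs_set[num/32] in cpuset_isset() at source line 774 below. A reduced, hypothetical sketch of the general pattern such a checker complains about — masking a word of a stack-allocated set that was never fully written — is shown here; the struct, CPUSET_WORDS, isset() and main() are inventions for illustration and do not claim to reproduce the analyzer's actual path through the kernel:

    #define CPUSET_WORDS 2

    struct cpuset {
            unsigned int cs_set[CPUSET_WORDS];
    };

    /* Reduced model of cpuset_isset(): load one word, then mask it. */
    static int
    isset(struct cpuset *cs, unsigned int num)
    {
            return (cs->cs_set[num / 32] & (1 << (num % 32)));
    }

    int
    main(void)
    {
            struct cpuset set;      /* automatic storage, not zeroed */

            /*
             * Only word 0 is written before the call; for num == 40 the
             * left operand of '&' is loaded from cs_set[1], which was
             * never stored to -- the "garbage value" the checker means.
             */
            set.cs_set[0] = 0x1;
            return (isset(&set, 40));
    }

Whether the corresponding kernel path is reachable depends on how callers such as sched_choosecpu() fill their on-stack struct cpuset before calling cpuset_isset(); the summary line above only records the location.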
| 1 | /* $OpenBSD: kern_sched.c,v 1.73 2021/09/09 18:41:39 mpi Exp $ */ | |||
| 2 | /* | |||
| 3 | * Copyright (c) 2007, 2008 Artur Grabowski <art@openbsd.org> | |||
| 4 | * | |||
| 5 | * Permission to use, copy, modify, and distribute this software for any | |||
| 6 | * purpose with or without fee is hereby granted, provided that the above | |||
| 7 | * copyright notice and this permission notice appear in all copies. | |||
| 8 | * | |||
| 9 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |||
| 10 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |||
| 11 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |||
| 12 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |||
| 13 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |||
| 14 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |||
| 15 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |||
| 16 | */ | |||
| 17 | ||||
| 18 | #include <sys/param.h> | |||
| 19 | ||||
| 20 | #include <sys/sched.h> | |||
| 21 | #include <sys/proc.h> | |||
| 22 | #include <sys/kthread.h> | |||
| 23 | #include <sys/systm.h> | |||
| 24 | #include <sys/resourcevar.h> | |||
| 25 | #include <sys/signalvar.h> | |||
| 26 | #include <sys/mutex.h> | |||
| 27 | #include <sys/task.h> | |||
| 28 | #include <sys/smr.h> | |||
| 29 | #include <sys/tracepoint.h> | |||
| 30 | ||||
| 31 | #include <uvm/uvm_extern.h> | |||
| 32 | ||||
| 33 | void sched_kthreads_create(void *); | |||
| 34 | ||||
| 35 | int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p); | |||
| 36 | struct proc *sched_steal_proc(struct cpu_info *); | |||
| 37 | ||||
| 38 | /* | |||
| 39 | * To help choosing which cpu should run which process we keep track | |||
| 40 | * of cpus which are currently idle and which cpus have processes | |||
| 41 | * queued. | |||
| 42 | */ | |||
| 43 | struct cpuset sched_idle_cpus; | |||
| 44 | struct cpuset sched_queued_cpus; | |||
| 45 | struct cpuset sched_all_cpus; | |||
| 46 | ||||
| 47 | /* | |||
| 48 | * Some general scheduler counters. | |||
| 49 | */ | |||
| 50 | uint64_t sched_nmigrations; /* Cpu migration counter */ | |||
| 51 | uint64_t sched_nomigrations; /* Cpu no migration counter */ | |||
| 52 | uint64_t sched_noidle; /* Times we didn't pick the idle task */ | |||
| 53 | uint64_t sched_stolen; /* Times we stole proc from other cpus */ | |||
| 54 | uint64_t sched_choose; /* Times we chose a cpu */ | |||
| 55 | uint64_t sched_wasidle; /* Times we came out of idle */ | |||
| 56 | ||||
| 57 | int sched_smt; | |||
| 58 | ||||
| 59 | /* | |||
| 60 | * A few notes about cpu_switchto that is implemented in MD code. | |||
| 61 | * | |||
| 62 | * cpu_switchto takes two arguments, the old proc and the proc | |||
| 63 | * it should switch to. The new proc will never be NULL, so we always have | |||
| 64 | * a saved state that we need to switch to. The old proc however can | |||
| 65 | * be NULL if the process is exiting. NULL for the old proc simply | |||
| 66 | * means "don't bother saving old state". | |||
| 67 | * | |||
| 68 | * cpu_switchto is supposed to atomically load the new state of the process | |||
| 69 | * including the pcb, pmap and setting curproc, the p_cpu pointer in the | |||
| 70 | * proc and p_stat to SONPROC. Atomically with respect to interrupts, other | |||
| 71 | * cpus in the system must not depend on this state being consistent. | |||
| 72 | * Therefore no locking is necessary in cpu_switchto other than blocking | |||
| 73 | * interrupts during the context switch. | |||
| 74 | */ | |||
| 75 | ||||
| 76 | /* | |||
| 77 | * sched_init_cpu is called from main() for the boot cpu, then it's the | |||
| 78 | * responsibility of the MD code to call it for all other cpus. | |||
| 79 | */ | |||
| 80 | void | |||
| 81 | sched_init_cpu(struct cpu_info *ci) | |||
| 82 | { | |||
| 83 | struct schedstate_percpu *spc = &ci->ci_schedstate; | |||
| 84 | int i; | |||
| 85 | ||||
| 86 | for (i = 0; i < SCHED_NQS; i++) | |||
| 87 | TAILQ_INIT(&spc->spc_qs[i]); | |||
| 88 | ||||
| 89 | spc->spc_idleproc = NULL; | |||
| 90 | ||||
| 91 | kthread_create_deferred(sched_kthreads_create, ci); | |||
| 92 | ||||
| 93 | LIST_INIT(&spc->spc_deadproc); | |||
| 94 | SIMPLEQ_INIT(&spc->spc_deferred); | |||
| 95 | ||||
| 96 | /* | |||
| 97 | * Slight hack here until the cpuset code handles cpu_info | |||
| 98 | * structures. | |||
| 99 | */ | |||
| 100 | cpuset_init_cpu(ci); | |||
| 101 | ||||
| 102 | #ifdef __HAVE_CPU_TOPOLOGY | |||
| 103 | if (!sched_smt && ci->ci_smt_id > 0) | |||
| 104 | return; | |||
| 105 | #endif | |||
| 106 | cpuset_add(&sched_all_cpus, ci); | |||
| 107 | } | |||
| 108 | ||||
| 109 | void | |||
| 110 | sched_kthreads_create(void *v) | |||
| 111 | { | |||
| 112 | struct cpu_info *ci = v; | |||
| 113 | struct schedstate_percpu *spc = &ci->ci_schedstate; | |||
| 114 | static int num; | |||
| 115 | ||||
| 116 | if (fork1(&proc0, FORK_SHAREVM|FORK_SHAREFILES|FORK_NOZOMBIE| | |||
| 117 | FORK_SYSTEM|FORK_IDLE, sched_idle, ci, NULL, | |||
| 118 | &spc->spc_idleproc)) | |||
| 119 | panic("fork idle"); | |||
| 120 | ||||
| 121 | /* Name it as specified. */ | |||
| 122 | snprintf(spc->spc_idleproc->p_p->ps_comm, | |||
| 123 | sizeof(spc->spc_idleproc->p_p->ps_comm), | |||
| 124 | "idle%d", num); | |||
| 125 | ||||
| 126 | num++; | |||
| 127 | } | |||
| 128 | ||||
| 129 | void | |||
| 130 | sched_idle(void *v) | |||
| 131 | { | |||
| 132 | struct schedstate_percpu *spc; | |||
| 133 | struct proc *p = curproc; | |||
| 134 | struct cpu_info *ci = v; | |||
| 135 | int s; | |||
| 136 | ||||
| 137 | KERNEL_UNLOCK(); | |||
| 138 | ||||
| 139 | spc = &ci->ci_schedstate; | |||
| 140 | ||||
| 141 | /* | |||
| 142 | * First time we enter here, we're not supposed to idle, | |||
| 143 | * just go away for a while. | |||
| 144 | */ | |||
| 145 | SCHED_LOCK(s); | |||
| 146 | cpuset_add(&sched_idle_cpus, ci); | |||
| 147 | p->p_stat = SSLEEP; | |||
| 148 | p->p_cpu = ci; | |||
| 149 | atomic_setbits_int(&p->p_flag, P_CPUPEG); | |||
| 150 | mi_switch(); | |||
| 151 | cpuset_del(&sched_idle_cpus, ci); | |||
| 152 | SCHED_UNLOCK(s); | |||
| 153 | ||||
| 154 | KASSERT(ci == curcpu()); | |||
| 155 | KASSERT(curproc == spc->spc_idleproc); | |||
| 156 | ||||
| 157 | while (1) { | |||
| 158 | while (!cpu_is_idle(curcpu())) { | |||
| 159 | struct proc *dead; | |||
| 160 | ||||
| 161 | SCHED_LOCK(s); | |||
| 162 | p->p_stat = SSLEEP; | |||
| 163 | mi_switch(); | |||
| 164 | SCHED_UNLOCK(s); | |||
| 165 | ||||
| 166 | while ((dead = LIST_FIRST(&spc->spc_deadproc))) { | |||
| 167 | LIST_REMOVE(dead, p_hash); | |||
| 168 | exit2(dead); | |||
| 169 | } | |||
| 170 | } | |||
| 171 | ||||
| 172 | splassert(IPL_NONE); | |||
| 173 | ||||
| 174 | smr_idle(); | |||
| 175 | ||||
| 176 | cpuset_add(&sched_idle_cpus, ci); | |||
| 177 | cpu_idle_enter(); | |||
| 178 | while (spc->spc_whichqs == 0) { | |||
| 179 | #ifdef MULTIPROCESSOR | |||
| 180 | if (spc->spc_schedflags & SPCF_SHOULDHALT && | |||
| 181 | (spc->spc_schedflags & SPCF_HALTED) == 0) { | |||
| 182 | cpuset_del(&sched_idle_cpus, ci); | |||
| 183 | SCHED_LOCK(s); | |||
| 184 | atomic_setbits_int(&spc->spc_schedflags, | |||
| 185 | spc->spc_whichqs ? 0 : SPCF_HALTED); | |||
| 186 | SCHED_UNLOCK(s); | |||
| 187 | wakeup(spc); | |||
| 188 | } | |||
| 189 | #endif | |||
| 190 | cpu_idle_cycle(); | |||
| 191 | } | |||
| 192 | cpu_idle_leave(); | |||
| 193 | cpuset_del(&sched_idle_cpus, ci); | |||
| 194 | } | |||
| 195 | } | |||
| 196 | ||||
| 197 | /* | |||
| 198 | * To free our address space we have to jump through a few hoops. | |||
| 199 | * The freeing is done by the reaper, but until we have one reaper | |||
| 200 | * per cpu, we have no way of putting this proc on the deadproc list | |||
| 201 | * and waking up the reaper without risking having our address space and | |||
| 202 | * stack torn from under us before we manage to switch to another proc. | |||
| 203 | * Therefore we have a per-cpu list of dead processes where we put this | |||
| 204 | * proc and have idle clean up that list and move it to the reaper list. | |||
| 205 | * All this will be unnecessary once we can bind the reaper this cpu | |||
| 206 | * and not risk having it switch to another in case it sleeps. | |||
| 207 | */ | |||
| 208 | void | |||
| 209 | sched_exit(struct proc *p) | |||
| 210 | { | |||
| 211 | struct schedstate_percpu *spc = &curcpu()->ci_schedstate; | |||
| 212 | struct timespec ts; | |||
| 213 | struct proc *idle; | |||
| 214 | int s; | |||
| 215 | ||||
| 216 | nanouptime(&ts); | |||
| 217 | timespecsub(&ts, &spc->spc_runtime, &ts); | |||
| 218 | timespecadd(&p->p_rtime, &ts, &p->p_rtime); | |||
| 219 | ||||
| 220 | LIST_INSERT_HEAD(&spc->spc_deadproc, p, p_hash); | |||
| 221 | ||||
| 222 | #ifdef MULTIPROCESSOR | |||
| 223 | /* This process no longer needs to hold the kernel lock. */ | |||
| 224 | KERNEL_ASSERT_LOCKED(); | |||
| 225 | __mp_release_all(&kernel_lock); | |||
| 226 | #endif | |||
| 227 | ||||
| 228 | SCHED_LOCK(s); | |||
| 229 | idle = spc->spc_idleproc; | |||
| 230 | idle->p_stat = SRUN; | |||
| 231 | cpu_switchto(NULL, idle); | |||
| 232 | panic("cpu_switchto returned"); | |||
| 233 | } | |||
| 234 | ||||
| 235 | /* | |||
| 236 | * Run queue management. | |||
| 237 | */ | |||
| 238 | void | |||
| 239 | sched_init_runqueues(void) | |||
| 240 | { | |||
| 241 | } | |||
| 242 | ||||
| 243 | void | |||
| 244 | setrunqueue(struct cpu_info *ci, struct proc *p, uint8_t prio) | |||
| 245 | { | |||
| 246 | struct schedstate_percpu *spc; | |||
| 247 | int queue = prio >> 2; | |||
| 248 | ||||
| 249 | if (ci == NULL) | |||
| 250 | ci = sched_choosecpu(p); | |||
| 251 | ||||
| 252 | KASSERT(ci != NULL); | |||
| 253 | SCHED_ASSERT_LOCKED(); | |||
| 254 | ||||
| 255 | p->p_cpu = ci; | |||
| 256 | p->p_stat = SRUN; | |||
| 257 | p->p_runpri = prio; | |||
| 258 | ||||
| 259 | spc = &p->p_cpu->ci_schedstate; | |||
| 260 | spc->spc_nrun++; | |||
| 261 | TRACEPOINT(sched, enqueue, p->p_tid + THREAD_PID_OFFSET, | |||
| 262 | p->p_p->ps_pid); | |||
| 263 | ||||
| 264 | TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq); | |||
| 265 | spc->spc_whichqs |= (1 << queue); | |||
| 266 | cpuset_add(&sched_queued_cpus, p->p_cpu); | |||
| 267 | ||||
| 268 | if (cpuset_isset(&sched_idle_cpus, p->p_cpu)) | |||
| 269 | cpu_unidle(p->p_cpu); | |||
| 270 | ||||
| 271 | if (prio < spc->spc_curpriority) | |||
| 272 | need_resched(ci); | |||
| 273 | } | |||
| 274 | ||||
| 275 | void | |||
| 276 | remrunqueue(struct proc *p) | |||
| 277 | { | |||
| 278 | struct schedstate_percpu *spc; | |||
| 279 | int queue = p->p_runpri >> 2; | |||
| 280 | ||||
| 281 | SCHED_ASSERT_LOCKED(); | |||
| 282 | spc = &p->p_cpu->ci_schedstate; | |||
| 283 | spc->spc_nrun--; | |||
| 284 | TRACEPOINT(sched, dequeue, p->p_tid + THREAD_PID_OFFSET, | |||
| 285 | p->p_p->ps_pid); | |||
| 286 | ||||
| 287 | TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq); | |||
| 288 | if (TAILQ_EMPTY(&spc->spc_qs[queue])) { | |||
| 289 | spc->spc_whichqs &= ~(1 << queue); | |||
| 290 | if (spc->spc_whichqs == 0) | |||
| 291 | cpuset_del(&sched_queued_cpus, p->p_cpu); | |||
| 292 | } | |||
| 293 | } | |||
| 294 | ||||
| 295 | struct proc * | |||
| 296 | sched_chooseproc(void) | |||
| 297 | { | |||
| 298 | struct schedstate_percpu *spc = &curcpu()->ci_schedstate; | |||
| 299 | struct proc *p; | |||
| 300 | int queue; | |||
| 301 | ||||
| 302 | SCHED_ASSERT_LOCKED(); | |||
| 303 | ||||
| 304 | #ifdef MULTIPROCESSOR | |||
| 305 | if (spc->spc_schedflags & SPCF_SHOULDHALT) { | |||
| 306 | if (spc->spc_whichqs) { | |||
| 307 | for (queue = 0; queue < SCHED_NQS; queue++) { | |||
| 308 | while ((p = TAILQ_FIRST(&spc->spc_qs[queue]))) { | |||
| 309 | remrunqueue(p); | |||
| 310 | setrunqueue(NULL, p, p->p_runpri); | |||
| 311 | if (p->p_cpu == curcpu()) { | |||
| 312 | KASSERT(p->p_flag & P_CPUPEG); | |||
| 313 | goto again; | |||
| 314 | } | |||
| 315 | } | |||
| 316 | } | |||
| 317 | } | |||
| 318 | p = spc->spc_idleproc; | |||
| 319 | KASSERT(p); | |||
| 320 | KASSERT(p->p_wchan == NULL); | |||
| 321 | p->p_stat = SRUN; | |||
| 322 | return (p); | |||
| 323 | } | |||
| 324 | #endif | |||
| 325 | ||||
| 326 | again: | |||
| 327 | if (spc->spc_whichqs) { | |||
| 328 | queue = ffs(spc->spc_whichqs) - 1; | |||
| 329 | p = TAILQ_FIRST(&spc->spc_qs[queue]); | |||
| 330 | remrunqueue(p); | |||
| 331 | sched_noidle++; | |||
| 332 | if (p->p_stat != SRUN) | |||
| 333 | panic("thread %d not in SRUN: %d", p->p_tid, p->p_stat); | |||
| 334 | } else if ((p = sched_steal_proc(curcpu())) == NULL) { | |||
| 335 | p = spc->spc_idleproc; | |||
| 336 | if (p == NULL) { | |||
| 337 | int s; | |||
| 338 | /* | |||
| 339 | * We get here if someone decides to switch during | |||
| 340 | * boot before forking kthreads, bleh. | |||
| 341 | * This is kind of like a stupid idle loop. | |||
| 342 | */ | |||
| 343 | #ifdef MULTIPROCESSOR | |||
| 344 | __mp_unlock(&sched_lock); | |||
| 345 | #endif | |||
| 346 | spl0(); | |||
| 347 | delay(10); | |||
| 348 | SCHED_LOCK(s); | |||
| 349 | goto again; | |||
| 350 | } | |||
| 351 | KASSERT(p); | |||
| 352 | p->p_stat = SRUN; | |||
| 353 | } | |||
| 354 | ||||
| 355 | KASSERT(p->p_wchan == NULL); | |||
| 356 | return (p); | |||
| 357 | } | |||
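A note on the queue pick a few lines up: spc_whichqs carries one bit per run queue, and ffs() (the standard <strings.h> find-first-set) returns the 1-based index of the lowest set bit, so the lowest-numbered non-empty queue wins. A small standalone check, with made-up queue numbers:

    #include <assert.h>
    #include <strings.h>

    int
    main(void)
    {
            unsigned int whichqs = 0;

            whichqs |= 1 << 7;              /* something queued at queue 7 */
            whichqs |= 1 << 3;              /* and at queue 3 */

            assert(ffs(whichqs) - 1 == 3);  /* queue 3 is served first */

            whichqs &= ~(1 << 3);           /* queue 3 drained ... */
            assert(ffs(whichqs) - 1 == 7);  /* ... then queue 7 */
            return (0);
    }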
| 358 | ||||
| 359 | struct cpu_info * | |||
| 360 | sched_choosecpu_fork(struct proc *parent, int flags) | |||
| 361 | { | |||
| 362 | #ifdef MULTIPROCESSOR | |||
| 363 | struct cpu_info *choice = NULL; | |||
| 364 | fixpt_t load, best_load = ~0; | |||
| 365 | int run, best_run = INT_MAX; | |||
| 366 | struct cpu_info *ci; | |||
| 367 | struct cpuset set; | |||
| 368 | ||||
| 369 | #if 0 | |||
| 370 | /* | |||
| 371 | * XXX | |||
| 372 | * Don't do this until we have a painless way to move the cpu in exec. | |||
| 373 | * Preferably when nuking the old pmap and getting a new one on a | |||
| 374 | * new cpu. | |||
| 375 | */ | |||
| 376 | /* | |||
| 377 | * PPWAIT forks are simple. We know that the parent will not | |||
| 378 | * run until we exec and choose another cpu, so we just steal its | |||
| 379 | * cpu. | |||
| 380 | */ | |||
| 381 | if (flags & FORK_PPWAIT) | |||
| 382 | return (parent->p_cpu); | |||
| 383 | #endif | |||
| 384 | ||||
| 385 | /* | |||
| 386 | * Look at all cpus that are currently idle and have nothing queued. | |||
| 387 | * If there are none, pick the one with least queued procs first, | |||
| 388 | * then the one with lowest load average. | |||
| 389 | */ | |||
| 390 | cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus); | |||
| 391 | cpuset_intersection(&set, &set, &sched_all_cpus); | |||
| 392 | if (cpuset_first(&set) == NULL) | |||
| 393 | cpuset_copy(&set, &sched_all_cpus); | |||
| 394 | ||||
| 395 | while ((ci = cpuset_first(&set)) != NULL) { | |||
| 396 | cpuset_del(&set, ci); | |||
| 397 | ||||
| 398 | load = ci->ci_schedstate.spc_ldavg; | |||
| 399 | run = ci->ci_schedstate.spc_nrun; | |||
| 400 | ||||
| 401 | if (choice == NULL || run < best_run || | |||
| 402 | (run == best_run && load < best_load)) { | |||
| 403 | choice = ci; | |||
| 404 | best_load = load; | |||
| 405 | best_run = run; | |||
| 406 | } | |||
| 407 | } | |||
| 408 | ||||
| 409 | return (choice); | |||
| 410 | #else | |||
| 411 | return (curcpu()); | |||
| 412 | #endif | |||
| 413 | } | |||
| 414 | ||||
| 415 | struct cpu_info * | |||
| 416 | sched_choosecpu(struct proc *p) | |||
| 417 | { | |||
| 418 | #ifdef MULTIPROCESSOR | |||
| 419 | struct cpu_info *choice = NULL; | |||
| 420 | int last_cost = INT_MAX; | |||
| 421 | struct cpu_info *ci; | |||
| 422 | struct cpuset set; | |||
| 423 | ||||
| 424 | /* | |||
| 425 | * If pegged to a cpu, don't allow it to move. | |||
| 426 | */ | |||
| 427 | if (p->p_flag & P_CPUPEG) | |||
| 428 | return (p->p_cpu); | |||
| 429 | ||||
| 430 | sched_choose++; | |||
| 431 | ||||
| 432 | /* | |||
| 433 | * Look at all cpus that are currently idle and have nothing queued. | |||
| 434 | * If there are none, pick the cheapest of those. | |||
| 435 | * (idle + queued could mean that the cpu is handling an interrupt | |||
| 436 | * at this moment and haven't had time to leave idle yet). | |||
| 437 | */ | |||
| 438 | cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus); | |||
| 439 | cpuset_intersection(&set, &set, &sched_all_cpus); | |||
| 440 | ||||
| 441 | /* | |||
| 442 | * First, just check if our current cpu is in that set, if it is, | |||
| 443 | * this is simple. | |||
| 444 | * Also, our cpu might not be idle, but if it's the current cpu | |||
| 445 | * and it has nothing else queued and we're curproc, take it. | |||
| 446 | */ | |||
| 447 | if (cpuset_isset(&set, p->p_cpu) || | |||
| 448 | (p->p_cpu == curcpu() && p->p_cpu->ci_schedstate.spc_nrun == 0 && | |||
| 449 | (p->p_cpu->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0 && | |||
| 450 | curproc == p)) { | |||
| 451 | sched_wasidle++; | |||
| 452 | return (p->p_cpu); | |||
| 453 | } | |||
| 454 | ||||
| 455 | if (cpuset_first(&set) == NULL) | |||
| 456 | cpuset_copy(&set, &sched_all_cpus); | |||
| 457 | ||||
| 458 | while ((ci = cpuset_first(&set)) != NULL) { | |||
| 459 | int cost = sched_proc_to_cpu_cost(ci, p); | |||
| 460 | ||||
| 461 | if (choice == NULL || cost < last_cost) { | |||
| 462 | choice = ci; | |||
| 463 | last_cost = cost; | |||
| 464 | } | |||
| 465 | cpuset_del(&set, ci); | |||
| 466 | } | |||
| 467 | ||||
| 468 | if (p->p_cpu != choice) | |||
| 469 | sched_nmigrations++; | |||
| 470 | else | |||
| 471 | sched_nomigrations++; | |||
| 472 | ||||
| 473 | return (choice); | |||
| 474 | #else | |||
| 475 | return (curcpu()); | |||
| 476 | #endif | |||
| 477 | } | |||
| 478 | ||||
| 479 | /* | |||
| 480 | * Attempt to steal a proc from some cpu. | |||
| 481 | */ | |||
| 482 | struct proc * | |||
| 483 | sched_steal_proc(struct cpu_info *self) | |||
| 484 | { | |||
| 485 | struct proc *best = NULL; | |||
| 486 | #ifdef MULTIPROCESSOR | |||
| 487 | struct schedstate_percpu *spc; | |||
| 488 | int bestcost = INT_MAX; | |||
| 489 | struct cpu_info *ci; | |||
| 490 | struct cpuset set; | |||
| 491 | ||||
| 492 | KASSERT((self->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0); | |||
| 493 | ||||
| 494 | /* Don't steal if we don't want to schedule processes in this CPU. */ | |||
| 495 | if (!cpuset_isset(&sched_all_cpus, self)) | |||
| 496 | return (NULL); | |||
| 497 | ||||
| 498 | cpuset_copy(&set, &sched_queued_cpus); | |||
| 499 | ||||
| 500 | while ((ci = cpuset_first(&set)) != NULL) { | |||
| 501 | struct proc *p; | |||
| 502 | int queue; | |||
| 503 | int cost; | |||
| 504 | ||||
| 505 | cpuset_del(&set, ci); | |||
| 506 | ||||
| 507 | spc = &ci->ci_schedstate; | |||
| 508 | ||||
| 509 | queue = ffs(spc->spc_whichqs) - 1; | |||
| 510 | TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) { | |||
| 511 | if (p->p_flag & P_CPUPEG) | |||
| 512 | continue; | |||
| 513 | ||||
| 514 | cost = sched_proc_to_cpu_cost(self, p); | |||
| 515 | ||||
| 516 | if (best == NULL || cost < bestcost) { | |||
| 517 | best = p; | |||
| 518 | bestcost = cost; | |||
| 519 | } | |||
| 520 | } | |||
| 521 | } | |||
| 522 | if (best == NULL) | |||
| 523 | return (NULL); | |||
| 524 | ||||
| 525 | remrunqueue(best); | |||
| 526 | best->p_cpu = self; | |||
| 527 | ||||
| 528 | sched_stolen++; | |||
| 529 | #endif | |||
| 530 | return (best); | |||
| 531 | } | |||
| 532 | ||||
| 533 | #ifdef MULTIPROCESSOR | |||
| 534 | /* | |||
| 535 | * Base 2 logarithm of an int. returns 0 for 0 (yeye, I know). | |||
| 536 | */ | |||
| 537 | static int | |||
| 538 | log2(unsigned int i) | |||
| 539 | { | |||
| 540 | int ret = 0; | |||
| 541 | ||||
| 542 | while (i >>= 1) | |||
| 543 | ret++; | |||
| 544 | ||||
| 545 | return (ret); | |||
| 546 | } | |||
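A few spot checks of the helper above (renamed ilog2 here only to avoid the libm log2 clash in a userland test; the loop body is the same):

    #include <assert.h>

    static int
    ilog2(unsigned int i)
    {
            int ret = 0;

            while (i >>= 1)
                    ret++;
            return (ret);
    }

    int
    main(void)
    {
            assert(ilog2(0) == 0);          /* the documented 0 case */
            assert(ilog2(1) == 0);
            assert(ilog2(1024) == 10);
            assert(ilog2(1500) == 10);      /* truncates downward */
            return (0);
    }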
| 547 | ||||
| 548 | /* | |||
| 549 | * Calculate the cost of moving the proc to this cpu. | |||
| 550 | * | |||
| 551 | * What we want is some guesstimate of how much "performance" it will | |||
| 552 | * cost us to move the proc here. Not just for caches and TLBs and NUMA | |||
| 553 | * memory, but also for the proc itself. A highly loaded cpu might not | |||
| 554 | * be the best candidate for this proc since it won't get run. | |||
| 555 | * | |||
| 556 | * Just total guesstimates for now. | |||
| 557 | */ | |||
| 558 | ||||
| 559 | int sched_cost_load = 1; | |||
| 560 | int sched_cost_priority = 1; | |||
| 561 | int sched_cost_runnable = 3; | |||
| 562 | int sched_cost_resident = 1; | |||
| 563 | #endif | |||
| 564 | ||||
| 565 | int | |||
| 566 | sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p) | |||
| 567 | { | |||
| 568 | int cost = 0; | |||
| 569 | #ifdef MULTIPROCESSOR | |||
| 570 | struct schedstate_percpu *spc; | |||
| 571 | int l2resident = 0; | |||
| 572 | ||||
| 573 | spc = &ci->ci_schedstate; | |||
| 574 | ||||
| 575 | /* | |||
| 576 | * First, account for the priority of the proc we want to move. | |||
| 577 | * More willing to move, the lower the priority of the destination | |||
| 578 | * and the higher the priority of the proc. | |||
| 579 | */ | |||
| 580 | if (!cpuset_isset(&sched_idle_cpus, ci)) { | |||
| 581 | cost += (p->p_usrpri - spc->spc_curpriority) * | |||
| 582 | sched_cost_priority; | |||
| 583 | cost += sched_cost_runnable; | |||
| 584 | } | |||
| 585 | if (cpuset_isset(&sched_queued_cpus, ci)) | |||
| 586 | cost += spc->spc_nrun * sched_cost_runnable; | |||
| 587 | ||||
| 588 | /* | |||
| 589 | * Try to avoid the primary cpu as it handles hardware interrupts. | |||
| 590 | * | |||
| 591 | * XXX Needs to be revisited when we distribute interrupts | |||
| 592 | * over cpus. | |||
| 593 | */ | |||
| 594 | if (CPU_IS_PRIMARY(ci)) | |||
| 595 | cost += sched_cost_runnable; | |||
| 596 | ||||
| 597 | /* | |||
| 598 | * Higher load on the destination means we don't want to go there. | |||
| 599 | */ | |||
| 600 | cost += ((sched_cost_load * spc->spc_ldavg) >> FSHIFT); | |||
| 601 | ||||
| 602 | /* | |||
| 603 | * If the proc is on this cpu already, lower the cost by how much | |||
| 604 | * it has been running and an estimate of its footprint. | |||
| 605 | */ | |||
| 606 | if (p->p_cpu == ci && p->p_slptime == 0) { | |||
| 607 | l2resident = | |||
| 608 | log2(pmap_resident_count(p->p_vmspace->vm_map.pmap)); | |||
| 609 | cost -= l2resident * sched_cost_resident; | |||
| 610 | } | |||
| 611 | #endif | |||
| 612 | return (cost); | |||
| 613 | } | |||
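To make the weighting above concrete, here is a worked example using the default sched_cost_* values; the per-CPU numbers and FSHIFT == 11 are assumptions chosen for illustration, not measurements:

    #include <stdio.h>

    #define FSHIFT  11                      /* assumed, as on amd64 */

    int
    main(void)
    {
            int cost = 0;
            int p_usrpri = 70, spc_curpriority = 50;        /* hypothetical */
            int spc_nrun = 2;
            int spc_ldavg = 1 << FSHIFT;                    /* load avg 1.0 */

            cost += (p_usrpri - spc_curpriority) * 1;       /* priority: 20 */
            cost += 3;                              /* destination busy: 3 */
            cost += spc_nrun * 3;                   /* queued procs: 6 */
            cost += 3;                              /* primary cpu: 3 */
            cost += (1 * spc_ldavg) >> FSHIFT;      /* load term: 1 */

            printf("cost = %d\n", cost);            /* prints 33 */
            return (0);
    }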
| 614 | ||||
| 615 | /* | |||
| 616 | * Peg a proc to a cpu. | |||
| 617 | */ | |||
| 618 | void | |||
| 619 | sched_peg_curproc(struct cpu_info *ci) | |||
| 620 | { | |||
| 621 | struct proc *p = curproc; | |||
| 622 | int s; | |||
| 623 | ||||
| 624 | SCHED_LOCK(s); | |||
| 625 | atomic_setbits_int(&p->p_flag, P_CPUPEG); | |||
| 626 | setrunqueue(ci, p, p->p_usrpri); | |||
| 627 | p->p_ru.ru_nvcsw++; | |||
| 628 | mi_switch(); | |||
| 629 | SCHED_UNLOCK(s); | |||
| 630 | } | |||
| 631 | ||||
| 632 | #ifdef MULTIPROCESSOR | |||
| 633 | ||||
| 634 | void | |||
| 635 | sched_start_secondary_cpus(void) | |||
| 636 | { | |||
| 637 | CPU_INFO_ITERATOR cii; | |||
| 638 | struct cpu_info *ci; | |||
| 639 | ||||
| 640 | CPU_INFO_FOREACH(cii, ci) { | |||
| 641 | struct schedstate_percpu *spc = &ci->ci_schedstate; | |||
| 642 | ||||
| 643 | if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci)) | |||
| 644 | continue; | |||
| 645 | atomic_clearbits_int(&spc->spc_schedflags, | |||
| 646 | SPCF_SHOULDHALT | SPCF_HALTED); | |||
| 647 | #ifdef __HAVE_CPU_TOPOLOGY | |||
| 648 | if (!sched_smt && ci->ci_smt_id > 0) | |||
| 649 | continue; | |||
| 650 | #endif | |||
| 651 | cpuset_add(&sched_all_cpus, ci); | |||
| 652 | } | |||
| 653 | } | |||
| 654 | ||||
| 655 | void | |||
| 656 | sched_stop_secondary_cpus(void) | |||
| 657 | { | |||
| 658 | CPU_INFO_ITERATOR cii; | |||
| 659 | struct cpu_info *ci; | |||
| 660 | ||||
| 661 | /* | |||
| 662 | * Make sure we stop the secondary CPUs. | |||
| 663 | */ | |||
| 664 | CPU_INFO_FOREACH(cii, ci) { | |||
| 665 | struct schedstate_percpu *spc = &ci->ci_schedstate; | |||
| 666 | ||||
| 667 | if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci)) | |||
| 668 | continue; | |||
| 669 | cpuset_del(&sched_all_cpus, ci); | |||
| 670 | atomic_setbits_int(&spc->spc_schedflags, SPCF_SHOULDHALT); | |||
| 671 | } | |||
| 672 | CPU_INFO_FOREACH(cii, ci) { | |||
| 673 | struct schedstate_percpu *spc = &ci->ci_schedstate; | |||
| 674 | struct sleep_state sls; | |||
| 675 | ||||
| 676 | if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci)) | |||
| 677 | continue; | |||
| 678 | while ((spc->spc_schedflags & SPCF_HALTED) == 0) { | |||
| 679 | sleep_setup(&sls, spc, PZERO, "schedstate", 0); | |||
| 680 | sleep_finish(&sls, | |||
| 681 | (spc->spc_schedflags & SPCF_HALTED) == 0); | |||
| 682 | } | |||
| 683 | } | |||
| 684 | } | |||
| 685 | ||||
| 686 | struct sched_barrier_state { | |||
| 687 | struct cpu_info *ci; | |||
| 688 | struct cond cond; | |||
| 689 | }; | |||
| 690 | ||||
| 691 | void | |||
| 692 | sched_barrier_task(void *arg) | |||
| 693 | { | |||
| 694 | struct sched_barrier_state *sb = arg; | |||
| 695 | struct cpu_info *ci = sb->ci; | |||
| 696 | ||||
| 697 | sched_peg_curproc(ci); | |||
| ||||
| 698 | cond_signal(&sb->cond); | |||
| 699 | atomic_clearbits_int(&curproc->p_flag, P_CPUPEG); | |||
| 700 | } | |||
| 701 | ||||
| 702 | void | |||
| 703 | sched_barrier(struct cpu_info *ci) | |||
| 704 | { | |||
| 705 | struct sched_barrier_state sb; | |||
| 706 | struct task task; | |||
| 707 | CPU_INFO_ITERATOR cii; | |||
| 708 | ||||
| 709 | if (ci == NULL) { | |||
| 710 | CPU_INFO_FOREACH(cii, ci) { | |||
| 711 | if (CPU_IS_PRIMARY(ci)) | |||
| 712 | break; | |||
| 713 | } | |||
| 714 | } | |||
| 715 | KASSERT(ci != NULL); | |||
| 716 | ||||
| 717 | if (ci == curcpu()) | |||
| 718 | return; | |||
| 719 | ||||
| 720 | sb.ci = ci; | |||
| 721 | cond_init(&sb.cond); | |||
| 722 | task_set(&task, sched_barrier_task, &sb); | |||
| 723 | ||||
| 724 | task_add(systqmp, &task); | |||
| 725 | cond_wait(&sb.cond, "sbar"); | |||
| 726 | } | |||
| 727 | ||||
| 728 | #else | |||
| 729 | ||||
| 730 | void | |||
| 731 | sched_barrier(struct cpu_info *ci) | |||
| 732 | { | |||
| 733 | } | |||
| 734 | ||||
| 735 | #endif | |||
| 736 | ||||
| 737 | /* | |||
| 738 | * Functions to manipulate cpu sets. | |||
| 739 | */ | |||
| 740 | struct cpu_info *cpuset_infos[MAXCPUS]; | |||
| 741 | static struct cpuset cpuset_all; | |||
| 742 | ||||
| 743 | void | |||
| 744 | cpuset_init_cpu(struct cpu_info *ci) | |||
| 745 | { | |||
| 746 | cpuset_add(&cpuset_all, ci); | |||
| 747 | cpuset_infos[CPU_INFO_UNIT(ci)] = ci; | |||
| 748 | } | |||
| 749 | ||||
| 750 | void | |||
| 751 | cpuset_clear(struct cpuset *cs) | |||
| 752 | { | |||
| 753 | memset(cs, 0, sizeof(*cs)); | |||
| 754 | } | |||
| 755 | ||||
| 756 | void | |||
| 757 | cpuset_add(struct cpuset *cs, struct cpu_info *ci) | |||
| 758 | { | |||
| 759 | unsigned int num = CPU_INFO_UNIT(ci); | |||
| 760 | atomic_setbits_int(&cs->cs_set[num/32], (1 << (num % 32))); | |||
| 761 | } | |||
| 762 | ||||
| 763 | void | |||
| 764 | cpuset_del(struct cpuset *cs, struct cpu_info *ci) | |||
| 765 | { | |||
| 766 | unsigned int num = CPU_INFO_UNIT(ci); | |||
| 767 | atomic_clearbits_int(&cs->cs_set[num/32], (1 << (num % 32))); | |||
| 768 | } | |||
| 769 | ||||
| 770 | int | |||
| 771 | cpuset_isset(struct cpuset *cs, struct cpu_info *ci) | |||
| 772 | { | |||
| 773 | unsigned int num = CPU_INFO_UNIT(ci); | |||
| 774 | return (cs->cs_set[num/32] & (1 << (num % 32))); | |||
| ||||
| 775 | } | |||
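The expression flagged in the report header lives in the function above (source line 774). The word/bit split it relies on — a CPU unit number broken into an array index and a mask — looks like this in isolation; the unit numbers are invented for the demonstration:

    #include <stdio.h>

    int
    main(void)
    {
            unsigned int units[] = { 0, 5, 31, 32, 40 };    /* made up */
            unsigned int i, num;

            for (i = 0; i < sizeof(units) / sizeof(units[0]); i++) {
                    num = units[i];
                    printf("unit %2u -> cs_set[%u], mask 0x%08x\n",
                        num, num / 32, 1U << (num % 32));
            }
            return (0);
    }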
| 776 | ||||
| 777 | void | |||
| 778 | cpuset_add_all(struct cpuset *cs) | |||
| 779 | { | |||
| 780 | cpuset_copy(cs, &cpuset_all); | |||
| 781 | } | |||
| 782 | ||||
| 783 | void | |||
| 784 | cpuset_copy(struct cpuset *to, struct cpuset *from) | |||
| 785 | { | |||
| 786 | memcpy(to, from, sizeof(*to)); | |||
| 787 | } | |||
| 788 | ||||
| 789 | struct cpu_info * | |||
| 790 | cpuset_first(struct cpuset *cs) | |||
| 791 | { | |||
| 792 | int i; | |||
| 793 | ||||
| 794 | for (i = 0; i < CPUSET_ASIZE(ncpus); i++) | |||
| 795 | if (cs->cs_set[i]) | |||
| 796 | return (cpuset_infos[i * 32 + ffs(cs->cs_set[i]) - 1]); | |||
| 797 | ||||
| 798 | return (NULL); | |||
| 799 | } | |||
| 800 | ||||
| 801 | void | |||
| 802 | cpuset_union(struct cpuset *to, struct cpuset *a, struct cpuset *b) | |||
| 803 | { | |||
| 804 | int i; | |||
| 805 | ||||
| 806 | for (i = 0; i < CPUSET_ASIZE(ncpus); i++) | |||
| 807 | to->cs_set[i] = a->cs_set[i] | b->cs_set[i]; | |||
| 808 | } | |||
| 809 | ||||
| 810 | void | |||
| 811 | cpuset_intersection(struct cpuset *to, struct cpuset *a, struct cpuset *b) | |||
| 812 | { | |||
| 813 | int i; | |||
| 814 | ||||
| 815 | for (i = 0; i < CPUSET_ASIZE(ncpus); i++) | |||
| 816 | to->cs_set[i] = a->cs_set[i] & b->cs_set[i]; | |||
| 817 | } | |||
| 818 | ||||
| 819 | void | |||
| 820 | cpuset_complement(struct cpuset *to, struct cpuset *a, struct cpuset *b) | |||
| 821 | { | |||
| 822 | int i; | |||
| 823 | ||||
| 824 | for (i = 0; i < CPUSET_ASIZE(ncpus); i++) | |||
| 825 | to->cs_set[i] = b->cs_set[i] & ~a->cs_set[i]; | |||
| 826 | } | |||
| 827 | ||||
| 828 | int | |||
| 829 | cpuset_cardinality(struct cpuset *cs) | |||
| 830 | { | |||
| 831 | int cardinality, i, n; | |||
| 832 | ||||
| 833 | cardinality = 0; | |||
| 834 | ||||
| 835 | for (i = 0; i < CPUSET_ASIZE(ncpus); i++) | |||
| 836 | for (n = cs->cs_set[i]; n != 0; n &= n - 1) | |||
| 837 | cardinality++; | |||
| 838 | ||||
| 839 | return (cardinality); | |||
| 840 | } | |||
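The n &= n - 1 step above clears the lowest set bit on every pass (Kernighan's population count), so the inner loop runs once per set bit. A quick standalone check:

    #include <assert.h>

    static int
    popcount(unsigned int n)
    {
            int c = 0;

            for (; n != 0; n &= n - 1)
                    c++;
            return (c);
    }

    int
    main(void)
    {
            assert(popcount(0) == 0);
            assert(popcount(0x5) == 2);             /* bits 0 and 2 */
            assert(popcount(0xffffffff) == 32);
            return (0);
    }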
| 841 | ||||
| 842 | int | |||
| 843 | sysctl_hwncpuonline(void) | |||
| 844 | { | |||
| 845 | return cpuset_cardinality(&sched_all_cpus); | |||
| 846 | } | |||
| 847 | ||||
| 848 | int | |||
| 849 | cpu_is_online(struct cpu_info *ci) | |||
| 850 | { | |||
| 851 | return cpuset_isset(&sched_all_cpus, ci); | |||
| 852 | } | |||
| 853 | ||||
| 854 | #ifdef __HAVE_CPU_TOPOLOGY | |||
| 855 | ||||
| 856 | #include <sys/sysctl.h> | |||
| 857 | ||||
| 858 | int | |||
| 859 | sysctl_hwsmt(void *oldp, size_t *oldlenp, void *newp, size_t newlen) | |||
| 860 | { | |||
| 861 | CPU_INFO_ITERATOR cii; | |||
| 862 | struct cpu_info *ci; | |||
| 863 | int err, newsmt; | |||
| 864 | ||||
| 865 | newsmt = sched_smt; | |||
| 866 | err = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &newsmt, 0, 1); | |||
| 867 | if (err) | |||
| 868 | return err; | |||
| 869 | if (newsmt == sched_smt) | |||
| 870 | return 0; | |||
| 871 | ||||
| 872 | sched_smt = newsmt; | |||
| 873 | CPU_INFO_FOREACH(cii, ci) { | |||
| 874 | if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci)) | |||
| 875 | continue; | |||
| 876 | if (ci->ci_smt_id == 0) | |||
| 877 | continue; | |||
| 878 | if (sched_smt) | |||
| 879 | cpuset_add(&sched_all_cpus, ci); | |||
| 880 | else | |||
| 881 | cpuset_del(&sched_all_cpus, ci); | |||
| 882 | } | |||
| 883 | ||||
| 884 | return 0; | |||
| 885 | } | |||
| 886 | ||||
| 887 | #endif |