| File: | kern/kern_sched.c |
| Warning: | line 790, column 29 The left operand of '&' is a garbage value |
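The flagged expression is the read of cs_set[num/32] in cpuset_isset() at line 790. The path the analyzer appears to reason about: a struct cpuset declared on the stack (as in sched_choosecpu() and sched_choosecpu_fork()) is only written word by word up to CPUSET_ASIZE(ncpus) by cpuset_complement()/cpuset_intersection(), while cpuset_isset() indexes by a device unit number that the analyzer cannot bound by ncpus, so the word it reads may never have been initialized. The following is a minimal, self-contained user-space reduction of that pattern; the names mirror the kernel code but the values (ncpus, the bit number 40) are illustrative assumptions, not the kernel's real configuration.

/*
 * Hypothetical reduction of the "garbage value" path; illustrative only.
 */
#include <stdio.h>

#define MAXCPUS         64
#define CPUSET_ASIZE(n) (((n) - 1) / 32 + 1)

struct cpuset {
    unsigned int cs_set[CPUSET_ASIZE(MAXCPUS)];     /* 2 words */
};

int ncpus = 4;      /* assumed value: the init loop then touches only cs_set[0] */

void
cpuset_complement(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
    int i;

    /* Writes only CPUSET_ASIZE(ncpus) words of the destination. */
    for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
        to->cs_set[i] = b->cs_set[i] & ~a->cs_set[i];
}

int
cpuset_isset(struct cpuset *cs, unsigned int num)
{
    /* With num = 40, this reads cs_set[1], which was never written above. */
    return (cs->cs_set[num / 32] & (1U << (num % 32)));
}

int
main(void)
{
    struct cpuset a = {{ 0 }}, b = {{ ~0U, ~0U }};
    struct cpuset set;          /* uninitialized stack object */

    cpuset_complement(&set, &a, &b);            /* initializes cs_set[0] only */
    printf("%d\n", cpuset_isset(&set, 40));     /* left operand of '&' is garbage */
    return 0;
}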
| 1 | /* $OpenBSD: kern_sched.c,v 1.93 2023/10/24 13:20:11 claudio Exp $ */ |
| 2 | /* |
| 3 |  * Copyright (c) 2007, 2008 Artur Grabowski <art@openbsd.org> |
| 4 |  * |
| 5 |  * Permission to use, copy, modify, and distribute this software for any |
| 6 |  * purpose with or without fee is hereby granted, provided that the above |
| 7 |  * copyright notice and this permission notice appear in all copies. |
| 8 |  * |
| 9 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
| 10 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
| 11 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
| 12 |  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| 13 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
| 14 |  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
| 15 |  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| 16 |  */ |
| 17 | |
| 18 | #include <sys/param.h> |
| 19 | |
| 20 | #include <sys/sched.h> |
| 21 | #include <sys/proc.h> |
| 22 | #include <sys/kthread.h> |
| 23 | #include <sys/systm.h> |
| 24 | #include <sys/clockintr.h> |
| 25 | #include <sys/resourcevar.h> |
| 26 | #include <sys/task.h> |
| 27 | #include <sys/time.h> |
| 28 | #include <sys/smr.h> |
| 29 | #include <sys/tracepoint.h> |
| 30 | |
| 31 | #include <uvm/uvm_extern.h> |
| 32 | |
| 33 | void sched_kthreads_create(void *); |
| 34 | |
| 35 | int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p); |
| 36 | struct proc *sched_steal_proc(struct cpu_info *); |
| 37 | |
| 38 | /* |
| 39 |  * To help choosing which cpu should run which process we keep track |
| 40 |  * of cpus which are currently idle and which cpus have processes |
| 41 |  * queued. |
| 42 |  */ |
| 43 | struct cpuset sched_idle_cpus; |
| 44 | struct cpuset sched_queued_cpus; |
| 45 | struct cpuset sched_all_cpus; |
| 46 | |
| 47 | /* |
| 48 |  * Some general scheduler counters. |
| 49 |  */ |
| 50 | uint64_t sched_nmigrations;    /* Cpu migration counter */ |
| 51 | uint64_t sched_nomigrations;   /* Cpu no migration counter */ |
| 52 | uint64_t sched_noidle;         /* Times we didn't pick the idle task */ |
| 53 | uint64_t sched_stolen;         /* Times we stole proc from other cpus */ |
| 54 | uint64_t sched_choose;         /* Times we chose a cpu */ |
| 55 | uint64_t sched_wasidle;        /* Times we came out of idle */ |
| 56 | |
| 57 | int sched_smt; |
| 58 | |
| 59 | /* |
| 60 |  * A few notes about cpu_switchto that is implemented in MD code. |
| 61 |  * |
| 62 |  * cpu_switchto takes two arguments, the old proc and the proc |
| 63 |  * it should switch to. The new proc will never be NULL, so we always have |
| 64 |  * a saved state that we need to switch to. The old proc however can |
| 65 |  * be NULL if the process is exiting. NULL for the old proc simply |
| 66 |  * means "don't bother saving old state". |
| 67 |  * |
| 68 |  * cpu_switchto is supposed to atomically load the new state of the process |
| 69 |  * including the pcb, pmap and setting curproc, the p_cpu pointer in the |
| 70 |  * proc and p_stat to SONPROC. Atomically with respect to interrupts, other |
| 71 |  * cpus in the system must not depend on this state being consistent. |
| 72 |  * Therefore no locking is necessary in cpu_switchto other than blocking |
| 73 |  * interrupts during the context switch. |
| 74 |  */ |
| 75 | |
| 76 | /* |
| 77 |  * sched_init_cpu is called from main() for the boot cpu, then it's the |
| 78 |  * responsibility of the MD code to call it for all other cpus. |
| 79 |  */ |
| 80 | void |
| 81 | sched_init_cpu(struct cpu_info *ci) |
| 82 | { |
| 83 |     struct schedstate_percpu *spc = &ci->ci_schedstate; |
| 84 |     int i; |
| 85 | |
| 86 |     for (i = 0; i < SCHED_NQS; i++) |
| 87 |         TAILQ_INIT(&spc->spc_qs[i]); |
| 88 | |
| 89 |     spc->spc_idleproc = NULL; |
| 90 | |
| 91 |     spc->spc_itimer = clockintr_establish(ci, itimer_update, NULL); |
| 92 |     if (spc->spc_itimer == NULL) |
| 93 |         panic("%s: clockintr_establish itimer_update", __func__); |
| 94 |     spc->spc_profclock = clockintr_establish(ci, profclock, NULL); |
| 95 |     if (spc->spc_profclock == NULL) |
| 96 |         panic("%s: clockintr_establish profclock", __func__); |
| 97 |     spc->spc_roundrobin = clockintr_establish(ci, roundrobin, NULL); |
| 98 |     if (spc->spc_roundrobin == NULL) |
| 99 |         panic("%s: clockintr_establish roundrobin", __func__); |
| 100 |     spc->spc_statclock = clockintr_establish(ci, statclock, NULL); |
| 101 |     if (spc->spc_statclock == NULL) |
| 102 |         panic("%s: clockintr_establish statclock", __func__); |
| 103 | |
| 104 |     kthread_create_deferred(sched_kthreads_create, ci); |
| 105 | |
| 106 |     LIST_INIT(&spc->spc_deadproc); |
| 107 |     SIMPLEQ_INIT(&spc->spc_deferred); |
| 108 | |
| 109 |     /* |
| 110 |      * Slight hack here until the cpuset code handles cpu_info |
| 111 |      * structures. |
| 112 |      */ |
| 113 |     cpuset_init_cpu(ci); |
| 114 | |
| 115 | #ifdef __HAVE_CPU_TOPOLOGY |
| 116 |     if (!sched_smt && ci->ci_smt_id > 0) |
| 117 |         return; |
| 118 | #endif |
| 119 |     cpuset_add(&sched_all_cpus, ci); |
| 120 | } |
| 121 | |
| 122 | void |
| 123 | sched_kthreads_create(void *v) |
| 124 | { |
| 125 |     struct cpu_info *ci = v; |
| 126 |     struct schedstate_percpu *spc = &ci->ci_schedstate; |
| 127 |     static int num; |
| 128 | |
| 129 |     if (fork1(&proc0, FORK_SHAREVM|FORK_SHAREFILES|FORK_NOZOMBIE| |
| 130 |         FORK_SYSTEM|FORK_IDLE, sched_idle, ci, NULL, |
| 131 |         &spc->spc_idleproc)) |
| 132 |         panic("fork idle"); |
| 133 | |
| 134 |     /* Name it as specified. */ |
| 135 |     snprintf(spc->spc_idleproc->p_p->ps_comm, |
| 136 |         sizeof(spc->spc_idleproc->p_p->ps_comm), |
| 137 |         "idle%d", num); |
| 138 | |
| 139 |     num++; |
| 140 | } |
| 141 | |
| 142 | void |
| 143 | sched_idle(void *v) |
| 144 | { |
| 145 |     struct schedstate_percpu *spc; |
| 146 |     struct proc *p = curproc; |
| 147 |     struct cpu_info *ci = v; |
| 148 |     int s; |
| 149 | |
| 150 |     KERNEL_UNLOCK(); |
| 151 | |
| 152 |     spc = &ci->ci_schedstate; |
| 153 | |
| 154 |     /* |
| 155 |      * First time we enter here, we're not supposed to idle, |
| 156 |      * just go away for a while. |
| 157 |      */ |
| 158 |     SCHED_LOCK(s); |
| 159 |     cpuset_add(&sched_idle_cpus, ci); |
| 160 |     p->p_stat = SSLEEP; |
| 161 |     p->p_cpu = ci; |
| 162 |     atomic_setbits_int(&p->p_flag, P_CPUPEG); |
| 163 |     mi_switch(); |
| 164 |     cpuset_del(&sched_idle_cpus, ci); |
| 165 |     SCHED_UNLOCK(s); |
| 166 | |
| 167 |     KASSERT(ci == curcpu()); |
| 168 |     KASSERT(curproc == spc->spc_idleproc); |
| 169 | |
| 170 |     while (1) { |
| 171 |         while (!cpu_is_idle(curcpu())) { |
| 172 |             struct proc *dead; |
| 173 | |
| 174 |             SCHED_LOCK(s); |
| 175 |             p->p_stat = SSLEEP; |
| 176 |             mi_switch(); |
| 177 |             SCHED_UNLOCK(s); |
| 178 | |
| 179 |             while ((dead = LIST_FIRST(&spc->spc_deadproc))) { |
| 180 |                 LIST_REMOVE(dead, p_hash); |
| 181 |                 exit2(dead); |
| 182 |             } |
| 183 |         } |
| 184 | |
| 185 |         splassert(IPL_NONE); |
| 186 | |
| 187 |         smr_idle(); |
| 188 | |
| 189 |         cpuset_add(&sched_idle_cpus, ci); |
| 190 |         cpu_idle_enter(); |
| 191 |         while (spc->spc_whichqs == 0) { |
| 192 | #ifdef MULTIPROCESSOR |
| 193 |             if (spc->spc_schedflags & SPCF_SHOULDHALT && |
| 194 |                 (spc->spc_schedflags & SPCF_HALTED) == 0) { |
| 195 |                 cpuset_del(&sched_idle_cpus, ci); |
| 196 |                 SCHED_LOCK(s); |
| 197 |                 atomic_setbits_int(&spc->spc_schedflags, |
| 198 |                     spc->spc_whichqs ? 0 : SPCF_HALTED); |
| 199 |                 SCHED_UNLOCK(s); |
| 200 |                 wakeup(spc); |
| 201 |             } |
| 202 | #endif |
| 203 |             cpu_idle_cycle(); |
| 204 |         } |
| 205 |         cpu_idle_leave(); |
| 206 |         cpuset_del(&sched_idle_cpus, ci); |
| 207 |     } |
| 208 | } |
| 209 | |
| 210 | /* |
| 211 |  * To free our address space we have to jump through a few hoops. |
| 212 |  * The freeing is done by the reaper, but until we have one reaper |
| 213 |  * per cpu, we have no way of putting this proc on the deadproc list |
| 214 |  * and waking up the reaper without risking having our address space and |
| 215 |  * stack torn from under us before we manage to switch to another proc. |
| 216 |  * Therefore we have a per-cpu list of dead processes where we put this |
| 217 |  * proc and have idle clean up that list and move it to the reaper list. |
| 218 |  * All this will be unnecessary once we can bind the reaper this cpu |
| 219 |  * and not risk having it switch to another in case it sleeps. |
| 220 |  */ |
| 221 | void |
| 222 | sched_exit(struct proc *p) |
| 223 | { |
| 224 |     struct schedstate_percpu *spc = &curcpu()->ci_schedstate; |
| 225 | |
| 226 |     LIST_INSERT_HEAD(&spc->spc_deadproc, p, p_hash); |
| 227 | |
| 228 |     KERNEL_ASSERT_LOCKED(); |
| 229 |     sched_toidle(); |
| 230 | } |
| 231 | |
| 232 | void |
| 233 | sched_toidle(void) |
| 234 | { |
| 235 |     struct schedstate_percpu *spc = &curcpu()->ci_schedstate; |
| 236 |     struct proc *idle; |
| 237 |     int s; |
| 238 | |
| 239 | #ifdef MULTIPROCESSOR |
| 240 |     /* This process no longer needs to hold the kernel lock. */ |
| 241 |     if (_kernel_lock_held()) |
| 242 |         __mp_release_all(&kernel_lock); |
| 243 | #endif |
| 244 | |
| 245 |     if (ISSET(spc->spc_schedflags, SPCF_ITIMER)) { |
| 246 |         atomic_clearbits_int(&spc->spc_schedflags, SPCF_ITIMER); |
| 247 |         clockintr_cancel(spc->spc_itimer); |
| 248 |     } |
| 249 |     if (ISSET(spc->spc_schedflags, SPCF_PROFCLOCK)) { |
| 250 |         atomic_clearbits_int(&spc->spc_schedflags, SPCF_PROFCLOCK); |
| 251 |         clockintr_cancel(spc->spc_profclock); |
| 252 |     } |
| 253 | |
| 254 |     atomic_clearbits_int(&spc->spc_schedflags, SPCF_SWITCHCLEAR); |
| 255 | |
| 256 |     SCHED_LOCK(s); |
| 257 | |
| 258 |     idle = spc->spc_idleproc; |
| 259 |     idle->p_stat = SRUN; |
| 260 | |
| 261 |     uvmexp.swtch++; |
| 262 |     TRACEPOINT(sched, off__cpu, idle->p_tid + THREAD_PID_OFFSET, |
| 263 |         idle->p_p->ps_pid); |
| 264 |     cpu_switchto(NULL, idle); |
| 265 |     panic("cpu_switchto returned"); |
| 266 | } |
| 267 | |
| 268 | /* |
| 269 |  * Run queue management. |
| 270 |  */ |
| 271 | void |
| 272 | sched_init_runqueues(void) |
| 273 | { |
| 274 | } |
| 275 | |
| 276 | void |
| 277 | setrunqueue(struct cpu_info *ci, struct proc *p, uint8_t prio) |
| 278 | { |
| 279 |     struct schedstate_percpu *spc; |
| 280 |     int queue = prio >> 2; |
| 281 | |
| 282 |     if (ci == NULL) |
| 283 |         ci = sched_choosecpu(p); |
| 284 | |
| 285 |     KASSERT(ci != NULL); |
| 286 |     SCHED_ASSERT_LOCKED(); |
| 287 |     KASSERT(p->p_wchan == NULL); |
| 288 | |
| 289 |     p->p_cpu = ci; |
| 290 |     p->p_stat = SRUN; |
| 291 |     p->p_runpri = prio; |
| 292 | |
| 293 |     spc = &p->p_cpu->ci_schedstate; |
| 294 |     spc->spc_nrun++; |
| 295 |     TRACEPOINT(sched, enqueue, p->p_tid + THREAD_PID_OFFSET, |
| 296 |         p->p_p->ps_pid); |
| 297 | |
| 298 |     TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq); |
| 299 |     spc->spc_whichqs |= (1U << queue); |
| 300 |     cpuset_add(&sched_queued_cpus, p->p_cpu); |
| 301 | |
| 302 |     if (cpuset_isset(&sched_idle_cpus, p->p_cpu)) |
| 303 |         cpu_unidle(p->p_cpu); |
| 304 | |
| 305 |     if (prio < spc->spc_curpriority) |
| 306 |         need_resched(ci); |
| 307 | } |
| 308 | |
| 309 | void |
| 310 | remrunqueue(struct proc *p) |
| 311 | { |
| 312 |     struct schedstate_percpu *spc; |
| 313 |     int queue = p->p_runpri >> 2; |
| 314 | |
| 315 |     SCHED_ASSERT_LOCKED(); |
| 316 |     spc = &p->p_cpu->ci_schedstate; |
| 317 |     spc->spc_nrun--; |
| 318 |     TRACEPOINT(sched, dequeue, p->p_tid + THREAD_PID_OFFSET, |
| 319 |         p->p_p->ps_pid); |
| 320 | |
| 321 |     TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq); |
| 322 |     if (TAILQ_EMPTY(&spc->spc_qs[queue])) { |
| 323 |         spc->spc_whichqs &= ~(1U << queue); |
| 324 |         if (spc->spc_whichqs == 0) |
| 325 |             cpuset_del(&sched_queued_cpus, p->p_cpu); |
| 326 |     } |
| 327 | } |
| 328 | |
| 329 | struct proc * |
| 330 | sched_chooseproc(void) |
| 331 | { |
| 332 |     struct schedstate_percpu *spc = &curcpu()->ci_schedstate; |
| 333 |     struct proc *p; |
| 334 |     int queue; |
| 335 | |
| 336 |     SCHED_ASSERT_LOCKED(); |
| 337 | |
| 338 | #ifdef MULTIPROCESSOR |
| 339 |     if (spc->spc_schedflags & SPCF_SHOULDHALT) { |
| 340 |         if (spc->spc_whichqs) { |
| 341 |             for (queue = 0; queue < SCHED_NQS; queue++) { |
| 342 |                 while ((p = TAILQ_FIRST(&spc->spc_qs[queue]))) { |
| 343 |                     remrunqueue(p); |
| 344 |                     setrunqueue(NULL, p, p->p_runpri); |
| 345 |                     if (p->p_cpu == curcpu()) { |
| 346 |                         KASSERT(p->p_flag & P_CPUPEG); |
| 347 |                         goto again; |
| 348 |                     } |
| 349 |                 } |
| 350 |             } |
| 351 |         } |
| 352 |         p = spc->spc_idleproc; |
| 353 |         if (p == NULL) |
| 354 |             panic("no idleproc set on CPU%d", |
| 355 |                 CPU_INFO_UNIT(curcpu())); |
| 356 |         p->p_stat = SRUN; |
| 357 |         KASSERT(p->p_wchan == NULL); |
| 358 |         return (p); |
| 359 |     } |
| 360 | again: |
| 361 | #endif |
| 362 | |
| 363 |     if (spc->spc_whichqs) { |
| 364 |         queue = ffs(spc->spc_whichqs) - 1; |
| 365 |         p = TAILQ_FIRST(&spc->spc_qs[queue]); |
| 366 |         remrunqueue(p); |
| 367 |         sched_noidle++; |
| 368 |         if (p->p_stat != SRUN) |
| 369 |             panic("thread %d not in SRUN: %d", p->p_tid, p->p_stat); |
| 370 |     } else if ((p = sched_steal_proc(curcpu())) == NULL) { |
| 371 |         p = spc->spc_idleproc; |
| 372 |         if (p == NULL) |
| 373 |             panic("no idleproc set on CPU%d", |
| 374 |                 CPU_INFO_UNIT(curcpu())); |
| 375 |         p->p_stat = SRUN; |
| 376 |     } |
| 377 | |
| 378 |     KASSERT(p->p_wchan == NULL); |
| 379 |     return (p); |
| 380 | } |
| 381 | |
| 382 | struct cpu_info * |
| 383 | sched_choosecpu_fork(struct proc *parent, int flags) |
| 384 | { |
| 385 | #ifdef MULTIPROCESSOR |
| 386 |     struct cpu_info *choice = NULL; |
| 387 |     int run, best_run = INT_MAX; |
| 388 |     struct cpu_info *ci; |
| 389 |     struct cpuset set; |
| 390 | |
| 391 | #if 0 |
| 392 |     /* |
| 393 |      * XXX |
| 394 |      * Don't do this until we have a painless way to move the cpu in exec. |
| 395 |      * Preferably when nuking the old pmap and getting a new one on a |
| 396 |      * new cpu. |
| 397 |      */ |
| 398 |     /* |
| 399 |      * PPWAIT forks are simple. We know that the parent will not |
| 400 |      * run until we exec and choose another cpu, so we just steal its |
| 401 |      * cpu. |
| 402 |      */ |
| 403 |     if (flags & FORK_PPWAIT) |
| 404 |         return (parent->p_cpu); |
| 405 | #endif |
| 406 | |
| 407 |     /* |
| 408 |      * Look at all cpus that are currently idle and have nothing queued. |
| 409 |      * If there are none, pick the one with least queued procs first, |
| 410 |      * then the one with lowest load average. |
| 411 |      */ |
| 412 |     cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus); |
| 413 |     cpuset_intersection(&set, &set, &sched_all_cpus); |
| 414 |     if (cpuset_first(&set) == NULL) |
| 415 |         cpuset_copy(&set, &sched_all_cpus); |
| 416 | |
| 417 |     while ((ci = cpuset_first(&set)) != NULL) { |
| 418 |         cpuset_del(&set, ci); |
| 419 | |
| 420 |         run = ci->ci_schedstate.spc_nrun; |
| 421 | |
| 422 |         if (choice == NULL || run < best_run) { |
| 423 |             choice = ci; |
| 424 |             best_run = run; |
| 425 |         } |
| 426 |     } |
| 427 | |
| 428 |     return (choice); |
| 429 | #else |
| 430 |     return (curcpu()); |
| 431 | #endif |
| 432 | } |
| 433 | |
| 434 | struct cpu_info * |
| 435 | sched_choosecpu(struct proc *p) |
| 436 | { |
| 437 | #ifdef MULTIPROCESSOR |
| 438 |     struct cpu_info *choice = NULL; |
| 439 |     int last_cost = INT_MAX; |
| 440 |     struct cpu_info *ci; |
| 441 |     struct cpuset set; |
| 442 | |
| 443 |     /* |
| 444 |      * If pegged to a cpu, don't allow it to move. |
| 445 |      */ |
| 446 |     if (p->p_flag & P_CPUPEG) |
| 447 |         return (p->p_cpu); |
| 448 | |
| 449 |     sched_choose++; |
| 450 | |
| 451 |     /* |
| 452 |      * Look at all cpus that are currently idle and have nothing queued. |
| 453 |      * If there are none, pick the cheapest of those. |
| 454 |      * (idle + queued could mean that the cpu is handling an interrupt |
| 455 |      * at this moment and haven't had time to leave idle yet). |
| 456 |      */ |
| 457 |     cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus); |
| 458 |     cpuset_intersection(&set, &set, &sched_all_cpus); |
| 459 | |
| 460 |     /* |
| 461 |      * First, just check if our current cpu is in that set, if it is, |
| 462 |      * this is simple. |
| 463 |      * Also, our cpu might not be idle, but if it's the current cpu |
| 464 |      * and it has nothing else queued and we're curproc, take it. |
| 465 |      */ |
| 466 |     if (cpuset_isset(&set, p->p_cpu) || |
| 467 |         (p->p_cpu == curcpu() && p->p_cpu->ci_schedstate.spc_nrun == 0 && |
| 468 |         (p->p_cpu->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0 && |
| 469 |         curproc == p)) { |
| 470 |         sched_wasidle++; |
| 471 |         return (p->p_cpu); |
| 472 |     } |
| 473 | |
| 474 |     if (cpuset_first(&set) == NULL) |
| 475 |         cpuset_copy(&set, &sched_all_cpus); |
| 476 | |
| 477 |     while ((ci = cpuset_first(&set)) != NULL) { |
| 478 |         int cost = sched_proc_to_cpu_cost(ci, p); |
| 479 | |
| 480 |         if (choice == NULL || cost < last_cost) { |
| 481 |             choice = ci; |
| 482 |             last_cost = cost; |
| 483 |         } |
| 484 |         cpuset_del(&set, ci); |
| 485 |     } |
| 486 | |
| 487 |     if (p->p_cpu != choice) |
| 488 |         sched_nmigrations++; |
| 489 |     else |
| 490 |         sched_nomigrations++; |
| 491 | |
| 492 |     return (choice); |
| 493 | #else |
| 494 |     return (curcpu()); |
| 495 | #endif |
| 496 | } |
| 497 | |
| 498 | /* |
| 499 |  * Attempt to steal a proc from some cpu. |
| 500 |  */ |
| 501 | struct proc * |
| 502 | sched_steal_proc(struct cpu_info *self) |
| 503 | { |
| 504 |     struct proc *best = NULL; |
| 505 | #ifdef MULTIPROCESSOR |
| 506 |     struct schedstate_percpu *spc; |
| 507 |     int bestcost = INT_MAX; |
| 508 |     struct cpu_info *ci; |
| 509 |     struct cpuset set; |
| 510 | |
| 511 |     KASSERT((self->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0); |
| 512 | |
| 513 |     /* Don't steal if we don't want to schedule processes in this CPU. */ |
| 514 |     if (!cpuset_isset(&sched_all_cpus, self)) |
| 515 |         return (NULL); |
| 516 | |
| 517 |     cpuset_copy(&set, &sched_queued_cpus); |
| 518 | |
| 519 |     while ((ci = cpuset_first(&set)) != NULL) { |
| 520 |         struct proc *p; |
| 521 |         int queue; |
| 522 |         int cost; |
| 523 | |
| 524 |         cpuset_del(&set, ci); |
| 525 | |
| 526 |         spc = &ci->ci_schedstate; |
| 527 | |
| 528 |         queue = ffs(spc->spc_whichqs) - 1; |
| 529 |         TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) { |
| 530 |             if (p->p_flag & P_CPUPEG) |
| 531 |                 continue; |
| 532 | |
| 533 |             cost = sched_proc_to_cpu_cost(self, p); |
| 534 | |
| 535 |             if (best == NULL || cost < bestcost) { |
| 536 |                 best = p; |
| 537 |                 bestcost = cost; |
| 538 |             } |
| 539 |         } |
| 540 |     } |
| 541 |     if (best == NULL) |
| 542 |         return (NULL); |
| 543 | |
| 544 |     TRACEPOINT(sched, steal, best->p_tid + THREAD_PID_OFFSET, |
| 545 |         best->p_p->ps_pid, CPU_INFO_UNIT(self)); |
| 546 | |
| 547 |     remrunqueue(best); |
| 548 |     best->p_cpu = self; |
| 549 | |
| 550 |     sched_stolen++; |
| 551 | #endif |
| 552 |     return (best); |
| 553 | } |
| 554 | |
| 555 | #ifdef MULTIPROCESSOR |
| 556 | /* |
| 557 |  * Base 2 logarithm of an int. returns 0 for 0 (yeye, I know). |
| 558 |  */ |
| 559 | static int |
| 560 | log2(unsigned int i) |
| 561 | { |
| 562 |     int ret = 0; |
| 563 | |
| 564 |     while (i >>= 1) |
| 565 |         ret++; |
| 566 | |
| 567 |     return (ret); |
| 568 | } |
| 569 | |
| 570 | /* |
| 571 |  * Calculate the cost of moving the proc to this cpu. |
| 572 |  * |
| 573 |  * What we want is some guesstimate of how much "performance" it will |
| 574 |  * cost us to move the proc here. Not just for caches and TLBs and NUMA |
| 575 |  * memory, but also for the proc itself. A highly loaded cpu might not |
| 576 |  * be the best candidate for this proc since it won't get run. |
| 577 |  * |
| 578 |  * Just total guesstimates for now. |
| 579 |  */ |
| 580 | |
| 581 | int sched_cost_load = 1; |
| 582 | int sched_cost_priority = 1; |
| 583 | int sched_cost_runnable = 3; |
| 584 | int sched_cost_resident = 1; |
| 585 | #endif |
| 586 | |
| 587 | int |
| 588 | sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p) |
| 589 | { |
| 590 |     int cost = 0; |
| 591 | #ifdef MULTIPROCESSOR |
| 592 |     struct schedstate_percpu *spc; |
| 593 |     int l2resident = 0; |
| 594 | |
| 595 |     spc = &ci->ci_schedstate; |
| 596 | |
| 597 |     /* |
| 598 |      * First, account for the priority of the proc we want to move. |
| 599 |      * More willing to move, the lower the priority of the destination |
| 600 |      * and the higher the priority of the proc. |
| 601 |      */ |
| 602 |     if (!cpuset_isset(&sched_idle_cpus, ci)) { |
| 603 |         cost += (p->p_usrpri - spc->spc_curpriority) * |
| 604 |             sched_cost_priority; |
| 605 |         cost += sched_cost_runnable; |
| 606 |     } |
| 607 |     if (cpuset_isset(&sched_queued_cpus, ci)) |
| 608 |         cost += spc->spc_nrun * sched_cost_runnable; |
| 609 | |
| 610 |     /* |
| 611 |      * Try to avoid the primary cpu as it handles hardware interrupts. |
| 612 |      * |
| 613 |      * XXX Needs to be revisited when we distribute interrupts |
| 614 |      * over cpus. |
| 615 |      */ |
| 616 |     if (CPU_IS_PRIMARY(ci)) |
| 617 |         cost += sched_cost_runnable; |
| 618 | |
| 619 |     /* |
| 620 |      * If the proc is on this cpu already, lower the cost by how much |
| 621 |      * it has been running and an estimate of its footprint. |
| 622 |      */ |
| 623 |     if (p->p_cpu == ci && p->p_slptime == 0) { |
| 624 |         l2resident = |
| 625 |             log2(pmap_resident_count(p->p_vmspace->vm_map.pmap)); |
| 626 |         cost -= l2resident * sched_cost_resident; |
| 627 |     } |
| 628 | #endif |
| 629 |     return (cost); |
| 630 | } |
| 631 | |
| 632 | /* |
| 633 |  * Peg a proc to a cpu. |
| 634 |  */ |
| 635 | void |
| 636 | sched_peg_curproc(struct cpu_info *ci) |
| 637 | { |
| 638 |     struct proc *p = curproc; |
| 639 |     int s; |
| 640 | |
| 641 |     SCHED_LOCK(s); |
| 642 |     atomic_setbits_int(&p->p_flag, P_CPUPEG); |
| 643 |     setrunqueue(ci, p, p->p_usrpri); |
| 644 |     p->p_ru.ru_nvcsw++; |
| 645 |     mi_switch(); |
| 646 |     SCHED_UNLOCK(s); |
| 647 | } |
| 648 | |
| 649 | #ifdef MULTIPROCESSOR |
| 650 | |
| 651 | void |
| 652 | sched_start_secondary_cpus(void) |
| 653 | { |
| 654 |     CPU_INFO_ITERATOR cii; |
| 655 |     struct cpu_info *ci; |
| 656 | |
| 657 |     CPU_INFO_FOREACH(cii, ci) { |
| 658 |         struct schedstate_percpu *spc = &ci->ci_schedstate; |
| 659 | |
| 660 |         if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci)) |
| 661 |             continue; |
| 662 |         atomic_clearbits_int(&spc->spc_schedflags, |
| 663 |             SPCF_SHOULDHALT | SPCF_HALTED); |
| 664 | #ifdef __HAVE_CPU_TOPOLOGY |
| 665 |         if (!sched_smt && ci->ci_smt_id > 0) |
| 666 |             continue; |
| 667 | #endif |
| 668 |         cpuset_add(&sched_all_cpus, ci); |
| 669 |     } |
| 670 | } |
| 671 | |
| 672 | void |
| 673 | sched_stop_secondary_cpus(void) |
| 674 | { |
| 675 |     CPU_INFO_ITERATOR cii; |
| 676 |     struct cpu_info *ci; |
| 677 | |
| 678 |     /* |
| 679 |      * Make sure we stop the secondary CPUs. |
| 680 |      */ |
| 681 |     CPU_INFO_FOREACH(cii, ci) { |
| 682 |         struct schedstate_percpu *spc = &ci->ci_schedstate; |
| 683 | |
| 684 |         if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci)) |
| 685 |             continue; |
| 686 |         cpuset_del(&sched_all_cpus, ci); |
| 687 |         atomic_setbits_int(&spc->spc_schedflags, SPCF_SHOULDHALT); |
| 688 |     } |
| 689 |     CPU_INFO_FOREACH(cii, ci) { |
| 690 |         struct schedstate_percpu *spc = &ci->ci_schedstate; |
| 691 | |
| 692 |         if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci)) |
| 693 |             continue; |
| 694 |         while ((spc->spc_schedflags & SPCF_HALTED) == 0) { |
| 695 |             sleep_setup(spc, PZERO, "schedstate"); |
| 696 |             sleep_finish(0, |
| 697 |                 (spc->spc_schedflags & SPCF_HALTED) == 0); |
| 698 |         } |
| 699 |     } |
| 700 | } |
| 701 | |
| 702 | struct sched_barrier_state { |
| 703 |     struct cpu_info *ci; |
| 704 |     struct cond cond; |
| 705 | }; |
| 706 | |
| 707 | void |
| 708 | sched_barrier_task(void *arg) |
| 709 | { |
| 710 |     struct sched_barrier_state *sb = arg; |
| 711 |     struct cpu_info *ci = sb->ci; |
| 712 | |
| 713 |     sched_peg_curproc(ci); |
| 714 |     cond_signal(&sb->cond); |
| 715 |     atomic_clearbits_int(&curproc->p_flag, P_CPUPEG); |
| 716 | } |
| 717 | |
| 718 | void |
| 719 | sched_barrier(struct cpu_info *ci) |
| 720 | { |
| 721 |     struct sched_barrier_state sb; |
| 722 |     struct task task; |
| 723 |     CPU_INFO_ITERATOR cii; |
| 724 | |
| 725 |     if (ci == NULL) { |
| 726 |         CPU_INFO_FOREACH(cii, ci) { |
| 727 |             if (CPU_IS_PRIMARY(ci)) |
| 728 |                 break; |
| 729 |         } |
| 730 |     } |
| 731 |     KASSERT(ci != NULL); |
| 732 | |
| 733 |     if (ci == curcpu()) |
| 734 |         return; |
| 735 | |
| 736 |     sb.ci = ci; |
| 737 |     cond_init(&sb.cond); |
| 738 |     task_set(&task, sched_barrier_task, &sb); |
| 739 | |
| 740 |     task_add(systqmp, &task); |
| 741 |     cond_wait(&sb.cond, "sbar"); |
| 742 | } |
| 743 | |
| 744 | #else |
| 745 | |
| 746 | void |
| 747 | sched_barrier(struct cpu_info *ci) |
| 748 | { |
| 749 | } |
| 750 | |
| 751 | #endif |
| 752 | |
| 753 | /* |
| 754 |  * Functions to manipulate cpu sets. |
| 755 |  */ |
| 756 | struct cpu_info *cpuset_infos[MAXCPUS]; |
| 757 | static struct cpuset cpuset_all; |
| 758 | |
| 759 | void |
| 760 | cpuset_init_cpu(struct cpu_info *ci) |
| 761 | { |
| 762 |     cpuset_add(&cpuset_all, ci); |
| 763 |     cpuset_infos[CPU_INFO_UNIT(ci)] = ci; |
| 764 | } |
| 765 | |
| 766 | void |
| 767 | cpuset_clear(struct cpuset *cs) |
| 768 | { |
| 769 |     memset(cs, 0, sizeof(*cs)); |
| 770 | } |
| 771 | |
| 772 | void |
| 773 | cpuset_add(struct cpuset *cs, struct cpu_info *ci) |
| 774 | { |
| 775 |     unsigned int num = CPU_INFO_UNIT(ci); |
| 776 |     atomic_setbits_int(&cs->cs_set[num/32], (1U << (num % 32))); |
| 777 | } |
| 778 | |
| 779 | void |
| 780 | cpuset_del(struct cpuset *cs, struct cpu_info *ci) |
| 781 | { |
| 782 |     unsigned int num = CPU_INFO_UNIT(ci); |
| 783 |     atomic_clearbits_int(&cs->cs_set[num/32], (1U << (num % 32))); |
| 784 | } |
| 785 | |
| 786 | int |
| 787 | cpuset_isset(struct cpuset *cs, struct cpu_info *ci) |
| 788 | { |
| 789 |     unsigned int num = CPU_INFO_UNIT(ci); |
| 790 |     return (cs->cs_set[num/32] & (1U << (num % 32))); |
| 791 | } |
| 792 | |
| 793 | void |
| 794 | cpuset_add_all(struct cpuset *cs) |
| 795 | { |
| 796 |     cpuset_copy(cs, &cpuset_all); |
| 797 | } |
| 798 | |
| 799 | void |
| 800 | cpuset_copy(struct cpuset *to, struct cpuset *from) |
| 801 | { |
| 802 |     memcpy(to, from, sizeof(*to)); |
| 803 | } |
| 804 | |
| 805 | struct cpu_info * |
| 806 | cpuset_first(struct cpuset *cs) |
| 807 | { |
| 808 |     int i; |
| 809 | |
| 810 |     for (i = 0; i < CPUSET_ASIZE(ncpus); i++) |
| 811 |         if (cs->cs_set[i]) |
| 812 |             return (cpuset_infos[i * 32 + ffs(cs->cs_set[i]) - 1]); |
| 813 | |
| 814 |     return (NULL); |
| 815 | } |
| 816 | |
| 817 | void |
| 818 | cpuset_union(struct cpuset *to, struct cpuset *a, struct cpuset *b) |
| 819 | { |
| 820 |     int i; |
| 821 | |
| 822 |     for (i = 0; i < CPUSET_ASIZE(ncpus); i++) |
| 823 |         to->cs_set[i] = a->cs_set[i] | b->cs_set[i]; |
| 824 | } |
| 825 | |
| 826 | void |
| 827 | cpuset_intersection(struct cpuset *to, struct cpuset *a, struct cpuset *b) |
| 828 | { |
| 829 |     int i; |
| 830 | |
| 831 |     for (i = 0; i < CPUSET_ASIZE(ncpus); i++) |
| 832 |         to->cs_set[i] = a->cs_set[i] & b->cs_set[i]; |
| 833 | } |
| 834 | |
| 835 | void |
| 836 | cpuset_complement(struct cpuset *to, struct cpuset *a, struct cpuset *b) |
| 837 | { |
| 838 |     int i; |
| 839 | |
| 840 |     for (i = 0; i < CPUSET_ASIZE(ncpus); i++) |
| 841 |         to->cs_set[i] = b->cs_set[i] & ~a->cs_set[i]; |
| 842 | } |
| 843 | |
| 844 | int |
| 845 | cpuset_cardinality(struct cpuset *cs) |
| 846 | { |
| 847 |     int cardinality, i, n; |
| 848 | |
| 849 |     cardinality = 0; |
| 850 | |
| 851 |     for (i = 0; i < CPUSET_ASIZE(ncpus); i++) |
| 852 |         for (n = cs->cs_set[i]; n != 0; n &= n - 1) |
| 853 |             cardinality++; |
| 854 | |
| 855 |     return (cardinality); |
| 856 | } |
| 857 | |
| 858 | int |
| 859 | sysctl_hwncpuonline(void) |
| 860 | { |
| 861 |     return cpuset_cardinality(&sched_all_cpus); |
| 862 | } |
| 863 | |
| 864 | int |
| 865 | cpu_is_online(struct cpu_info *ci) |
| 866 | { |
| 867 |     return cpuset_isset(&sched_all_cpus, ci); |
| 868 | } |
| 869 | |
| 870 | #ifdef __HAVE_CPU_TOPOLOGY |
| 871 | |
| 872 | #include <sys/sysctl.h> |
| 873 | |
| 874 | int |
| 875 | sysctl_hwsmt(void *oldp, size_t *oldlenp, void *newp, size_t newlen) |
| 876 | { |
| 877 |     CPU_INFO_ITERATOR cii; |
| 878 |     struct cpu_info *ci; |
| 879 |     int err, newsmt; |
| 880 | |
| 881 |     newsmt = sched_smt; |
| 882 |     err = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &newsmt, 0, 1); |
| 883 |     if (err) |
| 884 |         return err; |
| 885 |     if (newsmt == sched_smt) |
| 886 |         return 0; |
| 887 | |
| 888 |     sched_smt = newsmt; |
| 889 |     CPU_INFO_FOREACH(cii, ci) { |
| 890 |         if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci)) |
| 891 |             continue; |
| 892 |         if (ci->ci_smt_id == 0) |
| 893 |             continue; |
| 894 |         if (sched_smt) |
| 895 |             cpuset_add(&sched_all_cpus, ci); |
| 896 |         else |
| 897 |             cpuset_del(&sched_all_cpus, ci); |
| 898 |     } |
| 899 | |
| 900 |     return 0; |
| 901 | } |
| 902 | |
| 903 | #endif |
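For reference, the run-queue bookkeeping in setrunqueue(), remrunqueue() and sched_chooseproc() above reduces to a small bitmask scheme: a priority in 0-127 maps to one of SCHED_NQS (32) queues via prio >> 2, spc_whichqs keeps one bit per non-empty queue, and ffs() selects the lowest-numbered (numerically best priority) non-empty queue. Below is a minimal stand-alone sketch of just that indexing; the priorities used are made-up examples, and the real code of course manipulates TAILQs under the scheduler lock rather than a bare bitmask.

/* Illustrative sketch of the prio >> 2 / whichqs / ffs() selection. */
#include <stdio.h>
#include <strings.h>    /* ffs() */

#define SCHED_NQS 32    /* 128 priorities collapsed into 32 queues */

int
main(void)
{
    unsigned int whichqs = 0;
    unsigned int prios[] = { 50, 14, 127 };     /* example priorities */
    int i, queue;

    /* Enqueue: mark each priority's queue as non-empty. */
    for (i = 0; i < 3; i++) {
        queue = prios[i] >> 2;          /* 0..127 -> 0..31 */
        whichqs |= (1U << queue);
    }

    /* Choose: lowest set bit == best (numerically lowest) priority band. */
    queue = ffs(whichqs) - 1;
    printf("next queue: %d (priorities %d-%d)\n",
        queue, queue << 2, (queue << 2) + 3);   /* prints queue 3, 12-15 */
    return 0;
}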