| File: | kern/sched_bsd.c |
| Warning: | line 548, column 3: Value stored to 'speedup' is never read |
| 1 | /* $OpenBSD: sched_bsd.c,v 1.70 2021/10/30 23:24:48 deraadt Exp $ */ |
| 2 | /* $NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $ */ |
| 3 | |
| 4 | /*- |
| 5 | * Copyright (c) 1982, 1986, 1990, 1991, 1993 |
| 6 | * The Regents of the University of California. All rights reserved. |
| 7 | * (c) UNIX System Laboratories, Inc. |
| 8 | * All or some portions of this file are derived from material licensed |
| 9 | * to the University of California by American Telephone and Telegraph |
| 10 | * Co. or Unix System Laboratories, Inc. and are reproduced herein with |
| 11 | * the permission of UNIX System Laboratories, Inc. |
| 12 | * |
| 13 | * Redistribution and use in source and binary forms, with or without |
| 14 | * modification, are permitted provided that the following conditions |
| 15 | * are met: |
| 16 | * 1. Redistributions of source code must retain the above copyright |
| 17 | * notice, this list of conditions and the following disclaimer. |
| 18 | * 2. Redistributions in binary form must reproduce the above copyright |
| 19 | * notice, this list of conditions and the following disclaimer in the |
| 20 | * documentation and/or other materials provided with the distribution. |
| 21 | * 3. Neither the name of the University nor the names of its contributors |
| 22 | * may be used to endorse or promote products derived from this software |
| 23 | * without specific prior written permission. |
| 24 | * |
| 25 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
| 26 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 27 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 28 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| 29 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 30 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| 31 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| 32 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| 33 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| 34 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| 35 | * SUCH DAMAGE. |
| 36 | * |
| 37 | * @(#)kern_synch.c 8.6 (Berkeley) 1/21/94 |
| 38 | */ |
| 39 | |
| 40 | #include <sys/param.h> |
| 41 | #include <sys/systm.h> |
| 42 | #include <sys/proc.h> |
| 43 | #include <sys/kernel.h> |
| 44 | #include <sys/malloc.h> |
| 45 | #include <sys/signalvar.h> |
| 46 | #include <sys/resourcevar.h> |
| 47 | #include <uvm/uvm_extern.h> |
| 48 | #include <sys/sched.h> |
| 49 | #include <sys/timeout.h> |
| 50 | #include <sys/smr.h> |
| 51 | #include <sys/tracepoint.h> |
| 52 | |
| 53 | #ifdef KTRACE |
| 54 | #include <sys/ktrace.h> |
| 55 | #endif |
| 56 | |
| 57 | |
| 58 | int lbolt; /* once a second sleep address */ |
| 59 | int rrticks_init; /* # of hardclock ticks per roundrobin() */ |
| 60 | |
| 61 | #ifdef MULTIPROCESSOR |
| 62 | struct __mp_lock sched_lock; |
| 63 | #endif |
| 64 | |
| 65 | void schedcpu(void *); |
| 66 | uint32_t decay_aftersleep(uint32_t, uint32_t); |
| 67 | |
| 68 | /* |
| 69 | * Force switch among equal priority processes every 100ms. |
| 70 | */ |
| 71 | void |
| 72 | roundrobin(struct cpu_info *ci) |
| 73 | { |
| 74 | struct schedstate_percpu *spc = &ci->ci_schedstate; |
| 75 | |
| 76 | spc->spc_rrticks = rrticks_init; |
| 77 | |
| 78 | if (ci->ci_curproc != NULL) { |
| 79 | if (spc->spc_schedflags & SPCF_SEENRR) { |
| 80 | /* |
| 81 | * The process has already been through a roundrobin |
| 82 | * without switching and may be hogging the CPU. |
| 83 | * Indicate that the process should yield. |
| 84 | */ |
| 85 | atomic_setbits_int(&spc->spc_schedflags, |
| 86 | SPCF_SHOULDYIELD); |
| 87 | } else { |
| 88 | atomic_setbits_int(&spc->spc_schedflags, |
| 89 | SPCF_SEENRR); |
| 90 | } |
| 91 | } |
| 92 | |
| 93 | if (spc->spc_nrun) |
| 94 | need_resched(ci); |
| 95 | } |
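For scale: roundrobin() reloads spc_rrticks from rrticks_init each time it runs, and scheduler_start() at the bottom of this file sets rrticks_init = hz / 10, so with the common hz value of 100 that is ten hardclock ticks, i.e. the 100 ms forced-switch interval described in the comment above.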
| 96 | |
| 97 | /* |
| 98 | * Constants for digital decay and forget: |
| 99 | * 90% of (p_estcpu) usage in 5 * loadav time |
| 100 | * 95% of (p_pctcpu) usage in 60 seconds (load insensitive) |
| 101 | * Note that, as ps(1) mentions, this can let percentages |
| 102 | * total over 100% (I've seen 137.9% for 3 processes). |
| 103 | * |
| 104 | * Note that hardclock updates p_estcpu and p_cpticks independently. |
| 105 | * |
| 106 | * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds. |
| 107 | * That is, the system wants to compute a value of decay such |
| 108 | * that the following for loop: |
| 109 | * for (i = 0; i < (5 * loadavg); i++) |
| 110 | * p_estcpu *= decay; |
| 111 | * will compute |
| 112 | * p_estcpu *= 0.1; |
| 113 | * for all values of loadavg: |
| 114 | * |
| 115 | * Mathematically this loop can be expressed by saying: |
| 116 | * decay ** (5 * loadavg) ~= .1 |
| 117 | * |
| 118 | * The system computes decay as: |
| 119 | * decay = (2 * loadavg) / (2 * loadavg + 1) |
| 120 | * |
| 121 | * We wish to prove that the system's computation of decay |
| 122 | * will always fulfill the equation: |
| 123 | * decay ** (5 * loadavg) ~= .1 |
| 124 | * |
| 125 | * If we compute b as: |
| 126 | * b = 2 * loadavg |
| 127 | * then |
| 128 | * decay = b / (b + 1) |
| 129 | * |
| 130 | * We now need to prove two things: |
| 131 | * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1) |
| 132 | * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg) |
| 133 | * |
| 134 | * Facts: |
| 135 | * For x close to zero, exp(x) =~ 1 + x, since |
| 136 | * exp(x) = 0! + x**1/1! + x**2/2! + ... . |
| 137 | * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b. |
| 138 | * For x close to zero, ln(1+x) =~ x, since |
| 139 | * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1 |
| 140 | * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1). |
| 141 | * ln(.1) =~ -2.30 |
| 142 | * |
| 143 | * Proof of (1): |
| 144 | * Solve (factor)**(power) =~ .1 given power (5*loadav): |
| 145 | * solving for factor, |
| 146 | * ln(factor) =~ (-2.30/5*loadav), or |
| 147 | * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) = |
| 148 | * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED |
| 149 | * |
| 150 | * Proof of (2): |
| 151 | * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)): |
| 152 | * solving for power, |
| 153 | * power*ln(b/(b+1)) =~ -2.30, or |
| 154 | * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED |
| 155 | * |
| 156 | * Actual power values for the implemented algorithm are as follows: |
| 157 | * loadav: 1 2 3 4 |
| 158 | * power: 5.68 10.32 14.94 19.55 |
| 159 | */ |
| 160 | |
| 161 | /* calculations for digital decay to forget 90% of usage in 5*loadav sec */ |
| 162 | #define loadfactor(loadav) (2 * (loadav)) |
| 163 | #define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE)) |
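As a quick sanity check on the derivation above, here is a small userland sketch (an illustration only, not part of this file; it just re-applies the decay_cpu() arithmetic, with FSHIFT assumed to be 11 as in this kernel): for a load average of 2.0 the fixed-point decay factor is 8192/10240 = 0.8, and roughly ten applications of it remove about 90% of a maximal estcpu of 255, matching the power of 10.32 the table above lists for loadav 2.

```c
/* Standalone illustration (not kernel code): apply the same fixed-point
 * decay as decay_cpu() for a load average of 2.0. */
#include <stdio.h>

#define FSHIFT	11		/* assumed to match the kernel's FSHIFT */
#define FSCALE	(1 << FSHIFT)

int
main(void)
{
	unsigned int loadfac = 2 * (2 * FSCALE);	/* loadfactor(2.0) in fixpt */
	unsigned int estcpu = 255;			/* maximum p_estcpu */
	int i;

	for (i = 1; i <= 10; i++) {
		/* same arithmetic as decay_cpu(loadfac, estcpu) */
		estcpu = (loadfac * estcpu) / (loadfac + FSCALE);
		printf("after %2d seconds: estcpu = %u\n", i, estcpu);
	}
	/* prints ~25 after 10 steps, i.e. ~90% of the original 255 is gone */
	return 0;
}
```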
| 164 | |
| 165 | /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ |
| 166 | fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ |
| 167 | |
| 168 | /* |
| 169 | * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the |
| 170 | * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below |
| 171 | * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT). |
| 172 | * |
| 173 | * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used: |
| 174 | * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits). |
| 175 | * |
| 176 | * If you don't want to bother with the faster/more-accurate formula, you |
| 177 | * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate |
| 178 | * (more general) method of calculating the %age of CPU used by a process. |
| 179 | */ |
| 180 | #define CCPU_SHIFT 11 |
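Plugging numbers into the comments above: with FSHIFT = 11, FSCALE = 2048 and ccpu ≈ 0.95122 * 2048 ≈ 1948, each pass of schedcpu() scales p_pctcpu by roughly 1948/2048 ≈ 0.951; sixty one-second passes give 0.951^60 = exp(-60/20) ≈ 0.05, which is the "decay 95% of p_pctcpu in 60 seconds" behavior promised above ccpu.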
| 181 | |
| 182 | /* |
| 183 | * Recompute process priorities, every second. |
| 184 | */ |
| 185 | void |
| 186 | schedcpu(void *arg) |
| 187 | { |
| 188 | struct timeout *to = (struct timeout *)arg; |
| 189 | fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); |
| 190 | struct proc *p; |
| 191 | int s; |
| 192 | unsigned int newcpu; |
| 193 | int phz; |
| 194 | |
| 195 | /* |
| 196 | * If we have a statistics clock, use that to calculate CPU |
| 197 | * time, otherwise revert to using the profiling clock (which, |
| 198 | * in turn, defaults to hz if there is no separate profiling |
| 199 | * clock available) |
| 200 | */ |
| 201 | phz = stathz ? stathz : profhz; |
| 202 | KASSERT(phz); |
| 203 | |
| 204 | LIST_FOREACH(p, &allproc, p_list) { |
| 205 | /* |
| 206 | * Idle threads are never placed on the runqueue, |
| 207 | * therefore computing their priority is pointless. |
| 208 | */ |
| 209 | if (p->p_cpu != NULL && |
| 210 | p->p_cpu->ci_schedstate.spc_idleproc == p) |
| 211 | continue; |
| 212 | /* |
| 213 | * Increment sleep time (if sleeping). We ignore overflow. |
| 214 | */ |
| 215 | if (p->p_stat == SSLEEP || p->p_stat == SSTOP) |
| 216 | p->p_slptime++; |
| 217 | p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT; |
| 218 | /* |
| 219 | * If the process has slept the entire second, |
| 220 | * stop recalculating its priority until it wakes up. |
| 221 | */ |
| 222 | if (p->p_slptime > 1) |
| 223 | continue; |
| 224 | SCHED_LOCK(s); |
| 225 | /* |
| 226 | * p_pctcpu is only for diagnostic tools such as ps. |
| 227 | */ |
| 228 | #if (FSHIFT >= CCPU_SHIFT) |
| 229 | p->p_pctcpu += (phz == 100)? |
| 230 | ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT): |
| 231 | 100 * (((fixpt_t) p->p_cpticks) |
| 232 | << (FSHIFT - CCPU_SHIFT)) / phz; |
| 233 | #else |
| 234 | p->p_pctcpu += ((FSCALE - ccpu) * |
| 235 | (p->p_cpticks * FSCALE / phz)) >> FSHIFT; |
| 236 | #endif |
| 237 | p->p_cpticks = 0; |
| 238 | newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu); |
| 239 | setpriority(p, newcpu, p->p_p->ps_nice); |
| 240 | |
| 241 | if (p->p_stat == SRUN && |
| 242 | (p->p_runpri / SCHED_PPQ) != (p->p_usrpri / SCHED_PPQ)) { |
| 243 | remrunqueue(p); |
| 244 | setrunqueue(p->p_cpu, p, p->p_usrpri); |
| 245 | } |
| 246 | SCHED_UNLOCK(s); |
| 247 | } |
| 248 | uvm_meter(); |
| 249 | wakeup(&lbolt); |
| 250 | timeout_add_sec(to, 1); |
| 251 | } |
| 252 | |
| 253 | /* |
| 254 | * Recalculate the priority of a process after it has slept for a while. |
| 255 | * For all load averages >= 1 and max p_estcpu of 255, sleeping for at |
| 256 | * least six times the loadfactor will decay p_estcpu to zero. |
| 257 | */ |
| 258 | uint32_t |
| 259 | decay_aftersleep(uint32_t estcpu, uint32_t slptime) |
| 260 | { |
| 261 | fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); |
| 262 | uint32_t newcpu; |
| 263 | |
| 264 | if (slptime > 5 * loadfac) |
| 265 | newcpu = 0; |
| 266 | else { |
| 267 | newcpu = estcpu; |
| 268 | slptime--; /* the first time was done in schedcpu */ |
| 269 | while (newcpu && --slptime) |
| 270 | newcpu = decay_cpu(loadfac, newcpu); |
| 271 | |
| 272 | } |
| 273 | |
| 274 | return (newcpu); |
| 275 | } |
| 276 | |
| 277 | /* |
| 278 | * General yield call. Puts the current process back on its run queue and |
| 279 | * performs a voluntary context switch. |
| 280 | */ |
| 281 | void |
| 282 | yield(void) |
| 283 | { |
| 284 | struct proc *p = curproc; |
| 285 | int s; |
| 286 | |
| 287 | SCHED_LOCK(s); |
| 288 | setrunqueue(p->p_cpu, p, p->p_usrpri); |
| 289 | p->p_ru.ru_nvcsw++; |
| 290 | mi_switch(); |
| 291 | SCHED_UNLOCK(s); |
| 292 | } |
| 293 | |
| 294 | /* |
| 295 | * General preemption call. Puts the current process back on its run queue |
| 296 | * and performs an involuntary context switch. If a process is supplied, |
| 297 | * we switch to that process. Otherwise, we use the normal process selection |
| 298 | * criteria. |
| 299 | */ |
| 300 | void |
| 301 | preempt(void) |
| 302 | { |
| 303 | struct proc *p = curproc; |
| 304 | int s; |
| 305 | |
| 306 | SCHED_LOCK(s); |
| 307 | setrunqueue(p->p_cpu, p, p->p_usrpri); |
| 308 | p->p_ru.ru_nivcsw++; |
| 309 | mi_switch(); |
| 310 | SCHED_UNLOCK(s); |
| 311 | } |
| 312 | |
| 313 | void |
| 314 | mi_switch(void) |
| 315 | { |
| 316 | struct schedstate_percpu *spc = &curcpu()->ci_schedstate; |
| 317 | struct proc *p = curproc; |
| 318 | struct proc *nextproc; |
| 319 | struct process *pr = p->p_p; |
| 320 | struct timespec ts; |
| 321 | #ifdef MULTIPROCESSOR |
| 322 | int hold_count; |
| 323 | int sched_count; |
| 324 | #endif |
| 325 | |
| 326 | assertwaitok(); |
| 327 | KASSERT(p->p_stat != SONPROC); |
| 328 | |
| 329 | SCHED_ASSERT_LOCKED(); |
| 330 | |
| 331 | #ifdef MULTIPROCESSOR |
| 332 | /* |
| 333 | * Release the kernel_lock, as we are about to yield the CPU. |
| 334 | */ |
| 335 | sched_count = __mp_release_all_but_one(&sched_lock); |
| 336 | if (_kernel_lock_held()) |
| 337 | hold_count = __mp_release_all(&kernel_lock); |
| 338 | else |
| 339 | hold_count = 0; |
| 340 | #endif |
| 341 | |
| 342 | /* |
| 343 | * Compute the amount of time during which the current |
| 344 | * process was running, and add that to its total so far. |
| 345 | */ |
| 346 | nanouptime(&ts); |
| 347 | if (timespeccmp(&ts, &spc->spc_runtime, <)) { |
| 348 | #if 0 |
| 349 | printf("uptime is not monotonic! " |
| 350 | "ts=%lld.%09lu, runtime=%lld.%09lu\n", |
| 351 | (long long)tv.tv_sec, tv.tv_nsec, |
| 352 | (long long)spc->spc_runtime.tv_sec, |
| 353 | spc->spc_runtime.tv_nsec); |
| 354 | #endif |
| 355 | } else { |
| 356 | timespecsub(&ts, &spc->spc_runtime, &ts); |
| 357 | timespecadd(&p->p_rtime, &ts, &p->p_rtime); |
| 358 | } |
| 359 | |
| 360 | /* add the time counts for this thread to the process's total */ |
| 361 | tuagg_unlocked(pr, p); |
| 362 | |
| 363 | /* |
| 364 | * Process is about to yield the CPU; clear the appropriate |
| 365 | * scheduling flags. |
| 366 | */ |
| 367 | atomic_clearbits_int(&spc->spc_schedflags, SPCF_SWITCHCLEAR); |
| 368 | |
| 369 | nextproc = sched_chooseproc(); |
| 370 | |
| 371 | if (p != nextproc) { |
| 372 | uvmexp.swtch++; |
| 373 | TRACEPOINT(sched, off__cpu, nextproc->p_tid + THREAD_PID_OFFSET, |
| 374 | nextproc->p_p->ps_pid); |
| 375 | cpu_switchto(p, nextproc); |
| 376 | TRACEPOINT(sched, on__cpu, NULL); |
| 377 | } else { |
| 378 | TRACEPOINT(sched, remain__cpu, NULL); |
| 379 | p->p_stat = SONPROC; |
| 380 | } |
| 381 | |
| 382 | clear_resched(curcpu()); |
| 383 | |
| 384 | SCHED_ASSERT_LOCKED(); |
| 385 | |
| 386 | /* |
| 387 | * To preserve lock ordering, we need to release the sched lock |
| 388 | * and grab it after we grab the big lock. |
| 389 | * In the future, when the sched lock isn't recursive, we'll |
| 390 | * just release it here. |
| 391 | */ |
| 392 | #ifdef MULTIPROCESSOR |
| 393 | __mp_unlock(&sched_lock); |
| 394 | #endif |
| 395 | |
| 396 | SCHED_ASSERT_UNLOCKED(); |
| 397 | |
| 398 | smr_idle(); |
| 399 | |
| 400 | /* |
| 401 | * We're running again; record our new start time. We might |
| 402 | * be running on a new CPU now, so don't use the cache'd |
| 403 | * schedstate_percpu pointer. |
| 404 | */ |
| 405 | KASSERT(p->p_cpu == curcpu()); |
| 406 | |
| 407 | nanouptime(&p->p_cpu->ci_schedstate.spc_runtime); |
| 408 | |
| 409 | #ifdef MULTIPROCESSOR |
| 410 | /* |
| 411 | * Reacquire the kernel_lock now. We do this after we've |
| 412 | * released the scheduler lock to avoid deadlock, and before |
| 413 | * we reacquire the interlock and the scheduler lock. |
| 414 | */ |
| 415 | if (hold_count) |
| 416 | __mp_acquire_count(&kernel_lock, hold_count); |
| 417 | __mp_acquire_count(&sched_lock, sched_count + 1); |
| 418 | #endif |
| 419 | } |
| 420 | |
| 421 | /* |
| 422 | * Change process state to be runnable, |
| 423 | * placing it on the run queue. |
| 424 | */ |
| 425 | void |
| 426 | setrunnable(struct proc *p) |
| 427 | { |
| 428 | struct process *pr = p->p_p; |
| 429 | u_char prio; |
| 430 | |
| 431 | SCHED_ASSERT_LOCKED(); |
| 432 | |
| 433 | switch (p->p_stat) { |
| 434 | case 0: |
| 435 | case SRUN: |
| 436 | case SONPROC: |
| 437 | case SDEAD: |
| 438 | case SIDL: |
| 439 | default: |
| 440 | panic("setrunnable"); |
| 441 | case SSTOP: |
| 442 | /* |
| 443 | * If we're being traced (possibly because someone attached us |
| 444 | * while we were stopped), check for a signal from the debugger. |
| 445 | */ |
| 446 | if ((pr->ps_flags & PS_TRACED) != 0 && pr->ps_xsig != 0) |
| 447 | atomic_setbits_int(&p->p_siglist, sigmask(pr->ps_xsig)); |
| 448 | prio = p->p_usrpri; |
| 449 | unsleep(p); |
| 450 | break; |
| 451 | case SSLEEP: |
| 452 | prio = p->p_slppri; |
| 453 | unsleep(p); /* e.g. when sending signals */ |
| 454 | break; |
| 455 | } |
| 456 | setrunqueue(NULL, p, prio); |
| 457 | if (p->p_slptime > 1) { |
| 458 | uint32_t newcpu; |
| 459 | |
| 460 | newcpu = decay_aftersleep(p->p_estcpu, p->p_slptime); |
| 461 | setpriority(p, newcpu, pr->ps_nice); |
| 462 | } |
| 463 | p->p_slptime = 0; |
| 464 | } |
| 465 | |
| 466 | /* |
| 467 | * Compute the priority of a process. |
| 468 | */ |
| 469 | void |
| 470 | setpriority(struct proc *p, uint32_t newcpu, uint8_t nice) |
| 471 | { |
| 472 | unsigned int newprio; |
| 473 | |
| 474 | newprio = min((PUSER + newcpu + NICE_WEIGHT * (nice - NZERO)), MAXPRI); |
| 475 | |
| 476 | SCHED_ASSERT_LOCKED(); |
| 477 | p->p_estcpu = newcpu; |
| 478 | p->p_usrpri = newprio; |
| 479 | } |
| 480 | |
| 481 | /* |
| 482 | * We adjust the priority of the current process. The priority of a process |
| 483 | * gets worse as it accumulates CPU time. The cpu usage estimator (p_estcpu) |
| 484 | * is increased here. The formula for computing priorities (in kern_synch.c) |
| 485 | * will compute a different value each time p_estcpu increases. This can |
| 486 | * cause a switch, but unless the priority crosses a PPQ boundary the actual |
| 487 | * queue will not change. The cpu usage estimator ramps up quite quickly |
| 488 | * when the process is running (linearly), and decays away exponentially, at |
| 489 | * a rate which is proportionally slower when the system is busy. The basic |
| 490 | * principle is that the system will 90% forget that the process used a lot |
| 491 | * of CPU time in 5 * loadav seconds. This causes the system to favor |
| 492 | * processes which haven't run much recently, and to round-robin among other |
| 493 | * processes. |
| 494 | */ |
| 495 | void |
| 496 | schedclock(struct proc *p) |
| 497 | { |
| 498 | struct cpu_info *ci = curcpu(); |
| 499 | struct schedstate_percpu *spc = &ci->ci_schedstate; |
| 500 | uint32_t newcpu; |
| 501 | int s; |
| 502 | |
| 503 | if (p == spc->spc_idleproc || spc->spc_spinning) |
| 504 | return; |
| 505 | |
| 506 | SCHED_LOCK(s); |
| 507 | newcpu = ESTCPULIM(p->p_estcpu + 1); |
| 508 | setpriority(p, newcpu, p->p_p->ps_nice); |
| 509 | SCHED_UNLOCK(s); |
| 510 | } |
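Making the comment above concrete with this kernel's constants (PUSER = 50, NICE_WEIGHT = 2, NZERO = 20, MAXPRI = 127, SCHED_PPQ = 128 / 32 = 4): for a thread at the default nice value of 20, setpriority() yields p_usrpri = min(50 + p_estcpu, 127). schedclock() raises p_estcpu by one per statclock tick (capped by ESTCPULIM()), while schedcpu() only moves a runnable thread to a different run queue once p_usrpri crosses a SCHED_PPQ boundary, so p_estcpu has to drift by about four before the queue actually changes; that is the "unless the priority crosses a PPQ boundary" behavior described above.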
| 511 | |
| 512 | void (*cpu_setperf)(int); |
| 513 | |
| 514 | #define PERFPOL_MANUAL 0 |
| 515 | #define PERFPOL_AUTO 1 |
| 516 | #define PERFPOL_HIGH 2 |
| 517 | int perflevel = 100; |
| 518 | int perfpolicy = PERFPOL_AUTO; |
| 519 | |
| 520 | #ifndef SMALL_KERNEL |
| 521 | /* |
| 522 | * The code below handles CPU throttling. |
| 523 | */ |
| 524 | #include <sys/sysctl.h> |
| 525 | |
| 526 | void setperf_auto(void *); |
| 527 | struct timeout setperf_to = TIMEOUT_INITIALIZER(setperf_auto, NULL); |
| 528 | extern int hw_power; |
| 529 | |
| 530 | void |
| 531 | setperf_auto(void *v) |
| 532 | { |
| 533 | static uint64_t *idleticks, *totalticks; |
| 534 | static int downbeats; |
| 535 | int i, j = 0; |
| 536 | int speedup = 0; |
| 537 | CPU_INFO_ITERATOR cii; |
| 538 | struct cpu_info *ci; |
| 539 | uint64_t idle, total, allidle = 0, alltotal = 0; |
| 540 | |
| 541 | if (perfpolicy != PERFPOL_AUTO) |
| 542 | return; |
| 543 | |
| 544 | if (cpu_setperf == NULL) |
| 545 | return; |
| 546 | |
| 547 | if (hw_power) { |
| 548 | speedup = 1; |
Value stored to 'speedup' is never read | |
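This is the dead store the report is about: when hw_power is non-zero, control jumps straight to the faster: label inside the if (speedup && perflevel != 100) block further down, so the condition that would read speedup is never evaluated on that path and the value stored at line 548 is discarded. A minimal cleanup, sketched below (an assumption about intent, not necessarily how upstream resolved it), is to drop the assignment and keep only the goto, which leaves the behavior unchanged:

```c
	if (hw_power) {
		/* the goto bypasses every later read of speedup,
		 * so there is no need to set it on this path */
		goto faster;
	}
```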
| 549 | goto faster; |
| 550 | } |
| 551 | |
| 552 | if (!idleticks) |
| 553 | if (!(idleticks = mallocarray(ncpusfound, sizeof(*idleticks), |
| 554 | M_DEVBUF, M_NOWAIT | M_ZERO))) |
| 555 | return; |
| 556 | if (!totalticks) |
| 557 | if (!(totalticks = mallocarray(ncpusfound, sizeof(*totalticks), |
| 558 | M_DEVBUF, M_NOWAIT | M_ZERO))) { |
| 559 | free(idleticks, M_DEVBUF, |
| 560 | sizeof(*idleticks) * ncpusfound); |
| 561 | return; |
| 562 | } |
| 563 | CPU_INFO_FOREACH(cii, ci) { |
| 564 | if (!cpu_is_online(ci)) |
| 565 | continue; |
| 566 | total = 0; |
| 567 | for (i = 0; i < CPUSTATES; i++) { |
| 568 | total += ci->ci_schedstate.spc_cp_time[i]; |
| 569 | } |
| 570 | total -= totalticks[j]; |
| 571 | idle = ci->ci_schedstate.spc_cp_time[CP_IDLE] - idleticks[j]; |
| 572 | if (idle < total / 3) |
| 573 | speedup = 1; |
| 574 | alltotal += total; |
| 575 | allidle += idle; |
| 576 | idleticks[j] += idle; |
| 577 | totalticks[j] += total; |
| 578 | j++; |
| 579 | } |
| 580 | if (allidle < alltotal / 2) |
| 581 | speedup = 1; |
| 582 | if (speedup) |
| 583 | downbeats = 5; |
| 584 | |
| 585 | if (speedup && perflevel != 100) { |
| 586 | faster: |
| 587 | perflevel = 100; |
| 588 | cpu_setperf(perflevel); |
| 589 | } else if (!speedup && perflevel != 0 && --downbeats <= 0) { |
| 590 | perflevel = 0; |
| 591 | cpu_setperf(perflevel); |
| 592 | } |
| 593 | |
| 594 | timeout_add_msec(&setperf_to, 100); |
| 595 | } |
| 596 | |
| 597 | int |
| 598 | sysctl_hwsetperf(void *oldp, size_t *oldlenp, void *newp, size_t newlen) |
| 599 | { |
| 600 | int err; |
| 601 | |
| 602 | if (!cpu_setperf) |
| 603 | return EOPNOTSUPP; |
| 604 | |
| 605 | if (perfpolicy != PERFPOL_MANUAL) |
| 606 | return sysctl_rdint(oldp, oldlenp, newp, perflevel); |
| 607 | |
| 608 | err = sysctl_int_bounded(oldp, oldlenp, newp, newlen, |
| 609 | &perflevel, 0, 100); |
| 610 | if (err) |
| 611 | return err; |
| 612 | |
| 613 | if (newp != NULL) |
| 614 | cpu_setperf(perflevel); |
| 615 | |
| 616 | return 0; |
| 617 | } |
| 618 | |
| 619 | int |
| 620 | sysctl_hwperfpolicy(void *oldp, size_t *oldlenp, void *newp, size_t newlen) |
| 621 | { |
| 622 | char policy[32]; |
| 623 | int err; |
| 624 | |
| 625 | if (!cpu_setperf) |
| 626 | return EOPNOTSUPP; |
| 627 | |
| 628 | switch (perfpolicy) { |
| 629 | case PERFPOL_MANUAL: |
| 630 | strlcpy(policy, "manual", sizeof(policy)); |
| 631 | break; |
| 632 | case PERFPOL_AUTO: |
| 633 | strlcpy(policy, "auto", sizeof(policy)); |
| 634 | break; |
| 635 | case PERFPOL_HIGH: |
| 636 | strlcpy(policy, "high", sizeof(policy)); |
| 637 | break; |
| 638 | default: |
| 639 | strlcpy(policy, "unknown", sizeof(policy)); |
| 640 | break; |
| 641 | } |
| 642 | |
| 643 | if (newp == NULL) |
| 644 | return sysctl_rdstring(oldp, oldlenp, newp, policy); |
| 645 | |
| 646 | err = sysctl_string(oldp, oldlenp, newp, newlen, policy, sizeof(policy)); |
| 647 | if (err) |
| 648 | return err; |
| 649 | if (strcmp(policy, "manual") == 0) |
| 650 | perfpolicy = PERFPOL_MANUAL; |
| 651 | else if (strcmp(policy, "auto") == 0) |
| 652 | perfpolicy = PERFPOL_AUTO; |
| 653 | else if (strcmp(policy, "high") == 0) |
| 654 | perfpolicy = PERFPOL_HIGH; |
| 655 | else |
| 656 | return EINVAL; |
| 657 | |
| 658 | if (perfpolicy == PERFPOL_AUTO) { |
| 659 | timeout_add_msec(&setperf_to, 200); |
| 660 | } else if (perfpolicy == PERFPOL_HIGH) { |
| 661 | perflevel = 100; |
| 662 | cpu_setperf(perflevel); |
| 663 | } |
| 664 | return 0; |
| 665 | } |
| 666 | #endif |
| 667 | |
| 668 | void |
| 669 | scheduler_start(void) |
| 670 | { |
| 671 | static struct timeout schedcpu_to; |
| 672 | |
| 673 | /* |
| 674 | * We avoid polluting the global namespace by keeping the scheduler |
| 675 | * timeouts static in this function. |
| 676 | * We setup the timeout here and kick schedcpu once to make it do |
| 677 | * its job. |
| 678 | */ |
| 679 | timeout_set(&schedcpu_to, schedcpu, &schedcpu_to); |
| 680 | |
| 681 | rrticks_init = hz / 10; |
| 682 | schedcpu(&schedcpu_to); |
| 683 | |
| 684 | #ifndef SMALL_KERNEL |
| 685 | if (perfpolicy == PERFPOL_AUTO) |
| 686 | timeout_add_msec(&setperf_to, 200); |
| 687 | #endif |
| 688 | } |
| 689 |