File: kern/kern_tc.c
Static-analyzer warning: line 905, column 35 — the result of the left shift
is undefined because the left operand is negative.
1 | /* $OpenBSD: kern_tc.c,v 1.75 2021/10/24 00:02:25 jsg Exp $ */ | ||||
2 | |||||
3 | /* | ||||
4 | * Copyright (c) 2000 Poul-Henning Kamp <phk@FreeBSD.org> | ||||
5 | * | ||||
6 | * Permission to use, copy, modify, and distribute this software for any | ||||
7 | * purpose with or without fee is hereby granted, provided that the above | ||||
8 | * copyright notice and this permission notice appear in all copies. | ||||
9 | * | ||||
10 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | ||||
11 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | ||||
12 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | ||||
13 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | ||||
14 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | ||||
15 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | ||||
16 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | ||||
17 | */ | ||||
18 | |||||
19 | /* | ||||
20 | * If we meet some day, and you think this stuff is worth it, you | ||||
21 | * can buy me a beer in return. Poul-Henning Kamp | ||||
22 | */ | ||||
23 | |||||
24 | #include <sys/param.h> | ||||
25 | #include <sys/atomic.h> | ||||
26 | #include <sys/kernel.h> | ||||
27 | #include <sys/mutex.h> | ||||
28 | #include <sys/rwlock.h> | ||||
29 | #include <sys/stdint.h> | ||||
30 | #include <sys/timeout.h> | ||||
31 | #include <sys/sysctl.h> | ||||
32 | #include <sys/syslog.h> | ||||
33 | #include <sys/systm.h> | ||||
34 | #include <sys/timetc.h> | ||||
35 | #include <sys/queue.h> | ||||
36 | #include <sys/malloc.h> | ||||
37 | |||||
38 | u_int dummy_get_timecount(struct timecounter *); | ||||
39 | |||||
40 | int sysctl_tc_hardware(void *, size_t *, void *, size_t); | ||||
41 | int sysctl_tc_choice(void *, size_t *, void *, size_t); | ||||
42 | |||||
43 | /* | ||||
44 | * Implement a dummy timecounter which we can use until we get a real one | ||||
45 | * in the air. This allows the console and other early stuff to use | ||||
46 | * time services. | ||||
47 | */ | ||||
48 | |||||
49 | u_int | ||||
50 | dummy_get_timecount(struct timecounter *tc) | ||||
51 | { | ||||
52 | static u_int now; | ||||
53 | |||||
54 | return atomic_inc_int_nv(&now)_atomic_add_int_nv((&now), 1); | ||||
55 | } | ||||
56 | |||||
57 | static struct timecounter dummy_timecounter = { | ||||
58 | .tc_get_timecount = dummy_get_timecount, | ||||
59 | .tc_poll_pps = NULL((void *)0), | ||||
60 | .tc_counter_mask = ~0u, | ||||
61 | .tc_frequency = 1000000, | ||||
62 | .tc_name = "dummy", | ||||
63 | .tc_quality = -1000000, | ||||
64 | .tc_priv = NULL((void *)0), | ||||
65 | .tc_user = 0, | ||||
66 | }; | ||||
67 | |||||
68 | /* | ||||
69 | * Locks used to protect struct members, global variables in this file: | ||||
70 | * I immutable after initialization | ||||
71 | * T tc_lock | ||||
72 | * W windup_mtx | ||||
73 | */ | ||||
74 | |||||
75 | struct timehands { | ||||
76 | /* These fields must be initialized by the driver. */ | ||||
77 | struct timecounter *th_counter; /* [W] */ | ||||
78 | int64_t th_adjtimedelta; /* [T,W] */ | ||||
79 | struct bintime th_next_ntp_update; /* [T,W] */ | ||||
80 | int64_t th_adjustment; /* [W] */ | ||||
81 | u_int64_t th_scale; /* [W] */ | ||||
82 | u_int th_offset_count; /* [W] */ | ||||
83 | struct bintime th_boottime; /* [T,W] */ | ||||
84 | struct bintime th_offset; /* [W] */ | ||||
85 | struct bintime th_naptime; /* [W] */ | ||||
86 | struct timeval th_microtime; /* [W] */ | ||||
87 | struct timespec th_nanotime; /* [W] */ | ||||
88 | /* Fields not to be copied in tc_windup start with th_generation. */ | ||||
89 | volatile u_int th_generation; /* [W] */ | ||||
90 | struct timehands *th_next; /* [I] */ | ||||
91 | }; | ||||
92 | |||||
93 | static struct timehands th0; | ||||
94 | static struct timehands th1 = { | ||||
95 | .th_next = &th0 | ||||
96 | }; | ||||
97 | static struct timehands th0 = { | ||||
98 | .th_counter = &dummy_timecounter, | ||||
99 | .th_scale = UINT64_MAX0xffffffffffffffffULL / 1000000, | ||||
100 | .th_offset = { .sec = 1, .frac = 0 }, | ||||
101 | .th_generation = 1, | ||||
102 | .th_next = &th1 | ||||
103 | }; | ||||
104 | |||||
105 | struct rwlock tc_lock = RWLOCK_INITIALIZER("tc_lock"){ 0, "tc_lock" }; | ||||
106 | |||||
107 | /* | ||||
108 | * tc_windup() must be called before leaving this mutex. | ||||
109 | */ | ||||
110 | struct mutex windup_mtx = MUTEX_INITIALIZER(IPL_CLOCK){ ((void *)0), ((((0xc)) > 0x0 && ((0xc)) < 0x9 ) ? 0x9 : ((0xc))), 0x0 }; | ||||
111 | |||||
112 | static struct timehands *volatile timehands = &th0; /* [W] */ | ||||
113 | struct timecounter *timecounter = &dummy_timecounter; /* [T] */ | ||||
114 | static SLIST_HEAD(, timecounter)struct { struct timecounter *slh_first; } tc_list = SLIST_HEAD_INITIALIZER(tc_list){ ((void *)0) }; | ||||
115 | |||||
116 | /* | ||||
117 | * These are updated from tc_windup(). They are useful when | ||||
118 | * examining kernel core dumps. | ||||
119 | */ | ||||
120 | volatile time_t naptime = 0; | ||||
121 | volatile time_t time_second = 1; | ||||
122 | volatile time_t time_uptime = 0; | ||||
123 | |||||
124 | static int timestepwarnings; | ||||
125 | |||||
126 | void ntp_update_second(struct timehands *); | ||||
127 | void tc_windup(struct bintime *, struct bintime *, int64_t *); | ||||
128 | |||||
129 | /* | ||||
130 | * Return the difference between the timehands' counter value now and what | ||||
131 | * was when we copied it to the timehands' offset_count. | ||||
132 | */ | ||||
133 | static __inline u_int | ||||
134 | tc_delta(struct timehands *th) | ||||
135 | { | ||||
136 | struct timecounter *tc; | ||||
137 | |||||
138 | tc = th->th_counter; | ||||
139 | return ((tc->tc_get_timecount(tc) - th->th_offset_count) & | ||||
140 | tc->tc_counter_mask); | ||||
141 | } | ||||
142 | |||||
143 | /* | ||||
144 | * Functions for reading the time. We have to loop until we are sure that | ||||
145 | * the timehands that we operated on was not updated under our feet. See | ||||
146 | * the comment in <sys/time.h> for a description of these functions. | ||||
147 | */ | ||||
148 | |||||
149 | void | ||||
150 | binboottime(struct bintime *bt) | ||||
151 | { | ||||
152 | struct timehands *th; | ||||
153 | u_int gen; | ||||
154 | |||||
155 | do { | ||||
156 | th = timehands; | ||||
157 | gen = th->th_generation; | ||||
158 | membar_consumer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
159 | *bt = th->th_boottime; | ||||
160 | membar_consumer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
161 | } while (gen == 0 || gen != th->th_generation); | ||||
162 | } | ||||
163 | |||||
164 | void | ||||
165 | microboottime(struct timeval *tvp) | ||||
166 | { | ||||
167 | struct bintime bt; | ||||
168 | |||||
169 | binboottime(&bt); | ||||
170 | BINTIME_TO_TIMEVAL(&bt, tvp); | ||||
171 | } | ||||
172 | |||||
173 | void | ||||
174 | nanoboottime(struct timespec *tsp) | ||||
175 | { | ||||
176 | struct bintime bt; | ||||
177 | |||||
178 | binboottime(&bt); | ||||
179 | BINTIME_TO_TIMESPEC(&bt, tsp); | ||||
180 | } | ||||
181 | |||||
182 | void | ||||
183 | binuptime(struct bintime *bt) | ||||
184 | { | ||||
185 | struct timehands *th; | ||||
186 | u_int gen; | ||||
187 | |||||
188 | do { | ||||
189 | th = timehands; | ||||
190 | gen = th->th_generation; | ||||
191 | membar_consumer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
192 | *bt = th->th_offset; | ||||
193 | bintimeaddfrac(bt, th->th_scale * tc_delta(th), bt); | ||||
194 | membar_consumer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
195 | } while (gen == 0 || gen != th->th_generation); | ||||
196 | } | ||||
197 | |||||
198 | void | ||||
199 | getbinuptime(struct bintime *bt) | ||||
200 | { | ||||
201 | struct timehands *th; | ||||
202 | u_int gen; | ||||
203 | |||||
204 | do { | ||||
205 | th = timehands; | ||||
206 | gen = th->th_generation; | ||||
207 | membar_consumer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
208 | *bt = th->th_offset; | ||||
209 | membar_consumer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
210 | } while (gen == 0 || gen != th->th_generation); | ||||
211 | } | ||||
212 | |||||
213 | void | ||||
214 | nanouptime(struct timespec *tsp) | ||||
215 | { | ||||
216 | struct bintime bt; | ||||
217 | |||||
218 | binuptime(&bt); | ||||
219 | BINTIME_TO_TIMESPEC(&bt, tsp); | ||||
220 | } | ||||
221 | |||||
222 | void | ||||
223 | microuptime(struct timeval *tvp) | ||||
224 | { | ||||
225 | struct bintime bt; | ||||
226 | |||||
227 | binuptime(&bt); | ||||
228 | BINTIME_TO_TIMEVAL(&bt, tvp); | ||||
229 | } | ||||
230 | |||||
231 | time_t | ||||
232 | getuptime(void) | ||||
233 | { | ||||
234 | #if defined(__LP64__1) | ||||
235 | return time_uptime; /* atomic */ | ||||
236 | #else | ||||
237 | time_t now; | ||||
238 | struct timehands *th; | ||||
239 | u_int gen; | ||||
240 | |||||
241 | do { | ||||
242 | th = timehands; | ||||
243 | gen = th->th_generation; | ||||
244 | membar_consumer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
245 | now = th->th_offset.sec; | ||||
246 | membar_consumer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
247 | } while (gen == 0 || gen != th->th_generation); | ||||
248 | |||||
249 | return now; | ||||
250 | #endif | ||||
251 | } | ||||
252 | |||||
253 | uint64_t | ||||
254 | nsecuptime(void) | ||||
255 | { | ||||
256 | struct bintime bt; | ||||
257 | |||||
258 | binuptime(&bt); | ||||
259 | return BINTIME_TO_NSEC(&bt); | ||||
260 | } | ||||
261 | |||||
262 | uint64_t | ||||
263 | getnsecuptime(void) | ||||
264 | { | ||||
265 | struct bintime bt; | ||||
266 | |||||
267 | getbinuptime(&bt); | ||||
268 | return BINTIME_TO_NSEC(&bt); | ||||
269 | } | ||||
270 | |||||
271 | void | ||||
272 | binruntime(struct bintime *bt) | ||||
273 | { | ||||
274 | struct timehands *th; | ||||
275 | u_int gen; | ||||
276 | |||||
277 | do { | ||||
278 | th = timehands; | ||||
279 | gen = th->th_generation; | ||||
280 | membar_consumer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
281 | bintimeaddfrac(&th->th_offset, th->th_scale * tc_delta(th), bt); | ||||
282 | bintimesub(bt, &th->th_naptime, bt); | ||||
283 | membar_consumer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
284 | } while (gen == 0 || gen != th->th_generation); | ||||
285 | } | ||||
286 | |||||
287 | void | ||||
288 | nanoruntime(struct timespec *ts) | ||||
289 | { | ||||
290 | struct bintime bt; | ||||
291 | |||||
292 | binruntime(&bt); | ||||
293 | BINTIME_TO_TIMESPEC(&bt, ts); | ||||
294 | } | ||||
295 | |||||
296 | void | ||||
297 | bintime(struct bintime *bt) | ||||
298 | { | ||||
299 | struct timehands *th; | ||||
300 | u_int gen; | ||||
301 | |||||
302 | do { | ||||
303 | th = timehands; | ||||
304 | gen = th->th_generation; | ||||
305 | membar_consumer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
306 | *bt = th->th_offset; | ||||
307 | bintimeaddfrac(bt, th->th_scale * tc_delta(th), bt); | ||||
308 | bintimeadd(bt, &th->th_boottime, bt); | ||||
309 | membar_consumer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
310 | } while (gen == 0 || gen != th->th_generation); | ||||
311 | } | ||||
312 | |||||
313 | void | ||||
314 | nanotime(struct timespec *tsp) | ||||
315 | { | ||||
316 | struct bintime bt; | ||||
317 | |||||
318 | bintime(&bt); | ||||
319 | BINTIME_TO_TIMESPEC(&bt, tsp); | ||||
320 | } | ||||
321 | |||||
322 | void | ||||
323 | microtime(struct timeval *tvp) | ||||
324 | { | ||||
325 | struct bintime bt; | ||||
326 | |||||
327 | bintime(&bt); | ||||
328 | BINTIME_TO_TIMEVAL(&bt, tvp); | ||||
329 | } | ||||
330 | |||||
331 | time_t | ||||
332 | gettime(void) | ||||
333 | { | ||||
334 | #if defined(__LP64__1) | ||||
335 | return time_second; /* atomic */ | ||||
336 | #else | ||||
337 | time_t now; | ||||
338 | struct timehands *th; | ||||
339 | u_int gen; | ||||
340 | |||||
341 | do { | ||||
342 | th = timehands; | ||||
343 | gen = th->th_generation; | ||||
344 | membar_consumer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
345 | now = th->th_microtime.tv_sec; | ||||
346 | membar_consumer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
347 | } while (gen == 0 || gen != th->th_generation); | ||||
348 | |||||
349 | return now; | ||||
350 | #endif | ||||
351 | } | ||||
352 | |||||
353 | void | ||||
354 | getnanouptime(struct timespec *tsp) | ||||
355 | { | ||||
356 | struct timehands *th; | ||||
357 | u_int gen; | ||||
358 | |||||
359 | do { | ||||
360 | th = timehands; | ||||
361 | gen = th->th_generation; | ||||
362 | membar_consumer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
363 | BINTIME_TO_TIMESPEC(&th->th_offset, tsp); | ||||
364 | membar_consumer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
365 | } while (gen == 0 || gen != th->th_generation); | ||||
366 | } | ||||
367 | |||||
368 | void | ||||
369 | getmicrouptime(struct timeval *tvp) | ||||
370 | { | ||||
371 | struct timehands *th; | ||||
372 | u_int gen; | ||||
373 | |||||
374 | do { | ||||
375 | th = timehands; | ||||
376 | gen = th->th_generation; | ||||
377 | membar_consumer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
378 | BINTIME_TO_TIMEVAL(&th->th_offset, tvp); | ||||
379 | membar_consumer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
380 | } while (gen == 0 || gen != th->th_generation); | ||||
381 | } | ||||
382 | |||||
383 | void | ||||
384 | getnanotime(struct timespec *tsp) | ||||
385 | { | ||||
386 | struct timehands *th; | ||||
387 | u_int gen; | ||||
388 | |||||
389 | do { | ||||
390 | th = timehands; | ||||
391 | gen = th->th_generation; | ||||
392 | membar_consumer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
393 | *tsp = th->th_nanotime; | ||||
394 | membar_consumer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
395 | } while (gen == 0 || gen != th->th_generation); | ||||
396 | } | ||||
397 | |||||
398 | void | ||||
399 | getmicrotime(struct timeval *tvp) | ||||
400 | { | ||||
401 | struct timehands *th; | ||||
402 | u_int gen; | ||||
403 | |||||
404 | do { | ||||
405 | th = timehands; | ||||
406 | gen = th->th_generation; | ||||
407 | membar_consumer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
408 | *tvp = th->th_microtime; | ||||
409 | membar_consumer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
410 | } while (gen == 0 || gen != th->th_generation); | ||||
411 | } | ||||
412 | |||||
413 | /* | ||||
414 | * Initialize a new timecounter and possibly use it. | ||||
415 | */ | ||||
416 | void | ||||
417 | tc_init(struct timecounter *tc) | ||||
418 | { | ||||
419 | u_int64_t tmp; | ||||
420 | u_int u; | ||||
421 | |||||
422 | u = tc->tc_frequency / tc->tc_counter_mask; | ||||
423 | /* XXX: We need some margin here, 10% is a guess */ | ||||
424 | u *= 11; | ||||
425 | u /= 10; | ||||
426 | if (tc->tc_quality >= 0) { | ||||
427 | if (u > hz) { | ||||
428 | tc->tc_quality = -2000; | ||||
429 | printf("Timecounter \"%s\" frequency %lu Hz", | ||||
430 | tc->tc_name, (unsigned long)tc->tc_frequency); | ||||
431 | printf(" -- Insufficient hz, needs at least %u\n", u); | ||||
432 | } | ||||
433 | } | ||||
434 | |||||
435 | /* Determine the counter's precision. */ | ||||
436 | for (tmp = 1; (tmp & tc->tc_counter_mask) == 0; tmp <<= 1) | ||||
437 | continue; | ||||
438 | tc->tc_precision = tmp; | ||||
439 | |||||
440 | SLIST_INSERT_HEAD(&tc_list, tc, tc_next)do { (tc)->tc_next.sle_next = (&tc_list)->slh_first ; (&tc_list)->slh_first = (tc); } while (0); | ||||
441 | |||||
442 | /* | ||||
443 | * Never automatically use a timecounter with negative quality. | ||||
444 | * Even though we run on the dummy counter, switching here may be | ||||
445 | * worse since this timecounter may not be monotonic. | ||||
446 | */ | ||||
447 | if (tc->tc_quality < 0) | ||||
448 | return; | ||||
449 | if (tc->tc_quality < timecounter->tc_quality) | ||||
450 | return; | ||||
451 | if (tc->tc_quality == timecounter->tc_quality && | ||||
452 | tc->tc_frequency < timecounter->tc_frequency) | ||||
453 | return; | ||||
454 | (void)tc->tc_get_timecount(tc); | ||||
455 | enqueue_randomness(tc->tc_get_timecount(tc)); | ||||
456 | |||||
457 | timecounter = tc; | ||||
458 | } | ||||
459 | |||||
460 | /* Report the frequency of the current timecounter. */ | ||||
461 | u_int64_t | ||||
462 | tc_getfrequency(void) | ||||
463 | { | ||||
464 | return (timehands->th_counter->tc_frequency); | ||||
465 | } | ||||
466 | |||||
467 | /* Report the precision of the current timecounter. */ | ||||
468 | u_int64_t | ||||
469 | tc_getprecision(void) | ||||
470 | { | ||||
471 | return (timehands->th_counter->tc_precision); | ||||
472 | } | ||||
473 | |||||
474 | /* | ||||
475 | * Step our concept of UTC, aka the realtime clock. | ||||
476 | * This is done by modifying our estimate of when we booted. | ||||
477 | * | ||||
478 | * Any ongoing adjustment is meaningless after a clock jump, | ||||
479 | * so we zero adjtimedelta here as well. | ||||
480 | */ | ||||
481 | void | ||||
482 | tc_setrealtimeclock(const struct timespec *ts) | ||||
483 | { | ||||
484 | struct bintime boottime, old_utc, uptime, utc; | ||||
485 | struct timespec tmp; | ||||
486 | int64_t zero = 0; | ||||
487 | |||||
488 | TIMESPEC_TO_BINTIME(ts, &utc); | ||||
489 | |||||
490 | rw_enter_write(&tc_lock); | ||||
491 | mtx_enter(&windup_mtx); | ||||
492 | |||||
493 | binuptime(&uptime); | ||||
494 | bintimesub(&utc, &uptime, &boottime); | ||||
495 | bintimeadd(&timehands->th_boottime, &uptime, &old_utc); | ||||
496 | /* XXX fiddle all the little crinkly bits around the fiords... */ | ||||
497 | tc_windup(&boottime, NULL((void *)0), &zero); | ||||
498 | |||||
499 | mtx_leave(&windup_mtx); | ||||
500 | rw_exit_write(&tc_lock); | ||||
501 | |||||
502 | enqueue_randomness(ts->tv_sec); | ||||
503 | |||||
504 | if (timestepwarnings) { | ||||
505 | BINTIME_TO_TIMESPEC(&old_utc, &tmp); | ||||
506 | log(LOG_INFO6, "Time stepped from %lld.%09ld to %lld.%09ld\n", | ||||
507 | (long long)tmp.tv_sec, tmp.tv_nsec, | ||||
508 | (long long)ts->tv_sec, ts->tv_nsec); | ||||
509 | } | ||||
510 | } | ||||
511 | |||||
512 | /* | ||||
513 | * Step the monotonic and realtime clocks, triggering any timeouts that | ||||
514 | * should have occurred across the interval. | ||||
515 | */ | ||||
516 | void | ||||
517 | tc_setclock(const struct timespec *ts) | ||||
518 | { | ||||
519 | struct bintime new_naptime, old_naptime, uptime, utc; | ||||
520 | struct timespec tmp; | ||||
521 | static int first = 1; | ||||
522 | #ifndef SMALL_KERNEL | ||||
523 | struct bintime elapsed; | ||||
524 | long long adj_ticks; | ||||
525 | #endif | ||||
526 | |||||
527 | /* | ||||
528 | * When we're called for the first time, during boot when | ||||
529 | * the root partition is mounted, we need to set boottime. | ||||
530 | */ | ||||
531 | if (first) { | ||||
532 | tc_setrealtimeclock(ts); | ||||
533 | first = 0; | ||||
534 | return; | ||||
535 | } | ||||
536 | |||||
537 | enqueue_randomness(ts->tv_sec); | ||||
538 | |||||
539 | TIMESPEC_TO_BINTIME(ts, &utc); | ||||
540 | |||||
541 | mtx_enter(&windup_mtx); | ||||
542 | |||||
543 | bintimesub(&utc, &timehands->th_boottime, &uptime); | ||||
544 | old_naptime = timehands->th_naptime; | ||||
545 | /* XXX fiddle all the little crinkly bits around the fiords... */ | ||||
546 | tc_windup(NULL((void *)0), &uptime, NULL((void *)0)); | ||||
547 | new_naptime = timehands->th_naptime; | ||||
548 | |||||
549 | mtx_leave(&windup_mtx); | ||||
550 | |||||
551 | if (bintimecmp(&old_naptime, &new_naptime, ==)((&old_naptime)->sec == (&new_naptime)->sec ? ( &old_naptime)->frac == (&new_naptime)->frac : ( &old_naptime)->sec == (&new_naptime)->sec)) { | ||||
552 | BINTIME_TO_TIMESPEC(&uptime, &tmp); | ||||
553 | printf("%s: cannot rewind uptime to %lld.%09ld\n", | ||||
554 | __func__, (long long)tmp.tv_sec, tmp.tv_nsec); | ||||
555 | } | ||||
556 | |||||
557 | #ifndef SMALL_KERNEL | ||||
558 | /* convert the bintime to ticks */ | ||||
559 | bintimesub(&new_naptime, &old_naptime, &elapsed); | ||||
560 | adj_ticks = BINTIME_TO_NSEC(&elapsed) / tick_nsec; | ||||
561 | if (adj_ticks > 0) { | ||||
562 | if (adj_ticks > INT_MAX0x7fffffff) | ||||
563 | adj_ticks = INT_MAX0x7fffffff; | ||||
564 | timeout_adjust_ticks(adj_ticks); | ||||
565 | } | ||||
566 | #endif | ||||
567 | } | ||||
568 | |||||
569 | void | ||||
570 | tc_update_timekeep(void) | ||||
571 | { | ||||
572 | static struct timecounter *last_tc = NULL((void *)0); | ||||
573 | struct timehands *th; | ||||
574 | |||||
575 | MUTEX_ASSERT_LOCKED(&windup_mtx)do { if (((&windup_mtx)->mtx_owner != ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof (struct cpu_info, ci_self))); __ci;})) && !(panicstr || db_active)) panic("mutex %p not held in %s", (&windup_mtx ), __func__); } while (0); | ||||
576 | |||||
577 | if (timekeep == NULL((void *)0)) | ||||
578 | return; | ||||
579 | |||||
580 | th = timehands; | ||||
581 | timekeep->tk_generation = 0; | ||||
582 | membar_producer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
583 | timekeep->tk_scale = th->th_scale; | ||||
584 | timekeep->tk_offset_count = th->th_offset_count; | ||||
585 | timekeep->tk_offset = th->th_offset; | ||||
586 | timekeep->tk_naptime = th->th_naptime; | ||||
587 | timekeep->tk_boottime = th->th_boottime; | ||||
588 | if (last_tc != th->th_counter) { | ||||
589 | timekeep->tk_counter_mask = th->th_counter->tc_counter_mask; | ||||
590 | timekeep->tk_user = th->th_counter->tc_user; | ||||
591 | last_tc = th->th_counter; | ||||
592 | } | ||||
593 | membar_producer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
594 | timekeep->tk_generation = th->th_generation; | ||||
595 | |||||
596 | return; | ||||
597 | } | ||||
598 | |||||
599 | /* | ||||
600 | * Initialize the next struct timehands in the ring and make | ||||
601 | * it the active timehands. Along the way we might switch to a different | ||||
602 | * timecounter and/or do seconds processing in NTP. Slightly magic. | ||||
603 | */ | ||||
604 | void | ||||
605 | tc_windup(struct bintime *new_boottime, struct bintime *new_offset, | ||||
606 | int64_t *new_adjtimedelta) | ||||
607 | { | ||||
608 | struct bintime bt; | ||||
609 | struct timecounter *active_tc; | ||||
610 | struct timehands *th, *tho; | ||||
611 | u_int64_t scale; | ||||
612 | u_int delta, ncount, ogen; | ||||
613 | |||||
614 | if (new_boottime
| ||||
615 | rw_assert_wrlock(&tc_lock); | ||||
616 | MUTEX_ASSERT_LOCKED(&windup_mtx)do { if (((&windup_mtx)->mtx_owner != ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof (struct cpu_info, ci_self))); __ci;})) && !(panicstr || db_active)) panic("mutex %p not held in %s", (&windup_mtx ), __func__); } while (0); | ||||
617 | |||||
618 | active_tc = timecounter; | ||||
619 | |||||
620 | /* | ||||
621 | * Make the next timehands a copy of the current one, but do not | ||||
622 | * overwrite the generation or next pointer. While we update | ||||
623 | * the contents, the generation must be zero. | ||||
624 | */ | ||||
625 | tho = timehands; | ||||
626 | ogen = tho->th_generation; | ||||
627 | th = tho->th_next; | ||||
628 | th->th_generation = 0; | ||||
629 | membar_producer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
630 | memcpy(th, tho, offsetof(struct timehands, th_generation))__builtin_memcpy((th), (tho), (__builtin_offsetof(struct timehands , th_generation))); | ||||
631 | |||||
632 | /* | ||||
633 | * Capture a timecounter delta on the current timecounter and if | ||||
634 | * changing timecounters, a counter value from the new timecounter. | ||||
635 | * Update the offset fields accordingly. | ||||
636 | */ | ||||
637 | delta = tc_delta(th); | ||||
638 | if (th->th_counter != active_tc) | ||||
639 | ncount = active_tc->tc_get_timecount(active_tc); | ||||
640 | else | ||||
641 | ncount = 0; | ||||
642 | th->th_offset_count += delta; | ||||
643 | th->th_offset_count &= th->th_counter->tc_counter_mask; | ||||
644 | bintimeaddfrac(&th->th_offset, th->th_scale * delta, &th->th_offset); | ||||
645 | |||||
646 | /* | ||||
647 | * Ignore new offsets that predate the current offset. | ||||
648 | * If changing the offset, first increase the naptime | ||||
649 | * accordingly. | ||||
650 | */ | ||||
651 | if (new_offset
th->th_offset)->frac < (new_offset)->frac : (& th->th_offset)->sec < (new_offset)->sec)) { | ||||
652 | bintimesub(new_offset, &th->th_offset, &bt); | ||||
653 | bintimeadd(&th->th_naptime, &bt, &th->th_naptime); | ||||
654 | naptime = th->th_naptime.sec; | ||||
655 | th->th_offset = *new_offset; | ||||
656 | } | ||||
657 | |||||
658 | #ifdef notyet | ||||
659 | /* | ||||
660 | * Hardware latching timecounters may not generate interrupts on | ||||
661 | * PPS events, so instead we poll them. There is a finite risk that | ||||
662 | * the hardware might capture a count which is later than the one we | ||||
663 | * got above, and therefore possibly in the next NTP second which might | ||||
664 | * have a different rate than the current NTP second. It doesn't | ||||
665 | * matter in practice. | ||||
666 | */ | ||||
667 | if (tho->th_counter->tc_poll_pps) | ||||
668 | tho->th_counter->tc_poll_pps(tho->th_counter); | ||||
669 | #endif | ||||
670 | |||||
671 | /* | ||||
672 | * If changing the boot time or clock adjustment, do so before | ||||
673 | * NTP processing. | ||||
674 | */ | ||||
675 | if (new_boottime
| ||||
676 | th->th_boottime = *new_boottime; | ||||
677 | if (new_adjtimedelta
| ||||
678 | th->th_adjtimedelta = *new_adjtimedelta; | ||||
679 | /* Reset the NTP update period. */ | ||||
680 | bintimesub(&th->th_offset, &th->th_naptime, | ||||
681 | &th->th_next_ntp_update); | ||||
682 | } | ||||
683 | |||||
684 | /* | ||||
685 | * Deal with NTP second processing. The while-loop normally | ||||
686 | * iterates at most once, but in extreme situations it might | ||||
687 | * keep NTP sane if tc_windup() is not run for several seconds. | ||||
688 | */ | ||||
689 | bintimesub(&th->th_offset, &th->th_naptime, &bt); | ||||
690 | while (bintimecmp(&th->th_next_ntp_update, &bt, <=)((&th->th_next_ntp_update)->sec == (&bt)->sec ? (&th->th_next_ntp_update)->frac <= (&bt)-> frac : (&th->th_next_ntp_update)->sec <= (&bt )->sec)) { | ||||
691 | ntp_update_second(th); | ||||
692 | th->th_next_ntp_update.sec++; | ||||
693 | } | ||||
694 | |||||
695 | /* Update the UTC timestamps used by the get*() functions. */ | ||||
696 | bintimeadd(&th->th_boottime, &th->th_offset, &bt); | ||||
697 | BINTIME_TO_TIMEVAL(&bt, &th->th_microtime); | ||||
698 | BINTIME_TO_TIMESPEC(&bt, &th->th_nanotime); | ||||
699 | |||||
700 | /* Now is a good time to change timecounters. */ | ||||
701 | if (th->th_counter != active_tc) { | ||||
702 | th->th_counter = active_tc; | ||||
703 | th->th_offset_count = ncount; | ||||
704 | } | ||||
705 | |||||
706 | /*- | ||||
707 | * Recalculate the scaling factor. We want the number of 1/2^64 | ||||
708 | * fractions of a second per period of the hardware counter, taking | ||||
709 | * into account the th_adjustment factor which the NTP PLL/adjtime(2) | ||||
710 | * processing provides us with. | ||||
711 | * | ||||
712 | * The th_adjustment is nanoseconds per second with 32 bit binary | ||||
713 | * fraction and we want 64 bit binary fraction of second: | ||||
714 | * | ||||
715 | * x = a * 2^32 / 10^9 = a * 4.294967296 | ||||
716 | * | ||||
717 | * The range of th_adjustment is +/- 5000PPM so inside a 64bit int | ||||
718 | * we can only multiply by about 850 without overflowing, but that | ||||
719 | * leaves suitably precise fractions for multiply before divide. | ||||
720 | * | ||||
721 | * Divide before multiply with a fraction of 2199/512 results in a | ||||
722 | * systematic undercompensation of 10PPM of th_adjustment. On a | ||||
723 | * 5000PPM adjustment this is a 0.05PPM error. This is acceptable. | ||||
724 | * | ||||
725 | * We happily sacrifice the lowest of the 64 bits of our result | ||||
726 | * to the goddess of code clarity. | ||||
727 | * | ||||
728 | */ | ||||
729 | scale = (u_int64_t)1 << 63; | ||||
730 | scale += \ | ||||
731 | ((th->th_adjustment + th->th_counter->tc_freq_adj) / 1024) * 2199; | ||||
732 | scale /= th->th_counter->tc_frequency; | ||||
733 | th->th_scale = scale * 2; | ||||
734 | |||||
735 | /* | ||||
736 | * Now that the struct timehands is again consistent, set the new | ||||
737 | * generation number, making sure to not make it zero. | ||||
738 | */ | ||||
739 | if (++ogen == 0) | ||||
740 | ogen = 1; | ||||
741 | membar_producer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
742 | th->th_generation = ogen; | ||||
743 | |||||
744 | /* Go live with the new struct timehands. */ | ||||
745 | time_second = th->th_microtime.tv_sec; | ||||
746 | time_uptime = th->th_offset.sec; | ||||
747 | membar_producer()do { __asm volatile("" ::: "memory"); } while (0); | ||||
748 | timehands = th; | ||||
749 | |||||
750 | tc_update_timekeep(); | ||||
751 | } | ||||
752 | |||||
753 | /* Report or change the active timecounter hardware. */ | ||||
754 | int | ||||
755 | sysctl_tc_hardware(void *oldp, size_t *oldlenp, void *newp, size_t newlen) | ||||
756 | { | ||||
757 | char newname[32]; | ||||
758 | struct timecounter *newtc, *tc; | ||||
759 | int error; | ||||
760 | |||||
761 | tc = timecounter; | ||||
762 | strlcpy(newname, tc->tc_name, sizeof(newname)); | ||||
763 | |||||
764 | error = sysctl_string(oldp, oldlenp, newp, newlen, newname, sizeof(newname)); | ||||
765 | if (error != 0 || strcmp(newname, tc->tc_name) == 0) | ||||
766 | return (error); | ||||
767 | SLIST_FOREACH(newtc, &tc_list, tc_next)for((newtc) = ((&tc_list)->slh_first); (newtc) != ((void *)0); (newtc) = ((newtc)->tc_next.sle_next)) { | ||||
768 | if (strcmp(newname, newtc->tc_name) != 0) | ||||
769 | continue; | ||||
770 | |||||
771 | /* Warm up new timecounter. */ | ||||
772 | (void)newtc->tc_get_timecount(newtc); | ||||
773 | (void)newtc->tc_get_timecount(newtc); | ||||
774 | |||||
775 | rw_enter_write(&tc_lock); | ||||
776 | timecounter = newtc; | ||||
777 | rw_exit_write(&tc_lock); | ||||
778 | |||||
779 | return (0); | ||||
780 | } | ||||
781 | return (EINVAL22); | ||||
782 | } | ||||
783 | |||||
784 | /* Report or change the active timecounter hardware. */ | ||||
785 | int | ||||
786 | sysctl_tc_choice(void *oldp, size_t *oldlenp, void *newp, size_t newlen) | ||||
787 | { | ||||
788 | char buf[32], *spc, *choices; | ||||
789 | struct timecounter *tc; | ||||
790 | int error, maxlen; | ||||
791 | |||||
792 | if (SLIST_EMPTY(&tc_list)(((&tc_list)->slh_first) == ((void *)0))) | ||||
793 | return (sysctl_rdstring(oldp, oldlenp, newp, "")); | ||||
794 | |||||
795 | spc = ""; | ||||
796 | maxlen = 0; | ||||
797 | SLIST_FOREACH(tc, &tc_list, tc_next)for((tc) = ((&tc_list)->slh_first); (tc) != ((void *)0 ); (tc) = ((tc)->tc_next.sle_next)) | ||||
798 | maxlen += sizeof(buf); | ||||
799 | choices = malloc(maxlen, M_TEMP127, M_WAITOK0x0001); | ||||
800 | *choices = '\0'; | ||||
801 | SLIST_FOREACH(tc, &tc_list, tc_next)for((tc) = ((&tc_list)->slh_first); (tc) != ((void *)0 ); (tc) = ((tc)->tc_next.sle_next)) { | ||||
802 | snprintf(buf, sizeof(buf), "%s%s(%d)", | ||||
803 | spc, tc->tc_name, tc->tc_quality); | ||||
804 | spc = " "; | ||||
805 | strlcat(choices, buf, maxlen); | ||||
806 | } | ||||
807 | error = sysctl_rdstring(oldp, oldlenp, newp, choices); | ||||
808 | free(choices, M_TEMP127, maxlen); | ||||
809 | return (error); | ||||
810 | } | ||||
811 | |||||
/*
 * Timecounters need to be updated every so often to prevent the hardware
 * counter from overflowing.  Updating also recalculates the cached values
 * used by the get*() family of functions, so their precision depends on
 * the update frequency.
 */
static int tc_tick;
819 | |||||
820 | void | ||||
821 | tc_ticktock(void) | ||||
822 | { | ||||
823 | static int count; | ||||
824 | |||||
825 | if (++count < tc_tick) | ||||
826 | return; | ||||
827 | if (!mtx_enter_try(&windup_mtx)) | ||||
828 | return; | ||||
829 | count = 0; | ||||
830 | tc_windup(NULL((void *)0), NULL((void *)0), NULL((void *)0)); | ||||
831 | mtx_leave(&windup_mtx); | ||||
832 | } | ||||
833 | |||||
834 | void | ||||
835 | inittimecounter(void) | ||||
836 | { | ||||
837 | #ifdef DEBUG | ||||
838 | u_int p; | ||||
839 | #endif | ||||
840 | |||||
841 | /* | ||||
842 | * Set the initial timeout to | ||||
843 | * max(1, <approx. number of hardclock ticks in a millisecond>). | ||||
844 | * People should probably not use the sysctl to set the timeout | ||||
845 | * to smaller than its initial value, since that value is the | ||||
846 | * smallest reasonable one. If they want better timestamps they | ||||
847 | * should use the non-"get"* functions. | ||||
848 | */ | ||||
849 | if (hz > 1000) | ||||
850 | tc_tick = (hz + 500) / 1000; | ||||
851 | else | ||||
852 | tc_tick = 1; | ||||
853 | #ifdef DEBUG | ||||
854 | p = (tc_tick * 1000000) / hz; | ||||
855 | printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000); | ||||
856 | #endif | ||||
857 | |||||
858 | /* warm up new timecounter (again) and get rolling. */ | ||||
859 | (void)timecounter->tc_get_timecount(timecounter); | ||||
860 | (void)timecounter->tc_get_timecount(timecounter); | ||||
861 | } | ||||
862 | |||||
/*
 * Bounded sysctl variables handled generically by sysctl_bounded_arr():
 * kern.timecounter.tick is read-only; kern.timecounter.timestepwarnings
 * is clamped to the range [0, 1].
 */
const struct sysctl_bounded_args tc_vars[] = {
	{ KERN_TIMECOUNTER_TICK, &tc_tick, SYSCTL_INT_READONLY },
	{ KERN_TIMECOUNTER_TIMESTEPWARNINGS, &timestepwarnings, 0, 1 },
};
867 | |||||
868 | /* | ||||
869 | * Return timecounter-related information. | ||||
870 | */ | ||||
871 | int | ||||
872 | sysctl_tc(int *name, u_int namelen, void *oldp, size_t *oldlenp, | ||||
873 | void *newp, size_t newlen) | ||||
874 | { | ||||
875 | if (namelen != 1) | ||||
876 | return (ENOTDIR20); | ||||
877 | |||||
878 | switch (name[0]) { | ||||
879 | case KERN_TIMECOUNTER_HARDWARE3: | ||||
880 | return (sysctl_tc_hardware(oldp, oldlenp, newp, newlen)); | ||||
881 | case KERN_TIMECOUNTER_CHOICE4: | ||||
882 | return (sysctl_tc_choice(oldp, oldlenp, newp, newlen)); | ||||
883 | default: | ||||
884 | return (sysctl_bounded_arr(tc_vars, nitems(tc_vars)(sizeof((tc_vars)) / sizeof((tc_vars)[0])), name, | ||||
885 | namelen, oldp, oldlenp, newp, newlen)); | ||||
886 | } | ||||
887 | /* NOTREACHED */ | ||||
888 | } | ||||
889 | |||||
890 | /* | ||||
891 | * Skew the timehands according to any adjtime(2) adjustment. | ||||
892 | */ | ||||
893 | void | ||||
894 | ntp_update_second(struct timehands *th) | ||||
895 | { | ||||
896 | int64_t adj; | ||||
897 | |||||
898 | MUTEX_ASSERT_LOCKED(&windup_mtx)do { if (((&windup_mtx)->mtx_owner != ({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof (struct cpu_info, ci_self))); __ci;})) && !(panicstr || db_active)) panic("mutex %p not held in %s", (&windup_mtx ), __func__); } while (0); | ||||
899 | |||||
900 | if (th->th_adjtimedelta > 0) | ||||
901 | adj = MIN(5000, th->th_adjtimedelta)(((5000)<(th->th_adjtimedelta))?(5000):(th->th_adjtimedelta )); | ||||
902 | else | ||||
903 | adj = MAX(-5000, th->th_adjtimedelta)(((-5000)>(th->th_adjtimedelta))?(-5000):(th->th_adjtimedelta )); | ||||
904 | th->th_adjtimedelta -= adj; | ||||
905 | th->th_adjustment = (adj * 1000) << 32; | ||||
| |||||
906 | } | ||||
907 | |||||
/*
 * Read and/or set the active timecounter's frequency adjustment.
 *
 * old, if non-NULL, receives the current tc_freq_adj; requires at
 * least a read lock on tc_lock.  new, if non-NULL, installs a new
 * adjustment; requires the write lock on tc_lock, and winds up the
 * timecounter under windup_mtx so the change takes effect atomically
 * with respect to readers of the timehands.
 */
void
tc_adjfreq(int64_t *old, int64_t *new)
{
	if (old != NULL) {
		/* Any hold on tc_lock suffices for a read. */
		rw_assert_anylock(&tc_lock);
		*old = timecounter->tc_freq_adj;
	}
	if (new != NULL) {
		rw_assert_wrlock(&tc_lock);
		/* Publish the new adjustment via a windup. */
		mtx_enter(&windup_mtx);
		timecounter->tc_freq_adj = *new;
		tc_windup(NULL, NULL, NULL);
		mtx_leave(&windup_mtx);
	}
}
923 | |||||
/*
 * Read and/or set the pending adjtime(2) delta.
 *
 * The read side is lockless: it uses the timehands generation counter
 * to detect a concurrent windup and retries until it observes a
 * consistent snapshot.  The write side requires the tc_lock write
 * lock and applies the new delta through tc_windup() under windup_mtx.
 */
void
tc_adjtime(int64_t *old, int64_t *new)
{
	struct timehands *th;
	u_int gen;

	if (old != NULL) {
		/*
		 * Generation-based seqlock read: sample the generation,
		 * read the value, then re-check the generation.  The
		 * membar_consumer() calls order the loads; gen == 0
		 * marks a timehands entry mid-update, so retry.
		 */
		do {
			th = timehands;
			gen = th->th_generation;
			membar_consumer();
			*old = th->th_adjtimedelta;
			membar_consumer();
		} while (gen == 0 || gen != th->th_generation);
	}
	if (new != NULL) {
		rw_assert_wrlock(&tc_lock);
		/* Install the new delta atomically via a windup. */
		mtx_enter(&windup_mtx);
		tc_windup(NULL, NULL, new);
		mtx_leave(&windup_mtx);
	}
}