File: arch/amd64/amd64/tsc.c
Warning: line 90, column 4: Value stored to 'count' is never read
1 | /* $OpenBSD: tsc.c,v 1.31 2023/02/04 19:19:36 cheloha Exp $ */ |
2 | /* |
3 | * Copyright (c) 2008 The NetBSD Foundation, Inc. |
4 | * Copyright (c) 2016,2017 Reyk Floeter <reyk@openbsd.org> |
5 | * Copyright (c) 2017 Adam Steen <adam@adamsteen.com.au> |
6 | * Copyright (c) 2017 Mike Belopuhov <mike@openbsd.org> |
7 | * Copyright (c) 2019 Paul Irofti <paul@irofti.net> |
8 | * |
9 | * Permission to use, copy, modify, and distribute this software for any |
10 | * purpose with or without fee is hereby granted, provided that the above |
11 | * copyright notice and this permission notice appear in all copies. |
12 | * |
13 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
14 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
15 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR |
16 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
17 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN |
18 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF |
19 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
20 | */ |
21 | |
22 | #include <sys/param.h> |
23 | #include <sys/systm.h> |
24 | #include <sys/timetc.h> |
25 | #include <sys/atomic.h> |
26 | |
27 | #include <machine/cpu.h> |
28 | #include <machine/cpufunc.h> |
29 | |
30 | #define RECALIBRATE_MAX_RETRIES 5 |
31 | #define RECALIBRATE_SMI_THRESHOLD 50000 |
32 | #define RECALIBRATE_DELAY_THRESHOLD 50 |
33 | |
34 | int tsc_recalibrate; |
35 | |
36 | uint64_t tsc_frequency; |
37 | int tsc_is_invariant; |
38 | |
39 | u_int tsc_get_timecount_lfence(struct timecounter *tc); |
40 | u_int tsc_get_timecount_rdtscp(struct timecounter *tc); |
41 | void tsc_delay(int usecs); |
42 | |
43 | #include "lapic.h" |
44 | #if NLAPIC > 0 |
45 | extern u_int32_t lapic_per_second; |
46 | #endif |
47 | |
48 | u_int64_t (*tsc_rdtsc)(void) = rdtsc_lfence; |
49 | |
50 | struct timecounter tsc_timecounter = { |
51 | .tc_get_timecount = tsc_get_timecount_lfence, |
52 | .tc_counter_mask = ~0u, |
53 | .tc_frequency = 0, |
54 | .tc_name = "tsc", |
55 | .tc_quality = -1000, |
56 | .tc_priv = NULL, |
57 | .tc_user = TC_TSC_LFENCE, |
58 | }; |
59 | |
60 | uint64_t |
61 | tsc_freq_cpuid(struct cpu_info *ci) |
62 | { |
63 | uint64_t count; |
64 | uint32_t eax, ebx, khz, dummy; |
65 | |
66 | if (!strcmp(cpu_vendor, "GenuineIntel") && |
67 | cpuid_level >= 0x15) { |
68 | eax = ebx = khz = dummy = 0; |
69 | CPUID(0x15, eax, ebx, khz, dummy); |
70 | khz /= 1000; |
71 | if (khz == 0) { |
72 | switch (ci->ci_model) { |
73 | case 0x4e: /* Skylake mobile */ |
74 | case 0x5e: /* Skylake desktop */ |
75 | case 0x8e: /* Kabylake mobile */ |
76 | case 0x9e: /* Kabylake desktop */ |
77 | case 0xa5: /* CML-H CML-S62 CML-S102 */ |
78 | case 0xa6: /* CML-U62 */ |
79 | khz = 24000; /* 24.0 MHz */ |
80 | break; |
81 | case 0x5f: /* Atom Denverton */ |
82 | khz = 25000; /* 25.0 MHz */ |
83 | break; |
84 | case 0x5c: /* Atom Goldmont */ |
85 | khz = 19200; /* 19.2 MHz */ |
86 | break; |
87 | } |
88 | } |
89 | if (ebx == 0 || eax == 0) |
90 | count = 0; |
Value stored to 'count' is never read | |
91 | else if ((count = (uint64_t)khz * (uint64_t)ebx / eax) != 0) { |
92 | #if NLAPIC > 0 |
93 | lapic_per_second = khz * 1000; |
94 | #endif |
95 | return (count * 1000); |
96 | } |
97 | } |
98 | |
99 | return (0); |
100 | } |
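The analyzer's finding refers to code line 90 above: when CPUID leaf 0x15 reports a zero ratio (EBX is the TSC/crystal-clock numerator, EAX the denominator, and ECX, read into khz, the crystal frequency in Hz), 'count' is assigned 0, but nothing reads that value before the function falls through to the final return (0). A minimal sketch of an equivalent tail without the redundant assignment (an editorial sketch, not taken from the OpenBSD tree) could look like this:

	/* Editorial sketch only: same behavior as code lines 89-96, no dead store. */
	if (ebx == 0 || eax == 0)
		return (0);
	count = (uint64_t)khz * (uint64_t)ebx / eax;
	if (count != 0) {
#if NLAPIC > 0
		lapic_per_second = khz * 1000;
#endif
		return (count * 1000);
	}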
101 | |
102 | uint64_t |
103 | tsc_freq_msr(struct cpu_info *ci) |
104 | { |
105 | uint64_t base, def, divisor, multiplier; |
106 | |
107 | if (strcmp(cpu_vendor, "AuthenticAMD") != 0) |
108 | return 0; |
109 | |
110 | /* |
111 | * All 10h+ CPUs have Core::X86::Msr:HWCR and the TscFreqSel |
112 | * bit. If TscFreqSel hasn't been set, the TSC isn't advancing |
113 | * at the core P0 frequency and we need to calibrate by hand. |
114 | */ |
115 | if (ci->ci_family < 0x10) |
116 | return 0; |
117 | if (!ISSET(rdmsr(MSR_HWCR), HWCR_TSCFREQSEL)) |
118 | return 0; |
119 | |
120 | /* |
121 | * In 10h+ CPUs, Core::X86::Msr::PStateDef defines the voltage |
122 | * and frequency for each core P-state. We want the P0 frequency. |
123 | * If the En bit isn't set, the register doesn't define a valid |
124 | * P-state. |
125 | */ |
126 | def = rdmsr(MSR_PSTATEDEF(0)); |
127 | if (!ISSET(def, PSTATEDEF_EN)) |
128 | return 0; |
129 | |
130 | switch (ci->ci_family) { |
131 | case 0x17: |
132 | case 0x19: |
133 | /* |
134 | * PPR for AMD Family 17h [...]: |
135 | * Models 01h,08h B2, Rev 3.03, pp. 33, 139-140 |
136 | * Model 18h B1, Rev 3.16, pp. 36, 143-144 |
137 | * Model 60h A1, Rev 3.06, pp. 33, 155-157 |
138 | * Model 71h B0, Rev 3.06, pp. 28, 150-151 |
139 | * |
140 | * PPR for AMD Family 19h [...]: |
141 | * Model 21h B0, Rev 3.05, pp. 33, 166-167 |
142 | * |
143 | * OSRR for AMD Family 17h processors, |
144 | * Models 00h-2Fh, Rev 3.03, pp. 130-131 |
145 | */ |
146 | base = 200000000; /* 200.0 MHz */ |
147 | divisor = (def >> 8) & 0x3f; |
148 | if (divisor <= 0x07 || divisor >= 0x2d) |
149 | return 0; /* reserved */ |
150 | if (divisor >= 0x1b && divisor % 2 == 1) |
151 | return 0; /* reserved */ |
152 | multiplier = def & 0xff; |
153 | if (multiplier <= 0x0f) |
154 | return 0; /* reserved */ |
155 | break; |
156 | default: |
157 | return 0; |
158 | } |
159 | |
160 | return base * multiplier / divisor; |
161 | } |
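For family 17h and 19h parts the P0 frequency is reconstructed as base * multiplier / divisor, where the multiplier comes from PStateDef bits 7:0 and the divisor from bits 13:8, exactly as extracted above. A worked example with an invented PStateDef encoding (hypothetical values; the EN bit and the reserved-range checks from tsc_freq_msr() are omitted here):

#include <stdint.h>
#include <stdio.h>

/* Invented P0 PStateDef value, arithmetic illustration only. */
int
main(void)
{
	uint64_t def = 0x0890;			/* bits 13:8 = 0x08, bits 7:0 = 0x90 */
	uint64_t base = 200000000;		/* 200.0 MHz, as in tsc_freq_msr() */
	uint64_t divisor = (def >> 8) & 0x3f;	/* 0x08 */
	uint64_t multiplier = def & 0xff;	/* 0x90 = 144 */

	/* 200000000 * 144 / 8 = 3600000000 Hz, i.e. a 3.6 GHz P0 clock. */
	printf("%llu Hz\n", (unsigned long long)(base * multiplier / divisor));
	return 0;
}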
162 | |
163 | void |
164 | tsc_identify(struct cpu_info *ci) |
165 | { |
166 | if (!(ci->ci_flags & CPUF_PRIMARY) || |
167 | !(ci->ci_flags & CPUF_CONST_TSC) || |
168 | !(ci->ci_flags & CPUF_INVAR_TSC)) |
169 | return; |
170 | |
171 | /* Prefer RDTSCP where supported. */ |
172 | if (ISSET(ci->ci_feature_eflags, CPUID_RDTSCP)) { |
173 | tsc_rdtsc = rdtscp; |
174 | tsc_timecounter.tc_get_timecount = tsc_get_timecount_rdtscp; |
175 | tsc_timecounter.tc_user = TC_TSC_RDTSCP; |
176 | } |
177 | |
178 | tsc_is_invariant = 1; |
179 | |
180 | tsc_frequency = tsc_freq_cpuid(ci); |
181 | if (tsc_frequency == 0) |
182 | tsc_frequency = tsc_freq_msr(ci); |
183 | if (tsc_frequency > 0) |
184 | delay_init(tsc_delay, 5000); |
185 | } |
186 | |
187 | static inline int |
188 | get_tsc_and_timecount(struct timecounter *tc, uint64_t *tsc, uint64_t *count) |
189 | { |
190 | uint64_t n, tsc1, tsc2; |
191 | int i; |
192 | |
193 | for (i = 0; i < RECALIBRATE_MAX_RETRIES; i++) { |
194 | tsc1 = tsc_rdtsc(); |
195 | n = (tc->tc_get_timecount(tc) & tc->tc_counter_mask); |
196 | tsc2 = tsc_rdtsc(); |
197 | |
198 | if ((tsc2 - tsc1) < RECALIBRATE_SMI_THRESHOLD) { |
199 | *count = n; |
200 | *tsc = tsc2; |
201 | return (0); |
202 | } |
203 | } |
204 | return (1); |
205 | } |
206 | |
207 | static inline uint64_t |
208 | calculate_tsc_freq(uint64_t tsc1, uint64_t tsc2, int usec) |
209 | { |
210 | uint64_t delta; |
211 | |
212 | delta = (tsc2 - tsc1); |
213 | return (delta * 1000000 / usec); |
214 | } |
215 | |
216 | static inline uint64_t |
217 | calculate_tc_delay(struct timecounter *tc, uint64_t count1, uint64_t count2) |
218 | { |
219 | uint64_t delta; |
220 | |
221 | if (count2 < count1) |
222 | count2 += tc->tc_counter_mask; |
223 | |
224 | delta = (count2 - count1); |
225 | return (delta * 1000000 / tc->tc_frequency); |
226 | } |
227 | |
228 | uint64_t |
229 | measure_tsc_freq(struct timecounter *tc) |
230 | { |
231 | uint64_t count1, count2, frequency, min_freq, tsc1, tsc2; |
232 | u_long s; |
233 | int delay_usec, i, err1, err2, usec, success = 0; |
234 | |
235 | /* warmup the timers */ |
236 | for (i = 0; i < 3; i++) { |
237 | (void)tc->tc_get_timecount(tc); |
238 | (void)rdtsc(); |
239 | } |
240 | |
241 | min_freq = ULLONG_MAX; |
242 | |
243 | delay_usec = 100000; |
244 | for (i = 0; i < 3; i++) { |
245 | s = intr_disable(); |
246 | |
247 | err1 = get_tsc_and_timecount(tc, &tsc1, &count1); |
248 | delay(delay_usec); |
249 | err2 = get_tsc_and_timecount(tc, &tsc2, &count2); |
250 | |
251 | intr_restore(s); |
252 | |
253 | if (err1 || err2) |
254 | continue; |
255 | |
256 | usec = calculate_tc_delay(tc, count1, count2); |
257 | |
258 | if ((usec < (delay_usec - RECALIBRATE_DELAY_THRESHOLD)) || |
259 | (usec > (delay_usec + RECALIBRATE_DELAY_THRESHOLD))) |
260 | continue; |
261 | |
262 | frequency = calculate_tsc_freq(tsc1, tsc2, usec); |
263 | |
264 | min_freq = MIN(min_freq, frequency); |
265 | success++; |
266 | } |
267 | |
268 | return (success > 1 ? min_freq : 0); |
269 | } |
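Each round brackets a nominal 100 ms delay with paired TSC reads, converts the reference timecounter's tick delta back to microseconds, and discards the round if the bracketing reads could not be taken within the SMI threshold or if the measured delay strays more than 50 us from the requested one; at least two rounds must survive, and the smallest estimate wins. The frequency arithmetic itself is a simple scaled ratio, shown here with invented numbers:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical round: the reference timecounter measured 100012 us
 * while the TSC advanced by 260031200 cycles (invented figures). */
int
main(void)
{
	uint64_t tsc_delta = 260031200;
	uint64_t usec = 100012;

	/* Same arithmetic as calculate_tsc_freq(): 2600000000 Hz here. */
	printf("%llu Hz\n", (unsigned long long)(tsc_delta * 1000000 / usec));
	return 0;
}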
270 | |
271 | void |
272 | calibrate_tsc_freq(void) |
273 | { |
274 | struct timecounter *reference = tsc_timecounter.tc_priv; |
275 | uint64_t freq; |
276 | |
277 | if (!reference || !tsc_recalibrate) |
278 | return; |
279 | |
280 | if ((freq = measure_tsc_freq(reference)) == 0) |
281 | return; |
282 | tsc_frequency = freq; |
283 | tsc_timecounter.tc_frequency = freq; |
284 | if (tsc_is_invariant) |
285 | tsc_timecounter.tc_quality = 2000; |
286 | } |
287 | |
288 | void |
289 | cpu_recalibrate_tsc(struct timecounter *tc) |
290 | { |
291 | struct timecounter *reference = tsc_timecounter.tc_priv; |
292 | |
293 | /* Prevent recalibration with a worse timecounter source */ |
294 | if (reference && reference->tc_quality > tc->tc_quality) |
295 | return; |
296 | |
297 | tsc_timecounter.tc_priv = tc; |
298 | calibrate_tsc_freq(); |
299 | } |
300 | |
301 | u_int |
302 | tsc_get_timecount_lfence(struct timecounter *tc) |
303 | { |
304 | return rdtsc_lfence(); |
305 | } |
306 | |
307 | u_int |
308 | tsc_get_timecount_rdtscp(struct timecounter *tc) |
309 | { |
310 | return rdtscp(); |
311 | } |
312 | |
313 | void |
314 | tsc_timecounter_init(struct cpu_info *ci, uint64_t cpufreq) |
315 | { |
316 | if (!(ci->ci_flags & CPUF_PRIMARY) || |
317 | !(ci->ci_flags & CPUF_CONST_TSC) || |
318 | !(ci->ci_flags & CPUF_INVAR_TSC)) |
319 | return; |
320 | |
321 | /* Newer CPUs don't require recalibration */ |
322 | if (tsc_frequency > 0) { |
323 | tsc_timecounter.tc_frequency = tsc_frequency; |
324 | tsc_timecounter.tc_quality = 2000; |
325 | } else { |
326 | tsc_recalibrate = 1; |
327 | tsc_frequency = cpufreq; |
328 | tsc_timecounter.tc_frequency = cpufreq; |
329 | calibrate_tsc_freq(); |
330 | } |
331 | |
332 | tc_init(&tsc_timecounter); |
333 | } |
334 | |
335 | void |
336 | tsc_delay(int usecs) |
337 | { |
338 | uint64_t interval, start; |
339 | |
340 | interval = (uint64_t)usecs * tsc_frequency / 1000000; |
341 | start = tsc_rdtsc(); |
342 | while (tsc_rdtsc() - start < interval) |
343 | CPU_BUSY_CYCLE(); |
344 | } |
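tsc_delay() converts the requested microseconds into a TSC cycle budget and spins until that many cycles have elapsed. As a worked example with a hypothetical tsc_frequency of 2.6 GHz, a 10 us delay corresponds to 26,000 cycles:

/* Worked example (hypothetical 2.6 GHz TSC): 10 us -> 26000 cycles. */
_Static_assert(10ULL * 2600000000ULL / 1000000 == 26000,
    "tsc_delay cycle budget for 10 us at 2.6 GHz");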
345 | |
346 | #ifdef MULTIPROCESSOR |
347 | |
348 | /* |
349 | * Protections for global variables in this code: |
350 | * |
351 | * a Modified atomically |
352 | * b Protected by a barrier |
353 | * p Only modified by the primary CPU |
354 | */ |
355 | |
356 | #define TSC_TEST_MSECS 1 /* Test round duration */ |
357 | #define TSC_TEST_ROUNDS 2 /* Number of test rounds */ |
358 | |
359 | /* |
360 | * tsc_test_status.val is isolated to its own cache line to limit |
361 | * false sharing and reduce the test's margin of error. |
362 | */ |
363 | struct tsc_test_status { |
364 | volatile uint64_t val; /* [a] Latest RDTSC value */ |
365 | uint64_t pad1[7]; |
366 | uint64_t lag_count; /* [b] Number of lags seen by CPU */ |
367 | uint64_t lag_max; /* [b] Biggest lag seen by CPU */ |
368 | int64_t adj; /* [b] Initial IA32_TSC_ADJUST value */ |
369 | uint64_t pad2[5]; |
370 | } __aligned(64); |
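The padding is sized so that 'val', which both CPUs update continuously during a test round, sits on a 64-byte cache line of its own while the per-round statistics occupy the next line. An editorial compile-time check of that intent (assuming 64-byte lines and the field sizes shown above; in-kernel one would use the kernel's own offsetof and assertion macros) could read:

#include <stddef.h>

_Static_assert(offsetof(struct tsc_test_status, lag_count) == 64,
    "val and pad1 fill the first 64-byte cache line");
_Static_assert(sizeof(struct tsc_test_status) == 128,
    "the structure spans exactly two cache lines");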
371 | struct tsc_test_status tsc_ap_status; /* Test results from AP */ |
372 | struct tsc_test_status tsc_bp_status; /* Test results from BP */ |
373 | uint64_t tsc_test_cycles; /* [p] TSC cycles per test round */ |
374 | const char *tsc_ap_name; /* [b] Name of AP running test */ |
375 | volatile u_int tsc_egress_barrier; /* [a] Test end barrier */ |
376 | volatile u_int tsc_ingress_barrier; /* [a] Test start barrier */ |
377 | volatile u_int tsc_test_rounds; /* [p] Remaining test rounds */ |
378 | int tsc_is_synchronized = 1; /* [p] Have we ever failed the test? */ |
379 | |
380 | void tsc_adjust_reset(struct cpu_info *, struct tsc_test_status *); |
381 | void tsc_report_test_results(void); |
382 | void tsc_test_ap(void); |
383 | void tsc_test_bp(void); |
384 | |
385 | void |
386 | tsc_test_sync_bp(struct cpu_info *ci) |
387 | { |
388 | if (!tsc_is_invariant) |
389 | return; |
390 | #ifndef TSC_DEBUG |
391 | /* No point in testing again if we already failed. */ |
392 | if (!tsc_is_synchronized) |
393 | return; |
394 | #endif |
395 | /* Reset IA32_TSC_ADJUST if it exists. */ |
396 | tsc_adjust_reset(ci, &tsc_bp_status); |
397 | |
398 | /* Reset the test cycle limit and round count. */ |
399 | tsc_test_cycles = TSC_TEST_MSECS * tsc_frequency / 1000; |
400 | tsc_test_rounds = TSC_TEST_ROUNDS; |
401 | |
402 | do { |
403 | /* |
404 | * Pass through the ingress barrier, run the test, |
405 | * then wait for the AP to reach the egress barrier. |
406 | */ |
407 | atomic_inc_int(&tsc_ingress_barrier); |
408 | while (tsc_ingress_barrier != 2) |
409 | CPU_BUSY_CYCLE(); |
410 | tsc_test_bp(); |
411 | while (tsc_egress_barrier != 1) |
412 | CPU_BUSY_CYCLE(); |
413 | |
414 | /* |
415 | * Report what happened. Adjust the TSC's quality |
416 | * if this is the first time we've failed the test. |
417 | */ |
418 | tsc_report_test_results(); |
419 | if (tsc_ap_status.lag_count || tsc_bp_status.lag_count) { |
420 | if (tsc_is_synchronized) { |
421 | tsc_is_synchronized = 0; |
422 | tc_reset_quality(&tsc_timecounter, -1000); |
423 | } |
424 | tsc_test_rounds = 0; |
425 | } else |
426 | tsc_test_rounds--; |
427 | |
428 | /* |
429 | * Clean up for the next round. It is safe to reset the |
430 | * ingress barrier because at this point we know the AP |
431 | * has reached the egress barrier. |
432 | */ |
433 | memset(&tsc_ap_status, 0, sizeof tsc_ap_status); |
434 | memset(&tsc_bp_status, 0, sizeof tsc_bp_status); |
435 | tsc_ingress_barrier = 0; |
436 | if (tsc_test_rounds == 0) |
437 | tsc_ap_name = NULL; |
438 | |
439 | /* |
440 | * Pass through the egress barrier and release the AP. |
441 | * The AP is responsible for resetting the egress barrier. |
442 | */ |
443 | if (atomic_inc_int_nv(&tsc_egress_barrier) != 2) |
444 | panic("%s: unexpected egress count", __func__); |
445 | } while (tsc_test_rounds > 0); |
446 | } |
447 | |
448 | void |
449 | tsc_test_sync_ap(struct cpu_info *ci) |
450 | { |
451 | if (!tsc_is_invariant) |
452 | return; |
453 | #ifndef TSC_DEBUG |
454 | if (!tsc_is_synchronized) |
455 | return; |
456 | #endif |
457 | /* The BP needs our name in order to report any problems. */ |
458 | if (atomic_cas_ptr(&tsc_ap_name, NULL, ci->ci_dev->dv_xname) != NULL) { |
459 | panic("%s: %s: tsc_ap_name is not NULL: %s", |
460 | __func__, ci->ci_dev->dv_xname, tsc_ap_name); |
461 | } |
462 | |
463 | tsc_adjust_reset(ci, &tsc_ap_status); |
464 | |
465 | /* |
466 | * The AP is only responsible for running the test and |
467 | * resetting the egress barrier. The BP handles everything |
468 | * else. |
469 | */ |
470 | do { |
471 | atomic_inc_int(&tsc_ingress_barrier); |
472 | while (tsc_ingress_barrier != 2) |
473 | CPU_BUSY_CYCLE(); |
474 | tsc_test_ap(); |
475 | atomic_inc_int(&tsc_egress_barrier); |
476 | while (atomic_cas_uint(&tsc_egress_barrier, 2, 0) != 2) |
477 | CPU_BUSY_CYCLE(); |
478 | } while (tsc_test_rounds > 0); |
479 | } |
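The two functions above rendezvous through a pair of counters: both CPUs bump tsc_ingress_barrier and spin until it reaches 2, run a test round, then the AP bumps tsc_egress_barrier; the BP waits for it to reach 1, cleans up, and bumps it to 2, after which the AP swings it back to 0 for the next round. A standalone userland sketch of the same handshake shape, using C11 atomics and pthreads purely for illustration (the kernel code above uses atomic_inc_int()/atomic_cas_uint() and CPU_BUSY_CYCLE()):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_uint ingress, egress;

static void *
secondary(void *arg)
{
	unsigned int two = 2;

	atomic_fetch_add(&ingress, 1);		/* announce arrival */
	while (atomic_load(&ingress) != 2)	/* wait for the primary */
		;
	/* ... run one test round here ... */
	atomic_fetch_add(&egress, 1);		/* signal completion */
	while (!atomic_compare_exchange_weak(&egress, &two, 0))
		two = 2;			/* wait for release, then reset */
	return NULL;
}

int
main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, secondary, NULL);
	atomic_fetch_add(&ingress, 1);
	while (atomic_load(&ingress) != 2)
		;
	/* ... run one test round here ... */
	while (atomic_load(&egress) != 1)	/* wait for the secondary */
		;
	atomic_store(&ingress, 0);		/* clean up for the next round */
	atomic_fetch_add(&egress, 1);		/* release the secondary */
	pthread_join(t, NULL);
	puts("one round completed");
	return 0;
}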
480 | |
481 | void |
482 | tsc_report_test_results(void) |
483 | { |
484 | #ifdef TSC_DEBUG |
485 | u_int round = TSC_TEST_ROUNDS - tsc_test_rounds + 1; |
486 | |
487 | if (tsc_bp_status.adj != 0) { |
488 | printf("tsc: cpu0: IA32_TSC_ADJUST: %lld -> 0\n", |
489 | tsc_bp_status.adj); |
490 | } |
491 | if (tsc_ap_status.adj != 0) { |
492 | printf("tsc: %s: IA32_TSC_ADJUST: %lld -> 0\n", |
493 | tsc_ap_name, tsc_ap_status.adj); |
494 | } |
495 | if (tsc_ap_status.lag_count > 0 || tsc_bp_status.lag_count > 0) { |
496 | printf("tsc: cpu0/%s: sync test round %u/%u failed\n", |
497 | tsc_ap_name, round, TSC_TEST_ROUNDS); |
498 | } |
499 | if (tsc_bp_status.lag_count > 0) { |
500 | printf("tsc: cpu0/%s: cpu0: %llu lags %llu cycles\n", |
501 | tsc_ap_name, tsc_bp_status.lag_count, |
502 | tsc_bp_status.lag_max); |
503 | } |
504 | if (tsc_ap_status.lag_count > 0) { |
505 | printf("tsc: cpu0/%s: %s: %llu lags %llu cycles\n", |
506 | tsc_ap_name, tsc_ap_name, tsc_ap_status.lag_count, |
507 | tsc_ap_status.lag_max); |
508 | } |
509 | #else |
510 | if (tsc_ap_status.lag_count > 0 || tsc_bp_status.lag_count > 0) |
511 | printf("tsc: cpu0/%s: sync test failed\n", tsc_ap_name); |
512 | #endif /* TSC_DEBUG */ |
513 | } |
514 | |
515 | /* |
516 | * Reset IA32_TSC_ADJUST if we have it. |
517 | */ |
518 | void |
519 | tsc_adjust_reset(struct cpu_info *ci, struct tsc_test_status *tts) |
520 | { |
521 | if (ISSET(ci->ci_feature_sefflags_ebx, SEFF0EBX_TSC_ADJUST)) { |
522 | tts->adj = rdmsr(MSR_TSC_ADJUST); |
523 | if (tts->adj != 0) |
524 | wrmsr(MSR_TSC_ADJUST, 0); |
525 | } |
526 | } |
527 | |
528 | void |
529 | tsc_test_ap(void) |
530 | { |
531 | uint64_t ap_val, bp_val, end, lag; |
532 | |
533 | ap_val = tsc_rdtsc(); |
534 | end = ap_val + tsc_test_cycles; |
535 | while (__predict_true(ap_val < end)) { |
536 | /* |
537 | * Get the BP's latest TSC value, then read the AP's |
538 | * TSC. LFENCE is a serializing instruction, so we |
539 | * know bp_val predates ap_val. If ap_val is smaller |
540 | * than bp_val then the AP's TSC must trail that of |
541 | * the BP and the counters cannot be synchronized. |
542 | */ |
543 | bp_val = tsc_bp_status.val; |
544 | ap_val = tsc_rdtsc(); |
545 | tsc_ap_status.val = ap_val; |
546 | |
547 | /* |
548 | * Record the magnitude of the problem if the AP's TSC |
549 | * trails the BP's TSC. |
550 | */ |
551 | if (__predict_false(ap_val < bp_val)) { |
552 | tsc_ap_status.lag_count++; |
553 | lag = bp_val - ap_val; |
554 | if (tsc_ap_status.lag_max < lag) |
555 | tsc_ap_status.lag_max = lag; |
556 | } |
557 | } |
558 | } |
559 | |
560 | /* |
561 | * This is similar to tsc_test_ap(), but with all relevant variables |
562 | * flipped around to run the test from the BP's perspective. |
563 | */ |
564 | void |
565 | tsc_test_bp(void) |
566 | { |
567 | uint64_t ap_val, bp_val, end, lag; |
568 | |
569 | bp_val = tsc_rdtsc(); |
570 | end = bp_val + tsc_test_cycles; |
571 | while (__predict_true(bp_val < end)) { |
572 | ap_val = tsc_ap_status.val; |
573 | bp_val = tsc_rdtsc(); |
574 | tsc_bp_status.val = bp_val; |
575 | |
576 | if (__predict_false(bp_val < ap_val)) { |
577 | tsc_bp_status.lag_count++; |
578 | lag = ap_val - bp_val; |
579 | if (tsc_bp_status.lag_max < lag) |
580 | tsc_bp_status.lag_max = lag; |
581 | } |
582 | } |
583 | } |
584 | |
585 | #endif /* MULTIPROCESSOR */ |