Annotation of sys/kern/kern_tc.c, Revision 1.1.1.1
1.1 nbrk 1: /*-
2: * ----------------------------------------------------------------------------
3: * "THE BEER-WARE LICENSE" (Revision 42):
4: * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
5: * can do whatever you want with this stuff. If we meet some day, and you think
6: * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
7: * ----------------------------------------------------------------------------
8: *
9: * $OpenBSD: kern_tc.c,v 1.9 2007/05/09 17:42:19 deraadt Exp $
10: * $FreeBSD: src/sys/kern/kern_tc.c,v 1.148 2003/03/18 08:45:23 phk Exp $
11: */
12:
13: #include <sys/param.h>
14: #include <sys/kernel.h>
15: #include <sys/sysctl.h>
16: #include <sys/syslog.h>
17: #include <sys/systm.h>
18: #include <sys/timetc.h>
19: #include <sys/malloc.h>
20:
21: #ifdef __HAVE_TIMECOUNTER
22: /*
23: * A large step happens on boot. This constant detects such steps.
24: * It is relatively small so that ntp_update_second gets called enough
25: * in the typical 'missed a couple of seconds' case, but doesn't loop
26: * forever when the time step is large.
27: */
28: #define LARGE_STEP 200
29:
30: u_int dummy_get_timecount(struct timecounter *);
31:
32: void ntp_update_second(int64_t *, time_t *);
33: int sysctl_tc_hardware(void *, size_t *, void *, size_t);
34: int sysctl_tc_choice(void *, size_t *, void *, size_t);
35:
36: /*
37: * Implement a dummy timecounter which we can use until we get a real one
38: * in the air. This allows the console and other early stuff to use
39: * time services.
40: */
41:
42: u_int
43: dummy_get_timecount(struct timecounter *tc)
44: {
45: static u_int now;
46:
47: return (++now);
48: }
49:
50: static struct timecounter dummy_timecounter = {
51: dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000
52: };
53:
54: struct timehands {
55: /* These fields must be initialized by the driver. */
56: struct timecounter *th_counter;
57: int64_t th_adjustment;
58: u_int64_t th_scale;
59: u_int th_offset_count;
60: struct bintime th_offset;
61: struct timeval th_microtime;
62: struct timespec th_nanotime;
63: /* Fields not to be copied in tc_windup start with th_generation. */
64: volatile u_int th_generation;
65: struct timehands *th_next;
66: };
67:
68: extern struct timehands th0;
69: static struct timehands th9 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th0};
70: static struct timehands th8 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th9};
71: static struct timehands th7 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th8};
72: static struct timehands th6 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th7};
73: static struct timehands th5 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th6};
74: static struct timehands th4 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th5};
75: static struct timehands th3 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th4};
76: static struct timehands th2 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th3};
77: static struct timehands th1 = { NULL, 0, 0, 0, {0, 0}, {0, 0}, {0, 0}, 0, &th2};
78: static struct timehands th0 = {
79: &dummy_timecounter,
80: 0,
81: (uint64_t)-1 / 1000000,
82: 0,
83: {1, 0},
84: {0, 0},
85: {0, 0},
86: 1,
87: &th1
88: };
89:
90: static struct timehands *volatile timehands = &th0;
91: struct timecounter *timecounter = &dummy_timecounter;
92: static struct timecounter *timecounters = &dummy_timecounter;
93:
94: volatile time_t time_second = 1;
95: volatile time_t time_uptime = 0;
96:
97: extern struct timeval adjtimedelta;
98: static struct bintime boottimebin;
99: static int timestepwarnings;
100:
101: void tc_windup(void);
102:
103: /*
104: * Return the difference between the timehands' counter value now and what
105: * was when we copied it to the timehands' offset_count.
106: */
107: static __inline u_int
108: tc_delta(struct timehands *th)
109: {
110: struct timecounter *tc;
111:
112: tc = th->th_counter;
113: return ((tc->tc_get_timecount(tc) - th->th_offset_count) &
114: tc->tc_counter_mask);
115: }
116:
117: /*
118: * Functions for reading the time. We have to loop until we are sure that
119: * the timehands that we operated on was not updated under our feet. See
120: * the comment in <sys/time.h> for a description of these 12 functions.
121: */
122:
123: void
124: binuptime(struct bintime *bt)
125: {
126: struct timehands *th;
127: u_int gen;
128:
129: do {
130: th = timehands;
131: gen = th->th_generation;
132: *bt = th->th_offset;
133: bintime_addx(bt, th->th_scale * tc_delta(th));
134: } while (gen == 0 || gen != th->th_generation);
135: }
136:
137: void
138: nanouptime(struct timespec *tsp)
139: {
140: struct bintime bt;
141:
142: binuptime(&bt);
143: bintime2timespec(&bt, tsp);
144: }
145:
146: void
147: microuptime(struct timeval *tvp)
148: {
149: struct bintime bt;
150:
151: binuptime(&bt);
152: bintime2timeval(&bt, tvp);
153: }
154:
155: void
156: bintime(struct bintime *bt)
157: {
158:
159: binuptime(bt);
160: bintime_add(bt, &boottimebin);
161: }
162:
163: void
164: nanotime(struct timespec *tsp)
165: {
166: struct bintime bt;
167:
168: bintime(&bt);
169: bintime2timespec(&bt, tsp);
170: }
171:
172: void
173: microtime(struct timeval *tvp)
174: {
175: struct bintime bt;
176:
177: bintime(&bt);
178: bintime2timeval(&bt, tvp);
179: }
180:
181: void
182: getnanouptime(struct timespec *tsp)
183: {
184: struct timehands *th;
185: u_int gen;
186:
187: do {
188: th = timehands;
189: gen = th->th_generation;
190: bintime2timespec(&th->th_offset, tsp);
191: } while (gen == 0 || gen != th->th_generation);
192: }
193:
194: void
195: getmicrouptime(struct timeval *tvp)
196: {
197: struct timehands *th;
198: u_int gen;
199:
200: do {
201: th = timehands;
202: gen = th->th_generation;
203: bintime2timeval(&th->th_offset, tvp);
204: } while (gen == 0 || gen != th->th_generation);
205: }
206:
207: void
208: getnanotime(struct timespec *tsp)
209: {
210: struct timehands *th;
211: u_int gen;
212:
213: do {
214: th = timehands;
215: gen = th->th_generation;
216: *tsp = th->th_nanotime;
217: } while (gen == 0 || gen != th->th_generation);
218: }
219:
220: void
221: getmicrotime(struct timeval *tvp)
222: {
223: struct timehands *th;
224: u_int gen;
225:
226: do {
227: th = timehands;
228: gen = th->th_generation;
229: *tvp = th->th_microtime;
230: } while (gen == 0 || gen != th->th_generation);
231: }
232:
233: /*
234: * Initialize a new timecounter and possibly use it.
235: */
236: void
237: tc_init(struct timecounter *tc)
238: {
239: u_int u;
240:
241: u = tc->tc_frequency / tc->tc_counter_mask;
242: /* XXX: We need some margin here, 10% is a guess */
243: u *= 11;
244: u /= 10;
245: if (tc->tc_quality >= 0) {
246: if (u > hz) {
247: tc->tc_quality = -2000;
248: printf("Timecounter \"%s\" frequency %lu Hz",
249: tc->tc_name, (unsigned long)tc->tc_frequency);
250: printf(" -- Insufficient hz, needs at least %u\n", u);
251: }
252: }
253:
254: tc->tc_next = timecounters;
255: timecounters = tc;
256: /*
257: * Never automatically use a timecounter with negative quality.
258: * Even though we run on the dummy counter, switching here may be
259: * worse since this timecounter may not be monotonous.
260: */
261: if (tc->tc_quality < 0)
262: return;
263: if (tc->tc_quality < timecounter->tc_quality)
264: return;
265: if (tc->tc_quality == timecounter->tc_quality &&
266: tc->tc_frequency < timecounter->tc_frequency)
267: return;
268: (void)tc->tc_get_timecount(tc);
269: (void)tc->tc_get_timecount(tc);
270: timecounter = tc;
271: }
272:
273: /* Report the frequency of the current timecounter. */
274: u_int64_t
275: tc_getfrequency(void)
276: {
277:
278: return (timehands->th_counter->tc_frequency);
279: }
280:
281: /*
282: * Step our concept of UTC. This is done by modifying our estimate of
283: * when we booted.
284: * XXX: not locked.
285: */
286: void
287: tc_setclock(struct timespec *ts)
288: {
289: struct timespec ts2;
290: struct bintime bt, bt2;
291:
292: binuptime(&bt2);
293: timespec2bintime(ts, &bt);
294: bintime_sub(&bt, &bt2);
295: bintime_add(&bt2, &boottimebin);
296: boottimebin = bt;
297: bintime2timeval(&bt, &boottime);
298:
299: /* XXX fiddle all the little crinkly bits around the fiords... */
300: tc_windup();
301: if (timestepwarnings) {
302: bintime2timespec(&bt2, &ts2);
303: log(LOG_INFO, "Time stepped from %ld.%09ld to %ld.%09ld\n",
304: (long)ts2.tv_sec, ts2.tv_nsec,
305: (long)ts->tv_sec, ts->tv_nsec);
306: }
307: }
308:
309: /*
310: * Initialize the next struct timehands in the ring and make
311: * it the active timehands. Along the way we might switch to a different
312: * timecounter and/or do seconds processing in NTP. Slightly magic.
313: */
314: void
315: tc_windup(void)
316: {
317: struct bintime bt;
318: struct timehands *th, *tho;
319: u_int64_t scale;
320: u_int delta, ncount, ogen;
321: int i;
322: #ifdef leapsecs
323: time_t t;
324: #endif
325:
326: /*
327: * Make the next timehands a copy of the current one, but do not
328: * overwrite the generation or next pointer. While we update
329: * the contents, the generation must be zero.
330: */
331: tho = timehands;
332: th = tho->th_next;
333: ogen = th->th_generation;
334: th->th_generation = 0;
335: bcopy(tho, th, offsetof(struct timehands, th_generation));
336:
337: /*
338: * Capture a timecounter delta on the current timecounter and if
339: * changing timecounters, a counter value from the new timecounter.
340: * Update the offset fields accordingly.
341: */
342: delta = tc_delta(th);
343: if (th->th_counter != timecounter)
344: ncount = timecounter->tc_get_timecount(timecounter);
345: else
346: ncount = 0;
347: th->th_offset_count += delta;
348: th->th_offset_count &= th->th_counter->tc_counter_mask;
349: bintime_addx(&th->th_offset, th->th_scale * delta);
350:
351: #ifdef notyet
352: /*
353: * Hardware latching timecounters may not generate interrupts on
354: * PPS events, so instead we poll them. There is a finite risk that
355: * the hardware might capture a count which is later than the one we
356: * got above, and therefore possibly in the next NTP second which might
357: * have a different rate than the current NTP second. It doesn't
358: * matter in practice.
359: */
360: if (tho->th_counter->tc_poll_pps)
361: tho->th_counter->tc_poll_pps(tho->th_counter);
362: #endif
363:
364: /*
365: * Deal with NTP second processing. The for loop normally
366: * iterates at most once, but in extreme situations it might
367: * keep NTP sane if timeouts are not run for several seconds.
368: * At boot, the time step can be large when the TOD hardware
369: * has been read, so on really large steps, we call
370: * ntp_update_second only twice. We need to call it twice in
371: * case we missed a leap second.
372: */
373: bt = th->th_offset;
374: bintime_add(&bt, &boottimebin);
375: i = bt.sec - tho->th_microtime.tv_sec;
376: if (i > LARGE_STEP)
377: i = 2;
378: for (; i > 0; i--)
379: ntp_update_second(&th->th_adjustment, &bt.sec);
380:
381: /* Update the UTC timestamps used by the get*() functions. */
382: /* XXX shouldn't do this here. Should force non-`get' versions. */
383: bintime2timeval(&bt, &th->th_microtime);
384: bintime2timespec(&bt, &th->th_nanotime);
385:
386: /* Now is a good time to change timecounters. */
387: if (th->th_counter != timecounter) {
388: th->th_counter = timecounter;
389: th->th_offset_count = ncount;
390: }
391:
392: /*-
393: * Recalculate the scaling factor. We want the number of 1/2^64
394: * fractions of a second per period of the hardware counter, taking
395: * into account the th_adjustment factor which the NTP PLL/adjtime(2)
396: * processing provides us with.
397: *
398: * The th_adjustment is nanoseconds per second with 32 bit binary
399: * fraction and we want 64 bit binary fraction of second:
400: *
401: * x = a * 2^32 / 10^9 = a * 4.294967296
402: *
403: * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
404: * we can only multiply by about 850 without overflowing, but that
405: * leaves suitably precise fractions for multiply before divide.
406: *
407: * Divide before multiply with a fraction of 2199/512 results in a
408: * systematic undercompensation of 10PPM of th_adjustment. On a
409: * 5000PPM adjustment this is a 0.05PPM error. This is acceptable.
410: *
411: * We happily sacrifice the lowest of the 64 bits of our result
412: * to the goddess of code clarity.
413: *
414: */
415: scale = (u_int64_t)1 << 63;
416: scale += (th->th_adjustment / 1024) * 2199;
417: scale /= th->th_counter->tc_frequency;
418: th->th_scale = scale * 2;
419:
420: /*
421: * Now that the struct timehands is again consistent, set the new
422: * generation number, making sure to not make it zero.
423: */
424: if (++ogen == 0)
425: ogen = 1;
426: th->th_generation = ogen;
427:
428: /* Go live with the new struct timehands. */
429: time_second = th->th_microtime.tv_sec;
430: time_uptime = th->th_offset.sec;
431: timehands = th;
432: }
433:
434: /* Report or change the active timecounter hardware. */
435: int
436: sysctl_tc_hardware(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
437: {
438: char newname[32];
439: struct timecounter *newtc, *tc;
440: int error;
441:
442: tc = timecounter;
443: strlcpy(newname, tc->tc_name, sizeof(newname));
444:
445: error = sysctl_string(oldp, oldlenp, newp, newlen, newname, sizeof(newname));
446: if (error != 0 || strcmp(newname, tc->tc_name) == 0)
447: return (error);
448: for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) {
449: if (strcmp(newname, newtc->tc_name) != 0)
450: continue;
451:
452: /* Warm up new timecounter. */
453: (void)newtc->tc_get_timecount(newtc);
454: (void)newtc->tc_get_timecount(newtc);
455:
456: timecounter = newtc;
457: return (0);
458: }
459: return (EINVAL);
460: }
461:
462: /* Report or change the active timecounter hardware. */
463: int
464: sysctl_tc_choice(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
465: {
466: char buf[32], *spc, *choices;
467: struct timecounter *tc;
468: int error, maxlen;
469:
470: spc = "";
471: error = 0;
472: maxlen = 0;
473: for (tc = timecounters; tc != NULL; tc = tc->tc_next)
474: maxlen += sizeof(buf);
475: choices = malloc(maxlen, M_TEMP, M_WAITOK);
476: *choices = '\0';
477: for (tc = timecounters; tc != NULL; tc = tc->tc_next) {
478: snprintf(buf, sizeof(buf), "%s%s(%d)",
479: spc, tc->tc_name, tc->tc_quality);
480: spc = " ";
481: strlcat(choices, buf, maxlen);
482: }
483: error = sysctl_rdstring(oldp, oldlenp, newp, choices);
484: free(choices, M_TEMP);
485: return (error);
486: }
487:
/*
 * Timecounters need to be updated every so often to prevent the hardware
 * counter from overflowing.  Updating also recalculates the cached values
 * used by the get*() family of functions, so their precision depends on
 * the update frequency.
 *
 * tc_tick is the number of tc_ticktock() calls per tc_windup(); it is set
 * by inittimecounter().
 */
static int tc_tick;

/* Count calls and run tc_windup() on every tc_tick'th one. */
void
tc_ticktock(void)
{
	static int calls;

	calls++;
	if (calls >= tc_tick) {
		calls = 0;
		tc_windup();
	}
}
506:
507: void
508: inittimecounter(void)
509: {
510: u_int p;
511:
512: /*
513: * Set the initial timeout to
514: * max(1, <approx. number of hardclock ticks in a millisecond>).
515: * People should probably not use the sysctl to set the timeout
516: * to smaller than its inital value, since that value is the
517: * smallest reasonable one. If they want better timestamps they
518: * should use the non-"get"* functions.
519: */
520: if (hz > 1000)
521: tc_tick = (hz + 500) / 1000;
522: else
523: tc_tick = 1;
524: p = (tc_tick * 1000000) / hz;
525: #ifdef DEBUG
526: printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000);
527: #endif
528:
529: /* warm up new timecounter (again) and get rolling. */
530: (void)timecounter->tc_get_timecount(timecounter);
531: (void)timecounter->tc_get_timecount(timecounter);
532: }
533:
534: /*
535: * Return timecounter-related information.
536: */
537: int
538: sysctl_tc(int *name, u_int namelen, void *oldp, size_t *oldlenp,
539: void *newp, size_t newlen)
540: {
541: if (namelen != 1)
542: return (ENOTDIR);
543:
544: switch (name[0]) {
545: case KERN_TIMECOUNTER_TICK:
546: return (sysctl_rdint(oldp, oldlenp, newp, tc_tick));
547: case KERN_TIMECOUNTER_TIMESTEPWARNINGS:
548: return (sysctl_int(oldp, oldlenp, newp, newlen,
549: ×tepwarnings));
550: case KERN_TIMECOUNTER_HARDWARE:
551: return (sysctl_tc_hardware(oldp, oldlenp, newp, newlen));
552: case KERN_TIMECOUNTER_CHOICE:
553: return (sysctl_tc_choice(oldp, oldlenp, newp, newlen));
554: default:
555: return (EOPNOTSUPP);
556: }
557: /* NOTREACHED */
558: }
559:
560: void
561: ntp_update_second(int64_t *adjust, time_t *sec)
562: {
563: struct timeval adj;
564:
565: /* Skew time according to any adjtime(2) adjustments. */
566: timerclear(&adj);
567: if (adjtimedelta.tv_sec > 0)
568: adj.tv_usec = 5000;
569: else if (adjtimedelta.tv_sec == 0)
570: adj.tv_usec = MIN(500, adjtimedelta.tv_usec);
571: else if (adjtimedelta.tv_sec < -1)
572: adj.tv_usec = -5000;
573: else if (adjtimedelta.tv_sec == -1)
574: adj.tv_usec = MAX(-500, adjtimedelta.tv_usec - 1000000);
575: timersub(&adjtimedelta, &adj, &adjtimedelta);
576: *adjust = ((int64_t)adj.tv_usec * 1000) << 32;
577: *adjust += timecounter->tc_freq_adj;
578: }
579:
580: int
581: tc_adjfreq(int64_t *old, int64_t *new)
582: {
583: if (old != NULL) {
584: *old = timecounter->tc_freq_adj;
585: }
586: if (new != NULL) {
587: timecounter->tc_freq_adj = *new;
588: }
589: return 0;
590: }
591: #endif /* __HAVE_TIMECOUNTER */
CVSweb