Annotation of prex/usr/lib/libc/stdio/vfscanf.c, Revision 1.1.1.1
1.1 nbrk 1: /*-
2: * Copyright (c) 1990, 1993
3: * The Regents of the University of California. All rights reserved.
4: *
5: * This code is derived from software contributed to Berkeley by
6: * Chris Torek.
7: *
8: * Redistribution and use in source and binary forms, with or without
9: * modification, are permitted provided that the following conditions
10: * are met:
11: * 1. Redistributions of source code must retain the above copyright
12: * notice, this list of conditions and the following disclaimer.
13: * 2. Redistributions in binary form must reproduce the above copyright
14: * notice, this list of conditions and the following disclaimer in the
15: * documentation and/or other materials provided with the distribution.
16: * 3. Neither the name of the University nor the names of its contributors
17: * may be used to endorse or promote products derived from this software
18: * without specific prior written permission.
19: *
20: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30: * SUCH DAMAGE.
31: */
32:
33: #include <stdio.h>
34: #include <stdlib.h>
35: #include <ctype.h>
36: #include <stdarg.h>
37: #include "local.h"
38:
#define BUF		513	/* longest numeric string we will buffer */

/*
 * Per-conversion state bits, collected in `flags' while a directive is
 * parsed and while its input is scanned.
 */
#define LONG		(1 << 0)	/* `l' modifier: long or double */
#define LONGDBL		(1 << 1)	/* `L' modifier: long double; unimplemented */
#define SHORT		(1 << 2)	/* `h' modifier: short */
#define SUPPRESS	(1 << 3)	/* `*': scan but do not assign */
#define POINTER		(1 << 4)	/* %p: pointer scanned as `fake hex' */
#define NOSKIP		(1 << 5)	/* leading blanks are not skipped */

/*
 * Bits that only matter while a number is being scanned.
 * Floating point would use SIGNOK, NDIGITS, DPTOK and EXPOK;
 * integers use SIGNOK, NDIGITS, PFXOK and NZDIGITS.
 * The two groups deliberately share bit positions.
 */
#define SIGNOK		(1 << 6)	/* a leading +/- may still appear */
#define NDIGITS		(1 << 7)	/* no digit has been seen yet */

#define DPTOK		(1 << 8)	/* (float) decimal point may still appear */
#define EXPOK		(1 << 9)	/* (float) exponent (e+3, etc) may still appear */

#define PFXOK		(1 << 8)	/* (int) 0x prefix may still appear */
#define NZDIGITS	(1 << 9)	/* (int) no zero digit has been seen yet */

/*
 * Kinds of conversion, selected once the directive has been parsed.
 */
#define CT_CHAR		0	/* %c */
#define CT_CCL		1	/* %[...] */
#define CT_STRING	2	/* %s */
#define CT_INT		3	/* integer, converted by strtol or strtoul */
#define CT_FLOAT	4	/* floating point, i.e. strtod */

/* Short spellings for the unsigned types used throughout this file. */
#define u_char	unsigned char
#define u_long	unsigned long

/* Scanset table builder for %[...]; defined at the end of this file. */
static u_char *__sccl(char *, u_char *);
78:
79: /*
80: * vfscanf
81: */
82: int
83: __svfscanf(fp, fmt0, ap)
84: FILE *fp;
85: char const *fmt0;
86: va_list ap;
87: {
88: u_char *fmt = (u_char *)fmt0;
89: int c; /* character from format, or conversion */
90: size_t width; /* field width, or 0 */
91: char *p; /* points into all kinds of strings */
92: int n; /* handy integer */
93: int flags; /* flags as defined above */
94: char *p0; /* saves original value of p when necessary */
95: int nassigned; /* number of fields assigned */
96: int nread; /* number of characters consumed from fp */
97: int base; /* base argument to strtol/strtoul */
98: u_long (*ccfn)(const char *, char **, int);
99: /* conversion function (strtol/strtoul) */
100: char ccltab[256]; /* character class table for %[...] */
101: char buf[BUF]; /* buffer for numeric conversions */
102:
103: /* `basefix' is used to avoid `if' tests in the integer scanner */
104: static short basefix[17] =
105: { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
106:
107: nassigned = 0;
108: nread = 0;
109: base = 0; /* XXX just to keep gcc happy */
110: ccfn = NULL; /* XXX just to keep gcc happy */
111: for (;;) {
112: c = *fmt++;
113: if (c == 0)
114: return (nassigned);
115: if (isspace(c)) {
116: for (;;) {
117: if (fp->_r <= 0 && __srefill(fp))
118: return (nassigned);
119: if (!isspace(*fp->_p))
120: break;
121: nread++, fp->_r--, fp->_p++;
122: }
123: continue;
124: }
125: if (c != '%')
126: goto literal;
127: width = 0;
128: flags = 0;
129: /*
130: * switch on the format. continue if done;
131: * break once format type is derived.
132: */
133: again: c = *fmt++;
134: switch (c) {
135: case '%':
136: literal:
137: if (fp->_r <= 0 && __srefill(fp))
138: goto input_failure;
139: if (*fp->_p != c)
140: goto match_failure;
141: fp->_r--, fp->_p++;
142: nread++;
143: continue;
144:
145: case '*':
146: flags |= SUPPRESS;
147: goto again;
148: case 'l':
149: flags |= LONG;
150: goto again;
151: case 'L':
152: flags |= LONGDBL;
153: goto again;
154: case 'h':
155: flags |= SHORT;
156: goto again;
157:
158: case '0': case '1': case '2': case '3': case '4':
159: case '5': case '6': case '7': case '8': case '9':
160: width = width * 10 + c - '0';
161: goto again;
162:
163: /*
164: * Conversions.
165: * Those marked `compat' are for 4.[123]BSD compatibility.
166: *
167: * (According to ANSI, E and X formats are supposed
168: * to the same as e and x. Sorry about that.)
169: */
170: case 'D': /* compat */
171: flags |= LONG;
172: /* FALLTHROUGH */
173: case 'd':
174: c = CT_INT;
175: ccfn = (u_long (*)(const char *, char **, int))strtol;
176: base = 10;
177: break;
178:
179: case 'i':
180: c = CT_INT;
181: ccfn = (u_long (*)(const char *, char **, int))strtol;
182: base = 0;
183: break;
184:
185: case 'O': /* compat */
186: flags |= LONG;
187: /* FALLTHROUGH */
188: case 'o':
189: c = CT_INT;
190: ccfn = strtoul;
191: base = 8;
192: break;
193:
194: case 'u':
195: c = CT_INT;
196: ccfn = strtoul;
197: base = 10;
198: break;
199:
200: case 'X': /* compat XXX */
201: flags |= LONG;
202: /* FALLTHROUGH */
203: case 'x':
204: flags |= PFXOK; /* enable 0x prefixing */
205: c = CT_INT;
206: ccfn = strtoul;
207: base = 16;
208: break;
209:
210: case 's':
211: c = CT_STRING;
212: break;
213:
214: case '[':
215: fmt = __sccl(ccltab, fmt);
216: flags |= NOSKIP;
217: c = CT_CCL;
218: break;
219:
220: case 'c':
221: flags |= NOSKIP;
222: c = CT_CHAR;
223: break;
224:
225: case 'p': /* pointer format is like hex */
226: flags |= POINTER | PFXOK;
227: c = CT_INT;
228: ccfn = strtoul;
229: base = 16;
230: break;
231:
232: case 'n':
233: if (flags & SUPPRESS) /* ??? */
234: continue;
235: if (flags & SHORT)
236: *va_arg(ap, short *) = nread;
237: else if (flags & LONG)
238: *va_arg(ap, long *) = nread;
239: else
240: *va_arg(ap, int *) = nread;
241: continue;
242:
243: /*
244: * Disgusting backwards compatibility hacks. XXX
245: */
246: case '\0': /* compat */
247: return (EOF);
248:
249: default: /* compat */
250: if (isupper(c))
251: flags |= LONG;
252: c = CT_INT;
253: ccfn = (u_long (*)(const char *, char **, int))strtol;
254: base = 10;
255: break;
256: }
257:
258: /*
259: * We have a conversion that requires input.
260: */
261: if (fp->_r <= 0 && __srefill(fp))
262: goto input_failure;
263:
264: /*
265: * Consume leading white space, except for formats
266: * that suppress this.
267: */
268: if ((flags & NOSKIP) == 0) {
269: while (isspace(*fp->_p)) {
270: nread++;
271: if (--fp->_r > 0)
272: fp->_p++;
273: else if (__srefill(fp))
274: goto input_failure;
275: }
276: /*
277: * Note that there is at least one character in
278: * the buffer, so conversions that do not set NOSKIP
279: * ca no longer result in an input failure.
280: */
281: }
282:
283: /*
284: * Do the conversion.
285: */
286: switch (c) {
287:
288: case CT_CHAR:
289: /* scan arbitrary characters (sets NOSKIP) */
290: if (width == 0)
291: width = 1;
292: if (flags & SUPPRESS) {
293: size_t sum = 0;
294: for (;;) {
295: if ((size_t)(n = fp->_r) < width) {
296: sum += n;
297: width -= n;
298: fp->_p += n;
299: if (__srefill(fp)) {
300: if (sum == 0)
301: goto input_failure;
302: break;
303: }
304: } else {
305: sum += width;
306: fp->_r -= width;
307: fp->_p += width;
308: break;
309: }
310: }
311: nread += sum;
312: } else {
313: size_t r = fread((void *)va_arg(ap, char *), 1,
314: width, fp);
315:
316: if (r == 0)
317: goto input_failure;
318: nread += r;
319: nassigned++;
320: }
321: break;
322:
323: case CT_CCL:
324: /* scan a (nonempty) character class (sets NOSKIP) */
325: if (width == 0)
326: width = ~0; /* `infinity' */
327: /* take only those things in the class */
328: if (flags & SUPPRESS) {
329: n = 0;
330: while (ccltab[*fp->_p]) {
331: n++, fp->_r--, fp->_p++;
332: if (--width == 0)
333: break;
334: if (fp->_r <= 0 && __srefill(fp)) {
335: if (n == 0)
336: goto input_failure;
337: break;
338: }
339: }
340: if (n == 0)
341: goto match_failure;
342: } else {
343: p0 = p = va_arg(ap, char *);
344: while (ccltab[*fp->_p]) {
345: fp->_r--;
346: *p++ = *fp->_p++;
347: if (--width == 0)
348: break;
349: if (fp->_r <= 0 && __srefill(fp)) {
350: if (p == p0)
351: goto input_failure;
352: break;
353: }
354: }
355: n = p - p0;
356: if (n == 0)
357: goto match_failure;
358: *p = 0;
359: nassigned++;
360: }
361: nread += n;
362: break;
363:
364: case CT_STRING:
365: /* like CCL, but zero-length string OK, & no NOSKIP */
366: if (width == 0)
367: width = ~0;
368: if (flags & SUPPRESS) {
369: n = 0;
370: while (!isspace(*fp->_p)) {
371: n++, fp->_r--, fp->_p++;
372: if (--width == 0)
373: break;
374: if (fp->_r <= 0 && __srefill(fp))
375: break;
376: }
377: nread += n;
378: } else {
379: p0 = p = va_arg(ap, char *);
380: while (!isspace(*fp->_p)) {
381: fp->_r--;
382: *p++ = *fp->_p++;
383: if (--width == 0)
384: break;
385: if (fp->_r <= 0 && __srefill(fp))
386: break;
387: }
388: *p = 0;
389: nread += p - p0;
390: nassigned++;
391: }
392: continue;
393:
394: case CT_INT:
395: /* scan an integer as if by strtol/strtoul */
396: #ifdef hardway
397: if (width == 0 || width > sizeof(buf) - 1)
398: width = sizeof(buf) - 1;
399: #else
400: /* size_t is unsigned, hence this optimisation */
401: if (--width > sizeof(buf) - 2)
402: width = sizeof(buf) - 2;
403: width++;
404: #endif
405: flags |= SIGNOK | NDIGITS | NZDIGITS;
406: for (p = buf; width; width--) {
407: c = *fp->_p;
408: /*
409: * Switch on the character; `goto ok'
410: * if we accept it as a part of number.
411: */
412: switch (c) {
413:
414: /*
415: * The digit 0 is always legal, but is
416: * special. For %i conversions, if no
417: * digits (zero or nonzero) have been
418: * scanned (only signs), we will have
419: * base==0. In that case, we should set
420: * it to 8 and enable 0x prefixing.
421: * Also, if we have not scanned zero digits
422: * before this, do not turn off prefixing
423: * (someone else will turn it off if we
424: * have scanned any nonzero digits).
425: */
426: case '0':
427: if (base == 0) {
428: base = 8;
429: flags |= PFXOK;
430: }
431: if (flags & NZDIGITS)
432: flags &= ~(SIGNOK|NZDIGITS|NDIGITS);
433: else
434: flags &= ~(SIGNOK|PFXOK|NDIGITS);
435: goto ok;
436:
437: /* 1 through 7 always legal */
438: case '1': case '2': case '3':
439: case '4': case '5': case '6': case '7':
440: base = basefix[base];
441: flags &= ~(SIGNOK | PFXOK | NDIGITS);
442: goto ok;
443:
444: /* digits 8 and 9 ok iff decimal or hex */
445: case '8': case '9':
446: base = basefix[base];
447: if (base <= 8)
448: break; /* not legal here */
449: flags &= ~(SIGNOK | PFXOK | NDIGITS);
450: goto ok;
451:
452: /* letters ok iff hex */
453: case 'A': case 'B': case 'C':
454: case 'D': case 'E': case 'F':
455: case 'a': case 'b': case 'c':
456: case 'd': case 'e': case 'f':
457: /* no need to fix base here */
458: if (base <= 10)
459: break; /* not legal here */
460: flags &= ~(SIGNOK | PFXOK | NDIGITS);
461: goto ok;
462:
463: /* sign ok only as first character */
464: case '+': case '-':
465: if (flags & SIGNOK) {
466: flags &= ~SIGNOK;
467: goto ok;
468: }
469: break;
470:
471: /* x ok iff flag still set & 2nd char */
472: case 'x': case 'X':
473: if (flags & PFXOK && p == buf + 1) {
474: base = 16; /* if %i */
475: flags &= ~PFXOK;
476: goto ok;
477: }
478: break;
479: }
480:
481: /*
482: * If we got here, c is not a legal character
483: * for a number. Stop accumulating digits.
484: */
485: break;
486: ok:
487: /*
488: * c is legal: store it and look at the next.
489: */
490: *p++ = c;
491: if (--fp->_r > 0)
492: fp->_p++;
493: else if (__srefill(fp))
494: break; /* EOF */
495: }
496: /*
497: * If we had only a sign, it is no good; push
498: * back the sign. If the number ends in `x',
499: * it was [sign] '0' 'x', so push back the x
500: * and treat it as [sign] '0'.
501: */
502: if (flags & NDIGITS) {
503: if (p > buf)
504: (void) ungetc(*(u_char *)--p, fp);
505: goto match_failure;
506: }
507: c = ((u_char *)p)[-1];
508: if (c == 'x' || c == 'X') {
509: --p;
510: (void) ungetc(c, fp);
511: }
512: if ((flags & SUPPRESS) == 0) {
513: u_long res;
514:
515: *p = 0;
516: res = (*ccfn)(buf, (char **)NULL, base);
517: if (flags & POINTER)
518: *va_arg(ap, void **) = (void *)res;
519: else if (flags & SHORT)
520: *va_arg(ap, short *) = res;
521: else if (flags & LONG)
522: *va_arg(ap, long *) = res;
523: else
524: *va_arg(ap, int *) = res;
525: nassigned++;
526: }
527: nread += p - buf;
528: break;
529:
530: }
531: }
532: input_failure:
533: return (nassigned ? nassigned : -1);
534: match_failure:
535: return (nassigned);
536: }
537:
/*
 * __sccl --
 *	Build the membership table for a %[...] conversion.
 *
 * `fmt' points just past the opening `['.  All 256 entries of `tab' are
 * written: 1 for every character that belongs to the scanset and 0 for
 * every other character.  A leading `^' negates the set.
 *
 * Returns a pointer to the character after the closing `]', or to the
 * terminating NUL if the format ends before the scanset is closed.
 */
static unsigned char *
__sccl(char *tab, unsigned char *fmt)
{
	int c, n, v;

	/* first `clear' the whole table */
	c = *fmt++;		/* first char hat => negated scanset */
	if (c == '^') {
		v = 1;		/* default => accept */
		c = *fmt++;	/* get new first char */
	} else
		v = 0;		/* default => reject */
	for (n = 0; n < 256; n++)
		tab[n] = v;
	if (c == 0)
		return (fmt - 1);	/* format ended before closing ] */

	/*
	 * Now set the entries corresponding to the actual scanset
	 * to the opposite of the above.
	 *
	 * The first character may be ']' (or '-') without being special;
	 * the last character may be '-'.
	 */
	v = 1 - v;
	for (;;) {
		tab[c] = v;		/* take character c */
doswitch:
		n = *fmt++;		/* and examine the next */
		switch (n) {

		case 0:			/* format ended too soon */
			return (fmt - 1);

		case '-':
			/*
			 * A scanset of the form
			 *	[01+-]
			 * is defined as `the digit 0, the digit 1,
			 * the character +, the character -', but
			 * the effect of a scanset such as
			 *	[a-zA-Z0-9]
			 * is implementation defined.  The V7 Unix
			 * scanf treats `a-z' as `the letters a through
			 * z', but treats `a-a' as `the letter a, the
			 * character -, and the letter a'.
			 *
			 * For compatibility, the `-' is not considered
			 * to define a range if the character following
			 * it is either a close bracket (required by ANSI)
			 * or is not numerically greater than the character
			 * we just stored in the table (c).
			 *
			 * The test must be `<=': with an equal end point
			 * the fill loop below would store one entry past
			 * the intended character (and past the end of the
			 * table when that character is 255).
			 */
			n = *fmt;
			if (n == ']' || n <= c) {
				c = '-';
				break;	/* resume the for(;;) */
			}
			fmt++;
			do {		/* fill in the range */
				tab[++c] = v;
			} while (c < n);
			/*
			 * Alas, the V7 Unix scanf also treats formats
			 * such as [a-c-e] as `the letters a through e'.
			 * This too is permitted by the standard, so look
			 * at the next character with c still holding the
			 * end of the range just filled.
			 */
			goto doswitch;

		case ']':		/* end of scanset */
			return (fmt);

		default:		/* just another character */
			c = n;
			break;
		}
	}
	/* NOTREACHED */
}
CVSweb