Annotation of sys/kern/subr_disk.c, Revision 1.1.1.1
1.1 nbrk 1: /* $OpenBSD: subr_disk.c,v 1.64 2007/08/05 04:26:21 krw Exp $ */
2: /* $NetBSD: subr_disk.c,v 1.17 1996/03/16 23:17:08 christos Exp $ */
3:
4: /*
5: * Copyright (c) 1995 Jason R. Thorpe. All rights reserved.
6: * Copyright (c) 1982, 1986, 1988, 1993
7: * The Regents of the University of California. All rights reserved.
8: * (c) UNIX System Laboratories, Inc.
9: * All or some portions of this file are derived from material licensed
10: * to the University of California by American Telephone and Telegraph
11: * Co. or Unix System Laboratories, Inc. and are reproduced herein with
12: * the permission of UNIX System Laboratories, Inc.
13: *
14: * Redistribution and use in source and binary forms, with or without
15: * modification, are permitted provided that the following conditions
16: * are met:
17: * 1. Redistributions of source code must retain the above copyright
18: * notice, this list of conditions and the following disclaimer.
19: * 2. Redistributions in binary form must reproduce the above copyright
20: * notice, this list of conditions and the following disclaimer in the
21: * documentation and/or other materials provided with the distribution.
22: * 3. Neither the name of the University nor the names of its contributors
23: * may be used to endorse or promote products derived from this software
24: * without specific prior written permission.
25: *
26: * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27: * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29: * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30: * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31: * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32: * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34: * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35: * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36: * SUCH DAMAGE.
37: *
38: * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94
39: */
40:
41: #include <sys/param.h>
42: #include <sys/systm.h>
43: #include <sys/kernel.h>
44: #include <sys/malloc.h>
45: #include <sys/fcntl.h>
46: #include <sys/buf.h>
47: #include <sys/stat.h>
48: #include <sys/syslog.h>
49: #include <sys/device.h>
50: #include <sys/time.h>
51: #include <sys/disklabel.h>
52: #include <sys/conf.h>
53: #include <sys/lock.h>
54: #include <sys/disk.h>
55: #include <sys/reboot.h>
56: #include <sys/dkio.h>
57: #include <sys/dkstat.h> /* XXX */
58: #include <sys/proc.h>
59: #include <uvm/uvm_extern.h>
60:
61: #include <dev/rndvar.h>
62: #include <dev/cons.h>
63:
64: /*
65: * A global list of all disks attached to the system. May grow or
66: * shrink over time.
 *
 * disk_attach() appends to the list and disk_detach() removes from it;
 * both keep disk_count in step and raise disk_change.
67: */
68: struct disklist_head disklist; /* TAILQ_HEAD; entries linked via dk_link */
69: int disk_count; /* number of drives in global disklist */
70: int disk_change; /* set if a disk has been attached/detached
71: * since last we looked at this variable. This
72: * is reset by hw_sysctl()
73: */
74:
75: /*
76: * Seek sort for disks. We depend on the driver which calls us using b_resid
77: * as the current cylinder number.
78: *
79: * The argument ap structure holds a b_actf activity chain pointer on which we
80: * keep two queues, sorted in ascending cylinder order. The first queue holds
81: * those requests which are positioned after the current cylinder (in the first
82: * request); the second holds requests which came in after their cylinder number
83: * was passed. Thus we implement a one way scan, retracting after reaching the
84: * end of the drive to the first request on the second queue, at which time it
85: * becomes the first queue.
86: *
87: * A one-way scan is natural because of the way UNIX read-ahead blocks are
88: * allocated.
89: */
90:
91: void
92: disksort(struct buf *ap, struct buf *bp)
93: {
94: struct buf *bq;
95:
96: /* If the queue is empty, then it's easy. */
97: if (ap->b_actf == NULL) {
98: bp->b_actf = NULL;
99: ap->b_actf = bp;
100: return;
101: }
102:
103: /*
104: * If we lie after the first (currently active) request, then we
105: * must locate the second request list and add ourselves to it.
106: */
107: bq = ap->b_actf;
108: if (bp->b_cylinder < bq->b_cylinder) {
109: while (bq->b_actf) {
110: /*
111: * Check for an ``inversion'' in the normally ascending
112: * cylinder numbers, indicating the start of the second
113: * request list.
114: */
115: if (bq->b_actf->b_cylinder < bq->b_cylinder) {
116: /*
117: * Search the second request list for the first
118: * request at a larger cylinder number. We go
119: * before that; if there is no such request, we
120: * go at end.
121: */
122: do {
123: if (bp->b_cylinder <
124: bq->b_actf->b_cylinder)
125: goto insert;
126: if (bp->b_cylinder ==
127: bq->b_actf->b_cylinder &&
128: bp->b_blkno < bq->b_actf->b_blkno)
129: goto insert;
130: bq = bq->b_actf;
131: } while (bq->b_actf);
132: goto insert; /* after last */
133: }
134: bq = bq->b_actf;
135: }
136: /*
137: * No inversions... we will go after the last, and
138: * be the first request in the second request list.
139: */
140: goto insert;
141: }
142: /*
143: * Request is at/after the current request...
144: * sort in the first request list.
145: */
146: while (bq->b_actf) {
147: /*
148: * We want to go after the current request if there is an
149: * inversion after it (i.e. it is the end of the first
150: * request list), or if the next request is a larger cylinder
151: * than our request.
152: */
153: if (bq->b_actf->b_cylinder < bq->b_cylinder ||
154: bp->b_cylinder < bq->b_actf->b_cylinder ||
155: (bp->b_cylinder == bq->b_actf->b_cylinder &&
156: bp->b_blkno < bq->b_actf->b_blkno))
157: goto insert;
158: bq = bq->b_actf;
159: }
160: /*
161: * Neither a second list nor a larger request... we go at the end of
162: * the first list, which is the same as the end of the whole schebang.
163: */
164: insert: bp->b_actf = bq->b_actf;
165: bq->b_actf = bp;
166: }
167:
168: /*
169: * Compute checksum for disk label.
170: */
171: u_int
172: dkcksum(struct disklabel *lp)
173: {
174: u_int16_t *start, *end;
175: u_int16_t sum = 0;
176:
177: start = (u_int16_t *)lp;
178: end = (u_int16_t *)&lp->d_partitions[lp->d_npartitions];
179: while (start < end)
180: sum ^= *start++;
181: return (sum);
182: }
183:
184: char *
185: initdisklabel(struct disklabel *lp)
186: {
187: int i;
188:
189: /* minimal requirements for archetypal disk label */
190: if (lp->d_secsize < DEV_BSIZE)
191: lp->d_secsize = DEV_BSIZE;
192: if (DL_GETDSIZE(lp) == 0)
193: DL_SETDSIZE(lp, MAXDISKSIZE);
194: if (lp->d_secpercyl == 0)
195: return ("invalid geometry");
196: lp->d_npartitions = RAW_PART + 1;
197: for (i = 0; i < RAW_PART; i++) {
198: DL_SETPSIZE(&lp->d_partitions[i], 0);
199: DL_SETPOFFSET(&lp->d_partitions[i], 0);
200: }
201: if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) == 0)
202: DL_SETPSIZE(&lp->d_partitions[RAW_PART], DL_GETDSIZE(lp));
203: DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0);
204: lp->d_version = 1;
205: lp->d_bbsize = 8192;
206: lp->d_sbsize = 64*1024; /* XXX ? */
207: return (NULL);
208: }
209:
210: /*
211: * Check an incoming block to make sure it is a disklabel, convert it to
212: * a newer version if needed, etc etc.
 *
 * rlp - candidate label as read from the media. When it is only valid in
 *       the opposite byte order it is byte-swapped IN PLACE.
 * lp  - destination label. On entry its DL_GETDSIZE() value is taken to be
 *       the real disk size; on success it receives the validated label
 *       with the disk size, the RAW_PART size/offset and the checksum
 *       overwritten as described below.
 *
 * Returns NULL on success, otherwise a static string naming the problem
 * ("no disk label", "unreasonable partition count" or
 * "disk label corrupted"); lp is left untouched in that case.
213: */
214: char *
215: checkdisklabel(void *rlp, struct disklabel *lp)
216: {
217: struct disklabel *dlp = rlp;
218: struct __partitionv0 *v0pp;
219: struct partition *pp;
220: daddr64_t disksize;
221: char *msg = NULL;
222: int i;
223:
/* First try the label in native byte order. */
224: if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC)
225: msg = "no disk label";
226: else if (dlp->d_npartitions > MAXPARTITIONS)
227: msg = "unreasonable partition count";
228: else if (dkcksum(dlp) != 0)
229: msg = "disk label corrupted";
230:
231: if (msg) {
232: u_int16_t *start, *end, sum = 0;
233:
234: /* If it is byte-swapped, attempt to convert it */
/* Not swapped either: report the native-order diagnosis. */
235: if (swap32(dlp->d_magic) != DISKMAGIC ||
236: swap32(dlp->d_magic2) != DISKMAGIC ||
237: swap16(dlp->d_npartitions) > MAXPARTITIONS)
238: return (msg);
239:
240: /*
241: * Need a byte-swap aware dkcksum variant
242: * inlined, because dkcksum uses a sub-field
 * (d_npartitions, which is still in foreign byte order here,
 * determines where the checksummed region ends).
243: */
244: start = (u_int16_t *)dlp;
245: end = (u_int16_t *)&dlp->d_partitions[
246: swap16(dlp->d_npartitions)];
247: while (start < end)
248: sum ^= *start++;
249: if (sum != 0)
250: return (msg);
251:
/* Checksum holds: convert every multi-byte field to native order. */
252: dlp->d_magic = swap32(dlp->d_magic);
253: dlp->d_type = swap16(dlp->d_type);
254: dlp->d_subtype = swap16(dlp->d_subtype);
255:
256: /* d_typename and d_packname are strings */
257:
258: dlp->d_secsize = swap32(dlp->d_secsize);
259: dlp->d_nsectors = swap32(dlp->d_nsectors);
260: dlp->d_ntracks = swap32(dlp->d_ntracks);
261: dlp->d_ncylinders = swap32(dlp->d_ncylinders);
262: dlp->d_secpercyl = swap32(dlp->d_secpercyl);
263: dlp->d_secperunit = swap32(dlp->d_secperunit);
264:
265: dlp->d_sparespertrack = swap16(dlp->d_sparespertrack);
266: dlp->d_sparespercyl = swap16(dlp->d_sparespercyl);
267:
268: dlp->d_acylinders = swap32(dlp->d_acylinders);
269:
270: dlp->d_rpm = swap16(dlp->d_rpm);
271: dlp->d_interleave = swap16(dlp->d_interleave);
272: dlp->d_trackskew = swap16(dlp->d_trackskew);
273: dlp->d_cylskew = swap16(dlp->d_cylskew);
274: dlp->d_headswitch = swap32(dlp->d_headswitch);
275: dlp->d_trkseek = swap32(dlp->d_trkseek);
276: dlp->d_flags = swap32(dlp->d_flags);
277:
278: for (i = 0; i < NDDATA; i++)
279: dlp->d_drivedata[i] = swap32(dlp->d_drivedata[i]);
280:
281: dlp->d_secperunith = swap16(dlp->d_secperunith);
282: dlp->d_version = swap16(dlp->d_version);
283:
284: for (i = 0; i < NSPARE; i++)
285: dlp->d_spare[i] = swap32(dlp->d_spare[i]);
286:
287: dlp->d_magic2 = swap32(dlp->d_magic2);
288: dlp->d_checksum = swap16(dlp->d_checksum);
289:
290: dlp->d_npartitions = swap16(dlp->d_npartitions);
291: dlp->d_bbsize = swap32(dlp->d_bbsize);
292: dlp->d_sbsize = swap32(dlp->d_sbsize);
293:
/*
 * All MAXPARTITIONS slots are swapped, not just the first
 * d_npartitions. d_version is already in native order here and
 * selects which layout the partition entry is interpreted with.
 */
294: for (i = 0; i < MAXPARTITIONS; i++) {
295: pp = &dlp->d_partitions[i];
296: pp->p_size = swap32(pp->p_size);
297: pp->p_offset = swap32(pp->p_offset);
298: if (dlp->d_version == 0) {
299: v0pp = (struct __partitionv0 *)pp;
300: v0pp->p_fsize = swap32(v0pp->p_fsize);
301: } else {
302: pp->p_offseth = swap16(pp->p_offseth);
303: pp->p_sizeh = swap16(pp->p_sizeh);
304: }
305: pp->p_cpg = swap16(pp->p_cpg);
306: }
307:
/* Recompute the checksum over the now native-order label. */
308: dlp->d_checksum = 0;
309: dlp->d_checksum = dkcksum(dlp);
310: msg = NULL;
311: }
312:
313: /* XXX should verify lots of other fields and whine a lot */
314:
315: if (msg)
316: return (msg);
317:
318: /* Initial passed in lp contains the real disk size. */
/* Must be read before lp is overwritten by the copy below. */
319: disksize = DL_GETDSIZE(lp);
320:
321: if (lp != dlp)
322: *lp = *dlp;
323:
/*
 * Upgrade a version 0 label: fold p_fsize/p_frag into p_fragblock and
 * clear the high halves of the sizes and offsets.
 */
324: if (lp->d_version == 0) {
325: lp->d_version = 1;
326: lp->d_secperunith = 0;
327:
328: v0pp = (struct __partitionv0 *)lp->d_partitions;
329: pp = lp->d_partitions;
330: for (i = 0; i < lp->d_npartitions; i++, pp++, v0pp++) {
331: pp->p_fragblock = DISKLABELV1_FFS_FRAGBLOCK(v0pp->
332: p_fsize, v0pp->p_frag);
333: pp->p_offseth = 0;
334: pp->p_sizeh = 0;
335: }
336: }
337:
338: #ifdef DEBUG
339: if (DL_GETDSIZE(lp) != disksize)
340: printf("on-disk disklabel has incorrect disksize (%lld)\n",
341: DL_GETDSIZE(lp));
342: if (DL_GETPSIZE(&lp->d_partitions[RAW_PART]) != disksize)
343: printf("on-disk disklabel RAW_PART has incorrect size (%lld)\n",
344: DL_GETPSIZE(&lp->d_partitions[RAW_PART]));
345: if (DL_GETPOFFSET(&lp->d_partitions[RAW_PART]) != 0)
346: printf("on-disk disklabel RAW_PART offset != 0 (%lld)\n",
347: DL_GETPOFFSET(&lp->d_partitions[RAW_PART]));
348: #endif
/*
 * Whatever the on-disk label claimed, force the disk size and the raw
 * partition to the caller-supplied disk size, then re-checksum.
 */
349: DL_SETDSIZE(lp, disksize);
350: DL_SETPSIZE(&lp->d_partitions[RAW_PART], disksize);
351: DL_SETPOFFSET(&lp->d_partitions[RAW_PART], 0);
352:
353: lp->d_checksum = 0;
354: lp->d_checksum = dkcksum(lp);
/* msg is always NULL here; every failure returned earlier. */
355: return (msg);
356: }
357:
358: /*
359: * If dos partition table requested, attempt to load it and
360: * find disklabel inside a DOS partition. Return buffer
361: * for use in signalling errors if requested.
362: *
363: * We would like to check if each MBR has a valid BOOT_MAGIC, but
364: * we cannot because it doesn't always exist. So.. we assume the
365: * MBR is valid.
 *
 * bp        - buffer used for every read; b_blkno, b_bcount and b_flags
 *             are overwritten before each call to the strategy routine.
 * strat     - strategy routine that starts the read; completion is awaited
 *             with biowait().
 * lp        - label being built. d_secpercyl and d_secsize must be non-zero
 *             on entry. Non-OpenBSD MBR partitions are spoofed into slots
 *             d_partitions[8..15] ('i' through 'p').
 * partoffp  - if non-NULL, receives the start sector of the OpenBSD MBR
 *             partition (0 if none was found) or -1 after a partition
 *             table read error. When it is non-NULL and an OpenBSD
 *             partition is found, the function jumps straight to the label
 *             read: no partitions are spoofed and d_npartitions is not
 *             changed.
 * spoofonly - if non-zero, return after spoofing without reading the
 *             on-disk label.
 *
 * Returns NULL on success or a static error string.
366: */
367: char *
368: readdoslabel(struct buf *bp, void (*strat)(struct buf *),
369: struct disklabel *lp, int *partoffp, int spoofonly)
370: {
371: struct dos_partition dp[NDOSPART], *dp2;
372: u_int32_t extoff = 0;
373: daddr64_t part_blkno = DOSBBSECTOR;
374: int dospartoff = 0, i, ourpart = -1;
375: int wander = 1, n = 0, loop = 0;
376:
377: if (lp->d_secpercyl == 0)
378: return ("invalid label, d_secpercyl == 0");
379: if (lp->d_secsize == 0)
380: return ("invalid label, d_secsize == 0");
381:
382: /* do DOS partitions in the process of getting disklabel? */
383:
384: /*
385: * Read dos partition table, follow extended partitions.
386: * Map the partitions to disklabel entries i-p
 *
 * wander is set whenever an extended partition entry is seen, which
 * causes another boot record to be read. The walk stops after 8
 * boot records (loop) or 8 spoofed partitions (n), whichever is first.
387: */
388: while (wander && n < 8 && loop < 8) {
389: loop++;
390: wander = 0;
/* Never read a chained boot record from before the extended area. */
391: if (part_blkno < extoff)
392: part_blkno = extoff;
393:
394: /* read boot record */
395: bp->b_blkno = part_blkno;
396: bp->b_bcount = lp->d_secsize;
397: bp->b_flags = B_BUSY | B_READ;
398: (*strat)(bp);
399: if (biowait(bp)) {
400: /*wrong*/ if (partoffp)
401: /*wrong*/ *partoffp = -1;
402: return ("dos partition I/O error");
403: }
404:
/* Work on a private copy of the partition table entries. */
405: bcopy(bp->b_data + DOSPARTOFF, dp, sizeof(dp));
406:
/* Only the primary MBR is searched for an OpenBSD partition. */
407: if (ourpart == -1 && part_blkno == DOSBBSECTOR) {
408: /* Search for our MBR partition */
409: for (dp2=dp, i=0; i < NDOSPART && ourpart == -1;
410: i++, dp2++)
411: if (letoh32(dp2->dp_size) &&
412: dp2->dp_typ == DOSPTYP_OPENBSD)
413: ourpart = i;
414: if (ourpart == -1)
415: goto donot;
416: /*
417: * This is our MBR partition. need sector
418: * address for SCSI/IDE, cylinder for
419: * ESDI/ST506/RLL
420: */
421: dp2 = &dp[ourpart];
422: dospartoff = letoh32(dp2->dp_start) + part_blkno;
423:
424: /* found our OpenBSD partition, finish up */
425: if (partoffp)
426: goto notfat;
427:
/* Fill in any geometry the caller left at zero from the MBR entry. */
428: if (lp->d_ntracks == 0)
429: lp->d_ntracks = dp2->dp_ehd + 1;
430: if (lp->d_nsectors == 0)
431: lp->d_nsectors = DPSECT(dp2->dp_esect);
432: if (lp->d_secpercyl == 0)
433: lp->d_secpercyl = lp->d_ntracks *
434: lp->d_nsectors;
435: }
436: donot:
437: /*
438: * In case the disklabel read below fails, we want to
439: * provide a fake label in i-p.
 *
 * Entries of type OpenBSD, with zero size, or whose size or
 * start exceeds the disk size are skipped. Note that the slot's
 * size (and, for a non-zero start, its offset) is written before
 * the type switch, so an extended-partition entry also leaves
 * its values in slot 8+n even though n is not advanced for it.
440: */
441: for (dp2=dp, i=0; i < NDOSPART && n < 8; i++, dp2++) {
442: struct partition *pp = &lp->d_partitions[8+n];
443:
444: if (dp2->dp_typ == DOSPTYP_OPENBSD)
445: continue;
446: if (letoh32(dp2->dp_size) > DL_GETDSIZE(lp))
447: continue;
448: if (letoh32(dp2->dp_start) > DL_GETDSIZE(lp))
449: continue;
450: if (letoh32(dp2->dp_size) == 0)
451: continue;
/* dp_start is relative to the boot record it was read from. */
452: if (letoh32(dp2->dp_start))
453: DL_SETPOFFSET(pp,
454: letoh32(dp2->dp_start) + part_blkno);
455:
456: DL_SETPSIZE(pp, letoh32(dp2->dp_size));
457:
458: switch (dp2->dp_typ) {
459: case DOSPTYP_UNUSED:
460: pp->p_fstype = FS_UNUSED;
461: n++;
462: break;
463:
464: case DOSPTYP_LINUX:
465: pp->p_fstype = FS_EXT2FS;
466: n++;
467: break;
468:
469: case DOSPTYP_FAT12:
470: case DOSPTYP_FAT16S:
471: case DOSPTYP_FAT16B:
472: case DOSPTYP_FAT16L:
473: case DOSPTYP_FAT32:
474: case DOSPTYP_FAT32L:
475: pp->p_fstype = FS_MSDOS;
476: n++;
477: break;
478: case DOSPTYP_EXTEND:
479: case DOSPTYP_EXTENDL:
/*
 * Chained boot records are addressed relative to the first
 * extended partition (extoff). For that first one, record
 * extoff and let the clamp at the top of the loop set
 * part_blkno to it.
 */
480: part_blkno = letoh32(dp2->dp_start) + extoff;
481: if (!extoff) {
482: extoff = letoh32(dp2->dp_start);
483: part_blkno = 0;
484: }
485: wander = 1;
486: break;
487: default:
488: pp->p_fstype = FS_OTHER;
489: n++;
490: break;
491: }
492: }
493: }
494: lp->d_npartitions = MAXPARTITIONS;
495:
/*
 * Nothing was spoofed and no extended partition was followed: the
 * sector in bp may be the boot sector of a bare FAT filesystem rather
 * than an MBR. Each test below assembles a 16-bit value from two bytes
 * of that sector.
 */
496: if (n == 0 && part_blkno == DOSBBSECTOR) {
497: u_int16_t fattest;
498:
499: /* Check for a short jump instruction. */
500: fattest = ((bp->b_data[0] << 8) & 0xff00) |
501: (bp->b_data[2] & 0xff);
502: if (fattest != 0xeb90 && fattest != 0xe900)
503: goto notfat;
504:
505: /* Check for a valid bytes per sector value. */
506: fattest = ((bp->b_data[12] << 8) & 0xff00) |
507: (bp->b_data[11] & 0xff);
508: if (fattest < 512 || fattest > 4096 || (fattest % 512 != 0))
509: goto notfat;
510:
511: /* Check the end of sector marker. */
512: fattest = ((bp->b_data[510] << 8) & 0xff00) |
513: (bp->b_data[511] & 0xff);
514: if (fattest != 0x55aa)
515: goto notfat;
516:
517: /* Looks like a FAT filesystem. Spoof 'i'. */
518: DL_SETPSIZE(&lp->d_partitions['i' - 'a'],
519: DL_GETPSIZE(&lp->d_partitions[RAW_PART]));
520: DL_SETPOFFSET(&lp->d_partitions['i' - 'a'], 0);
521: lp->d_partitions['i' - 'a'].p_fstype = FS_MSDOS;
522: }
523: notfat:
524:
525: /* record the OpenBSD partition's placement for the caller */
526: if (partoffp)
527: *partoffp = dospartoff;
528:
529: /* don't read the on-disk label if we are in spoofed-only mode */
530: if (spoofonly)
531: return (NULL);
532:
/* dospartoff is still 0 here if no OpenBSD MBR partition was found. */
533: bp->b_blkno = dospartoff + DOS_LABELSECTOR;
534: bp->b_bcount = lp->d_secsize;
535: bp->b_flags = B_BUSY | B_READ;
536: (*strat)(bp);
537: if (biowait(bp))
538: return ("disk label I/O error");
539:
540: /* sub-MBR disklabels are always at a LABELOFFSET of 0 */
541: return checkdisklabel(bp->b_data, lp);
542: }
543:
544: /*
545: * Check new disk label for sensibility
546: * before setting it.
547: */
548: int
549: setdisklabel(struct disklabel *olp, struct disklabel *nlp, u_int openmask)
550: {
551: int i;
552: struct partition *opp, *npp;
553:
554: /* sanity clause */
555: if (nlp->d_secpercyl == 0 || nlp->d_secsize == 0 ||
556: (nlp->d_secsize % DEV_BSIZE) != 0)
557: return (EINVAL);
558:
559: /* special case to allow disklabel to be invalidated */
560: if (nlp->d_magic == 0xffffffff) {
561: *olp = *nlp;
562: return (0);
563: }
564:
565: if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC ||
566: dkcksum(nlp) != 0)
567: return (EINVAL);
568:
569: /* XXX missing check if other dos partitions will be overwritten */
570:
571: while (openmask != 0) {
572: i = ffs(openmask) - 1;
573: openmask &= ~(1 << i);
574: if (nlp->d_npartitions <= i)
575: return (EBUSY);
576: opp = &olp->d_partitions[i];
577: npp = &nlp->d_partitions[i];
578: if (DL_GETPOFFSET(npp) != DL_GETPOFFSET(opp) ||
579: DL_GETPSIZE(npp) < DL_GETPSIZE(opp))
580: return (EBUSY);
581: /*
582: * Copy internally-set partition information
583: * if new label doesn't include it. XXX
584: */
585: if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) {
586: npp->p_fstype = opp->p_fstype;
587: npp->p_fragblock = opp->p_fragblock;
588: npp->p_cpg = opp->p_cpg;
589: }
590: }
591: nlp->d_checksum = 0;
592: nlp->d_checksum = dkcksum(nlp);
593: *olp = *nlp;
594: return (0);
595: }
596:
597: /*
598: * Determine the size of the transfer, and make sure it is within the
599: * boundaries of the partition. Adjust transfer if needed, and signal errors or
600: * early completion.
601: */
602: int
603: bounds_check_with_label(struct buf *bp, struct disklabel *lp, int wlabel)
604: {
605: #define blockpersec(count, lp) ((count) * (((lp)->d_secsize) / DEV_BSIZE))
606: struct partition *p = &lp->d_partitions[DISKPART(bp->b_dev)];
607: daddr64_t sz = howmany(bp->b_bcount, DEV_BSIZE);
608:
609: /* avoid division by zero */
610: if (lp->d_secpercyl == 0)
611: goto bad;
612:
613: /* beyond partition? */
614: if (bp->b_blkno + sz > blockpersec(DL_GETPSIZE(p), lp)) {
615: sz = blockpersec(DL_GETPSIZE(p), lp) - bp->b_blkno;
616: if (sz == 0) {
617: /* If exactly at end of disk, return EOF. */
618: bp->b_resid = bp->b_bcount;
619: return (-1);
620: }
621: if (sz < 0)
622: /* If past end of disk, return EINVAL. */
623: goto bad;
624:
625: /* Otherwise, truncate request. */
626: bp->b_bcount = sz << DEV_BSHIFT;
627: }
628:
629: /* calculate cylinder for disksort to order transfers with */
630: bp->b_cylinder = (bp->b_blkno + blockpersec(DL_GETPOFFSET(p), lp)) /
631: blockpersec(lp->d_secpercyl, lp);
632: return (1);
633:
634: bad:
635: bp->b_error = EINVAL;
636: bp->b_flags |= B_ERROR;
637: return (-1);
638: }
639:
640: /*
641: * Disk error is the preface to plaintive error messages
642: * about failing disk transfers. It prints messages of the form
643:
644: hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d)
645:
646: * if the offset of the error in the transfer and a disk label
647: * are both available. blkdone should be -1 if the position of the error
648: * is unknown; the disklabel pointer may be null from drivers that have not
649: * been converted to use them. The message is printed with printf
650: * if pri is LOG_PRINTF, otherwise it uses log at the specified priority.
651: * The message should be completed (with at least a newline) with printf
652: * or addlog, respectively. There is no trailing space.
653: */
654: void
655: diskerr(struct buf *bp, char *dname, char *what, int pri, int blkdone,
656: struct disklabel *lp)
657: {
658: int unit = DISKUNIT(bp->b_dev), part = DISKPART(bp->b_dev);
659: int (*pr)(const char *, ...);
660: char partname = 'a' + part;
661: daddr64_t sn;
662:
663: if (pri != LOG_PRINTF) {
664: static const char fmt[] = "";
665: log(pri, fmt);
666: pr = addlog;
667: } else
668: pr = printf;
669: (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what,
670: bp->b_flags & B_READ ? "read" : "writ");
671: sn = bp->b_blkno;
672: if (bp->b_bcount <= DEV_BSIZE)
673: (*pr)("%lld", sn);
674: else {
675: if (blkdone >= 0) {
676: sn += blkdone;
677: (*pr)("%lld of ", sn);
678: }
679: (*pr)("%lld-%lld", bp->b_blkno,
680: bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE);
681: }
682: if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) {
683: sn += DL_GETPOFFSET(&lp->d_partitions[part]);
684: (*pr)(" (%s%d bn %lld; cn %lld", dname, unit, sn,
685: sn / lp->d_secpercyl);
686: sn %= lp->d_secpercyl;
687: (*pr)(" tn %lld sn %lld)", sn / lp->d_nsectors,
688: sn % lp->d_nsectors);
689: }
690: }
691:
692: /*
693: * Initialize the disklist. Called by main() before autoconfiguration.
694: */
695: void
696: disk_init(void)
697: {
698:
699: TAILQ_INIT(&disklist);
700: disk_count = disk_change = 0;
701: }
702:
703: int
704: disk_construct(struct disk *diskp, char *lockname)
705: {
706: rw_init(&diskp->dk_lock, lockname);
707:
708: diskp->dk_flags |= DKF_CONSTRUCTED;
709:
710: return (0);
711: }
712:
713: /*
714: * Attach a disk.
715: */
716: void
717: disk_attach(struct disk *diskp)
718: {
719:
720: if (!ISSET(diskp->dk_flags, DKF_CONSTRUCTED))
721: disk_construct(diskp, diskp->dk_name);
722:
723: /*
724: * Allocate and initialize the disklabel structures. Note that
725: * it's not safe to sleep here, since we're probably going to be
726: * called during autoconfiguration.
727: */
728: diskp->dk_label = malloc(sizeof(struct disklabel), M_DEVBUF, M_NOWAIT);
729: if (diskp->dk_label == NULL)
730: panic("disk_attach: can't allocate storage for disklabel");
731:
732: bzero(diskp->dk_label, sizeof(struct disklabel));
733:
734: /*
735: * Set the attached timestamp.
736: */
737: microuptime(&diskp->dk_attachtime);
738:
739: /*
740: * Link into the disklist.
741: */
742: TAILQ_INSERT_TAIL(&disklist, diskp, dk_link);
743: ++disk_count;
744: disk_change = 1;
745: }
746:
747: /*
748: * Detach a disk.
749: */
750: void
751: disk_detach(struct disk *diskp)
752: {
753:
754: /*
755: * Free the space used by the disklabel structures.
756: */
757: free(diskp->dk_label, M_DEVBUF);
758:
759: /*
760: * Remove from the disklist.
761: */
762: TAILQ_REMOVE(&disklist, diskp, dk_link);
763: disk_change = 1;
764: if (--disk_count < 0)
765: panic("disk_detach: disk_count < 0");
766: }
767:
768: /*
769: * Increment a disk's busy counter. If the counter is going from
770: * 0 to 1, set the timestamp.
771: */
772: void
773: disk_busy(struct disk *diskp)
774: {
775:
776: /*
777: * XXX We'd like to use something as accurate as microtime(),
778: * but that doesn't depend on the system TOD clock.
779: */
780: if (diskp->dk_busy++ == 0) {
781: microuptime(&diskp->dk_timestamp);
782: }
783: }
784:
785: /*
786: * Decrement a disk's busy counter, increment the byte count, total busy
787: * time, and reset the timestamp.
788: */
789: void
790: disk_unbusy(struct disk *diskp, long bcount, int read)
791: {
792: struct timeval dv_time, diff_time;
793:
794: if (diskp->dk_busy-- == 0)
795: printf("disk_unbusy: %s: dk_busy < 0\n", diskp->dk_name);
796:
797: microuptime(&dv_time);
798:
799: timersub(&dv_time, &diskp->dk_timestamp, &diff_time);
800: timeradd(&diskp->dk_time, &diff_time, &diskp->dk_time);
801:
802: diskp->dk_timestamp = dv_time;
803: if (bcount > 0) {
804: if (read) {
805: diskp->dk_rbytes += bcount;
806: diskp->dk_rxfer++;
807: } else {
808: diskp->dk_wbytes += bcount;
809: diskp->dk_wxfer++;
810: }
811: } else
812: diskp->dk_seek++;
813:
814: add_disk_randomness(bcount ^ diff_time.tv_usec);
815: }
816:
817: int
818: disk_lock(struct disk *dk)
819: {
820: int error;
821:
822: error = rw_enter(&dk->dk_lock, RW_WRITE|RW_INTR);
823:
824: return (error);
825: }
826:
/*
 * Release the disk's dk_lock via rw_exit(); counterpart of disk_lock().
 */
827: void
828: disk_unlock(struct disk *dk)
829: {
830: rw_exit(&dk->dk_lock);
831: }
832:
833: int
834: dk_mountroot(void)
835: {
836: dev_t rawdev, rrootdev;
837: int part = DISKPART(rootdev);
838: int (*mountrootfn)(void);
839: struct disklabel dl;
840: int error;
841:
842: rrootdev = blktochr(rootdev);
843: rawdev = MAKEDISKDEV(major(rrootdev), DISKUNIT(rootdev), RAW_PART);
844: #ifdef DEBUG
845: printf("rootdev=0x%x rrootdev=0x%x rawdev=0x%x\n", rootdev,
846: rrootdev, rawdev);
847: #endif
848:
849: /*
850: * open device, ioctl for the disklabel, and close it.
851: */
852: error = (cdevsw[major(rrootdev)].d_open)(rawdev, FREAD,
853: S_IFCHR, curproc);
854: if (error)
855: panic("cannot open disk, 0x%x/0x%x, error %d",
856: rootdev, rrootdev, error);
857: error = (cdevsw[major(rrootdev)].d_ioctl)(rawdev, DIOCGDINFO,
858: (caddr_t)&dl, FREAD, curproc);
859: if (error)
860: panic("cannot read disk label, 0x%x/0x%x, error %d",
861: rootdev, rrootdev, error);
862: (void) (cdevsw[major(rrootdev)].d_close)(rawdev, FREAD,
863: S_IFCHR, curproc);
864:
865: if (DL_GETPSIZE(&dl.d_partitions[part]) == 0)
866: panic("root filesystem has size 0");
867: switch (dl.d_partitions[part].p_fstype) {
868: #ifdef EXT2FS
869: case FS_EXT2FS:
870: {
871: extern int ext2fs_mountroot(void);
872: mountrootfn = ext2fs_mountroot;
873: }
874: break;
875: #endif
876: #ifdef FFS
877: case FS_BSDFFS:
878: {
879: extern int ffs_mountroot(void);
880: mountrootfn = ffs_mountroot;
881: }
882: break;
883: #endif
884: #ifdef CD9660
885: case FS_ISO9660:
886: {
887: extern int cd9660_mountroot(void);
888: mountrootfn = cd9660_mountroot;
889: }
890: break;
891: #endif
892: default:
893: #ifdef FFS
894: {
895: extern int ffs_mountroot(void);
896:
897: printf("filesystem type %d not known.. assuming ffs\n",
898: dl.d_partitions[part].p_fstype);
899: mountrootfn = ffs_mountroot;
900: }
901: #else
902: panic("disk 0x%x/0x%x filesystem type %d not known",
903: rootdev, rrootdev, dl.d_partitions[part].p_fstype);
904: #endif
905: }
906: return (*mountrootfn)();
907: }
908:
909: struct bufq *
910: bufq_default_alloc(void)
911: {
912: struct bufq_default *bq;
913:
914: bq = malloc(sizeof(*bq), M_DEVBUF, M_NOWAIT);
915: if (bq == NULL)
916: panic("bufq_default_alloc: no memory");
917:
918: memset(bq, 0, sizeof(*bq));
919: bq->bufq.bufq_free = bufq_default_free;
920: bq->bufq.bufq_add = bufq_default_add;
921: bq->bufq.bufq_get = bufq_default_get;
922:
923: return ((struct bufq *)bq);
924: }
925:
/*
 * Release a queue obtained from bufq_default_alloc().  Only the queue
 * structure itself is freed; nothing is done with buffers still linked
 * on it.
 */
926: void
927: bufq_default_free(struct bufq *bq)
928: {
929: free(bq, M_DEVBUF);
930: }
931:
932: void
933: bufq_default_add(struct bufq *bq, struct buf *bp)
934: {
935: struct bufq_default *bufq = (struct bufq_default *)bq;
936: struct proc *p = bp->b_proc;
937: struct buf *head;
938:
939: if (p == NULL || p->p_nice < NZERO)
940: head = &bufq->bufq_head[0];
941: else if (p->p_nice == NZERO)
942: head = &bufq->bufq_head[1];
943: else
944: head = &bufq->bufq_head[2];
945:
946: disksort(head, bp);
947: }
948:
949: struct buf *
950: bufq_default_get(struct bufq *bq)
951: {
952: struct bufq_default *bufq = (struct bufq_default *)bq;
953: struct buf *bp, *head;
954: int i;
955:
956: for (i = 0; i < 3; i++) {
957: head = &bufq->bufq_head[i];
958: if ((bp = head->b_actf))
959: break;
960: }
961: if (bp == NULL)
962: return (NULL);
963: head->b_actf = bp->b_actf;
964: return (bp);
965: }
966:
/*
 * Stand-in disk-class device named "rd0", only present when RAMDISK_HOOKS
 * is defined.  getdisk() lists it, parsedisk() matches it by name, and
 * setroot() uses it as the boot device.
 */
967: #ifdef RAMDISK_HOOKS
968: static struct device fakerdrootdev = { DV_DISK, {}, NULL, 0, "rd0", NULL };
969: #endif
970:
971: struct device *
972: getdisk(char *str, int len, int defpart, dev_t *devp)
973: {
974: struct device *dv;
975:
976: if ((dv = parsedisk(str, len, defpart, devp)) == NULL) {
977: printf("use one of: exit");
978: #ifdef RAMDISK_HOOKS
979: printf(" %s[a-p]", fakerdrootdev.dv_xname);
980: #endif
981: TAILQ_FOREACH(dv, &alldevs, dv_list) {
982: if (dv->dv_class == DV_DISK)
983: printf(" %s[a-p]", dv->dv_xname);
984: #if defined(NFSCLIENT)
985: if (dv->dv_class == DV_IFNET)
986: printf(" %s", dv->dv_xname);
987: #endif
988: }
989: printf("\n");
990: }
991: return (dv);
992: }
993:
994: struct device *
995: parsedisk(char *str, int len, int defpart, dev_t *devp)
996: {
997: struct device *dv;
998: char c;
999: int majdev, part;
1000:
1001: if (len == 0)
1002: return (NULL);
1003: c = str[len-1];
1004: if (c >= 'a' && (c - 'a') < MAXPARTITIONS) {
1005: part = c - 'a';
1006: len -= 1;
1007: } else
1008: part = defpart;
1009:
1010: #ifdef RAMDISK_HOOKS
1011: if (strcmp(str, fakerdrootdev.dv_xname) == 0) {
1012: dv = &fakerdrootdev;
1013: goto gotdisk;
1014: }
1015: #endif
1016:
1017: TAILQ_FOREACH(dv, &alldevs, dv_list) {
1018: if (dv->dv_class == DV_DISK &&
1019: strncmp(str, dv->dv_xname, len) == 0 &&
1020: dv->dv_xname[len] == '\0') {
1021: #ifdef RAMDISK_HOOKS
1022: gotdisk:
1023: #endif
1024: majdev = findblkmajor(dv);
1025: if (majdev < 0)
1026: panic("parsedisk");
1027: *devp = MAKEDISKDEV(majdev, dv->dv_unit, part);
1028: break;
1029: }
1030: #if defined(NFSCLIENT)
1031: if (dv->dv_class == DV_IFNET &&
1032: strncmp(str, dv->dv_xname, len) == 0 &&
1033: dv->dv_xname[len] == '\0') {
1034: *devp = NODEV;
1035: break;
1036: }
1037: #endif
1038: }
1039:
1040: return (dv);
1041: }
1042:
1043: void
1044: setroot(struct device *bootdv, int part, int exitflags)
1045: {
1046: int majdev, unit, len, s;
1047: struct swdevt *swp;
1048: struct device *rootdv, *dv;
1049: dev_t nrootdev, nswapdev = NODEV, temp = NODEV;
1050: char buf[128];
1051: #if defined(NFSCLIENT)
1052: extern char *nfsbootdevname;
1053: #endif
1054:
1055: if (boothowto & RB_DFLTROOT)
1056: return;
1057:
1058: #ifdef RAMDISK_HOOKS
1059: bootdv = &fakerdrootdev;
1060: mountroot = NULL;
1061: part = 0;
1062: #endif
1063:
1064: /*
1065: * If `swap generic' and we couldn't determine boot device,
1066: * ask the user.
1067: */
1068: if (mountroot == NULL && bootdv == NULL)
1069: boothowto |= RB_ASKNAME;
1070: if (boothowto & RB_ASKNAME) {
1071: while (1) {
1072: printf("root device");
1073: if (bootdv != NULL) {
1074: printf(" (default %s", bootdv->dv_xname);
1075: if (bootdv->dv_class == DV_DISK)
1076: printf("%c", 'a' + part);
1077: printf(")");
1078: }
1079: printf(": ");
1080: s = splhigh();
1081: cnpollc(TRUE);
1082: len = getsn(buf, sizeof(buf));
1083: cnpollc(FALSE);
1084: splx(s);
1085: if (strcmp(buf, "exit") == 0)
1086: boot(exitflags);
1087: if (len == 0 && bootdv != NULL) {
1088: strlcpy(buf, bootdv->dv_xname, sizeof buf);
1089: len = strlen(buf);
1090: }
1091: if (len > 0 && buf[len - 1] == '*') {
1092: buf[--len] = '\0';
1093: dv = getdisk(buf, len, part, &nrootdev);
1094: if (dv != NULL) {
1095: rootdv = dv;
1096: nswapdev = nrootdev;
1097: goto gotswap;
1098: }
1099: }
1100: dv = getdisk(buf, len, part, &nrootdev);
1101: if (dv != NULL) {
1102: rootdv = dv;
1103: break;
1104: }
1105: }
1106:
1107: if (rootdv->dv_class == DV_IFNET)
1108: goto gotswap;
1109:
1110: /* try to build swap device out of new root device */
1111: while (1) {
1112: printf("swap device");
1113: if (rootdv != NULL)
1114: printf(" (default %s%s)", rootdv->dv_xname,
1115: rootdv->dv_class == DV_DISK ? "b" : "");
1116: printf(": ");
1117: s = splhigh();
1118: cnpollc(TRUE);
1119: len = getsn(buf, sizeof(buf));
1120: cnpollc(FALSE);
1121: splx(s);
1122: if (strcmp(buf, "exit") == 0)
1123: boot(exitflags);
1124: if (len == 0 && rootdv != NULL) {
1125: switch (rootdv->dv_class) {
1126: case DV_IFNET:
1127: nswapdev = NODEV;
1128: break;
1129: case DV_DISK:
1130: nswapdev = MAKEDISKDEV(major(nrootdev),
1131: DISKUNIT(nrootdev), 1);
1132: if (nswapdev == nrootdev)
1133: continue;
1134: break;
1135: default:
1136: break;
1137: }
1138: break;
1139: }
1140: dv = getdisk(buf, len, 1, &nswapdev);
1141: if (dv) {
1142: if (dv->dv_class == DV_IFNET)
1143: nswapdev = NODEV;
1144: if (nswapdev == nrootdev)
1145: continue;
1146: break;
1147: }
1148: }
1149: gotswap:
1150: rootdev = nrootdev;
1151: dumpdev = nswapdev;
1152: swdevt[0].sw_dev = nswapdev;
1153: swdevt[1].sw_dev = NODEV;
1154: #if defined(NFSCLIENT)
1155: } else if (mountroot == nfs_mountroot) {
1156: rootdv = bootdv;
1157: rootdev = dumpdev = swapdev = NODEV;
1158: #endif
1159: } else if (mountroot == NULL) {
1160: /* `swap generic': Use the device the ROM told us to use */
1161: rootdv = bootdv;
1162: majdev = findblkmajor(rootdv);
1163: if (majdev >= 0) {
1164: /*
1165: * Root and swap are on the disk.
1166: * Assume swap is on partition b.
1167: */
1168: rootdev = MAKEDISKDEV(majdev, rootdv->dv_unit, part);
1169: nswapdev = MAKEDISKDEV(majdev, rootdv->dv_unit, 1);
1170: } else {
1171: /*
1172: * Root and swap are on a net.
1173: */
1174: nswapdev = NODEV;
1175: }
1176: dumpdev = nswapdev;
1177: swdevt[0].sw_dev = nswapdev;
1178: /* swdevt[1].sw_dev = NODEV; */
1179: } else {
1180: /* Completely pre-configured, but we want rootdv .. */
1181: majdev = major(rootdev);
1182: if (findblkname(majdev) == NULL)
1183: return;
1184: unit = DISKUNIT(rootdev);
1185: part = DISKPART(rootdev);
1186: snprintf(buf, sizeof buf, "%s%d%c",
1187: findblkname(majdev), unit, 'a' + part);
1188: rootdv = parsedisk(buf, strlen(buf), 0, &nrootdev);
1189: }
1190:
1191: switch (rootdv->dv_class) {
1192: #if defined(NFSCLIENT)
1193: case DV_IFNET:
1194: mountroot = nfs_mountroot;
1195: nfsbootdevname = rootdv->dv_xname;
1196: return;
1197: #endif
1198: case DV_DISK:
1199: mountroot = dk_mountroot;
1200: part = DISKPART(rootdev);
1201: break;
1202: default:
1203: printf("can't figure root, hope your kernel is right\n");
1204: return;
1205: }
1206:
1207: printf("root on %s%c", rootdv->dv_xname, 'a' + part);
1208:
1209: /*
1210: * Make the swap partition on the root drive the primary swap.
1211: */
1212: for (swp = swdevt; swp->sw_dev != NODEV; swp++) {
1213: if (major(rootdev) == major(swp->sw_dev) &&
1214: DISKUNIT(rootdev) == DISKUNIT(swp->sw_dev)) {
1215: temp = swdevt[0].sw_dev;
1216: swdevt[0].sw_dev = swp->sw_dev;
1217: swp->sw_dev = temp;
1218: break;
1219: }
1220: }
1221: if (swp->sw_dev != NODEV) {
1222: /*
1223: * If dumpdev was the same as the old primary swap device,
1224: * move it to the new primary swap device.
1225: */
1226: if (temp == dumpdev)
1227: dumpdev = swdevt[0].sw_dev;
1228: }
1229: if (swdevt[0].sw_dev != NODEV)
1230: printf(" swap on %s%d%c", findblkname(major(swdevt[0].sw_dev)),
1231: DISKUNIT(swdevt[0].sw_dev),
1232: 'a' + DISKPART(swdevt[0].sw_dev));
1233: if (dumpdev != NODEV)
1234: printf(" dump on %s%d%c", findblkname(major(dumpdev)),
1235: DISKUNIT(dumpdev), 'a' + DISKPART(dumpdev));
1236: printf("\n");
1237: }
1238:
1239: extern struct nam2blk nam2blk[];
1240:
1241: int
1242: findblkmajor(struct device *dv)
1243: {
1244: char *name = dv->dv_xname;
1245: int i;
1246:
1247: for (i = 0; nam2blk[i].name; i++)
1248: if (!strncmp(name, nam2blk[i].name, strlen(nam2blk[i].name)))
1249: return (nam2blk[i].maj);
1250: return (-1);
1251: }
1252:
1253: char *
1254: findblkname(int maj)
1255: {
1256: int i;
1257:
1258: for (i = 0; nam2blk[i].name; i++)
1259: if (nam2blk[i].maj == maj)
1260: return (nam2blk[i].name);
1261: return (NULL);
1262: }
CVSweb