source: branches/minix3-book/servers/fs/read.c@ 23

Last change on this file since 23 was 4, checked in by Mattia Monga, 14 years ago

Importazione sorgenti libro

File size: 18.5 KB
RevLine 
/* This file contains the heart of the mechanism used to read (and write)
 * files. Read and write requests are split up into chunks that do not cross
 * block boundaries. Each chunk is then processed in turn. Reads on special
 * files are also detected and handled.
 *
 * The entry points into this file are
 *   do_read:	 perform the READ system call by calling read_write
 *   read_write: actually do the work of READ and WRITE
 *   read_map:	 given an inode and file position, look up its block number
 *   rd_indir:	 read an entry in an indirect block
 *   read_ahead: manage the block read ahead business
 *   rahead:	 fetch a block, prefetching the blocks that follow it
 */
13
14#include "fs.h"
15#include <fcntl.h>
16#include <minix/com.h>
17#include "buf.h"
18#include "file.h"
19#include "fproc.h"
20#include "inode.h"
21#include "param.h"
22#include "super.h"
23
24FORWARD _PROTOTYPE( int rw_chunk, (struct inode *rip, off_t position,
25 unsigned off, int chunk, unsigned left, int rw_flag,
26 char *buff, int seg, int usr, int block_size, int *completed));
27
28/*===========================================================================*
29 * do_read *
30 *===========================================================================*/
31PUBLIC int do_read()
32{
33 return(read_write(READING));
34}
35
36/*===========================================================================*
37 * read_write *
38 *===========================================================================*/
39PUBLIC int read_write(rw_flag)
40int rw_flag; /* READING or WRITING */
41{
42/* Perform read(fd, buffer, nbytes) or write(fd, buffer, nbytes) call. */
43
44 register struct inode *rip;
45 register struct filp *f;
46 off_t bytes_left, f_size, position;
47 unsigned int off, cum_io;
48 int op, oflags, r, chunk, usr, seg, block_spec, char_spec;
49 int regular, partial_pipe = 0, partial_cnt = 0;
50 mode_t mode_word;
51 struct filp *wf;
52 int block_size;
53 int completed, r2 = OK;
54 phys_bytes p;
55
56 /* left unfinished rw_chunk()s from previous call! this can't happen.
57 * it means something has gone wrong we can't repair now.
58 */
59 if (bufs_in_use < 0) {
60 panic(__FILE__,"start - bufs_in_use negative", bufs_in_use);
61 }
62
63 /* MM loads segments by putting funny things in upper 10 bits of 'fd'. */
64 if (who == PM_PROC_NR && (m_in.fd & (~BYTE)) ) {
65 usr = m_in.fd >> 7;
66 seg = (m_in.fd >> 5) & 03;
67 m_in.fd &= 037; /* get rid of user and segment bits */
68 } else {
69 usr = who; /* normal case */
70 seg = D;
71 }
72
73 /* If the file descriptor is valid, get the inode, size and mode. */
74 if (m_in.nbytes < 0) return(EINVAL);
75 if ((f = get_filp(m_in.fd)) == NIL_FILP) return(err_code);
76 if (((f->filp_mode) & (rw_flag == READING ? R_BIT : W_BIT)) == 0) {
77 return(f->filp_mode == FILP_CLOSED ? EIO : EBADF);
78 }
79 if (m_in.nbytes == 0)
80 return(0); /* so char special files need not check for 0*/
81
82 /* check if user process has the memory it needs.
83 * if not, copying will fail later.
84 * do this after 0-check above because umap doesn't want to map 0 bytes.
85 */
86 if ((r = sys_umap(usr, seg, (vir_bytes) m_in.buffer, m_in.nbytes, &p)) != OK)
87 return r;
88 position = f->filp_pos;
89 oflags = f->filp_flags;
90 rip = f->filp_ino;
91 f_size = rip->i_size;
92 r = OK;
93 if (rip->i_pipe == I_PIPE) {
94 /* fp->fp_cum_io_partial is only nonzero when doing partial writes */
95 cum_io = fp->fp_cum_io_partial;
96 } else {
97 cum_io = 0;
98 }
99 op = (rw_flag == READING ? DEV_READ : DEV_WRITE);
100 mode_word = rip->i_mode & I_TYPE;
101 regular = mode_word == I_REGULAR || mode_word == I_NAMED_PIPE;
102
103 if ((char_spec = (mode_word == I_CHAR_SPECIAL ? 1 : 0))) {
104 if (rip->i_zone[0] == NO_DEV)
105 panic(__FILE__,"read_write tries to read from "
106 "character device NO_DEV", NO_NUM);
107 block_size = get_block_size(rip->i_zone[0]);
108 }
109 if ((block_spec = (mode_word == I_BLOCK_SPECIAL ? 1 : 0))) {
110 f_size = ULONG_MAX;
111 if (rip->i_zone[0] == NO_DEV)
112 panic(__FILE__,"read_write tries to read from "
113 " block device NO_DEV", NO_NUM);
114 block_size = get_block_size(rip->i_zone[0]);
115 }
116
117 if (!char_spec && !block_spec)
118 block_size = rip->i_sp->s_block_size;
119
120 rdwt_err = OK; /* set to EIO if disk error occurs */
121
122 /* Check for character special files. */
123 if (char_spec) {
124 dev_t dev;
125 dev = (dev_t) rip->i_zone[0];
126 r = dev_io(op, dev, usr, m_in.buffer, position, m_in.nbytes, oflags);
127 if (r >= 0) {
128 cum_io = r;
129 position += r;
130 r = OK;
131 }
132 } else {
133 if (rw_flag == WRITING && block_spec == 0) {
134 /* Check in advance to see if file will grow too big. */
135 if (position > rip->i_sp->s_max_size - m_in.nbytes)
136 return(EFBIG);
137
138 /* Check for O_APPEND flag. */
139 if (oflags & O_APPEND) position = f_size;
140
141 /* Clear the zone containing present EOF if hole about
142 * to be created. This is necessary because all unwritten
143 * blocks prior to the EOF must read as zeros.
144 */
145 if (position > f_size) clear_zone(rip, f_size, 0);
146 }
147
148 /* Pipes are a little different. Check. */
149 if (rip->i_pipe == I_PIPE) {
150 r = pipe_check(rip, rw_flag, oflags,
151 m_in.nbytes, position, &partial_cnt, 0);
152 if (r <= 0) return(r);
153 }
154
155 if (partial_cnt > 0) partial_pipe = 1;
156
157 /* Split the transfer into chunks that don't span two blocks. */
158 while (m_in.nbytes != 0) {
159
160 off = (unsigned int) (position % block_size);/* offset in blk*/
161 if (partial_pipe) { /* pipes only */
162 chunk = MIN(partial_cnt, block_size - off);
163 } else
164 chunk = MIN(m_in.nbytes, block_size - off);
165 if (chunk < 0) chunk = block_size - off;
166
167 if (rw_flag == READING) {
168 bytes_left = f_size - position;
169 if (position >= f_size) break; /* we are beyond EOF */
170 if (chunk > bytes_left) chunk = (int) bytes_left;
171 }
172
173 /* Read or write 'chunk' bytes. */
174 r = rw_chunk(rip, position, off, chunk, (unsigned) m_in.nbytes,
175 rw_flag, m_in.buffer, seg, usr, block_size, &completed);
176
177 if (r != OK) break; /* EOF reached */
178 if (rdwt_err < 0) break;
179
180 /* Update counters and pointers. */
181 m_in.buffer += chunk; /* user buffer address */
182 m_in.nbytes -= chunk; /* bytes yet to be read */
183 cum_io += chunk; /* bytes read so far */
184 position += chunk; /* position within the file */
185
186 if (partial_pipe) {
187 partial_cnt -= chunk;
188 if (partial_cnt <= 0) break;
189 }
190 }
191 }
192
193 /* On write, update file size and access time. */
194 if (rw_flag == WRITING) {
195 if (regular || mode_word == I_DIRECTORY) {
196 if (position > f_size) rip->i_size = position;
197 }
198 } else {
199 if (rip->i_pipe == I_PIPE) {
200 if ( position >= rip->i_size) {
201 /* Reset pipe pointers. */
202 rip->i_size = 0; /* no data left */
203 position = 0; /* reset reader(s) */
204 wf = find_filp(rip, W_BIT);
205 if (wf != NIL_FILP) wf->filp_pos = 0;
206 }
207 }
208 }
209 f->filp_pos = position;
210
211 /* Check to see if read-ahead is called for, and if so, set it up. */
212 if (rw_flag == READING && rip->i_seek == NO_SEEK && position % block_size== 0
213 && (regular || mode_word == I_DIRECTORY)) {
214 rdahed_inode = rip;
215 rdahedpos = position;
216 }
217 rip->i_seek = NO_SEEK;
218
219 if (rdwt_err != OK) r = rdwt_err; /* check for disk error */
220 if (rdwt_err == END_OF_FILE) r = OK;
221
222 /* if user-space copying failed, read/write failed. */
223 if (r == OK && r2 != OK) {
224 r = r2;
225 }
226 if (r == OK) {
227 if (rw_flag == READING) rip->i_update |= ATIME;
228 if (rw_flag == WRITING) rip->i_update |= CTIME | MTIME;
229 rip->i_dirt = DIRTY; /* inode is thus now dirty */
230 if (partial_pipe) {
231 partial_pipe = 0;
232 /* partial write on pipe with */
233 /* O_NONBLOCK, return write count */
234 if (!(oflags & O_NONBLOCK)) {
235 fp->fp_cum_io_partial = cum_io;
236 suspend(XPIPE); /* partial write on pipe with */
237 return(SUSPEND); /* nbyte > PIPE_SIZE - non-atomic */
238 }
239 }
240 fp->fp_cum_io_partial = 0;
241 return(cum_io);
242 }
243 if (bufs_in_use < 0) {
244 panic(__FILE__,"end - bufs_in_use negative", bufs_in_use);
245 }
246 return(r);
247}
248
249/*===========================================================================*
250 * rw_chunk *
251 *===========================================================================*/
252PRIVATE int rw_chunk(rip, position, off, chunk, left, rw_flag, buff,
253 seg, usr, block_size, completed)
254register struct inode *rip; /* pointer to inode for file to be rd/wr */
255off_t position; /* position within file to read or write */
256unsigned off; /* off within the current block */
257int chunk; /* number of bytes to read or write */
258unsigned left; /* max number of bytes wanted after position */
259int rw_flag; /* READING or WRITING */
260char *buff; /* virtual address of the user buffer */
261int seg; /* T or D segment in user space */
262int usr; /* which user process */
263int block_size; /* block size of FS operating on */
264int *completed; /* number of bytes copied */
265{
266/* Read or write (part of) a block. */
267
268 register struct buf *bp;
269 register int r = OK;
270 int n, block_spec;
271 block_t b;
272 dev_t dev;
273
274 *completed = 0;
275
276 block_spec = (rip->i_mode & I_TYPE) == I_BLOCK_SPECIAL;
277 if (block_spec) {
278 b = position/block_size;
279 dev = (dev_t) rip->i_zone[0];
280 } else {
281 b = read_map(rip, position);
282 dev = rip->i_dev;
283 }
284
285 if (!block_spec && b == NO_BLOCK) {
286 if (rw_flag == READING) {
287 /* Reading from a nonexistent block. Must read as all zeros.*/
288 bp = get_block(NO_DEV, NO_BLOCK, NORMAL); /* get a buffer */
289 zero_block(bp);
290 } else {
291 /* Writing to a nonexistent block. Create and enter in inode.*/
292 if ((bp= new_block(rip, position)) == NIL_BUF)return(err_code);
293 }
294 } else if (rw_flag == READING) {
295 /* Read and read ahead if convenient. */
296 bp = rahead(rip, b, position, left);
297 } else {
298 /* Normally an existing block to be partially overwritten is first read
299 * in. However, a full block need not be read in. If it is already in
300 * the cache, acquire it, otherwise just acquire a free buffer.
301 */
302 n = (chunk == block_size ? NO_READ : NORMAL);
303 if (!block_spec && off == 0 && position >= rip->i_size) n = NO_READ;
304 bp = get_block(dev, b, n);
305 }
306
307 /* In all cases, bp now points to a valid buffer. */
308 if (bp == NIL_BUF) {
309 panic(__FILE__,"bp not valid in rw_chunk, this can't happen", NO_NUM);
310 }
311 if (rw_flag == WRITING && chunk != block_size && !block_spec &&
312 position >= rip->i_size && off == 0) {
313 zero_block(bp);
314 }
315
316 if (rw_flag == READING) {
317 /* Copy a chunk from the block buffer to user space. */
318 r = sys_vircopy(FS_PROC_NR, D, (phys_bytes) (bp->b_data+off),
319 usr, seg, (phys_bytes) buff,
320 (phys_bytes) chunk);
321 } else {
322 /* Copy a chunk from user space to the block buffer. */
323 r = sys_vircopy(usr, seg, (phys_bytes) buff,
324 FS_PROC_NR, D, (phys_bytes) (bp->b_data+off),
325 (phys_bytes) chunk);
326 bp->b_dirt = DIRTY;
327 }
328 n = (off + chunk == block_size ? FULL_DATA_BLOCK : PARTIAL_DATA_BLOCK);
329 put_block(bp, n);
330
331 return(r);
332}
333
334
335/*===========================================================================*
336 * read_map *
337 *===========================================================================*/
338PUBLIC block_t read_map(rip, position)
339register struct inode *rip; /* ptr to inode to map from */
340off_t position; /* position in file whose blk wanted */
341{
342/* Given an inode and a position within the corresponding file, locate the
343 * block (not zone) number in which that position is to be found and return it.
344 */
345
346 register struct buf *bp;
347 register zone_t z;
348 int scale, boff, dzones, nr_indirects, index, zind, ex;
349 block_t b;
350 long excess, zone, block_pos;
351
352 scale = rip->i_sp->s_log_zone_size; /* for block-zone conversion */
353 block_pos = position/rip->i_sp->s_block_size; /* relative blk # in file */
354 zone = block_pos >> scale; /* position's zone */
355 boff = (int) (block_pos - (zone << scale) ); /* relative blk # within zone */
356 dzones = rip->i_ndzones;
357 nr_indirects = rip->i_nindirs;
358
359 /* Is 'position' to be found in the inode itself? */
360 if (zone < dzones) {
361 zind = (int) zone; /* index should be an int */
362 z = rip->i_zone[zind];
363 if (z == NO_ZONE) return(NO_BLOCK);
364 b = ((block_t) z << scale) + boff;
365 return(b);
366 }
367
368 /* It is not in the inode, so it must be single or double indirect. */
369 excess = zone - dzones; /* first Vx_NR_DZONES don't count */
370
371 if (excess < nr_indirects) {
372 /* 'position' can be located via the single indirect block. */
373 z = rip->i_zone[dzones];
374 } else {
375 /* 'position' can be located via the double indirect block. */
376 if ( (z = rip->i_zone[dzones+1]) == NO_ZONE) return(NO_BLOCK);
377 excess -= nr_indirects; /* single indir doesn't count*/
378 b = (block_t) z << scale;
379 bp = get_block(rip->i_dev, b, NORMAL); /* get double indirect block */
380 index = (int) (excess/nr_indirects);
381 z = rd_indir(bp, index); /* z= zone for single*/
382 put_block(bp, INDIRECT_BLOCK); /* release double ind block */
383 excess = excess % nr_indirects; /* index into single ind blk */
384 }
385
386 /* 'z' is zone num for single indirect block; 'excess' is index into it. */
387 if (z == NO_ZONE) return(NO_BLOCK);
388 b = (block_t) z << scale; /* b is blk # for single ind */
389 bp = get_block(rip->i_dev, b, NORMAL); /* get single indirect block */
390 ex = (int) excess; /* need an integer */
391 z = rd_indir(bp, ex); /* get block pointed to */
392 put_block(bp, INDIRECT_BLOCK); /* release single indir blk */
393 if (z == NO_ZONE) return(NO_BLOCK);
394 b = ((block_t) z << scale) + boff;
395 return(b);
396}
397
398/*===========================================================================*
399 * rd_indir *
400 *===========================================================================*/
401PUBLIC zone_t rd_indir(bp, index)
402struct buf *bp; /* pointer to indirect block */
403int index; /* index into *bp */
404{
405/* Given a pointer to an indirect block, read one entry. The reason for
406 * making a separate routine out of this is that there are four cases:
407 * V1 (IBM and 68000), and V2 (IBM and 68000).
408 */
409
410 struct super_block *sp;
411 zone_t zone; /* V2 zones are longs (shorts in V1) */
412
413 sp = get_super(bp->b_dev); /* need super block to find file sys type */
414
415 /* read a zone from an indirect block */
416 if (sp->s_version == V1)
417 zone = (zone_t) conv2(sp->s_native, (int) bp->b_v1_ind[index]);
418 else
419 zone = (zone_t) conv4(sp->s_native, (long) bp->b_v2_ind[index]);
420
421 if (zone != NO_ZONE &&
422 (zone < (zone_t) sp->s_firstdatazone || zone >= sp->s_zones)) {
423 printf("Illegal zone number %ld in indirect block, index %d\n",
424 (long) zone, index);
425 panic(__FILE__,"check file system", NO_NUM);
426 }
427 return(zone);
428}
429
430/*===========================================================================*
431 * read_ahead *
432 *===========================================================================*/
433PUBLIC void read_ahead()
434{
435/* Read a block into the cache before it is needed. */
436 int block_size;
437 register struct inode *rip;
438 struct buf *bp;
439 block_t b;
440
441 rip = rdahed_inode; /* pointer to inode to read ahead from */
442 block_size = get_block_size(rip->i_dev);
443 rdahed_inode = NIL_INODE; /* turn off read ahead */
444 if ( (b = read_map(rip, rdahedpos)) == NO_BLOCK) return; /* at EOF */
445 bp = rahead(rip, b, rdahedpos, block_size);
446 put_block(bp, PARTIAL_DATA_BLOCK);
447}
448
449/*===========================================================================*
450 * rahead *
451 *===========================================================================*/
452PUBLIC struct buf *rahead(rip, baseblock, position, bytes_ahead)
453register struct inode *rip; /* pointer to inode for file to be read */
454block_t baseblock; /* block at current position */
455off_t position; /* position within file */
456unsigned bytes_ahead; /* bytes beyond position for immediate use */
457{
458/* Fetch a block from the cache or the device. If a physical read is
459 * required, prefetch as many more blocks as convenient into the cache.
460 * This usually covers bytes_ahead and is at least BLOCKS_MINIMUM.
461 * The device driver may decide it knows better and stop reading at a
462 * cylinder boundary (or after an error). Rw_scattered() puts an optional
463 * flag on all reads to allow this.
464 */
465 int block_size;
466/* Minimum number of blocks to prefetch. */
467# define BLOCKS_MINIMUM (NR_BUFS < 50 ? 18 : 32)
468 int block_spec, scale, read_q_size;
469 unsigned int blocks_ahead, fragment;
470 block_t block, blocks_left;
471 off_t ind1_pos;
472 dev_t dev;
473 struct buf *bp;
474 static struct buf *read_q[NR_BUFS];
475
476 block_spec = (rip->i_mode & I_TYPE) == I_BLOCK_SPECIAL;
477 if (block_spec) {
478 dev = (dev_t) rip->i_zone[0];
479 } else {
480 dev = rip->i_dev;
481 }
482 block_size = get_block_size(dev);
483
484 block = baseblock;
485 bp = get_block(dev, block, PREFETCH);
486 if (bp->b_dev != NO_DEV) return(bp);
487
488 /* The best guess for the number of blocks to prefetch: A lot.
489 * It is impossible to tell what the device looks like, so we don't even
490 * try to guess the geometry, but leave it to the driver.
491 *
492 * The floppy driver can read a full track with no rotational delay, and it
493 * avoids reading partial tracks if it can, so handing it enough buffers to
494 * read two tracks is perfect. (Two, because some diskette types have
495 * an odd number of sectors per track, so a block may span tracks.)
496 *
497 * The disk drivers don't try to be smart. With todays disks it is
498 * impossible to tell what the real geometry looks like, so it is best to
499 * read as much as you can. With luck the caching on the drive allows
500 * for a little time to start the next read.
501 *
502 * The current solution below is a bit of a hack, it just reads blocks from
503 * the current file position hoping that more of the file can be found. A
504 * better solution must look at the already available zone pointers and
505 * indirect blocks (but don't call read_map!).
506 */
507
508 fragment = position % block_size;
509 position -= fragment;
510 bytes_ahead += fragment;
511
512 blocks_ahead = (bytes_ahead + block_size - 1) / block_size;
513
514 if (block_spec && rip->i_size == 0) {
515 blocks_left = NR_IOREQS;
516 } else {
517 blocks_left = (rip->i_size - position + block_size - 1) / block_size;
518
519 /* Go for the first indirect block if we are in its neighborhood. */
520 if (!block_spec) {
521 scale = rip->i_sp->s_log_zone_size;
522 ind1_pos = (off_t) rip->i_ndzones * (block_size << scale);
523 if (position <= ind1_pos && rip->i_size > ind1_pos) {
524 blocks_ahead++;
525 blocks_left++;
526 }
527 }
528 }
529
530 /* No more than the maximum request. */
531 if (blocks_ahead > NR_IOREQS) blocks_ahead = NR_IOREQS;
532
533 /* Read at least the minimum number of blocks, but not after a seek. */
534 if (blocks_ahead < BLOCKS_MINIMUM && rip->i_seek == NO_SEEK)
535 blocks_ahead = BLOCKS_MINIMUM;
536
537 /* Can't go past end of file. */
538 if (blocks_ahead > blocks_left) blocks_ahead = blocks_left;
539
540 read_q_size = 0;
541
542 /* Acquire block buffers. */
543 for (;;) {
544 read_q[read_q_size++] = bp;
545
546 if (--blocks_ahead == 0) break;
547
548 /* Don't trash the cache, leave 4 free. */
549 if (bufs_in_use >= NR_BUFS - 4) break;
550
551 block++;
552
553 bp = get_block(dev, block, PREFETCH);
554 if (bp->b_dev != NO_DEV) {
555 /* Oops, block already in the cache, get out. */
556 put_block(bp, FULL_DATA_BLOCK);
557 break;
558 }
559 }
560 rw_scattered(dev, read_q, read_q_size, READING);
561 return(get_block(dev, baseblock, NORMAL));
562}
Note: See TracBrowser for help on using the repository browser.