1 /* This file contains the heart of the mechanism used to read (and write)
2 * files. Read and write requests are split up into chunks that do not cross
3 * block boundaries. Each chunk is then processed in turn. Reads on special
4 * files are also detected and handled.
5 *
 * The entry points into this file are
 *   do_read:	 perform the READ system call by calling read_write
 *   read_write: actually do the work of READ and WRITE
 *   read_map:	 given an inode and file position, look up its block number
 *   rd_indir:	 read an entry in an indirect block
 *   read_ahead: manage the block read ahead business
 *   rahead:	 fetch a block, prefetching the blocks that follow it
12 */
13
14 #include "fs.h"
15 #include <fcntl.h>
16 #include <minix/com.h>
17 #include "buf.h"
18 #include "file.h"
19 #include "fproc.h"
20 #include "inode.h"
21 #include "param.h"
22 #include "super.h"
23
/* Mask applied to 'fd' in read_write() to strip the user and segment bits
 * that MM puts in the upper bits of the descriptor.
 */
#define FD_MASK          077	/* max file descriptor is 63 */

/* NOTE(review): umess is not referenced in the part of this file visible
 * here -- confirm whether it is still needed.
 */
PRIVATE message umess;		/* message for asking SYSTASK for user copy */

/* Forward declaration of rw_chunk(), which read_write() calls before its
 * definition appears.
 */
FORWARD _PROTOTYPE( int rw_chunk, (struct inode *rip, off_t position,
	unsigned off, int chunk, unsigned left, int rw_flag,
	char *buff, int seg, int usr)				);
31
32 /*===========================================================================*
33 * do_read *
34 *===========================================================================*/
35 PUBLIC int do_read()
36 {
37 return(read_write(READING));
38 }
39
40
41 /*===========================================================================*
42 * read_write *
43 *===========================================================================*/
44 PUBLIC int read_write(rw_flag)
45 int rw_flag; /* READING or WRITING */
46 {
47 /* Perform read(fd, buffer, nbytes) or write(fd, buffer, nbytes) call. */
48
49 register struct inode *rip;
50 register struct filp *f;
51 off_t bytes_left, f_size, position;
52 unsigned int off, cum_io;
53 int op, oflags, r, chunk, usr, seg, block_spec, char_spec;
54 int regular, partial_pipe = 0, partial_cnt = 0;
55 dev_t dev;
56 mode_t mode_word;
57 struct filp *wf;
58
59 /* MM loads segments by putting funny things in upper 10 bits of 'fd'. */
60 if (who == MM_PROC_NR && (fd & (~BYTE)) ) {
61 usr = (fd >> 8) & BYTE;
62 seg = (fd >> 6) & 03;
63 fd &= FD_MASK; /* get rid of user and segment bits */
64 } else {
65 usr = who; /* normal case */
66 seg = D;
67 }
68
69 /* If the file descriptor is valid, get the inode, size and mode. */
70 if (nbytes < 0) return(EINVAL);
71 if ((f = get_filp(fd)) == NIL_FILP) return(err_code);
72 if (((f->filp_mode) & (rw_flag == READING ? R_BIT : W_BIT)) == 0) {
73 return(f->filp_mode == FILP_CLOSED ? EIO : EBADF);
74 }
75 if (nbytes == 0) return(0); /* so char special files need not check for 0*/
76 position = f->filp_pos;
77 if (position > MAX_FILE_POS) return(EINVAL);
78 if (position + nbytes < position) return(EINVAL); /* unsigned overflow */
79 oflags = f->filp_flags;
80 rip = f->filp_ino;
81 f_size = rip->i_size;
82 r = OK;
83 if (rip->i_pipe == I_PIPE) {
84 /* fp->fp_cum_io_partial is only nonzero when doing partial writes */
85 cum_io = fp->fp_cum_io_partial;
86 } else {
87 cum_io = 0;
88 }
89 op = (rw_flag == READING ? DEV_READ : DEV_WRITE);
90 mode_word = rip->i_mode & I_TYPE;
91 regular = mode_word == I_REGULAR || mode_word == I_NAMED_PIPE;
92
93 char_spec = (mode_word == I_CHAR_SPECIAL ? 1 : 0);
94 block_spec = (mode_word == I_BLOCK_SPECIAL ? 1 : 0);
95 if (block_spec) f_size = LONG_MAX;
96 rdwt_err = OK; /* set to EIO if disk error occurs */
97
98 /* Check for character special files. */
99 if (char_spec) {
100 dev = (dev_t) rip->i_zone[0];
101 r = dev_io(op, oflags & O_NONBLOCK, dev, position, nbytes, who,buffer);
102 if (r >= 0) {
103 cum_io = r;
104 position += r;
105 r = OK;
106 }
107 } else {
108 if (rw_flag == WRITING && block_spec == 0) {
109 /* Check in advance to see if file will grow too big. */
110 if (position > rip->i_sp->s_max_size - nbytes) return(EFBIG);
111
112 /* Check for O_APPEND flag. */
113 if (oflags & O_APPEND) position = f_size;
114
115 /* Clear the zone containing present EOF if hole about
116 * to be created. This is necessary because all unwritten
117 * blocks prior to the EOF must read as zeros.
118 */
119 if (position > f_size) clear_zone(rip, f_size, 0);
120 }
121
122 /* Pipes are a little different. Check. */
123 if (rip->i_pipe == I_PIPE) {
124 r = pipe_check(rip,rw_flag,oflags,nbytes,position,&partial_cnt);
125 if (r <= 0) return(r);
126 }
127
128 if (partial_cnt > 0) partial_pipe = 1;
129
130 /* Split the transfer into chunks that don't span two blocks. */
131 while (nbytes != 0) {
132 off = (unsigned int) (position % BLOCK_SIZE);/* offset in blk*/
133 if (partial_pipe) { /* pipes only */
134 chunk = MIN(partial_cnt, BLOCK_SIZE - off);
135 } else
136 chunk = MIN(nbytes, BLOCK_SIZE - off);
137 if (chunk < 0) chunk = BLOCK_SIZE - off;
138
139 if (rw_flag == READING) {
140 bytes_left = f_size - position;
141 if (position >= f_size) break; /* we are beyond EOF */
142 if (chunk > bytes_left) chunk = (int) bytes_left;
143 }
144
145 /* Read or write 'chunk' bytes. */
146 r = rw_chunk(rip, position, off, chunk, (unsigned) nbytes,
147 rw_flag, buffer, seg, usr);
148 if (r != OK) break; /* EOF reached */
149 if (rdwt_err < 0) break;
150
151 /* Update counters and pointers. */
152 buffer += chunk; /* user buffer address */
153 nbytes -= chunk; /* bytes yet to be read */
154 cum_io += chunk; /* bytes read so far */
155 position += chunk; /* position within the file */
156
157 if (partial_pipe) {
158 partial_cnt -= chunk;
159 if (partial_cnt <= 0) break;
160 }
161 }
162 }
163
164 /* On write, update file size and access time. */
165 if (rw_flag == WRITING) {
166 if (regular || mode_word == I_DIRECTORY) {
167 if (position > f_size) rip->i_size = position;
168 }
169 } else {
170 if (rip->i_pipe == I_PIPE && position >= rip->i_size) {
171 /* Reset pipe pointers. */
172 rip->i_size = 0; /* no data left */
173 position = 0; /* reset reader(s) */
174 if ( (wf = find_filp(rip, W_BIT)) != NIL_FILP) wf->filp_pos =0;
175 }
176 }
177 f->filp_pos = position;
178
179 /* Check to see if read-ahead is called for, and if so, set it up. */
180 if (rw_flag == READING && rip->i_seek == NO_SEEK && position % BLOCK_SIZE== 0
181 && (regular || mode_word == I_DIRECTORY)) {
182 rdahed_inode = rip;
183 rdahedpos = position;
184 }
185 rip->i_seek = NO_SEEK;
186
187 if (rdwt_err != OK) r = rdwt_err; /* check for disk error */
188 if (rdwt_err == END_OF_FILE) r = OK;
189 if (r == OK) {
190 if (rw_flag == READING) rip->i_update |= ATIME;
191 if (rw_flag == WRITING) rip->i_update |= CTIME | MTIME;
192 rip->i_dirt = DIRTY; /* inode is thus now dirty */
193 if (partial_pipe) {
194 partial_pipe = 0;
195 /* partial write on pipe with */
196 /* O_NONBLOCK, return write count */
197 if (!(oflags & O_NONBLOCK)) {
198 fp->fp_cum_io_partial = cum_io;
199 suspend(XPIPE); /* partial write on pipe with */
200 return(0); /* nbyte > PIPE_SIZE - non-atomic */
201 }
202 }
203 fp->fp_cum_io_partial = 0;
204 return(cum_io);
205 } else {
206 return(r);
207 }
208 }
209
210
211 /*===========================================================================*
212 * rw_chunk *
213 *===========================================================================*/
214 PRIVATE int rw_chunk(rip, position, off, chunk, left, rw_flag, buff, seg, usr)
215 register struct inode *rip; /* pointer to inode for file to be rd/wr */
216 off_t position; /* position within file to read or write */
217 unsigned off; /* off within the current block */
218 int chunk; /* number of bytes to read or write */
219 unsigned left; /* max number of bytes wanted after position */
220 int rw_flag; /* READING or WRITING */
221 char *buff; /* virtual address of the user buffer */
222 int seg; /* T or D segment in user space */
223 int usr; /* which user process */
224 {
225 /* Read or write (part of) a block. */
226
227 register struct buf *bp;
228 register int r;
229 int n, block_spec;
230 block_t b;
231 dev_t dev;
232
233 block_spec = (rip->i_mode & I_TYPE) == I_BLOCK_SPECIAL;
234 if (block_spec) {
235 b = position/BLOCK_SIZE;
236 dev = (dev_t) rip->i_zone[0];
237 } else {
238 b = read_map(rip, position);
239 dev = rip->i_dev;
240 }
241
242 if (!block_spec && b == NO_BLOCK) {
243 if (rw_flag == READING) {
244 /* Reading from a nonexistent block. Must read as all zeros.*/
245 bp = get_block(NO_DEV, NO_BLOCK, NORMAL); /* get a buffer */
246 zero_block(bp);
247 } else {
248 /* Writing to a nonexistent block. Create and enter in inode.*/
249 if ((bp= new_block(rip, position)) == NIL_BUF)return(err_code);
250 }
251 } else if (rw_flag == READING) {
252 /* Read and read ahead if convenient. */
253 bp = rahead(rip, b, position, left);
254 } else {
255 /* Normally an existing block to be partially overwritten is first read
256 * in. However, a full block need not be read in. If it is already in
257 * the cache, acquire it, otherwise just acquire a free buffer.
258 */
259 n = (chunk == BLOCK_SIZE ? NO_READ : NORMAL);
260 if (!block_spec && off == 0 && position >= rip->i_size) n = NO_READ;
261 bp = get_block(dev, b, n);
262 }
263
264 /* In all cases, bp now points to a valid buffer. */
265 if (rw_flag == WRITING && chunk != BLOCK_SIZE && !block_spec &&
266 position >= rip->i_size && off == 0) {
267 zero_block(bp);
268 }
269 if (rw_flag == READING) {
270 /* Copy a chunk from the block buffer to user space. */
271 r = sys_copy(FS_PROC_NR, D, (phys_bytes) (bp->b_data+off),
272 usr, seg, (phys_bytes) buff,
273 (phys_bytes) chunk);
274 } else {
275 /* Copy a chunk from user space to the block buffer. */
276 r = sys_copy(usr, seg, (phys_bytes) buff,
277 FS_PROC_NR, D, (phys_bytes) (bp->b_data+off),
278 (phys_bytes) chunk);
279 bp->b_dirt = DIRTY;
280 }
281 n = (off + chunk == BLOCK_SIZE ? FULL_DATA_BLOCK : PARTIAL_DATA_BLOCK);
282 put_block(bp, n);
283 return(r);
284 }
285
286
287 /*===========================================================================*
288 * read_map *
289 *===========================================================================*/
290 PUBLIC block_t read_map(rip, position)
291 register struct inode *rip; /* ptr to inode to map from */
292 off_t position; /* position in file whose blk wanted */
293 {
294 /* Given an inode and a position within the corresponding file, locate the
295 * block (not zone) number in which that position is to be found and return it.
296 */
297
298 register struct buf *bp;
299 register zone_t z;
300 int scale, boff, dzones, nr_indirects, index, zind, ex;
301 block_t b;
302 long excess, zone, block_pos;
303
304 scale = rip->i_sp->s_log_zone_size; /* for block-zone conversion */
305 block_pos = position/BLOCK_SIZE; /* relative blk # in file */
306 zone = block_pos >> scale; /* position's zone */
307 boff = (int) (block_pos - (zone << scale) ); /* relative blk # within zone */
308 dzones = rip->i_ndzones;
309 nr_indirects = rip->i_nindirs;
310
311 /* Is 'position' to be found in the inode itself? */
312 if (zone < dzones) {
313 zind = (int) zone; /* index should be an int */
314 z = rip->i_zone[zind];
315 if (z == NO_ZONE) return(NO_BLOCK);
316 b = ((block_t) z << scale) + boff;
317 return(b);
318 }
319
320 /* It is not in the inode, so it must be single or double indirect. */
321 excess = zone - dzones; /* first Vx_NR_DZONES don't count */
322
323 if (excess < nr_indirects) {
324 /* 'position' can be located via the single indirect block. */
325 z = rip->i_zone[dzones];
326 } else {
327 /* 'position' can be located via the double indirect block. */
328 if ( (z = rip->i_zone[dzones+1]) == NO_ZONE) return(NO_BLOCK);
329 excess -= nr_indirects; /* single indir doesn't count*/
330 b = (block_t) z << scale;
331 bp = get_block(rip->i_dev, b, NORMAL); /* get double indirect block */
332 index = (int) (excess/nr_indirects);
333 z = rd_indir(bp, index); /* z= zone for single*/
334 put_block(bp, INDIRECT_BLOCK); /* release double ind block */
335 excess = excess % nr_indirects; /* index into single ind blk */
336 }
337
338 /* 'z' is zone num for single indirect block; 'excess' is index into it. */
339 if (z == NO_ZONE) return(NO_BLOCK);
340 b = (block_t) z << scale; /* b is blk # for single ind */
341 bp = get_block(rip->i_dev, b, NORMAL); /* get single indirect block */
342 ex = (int) excess; /* need an integer */
343 z = rd_indir(bp, ex); /* get block pointed to */
344 put_block(bp, INDIRECT_BLOCK); /* release single indir blk */
345 if (z == NO_ZONE) return(NO_BLOCK);
346 b = ((block_t) z << scale) + boff;
347 return(b);
348 }
349
350
351 /*===========================================================================*
352 * rd_indir *
353 *===========================================================================*/
354 PUBLIC zone_t rd_indir(bp, index)
355 struct buf *bp; /* pointer to indirect block */
356 int index; /* index into *bp */
357 {
358 /* Given a pointer to an indirect block, read one entry. The reason for
359 * making a separate routine out of this is that there are four cases:
360 * V1 (IBM and 68000), and V2 (IBM and 68000).
361 */
362
363 struct super_block *sp;
364 zone_t zone; /* V2 zones are longs (shorts in V1) */
365
366 sp = get_super(bp->b_dev); /* need super block to find file sys type */
367
368 /* read a zone from an indirect block */
369 if (sp->s_version == V1)
370 zone = (zone_t) conv2(sp->s_native, (int) bp->b_v1_ind[index]);
371 else
372 zone = (zone_t) conv4(sp->s_native, (long) bp->b_v2_ind[index]);
373
374 if (zone != NO_ZONE &&
375 (zone < (zone_t) sp->s_firstdatazone || zone >= sp->s_zones)) {
376 printf("Illegal zone number %ld in indirect block, index %d\n",
377 (long) zone, index);
378 panic("check file system", NO_NUM);
379 }
380 return(zone);
381 }
382
383
384 /*===========================================================================*
385 * read_ahead *
386 *===========================================================================*/
387 PUBLIC void read_ahead()
388 {
389 /* Read a block into the cache before it is needed. */
390
391 register struct inode *rip;
392 struct buf *bp;
393 block_t b;
394
395 rip = rdahed_inode; /* pointer to inode to read ahead from */
396 rdahed_inode = NIL_INODE; /* turn off read ahead */
397 if ( (b = read_map(rip, rdahedpos)) == NO_BLOCK) return; /* at EOF */
398 bp = rahead(rip, b, rdahedpos, BLOCK_SIZE);
399 put_block(bp, PARTIAL_DATA_BLOCK);
400 }
401
402
403 /*===========================================================================*
404 * rahead *
405 *===========================================================================*/
406 PUBLIC struct buf *rahead(rip, baseblock, position, bytes_ahead)
407 register struct inode *rip; /* pointer to inode for file to be read */
408 block_t baseblock; /* block at current position */
409 off_t position; /* position within file */
410 unsigned bytes_ahead; /* bytes beyond position for immediate use */
411 {
412 /* Fetch a block from the cache or the device. If a physical read is
413 * required, prefetch as many more blocks as convenient into the cache.
414 * This usually covers bytes_ahead and is at least BLOCKS_MINIMUM.
415 * The device driver may decide it knows better and stop reading at a
416 * cylinder boundary (or after an error). Rw_scattered() puts an optional
417 * flag on all reads to allow this.
418 */
419
420 /* Minimum number of blocks to prefetch. */
421 # define BLOCKS_MINIMUM (NR_BUFS < 50 ? 18 : 32)
422
423 int block_spec, scale, read_q_size;
424 unsigned int blocks_ahead, fragment;
425 block_t block, blocks_left;
426 off_t ind1_pos;
427 dev_t dev;
428 struct buf *bp;
429 static struct buf *read_q[NR_BUFS];
430
431 block_spec = (rip->i_mode & I_TYPE) == I_BLOCK_SPECIAL;
432 if (block_spec) {
433 dev = (dev_t) rip->i_zone[0];
434 } else {
435 dev = rip->i_dev;
436 }
437
438 block = baseblock;
439 bp = get_block(dev, block, PREFETCH);
440 if (bp->b_dev != NO_DEV) return(bp);
441
442 /* The best guess for the number of blocks to prefetch: A lot.
443 * It is impossible to tell what the device looks like, so we don't even
444 * try to guess the geometry, but leave it to the driver.
445 *
446 * The floppy driver can read a full track with no rotational delay, and it
447 * avoids reading partial tracks if it can, so handing it enough buffers to
448 * read two tracks is perfect. (Two, because some diskette types have
449 * an odd number of sectors per track, so a block may span tracks.)
450 *
451 * The disk drivers don't try to be smart. With todays disks it is
452 * impossible to tell what the real geometry looks like, so it is best to
453 * read as much as you can. With luck the caching on the drive allows
454 * for a little time to start the next read.
455 *
456 * The current solution below is a bit of a hack, it just reads blocks from
457 * the current file position hoping that more of the file can be found. A
458 * better solution must look at the already available zone pointers and
459 * indirect blocks (but don't call read_map!).
460 */
461
462 fragment = position % BLOCK_SIZE;
463 position -= fragment;
464 bytes_ahead += fragment;
465
466 blocks_ahead = (bytes_ahead + BLOCK_SIZE - 1) / BLOCK_SIZE;
467
468 if (block_spec && rip->i_size == 0) {
469 blocks_left = NR_IOREQS;
470 } else {
471 blocks_left = (rip->i_size - position + BLOCK_SIZE - 1) / BLOCK_SIZE;
472
473 /* Go for the first indirect block if we are in its neighborhood. */
474 if (!block_spec) {
475 scale = rip->i_sp->s_log_zone_size;
476 ind1_pos = (off_t) rip->i_ndzones * (BLOCK_SIZE << scale);
477 if (position <= ind1_pos && rip->i_size > ind1_pos) {
478 blocks_ahead++;
479 blocks_left++;
480 }
481 }
482 }
483
484 /* No more than the maximum request. */
485 if (blocks_ahead > NR_IOREQS) blocks_ahead = NR_IOREQS;
486
487 /* Read at least the minimum number of blocks, but not after a seek. */
488 if (blocks_ahead < BLOCKS_MINIMUM && rip->i_seek == NO_SEEK)
489 blocks_ahead = BLOCKS_MINIMUM;
490
491 /* Can't go past end of file. */
492 if (blocks_ahead > blocks_left) blocks_ahead = blocks_left;
493
494 read_q_size = 0;
495
496 /* Acquire block buffers. */
497 for (;;) {
498 read_q[read_q_size++] = bp;
499
500 if (--blocks_ahead == 0) break;
501
502 /* Don't trash the cache, leave 4 free. */
503 if (bufs_in_use >= NR_BUFS - 4) break;
504
505 block++;
506
507 bp = get_block(dev, block, PREFETCH);
508 if (bp->b_dev != NO_DEV) {
509 /* Oops, block already in the cache, get out. */
510 put_block(bp, FULL_DATA_BLOCK);
511 break;
512 }
513 }
514 rw_scattered(dev, read_q, read_q_size, READING);
515 return(get_block(dev, baseblock, NORMAL));
516 }
517
/* This page was automatically generated by the LXR engine.
 * Visit the LXR main site for more information.
 */