~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Minix Cross Reference
Minix/fs/read.c


  1 /* This file contains the heart of the mechanism used to read (and write)
  2  * files.  Read and write requests are split up into chunks that do not cross
  3  * block boundaries.  Each chunk is then processed in turn.  Reads on special
  4  * files are also detected and handled.
  5  *
  6  * The entry points into this file are
  7  *   do_read:    perform the READ system call by calling read_write
  8  *   read_write: actually do the work of READ and WRITE
  9  *   read_map:   given an inode and file position, look up its zone number
 10  *   rd_indir:   read an entry in an indirect block 
 11  *   read_ahead: manage the block read ahead business
 12  */
 13 
 14 #include "fs.h"
 15 #include <fcntl.h>
 16 #include <minix/com.h>
 17 #include "buf.h"
 18 #include "file.h"
 19 #include "fproc.h"
 20 #include "inode.h"
 21 #include "param.h"
 22 #include "super.h"
 23 
  24 #define FD_MASK          077    /* low 6 bits of 'fd': descriptors 0..63 */
  25 
  26 PRIVATE message umess;          /* message for asking SYSTASK for user copy;
                                 * NOTE(review): not referenced anywhere else
                                 * in this file - confirm it is still needed.
                                 */
  27 
/* rw_chunk is private to this file; it is declared here because read_write
 * calls it before its definition appears.
 */
  28 FORWARD _PROTOTYPE( int rw_chunk, (struct inode *rip, off_t position,
  29                         unsigned off, int chunk, unsigned left, int rw_flag,
  30                         char *buff, int seg, int usr)                   );
 31 
 32 /*===========================================================================*
 33  *                              do_read                                      *
 34  *===========================================================================*/
 35 PUBLIC int do_read()
 36 {
 37   return(read_write(READING));
 38 }
 39 
 40 
  41 /*===========================================================================*
  42  *                              read_write                                   *
  43  *===========================================================================*/
  44 PUBLIC int read_write(rw_flag)
  45 int rw_flag;                    /* READING or WRITING */
  46 {
  47 /* Perform read(fd, buffer, nbytes) or write(fd, buffer, nbytes) call.
 *
 * The request is validated, then either handed to dev_io() (character
 * special files) or split into chunks that never cross a block boundary,
 * each of which is transferred by rw_chunk().  Afterwards the file position,
 * file size, read-ahead hints and inode time-update flags are adjusted.
 *
 * Returns the number of bytes transferred (cum_io) when the final status is
 * OK; 0 when nbytes is 0 or when a blocking partial pipe write has been
 * suspended; otherwise an error code (EINVAL, EBADF, EIO, EFBIG, err_code,
 * or the status produced by pipe_check/dev_io/rw_chunk/rdwt_err).
 *
 * NOTE(review): fd, buffer, nbytes, who, fp, err_code and rdwt_err are not
 * declared in this file; presumably they come from the included headers
 * (request-message fields and FS globals) - confirm.
 */
  48 
  49   register struct inode *rip;
  50   register struct filp *f;
  51   off_t bytes_left, f_size, position;
  52   unsigned int off, cum_io;
  53   int op, oflags, r, chunk, usr, seg, block_spec, char_spec;
  54   int regular, partial_pipe = 0, partial_cnt = 0;
  55   dev_t dev;
  56   mode_t mode_word;
  57   struct filp *wf;
  58 
  59   /* When MM is the caller and 'fd' has bits set outside BYTE, the extra bits
   * select the process (bits 8 and up, masked with BYTE) and the segment
   * (bits 6-7) to copy to/from; the low 6 bits are the real descriptor.
   */
  60   if (who == MM_PROC_NR && (fd & (~BYTE)) ) {
  61         usr = (fd >> 8) & BYTE;
  62         seg = (fd >> 6) & 03;
  63         fd &= FD_MASK;          /* get rid of user and segment bits */
  64   } else {
  65         usr = who;              /* normal case: copy to/from the caller */
  66         seg = D;
  67   }
  68 
  69   /* Validate the request; if the descriptor is valid, get the inode, size
   * and mode.
   */
  70   if (nbytes < 0) return(EINVAL);
  71   if ((f = get_filp(fd)) == NIL_FILP) return(err_code);
  72   if (((f->filp_mode) & (rw_flag == READING ? R_BIT : W_BIT)) == 0) {
  73         return(f->filp_mode == FILP_CLOSED ? EIO : EBADF);
  74   }
  75   if (nbytes == 0) return(0);   /* so char special files need not check for 0*/
  76   position = f->filp_pos;
  77   if (position > MAX_FILE_POS) return(EINVAL);
  78   if (position + nbytes < position) return(EINVAL); /* position+nbytes wrapped */
  79   oflags = f->filp_flags;
  80   rip = f->filp_ino;
  81   f_size = rip->i_size;
  82   r = OK;
  83   if (rip->i_pipe == I_PIPE) {
  84         /* fp->fp_cum_io_partial is only nonzero when doing partial writes;
         * it carries the byte count already written before a suspend.
         */
  85         cum_io = fp->fp_cum_io_partial; 
  86   } else {
  87         cum_io = 0;
  88   }
  89   op = (rw_flag == READING ? DEV_READ : DEV_WRITE);
  90   mode_word = rip->i_mode & I_TYPE;
  91   regular = mode_word == I_REGULAR || mode_word == I_NAMED_PIPE;
  92 
  93   char_spec = (mode_word == I_CHAR_SPECIAL ? 1 : 0);
  94   block_spec = (mode_word == I_BLOCK_SPECIAL ? 1 : 0);
  95   if (block_spec) f_size = LONG_MAX;    /* no EOF cut-off for block devices */
  96   rdwt_err = OK;                /* set to EIO if disk error occurs */
  97 
  98   /* Character special files: the whole request goes to dev_io(), whose
   * non-negative result is the byte count transferred.
   */
  99   if (char_spec) {
 100         dev = (dev_t) rip->i_zone[0];  /* device number is kept in zone 0 */
 101         r = dev_io(op, oflags & O_NONBLOCK, dev, position, nbytes, who,buffer);
 102         if (r >= 0) {
 103                 cum_io = r;
 104                 position += r;
 105                 r = OK;
 106         }
 107   } else {
 108         if (rw_flag == WRITING && block_spec == 0) {
 109                 /* Check in advance to see if file will grow too big.
                 * NOTE(review): this test uses the position from before the
                 * O_APPEND adjustment just below.
                 */
 110                 if (position > rip->i_sp->s_max_size - nbytes) return(EFBIG);
 111 
 112                 /* With O_APPEND every write starts at the current EOF. */
 113                 if (oflags & O_APPEND) position = f_size;
 114 
 115                 /* Clear the zone containing present EOF if hole about
 116                  * to be created.  This is necessary because all unwritten
 117                  * blocks prior to the EOF must read as zeros.
 118                  */
 119                 if (position > f_size) clear_zone(rip, f_size, 0);
 120         }
 121 
 122         /* Pipes are a little different.  pipe_check() decides whether the
         * transfer may go ahead; a result <= 0 is returned as is.  It may
         * also set partial_cnt to the byte count for a partial write.
         */
 123         if (rip->i_pipe == I_PIPE) {
 124                r = pipe_check(rip,rw_flag,oflags,nbytes,position,&partial_cnt);
 125                if (r <= 0) return(r);
 126         }
 127 
 128         if (partial_cnt > 0) partial_pipe = 1;
 129 
 130         /* Split the transfer into chunks that don't span two blocks. */
 131         while (nbytes != 0) {
 132                 off = (unsigned int) (position % BLOCK_SIZE);/* offset in blk*/
 133                 if (partial_pipe) {  /* pipes only: limited by partial_cnt */
 134                         chunk = MIN(partial_cnt, BLOCK_SIZE - off);
 135                 } else
 136                         chunk = MIN(nbytes, BLOCK_SIZE - off);
 137                 if (chunk < 0) chunk = BLOCK_SIZE - off; /* fall back to rest of block */
 138 
 139                 if (rw_flag == READING) {
 140                         bytes_left = f_size - position;
 141                         if (position >= f_size) break;  /* we are beyond EOF */
 142                         if (chunk > bytes_left) chunk = (int) bytes_left;
 143                 }
 144 
 145                 /* Read or write 'chunk' bytes. */
 146                 r = rw_chunk(rip, position, off, chunk, (unsigned) nbytes,
 147                              rw_flag, buffer, seg, usr);
 148                 if (r != OK) break;     /* rw_chunk reported a non-OK status */
 149                 if (rdwt_err < 0) break;        /* error flagged in rdwt_err */
 150 
 151                 /* Update counters and pointers. */
 152                 buffer += chunk;        /* user buffer address */
 153                 nbytes -= chunk;        /* bytes yet to be read */
 154                 cum_io += chunk;        /* bytes read so far */
 155                 position += chunk;      /* position within the file */
 156 
 157                 if (partial_pipe) {
 158                         partial_cnt -= chunk;
 159                         if (partial_cnt <= 0)  break;  /* partial quota used up */
 160                 }
 161         }
 162   }
 163 
 164   /* On write, grow the recorded file size if we wrote past the old EOF.
   * On read, an emptied pipe is reset so it can be refilled from offset 0.
   */
 165   if (rw_flag == WRITING) {
 166         if (regular || mode_word == I_DIRECTORY) {
 167                 if (position > f_size) rip->i_size = position;
 168         }
 169   } else {
 170         if (rip->i_pipe == I_PIPE && position >= rip->i_size) {
 171                 /* Reset pipe pointers. */
 172                 rip->i_size = 0;        /* no data left */
 173                 position = 0;           /* reset reader(s) */
 174                 if ( (wf = find_filp(rip, W_BIT)) != NIL_FILP) wf->filp_pos =0;
 175         }
 176   }
 177   f->filp_pos = position;
 178 
 179   /* Check to see if read-ahead is called for, and if so, set it up: only
   * for reads without a preceding seek that ended on a block boundary of a
   * regular file, named pipe or directory.  read_ahead() consumes the hint.
   */
 180   if (rw_flag == READING && rip->i_seek == NO_SEEK && position % BLOCK_SIZE== 0
 181                 && (regular || mode_word == I_DIRECTORY)) {
 182         rdahed_inode = rip;
 183         rdahedpos = position;
 184   }
 185   rip->i_seek = NO_SEEK;
 186 
 187   if (rdwt_err != OK) r = rdwt_err;     /* check for disk error */
 188   if (rdwt_err == END_OF_FILE) r = OK;  /* END_OF_FILE is not an error */
 189   if (r == OK) {
 190         if (rw_flag == READING) rip->i_update |= ATIME;
 191         if (rw_flag == WRITING) rip->i_update |= CTIME | MTIME;
 192         rip->i_dirt = DIRTY;            /* inode is thus now dirty */
 193         if (partial_pipe) {
 194                 partial_pipe = 0;
 195                         /* Partial write on a pipe.  With O_NONBLOCK we */
 196                         /* fall through and return the write count. */
 197                 if (!(oflags & O_NONBLOCK)) {
                        /* Blocking writer: remember the count written so
                         * far and suspend the caller on the pipe.
                         */
 198                         fp->fp_cum_io_partial = cum_io;
 199                         suspend(XPIPE); /* partial write on pipe with */
 200                         return(0);      /* nbyte > PIPE_SIZE - non-atomic */
 201                 }
 202         }
 203         fp->fp_cum_io_partial = 0;
 204         return(cum_io);
 205   } else {
 206         return(r);
 207   }
 208 }
209 
210 
211 /*===========================================================================*
212  *                              rw_chunk                                     *
213  *===========================================================================*/
214 PRIVATE int rw_chunk(rip, position, off, chunk, left, rw_flag, buff, seg, usr)
215 register struct inode *rip;     /* pointer to inode for file to be rd/wr */
216 off_t position;                 /* position within file to read or write */
217 unsigned off;                   /* off within the current block */
218 int chunk;                      /* number of bytes to read or write */
219 unsigned left;                  /* max number of bytes wanted after position */
220 int rw_flag;                    /* READING or WRITING */
221 char *buff;                     /* virtual address of the user buffer */
222 int seg;                        /* T or D segment in user space */
223 int usr;                        /* which user process */
224 {
225 /* Read or write (part of) a block. */
226 
227   register struct buf *bp;
228   register int r;
229   int n, block_spec;
230   block_t b;
231   dev_t dev;
232 
233   block_spec = (rip->i_mode & I_TYPE) == I_BLOCK_SPECIAL;
234   if (block_spec) {
235         b = position/BLOCK_SIZE;
236         dev = (dev_t) rip->i_zone[0];
237   } else {
238         b = read_map(rip, position);
239         dev = rip->i_dev;
240   }
241 
242   if (!block_spec && b == NO_BLOCK) {
243         if (rw_flag == READING) {
244                 /* Reading from a nonexistent block.  Must read as all zeros.*/
245                 bp = get_block(NO_DEV, NO_BLOCK, NORMAL);    /* get a buffer */
246                 zero_block(bp);
247         } else {
248                 /* Writing to a nonexistent block. Create and enter in inode.*/
249                 if ((bp= new_block(rip, position)) == NIL_BUF)return(err_code);
250         }
251   } else if (rw_flag == READING) {
252         /* Read and read ahead if convenient. */
253         bp = rahead(rip, b, position, left);
254   } else {
255         /* Normally an existing block to be partially overwritten is first read
256          * in.  However, a full block need not be read in.  If it is already in
257          * the cache, acquire it, otherwise just acquire a free buffer.
258          */
259         n = (chunk == BLOCK_SIZE ? NO_READ : NORMAL);
260         if (!block_spec && off == 0 && position >= rip->i_size) n = NO_READ;
261         bp = get_block(dev, b, n);
262   }
263 
264   /* In all cases, bp now points to a valid buffer. */
265   if (rw_flag == WRITING && chunk != BLOCK_SIZE && !block_spec &&
266                                         position >= rip->i_size && off == 0) {
267         zero_block(bp);
268   }
269   if (rw_flag == READING) {
270         /* Copy a chunk from the block buffer to user space. */
271         r = sys_copy(FS_PROC_NR, D, (phys_bytes) (bp->b_data+off),
272                         usr, seg, (phys_bytes) buff,
273                         (phys_bytes) chunk);
274   } else {
275         /* Copy a chunk from user space to the block buffer. */
276         r = sys_copy(usr, seg, (phys_bytes) buff,
277                         FS_PROC_NR, D, (phys_bytes) (bp->b_data+off),
278                         (phys_bytes) chunk);
279         bp->b_dirt = DIRTY;
280   }
281   n = (off + chunk == BLOCK_SIZE ? FULL_DATA_BLOCK : PARTIAL_DATA_BLOCK);
282   put_block(bp, n);
283   return(r);
284 }
285 
286 
287 /*===========================================================================*
288  *                              read_map                                     *
289  *===========================================================================*/
290 PUBLIC block_t read_map(rip, position)
291 register struct inode *rip;     /* ptr to inode to map from */
292 off_t position;                 /* position in file whose blk wanted */
293 {
294 /* Given an inode and a position within the corresponding file, locate the
295  * block (not zone) number in which that position is to be found and return it.
296  */
297 
298   register struct buf *bp;
299   register zone_t z;
300   int scale, boff, dzones, nr_indirects, index, zind, ex;
301   block_t b;
302   long excess, zone, block_pos;
303   
304   scale = rip->i_sp->s_log_zone_size;   /* for block-zone conversion */
305   block_pos = position/BLOCK_SIZE;      /* relative blk # in file */
306   zone = block_pos >> scale;    /* position's zone */
307   boff = (int) (block_pos - (zone << scale) ); /* relative blk # within zone */
308   dzones = rip->i_ndzones;
309   nr_indirects = rip->i_nindirs;
310 
311   /* Is 'position' to be found in the inode itself? */
312   if (zone < dzones) {
313         zind = (int) zone;      /* index should be an int */
314         z = rip->i_zone[zind];
315         if (z == NO_ZONE) return(NO_BLOCK);
316         b = ((block_t) z << scale) + boff;
317         return(b);
318   }
319 
320   /* It is not in the inode, so it must be single or double indirect. */
321   excess = zone - dzones;       /* first Vx_NR_DZONES don't count */
322 
323   if (excess < nr_indirects) {
324         /* 'position' can be located via the single indirect block. */
325         z = rip->i_zone[dzones];
326   } else {
327         /* 'position' can be located via the double indirect block. */
328         if ( (z = rip->i_zone[dzones+1]) == NO_ZONE) return(NO_BLOCK);
329         excess -= nr_indirects;                 /* single indir doesn't count*/
330         b = (block_t) z << scale;
331         bp = get_block(rip->i_dev, b, NORMAL);  /* get double indirect block */
332         index = (int) (excess/nr_indirects);
333         z = rd_indir(bp, index);                /* z= zone for single*/
334         put_block(bp, INDIRECT_BLOCK);          /* release double ind block */
335         excess = excess % nr_indirects;         /* index into single ind blk */
336   }
337 
338   /* 'z' is zone num for single indirect block; 'excess' is index into it. */
339   if (z == NO_ZONE) return(NO_BLOCK);
340   b = (block_t) z << scale;                     /* b is blk # for single ind */
341   bp = get_block(rip->i_dev, b, NORMAL);        /* get single indirect block */
342   ex = (int) excess;                            /* need an integer */
343   z = rd_indir(bp, ex);                         /* get block pointed to */
344   put_block(bp, INDIRECT_BLOCK);                /* release single indir blk */
345   if (z == NO_ZONE) return(NO_BLOCK);
346   b = ((block_t) z << scale) + boff;
347   return(b);
348 }
349 
350 
351 /*===========================================================================*
352  *                              rd_indir                                     *
353  *===========================================================================*/
354 PUBLIC zone_t rd_indir(bp, index)
355 struct buf *bp;                 /* pointer to indirect block */
356 int index;                      /* index into *bp */
357 {
358 /* Given a pointer to an indirect block, read one entry.  The reason for
359  * making a separate routine out of this is that there are four cases:
360  * V1 (IBM and 68000), and V2 (IBM and 68000).
361  */
362 
363   struct super_block *sp;
364   zone_t zone;                  /* V2 zones are longs (shorts in V1) */
365 
366   sp = get_super(bp->b_dev);    /* need super block to find file sys type */
367 
368   /* read a zone from an indirect block */
369   if (sp->s_version == V1)
370         zone = (zone_t) conv2(sp->s_native, (int)  bp->b_v1_ind[index]);
371   else
372         zone = (zone_t) conv4(sp->s_native, (long) bp->b_v2_ind[index]);
373 
374   if (zone != NO_ZONE &&
375                 (zone < (zone_t) sp->s_firstdatazone || zone >= sp->s_zones)) {
376         printf("Illegal zone number %ld in indirect block, index %d\n",
377                (long) zone, index);
378         panic("check file system", NO_NUM);
379   }
380   return(zone);
381 }
382 
383 
384 /*===========================================================================*
385  *                              read_ahead                                   *
386  *===========================================================================*/
387 PUBLIC void read_ahead()
388 {
389 /* Read a block into the cache before it is needed. */
390 
391   register struct inode *rip;
392   struct buf *bp;
393   block_t b;
394 
395   rip = rdahed_inode;           /* pointer to inode to read ahead from */
396   rdahed_inode = NIL_INODE;     /* turn off read ahead */
397   if ( (b = read_map(rip, rdahedpos)) == NO_BLOCK) return;      /* at EOF */
398   bp = rahead(rip, b, rdahedpos, BLOCK_SIZE);
399   put_block(bp, PARTIAL_DATA_BLOCK);
400 }
401 
402 
403 /*===========================================================================*
404  *                              rahead                                       *
405  *===========================================================================*/
406 PUBLIC struct buf *rahead(rip, baseblock, position, bytes_ahead)
407 register struct inode *rip;     /* pointer to inode for file to be read */
408 block_t baseblock;              /* block at current position */
409 off_t position;                 /* position within file */
410 unsigned bytes_ahead;           /* bytes beyond position for immediate use */
411 {
412 /* Fetch a block from the cache or the device.  If a physical read is
413  * required, prefetch as many more blocks as convenient into the cache.
414  * This usually covers bytes_ahead and is at least BLOCKS_MINIMUM.
415  * The device driver may decide it knows better and stop reading at a
416  * cylinder boundary (or after an error).  Rw_scattered() puts an optional
417  * flag on all reads to allow this.
418  */
419 
420 /* Minimum number of blocks to prefetch. */
421 # define BLOCKS_MINIMUM         (NR_BUFS < 50 ? 18 : 32)
422 
423   int block_spec, scale, read_q_size;
424   unsigned int blocks_ahead, fragment;
425   block_t block, blocks_left;
426   off_t ind1_pos;
427   dev_t dev;
428   struct buf *bp;
429   static struct buf *read_q[NR_BUFS];
430 
431   block_spec = (rip->i_mode & I_TYPE) == I_BLOCK_SPECIAL;
432   if (block_spec) {
433         dev = (dev_t) rip->i_zone[0];
434   } else {
435         dev = rip->i_dev;
436   }
437 
438   block = baseblock;
439   bp = get_block(dev, block, PREFETCH);
440   if (bp->b_dev != NO_DEV) return(bp);
441 
442   /* The best guess for the number of blocks to prefetch:  A lot.
443    * It is impossible to tell what the device looks like, so we don't even
444    * try to guess the geometry, but leave it to the driver.
445    *
446    * The floppy driver can read a full track with no rotational delay, and it
447    * avoids reading partial tracks if it can, so handing it enough buffers to
448    * read two tracks is perfect.  (Two, because some diskette types have
449    * an odd number of sectors per track, so a block may span tracks.)
450    *
451    * The disk drivers don't try to be smart.  With todays disks it is
452    * impossible to tell what the real geometry looks like, so it is best to
453    * read as much as you can.  With luck the caching on the drive allows
454    * for a little time to start the next read.
455    *
456    * The current solution below is a bit of a hack, it just reads blocks from
457    * the current file position hoping that more of the file can be found.  A
458    * better solution must look at the already available zone pointers and
459    * indirect blocks (but don't call read_map!).
460    */
461 
462   fragment = position % BLOCK_SIZE;
463   position -= fragment;
464   bytes_ahead += fragment;
465 
466   blocks_ahead = (bytes_ahead + BLOCK_SIZE - 1) / BLOCK_SIZE;
467 
468   if (block_spec && rip->i_size == 0) {
469         blocks_left = NR_IOREQS;
470   } else {
471         blocks_left = (rip->i_size - position + BLOCK_SIZE - 1) / BLOCK_SIZE;
472 
473         /* Go for the first indirect block if we are in its neighborhood. */
474         if (!block_spec) {
475                 scale = rip->i_sp->s_log_zone_size;
476                 ind1_pos = (off_t) rip->i_ndzones * (BLOCK_SIZE << scale);
477                 if (position <= ind1_pos && rip->i_size > ind1_pos) {
478                         blocks_ahead++;
479                         blocks_left++;
480                 }
481         }
482   }
483 
484   /* No more than the maximum request. */
485   if (blocks_ahead > NR_IOREQS) blocks_ahead = NR_IOREQS;
486 
487   /* Read at least the minimum number of blocks, but not after a seek. */
488   if (blocks_ahead < BLOCKS_MINIMUM && rip->i_seek == NO_SEEK)
489         blocks_ahead = BLOCKS_MINIMUM;
490 
491   /* Can't go past end of file. */
492   if (blocks_ahead > blocks_left) blocks_ahead = blocks_left;
493 
494   read_q_size = 0;
495 
496   /* Acquire block buffers. */
497   for (;;) {
498         read_q[read_q_size++] = bp;
499 
500         if (--blocks_ahead == 0) break;
501 
502         /* Don't trash the cache, leave 4 free. */
503         if (bufs_in_use >= NR_BUFS - 4) break;
504 
505         block++;
506 
507         bp = get_block(dev, block, PREFETCH);
508         if (bp->b_dev != NO_DEV) {
509                 /* Oops, block already in the cache, get out. */
510                 put_block(bp, FULL_DATA_BLOCK);
511                 break;
512         }
513   }
514   rw_scattered(dev, read_q, read_q_size, READING);
515   return(get_block(dev, baseblock, NORMAL));
516 }
517 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.