~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

Minix Cross Reference
Minix/fs/cache.c


  1 /* The file system maintains a buffer cache to reduce the number of disk
  2  * accesses needed.  Whenever a read or write to the disk is done, a check is
  3  * first made to see if the block is in the cache.  This file manages the
  4  * cache.
  5  *
  6  * The entry points into this file are:
  7  *   get_block:   request to fetch a block for reading or writing from cache
  8  *   put_block:   return a block previously requested with get_block
  9  *   alloc_zone:  allocate a new zone (to increase the length of a file)
 10  *   free_zone:   release a zone (when a file is removed)
 11  *   rw_block:    read or write a block from the disk itself
 12  *   invalidate:  remove all the cache blocks on some device
 13  */
 14 
 15 #include "fs.h"
 16 #include <minix/com.h>
 17 #include <minix/boot.h>
 18 #include "buf.h"
 19 #include "file.h"
 20 #include "fproc.h"
 21 #include "super.h"
 22 
 23 FORWARD _PROTOTYPE( void rm_lru, (struct buf *bp) );
 24 
 25 /*===========================================================================*
 26  *                              get_block                                    *
 27  *===========================================================================*/
 28 PUBLIC struct buf *get_block(dev, block, only_search)
 29 register dev_t dev;             /* on which device is the block? */
 30 register block_t block;         /* which block is wanted? */
 31 int only_search;                /* if NO_READ, don't read, else act normal */
 32 {
 33 /* Check to see if the requested block is in the block cache.  If so, return
 34  * a pointer to it.  If not, evict some other block and fetch it (unless
 35  * 'only_search' is 1).  All the blocks in the cache that are not in use
 36  * are linked together in a chain, with 'front' pointing to the least recently
 37  * used block and 'rear' to the most recently used block.  If 'only_search' is
 38  * 1, the block being requested will be overwritten in its entirety, so it is
 39  * only necessary to see if it is in the cache; if it is not, any free buffer
 40  * will do.  It is not necessary to actually read the block in from disk.
 41  * If 'only_search' is PREFETCH, the block need not be read from the disk,
 42  * and the device is not to be marked on the block, so callers can tell if
 43  * the block returned is valid.
 44  * In addition to the LRU chain, there is also a hash chain to link together
 45  * blocks whose block numbers end with the same bit strings, for fast lookup.
 46  */
 47 
 48   int b;
 49   register struct buf *bp, *prev_ptr;
 50 
 51   /* Search the hash chain for (dev, block). Do_read() can use 
 52    * get_block(NO_DEV ...) to get an unnamed block to fill with zeros when
 53    * someone wants to read from a hole in a file, in which case this search
 54    * is skipped
 55    */
 56   if (dev != NO_DEV) {
 57         b = (int) block & HASH_MASK;
 58         bp = buf_hash[b];
 59         while (bp != NIL_BUF) {
 60                 if (bp->b_blocknr == block && bp->b_dev == dev) {
 61                         /* Block needed has been found. */
 62                         if (bp->b_count == 0) rm_lru(bp);
 63                         bp->b_count++;  /* record that block is in use */
 64                         return(bp);
 65                 } else {
 66                         /* This block is not the one sought. */
 67                         bp = bp->b_hash; /* move to next block on hash chain */
 68                 }
 69         }
 70   }
 71 
 72   /* Desired block is not on available chain.  Take oldest block ('front'). */
 73   if ((bp = front) == NIL_BUF) panic("all buffers in use", NR_BUFS);
 74   rm_lru(bp);
 75 
 76   /* Remove the block that was just taken from its hash chain. */
 77   b = (int) bp->b_blocknr & HASH_MASK;
 78   prev_ptr = buf_hash[b];
 79   if (prev_ptr == bp) {
 80         buf_hash[b] = bp->b_hash;
 81   } else {
 82         /* The block just taken is not on the front of its hash chain. */
 83         while (prev_ptr->b_hash != NIL_BUF)
 84                 if (prev_ptr->b_hash == bp) {
 85                         prev_ptr->b_hash = bp->b_hash;  /* found it */
 86                         break;
 87                 } else {
 88                         prev_ptr = prev_ptr->b_hash;    /* keep looking */
 89                 }
 90   }
 91 
 92   /* If the block taken is dirty, make it clean by writing it to the disk.
 93    * Avoid hysteresis by flushing all other dirty blocks for the same device.
 94    */
 95   if (bp->b_dev != NO_DEV) {
 96         if (bp->b_dirt == DIRTY) flushall(bp->b_dev);
 97 #if ENABLE_CACHE2
 98         put_block2(bp);
 99 #endif
100   }
101 
102   /* Fill in block's parameters and add it to the hash chain where it goes. */
103   bp->b_dev = dev;              /* fill in device number */
104   bp->b_blocknr = block;        /* fill in block number */
105   bp->b_count++;                /* record that block is being used */
106   b = (int) bp->b_blocknr & HASH_MASK;
107   bp->b_hash = buf_hash[b];
108   buf_hash[b] = bp;             /* add to hash list */
109 
110   /* Go get the requested block unless searching or prefetching. */
111   if (dev != NO_DEV) {
112 #if ENABLE_CACHE2
113         if (get_block2(bp, only_search)) /* in 2nd level cache */;
114         else
115 #endif
116         if (only_search == PREFETCH) bp->b_dev = NO_DEV;
117         else
118         if (only_search == NORMAL) rw_block(bp, READING);
119   }
120   return(bp);                   /* return the newly acquired block */
121 }
122 
123 
124 /*===========================================================================*
125  *                              put_block                                    *
126  *===========================================================================*/
127 PUBLIC void put_block(bp, block_type)
128 register struct buf *bp;        /* pointer to the buffer to be released */
129 int block_type;                 /* INODE_BLOCK, DIRECTORY_BLOCK, or whatever */
130 {
131 /* Return a block to the list of available blocks.   Depending on 'block_type'
132  * it may be put on the front or rear of the LRU chain.  Blocks that are
133  * expected to be needed again shortly (e.g., partially full data blocks)
134  * go on the rear; blocks that are unlikely to be needed again shortly
135  * (e.g., full data blocks) go on the front.  Blocks whose loss can hurt
136  * the integrity of the file system (e.g., inode blocks) are written to
137  * disk immediately if they are dirty.
138  */
139 
140   if (bp == NIL_BUF) return;    /* it is easier to check here than in caller */
141 
142   bp->b_count--;                /* there is one use fewer now */
143   if (bp->b_count != 0) return; /* block is still in use */
144 
145   bufs_in_use--;                /* one fewer block buffers in use */
146 
147   /* Put this block back on the LRU chain.  If the ONE_SHOT bit is set in
148    * 'block_type', the block is not likely to be needed again shortly, so put
149    * it on the front of the LRU chain where it will be the first one to be
150    * taken when a free buffer is needed later.
151    */
152   if (block_type & ONE_SHOT) {
153         /* Block probably won't be needed quickly. Put it on front of chain.
154          * It will be the next block to be evicted from the cache.
155          */
156         bp->b_prev = NIL_BUF;
157         bp->b_next = front;
158         if (front == NIL_BUF)
159                 rear = bp;      /* LRU chain was empty */
160         else
161                 front->b_prev = bp;
162         front = bp;
163   } else {
164         /* Block probably will be needed quickly.  Put it on rear of chain.
165          * It will not be evicted from the cache for a long time.
166          */
167         bp->b_prev = rear;
168         bp->b_next = NIL_BUF;
169         if (rear == NIL_BUF)
170                 front = bp;
171         else
172                 rear->b_next = bp;
173         rear = bp;
174   }
175 
176   /* Some blocks are so important (e.g., inodes, indirect blocks) that they
177    * should be written to the disk immediately to avoid messing up the file
178    * system in the event of a crash.
179    */
180   if ((block_type & WRITE_IMMED) && bp->b_dirt==DIRTY && bp->b_dev != NO_DEV)
181         rw_block(bp, WRITING);
182 }
183 
184 
185 /*===========================================================================*
186  *                              alloc_zone                                   *
187  *===========================================================================*/
188 PUBLIC zone_t alloc_zone(dev, z)
189 dev_t dev;                      /* device where zone wanted */
190 zone_t z;                       /* try to allocate new zone near this one */
191 {
192 /* Allocate a new zone on the indicated device and return its number. */
193 
194   int major, minor;
195   bit_t b, bit;
196   struct super_block *sp;
197 
198   /* Note that the routine alloc_bit() returns 1 for the lowest possible
199    * zone, which corresponds to sp->s_firstdatazone.  To convert a value
200    * between the bit number, 'b', used by alloc_bit() and the zone number, 'z',
201    * stored in the inode, use the formula:
202    *     z = b + sp->s_firstdatazone - 1
203    * Alloc_bit() never returns 0, since this is used for NO_BIT (failure).
204    */
205   sp = get_super(dev);          /* find the super_block for this device */
206 
207   /* If z is 0, skip initial part of the map known to be fully in use. */
208   if (z == sp->s_firstdatazone) {
209         bit = sp->s_zsearch;
210   } else {
211         bit = (bit_t) z - (sp->s_firstdatazone - 1);
212   }
213   b = alloc_bit(sp, ZMAP, bit);
214   if (b == NO_BIT) {
215         err_code = ENOSPC;
216         major = (int) (sp->s_dev >> MAJOR) & BYTE;
217         minor = (int) (sp->s_dev >> MINOR) & BYTE;
218         printf("No space on %sdevice %d/%d\n",
219                 sp->s_dev == ROOT_DEV ? "root " : "", major, minor);
220         return(NO_ZONE);
221   }
222   if (z == sp->s_firstdatazone) sp->s_zsearch = b;      /* for next time */
223   return(sp->s_firstdatazone - 1 + (zone_t) b);
224 }
225 
226 
227 /*===========================================================================*
228  *                              free_zone                                    *
229  *===========================================================================*/
230 PUBLIC void free_zone(dev, numb)
231 dev_t dev;                              /* device where zone located */
232 zone_t numb;                            /* zone to be returned */
233 {
234 /* Return a zone. */
235 
236   register struct super_block *sp;
237   bit_t bit;
238 
239   /* Locate the appropriate super_block and return bit. */
240   sp = get_super(dev);
241   if (numb < sp->s_firstdatazone || numb >= sp->s_zones) return;
242   bit = (bit_t) (numb - (sp->s_firstdatazone - 1));
243   free_bit(sp, ZMAP, bit);
244   if (bit < sp->s_zsearch) sp->s_zsearch = bit;
245 }
246 
247 
248 /*===========================================================================*
249  *                              rw_block                                     *
250  *===========================================================================*/
251 PUBLIC void rw_block(bp, rw_flag)
252 register struct buf *bp;        /* buffer pointer */
253 int rw_flag;                    /* READING or WRITING */
254 {
255 /* Read or write a disk block. This is the only routine in which actual disk
256  * I/O is invoked. If an error occurs, a message is printed here, but the error
257  * is not reported to the caller.  If the error occurred while purging a block
258  * from the cache, it is not clear what the caller could do about it anyway.
259  */
260 
261   int r, op;
262   off_t pos;
263   dev_t dev;
264 
265   if ( (dev = bp->b_dev) != NO_DEV) {
266         pos = (off_t) bp->b_blocknr * BLOCK_SIZE;
267         op = (rw_flag == READING ? DEV_READ : DEV_WRITE);
268         r = dev_io(op, FALSE, dev, pos, BLOCK_SIZE, FS_PROC_NR, bp->b_data);
269         if (r != BLOCK_SIZE) {
270             if (r >= 0) r = END_OF_FILE;
271             if (r != END_OF_FILE)
272               printf("Unrecoverable disk error on device %d/%d, block %ld\n",
273                         (dev>>MAJOR)&BYTE, (dev>>MINOR)&BYTE, bp->b_blocknr);
274                 bp->b_dev = NO_DEV;     /* invalidate block */
275 
276                 /* Report read errors to interested parties. */
277                 if (rw_flag == READING) rdwt_err = r;
278         }
279   }
280 
281   bp->b_dirt = CLEAN;
282 }
283 
284 
285 /*===========================================================================*
286  *                              invalidate                                   *
287  *===========================================================================*/
288 PUBLIC void invalidate(device)
289 dev_t device;                   /* device whose blocks are to be purged */
290 {
291 /* Remove all the blocks belonging to some device from the cache. */
292 
293   register struct buf *bp;
294 
295   for (bp = &buf[0]; bp < &buf[NR_BUFS]; bp++)
296         if (bp->b_dev == device) bp->b_dev = NO_DEV;
297 
298 #if ENABLE_CACHE2
299   invalidate2(device);
300 #endif
301 }
302 
303 
304 /*==========================================================================*
305  *                              flushall                                    *
306  *==========================================================================*/
307 PUBLIC void flushall(dev)
308 dev_t dev;                      /* device to flush */
309 {
310 /* Flush all dirty blocks for one device. */
311 
312   register struct buf *bp;
313   static struct buf *dirty[NR_BUFS];    /* static so it isn't on stack */
314   int ndirty;
315 
316   for (bp = &buf[0], ndirty = 0; bp < &buf[NR_BUFS]; bp++)
317         if (bp->b_dirt == DIRTY && bp->b_dev == dev) dirty[ndirty++] = bp;
318   rw_scattered(dev, dirty, ndirty, WRITING);
319 }
320 
321 
322 /*===========================================================================*
323  *                              rw_scattered                                 *
324  *===========================================================================*/
325 PUBLIC void rw_scattered(dev, bufq, bufqsize, rw_flag)
326 dev_t dev;                      /* major-minor device number */
327 struct buf **bufq;              /* pointer to array of buffers */
328 int bufqsize;                   /* number of buffers */
329 int rw_flag;                    /* READING or WRITING */
330 {
331 /* Read or write scattered data from a device. */
332 
333   register struct buf *bp;
334   int gap;
335   register int i;
336   register struct iorequest_s *iop;
337   static struct iorequest_s iovec[NR_IOREQS];  /* static so it isn't on stack */
338   int j;
339 
340   /* (Shell) sort buffers on b_blocknr. */
341   gap = 1;
342   do
343         gap = 3 * gap + 1;
344   while (gap <= bufqsize);
345   while (gap != 1) {
346         gap /= 3;
347         for (j = gap; j < bufqsize; j++) {
348                 for (i = j - gap;
349                      i >= 0 && bufq[i]->b_blocknr > bufq[i + gap]->b_blocknr;
350                      i -= gap) {
351                         bp = bufq[i];
352                         bufq[i] = bufq[i + gap];
353                         bufq[i + gap] = bp;
354                 }
355         }
356   }
357 
358   /* Set up i/o vector and do i/o.  The result of dev_io is discarded because
359    * all results are returned in the vector.  If dev_io fails completely, the
360    * vector is unchanged and all results are taken as errors.
361    */  
362   while (bufqsize > 0) {
363         for (j = 0, iop = iovec; j < NR_IOREQS && j < bufqsize; j++, iop++) {
364                 bp = bufq[j];
365                 iop->io_position = (off_t) bp->b_blocknr * BLOCK_SIZE;
366                 iop->io_buf = bp->b_data;
367                 iop->io_nbytes = BLOCK_SIZE;
368                 iop->io_request = rw_flag == WRITING ?
369                                   DEV_WRITE : DEV_READ | OPTIONAL_IO;
370         }
371         (void) dev_io(SCATTERED_IO, 0, dev, (off_t) 0, j, FS_PROC_NR,
372                                                         (char *) iovec);
373 
374         /* Harvest the results.  Leave read errors for rw_block() to complain. */
375         for (i = 0, iop = iovec; i < j; i++, iop++) {
376                 bp = bufq[i];
377                 if (rw_flag == READING) {
378                     if (iop->io_nbytes == 0)
379                         bp->b_dev = dev;        /* validate block */
380                     put_block(bp, PARTIAL_DATA_BLOCK);
381                 } else {
382                     if (iop->io_nbytes != 0) {
383                      printf("Unrecoverable write error on device %d/%d, block %ld\n",
384                                 (dev>>MAJOR)&BYTE, (dev>>MINOR)&BYTE, bp->b_blocknr);
385                         bp->b_dev = NO_DEV;     /* invalidate block */
386                     }
387                     bp->b_dirt = CLEAN;
388                 }
389         }
390         bufq += j;
391         bufqsize -= j;
392   }
393 }
394 
395 
396 /*===========================================================================*
397  *                              rm_lru                                       *
398  *===========================================================================*/
399 PRIVATE void rm_lru(bp)
400 struct buf *bp;
401 {
402 /* Remove a block from its LRU chain. */
403 
404   struct buf *next_ptr, *prev_ptr;
405 
406   bufs_in_use++;
407   next_ptr = bp->b_next;        /* successor on LRU chain */
408   prev_ptr = bp->b_prev;        /* predecessor on LRU chain */
409   if (prev_ptr != NIL_BUF)
410         prev_ptr->b_next = next_ptr;
411   else
412         front = next_ptr;       /* this block was at front of chain */
413 
414   if (next_ptr != NIL_BUF)
415         next_ptr->b_prev = prev_ptr;
416   else
417         rear = prev_ptr;        /* this block was at rear of chain */
418 }
419 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~ [ freetext search ] ~ [ file search ] ~

This page was automatically generated by the LXR engine.
Visit the LXR main site for more information.