inode缓存与dentry缓存

1. inode缓存

   1: struct inode {
   2:     /* RCU path lookup touches following: */
   3:     umode_t            i_mode;
   4:     uid_t            i_uid;
   5:     gid_t            i_gid;
   6:     const struct inode_operations    *i_op;
   7:     struct super_block    *i_sb;
   8:  
   9:     spinlock_t        i_lock;    /* i_blocks, i_bytes, maybe i_size */
  10:     unsigned int        i_flags;
  11:     unsigned long        i_state;
  12: #ifdef CONFIG_SECURITY
  13:     void            *i_security;
  14: #endif
  15:     struct mutex        i_mutex;
  16:  
  17:  
  18:     unsigned long        dirtied_when;    /* jiffies of first dirtying */
  19:  
  20:     struct hlist_node    i_hash;
  21:     struct list_head    i_wb_list;    /* backing dev IO list */
  22:     struct list_head    i_lru;        /* inode LRU list */
  23:     struct list_head    i_sb_list;
  24:     union {
  25:         struct list_head    i_dentry;
  26:         struct rcu_head        i_rcu;
  27:     };
  28:     unsigned long        i_ino;
  29:     atomic_t        i_count;
  30:     unsigned int        i_nlink;
  31:     dev_t            i_rdev;
  32:     unsigned int        i_blkbits;
  33:     u64            i_version;
  34:     loff_t            i_size;
  35: #ifdef __NEED_I_SIZE_ORDERED
  36:     seqcount_t        i_size_seqcount;
  37: #endif
  38:     struct timespec        i_atime;
  39:     struct timespec        i_mtime;
  40:     struct timespec        i_ctime;
  41:     blkcnt_t        i_blocks;
  42:     unsigned short          i_bytes;
  43:     struct rw_semaphore    i_alloc_sem;
  44:     const struct file_operations    *i_fop;    /* former ->i_op->default_file_ops */
  45:     struct file_lock    *i_flock;
  46:     struct address_space    *i_mapping;
  47:     struct address_space    i_data;
  48: #ifdef CONFIG_QUOTA
  49:     struct dquot        *i_dquot[MAXQUOTAS];
  50: #endif
  51:     struct list_head    i_devices;
  52:     union {
  53:         struct pipe_inode_info    *i_pipe;
  54:         struct block_device    *i_bdev;
  55:         struct cdev        *i_cdev;
  56:     };
  57:  
  58:     __u32            i_generation;
  59:  
  60: #ifdef CONFIG_FSNOTIFY
  61:     __u32            i_fsnotify_mask; /* all events this inode cares about */
  62:     struct hlist_head    i_fsnotify_marks;
  63: #endif
  64:  
  65: #ifdef CONFIG_IMA
  66:     atomic_t        i_readcount; /* struct files open RO */
  67: #endif
  68:     atomic_t        i_writecount;
  69: #ifdef CONFIG_FS_POSIX_ACL
  70:     struct posix_acl    *i_acl;
  71:     struct posix_acl    *i_default_acl;
  72: #endif
  73:     void            *i_private; /* fs or device private pointer */
  74: };

inode可能处于三种状态:

1)unused,里面没有保存有效的内容,可以被复用为新的用途;

2)in use,正在被使用,其成员i_count一定大于0(通常i_nlink也大于0;文件已被删除但仍被进程打开时i_nlink可以为0),此时inode与文件系统或者说设备上的文件相关联,但是自从上次与设备同步后,内容没有发生改变,即不是dirty的;

3)dirty,inode里面的内容已经与文件系统中的文件内容不一致了,即脏了,需要进行文件同步操作。

 

前两种状态的inode都各自位于一个全局的链表中,而第三种的inode位于super_block结构体中的一个链表中。

 

先看inode结构体中的一个成员:

struct list_head    i_lru;        /* inode LRU list */

 

对应着一个全局的链表:

static LIST_HEAD(inode_lru);
static DEFINE_SPINLOCK(inode_lru_lock);

 

   1: /*
   2:  * Called when we're dropping the last reference
   3:  * to an inode.
   4:  *
   5:  * Call the FS "drop_inode()" function, defaulting to
   6:  * the legacy UNIX filesystem behaviour.  If it tells
   7:  * us to evict inode, do so.  Otherwise, retain inode
   8:  * in cache if fs is alive, sync and evict if fs is
   9:  * shutting down.
  10:  */
  11: static void iput_final(struct inode *inode)
  12: {
  13:     struct super_block *sb = inode->i_sb;
  14:     const struct super_operations *op = inode->i_sb->s_op;
  15:     int drop;
  16:  
  17:     WARN_ON(inode->i_state & I_NEW);
  18:  
  19:     if (op && op->drop_inode)
  20:         drop = op->drop_inode(inode);
  21:     else
  22:         drop = generic_drop_inode(inode);
  23:  
  24:     if (!drop && (sb->s_flags & MS_ACTIVE)) {
  25:         inode->i_state |= I_REFERENCED;
  26:         if (!(inode->i_state & (I_DIRTY|I_SYNC)))
  27:             inode_lru_list_add(inode);
  28:         spin_unlock(&inode->i_lock);
  29:         return;
  30:     }
  31:  
  32:     if (!drop) {
  33:         inode->i_state |= I_WILL_FREE;
  34:         spin_unlock(&inode->i_lock);
  35:         write_inode_now(inode, 1);
  36:         spin_lock(&inode->i_lock);
  37:         WARN_ON(inode->i_state & I_NEW);
  38:         inode->i_state &= ~I_WILL_FREE;
  39:     }
  40:  
  41:     inode->i_state |= I_FREEING;
  42:     inode_lru_list_del(inode);
  43:     spin_unlock(&inode->i_lock);
  44:  
  45:     evict(inode);
  46: }

函数iput_final是在当inode没有被任何地方引用后,即变成了unused状态后,回收inode的机制。

if (op && op->drop_inode)
        drop = op->drop_inode(inode);
    else
        drop = generic_drop_inode(inode);

drop非0时,表示i_nlink为0,或者inode已经不在inode_hashtable的拉链表中(inode_unhashed返回真),即这个inode可以被释放掉;drop为0时,表示inode仍有硬链接并且仍在hash表中,应当尽量保留在缓存里。

   1: /*
   2:  * Normal UNIX filesystem behaviour: delete the
   3:  * inode when the usage count drops to zero, and
   4:  * i_nlink is zero.
   5:  */
   6: int generic_drop_inode(struct inode *inode)
   7: {
   8:     return !inode->i_nlink || inode_unhashed(inode);
   9: }
  10: EXPORT_SYMBOL_GPL(generic_drop_inode);

if (!drop && (sb->s_flags & MS_ACTIVE)) {
        inode->i_state |= I_REFERENCED;
        if (!(inode->i_state & (I_DIRTY|I_SYNC)))
           inode_lru_list_add(inode);
        spin_unlock(&inode->i_lock);
        return;
    }

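如果drop为0并且superblock仍处于活动状态(s_flags带有MS_ACTIVE),就先给inode打上I_REFERENCED标记;若inode既不脏也不在同步中(i_state不含I_DIRTY|I_SYNC),再调用inode_lru_list_add将inode添加到unused列表中,即将inode缓存起来,然后释放i_lock并返回。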

否则,如果drop为0(说明文件系统正在关闭),就先调用write_inode_now把inode写回到磁盘上;随后无论drop是否为0,都会设置I_FREEING标记,调用inode_lru_list_del将inode从LRU缓存中摘除,最后调用evict函数将inode彻底删除。

   1: static void inode_lru_list_add(struct inode *inode)
   2: {
   3:     spin_lock(&inode_lru_lock);
   4:     if (list_empty(&inode->i_lru)) {
   5:         list_add(&inode->i_lru, &inode_lru);
   6:         inodes_stat.nr_unused++;
   7:     }
   8:     spin_unlock(&inode_lru_lock);
   9: }

因此inode_lru就是全局的unused inode列表,通过“Least Recently Used”的顺序保存。

 

此外,操作inode_lru的函数还有prune_icache

   1: /*
   2:  * Scan `goal' inodes on the unused list for freeable ones. They are moved to a
   3:  * temporary list and then are freed outside inode_lru_lock by dispose_list().
   4:  *
   5:  * Any inodes which are pinned purely because of attached pagecache have their
   6:  * pagecache removed.  If the inode has metadata buffers attached to
   7:  * mapping->private_list then try to remove them.
   8:  *
   9:  * If the inode has the I_REFERENCED flag set, then it means that it has been
  10:  * used recently - the flag is set in iput_final(). When we encounter such an
  11:  * inode, clear the flag and move it to the back of the LRU so it gets another
  12:  * pass through the LRU before it gets reclaimed. This is necessary because of
  13:  * the fact we are doing lazy LRU updates to minimise lock contention so the
  14:  * LRU does not have strict ordering. Hence we don't want to reclaim inodes
  15:  * with this flag set because they are the inodes that are out of order.
  16:  */
  17: static void prune_icache(int nr_to_scan)
  18: {
  19:     LIST_HEAD(freeable);
  20:     int nr_scanned;
  21:     unsigned long reap = 0;
  22:  
  23:     down_read(&iprune_sem);
  24:     spin_lock(&inode_lru_lock);
  25:     for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
  26:         struct inode *inode;
  27:  
  28:         if (list_empty(&inode_lru))
  29:             break;
  30:  
  31:         inode = list_entry(inode_lru.prev, struct inode, i_lru);
  32:  
  33:         /*
  34:          * we are inverting the inode_lru_lock/inode->i_lock here,
  35:          * so use a trylock. If we fail to get the lock, just move the
  36:          * inode to the back of the list so we don't spin on it.
  37:          */
  38:         if (!spin_trylock(&inode->i_lock)) {
  39:             list_move(&inode->i_lru, &inode_lru);
  40:             continue;
  41:         }
  42:  
  43:         /*
  44:          * Referenced or dirty inodes are still in use. Give them
  45:          * another pass through the LRU as we canot reclaim them now.
  46:          */
  47:         if (atomic_read(&inode->i_count) ||
  48:             (inode->i_state & ~I_REFERENCED)) {
  49:             list_del_init(&inode->i_lru);
  50:             spin_unlock(&inode->i_lock);
  51:             inodes_stat.nr_unused--;
  52:             continue;
  53:         }
  54:  
  55:         /* recently referenced inodes get one more pass */
  56:         if (inode->i_state & I_REFERENCED) {
  57:             inode->i_state &= ~I_REFERENCED;
  58:             list_move(&inode->i_lru, &inode_lru);
  59:             spin_unlock(&inode->i_lock);
  60:             continue;
  61:         }
  62:         if (inode_has_buffers(inode) || inode->i_data.nrpages) {
  63:             __iget(inode);
  64:             spin_unlock(&inode->i_lock);
  65:             spin_unlock(&inode_lru_lock);
  66:             if (remove_inode_buffers(inode))
  67:                 reap += invalidate_mapping_pages(&inode->i_data,
  68:                                 0, -1);
  69:             iput(inode);
  70:             spin_lock(&inode_lru_lock);
  71:  
  72:             if (inode != list_entry(inode_lru.next,
  73:                         struct inode, i_lru))
  74:                 continue;    /* wrong inode or list_empty */
  75:             /* avoid lock inversions with trylock */
  76:             if (!spin_trylock(&inode->i_lock))
  77:                 continue;
  78:             if (!can_unuse(inode)) {
  79:                 spin_unlock(&inode->i_lock);
  80:                 continue;
  81:             }
  82:         }
  83:         WARN_ON(inode->i_state & I_NEW);
  84:         inode->i_state |= I_FREEING;
  85:         spin_unlock(&inode->i_lock);
  86:  
  87:         list_move(&inode->i_lru, &freeable);
  88:         inodes_stat.nr_unused--;
  89:     }
  90:     if (current_is_kswapd())
  91:         __count_vm_events(KSWAPD_INODESTEAL, reap);
  92:     else
  93:         __count_vm_events(PGINODESTEAL, reap);
  94:     spin_unlock(&inode_lru_lock);
  95:  
  96:     dispose_list(&freeable);
  97:     up_read(&iprune_sem);
  98: }

该函数的作用是在内存压力较大时,通过缩减缓存的inode列表inode_lru以释放出更多的内存。

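该函数从inode_lru的尾部(inode_lru.prev,即最久未被使用的一端)开始逐个取出inode,做一些简单检查:如果inode暂时不能回收但仍应留在LRU中,就用list_move将该inode移回链表的头部,然后检查下一个inode;可以回收的inode则被打上I_FREEING标记并移到临时链表freeable上,最后在释放inode_lru_lock之后由dispose_list统一释放。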

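使得inode此次不被回收的原因包括:无法获取到保护inode的自旋锁i_lock(移回链表头部);inode的使用计数i_count非0,或者i_state中除I_REFERENCED之外还有其他标志(例如inode是脏的)——这类inode会被直接从inode_lru中摘除(list_del_init),而不是被回收;inode刚刚被引用过(带有I_REFERENCED标记,清除标记后移回链表头部,再给一次机会)等等。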

 

还有一个比较实用的问题,我们看到在调用iput_final时,检查如果i_nlink不为0,并且inode仍然在hash拉链表中的话(即drop为0),就将其放到缓存inode_lru中;而在prune_icache时,还会再检查i_count引用计数是否为0。

这也就是说,如果一个inode对应的磁盘文件已经被删除了(i_nlink为0),但是还有进程对其进行操作的话(i_count不为0),那么它不会被直接删除,而是继续保留在内存中,也就是说对其操作的进程仍然可以对已经缓存下来的数据页面page进行操作。

直到没有进程再对其进行操作、最后一个引用被释放时,iput_final才会因为drop非0而调用evict将其真正清除。

 

inode中还有两个链表节点成员,分别是i_sb_list和i_wb_list,其中i_sb_list用于把inode挂入super_block->s_inodes链表,而i_wb_list用于把inode挂入后备设备(backing device)的回写IO链表。

 

2. dentry缓存

dentry缓存的目的,是为了减少对慢速磁盘的访问:每当VFS对路径名进行查找并访问底层文件系统时,都会将查找的结果缓存下来,保存成一个dentry对象。

 

而且dentry对象的组织与管理,是和inode缓存极其相似的,也有一个hash表,和一个lru队列。

而且当内存压力较大时,也会调用prune_dcache来企图释放lru中优先级较低的dentry项目。

 

   1: static struct hlist_bl_head *dentry_hashtable __read_mostly;

在super_block中

   1: /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */
   2:     struct list_head    s_dentry_lru;    /* unused dentry lru */

inode缓存与dentry缓存

上一篇:黑马程序员---集合框架2(HashSet,TreeSet,泛型)


下一篇:Android SDK 目录和作用详解