Kernel Memory Management

The memory management system can be divided into two parts, kernel-space memory management and user-space memory management:

  • The memory management subsystem is responsible for allocating available memory when a process requests it, reclaiming that memory once the process releases it, and tracking memory usage across the system. Modern operating systems must let many programs share system resources while keeping physical memory limits transparent to developers; virtual memory was introduced to meet these demands. It allows a process to address a space far larger than the actual physical memory and makes sharing memory between programs much more practical.

  • When a program fetches data from memory, it uses an address to identify the location it wants to access (note: this is a virtual address, and all such addresses together make up the process's virtual address space). Each process has its own virtual address space, which prevents it from illegally reading or overwriting another process's data; and because virtual addresses are decoupled from physical memory, the operating system can give every process an independent, linear virtual address space that may even exceed the size of physical RAM (a user-space sketch follows below).
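
To make the isolation concrete, here is a minimal user-space sketch (not part of the original text): after fork(), parent and child dereference the same virtual address yet read different values, because that address is translated through two independent sets of page tables onto different physical pages (copy-on-write).

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
    int *p = malloc(sizeof(*p));    /* one virtual address, valid in both processes after fork() */
    *p = 1;

    pid_t pid = fork();
    if (pid < 0) {
        perror("fork");
        return 1;
    }

    if (pid == 0) {                 /* child: write through the shared virtual address */
        *p = 100;
        printf("child : addr=%p value=%d\n", (void *)p, *p);
        return 0;
    }

    wait(NULL);                     /* parent: its copy is untouched by the child's write */
    printf("parent: addr=%p value=%d\n", (void *)p, *p);
    free(p);
    return 0;
}

Both printf() lines show the identical address, yet the parent still reads 1 after the child has written 100: the address is only meaningful inside each process's own virtual address space.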

The page is the basic unit of memory management, and a lot of per-page state has to be recorded (for example, the kernel needs to know when a page can be reclaimed), so the kernel maintains a page descriptor, struct page, for every physical page frame in the system. At initialization time the kernel builds an array of page structures, mem_map, sized according to the amount of physical memory, which serves as the repository of physical page frames.
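
Under the flat memory model, the relationship between a physical page frame number (pfn) and its descriptor is plain array indexing into mem_map. The excerpt below is a simplified reading of the CONFIG_FLATMEM case in include/asm-generic/memory_model.h (details vary by configuration and kernel version):

/* FLATMEM: mem_map[] is one contiguous array of page descriptors,
 * indexed by the pfn relative to the first page frame in the system. */
#define __pfn_to_page(pfn)   (mem_map + ((pfn) - ARCH_PFN_OFFSET))
#define __page_to_pfn(page)  ((unsigned long)((page) - mem_map) + ARCH_PFN_OFFSET)

The page descriptor itself is defined in include/linux/mm_types.h: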

struct page {
    // Bitmap of page flags; each bit records one attribute/state of the page
    unsigned long flags;
    /*
     * Five words (20/40 bytes) are available in this union.
     * WARNING: bit 0 of the first word is used for PageTail(). That
     * means the other users of this union MUST NOT use the bit to
     * avoid collision and false-positive PageTail().
     */
    union {
        struct {    /* Page cache and anonymous pages */
            /**
             * @lru: Pageout list, eg. active_list protected by
             * pgdat->lru_lock.  Sometimes used as a generic list
             * by the page owner.
             */
            // List head used to keep the page on various lists so that pages can be grouped
            // by usage; its three main users are the buddy allocator, the slab allocator,
            // and pages owned by user space or used as page cache
            struct list_head lru; 
            /* See page-flags.h for PAGE_MAPPING_FLAGS */
            struct address_space *mapping;
            // Offset of the page within its mapping (address_space), in page-sized units;
            // for file-backed pages this is the page's offset within the file, for anonymous
            // pages it is derived from the virtual address at which the page was faulted in
            pgoff_t index;        
            /**
             * @private: Mapping-private opaque data.
             * Usually used for buffer_heads if PagePrivate.
             * Used for swp_entry_t if PageSwapCache.
             * Indicates order in the buddy system if PageBuddy.
             */
            unsigned long private;
        };
        struct {    /* page_pool used by netstack */
            /**
             * @dma_addr: might require a 64-bit value even on
             * 32-bit architectures.
             */
            dma_addr_t dma_addr;
        };
        struct {    /* slab, slob and slub */
            union {
                struct list_head slab_list;
                struct {    /* Partial pages */
                    struct page *next;
#ifdef CONFIG_64BIT
                    int pages;    /* Nr of pages left */
                    int pobjects;    /* Approximate count */
#else
                    short int pages;
                    short int pobjects;
#endif
                };
            };
            struct kmem_cache *slab_cache; /* not slob */
            /* Double-word boundary */
            void *freelist;        /* first free object */
            union {
                void *s_mem;    /* slab: first object */
                unsigned long counters;        /* SLUB */
                struct {            /* SLUB */
                    unsigned inuse:16;
                    unsigned objects:15;
                    unsigned frozen:1;
                };
            };
        };
        struct {    /* Tail pages of compound page */
            unsigned long compound_head;    /* Bit zero is set */

            /* First tail page only */
            unsigned char compound_dtor;
            unsigned char compound_order;
            atomic_t compound_mapcount;
            unsigned int compound_nr; /* 1 << compound_order */
        };
        struct {    /* Second tail page of compound page */
            unsigned long _compound_pad_1;    /* compound_head */
            atomic_t hpage_pinned_refcount;
            /* For both global and memcg */
            struct list_head deferred_list;
        };
        struct {    /* Page table pages */
            unsigned long _pt_pad_1;    /* compound_head */
            pgtable_t pmd_huge_pte; /* protected by page->ptl */
            unsigned long _pt_pad_2;    /* mapping */
            union {
                struct mm_struct *pt_mm; /* x86 pgds only */
                atomic_t pt_frag_refcount; /* powerpc */
            };
#if ALLOC_SPLIT_PTLOCKS
            spinlock_t *ptl;
#else
            spinlock_t ptl;
#endif
        };
        struct {    /* ZONE_DEVICE pages */
            /** @pgmap: Points to the hosting device page map. */
            struct dev_pagemap *pgmap;
            void *zone_device_data;
            /*
             * ZONE_DEVICE private pages are counted as being
             * mapped so the next 3 words hold the mapping, index,
             * and private fields from the source anonymous or
             * page cache page while the page is migrated to device
             * private memory.
             * ZONE_DEVICE MEMORY_DEVICE_FS_DAX pages also
             * use the mapping, index, and private fields when
             * pmem backed DAX files are mapped.
             */
        };

        /** @rcu_head: You can use this to free a page by RCU. */
        struct rcu_head rcu_head;
    };

    union {        /* This union is 4 bytes in size. */
        /*
         * If the page can be mapped to userspace, encodes the number
         * of times this page is referenced by a page table.
         */
        atomic_t _mapcount;

        /*
         * If the page is neither PageSlab nor mappable to userspace,
         * the value stored here may help determine what this page
         * is used for.  See page-flags.h for a list of page types
         * which are currently stored here.
         */
        unsigned int page_type;

        unsigned int active;        /* SLAB */
        int units;            /* SLOB */
    };

    /* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */
    atomic_t _refcount;

#ifdef CONFIG_MEMCG
    union {
        struct mem_cgroup *mem_cgroup;
        struct obj_cgroup **obj_cgroups;
    };
#endif

    /*
     * On machines where all RAM is mapped into kernel address space,
     * we can simply calculate the virtual address. On machines with
     * highmem some memory is mapped into kernel virtual memory
     * dynamically, so we need a place to store that address.
     * Note that this field could be 16 bits on x86 ... ;)
     *
     * Architectures with slow multiplication can define
     * WANT_PAGE_VIRTUAL in asm/page.h
     */
#if defined(WANT_PAGE_VIRTUAL)
    void *virtual;            /* Kernel virtual address (NULL if
                       not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */

#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
    int _last_cpupid;
#endif
} _struct_page_alignment;
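
Because most of these fields live in overlapping unions, kernel code does not interpret them directly; it goes through helpers that know which view of the union is currently active. The fragment below is an illustrative sketch of that convention, not code taken from the kernel or from the original text (inspect_page() is a hypothetical helper; page_ref_count(), page_mapcount() and the PageXXX() macros are real kernel helpers):

#include <linux/mm.h>
#include <linux/page-flags.h>
#include <linux/page_ref.h>
#include <linux/printk.h>

/* Hypothetical helper showing the usual access pattern for struct page. */
static void inspect_page(struct page *page)
{
    /* _refcount: read via page_ref.h helpers, never atomic_read() on the field */
    int refs = page_ref_count(page);

    /* flags: tested with the PageXXX() macros from page-flags.h */
    if (PageSlab(page)) {
        /* the slab/slub view of the first union is active here */
        pr_info("slab page, refcount=%d\n", refs);
    } else if (PageLRU(page)) {
        /* page-cache/anonymous view: lru, mapping and index are meaningful */
        pr_info("lru page, refcount=%d, mapcount=%d\n",
                refs, page_mapcount(page));
    }
}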