When a 100 MB malloc succeeds, Linux has not actually handed out 100 MB of physical memory; pages that have only been read are all mapped, read-only, onto a single page of zeroes.
When the application writes to any byte of any page in that 100 MB, a page fault is raised. The kernel reads the cause of the fault and the faulting virtual address from the hardware registers, takes one physical page from RAM, performs copy-on-write (copying the zero page into the newly allocated page), points the process page-table entry for that virtual address at the new physical address, and changes the permission to R+W.
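A minimal user-space sketch of that behaviour (my own example, not taken from the kernel source): allocate 100 MB, print VmRSS from /proc/self/status, touch every page, and print it again. Resident memory only shows up after the writes, i.e. after the page faults have run.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static void print_rss(const char *tag)
{
	char line[256];
	FILE *f = fopen("/proc/self/status", "r");

	if (!f)
		return;
	while (fgets(line, sizeof(line), f))
		if (strncmp(line, "VmRSS:", 6) == 0)
			printf("%s %s", tag, line);
	fclose(f);
}

int main(void)
{
	size_t size = 100UL << 20;		/* 100 MB */
	char *p = malloc(size);

	if (!p)
		return 1;
	print_rss("after malloc:");		/* RSS barely moves */
	memset(p, 1, size);			/* write every page -> page faults */
	print_rss("after memset:");		/* RSS grows by roughly 100 MB */
	free(p);
	return 0;
}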
The core function wrapped by the C standard library is the brk system call (implemented in mm/mmap.c):
SYSCALL_DEFINE1(brk, unsigned long, brk)
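From user space the usual way to reach this entry point is through the C library. A tiny sketch of my own using sbrk(), which glibc implements on top of brk: sbrk(0) reads the current program break, and a positive increment asks the kernel to move it up.

#include <stdio.h>
#include <unistd.h>

int main(void)
{
	void *old_brk = sbrk(0);		/* current program break */

	if (sbrk(4096) == (void *)-1)		/* grow the heap by one page */
		return 1;
	printf("break moved from %p to %p\n", old_brk, sbrk(0));
	return 0;
}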
SYSCALL_DEFINE1 is defined in the include/linux/syscalls.h header; the 1 means the syscall takes one argument, 2 means two, and so on.
#define SYSCALL_DEFINE1(name, ...) SYSCALL_DEFINEx(1, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE2(name, ...) SYSCALL_DEFINEx(2, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE3(name, ...) SYSCALL_DEFINEx(3, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE4(name, ...) SYSCALL_DEFINEx(4, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE5(name, ...) SYSCALL_DEFINEx(5, _##name, __VA_ARGS__)
#define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__)
Expanding the macro one level (SYSCALL_METADATA is only used by ftrace for tracing system calls):
#define SYSCALL_DEFINEx(x, sname, ...) \
SYSCALL_METADATA(sname, x, __VA_ARGS__) \
__SYSCALL_DEFINEx(x, sname, __VA_ARGS__)
__SYSCALL_DEFINEx itself is architecture-dependent. Expanding it further:
#ifndef __SYSCALL_DEFINEx
#define __SYSCALL_DEFINEx(x, name, ...) \
__diag_push(); \
__diag_ignore(GCC, 8, "-Wattribute-alias", \
"Type aliasing is used to sanitize syscall arguments");\
asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \
__attribute__((alias(__stringify(__se_sys##name)))); \
ALLOW_ERROR_INJECTION(sys##name, ERRNO); \
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
asmlinkage long __se_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \
{ \
long ret = __do_sys##name(__MAP(x,__SC_CAST,__VA_ARGS__));\
__MAP(x,__SC_TEST,__VA_ARGS__); \
__PROTECT(x, ret,__MAP(x,__SC_ARGS,__VA_ARGS__)); \
return ret; \
} \
__diag_pop(); \
static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
#endif /* __SYSCALL_DEFINEx */
Substituting brk gives the following symbols (on arm64 the architecture-specific __SYSCALL_DEFINEx from arch/arm64/include/asm/syscall_wrapper.h is the one actually used; with the generic wrapper shown above the top-level symbol would be sys_brk):
__arm64_sys_brk
__se_sys_brk
__do_sys_brk
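For orientation, with the generic wrapper quoted above, SYSCALL_DEFINE1(brk, unsigned long, brk) expands to roughly the following (metadata, diagnostics and error-injection hooks dropped; this is a sketch, not literal preprocessor output):

asmlinkage long sys_brk(unsigned long brk)
	__attribute__((alias("__se_sys_brk")));

static inline long __do_sys_brk(unsigned long brk);
asmlinkage long __se_sys_brk(long brk);
asmlinkage long __se_sys_brk(long brk)
{
	long ret = __do_sys_brk((unsigned long)brk);
	/* __SC_TEST / __PROTECT type checks omitted */
	return ret;
}
static inline long __do_sys_brk(unsigned long brk)
/* { ...the body written after SYSCALL_DEFINE1(brk, unsigned long, brk)... } */

__se_sys_brk takes the raw register value as a long, casts it, and hands it to __do_sys_brk, which holds the actual body.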
First, look at __do_sys_brk, i.e. the function body written after SYSCALL_DEFINE1(brk, unsigned long, brk):
SYSCALL_DEFINE1(brk, unsigned long, brk)
{
unsigned long retval; //return value
unsigned long newbrk, oldbrk; //old and new heap boundaries: newbrk = oldbrk + requested size
struct mm_struct *mm = current->mm; //current really is magic, usable anywhere
struct vm_area_struct *next; //vma
unsigned long min_brk;
bool populate; //populate pages right away?
LIST_HEAD(uf); //list head for userfaultfd unmap notifications
/*take the mmap read/write semaphore for writing, since the VMA list is about to be modified*/
if (down_write_killable(&mm->mmap_sem))
return -EINTR;
#ifdef CONFIG_COMPAT_BRK //normally not enabled
/*
* CONFIG_COMPAT_BRK can still be overridden by setting
* randomize_va_space to 2, which will still cause mm->start_brk
* to be arbitrarily shifted
*/
if (current->brk_randomized)
min_brk = mm->start_brk;
else
min_brk = mm->end_data;
#else
min_brk = mm->start_brk; //this branch is used: min_brk becomes the start address of the heap
#endif
if (brk < min_brk) //hah, even this is checked: requests below the heap start are rejected
goto out;
/*
* Check against rlimit here. If this check is done later after the test
* of oldbrk with newbrk then it can escape the test and let the data
* segment grow beyond its set limit the in case where the limit is
* not page aligned -Ram Gupta (this is the rlimit boundary check)
*/
if (check_data_rlimit(rlimit(RLIMIT_DATA), brk, mm->start_brk,
mm->end_data, mm->start_data))
goto out;
newbrk = PAGE_ALIGN(brk); //brk is the requested new heap boundary
oldbrk = PAGE_ALIGN(mm->brk); //mm_struct keeps two fields: brk is the current heap end, start_brk is the heap start
if (oldbrk == newbrk) //the boundary does not need to move
goto set_brk;
/* Always allow shrinking brk.
* If the new boundary is below the old one, the process is shrinking the heap; do_munmap releases the freed range.
*/
if (brk <= mm->brk) {
if (!do_munmap(mm, newbrk, oldbrk-newbrk, &uf))
goto set_brk;
goto out;
}
/* Check against existing mmap mappings.
* find_vma returns the first VMA that ends above oldbrk; if growing to newbrk (plus a guard page) would run into it, give up.
*/
next = find_vma(mm, oldbrk);
if (next && newbrk + PAGE_SIZE > vm_start_gap(next))
goto out;
/* Ok, looks good - let it rip.
* Otherwise extend the heap: do_brk_flags creates (or merges) an anonymous VMA covering [oldbrk, newbrk).
*/
if (do_brk_flags(oldbrk, newbrk-oldbrk, 0, &uf) < 0)
goto out;
set_brk: //record the brk requested this time in the process mm descriptor
mm->brk = brk;
/*An application can use the mlockall() system call to lock its entire virtual address space and keep it from being swapped out.
* In that case mm->def_flags has VM_LOCKED set, and mm_populate is called to allocate physical memory immediately.
*/
populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
up_write(&mm->mmap_sem);//release the write semaphore
userfaultfd_unmap_complete(mm, &uf);
if (populate) //without VM_LOCKED, physical pages are allocated later, when the page fault happens
mm_populate(oldbrk, newbrk - oldbrk);
return brk;
out:
retval = mm->brk;
up_write(&mm->mmap_sem);
return retval;
}
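For reference, the check_data_rlimit() helper used near the top lives in include/linux/mm.h; from memory it is roughly the following: the would-be heap size (new brk minus start_brk) plus the initialized data segment must stay within RLIMIT_DATA.

static inline int check_data_rlimit(unsigned long rlim,
				    unsigned long new,
				    unsigned long start,
				    unsigned long end_data,
				    unsigned long start_data)
{
	if (rlim < RLIM_INFINITY) {
		if (((new - start) + (end_data - start_data)) > rlim)
			return -ENOMEM;
	}
	return 0;
}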
do_brk_flags allocates a new VMA:
/*
* this is really a simplified "do_mmap". it only handles
* anonymous maps. eventually we may be able to do some
* brk-specific accounting here.
*/
Parameter meanings:
addr: the old heap boundary
len: size of the requested memory
flags: flags passed in for this allocation
uf: temporary list (for userfaultfd unmap notifications)
static int do_brk_flags(unsigned long addr, unsigned long len, unsigned long flags, struct list_head *uf)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma, *prev;
struct rb_node **rb_link, *rb_parent;
pgoff_t pgoff = addr >> PAGE_SHIFT;
int error;
/* Until we need other flags, refuse anything except VM_EXEC. */
if ((flags & (~VM_EXEC)) != 0)
return -EINVAL;
flags |= VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
/*get_unmapped_area returns the start of an unmapped region; with MAP_FIXED it checks that [addr, addr+len) is a usable range*/
error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
if (offset_in_page(error))
return error;
error = mlock_future_check(mm, mm->def_flags, len);
if (error)
return error;
/*
* mm->mmap_sem is required to protect against another thread
* changing the mappings in case we sleep.
*/
verify_mm_writelocked(mm);
/*
* Clear old maps. this also does some error checking for us
* find_vma_links walks the rbtree looking for an insertion point for [addr, addr+len):
* it returns 0 when a slot was found with no overlap,
* and -ENOMEM when the range overlaps an existing VMA, in which case do_munmap is called to release the overlapping part
*/
while (find_vma_links(mm, addr, addr + len, &prev, &rb_link,
&rb_parent)) {
if (do_munmap(mm, addr, len, uf))
return -ENOMEM;
}
/* Check against address space limits *after* clearing old maps... */
if (!may_expand_vm(mm, flags, len >> PAGE_SHIFT))
return -ENOMEM;
if (mm->map_count > sysctl_max_map_count)
return -ENOMEM;
if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
return -ENOMEM;
/* Can we just expand an old private anonymous mapping?
* check whether the new range can be merged with a neighbouring VMA
*/
vma = vma_merge(mm, prev, addr, addr + len, flags,
NULL, NULL, pgoff, NULL, NULL_VM_UFFD_CTX, NULL);
if (vma)
goto out;
/* If it cannot be merged, create a new VMA covering [addr, addr+len)
* create a vma struct for an anonymous mapping
*/
vma = vm_area_alloc(mm);
if (!vma) {
vm_unacct_memory(len >> PAGE_SHIFT);
return -ENOMEM;
}
/*fill in the VMA fields*/
vma_set_anonymous(vma);
vma->vm_start = addr;
vma->vm_end = addr + len;
vma->vm_pgoff = pgoff;
vma->vm_flags = flags;
vma->vm_page_prot = vm_get_page_prot(flags);
/*insert the new VMA into the mm->mmap list and the red-black tree*/
vma_link(mm, vma, prev, rb_link, rb_parent);
out:
perf_event_mmap(vma);
mm->total_vm += len >> PAGE_SHIFT;
mm->data_vm += len >> PAGE_SHIFT;
if (flags & VM_LOCKED)
mm->locked_vm += (len >> PAGE_SHIFT);
vma->vm_flags |= VM_SOFTDIRTY;
return 0;
}
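Before moving on, note that the mm_populate() called at the end of the brk handler is just a thin inline wrapper (roughly, from include/linux/mm.h) that passes ignore_errors = 1:

static inline void mm_populate(unsigned long addr, unsigned long len)
{
	/* Ignore errors */
	__mm_populate(addr, len, 1);
}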
__mm_populate() allocates the physical memory.
It is implemented in mm/gup.c.
/*
* __mm_populate - populate and/or mlock pages within a range of address space.
*
* This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
* flags. VMAs must be already marked with the desired vm_flags, and
* mmap_sem must not be held.
* start: start address of the range; len: length of the range; ignore_errors: keep going with the rest of the range when populating a page fails
*/
int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
{
struct mm_struct *mm = current->mm; //current's mm_struct
unsigned long end, nstart, nend; //end is the end of the range to populate
struct vm_area_struct *vma = NULL; //a vma pointer
int locked = 0;
long ret = 0;
end = start + len;
/*walk the range starting from start*/
for (nstart = start; nstart < end; nstart = nend) {
/*
* We want to fault in pages for [nstart; end) address range.
* Find first corresponding VMA.
*/
if (!locked) {
locked = 1;
down_read(&mm->mmap_sem); //take the read semaphore
vma = find_vma(mm, nstart); //look up the VMA
} else if (nstart >= vma->vm_end)
vma = vma->vm_next; //move on to the next VMA
if (!vma || vma->vm_start >= end)
break;
/*
* Set [nstart; nend) to intersection of desired address
* range with the first VMA. Also, skip undesirable VMA types.
*/
nend = min(end, vma->vm_end);
if (vma->vm_flags & (VM_IO | VM_PFNMAP))
continue;
if (nstart < vma->vm_start)
nstart = vma->vm_start;
/*
* Now fault in a range of pages. populate_vma_page_range()
* double checks the vma flags, so that it won't mlock pages
* if the vma was already munlocked.
* It deliberately triggers the page faults and sets up the mappings; the return value is the number of pages successfully handled.
*/
ret = populate_vma_page_range(vma, nstart, nend, &locked);
if (ret < 0) {
if (ignore_errors) {
ret = 0;
continue; /* continue at next VMA */
}
break;
}
nend = nstart + ret * PAGE_SIZE; /*if one pass did not map everything, the loop continues from here*/
ret = 0;
}
if (locked)
up_read(&mm->mmap_sem);
return ret; /* 0 or negative error code */
}
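As the kernel comment says, this same path backs mlock() and the MAP_POPULATE / MAP_LOCKED mmap flags. A small user-space sketch of my own: with MAP_POPULATE the whole range is faulted in at mmap() time, so RSS jumps immediately instead of growing at first touch as in the earlier malloc example.

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	size_t size = 100UL << 20;	/* 100 MB */
	/* MAP_POPULATE asks the kernel to pre-fault the whole range,
	 * which goes through __mm_populate() just like the VM_LOCKED brk case */
	void *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0);

	if (p == MAP_FAILED)
		return 1;
	getchar();			/* check VmRSS in /proc/<pid>/status now */
	munmap(p, size);
	return 0;
}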
populate_vma_page_range() calls __get_user_pages(), which is mainly used to pin pages: it makes sure the memory behind a user-space range is present and cannot be reclaimed or swapped out from under it. Many drivers use this interface to get at the physical pages backing a user-space buffer.
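populate_vma_page_range() itself (also in mm/gup.c) mainly builds the gup_flags and forwards to __get_user_pages; a condensed sketch from memory, not verbatim:

long populate_vma_page_range(struct vm_area_struct *vma,
		unsigned long start, unsigned long end, int *nonblocking)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long nr_pages = (end - start) / PAGE_SIZE;
	int gup_flags;

	/* touch the pages, populate them, and honour VM_LOCKED */
	gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK;
	if (vma->vm_flags & VM_LOCKONFAULT)
		gup_flags &= ~FOLL_POPULATE;
	/* writable private mappings are faulted in writable, so COW
	 * is broken during population, not at the first real write */
	if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
		gup_flags |= FOLL_WRITE;
	if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
		gup_flags |= FOLL_FORCE;

	return __get_user_pages(current, mm, start, nr_pages, gup_flags,
				NULL, NULL, nonblocking);
}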
/*
 * tsk: task_struct of the target process
 * mm: memory descriptor of the target process, mm = vma->vm_mm;
 * start: starting virtual address of the range
 * nr_pages: number of physical pages to handle, nr_pages = (end - start) / PAGE_SIZE;
 * gup_flags: FOLL_* flags controlling the lookup and pinning behaviour
 * pages: where the pinned pages are returned; an array of page pointers
 * vmas: the VMA that maps each of those pages; an array of VMA pointers
 * nonblocking: controls whether the call may wait for the mmap_sem read semaphore or for disk I/O
 */
//Takes a range of user-space virtual addresses, allocates physical memory behind it and sets up the page-table mappings
static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *nonblocking)
{
long i = 0;
unsigned int page_mask;
struct vm_area_struct *vma = NULL;
if (!nr_pages)
return 0;
start = untagged_addr(start);
VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET));
/*
* If FOLL_FORCE is set then do not force a full fault as the hinting
* fault information is unrelated to the reference behaviour of a task
* using the address space
*/
if (!(gup_flags & FOLL_FORCE))
gup_flags |= FOLL_NUMA;
/*the stuff above is just preamble; the real work starts here: loop over the nr_pages pages one at a time*/
do {
struct page *page;
unsigned int foll_flags = gup_flags;
unsigned int page_increm;
/* first iteration or cross vma bound */
if (!vma || start >= vma->vm_end) {
vma = find_extend_vma(mm, start); //look up the VMA (extending a stack VMA if needed)
if (!vma && in_gate_area(mm, start)) {
int ret;
ret = get_gate_page(mm, start & PAGE_MASK,
gup_flags, &vma,
pages ? &pages[i] : NULL);
if (ret)
return i ? : ret;
page_mask = 0;
goto next_page;
}
if (!vma || check_vma_flags(vma, gup_flags))
return i ? : -EFAULT;
if (is_vm_hugetlb_page(vma)) { //hugetlbfs VMAs take a separate path; not the case here
i = follow_hugetlb_page(mm, vma, pages, vmas,
&start, &nr_pages, i,
gup_flags, nonblocking);
continue;
}
}
retry:
/*
* If we have a pending SIGKILL, don't keep faulting pages and
* potentially allocating memory.
* If the current process has received SIGKILL, there is no point allocating more memory; bail out with an error.
*/
if (unlikely(fatal_signal_pending(current)))
return i ? i : -ERESTARTSYS;
cond_resched(); //reschedule if needed, to keep latency down
/*Check whether the virtual pages of the VMA already have physical memory behind them; returns the page
 * structure of a page that is already normally mapped in the user process address space.
 */
page = follow_page_mask(vma, start, foll_flags, &page_mask);
if (!page) { //no page structure came back: call faultin_page to trigger the page fault by hand
int ret;
ret = faultin_page(tsk, vma, start, &foll_flags,
nonblocking);
switch (ret) {
case 0:
goto retry;
case -EFAULT:
case -ENOMEM:
case -EHWPOISON:
return i ? i : ret;
case -EBUSY:
return i;
case -ENOENT:
goto next_page;
}
BUG();
} else if (PTR_ERR(page) == -EEXIST) {
/*
* Proper page table entry exists, but no corresponding
* struct page.
*/
goto next_page;
} else if (IS_ERR(page)) {
return i ? i : PTR_ERR(page);
}
if (pages) {
pages[i] = page;
/*got a page: store it in the pointer array, then flush the caches for it with the two calls below*/
flush_anon_page(vma, page, start);
flush_dcache_page(page);
page_mask = 0;
}
next_page:
if (vmas) {
vmas[i] = vma;
page_mask = 0;
}
page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
if (page_increm > nr_pages)
page_increm = nr_pages;
i += page_increm;
start += page_increm * PAGE_SIZE;
nr_pages -= page_increm;
} while (nr_pages);
return i;
}
follow_page_mask checks whether a virtual page in the VMA already has physical memory mapped behind it.
/**
* follow_page_mask - look up a page descriptor from a user-virtual address
* @vma: vm_area_struct mapping @address (the VMA that address belongs to)
* @address: virtual address to look up (the virtual address used to walk the page tables)
* @flags: flags modifying lookup behaviour (internal FOLL_* flags)
* @page_mask: on output, *page_mask is set according to the size of the page
*
* @flags can have FOLL_ flags set, defined in <linux/mm.h>
*
* Returns the mapped (struct page *), %NULL if no mapping exists, or
* an error pointer if there is a mapping to something not represented
* by a page descriptor (see also vm_normal_page()).
*/
struct page *follow_page_mask(struct vm_area_struct *vma,
unsigned long address, unsigned int flags,
unsigned int *page_mask)
{
pgd_t *pgd;
struct page *page;
struct mm_struct *mm = vma->vm_mm;
*page_mask = 0;
...
/*The pgd_offset macro takes the mm_struct (mm->pgd) and the virtual address and locates the PGD entry of the process page table; if the entry is empty or invalid, return an error*/
pgd = pgd_offset(mm, address);
if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
return no_page_table(vma, flags);
//the elided code is all huge-page handling, skipping it
...
return follow_p4d_mask(vma, address, pgd, flags, page_mask);
}
Next comes the nested descent through the multi-level page tables (a condensed sketch of the walk follows this list):
- follow_p4d_mask -> the p4d_offset macro locates the P4D entry, then calls follow_pud_mask
- follow_pud_mask -> the pud_offset macro locates the PUD entry, then calls follow_pmd_mask
- follow_pmd_mask -> the pmd_offset macro locates the PMD entry, then calls follow_page_pte
- follow_page_pte reads the PTE and returns the page structure of the physical page backing address
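Stripped of the huge-page branches, the pattern behind that chain is the classic five-level walk. A condensed sketch (walk_to_pte is a made-up name for illustration; the *_offset/_none/_bad helpers are the real kernel ones):

static pte_t *walk_to_pte(struct mm_struct *mm, unsigned long address)
{
	pgd_t *pgd = pgd_offset(mm, address);	/* index into mm->pgd */
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	if (pgd_none(*pgd) || pgd_bad(*pgd))
		return NULL;
	p4d = p4d_offset(pgd, address);
	if (p4d_none(*p4d) || p4d_bad(*p4d))
		return NULL;
	pud = pud_offset(p4d, address);
	if (pud_none(*pud) || pud_bad(*pud))
		return NULL;
	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		return NULL;
	return pte_offset_map(pmd, address);	/* caller must pte_unmap() */
}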
Let's focus on follow_page_pte.
It is also implemented in mm/gup.c.
static struct page *follow_page_pte(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmd, unsigned int flags)
{
struct mm_struct *mm = vma->vm_mm;
struct dev_pagemap *pgmap = NULL;
struct page *page;
spinlock_t *ptl; //spinlock, used in a moment
pte_t *ptep, pte;
retry:
/*check that the PMD entry is valid*/
if (unlikely(pmd_bad(*pmd)))
return no_page_table(vma, flags);
/*this macro maps the PTE for address from the PMD and takes the page-table spinlock*/
ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
pte = *ptep;
/*handle the case where the page is not present in memory*/
if (!pte_present(pte)) {
swp_entry_t entry;
/*
* KSM's break_ksm() relies upon recognizing a ksm page
* even while it is being migrated, so for that case we
* need migration_entry_wait().
*/
//without FOLL_MIGRATION, a non-present (possibly migrating) PTE is simply treated as an error
if (likely(!(flags & FOLL_MIGRATION)))
goto no_page;
//empty PTE, return an error
if (pte_none(pte))
goto no_page;
//if the PTE is a migration entry (the page is being migrated), wait via migration_entry_wait for the migration to finish and then retry
entry = pte_to_swp_entry(pte);
if (!is_migration_entry(entry))
goto no_page;
pte_unmap_unlock(ptep, ptl);
migration_entry_wait(mm, pmd, address);
goto retry;
}
if ((flags & FOLL_NUMA) && pte_protnone(pte))
goto no_page;
//the caller asked for write access but the PTE is not writable for this request: unlock and return NULL
if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
pte_unmap_unlock(ptep, ptl);
return NULL;
}
/*the crucial step: vm_normal_page returns the page structure of the normally mapped page behind the PTE*/
page = vm_normal_page(vma, address, pte);
/*handle device-mapped pages*/
if (!page && pte_devmap(pte) && (flags & FOLL_GET)) {
/*
* Only return device mapping pages in the FOLL_GET case since
* they are only valid while holding the pgmap reference.
*/
pgmap = get_dev_pagemap(pte_pfn(pte), NULL);
if (pgmap)
page = pte_page(pte);
else
goto no_page;
/*handle the case where no valid page was returned*/
} else if (unlikely(!page)) {
if (flags & FOLL_DUMP) {
/* Avoid special (like zero) pages in core dumps */
page = ERR_PTR(-EFAULT);
goto out;
}
if (is_zero_pfn(pte_pfn(pte))) {
page = pte_page(pte);
} else {
int ret;
ret = follow_pfn_pte(vma, address, ptep, flags);
page = ERR_PTR(ret);
goto out;
}
}
if (flags & FOLL_SPLIT && PageTransCompound(page)) {
int ret;
get_page(page);
pte_unmap_unlock(ptep, ptl);
lock_page(page);
ret = split_huge_page(page);
unlock_page(page);
put_page(page);
if (ret)
return ERR_PTR(ret);
goto retry;
}
/*FOLL_GET: take a reference on the page (try_get_page bumps its refcount)*/
if (flags & FOLL_GET) {
if (unlikely(!try_get_page(page))) {
page = ERR_PTR(-ENOMEM);
goto out;
}
/* drop the pgmap reference now that we hold the page */
if (pgmap) {
put_dev_pagemap(pgmap);
pgmap = NULL;
}
}
/*FOLL_TOUCH: mark_page_accessed marks the page as recently used (and set_page_dirty for write access)*/
if (flags & FOLL_TOUCH) {
if ((flags & FOLL_WRITE) &&
!pte_dirty(pte) && !PageDirty(page))
set_page_dirty(page);
/*
* pte_mkyoung() would be more correct here, but atomic care
* is needed to avoid losing the dirty bit: it is easier to use
* mark_page_accessed().
*/
mark_page_accessed(page);
}
if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
/* Do not mlock pte-mapped THP */
if (PageTransCompound(page))
goto out;
/*
* The preliminary mapping check is mainly to avoid the
* pointless overhead of lock_page on the ZERO_PAGE
* which might bounce very badly if there is contention.
*
* If the page is already locked, we don't need to
* handle it now - vmscan will handle it later if and
* when it attempts to reclaim the page.
*/
if (page->mapping && trylock_page(page)) {
lru_add_drain(); /* push cached pages to LRU */
/*
* Because we lock page here, and migration is
* blocked by the pte's page reference, and we
* know the page is still mapped, we don't even
* need to check for file-cache page truncation.
*/
mlock_vma_page(page);
unlock_page(page);
}
}
out:
pte_unmap_unlock(ptep, ptl);
return page;
no_page:
pte_unmap_unlock(ptep, ptl);
if (!pte_none(pte))
return NULL;
return no_page_table(vma, flags);
}
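The no_page_table() helper used in the error paths above is tiny; from memory it behaves roughly like this: normally it returns NULL, but under FOLL_DUMP it returns an error pointer so that a core dump leaves a zero-filled hole instead of faulting pages in.

static struct page *no_page_table(struct vm_area_struct *vma,
		unsigned int flags)
{
	/* For never-touched anonymous areas in a core dump, returning an
	 * error (instead of NULL) skips handle_mm_fault() and leaves a
	 * zero-filled hole rather than allocating pages just for the dump */
	if ((flags & FOLL_DUMP) && (!vma->vm_ops || !vma->vm_ops->fault))
		return ERR_PTR(-EFAULT);
	return NULL;
}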
vm_normal_page() returns the page structure of a normally mapped page from its PTE.
Its main purpose is to filter out specially mapped pages.
We have changed files: this one lives in mm/memory.c.
It is a bit hard; I don't fully understand it yet.
struct page *__vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
pte_t pte, bool with_public_device,
unsigned long vma_flags)
{
unsigned long pfn = pte_pfn(pte);
if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) {
if (likely(!pte_special(pte)))
goto check_pfn;
if (vma->vm_ops && vma->vm_ops->find_special_page)
return vma->vm_ops->find_special_page(vma, addr);
if (vma_flags & (VM_PFNMAP | VM_MIXEDMAP))
return NULL;
if (is_zero_pfn(pfn))
return NULL;
/*
* Device public pages are special pages (they are ZONE_DEVICE
* pages but different from persistent memory). They behave
* allmost like normal pages. The difference is that they are
* not on the lru and thus should never be involve with any-
* thing that involve lru manipulation (mlock, numa balancing,
* ...).
*
* This is why we still want to return NULL for such page from
* vm_normal_page() so that we do not have to special case all
* call site of vm_normal_page().
*/
if (likely(pfn <= highest_memmap_pfn)) {
struct page *page = pfn_to_page(pfn);
if (is_device_public_page(page)) {
if (with_public_device)
return page;
return NULL;
}
}
if (pte_devmap(pte))
return NULL;
print_bad_pte(vma, addr, pte, NULL);
return NULL;
}
/* !CONFIG_ARCH_HAS_PTE_SPECIAL case follows: */
/*
* This part should never get called when CONFIG_SPECULATIVE_PAGE_FAULT
* is set. This is mainly because we can't rely on vm_start.
*/
if (unlikely(vma_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
if (vma_flags & VM_MIXEDMAP) {
if (!pfn_valid(pfn))
return NULL;
goto out;
} else {
unsigned long off;
off = (addr - vma->vm_start) >> PAGE_SHIFT;
if (pfn == vma->vm_pgoff + off)
return NULL;
if (!is_cow_mapping(vma_flags))
return NULL;
}
}
if (is_zero_pfn(pfn))
return NULL;
check_pfn:
if (unlikely(pfn > highest_memmap_pfn)) {
print_bad_pte(vma, addr, pte, NULL);
return NULL;
}
/*
* NOTE! We still have PageReserved() pages in the page tables.
* eg. VDSO mappings can cause them to exist.
*/
out:
return pfn_to_page(pfn);
}
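For reference, the is_cow_mapping() test used in the VM_PFNMAP branch above is just a flag check (roughly, from mm/internal.h): a mapping counts as copy-on-write when it may be written but is not shared.

static inline bool is_cow_mapping(vm_flags_t flags)
{
	return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
}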