diff options
| -rw-r--r-- | Documentation/filesystems/proc.rst | 5 | ||||
| -rw-r--r-- | fs/proc/array.c | 2 | ||||
| -rw-r--r-- | include/linux/huge_mm.h | 20 | ||||
| -rw-r--r-- | include/linux/mm_types.h | 13 | ||||
| -rw-r--r-- | include/uapi/linux/prctl.h | 10 | ||||
| -rw-r--r-- | kernel/sys.c | 59 | ||||
| -rw-r--r-- | mm/khugepaged.c | 2 |
7 files changed, 82 insertions, 29 deletions
diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 2971551b7235..915a3e44bc12 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -291,8 +291,9 @@ It's slow but very precise. HugetlbPages size of hugetlb memory portions CoreDumping process's memory is currently being dumped (killing the process may lead to a corrupted core) - THP_enabled process is allowed to use THP (returns 0 when - PR_SET_THP_DISABLE is set on the process + THP_enabled process is allowed to use THP (returns 0 when + PR_SET_THP_DISABLE is set on the process to disable + THP completely, not just partially) Threads number of threads SigQ number of signals queued/max. number for queue SigPnd bitmap of pending signals for the thread diff --git a/fs/proc/array.c b/fs/proc/array.c index c286dc12325e..d84b291dd1ed 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -422,7 +422,7 @@ static inline void task_thp_status(struct seq_file *m, struct mm_struct *mm) bool thp_enabled = IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE); if (thp_enabled) - thp_enabled = !mm_flags_test(MMF_DISABLE_THP, mm); + thp_enabled = !mm_flags_test(MMF_DISABLE_THP_COMPLETELY, mm); seq_printf(m, "THP_enabled:\t%d\n", thp_enabled); } diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 84b7eebe0d68..22b8b067b295 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -318,16 +318,26 @@ struct thpsize { (transparent_hugepage_flags & \ (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG)) +/* + * Check whether THPs are explicitly disabled for this VMA, for example, + * through madvise or prctl. + */ static inline bool vma_thp_disabled(struct vm_area_struct *vma, vm_flags_t vm_flags) { + /* Are THPs disabled for this VMA? */ + if (vm_flags & VM_NOHUGEPAGE) + return true; + /* Are THPs disabled for all VMAs in the whole process? */ + if (mm_flags_test(MMF_DISABLE_THP_COMPLETELY, vma->vm_mm)) + return true; /* - * Explicitly disabled through madvise or prctl, or some - * architectures may disable THP for some mappings, for - * example, s390 kvm. + * Are THPs disabled only for VMAs where we didn't get an explicit + * advise to use them? */ - return (vm_flags & VM_NOHUGEPAGE) || - mm_flags_test(MMF_DISABLE_THP, vma->vm_mm); + if (vm_flags & VM_HUGEPAGE) + return false; + return mm_flags_test(MMF_DISABLE_THP_EXCEPT_ADVISED, vma->vm_mm); } static inline bool thp_disabled_by_hw(void) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 05475b5fd516..d247da2fdb52 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1792,19 +1792,16 @@ enum { #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ #define MMF_VM_HUGEPAGE 17 /* set when mm is available for khugepaged */ -/* - * This one-shot flag is dropped due to necessity of changing exe once again - * on NFS restore - */ -//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ +#define MMF_HUGE_ZERO_FOLIO 18 /* mm has ever used the global huge zero folio */ #define MMF_HAS_UPROBES 19 /* has uprobes */ #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ #define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */ #define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ -#define MMF_HUGE_ZERO_FOLIO 23 /* mm has ever used the global huge zero folio */ -#define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ -#define MMF_DISABLE_THP_MASK BIT(MMF_DISABLE_THP) +#define MMF_DISABLE_THP_EXCEPT_ADVISED 23 /* no THP except when advised (e.g., VM_HUGEPAGE) */ +#define MMF_DISABLE_THP_COMPLETELY 24 /* no THP for all VMAs */ +#define MMF_DISABLE_THP_MASK (BIT(MMF_DISABLE_THP_COMPLETELY) | \ + BIT(MMF_DISABLE_THP_EXCEPT_ADVISED)) #define MMF_OOM_REAP_QUEUED 25 /* mm was queued for oom_reaper */ #define MMF_MULTIPROCESS 26 /* mm is shared between processes */ /* diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index ed3aed264aeb..150b6deebfb1 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -177,7 +177,17 @@ struct prctl_mm_map { #define PR_GET_TID_ADDRESS 40 +/* + * Flags for PR_SET_THP_DISABLE are only applicable when disabling. Bit 0 + * is reserved, so PR_GET_THP_DISABLE can return "1 | flags", to effectively + * return "1" when no flags were specified for PR_SET_THP_DISABLE. + */ #define PR_SET_THP_DISABLE 41 +/* + * Don't disable THPs when explicitly advised (e.g., MADV_HUGEPAGE / + * VM_HUGEPAGE). + */ +# define PR_THP_DISABLE_EXCEPT_ADVISED (1 << 1) #define PR_GET_THP_DISABLE 42 /* diff --git a/kernel/sys.c b/kernel/sys.c index 605f7fe9a143..a46d9b75880b 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2452,6 +2452,51 @@ static int prctl_get_auxv(void __user *addr, unsigned long len) return sizeof(mm->saved_auxv); } +static int prctl_get_thp_disable(unsigned long arg2, unsigned long arg3, + unsigned long arg4, unsigned long arg5) +{ + struct mm_struct *mm = current->mm; + + if (arg2 || arg3 || arg4 || arg5) + return -EINVAL; + + /* If disabled, we return "1 | flags", otherwise 0. */ + if (mm_flags_test(MMF_DISABLE_THP_COMPLETELY, mm)) + return 1; + else if (mm_flags_test(MMF_DISABLE_THP_EXCEPT_ADVISED, mm)) + return 1 | PR_THP_DISABLE_EXCEPT_ADVISED; + return 0; +} + +static int prctl_set_thp_disable(bool thp_disable, unsigned long flags, + unsigned long arg4, unsigned long arg5) +{ + struct mm_struct *mm = current->mm; + + if (arg4 || arg5) + return -EINVAL; + + /* Flags are only allowed when disabling. */ + if ((!thp_disable && flags) || (flags & ~PR_THP_DISABLE_EXCEPT_ADVISED)) + return -EINVAL; + if (mmap_write_lock_killable(current->mm)) + return -EINTR; + if (thp_disable) { + if (flags & PR_THP_DISABLE_EXCEPT_ADVISED) { + mm_flags_clear(MMF_DISABLE_THP_COMPLETELY, mm); + mm_flags_set(MMF_DISABLE_THP_EXCEPT_ADVISED, mm); + } else { + mm_flags_set(MMF_DISABLE_THP_COMPLETELY, mm); + mm_flags_clear(MMF_DISABLE_THP_EXCEPT_ADVISED, mm); + } + } else { + mm_flags_clear(MMF_DISABLE_THP_COMPLETELY, mm); + mm_flags_clear(MMF_DISABLE_THP_EXCEPT_ADVISED, mm); + } + mmap_write_unlock(current->mm); + return 0; +} + SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5) { @@ -2625,20 +2670,10 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, return -EINVAL; return task_no_new_privs(current) ? 1 : 0; case PR_GET_THP_DISABLE: - if (arg2 || arg3 || arg4 || arg5) - return -EINVAL; - error = !!mm_flags_test(MMF_DISABLE_THP, me->mm); + error = prctl_get_thp_disable(arg2, arg3, arg4, arg5); break; case PR_SET_THP_DISABLE: - if (arg3 || arg4 || arg5) - return -EINVAL; - if (mmap_write_lock_killable(me->mm)) - return -EINTR; - if (arg2) - mm_flags_set(MMF_DISABLE_THP, me->mm); - else - mm_flags_clear(MMF_DISABLE_THP, me->mm); - mmap_write_unlock(me->mm); + error = prctl_set_thp_disable(arg2, arg3, arg4, arg5); break; case PR_MPX_ENABLE_MANAGEMENT: case PR_MPX_DISABLE_MANAGEMENT: diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 550eb00116c5..1a416b865997 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -410,7 +410,7 @@ static inline int hpage_collapse_test_exit(struct mm_struct *mm) static inline int hpage_collapse_test_exit_or_disable(struct mm_struct *mm) { return hpage_collapse_test_exit(mm) || - mm_flags_test(MMF_DISABLE_THP, mm); + mm_flags_test(MMF_DISABLE_THP_COMPLETELY, mm); } static bool hugepage_pmd_enabled(void) |
