From ecc7e37d4dadd16f6be125ca496feccd05454da4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 11 Nov 2019 23:03:20 +0100 Subject: x86/io: Speedup schedule out of I/O bitmap user There is no requirement to update the TSS I/O bitmap when a thread using it is scheduled out and the incoming thread does not use it. For the permission check based on the TSS I/O bitmap the CPU calculates the memory location of the I/O bitmap by the address of the TSS and the io_bitmap_base member of the tss_struct. The easiest way to invalidate the I/O bitmap is to switch the offset to an address outside of the TSS limit. If an I/O instruction is issued from user space the TSS limit causes #GP to be raised in the same was as valid I/O bitmap with all bits set to 1 would do. This removes the extra work when an I/O bitmap using task is scheduled out and puts the burden on the rare I/O bitmap users when they are scheduled in. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/processor.h | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6e0a3b43d027..6d0059c21969 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -330,8 +330,23 @@ struct x86_hw_tss { #define IO_BITMAP_BITS 65536 #define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) -#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss)) -#define INVALID_IO_BITMAP_OFFSET 0x8000 + +#define IO_BITMAP_OFFSET_VALID \ + (offsetof(struct tss_struct, io_bitmap) - \ + offsetof(struct tss_struct, x86_tss)) + +/* + * sizeof(unsigned long) coming from an extra "long" at the end + * of the iobitmap. + * + * -1? seg base+limit should be pointing to the address of the + * last valid byte + */ +#define __KERNEL_TSS_LIMIT \ + (IO_BITMAP_OFFSET_VALID + IO_BITMAP_BYTES + sizeof(unsigned long) - 1) + +/* Base offset outside of TSS_LIMIT so unpriviledged IO causes #GP */ +#define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1) struct entry_stack { unsigned long words[64]; @@ -349,6 +364,15 @@ struct tss_struct { */ struct x86_hw_tss x86_tss; + /* + * Store the dirty size of the last io bitmap offender. The next + * one will have to do the cleanup as the switch out to a non io + * bitmap user will just set x86_tss.io_bitmap_base to a value + * outside of the TSS limit. So for sane tasks there is no need to + * actually touch the io_bitmap at all. + */ + unsigned int io_bitmap_prev_max; + /* * The extra 1 is there because the CPU will access an * additional byte beyond the end of the IO permission @@ -360,16 +384,6 @@ struct tss_struct { DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw); -/* - * sizeof(unsigned long) coming from an extra "long" at the end - * of the iobitmap. - * - * -1? seg base+limit should be pointing to the address of the - * last valid byte - */ -#define __KERNEL_TSS_LIMIT \ - (IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1) - /* Per CPU interrupt stacks */ struct irq_stack { char stack[IRQ_STACK_SIZE]; -- cgit v1.2.3 From f5848e5fd2f813c3a8009a642dfbcf635287c199 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Nov 2019 18:45:29 +0100 Subject: x86/tss: Move I/O bitmap data into a seperate struct Move the non hardware portion of I/O bitmap data into a seperate struct for readability sake. Originally-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/processor.h | 35 +++++++++++++++++++++-------------- arch/x86/kernel/cpu/common.c | 4 ++-- arch/x86/kernel/ioport.c | 4 ++-- arch/x86/kernel/process.c | 6 +++--- 4 files changed, 28 insertions(+), 21 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6d0059c21969..cd7cd7d10b81 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -328,11 +328,11 @@ struct x86_hw_tss { * IO-bitmap sizes: */ #define IO_BITMAP_BITS 65536 -#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8) -#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long)) +#define IO_BITMAP_BYTES (IO_BITMAP_BITS / BITS_PER_BYTE) +#define IO_BITMAP_LONGS (IO_BITMAP_BYTES / sizeof(long)) -#define IO_BITMAP_OFFSET_VALID \ - (offsetof(struct tss_struct, io_bitmap) - \ +#define IO_BITMAP_OFFSET_VALID \ + (offsetof(struct tss_struct, io_bitmap.bitmap) - \ offsetof(struct tss_struct, x86_tss)) /* @@ -356,14 +356,10 @@ struct entry_stack_page { struct entry_stack stack; } __aligned(PAGE_SIZE); -struct tss_struct { - /* - * The fixed hardware portion. This must not cross a page boundary - * at risk of violating the SDM's advice and potentially triggering - * errata. - */ - struct x86_hw_tss x86_tss; - +/* + * All IO bitmap related data stored in the TSS: + */ +struct x86_io_bitmap { /* * Store the dirty size of the last io bitmap offender. The next * one will have to do the cleanup as the switch out to a non io @@ -371,7 +367,7 @@ struct tss_struct { * outside of the TSS limit. So for sane tasks there is no need to * actually touch the io_bitmap at all. */ - unsigned int io_bitmap_prev_max; + unsigned int prev_max; /* * The extra 1 is there because the CPU will access an @@ -379,7 +375,18 @@ struct tss_struct { * bitmap. The extra byte must be all 1 bits, and must * be within the limit. */ - unsigned long io_bitmap[IO_BITMAP_LONGS + 1]; + unsigned long bitmap[IO_BITMAP_LONGS + 1]; +}; + +struct tss_struct { + /* + * The fixed hardware portion. This must not cross a page boundary + * at risk of violating the SDM's advice and potentially triggering + * errata. + */ + struct x86_hw_tss x86_tss; + + struct x86_io_bitmap io_bitmap; } __aligned(PAGE_SIZE); DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8c1000a57b55..3aee167246f7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1861,8 +1861,8 @@ void cpu_init(void) /* Initialize the TSS. */ tss_setup_ist(tss); tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID; - tss->io_bitmap_prev_max = 0; - memset(tss->io_bitmap, 0xff, sizeof(tss->io_bitmap)); + tss->io_bitmap.prev_max = 0; + memset(tss->io_bitmap.bitmap, 0xff, sizeof(tss->io_bitmap.bitmap)); set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); load_TR_desc(); diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index eed218a3fd48..80d99bb826f3 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -81,9 +81,9 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) /* Update the TSS */ tss = this_cpu_ptr(&cpu_tss_rw); - memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated); + memcpy(tss->io_bitmap.bitmap, t->io_bitmap_ptr, bytes_updated); /* Store the new end of the zero bits */ - tss->io_bitmap_prev_max = bytes; + tss->io_bitmap.prev_max = bytes; /* Make the bitmap base in the TSS valid */ tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_VALID; /* Make sure the TSS limit covers the I/O bitmap. */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 2444fe296de5..35f1c80df7a8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -374,11 +374,11 @@ static inline void switch_to_bitmap(struct thread_struct *next, * bits permitted, then the copy needs to cover those as * well so they get turned off. */ - memcpy(tss->io_bitmap, next->io_bitmap_ptr, - max(tss->io_bitmap_prev_max, next->io_bitmap_max)); + memcpy(tss->io_bitmap.bitmap, next->io_bitmap_ptr, + max(tss->io_bitmap.prev_max, next->io_bitmap_max)); /* Store the new max and set io_bitmap_base valid */ - tss->io_bitmap_prev_max = next->io_bitmap_max; + tss->io_bitmap.prev_max = next->io_bitmap_max; tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_VALID; /* -- cgit v1.2.3 From 577d5cd7e5851d3832066cd0422475fa7db2ee17 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 11 Nov 2019 23:03:21 +0100 Subject: x86/ioperm: Move iobitmap data into a struct No point in having all the data in thread_struct, especially as upcoming changes add more. Make the bitmap in the new struct accessible as array of longs and as array of characters via a union, so both the bitmap functions and the update logic can avoid type casts. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_bitmap.h | 13 +++++++++++++ arch/x86/include/asm/processor.h | 6 ++---- arch/x86/kernel/ioport.c | 27 ++++++++++++++------------- arch/x86/kernel/process.c | 38 ++++++++++++++++++++------------------ arch/x86/kernel/ptrace.c | 12 ++++++++---- 5 files changed, 57 insertions(+), 39 deletions(-) create mode 100644 arch/x86/include/asm/io_bitmap.h (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h new file mode 100644 index 000000000000..1a12b9ff5e4e --- /dev/null +++ b/arch/x86/include/asm/io_bitmap.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_IOBITMAP_H +#define _ASM_X86_IOBITMAP_H + +#include + +struct io_bitmap { + /* The maximum number of bytes to copy so all zero bits are covered */ + unsigned int max; + unsigned long bitmap[IO_BITMAP_LONGS]; +}; + +#endif diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index cd7cd7d10b81..c949e0e5cbe6 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -7,6 +7,7 @@ /* Forward declaration, a strange C thing */ struct task_struct; struct mm_struct; +struct io_bitmap; struct vm86; #include @@ -501,10 +502,8 @@ struct thread_struct { struct vm86 *vm86; #endif /* IO permissions: */ - unsigned long *io_bitmap_ptr; + struct io_bitmap *io_bitmap; unsigned long iopl; - /* Max allowed port in the bitmap, in bytes: */ - unsigned io_bitmap_max; mm_segment_t addr_limit; @@ -862,7 +861,6 @@ static inline void spin_lock_prefetch(const void *x) #define INIT_THREAD { \ .sp0 = TOP_OF_INIT_STACK, \ .sysenter_cs = __KERNEL_CS, \ - .io_bitmap_ptr = NULL, \ .addr_limit = KERNEL_DS, \ } diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 80d99bb826f3..05f77f3af46d 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -11,6 +11,7 @@ #include #include +#include #include /* @@ -21,7 +22,7 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) unsigned int i, max_long, bytes, bytes_updated; struct thread_struct *t = ¤t->thread; struct tss_struct *tss; - unsigned long *bitmap; + struct io_bitmap *iobm; if ((from + num <= from) || (from + num > IO_BITMAP_BITS)) return -EINVAL; @@ -34,16 +35,16 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) * IO bitmap up. ioperm() is much less timing critical than clone(), * this is why we delay this operation until now: */ - bitmap = t->io_bitmap_ptr; - if (!bitmap) { + iobm = t->io_bitmap; + if (!iobm) { /* No point to allocate a bitmap just to clear permissions */ if (!turn_on) return 0; - bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); - if (!bitmap) + iobm = kmalloc(sizeof(*iobm), GFP_KERNEL); + if (!iobm) return -ENOMEM; - memset(bitmap, 0xff, IO_BITMAP_BYTES); + memset(iobm->bitmap, 0xff, sizeof(iobm->bitmap)); } /* @@ -52,9 +53,9 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) */ preempt_disable(); if (turn_on) - bitmap_clear(bitmap, from, num); + bitmap_clear(iobm->bitmap, from, num); else - bitmap_set(bitmap, from, num); + bitmap_set(iobm->bitmap, from, num); /* * Search for a (possibly new) maximum. This is simple and stupid, @@ -62,26 +63,26 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) */ max_long = 0; for (i = 0; i < IO_BITMAP_LONGS; i++) { - if (bitmap[i] != ~0UL) + if (iobm->bitmap[i] != ~0UL) max_long = i; } bytes = (max_long + 1) * sizeof(unsigned long); - bytes_updated = max(bytes, t->io_bitmap_max); + bytes_updated = max(bytes, t->io_bitmap->max); /* Update the thread data */ - t->io_bitmap_max = bytes; + iobm->max = bytes; /* * Store the bitmap pointer (might be the same if the task already * head one). Set the TIF flag, just in case this is the first * invocation. */ - t->io_bitmap_ptr = bitmap; + t->io_bitmap = iobm; set_thread_flag(TIF_IO_BITMAP); /* Update the TSS */ tss = this_cpu_ptr(&cpu_tss_rw); - memcpy(tss->io_bitmap.bitmap, t->io_bitmap_ptr, bytes_updated); + memcpy(tss->io_bitmap.bitmap, iobm->bitmap, bytes_updated); /* Store the new end of the zero bits */ tss->io_bitmap.prev_max = bytes; /* Make the bitmap base in the TSS valid */ diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 35f1c80df7a8..1504fd2b9bc4 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include "process.h" @@ -101,21 +102,20 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) void exit_thread(struct task_struct *tsk) { struct thread_struct *t = &tsk->thread; - unsigned long *bp = t->io_bitmap_ptr; + struct io_bitmap *iobm = t->io_bitmap; struct fpu *fpu = &t->fpu; struct tss_struct *tss; - if (bp) { + if (iobm) { preempt_disable(); tss = this_cpu_ptr(&cpu_tss_rw); - t->io_bitmap_ptr = NULL; - t->io_bitmap_max = 0; + t->io_bitmap = NULL; clear_thread_flag(TIF_IO_BITMAP); /* Invalidate the io bitmap base in the TSS */ tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID; preempt_enable(); - kfree(bp); + kfree(iobm); } free_vm86(t); @@ -135,25 +135,25 @@ static int set_new_tls(struct task_struct *p, unsigned long tls) static inline int copy_io_bitmap(struct task_struct *tsk) { + struct io_bitmap *iobm = current->thread.io_bitmap; + if (likely(!test_tsk_thread_flag(current, TIF_IO_BITMAP))) return 0; - tsk->thread.io_bitmap_ptr = kmemdup(current->thread.io_bitmap_ptr, - IO_BITMAP_BYTES, GFP_KERNEL); - if (!tsk->thread.io_bitmap_ptr) { - tsk->thread.io_bitmap_max = 0; + tsk->thread.io_bitmap = kmemdup(iobm, sizeof(*iobm), GFP_KERNEL); + + if (!tsk->thread.io_bitmap) return -ENOMEM; - } + set_tsk_thread_flag(tsk, TIF_IO_BITMAP); return 0; } static inline void free_io_bitmap(struct task_struct *tsk) { - if (tsk->thread.io_bitmap_ptr) { - kfree(tsk->thread.io_bitmap_ptr); - tsk->thread.io_bitmap_ptr = NULL; - tsk->thread.io_bitmap_max = 0; + if (tsk->thread.io_bitmap) { + kfree(tsk->thread.io_bitmap); + tsk->thread.io_bitmap = NULL; } } @@ -172,7 +172,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp, frame->bp = 0; frame->ret_addr = (unsigned long) ret_from_fork; p->thread.sp = (unsigned long) fork_frame; - p->thread.io_bitmap_ptr = NULL; + p->thread.io_bitmap = NULL; memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps)); #ifdef CONFIG_X86_64 @@ -366,6 +366,8 @@ static inline void switch_to_bitmap(struct thread_struct *next, struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); if (tifn & _TIF_IO_BITMAP) { + struct io_bitmap *iobm = next->io_bitmap; + /* * Copy at least the size of the incoming tasks bitmap * which covers the last permitted I/O port. @@ -374,11 +376,11 @@ static inline void switch_to_bitmap(struct thread_struct *next, * bits permitted, then the copy needs to cover those as * well so they get turned off. */ - memcpy(tss->io_bitmap.bitmap, next->io_bitmap_ptr, - max(tss->io_bitmap.prev_max, next->io_bitmap_max)); + memcpy(tss->io_bitmap.bitmap, next->io_bitmap->bitmap, + max(tss->io_bitmap.prev_max, next->io_bitmap->max)); /* Store the new max and set io_bitmap_base valid */ - tss->io_bitmap.prev_max = next->io_bitmap_max; + tss->io_bitmap.prev_max = next->io_bitmap->max; tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_VALID; /* diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 7c526741215b..066e5b01a7e0 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -42,6 +42,7 @@ #include #include #include +#include #include "tls.h" @@ -697,7 +698,9 @@ static int ptrace_set_debugreg(struct task_struct *tsk, int n, static int ioperm_active(struct task_struct *target, const struct user_regset *regset) { - return DIV_ROUND_UP(target->thread.io_bitmap_max, regset->size); + struct io_bitmap *iobm = target->thread.io_bitmap; + + return iobm ? DIV_ROUND_UP(iobm->max, regset->size) : 0; } static int ioperm_get(struct task_struct *target, @@ -705,12 +708,13 @@ static int ioperm_get(struct task_struct *target, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { - if (!target->thread.io_bitmap_ptr) + struct io_bitmap *iobm = target->thread.io_bitmap; + + if (!iobm) return -ENXIO; return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - target->thread.io_bitmap_ptr, - 0, IO_BITMAP_BYTES); + iobm->bitmap, 0, IO_BITMAP_BYTES); } /* -- cgit v1.2.3 From 060aa16fdb7c5078a4159a76e5dc87d6a493af9b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 11 Nov 2019 23:03:22 +0100 Subject: x86/ioperm: Add bitmap sequence number Add a globally unique sequence number which is incremented when ioperm() is changing the I/O bitmap of a task. Store the new sequence number in the io_bitmap structure and compare it with the sequence number of the I/O bitmap which was last loaded on a CPU. Only update the bitmap if the sequence is different. That should further reduce the overhead of I/O bitmap scheduling when there are only a few I/O bitmap users on the system. The 64bit sequence counter is sufficient. A wraparound of the sequence counter assuming an ioperm() call every nanosecond would require about 584 years of uptime. Suggested-by: Linus Torvalds Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/io_bitmap.h | 1 + arch/x86/include/asm/processor.h | 3 +++ arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/ioport.c | 5 +++++ arch/x86/kernel/process.c | 38 ++++++++++++++++++++++++++++---------- 5 files changed, 38 insertions(+), 10 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h index 1a12b9ff5e4e..d63bd5afe671 100644 --- a/arch/x86/include/asm/io_bitmap.h +++ b/arch/x86/include/asm/io_bitmap.h @@ -5,6 +5,7 @@ #include struct io_bitmap { + u64 sequence; /* The maximum number of bytes to copy so all zero bits are covered */ unsigned int max; unsigned long bitmap[IO_BITMAP_LONGS]; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c949e0e5cbe6..40bb0f7bca3f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -361,6 +361,9 @@ struct entry_stack_page { * All IO bitmap related data stored in the TSS: */ struct x86_io_bitmap { + /* The sequence number of the last active bitmap. */ + u64 prev_sequence; + /* * Store the dirty size of the last io bitmap offender. The next * one will have to do the cleanup as the switch out to a non io diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 3aee167246f7..79dd544bb974 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1862,6 +1862,7 @@ void cpu_init(void) tss_setup_ist(tss); tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID; tss->io_bitmap.prev_max = 0; + tss->io_bitmap.prev_sequence = 0; memset(tss->io_bitmap.bitmap, 0xff, sizeof(tss->io_bitmap.bitmap)); set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 05f77f3af46d..7c1ab5c2b395 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -14,6 +14,8 @@ #include #include +static atomic64_t io_bitmap_sequence; + /* * this changes the io permissions bitmap in the current task. */ @@ -72,6 +74,9 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) /* Update the thread data */ iobm->max = bytes; + /* Update the sequence number to force an update in switch_to() */ + iobm->sequence = atomic64_add_return(1, &io_bitmap_sequence); + /* * Store the bitmap pointer (might be the same if the task already * head one). Set the TIF flag, just in case this is the first diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 1504fd2b9bc4..7c49be90468b 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -360,6 +360,28 @@ void arch_setup_new_exec(void) } } +static void switch_to_update_io_bitmap(struct tss_struct *tss, + struct io_bitmap *iobm) +{ + /* + * Copy at least the byte range of the incoming tasks bitmap which + * covers the permitted I/O ports. + * + * If the previous task which used an I/O bitmap had more bits + * permitted, then the copy needs to cover those as well so they + * get turned off. + */ + memcpy(tss->io_bitmap.bitmap, iobm->bitmap, + max(tss->io_bitmap.prev_max, iobm->max)); + + /* + * Store the new max and the sequence number of this bitmap + * and a pointer to the bitmap itself. + */ + tss->io_bitmap.prev_max = iobm->max; + tss->io_bitmap.prev_sequence = iobm->sequence; +} + static inline void switch_to_bitmap(struct thread_struct *next, unsigned long tifp, unsigned long tifn) { @@ -369,18 +391,14 @@ static inline void switch_to_bitmap(struct thread_struct *next, struct io_bitmap *iobm = next->io_bitmap; /* - * Copy at least the size of the incoming tasks bitmap - * which covers the last permitted I/O port. - * - * If the previous task which used an io bitmap had more - * bits permitted, then the copy needs to cover those as - * well so they get turned off. + * Only copy bitmap data when the sequence number + * differs. The update time is accounted to the incoming + * task. */ - memcpy(tss->io_bitmap.bitmap, next->io_bitmap->bitmap, - max(tss->io_bitmap.prev_max, next->io_bitmap->max)); + if (tss->io_bitmap.prev_sequence != iobm->sequence) + switch_to_update_io_bitmap(tss, iobm); - /* Store the new max and set io_bitmap_base valid */ - tss->io_bitmap.prev_max = next->io_bitmap->max; + /* Enable the bitmap */ tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_VALID; /* -- cgit v1.2.3 From c8137ace56383688af911fea5934c71ad158135e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 11 Nov 2019 23:03:28 +0100 Subject: x86/iopl: Restrict iopl() permission scope The access to the full I/O port range can be also provided by the TSS I/O bitmap, but that would require to copy 8k of data on scheduling in the task. As shown with the sched out optimization TSS.io_bitmap_base can be used to switch the incoming task to a preallocated I/O bitmap which has all bits zero, i.e. allows access to all I/O ports. Implementing this allows to provide an iopl() emulation mode which restricts the IOPL level 3 permissions to I/O port access but removes the STI/CLI permission which is coming with the hardware IOPL mechansim. Provide a config option to switch IOPL to emulation mode, make it the default and while at it also provide an option to disable IOPL completely. Signed-off-by: Thomas Gleixner Acked-by: Andy Lutomirski --- arch/x86/Kconfig | 32 ++++++++++++ arch/x86/include/asm/pgtable_32_types.h | 2 +- arch/x86/include/asm/processor.h | 28 ++++++++--- arch/x86/kernel/cpu/common.c | 5 ++ arch/x86/kernel/ioport.c | 87 +++++++++++++++++++++++---------- arch/x86/kernel/process.c | 32 +++++++----- 6 files changed, 139 insertions(+), 47 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d6e1faa28c58..2aad1cd14cc5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1254,6 +1254,38 @@ config X86_VSYSCALL_EMULATION Disabling this option saves about 7K of kernel size and possibly 4K of additional runtime pagetable memory. +choice + prompt "IOPL" + default X86_IOPL_EMULATION + +config X86_IOPL_EMULATION + bool "IOPL Emulation" + ---help--- + Legacy IOPL support is an overbroad mechanism which allows user + space aside of accessing all 65536 I/O ports also to disable + interrupts. To gain this access the caller needs CAP_SYS_RAWIO + capabilities and permission from potentially active security + modules. + + The emulation restricts the functionality of the syscall to + only allowing the full range I/O port access, but prevents the + ability to disable interrupts from user space. + +config X86_IOPL_LEGACY + bool "IOPL Legacy" + ---help--- + Allow the full IOPL permissions, i.e. user space access to all + 65536 I/O ports and also the ability to disable interrupts, which + is overbroad and can result in system lockups. + +config X86_IOPL_NONE + bool "IOPL None" + ---help--- + Disable the IOPL permission syscall. That's the safest option as + no sane application should depend on this functionality. + +endchoice + config TOSHIBA tristate "Toshiba Laptop support" depends on X86_32 diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index b0bc0fff5f1f..0fab4bfb4df2 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -44,7 +44,7 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */ * Define this here and validate with BUILD_BUG_ON() in pgtable_32.c * to avoid include recursion hell */ -#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40) +#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 41) #define CPU_ENTRY_AREA_BASE \ ((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 40bb0f7bca3f..b0e02aa3f46a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -332,19 +332,21 @@ struct x86_hw_tss { #define IO_BITMAP_BYTES (IO_BITMAP_BITS / BITS_PER_BYTE) #define IO_BITMAP_LONGS (IO_BITMAP_BYTES / sizeof(long)) -#define IO_BITMAP_OFFSET_VALID \ +#define IO_BITMAP_OFFSET_VALID_MAP \ (offsetof(struct tss_struct, io_bitmap.bitmap) - \ offsetof(struct tss_struct, x86_tss)) +#define IO_BITMAP_OFFSET_VALID_ALL \ + (offsetof(struct tss_struct, io_bitmap.mapall) - \ + offsetof(struct tss_struct, x86_tss)) + /* - * sizeof(unsigned long) coming from an extra "long" at the end - * of the iobitmap. - * - * -1? seg base+limit should be pointing to the address of the - * last valid byte + * sizeof(unsigned long) coming from an extra "long" at the end of the + * iobitmap. The limit is inclusive, i.e. the last valid byte. */ #define __KERNEL_TSS_LIMIT \ - (IO_BITMAP_OFFSET_VALID + IO_BITMAP_BYTES + sizeof(unsigned long) - 1) + (IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + \ + sizeof(unsigned long) - 1) /* Base offset outside of TSS_LIMIT so unpriviledged IO causes #GP */ #define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1) @@ -380,6 +382,12 @@ struct x86_io_bitmap { * be within the limit. */ unsigned long bitmap[IO_BITMAP_LONGS + 1]; + + /* + * Special I/O bitmap to emulate IOPL(3). All bytes zero, + * except the additional byte at the end. + */ + unsigned long mapall[IO_BITMAP_LONGS + 1]; }; struct tss_struct { @@ -506,7 +514,13 @@ struct thread_struct { #endif /* IO permissions: */ struct io_bitmap *io_bitmap; + + /* + * IOPL. Priviledge level dependent I/O permission which includes + * user space CLI/STI when granted. + */ unsigned long iopl; + unsigned long iopl_emul; mm_segment_t addr_limit; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 79dd544bb974..7bf402be13bb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1864,6 +1864,11 @@ void cpu_init(void) tss->io_bitmap.prev_max = 0; tss->io_bitmap.prev_sequence = 0; memset(tss->io_bitmap.bitmap, 0xff, sizeof(tss->io_bitmap.bitmap)); + /* + * Invalidate the extra array entry past the end of the all + * permission bitmap as required by the hardware. + */ + tss->io_bitmap.mapall[IO_BITMAP_LONGS] = ~0UL; set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); load_TR_desc(); diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 3548563b0935..9ed9458e02df 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -17,25 +17,41 @@ static atomic64_t io_bitmap_sequence; void io_bitmap_share(struct task_struct *tsk) - { - /* - * Take a refcount on current's bitmap. It can be used by - * both tasks as long as none of them changes the bitmap. - */ - refcount_inc(¤t->thread.io_bitmap->refcnt); - tsk->thread.io_bitmap = current->thread.io_bitmap; +{ + /* Can be NULL when current->thread.iopl_emul == 3 */ + if (current->thread.io_bitmap) { + /* + * Take a refcount on current's bitmap. It can be used by + * both tasks as long as none of them changes the bitmap. + */ + refcount_inc(¤t->thread.io_bitmap->refcnt); + tsk->thread.io_bitmap = current->thread.io_bitmap; + } set_tsk_thread_flag(tsk, TIF_IO_BITMAP); } +static void task_update_io_bitmap(void) +{ + struct thread_struct *t = ¤t->thread; + + if (t->iopl_emul == 3 || t->io_bitmap) { + /* TSS update is handled on exit to user space */ + set_thread_flag(TIF_IO_BITMAP); + } else { + clear_thread_flag(TIF_IO_BITMAP); + /* Invalidate TSS */ + preempt_disable(); + tss_update_io_bitmap(); + preempt_enable(); + } +} + void io_bitmap_exit(void) { struct io_bitmap *iobm = current->thread.io_bitmap; current->thread.io_bitmap = NULL; - clear_thread_flag(TIF_IO_BITMAP); - preempt_disable(); - tss_update_io_bitmap(); - preempt_enable(); + task_update_io_bitmap(); if (iobm && refcount_dec_and_test(&iobm->refcnt)) kfree(iobm); } @@ -157,36 +173,55 @@ SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on) */ SYSCALL_DEFINE1(iopl, unsigned int, level) { - struct pt_regs *regs = current_pt_regs(); struct thread_struct *t = ¤t->thread; + struct pt_regs *regs = current_pt_regs(); + unsigned int old; /* * Careful: the IOPL bits in regs->flags are undefined under Xen PV * and changing them has no effect. */ - unsigned int old = t->iopl >> X86_EFLAGS_IOPL_BIT; + if (IS_ENABLED(CONFIG_X86_IOPL_NONE)) + return -ENOSYS; if (level > 3) return -EINVAL; + + if (IS_ENABLED(CONFIG_X86_IOPL_EMULATION)) + old = t->iopl_emul; + else + old = t->iopl >> X86_EFLAGS_IOPL_BIT; + + /* No point in going further if nothing changes */ + if (level == old) + return 0; + /* Trying to gain more privileges? */ if (level > old) { if (!capable(CAP_SYS_RAWIO) || security_locked_down(LOCKDOWN_IOPORT)) return -EPERM; } - /* - * Change the flags value on the return stack, which has been set - * up on system-call entry. See also the fork and signal handling - * code how this is handled. - */ - regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | - (level << X86_EFLAGS_IOPL_BIT); - /* Store the new level in the thread struct */ - t->iopl = level << X86_EFLAGS_IOPL_BIT; - /* - * X86_32 switches immediately and XEN handles it via emulation. - */ - set_iopl_mask(t->iopl); + + if (IS_ENABLED(CONFIG_X86_IOPL_EMULATION)) { + t->iopl_emul = level; + task_update_io_bitmap(); + } else { + /* + * Change the flags value on the return stack, which has + * been set up on system-call entry. See also the fork and + * signal handling code how this is handled. + */ + regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | + (level << X86_EFLAGS_IOPL_BIT); + /* Store the new level in the thread struct */ + t->iopl = level << X86_EFLAGS_IOPL_BIT; + /* + * X86_32 switches immediately and XEN handles it via + * emulation. + */ + set_iopl_mask(t->iopl); + } return 0; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 0b19c137c442..8a844a5d5ae8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -376,21 +376,27 @@ static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm) void tss_update_io_bitmap(void) { struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); + u16 *base = &tss->x86_tss.io_bitmap_base; if (test_thread_flag(TIF_IO_BITMAP)) { - struct io_bitmap *iobm = current->thread.io_bitmap; - - /* - * Only copy bitmap data when the sequence number - * differs. The update time is accounted to the incoming - * task. - */ - if (tss->io_bitmap.prev_sequence != iobm->sequence) - tss_copy_io_bitmap(tss, iobm); - - /* Enable the bitmap */ - tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_VALID; - + struct thread_struct *t = ¤t->thread; + + if (IS_ENABLED(CONFIG_X86_IOPL_EMULATION) && + t->iopl_emul == 3) { + *base = IO_BITMAP_OFFSET_VALID_ALL; + } else { + struct io_bitmap *iobm = t->io_bitmap; + /* + * Only copy bitmap data when the sequence number + * differs. The update time is accounted to the + * incoming task. + */ + if (tss->io_bitmap.prev_sequence != iobm->sequence) + tss_copy_io_bitmap(tss, iobm); + + /* Enable the bitmap */ + *base = IO_BITMAP_OFFSET_VALID_MAP; + } /* * Make sure that the TSS limit is covering the io bitmap. * It might have been cut down by a VMEXIT to 0x67 which -- cgit v1.2.3 From a24ca9976843156eabbc5f2d798954b5674d1b61 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 11 Nov 2019 23:03:29 +0100 Subject: x86/iopl: Remove legacy IOPL option The IOPL emulation via the I/O bitmap is sufficient. Remove the legacy cruft dealing with the (e)flags based IOPL mechanism. Signed-off-by: Thomas Gleixner Reviewed-by: Juergen Gross (Paravirt and Xen parts) Acked-by: Andy Lutomirski --- arch/x86/Kconfig | 23 +++-------------- arch/x86/include/asm/paravirt.h | 4 --- arch/x86/include/asm/paravirt_types.h | 2 -- arch/x86/include/asm/processor.h | 26 +++---------------- arch/x86/include/asm/xen/hypervisor.h | 2 -- arch/x86/kernel/ioport.c | 47 ++++++++--------------------------- arch/x86/kernel/paravirt.c | 2 -- arch/x86/kernel/process_32.c | 9 ------- arch/x86/kernel/process_64.c | 11 -------- arch/x86/xen/enlighten_pv.c | 10 -------- 10 files changed, 17 insertions(+), 119 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2aad1cd14cc5..1f926e396ec1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1254,12 +1254,9 @@ config X86_VSYSCALL_EMULATION Disabling this option saves about 7K of kernel size and possibly 4K of additional runtime pagetable memory. -choice - prompt "IOPL" - default X86_IOPL_EMULATION - config X86_IOPL_EMULATION bool "IOPL Emulation" + default y ---help--- Legacy IOPL support is an overbroad mechanism which allows user space aside of accessing all 65536 I/O ports also to disable @@ -1269,22 +1266,8 @@ config X86_IOPL_EMULATION The emulation restricts the functionality of the syscall to only allowing the full range I/O port access, but prevents the - ability to disable interrupts from user space. - -config X86_IOPL_LEGACY - bool "IOPL Legacy" - ---help--- - Allow the full IOPL permissions, i.e. user space access to all - 65536 I/O ports and also the ability to disable interrupts, which - is overbroad and can result in system lockups. - -config X86_IOPL_NONE - bool "IOPL None" - ---help--- - Disable the IOPL permission syscall. That's the safest option as - no sane application should depend on this functionality. - -endchoice + ability to disable interrupts from user space which would be + granted if the hardware IOPL mechanism would be used. config TOSHIBA tristate "Toshiba Laptop support" diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 69089d46f128..86e7317eb31f 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -294,10 +294,6 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g) { PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g); } -static inline void set_iopl_mask(unsigned mask) -{ - PVOP_VCALL1(cpu.set_iopl_mask, mask); -} static inline void paravirt_activate_mm(struct mm_struct *prev, struct mm_struct *next) diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index 70b654f3ffe5..84812964d3dd 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -140,8 +140,6 @@ struct pv_cpu_ops { void (*load_sp0)(unsigned long sp0); - void (*set_iopl_mask)(unsigned mask); - void (*wbinvd)(void); /* cpuid emulation, mostly so that caps bits can be disabled */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b0e02aa3f46a..1387d31c5e07 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -516,10 +516,10 @@ struct thread_struct { struct io_bitmap *io_bitmap; /* - * IOPL. Priviledge level dependent I/O permission which includes - * user space CLI/STI when granted. + * IOPL. Priviledge level dependent I/O permission which is + * emulated via the I/O bitmap to prevent user space from disabling + * interrupts. */ - unsigned long iopl; unsigned long iopl_emul; mm_segment_t addr_limit; @@ -552,25 +552,6 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, */ #define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ -/* - * Set IOPL bits in EFLAGS from given mask - */ -static inline void native_set_iopl_mask(unsigned mask) -{ -#ifdef CONFIG_X86_32 - unsigned int reg; - - asm volatile ("pushfl;" - "popl %0;" - "andl %1, %0;" - "orl %2, %0;" - "pushl %0;" - "popfl" - : "=&r" (reg) - : "i" (~X86_EFLAGS_IOPL), "r" (mask)); -#endif -} - static inline void native_load_sp0(unsigned long sp0) { @@ -610,7 +591,6 @@ static inline void load_sp0(unsigned long sp0) native_load_sp0(sp0); } -#define set_iopl_mask native_set_iopl_mask #endif /* CONFIG_PARAVIRT_XXL */ /* Free all resources held by a thread. */ diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 42e1245af0d8..ff4b52e37e60 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -62,6 +62,4 @@ void xen_arch_register_cpu(int num); void xen_arch_unregister_cpu(int num); #endif -extern void xen_set_iopl_mask(unsigned mask); - #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 9ed9458e02df..d5dcde972c42 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -153,28 +153,23 @@ SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on) /* * The sys_iopl functionality depends on the level argument, which if - * granted for the task is used by the CPU to check I/O instruction and - * CLI/STI against the current priviledge level (CPL). If CPL is less than - * or equal the tasks IOPL level the instructions take effect. If not a #GP - * is raised. The default IOPL is 0, i.e. no permissions. + * granted for the task is used to enable access to all 65536 I/O ports. * - * Setting IOPL to level 0-2 is disabling the userspace access. Only level - * 3 enables it. If set it allows the user space thread: + * This does not use the IOPL mechanism provided by the CPU as that would + * also allow the user space task to use the CLI/STI instructions. * - * - Unrestricted access to all 65535 I/O ports - * - The usage of CLI/STI instructions + * Disabling interrupts in a user space task is dangerous as it might lock + * up the machine and the semantics vs. syscalls and exceptions is + * undefined. * - * The advantage over ioperm is that the context switch does not require to - * update the I/O bitmap which is especially true when a large number of - * ports is accessed. But the allowance of CLI/STI in userspace is - * considered a major problem. + * Setting IOPL to level 0-2 is disabling I/O permissions. Level 3 + * 3 enables them. * * IOPL is strictly per thread and inherited on fork. */ SYSCALL_DEFINE1(iopl, unsigned int, level) { struct thread_struct *t = ¤t->thread; - struct pt_regs *regs = current_pt_regs(); unsigned int old; /* @@ -187,10 +182,7 @@ SYSCALL_DEFINE1(iopl, unsigned int, level) if (level > 3) return -EINVAL; - if (IS_ENABLED(CONFIG_X86_IOPL_EMULATION)) - old = t->iopl_emul; - else - old = t->iopl >> X86_EFLAGS_IOPL_BIT; + old = t->iopl_emul; /* No point in going further if nothing changes */ if (level == old) @@ -203,25 +195,8 @@ SYSCALL_DEFINE1(iopl, unsigned int, level) return -EPERM; } - if (IS_ENABLED(CONFIG_X86_IOPL_EMULATION)) { - t->iopl_emul = level; - task_update_io_bitmap(); - } else { - /* - * Change the flags value on the return stack, which has - * been set up on system-call entry. See also the fork and - * signal handling code how this is handled. - */ - regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) | - (level << X86_EFLAGS_IOPL_BIT); - /* Store the new level in the thread struct */ - t->iopl = level << X86_EFLAGS_IOPL_BIT; - /* - * X86_32 switches immediately and XEN handles it via - * emulation. - */ - set_iopl_mask(t->iopl); - } + t->iopl_emul = level; + task_update_io_bitmap(); return 0; } diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 59d3d2763a9e..789f5e4f89de 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -341,8 +341,6 @@ struct paravirt_patch_template pv_ops = { .cpu.iret = native_iret, .cpu.swapgs = native_swapgs, - .cpu.set_iopl_mask = native_set_iopl_mask, - .cpu.start_context_switch = paravirt_nop, .cpu.end_context_switch = paravirt_nop, diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 6c7d90527156..323499f48858 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -187,15 +187,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) */ load_TLS(next, cpu); - /* - * Restore IOPL if needed. In normal use, the flags restore - * in the switch assembly will handle this. But if the kernel - * is running virtualized at a non-zero CPL, the popf will - * not restore flags, so it must be done in a separate step. - */ - if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl)) - set_iopl_mask(next->iopl); - switch_to_extra(prev_p, next_p); /* diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e93a1b8fd7f9..506d66830d4d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -497,17 +497,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) switch_to_extra(prev_p, next_p); -#ifdef CONFIG_XEN_PV - /* - * On Xen PV, IOPL bits in pt_regs->flags have no effect, and - * current_pt_regs()->flags may not match the current task's - * intended IOPL. We need to switch it manually. - */ - if (unlikely(static_cpu_has(X86_FEATURE_XENPV) && - prev->iopl != next->iopl)) - xen_set_iopl_mask(next->iopl); -#endif - if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) { /* * AMD CPUs have a misfeature: SYSRET sets the SS selector but diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 5bfea374a160..ae4a41ca19f6 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -837,15 +837,6 @@ static void xen_load_sp0(unsigned long sp0) this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0); } -void xen_set_iopl_mask(unsigned mask) -{ - struct physdev_set_iopl set_iopl; - - /* Force the change at ring 0. */ - set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3; - HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); -} - static void xen_io_delay(void) { } @@ -1055,7 +1046,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = { .write_idt_entry = xen_write_idt_entry, .load_sp0 = xen_load_sp0, - .set_iopl_mask = xen_set_iopl_mask, .io_delay = xen_io_delay, /* Xen takes care of %gs when switching to usermode for us */ -- cgit v1.2.3 From 111e7b15cf10f6e973ccf537c70c66a5de539060 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 12 Nov 2019 21:40:33 +0100 Subject: x86/ioperm: Extend IOPL config to control ioperm() as well If iopl() is disabled, then providing ioperm() does not make much sense. Rename the config option and disable/enable both syscalls with it. Guard the code with #ifdefs where appropriate. Suggested-by: Andy Lutomirski Signed-off-by: Thomas Gleixner --- arch/x86/Kconfig | 7 +++++-- arch/x86/include/asm/io_bitmap.h | 6 ++++++ arch/x86/include/asm/processor.h | 9 ++++++++- arch/x86/include/asm/thread_info.h | 7 ++++++- arch/x86/kernel/cpu/common.c | 26 +++++++++++++++++--------- arch/x86/kernel/ioport.c | 26 +++++++++++++++++++------- arch/x86/kernel/process.c | 4 ++++ 7 files changed, 65 insertions(+), 20 deletions(-) (limited to 'arch/x86/include/asm/processor.h') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 1f926e396ec1..b162ce1482fc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1254,10 +1254,13 @@ config X86_VSYSCALL_EMULATION Disabling this option saves about 7K of kernel size and possibly 4K of additional runtime pagetable memory. -config X86_IOPL_EMULATION - bool "IOPL Emulation" +config X86_IOPL_IOPERM + bool "IOPERM and IOPL Emulation" default y ---help--- + This enables the ioperm() and iopl() syscalls which are necessary + for legacy applications. + Legacy IOPL support is an overbroad mechanism which allows user space aside of accessing all 65536 I/O ports also to disable interrupts. To gain this access the caller needs CAP_SYS_RAWIO diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h index b664baadf736..02c6ef8f7667 100644 --- a/arch/x86/include/asm/io_bitmap.h +++ b/arch/x86/include/asm/io_bitmap.h @@ -15,9 +15,15 @@ struct io_bitmap { struct task_struct; +#ifdef CONFIG_X86_IOPL_IOPERM void io_bitmap_share(struct task_struct *tsk); void io_bitmap_exit(void); void tss_update_io_bitmap(void); +#else +static inline void io_bitmap_share(struct task_struct *tsk) { } +static inline void io_bitmap_exit(void) { } +static inline void tss_update_io_bitmap(void) { } +#endif #endif diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1387d31c5e07..45f416a2c1f1 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -340,13 +340,18 @@ struct x86_hw_tss { (offsetof(struct tss_struct, io_bitmap.mapall) - \ offsetof(struct tss_struct, x86_tss)) +#ifdef CONFIG_X86_IOPL_IOPERM /* * sizeof(unsigned long) coming from an extra "long" at the end of the * iobitmap. The limit is inclusive, i.e. the last valid byte. */ -#define __KERNEL_TSS_LIMIT \ +# define __KERNEL_TSS_LIMIT \ (IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + \ sizeof(unsigned long) - 1) +#else +# define __KERNEL_TSS_LIMIT \ + (offsetof(struct tss_struct, x86_tss) + sizeof(struct x86_hw_tss) - 1) +#endif /* Base offset outside of TSS_LIMIT so unpriviledged IO causes #GP */ #define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1) @@ -398,7 +403,9 @@ struct tss_struct { */ struct x86_hw_tss x86_tss; +#ifdef CONFIG_X86_IOPL_IOPERM struct x86_io_bitmap io_bitmap; +#endif } __aligned(PAGE_SIZE); DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 0accf44878a5..d779366ce3f8 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -156,8 +156,13 @@ struct thread_info { # define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE) #endif -#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY | \ +#ifdef CONFIG_X86_IOPL_IOPERM +# define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY | \ _TIF_IO_BITMAP) +#else +# define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY) +#endif + #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) #define STACK_WARN (THREAD_SIZE/8) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7bf402be13bb..6f6ca6bd58d6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1804,6 +1804,22 @@ static inline void gdt_setup_doublefault_tss(int cpu) } #endif /* !CONFIG_X86_64 */ +static inline void tss_setup_io_bitmap(struct tss_struct *tss) +{ + tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID; + +#ifdef CONFIG_X86_IOPL_IOPERM + tss->io_bitmap.prev_max = 0; + tss->io_bitmap.prev_sequence = 0; + memset(tss->io_bitmap.bitmap, 0xff, sizeof(tss->io_bitmap.bitmap)); + /* + * Invalidate the extra array entry past the end of the all + * permission bitmap as required by the hardware. + */ + tss->io_bitmap.mapall[IO_BITMAP_LONGS] = ~0UL; +#endif +} + /* * cpu_init() initializes state that is per-CPU. Some data is already * initialized (naturally) in the bootstrap process, such as the GDT @@ -1860,15 +1876,7 @@ void cpu_init(void) /* Initialize the TSS. */ tss_setup_ist(tss); - tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID; - tss->io_bitmap.prev_max = 0; - tss->io_bitmap.prev_sequence = 0; - memset(tss->io_bitmap.bitmap, 0xff, sizeof(tss->io_bitmap.bitmap)); - /* - * Invalidate the extra array entry past the end of the all - * permission bitmap as required by the hardware. - */ - tss->io_bitmap.mapall[IO_BITMAP_LONGS] = ~0UL; + tss_setup_io_bitmap(tss); set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss); load_TR_desc(); diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index d5dcde972c42..8abeee0dd7bf 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -14,6 +14,8 @@ #include #include +#ifdef CONFIG_X86_IOPL_IOPERM + static atomic64_t io_bitmap_sequence; void io_bitmap_share(struct task_struct *tsk) @@ -172,13 +174,6 @@ SYSCALL_DEFINE1(iopl, unsigned int, level) struct thread_struct *t = ¤t->thread; unsigned int old; - /* - * Careful: the IOPL bits in regs->flags are undefined under Xen PV - * and changing them has no effect. - */ - if (IS_ENABLED(CONFIG_X86_IOPL_NONE)) - return -ENOSYS; - if (level > 3) return -EINVAL; @@ -200,3 +195,20 @@ SYSCALL_DEFINE1(iopl, unsigned int, level) return 0; } + +#else /* CONFIG_X86_IOPL_IOPERM */ + +long ksys_ioperm(unsigned long from, unsigned long num, int turn_on) +{ + return -ENOSYS; +} +SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on) +{ + return -ENOSYS; +} + +SYSCALL_DEFINE1(iopl, unsigned int, level) +{ + return -ENOSYS; +} +#endif diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 8a844a5d5ae8..7964d7db9366 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -322,6 +322,7 @@ void arch_setup_new_exec(void) } } +#ifdef CONFIG_X86_IOPL_IOPERM static inline void tss_invalidate_io_bitmap(struct tss_struct *tss) { /* @@ -409,6 +410,9 @@ void tss_update_io_bitmap(void) tss_invalidate_io_bitmap(tss); } } +#else /* CONFIG_X86_IOPL_IOPERM */ +static inline void switch_to_bitmap(unsigned long tifp) { } +#endif #ifdef CONFIG_SMP -- cgit v1.2.3