aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/fork.c24
-rw-r--r--kernel/pid.c72
-rw-r--r--kernel/pid_namespace.c2
3 files changed, 80 insertions, 18 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 954e875e72b1..417570263f1f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2087,7 +2087,8 @@ static __latent_entropy struct task_struct *copy_process(
stackleak_task_init(p);
if (pid != &init_struct_pid) {
- pid = alloc_pid(p->nsproxy->pid_ns_for_children);
+ pid = alloc_pid(p->nsproxy->pid_ns_for_children, args->set_tid,
+ args->set_tid_size);
if (IS_ERR(pid)) {
retval = PTR_ERR(pid);
goto bad_fork_cleanup_thread;
@@ -2590,6 +2591,7 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
{
int err;
struct clone_args args;
+ pid_t *kset_tid = kargs->set_tid;
if (unlikely(usize > PAGE_SIZE))
return -E2BIG;
@@ -2600,6 +2602,15 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
if (err)
return err;
+ if (unlikely(args.set_tid_size > MAX_PID_NS_LEVEL))
+ return -EINVAL;
+
+ if (unlikely(!args.set_tid && args.set_tid_size > 0))
+ return -EINVAL;
+
+ if (unlikely(args.set_tid && args.set_tid_size == 0))
+ return -EINVAL;
+
/*
* Verify that higher 32bits of exit_signal are unset and that
* it is a valid signal
@@ -2617,8 +2628,16 @@ noinline static int copy_clone_args_from_user(struct kernel_clone_args *kargs,
.stack = args.stack,
.stack_size = args.stack_size,
.tls = args.tls,
+ .set_tid_size = args.set_tid_size,
};
+ if (args.set_tid &&
+ copy_from_user(kset_tid, u64_to_user_ptr(args.set_tid),
+ (kargs->set_tid_size * sizeof(pid_t))))
+ return -EFAULT;
+
+ kargs->set_tid = kset_tid;
+
return 0;
}
@@ -2662,6 +2681,9 @@ SYSCALL_DEFINE2(clone3, struct clone_args __user *, uargs, size_t, size)
int err;
struct kernel_clone_args kargs;
+ pid_t set_tid[MAX_PID_NS_LEVEL];
+
+ kargs.set_tid = set_tid;
err = copy_clone_args_from_user(&kargs, uargs, size);
if (err)
diff --git a/kernel/pid.c b/kernel/pid.c
index 7b5f6c963d72..2278e249141d 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -157,7 +157,8 @@ void free_pid(struct pid *pid)
call_rcu(&pid->rcu, delayed_put_pid);
}
-struct pid *alloc_pid(struct pid_namespace *ns)
+struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
+ size_t set_tid_size)
{
struct pid *pid;
enum pid_type type;
@@ -166,6 +167,17 @@ struct pid *alloc_pid(struct pid_namespace *ns)
struct upid *upid;
int retval = -ENOMEM;
+ /*
+ * set_tid_size contains the size of the set_tid array. Starting at
+ * the most nested currently active PID namespace it tells alloc_pid()
+ * which PID to set for a process in that most nested PID namespace
+ * up to set_tid_size PID namespaces. It does not have to set the PID
+ * for a process in all nested PID namespaces but set_tid_size must
+ * never be greater than the current ns->level + 1.
+ */
+ if (set_tid_size > ns->level + 1)
+ return ERR_PTR(-EINVAL);
+
pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
if (!pid)
return ERR_PTR(retval);
@@ -174,24 +186,54 @@ struct pid *alloc_pid(struct pid_namespace *ns)
pid->level = ns->level;
for (i = ns->level; i >= 0; i--) {
- int pid_min = 1;
+ int tid = 0;
+
+ if (set_tid_size) {
+ tid = set_tid[ns->level - i];
+
+ retval = -EINVAL;
+ if (tid < 1 || tid >= pid_max)
+ goto out_free;
+ /*
+ * Also fail if a PID != 1 is requested and
+ * no PID 1 exists.
+ */
+ if (tid != 1 && !tmp->child_reaper)
+ goto out_free;
+ retval = -EPERM;
+ if (!ns_capable(tmp->user_ns, CAP_SYS_ADMIN))
+ goto out_free;
+ set_tid_size--;
+ }
idr_preload(GFP_KERNEL);
spin_lock_irq(&pidmap_lock);
- /*
- * init really needs pid 1, but after reaching the maximum
- * wrap back to RESERVED_PIDS
- */
- if (idr_get_cursor(&tmp->idr) > RESERVED_PIDS)
- pid_min = RESERVED_PIDS;
-
- /*
- * Store a null pointer so find_pid_ns does not find
- * a partially initialized PID (see below).
- */
- nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min,
- pid_max, GFP_ATOMIC);
+ if (tid) {
+ nr = idr_alloc(&tmp->idr, NULL, tid,
+ tid + 1, GFP_ATOMIC);
+ /*
+ * If ENOSPC is returned it means that the PID is
+ * alreay in use. Return EEXIST in that case.
+ */
+ if (nr == -ENOSPC)
+ nr = -EEXIST;
+ } else {
+ int pid_min = 1;
+ /*
+ * init really needs pid 1, but after reaching the
+ * maximum wrap back to RESERVED_PIDS
+ */
+ if (idr_get_cursor(&tmp->idr) > RESERVED_PIDS)
+ pid_min = RESERVED_PIDS;
+
+ /*
+ * Store a null pointer so find_pid_ns does not find
+ * a partially initialized PID (see below).
+ */
+ nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min,
+ pid_max, GFP_ATOMIC);
+ }
spin_unlock_irq(&pidmap_lock);
idr_preload_end();
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index a6a79f85c81a..d40017e79ebe 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -26,8 +26,6 @@
static DEFINE_MUTEX(pid_caches_mutex);
static struct kmem_cache *pid_ns_cachep;
-/* MAX_PID_NS_LEVEL is needed for limiting size of 'struct pid' */
-#define MAX_PID_NS_LEVEL 32
/* Write once array, filled from the beginning. */
static struct kmem_cache *pid_cache[MAX_PID_NS_LEVEL];