1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
|
// SPDX-License-Identifier: GPL-2.0
/*
* Author: Andrei Vagin <avagin@openvz.org>
* Author: Dmitry Safonov <dima@arista.com>
*/
#include <linux/cleanup.h>
#include <linux/mm.h>
#include <linux/time_namespace.h>
#include <linux/time.h>
#include <linux/vdso_datastore.h>
#include <vdso/clocksource.h>
#include <vdso/datapage.h>
#include "namespace_internal.h"
static struct timens_offset offset_from_ts(struct timespec64 off)
{
struct timens_offset ret;
ret.sec = off.tv_sec;
ret.nsec = off.tv_nsec;
return ret;
}
/*
* A time namespace VVAR page has the same layout as the VVAR page which
* contains the system wide VDSO data.
*
* For a normal task the VVAR pages are installed in the normal ordering:
* VVAR
* PVCLOCK
* HVCLOCK
* TIMENS <- Not really required
*
* Now for a timens task the pages are installed in the following order:
* TIMENS
* PVCLOCK
* HVCLOCK
* VVAR
*
* The check for vdso_clock->clock_mode is in the unlikely path of
* the seq begin magic. So for the non-timens case most of the time
* 'seq' is even, so the branch is not taken.
*
* If 'seq' is odd, i.e. a concurrent update is in progress, the extra check
* for vdso_clock->clock_mode is a non-issue. The task is spin waiting for the
* update to finish and for 'seq' to become even anyway.
*
* Timens page has vdso_clock->clock_mode set to VDSO_CLOCKMODE_TIMENS which
* enforces the time namespace handling path.
*/
static void timens_setup_vdso_clock_data(struct vdso_clock *vc,
struct time_namespace *ns)
{
struct timens_offset *offset = vc->offset;
struct timens_offset monotonic = offset_from_ts(ns->offsets.monotonic);
struct timens_offset boottime = offset_from_ts(ns->offsets.boottime);
vc->seq = 1;
vc->clock_mode = VDSO_CLOCKMODE_TIMENS;
offset[CLOCK_MONOTONIC] = monotonic;
offset[CLOCK_MONOTONIC_RAW] = monotonic;
offset[CLOCK_MONOTONIC_COARSE] = monotonic;
offset[CLOCK_BOOTTIME] = boottime;
offset[CLOCK_BOOTTIME_ALARM] = boottime;
}
struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
if (likely(vma->vm_mm == current->mm))
return current->nsproxy->time_ns->vvar_page;
/*
* VM_PFNMAP | VM_IO protect .fault() handler from being called
* through interfaces like /proc/$pid/mem or
* process_vm_{readv,writev}() as long as there's no .access()
* in special_mapping_vmops().
* For more details check_vma_flags() and __access_remote_vm()
*/
WARN(1, "vvar_page accessed remotely");
return NULL;
}
static void timens_set_vvar_page(struct task_struct *task,
struct time_namespace *ns)
{
struct vdso_time_data *vdata;
struct vdso_clock *vc;
unsigned int i;
if (ns == &init_time_ns)
return;
/* Fast-path, taken by every task in namespace except the first. */
if (likely(ns->frozen_offsets))
return;
guard(mutex)(&timens_offset_lock);
/* Nothing to-do: vvar_page has been already initialized. */
if (ns->frozen_offsets)
return;
ns->frozen_offsets = true;
vdata = page_address(ns->vvar_page);
vc = vdata->clock_data;
for (i = 0; i < CS_BASES; i++)
timens_setup_vdso_clock_data(&vc[i], ns);
if (IS_ENABLED(CONFIG_POSIX_AUX_CLOCKS)) {
for (i = 0; i < ARRAY_SIZE(vdata->aux_clock_data); i++)
timens_setup_vdso_clock_data(&vdata->aux_clock_data[i], ns);
}
}
/*
* The vvar page layout depends on whether a task belongs to the root or
* non-root time namespace. Whenever a task changes its namespace, the VVAR
* page tables are cleared and then they will be re-faulted with a
* corresponding layout.
* See also the comment near timens_setup_vdso_clock_data() for details.
*/
static int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
{
struct mm_struct *mm = task->mm;
struct vm_area_struct *vma;
VMA_ITERATOR(vmi, mm, 0);
guard(mmap_read_lock)(mm);
for_each_vma(vmi, vma) {
if (vma_is_special_mapping(vma, &vdso_vvar_mapping))
zap_vma(vma);
}
return 0;
}
void timens_commit(struct task_struct *tsk, struct time_namespace *ns)
{
timens_set_vvar_page(tsk, ns);
vdso_join_timens(tsk, ns);
}
int timens_vdso_alloc_vvar_page(struct time_namespace *ns)
{
ns->vvar_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (!ns->vvar_page)
return -ENOMEM;
return 0;
}
void timens_vdso_free_vvar_page(struct time_namespace *ns)
{
__free_page(ns->vvar_page);
}
|