aboutsummaryrefslogtreecommitdiffstats
path: root/tools/sched_ext/scx_qmap.bpf.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/sched_ext/scx_qmap.bpf.c')
-rw-r--r--tools/sched_ext/scx_qmap.bpf.c142
1 files changed, 139 insertions, 3 deletions
diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c
index c75c70d6a8eb..b1d0b09c966e 100644
--- a/tools/sched_ext/scx_qmap.bpf.c
+++ b/tools/sched_ext/scx_qmap.bpf.c
@@ -69,6 +69,18 @@ struct {
};
/*
+ * If enabled, CPU performance target is set according to the queue index
+ * according to the following table.
+ */
+static const u32 qidx_to_cpuperf_target[] = {
+ [0] = SCX_CPUPERF_ONE * 0 / 4,
+ [1] = SCX_CPUPERF_ONE * 1 / 4,
+ [2] = SCX_CPUPERF_ONE * 2 / 4,
+ [3] = SCX_CPUPERF_ONE * 3 / 4,
+ [4] = SCX_CPUPERF_ONE * 4 / 4,
+};
+
+/*
* Per-queue sequence numbers to implement core-sched ordering.
*
* Tail seq is assigned to each queued task and incremented. Head seq tracks the
@@ -95,6 +107,8 @@ struct {
struct cpu_ctx {
u64 dsp_idx; /* dispatch index */
u64 dsp_cnt; /* remaining count */
+ u32 avg_weight;
+ u32 cpuperf_target;
};
struct {
@@ -107,6 +121,8 @@ struct {
/* Statistics */
u64 nr_enqueued, nr_dispatched, nr_reenqueued, nr_dequeued;
u64 nr_core_sched_execed;
+u32 cpuperf_min, cpuperf_avg, cpuperf_max;
+u32 cpuperf_target_min, cpuperf_target_avg, cpuperf_target_max;
s32 BPF_STRUCT_OPS(qmap_select_cpu, struct task_struct *p,
s32 prev_cpu, u64 wake_flags)
@@ -313,6 +329,29 @@ void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev)
}
}
+void BPF_STRUCT_OPS(qmap_tick, struct task_struct *p)
+{
+ struct cpu_ctx *cpuc;
+ u32 zero = 0;
+ int idx;
+
+ if (!(cpuc = bpf_map_lookup_elem(&cpu_ctx_stor, &zero))) {
+ scx_bpf_error("failed to look up cpu_ctx");
+ return;
+ }
+
+ /*
+ * Use the running avg of weights to select the target cpuperf level.
+ * This is a demonstration of the cpuperf feature rather than a
+ * practical strategy to regulate CPU frequency.
+ */
+ cpuc->avg_weight = cpuc->avg_weight * 3 / 4 + p->scx.weight / 4;
+ idx = weight_to_idx(cpuc->avg_weight);
+ cpuc->cpuperf_target = qidx_to_cpuperf_target[idx];
+
+ scx_bpf_cpuperf_set(scx_bpf_task_cpu(p), cpuc->cpuperf_target);
+}
+
/*
* The distance from the head of the queue scaled by the weight of the queue.
* The lower the number, the older the task and the higher the priority.
@@ -422,8 +461,9 @@ void BPF_STRUCT_OPS(qmap_dump_cpu, struct scx_dump_ctx *dctx, s32 cpu, bool idle
if (!(cpuc = bpf_map_lookup_percpu_elem(&cpu_ctx_stor, &zero, cpu)))
return;
- scx_bpf_dump("QMAP: dsp_idx=%llu dsp_cnt=%llu",
- cpuc->dsp_idx, cpuc->dsp_cnt);
+ scx_bpf_dump("QMAP: dsp_idx=%llu dsp_cnt=%llu avg_weight=%u cpuperf_target=%u",
+ cpuc->dsp_idx, cpuc->dsp_cnt, cpuc->avg_weight,
+ cpuc->cpuperf_target);
}
void BPF_STRUCT_OPS(qmap_dump_task, struct scx_dump_ctx *dctx, struct task_struct *p)
@@ -492,11 +532,106 @@ void BPF_STRUCT_OPS(qmap_cpu_offline, s32 cpu)
print_cpus();
}
+struct monitor_timer {
+ struct bpf_timer timer;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, u32);
+ __type(value, struct monitor_timer);
+} monitor_timer SEC(".maps");
+
+/*
+ * Print out the min, avg and max performance levels of CPUs every second to
+ * demonstrate the cpuperf interface.
+ */
+static void monitor_cpuperf(void)
+{
+ u32 zero = 0, nr_cpu_ids;
+ u64 cap_sum = 0, cur_sum = 0, cur_min = SCX_CPUPERF_ONE, cur_max = 0;
+ u64 target_sum = 0, target_min = SCX_CPUPERF_ONE, target_max = 0;
+ const struct cpumask *online;
+ int i, nr_online_cpus = 0;
+
+ nr_cpu_ids = scx_bpf_nr_cpu_ids();
+ online = scx_bpf_get_online_cpumask();
+
+ bpf_for(i, 0, nr_cpu_ids) {
+ struct cpu_ctx *cpuc;
+ u32 cap, cur;
+
+ if (!bpf_cpumask_test_cpu(i, online))
+ continue;
+ nr_online_cpus++;
+
+ /* collect the capacity and current cpuperf */
+ cap = scx_bpf_cpuperf_cap(i);
+ cur = scx_bpf_cpuperf_cur(i);
+
+ cur_min = cur < cur_min ? cur : cur_min;
+ cur_max = cur > cur_max ? cur : cur_max;
+
+ /*
+ * $cur is relative to $cap. Scale it down accordingly so that
+ * it's in the same scale as other CPUs and $cur_sum/$cap_sum
+ * makes sense.
+ */
+ cur_sum += cur * cap / SCX_CPUPERF_ONE;
+ cap_sum += cap;
+
+ if (!(cpuc = bpf_map_lookup_percpu_elem(&cpu_ctx_stor, &zero, i))) {
+ scx_bpf_error("failed to look up cpu_ctx");
+ goto out;
+ }
+
+ /* collect target */
+ cur = cpuc->cpuperf_target;
+ target_sum += cur;
+ target_min = cur < target_min ? cur : target_min;
+ target_max = cur > target_max ? cur : target_max;
+ }
+
+ cpuperf_min = cur_min;
+ cpuperf_avg = cur_sum * SCX_CPUPERF_ONE / cap_sum;
+ cpuperf_max = cur_max;
+
+ cpuperf_target_min = target_min;
+ cpuperf_target_avg = target_sum / nr_online_cpus;
+ cpuperf_target_max = target_max;
+out:
+ scx_bpf_put_cpumask(online);
+}
+
+static int monitor_timerfn(void *map, int *key, struct bpf_timer *timer)
+{
+ monitor_cpuperf();
+
+ bpf_timer_start(timer, ONE_SEC_IN_NS, 0);
+ return 0;
+}
+
s32 BPF_STRUCT_OPS_SLEEPABLE(qmap_init)
{
+ u32 key = 0;
+ struct bpf_timer *timer;
+ s32 ret;
+
print_cpus();
- return scx_bpf_create_dsq(SHARED_DSQ, -1);
+ ret = scx_bpf_create_dsq(SHARED_DSQ, -1);
+ if (ret)
+ return ret;
+
+ timer = bpf_map_lookup_elem(&monitor_timer, &key);
+ if (!timer)
+ return -ESRCH;
+
+ bpf_timer_init(timer, &monitor_timer, CLOCK_MONOTONIC);
+ bpf_timer_set_callback(timer, monitor_timerfn);
+
+ return bpf_timer_start(timer, ONE_SEC_IN_NS, 0);
}
void BPF_STRUCT_OPS(qmap_exit, struct scx_exit_info *ei)
@@ -509,6 +644,7 @@ SCX_OPS_DEFINE(qmap_ops,
.enqueue = (void *)qmap_enqueue,
.dequeue = (void *)qmap_dequeue,
.dispatch = (void *)qmap_dispatch,
+ .tick = (void *)qmap_tick,
.core_sched_before = (void *)qmap_core_sched_before,
.cpu_release = (void *)qmap_cpu_release,
.init_task = (void *)qmap_init_task,