aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2020-03-14 21:03:47 -0700
committerDavid S. Miller <davem@davemloft.net>2020-03-14 21:03:47 -0700
commit3d572b2308ec68461abcc1754443bf56a30e1f3b (patch)
tree13c5f71b2cc4c4fdc25e47f538a91197ae3a5954 /net
parentsfc: support configuring vf spoofchk on EF10 VFs (diff)
parentselftests: mlxsw: RED: Test RED ECN nodrop offload (diff)
downloadlinux-3d572b2308ec68461abcc1754443bf56a30e1f3b.tar.gz
linux-3d572b2308ec68461abcc1754443bf56a30e1f3b.zip
Merge branch 'RED-Introduce-an-ECN-tail-dropping-mode'
Petr Machata says: ==================== RED: Introduce an ECN tail-dropping mode When the RED qdisc is currently configured to enable ECN, the RED algorithm is used to decide whether a certain SKB should be marked. If that SKB is not ECN-capable, it is early-dropped. It is also possible to keep all traffic in the queue, and just mark the ECN-capable subset of it, as appropriate under the RED algorithm. Some switches support this mode, and some installations make use of it. There is currently no way to put the RED qdiscs to this mode. Therefore this patchset adds a new RED flag, TC_RED_TAILDROP. When the qdisc is configured with this flag, non-ECT traffic is enqueued (and tail-dropped when the queue size is exhausted) instead of being early-dropped. Unfortunately, adding a new RED flag is not as simple as it sounds. RED flags are passed in tc_red_qopt.flags. However RED neglects to validate the flag field, and just copies it over wholesale to its internal structure, and later dumps it back. A broken userspace can therefore configure a RED qdisc with arbitrary unsupported flags, and later expect to see the flags on qdisc dump. The current ABI thus allows storage of 5 bits of custom data along with the qdisc instance. GRED, SFQ and CHOKE qdiscs are in the same situation. (GRED validates VQ flags, but not the flags for the main queue.) E.g. if SFQ ever needs to support TC_RED_ADAPTATIVE, it needs another way of doing it, and at the same time it needs to retain the possibility to store 6 bits of uninterpreted data. For RED, this problem is resolved in patch #2, which adds a new attribute, and a way to separate flags from userbits that can be reused by other qdiscs. The flag itself and related behavioral changes are added in patch To test the new feature, patch #1 first introduces a TDC testsuite that covers the existing RED flags. Patch #5 later extends it with taildrop coverage. Patch #6 contains a forwarding selftest for the offloaded datapath. To test the SW datapath, I took the mlxsw selftest and adapted it in mostly obvious ways. The test is stable enough to verify that RED, ECN and ECN taildrop actually work. However, I have no confidence in its portability to other people's machines or mildly different configurations. I therefore do not find it suitable for upstreaming. GRED and CHOKE can use the same method as RED if they ever need to support extra flags. SFQ uses the length of TCA_OPTIONS to dispatch on binary control structure version, and would therefore need a different approach. v2: - Patch #1 - Require nsPlugin in each RED test - Match end-of-line to catch cases of more flags reported than requested - Patch #2: - Replaced with another patch. - Patch #3: - Fix red_use_taildrop() condition in red_enqueue switch for probabilistic case. - Patch #5: - Require nsPlugin in each RED test - Match end-of-line to catch cases of more flags reported than requested - Add a test for creation of non-ECN taildrop, which should fail ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/sched/sch_red.c72
1 files changed, 64 insertions, 8 deletions
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 1695421333e3..3ef0a4f7399b 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -35,7 +35,11 @@
struct red_sched_data {
u32 limit; /* HARD maximal queue length */
+
unsigned char flags;
+ /* Non-flags in tc_red_qopt.flags. */
+ unsigned char userbits;
+
struct timer_list adapt_timer;
struct Qdisc *sch;
struct red_parms parms;
@@ -44,6 +48,8 @@ struct red_sched_data {
struct Qdisc *qdisc;
};
+static const u32 red_supported_flags = TC_RED_HISTORIC_FLAGS | TC_RED_NODROP;
+
static inline int red_use_ecn(struct red_sched_data *q)
{
return q->flags & TC_RED_ECN;
@@ -54,6 +60,11 @@ static inline int red_use_harddrop(struct red_sched_data *q)
return q->flags & TC_RED_HARDDROP;
}
+static int red_use_nodrop(struct red_sched_data *q)
+{
+ return q->flags & TC_RED_NODROP;
+}
+
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
@@ -74,23 +85,36 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
case RED_PROB_MARK:
qdisc_qstats_overlimit(sch);
- if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
+ if (!red_use_ecn(q)) {
q->stats.prob_drop++;
goto congestion_drop;
}
- q->stats.prob_mark++;
+ if (INET_ECN_set_ce(skb)) {
+ q->stats.prob_mark++;
+ } else if (!red_use_nodrop(q)) {
+ q->stats.prob_drop++;
+ goto congestion_drop;
+ }
+
+ /* Non-ECT packet in ECN nodrop mode: queue it. */
break;
case RED_HARD_MARK:
qdisc_qstats_overlimit(sch);
- if (red_use_harddrop(q) || !red_use_ecn(q) ||
- !INET_ECN_set_ce(skb)) {
+ if (red_use_harddrop(q) || !red_use_ecn(q)) {
q->stats.forced_drop++;
goto congestion_drop;
}
- q->stats.forced_mark++;
+ if (INET_ECN_set_ce(skb)) {
+ q->stats.forced_mark++;
+ } else if (!red_use_nodrop(q)) {
+ q->stats.forced_drop++;
+ goto congestion_drop;
+ }
+
+ /* Non-ECT packet in ECN nodrop mode: queue it. */
break;
}
@@ -165,6 +189,7 @@ static int red_offload(struct Qdisc *sch, bool enable)
opt.set.limit = q->limit;
opt.set.is_ecn = red_use_ecn(q);
opt.set.is_harddrop = red_use_harddrop(q);
+ opt.set.is_nodrop = red_use_nodrop(q);
opt.set.qstats = &sch->qstats;
} else {
opt.command = TC_RED_DESTROY;
@@ -183,9 +208,12 @@ static void red_destroy(struct Qdisc *sch)
}
static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
+ [TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
[TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
[TCA_RED_STAB] = { .len = RED_STAB_SIZE },
[TCA_RED_MAX_P] = { .type = NLA_U32 },
+ [TCA_RED_FLAGS] = { .type = NLA_BITFIELD32,
+ .validation_data = &red_supported_flags },
};
static int red_change(struct Qdisc *sch, struct nlattr *opt,
@@ -194,7 +222,10 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
struct Qdisc *old_child = NULL, *child = NULL;
struct red_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_RED_MAX + 1];
+ struct nla_bitfield32 flags_bf;
struct tc_red_qopt *ctl;
+ unsigned char userbits;
+ unsigned char flags;
int err;
u32 max_P;
@@ -216,6 +247,12 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
return -EINVAL;
+ err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
+ tb[TCA_RED_FLAGS], red_supported_flags,
+ &flags_bf, &userbits, extack);
+ if (err)
+ return err;
+
if (ctl->limit > 0) {
child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
extack);
@@ -227,7 +264,14 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
}
sch_tree_lock(sch);
- q->flags = ctl->flags;
+
+ flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
+ err = red_validate_flags(flags, extack);
+ if (err)
+ goto unlock_out;
+
+ q->flags = flags;
+ q->userbits = userbits;
q->limit = ctl->limit;
if (child) {
qdisc_tree_flush_backlog(q->qdisc);
@@ -256,6 +300,12 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
if (old_child)
qdisc_put(old_child);
return 0;
+
+unlock_out:
+ sch_tree_unlock(sch);
+ if (child)
+ qdisc_put(child);
+ return err;
}
static inline void red_adaptative_timer(struct timer_list *t)
@@ -299,10 +349,15 @@ static int red_dump_offload_stats(struct Qdisc *sch)
static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct red_sched_data *q = qdisc_priv(sch);
+ struct nla_bitfield32 flags_bf = {
+ .selector = red_supported_flags,
+ .value = q->flags,
+ };
struct nlattr *opts = NULL;
struct tc_red_qopt opt = {
.limit = q->limit,
- .flags = q->flags,
+ .flags = (q->flags & TC_RED_HISTORIC_FLAGS) |
+ q->userbits,
.qth_min = q->parms.qth_min >> q->parms.Wlog,
.qth_max = q->parms.qth_max >> q->parms.Wlog,
.Wlog = q->parms.Wlog,
@@ -319,7 +374,8 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
if (opts == NULL)
goto nla_put_failure;
if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
- nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P))
+ nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
+ nla_put(skb, TCA_RED_FLAGS, sizeof(flags_bf), &flags_bf))
goto nla_put_failure;
return nla_nest_end(skb, opts);