diff options
Diffstat (limited to 'tools/testing')
714 files changed, 44665 insertions, 7368 deletions
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 6f9347ade82c..90f3c9802ffb 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -6,13 +6,13 @@ ldflags-y += --wrap=acpi_pci_find_root ldflags-y += --wrap=nvdimm_bus_register ldflags-y += --wrap=devm_cxl_port_enumerate_dports ldflags-y += --wrap=devm_cxl_setup_hdm -ldflags-y += --wrap=devm_cxl_enable_hdm ldflags-y += --wrap=devm_cxl_add_passthrough_decoder ldflags-y += --wrap=devm_cxl_enumerate_decoders ldflags-y += --wrap=cxl_await_media_ready ldflags-y += --wrap=cxl_hdm_decode_init ldflags-y += --wrap=cxl_dvsec_rr_decode -ldflags-y += --wrap=cxl_rcrb_to_component +ldflags-y += --wrap=devm_cxl_add_rch_dport +ldflags-y += --wrap=cxl_rcd_component_reg_phys DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl @@ -57,6 +57,7 @@ cxl_core-y += $(CXL_CORE_SRC)/memdev.o cxl_core-y += $(CXL_CORE_SRC)/mbox.o cxl_core-y += $(CXL_CORE_SRC)/pci.o cxl_core-y += $(CXL_CORE_SRC)/hdm.o +cxl_core-y += $(CXL_CORE_SRC)/pmu.o cxl_core-$(CONFIG_TRACING) += $(CXL_CORE_SRC)/trace.o cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o cxl_core-y += config_check.o diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index bf00dc52fe96..fb6ab9cef84f 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -713,7 +713,7 @@ static void default_mock_decoder(struct cxl_decoder *cxld) cxld->interleave_ways = 1; cxld->interleave_granularity = 256; - cxld->target_type = CXL_DECODER_EXPANDER; + cxld->target_type = CXL_DECODER_HOSTONLYMEM; cxld->commit = mock_decoder_commit; cxld->reset = mock_decoder_reset; } @@ -754,7 +754,7 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) /* check is endpoint is attach to host-bridge0 */ port = cxled_to_port(cxled); do { - if (port->uport == &cxl_host_bridge[0]->dev) { + if (port->uport_dev == &cxl_host_bridge[0]->dev) { hb0 = true; break; } @@ -787,7 +787,7 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) cxld->interleave_ways = 2; eig_to_granularity(window->granularity, &cxld->interleave_granularity); - cxld->target_type = CXL_DECODER_EXPANDER; + cxld->target_type = CXL_DECODER_HOSTONLYMEM; cxld->flags = CXL_DECODER_F_ENABLE; cxled->state = CXL_DECODER_STATE_AUTO; port->commit_end = cxld->id; @@ -820,7 +820,7 @@ static void mock_init_hdm_decoder(struct cxl_decoder *cxld) } else cxlsd->target[0] = dport; cxld = &cxlsd->cxld; - cxld->target_type = CXL_DECODER_EXPANDER; + cxld->target_type = CXL_DECODER_HOSTONLYMEM; cxld->flags = CXL_DECODER_F_ENABLE; iter->commit_end = 0; /* @@ -889,7 +889,7 @@ static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, mock_init_hdm_decoder(cxld); if (target_count) { - rc = device_for_each_child(port->uport, &ctx, + rc = device_for_each_child(port->uport_dev, &ctx, map_targets); if (rc) { put_device(&cxld->dev); @@ -919,29 +919,29 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port) int i, array_size; if (port->depth == 1) { - if (is_multi_bridge(port->uport)) { + if (is_multi_bridge(port->uport_dev)) { array_size = ARRAY_SIZE(cxl_root_port); array = cxl_root_port; - } else if (is_single_bridge(port->uport)) { + } else if (is_single_bridge(port->uport_dev)) { array_size = ARRAY_SIZE(cxl_root_single); array = cxl_root_single; } else { dev_dbg(&port->dev, "%s: unknown bridge type\n", - dev_name(port->uport)); + dev_name(port->uport_dev)); return -ENXIO; } } else if (port->depth == 2) { struct cxl_port *parent = to_cxl_port(port->dev.parent); - if (is_multi_bridge(parent->uport)) { + if (is_multi_bridge(parent->uport_dev)) { array_size = ARRAY_SIZE(cxl_switch_dport); array = cxl_switch_dport; - } else if (is_single_bridge(parent->uport)) { + } else if (is_single_bridge(parent->uport_dev)) { array_size = ARRAY_SIZE(cxl_swd_single); array = cxl_swd_single; } else { dev_dbg(&port->dev, "%s: unknown bridge type\n", - dev_name(port->uport)); + dev_name(port->uport_dev)); return -ENXIO; } } else { @@ -954,9 +954,9 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port) struct platform_device *pdev = array[i]; struct cxl_dport *dport; - if (pdev->dev.parent != port->uport) { + if (pdev->dev.parent != port->uport_dev) { dev_dbg(&port->dev, "%s: mismatch parent %s\n", - dev_name(port->uport), + dev_name(port->uport_dev), dev_name(pdev->dev.parent)); continue; } @@ -971,15 +971,6 @@ static int mock_cxl_port_enumerate_dports(struct cxl_port *port) return 0; } -resource_size_t mock_cxl_rcrb_to_component(struct device *dev, - resource_size_t rcrb, - enum cxl_rcrb which) -{ - dev_dbg(dev, "rcrb: %pa which: %d\n", &rcrb, which); - - return (resource_size_t) which + 1; -} - static struct cxl_mock_ops cxl_mock_ops = { .is_mock_adev = is_mock_adev, .is_mock_bridge = is_mock_bridge, @@ -988,7 +979,6 @@ static struct cxl_mock_ops cxl_mock_ops = { .is_mock_dev = is_mock_dev, .acpi_table_parse_cedt = mock_acpi_table_parse_cedt, .acpi_evaluate_integer = mock_acpi_evaluate_integer, - .cxl_rcrb_to_component = mock_cxl_rcrb_to_component, .acpi_pci_find_root = mock_acpi_pci_find_root, .devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports, .devm_cxl_setup_hdm = mock_cxl_setup_hdm, @@ -1009,10 +999,6 @@ static void mock_companion(struct acpi_device *adev, struct device *dev) #define SZ_64G (SZ_32G * 2) #endif -#ifndef SZ_512G -#define SZ_512G (SZ_64G * 8) -#endif - static __init int cxl_rch_init(void) { int rc, i; diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 34b48027b3de..464fc39ed277 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -8,11 +8,14 @@ #include <linux/sizes.h> #include <linux/bits.h> #include <asm/unaligned.h> +#include <crypto/sha2.h> #include <cxlmem.h> #include "trace.h" #define LSA_SIZE SZ_128K +#define FW_SIZE SZ_64M +#define FW_SLOTS 3 #define DEV_SIZE SZ_2G #define EFFECT(x) (1U << x) @@ -21,42 +24,70 @@ static unsigned int poison_inject_dev_max = MOCK_INJECT_DEV_MAX; +enum cxl_command_effects { + CONF_CHANGE_COLD_RESET = 0, + CONF_CHANGE_IMMEDIATE, + DATA_CHANGE_IMMEDIATE, + POLICY_CHANGE_IMMEDIATE, + LOG_CHANGE_IMMEDIATE, + SECURITY_CHANGE_IMMEDIATE, + BACKGROUND_OP, + SECONDARY_MBOX_SUPPORTED, +}; + +#define CXL_CMD_EFFECT_NONE cpu_to_le16(0) + static struct cxl_cel_entry mock_cel[] = { { .opcode = cpu_to_le16(CXL_MBOX_OP_GET_SUPPORTED_LOGS), - .effect = cpu_to_le16(0), + .effect = CXL_CMD_EFFECT_NONE, }, { .opcode = cpu_to_le16(CXL_MBOX_OP_IDENTIFY), - .effect = cpu_to_le16(0), + .effect = CXL_CMD_EFFECT_NONE, }, { .opcode = cpu_to_le16(CXL_MBOX_OP_GET_LSA), - .effect = cpu_to_le16(0), + .effect = CXL_CMD_EFFECT_NONE, }, { .opcode = cpu_to_le16(CXL_MBOX_OP_GET_PARTITION_INFO), - .effect = cpu_to_le16(0), + .effect = CXL_CMD_EFFECT_NONE, }, { .opcode = cpu_to_le16(CXL_MBOX_OP_SET_LSA), - .effect = cpu_to_le16(EFFECT(1) | EFFECT(2)), + .effect = cpu_to_le16(EFFECT(CONF_CHANGE_IMMEDIATE) | + EFFECT(DATA_CHANGE_IMMEDIATE)), }, { .opcode = cpu_to_le16(CXL_MBOX_OP_GET_HEALTH_INFO), - .effect = cpu_to_le16(0), + .effect = CXL_CMD_EFFECT_NONE, }, { .opcode = cpu_to_le16(CXL_MBOX_OP_GET_POISON), - .effect = cpu_to_le16(0), + .effect = CXL_CMD_EFFECT_NONE, }, { .opcode = cpu_to_le16(CXL_MBOX_OP_INJECT_POISON), - .effect = cpu_to_le16(0), + .effect = cpu_to_le16(EFFECT(DATA_CHANGE_IMMEDIATE)), }, { .opcode = cpu_to_le16(CXL_MBOX_OP_CLEAR_POISON), - .effect = cpu_to_le16(0), + .effect = cpu_to_le16(EFFECT(DATA_CHANGE_IMMEDIATE)), + }, + { + .opcode = cpu_to_le16(CXL_MBOX_OP_GET_FW_INFO), + .effect = CXL_CMD_EFFECT_NONE, + }, + { + .opcode = cpu_to_le16(CXL_MBOX_OP_TRANSFER_FW), + .effect = cpu_to_le16(EFFECT(CONF_CHANGE_COLD_RESET) | + EFFECT(BACKGROUND_OP)), + }, + { + .opcode = cpu_to_le16(CXL_MBOX_OP_ACTIVATE_FW), + .effect = cpu_to_le16(EFFECT(CONF_CHANGE_COLD_RESET) | + EFFECT(CONF_CHANGE_IMMEDIATE)), }, }; @@ -102,13 +133,17 @@ struct mock_event_log { }; struct mock_event_store { - struct cxl_dev_state *cxlds; + struct cxl_memdev_state *mds; struct mock_event_log mock_logs[CXL_EVENT_TYPE_MAX]; u32 ev_status; }; struct cxl_mockmem_data { void *lsa; + void *fw; + int fw_slot; + int fw_staged; + size_t fw_size; u32 security_state; u8 user_pass[NVDIMM_PASSPHRASE_LEN]; u8 master_pass[NVDIMM_PASSPHRASE_LEN]; @@ -180,8 +215,7 @@ static void mes_add_event(struct mock_event_store *mes, log->nr_events++; } -static int mock_get_event(struct cxl_dev_state *cxlds, - struct cxl_mbox_cmd *cmd) +static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd) { struct cxl_get_event_payload *pl; struct mock_event_log *log; @@ -201,7 +235,7 @@ static int mock_get_event(struct cxl_dev_state *cxlds, memset(cmd->payload_out, 0, cmd->size_out); - log = event_find_log(cxlds->dev, log_type); + log = event_find_log(dev, log_type); if (!log || event_log_empty(log)) return 0; @@ -234,8 +268,7 @@ static int mock_get_event(struct cxl_dev_state *cxlds, return 0; } -static int mock_clear_event(struct cxl_dev_state *cxlds, - struct cxl_mbox_cmd *cmd) +static int mock_clear_event(struct device *dev, struct cxl_mbox_cmd *cmd) { struct cxl_mbox_clear_event_payload *pl = cmd->payload_in; struct mock_event_log *log; @@ -246,7 +279,7 @@ static int mock_clear_event(struct cxl_dev_state *cxlds, if (log_type >= CXL_EVENT_TYPE_MAX) return -EINVAL; - log = event_find_log(cxlds->dev, log_type); + log = event_find_log(dev, log_type); if (!log) return 0; /* No mock data in this log */ @@ -256,7 +289,7 @@ static int mock_clear_event(struct cxl_dev_state *cxlds, * However, this is not good behavior for the host so test it. */ if (log->clear_idx + pl->nr_recs > log->cur_idx) { - dev_err(cxlds->dev, + dev_err(dev, "Attempting to clear more events than returned!\n"); return -EINVAL; } @@ -266,7 +299,7 @@ static int mock_clear_event(struct cxl_dev_state *cxlds, nr < pl->nr_recs; nr++, handle++) { if (handle != le16_to_cpu(pl->handles[nr])) { - dev_err(cxlds->dev, "Clearing events out of order\n"); + dev_err(dev, "Clearing events out of order\n"); return -EINVAL; } } @@ -293,7 +326,7 @@ static void cxl_mock_event_trigger(struct device *dev) event_reset_log(log); } - cxl_mem_get_event_records(mes->cxlds, mes->ev_status); + cxl_mem_get_event_records(mes->mds, mes->ev_status); } struct cxl_event_record_raw maint_needed = { @@ -453,7 +486,7 @@ static int mock_gsl(struct cxl_mbox_cmd *cmd) return 0; } -static int mock_get_log(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +static int mock_get_log(struct cxl_memdev_state *mds, struct cxl_mbox_cmd *cmd) { struct cxl_mbox_get_log *gl = cmd->payload_in; u32 offset = le32_to_cpu(gl->offset); @@ -463,7 +496,7 @@ static int mock_get_log(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) if (cmd->size_in < sizeof(*gl)) return -EINVAL; - if (length > cxlds->payload_size) + if (length > mds->payload_size) return -EINVAL; if (offset + length > sizeof(mock_cel)) return -EINVAL; @@ -477,7 +510,7 @@ static int mock_get_log(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) return 0; } -static int mock_rcd_id(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +static int mock_rcd_id(struct cxl_mbox_cmd *cmd) { struct cxl_mbox_identify id = { .fw_revision = { "mock fw v1 " }, @@ -495,7 +528,7 @@ static int mock_rcd_id(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) return 0; } -static int mock_id(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +static int mock_id(struct cxl_mbox_cmd *cmd) { struct cxl_mbox_identify id = { .fw_revision = { "mock fw v1 " }, @@ -517,8 +550,7 @@ static int mock_id(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) return 0; } -static int mock_partition_info(struct cxl_dev_state *cxlds, - struct cxl_mbox_cmd *cmd) +static int mock_partition_info(struct cxl_mbox_cmd *cmd) { struct cxl_mbox_get_partition_info pi = { .active_volatile_cap = @@ -535,11 +567,52 @@ static int mock_partition_info(struct cxl_dev_state *cxlds, return 0; } -static int mock_get_security_state(struct cxl_dev_state *cxlds, - struct cxl_mbox_cmd *cmd) +static int mock_sanitize(struct cxl_mockmem_data *mdata, + struct cxl_mbox_cmd *cmd) { - struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); + if (cmd->size_in != 0) + return -EINVAL; + + if (cmd->size_out != 0) + return -EINVAL; + + if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + if (mdata->security_state & CXL_PMEM_SEC_STATE_LOCKED) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + return 0; /* assume less than 2 secs, no bg */ +} + +static int mock_secure_erase(struct cxl_mockmem_data *mdata, + struct cxl_mbox_cmd *cmd) +{ + if (cmd->size_in != 0) + return -EINVAL; + if (cmd->size_out != 0) + return -EINVAL; + + if (mdata->security_state & CXL_PMEM_SEC_STATE_USER_PASS_SET) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + if (mdata->security_state & CXL_PMEM_SEC_STATE_LOCKED) { + cmd->return_code = CXL_MBOX_CMD_RC_SECURITY; + return -ENXIO; + } + + return 0; +} + +static int mock_get_security_state(struct cxl_mockmem_data *mdata, + struct cxl_mbox_cmd *cmd) +{ if (cmd->size_in) return -EINVAL; @@ -569,9 +642,9 @@ static void user_plimit_check(struct cxl_mockmem_data *mdata) mdata->security_state |= CXL_PMEM_SEC_STATE_USER_PLIMIT; } -static int mock_set_passphrase(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +static int mock_set_passphrase(struct cxl_mockmem_data *mdata, + struct cxl_mbox_cmd *cmd) { - struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); struct cxl_set_pass *set_pass; if (cmd->size_in != sizeof(*set_pass)) @@ -629,9 +702,9 @@ static int mock_set_passphrase(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd return -EINVAL; } -static int mock_disable_passphrase(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +static int mock_disable_passphrase(struct cxl_mockmem_data *mdata, + struct cxl_mbox_cmd *cmd) { - struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); struct cxl_disable_pass *dis_pass; if (cmd->size_in != sizeof(*dis_pass)) @@ -700,10 +773,9 @@ static int mock_disable_passphrase(struct cxl_dev_state *cxlds, struct cxl_mbox_ return 0; } -static int mock_freeze_security(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +static int mock_freeze_security(struct cxl_mockmem_data *mdata, + struct cxl_mbox_cmd *cmd) { - struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); - if (cmd->size_in != 0) return -EINVAL; @@ -717,10 +789,9 @@ static int mock_freeze_security(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd return 0; } -static int mock_unlock_security(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +static int mock_unlock_security(struct cxl_mockmem_data *mdata, + struct cxl_mbox_cmd *cmd) { - struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); - if (cmd->size_in != NVDIMM_PASSPHRASE_LEN) return -EINVAL; @@ -759,10 +830,9 @@ static int mock_unlock_security(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd return 0; } -static int mock_passphrase_secure_erase(struct cxl_dev_state *cxlds, +static int mock_passphrase_secure_erase(struct cxl_mockmem_data *mdata, struct cxl_mbox_cmd *cmd) { - struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); struct cxl_pass_erase *erase; if (cmd->size_in != sizeof(*erase)) @@ -858,10 +928,10 @@ static int mock_passphrase_secure_erase(struct cxl_dev_state *cxlds, return 0; } -static int mock_get_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +static int mock_get_lsa(struct cxl_mockmem_data *mdata, + struct cxl_mbox_cmd *cmd) { struct cxl_mbox_get_lsa *get_lsa = cmd->payload_in; - struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); void *lsa = mdata->lsa; u32 offset, length; @@ -878,10 +948,10 @@ static int mock_get_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) return 0; } -static int mock_set_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +static int mock_set_lsa(struct cxl_mockmem_data *mdata, + struct cxl_mbox_cmd *cmd) { struct cxl_mbox_set_lsa *set_lsa = cmd->payload_in; - struct cxl_mockmem_data *mdata = dev_get_drvdata(cxlds->dev); void *lsa = mdata->lsa; u32 offset, length; @@ -896,8 +966,7 @@ static int mock_set_lsa(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) return 0; } -static int mock_health_info(struct cxl_dev_state *cxlds, - struct cxl_mbox_cmd *cmd) +static int mock_health_info(struct cxl_mbox_cmd *cmd) { struct cxl_mbox_health_info health_info = { /* set flags for maint needed, perf degraded, hw replacement */ @@ -1114,9 +1183,90 @@ static struct attribute *cxl_mock_mem_core_attrs[] = { }; ATTRIBUTE_GROUPS(cxl_mock_mem_core); -static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) +static int mock_fw_info(struct cxl_mockmem_data *mdata, + struct cxl_mbox_cmd *cmd) +{ + struct cxl_mbox_get_fw_info fw_info = { + .num_slots = FW_SLOTS, + .slot_info = (mdata->fw_slot & 0x7) | + ((mdata->fw_staged & 0x7) << 3), + .activation_cap = 0, + }; + + strcpy(fw_info.slot_1_revision, "cxl_test_fw_001"); + strcpy(fw_info.slot_2_revision, "cxl_test_fw_002"); + strcpy(fw_info.slot_3_revision, "cxl_test_fw_003"); + strcpy(fw_info.slot_4_revision, ""); + + if (cmd->size_out < sizeof(fw_info)) + return -EINVAL; + + memcpy(cmd->payload_out, &fw_info, sizeof(fw_info)); + return 0; +} + +static int mock_transfer_fw(struct cxl_mockmem_data *mdata, + struct cxl_mbox_cmd *cmd) { + struct cxl_mbox_transfer_fw *transfer = cmd->payload_in; + void *fw = mdata->fw; + size_t offset, length; + + offset = le32_to_cpu(transfer->offset) * CXL_FW_TRANSFER_ALIGNMENT; + length = cmd->size_in - sizeof(*transfer); + if (offset + length > FW_SIZE) + return -EINVAL; + + switch (transfer->action) { + case CXL_FW_TRANSFER_ACTION_FULL: + if (offset != 0) + return -EINVAL; + fallthrough; + case CXL_FW_TRANSFER_ACTION_END: + if (transfer->slot == 0 || transfer->slot > FW_SLOTS) + return -EINVAL; + mdata->fw_size = offset + length; + break; + case CXL_FW_TRANSFER_ACTION_INITIATE: + case CXL_FW_TRANSFER_ACTION_CONTINUE: + break; + case CXL_FW_TRANSFER_ACTION_ABORT: + return 0; + default: + return -EINVAL; + } + + memcpy(fw + offset, transfer->data, length); + return 0; +} + +static int mock_activate_fw(struct cxl_mockmem_data *mdata, + struct cxl_mbox_cmd *cmd) +{ + struct cxl_mbox_activate_fw *activate = cmd->payload_in; + + if (activate->slot == 0 || activate->slot > FW_SLOTS) + return -EINVAL; + + switch (activate->action) { + case CXL_FW_ACTIVATE_ONLINE: + mdata->fw_slot = activate->slot; + mdata->fw_staged = 0; + return 0; + case CXL_FW_ACTIVATE_OFFLINE: + mdata->fw_staged = activate->slot; + return 0; + } + + return -EINVAL; +} + +static int cxl_mock_mbox_send(struct cxl_memdev_state *mds, + struct cxl_mbox_cmd *cmd) +{ + struct cxl_dev_state *cxlds = &mds->cxlds; struct device *dev = cxlds->dev; + struct cxl_mockmem_data *mdata = dev_get_drvdata(dev); int rc = -EIO; switch (cmd->opcode) { @@ -1127,49 +1277,55 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd * rc = mock_gsl(cmd); break; case CXL_MBOX_OP_GET_LOG: - rc = mock_get_log(cxlds, cmd); + rc = mock_get_log(mds, cmd); break; case CXL_MBOX_OP_IDENTIFY: if (cxlds->rcd) - rc = mock_rcd_id(cxlds, cmd); + rc = mock_rcd_id(cmd); else - rc = mock_id(cxlds, cmd); + rc = mock_id(cmd); break; case CXL_MBOX_OP_GET_LSA: - rc = mock_get_lsa(cxlds, cmd); + rc = mock_get_lsa(mdata, cmd); break; case CXL_MBOX_OP_GET_PARTITION_INFO: - rc = mock_partition_info(cxlds, cmd); + rc = mock_partition_info(cmd); break; case CXL_MBOX_OP_GET_EVENT_RECORD: - rc = mock_get_event(cxlds, cmd); + rc = mock_get_event(dev, cmd); break; case CXL_MBOX_OP_CLEAR_EVENT_RECORD: - rc = mock_clear_event(cxlds, cmd); + rc = mock_clear_event(dev, cmd); break; case CXL_MBOX_OP_SET_LSA: - rc = mock_set_lsa(cxlds, cmd); + rc = mock_set_lsa(mdata, cmd); break; case CXL_MBOX_OP_GET_HEALTH_INFO: - rc = mock_health_info(cxlds, cmd); + rc = mock_health_info(cmd); + break; + case CXL_MBOX_OP_SANITIZE: + rc = mock_sanitize(mdata, cmd); + break; + case CXL_MBOX_OP_SECURE_ERASE: + rc = mock_secure_erase(mdata, cmd); break; case CXL_MBOX_OP_GET_SECURITY_STATE: - rc = mock_get_security_state(cxlds, cmd); + rc = mock_get_security_state(mdata, cmd); break; case CXL_MBOX_OP_SET_PASSPHRASE: - rc = mock_set_passphrase(cxlds, cmd); + rc = mock_set_passphrase(mdata, cmd); break; case CXL_MBOX_OP_DISABLE_PASSPHRASE: - rc = mock_disable_passphrase(cxlds, cmd); + rc = mock_disable_passphrase(mdata, cmd); break; case CXL_MBOX_OP_FREEZE_SECURITY: - rc = mock_freeze_security(cxlds, cmd); + rc = mock_freeze_security(mdata, cmd); break; case CXL_MBOX_OP_UNLOCK: - rc = mock_unlock_security(cxlds, cmd); + rc = mock_unlock_security(mdata, cmd); break; case CXL_MBOX_OP_PASSPHRASE_SECURE_ERASE: - rc = mock_passphrase_secure_erase(cxlds, cmd); + rc = mock_passphrase_secure_erase(mdata, cmd); break; case CXL_MBOX_OP_GET_POISON: rc = mock_get_poison(cxlds, cmd); @@ -1180,6 +1336,15 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd * case CXL_MBOX_OP_CLEAR_POISON: rc = mock_clear_poison(cxlds, cmd); break; + case CXL_MBOX_OP_GET_FW_INFO: + rc = mock_fw_info(mdata, cmd); + break; + case CXL_MBOX_OP_TRANSFER_FW: + rc = mock_transfer_fw(mdata, cmd); + break; + case CXL_MBOX_OP_ACTIVATE_FW: + rc = mock_activate_fw(mdata, cmd); + break; default: break; } @@ -1195,6 +1360,11 @@ static void label_area_release(void *lsa) vfree(lsa); } +static void fw_buf_release(void *buf) +{ + vfree(buf); +} + static bool is_rcd(struct platform_device *pdev) { const struct platform_device_id *id = platform_get_device_id(pdev); @@ -1215,6 +1385,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; struct cxl_memdev *cxlmd; + struct cxl_memdev_state *mds; struct cxl_dev_state *cxlds; struct cxl_mockmem_data *mdata; int rc; @@ -1227,52 +1398,67 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) mdata->lsa = vmalloc(LSA_SIZE); if (!mdata->lsa) return -ENOMEM; + mdata->fw = vmalloc(FW_SIZE); + if (!mdata->fw) + return -ENOMEM; + mdata->fw_slot = 2; + rc = devm_add_action_or_reset(dev, label_area_release, mdata->lsa); if (rc) return rc; - cxlds = cxl_dev_state_create(dev); - if (IS_ERR(cxlds)) - return PTR_ERR(cxlds); + rc = devm_add_action_or_reset(dev, fw_buf_release, mdata->fw); + if (rc) + return rc; + + mds = cxl_memdev_state_create(dev); + if (IS_ERR(mds)) + return PTR_ERR(mds); + mds->mbox_send = cxl_mock_mbox_send; + mds->payload_size = SZ_4K; + mds->event.buf = (struct cxl_get_event_payload *) mdata->event_buf; + + cxlds = &mds->cxlds; cxlds->serial = pdev->id; - cxlds->mbox_send = cxl_mock_mbox_send; - cxlds->payload_size = SZ_4K; - cxlds->event.buf = (struct cxl_get_event_payload *) mdata->event_buf; if (is_rcd(pdev)) { cxlds->rcd = true; cxlds->component_reg_phys = CXL_RESOURCE_NONE; } - rc = cxl_enumerate_cmds(cxlds); + rc = cxl_enumerate_cmds(mds); if (rc) return rc; - rc = cxl_poison_state_init(cxlds); + rc = cxl_poison_state_init(mds); if (rc) return rc; - rc = cxl_set_timestamp(cxlds); + rc = cxl_set_timestamp(mds); if (rc) return rc; cxlds->media_ready = true; - rc = cxl_dev_state_identify(cxlds); + rc = cxl_dev_state_identify(mds); if (rc) return rc; - rc = cxl_mem_create_range_info(cxlds); + rc = cxl_mem_create_range_info(mds); if (rc) return rc; - mdata->mes.cxlds = cxlds; + mdata->mes.mds = mds; cxl_mock_add_event_logs(&mdata->mes); cxlmd = devm_cxl_add_memdev(cxlds); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); - cxl_mem_get_event_records(cxlds, CXLDEV_EVENT_STATUS_ALL); + rc = cxl_memdev_setup_fw_upload(mds); + if (rc) + return rc; + + cxl_mem_get_event_records(mds, CXLDEV_EVENT_STATUS_ALL); return 0; } @@ -1310,9 +1496,40 @@ static ssize_t security_lock_store(struct device *dev, struct device_attribute * static DEVICE_ATTR_RW(security_lock); +static ssize_t fw_buf_checksum_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_mockmem_data *mdata = dev_get_drvdata(dev); + u8 hash[SHA256_DIGEST_SIZE]; + unsigned char *hstr, *hptr; + struct sha256_state sctx; + ssize_t written = 0; + int i; + + sha256_init(&sctx); + sha256_update(&sctx, mdata->fw, mdata->fw_size); + sha256_final(&sctx, hash); + + hstr = kzalloc((SHA256_DIGEST_SIZE * 2) + 1, GFP_KERNEL); + if (!hstr) + return -ENOMEM; + + hptr = hstr; + for (i = 0; i < SHA256_DIGEST_SIZE; i++) + hptr += sprintf(hptr, "%02x", hash[i]); + + written = sysfs_emit(buf, "%s\n", hstr); + + kfree(hstr); + return written; +} + +static DEVICE_ATTR_RO(fw_buf_checksum); + static struct attribute *cxl_mock_mem_attrs[] = { &dev_attr_security_lock.attr, &dev_attr_event_trigger.attr, + &dev_attr_fw_buf_checksum.attr, NULL }; ATTRIBUTE_GROUPS(cxl_mock_mem); diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 284416527644..1a61e68e3095 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -139,7 +139,7 @@ struct cxl_hdm *__wrap_devm_cxl_setup_hdm(struct cxl_port *port, struct cxl_hdm *cxlhdm; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - if (ops && ops->is_mock_port(port->uport)) + if (ops && ops->is_mock_port(port->uport_dev)) cxlhdm = ops->devm_cxl_setup_hdm(port, info); else cxlhdm = devm_cxl_setup_hdm(port, info); @@ -149,27 +149,12 @@ struct cxl_hdm *__wrap_devm_cxl_setup_hdm(struct cxl_port *port, } EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_setup_hdm, CXL); -int __wrap_devm_cxl_enable_hdm(struct cxl_port *port, struct cxl_hdm *cxlhdm) -{ - int index, rc; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - - if (ops && ops->is_mock_port(port->uport)) - rc = 0; - else - rc = devm_cxl_enable_hdm(port, cxlhdm); - put_cxl_mock_ops(index); - - return rc; -} -EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_enable_hdm, CXL); - int __wrap_devm_cxl_add_passthrough_decoder(struct cxl_port *port) { int rc, index; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - if (ops && ops->is_mock_port(port->uport)) + if (ops && ops->is_mock_port(port->uport_dev)) rc = ops->devm_cxl_add_passthrough_decoder(port); else rc = devm_cxl_add_passthrough_decoder(port); @@ -186,7 +171,7 @@ int __wrap_devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm, struct cxl_port *port = cxlhdm->port; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - if (ops && ops->is_mock_port(port->uport)) + if (ops && ops->is_mock_port(port->uport_dev)) rc = ops->devm_cxl_enumerate_decoders(cxlhdm, info); else rc = devm_cxl_enumerate_decoders(cxlhdm, info); @@ -201,7 +186,7 @@ int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port) int rc, index; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - if (ops && ops->is_mock_port(port->uport)) + if (ops && ops->is_mock_port(port->uport_dev)) rc = ops->devm_cxl_port_enumerate_dports(port); else rc = devm_cxl_port_enumerate_dports(port); @@ -259,24 +244,46 @@ int __wrap_cxl_dvsec_rr_decode(struct device *dev, int dvsec, } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dvsec_rr_decode, CXL); -resource_size_t __wrap_cxl_rcrb_to_component(struct device *dev, - resource_size_t rcrb, - enum cxl_rcrb which) +struct cxl_dport *__wrap_devm_cxl_add_rch_dport(struct cxl_port *port, + struct device *dport_dev, + int port_id, + resource_size_t rcrb) +{ + int index; + struct cxl_dport *dport; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && ops->is_mock_port(dport_dev)) { + dport = devm_cxl_add_dport(port, dport_dev, port_id, + CXL_RESOURCE_NONE); + if (!IS_ERR(dport)) { + dport->rcrb.base = rcrb; + dport->rch = true; + } + } else + dport = devm_cxl_add_rch_dport(port, dport_dev, port_id, rcrb); + put_cxl_mock_ops(index); + + return dport; +} +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_rch_dport, CXL); + +resource_size_t __wrap_cxl_rcd_component_reg_phys(struct device *dev, + struct cxl_dport *dport) { int index; resource_size_t component_reg_phys; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); if (ops && ops->is_mock_port(dev)) - component_reg_phys = - ops->cxl_rcrb_to_component(dev, rcrb, which); + component_reg_phys = CXL_RESOURCE_NONE; else - component_reg_phys = cxl_rcrb_to_component(dev, rcrb, which); + component_reg_phys = cxl_rcd_component_reg_phys(dev, dport); put_cxl_mock_ops(index); return component_reg_phys; } -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_rcrb_to_component, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_rcd_component_reg_phys, CXL); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h index bef8817b01f2..a94223750346 100644 --- a/tools/testing/cxl/test/mock.h +++ b/tools/testing/cxl/test/mock.h @@ -15,9 +15,6 @@ struct cxl_mock_ops { acpi_string pathname, struct acpi_object_list *arguments, unsigned long long *data); - resource_size_t (*cxl_rcrb_to_component)(struct device *dev, - resource_size_t rcrb, - enum cxl_rcrb which); struct acpi_pci_root *(*acpi_pci_find_root)(acpi_handle handle); bool (*is_mock_bus)(struct pci_bus *bus); bool (*is_mock_port)(struct device *dev); diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index f990cbb73250..3bf506d4a63c 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ b/tools/testing/kunit/configs/all_tests.config @@ -9,6 +9,8 @@ CONFIG_KUNIT=y CONFIG_KUNIT_EXAMPLE_TEST=y CONFIG_KUNIT_ALL_TESTS=y +CONFIG_FORTIFY_SOURCE=y + CONFIG_IIO=y CONFIG_EXT4_FS=y @@ -31,5 +33,12 @@ CONFIG_DAMON_PADDR=y CONFIG_DEBUG_FS=y CONFIG_DAMON_DBGFS=y +CONFIG_REGMAP_BUILD=y + CONFIG_SECURITY=y CONFIG_SECURITY_APPARMOR=y + +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_SND_SOC=y +CONFIG_SND_SOC_TOPOLOGY_BUILD=y diff --git a/tools/testing/kunit/configs/arch_uml.config b/tools/testing/kunit/configs/arch_uml.config index e824ce43b05a..54ad8972681a 100644 --- a/tools/testing/kunit/configs/arch_uml.config +++ b/tools/testing/kunit/configs/arch_uml.config @@ -3,3 +3,6 @@ # Enable virtio/pci, as a lot of tests require it. CONFIG_VIRTIO_UML=y CONFIG_UML_PCI_OVER_VIRTIO=y + +# Enable FORTIFY_SOURCE for wider checking. +CONFIG_FORTIFY_SOURCE=y diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 3905c43369c3..bc74088c458a 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -55,8 +55,12 @@ class KunitExecRequest(KunitParseRequest): build_dir: str timeout: int filter_glob: str + filter: str + filter_action: Optional[str] kernel_args: Optional[List[str]] run_isolated: Optional[str] + list_tests: bool + list_tests_attr: bool @dataclass class KunitRequest(KunitExecRequest, KunitBuildRequest): @@ -102,19 +106,41 @@ def config_and_build_tests(linux: kunit_kernel.LinuxSourceTree, def _list_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -> List[str]: args = ['kunit.action=list'] + + if request.kernel_args: + args.extend(request.kernel_args) + + output = linux.run_kernel(args=args, + timeout=request.timeout, + filter_glob=request.filter_glob, + filter=request.filter, + filter_action=request.filter_action, + build_dir=request.build_dir) + lines = kunit_parser.extract_tap_lines(output) + # Hack! Drop the dummy TAP version header that the executor prints out. + lines.pop() + + # Filter out any extraneous non-test output that might have gotten mixed in. + return [l for l in output if re.match(r'^[^\s.]+\.[^\s.]+$', l)] + +def _list_tests_attr(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -> Iterable[str]: + args = ['kunit.action=list_attr'] + if request.kernel_args: args.extend(request.kernel_args) output = linux.run_kernel(args=args, timeout=request.timeout, filter_glob=request.filter_glob, + filter=request.filter, + filter_action=request.filter_action, build_dir=request.build_dir) lines = kunit_parser.extract_tap_lines(output) # Hack! Drop the dummy TAP version header that the executor prints out. lines.pop() # Filter out any extraneous non-test output that might have gotten mixed in. - return [l for l in lines if re.match(r'^[^\s.]+\.[^\s.]+$', l)] + return lines def _suites_from_test_list(tests: List[str]) -> List[str]: """Extracts all the suites from an ordered list of tests.""" @@ -128,10 +154,18 @@ def _suites_from_test_list(tests: List[str]) -> List[str]: suites.append(suite) return suites - - def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) -> KunitResult: filter_globs = [request.filter_glob] + if request.list_tests: + output = _list_tests(linux, request) + for line in output: + print(line.rstrip()) + return KunitResult(status=KunitStatus.SUCCESS, elapsed_time=0.0) + if request.list_tests_attr: + attr_output = _list_tests_attr(linux, request) + for line in attr_output: + print(line.rstrip()) + return KunitResult(status=KunitStatus.SUCCESS, elapsed_time=0.0) if request.run_isolated: tests = _list_tests(linux, request) if request.run_isolated == 'test': @@ -155,6 +189,8 @@ def exec_tests(linux: kunit_kernel.LinuxSourceTree, request: KunitExecRequest) - args=request.kernel_args, timeout=request.timeout, filter_glob=filter_glob, + filter=request.filter, + filter_action=request.filter_action, build_dir=request.build_dir) _, test_result = parse_tests(request, metadata, run_result) @@ -341,6 +377,16 @@ def add_exec_opts(parser: argparse.ArgumentParser) -> None: nargs='?', default='', metavar='filter_glob') + parser.add_argument('--filter', + help='Filter KUnit tests with attributes, ' + 'e.g. module=example or speed>slow', + type=str, + default='') + parser.add_argument('--filter_action', + help='If set to skip, filtered tests will be skipped, ' + 'e.g. --filter_action=skip. Otherwise they will not run.', + type=str, + choices=['skip']) parser.add_argument('--kernel_args', help='Kernel command-line parameters. Maybe be repeated', action='append', metavar='') @@ -350,6 +396,12 @@ def add_exec_opts(parser: argparse.ArgumentParser) -> None: 'what ran before it.', type=str, choices=['suite', 'test']) + parser.add_argument('--list_tests', help='If set, list all tests that will be ' + 'run.', + action='store_true') + parser.add_argument('--list_tests_attr', help='If set, list all tests and test ' + 'attributes.', + action='store_true') def add_parse_opts(parser: argparse.ArgumentParser) -> None: parser.add_argument('--raw_output', help='If set don\'t parse output from kernel. ' @@ -398,8 +450,12 @@ def run_handler(cli_args: argparse.Namespace) -> None: json=cli_args.json, timeout=cli_args.timeout, filter_glob=cli_args.filter_glob, + filter=cli_args.filter, + filter_action=cli_args.filter_action, kernel_args=cli_args.kernel_args, - run_isolated=cli_args.run_isolated) + run_isolated=cli_args.run_isolated, + list_tests=cli_args.list_tests, + list_tests_attr=cli_args.list_tests_attr) result = run_tests(linux, request) if result.status != KunitStatus.SUCCESS: sys.exit(1) @@ -441,8 +497,12 @@ def exec_handler(cli_args: argparse.Namespace) -> None: json=cli_args.json, timeout=cli_args.timeout, filter_glob=cli_args.filter_glob, + filter=cli_args.filter, + filter_action=cli_args.filter_action, kernel_args=cli_args.kernel_args, - run_isolated=cli_args.run_isolated) + run_isolated=cli_args.run_isolated, + list_tests=cli_args.list_tests, + list_tests_attr=cli_args.list_tests_attr) result = exec_tests(linux, exec_request) stdout.print_with_timestamp(( 'Elapsed time: %.3fs\n') % (result.elapsed_time)) diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index f01f94106129..0b6488efed47 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -92,7 +92,7 @@ class LinuxSourceTreeOperations: if stderr: # likely only due to build warnings print(stderr.decode()) - def start(self, params: List[str], build_dir: str) -> subprocess.Popen[str]: + def start(self, params: List[str], build_dir: str) -> subprocess.Popen: raise RuntimeError('not implemented!') @@ -113,7 +113,7 @@ class LinuxSourceTreeOperationsQemu(LinuxSourceTreeOperations): kconfig.merge_in_entries(base_kunitconfig) return kconfig - def start(self, params: List[str], build_dir: str) -> subprocess.Popen[str]: + def start(self, params: List[str], build_dir: str) -> subprocess.Popen: kernel_path = os.path.join(build_dir, self._kernel_path) qemu_command = ['qemu-system-' + self._qemu_arch, '-nodefaults', @@ -142,7 +142,7 @@ class LinuxSourceTreeOperationsUml(LinuxSourceTreeOperations): kconfig.merge_in_entries(base_kunitconfig) return kconfig - def start(self, params: List[str], build_dir: str) -> subprocess.Popen[str]: + def start(self, params: List[str], build_dir: str) -> subprocess.Popen: """Runs the Linux UML binary. Must be named 'linux'.""" linux_bin = os.path.join(build_dir, 'linux') params.extend(['mem=1G', 'console=tty', 'kunit_shutdown=halt']) @@ -330,11 +330,15 @@ class LinuxSourceTree: return False return self.validate_config(build_dir) - def run_kernel(self, args: Optional[List[str]]=None, build_dir: str='', filter_glob: str='', timeout: Optional[int]=None) -> Iterator[str]: + def run_kernel(self, args: Optional[List[str]]=None, build_dir: str='', filter_glob: str='', filter: str='', filter_action: Optional[str]=None, timeout: Optional[int]=None) -> Iterator[str]: if not args: args = [] if filter_glob: - args.append('kunit.filter_glob='+filter_glob) + args.append('kunit.filter_glob=' + filter_glob) + if filter: + args.append('kunit.filter="' + filter + '"') + if filter_action: + args.append('kunit.filter_action=' + filter_action) args.append('kunit.enable=1') process = self._ops.start(args, build_dir) diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index fbc094f0567e..79d8832c862a 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -212,6 +212,7 @@ KTAP_START = re.compile(r'\s*KTAP version ([0-9]+)$') TAP_START = re.compile(r'\s*TAP version ([0-9]+)$') KTAP_END = re.compile(r'\s*(List of all partitions:|' 'Kernel panic - not syncing: VFS:|reboot: System halted)') +EXECUTOR_ERROR = re.compile(r'\s*kunit executor: (.*)$') def extract_tap_lines(kernel_output: Iterable[str]) -> LineStream: """Extracts KTAP lines from the kernel output.""" @@ -242,6 +243,8 @@ def extract_tap_lines(kernel_output: Iterable[str]) -> LineStream: # remove the prefix, if any. line = line[prefix_len:] yield line_num, line + elif EXECUTOR_ERROR.search(line): + yield line_num, line return LineStream(lines=isolate_ktap_output(kernel_output)) KTAP_VERSIONS = [1] @@ -447,7 +450,7 @@ def parse_diagnostic(lines: LineStream) -> List[str]: Log of diagnostic lines """ log = [] # type: List[str] - non_diagnostic_lines = [TEST_RESULT, TEST_HEADER, KTAP_START] + non_diagnostic_lines = [TEST_RESULT, TEST_HEADER, KTAP_START, TAP_START] while lines and not any(re.match(lines.peek()) for re in non_diagnostic_lines): log.append(lines.pop()) @@ -713,6 +716,11 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: """ test = Test() test.log.extend(log) + + # Parse any errors prior to parsing tests + err_log = parse_diagnostic(lines) + test.log.extend(err_log) + if not is_subtest: # If parsing the main/top-level test, parse KTAP version line and # test plan @@ -774,6 +782,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: # Don't override a bad status if this test had one reported. # Assumption: no subtests means CRASHED is from Test.__init__() if test.status in (TestStatus.TEST_CRASHED, TestStatus.SUCCESS): + print_log(test.log) test.status = TestStatus.NO_TESTS test.add_error('0 tests run!') diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index be35999bb84f..b28c1510be2e 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -597,7 +597,7 @@ class KUnitMainTest(unittest.TestCase): self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 0) self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) self.linux_source_mock.run_kernel.assert_called_once_with( - args=None, build_dir='.kunit', filter_glob='', timeout=300) + args=None, build_dir='.kunit', filter_glob='', filter='', filter_action=None, timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_run_passes_args_pass(self): @@ -605,7 +605,7 @@ class KUnitMainTest(unittest.TestCase): self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.assertEqual(self.linux_source_mock.run_kernel.call_count, 1) self.linux_source_mock.run_kernel.assert_called_once_with( - args=None, build_dir='.kunit', filter_glob='', timeout=300) + args=None, build_dir='.kunit', filter_glob='', filter='', filter_action=None, timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_exec_passes_args_fail(self): @@ -629,7 +629,7 @@ class KUnitMainTest(unittest.TestCase): kunit.main(['run']) self.assertEqual(e.exception.code, 1) self.linux_source_mock.run_kernel.assert_called_once_with( - args=None, build_dir='.kunit', filter_glob='', timeout=300) + args=None, build_dir='.kunit', filter_glob='', filter='', filter_action=None, timeout=300) self.print_mock.assert_any_call(StrContains(' 0 tests run!')) def test_exec_raw_output(self): @@ -670,13 +670,13 @@ class KUnitMainTest(unittest.TestCase): self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) kunit.main(['run', '--raw_output', 'filter_glob']) self.linux_source_mock.run_kernel.assert_called_once_with( - args=None, build_dir='.kunit', filter_glob='filter_glob', timeout=300) + args=None, build_dir='.kunit', filter_glob='filter_glob', filter='', filter_action=None, timeout=300) def test_exec_timeout(self): timeout = 3453 kunit.main(['exec', '--timeout', str(timeout)]) self.linux_source_mock.run_kernel.assert_called_once_with( - args=None, build_dir='.kunit', filter_glob='', timeout=timeout) + args=None, build_dir='.kunit', filter_glob='', filter='', filter_action=None, timeout=timeout) self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_run_timeout(self): @@ -684,7 +684,7 @@ class KUnitMainTest(unittest.TestCase): kunit.main(['run', '--timeout', str(timeout)]) self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.linux_source_mock.run_kernel.assert_called_once_with( - args=None, build_dir='.kunit', filter_glob='', timeout=timeout) + args=None, build_dir='.kunit', filter_glob='', filter='', filter_action=None, timeout=timeout) self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_run_builddir(self): @@ -692,7 +692,7 @@ class KUnitMainTest(unittest.TestCase): kunit.main(['run', '--build_dir=.kunit']) self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.linux_source_mock.run_kernel.assert_called_once_with( - args=None, build_dir=build_dir, filter_glob='', timeout=300) + args=None, build_dir=build_dir, filter_glob='', filter='', filter_action=None, timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_config_builddir(self): @@ -710,7 +710,7 @@ class KUnitMainTest(unittest.TestCase): build_dir = '.kunit' kunit.main(['exec', '--build_dir', build_dir]) self.linux_source_mock.run_kernel.assert_called_once_with( - args=None, build_dir=build_dir, filter_glob='', timeout=300) + args=None, build_dir=build_dir, filter_glob='', filter='', filter_action=None, timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_run_kunitconfig(self): @@ -786,7 +786,7 @@ class KUnitMainTest(unittest.TestCase): kunit.main(['run', '--kernel_args=a=1', '--kernel_args=b=2']) self.assertEqual(self.linux_source_mock.build_reconfig.call_count, 1) self.linux_source_mock.run_kernel.assert_called_once_with( - args=['a=1','b=2'], build_dir='.kunit', filter_glob='', timeout=300) + args=['a=1','b=2'], build_dir='.kunit', filter_glob='', filter='', filter_action=None, timeout=300) self.print_mock.assert_any_call(StrContains('Testing complete.')) def test_list_tests(self): @@ -794,13 +794,11 @@ class KUnitMainTest(unittest.TestCase): self.linux_source_mock.run_kernel.return_value = ['TAP version 14', 'init: random output'] + want got = kunit._list_tests(self.linux_source_mock, - kunit.KunitExecRequest(None, None, '.kunit', 300, 'suite*', None, 'suite')) - + kunit.KunitExecRequest(None, None, '.kunit', 300, 'suite*', '', None, None, 'suite', False, False)) self.assertEqual(got, want) # Should respect the user's filter glob when listing tests. self.linux_source_mock.run_kernel.assert_called_once_with( - args=['kunit.action=list'], build_dir='.kunit', filter_glob='suite*', timeout=300) - + args=['kunit.action=list'], build_dir='.kunit', filter_glob='suite*', filter='', filter_action=None, timeout=300) @mock.patch.object(kunit, '_list_tests') def test_run_isolated_by_suite(self, mock_tests): @@ -809,10 +807,10 @@ class KUnitMainTest(unittest.TestCase): # Should respect the user's filter glob when listing tests. mock_tests.assert_called_once_with(mock.ANY, - kunit.KunitExecRequest(None, None, '.kunit', 300, 'suite*.test*', None, 'suite')) + kunit.KunitExecRequest(None, None, '.kunit', 300, 'suite*.test*', '', None, None, 'suite', False, False)) self.linux_source_mock.run_kernel.assert_has_calls([ - mock.call(args=None, build_dir='.kunit', filter_glob='suite.test*', timeout=300), - mock.call(args=None, build_dir='.kunit', filter_glob='suite2.test*', timeout=300), + mock.call(args=None, build_dir='.kunit', filter_glob='suite.test*', filter='', filter_action=None, timeout=300), + mock.call(args=None, build_dir='.kunit', filter_glob='suite2.test*', filter='', filter_action=None, timeout=300), ]) @mock.patch.object(kunit, '_list_tests') @@ -822,13 +820,12 @@ class KUnitMainTest(unittest.TestCase): # Should respect the user's filter glob when listing tests. mock_tests.assert_called_once_with(mock.ANY, - kunit.KunitExecRequest(None, None, '.kunit', 300, 'suite*', None, 'test')) + kunit.KunitExecRequest(None, None, '.kunit', 300, 'suite*', '', None, None, 'test', False, False)) self.linux_source_mock.run_kernel.assert_has_calls([ - mock.call(args=None, build_dir='.kunit', filter_glob='suite.test1', timeout=300), - mock.call(args=None, build_dir='.kunit', filter_glob='suite.test2', timeout=300), - mock.call(args=None, build_dir='.kunit', filter_glob='suite2.test1', timeout=300), + mock.call(args=None, build_dir='.kunit', filter_glob='suite.test1', filter='', filter_action=None, timeout=300), + mock.call(args=None, build_dir='.kunit', filter_glob='suite.test2', filter='', filter_action=None, timeout=300), + mock.call(args=None, build_dir='.kunit', filter_glob='suite2.test1', filter='', filter_action=None, timeout=300), ]) - if __name__ == '__main__': unittest.main() diff --git a/tools/testing/kunit/mypy.ini b/tools/testing/kunit/mypy.ini new file mode 100644 index 000000000000..ddd288309efa --- /dev/null +++ b/tools/testing/kunit/mypy.ini @@ -0,0 +1,6 @@ +[mypy] +strict = True + +# E.g. we can't write subprocess.Popen[str] until Python 3.9+. +# But kunit.py tries to support Python 3.7+, so let's disable it. +disable_error_code = type-arg diff --git a/tools/testing/kunit/qemu_configs/arm64.py b/tools/testing/kunit/qemu_configs/arm64.py index 67d04064f785..d3ff27024755 100644 --- a/tools/testing/kunit/qemu_configs/arm64.py +++ b/tools/testing/kunit/qemu_configs/arm64.py @@ -9,4 +9,4 @@ CONFIG_SERIAL_AMBA_PL011_CONSOLE=y''', qemu_arch='aarch64', kernel_path='arch/arm64/boot/Image.gz', kernel_command_line='console=ttyAMA0', - extra_qemu_params=['-machine', 'virt', '-cpu', 'cortex-a57']) + extra_qemu_params=['-machine', 'virt', '-cpu', 'max,pauth-impdef=on']) diff --git a/tools/testing/kunit/run_checks.py b/tools/testing/kunit/run_checks.py index 8208c3b3135e..c6d494ea3373 100755 --- a/tools/testing/kunit/run_checks.py +++ b/tools/testing/kunit/run_checks.py @@ -23,7 +23,7 @@ commands: Dict[str, Sequence[str]] = { 'kunit_tool_test.py': ['./kunit_tool_test.py'], 'kunit smoke test': ['./kunit.py', 'run', '--kunitconfig=lib/kunit', '--build_dir=kunit_run_checks'], 'pytype': ['/bin/sh', '-c', 'pytype *.py'], - 'mypy': ['mypy', '--strict', '--exclude', '_test.py$', '--exclude', 'qemu_configs/', '.'], + 'mypy': ['mypy', '--config-file', 'mypy.ini', '--exclude', '_test.py$', '--exclude', 'qemu_configs/', '.'], } # The user might not have mypy or pytype installed, skip them if so. diff --git a/tools/testing/memblock/internal.h b/tools/testing/memblock/internal.h index fdb7f5db7308..f6c6e5474c3a 100644 --- a/tools/testing/memblock/internal.h +++ b/tools/testing/memblock/internal.h @@ -20,4 +20,8 @@ void memblock_free_pages(struct page *page, unsigned long pfn, { } +static inline void accept_memory(phys_addr_t start, phys_addr_t end) +{ +} + #endif diff --git a/tools/testing/memblock/mmzone.c b/tools/testing/memblock/mmzone.c index 7b0909e8b759..d3d58851864e 100644 --- a/tools/testing/memblock/mmzone.c +++ b/tools/testing/memblock/mmzone.c @@ -11,7 +11,7 @@ struct pglist_data *next_online_pgdat(struct pglist_data *pgdat) return NULL; } -void reserve_bootmem_region(phys_addr_t start, phys_addr_t end) +void reserve_bootmem_region(phys_addr_t start, phys_addr_t end, int nid) { } diff --git a/tools/testing/memblock/tests/alloc_nid_api.c b/tools/testing/memblock/tests/alloc_nid_api.c index 49ef68cccd6f..49bb416d34ff 100644 --- a/tools/testing/memblock/tests/alloc_nid_api.c +++ b/tools/testing/memblock/tests/alloc_nid_api.c @@ -2494,6 +2494,35 @@ static int alloc_nid_numa_split_all_reserved_generic_check(void) return 0; } +/* + * A simple test that tries to allocate a memory region through the + * memblock_alloc_node() on a NUMA node with id `nid`. Expected to have the + * correct NUMA node set for the new region. + */ +static int alloc_node_on_correct_nid(void) +{ + int nid_req = 2; + void *allocated_ptr = NULL; +#ifdef CONFIG_NUMA + struct memblock_region *req_node = &memblock.memory.regions[nid_req]; +#endif + phys_addr_t size = SZ_512; + + PREFIX_PUSH(); + setup_numa_memblock(node_fractions); + + allocated_ptr = memblock_alloc_node(size, SMP_CACHE_BYTES, nid_req); + + ASSERT_NE(allocated_ptr, NULL); +#ifdef CONFIG_NUMA + ASSERT_EQ(nid_req, req_node->nid); +#endif + + test_pass_pop(); + + return 0; +} + /* Test case wrappers for NUMA tests */ static int alloc_nid_numa_simple_check(void) { @@ -2632,6 +2661,15 @@ static int alloc_nid_numa_split_all_reserved_check(void) return 0; } +static int alloc_node_numa_on_correct_nid(void) +{ + test_print("\tRunning %s...\n", __func__); + run_top_down(alloc_node_on_correct_nid); + run_bottom_up(alloc_node_on_correct_nid); + + return 0; +} + int __memblock_alloc_nid_numa_checks(void) { test_print("Running %s NUMA tests...\n", @@ -2652,6 +2690,8 @@ int __memblock_alloc_nid_numa_checks(void) alloc_nid_numa_reserved_full_merge_check(); alloc_nid_numa_split_all_reserved_check(); + alloc_node_numa_on_correct_nid(); + return 0; } diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c index 411647094cc3..57bf2688edfd 100644 --- a/tools/testing/memblock/tests/basic_api.c +++ b/tools/testing/memblock/tests/basic_api.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-or-later +#include "basic_api.h" #include <string.h> #include <linux/memblock.h> -#include "basic_api.h" #define EXPECTED_MEMBLOCK_REGIONS 128 #define FUNC_ADD "memblock_add" diff --git a/tools/testing/memblock/tests/common.h b/tools/testing/memblock/tests/common.h index 4f23302ee677..b5ec59aa62d7 100644 --- a/tools/testing/memblock/tests/common.h +++ b/tools/testing/memblock/tests/common.h @@ -5,6 +5,7 @@ #include <stdlib.h> #include <assert.h> #include <linux/types.h> +#include <linux/seq_file.h> #include <linux/memblock.h> #include <linux/sizes.h> #include <linux/printk.h> diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index e4e2d1650dd5..005043bd9623 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -3240,11 +3240,6 @@ static int nfit_test_probe(struct platform_device *pdev) return 0; } -static int nfit_test_remove(struct platform_device *pdev) -{ - return 0; -} - static void nfit_test_release(struct device *dev) { struct nfit_test *nfit_test = to_nfit_test(dev); @@ -3259,7 +3254,6 @@ static const struct platform_device_id nfit_test_id[] = { static struct platform_driver nfit_test_driver = { .probe = nfit_test_probe, - .remove = nfit_test_remove, .driver = { .name = KBUILD_MODNAME, }, diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h index b5f7a996c4d0..b00583d1eace 100644 --- a/tools/testing/nvdimm/test/nfit_test.h +++ b/tools/testing/nvdimm/test/nfit_test.h @@ -207,7 +207,36 @@ typedef struct nfit_test_resource *(*nfit_test_lookup_fn)(resource_size_t); typedef union acpi_object *(*nfit_test_evaluate_dsm_fn)(acpi_handle handle, const guid_t *guid, u64 rev, u64 func, union acpi_object *argv4); +void __iomem *__wrap_devm_ioremap(struct device *dev, + resource_size_t offset, unsigned long size); +void *__wrap_devm_memremap(struct device *dev, resource_size_t offset, + size_t size, unsigned long flags); +void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); +pfn_t __wrap_phys_to_pfn_t(phys_addr_t addr, unsigned long flags); +void *__wrap_memremap(resource_size_t offset, size_t size, + unsigned long flags); +void __wrap_devm_memunmap(struct device *dev, void *addr); +void __iomem *__wrap_ioremap(resource_size_t offset, unsigned long size); +void __iomem *__wrap_ioremap_wc(resource_size_t offset, unsigned long size); void __wrap_iounmap(volatile void __iomem *addr); +void __wrap_memunmap(void *addr); +struct resource *__wrap___request_region(struct resource *parent, + resource_size_t start, resource_size_t n, const char *name, + int flags); +int __wrap_insert_resource(struct resource *parent, struct resource *res); +int __wrap_remove_resource(struct resource *res); +struct resource *__wrap___devm_request_region(struct device *dev, + struct resource *parent, resource_size_t start, + resource_size_t n, const char *name); +void __wrap___release_region(struct resource *parent, resource_size_t start, + resource_size_t n); +void __wrap___devm_release_region(struct device *dev, struct resource *parent, + resource_size_t start, resource_size_t n); +acpi_status __wrap_acpi_evaluate_object(acpi_handle handle, acpi_string path, + struct acpi_object_list *p, struct acpi_buffer *buf); +union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const guid_t *guid, + u64 rev, u64 func, union acpi_object *argv4); + void nfit_test_setup(nfit_test_lookup_fn lookup, nfit_test_evaluate_dsm_fn evaluate); void nfit_test_teardown(void); diff --git a/tools/testing/radix-tree/linux/init.h b/tools/testing/radix-tree/linux/init.h index 1bb0afc21309..81563c3dfce7 100644 --- a/tools/testing/radix-tree/linux/init.h +++ b/tools/testing/radix-tree/linux/init.h @@ -1 +1,2 @@ #define __init +#define __exit diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index 9286d3baa12d..e5da1cad70ba 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -14,6 +14,7 @@ #include "test.h" #include <stdlib.h> #include <time.h> +#include "linux/init.h" #define module_init(x) #define module_exit(x) @@ -22,7 +23,6 @@ #define dump_stack() assert(0) #include "../../../lib/maple_tree.c" -#undef CONFIG_DEBUG_MAPLE_TREE #include "../../../lib/test_maple_tree.c" #define RCU_RANGE_COUNT 1000 @@ -45,6 +45,13 @@ struct rcu_test_struct2 { unsigned long last[RCU_RANGE_COUNT]; }; +struct rcu_test_struct3 { + struct maple_tree *mt; + unsigned long index; + unsigned long last; + bool stop; +}; + struct rcu_reader_struct { unsigned int id; int mod; @@ -81,7 +88,7 @@ static void check_mas_alloc_node_count(struct ma_state *mas) * check_new_node() - Check the creation of new nodes and error path * verification. */ -static noinline void check_new_node(struct maple_tree *mt) +static noinline void __init check_new_node(struct maple_tree *mt) { struct maple_node *mn, *mn2, *mn3; @@ -206,9 +213,9 @@ static noinline void check_new_node(struct maple_tree *mt) e = i - 1; } else { if (i >= 4) - e = i - 4; - else if (i == 3) - e = i - 2; + e = i - 3; + else if (i >= 1) + e = i - 1; else e = 0; } @@ -455,7 +462,7 @@ static noinline void check_new_node(struct maple_tree *mt) /* * Check erasing including RCU. */ -static noinline void check_erase(struct maple_tree *mt, unsigned long index, +static noinline void __init check_erase(struct maple_tree *mt, unsigned long index, void *ptr) { MT_BUG_ON(mt, mtree_test_erase(mt, index) != ptr); @@ -465,24 +472,24 @@ static noinline void check_erase(struct maple_tree *mt, unsigned long index, #define erase_check_insert(mt, i) check_insert(mt, set[i], entry[i%2]) #define erase_check_erase(mt, i) check_erase(mt, set[i], entry[i%2]) -static noinline void check_erase_testset(struct maple_tree *mt) +static noinline void __init check_erase_testset(struct maple_tree *mt) { - unsigned long set[] = { 5015, 5014, 5017, 25, 1000, - 1001, 1002, 1003, 1005, 0, - 6003, 6002, 6008, 6012, 6015, - 7003, 7002, 7008, 7012, 7015, - 8003, 8002, 8008, 8012, 8015, - 9003, 9002, 9008, 9012, 9015, - 10003, 10002, 10008, 10012, 10015, - 11003, 11002, 11008, 11012, 11015, - 12003, 12002, 12008, 12012, 12015, - 13003, 13002, 13008, 13012, 13015, - 14003, 14002, 14008, 14012, 14015, - 15003, 15002, 15008, 15012, 15015, - }; - - - void *ptr = &set; + static const unsigned long set[] = { 5015, 5014, 5017, 25, 1000, + 1001, 1002, 1003, 1005, 0, + 6003, 6002, 6008, 6012, 6015, + 7003, 7002, 7008, 7012, 7015, + 8003, 8002, 8008, 8012, 8015, + 9003, 9002, 9008, 9012, 9015, + 10003, 10002, 10008, 10012, 10015, + 11003, 11002, 11008, 11012, 11015, + 12003, 12002, 12008, 12012, 12015, + 13003, 13002, 13008, 13012, 13015, + 14003, 14002, 14008, 14012, 14015, + 15003, 15002, 15008, 15012, 15015, + }; + + + void *ptr = &check_erase_testset; void *entry[2] = { ptr, mt }; void *root_node; @@ -739,7 +746,7 @@ static noinline void check_erase_testset(struct maple_tree *mt) int mas_ce2_over_count(struct ma_state *mas_start, struct ma_state *mas_end, void *s_entry, unsigned long s_min, void *e_entry, unsigned long e_max, - unsigned long *set, int i, bool null_entry) + const unsigned long *set, int i, bool null_entry) { int count = 0, span = 0; unsigned long retry = 0; @@ -969,8 +976,8 @@ retry: } #if defined(CONFIG_64BIT) -static noinline void check_erase2_testset(struct maple_tree *mt, - unsigned long *set, unsigned long size) +static noinline void __init check_erase2_testset(struct maple_tree *mt, + const unsigned long *set, unsigned long size) { int entry_count = 0; int check = 0; @@ -1054,7 +1061,7 @@ static noinline void check_erase2_testset(struct maple_tree *mt, if (entry_count) MT_BUG_ON(mt, !mt_height(mt)); #if check_erase2_debug > 1 - mt_dump(mt); + mt_dump(mt, mt_dump_hex); #endif #if check_erase2_debug pr_err("Done\n"); @@ -1085,7 +1092,7 @@ static noinline void check_erase2_testset(struct maple_tree *mt, mas_for_each(&mas, foo, ULONG_MAX) { if (xa_is_zero(foo)) { if (addr == mas.index) { - mt_dump(mas.tree); + mt_dump(mas.tree, mt_dump_hex); pr_err("retry failed %lu - %lu\n", mas.index, mas.last); MT_BUG_ON(mt, 1); @@ -1114,11 +1121,11 @@ static noinline void check_erase2_testset(struct maple_tree *mt, /* These tests were pulled from KVM tree modifications which failed. */ -static noinline void check_erase2_sets(struct maple_tree *mt) +static noinline void __init check_erase2_sets(struct maple_tree *mt) { void *entry; unsigned long start = 0; - unsigned long set[] = { + static const unsigned long set[] = { STORE, 140737488347136, 140737488351231, STORE, 140721266458624, 140737488351231, ERASE, 140721266458624, 140737488351231, @@ -1136,7 +1143,7 @@ ERASE, 140253902692352, 140253902864383, STORE, 140253902692352, 140253902696447, STORE, 140253902696448, 140253902864383, }; - unsigned long set2[] = { + static const unsigned long set2[] = { STORE, 140737488347136, 140737488351231, STORE, 140735933583360, 140737488351231, ERASE, 140735933583360, 140737488351231, @@ -1160,7 +1167,7 @@ STORE, 140277094813696, 140277094821887, STORE, 140277094821888, 140277094825983, STORE, 140735933906944, 140735933911039, }; - unsigned long set3[] = { + static const unsigned long set3[] = { STORE, 140737488347136, 140737488351231, STORE, 140735790264320, 140737488351231, ERASE, 140735790264320, 140737488351231, @@ -1203,7 +1210,7 @@ STORE, 47135835840512, 47135835885567, STORE, 47135835885568, 47135835893759, }; - unsigned long set4[] = { + static const unsigned long set4[] = { STORE, 140737488347136, 140737488351231, STORE, 140728251703296, 140737488351231, ERASE, 140728251703296, 140737488351231, @@ -1224,7 +1231,7 @@ ERASE, 47646523277312, 47646523445247, STORE, 47646523277312, 47646523400191, }; - unsigned long set5[] = { + static const unsigned long set5[] = { STORE, 140737488347136, 140737488351231, STORE, 140726874062848, 140737488351231, ERASE, 140726874062848, 140737488351231, @@ -1357,7 +1364,7 @@ STORE, 47884791619584, 47884791623679, STORE, 47884791623680, 47884791627775, }; - unsigned long set6[] = { + static const unsigned long set6[] = { STORE, 140737488347136, 140737488351231, STORE, 140722999021568, 140737488351231, ERASE, 140722999021568, 140737488351231, @@ -1489,7 +1496,7 @@ ERASE, 47430432014336, 47430432022527, STORE, 47430432014336, 47430432018431, STORE, 47430432018432, 47430432022527, }; - unsigned long set7[] = { + static const unsigned long set7[] = { STORE, 140737488347136, 140737488351231, STORE, 140729808330752, 140737488351231, ERASE, 140729808330752, 140737488351231, @@ -1621,7 +1628,7 @@ ERASE, 47439987130368, 47439987138559, STORE, 47439987130368, 47439987134463, STORE, 47439987134464, 47439987138559, }; - unsigned long set8[] = { + static const unsigned long set8[] = { STORE, 140737488347136, 140737488351231, STORE, 140722482974720, 140737488351231, ERASE, 140722482974720, 140737488351231, @@ -1754,7 +1761,7 @@ STORE, 47708488638464, 47708488642559, STORE, 47708488642560, 47708488646655, }; - unsigned long set9[] = { + static const unsigned long set9[] = { STORE, 140737488347136, 140737488351231, STORE, 140736427839488, 140737488351231, ERASE, 140736427839488, 140736427839488, @@ -5620,7 +5627,7 @@ ERASE, 47906195480576, 47906195480576, STORE, 94641242615808, 94641242750975, }; - unsigned long set10[] = { + static const unsigned long set10[] = { STORE, 140737488347136, 140737488351231, STORE, 140736427839488, 140737488351231, ERASE, 140736427839488, 140736427839488, @@ -9484,7 +9491,7 @@ STORE, 139726599680000, 139726599684095, ERASE, 47906195480576, 47906195480576, STORE, 94641242615808, 94641242750975, }; - unsigned long set11[] = { + static const unsigned long set11[] = { STORE, 140737488347136, 140737488351231, STORE, 140732658499584, 140737488351231, ERASE, 140732658499584, 140732658499584, @@ -9510,7 +9517,7 @@ STORE, 140732658565120, 140732658569215, STORE, 140732658552832, 140732658565119, }; - unsigned long set12[] = { /* contains 12 values. */ + static const unsigned long set12[] = { /* contains 12 values. */ STORE, 140737488347136, 140737488351231, STORE, 140732658499584, 140737488351231, ERASE, 140732658499584, 140732658499584, @@ -9537,7 +9544,7 @@ STORE, 140732658552832, 140732658565119, STORE, 140014592741375, 140014592741375, /* contrived */ STORE, 140014592733184, 140014592741376, /* creates first entry retry. */ }; - unsigned long set13[] = { + static const unsigned long set13[] = { STORE, 140373516247040, 140373516251135,/*: ffffa2e7b0e10d80 */ STORE, 140373516251136, 140373516255231,/*: ffffa2e7b1195d80 */ STORE, 140373516255232, 140373516443647,/*: ffffa2e7b0e109c0 */ @@ -9550,7 +9557,7 @@ STORE, 140373518684160, 140373518688254,/*: ffffa2e7b05fec00 */ STORE, 140373518688256, 140373518692351,/*: ffffa2e7bfbdcd80 */ STORE, 140373518692352, 140373518696447,/*: ffffa2e7b0749e40 */ }; - unsigned long set14[] = { + static const unsigned long set14[] = { STORE, 140737488347136, 140737488351231, STORE, 140731667996672, 140737488351231, SNULL, 140731668000767, 140737488351231, @@ -9834,7 +9841,7 @@ SNULL, 139826136543232, 139826136809471, STORE, 139826136809472, 139826136842239, STORE, 139826136543232, 139826136809471, }; - unsigned long set15[] = { + static const unsigned long set15[] = { STORE, 140737488347136, 140737488351231, STORE, 140722061451264, 140737488351231, SNULL, 140722061455359, 140737488351231, @@ -10119,7 +10126,7 @@ STORE, 139906808958976, 139906808991743, STORE, 139906808692736, 139906808958975, }; - unsigned long set16[] = { + static const unsigned long set16[] = { STORE, 94174808662016, 94174809321471, STORE, 94174811414528, 94174811426815, STORE, 94174811426816, 94174811430911, @@ -10330,7 +10337,7 @@ STORE, 139921865613312, 139921865617407, STORE, 139921865547776, 139921865564159, }; - unsigned long set17[] = { + static const unsigned long set17[] = { STORE, 94397057224704, 94397057646591, STORE, 94397057650688, 94397057691647, STORE, 94397057691648, 94397057695743, @@ -10392,7 +10399,7 @@ STORE, 140720477511680, 140720477646847, STORE, 140720478302208, 140720478314495, STORE, 140720478314496, 140720478318591, }; - unsigned long set18[] = { + static const unsigned long set18[] = { STORE, 140737488347136, 140737488351231, STORE, 140724953673728, 140737488351231, SNULL, 140724953677823, 140737488351231, @@ -10425,7 +10432,7 @@ STORE, 140222970597376, 140222970605567, ERASE, 140222970597376, 140222970605567, STORE, 140222970597376, 140222970605567, }; - unsigned long set19[] = { + static const unsigned long set19[] = { STORE, 140737488347136, 140737488351231, STORE, 140725182459904, 140737488351231, SNULL, 140725182463999, 140737488351231, @@ -10694,7 +10701,7 @@ STORE, 140656836775936, 140656836780031, STORE, 140656787476480, 140656791920639, ERASE, 140656774639616, 140656779083775, }; - unsigned long set20[] = { + static const unsigned long set20[] = { STORE, 140737488347136, 140737488351231, STORE, 140735952392192, 140737488351231, SNULL, 140735952396287, 140737488351231, @@ -10850,7 +10857,7 @@ STORE, 140590386819072, 140590386823167, STORE, 140590386823168, 140590386827263, SNULL, 140590376591359, 140590376595455, }; - unsigned long set21[] = { + static const unsigned long set21[] = { STORE, 93874710941696, 93874711363583, STORE, 93874711367680, 93874711408639, STORE, 93874711408640, 93874711412735, @@ -10920,7 +10927,7 @@ ERASE, 140708393312256, 140708393316351, ERASE, 140708393308160, 140708393312255, ERASE, 140708393291776, 140708393308159, }; - unsigned long set22[] = { + static const unsigned long set22[] = { STORE, 93951397134336, 93951397183487, STORE, 93951397183488, 93951397728255, STORE, 93951397728256, 93951397826559, @@ -11047,7 +11054,7 @@ STORE, 140551361253376, 140551361519615, ERASE, 140551361253376, 140551361519615, }; - unsigned long set23[] = { + static const unsigned long set23[] = { STORE, 94014447943680, 94014448156671, STORE, 94014450253824, 94014450257919, STORE, 94014450257920, 94014450266111, @@ -14371,7 +14378,7 @@ SNULL, 140175956627455, 140175985139711, STORE, 140175927242752, 140175956627455, STORE, 140175956627456, 140175985139711, }; - unsigned long set24[] = { + static const unsigned long set24[] = { STORE, 140737488347136, 140737488351231, STORE, 140735281639424, 140737488351231, SNULL, 140735281643519, 140737488351231, @@ -15533,7 +15540,7 @@ ERASE, 139635393024000, 139635401412607, ERASE, 139635384627200, 139635384631295, ERASE, 139635384631296, 139635393019903, }; - unsigned long set25[] = { + static const unsigned long set25[] = { STORE, 140737488347136, 140737488351231, STORE, 140737488343040, 140737488351231, STORE, 140722547441664, 140737488351231, @@ -22321,7 +22328,7 @@ STORE, 140249652703232, 140249682087935, STORE, 140249682087936, 140249710600191, }; - unsigned long set26[] = { + static const unsigned long set26[] = { STORE, 140737488347136, 140737488351231, STORE, 140729464770560, 140737488351231, SNULL, 140729464774655, 140737488351231, @@ -22345,7 +22352,7 @@ ERASE, 140109040951296, 140109040959487, STORE, 140109040955392, 140109040959487, ERASE, 140109040955392, 140109040959487, }; - unsigned long set27[] = { + static const unsigned long set27[] = { STORE, 140737488347136, 140737488351231, STORE, 140726128070656, 140737488351231, SNULL, 140726128074751, 140737488351231, @@ -22741,7 +22748,7 @@ STORE, 140415509696512, 140415535910911, ERASE, 140415537422336, 140415562588159, STORE, 140415482433536, 140415509696511, }; - unsigned long set28[] = { + static const unsigned long set28[] = { STORE, 140737488347136, 140737488351231, STORE, 140722475622400, 140737488351231, SNULL, 140722475626495, 140737488351231, @@ -22809,7 +22816,7 @@ STORE, 139918413348864, 139918413352959, ERASE, 139918413316096, 139918413344767, STORE, 93865848528896, 93865848664063, }; - unsigned long set29[] = { + static const unsigned long set29[] = { STORE, 140737488347136, 140737488351231, STORE, 140734467944448, 140737488351231, SNULL, 140734467948543, 140737488351231, @@ -23684,7 +23691,7 @@ ERASE, 140143079972864, 140143088361471, ERASE, 140143205793792, 140143205797887, ERASE, 140143205797888, 140143214186495, }; - unsigned long set30[] = { + static const unsigned long set30[] = { STORE, 140737488347136, 140737488351231, STORE, 140733436743680, 140737488351231, SNULL, 140733436747775, 140737488351231, @@ -24566,7 +24573,7 @@ ERASE, 140165225893888, 140165225897983, ERASE, 140165225897984, 140165234286591, ERASE, 140165058105344, 140165058109439, }; - unsigned long set31[] = { + static const unsigned long set31[] = { STORE, 140737488347136, 140737488351231, STORE, 140730890784768, 140737488351231, SNULL, 140730890788863, 140737488351231, @@ -25379,7 +25386,7 @@ ERASE, 140623906590720, 140623914979327, ERASE, 140622950277120, 140622950281215, ERASE, 140622950281216, 140622958669823, }; - unsigned long set32[] = { + static const unsigned long set32[] = { STORE, 140737488347136, 140737488351231, STORE, 140731244212224, 140737488351231, SNULL, 140731244216319, 140737488351231, @@ -26175,7 +26182,7 @@ ERASE, 140400417288192, 140400425676799, ERASE, 140400283066368, 140400283070463, ERASE, 140400283070464, 140400291459071, }; - unsigned long set33[] = { + static const unsigned long set33[] = { STORE, 140737488347136, 140737488351231, STORE, 140734562918400, 140737488351231, SNULL, 140734562922495, 140737488351231, @@ -26317,7 +26324,7 @@ STORE, 140582961786880, 140583003750399, ERASE, 140582961786880, 140583003750399, }; - unsigned long set34[] = { + static const unsigned long set34[] = { STORE, 140737488347136, 140737488351231, STORE, 140731327180800, 140737488351231, SNULL, 140731327184895, 140737488351231, @@ -27198,7 +27205,7 @@ ERASE, 140012522094592, 140012530483199, ERASE, 140012033142784, 140012033146879, ERASE, 140012033146880, 140012041535487, }; - unsigned long set35[] = { + static const unsigned long set35[] = { STORE, 140737488347136, 140737488351231, STORE, 140730536939520, 140737488351231, SNULL, 140730536943615, 140737488351231, @@ -27955,7 +27962,7 @@ ERASE, 140474471936000, 140474480324607, ERASE, 140474396430336, 140474396434431, ERASE, 140474396434432, 140474404823039, }; - unsigned long set36[] = { + static const unsigned long set36[] = { STORE, 140737488347136, 140737488351231, STORE, 140723893125120, 140737488351231, SNULL, 140723893129215, 140737488351231, @@ -28816,7 +28823,7 @@ ERASE, 140121890357248, 140121898745855, ERASE, 140121269587968, 140121269592063, ERASE, 140121269592064, 140121277980671, }; - unsigned long set37[] = { + static const unsigned long set37[] = { STORE, 140737488347136, 140737488351231, STORE, 140722404016128, 140737488351231, SNULL, 140722404020223, 140737488351231, @@ -28942,7 +28949,7 @@ STORE, 139759821246464, 139759888355327, ERASE, 139759821246464, 139759888355327, ERASE, 139759888355328, 139759955464191, }; - unsigned long set38[] = { + static const unsigned long set38[] = { STORE, 140737488347136, 140737488351231, STORE, 140730666221568, 140737488351231, SNULL, 140730666225663, 140737488351231, @@ -29752,7 +29759,7 @@ ERASE, 140613504712704, 140613504716799, ERASE, 140613504716800, 140613513105407, }; - unsigned long set39[] = { + static const unsigned long set39[] = { STORE, 140737488347136, 140737488351231, STORE, 140736271417344, 140737488351231, SNULL, 140736271421439, 140737488351231, @@ -30124,7 +30131,7 @@ STORE, 140325364428800, 140325372821503, STORE, 140325356036096, 140325364428799, SNULL, 140325364432895, 140325372821503, }; - unsigned long set40[] = { + static const unsigned long set40[] = { STORE, 140737488347136, 140737488351231, STORE, 140734309167104, 140737488351231, SNULL, 140734309171199, 140737488351231, @@ -30875,7 +30882,7 @@ ERASE, 140320289300480, 140320289304575, ERASE, 140320289304576, 140320297693183, ERASE, 140320163409920, 140320163414015, }; - unsigned long set41[] = { + static const unsigned long set41[] = { STORE, 140737488347136, 140737488351231, STORE, 140728157171712, 140737488351231, SNULL, 140728157175807, 140737488351231, @@ -31185,7 +31192,7 @@ STORE, 94376135090176, 94376135094271, STORE, 94376135094272, 94376135098367, SNULL, 94376135094272, 94377208836095, }; - unsigned long set42[] = { + static const unsigned long set42[] = { STORE, 314572800, 1388314623, STORE, 1462157312, 1462169599, STORE, 1462169600, 1462185983, @@ -33862,7 +33869,7 @@ SNULL, 3798999040, 3799101439, */ }; - unsigned long set43[] = { + static const unsigned long set43[] = { STORE, 140737488347136, 140737488351231, STORE, 140734187720704, 140737488351231, SNULL, 140734187724800, 140737488351231, @@ -34513,7 +34520,7 @@ static void *rcu_reader_rev(void *ptr) if (mas.index != r_start) { alt = xa_mk_value(index + i * 2 + 1 + RCU_RANGE_COUNT); - mt_dump(test->mt); + mt_dump(test->mt, mt_dump_dec); printk("Error: %lu-%lu %p != %lu-%lu %p %p line %d i %d\n", mas.index, mas.last, entry, r_start, r_end, expected, alt, @@ -34954,6 +34961,70 @@ void run_check_rcu(struct maple_tree *mt, struct rcu_test_struct *vals) MT_BUG_ON(mt, !vals->seen_entry2); } +static void *rcu_slot_store_reader(void *ptr) +{ + struct rcu_test_struct3 *test = ptr; + MA_STATE(mas, test->mt, test->index, test->index); + + rcu_register_thread(); + + rcu_read_lock(); + while (!test->stop) { + mas_walk(&mas); + /* The length of growth to both sides must be equal. */ + RCU_MT_BUG_ON(test, (test->index - mas.index) != + (mas.last - test->last)); + } + rcu_read_unlock(); + + rcu_unregister_thread(); + return NULL; +} + +static noinline void run_check_rcu_slot_store(struct maple_tree *mt) +{ + pthread_t readers[20]; + int range_cnt = 200, i, limit = 10000; + unsigned long len = ULONG_MAX / range_cnt, start, end; + struct rcu_test_struct3 test = {.stop = false, .mt = mt}; + + start = range_cnt / 2 * len; + end = start + len - 1; + test.index = start; + test.last = end; + + for (i = 0; i < range_cnt; i++) { + mtree_store_range(mt, i * len, i * len + len - 1, + xa_mk_value(i * 100), GFP_KERNEL); + } + + mt_set_in_rcu(mt); + MT_BUG_ON(mt, !mt_in_rcu(mt)); + + for (i = 0; i < ARRAY_SIZE(readers); i++) { + if (pthread_create(&readers[i], NULL, rcu_slot_store_reader, + &test)) { + perror("creating reader thread"); + exit(1); + } + } + + usleep(5); + + while (limit--) { + /* Step by step, expand the most middle range to both sides. */ + mtree_store_range(mt, --start, ++end, xa_mk_value(100), + GFP_KERNEL); + } + + test.stop = true; + + while (i--) + pthread_join(readers[i], NULL); + + mt_validate(mt); +} + static noinline void run_check_rcu_slowread(struct maple_tree *mt, struct rcu_test_struct *vals) { @@ -34996,7 +35067,7 @@ void run_check_rcu_slowread(struct maple_tree *mt, struct rcu_test_struct *vals) MT_BUG_ON(mt, !vals->seen_entry3); MT_BUG_ON(mt, !vals->seen_both); } -static noinline void check_rcu_simulated(struct maple_tree *mt) +static noinline void __init check_rcu_simulated(struct maple_tree *mt) { unsigned long i, nr_entries = 1000; unsigned long target = 4320; @@ -35157,7 +35228,7 @@ static noinline void check_rcu_simulated(struct maple_tree *mt) rcu_unregister_thread(); } -static noinline void check_rcu_threaded(struct maple_tree *mt) +static noinline void __init check_rcu_threaded(struct maple_tree *mt) { unsigned long i, nr_entries = 1000; struct rcu_test_struct vals; @@ -35206,6 +35277,10 @@ static noinline void check_rcu_threaded(struct maple_tree *mt) run_check_rcu(mt, &vals); mtree_destroy(mt); + /* Check expanding range in RCU mode */ + mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); + run_check_rcu_slot_store(mt); + mtree_destroy(mt); /* Forward writer for rcu stress */ mt_init_flags(mt, MT_FLAGS_ALLOC_RANGE); @@ -35259,6 +35334,7 @@ static void mas_dfs_preorder(struct ma_state *mas) struct maple_enode *prev; unsigned char end, slot = 0; + unsigned long *pivots; if (mas->node == MAS_START) { mas_start(mas); @@ -35291,6 +35367,9 @@ walk_up: mas_ascend(mas); goto walk_up; } + pivots = ma_pivots(mte_to_node(prev), mte_node_type(prev)); + mas->max = mas_safe_pivot(mas, pivots, slot, mte_node_type(prev)); + mas->min = mas_safe_min(mas, pivots, slot); return; done: @@ -35366,7 +35445,7 @@ static void check_dfs_preorder(struct maple_tree *mt) /* End of depth first search tests */ /* Preallocation testing */ -static noinline void check_prealloc(struct maple_tree *mt) +static noinline void __init check_prealloc(struct maple_tree *mt) { unsigned long i, max = 100; unsigned long allocated; @@ -35379,7 +35458,9 @@ static noinline void check_prealloc(struct maple_tree *mt) for (i = 0; i <= max; i++) mtree_test_store_range(mt, i * 10, i * 10 + 5, &i); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + /* Spanning store */ + mas_set_range(&mas, 470, 500); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); @@ -35388,105 +35469,108 @@ static noinline void check_prealloc(struct maple_tree *mt) allocated = mas_allocated(&mas); MT_BUG_ON(mt, allocated != 0); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); MT_BUG_ON(mt, allocated != 1 + height * 3); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); mas_destroy(&mas); allocated = mas_allocated(&mas); MT_BUG_ON(mt, allocated != 0); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); - MT_BUG_ON(mt, allocated == 0); MT_BUG_ON(mt, allocated != 1 + height * 3); mn = mas_pop_node(&mas); MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1); mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); mas_destroy(&mas); allocated = mas_allocated(&mas); MT_BUG_ON(mt, allocated != 0); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); - MT_BUG_ON(mt, allocated == 0); MT_BUG_ON(mt, allocated != 1 + height * 3); mn = mas_pop_node(&mas); MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); mas_destroy(&mas); allocated = mas_allocated(&mas); MT_BUG_ON(mt, allocated != 0); mn->parent = ma_parent_ptr(mn); ma_free_rcu(mn); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); - MT_BUG_ON(mt, allocated == 0); MT_BUG_ON(mt, allocated != 1 + height * 3); mn = mas_pop_node(&mas); MT_BUG_ON(mt, mas_allocated(&mas) != allocated - 1); mas_push_node(&mas, mn); MT_BUG_ON(mt, mas_allocated(&mas) != allocated); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); mas_destroy(&mas); allocated = mas_allocated(&mas); MT_BUG_ON(mt, allocated != 0); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); - MT_BUG_ON(mt, allocated == 0); MT_BUG_ON(mt, allocated != 1 + height * 3); mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + /* Slot store does not need allocations */ + mas_set_range(&mas, 6, 9); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); - height = mas_mt_height(&mas); - MT_BUG_ON(mt, allocated == 0); - MT_BUG_ON(mt, allocated != 1 + height * 3); + MT_BUG_ON(mt, allocated != 0); mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + + mas_set_range(&mas, 6, 10); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); - MT_BUG_ON(mt, allocated == 0); - MT_BUG_ON(mt, allocated != 1 + height * 3); + MT_BUG_ON(mt, allocated != 1); mas_store_prealloc(&mas, ptr); + MT_BUG_ON(mt, mas_allocated(&mas) != 0); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + /* Split */ + mas_set_range(&mas, 54, 54); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); - MT_BUG_ON(mt, allocated == 0); - MT_BUG_ON(mt, allocated != 1 + height * 3); + MT_BUG_ON(mt, allocated != 1 + height * 2); mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); mt_set_non_kernel(1); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL & GFP_NOWAIT) == 0); + /* Spanning store */ + mas_set_range(&mas, 1, 100); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL & GFP_NOWAIT) == 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated != 0); mas_destroy(&mas); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL) != 0); + /* Spanning store */ + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL) != 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated == 0); MT_BUG_ON(mt, allocated != 1 + height * 3); mas_store_prealloc(&mas, ptr); MT_BUG_ON(mt, mas_allocated(&mas) != 0); + mas_set_range(&mas, 0, 200); mt_set_non_kernel(1); - MT_BUG_ON(mt, mas_preallocate(&mas, GFP_KERNEL & GFP_NOWAIT) == 0); + MT_BUG_ON(mt, mas_preallocate(&mas, ptr, GFP_KERNEL & GFP_NOWAIT) == 0); allocated = mas_allocated(&mas); height = mas_mt_height(&mas); MT_BUG_ON(mt, allocated != 0); @@ -35494,7 +35578,7 @@ static noinline void check_prealloc(struct maple_tree *mt) /* End of preallocation testing */ /* Spanning writes, writes that span nodes and layers of the tree */ -static noinline void check_spanning_write(struct maple_tree *mt) +static noinline void __init check_spanning_write(struct maple_tree *mt) { unsigned long i, max = 5000; MA_STATE(mas, mt, 1200, 2380); @@ -35662,7 +35746,7 @@ static noinline void check_spanning_write(struct maple_tree *mt) /* End of spanning write testing */ /* Writes to a NULL area that are adjacent to other NULLs */ -static noinline void check_null_expand(struct maple_tree *mt) +static noinline void __init check_null_expand(struct maple_tree *mt) { unsigned long i, max = 100; unsigned char data_end; @@ -35723,7 +35807,7 @@ static noinline void check_null_expand(struct maple_tree *mt) /* End of NULL area expansions */ /* Checking for no memory is best done outside the kernel */ -static noinline void check_nomem(struct maple_tree *mt) +static noinline void __init check_nomem(struct maple_tree *mt) { MA_STATE(ms, mt, 1, 1); @@ -35758,7 +35842,7 @@ static noinline void check_nomem(struct maple_tree *mt) mtree_destroy(mt); } -static noinline void check_locky(struct maple_tree *mt) +static noinline void __init check_locky(struct maple_tree *mt) { MA_STATE(ms, mt, 2, 2); MA_STATE(reader, mt, 2, 2); @@ -35780,10 +35864,10 @@ void farmer_tests(void) struct maple_node *node; DEFINE_MTREE(tree); - mt_dump(&tree); + mt_dump(&tree, mt_dump_dec); tree.ma_root = xa_mk_value(0); - mt_dump(&tree); + mt_dump(&tree, mt_dump_dec); node = mt_alloc_one(GFP_KERNEL); node->parent = (void *)((unsigned long)(&tree) | 1); @@ -35793,7 +35877,7 @@ void farmer_tests(void) node->mr64.pivot[1] = 1; node->mr64.pivot[2] = 0; tree.ma_root = mt_mk_node(node, maple_leaf_64); - mt_dump(&tree); + mt_dump(&tree, mt_dump_dec); node->parent = ma_parent_ptr(node); ma_free_rcu(node); diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index e00520cc6349..cffaf2245d4f 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -159,7 +159,7 @@ void multiorder_tagged_iteration(struct xarray *xa) item_kill_tree(xa); } -bool stop_iteration = false; +bool stop_iteration; static void *creator_func(void *ptr) { @@ -201,6 +201,7 @@ static void multiorder_iteration_race(struct xarray *xa) pthread_t worker_thread[num_threads]; int i; + stop_iteration = false; pthread_create(&worker_thread[0], NULL, &creator_func, xa); for (i = 1; i < num_threads; i++) pthread_create(&worker_thread[i], NULL, &iterator_func, xa); @@ -211,6 +212,61 @@ static void multiorder_iteration_race(struct xarray *xa) item_kill_tree(xa); } +static void *load_creator(void *ptr) +{ + /* 'order' is set up to ensure we have sibling entries */ + unsigned int order; + struct radix_tree_root *tree = ptr; + int i; + + rcu_register_thread(); + item_insert_order(tree, 3 << RADIX_TREE_MAP_SHIFT, 0); + item_insert_order(tree, 2 << RADIX_TREE_MAP_SHIFT, 0); + for (i = 0; i < 10000; i++) { + for (order = 1; order < RADIX_TREE_MAP_SHIFT; order++) { + unsigned long index = (3 << RADIX_TREE_MAP_SHIFT) - + (1 << order); + item_insert_order(tree, index, order); + item_delete_rcu(tree, index); + } + } + rcu_unregister_thread(); + + stop_iteration = true; + return NULL; +} + +static void *load_worker(void *ptr) +{ + unsigned long index = (3 << RADIX_TREE_MAP_SHIFT) - 1; + + rcu_register_thread(); + while (!stop_iteration) { + struct item *item = xa_load(ptr, index); + assert(!xa_is_internal(item)); + } + rcu_unregister_thread(); + + return NULL; +} + +static void load_race(struct xarray *xa) +{ + const int num_threads = sysconf(_SC_NPROCESSORS_ONLN) * 4; + pthread_t worker_thread[num_threads]; + int i; + + stop_iteration = false; + pthread_create(&worker_thread[0], NULL, &load_creator, xa); + for (i = 1; i < num_threads; i++) + pthread_create(&worker_thread[i], NULL, &load_worker, xa); + + for (i = 0; i < num_threads; i++) + pthread_join(worker_thread[i], NULL); + + item_kill_tree(xa); +} + static DEFINE_XARRAY(array); void multiorder_checks(void) @@ -218,12 +274,20 @@ void multiorder_checks(void) multiorder_iteration(&array); multiorder_tagged_iteration(&array); multiorder_iteration_race(&array); + load_race(&array); radix_tree_cpu_dead(0); } -int __weak main(void) +int __weak main(int argc, char **argv) { + int opt; + + while ((opt = getopt(argc, argv, "ls:v")) != -1) { + if (opt == 'v') + test_verbose++; + } + rcu_register_thread(); radix_tree_init(); multiorder_checks(); diff --git a/tools/testing/radix-tree/regression1.c b/tools/testing/radix-tree/regression1.c index a61c7bcbc72d..63f468bf8245 100644 --- a/tools/testing/radix-tree/regression1.c +++ b/tools/testing/radix-tree/regression1.c @@ -177,7 +177,7 @@ void regression1_test(void) nr_threads = 2; pthread_barrier_init(&worker_barrier, NULL, nr_threads); - threads = malloc(nr_threads * sizeof(pthread_t *)); + threads = malloc(nr_threads * sizeof(*threads)); for (i = 0; i < nr_threads; i++) { arg = i; diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 90a62cf75008..1a21d6beebc6 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -4,19 +4,23 @@ TARGETS += amd-pstate TARGETS += arm64 TARGETS += bpf TARGETS += breakpoints +TARGETS += cachestat TARGETS += capabilities TARGETS += cgroup TARGETS += clone3 +TARGETS += connector TARGETS += core TARGETS += cpufreq TARGETS += cpu-hotplug TARGETS += damon +TARGETS += dmabuf-heaps TARGETS += drivers/dma-buf TARGETS += drivers/s390x/uvdevice TARGETS += drivers/net/bonding TARGETS += drivers/net/team TARGETS += efivarfs TARGETS += exec +TARGETS += fchmodat2 TARGETS += filesystems TARGETS += filesystems/binderfs TARGETS += filesystems/epoll @@ -55,6 +59,7 @@ TARGETS += net/mptcp TARGETS += net/openvswitch TARGETS += netfilter TARGETS += nsfs +TARGETS += perf_events TARGETS += pidfd TARGETS += pid_namespace TARGETS += powerpc @@ -86,7 +91,10 @@ TARGETS += timers endif TARGETS += tmpfs TARGETS += tpm2 +TARGETS += tty +TARGETS += uevent TARGETS += user +TARGETS += user_events TARGETS += vDSO TARGETS += mm TARGETS += x86 @@ -144,10 +152,12 @@ ifneq ($(KBUILD_OUTPUT),) abs_objtree := $(realpath $(abs_objtree)) BUILD := $(abs_objtree)/kselftest KHDR_INCLUDES := -isystem ${abs_objtree}/usr/include + KHDR_DIR := ${abs_objtree}/usr/include else BUILD := $(CURDIR) abs_srctree := $(shell cd $(top_srcdir) && pwd) KHDR_INCLUDES := -isystem ${abs_srctree}/usr/include + KHDR_DIR := ${abs_srctree}/usr/include DEFAULT_INSTALL_HDR_PATH := 1 endif @@ -161,7 +171,7 @@ export KHDR_INCLUDES # all isn't the first target in the file. .DEFAULT_GOAL := all -all: +all: kernel_header_files @ret=1; \ for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ @@ -172,6 +182,23 @@ all: ret=$$((ret * $$?)); \ done; exit $$ret; +kernel_header_files: + @ls $(KHDR_DIR)/linux/*.h >/dev/null 2>/dev/null; \ + if [ $$? -ne 0 ]; then \ + RED='\033[1;31m'; \ + NOCOLOR='\033[0m'; \ + echo; \ + echo -e "$${RED}error$${NOCOLOR}: missing kernel header files."; \ + echo "Please run this and try again:"; \ + echo; \ + echo " cd $(top_srcdir)"; \ + echo " make headers"; \ + echo; \ + exit 1; \ + fi + +.PHONY: kernel_header_files + run_tests: all @for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ diff --git a/tools/testing/selftests/alsa/.gitignore b/tools/testing/selftests/alsa/.gitignore index 2b0d11797f25..12dc3fcd3456 100644 --- a/tools/testing/selftests/alsa/.gitignore +++ b/tools/testing/selftests/alsa/.gitignore @@ -1,2 +1,3 @@ mixer-test pcm-test +test-pcmtest-driver diff --git a/tools/testing/selftests/alsa/Makefile b/tools/testing/selftests/alsa/Makefile index 901949db80ad..5af9ba8a4645 100644 --- a/tools/testing/selftests/alsa/Makefile +++ b/tools/testing/selftests/alsa/Makefile @@ -12,7 +12,7 @@ LDLIBS+=-lpthread OVERRIDE_TARGETS = 1 -TEST_GEN_PROGS := mixer-test pcm-test +TEST_GEN_PROGS := mixer-test pcm-test test-pcmtest-driver TEST_GEN_PROGS_EXTENDED := libatest.so diff --git a/tools/testing/selftests/alsa/conf.c b/tools/testing/selftests/alsa/conf.c index d7aafe5a1993..2f1685a3eae1 100644 --- a/tools/testing/selftests/alsa/conf.c +++ b/tools/testing/selftests/alsa/conf.c @@ -431,7 +431,6 @@ long conf_get_long(snd_config_t *root, const char *key1, const char *key2, long int conf_get_bool(snd_config_t *root, const char *key1, const char *key2, int def) { snd_config_t *cfg; - long l; int ret; if (!root) diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c index c95d63e553f4..21e482b23f50 100644 --- a/tools/testing/selftests/alsa/mixer-test.c +++ b/tools/testing/selftests/alsa/mixer-test.c @@ -188,7 +188,7 @@ static int wait_for_event(struct ctl_data *ctl, int timeout) { unsigned short revents; snd_ctl_event_t *event; - int count, err; + int err; unsigned int mask = 0; unsigned int ev_id; @@ -430,7 +430,6 @@ static bool strend(const char *haystack, const char *needle) static void test_ctl_name(struct ctl_data *ctl) { bool name_ok = true; - bool check; ksft_print_msg("%d.%d %s\n", ctl->card->card, ctl->elem, ctl->name); @@ -863,7 +862,6 @@ static bool test_ctl_write_invalid_value(struct ctl_data *ctl, snd_ctl_elem_value_t *val) { int err; - long val_read; /* Ideally this will fail... */ err = snd_ctl_elem_write(ctl->card->handle, val); @@ -883,8 +881,7 @@ static bool test_ctl_write_invalid_value(struct ctl_data *ctl, static bool test_ctl_write_invalid_boolean(struct ctl_data *ctl) { - int err, i; - long val_read; + int i; bool fail = false; snd_ctl_elem_value_t *val; snd_ctl_elem_value_alloca(&val); @@ -994,8 +991,7 @@ static bool test_ctl_write_invalid_integer64(struct ctl_data *ctl) static bool test_ctl_write_invalid_enumerated(struct ctl_data *ctl) { - int err, i; - unsigned int val_read; + int i; bool fail = false; snd_ctl_elem_value_t *val; snd_ctl_elem_value_alloca(&val); @@ -1027,7 +1023,6 @@ static bool test_ctl_write_invalid_enumerated(struct ctl_data *ctl) static void test_ctl_write_invalid(struct ctl_data *ctl) { bool pass; - int err; /* If the control is turned off let's be polite */ if (snd_ctl_elem_info_is_inactive(ctl->info)) { diff --git a/tools/testing/selftests/alsa/pcm-test.c b/tools/testing/selftests/alsa/pcm-test.c index b7eef32addb4..c0a39818c5a4 100644 --- a/tools/testing/selftests/alsa/pcm-test.c +++ b/tools/testing/selftests/alsa/pcm-test.c @@ -257,7 +257,9 @@ static void find_pcms(void) static void test_pcm_time(struct pcm_data *data, enum test_class class, const char *test_name, snd_config_t *pcm_cfg) { - char name[64], key[128], msg[256]; + char name[64], msg[256]; + const int duration_s = 2, margin_ms = 100; + const int duration_ms = duration_s * 1000; const char *cs; int i, err; snd_pcm_t *handle = NULL; @@ -442,7 +444,7 @@ __format: skip = false; timestamp_now(&tstamp); - for (i = 0; i < 4; i++) { + for (i = 0; i < duration_s; i++) { if (data->stream == SND_PCM_STREAM_PLAYBACK) { frames = snd_pcm_writei(handle, samples, rate); if (frames < 0) { @@ -472,8 +474,8 @@ __format: snd_pcm_drain(handle); ms = timestamp_diff_ms(&tstamp); - if (ms < 3900 || ms > 4100) { - snprintf(msg, sizeof(msg), "time mismatch: expected 4000ms got %lld", ms); + if (ms < duration_ms - margin_ms || ms > duration_ms + margin_ms) { + snprintf(msg, sizeof(msg), "time mismatch: expected %dms got %lld", duration_ms, ms); goto __close; } @@ -565,7 +567,7 @@ int main(void) { struct card_data *card; struct pcm_data *pcm; - snd_config_t *global_config, *cfg, *pcm_cfg; + snd_config_t *global_config, *cfg; int num_pcm_tests = 0, num_tests, num_std_pcm_tests; int ret; void *thread_ret; diff --git a/tools/testing/selftests/alsa/test-pcmtest-driver.c b/tools/testing/selftests/alsa/test-pcmtest-driver.c new file mode 100644 index 000000000000..a52ecd43dbe3 --- /dev/null +++ b/tools/testing/selftests/alsa/test-pcmtest-driver.c @@ -0,0 +1,330 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This is the test which covers PCM middle layer data transferring using + * the virtual pcm test driver (snd-pcmtest). + * + * Copyright 2023 Ivan Orlov <ivan.orlov0322@gmail.com> + */ +#include <string.h> +#include <alsa/asoundlib.h> +#include "../kselftest_harness.h" + +#define CH_NUM 4 + +struct pattern_buf { + char buf[1024]; + int len; +}; + +struct pattern_buf patterns[CH_NUM]; + +struct pcmtest_test_params { + unsigned long buffer_size; + unsigned long period_size; + unsigned long channels; + unsigned int rate; + snd_pcm_access_t access; + size_t sec_buf_len; + size_t sample_size; + int time; + snd_pcm_format_t format; +}; + +static int read_patterns(void) +{ + FILE *fp, *fpl; + int i; + char pf[64]; + char plf[64]; + + for (i = 0; i < CH_NUM; i++) { + sprintf(plf, "/sys/kernel/debug/pcmtest/fill_pattern%d_len", i); + fpl = fopen(plf, "r"); + if (!fpl) + return -1; + fscanf(fpl, "%u", &patterns[i].len); + fclose(fpl); + + sprintf(pf, "/sys/kernel/debug/pcmtest/fill_pattern%d", i); + fp = fopen(pf, "r"); + if (!fp) + return -1; + fread(patterns[i].buf, 1, patterns[i].len, fp); + fclose(fp); + } + + return 0; +} + +static int get_test_results(char *debug_name) +{ + int result; + FILE *f; + char fname[128]; + + sprintf(fname, "/sys/kernel/debug/pcmtest/%s", debug_name); + + f = fopen(fname, "r"); + if (!f) { + printf("Failed to open file\n"); + return -1; + } + fscanf(f, "%d", &result); + fclose(f); + + return result; +} + +static size_t get_sec_buf_len(unsigned int rate, unsigned long channels, snd_pcm_format_t format) +{ + return rate * channels * snd_pcm_format_physical_width(format) / 8; +} + +static int setup_handle(snd_pcm_t **handle, snd_pcm_sw_params_t *swparams, + snd_pcm_hw_params_t *hwparams, struct pcmtest_test_params *params, + int card, snd_pcm_stream_t stream) +{ + char pcm_name[32]; + int err; + + sprintf(pcm_name, "hw:%d,0,0", card); + err = snd_pcm_open(handle, pcm_name, stream, 0); + if (err < 0) + return err; + snd_pcm_hw_params_any(*handle, hwparams); + snd_pcm_hw_params_set_rate_resample(*handle, hwparams, 0); + snd_pcm_hw_params_set_access(*handle, hwparams, params->access); + snd_pcm_hw_params_set_format(*handle, hwparams, params->format); + snd_pcm_hw_params_set_channels(*handle, hwparams, params->channels); + snd_pcm_hw_params_set_rate_near(*handle, hwparams, ¶ms->rate, 0); + snd_pcm_hw_params_set_period_size_near(*handle, hwparams, ¶ms->period_size, 0); + snd_pcm_hw_params_set_buffer_size_near(*handle, hwparams, ¶ms->buffer_size); + snd_pcm_hw_params(*handle, hwparams); + snd_pcm_sw_params_current(*handle, swparams); + + snd_pcm_hw_params_set_rate_resample(*handle, hwparams, 0); + snd_pcm_sw_params_set_avail_min(*handle, swparams, params->period_size); + snd_pcm_hw_params_set_buffer_size_near(*handle, hwparams, ¶ms->buffer_size); + snd_pcm_hw_params_set_period_size_near(*handle, hwparams, ¶ms->period_size, 0); + snd_pcm_sw_params(*handle, swparams); + snd_pcm_hw_params(*handle, hwparams); + + return 0; +} + +FIXTURE(pcmtest) { + int card; + snd_pcm_sw_params_t *swparams; + snd_pcm_hw_params_t *hwparams; + struct pcmtest_test_params params; +}; + +FIXTURE_TEARDOWN(pcmtest) { +} + +FIXTURE_SETUP(pcmtest) { + char *card_name; + int err; + + if (geteuid()) + SKIP(exit(-1), "This test needs root to run!"); + + err = read_patterns(); + if (err) + SKIP(exit(-1), "Can't read patterns. Probably, module isn't loaded"); + + card_name = malloc(127); + ASSERT_NE(card_name, NULL); + self->params.buffer_size = 16384; + self->params.period_size = 4096; + self->params.channels = CH_NUM; + self->params.rate = 8000; + self->params.access = SND_PCM_ACCESS_RW_INTERLEAVED; + self->params.format = SND_PCM_FORMAT_S16_LE; + self->card = -1; + self->params.sample_size = snd_pcm_format_physical_width(self->params.format) / 8; + + self->params.sec_buf_len = get_sec_buf_len(self->params.rate, self->params.channels, + self->params.format); + self->params.time = 4; + + while (snd_card_next(&self->card) >= 0) { + if (self->card == -1) + break; + snd_card_get_name(self->card, &card_name); + if (!strcmp(card_name, "PCM-Test")) + break; + } + free(card_name); + ASSERT_NE(self->card, -1); +} + +/* + * Here we are trying to send the looped monotonically increasing sequence of bytes to the driver. + * If our data isn't corrupted, the driver will set the content of 'pc_test' debugfs file to '1' + */ +TEST_F(pcmtest, playback) { + snd_pcm_t *handle; + unsigned char *it; + size_t write_res; + int test_results; + int i, cur_ch, pos_in_ch; + void *samples; + struct pcmtest_test_params *params = &self->params; + + samples = calloc(self->params.sec_buf_len * self->params.time, 1); + ASSERT_NE(samples, NULL); + + snd_pcm_sw_params_alloca(&self->swparams); + snd_pcm_hw_params_alloca(&self->hwparams); + + ASSERT_EQ(setup_handle(&handle, self->swparams, self->hwparams, params, + self->card, SND_PCM_STREAM_PLAYBACK), 0); + snd_pcm_format_set_silence(params->format, samples, + params->rate * params->channels * params->time); + it = samples; + for (i = 0; i < self->params.sec_buf_len * params->time; i++) { + cur_ch = (i / params->sample_size) % CH_NUM; + pos_in_ch = i / params->sample_size / CH_NUM * params->sample_size + + (i % params->sample_size); + it[i] = patterns[cur_ch].buf[pos_in_ch % patterns[cur_ch].len]; + } + write_res = snd_pcm_writei(handle, samples, params->rate * params->time); + ASSERT_GE(write_res, 0); + + snd_pcm_close(handle); + free(samples); + test_results = get_test_results("pc_test"); + ASSERT_EQ(test_results, 1); +} + +/* + * Here we test that the virtual alsa driver returns looped and monotonically increasing sequence + * of bytes. In the interleaved mode the buffer will contain samples in the following order: + * C0, C1, C2, C3, C0, C1, ... + */ +TEST_F(pcmtest, capture) { + snd_pcm_t *handle; + unsigned char *it; + size_t read_res; + int i, cur_ch, pos_in_ch; + void *samples; + struct pcmtest_test_params *params = &self->params; + + samples = calloc(self->params.sec_buf_len * self->params.time, 1); + ASSERT_NE(samples, NULL); + + snd_pcm_sw_params_alloca(&self->swparams); + snd_pcm_hw_params_alloca(&self->hwparams); + + ASSERT_EQ(setup_handle(&handle, self->swparams, self->hwparams, + params, self->card, SND_PCM_STREAM_CAPTURE), 0); + snd_pcm_format_set_silence(params->format, samples, + params->rate * params->channels * params->time); + read_res = snd_pcm_readi(handle, samples, params->rate * params->time); + ASSERT_GE(read_res, 0); + snd_pcm_close(handle); + it = (unsigned char *)samples; + for (i = 0; i < self->params.sec_buf_len * self->params.time; i++) { + cur_ch = (i / params->sample_size) % CH_NUM; + pos_in_ch = i / params->sample_size / CH_NUM * params->sample_size + + (i % params->sample_size); + ASSERT_EQ(it[i], patterns[cur_ch].buf[pos_in_ch % patterns[cur_ch].len]); + } + free(samples); +} + +// Test capture in the non-interleaved access mode. The are buffers for each recorded channel +TEST_F(pcmtest, ni_capture) { + snd_pcm_t *handle; + struct pcmtest_test_params params = self->params; + char **chan_samples; + size_t i, j, read_res; + + chan_samples = calloc(CH_NUM, sizeof(*chan_samples)); + ASSERT_NE(chan_samples, NULL); + + snd_pcm_sw_params_alloca(&self->swparams); + snd_pcm_hw_params_alloca(&self->hwparams); + + params.access = SND_PCM_ACCESS_RW_NONINTERLEAVED; + + ASSERT_EQ(setup_handle(&handle, self->swparams, self->hwparams, + ¶ms, self->card, SND_PCM_STREAM_CAPTURE), 0); + + for (i = 0; i < CH_NUM; i++) + chan_samples[i] = calloc(params.sec_buf_len * params.time, 1); + + for (i = 0; i < 1; i++) { + read_res = snd_pcm_readn(handle, (void **)chan_samples, params.rate * params.time); + ASSERT_GE(read_res, 0); + } + snd_pcm_close(handle); + + for (i = 0; i < CH_NUM; i++) { + for (j = 0; j < params.rate * params.time; j++) + ASSERT_EQ(chan_samples[i][j], patterns[i].buf[j % patterns[i].len]); + free(chan_samples[i]); + } + free(chan_samples); +} + +TEST_F(pcmtest, ni_playback) { + snd_pcm_t *handle; + struct pcmtest_test_params params = self->params; + char **chan_samples; + size_t i, j, read_res; + int test_res; + + chan_samples = calloc(CH_NUM, sizeof(*chan_samples)); + ASSERT_NE(chan_samples, NULL); + + snd_pcm_sw_params_alloca(&self->swparams); + snd_pcm_hw_params_alloca(&self->hwparams); + + params.access = SND_PCM_ACCESS_RW_NONINTERLEAVED; + + ASSERT_EQ(setup_handle(&handle, self->swparams, self->hwparams, + ¶ms, self->card, SND_PCM_STREAM_PLAYBACK), 0); + + for (i = 0; i < CH_NUM; i++) { + chan_samples[i] = calloc(params.sec_buf_len * params.time, 1); + for (j = 0; j < params.sec_buf_len * params.time; j++) + chan_samples[i][j] = patterns[i].buf[j % patterns[i].len]; + } + + for (i = 0; i < 1; i++) { + read_res = snd_pcm_writen(handle, (void **)chan_samples, params.rate * params.time); + ASSERT_GE(read_res, 0); + } + + snd_pcm_close(handle); + test_res = get_test_results("pc_test"); + ASSERT_EQ(test_res, 1); + + for (i = 0; i < CH_NUM; i++) + free(chan_samples[i]); + free(chan_samples); +} + +/* + * Here we are testing the custom ioctl definition inside the virtual driver. If it triggers + * successfully, the driver sets the content of 'ioctl_test' debugfs file to '1'. + */ +TEST_F(pcmtest, reset_ioctl) { + snd_pcm_t *handle; + int test_res; + struct pcmtest_test_params *params = &self->params; + + snd_pcm_sw_params_alloca(&self->swparams); + snd_pcm_hw_params_alloca(&self->hwparams); + + ASSERT_EQ(setup_handle(&handle, self->swparams, self->hwparams, params, + self->card, SND_PCM_STREAM_CAPTURE), 0); + snd_pcm_reset(handle); + test_res = get_test_results("ioctl_test"); + ASSERT_EQ(test_res, 1); + snd_pcm_close(handle); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index 9460cbe81bcc..28b93cab8c0d 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -19,6 +19,8 @@ CFLAGS += -I$(top_srcdir)/tools/testing/selftests/ CFLAGS += $(KHDR_INCLUDES) +CFLAGS += -I$(top_srcdir)/tools/include + export CFLAGS export top_srcdir @@ -42,7 +44,7 @@ run_tests: all done # Avoid any output on non arm64 on emit_tests -emit_tests: all +emit_tests: @for DIR in $(ARM64_SUBTARGETS); do \ BUILD_TARGET=$(OUTPUT)/$$DIR; \ make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \ diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 93333a90bf3a..e3d262831d91 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -19,19 +19,38 @@ #include "../../kselftest.h" -#define TESTS_PER_HWCAP 2 +#define TESTS_PER_HWCAP 3 /* - * Function expected to generate SIGILL when the feature is not - * supported and return when it is supported. If SIGILL is generated - * then the handler must be able to skip over the instruction safely. + * Function expected to generate exception when the feature is not + * supported and return when it is supported. If the specific exception + * is generated then the handler must be able to skip over the + * instruction safely. * * Note that it is expected that for many architecture extensions * there are no specific traps due to no architecture state being * added so we may not fault if running on a kernel which doesn't know * to add the hwcap. */ -typedef void (*sigill_fn)(void); +typedef void (*sig_fn)(void); + +static void aes_sigill(void) +{ + /* AESE V0.16B, V0.16B */ + asm volatile(".inst 0x4e284800" : : : ); +} + +static void atomics_sigill(void) +{ + /* STADD W0, [SP] */ + asm volatile(".inst 0xb82003ff" : : : ); +} + +static void crc32_sigill(void) +{ + /* CRC32W W0, W0, W1 */ + asm volatile(".inst 0x1ac14800" : : : ); +} static void cssc_sigill(void) { @@ -39,11 +58,72 @@ static void cssc_sigill(void) asm volatile(".inst 0xdac01c00" : : : "x0"); } +static void fp_sigill(void) +{ + asm volatile("fmov s0, #1"); +} + +static void ilrcpc_sigill(void) +{ + /* LDAPUR W0, [SP, #8] */ + asm volatile(".inst 0x994083e0" : : : ); +} + +static void jscvt_sigill(void) +{ + /* FJCVTZS W0, D0 */ + asm volatile(".inst 0x1e7e0000" : : : ); +} + +static void lrcpc_sigill(void) +{ + /* LDAPR W0, [SP, #0] */ + asm volatile(".inst 0xb8bfc3e0" : : : ); +} + +static void mops_sigill(void) +{ + char dst[1], src[1]; + register char *dstp asm ("x0") = dst; + register char *srcp asm ("x1") = src; + register long size asm ("x2") = 1; + + /* CPYP [x0]!, [x1]!, x2! */ + asm volatile(".inst 0x1d010440" + : "+r" (dstp), "+r" (srcp), "+r" (size) + : + : "cc", "memory"); +} + +static void pmull_sigill(void) +{ + /* PMULL V0.1Q, V0.1D, V0.1D */ + asm volatile(".inst 0x0ee0e000" : : : ); +} + static void rng_sigill(void) { asm volatile("mrs x0, S3_3_C2_C4_0" : : : "x0"); } +static void sha1_sigill(void) +{ + /* SHA1H S0, S0 */ + asm volatile(".inst 0x5e280800" : : : ); +} + +static void sha2_sigill(void) +{ + /* SHA256H Q0, Q0, V0.4S */ + asm volatile(".inst 0x5e004000" : : : ); +} + +static void sha512_sigill(void) +{ + /* SHA512H Q0, Q0, V0.2D */ + asm volatile(".inst 0xce608000" : : : ); +} + static void sme_sigill(void) { /* RDSVL x0, #0 */ @@ -194,15 +274,46 @@ static void svebf16_sigill(void) asm volatile(".inst 0x658aa000" : : : "z0"); } +static void hbc_sigill(void) +{ + /* BC.EQ +4 */ + asm volatile("cmp xzr, xzr\n" + ".inst 0x54000030" : : : "cc"); +} + +static void uscat_sigbus(void) +{ + /* unaligned atomic access */ + asm volatile("ADD x1, sp, #2" : : : ); + /* STADD W0, [X1] */ + asm volatile(".inst 0xb820003f" : : : ); +} + static const struct hwcap_data { const char *name; unsigned long at_hwcap; unsigned long hwcap_bit; const char *cpuinfo; - sigill_fn sigill_fn; + sig_fn sigill_fn; bool sigill_reliable; + sig_fn sigbus_fn; + bool sigbus_reliable; } hwcaps[] = { { + .name = "AES", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_AES, + .cpuinfo = "aes", + .sigill_fn = aes_sigill, + }, + { + .name = "CRC32", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_CRC32, + .cpuinfo = "crc32", + .sigill_fn = crc32_sigill, + }, + { .name = "CSSC", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_CSSC, @@ -210,6 +321,65 @@ static const struct hwcap_data { .sigill_fn = cssc_sigill, }, { + .name = "FP", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_FP, + .cpuinfo = "fp", + .sigill_fn = fp_sigill, + }, + { + .name = "JSCVT", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_JSCVT, + .cpuinfo = "jscvt", + .sigill_fn = jscvt_sigill, + }, + { + .name = "LRCPC", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_LRCPC, + .cpuinfo = "lrcpc", + .sigill_fn = lrcpc_sigill, + }, + { + .name = "LRCPC2", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_ILRCPC, + .cpuinfo = "ilrcpc", + .sigill_fn = ilrcpc_sigill, + }, + { + .name = "LSE", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_ATOMICS, + .cpuinfo = "atomics", + .sigill_fn = atomics_sigill, + }, + { + .name = "LSE2", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_USCAT, + .cpuinfo = "uscat", + .sigill_fn = atomics_sigill, + .sigbus_fn = uscat_sigbus, + .sigbus_reliable = true, + }, + { + .name = "MOPS", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_MOPS, + .cpuinfo = "mops", + .sigill_fn = mops_sigill, + .sigill_reliable = true, + }, + { + .name = "PMULL", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_PMULL, + .cpuinfo = "pmull", + .sigill_fn = pmull_sigill, + }, + { .name = "RNG", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_RNG, @@ -223,6 +393,27 @@ static const struct hwcap_data { .cpuinfo = "rprfm", }, { + .name = "SHA1", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SHA1, + .cpuinfo = "sha1", + .sigill_fn = sha1_sigill, + }, + { + .name = "SHA2", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SHA2, + .cpuinfo = "sha2", + .sigill_fn = sha2_sigill, + }, + { + .name = "SHA512", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_SHA512, + .cpuinfo = "sha512", + .sigill_fn = sha512_sigill, + }, + { .name = "SME", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_SME, @@ -364,20 +555,32 @@ static const struct hwcap_data { .hwcap_bit = HWCAP2_SVE_EBF16, .cpuinfo = "sveebf16", }, + { + .name = "HBC", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_HBC, + .cpuinfo = "hbc", + .sigill_fn = hbc_sigill, + .sigill_reliable = true, + }, }; -static bool seen_sigill; - -static void handle_sigill(int sig, siginfo_t *info, void *context) -{ - ucontext_t *uc = context; - - seen_sigill = true; - - /* Skip over the offending instruction */ - uc->uc_mcontext.pc += 4; +typedef void (*sighandler_fn)(int, siginfo_t *, void *); + +#define DEF_SIGHANDLER_FUNC(SIG, NUM) \ +static bool seen_##SIG; \ +static void handle_##SIG(int sig, siginfo_t *info, void *context) \ +{ \ + ucontext_t *uc = context; \ + \ + seen_##SIG = true; \ + /* Skip over the offending instruction */ \ + uc->uc_mcontext.pc += 4; \ } +DEF_SIGHANDLER_FUNC(sigill, SIGILL); +DEF_SIGHANDLER_FUNC(sigbus, SIGBUS); + bool cpuinfo_present(const char *name) { FILE *f; @@ -420,24 +623,77 @@ bool cpuinfo_present(const char *name) return false; } -int main(void) +static int install_sigaction(int signum, sighandler_fn handler) { - const struct hwcap_data *hwcap; - int i, ret; - bool have_cpuinfo, have_hwcap; + int ret; struct sigaction sa; - ksft_print_header(); - ksft_set_plan(ARRAY_SIZE(hwcaps) * TESTS_PER_HWCAP); - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = handle_sigill; + sa.sa_sigaction = handler; sa.sa_flags = SA_RESTART | SA_SIGINFO; sigemptyset(&sa.sa_mask); - ret = sigaction(SIGILL, &sa, NULL); + ret = sigaction(signum, &sa, NULL); if (ret < 0) - ksft_exit_fail_msg("Failed to install SIGILL handler: %s (%d)\n", + ksft_exit_fail_msg("Failed to install SIGNAL handler: %s (%d)\n", + strerror(errno), errno); + + return ret; +} + +static void uninstall_sigaction(int signum) +{ + if (sigaction(signum, NULL, NULL) < 0) + ksft_exit_fail_msg("Failed to uninstall SIGNAL handler: %s (%d)\n", strerror(errno), errno); +} + +#define DEF_INST_RAISE_SIG(SIG, NUM) \ +static bool inst_raise_##SIG(const struct hwcap_data *hwcap, \ + bool have_hwcap) \ +{ \ + if (!hwcap->SIG##_fn) { \ + ksft_test_result_skip(#SIG"_%s\n", hwcap->name); \ + /* assume that it would raise exception in default */ \ + return true; \ + } \ + \ + install_sigaction(NUM, handle_##SIG); \ + \ + seen_##SIG = false; \ + hwcap->SIG##_fn(); \ + \ + if (have_hwcap) { \ + /* Should be able to use the extension */ \ + ksft_test_result(!seen_##SIG, \ + #SIG"_%s\n", hwcap->name); \ + } else if (hwcap->SIG##_reliable) { \ + /* Guaranteed a SIGNAL */ \ + ksft_test_result(seen_##SIG, \ + #SIG"_%s\n", hwcap->name); \ + } else { \ + /* Missing SIGNAL might be fine */ \ + ksft_print_msg(#SIG"_%sreported for %s\n", \ + seen_##SIG ? "" : "not ", \ + hwcap->name); \ + ksft_test_result_skip(#SIG"_%s\n", \ + hwcap->name); \ + } \ + \ + uninstall_sigaction(NUM); \ + return seen_##SIG; \ +} + +DEF_INST_RAISE_SIG(sigill, SIGILL); +DEF_INST_RAISE_SIG(sigbus, SIGBUS); + +int main(void) +{ + int i; + const struct hwcap_data *hwcap; + bool have_cpuinfo, have_hwcap, raise_sigill; + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(hwcaps) * TESTS_PER_HWCAP); for (i = 0; i < ARRAY_SIZE(hwcaps); i++) { hwcap = &hwcaps[i]; @@ -451,30 +707,15 @@ int main(void) ksft_test_result(have_hwcap == have_cpuinfo, "cpuinfo_match_%s\n", hwcap->name); - if (hwcap->sigill_fn) { - seen_sigill = false; - hwcap->sigill_fn(); - - if (have_hwcap) { - /* Should be able to use the extension */ - ksft_test_result(!seen_sigill, "sigill_%s\n", - hwcap->name); - } else if (hwcap->sigill_reliable) { - /* Guaranteed a SIGILL */ - ksft_test_result(seen_sigill, "sigill_%s\n", - hwcap->name); - } else { - /* Missing SIGILL might be fine */ - ksft_print_msg("SIGILL %sreported for %s\n", - seen_sigill ? "" : "not ", - hwcap->name); - ksft_test_result_skip("sigill_%s\n", - hwcap->name); - } - } else { - ksft_test_result_skip("sigill_%s\n", - hwcap->name); - } + /* + * Testing for SIGBUS only makes sense after make sure + * that the instruction does not cause a SIGILL signal. + */ + raise_sigill = inst_raise_sigill(hwcap, have_hwcap); + if (!raise_sigill) + inst_raise_sigbus(hwcap, have_hwcap); + else + ksft_test_result_skip("sigbus_%s\n", hwcap->name); } ksft_print_cnts(); diff --git a/tools/testing/selftests/arm64/abi/ptrace.c b/tools/testing/selftests/arm64/abi/ptrace.c index be952511af22..abe4d58d731d 100644 --- a/tools/testing/selftests/arm64/abi/ptrace.c +++ b/tools/testing/selftests/arm64/abi/ptrace.c @@ -20,7 +20,7 @@ #include "../../kselftest.h" -#define EXPECTED_TESTS 7 +#define EXPECTED_TESTS 11 #define MAX_TPIDRS 2 @@ -132,6 +132,34 @@ static void test_tpidr(pid_t child) } } +static void test_hw_debug(pid_t child, int type, const char *type_name) +{ + struct user_hwdebug_state state; + struct iovec iov; + int slots, arch, ret; + + iov.iov_len = sizeof(state); + iov.iov_base = &state; + + /* Should be able to read the values */ + ret = ptrace(PTRACE_GETREGSET, child, type, &iov); + ksft_test_result(ret == 0, "read_%s\n", type_name); + + if (ret == 0) { + /* Low 8 bits is the number of slots, next 4 bits the arch */ + slots = state.dbg_info & 0xff; + arch = (state.dbg_info >> 8) & 0xf; + + ksft_print_msg("%s version %d with %d slots\n", type_name, + arch, slots); + + /* Zero is not currently architecturally valid */ + ksft_test_result(arch, "%s_arch_set\n", type_name); + } else { + ksft_test_result_skip("%s_arch_set\n"); + } +} + static int do_child(void) { if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) @@ -207,6 +235,8 @@ static int do_parent(pid_t child) ksft_print_msg("Parent is %d, child is %d\n", getpid(), child); test_tpidr(child); + test_hw_debug(child, NT_ARM_HW_WATCH, "NT_ARM_HW_WATCH"); + test_hw_debug(child, NT_ARM_HW_BREAK, "NT_ARM_HW_BREAK"); ret = EXIT_SUCCESS; diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c index 18cc123e2347..d704511a0955 100644 --- a/tools/testing/selftests/arm64/abi/syscall-abi.c +++ b/tools/testing/selftests/arm64/abi/syscall-abi.c @@ -20,12 +20,20 @@ #include "syscall-abi.h" +/* + * The kernel defines a much larger SVE_VQ_MAX than is expressable in + * the architecture, this creates a *lot* of overhead filling the + * buffers (especially ZA) on emulated platforms so use the actual + * architectural maximum instead. + */ +#define ARCH_SVE_VQ_MAX 16 + static int default_sme_vl; static int sve_vl_count; -static unsigned int sve_vls[SVE_VQ_MAX]; +static unsigned int sve_vls[ARCH_SVE_VQ_MAX]; static int sme_vl_count; -static unsigned int sme_vls[SVE_VQ_MAX]; +static unsigned int sme_vls[ARCH_SVE_VQ_MAX]; extern void do_syscall(int sve_vl, int sme_vl); @@ -130,9 +138,9 @@ static int check_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, #define SVE_Z_SHARED_BYTES (128 / 8) -static uint8_t z_zero[__SVE_ZREG_SIZE(SVE_VQ_MAX)]; -uint8_t z_in[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; -uint8_t z_out[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(SVE_VQ_MAX)]; +static uint8_t z_zero[__SVE_ZREG_SIZE(ARCH_SVE_VQ_MAX)]; +uint8_t z_in[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(ARCH_SVE_VQ_MAX)]; +uint8_t z_out[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(ARCH_SVE_VQ_MAX)]; static void setup_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t svcr) @@ -190,8 +198,8 @@ static int check_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl, return errors; } -uint8_t p_in[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)]; -uint8_t p_out[SVE_NUM_PREGS * __SVE_PREG_SIZE(SVE_VQ_MAX)]; +uint8_t p_in[SVE_NUM_PREGS * __SVE_PREG_SIZE(ARCH_SVE_VQ_MAX)]; +uint8_t p_out[SVE_NUM_PREGS * __SVE_PREG_SIZE(ARCH_SVE_VQ_MAX)]; static void setup_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t svcr) @@ -222,8 +230,8 @@ static int check_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl, return errors; } -uint8_t ffr_in[__SVE_PREG_SIZE(SVE_VQ_MAX)]; -uint8_t ffr_out[__SVE_PREG_SIZE(SVE_VQ_MAX)]; +uint8_t ffr_in[__SVE_PREG_SIZE(ARCH_SVE_VQ_MAX)]; +uint8_t ffr_out[__SVE_PREG_SIZE(ARCH_SVE_VQ_MAX)]; static void setup_ffr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t svcr) @@ -300,8 +308,8 @@ static int check_svcr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, return errors; } -uint8_t za_in[ZA_SIG_REGS_SIZE(SVE_VQ_MAX)]; -uint8_t za_out[ZA_SIG_REGS_SIZE(SVE_VQ_MAX)]; +uint8_t za_in[ZA_SIG_REGS_SIZE(ARCH_SVE_VQ_MAX)]; +uint8_t za_out[ZA_SIG_REGS_SIZE(ARCH_SVE_VQ_MAX)]; static void setup_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t svcr) @@ -470,9 +478,9 @@ void sve_count_vls(void) return; /* - * Enumerate up to SVE_VQ_MAX vector lengths + * Enumerate up to ARCH_SVE_VQ_MAX vector lengths */ - for (vq = SVE_VQ_MAX; vq > 0; vq /= 2) { + for (vq = ARCH_SVE_VQ_MAX; vq > 0; vq /= 2) { vl = prctl(PR_SVE_SET_VL, vq * 16); if (vl == -1) ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n", @@ -496,9 +504,9 @@ void sme_count_vls(void) return; /* - * Enumerate up to SVE_VQ_MAX vector lengths + * Enumerate up to ARCH_SVE_VQ_MAX vector lengths */ - for (vq = SVE_VQ_MAX; vq > 0; vq /= 2) { + for (vq = ARCH_SVE_VQ_MAX; vq > 0; vq /= 2) { vl = prctl(PR_SME_SET_VL, vq * 16); if (vl == -1) ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n", diff --git a/tools/testing/selftests/arm64/bti/Makefile b/tools/testing/selftests/arm64/bti/Makefile index ccdac414ad94..05e4ee523a53 100644 --- a/tools/testing/selftests/arm64/bti/Makefile +++ b/tools/testing/selftests/arm64/bti/Makefile @@ -2,8 +2,6 @@ TEST_GEN_PROGS := btitest nobtitest -PROGS := $(patsubst %,gen/%,$(TEST_GEN_PROGS)) - # These tests are built as freestanding binaries since otherwise BTI # support in ld.so is required which is not currently widespread; when # it is available it will still be useful to test this separately as the @@ -18,44 +16,41 @@ CFLAGS_COMMON = -ffreestanding -Wall -Wextra $(CFLAGS) BTI_CC_COMMAND = $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -c -o $@ $< NOBTI_CC_COMMAND = $(CC) $(CFLAGS_NOBTI) $(CFLAGS_COMMON) -c -o $@ $< -%-bti.o: %.c +$(OUTPUT)/%-bti.o: %.c $(BTI_CC_COMMAND) -%-bti.o: %.S +$(OUTPUT)/%-bti.o: %.S $(BTI_CC_COMMAND) -%-nobti.o: %.c +$(OUTPUT)/%-nobti.o: %.c $(NOBTI_CC_COMMAND) -%-nobti.o: %.S +$(OUTPUT)/%-nobti.o: %.S $(NOBTI_CC_COMMAND) BTI_OBJS = \ - test-bti.o \ - signal-bti.o \ - start-bti.o \ - syscall-bti.o \ - system-bti.o \ - teststubs-bti.o \ - trampoline-bti.o -gen/btitest: $(BTI_OBJS) + $(OUTPUT)/test-bti.o \ + $(OUTPUT)/signal-bti.o \ + $(OUTPUT)/start-bti.o \ + $(OUTPUT)/syscall-bti.o \ + $(OUTPUT)/system-bti.o \ + $(OUTPUT)/teststubs-bti.o \ + $(OUTPUT)/trampoline-bti.o +$(OUTPUT)/btitest: $(BTI_OBJS) $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -static -o $@ $^ NOBTI_OBJS = \ - test-nobti.o \ - signal-nobti.o \ - start-nobti.o \ - syscall-nobti.o \ - system-nobti.o \ - teststubs-nobti.o \ - trampoline-nobti.o -gen/nobtitest: $(NOBTI_OBJS) + $(OUTPUT)/test-nobti.o \ + $(OUTPUT)/signal-nobti.o \ + $(OUTPUT)/start-nobti.o \ + $(OUTPUT)/syscall-nobti.o \ + $(OUTPUT)/system-nobti.o \ + $(OUTPUT)/teststubs-nobti.o \ + $(OUTPUT)/trampoline-nobti.o +$(OUTPUT)/nobtitest: $(NOBTI_OBJS) $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -static -o $@ $^ # Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list # to account for any OUTPUT target-dirs optionally provided by # the toplevel makefile include ../../lib.mk - -$(TEST_GEN_PROGS): $(PROGS) - cp $(PROGS) $(OUTPUT)/ diff --git a/tools/testing/selftests/arm64/bti/compiler.h b/tools/testing/selftests/arm64/bti/compiler.h deleted file mode 100644 index ebb6204f447a..000000000000 --- a/tools/testing/selftests/arm64/bti/compiler.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2019 Arm Limited - * Original author: Dave Martin <Dave.Martin@arm.com> - */ - -#ifndef COMPILER_H -#define COMPILER_H - -#define __always_unused __attribute__((__unused__)) -#define __noreturn __attribute__((__noreturn__)) -#define __unreachable() __builtin_unreachable() - -/* curse(e) has value e, but the compiler cannot assume so */ -#define curse(e) ({ \ - __typeof__(e) __curse_e = (e); \ - asm ("" : "+r" (__curse_e)); \ - __curse_e; \ -}) - -#endif /* ! COMPILER_H */ diff --git a/tools/testing/selftests/arm64/bti/gen/.gitignore b/tools/testing/selftests/arm64/bti/gen/.gitignore deleted file mode 100644 index 73869fabada4..000000000000 --- a/tools/testing/selftests/arm64/bti/gen/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -btitest -nobtitest diff --git a/tools/testing/selftests/arm64/bti/system.c b/tools/testing/selftests/arm64/bti/system.c index 6385d8d4973b..93d772b00bfe 100644 --- a/tools/testing/selftests/arm64/bti/system.c +++ b/tools/testing/selftests/arm64/bti/system.c @@ -8,12 +8,10 @@ #include <asm/unistd.h> -#include "compiler.h" - void __noreturn exit(int n) { syscall(__NR_exit, n); - __unreachable(); + unreachable(); } ssize_t write(int fd, const void *buf, size_t size) diff --git a/tools/testing/selftests/arm64/bti/system.h b/tools/testing/selftests/arm64/bti/system.h index aca118589705..2e9ee1284a0c 100644 --- a/tools/testing/selftests/arm64/bti/system.h +++ b/tools/testing/selftests/arm64/bti/system.h @@ -14,12 +14,12 @@ typedef __kernel_size_t size_t; typedef __kernel_ssize_t ssize_t; #include <linux/errno.h> +#include <linux/compiler.h> + #include <asm/hwcap.h> #include <asm/ptrace.h> #include <asm/unistd.h> -#include "compiler.h" - long syscall(int nr, ...); void __noreturn exit(int n); diff --git a/tools/testing/selftests/arm64/bti/test.c b/tools/testing/selftests/arm64/bti/test.c index 2cd8dcee5aec..28a8e8a28a84 100644 --- a/tools/testing/selftests/arm64/bti/test.c +++ b/tools/testing/selftests/arm64/bti/test.c @@ -17,7 +17,6 @@ typedef struct ucontext ucontext_t; #include "btitest.h" -#include "compiler.h" #include "signal.h" #define EXPECTED_TESTS 18 diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c index 9bcfcdc34ee9..5f648b97a06f 100644 --- a/tools/testing/selftests/arm64/fp/vec-syscfg.c +++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c @@ -6,6 +6,7 @@ #include <assert.h> #include <errno.h> #include <fcntl.h> +#include <stdbool.h> #include <stddef.h> #include <stdio.h> #include <stdlib.h> @@ -39,9 +40,11 @@ struct vec_data { int max_vl; }; +#define VEC_SVE 0 +#define VEC_SME 1 static struct vec_data vec_data[] = { - { + [VEC_SVE] = { .name = "SVE", .hwcap_type = AT_HWCAP, .hwcap = HWCAP_SVE, @@ -51,7 +54,7 @@ static struct vec_data vec_data[] = { .prctl_set = PR_SVE_SET_VL, .default_vl_file = "/proc/sys/abi/sve_default_vector_length", }, - { + [VEC_SME] = { .name = "SME", .hwcap_type = AT_HWCAP2, .hwcap = HWCAP2_SME, @@ -551,7 +554,8 @@ static void prctl_set_onexec(struct vec_data *data) /* For each VQ verify that setting via prctl() does the right thing */ static void prctl_set_all_vqs(struct vec_data *data) { - int ret, vq, vl, new_vl; + int ret, vq, vl, new_vl, i; + int orig_vls[ARRAY_SIZE(vec_data)]; int errors = 0; if (!data->min_vl || !data->max_vl) { @@ -560,6 +564,9 @@ static void prctl_set_all_vqs(struct vec_data *data) return; } + for (i = 0; i < ARRAY_SIZE(vec_data); i++) + orig_vls[i] = vec_data[i].rdvl(); + for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) { vl = sve_vl_from_vq(vq); @@ -582,6 +589,22 @@ static void prctl_set_all_vqs(struct vec_data *data) errors++; } + /* Did any other VLs change? */ + for (i = 0; i < ARRAY_SIZE(vec_data); i++) { + if (&vec_data[i] == data) + continue; + + if (!(getauxval(vec_data[i].hwcap_type) & vec_data[i].hwcap)) + continue; + + if (vec_data[i].rdvl() != orig_vls[i]) { + ksft_print_msg("%s VL changed from %d to %d\n", + vec_data[i].name, orig_vls[i], + vec_data[i].rdvl()); + errors++; + } + } + /* Was that the VL we asked for? */ if (new_vl == vl) continue; @@ -644,18 +667,107 @@ static const test_type tests[] = { prctl_set_all_vqs, }; +static inline void smstart(void) +{ + asm volatile("msr S0_3_C4_C7_3, xzr"); +} + +static inline void smstart_sm(void) +{ + asm volatile("msr S0_3_C4_C3_3, xzr"); +} + +static inline void smstop(void) +{ + asm volatile("msr S0_3_C4_C6_3, xzr"); +} + + +/* + * Verify we can change the SVE vector length while SME is active and + * continue to use SME afterwards. + */ +static void change_sve_with_za(void) +{ + struct vec_data *sve_data = &vec_data[VEC_SVE]; + bool pass = true; + int ret, i; + + if (sve_data->min_vl == sve_data->max_vl) { + ksft_print_msg("Only one SVE VL supported, can't change\n"); + ksft_test_result_skip("change_sve_while_sme\n"); + return; + } + + /* Ensure we will trigger a change when we set the maximum */ + ret = prctl(sve_data->prctl_set, sve_data->min_vl); + if (ret != sve_data->min_vl) { + ksft_print_msg("Failed to set SVE VL %d: %d\n", + sve_data->min_vl, ret); + pass = false; + } + + /* Enable SM and ZA */ + smstart(); + + /* Trigger another VL change */ + ret = prctl(sve_data->prctl_set, sve_data->max_vl); + if (ret != sve_data->max_vl) { + ksft_print_msg("Failed to set SVE VL %d: %d\n", + sve_data->max_vl, ret); + pass = false; + } + + /* + * Spin for a bit with SM enabled to try to trigger another + * save/restore. We can't use syscalls without exiting + * streaming mode. + */ + for (i = 0; i < 100000000; i++) + smstart_sm(); + + /* + * TODO: Verify that ZA was preserved over the VL change and + * spin. + */ + + /* Clean up after ourselves */ + smstop(); + ret = prctl(sve_data->prctl_set, sve_data->default_vl); + if (ret != sve_data->default_vl) { + ksft_print_msg("Failed to restore SVE VL %d: %d\n", + sve_data->default_vl, ret); + pass = false; + } + + ksft_test_result(pass, "change_sve_with_za\n"); +} + +typedef void (*test_all_type)(void); + +static const struct { + const char *name; + test_all_type test; +} all_types_tests[] = { + { "change_sve_with_za", change_sve_with_za }, +}; + int main(void) { + bool all_supported = true; int i, j; ksft_print_header(); - ksft_set_plan(ARRAY_SIZE(tests) * ARRAY_SIZE(vec_data)); + ksft_set_plan(ARRAY_SIZE(tests) * ARRAY_SIZE(vec_data) + + ARRAY_SIZE(all_types_tests)); for (i = 0; i < ARRAY_SIZE(vec_data); i++) { struct vec_data *data = &vec_data[i]; unsigned long supported; supported = getauxval(data->hwcap_type) & data->hwcap; + if (!supported) + all_supported = false; for (j = 0; j < ARRAY_SIZE(tests); j++) { if (supported) @@ -666,5 +778,12 @@ int main(void) } } + for (i = 0; i < ARRAY_SIZE(all_types_tests); i++) { + if (all_supported) + all_types_tests[i].test(); + else + ksft_test_result_skip("%s\n", all_types_tests[i].name); + } + ksft_exit_pass(); } diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore index 8ab4c86837fd..839e3a252629 100644 --- a/tools/testing/selftests/arm64/signal/.gitignore +++ b/tools/testing/selftests/arm64/signal/.gitignore @@ -4,7 +4,7 @@ fake_sigreturn_* sme_* ssve_* sve_* -tpidr2_siginfo +tpidr2_* za_* zt_* !*.[ch] diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c index 40be8443949d..0dc948db3a4a 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.c +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c @@ -249,7 +249,8 @@ static void default_handler(int signum, siginfo_t *si, void *uc) fprintf(stderr, "-- Timeout !\n"); } else { fprintf(stderr, - "-- RX UNEXPECTED SIGNAL: %d\n", signum); + "-- RX UNEXPECTED SIGNAL: %d code %d address %p\n", + signum, si->si_code, si->si_addr); } default_result(current, 1); } diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h index 222093f51b67..762c8fe9c54a 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.h +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h @@ -8,6 +8,8 @@ #include <stdio.h> #include <string.h> +#include <linux/compiler.h> + #include "test_signals.h" int test_init(struct tdescr *td); @@ -60,13 +62,25 @@ static __always_inline bool get_current_context(struct tdescr *td, size_t dest_sz) { static volatile bool seen_already; + int i; + char *uc = (char *)dest_uc; assert(td && dest_uc); /* it's a genuine invocation..reinit */ seen_already = 0; td->live_uc_valid = 0; td->live_sz = dest_sz; - memset(dest_uc, 0x00, td->live_sz); + + /* + * This is a memset() but we don't want the compiler to + * optimise it into either instructions or a library call + * which might be incompatible with streaming mode. + */ + for (i = 0; i < td->live_sz; i++) { + uc[i] = 0; + OPTIMIZER_HIDE_VAR(uc[0]); + } + td->live_uc = dest_uc; /* * Grab ucontext_t triggering a SIGTRAP. @@ -104,6 +118,17 @@ static __always_inline bool get_current_context(struct tdescr *td, : "memory"); /* + * If we were grabbing a streaming mode context then we may + * have entered streaming mode behind the system's back and + * libc or compiler generated code might decide to do + * something invalid in streaming mode, or potentially even + * the state of ZA. Issue a SMSTOP to exit both now we have + * grabbed the state. + */ + if (td->feats_supported & FEAT_SME) + asm volatile("msr S0_3_C4_C6_3, xzr"); + + /* * If we get here with seen_already==1 it implies the td->live_uc * context has been used to get back here....this probably means * a test has failed to cause a SEGV...anyway live_uc does not diff --git a/tools/testing/selftests/arm64/signal/testcases/tpidr2_restore.c b/tools/testing/selftests/arm64/signal/testcases/tpidr2_restore.c new file mode 100644 index 000000000000..f9a86c00c28c --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/tpidr2_restore.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 ARM Limited + * + * Verify that the TPIDR2 register context in signal frames is restored. + */ + +#include <signal.h> +#include <ucontext.h> +#include <sys/auxv.h> +#include <sys/prctl.h> +#include <unistd.h> +#include <asm/sigcontext.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +#define SYS_TPIDR2 "S3_3_C13_C0_5" + +static uint64_t get_tpidr2(void) +{ + uint64_t val; + + asm volatile ( + "mrs %0, " SYS_TPIDR2 "\n" + : "=r"(val) + : + : "cc"); + + return val; +} + +static void set_tpidr2(uint64_t val) +{ + asm volatile ( + "msr " SYS_TPIDR2 ", %0\n" + : + : "r"(val) + : "cc"); +} + + +static uint64_t initial_tpidr2; + +static bool save_tpidr2(struct tdescr *td) +{ + initial_tpidr2 = get_tpidr2(); + fprintf(stderr, "Initial TPIDR2: %lx\n", initial_tpidr2); + + return true; +} + +static int modify_tpidr2(struct tdescr *td, siginfo_t *si, ucontext_t *uc) +{ + uint64_t my_tpidr2 = get_tpidr2(); + + my_tpidr2++; + fprintf(stderr, "Setting TPIDR2 to %lx\n", my_tpidr2); + set_tpidr2(my_tpidr2); + + return 0; +} + +static void check_tpidr2(struct tdescr *td) +{ + uint64_t tpidr2 = get_tpidr2(); + + td->pass = tpidr2 == initial_tpidr2; + + if (td->pass) + fprintf(stderr, "TPIDR2 restored\n"); + else + fprintf(stderr, "TPIDR2 was %lx but is now %lx\n", + initial_tpidr2, tpidr2); +} + +struct tdescr tde = { + .name = "TPIDR2 restore", + .descr = "Validate that TPIDR2 is restored from the sigframe", + .feats_required = FEAT_SME, + .timeout = 3, + .sig_trig = SIGUSR1, + .init = save_tpidr2, + .run = modify_tpidr2, + .check_result = check_tpidr2, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/zt_regs.c b/tools/testing/selftests/arm64/signal/testcases/zt_regs.c index e1eb4d5c027a..2e384d731618 100644 --- a/tools/testing/selftests/arm64/signal/testcases/zt_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/zt_regs.c @@ -65,6 +65,7 @@ int zt_regs_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc) if (memcmp(zeros, (char *)zt + ZT_SIG_REGS_OFFSET, ZT_SIG_REGS_SIZE(zt->nregs)) != 0) { fprintf(stderr, "ZT data invalid\n"); + free(zeros); return 1; } diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 116fecf80ca1..f1aebabfb017 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -13,6 +13,7 @@ test_dev_cgroup /test_progs /test_progs-no_alu32 /test_progs-bpf_gcc +/test_progs-cpuv4 test_verifier_log feature test_sock @@ -36,12 +37,14 @@ test_cpp *.lskel.h /no_alu32 /bpf_gcc +/cpuv4 /host-tools /tools /runqslower /bench /veristat /sign-file +/uprobe_multi *.ko *.tmp xskxceiver diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 index 0a6837f97c32..3babaf3eee5c 100644 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ b/tools/testing/selftests/bpf/DENYLIST.aarch64 @@ -1,85 +1,11 @@ -bloom_filter_map # libbpf: prog 'check_bloom': failed to attach: ERROR: strerror_r(-524)=22 -bpf_cookie/lsm -bpf_cookie/multi_kprobe_attach_api -bpf_cookie/multi_kprobe_link_api -bpf_cookie/trampoline -bpf_loop/check_callback_fn_stop # link unexpected error: -524 -bpf_loop/check_invalid_flags -bpf_loop/check_nested_calls -bpf_loop/check_non_constant_callback -bpf_loop/check_nr_loops -bpf_loop/check_null_callback_ctx -bpf_loop/check_stack -bpf_mod_race # bpf_mod_kfunc_race__attach unexpected error: -524 (errno 524) -bpf_tcp_ca/dctcp_fallback -btf_dump/btf_dump: var_data # find type id unexpected find type id: actual -2 < expected 0 -cgroup_hierarchical_stats # attach unexpected error: -524 (errno 524) -d_path/basic # setup attach failed: -524 -deny_namespace # attach unexpected error: -524 (errno 524) -fentry_fexit # fentry_attach unexpected error: -1 (errno 524) -fentry_test # fentry_attach unexpected error: -1 (errno 524) -fexit_sleep # fexit_attach fexit attach failed: -1 -fexit_stress # fexit attach unexpected fexit attach: actual -524 < expected 0 -fexit_test # fexit_attach unexpected error: -1 (errno 524) -get_func_args_test # get_func_args_test__attach unexpected error: -524 (errno 524) (trampoline) -get_func_ip_test # get_func_ip_test__attach unexpected error: -524 (errno 524) (trampoline) -htab_update/reenter_update -kfree_skb # attach fentry unexpected error: -524 (trampoline) -kfunc_call/subprog # extern (var ksym) 'bpf_prog_active': not found in kernel BTF -kfunc_call/subprog_lskel # skel unexpected error: -2 -kfunc_dynptr_param/dynptr_data_null # libbpf: prog 'dynptr_data_null': failed to attach: ERROR: strerror_r(-524)=22 -kprobe_multi_bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -kprobe_multi_test/attach_api_addrs # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -kprobe_multi_test/attach_api_pattern # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -kprobe_multi_test/attach_api_syms # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -kprobe_multi_test/bench_attach # bpf_program__attach_kprobe_multi_opts unexpected error: -95 -kprobe_multi_test/link_api_addrs # link_fd unexpected link_fd: actual -95 < expected 0 -kprobe_multi_test/link_api_syms # link_fd unexpected link_fd: actual -95 < expected 0 -kprobe_multi_test/skel_api # kprobe_multi__attach unexpected error: -524 (errno 524) -ksyms_module/libbpf # 'bpf_testmod_ksym_percpu': not found in kernel BTF -ksyms_module/lskel # test_ksyms_module_lskel__open_and_load unexpected error: -2 -libbpf_get_fd_by_id_opts # test_libbpf_get_fd_by_id_opts__attach unexpected error: -524 (errno 524) -linked_list -lookup_key # test_lookup_key__attach unexpected error: -524 (errno 524) -lru_bug # lru_bug__attach unexpected error: -524 (errno 524) -modify_return # modify_return__attach failed unexpected error: -524 (errno 524) -module_attach # skel_attach skeleton attach failed: -524 -module_fentry_shadow # bpf_link_create unexpected bpf_link_create: actual -524 < expected 0 -mptcp/base # run_test mptcp unexpected error: -524 (errno 524) -netcnt # packets unexpected packets: actual 10001 != expected 10000 -rcu_read_lock # failed to attach: ERROR: strerror_r(-524)=22 -recursion # skel_attach unexpected error: -524 (errno 524) -ringbuf # skel_attach skeleton attachment failed: -1 -setget_sockopt # attach_cgroup unexpected error: -524 -sk_storage_tracing # test_sk_storage_tracing__attach unexpected error: -524 (errno 524) -skc_to_unix_sock # could not attach BPF object unexpected error: -524 (errno 524) -socket_cookie # prog_attach unexpected error: -524 -stacktrace_build_id # compare_stack_ips stackmap vs. stack_amap err -1 errno 2 -task_local_storage/exit_creds # skel_attach unexpected error: -524 (errno 524) -task_local_storage/recursion # skel_attach unexpected error: -524 (errno 524) -test_bprm_opts # attach attach failed: -524 -test_ima # attach attach failed: -524 -test_local_storage # attach lsm attach failed: -524 -test_lsm # test_lsm_first_attach unexpected error: -524 (errno 524) -test_overhead # attach_fentry unexpected error: -524 -timer # timer unexpected error: -524 (errno 524) -timer_crash # timer_crash__attach unexpected error: -524 (errno 524) -timer_mim # timer_mim unexpected error: -524 (errno 524) -trace_printk # trace_printk__attach unexpected error: -1 (errno 524) -trace_vprintk # trace_vprintk__attach unexpected error: -1 (errno 524) -tracing_struct # tracing_struct__attach unexpected error: -524 (errno 524) -trampoline_count # attach_prog unexpected error: -524 -unpriv_bpf_disabled # skel_attach unexpected error: -524 (errno 524) -user_ringbuf/test_user_ringbuf_post_misaligned # misaligned_skel unexpected error: -524 (errno 524) -user_ringbuf/test_user_ringbuf_post_producer_wrong_offset -user_ringbuf/test_user_ringbuf_post_larger_than_ringbuf_sz -user_ringbuf/test_user_ringbuf_basic # ringbuf_basic_skel unexpected error: -524 (errno 524) -user_ringbuf/test_user_ringbuf_sample_full_ring_buffer -user_ringbuf/test_user_ringbuf_post_alignment_autoadjust -user_ringbuf/test_user_ringbuf_overfill -user_ringbuf/test_user_ringbuf_discards_properly_ignored -user_ringbuf/test_user_ringbuf_loop -user_ringbuf/test_user_ringbuf_msg_protocol -user_ringbuf/test_user_ringbuf_blocking_reserve -verify_pkcs7_sig # test_verify_pkcs7_sig__attach unexpected error: -524 (errno 524) -vmlinux # skel_attach skeleton attach failed: -524 +bpf_cookie/multi_kprobe_attach_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3 +bpf_cookie/multi_kprobe_link_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3 +fexit_sleep # The test never returns. The remaining tests cannot start. +kprobe_multi_bench_attach # needs CONFIG_FPROBE +kprobe_multi_test # needs CONFIG_FPROBE +module_attach # prog 'kprobe_multi': failed to auto-attach: -95 +fentry_test/fentry_many_args # fentry_many_args:FAIL:fentry_many_args_attach unexpected error: -524 +fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_many_args_attach unexpected error: -524 +fill_link_info/kprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95 +fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95 diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x index c7463f3ec3c0..5061d9e24c16 100644 --- a/tools/testing/selftests/bpf/DENYLIST.s390x +++ b/tools/testing/selftests/bpf/DENYLIST.s390x @@ -26,3 +26,4 @@ user_ringbuf # failed to find kernel BTF type ID of verif_stats # trace_vprintk__open_and_load unexpected error: -9 (?) xdp_bonding # failed to auto-attach program 'trace_on_entry': -524 (trampoline) xdp_metadata # JIT does not support calling kernel function (kfunc) +test_task_under_cgroup # JIT does not support calling kernel function (kfunc) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 28d2c77262be..caede9b574cb 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -12,7 +12,11 @@ BPFDIR := $(LIBDIR)/bpf TOOLSINCDIR := $(TOOLSDIR)/include BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool APIDIR := $(TOOLSINCDIR)/uapi +ifneq ($(O),) +GENDIR := $(O)/include/generated +else GENDIR := $(abspath ../../../../include/generated) +endif GENHDR := $(GENDIR)/autoconf.h HOSTPKG_CONFIG := pkg-config @@ -29,11 +33,16 @@ CFLAGS += -g -O0 -rdynamic -Wall -Werror $(GENFLAGS) $(SAN_CFLAGS) \ LDFLAGS += $(SAN_LDFLAGS) LDLIBS += -lelf -lz -lrt -lpthread -# Silence some warnings when compiled with clang ifneq ($(LLVM),) +# Silence some warnings when compiled with clang CFLAGS += -Wno-unused-command-line-argument endif +# Check whether bpf cpu=v4 is supported or not by clang +ifneq ($(shell $(CLANG) --target=bpf -mcpu=help 2>&1 | grep 'v4'),) +CLANG_CPUV4 := 1 +endif + # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ test_dev_cgroup \ @@ -41,10 +50,17 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test test_cgroup_storage \ test_tcpnotify_user test_sysctl \ test_progs-no_alu32 +TEST_INST_SUBDIRS := no_alu32 # Also test bpf-gcc, if present ifneq ($(BPF_GCC),) TEST_GEN_PROGS += test_progs-bpf_gcc +TEST_INST_SUBDIRS += bpf_gcc +endif + +ifneq ($(CLANG_CPUV4),) +TEST_GEN_PROGS += test_progs-cpuv4 +TEST_INST_SUBDIRS += cpuv4 endif TEST_GEN_FILES = test_lwt_ip_encap.bpf.o test_tc_edt.bpf.o @@ -88,8 +104,7 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \ xdp_features -TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read $(OUTPUT)/sign-file -TEST_GEN_FILES += liburandom_read.so +TEST_GEN_FILES += liburandom_read.so urandom_read sign-file # Emit succinct information message describing current building step # $1 - generic step name (e.g., CC, LINK, etc); @@ -332,7 +347,7 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \ OUTPUT=$(HOST_BUILD_DIR)/resolve_btfids/ BPFOBJ=$(HOST_BPFOBJ) # Get Clang's default includes on this system, as opposed to those seen by -# '-target bpf'. This fixes "missing" files on some architectures/distros, +# '--target=bpf'. This fixes "missing" files on some architectures/distros, # such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. # # Use '-idirafter': Don't interfere with include mechanics except where the @@ -373,12 +388,17 @@ $(OUTPUT)/cgroup_getset_retval_hooks.o: cgroup_getset_retval_hooks.h # $3 - CFLAGS define CLANG_BPF_BUILD_RULE $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) - $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -mcpu=v3 -o $2 + $(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v3 -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 define CLANG_NOALU32_BPF_BUILD_RULE $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) - $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -mcpu=v2 -o $2 + $(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v2 -o $2 +endef +# Similar to CLANG_BPF_BUILD_RULE, but with cpu-v4 +define CLANG_CPUV4_BPF_BUILD_RULE + $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) + $(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v4 -o $2 endef # Build BPF object using GCC define GCC_BPF_BUILD_RULE @@ -422,7 +442,7 @@ LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(ske # $eval()) and pass control to DEFINE_TEST_RUNNER_RULES. # Parameters: # $1 - test runner base binary name (e.g., test_progs) -# $2 - test runner extra "flavor" (e.g., no_alu32, gcc-bpf, etc) +# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, gcc-bpf, etc) define DEFINE_TEST_RUNNER TRUNNER_OUTPUT := $(OUTPUT)$(if $2,/)$2 @@ -450,7 +470,7 @@ endef # Using TRUNNER_XXX variables, provided by callers of DEFINE_TEST_RUNNER and # set up by DEFINE_TEST_RUNNER itself, create test runner build rules with: # $1 - test runner base binary name (e.g., test_progs) -# $2 - test runner extra "flavor" (e.g., no_alu32, gcc-bpf, etc) +# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, gcc-bpf, etc) define DEFINE_TEST_RUNNER_RULES ifeq ($($(TRUNNER_OUTPUT)-dir),) @@ -562,12 +582,13 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \ network_helpers.c testing_helpers.c \ btf_helpers.c flow_dissector_load.h \ cap_helpers.c test_loader.c xsk.c disasm.c \ - json_writer.c unpriv_helpers.c - + json_writer.c unpriv_helpers.c \ + ip_check_defrag_frags.h TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ $(OUTPUT)/liburandom_read.so \ $(OUTPUT)/xdp_synproxy \ $(OUTPUT)/sign-file \ + $(OUTPUT)/uprobe_multi \ ima_setup.sh \ verify_sig_setup.sh \ $(wildcard progs/btf_dump_test_case_*.c) \ @@ -581,6 +602,13 @@ TRUNNER_BPF_BUILD_RULE := CLANG_NOALU32_BPF_BUILD_RULE TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32)) +# Define test_progs-cpuv4 test runner. +ifneq ($(CLANG_CPUV4),) +TRUNNER_BPF_BUILD_RULE := CLANG_CPUV4_BPF_BUILD_RULE +TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) +$(eval $(call DEFINE_TEST_RUNNER,test_progs,cpuv4)) +endif + # Define test_progs BPF-GCC-flavored test runner. ifneq ($(BPF_GCC),) TRUNNER_BPF_BUILD_RULE := GCC_BPF_BUILD_RULE @@ -645,11 +673,13 @@ $(OUTPUT)/bench_local_storage.o: $(OUTPUT)/local_storage_bench.skel.h $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o: $(OUTPUT)/local_storage_rcu_tasks_trace_bench.skel.h $(OUTPUT)/bench_local_storage_create.o: $(OUTPUT)/bench_local_storage_create.skel.h $(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h +$(OUTPUT)/bench_htab_mem.o: $(OUTPUT)/htab_mem_bench.skel.h $(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ) $(OUTPUT)/bench: LDLIBS += -lm $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(TESTING_HELPERS) \ $(TRACE_HELPERS) \ + $(CGROUP_HELPERS) \ $(OUTPUT)/bench_count.o \ $(OUTPUT)/bench_rename.o \ $(OUTPUT)/bench_trigger.o \ @@ -662,6 +692,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o \ $(OUTPUT)/bench_bpf_hashmap_lookup.o \ $(OUTPUT)/bench_local_storage_create.o \ + $(OUTPUT)/bench_htab_mem.o \ # $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ @@ -671,14 +702,27 @@ $(OUTPUT)/veristat: $(OUTPUT)/veristat.o $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ +$(OUTPUT)/uprobe_multi: uprobe_multi.c + $(call msg,BINARY,,$@) + $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) -o $@ + EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ feature bpftool \ $(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h *.subskel.h \ - no_alu32 bpf_gcc bpf_testmod.ko \ + no_alu32 cpuv4 bpf_gcc bpf_testmod.ko \ liburandom_read.so) .PHONY: docs docs-clean # Delete partially updated (corrupted) files on error .DELETE_ON_ERROR: + +DEFAULT_INSTALL_RULE := $(INSTALL_RULE) +override define INSTALL_RULE + $(DEFAULT_INSTALL_RULE) + @for DIR in $(TEST_INST_SUBDIRS); do \ + mkdir -p $(INSTALL_PATH)/$$DIR; \ + rsync -a $(OUTPUT)/$$DIR/*.bpf.o $(INSTALL_PATH)/$$DIR;\ + done +endef diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index d9c080ac1796..73ce11b0547d 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -17,7 +17,7 @@ struct env env = { .duration_sec = 5, .affinity = false, .quiet = false, - .consumer_cnt = 1, + .consumer_cnt = 0, .producer_cnt = 1, }; @@ -279,6 +279,7 @@ extern struct argp bench_local_storage_rcu_tasks_trace_argp; extern struct argp bench_strncmp_argp; extern struct argp bench_hashmap_lookup_argp; extern struct argp bench_local_storage_create_argp; +extern struct argp bench_htab_mem_argp; static const struct argp_child bench_parsers[] = { { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 }, @@ -290,6 +291,7 @@ static const struct argp_child bench_parsers[] = { "local_storage RCU Tasks Trace slowdown benchmark", 0 }, { &bench_hashmap_lookup_argp, 0, "Hashmap lookup benchmark", 0 }, { &bench_local_storage_create_argp, 0, "local-storage-create benchmark", 0 }, + { &bench_htab_mem_argp, 0, "hash map memory benchmark", 0 }, {}, }; @@ -441,12 +443,14 @@ static void setup_timer() static void set_thread_affinity(pthread_t thread, int cpu) { cpu_set_t cpuset; + int err; CPU_ZERO(&cpuset); CPU_SET(cpu, &cpuset); - if (pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset)) { + err = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset); + if (err) { fprintf(stderr, "setting affinity to CPU #%d failed: %d\n", - cpu, errno); + cpu, -err); exit(1); } } @@ -467,7 +471,7 @@ static int next_cpu(struct cpu_set *cpu_set) exit(1); } - return cpu_set->next_cpu++; + return cpu_set->next_cpu++ % env.nr_cpus; } static struct bench_state { @@ -518,6 +522,7 @@ extern const struct bench bench_local_storage_cache_hashmap_control; extern const struct bench bench_local_storage_tasks_trace; extern const struct bench bench_bpf_hashmap_lookup; extern const struct bench bench_local_storage_create; +extern const struct bench bench_htab_mem; static const struct bench *benchs[] = { &bench_count_global, @@ -559,6 +564,7 @@ static const struct bench *benchs[] = { &bench_local_storage_tasks_trace, &bench_bpf_hashmap_lookup, &bench_local_storage_create, + &bench_htab_mem, }; static void find_benchmark(void) @@ -605,7 +611,7 @@ static void setup_benchmark(void) bench->consumer_thread, (void *)(long)i); if (err) { fprintf(stderr, "failed to create consumer thread #%d: %d\n", - i, -errno); + i, -err); exit(1); } if (env.affinity) @@ -624,7 +630,7 @@ static void setup_benchmark(void) bench->producer_thread, (void *)(long)i); if (err) { fprintf(stderr, "failed to create producer thread #%d: %d\n", - i, -errno); + i, -err); exit(1); } if (env.affinity) @@ -657,6 +663,7 @@ static void collect_measurements(long delta_ns) { int main(int argc, char **argv) { + env.nr_cpus = get_nprocs(); parse_cmdline_args_init(argc, argv); if (env.list) { diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h index 402729c6a3ac..68180d8f8558 100644 --- a/tools/testing/selftests/bpf/bench.h +++ b/tools/testing/selftests/bpf/bench.h @@ -27,6 +27,7 @@ struct env { bool quiet; int consumer_cnt; int producer_cnt; + int nr_cpus; struct cpu_set prod_cpus; struct cpu_set cons_cpus; }; @@ -80,15 +81,6 @@ void grace_period_latency_basic_stats(struct bench_res res[], int res_cnt, void grace_period_ticks_basic_stats(struct bench_res res[], int res_cnt, struct basic_stats *gp_stat); -static inline __u64 get_time_ns(void) -{ - struct timespec t; - - clock_gettime(CLOCK_MONOTONIC, &t); - - return (u64)t.tv_sec * 1000000000 + t.tv_nsec; -} - static inline void atomic_inc(long *value) { (void)__atomic_add_fetch(value, 1, __ATOMIC_RELAXED); diff --git a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c index 7c8ccc108313..e289dd1a14ee 100644 --- a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c +++ b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c @@ -107,9 +107,9 @@ const struct argp bench_bloom_map_argp = { static void validate(void) { - if (env.consumer_cnt != 1) { + if (env.consumer_cnt != 0) { fprintf(stderr, - "The bloom filter benchmarks do not support multi-consumer use\n"); + "The bloom filter benchmarks do not support consumer\n"); exit(1); } } @@ -421,18 +421,12 @@ static void measure(struct bench_res *res) last_false_hits = total_false_hits; } -static void *consumer(void *input) -{ - return NULL; -} - const struct bench bench_bloom_lookup = { .name = "bloom-lookup", .argp = &bench_bloom_map_argp, .validate = validate, .setup = bloom_lookup_setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -444,7 +438,6 @@ const struct bench bench_bloom_update = { .validate = validate, .setup = bloom_update_setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -456,7 +449,6 @@ const struct bench bench_bloom_false_positive = { .validate = validate, .setup = false_positive_setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = false_hits_report_progress, .report_final = false_hits_report_final, @@ -468,7 +460,6 @@ const struct bench bench_hashmap_without_bloom = { .validate = validate, .setup = hashmap_no_bloom_setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -480,7 +471,6 @@ const struct bench bench_hashmap_with_bloom = { .validate = validate, .setup = hashmap_with_bloom_setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c index 75abe8137b6c..ee1dc12c5e5e 100644 --- a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c @@ -14,8 +14,8 @@ static struct ctx { static void validate(void) { - if (env.consumer_cnt != 1) { - fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); exit(1); } } @@ -30,11 +30,6 @@ static void *producer(void *input) return NULL; } -static void *consumer(void *input) -{ - return NULL; -} - static void measure(struct bench_res *res) { } @@ -88,7 +83,6 @@ const struct bench bench_bpf_hashmap_full_update = { .validate = validate, .setup = setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = NULL, .report_final = hashmap_report_final, diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c index 8dbb02f75cff..279ff1b8b5b2 100644 --- a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c @@ -113,8 +113,8 @@ const struct argp bench_hashmap_lookup_argp = { static void validate(void) { - if (env.consumer_cnt != 1) { - fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); exit(1); } @@ -134,11 +134,6 @@ static void *producer(void *input) return NULL; } -static void *consumer(void *input) -{ - return NULL; -} - static void measure(struct bench_res *res) { } @@ -276,7 +271,6 @@ const struct bench bench_bpf_hashmap_lookup = { .validate = validate, .setup = setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = NULL, .report_final = hashmap_report_final, diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c index d8a0394e10b1..a705cfb2bccc 100644 --- a/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c +++ b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c @@ -47,8 +47,8 @@ const struct argp bench_bpf_loop_argp = { static void validate(void) { - if (env.consumer_cnt != 1) { - fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); exit(1); } } @@ -62,11 +62,6 @@ static void *producer(void *input) return NULL; } -static void *consumer(void *input) -{ - return NULL; -} - static void measure(struct bench_res *res) { res->hits = atomic_swap(&ctx.skel->bss->hits, 0); @@ -99,7 +94,6 @@ const struct bench bench_bpf_loop = { .validate = validate, .setup = setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = ops_report_progress, .report_final = ops_report_final, diff --git a/tools/testing/selftests/bpf/benchs/bench_count.c b/tools/testing/selftests/bpf/benchs/bench_count.c index 078972ce208e..ba89ed3936b7 100644 --- a/tools/testing/selftests/bpf/benchs/bench_count.c +++ b/tools/testing/selftests/bpf/benchs/bench_count.c @@ -18,11 +18,6 @@ static void *count_global_producer(void *input) return NULL; } -static void *count_global_consumer(void *input) -{ - return NULL; -} - static void count_global_measure(struct bench_res *res) { struct count_global_ctx *ctx = &count_global_ctx; @@ -40,7 +35,7 @@ static void count_local_setup(void) { struct count_local_ctx *ctx = &count_local_ctx; - ctx->hits = calloc(env.consumer_cnt, sizeof(*ctx->hits)); + ctx->hits = calloc(env.producer_cnt, sizeof(*ctx->hits)); if (!ctx->hits) exit(1); } @@ -56,11 +51,6 @@ static void *count_local_producer(void *input) return NULL; } -static void *count_local_consumer(void *input) -{ - return NULL; -} - static void count_local_measure(struct bench_res *res) { struct count_local_ctx *ctx = &count_local_ctx; @@ -74,7 +64,6 @@ static void count_local_measure(struct bench_res *res) const struct bench bench_count_global = { .name = "count-global", .producer_thread = count_global_producer, - .consumer_thread = count_global_consumer, .measure = count_global_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -84,7 +73,6 @@ const struct bench bench_count_local = { .name = "count-local", .setup = count_local_setup, .producer_thread = count_local_producer, - .consumer_thread = count_local_consumer, .measure = count_local_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, diff --git a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c new file mode 100644 index 000000000000..9146d3f414d2 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c @@ -0,0 +1,350 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#include <argp.h> +#include <stdbool.h> +#include <pthread.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/param.h> +#include <fcntl.h> + +#include "bench.h" +#include "bpf_util.h" +#include "cgroup_helpers.h" +#include "htab_mem_bench.skel.h" + +struct htab_mem_use_case { + const char *name; + const char **progs; + /* Do synchronization between addition thread and deletion thread */ + bool need_sync; +}; + +static struct htab_mem_ctx { + const struct htab_mem_use_case *uc; + struct htab_mem_bench *skel; + pthread_barrier_t *notify; + int fd; +} ctx; + +const char *ow_progs[] = {"overwrite", NULL}; +const char *batch_progs[] = {"batch_add_batch_del", NULL}; +const char *add_del_progs[] = {"add_only", "del_only", NULL}; +const static struct htab_mem_use_case use_cases[] = { + { .name = "overwrite", .progs = ow_progs }, + { .name = "batch_add_batch_del", .progs = batch_progs }, + { .name = "add_del_on_diff_cpu", .progs = add_del_progs, .need_sync = true }, +}; + +static struct htab_mem_args { + u32 value_size; + const char *use_case; + bool preallocated; +} args = { + .value_size = 8, + .use_case = "overwrite", + .preallocated = false, +}; + +enum { + ARG_VALUE_SIZE = 10000, + ARG_USE_CASE = 10001, + ARG_PREALLOCATED = 10002, +}; + +static const struct argp_option opts[] = { + { "value-size", ARG_VALUE_SIZE, "VALUE_SIZE", 0, + "Set the value size of hash map (default 8)" }, + { "use-case", ARG_USE_CASE, "USE_CASE", 0, + "Set the use case of hash map: overwrite|batch_add_batch_del|add_del_on_diff_cpu" }, + { "preallocated", ARG_PREALLOCATED, NULL, 0, "use preallocated hash map" }, + {}, +}; + +static error_t htab_mem_parse_arg(int key, char *arg, struct argp_state *state) +{ + switch (key) { + case ARG_VALUE_SIZE: + args.value_size = strtoul(arg, NULL, 10); + if (args.value_size > 4096) { + fprintf(stderr, "too big value size %u\n", args.value_size); + argp_usage(state); + } + break; + case ARG_USE_CASE: + args.use_case = strdup(arg); + if (!args.use_case) { + fprintf(stderr, "no mem for use-case\n"); + argp_usage(state); + } + break; + case ARG_PREALLOCATED: + args.preallocated = true; + break; + default: + return ARGP_ERR_UNKNOWN; + } + + return 0; +} + +const struct argp bench_htab_mem_argp = { + .options = opts, + .parser = htab_mem_parse_arg, +}; + +static void htab_mem_validate(void) +{ + if (!strcmp(use_cases[2].name, args.use_case) && env.producer_cnt % 2) { + fprintf(stderr, "%s needs an even number of producers\n", args.use_case); + exit(1); + } +} + +static int htab_mem_bench_init_barriers(void) +{ + pthread_barrier_t *barriers; + unsigned int i, nr; + + if (!ctx.uc->need_sync) + return 0; + + nr = (env.producer_cnt + 1) / 2; + barriers = calloc(nr, sizeof(*barriers)); + if (!barriers) + return -1; + + /* Used for synchronization between two threads */ + for (i = 0; i < nr; i++) + pthread_barrier_init(&barriers[i], NULL, 2); + + ctx.notify = barriers; + return 0; +} + +static void htab_mem_bench_exit_barriers(void) +{ + unsigned int i, nr; + + if (!ctx.notify) + return; + + nr = (env.producer_cnt + 1) / 2; + for (i = 0; i < nr; i++) + pthread_barrier_destroy(&ctx.notify[i]); + free(ctx.notify); +} + +static const struct htab_mem_use_case *htab_mem_find_use_case_or_exit(const char *name) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(use_cases); i++) { + if (!strcmp(name, use_cases[i].name)) + return &use_cases[i]; + } + + fprintf(stderr, "no such use-case: %s\n", name); + fprintf(stderr, "available use case:"); + for (i = 0; i < ARRAY_SIZE(use_cases); i++) + fprintf(stderr, " %s", use_cases[i].name); + fprintf(stderr, "\n"); + exit(1); +} + +static void htab_mem_setup(void) +{ + struct bpf_map *map; + const char **names; + int err; + + setup_libbpf(); + + ctx.uc = htab_mem_find_use_case_or_exit(args.use_case); + err = htab_mem_bench_init_barriers(); + if (err) { + fprintf(stderr, "failed to init barrier\n"); + exit(1); + } + + ctx.fd = cgroup_setup_and_join("/htab_mem"); + if (ctx.fd < 0) + goto cleanup; + + ctx.skel = htab_mem_bench__open(); + if (!ctx.skel) { + fprintf(stderr, "failed to open skeleton\n"); + goto cleanup; + } + + map = ctx.skel->maps.htab; + bpf_map__set_value_size(map, args.value_size); + /* Ensure that different CPUs can operate on different subset */ + bpf_map__set_max_entries(map, MAX(8192, 64 * env.nr_cpus)); + if (args.preallocated) + bpf_map__set_map_flags(map, bpf_map__map_flags(map) & ~BPF_F_NO_PREALLOC); + + names = ctx.uc->progs; + while (*names) { + struct bpf_program *prog; + + prog = bpf_object__find_program_by_name(ctx.skel->obj, *names); + if (!prog) { + fprintf(stderr, "no such program %s\n", *names); + goto cleanup; + } + bpf_program__set_autoload(prog, true); + names++; + } + ctx.skel->bss->nr_thread = env.producer_cnt; + + err = htab_mem_bench__load(ctx.skel); + if (err) { + fprintf(stderr, "failed to load skeleton\n"); + goto cleanup; + } + err = htab_mem_bench__attach(ctx.skel); + if (err) { + fprintf(stderr, "failed to attach skeleton\n"); + goto cleanup; + } + return; + +cleanup: + htab_mem_bench__destroy(ctx.skel); + htab_mem_bench_exit_barriers(); + if (ctx.fd >= 0) { + close(ctx.fd); + cleanup_cgroup_environment(); + } + exit(1); +} + +static void htab_mem_add_fn(pthread_barrier_t *notify) +{ + while (true) { + /* Do addition */ + (void)syscall(__NR_getpgid, 0); + /* Notify deletion thread to do deletion */ + pthread_barrier_wait(notify); + /* Wait for deletion to complete */ + pthread_barrier_wait(notify); + } +} + +static void htab_mem_delete_fn(pthread_barrier_t *notify) +{ + while (true) { + /* Wait for addition to complete */ + pthread_barrier_wait(notify); + /* Do deletion */ + (void)syscall(__NR_getppid); + /* Notify addition thread to do addition */ + pthread_barrier_wait(notify); + } +} + +static void *htab_mem_producer(void *arg) +{ + pthread_barrier_t *notify; + int seq; + + if (!ctx.uc->need_sync) { + while (true) + (void)syscall(__NR_getpgid, 0); + return NULL; + } + + seq = (long)arg; + notify = &ctx.notify[seq / 2]; + if (seq & 1) + htab_mem_delete_fn(notify); + else + htab_mem_add_fn(notify); + return NULL; +} + +static void htab_mem_read_mem_cgrp_file(const char *name, unsigned long *value) +{ + char buf[32]; + ssize_t got; + int fd; + + fd = openat(ctx.fd, name, O_RDONLY); + if (fd < 0) { + /* cgroup v1 ? */ + fprintf(stderr, "no %s\n", name); + *value = 0; + return; + } + + got = read(fd, buf, sizeof(buf) - 1); + if (got <= 0) { + *value = 0; + return; + } + buf[got] = 0; + + *value = strtoull(buf, NULL, 0); + + close(fd); +} + +static void htab_mem_measure(struct bench_res *res) +{ + res->hits = atomic_swap(&ctx.skel->bss->op_cnt, 0) / env.producer_cnt; + htab_mem_read_mem_cgrp_file("memory.current", &res->gp_ct); +} + +static void htab_mem_report_progress(int iter, struct bench_res *res, long delta_ns) +{ + double loop, mem; + + loop = res->hits / 1000.0 / (delta_ns / 1000000000.0); + mem = res->gp_ct / 1048576.0; + printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0); + printf("per-prod-op %7.2lfk/s, memory usage %7.2lfMiB\n", loop, mem); +} + +static void htab_mem_report_final(struct bench_res res[], int res_cnt) +{ + double mem_mean = 0.0, mem_stddev = 0.0; + double loop_mean = 0.0, loop_stddev = 0.0; + unsigned long peak_mem; + int i; + + for (i = 0; i < res_cnt; i++) { + loop_mean += res[i].hits / 1000.0 / (0.0 + res_cnt); + mem_mean += res[i].gp_ct / 1048576.0 / (0.0 + res_cnt); + } + if (res_cnt > 1) { + for (i = 0; i < res_cnt; i++) { + loop_stddev += (loop_mean - res[i].hits / 1000.0) * + (loop_mean - res[i].hits / 1000.0) / + (res_cnt - 1.0); + mem_stddev += (mem_mean - res[i].gp_ct / 1048576.0) * + (mem_mean - res[i].gp_ct / 1048576.0) / + (res_cnt - 1.0); + } + loop_stddev = sqrt(loop_stddev); + mem_stddev = sqrt(mem_stddev); + } + + htab_mem_read_mem_cgrp_file("memory.peak", &peak_mem); + printf("Summary: per-prod-op %7.2lf \u00B1 %7.2lfk/s, memory usage %7.2lf \u00B1 %7.2lfMiB," + " peak memory usage %7.2lfMiB\n", + loop_mean, loop_stddev, mem_mean, mem_stddev, peak_mem / 1048576.0); + + cleanup_cgroup_environment(); +} + +const struct bench bench_htab_mem = { + .name = "htab-mem", + .argp = &bench_htab_mem_argp, + .validate = htab_mem_validate, + .setup = htab_mem_setup, + .producer_thread = htab_mem_producer, + .measure = htab_mem_measure, + .report_progress = htab_mem_report_progress, + .report_final = htab_mem_report_final, +}; diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage.c b/tools/testing/selftests/bpf/benchs/bench_local_storage.c index d4b2817306d4..452499428ceb 100644 --- a/tools/testing/selftests/bpf/benchs/bench_local_storage.c +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage.c @@ -74,8 +74,8 @@ static void validate(void) fprintf(stderr, "benchmark doesn't support multi-producer!\n"); exit(1); } - if (env.consumer_cnt != 1) { - fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); exit(1); } @@ -230,11 +230,6 @@ static inline void trigger_bpf_program(void) syscall(__NR_getpgid); } -static void *consumer(void *input) -{ - return NULL; -} - static void *producer(void *input) { while (true) @@ -259,7 +254,6 @@ const struct bench bench_local_storage_cache_seq_get = { .validate = validate, .setup = local_storage_cache_get_setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = local_storage_report_progress, .report_final = local_storage_report_final, @@ -271,7 +265,6 @@ const struct bench bench_local_storage_cache_interleaved_get = { .validate = validate, .setup = local_storage_cache_get_interleaved_setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = local_storage_report_progress, .report_final = local_storage_report_final, @@ -283,7 +276,6 @@ const struct bench bench_local_storage_cache_hashmap_control = { .validate = validate, .setup = hashmap_setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = local_storage_report_progress, .report_final = local_storage_report_final, diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c index cff703f90e95..b36de42ee4d9 100644 --- a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c @@ -71,7 +71,7 @@ const struct argp bench_local_storage_create_argp = { static void validate(void) { - if (env.consumer_cnt > 1) { + if (env.consumer_cnt != 0) { fprintf(stderr, "local-storage-create benchmark does not need consumer\n"); exit(1); @@ -143,11 +143,6 @@ static void measure(struct bench_res *res) res->drops = atomic_swap(&skel->bss->kmalloc_cnts, 0); } -static void *consumer(void *input) -{ - return NULL; -} - static void *sk_producer(void *input) { struct thread *t = &threads[(long)(input)]; @@ -257,7 +252,6 @@ const struct bench bench_local_storage_create = { .validate = validate, .setup = setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = report_progress, .report_final = report_final, diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c index d5eb5587f2aa..edf0b00418c1 100644 --- a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c +++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c @@ -72,8 +72,8 @@ static void validate(void) fprintf(stderr, "benchmark doesn't support multi-producer!\n"); exit(1); } - if (env.consumer_cnt != 1) { - fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); exit(1); } @@ -197,11 +197,6 @@ static void measure(struct bench_res *res) ctx.prev_kthread_stime = ticks; } -static void *consumer(void *input) -{ - return NULL; -} - static void *producer(void *input) { while (true) @@ -262,7 +257,6 @@ const struct bench bench_local_storage_tasks_trace = { .validate = validate, .setup = local_storage_tasks_trace_setup, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = report_progress, .report_final = report_final, diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c index 3c203b6d6a6e..bf66893c7a33 100644 --- a/tools/testing/selftests/bpf/benchs/bench_rename.c +++ b/tools/testing/selftests/bpf/benchs/bench_rename.c @@ -17,8 +17,8 @@ static void validate(void) fprintf(stderr, "benchmark doesn't support multi-producer!\n"); exit(1); } - if (env.consumer_cnt != 1) { - fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); exit(1); } } @@ -106,17 +106,11 @@ static void setup_fexit(void) attach_bpf(ctx.skel->progs.prog5); } -static void *consumer(void *input) -{ - return NULL; -} - const struct bench bench_rename_base = { .name = "rename-base", .validate = validate, .setup = setup_base, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -127,7 +121,6 @@ const struct bench bench_rename_kprobe = { .validate = validate, .setup = setup_kprobe, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -138,7 +131,6 @@ const struct bench bench_rename_kretprobe = { .validate = validate, .setup = setup_kretprobe, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -149,7 +141,6 @@ const struct bench bench_rename_rawtp = { .validate = validate, .setup = setup_rawtp, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -160,7 +151,6 @@ const struct bench bench_rename_fentry = { .validate = validate, .setup = setup_fentry, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -171,7 +161,6 @@ const struct bench bench_rename_fexit = { .validate = validate, .setup = setup_fexit, .producer_thread = producer, - .consumer_thread = consumer, .measure = measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c index fc91fdac4faa..e1ee979e6acc 100644 --- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c +++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c @@ -96,7 +96,7 @@ static inline void bufs_trigger_batch(void) static void bufs_validate(void) { if (env.consumer_cnt != 1) { - fprintf(stderr, "rb-libbpf benchmark doesn't support multi-consumer!\n"); + fprintf(stderr, "rb-libbpf benchmark needs one consumer!\n"); exit(1); } @@ -399,7 +399,7 @@ static void perfbuf_libbpf_setup(void) ctx->skel = perfbuf_setup_skeleton(); memset(&attr, 0, sizeof(attr)); - attr.config = PERF_COUNT_SW_BPF_OUTPUT, + attr.config = PERF_COUNT_SW_BPF_OUTPUT; attr.type = PERF_TYPE_SOFTWARE; attr.sample_type = PERF_SAMPLE_RAW; /* notify only every Nth sample */ diff --git a/tools/testing/selftests/bpf/benchs/bench_strncmp.c b/tools/testing/selftests/bpf/benchs/bench_strncmp.c index d3fad2ba6916..a5e1428fd7a0 100644 --- a/tools/testing/selftests/bpf/benchs/bench_strncmp.c +++ b/tools/testing/selftests/bpf/benchs/bench_strncmp.c @@ -50,8 +50,8 @@ const struct argp bench_strncmp_argp = { static void strncmp_validate(void) { - if (env.consumer_cnt != 1) { - fprintf(stderr, "strncmp benchmark doesn't support multi-consumer!\n"); + if (env.consumer_cnt != 0) { + fprintf(stderr, "strncmp benchmark doesn't support consumer!\n"); exit(1); } } @@ -128,11 +128,6 @@ static void *strncmp_producer(void *ctx) return NULL; } -static void *strncmp_consumer(void *ctx) -{ - return NULL; -} - static void strncmp_measure(struct bench_res *res) { res->hits = atomic_swap(&ctx.skel->bss->hits, 0); @@ -144,7 +139,6 @@ const struct bench bench_strncmp_no_helper = { .validate = strncmp_validate, .setup = strncmp_no_helper_setup, .producer_thread = strncmp_producer, - .consumer_thread = strncmp_consumer, .measure = strncmp_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -156,7 +150,6 @@ const struct bench bench_strncmp_helper = { .validate = strncmp_validate, .setup = strncmp_helper_setup, .producer_thread = strncmp_producer, - .consumer_thread = strncmp_consumer, .measure = strncmp_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 0c481de2833d..dbd362771d6a 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -13,8 +13,8 @@ static struct counter base_hits; static void trigger_validate(void) { - if (env.consumer_cnt != 1) { - fprintf(stderr, "benchmark doesn't support multi-consumer!\n"); + if (env.consumer_cnt != 0) { + fprintf(stderr, "benchmark doesn't support consumer!\n"); exit(1); } } @@ -103,11 +103,6 @@ static void trigger_fmodret_setup(void) attach_bpf(ctx.skel->progs.bench_trigger_fmodret); } -static void *trigger_consumer(void *input) -{ - return NULL; -} - /* make sure call is not inlined and not avoided by compiler, so __weak and * inline asm volatile in the body of the function * @@ -205,7 +200,6 @@ const struct bench bench_trig_base = { .name = "trig-base", .validate = trigger_validate, .producer_thread = trigger_base_producer, - .consumer_thread = trigger_consumer, .measure = trigger_base_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -216,7 +210,6 @@ const struct bench bench_trig_tp = { .validate = trigger_validate, .setup = trigger_tp_setup, .producer_thread = trigger_producer, - .consumer_thread = trigger_consumer, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -227,7 +220,6 @@ const struct bench bench_trig_rawtp = { .validate = trigger_validate, .setup = trigger_rawtp_setup, .producer_thread = trigger_producer, - .consumer_thread = trigger_consumer, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -238,7 +230,6 @@ const struct bench bench_trig_kprobe = { .validate = trigger_validate, .setup = trigger_kprobe_setup, .producer_thread = trigger_producer, - .consumer_thread = trigger_consumer, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -249,7 +240,6 @@ const struct bench bench_trig_fentry = { .validate = trigger_validate, .setup = trigger_fentry_setup, .producer_thread = trigger_producer, - .consumer_thread = trigger_consumer, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -260,7 +250,6 @@ const struct bench bench_trig_fentry_sleep = { .validate = trigger_validate, .setup = trigger_fentry_sleep_setup, .producer_thread = trigger_producer, - .consumer_thread = trigger_consumer, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -271,7 +260,6 @@ const struct bench bench_trig_fmodret = { .validate = trigger_validate, .setup = trigger_fmodret_setup, .producer_thread = trigger_producer, - .consumer_thread = trigger_consumer, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -281,7 +269,6 @@ const struct bench bench_trig_uprobe_base = { .name = "trig-uprobe-base", .setup = NULL, /* no uprobe/uretprobe is attached */ .producer_thread = uprobe_base_producer, - .consumer_thread = trigger_consumer, .measure = trigger_base_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -291,7 +278,6 @@ const struct bench bench_trig_uprobe_with_nop = { .name = "trig-uprobe-with-nop", .setup = uprobe_setup_with_nop, .producer_thread = uprobe_producer_with_nop, - .consumer_thread = trigger_consumer, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -301,7 +287,6 @@ const struct bench bench_trig_uretprobe_with_nop = { .name = "trig-uretprobe-with-nop", .setup = uretprobe_setup_with_nop, .producer_thread = uprobe_producer_with_nop, - .consumer_thread = trigger_consumer, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -311,7 +296,6 @@ const struct bench bench_trig_uprobe_without_nop = { .name = "trig-uprobe-without-nop", .setup = uprobe_setup_without_nop, .producer_thread = uprobe_producer_without_nop, - .consumer_thread = trigger_consumer, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, @@ -321,7 +305,6 @@ const struct bench bench_trig_uretprobe_without_nop = { .name = "trig-uretprobe-without-nop", .setup = uretprobe_setup_without_nop, .producer_thread = uprobe_producer_without_nop, - .consumer_thread = trigger_consumer, .measure = trigger_measure, .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, diff --git a/tools/testing/selftests/bpf/benchs/run_bench_htab_mem.sh b/tools/testing/selftests/bpf/benchs/run_bench_htab_mem.sh new file mode 100755 index 000000000000..9ff5832463a2 --- /dev/null +++ b/tools/testing/selftests/bpf/benchs/run_bench_htab_mem.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./benchs/run_common.sh + +set -eufo pipefail + +htab_mem() +{ + echo -n "per-prod-op: " + echo -n "$*" | sed -E "s/.* per-prod-op\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+k\/s).*/\1/" + echo -n -e ", avg mem: " + echo -n "$*" | sed -E "s/.* memory usage\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+MiB).*/\1/" + echo -n ", peak mem: " + echo "$*" | sed -E "s/.* peak memory usage\s+([0-9]+\.[0-9]+MiB).*/\1/" +} + +summarize_htab_mem() +{ + local bench="$1" + local summary=$(echo $2 | tail -n1) + + printf "%-20s %s\n" "$bench" "$(htab_mem $summary)" +} + +htab_mem_bench() +{ + local name + + for name in overwrite batch_add_batch_del add_del_on_diff_cpu + do + summarize_htab_mem "$name" "$($RUN_BENCH htab-mem --use-case $name -p8 "$@")" + done +} + +header "preallocated" +htab_mem_bench "--preallocated" + +header "normal bpf ma" +htab_mem_bench diff --git a/tools/testing/selftests/bpf/benchs/run_bench_rename.sh b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh index 16f774b1cdbe..7b281dbe4165 100755 --- a/tools/testing/selftests/bpf/benchs/run_bench_rename.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh @@ -2,7 +2,7 @@ set -eufo pipefail -for i in base kprobe kretprobe rawtp fentry fexit fmodret +for i in base kprobe kretprobe rawtp fentry fexit do summary=$(sudo ./bench -w2 -d5 -a rename-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-) printf "%-10s: %s\n" $i "$summary" diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh index ada028aa9007..91e3567962ff 100755 --- a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh @@ -4,46 +4,48 @@ source ./benchs/run_common.sh set -eufo pipefail +RUN_RB_BENCH="$RUN_BENCH -c1" + header "Single-producer, parallel producer" for b in rb-libbpf rb-custom pb-libbpf pb-custom; do - summarize $b "$($RUN_BENCH $b)" + summarize $b "$($RUN_RB_BENCH $b)" done header "Single-producer, parallel producer, sampled notification" for b in rb-libbpf rb-custom pb-libbpf pb-custom; do - summarize $b "$($RUN_BENCH --rb-sampled $b)" + summarize $b "$($RUN_RB_BENCH --rb-sampled $b)" done header "Single-producer, back-to-back mode" for b in rb-libbpf rb-custom pb-libbpf pb-custom; do - summarize $b "$($RUN_BENCH --rb-b2b $b)" - summarize $b-sampled "$($RUN_BENCH --rb-sampled --rb-b2b $b)" + summarize $b "$($RUN_RB_BENCH --rb-b2b $b)" + summarize $b-sampled "$($RUN_RB_BENCH --rb-sampled --rb-b2b $b)" done header "Ringbuf back-to-back, effect of sample rate" for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do - summarize "rb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b rb-custom)" + summarize "rb-sampled-$b" "$($RUN_RB_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b rb-custom)" done header "Perfbuf back-to-back, effect of sample rate" for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do - summarize "pb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b pb-custom)" + summarize "pb-sampled-$b" "$($RUN_RB_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b pb-custom)" done header "Ringbuf back-to-back, reserve+commit vs output" -summarize "reserve" "$($RUN_BENCH --rb-b2b rb-custom)" -summarize "output" "$($RUN_BENCH --rb-b2b --rb-use-output rb-custom)" +summarize "reserve" "$($RUN_RB_BENCH --rb-b2b rb-custom)" +summarize "output" "$($RUN_RB_BENCH --rb-b2b --rb-use-output rb-custom)" header "Ringbuf sampled, reserve+commit vs output" -summarize "reserve-sampled" "$($RUN_BENCH --rb-sampled rb-custom)" -summarize "output-sampled" "$($RUN_BENCH --rb-sampled --rb-use-output rb-custom)" +summarize "reserve-sampled" "$($RUN_RB_BENCH --rb-sampled rb-custom)" +summarize "output-sampled" "$($RUN_RB_BENCH --rb-sampled --rb-use-output rb-custom)" header "Single-producer, consumer/producer competing on the same CPU, low batch count" for b in rb-libbpf rb-custom pb-libbpf pb-custom; do - summarize $b "$($RUN_BENCH --rb-batch-cnt 1 --rb-sample-rate 1 --prod-affinity 0 --cons-affinity 0 $b)" + summarize $b "$($RUN_RB_BENCH --rb-batch-cnt 1 --rb-sample-rate 1 --prod-affinity 0 --cons-affinity 0 $b)" done header "Ringbuf, multi-producer contention" for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do - summarize "rb-libbpf nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)" + summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)" done diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 8c993ec8ceea..642dda0e758a 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -35,4 +35,10 @@ extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset, extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset, void *buffer, __u32 buffer__szk) __ksym; +extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym; +extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym; +extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym; +extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym; +extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym; + #endif diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 52785ba671e6..cefc5dd72573 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -9,6 +9,7 @@ #include <linux/sysfs.h> #include <linux/tracepoint.h> #include "bpf_testmod.h" +#include "bpf_testmod_kfunc.h" #define CREATE_TRACE_POINTS #include "bpf_testmod-events.h" @@ -33,6 +34,11 @@ struct bpf_testmod_struct_arg_3 { int b[]; }; +struct bpf_testmod_struct_arg_4 { + u64 a; + int b; +}; + __diag_push(); __diag_ignore_all("-Wmissing-prototypes", "Global functions as their definitions will be in bpf_testmod.ko BTF"); @@ -74,6 +80,30 @@ bpf_testmod_test_struct_arg_6(struct bpf_testmod_struct_arg_3 *a) { return bpf_testmod_test_struct_arg_result; } +noinline int +bpf_testmod_test_struct_arg_7(u64 a, void *b, short c, int d, void *e, + struct bpf_testmod_struct_arg_4 f) +{ + bpf_testmod_test_struct_arg_result = a + (long)b + c + d + + (long)e + f.a + f.b; + return bpf_testmod_test_struct_arg_result; +} + +noinline int +bpf_testmod_test_struct_arg_8(u64 a, void *b, short c, int d, void *e, + struct bpf_testmod_struct_arg_4 f, int g) +{ + bpf_testmod_test_struct_arg_result = a + (long)b + c + d + + (long)e + f.a + f.b + g; + return bpf_testmod_test_struct_arg_result; +} + +noinline int +bpf_testmod_test_arg_ptr_to_struct(struct bpf_testmod_struct_arg_1 *a) { + bpf_testmod_test_struct_arg_result = a->a; + return bpf_testmod_test_struct_arg_result; +} + __bpf_kfunc void bpf_testmod_test_mod_kfunc(int i) { @@ -190,7 +220,19 @@ noinline int bpf_testmod_fentry_test3(char a, int b, u64 c) return a + b + c; } -__diag_pop(); +noinline int bpf_testmod_fentry_test7(u64 a, void *b, short c, int d, + void *e, char f, int g) +{ + return a + (long)b + c + d + (long)e + f + g; +} + +noinline int bpf_testmod_fentry_test11(u64 a, void *b, short c, int d, + void *e, char f, int g, + unsigned int h, long i, __u64 j, + unsigned long k) +{ + return a + (long)b + c + d + (long)e + f + g + h + i + j + k; +} int bpf_testmod_fentry_ok; @@ -204,9 +246,10 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, .off = off, .len = len, }; - struct bpf_testmod_struct_arg_1 struct_arg1 = {10}; + struct bpf_testmod_struct_arg_1 struct_arg1 = {10}, struct_arg1_2 = {-1}; struct bpf_testmod_struct_arg_2 struct_arg2 = {2, 3}; struct bpf_testmod_struct_arg_3 *struct_arg3; + struct bpf_testmod_struct_arg_4 struct_arg4 = {21, 22}; int i = 1; while (bpf_testmod_return_ptr(i)) @@ -217,6 +260,12 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, (void)bpf_testmod_test_struct_arg_3(1, 4, struct_arg2); (void)bpf_testmod_test_struct_arg_4(struct_arg1, 1, 2, 3, struct_arg2); (void)bpf_testmod_test_struct_arg_5(); + (void)bpf_testmod_test_struct_arg_7(16, (void *)17, 18, 19, + (void *)20, struct_arg4); + (void)bpf_testmod_test_struct_arg_8(16, (void *)17, 18, 19, + (void *)20, struct_arg4, 23); + + (void)bpf_testmod_test_arg_ptr_to_struct(&struct_arg1_2); struct_arg3 = kmalloc((sizeof(struct bpf_testmod_struct_arg_3) + sizeof(int)), GFP_KERNEL); @@ -244,7 +293,11 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, if (bpf_testmod_fentry_test1(1) != 2 || bpf_testmod_fentry_test2(2, 3) != 5 || - bpf_testmod_fentry_test3(4, 5, 6) != 15) + bpf_testmod_fentry_test3(4, 5, 6) != 15 || + bpf_testmod_fentry_test7(16, (void *)17, 18, 19, (void *)20, + 21, 22) != 133 || + bpf_testmod_fentry_test11(16, (void *)17, 18, 19, (void *)20, + 21, 22, 23, 24, 25, 26) != 231) goto out; bpf_testmod_fentry_ok = 1; @@ -272,6 +325,14 @@ bpf_testmod_test_write(struct file *file, struct kobject *kobj, EXPORT_SYMBOL(bpf_testmod_test_write); ALLOW_ERROR_INJECTION(bpf_testmod_test_write, ERRNO); +noinline int bpf_fentry_shadow_test(int a) +{ + return a + 2; +} +EXPORT_SYMBOL_GPL(bpf_fentry_shadow_test); + +__diag_pop(); + static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { .attr = { .name = "bpf_testmod", .mode = 0666, }, .read = bpf_testmod_test_read, @@ -289,8 +350,171 @@ static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = { .set = &bpf_testmod_common_kfunc_ids, }; +__bpf_kfunc u64 bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d) +{ + return a + b + c + d; +} + +__bpf_kfunc int bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b) +{ + return a + b; +} + +__bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk) +{ + return sk; +} + +__bpf_kfunc long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d) +{ + /* Provoke the compiler to assume that the caller has sign-extended a, + * b and c on platforms where this is required (e.g. s390x). + */ + return (long)a + (long)b + (long)c + d; +} + +static struct prog_test_ref_kfunc prog_test_struct = { + .a = 42, + .b = 108, + .next = &prog_test_struct, + .cnt = REFCOUNT_INIT(1), +}; + +__bpf_kfunc struct prog_test_ref_kfunc * +bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) +{ + refcount_inc(&prog_test_struct.cnt); + return &prog_test_struct; +} + +__bpf_kfunc void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p) +{ + WARN_ON_ONCE(1); +} + +__bpf_kfunc struct prog_test_member * +bpf_kfunc_call_memb_acquire(void) +{ + WARN_ON_ONCE(1); + return NULL; +} + +__bpf_kfunc void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p) +{ + WARN_ON_ONCE(1); +} + +static int *__bpf_kfunc_call_test_get_mem(struct prog_test_ref_kfunc *p, const int size) +{ + if (size > 2 * sizeof(int)) + return NULL; + + return (int *)p; +} + +__bpf_kfunc int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, + const int rdwr_buf_size) +{ + return __bpf_kfunc_call_test_get_mem(p, rdwr_buf_size); +} + +__bpf_kfunc int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, + const int rdonly_buf_size) +{ + return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); +} + +/* the next 2 ones can't be really used for testing expect to ensure + * that the verifier rejects the call. + * Acquire functions must return struct pointers, so these ones are + * failing. + */ +__bpf_kfunc int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, + const int rdonly_buf_size) +{ + return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size); +} + +__bpf_kfunc void bpf_kfunc_call_int_mem_release(int *p) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len) +{ +} + +__bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) +{ + /* p != NULL, but p->cnt could be 0 */ +} + +__bpf_kfunc void bpf_kfunc_call_test_destructive(void) +{ +} + +__bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) +{ + return arg; +} + BTF_SET8_START(bpf_testmod_check_kfunc_ids) BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc) +BTF_ID_FLAGS(func, bpf_kfunc_call_test1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test3) +BTF_ID_FLAGS(func, bpf_kfunc_call_test4) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_memb1_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdwr_mem, KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdonly_mem, KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_acq_rdonly_mem, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_kfunc_call_int_mem_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass_ctx) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail1) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail2) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg) +BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset) BTF_SET8_END(bpf_testmod_check_kfunc_ids) static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = { @@ -298,12 +522,6 @@ static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = { .set = &bpf_testmod_check_kfunc_ids, }; -noinline int bpf_fentry_shadow_test(int a) -{ - return a + 2; -} -EXPORT_SYMBOL_GPL(bpf_fentry_shadow_test); - extern int bpf_fentry_test1(int a); static int bpf_testmod_init(void) @@ -312,6 +530,8 @@ static int bpf_testmod_init(void) ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &bpf_testmod_common_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_testmod_kfunc_set); + ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_testmod_kfunc_set); if (ret < 0) return ret; if (bpf_fentry_test1(0) < 0) diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h new file mode 100644 index 000000000000..f5c5b1375c24 --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h @@ -0,0 +1,107 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _BPF_TESTMOD_KFUNC_H +#define _BPF_TESTMOD_KFUNC_H + +#ifndef __KERNEL__ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#else +#define __ksym +struct prog_test_member1 { + int a; +}; + +struct prog_test_member { + struct prog_test_member1 m; + int c; +}; + +struct prog_test_ref_kfunc { + int a; + int b; + struct prog_test_member memb; + struct prog_test_ref_kfunc *next; + refcount_t cnt; +}; +#endif + +struct prog_test_pass1 { + int x0; + struct { + int x1; + struct { + int x2; + struct { + int x3; + }; + }; + }; +}; + +struct prog_test_pass2 { + int len; + short arr1[4]; + struct { + char arr2[4]; + unsigned long arr3[8]; + } x; +}; + +struct prog_test_fail1 { + void *p; + int x; +}; + +struct prog_test_fail2 { + int x8; + struct prog_test_pass1 x; +}; + +struct prog_test_fail3 { + int len; + char arr1[2]; + char arr2[]; +}; + +struct prog_test_ref_kfunc * +bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) __ksym; +void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; +void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) __ksym; + +void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym; +int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym; +int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; +int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; +void bpf_kfunc_call_int_mem_release(int *p) __ksym; + +/* The bpf_kfunc_call_test_static_unused_arg is defined as static, + * but bpf program compilation needs to see it as global symbol. + */ +#ifndef __KERNEL__ +u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) __ksym; +#endif + +void bpf_testmod_test_mod_kfunc(int i) __ksym; + +__u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, + __u32 c, __u64 d) __ksym; +int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym; +struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym; +long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym; + +void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym; +void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym; +void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym; +void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym; + +void bpf_kfunc_call_test_destructive(void) __ksym; + +void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p); +struct prog_test_member *bpf_kfunc_call_memb_acquire(void); +void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p); +void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p); +void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p); +void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p); +void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len); +#endif /* _BPF_TESTMOD_KFUNC_H */ diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 9e95b37a7dff..2caee8423ee0 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -278,6 +278,18 @@ int join_cgroup(const char *relative_path) } /** + * join_root_cgroup() - Join the root cgroup + * + * This function joins the root cgroup. + * + * On success, it returns 0, otherwise on failure it returns 1. + */ +int join_root_cgroup(void) +{ + return join_cgroup_from_top(CGROUP_MOUNT_PATH); +} + +/** * join_parent_cgroup() - Join a cgroup in the parent process workdir * @relative_path: The cgroup path, relative to parent process workdir, to join * diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index f099a166c94d..5c2cb9c8b546 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -22,6 +22,7 @@ void remove_cgroup(const char *relative_path); unsigned long long get_cgroup_id(const char *relative_path); int join_cgroup(const char *relative_path); +int join_root_cgroup(void); int join_parent_cgroup(const char *relative_path); int setup_cgroup_environment(void); diff --git a/tools/testing/selftests/bpf/cgroup_tcp_skb.h b/tools/testing/selftests/bpf/cgroup_tcp_skb.h new file mode 100644 index 000000000000..7f6b24f102fb --- /dev/null +++ b/tools/testing/selftests/bpf/cgroup_tcp_skb.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +/* Define states of a socket to tracking messages sending to and from the + * socket. + * + * These states are based on rfc9293 with some modifications to support + * tracking of messages sent out from a socket. For example, when a SYN is + * received, a new socket is transiting to the SYN_RECV state defined in + * rfc9293. But, we put it in SYN_RECV_SENDING_SYN_ACK state and when + * SYN-ACK is sent out, it moves to SYN_RECV state. With this modification, + * we can track the message sent out from a socket. + */ + +#ifndef __CGROUP_TCP_SKB_H__ +#define __CGROUP_TCP_SKB_H__ + +enum { + INIT, + CLOSED, + SYN_SENT, + SYN_RECV_SENDING_SYN_ACK, + SYN_RECV, + ESTABLISHED, + FIN_WAIT1, + FIN_WAIT2, + CLOSE_WAIT_SENDING_ACK, + CLOSE_WAIT, + CLOSING, + LAST_ACK, + TIME_WAIT_SENDING_ACK, + TIME_WAIT, +}; + +#endif /* __CGROUP_TCP_SKB_H__ */ diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 63cd4ab70171..e41eb33b2704 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -4,6 +4,7 @@ CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y CONFIG_BPF=y CONFIG_BPF_EVENTS=y CONFIG_BPF_JIT=y +CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_BPF_LIRC_MODE2=y CONFIG_BPF_LSM=y CONFIG_BPF_STREAM_PARSER=y @@ -13,6 +14,10 @@ CONFIG_CGROUP_BPF=y CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_USER_API_HASH=y +CONFIG_DEBUG_INFO=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_DWARF4=y +CONFIG_DUMMY=y CONFIG_DYNAMIC_FTRACE=y CONFIG_FPROBE=y CONFIG_FTRACE_SYSCALLS=y @@ -56,10 +61,12 @@ CONFIG_NET_IPGRE=y CONFIG_NET_IPGRE_DEMUX=y CONFIG_NET_IPIP=y CONFIG_NET_MPLS_GSO=y +CONFIG_NET_SCH_FQ=y CONFIG_NET_SCH_INGRESS=y CONFIG_NET_SCHED=y CONFIG_NETDEVSIM=y CONFIG_NETFILTER=y +CONFIG_NETFILTER_ADVANCED=y CONFIG_NETFILTER_SYNPROXY=y CONFIG_NETFILTER_XT_CONNMARK=y CONFIG_NETFILTER_XT_MATCH_STATE=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index b650b2e617b8..2e70a6048278 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -20,7 +20,6 @@ CONFIG_BLK_DEV_THROTTLING=y CONFIG_BONDING=y CONFIG_BOOTTIME_TRACING=y CONFIG_BPF_JIT_ALWAYS_ON=y -CONFIG_BPF_KPROBE_OVERRIDE=y CONFIG_BPF_PRELOAD=y CONFIG_BPF_PRELOAD_UMD=y CONFIG_BPFILTER=y diff --git a/tools/testing/selftests/bpf/generate_udp_fragments.py b/tools/testing/selftests/bpf/generate_udp_fragments.py new file mode 100755 index 000000000000..2b8a1187991c --- /dev/null +++ b/tools/testing/selftests/bpf/generate_udp_fragments.py @@ -0,0 +1,90 @@ +#!/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +This script helps generate fragmented UDP packets. + +While it is technically possible to dynamically generate +fragmented packets in C, it is much harder to read and write +said code. `scapy` is relatively industry standard and really +easy to read / write. + +So we choose to write this script that generates a valid C +header. Rerun script and commit generated file after any +modifications. +""" + +import argparse +import os + +from scapy.all import * + + +# These constants must stay in sync with `ip_check_defrag.c` +VETH1_ADDR = "172.16.1.200" +VETH0_ADDR6 = "fc00::100" +VETH1_ADDR6 = "fc00::200" +CLIENT_PORT = 48878 +SERVER_PORT = 48879 +MAGIC_MESSAGE = "THIS IS THE ORIGINAL MESSAGE, PLEASE REASSEMBLE ME" + + +def print_header(f): + f.write("// SPDX-License-Identifier: GPL-2.0\n") + f.write("/* DO NOT EDIT -- this file is generated */\n") + f.write("\n") + f.write("#ifndef _IP_CHECK_DEFRAG_FRAGS_H\n") + f.write("#define _IP_CHECK_DEFRAG_FRAGS_H\n") + f.write("\n") + f.write("#include <stdint.h>\n") + f.write("\n") + + +def print_frags(f, frags, v6): + for idx, frag in enumerate(frags): + # 10 bytes per line to keep width in check + chunks = [frag[i : i + 10] for i in range(0, len(frag), 10)] + chunks_fmted = [", ".join([str(hex(b)) for b in chunk]) for chunk in chunks] + suffix = "6" if v6 else "" + + f.write(f"static uint8_t frag{suffix}_{idx}[] = {{\n") + for chunk in chunks_fmted: + f.write(f"\t{chunk},\n") + f.write(f"}};\n") + + +def print_trailer(f): + f.write("\n") + f.write("#endif /* _IP_CHECK_DEFRAG_FRAGS_H */\n") + + +def main(f): + # srcip of 0 is filled in by IP_HDRINCL + sip = "0.0.0.0" + sip6 = VETH0_ADDR6 + dip = VETH1_ADDR + dip6 = VETH1_ADDR6 + sport = CLIENT_PORT + dport = SERVER_PORT + payload = MAGIC_MESSAGE.encode() + + # Disable UDPv4 checksums to keep code simpler + pkt = IP(src=sip,dst=dip) / UDP(sport=sport,dport=dport,chksum=0) / Raw(load=payload) + # UDPv6 requires a checksum + # Also pin the ipv6 fragment header ID, otherwise it's a random value + pkt6 = IPv6(src=sip6,dst=dip6) / IPv6ExtHdrFragment(id=0xBEEF) / UDP(sport=sport,dport=dport) / Raw(load=payload) + + frags = [f.build() for f in pkt.fragment(24)] + frags6 = [f.build() for f in fragment6(pkt6, 72)] + + print_header(f) + print_frags(f, frags, False) + print_frags(f, frags6, True) + print_trailer(f) + + +if __name__ == "__main__": + dir = os.path.dirname(os.path.realpath(__file__)) + header = f"{dir}/ip_check_defrag_frags.h" + with open(header, "w") as f: + main(f) diff --git a/tools/testing/selftests/bpf/gnu/stubs.h b/tools/testing/selftests/bpf/gnu/stubs.h index 719225b16626..1c638d9dce1a 100644 --- a/tools/testing/selftests/bpf/gnu/stubs.h +++ b/tools/testing/selftests/bpf/gnu/stubs.h @@ -1 +1 @@ -/* dummy .h to trick /usr/include/features.h to work with 'clang -target bpf' */ +/* dummy .h to trick /usr/include/features.h to work with 'clang --target=bpf' */ diff --git a/tools/testing/selftests/bpf/ip_check_defrag_frags.h b/tools/testing/selftests/bpf/ip_check_defrag_frags.h new file mode 100644 index 000000000000..70ab7e9fa22b --- /dev/null +++ b/tools/testing/selftests/bpf/ip_check_defrag_frags.h @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +/* DO NOT EDIT -- this file is generated */ + +#ifndef _IP_CHECK_DEFRAG_FRAGS_H +#define _IP_CHECK_DEFRAG_FRAGS_H + +#include <stdint.h> + +static uint8_t frag_0[] = { + 0x45, 0x0, 0x0, 0x2c, 0x0, 0x1, 0x20, 0x0, 0x40, 0x11, + 0xac, 0xe8, 0x0, 0x0, 0x0, 0x0, 0xac, 0x10, 0x1, 0xc8, + 0xbe, 0xee, 0xbe, 0xef, 0x0, 0x3a, 0x0, 0x0, 0x54, 0x48, + 0x49, 0x53, 0x20, 0x49, 0x53, 0x20, 0x54, 0x48, 0x45, 0x20, + 0x4f, 0x52, 0x49, 0x47, +}; +static uint8_t frag_1[] = { + 0x45, 0x0, 0x0, 0x2c, 0x0, 0x1, 0x20, 0x3, 0x40, 0x11, + 0xac, 0xe5, 0x0, 0x0, 0x0, 0x0, 0xac, 0x10, 0x1, 0xc8, + 0x49, 0x4e, 0x41, 0x4c, 0x20, 0x4d, 0x45, 0x53, 0x53, 0x41, + 0x47, 0x45, 0x2c, 0x20, 0x50, 0x4c, 0x45, 0x41, 0x53, 0x45, + 0x20, 0x52, 0x45, 0x41, +}; +static uint8_t frag_2[] = { + 0x45, 0x0, 0x0, 0x1e, 0x0, 0x1, 0x0, 0x6, 0x40, 0x11, + 0xcc, 0xf0, 0x0, 0x0, 0x0, 0x0, 0xac, 0x10, 0x1, 0xc8, + 0x53, 0x53, 0x45, 0x4d, 0x42, 0x4c, 0x45, 0x20, 0x4d, 0x45, +}; +static uint8_t frag6_0[] = { + 0x60, 0x0, 0x0, 0x0, 0x0, 0x20, 0x2c, 0x40, 0xfc, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x1, 0x0, 0xfc, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0, + 0x11, 0x0, 0x0, 0x1, 0x0, 0x0, 0xbe, 0xef, 0xbe, 0xee, + 0xbe, 0xef, 0x0, 0x3a, 0xd0, 0xf8, 0x54, 0x48, 0x49, 0x53, + 0x20, 0x49, 0x53, 0x20, 0x54, 0x48, 0x45, 0x20, 0x4f, 0x52, + 0x49, 0x47, +}; +static uint8_t frag6_1[] = { + 0x60, 0x0, 0x0, 0x0, 0x0, 0x20, 0x2c, 0x40, 0xfc, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x1, 0x0, 0xfc, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0, + 0x11, 0x0, 0x0, 0x19, 0x0, 0x0, 0xbe, 0xef, 0x49, 0x4e, + 0x41, 0x4c, 0x20, 0x4d, 0x45, 0x53, 0x53, 0x41, 0x47, 0x45, + 0x2c, 0x20, 0x50, 0x4c, 0x45, 0x41, 0x53, 0x45, 0x20, 0x52, + 0x45, 0x41, +}; +static uint8_t frag6_2[] = { + 0x60, 0x0, 0x0, 0x0, 0x0, 0x12, 0x2c, 0x40, 0xfc, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x1, 0x0, 0xfc, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0, + 0x11, 0x0, 0x0, 0x30, 0x0, 0x0, 0xbe, 0xef, 0x53, 0x53, + 0x45, 0x4d, 0x42, 0x4c, 0x45, 0x20, 0x4d, 0x45, +}; + +#endif /* _IP_CHECK_DEFRAG_FRAGS_H */ diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c new file mode 100644 index 000000000000..1a9eeefda9a8 --- /dev/null +++ b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c @@ -0,0 +1,447 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Isovalent */ + +#include <errno.h> +#include <unistd.h> +#include <pthread.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include <bpf_util.h> +#include <test_maps.h> + +#include "map_percpu_stats.skel.h" + +#define MAX_ENTRIES 16384 +#define MAX_ENTRIES_HASH_OF_MAPS 64 +#define N_THREADS 8 +#define MAX_MAP_KEY_SIZE 4 + +static void map_info(int map_fd, struct bpf_map_info *info) +{ + __u32 len = sizeof(*info); + int ret; + + memset(info, 0, sizeof(*info)); + + ret = bpf_obj_get_info_by_fd(map_fd, info, &len); + CHECK(ret < 0, "bpf_obj_get_info_by_fd", "error: %s\n", strerror(errno)); +} + +static const char *map_type_to_s(__u32 type) +{ + switch (type) { + case BPF_MAP_TYPE_HASH: + return "HASH"; + case BPF_MAP_TYPE_PERCPU_HASH: + return "PERCPU_HASH"; + case BPF_MAP_TYPE_LRU_HASH: + return "LRU_HASH"; + case BPF_MAP_TYPE_LRU_PERCPU_HASH: + return "LRU_PERCPU_HASH"; + case BPF_MAP_TYPE_HASH_OF_MAPS: + return "BPF_MAP_TYPE_HASH_OF_MAPS"; + default: + return "<define-me>"; + } +} + +static __u32 map_count_elements(__u32 type, int map_fd) +{ + __u32 key = -1; + int n = 0; + + while (!bpf_map_get_next_key(map_fd, &key, &key)) + n++; + return n; +} + +#define BATCH true + +static void delete_and_lookup_batch(int map_fd, void *keys, __u32 count) +{ + static __u8 values[(8 << 10) * MAX_ENTRIES]; + void *in_batch = NULL, *out_batch; + __u32 save_count = count; + int ret; + + ret = bpf_map_lookup_and_delete_batch(map_fd, + &in_batch, &out_batch, + keys, values, &count, + NULL); + + /* + * Despite what uapi header says, lookup_and_delete_batch will return + * -ENOENT in case we successfully have deleted all elements, so check + * this separately + */ + CHECK(ret < 0 && (errno != ENOENT || !count), "bpf_map_lookup_and_delete_batch", + "error: %s\n", strerror(errno)); + + CHECK(count != save_count, + "bpf_map_lookup_and_delete_batch", + "deleted not all elements: removed=%u expected=%u\n", + count, save_count); +} + +static void delete_all_elements(__u32 type, int map_fd, bool batch) +{ + static __u8 val[8 << 10]; /* enough for 1024 CPUs */ + __u32 key = -1; + void *keys; + __u32 i, n; + int ret; + + keys = calloc(MAX_MAP_KEY_SIZE, MAX_ENTRIES); + CHECK(!keys, "calloc", "error: %s\n", strerror(errno)); + + for (n = 0; !bpf_map_get_next_key(map_fd, &key, &key); n++) + memcpy(keys + n*MAX_MAP_KEY_SIZE, &key, MAX_MAP_KEY_SIZE); + + if (batch) { + /* Can't mix delete_batch and delete_and_lookup_batch because + * they have different semantics in relation to the keys + * argument. However, delete_batch utilize map_delete_elem, + * so we actually test it in non-batch scenario */ + delete_and_lookup_batch(map_fd, keys, n); + } else { + /* Intentionally mix delete and lookup_and_delete so we can test both */ + for (i = 0; i < n; i++) { + void *keyp = keys + i*MAX_MAP_KEY_SIZE; + + if (i % 2 || type == BPF_MAP_TYPE_HASH_OF_MAPS) { + ret = bpf_map_delete_elem(map_fd, keyp); + CHECK(ret < 0, "bpf_map_delete_elem", + "error: key %u: %s\n", i, strerror(errno)); + } else { + ret = bpf_map_lookup_and_delete_elem(map_fd, keyp, val); + CHECK(ret < 0, "bpf_map_lookup_and_delete_elem", + "error: key %u: %s\n", i, strerror(errno)); + } + } + } + + free(keys); +} + +static bool is_lru(__u32 map_type) +{ + return map_type == BPF_MAP_TYPE_LRU_HASH || + map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH; +} + +struct upsert_opts { + __u32 map_type; + int map_fd; + __u32 n; +}; + +static int create_small_hash(void) +{ + int map_fd; + + map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, "small", 4, 4, 4, NULL); + CHECK(map_fd < 0, "bpf_map_create()", "error:%s (name=%s)\n", + strerror(errno), "small"); + + return map_fd; +} + +static void *patch_map_thread(void *arg) +{ + struct upsert_opts *opts = arg; + int val; + int ret; + int i; + + for (i = 0; i < opts->n; i++) { + if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) + val = create_small_hash(); + else + val = rand(); + ret = bpf_map_update_elem(opts->map_fd, &i, &val, 0); + CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno)); + + if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) + close(val); + } + return NULL; +} + +static void upsert_elements(struct upsert_opts *opts) +{ + pthread_t threads[N_THREADS]; + int ret; + int i; + + for (i = 0; i < ARRAY_SIZE(threads); i++) { + ret = pthread_create(&i[threads], NULL, patch_map_thread, opts); + CHECK(ret != 0, "pthread_create", "error: %s\n", strerror(ret)); + } + + for (i = 0; i < ARRAY_SIZE(threads); i++) { + ret = pthread_join(i[threads], NULL); + CHECK(ret != 0, "pthread_join", "error: %s\n", strerror(ret)); + } +} + +static __u32 read_cur_elements(int iter_fd) +{ + char buf[64]; + ssize_t n; + __u32 ret; + + n = read(iter_fd, buf, sizeof(buf)-1); + CHECK(n <= 0, "read", "error: %s\n", strerror(errno)); + buf[n] = '\0'; + + errno = 0; + ret = (__u32)strtol(buf, NULL, 10); + CHECK(errno != 0, "strtol", "error: %s\n", strerror(errno)); + + return ret; +} + +static __u32 get_cur_elements(int map_id) +{ + struct map_percpu_stats *skel; + struct bpf_link *link; + __u32 n_elements; + int iter_fd; + int ret; + + skel = map_percpu_stats__open(); + CHECK(skel == NULL, "map_percpu_stats__open", "error: %s", strerror(errno)); + + skel->bss->target_id = map_id; + + ret = map_percpu_stats__load(skel); + CHECK(ret != 0, "map_percpu_stats__load", "error: %s", strerror(errno)); + + link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL); + CHECK(!link, "bpf_program__attach_iter", "error: %s\n", strerror(errno)); + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + CHECK(iter_fd < 0, "bpf_iter_create", "error: %s\n", strerror(errno)); + + n_elements = read_cur_elements(iter_fd); + + close(iter_fd); + bpf_link__destroy(link); + map_percpu_stats__destroy(skel); + + return n_elements; +} + +static void check_expected_number_elements(__u32 n_inserted, int map_fd, + struct bpf_map_info *info) +{ + __u32 n_real; + __u32 n_iter; + + /* Count the current number of elements in the map by iterating through + * all the map keys via bpf_get_next_key + */ + n_real = map_count_elements(info->type, map_fd); + + /* The "real" number of elements should be the same as the inserted + * number of elements in all cases except LRU maps, where some elements + * may have been evicted + */ + if (n_inserted == 0 || !is_lru(info->type)) + CHECK(n_inserted != n_real, "map_count_elements", + "n_real(%u) != n_inserted(%u)\n", n_real, n_inserted); + + /* Count the current number of elements in the map using an iterator */ + n_iter = get_cur_elements(info->id); + + /* Both counts should be the same, as all updates are over */ + CHECK(n_iter != n_real, "get_cur_elements", + "n_iter=%u, expected %u (map_type=%s,map_flags=%08x)\n", + n_iter, n_real, map_type_to_s(info->type), info->map_flags); +} + +static void __test(int map_fd) +{ + struct upsert_opts opts = { + .map_fd = map_fd, + }; + struct bpf_map_info info; + + map_info(map_fd, &info); + opts.map_type = info.type; + opts.n = info.max_entries; + + /* Reduce the number of elements we are updating such that we don't + * bump into -E2BIG from non-preallocated hash maps, but still will + * have some evictions for LRU maps */ + if (opts.map_type != BPF_MAP_TYPE_HASH_OF_MAPS) + opts.n -= 512; + else + opts.n /= 2; + + /* + * Upsert keys [0, n) under some competition: with random values from + * N_THREADS threads. Check values, then delete all elements and check + * values again. + */ + upsert_elements(&opts); + check_expected_number_elements(opts.n, map_fd, &info); + delete_all_elements(info.type, map_fd, !BATCH); + check_expected_number_elements(0, map_fd, &info); + + /* Now do the same, but using batch delete operations */ + upsert_elements(&opts); + check_expected_number_elements(opts.n, map_fd, &info); + delete_all_elements(info.type, map_fd, BATCH); + check_expected_number_elements(0, map_fd, &info); + + close(map_fd); +} + +static int map_create_opts(__u32 type, const char *name, + struct bpf_map_create_opts *map_opts, + __u32 key_size, __u32 val_size) +{ + int max_entries; + int map_fd; + + if (type == BPF_MAP_TYPE_HASH_OF_MAPS) + max_entries = MAX_ENTRIES_HASH_OF_MAPS; + else + max_entries = MAX_ENTRIES; + + map_fd = bpf_map_create(type, name, key_size, val_size, max_entries, map_opts); + CHECK(map_fd < 0, "bpf_map_create()", "error:%s (name=%s)\n", + strerror(errno), name); + + return map_fd; +} + +static int map_create(__u32 type, const char *name, struct bpf_map_create_opts *map_opts) +{ + return map_create_opts(type, name, map_opts, sizeof(int), sizeof(int)); +} + +static int create_hash(void) +{ + struct bpf_map_create_opts map_opts = { + .sz = sizeof(map_opts), + .map_flags = BPF_F_NO_PREALLOC, + }; + + return map_create(BPF_MAP_TYPE_HASH, "hash", &map_opts); +} + +static int create_percpu_hash(void) +{ + struct bpf_map_create_opts map_opts = { + .sz = sizeof(map_opts), + .map_flags = BPF_F_NO_PREALLOC, + }; + + return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash", &map_opts); +} + +static int create_hash_prealloc(void) +{ + return map_create(BPF_MAP_TYPE_HASH, "hash", NULL); +} + +static int create_percpu_hash_prealloc(void) +{ + return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash_prealloc", NULL); +} + +static int create_lru_hash(__u32 type, __u32 map_flags) +{ + struct bpf_map_create_opts map_opts = { + .sz = sizeof(map_opts), + .map_flags = map_flags, + }; + + return map_create(type, "lru_hash", &map_opts); +} + +static int create_hash_of_maps(void) +{ + struct bpf_map_create_opts map_opts = { + .sz = sizeof(map_opts), + .map_flags = BPF_F_NO_PREALLOC, + .inner_map_fd = create_small_hash(), + }; + int ret; + + ret = map_create_opts(BPF_MAP_TYPE_HASH_OF_MAPS, "hash_of_maps", + &map_opts, sizeof(int), sizeof(int)); + close(map_opts.inner_map_fd); + return ret; +} + +static void map_percpu_stats_hash(void) +{ + __test(create_hash()); + printf("test_%s:PASS\n", __func__); +} + +static void map_percpu_stats_percpu_hash(void) +{ + __test(create_percpu_hash()); + printf("test_%s:PASS\n", __func__); +} + +static void map_percpu_stats_hash_prealloc(void) +{ + __test(create_hash_prealloc()); + printf("test_%s:PASS\n", __func__); +} + +static void map_percpu_stats_percpu_hash_prealloc(void) +{ + __test(create_percpu_hash_prealloc()); + printf("test_%s:PASS\n", __func__); +} + +static void map_percpu_stats_lru_hash(void) +{ + __test(create_lru_hash(BPF_MAP_TYPE_LRU_HASH, 0)); + printf("test_%s:PASS\n", __func__); +} + +static void map_percpu_stats_lru_hash_no_common(void) +{ + __test(create_lru_hash(BPF_MAP_TYPE_LRU_HASH, BPF_F_NO_COMMON_LRU)); + printf("test_%s:PASS\n", __func__); +} + +static void map_percpu_stats_percpu_lru_hash(void) +{ + __test(create_lru_hash(BPF_MAP_TYPE_LRU_PERCPU_HASH, 0)); + printf("test_%s:PASS\n", __func__); +} + +static void map_percpu_stats_percpu_lru_hash_no_common(void) +{ + __test(create_lru_hash(BPF_MAP_TYPE_LRU_PERCPU_HASH, BPF_F_NO_COMMON_LRU)); + printf("test_%s:PASS\n", __func__); +} + +static void map_percpu_stats_hash_of_maps(void) +{ + __test(create_hash_of_maps()); + printf("test_%s:PASS\n", __func__); +} + +void test_map_percpu_stats(void) +{ + map_percpu_stats_hash(); + map_percpu_stats_percpu_hash(); + map_percpu_stats_hash_prealloc(); + map_percpu_stats_percpu_hash_prealloc(); + map_percpu_stats_lru_hash(); + map_percpu_stats_lru_hash_no_common(); + map_percpu_stats_percpu_lru_hash(); + map_percpu_stats_percpu_lru_hash_no_common(); + map_percpu_stats_hash_of_maps(); +} diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 596caa176582..da72a3a66230 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -270,14 +270,23 @@ int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts) opts = &default_opts; optlen = sizeof(type); - if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) { - log_err("getsockopt(SOL_TYPE)"); - return -1; + + if (opts->type) { + type = opts->type; + } else { + if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) { + log_err("getsockopt(SOL_TYPE)"); + return -1; + } } - if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) { - log_err("getsockopt(SOL_PROTOCOL)"); - return -1; + if (opts->proto) { + protocol = opts->proto; + } else { + if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) { + log_err("getsockopt(SOL_PROTOCOL)"); + return -1; + } } addrlen = sizeof(addr); @@ -301,8 +310,9 @@ int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts) strlen(opts->cc) + 1)) goto error_close; - if (connect_fd_to_addr(fd, &addr, addrlen, opts->must_fail)) - goto error_close; + if (!opts->noconnect) + if (connect_fd_to_addr(fd, &addr, addrlen, opts->must_fail)) + goto error_close; return fd; @@ -423,7 +433,33 @@ fail: void close_netns(struct nstoken *token) { + if (!token) + return; + ASSERT_OK(setns(token->orig_netns_fd, CLONE_NEWNET), "setns"); close(token->orig_netns_fd); free(token); } + +int get_socket_local_port(int sock_fd) +{ + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + int err; + + err = getsockname(sock_fd, (struct sockaddr *)&addr, &addrlen); + if (err < 0) + return err; + + if (addr.ss_family == AF_INET) { + struct sockaddr_in *sin = (struct sockaddr_in *)&addr; + + return sin->sin_port; + } else if (addr.ss_family == AF_INET6) { + struct sockaddr_in6 *sin = (struct sockaddr_in6 *)&addr; + + return sin->sin6_port; + } + + return -1; +} diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index f882c691b790..5eccc67d1a99 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -21,6 +21,9 @@ struct network_helper_opts { const char *cc; int timeout_ms; bool must_fail; + bool noconnect; + int type; + int proto; }; /* ipv4 test vector */ @@ -56,6 +59,7 @@ int fastopen_connect(int server_fd, const char *data, unsigned int data_len, int make_sockaddr(int family, const char *addr_str, __u16 port, struct sockaddr_storage *addr, socklen_t *len); char *ping_command(int family); +int get_socket_local_port(int sock_fd); struct nstoken; /** diff --git a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c index b17bfa0e0aac..bb143de68875 100644 --- a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c +++ b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c @@ -96,12 +96,80 @@ static void test_parse_test_list(void) goto error; ASSERT_OK(strcmp("*bpf_cookie*", set.tests[0].name), "test name"); ASSERT_OK(strcmp("*trace*", set.tests[0].subtests[0]), "subtest name"); + free_test_filter_set(&set); + + ASSERT_OK(parse_test_list("t/subtest1,t/subtest2", &set, true), + "parsing"); + if (!ASSERT_EQ(set.cnt, 1, "count of test filters")) + goto error; + if (!ASSERT_OK_PTR(set.tests, "test filters initialized")) + goto error; + if (!ASSERT_EQ(set.tests[0].subtest_cnt, 2, "subtest filters count")) + goto error; + ASSERT_OK(strcmp("t", set.tests[0].name), "test name"); + ASSERT_OK(strcmp("subtest1", set.tests[0].subtests[0]), "subtest name"); + ASSERT_OK(strcmp("subtest2", set.tests[0].subtests[1]), "subtest name"); error: free_test_filter_set(&set); } +static void test_parse_test_list_file(void) +{ + struct test_filter_set set; + char tmpfile[80]; + FILE *fp; + int fd; + + snprintf(tmpfile, sizeof(tmpfile), "/tmp/bpf_arg_parsing_test.XXXXXX"); + fd = mkstemp(tmpfile); + if (!ASSERT_GE(fd, 0, "create tmp")) + return; + + fp = fdopen(fd, "w"); + if (!ASSERT_NEQ(fp, NULL, "fdopen tmp")) { + close(fd); + goto out_remove; + } + + fprintf(fp, "# comment\n"); + fprintf(fp, " test_with_spaces \n"); + fprintf(fp, "testA/subtest # comment\n"); + fprintf(fp, "testB#comment with no space\n"); + fprintf(fp, "testB # duplicate\n"); + fprintf(fp, "testA/subtest # subtest duplicate\n"); + fprintf(fp, "testA/subtest2\n"); + fprintf(fp, "testC_no_eof_newline"); + fflush(fp); + + if (!ASSERT_OK(ferror(fp), "prepare tmp")) + goto out_fclose; + + init_test_filter_set(&set); + + ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file"); + + ASSERT_EQ(set.cnt, 4, "test count"); + ASSERT_OK(strcmp("test_with_spaces", set.tests[0].name), "test 0 name"); + ASSERT_EQ(set.tests[0].subtest_cnt, 0, "test 0 subtest count"); + ASSERT_OK(strcmp("testA", set.tests[1].name), "test 1 name"); + ASSERT_EQ(set.tests[1].subtest_cnt, 2, "test 1 subtest count"); + ASSERT_OK(strcmp("subtest", set.tests[1].subtests[0]), "test 1 subtest 0"); + ASSERT_OK(strcmp("subtest2", set.tests[1].subtests[1]), "test 1 subtest 1"); + ASSERT_OK(strcmp("testB", set.tests[2].name), "test 2 name"); + ASSERT_OK(strcmp("testC_no_eof_newline", set.tests[3].name), "test 3 name"); + + free_test_filter_set(&set); + +out_fclose: + fclose(fp); +out_remove: + remove(tmpfile); +} + void test_arg_parsing(void) { if (test__start_subtest("test_parse_test_list")) test_parse_test_list(); + if (test__start_subtest("test_parse_test_list_file")) + test_parse_test_list_file(); } diff --git a/tools/testing/selftests/bpf/prog_tests/assign_reuse.c b/tools/testing/selftests/bpf/prog_tests/assign_reuse.c new file mode 100644 index 000000000000..989ee4d9785b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/assign_reuse.c @@ -0,0 +1,199 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Isovalent */ +#include <uapi/linux/if_link.h> +#include <test_progs.h> + +#include <netinet/tcp.h> +#include <netinet/udp.h> + +#include "network_helpers.h" +#include "test_assign_reuse.skel.h" + +#define NS_TEST "assign_reuse" +#define LOOPBACK 1 +#define PORT 4443 + +static int attach_reuseport(int sock_fd, int prog_fd) +{ + return setsockopt(sock_fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, + &prog_fd, sizeof(prog_fd)); +} + +static __u64 cookie(int fd) +{ + __u64 cookie = 0; + socklen_t cookie_len = sizeof(cookie); + int ret; + + ret = getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, &cookie_len); + ASSERT_OK(ret, "cookie"); + ASSERT_GT(cookie, 0, "cookie_invalid"); + + return cookie; +} + +static int echo_test_udp(int fd_sv) +{ + struct sockaddr_storage addr = {}; + socklen_t len = sizeof(addr); + char buff[1] = {}; + int fd_cl = -1, ret; + + fd_cl = connect_to_fd(fd_sv, 100); + ASSERT_GT(fd_cl, 0, "create_client"); + ASSERT_EQ(getsockname(fd_cl, (void *)&addr, &len), 0, "getsockname"); + + ASSERT_EQ(send(fd_cl, buff, sizeof(buff), 0), 1, "send_client"); + + ret = recv(fd_sv, buff, sizeof(buff), 0); + if (ret < 0) { + close(fd_cl); + return errno; + } + + ASSERT_EQ(ret, 1, "recv_server"); + ASSERT_EQ(sendto(fd_sv, buff, sizeof(buff), 0, (void *)&addr, len), 1, "send_server"); + ASSERT_EQ(recv(fd_cl, buff, sizeof(buff), 0), 1, "recv_client"); + close(fd_cl); + return 0; +} + +static int echo_test_tcp(int fd_sv) +{ + char buff[1] = {}; + int fd_cl = -1, fd_sv_cl = -1; + + fd_cl = connect_to_fd(fd_sv, 100); + if (fd_cl < 0) + return errno; + + fd_sv_cl = accept(fd_sv, NULL, NULL); + ASSERT_GE(fd_sv_cl, 0, "accept_fd"); + + ASSERT_EQ(send(fd_cl, buff, sizeof(buff), 0), 1, "send_client"); + ASSERT_EQ(recv(fd_sv_cl, buff, sizeof(buff), 0), 1, "recv_server"); + ASSERT_EQ(send(fd_sv_cl, buff, sizeof(buff), 0), 1, "send_server"); + ASSERT_EQ(recv(fd_cl, buff, sizeof(buff), 0), 1, "recv_client"); + close(fd_sv_cl); + close(fd_cl); + return 0; +} + +void run_assign_reuse(int family, int sotype, const char *ip, __u16 port) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, + .ifindex = LOOPBACK, + .attach_point = BPF_TC_INGRESS, + ); + DECLARE_LIBBPF_OPTS(bpf_tc_opts, tc_opts, + .handle = 1, + .priority = 1, + ); + bool hook_created = false, tc_attached = false; + int ret, fd_tc, fd_accept, fd_drop, fd_map; + int *fd_sv = NULL; + __u64 fd_val; + struct test_assign_reuse *skel; + const int zero = 0; + + skel = test_assign_reuse__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + skel->rodata->dest_port = port; + + ret = test_assign_reuse__load(skel); + if (!ASSERT_OK(ret, "skel_load")) + goto cleanup; + + ASSERT_EQ(skel->bss->sk_cookie_seen, 0, "cookie_init"); + + fd_tc = bpf_program__fd(skel->progs.tc_main); + fd_accept = bpf_program__fd(skel->progs.reuse_accept); + fd_drop = bpf_program__fd(skel->progs.reuse_drop); + fd_map = bpf_map__fd(skel->maps.sk_map); + + fd_sv = start_reuseport_server(family, sotype, ip, port, 100, 1); + if (!ASSERT_NEQ(fd_sv, NULL, "start_reuseport_server")) + goto cleanup; + + ret = attach_reuseport(*fd_sv, fd_drop); + if (!ASSERT_OK(ret, "attach_reuseport")) + goto cleanup; + + fd_val = *fd_sv; + ret = bpf_map_update_elem(fd_map, &zero, &fd_val, BPF_NOEXIST); + if (!ASSERT_OK(ret, "bpf_sk_map")) + goto cleanup; + + ret = bpf_tc_hook_create(&tc_hook); + if (ret == 0) + hook_created = true; + ret = ret == -EEXIST ? 0 : ret; + if (!ASSERT_OK(ret, "bpf_tc_hook_create")) + goto cleanup; + + tc_opts.prog_fd = fd_tc; + ret = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) + goto cleanup; + tc_attached = true; + + if (sotype == SOCK_STREAM) + ASSERT_EQ(echo_test_tcp(*fd_sv), ECONNREFUSED, "drop_tcp"); + else + ASSERT_EQ(echo_test_udp(*fd_sv), EAGAIN, "drop_udp"); + ASSERT_EQ(skel->bss->reuseport_executed, 1, "program executed once"); + + skel->bss->sk_cookie_seen = 0; + skel->bss->reuseport_executed = 0; + ASSERT_OK(attach_reuseport(*fd_sv, fd_accept), "attach_reuseport(accept)"); + + if (sotype == SOCK_STREAM) + ASSERT_EQ(echo_test_tcp(*fd_sv), 0, "echo_tcp"); + else + ASSERT_EQ(echo_test_udp(*fd_sv), 0, "echo_udp"); + + ASSERT_EQ(skel->bss->sk_cookie_seen, cookie(*fd_sv), + "cookie_mismatch"); + ASSERT_EQ(skel->bss->reuseport_executed, 1, "program executed once"); +cleanup: + if (tc_attached) { + tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0; + ret = bpf_tc_detach(&tc_hook, &tc_opts); + ASSERT_OK(ret, "bpf_tc_detach"); + } + if (hook_created) { + tc_hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS; + bpf_tc_hook_destroy(&tc_hook); + } + test_assign_reuse__destroy(skel); + free_fds(fd_sv, 1); +} + +void test_assign_reuse(void) +{ + struct nstoken *tok = NULL; + + SYS(out, "ip netns add %s", NS_TEST); + SYS(cleanup, "ip -net %s link set dev lo up", NS_TEST); + + tok = open_netns(NS_TEST); + if (!ASSERT_OK_PTR(tok, "netns token")) + return; + + if (test__start_subtest("tcpv4")) + run_assign_reuse(AF_INET, SOCK_STREAM, "127.0.0.1", PORT); + if (test__start_subtest("tcpv6")) + run_assign_reuse(AF_INET6, SOCK_STREAM, "::1", PORT); + if (test__start_subtest("udpv4")) + run_assign_reuse(AF_INET, SOCK_DGRAM, "127.0.0.1", PORT); + if (test__start_subtest("udpv6")) + run_assign_reuse(AF_INET6, SOCK_DGRAM, "::1", PORT); + +cleanup: + close_netns(tok); + SYS_NOFAIL("ip netns delete %s", NS_TEST); +out: + return; +} diff --git a/tools/testing/selftests/bpf/prog_tests/async_stack_depth.c b/tools/testing/selftests/bpf/prog_tests/async_stack_depth.c new file mode 100644 index 000000000000..118abc29b236 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/async_stack_depth.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> + +#include "async_stack_depth.skel.h" + +void test_async_stack_depth(void) +{ + RUN_TESTS(async_stack_depth); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 26b2d1bffdfd..1454cebc262b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -11,6 +11,7 @@ #include <bpf/btf.h> #include "test_bpf_cookie.skel.h" #include "kprobe_multi.skel.h" +#include "uprobe_multi.skel.h" /* uprobe attach point */ static noinline void trigger_func(void) @@ -239,6 +240,81 @@ cleanup: bpf_link__destroy(link1); kprobe_multi__destroy(skel); } + +/* defined in prog_tests/uprobe_multi_test.c */ +void uprobe_multi_func_1(void); +void uprobe_multi_func_2(void); +void uprobe_multi_func_3(void); + +static void uprobe_multi_test_run(struct uprobe_multi *skel) +{ + skel->bss->uprobe_multi_func_1_addr = (__u64) uprobe_multi_func_1; + skel->bss->uprobe_multi_func_2_addr = (__u64) uprobe_multi_func_2; + skel->bss->uprobe_multi_func_3_addr = (__u64) uprobe_multi_func_3; + + skel->bss->pid = getpid(); + skel->bss->test_cookie = true; + + uprobe_multi_func_1(); + uprobe_multi_func_2(); + uprobe_multi_func_3(); + + ASSERT_EQ(skel->bss->uprobe_multi_func_1_result, 1, "uprobe_multi_func_1_result"); + ASSERT_EQ(skel->bss->uprobe_multi_func_2_result, 1, "uprobe_multi_func_2_result"); + ASSERT_EQ(skel->bss->uprobe_multi_func_3_result, 1, "uprobe_multi_func_3_result"); + + ASSERT_EQ(skel->bss->uretprobe_multi_func_1_result, 1, "uretprobe_multi_func_1_result"); + ASSERT_EQ(skel->bss->uretprobe_multi_func_2_result, 1, "uretprobe_multi_func_2_result"); + ASSERT_EQ(skel->bss->uretprobe_multi_func_3_result, 1, "uretprobe_multi_func_3_result"); +} + +static void uprobe_multi_attach_api_subtest(void) +{ + struct bpf_link *link1 = NULL, *link2 = NULL; + struct uprobe_multi *skel = NULL; + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts); + const char *syms[3] = { + "uprobe_multi_func_1", + "uprobe_multi_func_2", + "uprobe_multi_func_3", + }; + __u64 cookies[3]; + + cookies[0] = 3; /* uprobe_multi_func_1 */ + cookies[1] = 1; /* uprobe_multi_func_2 */ + cookies[2] = 2; /* uprobe_multi_func_3 */ + + opts.syms = syms; + opts.cnt = ARRAY_SIZE(syms); + opts.cookies = &cookies[0]; + + skel = uprobe_multi__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_multi")) + goto cleanup; + + link1 = bpf_program__attach_uprobe_multi(skel->progs.uprobe, -1, + "/proc/self/exe", NULL, &opts); + if (!ASSERT_OK_PTR(link1, "bpf_program__attach_uprobe_multi")) + goto cleanup; + + cookies[0] = 2; /* uprobe_multi_func_1 */ + cookies[1] = 3; /* uprobe_multi_func_2 */ + cookies[2] = 1; /* uprobe_multi_func_3 */ + + opts.retprobe = true; + link2 = bpf_program__attach_uprobe_multi(skel->progs.uretprobe, -1, + "/proc/self/exe", NULL, &opts); + if (!ASSERT_OK_PTR(link2, "bpf_program__attach_uprobe_multi_retprobe")) + goto cleanup; + + uprobe_multi_test_run(skel); + +cleanup: + bpf_link__destroy(link2); + bpf_link__destroy(link1); + uprobe_multi__destroy(skel); +} + static void uprobe_subtest(struct test_bpf_cookie *skel) { DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts); @@ -515,6 +591,8 @@ void test_bpf_cookie(void) kprobe_multi_attach_api_subtest(); if (test__start_subtest("uprobe")) uprobe_subtest(skel); + if (test__start_subtest("multi_uprobe_attach_api")) + uprobe_multi_attach_api_subtest(); if (test__start_subtest("tracepoint")) tp_subtest(skel); if (test__start_subtest("perf_event")) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c index a4d0cc9d3367..fe2c502e5089 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c @@ -11,6 +11,7 @@ #include "ksym_race.skel.h" #include "bpf_mod_race.skel.h" #include "kfunc_call_race.skel.h" +#include "testing_helpers.h" /* This test crafts a race between btf_try_get_module and do_init_module, and * checks whether btf_try_get_module handles the invocation for a well-formed @@ -44,35 +45,10 @@ enum bpf_test_state { static _Atomic enum bpf_test_state state = _TS_INVALID; -static int sys_finit_module(int fd, const char *param_values, int flags) -{ - return syscall(__NR_finit_module, fd, param_values, flags); -} - -static int sys_delete_module(const char *name, unsigned int flags) -{ - return syscall(__NR_delete_module, name, flags); -} - -static int load_module(const char *mod) -{ - int ret, fd; - - fd = open("bpf_testmod.ko", O_RDONLY); - if (fd < 0) - return fd; - - ret = sys_finit_module(fd, "", 0); - close(fd); - if (ret < 0) - return ret; - return 0; -} - static void *load_module_thread(void *p) { - if (!ASSERT_NEQ(load_module("bpf_testmod.ko"), 0, "load_module_thread must fail")) + if (!ASSERT_NEQ(load_bpf_testmod(false), 0, "load_module_thread must fail")) atomic_store(&state, TS_MODULE_LOAD); else atomic_store(&state, TS_MODULE_LOAD_FAIL); @@ -124,7 +100,7 @@ static void test_bpf_mod_race_config(const struct test_config *config) if (!ASSERT_NEQ(fault_addr, MAP_FAILED, "mmap for uffd registration")) return; - if (!ASSERT_OK(sys_delete_module("bpf_testmod", 0), "unload bpf_testmod")) + if (!ASSERT_OK(unload_bpf_testmod(false), "unload bpf_testmod")) goto end_mmap; skel = bpf_mod_race__open(); @@ -202,8 +178,8 @@ end_destroy: bpf_mod_race__destroy(skel); ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu"); end_module: - sys_delete_module("bpf_testmod", 0); - ASSERT_OK(load_module("bpf_testmod.ko"), "restore bpf_testmod"); + unload_bpf_testmod(false); + ASSERT_OK(load_bpf_testmod(false), "restore bpf_testmod"); end_mmap: munmap(fault_addr, 4096); atomic_store(&state, _TS_INVALID); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c index c8ba4009e4ab..b30ff6b3b81a 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c @@ -123,12 +123,13 @@ static void test_bpf_nf_ct(int mode) ASSERT_EQ(skel->data->test_snat_addr, 0, "Test for source natting"); ASSERT_EQ(skel->data->test_dnat_addr, 0, "Test for destination natting"); end: - if (srv_client_fd != -1) - close(srv_client_fd); if (client_fd != -1) close(client_fd); + if (srv_client_fd != -1) + close(srv_client_fd); if (srv_fd != -1) close(srv_fd); + snprintf(cmd, sizeof(cmd), iptables, "-D"); system(cmd); test_bpf_nf__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c new file mode 100644 index 000000000000..ee0458a5ce78 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c @@ -0,0 +1,269 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ +#define _GNU_SOURCE +#include <test_progs.h> +#include <bpf/btf.h> +#include <fcntl.h> +#include <unistd.h> +#include <linux/unistd.h> +#include <linux/mount.h> +#include <sys/syscall.h> +#include "bpf/libbpf_internal.h" + +static inline int sys_fsopen(const char *fsname, unsigned flags) +{ + return syscall(__NR_fsopen, fsname, flags); +} + +static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux) +{ + return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux); +} + +static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags) +{ + return syscall(__NR_fsmount, fs_fd, flags, ms_flags); +} + +__attribute__((unused)) +static inline int sys_move_mount(int from_dfd, const char *from_path, + int to_dfd, const char *to_path, + unsigned int ms_flags) +{ + return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, ms_flags); +} + +static void bpf_obj_pinning_detached(void) +{ + LIBBPF_OPTS(bpf_obj_pin_opts, pin_opts); + LIBBPF_OPTS(bpf_obj_get_opts, get_opts); + int fs_fd = -1, mnt_fd = -1; + int map_fd = -1, map_fd2 = -1; + int zero = 0, src_value, dst_value, err; + const char *map_name = "fsmount_map"; + + /* A bunch of below UAPI calls are constructed based on reading: + * https://brauner.io/2023/02/28/mounting-into-mount-namespaces.html + */ + + /* create VFS context */ + fs_fd = sys_fsopen("bpf", 0); + if (!ASSERT_GE(fs_fd, 0, "fs_fd")) + goto cleanup; + + /* instantiate FS object */ + err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); + if (!ASSERT_OK(err, "fs_create")) + goto cleanup; + + /* create O_PATH fd for detached mount */ + mnt_fd = sys_fsmount(fs_fd, 0, 0); + if (!ASSERT_GE(mnt_fd, 0, "mnt_fd")) + goto cleanup; + + /* If we wanted to expose detached mount in the file system, we'd do + * something like below. But the whole point is that we actually don't + * even have to expose BPF FS in the file system to be able to work + * (pin/get objects) with it. + * + * err = sys_move_mount(mnt_fd, "", -EBADF, mnt_path, MOVE_MOUNT_F_EMPTY_PATH); + * if (!ASSERT_OK(err, "move_mount")) + * goto cleanup; + */ + + /* create BPF map to pin */ + map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, map_name, 4, 4, 1, NULL); + if (!ASSERT_GE(map_fd, 0, "map_fd")) + goto cleanup; + + /* pin BPF map into detached BPF FS through mnt_fd */ + pin_opts.file_flags = BPF_F_PATH_FD; + pin_opts.path_fd = mnt_fd; + err = bpf_obj_pin_opts(map_fd, map_name, &pin_opts); + if (!ASSERT_OK(err, "map_pin")) + goto cleanup; + + /* get BPF map from detached BPF FS through mnt_fd */ + get_opts.file_flags = BPF_F_PATH_FD; + get_opts.path_fd = mnt_fd; + map_fd2 = bpf_obj_get_opts(map_name, &get_opts); + if (!ASSERT_GE(map_fd2, 0, "map_get")) + goto cleanup; + + /* update map through one FD */ + src_value = 0xcafebeef; + err = bpf_map_update_elem(map_fd, &zero, &src_value, 0); + ASSERT_OK(err, "map_update"); + + /* check values written/read through different FDs do match */ + dst_value = 0; + err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value); + ASSERT_OK(err, "map_lookup"); + ASSERT_EQ(dst_value, src_value, "map_value_eq1"); + ASSERT_EQ(dst_value, 0xcafebeef, "map_value_eq2"); + +cleanup: + if (map_fd >= 0) + ASSERT_OK(close(map_fd), "close_map_fd"); + if (map_fd2 >= 0) + ASSERT_OK(close(map_fd2), "close_map_fd2"); + if (fs_fd >= 0) + ASSERT_OK(close(fs_fd), "close_fs_fd"); + if (mnt_fd >= 0) + ASSERT_OK(close(mnt_fd), "close_mnt_fd"); +} + +enum path_kind +{ + PATH_STR_ABS, + PATH_STR_REL, + PATH_FD_REL, +}; + +static void validate_pin(int map_fd, const char *map_name, int src_value, + enum path_kind path_kind) +{ + LIBBPF_OPTS(bpf_obj_pin_opts, pin_opts); + char abs_path[PATH_MAX], old_cwd[PATH_MAX]; + const char *pin_path = NULL; + int zero = 0, dst_value, map_fd2, err; + + snprintf(abs_path, sizeof(abs_path), "/sys/fs/bpf/%s", map_name); + old_cwd[0] = '\0'; + + switch (path_kind) { + case PATH_STR_ABS: + /* absolute path */ + pin_path = abs_path; + break; + case PATH_STR_REL: + /* cwd + relative path */ + ASSERT_OK_PTR(getcwd(old_cwd, sizeof(old_cwd)), "getcwd"); + ASSERT_OK(chdir("/sys/fs/bpf"), "chdir"); + pin_path = map_name; + break; + case PATH_FD_REL: + /* dir fd + relative path */ + pin_opts.file_flags = BPF_F_PATH_FD; + pin_opts.path_fd = open("/sys/fs/bpf", O_PATH); + ASSERT_GE(pin_opts.path_fd, 0, "path_fd"); + pin_path = map_name; + break; + } + + /* pin BPF map using specified path definition */ + err = bpf_obj_pin_opts(map_fd, pin_path, &pin_opts); + ASSERT_OK(err, "obj_pin"); + + /* cleanup */ + if (path_kind == PATH_FD_REL && pin_opts.path_fd >= 0) + close(pin_opts.path_fd); + if (old_cwd[0]) + ASSERT_OK(chdir(old_cwd), "restore_cwd"); + + map_fd2 = bpf_obj_get(abs_path); + if (!ASSERT_GE(map_fd2, 0, "map_get")) + goto cleanup; + + /* update map through one FD */ + err = bpf_map_update_elem(map_fd, &zero, &src_value, 0); + ASSERT_OK(err, "map_update"); + + /* check values written/read through different FDs do match */ + dst_value = 0; + err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value); + ASSERT_OK(err, "map_lookup"); + ASSERT_EQ(dst_value, src_value, "map_value_eq"); +cleanup: + if (map_fd2 >= 0) + ASSERT_OK(close(map_fd2), "close_map_fd2"); + unlink(abs_path); +} + +static void validate_get(int map_fd, const char *map_name, int src_value, + enum path_kind path_kind) +{ + LIBBPF_OPTS(bpf_obj_get_opts, get_opts); + char abs_path[PATH_MAX], old_cwd[PATH_MAX]; + const char *pin_path = NULL; + int zero = 0, dst_value, map_fd2, err; + + snprintf(abs_path, sizeof(abs_path), "/sys/fs/bpf/%s", map_name); + /* pin BPF map using specified path definition */ + err = bpf_obj_pin(map_fd, abs_path); + if (!ASSERT_OK(err, "pin_map")) + return; + + old_cwd[0] = '\0'; + + switch (path_kind) { + case PATH_STR_ABS: + /* absolute path */ + pin_path = abs_path; + break; + case PATH_STR_REL: + /* cwd + relative path */ + ASSERT_OK_PTR(getcwd(old_cwd, sizeof(old_cwd)), "getcwd"); + ASSERT_OK(chdir("/sys/fs/bpf"), "chdir"); + pin_path = map_name; + break; + case PATH_FD_REL: + /* dir fd + relative path */ + get_opts.file_flags = BPF_F_PATH_FD; + get_opts.path_fd = open("/sys/fs/bpf", O_PATH); + ASSERT_GE(get_opts.path_fd, 0, "path_fd"); + pin_path = map_name; + break; + } + + map_fd2 = bpf_obj_get_opts(pin_path, &get_opts); + if (!ASSERT_GE(map_fd2, 0, "map_get")) + goto cleanup; + + /* cleanup */ + if (path_kind == PATH_FD_REL && get_opts.path_fd >= 0) + close(get_opts.path_fd); + if (old_cwd[0]) + ASSERT_OK(chdir(old_cwd), "restore_cwd"); + + /* update map through one FD */ + err = bpf_map_update_elem(map_fd, &zero, &src_value, 0); + ASSERT_OK(err, "map_update"); + + /* check values written/read through different FDs do match */ + dst_value = 0; + err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value); + ASSERT_OK(err, "map_lookup"); + ASSERT_EQ(dst_value, src_value, "map_value_eq"); +cleanup: + if (map_fd2 >= 0) + ASSERT_OK(close(map_fd2), "close_map_fd2"); + unlink(abs_path); +} + +static void bpf_obj_pinning_mounted(enum path_kind path_kind) +{ + const char *map_name = "mounted_map"; + int map_fd; + + /* create BPF map to pin */ + map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, map_name, 4, 4, 1, NULL); + if (!ASSERT_GE(map_fd, 0, "map_fd")) + return; + + validate_pin(map_fd, map_name, 100 + (int)path_kind, path_kind); + validate_get(map_fd, map_name, 200 + (int)path_kind, path_kind); + ASSERT_OK(close(map_fd), "close_map_fd"); +} + +void test_bpf_obj_pinning() +{ + if (test__start_subtest("detached")) + bpf_obj_pinning_detached(); + if (test__start_subtest("mounted-str-abs")) + bpf_obj_pinning_mounted(PATH_STR_ABS); + if (test__start_subtest("mounted-str-rel")) + bpf_obj_pinning_mounted(PATH_STR_REL); + if (test__start_subtest("mounted-fd-rel")) + bpf_obj_pinning_mounted(PATH_FD_REL); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index a53c254c6058..4aabeaa525d4 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -185,6 +185,8 @@ static void test_cubic(void) do_test("bpf_cubic", NULL); + ASSERT_EQ(cubic_skel->bss->bpf_cubic_acked_called, 1, "pkts_acked called"); + bpf_link__destroy(link); bpf_cubic__destroy(cubic_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 210d643fda6c..4e0cdb593318 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -3991,6 +3991,46 @@ static struct btf_raw_test raw_tests[] = { .err_str = "Invalid arg#1", }, { + .descr = "decl_tag test #18, decl_tag as the map key type", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_STRUCT_ENC(0, 2, 8), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_MEMBER_ENC(NAME_TBD, 1, 32), + BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), /* [3] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0m1\0m2\0tag"), + .map_type = BPF_MAP_TYPE_HASH, + .map_name = "tag_type_check_btf", + .key_size = 8, + .value_size = 4, + .key_type_id = 3, + .value_type_id = 1, + .max_entries = 1, + .map_create_err = true, +}, +{ + .descr = "decl_tag test #19, decl_tag as the map value type", + .raw_types = { + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + BTF_STRUCT_ENC(0, 2, 8), /* [2] */ + BTF_MEMBER_ENC(NAME_TBD, 1, 0), + BTF_MEMBER_ENC(NAME_TBD, 1, 32), + BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), /* [3] */ + BTF_END_RAW, + }, + BTF_STR_SEC("\0m1\0m2\0tag"), + .map_type = BPF_MAP_TYPE_HASH, + .map_name = "tag_type_check_btf", + .key_size = 4, + .value_size = 8, + .key_type_id = 1, + .value_type_id = 3, + .max_entries = 1, + .map_create_err = true, +}, +{ .descr = "type_tag test #1", .raw_types = { BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c b/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c index 4d2fa99273d8..2bb5773d6f99 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c @@ -25,6 +25,8 @@ static void test_setsockopt_set(int cgroup_fd, int sock_fd) if (!ASSERT_OK_PTR(obj, "skel-load")) return; + obj->bss->page_size = sysconf(_SC_PAGESIZE); + /* Attach setsockopt that sets EUNATCH, assert that * we actually get that error when we run setsockopt() */ @@ -59,6 +61,8 @@ static void test_setsockopt_set_and_get(int cgroup_fd, int sock_fd) if (!ASSERT_OK_PTR(obj, "skel-load")) return; + obj->bss->page_size = sysconf(_SC_PAGESIZE); + /* Attach setsockopt that sets EUNATCH, and one that gets the * previously set errno. Assert that we get the same errno back. */ @@ -100,6 +104,8 @@ static void test_setsockopt_default_zero(int cgroup_fd, int sock_fd) if (!ASSERT_OK_PTR(obj, "skel-load")) return; + obj->bss->page_size = sysconf(_SC_PAGESIZE); + /* Attach setsockopt that gets the previously set errno. * Assert that, without anything setting one, we get 0. */ @@ -134,6 +140,8 @@ static void test_setsockopt_default_zero_and_set(int cgroup_fd, int sock_fd) if (!ASSERT_OK_PTR(obj, "skel-load")) return; + obj->bss->page_size = sysconf(_SC_PAGESIZE); + /* Attach setsockopt that gets the previously set errno, and then * one that sets the errno to EUNATCH. Assert that the get does not * see EUNATCH set later, and does not prevent EUNATCH from being set. @@ -177,6 +185,8 @@ static void test_setsockopt_override(int cgroup_fd, int sock_fd) if (!ASSERT_OK_PTR(obj, "skel-load")) return; + obj->bss->page_size = sysconf(_SC_PAGESIZE); + /* Attach setsockopt that sets EUNATCH, then one that sets EISCONN, * and then one that gets the exported errno. Assert both the syscall * and the helper sees the last set errno. @@ -224,6 +234,8 @@ static void test_setsockopt_legacy_eperm(int cgroup_fd, int sock_fd) if (!ASSERT_OK_PTR(obj, "skel-load")) return; + obj->bss->page_size = sysconf(_SC_PAGESIZE); + /* Attach setsockopt that return a reject without setting errno * (legacy reject), and one that gets the errno. Assert that for * backward compatibility the syscall result in EPERM, and this @@ -268,6 +280,8 @@ static void test_setsockopt_legacy_no_override(int cgroup_fd, int sock_fd) if (!ASSERT_OK_PTR(obj, "skel-load")) return; + obj->bss->page_size = sysconf(_SC_PAGESIZE); + /* Attach setsockopt that sets EUNATCH, then one that return a reject * without setting errno, and then one that gets the exported errno. * Assert both the syscall and the helper's errno are unaffected by @@ -319,6 +333,8 @@ static void test_getsockopt_get(int cgroup_fd, int sock_fd) if (!ASSERT_OK_PTR(obj, "skel-load")) return; + obj->bss->page_size = sysconf(_SC_PAGESIZE); + /* Attach getsockopt that gets previously set errno. Assert that the * error from kernel is in both ctx_retval_value and retval_value. */ @@ -359,6 +375,8 @@ static void test_getsockopt_override(int cgroup_fd, int sock_fd) if (!ASSERT_OK_PTR(obj, "skel-load")) return; + obj->bss->page_size = sysconf(_SC_PAGESIZE); + /* Attach getsockopt that sets retval to -EISCONN. Assert that this * overrides the value from kernel. */ @@ -396,6 +414,8 @@ static void test_getsockopt_retval_sync(int cgroup_fd, int sock_fd) if (!ASSERT_OK_PTR(obj, "skel-load")) return; + obj->bss->page_size = sysconf(_SC_PAGESIZE); + /* Attach getsockopt that sets retval to -EISCONN, and one that clears * ctx retval. Assert that the clearing ctx retval is synced to helper * and clears any errors both from kernel and BPF.. diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_tcp_skb.c b/tools/testing/selftests/bpf/prog_tests/cgroup_tcp_skb.c new file mode 100644 index 000000000000..a1542faf7873 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_tcp_skb.c @@ -0,0 +1,344 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Facebook */ +#include <test_progs.h> +#include <linux/in6.h> +#include <sys/socket.h> +#include <sched.h> +#include <unistd.h> +#include "cgroup_helpers.h" +#include "testing_helpers.h" +#include "cgroup_tcp_skb.skel.h" +#include "cgroup_tcp_skb.h" +#include "network_helpers.h" + +#define CGROUP_TCP_SKB_PATH "/test_cgroup_tcp_skb" + +static int install_filters(int cgroup_fd, + struct bpf_link **egress_link, + struct bpf_link **ingress_link, + struct bpf_program *egress_prog, + struct bpf_program *ingress_prog, + struct cgroup_tcp_skb *skel) +{ + /* Prepare filters */ + skel->bss->g_sock_state = 0; + skel->bss->g_unexpected = 0; + *egress_link = + bpf_program__attach_cgroup(egress_prog, + cgroup_fd); + if (!ASSERT_OK_PTR(egress_link, "egress_link")) + return -1; + *ingress_link = + bpf_program__attach_cgroup(ingress_prog, + cgroup_fd); + if (!ASSERT_OK_PTR(ingress_link, "ingress_link")) + return -1; + + return 0; +} + +static void uninstall_filters(struct bpf_link **egress_link, + struct bpf_link **ingress_link) +{ + bpf_link__destroy(*egress_link); + *egress_link = NULL; + bpf_link__destroy(*ingress_link); + *ingress_link = NULL; +} + +static int create_client_sock_v6(void) +{ + int fd; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd < 0) { + perror("socket"); + return -1; + } + + return fd; +} + +/* Connect to the server in a cgroup from the outside of the cgroup. */ +static int talk_to_cgroup(int *client_fd, int *listen_fd, int *service_fd, + struct cgroup_tcp_skb *skel) +{ + int err, cp; + char buf[5]; + int port; + + /* Create client & server socket */ + err = join_root_cgroup(); + if (!ASSERT_OK(err, "join_root_cgroup")) + return -1; + *client_fd = create_client_sock_v6(); + if (!ASSERT_GE(*client_fd, 0, "client_fd")) + return -1; + err = join_cgroup(CGROUP_TCP_SKB_PATH); + if (!ASSERT_OK(err, "join_cgroup")) + return -1; + *listen_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); + if (!ASSERT_GE(*listen_fd, 0, "listen_fd")) + return -1; + port = get_socket_local_port(*listen_fd); + if (!ASSERT_GE(port, 0, "get_socket_local_port")) + return -1; + skel->bss->g_sock_port = ntohs(port); + + /* Connect client to server */ + err = connect_fd_to_fd(*client_fd, *listen_fd, 0); + if (!ASSERT_OK(err, "connect_fd_to_fd")) + return -1; + *service_fd = accept(*listen_fd, NULL, NULL); + if (!ASSERT_GE(*service_fd, 0, "service_fd")) + return -1; + err = join_root_cgroup(); + if (!ASSERT_OK(err, "join_root_cgroup")) + return -1; + cp = write(*client_fd, "hello", 5); + if (!ASSERT_EQ(cp, 5, "write")) + return -1; + cp = read(*service_fd, buf, 5); + if (!ASSERT_EQ(cp, 5, "read")) + return -1; + + return 0; +} + +/* Connect to the server out of a cgroup from inside the cgroup. */ +static int talk_to_outside(int *client_fd, int *listen_fd, int *service_fd, + struct cgroup_tcp_skb *skel) + +{ + int err, cp; + char buf[5]; + int port; + + /* Create client & server socket */ + err = join_root_cgroup(); + if (!ASSERT_OK(err, "join_root_cgroup")) + return -1; + *listen_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); + if (!ASSERT_GE(*listen_fd, 0, "listen_fd")) + return -1; + err = join_cgroup(CGROUP_TCP_SKB_PATH); + if (!ASSERT_OK(err, "join_cgroup")) + return -1; + *client_fd = create_client_sock_v6(); + if (!ASSERT_GE(*client_fd, 0, "client_fd")) + return -1; + err = join_root_cgroup(); + if (!ASSERT_OK(err, "join_root_cgroup")) + return -1; + port = get_socket_local_port(*listen_fd); + if (!ASSERT_GE(port, 0, "get_socket_local_port")) + return -1; + skel->bss->g_sock_port = ntohs(port); + + /* Connect client to server */ + err = connect_fd_to_fd(*client_fd, *listen_fd, 0); + if (!ASSERT_OK(err, "connect_fd_to_fd")) + return -1; + *service_fd = accept(*listen_fd, NULL, NULL); + if (!ASSERT_GE(*service_fd, 0, "service_fd")) + return -1; + cp = write(*client_fd, "hello", 5); + if (!ASSERT_EQ(cp, 5, "write")) + return -1; + cp = read(*service_fd, buf, 5); + if (!ASSERT_EQ(cp, 5, "read")) + return -1; + + return 0; +} + +static int close_connection(int *closing_fd, int *peer_fd, int *listen_fd, + struct cgroup_tcp_skb *skel) +{ + __u32 saved_packet_count = 0; + int err; + int i; + + /* Wait for ACKs to be sent */ + saved_packet_count = skel->bss->g_packet_count; + usleep(100000); /* 0.1s */ + for (i = 0; + skel->bss->g_packet_count != saved_packet_count && i < 10; + i++) { + saved_packet_count = skel->bss->g_packet_count; + usleep(100000); /* 0.1s */ + } + if (!ASSERT_EQ(skel->bss->g_packet_count, saved_packet_count, + "packet_count")) + return -1; + + skel->bss->g_packet_count = 0; + saved_packet_count = 0; + + /* Half shutdown to make sure the closing socket having a chance to + * receive a FIN from the peer. + */ + err = shutdown(*closing_fd, SHUT_WR); + if (!ASSERT_OK(err, "shutdown closing_fd")) + return -1; + + /* Wait for FIN and the ACK of the FIN to be observed */ + for (i = 0; + skel->bss->g_packet_count < saved_packet_count + 2 && i < 10; + i++) + usleep(100000); /* 0.1s */ + if (!ASSERT_GE(skel->bss->g_packet_count, saved_packet_count + 2, + "packet_count")) + return -1; + + saved_packet_count = skel->bss->g_packet_count; + + /* Fully shutdown the connection */ + err = close(*peer_fd); + if (!ASSERT_OK(err, "close peer_fd")) + return -1; + *peer_fd = -1; + + /* Wait for FIN and the ACK of the FIN to be observed */ + for (i = 0; + skel->bss->g_packet_count < saved_packet_count + 2 && i < 10; + i++) + usleep(100000); /* 0.1s */ + if (!ASSERT_GE(skel->bss->g_packet_count, saved_packet_count + 2, + "packet_count")) + return -1; + + err = close(*closing_fd); + if (!ASSERT_OK(err, "close closing_fd")) + return -1; + *closing_fd = -1; + + close(*listen_fd); + *listen_fd = -1; + + return 0; +} + +/* This test case includes four scenarios: + * 1. Connect to the server from outside the cgroup and close the connection + * from outside the cgroup. + * 2. Connect to the server from outside the cgroup and close the connection + * from inside the cgroup. + * 3. Connect to the server from inside the cgroup and close the connection + * from outside the cgroup. + * 4. Connect to the server from inside the cgroup and close the connection + * from inside the cgroup. + * + * The test case is to verify that cgroup_skb/{egress,ingress} filters + * receive expected packets including SYN, SYN/ACK, ACK, FIN, and FIN/ACK. + */ +void test_cgroup_tcp_skb(void) +{ + struct bpf_link *ingress_link = NULL; + struct bpf_link *egress_link = NULL; + int client_fd = -1, listen_fd = -1; + struct cgroup_tcp_skb *skel; + int service_fd = -1; + int cgroup_fd = -1; + int err; + + skel = cgroup_tcp_skb__open_and_load(); + if (!ASSERT_OK(!skel, "skel_open_load")) + return; + + err = setup_cgroup_environment(); + if (!ASSERT_OK(err, "setup_cgroup_environment")) + goto cleanup; + + cgroup_fd = create_and_get_cgroup(CGROUP_TCP_SKB_PATH); + if (!ASSERT_GE(cgroup_fd, 0, "cgroup_fd")) + goto cleanup; + + /* Scenario 1 */ + err = install_filters(cgroup_fd, &egress_link, &ingress_link, + skel->progs.server_egress, + skel->progs.server_ingress, + skel); + if (!ASSERT_OK(err, "install_filters")) + goto cleanup; + + err = talk_to_cgroup(&client_fd, &listen_fd, &service_fd, skel); + if (!ASSERT_OK(err, "talk_to_cgroup")) + goto cleanup; + + err = close_connection(&client_fd, &service_fd, &listen_fd, skel); + if (!ASSERT_OK(err, "close_connection")) + goto cleanup; + + ASSERT_EQ(skel->bss->g_unexpected, 0, "g_unexpected"); + ASSERT_EQ(skel->bss->g_sock_state, CLOSED, "g_sock_state"); + + uninstall_filters(&egress_link, &ingress_link); + + /* Scenario 2 */ + err = install_filters(cgroup_fd, &egress_link, &ingress_link, + skel->progs.server_egress_srv, + skel->progs.server_ingress_srv, + skel); + + err = talk_to_cgroup(&client_fd, &listen_fd, &service_fd, skel); + if (!ASSERT_OK(err, "talk_to_cgroup")) + goto cleanup; + + err = close_connection(&service_fd, &client_fd, &listen_fd, skel); + if (!ASSERT_OK(err, "close_connection")) + goto cleanup; + + ASSERT_EQ(skel->bss->g_unexpected, 0, "g_unexpected"); + ASSERT_EQ(skel->bss->g_sock_state, TIME_WAIT, "g_sock_state"); + + uninstall_filters(&egress_link, &ingress_link); + + /* Scenario 3 */ + err = install_filters(cgroup_fd, &egress_link, &ingress_link, + skel->progs.client_egress_srv, + skel->progs.client_ingress_srv, + skel); + + err = talk_to_outside(&client_fd, &listen_fd, &service_fd, skel); + if (!ASSERT_OK(err, "talk_to_outside")) + goto cleanup; + + err = close_connection(&service_fd, &client_fd, &listen_fd, skel); + if (!ASSERT_OK(err, "close_connection")) + goto cleanup; + + ASSERT_EQ(skel->bss->g_unexpected, 0, "g_unexpected"); + ASSERT_EQ(skel->bss->g_sock_state, CLOSED, "g_sock_state"); + + uninstall_filters(&egress_link, &ingress_link); + + /* Scenario 4 */ + err = install_filters(cgroup_fd, &egress_link, &ingress_link, + skel->progs.client_egress, + skel->progs.client_ingress, + skel); + + err = talk_to_outside(&client_fd, &listen_fd, &service_fd, skel); + if (!ASSERT_OK(err, "talk_to_outside")) + goto cleanup; + + err = close_connection(&client_fd, &service_fd, &listen_fd, skel); + if (!ASSERT_OK(err, "close_connection")) + goto cleanup; + + ASSERT_EQ(skel->bss->g_unexpected, 0, "g_unexpected"); + ASSERT_EQ(skel->bss->g_sock_state, TIME_WAIT, "g_sock_state"); + + uninstall_filters(&egress_link, &ingress_link); + +cleanup: + close(client_fd); + close(listen_fd); + close(service_fd); + close(cgroup_fd); + bpf_link__destroy(egress_link); + bpf_link__destroy(ingress_link); + cleanup_cgroup_environment(); + cgroup_tcp_skb__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c index 5338d2ea0460..2a9a30650350 100644 --- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c +++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c @@ -183,7 +183,7 @@ cleanup: void serial_test_check_mtu(void) { - __u32 mtu_lo; + int mtu_lo; if (test__start_subtest("bpf_check_mtu XDP-attach")) test_check_mtu_xdp_attach(); diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c index cdf4acc18e4c..756ea8b590b6 100644 --- a/tools/testing/selftests/bpf/prog_tests/cpumask.c +++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c @@ -10,6 +10,7 @@ static const char * const cpumask_success_testcases[] = { "test_set_clear_cpu", "test_setall_clear_cpu", "test_first_firstzero_cpu", + "test_firstand_nocpu", "test_test_and_set_clear", "test_and_or_xor", "test_intersects_subset", @@ -70,5 +71,6 @@ void test_cpumask(void) verify_success(cpumask_success_testcases[i]); } + RUN_TESTS(cpumask_success); RUN_TESTS(cpumask_failure); } diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c index 911345c526e6..ccc768592e66 100644 --- a/tools/testing/selftests/bpf/prog_tests/d_path.c +++ b/tools/testing/selftests/bpf/prog_tests/d_path.c @@ -12,6 +12,17 @@ #include "test_d_path_check_rdonly_mem.skel.h" #include "test_d_path_check_types.skel.h" +/* sys_close_range is not around for long time, so let's + * make sure we can call it on systems with older glibc + */ +#ifndef __NR_close_range +#ifdef __alpha__ +#define __NR_close_range 546 +#else +#define __NR_close_range 436 +#endif +#endif + static int duration; static struct { @@ -90,7 +101,11 @@ static int trigger_fstat_events(pid_t pid) fstat(indicatorfd, &fileStat); out_close: - /* triggers filp_close */ + /* sys_close no longer triggers filp_close, but we can + * call sys_close_range instead which still does + */ +#define close(fd) syscall(__NR_close_range, fd, fd, 0) + close(pipefd[0]); close(pipefd[1]); close(sockfd); @@ -98,6 +113,8 @@ out_close: close(devfd); close(localfd); close(indicatorfd); + +#undef close return ret; } diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index d176c34a7d2e..7cfac53c0d58 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -20,6 +20,14 @@ static struct { {"test_ringbuf", SETUP_SYSCALL_SLEEP}, {"test_skb_readonly", SETUP_SKB_PROG}, {"test_dynptr_skb_data", SETUP_SKB_PROG}, + {"test_adjust", SETUP_SYSCALL_SLEEP}, + {"test_adjust_err", SETUP_SYSCALL_SLEEP}, + {"test_zero_size_dynptr", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_is_null", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_is_rdonly", SETUP_SKB_PROG}, + {"test_dynptr_clone", SETUP_SKB_PROG}, + {"test_dynptr_skb_no_buff", SETUP_SKB_PROG}, + {"test_dynptr_skb_strcmp", SETUP_SKB_PROG}, }; static void verify_success(const char *prog_name, enum test_setup_type setup_type) diff --git a/tools/testing/selftests/bpf/prog_tests/empty_skb.c b/tools/testing/selftests/bpf/prog_tests/empty_skb.c index 3b77d8a422db..261228eb68e8 100644 --- a/tools/testing/selftests/bpf/prog_tests/empty_skb.c +++ b/tools/testing/selftests/bpf/prog_tests/empty_skb.c @@ -24,6 +24,7 @@ void test_empty_skb(void) int *ifindex; int err; int ret; + int lwt_egress_ret; /* expected retval at lwt/egress */ bool success_on_tc; } tests[] = { /* Empty packets are always rejected. */ @@ -57,6 +58,7 @@ void test_empty_skb(void) .data_size_in = sizeof(eth_hlen), .ifindex = &veth_ifindex, .ret = -ERANGE, + .lwt_egress_ret = -ERANGE, .success_on_tc = true, }, { @@ -70,6 +72,7 @@ void test_empty_skb(void) .data_size_in = sizeof(eth_hlen), .ifindex = &ipip_ifindex, .ret = -ERANGE, + .lwt_egress_ret = -ERANGE, }, /* ETH_HLEN+1-sized packet should be redirected. */ @@ -79,6 +82,7 @@ void test_empty_skb(void) .data_in = eth_hlen_pp, .data_size_in = sizeof(eth_hlen_pp), .ifindex = &veth_ifindex, + .lwt_egress_ret = 1, /* veth_xmit NET_XMIT_DROP */ }, { .msg = "ipip ETH_HLEN+1 packet ingress", @@ -108,8 +112,12 @@ void test_empty_skb(void) for (i = 0; i < ARRAY_SIZE(tests); i++) { bpf_object__for_each_program(prog, bpf_obj->obj) { - char buf[128]; + bool at_egress = strstr(bpf_program__name(prog), "egress") != NULL; bool at_tc = !strncmp(bpf_program__section_name(prog), "tc", 2); + int expected_ret; + char buf[128]; + + expected_ret = at_egress && !at_tc ? tests[i].lwt_egress_ret : tests[i].ret; tattr.data_in = tests[i].data_in; tattr.data_size_in = tests[i].data_size_in; @@ -128,7 +136,7 @@ void test_empty_skb(void) if (at_tc && tests[i].success_on_tc) ASSERT_GE(bpf_obj->bss->ret, 0, buf); else - ASSERT_EQ(bpf_obj->bss->ret, tests[i].ret, buf); + ASSERT_EQ(bpf_obj->bss->ret, expected_ret, buf); } } diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c index c0d1d61d5f66..aee1bc77a17f 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c @@ -2,8 +2,9 @@ /* Copyright (c) 2019 Facebook */ #include <test_progs.h> #include "fentry_test.lskel.h" +#include "fentry_many_args.skel.h" -static int fentry_test(struct fentry_test_lskel *fentry_skel) +static int fentry_test_common(struct fentry_test_lskel *fentry_skel) { int err, prog_fd, i; int link_fd; @@ -37,7 +38,7 @@ static int fentry_test(struct fentry_test_lskel *fentry_skel) return 0; } -void test_fentry_test(void) +static void fentry_test(void) { struct fentry_test_lskel *fentry_skel = NULL; int err; @@ -46,13 +47,47 @@ void test_fentry_test(void) if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load")) goto cleanup; - err = fentry_test(fentry_skel); + err = fentry_test_common(fentry_skel); if (!ASSERT_OK(err, "fentry_first_attach")) goto cleanup; - err = fentry_test(fentry_skel); + err = fentry_test_common(fentry_skel); ASSERT_OK(err, "fentry_second_attach"); cleanup: fentry_test_lskel__destroy(fentry_skel); } + +static void fentry_many_args(void) +{ + struct fentry_many_args *fentry_skel = NULL; + int err; + + fentry_skel = fentry_many_args__open_and_load(); + if (!ASSERT_OK_PTR(fentry_skel, "fentry_many_args_skel_load")) + goto cleanup; + + err = fentry_many_args__attach(fentry_skel); + if (!ASSERT_OK(err, "fentry_many_args_attach")) + goto cleanup; + + ASSERT_OK(trigger_module_test_read(1), "trigger_read"); + + ASSERT_EQ(fentry_skel->bss->test1_result, 1, + "fentry_many_args_result1"); + ASSERT_EQ(fentry_skel->bss->test2_result, 1, + "fentry_many_args_result2"); + ASSERT_EQ(fentry_skel->bss->test3_result, 1, + "fentry_many_args_result3"); + +cleanup: + fentry_many_args__destroy(fentry_skel); +} + +void test_fentry_test(void) +{ + if (test__start_subtest("fentry")) + fentry_test(); + if (test__start_subtest("fentry_many_args")) + fentry_many_args(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c index 101b7343036b..1c13007e37dd 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c @@ -2,8 +2,9 @@ /* Copyright (c) 2019 Facebook */ #include <test_progs.h> #include "fexit_test.lskel.h" +#include "fexit_many_args.skel.h" -static int fexit_test(struct fexit_test_lskel *fexit_skel) +static int fexit_test_common(struct fexit_test_lskel *fexit_skel) { int err, prog_fd, i; int link_fd; @@ -37,7 +38,7 @@ static int fexit_test(struct fexit_test_lskel *fexit_skel) return 0; } -void test_fexit_test(void) +static void fexit_test(void) { struct fexit_test_lskel *fexit_skel = NULL; int err; @@ -46,13 +47,47 @@ void test_fexit_test(void) if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load")) goto cleanup; - err = fexit_test(fexit_skel); + err = fexit_test_common(fexit_skel); if (!ASSERT_OK(err, "fexit_first_attach")) goto cleanup; - err = fexit_test(fexit_skel); + err = fexit_test_common(fexit_skel); ASSERT_OK(err, "fexit_second_attach"); cleanup: fexit_test_lskel__destroy(fexit_skel); } + +static void fexit_many_args(void) +{ + struct fexit_many_args *fexit_skel = NULL; + int err; + + fexit_skel = fexit_many_args__open_and_load(); + if (!ASSERT_OK_PTR(fexit_skel, "fexit_many_args_skel_load")) + goto cleanup; + + err = fexit_many_args__attach(fexit_skel); + if (!ASSERT_OK(err, "fexit_many_args_attach")) + goto cleanup; + + ASSERT_OK(trigger_module_test_read(1), "trigger_read"); + + ASSERT_EQ(fexit_skel->bss->test1_result, 1, + "fexit_many_args_result1"); + ASSERT_EQ(fexit_skel->bss->test2_result, 1, + "fexit_many_args_result2"); + ASSERT_EQ(fexit_skel->bss->test3_result, 1, + "fexit_many_args_result3"); + +cleanup: + fexit_many_args__destroy(fexit_skel); +} + +void test_fexit_test(void) +{ + if (test__start_subtest("fexit")) + fexit_test(); + if (test__start_subtest("fexit_many_args")) + fexit_many_args(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c index a1e712105811..2fd05649bad1 100644 --- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ +#include <linux/rtnetlink.h> #include <sys/types.h> #include <net/if.h> @@ -15,14 +16,23 @@ #define IPV4_IFACE_ADDR "10.0.0.254" #define IPV4_NUD_FAILED_ADDR "10.0.0.1" #define IPV4_NUD_STALE_ADDR "10.0.0.2" +#define IPV4_TBID_ADDR "172.0.0.254" +#define IPV4_TBID_NET "172.0.0.0" +#define IPV4_TBID_DST "172.0.0.2" +#define IPV6_TBID_ADDR "fd00::FFFF" +#define IPV6_TBID_NET "fd00::" +#define IPV6_TBID_DST "fd00::2" #define DMAC "11:11:11:11:11:11" #define DMAC_INIT { 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, } +#define DMAC2 "01:01:01:01:01:01" +#define DMAC_INIT2 { 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, } struct fib_lookup_test { const char *desc; const char *daddr; int expected_ret; int lookup_flags; + __u32 tbid; __u8 dmac[6]; }; @@ -43,6 +53,22 @@ static const struct fib_lookup_test tests[] = { { .desc = "IPv4 skip neigh", .daddr = IPV4_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH, }, + { .desc = "IPv4 TBID lookup failure", + .daddr = IPV4_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_NOT_FWDED, + .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, + .tbid = RT_TABLE_MAIN, }, + { .desc = "IPv4 TBID lookup success", + .daddr = IPV4_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, .tbid = 100, + .dmac = DMAC_INIT2, }, + { .desc = "IPv6 TBID lookup failure", + .daddr = IPV6_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_NOT_FWDED, + .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, + .tbid = RT_TABLE_MAIN, }, + { .desc = "IPv6 TBID lookup success", + .daddr = IPV6_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS, + .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, .tbid = 100, + .dmac = DMAC_INIT2, }, }; static int ifindex; @@ -53,6 +79,7 @@ static int setup_netns(void) SYS(fail, "ip link add veth1 type veth peer name veth2"); SYS(fail, "ip link set dev veth1 up"); + SYS(fail, "ip link set dev veth2 up"); err = write_sysctl("/proc/sys/net/ipv4/neigh/veth1/gc_stale_time", "900"); if (!ASSERT_OK(err, "write_sysctl(net.ipv4.neigh.veth1.gc_stale_time)")) @@ -70,6 +97,17 @@ static int setup_netns(void) SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR); SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC); + /* Setup for tbid lookup tests */ + SYS(fail, "ip addr add %s/24 dev veth2", IPV4_TBID_ADDR); + SYS(fail, "ip route del %s/24 dev veth2", IPV4_TBID_NET); + SYS(fail, "ip route add table 100 %s/24 dev veth2", IPV4_TBID_NET); + SYS(fail, "ip neigh add %s dev veth2 lladdr %s nud stale", IPV4_TBID_DST, DMAC2); + + SYS(fail, "ip addr add %s/64 dev veth2", IPV6_TBID_ADDR); + SYS(fail, "ip -6 route del %s/64 dev veth2", IPV6_TBID_NET); + SYS(fail, "ip -6 route add table 100 %s/64 dev veth2", IPV6_TBID_NET); + SYS(fail, "ip neigh add %s dev veth2 lladdr %s nud stale", IPV6_TBID_DST, DMAC2); + err = write_sysctl("/proc/sys/net/ipv4/conf/veth1/forwarding", "1"); if (!ASSERT_OK(err, "write_sysctl(net.ipv4.conf.veth1.forwarding)")) goto fail; @@ -83,7 +121,7 @@ fail: return -1; } -static int set_lookup_params(struct bpf_fib_lookup *params, const char *daddr) +static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_lookup_test *test) { int ret; @@ -91,8 +129,9 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const char *daddr) params->l4_protocol = IPPROTO_TCP; params->ifindex = ifindex; + params->tbid = test->tbid; - if (inet_pton(AF_INET6, daddr, params->ipv6_dst) == 1) { + if (inet_pton(AF_INET6, test->daddr, params->ipv6_dst) == 1) { params->family = AF_INET6; ret = inet_pton(AF_INET6, IPV6_IFACE_ADDR, params->ipv6_src); if (!ASSERT_EQ(ret, 1, "inet_pton(IPV6_IFACE_ADDR)")) @@ -100,7 +139,7 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const char *daddr) return 0; } - ret = inet_pton(AF_INET, daddr, ¶ms->ipv4_dst); + ret = inet_pton(AF_INET, test->daddr, ¶ms->ipv4_dst); if (!ASSERT_EQ(ret, 1, "convert IP[46] address")) return -1; params->family = AF_INET; @@ -154,13 +193,12 @@ void test_fib_lookup(void) fib_params = &skel->bss->fib_params; for (i = 0; i < ARRAY_SIZE(tests); i++) { - printf("Testing %s\n", tests[i].desc); + printf("Testing %s ", tests[i].desc); - if (set_lookup_params(fib_params, tests[i].daddr)) + if (set_lookup_params(fib_params, &tests[i])) continue; skel->bss->fib_lookup_ret = -1; - skel->bss->lookup_flags = BPF_FIB_LOOKUP_OUTPUT | - tests[i].lookup_flags; + skel->bss->lookup_flags = tests[i].lookup_flags; err = bpf_prog_test_run_opts(prog_fd, &run_opts); if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) @@ -175,7 +213,14 @@ void test_fib_lookup(void) mac_str(expected, tests[i].dmac); mac_str(actual, fib_params->dmac); - printf("dmac expected %s actual %s\n", expected, actual); + printf("dmac expected %s actual %s ", expected, actual); + } + + // ensure tbid is zero'd out after fib lookup. + if (tests[i].lookup_flags & BPF_FIB_LOOKUP_DIRECT) { + if (!ASSERT_EQ(skel->bss->fib_params.tbid, 0, + "expected fib_params.tbid to be zero")) + goto fail; } } diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c new file mode 100644 index 000000000000..9d768e083714 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -0,0 +1,342 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */ + +#include <string.h> +#include <linux/bpf.h> +#include <linux/limits.h> +#include <test_progs.h> +#include "trace_helpers.h" +#include "test_fill_link_info.skel.h" + +#define TP_CAT "sched" +#define TP_NAME "sched_switch" + +static const char *kmulti_syms[] = { + "bpf_fentry_test2", + "bpf_fentry_test1", + "bpf_fentry_test3", +}; +#define KMULTI_CNT ARRAY_SIZE(kmulti_syms) +static __u64 kmulti_addrs[KMULTI_CNT]; + +#define KPROBE_FUNC "bpf_fentry_test1" +static __u64 kprobe_addr; + +#define UPROBE_FILE "/proc/self/exe" +static ssize_t uprobe_offset; +/* uprobe attach point */ +static noinline void uprobe_func(void) +{ + asm volatile (""); +} + +static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long addr, + ssize_t offset, ssize_t entry_offset) +{ + struct bpf_link_info info; + __u32 len = sizeof(info); + char buf[PATH_MAX]; + int err; + + memset(&info, 0, sizeof(info)); + buf[0] = '\0'; + +again: + err = bpf_link_get_info_by_fd(fd, &info, &len); + if (!ASSERT_OK(err, "get_link_info")) + return -1; + + if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_PERF_EVENT, "link_type")) + return -1; + if (!ASSERT_EQ(info.perf_event.type, type, "perf_type_match")) + return -1; + + switch (info.perf_event.type) { + case BPF_PERF_EVENT_KPROBE: + case BPF_PERF_EVENT_KRETPROBE: + ASSERT_EQ(info.perf_event.kprobe.offset, offset, "kprobe_offset"); + + /* In case kernel.kptr_restrict is not permitted or MAX_SYMS is reached */ + if (addr) + ASSERT_EQ(info.perf_event.kprobe.addr, addr + entry_offset, + "kprobe_addr"); + + if (!info.perf_event.kprobe.func_name) { + ASSERT_EQ(info.perf_event.kprobe.name_len, 0, "name_len"); + info.perf_event.kprobe.func_name = ptr_to_u64(&buf); + info.perf_event.kprobe.name_len = sizeof(buf); + goto again; + } + + err = strncmp(u64_to_ptr(info.perf_event.kprobe.func_name), KPROBE_FUNC, + strlen(KPROBE_FUNC)); + ASSERT_EQ(err, 0, "cmp_kprobe_func_name"); + break; + case BPF_PERF_EVENT_TRACEPOINT: + if (!info.perf_event.tracepoint.tp_name) { + ASSERT_EQ(info.perf_event.tracepoint.name_len, 0, "name_len"); + info.perf_event.tracepoint.tp_name = ptr_to_u64(&buf); + info.perf_event.tracepoint.name_len = sizeof(buf); + goto again; + } + + err = strncmp(u64_to_ptr(info.perf_event.tracepoint.tp_name), TP_NAME, + strlen(TP_NAME)); + ASSERT_EQ(err, 0, "cmp_tp_name"); + break; + case BPF_PERF_EVENT_UPROBE: + case BPF_PERF_EVENT_URETPROBE: + ASSERT_EQ(info.perf_event.uprobe.offset, offset, "uprobe_offset"); + + if (!info.perf_event.uprobe.file_name) { + ASSERT_EQ(info.perf_event.uprobe.name_len, 0, "name_len"); + info.perf_event.uprobe.file_name = ptr_to_u64(&buf); + info.perf_event.uprobe.name_len = sizeof(buf); + goto again; + } + + err = strncmp(u64_to_ptr(info.perf_event.uprobe.file_name), UPROBE_FILE, + strlen(UPROBE_FILE)); + ASSERT_EQ(err, 0, "cmp_file_name"); + break; + default: + err = -1; + break; + } + return err; +} + +static void kprobe_fill_invalid_user_buffer(int fd) +{ + struct bpf_link_info info; + __u32 len = sizeof(info); + int err; + + memset(&info, 0, sizeof(info)); + + info.perf_event.kprobe.func_name = 0x1; /* invalid address */ + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EINVAL, "invalid_buff_and_len"); + + info.perf_event.kprobe.name_len = 64; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EFAULT, "invalid_buff"); + + info.perf_event.kprobe.func_name = 0; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EINVAL, "invalid_len"); + + ASSERT_EQ(info.perf_event.kprobe.addr, 0, "func_addr"); + ASSERT_EQ(info.perf_event.kprobe.offset, 0, "func_offset"); + ASSERT_EQ(info.perf_event.type, 0, "type"); +} + +static void test_kprobe_fill_link_info(struct test_fill_link_info *skel, + enum bpf_perf_event_type type, + bool invalid) +{ + DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts, + .attach_mode = PROBE_ATTACH_MODE_LINK, + .retprobe = type == BPF_PERF_EVENT_KRETPROBE, + ); + ssize_t entry_offset = 0; + int link_fd, err; + + skel->links.kprobe_run = bpf_program__attach_kprobe_opts(skel->progs.kprobe_run, + KPROBE_FUNC, &opts); + if (!ASSERT_OK_PTR(skel->links.kprobe_run, "attach_kprobe")) + return; + + link_fd = bpf_link__fd(skel->links.kprobe_run); + if (!invalid) { + /* See also arch_adjust_kprobe_addr(). */ + if (skel->kconfig->CONFIG_X86_KERNEL_IBT) + entry_offset = 4; + err = verify_perf_link_info(link_fd, type, kprobe_addr, 0, entry_offset); + ASSERT_OK(err, "verify_perf_link_info"); + } else { + kprobe_fill_invalid_user_buffer(link_fd); + } + bpf_link__detach(skel->links.kprobe_run); +} + +static void test_tp_fill_link_info(struct test_fill_link_info *skel) +{ + int link_fd, err; + + skel->links.tp_run = bpf_program__attach_tracepoint(skel->progs.tp_run, TP_CAT, TP_NAME); + if (!ASSERT_OK_PTR(skel->links.tp_run, "attach_tp")) + return; + + link_fd = bpf_link__fd(skel->links.tp_run); + err = verify_perf_link_info(link_fd, BPF_PERF_EVENT_TRACEPOINT, 0, 0, 0); + ASSERT_OK(err, "verify_perf_link_info"); + bpf_link__detach(skel->links.tp_run); +} + +static void test_uprobe_fill_link_info(struct test_fill_link_info *skel, + enum bpf_perf_event_type type) +{ + int link_fd, err; + + skel->links.uprobe_run = bpf_program__attach_uprobe(skel->progs.uprobe_run, + type == BPF_PERF_EVENT_URETPROBE, + 0, /* self pid */ + UPROBE_FILE, uprobe_offset); + if (!ASSERT_OK_PTR(skel->links.uprobe_run, "attach_uprobe")) + return; + + link_fd = bpf_link__fd(skel->links.uprobe_run); + err = verify_perf_link_info(link_fd, type, 0, uprobe_offset, 0); + ASSERT_OK(err, "verify_perf_link_info"); + bpf_link__detach(skel->links.uprobe_run); +} + +static int verify_kmulti_link_info(int fd, bool retprobe) +{ + struct bpf_link_info info; + __u32 len = sizeof(info); + __u64 addrs[KMULTI_CNT]; + int flags, i, err; + + memset(&info, 0, sizeof(info)); + +again: + err = bpf_link_get_info_by_fd(fd, &info, &len); + if (!ASSERT_OK(err, "get_link_info")) + return -1; + + if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_KPROBE_MULTI, "kmulti_type")) + return -1; + + ASSERT_EQ(info.kprobe_multi.count, KMULTI_CNT, "func_cnt"); + flags = info.kprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN; + if (!retprobe) + ASSERT_EQ(flags, 0, "kmulti_flags"); + else + ASSERT_NEQ(flags, 0, "kretmulti_flags"); + + if (!info.kprobe_multi.addrs) { + info.kprobe_multi.addrs = ptr_to_u64(addrs); + goto again; + } + for (i = 0; i < KMULTI_CNT; i++) + ASSERT_EQ(addrs[i], kmulti_addrs[i], "kmulti_addrs"); + return 0; +} + +static void verify_kmulti_invalid_user_buffer(int fd) +{ + struct bpf_link_info info; + __u32 len = sizeof(info); + __u64 addrs[KMULTI_CNT]; + int err, i; + + memset(&info, 0, sizeof(info)); + + info.kprobe_multi.count = KMULTI_CNT; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EINVAL, "no_addr"); + + info.kprobe_multi.addrs = ptr_to_u64(addrs); + info.kprobe_multi.count = 0; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EINVAL, "no_cnt"); + + for (i = 0; i < KMULTI_CNT; i++) + addrs[i] = 0; + info.kprobe_multi.count = KMULTI_CNT - 1; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -ENOSPC, "smaller_cnt"); + for (i = 0; i < KMULTI_CNT - 1; i++) + ASSERT_EQ(addrs[i], kmulti_addrs[i], "kmulti_addrs"); + ASSERT_EQ(addrs[i], 0, "kmulti_addrs"); + + for (i = 0; i < KMULTI_CNT; i++) + addrs[i] = 0; + info.kprobe_multi.count = KMULTI_CNT + 1; + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, 0, "bigger_cnt"); + for (i = 0; i < KMULTI_CNT; i++) + ASSERT_EQ(addrs[i], kmulti_addrs[i], "kmulti_addrs"); + + info.kprobe_multi.count = KMULTI_CNT; + info.kprobe_multi.addrs = 0x1; /* invalid addr */ + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EFAULT, "invalid_buff"); +} + +static int symbols_cmp_r(const void *a, const void *b) +{ + const char **str_a = (const char **) a; + const char **str_b = (const char **) b; + + return strcmp(*str_a, *str_b); +} + +static void test_kprobe_multi_fill_link_info(struct test_fill_link_info *skel, + bool retprobe, bool invalid) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + int link_fd, err; + + opts.syms = kmulti_syms; + opts.cnt = KMULTI_CNT; + opts.retprobe = retprobe; + skel->links.kmulti_run = bpf_program__attach_kprobe_multi_opts(skel->progs.kmulti_run, + NULL, &opts); + if (!ASSERT_OK_PTR(skel->links.kmulti_run, "attach_kprobe_multi")) + return; + + link_fd = bpf_link__fd(skel->links.kmulti_run); + if (!invalid) { + err = verify_kmulti_link_info(link_fd, retprobe); + ASSERT_OK(err, "verify_kmulti_link_info"); + } else { + verify_kmulti_invalid_user_buffer(link_fd); + } + bpf_link__detach(skel->links.kmulti_run); +} + +void test_fill_link_info(void) +{ + struct test_fill_link_info *skel; + int i; + + skel = test_fill_link_info__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + /* load kallsyms to compare the addr */ + if (!ASSERT_OK(load_kallsyms_refresh(), "load_kallsyms_refresh")) + goto cleanup; + + kprobe_addr = ksym_get_addr(KPROBE_FUNC); + if (test__start_subtest("kprobe_link_info")) + test_kprobe_fill_link_info(skel, BPF_PERF_EVENT_KPROBE, false); + if (test__start_subtest("kretprobe_link_info")) + test_kprobe_fill_link_info(skel, BPF_PERF_EVENT_KRETPROBE, false); + if (test__start_subtest("kprobe_invalid_ubuff")) + test_kprobe_fill_link_info(skel, BPF_PERF_EVENT_KPROBE, true); + if (test__start_subtest("tracepoint_link_info")) + test_tp_fill_link_info(skel); + + uprobe_offset = get_uprobe_offset(&uprobe_func); + if (test__start_subtest("uprobe_link_info")) + test_uprobe_fill_link_info(skel, BPF_PERF_EVENT_UPROBE); + if (test__start_subtest("uretprobe_link_info")) + test_uprobe_fill_link_info(skel, BPF_PERF_EVENT_URETPROBE); + + qsort(kmulti_syms, KMULTI_CNT, sizeof(kmulti_syms[0]), symbols_cmp_r); + for (i = 0; i < KMULTI_CNT; i++) + kmulti_addrs[i] = ksym_get_addr(kmulti_syms[i]); + if (test__start_subtest("kprobe_multi_link_info")) + test_kprobe_multi_fill_link_info(skel, false, false); + if (test__start_subtest("kretprobe_multi_link_info")) + test_kprobe_multi_fill_link_info(skel, true, false); + if (test__start_subtest("kprobe_multi_invalid_ubuff")) + test_kprobe_multi_fill_link_info(skel, true, true); + +cleanup: + test_fill_link_info__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c index 28cf63963cb7..64a9c95d4acf 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c +++ b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c @@ -30,7 +30,9 @@ void test_get_func_args_test(void) prog_fd = bpf_program__fd(skel->progs.fmod_ret_test); err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "test_run"); - ASSERT_EQ(topts.retval, 1234, "test_run"); + + ASSERT_EQ(topts.retval >> 16, 1, "test_run"); + ASSERT_EQ(topts.retval & 0xffff, 1234 + 29, "test_run"); ASSERT_EQ(skel->bss->test1_result, 1, "test1_result"); ASSERT_EQ(skel->bss->test2_result, 1, "test2_result"); diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c index fede8ef58b5b..c40242dfa8fb 100644 --- a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c +++ b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c @@ -1,6 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> #include "get_func_ip_test.skel.h" +#include "get_func_ip_uprobe_test.skel.h" + +static noinline void uprobe_trigger(void) +{ +} static void test_function_entry(void) { @@ -20,6 +25,8 @@ static void test_function_entry(void) if (!ASSERT_OK(err, "get_func_ip_test__attach")) goto cleanup; + skel->bss->uprobe_trigger = (unsigned long) uprobe_trigger; + prog_fd = bpf_program__fd(skel->progs.test1); err = bpf_prog_test_run_opts(prog_fd, &topts); ASSERT_OK(err, "test_run"); @@ -30,21 +37,31 @@ static void test_function_entry(void) ASSERT_OK(err, "test_run"); + uprobe_trigger(); + ASSERT_EQ(skel->bss->test1_result, 1, "test1_result"); ASSERT_EQ(skel->bss->test2_result, 1, "test2_result"); ASSERT_EQ(skel->bss->test3_result, 1, "test3_result"); ASSERT_EQ(skel->bss->test4_result, 1, "test4_result"); ASSERT_EQ(skel->bss->test5_result, 1, "test5_result"); + ASSERT_EQ(skel->bss->test7_result, 1, "test7_result"); + ASSERT_EQ(skel->bss->test8_result, 1, "test8_result"); cleanup: get_func_ip_test__destroy(skel); } -/* test6 is x86_64 specific because of the instruction - * offset, disabling it for all other archs - */ #ifdef __x86_64__ -static void test_function_body(void) +extern void uprobe_trigger_body(void); +asm( +".globl uprobe_trigger_body\n" +".type uprobe_trigger_body, @function\n" +"uprobe_trigger_body:\n" +" nop\n" +" ret\n" +); + +static void test_function_body_kprobe(void) { struct get_func_ip_test *skel = NULL; LIBBPF_OPTS(bpf_test_run_opts, topts); @@ -56,6 +73,9 @@ static void test_function_body(void) if (!ASSERT_OK_PTR(skel, "get_func_ip_test__open")) return; + /* test6 is x86_64 specific and is disabled by default, + * enable it for body test. + */ bpf_program__set_autoload(skel->progs.test6, true); err = get_func_ip_test__load(skel); @@ -79,6 +99,35 @@ cleanup: bpf_link__destroy(link6); get_func_ip_test__destroy(skel); } + +static void test_function_body_uprobe(void) +{ + struct get_func_ip_uprobe_test *skel = NULL; + int err; + + skel = get_func_ip_uprobe_test__open_and_load(); + if (!ASSERT_OK_PTR(skel, "get_func_ip_uprobe_test__open_and_load")) + return; + + err = get_func_ip_uprobe_test__attach(skel); + if (!ASSERT_OK(err, "get_func_ip_test__attach")) + goto cleanup; + + skel->bss->uprobe_trigger_body = (unsigned long) uprobe_trigger_body; + + uprobe_trigger_body(); + + ASSERT_EQ(skel->bss->test1_result, 1, "test1_result"); + +cleanup: + get_func_ip_uprobe_test__destroy(skel); +} + +static void test_function_body(void) +{ + test_function_body_kprobe(); + test_function_body_uprobe(); +} #else #define test_function_body() #endif diff --git a/tools/testing/selftests/bpf/prog_tests/global_map_resize.c b/tools/testing/selftests/bpf/prog_tests/global_map_resize.c new file mode 100644 index 000000000000..56b5baef35c8 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/global_map_resize.c @@ -0,0 +1,235 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ +#include <errno.h> +#include <sys/syscall.h> +#include <unistd.h> +#include "test_global_map_resize.skel.h" +#include "test_progs.h" + +static void run_prog_bss_array_sum(void) +{ + (void)syscall(__NR_getpid); +} + +static void run_prog_data_array_sum(void) +{ + (void)syscall(__NR_getuid); +} + +static void global_map_resize_bss_subtest(void) +{ + int err; + struct test_global_map_resize *skel; + struct bpf_map *map; + const __u32 desired_sz = sizeof(skel->bss->sum) + sysconf(_SC_PAGE_SIZE) * 2; + size_t array_len, actual_sz, new_sz; + + skel = test_global_map_resize__open(); + if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open")) + goto teardown; + + /* set some initial value before resizing. + * it is expected this non-zero value will be preserved + * while resizing. + */ + skel->bss->array[0] = 1; + + /* resize map value and verify the new size */ + map = skel->maps.bss; + err = bpf_map__set_value_size(map, desired_sz); + if (!ASSERT_OK(err, "bpf_map__set_value_size")) + goto teardown; + if (!ASSERT_EQ(bpf_map__value_size(map), desired_sz, "resize")) + goto teardown; + + new_sz = sizeof(skel->data_percpu_arr->percpu_arr[0]) * libbpf_num_possible_cpus(); + err = bpf_map__set_value_size(skel->maps.data_percpu_arr, new_sz); + ASSERT_OK(err, "percpu_arr_resize"); + + /* set the expected number of elements based on the resized array */ + array_len = (desired_sz - sizeof(skel->bss->sum)) / sizeof(skel->bss->array[0]); + if (!ASSERT_GT(array_len, 1, "array_len")) + goto teardown; + + skel->bss = bpf_map__initial_value(skel->maps.bss, &actual_sz); + if (!ASSERT_OK_PTR(skel->bss, "bpf_map__initial_value (ptr)")) + goto teardown; + if (!ASSERT_EQ(actual_sz, desired_sz, "bpf_map__initial_value (size)")) + goto teardown; + + /* fill the newly resized array with ones, + * skipping the first element which was previously set + */ + for (int i = 1; i < array_len; i++) + skel->bss->array[i] = 1; + + /* set global const values before loading */ + skel->rodata->pid = getpid(); + skel->rodata->bss_array_len = array_len; + skel->rodata->data_array_len = 1; + + err = test_global_map_resize__load(skel); + if (!ASSERT_OK(err, "test_global_map_resize__load")) + goto teardown; + err = test_global_map_resize__attach(skel); + if (!ASSERT_OK(err, "test_global_map_resize__attach")) + goto teardown; + + /* run the bpf program which will sum the contents of the array. + * since the array was filled with ones,verify the sum equals array_len + */ + run_prog_bss_array_sum(); + if (!ASSERT_EQ(skel->bss->sum, array_len, "sum")) + goto teardown; + +teardown: + test_global_map_resize__destroy(skel); +} + +static void global_map_resize_data_subtest(void) +{ + struct test_global_map_resize *skel; + struct bpf_map *map; + const __u32 desired_sz = sysconf(_SC_PAGE_SIZE) * 2; + size_t array_len, actual_sz, new_sz; + int err; + + skel = test_global_map_resize__open(); + if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open")) + goto teardown; + + /* set some initial value before resizing. + * it is expected this non-zero value will be preserved + * while resizing. + */ + skel->data_custom->my_array[0] = 1; + + /* resize map value and verify the new size */ + map = skel->maps.data_custom; + err = bpf_map__set_value_size(map, desired_sz); + if (!ASSERT_OK(err, "bpf_map__set_value_size")) + goto teardown; + if (!ASSERT_EQ(bpf_map__value_size(map), desired_sz, "resize")) + goto teardown; + + new_sz = sizeof(skel->data_percpu_arr->percpu_arr[0]) * libbpf_num_possible_cpus(); + err = bpf_map__set_value_size(skel->maps.data_percpu_arr, new_sz); + ASSERT_OK(err, "percpu_arr_resize"); + + /* set the expected number of elements based on the resized array */ + array_len = (desired_sz - sizeof(skel->bss->sum)) / sizeof(skel->data_custom->my_array[0]); + if (!ASSERT_GT(array_len, 1, "array_len")) + goto teardown; + + skel->data_custom = bpf_map__initial_value(skel->maps.data_custom, &actual_sz); + if (!ASSERT_OK_PTR(skel->data_custom, "bpf_map__initial_value (ptr)")) + goto teardown; + if (!ASSERT_EQ(actual_sz, desired_sz, "bpf_map__initial_value (size)")) + goto teardown; + + /* fill the newly resized array with ones, + * skipping the first element which was previously set + */ + for (int i = 1; i < array_len; i++) + skel->data_custom->my_array[i] = 1; + + /* set global const values before loading */ + skel->rodata->pid = getpid(); + skel->rodata->bss_array_len = 1; + skel->rodata->data_array_len = array_len; + + err = test_global_map_resize__load(skel); + if (!ASSERT_OK(err, "test_global_map_resize__load")) + goto teardown; + err = test_global_map_resize__attach(skel); + if (!ASSERT_OK(err, "test_global_map_resize__attach")) + goto teardown; + + /* run the bpf program which will sum the contents of the array. + * since the array was filled with ones,verify the sum equals array_len + */ + run_prog_data_array_sum(); + if (!ASSERT_EQ(skel->bss->sum, array_len, "sum")) + goto teardown; + +teardown: + test_global_map_resize__destroy(skel); +} + +static void global_map_resize_invalid_subtest(void) +{ + int err; + struct test_global_map_resize *skel; + struct bpf_map *map; + __u32 element_sz, desired_sz; + + skel = test_global_map_resize__open(); + if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open")) + return; + + /* attempt to resize a global datasec map to size + * which does NOT align with array + */ + map = skel->maps.data_custom; + if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.custom initial btf")) + goto teardown; + /* set desired size a fraction of element size beyond an aligned size */ + element_sz = sizeof(skel->data_custom->my_array[0]); + desired_sz = element_sz + element_sz / 2; + /* confirm desired size does NOT align with array */ + if (!ASSERT_NEQ(desired_sz % element_sz, 0, "my_array alignment")) + goto teardown; + err = bpf_map__set_value_size(map, desired_sz); + /* confirm resize is OK but BTF info is cleared */ + if (!ASSERT_OK(err, ".data.custom bpf_map__set_value_size") || + !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.custom clear btf key") || + !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.custom clear btf val")) + goto teardown; + + /* attempt to resize a global datasec map whose only var is NOT an array */ + map = skel->maps.data_non_array; + if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.non_array initial btf")) + goto teardown; + /* set desired size to arbitrary value */ + desired_sz = 1024; + err = bpf_map__set_value_size(map, desired_sz); + /* confirm resize is OK but BTF info is cleared */ + if (!ASSERT_OK(err, ".data.non_array bpf_map__set_value_size") || + !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.non_array clear btf key") || + !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.non_array clear btf val")) + goto teardown; + + /* attempt to resize a global datasec map + * whose last var is NOT an array + */ + map = skel->maps.data_array_not_last; + if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.array_not_last initial btf")) + goto teardown; + /* set desired size to a multiple of element size */ + element_sz = sizeof(skel->data_array_not_last->my_array_first[0]); + desired_sz = element_sz * 8; + /* confirm desired size aligns with array */ + if (!ASSERT_EQ(desired_sz % element_sz, 0, "my_array_first alignment")) + goto teardown; + err = bpf_map__set_value_size(map, desired_sz); + /* confirm resize is OK but BTF info is cleared */ + if (!ASSERT_OK(err, ".data.array_not_last bpf_map__set_value_size") || + !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.array_not_last clear btf key") || + !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.array_not_last clear btf val")) + goto teardown; + +teardown: + test_global_map_resize__destroy(skel); +} + +void test_global_map_resize(void) +{ + if (test__start_subtest("global_map_resize_bss")) + global_map_resize_bss_subtest(); + + if (test__start_subtest("global_map_resize_data")) + global_map_resize_data_subtest(); + + if (test__start_subtest("global_map_resize_invalid")) + global_map_resize_invalid_subtest(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c new file mode 100644 index 000000000000..57c814f5f6a7 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c @@ -0,0 +1,283 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <net/if.h> +#include <linux/netfilter.h> +#include <network_helpers.h> +#include "ip_check_defrag.skel.h" +#include "ip_check_defrag_frags.h" + +/* + * This selftest spins up a client and an echo server, each in their own + * network namespace. The client will send a fragmented message to the server. + * The prog attached to the server will shoot down any fragments. Thus, if + * the server is able to correctly echo back the message to the client, we will + * have verified that netfilter is reassembling packets for us. + * + * Topology: + * ========= + * NS0 | NS1 + * | + * client | server + * ---------- | ---------- + * | veth0 | --------- | veth1 | + * ---------- peer ---------- + * | + * | with bpf + */ + +#define NS0 "defrag_ns0" +#define NS1 "defrag_ns1" +#define VETH0 "veth0" +#define VETH1 "veth1" +#define VETH0_ADDR "172.16.1.100" +#define VETH0_ADDR6 "fc00::100" +/* The following constants must stay in sync with `generate_udp_fragments.py` */ +#define VETH1_ADDR "172.16.1.200" +#define VETH1_ADDR6 "fc00::200" +#define CLIENT_PORT 48878 +#define SERVER_PORT 48879 +#define MAGIC_MESSAGE "THIS IS THE ORIGINAL MESSAGE, PLEASE REASSEMBLE ME" + +static int setup_topology(bool ipv6) +{ + bool up; + int i; + + SYS(fail, "ip netns add " NS0); + SYS(fail, "ip netns add " NS1); + SYS(fail, "ip link add " VETH0 " netns " NS0 " type veth peer name " VETH1 " netns " NS1); + if (ipv6) { + SYS(fail, "ip -6 -net " NS0 " addr add " VETH0_ADDR6 "/64 dev " VETH0 " nodad"); + SYS(fail, "ip -6 -net " NS1 " addr add " VETH1_ADDR6 "/64 dev " VETH1 " nodad"); + } else { + SYS(fail, "ip -net " NS0 " addr add " VETH0_ADDR "/24 dev " VETH0); + SYS(fail, "ip -net " NS1 " addr add " VETH1_ADDR "/24 dev " VETH1); + } + SYS(fail, "ip -net " NS0 " link set dev " VETH0 " up"); + SYS(fail, "ip -net " NS1 " link set dev " VETH1 " up"); + + /* Wait for up to 5s for links to come up */ + for (i = 0; i < 5; ++i) { + if (ipv6) + up = !system("ip netns exec " NS0 " ping -6 -c 1 -W 1 " VETH1_ADDR6 " &>/dev/null"); + else + up = !system("ip netns exec " NS0 " ping -c 1 -W 1 " VETH1_ADDR " &>/dev/null"); + + if (up) + break; + } + + return 0; +fail: + return -1; +} + +static void cleanup_topology(void) +{ + SYS_NOFAIL("test -f /var/run/netns/" NS0 " && ip netns delete " NS0); + SYS_NOFAIL("test -f /var/run/netns/" NS1 " && ip netns delete " NS1); +} + +static int attach(struct ip_check_defrag *skel, bool ipv6) +{ + LIBBPF_OPTS(bpf_netfilter_opts, opts, + .pf = ipv6 ? NFPROTO_IPV6 : NFPROTO_IPV4, + .priority = 42, + .flags = BPF_F_NETFILTER_IP_DEFRAG); + struct nstoken *nstoken; + int err = -1; + + nstoken = open_netns(NS1); + + skel->links.defrag = bpf_program__attach_netfilter(skel->progs.defrag, &opts); + if (!ASSERT_OK_PTR(skel->links.defrag, "program attach")) + goto out; + + err = 0; +out: + close_netns(nstoken); + return err; +} + +static int send_frags(int client) +{ + struct sockaddr_storage saddr; + struct sockaddr *saddr_p; + socklen_t saddr_len; + int err; + + saddr_p = (struct sockaddr *)&saddr; + err = make_sockaddr(AF_INET, VETH1_ADDR, SERVER_PORT, &saddr, &saddr_len); + if (!ASSERT_OK(err, "make_sockaddr")) + return -1; + + err = sendto(client, frag_0, sizeof(frag_0), 0, saddr_p, saddr_len); + if (!ASSERT_GE(err, 0, "sendto frag_0")) + return -1; + + err = sendto(client, frag_1, sizeof(frag_1), 0, saddr_p, saddr_len); + if (!ASSERT_GE(err, 0, "sendto frag_1")) + return -1; + + err = sendto(client, frag_2, sizeof(frag_2), 0, saddr_p, saddr_len); + if (!ASSERT_GE(err, 0, "sendto frag_2")) + return -1; + + return 0; +} + +static int send_frags6(int client) +{ + struct sockaddr_storage saddr; + struct sockaddr *saddr_p; + socklen_t saddr_len; + int err; + + saddr_p = (struct sockaddr *)&saddr; + /* Port needs to be set to 0 for raw ipv6 socket for some reason */ + err = make_sockaddr(AF_INET6, VETH1_ADDR6, 0, &saddr, &saddr_len); + if (!ASSERT_OK(err, "make_sockaddr")) + return -1; + + err = sendto(client, frag6_0, sizeof(frag6_0), 0, saddr_p, saddr_len); + if (!ASSERT_GE(err, 0, "sendto frag6_0")) + return -1; + + err = sendto(client, frag6_1, sizeof(frag6_1), 0, saddr_p, saddr_len); + if (!ASSERT_GE(err, 0, "sendto frag6_1")) + return -1; + + err = sendto(client, frag6_2, sizeof(frag6_2), 0, saddr_p, saddr_len); + if (!ASSERT_GE(err, 0, "sendto frag6_2")) + return -1; + + return 0; +} + +void test_bpf_ip_check_defrag_ok(bool ipv6) +{ + struct network_helper_opts rx_opts = { + .timeout_ms = 1000, + .noconnect = true, + }; + struct network_helper_opts tx_ops = { + .timeout_ms = 1000, + .type = SOCK_RAW, + .proto = IPPROTO_RAW, + .noconnect = true, + }; + struct sockaddr_storage caddr; + struct ip_check_defrag *skel; + struct nstoken *nstoken; + int client_tx_fd = -1; + int client_rx_fd = -1; + socklen_t caddr_len; + int srv_fd = -1; + char buf[1024]; + int len, err; + + skel = ip_check_defrag__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + if (!ASSERT_OK(setup_topology(ipv6), "setup_topology")) + goto out; + + if (!ASSERT_OK(attach(skel, ipv6), "attach")) + goto out; + + /* Start server in ns1 */ + nstoken = open_netns(NS1); + if (!ASSERT_OK_PTR(nstoken, "setns ns1")) + goto out; + srv_fd = start_server(ipv6 ? AF_INET6 : AF_INET, SOCK_DGRAM, NULL, SERVER_PORT, 0); + close_netns(nstoken); + if (!ASSERT_GE(srv_fd, 0, "start_server")) + goto out; + + /* Open tx raw socket in ns0 */ + nstoken = open_netns(NS0); + if (!ASSERT_OK_PTR(nstoken, "setns ns0")) + goto out; + client_tx_fd = connect_to_fd_opts(srv_fd, &tx_ops); + close_netns(nstoken); + if (!ASSERT_GE(client_tx_fd, 0, "connect_to_fd_opts")) + goto out; + + /* Open rx socket in ns0 */ + nstoken = open_netns(NS0); + if (!ASSERT_OK_PTR(nstoken, "setns ns0")) + goto out; + client_rx_fd = connect_to_fd_opts(srv_fd, &rx_opts); + close_netns(nstoken); + if (!ASSERT_GE(client_rx_fd, 0, "connect_to_fd_opts")) + goto out; + + /* Bind rx socket to a premeditated port */ + memset(&caddr, 0, sizeof(caddr)); + nstoken = open_netns(NS0); + if (!ASSERT_OK_PTR(nstoken, "setns ns0")) + goto out; + if (ipv6) { + struct sockaddr_in6 *c = (struct sockaddr_in6 *)&caddr; + + c->sin6_family = AF_INET6; + inet_pton(AF_INET6, VETH0_ADDR6, &c->sin6_addr); + c->sin6_port = htons(CLIENT_PORT); + err = bind(client_rx_fd, (struct sockaddr *)c, sizeof(*c)); + } else { + struct sockaddr_in *c = (struct sockaddr_in *)&caddr; + + c->sin_family = AF_INET; + inet_pton(AF_INET, VETH0_ADDR, &c->sin_addr); + c->sin_port = htons(CLIENT_PORT); + err = bind(client_rx_fd, (struct sockaddr *)c, sizeof(*c)); + } + close_netns(nstoken); + if (!ASSERT_OK(err, "bind")) + goto out; + + /* Send message in fragments */ + if (ipv6) { + if (!ASSERT_OK(send_frags6(client_tx_fd), "send_frags6")) + goto out; + } else { + if (!ASSERT_OK(send_frags(client_tx_fd), "send_frags")) + goto out; + } + + if (!ASSERT_EQ(skel->bss->shootdowns, 0, "shootdowns")) + goto out; + + /* Receive reassembled msg on server and echo back to client */ + caddr_len = sizeof(caddr); + len = recvfrom(srv_fd, buf, sizeof(buf), 0, (struct sockaddr *)&caddr, &caddr_len); + if (!ASSERT_GE(len, 0, "server recvfrom")) + goto out; + len = sendto(srv_fd, buf, len, 0, (struct sockaddr *)&caddr, caddr_len); + if (!ASSERT_GE(len, 0, "server sendto")) + goto out; + + /* Expect reassembed message to be echoed back */ + len = recvfrom(client_rx_fd, buf, sizeof(buf), 0, NULL, NULL); + if (!ASSERT_EQ(len, sizeof(MAGIC_MESSAGE) - 1, "client short read")) + goto out; + +out: + if (client_rx_fd != -1) + close(client_rx_fd); + if (client_tx_fd != -1) + close(client_tx_fd); + if (srv_fd != -1) + close(srv_fd); + cleanup_topology(); + ip_check_defrag__destroy(skel); +} + +void test_bpf_ip_check_defrag(void) +{ + if (test__start_subtest("v4")) + test_bpf_ip_check_defrag_ok(false); + if (test__start_subtest("v6")) + test_bpf_ip_check_defrag_ok(true); +} diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c index a543742cd7bd..2eb71559713c 100644 --- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c @@ -173,8 +173,8 @@ static void verify_fail(struct kfunc_test_params *param) case tc_test: topts.data_in = &pkt_v4; topts.data_size_in = sizeof(pkt_v4); - break; topts.repeat = 1; + break; } skel = kfunc_call_fail__open_opts(&opts); diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 2173c4bb555e..4041cfa670eb 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -3,6 +3,7 @@ #include "kprobe_multi.skel.h" #include "trace_helpers.h" #include "kprobe_multi_empty.skel.h" +#include "kprobe_multi_override.skel.h" #include "bpf/libbpf_internal.h" #include "bpf/hashmap.h" @@ -304,14 +305,6 @@ cleanup: kprobe_multi__destroy(skel); } -static inline __u64 get_time_ns(void) -{ - struct timespec t; - - clock_gettime(CLOCK_MONOTONIC, &t); - return (__u64) t.tv_sec * 1000000000 + t.tv_nsec; -} - static size_t symbol_hash(long key, void *ctx __maybe_unused) { return str_hash((const char *) key); @@ -461,6 +454,40 @@ cleanup: } } +static void test_attach_override(void) +{ + struct kprobe_multi_override *skel = NULL; + struct bpf_link *link = NULL; + + skel = kprobe_multi_override__open_and_load(); + if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load")) + goto cleanup; + + /* The test_override calls bpf_override_return so it should fail + * to attach to bpf_fentry_test1 function, which is not on error + * injection list. + */ + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_override, + "bpf_fentry_test1", NULL); + if (!ASSERT_ERR_PTR(link, "override_attached_bpf_fentry_test1")) { + bpf_link__destroy(link); + goto cleanup; + } + + /* The should_fail_bio function is on error injection list, + * attach should succeed. + */ + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_override, + "should_fail_bio", NULL); + if (!ASSERT_OK_PTR(link, "override_attached_should_fail_bio")) + goto cleanup; + + bpf_link__destroy(link); + +cleanup: + kprobe_multi_override__destroy(skel); +} + void serial_test_kprobe_multi_bench_attach(void) { if (test__start_subtest("kernel")) @@ -488,4 +515,6 @@ void test_kprobe_multi_test(void) test_attach_api_syms(); if (test__start_subtest("attach_api_fails")) test_attach_api_fails(); + if (test__start_subtest("attach_override")) + test_attach_override(); } diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c index f63309fd0e28..18cf7b17463d 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_list.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c @@ -23,7 +23,7 @@ static struct { "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, \ { #test "_missing_lock_pop_back", \ "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, - TEST(kptr, 32) + TEST(kptr, 40) TEST(global, 16) TEST(map, 0) TEST(inner_map, 0) @@ -31,7 +31,7 @@ static struct { #define TEST(test, op) \ { #test "_kptr_incorrect_lock_" #op, \ "held lock and object are not in the same allocation\n" \ - "bpf_spin_lock at off=32 must be held for bpf_list_head" }, \ + "bpf_spin_lock at off=40 must be held for bpf_list_head" }, \ { #test "_global_incorrect_lock_" #op, \ "held lock and object are not in the same allocation\n" \ "bpf_spin_lock at off=16 must be held for bpf_list_head" }, \ @@ -84,23 +84,23 @@ static struct { { "double_push_back", "arg#1 expected pointer to allocated object" }, { "no_node_value_type", "bpf_list_node not found at offset=0" }, { "incorrect_value_type", - "operation on bpf_list_head expects arg#1 bpf_list_node at offset=40 in struct foo, " + "operation on bpf_list_head expects arg#1 bpf_list_node at offset=48 in struct foo, " "but arg is at offset=0 in struct bar" }, { "incorrect_node_var_off", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" }, - { "incorrect_node_off1", "bpf_list_node not found at offset=41" }, - { "incorrect_node_off2", "arg#1 offset=0, but expected bpf_list_node at offset=40 in struct foo" }, + { "incorrect_node_off1", "bpf_list_node not found at offset=49" }, + { "incorrect_node_off2", "arg#1 offset=0, but expected bpf_list_node at offset=48 in struct foo" }, { "no_head_type", "bpf_list_head not found at offset=0" }, { "incorrect_head_var_off1", "R1 doesn't have constant offset" }, { "incorrect_head_var_off2", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" }, - { "incorrect_head_off1", "bpf_list_head not found at offset=17" }, + { "incorrect_head_off1", "bpf_list_head not found at offset=25" }, { "incorrect_head_off2", "bpf_list_head not found at offset=1" }, { "pop_front_off", - "15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) " - "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) refs=2,4\n" + "15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=48,imm=0) " + "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=48,imm=0) refs=2,4\n" "16: (85) call bpf_this_cpu_ptr#154\nR1 type=ptr_or_null_ expected=percpu_ptr_" }, { "pop_back_off", - "15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) " - "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=40,imm=0) refs=2,4\n" + "15: (bf) r1 = r6 ; R1_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=48,imm=0) " + "R6_w=ptr_or_null_foo(id=4,ref_obj_id=4,off=48,imm=0) refs=2,4\n" "16: (85) call bpf_this_cpu_ptr#154\nR1 type=ptr_or_null_ expected=percpu_ptr_" }, }; @@ -257,7 +257,7 @@ static struct btf *init_btf(void) hid = btf__add_struct(btf, "bpf_list_head", 16); if (!ASSERT_EQ(hid, LIST_HEAD, "btf__add_struct bpf_list_head")) goto end; - nid = btf__add_struct(btf, "bpf_list_node", 16); + nid = btf__add_struct(btf, "bpf_list_node", 24); if (!ASSERT_EQ(nid, LIST_NODE, "btf__add_struct bpf_list_node")) goto end; return btf; @@ -276,7 +276,7 @@ static void list_and_rb_node_same_struct(bool refcount_field) if (!ASSERT_OK_PTR(btf, "init_btf")) return; - bpf_rb_node_btf_id = btf__add_struct(btf, "bpf_rb_node", 24); + bpf_rb_node_btf_id = btf__add_struct(btf, "bpf_rb_node", 32); if (!ASSERT_GT(bpf_rb_node_btf_id, 0, "btf__add_struct bpf_rb_node")) return; @@ -286,17 +286,17 @@ static void list_and_rb_node_same_struct(bool refcount_field) return; } - id = btf__add_struct(btf, "bar", refcount_field ? 44 : 40); + id = btf__add_struct(btf, "bar", refcount_field ? 60 : 56); if (!ASSERT_GT(id, 0, "btf__add_struct bar")) return; err = btf__add_field(btf, "a", LIST_NODE, 0, 0); if (!ASSERT_OK(err, "btf__add_field bar::a")) return; - err = btf__add_field(btf, "c", bpf_rb_node_btf_id, 128, 0); + err = btf__add_field(btf, "c", bpf_rb_node_btf_id, 192, 0); if (!ASSERT_OK(err, "btf__add_field bar::c")) return; if (refcount_field) { - err = btf__add_field(btf, "ref", bpf_refcount_btf_id, 320, 0); + err = btf__add_field(btf, "ref", bpf_refcount_btf_id, 448, 0); if (!ASSERT_OK(err, "btf__add_field bar::ref")) return; } @@ -527,7 +527,7 @@ static void test_btf(void) btf = init_btf(); if (!ASSERT_OK_PTR(btf, "init_btf")) break; - id = btf__add_struct(btf, "foo", 36); + id = btf__add_struct(btf, "foo", 44); if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) break; err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); @@ -536,7 +536,7 @@ static void test_btf(void) err = btf__add_field(btf, "b", LIST_NODE, 128, 0); if (!ASSERT_OK(err, "btf__add_field foo::b")) break; - err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0); if (!ASSERT_OK(err, "btf__add_field foo::c")) break; id = btf__add_decl_tag(btf, "contains:foo:b", 5, 0); @@ -553,7 +553,7 @@ static void test_btf(void) btf = init_btf(); if (!ASSERT_OK_PTR(btf, "init_btf")) break; - id = btf__add_struct(btf, "foo", 36); + id = btf__add_struct(btf, "foo", 44); if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) break; err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); @@ -562,13 +562,13 @@ static void test_btf(void) err = btf__add_field(btf, "b", LIST_NODE, 128, 0); if (!ASSERT_OK(err, "btf__add_field foo::b")) break; - err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0); if (!ASSERT_OK(err, "btf__add_field foo::c")) break; id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0); if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b")) break; - id = btf__add_struct(btf, "bar", 36); + id = btf__add_struct(btf, "bar", 44); if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) break; err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); @@ -577,7 +577,7 @@ static void test_btf(void) err = btf__add_field(btf, "b", LIST_NODE, 128, 0); if (!ASSERT_OK(err, "btf__add_field bar::b")) break; - err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0); if (!ASSERT_OK(err, "btf__add_field bar::c")) break; id = btf__add_decl_tag(btf, "contains:foo:b", 7, 0); @@ -594,19 +594,19 @@ static void test_btf(void) btf = init_btf(); if (!ASSERT_OK_PTR(btf, "init_btf")) break; - id = btf__add_struct(btf, "foo", 20); + id = btf__add_struct(btf, "foo", 28); if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) break; err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); if (!ASSERT_OK(err, "btf__add_field foo::a")) break; - err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0); + err = btf__add_field(btf, "b", SPIN_LOCK, 192, 0); if (!ASSERT_OK(err, "btf__add_field foo::b")) break; id = btf__add_decl_tag(btf, "contains:bar:a", 5, 0); if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:a")) break; - id = btf__add_struct(btf, "bar", 16); + id = btf__add_struct(btf, "bar", 24); if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) break; err = btf__add_field(btf, "a", LIST_NODE, 0, 0); @@ -623,19 +623,19 @@ static void test_btf(void) btf = init_btf(); if (!ASSERT_OK_PTR(btf, "init_btf")) break; - id = btf__add_struct(btf, "foo", 20); + id = btf__add_struct(btf, "foo", 28); if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) break; err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); if (!ASSERT_OK(err, "btf__add_field foo::a")) break; - err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0); + err = btf__add_field(btf, "b", SPIN_LOCK, 192, 0); if (!ASSERT_OK(err, "btf__add_field foo::b")) break; id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0); if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b")) break; - id = btf__add_struct(btf, "bar", 36); + id = btf__add_struct(btf, "bar", 44); if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) break; err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); @@ -644,13 +644,13 @@ static void test_btf(void) err = btf__add_field(btf, "b", LIST_NODE, 128, 0); if (!ASSERT_OK(err, "btf__add_field bar::b")) break; - err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0); if (!ASSERT_OK(err, "btf__add_field bar::c")) break; id = btf__add_decl_tag(btf, "contains:baz:a", 7, 0); if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:baz:a")) break; - id = btf__add_struct(btf, "baz", 16); + id = btf__add_struct(btf, "baz", 24); if (!ASSERT_EQ(id, 9, "btf__add_struct baz")) break; err = btf__add_field(btf, "a", LIST_NODE, 0, 0); @@ -667,7 +667,7 @@ static void test_btf(void) btf = init_btf(); if (!ASSERT_OK_PTR(btf, "init_btf")) break; - id = btf__add_struct(btf, "foo", 36); + id = btf__add_struct(btf, "foo", 44); if (!ASSERT_EQ(id, 5, "btf__add_struct foo")) break; err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); @@ -676,13 +676,13 @@ static void test_btf(void) err = btf__add_field(btf, "b", LIST_NODE, 128, 0); if (!ASSERT_OK(err, "btf__add_field foo::b")) break; - err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0); if (!ASSERT_OK(err, "btf__add_field foo::c")) break; id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0); if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b")) break; - id = btf__add_struct(btf, "bar", 36); + id = btf__add_struct(btf, "bar", 44); if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) break; err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); @@ -691,13 +691,13 @@ static void test_btf(void) err = btf__add_field(btf, "b", LIST_NODE, 128, 0); if (!ASSERT_OK(err, "btf__add_field bar:b")) break; - err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0); if (!ASSERT_OK(err, "btf__add_field bar:c")) break; id = btf__add_decl_tag(btf, "contains:baz:a", 7, 0); if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:baz:a")) break; - id = btf__add_struct(btf, "baz", 16); + id = btf__add_struct(btf, "baz", 24); if (!ASSERT_EQ(id, 9, "btf__add_struct baz")) break; err = btf__add_field(btf, "a", LIST_NODE, 0, 0); @@ -726,7 +726,7 @@ static void test_btf(void) id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0); if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b")) break; - id = btf__add_struct(btf, "bar", 36); + id = btf__add_struct(btf, "bar", 44); if (!ASSERT_EQ(id, 7, "btf__add_struct bar")) break; err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); @@ -735,13 +735,13 @@ static void test_btf(void) err = btf__add_field(btf, "b", LIST_NODE, 128, 0); if (!ASSERT_OK(err, "btf__add_field bar::b")) break; - err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0); if (!ASSERT_OK(err, "btf__add_field bar::c")) break; id = btf__add_decl_tag(btf, "contains:baz:b", 7, 0); if (!ASSERT_EQ(id, 8, "btf__add_decl_tag")) break; - id = btf__add_struct(btf, "baz", 36); + id = btf__add_struct(btf, "baz", 44); if (!ASSERT_EQ(id, 9, "btf__add_struct baz")) break; err = btf__add_field(btf, "a", LIST_HEAD, 0, 0); @@ -750,13 +750,13 @@ static void test_btf(void) err = btf__add_field(btf, "b", LIST_NODE, 128, 0); if (!ASSERT_OK(err, "btf__add_field bar::b")) break; - err = btf__add_field(btf, "c", SPIN_LOCK, 256, 0); + err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0); if (!ASSERT_OK(err, "btf__add_field bar::c")) break; id = btf__add_decl_tag(btf, "contains:bam:a", 9, 0); if (!ASSERT_EQ(id, 10, "btf__add_decl_tag contains:bam:a")) break; - id = btf__add_struct(btf, "bam", 16); + id = btf__add_struct(btf, "bam", 24); if (!ASSERT_EQ(id, 11, "btf__add_struct bam")) break; err = btf__add_field(btf, "a", LIST_NODE, 0, 0); diff --git a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c index 76f1da877f81..b25b870f87ba 100644 --- a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c @@ -5,6 +5,7 @@ #include <network_helpers.h> #include "local_kptr_stash.skel.h" +#include "local_kptr_stash_fail.skel.h" static void test_local_kptr_stash_simple(void) { LIBBPF_OPTS(bpf_test_run_opts, opts, @@ -26,6 +27,27 @@ static void test_local_kptr_stash_simple(void) local_kptr_stash__destroy(skel); } +static void test_local_kptr_stash_plain(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct local_kptr_stash *skel; + int ret; + + skel = local_kptr_stash__open_and_load(); + if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_plain), &opts); + ASSERT_OK(ret, "local_kptr_stash_add_plain run"); + ASSERT_OK(opts.retval, "local_kptr_stash_add_plain retval"); + + local_kptr_stash__destroy(skel); +} + static void test_local_kptr_stash_unstash(void) { LIBBPF_OPTS(bpf_test_run_opts, opts, @@ -51,10 +73,19 @@ static void test_local_kptr_stash_unstash(void) local_kptr_stash__destroy(skel); } -void test_local_kptr_stash_success(void) +static void test_local_kptr_stash_fail(void) +{ + RUN_TESTS(local_kptr_stash_fail); +} + +void test_local_kptr_stash(void) { if (test__start_subtest("local_kptr_stash_simple")) test_local_kptr_stash_simple(); + if (test__start_subtest("local_kptr_stash_plain")) + test_local_kptr_stash_plain(); if (test__start_subtest("local_kptr_stash_unstash")) test_local_kptr_stash_unstash(); + if (test__start_subtest("local_kptr_stash_fail")) + test_local_kptr_stash_fail(); } diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c b/tools/testing/selftests/bpf/prog_tests/log_fixup.c index dba71d98a227..effd78b2a657 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_fixup.c +++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c @@ -124,7 +124,7 @@ static void missing_map(void) ASSERT_FALSE(bpf_map__autocreate(skel->maps.missing_map), "missing_map_autocreate"); ASSERT_HAS_SUBSTR(log_buf, - "8: <invalid BPF map reference>\n" + ": <invalid BPF map reference>\n" "BPF map 'missing_map' is referenced but wasn't created\n", "log_buf"); diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h new file mode 100644 index 000000000000..61333f2a03f9 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __LWT_HELPERS_H +#define __LWT_HELPERS_H + +#include <time.h> +#include <net/if.h> +#include <linux/if_tun.h> +#include <linux/icmp.h> + +#include "test_progs.h" + +#define log_err(MSG, ...) \ + fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ + __FILE__, __LINE__, strerror(errno), ##__VA_ARGS__) + +#define RUN_TEST(name) \ + ({ \ + if (test__start_subtest(#name)) \ + if (ASSERT_OK(netns_create(), "netns_create")) { \ + struct nstoken *token = open_netns(NETNS); \ + if (ASSERT_OK_PTR(token, "setns")) { \ + test_ ## name(); \ + close_netns(token); \ + } \ + netns_delete(); \ + } \ + }) + +#define NETNS "ns_lwt" + +static inline int netns_create(void) +{ + return system("ip netns add " NETNS); +} + +static inline int netns_delete(void) +{ + return system("ip netns del " NETNS ">/dev/null 2>&1"); +} + +static int open_tuntap(const char *dev_name, bool need_mac) +{ + int err = 0; + struct ifreq ifr; + int fd = open("/dev/net/tun", O_RDWR); + + if (!ASSERT_GT(fd, 0, "open(/dev/net/tun)")) + return -1; + + ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN); + memcpy(ifr.ifr_name, dev_name, IFNAMSIZ); + + err = ioctl(fd, TUNSETIFF, &ifr); + if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) { + close(fd); + return -1; + } + + err = fcntl(fd, F_SETFL, O_NONBLOCK); + if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) { + close(fd); + return -1; + } + + return fd; +} + +#define ICMP_PAYLOAD_SIZE 100 + +/* Match an ICMP packet with payload len ICMP_PAYLOAD_SIZE */ +static int __expect_icmp_ipv4(char *buf, ssize_t len) +{ + struct iphdr *ip = (struct iphdr *)buf; + struct icmphdr *icmp = (struct icmphdr *)(ip + 1); + ssize_t min_header_len = sizeof(*ip) + sizeof(*icmp); + + if (len < min_header_len) + return -1; + + if (ip->protocol != IPPROTO_ICMP) + return -1; + + if (icmp->type != ICMP_ECHO) + return -1; + + return len == ICMP_PAYLOAD_SIZE + min_header_len; +} + +typedef int (*filter_t) (char *, ssize_t); + +/* wait_for_packet - wait for a packet that matches the filter + * + * @fd: tun fd/packet socket to read packet + * @filter: filter function, returning 1 if matches + * @timeout: timeout to wait for the packet + * + * Returns 1 if a matching packet is read, 0 if timeout expired, -1 on error. + */ +static int wait_for_packet(int fd, filter_t filter, struct timeval *timeout) +{ + char buf[4096]; + int max_retry = 5; /* in case we read some spurious packets */ + fd_set fds; + + FD_ZERO(&fds); + while (max_retry--) { + /* Linux modifies timeout arg... So make a copy */ + struct timeval copied_timeout = *timeout; + ssize_t ret = -1; + + FD_SET(fd, &fds); + + ret = select(1 + fd, &fds, NULL, NULL, &copied_timeout); + if (ret <= 0) { + if (errno == EINTR) + continue; + else if (errno == EAGAIN || ret == 0) + return 0; + + log_err("select failed"); + return -1; + } + + ret = read(fd, buf, sizeof(buf)); + + if (ret <= 0) { + log_err("read(dev): %ld", ret); + return -1; + } + + if (filter && filter(buf, ret) > 0) + return 1; + } + + return 0; +} + +#endif /* __LWT_HELPERS_H */ diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c b/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c new file mode 100644 index 000000000000..59b38569f310 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c @@ -0,0 +1,330 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* + * Test suite of lwt_xmit BPF programs that redirect packets + * The file tests focus not only if these programs work as expected normally, + * but also if they can handle abnormal situations gracefully. + * + * WARNING + * ------- + * This test suite may crash the kernel, thus should be run in a VM. + * + * Setup: + * --------- + * All tests are performed in a single netns. Two lwt encap routes are setup for + * each subtest: + * + * ip route add 10.0.0.0/24 encap bpf xmit <obj> sec "<ingress_sec>" dev link_err + * ip route add 20.0.0.0/24 encap bpf xmit <obj> sec "<egress_sec>" dev link_err + * + * Here <obj> is statically defined to test_lwt_redirect.bpf.o, and each section + * of this object holds a program entry to test. The BPF object is built from + * progs/test_lwt_redirect.c. We didn't use generated BPF skeleton since the + * attachment for lwt programs are not supported by libbpf yet. + * + * For testing, ping commands are run in the test netns: + * + * ping 10.0.0.<ifindex> -c 1 -w 1 -s 100 + * ping 20.0.0.<ifindex> -c 1 -w 1 -s 100 + * + * Scenarios: + * -------------------------------- + * 1. Redirect to a running tap/tun device + * 2. Redirect to a down tap/tun device + * 3. Redirect to a vlan device with lower layer down + * + * Case 1, ping packets should be received by packet socket on target device + * when redirected to ingress, and by tun/tap fd when redirected to egress. + * + * Case 2,3 are considered successful as long as they do not crash the kernel + * as a regression. + * + * Case 1,2 use tap device to test redirect to device that requires MAC + * header, and tun device to test the case with no MAC header added. + */ +#include <sys/socket.h> +#include <net/if.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/if_tun.h> +#include <linux/icmp.h> +#include <arpa/inet.h> +#include <unistd.h> +#include <errno.h> +#include <stdbool.h> +#include <stdlib.h> + +#include "lwt_helpers.h" +#include "test_progs.h" +#include "network_helpers.h" + +#define BPF_OBJECT "test_lwt_redirect.bpf.o" +#define INGRESS_SEC(need_mac) ((need_mac) ? "redir_ingress" : "redir_ingress_nomac") +#define EGRESS_SEC(need_mac) ((need_mac) ? "redir_egress" : "redir_egress_nomac") +#define LOCAL_SRC "10.0.0.1" +#define CIDR_TO_INGRESS "10.0.0.0/24" +#define CIDR_TO_EGRESS "20.0.0.0/24" + +/* ping to redirect toward given dev, with last byte of dest IP being the target + * device index. + * + * Note: ping command inside BPF-CI is busybox version, so it does not have certain + * function, such like -m option to set packet mark. + */ +static void ping_dev(const char *dev, bool is_ingress) +{ + int link_index = if_nametoindex(dev); + char ip[256]; + + if (!ASSERT_GE(link_index, 0, "if_nametoindex")) + return; + + if (is_ingress) + snprintf(ip, sizeof(ip), "10.0.0.%d", link_index); + else + snprintf(ip, sizeof(ip), "20.0.0.%d", link_index); + + /* We won't get a reply. Don't fail here */ + SYS_NOFAIL("ping %s -c1 -W1 -s %d >/dev/null 2>&1", + ip, ICMP_PAYLOAD_SIZE); +} + +static int new_packet_sock(const char *ifname) +{ + int err = 0; + int ignore_outgoing = 1; + int ifindex = -1; + int s = -1; + + s = socket(AF_PACKET, SOCK_RAW, 0); + if (!ASSERT_GE(s, 0, "socket(AF_PACKET)")) + return -1; + + ifindex = if_nametoindex(ifname); + if (!ASSERT_GE(ifindex, 0, "if_nametoindex")) { + close(s); + return -1; + } + + struct sockaddr_ll addr = { + .sll_family = AF_PACKET, + .sll_protocol = htons(ETH_P_IP), + .sll_ifindex = ifindex, + }; + + err = bind(s, (struct sockaddr *)&addr, sizeof(addr)); + if (!ASSERT_OK(err, "bind(AF_PACKET)")) { + close(s); + return -1; + } + + /* Use packet socket to capture only the ingress, so we can distinguish + * the case where a regression that actually redirects the packet to + * the egress. + */ + err = setsockopt(s, SOL_PACKET, PACKET_IGNORE_OUTGOING, + &ignore_outgoing, sizeof(ignore_outgoing)); + if (!ASSERT_OK(err, "setsockopt(PACKET_IGNORE_OUTGOING)")) { + close(s); + return -1; + } + + err = fcntl(s, F_SETFL, O_NONBLOCK); + if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) { + close(s); + return -1; + } + + return s; +} + +static int expect_icmp(char *buf, ssize_t len) +{ + struct ethhdr *eth = (struct ethhdr *)buf; + + if (len < (ssize_t)sizeof(*eth)) + return -1; + + if (eth->h_proto == htons(ETH_P_IP)) + return __expect_icmp_ipv4((char *)(eth + 1), len - sizeof(*eth)); + + return -1; +} + +static int expect_icmp_nomac(char *buf, ssize_t len) +{ + return __expect_icmp_ipv4(buf, len); +} + +static void send_and_capture_test_packets(const char *test_name, int tap_fd, + const char *target_dev, bool need_mac) +{ + int psock = -1; + struct timeval timeo = { + .tv_sec = 0, + .tv_usec = 250000, + }; + int ret = -1; + + filter_t filter = need_mac ? expect_icmp : expect_icmp_nomac; + + ping_dev(target_dev, false); + + ret = wait_for_packet(tap_fd, filter, &timeo); + if (!ASSERT_EQ(ret, 1, "wait_for_epacket")) { + log_err("%s egress test fails", test_name); + goto out; + } + + psock = new_packet_sock(target_dev); + ping_dev(target_dev, true); + + ret = wait_for_packet(psock, filter, &timeo); + if (!ASSERT_EQ(ret, 1, "wait_for_ipacket")) { + log_err("%s ingress test fails", test_name); + goto out; + } + +out: + if (psock >= 0) + close(psock); +} + +static int setup_redirect_target(const char *target_dev, bool need_mac) +{ + int target_index = -1; + int tap_fd = -1; + + tap_fd = open_tuntap(target_dev, need_mac); + if (!ASSERT_GE(tap_fd, 0, "open_tuntap")) + goto fail; + + target_index = if_nametoindex(target_dev); + if (!ASSERT_GE(target_index, 0, "if_nametoindex")) + goto fail; + + SYS(fail, "ip link add link_err type dummy"); + SYS(fail, "ip link set lo up"); + SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32"); + SYS(fail, "ip link set link_err up"); + SYS(fail, "ip link set %s up", target_dev); + + SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec %s", + CIDR_TO_INGRESS, BPF_OBJECT, INGRESS_SEC(need_mac)); + + SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec %s", + CIDR_TO_EGRESS, BPF_OBJECT, EGRESS_SEC(need_mac)); + + return tap_fd; + +fail: + if (tap_fd >= 0) + close(tap_fd); + return -1; +} + +static void test_lwt_redirect_normal(void) +{ + const char *target_dev = "tap0"; + int tap_fd = -1; + bool need_mac = true; + + tap_fd = setup_redirect_target(target_dev, need_mac); + if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target")) + return; + + send_and_capture_test_packets(__func__, tap_fd, target_dev, need_mac); + close(tap_fd); +} + +static void test_lwt_redirect_normal_nomac(void) +{ + const char *target_dev = "tun0"; + int tap_fd = -1; + bool need_mac = false; + + tap_fd = setup_redirect_target(target_dev, need_mac); + if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target")) + return; + + send_and_capture_test_packets(__func__, tap_fd, target_dev, need_mac); + close(tap_fd); +} + +/* This test aims to prevent regression of future. As long as the kernel does + * not panic, it is considered as success. + */ +static void __test_lwt_redirect_dev_down(bool need_mac) +{ + const char *target_dev = "tap0"; + int tap_fd = -1; + + tap_fd = setup_redirect_target(target_dev, need_mac); + if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target")) + return; + + SYS(out, "ip link set %s down", target_dev); + ping_dev(target_dev, true); + ping_dev(target_dev, false); + +out: + close(tap_fd); +} + +static void test_lwt_redirect_dev_down(void) +{ + __test_lwt_redirect_dev_down(true); +} + +static void test_lwt_redirect_dev_down_nomac(void) +{ + __test_lwt_redirect_dev_down(false); +} + +/* This test aims to prevent regression of future. As long as the kernel does + * not panic, it is considered as success. + */ +static void test_lwt_redirect_dev_carrier_down(void) +{ + const char *lower_dev = "tap0"; + const char *vlan_dev = "vlan100"; + int tap_fd = -1; + + tap_fd = setup_redirect_target(lower_dev, true); + if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target")) + return; + + SYS(out, "ip link add vlan100 link %s type vlan id 100", lower_dev); + SYS(out, "ip link set %s up", vlan_dev); + SYS(out, "ip link set %s down", lower_dev); + ping_dev(vlan_dev, true); + ping_dev(vlan_dev, false); + +out: + close(tap_fd); +} + +static void *test_lwt_redirect_run(void *arg) +{ + netns_delete(); + RUN_TEST(lwt_redirect_normal); + RUN_TEST(lwt_redirect_normal_nomac); + RUN_TEST(lwt_redirect_dev_down); + RUN_TEST(lwt_redirect_dev_down_nomac); + RUN_TEST(lwt_redirect_dev_carrier_down); + return NULL; +} + +void test_lwt_redirect(void) +{ + pthread_t test_thread; + int err; + + /* Run the tests in their own thread to isolate the namespace changes + * so they do not affect the environment of other tests. + * (specifically needed because of unshare(CLONE_NEWNS) in open_netns()) + */ + err = pthread_create(&test_thread, NULL, &test_lwt_redirect_run, NULL); + if (ASSERT_OK(err, "pthread_create")) + ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join"); +} diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c b/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c new file mode 100644 index 000000000000..f4bb2d5fcae0 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* + * Test suite of lwt BPF programs that reroutes packets + * The file tests focus not only if these programs work as expected normally, + * but also if they can handle abnormal situations gracefully. This test + * suite currently only covers lwt_xmit hook. lwt_in tests have not been + * implemented. + * + * WARNING + * ------- + * This test suite can crash the kernel, thus should be run in a VM. + * + * Setup: + * --------- + * all tests are performed in a single netns. A lwt encap route is setup for + * each subtest: + * + * ip route add 10.0.0.0/24 encap bpf xmit <obj> sec "<section_N>" dev link_err + * + * Here <obj> is statically defined to test_lwt_reroute.bpf.o, and it contains + * a single test program entry. This program sets packet mark by last byte of + * the IPv4 daddr. For example, a packet going to 1.2.3.4 will receive a skb + * mark 4. A packet will only be marked once, and IP x.x.x.0 will be skipped + * to avoid route loop. We didn't use generated BPF skeleton since the + * attachment for lwt programs are not supported by libbpf yet. + * + * The test program will bring up a tun device, and sets up the following + * routes: + * + * ip rule add pref 100 from all fwmark <tun_index> lookup 100 + * ip route add table 100 default dev tun0 + * + * For normal testing, a ping command is running in the test netns: + * + * ping 10.0.0.<tun_index> -c 1 -w 1 -s 100 + * + * For abnormal testing, fq is used as the qdisc of the tun device. Then a UDP + * socket will try to overflow the fq queue and trigger qdisc drop error. + * + * Scenarios: + * -------------------------------- + * 1. Reroute to a running tun device + * 2. Reroute to a device where qdisc drop + * + * For case 1, ping packets should be received by the tun device. + * + * For case 2, force UDP packets to overflow fq limit. As long as kernel + * is not crashed, it is considered successful. + */ +#include "lwt_helpers.h" +#include "network_helpers.h" +#include <linux/net_tstamp.h> + +#define BPF_OBJECT "test_lwt_reroute.bpf.o" +#define LOCAL_SRC "10.0.0.1" +#define TEST_CIDR "10.0.0.0/24" +#define XMIT_HOOK "xmit" +#define XMIT_SECTION "lwt_xmit" +#define NSEC_PER_SEC 1000000000ULL + +/* send a ping to be rerouted to the target device */ +static void ping_once(const char *ip) +{ + /* We won't get a reply. Don't fail here */ + SYS_NOFAIL("ping %s -c1 -W1 -s %d >/dev/null 2>&1", + ip, ICMP_PAYLOAD_SIZE); +} + +/* Send snd_target UDP packets to overflow the fq queue and trigger qdisc drop + * error. This is done via TX tstamp to force buffering delayed packets. + */ +static int overflow_fq(int snd_target, const char *target_ip) +{ + struct sockaddr_in addr = { + .sin_family = AF_INET, + .sin_port = htons(1234), + }; + + char data_buf[8]; /* only #pkts matter, so use a random small buffer */ + char control_buf[CMSG_SPACE(sizeof(uint64_t))]; + struct iovec iov = { + .iov_base = data_buf, + .iov_len = sizeof(data_buf), + }; + int err = -1; + int s = -1; + struct sock_txtime txtime_on = { + .clockid = CLOCK_MONOTONIC, + .flags = 0, + }; + struct msghdr msg = { + .msg_name = &addr, + .msg_namelen = sizeof(addr), + .msg_control = control_buf, + .msg_controllen = sizeof(control_buf), + .msg_iovlen = 1, + .msg_iov = &iov, + }; + struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg); + + memset(data_buf, 0, sizeof(data_buf)); + + s = socket(AF_INET, SOCK_DGRAM, 0); + if (!ASSERT_GE(s, 0, "socket")) + goto out; + + err = setsockopt(s, SOL_SOCKET, SO_TXTIME, &txtime_on, sizeof(txtime_on)); + if (!ASSERT_OK(err, "setsockopt(SO_TXTIME)")) + goto out; + + err = inet_pton(AF_INET, target_ip, &addr.sin_addr); + if (!ASSERT_EQ(err, 1, "inet_pton")) + goto out; + + while (snd_target > 0) { + struct timespec now; + + memset(control_buf, 0, sizeof(control_buf)); + cmsg->cmsg_type = SCM_TXTIME; + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_len = CMSG_LEN(sizeof(uint64_t)); + + err = clock_gettime(CLOCK_MONOTONIC, &now); + if (!ASSERT_OK(err, "clock_gettime(CLOCK_MONOTONIC)")) { + err = -1; + goto out; + } + + *(uint64_t *)CMSG_DATA(cmsg) = (now.tv_nsec + 1) * NSEC_PER_SEC + + now.tv_nsec; + + /* we will intentionally send more than fq limit, so ignore + * the error here. + */ + sendmsg(s, &msg, MSG_NOSIGNAL); + snd_target--; + } + + /* no kernel crash so far is considered success */ + err = 0; + +out: + if (s >= 0) + close(s); + + return err; +} + +static int setup(const char *tun_dev) +{ + int target_index = -1; + int tap_fd = -1; + + tap_fd = open_tuntap(tun_dev, false); + if (!ASSERT_GE(tap_fd, 0, "open_tun")) + return -1; + + target_index = if_nametoindex(tun_dev); + if (!ASSERT_GE(target_index, 0, "if_nametoindex")) + return -1; + + SYS(fail, "ip link add link_err type dummy"); + SYS(fail, "ip link set lo up"); + SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32"); + SYS(fail, "ip link set link_err up"); + SYS(fail, "ip link set %s up", tun_dev); + + SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec lwt_xmit", + TEST_CIDR, BPF_OBJECT); + + SYS(fail, "ip rule add pref 100 from all fwmark %d lookup 100", + target_index); + SYS(fail, "ip route add t 100 default dev %s", tun_dev); + + return tap_fd; + +fail: + if (tap_fd >= 0) + close(tap_fd); + return -1; +} + +static void test_lwt_reroute_normal_xmit(void) +{ + const char *tun_dev = "tun0"; + int tun_fd = -1; + int ifindex = -1; + char ip[256]; + struct timeval timeo = { + .tv_sec = 0, + .tv_usec = 250000, + }; + + tun_fd = setup(tun_dev); + if (!ASSERT_GE(tun_fd, 0, "setup_reroute")) + return; + + ifindex = if_nametoindex(tun_dev); + if (!ASSERT_GE(ifindex, 0, "if_nametoindex")) + return; + + snprintf(ip, 256, "10.0.0.%d", ifindex); + + /* ping packets should be received by the tun device */ + ping_once(ip); + + if (!ASSERT_EQ(wait_for_packet(tun_fd, __expect_icmp_ipv4, &timeo), 1, + "wait_for_packet")) + log_err("%s xmit", __func__); +} + +/* + * Test the failure case when the skb is dropped at the qdisc. This is a + * regression prevention at the xmit hook only. + */ +static void test_lwt_reroute_qdisc_dropped(void) +{ + const char *tun_dev = "tun0"; + int tun_fd = -1; + int ifindex = -1; + char ip[256]; + + tun_fd = setup(tun_dev); + if (!ASSERT_GE(tun_fd, 0, "setup_reroute")) + goto fail; + + SYS(fail, "tc qdisc replace dev %s root fq limit 5 flow_limit 5", tun_dev); + + ifindex = if_nametoindex(tun_dev); + if (!ASSERT_GE(ifindex, 0, "if_nametoindex")) + return; + + snprintf(ip, 256, "10.0.0.%d", ifindex); + ASSERT_EQ(overflow_fq(10, ip), 0, "overflow_fq"); + +fail: + if (tun_fd >= 0) + close(tun_fd); +} + +static void *test_lwt_reroute_run(void *arg) +{ + netns_delete(); + RUN_TEST(lwt_reroute_normal_xmit); + RUN_TEST(lwt_reroute_qdisc_dropped); + return NULL; +} + +void test_lwt_reroute(void) +{ + pthread_t test_thread; + int err; + + /* Run the tests in their own thread to isolate the namespace changes + * so they do not affect the environment of other tests. + * (specifically needed because of unshare(CLONE_NEWNS) in open_netns()) + */ + err = pthread_create(&test_thread, NULL, &test_lwt_reroute_run, NULL); + if (ASSERT_OK(err, "pthread_create")) + ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join"); +} diff --git a/tools/testing/selftests/bpf/prog_tests/modify_return.c b/tools/testing/selftests/bpf/prog_tests/modify_return.c index 5d9955af6247..a70c99c2f8c8 100644 --- a/tools/testing/selftests/bpf/prog_tests/modify_return.c +++ b/tools/testing/selftests/bpf/prog_tests/modify_return.c @@ -41,6 +41,10 @@ static void run_test(__u32 input_retval, __u16 want_side_effect, __s16 want_ret) ASSERT_EQ(skel->bss->fexit_result, 1, "modify_return fexit_result"); ASSERT_EQ(skel->bss->fmod_ret_result, 1, "modify_return fmod_ret_result"); + ASSERT_EQ(skel->bss->fentry_result2, 1, "modify_return fentry_result2"); + ASSERT_EQ(skel->bss->fexit_result2, 1, "modify_return fexit_result2"); + ASSERT_EQ(skel->bss->fmod_ret_result2, 1, "modify_return fmod_ret_result2"); + cleanup: modify_return__destroy(skel); } @@ -49,9 +53,9 @@ cleanup: void serial_test_modify_return(void) { run_test(0 /* input_retval */, - 1 /* want_side_effect */, - 4 /* want_ret */); + 2 /* want_side_effect */, + 33 /* want_ret */); run_test(-EINVAL /* input_retval */, 0 /* want_side_effect */, - -EINVAL /* want_ret */); + -EINVAL * 2 /* want_ret */); } diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c index 7fc01ff490db..f53d658ed080 100644 --- a/tools/testing/selftests/bpf/prog_tests/module_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c @@ -4,6 +4,7 @@ #include <test_progs.h> #include <stdbool.h> #include "test_module_attach.skel.h" +#include "testing_helpers.h" static int duration; @@ -32,11 +33,6 @@ static int trigger_module_test_writable(int *val) return 0; } -static int delete_module(const char *name, int flags) -{ - return syscall(__NR_delete_module, name, flags); -} - void test_module_attach(void) { const int READ_SZ = 456; @@ -93,21 +89,21 @@ void test_module_attach(void) if (!ASSERT_OK_PTR(link, "attach_fentry")) goto cleanup; - ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module"); + ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod"); bpf_link__destroy(link); link = bpf_program__attach(skel->progs.handle_fexit); if (!ASSERT_OK_PTR(link, "attach_fexit")) goto cleanup; - ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module"); + ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod"); bpf_link__destroy(link); link = bpf_program__attach(skel->progs.kprobe_multi); if (!ASSERT_OK_PTR(link, "attach_kprobe_multi")) goto cleanup; - ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module"); + ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod"); bpf_link__destroy(link); cleanup: diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index cd0c42fff7c0..7c0be7cf550b 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -2,17 +2,59 @@ /* Copyright (c) 2020, Tessares SA. */ /* Copyright (c) 2022, SUSE. */ +#include <linux/const.h> +#include <netinet/in.h> #include <test_progs.h> #include "cgroup_helpers.h" #include "network_helpers.h" #include "mptcp_sock.skel.h" +#include "mptcpify.skel.h" #define NS_TEST "mptcp_ns" +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif + +#ifndef SOL_MPTCP +#define SOL_MPTCP 284 +#endif +#ifndef MPTCP_INFO +#define MPTCP_INFO 1 +#endif +#ifndef MPTCP_INFO_FLAG_FALLBACK +#define MPTCP_INFO_FLAG_FALLBACK _BITUL(0) +#endif +#ifndef MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED +#define MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED _BITUL(1) +#endif + #ifndef TCP_CA_NAME_MAX #define TCP_CA_NAME_MAX 16 #endif +struct __mptcp_info { + __u8 mptcpi_subflows; + __u8 mptcpi_add_addr_signal; + __u8 mptcpi_add_addr_accepted; + __u8 mptcpi_subflows_max; + __u8 mptcpi_add_addr_signal_max; + __u8 mptcpi_add_addr_accepted_max; + __u32 mptcpi_flags; + __u32 mptcpi_token; + __u64 mptcpi_write_seq; + __u64 mptcpi_snd_una; + __u64 mptcpi_rcv_nxt; + __u8 mptcpi_local_addr_used; + __u8 mptcpi_local_addr_max; + __u8 mptcpi_csum_enabled; + __u32 mptcpi_retransmits; + __u64 mptcpi_bytes_retrans; + __u64 mptcpi_bytes_sent; + __u64 mptcpi_bytes_received; + __u64 mptcpi_bytes_acked; +}; + struct mptcp_storage { __u32 invoked; __u32 is_mptcp; @@ -22,6 +64,24 @@ struct mptcp_storage { char ca_name[TCP_CA_NAME_MAX]; }; +static struct nstoken *create_netns(void) +{ + SYS(fail, "ip netns add %s", NS_TEST); + SYS(fail, "ip -net %s link set dev lo up", NS_TEST); + + return open_netns(NS_TEST); +fail: + return NULL; +} + +static void cleanup_netns(struct nstoken *nstoken) +{ + if (nstoken) + close_netns(nstoken); + + SYS_NOFAIL("ip netns del %s &> /dev/null", NS_TEST); +} + static int verify_tsk(int map_fd, int client_fd) { int err, cfd = client_fd; @@ -100,24 +160,14 @@ static int run_test(int cgroup_fd, int server_fd, bool is_mptcp) sock_skel = mptcp_sock__open_and_load(); if (!ASSERT_OK_PTR(sock_skel, "skel_open_load")) - return -EIO; + return libbpf_get_error(sock_skel); err = mptcp_sock__attach(sock_skel); if (!ASSERT_OK(err, "skel_attach")) goto out; prog_fd = bpf_program__fd(sock_skel->progs._sockops); - if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd")) { - err = -EIO; - goto out; - } - map_fd = bpf_map__fd(sock_skel->maps.socket_storage_map); - if (!ASSERT_GE(map_fd, 0, "bpf_map__fd")) { - err = -EIO; - goto out; - } - err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0); if (!ASSERT_OK(err, "bpf_prog_attach")) goto out; @@ -147,11 +197,8 @@ static void test_base(void) if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup")) return; - SYS(fail, "ip netns add %s", NS_TEST); - SYS(fail, "ip -net %s link set dev lo up", NS_TEST); - - nstoken = open_netns(NS_TEST); - if (!ASSERT_OK_PTR(nstoken, "open_netns")) + nstoken = create_netns(); + if (!ASSERT_OK_PTR(nstoken, "create_netns")) goto fail; /* without MPTCP */ @@ -174,11 +221,104 @@ with_mptcp: close(server_fd); fail: - if (nstoken) - close_netns(nstoken); + cleanup_netns(nstoken); + close(cgroup_fd); +} - SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null"); +static void send_byte(int fd) +{ + char b = 0x55; + + ASSERT_EQ(write(fd, &b, sizeof(b)), 1, "send single byte"); +} + +static int verify_mptcpify(int server_fd, int client_fd) +{ + struct __mptcp_info info; + socklen_t optlen; + int protocol; + int err = 0; + + optlen = sizeof(protocol); + if (!ASSERT_OK(getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen), + "getsockopt(SOL_PROTOCOL)")) + return -1; + + if (!ASSERT_EQ(protocol, IPPROTO_MPTCP, "protocol isn't MPTCP")) + err++; + optlen = sizeof(info); + if (!ASSERT_OK(getsockopt(client_fd, SOL_MPTCP, MPTCP_INFO, &info, &optlen), + "getsockopt(MPTCP_INFO)")) + return -1; + + if (!ASSERT_GE(info.mptcpi_flags, 0, "unexpected mptcpi_flags")) + err++; + if (!ASSERT_FALSE(info.mptcpi_flags & MPTCP_INFO_FLAG_FALLBACK, + "MPTCP fallback")) + err++; + if (!ASSERT_TRUE(info.mptcpi_flags & MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED, + "no remote key received")) + err++; + + return err; +} + +static int run_mptcpify(int cgroup_fd) +{ + int server_fd, client_fd, err = 0; + struct mptcpify *mptcpify_skel; + + mptcpify_skel = mptcpify__open_and_load(); + if (!ASSERT_OK_PTR(mptcpify_skel, "skel_open_load")) + return libbpf_get_error(mptcpify_skel); + + err = mptcpify__attach(mptcpify_skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + + /* without MPTCP */ + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); + if (!ASSERT_GE(server_fd, 0, "start_server")) { + err = -EIO; + goto out; + } + + client_fd = connect_to_fd(server_fd, 0); + if (!ASSERT_GE(client_fd, 0, "connect to fd")) { + err = -EIO; + goto close_server; + } + + send_byte(client_fd); + + err = verify_mptcpify(server_fd, client_fd); + + close(client_fd); +close_server: + close(server_fd); +out: + mptcpify__destroy(mptcpify_skel); + return err; +} + +static void test_mptcpify(void) +{ + struct nstoken *nstoken = NULL; + int cgroup_fd; + + cgroup_fd = test__join_cgroup("/mptcpify"); + if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup")) + return; + + nstoken = create_netns(); + if (!ASSERT_OK_PTR(nstoken, "create_netns")) + goto fail; + + ASSERT_OK(run_mptcpify(cgroup_fd), "run_mptcpify"); + +fail: + cleanup_netns(nstoken); close(cgroup_fd); } @@ -186,4 +326,6 @@ void test_mptcp(void) { if (test__start_subtest("base")) test_base(); + if (test__start_subtest("mptcpify")) + test_mptcpify(); } diff --git a/tools/testing/selftests/bpf/prog_tests/netcnt.c b/tools/testing/selftests/bpf/prog_tests/netcnt.c index d3915c58d0e1..c3333edd029f 100644 --- a/tools/testing/selftests/bpf/prog_tests/netcnt.c +++ b/tools/testing/selftests/bpf/prog_tests/netcnt.c @@ -67,12 +67,12 @@ void serial_test_netcnt(void) } /* No packets should be lost */ - ASSERT_EQ(packets, 10000, "packets"); + ASSERT_GE(packets, 10000, "packets"); /* Let's check that bytes counter matches the number of packets * multiplied by the size of ipv6 ICMP packet. */ - ASSERT_EQ(bytes, packets * 104, "bytes"); + ASSERT_GE(bytes, packets * 104, "bytes"); err: if (cg_fd != -1) diff --git a/tools/testing/selftests/bpf/prog_tests/netfilter_link_attach.c b/tools/testing/selftests/bpf/prog_tests/netfilter_link_attach.c new file mode 100644 index 000000000000..4297a2a4cb11 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/netfilter_link_attach.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include <netinet/in.h> +#include <linux/netfilter.h> + +#include "test_progs.h" +#include "test_netfilter_link_attach.skel.h" + +struct nf_link_test { + __u32 pf; + __u32 hooknum; + __s32 priority; + __u32 flags; + + bool expect_success; + const char * const name; +}; + +static const struct nf_link_test nf_hook_link_tests[] = { + { .name = "allzero", }, + { .pf = NFPROTO_NUMPROTO, .name = "invalid-pf", }, + { .pf = NFPROTO_IPV4, .hooknum = 42, .name = "invalid-hooknum", }, + { .pf = NFPROTO_IPV4, .priority = INT_MIN, .name = "invalid-priority-min", }, + { .pf = NFPROTO_IPV4, .priority = INT_MAX, .name = "invalid-priority-max", }, + { .pf = NFPROTO_IPV4, .flags = UINT_MAX, .name = "invalid-flags", }, + + { .pf = NFPROTO_INET, .priority = 1, .name = "invalid-inet-not-supported", }, + + { .pf = NFPROTO_IPV4, .priority = -10000, .expect_success = true, .name = "attach ipv4", }, + { .pf = NFPROTO_IPV6, .priority = 10001, .expect_success = true, .name = "attach ipv6", }, +}; + +void test_netfilter_link_attach(void) +{ + struct test_netfilter_link_attach *skel; + struct bpf_program *prog; + LIBBPF_OPTS(bpf_netfilter_opts, opts); + int i; + + skel = test_netfilter_link_attach__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_netfilter_link_attach__open_and_load")) + goto out; + + prog = skel->progs.nf_link_attach_test; + if (!ASSERT_OK_PTR(prog, "attach program")) + goto out; + + for (i = 0; i < ARRAY_SIZE(nf_hook_link_tests); i++) { + struct bpf_link *link; + + if (!test__start_subtest(nf_hook_link_tests[i].name)) + continue; + +#define X(opts, m, i) opts.m = nf_hook_link_tests[(i)].m + X(opts, pf, i); + X(opts, hooknum, i); + X(opts, priority, i); + X(opts, flags, i); +#undef X + link = bpf_program__attach_netfilter(prog, &opts); + if (nf_hook_link_tests[i].expect_success) { + struct bpf_link *link2; + + if (!ASSERT_OK_PTR(link, "program attach successful")) + continue; + + link2 = bpf_program__attach_netfilter(prog, &opts); + ASSERT_ERR_PTR(link2, "attach program with same pf/hook/priority"); + + if (!ASSERT_OK(bpf_link__destroy(link), "link destroy")) + break; + + link2 = bpf_program__attach_netfilter(prog, &opts); + if (!ASSERT_OK_PTR(link2, "program reattach successful")) + continue; + if (!ASSERT_OK(bpf_link__destroy(link2), "link destroy")) + break; + } else { + ASSERT_ERR_PTR(link, "program load failure"); + } + } + +out: + test_netfilter_link_attach__destroy(skel); +} + diff --git a/tools/testing/selftests/bpf/prog_tests/ptr_untrusted.c b/tools/testing/selftests/bpf/prog_tests/ptr_untrusted.c new file mode 100644 index 000000000000..8d077d150c56 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/ptr_untrusted.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */ + +#include <string.h> +#include <linux/bpf.h> +#include <test_progs.h> +#include "test_ptr_untrusted.skel.h" + +#define TP_NAME "sched_switch" + +void serial_test_ptr_untrusted(void) +{ + struct test_ptr_untrusted *skel; + int err; + + skel = test_ptr_untrusted__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + /* First, attach lsm prog */ + skel->links.lsm_run = bpf_program__attach_lsm(skel->progs.lsm_run); + if (!ASSERT_OK_PTR(skel->links.lsm_run, "lsm_attach")) + goto cleanup; + + /* Second, attach raw_tp prog. The lsm prog will be triggered. */ + skel->links.raw_tp_run = bpf_program__attach_raw_tracepoint(skel->progs.raw_tp_run, + TP_NAME); + if (!ASSERT_OK_PTR(skel->links.raw_tp_run, "raw_tp_attach")) + goto cleanup; + + err = strncmp(skel->bss->tp_name, TP_NAME, strlen(TP_NAME)); + ASSERT_EQ(err, 0, "cmp_tp_name"); + +cleanup: + test_ptr_untrusted__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c index 595cbf92bff5..d6bd5e16e637 100644 --- a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c +++ b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c @@ -9,8 +9,38 @@ void test_refcounted_kptr(void) { + RUN_TESTS(refcounted_kptr); } void test_refcounted_kptr_fail(void) { + RUN_TESTS(refcounted_kptr_fail); +} + +void test_refcounted_kptr_wrong_owner(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct refcounted_kptr *skel; + int ret; + + skel = refcounted_kptr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "refcounted_kptr__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_wrong_owner_remove_fail_a1), &opts); + ASSERT_OK(ret, "rbtree_wrong_owner_remove_fail_a1"); + ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a1 retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_wrong_owner_remove_fail_b), &opts); + ASSERT_OK(ret, "rbtree_wrong_owner_remove_fail_b"); + ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_b retval"); + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_wrong_owner_remove_fail_a2), &opts); + ASSERT_OK(ret, "rbtree_wrong_owner_remove_fail_a2"); + ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a2 retval"); + refcounted_kptr__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/sk_storage_omem_uncharge.c b/tools/testing/selftests/bpf/prog_tests/sk_storage_omem_uncharge.c new file mode 100644 index 000000000000..f35852d245e3 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sk_storage_omem_uncharge.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Facebook */ +#include <test_progs.h> +#include <bpf/libbpf.h> +#include <sys/types.h> +#include <sys/socket.h> +#include "sk_storage_omem_uncharge.skel.h" + +void test_sk_storage_omem_uncharge(void) +{ + struct sk_storage_omem_uncharge *skel; + int sk_fd = -1, map_fd, err, value; + socklen_t optlen; + + skel = sk_storage_omem_uncharge__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel open_and_load")) + return; + map_fd = bpf_map__fd(skel->maps.sk_storage); + + /* A standalone socket not binding to addr:port, + * so nentns is not needed. + */ + sk_fd = socket(AF_INET6, SOCK_STREAM, 0); + if (!ASSERT_GE(sk_fd, 0, "socket")) + goto done; + + optlen = sizeof(skel->bss->cookie); + err = getsockopt(sk_fd, SOL_SOCKET, SO_COOKIE, &skel->bss->cookie, &optlen); + if (!ASSERT_OK(err, "getsockopt(SO_COOKIE)")) + goto done; + + value = 0; + err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0); + if (!ASSERT_OK(err, "bpf_map_update_elem(value=0)")) + goto done; + + value = 0xdeadbeef; + err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0); + if (!ASSERT_OK(err, "bpf_map_update_elem(value=0xdeadbeef)")) + goto done; + + err = sk_storage_omem_uncharge__attach(skel); + if (!ASSERT_OK(err, "attach")) + goto done; + + close(sk_fd); + sk_fd = -1; + + ASSERT_EQ(skel->bss->cookie_found, 2, "cookie_found"); + ASSERT_EQ(skel->bss->omem, 0, "omem"); + +done: + sk_storage_omem_uncharge__destroy(skel); + if (sk_fd != -1) + close(sk_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sock_destroy.c b/tools/testing/selftests/bpf/prog_tests/sock_destroy.c new file mode 100644 index 000000000000..b0583309a94e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sock_destroy.c @@ -0,0 +1,221 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <bpf/bpf_endian.h> + +#include "sock_destroy_prog.skel.h" +#include "sock_destroy_prog_fail.skel.h" +#include "network_helpers.h" + +#define TEST_NS "sock_destroy_netns" + +static void start_iter_sockets(struct bpf_program *prog) +{ + struct bpf_link *link; + char buf[50] = {}; + int iter_fd, len; + + link = bpf_program__attach_iter(prog, NULL); + if (!ASSERT_OK_PTR(link, "attach_iter")) + return; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_GE(iter_fd, 0, "create_iter")) + goto free_link; + + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) + ; + ASSERT_GE(len, 0, "read"); + + close(iter_fd); + +free_link: + bpf_link__destroy(link); +} + +static void test_tcp_client(struct sock_destroy_prog *skel) +{ + int serv = -1, clien = -1, accept_serv = -1, n; + + serv = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); + if (!ASSERT_GE(serv, 0, "start_server")) + goto cleanup; + + clien = connect_to_fd(serv, 0); + if (!ASSERT_GE(clien, 0, "connect_to_fd")) + goto cleanup; + + accept_serv = accept(serv, NULL, NULL); + if (!ASSERT_GE(accept_serv, 0, "serv accept")) + goto cleanup; + + n = send(clien, "t", 1, 0); + if (!ASSERT_EQ(n, 1, "client send")) + goto cleanup; + + /* Run iterator program that destroys connected client sockets. */ + start_iter_sockets(skel->progs.iter_tcp6_client); + + n = send(clien, "t", 1, 0); + if (!ASSERT_LT(n, 0, "client_send on destroyed socket")) + goto cleanup; + ASSERT_EQ(errno, ECONNABORTED, "error code on destroyed socket"); + +cleanup: + if (clien != -1) + close(clien); + if (accept_serv != -1) + close(accept_serv); + if (serv != -1) + close(serv); +} + +static void test_tcp_server(struct sock_destroy_prog *skel) +{ + int serv = -1, clien = -1, accept_serv = -1, n, serv_port; + + serv = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); + if (!ASSERT_GE(serv, 0, "start_server")) + goto cleanup; + serv_port = get_socket_local_port(serv); + if (!ASSERT_GE(serv_port, 0, "get_sock_local_port")) + goto cleanup; + skel->bss->serv_port = (__be16) serv_port; + + clien = connect_to_fd(serv, 0); + if (!ASSERT_GE(clien, 0, "connect_to_fd")) + goto cleanup; + + accept_serv = accept(serv, NULL, NULL); + if (!ASSERT_GE(accept_serv, 0, "serv accept")) + goto cleanup; + + n = send(clien, "t", 1, 0); + if (!ASSERT_EQ(n, 1, "client send")) + goto cleanup; + + /* Run iterator program that destroys server sockets. */ + start_iter_sockets(skel->progs.iter_tcp6_server); + + n = send(clien, "t", 1, 0); + if (!ASSERT_LT(n, 0, "client_send on destroyed socket")) + goto cleanup; + ASSERT_EQ(errno, ECONNRESET, "error code on destroyed socket"); + +cleanup: + if (clien != -1) + close(clien); + if (accept_serv != -1) + close(accept_serv); + if (serv != -1) + close(serv); +} + +static void test_udp_client(struct sock_destroy_prog *skel) +{ + int serv = -1, clien = -1, n = 0; + + serv = start_server(AF_INET6, SOCK_DGRAM, NULL, 0, 0); + if (!ASSERT_GE(serv, 0, "start_server")) + goto cleanup; + + clien = connect_to_fd(serv, 0); + if (!ASSERT_GE(clien, 0, "connect_to_fd")) + goto cleanup; + + n = send(clien, "t", 1, 0); + if (!ASSERT_EQ(n, 1, "client send")) + goto cleanup; + + /* Run iterator program that destroys sockets. */ + start_iter_sockets(skel->progs.iter_udp6_client); + + n = send(clien, "t", 1, 0); + if (!ASSERT_LT(n, 0, "client_send on destroyed socket")) + goto cleanup; + /* UDP sockets have an overriding error code after they are disconnected, + * so we don't check for ECONNABORTED error code. + */ + +cleanup: + if (clien != -1) + close(clien); + if (serv != -1) + close(serv); +} + +static void test_udp_server(struct sock_destroy_prog *skel) +{ + int *listen_fds = NULL, n, i, serv_port; + unsigned int num_listens = 5; + char buf[1]; + + /* Start reuseport servers. */ + listen_fds = start_reuseport_server(AF_INET6, SOCK_DGRAM, + "::1", 0, 0, num_listens); + if (!ASSERT_OK_PTR(listen_fds, "start_reuseport_server")) + goto cleanup; + serv_port = get_socket_local_port(listen_fds[0]); + if (!ASSERT_GE(serv_port, 0, "get_sock_local_port")) + goto cleanup; + skel->bss->serv_port = (__be16) serv_port; + + /* Run iterator program that destroys server sockets. */ + start_iter_sockets(skel->progs.iter_udp6_server); + + for (i = 0; i < num_listens; ++i) { + n = read(listen_fds[i], buf, sizeof(buf)); + if (!ASSERT_EQ(n, -1, "read") || + !ASSERT_EQ(errno, ECONNABORTED, "error code on destroyed socket")) + break; + } + ASSERT_EQ(i, num_listens, "server socket"); + +cleanup: + free_fds(listen_fds, num_listens); +} + +void test_sock_destroy(void) +{ + struct sock_destroy_prog *skel; + struct nstoken *nstoken = NULL; + int cgroup_fd; + + skel = sock_destroy_prog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + cgroup_fd = test__join_cgroup("/sock_destroy"); + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup")) + goto cleanup; + + skel->links.sock_connect = bpf_program__attach_cgroup( + skel->progs.sock_connect, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.sock_connect, "prog_attach")) + goto cleanup; + + SYS(cleanup, "ip netns add %s", TEST_NS); + SYS(cleanup, "ip -net %s link set dev lo up", TEST_NS); + + nstoken = open_netns(TEST_NS); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto cleanup; + + if (test__start_subtest("tcp_client")) + test_tcp_client(skel); + if (test__start_subtest("tcp_server")) + test_tcp_server(skel); + if (test__start_subtest("udp_client")) + test_udp_client(skel); + if (test__start_subtest("udp_server")) + test_udp_server(skel); + + RUN_TESTS(sock_destroy_prog_fail); + +cleanup: + if (nstoken) + close_netns(nstoken); + SYS_NOFAIL("ip netns del " TEST_NS " &> /dev/null"); + if (cgroup_fd >= 0) + close(cgroup_fd); + sock_destroy_prog__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 064cc5e8d9ad..dda7060e86a0 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -475,6 +475,55 @@ out: test_sockmap_drop_prog__destroy(drop); } +static void test_sockmap_skb_verdict_peek(void) +{ + int err, map, verdict, s, c1, p1, zero = 0, sent, recvd, avail; + struct test_sockmap_pass_prog *pass; + char snd[256] = "0123456789"; + char rcv[256] = "0"; + + pass = test_sockmap_pass_prog__open_and_load(); + if (!ASSERT_OK_PTR(pass, "open_and_load")) + return; + verdict = bpf_program__fd(pass->progs.prog_skb_verdict); + map = bpf_map__fd(pass->maps.sock_map_rx); + + err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach")) + goto out; + + s = socket_loopback(AF_INET, SOCK_STREAM); + if (!ASSERT_GT(s, -1, "socket_loopback(s)")) + goto out; + + err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1); + if (!ASSERT_OK(err, "create_pairs(s)")) + goto out; + + err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(c1)")) + goto out_close; + + sent = xsend(p1, snd, sizeof(snd), 0); + ASSERT_EQ(sent, sizeof(snd), "xsend(p1)"); + recvd = recv(c1, rcv, sizeof(rcv), MSG_PEEK); + ASSERT_EQ(recvd, sizeof(rcv), "recv(c1)"); + err = ioctl(c1, FIONREAD, &avail); + ASSERT_OK(err, "ioctl(FIONREAD) error"); + ASSERT_EQ(avail, sizeof(snd), "after peek ioctl(FIONREAD)"); + recvd = recv(c1, rcv, sizeof(rcv), 0); + ASSERT_EQ(recvd, sizeof(rcv), "recv(p0)"); + err = ioctl(c1, FIONREAD, &avail); + ASSERT_OK(err, "ioctl(FIONREAD) error"); + ASSERT_EQ(avail, 0, "after read ioctl(FIONREAD)"); + +out_close: + close(c1); + close(p1); +out: + test_sockmap_pass_prog__destroy(pass); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -515,4 +564,6 @@ void test_sockmap_basic(void) test_sockmap_skb_verdict_fionread(true); if (test__start_subtest("sockmap skb_verdict fionread on drop")) test_sockmap_skb_verdict_fionread(false); + if (test__start_subtest("sockmap skb_verdict msg_f_peek")) + test_sockmap_skb_verdict_peek(); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h index d12665490a90..36d829a65aa4 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h @@ -179,6 +179,32 @@ __ret; \ }) +static inline int poll_connect(int fd, unsigned int timeout_sec) +{ + struct timeval timeout = { .tv_sec = timeout_sec }; + fd_set wfds; + int r, eval; + socklen_t esize = sizeof(eval); + + FD_ZERO(&wfds); + FD_SET(fd, &wfds); + + r = select(fd + 1, NULL, &wfds, NULL, &timeout); + if (r == 0) + errno = ETIME; + if (r != 1) + return -1; + + if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0) + return -1; + if (eval != 0) { + errno = eval; + return -1; + } + + return 0; +} + static inline int poll_read(int fd, unsigned int timeout_sec) { struct timeval timeout = { .tv_sec = timeout_sec }; diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index b4f6f3a50ae5..8df8cbb447f1 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -869,6 +869,77 @@ static void test_msg_redir_to_listening(struct test_sockmap_listen *skel, xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT); } +static void redir_partial(int family, int sotype, int sock_map, int parser_map) +{ + int s, c0, c1, p0, p1; + int err, n, key, value; + char buf[] = "abc"; + + key = 0; + value = sizeof(buf) - 1; + err = xbpf_map_update_elem(parser_map, &key, &value, 0); + if (err) + return; + + s = socket_loopback(family, sotype | SOCK_NONBLOCK); + if (s < 0) + goto clean_parser_map; + + err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1); + if (err) + goto close_srv; + + err = add_to_sockmap(sock_map, p0, p1); + if (err) + goto close; + + n = xsend(c1, buf, sizeof(buf), 0); + if (n < sizeof(buf)) + FAIL("incomplete write"); + + n = xrecv_nonblock(c0, buf, sizeof(buf), 0); + if (n != sizeof(buf) - 1) + FAIL("expect %zu, received %d", sizeof(buf) - 1, n); + +close: + xclose(c0); + xclose(p0); + xclose(c1); + xclose(p1); +close_srv: + xclose(s); + +clean_parser_map: + key = 0; + value = 0; + xbpf_map_update_elem(parser_map, &key, &value, 0); +} + +static void test_skb_redir_partial(struct test_sockmap_listen *skel, + struct bpf_map *inner_map, int family, + int sotype) +{ + int verdict = bpf_program__fd(skel->progs.prog_stream_verdict); + int parser = bpf_program__fd(skel->progs.prog_stream_parser); + int parser_map = bpf_map__fd(skel->maps.parser_map); + int sock_map = bpf_map__fd(inner_map); + int err; + + err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0); + if (err) + return; + + err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (err) + goto detach; + + redir_partial(family, sotype, sock_map, parser_map); + + xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT); +detach: + xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER); +} + static void test_reuseport_select_listening(int family, int sotype, int sock_map, int verd_map, int reuseport_prog) @@ -1243,6 +1314,7 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map, } tests[] = { TEST(test_skb_redir_to_connected), TEST(test_skb_redir_to_listening), + TEST(test_skb_redir_partial), TEST(test_msg_redir_to_connected), TEST(test_msg_redir_to_listening), }; @@ -1380,11 +1452,18 @@ static int vsock_socketpair_connectible(int sotype, int *v0, int *v1) if (p < 0) goto close_cli; + if (poll_connect(c, IO_TIMEOUT_SEC) < 0) { + FAIL_ERRNO("poll_connect"); + goto close_acc; + } + *v0 = p; *v1 = c; return 0; +close_acc: + close(p); close_cli: close(c); close_srv: @@ -1432,7 +1511,7 @@ static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd, if (n < 1) goto out; - n = recv(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), MSG_DONTWAIT); + n = xrecv_nonblock(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), 0); if (n < 0) FAIL("%s: recv() err, errno=%d", log_prefix, errno); if (n == 0) diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt.c b/tools/testing/selftests/bpf/prog_tests/sockopt.c index aa4debf62fc6..9e6a5e3ed4de 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt.c @@ -5,10 +5,15 @@ static char bpf_log_buf[4096]; static bool verbose; +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif + enum sockopt_test_error { OK = 0, DENY_LOAD, DENY_ATTACH, + EOPNOTSUPP_GETSOCKOPT, EPERM_GETSOCKOPT, EFAULT_GETSOCKOPT, EPERM_SETSOCKOPT, @@ -273,10 +278,31 @@ static struct sockopt_test { .error = EFAULT_GETSOCKOPT, }, { - .descr = "getsockopt: deny arbitrary ctx->retval", + .descr = "getsockopt: ignore >PAGE_SIZE optlen", .insns = { - /* ctx->retval = 123 */ - BPF_MOV64_IMM(BPF_REG_0, 123), + /* write 0xFF to the first optval byte */ + + /* r6 = ctx->optval */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, + offsetof(struct bpf_sockopt, optval)), + /* r2 = ctx->optval */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_6), + /* r6 = ctx->optval + 1 */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + + /* r7 = ctx->optval_end */ + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sockopt, optval_end)), + + /* if (ctx->optval + 1 <= ctx->optval_end) { */ + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1), + /* ctx->optval[0] = 0xF0 */ + BPF_ST_MEM(BPF_B, BPF_REG_2, 0, 0xFF), + /* } */ + + /* retval changes are ignored */ + /* ctx->retval = 5 */ + BPF_MOV64_IMM(BPF_REG_0, 5), BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, offsetof(struct bpf_sockopt, retval)), @@ -287,9 +313,11 @@ static struct sockopt_test { .attach_type = BPF_CGROUP_GETSOCKOPT, .expected_attach_type = BPF_CGROUP_GETSOCKOPT, - .get_optlen = 64, - - .error = EFAULT_GETSOCKOPT, + .get_level = 1234, + .get_optname = 5678, + .get_optval = {}, /* the changes are ignored */ + .get_optlen = PAGE_SIZE + 1, + .error = EOPNOTSUPP_GETSOCKOPT, }, { .descr = "getsockopt: support smaller ctx->optlen", @@ -649,6 +677,45 @@ static struct sockopt_test { .error = EFAULT_SETSOCKOPT, }, { + .descr = "setsockopt: ignore >PAGE_SIZE optlen", + .insns = { + /* write 0xFF to the first optval byte */ + + /* r6 = ctx->optval */ + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, + offsetof(struct bpf_sockopt, optval)), + /* r2 = ctx->optval */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_6), + /* r6 = ctx->optval + 1 */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + + /* r7 = ctx->optval_end */ + BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1, + offsetof(struct bpf_sockopt, optval_end)), + + /* if (ctx->optval + 1 <= ctx->optval_end) { */ + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1), + /* ctx->optval[0] = 0xF0 */ + BPF_ST_MEM(BPF_B, BPF_REG_2, 0, 0xF0), + /* } */ + + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .attach_type = BPF_CGROUP_SETSOCKOPT, + .expected_attach_type = BPF_CGROUP_SETSOCKOPT, + + .set_level = SOL_IP, + .set_optname = IP_TOS, + .set_optval = {}, + .set_optlen = PAGE_SIZE + 1, + + .get_level = SOL_IP, + .get_optname = IP_TOS, + .get_optval = {}, /* the changes are ignored */ + .get_optlen = 4, + }, + { .descr = "setsockopt: allow changing ctx->optlen within bounds", .insns = { /* r6 = ctx->optval */ @@ -906,6 +973,13 @@ static int run_test(int cgroup_fd, struct sockopt_test *test) } if (test->set_optlen) { + if (test->set_optlen >= PAGE_SIZE) { + int num_pages = test->set_optlen / PAGE_SIZE; + int remainder = test->set_optlen % PAGE_SIZE; + + test->set_optlen = num_pages * sysconf(_SC_PAGESIZE) + remainder; + } + err = setsockopt(sock_fd, test->set_level, test->set_optname, test->set_optval, test->set_optlen); if (err) { @@ -921,7 +995,15 @@ static int run_test(int cgroup_fd, struct sockopt_test *test) } if (test->get_optlen) { + if (test->get_optlen >= PAGE_SIZE) { + int num_pages = test->get_optlen / PAGE_SIZE; + int remainder = test->get_optlen % PAGE_SIZE; + + test->get_optlen = num_pages * sysconf(_SC_PAGESIZE) + remainder; + } + optval = malloc(test->get_optlen); + memset(optval, 0, test->get_optlen); socklen_t optlen = test->get_optlen; socklen_t expected_get_optlen = test->get_optlen_ret ?: test->get_optlen; @@ -929,6 +1011,8 @@ static int run_test(int cgroup_fd, struct sockopt_test *test) err = getsockopt(sock_fd, test->get_level, test->get_optname, optval, &optlen); if (err) { + if (errno == EOPNOTSUPP && test->error == EOPNOTSUPP_GETSOCKOPT) + goto free_optval; if (errno == EPERM && test->error == EPERM_GETSOCKOPT) goto free_optval; if (errno == EFAULT && test->error == EFAULT_GETSOCKOPT) @@ -976,7 +1060,9 @@ void test_sockopt(void) return; for (i = 0; i < ARRAY_SIZE(tests); i++) { - test__start_subtest(tests[i].descr); + if (!test__start_subtest(tests[i].descr)) + continue; + ASSERT_OK(run_test(cgroup_fd, &tests[i]), tests[i].descr); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c index 60c17a8e2789..917f486db826 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c @@ -2,6 +2,8 @@ #include <test_progs.h> #include "cgroup_helpers.h" +#include "sockopt_inherit.skel.h" + #define SOL_CUSTOM 0xdeadbeef #define CUSTOM_INHERIT1 0 #define CUSTOM_INHERIT2 1 @@ -132,58 +134,30 @@ static int start_server(void) return fd; } -static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title, - const char *prog_name) -{ - enum bpf_attach_type attach_type; - enum bpf_prog_type prog_type; - struct bpf_program *prog; - int err; - - err = libbpf_prog_type_by_name(title, &prog_type, &attach_type); - if (err) { - log_err("Failed to deduct types for %s BPF program", prog_name); - return -1; - } - - prog = bpf_object__find_program_by_name(obj, prog_name); - if (!prog) { - log_err("Failed to find %s BPF program", prog_name); - return -1; - } - - err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, - attach_type, 0); - if (err) { - log_err("Failed to attach %s BPF program", prog_name); - return -1; - } - - return 0; -} - static void run_test(int cgroup_fd) { + struct bpf_link *link_getsockopt = NULL; + struct bpf_link *link_setsockopt = NULL; int server_fd = -1, client_fd; - struct bpf_object *obj; + struct sockopt_inherit *obj; void *server_err; pthread_t tid; int err; - obj = bpf_object__open_file("sockopt_inherit.bpf.o", NULL); - if (!ASSERT_OK_PTR(obj, "obj_open")) + obj = sockopt_inherit__open_and_load(); + if (!ASSERT_OK_PTR(obj, "skel-load")) return; - err = bpf_object__load(obj); - if (!ASSERT_OK(err, "obj_load")) - goto close_bpf_object; + obj->bss->page_size = sysconf(_SC_PAGESIZE); - err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt", "_getsockopt"); - if (!ASSERT_OK(err, "prog_attach _getsockopt")) + link_getsockopt = bpf_program__attach_cgroup(obj->progs._getsockopt, + cgroup_fd); + if (!ASSERT_OK_PTR(link_getsockopt, "cg-attach-getsockopt")) goto close_bpf_object; - err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt", "_setsockopt"); - if (!ASSERT_OK(err, "prog_attach _setsockopt")) + link_setsockopt = bpf_program__attach_cgroup(obj->progs._setsockopt, + cgroup_fd); + if (!ASSERT_OK_PTR(link_setsockopt, "cg-attach-setsockopt")) goto close_bpf_object; server_fd = start_server(); @@ -217,7 +191,10 @@ static void run_test(int cgroup_fd) close_server_fd: close(server_fd); close_bpf_object: - bpf_object__close(obj); + bpf_link__destroy(link_getsockopt); + bpf_link__destroy(link_setsockopt); + + sockopt_inherit__destroy(obj); } void test_sockopt_inherit(void) diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c index 7f5659349011..759bbb6f8c5f 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c @@ -2,61 +2,13 @@ #include <test_progs.h> #include "cgroup_helpers.h" -static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title, const char *name) -{ - enum bpf_attach_type attach_type; - enum bpf_prog_type prog_type; - struct bpf_program *prog; - int err; - - err = libbpf_prog_type_by_name(title, &prog_type, &attach_type); - if (err) { - log_err("Failed to deduct types for %s BPF program", title); - return -1; - } - - prog = bpf_object__find_program_by_name(obj, name); - if (!prog) { - log_err("Failed to find %s BPF program", name); - return -1; - } - - err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, - attach_type, BPF_F_ALLOW_MULTI); - if (err) { - log_err("Failed to attach %s BPF program", name); - return -1; - } - - return 0; -} +#include "sockopt_multi.skel.h" -static int prog_detach(struct bpf_object *obj, int cgroup_fd, const char *title, const char *name) -{ - enum bpf_attach_type attach_type; - enum bpf_prog_type prog_type; - struct bpf_program *prog; - int err; - - err = libbpf_prog_type_by_name(title, &prog_type, &attach_type); - if (err) - return -1; - - prog = bpf_object__find_program_by_name(obj, name); - if (!prog) - return -1; - - err = bpf_prog_detach2(bpf_program__fd(prog), cgroup_fd, - attach_type); - if (err) - return -1; - - return 0; -} - -static int run_getsockopt_test(struct bpf_object *obj, int cg_parent, +static int run_getsockopt_test(struct sockopt_multi *obj, int cg_parent, int cg_child, int sock_fd) { + struct bpf_link *link_parent = NULL; + struct bpf_link *link_child = NULL; socklen_t optlen; __u8 buf; int err; @@ -89,8 +41,9 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent, * - child: 0x80 -> 0x90 */ - err = prog_attach(obj, cg_child, "cgroup/getsockopt", "_getsockopt_child"); - if (err) + link_child = bpf_program__attach_cgroup(obj->progs._getsockopt_child, + cg_child); + if (!ASSERT_OK_PTR(link_child, "cg-attach-getsockopt_child")) goto detach; buf = 0x00; @@ -113,8 +66,9 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent, * - parent: 0x90 -> 0xA0 */ - err = prog_attach(obj, cg_parent, "cgroup/getsockopt", "_getsockopt_parent"); - if (err) + link_parent = bpf_program__attach_cgroup(obj->progs._getsockopt_parent, + cg_parent); + if (!ASSERT_OK_PTR(link_parent, "cg-attach-getsockopt_parent")) goto detach; buf = 0x00; @@ -157,11 +111,8 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent, * - parent: unexpected 0x40, EPERM */ - err = prog_detach(obj, cg_child, "cgroup/getsockopt", "_getsockopt_child"); - if (err) { - log_err("Failed to detach child program"); - goto detach; - } + bpf_link__destroy(link_child); + link_child = NULL; buf = 0x00; optlen = 1; @@ -198,15 +149,17 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent, } detach: - prog_detach(obj, cg_child, "cgroup/getsockopt", "_getsockopt_child"); - prog_detach(obj, cg_parent, "cgroup/getsockopt", "_getsockopt_parent"); + bpf_link__destroy(link_child); + bpf_link__destroy(link_parent); return err; } -static int run_setsockopt_test(struct bpf_object *obj, int cg_parent, +static int run_setsockopt_test(struct sockopt_multi *obj, int cg_parent, int cg_child, int sock_fd) { + struct bpf_link *link_parent = NULL; + struct bpf_link *link_child = NULL; socklen_t optlen; __u8 buf; int err; @@ -236,8 +189,9 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent, /* Attach child program and make sure it adds 0x10. */ - err = prog_attach(obj, cg_child, "cgroup/setsockopt", "_setsockopt"); - if (err) + link_child = bpf_program__attach_cgroup(obj->progs._setsockopt, + cg_child); + if (!ASSERT_OK_PTR(link_child, "cg-attach-setsockopt_child")) goto detach; buf = 0x80; @@ -263,8 +217,9 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent, /* Attach parent program and make sure it adds another 0x10. */ - err = prog_attach(obj, cg_parent, "cgroup/setsockopt", "_setsockopt"); - if (err) + link_parent = bpf_program__attach_cgroup(obj->progs._setsockopt, + cg_parent); + if (!ASSERT_OK_PTR(link_parent, "cg-attach-setsockopt_parent")) goto detach; buf = 0x80; @@ -289,8 +244,8 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent, } detach: - prog_detach(obj, cg_child, "cgroup/setsockopt", "_setsockopt"); - prog_detach(obj, cg_parent, "cgroup/setsockopt", "_setsockopt"); + bpf_link__destroy(link_child); + bpf_link__destroy(link_parent); return err; } @@ -298,9 +253,8 @@ detach: void test_sockopt_multi(void) { int cg_parent = -1, cg_child = -1; - struct bpf_object *obj = NULL; + struct sockopt_multi *obj = NULL; int sock_fd = -1; - int err = -1; cg_parent = test__join_cgroup("/parent"); if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent")) @@ -310,13 +264,11 @@ void test_sockopt_multi(void) if (!ASSERT_GE(cg_child, 0, "join_cgroup /parent/child")) goto out; - obj = bpf_object__open_file("sockopt_multi.bpf.o", NULL); - if (!ASSERT_OK_PTR(obj, "obj_load")) + obj = sockopt_multi__open_and_load(); + if (!ASSERT_OK_PTR(obj, "skel-load")) goto out; - err = bpf_object__load(obj); - if (!ASSERT_OK(err, "obj_load")) - goto out; + obj->bss->page_size = sysconf(_SC_PAGESIZE); sock_fd = socket(AF_INET, SOCK_STREAM, 0); if (!ASSERT_GE(sock_fd, 0, "socket")) @@ -327,7 +279,7 @@ void test_sockopt_multi(void) out: close(sock_fd); - bpf_object__close(obj); + sockopt_multi__destroy(obj); close(cg_child); close(cg_parent); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c index 6b53b3cb8dad..6b2d300e9fd4 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c @@ -42,6 +42,8 @@ void test_sockopt_qos_to_cc(void) if (!ASSERT_OK_PTR(skel, "skel")) goto done; + skel->bss->page_size = sysconf(_SC_PAGESIZE); + sock_fd = socket(AF_INET6, SOCK_STREAM, 0); if (!ASSERT_GE(sock_fd, 0, "v6 socket open")) goto done; diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c index d9270bd3d920..f29c08d93beb 100644 --- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include <regex.h> #include <test_progs.h> #include <network_helpers.h> @@ -19,12 +20,16 @@ static struct { "; R1_w=map_value(off=0,ks=4,vs=4,imm=0)\n2: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mapval_preserve", - "8: (bf) r1 = r0 ; R0_w=map_value(id=1,off=0,ks=4,vs=8,imm=0) " - "R1_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)\n9: (85) call bpf_this_cpu_ptr#154\n" + "[0-9]\\+: (bf) r1 = r0 ;" + " R0_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)" + " R1_w=map_value(id=1,off=0,ks=4,vs=8,imm=0)\n" + "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_innermapval_preserve", - "13: (bf) r1 = r0 ; R0=map_value(id=2,off=0,ks=4,vs=8,imm=0) " - "R1_w=map_value(id=2,off=0,ks=4,vs=8,imm=0)\n14: (85) call bpf_this_cpu_ptr#154\n" + "[0-9]\\+: (bf) r1 = r0 ;" + " R0=map_value(id=2,off=0,ks=4,vs=8,imm=0)" + " R1_w=map_value(id=2,off=0,ks=4,vs=8,imm=0)\n" + "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n" "R1 type=map_value expected=percpu_ptr_" }, { "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" }, { "lock_id_mismatch_kptr_global", "bpf_spin_unlock of different lock" }, @@ -45,6 +50,24 @@ static struct { { "lock_id_mismatch_innermapval_mapval", "bpf_spin_unlock of different lock" }, }; +static int match_regex(const char *pattern, const char *string) +{ + int err, rc; + regex_t re; + + err = regcomp(&re, pattern, REG_NOSUB); + if (err) { + char errbuf[512]; + + regerror(err, &re, errbuf, sizeof(errbuf)); + PRINT_FAIL("Can't compile regex: %s\n", errbuf); + return -1; + } + rc = regexec(&re, string, 0, NULL, 0); + regfree(&re); + return rc == 0 ? 1 : 0; +} + static void test_spin_lock_fail_prog(const char *prog_name, const char *err_msg) { LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf, @@ -74,7 +97,11 @@ static void test_spin_lock_fail_prog(const char *prog_name, const char *err_msg) goto end; } - if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) { + ret = match_regex(err_msg, log_buf); + if (!ASSERT_GE(ret, 0, "match_regex")) + goto end; + + if (!ASSERT_TRUE(ret, "no match for expected error message")) { fprintf(stderr, "Expected: %s\n", err_msg); fprintf(stderr, "Verifier: %s\n", log_buf); } diff --git a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c index 740d5f644b40..d4579f735398 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c +++ b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c @@ -79,6 +79,8 @@ static const char * const success_tests[] = { "test_task_from_pid_current", "test_task_from_pid_invalid", "task_kfunc_acquire_trusted_walked", + "test_task_kfunc_flavor_relo", + "test_task_kfunc_flavor_relo_not_found", }; void test_task_kfunc(void) diff --git a/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c new file mode 100644 index 000000000000..4224727fb364 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Bytedance */ + +#include <sys/syscall.h> +#include <test_progs.h> +#include <cgroup_helpers.h> +#include "test_task_under_cgroup.skel.h" + +#define FOO "/foo" + +void test_task_under_cgroup(void) +{ + struct test_task_under_cgroup *skel; + int ret, foo; + pid_t pid; + + foo = test__join_cgroup(FOO); + if (!ASSERT_OK(foo < 0, "cgroup_join_foo")) + return; + + skel = test_task_under_cgroup__open(); + if (!ASSERT_OK_PTR(skel, "test_task_under_cgroup__open")) + goto cleanup; + + skel->rodata->local_pid = getpid(); + skel->bss->remote_pid = getpid(); + skel->rodata->cgid = get_cgroup_id(FOO); + + ret = test_task_under_cgroup__load(skel); + if (!ASSERT_OK(ret, "test_task_under_cgroup__load")) + goto cleanup; + + ret = test_task_under_cgroup__attach(skel); + if (!ASSERT_OK(ret, "test_task_under_cgroup__attach")) + goto cleanup; + + pid = fork(); + if (pid == 0) + exit(0); + + ret = (pid == -1); + if (ASSERT_OK(ret, "fork process")) + wait(NULL); + + test_task_under_cgroup__detach(skel); + + ASSERT_NEQ(skel->bss->remote_pid, skel->rodata->local_pid, + "test task_under_cgroup"); + +cleanup: + test_task_under_cgroup__destroy(skel); + close(foo); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tc_bpf.c b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c index e873766276d1..48b55539331e 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c @@ -3,6 +3,7 @@ #include <test_progs.h> #include <linux/pkt_cls.h> +#include "cap_helpers.h" #include "test_tc_bpf.skel.h" #define LO_IFINDEX 1 @@ -327,7 +328,7 @@ static int test_tc_bpf_api(struct bpf_tc_hook *hook, int fd) return 0; } -void test_tc_bpf(void) +void tc_bpf_root(void) { DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX, .attach_point = BPF_TC_INGRESS); @@ -393,3 +394,36 @@ end: } test_tc_bpf__destroy(skel); } + +void tc_bpf_non_root(void) +{ + struct test_tc_bpf *skel = NULL; + __u64 caps = 0; + int ret; + + /* In case CAP_BPF and CAP_PERFMON is not set */ + ret = cap_enable_effective(1ULL << CAP_BPF | 1ULL << CAP_NET_ADMIN, &caps); + if (!ASSERT_OK(ret, "set_cap_bpf_cap_net_admin")) + return; + ret = cap_disable_effective(1ULL << CAP_SYS_ADMIN | 1ULL << CAP_PERFMON, NULL); + if (!ASSERT_OK(ret, "disable_cap_sys_admin")) + goto restore_cap; + + skel = test_tc_bpf__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tc_bpf__open_and_load")) + goto restore_cap; + + test_tc_bpf__destroy(skel); + +restore_cap: + if (caps) + cap_enable_effective(caps, NULL); +} + +void test_tc_bpf(void) +{ + if (test__start_subtest("tc_bpf_root")) + tc_bpf_root(); + if (test__start_subtest("tc_bpf_non_root")) + tc_bpf_non_root(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h new file mode 100644 index 000000000000..67f985f7d215 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2023 Isovalent */ +#ifndef TC_HELPERS +#define TC_HELPERS +#include <test_progs.h> + +static inline __u32 id_from_prog_fd(int fd) +{ + struct bpf_prog_info prog_info = {}; + __u32 prog_info_len = sizeof(prog_info); + int err; + + err = bpf_obj_get_info_by_fd(fd, &prog_info, &prog_info_len); + if (!ASSERT_OK(err, "id_from_prog_fd")) + return 0; + + ASSERT_NEQ(prog_info.id, 0, "prog_info.id"); + return prog_info.id; +} + +static inline __u32 id_from_link_fd(int fd) +{ + struct bpf_link_info link_info = {}; + __u32 link_info_len = sizeof(link_info); + int err; + + err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len); + if (!ASSERT_OK(err, "id_from_link_fd")) + return 0; + + ASSERT_NEQ(link_info.id, 0, "link_info.id"); + return link_info.id; +} + +static inline __u32 ifindex_from_link_fd(int fd) +{ + struct bpf_link_info link_info = {}; + __u32 link_info_len = sizeof(link_info); + int err; + + err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len); + if (!ASSERT_OK(err, "id_from_link_fd")) + return 0; + + return link_info.tcx.ifindex; +} + +static inline void __assert_mprog_count(int target, int expected, int ifindex) +{ + __u32 count = 0, attach_flags = 0; + int err; + + err = bpf_prog_query(ifindex, target, 0, &attach_flags, + NULL, &count); + ASSERT_EQ(count, expected, "count"); + ASSERT_EQ(err, 0, "prog_query"); +} + +static inline void assert_mprog_count(int target, int expected) +{ + __assert_mprog_count(target, expected, loopback); +} + +static inline void assert_mprog_count_ifindex(int ifindex, int target, int expected) +{ + __assert_mprog_count(target, expected, ifindex); +} + +static inline void tc_skel_reset_all_seen(struct test_tc_link *skel) +{ + memset(skel->bss, 0, sizeof(*skel->bss)); +} + +#endif /* TC_HELPERS */ diff --git a/tools/testing/selftests/bpf/prog_tests/tc_links.c b/tools/testing/selftests/bpf/prog_tests/tc_links.c new file mode 100644 index 000000000000..bc9841144685 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tc_links.c @@ -0,0 +1,1901 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Isovalent */ +#include <uapi/linux/if_link.h> +#include <uapi/linux/pkt_sched.h> +#include <net/if.h> +#include <test_progs.h> + +#define loopback 1 +#define ping_cmd "ping -q -c1 -w1 127.0.0.1 > /dev/null" + +#include "test_tc_link.skel.h" +#include "tc_helpers.h" + +void serial_test_tc_links_basic(void) +{ + LIBBPF_OPTS(bpf_prog_query_opts, optq); + LIBBPF_OPTS(bpf_tcx_opts, optl); + __u32 prog_ids[2], link_ids[2]; + __u32 pid1, pid2, lid1, lid2; + struct test_tc_link *skel; + struct bpf_link *link; + int err; + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1)); + pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2)); + + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + + assert_mprog_count(BPF_TCX_INGRESS, 0); + assert_mprog_count(BPF_TCX_EGRESS, 0); + + ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc1 = link; + + lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1)); + + assert_mprog_count(BPF_TCX_INGRESS, 1); + assert_mprog_count(BPF_TCX_EGRESS, 0); + + optq.prog_ids = prog_ids; + optq.link_ids = link_ids; + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, BPF_TCX_INGRESS, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 1, "count"); + ASSERT_EQ(optq.revision, 2, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + + link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc2 = link; + + lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2)); + ASSERT_NEQ(lid1, lid2, "link_ids_1_2"); + + assert_mprog_count(BPF_TCX_INGRESS, 1); + assert_mprog_count(BPF_TCX_EGRESS, 1); + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, BPF_TCX_EGRESS, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 1, "count"); + ASSERT_EQ(optq.revision, 2, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); +cleanup: + test_tc_link__destroy(skel); + + assert_mprog_count(BPF_TCX_INGRESS, 0); + assert_mprog_count(BPF_TCX_EGRESS, 0); +} + +static void test_tc_links_before_target(int target) +{ + LIBBPF_OPTS(bpf_prog_query_opts, optq); + LIBBPF_OPTS(bpf_tcx_opts, optl); + __u32 prog_ids[5], link_ids[5]; + __u32 pid1, pid2, pid3, pid4; + __u32 lid1, lid2, lid3, lid4; + struct test_tc_link *skel; + struct bpf_link *link; + int err; + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target), + 0, "tc1_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target), + 0, "tc2_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target), + 0, "tc3_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target), + 0, "tc4_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1)); + pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2)); + pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3)); + pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4)); + + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + ASSERT_NEQ(pid3, pid4, "prog_ids_3_4"); + ASSERT_NEQ(pid2, pid3, "prog_ids_2_3"); + + assert_mprog_count(target, 0); + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc1 = link; + + lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1)); + + assert_mprog_count(target, 1); + + link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc2 = link; + + lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2)); + + assert_mprog_count(target, 2); + + optq.prog_ids = prog_ids; + optq.link_ids = link_ids; + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 2, "count"); + ASSERT_EQ(optq.revision, 3, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], pid2, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], lid2, "link_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); + ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_BEFORE, + .relative_fd = bpf_program__fd(skel->progs.tc2), + ); + + link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc3 = link; + + lid3 = id_from_link_fd(bpf_link__fd(skel->links.tc3)); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_BEFORE | BPF_F_LINK, + .relative_id = lid1, + ); + + link = bpf_program__attach_tcx(skel->progs.tc4, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc4 = link; + + lid4 = id_from_link_fd(bpf_link__fd(skel->links.tc4)); + + assert_mprog_count(target, 4); + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid4, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid4, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], lid1, "link_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], pid3, "prog_ids[2]"); + ASSERT_EQ(optq.link_ids[2], lid3, "link_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], pid2, "prog_ids[3]"); + ASSERT_EQ(optq.link_ids[3], lid2, "link_ids[3]"); + ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3"); + ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4"); +cleanup: + test_tc_link__destroy(skel); + assert_mprog_count(target, 0); +} + +void serial_test_tc_links_before(void) +{ + test_tc_links_before_target(BPF_TCX_INGRESS); + test_tc_links_before_target(BPF_TCX_EGRESS); +} + +static void test_tc_links_after_target(int target) +{ + LIBBPF_OPTS(bpf_prog_query_opts, optq); + LIBBPF_OPTS(bpf_tcx_opts, optl); + __u32 prog_ids[5], link_ids[5]; + __u32 pid1, pid2, pid3, pid4; + __u32 lid1, lid2, lid3, lid4; + struct test_tc_link *skel; + struct bpf_link *link; + int err; + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target), + 0, "tc1_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target), + 0, "tc2_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target), + 0, "tc3_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target), + 0, "tc4_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1)); + pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2)); + pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3)); + pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4)); + + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + ASSERT_NEQ(pid3, pid4, "prog_ids_3_4"); + ASSERT_NEQ(pid2, pid3, "prog_ids_2_3"); + + assert_mprog_count(target, 0); + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc1 = link; + + lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1)); + + assert_mprog_count(target, 1); + + link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc2 = link; + + lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2)); + + assert_mprog_count(target, 2); + + optq.prog_ids = prog_ids; + optq.link_ids = link_ids; + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 2, "count"); + ASSERT_EQ(optq.revision, 3, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], pid2, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], lid2, "link_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); + ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_AFTER, + .relative_fd = bpf_program__fd(skel->progs.tc1), + ); + + link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc3 = link; + + lid3 = id_from_link_fd(bpf_link__fd(skel->links.tc3)); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_AFTER | BPF_F_LINK, + .relative_fd = bpf_link__fd(skel->links.tc2), + ); + + link = bpf_program__attach_tcx(skel->progs.tc4, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc4 = link; + + lid4 = id_from_link_fd(bpf_link__fd(skel->links.tc4)); + + assert_mprog_count(target, 4); + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], pid3, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], lid3, "link_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], pid2, "prog_ids[2]"); + ASSERT_EQ(optq.link_ids[2], lid2, "link_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], pid4, "prog_ids[3]"); + ASSERT_EQ(optq.link_ids[3], lid4, "link_ids[3]"); + ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3"); + ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4"); +cleanup: + test_tc_link__destroy(skel); + assert_mprog_count(target, 0); +} + +void serial_test_tc_links_after(void) +{ + test_tc_links_after_target(BPF_TCX_INGRESS); + test_tc_links_after_target(BPF_TCX_EGRESS); +} + +static void test_tc_links_revision_target(int target) +{ + LIBBPF_OPTS(bpf_prog_query_opts, optq); + LIBBPF_OPTS(bpf_tcx_opts, optl); + __u32 prog_ids[3], link_ids[3]; + __u32 pid1, pid2, lid1, lid2; + struct test_tc_link *skel; + struct bpf_link *link; + int err; + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target), + 0, "tc1_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target), + 0, "tc2_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1)); + pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2)); + + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + + assert_mprog_count(target, 0); + + optl.expected_revision = 1; + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc1 = link; + + lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1)); + + assert_mprog_count(target, 1); + + optl.expected_revision = 1; + + link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + assert_mprog_count(target, 1); + + optl.expected_revision = 2; + + link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc2 = link; + + lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2)); + + assert_mprog_count(target, 2); + + optq.prog_ids = prog_ids; + optq.link_ids = link_ids; + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 2, "count"); + ASSERT_EQ(optq.revision, 3, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], pid2, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], lid2, "link_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + ASSERT_EQ(optq.link_ids[2], 0, "prog_ids[2]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); +cleanup: + test_tc_link__destroy(skel); + assert_mprog_count(target, 0); +} + +void serial_test_tc_links_revision(void) +{ + test_tc_links_revision_target(BPF_TCX_INGRESS); + test_tc_links_revision_target(BPF_TCX_EGRESS); +} + +static void test_tc_chain_classic(int target, bool chain_tc_old) +{ + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .ifindex = loopback); + bool hook_created = false, tc_attached = false; + LIBBPF_OPTS(bpf_tcx_opts, optl); + __u32 pid1, pid2, pid3; + struct test_tc_link *skel; + struct bpf_link *link; + int err; + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target), + 0, "tc1_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target), + 0, "tc2_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1)); + pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2)); + pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3)); + + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + ASSERT_NEQ(pid2, pid3, "prog_ids_2_3"); + + assert_mprog_count(target, 0); + + if (chain_tc_old) { + tc_hook.attach_point = target == BPF_TCX_INGRESS ? + BPF_TC_INGRESS : BPF_TC_EGRESS; + err = bpf_tc_hook_create(&tc_hook); + if (err == 0) + hook_created = true; + err = err == -EEXIST ? 0 : err; + if (!ASSERT_OK(err, "bpf_tc_hook_create")) + goto cleanup; + + tc_opts.prog_fd = bpf_program__fd(skel->progs.tc3); + err = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(err, "bpf_tc_attach")) + goto cleanup; + tc_attached = true; + } + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc1 = link; + + link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc2 = link; + + assert_mprog_count(target, 2); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3"); + + err = bpf_link__detach(skel->links.tc2); + if (!ASSERT_OK(err, "prog_detach")) + goto cleanup; + + assert_mprog_count(target, 1); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3"); +cleanup: + if (tc_attached) { + tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0; + err = bpf_tc_detach(&tc_hook, &tc_opts); + ASSERT_OK(err, "bpf_tc_detach"); + } + if (hook_created) { + tc_hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS; + bpf_tc_hook_destroy(&tc_hook); + } + assert_mprog_count(target, 1); + test_tc_link__destroy(skel); + assert_mprog_count(target, 0); +} + +void serial_test_tc_links_chain_classic(void) +{ + test_tc_chain_classic(BPF_TCX_INGRESS, false); + test_tc_chain_classic(BPF_TCX_EGRESS, false); + test_tc_chain_classic(BPF_TCX_INGRESS, true); + test_tc_chain_classic(BPF_TCX_EGRESS, true); +} + +static void test_tc_links_replace_target(int target) +{ + LIBBPF_OPTS(bpf_prog_query_opts, optq); + LIBBPF_OPTS(bpf_tcx_opts, optl); + __u32 pid1, pid2, pid3, lid1, lid2; + __u32 prog_ids[4], link_ids[4]; + struct test_tc_link *skel; + struct bpf_link *link; + int err; + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target), + 0, "tc1_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target), + 0, "tc2_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target), + 0, "tc3_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1)); + pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2)); + pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3)); + + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + ASSERT_NEQ(pid2, pid3, "prog_ids_2_3"); + + assert_mprog_count(target, 0); + + optl.expected_revision = 1; + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc1 = link; + + lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1)); + + assert_mprog_count(target, 1); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_BEFORE, + .relative_id = pid1, + .expected_revision = 2, + ); + + link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc2 = link; + + lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2)); + + assert_mprog_count(target, 2); + + optq.prog_ids = prog_ids; + optq.link_ids = link_ids; + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 2, "count"); + ASSERT_EQ(optq.revision, 3, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], lid1, "link_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_REPLACE, + .relative_fd = bpf_program__fd(skel->progs.tc2), + .expected_revision = 3, + ); + + link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + assert_mprog_count(target, 2); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_REPLACE | BPF_F_LINK, + .relative_fd = bpf_link__fd(skel->links.tc2), + .expected_revision = 3, + ); + + link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + assert_mprog_count(target, 2); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_REPLACE | BPF_F_LINK | BPF_F_AFTER, + .relative_id = lid2, + ); + + link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + assert_mprog_count(target, 2); + + err = bpf_link__update_program(skel->links.tc2, skel->progs.tc3); + if (!ASSERT_OK(err, "link_update")) + goto cleanup; + + assert_mprog_count(target, 2); + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 2, "count"); + ASSERT_EQ(optq.revision, 4, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid3, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], lid1, "link_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3"); + + err = bpf_link__detach(skel->links.tc2); + if (!ASSERT_OK(err, "link_detach")) + goto cleanup; + + assert_mprog_count(target, 1); + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 1, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); + + err = bpf_link__update_program(skel->links.tc1, skel->progs.tc1); + if (!ASSERT_OK(err, "link_update_self")) + goto cleanup; + + assert_mprog_count(target, 1); + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 1, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); +cleanup: + test_tc_link__destroy(skel); + assert_mprog_count(target, 0); +} + +void serial_test_tc_links_replace(void) +{ + test_tc_links_replace_target(BPF_TCX_INGRESS); + test_tc_links_replace_target(BPF_TCX_EGRESS); +} + +static void test_tc_links_invalid_target(int target) +{ + LIBBPF_OPTS(bpf_prog_query_opts, optq); + LIBBPF_OPTS(bpf_tcx_opts, optl); + __u32 pid1, pid2, lid1; + struct test_tc_link *skel; + struct bpf_link *link; + int err; + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target), + 0, "tc1_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target), + 0, "tc2_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1)); + pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2)); + + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + + assert_mprog_count(target, 0); + + optl.flags = BPF_F_BEFORE | BPF_F_AFTER; + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_BEFORE | BPF_F_ID, + ); + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_AFTER | BPF_F_ID, + ); + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_ID, + ); + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_LINK, + .relative_fd = bpf_program__fd(skel->progs.tc2), + ); + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_LINK, + ); + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(optl, + .relative_fd = bpf_program__fd(skel->progs.tc2), + ); + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_BEFORE | BPF_F_AFTER, + .relative_fd = bpf_program__fd(skel->progs.tc2), + ); + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_BEFORE, + .relative_fd = bpf_program__fd(skel->progs.tc1), + ); + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_ID, + .relative_id = pid2, + ); + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_ID, + .relative_id = 42, + ); + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_BEFORE, + .relative_fd = bpf_program__fd(skel->progs.tc1), + ); + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_BEFORE | BPF_F_LINK, + .relative_fd = bpf_program__fd(skel->progs.tc1), + ); + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_AFTER, + .relative_fd = bpf_program__fd(skel->progs.tc1), + ); + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(optl); + + link = bpf_program__attach_tcx(skel->progs.tc1, 0, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_AFTER | BPF_F_LINK, + .relative_fd = bpf_program__fd(skel->progs.tc1), + ); + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(optl); + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc1 = link; + + lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1)); + + assert_mprog_count(target, 1); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_AFTER | BPF_F_LINK, + .relative_fd = bpf_program__fd(skel->progs.tc1), + ); + + link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + assert_mprog_count(target, 1); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_BEFORE | BPF_F_LINK | BPF_F_ID, + .relative_id = ~0, + ); + + link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + assert_mprog_count(target, 1); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_BEFORE | BPF_F_LINK | BPF_F_ID, + .relative_id = lid1, + ); + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + + assert_mprog_count(target, 1); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_BEFORE | BPF_F_ID, + .relative_id = pid1, + ); + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) { + bpf_link__destroy(link); + goto cleanup; + } + assert_mprog_count(target, 1); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_BEFORE | BPF_F_LINK | BPF_F_ID, + .relative_id = lid1, + ); + + link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc2 = link; + + assert_mprog_count(target, 2); +cleanup: + test_tc_link__destroy(skel); + assert_mprog_count(target, 0); +} + +void serial_test_tc_links_invalid(void) +{ + test_tc_links_invalid_target(BPF_TCX_INGRESS); + test_tc_links_invalid_target(BPF_TCX_EGRESS); +} + +static void test_tc_links_prepend_target(int target) +{ + LIBBPF_OPTS(bpf_prog_query_opts, optq); + LIBBPF_OPTS(bpf_tcx_opts, optl); + __u32 prog_ids[5], link_ids[5]; + __u32 pid1, pid2, pid3, pid4; + __u32 lid1, lid2, lid3, lid4; + struct test_tc_link *skel; + struct bpf_link *link; + int err; + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target), + 0, "tc1_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target), + 0, "tc2_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target), + 0, "tc3_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target), + 0, "tc4_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1)); + pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2)); + pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3)); + pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4)); + + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + ASSERT_NEQ(pid3, pid4, "prog_ids_3_4"); + ASSERT_NEQ(pid2, pid3, "prog_ids_2_3"); + + assert_mprog_count(target, 0); + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc1 = link; + + lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1)); + + assert_mprog_count(target, 1); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_BEFORE, + ); + + link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc2 = link; + + lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2)); + + assert_mprog_count(target, 2); + + optq.prog_ids = prog_ids; + optq.link_ids = link_ids; + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 2, "count"); + ASSERT_EQ(optq.revision, 3, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], lid1, "link_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); + ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_BEFORE, + ); + + link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc3 = link; + + lid3 = id_from_link_fd(bpf_link__fd(skel->links.tc3)); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_BEFORE, + ); + + link = bpf_program__attach_tcx(skel->progs.tc4, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc4 = link; + + lid4 = id_from_link_fd(bpf_link__fd(skel->links.tc4)); + + assert_mprog_count(target, 4); + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid4, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid4, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], pid3, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], lid3, "link_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], pid2, "prog_ids[2]"); + ASSERT_EQ(optq.link_ids[2], lid2, "link_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], pid1, "prog_ids[3]"); + ASSERT_EQ(optq.link_ids[3], lid1, "link_ids[3]"); + ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3"); + ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4"); +cleanup: + test_tc_link__destroy(skel); + assert_mprog_count(target, 0); +} + +void serial_test_tc_links_prepend(void) +{ + test_tc_links_prepend_target(BPF_TCX_INGRESS); + test_tc_links_prepend_target(BPF_TCX_EGRESS); +} + +static void test_tc_links_append_target(int target) +{ + LIBBPF_OPTS(bpf_prog_query_opts, optq); + LIBBPF_OPTS(bpf_tcx_opts, optl); + __u32 prog_ids[5], link_ids[5]; + __u32 pid1, pid2, pid3, pid4; + __u32 lid1, lid2, lid3, lid4; + struct test_tc_link *skel; + struct bpf_link *link; + int err; + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target), + 0, "tc1_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target), + 0, "tc2_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target), + 0, "tc3_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target), + 0, "tc4_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1)); + pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2)); + pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3)); + pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4)); + + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + ASSERT_NEQ(pid3, pid4, "prog_ids_3_4"); + ASSERT_NEQ(pid2, pid3, "prog_ids_2_3"); + + assert_mprog_count(target, 0); + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc1 = link; + + lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1)); + + assert_mprog_count(target, 1); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_AFTER, + ); + + link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc2 = link; + + lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2)); + + assert_mprog_count(target, 2); + + optq.prog_ids = prog_ids; + optq.link_ids = link_ids; + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 2, "count"); + ASSERT_EQ(optq.revision, 3, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], pid2, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], lid2, "link_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); + ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_AFTER, + ); + + link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc3 = link; + + lid3 = id_from_link_fd(bpf_link__fd(skel->links.tc3)); + + LIBBPF_OPTS_RESET(optl, + .flags = BPF_F_AFTER, + ); + + link = bpf_program__attach_tcx(skel->progs.tc4, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc4 = link; + + lid4 = id_from_link_fd(bpf_link__fd(skel->links.tc4)); + + assert_mprog_count(target, 4); + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(link_ids, 0, sizeof(link_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]"); + ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], pid2, "prog_ids[1]"); + ASSERT_EQ(optq.link_ids[1], lid2, "link_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], pid3, "prog_ids[2]"); + ASSERT_EQ(optq.link_ids[2], lid3, "link_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], pid4, "prog_ids[3]"); + ASSERT_EQ(optq.link_ids[3], lid4, "link_ids[3]"); + ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3"); + ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4"); +cleanup: + test_tc_link__destroy(skel); + assert_mprog_count(target, 0); +} + +void serial_test_tc_links_append(void) +{ + test_tc_links_append_target(BPF_TCX_INGRESS); + test_tc_links_append_target(BPF_TCX_EGRESS); +} + +static void test_tc_links_dev_cleanup_target(int target) +{ + LIBBPF_OPTS(bpf_tcx_opts, optl); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + __u32 pid1, pid2, pid3, pid4; + struct test_tc_link *skel; + struct bpf_link *link; + int err, ifindex; + + ASSERT_OK(system("ip link add dev tcx_opts1 type veth peer name tcx_opts2"), "add veth"); + ifindex = if_nametoindex("tcx_opts1"); + ASSERT_NEQ(ifindex, 0, "non_zero_ifindex"); + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target), + 0, "tc1_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target), + 0, "tc2_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target), + 0, "tc3_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target), + 0, "tc4_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1)); + pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2)); + pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3)); + pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4)); + + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + ASSERT_NEQ(pid3, pid4, "prog_ids_3_4"); + ASSERT_NEQ(pid2, pid3, "prog_ids_2_3"); + + assert_mprog_count(target, 0); + + link = bpf_program__attach_tcx(skel->progs.tc1, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc1 = link; + + assert_mprog_count_ifindex(ifindex, target, 1); + + link = bpf_program__attach_tcx(skel->progs.tc2, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc2 = link; + + assert_mprog_count_ifindex(ifindex, target, 2); + + link = bpf_program__attach_tcx(skel->progs.tc3, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc3 = link; + + assert_mprog_count_ifindex(ifindex, target, 3); + + link = bpf_program__attach_tcx(skel->progs.tc4, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc4 = link; + + assert_mprog_count_ifindex(ifindex, target, 4); + + ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth"); + ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed"); + ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); + + ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc1)), 0, "tc1_ifindex"); + ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc2)), 0, "tc2_ifindex"); + ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc3)), 0, "tc3_ifindex"); + ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc4)), 0, "tc4_ifindex"); + + test_tc_link__destroy(skel); + return; +cleanup: + test_tc_link__destroy(skel); + + ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth"); + ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed"); + ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); +} + +void serial_test_tc_links_dev_cleanup(void) +{ + test_tc_links_dev_cleanup_target(BPF_TCX_INGRESS); + test_tc_links_dev_cleanup_target(BPF_TCX_EGRESS); +} + +static void test_tc_chain_mixed(int target) +{ + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .ifindex = loopback); + LIBBPF_OPTS(bpf_tcx_opts, optl); + struct test_tc_link *skel; + struct bpf_link *link; + __u32 pid1, pid2, pid3; + int err; + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target), + 0, "tc4_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc5, target), + 0, "tc5_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc6, target), + 0, "tc6_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4)); + pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc5)); + pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc6)); + + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + ASSERT_NEQ(pid2, pid3, "prog_ids_2_3"); + + assert_mprog_count(target, 0); + + tc_hook.attach_point = target == BPF_TCX_INGRESS ? + BPF_TC_INGRESS : BPF_TC_EGRESS; + err = bpf_tc_hook_create(&tc_hook); + err = err == -EEXIST ? 0 : err; + if (!ASSERT_OK(err, "bpf_tc_hook_create")) + goto cleanup; + + tc_opts.prog_fd = bpf_program__fd(skel->progs.tc5); + err = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(err, "bpf_tc_attach")) + goto cleanup; + + link = bpf_program__attach_tcx(skel->progs.tc6, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc6 = link; + + assert_mprog_count(target, 1); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); + ASSERT_EQ(skel->bss->seen_tc5, false, "seen_tc5"); + ASSERT_EQ(skel->bss->seen_tc6, true, "seen_tc6"); + + err = bpf_link__update_program(skel->links.tc6, skel->progs.tc4); + if (!ASSERT_OK(err, "link_update")) + goto cleanup; + + assert_mprog_count(target, 1); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4"); + ASSERT_EQ(skel->bss->seen_tc5, true, "seen_tc5"); + ASSERT_EQ(skel->bss->seen_tc6, false, "seen_tc6"); + + err = bpf_link__detach(skel->links.tc6); + if (!ASSERT_OK(err, "prog_detach")) + goto cleanup; + + assert_mprog_count(target, 0); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); + ASSERT_EQ(skel->bss->seen_tc5, true, "seen_tc5"); + ASSERT_EQ(skel->bss->seen_tc6, false, "seen_tc6"); + +cleanup: + tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0; + err = bpf_tc_detach(&tc_hook, &tc_opts); + ASSERT_OK(err, "bpf_tc_detach"); + + tc_hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS; + bpf_tc_hook_destroy(&tc_hook); + + test_tc_link__destroy(skel); +} + +void serial_test_tc_links_chain_mixed(void) +{ + test_tc_chain_mixed(BPF_TCX_INGRESS); + test_tc_chain_mixed(BPF_TCX_EGRESS); +} + +static void test_tc_links_ingress(int target, bool chain_tc_old, + bool tcx_teardown_first) +{ + LIBBPF_OPTS(bpf_tc_opts, tc_opts, + .handle = 1, + .priority = 1, + ); + LIBBPF_OPTS(bpf_tc_hook, tc_hook, + .ifindex = loopback, + .attach_point = BPF_TC_CUSTOM, + .parent = TC_H_INGRESS, + ); + bool hook_created = false, tc_attached = false; + LIBBPF_OPTS(bpf_tcx_opts, optl); + __u32 pid1, pid2, pid3; + struct test_tc_link *skel; + struct bpf_link *link; + int err; + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target), + 0, "tc1_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target), + 0, "tc2_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1)); + pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2)); + pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3)); + + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + ASSERT_NEQ(pid2, pid3, "prog_ids_2_3"); + + assert_mprog_count(target, 0); + + if (chain_tc_old) { + ASSERT_OK(system("tc qdisc add dev lo ingress"), "add_ingress"); + hook_created = true; + + tc_opts.prog_fd = bpf_program__fd(skel->progs.tc3); + err = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(err, "bpf_tc_attach")) + goto cleanup; + tc_attached = true; + } + + link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc1 = link; + + link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc2 = link; + + assert_mprog_count(target, 2); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3"); + + err = bpf_link__detach(skel->links.tc2); + if (!ASSERT_OK(err, "prog_detach")) + goto cleanup; + + assert_mprog_count(target, 1); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3"); +cleanup: + if (tc_attached) { + tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0; + err = bpf_tc_detach(&tc_hook, &tc_opts); + ASSERT_OK(err, "bpf_tc_detach"); + } + ASSERT_OK(system(ping_cmd), ping_cmd); + assert_mprog_count(target, 1); + if (hook_created && tcx_teardown_first) + ASSERT_OK(system("tc qdisc del dev lo ingress"), "del_ingress"); + ASSERT_OK(system(ping_cmd), ping_cmd); + test_tc_link__destroy(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + if (hook_created && !tcx_teardown_first) + ASSERT_OK(system("tc qdisc del dev lo ingress"), "del_ingress"); + ASSERT_OK(system(ping_cmd), ping_cmd); + assert_mprog_count(target, 0); +} + +void serial_test_tc_links_ingress(void) +{ + test_tc_links_ingress(BPF_TCX_INGRESS, true, true); + test_tc_links_ingress(BPF_TCX_INGRESS, true, false); + test_tc_links_ingress(BPF_TCX_INGRESS, false, false); +} + +static void test_tc_links_dev_mixed(int target) +{ + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); + LIBBPF_OPTS(bpf_tc_hook, tc_hook); + LIBBPF_OPTS(bpf_tcx_opts, optl); + __u32 pid1, pid2, pid3, pid4; + struct test_tc_link *skel; + struct bpf_link *link; + int err, ifindex; + + ASSERT_OK(system("ip link add dev tcx_opts1 type veth peer name tcx_opts2"), "add veth"); + ifindex = if_nametoindex("tcx_opts1"); + ASSERT_NEQ(ifindex, 0, "non_zero_ifindex"); + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target), + 0, "tc1_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target), + 0, "tc2_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target), + 0, "tc3_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target), + 0, "tc4_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1)); + pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2)); + pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3)); + pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4)); + + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + ASSERT_NEQ(pid3, pid4, "prog_ids_3_4"); + ASSERT_NEQ(pid2, pid3, "prog_ids_2_3"); + + assert_mprog_count(target, 0); + + link = bpf_program__attach_tcx(skel->progs.tc1, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc1 = link; + + assert_mprog_count_ifindex(ifindex, target, 1); + + link = bpf_program__attach_tcx(skel->progs.tc2, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc2 = link; + + assert_mprog_count_ifindex(ifindex, target, 2); + + link = bpf_program__attach_tcx(skel->progs.tc3, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc3 = link; + + assert_mprog_count_ifindex(ifindex, target, 3); + + link = bpf_program__attach_tcx(skel->progs.tc4, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc4 = link; + + assert_mprog_count_ifindex(ifindex, target, 4); + + tc_hook.ifindex = ifindex; + tc_hook.attach_point = target == BPF_TCX_INGRESS ? + BPF_TC_INGRESS : BPF_TC_EGRESS; + + err = bpf_tc_hook_create(&tc_hook); + err = err == -EEXIST ? 0 : err; + if (!ASSERT_OK(err, "bpf_tc_hook_create")) + goto cleanup; + + tc_opts.prog_fd = bpf_program__fd(skel->progs.tc5); + err = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(err, "bpf_tc_attach")) + goto cleanup; + + ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth"); + ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed"); + ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); + + ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc1)), 0, "tc1_ifindex"); + ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc2)), 0, "tc2_ifindex"); + ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc3)), 0, "tc3_ifindex"); + ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc4)), 0, "tc4_ifindex"); + + test_tc_link__destroy(skel); + return; +cleanup: + test_tc_link__destroy(skel); + + ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth"); + ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed"); + ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); +} + +void serial_test_tc_links_dev_mixed(void) +{ + test_tc_links_dev_mixed(BPF_TCX_INGRESS); + test_tc_links_dev_mixed(BPF_TCX_EGRESS); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c new file mode 100644 index 000000000000..ca506d2fcf58 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c @@ -0,0 +1,2689 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Isovalent */ +#include <uapi/linux/if_link.h> +#include <net/if.h> +#include <test_progs.h> + +#define loopback 1 +#define ping_cmd "ping -q -c1 -w1 127.0.0.1 > /dev/null" + +#include "test_tc_link.skel.h" +#include "tc_helpers.h" + +void serial_test_tc_opts_basic(void) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + __u32 fd1, fd2, id1, id2; + struct test_tc_link *skel; + __u32 prog_ids[2]; + int err; + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.tc1); + fd2 = bpf_program__fd(skel->progs.tc2); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + + ASSERT_NEQ(id1, id2, "prog_ids_1_2"); + + assert_mprog_count(BPF_TCX_INGRESS, 0); + assert_mprog_count(BPF_TCX_EGRESS, 0); + + ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + + err = bpf_prog_attach_opts(fd1, loopback, BPF_TCX_INGRESS, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(BPF_TCX_INGRESS, 1); + assert_mprog_count(BPF_TCX_EGRESS, 0); + + optq.prog_ids = prog_ids; + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, BPF_TCX_INGRESS, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup_in; + + ASSERT_EQ(optq.count, 1, "count"); + ASSERT_EQ(optq.revision, 2, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + + err = bpf_prog_attach_opts(fd2, loopback, BPF_TCX_EGRESS, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup_in; + + assert_mprog_count(BPF_TCX_INGRESS, 1); + assert_mprog_count(BPF_TCX_EGRESS, 1); + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, BPF_TCX_EGRESS, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup_eg; + + ASSERT_EQ(optq.count, 1, "count"); + ASSERT_EQ(optq.revision, 2, "revision"); + ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + +cleanup_eg: + err = bpf_prog_detach_opts(fd2, loopback, BPF_TCX_EGRESS, &optd); + ASSERT_OK(err, "prog_detach_eg"); + + assert_mprog_count(BPF_TCX_INGRESS, 1); + assert_mprog_count(BPF_TCX_EGRESS, 0); + +cleanup_in: + err = bpf_prog_detach_opts(fd1, loopback, BPF_TCX_INGRESS, &optd); + ASSERT_OK(err, "prog_detach_in"); + + assert_mprog_count(BPF_TCX_INGRESS, 0); + assert_mprog_count(BPF_TCX_EGRESS, 0); + +cleanup: + test_tc_link__destroy(skel); +} + +static void test_tc_opts_before_target(int target) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4; + struct test_tc_link *skel; + __u32 prog_ids[5]; + int err; + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.tc1); + fd2 = bpf_program__fd(skel->progs.tc2); + fd3 = bpf_program__fd(skel->progs.tc3); + fd4 = bpf_program__fd(skel->progs.tc4); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + id4 = id_from_prog_fd(fd4); + + ASSERT_NEQ(id1, id2, "prog_ids_1_2"); + ASSERT_NEQ(id3, id4, "prog_ids_3_4"); + ASSERT_NEQ(id2, id3, "prog_ids_2_3"); + + assert_mprog_count(target, 0); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(target, 1); + + err = bpf_prog_attach_opts(fd2, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup_target; + + assert_mprog_count(target, 2); + + optq.prog_ids = prog_ids; + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup_target2; + + ASSERT_EQ(optq.count, 2, "count"); + ASSERT_EQ(optq.revision, 3, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); + ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_BEFORE, + .relative_fd = fd2, + ); + + err = bpf_prog_attach_opts(fd3, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup_target2; + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup_target3; + + ASSERT_EQ(optq.count, 3, "count"); + ASSERT_EQ(optq.revision, 4, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id2, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]"); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_BEFORE, + .relative_id = id1, + ); + + err = bpf_prog_attach_opts(fd4, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup_target3; + + assert_mprog_count(target, 4); + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup_target4; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], id4, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], id2, "prog_ids[3]"); + ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3"); + ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4"); + +cleanup_target4: + err = bpf_prog_detach_opts(fd4, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 3); + +cleanup_target3: + err = bpf_prog_detach_opts(fd3, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 2); + +cleanup_target2: + err = bpf_prog_detach_opts(fd2, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 1); + +cleanup_target: + err = bpf_prog_detach_opts(fd1, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 0); + +cleanup: + test_tc_link__destroy(skel); +} + +void serial_test_tc_opts_before(void) +{ + test_tc_opts_before_target(BPF_TCX_INGRESS); + test_tc_opts_before_target(BPF_TCX_EGRESS); +} + +static void test_tc_opts_after_target(int target) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4; + struct test_tc_link *skel; + __u32 prog_ids[5]; + int err; + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.tc1); + fd2 = bpf_program__fd(skel->progs.tc2); + fd3 = bpf_program__fd(skel->progs.tc3); + fd4 = bpf_program__fd(skel->progs.tc4); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + id4 = id_from_prog_fd(fd4); + + ASSERT_NEQ(id1, id2, "prog_ids_1_2"); + ASSERT_NEQ(id3, id4, "prog_ids_3_4"); + ASSERT_NEQ(id2, id3, "prog_ids_2_3"); + + assert_mprog_count(target, 0); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(target, 1); + + err = bpf_prog_attach_opts(fd2, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup_target; + + assert_mprog_count(target, 2); + + optq.prog_ids = prog_ids; + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup_target2; + + ASSERT_EQ(optq.count, 2, "count"); + ASSERT_EQ(optq.revision, 3, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); + ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_AFTER, + .relative_fd = fd1, + ); + + err = bpf_prog_attach_opts(fd3, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup_target2; + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup_target3; + + ASSERT_EQ(optq.count, 3, "count"); + ASSERT_EQ(optq.revision, 4, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id2, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]"); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_AFTER, + .relative_id = id2, + ); + + err = bpf_prog_attach_opts(fd4, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup_target3; + + assert_mprog_count(target, 4); + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup_target4; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id2, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]"); + ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3"); + ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4"); + +cleanup_target4: + err = bpf_prog_detach_opts(fd4, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 3); + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup_target3; + + ASSERT_EQ(optq.count, 3, "count"); + ASSERT_EQ(optq.revision, 6, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id2, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]"); + +cleanup_target3: + err = bpf_prog_detach_opts(fd3, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 2); + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup_target2; + + ASSERT_EQ(optq.count, 2, "count"); + ASSERT_EQ(optq.revision, 7, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + +cleanup_target2: + err = bpf_prog_detach_opts(fd2, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 1); + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup_target; + + ASSERT_EQ(optq.count, 1, "count"); + ASSERT_EQ(optq.revision, 8, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + +cleanup_target: + err = bpf_prog_detach_opts(fd1, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 0); + +cleanup: + test_tc_link__destroy(skel); +} + +void serial_test_tc_opts_after(void) +{ + test_tc_opts_after_target(BPF_TCX_INGRESS); + test_tc_opts_after_target(BPF_TCX_EGRESS); +} + +static void test_tc_opts_revision_target(int target) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + __u32 fd1, fd2, id1, id2; + struct test_tc_link *skel; + __u32 prog_ids[3]; + int err; + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.tc1); + fd2 = bpf_program__fd(skel->progs.tc2); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + + ASSERT_NEQ(id1, id2, "prog_ids_1_2"); + + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 1, + ); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(target, 1); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 1, + ); + + err = bpf_prog_attach_opts(fd2, loopback, target, &opta); + if (!ASSERT_EQ(err, -ESTALE, "prog_attach")) + goto cleanup_target; + + assert_mprog_count(target, 1); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 2, + ); + + err = bpf_prog_attach_opts(fd2, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup_target; + + assert_mprog_count(target, 2); + + optq.prog_ids = prog_ids; + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup_target2; + + ASSERT_EQ(optq.count, 2, "count"); + ASSERT_EQ(optq.revision, 3, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + + LIBBPF_OPTS_RESET(optd, + .expected_revision = 2, + ); + + err = bpf_prog_detach_opts(fd2, loopback, target, &optd); + ASSERT_EQ(err, -ESTALE, "prog_detach"); + assert_mprog_count(target, 2); + +cleanup_target2: + LIBBPF_OPTS_RESET(optd, + .expected_revision = 3, + ); + + err = bpf_prog_detach_opts(fd2, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 1); + +cleanup_target: + LIBBPF_OPTS_RESET(optd); + + err = bpf_prog_detach_opts(fd1, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 0); + +cleanup: + test_tc_link__destroy(skel); +} + +void serial_test_tc_opts_revision(void) +{ + test_tc_opts_revision_target(BPF_TCX_INGRESS); + test_tc_opts_revision_target(BPF_TCX_EGRESS); +} + +static void test_tc_chain_classic(int target, bool chain_tc_old) +{ + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .ifindex = loopback); + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + bool hook_created = false, tc_attached = false; + __u32 fd1, fd2, fd3, id1, id2, id3; + struct test_tc_link *skel; + int err; + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.tc1); + fd2 = bpf_program__fd(skel->progs.tc2); + fd3 = bpf_program__fd(skel->progs.tc3); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + + ASSERT_NEQ(id1, id2, "prog_ids_1_2"); + ASSERT_NEQ(id2, id3, "prog_ids_2_3"); + + assert_mprog_count(target, 0); + + if (chain_tc_old) { + tc_hook.attach_point = target == BPF_TCX_INGRESS ? + BPF_TC_INGRESS : BPF_TC_EGRESS; + err = bpf_tc_hook_create(&tc_hook); + if (err == 0) + hook_created = true; + err = err == -EEXIST ? 0 : err; + if (!ASSERT_OK(err, "bpf_tc_hook_create")) + goto cleanup; + + tc_opts.prog_fd = fd3; + err = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(err, "bpf_tc_attach")) + goto cleanup; + tc_attached = true; + } + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + err = bpf_prog_attach_opts(fd2, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup_detach; + + assert_mprog_count(target, 2); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3"); + + err = bpf_prog_detach_opts(fd2, loopback, target, &optd); + if (!ASSERT_OK(err, "prog_detach")) + goto cleanup_detach; + + assert_mprog_count(target, 1); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3"); + +cleanup_detach: + err = bpf_prog_detach_opts(fd1, loopback, target, &optd); + if (!ASSERT_OK(err, "prog_detach")) + goto cleanup; + + assert_mprog_count(target, 0); +cleanup: + if (tc_attached) { + tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0; + err = bpf_tc_detach(&tc_hook, &tc_opts); + ASSERT_OK(err, "bpf_tc_detach"); + } + if (hook_created) { + tc_hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS; + bpf_tc_hook_destroy(&tc_hook); + } + test_tc_link__destroy(skel); + assert_mprog_count(target, 0); +} + +void serial_test_tc_opts_chain_classic(void) +{ + test_tc_chain_classic(BPF_TCX_INGRESS, false); + test_tc_chain_classic(BPF_TCX_EGRESS, false); + test_tc_chain_classic(BPF_TCX_INGRESS, true); + test_tc_chain_classic(BPF_TCX_EGRESS, true); +} + +static void test_tc_opts_replace_target(int target) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + __u32 fd1, fd2, fd3, id1, id2, id3, detach_fd; + __u32 prog_ids[4], prog_flags[4]; + struct test_tc_link *skel; + int err; + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.tc1); + fd2 = bpf_program__fd(skel->progs.tc2); + fd3 = bpf_program__fd(skel->progs.tc3); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + + ASSERT_NEQ(id1, id2, "prog_ids_1_2"); + ASSERT_NEQ(id2, id3, "prog_ids_2_3"); + + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 1, + ); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(target, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_BEFORE, + .relative_id = id1, + .expected_revision = 2, + ); + + err = bpf_prog_attach_opts(fd2, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup_target; + + detach_fd = fd2; + + assert_mprog_count(target, 2); + + optq.prog_attach_flags = prog_flags; + optq.prog_ids = prog_ids; + + memset(prog_flags, 0, sizeof(prog_flags)); + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup_target2; + + ASSERT_EQ(optq.count, 2, "count"); + ASSERT_EQ(optq.revision, 3, "revision"); + ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + + ASSERT_EQ(optq.prog_attach_flags[0], 0, "prog_flags[0]"); + ASSERT_EQ(optq.prog_attach_flags[1], 0, "prog_flags[1]"); + ASSERT_EQ(optq.prog_attach_flags[2], 0, "prog_flags[2]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_REPLACE, + .replace_prog_fd = fd2, + .expected_revision = 3, + ); + + err = bpf_prog_attach_opts(fd3, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup_target2; + + detach_fd = fd3; + + assert_mprog_count(target, 2); + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup_target2; + + ASSERT_EQ(optq.count, 2, "count"); + ASSERT_EQ(optq.revision, 4, "revision"); + ASSERT_EQ(optq.prog_ids[0], id3, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3"); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_REPLACE | BPF_F_BEFORE, + .replace_prog_fd = fd3, + .relative_fd = fd1, + .expected_revision = 4, + ); + + err = bpf_prog_attach_opts(fd2, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup_target2; + + detach_fd = fd2; + + assert_mprog_count(target, 2); + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup_target2; + + ASSERT_EQ(optq.count, 2, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_REPLACE, + .replace_prog_fd = fd2, + ); + + err = bpf_prog_attach_opts(fd2, loopback, target, &opta); + ASSERT_EQ(err, -EEXIST, "prog_attach"); + assert_mprog_count(target, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_REPLACE | BPF_F_AFTER, + .replace_prog_fd = fd2, + .relative_fd = fd1, + .expected_revision = 5, + ); + + err = bpf_prog_attach_opts(fd3, loopback, target, &opta); + ASSERT_EQ(err, -ERANGE, "prog_attach"); + assert_mprog_count(target, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_BEFORE | BPF_F_AFTER | BPF_F_REPLACE, + .replace_prog_fd = fd2, + .relative_fd = fd1, + .expected_revision = 5, + ); + + err = bpf_prog_attach_opts(fd3, loopback, target, &opta); + ASSERT_EQ(err, -ERANGE, "prog_attach"); + assert_mprog_count(target, 2); + + LIBBPF_OPTS_RESET(optd, + .flags = BPF_F_BEFORE, + .relative_id = id1, + .expected_revision = 5, + ); + +cleanup_target2: + err = bpf_prog_detach_opts(detach_fd, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 1); + +cleanup_target: + LIBBPF_OPTS_RESET(optd); + + err = bpf_prog_detach_opts(fd1, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 0); + +cleanup: + test_tc_link__destroy(skel); +} + +void serial_test_tc_opts_replace(void) +{ + test_tc_opts_replace_target(BPF_TCX_INGRESS); + test_tc_opts_replace_target(BPF_TCX_EGRESS); +} + +static void test_tc_opts_invalid_target(int target) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + __u32 fd1, fd2, id1, id2; + struct test_tc_link *skel; + int err; + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.tc1); + fd2 = bpf_program__fd(skel->progs.tc2); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + + ASSERT_NEQ(id1, id2, "prog_ids_1_2"); + + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_BEFORE | BPF_F_AFTER, + ); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + ASSERT_EQ(err, -ERANGE, "prog_attach"); + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_BEFORE | BPF_F_ID, + ); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + ASSERT_EQ(err, -ENOENT, "prog_attach"); + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_AFTER | BPF_F_ID, + ); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + ASSERT_EQ(err, -ENOENT, "prog_attach"); + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(opta, + .relative_fd = fd2, + ); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_BEFORE | BPF_F_AFTER, + .relative_fd = fd2, + ); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + ASSERT_EQ(err, -ENOENT, "prog_attach"); + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ID, + .relative_id = id2, + ); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_BEFORE, + .relative_fd = fd1, + ); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + ASSERT_EQ(err, -ENOENT, "prog_attach"); + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_AFTER, + .relative_fd = fd1, + ); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + ASSERT_EQ(err, -ENOENT, "prog_attach"); + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(opta); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(target, 1); + + LIBBPF_OPTS_RESET(opta); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + ASSERT_EQ(err, -EEXIST, "prog_attach"); + assert_mprog_count(target, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_BEFORE, + .relative_fd = fd1, + ); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + ASSERT_EQ(err, -EEXIST, "prog_attach"); + assert_mprog_count(target, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_AFTER, + .relative_fd = fd1, + ); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + ASSERT_EQ(err, -EEXIST, "prog_attach"); + assert_mprog_count(target, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_REPLACE, + .relative_fd = fd1, + ); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach_x1"); + assert_mprog_count(target, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_REPLACE, + .replace_prog_fd = fd1, + ); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + ASSERT_EQ(err, -EEXIST, "prog_attach"); + assert_mprog_count(target, 1); + + err = bpf_prog_detach_opts(fd1, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 0); +cleanup: + test_tc_link__destroy(skel); +} + +void serial_test_tc_opts_invalid(void) +{ + test_tc_opts_invalid_target(BPF_TCX_INGRESS); + test_tc_opts_invalid_target(BPF_TCX_EGRESS); +} + +static void test_tc_opts_prepend_target(int target) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4; + struct test_tc_link *skel; + __u32 prog_ids[5]; + int err; + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.tc1); + fd2 = bpf_program__fd(skel->progs.tc2); + fd3 = bpf_program__fd(skel->progs.tc3); + fd4 = bpf_program__fd(skel->progs.tc4); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + id4 = id_from_prog_fd(fd4); + + ASSERT_NEQ(id1, id2, "prog_ids_1_2"); + ASSERT_NEQ(id3, id4, "prog_ids_3_4"); + ASSERT_NEQ(id2, id3, "prog_ids_2_3"); + + assert_mprog_count(target, 0); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(target, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_BEFORE, + ); + + err = bpf_prog_attach_opts(fd2, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup_target; + + assert_mprog_count(target, 2); + + optq.prog_ids = prog_ids; + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup_target2; + + ASSERT_EQ(optq.count, 2, "count"); + ASSERT_EQ(optq.revision, 3, "revision"); + ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); + ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_BEFORE, + ); + + err = bpf_prog_attach_opts(fd3, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup_target2; + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_BEFORE, + ); + + err = bpf_prog_attach_opts(fd4, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup_target3; + + assert_mprog_count(target, 4); + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup_target4; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], id4, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id2, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], id1, "prog_ids[3]"); + ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3"); + ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4"); + +cleanup_target4: + err = bpf_prog_detach_opts(fd4, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 3); + +cleanup_target3: + err = bpf_prog_detach_opts(fd3, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 2); + +cleanup_target2: + err = bpf_prog_detach_opts(fd2, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 1); + +cleanup_target: + err = bpf_prog_detach_opts(fd1, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 0); + +cleanup: + test_tc_link__destroy(skel); +} + +void serial_test_tc_opts_prepend(void) +{ + test_tc_opts_prepend_target(BPF_TCX_INGRESS); + test_tc_opts_prepend_target(BPF_TCX_EGRESS); +} + +static void test_tc_opts_append_target(int target) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4; + struct test_tc_link *skel; + __u32 prog_ids[5]; + int err; + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.tc1); + fd2 = bpf_program__fd(skel->progs.tc2); + fd3 = bpf_program__fd(skel->progs.tc3); + fd4 = bpf_program__fd(skel->progs.tc4); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + id4 = id_from_prog_fd(fd4); + + ASSERT_NEQ(id1, id2, "prog_ids_1_2"); + ASSERT_NEQ(id3, id4, "prog_ids_3_4"); + ASSERT_NEQ(id2, id3, "prog_ids_2_3"); + + assert_mprog_count(target, 0); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(target, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_AFTER, + ); + + err = bpf_prog_attach_opts(fd2, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup_target; + + assert_mprog_count(target, 2); + + optq.prog_ids = prog_ids; + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup_target2; + + ASSERT_EQ(optq.count, 2, "count"); + ASSERT_EQ(optq.revision, 3, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3"); + ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_AFTER, + ); + + err = bpf_prog_attach_opts(fd3, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup_target2; + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_AFTER, + ); + + err = bpf_prog_attach_opts(fd4, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup_target3; + + assert_mprog_count(target, 4); + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup_target4; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]"); + ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1"); + ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2"); + ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3"); + ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4"); + +cleanup_target4: + err = bpf_prog_detach_opts(fd4, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 3); + +cleanup_target3: + err = bpf_prog_detach_opts(fd3, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 2); + +cleanup_target2: + err = bpf_prog_detach_opts(fd2, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 1); + +cleanup_target: + err = bpf_prog_detach_opts(fd1, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 0); + +cleanup: + test_tc_link__destroy(skel); +} + +void serial_test_tc_opts_append(void) +{ + test_tc_opts_append_target(BPF_TCX_INGRESS); + test_tc_opts_append_target(BPF_TCX_EGRESS); +} + +static void test_tc_opts_dev_cleanup_target(int target) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4; + struct test_tc_link *skel; + int err, ifindex; + + ASSERT_OK(system("ip link add dev tcx_opts1 type veth peer name tcx_opts2"), "add veth"); + ifindex = if_nametoindex("tcx_opts1"); + ASSERT_NEQ(ifindex, 0, "non_zero_ifindex"); + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.tc1); + fd2 = bpf_program__fd(skel->progs.tc2); + fd3 = bpf_program__fd(skel->progs.tc3); + fd4 = bpf_program__fd(skel->progs.tc4); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + id4 = id_from_prog_fd(fd4); + + ASSERT_NEQ(id1, id2, "prog_ids_1_2"); + ASSERT_NEQ(id3, id4, "prog_ids_3_4"); + ASSERT_NEQ(id2, id3, "prog_ids_2_3"); + + assert_mprog_count_ifindex(ifindex, target, 0); + + err = bpf_prog_attach_opts(fd1, ifindex, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count_ifindex(ifindex, target, 1); + + err = bpf_prog_attach_opts(fd2, ifindex, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup1; + + assert_mprog_count_ifindex(ifindex, target, 2); + + err = bpf_prog_attach_opts(fd3, ifindex, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup2; + + assert_mprog_count_ifindex(ifindex, target, 3); + + err = bpf_prog_attach_opts(fd4, ifindex, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup3; + + assert_mprog_count_ifindex(ifindex, target, 4); + + ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth"); + ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed"); + ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); + return; +cleanup3: + err = bpf_prog_detach_opts(fd3, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + + assert_mprog_count_ifindex(ifindex, target, 2); +cleanup2: + err = bpf_prog_detach_opts(fd2, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + + assert_mprog_count_ifindex(ifindex, target, 1); +cleanup1: + err = bpf_prog_detach_opts(fd1, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + + assert_mprog_count_ifindex(ifindex, target, 0); +cleanup: + test_tc_link__destroy(skel); + + ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth"); + ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed"); + ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); +} + +void serial_test_tc_opts_dev_cleanup(void) +{ + test_tc_opts_dev_cleanup_target(BPF_TCX_INGRESS); + test_tc_opts_dev_cleanup_target(BPF_TCX_EGRESS); +} + +static void test_tc_opts_mixed_target(int target) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + LIBBPF_OPTS(bpf_tcx_opts, optl); + __u32 pid1, pid2, pid3, pid4, lid2, lid4; + __u32 prog_flags[4], link_flags[4]; + __u32 prog_ids[4], link_ids[4]; + struct test_tc_link *skel; + struct bpf_link *link; + int err, detach_fd; + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target), + 0, "tc1_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target), + 0, "tc2_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target), + 0, "tc3_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target), + 0, "tc4_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1)); + pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2)); + pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3)); + pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4)); + + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + ASSERT_NEQ(pid3, pid4, "prog_ids_3_4"); + ASSERT_NEQ(pid2, pid3, "prog_ids_2_3"); + + assert_mprog_count(target, 0); + + err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc1), + loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + detach_fd = bpf_program__fd(skel->progs.tc1); + + assert_mprog_count(target, 1); + + link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup1; + skel->links.tc2 = link; + + lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2)); + + assert_mprog_count(target, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_REPLACE, + .replace_prog_fd = bpf_program__fd(skel->progs.tc1), + ); + + err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc2), + loopback, target, &opta); + ASSERT_EQ(err, -EEXIST, "prog_attach"); + + assert_mprog_count(target, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_REPLACE, + .replace_prog_fd = bpf_program__fd(skel->progs.tc2), + ); + + err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc1), + loopback, target, &opta); + ASSERT_EQ(err, -EEXIST, "prog_attach"); + + assert_mprog_count(target, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_REPLACE, + .replace_prog_fd = bpf_program__fd(skel->progs.tc2), + ); + + err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc3), + loopback, target, &opta); + ASSERT_EQ(err, -EBUSY, "prog_attach"); + + assert_mprog_count(target, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_REPLACE, + .replace_prog_fd = bpf_program__fd(skel->progs.tc1), + ); + + err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc3), + loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup1; + + detach_fd = bpf_program__fd(skel->progs.tc3); + + assert_mprog_count(target, 2); + + link = bpf_program__attach_tcx(skel->progs.tc4, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup1; + skel->links.tc4 = link; + + lid4 = id_from_link_fd(bpf_link__fd(skel->links.tc4)); + + assert_mprog_count(target, 3); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_REPLACE, + .replace_prog_fd = bpf_program__fd(skel->progs.tc4), + ); + + err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc2), + loopback, target, &opta); + ASSERT_EQ(err, -EEXIST, "prog_attach"); + + optq.prog_ids = prog_ids; + optq.prog_attach_flags = prog_flags; + optq.link_ids = link_ids; + optq.link_attach_flags = link_flags; + + memset(prog_ids, 0, sizeof(prog_ids)); + memset(prog_flags, 0, sizeof(prog_flags)); + memset(link_ids, 0, sizeof(link_ids)); + memset(link_flags, 0, sizeof(link_flags)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup1; + + ASSERT_EQ(optq.count, 3, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], pid3, "prog_ids[0]"); + ASSERT_EQ(optq.prog_attach_flags[0], 0, "prog_flags[0]"); + ASSERT_EQ(optq.link_ids[0], 0, "link_ids[0]"); + ASSERT_EQ(optq.link_attach_flags[0], 0, "link_flags[0]"); + ASSERT_EQ(optq.prog_ids[1], pid2, "prog_ids[1]"); + ASSERT_EQ(optq.prog_attach_flags[1], 0, "prog_flags[1]"); + ASSERT_EQ(optq.link_ids[1], lid2, "link_ids[1]"); + ASSERT_EQ(optq.link_attach_flags[1], 0, "link_flags[1]"); + ASSERT_EQ(optq.prog_ids[2], pid4, "prog_ids[2]"); + ASSERT_EQ(optq.prog_attach_flags[2], 0, "prog_flags[2]"); + ASSERT_EQ(optq.link_ids[2], lid4, "link_ids[2]"); + ASSERT_EQ(optq.link_attach_flags[2], 0, "link_flags[2]"); + ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]"); + ASSERT_EQ(optq.prog_attach_flags[3], 0, "prog_flags[3]"); + ASSERT_EQ(optq.link_ids[3], 0, "link_ids[3]"); + ASSERT_EQ(optq.link_attach_flags[3], 0, "link_flags[3]"); + + ASSERT_OK(system(ping_cmd), ping_cmd); + +cleanup1: + err = bpf_prog_detach_opts(detach_fd, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 2); + +cleanup: + test_tc_link__destroy(skel); + assert_mprog_count(target, 0); +} + +void serial_test_tc_opts_mixed(void) +{ + test_tc_opts_mixed_target(BPF_TCX_INGRESS); + test_tc_opts_mixed_target(BPF_TCX_EGRESS); +} + +static void test_tc_opts_demixed_target(int target) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_tcx_opts, optl); + struct test_tc_link *skel; + struct bpf_link *link; + __u32 pid1, pid2; + int err; + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target), + 0, "tc1_attach_type"); + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target), + 0, "tc2_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1)); + pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2)); + ASSERT_NEQ(pid1, pid2, "prog_ids_1_2"); + + assert_mprog_count(target, 0); + + err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc1), + loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(target, 1); + + link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup1; + skel->links.tc2 = link; + + assert_mprog_count(target, 2); + + LIBBPF_OPTS_RESET(optd, + .flags = BPF_F_AFTER, + ); + + err = bpf_prog_detach_opts(0, loopback, target, &optd); + ASSERT_EQ(err, -EBUSY, "prog_detach"); + + assert_mprog_count(target, 2); + + LIBBPF_OPTS_RESET(optd, + .flags = BPF_F_BEFORE, + ); + + err = bpf_prog_detach_opts(0, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + + assert_mprog_count(target, 1); + goto cleanup; + +cleanup1: + err = bpf_prog_detach_opts(bpf_program__fd(skel->progs.tc1), + loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 2); + +cleanup: + test_tc_link__destroy(skel); + assert_mprog_count(target, 0); +} + +void serial_test_tc_opts_demixed(void) +{ + test_tc_opts_demixed_target(BPF_TCX_INGRESS); + test_tc_opts_demixed_target(BPF_TCX_EGRESS); +} + +static void test_tc_opts_detach_target(int target) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4; + struct test_tc_link *skel; + __u32 prog_ids[5]; + int err; + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.tc1); + fd2 = bpf_program__fd(skel->progs.tc2); + fd3 = bpf_program__fd(skel->progs.tc3); + fd4 = bpf_program__fd(skel->progs.tc4); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + id4 = id_from_prog_fd(fd4); + + ASSERT_NEQ(id1, id2, "prog_ids_1_2"); + ASSERT_NEQ(id3, id4, "prog_ids_3_4"); + ASSERT_NEQ(id2, id3, "prog_ids_2_3"); + + assert_mprog_count(target, 0); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(target, 1); + + err = bpf_prog_attach_opts(fd2, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup1; + + assert_mprog_count(target, 2); + + err = bpf_prog_attach_opts(fd3, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup2; + + assert_mprog_count(target, 3); + + err = bpf_prog_attach_opts(fd4, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup3; + + assert_mprog_count(target, 4); + + optq.prog_ids = prog_ids; + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]"); + ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + + LIBBPF_OPTS_RESET(optd, + .flags = BPF_F_BEFORE, + ); + + err = bpf_prog_detach_opts(0, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + + assert_mprog_count(target, 3); + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 3, "count"); + ASSERT_EQ(optq.revision, 6, "revision"); + ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id4, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]"); + + LIBBPF_OPTS_RESET(optd, + .flags = BPF_F_AFTER, + ); + + err = bpf_prog_detach_opts(0, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + + assert_mprog_count(target, 2); + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 2, "count"); + ASSERT_EQ(optq.revision, 7, "revision"); + ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + + LIBBPF_OPTS_RESET(optd); + + err = bpf_prog_detach_opts(fd3, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 1); + + err = bpf_prog_detach_opts(fd2, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(optd, + .flags = BPF_F_BEFORE, + ); + + err = bpf_prog_detach_opts(0, loopback, target, &optd); + ASSERT_EQ(err, -ENOENT, "prog_detach"); + + LIBBPF_OPTS_RESET(optd, + .flags = BPF_F_AFTER, + ); + + err = bpf_prog_detach_opts(0, loopback, target, &optd); + ASSERT_EQ(err, -ENOENT, "prog_detach"); + goto cleanup; + +cleanup4: + err = bpf_prog_detach_opts(fd4, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 3); + +cleanup3: + err = bpf_prog_detach_opts(fd3, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 2); + +cleanup2: + err = bpf_prog_detach_opts(fd2, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 1); + +cleanup1: + err = bpf_prog_detach_opts(fd1, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 0); + +cleanup: + test_tc_link__destroy(skel); +} + +void serial_test_tc_opts_detach(void) +{ + test_tc_opts_detach_target(BPF_TCX_INGRESS); + test_tc_opts_detach_target(BPF_TCX_EGRESS); +} + +static void test_tc_opts_detach_before_target(int target) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4; + struct test_tc_link *skel; + __u32 prog_ids[5]; + int err; + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.tc1); + fd2 = bpf_program__fd(skel->progs.tc2); + fd3 = bpf_program__fd(skel->progs.tc3); + fd4 = bpf_program__fd(skel->progs.tc4); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + id4 = id_from_prog_fd(fd4); + + ASSERT_NEQ(id1, id2, "prog_ids_1_2"); + ASSERT_NEQ(id3, id4, "prog_ids_3_4"); + ASSERT_NEQ(id2, id3, "prog_ids_2_3"); + + assert_mprog_count(target, 0); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(target, 1); + + err = bpf_prog_attach_opts(fd2, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup1; + + assert_mprog_count(target, 2); + + err = bpf_prog_attach_opts(fd3, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup2; + + assert_mprog_count(target, 3); + + err = bpf_prog_attach_opts(fd4, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup3; + + assert_mprog_count(target, 4); + + optq.prog_ids = prog_ids; + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]"); + ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + + LIBBPF_OPTS_RESET(optd, + .flags = BPF_F_BEFORE, + .relative_fd = fd2, + ); + + err = bpf_prog_detach_opts(fd1, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + + assert_mprog_count(target, 3); + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 3, "count"); + ASSERT_EQ(optq.revision, 6, "revision"); + ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id4, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]"); + + LIBBPF_OPTS_RESET(optd, + .flags = BPF_F_BEFORE, + .relative_fd = fd2, + ); + + err = bpf_prog_detach_opts(fd1, loopback, target, &optd); + ASSERT_EQ(err, -ENOENT, "prog_detach"); + assert_mprog_count(target, 3); + + LIBBPF_OPTS_RESET(optd, + .flags = BPF_F_BEFORE, + .relative_fd = fd4, + ); + + err = bpf_prog_detach_opts(fd2, loopback, target, &optd); + ASSERT_EQ(err, -ERANGE, "prog_detach"); + assert_mprog_count(target, 3); + + LIBBPF_OPTS_RESET(optd, + .flags = BPF_F_BEFORE, + .relative_fd = fd1, + ); + + err = bpf_prog_detach_opts(fd2, loopback, target, &optd); + ASSERT_EQ(err, -ENOENT, "prog_detach"); + assert_mprog_count(target, 3); + + LIBBPF_OPTS_RESET(optd, + .flags = BPF_F_BEFORE, + .relative_fd = fd3, + ); + + err = bpf_prog_detach_opts(fd2, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + + assert_mprog_count(target, 2); + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 2, "count"); + ASSERT_EQ(optq.revision, 7, "revision"); + ASSERT_EQ(optq.prog_ids[0], id3, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id4, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + + LIBBPF_OPTS_RESET(optd, + .flags = BPF_F_BEFORE, + .relative_fd = fd4, + ); + + err = bpf_prog_detach_opts(0, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + + assert_mprog_count(target, 1); + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 1, "count"); + ASSERT_EQ(optq.revision, 8, "revision"); + ASSERT_EQ(optq.prog_ids[0], id4, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + + LIBBPF_OPTS_RESET(optd, + .flags = BPF_F_BEFORE, + ); + + err = bpf_prog_detach_opts(0, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + + assert_mprog_count(target, 0); + goto cleanup; + +cleanup4: + err = bpf_prog_detach_opts(fd4, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 3); + +cleanup3: + err = bpf_prog_detach_opts(fd3, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 2); + +cleanup2: + err = bpf_prog_detach_opts(fd2, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 1); + +cleanup1: + err = bpf_prog_detach_opts(fd1, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 0); + +cleanup: + test_tc_link__destroy(skel); +} + +void serial_test_tc_opts_detach_before(void) +{ + test_tc_opts_detach_before_target(BPF_TCX_INGRESS); + test_tc_opts_detach_before_target(BPF_TCX_EGRESS); +} + +static void test_tc_opts_detach_after_target(int target) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4; + struct test_tc_link *skel; + __u32 prog_ids[5]; + int err; + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.tc1); + fd2 = bpf_program__fd(skel->progs.tc2); + fd3 = bpf_program__fd(skel->progs.tc3); + fd4 = bpf_program__fd(skel->progs.tc4); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + id4 = id_from_prog_fd(fd4); + + ASSERT_NEQ(id1, id2, "prog_ids_1_2"); + ASSERT_NEQ(id3, id4, "prog_ids_3_4"); + ASSERT_NEQ(id2, id3, "prog_ids_2_3"); + + assert_mprog_count(target, 0); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(target, 1); + + err = bpf_prog_attach_opts(fd2, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup1; + + assert_mprog_count(target, 2); + + err = bpf_prog_attach_opts(fd3, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup2; + + assert_mprog_count(target, 3); + + err = bpf_prog_attach_opts(fd4, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup3; + + assert_mprog_count(target, 4); + + optq.prog_ids = prog_ids; + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]"); + ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + + LIBBPF_OPTS_RESET(optd, + .flags = BPF_F_AFTER, + .relative_fd = fd1, + ); + + err = bpf_prog_detach_opts(fd2, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + + assert_mprog_count(target, 3); + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 3, "count"); + ASSERT_EQ(optq.revision, 6, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id4, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]"); + + LIBBPF_OPTS_RESET(optd, + .flags = BPF_F_AFTER, + .relative_fd = fd1, + ); + + err = bpf_prog_detach_opts(fd2, loopback, target, &optd); + ASSERT_EQ(err, -ENOENT, "prog_detach"); + assert_mprog_count(target, 3); + + LIBBPF_OPTS_RESET(optd, + .flags = BPF_F_AFTER, + .relative_fd = fd4, + ); + + err = bpf_prog_detach_opts(fd1, loopback, target, &optd); + ASSERT_EQ(err, -ERANGE, "prog_detach"); + assert_mprog_count(target, 3); + + LIBBPF_OPTS_RESET(optd, + .flags = BPF_F_AFTER, + .relative_fd = fd3, + ); + + err = bpf_prog_detach_opts(fd1, loopback, target, &optd); + ASSERT_EQ(err, -ERANGE, "prog_detach"); + assert_mprog_count(target, 3); + + LIBBPF_OPTS_RESET(optd, + .flags = BPF_F_AFTER, + .relative_fd = fd1, + ); + + err = bpf_prog_detach_opts(fd1, loopback, target, &optd); + ASSERT_EQ(err, -ERANGE, "prog_detach"); + assert_mprog_count(target, 3); + + LIBBPF_OPTS_RESET(optd, + .flags = BPF_F_AFTER, + .relative_fd = fd1, + ); + + err = bpf_prog_detach_opts(fd3, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + + assert_mprog_count(target, 2); + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 2, "count"); + ASSERT_EQ(optq.revision, 7, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id4, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]"); + + LIBBPF_OPTS_RESET(optd, + .flags = BPF_F_AFTER, + .relative_fd = fd1, + ); + + err = bpf_prog_detach_opts(0, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + + assert_mprog_count(target, 1); + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 1, "count"); + ASSERT_EQ(optq.revision, 8, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + + LIBBPF_OPTS_RESET(optd, + .flags = BPF_F_AFTER, + ); + + err = bpf_prog_detach_opts(0, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + + assert_mprog_count(target, 0); + goto cleanup; + +cleanup4: + err = bpf_prog_detach_opts(fd4, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 3); + +cleanup3: + err = bpf_prog_detach_opts(fd3, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 2); + +cleanup2: + err = bpf_prog_detach_opts(fd2, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 1); + +cleanup1: + err = bpf_prog_detach_opts(fd1, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 0); + +cleanup: + test_tc_link__destroy(skel); +} + +void serial_test_tc_opts_detach_after(void) +{ + test_tc_opts_detach_after_target(BPF_TCX_INGRESS); + test_tc_opts_detach_after_target(BPF_TCX_EGRESS); +} + +static void test_tc_opts_delete_empty(int target, bool chain_tc_old) +{ + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .ifindex = loopback); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + int err; + + assert_mprog_count(target, 0); + if (chain_tc_old) { + tc_hook.attach_point = target == BPF_TCX_INGRESS ? + BPF_TC_INGRESS : BPF_TC_EGRESS; + err = bpf_tc_hook_create(&tc_hook); + ASSERT_OK(err, "bpf_tc_hook_create"); + assert_mprog_count(target, 0); + } + err = bpf_prog_detach_opts(0, loopback, target, &optd); + ASSERT_EQ(err, -ENOENT, "prog_detach"); + if (chain_tc_old) { + tc_hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS; + bpf_tc_hook_destroy(&tc_hook); + } + assert_mprog_count(target, 0); +} + +void serial_test_tc_opts_delete_empty(void) +{ + test_tc_opts_delete_empty(BPF_TCX_INGRESS, false); + test_tc_opts_delete_empty(BPF_TCX_EGRESS, false); + test_tc_opts_delete_empty(BPF_TCX_INGRESS, true); + test_tc_opts_delete_empty(BPF_TCX_EGRESS, true); +} + +static void test_tc_chain_mixed(int target) +{ + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .ifindex = loopback); + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + __u32 fd1, fd2, fd3, id1, id2, id3; + struct test_tc_link *skel; + int err, detach_fd; + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.tc4); + fd2 = bpf_program__fd(skel->progs.tc5); + fd3 = bpf_program__fd(skel->progs.tc6); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + + ASSERT_NEQ(id1, id2, "prog_ids_1_2"); + ASSERT_NEQ(id2, id3, "prog_ids_2_3"); + + assert_mprog_count(target, 0); + + tc_hook.attach_point = target == BPF_TCX_INGRESS ? + BPF_TC_INGRESS : BPF_TC_EGRESS; + err = bpf_tc_hook_create(&tc_hook); + err = err == -EEXIST ? 0 : err; + if (!ASSERT_OK(err, "bpf_tc_hook_create")) + goto cleanup; + + tc_opts.prog_fd = fd2; + err = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(err, "bpf_tc_attach")) + goto cleanup_hook; + + err = bpf_prog_attach_opts(fd3, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup_filter; + + detach_fd = fd3; + + assert_mprog_count(target, 1); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); + ASSERT_EQ(skel->bss->seen_tc5, false, "seen_tc5"); + ASSERT_EQ(skel->bss->seen_tc6, true, "seen_tc6"); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_REPLACE, + .replace_prog_fd = fd3, + ); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup_opts; + + detach_fd = fd1; + + assert_mprog_count(target, 1); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4"); + ASSERT_EQ(skel->bss->seen_tc5, true, "seen_tc5"); + ASSERT_EQ(skel->bss->seen_tc6, false, "seen_tc6"); + +cleanup_opts: + err = bpf_prog_detach_opts(detach_fd, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 0); + + tc_skel_reset_all_seen(skel); + ASSERT_OK(system(ping_cmd), ping_cmd); + + ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4"); + ASSERT_EQ(skel->bss->seen_tc5, true, "seen_tc5"); + ASSERT_EQ(skel->bss->seen_tc6, false, "seen_tc6"); + +cleanup_filter: + tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0; + err = bpf_tc_detach(&tc_hook, &tc_opts); + ASSERT_OK(err, "bpf_tc_detach"); + +cleanup_hook: + tc_hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS; + bpf_tc_hook_destroy(&tc_hook); + +cleanup: + test_tc_link__destroy(skel); +} + +void serial_test_tc_opts_chain_mixed(void) +{ + test_tc_chain_mixed(BPF_TCX_INGRESS); + test_tc_chain_mixed(BPF_TCX_EGRESS); +} + +static int generate_dummy_prog(void) +{ + const struct bpf_insn prog_insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + const size_t prog_insn_cnt = sizeof(prog_insns) / sizeof(struct bpf_insn); + LIBBPF_OPTS(bpf_prog_load_opts, opts); + const size_t log_buf_sz = 256; + char *log_buf; + int fd = -1; + + log_buf = malloc(log_buf_sz); + if (!ASSERT_OK_PTR(log_buf, "log_buf_alloc")) + return fd; + opts.log_buf = log_buf; + opts.log_size = log_buf_sz; + + log_buf[0] = '\0'; + opts.log_level = 0; + fd = bpf_prog_load(BPF_PROG_TYPE_SCHED_CLS, "tcx_prog", "GPL", + prog_insns, prog_insn_cnt, &opts); + ASSERT_STREQ(log_buf, "", "log_0"); + ASSERT_GE(fd, 0, "prog_fd"); + free(log_buf); + return fd; +} + +static void test_tc_opts_max_target(int target, int flags, bool relative) +{ + int err, ifindex, i, prog_fd, last_fd = -1; + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + const int max_progs = 63; + + ASSERT_OK(system("ip link add dev tcx_opts1 type veth peer name tcx_opts2"), "add veth"); + ifindex = if_nametoindex("tcx_opts1"); + ASSERT_NEQ(ifindex, 0, "non_zero_ifindex"); + + assert_mprog_count_ifindex(ifindex, target, 0); + + for (i = 0; i < max_progs; i++) { + prog_fd = generate_dummy_prog(); + if (!ASSERT_GE(prog_fd, 0, "dummy_prog")) + goto cleanup; + err = bpf_prog_attach_opts(prog_fd, ifindex, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + assert_mprog_count_ifindex(ifindex, target, i + 1); + if (i == max_progs - 1 && relative) + last_fd = prog_fd; + else + close(prog_fd); + } + + prog_fd = generate_dummy_prog(); + if (!ASSERT_GE(prog_fd, 0, "dummy_prog")) + goto cleanup; + opta.flags = flags; + if (last_fd > 0) + opta.relative_fd = last_fd; + err = bpf_prog_attach_opts(prog_fd, ifindex, target, &opta); + ASSERT_EQ(err, -ERANGE, "prog_64_attach"); + assert_mprog_count_ifindex(ifindex, target, max_progs); + close(prog_fd); +cleanup: + if (last_fd > 0) + close(last_fd); + ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth"); + ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed"); + ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed"); +} + +void serial_test_tc_opts_max(void) +{ + test_tc_opts_max_target(BPF_TCX_INGRESS, 0, false); + test_tc_opts_max_target(BPF_TCX_EGRESS, 0, false); + + test_tc_opts_max_target(BPF_TCX_INGRESS, BPF_F_BEFORE, false); + test_tc_opts_max_target(BPF_TCX_EGRESS, BPF_F_BEFORE, true); + + test_tc_opts_max_target(BPF_TCX_INGRESS, BPF_F_AFTER, true); + test_tc_opts_max_target(BPF_TCX_EGRESS, BPF_F_AFTER, false); +} + +static void test_tc_opts_query_target(int target) +{ + const size_t attr_size = offsetofend(union bpf_attr, query); + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4; + struct test_tc_link *skel; + union bpf_attr attr; + __u32 prog_ids[5]; + int err; + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.tc1); + fd2 = bpf_program__fd(skel->progs.tc2); + fd3 = bpf_program__fd(skel->progs.tc3); + fd4 = bpf_program__fd(skel->progs.tc4); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + id4 = id_from_prog_fd(fd4); + + assert_mprog_count(target, 0); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 1, + ); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(target, 1); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 2, + ); + + err = bpf_prog_attach_opts(fd2, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup1; + + assert_mprog_count(target, 2); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 3, + ); + + err = bpf_prog_attach_opts(fd3, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup2; + + assert_mprog_count(target, 3); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 4, + ); + + err = bpf_prog_attach_opts(fd4, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup3; + + assert_mprog_count(target, 4); + + /* Test 1: Double query via libbpf API */ + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids, NULL, "prog_ids"); + ASSERT_EQ(optq.link_ids, NULL, "link_ids"); + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.prog_ids = prog_ids; + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]"); + ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + ASSERT_EQ(optq.link_ids, NULL, "link_ids"); + + /* Test 2: Double query via bpf_attr & bpf(2) directly */ + memset(&attr, 0, attr_size); + attr.query.target_ifindex = loopback; + attr.query.attach_type = target; + + err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(attr.query.count, 4, "count"); + ASSERT_EQ(attr.query.revision, 5, "revision"); + ASSERT_EQ(attr.query.query_flags, 0, "query_flags"); + ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags"); + ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex"); + ASSERT_EQ(attr.query.attach_type, target, "attach_type"); + ASSERT_EQ(attr.query.prog_ids, 0, "prog_ids"); + ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags"); + ASSERT_EQ(attr.query.link_ids, 0, "link_ids"); + ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags"); + + memset(prog_ids, 0, sizeof(prog_ids)); + attr.query.prog_ids = ptr_to_u64(prog_ids); + + err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(attr.query.count, 4, "count"); + ASSERT_EQ(attr.query.revision, 5, "revision"); + ASSERT_EQ(attr.query.query_flags, 0, "query_flags"); + ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags"); + ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex"); + ASSERT_EQ(attr.query.attach_type, target, "attach_type"); + ASSERT_EQ(attr.query.prog_ids, ptr_to_u64(prog_ids), "prog_ids"); + ASSERT_EQ(prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(prog_ids[1], id2, "prog_ids[1]"); + ASSERT_EQ(prog_ids[2], id3, "prog_ids[2]"); + ASSERT_EQ(prog_ids[3], id4, "prog_ids[3]"); + ASSERT_EQ(prog_ids[4], 0, "prog_ids[4]"); + ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags"); + ASSERT_EQ(attr.query.link_ids, 0, "link_ids"); + ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags"); + +cleanup4: + err = bpf_prog_detach_opts(fd4, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 3); + +cleanup3: + err = bpf_prog_detach_opts(fd3, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 2); + +cleanup2: + err = bpf_prog_detach_opts(fd2, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 1); + +cleanup1: + err = bpf_prog_detach_opts(fd1, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 0); + +cleanup: + test_tc_link__destroy(skel); +} + +void serial_test_tc_opts_query(void) +{ + test_tc_opts_query_target(BPF_TCX_INGRESS); + test_tc_opts_query_target(BPF_TCX_EGRESS); +} + +static void test_tc_opts_query_attach_target(int target) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + struct test_tc_link *skel; + __u32 prog_ids[2]; + __u32 fd1, id1; + int err; + + skel = test_tc_link__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.tc1); + id1 = id_from_prog_fd(fd1); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup; + + ASSERT_EQ(optq.count, 0, "count"); + ASSERT_EQ(optq.revision, 1, "revision"); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = optq.revision, + ); + + err = bpf_prog_attach_opts(fd1, loopback, target, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + memset(prog_ids, 0, sizeof(prog_ids)); + optq.prog_ids = prog_ids; + optq.count = ARRAY_SIZE(prog_ids); + + err = bpf_prog_query_opts(loopback, target, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup1; + + ASSERT_EQ(optq.count, 1, "count"); + ASSERT_EQ(optq.revision, 2, "revision"); + ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]"); + +cleanup1: + err = bpf_prog_detach_opts(fd1, loopback, target, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(target, 0); +cleanup: + test_tc_link__destroy(skel); +} + +void serial_test_tc_opts_query_attach(void) +{ + test_tc_opts_query_attach_target(BPF_TCX_INGRESS); + test_tc_opts_query_attach_target(BPF_TCX_EGRESS); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c index 13bcaeb028b8..56685fc03c7e 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c @@ -347,7 +347,7 @@ static void syncookie_estab(void) exp_active_estab_in.max_delack_ms = 22; exp_passive_hdr_stg.syncookie = true; - exp_active_hdr_stg.resend_syn = true, + exp_active_hdr_stg.resend_syn = true; prepare_out(); diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c new file mode 100644 index 000000000000..0cca4e8ae38e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#define _GNU_SOURCE +#include <sched.h> +#include <pthread.h> +#include <stdbool.h> +#include <bpf/btf.h> +#include <test_progs.h> + +#include "test_bpf_ma.skel.h" + +void test_test_bpf_ma(void) +{ + struct test_bpf_ma *skel; + struct btf *btf; + int i, err; + + skel = test_bpf_ma__open(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + btf = bpf_object__btf(skel->obj); + if (!ASSERT_OK_PTR(btf, "btf")) + goto out; + + for (i = 0; i < ARRAY_SIZE(skel->rodata->data_sizes); i++) { + char name[32]; + int id; + + snprintf(name, sizeof(name), "bin_data_%u", skel->rodata->data_sizes[i]); + id = btf__find_by_name_kind(btf, name, BTF_KIND_STRUCT); + if (!ASSERT_GT(id, 0, "bin_data")) + goto out; + skel->rodata->data_btf_ids[i] = id; + } + + err = test_bpf_ma__load(skel); + if (!ASSERT_OK(err, "load")) + goto out; + + err = test_bpf_ma__attach(skel); + if (!ASSERT_OK(err, "attach")) + goto out; + + skel->bss->pid = getpid(); + usleep(1); + ASSERT_OK(skel->bss->err, "test error"); +out: + test_bpf_ma__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_ldsx_insn.c b/tools/testing/selftests/bpf/prog_tests/test_ldsx_insn.c new file mode 100644 index 000000000000..375677c19146 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_ldsx_insn.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates.*/ + +#include <test_progs.h> +#include <network_helpers.h> +#include "test_ldsx_insn.skel.h" + +static void test_map_val_and_probed_memory(void) +{ + struct test_ldsx_insn *skel; + int err; + + skel = test_ldsx_insn__open(); + if (!ASSERT_OK_PTR(skel, "test_ldsx_insn__open")) + return; + + if (skel->rodata->skip) { + test__skip(); + goto out; + } + + bpf_program__set_autoload(skel->progs.rdonly_map_prog, true); + bpf_program__set_autoload(skel->progs.map_val_prog, true); + bpf_program__set_autoload(skel->progs.test_ptr_struct_arg, true); + + err = test_ldsx_insn__load(skel); + if (!ASSERT_OK(err, "test_ldsx_insn__load")) + goto out; + + err = test_ldsx_insn__attach(skel); + if (!ASSERT_OK(err, "test_ldsx_insn__attach")) + goto out; + + ASSERT_OK(trigger_module_test_read(256), "trigger_read"); + + ASSERT_EQ(skel->bss->done1, 1, "done1"); + ASSERT_EQ(skel->bss->ret1, 1, "ret1"); + ASSERT_EQ(skel->bss->done2, 1, "done2"); + ASSERT_EQ(skel->bss->ret2, 1, "ret2"); + ASSERT_EQ(skel->bss->int_member, -1, "int_member"); + +out: + test_ldsx_insn__destroy(skel); +} + +static void test_ctx_member_sign_ext(void) +{ + struct test_ldsx_insn *skel; + int err, fd, cgroup_fd; + char buf[16] = {0}; + socklen_t optlen; + + cgroup_fd = test__join_cgroup("/ldsx_test"); + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /ldsx_test")) + return; + + skel = test_ldsx_insn__open(); + if (!ASSERT_OK_PTR(skel, "test_ldsx_insn__open")) + goto close_cgroup_fd; + + if (skel->rodata->skip) { + test__skip(); + goto destroy_skel; + } + + bpf_program__set_autoload(skel->progs._getsockopt, true); + + err = test_ldsx_insn__load(skel); + if (!ASSERT_OK(err, "test_ldsx_insn__load")) + goto destroy_skel; + + skel->links._getsockopt = + bpf_program__attach_cgroup(skel->progs._getsockopt, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links._getsockopt, "getsockopt_link")) + goto destroy_skel; + + fd = socket(AF_INET, SOCK_STREAM, 0); + if (!ASSERT_GE(fd, 0, "socket")) + goto destroy_skel; + + optlen = sizeof(buf); + (void)getsockopt(fd, SOL_IP, IP_TTL, buf, &optlen); + + ASSERT_EQ(skel->bss->set_optlen, -1, "optlen"); + ASSERT_EQ(skel->bss->set_retval, -1, "retval"); + + close(fd); +destroy_skel: + test_ldsx_insn__destroy(skel); +close_cgroup_fd: + close(cgroup_fd); +} + +static void test_ctx_member_narrow_sign_ext(void) +{ + struct test_ldsx_insn *skel; + struct __sk_buff skb = {}; + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .ctx_in = &skb, + .ctx_size_in = sizeof(skb), + ); + int err, prog_fd; + + skel = test_ldsx_insn__open(); + if (!ASSERT_OK_PTR(skel, "test_ldsx_insn__open")) + return; + + if (skel->rodata->skip) { + test__skip(); + goto out; + } + + bpf_program__set_autoload(skel->progs._tc, true); + + err = test_ldsx_insn__load(skel); + if (!ASSERT_OK(err, "test_ldsx_insn__load")) + goto out; + + prog_fd = bpf_program__fd(skel->progs._tc); + err = bpf_prog_test_run_opts(prog_fd, &topts); + ASSERT_OK(err, "test_run"); + + ASSERT_EQ(skel->bss->set_mark, -2, "set_mark"); + +out: + test_ldsx_insn__destroy(skel); +} + +void test_ldsx_insn(void) +{ + if (test__start_subtest("map_val and probed_memory")) + test_map_val_and_probed_memory(); + if (test__start_subtest("ctx_member_sign_ext")) + test_ctx_member_sign_ext(); + if (test__start_subtest("ctx_member_narrow_sign_ext")) + test_ctx_member_narrow_sign_ext(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c index 290c21dbe65a..ce2c61d62fc6 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer.c +++ b/tools/testing/selftests/bpf/prog_tests/timer.c @@ -2,6 +2,7 @@ /* Copyright (c) 2021 Facebook */ #include <test_progs.h> #include "timer.skel.h" +#include "timer_failure.skel.h" static int timer(struct timer *timer_skel) { @@ -49,10 +50,11 @@ void serial_test_timer(void) timer_skel = timer__open_and_load(); if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load")) - goto cleanup; + return; err = timer(timer_skel); ASSERT_OK(err, "timer"); -cleanup: timer__destroy(timer_skel); + + RUN_TESTS(timer_failure); } diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c index 1c75a32186d6..fe0fb0c9849a 100644 --- a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c +++ b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c @@ -55,6 +55,25 @@ static void test_fentry(void) ASSERT_EQ(skel->bss->t6, 1, "t6 ret"); + ASSERT_EQ(skel->bss->t7_a, 16, "t7:a"); + ASSERT_EQ(skel->bss->t7_b, 17, "t7:b"); + ASSERT_EQ(skel->bss->t7_c, 18, "t7:c"); + ASSERT_EQ(skel->bss->t7_d, 19, "t7:d"); + ASSERT_EQ(skel->bss->t7_e, 20, "t7:e"); + ASSERT_EQ(skel->bss->t7_f_a, 21, "t7:f.a"); + ASSERT_EQ(skel->bss->t7_f_b, 22, "t7:f.b"); + ASSERT_EQ(skel->bss->t7_ret, 133, "t7 ret"); + + ASSERT_EQ(skel->bss->t8_a, 16, "t8:a"); + ASSERT_EQ(skel->bss->t8_b, 17, "t8:b"); + ASSERT_EQ(skel->bss->t8_c, 18, "t8:c"); + ASSERT_EQ(skel->bss->t8_d, 19, "t8:d"); + ASSERT_EQ(skel->bss->t8_e, 20, "t8:e"); + ASSERT_EQ(skel->bss->t8_f_a, 21, "t8:f.a"); + ASSERT_EQ(skel->bss->t8_f_b, 22, "t8:f.b"); + ASSERT_EQ(skel->bss->t8_g, 23, "t8:g"); + ASSERT_EQ(skel->bss->t8_ret, 156, "t8 ret"); + tracing_struct__detach(skel); destroy_skel: tracing_struct__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c index e91d0d1769f1..6cd7349d4a2b 100644 --- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c +++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c @@ -88,8 +88,8 @@ void serial_test_trampoline_count(void) if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) goto cleanup; - ASSERT_EQ(opts.retval & 0xffff, 4, "bpf_modify_return_test.result"); - ASSERT_EQ(opts.retval >> 16, 1, "bpf_modify_return_test.side_effect"); + ASSERT_EQ(opts.retval & 0xffff, 33, "bpf_modify_return_test.result"); + ASSERT_EQ(opts.retval >> 16, 2, "bpf_modify_return_test.side_effect"); cleanup: for (; i >= 0; i--) { diff --git a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c index 8383a99f610f..0adf8d9475cb 100644 --- a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c +++ b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c @@ -171,7 +171,11 @@ static void test_unpriv_bpf_disabled_negative(struct test_unpriv_bpf_disabled *s prog_insns, prog_insn_cnt, &load_opts), -EPERM, "prog_load_fails"); - for (i = BPF_MAP_TYPE_HASH; i <= BPF_MAP_TYPE_BLOOM_FILTER; i++) + /* some map types require particular correct parameters which could be + * sanity-checked before enforcing -EPERM, so only validate that + * the simple ARRAY and HASH maps are failing with -EPERM + */ + for (i = BPF_MAP_TYPE_HASH; i <= BPF_MAP_TYPE_ARRAY; i++) ASSERT_EQ(bpf_map_create(i, NULL, sizeof(int), sizeof(int), 1, NULL), -EPERM, "map_create_fails"); diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c new file mode 100644 index 000000000000..cd051d3901a9 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c @@ -0,0 +1,415 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <unistd.h> +#include <test_progs.h> +#include "uprobe_multi.skel.h" +#include "uprobe_multi_bench.skel.h" +#include "uprobe_multi_usdt.skel.h" +#include "bpf/libbpf_internal.h" +#include "testing_helpers.h" + +static char test_data[] = "test_data"; + +noinline void uprobe_multi_func_1(void) +{ + asm volatile (""); +} + +noinline void uprobe_multi_func_2(void) +{ + asm volatile (""); +} + +noinline void uprobe_multi_func_3(void) +{ + asm volatile (""); +} + +struct child { + int go[2]; + int pid; +}; + +static void release_child(struct child *child) +{ + int child_status; + + if (!child) + return; + close(child->go[1]); + close(child->go[0]); + if (child->pid > 0) + waitpid(child->pid, &child_status, 0); +} + +static void kick_child(struct child *child) +{ + char c = 1; + + if (child) { + write(child->go[1], &c, 1); + release_child(child); + } + fflush(NULL); +} + +static struct child *spawn_child(void) +{ + static struct child child; + int err; + int c; + + /* pipe to notify child to execute the trigger functions */ + if (pipe(child.go)) + return NULL; + + child.pid = fork(); + if (child.pid < 0) { + release_child(&child); + errno = EINVAL; + return NULL; + } + + /* child */ + if (child.pid == 0) { + close(child.go[1]); + + /* wait for parent's kick */ + err = read(child.go[0], &c, 1); + if (err != 1) + exit(err); + + uprobe_multi_func_1(); + uprobe_multi_func_2(); + uprobe_multi_func_3(); + + exit(errno); + } + + return &child; +} + +static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child) +{ + skel->bss->uprobe_multi_func_1_addr = (__u64) uprobe_multi_func_1; + skel->bss->uprobe_multi_func_2_addr = (__u64) uprobe_multi_func_2; + skel->bss->uprobe_multi_func_3_addr = (__u64) uprobe_multi_func_3; + + skel->bss->user_ptr = test_data; + + /* + * Disable pid check in bpf program if we are pid filter test, + * because the probe should be executed only by child->pid + * passed at the probe attach. + */ + skel->bss->pid = child ? 0 : getpid(); + + if (child) + kick_child(child); + + /* trigger all probes */ + uprobe_multi_func_1(); + uprobe_multi_func_2(); + uprobe_multi_func_3(); + + /* + * There are 2 entry and 2 exit probe called for each uprobe_multi_func_[123] + * function and each slepable probe (6) increments uprobe_multi_sleep_result. + */ + ASSERT_EQ(skel->bss->uprobe_multi_func_1_result, 2, "uprobe_multi_func_1_result"); + ASSERT_EQ(skel->bss->uprobe_multi_func_2_result, 2, "uprobe_multi_func_2_result"); + ASSERT_EQ(skel->bss->uprobe_multi_func_3_result, 2, "uprobe_multi_func_3_result"); + + ASSERT_EQ(skel->bss->uretprobe_multi_func_1_result, 2, "uretprobe_multi_func_1_result"); + ASSERT_EQ(skel->bss->uretprobe_multi_func_2_result, 2, "uretprobe_multi_func_2_result"); + ASSERT_EQ(skel->bss->uretprobe_multi_func_3_result, 2, "uretprobe_multi_func_3_result"); + + ASSERT_EQ(skel->bss->uprobe_multi_sleep_result, 6, "uprobe_multi_sleep_result"); + + if (child) + ASSERT_EQ(skel->bss->child_pid, child->pid, "uprobe_multi_child_pid"); +} + +static void test_skel_api(void) +{ + struct uprobe_multi *skel = NULL; + int err; + + skel = uprobe_multi__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load")) + goto cleanup; + + err = uprobe_multi__attach(skel); + if (!ASSERT_OK(err, "uprobe_multi__attach")) + goto cleanup; + + uprobe_multi_test_run(skel, NULL); + +cleanup: + uprobe_multi__destroy(skel); +} + +static void +__test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_multi_opts *opts, + struct child *child) +{ + pid_t pid = child ? child->pid : -1; + struct uprobe_multi *skel = NULL; + + skel = uprobe_multi__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load")) + goto cleanup; + + opts->retprobe = false; + skel->links.uprobe = bpf_program__attach_uprobe_multi(skel->progs.uprobe, pid, + binary, pattern, opts); + if (!ASSERT_OK_PTR(skel->links.uprobe, "bpf_program__attach_uprobe_multi")) + goto cleanup; + + opts->retprobe = true; + skel->links.uretprobe = bpf_program__attach_uprobe_multi(skel->progs.uretprobe, pid, + binary, pattern, opts); + if (!ASSERT_OK_PTR(skel->links.uretprobe, "bpf_program__attach_uprobe_multi")) + goto cleanup; + + opts->retprobe = false; + skel->links.uprobe_sleep = bpf_program__attach_uprobe_multi(skel->progs.uprobe_sleep, pid, + binary, pattern, opts); + if (!ASSERT_OK_PTR(skel->links.uprobe_sleep, "bpf_program__attach_uprobe_multi")) + goto cleanup; + + opts->retprobe = true; + skel->links.uretprobe_sleep = bpf_program__attach_uprobe_multi(skel->progs.uretprobe_sleep, + pid, binary, pattern, opts); + if (!ASSERT_OK_PTR(skel->links.uretprobe_sleep, "bpf_program__attach_uprobe_multi")) + goto cleanup; + + opts->retprobe = false; + skel->links.uprobe_extra = bpf_program__attach_uprobe_multi(skel->progs.uprobe_extra, -1, + binary, pattern, opts); + if (!ASSERT_OK_PTR(skel->links.uprobe_extra, "bpf_program__attach_uprobe_multi")) + goto cleanup; + + uprobe_multi_test_run(skel, child); + +cleanup: + uprobe_multi__destroy(skel); +} + +static void +test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_multi_opts *opts) +{ + struct child *child; + + /* no pid filter */ + __test_attach_api(binary, pattern, opts, NULL); + + /* pid filter */ + child = spawn_child(); + if (!ASSERT_OK_PTR(child, "spawn_child")) + return; + + __test_attach_api(binary, pattern, opts, child); +} + +static void test_attach_api_pattern(void) +{ + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts); + + test_attach_api("/proc/self/exe", "uprobe_multi_func_*", &opts); + test_attach_api("/proc/self/exe", "uprobe_multi_func_?", &opts); +} + +static void test_attach_api_syms(void) +{ + LIBBPF_OPTS(bpf_uprobe_multi_opts, opts); + const char *syms[3] = { + "uprobe_multi_func_1", + "uprobe_multi_func_2", + "uprobe_multi_func_3", + }; + + opts.syms = syms; + opts.cnt = ARRAY_SIZE(syms); + test_attach_api("/proc/self/exe", NULL, &opts); +} + +static void __test_link_api(struct child *child) +{ + int prog_fd, link1_fd = -1, link2_fd = -1, link3_fd = -1, link4_fd = -1; + LIBBPF_OPTS(bpf_link_create_opts, opts); + const char *path = "/proc/self/exe"; + struct uprobe_multi *skel = NULL; + unsigned long *offsets = NULL; + const char *syms[3] = { + "uprobe_multi_func_1", + "uprobe_multi_func_2", + "uprobe_multi_func_3", + }; + int link_extra_fd = -1; + int err; + + err = elf_resolve_syms_offsets(path, 3, syms, (unsigned long **) &offsets); + if (!ASSERT_OK(err, "elf_resolve_syms_offsets")) + return; + + opts.uprobe_multi.path = path; + opts.uprobe_multi.offsets = offsets; + opts.uprobe_multi.cnt = ARRAY_SIZE(syms); + opts.uprobe_multi.pid = child ? child->pid : 0; + + skel = uprobe_multi__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load")) + goto cleanup; + + opts.kprobe_multi.flags = 0; + prog_fd = bpf_program__fd(skel->progs.uprobe); + link1_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_GE(link1_fd, 0, "link1_fd")) + goto cleanup; + + opts.kprobe_multi.flags = BPF_F_UPROBE_MULTI_RETURN; + prog_fd = bpf_program__fd(skel->progs.uretprobe); + link2_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_GE(link2_fd, 0, "link2_fd")) + goto cleanup; + + opts.kprobe_multi.flags = 0; + prog_fd = bpf_program__fd(skel->progs.uprobe_sleep); + link3_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_GE(link3_fd, 0, "link3_fd")) + goto cleanup; + + opts.kprobe_multi.flags = BPF_F_UPROBE_MULTI_RETURN; + prog_fd = bpf_program__fd(skel->progs.uretprobe_sleep); + link4_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_GE(link4_fd, 0, "link4_fd")) + goto cleanup; + + opts.kprobe_multi.flags = 0; + opts.uprobe_multi.pid = 0; + prog_fd = bpf_program__fd(skel->progs.uprobe_extra); + link_extra_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_GE(link_extra_fd, 0, "link_extra_fd")) + goto cleanup; + + uprobe_multi_test_run(skel, child); + +cleanup: + if (link1_fd >= 0) + close(link1_fd); + if (link2_fd >= 0) + close(link2_fd); + if (link3_fd >= 0) + close(link3_fd); + if (link4_fd >= 0) + close(link4_fd); + if (link_extra_fd >= 0) + close(link_extra_fd); + + uprobe_multi__destroy(skel); + free(offsets); +} + +void test_link_api(void) +{ + struct child *child; + + /* no pid filter */ + __test_link_api(NULL); + + /* pid filter */ + child = spawn_child(); + if (!ASSERT_OK_PTR(child, "spawn_child")) + return; + + __test_link_api(child); +} + +static void test_bench_attach_uprobe(void) +{ + long attach_start_ns = 0, attach_end_ns = 0; + struct uprobe_multi_bench *skel = NULL; + long detach_start_ns, detach_end_ns; + double attach_delta, detach_delta; + int err; + + skel = uprobe_multi_bench__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_multi_bench__open_and_load")) + goto cleanup; + + attach_start_ns = get_time_ns(); + + err = uprobe_multi_bench__attach(skel); + if (!ASSERT_OK(err, "uprobe_multi_bench__attach")) + goto cleanup; + + attach_end_ns = get_time_ns(); + + system("./uprobe_multi bench"); + + ASSERT_EQ(skel->bss->count, 50000, "uprobes_count"); + +cleanup: + detach_start_ns = get_time_ns(); + uprobe_multi_bench__destroy(skel); + detach_end_ns = get_time_ns(); + + attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0; + detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0; + + printf("%s: attached in %7.3lfs\n", __func__, attach_delta); + printf("%s: detached in %7.3lfs\n", __func__, detach_delta); +} + +static void test_bench_attach_usdt(void) +{ + long attach_start_ns = 0, attach_end_ns = 0; + struct uprobe_multi_usdt *skel = NULL; + long detach_start_ns, detach_end_ns; + double attach_delta, detach_delta; + + skel = uprobe_multi_usdt__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_multi__open")) + goto cleanup; + + attach_start_ns = get_time_ns(); + + skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0, -1, "./uprobe_multi", + "test", "usdt", NULL); + if (!ASSERT_OK_PTR(skel->links.usdt0, "bpf_program__attach_usdt")) + goto cleanup; + + attach_end_ns = get_time_ns(); + + system("./uprobe_multi usdt"); + + ASSERT_EQ(skel->bss->count, 50000, "usdt_count"); + +cleanup: + detach_start_ns = get_time_ns(); + uprobe_multi_usdt__destroy(skel); + detach_end_ns = get_time_ns(); + + attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0; + detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0; + + printf("%s: attached in %7.3lfs\n", __func__, attach_delta); + printf("%s: detached in %7.3lfs\n", __func__, detach_delta); +} + +void test_uprobe_multi_test(void) +{ + if (test__start_subtest("skel_api")) + test_skel_api(); + if (test__start_subtest("attach_api_pattern")) + test_attach_api_pattern(); + if (test__start_subtest("attach_api_syms")) + test_attach_api_syms(); + if (test__start_subtest("link_api")) + test_link_api(); + if (test__start_subtest("bench_uprobe")) + test_bench_attach_uprobe(); + if (test__start_subtest("bench_usdt")) + test_bench_attach_usdt(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 2497716ee379..e3e68c97b40c 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -11,6 +11,7 @@ #include "verifier_bounds_deduction_non_const.skel.h" #include "verifier_bounds_mix_sign_unsign.skel.h" #include "verifier_bpf_get_stack.skel.h" +#include "verifier_bswap.skel.h" #include "verifier_btf_ctx_access.skel.h" #include "verifier_cfg.skel.h" #include "verifier_cgroup_inv_retcode.skel.h" @@ -24,6 +25,7 @@ #include "verifier_direct_stack_access_wraparound.skel.h" #include "verifier_div0.skel.h" #include "verifier_div_overflow.skel.h" +#include "verifier_gotol.skel.h" #include "verifier_helper_access_var_len.skel.h" #include "verifier_helper_packet_access.skel.h" #include "verifier_helper_restricted.skel.h" @@ -31,6 +33,7 @@ #include "verifier_int_ptr.skel.h" #include "verifier_jeq_infer_not_null.skel.h" #include "verifier_ld_ind.skel.h" +#include "verifier_ldsx.skel.h" #include "verifier_leak_ptr.skel.h" #include "verifier_loops1.skel.h" #include "verifier_lwt.skel.h" @@ -40,6 +43,7 @@ #include "verifier_map_ret_val.skel.h" #include "verifier_masking.skel.h" #include "verifier_meta_access.skel.h" +#include "verifier_movsx.skel.h" #include "verifier_netfilter_ctx.skel.h" #include "verifier_netfilter_retcode.skel.h" #include "verifier_prevent_map_lookup.skel.h" @@ -50,12 +54,16 @@ #include "verifier_regalloc.skel.h" #include "verifier_ringbuf.skel.h" #include "verifier_runtime_jit.skel.h" +#include "verifier_scalar_ids.skel.h" +#include "verifier_sdiv.skel.h" #include "verifier_search_pruning.skel.h" #include "verifier_sock.skel.h" #include "verifier_spill_fill.skel.h" #include "verifier_spin_lock.skel.h" #include "verifier_stack_ptr.skel.h" +#include "verifier_subprog_precision.skel.h" #include "verifier_subreg.skel.h" +#include "verifier_typedef.skel.h" #include "verifier_uninit.skel.h" #include "verifier_unpriv.skel.h" #include "verifier_unpriv_perf.skel.h" @@ -110,6 +118,7 @@ void test_verifier_bounds_deduction(void) { RUN(verifier_bounds_deduction); void test_verifier_bounds_deduction_non_const(void) { RUN(verifier_bounds_deduction_non_const); } void test_verifier_bounds_mix_sign_unsign(void) { RUN(verifier_bounds_mix_sign_unsign); } void test_verifier_bpf_get_stack(void) { RUN(verifier_bpf_get_stack); } +void test_verifier_bswap(void) { RUN(verifier_bswap); } void test_verifier_btf_ctx_access(void) { RUN(verifier_btf_ctx_access); } void test_verifier_cfg(void) { RUN(verifier_cfg); } void test_verifier_cgroup_inv_retcode(void) { RUN(verifier_cgroup_inv_retcode); } @@ -123,6 +132,7 @@ void test_verifier_direct_packet_access(void) { RUN(verifier_direct_packet_acces void test_verifier_direct_stack_access_wraparound(void) { RUN(verifier_direct_stack_access_wraparound); } void test_verifier_div0(void) { RUN(verifier_div0); } void test_verifier_div_overflow(void) { RUN(verifier_div_overflow); } +void test_verifier_gotol(void) { RUN(verifier_gotol); } void test_verifier_helper_access_var_len(void) { RUN(verifier_helper_access_var_len); } void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_access); } void test_verifier_helper_restricted(void) { RUN(verifier_helper_restricted); } @@ -130,6 +140,7 @@ void test_verifier_helper_value_access(void) { RUN(verifier_helper_value_access void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); } void test_verifier_jeq_infer_not_null(void) { RUN(verifier_jeq_infer_not_null); } void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); } +void test_verifier_ldsx(void) { RUN(verifier_ldsx); } void test_verifier_leak_ptr(void) { RUN(verifier_leak_ptr); } void test_verifier_loops1(void) { RUN(verifier_loops1); } void test_verifier_lwt(void) { RUN(verifier_lwt); } @@ -139,6 +150,7 @@ void test_verifier_map_ptr_mixing(void) { RUN(verifier_map_ptr_mixing); } void test_verifier_map_ret_val(void) { RUN(verifier_map_ret_val); } void test_verifier_masking(void) { RUN(verifier_masking); } void test_verifier_meta_access(void) { RUN(verifier_meta_access); } +void test_verifier_movsx(void) { RUN(verifier_movsx); } void test_verifier_netfilter_ctx(void) { RUN(verifier_netfilter_ctx); } void test_verifier_netfilter_retcode(void) { RUN(verifier_netfilter_retcode); } void test_verifier_prevent_map_lookup(void) { RUN(verifier_prevent_map_lookup); } @@ -149,12 +161,16 @@ void test_verifier_ref_tracking(void) { RUN(verifier_ref_tracking); } void test_verifier_regalloc(void) { RUN(verifier_regalloc); } void test_verifier_ringbuf(void) { RUN(verifier_ringbuf); } void test_verifier_runtime_jit(void) { RUN(verifier_runtime_jit); } +void test_verifier_scalar_ids(void) { RUN(verifier_scalar_ids); } +void test_verifier_sdiv(void) { RUN(verifier_sdiv); } void test_verifier_search_pruning(void) { RUN(verifier_search_pruning); } void test_verifier_sock(void) { RUN(verifier_sock); } void test_verifier_spill_fill(void) { RUN(verifier_spill_fill); } void test_verifier_spin_lock(void) { RUN(verifier_spin_lock); } void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); } +void test_verifier_subprog_precision(void) { RUN(verifier_subprog_precision); } void test_verifier_subreg(void) { RUN(verifier_subreg); } +void test_verifier_typedef(void) { RUN(verifier_typedef); } void test_verifier_uninit(void) { RUN(verifier_uninit); } void test_verifier_unpriv(void) { RUN(verifier_unpriv); } void test_verifier_unpriv_perf(void) { RUN(verifier_unpriv_perf); } diff --git a/tools/testing/selftests/bpf/prog_tests/vrf_socket_lookup.c b/tools/testing/selftests/bpf/prog_tests/vrf_socket_lookup.c new file mode 100644 index 000000000000..2a5e207edad6 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/vrf_socket_lookup.c @@ -0,0 +1,312 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause + +/* + * Topology: + * --------- + * NS0 namespace | NS1 namespace + * | + * +--------------+ | +--------------+ + * | veth01 |----------| veth10 | + * | 172.16.1.100 | | | 172.16.1.200 | + * | bpf | | +--------------+ + * +--------------+ | + * server(UDP/TCP) | + * +-------------------+ | + * | vrf1 | | + * | +--------------+ | | +--------------+ + * | | veth02 |----------| veth20 | + * | | 172.16.2.100 | | | | 172.16.2.200 | + * | | bpf | | | +--------------+ + * | +--------------+ | | + * | server(UDP/TCP) | | + * +-------------------+ | + * + * Test flow + * ----------- + * The tests verifies that socket lookup via TC is VRF aware: + * 1) Creates two veth pairs between NS0 and NS1: + * a) veth01 <-> veth10 outside the VRF + * b) veth02 <-> veth20 in the VRF + * 2) Attaches to veth01 and veth02 a program that calls: + * a) bpf_skc_lookup_tcp() with TCP and tcp_skc is true + * b) bpf_sk_lookup_tcp() with TCP and tcp_skc is false + * c) bpf_sk_lookup_udp() with UDP + * The program stores the lookup result in bss->lookup_status. + * 3) Creates a socket TCP/UDP server in/outside the VRF. + * 4) The test expects lookup_status to be: + * a) 0 from device in VRF to server outside VRF + * b) 0 from device outside VRF to server in VRF + * c) 1 from device in VRF to server in VRF + * d) 1 from device outside VRF to server outside VRF + */ + +#include <net/if.h> + +#include "test_progs.h" +#include "network_helpers.h" +#include "vrf_socket_lookup.skel.h" + +#define NS0 "vrf_socket_lookup_0" +#define NS1 "vrf_socket_lookup_1" + +#define IP4_ADDR_VETH01 "172.16.1.100" +#define IP4_ADDR_VETH10 "172.16.1.200" +#define IP4_ADDR_VETH02 "172.16.2.100" +#define IP4_ADDR_VETH20 "172.16.2.200" + +#define NON_VRF_PORT 5000 +#define IN_VRF_PORT 5001 + +#define TIMEOUT_MS 3000 + +static int make_socket(int sotype, const char *ip, int port, + struct sockaddr_storage *addr) +{ + int err, fd; + + err = make_sockaddr(AF_INET, ip, port, addr, NULL); + if (!ASSERT_OK(err, "make_address")) + return -1; + + fd = socket(AF_INET, sotype, 0); + if (!ASSERT_GE(fd, 0, "socket")) + return -1; + + if (!ASSERT_OK(settimeo(fd, TIMEOUT_MS), "settimeo")) + goto fail; + + return fd; +fail: + close(fd); + return -1; +} + +static int make_server(int sotype, const char *ip, int port, const char *ifname) +{ + int err, fd = -1; + + fd = start_server(AF_INET, sotype, ip, port, TIMEOUT_MS); + if (!ASSERT_GE(fd, 0, "start_server")) + return -1; + + if (ifname) { + err = setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, + ifname, strlen(ifname) + 1); + if (!ASSERT_OK(err, "setsockopt(SO_BINDTODEVICE)")) + goto fail; + } + + return fd; +fail: + close(fd); + return -1; +} + +static int attach_progs(char *ifname, int tc_prog_fd, int xdp_prog_fd) +{ + LIBBPF_OPTS(bpf_tc_hook, hook, .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1, + .prog_fd = tc_prog_fd); + int ret, ifindex; + + ifindex = if_nametoindex(ifname); + if (!ASSERT_NEQ(ifindex, 0, "if_nametoindex")) + return -1; + hook.ifindex = ifindex; + + ret = bpf_tc_hook_create(&hook); + if (!ASSERT_OK(ret, "bpf_tc_hook_create")) + return ret; + + ret = bpf_tc_attach(&hook, &opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) { + bpf_tc_hook_destroy(&hook); + return ret; + } + ret = bpf_xdp_attach(ifindex, xdp_prog_fd, 0, NULL); + if (!ASSERT_OK(ret, "bpf_xdp_attach")) { + bpf_tc_hook_destroy(&hook); + return ret; + } + + return 0; +} + +static void cleanup(void) +{ + SYS_NOFAIL("test -f /var/run/netns/" NS0 " && ip netns delete " + NS0); + SYS_NOFAIL("test -f /var/run/netns/" NS1 " && ip netns delete " + NS1); +} + +static int setup(struct vrf_socket_lookup *skel) +{ + int tc_prog_fd, xdp_prog_fd, ret = 0; + struct nstoken *nstoken = NULL; + + SYS(fail, "ip netns add " NS0); + SYS(fail, "ip netns add " NS1); + + /* NS0 <-> NS1 [veth01 <-> veth10] */ + SYS(fail, "ip link add veth01 netns " NS0 " type veth peer name veth10" + " netns " NS1); + SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH01 "/24 dev veth01"); + SYS(fail, "ip -net " NS0 " link set dev veth01 up"); + SYS(fail, "ip -net " NS1 " addr add " IP4_ADDR_VETH10 "/24 dev veth10"); + SYS(fail, "ip -net " NS1 " link set dev veth10 up"); + + /* NS0 <-> NS1 [veth02 <-> veth20] */ + SYS(fail, "ip link add veth02 netns " NS0 " type veth peer name veth20" + " netns " NS1); + SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH02 "/24 dev veth02"); + SYS(fail, "ip -net " NS0 " link set dev veth02 up"); + SYS(fail, "ip -net " NS1 " addr add " IP4_ADDR_VETH20 "/24 dev veth20"); + SYS(fail, "ip -net " NS1 " link set dev veth20 up"); + + /* veth02 -> vrf1 */ + SYS(fail, "ip -net " NS0 " link add vrf1 type vrf table 11"); + SYS(fail, "ip -net " NS0 " route add vrf vrf1 unreachable default" + " metric 4278198272"); + SYS(fail, "ip -net " NS0 " link set vrf1 alias vrf"); + SYS(fail, "ip -net " NS0 " link set vrf1 up"); + SYS(fail, "ip -net " NS0 " link set veth02 master vrf1"); + + /* Attach TC and XDP progs to veth devices in NS0 */ + nstoken = open_netns(NS0); + if (!ASSERT_OK_PTR(nstoken, "setns " NS0)) + goto fail; + tc_prog_fd = bpf_program__fd(skel->progs.tc_socket_lookup); + if (!ASSERT_GE(tc_prog_fd, 0, "bpf_program__tc_fd")) + goto fail; + xdp_prog_fd = bpf_program__fd(skel->progs.xdp_socket_lookup); + if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__xdp_fd")) + goto fail; + + if (attach_progs("veth01", tc_prog_fd, xdp_prog_fd)) + goto fail; + + if (attach_progs("veth02", tc_prog_fd, xdp_prog_fd)) + goto fail; + + goto close; +fail: + ret = -1; +close: + if (nstoken) + close_netns(nstoken); + return ret; +} + +static int test_lookup(struct vrf_socket_lookup *skel, int sotype, + const char *ip, int port, bool test_xdp, bool tcp_skc, + int lookup_status_exp) +{ + static const char msg[] = "Hello Server"; + struct sockaddr_storage addr = {}; + int fd, ret = 0; + + fd = make_socket(sotype, ip, port, &addr); + if (fd < 0) + return -1; + + skel->bss->test_xdp = test_xdp; + skel->bss->tcp_skc = tcp_skc; + skel->bss->lookup_status = -1; + + if (sotype == SOCK_STREAM) + connect(fd, (void *)&addr, sizeof(struct sockaddr_in)); + else + sendto(fd, msg, sizeof(msg), 0, (void *)&addr, + sizeof(struct sockaddr_in)); + + if (!ASSERT_EQ(skel->bss->lookup_status, lookup_status_exp, + "lookup_status")) + goto fail; + + goto close; + +fail: + ret = -1; +close: + close(fd); + return ret; +} + +static void _test_vrf_socket_lookup(struct vrf_socket_lookup *skel, int sotype, + bool test_xdp, bool tcp_skc) +{ + int in_vrf_server = -1, non_vrf_server = -1; + struct nstoken *nstoken = NULL; + + nstoken = open_netns(NS0); + if (!ASSERT_OK_PTR(nstoken, "setns " NS0)) + goto done; + + /* Open sockets in and outside VRF */ + non_vrf_server = make_server(sotype, "0.0.0.0", NON_VRF_PORT, NULL); + if (!ASSERT_GE(non_vrf_server, 0, "make_server__outside_vrf_fd")) + goto done; + + in_vrf_server = make_server(sotype, "0.0.0.0", IN_VRF_PORT, "veth02"); + if (!ASSERT_GE(in_vrf_server, 0, "make_server__in_vrf_fd")) + goto done; + + /* Perform test from NS1 */ + close_netns(nstoken); + nstoken = open_netns(NS1); + if (!ASSERT_OK_PTR(nstoken, "setns " NS1)) + goto done; + + if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH02, NON_VRF_PORT, + test_xdp, tcp_skc, 0), "in_to_out")) + goto done; + if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH02, IN_VRF_PORT, + test_xdp, tcp_skc, 1), "in_to_in")) + goto done; + if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH01, NON_VRF_PORT, + test_xdp, tcp_skc, 1), "out_to_out")) + goto done; + if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH01, IN_VRF_PORT, + test_xdp, tcp_skc, 0), "out_to_in")) + goto done; + +done: + if (non_vrf_server >= 0) + close(non_vrf_server); + if (in_vrf_server >= 0) + close(in_vrf_server); + if (nstoken) + close_netns(nstoken); +} + +void test_vrf_socket_lookup(void) +{ + struct vrf_socket_lookup *skel; + + cleanup(); + + skel = vrf_socket_lookup__open_and_load(); + if (!ASSERT_OK_PTR(skel, "vrf_socket_lookup__open_and_load")) + return; + + if (!ASSERT_OK(setup(skel), "setup")) + goto done; + + if (test__start_subtest("tc_socket_lookup_tcp")) + _test_vrf_socket_lookup(skel, SOCK_STREAM, false, false); + if (test__start_subtest("tc_socket_lookup_tcp_skc")) + _test_vrf_socket_lookup(skel, SOCK_STREAM, false, false); + if (test__start_subtest("tc_socket_lookup_udp")) + _test_vrf_socket_lookup(skel, SOCK_STREAM, false, false); + if (test__start_subtest("xdp_socket_lookup_tcp")) + _test_vrf_socket_lookup(skel, SOCK_STREAM, true, false); + if (test__start_subtest("xdp_socket_lookup_tcp_skc")) + _test_vrf_socket_lookup(skel, SOCK_STREAM, true, false); + if (test__start_subtest("xdp_socket_lookup_udp")) + _test_vrf_socket_lookup(skel, SOCK_STREAM, true, false); + +done: + vrf_socket_lookup__destroy(skel); + cleanup(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c index fa3cac5488f5..e6bcb6051402 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include "test_xdp_attach_fail.skel.h" #define IFINDEX_LO 1 #define XDP_FLAGS_REPLACE (1U << 4) @@ -85,10 +86,74 @@ out_1: bpf_object__close(obj1); } +#define ERRMSG_LEN 64 + +struct xdp_errmsg { + char msg[ERRMSG_LEN]; +}; + +static void on_xdp_errmsg(void *ctx, int cpu, void *data, __u32 size) +{ + struct xdp_errmsg *ctx_errmg = ctx, *tp_errmsg = data; + + memcpy(&ctx_errmg->msg, &tp_errmsg->msg, ERRMSG_LEN); +} + +static const char tgt_errmsg[] = "Invalid XDP flags for BPF link attachment"; + +static void test_xdp_attach_fail(const char *file) +{ + struct test_xdp_attach_fail *skel = NULL; + struct xdp_errmsg errmsg = {}; + struct perf_buffer *pb = NULL; + struct bpf_object *obj = NULL; + int err, fd_xdp; + + LIBBPF_OPTS(bpf_link_create_opts, opts); + + skel = test_xdp_attach_fail__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_xdp_attach_fail__open_and_load")) + goto out_close; + + err = test_xdp_attach_fail__attach(skel); + if (!ASSERT_EQ(err, 0, "test_xdp_attach_fail__attach")) + goto out_close; + + /* set up perf buffer */ + pb = perf_buffer__new(bpf_map__fd(skel->maps.xdp_errmsg_pb), 1, + on_xdp_errmsg, NULL, &errmsg, NULL); + if (!ASSERT_OK_PTR(pb, "perf_buffer__new")) + goto out_close; + + err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &fd_xdp); + if (!ASSERT_EQ(err, 0, "bpf_prog_test_load")) + goto out_close; + + opts.flags = 0xFF; // invalid flags to fail to attach XDP prog + err = bpf_link_create(fd_xdp, IFINDEX_LO, BPF_XDP, &opts); + if (!ASSERT_EQ(err, -EINVAL, "bpf_link_create")) + goto out_close; + + /* read perf buffer */ + err = perf_buffer__poll(pb, 100); + if (!ASSERT_GT(err, -1, "perf_buffer__poll")) + goto out_close; + + ASSERT_STRNEQ((const char *) errmsg.msg, tgt_errmsg, + 42 /* strlen(tgt_errmsg) */, "check error message"); + +out_close: + perf_buffer__free(pb); + bpf_object__close(obj); + test_xdp_attach_fail__destroy(skel); +} + void serial_test_xdp_attach(void) { if (test__start_subtest("xdp_attach")) test_xdp_attach("./test_xdp.bpf.o"); if (test__start_subtest("xdp_attach_dynptr")) test_xdp_attach("./test_xdp_dynptr.bpf.o"); + if (test__start_subtest("xdp_attach_failed")) + test_xdp_attach_fail("./xdp_dummy.bpf.o"); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c index d19f79048ff6..c3b45745cbcc 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c @@ -18,6 +18,7 @@ #include <linux/if_bonding.h> #include <linux/limits.h> #include <linux/udp.h> +#include <uapi/linux/netdev.h> #include "xdp_dummy.skel.h" #include "xdp_redirect_multi_kern.skel.h" @@ -492,6 +493,123 @@ out: system("ip link del bond_nest2"); } +static void test_xdp_bonding_features(struct skeletons *skeletons) +{ + LIBBPF_OPTS(bpf_xdp_query_opts, query_opts); + int bond_idx, veth1_idx, err; + struct bpf_link *link = NULL; + + if (!ASSERT_OK(system("ip link add bond type bond"), "add bond")) + goto out; + + bond_idx = if_nametoindex("bond"); + if (!ASSERT_GE(bond_idx, 0, "if_nametoindex bond")) + goto out; + + /* query default xdp-feature for bond device */ + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); + if (!ASSERT_OK(err, "bond bpf_xdp_query")) + goto out; + + if (!ASSERT_EQ(query_opts.feature_flags, NETDEV_XDP_ACT_MASK, + "bond query_opts.feature_flags")) + goto out; + + if (!ASSERT_OK(system("ip link add veth0 type veth peer name veth1"), + "add veth{0,1} pair")) + goto out; + + if (!ASSERT_OK(system("ip link add veth2 type veth peer name veth3"), + "add veth{2,3} pair")) + goto out; + + if (!ASSERT_OK(system("ip link set veth0 master bond"), + "add veth0 to master bond")) + goto out; + + /* xdp-feature for bond device should be obtained from the single slave + * device (veth0) + */ + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); + if (!ASSERT_OK(err, "bond bpf_xdp_query")) + goto out; + + if (!ASSERT_EQ(query_opts.feature_flags, + NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_RX_SG, + "bond query_opts.feature_flags")) + goto out; + + veth1_idx = if_nametoindex("veth1"); + if (!ASSERT_GE(veth1_idx, 0, "if_nametoindex veth1")) + goto out; + + link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, + veth1_idx); + if (!ASSERT_OK_PTR(link, "attach program to veth1")) + goto out; + + /* xdp-feature for veth0 are changed */ + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); + if (!ASSERT_OK(err, "bond bpf_xdp_query")) + goto out; + + if (!ASSERT_EQ(query_opts.feature_flags, + NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_RX_SG | NETDEV_XDP_ACT_NDO_XMIT | + NETDEV_XDP_ACT_NDO_XMIT_SG, + "bond query_opts.feature_flags")) + goto out; + + if (!ASSERT_OK(system("ip link set veth2 master bond"), + "add veth2 to master bond")) + goto out; + + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); + if (!ASSERT_OK(err, "bond bpf_xdp_query")) + goto out; + + /* xdp-feature for bond device should be set to the most restrict + * value obtained from attached slave devices (veth0 and veth2) + */ + if (!ASSERT_EQ(query_opts.feature_flags, + NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_RX_SG, + "bond query_opts.feature_flags")) + goto out; + + if (!ASSERT_OK(system("ip link set veth2 nomaster"), + "del veth2 to master bond")) + goto out; + + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); + if (!ASSERT_OK(err, "bond bpf_xdp_query")) + goto out; + + if (!ASSERT_EQ(query_opts.feature_flags, + NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT | + NETDEV_XDP_ACT_RX_SG | NETDEV_XDP_ACT_NDO_XMIT | + NETDEV_XDP_ACT_NDO_XMIT_SG, + "bond query_opts.feature_flags")) + goto out; + + if (!ASSERT_OK(system("ip link set veth0 nomaster"), + "del veth0 to master bond")) + goto out; + + err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts); + if (!ASSERT_OK(err, "bond bpf_xdp_query")) + goto out; + + ASSERT_EQ(query_opts.feature_flags, NETDEV_XDP_ACT_MASK, + "bond query_opts.feature_flags"); +out: + bpf_link__destroy(link); + system("ip link del veth0"); + system("ip link del veth2"); + system("ip link del bond"); +} + static int libbpf_debug_print(enum libbpf_print_level level, const char *format, va_list args) { @@ -546,6 +664,9 @@ void serial_test_xdp_bonding(void) if (test__start_subtest("xdp_bonding_nested")) test_xdp_bonding_nested(&skeletons); + if (test__start_subtest("xdp_bonding_features")) + test_xdp_bonding_features(&skeletons); + for (i = 0; i < ARRAY_SIZE(bond_test_cases); i++) { struct bond_test_case *test_case = &bond_test_cases[i]; diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c b/tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c new file mode 100644 index 000000000000..7dd18c6d06c6 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <net/if.h> +#include <test_progs.h> +#include <network_helpers.h> + +#define LOCAL_NETNS "xdp_dev_bound_only_netns" + +static int load_dummy_prog(char *name, __u32 ifindex, __u32 flags) +{ + struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN() }; + LIBBPF_OPTS(bpf_prog_load_opts, opts); + + opts.prog_flags = flags; + opts.prog_ifindex = ifindex; + return bpf_prog_load(BPF_PROG_TYPE_XDP, name, "GPL", insns, ARRAY_SIZE(insns), &opts); +} + +/* A test case for bpf_offload_netdev->offload handling bug: + * - create a veth device (does not support offload); + * - create a device bound XDP program with BPF_F_XDP_DEV_BOUND_ONLY flag + * (such programs are not offloaded); + * - create a device bound XDP program without flags (such programs are offloaded). + * This might lead to 'BUG: kernel NULL pointer dereference'. + */ +void test_xdp_dev_bound_only_offdev(void) +{ + struct nstoken *tok = NULL; + __u32 ifindex; + int fd1 = -1; + int fd2 = -1; + + SYS(out, "ip netns add " LOCAL_NETNS); + tok = open_netns(LOCAL_NETNS); + if (!ASSERT_OK_PTR(tok, "open_netns")) + goto out; + SYS(out, "ip link add eth42 type veth"); + ifindex = if_nametoindex("eth42"); + if (!ASSERT_NEQ(ifindex, 0, "if_nametoindex")) { + perror("if_nametoindex"); + goto out; + } + fd1 = load_dummy_prog("dummy1", ifindex, BPF_F_XDP_DEV_BOUND_ONLY); + if (!ASSERT_GE(fd1, 0, "load_dummy_prog #1")) { + perror("load_dummy_prog #1"); + goto out; + } + /* Program with ifindex is considered offloaded, however veth + * does not support offload => error should be reported. + */ + fd2 = load_dummy_prog("dummy2", ifindex, 0); + ASSERT_EQ(fd2, -EINVAL, "load_dummy_prog #2 (offloaded)"); + +out: + close(fd1); + close(fd2); + close_netns(tok); + /* eth42 was added inside netns, removing the netns will + * also remove eth42 veth pair. + */ + SYS_NOFAIL("ip netns del " LOCAL_NETNS); +} diff --git a/tools/testing/selftests/bpf/progs/async_stack_depth.c b/tools/testing/selftests/bpf/progs/async_stack_depth.c new file mode 100644 index 000000000000..3517c0e01206 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/async_stack_depth.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> + +#include "bpf_misc.h" + +struct hmap_elem { + struct bpf_timer timer; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 64); + __type(key, int); + __type(value, struct hmap_elem); +} hmap SEC(".maps"); + +__attribute__((noinline)) +static int timer_cb(void *map, int *key, struct bpf_timer *timer) +{ + volatile char buf[256] = {}; + return buf[69]; +} + +__attribute__((noinline)) +static int bad_timer_cb(void *map, int *key, struct bpf_timer *timer) +{ + volatile char buf[300] = {}; + return buf[255] + timer_cb(NULL, NULL, NULL); +} + +SEC("tc") +__failure __msg("combined stack size of 2 calls is 576. Too large") +int pseudo_call_check(struct __sk_buff *ctx) +{ + struct hmap_elem *elem; + volatile char buf[256] = {}; + + elem = bpf_map_lookup_elem(&hmap, &(int){0}); + if (!elem) + return 0; + + timer_cb(NULL, NULL, NULL); + return bpf_timer_set_callback(&elem->timer, timer_cb) + buf[0]; +} + +SEC("tc") +__failure __msg("combined stack size of 2 calls is 608. Too large") +int async_call_root_check(struct __sk_buff *ctx) +{ + struct hmap_elem *elem; + volatile char buf[256] = {}; + + elem = bpf_map_lookup_elem(&hmap, &(int){0}); + if (!elem) + return 0; + + return bpf_timer_set_callback(&elem->timer, bad_timer_cb) + buf[0]; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c index d9660e7200e2..c997e3e3d3fb 100644 --- a/tools/testing/selftests/bpf/progs/bpf_cubic.c +++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c @@ -490,6 +490,8 @@ static __always_inline void hystart_update(struct sock *sk, __u32 delay) } } +int bpf_cubic_acked_called = 0; + void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk, const struct ack_sample *sample) { @@ -497,6 +499,7 @@ void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk, struct bictcp *ca = inet_csk_ca(sk); __u32 delay; + bpf_cubic_acked_called = 1; /* Some calls are for duplicates without timetamps */ if (sample->rtt_us < 0) return; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c index 5ddcc46fd886..521267818f4d 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c @@ -59,9 +59,7 @@ int dump_ksym(struct bpf_iter__ksym *ctx) } else { BPF_SEQ_PRINTF(seq, "0x%llx %c %s ", value, type, iter->name); } - if (!iter->pos_arch_end || iter->pos_arch_end > iter->pos) - BPF_SEQ_PRINTF(seq, "CORE "); - else if (!iter->pos_mod_end || iter->pos_mod_end > iter->pos) + if (!iter->pos_mod_end || iter->pos_mod_end > iter->pos) BPF_SEQ_PRINTF(seq, "MOD "); else if (!iter->pos_ftrace_mod_end || iter->pos_ftrace_mod_end > iter->pos) BPF_SEQ_PRINTF(seq, "FTRACE_MOD "); diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index d3c1217ba79a..38a57a2e70db 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -86,6 +86,10 @@ #define POINTER_VALUE 0xcafe4all #define TEST_DATA_LEN 64 +#ifndef __used +#define __used __attribute__((used)) +#endif + #if defined(__TARGET_ARCH_x86) #define SYSCALL_WRAPPER 1 #define SYS_PREFIX "__x64_" diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index cfed4df490f3..0b793a102791 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -88,6 +88,7 @@ #define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr #define sk_flags __sk_common.skc_flags #define sk_reuse __sk_common.skc_reuse +#define sk_cookie __sk_common.skc_cookie #define s6_addr32 in6_u.u6_addr32 diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c index 50f95ec61165..76d661b20e87 100644 --- a/tools/testing/selftests/bpf/progs/cb_refs.c +++ b/tools/testing/selftests/bpf/progs/cb_refs.c @@ -2,6 +2,7 @@ #include <vmlinux.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> +#include "../bpf_testmod/bpf_testmod_kfunc.h" struct map_value { struct prog_test_ref_kfunc __kptr *ptr; @@ -14,9 +15,6 @@ struct { __uint(max_entries, 16); } array_map SEC(".maps"); -extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; -extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; - static __noinline int cb1(void *map, void *key, void *value, void *ctx) { void *p = *(void **)ctx; diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c index b2a409e6382a..932b8ecd4ae3 100644 --- a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c +++ b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c @@ -12,6 +12,7 @@ __u32 invocations = 0; __u32 assertion_error = 0; __u32 retval_value = 0; __u32 ctx_retval_value = 0; +__u32 page_size = 0; SEC("cgroup/getsockopt") int get_retval(struct bpf_sockopt *ctx) @@ -20,6 +21,10 @@ int get_retval(struct bpf_sockopt *ctx) ctx_retval_value = ctx->retval; __sync_fetch_and_add(&invocations, 1); + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 1; } @@ -31,6 +36,10 @@ int set_eisconn(struct bpf_sockopt *ctx) if (bpf_set_retval(-EISCONN)) assertion_error = 1; + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 1; } @@ -41,5 +50,9 @@ int clear_retval(struct bpf_sockopt *ctx) ctx->retval = 0; + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 1; } diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c index d6e5903e06ba..b7fa8804e19d 100644 --- a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c +++ b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c @@ -11,6 +11,7 @@ __u32 invocations = 0; __u32 assertion_error = 0; __u32 retval_value = 0; +__u32 page_size = 0; SEC("cgroup/setsockopt") int get_retval(struct bpf_sockopt *ctx) @@ -18,6 +19,10 @@ int get_retval(struct bpf_sockopt *ctx) retval_value = bpf_get_retval(); __sync_fetch_and_add(&invocations, 1); + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 1; } @@ -29,6 +34,10 @@ int set_eunatch(struct bpf_sockopt *ctx) if (bpf_set_retval(-EUNATCH)) assertion_error = 1; + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 0; } @@ -40,6 +49,10 @@ int set_eisconn(struct bpf_sockopt *ctx) if (bpf_set_retval(-EISCONN)) assertion_error = 1; + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 0; } @@ -48,5 +61,9 @@ int legacy_eperm(struct bpf_sockopt *ctx) { __sync_fetch_and_add(&invocations, 1); + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 0; } diff --git a/tools/testing/selftests/bpf/progs/cgroup_tcp_skb.c b/tools/testing/selftests/bpf/progs/cgroup_tcp_skb.c new file mode 100644 index 000000000000..1e2e73f3b749 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgroup_tcp_skb.c @@ -0,0 +1,382 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ +#include <linux/bpf.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> + +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> + +#include <sys/types.h> +#include <sys/socket.h> + +#include "cgroup_tcp_skb.h" + +char _license[] SEC("license") = "GPL"; + +__u16 g_sock_port = 0; +__u32 g_sock_state = 0; +int g_unexpected = 0; +__u32 g_packet_count = 0; + +int needed_tcp_pkt(struct __sk_buff *skb, struct tcphdr *tcph) +{ + struct ipv6hdr ip6h; + + if (skb->protocol != bpf_htons(ETH_P_IPV6)) + return 0; + if (bpf_skb_load_bytes(skb, 0, &ip6h, sizeof(ip6h))) + return 0; + + if (ip6h.nexthdr != IPPROTO_TCP) + return 0; + + if (bpf_skb_load_bytes(skb, sizeof(ip6h), tcph, sizeof(*tcph))) + return 0; + + if (tcph->source != bpf_htons(g_sock_port) && + tcph->dest != bpf_htons(g_sock_port)) + return 0; + + return 1; +} + +/* Run accept() on a socket in the cgroup to receive a new connection. */ +static int egress_accept(struct tcphdr *tcph) +{ + if (g_sock_state == SYN_RECV_SENDING_SYN_ACK) { + if (tcph->fin || !tcph->syn || !tcph->ack) + g_unexpected++; + else + g_sock_state = SYN_RECV; + return 1; + } + + return 0; +} + +static int ingress_accept(struct tcphdr *tcph) +{ + switch (g_sock_state) { + case INIT: + if (!tcph->syn || tcph->fin || tcph->ack) + g_unexpected++; + else + g_sock_state = SYN_RECV_SENDING_SYN_ACK; + break; + case SYN_RECV: + if (tcph->fin || tcph->syn || !tcph->ack) + g_unexpected++; + else + g_sock_state = ESTABLISHED; + break; + default: + return 0; + } + + return 1; +} + +/* Run connect() on a socket in the cgroup to start a new connection. */ +static int egress_connect(struct tcphdr *tcph) +{ + if (g_sock_state == INIT) { + if (!tcph->syn || tcph->fin || tcph->ack) + g_unexpected++; + else + g_sock_state = SYN_SENT; + return 1; + } + + return 0; +} + +static int ingress_connect(struct tcphdr *tcph) +{ + if (g_sock_state == SYN_SENT) { + if (tcph->fin || !tcph->syn || !tcph->ack) + g_unexpected++; + else + g_sock_state = ESTABLISHED; + return 1; + } + + return 0; +} + +/* The connection is closed by the peer outside the cgroup. */ +static int egress_close_remote(struct tcphdr *tcph) +{ + switch (g_sock_state) { + case ESTABLISHED: + break; + case CLOSE_WAIT_SENDING_ACK: + if (tcph->fin || tcph->syn || !tcph->ack) + g_unexpected++; + else + g_sock_state = CLOSE_WAIT; + break; + case CLOSE_WAIT: + if (!tcph->fin) + g_unexpected++; + else + g_sock_state = LAST_ACK; + break; + default: + return 0; + } + + return 1; +} + +static int ingress_close_remote(struct tcphdr *tcph) +{ + switch (g_sock_state) { + case ESTABLISHED: + if (tcph->fin) + g_sock_state = CLOSE_WAIT_SENDING_ACK; + break; + case LAST_ACK: + if (tcph->fin || tcph->syn || !tcph->ack) + g_unexpected++; + else + g_sock_state = CLOSED; + break; + default: + return 0; + } + + return 1; +} + +/* The connection is closed by the endpoint inside the cgroup. */ +static int egress_close_local(struct tcphdr *tcph) +{ + switch (g_sock_state) { + case ESTABLISHED: + if (tcph->fin) + g_sock_state = FIN_WAIT1; + break; + case TIME_WAIT_SENDING_ACK: + if (tcph->fin || tcph->syn || !tcph->ack) + g_unexpected++; + else + g_sock_state = TIME_WAIT; + break; + default: + return 0; + } + + return 1; +} + +static int ingress_close_local(struct tcphdr *tcph) +{ + switch (g_sock_state) { + case ESTABLISHED: + break; + case FIN_WAIT1: + if (tcph->fin || tcph->syn || !tcph->ack) + g_unexpected++; + else + g_sock_state = FIN_WAIT2; + break; + case FIN_WAIT2: + if (!tcph->fin || tcph->syn || !tcph->ack) + g_unexpected++; + else + g_sock_state = TIME_WAIT_SENDING_ACK; + break; + default: + return 0; + } + + return 1; +} + +/* Check the types of outgoing packets of a server socket to make sure they + * are consistent with the state of the server socket. + * + * The connection is closed by the client side. + */ +SEC("cgroup_skb/egress") +int server_egress(struct __sk_buff *skb) +{ + struct tcphdr tcph; + + if (!needed_tcp_pkt(skb, &tcph)) + return 1; + + g_packet_count++; + + /* Egress of the server socket. */ + if (egress_accept(&tcph) || egress_close_remote(&tcph)) + return 1; + + g_unexpected++; + return 1; +} + +/* Check the types of incoming packets of a server socket to make sure they + * are consistent with the state of the server socket. + * + * The connection is closed by the client side. + */ +SEC("cgroup_skb/ingress") +int server_ingress(struct __sk_buff *skb) +{ + struct tcphdr tcph; + + if (!needed_tcp_pkt(skb, &tcph)) + return 1; + + g_packet_count++; + + /* Ingress of the server socket. */ + if (ingress_accept(&tcph) || ingress_close_remote(&tcph)) + return 1; + + g_unexpected++; + return 1; +} + +/* Check the types of outgoing packets of a server socket to make sure they + * are consistent with the state of the server socket. + * + * The connection is closed by the server side. + */ +SEC("cgroup_skb/egress") +int server_egress_srv(struct __sk_buff *skb) +{ + struct tcphdr tcph; + + if (!needed_tcp_pkt(skb, &tcph)) + return 1; + + g_packet_count++; + + /* Egress of the server socket. */ + if (egress_accept(&tcph) || egress_close_local(&tcph)) + return 1; + + g_unexpected++; + return 1; +} + +/* Check the types of incoming packets of a server socket to make sure they + * are consistent with the state of the server socket. + * + * The connection is closed by the server side. + */ +SEC("cgroup_skb/ingress") +int server_ingress_srv(struct __sk_buff *skb) +{ + struct tcphdr tcph; + + if (!needed_tcp_pkt(skb, &tcph)) + return 1; + + g_packet_count++; + + /* Ingress of the server socket. */ + if (ingress_accept(&tcph) || ingress_close_local(&tcph)) + return 1; + + g_unexpected++; + return 1; +} + +/* Check the types of outgoing packets of a client socket to make sure they + * are consistent with the state of the client socket. + * + * The connection is closed by the server side. + */ +SEC("cgroup_skb/egress") +int client_egress_srv(struct __sk_buff *skb) +{ + struct tcphdr tcph; + + if (!needed_tcp_pkt(skb, &tcph)) + return 1; + + g_packet_count++; + + /* Egress of the server socket. */ + if (egress_connect(&tcph) || egress_close_remote(&tcph)) + return 1; + + g_unexpected++; + return 1; +} + +/* Check the types of incoming packets of a client socket to make sure they + * are consistent with the state of the client socket. + * + * The connection is closed by the server side. + */ +SEC("cgroup_skb/ingress") +int client_ingress_srv(struct __sk_buff *skb) +{ + struct tcphdr tcph; + + if (!needed_tcp_pkt(skb, &tcph)) + return 1; + + g_packet_count++; + + /* Ingress of the server socket. */ + if (ingress_connect(&tcph) || ingress_close_remote(&tcph)) + return 1; + + g_unexpected++; + return 1; +} + +/* Check the types of outgoing packets of a client socket to make sure they + * are consistent with the state of the client socket. + * + * The connection is closed by the client side. + */ +SEC("cgroup_skb/egress") +int client_egress(struct __sk_buff *skb) +{ + struct tcphdr tcph; + + if (!needed_tcp_pkt(skb, &tcph)) + return 1; + + g_packet_count++; + + /* Egress of the server socket. */ + if (egress_connect(&tcph) || egress_close_local(&tcph)) + return 1; + + g_unexpected++; + return 1; +} + +/* Check the types of incoming packets of a client socket to make sure they + * are consistent with the state of the client socket. + * + * The connection is closed by the client side. + */ +SEC("cgroup_skb/ingress") +int client_ingress(struct __sk_buff *skb) +{ + struct tcphdr tcph; + + if (!needed_tcp_pkt(skb, &tcph)) + return 1; + + g_packet_count++; + + /* Ingress of the server socket. */ + if (ingress_connect(&tcph) || ingress_close_local(&tcph)) + return 1; + + g_unexpected++; + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h index 0c5b785a93e4..b15c588ace15 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_common.h +++ b/tools/testing/selftests/bpf/progs/cpumask_common.h @@ -28,6 +28,8 @@ void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym; struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) __ksym; u32 bpf_cpumask_first(const struct cpumask *cpumask) __ksym; u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym; +u32 bpf_cpumask_first_and(const struct cpumask *src1, + const struct cpumask *src2) __ksym; void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym; void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym; bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) __ksym; @@ -50,8 +52,8 @@ bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2) bool bpf_cpumask_empty(const struct cpumask *cpumask) __ksym; bool bpf_cpumask_full(const struct cpumask *cpumask) __ksym; void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym; -u32 bpf_cpumask_any(const struct cpumask *src) __ksym; -u32 bpf_cpumask_any_and(const struct cpumask *src1, const struct cpumask *src2) __ksym; +u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym; +u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, const struct cpumask *src2) __ksym; void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c index 2fcdd7f68ac7..674a63424dee 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_success.c +++ b/tools/testing/selftests/bpf/progs/cpumask_success.c @@ -5,6 +5,7 @@ #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> +#include "bpf_misc.h" #include "cpumask_common.h" char _license[] SEC("license") = "GPL"; @@ -175,6 +176,38 @@ release_exit: } SEC("tp_btf/task_newtask") +int BPF_PROG(test_firstand_nocpu, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *mask1, *mask2; + u32 first; + + if (!is_test_task()) + return 0; + + mask1 = create_cpumask(); + if (!mask1) + return 0; + + mask2 = create_cpumask(); + if (!mask2) + goto release_exit; + + bpf_cpumask_set_cpu(0, mask1); + bpf_cpumask_set_cpu(1, mask2); + + first = bpf_cpumask_first_and(cast(mask1), cast(mask2)); + if (first <= 1) + err = 3; + +release_exit: + if (mask1) + bpf_cpumask_release(mask1); + if (mask2) + bpf_cpumask_release(mask2); + return 0; +} + +SEC("tp_btf/task_newtask") int BPF_PROG(test_test_and_set_clear, struct task_struct *task, u64 clone_flags) { struct bpf_cpumask *cpumask; @@ -311,13 +344,13 @@ int BPF_PROG(test_copy_any_anyand, struct task_struct *task, u64 clone_flags) bpf_cpumask_set_cpu(1, mask2); bpf_cpumask_or(dst1, cast(mask1), cast(mask2)); - cpu = bpf_cpumask_any(cast(mask1)); + cpu = bpf_cpumask_any_distribute(cast(mask1)); if (cpu != 0) { err = 6; goto release_exit; } - cpu = bpf_cpumask_any(cast(dst2)); + cpu = bpf_cpumask_any_distribute(cast(dst2)); if (cpu < nr_cpus) { err = 7; goto release_exit; @@ -329,13 +362,13 @@ int BPF_PROG(test_copy_any_anyand, struct task_struct *task, u64 clone_flags) goto release_exit; } - cpu = bpf_cpumask_any(cast(dst2)); + cpu = bpf_cpumask_any_distribute(cast(dst2)); if (cpu > 1) { err = 9; goto release_exit; } - cpu = bpf_cpumask_any_and(cast(mask1), cast(mask2)); + cpu = bpf_cpumask_any_and_distribute(cast(mask1), cast(mask2)); if (cpu < nr_cpus) { err = 10; goto release_exit; @@ -426,3 +459,26 @@ int BPF_PROG(test_global_mask_rcu, struct task_struct *task, u64 clone_flags) return 0; } + +SEC("tp_btf/task_newtask") +__success +int BPF_PROG(test_refcount_null_tracking, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *mask1, *mask2; + + mask1 = bpf_cpumask_create(); + mask2 = bpf_cpumask_create(); + + if (!mask1 || !mask2) + goto free_masks_return; + + bpf_cpumask_test_cpu(0, (const struct cpumask *)mask1); + bpf_cpumask_test_cpu(0, (const struct cpumask *)mask2); + +free_masks_return: + if (mask1) + bpf_cpumask_release(mask1); + if (mask2) + bpf_cpumask_release(mask2); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index 759eb5c245cd..7ce7e827d5f0 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -3,6 +3,7 @@ #include <errno.h> #include <string.h> +#include <stdbool.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include <linux/if_ether.h> @@ -1378,3 +1379,310 @@ int invalid_slice_rdwr_rdonly(struct __sk_buff *skb) return 0; } + +/* bpf_dynptr_adjust can only be called on initialized dynptrs */ +SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #1") +int dynptr_adjust_invalid(void *ctx) +{ + struct bpf_dynptr ptr; + + /* this should fail */ + bpf_dynptr_adjust(&ptr, 1, 2); + + return 0; +} + +/* bpf_dynptr_is_null can only be called on initialized dynptrs */ +SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #1") +int dynptr_is_null_invalid(void *ctx) +{ + struct bpf_dynptr ptr; + + /* this should fail */ + bpf_dynptr_is_null(&ptr); + + return 0; +} + +/* bpf_dynptr_is_rdonly can only be called on initialized dynptrs */ +SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #1") +int dynptr_is_rdonly_invalid(void *ctx) +{ + struct bpf_dynptr ptr; + + /* this should fail */ + bpf_dynptr_is_rdonly(&ptr); + + return 0; +} + +/* bpf_dynptr_size can only be called on initialized dynptrs */ +SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #1") +int dynptr_size_invalid(void *ctx) +{ + struct bpf_dynptr ptr; + + /* this should fail */ + bpf_dynptr_size(&ptr); + + return 0; +} + +/* Only initialized dynptrs can be cloned */ +SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #1") +int clone_invalid1(void *ctx) +{ + struct bpf_dynptr ptr1; + struct bpf_dynptr ptr2; + + /* this should fail */ + bpf_dynptr_clone(&ptr1, &ptr2); + + return 0; +} + +/* Can't overwrite an existing dynptr when cloning */ +SEC("?xdp") +__failure __msg("cannot overwrite referenced dynptr") +int clone_invalid2(struct xdp_md *xdp) +{ + struct bpf_dynptr ptr1; + struct bpf_dynptr clone; + + bpf_dynptr_from_xdp(xdp, 0, &ptr1); + + bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &clone); + + /* this should fail */ + bpf_dynptr_clone(&ptr1, &clone); + + bpf_ringbuf_submit_dynptr(&clone, 0); + + return 0; +} + +/* Invalidating a dynptr should invalidate its clones */ +SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #3") +int clone_invalidate1(void *ctx) +{ + struct bpf_dynptr clone; + struct bpf_dynptr ptr; + char read_data[64]; + + bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr); + + bpf_dynptr_clone(&ptr, &clone); + + bpf_ringbuf_submit_dynptr(&ptr, 0); + + /* this should fail */ + bpf_dynptr_read(read_data, sizeof(read_data), &clone, 0, 0); + + return 0; +} + +/* Invalidating a dynptr should invalidate its parent */ +SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #3") +int clone_invalidate2(void *ctx) +{ + struct bpf_dynptr ptr; + struct bpf_dynptr clone; + char read_data[64]; + + bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr); + + bpf_dynptr_clone(&ptr, &clone); + + bpf_ringbuf_submit_dynptr(&clone, 0); + + /* this should fail */ + bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0); + + return 0; +} + +/* Invalidating a dynptr should invalidate its siblings */ +SEC("?raw_tp") +__failure __msg("Expected an initialized dynptr as arg #3") +int clone_invalidate3(void *ctx) +{ + struct bpf_dynptr ptr; + struct bpf_dynptr clone1; + struct bpf_dynptr clone2; + char read_data[64]; + + bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr); + + bpf_dynptr_clone(&ptr, &clone1); + + bpf_dynptr_clone(&ptr, &clone2); + + bpf_ringbuf_submit_dynptr(&clone2, 0); + + /* this should fail */ + bpf_dynptr_read(read_data, sizeof(read_data), &clone1, 0, 0); + + return 0; +} + +/* Invalidating a dynptr should invalidate any data slices + * of its clones + */ +SEC("?raw_tp") +__failure __msg("invalid mem access 'scalar'") +int clone_invalidate4(void *ctx) +{ + struct bpf_dynptr ptr; + struct bpf_dynptr clone; + int *data; + + bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr); + + bpf_dynptr_clone(&ptr, &clone); + data = bpf_dynptr_data(&clone, 0, sizeof(val)); + if (!data) + return 0; + + bpf_ringbuf_submit_dynptr(&ptr, 0); + + /* this should fail */ + *data = 123; + + return 0; +} + +/* Invalidating a dynptr should invalidate any data slices + * of its parent + */ +SEC("?raw_tp") +__failure __msg("invalid mem access 'scalar'") +int clone_invalidate5(void *ctx) +{ + struct bpf_dynptr ptr; + struct bpf_dynptr clone; + int *data; + + bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr); + data = bpf_dynptr_data(&ptr, 0, sizeof(val)); + if (!data) + return 0; + + bpf_dynptr_clone(&ptr, &clone); + + bpf_ringbuf_submit_dynptr(&clone, 0); + + /* this should fail */ + *data = 123; + + return 0; +} + +/* Invalidating a dynptr should invalidate any data slices + * of its sibling + */ +SEC("?raw_tp") +__failure __msg("invalid mem access 'scalar'") +int clone_invalidate6(void *ctx) +{ + struct bpf_dynptr ptr; + struct bpf_dynptr clone1; + struct bpf_dynptr clone2; + int *data; + + bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr); + + bpf_dynptr_clone(&ptr, &clone1); + + bpf_dynptr_clone(&ptr, &clone2); + + data = bpf_dynptr_data(&clone1, 0, sizeof(val)); + if (!data) + return 0; + + bpf_ringbuf_submit_dynptr(&clone2, 0); + + /* this should fail */ + *data = 123; + + return 0; +} + +/* A skb clone's data slices should be invalid anytime packet data changes */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int clone_skb_packet_data(struct __sk_buff *skb) +{ + char buffer[sizeof(__u32)] = {}; + struct bpf_dynptr clone; + struct bpf_dynptr ptr; + __u32 *data; + + bpf_dynptr_from_skb(skb, 0, &ptr); + + bpf_dynptr_clone(&ptr, &clone); + data = bpf_dynptr_slice_rdwr(&clone, 0, buffer, sizeof(buffer)); + if (!data) + return XDP_DROP; + + if (bpf_skb_pull_data(skb, skb->len)) + return SK_DROP; + + /* this should fail */ + *data = 123; + + return 0; +} + +/* A xdp clone's data slices should be invalid anytime packet data changes */ +SEC("?xdp") +__failure __msg("invalid mem access 'scalar'") +int clone_xdp_packet_data(struct xdp_md *xdp) +{ + char buffer[sizeof(__u32)] = {}; + struct bpf_dynptr clone; + struct bpf_dynptr ptr; + struct ethhdr *hdr; + __u32 *data; + + bpf_dynptr_from_xdp(xdp, 0, &ptr); + + bpf_dynptr_clone(&ptr, &clone); + data = bpf_dynptr_slice_rdwr(&clone, 0, buffer, sizeof(buffer)); + if (!data) + return XDP_DROP; + + if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(*hdr))) + return XDP_DROP; + + /* this should fail */ + *data = 123; + + return 0; +} + +/* Buffers that are provided must be sufficiently long */ +SEC("?cgroup_skb/egress") +__failure __msg("memory, len pair leads to invalid memory access") +int test_dynptr_skb_small_buff(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr; + char buffer[8] = {}; + __u64 *data; + + if (bpf_dynptr_from_skb(skb, 0, &ptr)) { + err = 1; + return 1; + } + + /* This may return NULL. SKB may require a buffer */ + data = bpf_dynptr_slice(&ptr, 0, buffer, 9); + + return !!data; +} diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index b2fa6c47ecc0..5985920d162e 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -2,6 +2,7 @@ /* Copyright (c) 2022 Facebook */ #include <string.h> +#include <stdbool.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include "bpf_misc.h" @@ -207,3 +208,339 @@ int test_dynptr_skb_data(struct __sk_buff *skb) return 1; } + +SEC("tp/syscalls/sys_enter_nanosleep") +int test_adjust(void *ctx) +{ + struct bpf_dynptr ptr; + __u32 bytes = 64; + __u32 off = 10; + __u32 trim = 15; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + err = bpf_ringbuf_reserve_dynptr(&ringbuf, bytes, 0, &ptr); + if (err) { + err = 1; + goto done; + } + + if (bpf_dynptr_size(&ptr) != bytes) { + err = 2; + goto done; + } + + /* Advance the dynptr by off */ + err = bpf_dynptr_adjust(&ptr, off, bpf_dynptr_size(&ptr)); + if (err) { + err = 3; + goto done; + } + + if (bpf_dynptr_size(&ptr) != bytes - off) { + err = 4; + goto done; + } + + /* Trim the dynptr */ + err = bpf_dynptr_adjust(&ptr, off, 15); + if (err) { + err = 5; + goto done; + } + + /* Check that the size was adjusted correctly */ + if (bpf_dynptr_size(&ptr) != trim - off) { + err = 6; + goto done; + } + +done: + bpf_ringbuf_discard_dynptr(&ptr, 0); + return 0; +} + +SEC("tp/syscalls/sys_enter_nanosleep") +int test_adjust_err(void *ctx) +{ + char write_data[45] = "hello there, world!!"; + struct bpf_dynptr ptr; + __u32 size = 64; + __u32 off = 20; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 0, &ptr)) { + err = 1; + goto done; + } + + /* Check that start can't be greater than end */ + if (bpf_dynptr_adjust(&ptr, 5, 1) != -EINVAL) { + err = 2; + goto done; + } + + /* Check that start can't be greater than size */ + if (bpf_dynptr_adjust(&ptr, size + 1, size + 1) != -ERANGE) { + err = 3; + goto done; + } + + /* Check that end can't be greater than size */ + if (bpf_dynptr_adjust(&ptr, 0, size + 1) != -ERANGE) { + err = 4; + goto done; + } + + if (bpf_dynptr_adjust(&ptr, off, size)) { + err = 5; + goto done; + } + + /* Check that you can't write more bytes than available into the dynptr + * after you've adjusted it + */ + if (bpf_dynptr_write(&ptr, 0, &write_data, sizeof(write_data), 0) != -E2BIG) { + err = 6; + goto done; + } + + /* Check that even after adjusting, submitting/discarding + * a ringbuf dynptr works + */ + bpf_ringbuf_submit_dynptr(&ptr, 0); + return 0; + +done: + bpf_ringbuf_discard_dynptr(&ptr, 0); + return 0; +} + +SEC("tp/syscalls/sys_enter_nanosleep") +int test_zero_size_dynptr(void *ctx) +{ + char write_data = 'x', read_data; + struct bpf_dynptr ptr; + __u32 size = 64; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 0, &ptr)) { + err = 1; + goto done; + } + + /* After this, the dynptr has a size of 0 */ + if (bpf_dynptr_adjust(&ptr, size, size)) { + err = 2; + goto done; + } + + /* Test that reading + writing non-zero bytes is not ok */ + if (bpf_dynptr_read(&read_data, sizeof(read_data), &ptr, 0, 0) != -E2BIG) { + err = 3; + goto done; + } + + if (bpf_dynptr_write(&ptr, 0, &write_data, sizeof(write_data), 0) != -E2BIG) { + err = 4; + goto done; + } + + /* Test that reading + writing 0 bytes from a 0-size dynptr is ok */ + if (bpf_dynptr_read(&read_data, 0, &ptr, 0, 0)) { + err = 5; + goto done; + } + + if (bpf_dynptr_write(&ptr, 0, &write_data, 0, 0)) { + err = 6; + goto done; + } + + err = 0; + +done: + bpf_ringbuf_discard_dynptr(&ptr, 0); + return 0; +} + +SEC("tp/syscalls/sys_enter_nanosleep") +int test_dynptr_is_null(void *ctx) +{ + struct bpf_dynptr ptr1; + struct bpf_dynptr ptr2; + __u64 size = 4; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 0; + + /* Pass in invalid flags, get back an invalid dynptr */ + if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 123, &ptr1) != -EINVAL) { + err = 1; + goto exit_early; + } + + /* Test that the invalid dynptr is null */ + if (!bpf_dynptr_is_null(&ptr1)) { + err = 2; + goto exit_early; + } + + /* Get a valid dynptr */ + if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 0, &ptr2)) { + err = 3; + goto exit; + } + + /* Test that the valid dynptr is not null */ + if (bpf_dynptr_is_null(&ptr2)) { + err = 4; + goto exit; + } + +exit: + bpf_ringbuf_discard_dynptr(&ptr2, 0); +exit_early: + bpf_ringbuf_discard_dynptr(&ptr1, 0); + return 0; +} + +SEC("cgroup_skb/egress") +int test_dynptr_is_rdonly(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr1; + struct bpf_dynptr ptr2; + struct bpf_dynptr ptr3; + + /* Pass in invalid flags, get back an invalid dynptr */ + if (bpf_dynptr_from_skb(skb, 123, &ptr1) != -EINVAL) { + err = 1; + return 0; + } + + /* Test that an invalid dynptr is_rdonly returns false */ + if (bpf_dynptr_is_rdonly(&ptr1)) { + err = 2; + return 0; + } + + /* Get a read-only dynptr */ + if (bpf_dynptr_from_skb(skb, 0, &ptr2)) { + err = 3; + return 0; + } + + /* Test that the dynptr is read-only */ + if (!bpf_dynptr_is_rdonly(&ptr2)) { + err = 4; + return 0; + } + + /* Get a read-writeable dynptr */ + if (bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr3)) { + err = 5; + goto done; + } + + /* Test that the dynptr is read-only */ + if (bpf_dynptr_is_rdonly(&ptr3)) { + err = 6; + goto done; + } + +done: + bpf_ringbuf_discard_dynptr(&ptr3, 0); + return 0; +} + +SEC("cgroup_skb/egress") +int test_dynptr_clone(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr1; + struct bpf_dynptr ptr2; + __u32 off = 2, size; + + /* Get a dynptr */ + if (bpf_dynptr_from_skb(skb, 0, &ptr1)) { + err = 1; + return 0; + } + + if (bpf_dynptr_adjust(&ptr1, off, bpf_dynptr_size(&ptr1))) { + err = 2; + return 0; + } + + /* Clone the dynptr */ + if (bpf_dynptr_clone(&ptr1, &ptr2)) { + err = 3; + return 0; + } + + size = bpf_dynptr_size(&ptr1); + + /* Check that the clone has the same size and rd-only */ + if (bpf_dynptr_size(&ptr2) != size) { + err = 4; + return 0; + } + + if (bpf_dynptr_is_rdonly(&ptr2) != bpf_dynptr_is_rdonly(&ptr1)) { + err = 5; + return 0; + } + + /* Advance and trim the original dynptr */ + bpf_dynptr_adjust(&ptr1, 5, 5); + + /* Check that only original dynptr was affected, and the clone wasn't */ + if (bpf_dynptr_size(&ptr2) != size) { + err = 6; + return 0; + } + + return 0; +} + +SEC("?cgroup_skb/egress") +int test_dynptr_skb_no_buff(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr; + __u64 *data; + + if (bpf_dynptr_from_skb(skb, 0, &ptr)) { + err = 1; + return 1; + } + + /* This may return NULL. SKB may require a buffer */ + data = bpf_dynptr_slice(&ptr, 0, NULL, 1); + + return !!data; +} + +SEC("?cgroup_skb/egress") +int test_dynptr_skb_strcmp(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr; + char *data; + + if (bpf_dynptr_from_skb(skb, 0, &ptr)) { + err = 1; + return 1; + } + + /* This may return NULL. SKB may require a buffer */ + data = bpf_dynptr_slice(&ptr, 0, NULL, 10); + if (data) { + bpf_strncmp(data, 10, "foo"); + return 1; + } + + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/fentry_many_args.c b/tools/testing/selftests/bpf/progs/fentry_many_args.c new file mode 100644 index 000000000000..b61bb92fee2c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fentry_many_args.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Tencent */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +__u64 test1_result = 0; +SEC("fentry/bpf_testmod_fentry_test7") +int BPF_PROG(test1, __u64 a, void *b, short c, int d, void *e, char f, + int g) +{ + test1_result = a == 16 && b == (void *)17 && c == 18 && d == 19 && + e == (void *)20 && f == 21 && g == 22; + return 0; +} + +__u64 test2_result = 0; +SEC("fentry/bpf_testmod_fentry_test11") +int BPF_PROG(test2, __u64 a, void *b, short c, int d, void *e, char f, + int g, unsigned int h, long i, __u64 j, unsigned long k) +{ + test2_result = a == 16 && b == (void *)17 && c == 18 && d == 19 && + e == (void *)20 && f == 21 && g == 22 && h == 23 && + i == 24 && j == 25 && k == 26; + return 0; +} + +__u64 test3_result = 0; +SEC("fentry/bpf_testmod_fentry_test11") +int BPF_PROG(test3, __u64 a, __u64 b, __u64 c, __u64 d, __u64 e, __u64 f, + __u64 g, __u64 h, __u64 i, __u64 j, __u64 k) +{ + test3_result = a == 16 && b == 17 && c == 18 && d == 19 && + e == 20 && f == 21 && g == 22 && h == 23 && + i == 24 && j == 25 && k == 26; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/fexit_many_args.c b/tools/testing/selftests/bpf/progs/fexit_many_args.c new file mode 100644 index 000000000000..53b335c2dafb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fexit_many_args.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Tencent */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +__u64 test1_result = 0; +SEC("fexit/bpf_testmod_fentry_test7") +int BPF_PROG(test1, __u64 a, void *b, short c, int d, void *e, char f, + int g, int ret) +{ + test1_result = a == 16 && b == (void *)17 && c == 18 && d == 19 && + e == (void *)20 && f == 21 && g == 22 && ret == 133; + return 0; +} + +__u64 test2_result = 0; +SEC("fexit/bpf_testmod_fentry_test11") +int BPF_PROG(test2, __u64 a, void *b, short c, int d, void *e, char f, + int g, unsigned int h, long i, __u64 j, unsigned long k, + int ret) +{ + test2_result = a == 16 && b == (void *)17 && c == 18 && d == 19 && + e == (void *)20 && f == 21 && g == 22 && h == 23 && + i == 24 && j == 25 && k == 26 && ret == 231; + return 0; +} + +__u64 test3_result = 0; +SEC("fexit/bpf_testmod_fentry_test11") +int BPF_PROG(test3, __u64 a, __u64 b, __u64 c, __u64 d, __u64 e, __u64 f, + __u64 g, __u64 h, __u64 i, __u64 j, __u64 k, __u64 ret) +{ + test3_result = a == 16 && b == 17 && c == 18 && d == 19 && + e == 20 && f == 21 && g == 22 && h == 23 && + i == 24 && j == 25 && k == 26 && ret == 231; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/get_branch_snapshot.c b/tools/testing/selftests/bpf/progs/get_branch_snapshot.c index a1b139888048..511ac634eef0 100644 --- a/tools/testing/selftests/bpf/progs/get_branch_snapshot.c +++ b/tools/testing/selftests/bpf/progs/get_branch_snapshot.c @@ -15,7 +15,7 @@ long total_entries = 0; #define ENTRY_CNT 32 struct perf_branch_entry entries[ENTRY_CNT] = {}; -static inline bool in_range(__u64 val) +static inline bool gbs_in_range(__u64 val) { return (val >= address_low) && (val < address_high); } @@ -31,7 +31,7 @@ int BPF_PROG(test1, int n, int ret) for (i = 0; i < ENTRY_CNT; i++) { if (i >= total_entries) break; - if (in_range(entries[i].from) && in_range(entries[i].to)) + if (gbs_in_range(entries[i].from) && gbs_in_range(entries[i].to)) test1_hits++; else if (!test1_hits) wasted_entries++; diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_test.c index 8559e698b40d..8956eb78a226 100644 --- a/tools/testing/selftests/bpf/progs/get_func_ip_test.c +++ b/tools/testing/selftests/bpf/progs/get_func_ip_test.c @@ -1,8 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -#include <stdbool.h> char _license[] SEC("license") = "GPL"; @@ -83,3 +82,25 @@ int test6(struct pt_regs *ctx) test6_result = (const void *) addr == 0; return 0; } + +unsigned long uprobe_trigger; + +__u64 test7_result = 0; +SEC("uprobe//proc/self/exe:uprobe_trigger") +int BPF_UPROBE(test7) +{ + __u64 addr = bpf_get_func_ip(ctx); + + test7_result = (const void *) addr == (const void *) uprobe_trigger; + return 0; +} + +__u64 test8_result = 0; +SEC("uretprobe//proc/self/exe:uprobe_trigger") +int BPF_URETPROBE(test8, int ret) +{ + __u64 addr = bpf_get_func_ip(ctx); + + test8_result = (const void *) addr == (const void *) uprobe_trigger; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_uprobe_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_uprobe_test.c new file mode 100644 index 000000000000..052f8a4345a8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/get_func_ip_uprobe_test.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +unsigned long uprobe_trigger_body; + +__u64 test1_result = 0; +SEC("uprobe//proc/self/exe:uprobe_trigger_body+1") +int BPF_UPROBE(test1) +{ + __u64 addr = bpf_get_func_ip(ctx); + + test1_result = (const void *) addr == (const void *) uprobe_trigger_body + 1; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/htab_mem_bench.c b/tools/testing/selftests/bpf/progs/htab_mem_bench.c new file mode 100644 index 000000000000..b1b721b14d67 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/htab_mem_bench.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#include <stdbool.h> +#include <errno.h> +#include <linux/types.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#define OP_BATCH 64 + +struct update_ctx { + unsigned int from; + unsigned int step; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, 4); + __uint(map_flags, BPF_F_NO_PREALLOC); +} htab SEC(".maps"); + +char _license[] SEC("license") = "GPL"; + +unsigned char zeroed_value[4096]; +unsigned int nr_thread = 0; +long op_cnt = 0; + +static int write_htab(unsigned int i, struct update_ctx *ctx, unsigned int flags) +{ + bpf_map_update_elem(&htab, &ctx->from, zeroed_value, flags); + ctx->from += ctx->step; + + return 0; +} + +static int overwrite_htab(unsigned int i, struct update_ctx *ctx) +{ + return write_htab(i, ctx, 0); +} + +static int newwrite_htab(unsigned int i, struct update_ctx *ctx) +{ + return write_htab(i, ctx, BPF_NOEXIST); +} + +static int del_htab(unsigned int i, struct update_ctx *ctx) +{ + bpf_map_delete_elem(&htab, &ctx->from); + ctx->from += ctx->step; + + return 0; +} + +SEC("?tp/syscalls/sys_enter_getpgid") +int overwrite(void *ctx) +{ + struct update_ctx update; + + update.from = bpf_get_smp_processor_id(); + update.step = nr_thread; + bpf_loop(OP_BATCH, overwrite_htab, &update, 0); + __sync_fetch_and_add(&op_cnt, 1); + return 0; +} + +SEC("?tp/syscalls/sys_enter_getpgid") +int batch_add_batch_del(void *ctx) +{ + struct update_ctx update; + + update.from = bpf_get_smp_processor_id(); + update.step = nr_thread; + bpf_loop(OP_BATCH, overwrite_htab, &update, 0); + + update.from = bpf_get_smp_processor_id(); + bpf_loop(OP_BATCH, del_htab, &update, 0); + + __sync_fetch_and_add(&op_cnt, 2); + return 0; +} + +SEC("?tp/syscalls/sys_enter_getpgid") +int add_only(void *ctx) +{ + struct update_ctx update; + + update.from = bpf_get_smp_processor_id() / 2; + update.step = nr_thread / 2; + bpf_loop(OP_BATCH, newwrite_htab, &update, 0); + __sync_fetch_and_add(&op_cnt, 1); + return 0; +} + +SEC("?tp/syscalls/sys_enter_getppid") +int del_only(void *ctx) +{ + struct update_ctx update; + + update.from = bpf_get_smp_processor_id() / 2; + update.step = nr_thread / 2; + bpf_loop(OP_BATCH, del_htab, &update, 0); + __sync_fetch_and_add(&op_cnt, 1); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/ip_check_defrag.c b/tools/testing/selftests/bpf/progs/ip_check_defrag.c new file mode 100644 index 000000000000..1c2b6c1616b0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/ip_check_defrag.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#include "bpf_tracing_net.h" + +#define NF_DROP 0 +#define NF_ACCEPT 1 +#define ETH_P_IP 0x0800 +#define ETH_P_IPV6 0x86DD +#define IP_MF 0x2000 +#define IP_OFFSET 0x1FFF +#define NEXTHDR_FRAGMENT 44 + +extern int bpf_dynptr_from_skb(struct sk_buff *skb, __u64 flags, + struct bpf_dynptr *ptr__uninit) __ksym; +extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset, + void *buffer, uint32_t buffer__sz) __ksym; + +volatile int shootdowns = 0; + +static bool is_frag_v4(struct iphdr *iph) +{ + int offset; + int flags; + + offset = bpf_ntohs(iph->frag_off); + flags = offset & ~IP_OFFSET; + offset &= IP_OFFSET; + offset <<= 3; + + return (flags & IP_MF) || offset; +} + +static bool is_frag_v6(struct ipv6hdr *ip6h) +{ + /* Simplifying assumption that there are no extension headers + * between fixed header and fragmentation header. This assumption + * is only valid in this test case. It saves us the hassle of + * searching all potential extension headers. + */ + return ip6h->nexthdr == NEXTHDR_FRAGMENT; +} + +static int handle_v4(struct sk_buff *skb) +{ + struct bpf_dynptr ptr; + u8 iph_buf[20] = {}; + struct iphdr *iph; + + if (bpf_dynptr_from_skb(skb, 0, &ptr)) + return NF_DROP; + + iph = bpf_dynptr_slice(&ptr, 0, iph_buf, sizeof(iph_buf)); + if (!iph) + return NF_DROP; + + /* Shootdown any frags */ + if (is_frag_v4(iph)) { + shootdowns++; + return NF_DROP; + } + + return NF_ACCEPT; +} + +static int handle_v6(struct sk_buff *skb) +{ + struct bpf_dynptr ptr; + struct ipv6hdr *ip6h; + u8 ip6h_buf[40] = {}; + + if (bpf_dynptr_from_skb(skb, 0, &ptr)) + return NF_DROP; + + ip6h = bpf_dynptr_slice(&ptr, 0, ip6h_buf, sizeof(ip6h_buf)); + if (!ip6h) + return NF_DROP; + + /* Shootdown any frags */ + if (is_frag_v6(ip6h)) { + shootdowns++; + return NF_DROP; + } + + return NF_ACCEPT; +} + +SEC("netfilter") +int defrag(struct bpf_nf_ctx *ctx) +{ + struct sk_buff *skb = ctx->skb; + + switch (bpf_ntohs(skb->protocol)) { + case ETH_P_IP: + return handle_v4(skb); + case ETH_P_IPV6: + return handle_v6(skb); + default: + return NF_ACCEPT; + } +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index be16143ae292..6b9b3c56f009 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -651,29 +651,25 @@ int iter_stack_array_loop(const void *ctx) return sum; } -#define ARR_SZ 16 - -static __noinline void fill(struct bpf_iter_num *it, int *arr, int mul) +static __noinline void fill(struct bpf_iter_num *it, int *arr, __u32 n, int mul) { - int *t; - __u64 i; + int *t, i; while ((t = bpf_iter_num_next(it))) { i = *t; - if (i >= ARR_SZ) + if (i >= n) break; arr[i] = i * mul; } } -static __noinline int sum(struct bpf_iter_num *it, int *arr) +static __noinline int sum(struct bpf_iter_num *it, int *arr, __u32 n) { - int *t, sum = 0;; - __u64 i; + int *t, i, sum = 0;; while ((t = bpf_iter_num_next(it))) { i = *t; - if (i >= ARR_SZ) + if (i >= n) break; sum += arr[i]; } @@ -685,7 +681,7 @@ SEC("raw_tp") __success int iter_pass_iter_ptr_to_subprog(const void *ctx) { - int arr1[ARR_SZ], arr2[ARR_SZ]; + int arr1[16], arr2[32]; struct bpf_iter_num it; int n, sum1, sum2; @@ -694,25 +690,25 @@ int iter_pass_iter_ptr_to_subprog(const void *ctx) /* fill arr1 */ n = ARRAY_SIZE(arr1); bpf_iter_num_new(&it, 0, n); - fill(&it, arr1, 2); + fill(&it, arr1, n, 2); bpf_iter_num_destroy(&it); /* fill arr2 */ n = ARRAY_SIZE(arr2); bpf_iter_num_new(&it, 0, n); - fill(&it, arr2, 10); + fill(&it, arr2, n, 10); bpf_iter_num_destroy(&it); /* sum arr1 */ n = ARRAY_SIZE(arr1); bpf_iter_num_new(&it, 0, n); - sum1 = sum(&it, arr1); + sum1 = sum(&it, arr1, n); bpf_iter_num_destroy(&it); /* sum arr2 */ n = ARRAY_SIZE(arr2); bpf_iter_num_new(&it, 0, n); - sum2 = sum(&it, arr2); + sum2 = sum(&it, arr2, n); bpf_iter_num_destroy(&it); bpf_printk("sum1=%d, sum2=%d", sum1, sum2); diff --git a/tools/testing/selftests/bpf/progs/jit_probe_mem.c b/tools/testing/selftests/bpf/progs/jit_probe_mem.c index 13f00ca2ed0a..f9789e668297 100644 --- a/tools/testing/selftests/bpf/progs/jit_probe_mem.c +++ b/tools/testing/selftests/bpf/progs/jit_probe_mem.c @@ -3,13 +3,11 @@ #include <vmlinux.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> +#include "../bpf_testmod/bpf_testmod_kfunc.h" static struct prog_test_ref_kfunc __kptr *v; long total_sum = -1; -extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; -extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; - SEC("tc") int test_jit_probe_mem(struct __sk_buff *ctx) { diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c index 767472bc5a97..7632d9ecb253 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c @@ -1,8 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <vmlinux.h> #include <bpf/bpf_helpers.h> - -extern void bpf_kfunc_call_test_destructive(void) __ksym; +#include "../bpf_testmod/bpf_testmod_kfunc.h" SEC("tc") int kfunc_destructive_test(void) diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_fail.c b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c index b98313d391c6..4b0b7b79cdfb 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_fail.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c @@ -2,14 +2,7 @@ /* Copyright (c) 2021 Facebook */ #include <vmlinux.h> #include <bpf/bpf_helpers.h> - -extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; -extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; -extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym; -extern int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym; -extern int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; -extern int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; -extern void bpf_kfunc_call_int_mem_release(int *p) __ksym; +#include "../bpf_testmod/bpf_testmod_kfunc.h" struct syscall_test_args { __u8 data[16]; diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_race.c b/tools/testing/selftests/bpf/progs/kfunc_call_race.c index 4e8fed75a4e0..d532af07decf 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_race.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_race.c @@ -1,8 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <vmlinux.h> #include <bpf/bpf_helpers.h> - -extern void bpf_testmod_test_mod_kfunc(int i) __ksym; +#include "../bpf_testmod/bpf_testmod_kfunc.h" SEC("tc") int kfunc_call_fail(struct __sk_buff *ctx) diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c index 7daa8f5720b9..cf68d1e48a0f 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c @@ -2,22 +2,7 @@ /* Copyright (c) 2021 Facebook */ #include <vmlinux.h> #include <bpf/bpf_helpers.h> - -extern long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym; -extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym; -extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, - __u32 c, __u64 d) __ksym; - -extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; -extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; -extern void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym; -extern void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym; -extern void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym; -extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym; -extern void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym; -extern int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym; -extern int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym; -extern u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) __ksym; +#include "../bpf_testmod/bpf_testmod_kfunc.h" SEC("tc") int kfunc_call_test4(struct __sk_buff *skb) diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c index c1fdecabeabf..2380c75e74ce 100644 --- a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c +++ b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c @@ -1,13 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2021 Facebook */ -#include <linux/bpf.h> -#include <bpf/bpf_helpers.h> -#include "bpf_tcp_helpers.h" +#include "../bpf_testmod/bpf_testmod_kfunc.h" extern const int bpf_prog_active __ksym; -extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b, - __u32 c, __u64 d) __ksym; -extern struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym; int active_res = -1; int sk_state_res = -1; @@ -28,7 +23,7 @@ int __noinline f1(struct __sk_buff *skb) if (active) active_res = *active; - sk_state_res = bpf_kfunc_call_test3((struct sock *)sk)->sk_state; + sk_state_res = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state; return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4); } diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_override.c b/tools/testing/selftests/bpf/progs/kprobe_multi_override.c new file mode 100644 index 000000000000..28f8487c9059 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kprobe_multi_override.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +SEC("kprobe.multi") +int test_override(struct pt_regs *ctx) +{ + bpf_override_return(ctx, 123); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c index 57440a554304..84d1777a9e6c 100644 --- a/tools/testing/selftests/bpf/progs/linked_list.c +++ b/tools/testing/selftests/bpf/progs/linked_list.c @@ -96,7 +96,7 @@ static __always_inline int list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map) { struct bpf_list_node *n; - struct foo *f[8], *pf; + struct foo *f[200], *pf; int i; /* Loop following this check adds nodes 2-at-a-time in order to diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c index 0ef286da092b..b567a666d2b8 100644 --- a/tools/testing/selftests/bpf/progs/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c @@ -5,7 +5,8 @@ #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_core_read.h> -#include "bpf_experimental.h" +#include "../bpf_experimental.h" +#include "../bpf_testmod/bpf_testmod_kfunc.h" struct node_data { long key; @@ -13,10 +14,16 @@ struct node_data { struct bpf_rb_node node; }; +struct plain_local { + long key; + long data; +}; + struct map_value { struct prog_test_ref_kfunc *not_kptr; struct prog_test_ref_kfunc __kptr *val; struct node_data __kptr *node; + struct plain_local __kptr *plain; }; /* This is necessary so that LLVM generates BTF for node_data struct @@ -32,8 +39,6 @@ struct map_value { */ struct node_data *just_here_because_btf_bug; -extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; - struct { __uint(type, BPF_MAP_TYPE_ARRAY); __type(key, int); @@ -68,6 +73,28 @@ long stash_rb_nodes(void *ctx) } SEC("tc") +long stash_plain(void *ctx) +{ + struct map_value *mapval; + struct plain_local *res; + int idx = 0; + + mapval = bpf_map_lookup_elem(&some_nodes, &idx); + if (!mapval) + return 1; + + res = bpf_obj_new(typeof(*res)); + if (!res) + return 1; + res->key = 41; + + res = bpf_kptr_xchg(&mapval->plain, res); + if (res) + bpf_obj_drop(res); + return 0; +} + +SEC("tc") long unstash_rb_node(void *ctx) { struct map_value *mapval; diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash_fail.c b/tools/testing/selftests/bpf/progs/local_kptr_stash_fail.c new file mode 100644 index 000000000000..fcf7a7567da2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/local_kptr_stash_fail.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> +#include "../bpf_experimental.h" +#include "bpf_misc.h" + +struct node_data { + long key; + long data; + struct bpf_rb_node node; +}; + +struct map_value { + struct node_data __kptr *node; +}; + +struct node_data2 { + long key[4]; +}; + +/* This is necessary so that LLVM generates BTF for node_data struct + * If it's not included, a fwd reference for node_data will be generated but + * no struct. Example BTF of "node" field in map_value when not included: + * + * [10] PTR '(anon)' type_id=35 + * [34] FWD 'node_data' fwd_kind=struct + * [35] TYPE_TAG 'kptr_ref' type_id=34 + */ +struct node_data *just_here_because_btf_bug; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 2); +} some_nodes SEC(".maps"); + +SEC("tc") +__failure __msg("invalid kptr access, R2 type=ptr_node_data2 expected=ptr_node_data") +long stash_rb_nodes(void *ctx) +{ + struct map_value *mapval; + struct node_data2 *res; + int idx = 0; + + mapval = bpf_map_lookup_elem(&some_nodes, &idx); + if (!mapval) + return 1; + + res = bpf_obj_new(typeof(*res)); + if (!res) + return 1; + res->key[0] = 40; + + res = bpf_kptr_xchg(&mapval->node, res); + if (res) + bpf_obj_drop(res); + return 0; +} + +SEC("tc") +__failure __msg("R1 must have zero offset when passed to release func") +long drop_rb_node_off(void *ctx) +{ + struct map_value *mapval; + struct node_data *res; + int idx = 0; + + mapval = bpf_map_lookup_elem(&some_nodes, &idx); + if (!mapval) + return 1; + + res = bpf_obj_new(typeof(*res)); + if (!res) + return 1; + /* Try releasing with graph node offset */ + bpf_obj_drop(&res->node); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c index d7150041e5d1..da30f0d59364 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr.c +++ b/tools/testing/selftests/bpf/progs/map_kptr.c @@ -2,6 +2,7 @@ #include <vmlinux.h> #include <bpf/bpf_tracing.h> #include <bpf/bpf_helpers.h> +#include "../bpf_testmod/bpf_testmod_kfunc.h" struct map_value { struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr; @@ -114,10 +115,6 @@ DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_map, hash_of_hash_maps); DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_malloc_map, hash_of_hash_malloc_maps); DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, lru_hash_map, hash_of_lru_hash_maps); -extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; -extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; -void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) __ksym; - #define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val)) static void test_kptr_unref(struct map_value *v) diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c index da8c724f839b..450bb373b179 100644 --- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c @@ -4,6 +4,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_core_read.h> #include "bpf_misc.h" +#include "../bpf_testmod/bpf_testmod_kfunc.h" struct map_value { char buf[8]; @@ -19,9 +20,6 @@ struct array_map { __uint(max_entries, 1); } array_map SEC(".maps"); -extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym; -extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym; - SEC("?tc") __failure __msg("kptr access size must be BPF_DW") int size_not_bpf_dw(struct __sk_buff *ctx) diff --git a/tools/testing/selftests/bpf/progs/map_percpu_stats.c b/tools/testing/selftests/bpf/progs/map_percpu_stats.c new file mode 100644 index 000000000000..10b2325c1720 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/map_percpu_stats.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Isovalent */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +__u32 target_id; + +__s64 bpf_map_sum_elem_count(struct bpf_map *map) __ksym; + +SEC("iter/bpf_map") +int dump_bpf_map(struct bpf_iter__bpf_map *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct bpf_map *map = ctx->map; + + if (map && map->id == target_id) + BPF_SEQ_PRINTF(seq, "%lld", bpf_map_sum_elem_count(map)); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c index db388f593d0a..3325da17ec81 100644 --- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c +++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c @@ -103,6 +103,8 @@ struct { __type(value, __u32); } m_hash SEC(".maps"); +__s64 bpf_map_sum_elem_count(struct bpf_map *map) __ksym; + static inline int check_hash(void) { struct bpf_htab *hash = (struct bpf_htab *)&m_hash; @@ -115,6 +117,8 @@ static inline int check_hash(void) VERIFY(hash->elem_size == 64); VERIFY(hash->count.counter == 0); + VERIFY(bpf_map_sum_elem_count(map) == 0); + for (i = 0; i < HALF_ENTRIES; ++i) { const __u32 key = i; const __u32 val = 1; @@ -123,6 +127,7 @@ static inline int check_hash(void) return 0; } VERIFY(hash->count.counter == HALF_ENTRIES); + VERIFY(bpf_map_sum_elem_count(map) == HALF_ENTRIES); return 1; } diff --git a/tools/testing/selftests/bpf/progs/modify_return.c b/tools/testing/selftests/bpf/progs/modify_return.c index 8b7466a15c6b..3376d4849f58 100644 --- a/tools/testing/selftests/bpf/progs/modify_return.c +++ b/tools/testing/selftests/bpf/progs/modify_return.c @@ -47,3 +47,43 @@ int BPF_PROG(fexit_test, int a, __u64 b, int ret) return 0; } + +static int sequence2; + +__u64 fentry_result2 = 0; +SEC("fentry/bpf_modify_return_test2") +int BPF_PROG(fentry_test2, int a, int *b, short c, int d, void *e, char f, + int g) +{ + sequence2++; + fentry_result2 = (sequence2 == 1); + return 0; +} + +__u64 fmod_ret_result2 = 0; +SEC("fmod_ret/bpf_modify_return_test2") +int BPF_PROG(fmod_ret_test2, int a, int *b, short c, int d, void *e, char f, + int g, int ret) +{ + sequence2++; + /* This is the first fmod_ret program, the ret passed should be 0 */ + fmod_ret_result2 = (sequence2 == 2 && ret == 0); + return input_retval; +} + +__u64 fexit_result2 = 0; +SEC("fexit/bpf_modify_return_test2") +int BPF_PROG(fexit_test2, int a, int *b, short c, int d, void *e, char f, + int g, int ret) +{ + sequence2++; + /* If the input_reval is non-zero a successful modification should have + * occurred. + */ + if (input_retval) + fexit_result2 = (sequence2 == 3 && ret == input_retval); + else + fexit_result2 = (sequence2 == 3 && ret == 29); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/mptcpify.c b/tools/testing/selftests/bpf/progs/mptcpify.c new file mode 100644 index 000000000000..53301ae8a8f7 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/mptcpify.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023, SUSE. */ + +#include "vmlinux.h" +#include <bpf/bpf_tracing.h> +#include "bpf_tracing_net.h" + +char _license[] SEC("license") = "GPL"; + +SEC("fmod_ret/update_socket_protocol") +int BPF_PROG(mptcpify, int family, int type, int protocol) +{ + if ((family == AF_INET || family == AF_INET6) && + type == SOCK_STREAM && + (!protocol || protocol == IPPROTO_TCP)) { + return IPPROTO_MPTCP; + } + + return protocol; +} diff --git a/tools/testing/selftests/bpf/progs/nested_trust_failure.c b/tools/testing/selftests/bpf/progs/nested_trust_failure.c index 0d1aa6bbace4..ea39497f11ed 100644 --- a/tools/testing/selftests/bpf/progs/nested_trust_failure.c +++ b/tools/testing/selftests/bpf/progs/nested_trust_failure.c @@ -10,6 +10,13 @@ char _license[] SEC("license") = "GPL"; +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, u64); +} sk_storage_map SEC(".maps"); + /* Prototype for all of the program trace events below: * * TRACE_EVENT(task_newtask, @@ -31,3 +38,12 @@ int BPF_PROG(test_invalid_nested_offset, struct task_struct *task, u64 clone_fla bpf_cpumask_first_zero(&task->cpus_mask); return 0; } + +/* Although R2 is of type sk_buff but sock_common is expected, we will hit untrusted ptr first. */ +SEC("tp_btf/tcp_probe") +__failure __msg("R2 type=untrusted_ptr_ expected=ptr_, trusted_ptr_, rcu_ptr_") +int BPF_PROG(test_invalid_skb_field, struct sock *sk, struct sk_buff *skb) +{ + bpf_sk_storage_get(&sk_storage_map, skb->next, 0, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/nested_trust_success.c b/tools/testing/selftests/bpf/progs/nested_trust_success.c index 886ade4aa99d..833840bffd3b 100644 --- a/tools/testing/selftests/bpf/progs/nested_trust_success.c +++ b/tools/testing/selftests/bpf/progs/nested_trust_success.c @@ -10,6 +10,13 @@ char _license[] SEC("license") = "GPL"; +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, u64); +} sk_storage_map SEC(".maps"); + SEC("tp_btf/task_newtask") __success int BPF_PROG(test_read_cpumask, struct task_struct *task, u64 clone_flags) @@ -17,3 +24,11 @@ int BPF_PROG(test_read_cpumask, struct task_struct *task, u64 clone_flags) bpf_cpumask_test_cpu(0, task->cpus_ptr); return 0; } + +SEC("tp_btf/tcp_probe") +__success +int BPF_PROG(test_skb_field, struct sock *sk, struct sk_buff *skb) +{ + bpf_sk_storage_get(&sk_storage_map, skb->sk, 0, 0); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/tools/testing/selftests/bpf/progs/refcounted_kptr.c index 1d348a225140..893a4fdb4b6e 100644 --- a/tools/testing/selftests/bpf/progs/refcounted_kptr.c +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c @@ -8,6 +8,9 @@ #include "bpf_misc.h" #include "bpf_experimental.h" +extern void bpf_rcu_read_lock(void) __ksym; +extern void bpf_rcu_read_unlock(void) __ksym; + struct node_data { long key; long list_data; @@ -24,7 +27,7 @@ struct { __uint(type, BPF_MAP_TYPE_ARRAY); __type(key, int); __type(value, struct map_value); - __uint(max_entries, 1); + __uint(max_entries, 2); } stashed_nodes SEC(".maps"); struct node_acquire { @@ -42,6 +45,9 @@ private(A) struct bpf_list_head head __contains(node_data, l); private(B) struct bpf_spin_lock alock; private(B) struct bpf_rb_root aroot __contains(node_acquire, node); +private(C) struct bpf_spin_lock block; +private(C) struct bpf_rb_root broot __contains(node_data, r); + static bool less(struct bpf_rb_node *node_a, const struct bpf_rb_node *node_b) { struct node_data *a; @@ -375,6 +381,8 @@ long rbtree_refcounted_node_ref_escapes(void *ctx) bpf_rbtree_add(&aroot, &n->node, less_a); m = bpf_refcount_acquire(n); bpf_spin_unlock(&alock); + if (!m) + return 2; m->key = 2; bpf_obj_drop(m); @@ -403,4 +411,161 @@ long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx) return 0; } +static long __stash_map_empty_xchg(struct node_data *n, int idx) +{ + struct map_value *mapval = bpf_map_lookup_elem(&stashed_nodes, &idx); + + if (!mapval) { + bpf_obj_drop(n); + return 1; + } + n = bpf_kptr_xchg(&mapval->node, n); + if (n) { + bpf_obj_drop(n); + return 2; + } + return 0; +} + +SEC("tc") +long rbtree_wrong_owner_remove_fail_a1(void *ctx) +{ + struct node_data *n, *m; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return 1; + m = bpf_refcount_acquire(n); + + if (__stash_map_empty_xchg(n, 0)) { + bpf_obj_drop(m); + return 2; + } + + if (__stash_map_empty_xchg(m, 1)) + return 3; + + return 0; +} + +SEC("tc") +long rbtree_wrong_owner_remove_fail_b(void *ctx) +{ + struct map_value *mapval; + struct node_data *n; + int idx = 0; + + mapval = bpf_map_lookup_elem(&stashed_nodes, &idx); + if (!mapval) + return 1; + + n = bpf_kptr_xchg(&mapval->node, NULL); + if (!n) + return 2; + + bpf_spin_lock(&block); + + bpf_rbtree_add(&broot, &n->r, less); + + bpf_spin_unlock(&block); + return 0; +} + +SEC("tc") +long rbtree_wrong_owner_remove_fail_a2(void *ctx) +{ + struct map_value *mapval; + struct bpf_rb_node *res; + struct node_data *m; + int idx = 1; + + mapval = bpf_map_lookup_elem(&stashed_nodes, &idx); + if (!mapval) + return 1; + + m = bpf_kptr_xchg(&mapval->node, NULL); + if (!m) + return 2; + bpf_spin_lock(&lock); + + /* make m non-owning ref */ + bpf_list_push_back(&head, &m->l); + res = bpf_rbtree_remove(&root, &m->r); + + bpf_spin_unlock(&lock); + if (res) { + bpf_obj_drop(container_of(res, struct node_data, r)); + return 3; + } + return 0; +} + +SEC("?fentry.s/bpf_testmod_test_read") +__success +int BPF_PROG(rbtree_sleepable_rcu, + struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len) +{ + struct bpf_rb_node *rb; + struct node_data *n, *m = NULL; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return 0; + + bpf_rcu_read_lock(); + bpf_spin_lock(&lock); + bpf_rbtree_add(&root, &n->r, less); + rb = bpf_rbtree_first(&root); + if (!rb) + goto err_out; + + rb = bpf_rbtree_remove(&root, rb); + if (!rb) + goto err_out; + + m = container_of(rb, struct node_data, r); + +err_out: + bpf_spin_unlock(&lock); + bpf_rcu_read_unlock(); + if (m) + bpf_obj_drop(m); + return 0; +} + +SEC("?fentry.s/bpf_testmod_test_read") +__success +int BPF_PROG(rbtree_sleepable_rcu_no_explicit_rcu_lock, + struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len) +{ + struct bpf_rb_node *rb; + struct node_data *n, *m = NULL; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return 0; + + /* No explicit bpf_rcu_read_lock */ + bpf_spin_lock(&lock); + bpf_rbtree_add(&root, &n->r, less); + rb = bpf_rbtree_first(&root); + if (!rb) + goto err_out; + + rb = bpf_rbtree_remove(&root, rb); + if (!rb) + goto err_out; + + m = container_of(rb, struct node_data, r); + +err_out: + bpf_spin_unlock(&lock); + /* No explicit bpf_rcu_read_unlock */ + if (m) + bpf_obj_drop(m); + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c index efcb308f80ad..1ef07f6ee580 100644 --- a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c +++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c @@ -13,6 +13,9 @@ struct node_acquire { struct bpf_refcount refcount; }; +extern void bpf_rcu_read_lock(void) __ksym; +extern void bpf_rcu_read_unlock(void) __ksym; + #define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8))) private(A) struct bpf_spin_lock glock; private(A) struct bpf_rb_root groot __contains(node_acquire, node); @@ -29,7 +32,7 @@ static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b) } SEC("?tc") -__failure __msg("Unreleased reference id=3 alloc_insn=21") +__failure __msg("Unreleased reference id=4 alloc_insn=21") long rbtree_refcounted_node_ref_escapes(void *ctx) { struct node_acquire *n, *m; @@ -43,6 +46,8 @@ long rbtree_refcounted_node_ref_escapes(void *ctx) /* m becomes an owning ref but is never drop'd or added to a tree */ m = bpf_refcount_acquire(n); bpf_spin_unlock(&glock); + if (!m) + return 2; m->key = 2; return 0; @@ -69,4 +74,29 @@ long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx) return 0; } +SEC("?fentry.s/bpf_testmod_test_read") +__failure __msg("function calls are not allowed while holding a lock") +int BPF_PROG(rbtree_fail_sleepable_lock_across_rcu, + struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len) +{ + struct node_acquire *n; + + n = bpf_obj_new(typeof(*n)); + if (!n) + return 0; + + /* spin_{lock,unlock} are in different RCU CS */ + bpf_rcu_read_lock(); + bpf_spin_lock(&glock); + bpf_rbtree_add(&groot, &n->node, less); + bpf_rcu_read_unlock(); + + bpf_rcu_read_lock(); + bpf_spin_unlock(&glock); + bpf_rcu_read_unlock(); + + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c new file mode 100644 index 000000000000..3e745793b27a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Facebook */ +#include "vmlinux.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +void *local_storage_ptr = NULL; +void *sk_ptr = NULL; +int cookie_found = 0; +__u64 cookie = 0; +__u32 omem = 0; + +void *bpf_rdonly_cast(void *, __u32) __ksym; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); +} sk_storage SEC(".maps"); + +SEC("fexit/bpf_local_storage_destroy") +int BPF_PROG(bpf_local_storage_destroy, struct bpf_local_storage *local_storage) +{ + struct sock *sk; + + if (local_storage_ptr != local_storage) + return 0; + + sk = bpf_rdonly_cast(sk_ptr, bpf_core_type_id_kernel(struct sock)); + if (sk->sk_cookie.counter != cookie) + return 0; + + cookie_found++; + omem = sk->sk_omem_alloc.counter; + local_storage_ptr = NULL; + + return 0; +} + +SEC("fentry/inet6_sock_destruct") +int BPF_PROG(inet6_sock_destruct, struct sock *sk) +{ + int *value; + + if (!cookie || sk->sk_cookie.counter != cookie) + return 0; + + value = bpf_sk_storage_get(&sk_storage, sk, 0, 0); + if (value && *value == 0xdeadbeef) { + cookie_found++; + sk_ptr = sk; + local_storage_ptr = sk->sk_bpf_storage; + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/sock_destroy_prog.c b/tools/testing/selftests/bpf/progs/sock_destroy_prog.c new file mode 100644 index 000000000000..9e0bf7a54cec --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sock_destroy_prog.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#include "bpf_tracing_net.h" + +__be16 serv_port = 0; + +int bpf_sock_destroy(struct sock_common *sk) __ksym; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} tcp_conn_sockets SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} udp_conn_sockets SEC(".maps"); + +SEC("cgroup/connect6") +int sock_connect(struct bpf_sock_addr *ctx) +{ + __u64 sock_cookie = 0; + int key = 0; + __u32 keyc = 0; + + if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6) + return 1; + + sock_cookie = bpf_get_socket_cookie(ctx); + if (ctx->protocol == IPPROTO_TCP) + bpf_map_update_elem(&tcp_conn_sockets, &key, &sock_cookie, 0); + else if (ctx->protocol == IPPROTO_UDP) + bpf_map_update_elem(&udp_conn_sockets, &keyc, &sock_cookie, 0); + else + return 1; + + return 1; +} + +SEC("iter/tcp") +int iter_tcp6_client(struct bpf_iter__tcp *ctx) +{ + struct sock_common *sk_common = ctx->sk_common; + __u64 sock_cookie = 0; + __u64 *val; + int key = 0; + + if (!sk_common) + return 0; + + if (sk_common->skc_family != AF_INET6) + return 0; + + sock_cookie = bpf_get_socket_cookie(sk_common); + val = bpf_map_lookup_elem(&tcp_conn_sockets, &key); + if (!val) + return 0; + /* Destroy connected client sockets. */ + if (sock_cookie == *val) + bpf_sock_destroy(sk_common); + + return 0; +} + +SEC("iter/tcp") +int iter_tcp6_server(struct bpf_iter__tcp *ctx) +{ + struct sock_common *sk_common = ctx->sk_common; + const struct inet_connection_sock *icsk; + const struct inet_sock *inet; + struct tcp6_sock *tcp_sk; + __be16 srcp; + + if (!sk_common) + return 0; + + if (sk_common->skc_family != AF_INET6) + return 0; + + tcp_sk = bpf_skc_to_tcp6_sock(sk_common); + if (!tcp_sk) + return 0; + + icsk = &tcp_sk->tcp.inet_conn; + inet = &icsk->icsk_inet; + srcp = inet->inet_sport; + + /* Destroy server sockets. */ + if (srcp == serv_port) + bpf_sock_destroy(sk_common); + + return 0; +} + + +SEC("iter/udp") +int iter_udp6_client(struct bpf_iter__udp *ctx) +{ + struct udp_sock *udp_sk = ctx->udp_sk; + struct sock *sk = (struct sock *) udp_sk; + __u64 sock_cookie = 0, *val; + int key = 0; + + if (!sk) + return 0; + + sock_cookie = bpf_get_socket_cookie(sk); + val = bpf_map_lookup_elem(&udp_conn_sockets, &key); + if (!val) + return 0; + /* Destroy connected client sockets. */ + if (sock_cookie == *val) + bpf_sock_destroy((struct sock_common *)sk); + + return 0; +} + +SEC("iter/udp") +int iter_udp6_server(struct bpf_iter__udp *ctx) +{ + struct udp_sock *udp_sk = ctx->udp_sk; + struct sock *sk = (struct sock *) udp_sk; + struct inet_sock *inet; + __be16 srcp; + + if (!sk) + return 0; + + inet = &udp_sk->inet; + srcp = inet->inet_sport; + if (srcp == serv_port) + bpf_sock_destroy((struct sock_common *)sk); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c b/tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c new file mode 100644 index 000000000000..dd6850b58e25 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +int bpf_sock_destroy(struct sock_common *sk) __ksym; + +SEC("tp_btf/tcp_destroy_sock") +__failure __msg("calling kernel function bpf_sock_destroy is not allowed") +int BPF_PROG(trace_tcp_destroy_sock, struct sock *sk) +{ + /* should not load */ + bpf_sock_destroy((struct sock_common *)sk); + + return 0; +} + diff --git a/tools/testing/selftests/bpf/progs/sockopt_inherit.c b/tools/testing/selftests/bpf/progs/sockopt_inherit.c index 9fb241b97291..c8f59caa4639 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_inherit.c +++ b/tools/testing/selftests/bpf/progs/sockopt_inherit.c @@ -9,6 +9,8 @@ char _license[] SEC("license") = "GPL"; #define CUSTOM_INHERIT2 1 #define CUSTOM_LISTENER 2 +__u32 page_size = 0; + struct sockopt_inherit { __u8 val; }; @@ -55,7 +57,7 @@ int _getsockopt(struct bpf_sockopt *ctx) __u8 *optval = ctx->optval; if (ctx->level != SOL_CUSTOM) - return 1; /* only interested in SOL_CUSTOM */ + goto out; /* only interested in SOL_CUSTOM */ if (optval + 1 > optval_end) return 0; /* EPERM, bounds check */ @@ -70,6 +72,12 @@ int _getsockopt(struct bpf_sockopt *ctx) ctx->optlen = 1; return 1; + +out: + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 1; } SEC("cgroup/setsockopt") @@ -80,7 +88,7 @@ int _setsockopt(struct bpf_sockopt *ctx) __u8 *optval = ctx->optval; if (ctx->level != SOL_CUSTOM) - return 1; /* only interested in SOL_CUSTOM */ + goto out; /* only interested in SOL_CUSTOM */ if (optval + 1 > optval_end) return 0; /* EPERM, bounds check */ @@ -93,4 +101,10 @@ int _setsockopt(struct bpf_sockopt *ctx) ctx->optlen = -1; return 1; + +out: + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 1; } diff --git a/tools/testing/selftests/bpf/progs/sockopt_multi.c b/tools/testing/selftests/bpf/progs/sockopt_multi.c index 177a59069dae..96f29fce050b 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_multi.c +++ b/tools/testing/selftests/bpf/progs/sockopt_multi.c @@ -5,6 +5,8 @@ char _license[] SEC("license") = "GPL"; +__u32 page_size = 0; + SEC("cgroup/getsockopt") int _getsockopt_child(struct bpf_sockopt *ctx) { @@ -12,7 +14,7 @@ int _getsockopt_child(struct bpf_sockopt *ctx) __u8 *optval = ctx->optval; if (ctx->level != SOL_IP || ctx->optname != IP_TOS) - return 1; + goto out; if (optval + 1 > optval_end) return 0; /* EPERM, bounds check */ @@ -26,6 +28,12 @@ int _getsockopt_child(struct bpf_sockopt *ctx) ctx->optlen = 1; return 1; + +out: + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 1; } SEC("cgroup/getsockopt") @@ -35,7 +43,7 @@ int _getsockopt_parent(struct bpf_sockopt *ctx) __u8 *optval = ctx->optval; if (ctx->level != SOL_IP || ctx->optname != IP_TOS) - return 1; + goto out; if (optval + 1 > optval_end) return 0; /* EPERM, bounds check */ @@ -49,6 +57,12 @@ int _getsockopt_parent(struct bpf_sockopt *ctx) ctx->optlen = 1; return 1; + +out: + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 1; } SEC("cgroup/setsockopt") @@ -58,7 +72,7 @@ int _setsockopt(struct bpf_sockopt *ctx) __u8 *optval = ctx->optval; if (ctx->level != SOL_IP || ctx->optname != IP_TOS) - return 1; + goto out; if (optval + 1 > optval_end) return 0; /* EPERM, bounds check */ @@ -67,4 +81,10 @@ int _setsockopt(struct bpf_sockopt *ctx) ctx->optlen = 1; return 1; + +out: + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 1; } diff --git a/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c index 1bce83b6e3a7..dbe235ede7f3 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c +++ b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c @@ -9,6 +9,8 @@ char _license[] SEC("license") = "GPL"; +__u32 page_size = 0; + SEC("cgroup/setsockopt") int sockopt_qos_to_cc(struct bpf_sockopt *ctx) { @@ -19,7 +21,7 @@ int sockopt_qos_to_cc(struct bpf_sockopt *ctx) char cc_cubic[TCP_CA_NAME_MAX] = "cubic"; if (ctx->level != SOL_IPV6 || ctx->optname != IPV6_TCLASS) - return 1; + goto out; if (optval + 1 > optval_end) return 0; /* EPERM, bounds check */ @@ -36,4 +38,10 @@ int sockopt_qos_to_cc(struct bpf_sockopt *ctx) return 0; } return 1; + +out: + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 1; } diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c index fe1df4cd206e..cb990a7d3d45 100644 --- a/tools/testing/selftests/bpf/progs/sockopt_sk.c +++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c @@ -37,7 +37,7 @@ int _getsockopt(struct bpf_sockopt *ctx) /* Bypass AF_NETLINK. */ sk = ctx->sk; if (sk && sk->family == AF_NETLINK) - return 1; + goto out; /* Make sure bpf_get_netns_cookie is callable. */ @@ -52,8 +52,7 @@ int _getsockopt(struct bpf_sockopt *ctx) * let next BPF program in the cgroup chain or kernel * handle it. */ - ctx->optlen = 0; /* bypass optval>PAGE_SIZE */ - return 1; + goto out; } if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) { @@ -61,7 +60,7 @@ int _getsockopt(struct bpf_sockopt *ctx) * let next BPF program in the cgroup chain or kernel * handle it. */ - return 1; + goto out; } if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) { @@ -69,7 +68,7 @@ int _getsockopt(struct bpf_sockopt *ctx) * let next BPF program in the cgroup chain or kernel * handle it. */ - return 1; + goto out; } if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) { @@ -85,7 +84,7 @@ int _getsockopt(struct bpf_sockopt *ctx) if (((struct tcp_zerocopy_receive *)optval)->address != 0) return 0; /* unexpected data */ - return 1; + goto out; } if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) { @@ -129,6 +128,12 @@ int _getsockopt(struct bpf_sockopt *ctx) ctx->optlen = 1; return 1; + +out: + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 1; } SEC("cgroup/setsockopt") @@ -142,7 +147,7 @@ int _setsockopt(struct bpf_sockopt *ctx) /* Bypass AF_NETLINK. */ sk = ctx->sk; if (sk && sk->family == AF_NETLINK) - return 1; + goto out; /* Make sure bpf_get_netns_cookie is callable. */ @@ -224,4 +229,10 @@ int _setsockopt(struct bpf_sockopt *ctx) */ return 1; + +out: + /* optval larger than PAGE_SIZE use kernel's buffer. */ + if (ctx->optlen > page_size) + ctx->optlen = 0; + return 1; } diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c index b09371bba204..70df695312dc 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_success.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c @@ -18,6 +18,13 @@ int err, pid; */ struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym __weak; + +struct task_struct *bpf_task_acquire___one(struct task_struct *task) __ksym __weak; +/* The two-param bpf_task_acquire doesn't exist */ +struct task_struct *bpf_task_acquire___two(struct task_struct *p, void *ctx) __ksym __weak; +/* Incorrect type for first param */ +struct task_struct *bpf_task_acquire___three(void *ctx) __ksym __weak; + void invalid_kfunc(void) __ksym __weak; void bpf_testmod_test_mod_kfunc(int i) __ksym __weak; @@ -56,6 +63,50 @@ static int test_acquire_release(struct task_struct *task) } SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_kfunc_flavor_relo, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired = NULL; + int fake_ctx = 42; + + if (bpf_ksym_exists(bpf_task_acquire___one)) { + acquired = bpf_task_acquire___one(task); + } else if (bpf_ksym_exists(bpf_task_acquire___two)) { + /* Here, bpf_object__resolve_ksym_func_btf_id's find_ksym_btf_id + * call will find vmlinux's bpf_task_acquire, but subsequent + * bpf_core_types_are_compat will fail + */ + acquired = bpf_task_acquire___two(task, &fake_ctx); + err = 3; + return 0; + } else if (bpf_ksym_exists(bpf_task_acquire___three)) { + /* bpf_core_types_are_compat will fail similarly to above case */ + acquired = bpf_task_acquire___three(&fake_ctx); + err = 4; + return 0; + } + + if (acquired) + bpf_task_release(acquired); + else + err = 5; + return 0; +} + +SEC("tp_btf/task_newtask") +int BPF_PROG(test_task_kfunc_flavor_relo_not_found, struct task_struct *task, u64 clone_flags) +{ + /* Neither symbol should successfully resolve. + * Success or failure of one ___flavor should not affect others + */ + if (bpf_ksym_exists(bpf_task_acquire___two)) + err = 1; + else if (bpf_ksym_exists(bpf_task_acquire___three)) + err = 2; + + return 0; +} + +SEC("tp_btf/task_newtask") int BPF_PROG(test_task_acquire_release_argument, struct task_struct *task, u64 clone_flags) { if (!is_test_kfunc_task()) diff --git a/tools/testing/selftests/bpf/progs/test_assign_reuse.c b/tools/testing/selftests/bpf/progs/test_assign_reuse.c new file mode 100644 index 000000000000..4f2e2321ea06 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_assign_reuse.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Isovalent */ +#include <stdbool.h> +#include <linux/bpf.h> +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/udp.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> +#include <linux/pkt_cls.h> + +char LICENSE[] SEC("license") = "GPL"; + +__u64 sk_cookie_seen; +__u64 reuseport_executed; +union { + struct tcphdr tcp; + struct udphdr udp; +} headers; + +const volatile __u16 dest_port; + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} sk_map SEC(".maps"); + +SEC("sk_reuseport") +int reuse_accept(struct sk_reuseport_md *ctx) +{ + reuseport_executed++; + + if (ctx->ip_protocol == IPPROTO_TCP) { + if (ctx->data + sizeof(headers.tcp) > ctx->data_end) + return SK_DROP; + + if (__builtin_memcmp(&headers.tcp, ctx->data, sizeof(headers.tcp)) != 0) + return SK_DROP; + } else if (ctx->ip_protocol == IPPROTO_UDP) { + if (ctx->data + sizeof(headers.udp) > ctx->data_end) + return SK_DROP; + + if (__builtin_memcmp(&headers.udp, ctx->data, sizeof(headers.udp)) != 0) + return SK_DROP; + } else { + return SK_DROP; + } + + sk_cookie_seen = bpf_get_socket_cookie(ctx->sk); + return SK_PASS; +} + +SEC("sk_reuseport") +int reuse_drop(struct sk_reuseport_md *ctx) +{ + reuseport_executed++; + sk_cookie_seen = 0; + return SK_DROP; +} + +static int +assign_sk(struct __sk_buff *skb) +{ + int zero = 0, ret = 0; + struct bpf_sock *sk; + + sk = bpf_map_lookup_elem(&sk_map, &zero); + if (!sk) + return TC_ACT_SHOT; + ret = bpf_sk_assign(skb, sk, 0); + bpf_sk_release(sk); + return ret ? TC_ACT_SHOT : TC_ACT_OK; +} + +static bool +maybe_assign_tcp(struct __sk_buff *skb, struct tcphdr *th) +{ + if (th + 1 > (void *)(long)(skb->data_end)) + return TC_ACT_SHOT; + + if (!th->syn || th->ack || th->dest != bpf_htons(dest_port)) + return TC_ACT_OK; + + __builtin_memcpy(&headers.tcp, th, sizeof(headers.tcp)); + return assign_sk(skb); +} + +static bool +maybe_assign_udp(struct __sk_buff *skb, struct udphdr *uh) +{ + if (uh + 1 > (void *)(long)(skb->data_end)) + return TC_ACT_SHOT; + + if (uh->dest != bpf_htons(dest_port)) + return TC_ACT_OK; + + __builtin_memcpy(&headers.udp, uh, sizeof(headers.udp)); + return assign_sk(skb); +} + +SEC("tc") +int tc_main(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + struct ethhdr *eth; + + eth = (struct ethhdr *)(data); + if (eth + 1 > data_end) + return TC_ACT_SHOT; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = (struct iphdr *)(data + sizeof(*eth)); + + if (iph + 1 > data_end) + return TC_ACT_SHOT; + + if (iph->protocol == IPPROTO_TCP) + return maybe_assign_tcp(skb, (struct tcphdr *)(iph + 1)); + else if (iph->protocol == IPPROTO_UDP) + return maybe_assign_udp(skb, (struct udphdr *)(iph + 1)); + else + return TC_ACT_SHOT; + } else { + struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + sizeof(*eth)); + + if (ip6h + 1 > data_end) + return TC_ACT_SHOT; + + if (ip6h->nexthdr == IPPROTO_TCP) + return maybe_assign_tcp(skb, (struct tcphdr *)(ip6h + 1)); + else if (ip6h->nexthdr == IPPROTO_UDP) + return maybe_assign_udp(skb, (struct udphdr *)(ip6h + 1)); + else + return TC_ACT_SHOT; + } +} diff --git a/tools/testing/selftests/bpf/progs/test_bpf_ma.c b/tools/testing/selftests/bpf/progs/test_bpf_ma.c new file mode 100644 index 000000000000..ecde41ae0fc8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_bpf_ma.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "bpf_experimental.h" +#include "bpf_misc.h" + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +struct generic_map_value { + void *data; +}; + +char _license[] SEC("license") = "GPL"; + +const unsigned int data_sizes[] = {8, 16, 32, 64, 96, 128, 192, 256, 512, 1024, 2048, 4096}; +const volatile unsigned int data_btf_ids[ARRAY_SIZE(data_sizes)] = {}; + +int err = 0; +int pid = 0; + +#define DEFINE_ARRAY_WITH_KPTR(_size) \ + struct bin_data_##_size { \ + char data[_size - sizeof(void *)]; \ + }; \ + struct map_value_##_size { \ + struct bin_data_##_size __kptr * data; \ + /* To emit BTF info for bin_data_xx */ \ + struct bin_data_##_size not_used; \ + }; \ + struct { \ + __uint(type, BPF_MAP_TYPE_ARRAY); \ + __type(key, int); \ + __type(value, struct map_value_##_size); \ + __uint(max_entries, 128); \ + } array_##_size SEC(".maps"); + +static __always_inline void batch_alloc_free(struct bpf_map *map, unsigned int batch, + unsigned int idx) +{ + struct generic_map_value *value; + unsigned int i, key; + void *old, *new; + + for (i = 0; i < batch; i++) { + key = i; + value = bpf_map_lookup_elem(map, &key); + if (!value) { + err = 1; + return; + } + new = bpf_obj_new_impl(data_btf_ids[idx], NULL); + if (!new) { + err = 2; + return; + } + old = bpf_kptr_xchg(&value->data, new); + if (old) { + bpf_obj_drop(old); + err = 3; + return; + } + } + for (i = 0; i < batch; i++) { + key = i; + value = bpf_map_lookup_elem(map, &key); + if (!value) { + err = 4; + return; + } + old = bpf_kptr_xchg(&value->data, NULL); + if (!old) { + err = 5; + return; + } + bpf_obj_drop(old); + } +} + +#define CALL_BATCH_ALLOC_FREE(size, batch, idx) \ + batch_alloc_free((struct bpf_map *)(&array_##size), batch, idx) + +DEFINE_ARRAY_WITH_KPTR(8); +DEFINE_ARRAY_WITH_KPTR(16); +DEFINE_ARRAY_WITH_KPTR(32); +DEFINE_ARRAY_WITH_KPTR(64); +DEFINE_ARRAY_WITH_KPTR(96); +DEFINE_ARRAY_WITH_KPTR(128); +DEFINE_ARRAY_WITH_KPTR(192); +DEFINE_ARRAY_WITH_KPTR(256); +DEFINE_ARRAY_WITH_KPTR(512); +DEFINE_ARRAY_WITH_KPTR(1024); +DEFINE_ARRAY_WITH_KPTR(2048); +DEFINE_ARRAY_WITH_KPTR(4096); + +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int test_bpf_mem_alloc_free(void *ctx) +{ + if ((u32)bpf_get_current_pid_tgid() != pid) + return 0; + + /* Alloc 128 8-bytes objects in batch to trigger refilling, + * then free 128 8-bytes objects in batch to trigger freeing. + */ + CALL_BATCH_ALLOC_FREE(8, 128, 0); + CALL_BATCH_ALLOC_FREE(16, 128, 1); + CALL_BATCH_ALLOC_FREE(32, 128, 2); + CALL_BATCH_ALLOC_FREE(64, 128, 3); + CALL_BATCH_ALLOC_FREE(96, 128, 4); + CALL_BATCH_ALLOC_FREE(128, 128, 5); + CALL_BATCH_ALLOC_FREE(192, 128, 6); + CALL_BATCH_ALLOC_FREE(256, 128, 7); + CALL_BATCH_ALLOC_FREE(512, 64, 8); + CALL_BATCH_ALLOC_FREE(1024, 32, 9); + CALL_BATCH_ALLOC_FREE(2048, 16, 10); + CALL_BATCH_ALLOC_FREE(4096, 8, 11); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.h b/tools/testing/selftests/bpf/progs/test_cls_redirect.h index 76eab0aacba0..233b089d1fba 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect.h +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.h @@ -12,6 +12,15 @@ #include <linux/ipv6.h> #include <linux/udp.h> +/* offsetof() is used in static asserts, and the libbpf-redefined CO-RE + * friendly version breaks compilation for older clang versions <= 15 + * when invoked in a static assert. Restore original here. + */ +#ifdef offsetof +#undef offsetof +#define offsetof(type, member) __builtin_offsetof(type, member) +#endif + struct gre_base_hdr { uint16_t flags; uint16_t protocol; diff --git a/tools/testing/selftests/bpf/progs/test_fill_link_info.c b/tools/testing/selftests/bpf/progs/test_fill_link_info.c new file mode 100644 index 000000000000..564f402d56fe --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_fill_link_info.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */ + +#include "vmlinux.h" +#include <bpf/bpf_tracing.h> +#include <stdbool.h> + +extern bool CONFIG_X86_KERNEL_IBT __kconfig __weak; + +/* This function is here to have CONFIG_X86_KERNEL_IBT + * used and added to object BTF. + */ +int unused(void) +{ + return CONFIG_X86_KERNEL_IBT ? 0 : 1; +} + +SEC("kprobe") +int BPF_PROG(kprobe_run) +{ + return 0; +} + +SEC("uprobe") +int BPF_PROG(uprobe_run) +{ + return 0; +} + +SEC("tracepoint") +int BPF_PROG(tp_run) +{ + return 0; +} + +SEC("kprobe.multi") +int BPF_PROG(kmulti_run) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_global_func1.c b/tools/testing/selftests/bpf/progs/test_global_func1.c index b85fc8c423ba..17a9f59bf5f3 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func1.c +++ b/tools/testing/selftests/bpf/progs/test_global_func1.c @@ -10,6 +10,8 @@ static __attribute__ ((noinline)) int f0(int var, struct __sk_buff *skb) { + asm volatile (""); + return skb->len; } diff --git a/tools/testing/selftests/bpf/progs/test_global_map_resize.c b/tools/testing/selftests/bpf/progs/test_global_map_resize.c new file mode 100644 index 000000000000..1fbb73d3e5d5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_global_map_resize.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +/* rodata section */ +const volatile pid_t pid; +const volatile size_t bss_array_len; +const volatile size_t data_array_len; + +/* bss section */ +int sum = 0; +int array[1]; + +/* custom data secton */ +int my_array[1] SEC(".data.custom"); + +/* custom data section which should NOT be resizable, + * since it contains a single var which is not an array + */ +int my_int SEC(".data.non_array"); + +/* custom data section which should NOT be resizable, + * since its last var is not an array + */ +int my_array_first[1] SEC(".data.array_not_last"); +int my_int_last SEC(".data.array_not_last"); + +int percpu_arr[1] SEC(".data.percpu_arr"); + +SEC("tp/syscalls/sys_enter_getpid") +int bss_array_sum(void *ctx) +{ + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + /* this will be zero, we just rely on verifier not rejecting this */ + sum = percpu_arr[bpf_get_smp_processor_id()]; + + for (size_t i = 0; i < bss_array_len; ++i) + sum += array[i]; + + return 0; +} + +SEC("tp/syscalls/sys_enter_getuid") +int data_array_sum(void *ctx) +{ + if (pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + /* this will be zero, we just rely on verifier not rejecting this */ + sum = percpu_arr[bpf_get_smp_processor_id()]; + + for (size_t i = 0; i < data_array_len; ++i) + sum += my_array[i]; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c new file mode 100644 index 000000000000..67c14ba1e87b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 +const volatile int skip = 0; +#else +const volatile int skip = 1; +#endif + +volatile const short val1 = -1; +volatile const int val2 = -1; +short val3 = -1; +int val4 = -1; +int done1, done2, ret1, ret2; + +SEC("?raw_tp/sys_enter") +int rdonly_map_prog(const void *ctx) +{ + if (done1) + return 0; + + done1 = 1; + /* val1/val2 readonly map */ + if (val1 == val2) + ret1 = 1; + return 0; + +} + +SEC("?raw_tp/sys_enter") +int map_val_prog(const void *ctx) +{ + if (done2) + return 0; + + done2 = 1; + /* val1/val2 regular read/write map */ + if (val3 == val4) + ret2 = 1; + return 0; + +} + +struct bpf_testmod_struct_arg_1 { + int a; +}; + +long long int_member; + +SEC("?fentry/bpf_testmod_test_arg_ptr_to_struct") +int BPF_PROG2(test_ptr_struct_arg, struct bpf_testmod_struct_arg_1 *, p) +{ + /* probed memory access */ + int_member = p->a; + return 0; +} + +long long set_optlen, set_retval; + +SEC("?cgroup/getsockopt") +int _getsockopt(volatile struct bpf_sockopt *ctx) +{ + int old_optlen, old_retval; + + old_optlen = ctx->optlen; + old_retval = ctx->retval; + + ctx->optlen = -1; + ctx->retval = -1; + + /* sign extension for ctx member */ + set_optlen = ctx->optlen; + set_retval = ctx->retval; + + ctx->optlen = old_optlen; + ctx->retval = old_retval; + + return 0; +} + +long long set_mark; + +SEC("?tc") +int _tc(volatile struct __sk_buff *skb) +{ + long long tmp_mark; + int old_mark; + + old_mark = skb->mark; + + skb->mark = 0xf6fe; + + /* narrowed sign extension for ctx member */ +#if __clang_major__ >= 18 + /* force narrow one-byte signed load. Otherwise, compiler may + * generate a 32-bit unsigned load followed by an s8 movsx. + */ + asm volatile ("r1 = *(s8 *)(%[ctx] + %[off_mark])\n\t" + "%[tmp_mark] = r1" + : [tmp_mark]"=r"(tmp_mark) + : [ctx]"r"(skb), + [off_mark]"i"(offsetof(struct __sk_buff, mark)) + : "r1"); +#else + tmp_mark = (char)skb->mark; +#endif + set_mark = tmp_mark; + + skb->mark = old_mark; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_lwt_redirect.c b/tools/testing/selftests/bpf/progs/test_lwt_redirect.c new file mode 100644 index 000000000000..8c895122f293 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_lwt_redirect.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> +#include <linux/ip.h> +#include "bpf_tracing_net.h" + +/* We don't care about whether the packet can be received by network stack. + * Just care if the packet is sent to the correct device at correct direction + * and not panic the kernel. + */ +static int prepend_dummy_mac(struct __sk_buff *skb) +{ + char mac[] = {0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0xf, + 0xe, 0xd, 0xc, 0xb, 0xa, 0x08, 0x00}; + + if (bpf_skb_change_head(skb, ETH_HLEN, 0)) + return -1; + + if (bpf_skb_store_bytes(skb, 0, mac, sizeof(mac), 0)) + return -1; + + return 0; +} + +/* Use the last byte of IP address to redirect the packet */ +static int get_redirect_target(struct __sk_buff *skb) +{ + struct iphdr *iph = NULL; + void *start = (void *)(long)skb->data; + void *end = (void *)(long)skb->data_end; + + if (start + sizeof(*iph) > end) + return -1; + + iph = (struct iphdr *)start; + return bpf_ntohl(iph->daddr) & 0xff; +} + +SEC("redir_ingress") +int test_lwt_redirect_in(struct __sk_buff *skb) +{ + int target = get_redirect_target(skb); + + if (target < 0) + return BPF_OK; + + if (prepend_dummy_mac(skb)) + return BPF_DROP; + + return bpf_redirect(target, BPF_F_INGRESS); +} + +SEC("redir_egress") +int test_lwt_redirect_out(struct __sk_buff *skb) +{ + int target = get_redirect_target(skb); + + if (target < 0) + return BPF_OK; + + if (prepend_dummy_mac(skb)) + return BPF_DROP; + + return bpf_redirect(target, 0); +} + +SEC("redir_egress_nomac") +int test_lwt_redirect_out_nomac(struct __sk_buff *skb) +{ + int target = get_redirect_target(skb); + + if (target < 0) + return BPF_OK; + + return bpf_redirect(target, 0); +} + +SEC("redir_ingress_nomac") +int test_lwt_redirect_in_nomac(struct __sk_buff *skb) +{ + int target = get_redirect_target(skb); + + if (target < 0) + return BPF_OK; + + return bpf_redirect(target, BPF_F_INGRESS); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_lwt_reroute.c b/tools/testing/selftests/bpf/progs/test_lwt_reroute.c new file mode 100644 index 000000000000..1dc64351929c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_lwt_reroute.c @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <inttypes.h> +#include <linux/bpf.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> +#include <linux/if_ether.h> +#include <linux/ip.h> + +/* This function extracts the last byte of the daddr, and uses it + * as output dev index. + */ +SEC("lwt_xmit") +int test_lwt_reroute(struct __sk_buff *skb) +{ + struct iphdr *iph = NULL; + void *start = (void *)(long)skb->data; + void *end = (void *)(long)skb->data_end; + + /* set mark at most once */ + if (skb->mark != 0) + return BPF_OK; + + if (start + sizeof(*iph) > end) + return BPF_DROP; + + iph = (struct iphdr *)start; + skb->mark = bpf_ntohl(iph->daddr) & 0xff; + + /* do not reroute x.x.x.0 packets */ + if (skb->mark == 0) + return BPF_OK; + + return BPF_LWT_REROUTE; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_netfilter_link_attach.c b/tools/testing/selftests/bpf/progs/test_netfilter_link_attach.c new file mode 100644 index 000000000000..03a475160abe --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_netfilter_link_attach.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +#define NF_ACCEPT 1 + +SEC("netfilter") +int nf_link_attach_test(struct bpf_nf_ctx *ctx) +{ + return NF_ACCEPT; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c new file mode 100644 index 000000000000..4bdd65b5aa2d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */ + +#include "vmlinux.h" +#include <bpf/bpf_tracing.h> + +char tp_name[128]; + +SEC("lsm/bpf") +int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size) +{ + switch (cmd) { + case BPF_RAW_TRACEPOINT_OPEN: + bpf_probe_read_user_str(tp_name, sizeof(tp_name) - 1, + (void *)attr->raw_tracepoint.name); + break; + default: + break; + } + return 0; +} + +SEC("raw_tracepoint") +int BPF_PROG(raw_tp_run) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c index bbad3c2d9aa5..f75e531bf36f 100644 --- a/tools/testing/selftests/bpf/progs/test_sock_fields.c +++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c @@ -265,7 +265,10 @@ static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk) static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk) { - __u16 *half = (__u16 *)&sk->dst_port; + __u16 *half; + + asm volatile (""); + half = (__u16 *)&sk->dst_port; return half[0] == bpf_htons(0xcafe); } diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c index 325c9f193432..464d35bd57c7 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c @@ -28,12 +28,26 @@ struct { __type(value, unsigned int); } verdict_map SEC(".maps"); +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} parser_map SEC(".maps"); + bool test_sockmap = false; /* toggled by user-space */ bool test_ingress = false; /* toggled by user-space */ SEC("sk_skb/stream_parser") int prog_stream_parser(struct __sk_buff *skb) { + int *value; + __u32 key = 0; + + value = bpf_map_lookup_elem(&parser_map, &key); + if (value && *value) + return *value; + return skb->len; } diff --git a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c new file mode 100644 index 000000000000..56cdc0a553f0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Bytedance */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "bpf_misc.h" + +struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym; +long bpf_task_under_cgroup(struct task_struct *task, struct cgroup *ancestor) __ksym; +void bpf_cgroup_release(struct cgroup *p) __ksym; +struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym; +void bpf_task_release(struct task_struct *p) __ksym; + +const volatile int local_pid; +const volatile __u64 cgid; +int remote_pid; + +SEC("tp_btf/task_newtask") +int BPF_PROG(handle__task_newtask, struct task_struct *task, u64 clone_flags) +{ + struct cgroup *cgrp = NULL; + struct task_struct *acquired; + + if (local_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + acquired = bpf_task_acquire(task); + if (!acquired) + return 0; + + if (local_pid == acquired->tgid) + goto out; + + cgrp = bpf_cgroup_from_id(cgid); + if (!cgrp) + goto out; + + if (bpf_task_under_cgroup(acquired, cgrp)) + remote_pid = acquired->tgid; + +out: + if (cgrp) + bpf_cgroup_release(cgrp); + bpf_task_release(acquired); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_bpf.c b/tools/testing/selftests/bpf/progs/test_tc_bpf.c index d28ca8d1f3d0..ef7da419632a 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_bpf.c +++ b/tools/testing/selftests/bpf/progs/test_tc_bpf.c @@ -2,6 +2,8 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include <linux/if_ether.h> +#include <linux/ip.h> /* Dummy prog to test TC-BPF API */ @@ -10,3 +12,14 @@ int cls(struct __sk_buff *skb) { return 0; } + +/* Prog to verify tc-bpf without cap_sys_admin and cap_perfmon */ +SEC("tcx/ingress") +int pkt_ptr(struct __sk_buff *skb) +{ + struct iphdr *iph = (void *)(long)skb->data + sizeof(struct ethhdr); + + if ((long)(iph + 1) > (long)skb->data_end) + return 1; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_tc_link.c b/tools/testing/selftests/bpf/progs/test_tc_link.c new file mode 100644 index 000000000000..30e7124c49a1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tc_link.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Isovalent */ +#include <stdbool.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +char LICENSE[] SEC("license") = "GPL"; + +bool seen_tc1; +bool seen_tc2; +bool seen_tc3; +bool seen_tc4; +bool seen_tc5; +bool seen_tc6; + +SEC("tc/ingress") +int tc1(struct __sk_buff *skb) +{ + seen_tc1 = true; + return TCX_NEXT; +} + +SEC("tc/egress") +int tc2(struct __sk_buff *skb) +{ + seen_tc2 = true; + return TCX_NEXT; +} + +SEC("tc/egress") +int tc3(struct __sk_buff *skb) +{ + seen_tc3 = true; + return TCX_NEXT; +} + +SEC("tc/egress") +int tc4(struct __sk_buff *skb) +{ + seen_tc4 = true; + return TCX_NEXT; +} + +SEC("tc/egress") +int tc5(struct __sk_buff *skb) +{ + seen_tc5 = true; + return TCX_PASS; +} + +SEC("tc/egress") +int tc6(struct __sk_buff *skb) +{ + seen_tc6 = true; + return TCX_PASS; +} diff --git a/tools/testing/selftests/bpf/progs/test_xdp_attach_fail.c b/tools/testing/selftests/bpf/progs/test_xdp_attach_fail.c new file mode 100644 index 000000000000..2ff1b596e87e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_xdp_attach_fail.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Leon Hwang */ + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +#define ERRMSG_LEN 64 + +struct xdp_errmsg { + char msg[ERRMSG_LEN]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __type(key, int); + __type(value, int); +} xdp_errmsg_pb SEC(".maps"); + +struct xdp_attach_error_ctx { + unsigned long unused; + + /* + * bpf does not support tracepoint __data_loc directly. + * + * Actually, this field is a 32 bit integer whose value encodes + * information on where to find the actual data. The first 2 bytes is + * the size of the data. The last 2 bytes is the offset from the start + * of the tracepoint struct where the data begins. + * -- https://github.com/iovisor/bpftrace/pull/1542 + */ + __u32 msg; // __data_loc char[] msg; +}; + +/* + * Catch the error message at the tracepoint. + */ + +SEC("tp/xdp/bpf_xdp_link_attach_failed") +int tp__xdp__bpf_xdp_link_attach_failed(struct xdp_attach_error_ctx *ctx) +{ + char *msg = (void *)(__u64) ((void *) ctx + (__u16) ctx->msg); + struct xdp_errmsg errmsg = {}; + + bpf_probe_read_kernel_str(&errmsg.msg, ERRMSG_LEN, msg); + bpf_perf_event_output(ctx, &xdp_errmsg_pb, BPF_F_CURRENT_CPU, &errmsg, + ERRMSG_LEN); + return 0; +} + +/* + * Reuse the XDP program in xdp_dummy.c. + */ + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c index 25ee4a22e48d..78c368e71797 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c @@ -2,6 +2,7 @@ /* Copyright (c) 2022 Meta */ #include <stddef.h> #include <string.h> +#include <stdbool.h> #include <linux/bpf.h> #include <linux/if_ether.h> #include <linux/if_packet.h> diff --git a/tools/testing/selftests/bpf/progs/timer_failure.c b/tools/testing/selftests/bpf/progs/timer_failure.c new file mode 100644 index 000000000000..226d33b5a05c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/timer_failure.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include <linux/bpf.h> +#include <time.h> +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "bpf_tcp_helpers.h" + +char _license[] SEC("license") = "GPL"; + +struct elem { + struct bpf_timer t; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} timer_map SEC(".maps"); + +static int timer_cb_ret1(void *map, int *key, struct bpf_timer *timer) +{ + if (bpf_get_smp_processor_id() % 2) + return 1; + else + return 0; +} + +SEC("fentry/bpf_fentry_test1") +__failure __msg("should have been in (0x0; 0x0)") +int BPF_PROG2(test_ret_1, int, a) +{ + int key = 0; + struct bpf_timer *timer; + + timer = bpf_map_lookup_elem(&timer_map, &key); + if (timer) { + bpf_timer_init(timer, &timer_map, CLOCK_BOOTTIME); + bpf_timer_set_callback(timer, timer_cb_ret1); + bpf_timer_start(timer, 1000, 0); + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/tracing_struct.c b/tools/testing/selftests/bpf/progs/tracing_struct.c index c435a3a8328a..515daef3c84b 100644 --- a/tools/testing/selftests/bpf/progs/tracing_struct.c +++ b/tools/testing/selftests/bpf/progs/tracing_struct.c @@ -18,6 +18,11 @@ struct bpf_testmod_struct_arg_3 { int b[]; }; +struct bpf_testmod_struct_arg_4 { + u64 a; + int b; +}; + long t1_a_a, t1_a_b, t1_b, t1_c, t1_ret, t1_nregs; __u64 t1_reg0, t1_reg1, t1_reg2, t1_reg3; long t2_a, t2_b_a, t2_b_b, t2_c, t2_ret; @@ -25,6 +30,9 @@ long t3_a, t3_b, t3_c_a, t3_c_b, t3_ret; long t4_a_a, t4_b, t4_c, t4_d, t4_e_a, t4_e_b, t4_ret; long t5_ret; int t6; +long t7_a, t7_b, t7_c, t7_d, t7_e, t7_f_a, t7_f_b, t7_ret; +long t8_a, t8_b, t8_c, t8_d, t8_e, t8_f_a, t8_f_b, t8_g, t8_ret; + SEC("fentry/bpf_testmod_test_struct_arg_1") int BPF_PROG2(test_struct_arg_1, struct bpf_testmod_struct_arg_2, a, int, b, int, c) @@ -130,4 +138,50 @@ int BPF_PROG2(test_struct_arg_11, struct bpf_testmod_struct_arg_3 *, a) return 0; } +SEC("fentry/bpf_testmod_test_struct_arg_7") +int BPF_PROG2(test_struct_arg_12, __u64, a, void *, b, short, c, int, d, + void *, e, struct bpf_testmod_struct_arg_4, f) +{ + t7_a = a; + t7_b = (long)b; + t7_c = c; + t7_d = d; + t7_e = (long)e; + t7_f_a = f.a; + t7_f_b = f.b; + return 0; +} + +SEC("fexit/bpf_testmod_test_struct_arg_7") +int BPF_PROG2(test_struct_arg_13, __u64, a, void *, b, short, c, int, d, + void *, e, struct bpf_testmod_struct_arg_4, f, int, ret) +{ + t7_ret = ret; + return 0; +} + +SEC("fentry/bpf_testmod_test_struct_arg_8") +int BPF_PROG2(test_struct_arg_14, __u64, a, void *, b, short, c, int, d, + void *, e, struct bpf_testmod_struct_arg_4, f, int, g) +{ + t8_a = a; + t8_b = (long)b; + t8_c = c; + t8_d = d; + t8_e = (long)e; + t8_f_a = f.a; + t8_f_b = f.b; + t8_g = g; + return 0; +} + +SEC("fexit/bpf_testmod_test_struct_arg_8") +int BPF_PROG2(test_struct_arg_15, __u64, a, void *, b, short, c, int, d, + void *, e, struct bpf_testmod_struct_arg_4, f, int, g, + int, ret) +{ + t8_ret = ret; + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi.c b/tools/testing/selftests/bpf/progs/uprobe_multi.c new file mode 100644 index 000000000000..419d9aa28fce --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uprobe_multi.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <stdbool.h> + +char _license[] SEC("license") = "GPL"; + +__u64 uprobe_multi_func_1_addr = 0; +__u64 uprobe_multi_func_2_addr = 0; +__u64 uprobe_multi_func_3_addr = 0; + +__u64 uprobe_multi_func_1_result = 0; +__u64 uprobe_multi_func_2_result = 0; +__u64 uprobe_multi_func_3_result = 0; + +__u64 uretprobe_multi_func_1_result = 0; +__u64 uretprobe_multi_func_2_result = 0; +__u64 uretprobe_multi_func_3_result = 0; + +__u64 uprobe_multi_sleep_result = 0; + +int pid = 0; +int child_pid = 0; + +bool test_cookie = false; +void *user_ptr = 0; + +static __always_inline bool verify_sleepable_user_copy(void) +{ + char data[9]; + + bpf_copy_from_user(data, sizeof(data), user_ptr); + return bpf_strncmp(data, sizeof(data), "test_data") == 0; +} + +static void uprobe_multi_check(void *ctx, bool is_return, bool is_sleep) +{ + child_pid = bpf_get_current_pid_tgid() >> 32; + + if (pid && child_pid != pid) + return; + + __u64 cookie = test_cookie ? bpf_get_attach_cookie(ctx) : 0; + __u64 addr = bpf_get_func_ip(ctx); + +#define SET(__var, __addr, __cookie) ({ \ + if (addr == __addr && \ + (!test_cookie || (cookie == __cookie))) \ + __var += 1; \ +}) + + if (is_return) { + SET(uretprobe_multi_func_1_result, uprobe_multi_func_1_addr, 2); + SET(uretprobe_multi_func_2_result, uprobe_multi_func_2_addr, 3); + SET(uretprobe_multi_func_3_result, uprobe_multi_func_3_addr, 1); + } else { + SET(uprobe_multi_func_1_result, uprobe_multi_func_1_addr, 3); + SET(uprobe_multi_func_2_result, uprobe_multi_func_2_addr, 1); + SET(uprobe_multi_func_3_result, uprobe_multi_func_3_addr, 2); + } + +#undef SET + + if (is_sleep && verify_sleepable_user_copy()) + uprobe_multi_sleep_result += 1; +} + +SEC("uprobe.multi//proc/self/exe:uprobe_multi_func_*") +int uprobe(struct pt_regs *ctx) +{ + uprobe_multi_check(ctx, false, false); + return 0; +} + +SEC("uretprobe.multi//proc/self/exe:uprobe_multi_func_*") +int uretprobe(struct pt_regs *ctx) +{ + uprobe_multi_check(ctx, true, false); + return 0; +} + +SEC("uprobe.multi.s//proc/self/exe:uprobe_multi_func_*") +int uprobe_sleep(struct pt_regs *ctx) +{ + uprobe_multi_check(ctx, false, true); + return 0; +} + +SEC("uretprobe.multi.s//proc/self/exe:uprobe_multi_func_*") +int uretprobe_sleep(struct pt_regs *ctx) +{ + uprobe_multi_check(ctx, true, true); + return 0; +} + +SEC("uprobe.multi//proc/self/exe:uprobe_multi_func_*") +int uprobe_extra(struct pt_regs *ctx) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_bench.c b/tools/testing/selftests/bpf/progs/uprobe_multi_bench.c new file mode 100644 index 000000000000..5367f6105e30 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uprobe_multi_bench.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +int count; + +SEC("uprobe.multi/./uprobe_multi:uprobe_multi_func_*") +int uprobe_bench(struct pt_regs *ctx) +{ + count++; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_usdt.c b/tools/testing/selftests/bpf/progs/uprobe_multi_usdt.c new file mode 100644 index 000000000000..9e1c33d0bd2f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uprobe_multi_usdt.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/usdt.bpf.h> + +char _license[] SEC("license") = "GPL"; + +int count; + +SEC("usdt") +int usdt0(struct pt_regs *ctx) +{ + count++; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/verifier_bswap.c b/tools/testing/selftests/bpf/progs/verifier_bswap.c new file mode 100644 index 000000000000..8893094725f0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_bswap.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 + +SEC("socket") +__description("BSWAP, 16") +__success __success_unpriv __retval(0x23ff) +__naked void bswap_16(void) +{ + asm volatile (" \ + r0 = 0xff23; \ + r0 = bswap16 r0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("BSWAP, 32") +__success __success_unpriv __retval(0x23ff0000) +__naked void bswap_32(void) +{ + asm volatile (" \ + r0 = 0xff23; \ + r0 = bswap32 r0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("BSWAP, 64") +__success __success_unpriv __retval(0x34ff12ff) +__naked void bswap_64(void) +{ + asm volatile (" \ + r0 = %[u64_val] ll; \ + r0 = bswap64 r0; \ + exit; \ +" : + : [u64_val]"i"(0xff12ff34ff56ff78ull) + : __clobber_all); +} + +#else + +SEC("socket") +__description("cpuv4 is not supported by compiler or jit, use a dummy test") +__success +int dummy_test(void) +{ + return 0; +} + +#endif + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_gotol.c b/tools/testing/selftests/bpf/progs/verifier_gotol.c new file mode 100644 index 000000000000..2dae5322a18e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_gotol.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 + +SEC("socket") +__description("gotol, small_imm") +__success __success_unpriv __retval(1) +__naked void gotol_small_imm(void) +{ + asm volatile (" \ + call %[bpf_ktime_get_ns]; \ + if r0 == 0 goto l0_%=; \ + gotol l1_%=; \ +l2_%=: \ + gotol l3_%=; \ +l1_%=: \ + r0 = 1; \ + gotol l2_%=; \ +l0_%=: \ + r0 = 2; \ +l3_%=: \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +#else + +SEC("socket") +__description("cpuv4 is not supported by compiler or jit, use a dummy test") +__success +int dummy_test(void) +{ + return 0; +} + +#endif + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_ldsx.c b/tools/testing/selftests/bpf/progs/verifier_ldsx.c new file mode 100644 index 000000000000..0c638f45aaf1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_ldsx.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 + +SEC("socket") +__description("LDSX, S8") +__success __success_unpriv __retval(-2) +__naked void ldsx_s8(void) +{ + asm volatile (" \ + r1 = 0x3fe; \ + *(u64 *)(r10 - 8) = r1; \ + r0 = *(s8 *)(r10 - 8); \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("LDSX, S16") +__success __success_unpriv __retval(-2) +__naked void ldsx_s16(void) +{ + asm volatile (" \ + r1 = 0x3fffe; \ + *(u64 *)(r10 - 8) = r1; \ + r0 = *(s16 *)(r10 - 8); \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("LDSX, S32") +__success __success_unpriv __retval(-1) +__naked void ldsx_s32(void) +{ + asm volatile (" \ + r1 = 0xfffffffe; \ + *(u64 *)(r10 - 8) = r1; \ + r0 = *(s32 *)(r10 - 8); \ + r0 >>= 1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("LDSX, S8 range checking, privileged") +__log_level(2) __success __retval(1) +__msg("R1_w=scalar(smin=-128,smax=127)") +__naked void ldsx_s8_range_priv(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + *(u64 *)(r10 - 8) = r0; \ + r1 = *(s8 *)(r10 - 8); \ + /* r1 with s8 range */ \ + if r1 s> 0x7f goto l0_%=; \ + if r1 s< -0x80 goto l0_%=; \ + r0 = 1; \ +l1_%=: \ + exit; \ +l0_%=: \ + r0 = 2; \ + goto l1_%=; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("LDSX, S16 range checking") +__success __success_unpriv __retval(1) +__naked void ldsx_s16_range(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + *(u64 *)(r10 - 8) = r0; \ + r1 = *(s16 *)(r10 - 8); \ + /* r1 with s16 range */ \ + if r1 s> 0x7fff goto l0_%=; \ + if r1 s< -0x8000 goto l0_%=; \ + r0 = 1; \ +l1_%=: \ + exit; \ +l0_%=: \ + r0 = 2; \ + goto l1_%=; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("LDSX, S32 range checking") +__success __success_unpriv __retval(1) +__naked void ldsx_s32_range(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + *(u64 *)(r10 - 8) = r0; \ + r1 = *(s32 *)(r10 - 8); \ + /* r1 with s16 range */ \ + if r1 s> 0x7fffFFFF goto l0_%=; \ + if r1 s< -0x80000000 goto l0_%=; \ + r0 = 1; \ +l1_%=: \ + exit; \ +l0_%=: \ + r0 = 2; \ + goto l1_%=; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +#else + +SEC("socket") +__description("cpuv4 is not supported by compiler or jit, use a dummy test") +__success +int dummy_test(void) +{ + return 0; +} + +#endif + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c new file mode 100644 index 000000000000..3c8ac2c57b1b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 + +SEC("socket") +__description("MOV32SX, S8") +__success __success_unpriv __retval(0x23) +__naked void mov32sx_s8(void) +{ + asm volatile (" \ + w0 = 0xff23; \ + w0 = (s8)w0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("MOV32SX, S16") +__success __success_unpriv __retval(0xFFFFff23) +__naked void mov32sx_s16(void) +{ + asm volatile (" \ + w0 = 0xff23; \ + w0 = (s16)w0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("MOV64SX, S8") +__success __success_unpriv __retval(-2) +__naked void mov64sx_s8(void) +{ + asm volatile (" \ + r0 = 0x1fe; \ + r0 = (s8)r0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("MOV64SX, S16") +__success __success_unpriv __retval(0xf23) +__naked void mov64sx_s16(void) +{ + asm volatile (" \ + r0 = 0xf0f23; \ + r0 = (s16)r0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("MOV64SX, S32") +__success __success_unpriv __retval(-1) +__naked void mov64sx_s32(void) +{ + asm volatile (" \ + r0 = 0xfffffffe; \ + r0 = (s32)r0; \ + r0 >>= 1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("MOV32SX, S8, range_check") +__success __success_unpriv __retval(1) +__naked void mov32sx_s8_range(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + w1 = (s8)w0; \ + /* w1 with s8 range */ \ + if w1 s> 0x7f goto l0_%=; \ + if w1 s< -0x80 goto l0_%=; \ + r0 = 1; \ +l1_%=: \ + exit; \ +l0_%=: \ + r0 = 2; \ + goto l1_%=; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("MOV32SX, S16, range_check") +__success __success_unpriv __retval(1) +__naked void mov32sx_s16_range(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + w1 = (s16)w0; \ + /* w1 with s16 range */ \ + if w1 s> 0x7fff goto l0_%=; \ + if w1 s< -0x80ff goto l0_%=; \ + r0 = 1; \ +l1_%=: \ + exit; \ +l0_%=: \ + r0 = 2; \ + goto l1_%=; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("MOV32SX, S16, range_check 2") +__success __success_unpriv __retval(1) +__naked void mov32sx_s16_range_2(void) +{ + asm volatile (" \ + r1 = 65535; \ + w2 = (s16)w1; \ + r2 >>= 1; \ + if r2 != 0x7fffFFFF goto l0_%=; \ + r0 = 1; \ +l1_%=: \ + exit; \ +l0_%=: \ + r0 = 0; \ + goto l1_%=; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("MOV64SX, S8, range_check") +__success __success_unpriv __retval(1) +__naked void mov64sx_s8_range(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r1 = (s8)r0; \ + /* r1 with s8 range */ \ + if r1 s> 0x7f goto l0_%=; \ + if r1 s< -0x80 goto l0_%=; \ + r0 = 1; \ +l1_%=: \ + exit; \ +l0_%=: \ + r0 = 2; \ + goto l1_%=; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("MOV64SX, S16, range_check") +__success __success_unpriv __retval(1) +__naked void mov64sx_s16_range(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r1 = (s16)r0; \ + /* r1 with s16 range */ \ + if r1 s> 0x7fff goto l0_%=; \ + if r1 s< -0x8000 goto l0_%=; \ + r0 = 1; \ +l1_%=: \ + exit; \ +l0_%=: \ + r0 = 2; \ + goto l1_%=; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("MOV64SX, S32, range_check") +__success __success_unpriv __retval(1) +__naked void mov64sx_s32_range(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r1 = (s32)r0; \ + /* r1 with s32 range */ \ + if r1 s> 0x7fffffff goto l0_%=; \ + if r1 s< -0x80000000 goto l0_%=; \ + r0 = 1; \ +l1_%=: \ + exit; \ +l0_%=: \ + r0 = 2; \ + goto l1_%=; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("MOV64SX, S16, R10 Sign Extension") +__failure __msg("R1 type=scalar expected=fp, pkt, pkt_meta, map_key, map_value, mem, ringbuf_mem, buf, trusted_ptr_") +__failure_unpriv __msg_unpriv("R10 sign-extension part of pointer") +__naked void mov64sx_s16_r10(void) +{ + asm volatile (" \ + r1 = 553656332; \ + *(u32 *)(r10 - 8) = r1; \ + r1 = (s16)r10; \ + r1 += -8; \ + r2 = 3; \ + if r2 <= r1 goto l0_%=; \ +l0_%=: \ + call %[bpf_trace_printk]; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_trace_printk) + : __clobber_all); +} + +#else + +SEC("socket") +__description("cpuv4 is not supported by compiler or jit, use a dummy test") +__success +int dummy_test(void) +{ + return 0; +} + +#endif + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c new file mode 100644 index 000000000000..13b29a7faa71 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c @@ -0,0 +1,659 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +/* Check that precision marks propagate through scalar IDs. + * Registers r{0,1,2} have the same scalar ID at the moment when r0 is + * marked to be precise, this mark is immediately propagated to r{1,2}. + */ +SEC("socket") +__success __log_level(2) +__msg("frame0: regs=r0,r1,r2 stack= before 4: (bf) r3 = r10") +__msg("frame0: regs=r0,r1,r2 stack= before 3: (bf) r2 = r0") +__msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0") +__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255") +__msg("frame0: regs=r0 stack= before 0: (85) call bpf_ktime_get_ns") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void precision_same_state(void) +{ + asm volatile ( + /* r0 = random number up to 0xff */ + "call %[bpf_ktime_get_ns];" + "r0 &= 0xff;" + /* tie r0.id == r1.id == r2.id */ + "r1 = r0;" + "r2 = r0;" + /* force r0 to be precise, this immediately marks r1 and r2 as + * precise as well because of shared IDs + */ + "r3 = r10;" + "r3 += r0;" + "r0 = 0;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* Same as precision_same_state, but mark propagates through state / + * parent state boundary. + */ +SEC("socket") +__success __log_level(2) +__msg("frame0: last_idx 6 first_idx 5 subseq_idx -1") +__msg("frame0: regs=r0,r1,r2 stack= before 5: (bf) r3 = r10") +__msg("frame0: parent state regs=r0,r1,r2 stack=:") +__msg("frame0: regs=r0,r1,r2 stack= before 4: (05) goto pc+0") +__msg("frame0: regs=r0,r1,r2 stack= before 3: (bf) r2 = r0") +__msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0") +__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255") +__msg("frame0: parent state regs=r0 stack=:") +__msg("frame0: regs=r0 stack= before 0: (85) call bpf_ktime_get_ns") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void precision_cross_state(void) +{ + asm volatile ( + /* r0 = random number up to 0xff */ + "call %[bpf_ktime_get_ns];" + "r0 &= 0xff;" + /* tie r0.id == r1.id == r2.id */ + "r1 = r0;" + "r2 = r0;" + /* force checkpoint */ + "goto +0;" + /* force r0 to be precise, this immediately marks r1 and r2 as + * precise as well because of shared IDs + */ + "r3 = r10;" + "r3 += r0;" + "r0 = 0;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* Same as precision_same_state, but break one of the + * links, note that r1 is absent from regs=... in __msg below. + */ +SEC("socket") +__success __log_level(2) +__msg("frame0: regs=r0,r2 stack= before 5: (bf) r3 = r10") +__msg("frame0: regs=r0,r2 stack= before 4: (b7) r1 = 0") +__msg("frame0: regs=r0,r2 stack= before 3: (bf) r2 = r0") +__msg("frame0: regs=r0 stack= before 2: (bf) r1 = r0") +__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255") +__msg("frame0: regs=r0 stack= before 0: (85) call bpf_ktime_get_ns") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void precision_same_state_broken_link(void) +{ + asm volatile ( + /* r0 = random number up to 0xff */ + "call %[bpf_ktime_get_ns];" + "r0 &= 0xff;" + /* tie r0.id == r1.id == r2.id */ + "r1 = r0;" + "r2 = r0;" + /* break link for r1, this is the only line that differs + * compared to the previous test + */ + "r1 = 0;" + /* force r0 to be precise, this immediately marks r1 and r2 as + * precise as well because of shared IDs + */ + "r3 = r10;" + "r3 += r0;" + "r0 = 0;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* Same as precision_same_state_broken_link, but with state / + * parent state boundary. + */ +SEC("socket") +__success __log_level(2) +__msg("frame0: regs=r0,r2 stack= before 6: (bf) r3 = r10") +__msg("frame0: regs=r0,r2 stack= before 5: (b7) r1 = 0") +__msg("frame0: parent state regs=r0,r2 stack=:") +__msg("frame0: regs=r0,r1,r2 stack= before 4: (05) goto pc+0") +__msg("frame0: regs=r0,r1,r2 stack= before 3: (bf) r2 = r0") +__msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0") +__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255") +__msg("frame0: parent state regs=r0 stack=:") +__msg("frame0: regs=r0 stack= before 0: (85) call bpf_ktime_get_ns") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void precision_cross_state_broken_link(void) +{ + asm volatile ( + /* r0 = random number up to 0xff */ + "call %[bpf_ktime_get_ns];" + "r0 &= 0xff;" + /* tie r0.id == r1.id == r2.id */ + "r1 = r0;" + "r2 = r0;" + /* force checkpoint, although link between r1 and r{0,2} is + * broken by the next statement current precision tracking + * algorithm can't react to it and propagates mark for r1 to + * the parent state. + */ + "goto +0;" + /* break link for r1, this is the only line that differs + * compared to precision_cross_state() + */ + "r1 = 0;" + /* force r0 to be precise, this immediately marks r1 and r2 as + * precise as well because of shared IDs + */ + "r3 = r10;" + "r3 += r0;" + "r0 = 0;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* Check that precision marks propagate through scalar IDs. + * Use the same scalar ID in multiple stack frames, check that + * precision information is propagated up the call stack. + */ +SEC("socket") +__success __log_level(2) +__msg("11: (0f) r2 += r1") +/* Current state */ +__msg("frame2: last_idx 11 first_idx 10 subseq_idx -1") +__msg("frame2: regs=r1 stack= before 10: (bf) r2 = r10") +__msg("frame2: parent state regs=r1 stack=") +/* frame1.r{6,7} are marked because mark_precise_scalar_ids() + * looks for all registers with frame2.r1.id in the current state + */ +__msg("frame1: parent state regs=r6,r7 stack=") +__msg("frame0: parent state regs=r6 stack=") +/* Parent state */ +__msg("frame2: last_idx 8 first_idx 8 subseq_idx 10") +__msg("frame2: regs=r1 stack= before 8: (85) call pc+1") +/* frame1.r1 is marked because of backtracking of call instruction */ +__msg("frame1: parent state regs=r1,r6,r7 stack=") +__msg("frame0: parent state regs=r6 stack=") +/* Parent state */ +__msg("frame1: last_idx 7 first_idx 6 subseq_idx 8") +__msg("frame1: regs=r1,r6,r7 stack= before 7: (bf) r7 = r1") +__msg("frame1: regs=r1,r6 stack= before 6: (bf) r6 = r1") +__msg("frame1: parent state regs=r1 stack=") +__msg("frame0: parent state regs=r6 stack=") +/* Parent state */ +__msg("frame1: last_idx 4 first_idx 4 subseq_idx 6") +__msg("frame1: regs=r1 stack= before 4: (85) call pc+1") +__msg("frame0: parent state regs=r1,r6 stack=") +/* Parent state */ +__msg("frame0: last_idx 3 first_idx 1 subseq_idx 4") +__msg("frame0: regs=r0,r1,r6 stack= before 3: (bf) r6 = r0") +__msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0") +__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void precision_many_frames(void) +{ + asm volatile ( + /* r0 = random number up to 0xff */ + "call %[bpf_ktime_get_ns];" + "r0 &= 0xff;" + /* tie r0.id == r1.id == r6.id */ + "r1 = r0;" + "r6 = r0;" + "call precision_many_frames__foo;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +static __naked __noinline __used +void precision_many_frames__foo(void) +{ + asm volatile ( + /* conflate one of the register numbers (r6) with outer frame, + * to verify that those are tracked independently + */ + "r6 = r1;" + "r7 = r1;" + "call precision_many_frames__bar;" + "exit" + ::: __clobber_all); +} + +static __naked __noinline __used +void precision_many_frames__bar(void) +{ + asm volatile ( + /* force r1 to be precise, this immediately marks: + * - bar frame r1 + * - foo frame r{1,6,7} + * - main frame r{1,6} + */ + "r2 = r10;" + "r2 += r1;" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} + +/* Check that scalars with the same IDs are marked precise on stack as + * well as in registers. + */ +SEC("socket") +__success __log_level(2) +/* foo frame */ +__msg("frame1: regs=r1 stack=-8,-16 before 9: (bf) r2 = r10") +__msg("frame1: regs=r1 stack=-8,-16 before 8: (7b) *(u64 *)(r10 -16) = r1") +__msg("frame1: regs=r1 stack=-8 before 7: (7b) *(u64 *)(r10 -8) = r1") +__msg("frame1: regs=r1 stack= before 4: (85) call pc+2") +/* main frame */ +__msg("frame0: regs=r0,r1 stack=-8 before 3: (7b) *(u64 *)(r10 -8) = r1") +__msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0") +__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void precision_stack(void) +{ + asm volatile ( + /* r0 = random number up to 0xff */ + "call %[bpf_ktime_get_ns];" + "r0 &= 0xff;" + /* tie r0.id == r1.id == fp[-8].id */ + "r1 = r0;" + "*(u64*)(r10 - 8) = r1;" + "call precision_stack__foo;" + "r0 = 0;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +static __naked __noinline __used +void precision_stack__foo(void) +{ + asm volatile ( + /* conflate one of the register numbers (r6) with outer frame, + * to verify that those are tracked independently + */ + "*(u64*)(r10 - 8) = r1;" + "*(u64*)(r10 - 16) = r1;" + /* force r1 to be precise, this immediately marks: + * - foo frame r1,fp{-8,-16} + * - main frame r1,fp{-8} + */ + "r2 = r10;" + "r2 += r1;" + "exit" + ::: __clobber_all); +} + +/* Use two separate scalar IDs to check that these are propagated + * independently. + */ +SEC("socket") +__success __log_level(2) +/* r{6,7} */ +__msg("11: (0f) r3 += r7") +__msg("frame0: regs=r6,r7 stack= before 10: (bf) r3 = r10") +/* ... skip some insns ... */ +__msg("frame0: regs=r6,r7 stack= before 3: (bf) r7 = r0") +__msg("frame0: regs=r0,r6 stack= before 2: (bf) r6 = r0") +/* r{8,9} */ +__msg("12: (0f) r3 += r9") +__msg("frame0: regs=r8,r9 stack= before 11: (0f) r3 += r7") +/* ... skip some insns ... */ +__msg("frame0: regs=r8,r9 stack= before 7: (bf) r9 = r0") +__msg("frame0: regs=r0,r8 stack= before 6: (bf) r8 = r0") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void precision_two_ids(void) +{ + asm volatile ( + /* r6 = random number up to 0xff + * r6.id == r7.id + */ + "call %[bpf_ktime_get_ns];" + "r0 &= 0xff;" + "r6 = r0;" + "r7 = r0;" + /* same, but for r{8,9} */ + "call %[bpf_ktime_get_ns];" + "r0 &= 0xff;" + "r8 = r0;" + "r9 = r0;" + /* clear r0 id */ + "r0 = 0;" + /* force checkpoint */ + "goto +0;" + "r3 = r10;" + /* force r7 to be precise, this also marks r6 */ + "r3 += r7;" + /* force r9 to be precise, this also marks r8 */ + "r3 += r9;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* Verify that check_ids() is used by regsafe() for scalars. + * + * r9 = ... some pointer with range X ... + * r6 = ... unbound scalar ID=a ... + * r7 = ... unbound scalar ID=b ... + * if (r6 > r7) goto +1 + * r7 = r6 + * if (r7 > X) goto exit + * r9 += r6 + * ... access memory using r9 ... + * + * The memory access is safe only if r7 is bounded, + * which is true for one branch and not true for another. + */ +SEC("socket") +__failure __msg("register with unbounded min value") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void check_ids_in_regsafe(void) +{ + asm volatile ( + /* Bump allocated stack */ + "r1 = 0;" + "*(u64*)(r10 - 8) = r1;" + /* r9 = pointer to stack */ + "r9 = r10;" + "r9 += -8;" + /* r7 = ktime_get_ns() */ + "call %[bpf_ktime_get_ns];" + "r7 = r0;" + /* r6 = ktime_get_ns() */ + "call %[bpf_ktime_get_ns];" + "r6 = r0;" + /* if r6 > r7 is an unpredictable jump */ + "if r6 > r7 goto l1_%=;" + "r7 = r6;" +"l1_%=:" + /* if r7 > 4 ...; transfers range to r6 on one execution path + * but does not transfer on another + */ + "if r7 > 4 goto l2_%=;" + /* Access memory at r9[r6], r6 is not always bounded */ + "r9 += r6;" + "r0 = *(u8*)(r9 + 0);" +"l2_%=:" + "r0 = 0;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* Similar to check_ids_in_regsafe. + * The l0 could be reached in two states: + * + * (1) r6{.id=A}, r7{.id=A}, r8{.id=B} + * (2) r6{.id=B}, r7{.id=A}, r8{.id=B} + * + * Where (2) is not safe, as "r7 > 4" check won't propagate range for it. + * This example would be considered safe without changes to + * mark_chain_precision() to track scalar values with equal IDs. + */ +SEC("socket") +__failure __msg("register with unbounded min value") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void check_ids_in_regsafe_2(void) +{ + asm volatile ( + /* Bump allocated stack */ + "r1 = 0;" + "*(u64*)(r10 - 8) = r1;" + /* r9 = pointer to stack */ + "r9 = r10;" + "r9 += -8;" + /* r8 = ktime_get_ns() */ + "call %[bpf_ktime_get_ns];" + "r8 = r0;" + /* r7 = ktime_get_ns() */ + "call %[bpf_ktime_get_ns];" + "r7 = r0;" + /* r6 = ktime_get_ns() */ + "call %[bpf_ktime_get_ns];" + "r6 = r0;" + /* scratch .id from r0 */ + "r0 = 0;" + /* if r6 > r7 is an unpredictable jump */ + "if r6 > r7 goto l1_%=;" + /* tie r6 and r7 .id */ + "r6 = r7;" +"l0_%=:" + /* if r7 > 4 exit(0) */ + "if r7 > 4 goto l2_%=;" + /* Access memory at r9[r6] */ + "r9 += r6;" + "r0 = *(u8*)(r9 + 0);" +"l2_%=:" + "r0 = 0;" + "exit;" +"l1_%=:" + /* tie r6 and r8 .id */ + "r6 = r8;" + "goto l0_%=;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* Check that scalar IDs *are not* generated on register to register + * assignments if source register is a constant. + * + * If such IDs *are* generated the 'l1' below would be reached in + * two states: + * + * (1) r1{.id=A}, r2{.id=A} + * (2) r1{.id=C}, r2{.id=C} + * + * Thus forcing 'if r1 == r2' verification twice. + */ +SEC("socket") +__success __log_level(2) +__msg("11: (1d) if r3 == r4 goto pc+0") +__msg("frame 0: propagating r3,r4") +__msg("11: safe") +__msg("processed 15 insns") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void no_scalar_id_for_const(void) +{ + asm volatile ( + "call %[bpf_ktime_get_ns];" + /* unpredictable jump */ + "if r0 > 7 goto l0_%=;" + /* possibly generate same scalar ids for r3 and r4 */ + "r1 = 0;" + "r1 = r1;" + "r3 = r1;" + "r4 = r1;" + "goto l1_%=;" +"l0_%=:" + /* possibly generate different scalar ids for r3 and r4 */ + "r1 = 0;" + "r2 = 0;" + "r3 = r1;" + "r4 = r2;" +"l1_%=:" + /* predictable jump, marks r3 and r4 precise */ + "if r3 == r4 goto +0;" + "r0 = 0;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* Same as no_scalar_id_for_const() but for 32-bit values */ +SEC("socket") +__success __log_level(2) +__msg("11: (1e) if w3 == w4 goto pc+0") +__msg("frame 0: propagating r3,r4") +__msg("11: safe") +__msg("processed 15 insns") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void no_scalar_id_for_const32(void) +{ + asm volatile ( + "call %[bpf_ktime_get_ns];" + /* unpredictable jump */ + "if r0 > 7 goto l0_%=;" + /* possibly generate same scalar ids for r3 and r4 */ + "w1 = 0;" + "w1 = w1;" + "w3 = w1;" + "w4 = w1;" + "goto l1_%=;" +"l0_%=:" + /* possibly generate different scalar ids for r3 and r4 */ + "w1 = 0;" + "w2 = 0;" + "w3 = w1;" + "w4 = w2;" +"l1_%=:" + /* predictable jump, marks r1 and r2 precise */ + "if w3 == w4 goto +0;" + "r0 = 0;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* Check that unique scalar IDs are ignored when new verifier state is + * compared to cached verifier state. For this test: + * - cached state has no id on r1 + * - new state has a unique id on r1 + */ +SEC("socket") +__success __log_level(2) +__msg("6: (25) if r6 > 0x7 goto pc+1") +__msg("7: (57) r1 &= 255") +__msg("8: (bf) r2 = r10") +__msg("from 6 to 8: safe") +__msg("processed 12 insns") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void ignore_unique_scalar_ids_cur(void) +{ + asm volatile ( + "call %[bpf_ktime_get_ns];" + "r6 = r0;" + "call %[bpf_ktime_get_ns];" + "r0 &= 0xff;" + /* r1.id == r0.id */ + "r1 = r0;" + /* make r1.id unique */ + "r0 = 0;" + "if r6 > 7 goto l0_%=;" + /* clear r1 id, but keep the range compatible */ + "r1 &= 0xff;" +"l0_%=:" + /* get here in two states: + * - first: r1 has no id (cached state) + * - second: r1 has a unique id (should be considered equivalent) + */ + "r2 = r10;" + "r2 += r1;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* Check that unique scalar IDs are ignored when new verifier state is + * compared to cached verifier state. For this test: + * - cached state has a unique id on r1 + * - new state has no id on r1 + */ +SEC("socket") +__success __log_level(2) +__msg("6: (25) if r6 > 0x7 goto pc+1") +__msg("7: (05) goto pc+1") +__msg("9: (bf) r2 = r10") +__msg("9: safe") +__msg("processed 13 insns") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void ignore_unique_scalar_ids_old(void) +{ + asm volatile ( + "call %[bpf_ktime_get_ns];" + "r6 = r0;" + "call %[bpf_ktime_get_ns];" + "r0 &= 0xff;" + /* r1.id == r0.id */ + "r1 = r0;" + /* make r1.id unique */ + "r0 = 0;" + "if r6 > 7 goto l1_%=;" + "goto l0_%=;" +"l1_%=:" + /* clear r1 id, but keep the range compatible */ + "r1 &= 0xff;" +"l0_%=:" + /* get here in two states: + * - first: r1 has a unique id (cached state) + * - second: r1 has no id (should be considered equivalent) + */ + "r2 = r10;" + "r2 += r1;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +/* Check that two different scalar IDs in a verified state can't be + * mapped to the same scalar ID in current state. + */ +SEC("socket") +__success __log_level(2) +/* The exit instruction should be reachable from two states, + * use two matches and "processed .. insns" to ensure this. + */ +__msg("13: (95) exit") +__msg("13: (95) exit") +__msg("processed 18 insns") +__flag(BPF_F_TEST_STATE_FREQ) +__naked void two_old_ids_one_cur_id(void) +{ + asm volatile ( + /* Give unique scalar IDs to r{6,7} */ + "call %[bpf_ktime_get_ns];" + "r0 &= 0xff;" + "r6 = r0;" + "call %[bpf_ktime_get_ns];" + "r0 &= 0xff;" + "r7 = r0;" + "r0 = 0;" + /* Maybe make r{6,7} IDs identical */ + "if r6 > r7 goto l0_%=;" + "goto l1_%=;" +"l0_%=:" + "r6 = r7;" +"l1_%=:" + /* Mark r{6,7} precise. + * Get here in two states: + * - first: r6{.id=A}, r7{.id=B} (cached state) + * - second: r6{.id=A}, r7{.id=A} + * Currently we don't want to consider such states equivalent. + * Thus "exit;" would be verified twice. + */ + "r2 = r10;" + "r2 += r6;" + "r2 += r7;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_sdiv.c b/tools/testing/selftests/bpf/progs/verifier_sdiv.c new file mode 100644 index 000000000000..0990f8825675 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_sdiv.c @@ -0,0 +1,782 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) && __clang_major__ >= 18 + +SEC("socket") +__description("SDIV32, non-zero imm divisor, check 1") +__success __success_unpriv __retval(-20) +__naked void sdiv32_non_zero_imm_1(void) +{ + asm volatile (" \ + w0 = -41; \ + w0 s/= 2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV32, non-zero imm divisor, check 2") +__success __success_unpriv __retval(-20) +__naked void sdiv32_non_zero_imm_2(void) +{ + asm volatile (" \ + w0 = 41; \ + w0 s/= -2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV32, non-zero imm divisor, check 3") +__success __success_unpriv __retval(20) +__naked void sdiv32_non_zero_imm_3(void) +{ + asm volatile (" \ + w0 = -41; \ + w0 s/= -2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV32, non-zero imm divisor, check 4") +__success __success_unpriv __retval(-21) +__naked void sdiv32_non_zero_imm_4(void) +{ + asm volatile (" \ + w0 = -42; \ + w0 s/= 2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV32, non-zero imm divisor, check 5") +__success __success_unpriv __retval(-21) +__naked void sdiv32_non_zero_imm_5(void) +{ + asm volatile (" \ + w0 = 42; \ + w0 s/= -2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV32, non-zero imm divisor, check 6") +__success __success_unpriv __retval(21) +__naked void sdiv32_non_zero_imm_6(void) +{ + asm volatile (" \ + w0 = -42; \ + w0 s/= -2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV32, non-zero imm divisor, check 7") +__success __success_unpriv __retval(21) +__naked void sdiv32_non_zero_imm_7(void) +{ + asm volatile (" \ + w0 = 42; \ + w0 s/= 2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV32, non-zero imm divisor, check 8") +__success __success_unpriv __retval(20) +__naked void sdiv32_non_zero_imm_8(void) +{ + asm volatile (" \ + w0 = 41; \ + w0 s/= 2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV32, non-zero reg divisor, check 1") +__success __success_unpriv __retval(-20) +__naked void sdiv32_non_zero_reg_1(void) +{ + asm volatile (" \ + w0 = -41; \ + w1 = 2; \ + w0 s/= w1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV32, non-zero reg divisor, check 2") +__success __success_unpriv __retval(-20) +__naked void sdiv32_non_zero_reg_2(void) +{ + asm volatile (" \ + w0 = 41; \ + w1 = -2; \ + w0 s/= w1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV32, non-zero reg divisor, check 3") +__success __success_unpriv __retval(20) +__naked void sdiv32_non_zero_reg_3(void) +{ + asm volatile (" \ + w0 = -41; \ + w1 = -2; \ + w0 s/= w1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV32, non-zero reg divisor, check 4") +__success __success_unpriv __retval(-21) +__naked void sdiv32_non_zero_reg_4(void) +{ + asm volatile (" \ + w0 = -42; \ + w1 = 2; \ + w0 s/= w1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV32, non-zero reg divisor, check 5") +__success __success_unpriv __retval(-21) +__naked void sdiv32_non_zero_reg_5(void) +{ + asm volatile (" \ + w0 = 42; \ + w1 = -2; \ + w0 s/= w1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV32, non-zero reg divisor, check 6") +__success __success_unpriv __retval(21) +__naked void sdiv32_non_zero_reg_6(void) +{ + asm volatile (" \ + w0 = -42; \ + w1 = -2; \ + w0 s/= w1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV32, non-zero reg divisor, check 7") +__success __success_unpriv __retval(21) +__naked void sdiv32_non_zero_reg_7(void) +{ + asm volatile (" \ + w0 = 42; \ + w1 = 2; \ + w0 s/= w1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV32, non-zero reg divisor, check 8") +__success __success_unpriv __retval(20) +__naked void sdiv32_non_zero_reg_8(void) +{ + asm volatile (" \ + w0 = 41; \ + w1 = 2; \ + w0 s/= w1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV64, non-zero imm divisor, check 1") +__success __success_unpriv __retval(-20) +__naked void sdiv64_non_zero_imm_1(void) +{ + asm volatile (" \ + r0 = -41; \ + r0 s/= 2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV64, non-zero imm divisor, check 2") +__success __success_unpriv __retval(-20) +__naked void sdiv64_non_zero_imm_2(void) +{ + asm volatile (" \ + r0 = 41; \ + r0 s/= -2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV64, non-zero imm divisor, check 3") +__success __success_unpriv __retval(20) +__naked void sdiv64_non_zero_imm_3(void) +{ + asm volatile (" \ + r0 = -41; \ + r0 s/= -2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV64, non-zero imm divisor, check 4") +__success __success_unpriv __retval(-21) +__naked void sdiv64_non_zero_imm_4(void) +{ + asm volatile (" \ + r0 = -42; \ + r0 s/= 2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV64, non-zero imm divisor, check 5") +__success __success_unpriv __retval(-21) +__naked void sdiv64_non_zero_imm_5(void) +{ + asm volatile (" \ + r0 = 42; \ + r0 s/= -2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV64, non-zero imm divisor, check 6") +__success __success_unpriv __retval(21) +__naked void sdiv64_non_zero_imm_6(void) +{ + asm volatile (" \ + r0 = -42; \ + r0 s/= -2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV64, non-zero reg divisor, check 1") +__success __success_unpriv __retval(-20) +__naked void sdiv64_non_zero_reg_1(void) +{ + asm volatile (" \ + r0 = -41; \ + r1 = 2; \ + r0 s/= r1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV64, non-zero reg divisor, check 2") +__success __success_unpriv __retval(-20) +__naked void sdiv64_non_zero_reg_2(void) +{ + asm volatile (" \ + r0 = 41; \ + r1 = -2; \ + r0 s/= r1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV64, non-zero reg divisor, check 3") +__success __success_unpriv __retval(20) +__naked void sdiv64_non_zero_reg_3(void) +{ + asm volatile (" \ + r0 = -41; \ + r1 = -2; \ + r0 s/= r1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV64, non-zero reg divisor, check 4") +__success __success_unpriv __retval(-21) +__naked void sdiv64_non_zero_reg_4(void) +{ + asm volatile (" \ + r0 = -42; \ + r1 = 2; \ + r0 s/= r1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV64, non-zero reg divisor, check 5") +__success __success_unpriv __retval(-21) +__naked void sdiv64_non_zero_reg_5(void) +{ + asm volatile (" \ + r0 = 42; \ + r1 = -2; \ + r0 s/= r1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV64, non-zero reg divisor, check 6") +__success __success_unpriv __retval(21) +__naked void sdiv64_non_zero_reg_6(void) +{ + asm volatile (" \ + r0 = -42; \ + r1 = -2; \ + r0 s/= r1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD32, non-zero imm divisor, check 1") +__success __success_unpriv __retval(-1) +__naked void smod32_non_zero_imm_1(void) +{ + asm volatile (" \ + w0 = -41; \ + w0 s%%= 2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD32, non-zero imm divisor, check 2") +__success __success_unpriv __retval(1) +__naked void smod32_non_zero_imm_2(void) +{ + asm volatile (" \ + w0 = 41; \ + w0 s%%= -2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD32, non-zero imm divisor, check 3") +__success __success_unpriv __retval(-1) +__naked void smod32_non_zero_imm_3(void) +{ + asm volatile (" \ + w0 = -41; \ + w0 s%%= -2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD32, non-zero imm divisor, check 4") +__success __success_unpriv __retval(0) +__naked void smod32_non_zero_imm_4(void) +{ + asm volatile (" \ + w0 = -42; \ + w0 s%%= 2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD32, non-zero imm divisor, check 5") +__success __success_unpriv __retval(0) +__naked void smod32_non_zero_imm_5(void) +{ + asm volatile (" \ + w0 = 42; \ + w0 s%%= -2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD32, non-zero imm divisor, check 6") +__success __success_unpriv __retval(0) +__naked void smod32_non_zero_imm_6(void) +{ + asm volatile (" \ + w0 = -42; \ + w0 s%%= -2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD32, non-zero reg divisor, check 1") +__success __success_unpriv __retval(-1) +__naked void smod32_non_zero_reg_1(void) +{ + asm volatile (" \ + w0 = -41; \ + w1 = 2; \ + w0 s%%= w1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD32, non-zero reg divisor, check 2") +__success __success_unpriv __retval(1) +__naked void smod32_non_zero_reg_2(void) +{ + asm volatile (" \ + w0 = 41; \ + w1 = -2; \ + w0 s%%= w1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD32, non-zero reg divisor, check 3") +__success __success_unpriv __retval(-1) +__naked void smod32_non_zero_reg_3(void) +{ + asm volatile (" \ + w0 = -41; \ + w1 = -2; \ + w0 s%%= w1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD32, non-zero reg divisor, check 4") +__success __success_unpriv __retval(0) +__naked void smod32_non_zero_reg_4(void) +{ + asm volatile (" \ + w0 = -42; \ + w1 = 2; \ + w0 s%%= w1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD32, non-zero reg divisor, check 5") +__success __success_unpriv __retval(0) +__naked void smod32_non_zero_reg_5(void) +{ + asm volatile (" \ + w0 = 42; \ + w1 = -2; \ + w0 s%%= w1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD32, non-zero reg divisor, check 6") +__success __success_unpriv __retval(0) +__naked void smod32_non_zero_reg_6(void) +{ + asm volatile (" \ + w0 = -42; \ + w1 = -2; \ + w0 s%%= w1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD64, non-zero imm divisor, check 1") +__success __success_unpriv __retval(-1) +__naked void smod64_non_zero_imm_1(void) +{ + asm volatile (" \ + r0 = -41; \ + r0 s%%= 2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD64, non-zero imm divisor, check 2") +__success __success_unpriv __retval(1) +__naked void smod64_non_zero_imm_2(void) +{ + asm volatile (" \ + r0 = 41; \ + r0 s%%= -2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD64, non-zero imm divisor, check 3") +__success __success_unpriv __retval(-1) +__naked void smod64_non_zero_imm_3(void) +{ + asm volatile (" \ + r0 = -41; \ + r0 s%%= -2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD64, non-zero imm divisor, check 4") +__success __success_unpriv __retval(0) +__naked void smod64_non_zero_imm_4(void) +{ + asm volatile (" \ + r0 = -42; \ + r0 s%%= 2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD64, non-zero imm divisor, check 5") +__success __success_unpriv __retval(-0) +__naked void smod64_non_zero_imm_5(void) +{ + asm volatile (" \ + r0 = 42; \ + r0 s%%= -2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD64, non-zero imm divisor, check 6") +__success __success_unpriv __retval(0) +__naked void smod64_non_zero_imm_6(void) +{ + asm volatile (" \ + r0 = -42; \ + r0 s%%= -2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD64, non-zero imm divisor, check 7") +__success __success_unpriv __retval(0) +__naked void smod64_non_zero_imm_7(void) +{ + asm volatile (" \ + r0 = 42; \ + r0 s%%= 2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD64, non-zero imm divisor, check 8") +__success __success_unpriv __retval(1) +__naked void smod64_non_zero_imm_8(void) +{ + asm volatile (" \ + r0 = 41; \ + r0 s%%= 2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD64, non-zero reg divisor, check 1") +__success __success_unpriv __retval(-1) +__naked void smod64_non_zero_reg_1(void) +{ + asm volatile (" \ + r0 = -41; \ + r1 = 2; \ + r0 s%%= r1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD64, non-zero reg divisor, check 2") +__success __success_unpriv __retval(1) +__naked void smod64_non_zero_reg_2(void) +{ + asm volatile (" \ + r0 = 41; \ + r1 = -2; \ + r0 s%%= r1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD64, non-zero reg divisor, check 3") +__success __success_unpriv __retval(-1) +__naked void smod64_non_zero_reg_3(void) +{ + asm volatile (" \ + r0 = -41; \ + r1 = -2; \ + r0 s%%= r1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD64, non-zero reg divisor, check 4") +__success __success_unpriv __retval(0) +__naked void smod64_non_zero_reg_4(void) +{ + asm volatile (" \ + r0 = -42; \ + r1 = 2; \ + r0 s%%= r1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD64, non-zero reg divisor, check 5") +__success __success_unpriv __retval(0) +__naked void smod64_non_zero_reg_5(void) +{ + asm volatile (" \ + r0 = 42; \ + r1 = -2; \ + r0 s%%= r1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD64, non-zero reg divisor, check 6") +__success __success_unpriv __retval(0) +__naked void smod64_non_zero_reg_6(void) +{ + asm volatile (" \ + r0 = -42; \ + r1 = -2; \ + r0 s%%= r1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD64, non-zero reg divisor, check 7") +__success __success_unpriv __retval(0) +__naked void smod64_non_zero_reg_7(void) +{ + asm volatile (" \ + r0 = 42; \ + r1 = 2; \ + r0 s%%= r1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD64, non-zero reg divisor, check 8") +__success __success_unpriv __retval(1) +__naked void smod64_non_zero_reg_8(void) +{ + asm volatile (" \ + r0 = 41; \ + r1 = 2; \ + r0 s%%= r1; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV32, zero divisor") +__success __success_unpriv __retval(0) +__naked void sdiv32_zero_divisor(void) +{ + asm volatile (" \ + w0 = 42; \ + w1 = 0; \ + w2 = -1; \ + w2 s/= w1; \ + w0 = w2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SDIV64, zero divisor") +__success __success_unpriv __retval(0) +__naked void sdiv64_zero_divisor(void) +{ + asm volatile (" \ + r0 = 42; \ + r1 = 0; \ + r2 = -1; \ + r2 s/= r1; \ + r0 = r2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD32, zero divisor") +__success __success_unpriv __retval(-1) +__naked void smod32_zero_divisor(void) +{ + asm volatile (" \ + w0 = 42; \ + w1 = 0; \ + w2 = -1; \ + w2 s%%= w1; \ + w0 = w2; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("SMOD64, zero divisor") +__success __success_unpriv __retval(-1) +__naked void smod64_zero_divisor(void) +{ + asm volatile (" \ + r0 = 42; \ + r1 = 0; \ + r2 = -1; \ + r2 s%%= r1; \ + r0 = r2; \ + exit; \ +" ::: __clobber_all); +} + +#else + +SEC("socket") +__description("cpuv4 is not supported by compiler or jit, use a dummy test") +__success +int dummy_test(void) +{ + return 0; +} + +#endif + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c new file mode 100644 index 000000000000..db6b3143338b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -0,0 +1,536 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include <errno.h> +#include <string.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) + +int vals[] SEC(".data.vals") = {1, 2, 3, 4}; + +__naked __noinline __used +static unsigned long identity_subprog() +{ + /* the simplest *static* 64-bit identity function */ + asm volatile ( + "r0 = r1;" + "exit;" + ); +} + +__noinline __used +unsigned long global_identity_subprog(__u64 x) +{ + /* the simplest *global* 64-bit identity function */ + return x; +} + +__naked __noinline __used +static unsigned long callback_subprog() +{ + /* the simplest callback function */ + asm volatile ( + "r0 = 0;" + "exit;" + ); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("7: (0f) r1 += r0") +__msg("mark_precise: frame0: regs=r0 stack= before 6: (bf) r1 = r7") +__msg("mark_precise: frame0: regs=r0 stack= before 5: (27) r0 *= 4") +__msg("mark_precise: frame0: regs=r0 stack= before 11: (95) exit") +__msg("mark_precise: frame1: regs=r0 stack= before 10: (bf) r0 = r1") +__msg("mark_precise: frame1: regs=r1 stack= before 4: (85) call pc+5") +__msg("mark_precise: frame0: regs=r1 stack= before 3: (bf) r1 = r6") +__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3") +__naked int subprog_result_precise(void) +{ + asm volatile ( + "r6 = 3;" + /* pass r6 through r1 into subprog to get it back as r0; + * this whole chain will have to be marked as precise later + */ + "r1 = r6;" + "call identity_subprog;" + /* now use subprog's returned value (which is a + * r6 -> r1 -> r0 chain), as index into vals array, forcing + * all of that to be known precisely + */ + "r0 *= 4;" + "r1 = %[vals];" + /* here r0->r1->r6 chain is forced to be precise and has to be + * propagated back to the beginning, including through the + * subprog call + */ + "r1 += r0;" + "r0 = *(u32 *)(r1 + 0);" + "exit;" + : + : __imm_ptr(vals) + : __clobber_common, "r6" + ); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("9: (0f) r1 += r0") +__msg("mark_precise: frame0: last_idx 9 first_idx 0") +__msg("mark_precise: frame0: regs=r0 stack= before 8: (bf) r1 = r7") +__msg("mark_precise: frame0: regs=r0 stack= before 7: (27) r0 *= 4") +__msg("mark_precise: frame0: regs=r0 stack= before 5: (a5) if r0 < 0x4 goto pc+1") +__msg("mark_precise: frame0: regs=r0 stack= before 4: (85) call pc+7") +__naked int global_subprog_result_precise(void) +{ + asm volatile ( + "r6 = 3;" + /* pass r6 through r1 into subprog to get it back as r0; + * given global_identity_subprog is global, precision won't + * propagate all the way back to r6 + */ + "r1 = r6;" + "call global_identity_subprog;" + /* now use subprog's returned value (which is unknown now, so + * we need to clamp it), as index into vals array, forcing r0 + * to be marked precise (with no effect on r6, though) + */ + "if r0 < %[vals_arr_sz] goto 1f;" + "r0 = %[vals_arr_sz] - 1;" + "1:" + "r0 *= 4;" + "r1 = %[vals];" + /* here r0 is forced to be precise and has to be + * propagated back to the global subprog call, but it + * shouldn't go all the way to mark r6 as precise + */ + "r1 += r0;" + "r0 = *(u32 *)(r1 + 0);" + "exit;" + : + : __imm_ptr(vals), + __imm_const(vals_arr_sz, ARRAY_SIZE(vals)) + : __clobber_common, "r6" + ); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("14: (0f) r1 += r6") +__msg("mark_precise: frame0: last_idx 14 first_idx 10") +__msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7") +__msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4") +__msg("mark_precise: frame0: regs=r6 stack= before 11: (25) if r6 > 0x3 goto pc+4") +__msg("mark_precise: frame0: regs=r6 stack= before 10: (bf) r6 = r0") +__msg("mark_precise: frame0: parent state regs=r0 stack=:") +__msg("mark_precise: frame0: last_idx 18 first_idx 0") +__msg("mark_precise: frame0: regs=r0 stack= before 18: (95) exit") +__naked int callback_result_precise(void) +{ + asm volatile ( + "r6 = 3;" + + /* call subprog and use result; r0 shouldn't propagate back to + * callback_subprog + */ + "r1 = r6;" /* nr_loops */ + "r2 = %[callback_subprog];" /* callback_fn */ + "r3 = 0;" /* callback_ctx */ + "r4 = 0;" /* flags */ + "call %[bpf_loop];" + + "r6 = r0;" + "if r6 > 3 goto 1f;" + "r6 *= 4;" + "r1 = %[vals];" + /* here r6 is forced to be precise and has to be propagated + * back to the bpf_loop() call, but not beyond + */ + "r1 += r6;" + "r0 = *(u32 *)(r1 + 0);" + "1:" + "exit;" + : + : __imm_ptr(vals), + __imm_ptr(callback_subprog), + __imm(bpf_loop) + : __clobber_common, "r6" + ); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("7: (0f) r1 += r6") +__msg("mark_precise: frame0: last_idx 7 first_idx 0") +__msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r1 = r7") +__msg("mark_precise: frame0: regs=r6 stack= before 5: (27) r6 *= 4") +__msg("mark_precise: frame0: regs=r6 stack= before 11: (95) exit") +__msg("mark_precise: frame1: regs= stack= before 10: (bf) r0 = r1") +__msg("mark_precise: frame1: regs= stack= before 4: (85) call pc+5") +__msg("mark_precise: frame0: regs=r6 stack= before 3: (b7) r1 = 0") +__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3") +__naked int parent_callee_saved_reg_precise(void) +{ + asm volatile ( + "r6 = 3;" + + /* call subprog and ignore result; we need this call only to + * complicate jump history + */ + "r1 = 0;" + "call identity_subprog;" + + "r6 *= 4;" + "r1 = %[vals];" + /* here r6 is forced to be precise and has to be propagated + * back to the beginning, handling (and ignoring) subprog call + */ + "r1 += r6;" + "r0 = *(u32 *)(r1 + 0);" + "exit;" + : + : __imm_ptr(vals) + : __clobber_common, "r6" + ); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("7: (0f) r1 += r6") +__msg("mark_precise: frame0: last_idx 7 first_idx 0") +__msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r1 = r7") +__msg("mark_precise: frame0: regs=r6 stack= before 5: (27) r6 *= 4") +__msg("mark_precise: frame0: regs=r6 stack= before 4: (85) call pc+5") +__msg("mark_precise: frame0: regs=r6 stack= before 3: (b7) r1 = 0") +__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3") +__naked int parent_callee_saved_reg_precise_global(void) +{ + asm volatile ( + "r6 = 3;" + + /* call subprog and ignore result; we need this call only to + * complicate jump history + */ + "r1 = 0;" + "call global_identity_subprog;" + + "r6 *= 4;" + "r1 = %[vals];" + /* here r6 is forced to be precise and has to be propagated + * back to the beginning, handling (and ignoring) subprog call + */ + "r1 += r6;" + "r0 = *(u32 *)(r1 + 0);" + "exit;" + : + : __imm_ptr(vals) + : __clobber_common, "r6" + ); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("12: (0f) r1 += r6") +__msg("mark_precise: frame0: last_idx 12 first_idx 10") +__msg("mark_precise: frame0: regs=r6 stack= before 11: (bf) r1 = r7") +__msg("mark_precise: frame0: regs=r6 stack= before 10: (27) r6 *= 4") +__msg("mark_precise: frame0: parent state regs=r6 stack=:") +__msg("mark_precise: frame0: last_idx 16 first_idx 0") +__msg("mark_precise: frame0: regs=r6 stack= before 16: (95) exit") +__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0") +__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop#181") +__msg("mark_precise: frame0: regs=r6 stack= before 8: (b7) r4 = 0") +__msg("mark_precise: frame0: regs=r6 stack= before 7: (b7) r3 = 0") +__msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r2 = r8") +__msg("mark_precise: frame0: regs=r6 stack= before 5: (b7) r1 = 1") +__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3") +__naked int parent_callee_saved_reg_precise_with_callback(void) +{ + asm volatile ( + "r6 = 3;" + + /* call subprog and ignore result; we need this call only to + * complicate jump history + */ + "r1 = 1;" /* nr_loops */ + "r2 = %[callback_subprog];" /* callback_fn */ + "r3 = 0;" /* callback_ctx */ + "r4 = 0;" /* flags */ + "call %[bpf_loop];" + + "r6 *= 4;" + "r1 = %[vals];" + /* here r6 is forced to be precise and has to be propagated + * back to the beginning, handling (and ignoring) callback call + */ + "r1 += r6;" + "r0 = *(u32 *)(r1 + 0);" + "exit;" + : + : __imm_ptr(vals), + __imm_ptr(callback_subprog), + __imm(bpf_loop) + : __clobber_common, "r6" + ); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("9: (0f) r1 += r6") +__msg("mark_precise: frame0: last_idx 9 first_idx 6") +__msg("mark_precise: frame0: regs=r6 stack= before 8: (bf) r1 = r7") +__msg("mark_precise: frame0: regs=r6 stack= before 7: (27) r6 *= 4") +__msg("mark_precise: frame0: regs=r6 stack= before 6: (79) r6 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: parent state regs= stack=-8:") +__msg("mark_precise: frame0: last_idx 13 first_idx 0") +__msg("mark_precise: frame0: regs= stack=-8 before 13: (95) exit") +__msg("mark_precise: frame1: regs= stack= before 12: (bf) r0 = r1") +__msg("mark_precise: frame1: regs= stack= before 5: (85) call pc+6") +__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r1 = 0") +__msg("mark_precise: frame0: regs= stack=-8 before 3: (7b) *(u64 *)(r10 -8) = r6") +__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3") +__naked int parent_stack_slot_precise(void) +{ + asm volatile ( + /* spill reg */ + "r6 = 3;" + "*(u64 *)(r10 - 8) = r6;" + + /* call subprog and ignore result; we need this call only to + * complicate jump history + */ + "r1 = 0;" + "call identity_subprog;" + + /* restore reg from stack; in this case we'll be carrying + * stack mask when going back into subprog through jump + * history + */ + "r6 = *(u64 *)(r10 - 8);" + + "r6 *= 4;" + "r1 = %[vals];" + /* here r6 is forced to be precise and has to be propagated + * back to the beginning, handling (and ignoring) subprog call + */ + "r1 += r6;" + "r0 = *(u32 *)(r1 + 0);" + "exit;" + : + : __imm_ptr(vals) + : __clobber_common, "r6" + ); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("9: (0f) r1 += r6") +__msg("mark_precise: frame0: last_idx 9 first_idx 6") +__msg("mark_precise: frame0: regs=r6 stack= before 8: (bf) r1 = r7") +__msg("mark_precise: frame0: regs=r6 stack= before 7: (27) r6 *= 4") +__msg("mark_precise: frame0: regs=r6 stack= before 6: (79) r6 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: parent state regs= stack=-8:") +__msg("mark_precise: frame0: last_idx 5 first_idx 0") +__msg("mark_precise: frame0: regs= stack=-8 before 5: (85) call pc+6") +__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r1 = 0") +__msg("mark_precise: frame0: regs= stack=-8 before 3: (7b) *(u64 *)(r10 -8) = r6") +__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3") +__naked int parent_stack_slot_precise_global(void) +{ + asm volatile ( + /* spill reg */ + "r6 = 3;" + "*(u64 *)(r10 - 8) = r6;" + + /* call subprog and ignore result; we need this call only to + * complicate jump history + */ + "r1 = 0;" + "call global_identity_subprog;" + + /* restore reg from stack; in this case we'll be carrying + * stack mask when going back into subprog through jump + * history + */ + "r6 = *(u64 *)(r10 - 8);" + + "r6 *= 4;" + "r1 = %[vals];" + /* here r6 is forced to be precise and has to be propagated + * back to the beginning, handling (and ignoring) subprog call + */ + "r1 += r6;" + "r0 = *(u32 *)(r1 + 0);" + "exit;" + : + : __imm_ptr(vals) + : __clobber_common, "r6" + ); +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("14: (0f) r1 += r6") +__msg("mark_precise: frame0: last_idx 14 first_idx 11") +__msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7") +__msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4") +__msg("mark_precise: frame0: regs=r6 stack= before 11: (79) r6 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: parent state regs= stack=-8:") +__msg("mark_precise: frame0: last_idx 18 first_idx 0") +__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit") +__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0") +__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181") +__msg("mark_precise: frame0: regs= stack=-8 before 9: (b7) r4 = 0") +__msg("mark_precise: frame0: regs= stack=-8 before 8: (b7) r3 = 0") +__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r2 = r8") +__msg("mark_precise: frame0: regs= stack=-8 before 6: (bf) r1 = r6") +__msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -8) = r6") +__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3") +__naked int parent_stack_slot_precise_with_callback(void) +{ + asm volatile ( + /* spill reg */ + "r6 = 3;" + "*(u64 *)(r10 - 8) = r6;" + + /* ensure we have callback frame in jump history */ + "r1 = r6;" /* nr_loops */ + "r2 = %[callback_subprog];" /* callback_fn */ + "r3 = 0;" /* callback_ctx */ + "r4 = 0;" /* flags */ + "call %[bpf_loop];" + + /* restore reg from stack; in this case we'll be carrying + * stack mask when going back into subprog through jump + * history + */ + "r6 = *(u64 *)(r10 - 8);" + + "r6 *= 4;" + "r1 = %[vals];" + /* here r6 is forced to be precise and has to be propagated + * back to the beginning, handling (and ignoring) subprog call + */ + "r1 += r6;" + "r0 = *(u32 *)(r1 + 0);" + "exit;" + : + : __imm_ptr(vals), + __imm_ptr(callback_subprog), + __imm(bpf_loop) + : __clobber_common, "r6" + ); +} + +__noinline __used +static __u64 subprog_with_precise_arg(__u64 x) +{ + return vals[x]; /* x is forced to be precise */ +} + +SEC("?raw_tp") +__success __log_level(2) +__msg("8: (0f) r2 += r1") +__msg("mark_precise: frame1: last_idx 8 first_idx 0") +__msg("mark_precise: frame1: regs=r1 stack= before 6: (18) r2 = ") +__msg("mark_precise: frame1: regs=r1 stack= before 5: (67) r1 <<= 2") +__msg("mark_precise: frame1: regs=r1 stack= before 2: (85) call pc+2") +__msg("mark_precise: frame0: regs=r1 stack= before 1: (bf) r1 = r6") +__msg("mark_precise: frame0: regs=r6 stack= before 0: (b7) r6 = 3") +__naked int subprog_arg_precise(void) +{ + asm volatile ( + "r6 = 3;" + "r1 = r6;" + /* subprog_with_precise_arg expects its argument to be + * precise, so r1->r6 will be marked precise from inside the + * subprog + */ + "call subprog_with_precise_arg;" + "r0 += r6;" + "exit;" + : + : + : __clobber_common, "r6" + ); +} + +/* r1 is pointer to stack slot; + * r2 is a register to spill into that slot + * subprog also spills r2 into its own stack slot + */ +__naked __noinline __used +static __u64 subprog_spill_reg_precise(void) +{ + asm volatile ( + /* spill to parent stack */ + "*(u64 *)(r1 + 0) = r2;" + /* spill to subprog stack (we use -16 offset to avoid + * accidental confusion with parent's -8 stack slot in + * verifier log output) + */ + "*(u64 *)(r10 - 16) = r2;" + /* use both spills as return result to propagete precision everywhere */ + "r0 = *(u64 *)(r10 - 16);" + "r2 = *(u64 *)(r1 + 0);" + "r0 += r2;" + "exit;" + ); +} + +SEC("?raw_tp") +__success __log_level(2) +/* precision backtracking can't currently handle stack access not through r10, + * so we won't be able to mark stack slot fp-8 as precise, and so will + * fallback to forcing all as precise + */ +__msg("mark_precise: frame0: falling back to forcing all scalars precise") +__naked int subprog_spill_into_parent_stack_slot_precise(void) +{ + asm volatile ( + "r6 = 1;" + + /* pass pointer to stack slot and r6 to subprog; + * r6 will be marked precise and spilled into fp-8 slot, which + * also should be marked precise + */ + "r1 = r10;" + "r1 += -8;" + "r2 = r6;" + "call subprog_spill_reg_precise;" + + /* restore reg from stack; in this case we'll be carrying + * stack mask when going back into subprog through jump + * history + */ + "r7 = *(u64 *)(r10 - 8);" + + "r7 *= 4;" + "r1 = %[vals];" + /* here r7 is forced to be precise and has to be propagated + * back to the beginning, handling subprog call and logic + */ + "r1 += r7;" + "r0 = *(u32 *)(r1 + 0);" + "exit;" + : + : __imm_ptr(vals) + : __clobber_common, "r6", "r7" + ); +} + +__naked __noinline __used +static __u64 subprog_with_checkpoint(void) +{ + asm volatile ( + "r0 = 0;" + /* guaranteed checkpoint if BPF_F_TEST_STATE_FREQ is used */ + "goto +0;" + "exit;" + ); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_typedef.c b/tools/testing/selftests/bpf/progs/verifier_typedef.c new file mode 100644 index 000000000000..08481cfaac4b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_typedef.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +SEC("fentry/bpf_fentry_test_sinfo") +__description("typedef: resolve") +__success __retval(0) +__naked void resolve_typedef(void) +{ + asm volatile (" \ + r1 = *(u64 *)(r1 +0); \ + r2 = *(u64 *)(r1 +%[frags_offs]); \ + r0 = 0; \ + exit; \ +" : + : __imm_const(frags_offs, + offsetof(struct skb_shared_info, frags)) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/vrf_socket_lookup.c b/tools/testing/selftests/bpf/progs/vrf_socket_lookup.c new file mode 100644 index 000000000000..bcfb6feb38c0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/vrf_socket_lookup.c @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#include <linux/ip.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/pkt_cls.h> +#include <stdbool.h> + +int lookup_status; +bool test_xdp; +bool tcp_skc; + +#define CUR_NS BPF_F_CURRENT_NETNS + +static void socket_lookup(void *ctx, void *data_end, void *data) +{ + struct ethhdr *eth = data; + struct bpf_sock_tuple *tp; + struct bpf_sock *sk; + struct iphdr *iph; + int tplen; + + if (eth + 1 > data_end) + return; + + if (eth->h_proto != bpf_htons(ETH_P_IP)) + return; + + iph = (struct iphdr *)(eth + 1); + if (iph + 1 > data_end) + return; + + tp = (struct bpf_sock_tuple *)&iph->saddr; + tplen = sizeof(tp->ipv4); + if ((void *)tp + tplen > data_end) + return; + + switch (iph->protocol) { + case IPPROTO_TCP: + if (tcp_skc) + sk = bpf_skc_lookup_tcp(ctx, tp, tplen, CUR_NS, 0); + else + sk = bpf_sk_lookup_tcp(ctx, tp, tplen, CUR_NS, 0); + break; + case IPPROTO_UDP: + sk = bpf_sk_lookup_udp(ctx, tp, tplen, CUR_NS, 0); + break; + default: + return; + } + + lookup_status = 0; + + if (sk) { + bpf_sk_release(sk); + lookup_status = 1; + } +} + +SEC("tc") +int tc_socket_lookup(struct __sk_buff *skb) +{ + void *data_end = (void *)(long)skb->data_end; + void *data = (void *)(long)skb->data; + + if (test_xdp) + return TC_ACT_UNSPEC; + + socket_lookup(skb, data_end, data); + return TC_ACT_UNSPEC; +} + +SEC("xdp") +int xdp_socket_lookup(struct xdp_md *xdp) +{ + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + + if (!test_xdp) + return XDP_PASS; + + socket_lookup(xdp, data_end, data); + return XDP_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c index e1c787815e44..b2dfd7066c6e 100644 --- a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c @@ -77,7 +77,9 @@ int rx(struct xdp_md *ctx) } err = bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp); - if (err) + if (!err) + meta->xdp_timestamp = bpf_ktime_get_tai_ns(); + else meta->rx_timestamp = 0; /* Used by AF_XDP as not avail signal */ err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type); diff --git a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c index a630c95c7471..24369f242853 100644 --- a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c +++ b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c @@ -15,12 +15,12 @@ struct { static unsigned int idx; int count = 0; -SEC("xdp") int xsk_def_prog(struct xdp_md *xdp) +SEC("xdp.frags") int xsk_def_prog(struct xdp_md *xdp) { return bpf_redirect_map(&xsk, 0, XDP_DROP); } -SEC("xdp") int xsk_xdp_drop(struct xdp_md *xdp) +SEC("xdp.frags") int xsk_xdp_drop(struct xdp_md *xdp) { /* Drop every other packet */ if (idx++ % 2) @@ -29,7 +29,7 @@ SEC("xdp") int xsk_xdp_drop(struct xdp_md *xdp) return bpf_redirect_map(&xsk, 0, XDP_DROP); } -SEC("xdp") int xsk_xdp_populate_metadata(struct xdp_md *xdp) +SEC("xdp.frags") int xsk_xdp_populate_metadata(struct xdp_md *xdp) { void *data, *data_meta; struct xdp_info *meta; diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index ea82921110da..4d582cac2c09 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -11,7 +11,6 @@ #include <signal.h> #include <string.h> #include <execinfo.h> /* backtrace */ -#include <linux/membarrier.h> #include <sys/sysinfo.h> /* get_nprocs */ #include <netinet/in.h> #include <sys/select.h> @@ -629,68 +628,6 @@ out: return err; } -static int finit_module(int fd, const char *param_values, int flags) -{ - return syscall(__NR_finit_module, fd, param_values, flags); -} - -static int delete_module(const char *name, int flags) -{ - return syscall(__NR_delete_module, name, flags); -} - -/* - * Trigger synchronize_rcu() in kernel. - */ -int kern_sync_rcu(void) -{ - return syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0, 0); -} - -static void unload_bpf_testmod(void) -{ - if (kern_sync_rcu()) - fprintf(env.stderr, "Failed to trigger kernel-side RCU sync!\n"); - if (delete_module("bpf_testmod", 0)) { - if (errno == ENOENT) { - if (verbose()) - fprintf(stdout, "bpf_testmod.ko is already unloaded.\n"); - return; - } - fprintf(env.stderr, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno); - return; - } - if (verbose()) - fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n"); -} - -static int load_bpf_testmod(void) -{ - int fd; - - /* ensure previous instance of the module is unloaded */ - unload_bpf_testmod(); - - if (verbose()) - fprintf(stdout, "Loading bpf_testmod.ko...\n"); - - fd = open("bpf_testmod.ko", O_RDONLY); - if (fd < 0) { - fprintf(env.stderr, "Can't find bpf_testmod.ko kernel module: %d\n", -errno); - return -ENOENT; - } - if (finit_module(fd, "", 0)) { - fprintf(env.stderr, "Failed to load bpf_testmod.ko into the kernel: %d\n", -errno); - close(fd); - return -EINVAL; - } - close(fd); - - if (verbose()) - fprintf(stdout, "Successfully loaded bpf_testmod.ko.\n"); - return 0; -} - /* extern declarations for test funcs */ #define DEFINE_TEST(name) \ extern void test_##name(void) __weak; \ @@ -714,7 +651,13 @@ static struct test_state test_states[ARRAY_SIZE(prog_test_defs)]; const char *argp_program_version = "test_progs 0.1"; const char *argp_program_bug_address = "<bpf@vger.kernel.org>"; -static const char argp_program_doc[] = "BPF selftests test runner"; +static const char argp_program_doc[] = +"BPF selftests test runner\v" +"Options accepting the NAMES parameter take either a comma-separated list\n" +"of test names, or a filename prefixed with @. The file contains one name\n" +"(or wildcard pattern) per line, and comments beginning with # are ignored.\n" +"\n" +"These options can be passed repeatedly to read multiple files.\n"; enum ARG_KEYS { ARG_TEST_NUM = 'n', @@ -797,6 +740,7 @@ extern int extra_prog_load_log_flags; static error_t parse_arg(int key, char *arg, struct argp_state *state) { struct test_env *env = state->input; + int err = 0; switch (key) { case ARG_TEST_NUM: { @@ -821,18 +765,28 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) } case ARG_TEST_NAME_GLOB_ALLOWLIST: case ARG_TEST_NAME: { - if (parse_test_list(arg, - &env->test_selector.whitelist, - key == ARG_TEST_NAME_GLOB_ALLOWLIST)) - return -ENOMEM; + if (arg[0] == '@') + err = parse_test_list_file(arg + 1, + &env->test_selector.whitelist, + key == ARG_TEST_NAME_GLOB_ALLOWLIST); + else + err = parse_test_list(arg, + &env->test_selector.whitelist, + key == ARG_TEST_NAME_GLOB_ALLOWLIST); + break; } case ARG_TEST_NAME_GLOB_DENYLIST: case ARG_TEST_NAME_BLACKLIST: { - if (parse_test_list(arg, - &env->test_selector.blacklist, - key == ARG_TEST_NAME_GLOB_DENYLIST)) - return -ENOMEM; + if (arg[0] == '@') + err = parse_test_list_file(arg + 1, + &env->test_selector.blacklist, + key == ARG_TEST_NAME_GLOB_DENYLIST); + else + err = parse_test_list(arg, + &env->test_selector.blacklist, + key == ARG_TEST_NAME_GLOB_DENYLIST); + break; } case ARG_VERIFIER_STATS: @@ -900,7 +854,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) default: return ARGP_ERR_UNKNOWN; } - return 0; + return err; } /* @@ -1703,9 +1657,14 @@ int main(int argc, char **argv) env.stderr = stderr; env.has_testmod = true; - if (!env.list_test_names && load_bpf_testmod()) { - fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n"); - env.has_testmod = false; + if (!env.list_test_names) { + /* ensure previous instance of the module is unloaded */ + unload_bpf_testmod(verbose()); + + if (load_bpf_testmod(verbose())) { + fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n"); + env.has_testmod = false; + } } /* initializing tests */ @@ -1802,7 +1761,7 @@ int main(int argc, char **argv) close(env.saved_netns_fd); out: if (!env.list_test_names && env.has_testmod) - unload_bpf_testmod(); + unload_bpf_testmod(verbose()); free_test_selector(&env.test_selector); free_test_selector(&env.subtest_selector); diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 0ed3134333d4..77bd492c6024 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -405,7 +405,6 @@ static inline void *u64_to_ptr(__u64 ptr) int bpf_find_map(const char *test, struct bpf_object *obj, const char *name); int compare_map_keys(int map1_fd, int map2_fd); int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len); -int kern_sync_rcu(void); int trigger_module_test_read(int read_sz); int trigger_module_test_write(int write_sz); int write_sysctl(const char *sysctl, const char *value); diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index e4657c5bc3f1..98107e0452d3 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -40,6 +40,7 @@ #include "bpf_util.h" #include "test_btf.h" #include "../../../include/linux/filter.h" +#include "testing_helpers.h" #ifndef ENOTSUPP #define ENOTSUPP 524 @@ -873,8 +874,140 @@ static int create_map_kptr(void) return fd; } +static void set_root(bool set) +{ + __u64 caps; + + if (set) { + if (cap_enable_effective(1ULL << CAP_SYS_ADMIN, &caps)) + perror("cap_disable_effective(CAP_SYS_ADMIN)"); + } else { + if (cap_disable_effective(1ULL << CAP_SYS_ADMIN, &caps)) + perror("cap_disable_effective(CAP_SYS_ADMIN)"); + } +} + +static __u64 ptr_to_u64(const void *ptr) +{ + return (uintptr_t) ptr; +} + +static struct btf *btf__load_testmod_btf(struct btf *vmlinux) +{ + struct bpf_btf_info info; + __u32 len = sizeof(info); + struct btf *btf = NULL; + char name[64]; + __u32 id = 0; + int err, fd; + + /* Iterate all loaded BTF objects and find bpf_testmod, + * we need SYS_ADMIN cap for that. + */ + set_root(true); + + while (true) { + err = bpf_btf_get_next_id(id, &id); + if (err) { + if (errno == ENOENT) + break; + perror("bpf_btf_get_next_id failed"); + break; + } + + fd = bpf_btf_get_fd_by_id(id); + if (fd < 0) { + if (errno == ENOENT) + continue; + perror("bpf_btf_get_fd_by_id failed"); + break; + } + + memset(&info, 0, sizeof(info)); + info.name_len = sizeof(name); + info.name = ptr_to_u64(name); + len = sizeof(info); + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + close(fd); + perror("bpf_obj_get_info_by_fd failed"); + break; + } + + if (strcmp("bpf_testmod", name)) { + close(fd); + continue; + } + + btf = btf__load_from_kernel_by_id_split(id, vmlinux); + if (!btf) { + close(fd); + break; + } + + /* We need the fd to stay open so it can be used in fd_array. + * The final cleanup call to btf__free will free btf object + * and close the file descriptor. + */ + btf__set_fd(btf, fd); + break; + } + + set_root(false); + return btf; +} + +static struct btf *testmod_btf; +static struct btf *vmlinux_btf; + +static void kfuncs_cleanup(void) +{ + btf__free(testmod_btf); + btf__free(vmlinux_btf); +} + +static void fixup_prog_kfuncs(struct bpf_insn *prog, int *fd_array, + struct kfunc_btf_id_pair *fixup_kfunc_btf_id) +{ + /* Patch in kfunc BTF IDs */ + while (fixup_kfunc_btf_id->kfunc) { + int btf_id = 0; + + /* try to find kfunc in kernel BTF */ + vmlinux_btf = vmlinux_btf ?: btf__load_vmlinux_btf(); + if (vmlinux_btf) { + btf_id = btf__find_by_name_kind(vmlinux_btf, + fixup_kfunc_btf_id->kfunc, + BTF_KIND_FUNC); + btf_id = btf_id < 0 ? 0 : btf_id; + } + + /* kfunc not found in kernel BTF, try bpf_testmod BTF */ + if (!btf_id) { + testmod_btf = testmod_btf ?: btf__load_testmod_btf(vmlinux_btf); + if (testmod_btf) { + btf_id = btf__find_by_name_kind(testmod_btf, + fixup_kfunc_btf_id->kfunc, + BTF_KIND_FUNC); + btf_id = btf_id < 0 ? 0 : btf_id; + if (btf_id) { + /* We put bpf_testmod module fd into fd_array + * and its index 1 into instruction 'off'. + */ + *fd_array = btf__fd(testmod_btf); + prog[fixup_kfunc_btf_id->insn_idx].off = 1; + } + } + } + + prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id; + fixup_kfunc_btf_id++; + } +} + static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, - struct bpf_insn *prog, int *map_fds) + struct bpf_insn *prog, int *map_fds, int *fd_array) { int *fixup_map_hash_8b = test->fixup_map_hash_8b; int *fixup_map_hash_48b = test->fixup_map_hash_48b; @@ -899,7 +1032,6 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, int *fixup_map_ringbuf = test->fixup_map_ringbuf; int *fixup_map_timer = test->fixup_map_timer; int *fixup_map_kptr = test->fixup_map_kptr; - struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id; if (test->fill_helper) { test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn)); @@ -1100,25 +1232,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type, } while (*fixup_map_kptr); } - /* Patch in kfunc BTF IDs */ - if (fixup_kfunc_btf_id->kfunc) { - struct btf *btf; - int btf_id; - - do { - btf_id = 0; - btf = btf__load_vmlinux_btf(); - if (btf) { - btf_id = btf__find_by_name_kind(btf, - fixup_kfunc_btf_id->kfunc, - BTF_KIND_FUNC); - btf_id = btf_id < 0 ? 0 : btf_id; - } - btf__free(btf); - prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id; - fixup_kfunc_btf_id++; - } while (fixup_kfunc_btf_id->kfunc); - } + fixup_prog_kfuncs(prog, fd_array, test->fixup_kfunc_btf_id); } struct libcap { @@ -1227,45 +1341,46 @@ static bool cmp_str_seq(const char *log, const char *exp) return true; } -static int get_xlated_program(int fd_prog, struct bpf_insn **buf, int *cnt) +static struct bpf_insn *get_xlated_program(int fd_prog, int *cnt) { + __u32 buf_element_size = sizeof(struct bpf_insn); struct bpf_prog_info info = {}; __u32 info_len = sizeof(info); __u32 xlated_prog_len; - __u32 buf_element_size = sizeof(struct bpf_insn); + struct bpf_insn *buf; if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) { perror("bpf_prog_get_info_by_fd failed"); - return -1; + return NULL; } xlated_prog_len = info.xlated_prog_len; if (xlated_prog_len % buf_element_size) { printf("Program length %d is not multiple of %d\n", xlated_prog_len, buf_element_size); - return -1; + return NULL; } *cnt = xlated_prog_len / buf_element_size; - *buf = calloc(*cnt, buf_element_size); + buf = calloc(*cnt, buf_element_size); if (!buf) { perror("can't allocate xlated program buffer"); - return -ENOMEM; + return NULL; } bzero(&info, sizeof(info)); info.xlated_prog_len = xlated_prog_len; - info.xlated_prog_insns = (__u64)(unsigned long)*buf; + info.xlated_prog_insns = (__u64)(unsigned long)buf; if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) { perror("second bpf_prog_get_info_by_fd failed"); goto out_free_buf; } - return 0; + return buf; out_free_buf: - free(*buf); - return -1; + free(buf); + return NULL; } static bool is_null_insn(struct bpf_insn *insn) @@ -1398,7 +1513,8 @@ static bool check_xlated_program(struct bpf_test *test, int fd_prog) if (!check_expected && !check_unexpected) goto out; - if (get_xlated_program(fd_prog, &buf, &cnt)) { + buf = get_xlated_program(fd_prog, &cnt); + if (!buf) { printf("FAIL: can't get xlated program\n"); result = false; goto out; @@ -1445,6 +1561,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, int run_errs, run_successes; int map_fds[MAX_NR_MAPS]; const char *expected_err; + int fd_array[2] = { -1, -1 }; int saved_errno; int fixup_skips; __u32 pflags; @@ -1458,7 +1575,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, if (!prog_type) prog_type = BPF_PROG_TYPE_SOCKET_FILTER; fixup_skips = skips; - do_test_fixup(test, prog_type, prog, map_fds); + do_test_fixup(test, prog_type, prog, map_fds, &fd_array[1]); if (test->fill_insns) { prog = test->fill_insns; prog_len = test->prog_len; @@ -1492,6 +1609,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv, else opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL; opts.prog_flags = pflags; + if (fd_array[1] != -1) + opts.fd_array = &fd_array[0]; if ((prog_type == BPF_PROG_TYPE_TRACING || prog_type == BPF_PROG_TYPE_LSM) && test->kfunc) { @@ -1684,6 +1803,12 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to) { int i, passes = 0, errors = 0; + /* ensure previous instance of the module is unloaded */ + unload_bpf_testmod(verbose); + + if (load_bpf_testmod(verbose)) + return EXIT_FAILURE; + for (i = from; i < to; i++) { struct bpf_test *test = &tests[i]; @@ -1711,6 +1836,9 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to) } } + unload_bpf_testmod(verbose); + kfuncs_cleanup(); + printf("Summary: %d PASSED, %d SKIPPED, %d FAILED\n", passes, skips, errors); return errors ? EXIT_FAILURE : EXIT_SUCCESS; @@ -1752,7 +1880,7 @@ int main(int argc, char **argv) } } - get_unpriv_disabled(); + unpriv_disabled = get_unpriv_disabled(); if (unpriv && unpriv_disabled) { printf("Cannot run as unprivileged user with sysctl %s.\n", UNPRIV_SYSCTL); diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh index 377fb157a57c..2aa5a3445056 100755 --- a/tools/testing/selftests/bpf/test_xsk.sh +++ b/tools/testing/selftests/bpf/test_xsk.sh @@ -68,9 +68,6 @@ # Run with verbose output: # sudo ./test_xsk.sh -v # -# Run and dump packet contents: -# sudo ./test_xsk.sh -D -# # Set up veth interfaces and leave them up so xskxceiver can be launched in a debugger: # sudo ./test_xsk.sh -d # @@ -81,11 +78,10 @@ ETH="" -while getopts "vDi:d" flag +while getopts "vi:d" flag do case "${flag}" in v) verbose=1;; - D) dump_pkts=1;; d) debug=1;; i) ETH=${OPTARG};; esac @@ -157,10 +153,6 @@ if [[ $verbose -eq 1 ]]; then ARGS+="-v " fi -if [[ $dump_pkts -eq 1 ]]; then - ARGS="-D " -fi - retval=$? test_status $retval "${TEST_NAME}" @@ -179,7 +171,10 @@ exec_xskxceiver if [ -z $ETH ]; then cleanup_exit ${VETH0} ${VETH1} +else + cleanup_iface ${ETH} ${MTU} fi + TEST_NAME="XSK_SELFTESTS_${VETH0}_BUSY_POLL" busy_poll=1 @@ -192,6 +187,8 @@ exec_xskxceiver if [ -z $ETH ]; then cleanup_exit ${VETH0} ${VETH1} +else + cleanup_iface ${ETH} ${MTU} fi failures=0 diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index 0b5e0829e5be..8d994884c7b4 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) /* Copyright (C) 2019 Netronome Systems, Inc. */ /* Copyright (C) 2020 Facebook, Inc. */ +#include <ctype.h> #include <stdlib.h> #include <string.h> #include <errno.h> @@ -8,6 +9,7 @@ #include <bpf/libbpf.h> #include "test_progs.h" #include "testing_helpers.h" +#include <linux/membarrier.h> int parse_num_list(const char *s, bool **num_set, int *num_set_len) { @@ -70,92 +72,168 @@ int parse_num_list(const char *s, bool **num_set, int *num_set_len) return 0; } -int parse_test_list(const char *s, - struct test_filter_set *set, - bool is_glob_pattern) +static int do_insert_test(struct test_filter_set *set, + char *test_str, + char *subtest_str) { - char *input, *state = NULL, *next; - struct test_filter *tmp, *tests = NULL; - int i, j, cnt = 0; + struct test_filter *tmp, *test; + char **ctmp; + int i; - input = strdup(s); - if (!input) + for (i = 0; i < set->cnt; i++) { + test = &set->tests[i]; + + if (strcmp(test_str, test->name) == 0) { + free(test_str); + goto subtest; + } + } + + tmp = realloc(set->tests, sizeof(*test) * (set->cnt + 1)); + if (!tmp) return -ENOMEM; - while ((next = strtok_r(state ? NULL : input, ",", &state))) { - char *subtest_str = strchr(next, '/'); - char *pattern = NULL; - int glob_chars = 0; + set->tests = tmp; + test = &set->tests[set->cnt]; - tmp = realloc(tests, sizeof(*tests) * (cnt + 1)); - if (!tmp) - goto err; - tests = tmp; + test->name = test_str; + test->subtests = NULL; + test->subtest_cnt = 0; - tests[cnt].subtest_cnt = 0; - tests[cnt].subtests = NULL; + set->cnt++; - if (is_glob_pattern) { - pattern = "%s"; - } else { - pattern = "*%s*"; - glob_chars = 2; - } +subtest: + if (!subtest_str) + return 0; - if (subtest_str) { - char **tmp_subtests = NULL; - int subtest_cnt = tests[cnt].subtest_cnt; - - *subtest_str = '\0'; - subtest_str += 1; - tmp_subtests = realloc(tests[cnt].subtests, - sizeof(*tmp_subtests) * - (subtest_cnt + 1)); - if (!tmp_subtests) - goto err; - tests[cnt].subtests = tmp_subtests; - - tests[cnt].subtests[subtest_cnt] = - malloc(strlen(subtest_str) + glob_chars + 1); - if (!tests[cnt].subtests[subtest_cnt]) - goto err; - sprintf(tests[cnt].subtests[subtest_cnt], - pattern, - subtest_str); - - tests[cnt].subtest_cnt++; + for (i = 0; i < test->subtest_cnt; i++) { + if (strcmp(subtest_str, test->subtests[i]) == 0) { + free(subtest_str); + return 0; } + } - tests[cnt].name = malloc(strlen(next) + glob_chars + 1); - if (!tests[cnt].name) - goto err; - sprintf(tests[cnt].name, pattern, next); + ctmp = realloc(test->subtests, + sizeof(*test->subtests) * (test->subtest_cnt + 1)); + if (!ctmp) + return -ENOMEM; - cnt++; + test->subtests = ctmp; + test->subtests[test->subtest_cnt] = subtest_str; + + test->subtest_cnt++; + + return 0; +} + +static int insert_test(struct test_filter_set *set, + char *test_spec, + bool is_glob_pattern) +{ + char *pattern, *subtest_str, *ext_test_str, *ext_subtest_str = NULL; + int glob_chars = 0; + + if (is_glob_pattern) { + pattern = "%s"; + } else { + pattern = "*%s*"; + glob_chars = 2; } - tmp = realloc(set->tests, sizeof(*tests) * (cnt + set->cnt)); - if (!tmp) + subtest_str = strchr(test_spec, '/'); + if (subtest_str) { + *subtest_str = '\0'; + subtest_str += 1; + } + + ext_test_str = malloc(strlen(test_spec) + glob_chars + 1); + if (!ext_test_str) goto err; - memcpy(tmp + set->cnt, tests, sizeof(*tests) * cnt); - set->tests = tmp; - set->cnt += cnt; + sprintf(ext_test_str, pattern, test_spec); - free(tests); - free(input); - return 0; + if (subtest_str) { + ext_subtest_str = malloc(strlen(subtest_str) + glob_chars + 1); + if (!ext_subtest_str) + goto err; + + sprintf(ext_subtest_str, pattern, subtest_str); + } + + return do_insert_test(set, ext_test_str, ext_subtest_str); err: - for (i = 0; i < cnt; i++) { - for (j = 0; j < tests[i].subtest_cnt; j++) - free(tests[i].subtests[j]); + free(ext_test_str); + free(ext_subtest_str); + + return -ENOMEM; +} + +int parse_test_list_file(const char *path, + struct test_filter_set *set, + bool is_glob_pattern) +{ + char *buf = NULL, *capture_start, *capture_end, *scan_end; + size_t buflen = 0; + int err = 0; + FILE *f; + + f = fopen(path, "r"); + if (!f) { + err = -errno; + fprintf(stderr, "Failed to open '%s': %d\n", path, err); + return err; + } + + while (getline(&buf, &buflen, f) != -1) { + capture_start = buf; + + while (isspace(*capture_start)) + ++capture_start; + + capture_end = capture_start; + scan_end = capture_start; + + while (*scan_end && *scan_end != '#') { + if (!isspace(*scan_end)) + capture_end = scan_end; + + ++scan_end; + } + + if (capture_end == capture_start) + continue; + + *(++capture_end) = '\0'; + + err = insert_test(set, capture_start, is_glob_pattern); + if (err) + break; + } + + fclose(f); + return err; +} + +int parse_test_list(const char *s, + struct test_filter_set *set, + bool is_glob_pattern) +{ + char *input, *state = NULL, *test_spec; + int err = 0; + + input = strdup(s); + if (!input) + return -ENOMEM; - free(tests[i].name); + while ((test_spec = strtok_r(state ? NULL : input, ",", &state))) { + err = insert_test(set, test_spec, is_glob_pattern); + if (err) + break; } - free(tests); + free(input); - return -ENOMEM; + return err; } __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info) @@ -249,3 +327,63 @@ __u64 read_perf_max_sample_freq(void) fclose(f); return sample_freq; } + +static int finit_module(int fd, const char *param_values, int flags) +{ + return syscall(__NR_finit_module, fd, param_values, flags); +} + +static int delete_module(const char *name, int flags) +{ + return syscall(__NR_delete_module, name, flags); +} + +int unload_bpf_testmod(bool verbose) +{ + if (kern_sync_rcu()) + fprintf(stdout, "Failed to trigger kernel-side RCU sync!\n"); + if (delete_module("bpf_testmod", 0)) { + if (errno == ENOENT) { + if (verbose) + fprintf(stdout, "bpf_testmod.ko is already unloaded.\n"); + return -1; + } + fprintf(stdout, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno); + return -1; + } + if (verbose) + fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n"); + return 0; +} + +int load_bpf_testmod(bool verbose) +{ + int fd; + + if (verbose) + fprintf(stdout, "Loading bpf_testmod.ko...\n"); + + fd = open("bpf_testmod.ko", O_RDONLY); + if (fd < 0) { + fprintf(stdout, "Can't find bpf_testmod.ko kernel module: %d\n", -errno); + return -ENOENT; + } + if (finit_module(fd, "", 0)) { + fprintf(stdout, "Failed to load bpf_testmod.ko into the kernel: %d\n", -errno); + close(fd); + return -EINVAL; + } + close(fd); + + if (verbose) + fprintf(stdout, "Successfully loaded bpf_testmod.ko.\n"); + return 0; +} + +/* + * Trigger synchronize_rcu() in kernel. + */ +int kern_sync_rcu(void) +{ + return syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0, 0); +} diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index eb8790f928e4..5b7a55136741 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -1,8 +1,13 @@ /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ /* Copyright (C) 2020 Facebook, Inc. */ + +#ifndef __TESTING_HELPERS_H +#define __TESTING_HELPERS_H + #include <stdbool.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> +#include <time.h> int parse_num_list(const char *s, bool **set, int *set_len); __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info); @@ -20,5 +25,22 @@ struct test_filter_set; int parse_test_list(const char *s, struct test_filter_set *test_set, bool is_glob_pattern); +int parse_test_list_file(const char *path, + struct test_filter_set *test_set, + bool is_glob_pattern); __u64 read_perf_max_sample_freq(void); +int load_bpf_testmod(bool verbose); +int unload_bpf_testmod(bool verbose); +int kern_sync_rcu(void); + +static inline __u64 get_time_ns(void) +{ + struct timespec t; + + clock_gettime(CLOCK_MONOTONIC, &t); + + return (u64)t.tv_sec * 1000000000 + t.tv_nsec; +} + +#endif /* __TESTING_HELPERS_H */ diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 9b070cdf44ac..f83d9f65c65b 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -18,7 +18,7 @@ #define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe" #define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe" -#define MAX_SYMS 300000 +#define MAX_SYMS 400000 static struct ksym syms[MAX_SYMS]; static int sym_cnt; @@ -46,6 +46,9 @@ int load_kallsyms_refresh(void) break; if (!addr) continue; + if (i >= MAX_SYMS) + return -EFBIG; + syms[i].addr = (long) addr; syms[i].name = strdup(func); i++; diff --git a/tools/testing/selftests/bpf/uprobe_multi.c b/tools/testing/selftests/bpf/uprobe_multi.c new file mode 100644 index 000000000000..a61ceab60b68 --- /dev/null +++ b/tools/testing/selftests/bpf/uprobe_multi.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <string.h> +#include <sdt.h> + +#define __PASTE(a, b) a##b +#define PASTE(a, b) __PASTE(a, b) + +#define NAME(name, idx) PASTE(name, idx) + +#define DEF(name, idx) int NAME(name, idx)(void) { return 0; } +#define CALL(name, idx) NAME(name, idx)(); + +#define F(body, name, idx) body(name, idx) + +#define F10(body, name, idx) \ + F(body, PASTE(name, idx), 0) F(body, PASTE(name, idx), 1) F(body, PASTE(name, idx), 2) \ + F(body, PASTE(name, idx), 3) F(body, PASTE(name, idx), 4) F(body, PASTE(name, idx), 5) \ + F(body, PASTE(name, idx), 6) F(body, PASTE(name, idx), 7) F(body, PASTE(name, idx), 8) \ + F(body, PASTE(name, idx), 9) + +#define F100(body, name, idx) \ + F10(body, PASTE(name, idx), 0) F10(body, PASTE(name, idx), 1) F10(body, PASTE(name, idx), 2) \ + F10(body, PASTE(name, idx), 3) F10(body, PASTE(name, idx), 4) F10(body, PASTE(name, idx), 5) \ + F10(body, PASTE(name, idx), 6) F10(body, PASTE(name, idx), 7) F10(body, PASTE(name, idx), 8) \ + F10(body, PASTE(name, idx), 9) + +#define F1000(body, name, idx) \ + F100(body, PASTE(name, idx), 0) F100(body, PASTE(name, idx), 1) F100(body, PASTE(name, idx), 2) \ + F100(body, PASTE(name, idx), 3) F100(body, PASTE(name, idx), 4) F100(body, PASTE(name, idx), 5) \ + F100(body, PASTE(name, idx), 6) F100(body, PASTE(name, idx), 7) F100(body, PASTE(name, idx), 8) \ + F100(body, PASTE(name, idx), 9) + +#define F10000(body, name, idx) \ + F1000(body, PASTE(name, idx), 0) F1000(body, PASTE(name, idx), 1) F1000(body, PASTE(name, idx), 2) \ + F1000(body, PASTE(name, idx), 3) F1000(body, PASTE(name, idx), 4) F1000(body, PASTE(name, idx), 5) \ + F1000(body, PASTE(name, idx), 6) F1000(body, PASTE(name, idx), 7) F1000(body, PASTE(name, idx), 8) \ + F1000(body, PASTE(name, idx), 9) + +F10000(DEF, uprobe_multi_func_, 0) +F10000(DEF, uprobe_multi_func_, 1) +F10000(DEF, uprobe_multi_func_, 2) +F10000(DEF, uprobe_multi_func_, 3) +F10000(DEF, uprobe_multi_func_, 4) + +static int bench(void) +{ + F10000(CALL, uprobe_multi_func_, 0) + F10000(CALL, uprobe_multi_func_, 1) + F10000(CALL, uprobe_multi_func_, 2) + F10000(CALL, uprobe_multi_func_, 3) + F10000(CALL, uprobe_multi_func_, 4) + return 0; +} + +#define PROBE STAP_PROBE(test, usdt); + +#define PROBE10 PROBE PROBE PROBE PROBE PROBE \ + PROBE PROBE PROBE PROBE PROBE +#define PROBE100 PROBE10 PROBE10 PROBE10 PROBE10 PROBE10 \ + PROBE10 PROBE10 PROBE10 PROBE10 PROBE10 +#define PROBE1000 PROBE100 PROBE100 PROBE100 PROBE100 PROBE100 \ + PROBE100 PROBE100 PROBE100 PROBE100 PROBE100 +#define PROBE10000 PROBE1000 PROBE1000 PROBE1000 PROBE1000 PROBE1000 \ + PROBE1000 PROBE1000 PROBE1000 PROBE1000 PROBE1000 + +static int usdt(void) +{ + PROBE10000 + PROBE10000 + PROBE10000 + PROBE10000 + PROBE10000 + return 0; +} + +int main(int argc, char **argv) +{ + if (argc != 2) + goto error; + + if (!strcmp("bench", argv[1])) + return bench(); + if (!strcmp("usdt", argv[1])) + return usdt(); + +error: + fprintf(stderr, "usage: %s <bench|usdt>\n", argv[0]); + return -1; +} diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c index b39665f33524..319337bdcfc8 100644 --- a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c +++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c @@ -242,4 +242,5 @@ .result = REJECT, .errstr = "R0 invalid mem access", .errstr_unpriv = "R10 partial copy of pointer", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, diff --git a/tools/testing/selftests/bpf/verifier/basic_instr.c b/tools/testing/selftests/bpf/verifier/basic_instr.c index 071dbc889e8c..bd928a72ad73 100644 --- a/tools/testing/selftests/bpf/verifier/basic_instr.c +++ b/tools/testing/selftests/bpf/verifier/basic_instr.c @@ -176,11 +176,11 @@ .retval = 1, }, { - "invalid 64-bit BPF_END", + "invalid 64-bit BPF_END with BPF_TO_BE", .insns = { BPF_MOV32_IMM(BPF_REG_0, 0), { - .code = BPF_ALU64 | BPF_END | BPF_TO_LE, + .code = BPF_ALU64 | BPF_END | BPF_TO_BE, .dst_reg = BPF_REG_0, .src_reg = 0, .off = 0, @@ -188,7 +188,7 @@ }, BPF_EXIT_INSN(), }, - .errstr = "unknown opcode d7", + .errstr = "unknown opcode df", .result = REJECT, }, { diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c index 83cecfbd6739..0b394a7f7a2d 100644 --- a/tools/testing/selftests/bpf/verifier/ctx_skb.c +++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c @@ -1169,6 +1169,7 @@ }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SK_SKB, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "pkt_end < pkt taken check", @@ -1190,4 +1191,5 @@ }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_SK_SKB, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c index 1a27a6210554..43776f6f92f4 100644 --- a/tools/testing/selftests/bpf/verifier/jmp32.c +++ b/tools/testing/selftests/bpf/verifier/jmp32.c @@ -290,6 +290,7 @@ .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jgt32: BPF_K", @@ -360,6 +361,7 @@ .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jle32: BPF_K", @@ -430,6 +432,7 @@ .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jlt32: BPF_K", @@ -500,6 +503,7 @@ .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jsge32: BPF_K", @@ -570,6 +574,7 @@ .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jsgt32: BPF_K", @@ -640,6 +645,7 @@ .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jsle32: BPF_K", @@ -710,6 +716,7 @@ .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jslt32: BPF_K", @@ -780,6 +787,7 @@ .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "jgt32: range bound deduction, reg op imm", diff --git a/tools/testing/selftests/bpf/verifier/map_kptr.c b/tools/testing/selftests/bpf/verifier/map_kptr.c index a0cfc06d75bc..d25c3e9605f1 100644 --- a/tools/testing/selftests/bpf/verifier/map_kptr.c +++ b/tools/testing/selftests/bpf/verifier/map_kptr.c @@ -68,6 +68,7 @@ .fixup_map_kptr = { 1 }, .result = REJECT, .errstr = "kptr access cannot have variable offset", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "map_kptr: bpf_kptr_xchg non-const var_off", @@ -121,6 +122,7 @@ .fixup_map_kptr = { 1 }, .result = REJECT, .errstr = "kptr access misaligned expected=0 off=7", + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { "map_kptr: reject var_off != 0", diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c index 6c03a7d805f9..0d84dd1f38b6 100644 --- a/tools/testing/selftests/bpf/verifier/precise.c +++ b/tools/testing/selftests/bpf/verifier/precise.c @@ -38,25 +38,24 @@ .fixup_map_array_48b = { 1 }, .result = VERBOSE_ACCEPT, .errstr = - "26: (85) call bpf_probe_read_kernel#113\ - last_idx 26 first_idx 20\ - regs=4 stack=0 before 25\ - regs=4 stack=0 before 24\ - regs=4 stack=0 before 23\ - regs=4 stack=0 before 22\ - regs=4 stack=0 before 20\ - parent didn't have regs=4 stack=0 marks\ - last_idx 19 first_idx 10\ - regs=4 stack=0 before 19\ - regs=200 stack=0 before 18\ - regs=300 stack=0 before 17\ - regs=201 stack=0 before 15\ - regs=201 stack=0 before 14\ - regs=200 stack=0 before 13\ - regs=200 stack=0 before 12\ - regs=200 stack=0 before 11\ - regs=200 stack=0 before 10\ - parent already had regs=0 stack=0 marks", + "mark_precise: frame0: last_idx 26 first_idx 20\ + mark_precise: frame0: regs=r2 stack= before 25\ + mark_precise: frame0: regs=r2 stack= before 24\ + mark_precise: frame0: regs=r2 stack= before 23\ + mark_precise: frame0: regs=r2 stack= before 22\ + mark_precise: frame0: regs=r2 stack= before 20\ + mark_precise: frame0: parent state regs=r2 stack=:\ + mark_precise: frame0: last_idx 19 first_idx 10\ + mark_precise: frame0: regs=r2,r9 stack= before 19\ + mark_precise: frame0: regs=r9 stack= before 18\ + mark_precise: frame0: regs=r8,r9 stack= before 17\ + mark_precise: frame0: regs=r0,r9 stack= before 15\ + mark_precise: frame0: regs=r0,r9 stack= before 14\ + mark_precise: frame0: regs=r9 stack= before 13\ + mark_precise: frame0: regs=r9 stack= before 12\ + mark_precise: frame0: regs=r9 stack= before 11\ + mark_precise: frame0: regs=r9 stack= before 10\ + mark_precise: frame0: parent state regs= stack=:", }, { "precise: test 2", @@ -100,20 +99,20 @@ .flags = BPF_F_TEST_STATE_FREQ, .errstr = "26: (85) call bpf_probe_read_kernel#113\ - last_idx 26 first_idx 22\ - regs=4 stack=0 before 25\ - regs=4 stack=0 before 24\ - regs=4 stack=0 before 23\ - regs=4 stack=0 before 22\ - parent didn't have regs=4 stack=0 marks\ - last_idx 20 first_idx 20\ - regs=4 stack=0 before 20\ - parent didn't have regs=4 stack=0 marks\ - last_idx 19 first_idx 17\ - regs=4 stack=0 before 19\ - regs=200 stack=0 before 18\ - regs=300 stack=0 before 17\ - parent already had regs=0 stack=0 marks", + mark_precise: frame0: last_idx 26 first_idx 22\ + mark_precise: frame0: regs=r2 stack= before 25\ + mark_precise: frame0: regs=r2 stack= before 24\ + mark_precise: frame0: regs=r2 stack= before 23\ + mark_precise: frame0: regs=r2 stack= before 22\ + mark_precise: frame0: parent state regs=r2 stack=:\ + mark_precise: frame0: last_idx 20 first_idx 20\ + mark_precise: frame0: regs=r2,r9 stack= before 20\ + mark_precise: frame0: parent state regs=r2,r9 stack=:\ + mark_precise: frame0: last_idx 19 first_idx 17\ + mark_precise: frame0: regs=r2,r9 stack= before 19\ + mark_precise: frame0: regs=r9 stack= before 18\ + mark_precise: frame0: regs=r8,r9 stack= before 17\ + mark_precise: frame0: parent state regs= stack=:", }, { "precise: cross frame pruning", @@ -153,15 +152,16 @@ }, .prog_type = BPF_PROG_TYPE_XDP, .flags = BPF_F_TEST_STATE_FREQ, - .errstr = "5: (2d) if r4 > r0 goto pc+0\ - last_idx 5 first_idx 5\ - parent didn't have regs=10 stack=0 marks\ - last_idx 4 first_idx 2\ - regs=10 stack=0 before 4\ - regs=10 stack=0 before 3\ - regs=0 stack=1 before 2\ - last_idx 5 first_idx 5\ - parent didn't have regs=1 stack=0 marks", + .errstr = "mark_precise: frame0: last_idx 5 first_idx 5\ + mark_precise: frame0: parent state regs=r4 stack=:\ + mark_precise: frame0: last_idx 4 first_idx 2\ + mark_precise: frame0: regs=r4 stack= before 4\ + mark_precise: frame0: regs=r4 stack= before 3\ + mark_precise: frame0: regs= stack=-8 before 2\ + mark_precise: frame0: falling back to forcing all scalars precise\ + force_precise: frame0: forcing r0 to be precise\ + mark_precise: frame0: last_idx 5 first_idx 5\ + mark_precise: frame0: parent state regs= stack=:", .result = VERBOSE_ACCEPT, .retval = -1, }, @@ -179,16 +179,19 @@ }, .prog_type = BPF_PROG_TYPE_XDP, .flags = BPF_F_TEST_STATE_FREQ, - .errstr = "last_idx 6 first_idx 6\ - parent didn't have regs=10 stack=0 marks\ - last_idx 5 first_idx 3\ - regs=10 stack=0 before 5\ - regs=10 stack=0 before 4\ - regs=0 stack=1 before 3\ - last_idx 6 first_idx 6\ - parent didn't have regs=1 stack=0 marks\ - last_idx 5 first_idx 3\ - regs=1 stack=0 before 5", + .errstr = "mark_precise: frame0: last_idx 6 first_idx 6\ + mark_precise: frame0: parent state regs=r4 stack=:\ + mark_precise: frame0: last_idx 5 first_idx 3\ + mark_precise: frame0: regs=r4 stack= before 5\ + mark_precise: frame0: regs=r4 stack= before 4\ + mark_precise: frame0: regs= stack=-8 before 3\ + mark_precise: frame0: falling back to forcing all scalars precise\ + force_precise: frame0: forcing r0 to be precise\ + force_precise: frame0: forcing r0 to be precise\ + force_precise: frame0: forcing r0 to be precise\ + force_precise: frame0: forcing r0 to be precise\ + mark_precise: frame0: last_idx 6 first_idx 6\ + mark_precise: frame0: parent state regs= stack=:", .result = VERBOSE_ACCEPT, .retval = -1, }, @@ -213,7 +216,43 @@ }, .fixup_map_ringbuf = { 1 }, .prog_type = BPF_PROG_TYPE_XDP, - .flags = BPF_F_TEST_STATE_FREQ, + .flags = BPF_F_TEST_STATE_FREQ | F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, .errstr = "invalid access to memory, mem_size=1 off=42 size=8", .result = REJECT, }, +{ + "precise: program doesn't prematurely prune branches", + .insns = { + BPF_ALU64_IMM(BPF_MOV, BPF_REG_6, 0x400), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_8, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_9, 0x80000000), + BPF_ALU64_IMM(BPF_MOD, BPF_REG_6, 0x401), + BPF_JMP_IMM(BPF_JA, 0, 0, 0), + BPF_JMP_REG(BPF_JLE, BPF_REG_6, BPF_REG_9, 2), + BPF_ALU64_IMM(BPF_MOD, BPF_REG_6, 1), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_9, 0), + BPF_JMP_REG(BPF_JLE, BPF_REG_6, BPF_REG_9, 1), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_6, 0), + BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), + BPF_LD_MAP_FD(BPF_REG_4, 0), + BPF_ALU64_REG(BPF_MOV, BPF_REG_1, BPF_REG_4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_RSH, BPF_REG_6, 10), + BPF_ALU64_IMM(BPF_MUL, BPF_REG_6, 8192), + BPF_ALU64_REG(BPF_MOV, BPF_REG_1, BPF_REG_0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6), + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_3, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 13 }, + .prog_type = BPF_PROG_TYPE_XDP, + .result = REJECT, + .errstr = "register with unbounded min value is not allowed", +}, diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 1db7185181da..655095810d4a 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -141,6 +141,7 @@ static struct env { bool verbose; bool debug; bool quiet; + bool force_checkpoints; enum resfmt out_fmt; bool show_version; bool comparison_mode; @@ -209,6 +210,8 @@ static const struct argp_option opts[] = { { "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" }, { "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" }, { "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" }, + { "test-states", 't', NULL, 0, + "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" }, { "quiet", 'q', NULL, 0, "Quiet mode" }, { "emit", 'e', "SPEC", 0, "Specify stats to be emitted" }, { "sort", 's', "SPEC", 0, "Specify sort order" }, @@ -284,6 +287,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) argp_usage(state); } break; + case 't': + env.force_checkpoints = true; + break; case 'C': env.comparison_mode = true; break; @@ -989,6 +995,9 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf /* increase chances of successful BPF object loading */ fixup_obj(obj, prog, base_filename); + if (env.force_checkpoints) + bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ); + err = bpf_object__load(obj); env.progs_processed++; diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 987cf0db5ebc..613321eb84c1 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -27,6 +27,7 @@ #include <sys/mman.h> #include <net/if.h> #include <poll.h> +#include <time.h> #include "xdp_metadata.h" @@ -134,18 +135,52 @@ static void refill_rx(struct xsk *xsk, __u64 addr) } } -static void verify_xdp_metadata(void *data) +#define NANOSEC_PER_SEC 1000000000 /* 10^9 */ +static __u64 gettime(clockid_t clock_id) +{ + struct timespec t; + int res; + + /* See man clock_gettime(2) for type of clock_id's */ + res = clock_gettime(clock_id, &t); + + if (res < 0) + error(res, errno, "Error with clock_gettime()"); + + return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec; +} + +static void verify_xdp_metadata(void *data, clockid_t clock_id) { struct xdp_meta *meta; meta = data - sizeof(*meta); - printf("rx_timestamp: %llu\n", meta->rx_timestamp); if (meta->rx_hash_err < 0) printf("No rx_hash err=%d\n", meta->rx_hash_err); else printf("rx_hash: 0x%X with RSS type:0x%X\n", meta->rx_hash, meta->rx_hash_type); + + printf("rx_timestamp: %llu (sec:%0.4f)\n", meta->rx_timestamp, + (double)meta->rx_timestamp / NANOSEC_PER_SEC); + if (meta->rx_timestamp) { + __u64 usr_clock = gettime(clock_id); + __u64 xdp_clock = meta->xdp_timestamp; + __s64 delta_X = xdp_clock - meta->rx_timestamp; + __s64 delta_X2U = usr_clock - xdp_clock; + + printf("XDP RX-time: %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n", + xdp_clock, (double)xdp_clock / NANOSEC_PER_SEC, + (double)delta_X / NANOSEC_PER_SEC, + (double)delta_X / 1000); + + printf("AF_XDP time: %llu (sec:%0.4f) delta sec:%0.4f (%0.3f usec)\n", + usr_clock, (double)usr_clock / NANOSEC_PER_SEC, + (double)delta_X2U / NANOSEC_PER_SEC, + (double)delta_X2U / 1000); + } + } static void verify_skb_metadata(int fd) @@ -193,7 +228,7 @@ static void verify_skb_metadata(int fd) printf("skb hwtstamp is not found!\n"); } -static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd) +static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id) { const struct xdp_desc *rx_desc; struct pollfd fds[rxq + 1]; @@ -243,7 +278,8 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd) addr = xsk_umem__add_offset_to_addr(rx_desc->addr); printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n", xsk, idx, rx_desc->addr, addr, comp_addr); - verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr)); + verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr), + clock_id); xsk_ring_cons__release(&xsk->rx, 1); refill_rx(xsk, comp_addr); } @@ -370,6 +406,7 @@ static void timestamping_enable(int fd, int val) int main(int argc, char *argv[]) { + clockid_t clock_id = CLOCK_TAI; int server_fd = -1; int ret; int i; @@ -443,7 +480,7 @@ int main(int argc, char *argv[]) error(1, -ret, "bpf_xdp_attach"); signal(SIGINT, handle_signal); - ret = verify_metadata(rx_xsk, rxq, server_fd); + ret = verify_metadata(rx_xsk, rxq, server_fd, clock_id); close(server_fd); cleanup(); if (ret) diff --git a/tools/testing/selftests/bpf/xdp_metadata.h b/tools/testing/selftests/bpf/xdp_metadata.h index 0c4624dc6f2f..938a729bd307 100644 --- a/tools/testing/selftests/bpf/xdp_metadata.h +++ b/tools/testing/selftests/bpf/xdp_metadata.h @@ -11,6 +11,7 @@ struct xdp_meta { __u64 rx_timestamp; + __u64 xdp_timestamp; __u32 rx_hash; union { __u32 rx_hash_type; diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c index 687d83e707f8..d9fb2b730a2c 100644 --- a/tools/testing/selftests/bpf/xsk.c +++ b/tools/testing/selftests/bpf/xsk.c @@ -18,17 +18,19 @@ #include <linux/ethtool.h> #include <linux/filter.h> #include <linux/if_ether.h> +#include <linux/if_link.h> #include <linux/if_packet.h> #include <linux/if_xdp.h> #include <linux/kernel.h> #include <linux/list.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> #include <linux/sockios.h> #include <net/if.h> #include <sys/ioctl.h> #include <sys/mman.h> #include <sys/socket.h> #include <sys/types.h> -#include <linux/if_link.h> #include <bpf/bpf.h> #include <bpf/libbpf.h> @@ -81,6 +83,12 @@ struct xsk_socket { int fd; }; +struct nl_mtu_req { + struct nlmsghdr nh; + struct ifinfomsg msg; + char buf[512]; +}; + int xsk_umem__fd(const struct xsk_umem *umem) { return umem ? umem->fd : -EINVAL; @@ -286,6 +294,132 @@ bool xsk_is_in_mode(u32 ifindex, int mode) return false; } +/* Lifted from netlink.c in tools/lib/bpf */ +static int netlink_recvmsg(int sock, struct msghdr *mhdr, int flags) +{ + int len; + + do { + len = recvmsg(sock, mhdr, flags); + } while (len < 0 && (errno == EINTR || errno == EAGAIN)); + + if (len < 0) + return -errno; + return len; +} + +/* Lifted from netlink.c in tools/lib/bpf */ +static int alloc_iov(struct iovec *iov, int len) +{ + void *nbuf; + + nbuf = realloc(iov->iov_base, len); + if (!nbuf) + return -ENOMEM; + + iov->iov_base = nbuf; + iov->iov_len = len; + return 0; +} + +/* Original version lifted from netlink.c in tools/lib/bpf */ +static int netlink_recv(int sock) +{ + struct iovec iov = {}; + struct msghdr mhdr = { + .msg_iov = &iov, + .msg_iovlen = 1, + }; + bool multipart = true; + struct nlmsgerr *err; + struct nlmsghdr *nh; + int len, ret; + + ret = alloc_iov(&iov, 4096); + if (ret) + goto done; + + while (multipart) { + multipart = false; + len = netlink_recvmsg(sock, &mhdr, MSG_PEEK | MSG_TRUNC); + if (len < 0) { + ret = len; + goto done; + } + + if (len > iov.iov_len) { + ret = alloc_iov(&iov, len); + if (ret) + goto done; + } + + len = netlink_recvmsg(sock, &mhdr, 0); + if (len < 0) { + ret = len; + goto done; + } + + if (len == 0) + break; + + for (nh = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(nh, len); + nh = NLMSG_NEXT(nh, len)) { + if (nh->nlmsg_flags & NLM_F_MULTI) + multipart = true; + switch (nh->nlmsg_type) { + case NLMSG_ERROR: + err = (struct nlmsgerr *)NLMSG_DATA(nh); + if (!err->error) + continue; + ret = err->error; + goto done; + case NLMSG_DONE: + ret = 0; + goto done; + default: + break; + } + } + } + ret = 0; +done: + free(iov.iov_base); + return ret; +} + +int xsk_set_mtu(int ifindex, int mtu) +{ + struct nl_mtu_req req; + struct rtattr *rta; + int fd, ret; + + fd = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE); + if (fd < 0) + return fd; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_type = RTM_NEWLINK; + req.msg.ifi_family = AF_UNSPEC; + req.msg.ifi_index = ifindex; + rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.nh.nlmsg_len)); + rta->rta_type = IFLA_MTU; + rta->rta_len = RTA_LENGTH(sizeof(unsigned int)); + req.nh.nlmsg_len = NLMSG_ALIGN(req.nh.nlmsg_len) + RTA_LENGTH(sizeof(mtu)); + memcpy(RTA_DATA(rta), &mtu, sizeof(mtu)); + + ret = send(fd, &req, req.nh.nlmsg_len, 0); + if (ret < 0) { + close(fd); + return errno; + } + + ret = netlink_recv(fd); + close(fd); + return ret; +} + int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags) { int prog_fd; diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h index 04ed8b544712..d93200fdaa8d 100644 --- a/tools/testing/selftests/bpf/xsk.h +++ b/tools/testing/selftests/bpf/xsk.h @@ -134,6 +134,11 @@ static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb) __atomic_store_n(prod->producer, *prod->producer + nb, __ATOMIC_RELEASE); } +static inline void xsk_ring_prod__cancel(struct xsk_ring_prod *prod, __u32 nb) +{ + prod->cached_prod -= nb; +} + static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx) { __u32 entries = xsk_cons_nb_avail(cons, nb); @@ -234,6 +239,8 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr, int xsk_umem__delete(struct xsk_umem *umem); void xsk_socket__delete(struct xsk_socket *xsk); +int xsk_set_mtu(int ifindex, int mtu); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh index ae697a10a056..29175682c44d 100755 --- a/tools/testing/selftests/bpf/xsk_prereqs.sh +++ b/tools/testing/selftests/bpf/xsk_prereqs.sh @@ -53,6 +53,13 @@ test_exit() exit 1 } +cleanup_iface() +{ + ip link set $1 mtu $2 + ip link set $1 xdp off + ip link set $1 xdpgeneric off +} + clear_configs() { [ $(ip link show $1 &>/dev/null; echo $?;) == 0 ] && diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index f144d0604ddf..2827f2d7cf30 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -49,8 +49,11 @@ * h. tests for invalid and corner case Tx descriptors so that the correct ones * are discarded and let through, respectively. * i. 2K frame size tests - * - * Total tests: 12 + * j. If multi-buffer is supported, send 9k packets divided into 3 frames + * k. If multi-buffer and huge pages are supported, send 9k packets in a single frame + * using unaligned mode + * l. If multi-buffer is supported, try various nasty combinations of descriptors to + * check if they pass the validation or not * * Flow: * ----- @@ -73,19 +76,16 @@ #include <fcntl.h> #include <errno.h> #include <getopt.h> -#include <asm/barrier.h> #include <linux/if_link.h> #include <linux/if_ether.h> -#include <linux/ip.h> #include <linux/mman.h> -#include <linux/udp.h> +#include <linux/netdev.h> #include <arpa/inet.h> #include <net/if.h> #include <locale.h> #include <poll.h> #include <pthread.h> #include <signal.h> -#include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -94,10 +94,7 @@ #include <sys/socket.h> #include <sys/time.h> #include <sys/types.h> -#include <sys/queue.h> -#include <time.h> #include <unistd.h> -#include <stdatomic.h> #include "xsk_xdp_progs.skel.h" #include "xsk.h" @@ -109,10 +106,6 @@ static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62"; static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61"; -static const char *IP1 = "192.168.100.162"; -static const char *IP2 = "192.168.100.161"; -static const u16 UDP_PORT1 = 2020; -static const u16 UDP_PORT2 = 2121; static void __exit_with_error(int error, const char *file, const char *func, int line) { @@ -147,112 +140,25 @@ static void report_failure(struct test_spec *test) test->fail = true; } -static void memset32_htonl(void *dest, u32 val, u32 size) -{ - u32 *ptr = (u32 *)dest; - int i; - - val = htonl(val); - - for (i = 0; i < (size & (~0x3)); i += 4) - ptr[i >> 2] = val; -} - -/* - * Fold a partial checksum - * This function code has been taken from - * Linux kernel include/asm-generic/checksum.h - */ -static __u16 csum_fold(__u32 csum) -{ - u32 sum = (__force u32)csum; - - sum = (sum & 0xffff) + (sum >> 16); - sum = (sum & 0xffff) + (sum >> 16); - return (__force __u16)~sum; -} - -/* - * This function code has been taken from - * Linux kernel lib/checksum.c - */ -static u32 from64to32(u64 x) -{ - /* add up 32-bit and 32-bit for 32+c bit */ - x = (x & 0xffffffff) + (x >> 32); - /* add up carry.. */ - x = (x & 0xffffffff) + (x >> 32); - return (u32)x; -} - -/* - * This function code has been taken from - * Linux kernel lib/checksum.c +/* The payload is a word consisting of a packet sequence number in the upper + * 16-bits and a intra packet data sequence number in the lower 16 bits. So the 3rd packet's + * 5th word of data will contain the number (2<<16) | 4 as they are numbered from 0. */ -static __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) +static void write_payload(void *dest, u32 pkt_nb, u32 start, u32 size) { - unsigned long long s = (__force u32)sum; + u32 *ptr = (u32 *)dest, i; - s += (__force u32)saddr; - s += (__force u32)daddr; -#ifdef __BIG_ENDIAN__ - s += proto + len; -#else - s += (proto + len) << 8; -#endif - return (__force __u32)from64to32(s); -} - -/* - * This function has been taken from - * Linux kernel include/asm-generic/checksum.h - */ -static __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum) -{ - return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); -} - -static u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt) -{ - u32 csum = 0; - u32 cnt = 0; - - /* udp hdr and data */ - for (; cnt < len; cnt += 2) - csum += udp_pkt[cnt >> 1]; - - return csum_tcpudp_magic(saddr, daddr, len, proto, csum); + start /= sizeof(*ptr); + size /= sizeof(*ptr); + for (i = 0; i < size; i++) + ptr[i] = htonl(pkt_nb << 16 | (i + start)); } static void gen_eth_hdr(struct ifobject *ifobject, struct ethhdr *eth_hdr) { memcpy(eth_hdr->h_dest, ifobject->dst_mac, ETH_ALEN); memcpy(eth_hdr->h_source, ifobject->src_mac, ETH_ALEN); - eth_hdr->h_proto = htons(ETH_P_IP); -} - -static void gen_ip_hdr(struct ifobject *ifobject, struct iphdr *ip_hdr) -{ - ip_hdr->version = IP_PKT_VER; - ip_hdr->ihl = 0x5; - ip_hdr->tos = IP_PKT_TOS; - ip_hdr->tot_len = htons(IP_PKT_SIZE); - ip_hdr->id = 0; - ip_hdr->frag_off = 0; - ip_hdr->ttl = IPDEFTTL; - ip_hdr->protocol = IPPROTO_UDP; - ip_hdr->saddr = ifobject->src_ip; - ip_hdr->daddr = ifobject->dst_ip; - ip_hdr->check = 0; -} - -static void gen_udp_hdr(u32 payload, void *pkt, struct ifobject *ifobject, - struct udphdr *udp_hdr) -{ - udp_hdr->source = htons(ifobject->src_port); - udp_hdr->dest = htons(ifobject->dst_port); - udp_hdr->len = htons(UDP_PKT_SIZE); - memset32_htonl(pkt + PKT_HDR_SIZE, payload, UDP_PKT_DATA_SIZE); + eth_hdr->h_proto = htons(ETH_P_LOOPBACK); } static bool is_umem_valid(struct ifobject *ifobj) @@ -260,19 +166,18 @@ static bool is_umem_valid(struct ifobject *ifobj) return !!ifobj->umem->umem; } -static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr) +static u32 mode_to_xdp_flags(enum test_mode mode) { - udp_hdr->check = 0; - udp_hdr->check = - udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr); + return (mode == TEST_MODE_SKB) ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE; } -static u32 mode_to_xdp_flags(enum test_mode mode) +static u64 umem_size(struct xsk_umem_info *umem) { - return (mode == TEST_MODE_SKB) ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE; + return umem->num_frames * umem->frame_size; } -static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size) +static int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer, + u64 size) { struct xsk_umem_config cfg = { .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS, @@ -292,9 +197,31 @@ static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size return ret; umem->buffer = buffer; + if (ifobj->shared_umem && ifobj->rx_on) { + umem->base_addr = umem_size(umem); + umem->next_buffer = umem_size(umem); + } + return 0; } +static u64 umem_alloc_buffer(struct xsk_umem_info *umem) +{ + u64 addr; + + addr = umem->next_buffer; + umem->next_buffer += umem->frame_size; + if (umem->next_buffer >= umem->base_addr + umem_size(umem)) + umem->next_buffer = umem->base_addr; + + return addr; +} + +static void umem_reset_alloc(struct xsk_umem_info *umem) +{ + umem->next_buffer = 0; +} + static void enable_busy_poll(struct xsk_socket_info *xsk) { int sock_opt; @@ -328,6 +255,8 @@ static int __xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_i cfg.bind_flags = ifobject->bind_flags; if (shared) cfg.bind_flags |= XDP_SHARED_UMEM; + if (ifobject->pkt_stream && ifobject->mtu > MAX_ETH_PKT_SIZE) + cfg.bind_flags |= XDP_USE_SG; txr = ifobject->tx_on ? &xsk->tx : NULL; rxr = ifobject->rx_on ? &xsk->rx : NULL; @@ -354,7 +283,7 @@ static bool ifobj_zc_avail(struct ifobject *ifobject) exit_with_error(ENOMEM); } umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; - ret = xsk_configure_umem(umem, bufs, umem_sz); + ret = xsk_configure_umem(ifobject, umem, bufs, umem_sz); if (ret) exit_with_error(-ret); @@ -380,7 +309,6 @@ out: static struct option long_options[] = { {"interface", required_argument, 0, 'i'}, {"busy-poll", no_argument, 0, 'b'}, - {"dump-pkts", no_argument, 0, 'D'}, {"verbose", no_argument, 0, 'v'}, {0, 0, 0, 0} }; @@ -391,7 +319,6 @@ static void usage(const char *prog) " Usage: %s [OPTIONS]\n" " Options:\n" " -i, --interface Use interface\n" - " -D, --dump-pkts Dump packets L2 - L5\n" " -v, --verbose Verbose output\n" " -b, --busy-poll Enable busy poll\n"; @@ -415,7 +342,7 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj opterr = 0; for (;;) { - c = getopt_long(argc, argv, "i:Dvb", long_options, &option_index); + c = getopt_long(argc, argv, "i:vb", long_options, &option_index); if (c == -1) break; @@ -437,9 +364,6 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj interface_nb++; break; - case 'D': - opt_pkt_dump = true; - break; case 'v': opt_verbose = true; break; @@ -482,9 +406,6 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, memset(ifobj->umem, 0, sizeof(*ifobj->umem)); ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS; ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; - if (ifobj->shared_umem && ifobj->rx_on) - ifobj->umem->base_addr = DEFAULT_UMEM_BUFFERS * - XSK_UMEM__DEFAULT_FRAME_SIZE; for (j = 0; j < MAX_SOCKETS; j++) { memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j])); @@ -498,6 +419,7 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx, test->total_steps = 1; test->nb_sockets = 1; test->fail = false; + test->mtu = MAX_ETH_PKT_SIZE; test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog; test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk; test->xdp_prog_tx = ifobj_tx->xdp_progs->progs.xsk_def_prog; @@ -551,27 +473,47 @@ static void test_spec_set_xdp_prog(struct test_spec *test, struct bpf_program *x test->xskmap_tx = xskmap_tx; } +static int test_spec_set_mtu(struct test_spec *test, int mtu) +{ + int err; + + if (test->ifobj_rx->mtu != mtu) { + err = xsk_set_mtu(test->ifobj_rx->ifindex, mtu); + if (err) + return err; + test->ifobj_rx->mtu = mtu; + } + if (test->ifobj_tx->mtu != mtu) { + err = xsk_set_mtu(test->ifobj_tx->ifindex, mtu); + if (err) + return err; + test->ifobj_tx->mtu = mtu; + } + + return 0; +} + static void pkt_stream_reset(struct pkt_stream *pkt_stream) { if (pkt_stream) - pkt_stream->rx_pkt_nb = 0; + pkt_stream->current_pkt_nb = 0; } -static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb) +static struct pkt *pkt_stream_get_next_tx_pkt(struct pkt_stream *pkt_stream) { - if (pkt_nb >= pkt_stream->nb_pkts) + if (pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) return NULL; - return &pkt_stream->pkts[pkt_nb]; + return &pkt_stream->pkts[pkt_stream->current_pkt_nb++]; } static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent) { - while (pkt_stream->rx_pkt_nb < pkt_stream->nb_pkts) { + while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) { (*pkts_sent)++; - if (pkt_stream->pkts[pkt_stream->rx_pkt_nb].valid) - return &pkt_stream->pkts[pkt_stream->rx_pkt_nb++]; - pkt_stream->rx_pkt_nb++; + if (pkt_stream->pkts[pkt_stream->current_pkt_nb].valid) + return &pkt_stream->pkts[pkt_stream->current_pkt_nb++]; + pkt_stream->current_pkt_nb++; } return NULL; } @@ -616,16 +558,59 @@ static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts) return pkt_stream; } -static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr, u32 len) +static bool pkt_continues(u32 options) +{ + return options & XDP_PKT_CONTD; +} + +static u32 ceil_u32(u32 a, u32 b) +{ + return (a + b - 1) / b; +} + +static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pkt *pkt) { - pkt->addr = addr + umem->base_addr; + u32 nb_frags = 1, next_frag; + + if (!pkt) + return 1; + + if (!pkt_stream->verbatim) { + if (!pkt->valid || !pkt->len) + return 1; + return ceil_u32(pkt->len, frame_size); + } + + /* Search for the end of the packet in verbatim mode */ + if (!pkt_continues(pkt->options)) + return nb_frags; + + next_frag = pkt_stream->current_pkt_nb; + pkt++; + while (next_frag++ < pkt_stream->nb_pkts) { + nb_frags++; + if (!pkt_continues(pkt->options) || !pkt->valid) + break; + pkt++; + } + return nb_frags; +} + +static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, int offset, u32 len) +{ + pkt->offset = offset; pkt->len = len; - if (len > umem->frame_size - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 2 - umem->frame_headroom) + if (len > MAX_ETH_JUMBO_SIZE) pkt->valid = false; else pkt->valid = true; } +static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len) +{ + return ceil_u32(len, umem->frame_size) * umem->frame_size; +} + static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len) { struct pkt_stream *pkt_stream; @@ -635,10 +620,13 @@ static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb if (!pkt_stream) exit_with_error(ENOMEM); + pkt_stream->nb_pkts = nb_pkts; + pkt_stream->max_pkt_len = pkt_len; for (i = 0; i < nb_pkts; i++) { - pkt_set(umem, &pkt_stream->pkts[i], (i % umem->num_frames) * umem->frame_size, - pkt_len); - pkt_stream->pkts[i].payload = i; + struct pkt *pkt = &pkt_stream->pkts[i]; + + pkt_set(umem, pkt, 0, pkt_len); + pkt->pkt_nb = i; } return pkt_stream; @@ -669,8 +657,7 @@ static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len, pkt_stream = pkt_stream_clone(umem, ifobj->pkt_stream); for (i = 1; i < ifobj->pkt_stream->nb_pkts; i += 2) - pkt_set(umem, &pkt_stream->pkts[i], - (i % umem->num_frames) * umem->frame_size + offset, pkt_len); + pkt_set(umem, &pkt_stream->pkts[i], offset, pkt_len); ifobj->pkt_stream = pkt_stream; } @@ -694,105 +681,149 @@ static void pkt_stream_receive_half(struct test_spec *test) pkt_stream->pkts[i].valid = false; } -static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb) +static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem) { - struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb); - struct udphdr *udp_hdr; - struct ethhdr *eth_hdr; - struct iphdr *ip_hdr; - void *data; + if (!pkt->valid) + return pkt->offset; + return pkt->offset + umem_alloc_buffer(umem); +} - if (!pkt) - return NULL; - if (!pkt->valid || pkt->len < MIN_PKT_SIZE) - return pkt; +static void pkt_stream_cancel(struct pkt_stream *pkt_stream) +{ + pkt_stream->current_pkt_nb--; +} - data = xsk_umem__get_data(ifobject->umem->buffer, pkt->addr); - udp_hdr = (struct udphdr *)(data + sizeof(struct ethhdr) + sizeof(struct iphdr)); - ip_hdr = (struct iphdr *)(data + sizeof(struct ethhdr)); - eth_hdr = (struct ethhdr *)data; +static void pkt_generate(struct ifobject *ifobject, u64 addr, u32 len, u32 pkt_nb, + u32 bytes_written) +{ + void *data = xsk_umem__get_data(ifobject->umem->buffer, addr); - gen_udp_hdr(pkt_nb, data, ifobject, udp_hdr); - gen_ip_hdr(ifobject, ip_hdr); - gen_udp_csum(udp_hdr, ip_hdr); - gen_eth_hdr(ifobject, eth_hdr); + if (len < MIN_PKT_SIZE) + return; + + if (!bytes_written) { + gen_eth_hdr(ifobject, data); + + len -= PKT_HDR_SIZE; + data += PKT_HDR_SIZE; + } else { + bytes_written -= PKT_HDR_SIZE; + } - return pkt; + write_payload(data, pkt_nb, bytes_written, len); } -static void __pkt_stream_generate_custom(struct ifobject *ifobj, - struct pkt *pkts, u32 nb_pkts) +static struct pkt_stream *__pkt_stream_generate_custom(struct ifobject *ifobj, struct pkt *frames, + u32 nb_frames, bool verbatim) { + u32 i, len = 0, pkt_nb = 0, payload = 0; struct pkt_stream *pkt_stream; - u32 i; - pkt_stream = __pkt_stream_alloc(nb_pkts); + pkt_stream = __pkt_stream_alloc(nb_frames); if (!pkt_stream) exit_with_error(ENOMEM); - for (i = 0; i < nb_pkts; i++) { - pkt_stream->pkts[i].addr = pkts[i].addr + ifobj->umem->base_addr; - pkt_stream->pkts[i].len = pkts[i].len; - pkt_stream->pkts[i].payload = i; - pkt_stream->pkts[i].valid = pkts[i].valid; + for (i = 0; i < nb_frames; i++) { + struct pkt *pkt = &pkt_stream->pkts[pkt_nb]; + struct pkt *frame = &frames[i]; + + pkt->offset = frame->offset; + if (verbatim) { + *pkt = *frame; + pkt->pkt_nb = payload; + if (!frame->valid || !pkt_continues(frame->options)) + payload++; + } else { + if (frame->valid) + len += frame->len; + if (frame->valid && pkt_continues(frame->options)) + continue; + + pkt->pkt_nb = pkt_nb; + pkt->len = len; + pkt->valid = frame->valid; + pkt->options = 0; + + len = 0; + } + + if (pkt->valid && pkt->len > pkt_stream->max_pkt_len) + pkt_stream->max_pkt_len = pkt->len; + pkt_nb++; } - ifobj->pkt_stream = pkt_stream; + pkt_stream->nb_pkts = pkt_nb; + pkt_stream->verbatim = verbatim; + return pkt_stream; } static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts) { - __pkt_stream_generate_custom(test->ifobj_tx, pkts, nb_pkts); - __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts); + struct pkt_stream *pkt_stream; + + pkt_stream = __pkt_stream_generate_custom(test->ifobj_tx, pkts, nb_pkts, true); + test->ifobj_tx->pkt_stream = pkt_stream; + + pkt_stream = __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts, false); + test->ifobj_rx->pkt_stream = pkt_stream; } -static void pkt_dump(void *pkt, u32 len) +static void pkt_print_data(u32 *data, u32 cnt) { - char s[INET_ADDRSTRLEN]; - struct ethhdr *ethhdr; - struct udphdr *udphdr; - struct iphdr *iphdr; - u32 payload, i; + u32 i; - ethhdr = pkt; - iphdr = pkt + sizeof(*ethhdr); - udphdr = pkt + sizeof(*ethhdr) + sizeof(*iphdr); + for (i = 0; i < cnt; i++) { + u32 seqnum, pkt_nb; - /*extract L2 frame */ - fprintf(stdout, "DEBUG>> L2: dst mac: "); - for (i = 0; i < ETH_ALEN; i++) - fprintf(stdout, "%02X", ethhdr->h_dest[i]); + seqnum = ntohl(*data) & 0xffff; + pkt_nb = ntohl(*data) >> 16; + fprintf(stdout, "%u:%u ", pkt_nb, seqnum); + data++; + } +} - fprintf(stdout, "\nDEBUG>> L2: src mac: "); - for (i = 0; i < ETH_ALEN; i++) - fprintf(stdout, "%02X", ethhdr->h_source[i]); +static void pkt_dump(void *pkt, u32 len, bool eth_header) +{ + struct ethhdr *ethhdr = pkt; + u32 i, *data; - /*extract L3 frame */ - fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl); - fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n", - inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s))); - fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n", - inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s))); - /*extract L4 frame */ - fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source)); - fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest)); - /*extract L5 frame */ - payload = ntohl(*((u32 *)(pkt + PKT_HDR_SIZE))); + if (eth_header) { + /*extract L2 frame */ + fprintf(stdout, "DEBUG>> L2: dst mac: "); + for (i = 0; i < ETH_ALEN; i++) + fprintf(stdout, "%02X", ethhdr->h_dest[i]); + + fprintf(stdout, "\nDEBUG>> L2: src mac: "); + for (i = 0; i < ETH_ALEN; i++) + fprintf(stdout, "%02X", ethhdr->h_source[i]); - fprintf(stdout, "DEBUG>> L5: payload: %d\n", payload); - fprintf(stdout, "---------------------------------------\n"); + data = pkt + PKT_HDR_SIZE; + } else { + data = pkt; + } + + /*extract L5 frame */ + fprintf(stdout, "\nDEBUG>> L5: seqnum: "); + pkt_print_data(data, PKT_DUMP_NB_TO_PRINT); + fprintf(stdout, "...."); + if (len > PKT_DUMP_NB_TO_PRINT * sizeof(u32)) { + fprintf(stdout, "\n.... "); + pkt_print_data(data + len / sizeof(u32) - PKT_DUMP_NB_TO_PRINT, + PKT_DUMP_NB_TO_PRINT); + } + fprintf(stdout, "\n---------------------------------------\n"); } -static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream, u64 addr, - u64 pkt_stream_addr) +static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr) { u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom; - u32 offset = addr % umem->frame_size, expected_offset = 0; + u32 offset = addr % umem->frame_size, expected_offset; + int pkt_offset = pkt->valid ? pkt->offset : 0; - if (!pkt_stream->use_addr_for_fill) - pkt_stream_addr = 0; + if (!umem->unaligned_mode) + pkt_offset = 0; - expected_offset += (pkt_stream_addr + headroom + XDP_PACKET_HEADROOM) % umem->frame_size; + expected_offset = (pkt_offset + headroom + XDP_PACKET_HEADROOM) % umem->frame_size; if (offset == expected_offset) return true; @@ -806,51 +837,79 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr) void *data = xsk_umem__get_data(buffer, addr); struct xdp_info *meta = data - sizeof(struct xdp_info); - if (meta->count != pkt->payload) { + if (meta->count != pkt->pkt_nb) { ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%d]\n", - __func__, pkt->payload, meta->count); + __func__, pkt->pkt_nb, meta->count); return false; } return true; } -static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) +static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 expected_pkt_nb, + u32 bytes_processed) { - void *data = xsk_umem__get_data(buffer, addr); - struct iphdr *iphdr = (struct iphdr *)(data + sizeof(struct ethhdr)); + u32 seqnum, pkt_nb, *pkt_data, words_to_end, expected_seqnum; + void *data = xsk_umem__get_data(umem->buffer, addr); - if (!pkt) { - ksft_print_msg("[%s] too many packets received\n", __func__); + addr -= umem->base_addr; + + if (addr >= umem->num_frames * umem->frame_size || + addr + len > umem->num_frames * umem->frame_size) { + ksft_print_msg("Frag invalid addr: %llx len: %u\n", addr, len); + return false; + } + if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) { + ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n", addr, len); return false; } - if (len < MIN_PKT_SIZE || pkt->len < MIN_PKT_SIZE) { - /* Do not try to verify packets that are smaller than minimum size. */ - return true; + pkt_data = data; + if (!bytes_processed) { + pkt_data += PKT_HDR_SIZE / sizeof(*pkt_data); + len -= PKT_HDR_SIZE; + } else { + bytes_processed -= PKT_HDR_SIZE; } - if (pkt->len != len) { - ksft_print_msg("[%s] expected length [%d], got length [%d]\n", - __func__, pkt->len, len); - return false; + expected_seqnum = bytes_processed / sizeof(*pkt_data); + seqnum = ntohl(*pkt_data) & 0xffff; + pkt_nb = ntohl(*pkt_data) >> 16; + + if (expected_pkt_nb != pkt_nb) { + ksft_print_msg("[%s] expected pkt_nb [%u], got pkt_nb [%u]\n", + __func__, expected_pkt_nb, pkt_nb); + goto error; + } + if (expected_seqnum != seqnum) { + ksft_print_msg("[%s] expected seqnum at start [%u], got seqnum [%u]\n", + __func__, expected_seqnum, seqnum); + goto error; } - if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) { - u32 seqnum = ntohl(*((u32 *)(data + PKT_HDR_SIZE))); + words_to_end = len / sizeof(*pkt_data) - 1; + pkt_data += words_to_end; + seqnum = ntohl(*pkt_data) & 0xffff; + expected_seqnum += words_to_end; + if (expected_seqnum != seqnum) { + ksft_print_msg("[%s] expected seqnum at end [%u], got seqnum [%u]\n", + __func__, expected_seqnum, seqnum); + goto error; + } - if (opt_pkt_dump) - pkt_dump(data, PKT_SIZE); + return true; - if (pkt->payload != seqnum) { - ksft_print_msg("[%s] expected seqnum [%d], got seqnum [%d]\n", - __func__, pkt->payload, seqnum); - return false; - } - } else { - ksft_print_msg("Invalid frame received: "); - ksft_print_msg("[IP_PKT_VER: %02X], [IP_PKT_TOS: %02X]\n", iphdr->version, - iphdr->tos); +error: + pkt_dump(data, len, !bytes_processed); + return false; +} + +static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) +{ + if (pkt->len != len) { + ksft_print_msg("[%s] expected packet length [%d], got length [%d]\n", + __func__, pkt->len, len); + pkt_dump(xsk_umem__get_data(buffer, addr), len, true); return false; } @@ -909,8 +968,8 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds) { struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0}; struct pkt_stream *pkt_stream = test->ifobj_rx->pkt_stream; - u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkts_sent = 0; struct xsk_socket_info *xsk = test->ifobj_rx->xsk; + u32 idx_rx = 0, idx_fq = 0, rcvd, pkts_sent = 0; struct ifobject *ifobj = test->ifobj_rx; struct xsk_umem_info *umem = xsk->umem; struct pkt *pkt; @@ -923,6 +982,9 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds) pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent); while (pkt) { + u32 frags_processed = 0, nb_frags = 0, pkt_len = 0; + u64 first_addr; + ret = gettimeofday(&tv_now, NULL); if (ret) exit_with_error(errno); @@ -943,7 +1005,6 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds) ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__); return TEST_FAILURE; - } if (!(fds->revents & POLLIN)) @@ -968,32 +1029,62 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds) } } - for (i = 0; i < rcvd; i++) { + while (frags_processed < rcvd) { const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++); u64 addr = desc->addr, orig; orig = xsk_umem__extract_addr(addr); addr = xsk_umem__add_offset_to_addr(addr); - if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len) || - !is_offset_correct(umem, pkt_stream, addr, pkt->addr) || + if (!pkt) { + ksft_print_msg("[%s] received too many packets addr: %lx len %u\n", + __func__, addr, desc->len); + return TEST_FAILURE; + } + + if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) || + !is_offset_correct(umem, pkt, addr) || (ifobj->use_metadata && !is_metadata_correct(pkt, umem->buffer, addr))) return TEST_FAILURE; + if (!nb_frags++) + first_addr = addr; + frags_processed++; + pkt_len += desc->len; if (ifobj->use_fill_ring) *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig; + + if (pkt_continues(desc->options)) + continue; + + /* The complete packet has been received */ + if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) || + !is_offset_correct(umem, pkt, addr)) + return TEST_FAILURE; + pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent); + nb_frags = 0; + pkt_len = 0; + } + + if (nb_frags) { + /* In the middle of a packet. Start over from beginning of packet. */ + idx_rx -= nb_frags; + xsk_ring_cons__cancel(&xsk->rx, nb_frags); + if (ifobj->use_fill_ring) { + idx_fq -= nb_frags; + xsk_ring_prod__cancel(&umem->fq, nb_frags); + } + frags_processed -= nb_frags; } if (ifobj->use_fill_ring) - xsk_ring_prod__submit(&umem->fq, rcvd); + xsk_ring_prod__submit(&umem->fq, frags_processed); if (ifobj->release_rx) - xsk_ring_cons__release(&xsk->rx, rcvd); + xsk_ring_cons__release(&xsk->rx, frags_processed); pthread_mutex_lock(&pacing_mutex); pkts_in_flight -= pkts_sent; - if (pkts_in_flight < umem->num_frames) - pthread_cond_signal(&pacing_cond); pthread_mutex_unlock(&pacing_mutex); pkts_sent = 0; } @@ -1001,14 +1092,22 @@ static int receive_pkts(struct test_spec *test, struct pollfd *fds) return TEST_PASS; } -static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fds, - bool timeout) +static int __send_pkts(struct ifobject *ifobject, struct pollfd *fds, bool timeout) { + u32 i, idx = 0, valid_pkts = 0, valid_frags = 0, buffer_len; + struct pkt_stream *pkt_stream = ifobject->pkt_stream; struct xsk_socket_info *xsk = ifobject->xsk; + struct xsk_umem_info *umem = ifobject->umem; bool use_poll = ifobject->use_poll; - u32 i, idx = 0, valid_pkts = 0; int ret; + buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len); + /* pkts_in_flight might be negative if many invalid packets are sent */ + if (pkts_in_flight >= (int)((umem_size(umem) - BATCH_SIZE * buffer_len) / buffer_len)) { + kick_tx(xsk); + return TEST_CONTINUE; + } + while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) { if (use_poll) { ret = poll(fds, 1, POLL_TMOUT); @@ -1033,30 +1132,58 @@ static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fd } for (i = 0; i < BATCH_SIZE; i++) { - struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); - struct pkt *pkt = pkt_generate(ifobject, *pkt_nb); + struct pkt *pkt = pkt_stream_get_next_tx_pkt(pkt_stream); + u32 nb_frags_left, nb_frags, bytes_written = 0; if (!pkt) break; - tx_desc->addr = pkt->addr; - tx_desc->len = pkt->len; - (*pkt_nb)++; - if (pkt->valid) + nb_frags = pkt_nb_frags(umem->frame_size, pkt_stream, pkt); + if (nb_frags > BATCH_SIZE - i) { + pkt_stream_cancel(pkt_stream); + xsk_ring_prod__cancel(&xsk->tx, BATCH_SIZE - i); + break; + } + nb_frags_left = nb_frags; + + while (nb_frags_left--) { + struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i); + + tx_desc->addr = pkt_get_addr(pkt, ifobject->umem); + if (pkt_stream->verbatim) { + tx_desc->len = pkt->len; + tx_desc->options = pkt->options; + } else if (nb_frags_left) { + tx_desc->len = umem->frame_size; + tx_desc->options = XDP_PKT_CONTD; + } else { + tx_desc->len = pkt->len - bytes_written; + tx_desc->options = 0; + } + if (pkt->valid) + pkt_generate(ifobject, tx_desc->addr, tx_desc->len, pkt->pkt_nb, + bytes_written); + bytes_written += tx_desc->len; + + if (nb_frags_left) { + i++; + if (pkt_stream->verbatim) + pkt = pkt_stream_get_next_tx_pkt(pkt_stream); + } + } + + if (pkt && pkt->valid) { valid_pkts++; + valid_frags += nb_frags; + } } pthread_mutex_lock(&pacing_mutex); pkts_in_flight += valid_pkts; - /* pkts_in_flight might be negative if many invalid packets are sent */ - if (pkts_in_flight >= (int)(ifobject->umem->num_frames - BATCH_SIZE)) { - kick_tx(xsk); - pthread_cond_wait(&pacing_cond, &pacing_mutex); - } pthread_mutex_unlock(&pacing_mutex); xsk_ring_prod__submit(&xsk->tx, i); - xsk->outstanding_tx += valid_pkts; + xsk->outstanding_tx += valid_frags; if (use_poll) { ret = poll(fds, 1, POLL_TMOUT); @@ -1088,18 +1215,21 @@ static void wait_for_tx_completion(struct xsk_socket_info *xsk) static int send_pkts(struct test_spec *test, struct ifobject *ifobject) { + struct pkt_stream *pkt_stream = ifobject->pkt_stream; bool timeout = !is_umem_valid(test->ifobj_rx); struct pollfd fds = { }; - u32 pkt_cnt = 0, ret; + u32 ret; fds.fd = xsk_socket__fd(ifobject->xsk->xsk); fds.events = POLLOUT; - while (pkt_cnt < ifobject->pkt_stream->nb_pkts) { - ret = __send_pkts(ifobject, &pkt_cnt, &fds, timeout); + while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) { + ret = __send_pkts(ifobject, &fds, timeout); + if (ret == TEST_CONTINUE && !test->fail) + continue; if ((ret || test->fail) && !timeout) return TEST_FAILURE; - else if (ret == TEST_PASS && timeout) + if (ret == TEST_PASS && timeout) return ret; } @@ -1249,11 +1379,14 @@ static void thread_common_ops_tx(struct test_spec *test, struct ifobject *ifobje ifobject->xsk = &ifobject->xsk_arr[0]; ifobject->xskmap = test->ifobj_rx->xskmap; memcpy(ifobject->umem, test->ifobj_rx->umem, sizeof(struct xsk_umem_info)); + ifobject->umem->base_addr = 0; } -static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream) +static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream, + bool fill_up) { - u32 idx = 0, i, buffers_to_fill; + u32 rx_frame_size = umem->frame_size - XDP_PACKET_HEADROOM; + u32 idx = 0, filled = 0, buffers_to_fill, nb_pkts; int ret; if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS) @@ -1264,22 +1397,33 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx); if (ret != buffers_to_fill) exit_with_error(ENOSPC); - for (i = 0; i < buffers_to_fill; i++) { - u64 addr; - if (pkt_stream->use_addr_for_fill) { - struct pkt *pkt = pkt_stream_get_pkt(pkt_stream, i); + while (filled < buffers_to_fill) { + struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &nb_pkts); + u64 addr; + u32 i; + + for (i = 0; i < pkt_nb_frags(rx_frame_size, pkt_stream, pkt); i++) { + if (!pkt) { + if (!fill_up) + break; + addr = filled * umem->frame_size + umem->base_addr; + } else if (pkt->offset >= 0) { + addr = pkt->offset % umem->frame_size + umem_alloc_buffer(umem); + } else { + addr = pkt->offset + umem_alloc_buffer(umem); + } - if (!pkt) + *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr; + if (++filled >= buffers_to_fill) break; - addr = pkt->addr; - } else { - addr = i * umem->frame_size; } - - *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr; } - xsk_ring_prod__submit(&umem->fq, i); + xsk_ring_prod__submit(&umem->fq, filled); + xsk_ring_prod__cancel(&umem->fq, buffers_to_fill - filled); + + pkt_stream_reset(pkt_stream); + umem_reset_alloc(umem); } static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) @@ -1300,12 +1444,10 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) if (bufs == MAP_FAILED) exit_with_error(errno); - ret = xsk_configure_umem(ifobject->umem, bufs, umem_sz); + ret = xsk_configure_umem(ifobject, ifobject->umem, bufs, umem_sz); if (ret) exit_with_error(-ret); - xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream); - xsk_configure_socket(test, ifobject, ifobject->umem, false); ifobject->xsk = &ifobject->xsk_arr[0]; @@ -1313,6 +1455,8 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject) if (!ifobject->rx_on) return; + xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream, ifobject->use_fill_ring); + ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk); if (ret) exit_with_error(errno); @@ -1370,12 +1514,8 @@ static void *worker_testapp_validate_rx(void *arg) if (!err && ifobject->validation_func) err = ifobject->validation_func(ifobject); - if (err) { + if (err) report_failure(test); - pthread_mutex_lock(&pacing_mutex); - pthread_cond_signal(&pacing_cond); - pthread_mutex_unlock(&pacing_mutex); - } pthread_exit(NULL); } @@ -1402,11 +1542,20 @@ static void handler(int signum) pthread_exit(NULL); } -static bool xdp_prog_changed(struct test_spec *test, struct ifobject *ifobj) +static bool xdp_prog_changed_rx(struct test_spec *test) { + struct ifobject *ifobj = test->ifobj_rx; + return ifobj->xdp_prog != test->xdp_prog_rx || ifobj->mode != test->mode; } +static bool xdp_prog_changed_tx(struct test_spec *test) +{ + struct ifobject *ifobj = test->ifobj_tx; + + return ifobj->xdp_prog != test->xdp_prog_tx || ifobj->mode != test->mode; +} + static void xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_prog, struct bpf_map *xskmap, enum test_mode mode) { @@ -1433,13 +1582,13 @@ static void xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_pro static void xsk_attach_xdp_progs(struct test_spec *test, struct ifobject *ifobj_rx, struct ifobject *ifobj_tx) { - if (xdp_prog_changed(test, ifobj_rx)) + if (xdp_prog_changed_rx(test)) xsk_reattach_xdp(ifobj_rx, test->xdp_prog_rx, test->xskmap_rx, test->mode); if (!ifobj_tx || ifobj_tx->shared_umem) return; - if (xdp_prog_changed(test, ifobj_tx)) + if (xdp_prog_changed_tx(test)) xsk_reattach_xdp(ifobj_tx, test->xdp_prog_tx, test->xskmap_tx, test->mode); } @@ -1447,10 +1596,31 @@ static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *i struct ifobject *ifobj2) { pthread_t t0, t1; + int err; - if (ifobj2) + if (test->mtu > MAX_ETH_PKT_SIZE) { + if (test->mode == TEST_MODE_ZC && (!ifobj1->multi_buff_zc_supp || + (ifobj2 && !ifobj2->multi_buff_zc_supp))) { + ksft_test_result_skip("Multi buffer for zero-copy not supported.\n"); + return TEST_SKIP; + } + if (test->mode != TEST_MODE_ZC && (!ifobj1->multi_buff_supp || + (ifobj2 && !ifobj2->multi_buff_supp))) { + ksft_test_result_skip("Multi buffer not supported.\n"); + return TEST_SKIP; + } + } + err = test_spec_set_mtu(test, test->mtu); + if (err) { + ksft_print_msg("Error, could not set mtu.\n"); + exit_with_error(err); + } + + if (ifobj2) { if (pthread_barrier_init(&barr, NULL, 2)) exit_with_error(errno); + pkt_stream_reset(ifobj2->pkt_stream); + } test->current_step++; pkt_stream_reset(ifobj1->pkt_stream); @@ -1493,6 +1663,12 @@ static int testapp_validate_traffic(struct test_spec *test) struct ifobject *ifobj_rx = test->ifobj_rx; struct ifobject *ifobj_tx = test->ifobj_tx; + if ((ifobj_rx->umem->unaligned_mode && !ifobj_rx->unaligned_supp) || + (ifobj_tx->umem->unaligned_mode && !ifobj_tx->unaligned_supp)) { + ksft_test_result_skip("No huge pages present.\n"); + return TEST_SKIP; + } + xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx); return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx); } @@ -1502,16 +1678,18 @@ static int testapp_validate_traffic_single_thread(struct test_spec *test, struct return __testapp_validate_traffic(test, ifobj, NULL); } -static void testapp_teardown(struct test_spec *test) +static int testapp_teardown(struct test_spec *test) { int i; test_spec_set_name(test, "TEARDOWN"); for (i = 0; i < MAX_TEARDOWN_ITER; i++) { if (testapp_validate_traffic(test)) - return; + return TEST_FAILURE; test_spec_reset(test); } + + return TEST_PASS; } static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2) @@ -1526,20 +1704,23 @@ static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2) *ifobj2 = tmp_ifobj; } -static void testapp_bidi(struct test_spec *test) +static int testapp_bidi(struct test_spec *test) { + int res; + test_spec_set_name(test, "BIDIRECTIONAL"); test->ifobj_tx->rx_on = true; test->ifobj_rx->tx_on = true; test->total_steps = 2; if (testapp_validate_traffic(test)) - return; + return TEST_FAILURE; print_verbose("Switching Tx/Rx vectors\n"); swap_directions(&test->ifobj_rx, &test->ifobj_tx); - __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx); + res = __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx); swap_directions(&test->ifobj_rx, &test->ifobj_tx); + return res; } static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx) @@ -1556,160 +1737,198 @@ static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj exit_with_error(errno); } -static void testapp_bpf_res(struct test_spec *test) +static int testapp_bpf_res(struct test_spec *test) { test_spec_set_name(test, "BPF_RES"); test->total_steps = 2; test->nb_sockets = 2; if (testapp_validate_traffic(test)) - return; + return TEST_FAILURE; swap_xsk_resources(test->ifobj_tx, test->ifobj_rx); - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_headroom(struct test_spec *test) +static int testapp_headroom(struct test_spec *test) { test_spec_set_name(test, "UMEM_HEADROOM"); test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE; - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_stats_rx_dropped(struct test_spec *test) +static int testapp_stats_rx_dropped(struct test_spec *test) { test_spec_set_name(test, "STAT_RX_DROPPED"); + if (test->mode == TEST_MODE_ZC) { + ksft_test_result_skip("Can not run RX_DROPPED test for ZC mode\n"); + return TEST_SKIP; + } + pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0); test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3; pkt_stream_receive_half(test); test->ifobj_rx->validation_func = validate_rx_dropped; - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_stats_tx_invalid_descs(struct test_spec *test) +static int testapp_stats_tx_invalid_descs(struct test_spec *test) { test_spec_set_name(test, "STAT_TX_INVALID"); pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0); test->ifobj_tx->validation_func = validate_tx_invalid_descs; - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_stats_rx_full(struct test_spec *test) +static int testapp_stats_rx_full(struct test_spec *test) { test_spec_set_name(test, "STAT_RX_FULL"); - pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE); + pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE); test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, - DEFAULT_UMEM_BUFFERS, PKT_SIZE); - if (!test->ifobj_rx->pkt_stream) - exit_with_error(ENOMEM); + DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS; test->ifobj_rx->release_rx = false; test->ifobj_rx->validation_func = validate_rx_full; - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_stats_fill_empty(struct test_spec *test) +static int testapp_stats_fill_empty(struct test_spec *test) { test_spec_set_name(test, "STAT_RX_FILL_EMPTY"); - pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE); + pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE); test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, - DEFAULT_UMEM_BUFFERS, PKT_SIZE); - if (!test->ifobj_rx->pkt_stream) - exit_with_error(ENOMEM); + DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE); test->ifobj_rx->use_fill_ring = false; test->ifobj_rx->validation_func = validate_fill_empty; - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -/* Simple test */ -static bool hugepages_present(struct ifobject *ifobject) +static int testapp_unaligned(struct test_spec *test) { - size_t mmap_sz = 2 * ifobject->umem->num_frames * ifobject->umem->frame_size; - void *bufs; - - bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_2MB, -1, 0); - if (bufs == MAP_FAILED) - return false; + test_spec_set_name(test, "UNALIGNED_MODE"); + test->ifobj_tx->umem->unaligned_mode = true; + test->ifobj_rx->umem->unaligned_mode = true; + /* Let half of the packets straddle a 4K buffer boundary */ + pkt_stream_replace_half(test, MIN_PKT_SIZE, -MIN_PKT_SIZE / 2); - mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE; - munmap(bufs, mmap_sz); - return true; + return testapp_validate_traffic(test); } -static bool testapp_unaligned(struct test_spec *test) +static int testapp_unaligned_mb(struct test_spec *test) { - if (!hugepages_present(test->ifobj_tx)) { - ksft_test_result_skip("No 2M huge pages present.\n"); - return false; - } - - test_spec_set_name(test, "UNALIGNED_MODE"); + test_spec_set_name(test, "UNALIGNED_MODE_9K"); + test->mtu = MAX_ETH_JUMBO_SIZE; test->ifobj_tx->umem->unaligned_mode = true; test->ifobj_rx->umem->unaligned_mode = true; - /* Let half of the packets straddle a buffer boundrary */ - pkt_stream_replace_half(test, PKT_SIZE, -PKT_SIZE / 2); - test->ifobj_rx->pkt_stream->use_addr_for_fill = true; - testapp_validate_traffic(test); + pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE); + return testapp_validate_traffic(test); +} - return true; +static int testapp_single_pkt(struct test_spec *test) +{ + struct pkt pkts[] = {{0, MIN_PKT_SIZE, 0, true}}; + + pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)); + return testapp_validate_traffic(test); } -static void testapp_single_pkt(struct test_spec *test) +static int testapp_multi_buffer(struct test_spec *test) { - struct pkt pkts[] = {{0x1000, PKT_SIZE, 0, true}}; + test_spec_set_name(test, "RUN_TO_COMPLETION_9K_PACKETS"); + test->mtu = MAX_ETH_JUMBO_SIZE; + pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE); + return testapp_validate_traffic(test); +} + +static int testapp_invalid_desc_mb(struct test_spec *test) +{ + struct xsk_umem_info *umem = test->ifobj_tx->umem; + u64 umem_size = umem->num_frames * umem->frame_size; + struct pkt pkts[] = { + /* Valid packet for synch to start with */ + {0, MIN_PKT_SIZE, 0, true, 0}, + /* Zero frame len is not legal */ + {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, + {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, + {0, 0, 0, false, 0}, + /* Invalid address in the second frame */ + {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, + {umem_size, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, + /* Invalid len in the middle */ + {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, + {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, + /* Invalid options in the middle */ + {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, + {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XSK_DESC__INVALID_OPTION}, + /* Transmit 2 frags, receive 3 */ + {0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, XDP_PKT_CONTD}, + {0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, 0}, + /* Middle frame crosses chunk boundary with small length */ + {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD}, + {-MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false, 0}, + /* Valid packet for synch so that something is received */ + {0, MIN_PKT_SIZE, 0, true, 0}}; + + if (umem->unaligned_mode) { + /* Crossing a chunk boundary allowed */ + pkts[12].valid = true; + pkts[13].valid = true; + } + + test->mtu = MAX_ETH_JUMBO_SIZE; pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)); - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_invalid_desc(struct test_spec *test) +static int testapp_invalid_desc(struct test_spec *test) { - u64 umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size; + struct xsk_umem_info *umem = test->ifobj_tx->umem; + u64 umem_size = umem->num_frames * umem->frame_size; struct pkt pkts[] = { /* Zero packet address allowed */ - {0, PKT_SIZE, 0, true}, + {0, MIN_PKT_SIZE, 0, true}, /* Allowed packet */ - {0x1000, PKT_SIZE, 0, true}, + {0, MIN_PKT_SIZE, 0, true}, /* Straddling the start of umem */ - {-2, PKT_SIZE, 0, false}, + {-2, MIN_PKT_SIZE, 0, false}, /* Packet too large */ - {0x2000, XSK_UMEM__INVALID_FRAME_SIZE, 0, false}, + {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false}, /* Up to end of umem allowed */ - {umem_size - PKT_SIZE, PKT_SIZE, 0, true}, + {umem_size - MIN_PKT_SIZE - 2 * umem->frame_size, MIN_PKT_SIZE, 0, true}, /* After umem ends */ - {umem_size, PKT_SIZE, 0, false}, + {umem_size, MIN_PKT_SIZE, 0, false}, /* Straddle the end of umem */ - {umem_size - PKT_SIZE / 2, PKT_SIZE, 0, false}, - /* Straddle a page boundrary */ - {0x3000 - PKT_SIZE / 2, PKT_SIZE, 0, false}, - /* Straddle a 2K boundrary */ - {0x3800 - PKT_SIZE / 2, PKT_SIZE, 0, true}, + {umem_size - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false}, + /* Straddle a 4K boundary */ + {0x1000 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false}, + /* Straddle a 2K boundary */ + {0x800 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, true}, /* Valid packet for synch so that something is received */ - {0x4000, PKT_SIZE, 0, true}}; + {0, MIN_PKT_SIZE, 0, true}}; - if (test->ifobj_tx->umem->unaligned_mode) { - /* Crossing a page boundrary allowed */ + if (umem->unaligned_mode) { + /* Crossing a page boundary allowed */ pkts[7].valid = true; } - if (test->ifobj_tx->umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) { - /* Crossing a 2K frame size boundrary not allowed */ + if (umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) { + /* Crossing a 2K frame size boundary not allowed */ pkts[8].valid = false; } if (test->ifobj_tx->shared_umem) { - pkts[4].addr += umem_size; - pkts[5].addr += umem_size; - pkts[6].addr += umem_size; + pkts[4].offset += umem_size; + pkts[5].offset += umem_size; + pkts[6].offset += umem_size; } pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)); - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_xdp_drop(struct test_spec *test) +static int testapp_xdp_drop(struct test_spec *test) { struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; @@ -1719,10 +1938,10 @@ static void testapp_xdp_drop(struct test_spec *test) skel_rx->maps.xsk, skel_tx->maps.xsk); pkt_stream_receive_half(test); - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_xdp_metadata_count(struct test_spec *test) +static int testapp_xdp_metadata_count(struct test_spec *test) { struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs; struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs; @@ -1730,7 +1949,6 @@ static void testapp_xdp_metadata_count(struct test_spec *test) int count = 0; int key = 0; - test_spec_set_name(test, "XDP_METADATA_COUNT"); test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_populate_metadata, skel_tx->progs.xsk_xdp_populate_metadata, skel_rx->maps.xsk, skel_tx->maps.xsk); @@ -1743,10 +1961,10 @@ static void testapp_xdp_metadata_count(struct test_spec *test) if (bpf_map_update_elem(bpf_map__fd(data_map), &key, &count, BPF_ANY)) exit_with_error(errno); - testapp_validate_traffic(test); + return testapp_validate_traffic(test); } -static void testapp_poll_txq_tmout(struct test_spec *test) +static int testapp_poll_txq_tmout(struct test_spec *test) { test_spec_set_name(test, "POLL_TXQ_FULL"); @@ -1754,14 +1972,56 @@ static void testapp_poll_txq_tmout(struct test_spec *test) /* create invalid frame by set umem frame_size and pkt length equal to 2048 */ test->ifobj_tx->umem->frame_size = 2048; pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048); - testapp_validate_traffic_single_thread(test, test->ifobj_tx); + return testapp_validate_traffic_single_thread(test, test->ifobj_tx); } -static void testapp_poll_rxq_tmout(struct test_spec *test) +static int testapp_poll_rxq_tmout(struct test_spec *test) { test_spec_set_name(test, "POLL_RXQ_EMPTY"); test->ifobj_rx->use_poll = true; - testapp_validate_traffic_single_thread(test, test->ifobj_rx); + return testapp_validate_traffic_single_thread(test, test->ifobj_rx); +} + +static int testapp_too_many_frags(struct test_spec *test) +{ + struct pkt pkts[2 * XSK_DESC__MAX_SKB_FRAGS + 2] = {}; + u32 max_frags, i; + + test_spec_set_name(test, "TOO_MANY_FRAGS"); + if (test->mode == TEST_MODE_ZC) + max_frags = test->ifobj_tx->xdp_zc_max_segs; + else + max_frags = XSK_DESC__MAX_SKB_FRAGS; + + test->mtu = MAX_ETH_JUMBO_SIZE; + + /* Valid packet for synch */ + pkts[0].len = MIN_PKT_SIZE; + pkts[0].valid = true; + + /* One valid packet with the max amount of frags */ + for (i = 1; i < max_frags + 1; i++) { + pkts[i].len = MIN_PKT_SIZE; + pkts[i].options = XDP_PKT_CONTD; + pkts[i].valid = true; + } + pkts[max_frags].options = 0; + + /* An invalid packet with the max amount of frags but signals packet + * continues on the last frag + */ + for (i = max_frags + 1; i < 2 * max_frags + 1; i++) { + pkts[i].len = MIN_PKT_SIZE; + pkts[i].options = XDP_PKT_CONTD; + pkts[i].valid = false; + } + + /* Valid packet for synch */ + pkts[2 * max_frags + 1].len = MIN_PKT_SIZE; + pkts[2 * max_frags + 1].valid = true; + + pkt_stream_generate_custom(test, pkts, 2 * max_frags + 2); + return testapp_validate_traffic(test); } static int xsk_load_xdp_programs(struct ifobject *ifobj) @@ -1778,25 +2038,31 @@ static void xsk_unload_xdp_programs(struct ifobject *ifobj) xsk_xdp_progs__destroy(ifobj->xdp_progs); } +/* Simple test */ +static bool hugepages_present(void) +{ + size_t mmap_sz = 2 * DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE; + void *bufs; + + bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, MAP_HUGE_2MB); + if (bufs == MAP_FAILED) + return false; + + mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE; + munmap(bufs, mmap_sz); + return true; +} + static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac, - const char *dst_ip, const char *src_ip, const u16 dst_port, - const u16 src_port, thread_func_t func_ptr) + thread_func_t func_ptr) { - struct in_addr ip; + LIBBPF_OPTS(bpf_xdp_query_opts, query_opts); int err; memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN); memcpy(ifobj->src_mac, src_mac, ETH_ALEN); - inet_aton(dst_ip, &ip); - ifobj->dst_ip = ip.s_addr; - - inet_aton(src_ip, &ip); - ifobj->src_ip = ip.s_addr; - - ifobj->dst_port = dst_port; - ifobj->src_port = src_port; - ifobj->func_ptr = func_ptr; err = xsk_load_xdp_programs(ifobj); @@ -1804,94 +2070,106 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char * printf("Error loading XDP program\n"); exit_with_error(err); } + + if (hugepages_present()) + ifobj->unaligned_supp = true; + + err = bpf_xdp_query(ifobj->ifindex, XDP_FLAGS_DRV_MODE, &query_opts); + if (err) { + ksft_print_msg("Error querying XDP capabilities\n"); + exit_with_error(-err); + } + if (query_opts.feature_flags & NETDEV_XDP_ACT_RX_SG) + ifobj->multi_buff_supp = true; + if (query_opts.feature_flags & NETDEV_XDP_ACT_XSK_ZEROCOPY) { + if (query_opts.xdp_zc_max_segs > 1) { + ifobj->multi_buff_zc_supp = true; + ifobj->xdp_zc_max_segs = query_opts.xdp_zc_max_segs; + } else { + ifobj->xdp_zc_max_segs = 0; + } + } } static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type) { + int ret = TEST_SKIP; + switch (type) { case TEST_TYPE_STATS_RX_DROPPED: - if (mode == TEST_MODE_ZC) { - ksft_test_result_skip("Can not run RX_DROPPED test for ZC mode\n"); - return; - } - testapp_stats_rx_dropped(test); + ret = testapp_stats_rx_dropped(test); break; case TEST_TYPE_STATS_TX_INVALID_DESCS: - testapp_stats_tx_invalid_descs(test); + ret = testapp_stats_tx_invalid_descs(test); break; case TEST_TYPE_STATS_RX_FULL: - testapp_stats_rx_full(test); + ret = testapp_stats_rx_full(test); break; case TEST_TYPE_STATS_FILL_EMPTY: - testapp_stats_fill_empty(test); + ret = testapp_stats_fill_empty(test); break; case TEST_TYPE_TEARDOWN: - testapp_teardown(test); + ret = testapp_teardown(test); break; case TEST_TYPE_BIDI: - testapp_bidi(test); + ret = testapp_bidi(test); break; case TEST_TYPE_BPF_RES: - testapp_bpf_res(test); + ret = testapp_bpf_res(test); break; case TEST_TYPE_RUN_TO_COMPLETION: test_spec_set_name(test, "RUN_TO_COMPLETION"); - testapp_validate_traffic(test); + ret = testapp_validate_traffic(test); + break; + case TEST_TYPE_RUN_TO_COMPLETION_MB: + ret = testapp_multi_buffer(test); break; case TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT: test_spec_set_name(test, "RUN_TO_COMPLETION_SINGLE_PKT"); - testapp_single_pkt(test); + ret = testapp_single_pkt(test); break; case TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME: test_spec_set_name(test, "RUN_TO_COMPLETION_2K_FRAME_SIZE"); test->ifobj_tx->umem->frame_size = 2048; test->ifobj_rx->umem->frame_size = 2048; - pkt_stream_replace(test, DEFAULT_PKT_CNT, PKT_SIZE); - testapp_validate_traffic(test); + pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE); + ret = testapp_validate_traffic(test); break; case TEST_TYPE_RX_POLL: test->ifobj_rx->use_poll = true; test_spec_set_name(test, "POLL_RX"); - testapp_validate_traffic(test); + ret = testapp_validate_traffic(test); break; case TEST_TYPE_TX_POLL: test->ifobj_tx->use_poll = true; test_spec_set_name(test, "POLL_TX"); - testapp_validate_traffic(test); + ret = testapp_validate_traffic(test); break; case TEST_TYPE_POLL_TXQ_TMOUT: - testapp_poll_txq_tmout(test); + ret = testapp_poll_txq_tmout(test); break; case TEST_TYPE_POLL_RXQ_TMOUT: - testapp_poll_rxq_tmout(test); + ret = testapp_poll_rxq_tmout(test); break; case TEST_TYPE_ALIGNED_INV_DESC: test_spec_set_name(test, "ALIGNED_INV_DESC"); - testapp_invalid_desc(test); + ret = testapp_invalid_desc(test); break; case TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME: test_spec_set_name(test, "ALIGNED_INV_DESC_2K_FRAME_SIZE"); test->ifobj_tx->umem->frame_size = 2048; test->ifobj_rx->umem->frame_size = 2048; - testapp_invalid_desc(test); + ret = testapp_invalid_desc(test); break; case TEST_TYPE_UNALIGNED_INV_DESC: - if (!hugepages_present(test->ifobj_tx)) { - ksft_test_result_skip("No 2M huge pages present.\n"); - return; - } test_spec_set_name(test, "UNALIGNED_INV_DESC"); test->ifobj_tx->umem->unaligned_mode = true; test->ifobj_rx->umem->unaligned_mode = true; - testapp_invalid_desc(test); + ret = testapp_invalid_desc(test); break; case TEST_TYPE_UNALIGNED_INV_DESC_4K1_FRAME: { u64 page_size, umem_size; - if (!hugepages_present(test->ifobj_tx)) { - ksft_test_result_skip("No 2M huge pages present.\n"); - return; - } test_spec_set_name(test, "UNALIGNED_INV_DESC_4K1_FRAME_SIZE"); /* Odd frame size so the UMEM doesn't end near a page boundary. */ test->ifobj_tx->umem->frame_size = 4001; @@ -1903,29 +2181,50 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_ */ page_size = sysconf(_SC_PAGESIZE); umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size; - assert(umem_size % page_size > PKT_SIZE); - assert(umem_size % page_size < page_size - PKT_SIZE); - testapp_invalid_desc(test); + assert(umem_size % page_size > MIN_PKT_SIZE); + assert(umem_size % page_size < page_size - MIN_PKT_SIZE); + ret = testapp_invalid_desc(test); break; } + case TEST_TYPE_ALIGNED_INV_DESC_MB: + test_spec_set_name(test, "ALIGNED_INV_DESC_MULTI_BUFF"); + ret = testapp_invalid_desc_mb(test); + break; + case TEST_TYPE_UNALIGNED_INV_DESC_MB: + test_spec_set_name(test, "UNALIGNED_INV_DESC_MULTI_BUFF"); + test->ifobj_tx->umem->unaligned_mode = true; + test->ifobj_rx->umem->unaligned_mode = true; + ret = testapp_invalid_desc_mb(test); + break; case TEST_TYPE_UNALIGNED: - if (!testapp_unaligned(test)) - return; + ret = testapp_unaligned(test); + break; + case TEST_TYPE_UNALIGNED_MB: + ret = testapp_unaligned_mb(test); break; case TEST_TYPE_HEADROOM: - testapp_headroom(test); + ret = testapp_headroom(test); break; case TEST_TYPE_XDP_DROP_HALF: - testapp_xdp_drop(test); + ret = testapp_xdp_drop(test); break; case TEST_TYPE_XDP_METADATA_COUNT: - testapp_xdp_metadata_count(test); + test_spec_set_name(test, "XDP_METADATA_COUNT"); + ret = testapp_xdp_metadata_count(test); + break; + case TEST_TYPE_XDP_METADATA_COUNT_MB: + test_spec_set_name(test, "XDP_METADATA_COUNT_MULTI_BUFF"); + test->mtu = MAX_ETH_JUMBO_SIZE; + ret = testapp_xdp_metadata_count(test); + break; + case TEST_TYPE_TOO_MANY_FRAGS: + ret = testapp_too_many_frags(test); break; default: break; } - if (!test->fail) + if (ret == TEST_PASS) ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test), test->name); pkt_stream_restore_default(test); @@ -2030,14 +2329,12 @@ int main(int argc, char **argv) modes++; } - init_iface(ifobj_rx, MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, - worker_testapp_validate_rx); - init_iface(ifobj_tx, MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, - worker_testapp_validate_tx); + init_iface(ifobj_rx, MAC1, MAC2, worker_testapp_validate_rx); + init_iface(ifobj_tx, MAC2, MAC1, worker_testapp_validate_tx); test_spec_init(&test, ifobj_tx, ifobj_rx, 0); - tx_pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, PKT_SIZE); - rx_pkt_stream_default = pkt_stream_generate(ifobj_rx->umem, DEFAULT_PKT_CNT, PKT_SIZE); + tx_pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE); + rx_pkt_stream_default = pkt_stream_generate(ifobj_rx->umem, DEFAULT_PKT_CNT, MIN_PKT_SIZE); if (!tx_pkt_stream_default || !rx_pkt_stream_default) exit_with_error(ENOMEM); test.tx_pkt_stream_default = tx_pkt_stream_default; diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index c535aeab2ca3..233b66cef64a 100644 --- a/tools/testing/selftests/bpf/xskxceiver.h +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -30,22 +30,16 @@ #define TEST_PASS 0 #define TEST_FAILURE -1 #define TEST_CONTINUE 1 +#define TEST_SKIP 2 #define MAX_INTERFACES 2 #define MAX_INTERFACE_NAME_CHARS 16 #define MAX_SOCKETS 2 #define MAX_TEST_NAME_SIZE 32 #define MAX_TEARDOWN_ITER 10 -#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ - sizeof(struct udphdr)) -#define MIN_ETH_PKT_SIZE 64 -#define ETH_FCS_SIZE 4 -#define MIN_PKT_SIZE (MIN_ETH_PKT_SIZE - ETH_FCS_SIZE) -#define PKT_SIZE (MIN_PKT_SIZE) -#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr)) -#define IP_PKT_VER 0x4 -#define IP_PKT_TOS 0x9 -#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr)) -#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr)) +#define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */ +#define MIN_PKT_SIZE 64 +#define MAX_ETH_PKT_SIZE 1518 +#define MAX_ETH_JUMBO_SIZE 9000 #define USLEEP_MAX 10000 #define SOCK_RECONF_CTR 10 #define BATCH_SIZE 64 @@ -55,8 +49,13 @@ #define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4) #define RX_FULL_RXQSIZE 32 #define UMEM_HEADROOM_TEST_SIZE 128 -#define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1) +#define XSK_UMEM__INVALID_FRAME_SIZE (MAX_ETH_JUMBO_SIZE + 1) +#define XSK_UMEM__LARGE_FRAME_SIZE (3 * 1024) +#define XSK_UMEM__MAX_FRAME_SIZE (4 * 1024) +#define XSK_DESC__INVALID_OPTION (0xffff) +#define XSK_DESC__MAX_SKB_FRAGS 18 #define HUGEPAGE_SIZE (2 * 1024 * 1024) +#define PKT_DUMP_NB_TO_PRINT 16 #define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0) @@ -90,16 +89,22 @@ enum test_type { TEST_TYPE_BPF_RES, TEST_TYPE_XDP_DROP_HALF, TEST_TYPE_XDP_METADATA_COUNT, + TEST_TYPE_XDP_METADATA_COUNT_MB, + TEST_TYPE_RUN_TO_COMPLETION_MB, + TEST_TYPE_UNALIGNED_MB, + TEST_TYPE_ALIGNED_INV_DESC_MB, + TEST_TYPE_UNALIGNED_INV_DESC_MB, + TEST_TYPE_TOO_MANY_FRAGS, TEST_TYPE_MAX }; -static bool opt_pkt_dump; static bool opt_verbose; struct xsk_umem_info { struct xsk_ring_prod fq; struct xsk_ring_cons cq; struct xsk_umem *umem; + u64 next_buffer; u32 num_frames; u32 frame_headroom; void *buffer; @@ -118,17 +123,19 @@ struct xsk_socket_info { }; struct pkt { - u64 addr; + int offset; u32 len; - u32 payload; + u32 pkt_nb; bool valid; + u16 options; }; struct pkt_stream { u32 nb_pkts; - u32 rx_pkt_nb; + u32 current_pkt_nb; struct pkt *pkts; - bool use_addr_for_fill; + u32 max_pkt_len; + bool verbatim; }; struct ifobject; @@ -148,11 +155,9 @@ struct ifobject { struct bpf_program *xdp_prog; enum test_mode mode; int ifindex; - u32 dst_ip; - u32 src_ip; + int mtu; u32 bind_flags; - u16 src_port; - u16 dst_port; + u32 xdp_zc_max_segs; bool tx_on; bool rx_on; bool use_poll; @@ -161,6 +166,9 @@ struct ifobject { bool release_rx; bool shared_umem; bool use_metadata; + bool unaligned_supp; + bool multi_buff_supp; + bool multi_buff_zc_supp; u8 dst_mac[ETH_ALEN]; u8 src_mac[ETH_ALEN]; }; @@ -174,6 +182,7 @@ struct test_spec { struct bpf_program *xdp_prog_tx; struct bpf_map *xskmap_rx; struct bpf_map *xskmap_tx; + int mtu; u16 total_steps; u16 current_step; u16 nb_sockets; @@ -184,7 +193,6 @@ struct test_spec { pthread_barrier_t barr; pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER; -pthread_cond_t pacing_cond = PTHREAD_COND_INITIALIZER; int pkts_in_flight; diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore b/tools/testing/selftests/cachestat/.gitignore index d65462d64816..d6c30b43a4bb 100644 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore +++ b/tools/testing/selftests/cachestat/.gitignore @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -*.out +test_cachestat diff --git a/tools/testing/selftests/cachestat/Makefile b/tools/testing/selftests/cachestat/Makefile new file mode 100644 index 000000000000..778b54ebb036 --- /dev/null +++ b/tools/testing/selftests/cachestat/Makefile @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-2.0 +TEST_GEN_PROGS := test_cachestat + +CFLAGS += $(KHDR_INCLUDES) +CFLAGS += -Wall +LDLIBS += -lrt + +include ../lib.mk diff --git a/tools/testing/selftests/cachestat/test_cachestat.c b/tools/testing/selftests/cachestat/test_cachestat.c new file mode 100644 index 000000000000..4804c7dc7b31 --- /dev/null +++ b/tools/testing/selftests/cachestat/test_cachestat.c @@ -0,0 +1,318 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE + +#include <stdio.h> +#include <stdbool.h> +#include <linux/kernel.h> +#include <linux/magic.h> +#include <linux/mman.h> +#include <sys/mman.h> +#include <sys/shm.h> +#include <sys/syscall.h> +#include <sys/vfs.h> +#include <unistd.h> +#include <string.h> +#include <fcntl.h> +#include <errno.h> + +#include "../kselftest.h" + +#define NR_TESTS 9 + +static const char * const dev_files[] = { + "/dev/zero", "/dev/null", "/dev/urandom", + "/proc/version", "/proc" +}; + +void print_cachestat(struct cachestat *cs) +{ + ksft_print_msg( + "Using cachestat: Cached: %lu, Dirty: %lu, Writeback: %lu, Evicted: %lu, Recently Evicted: %lu\n", + cs->nr_cache, cs->nr_dirty, cs->nr_writeback, + cs->nr_evicted, cs->nr_recently_evicted); +} + +bool write_exactly(int fd, size_t filesize) +{ + int random_fd = open("/dev/urandom", O_RDONLY); + char *cursor, *data; + int remained; + bool ret; + + if (random_fd < 0) { + ksft_print_msg("Unable to access urandom.\n"); + ret = false; + goto out; + } + + data = malloc(filesize); + if (!data) { + ksft_print_msg("Unable to allocate data.\n"); + ret = false; + goto close_random_fd; + } + + remained = filesize; + cursor = data; + + while (remained) { + ssize_t read_len = read(random_fd, cursor, remained); + + if (read_len <= 0) { + ksft_print_msg("Unable to read from urandom.\n"); + ret = false; + goto out_free_data; + } + + remained -= read_len; + cursor += read_len; + } + + /* write random data to fd */ + remained = filesize; + cursor = data; + while (remained) { + ssize_t write_len = write(fd, cursor, remained); + + if (write_len <= 0) { + ksft_print_msg("Unable write random data to file.\n"); + ret = false; + goto out_free_data; + } + + remained -= write_len; + cursor += write_len; + } + + ret = true; +out_free_data: + free(data); +close_random_fd: + close(random_fd); +out: + return ret; +} + +/* + * fsync() is implemented via noop_fsync() on tmpfs. This makes the fsync() + * test fail below, so we need to check for test file living on a tmpfs. + */ +static bool is_on_tmpfs(int fd) +{ + struct statfs statfs_buf; + + if (fstatfs(fd, &statfs_buf)) + return false; + + return statfs_buf.f_type == TMPFS_MAGIC; +} + +/* + * Open/create the file at filename, (optionally) write random data to it + * (exactly num_pages), then test the cachestat syscall on this file. + * + * If test_fsync == true, fsync the file, then check the number of dirty + * pages. + */ +static int test_cachestat(const char *filename, bool write_random, bool create, + bool test_fsync, unsigned long num_pages, + int open_flags, mode_t open_mode) +{ + size_t PS = sysconf(_SC_PAGESIZE); + int filesize = num_pages * PS; + int ret = KSFT_PASS; + long syscall_ret; + struct cachestat cs; + struct cachestat_range cs_range = { 0, filesize }; + + int fd = open(filename, open_flags, open_mode); + + if (fd == -1) { + ksft_print_msg("Unable to create/open file.\n"); + ret = KSFT_FAIL; + goto out; + } else { + ksft_print_msg("Create/open %s\n", filename); + } + + if (write_random) { + if (!write_exactly(fd, filesize)) { + ksft_print_msg("Unable to access urandom.\n"); + ret = KSFT_FAIL; + goto out1; + } + } + + syscall_ret = syscall(__NR_cachestat, fd, &cs_range, &cs, 0); + + ksft_print_msg("Cachestat call returned %ld\n", syscall_ret); + + if (syscall_ret) { + ksft_print_msg("Cachestat returned non-zero.\n"); + ret = KSFT_FAIL; + goto out1; + + } else { + print_cachestat(&cs); + + if (write_random) { + if (cs.nr_cache + cs.nr_evicted != num_pages) { + ksft_print_msg( + "Total number of cached and evicted pages is off.\n"); + ret = KSFT_FAIL; + } + } + } + + if (test_fsync) { + if (is_on_tmpfs(fd)) { + ret = KSFT_SKIP; + } else if (fsync(fd)) { + ksft_print_msg("fsync fails.\n"); + ret = KSFT_FAIL; + } else { + syscall_ret = syscall(__NR_cachestat, fd, &cs_range, &cs, 0); + + ksft_print_msg("Cachestat call (after fsync) returned %ld\n", + syscall_ret); + + if (!syscall_ret) { + print_cachestat(&cs); + + if (cs.nr_dirty) { + ret = KSFT_FAIL; + ksft_print_msg( + "Number of dirty should be zero after fsync.\n"); + } + } else { + ksft_print_msg("Cachestat (after fsync) returned non-zero.\n"); + ret = KSFT_FAIL; + goto out1; + } + } + } + +out1: + close(fd); + + if (create) + remove(filename); +out: + return ret; +} + +bool test_cachestat_shmem(void) +{ + size_t PS = sysconf(_SC_PAGESIZE); + size_t filesize = PS * 512 * 2; /* 2 2MB huge pages */ + int syscall_ret; + size_t compute_len = PS * 512; + struct cachestat_range cs_range = { PS, compute_len }; + char *filename = "tmpshmcstat"; + struct cachestat cs; + bool ret = true; + unsigned long num_pages = compute_len / PS; + int fd = shm_open(filename, O_CREAT | O_RDWR, 0600); + + if (fd < 0) { + ksft_print_msg("Unable to create shmem file.\n"); + ret = false; + goto out; + } + + if (ftruncate(fd, filesize)) { + ksft_print_msg("Unable to truncate shmem file.\n"); + ret = false; + goto close_fd; + } + + if (!write_exactly(fd, filesize)) { + ksft_print_msg("Unable to write to shmem file.\n"); + ret = false; + goto close_fd; + } + + syscall_ret = syscall(__NR_cachestat, fd, &cs_range, &cs, 0); + + if (syscall_ret) { + ksft_print_msg("Cachestat returned non-zero.\n"); + ret = false; + goto close_fd; + } else { + print_cachestat(&cs); + if (cs.nr_cache + cs.nr_evicted != num_pages) { + ksft_print_msg( + "Total number of cached and evicted pages is off.\n"); + ret = false; + } + } + +close_fd: + shm_unlink(filename); +out: + return ret; +} + +int main(void) +{ + int ret; + + ksft_print_header(); + + ret = syscall(__NR_cachestat, -1, NULL, NULL, 0); + if (ret == -1 && errno == ENOSYS) + ksft_exit_skip("cachestat syscall not available\n"); + + ksft_set_plan(NR_TESTS); + + if (ret == -1 && errno == EBADF) { + ksft_test_result_pass("bad file descriptor recognized\n"); + ret = 0; + } else { + ksft_test_result_fail("bad file descriptor ignored\n"); + ret = 1; + } + + for (int i = 0; i < 5; i++) { + const char *dev_filename = dev_files[i]; + + if (test_cachestat(dev_filename, false, false, false, + 4, O_RDONLY, 0400) == KSFT_PASS) + ksft_test_result_pass("cachestat works with %s\n", dev_filename); + else { + ksft_test_result_fail("cachestat fails with %s\n", dev_filename); + ret = 1; + } + } + + if (test_cachestat("tmpfilecachestat", true, true, + false, 4, O_CREAT | O_RDWR, 0600) == KSFT_PASS) + ksft_test_result_pass("cachestat works with a normal file\n"); + else { + ksft_test_result_fail("cachestat fails with normal file\n"); + ret = 1; + } + + switch (test_cachestat("tmpfilecachestat", true, true, + true, 4, O_CREAT | O_RDWR, 0600)) { + case KSFT_FAIL: + ksft_test_result_fail("cachestat fsync fails with normal file\n"); + ret = KSFT_FAIL; + break; + case KSFT_PASS: + ksft_test_result_pass("cachestat fsync works with a normal file\n"); + break; + case KSFT_SKIP: + ksft_test_result_skip("tmpfilecachestat is on tmpfs\n"); + break; + } + + if (test_cachestat_shmem()) + ksft_test_result_pass("cachestat works with a shmem file\n"); + else { + ksft_test_result_fail("cachestat fails with a shmem file\n"); + ret = 1; + } + + return ret; +} diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore index c4a57e69f749..af8c3f30b9c1 100644 --- a/tools/testing/selftests/cgroup/.gitignore +++ b/tools/testing/selftests/cgroup/.gitignore @@ -5,4 +5,6 @@ test_freezer test_kmem test_kill test_cpu +test_cpuset +test_zswap wait_inotify diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile index 3d263747d2ad..c27f05f6ce9b 100644 --- a/tools/testing/selftests/cgroup/Makefile +++ b/tools/testing/selftests/cgroup/Makefile @@ -12,6 +12,8 @@ TEST_GEN_PROGS += test_core TEST_GEN_PROGS += test_freezer TEST_GEN_PROGS += test_kill TEST_GEN_PROGS += test_cpu +TEST_GEN_PROGS += test_cpuset +TEST_GEN_PROGS += test_zswap LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h @@ -23,3 +25,5 @@ $(OUTPUT)/test_core: cgroup_util.c $(OUTPUT)/test_freezer: cgroup_util.c $(OUTPUT)/test_kill: cgroup_util.c $(OUTPUT)/test_cpu: cgroup_util.c +$(OUTPUT)/test_cpuset: cgroup_util.c +$(OUTPUT)/test_zswap: cgroup_util.c diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index e8bbbdb77e0d..0340d4ca8f51 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -286,6 +286,8 @@ int cg_destroy(const char *cgroup) { int ret; + if (!cgroup) + return 0; retry: ret = rmdir(cgroup); if (ret && errno == EBUSY) { diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h index c92df4e5d395..1df7f202214a 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.h +++ b/tools/testing/selftests/cgroup/cgroup_util.h @@ -11,6 +11,8 @@ #define USEC_PER_SEC 1000000L #define NSEC_PER_SEC 1000000000L +#define TEST_UID 65534 /* usually nobody, any !root is fine */ + /* * Checks if two given values differ by less than err% of their sum. */ diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c index 600123503063..80aa6b2373b9 100644 --- a/tools/testing/selftests/cgroup/test_core.c +++ b/tools/testing/selftests/cgroup/test_core.c @@ -683,7 +683,7 @@ cleanup: */ static int test_cgcore_lesser_euid_open(const char *root) { - const uid_t test_euid = 65534; /* usually nobody, any !root is fine */ + const uid_t test_euid = TEST_UID; int ret = KSFT_FAIL; char *cg_test_a = NULL, *cg_test_b = NULL; char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL; diff --git a/tools/testing/selftests/cgroup/test_cpuset.c b/tools/testing/selftests/cgroup/test_cpuset.c new file mode 100644 index 000000000000..b061ed1e05b4 --- /dev/null +++ b/tools/testing/selftests/cgroup/test_cpuset.c @@ -0,0 +1,275 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/limits.h> +#include <signal.h> + +#include "../kselftest.h" +#include "cgroup_util.h" + +static int idle_process_fn(const char *cgroup, void *arg) +{ + (void)pause(); + return 0; +} + +static int do_migration_fn(const char *cgroup, void *arg) +{ + int object_pid = (int)(size_t)arg; + + if (setuid(TEST_UID)) + return EXIT_FAILURE; + + // XXX checking /proc/$pid/cgroup would be quicker than wait + if (cg_enter(cgroup, object_pid) || + cg_wait_for_proc_count(cgroup, 1)) + return EXIT_FAILURE; + + return EXIT_SUCCESS; +} + +static int do_controller_fn(const char *cgroup, void *arg) +{ + const char *child = cgroup; + const char *parent = arg; + + if (setuid(TEST_UID)) + return EXIT_FAILURE; + + if (!cg_read_strstr(child, "cgroup.controllers", "cpuset")) + return EXIT_FAILURE; + + if (cg_write(parent, "cgroup.subtree_control", "+cpuset")) + return EXIT_FAILURE; + + if (cg_read_strstr(child, "cgroup.controllers", "cpuset")) + return EXIT_FAILURE; + + if (cg_write(parent, "cgroup.subtree_control", "-cpuset")) + return EXIT_FAILURE; + + if (!cg_read_strstr(child, "cgroup.controllers", "cpuset")) + return EXIT_FAILURE; + + return EXIT_SUCCESS; +} + +/* + * Migrate a process between two sibling cgroups. + * The success should only depend on the parent cgroup permissions and not the + * migrated process itself (cpuset controller is in place because it uses + * security_task_setscheduler() in cgroup v1). + * + * Deliberately don't set cpuset.cpus in children to avoid definining migration + * permissions between two different cpusets. + */ +static int test_cpuset_perms_object(const char *root, bool allow) +{ + char *parent = NULL, *child_src = NULL, *child_dst = NULL; + char *parent_procs = NULL, *child_src_procs = NULL, *child_dst_procs = NULL; + const uid_t test_euid = TEST_UID; + int object_pid = 0; + int ret = KSFT_FAIL; + + parent = cg_name(root, "cpuset_test_0"); + if (!parent) + goto cleanup; + parent_procs = cg_name(parent, "cgroup.procs"); + if (!parent_procs) + goto cleanup; + if (cg_create(parent)) + goto cleanup; + + child_src = cg_name(parent, "cpuset_test_1"); + if (!child_src) + goto cleanup; + child_src_procs = cg_name(child_src, "cgroup.procs"); + if (!child_src_procs) + goto cleanup; + if (cg_create(child_src)) + goto cleanup; + + child_dst = cg_name(parent, "cpuset_test_2"); + if (!child_dst) + goto cleanup; + child_dst_procs = cg_name(child_dst, "cgroup.procs"); + if (!child_dst_procs) + goto cleanup; + if (cg_create(child_dst)) + goto cleanup; + + if (cg_write(parent, "cgroup.subtree_control", "+cpuset")) + goto cleanup; + + if (cg_read_strstr(child_src, "cgroup.controllers", "cpuset") || + cg_read_strstr(child_dst, "cgroup.controllers", "cpuset")) + goto cleanup; + + /* Enable permissions along src->dst tree path */ + if (chown(child_src_procs, test_euid, -1) || + chown(child_dst_procs, test_euid, -1)) + goto cleanup; + + if (allow && chown(parent_procs, test_euid, -1)) + goto cleanup; + + /* Fork a privileged child as a test object */ + object_pid = cg_run_nowait(child_src, idle_process_fn, NULL); + if (object_pid < 0) + goto cleanup; + + /* Carry out migration in a child process that can drop all privileges + * (including capabilities), the main process must remain privileged for + * cleanup. + * Child process's cgroup is irrelevant but we place it into child_dst + * as hacky way to pass information about migration target to the child. + */ + if (allow ^ (cg_run(child_dst, do_migration_fn, (void *)(size_t)object_pid) == EXIT_SUCCESS)) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + if (object_pid > 0) { + (void)kill(object_pid, SIGTERM); + (void)clone_reap(object_pid, WEXITED); + } + + cg_destroy(child_dst); + free(child_dst_procs); + free(child_dst); + + cg_destroy(child_src); + free(child_src_procs); + free(child_src); + + cg_destroy(parent); + free(parent_procs); + free(parent); + + return ret; +} + +static int test_cpuset_perms_object_allow(const char *root) +{ + return test_cpuset_perms_object(root, true); +} + +static int test_cpuset_perms_object_deny(const char *root) +{ + return test_cpuset_perms_object(root, false); +} + +/* + * Migrate a process between parent and child implicitely + * Implicit migration happens when a controller is enabled/disabled. + * + */ +static int test_cpuset_perms_subtree(const char *root) +{ + char *parent = NULL, *child = NULL; + char *parent_procs = NULL, *parent_subctl = NULL, *child_procs = NULL; + const uid_t test_euid = TEST_UID; + int object_pid = 0; + int ret = KSFT_FAIL; + + parent = cg_name(root, "cpuset_test_0"); + if (!parent) + goto cleanup; + parent_procs = cg_name(parent, "cgroup.procs"); + if (!parent_procs) + goto cleanup; + parent_subctl = cg_name(parent, "cgroup.subtree_control"); + if (!parent_subctl) + goto cleanup; + if (cg_create(parent)) + goto cleanup; + + child = cg_name(parent, "cpuset_test_1"); + if (!child) + goto cleanup; + child_procs = cg_name(child, "cgroup.procs"); + if (!child_procs) + goto cleanup; + if (cg_create(child)) + goto cleanup; + + /* Enable permissions as in a delegated subtree */ + if (chown(parent_procs, test_euid, -1) || + chown(parent_subctl, test_euid, -1) || + chown(child_procs, test_euid, -1)) + goto cleanup; + + /* Put a privileged child in the subtree and modify controller state + * from an unprivileged process, the main process remains privileged + * for cleanup. + * The unprivileged child runs in subtree too to avoid parent and + * internal-node constraing violation. + */ + object_pid = cg_run_nowait(child, idle_process_fn, NULL); + if (object_pid < 0) + goto cleanup; + + if (cg_run(child, do_controller_fn, parent) != EXIT_SUCCESS) + goto cleanup; + + ret = KSFT_PASS; + +cleanup: + if (object_pid > 0) { + (void)kill(object_pid, SIGTERM); + (void)clone_reap(object_pid, WEXITED); + } + + cg_destroy(child); + free(child_procs); + free(child); + + cg_destroy(parent); + free(parent_subctl); + free(parent_procs); + free(parent); + + return ret; +} + + +#define T(x) { x, #x } +struct cpuset_test { + int (*fn)(const char *root); + const char *name; +} tests[] = { + T(test_cpuset_perms_object_allow), + T(test_cpuset_perms_object_deny), + T(test_cpuset_perms_subtree), +}; +#undef T + +int main(int argc, char *argv[]) +{ + char root[PATH_MAX]; + int i, ret = EXIT_SUCCESS; + + if (cg_find_unified_root(root, sizeof(root))) + ksft_exit_skip("cgroup v2 isn't mounted\n"); + + if (cg_read_strstr(root, "cgroup.subtree_control", "cpuset")) + if (cg_write(root, "cgroup.subtree_control", "+cpuset")) + ksft_exit_skip("Failed to set cpuset controller\n"); + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + switch (tests[i].fn(root)) { + case KSFT_PASS: + ksft_test_result_pass("%s\n", tests[i].name); + break; + case KSFT_SKIP: + ksft_test_result_skip("%s\n", tests[i].name); + break; + default: + ret = EXIT_FAILURE; + ksft_test_result_fail("%s\n", tests[i].name); + break; + } + } + + return ret; +} diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 2b5215cc599f..4afb132e4e4f 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -10,7 +10,7 @@ skip_test() { echo "$1" echo "Test SKIPPED" - exit 0 + exit 4 # ksft_skip } [[ $(id -u) -eq 0 ]] || skip_test "Test must be run as root!" diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c index 258ddc565deb..c82f974b85c9 100644 --- a/tools/testing/selftests/cgroup/test_kmem.c +++ b/tools/testing/selftests/cgroup/test_kmem.c @@ -70,12 +70,16 @@ static int test_kmem_basic(const char *root) goto cleanup; cg_write(cg, "memory.high", "1M"); + + /* wait for RCU freeing */ + sleep(1); + slab1 = cg_read_key_long(cg, "memory.stat", "slab "); - if (slab1 <= 0) + if (slab1 < 0) goto cleanup; current = cg_read_long(cg, "memory.current"); - if (current <= 0) + if (current < 0) goto cleanup; if (slab1 < slab0 / 2 && current < slab0 / 2) @@ -158,11 +162,11 @@ static int cg_run_in_subcgroups(const char *parent, * allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS * threads. Then it checks the sanity of numbers on the parent level: * the total size of the cgroups should be roughly equal to - * anon + file + slab + kernel_stack. + * anon + file + kernel + sock. */ static int test_kmem_memcg_deletion(const char *root) { - long current, slab, anon, file, kernel_stack, pagetables, percpu, sock, sum; + long current, anon, file, kernel, sock, sum; int ret = KSFT_FAIL; char *parent; @@ -180,29 +184,22 @@ static int test_kmem_memcg_deletion(const char *root) goto cleanup; current = cg_read_long(parent, "memory.current"); - slab = cg_read_key_long(parent, "memory.stat", "slab "); anon = cg_read_key_long(parent, "memory.stat", "anon "); file = cg_read_key_long(parent, "memory.stat", "file "); - kernel_stack = cg_read_key_long(parent, "memory.stat", "kernel_stack "); - pagetables = cg_read_key_long(parent, "memory.stat", "pagetables "); - percpu = cg_read_key_long(parent, "memory.stat", "percpu "); + kernel = cg_read_key_long(parent, "memory.stat", "kernel "); sock = cg_read_key_long(parent, "memory.stat", "sock "); - if (current < 0 || slab < 0 || anon < 0 || file < 0 || - kernel_stack < 0 || pagetables < 0 || percpu < 0 || sock < 0) + if (current < 0 || anon < 0 || file < 0 || kernel < 0 || sock < 0) goto cleanup; - sum = slab + anon + file + kernel_stack + pagetables + percpu + sock; + sum = anon + file + kernel + sock; if (abs(sum - current) < MAX_VMSTAT_ERROR) { ret = KSFT_PASS; } else { printf("memory.current = %ld\n", current); - printf("slab + anon + file + kernel_stack = %ld\n", sum); - printf("slab = %ld\n", slab); + printf("anon + file + kernel + sock = %ld\n", sum); printf("anon = %ld\n", anon); printf("file = %ld\n", file); - printf("kernel_stack = %ld\n", kernel_stack); - printf("pagetables = %ld\n", pagetables); - printf("percpu = %ld\n", percpu); + printf("kernel = %ld\n", kernel); printf("sock = %ld\n", sock); } diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index f4f7c0aef702..c7c9572003a8 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -292,6 +292,7 @@ static int test_memcg_protection(const char *root, bool min) char *children[4] = {NULL}; const char *attribute = min ? "memory.min" : "memory.low"; long c[4]; + long current; int i, attempts; int fd; @@ -400,7 +401,8 @@ static int test_memcg_protection(const char *root, bool min) goto cleanup; } - if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3)) + current = min ? MB(50) : MB(30); + if (!values_close(cg_read_long(parent[1], "memory.current"), current, 3)) goto cleanup; if (!reclaim_until(children[0], MB(10))) @@ -987,7 +989,9 @@ static int tcp_client(const char *cgroup, unsigned short port) char servport[6]; int retries = 0x10; /* nice round number */ int sk, ret; + long allocated; + allocated = cg_read_long(cgroup, "memory.current"); snprintf(servport, sizeof(servport), "%hd", port); ret = getaddrinfo(server, servport, NULL, &ai); if (ret) @@ -1015,7 +1019,8 @@ static int tcp_client(const char *cgroup, unsigned short port) if (current < 0 || sock < 0) goto close_sk; - if (values_close(current, sock, 10)) { + /* exclude the memory not related to socket connection */ + if (values_close(current - allocated, sock, 10)) { ret = KSFT_PASS; break; } diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c new file mode 100644 index 000000000000..49def87a909b --- /dev/null +++ b/tools/testing/selftests/cgroup/test_zswap.c @@ -0,0 +1,286 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE + +#include <linux/limits.h> +#include <unistd.h> +#include <stdio.h> +#include <signal.h> +#include <sys/sysinfo.h> +#include <string.h> +#include <sys/wait.h> +#include <sys/mman.h> + +#include "../kselftest.h" +#include "cgroup_util.h" + +static int read_int(const char *path, size_t *value) +{ + FILE *file; + int ret = 0; + + file = fopen(path, "r"); + if (!file) + return -1; + if (fscanf(file, "%ld", value) != 1) + ret = -1; + fclose(file); + return ret; +} + +static int set_min_free_kb(size_t value) +{ + FILE *file; + int ret; + + file = fopen("/proc/sys/vm/min_free_kbytes", "w"); + if (!file) + return -1; + ret = fprintf(file, "%ld\n", value); + fclose(file); + return ret; +} + +static int read_min_free_kb(size_t *value) +{ + return read_int("/proc/sys/vm/min_free_kbytes", value); +} + +static int get_zswap_stored_pages(size_t *value) +{ + return read_int("/sys/kernel/debug/zswap/stored_pages", value); +} + +static int get_zswap_written_back_pages(size_t *value) +{ + return read_int("/sys/kernel/debug/zswap/written_back_pages", value); +} + +static int allocate_bytes(const char *cgroup, void *arg) +{ + size_t size = (size_t)arg; + char *mem = (char *)malloc(size); + + if (!mem) + return -1; + for (int i = 0; i < size; i += 4095) + mem[i] = 'a'; + free(mem); + return 0; +} + +/* + * When trying to store a memcg page in zswap, if the memcg hits its memory + * limit in zswap, writeback should not be triggered. + * + * This was fixed with commit 0bdf0efa180a("zswap: do not shrink if cgroup may + * not zswap"). Needs to be revised when a per memcg writeback mechanism is + * implemented. + */ +static int test_no_invasive_cgroup_shrink(const char *root) +{ + size_t written_back_before, written_back_after; + int ret = KSFT_FAIL; + char *test_group; + + /* Set up */ + test_group = cg_name(root, "no_shrink_test"); + if (!test_group) + goto out; + if (cg_create(test_group)) + goto out; + if (cg_write(test_group, "memory.max", "1M")) + goto out; + if (cg_write(test_group, "memory.zswap.max", "10K")) + goto out; + if (get_zswap_written_back_pages(&written_back_before)) + goto out; + + /* Allocate 10x memory.max to push memory into zswap */ + if (cg_run(test_group, allocate_bytes, (void *)MB(10))) + goto out; + + /* Verify that no writeback happened because of the memcg allocation */ + if (get_zswap_written_back_pages(&written_back_after)) + goto out; + if (written_back_after == written_back_before) + ret = KSFT_PASS; +out: + cg_destroy(test_group); + free(test_group); + return ret; +} + +struct no_kmem_bypass_child_args { + size_t target_alloc_bytes; + size_t child_allocated; +}; + +static int no_kmem_bypass_child(const char *cgroup, void *arg) +{ + struct no_kmem_bypass_child_args *values = arg; + void *allocation; + + allocation = malloc(values->target_alloc_bytes); + if (!allocation) { + values->child_allocated = true; + return -1; + } + for (long i = 0; i < values->target_alloc_bytes; i += 4095) + ((char *)allocation)[i] = 'a'; + values->child_allocated = true; + pause(); + free(allocation); + return 0; +} + +/* + * When pages owned by a memcg are pushed to zswap by kswapd, they should be + * charged to that cgroup. This wasn't the case before commit + * cd08d80ecdac("mm: correctly charge compressed memory to its memcg"). + * + * The test first allocates memory in a memcg, then raises min_free_kbytes to + * a very high value so that the allocation falls below low wm, then makes + * another allocation to trigger kswapd that should push the memcg-owned pages + * to zswap and verifies that the zswap pages are correctly charged. + * + * To be run on a VM with at most 4G of memory. + */ +static int test_no_kmem_bypass(const char *root) +{ + size_t min_free_kb_high, min_free_kb_low, min_free_kb_original; + struct no_kmem_bypass_child_args *values; + size_t trigger_allocation_size; + int wait_child_iteration = 0; + long stored_pages_threshold; + struct sysinfo sys_info; + int ret = KSFT_FAIL; + int child_status; + char *test_group; + pid_t child_pid; + + /* Read sys info and compute test values accordingly */ + if (sysinfo(&sys_info) != 0) + return KSFT_FAIL; + if (sys_info.totalram > 5000000000) + return KSFT_SKIP; + values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ | + PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (values == MAP_FAILED) + return KSFT_FAIL; + if (read_min_free_kb(&min_free_kb_original)) + return KSFT_FAIL; + min_free_kb_high = sys_info.totalram / 2000; + min_free_kb_low = sys_info.totalram / 500000; + values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) + + sys_info.totalram * 5 / 100; + stored_pages_threshold = sys_info.totalram / 5 / 4096; + trigger_allocation_size = sys_info.totalram / 20; + + /* Set up test memcg */ + if (cg_write(root, "cgroup.subtree_control", "+memory")) + goto out; + test_group = cg_name(root, "kmem_bypass_test"); + if (!test_group) + goto out; + + /* Spawn memcg child and wait for it to allocate */ + set_min_free_kb(min_free_kb_low); + if (cg_create(test_group)) + goto out; + values->child_allocated = false; + child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values); + if (child_pid < 0) + goto out; + while (!values->child_allocated && wait_child_iteration++ < 10000) + usleep(1000); + + /* Try to wakeup kswapd and let it push child memory to zswap */ + set_min_free_kb(min_free_kb_high); + for (int i = 0; i < 20; i++) { + size_t stored_pages; + char *trigger_allocation = malloc(trigger_allocation_size); + + if (!trigger_allocation) + break; + for (int i = 0; i < trigger_allocation_size; i += 4095) + trigger_allocation[i] = 'b'; + usleep(100000); + free(trigger_allocation); + if (get_zswap_stored_pages(&stored_pages)) + break; + if (stored_pages < 0) + break; + /* If memory was pushed to zswap, verify it belongs to memcg */ + if (stored_pages > stored_pages_threshold) { + int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped "); + int delta = stored_pages * 4096 - zswapped; + int result_ok = delta < stored_pages * 4096 / 4; + + ret = result_ok ? KSFT_PASS : KSFT_FAIL; + break; + } + } + + kill(child_pid, SIGTERM); + waitpid(child_pid, &child_status, 0); +out: + set_min_free_kb(min_free_kb_original); + cg_destroy(test_group); + free(test_group); + return ret; +} + +#define T(x) { x, #x } +struct zswap_test { + int (*fn)(const char *root); + const char *name; +} tests[] = { + T(test_no_kmem_bypass), + T(test_no_invasive_cgroup_shrink), +}; +#undef T + +static bool zswap_configured(void) +{ + return access("/sys/module/zswap", F_OK) == 0; +} + +int main(int argc, char **argv) +{ + char root[PATH_MAX]; + int i, ret = EXIT_SUCCESS; + + if (cg_find_unified_root(root, sizeof(root))) + ksft_exit_skip("cgroup v2 isn't mounted\n"); + + if (!zswap_configured()) + ksft_exit_skip("zswap isn't configured\n"); + + /* + * Check that memory controller is available: + * memory is listed in cgroup.controllers + */ + if (cg_read_strstr(root, "cgroup.controllers", "memory")) + ksft_exit_skip("memory controller isn't available\n"); + + if (cg_read_strstr(root, "cgroup.subtree_control", "memory")) + if (cg_write(root, "cgroup.subtree_control", "+memory")) + ksft_exit_skip("Failed to set memory controller\n"); + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + switch (tests[i].fn(root)) { + case KSFT_PASS: + ksft_test_result_pass("%s\n", tests[i].name); + break; + case KSFT_SKIP: + ksft_test_result_skip("%s\n", tests[i].name); + break; + default: + ret = EXIT_FAILURE; + ksft_test_result_fail("%s\n", tests[i].name); + break; + } + } + + return ret; +} diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c index e495f895a2cd..e60cf4da8fb0 100644 --- a/tools/testing/selftests/clone3/clone3.c +++ b/tools/testing/selftests/clone3/clone3.c @@ -129,7 +129,7 @@ int main(int argc, char *argv[]) uid_t uid = getuid(); ksft_print_header(); - ksft_set_plan(18); + ksft_set_plan(19); test_clone3_supported(); /* Just a simple clone3() should return 0.*/ @@ -198,5 +198,8 @@ int main(int argc, char *argv[]) /* Do a clone3() in a new time namespace */ test_clone3(CLONE_NEWTIME, 0, 0, CLONE3_ARGS_NO_TEST); + /* Do a clone3() with exit signal (SIGCHLD) in flags */ + test_clone3(SIGCHLD, 0, -EINVAL, CLONE3_ARGS_NO_TEST); + ksft_finished(); } diff --git a/tools/testing/selftests/connector/.gitignore b/tools/testing/selftests/connector/.gitignore new file mode 100644 index 000000000000..c90098199a44 --- /dev/null +++ b/tools/testing/selftests/connector/.gitignore @@ -0,0 +1 @@ +proc_filter diff --git a/tools/testing/selftests/connector/Makefile b/tools/testing/selftests/connector/Makefile new file mode 100644 index 000000000000..92188b9bac5c --- /dev/null +++ b/tools/testing/selftests/connector/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS += -Wall $(KHDR_INCLUDES) + +TEST_GEN_PROGS = proc_filter + +include ../lib.mk diff --git a/tools/testing/selftests/connector/proc_filter.c b/tools/testing/selftests/connector/proc_filter.c new file mode 100644 index 000000000000..4a825b997666 --- /dev/null +++ b/tools/testing/selftests/connector/proc_filter.c @@ -0,0 +1,310 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <sys/types.h> +#include <sys/epoll.h> +#include <sys/socket.h> +#include <linux/netlink.h> +#include <linux/connector.h> +#include <linux/cn_proc.h> + +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <strings.h> +#include <errno.h> +#include <signal.h> +#include <string.h> + +#include "../kselftest.h" + +#define NL_MESSAGE_SIZE (sizeof(struct nlmsghdr) + sizeof(struct cn_msg) + \ + sizeof(struct proc_input)) +#define NL_MESSAGE_SIZE_NF (sizeof(struct nlmsghdr) + sizeof(struct cn_msg) + \ + sizeof(int)) + +#define MAX_EVENTS 1 + +volatile static int interrupted; +static int nl_sock, ret_errno, tcount; +static struct epoll_event evn; + +static int filter; + +#ifdef ENABLE_PRINTS +#define Printf printf +#else +#define Printf ksft_print_msg +#endif + +int send_message(void *pinp) +{ + char buff[NL_MESSAGE_SIZE]; + struct nlmsghdr *hdr; + struct cn_msg *msg; + + hdr = (struct nlmsghdr *)buff; + if (filter) + hdr->nlmsg_len = NL_MESSAGE_SIZE; + else + hdr->nlmsg_len = NL_MESSAGE_SIZE_NF; + hdr->nlmsg_type = NLMSG_DONE; + hdr->nlmsg_flags = 0; + hdr->nlmsg_seq = 0; + hdr->nlmsg_pid = getpid(); + + msg = (struct cn_msg *)NLMSG_DATA(hdr); + msg->id.idx = CN_IDX_PROC; + msg->id.val = CN_VAL_PROC; + msg->seq = 0; + msg->ack = 0; + msg->flags = 0; + + if (filter) { + msg->len = sizeof(struct proc_input); + ((struct proc_input *)msg->data)->mcast_op = + ((struct proc_input *)pinp)->mcast_op; + ((struct proc_input *)msg->data)->event_type = + ((struct proc_input *)pinp)->event_type; + } else { + msg->len = sizeof(int); + *(int *)msg->data = *(enum proc_cn_mcast_op *)pinp; + } + + if (send(nl_sock, hdr, hdr->nlmsg_len, 0) == -1) { + ret_errno = errno; + perror("send failed"); + return -3; + } + return 0; +} + +int register_proc_netlink(int *efd, void *input) +{ + struct sockaddr_nl sa_nl; + int err = 0, epoll_fd; + + nl_sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR); + + if (nl_sock == -1) { + ret_errno = errno; + perror("socket failed"); + return -1; + } + + bzero(&sa_nl, sizeof(sa_nl)); + sa_nl.nl_family = AF_NETLINK; + sa_nl.nl_groups = CN_IDX_PROC; + sa_nl.nl_pid = getpid(); + + if (bind(nl_sock, (struct sockaddr *)&sa_nl, sizeof(sa_nl)) == -1) { + ret_errno = errno; + perror("bind failed"); + return -2; + } + + epoll_fd = epoll_create1(EPOLL_CLOEXEC); + if (epoll_fd < 0) { + ret_errno = errno; + perror("epoll_create1 failed"); + return -2; + } + + err = send_message(input); + + if (err < 0) + return err; + + evn.events = EPOLLIN; + evn.data.fd = nl_sock; + if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, nl_sock, &evn) < 0) { + ret_errno = errno; + perror("epoll_ctl failed"); + return -3; + } + *efd = epoll_fd; + return 0; +} + +static void sigint(int sig) +{ + interrupted = 1; +} + +int handle_packet(char *buff, int fd, struct proc_event *event) +{ + struct nlmsghdr *hdr; + + hdr = (struct nlmsghdr *)buff; + + if (hdr->nlmsg_type == NLMSG_ERROR) { + perror("NLMSG_ERROR error\n"); + return -3; + } else if (hdr->nlmsg_type == NLMSG_DONE) { + event = (struct proc_event *) + ((struct cn_msg *)NLMSG_DATA(hdr))->data; + tcount++; + switch (event->what) { + case PROC_EVENT_EXIT: + Printf("Exit process %d (tgid %d) with code %d, signal %d\n", + event->event_data.exit.process_pid, + event->event_data.exit.process_tgid, + event->event_data.exit.exit_code, + event->event_data.exit.exit_signal); + break; + case PROC_EVENT_FORK: + Printf("Fork process %d (tgid %d), parent %d (tgid %d)\n", + event->event_data.fork.child_pid, + event->event_data.fork.child_tgid, + event->event_data.fork.parent_pid, + event->event_data.fork.parent_tgid); + break; + case PROC_EVENT_EXEC: + Printf("Exec process %d (tgid %d)\n", + event->event_data.exec.process_pid, + event->event_data.exec.process_tgid); + break; + case PROC_EVENT_UID: + Printf("UID process %d (tgid %d) uid %d euid %d\n", + event->event_data.id.process_pid, + event->event_data.id.process_tgid, + event->event_data.id.r.ruid, + event->event_data.id.e.euid); + break; + case PROC_EVENT_GID: + Printf("GID process %d (tgid %d) gid %d egid %d\n", + event->event_data.id.process_pid, + event->event_data.id.process_tgid, + event->event_data.id.r.rgid, + event->event_data.id.e.egid); + break; + case PROC_EVENT_SID: + Printf("SID process %d (tgid %d)\n", + event->event_data.sid.process_pid, + event->event_data.sid.process_tgid); + break; + case PROC_EVENT_PTRACE: + Printf("Ptrace process %d (tgid %d), Tracer %d (tgid %d)\n", + event->event_data.ptrace.process_pid, + event->event_data.ptrace.process_tgid, + event->event_data.ptrace.tracer_pid, + event->event_data.ptrace.tracer_tgid); + break; + case PROC_EVENT_COMM: + Printf("Comm process %d (tgid %d) comm %s\n", + event->event_data.comm.process_pid, + event->event_data.comm.process_tgid, + event->event_data.comm.comm); + break; + case PROC_EVENT_COREDUMP: + Printf("Coredump process %d (tgid %d) parent %d, (tgid %d)\n", + event->event_data.coredump.process_pid, + event->event_data.coredump.process_tgid, + event->event_data.coredump.parent_pid, + event->event_data.coredump.parent_tgid); + break; + default: + break; + } + } + return 0; +} + +int handle_events(int epoll_fd, struct proc_event *pev) +{ + char buff[CONNECTOR_MAX_MSG_SIZE]; + struct epoll_event ev[MAX_EVENTS]; + int i, event_count = 0, err = 0; + + event_count = epoll_wait(epoll_fd, ev, MAX_EVENTS, -1); + if (event_count < 0) { + ret_errno = errno; + if (ret_errno != EINTR) + perror("epoll_wait failed"); + return -3; + } + for (i = 0; i < event_count; i++) { + if (!(ev[i].events & EPOLLIN)) + continue; + if (recv(ev[i].data.fd, buff, sizeof(buff), 0) == -1) { + ret_errno = errno; + perror("recv failed"); + return -3; + } + err = handle_packet(buff, ev[i].data.fd, pev); + if (err < 0) + return err; + } + return 0; +} + +int main(int argc, char *argv[]) +{ + int epoll_fd, err; + struct proc_event proc_ev; + struct proc_input input; + + signal(SIGINT, sigint); + + if (argc > 2) { + printf("Expected 0(assume no-filter) or 1 argument(-f)\n"); + exit(KSFT_SKIP); + } + + if (argc == 2) { + if (strcmp(argv[1], "-f") == 0) { + filter = 1; + } else { + printf("Valid option : -f (for filter feature)\n"); + exit(KSFT_SKIP); + } + } + + if (filter) { + input.event_type = PROC_EVENT_NONZERO_EXIT; + input.mcast_op = PROC_CN_MCAST_LISTEN; + err = register_proc_netlink(&epoll_fd, (void*)&input); + } else { + enum proc_cn_mcast_op op = PROC_CN_MCAST_LISTEN; + err = register_proc_netlink(&epoll_fd, (void*)&op); + } + + if (err < 0) { + if (err == -2) + close(nl_sock); + if (err == -3) { + close(nl_sock); + close(epoll_fd); + } + exit(1); + } + + while (!interrupted) { + err = handle_events(epoll_fd, &proc_ev); + if (err < 0) { + if (ret_errno == EINTR) + continue; + if (err == -2) + close(nl_sock); + if (err == -3) { + close(nl_sock); + close(epoll_fd); + } + exit(1); + } + } + + if (filter) { + input.mcast_op = PROC_CN_MCAST_IGNORE; + send_message((void*)&input); + } else { + enum proc_cn_mcast_op op = PROC_CN_MCAST_IGNORE; + send_message((void*)&op); + } + + close(epoll_fd); + close(nl_sock); + + printf("Done total count: %d\n", tcount); + exit(0); +} diff --git a/tools/testing/selftests/cpufreq/config b/tools/testing/selftests/cpufreq/config index 75e900793e8a..ce5068f5a6a2 100644 --- a/tools/testing/selftests/cpufreq/config +++ b/tools/testing/selftests/cpufreq/config @@ -5,11 +5,3 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_GOV_ONDEMAND=y CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y -CONFIG_DEBUG_RT_MUTEXES=y -CONFIG_DEBUG_PLIST=y -CONFIG_DEBUG_SPINLOCK=y -CONFIG_DEBUG_MUTEXES=y -CONFIG_DEBUG_LOCK_ALLOC=y -CONFIG_PROVE_LOCKING=y -CONFIG_LOCKDEP=y -CONFIG_DEBUG_ATOMIC_SLEEP=y diff --git a/tools/testing/selftests/damon/config b/tools/testing/selftests/damon/config new file mode 100644 index 000000000000..0daf38974eb0 --- /dev/null +++ b/tools/testing/selftests/damon/config @@ -0,0 +1,7 @@ +CONFIG_DAMON=y +CONFIG_DAMON_SYSFS=y +CONFIG_DAMON_DBGFS=y +CONFIG_DAMON_PADDR=y +CONFIG_DAMON_VADDR=y +CONFIG_DAMON_RECLAIM=y +CONFIG_DAMON_LRU_SORT=y diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh index bcd4734ca094..60a9a305aef0 100644 --- a/tools/testing/selftests/damon/sysfs.sh +++ b/tools/testing/selftests/damon/sysfs.sh @@ -84,6 +84,7 @@ test_tried_regions() { tried_regions_dir=$1 ensure_dir "$tried_regions_dir" "exist" + ensure_file "$tried_regions_dir/total_bytes" "exist" "400" } test_stats() @@ -102,9 +103,14 @@ test_filter() ensure_file "$filter_dir/type" "exist" "600" ensure_write_succ "$filter_dir/type" "anon" "valid input" ensure_write_succ "$filter_dir/type" "memcg" "valid input" + ensure_write_succ "$filter_dir/type" "addr" "valid input" + ensure_write_succ "$filter_dir/type" "target" "valid input" ensure_write_fail "$filter_dir/type" "foo" "invalid input" ensure_file "$filter_dir/matching" "exist" "600" ensure_file "$filter_dir/memcg_path" "exist" "600" + ensure_file "$filter_dir/addr_start" "exist" "600" + ensure_file "$filter_dir/addr_end" "exist" "600" + ensure_file "$filter_dir/damon_target_idx" "exist" "600" } test_filters() diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 03f92d7aeb19..8a72bb7de70f 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -9,10 +9,12 @@ TEST_PROGS := \ mode-1-recovery-updelay.sh \ mode-2-recovery-updelay.sh \ bond_options.sh \ - bond-eth-type-change.sh + bond-eth-type-change.sh \ + bond_macvlan.sh TEST_FILES := \ lag_lib.sh \ + bond_topo_2d1c.sh \ bond_topo_3d1c.sh \ net_forwarding_lib.sh diff --git a/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh b/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh index 71c00bfafbc9..4917dbb35a44 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh @@ -11,7 +11,6 @@ finish() { ip netns delete server || true ip netns delete client || true - ip link del link1_1 || true } trap finish EXIT @@ -23,14 +22,12 @@ server_ip4=192.168.1.254 echo 180 >/proc/sys/kernel/panic # build namespaces -ip link add dev link1_1 type veth peer name link1_2 - ip netns add "server" -ip link set dev link1_2 netns server up name eth0 +ip netns add "client" +ip -n client link add eth0 type veth peer name eth0 netns server +ip netns exec server ip link set dev eth0 up ip netns exec server ip addr add ${server_ip4}/24 dev eth0 -ip netns add "client" -ip link set dev link1_1 netns client down name eth0 ip netns exec client ip link add dev bond0 down type bond mode 1 \ miimon 100 all_slaves_active 1 ip netns exec client ip link set dev eth0 down master bond0 diff --git a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh index 47ab90596acb..6358df5752f9 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh @@ -57,8 +57,8 @@ ip link add name veth2-bond type veth peer name veth2-end # add ports ip link set fbond master fab-br0 -ip link set veth1-bond down master fbond -ip link set veth2-bond down master fbond +ip link set veth1-bond master fbond +ip link set veth2-bond master fbond # bring up ip link set veth1-end up diff --git a/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh b/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh index 5cdd22048ba7..862e947e17c7 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh @@ -53,7 +53,6 @@ bond_test_enslave_type_change() # restore ARPHRD_ETHER type by enslaving such device ip link set dev "$devbond2" master "$devbond0" check_err $? "could not enslave $devbond2 to $devbond0" - ip link set dev "$devbond1" nomaster bond_check_flags "$devbond0" diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh new file mode 100755 index 000000000000..b609fb6231f4 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh @@ -0,0 +1,99 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test macvlan over balance-alb + +lib_dir=$(dirname "$0") +source ${lib_dir}/bond_topo_2d1c.sh + +m1_ns="m1-$(mktemp -u XXXXXX)" +m2_ns="m1-$(mktemp -u XXXXXX)" +m1_ip4="192.0.2.11" +m1_ip6="2001:db8::11" +m2_ip4="192.0.2.12" +m2_ip6="2001:db8::12" + +cleanup() +{ + ip -n ${m1_ns} link del macv0 + ip netns del ${m1_ns} + ip -n ${m2_ns} link del macv0 + ip netns del ${m2_ns} + + client_destroy + server_destroy + gateway_destroy +} + +check_connection() +{ + local ns=${1} + local target=${2} + local message=${3:-"macvlan_over_bond"} + RET=0 + + + ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null + check_err $? "ping failed" + log_test "$mode: $message" +} + +macvlan_over_bond() +{ + local param="$1" + RET=0 + + # setup new bond mode + bond_reset "${param}" + + ip -n ${s_ns} link add link bond0 name macv0 type macvlan mode bridge + ip -n ${s_ns} link set macv0 netns ${m1_ns} + ip -n ${m1_ns} link set dev macv0 up + ip -n ${m1_ns} addr add ${m1_ip4}/24 dev macv0 + ip -n ${m1_ns} addr add ${m1_ip6}/24 dev macv0 + + ip -n ${s_ns} link add link bond0 name macv0 type macvlan mode bridge + ip -n ${s_ns} link set macv0 netns ${m2_ns} + ip -n ${m2_ns} link set dev macv0 up + ip -n ${m2_ns} addr add ${m2_ip4}/24 dev macv0 + ip -n ${m2_ns} addr add ${m2_ip6}/24 dev macv0 + + sleep 2 + + check_connection "${c_ns}" "${s_ip4}" "IPv4: client->server" + check_connection "${c_ns}" "${s_ip6}" "IPv6: client->server" + check_connection "${c_ns}" "${m1_ip4}" "IPv4: client->macvlan_1" + check_connection "${c_ns}" "${m1_ip6}" "IPv6: client->macvlan_1" + check_connection "${c_ns}" "${m2_ip4}" "IPv4: client->macvlan_2" + check_connection "${c_ns}" "${m2_ip6}" "IPv6: client->macvlan_2" + check_connection "${m1_ns}" "${m2_ip4}" "IPv4: macvlan_1->macvlan_2" + check_connection "${m1_ns}" "${m2_ip6}" "IPv6: macvlan_1->macvlan_2" + + + sleep 5 + + check_connection "${s_ns}" "${c_ip4}" "IPv4: server->client" + check_connection "${s_ns}" "${c_ip6}" "IPv6: server->client" + check_connection "${m1_ns}" "${c_ip4}" "IPv4: macvlan_1->client" + check_connection "${m1_ns}" "${c_ip6}" "IPv6: macvlan_1->client" + check_connection "${m2_ns}" "${c_ip4}" "IPv4: macvlan_2->client" + check_connection "${m2_ns}" "${c_ip6}" "IPv6: macvlan_2->client" + check_connection "${m2_ns}" "${m1_ip4}" "IPv4: macvlan_2->macvlan_2" + check_connection "${m2_ns}" "${m1_ip6}" "IPv6: macvlan_2->macvlan_2" + + ip -n ${c_ns} neigh flush dev eth0 +} + +trap cleanup EXIT + +setup_prepare +ip netns add ${m1_ns} +ip netns add ${m2_ns} + +modes="active-backup balance-tlb balance-alb" + +for mode in $modes; do + macvlan_over_bond "mode $mode" +done + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh index 607ba5c38977..c54d1697f439 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh @@ -9,10 +9,7 @@ ALL_TESTS=" num_grat_arp " -REQUIRE_MZ=no -NUM_NETIFS=0 lib_dir=$(dirname "$0") -source ${lib_dir}/net_forwarding_lib.sh source ${lib_dir}/bond_topo_3d1c.sh skip_prio() diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh new file mode 100644 index 000000000000..a509ef949dcf --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh @@ -0,0 +1,158 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Topology for Bond mode 1,5,6 testing +# +# +-------------------------+ +# | bond0 | Server +# | + | 192.0.2.1/24 +# | eth0 | eth1 | 2001:db8::1/24 +# | +---+---+ | +# | | | | +# +-------------------------+ +# | | +# +-------------------------+ +# | | | | +# | +---+-------+---+ | Gateway +# | | br0 | | 192.0.2.254/24 +# | +-------+-------+ | 2001:db8::254/24 +# | | | +# +-------------------------+ +# | +# +-------------------------+ +# | | | Client +# | + | 192.0.2.10/24 +# | eth0 | 2001:db8::10/24 +# +-------------------------+ + +REQUIRE_MZ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source ${lib_dir}/net_forwarding_lib.sh + +s_ns="s-$(mktemp -u XXXXXX)" +c_ns="c-$(mktemp -u XXXXXX)" +g_ns="g-$(mktemp -u XXXXXX)" +s_ip4="192.0.2.1" +c_ip4="192.0.2.10" +g_ip4="192.0.2.254" +s_ip6="2001:db8::1" +c_ip6="2001:db8::10" +g_ip6="2001:db8::254" + +gateway_create() +{ + ip netns add ${g_ns} + ip -n ${g_ns} link add br0 type bridge + ip -n ${g_ns} link set br0 up + ip -n ${g_ns} addr add ${g_ip4}/24 dev br0 + ip -n ${g_ns} addr add ${g_ip6}/24 dev br0 +} + +gateway_destroy() +{ + ip -n ${g_ns} link del br0 + ip netns del ${g_ns} +} + +server_create() +{ + ip netns add ${s_ns} + ip -n ${s_ns} link add bond0 type bond mode active-backup miimon 100 + + for i in $(seq 0 1); do + ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns} + + ip -n ${g_ns} link set s${i} up + ip -n ${g_ns} link set s${i} master br0 + ip -n ${s_ns} link set eth${i} master bond0 + + tc -n ${g_ns} qdisc add dev s${i} clsact + done + + ip -n ${s_ns} link set bond0 up + ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0 + ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0 + sleep 2 +} + +# Reset bond with new mode and options +bond_reset() +{ + # Count the eth link number in real-time as this function + # maybe called from other topologies. + local link_num=$(ip -n ${s_ns} -br link show | grep -c "^eth") + local param="$1" + link_num=$((link_num -1)) + + ip -n ${s_ns} link set bond0 down + ip -n ${s_ns} link del bond0 + + ip -n ${s_ns} link add bond0 type bond $param + for i in $(seq 0 ${link_num}); do + ip -n ${s_ns} link set eth$i master bond0 + done + + ip -n ${s_ns} link set bond0 up + ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0 + ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0 + sleep 2 +} + +server_destroy() +{ + # Count the eth link number in real-time as this function + # maybe called from other topologies. + local link_num=$(ip -n ${s_ns} -br link show | grep -c "^eth") + link_num=$((link_num -1)) + for i in $(seq 0 ${link_num}); do + ip -n ${s_ns} link del eth${i} + done + ip netns del ${s_ns} +} + +client_create() +{ + ip netns add ${c_ns} + ip -n ${c_ns} link add eth0 type veth peer name c0 netns ${g_ns} + + ip -n ${g_ns} link set c0 up + ip -n ${g_ns} link set c0 master br0 + + ip -n ${c_ns} link set eth0 up + ip -n ${c_ns} addr add ${c_ip4}/24 dev eth0 + ip -n ${c_ns} addr add ${c_ip6}/24 dev eth0 +} + +client_destroy() +{ + ip -n ${c_ns} link del eth0 + ip netns del ${c_ns} +} + +setup_prepare() +{ + gateway_create + server_create + client_create +} + +cleanup() +{ + pre_cleanup + + client_destroy + server_destroy + gateway_destroy +} + +bond_check_connection() +{ + local msg=${1:-"check connection"} + + sleep 2 + ip netns exec ${s_ns} ping ${c_ip4} -c5 -i 0.1 &>/dev/null + check_err $? "${msg}: ping failed" + ip netns exec ${s_ns} ping6 ${c_ip6} -c5 -i 0.1 &>/dev/null + check_err $? "${msg}: ping6 failed" +} diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh index 69ab99a56043..3a1333d9a85b 100644 --- a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh @@ -25,121 +25,19 @@ # | eth0 | 2001:db8::10/24 # +-------------------------------------+ -s_ns="s-$(mktemp -u XXXXXX)" -c_ns="c-$(mktemp -u XXXXXX)" -g_ns="g-$(mktemp -u XXXXXX)" -s_ip4="192.0.2.1" -c_ip4="192.0.2.10" -g_ip4="192.0.2.254" -s_ip6="2001:db8::1" -c_ip6="2001:db8::10" -g_ip6="2001:db8::254" - -gateway_create() -{ - ip netns add ${g_ns} - ip -n ${g_ns} link add br0 type bridge - ip -n ${g_ns} link set br0 up - ip -n ${g_ns} addr add ${g_ip4}/24 dev br0 - ip -n ${g_ns} addr add ${g_ip6}/24 dev br0 -} - -gateway_destroy() -{ - ip -n ${g_ns} link del br0 - ip netns del ${g_ns} -} - -server_create() -{ - ip netns add ${s_ns} - ip -n ${s_ns} link add bond0 type bond mode active-backup miimon 100 - - for i in $(seq 0 2); do - ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns} - - ip -n ${g_ns} link set s${i} up - ip -n ${g_ns} link set s${i} master br0 - ip -n ${s_ns} link set eth${i} master bond0 - - tc -n ${g_ns} qdisc add dev s${i} clsact - done - - ip -n ${s_ns} link set bond0 up - ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0 - ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0 - sleep 2 -} - -# Reset bond with new mode and options -bond_reset() -{ - local param="$1" - - ip -n ${s_ns} link set bond0 down - ip -n ${s_ns} link del bond0 - - ip -n ${s_ns} link add bond0 type bond $param - for i in $(seq 0 2); do - ip -n ${s_ns} link set eth$i master bond0 - done - - ip -n ${s_ns} link set bond0 up - ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0 - ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0 - sleep 2 -} - -server_destroy() -{ - for i in $(seq 0 2); do - ip -n ${s_ns} link del eth${i} - done - ip netns del ${s_ns} -} - -client_create() -{ - ip netns add ${c_ns} - ip -n ${c_ns} link add eth0 type veth peer name c0 netns ${g_ns} - - ip -n ${g_ns} link set c0 up - ip -n ${g_ns} link set c0 master br0 - - ip -n ${c_ns} link set eth0 up - ip -n ${c_ns} addr add ${c_ip4}/24 dev eth0 - ip -n ${c_ns} addr add ${c_ip6}/24 dev eth0 -} - -client_destroy() -{ - ip -n ${c_ns} link del eth0 - ip netns del ${c_ns} -} +source bond_topo_2d1c.sh setup_prepare() { gateway_create server_create client_create -} - -cleanup() -{ - pre_cleanup - - client_destroy - server_destroy - gateway_destroy -} - -bond_check_connection() -{ - local msg=${1:-"check connection"} - sleep 2 - ip netns exec ${s_ns} ping ${c_ip4} -c5 -i 0.1 &>/dev/null - check_err $? "${msg}: ping failed" - ip netns exec ${s_ns} ping6 ${c_ip6} -c5 -i 0.1 &>/dev/null - check_err $? "${msg}: ping6 failed" + # Add the extra device as we use 3 down links for bond0 + local i=2 + ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns} + ip -n ${g_ns} link set s${i} up + ip -n ${g_ns} link set s${i} master br0 + ip -n ${s_ns} link set eth${i} master bond0 + tc -n ${g_ns} qdisc add dev s${i} clsact } diff --git a/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh b/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh index 0cf9e47e3209..a5c2aec52898 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh @@ -16,10 +16,9 @@ # +----------------|--+ +--|-----------------+ # | | # +----------------|-------------------------|-----------------+ -# | SW | | | +# | SW $swp1 + + $swp2 | +# | | | | # | +--------------|-------------------------|---------------+ | -# | | $swp1 + + $swp2 | | -# | | | | | | # | | $swp1.10 + + $swp2.10 | | # | | | | # | | br0 | | diff --git a/tools/testing/selftests/drivers/net/mlxsw/extack.sh b/tools/testing/selftests/drivers/net/mlxsw/extack.sh index 7a0a99c1d22f..6fd422d38fe8 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/extack.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/extack.sh @@ -35,7 +35,9 @@ netdev_pre_up_test() { RET=0 - ip link add name br1 up type bridge vlan_filtering 0 mcast_snooping 0 + ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0 + ip link set dev br1 addrgenmode none + ip link set dev br1 up ip link add name vx1 up type vxlan id 1000 \ local 192.0.2.17 remote 192.0.2.18 \ dstport 4789 nolearning noudpcsum tos inherit ttl 100 @@ -46,7 +48,9 @@ netdev_pre_up_test() ip link set dev $swp1 master br1 check_err $? - ip link add name br2 up type bridge vlan_filtering 0 mcast_snooping 0 + ip link add name br2 type bridge vlan_filtering 0 mcast_snooping 0 + ip link set dev br2 addrgenmode none + ip link set dev br2 up ip link add name vx2 up type vxlan id 2000 \ local 192.0.2.17 remote 192.0.2.18 \ dstport 4789 nolearning noudpcsum tos inherit ttl 100 @@ -81,7 +85,9 @@ vxlan_vlan_add_test() { RET=0 - ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0 + ip link add name br1 type bridge vlan_filtering 1 mcast_snooping 0 + ip link set dev br1 addrgenmode none + ip link set dev br1 up # Unsupported configuration: mlxsw demands VXLAN with "noudpcsum". ip link add name vx1 up type vxlan id 1000 \ @@ -117,7 +123,9 @@ vxlan_bridge_create_test() dstport 4789 tos inherit ttl 100 # Test with VLAN-aware bridge. - ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0 + ip link add name br1 type bridge vlan_filtering 1 mcast_snooping 0 + ip link set dev br1 addrgenmode none + ip link set dev br1 up ip link set dev vx1 master br1 @@ -142,8 +150,12 @@ bridge_create_test() { RET=0 - ip link add name br1 up type bridge vlan_filtering 1 - ip link add name br2 up type bridge vlan_filtering 1 + ip link add name br1 type bridge vlan_filtering 1 + ip link set dev br1 addrgenmode none + ip link set dev br1 up + ip link add name br2 type bridge vlan_filtering 1 + ip link set dev br2 addrgenmode none + ip link set dev br2 up ip link set dev $swp1 master br1 check_err $? diff --git a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh index df2b09966886..7d7f862c809c 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh @@ -15,10 +15,9 @@ # +----------------|--+ +--|-----------------+ # | | # +----------------|-------------------------|-----------------+ -# | SW | | | +# | SW $swp1 + + $swp2 | +# | | | | # | +--------------|-------------------------|---------------+ | -# | | $swp1 + + $swp2 | | -# | | | | | | # | | $swp1.10 + + $swp2.10 | | # | | | | # | | br0 | | diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh index e00435753008..e5589e2fca85 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh @@ -165,6 +165,7 @@ mirror_gre_setup_prepare() simple_if_init $h3 ip link add name br1 type bridge vlan_filtering 1 + ip link set dev br1 addrgenmode none ip link set dev br1 up ip link set dev $swp1 master br1 diff --git a/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh b/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh index f02d83e94576..fca0e1e642c6 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh @@ -83,7 +83,8 @@ h2_destroy() switch_create() { - ip link add name br0 type bridge mcast_snooping 0 + ip link add name br0 address $(mac_get $swp1) \ + type bridge mcast_snooping 0 ip link set dev br0 up ip link set dev $swp1 master br0 diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_range_occ.sh b/tools/testing/selftests/drivers/net/mlxsw/port_range_occ.sh new file mode 100755 index 000000000000..b1f0781f6b25 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/port_range_occ.sh @@ -0,0 +1,111 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test that filters that match on the same port range, but with different +# combination of IPv4/IPv6 and TCP/UDP all use the same port range register by +# observing port range registers' occupancy via devlink-resource. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + port_range_occ_test +" +NUM_NETIFS=2 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +h1_create() +{ + simple_if_init $h1 +} + +h1_destroy() +{ + simple_if_fini $h1 +} + +switch_create() +{ + simple_if_init $swp1 + tc qdisc add dev $swp1 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp1 clsact + simple_if_fini $swp1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + vrf_prepare + + h1_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h1_destroy + + vrf_cleanup +} + +port_range_occ_get() +{ + devlink_resource_occ_get port_range_registers +} + +port_range_occ_test() +{ + RET=0 + + local occ=$(port_range_occ_get) + + # Two port range registers are used, for source and destination port + # ranges. + tc filter add dev $swp1 ingress pref 1 handle 101 proto ip \ + flower skip_sw ip_proto udp src_port 1-100 dst_port 1-100 \ + action pass + (( occ + 2 == $(port_range_occ_get) )) + check_err $? "Got occupancy $(port_range_occ_get), expected $((occ + 2))" + + tc filter add dev $swp1 ingress pref 1 handle 102 proto ip \ + flower skip_sw ip_proto tcp src_port 1-100 dst_port 1-100 \ + action pass + tc filter add dev $swp1 ingress pref 2 handle 103 proto ipv6 \ + flower skip_sw ip_proto udp src_port 1-100 dst_port 1-100 \ + action pass + tc filter add dev $swp1 ingress pref 2 handle 104 proto ipv6 \ + flower skip_sw ip_proto tcp src_port 1-100 dst_port 1-100 \ + action pass + (( occ + 2 == $(port_range_occ_get) )) + check_err $? "Got occupancy $(port_range_occ_get), expected $((occ + 2))" + + tc filter del dev $swp1 ingress pref 2 handle 104 flower + tc filter del dev $swp1 ingress pref 2 handle 103 flower + tc filter del dev $swp1 ingress pref 1 handle 102 flower + (( occ + 2 == $(port_range_occ_get) )) + check_err $? "Got occupancy $(port_range_occ_get), expected $((occ + 2))" + + tc filter del dev $swp1 ingress pref 1 handle 101 flower + (( occ == $(port_range_occ_get) )) + check_err $? "Got occupancy $(port_range_occ_get), expected $occ" + + log_test "port range occupancy" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_range_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/port_range_scale.sh new file mode 100644 index 000000000000..2a70840ff14b --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/port_range_scale.sh @@ -0,0 +1,95 @@ +# SPDX-License-Identifier: GPL-2.0 + +PORT_RANGE_NUM_NETIFS=2 + +port_range_h1_create() +{ + simple_if_init $h1 +} + +port_range_h1_destroy() +{ + simple_if_fini $h1 +} + +port_range_switch_create() +{ + simple_if_init $swp1 + tc qdisc add dev $swp1 clsact +} + +port_range_switch_destroy() +{ + tc qdisc del dev $swp1 clsact + simple_if_fini $swp1 +} + +port_range_rules_create() +{ + local count=$1; shift + local should_fail=$1; shift + local batch_file="$(mktemp)" + + for ((i = 0; i < count; ++i)); do + cat >> $batch_file <<-EOF + filter add dev $swp1 ingress \ + prot ipv4 \ + pref 1000 \ + flower skip_sw \ + ip_proto udp dst_port 1-$((100 + i)) \ + action pass + EOF + done + + tc -b $batch_file + check_err_fail $should_fail $? "Rule insertion" + + rm -f $batch_file +} + +__port_range_test() +{ + local count=$1; shift + local should_fail=$1; shift + + port_range_rules_create $count $should_fail + + offload_count=$(tc -j filter show dev $swp1 ingress | + jq "[.[] | select(.options.in_hw == true)] | length") + ((offload_count == count)) + check_err_fail $should_fail $? "port range offload count" +} + +port_range_test() +{ + local count=$1; shift + local should_fail=$1; shift + + if ! tc_offload_check $PORT_RANGE_NUM_NETIFS; then + check_err 1 "Could not test offloaded functionality" + return + fi + + __port_range_test $count $should_fail +} + +port_range_setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + vrf_prepare + + port_range_h1_create + port_range_switch_create +} + +port_range_cleanup() +{ + pre_cleanup + + port_range_switch_destroy + port_range_h1_destroy + + vrf_cleanup +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh index 7edaed8eb86a..00d55b0e98c1 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh @@ -48,6 +48,7 @@ create_vlan_upper_on_top_of_bridge() ip link add dev br0 type bridge vlan_filtering 1 \ vlan_protocol $bridge_proto vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br0 addrgenmode none ip link set dev br0 up ip link set dev $swp1 master br0 @@ -88,6 +89,7 @@ create_8021ad_vlan_upper_on_top_bridge_port() ip link add dev br0 type bridge vlan_filtering 1 \ vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br0 addrgenmode none ip link set dev $swp1 master br0 ip link set dev br0 up @@ -155,6 +157,7 @@ create_vlan_upper_on_top_front_panel_enslaved_to_8021ad_bridge() ip link add dev br0 type bridge vlan_filtering 1 \ vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br0 addrgenmode none ip link set dev br0 up ip link set dev $swp1 master br0 @@ -177,6 +180,7 @@ create_vlan_upper_on_top_lag_enslaved_to_8021ad_bridge() ip link add dev br0 type bridge vlan_filtering 1 \ vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br0 addrgenmode none ip link set dev br0 up ip link add name bond1 type bond mode 802.3ad @@ -203,6 +207,7 @@ enslave_front_panel_with_vlan_upper_to_8021ad_bridge() ip link add dev br0 type bridge vlan_filtering 1 \ vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br0 addrgenmode none ip link set dev br0 up ip link add name $swp1.100 link $swp1 type vlan id 100 @@ -225,6 +230,7 @@ enslave_lag_with_vlan_upper_to_8021ad_bridge() ip link add dev br0 type bridge vlan_filtering 1 \ vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br0 addrgenmode none ip link set dev br0 up ip link add name bond1 type bond mode 802.3ad @@ -252,6 +258,7 @@ add_ip_address_to_8021ad_bridge() ip link add dev br0 type bridge vlan_filtering 1 \ vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br0 addrgenmode none ip link set dev br0 up ip link set dev $swp1 master br0 @@ -273,6 +280,7 @@ switch_bridge_protocol_from_8021q_to_8021ad() ip link add dev br0 type bridge vlan_filtering 1 \ vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br0 addrgenmode none ip link set dev br0 up ip link set dev $swp1 master br0 diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh index 87c41f5727c9..914c63d6318a 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh @@ -65,6 +65,7 @@ h2_destroy() switch_create() { ip link add name br1 type bridge vlan_filtering 1 + ip link set dev br1 addrgenmode none ip link set dev br1 up ip link set dev $swp1 master br1 ip link set dev $swp1 up diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh index 690d8daa71b4..fee74f215cec 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh @@ -138,11 +138,15 @@ switch_create() vlan_create $swp3 111 vlan_create $swp3 222 - ip link add name br111 up type bridge vlan_filtering 0 + ip link add name br111 type bridge vlan_filtering 0 + ip link set dev br111 addrgenmode none + ip link set dev br111 up ip link set dev $swp1.111 master br111 ip link set dev $swp3.111 master br111 - ip link add name br222 up type bridge vlan_filtering 0 + ip link add name br222 type bridge vlan_filtering 0 + ip link set dev br222 addrgenmode none + ip link set dev br222 up ip link set dev $swp2.222 master br222 ip link set dev $swp3.222 master br222 diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh index c8e55fa91660..6d892de43fa8 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -135,11 +135,13 @@ switch_create() prio bands 8 priomap 7 7 7 7 7 7 7 7 ip link add name br1 type bridge vlan_filtering 0 + ip link set dev br1 addrgenmode none ip link set dev br1 up ip link set dev $swp1 master br1 ip link set dev $swp3 master br1 ip link add name br111 type bridge vlan_filtering 0 + ip link set dev br111 addrgenmode none ip link set dev br111 up ip link set dev $swp2.111 master br111 ip link set dev $swp3.111 master br111 diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh new file mode 100755 index 000000000000..b79542a4dcc7 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh @@ -0,0 +1,183 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + bridge_rif_add + bridge_rif_nomaster + bridge_rif_remaster + bridge_rif_nomaster_addr + bridge_rif_nomaster_port + bridge_rif_remaster_port +" + +NUM_NETIFS=2 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + team_create lag1 lacp + ip link set dev lag1 addrgenmode none + ip link set dev lag1 address $(mac_get $swp1) + + team_create lag2 lacp + ip link set dev lag2 addrgenmode none + ip link set dev lag2 address $(mac_get $swp2) + + ip link add name br1 type bridge vlan_filtering 1 + ip link set dev br1 addrgenmode none + ip link set dev br1 address $(mac_get lag1) + ip link set dev br1 up + + ip link set dev lag1 master br1 + + ip link set dev $swp1 master lag1 + ip link set dev $swp1 up + + ip link set dev $swp2 master lag2 + ip link set dev $swp2 up +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 nomaster + ip link set dev $swp2 down + + ip link set dev $swp1 nomaster + ip link set dev $swp1 down + + ip link del dev lag2 + ip link set dev lag1 nomaster + ip link del dev lag1 + + ip link del dev br1 +} + +bridge_rif_add() +{ + RET=0 + + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + __addr_add_del br1 add 192.0.2.2/28 + sleep 1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0 + 1)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + log_test "Add RIF for bridge on address addition" +} + +bridge_rif_nomaster() +{ + RET=0 + + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + ip link set dev lag1 nomaster + sleep 1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0 - 1)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + log_test "Drop RIF for bridge on LAG deslavement" +} + +bridge_rif_remaster() +{ + RET=0 + + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + ip link set dev lag1 master br1 + sleep 1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0 + 1)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + log_test "Add RIF for bridge on LAG reenslavement" +} + +bridge_rif_nomaster_addr() +{ + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + + # Adding an address while the LAG is enslaved shouldn't generate a RIF. + __addr_add_del lag1 add 192.0.2.65/28 + sleep 1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "After adding IP: Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + # Removing the LAG from the bridge should drop RIF for the bridge (as + # tested in bridge_rif_lag_nomaster), but since the LAG now has an + # address, it should gain a RIF. + ip link set dev lag1 nomaster + sleep 1 + local rifs_occ_t2=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0)) + + ((expected_rifs == rifs_occ_t2)) + check_err $? "After deslaving: Expected $expected_rifs RIFs, $rifs_occ_t2 are used" + + log_test "Add RIF for LAG on deslavement from bridge" + + __addr_add_del lag1 del 192.0.2.65/28 + ip link set dev lag1 master br1 + sleep 1 +} + +bridge_rif_nomaster_port() +{ + RET=0 + + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + ip link set dev $swp1 nomaster + sleep 1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0 - 1)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + log_test "Drop RIF for bridge on deslavement of port from LAG" +} + +bridge_rif_remaster_port() +{ + RET=0 + + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + ip link set dev $swp1 down + ip link set dev $swp1 master lag1 + ip link set dev $swp1 up + setup_wait_dev $swp1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0 + 1)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + log_test "Add RIF for bridge on reenslavement of port to LAG" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh new file mode 100755 index 000000000000..e28f978104f3 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh @@ -0,0 +1,136 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + lag_rif_add + lag_rif_nomaster + lag_rif_remaster + lag_rif_nomaster_addr +" + +NUM_NETIFS=2 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + team_create lag1 lacp + ip link set dev lag1 addrgenmode none + ip link set dev lag1 address $(mac_get $swp1) + + team_create lag2 lacp + ip link set dev lag2 addrgenmode none + ip link set dev lag2 address $(mac_get $swp2) + + ip link set dev $swp1 master lag1 + ip link set dev $swp1 up + + ip link set dev $swp2 master lag2 + ip link set dev $swp2 up +} + +cleanup() +{ + pre_cleanup + + ip link set dev $swp2 nomaster + ip link set dev $swp2 down + + ip link set dev $swp1 nomaster + ip link set dev $swp1 down + + ip link del dev lag2 + ip link del dev lag1 +} + +lag_rif_add() +{ + RET=0 + + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + __addr_add_del lag1 add 192.0.2.2/28 + sleep 1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0 + 1)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + log_test "Add RIF for LAG on address addition" +} + +lag_rif_nomaster() +{ + RET=0 + + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + ip link set dev $swp1 nomaster + sleep 1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0 - 1)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + log_test "Drop RIF for LAG on port deslavement" +} + +lag_rif_remaster() +{ + RET=0 + + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + ip link set dev $swp1 down + ip link set dev $swp1 master lag1 + ip link set dev $swp1 up + setup_wait_dev $swp1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0 + 1)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + log_test "Add RIF for LAG on port reenslavement" +} + +lag_rif_nomaster_addr() +{ + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + + # Adding an address while the port is LAG'd shouldn't generate a RIF. + __addr_add_del $swp1 add 192.0.2.65/28 + sleep 1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "After adding IP: Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + # Removing the port from LAG should drop RIF for the LAG (as tested in + # lag_rif_nomaster), but since the port now has an address, it should + # gain a RIF. + ip link set dev $swp1 nomaster + sleep 1 + local rifs_occ_t2=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0)) + + ((expected_rifs == rifs_occ_t2)) + check_err $? "After deslaving: Expected $expected_rifs RIFs, $rifs_occ_t2 are used" + + __addr_add_del $swp1 del 192.0.2.65/28 + log_test "Add RIF for port on deslavement from LAG" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh new file mode 100755 index 000000000000..6318cfa6434c --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh @@ -0,0 +1,146 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + lag_rif_add + lag_rif_nomaster + lag_rif_remaster + lag_rif_nomaster_addr +" + +NUM_NETIFS=2 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + + team_create lag1 lacp + ip link set dev lag1 addrgenmode none + ip link set dev lag1 address $(mac_get $swp1) + + team_create lag2 lacp + ip link set dev lag2 addrgenmode none + ip link set dev lag2 address $(mac_get $swp2) + + ip link set dev $swp1 master lag1 + ip link set dev $swp1 up + + ip link set dev $swp2 master lag2 + ip link set dev $swp2 up + + vlan_create lag1 100 + ip link set dev lag1.100 addrgenmode none + + vlan_create lag1 200 + ip link set dev lag1.200 addrgenmode none +} + +cleanup() +{ + pre_cleanup + + ip link del dev lag1.200 + ip link del dev lag1.100 + + ip link set dev $swp2 nomaster + ip link set dev $swp2 down + + ip link set dev $swp1 nomaster + ip link set dev $swp1 down + + ip link del dev lag2 + ip link del dev lag1 +} + +lag_rif_add() +{ + RET=0 + + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + __addr_add_del lag1.100 add 192.0.2.2/28 + __addr_add_del lag1.200 add 192.0.2.18/28 + sleep 1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0 + 2)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + log_test "Add RIFs for LAG VLANs on address addition" +} + +lag_rif_nomaster() +{ + RET=0 + + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + ip link set dev $swp1 nomaster + sleep 1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0 - 2)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + log_test "Drop RIFs for LAG VLANs on port deslavement" +} + +lag_rif_remaster() +{ + RET=0 + + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + ip link set dev $swp1 down + ip link set dev $swp1 master lag1 + ip link set dev $swp1 up + setup_wait_dev $swp1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0 + 2)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + log_test "Add RIFs for LAG VLANs on port reenslavement" +} + +lag_rif_nomaster_addr() +{ + local rifs_occ_t0=$(devlink_resource_occ_get rifs) + + # Adding an address while the port is LAG'd shouldn't generate a RIF. + __addr_add_del $swp1 add 192.0.2.65/28 + sleep 1 + local rifs_occ_t1=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0)) + + ((expected_rifs == rifs_occ_t1)) + check_err $? "After adding IP: Expected $expected_rifs RIFs, $rifs_occ_t1 are used" + + # Removing the port from LAG should drop two RIFs for the LAG VLANs (as + # tested in lag_rif_nomaster), but since the port now has an address, it + # should gain a RIF. + ip link set dev $swp1 nomaster + sleep 1 + local rifs_occ_t2=$(devlink_resource_occ_get rifs) + local expected_rifs=$((rifs_occ_t0 - 1)) + + ((expected_rifs == rifs_occ_t2)) + check_err $? "After deslaving: Expected $expected_rifs RIFs, $rifs_occ_t2 are used" + + __addr_add_del $swp1 del 192.0.2.65/28 + log_test "Add RIF for port on deslavement from LAG" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_bridge_lag.sh b/tools/testing/selftests/drivers/net/mlxsw/router_bridge_lag.sh new file mode 100755 index 000000000000..6ce317cfaf9b --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/router_bridge_lag.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test enslavement to LAG with a clean slate. +# See $lib_dir/router_bridge_lag.sh for further details. + +ALL_TESTS=" + config_devlink_reload + config_enslave_h1 + config_enslave_h2 + config_enslave_h3 + config_enslave_h4 + config_enslave_swp1 + config_enslave_swp2 + config_enslave_swp3 + config_enslave_swp4 + config_wait + ping_ipv4 + ping_ipv6 +" + +config_devlink_reload() +{ + log_info "Devlink reload" + devlink_reload +} + +config_enslave_h1() +{ + config_enslave $h1 lag1 +} + +config_enslave_h2() +{ + config_enslave $h2 lag4 +} + +config_enslave_h3() +{ + config_enslave $h3 lag4 +} + +config_enslave_h4() +{ + config_enslave $h4 lag1 +} + +lib_dir=$(dirname $0)/../../../net/forwarding +EXTRA_SOURCE="source $lib_dir/devlink_lib.sh" +source $lib_dir/router_bridge_lag.sh diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index 5e89657857c7..893a693ad805 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -16,7 +16,6 @@ ALL_TESTS=" bridge_deletion_test bridge_vlan_flags_test vlan_1_test - lag_bridge_upper_test duplicate_vlans_test vlan_rif_refcount_test subport_rif_refcount_test @@ -211,33 +210,6 @@ vlan_1_test() ip link del dev $swp1.1 } -lag_bridge_upper_test() -{ - # Test that ports cannot be enslaved to LAG devices that have uppers - # and that failure is handled gracefully. See commit b3529af6bb0d - # ("spectrum: Reference count VLAN entries") for more details - RET=0 - - ip link add name bond1 type bond mode 802.3ad - - ip link add name br0 type bridge vlan_filtering 1 - ip link set dev bond1 master br0 - - ip link set dev $swp1 down - ip link set dev $swp1 master bond1 &> /dev/null - check_fail $? "managed to enslave port to lag when should not" - - # This might generate a trace, if we did not handle the failure - # correctly - ip -6 address add 2001:db8:1::1/64 dev $swp1 - ip -6 address del 2001:db8:1::1/64 dev $swp1 - - log_test "lag with bridge upper" - - ip link del dev br0 - ip link del dev bond1 -} - duplicate_vlans_test() { # Test that on a given port a VLAN is only used once. Either as VLAN @@ -510,9 +482,6 @@ vlan_interface_uppers_test() ip link set dev $swp1 master br0 ip link add link br0 name br0.10 type vlan id 10 - ip link add link br0.10 name macvlan0 \ - type macvlan mode private &> /dev/null - check_fail $? "managed to create a macvlan when should not" ip -6 address add 2001:db8:1::1/64 dev br0.10 ip link add link br0.10 name macvlan0 type macvlan mode private diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh index 7d9e73a43a49..0c47faff9274 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh @@ -98,12 +98,12 @@ sb_occ_etc_check() port_pool_test() { - local exp_max_occ=288 + local exp_max_occ=$(devlink_cell_size_get) local max_occ devlink sb occupancy clearmax $DEVLINK_DEV - $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ + $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ -t ip -q devlink sb occupancy snapshot $DEVLINK_DEV @@ -126,12 +126,12 @@ port_pool_test() port_tc_ip_test() { - local exp_max_occ=288 + local exp_max_occ=$(devlink_cell_size_get) local max_occ devlink sb occupancy clearmax $DEVLINK_DEV - $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ + $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \ -t ip -q devlink sb occupancy snapshot $DEVLINK_DEV @@ -154,16 +154,12 @@ port_tc_ip_test() port_tc_arp_test() { - local exp_max_occ=96 + local exp_max_occ=$(devlink_cell_size_get) local max_occ - if [[ $MLXSW_CHIP != "mlxsw_spectrum" ]]; then - exp_max_occ=144 - fi - devlink sb occupancy clearmax $DEVLINK_DEV - $MZ $h1 -c 1 -p 160 -a $h1mac -A 192.0.1.1 -t arp -q + $MZ $h1 -c 1 -p 10 -a $h1mac -A 192.0.1.1 -t arp -q devlink sb occupancy snapshot $DEVLINK_DEV diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_range_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_range_scale.sh new file mode 120000 index 000000000000..bd670d9dc4e5 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_range_scale.sh @@ -0,0 +1 @@ +../spectrum/port_range_scale.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh index 688338bbeb97..a88d8a8c85f2 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -33,6 +33,7 @@ ALL_TESTS=" port rif_mac_profile rif_counter + port_range " for current_test in ${TESTS:-$ALL_TESTS}; do diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_range_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_range_scale.sh new file mode 100644 index 000000000000..d0847e8ea270 --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_range_scale.sh @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 +source ../port_range_scale.sh + +port_range_get_target() +{ + local should_fail=$1; shift + local target + + target=$(devlink_resource_size_get port_range_registers) + + if ((! should_fail)); then + echo $target + else + echo $((target + 1)) + fi +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh index f0443b1b05b9..60753d46a2d4 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh @@ -34,6 +34,7 @@ create_vxlan_on_top_of_8021ad_bridge() ip link add dev br0 type bridge vlan_filtering 1 vlan_protocol 802.1ad \ vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br0 addrgenmode none ip link set dev br0 up ip link add name vx100 type vxlan id 1000 local 192.0.2.17 dstport \ diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index 95d9f710a630..f981c957f097 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -30,6 +30,7 @@ ALL_TESTS=" port rif_mac_profile rif_counter + port_range " for current_test in ${TESTS:-$ALL_TESTS}; do diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh index 99a332b712f0..4687b0a7dffb 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh @@ -444,8 +444,12 @@ offload_indication_setup_create() { # Create a simple setup with two bridges, each with a VxLAN device # and one local port - ip link add name br0 up type bridge mcast_snooping 0 - ip link add name br1 up type bridge mcast_snooping 0 + ip link add name br0 type bridge mcast_snooping 0 + ip link set dev br0 addrgenmode none + ip link set dev br0 up + ip link add name br1 type bridge mcast_snooping 0 + ip link set dev br1 addrgenmode none + ip link set dev br1 up ip link set dev $swp1 master br0 ip link set dev $swp2 master br1 @@ -646,8 +650,12 @@ offload_indication_decap_route_test() RET=0 - ip link add name br0 up type bridge mcast_snooping 0 - ip link add name br1 up type bridge mcast_snooping 0 + ip link add name br0 type bridge mcast_snooping 0 + ip link set dev br0 addrgenmode none + ip link set dev br0 up + ip link add name br1 type bridge mcast_snooping 0 + ip link set dev br1 addrgenmode none + ip link set dev br1 up ip link set dev $swp1 master br0 ip link set dev $swp2 master br1 ip link set dev vxlan0 master br0 @@ -780,7 +788,9 @@ __offload_indication_join_vxlan_first() offload_indication_join_vxlan_first() { - ip link add dev br0 up type bridge mcast_snooping 0 + ip link add dev br0 type bridge mcast_snooping 0 + ip link set dev br0 addrgenmode none + ip link set dev br0 up ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 @@ -815,7 +825,9 @@ __offload_indication_join_vxlan_last() offload_indication_join_vxlan_last() { - ip link add dev br0 up type bridge mcast_snooping 0 + ip link add dev br0 type bridge mcast_snooping 0 + ip link set dev br0 addrgenmode none + ip link set dev br0 up ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 @@ -842,6 +854,7 @@ sanitization_vlan_aware_test() RET=0 ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1 + ip link set dev br0 addrgenmode none ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \ $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 @@ -915,8 +928,10 @@ offload_indication_vlan_aware_setup_create() { # Create a simple setup with two VxLAN devices and a single VLAN-aware # bridge - ip link add name br0 up type bridge mcast_snooping 0 vlan_filtering 1 \ + ip link add name br0 type bridge mcast_snooping 0 vlan_filtering 1 \ vlan_default_pvid 0 + ip link set dev br0 addrgenmode none + ip link set dev br0 up ip link set dev $swp1 master br0 @@ -1060,8 +1075,10 @@ offload_indication_vlan_aware_decap_route_test() offload_indication_vlan_aware_join_vxlan_first() { - ip link add dev br0 up type bridge mcast_snooping 0 \ + ip link add dev br0 type bridge mcast_snooping 0 \ vlan_filtering 1 vlan_default_pvid 1 + ip link set dev br0 addrgenmode none + ip link set dev br0 up ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 @@ -1073,8 +1090,10 @@ offload_indication_vlan_aware_join_vxlan_first() offload_indication_vlan_aware_join_vxlan_last() { - ip link add dev br0 up type bridge mcast_snooping 0 \ + ip link add dev br0 type bridge mcast_snooping 0 \ vlan_filtering 1 vlan_default_pvid 1 + ip link set dev br0 addrgenmode none + ip link set dev br0 up ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 @@ -1091,8 +1110,10 @@ offload_indication_vlan_aware_l3vni_test() RET=0 sysctl_set net.ipv6.conf.default.disable_ipv6 1 - ip link add dev br0 up type bridge mcast_snooping 0 \ + ip link add dev br0 type bridge mcast_snooping 0 \ vlan_filtering 1 vlan_default_pvid 0 + ip link set dev br0 addrgenmode none + ip link set dev br0 up ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore b/tools/testing/selftests/fchmodat2/.gitignore index 57d296341304..82a4846cbc4b 100644 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore +++ b/tools/testing/selftests/fchmodat2/.gitignore @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -srcu.h +/*_test diff --git a/tools/testing/selftests/fchmodat2/Makefile b/tools/testing/selftests/fchmodat2/Makefile new file mode 100644 index 000000000000..71ec34bf1501 --- /dev/null +++ b/tools/testing/selftests/fchmodat2/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined -static-libasan $(KHDR_INCLUDES) +TEST_GEN_PROGS := fchmodat2_test + +include ../lib.mk diff --git a/tools/testing/selftests/fchmodat2/fchmodat2_test.c b/tools/testing/selftests/fchmodat2/fchmodat2_test.c new file mode 100644 index 000000000000..e0319417124d --- /dev/null +++ b/tools/testing/selftests/fchmodat2/fchmodat2_test.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#define _GNU_SOURCE +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <syscall.h> +#include <unistd.h> + +#include "../kselftest.h" + +int sys_fchmodat2(int dfd, const char *filename, mode_t mode, int flags) +{ + int ret = syscall(__NR_fchmodat2, dfd, filename, mode, flags); + + return ret >= 0 ? ret : -errno; +} + +int setup_testdir(void) +{ + int dfd, ret; + char dirname[] = "/tmp/ksft-fchmodat2.XXXXXX"; + + /* Make the top-level directory. */ + if (!mkdtemp(dirname)) + ksft_exit_fail_msg("%s: failed to create tmpdir\n", __func__); + + dfd = open(dirname, O_PATH | O_DIRECTORY); + if (dfd < 0) + ksft_exit_fail_msg("%s: failed to open tmpdir\n", __func__); + + ret = openat(dfd, "regfile", O_CREAT | O_WRONLY | O_TRUNC, 0644); + if (ret < 0) + ksft_exit_fail_msg("%s: failed to create file in tmpdir\n", + __func__); + close(ret); + + ret = symlinkat("regfile", dfd, "symlink"); + if (ret < 0) + ksft_exit_fail_msg("%s: failed to create symlink in tmpdir\n", + __func__); + + return dfd; +} + +int expect_mode(int dfd, const char *filename, mode_t expect_mode) +{ + struct stat st; + int ret = fstatat(dfd, filename, &st, AT_SYMLINK_NOFOLLOW); + + if (ret) + ksft_exit_fail_msg("%s: %s: fstatat failed\n", + __func__, filename); + + return (st.st_mode == expect_mode); +} + +void test_regfile(void) +{ + int dfd, ret; + + dfd = setup_testdir(); + + ret = sys_fchmodat2(dfd, "regfile", 0640, 0); + + if (ret < 0) + ksft_exit_fail_msg("%s: fchmodat2(noflag) failed\n", __func__); + + if (!expect_mode(dfd, "regfile", 0100640)) + ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2\n", + __func__); + + ret = sys_fchmodat2(dfd, "regfile", 0600, AT_SYMLINK_NOFOLLOW); + + if (ret < 0) + ksft_exit_fail_msg("%s: fchmodat2(AT_SYMLINK_NOFOLLOW) failed\n", + __func__); + + if (!expect_mode(dfd, "regfile", 0100600)) + ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2 with nofollow\n", + __func__); + + ksft_test_result_pass("fchmodat2(regfile)\n"); +} + +void test_symlink(void) +{ + int dfd, ret; + + dfd = setup_testdir(); + + ret = sys_fchmodat2(dfd, "symlink", 0640, 0); + + if (ret < 0) + ksft_exit_fail_msg("%s: fchmodat2(noflag) failed\n", __func__); + + if (!expect_mode(dfd, "regfile", 0100640)) + ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2\n", + __func__); + + if (!expect_mode(dfd, "symlink", 0120777)) + ksft_exit_fail_msg("%s: wrong symlink mode bits after fchmodat2\n", + __func__); + + ret = sys_fchmodat2(dfd, "symlink", 0600, AT_SYMLINK_NOFOLLOW); + + /* + * On certain filesystems (xfs or btrfs), chmod operation fails. So we + * first check the symlink target but if the operation fails we mark the + * test as skipped. + * + * https://sourceware.org/legacy-ml/libc-alpha/2020-02/msg00467.html + */ + if (ret == 0 && !expect_mode(dfd, "symlink", 0120600)) + ksft_exit_fail_msg("%s: wrong symlink mode bits after fchmodat2 with nofollow\n", + __func__); + + if (!expect_mode(dfd, "regfile", 0100640)) + ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2 with nofollow\n", + __func__); + + if (ret != 0) + ksft_test_result_skip("fchmodat2(symlink)\n"); + else + ksft_test_result_pass("fchmodat2(symlink)\n"); +} + +#define NUM_TESTS 2 + +int main(int argc, char **argv) +{ + ksft_print_header(); + ksft_set_plan(NUM_TESTS); + + test_regfile(); + test_symlink(); + + if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0) + ksft_exit_fail(); + else + ksft_exit_pass(); +} diff --git a/tools/testing/selftests/filelock/Makefile b/tools/testing/selftests/filelock/Makefile new file mode 100644 index 000000000000..478e82f8b464 --- /dev/null +++ b/tools/testing/selftests/filelock/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_GEN_PROGS := ofdlocks + +include ../lib.mk diff --git a/tools/testing/selftests/filelock/ofdlocks.c b/tools/testing/selftests/filelock/ofdlocks.c new file mode 100644 index 000000000000..a55b79810ab2 --- /dev/null +++ b/tools/testing/selftests/filelock/ofdlocks.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <fcntl.h> +#include <assert.h> +#include <stdio.h> +#include <unistd.h> +#include <string.h> +#include "../kselftest.h" + +static int lock_set(int fd, struct flock *fl) +{ + int ret; + + fl->l_pid = 0; // needed for OFD locks + fl->l_whence = SEEK_SET; + ret = fcntl(fd, F_OFD_SETLK, fl); + if (ret) + perror("fcntl()"); + return ret; +} + +static int lock_get(int fd, struct flock *fl) +{ + int ret; + + fl->l_pid = 0; // needed for OFD locks + fl->l_whence = SEEK_SET; + ret = fcntl(fd, F_OFD_GETLK, fl); + if (ret) + perror("fcntl()"); + return ret; +} + +int main(void) +{ + int rc; + struct flock fl, fl2; + int fd = open("/tmp/aa", O_RDWR | O_CREAT | O_EXCL, 0600); + int fd2 = open("/tmp/aa", O_RDONLY); + + unlink("/tmp/aa"); + assert(fd != -1); + assert(fd2 != -1); + ksft_print_msg("[INFO] opened fds %i %i\n", fd, fd2); + + /* Set some read lock */ + fl.l_type = F_RDLCK; + fl.l_start = 5; + fl.l_len = 3; + rc = lock_set(fd, &fl); + if (rc == 0) { + ksft_print_msg + ("[SUCCESS] set OFD read lock on first fd\n"); + } else { + ksft_print_msg("[FAIL] to set OFD read lock on first fd\n"); + return -1; + } + /* Make sure read locks do not conflict on different fds. */ + fl.l_type = F_RDLCK; + fl.l_start = 5; + fl.l_len = 1; + rc = lock_get(fd2, &fl); + if (rc != 0) + return -1; + if (fl.l_type != F_UNLCK) { + ksft_print_msg("[FAIL] read locks conflicted\n"); + return -1; + } + /* Make sure read/write locks do conflict on different fds. */ + fl.l_type = F_WRLCK; + fl.l_start = 5; + fl.l_len = 1; + rc = lock_get(fd2, &fl); + if (rc != 0) + return -1; + if (fl.l_type != F_UNLCK) { + ksft_print_msg + ("[SUCCESS] read and write locks conflicted\n"); + } else { + ksft_print_msg + ("[SUCCESS] read and write locks not conflicted\n"); + return -1; + } + /* Get info about the lock on first fd. */ + fl.l_type = F_UNLCK; + fl.l_start = 5; + fl.l_len = 1; + rc = lock_get(fd, &fl); + if (rc != 0) { + ksft_print_msg + ("[FAIL] F_OFD_GETLK with F_UNLCK not supported\n"); + return -1; + } + if (fl.l_type != F_UNLCK) { + ksft_print_msg + ("[SUCCESS] F_UNLCK test returns: locked, type %i pid %i len %zi\n", + fl.l_type, fl.l_pid, fl.l_len); + } else { + ksft_print_msg + ("[FAIL] F_OFD_GETLK with F_UNLCK did not return lock info\n"); + return -1; + } + /* Try the same but by locking everything by len==0. */ + fl2.l_type = F_UNLCK; + fl2.l_start = 0; + fl2.l_len = 0; + rc = lock_get(fd, &fl2); + if (rc != 0) { + ksft_print_msg + ("[FAIL] F_OFD_GETLK with F_UNLCK not supported\n"); + return -1; + } + if (memcmp(&fl, &fl2, sizeof(fl))) { + ksft_print_msg + ("[FAIL] F_UNLCK test returns: locked, type %i pid %i len %zi\n", + fl.l_type, fl.l_pid, fl.l_len); + return -1; + } + ksft_print_msg("[SUCCESS] F_UNLCK with len==0 returned the same\n"); + /* Get info about the lock on second fd - no locks on it. */ + fl.l_type = F_UNLCK; + fl.l_start = 0; + fl.l_len = 0; + lock_get(fd2, &fl); + if (fl.l_type != F_UNLCK) { + ksft_print_msg + ("[FAIL] F_OFD_GETLK with F_UNLCK return lock info from another fd\n"); + return -1; + } + return 0; +} diff --git a/tools/testing/selftests/filesystems/fat/run_fat_tests.sh b/tools/testing/selftests/filesystems/fat/run_fat_tests.sh index 7f35dc3d15df..d61264d4795d 100755 --- a/tools/testing/selftests/filesystems/fat/run_fat_tests.sh +++ b/tools/testing/selftests/filesystems/fat/run_fat_tests.sh @@ -12,7 +12,7 @@ set -u set -o pipefail BASE_DIR="$(dirname $0)" -TMP_DIR="$(mktemp -d /tmp/fat_tests_tmp.XXXX)" +TMP_DIR="$(mktemp -d /tmp/fat_tests_tmp.XXXXXX)" IMG_PATH="${TMP_DIR}/fat.img" MNT_PATH="${TMP_DIR}/mnt" diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest index 2506621e75df..c778d4dcc17e 100755 --- a/tools/testing/selftests/ftrace/ftracetest +++ b/tools/testing/selftests/ftrace/ftracetest @@ -31,6 +31,9 @@ err_ret=1 # kselftest skip code is 4 err_skip=4 +# umount required +UMOUNT_DIR="" + # cgroup RT scheduling prevents chrt commands from succeeding, which # induces failures in test wakeup tests. Disable for the duration of # the tests. @@ -45,6 +48,9 @@ setup() { cleanup() { echo $sched_rt_runtime_orig > $sched_rt_runtime + if [ -n "${UMOUNT_DIR}" ]; then + umount ${UMOUNT_DIR} ||: + fi } errexit() { # message @@ -124,6 +130,7 @@ parse_opts() { # opts ;; --logdir|-l) LOG_DIR=$2 + LINK_PTR= shift 2 ;; *.tc) @@ -160,11 +167,13 @@ if [ -z "$TRACING_DIR" ]; then mount -t tracefs nodev /sys/kernel/tracing || errexit "Failed to mount /sys/kernel/tracing" TRACING_DIR="/sys/kernel/tracing" + UMOUNT_DIR=${TRACING_DIR} # If debugfs exists, then so does /sys/kernel/debug elif [ -d "/sys/kernel/debug" ]; then mount -t debugfs nodev /sys/kernel/debug || errexit "Failed to mount /sys/kernel/debug" TRACING_DIR="/sys/kernel/debug/tracing" + UMOUNT_DIR=${TRACING_DIR} else err_ret=$err_skip errexit "debugfs and tracefs are not configured in this kernel" @@ -181,7 +190,10 @@ fi TOP_DIR=`absdir $0` TEST_DIR=$TOP_DIR/test.d TEST_CASES=`find_testcases $TEST_DIR` -LOG_DIR=$TOP_DIR/logs/`date +%Y%m%d-%H%M%S`/ +LOG_TOP_DIR=$TOP_DIR/logs +LOG_DATE=`date +%Y%m%d-%H%M%S` +LOG_DIR=$LOG_TOP_DIR/$LOG_DATE/ +LINK_PTR=$LOG_TOP_DIR/latest KEEP_LOG=0 KTAP=0 DEBUG=0 @@ -207,6 +219,10 @@ else LOG_FILE=$LOG_DIR/ftracetest.log mkdir -p $LOG_DIR || errexit "Failed to make a log directory: $LOG_DIR" date > $LOG_FILE + if [ "x-$LINK_PTR" != "x-" ]; then + unlink $LINK_PTR + ln -fs $LOG_DATE $LINK_PTR + fi fi # Define text colors @@ -301,7 +317,7 @@ ktaptest() { # result comment comment="# $comment" fi - echo $CASENO $result $INSTANCE$CASENAME $comment + echo $result $CASENO $INSTANCE$CASENAME $comment } eval_result() { # sigval diff --git a/tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc b/tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc new file mode 100644 index 000000000000..63b76cf2a360 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc @@ -0,0 +1,31 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Snapshot and tracing_cpumask +# requires: trace_marker tracing_cpumask snapshot +# flags: instance + +# This testcase is constrived to reproduce a problem that the cpu buffers +# become unavailable which is due to 'record_disabled' of array_buffer and +# max_buffer being messed up. + +# Store origin cpumask +ORIG_CPUMASK=`cat tracing_cpumask` + +# Stop tracing all cpu +echo 0 > tracing_cpumask + +# Take a snapshot of the main buffer +echo 1 > snapshot + +# Restore origin cpumask, note that there should be some cpus being traced +echo ${ORIG_CPUMASK} > tracing_cpumask + +# Set tracing on +echo 1 > tracing_on + +# Write a log into buffer +echo "test input 1" > trace_marker + +# Ensure the log writed so that cpu buffers are still available +grep -q "test input 1" trace +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc new file mode 100644 index 000000000000..b9c21a81d248 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc @@ -0,0 +1,78 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Generic dynamic event - add/remove probes with BTF arguments +# requires: dynamic_events "<argname>":README + +KPROBES= +FPROBES= +FIELDS= + +if grep -qF "p[:[<group>/][<event>]] <place> [<args>]" README ; then + KPROBES=yes +fi +if grep -qF "f[:[<group>/][<event>]] <func-name>[%return] [<args>]" README ; then + FPROBES=yes +fi +if grep -qF "<argname>[->field[->field|.field...]]" README ; then + FIELDS=yes +fi + +if [ -z "$KPROBES" -a -z "$FPROBES" ] ; then + exit_unsupported +fi + +echo 0 > events/enable +echo > dynamic_events + +TP=kfree +TP2=kmem_cache_alloc +TP3=getname_flags +TP4=sched_wakeup + +if [ "$FPROBES" ] ; then +echo "f:fpevent $TP object" >> dynamic_events +echo "t:tpevent $TP ptr" >> dynamic_events + +grep -q "fpevent.*object=object" dynamic_events +grep -q "tpevent.*ptr=ptr" dynamic_events + +echo > dynamic_events + +echo "f:fpevent $TP "'$arg1' >> dynamic_events +grep -q "fpevent.*object=object" dynamic_events + +echo > dynamic_events + +echo "f:fpevent $TP "'$arg*' >> dynamic_events +echo "t:tpevent $TP "'$arg*' >> dynamic_events + +grep -q "fpevent.*object=object" dynamic_events +grep -q "tpevent.*ptr=ptr" dynamic_events +! grep -q "tpevent.*_data" dynamic_events +fi + +echo > dynamic_events + +if [ "$FIELDS" ] ; then +echo "t:tpevent ${TP2} obj_size=s->object_size" >> dynamic_events +echo "f:fpevent ${TP3}%return path=\$retval->name:string" >> dynamic_events +echo "t:tpevent2 ${TP4} p->se.group_node.next->prev" >> dynamic_events + +grep -q "tpevent .*obj_size=s->object_size" dynamic_events +grep -q "fpevent.*path=\$retval->name:string" dynamic_events +grep -q 'tpevent2 .*p->se.group_node.next->prev' dynamic_events + +echo > dynamic_events +fi + +if [ "$KPROBES" ] ; then +echo "p:kpevent $TP object" >> dynamic_events +grep -q "kpevent.*object=object" dynamic_events + +echo > dynamic_events + +echo "p:kpevent $TP "'$arg*' >> dynamic_events +grep -q "kpevent.*object=object" dynamic_events +fi + +clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc new file mode 100644 index 000000000000..dc25bcf4f9e2 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc @@ -0,0 +1,26 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Generic dynamic event - add/remove fprobe events +# requires: dynamic_events "f[:[<group>/][<event>]] <func-name>[%return] [<args>]":README + +echo 0 > events/enable +echo > dynamic_events + +PLACE=$FUNCTION_FORK + +echo "f:myevent1 $PLACE" >> dynamic_events +echo "f:myevent2 $PLACE%return" >> dynamic_events + +grep -q myevent1 dynamic_events +grep -q myevent2 dynamic_events +test -d events/fprobes/myevent1 +test -d events/fprobes/myevent2 + +echo "-:myevent2" >> dynamic_events + +grep -q myevent1 dynamic_events +! grep -q myevent2 dynamic_events + +echo > dynamic_events + +clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc new file mode 100644 index 000000000000..155792eaeee5 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc @@ -0,0 +1,27 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Generic dynamic event - add/remove tracepoint probe events +# requires: dynamic_events "t[:[<group>/][<event>]] <tracepoint> [<args>]":README + +echo 0 > events/enable +echo > dynamic_events + +TRACEPOINT1=kmem_cache_alloc +TRACEPOINT2=kmem_cache_free + +echo "t:myevent1 $TRACEPOINT1" >> dynamic_events +echo "t:myevent2 $TRACEPOINT2" >> dynamic_events + +grep -q myevent1 dynamic_events +grep -q myevent2 dynamic_events +test -d events/tracepoints/myevent1 +test -d events/tracepoints/myevent2 + +echo "-:myevent2" >> dynamic_events + +grep -q myevent1 dynamic_events +! grep -q myevent2 dynamic_events + +echo > dynamic_events + +clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc new file mode 100644 index 000000000000..20e42c030095 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc @@ -0,0 +1,119 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Fprobe event parser error log check +# requires: dynamic_events "f[:[<group>/][<event>]] <func-name>[%return] [<args>]":README + +check_error() { # command-with-error-pos-by-^ + ftrace_errlog_check 'trace_fprobe' "$1" 'dynamic_events' +} + +case `uname -m` in +x86_64|i[3456]86) + REG=%ax ;; +aarch64) + REG=%x0 ;; +*) + REG=%r0 ;; +esac + +check_error 'f^100 vfs_read' # MAXACT_NO_KPROBE +check_error 'f^1a111 vfs_read' # BAD_MAXACT +check_error 'f^100000 vfs_read' # MAXACT_TOO_BIG + +check_error 'f ^non_exist_func' # BAD_PROBE_ADDR (enoent) +check_error 'f ^vfs_read+10' # BAD_PROBE_ADDR +check_error 'f:^/bar vfs_read' # NO_GROUP_NAME +check_error 'f:^12345678901234567890123456789012345678901234567890123456789012345/bar vfs_read' # GROUP_TOO_LONG + +check_error 'f:^foo.1/bar vfs_read' # BAD_GROUP_NAME +check_error 'f:^ vfs_read' # NO_EVENT_NAME +check_error 'f:foo/^12345678901234567890123456789012345678901234567890123456789012345 vfs_read' # EVENT_TOO_LONG +check_error 'f:foo/^bar.1 vfs_read' # BAD_EVENT_NAME + +check_error 'f vfs_read ^$stack10000' # BAD_STACK_NUM + +check_error 'f vfs_read ^$arg10000' # BAD_ARG_NUM + +check_error 'f vfs_read $retval ^$arg1' # BAD_VAR +check_error 'f vfs_read ^$none_var' # BAD_VAR +check_error 'f vfs_read ^'$REG # BAD_VAR + +check_error 'f vfs_read ^@12345678abcde' # BAD_MEM_ADDR +check_error 'f vfs_read ^@+10' # FILE_ON_KPROBE + +grep -q "imm-value" README && \ +check_error 'f vfs_read arg1=\^x' # BAD_IMM +grep -q "imm-string" README && \ +check_error 'f vfs_read arg1=\"abcd^' # IMMSTR_NO_CLOSE + +check_error 'f vfs_read ^+0@0)' # DEREF_NEED_BRACE +check_error 'f vfs_read ^+0ab1(@0)' # BAD_DEREF_OFFS +check_error 'f vfs_read +0(+0(@0^)' # DEREF_OPEN_BRACE + +if grep -A1 "fetcharg:" README | grep -q '\$comm' ; then +check_error 'f vfs_read +0(^$comm)' # COMM_CANT_DEREF +fi + +check_error 'f vfs_read ^&1' # BAD_FETCH_ARG + + +# We've introduced this limitation with array support +if grep -q ' <type>\\\[<array-size>\\\]' README; then +check_error 'f vfs_read +0(^+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(@0))))))))))))))' # TOO_MANY_OPS? +check_error 'f vfs_read +0(@11):u8[10^' # ARRAY_NO_CLOSE +check_error 'f vfs_read +0(@11):u8[10]^a' # BAD_ARRAY_SUFFIX +check_error 'f vfs_read +0(@11):u8[^10a]' # BAD_ARRAY_NUM +check_error 'f vfs_read +0(@11):u8[^256]' # ARRAY_TOO_BIG +fi + +check_error 'f vfs_read @11:^unknown_type' # BAD_TYPE +check_error 'f vfs_read $stack0:^string' # BAD_STRING +check_error 'f vfs_read @11:^b10@a/16' # BAD_BITFIELD + +check_error 'f vfs_read ^arg123456789012345678901234567890=@11' # ARG_NAME_TOO_LOG +check_error 'f vfs_read ^=@11' # NO_ARG_NAME +check_error 'f vfs_read ^var.1=@11' # BAD_ARG_NAME +check_error 'f vfs_read var1=@11 ^var1=@12' # USED_ARG_NAME +check_error 'f vfs_read ^+1234567(+1234567(+1234567(+1234567(+1234567(+1234567(@1234))))))' # ARG_TOO_LONG +check_error 'f vfs_read arg1=^' # NO_ARG_BODY + + +# multiprobe errors +if grep -q "Create/append/" README && grep -q "imm-value" README; then +echo "f:fprobes/testevent $FUNCTION_FORK" > dynamic_events +check_error '^f:fprobes/testevent do_exit%return' # DIFF_PROBE_TYPE + +# Explicitly use printf "%s" to not interpret \1 +printf "%s" "f:fprobes/testevent $FUNCTION_FORK abcd=\\1" > dynamic_events +check_error "f:fprobes/testevent $FUNCTION_FORK ^bcd=\\1" # DIFF_ARG_TYPE +check_error "f:fprobes/testevent $FUNCTION_FORK ^abcd=\\1:u8" # DIFF_ARG_TYPE +check_error "f:fprobes/testevent $FUNCTION_FORK ^abcd=\\\"foo\"" # DIFF_ARG_TYPE +check_error "^f:fprobes/testevent $FUNCTION_FORK abcd=\\1" # SAME_PROBE +fi + +# %return suffix errors +check_error 'f vfs_read^%hoge' # BAD_ADDR_SUFFIX + +# BTF arguments errors +if grep -q "<argname>" README; then +check_error 'f vfs_read args=^$arg*' # BAD_VAR_ARGS +check_error 'f vfs_read +0(^$arg*)' # BAD_VAR_ARGS +check_error 'f vfs_read $arg* ^$arg*' # DOUBLE_ARGS +check_error 'f vfs_read%return ^$arg*' # NOFENTRY_ARGS +check_error 'f vfs_read ^hoge' # NO_BTFARG +check_error 'f kfree ^$arg10' # NO_BTFARG (exceed the number of parameters) +check_error 'f kfree%return ^$retval' # NO_RETVAL + +if grep -qF "<argname>[->field[->field|.field...]]" README ; then +check_error 'f vfs_read%return $retval->^foo' # NO_PTR_STRCT +check_error 'f vfs_read file->^foo' # NO_BTF_FIELD +check_error 'f vfs_read file^-.foo' # BAD_HYPHEN +check_error 'f vfs_read ^file:string' # BAD_TYPE4STR +fi + +else +check_error 'f vfs_read ^$arg*' # NOSUP_BTFARG +check_error 't kfree ^$arg*' # NOSUP_BTFARG +fi + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/tprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/tprobe_syntax_errors.tc new file mode 100644 index 000000000000..da117b8f1d12 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/dynevent/tprobe_syntax_errors.tc @@ -0,0 +1,82 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Tracepoint probe event parser error log check +# requires: dynamic_events "t[:[<group>/][<event>]] <tracepoint> [<args>]":README + +check_error() { # command-with-error-pos-by-^ + ftrace_errlog_check 'trace_fprobe' "$1" 'dynamic_events' +} + +check_error 't^100 kfree' # BAD_MAXACT_TYPE + +check_error 't ^non_exist_tracepoint' # NO_TRACEPOINT +check_error 't:^/bar kfree' # NO_GROUP_NAME +check_error 't:^12345678901234567890123456789012345678901234567890123456789012345/bar kfree' # GROUP_TOO_LONG + +check_error 't:^foo.1/bar kfree' # BAD_GROUP_NAME +check_error 't:^ kfree' # NO_EVENT_NAME +check_error 't:foo/^12345678901234567890123456789012345678901234567890123456789012345 kfree' # EVENT_TOO_LONG +check_error 't:foo/^bar.1 kfree' # BAD_EVENT_NAME + +check_error 't kfree ^$retval' # RETVAL_ON_PROBE +check_error 't kfree ^$stack10000' # BAD_STACK_NUM + +check_error 't kfree ^$arg10000' # BAD_ARG_NUM + +check_error 't kfree ^$none_var' # BAD_VAR +check_error 't kfree ^%rax' # BAD_VAR + +check_error 't kfree ^@12345678abcde' # BAD_MEM_ADDR +check_error 't kfree ^@+10' # FILE_ON_KPROBE + +grep -q "imm-value" README && \ +check_error 't kfree arg1=\^x' # BAD_IMM +grep -q "imm-string" README && \ +check_error 't kfree arg1=\"abcd^' # IMMSTR_NO_CLOSE + +check_error 't kfree ^+0@0)' # DEREF_NEED_BRACE +check_error 't kfree ^+0ab1(@0)' # BAD_DEREF_OFFS +check_error 't kfree +0(+0(@0^)' # DEREF_OPEN_BRACE + +if grep -A1 "fetcharg:" README | grep -q '\$comm' ; then +check_error 't kfree +0(^$comm)' # COMM_CANT_DEREF +fi + +check_error 't kfree ^&1' # BAD_FETCH_ARG + + +# We've introduced this limitation with array support +if grep -q ' <type>\\\[<array-size>\\\]' README; then +check_error 't kfree +0(^+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(@0))))))))))))))' # TOO_MANY_OPS? +check_error 't kfree +0(@11):u8[10^' # ARRAY_NO_CLOSE +check_error 't kfree +0(@11):u8[10]^a' # BAD_ARRAY_SUFFIX +check_error 't kfree +0(@11):u8[^10a]' # BAD_ARRAY_NUM +check_error 't kfree +0(@11):u8[^256]' # ARRAY_TOO_BIG +fi + +check_error 't kfree @11:^unknown_type' # BAD_TYPE +check_error 't kfree $stack0:^string' # BAD_STRING +check_error 't kfree @11:^b10@a/16' # BAD_BITFIELD + +check_error 't kfree ^arg123456789012345678901234567890=@11' # ARG_NAME_TOO_LOG +check_error 't kfree ^=@11' # NO_ARG_NAME +check_error 't kfree ^var.1=@11' # BAD_ARG_NAME +check_error 't kfree var1=@11 ^var1=@12' # USED_ARG_NAME +check_error 't kfree ^+1234567(+1234567(+1234567(+1234567(+1234567(+1234567(@1234))))))' # ARG_TOO_LONG +check_error 't kfree arg1=^' # NO_ARG_BODY + + +# multiprobe errors +if grep -q "Create/append/" README && grep -q "imm-value" README; then +echo "t:tracepoint/testevent kfree" > dynamic_events +check_error '^f:tracepoint/testevent kfree' # DIFF_PROBE_TYPE + +# Explicitly use printf "%s" to not interpret \1 +printf "%s" "t:tracepoints/testevent kfree abcd=\\1" > dynamic_events +check_error "t:tracepoints/testevent kfree ^bcd=\\1" # DIFF_ARG_TYPE +check_error "t:tracepoints/testevent kfree ^abcd=\\1:u8" # DIFF_ARG_TYPE +check_error "t:tracepoints/testevent kfree ^abcd=\\\"foo\"" # DIFF_ARG_TYPE +check_error "^t:tracepoints/testevent kfree abcd=\\1" # SAME_PROBE +fi + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc new file mode 100644 index 000000000000..e34c0bdef3ed --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc @@ -0,0 +1,44 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: ftrace - function graph print function return value +# requires: options/funcgraph-retval options/funcgraph-retval-hex function_graph:tracer + +# Make sure that funcgraph-retval works + +fail() { # msg + echo $1 + exit_fail +} + +disable_tracing +clear_trace + +# get self PID, can not use $$, because it is PPID +read PID _ < /proc/self/stat + +[ -f set_ftrace_filter ] && echo proc_reg_write > set_ftrace_filter +[ -f set_ftrace_pid ] && echo ${PID} > set_ftrace_pid +echo function_graph > current_tracer +echo 1 > options/funcgraph-retval + +set +e +enable_tracing +echo > /proc/interrupts +disable_tracing +set -e + +: "Test printing the error code in signed decimal format" +echo 0 > options/funcgraph-retval-hex +count=`cat trace | grep 'proc_reg_write' | grep '= -5' | wc -l` +if [ $count -eq 0 ]; then + fail "Return value can not be printed in signed decimal format" +fi + +: "Test printing the error code in hexadecimal format" +echo 1 > options/funcgraph-retval-hex +count=`cat trace | grep 'proc_reg_write' | grep 'fffffffb' | wc -l` +if [ $count -eq 0 ]; then + fail "Return value can not be printed in hexadecimal format" +fi + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc index 0eb47fbb3f44..42422e425107 100644 --- a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc +++ b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc @@ -39,7 +39,7 @@ instance_read() { instance_set() { while :; do - echo 1 > foo/events/sched/sched_switch + echo 1 > foo/events/sched/sched_switch/enable done 2> /dev/null } diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc index 285b4770efad..ff7499eb98d6 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc @@ -34,14 +34,19 @@ mips*) esac : "Test get argument (1)" -echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):char" > kprobe_events +if grep -q eventfs_add_dir available_filter_functions; then + DIR_NAME="eventfs_add_dir" +else + DIR_NAME="tracefs_create_dir" +fi +echo "p:testprobe ${DIR_NAME} arg1=+0(${ARG1}):char" > kprobe_events echo 1 > events/kprobes/testprobe/enable echo "p:test $FUNCTION_FORK" >> kprobe_events grep -qe "testprobe.* arg1='t'" trace echo 0 > events/kprobes/testprobe/enable : "Test get argument (2)" -echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):char arg2=+0(${ARG1}):char[4]" > kprobe_events +echo "p:testprobe ${DIR_NAME} arg1=+0(${ARG1}):char arg2=+0(${ARG1}):char[4]" > kprobe_events echo 1 > events/kprobes/testprobe/enable echo "p:test $FUNCTION_FORK" >> kprobe_events grep -qe "testprobe.* arg1='t' arg2={'t','e','s','t'}" trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc index a4f8e7c53c1f..a202b2ea4baf 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc @@ -37,14 +37,19 @@ loongarch*) esac : "Test get argument (1)" -echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string" > kprobe_events +if grep -q eventfs_add_dir available_filter_functions; then + DIR_NAME="eventfs_add_dir" +else + DIR_NAME="tracefs_create_dir" +fi +echo "p:testprobe ${DIR_NAME} arg1=+0(${ARG1}):string" > kprobe_events echo 1 > events/kprobes/testprobe/enable echo "p:test $FUNCTION_FORK" >> kprobe_events grep -qe "testprobe.* arg1=\"test\"" trace echo 0 > events/kprobes/testprobe/enable : "Test get argument (2)" -echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events +echo "p:testprobe ${DIR_NAME} arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events echo 1 > events/kprobes/testprobe/enable echo "p:test $FUNCTION_FORK" >> kprobe_events grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_insn_boundary.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_insn_boundary.tc new file mode 100644 index 000000000000..4f7cc318f331 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_insn_boundary.tc @@ -0,0 +1,19 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-or-later +# Copyright (C) 2023 Akanksha J N, IBM corporation +# description: Register multiple kprobe events in a function +# requires: kprobe_events + +for i in `seq 0 255`; do + echo p $FUNCTION_FORK+${i} >> kprobe_events || continue +done + +cat kprobe_events >> $testlog + +echo 1 > events/kprobes/enable +( echo "forked" ) +echo 0 > events/kprobes/enable +echo > kprobe_events +echo "Waiting for unoptimizing & freeing" +sleep 5 +echo "Done" diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_opt_types.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_opt_types.tc new file mode 100644 index 000000000000..9f5d99328086 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_opt_types.tc @@ -0,0 +1,34 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-or-later +# Copyright (C) 2023 Akanksha J N, IBM corporation +# description: Register/unregister optimized probe +# requires: kprobe_events + +case `uname -m` in +x86_64) +;; +arm*) +;; +ppc*) +;; +*) + echo "Please implement other architecture here" + exit_unsupported +esac + +DEFAULT=$(cat /proc/sys/debug/kprobes-optimization) +echo 1 > /proc/sys/debug/kprobes-optimization +for i in `seq 0 255`; do + echo "p:testprobe $FUNCTION_FORK+${i}" > kprobe_events || continue + echo 1 > events/kprobes/enable || continue + (echo "forked") + PROBE=$(grep $FUNCTION_FORK /sys/kernel/debug/kprobes/list) + echo 0 > events/kprobes/enable + echo > kprobe_events + if echo $PROBE | grep -q OPTIMIZED; then + echo "$DEFAULT" > /proc/sys/debug/kprobes-optimization + exit_pass + fi +done +echo "$DEFAULT" > /proc/sys/debug/kprobes-optimization +exit_unresolved diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index 9e85d3019ff0..65fbb26fd58c 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -8,7 +8,7 @@ check_error() { # command-with-error-pos-by-^ } if grep -q 'r\[maxactive\]' README; then -check_error 'p^100 vfs_read' # MAXACT_NO_KPROBE +check_error 'p^100 vfs_read' # BAD_MAXACT_TYPE check_error 'r^1a111 vfs_read' # BAD_MAXACT check_error 'r^100000 vfs_read' # MAXACT_TOO_BIG fi @@ -103,4 +103,18 @@ check_error 'p vfs_read^%hoge' # BAD_ADDR_SUFFIX check_error 'p ^vfs_read+10%return' # BAD_RETPROBE fi +# BTF arguments errors +if grep -q "<argname>" README; then +check_error 'p vfs_read args=^$arg*' # BAD_VAR_ARGS +check_error 'p vfs_read +0(^$arg*)' # BAD_VAR_ARGS +check_error 'p vfs_read $arg* ^$arg*' # DOUBLE_ARGS +check_error 'r vfs_read ^$arg*' # NOFENTRY_ARGS +check_error 'p vfs_read+8 ^$arg*' # NOFENTRY_ARGS +check_error 'p vfs_read ^hoge' # NO_BTFARG +check_error 'p kfree ^$arg10' # NO_BTFARG (exceed the number of parameters) +check_error 'r kfree ^$retval' # NO_RETVAL +else +check_error 'p vfs_read ^$arg*' # NOSUP_BTFARG +fi + exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc index 213d890ed188..174376ddbc6c 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test inter-event histogram trigger trace action with dynamic string param -# requires: set_event synthetic_events events/sched/sched_process_exec/hist "char name[]' >> synthetic_events":README ping:program +# requires: set_event synthetic_events events/sched/sched_process_exec/hist "' >> synthetic_events":README ping:program fail() { #msg echo $1 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc index 955e3ceea44b..b927ee54c02d 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test synthetic_events syntax parser errors -# requires: synthetic_events error_log "char name[]' >> synthetic_events":README +# requires: synthetic_events error_log "' >> synthetic_events":README check_error() { # command-with-error-pos-by-^ ftrace_errlog_check 'synthetic_events' "$1" 'synthetic_events' diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c index 3651ce17beeb..d183f878360b 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c +++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c @@ -24,6 +24,7 @@ static long timeout_ns = 100000; /* 100us default timeout */ static futex_t futex_pi; +static pthread_barrier_t barrier; void usage(char *prog) { @@ -48,6 +49,8 @@ void *get_pi_lock(void *arg) if (ret != 0) error("futex_lock_pi failed\n", ret); + pthread_barrier_wait(&barrier); + /* Blocks forever */ ret = futex_wait(&lock, 0, NULL, 0); error("futex_wait failed\n", ret); @@ -130,6 +133,7 @@ int main(int argc, char *argv[]) basename(argv[0])); ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns); + pthread_barrier_init(&barrier, NULL, 2); pthread_create(&thread, NULL, get_pi_lock, NULL); /* initialize relative timeout */ @@ -163,6 +167,9 @@ int main(int argc, char *argv[]) res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, 0); test_timeout(res, &ret, "futex_wait_requeue_pi monotonic", ETIMEDOUT); + /* Wait until the other thread calls futex_lock_pi() */ + pthread_barrier_wait(&barrier); + pthread_barrier_destroy(&barrier); /* * FUTEX_LOCK_PI with CLOCK_REALTIME * Due to historical reasons, FUTEX_LOCK_PI supports only realtime diff --git a/tools/testing/selftests/gpio/gpio-sim.sh b/tools/testing/selftests/gpio/gpio-sim.sh index fa2ce2b9dd5f..6fb66a687f17 100755 --- a/tools/testing/selftests/gpio/gpio-sim.sh +++ b/tools/testing/selftests/gpio/gpio-sim.sh @@ -152,9 +152,9 @@ sysfs_set_pull() { local PULL=$4 local DEVNAME=`configfs_dev_name $DEV` local CHIPNAME=`configfs_chip_name $DEV $BANK` - local SYSFSPATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio$OFFSET/pull" + local SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio$OFFSET/pull" - echo $PULL > $SYSFSPATH || fail "Unable to set line pull in sysfs" + echo $PULL > $SYSFS_PATH || fail "Unable to set line pull in sysfs" } # Load the gpio-sim module. This will pull in configfs if needed too. diff --git a/tools/testing/selftests/hid/Makefile b/tools/testing/selftests/hid/Makefile index 01c0491d64da..2e986cbf1a46 100644 --- a/tools/testing/selftests/hid/Makefile +++ b/tools/testing/selftests/hid/Makefile @@ -167,7 +167,7 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \ OUTPUT=$(HOST_BUILD_DIR)/resolve_btfids/ BPFOBJ=$(HOST_BPFOBJ) # Get Clang's default includes on this system, as opposed to those seen by -# '-target bpf'. This fixes "missing" files on some architectures/distros, +# '--target=bpf'. This fixes "missing" files on some architectures/distros, # such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. # # Use '-idirafter': Don't interfere with include mechanics except where the @@ -196,12 +196,12 @@ CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \ # $3 - CFLAGS define CLANG_BPF_BUILD_RULE $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) - $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -mcpu=v3 -o $2 + $(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v3 -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 define CLANG_NOALU32_BPF_BUILD_RULE $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2) - $(Q)$(CLANG) $3 -O2 -target bpf -c $1 -mcpu=v2 -o $2 + $(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v2 -o $2 endef # Build BPF object using GCC define GCC_BPF_BUILD_RULE diff --git a/tools/testing/selftests/hid/tests/test_wacom_generic.py b/tools/testing/selftests/hid/tests/test_wacom_generic.py index b1eb2bc787fc..f92fe8e02c1b 100644 --- a/tools/testing/selftests/hid/tests/test_wacom_generic.py +++ b/tools/testing/selftests/hid/tests/test_wacom_generic.py @@ -31,6 +31,7 @@ from enum import Enum from hidtools.hut import HUT from hidtools.hid import HidUnit from . import base +from . import test_multitouch import libevdev import pytest @@ -517,7 +518,7 @@ class BaseTest: for usage in get_report_usages(report): yield usage - def assertName(self, uhdev): + def assertName(self, uhdev, type): """ Assert that the name is as we expect. @@ -526,7 +527,7 @@ class BaseTest: this assertion from the base class to work properly. """ evdev = uhdev.get_evdev() - expected_name = uhdev.name + " Pen" + expected_name = uhdev.name + type if "wacom" not in expected_name.lower(): expected_name = "Wacom " + expected_name assert evdev.name == expected_name @@ -549,6 +550,12 @@ class BaseTest: usage_id("Generic Desktop", "Y"): PhysRange( PhysRange.CENTIMETER, 5, 150 ), + usage_id("Digitizers", "Width"): PhysRange( + PhysRange.CENTIMETER, 5, 150 + ), + usage_id("Digitizers", "Height"): PhysRange( + PhysRange.CENTIMETER, 5, 150 + ), usage_id("Digitizers", "X Tilt"): PhysRange(PhysRange.DEGREE, 90, 180), usage_id("Digitizers", "Y Tilt"): PhysRange(PhysRange.DEGREE, 90, 180), usage_id("Digitizers", "Twist"): PhysRange(PhysRange.DEGREE, 358, 360), @@ -603,7 +610,17 @@ class BaseTest: pass -class TestOpaqueTablet(BaseTest.TestTablet): +class PenTabletTest(BaseTest.TestTablet): + def assertName(self, uhdev): + super().assertName(uhdev, " Pen") + + +class TouchTabletTest(BaseTest.TestTablet): + def assertName(self, uhdev): + super().assertName(uhdev, " Finger") + + +class TestOpaqueTablet(PenTabletTest): def create_device(self): return OpaqueTablet() @@ -842,3 +859,64 @@ class TestPTHX60_Pen(TestOpaqueCTLTablet): libevdev.InputEvent(libevdev.EV_KEY.BTN_0, 0), ], ) + + +class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest): + def create_device(self): + return test_multitouch.Digitizer( + "DTH 2452", + rdesc="05 0d 09 04 a1 01 85 0c 95 01 75 08 15 00 26 ff 00 81 03 09 54 81 02 09 22 a1 02 05 0d 95 01 75 01 25 01 09 42 81 02 81 03 09 47 81 02 95 05 81 03 09 51 26 ff 00 75 10 95 01 81 02 35 00 65 11 55 0e 05 01 09 30 26 a0 44 46 96 14 81 42 09 31 26 9a 26 46 95 0b 81 42 05 0d 75 08 95 01 15 00 09 48 26 5f 00 46 7c 14 81 02 09 49 25 35 46 7d 0b 81 02 45 00 65 00 55 00 c0 05 0d 09 22 a1 02 05 0d 95 01 75 01 25 01 09 42 81 02 81 03 09 47 81 02 95 05 81 03 09 51 26 ff 00 75 10 95 01 81 02 35 00 65 11 55 0e 05 01 09 30 26 a0 44 46 96 14 81 42 09 31 26 9a 26 46 95 0b 81 42 05 0d 75 08 95 01 15 00 09 48 26 5f 00 46 7c 14 81 02 09 49 25 35 46 7d 0b 81 02 45 00 65 00 55 00 c0 05 0d 09 22 a1 02 05 0d 95 01 75 01 25 01 09 42 81 02 81 03 09 47 81 02 95 05 81 03 09 51 26 ff 00 75 10 95 01 81 02 35 00 65 11 55 0e 05 01 09 30 26 a0 44 46 96 14 81 42 09 31 26 9a 26 46 95 0b 81 42 05 0d 75 08 95 01 15 00 09 48 26 5f 00 46 7c 14 81 02 09 49 25 35 46 7d 0b 81 02 45 00 65 00 55 00 c0 05 0d 09 22 a1 02 05 0d 95 01 75 01 25 01 09 42 81 02 81 03 09 47 81 02 95 05 81 03 09 51 26 ff 00 75 10 95 01 81 02 35 00 65 11 55 0e 05 01 09 30 26 a0 44 46 96 14 81 42 09 31 26 9a 26 46 95 0b 81 42 05 0d 75 08 95 01 15 00 09 48 26 5f 00 46 7c 14 81 02 09 49 25 35 46 7d 0b 81 02 45 00 65 00 55 00 c0 05 0d 09 22 a1 02 05 0d 95 01 75 01 25 01 09 42 81 02 81 03 09 47 81 02 95 05 81 03 09 51 26 ff 00 75 10 95 01 81 02 35 00 65 11 55 0e 05 01 09 30 26 a0 44 46 96 14 81 42 09 31 26 9a 26 46 95 0b 81 42 05 0d 75 08 95 01 15 00 09 48 26 5f 00 46 7c 14 81 02 09 49 25 35 46 7d 0b 81 02 45 00 65 00 55 00 c0 05 0d 27 ff ff 00 00 75 10 95 01 09 56 81 02 75 08 95 0e 81 03 09 55 26 ff 00 75 08 b1 02 85 0a 06 00 ff 09 c5 96 00 01 b1 02 c0 06 00 ff 09 01 a1 01 09 01 85 13 15 00 26 ff 00 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0", + input_info=(0x3, 0x056A, 0x0383), + ) + + def test_contact_id_0(self): + """ + Bring a finger in contact with the tablet, then hold it down and remove it. + + Ensure that even with contact ID = 0 which is usually given as an invalid + touch event by most tablets with the exception of a few, that given the + confidence bit is set to 1 it should process it as a valid touch to cover + the few tablets using contact ID = 0 as a valid touch value. + """ + uhdev = self.uhdev + evdev = uhdev.get_evdev() + + t0 = test_multitouch.Touch(0, 50, 100) + r = uhdev.event([t0]) + events = uhdev.next_sync_events() + self.debug_reports(r, uhdev, events) + + slot = self.get_slot(uhdev, t0, 0) + + assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events + assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0 + assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_POSITION_X] == 50 + assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 100 + + t0.tipswitch = False + if uhdev.quirks is None or "VALID_IS_INRANGE" not in uhdev.quirks: + t0.inrange = False + r = uhdev.event([t0]) + events = uhdev.next_sync_events() + self.debug_reports(r, uhdev, events) + assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0) in events + assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1 + + def test_confidence_false(self): + """ + Bring a finger in contact with the tablet with confidence set to false. + + Ensure that the confidence bit being set to false should not result in a touch event. + """ + uhdev = self.uhdev + evdev = uhdev.get_evdev() + + t0 = test_multitouch.Touch(1, 50, 100) + t0.confidence = False + r = uhdev.event([t0]) + events = uhdev.next_sync_events() + self.debug_reports(r, uhdev, events) + + slot = self.get_slot(uhdev, t0, 0) + + assert not events
\ No newline at end of file diff --git a/tools/testing/selftests/hid/vmtest.sh b/tools/testing/selftests/hid/vmtest.sh index 681b906b4853..4da48bf6b328 100755 --- a/tools/testing/selftests/hid/vmtest.sh +++ b/tools/testing/selftests/hid/vmtest.sh @@ -79,6 +79,7 @@ recompile_kernel() cd "${kernel_checkout}" ${make_command} olddefconfig + ${make_command} headers ${make_command} } diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index e4a6b33cfde4..33d08600be13 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -9,9 +9,6 @@ #include "iommufd_utils.h" -static void *buffer; - -static unsigned long PAGE_SIZE; static unsigned long HUGEPAGE_SIZE; #define MOCK_PAGE_SIZE (PAGE_SIZE / 2) @@ -116,6 +113,7 @@ TEST_F(iommufd, cmd_length) } TEST_LENGTH(iommu_destroy, IOMMU_DESTROY); + TEST_LENGTH(iommu_hw_info, IOMMU_GET_HW_INFO); TEST_LENGTH(iommu_ioas_alloc, IOMMU_IOAS_ALLOC); TEST_LENGTH(iommu_ioas_iova_ranges, IOMMU_IOAS_IOVA_RANGES); TEST_LENGTH(iommu_ioas_allow_iovas, IOMMU_IOAS_ALLOW_IOVAS); @@ -188,6 +186,7 @@ FIXTURE(iommufd_ioas) uint32_t ioas_id; uint32_t stdev_id; uint32_t hwpt_id; + uint32_t device_id; uint64_t base_iova; }; @@ -214,7 +213,7 @@ FIXTURE_SETUP(iommufd_ioas) for (i = 0; i != variant->mock_domains; i++) { test_cmd_mock_domain(self->ioas_id, &self->stdev_id, - &self->hwpt_id); + &self->hwpt_id, &self->device_id); self->base_iova = MOCK_APERTURE_START; } } @@ -265,7 +264,7 @@ TEST_F(iommufd_ioas, hwpt_attach) { /* Create a device attached directly to a hwpt */ if (self->stdev_id) { - test_cmd_mock_domain(self->hwpt_id, NULL, NULL); + test_cmd_mock_domain(self->hwpt_id, NULL, NULL, NULL); } else { test_err_mock_domain(ENOENT, self->hwpt_id, NULL, NULL); } @@ -293,6 +292,40 @@ TEST_F(iommufd_ioas, ioas_area_auto_destroy) } } +TEST_F(iommufd_ioas, get_hw_info) +{ + struct iommu_test_hw_info buffer_exact; + struct iommu_test_hw_info_buffer_larger { + struct iommu_test_hw_info info; + uint64_t trailing_bytes; + } buffer_larger; + struct iommu_test_hw_info_buffer_smaller { + __u32 flags; + } buffer_smaller; + + if (self->device_id) { + /* Provide a zero-size user_buffer */ + test_cmd_get_hw_info(self->device_id, NULL, 0); + /* Provide a user_buffer with exact size */ + test_cmd_get_hw_info(self->device_id, &buffer_exact, sizeof(buffer_exact)); + /* + * Provide a user_buffer with size larger than the exact size to check if + * kernel zero the trailing bytes. + */ + test_cmd_get_hw_info(self->device_id, &buffer_larger, sizeof(buffer_larger)); + /* + * Provide a user_buffer with size smaller than the exact size to check if + * the fields within the size range still gets updated. + */ + test_cmd_get_hw_info(self->device_id, &buffer_smaller, sizeof(buffer_smaller)); + } else { + test_err_get_hw_info(ENOENT, self->device_id, + &buffer_exact, sizeof(buffer_exact)); + test_err_get_hw_info(ENOENT, self->device_id, + &buffer_larger, sizeof(buffer_larger)); + } +} + TEST_F(iommufd_ioas, area) { int i; @@ -684,7 +717,7 @@ TEST_F(iommufd_ioas, access_pin) _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES), &access_cmd)); test_cmd_mock_domain(self->ioas_id, &mock_stdev_id, - &mock_hwpt_id); + &mock_hwpt_id, NULL); check_map_cmd.id = mock_hwpt_id; ASSERT_EQ(0, ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_MAP), @@ -839,7 +872,7 @@ TEST_F(iommufd_ioas, fork_gone) * If a domain already existed then everything was pinned within * the fork, so this copies from one domain to another. */ - test_cmd_mock_domain(self->ioas_id, NULL, NULL); + test_cmd_mock_domain(self->ioas_id, NULL, NULL, NULL); check_access_rw(_metadata, self->fd, access_id, MOCK_APERTURE_START, 0); @@ -888,7 +921,7 @@ TEST_F(iommufd_ioas, fork_present) ASSERT_EQ(8, read(efd, &tmp, sizeof(tmp))); /* Read pages from the remote process */ - test_cmd_mock_domain(self->ioas_id, NULL, NULL); + test_cmd_mock_domain(self->ioas_id, NULL, NULL, NULL); check_access_rw(_metadata, self->fd, access_id, MOCK_APERTURE_START, 0); ASSERT_EQ(0, close(pipefds[1])); @@ -1035,6 +1068,8 @@ FIXTURE(iommufd_mock_domain) uint32_t ioas_id; uint32_t hwpt_id; uint32_t hwpt_ids[2]; + uint32_t stdev_ids[2]; + uint32_t idev_ids[2]; int mmap_flags; size_t mmap_buf_size; }; @@ -1056,7 +1091,8 @@ FIXTURE_SETUP(iommufd_mock_domain) ASSERT_GE(ARRAY_SIZE(self->hwpt_ids), variant->mock_domains); for (i = 0; i != variant->mock_domains; i++) - test_cmd_mock_domain(self->ioas_id, NULL, &self->hwpt_ids[i]); + test_cmd_mock_domain(self->ioas_id, &self->stdev_ids[i], + &self->hwpt_ids[i], &self->idev_ids[i]); self->hwpt_id = self->hwpt_ids[0]; self->mmap_flags = MAP_SHARED | MAP_ANONYMOUS; @@ -1250,7 +1286,7 @@ TEST_F(iommufd_mock_domain, all_aligns_copy) /* Add and destroy a domain while the area exists */ old_id = self->hwpt_ids[1]; test_cmd_mock_domain(self->ioas_id, &mock_stdev_id, - &self->hwpt_ids[1]); + &self->hwpt_ids[1], NULL); check_mock_iova(buf + start, iova, length); check_refs(buf + start / PAGE_SIZE * PAGE_SIZE, @@ -1283,7 +1319,13 @@ TEST_F(iommufd_mock_domain, user_copy) .dst_iova = MOCK_APERTURE_START, .length = BUFFER_SIZE, }; - unsigned int ioas_id; + struct iommu_ioas_unmap unmap_cmd = { + .size = sizeof(unmap_cmd), + .ioas_id = self->ioas_id, + .iova = MOCK_APERTURE_START, + .length = BUFFER_SIZE, + }; + unsigned int new_ioas_id, ioas_id; /* Pin the pages in an IOAS with no domains then copy to an IOAS with domains */ test_ioctl_ioas_alloc(&ioas_id); @@ -1301,13 +1343,77 @@ TEST_F(iommufd_mock_domain, user_copy) ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, ©_cmd)); check_mock_iova(buffer, MOCK_APERTURE_START, BUFFER_SIZE); + /* Now replace the ioas with a new one */ + test_ioctl_ioas_alloc(&new_ioas_id); + test_ioctl_ioas_map_id(new_ioas_id, buffer, BUFFER_SIZE, + ©_cmd.src_iova); + test_cmd_access_replace_ioas(access_cmd.id, new_ioas_id); + + /* Destroy the old ioas and cleanup copied mapping */ + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_UNMAP, &unmap_cmd)); + test_ioctl_destroy(ioas_id); + + /* Then run the same test again with the new ioas */ + access_cmd.access_pages.iova = copy_cmd.src_iova; + ASSERT_EQ(0, + ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES), + &access_cmd)); + copy_cmd.src_ioas_id = new_ioas_id; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, ©_cmd)); + check_mock_iova(buffer, MOCK_APERTURE_START, BUFFER_SIZE); + test_cmd_destroy_access_pages( access_cmd.id, access_cmd.access_pages.out_access_pages_id); test_cmd_destroy_access(access_cmd.id); + test_ioctl_destroy(new_ioas_id); +} + +TEST_F(iommufd_mock_domain, replace) +{ + uint32_t ioas_id; + + test_ioctl_ioas_alloc(&ioas_id); + + test_cmd_mock_domain_replace(self->stdev_ids[0], ioas_id); + + /* + * Replacing the IOAS causes the prior HWPT to be deallocated, thus we + * should get enoent when we try to use it. + */ + if (variant->mock_domains == 1) + test_err_mock_domain_replace(ENOENT, self->stdev_ids[0], + self->hwpt_ids[0]); + + test_cmd_mock_domain_replace(self->stdev_ids[0], ioas_id); + if (variant->mock_domains >= 2) { + test_cmd_mock_domain_replace(self->stdev_ids[0], + self->hwpt_ids[1]); + test_cmd_mock_domain_replace(self->stdev_ids[0], + self->hwpt_ids[1]); + test_cmd_mock_domain_replace(self->stdev_ids[0], + self->hwpt_ids[0]); + } + + test_cmd_mock_domain_replace(self->stdev_ids[0], self->ioas_id); test_ioctl_destroy(ioas_id); } +TEST_F(iommufd_mock_domain, alloc_hwpt) +{ + int i; + + for (i = 0; i != variant->mock_domains; i++) { + uint32_t stddev_id; + uint32_t hwpt_id; + + test_cmd_hwpt_alloc(self->idev_ids[0], self->ioas_id, &hwpt_id); + test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL); + test_ioctl_destroy(stddev_id); + test_ioctl_destroy(hwpt_id); + } +} + /* VFIO compatibility IOCTLs */ TEST_F(iommufd, simple_ioctls) @@ -1429,7 +1535,7 @@ FIXTURE_SETUP(vfio_compat_mock_domain) /* Create what VFIO would consider a group */ test_ioctl_ioas_alloc(&self->ioas_id); - test_cmd_mock_domain(self->ioas_id, NULL, NULL); + test_cmd_mock_domain(self->ioas_id, NULL, NULL, NULL); /* Attach it to the vfio compat */ vfio_ioas_cmd.ioas_id = self->ioas_id; diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c index d9afcb23810e..a220ca2a689d 100644 --- a/tools/testing/selftests/iommu/iommufd_fail_nth.c +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -41,6 +41,8 @@ static int writeat(int dfd, const char *fn, const char *val) static __attribute__((constructor)) void setup_buffer(void) { + PAGE_SIZE = sysconf(_SC_PAGE_SIZE); + BUFFER_SIZE = 2*1024*1024; buffer = mmap(0, BUFFER_SIZE, PROT_READ | PROT_WRITE, @@ -313,7 +315,7 @@ TEST_FAIL_NTH(basic_fail_nth, map_domain) fail_nth_enable(); - if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id)) + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL)) return -1; if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova, @@ -324,7 +326,7 @@ TEST_FAIL_NTH(basic_fail_nth, map_domain) if (_test_ioctl_destroy(self->fd, stdev_id)) return -1; - if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id)) + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL)) return -1; return 0; } @@ -348,12 +350,13 @@ TEST_FAIL_NTH(basic_fail_nth, map_two_domains) if (_test_ioctl_set_temp_memory_limit(self->fd, 32)) return -1; - if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id)) + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL)) return -1; fail_nth_enable(); - if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id2, &hwpt_id2)) + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id2, &hwpt_id2, + NULL)) return -1; if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova, @@ -367,9 +370,10 @@ TEST_FAIL_NTH(basic_fail_nth, map_two_domains) if (_test_ioctl_destroy(self->fd, stdev_id2)) return -1; - if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id)) + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL)) return -1; - if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id2, &hwpt_id2)) + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id2, &hwpt_id2, + NULL)) return -1; return 0; } @@ -526,7 +530,7 @@ TEST_FAIL_NTH(basic_fail_nth, access_pin_domain) if (_test_ioctl_set_temp_memory_limit(self->fd, 32)) return -1; - if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id)) + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL)) return -1; if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, BUFFER_SIZE, &iova, @@ -569,4 +573,57 @@ TEST_FAIL_NTH(basic_fail_nth, access_pin_domain) return 0; } +/* device.c */ +TEST_FAIL_NTH(basic_fail_nth, device) +{ + struct iommu_test_hw_info info; + uint32_t ioas_id; + uint32_t ioas_id2; + uint32_t stdev_id; + uint32_t idev_id; + uint32_t hwpt_id; + __u64 iova; + + self->fd = open("/dev/iommu", O_RDWR); + if (self->fd == -1) + return -1; + + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id)) + return -1; + + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id2)) + return -1; + + iova = MOCK_APERTURE_START; + if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, PAGE_SIZE, &iova, + IOMMU_IOAS_MAP_FIXED_IOVA | + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -1; + if (_test_ioctl_ioas_map(self->fd, ioas_id2, buffer, PAGE_SIZE, &iova, + IOMMU_IOAS_MAP_FIXED_IOVA | + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -1; + + fail_nth_enable(); + + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, NULL, + &idev_id)) + return -1; + + if (_test_cmd_get_hw_info(self->fd, idev_id, &info, sizeof(info))) + return -1; + + if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, &hwpt_id)) + return -1; + + if (_test_cmd_mock_domain_replace(self->fd, stdev_id, ioas_id2, NULL)) + return -1; + + if (_test_cmd_mock_domain_replace(self->fd, stdev_id, hwpt_id, NULL)) + return -1; + return 0; +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 85d6662ef8e8..e0753d03ecaa 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -19,6 +19,12 @@ static void *buffer; static unsigned long BUFFER_SIZE; +static unsigned long PAGE_SIZE; + +#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) +#define offsetofend(TYPE, MEMBER) \ + (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) + /* * Have the kernel check the refcount on pages. I don't know why a freshly * mmap'd anon non-compound page starts out with a ref of 3 @@ -39,7 +45,7 @@ static unsigned long BUFFER_SIZE; }) static int _test_cmd_mock_domain(int fd, unsigned int ioas_id, __u32 *stdev_id, - __u32 *hwpt_id) + __u32 *hwpt_id, __u32 *idev_id) { struct iommu_test_cmd cmd = { .size = sizeof(cmd), @@ -57,14 +63,84 @@ static int _test_cmd_mock_domain(int fd, unsigned int ioas_id, __u32 *stdev_id, assert(cmd.id != 0); if (hwpt_id) *hwpt_id = cmd.mock_domain.out_hwpt_id; + if (idev_id) + *idev_id = cmd.mock_domain.out_idev_id; return 0; } -#define test_cmd_mock_domain(ioas_id, stdev_id, hwpt_id) \ - ASSERT_EQ(0, \ - _test_cmd_mock_domain(self->fd, ioas_id, stdev_id, hwpt_id)) +#define test_cmd_mock_domain(ioas_id, stdev_id, hwpt_id, idev_id) \ + ASSERT_EQ(0, _test_cmd_mock_domain(self->fd, ioas_id, stdev_id, \ + hwpt_id, idev_id)) #define test_err_mock_domain(_errno, ioas_id, stdev_id, hwpt_id) \ EXPECT_ERRNO(_errno, _test_cmd_mock_domain(self->fd, ioas_id, \ - stdev_id, hwpt_id)) + stdev_id, hwpt_id, NULL)) + +static int _test_cmd_mock_domain_replace(int fd, __u32 stdev_id, __u32 pt_id, + __u32 *hwpt_id) +{ + struct iommu_test_cmd cmd = { + .size = sizeof(cmd), + .op = IOMMU_TEST_OP_MOCK_DOMAIN_REPLACE, + .id = stdev_id, + .mock_domain_replace = { + .pt_id = pt_id, + }, + }; + int ret; + + ret = ioctl(fd, IOMMU_TEST_CMD, &cmd); + if (ret) + return ret; + if (hwpt_id) + *hwpt_id = cmd.mock_domain_replace.pt_id; + return 0; +} + +#define test_cmd_mock_domain_replace(stdev_id, pt_id) \ + ASSERT_EQ(0, _test_cmd_mock_domain_replace(self->fd, stdev_id, pt_id, \ + NULL)) +#define test_err_mock_domain_replace(_errno, stdev_id, pt_id) \ + EXPECT_ERRNO(_errno, _test_cmd_mock_domain_replace(self->fd, stdev_id, \ + pt_id, NULL)) + +static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, + __u32 *hwpt_id) +{ + struct iommu_hwpt_alloc cmd = { + .size = sizeof(cmd), + .dev_id = device_id, + .pt_id = pt_id, + }; + int ret; + + ret = ioctl(fd, IOMMU_HWPT_ALLOC, &cmd); + if (ret) + return ret; + if (hwpt_id) + *hwpt_id = cmd.out_hwpt_id; + return 0; +} + +#define test_cmd_hwpt_alloc(device_id, pt_id, hwpt_id) \ + ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, hwpt_id)) + +static int _test_cmd_access_replace_ioas(int fd, __u32 access_id, + unsigned int ioas_id) +{ + struct iommu_test_cmd cmd = { + .size = sizeof(cmd), + .op = IOMMU_TEST_OP_ACCESS_REPLACE_IOAS, + .id = access_id, + .access_replace_ioas = { .ioas_id = ioas_id }, + }; + int ret; + + ret = ioctl(fd, IOMMU_TEST_CMD, &cmd); + if (ret) + return ret; + return 0; +} +#define test_cmd_access_replace_ioas(access_id, ioas_id) \ + ASSERT_EQ(0, _test_cmd_access_replace_ioas(self->fd, access_id, ioas_id)) static int _test_cmd_create_access(int fd, unsigned int ioas_id, __u32 *access_id, unsigned int flags) @@ -276,3 +352,61 @@ static void teardown_iommufd(int fd, struct __test_metadata *_metadata) }) #endif + +/* @data can be NULL */ +static int _test_cmd_get_hw_info(int fd, __u32 device_id, + void *data, size_t data_len) +{ + struct iommu_test_hw_info *info = (struct iommu_test_hw_info *)data; + struct iommu_hw_info cmd = { + .size = sizeof(cmd), + .dev_id = device_id, + .data_len = data_len, + .data_uptr = (uint64_t)data, + }; + int ret; + + ret = ioctl(fd, IOMMU_GET_HW_INFO, &cmd); + if (ret) + return ret; + + assert(cmd.out_data_type == IOMMU_HW_INFO_TYPE_SELFTEST); + + /* + * The struct iommu_test_hw_info should be the one defined + * by the current kernel. + */ + assert(cmd.data_len == sizeof(struct iommu_test_hw_info)); + + /* + * Trailing bytes should be 0 if user buffer is larger than + * the data that kernel reports. + */ + if (data_len > cmd.data_len) { + char *ptr = (char *)(data + cmd.data_len); + int idx = 0; + + while (idx < data_len - cmd.data_len) { + assert(!*(ptr + idx)); + idx++; + } + } + + if (info) { + if (data_len >= offsetofend(struct iommu_test_hw_info, test_reg)) + assert(info->test_reg == IOMMU_HW_INFO_SELFTEST_REGVAL); + if (data_len >= offsetofend(struct iommu_test_hw_info, flags)) + assert(!info->flags); + } + + return 0; +} + +#define test_cmd_get_hw_info(device_id, data, data_len) \ + ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, \ + data, data_len)) + +#define test_err_get_hw_info(_errno, device_id, data, data_len) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_get_hw_info(self->fd, device_id, \ + data, data_len)) diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 829be379545a..529d29a35900 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -113,6 +113,15 @@ static inline int ksft_get_error_cnt(void) { return ksft_cnt.ksft_error; } static inline void ksft_print_header(void) { + /* + * Force line buffering; If stdout is not connected to a terminal, it + * will otherwise default to fully buffered, which can cause output + * duplication if there is content in the buffer when fork()ing. If + * there is a crash, line buffering also means the most recent output + * line will be visible. + */ + setvbuf(stdout, NULL, _IOLBF, 0); + if (!(getenv("KSFT_TAP_LEVEL"))) printf("TAP version 13\n"); } diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh index 294619ade49f..cd2fb43eea61 100644 --- a/tools/testing/selftests/kselftest/runner.sh +++ b/tools/testing/selftests/kselftest/runner.sh @@ -8,7 +8,8 @@ export logfile=/dev/stdout export per_test_logging= # Defaults for "settings" file fields: -# "timeout" how many seconds to let each test run before failing. +# "timeout" how many seconds to let each test run before running +# over our soft timeout limit. export kselftest_default_timeout=45 # There isn't a shell-agnostic way to find the path of a sourced file, @@ -35,7 +36,8 @@ tap_timeout() { # Make sure tests will time out if utility is available. if [ -x /usr/bin/timeout ] ; then - /usr/bin/timeout --foreground "$kselftest_timeout" $1 + /usr/bin/timeout --foreground "$kselftest_timeout" \ + /usr/bin/timeout "$kselftest_timeout" $1 else $1 fi @@ -90,21 +92,32 @@ run_one() done < "$settings" fi + # Command line timeout overrides the settings file + if [ -n "$kselftest_override_timeout" ]; then + kselftest_timeout="$kselftest_override_timeout" + echo "# overriding timeout to $kselftest_timeout" >> "$logfile" + else + echo "# timeout set to $kselftest_timeout" >> "$logfile" + fi + TEST_HDR_MSG="selftests: $DIR: $BASENAME_TEST" echo "# $TEST_HDR_MSG" if [ ! -e "$TEST" ]; then echo "# Warning: file $TEST is missing!" echo "not ok $test_num $TEST_HDR_MSG" else + if [ -x /usr/bin/stdbuf ]; then + stdbuf="/usr/bin/stdbuf --output=L " + fi eval kselftest_cmd_args="\$${kselftest_cmd_args_ref:-}" - cmd="./$BASENAME_TEST $kselftest_cmd_args" + cmd="$stdbuf ./$BASENAME_TEST $kselftest_cmd_args" if [ ! -x "$TEST" ]; then echo "# Warning: file $TEST is not executable" if [ $(head -n 1 "$TEST" | cut -c -2) = "#!" ] then interpreter=$(head -n 1 "$TEST" | cut -c 3-) - cmd="$interpreter ./$BASENAME_TEST" + cmd="$stdbuf $interpreter ./$BASENAME_TEST" else echo "not ok $test_num $TEST_HDR_MSG" return diff --git a/tools/testing/selftests/kselftest_deps.sh b/tools/testing/selftests/kselftest_deps.sh index 4bc14d9e8ff1..de59cc8f03c3 100755 --- a/tools/testing/selftests/kselftest_deps.sh +++ b/tools/testing/selftests/kselftest_deps.sh @@ -46,11 +46,11 @@ fi print_targets=0 while getopts "p" arg; do - case $arg in - p) + case $arg in + p) print_targets=1 shift;; - esac + esac done if [ $# -eq 0 ] @@ -92,6 +92,10 @@ pass_cnt=0 # Get all TARGETS from selftests Makefile targets=$(grep -E "^TARGETS +|^TARGETS =" Makefile | cut -d "=" -f2) +# Initially, in LDLIBS related lines, the dep checker needs +# to ignore lines containing the following strings: +filter="\$(VAR_LDLIBS)\|pkg-config\|PKG_CONFIG\|IOURING_EXTRA_LIBS" + # Single test case if [ $# -eq 2 ] then @@ -100,6 +104,8 @@ then l1_test $test l2_test $test l3_test $test + l4_test $test + l5_test $test print_results $1 $2 exit $? @@ -113,7 +119,7 @@ fi # Append space at the end of the list to append more tests. l1_tests=$(grep -r --include=Makefile "^LDLIBS" | \ - grep -v "VAR_LDLIBS" | awk -F: '{print $1}') + grep -v "$filter" | awk -F: '{print $1}' | uniq) # Level 2: LDLIBS set dynamically. # @@ -126,7 +132,7 @@ l1_tests=$(grep -r --include=Makefile "^LDLIBS" | \ # Append space at the end of the list to append more tests. l2_tests=$(grep -r --include=Makefile ": LDLIBS" | \ - grep -v "VAR_LDLIBS" | awk -F: '{print $1}') + grep -v "$filter" | awk -F: '{print $1}' | uniq) # Level 3 # memfd and others use pkg-config to find mount and fuse libs @@ -138,11 +144,32 @@ l2_tests=$(grep -r --include=Makefile ": LDLIBS" | \ # VAR_LDLIBS := $(shell pkg-config fuse --libs 2>/dev/null) l3_tests=$(grep -r --include=Makefile "^VAR_LDLIBS" | \ - grep -v "pkg-config" | awk -F: '{print $1}') + grep -v "pkg-config\|PKG_CONFIG" | awk -F: '{print $1}' | uniq) -#echo $l1_tests -#echo $l2_1_tests -#echo $l3_tests +# Level 4 +# some tests may fall back to default using `|| echo -l<libname>` +# if pkg-config doesn't find the libs, instead of using VAR_LDLIBS +# as per level 3 checks. +# e.g: +# netfilter/Makefile +# LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) +l4_tests=$(grep -r --include=Makefile "^LDLIBS" | \ + grep "pkg-config\|PKG_CONFIG" | awk -F: '{print $1}' | uniq) + +# Level 5 +# some tests may use IOURING_EXTRA_LIBS to add extra libs to LDLIBS, +# which in turn may be defined in a sub-Makefile +# e.g.: +# mm/Makefile +# $(OUTPUT)/gup_longterm: LDLIBS += $(IOURING_EXTRA_LIBS) +l5_tests=$(grep -r --include=Makefile "LDLIBS +=.*\$(IOURING_EXTRA_LIBS)" | \ + awk -F: '{print $1}' | uniq) + +#echo l1_tests $l1_tests +#echo l2_tests $l2_tests +#echo l3_tests $l3_tests +#echo l4_tests $l4_tests +#echo l5_tests $l5_tests all_tests print_results $1 $2 @@ -164,24 +191,32 @@ all_tests() for test in $l3_tests; do l3_test $test done + + for test in $l4_tests; do + l4_test $test + done + + for test in $l5_tests; do + l5_test $test + done } # Use same parsing used for l1_tests and pick libraries this time. l1_test() { test_libs=$(grep --include=Makefile "^LDLIBS" $test | \ - grep -v "VAR_LDLIBS" | \ + grep -v "$filter" | \ sed -e 's/\:/ /' | \ sed -e 's/+/ /' | cut -d "=" -f 2) check_libs $test $test_libs } -# Use same parsing used for l2__tests and pick libraries this time. +# Use same parsing used for l2_tests and pick libraries this time. l2_test() { test_libs=$(grep --include=Makefile ": LDLIBS" $test | \ - grep -v "VAR_LDLIBS" | \ + grep -v "$filter" | \ sed -e 's/\:/ /' | sed -e 's/+/ /' | \ cut -d "=" -f 2) @@ -197,6 +232,24 @@ l3_test() check_libs $test $test_libs } +l4_test() +{ + test_libs=$(grep --include=Makefile "^VAR_LDLIBS\|^LDLIBS" $test | \ + grep "\(pkg-config\|PKG_CONFIG\).*|| echo " | \ + sed -e 's/.*|| echo //' | sed -e 's/)$//') + + check_libs $test $test_libs +} + +l5_test() +{ + tests=$(find $(dirname "$test") -type f -name "*.mk") + test_libs=$(grep "^IOURING_EXTRA_LIBS +\?=" $tests | \ + cut -d "=" -f 2) + + check_libs $test $test_libs +} + check_libs() { diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 5fd49ad0c696..e05ac8261046 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -938,7 +938,11 @@ void __wait_for_test(struct __test_metadata *t) fprintf(TH_LOG_STREAM, "# %s: Test terminated by timeout\n", t->name); } else if (WIFEXITED(status)) { - if (t->termsig != -1) { + if (WEXITSTATUS(status) == 255) { + /* SKIP */ + t->passed = 1; + t->skip = 1; + } else if (t->termsig != -1) { t->passed = 0; fprintf(TH_LOG_STREAM, "# %s: Test exited normally instead of by signal (code: %d)\n", @@ -950,11 +954,6 @@ void __wait_for_test(struct __test_metadata *t) case 0: t->passed = 1; break; - /* SKIP */ - case 255: - t->passed = 1; - t->skip = 1; - break; /* Other failure, assume step report. */ default: t->passed = 0; diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 4761b768b773..a3bb36fb3cfc 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -23,6 +23,7 @@ LIBKVM += lib/guest_modes.c LIBKVM += lib/io.c LIBKVM += lib/kvm_util.c LIBKVM += lib/memstress.c +LIBKVM += lib/guest_sprintf.c LIBKVM += lib/rbtree.c LIBKVM += lib/sparsebit.c LIBKVM += lib/test_util.c @@ -61,6 +62,7 @@ TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh # Compiled test targets TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test +TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test @@ -121,6 +123,7 @@ TEST_GEN_PROGS_x86_64 += access_tracking_perf_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test +TEST_GEN_PROGS_x86_64 += guest_print_test TEST_GEN_PROGS_x86_64 += hardware_disable_test TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus TEST_GEN_PROGS_x86_64 += kvm_page_table_test @@ -139,7 +142,6 @@ TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs TEST_GEN_PROGS_aarch64 += aarch64/arch_timer TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions -TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list TEST_GEN_PROGS_aarch64 += aarch64/hypercalls TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test TEST_GEN_PROGS_aarch64 += aarch64/psci_test @@ -151,6 +153,8 @@ TEST_GEN_PROGS_aarch64 += access_tracking_perf_test TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += dirty_log_perf_test +TEST_GEN_PROGS_aarch64 += guest_print_test +TEST_GEN_PROGS_aarch64 += get-reg-list TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus TEST_GEN_PROGS_aarch64 += kvm_page_table_test TEST_GEN_PROGS_aarch64 += memslot_modification_stress_test @@ -164,8 +168,11 @@ TEST_GEN_PROGS_s390x = s390x/memop TEST_GEN_PROGS_s390x += s390x/resets TEST_GEN_PROGS_s390x += s390x/sync_regs_test TEST_GEN_PROGS_s390x += s390x/tprot +TEST_GEN_PROGS_s390x += s390x/cmma_test +TEST_GEN_PROGS_s390x += s390x/debug_test TEST_GEN_PROGS_s390x += demand_paging_test TEST_GEN_PROGS_s390x += dirty_log_test +TEST_GEN_PROGS_s390x += guest_print_test TEST_GEN_PROGS_s390x += kvm_create_max_vcpus TEST_GEN_PROGS_s390x += kvm_page_table_test TEST_GEN_PROGS_s390x += rseq_test @@ -174,16 +181,22 @@ TEST_GEN_PROGS_s390x += kvm_binary_stats_test TEST_GEN_PROGS_riscv += demand_paging_test TEST_GEN_PROGS_riscv += dirty_log_test +TEST_GEN_PROGS_riscv += guest_print_test +TEST_GEN_PROGS_riscv += get-reg-list TEST_GEN_PROGS_riscv += kvm_create_max_vcpus TEST_GEN_PROGS_riscv += kvm_page_table_test TEST_GEN_PROGS_riscv += set_memory_region_test TEST_GEN_PROGS_riscv += kvm_binary_stats_test +SPLIT_TESTS += get-reg-list + TEST_PROGS += $(TEST_PROGS_$(ARCH_DIR)) TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH_DIR)) TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH_DIR)) LIBKVM += $(LIBKVM_$(ARCH_DIR)) +OVERRIDE_TARGETS = 1 + # lib.mak defines $(OUTPUT), prepends $(OUTPUT)/ to $(TEST_GEN_PROGS), and most # importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`, # which causes the environment variable to override the makefile). @@ -198,8 +211,9 @@ else LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include endif CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ - -Wno-gnu-variable-sized-type-not-at-end \ + -Wno-gnu-variable-sized-type-not-at-end -MD\ -fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \ + -fno-builtin-strnlen \ -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ -I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \ @@ -224,8 +238,25 @@ LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C)) LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S)) LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING)) LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ) +SPLIT_TESTS_TARGETS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS)) +SPLIT_TESTS_OBJS := $(patsubst %, $(ARCH_DIR)/%.o, $(SPLIT_TESTS)) + +TEST_GEN_OBJ = $(patsubst %, %.o, $(TEST_GEN_PROGS)) +TEST_GEN_OBJ += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED)) +TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_OBJ)) +TEST_DEP_FILES += $(patsubst %.o, %.d, $(LIBKVM_OBJS)) +TEST_DEP_FILES += $(patsubst %.o, %.d, $(SPLIT_TESTS_OBJS)) +-include $(TEST_DEP_FILES) + +$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): %: %.o + $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LIBKVM_OBJS) $(LDLIBS) -o $@ +$(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ + +$(SPLIT_TESTS_TARGETS): %: %.o $(SPLIT_TESTS_OBJS) + $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@ -EXTRA_CLEAN += $(LIBKVM_OBJS) cscope.* +EXTRA_CLEAN += $(LIBKVM_OBJS) $(TEST_DEP_FILES) $(TEST_GEN_OBJ) $(SPLIT_TESTS_OBJS) cscope.* x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)))) $(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c index 4951ac53d1f8..b90580840b22 100644 --- a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c +++ b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c @@ -98,7 +98,7 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu) uint64_t val; vcpu_get_reg(vcpu, reg_id, &val); - ASSERT_EQ(val, 0); + TEST_ASSERT_EQ(val, 0); /* * Expect the ioctl to succeed with no effect on the register @@ -107,7 +107,7 @@ static void test_user_raz_wi(struct kvm_vcpu *vcpu) vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL); vcpu_get_reg(vcpu, reg_id, &val); - ASSERT_EQ(val, 0); + TEST_ASSERT_EQ(val, 0); } } @@ -127,14 +127,14 @@ static void test_user_raz_invariant(struct kvm_vcpu *vcpu) uint64_t val; vcpu_get_reg(vcpu, reg_id, &val); - ASSERT_EQ(val, 0); + TEST_ASSERT_EQ(val, 0); r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL); TEST_ASSERT(r < 0 && errno == EINVAL, "unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno); vcpu_get_reg(vcpu, reg_id, &val); - ASSERT_EQ(val, 0); + TEST_ASSERT_EQ(val, 0); } } diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c index 8ef370924a02..274b8465b42a 100644 --- a/tools/testing/selftests/kvm/aarch64/arch_timer.c +++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c @@ -19,7 +19,6 @@ * * Copyright (c) 2021, Google LLC. */ - #define _GNU_SOURCE #include <stdlib.h> @@ -155,11 +154,13 @@ static void guest_validate_irq(unsigned int intid, xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt); /* Make sure we are dealing with the correct timer IRQ */ - GUEST_ASSERT_2(intid == timer_irq, intid, timer_irq); + GUEST_ASSERT_EQ(intid, timer_irq); /* Basic 'timer condition met' check */ - GUEST_ASSERT_3(xcnt >= cval, xcnt, cval, xcnt_diff_us); - GUEST_ASSERT_1(xctl & CTL_ISTATUS, xctl); + __GUEST_ASSERT(xcnt >= cval, + "xcnt = 0x%llx, cval = 0x%llx, xcnt_diff_us = 0x%llx", + xcnt, cval, xcnt_diff_us); + __GUEST_ASSERT(xctl & CTL_ISTATUS, "xcnt = 0x%llx", xcnt); WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1); } @@ -192,8 +193,7 @@ static void guest_run_stage(struct test_vcpu_shared_data *shared_data, TIMER_TEST_ERR_MARGIN_US); irq_iter = READ_ONCE(shared_data->nr_iter); - GUEST_ASSERT_2(config_iter + 1 == irq_iter, - config_iter + 1, irq_iter); + GUEST_ASSERT_EQ(config_iter + 1, irq_iter); } } @@ -243,13 +243,9 @@ static void *test_vcpu_run(void *arg) break; case UCALL_ABORT: sync_global_from_guest(vm, *shared_data); - REPORT_GUEST_ASSERT_N(uc, "values: %lu, %lu; %lu, vcpu %u; stage; %u; iter: %u", - GUEST_ASSERT_ARG(uc, 0), - GUEST_ASSERT_ARG(uc, 1), - GUEST_ASSERT_ARG(uc, 2), - vcpu_idx, - shared_data->guest_stage, - shared_data->nr_iter); + fprintf(stderr, "Guest assert failed, vcpu %u; stage; %u; iter: %u\n", + vcpu_idx, shared_data->guest_stage, shared_data->nr_iter); + REPORT_GUEST_ASSERT(uc); break; default: TEST_FAIL("Unexpected guest exit\n"); diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index 637be796086f..f5b6cb3a0019 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -365,7 +365,7 @@ static void guest_wp_handler(struct ex_regs *regs) static void guest_ss_handler(struct ex_regs *regs) { - GUEST_ASSERT_1(ss_idx < 4, ss_idx); + __GUEST_ASSERT(ss_idx < 4, "Expected index < 4, got '%u'", ss_idx); ss_addr[ss_idx++] = regs->pc; regs->pstate |= SPSR_SS; } @@ -410,8 +410,8 @@ static void guest_code_ss(int test_cnt) /* Userspace disables Single Step when the end is nigh. */ asm volatile("iter_ss_end:\n"); - GUEST_ASSERT(bvr == w_bvr); - GUEST_ASSERT(wvr == w_wvr); + GUEST_ASSERT_EQ(bvr, w_bvr); + GUEST_ASSERT_EQ(wvr, w_wvr); } GUEST_DONE(); } @@ -450,7 +450,7 @@ static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bp vcpu_run(vcpu); switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); + REPORT_GUEST_ASSERT(uc); break; case UCALL_DONE: goto done; diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index d4e1f4af29d6..709d7d721760 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -4,113 +4,46 @@ * * Copyright (C) 2020, Red Hat, Inc. * - * When attempting to migrate from a host with an older kernel to a host - * with a newer kernel we allow the newer kernel on the destination to - * list new registers with get-reg-list. We assume they'll be unused, at - * least until the guest reboots, and so they're relatively harmless. - * However, if the destination host with the newer kernel is missing - * registers which the source host with the older kernel has, then that's - * a regression in get-reg-list. This test checks for that regression by - * checking the current list against a blessed list. We should never have - * missing registers, but if new ones appear then they can probably be - * added to the blessed list. A completely new blessed list can be created - * by running the test with the --list command line argument. - * - * Note, the blessed list should be created from the oldest possible - * kernel. We can't go older than v4.15, though, because that's the first - * release to expose the ID system registers in KVM_GET_REG_LIST, see - * commit 93390c0a1b20 ("arm64: KVM: Hide unsupported AArch64 CPU features - * from guests"). Also, one must use the --core-reg-fixup command line - * option when running on an older kernel that doesn't include df205b5c6328 - * ("KVM: arm64: Filter out invalid core register IDs in KVM_GET_REG_LIST") + * While the blessed list should be created from the oldest possible + * kernel, we can't go older than v5.2, though, because that's the first + * release which includes df205b5c6328 ("KVM: arm64: Filter out invalid + * core register IDs in KVM_GET_REG_LIST"). Without that commit the core + * registers won't match expectations. */ #include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> -#include <sys/types.h> -#include <sys/wait.h> #include "kvm_util.h" #include "test_util.h" #include "processor.h" -static struct kvm_reg_list *reg_list; -static __u64 *blessed_reg, blessed_n; - -struct reg_sublist { - const char *name; - long capability; - int feature; - bool finalize; - __u64 *regs; - __u64 regs_n; - __u64 *rejects_set; - __u64 rejects_set_n; -}; - -struct vcpu_config { - char *name; - struct reg_sublist sublists[]; +struct feature_id_reg { + __u64 reg; + __u64 id_reg; + __u64 feat_shift; + __u64 feat_min; }; -static struct vcpu_config *vcpu_configs[]; -static int vcpu_configs_n; - -#define for_each_sublist(c, s) \ - for ((s) = &(c)->sublists[0]; (s)->regs; ++(s)) - -#define for_each_reg(i) \ - for ((i) = 0; (i) < reg_list->n; ++(i)) - -#define for_each_reg_filtered(i) \ - for_each_reg(i) \ - if (!filter_reg(reg_list->reg[i])) - -#define for_each_missing_reg(i) \ - for ((i) = 0; (i) < blessed_n; ++(i)) \ - if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i])) - -#define for_each_new_reg(i) \ - for_each_reg_filtered(i) \ - if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i])) - -static const char *config_name(struct vcpu_config *c) -{ - struct reg_sublist *s; - int len = 0; - - if (c->name) - return c->name; - - for_each_sublist(c, s) - len += strlen(s->name) + 1; - - c->name = malloc(len); - - len = 0; - for_each_sublist(c, s) { - if (!strcmp(s->name, "base")) - continue; - strcat(c->name + len, s->name); - len += strlen(s->name) + 1; - c->name[len - 1] = '+'; +static struct feature_id_reg feat_id_regs[] = { + { + ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ + 0, + 1 + }, + { + ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ + 4, + 1 + }, + { + ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ + 4, + 1 } - c->name[len - 1] = '\0'; - - return c->name; -} - -static bool has_cap(struct vcpu_config *c, long capability) -{ - struct reg_sublist *s; - - for_each_sublist(c, s) - if (s->capability == capability) - return true; - return false; -} +}; -static bool filter_reg(__u64 reg) +bool filter_reg(__u64 reg) { /* * DEMUX register presence depends on the host's CLIDR_EL1. @@ -122,27 +55,46 @@ static bool filter_reg(__u64 reg) return false; } -static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg) +static bool check_supported_feat_reg(struct kvm_vcpu *vcpu, __u64 reg) { - int i; + int i, ret; + __u64 data, feat_val; - for (i = 0; i < nr_regs; ++i) - if (reg == regs[i]) - return true; - return false; + for (i = 0; i < ARRAY_SIZE(feat_id_regs); i++) { + if (feat_id_regs[i].reg == reg) { + ret = __vcpu_get_reg(vcpu, feat_id_regs[i].id_reg, &data); + if (ret < 0) + return false; + + feat_val = ((data >> feat_id_regs[i].feat_shift) & 0xf); + return feat_val >= feat_id_regs[i].feat_min; + } + } + + return true; } -static const char *str_with_index(const char *template, __u64 index) +bool check_supported_reg(struct kvm_vcpu *vcpu, __u64 reg) { - char *str, *p; - int n; + return check_supported_feat_reg(vcpu, reg); +} + +bool check_reject_set(int err) +{ + return err == EPERM; +} - str = strdup(template); - p = strstr(str, "##"); - n = sprintf(p, "%lld", index); - strcat(p + n, strstr(template, "##") + 2); +void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) +{ + struct vcpu_reg_sublist *s; + int feature; - return (const char *)str; + for_each_sublist(c, s) { + if (s->finalize) { + feature = s->feature; + vcpu_ioctl(vcpu, KVM_ARM_VCPU_FINALIZE, &feature); + } + } } #define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK) @@ -151,7 +103,7 @@ static const char *str_with_index(const char *template, __u64 index) #define CORE_SPSR_XX_NR_WORDS 2 #define CORE_FPREGS_XX_NR_WORDS 4 -static const char *core_id_to_str(struct vcpu_config *c, __u64 id) +static const char *core_id_to_str(const char *prefix, __u64 id) { __u64 core_off = id & ~REG_MASK, idx; @@ -162,8 +114,8 @@ static const char *core_id_to_str(struct vcpu_config *c, __u64 id) case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... KVM_REG_ARM_CORE_REG(regs.regs[30]): idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS; - TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", config_name(c), idx); - return str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", idx); + TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", prefix, idx); + return strdup_printf("KVM_REG_ARM_CORE_REG(regs.regs[%lld])", idx); case KVM_REG_ARM_CORE_REG(regs.sp): return "KVM_REG_ARM_CORE_REG(regs.sp)"; case KVM_REG_ARM_CORE_REG(regs.pc): @@ -177,24 +129,24 @@ static const char *core_id_to_str(struct vcpu_config *c, __u64 id) case KVM_REG_ARM_CORE_REG(spsr[0]) ... KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]): idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS; - TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", config_name(c), idx); - return str_with_index("KVM_REG_ARM_CORE_REG(spsr[##])", idx); + TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", prefix, idx); + return strdup_printf("KVM_REG_ARM_CORE_REG(spsr[%lld])", idx); case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS; - TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", config_name(c), idx); - return str_with_index("KVM_REG_ARM_CORE_REG(fp_regs.vregs[##])", idx); + TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", prefix, idx); + return strdup_printf("KVM_REG_ARM_CORE_REG(fp_regs.vregs[%lld])", idx); case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)"; case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)"; } - TEST_FAIL("%s: Unknown core reg id: 0x%llx", config_name(c), id); + TEST_FAIL("%s: Unknown core reg id: 0x%llx", prefix, id); return NULL; } -static const char *sve_id_to_str(struct vcpu_config *c, __u64 id) +static const char *sve_id_to_str(const char *prefix, __u64 id) { __u64 sve_off, n, i; @@ -204,37 +156,37 @@ static const char *sve_id_to_str(struct vcpu_config *c, __u64 id) sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1)); i = id & (KVM_ARM64_SVE_MAX_SLICES - 1); - TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", config_name(c), id); + TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", prefix, id); switch (sve_off) { case KVM_REG_ARM64_SVE_ZREG_BASE ... KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1: n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1); TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0), - "%s: Unexpected bits set in SVE ZREG id: 0x%llx", config_name(c), id); - return str_with_index("KVM_REG_ARM64_SVE_ZREG(##, 0)", n); + "%s: Unexpected bits set in SVE ZREG id: 0x%llx", prefix, id); + return strdup_printf("KVM_REG_ARM64_SVE_ZREG(%lld, 0)", n); case KVM_REG_ARM64_SVE_PREG_BASE ... KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1: n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1); TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0), - "%s: Unexpected bits set in SVE PREG id: 0x%llx", config_name(c), id); - return str_with_index("KVM_REG_ARM64_SVE_PREG(##, 0)", n); + "%s: Unexpected bits set in SVE PREG id: 0x%llx", prefix, id); + return strdup_printf("KVM_REG_ARM64_SVE_PREG(%lld, 0)", n); case KVM_REG_ARM64_SVE_FFR_BASE: TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0), - "%s: Unexpected bits set in SVE FFR id: 0x%llx", config_name(c), id); + "%s: Unexpected bits set in SVE FFR id: 0x%llx", prefix, id); return "KVM_REG_ARM64_SVE_FFR(0)"; } return NULL; } -static void print_reg(struct vcpu_config *c, __u64 id) +void print_reg(const char *prefix, __u64 id) { unsigned op0, op1, crn, crm, op2; const char *reg_size = NULL; TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64, - "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", config_name(c), id); + "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", prefix, id); switch (id & KVM_REG_SIZE_MASK) { case KVM_REG_SIZE_U8: @@ -266,16 +218,16 @@ static void print_reg(struct vcpu_config *c, __u64 id) break; default: TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx", - config_name(c), (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id); + prefix, (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id); } switch (id & KVM_REG_ARM_COPROC_MASK) { case KVM_REG_ARM_CORE: - printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(c, id)); + printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(prefix, id)); break; case KVM_REG_ARM_DEMUX: TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)), - "%s: Unexpected bits set in DEMUX reg id: 0x%llx", config_name(c), id); + "%s: Unexpected bits set in DEMUX reg id: 0x%llx", prefix, id); printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n", reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK); break; @@ -286,370 +238,34 @@ static void print_reg(struct vcpu_config *c, __u64 id) crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT; op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT; TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2), - "%s: Unexpected bits set in SYSREG reg id: 0x%llx", config_name(c), id); + "%s: Unexpected bits set in SYSREG reg id: 0x%llx", prefix, id); printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2); break; case KVM_REG_ARM_FW: TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff), - "%s: Unexpected bits set in FW reg id: 0x%llx", config_name(c), id); + "%s: Unexpected bits set in FW reg id: 0x%llx", prefix, id); printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff); break; case KVM_REG_ARM_FW_FEAT_BMAP: TEST_ASSERT(id == KVM_REG_ARM_FW_FEAT_BMAP_REG(id & 0xffff), - "%s: Unexpected bits set in the bitmap feature FW reg id: 0x%llx", config_name(c), id); + "%s: Unexpected bits set in the bitmap feature FW reg id: 0x%llx", prefix, id); printf("\tKVM_REG_ARM_FW_FEAT_BMAP_REG(%lld),\n", id & 0xffff); break; case KVM_REG_ARM64_SVE: - if (has_cap(c, KVM_CAP_ARM_SVE)) - printf("\t%s,\n", sve_id_to_str(c, id)); - else - TEST_FAIL("%s: KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", config_name(c), id); + printf("\t%s,\n", sve_id_to_str(prefix, id)); break; default: TEST_FAIL("%s: Unexpected coproc type: 0x%llx in reg id: 0x%llx", - config_name(c), (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id); - } -} - -/* - * Older kernels listed each 32-bit word of CORE registers separately. - * For 64 and 128-bit registers we need to ignore the extra words. We - * also need to fixup the sizes, because the older kernels stated all - * registers were 64-bit, even when they weren't. - */ -static void core_reg_fixup(void) -{ - struct kvm_reg_list *tmp; - __u64 id, core_off; - int i; - - tmp = calloc(1, sizeof(*tmp) + reg_list->n * sizeof(__u64)); - - for (i = 0; i < reg_list->n; ++i) { - id = reg_list->reg[i]; - - if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM_CORE) { - tmp->reg[tmp->n++] = id; - continue; - } - - core_off = id & ~REG_MASK; - - switch (core_off) { - case 0x52: case 0xd2: case 0xd6: - /* - * These offsets are pointing at padding. - * We need to ignore them too. - */ - continue; - case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... - KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): - if (core_off & 3) - continue; - id &= ~KVM_REG_SIZE_MASK; - id |= KVM_REG_SIZE_U128; - tmp->reg[tmp->n++] = id; - continue; - case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): - case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): - id &= ~KVM_REG_SIZE_MASK; - id |= KVM_REG_SIZE_U32; - tmp->reg[tmp->n++] = id; - continue; - default: - if (core_off & 1) - continue; - tmp->reg[tmp->n++] = id; - break; - } - } - - free(reg_list); - reg_list = tmp; -} - -static void prepare_vcpu_init(struct vcpu_config *c, struct kvm_vcpu_init *init) -{ - struct reg_sublist *s; - - for_each_sublist(c, s) - if (s->capability) - init->features[s->feature / 32] |= 1 << (s->feature % 32); -} - -static void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_config *c) -{ - struct reg_sublist *s; - int feature; - - for_each_sublist(c, s) { - if (s->finalize) { - feature = s->feature; - vcpu_ioctl(vcpu, KVM_ARM_VCPU_FINALIZE, &feature); - } - } -} - -static void check_supported(struct vcpu_config *c) -{ - struct reg_sublist *s; - - for_each_sublist(c, s) { - if (!s->capability) - continue; - - __TEST_REQUIRE(kvm_has_cap(s->capability), - "%s: %s not available, skipping tests\n", - config_name(c), s->name); - } -} - -static bool print_list; -static bool print_filtered; -static bool fixup_core_regs; - -static void run_test(struct vcpu_config *c) -{ - struct kvm_vcpu_init init = { .target = -1, }; - int new_regs = 0, missing_regs = 0, i, n; - int failed_get = 0, failed_set = 0, failed_reject = 0; - struct kvm_vcpu *vcpu; - struct kvm_vm *vm; - struct reg_sublist *s; - - check_supported(c); - - vm = vm_create_barebones(); - prepare_vcpu_init(c, &init); - vcpu = __vm_vcpu_add(vm, 0); - aarch64_vcpu_setup(vcpu, &init); - finalize_vcpu(vcpu, c); - - reg_list = vcpu_get_reg_list(vcpu); - - if (fixup_core_regs) - core_reg_fixup(); - - if (print_list || print_filtered) { - putchar('\n'); - for_each_reg(i) { - __u64 id = reg_list->reg[i]; - if ((print_list && !filter_reg(id)) || - (print_filtered && filter_reg(id))) - print_reg(c, id); - } - putchar('\n'); - return; - } - - /* - * We only test that we can get the register and then write back the - * same value. Some registers may allow other values to be written - * back, but others only allow some bits to be changed, and at least - * for ID registers set will fail if the value does not exactly match - * what was returned by get. If registers that allow other values to - * be written need to have the other values tested, then we should - * create a new set of tests for those in a new independent test - * executable. - */ - for_each_reg(i) { - uint8_t addr[2048 / 8]; - struct kvm_one_reg reg = { - .id = reg_list->reg[i], - .addr = (__u64)&addr, - }; - bool reject_reg = false; - int ret; - - ret = __vcpu_get_reg(vcpu, reg_list->reg[i], &addr); - if (ret) { - printf("%s: Failed to get ", config_name(c)); - print_reg(c, reg.id); - putchar('\n'); - ++failed_get; - } - - /* rejects_set registers are rejected after KVM_ARM_VCPU_FINALIZE */ - for_each_sublist(c, s) { - if (s->rejects_set && find_reg(s->rejects_set, s->rejects_set_n, reg.id)) { - reject_reg = true; - ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); - if (ret != -1 || errno != EPERM) { - printf("%s: Failed to reject (ret=%d, errno=%d) ", config_name(c), ret, errno); - print_reg(c, reg.id); - putchar('\n'); - ++failed_reject; - } - break; - } - } - - if (!reject_reg) { - ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); - if (ret) { - printf("%s: Failed to set ", config_name(c)); - print_reg(c, reg.id); - putchar('\n'); - ++failed_set; - } - } - } - - for_each_sublist(c, s) - blessed_n += s->regs_n; - blessed_reg = calloc(blessed_n, sizeof(__u64)); - - n = 0; - for_each_sublist(c, s) { - for (i = 0; i < s->regs_n; ++i) - blessed_reg[n++] = s->regs[i]; - } - - for_each_new_reg(i) - ++new_regs; - - for_each_missing_reg(i) - ++missing_regs; - - if (new_regs || missing_regs) { - n = 0; - for_each_reg_filtered(i) - ++n; - - printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n); - printf("%s: Number registers: %5lld (includes %lld filtered registers)\n", - config_name(c), reg_list->n, reg_list->n - n); - } - - if (new_regs) { - printf("\n%s: There are %d new registers.\n" - "Consider adding them to the blessed reg " - "list with the following lines:\n\n", config_name(c), new_regs); - for_each_new_reg(i) - print_reg(c, reg_list->reg[i]); - putchar('\n'); - } - - if (missing_regs) { - printf("\n%s: There are %d missing registers.\n" - "The following lines are missing registers:\n\n", config_name(c), missing_regs); - for_each_missing_reg(i) - print_reg(c, blessed_reg[i]); - putchar('\n'); - } - - TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject, - "%s: There are %d missing registers; " - "%d registers failed get; %d registers failed set; %d registers failed reject", - config_name(c), missing_regs, failed_get, failed_set, failed_reject); - - pr_info("%s: PASS\n", config_name(c)); - blessed_n = 0; - free(blessed_reg); - free(reg_list); - kvm_vm_free(vm); -} - -static void help(void) -{ - struct vcpu_config *c; - int i; - - printf( - "\n" - "usage: get-reg-list [--config=<selection>] [--list] [--list-filtered] [--core-reg-fixup]\n\n" - " --config=<selection> Used to select a specific vcpu configuration for the test/listing\n" - " '<selection>' may be\n"); - - for (i = 0; i < vcpu_configs_n; ++i) { - c = vcpu_configs[i]; - printf( - " '%s'\n", config_name(c)); - } - - printf( - "\n" - " --list Print the register list rather than test it (requires --config)\n" - " --list-filtered Print registers that would normally be filtered out (requires --config)\n" - " --core-reg-fixup Needed when running on old kernels with broken core reg listings\n" - "\n" - ); -} - -static struct vcpu_config *parse_config(const char *config) -{ - struct vcpu_config *c; - int i; - - if (config[8] != '=') - help(), exit(1); - - for (i = 0; i < vcpu_configs_n; ++i) { - c = vcpu_configs[i]; - if (strcmp(config_name(c), &config[9]) == 0) - break; - } - - if (i == vcpu_configs_n) - help(), exit(1); - - return c; -} - -int main(int ac, char **av) -{ - struct vcpu_config *c, *sel = NULL; - int i, ret = 0; - pid_t pid; - - for (i = 1; i < ac; ++i) { - if (strcmp(av[i], "--core-reg-fixup") == 0) - fixup_core_regs = true; - else if (strncmp(av[i], "--config", 8) == 0) - sel = parse_config(av[i]); - else if (strcmp(av[i], "--list") == 0) - print_list = true; - else if (strcmp(av[i], "--list-filtered") == 0) - print_filtered = true; - else if (strcmp(av[i], "--help") == 0 || strcmp(av[1], "-h") == 0) - help(), exit(0); - else - help(), exit(1); - } - - if (print_list || print_filtered) { - /* - * We only want to print the register list of a single config. - */ - if (!sel) - help(), exit(1); + prefix, (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id); } - - for (i = 0; i < vcpu_configs_n; ++i) { - c = vcpu_configs[i]; - if (sel && c != sel) - continue; - - pid = fork(); - - if (!pid) { - run_test(c); - exit(0); - } else { - int wstatus; - pid_t wpid = wait(&wstatus); - TEST_ASSERT(wpid == pid && WIFEXITED(wstatus), "wait: Unexpected return"); - if (WEXITSTATUS(wstatus) && WEXITSTATUS(wstatus) != KSFT_SKIP) - ret = KSFT_FAIL; - } - } - - return ret; } /* - * The current blessed list was primed with the output of kernel version + * The original blessed list was primed with the output of kernel version * v4.15 with --core-reg-fixup and then later updated with new registers. + * (The --core-reg-fixup option and it's fixup function have been removed + * from the test, as it's unlikely to use this type of test on a kernel + * older than v5.2.) * * The blessed list is up to date with kernel version v6.4 (or so we hope) */ @@ -843,12 +459,15 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */ ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */ ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */ + ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */ ARM64_SYS_REG(3, 0, 5, 1, 0), /* AFSR0_EL1 */ ARM64_SYS_REG(3, 0, 5, 1, 1), /* AFSR1_EL1 */ ARM64_SYS_REG(3, 0, 5, 2, 0), /* ESR_EL1 */ ARM64_SYS_REG(3, 0, 6, 0, 0), /* FAR_EL1 */ ARM64_SYS_REG(3, 0, 7, 4, 0), /* PAR_EL1 */ ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */ + ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */ + ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */ ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */ ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */ ARM64_SYS_REG(3, 0, 12, 1, 1), /* DISR_EL1 */ @@ -1079,14 +698,14 @@ static __u64 pauth_generic_regs[] = { .regs_n = ARRAY_SIZE(pauth_generic_regs), \ } -static struct vcpu_config vregs_config = { +static struct vcpu_reg_list vregs_config = { .sublists = { BASE_SUBLIST, VREGS_SUBLIST, {0}, }, }; -static struct vcpu_config vregs_pmu_config = { +static struct vcpu_reg_list vregs_pmu_config = { .sublists = { BASE_SUBLIST, VREGS_SUBLIST, @@ -1094,14 +713,14 @@ static struct vcpu_config vregs_pmu_config = { {0}, }, }; -static struct vcpu_config sve_config = { +static struct vcpu_reg_list sve_config = { .sublists = { BASE_SUBLIST, SVE_SUBLIST, {0}, }, }; -static struct vcpu_config sve_pmu_config = { +static struct vcpu_reg_list sve_pmu_config = { .sublists = { BASE_SUBLIST, SVE_SUBLIST, @@ -1109,7 +728,7 @@ static struct vcpu_config sve_pmu_config = { {0}, }, }; -static struct vcpu_config pauth_config = { +static struct vcpu_reg_list pauth_config = { .sublists = { BASE_SUBLIST, VREGS_SUBLIST, @@ -1117,7 +736,7 @@ static struct vcpu_config pauth_config = { {0}, }, }; -static struct vcpu_config pauth_pmu_config = { +static struct vcpu_reg_list pauth_pmu_config = { .sublists = { BASE_SUBLIST, VREGS_SUBLIST, @@ -1127,7 +746,7 @@ static struct vcpu_config pauth_pmu_config = { }, }; -static struct vcpu_config *vcpu_configs[] = { +struct vcpu_reg_list *vcpu_configs[] = { &vregs_config, &vregs_pmu_config, &sve_config, @@ -1135,4 +754,4 @@ static struct vcpu_config *vcpu_configs[] = { &pauth_config, &pauth_pmu_config, }; -static int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); +int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/aarch64/hypercalls.c index bef1499fb465..31f66ba97228 100644 --- a/tools/testing/selftests/kvm/aarch64/hypercalls.c +++ b/tools/testing/selftests/kvm/aarch64/hypercalls.c @@ -8,7 +8,6 @@ * hypercalls are properly masked or unmasked to the guest when disabled or * enabled from the KVM userspace, respectively. */ - #include <errno.h> #include <linux/arm-smccc.h> #include <asm/kvm.h> @@ -105,15 +104,17 @@ static void guest_test_hvc(const struct test_hvc_info *hc_info) switch (stage) { case TEST_STAGE_HVC_IFACE_FEAT_DISABLED: case TEST_STAGE_HVC_IFACE_FALSE_INFO: - GUEST_ASSERT_3(res.a0 == SMCCC_RET_NOT_SUPPORTED, - res.a0, hc_info->func_id, hc_info->arg1); + __GUEST_ASSERT(res.a0 == SMCCC_RET_NOT_SUPPORTED, + "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%llx, stage = %u", + res.a0, hc_info->func_id, hc_info->arg1, stage); break; case TEST_STAGE_HVC_IFACE_FEAT_ENABLED: - GUEST_ASSERT_3(res.a0 != SMCCC_RET_NOT_SUPPORTED, - res.a0, hc_info->func_id, hc_info->arg1); + __GUEST_ASSERT(res.a0 != SMCCC_RET_NOT_SUPPORTED, + "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%llx, stage = %u", + res.a0, hc_info->func_id, hc_info->arg1, stage); break; default: - GUEST_ASSERT_1(0, stage); + GUEST_FAIL("Unexpected stage = %u", stage); } } } @@ -132,7 +133,7 @@ static void guest_code(void) guest_test_hvc(false_hvc_info); break; default: - GUEST_ASSERT_1(0, stage); + GUEST_FAIL("Unexpected stage = %u", stage); } GUEST_SYNC(stage); @@ -290,10 +291,7 @@ static void test_run(void) guest_done = true; break; case UCALL_ABORT: - REPORT_GUEST_ASSERT_N(uc, "values: 0x%lx, 0x%lx; 0x%lx, stage: %u", - GUEST_ASSERT_ARG(uc, 0), - GUEST_ASSERT_ARG(uc, 1), - GUEST_ASSERT_ARG(uc, 2), stage); + REPORT_GUEST_ASSERT(uc); break; default: TEST_FAIL("Unexpected guest exit\n"); diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c index df10f1ffa20d..47bb914ab2fa 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -7,7 +7,6 @@ * hugetlbfs with a hole). It checks that the expected handling method is * called (e.g., uffd faults with the right address and write/read flag). */ - #define _GNU_SOURCE #include <linux/bitmap.h> #include <fcntl.h> @@ -293,12 +292,12 @@ static void guest_code(struct test_desc *test) static void no_dabt_handler(struct ex_regs *regs) { - GUEST_ASSERT_1(false, read_sysreg(far_el1)); + GUEST_FAIL("Unexpected dabt, far_el1 = 0x%llx", read_sysreg(far_el1)); } static void no_iabt_handler(struct ex_regs *regs) { - GUEST_ASSERT_1(false, regs->pc); + GUEST_FAIL("Unexpected iabt, pc = 0x%lx", regs->pc); } static struct uffd_args { @@ -318,7 +317,7 @@ static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg, TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING, "The only expected UFFD mode is MISSING"); - ASSERT_EQ(addr, (uint64_t)args->hva); + TEST_ASSERT_EQ(addr, (uint64_t)args->hva); pr_debug("uffd fault: addr=%p write=%d\n", (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE)); @@ -432,7 +431,7 @@ static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run) region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA); hva = (void *)region->region.userspace_addr; - ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr); + TEST_ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr); memcpy(hva, run->mmio.data, run->mmio.len); events.mmio_exits += 1; @@ -631,9 +630,9 @@ static void setup_default_handlers(struct test_desc *test) static void check_event_counts(struct test_desc *test) { - ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults); - ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits); - ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs); + TEST_ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults); + TEST_ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits); + TEST_ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs); } static void print_test_banner(enum vm_guest_mode mode, struct test_params *p) @@ -679,7 +678,7 @@ static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu, } break; case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); + REPORT_GUEST_ASSERT(uc); break; case UCALL_DONE: goto done; diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c index 90d854e0fcff..2e64b4856e38 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c @@ -7,7 +7,6 @@ * host to inject a specific intid via a GUEST_SYNC call, and then checks that * it received it. */ - #include <asm/kvm.h> #include <asm/kvm_para.h> #include <sys/eventfd.h> @@ -781,7 +780,7 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split) run_guest_cmd(vcpu, gic_fd, &inject_args, &args); break; case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); + REPORT_GUEST_ASSERT(uc); break; case UCALL_DONE: goto done; diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 2439c4043fed..09c116a82a84 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -128,6 +128,7 @@ static void prefault_mem(void *alias, uint64_t len) static void run_test(enum vm_guest_mode mode, void *arg) { + struct memstress_vcpu_args *vcpu_args; struct test_params *p = arg; struct uffd_desc **uffd_descs = NULL; struct timespec start; @@ -145,24 +146,24 @@ static void run_test(enum vm_guest_mode mode, void *arg) "Failed to allocate buffer for guest data pattern"); memset(guest_data_prototype, 0xAB, demand_paging_size); + if (p->uffd_mode == UFFDIO_REGISTER_MODE_MINOR) { + for (i = 0; i < nr_vcpus; i++) { + vcpu_args = &memstress_args.vcpu_args[i]; + prefault_mem(addr_gpa2alias(vm, vcpu_args->gpa), + vcpu_args->pages * memstress_args.guest_page_size); + } + } + if (p->uffd_mode) { uffd_descs = malloc(nr_vcpus * sizeof(struct uffd_desc *)); TEST_ASSERT(uffd_descs, "Memory allocation failed"); - for (i = 0; i < nr_vcpus; i++) { - struct memstress_vcpu_args *vcpu_args; void *vcpu_hva; - void *vcpu_alias; vcpu_args = &memstress_args.vcpu_args[i]; /* Cache the host addresses of the region */ vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa); - vcpu_alias = addr_gpa2alias(vm, vcpu_args->gpa); - - prefault_mem(vcpu_alias, - vcpu_args->pages * memstress_args.guest_page_size); - /* * Set up user fault fd to handle demand paging * requests. @@ -207,10 +208,11 @@ static void help(char *name) { puts(""); printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n" - " [-b memory] [-s type] [-v vcpus] [-o]\n", name); + " [-b memory] [-s type] [-v vcpus] [-c cpu_list] [-o]\n", name); guest_modes_help(); printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n" " UFFD registration mode: 'MISSING' or 'MINOR'.\n"); + kvm_print_vcpu_pinning_help(); printf(" -d: add a delay in usec to the User Fault\n" " FD handler to simulate demand paging\n" " overheads. Ignored without -u.\n"); @@ -228,6 +230,7 @@ static void help(char *name) int main(int argc, char *argv[]) { int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); + const char *cpulist = NULL; struct test_params p = { .src_type = DEFAULT_VM_MEM_SRC, .partition_vcpu_memory_access = true, @@ -236,7 +239,7 @@ int main(int argc, char *argv[]) guest_modes_append_default(); - while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:o")) != -1) { + while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:c:o")) != -1) { switch (opt) { case 'm': guest_modes_cmdline(optarg); @@ -263,6 +266,9 @@ int main(int argc, char *argv[]) TEST_ASSERT(nr_vcpus <= max_vcpus, "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; + case 'c': + cpulist = optarg; + break; case 'o': p.partition_vcpu_memory_access = false; break; @@ -278,6 +284,12 @@ int main(int argc, char *argv[]) TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -s"); } + if (cpulist) { + kvm_parse_vcpu_pinning(cpulist, memstress_args.vcpu_to_pcpu, + nr_vcpus); + memstress_args.pin_vcpus = true; + } + for_each_guest_mode(run_test, &p); return 0; diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index e9d6d1aecf89..d374dbcf9a53 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -136,77 +136,6 @@ struct test_params { bool random_access; }; -static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable) -{ - int i; - - for (i = 0; i < slots; i++) { - int slot = MEMSTRESS_MEM_SLOT_INDEX + i; - int flags = enable ? KVM_MEM_LOG_DIRTY_PAGES : 0; - - vm_mem_region_set_flags(vm, slot, flags); - } -} - -static inline void enable_dirty_logging(struct kvm_vm *vm, int slots) -{ - toggle_dirty_logging(vm, slots, true); -} - -static inline void disable_dirty_logging(struct kvm_vm *vm, int slots) -{ - toggle_dirty_logging(vm, slots, false); -} - -static void get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots) -{ - int i; - - for (i = 0; i < slots; i++) { - int slot = MEMSTRESS_MEM_SLOT_INDEX + i; - - kvm_vm_get_dirty_log(vm, slot, bitmaps[i]); - } -} - -static void clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], - int slots, uint64_t pages_per_slot) -{ - int i; - - for (i = 0; i < slots; i++) { - int slot = MEMSTRESS_MEM_SLOT_INDEX + i; - - kvm_vm_clear_dirty_log(vm, slot, bitmaps[i], 0, pages_per_slot); - } -} - -static unsigned long **alloc_bitmaps(int slots, uint64_t pages_per_slot) -{ - unsigned long **bitmaps; - int i; - - bitmaps = malloc(slots * sizeof(bitmaps[0])); - TEST_ASSERT(bitmaps, "Failed to allocate bitmaps array."); - - for (i = 0; i < slots; i++) { - bitmaps[i] = bitmap_zalloc(pages_per_slot); - TEST_ASSERT(bitmaps[i], "Failed to allocate slot bitmap."); - } - - return bitmaps; -} - -static void free_bitmaps(unsigned long *bitmaps[], int slots) -{ - int i; - - for (i = 0; i < slots; i++) - free(bitmaps[i]); - - free(bitmaps); -} - static void run_test(enum vm_guest_mode mode, void *arg) { struct test_params *p = arg; @@ -236,7 +165,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) host_num_pages = vm_num_host_pages(mode, guest_num_pages); pages_per_slot = host_num_pages / p->slots; - bitmaps = alloc_bitmaps(p->slots, pages_per_slot); + bitmaps = memstress_alloc_bitmaps(p->slots, pages_per_slot); if (dirty_log_manual_caps) vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, @@ -277,7 +206,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Enable dirty logging */ clock_gettime(CLOCK_MONOTONIC, &start); - enable_dirty_logging(vm, p->slots); + memstress_enable_dirty_logging(vm, p->slots); ts_diff = timespec_elapsed(start); pr_info("Enabling dirty logging time: %ld.%.9lds\n\n", ts_diff.tv_sec, ts_diff.tv_nsec); @@ -306,7 +235,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) iteration, ts_diff.tv_sec, ts_diff.tv_nsec); clock_gettime(CLOCK_MONOTONIC, &start); - get_dirty_log(vm, bitmaps, p->slots); + memstress_get_dirty_log(vm, bitmaps, p->slots); ts_diff = timespec_elapsed(start); get_dirty_log_total = timespec_add(get_dirty_log_total, ts_diff); @@ -315,7 +244,8 @@ static void run_test(enum vm_guest_mode mode, void *arg) if (dirty_log_manual_caps) { clock_gettime(CLOCK_MONOTONIC, &start); - clear_dirty_log(vm, bitmaps, p->slots, pages_per_slot); + memstress_clear_dirty_log(vm, bitmaps, p->slots, + pages_per_slot); ts_diff = timespec_elapsed(start); clear_dirty_log_total = timespec_add(clear_dirty_log_total, ts_diff); @@ -334,7 +264,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Disable dirty logging */ clock_gettime(CLOCK_MONOTONIC, &start); - disable_dirty_logging(vm, p->slots); + memstress_disable_dirty_logging(vm, p->slots); ts_diff = timespec_elapsed(start); pr_info("Disabling dirty logging time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); @@ -359,7 +289,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); } - free_bitmaps(bitmaps, p->slots); + memstress_free_bitmaps(bitmaps, p->slots); arch_cleanup_vm(vm); memstress_destroy_vm(vm); } @@ -402,17 +332,7 @@ static void help(char *name) " so -w X means each page has an X%% chance of writing\n" " and a (100-X)%% chance of reading.\n" " (default: 100 i.e. all pages are written to.)\n"); - printf(" -c: Pin tasks to physical CPUs. Takes a list of comma separated\n" - " values (target pCPU), one for each vCPU, plus an optional\n" - " entry for the main application task (specified via entry\n" - " <nr_vcpus + 1>). If used, entries must be provided for all\n" - " vCPUs, i.e. pinning vCPUs is all or nothing.\n\n" - " E.g. to create 3 vCPUs, pin vCPU0=>pCPU22, vCPU1=>pCPU23,\n" - " vCPU2=>pCPU24, and pin the application task to pCPU50:\n\n" - " ./dirty_log_perf_test -v 3 -c 22,23,24,50\n\n" - " To leave the application task unpinned, drop the final entry:\n\n" - " ./dirty_log_perf_test -v 3 -c 22,23,24\n\n" - " (default: no pinning)\n"); + kvm_print_vcpu_pinning_help(); puts(""); exit(0); } diff --git a/tools/testing/selftests/kvm/get-reg-list.c b/tools/testing/selftests/kvm/get-reg-list.c new file mode 100644 index 000000000000..be7bf5224434 --- /dev/null +++ b/tools/testing/selftests/kvm/get-reg-list.c @@ -0,0 +1,401 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Check for KVM_GET_REG_LIST regressions. + * + * Copyright (C) 2020, Red Hat, Inc. + * + * When attempting to migrate from a host with an older kernel to a host + * with a newer kernel we allow the newer kernel on the destination to + * list new registers with get-reg-list. We assume they'll be unused, at + * least until the guest reboots, and so they're relatively harmless. + * However, if the destination host with the newer kernel is missing + * registers which the source host with the older kernel has, then that's + * a regression in get-reg-list. This test checks for that regression by + * checking the current list against a blessed list. We should never have + * missing registers, but if new ones appear then they can probably be + * added to the blessed list. A completely new blessed list can be created + * by running the test with the --list command line argument. + * + * The blessed list should be created from the oldest possible kernel. + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/wait.h> +#include "kvm_util.h" +#include "test_util.h" +#include "processor.h" + +static struct kvm_reg_list *reg_list; +static __u64 *blessed_reg, blessed_n; + +extern struct vcpu_reg_list *vcpu_configs[]; +extern int vcpu_configs_n; + +#define for_each_reg(i) \ + for ((i) = 0; (i) < reg_list->n; ++(i)) + +#define for_each_reg_filtered(i) \ + for_each_reg(i) \ + if (!filter_reg(reg_list->reg[i])) + +#define for_each_missing_reg(i) \ + for ((i) = 0; (i) < blessed_n; ++(i)) \ + if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i])) \ + if (check_supported_reg(vcpu, blessed_reg[i])) + +#define for_each_new_reg(i) \ + for_each_reg_filtered(i) \ + if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i])) + +#define for_each_present_blessed_reg(i) \ + for_each_reg(i) \ + if (find_reg(blessed_reg, blessed_n, reg_list->reg[i])) + +static const char *config_name(struct vcpu_reg_list *c) +{ + struct vcpu_reg_sublist *s; + int len = 0; + + if (c->name) + return c->name; + + for_each_sublist(c, s) + len += strlen(s->name) + 1; + + c->name = malloc(len); + + len = 0; + for_each_sublist(c, s) { + if (!strcmp(s->name, "base")) + continue; + strcat(c->name + len, s->name); + len += strlen(s->name) + 1; + c->name[len - 1] = '+'; + } + c->name[len - 1] = '\0'; + + return c->name; +} + +bool __weak check_supported_reg(struct kvm_vcpu *vcpu, __u64 reg) +{ + return true; +} + +bool __weak filter_reg(__u64 reg) +{ + return false; +} + +static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg) +{ + int i; + + for (i = 0; i < nr_regs; ++i) + if (reg == regs[i]) + return true; + return false; +} + +void __weak print_reg(const char *prefix, __u64 id) +{ + printf("\t0x%llx,\n", id); +} + +bool __weak check_reject_set(int err) +{ + return true; +} + +void __weak finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) +{ +} + +#ifdef __aarch64__ +static void prepare_vcpu_init(struct vcpu_reg_list *c, struct kvm_vcpu_init *init) +{ + struct vcpu_reg_sublist *s; + + for_each_sublist(c, s) + if (s->capability) + init->features[s->feature / 32] |= 1 << (s->feature % 32); +} + +static struct kvm_vcpu *vcpu_config_get_vcpu(struct vcpu_reg_list *c, struct kvm_vm *vm) +{ + struct kvm_vcpu_init init = { .target = -1, }; + struct kvm_vcpu *vcpu; + + prepare_vcpu_init(c, &init); + vcpu = __vm_vcpu_add(vm, 0); + aarch64_vcpu_setup(vcpu, &init); + + return vcpu; +} +#else +static struct kvm_vcpu *vcpu_config_get_vcpu(struct vcpu_reg_list *c, struct kvm_vm *vm) +{ + return __vm_vcpu_add(vm, 0); +} +#endif + +static void check_supported(struct vcpu_reg_list *c) +{ + struct vcpu_reg_sublist *s; + + for_each_sublist(c, s) { + if (!s->capability) + continue; + + __TEST_REQUIRE(kvm_has_cap(s->capability), + "%s: %s not available, skipping tests\n", + config_name(c), s->name); + } +} + +static bool print_list; +static bool print_filtered; + +static void run_test(struct vcpu_reg_list *c) +{ + int new_regs = 0, missing_regs = 0, i, n; + int failed_get = 0, failed_set = 0, failed_reject = 0; + int skipped_set = 0; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct vcpu_reg_sublist *s; + + check_supported(c); + + vm = vm_create_barebones(); + vcpu = vcpu_config_get_vcpu(c, vm); + finalize_vcpu(vcpu, c); + + reg_list = vcpu_get_reg_list(vcpu); + + if (print_list || print_filtered) { + putchar('\n'); + for_each_reg(i) { + __u64 id = reg_list->reg[i]; + if ((print_list && !filter_reg(id)) || + (print_filtered && filter_reg(id))) + print_reg(config_name(c), id); + } + putchar('\n'); + return; + } + + for_each_sublist(c, s) + blessed_n += s->regs_n; + blessed_reg = calloc(blessed_n, sizeof(__u64)); + + n = 0; + for_each_sublist(c, s) { + for (i = 0; i < s->regs_n; ++i) + blessed_reg[n++] = s->regs[i]; + } + + /* + * We only test that we can get the register and then write back the + * same value. Some registers may allow other values to be written + * back, but others only allow some bits to be changed, and at least + * for ID registers set will fail if the value does not exactly match + * what was returned by get. If registers that allow other values to + * be written need to have the other values tested, then we should + * create a new set of tests for those in a new independent test + * executable. + * + * Only do the get/set tests on present, blessed list registers, + * since we don't know the capabilities of any new registers. + */ + for_each_present_blessed_reg(i) { + uint8_t addr[2048 / 8]; + struct kvm_one_reg reg = { + .id = reg_list->reg[i], + .addr = (__u64)&addr, + }; + bool reject_reg = false, skip_reg = false; + int ret; + + ret = __vcpu_get_reg(vcpu, reg_list->reg[i], &addr); + if (ret) { + printf("%s: Failed to get ", config_name(c)); + print_reg(config_name(c), reg.id); + putchar('\n'); + ++failed_get; + } + + for_each_sublist(c, s) { + /* rejects_set registers are rejected for set operation */ + if (s->rejects_set && find_reg(s->rejects_set, s->rejects_set_n, reg.id)) { + reject_reg = true; + ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); + if (ret != -1 || !check_reject_set(errno)) { + printf("%s: Failed to reject (ret=%d, errno=%d) ", config_name(c), ret, errno); + print_reg(config_name(c), reg.id); + putchar('\n'); + ++failed_reject; + } + break; + } + + /* skips_set registers are skipped for set operation */ + if (s->skips_set && find_reg(s->skips_set, s->skips_set_n, reg.id)) { + skip_reg = true; + ++skipped_set; + break; + } + } + + if (!reject_reg && !skip_reg) { + ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, ®); + if (ret) { + printf("%s: Failed to set ", config_name(c)); + print_reg(config_name(c), reg.id); + putchar('\n'); + ++failed_set; + } + } + } + + for_each_new_reg(i) + ++new_regs; + + for_each_missing_reg(i) + ++missing_regs; + + if (new_regs || missing_regs) { + n = 0; + for_each_reg_filtered(i) + ++n; + + printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n); + printf("%s: Number registers: %5lld (includes %lld filtered registers)\n", + config_name(c), reg_list->n, reg_list->n - n); + } + + if (new_regs) { + printf("\n%s: There are %d new registers.\n" + "Consider adding them to the blessed reg " + "list with the following lines:\n\n", config_name(c), new_regs); + for_each_new_reg(i) + print_reg(config_name(c), reg_list->reg[i]); + putchar('\n'); + } + + if (missing_regs) { + printf("\n%s: There are %d missing registers.\n" + "The following lines are missing registers:\n\n", config_name(c), missing_regs); + for_each_missing_reg(i) + print_reg(config_name(c), blessed_reg[i]); + putchar('\n'); + } + + TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject, + "%s: There are %d missing registers; %d registers failed get; " + "%d registers failed set; %d registers failed reject; %d registers skipped set", + config_name(c), missing_regs, failed_get, failed_set, failed_reject, skipped_set); + + pr_info("%s: PASS\n", config_name(c)); + blessed_n = 0; + free(blessed_reg); + free(reg_list); + kvm_vm_free(vm); +} + +static void help(void) +{ + struct vcpu_reg_list *c; + int i; + + printf( + "\n" + "usage: get-reg-list [--config=<selection>] [--list] [--list-filtered]\n\n" + " --config=<selection> Used to select a specific vcpu configuration for the test/listing\n" + " '<selection>' may be\n"); + + for (i = 0; i < vcpu_configs_n; ++i) { + c = vcpu_configs[i]; + printf( + " '%s'\n", config_name(c)); + } + + printf( + "\n" + " --list Print the register list rather than test it (requires --config)\n" + " --list-filtered Print registers that would normally be filtered out (requires --config)\n" + "\n" + ); +} + +static struct vcpu_reg_list *parse_config(const char *config) +{ + struct vcpu_reg_list *c = NULL; + int i; + + if (config[8] != '=') + help(), exit(1); + + for (i = 0; i < vcpu_configs_n; ++i) { + c = vcpu_configs[i]; + if (strcmp(config_name(c), &config[9]) == 0) + break; + } + + if (i == vcpu_configs_n) + help(), exit(1); + + return c; +} + +int main(int ac, char **av) +{ + struct vcpu_reg_list *c, *sel = NULL; + int i, ret = 0; + pid_t pid; + + for (i = 1; i < ac; ++i) { + if (strncmp(av[i], "--config", 8) == 0) + sel = parse_config(av[i]); + else if (strcmp(av[i], "--list") == 0) + print_list = true; + else if (strcmp(av[i], "--list-filtered") == 0) + print_filtered = true; + else if (strcmp(av[i], "--help") == 0 || strcmp(av[1], "-h") == 0) + help(), exit(0); + else + help(), exit(1); + } + + if (print_list || print_filtered) { + /* + * We only want to print the register list of a single config. + */ + if (!sel) + help(), exit(1); + } + + for (i = 0; i < vcpu_configs_n; ++i) { + c = vcpu_configs[i]; + if (sel && c != sel) + continue; + + pid = fork(); + + if (!pid) { + run_test(c); + exit(0); + } else { + int wstatus; + pid_t wpid = wait(&wstatus); + TEST_ASSERT(wpid == pid && WIFEXITED(wstatus), "wait: Unexpected return"); + if (WEXITSTATUS(wstatus) && WEXITSTATUS(wstatus) != KSFT_SKIP) + ret = KSFT_FAIL; + } + } + + return ret; +} diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c new file mode 100644 index 000000000000..41230b746190 --- /dev/null +++ b/tools/testing/selftests/kvm/guest_print_test.c @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * A test for GUEST_PRINTF + * + * Copyright 2022, Google, Inc. and/or its affiliates. + */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +struct guest_vals { + uint64_t a; + uint64_t b; + uint64_t type; +}; + +static struct guest_vals vals; + +/* GUEST_PRINTF()/GUEST_ASSERT_FMT() does not support float or double. */ +#define TYPE_LIST \ +TYPE(test_type_i64, I64, "%ld", int64_t) \ +TYPE(test_type_u64, U64u, "%lu", uint64_t) \ +TYPE(test_type_x64, U64x, "0x%lx", uint64_t) \ +TYPE(test_type_X64, U64X, "0x%lX", uint64_t) \ +TYPE(test_type_u32, U32u, "%u", uint32_t) \ +TYPE(test_type_x32, U32x, "0x%x", uint32_t) \ +TYPE(test_type_X32, U32X, "0x%X", uint32_t) \ +TYPE(test_type_int, INT, "%d", int) \ +TYPE(test_type_char, CHAR, "%c", char) \ +TYPE(test_type_str, STR, "'%s'", const char *) \ +TYPE(test_type_ptr, PTR, "%p", uintptr_t) + +enum args_type { +#define TYPE(fn, ext, fmt_t, T) TYPE_##ext, + TYPE_LIST +#undef TYPE +}; + +static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf, + const char *expected_assert); + +#define BUILD_TYPE_STRINGS_AND_HELPER(fn, ext, fmt_t, T) \ +const char *PRINTF_FMT_##ext = "Got params a = " fmt_t " and b = " fmt_t; \ +const char *ASSERT_FMT_##ext = "Expected " fmt_t ", got " fmt_t " instead"; \ +static void fn(struct kvm_vcpu *vcpu, T a, T b) \ +{ \ + char expected_printf[UCALL_BUFFER_LEN]; \ + char expected_assert[UCALL_BUFFER_LEN]; \ + \ + snprintf(expected_printf, UCALL_BUFFER_LEN, PRINTF_FMT_##ext, a, b); \ + snprintf(expected_assert, UCALL_BUFFER_LEN, ASSERT_FMT_##ext, a, b); \ + vals = (struct guest_vals){ (uint64_t)a, (uint64_t)b, TYPE_##ext }; \ + sync_global_to_guest(vcpu->vm, vals); \ + run_test(vcpu, expected_printf, expected_assert); \ +} + +#define TYPE(fn, ext, fmt_t, T) \ + BUILD_TYPE_STRINGS_AND_HELPER(fn, ext, fmt_t, T) + TYPE_LIST +#undef TYPE + +static void guest_code(void) +{ + while (1) { + switch (vals.type) { +#define TYPE(fn, ext, fmt_t, T) \ + case TYPE_##ext: \ + GUEST_PRINTF(PRINTF_FMT_##ext, vals.a, vals.b); \ + __GUEST_ASSERT(vals.a == vals.b, \ + ASSERT_FMT_##ext, vals.a, vals.b); \ + break; + TYPE_LIST +#undef TYPE + default: + GUEST_SYNC(vals.type); + } + + GUEST_DONE(); + } +} + +/* + * Unfortunately this gets a little messy because 'assert_msg' doesn't + * just contains the matching string, it also contains additional assert + * info. Fortunately the part that matches should be at the very end of + * 'assert_msg'. + */ +static void ucall_abort(const char *assert_msg, const char *expected_assert_msg) +{ + int len_str = strlen(assert_msg); + int len_substr = strlen(expected_assert_msg); + int offset = len_str - len_substr; + + TEST_ASSERT(len_substr <= len_str, + "Expected '%s' to be a substring of '%s'\n", + assert_msg, expected_assert_msg); + + TEST_ASSERT(strcmp(&assert_msg[offset], expected_assert_msg) == 0, + "Unexpected mismatch. Expected: '%s', got: '%s'", + expected_assert_msg, &assert_msg[offset]); +} + +static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf, + const char *expected_assert) +{ + struct kvm_run *run = vcpu->run; + struct ucall uc; + + while (1) { + vcpu_run(vcpu); + + TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, exit_reason_str(run->exit_reason)); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + TEST_FAIL("Unknown 'args_type' = %lu", uc.args[1]); + break; + case UCALL_PRINTF: + TEST_ASSERT(strcmp(uc.buffer, expected_printf) == 0, + "Unexpected mismatch. Expected: '%s', got: '%s'", + expected_printf, uc.buffer); + break; + case UCALL_ABORT: + ucall_abort(uc.buffer, expected_assert); + break; + case UCALL_DONE: + return; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } +} + +static void guest_code_limits(void) +{ + char test_str[UCALL_BUFFER_LEN + 10]; + + memset(test_str, 'a', sizeof(test_str)); + test_str[sizeof(test_str) - 1] = 0; + + GUEST_PRINTF("%s", test_str); +} + +static void test_limits(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_run *run; + struct kvm_vm *vm; + struct ucall uc; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code_limits); + run = vcpu->run; + vcpu_run(vcpu); + + TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, exit_reason_str(run->exit_reason)); + + TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_ABORT, + "Unexpected ucall command: %lu, Expected: %u (UCALL_ABORT)\n", + uc.cmd, UCALL_ABORT); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + test_type_i64(vcpu, -1, -1); + test_type_i64(vcpu, -1, 1); + test_type_i64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef); + test_type_i64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee); + + test_type_u64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef); + test_type_u64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee); + test_type_x64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef); + test_type_x64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee); + test_type_X64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef); + test_type_X64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee); + + test_type_u32(vcpu, 0x90abcdef, 0x90abcdef); + test_type_u32(vcpu, 0x90abcdef, 0x90abcdee); + test_type_x32(vcpu, 0x90abcdef, 0x90abcdef); + test_type_x32(vcpu, 0x90abcdef, 0x90abcdee); + test_type_X32(vcpu, 0x90abcdef, 0x90abcdef); + test_type_X32(vcpu, 0x90abcdef, 0x90abcdee); + + test_type_int(vcpu, -1, -1); + test_type_int(vcpu, -1, 1); + test_type_int(vcpu, 1, 1); + + test_type_char(vcpu, 'a', 'a'); + test_type_char(vcpu, 'a', 'A'); + test_type_char(vcpu, 'a', 'b'); + + test_type_str(vcpu, "foo", "foo"); + test_type_str(vcpu, "foo", "bar"); + + test_type_ptr(vcpu, 0x1234567890abcdef, 0x1234567890abcdef); + test_type_ptr(vcpu, 0x1234567890abcdef, 0x1234567890abcdee); + + kvm_vm_free(vm); + + test_limits(); + + return 0; +} diff --git a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h b/tools/testing/selftests/kvm/include/aarch64/arch_timer.h index cb7c03de3a21..b3e97525cb55 100644 --- a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h +++ b/tools/testing/selftests/kvm/include/aarch64/arch_timer.h @@ -41,7 +41,7 @@ static inline uint64_t timer_get_cntct(enum arch_timer timer) case PHYSICAL: return read_sysreg(cntpct_el0); default: - GUEST_ASSERT_1(0, timer); + GUEST_FAIL("Unexpected timer type = %u", timer); } /* We should not reach here */ @@ -58,7 +58,7 @@ static inline void timer_set_cval(enum arch_timer timer, uint64_t cval) write_sysreg(cval, cntp_cval_el0); break; default: - GUEST_ASSERT_1(0, timer); + GUEST_FAIL("Unexpected timer type = %u", timer); } isb(); @@ -72,7 +72,7 @@ static inline uint64_t timer_get_cval(enum arch_timer timer) case PHYSICAL: return read_sysreg(cntp_cval_el0); default: - GUEST_ASSERT_1(0, timer); + GUEST_FAIL("Unexpected timer type = %u", timer); } /* We should not reach here */ @@ -89,7 +89,7 @@ static inline void timer_set_tval(enum arch_timer timer, uint32_t tval) write_sysreg(tval, cntp_tval_el0); break; default: - GUEST_ASSERT_1(0, timer); + GUEST_FAIL("Unexpected timer type = %u", timer); } isb(); @@ -105,7 +105,7 @@ static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl) write_sysreg(ctl, cntp_ctl_el0); break; default: - GUEST_ASSERT_1(0, timer); + GUEST_FAIL("Unexpected timer type = %u", timer); } isb(); @@ -119,7 +119,7 @@ static inline uint32_t timer_get_ctl(enum arch_timer timer) case PHYSICAL: return read_sysreg(cntp_ctl_el0); default: - GUEST_ASSERT_1(0, timer); + GUEST_FAIL("Unexpected timer type = %u", timer); } /* We should not reach here */ diff --git a/tools/testing/selftests/kvm/include/aarch64/ucall.h b/tools/testing/selftests/kvm/include/aarch64/ucall.h new file mode 100644 index 000000000000..4b68f37efd36 --- /dev/null +++ b/tools/testing/selftests/kvm/include/aarch64/ucall.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UCALL_H +#define SELFTEST_KVM_UCALL_H + +#include "kvm_util_base.h" + +#define UCALL_EXIT_REASON KVM_EXIT_MMIO + +/* + * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each + * VM), it must not be accessed from host code. + */ +extern vm_vaddr_t *ucall_exit_mmio_addr; + +static inline void ucall_arch_do_ucall(vm_vaddr_t uc) +{ + WRITE_ONCE(*ucall_exit_mmio_addr, uc); +} + +#endif diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h index a089c356f354..a18db6a7b3cf 100644 --- a/tools/testing/selftests/kvm/include/kvm_util_base.h +++ b/tools/testing/selftests/kvm/include/kvm_util_base.h @@ -15,6 +15,7 @@ #include <linux/kernel.h> #include <linux/kvm.h> #include "linux/rbtree.h" +#include <linux/types.h> #include <asm/atomic.h> @@ -124,6 +125,26 @@ struct kvm_vm { uint32_t memslots[NR_MEM_REGIONS]; }; +struct vcpu_reg_sublist { + const char *name; + long capability; + int feature; + bool finalize; + __u64 *regs; + __u64 regs_n; + __u64 *rejects_set; + __u64 rejects_set_n; + __u64 *skips_set; + __u64 skips_set_n; +}; + +struct vcpu_reg_list { + char *name; + struct vcpu_reg_sublist sublists[]; +}; + +#define for_each_sublist(c, s) \ + for ((s) = &(c)->sublists[0]; (s)->regs; ++(s)) #define kvm_for_each_vcpu(vm, i, vcpu) \ for ((i) = 0; (i) <= (vm)->last_vcpu_id; (i)++) \ @@ -362,8 +383,10 @@ static inline void read_stats_header(int stats_fd, struct kvm_stats_header *head { ssize_t ret; - ret = read(stats_fd, header, sizeof(*header)); - TEST_ASSERT(ret == sizeof(*header), "Read stats header"); + ret = pread(stats_fd, header, sizeof(*header), 0); + TEST_ASSERT(ret == sizeof(*header), + "Failed to read '%lu' header bytes, ret = '%ld'", + sizeof(*header), ret); } struct kvm_stats_desc *read_stats_descriptors(int stats_fd, @@ -733,6 +756,7 @@ static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu, struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm); void kvm_pin_this_task_to_pcpu(uint32_t pcpu); +void kvm_print_vcpu_pinning_help(void); void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], int nr_vcpus); diff --git a/tools/testing/selftests/kvm/include/memstress.h b/tools/testing/selftests/kvm/include/memstress.h index 72e3e358ef7b..ce4e603050ea 100644 --- a/tools/testing/selftests/kvm/include/memstress.h +++ b/tools/testing/selftests/kvm/include/memstress.h @@ -72,4 +72,12 @@ void memstress_guest_code(uint32_t vcpu_id); uint64_t memstress_nested_pages(int nr_vcpus); void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]); +void memstress_enable_dirty_logging(struct kvm_vm *vm, int slots); +void memstress_disable_dirty_logging(struct kvm_vm *vm, int slots); +void memstress_get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots); +void memstress_clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], + int slots, uint64_t pages_per_slot); +unsigned long **memstress_alloc_bitmaps(int slots, uint64_t pages_per_slot); +void memstress_free_bitmaps(unsigned long *bitmaps[], int slots); + #endif /* SELFTEST_KVM_MEMSTRESS_H */ diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h index d00d213c3805..5b62a3d2aa9b 100644 --- a/tools/testing/selftests/kvm/include/riscv/processor.h +++ b/tools/testing/selftests/kvm/include/riscv/processor.h @@ -38,6 +38,9 @@ static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t idx, KVM_REG_RISCV_TIMER_REG(name), \ KVM_REG_SIZE_U64) +#define RISCV_ISA_EXT_REG(idx) __kvm_reg_id(KVM_REG_RISCV_ISA_EXT, \ + idx, KVM_REG_SIZE_ULONG) + /* L3 index Bit[47:39] */ #define PGTBL_L3_INDEX_MASK 0x0000FF8000000000ULL #define PGTBL_L3_INDEX_SHIFT 39 diff --git a/tools/testing/selftests/kvm/include/riscv/ucall.h b/tools/testing/selftests/kvm/include/riscv/ucall.h new file mode 100644 index 000000000000..be46eb32ec27 --- /dev/null +++ b/tools/testing/selftests/kvm/include/riscv/ucall.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UCALL_H +#define SELFTEST_KVM_UCALL_H + +#include "processor.h" + +#define UCALL_EXIT_REASON KVM_EXIT_RISCV_SBI + +static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +{ +} + +static inline void ucall_arch_do_ucall(vm_vaddr_t uc) +{ + sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT, + KVM_RISCV_SELFTESTS_SBI_UCALL, + uc, 0, 0, 0, 0, 0); +} + +#endif diff --git a/tools/testing/selftests/kvm/include/s390x/ucall.h b/tools/testing/selftests/kvm/include/s390x/ucall.h new file mode 100644 index 000000000000..b231bf2e49d6 --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390x/ucall.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UCALL_H +#define SELFTEST_KVM_UCALL_H + +#include "kvm_util_base.h" + +#define UCALL_EXIT_REASON KVM_EXIT_S390_SIEIC + +static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +{ +} + +static inline void ucall_arch_do_ucall(vm_vaddr_t uc) +{ + /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */ + asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory"); +} + +#endif diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index a6e9f215ce70..7e614adc6cf4 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -53,14 +53,13 @@ void test_assert(bool exp, const char *exp_str, #define TEST_ASSERT(e, fmt, ...) \ test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__) -#define ASSERT_EQ(a, b) do { \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - TEST_ASSERT(__a == __b, \ - "ASSERT_EQ(%s, %s) failed.\n" \ - "\t%s is %#lx\n" \ - "\t%s is %#lx", \ - #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \ +#define TEST_ASSERT_EQ(a, b) \ +do { \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + test_assert(__a == __b, #a " == " #b, __FILE__, __LINE__, \ + "%#lx != %#lx (%s != %s)", \ + (unsigned long)(__a), (unsigned long)(__b), #a, #b);\ } while (0) #define TEST_ASSERT_KVM_EXIT_REASON(vcpu, expected) do { \ @@ -186,4 +185,9 @@ static inline uint32_t atoi_non_negative(const char *name, const char *num_str) return num; } +int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args); +int guest_snprintf(char *buf, int n, const char *fmt, ...); + +char *strdup_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2), nonnull(1))); + #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h index 1a6aaef5ccae..112bc1da732a 100644 --- a/tools/testing/selftests/kvm/include/ucall_common.h +++ b/tools/testing/selftests/kvm/include/ucall_common.h @@ -7,21 +7,25 @@ #ifndef SELFTEST_KVM_UCALL_COMMON_H #define SELFTEST_KVM_UCALL_COMMON_H #include "test_util.h" +#include "ucall.h" /* Common ucalls */ enum { UCALL_NONE, UCALL_SYNC, UCALL_ABORT, + UCALL_PRINTF, UCALL_DONE, UCALL_UNHANDLED, }; #define UCALL_MAX_ARGS 7 +#define UCALL_BUFFER_LEN 1024 struct ucall { uint64_t cmd; uint64_t args[UCALL_MAX_ARGS]; + char buffer[UCALL_BUFFER_LEN]; /* Host virtual address of this struct. */ struct ucall *hva; @@ -32,8 +36,12 @@ void ucall_arch_do_ucall(vm_vaddr_t uc); void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu); void ucall(uint64_t cmd, int nargs, ...); +void ucall_fmt(uint64_t cmd, const char *fmt, ...); +void ucall_assert(uint64_t cmd, const char *exp, const char *file, + unsigned int line, const char *fmt, ...); uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc); void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa); +int ucall_nr_pages_required(uint64_t page_size); /* * Perform userspace call without any associated data. This bare call avoids @@ -46,8 +54,11 @@ void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa); #define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \ ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4) #define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage) +#define GUEST_PRINTF(_fmt, _args...) ucall_fmt(UCALL_PRINTF, _fmt, ##_args) #define GUEST_DONE() ucall(UCALL_DONE, 0) +#define REPORT_GUEST_PRINTF(ucall) pr_info("%s", (ucall).buffer) + enum guest_assert_builtin_args { GUEST_ERROR_STRING, GUEST_FILE, @@ -55,70 +66,41 @@ enum guest_assert_builtin_args { GUEST_ASSERT_BUILTIN_NARGS }; -#define __GUEST_ASSERT(_condition, _condstr, _nargs, _args...) \ -do { \ - if (!(_condition)) \ - ucall(UCALL_ABORT, GUEST_ASSERT_BUILTIN_NARGS + _nargs, \ - "Failed guest assert: " _condstr, \ - __FILE__, __LINE__, ##_args); \ +#define ____GUEST_ASSERT(_condition, _exp, _fmt, _args...) \ +do { \ + if (!(_condition)) \ + ucall_assert(UCALL_ABORT, _exp, __FILE__, __LINE__, _fmt, ##_args); \ } while (0) -#define GUEST_ASSERT(_condition) \ - __GUEST_ASSERT(_condition, #_condition, 0, 0) - -#define GUEST_ASSERT_1(_condition, arg1) \ - __GUEST_ASSERT(_condition, #_condition, 1, (arg1)) - -#define GUEST_ASSERT_2(_condition, arg1, arg2) \ - __GUEST_ASSERT(_condition, #_condition, 2, (arg1), (arg2)) - -#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \ - __GUEST_ASSERT(_condition, #_condition, 3, (arg1), (arg2), (arg3)) - -#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \ - __GUEST_ASSERT(_condition, #_condition, 4, (arg1), (arg2), (arg3), (arg4)) - -#define GUEST_ASSERT_EQ(a, b) __GUEST_ASSERT((a) == (b), #a " == " #b, 2, a, b) +#define __GUEST_ASSERT(_condition, _fmt, _args...) \ + ____GUEST_ASSERT(_condition, #_condition, _fmt, ##_args) -#define __REPORT_GUEST_ASSERT(_ucall, fmt, _args...) \ - TEST_FAIL("%s at %s:%ld\n" fmt, \ - (const char *)(_ucall).args[GUEST_ERROR_STRING], \ - (const char *)(_ucall).args[GUEST_FILE], \ - (_ucall).args[GUEST_LINE], \ - ##_args) +#define GUEST_ASSERT(_condition) \ + __GUEST_ASSERT(_condition, #_condition) -#define GUEST_ASSERT_ARG(ucall, i) ((ucall).args[GUEST_ASSERT_BUILTIN_NARGS + i]) +#define GUEST_FAIL(_fmt, _args...) \ + ucall_assert(UCALL_ABORT, "Unconditional guest failure", \ + __FILE__, __LINE__, _fmt, ##_args) -#define REPORT_GUEST_ASSERT(ucall) \ - __REPORT_GUEST_ASSERT((ucall), "") - -#define REPORT_GUEST_ASSERT_1(ucall, fmt) \ - __REPORT_GUEST_ASSERT((ucall), \ - fmt, \ - GUEST_ASSERT_ARG((ucall), 0)) - -#define REPORT_GUEST_ASSERT_2(ucall, fmt) \ - __REPORT_GUEST_ASSERT((ucall), \ - fmt, \ - GUEST_ASSERT_ARG((ucall), 0), \ - GUEST_ASSERT_ARG((ucall), 1)) - -#define REPORT_GUEST_ASSERT_3(ucall, fmt) \ - __REPORT_GUEST_ASSERT((ucall), \ - fmt, \ - GUEST_ASSERT_ARG((ucall), 0), \ - GUEST_ASSERT_ARG((ucall), 1), \ - GUEST_ASSERT_ARG((ucall), 2)) +#define GUEST_ASSERT_EQ(a, b) \ +do { \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + ____GUEST_ASSERT(__a == __b, #a " == " #b, "%#lx != %#lx (%s != %s)", \ + (unsigned long)(__a), (unsigned long)(__b), #a, #b); \ +} while (0) -#define REPORT_GUEST_ASSERT_4(ucall, fmt) \ - __REPORT_GUEST_ASSERT((ucall), \ - fmt, \ - GUEST_ASSERT_ARG((ucall), 0), \ - GUEST_ASSERT_ARG((ucall), 1), \ - GUEST_ASSERT_ARG((ucall), 2), \ - GUEST_ASSERT_ARG((ucall), 3)) +#define GUEST_ASSERT_NE(a, b) \ +do { \ + typeof(a) __a = (a); \ + typeof(b) __b = (b); \ + ____GUEST_ASSERT(__a != __b, #a " != " #b, "%#lx == %#lx (%s == %s)", \ + (unsigned long)(__a), (unsigned long)(__b), #a, #b); \ +} while (0) -#define REPORT_GUEST_ASSERT_N(ucall, fmt, args...) \ - __REPORT_GUEST_ASSERT((ucall), fmt, ##args) +#define REPORT_GUEST_ASSERT(ucall) \ + test_assert(false, (const char *)(ucall).args[GUEST_ERROR_STRING], \ + (const char *)(ucall).args[GUEST_FILE], \ + (ucall).args[GUEST_LINE], "%s", (ucall).buffer) #endif /* SELFTEST_KVM_UCALL_COMMON_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index aa434c8f19c5..4fd042112526 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -239,7 +239,12 @@ struct kvm_x86_cpu_property { #define X86_PROPERTY_MAX_BASIC_LEAF KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31) #define X86_PROPERTY_PMU_VERSION KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7) #define X86_PROPERTY_PMU_NR_GP_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15) +#define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23) #define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31) +#define X86_PROPERTY_PMU_EVENTS_MASK KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 7) +#define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31) +#define X86_PROPERTY_PMU_NR_FIXED_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4) +#define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12) #define X86_PROPERTY_SUPPORTED_XCR0_LO KVM_X86_CPU_PROPERTY(0xd, 0, EAX, 0, 31) #define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0 KVM_X86_CPU_PROPERTY(0xd, 0, EBX, 0, 31) diff --git a/tools/testing/selftests/kvm/include/x86_64/ucall.h b/tools/testing/selftests/kvm/include/x86_64/ucall.h new file mode 100644 index 000000000000..06b244bd06ee --- /dev/null +++ b/tools/testing/selftests/kvm/include/x86_64/ucall.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef SELFTEST_KVM_UCALL_H +#define SELFTEST_KVM_UCALL_H + +#include "kvm_util_base.h" + +#define UCALL_EXIT_REASON KVM_EXIT_IO + +static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) +{ +} + +#endif diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c index a7001e29dc06..698c1cfa3111 100644 --- a/tools/testing/selftests/kvm/kvm_binary_stats_test.c +++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c @@ -43,8 +43,10 @@ static void stats_test(int stats_fd) id = malloc(header.name_size); TEST_ASSERT(id, "Allocate memory for id string"); - ret = read(stats_fd, id, header.name_size); - TEST_ASSERT(ret == header.name_size, "Read id string"); + ret = pread(stats_fd, id, header.name_size, sizeof(header)); + TEST_ASSERT(ret == header.name_size, + "Expected header size '%u', read '%lu' bytes", + header.name_size, ret); /* Check id string, that should start with "kvm" */ TEST_ASSERT(!strncmp(id, "kvm", 3) && strlen(id) < header.name_size, @@ -165,23 +167,7 @@ static void stats_test(int stats_fd) free(stats_data); free(stats_desc); free(id); -} - - -static void vm_stats_test(struct kvm_vm *vm) -{ - int stats_fd = vm_get_stats_fd(vm); - - stats_test(stats_fd); - close(stats_fd); - TEST_ASSERT(fcntl(stats_fd, F_GETFD) == -1, "Stats fd not freed"); -} - -static void vcpu_stats_test(struct kvm_vcpu *vcpu) -{ - int stats_fd = vcpu_get_stats_fd(vcpu); - stats_test(stats_fd); close(stats_fd); TEST_ASSERT(fcntl(stats_fd, F_GETFD) == -1, "Stats fd not freed"); } @@ -199,6 +185,7 @@ static void vcpu_stats_test(struct kvm_vcpu *vcpu) int main(int argc, char *argv[]) { + int vm_stats_fds, *vcpu_stats_fds; int i, j; struct kvm_vcpu **vcpus; struct kvm_vm **vms; @@ -231,23 +218,58 @@ int main(int argc, char *argv[]) vcpus = malloc(sizeof(struct kvm_vcpu *) * max_vm * max_vcpu); TEST_ASSERT(vcpus, "Allocate memory for storing vCPU pointers"); + /* + * Not per-VM as the array is populated, used, and invalidated within a + * single for-loop iteration. + */ + vcpu_stats_fds = calloc(max_vm, sizeof(*vcpu_stats_fds)); + TEST_ASSERT(vcpu_stats_fds, "Allocate memory for VM stats fds"); + for (i = 0; i < max_vm; ++i) { vms[i] = vm_create_barebones(); for (j = 0; j < max_vcpu; ++j) vcpus[i * max_vcpu + j] = __vm_vcpu_add(vms[i], j); } - /* Check stats read for every VM and VCPU */ + /* + * Check stats read for every VM and vCPU, with a variety of flavors. + * Note, stats_test() closes the passed in stats fd. + */ for (i = 0; i < max_vm; ++i) { - vm_stats_test(vms[i]); + /* + * Verify that creating multiple userspace references to a + * single stats file works and doesn't cause explosions. + */ + vm_stats_fds = vm_get_stats_fd(vms[i]); + stats_test(dup(vm_stats_fds)); + + /* Verify userspace can instantiate multiple stats files. */ + stats_test(vm_get_stats_fd(vms[i])); + + for (j = 0; j < max_vcpu; ++j) { + vcpu_stats_fds[j] = vcpu_get_stats_fd(vcpus[i * max_vcpu + j]); + stats_test(dup(vcpu_stats_fds[j])); + stats_test(vcpu_get_stats_fd(vcpus[i * max_vcpu + j])); + } + + /* + * Close the VM fd and redo the stats tests. KVM should gift a + * reference (to the VM) to each stats fd, i.e. stats should + * still be accessible even after userspace has put its last + * _direct_ reference to the VM. + */ + kvm_vm_free(vms[i]); + + stats_test(vm_stats_fds); for (j = 0; j < max_vcpu; ++j) - vcpu_stats_test(vcpus[i * max_vcpu + j]); + stats_test(vcpu_stats_fds[j]); + ksft_test_result_pass("vm%i\n", i); } - for (i = 0; i < max_vm; ++i) - kvm_vm_free(vms[i]); free(vms); + free(vcpus); + free(vcpu_stats_fds); ksft_finished(); /* Print results and exit() accordingly */ } diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index b3b00be1ef82..69f26d80c821 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -200,7 +200,7 @@ static void *vcpu_worker(void *data) if (READ_ONCE(host_quit)) return NULL; - clock_gettime(CLOCK_MONOTONIC_RAW, &start); + clock_gettime(CLOCK_MONOTONIC, &start); ret = _vcpu_run(vcpu); ts_diff = timespec_elapsed(start); @@ -367,7 +367,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) /* Test the stage of KVM creating mappings */ *current_stage = KVM_CREATE_MAPPINGS; - clock_gettime(CLOCK_MONOTONIC_RAW, &start); + clock_gettime(CLOCK_MONOTONIC, &start); vcpus_complete_new_stage(*current_stage); ts_diff = timespec_elapsed(start); @@ -380,7 +380,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) *current_stage = KVM_UPDATE_MAPPINGS; - clock_gettime(CLOCK_MONOTONIC_RAW, &start); + clock_gettime(CLOCK_MONOTONIC, &start); vcpus_complete_new_stage(*current_stage); ts_diff = timespec_elapsed(start); @@ -392,7 +392,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) *current_stage = KVM_ADJUST_MAPPINGS; - clock_gettime(CLOCK_MONOTONIC_RAW, &start); + clock_gettime(CLOCK_MONOTONIC, &start); vcpus_complete_new_stage(*current_stage); ts_diff = timespec_elapsed(start); diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c index f212bd8ab93d..ddab0ce89d4d 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c @@ -6,11 +6,7 @@ */ #include "kvm_util.h" -/* - * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each - * VM), it must not be accessed from host code. - */ -static vm_vaddr_t *ucall_exit_mmio_addr; +vm_vaddr_t *ucall_exit_mmio_addr; void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) { @@ -23,11 +19,6 @@ void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva); } -void ucall_arch_do_ucall(vm_vaddr_t uc) -{ - WRITE_ONCE(*ucall_exit_mmio_addr, uc); -} - void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; diff --git a/tools/testing/selftests/kvm/lib/guest_sprintf.c b/tools/testing/selftests/kvm/lib/guest_sprintf.c new file mode 100644 index 000000000000..c4a69d8aeb68 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/guest_sprintf.c @@ -0,0 +1,307 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "test_util.h" +#include "kvm_util.h" +#include "ucall_common.h" + +#define APPEND_BUFFER_SAFE(str, end, v) \ +do { \ + GUEST_ASSERT(str < end); \ + *str++ = (v); \ +} while (0) + +static int isdigit(int ch) +{ + return (ch >= '0') && (ch <= '9'); +} + +static int skip_atoi(const char **s) +{ + int i = 0; + + while (isdigit(**s)) + i = i * 10 + *((*s)++) - '0'; + return i; +} + +#define ZEROPAD 1 /* pad with zero */ +#define SIGN 2 /* unsigned/signed long */ +#define PLUS 4 /* show plus */ +#define SPACE 8 /* space if plus */ +#define LEFT 16 /* left justified */ +#define SMALL 32 /* Must be 32 == 0x20 */ +#define SPECIAL 64 /* 0x */ + +#define __do_div(n, base) \ +({ \ + int __res; \ + \ + __res = ((uint64_t) n) % (uint32_t) base; \ + n = ((uint64_t) n) / (uint32_t) base; \ + __res; \ +}) + +static char *number(char *str, const char *end, long num, int base, int size, + int precision, int type) +{ + /* we are called with base 8, 10 or 16, only, thus don't need "G..." */ + static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */ + + char tmp[66]; + char c, sign, locase; + int i; + + /* + * locase = 0 or 0x20. ORing digits or letters with 'locase' + * produces same digits or (maybe lowercased) letters + */ + locase = (type & SMALL); + if (type & LEFT) + type &= ~ZEROPAD; + if (base < 2 || base > 16) + return NULL; + c = (type & ZEROPAD) ? '0' : ' '; + sign = 0; + if (type & SIGN) { + if (num < 0) { + sign = '-'; + num = -num; + size--; + } else if (type & PLUS) { + sign = '+'; + size--; + } else if (type & SPACE) { + sign = ' '; + size--; + } + } + if (type & SPECIAL) { + if (base == 16) + size -= 2; + else if (base == 8) + size--; + } + i = 0; + if (num == 0) + tmp[i++] = '0'; + else + while (num != 0) + tmp[i++] = (digits[__do_div(num, base)] | locase); + if (i > precision) + precision = i; + size -= precision; + if (!(type & (ZEROPAD + LEFT))) + while (size-- > 0) + APPEND_BUFFER_SAFE(str, end, ' '); + if (sign) + APPEND_BUFFER_SAFE(str, end, sign); + if (type & SPECIAL) { + if (base == 8) + APPEND_BUFFER_SAFE(str, end, '0'); + else if (base == 16) { + APPEND_BUFFER_SAFE(str, end, '0'); + APPEND_BUFFER_SAFE(str, end, 'x'); + } + } + if (!(type & LEFT)) + while (size-- > 0) + APPEND_BUFFER_SAFE(str, end, c); + while (i < precision--) + APPEND_BUFFER_SAFE(str, end, '0'); + while (i-- > 0) + APPEND_BUFFER_SAFE(str, end, tmp[i]); + while (size-- > 0) + APPEND_BUFFER_SAFE(str, end, ' '); + + return str; +} + +int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args) +{ + char *str, *end; + const char *s; + uint64_t num; + int i, base; + int len; + + int flags; /* flags to number() */ + + int field_width; /* width of output field */ + int precision; /* + * min. # of digits for integers; max + * number of chars for from string + */ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + + end = buf + n; + GUEST_ASSERT(buf < end); + GUEST_ASSERT(n > 0); + + for (str = buf; *fmt; ++fmt) { + if (*fmt != '%') { + APPEND_BUFFER_SAFE(str, end, *fmt); + continue; + } + + /* process flags */ + flags = 0; +repeat: + ++fmt; /* this also skips first '%' */ + switch (*fmt) { + case '-': + flags |= LEFT; + goto repeat; + case '+': + flags |= PLUS; + goto repeat; + case ' ': + flags |= SPACE; + goto repeat; + case '#': + flags |= SPECIAL; + goto repeat; + case '0': + flags |= ZEROPAD; + goto repeat; + } + + /* get field width */ + field_width = -1; + if (isdigit(*fmt)) + field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + field_width = va_arg(args, int); + if (field_width < 0) { + field_width = -field_width; + flags |= LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt == '.') { + ++fmt; + if (isdigit(*fmt)) + precision = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + precision = va_arg(args, int); + } + if (precision < 0) + precision = 0; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L') { + qualifier = *fmt; + ++fmt; + } + + /* default base */ + base = 10; + + switch (*fmt) { + case 'c': + if (!(flags & LEFT)) + while (--field_width > 0) + APPEND_BUFFER_SAFE(str, end, ' '); + APPEND_BUFFER_SAFE(str, end, + (uint8_t)va_arg(args, int)); + while (--field_width > 0) + APPEND_BUFFER_SAFE(str, end, ' '); + continue; + + case 's': + s = va_arg(args, char *); + len = strnlen(s, precision); + + if (!(flags & LEFT)) + while (len < field_width--) + APPEND_BUFFER_SAFE(str, end, ' '); + for (i = 0; i < len; ++i) + APPEND_BUFFER_SAFE(str, end, *s++); + while (len < field_width--) + APPEND_BUFFER_SAFE(str, end, ' '); + continue; + + case 'p': + if (field_width == -1) { + field_width = 2 * sizeof(void *); + flags |= SPECIAL | SMALL | ZEROPAD; + } + str = number(str, end, + (uint64_t)va_arg(args, void *), 16, + field_width, precision, flags); + continue; + + case 'n': + if (qualifier == 'l') { + long *ip = va_arg(args, long *); + *ip = (str - buf); + } else { + int *ip = va_arg(args, int *); + *ip = (str - buf); + } + continue; + + case '%': + APPEND_BUFFER_SAFE(str, end, '%'); + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'x': + flags |= SMALL; + case 'X': + base = 16; + break; + + case 'd': + case 'i': + flags |= SIGN; + case 'u': + break; + + default: + APPEND_BUFFER_SAFE(str, end, '%'); + if (*fmt) + APPEND_BUFFER_SAFE(str, end, *fmt); + else + --fmt; + continue; + } + if (qualifier == 'l') + num = va_arg(args, uint64_t); + else if (qualifier == 'h') { + num = (uint16_t)va_arg(args, int); + if (flags & SIGN) + num = (int16_t)num; + } else if (flags & SIGN) + num = va_arg(args, int); + else + num = va_arg(args, uint32_t); + str = number(str, end, num, base, field_width, precision, flags); + } + + GUEST_ASSERT(str < end); + *str = '\0'; + return str - buf; +} + +int guest_snprintf(char *buf, int n, const char *fmt, ...) +{ + va_list va; + int len; + + va_start(va, fmt); + len = guest_vsnprintf(buf, n, fmt, va); + va_end(va); + + return len; +} diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 298c4372fb1a..7a8af1821f5d 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -312,6 +312,7 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus, uint64_t extra_mem_pages) { + uint64_t page_size = vm_guest_mode_params[mode].page_size; uint64_t nr_pages; TEST_ASSERT(nr_runnable_vcpus, @@ -340,6 +341,9 @@ static uint64_t vm_nr_pages_required(enum vm_guest_mode mode, */ nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2; + /* Account for the number of pages needed by ucall. */ + nr_pages += ucall_nr_pages_required(page_size); + return vm_adjust_num_guest_pages(mode, nr_pages); } @@ -494,6 +498,23 @@ static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) return pcpu; } +void kvm_print_vcpu_pinning_help(void) +{ + const char *name = program_invocation_name; + + printf(" -c: Pin tasks to physical CPUs. Takes a list of comma separated\n" + " values (target pCPU), one for each vCPU, plus an optional\n" + " entry for the main application task (specified via entry\n" + " <nr_vcpus + 1>). If used, entries must be provided for all\n" + " vCPUs, i.e. pinning vCPUs is all or nothing.\n\n" + " E.g. to create 3 vCPUs, pin vCPU0=>pCPU22, vCPU1=>pCPU23,\n" + " vCPU2=>pCPU24, and pin the application task to pCPU50:\n\n" + " %s -v 3 -c 22,23,24,50\n\n" + " To leave the application task unpinned, drop the final entry:\n\n" + " %s -v 3 -c 22,23,24\n\n" + " (default: no pinning)\n", name, name); +} + void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], int nr_vcpus) { @@ -977,7 +998,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, if (src_type == VM_MEM_SRC_ANONYMOUS_THP) alignment = max(backing_src_pagesz, alignment); - ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz)); + TEST_ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz)); /* Add enough memory to align up if necessary */ if (alignment > 1) diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c index 5f1d3173c238..df457452d146 100644 --- a/tools/testing/selftests/kvm/lib/memstress.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -5,6 +5,7 @@ #define _GNU_SOURCE #include <inttypes.h> +#include <linux/bitmap.h> #include "kvm_util.h" #include "memstress.h" @@ -64,6 +65,9 @@ void memstress_guest_code(uint32_t vcpu_idx) GUEST_ASSERT(vcpu_args->vcpu_idx == vcpu_idx); while (true) { + for (i = 0; i < sizeof(memstress_args); i += args->guest_page_size) + (void) *((volatile char *)args + i); + for (i = 0; i < pages; i++) { if (args->random_access) page = guest_random_u32(&rand_state) % pages; @@ -320,3 +324,74 @@ void memstress_join_vcpu_threads(int nr_vcpus) for (i = 0; i < nr_vcpus; i++) pthread_join(vcpu_threads[i].thread, NULL); } + +static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable) +{ + int i; + + for (i = 0; i < slots; i++) { + int slot = MEMSTRESS_MEM_SLOT_INDEX + i; + int flags = enable ? KVM_MEM_LOG_DIRTY_PAGES : 0; + + vm_mem_region_set_flags(vm, slot, flags); + } +} + +void memstress_enable_dirty_logging(struct kvm_vm *vm, int slots) +{ + toggle_dirty_logging(vm, slots, true); +} + +void memstress_disable_dirty_logging(struct kvm_vm *vm, int slots) +{ + toggle_dirty_logging(vm, slots, false); +} + +void memstress_get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots) +{ + int i; + + for (i = 0; i < slots; i++) { + int slot = MEMSTRESS_MEM_SLOT_INDEX + i; + + kvm_vm_get_dirty_log(vm, slot, bitmaps[i]); + } +} + +void memstress_clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], + int slots, uint64_t pages_per_slot) +{ + int i; + + for (i = 0; i < slots; i++) { + int slot = MEMSTRESS_MEM_SLOT_INDEX + i; + + kvm_vm_clear_dirty_log(vm, slot, bitmaps[i], 0, pages_per_slot); + } +} + +unsigned long **memstress_alloc_bitmaps(int slots, uint64_t pages_per_slot) +{ + unsigned long **bitmaps; + int i; + + bitmaps = malloc(slots * sizeof(bitmaps[0])); + TEST_ASSERT(bitmaps, "Failed to allocate bitmaps array."); + + for (i = 0; i < slots; i++) { + bitmaps[i] = bitmap_zalloc(pages_per_slot); + TEST_ASSERT(bitmaps[i], "Failed to allocate slot bitmap."); + } + + return bitmaps; +} + +void memstress_free_bitmaps(unsigned long *bitmaps[], int slots) +{ + int i; + + for (i = 0; i < slots; i++) + free(bitmaps[i]); + + free(bitmaps); +} diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c index 9a3476a2dfca..fe6d1004f018 100644 --- a/tools/testing/selftests/kvm/lib/riscv/ucall.c +++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c @@ -10,10 +10,6 @@ #include "kvm_util.h" #include "processor.h" -void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) -{ -} - struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, unsigned long arg1, unsigned long arg2, unsigned long arg3, unsigned long arg4, @@ -40,13 +36,6 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, return ret; } -void ucall_arch_do_ucall(vm_vaddr_t uc) -{ - sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT, - KVM_RISCV_SELFTESTS_SBI_UCALL, - uc, 0, 0, 0, 0, 0); -} - void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c index a7f02dc372cf..cca98734653d 100644 --- a/tools/testing/selftests/kvm/lib/s390x/ucall.c +++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c @@ -6,16 +6,6 @@ */ #include "kvm_util.h" -void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) -{ -} - -void ucall_arch_do_ucall(vm_vaddr_t uc) -{ - /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */ - asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory"); -} - void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c index 50e0cf41a7dd..88cb6b84e6f3 100644 --- a/tools/testing/selftests/kvm/lib/sparsebit.c +++ b/tools/testing/selftests/kvm/lib/sparsebit.c @@ -634,7 +634,6 @@ static void node_reduce(struct sparsebit *s, struct node *nodep) tmp = node_prev(s, nodep); node_rm(s, nodep); - nodep = NULL; nodep = tmp; reduction_performed = true; diff --git a/tools/testing/selftests/kvm/lib/string_override.c b/tools/testing/selftests/kvm/lib/string_override.c index 632398adc229..5d1c87277c49 100644 --- a/tools/testing/selftests/kvm/lib/string_override.c +++ b/tools/testing/selftests/kvm/lib/string_override.c @@ -37,3 +37,12 @@ void *memset(void *s, int c, size_t count) *xs++ = c; return s; } + +size_t strnlen(const char *s, size_t count) +{ + const char *sc; + + for (sc = s; count-- && *sc != '\0'; ++sc) + /* nothing */; + return sc - s; +} diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index b772193f6c18..5d7f28b02d73 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -5,6 +5,9 @@ * Copyright (C) 2020, Google LLC. */ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdarg.h> #include <assert.h> #include <ctype.h> #include <limits.h> @@ -377,3 +380,15 @@ int atoi_paranoid(const char *num_str) return num; } + +char *strdup_printf(const char *fmt, ...) +{ + va_list ap; + char *str; + + va_start(ap, fmt); + TEST_ASSERT(vasprintf(&str, fmt, ap) >= 0, "vasprintf() failed"); + va_end(ap); + + return str; +} diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c index 2f0e2ea941cc..816a3fa109bf 100644 --- a/tools/testing/selftests/kvm/lib/ucall_common.c +++ b/tools/testing/selftests/kvm/lib/ucall_common.c @@ -11,6 +11,11 @@ struct ucall_header { struct ucall ucalls[KVM_MAX_VCPUS]; }; +int ucall_nr_pages_required(uint64_t page_size) +{ + return align_up(sizeof(struct ucall_header), page_size) / page_size; +} + /* * ucall_pool holds per-VM values (global data is duplicated by each VM), it * must not be accessed from host code. @@ -70,6 +75,45 @@ static void ucall_free(struct ucall *uc) clear_bit(uc - ucall_pool->ucalls, ucall_pool->in_use); } +void ucall_assert(uint64_t cmd, const char *exp, const char *file, + unsigned int line, const char *fmt, ...) +{ + struct ucall *uc; + va_list va; + + uc = ucall_alloc(); + uc->cmd = cmd; + + WRITE_ONCE(uc->args[GUEST_ERROR_STRING], (uint64_t)(exp)); + WRITE_ONCE(uc->args[GUEST_FILE], (uint64_t)(file)); + WRITE_ONCE(uc->args[GUEST_LINE], line); + + va_start(va, fmt); + guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va); + va_end(va); + + ucall_arch_do_ucall((vm_vaddr_t)uc->hva); + + ucall_free(uc); +} + +void ucall_fmt(uint64_t cmd, const char *fmt, ...) +{ + struct ucall *uc; + va_list va; + + uc = ucall_alloc(); + uc->cmd = cmd; + + va_start(va, fmt); + guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va); + va_end(va); + + ucall_arch_do_ucall((vm_vaddr_t)uc->hva); + + ucall_free(uc); +} + void ucall(uint64_t cmd, int nargs, ...) { struct ucall *uc; diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c index 92cef20902f1..271f63891581 100644 --- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c +++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c @@ -70,7 +70,7 @@ static void *uffd_handler_thread_fn(void *arg) r = read(pollfd[1].fd, &tmp_chr, 1); TEST_ASSERT(r == 1, "Error reading pipefd in UFFD thread\n"); - return NULL; + break; } if (!(pollfd[0].revents & POLLIN)) @@ -103,7 +103,7 @@ static void *uffd_handler_thread_fn(void *arg) ts_diff = timespec_elapsed(start); PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n", pages, ts_diff.tv_sec, ts_diff.tv_nsec, - pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); + pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC)); return NULL; } diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index d4a0b504b1e0..d8288374078e 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -1074,11 +1074,6 @@ static bool kvm_fixup_exception(struct ex_regs *regs) return true; } -void kvm_exit_unexpected_vector(uint32_t value) -{ - ucall(UCALL_UNHANDLED, 1, value); -} - void route_exception(struct ex_regs *regs) { typedef void(*handler)(struct ex_regs *); @@ -1092,7 +1087,10 @@ void route_exception(struct ex_regs *regs) if (kvm_fixup_exception(regs)) return; - kvm_exit_unexpected_vector(regs->vector); + ucall_assert(UCALL_UNHANDLED, + "Unhandled exception in guest", __FILE__, __LINE__, + "Unhandled exception '0x%lx' at guest RIP '0x%lx'", + regs->vector, regs->rip); } void vm_init_descriptor_tables(struct kvm_vm *vm) @@ -1135,12 +1133,8 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) { struct ucall uc; - if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) { - uint64_t vector = uc.args[0]; - - TEST_FAIL("Unexpected vectored event in guest (vector:0x%lx)", - vector); - } + if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) + REPORT_GUEST_ASSERT(uc); } const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c index 4d41dc63cc9e..1265cecc7dd1 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c @@ -8,14 +8,38 @@ #define UCALL_PIO_PORT ((uint16_t)0x1000) -void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa) -{ -} - void ucall_arch_do_ucall(vm_vaddr_t uc) { - asm volatile("in %[port], %%al" - : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory"); + /* + * FIXME: Revert this hack (the entire commit that added it) once nVMX + * preserves L2 GPRs across a nested VM-Exit. If a ucall from L2, e.g. + * to do a GUEST_SYNC(), lands the vCPU in L1, any and all GPRs can be + * clobbered by L1. Save and restore non-volatile GPRs (clobbering RBP + * in particular is problematic) along with RDX and RDI (which are + * inputs), and clobber volatile GPRs. *sigh* + */ +#define HORRIFIC_L2_UCALL_CLOBBER_HACK \ + "rcx", "rsi", "r8", "r9", "r10", "r11" + + asm volatile("push %%rbp\n\t" + "push %%r15\n\t" + "push %%r14\n\t" + "push %%r13\n\t" + "push %%r12\n\t" + "push %%rbx\n\t" + "push %%rdx\n\t" + "push %%rdi\n\t" + "in %[port], %%al\n\t" + "pop %%rdi\n\t" + "pop %%rdx\n\t" + "pop %%rbx\n\t" + "pop %%r12\n\t" + "pop %%r13\n\t" + "pop %%r14\n\t" + "pop %%r15\n\t" + "pop %%rbp\n\t" + : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory", + HORRIFIC_L2_UCALL_CLOBBER_HACK); } void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu) diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c index feaf2be20ff2..6628dc4dda89 100644 --- a/tools/testing/selftests/kvm/max_guest_memory_test.c +++ b/tools/testing/selftests/kvm/max_guest_memory_test.c @@ -55,7 +55,7 @@ static void rendezvous_with_boss(void) static void run_vcpu(struct kvm_vcpu *vcpu) { vcpu_run(vcpu); - ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); } static void *vcpu_worker(void *data) diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 4210cd21d159..20eb2e730800 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -157,7 +157,7 @@ static void *vcpu_worker(void *__data) goto done; break; case UCALL_ABORT: - REPORT_GUEST_ASSERT_1(uc, "val = %lu"); + REPORT_GUEST_ASSERT(uc); break; case UCALL_DONE: goto done; @@ -560,7 +560,7 @@ static void guest_code_test_memslot_rw(void) ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) { uint64_t val = *(uint64_t *)ptr; - GUEST_ASSERT_1(val == MEM_TEST_VAL_2, val); + GUEST_ASSERT_EQ(val, MEM_TEST_VAL_2); *(uint64_t *)ptr = 0; } diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c new file mode 100644 index 000000000000..9f99ea42f45f --- /dev/null +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -0,0 +1,898 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Check for KVM_GET_REG_LIST regressions. + * + * Copyright (c) 2023 Intel Corporation + * + */ +#include <stdio.h> +#include "kvm_util.h" +#include "test_util.h" +#include "processor.h" + +#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK) + +static bool isa_ext_cant_disable[KVM_RISCV_ISA_EXT_MAX]; + +bool filter_reg(__u64 reg) +{ + switch (reg & ~REG_MASK) { + /* + * Same set of ISA_EXT registers are not present on all host because + * ISA_EXT registers are visible to the KVM user space based on the + * ISA extensions available on the host. Also, disabling an ISA + * extension using corresponding ISA_EXT register does not affect + * the visibility of the ISA_EXT register itself. + * + * Based on above, we should filter-out all ISA_EXT registers. + */ + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_A: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_C: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_D: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_F: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_H: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_I: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_M: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVPBMT: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSTC: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVINVAL: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHINTPAUSE: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOM: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOZ: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBB: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSAIA: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_V: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVNAPOT: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBA: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBS: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICNTR: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICSR: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIFENCEI: + case KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHPM: + return true; + /* AIA registers are always available when Ssaia can't be disabled */ + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siselect): + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1): + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2): + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(sieh): + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siph): + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1h): + case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2h): + return isa_ext_cant_disable[KVM_RISCV_ISA_EXT_SSAIA]; + default: + break; + } + + return false; +} + +bool check_reject_set(int err) +{ + return err == EINVAL; +} + +static inline bool vcpu_has_ext(struct kvm_vcpu *vcpu, int ext) +{ + int ret; + unsigned long value; + + ret = __vcpu_get_reg(vcpu, RISCV_ISA_EXT_REG(ext), &value); + return (ret) ? false : !!value; +} + +void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c) +{ + unsigned long isa_ext_state[KVM_RISCV_ISA_EXT_MAX] = { 0 }; + struct vcpu_reg_sublist *s; + int rc; + + for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) + __vcpu_get_reg(vcpu, RISCV_ISA_EXT_REG(i), &isa_ext_state[i]); + + /* + * Disable all extensions which were enabled by default + * if they were available in the risc-v host. + */ + for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) { + rc = __vcpu_set_reg(vcpu, RISCV_ISA_EXT_REG(i), 0); + if (rc && isa_ext_state[i]) + isa_ext_cant_disable[i] = true; + } + + for_each_sublist(c, s) { + if (!s->feature) + continue; + + /* Try to enable the desired extension */ + __vcpu_set_reg(vcpu, RISCV_ISA_EXT_REG(s->feature), 1); + + /* Double check whether the desired extension was enabled */ + __TEST_REQUIRE(vcpu_has_ext(vcpu, s->feature), + "%s not available, skipping tests\n", s->name); + } +} + +static const char *config_id_to_str(__u64 id) +{ + /* reg_off is the offset into struct kvm_riscv_config */ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CONFIG); + + switch (reg_off) { + case KVM_REG_RISCV_CONFIG_REG(isa): + return "KVM_REG_RISCV_CONFIG_REG(isa)"; + case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size): + return "KVM_REG_RISCV_CONFIG_REG(zicbom_block_size)"; + case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size): + return "KVM_REG_RISCV_CONFIG_REG(zicboz_block_size)"; + case KVM_REG_RISCV_CONFIG_REG(mvendorid): + return "KVM_REG_RISCV_CONFIG_REG(mvendorid)"; + case KVM_REG_RISCV_CONFIG_REG(marchid): + return "KVM_REG_RISCV_CONFIG_REG(marchid)"; + case KVM_REG_RISCV_CONFIG_REG(mimpid): + return "KVM_REG_RISCV_CONFIG_REG(mimpid)"; + case KVM_REG_RISCV_CONFIG_REG(satp_mode): + return "KVM_REG_RISCV_CONFIG_REG(satp_mode)"; + } + + /* + * Config regs would grow regularly with new pseudo reg added, so + * just show raw id to indicate a new pseudo config reg. + */ + return strdup_printf("KVM_REG_RISCV_CONFIG_REG(%lld) /* UNKNOWN */", reg_off); +} + +static const char *core_id_to_str(const char *prefix, __u64 id) +{ + /* reg_off is the offset into struct kvm_riscv_core */ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CORE); + + switch (reg_off) { + case KVM_REG_RISCV_CORE_REG(regs.pc): + return "KVM_REG_RISCV_CORE_REG(regs.pc)"; + case KVM_REG_RISCV_CORE_REG(regs.ra): + return "KVM_REG_RISCV_CORE_REG(regs.ra)"; + case KVM_REG_RISCV_CORE_REG(regs.sp): + return "KVM_REG_RISCV_CORE_REG(regs.sp)"; + case KVM_REG_RISCV_CORE_REG(regs.gp): + return "KVM_REG_RISCV_CORE_REG(regs.gp)"; + case KVM_REG_RISCV_CORE_REG(regs.tp): + return "KVM_REG_RISCV_CORE_REG(regs.tp)"; + case KVM_REG_RISCV_CORE_REG(regs.t0) ... KVM_REG_RISCV_CORE_REG(regs.t2): + return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.t%lld)", + reg_off - KVM_REG_RISCV_CORE_REG(regs.t0)); + case KVM_REG_RISCV_CORE_REG(regs.s0) ... KVM_REG_RISCV_CORE_REG(regs.s1): + return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.s%lld)", + reg_off - KVM_REG_RISCV_CORE_REG(regs.s0)); + case KVM_REG_RISCV_CORE_REG(regs.a0) ... KVM_REG_RISCV_CORE_REG(regs.a7): + return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.a%lld)", + reg_off - KVM_REG_RISCV_CORE_REG(regs.a0)); + case KVM_REG_RISCV_CORE_REG(regs.s2) ... KVM_REG_RISCV_CORE_REG(regs.s11): + return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.s%lld)", + reg_off - KVM_REG_RISCV_CORE_REG(regs.s2) + 2); + case KVM_REG_RISCV_CORE_REG(regs.t3) ... KVM_REG_RISCV_CORE_REG(regs.t6): + return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.t%lld)", + reg_off - KVM_REG_RISCV_CORE_REG(regs.t3) + 3); + case KVM_REG_RISCV_CORE_REG(mode): + return "KVM_REG_RISCV_CORE_REG(mode)"; + } + + TEST_FAIL("%s: Unknown core reg id: 0x%llx", prefix, id); + return NULL; +} + +#define RISCV_CSR_GENERAL(csr) \ + "KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(" #csr ")" +#define RISCV_CSR_AIA(csr) \ + "KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_REG(" #csr ")" + +static const char *general_csr_id_to_str(__u64 reg_off) +{ + /* reg_off is the offset into struct kvm_riscv_csr */ + switch (reg_off) { + case KVM_REG_RISCV_CSR_REG(sstatus): + return RISCV_CSR_GENERAL(sstatus); + case KVM_REG_RISCV_CSR_REG(sie): + return RISCV_CSR_GENERAL(sie); + case KVM_REG_RISCV_CSR_REG(stvec): + return RISCV_CSR_GENERAL(stvec); + case KVM_REG_RISCV_CSR_REG(sscratch): + return RISCV_CSR_GENERAL(sscratch); + case KVM_REG_RISCV_CSR_REG(sepc): + return RISCV_CSR_GENERAL(sepc); + case KVM_REG_RISCV_CSR_REG(scause): + return RISCV_CSR_GENERAL(scause); + case KVM_REG_RISCV_CSR_REG(stval): + return RISCV_CSR_GENERAL(stval); + case KVM_REG_RISCV_CSR_REG(sip): + return RISCV_CSR_GENERAL(sip); + case KVM_REG_RISCV_CSR_REG(satp): + return RISCV_CSR_GENERAL(satp); + case KVM_REG_RISCV_CSR_REG(scounteren): + return RISCV_CSR_GENERAL(scounteren); + } + + TEST_FAIL("Unknown general csr reg: 0x%llx", reg_off); + return NULL; +} + +static const char *aia_csr_id_to_str(__u64 reg_off) +{ + /* reg_off is the offset into struct kvm_riscv_aia_csr */ + switch (reg_off) { + case KVM_REG_RISCV_CSR_AIA_REG(siselect): + return RISCV_CSR_AIA(siselect); + case KVM_REG_RISCV_CSR_AIA_REG(iprio1): + return RISCV_CSR_AIA(iprio1); + case KVM_REG_RISCV_CSR_AIA_REG(iprio2): + return RISCV_CSR_AIA(iprio2); + case KVM_REG_RISCV_CSR_AIA_REG(sieh): + return RISCV_CSR_AIA(sieh); + case KVM_REG_RISCV_CSR_AIA_REG(siph): + return RISCV_CSR_AIA(siph); + case KVM_REG_RISCV_CSR_AIA_REG(iprio1h): + return RISCV_CSR_AIA(iprio1h); + case KVM_REG_RISCV_CSR_AIA_REG(iprio2h): + return RISCV_CSR_AIA(iprio2h); + } + + TEST_FAIL("Unknown aia csr reg: 0x%llx", reg_off); + return NULL; +} + +static const char *csr_id_to_str(const char *prefix, __u64 id) +{ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CSR); + __u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK; + + reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK; + + switch (reg_subtype) { + case KVM_REG_RISCV_CSR_GENERAL: + return general_csr_id_to_str(reg_off); + case KVM_REG_RISCV_CSR_AIA: + return aia_csr_id_to_str(reg_off); + } + + TEST_FAIL("%s: Unknown csr subtype: 0x%llx", prefix, reg_subtype); + return NULL; +} + +static const char *timer_id_to_str(const char *prefix, __u64 id) +{ + /* reg_off is the offset into struct kvm_riscv_timer */ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_TIMER); + + switch (reg_off) { + case KVM_REG_RISCV_TIMER_REG(frequency): + return "KVM_REG_RISCV_TIMER_REG(frequency)"; + case KVM_REG_RISCV_TIMER_REG(time): + return "KVM_REG_RISCV_TIMER_REG(time)"; + case KVM_REG_RISCV_TIMER_REG(compare): + return "KVM_REG_RISCV_TIMER_REG(compare)"; + case KVM_REG_RISCV_TIMER_REG(state): + return "KVM_REG_RISCV_TIMER_REG(state)"; + } + + TEST_FAIL("%s: Unknown timer reg id: 0x%llx", prefix, id); + return NULL; +} + +static const char *fp_f_id_to_str(const char *prefix, __u64 id) +{ + /* reg_off is the offset into struct __riscv_f_ext_state */ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_FP_F); + + switch (reg_off) { + case KVM_REG_RISCV_FP_F_REG(f[0]) ... + KVM_REG_RISCV_FP_F_REG(f[31]): + return strdup_printf("KVM_REG_RISCV_FP_F_REG(f[%lld])", reg_off); + case KVM_REG_RISCV_FP_F_REG(fcsr): + return "KVM_REG_RISCV_FP_F_REG(fcsr)"; + } + + TEST_FAIL("%s: Unknown fp_f reg id: 0x%llx", prefix, id); + return NULL; +} + +static const char *fp_d_id_to_str(const char *prefix, __u64 id) +{ + /* reg_off is the offset into struct __riscv_d_ext_state */ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_FP_D); + + switch (reg_off) { + case KVM_REG_RISCV_FP_D_REG(f[0]) ... + KVM_REG_RISCV_FP_D_REG(f[31]): + return strdup_printf("KVM_REG_RISCV_FP_D_REG(f[%lld])", reg_off); + case KVM_REG_RISCV_FP_D_REG(fcsr): + return "KVM_REG_RISCV_FP_D_REG(fcsr)"; + } + + TEST_FAIL("%s: Unknown fp_d reg id: 0x%llx", prefix, id); + return NULL; +} + +static const char *isa_ext_id_to_str(__u64 id) +{ + /* reg_off is the offset into unsigned long kvm_isa_ext_arr[] */ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_ISA_EXT); + + static const char * const kvm_isa_ext_reg_name[] = { + "KVM_RISCV_ISA_EXT_A", + "KVM_RISCV_ISA_EXT_C", + "KVM_RISCV_ISA_EXT_D", + "KVM_RISCV_ISA_EXT_F", + "KVM_RISCV_ISA_EXT_H", + "KVM_RISCV_ISA_EXT_I", + "KVM_RISCV_ISA_EXT_M", + "KVM_RISCV_ISA_EXT_SVPBMT", + "KVM_RISCV_ISA_EXT_SSTC", + "KVM_RISCV_ISA_EXT_SVINVAL", + "KVM_RISCV_ISA_EXT_ZIHINTPAUSE", + "KVM_RISCV_ISA_EXT_ZICBOM", + "KVM_RISCV_ISA_EXT_ZICBOZ", + "KVM_RISCV_ISA_EXT_ZBB", + "KVM_RISCV_ISA_EXT_SSAIA", + "KVM_RISCV_ISA_EXT_V", + "KVM_RISCV_ISA_EXT_SVNAPOT", + "KVM_RISCV_ISA_EXT_ZBA", + "KVM_RISCV_ISA_EXT_ZBS", + "KVM_RISCV_ISA_EXT_ZICNTR", + "KVM_RISCV_ISA_EXT_ZICSR", + "KVM_RISCV_ISA_EXT_ZIFENCEI", + "KVM_RISCV_ISA_EXT_ZIHPM", + }; + + if (reg_off >= ARRAY_SIZE(kvm_isa_ext_reg_name)) { + /* + * isa_ext regs would grow regularly with new isa extension added, so + * just show "reg" to indicate a new extension. + */ + return strdup_printf("%lld /* UNKNOWN */", reg_off); + } + + return kvm_isa_ext_reg_name[reg_off]; +} + +static const char *sbi_ext_single_id_to_str(__u64 reg_off) +{ + /* reg_off is KVM_RISCV_SBI_EXT_ID */ + static const char * const kvm_sbi_ext_reg_name[] = { + "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01", + "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME", + "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI", + "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_RFENCE", + "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SRST", + "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM", + "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU", + "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL", + "KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR", + }; + + if (reg_off >= ARRAY_SIZE(kvm_sbi_ext_reg_name)) { + /* + * sbi_ext regs would grow regularly with new sbi extension added, so + * just show "reg" to indicate a new extension. + */ + return strdup_printf("KVM_REG_RISCV_SBI_SINGLE | %lld /* UNKNOWN */", reg_off); + } + + return kvm_sbi_ext_reg_name[reg_off]; +} + +static const char *sbi_ext_multi_id_to_str(__u64 reg_subtype, __u64 reg_off) +{ + if (reg_off > KVM_REG_RISCV_SBI_MULTI_REG_LAST) { + /* + * sbi_ext regs would grow regularly with new sbi extension added, so + * just show "reg" to indicate a new extension. + */ + return strdup_printf("%lld /* UNKNOWN */", reg_off); + } + + switch (reg_subtype) { + case KVM_REG_RISCV_SBI_MULTI_EN: + return strdup_printf("KVM_REG_RISCV_SBI_MULTI_EN | %lld", reg_off); + case KVM_REG_RISCV_SBI_MULTI_DIS: + return strdup_printf("KVM_REG_RISCV_SBI_MULTI_DIS | %lld", reg_off); + } + + return NULL; +} + +static const char *sbi_ext_id_to_str(const char *prefix, __u64 id) +{ + __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_SBI_EXT); + __u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK; + + reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK; + + switch (reg_subtype) { + case KVM_REG_RISCV_SBI_SINGLE: + return sbi_ext_single_id_to_str(reg_off); + case KVM_REG_RISCV_SBI_MULTI_EN: + case KVM_REG_RISCV_SBI_MULTI_DIS: + return sbi_ext_multi_id_to_str(reg_subtype, reg_off); + } + + TEST_FAIL("%s: Unknown sbi ext subtype: 0x%llx", prefix, reg_subtype); + return NULL; +} + +void print_reg(const char *prefix, __u64 id) +{ + const char *reg_size = NULL; + + TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_RISCV, + "%s: KVM_REG_RISCV missing in reg id: 0x%llx", prefix, id); + + switch (id & KVM_REG_SIZE_MASK) { + case KVM_REG_SIZE_U32: + reg_size = "KVM_REG_SIZE_U32"; + break; + case KVM_REG_SIZE_U64: + reg_size = "KVM_REG_SIZE_U64"; + break; + case KVM_REG_SIZE_U128: + reg_size = "KVM_REG_SIZE_U128"; + break; + default: + TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx", + prefix, (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id); + } + + switch (id & KVM_REG_RISCV_TYPE_MASK) { + case KVM_REG_RISCV_CONFIG: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CONFIG | %s,\n", + reg_size, config_id_to_str(id)); + break; + case KVM_REG_RISCV_CORE: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CORE | %s,\n", + reg_size, core_id_to_str(prefix, id)); + break; + case KVM_REG_RISCV_CSR: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CSR | %s,\n", + reg_size, csr_id_to_str(prefix, id)); + break; + case KVM_REG_RISCV_TIMER: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_TIMER | %s,\n", + reg_size, timer_id_to_str(prefix, id)); + break; + case KVM_REG_RISCV_FP_F: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_FP_F | %s,\n", + reg_size, fp_f_id_to_str(prefix, id)); + break; + case KVM_REG_RISCV_FP_D: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_FP_D | %s,\n", + reg_size, fp_d_id_to_str(prefix, id)); + break; + case KVM_REG_RISCV_ISA_EXT: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_ISA_EXT | %s,\n", + reg_size, isa_ext_id_to_str(id)); + break; + case KVM_REG_RISCV_SBI_EXT: + printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_SBI_EXT | %s,\n", + reg_size, sbi_ext_id_to_str(prefix, id)); + break; + default: + TEST_FAIL("%s: Unexpected reg type: 0x%llx in reg id: 0x%llx", prefix, + (id & KVM_REG_RISCV_TYPE_MASK) >> KVM_REG_RISCV_TYPE_SHIFT, id); + } +} + +/* + * The current blessed list was primed with the output of kernel version + * v6.5-rc3 and then later updated with new registers. + */ +static __u64 base_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(isa), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mvendorid), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(marchid), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mimpid), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(satp_mode), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.pc), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.ra), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.sp), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.gp), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.tp), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t0), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t1), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t2), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s0), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s1), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a0), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a1), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a2), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a3), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a4), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a5), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a6), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a7), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s2), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s3), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s4), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s5), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s6), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s7), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s8), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s9), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s10), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s11), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t3), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t4), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t5), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t6), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(mode), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sstatus), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sie), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(stvec), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sscratch), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sepc), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(scause), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(stval), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sip), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(satp), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(scounteren), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(frequency), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(time), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(compare), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(state), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_RFENCE, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SRST, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_MULTI_EN | 0, + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_MULTI_DIS | 0, +}; + +/* + * The skips_set list registers that should skip set test. + * - KVM_REG_RISCV_TIMER_REG(state): set would fail if it was not initialized properly. + */ +static __u64 base_skips_set[] = { + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(state), +}; + +static __u64 h_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_H, +}; + +static __u64 zicbom_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbom_block_size), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOM, +}; + +static __u64 zicboz_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicboz_block_size), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICBOZ, +}; + +static __u64 svpbmt_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVPBMT, +}; + +static __u64 sstc_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSTC, +}; + +static __u64 svinval_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SVINVAL, +}; + +static __u64 zihintpause_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHINTPAUSE, +}; + +static __u64 zba_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBA, +}; + +static __u64 zbb_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBB, +}; + +static __u64 zbs_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZBS, +}; + +static __u64 zicntr_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICNTR, +}; + +static __u64 zicsr_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZICSR, +}; + +static __u64 zifencei_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIFENCEI, +}; + +static __u64 zihpm_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_ZIHPM, +}; + +static __u64 aia_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siselect), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(sieh), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siph), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1h), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2h), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_SSAIA, +}; + +static __u64 fp_f_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[0]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[1]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[2]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[3]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[4]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[5]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[6]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[7]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[8]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[9]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[10]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[11]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[12]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[13]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[14]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[15]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[16]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[17]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[18]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[19]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[20]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[21]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[22]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[23]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[24]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[25]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[26]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[27]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[28]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[29]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[30]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[31]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(fcsr), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_F, +}; + +static __u64 fp_d_regs[] = { + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[0]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[1]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[2]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[3]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[4]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[5]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[6]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[7]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[8]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[9]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[10]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[11]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[12]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[13]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[14]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[15]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[16]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[17]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[18]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[19]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[20]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[21]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[22]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[23]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[24]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[25]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[26]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[27]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[28]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[29]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[30]), + KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[31]), + KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(fcsr), + KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_RISCV_ISA_EXT_D, +}; + +#define BASE_SUBLIST \ + {"base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), \ + .skips_set = base_skips_set, .skips_set_n = ARRAY_SIZE(base_skips_set),} +#define H_REGS_SUBLIST \ + {"h", .feature = KVM_RISCV_ISA_EXT_H, .regs = h_regs, .regs_n = ARRAY_SIZE(h_regs),} +#define ZICBOM_REGS_SUBLIST \ + {"zicbom", .feature = KVM_RISCV_ISA_EXT_ZICBOM, .regs = zicbom_regs, .regs_n = ARRAY_SIZE(zicbom_regs),} +#define ZICBOZ_REGS_SUBLIST \ + {"zicboz", .feature = KVM_RISCV_ISA_EXT_ZICBOZ, .regs = zicboz_regs, .regs_n = ARRAY_SIZE(zicboz_regs),} +#define SVPBMT_REGS_SUBLIST \ + {"svpbmt", .feature = KVM_RISCV_ISA_EXT_SVPBMT, .regs = svpbmt_regs, .regs_n = ARRAY_SIZE(svpbmt_regs),} +#define SSTC_REGS_SUBLIST \ + {"sstc", .feature = KVM_RISCV_ISA_EXT_SSTC, .regs = sstc_regs, .regs_n = ARRAY_SIZE(sstc_regs),} +#define SVINVAL_REGS_SUBLIST \ + {"svinval", .feature = KVM_RISCV_ISA_EXT_SVINVAL, .regs = svinval_regs, .regs_n = ARRAY_SIZE(svinval_regs),} +#define ZIHINTPAUSE_REGS_SUBLIST \ + {"zihintpause", .feature = KVM_RISCV_ISA_EXT_ZIHINTPAUSE, .regs = zihintpause_regs, .regs_n = ARRAY_SIZE(zihintpause_regs),} +#define ZBA_REGS_SUBLIST \ + {"zba", .feature = KVM_RISCV_ISA_EXT_ZBA, .regs = zba_regs, .regs_n = ARRAY_SIZE(zba_regs),} +#define ZBB_REGS_SUBLIST \ + {"zbb", .feature = KVM_RISCV_ISA_EXT_ZBB, .regs = zbb_regs, .regs_n = ARRAY_SIZE(zbb_regs),} +#define ZBS_REGS_SUBLIST \ + {"zbs", .feature = KVM_RISCV_ISA_EXT_ZBS, .regs = zbs_regs, .regs_n = ARRAY_SIZE(zbs_regs),} +#define ZICNTR_REGS_SUBLIST \ + {"zicntr", .feature = KVM_RISCV_ISA_EXT_ZICNTR, .regs = zicntr_regs, .regs_n = ARRAY_SIZE(zicntr_regs),} +#define ZICSR_REGS_SUBLIST \ + {"zicsr", .feature = KVM_RISCV_ISA_EXT_ZICSR, .regs = zicsr_regs, .regs_n = ARRAY_SIZE(zicsr_regs),} +#define ZIFENCEI_REGS_SUBLIST \ + {"zifencei", .feature = KVM_RISCV_ISA_EXT_ZIFENCEI, .regs = zifencei_regs, .regs_n = ARRAY_SIZE(zifencei_regs),} +#define ZIHPM_REGS_SUBLIST \ + {"zihpm", .feature = KVM_RISCV_ISA_EXT_ZIHPM, .regs = zihpm_regs, .regs_n = ARRAY_SIZE(zihpm_regs),} +#define AIA_REGS_SUBLIST \ + {"aia", .feature = KVM_RISCV_ISA_EXT_SSAIA, .regs = aia_regs, .regs_n = ARRAY_SIZE(aia_regs),} +#define FP_F_REGS_SUBLIST \ + {"fp_f", .feature = KVM_RISCV_ISA_EXT_F, .regs = fp_f_regs, \ + .regs_n = ARRAY_SIZE(fp_f_regs),} +#define FP_D_REGS_SUBLIST \ + {"fp_d", .feature = KVM_RISCV_ISA_EXT_D, .regs = fp_d_regs, \ + .regs_n = ARRAY_SIZE(fp_d_regs),} + +static struct vcpu_reg_list h_config = { + .sublists = { + BASE_SUBLIST, + H_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list zicbom_config = { + .sublists = { + BASE_SUBLIST, + ZICBOM_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list zicboz_config = { + .sublists = { + BASE_SUBLIST, + ZICBOZ_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list svpbmt_config = { + .sublists = { + BASE_SUBLIST, + SVPBMT_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list sstc_config = { + .sublists = { + BASE_SUBLIST, + SSTC_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list svinval_config = { + .sublists = { + BASE_SUBLIST, + SVINVAL_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list zihintpause_config = { + .sublists = { + BASE_SUBLIST, + ZIHINTPAUSE_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list zba_config = { + .sublists = { + BASE_SUBLIST, + ZBA_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list zbb_config = { + .sublists = { + BASE_SUBLIST, + ZBB_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list zbs_config = { + .sublists = { + BASE_SUBLIST, + ZBS_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list zicntr_config = { + .sublists = { + BASE_SUBLIST, + ZICNTR_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list zicsr_config = { + .sublists = { + BASE_SUBLIST, + ZICSR_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list zifencei_config = { + .sublists = { + BASE_SUBLIST, + ZIFENCEI_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list zihpm_config = { + .sublists = { + BASE_SUBLIST, + ZIHPM_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list aia_config = { + .sublists = { + BASE_SUBLIST, + AIA_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list fp_f_config = { + .sublists = { + BASE_SUBLIST, + FP_F_REGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list fp_d_config = { + .sublists = { + BASE_SUBLIST, + FP_D_REGS_SUBLIST, + {0}, + }, +}; + +struct vcpu_reg_list *vcpu_configs[] = { + &h_config, + &zicbom_config, + &zicboz_config, + &svpbmt_config, + &sstc_config, + &svinval_config, + &zihintpause_config, + &zba_config, + &zbb_config, + &zbs_config, + &zicntr_config, + &zicsr_config, + &zifencei_config, + &zihpm_config, + &aia_config, + &fp_f_config, + &fp_d_config, +}; +int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390x/cmma_test.c new file mode 100644 index 000000000000..c8e0a6495a63 --- /dev/null +++ b/tools/testing/selftests/kvm/s390x/cmma_test.c @@ -0,0 +1,700 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test for s390x CMMA migration + * + * Copyright IBM Corp. 2023 + * + * Authors: + * Nico Boehr <nrb@linux.ibm.com> + */ + +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "kselftest.h" + +#define MAIN_PAGE_COUNT 512 + +#define TEST_DATA_PAGE_COUNT 512 +#define TEST_DATA_MEMSLOT 1 +#define TEST_DATA_START_GFN 4096 + +#define TEST_DATA_TWO_PAGE_COUNT 256 +#define TEST_DATA_TWO_MEMSLOT 2 +#define TEST_DATA_TWO_START_GFN 8192 + +static char cmma_value_buf[MAIN_PAGE_COUNT + TEST_DATA_PAGE_COUNT]; + +/** + * Dirty CMMA attributes of exactly one page in the TEST_DATA memslot, + * so use_cmma goes on and the CMMA related ioctls do something. + */ +static void guest_do_one_essa(void) +{ + asm volatile( + /* load TEST_DATA_START_GFN into r1 */ + " llilf 1,%[start_gfn]\n" + /* calculate the address from the gfn */ + " sllg 1,1,12(0)\n" + /* set the first page in TEST_DATA memslot to STABLE */ + " .insn rrf,0xb9ab0000,2,1,1,0\n" + /* hypercall */ + " diag 0,0,0x501\n" + "0: j 0b" + : + : [start_gfn] "L"(TEST_DATA_START_GFN) + : "r1", "r2", "memory", "cc" + ); +} + +/** + * Touch CMMA attributes of all pages in TEST_DATA memslot. Set them to stable + * state. + */ +static void guest_dirty_test_data(void) +{ + asm volatile( + /* r1 = TEST_DATA_START_GFN */ + " xgr 1,1\n" + " llilf 1,%[start_gfn]\n" + /* r5 = TEST_DATA_PAGE_COUNT */ + " lghi 5,%[page_count]\n" + /* r5 += r1 */ + "2: agfr 5,1\n" + /* r2 = r1 << 12 */ + "1: sllg 2,1,12(0)\n" + /* essa(r4, r2, SET_STABLE) */ + " .insn rrf,0xb9ab0000,4,2,1,0\n" + /* i++ */ + " agfi 1,1\n" + /* if r1 < r5 goto 1 */ + " cgrjl 1,5,1b\n" + /* hypercall */ + " diag 0,0,0x501\n" + "0: j 0b" + : + : [start_gfn] "L"(TEST_DATA_START_GFN), + [page_count] "L"(TEST_DATA_PAGE_COUNT) + : + /* the counter in our loop over the pages */ + "r1", + /* the calculated page physical address */ + "r2", + /* ESSA output register */ + "r4", + /* last page */ + "r5", + "cc", "memory" + ); +} + +static struct kvm_vm *create_vm(void) +{ + return ____vm_create(VM_MODE_DEFAULT); +} + +static void create_main_memslot(struct kvm_vm *vm) +{ + int i; + + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, MAIN_PAGE_COUNT, 0); + /* set the array of memslots to zero like __vm_create does */ + for (i = 0; i < NR_MEM_REGIONS; i++) + vm->memslots[i] = 0; +} + +static void create_test_memslot(struct kvm_vm *vm) +{ + vm_userspace_mem_region_add(vm, + VM_MEM_SRC_ANONYMOUS, + TEST_DATA_START_GFN << vm->page_shift, + TEST_DATA_MEMSLOT, + TEST_DATA_PAGE_COUNT, + 0 + ); + vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT; +} + +static void create_memslots(struct kvm_vm *vm) +{ + /* + * Our VM has the following memory layout: + * +------+---------------------------+ + * | GFN | Memslot | + * +------+---------------------------+ + * | 0 | | + * | ... | MAIN (Code, Stack, ...) | + * | 511 | | + * +------+---------------------------+ + * | 4096 | | + * | ... | TEST_DATA | + * | 4607 | | + * +------+---------------------------+ + */ + create_main_memslot(vm); + create_test_memslot(vm); +} + +static void finish_vm_setup(struct kvm_vm *vm) +{ + struct userspace_mem_region *slot0; + + kvm_vm_elf_load(vm, program_invocation_name); + + slot0 = memslot2region(vm, 0); + ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size); + + kvm_arch_vm_post_create(vm); +} + +static struct kvm_vm *create_vm_two_memslots(void) +{ + struct kvm_vm *vm; + + vm = create_vm(); + + create_memslots(vm); + + finish_vm_setup(vm); + + return vm; +} + +static void enable_cmma(struct kvm_vm *vm) +{ + int r; + + r = __kvm_device_attr_set(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA, NULL); + TEST_ASSERT(!r, "enabling cmma failed r=%d errno=%d", r, errno); +} + +static void enable_dirty_tracking(struct kvm_vm *vm) +{ + vm_mem_region_set_flags(vm, 0, KVM_MEM_LOG_DIRTY_PAGES); + vm_mem_region_set_flags(vm, TEST_DATA_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES); +} + +static int __enable_migration_mode(struct kvm_vm *vm) +{ + return __kvm_device_attr_set(vm->fd, + KVM_S390_VM_MIGRATION, + KVM_S390_VM_MIGRATION_START, + NULL + ); +} + +static void enable_migration_mode(struct kvm_vm *vm) +{ + int r = __enable_migration_mode(vm); + + TEST_ASSERT(!r, "enabling migration mode failed r=%d errno=%d", r, errno); +} + +static bool is_migration_mode_on(struct kvm_vm *vm) +{ + u64 out; + int r; + + r = __kvm_device_attr_get(vm->fd, + KVM_S390_VM_MIGRATION, + KVM_S390_VM_MIGRATION_STATUS, + &out + ); + TEST_ASSERT(!r, "getting migration mode status failed r=%d errno=%d", r, errno); + return out; +} + +static int vm_get_cmma_bits(struct kvm_vm *vm, u64 flags, int *errno_out) +{ + struct kvm_s390_cmma_log args; + int rc; + + errno = 0; + + args = (struct kvm_s390_cmma_log){ + .start_gfn = 0, + .count = sizeof(cmma_value_buf), + .flags = flags, + .values = (__u64)&cmma_value_buf[0] + }; + rc = __vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); + + *errno_out = errno; + return rc; +} + +static void test_get_cmma_basic(void) +{ + struct kvm_vm *vm = create_vm_two_memslots(); + struct kvm_vcpu *vcpu; + int rc, errno_out; + + /* GET_CMMA_BITS without CMMA enabled should fail */ + rc = vm_get_cmma_bits(vm, 0, &errno_out); + TEST_ASSERT_EQ(rc, -1); + TEST_ASSERT_EQ(errno_out, ENXIO); + + enable_cmma(vm); + vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa); + + vcpu_run(vcpu); + + /* GET_CMMA_BITS without migration mode and without peeking should fail */ + rc = vm_get_cmma_bits(vm, 0, &errno_out); + TEST_ASSERT_EQ(rc, -1); + TEST_ASSERT_EQ(errno_out, EINVAL); + + /* GET_CMMA_BITS without migration mode and with peeking should work */ + rc = vm_get_cmma_bits(vm, KVM_S390_CMMA_PEEK, &errno_out); + TEST_ASSERT_EQ(rc, 0); + TEST_ASSERT_EQ(errno_out, 0); + + enable_dirty_tracking(vm); + enable_migration_mode(vm); + + /* GET_CMMA_BITS with invalid flags */ + rc = vm_get_cmma_bits(vm, 0xfeedc0fe, &errno_out); + TEST_ASSERT_EQ(rc, -1); + TEST_ASSERT_EQ(errno_out, EINVAL); + + kvm_vm_free(vm); +} + +static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu) +{ + TEST_ASSERT_EQ(vcpu->run->exit_reason, 13); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000); +} + +static void test_migration_mode(void) +{ + struct kvm_vm *vm = create_vm(); + struct kvm_vcpu *vcpu; + u64 orig_psw; + int rc; + + /* enabling migration mode on a VM without memory should fail */ + rc = __enable_migration_mode(vm); + TEST_ASSERT_EQ(rc, -1); + TEST_ASSERT_EQ(errno, EINVAL); + TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off"); + errno = 0; + + create_memslots(vm); + finish_vm_setup(vm); + + enable_cmma(vm); + vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa); + orig_psw = vcpu->run->psw_addr; + + /* + * Execute one essa instruction in the guest. Otherwise the guest will + * not have use_cmm enabled and GET_CMMA_BITS will return no pages. + */ + vcpu_run(vcpu); + assert_exit_was_hypercall(vcpu); + + /* migration mode when memslots have dirty tracking off should fail */ + rc = __enable_migration_mode(vm); + TEST_ASSERT_EQ(rc, -1); + TEST_ASSERT_EQ(errno, EINVAL); + TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off"); + errno = 0; + + /* enable dirty tracking */ + enable_dirty_tracking(vm); + + /* enabling migration mode should work now */ + rc = __enable_migration_mode(vm); + TEST_ASSERT_EQ(rc, 0); + TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on"); + errno = 0; + + /* execute another ESSA instruction to see this goes fine */ + vcpu->run->psw_addr = orig_psw; + vcpu_run(vcpu); + assert_exit_was_hypercall(vcpu); + + /* + * With migration mode on, create a new memslot with dirty tracking off. + * This should turn off migration mode. + */ + TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on"); + vm_userspace_mem_region_add(vm, + VM_MEM_SRC_ANONYMOUS, + TEST_DATA_TWO_START_GFN << vm->page_shift, + TEST_DATA_TWO_MEMSLOT, + TEST_DATA_TWO_PAGE_COUNT, + 0 + ); + TEST_ASSERT(!is_migration_mode_on(vm), + "creating memslot without dirty tracking turns off migration mode" + ); + + /* ESSA instructions should still execute fine */ + vcpu->run->psw_addr = orig_psw; + vcpu_run(vcpu); + assert_exit_was_hypercall(vcpu); + + /* + * Turn on dirty tracking on the new memslot. + * It should be possible to turn migration mode back on again. + */ + vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES); + rc = __enable_migration_mode(vm); + TEST_ASSERT_EQ(rc, 0); + TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on"); + errno = 0; + + /* + * Turn off dirty tracking again, this time with just a flag change. + * Again, migration mode should turn off. + */ + TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on"); + vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, 0); + TEST_ASSERT(!is_migration_mode_on(vm), + "disabling dirty tracking should turn off migration mode" + ); + + /* ESSA instructions should still execute fine */ + vcpu->run->psw_addr = orig_psw; + vcpu_run(vcpu); + assert_exit_was_hypercall(vcpu); + + kvm_vm_free(vm); +} + +/** + * Given a VM with the MAIN and TEST_DATA memslot, assert that both slots have + * CMMA attributes of all pages in both memslots and nothing more dirty. + * This has the useful side effect of ensuring nothing is CMMA dirty after this + * function. + */ +static void assert_all_slots_cmma_dirty(struct kvm_vm *vm) +{ + struct kvm_s390_cmma_log args; + + /* + * First iteration - everything should be dirty. + * Start at the main memslot... + */ + args = (struct kvm_s390_cmma_log){ + .start_gfn = 0, + .count = sizeof(cmma_value_buf), + .flags = 0, + .values = (__u64)&cmma_value_buf[0] + }; + memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); + vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); + TEST_ASSERT_EQ(args.count, MAIN_PAGE_COUNT); + TEST_ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT); + TEST_ASSERT_EQ(args.start_gfn, 0); + + /* ...and then - after a hole - the TEST_DATA memslot should follow */ + args = (struct kvm_s390_cmma_log){ + .start_gfn = MAIN_PAGE_COUNT, + .count = sizeof(cmma_value_buf), + .flags = 0, + .values = (__u64)&cmma_value_buf[0] + }; + memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); + vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); + TEST_ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT); + TEST_ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN); + TEST_ASSERT_EQ(args.remaining, 0); + + /* ...and nothing else should be there */ + args = (struct kvm_s390_cmma_log){ + .start_gfn = TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT, + .count = sizeof(cmma_value_buf), + .flags = 0, + .values = (__u64)&cmma_value_buf[0] + }; + memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); + vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); + TEST_ASSERT_EQ(args.count, 0); + TEST_ASSERT_EQ(args.start_gfn, 0); + TEST_ASSERT_EQ(args.remaining, 0); +} + +/** + * Given a VM, assert no pages are CMMA dirty. + */ +static void assert_no_pages_cmma_dirty(struct kvm_vm *vm) +{ + struct kvm_s390_cmma_log args; + + /* If we start from GFN 0 again, nothing should be dirty. */ + args = (struct kvm_s390_cmma_log){ + .start_gfn = 0, + .count = sizeof(cmma_value_buf), + .flags = 0, + .values = (__u64)&cmma_value_buf[0] + }; + memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); + vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args); + if (args.count || args.remaining || args.start_gfn) + TEST_FAIL("pages are still dirty start_gfn=0x%llx count=%u remaining=%llu", + args.start_gfn, + args.count, + args.remaining + ); +} + +static void test_get_inital_dirty(void) +{ + struct kvm_vm *vm = create_vm_two_memslots(); + struct kvm_vcpu *vcpu; + + enable_cmma(vm); + vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa); + + /* + * Execute one essa instruction in the guest. Otherwise the guest will + * not have use_cmm enabled and GET_CMMA_BITS will return no pages. + */ + vcpu_run(vcpu); + assert_exit_was_hypercall(vcpu); + + enable_dirty_tracking(vm); + enable_migration_mode(vm); + + assert_all_slots_cmma_dirty(vm); + + /* Start from the beginning again and make sure nothing else is dirty */ + assert_no_pages_cmma_dirty(vm); + + kvm_vm_free(vm); +} + +static void query_cmma_range(struct kvm_vm *vm, + u64 start_gfn, u64 gfn_count, + struct kvm_s390_cmma_log *res_out) +{ + *res_out = (struct kvm_s390_cmma_log){ + .start_gfn = start_gfn, + .count = gfn_count, + .flags = 0, + .values = (__u64)&cmma_value_buf[0] + }; + memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf)); + vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, res_out); +} + +/** + * Assert the given cmma_log struct that was executed by query_cmma_range() + * indicates the first dirty gfn is at first_dirty_gfn and contains exactly + * dirty_gfn_count CMMA values. + */ +static void assert_cmma_dirty(u64 first_dirty_gfn, + u64 dirty_gfn_count, + const struct kvm_s390_cmma_log *res) +{ + TEST_ASSERT_EQ(res->start_gfn, first_dirty_gfn); + TEST_ASSERT_EQ(res->count, dirty_gfn_count); + for (size_t i = 0; i < dirty_gfn_count; i++) + TEST_ASSERT_EQ(cmma_value_buf[0], 0x0); /* stable state */ + TEST_ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */ +} + +static void test_get_skip_holes(void) +{ + size_t gfn_offset; + struct kvm_vm *vm = create_vm_two_memslots(); + struct kvm_s390_cmma_log log; + struct kvm_vcpu *vcpu; + u64 orig_psw; + + enable_cmma(vm); + vcpu = vm_vcpu_add(vm, 1, guest_dirty_test_data); + + orig_psw = vcpu->run->psw_addr; + + /* + * Execute some essa instructions in the guest. Otherwise the guest will + * not have use_cmm enabled and GET_CMMA_BITS will return no pages. + */ + vcpu_run(vcpu); + assert_exit_was_hypercall(vcpu); + + enable_dirty_tracking(vm); + enable_migration_mode(vm); + + /* un-dirty all pages */ + assert_all_slots_cmma_dirty(vm); + + /* Then, dirty just the TEST_DATA memslot */ + vcpu->run->psw_addr = orig_psw; + vcpu_run(vcpu); + + gfn_offset = TEST_DATA_START_GFN; + /** + * Query CMMA attributes of one page, starting at page 0. Since the + * main memslot was not touched by the VM, this should yield the first + * page of the TEST_DATA memslot. + * The dirty bitmap should now look like this: + * 0: not dirty + * [0x1, 0x200): dirty + */ + query_cmma_range(vm, 0, 1, &log); + assert_cmma_dirty(gfn_offset, 1, &log); + gfn_offset++; + + /** + * Query CMMA attributes of 32 (0x20) pages past the end of the TEST_DATA + * memslot. This should wrap back to the beginning of the TEST_DATA + * memslot, page 1. + * The dirty bitmap should now look like this: + * [0, 0x21): not dirty + * [0x21, 0x200): dirty + */ + query_cmma_range(vm, TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT, 0x20, &log); + assert_cmma_dirty(gfn_offset, 0x20, &log); + gfn_offset += 0x20; + + /* Skip 32 pages */ + gfn_offset += 0x20; + + /** + * After skipping 32 pages, query the next 32 (0x20) pages. + * The dirty bitmap should now look like this: + * [0, 0x21): not dirty + * [0x21, 0x41): dirty + * [0x41, 0x61): not dirty + * [0x61, 0x200): dirty + */ + query_cmma_range(vm, gfn_offset, 0x20, &log); + assert_cmma_dirty(gfn_offset, 0x20, &log); + gfn_offset += 0x20; + + /** + * Query 1 page from the beginning of the TEST_DATA memslot. This should + * yield page 0x21. + * The dirty bitmap should now look like this: + * [0, 0x22): not dirty + * [0x22, 0x41): dirty + * [0x41, 0x61): not dirty + * [0x61, 0x200): dirty + */ + query_cmma_range(vm, TEST_DATA_START_GFN, 1, &log); + assert_cmma_dirty(TEST_DATA_START_GFN + 0x21, 1, &log); + gfn_offset++; + + /** + * Query 15 (0xF) pages from page 0x23 in TEST_DATA memslot. + * This should yield pages [0x23, 0x33). + * The dirty bitmap should now look like this: + * [0, 0x22): not dirty + * 0x22: dirty + * [0x23, 0x33): not dirty + * [0x33, 0x41): dirty + * [0x41, 0x61): not dirty + * [0x61, 0x200): dirty + */ + gfn_offset = TEST_DATA_START_GFN + 0x23; + query_cmma_range(vm, gfn_offset, 15, &log); + assert_cmma_dirty(gfn_offset, 15, &log); + + /** + * Query 17 (0x11) pages from page 0x22 in TEST_DATA memslot. + * This should yield page [0x22, 0x33) + * The dirty bitmap should now look like this: + * [0, 0x33): not dirty + * [0x33, 0x41): dirty + * [0x41, 0x61): not dirty + * [0x61, 0x200): dirty + */ + gfn_offset = TEST_DATA_START_GFN + 0x22; + query_cmma_range(vm, gfn_offset, 17, &log); + assert_cmma_dirty(gfn_offset, 17, &log); + + /** + * Query 25 (0x19) pages from page 0x40 in TEST_DATA memslot. + * This should yield page 0x40 and nothing more, since there are more + * than 16 non-dirty pages after page 0x40. + * The dirty bitmap should now look like this: + * [0, 0x33): not dirty + * [0x33, 0x40): dirty + * [0x40, 0x61): not dirty + * [0x61, 0x200): dirty + */ + gfn_offset = TEST_DATA_START_GFN + 0x40; + query_cmma_range(vm, gfn_offset, 25, &log); + assert_cmma_dirty(gfn_offset, 1, &log); + + /** + * Query pages [0x33, 0x40). + * The dirty bitmap should now look like this: + * [0, 0x61): not dirty + * [0x61, 0x200): dirty + */ + gfn_offset = TEST_DATA_START_GFN + 0x33; + query_cmma_range(vm, gfn_offset, 0x40 - 0x33, &log); + assert_cmma_dirty(gfn_offset, 0x40 - 0x33, &log); + + /** + * Query the remaining pages [0x61, 0x200). + */ + gfn_offset = TEST_DATA_START_GFN; + query_cmma_range(vm, gfn_offset, TEST_DATA_PAGE_COUNT - 0x61, &log); + assert_cmma_dirty(TEST_DATA_START_GFN + 0x61, TEST_DATA_PAGE_COUNT - 0x61, &log); + + assert_no_pages_cmma_dirty(vm); +} + +struct testdef { + const char *name; + void (*test)(void); +} testlist[] = { + { "migration mode and dirty tracking", test_migration_mode }, + { "GET_CMMA_BITS: basic calls", test_get_cmma_basic }, + { "GET_CMMA_BITS: all pages are dirty initally", test_get_inital_dirty }, + { "GET_CMMA_BITS: holes are skipped", test_get_skip_holes }, +}; + +/** + * The kernel may support CMMA, but the machine may not (i.e. if running as + * guest-3). + * + * In this case, the CMMA capabilities are all there, but the CMMA-related + * ioctls fail. To find out whether the machine supports CMMA, create a + * temporary VM and then query the CMMA feature of the VM. + */ +static int machine_has_cmma(void) +{ + struct kvm_vm *vm = create_vm(); + int r; + + r = !__kvm_has_device_attr(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA); + kvm_vm_free(vm); + + return r; +} + +int main(int argc, char *argv[]) +{ + int idx; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS)); + TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_CMMA_MIGRATION)); + TEST_REQUIRE(machine_has_cmma()); + + ksft_print_header(); + + ksft_set_plan(ARRAY_SIZE(testlist)); + + for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { + testlist[idx].test(); + ksft_test_result_pass("%s\n", testlist[idx].name); + } + + ksft_finished(); /* Print results and exit() accordingly */ +} diff --git a/tools/testing/selftests/kvm/s390x/debug_test.c b/tools/testing/selftests/kvm/s390x/debug_test.c new file mode 100644 index 000000000000..84313fb27529 --- /dev/null +++ b/tools/testing/selftests/kvm/s390x/debug_test.c @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Test KVM debugging features. */ +#include "kvm_util.h" +#include "test_util.h" + +#include <linux/kvm.h> + +#define __LC_SVC_NEW_PSW 0x1c0 +#define __LC_PGM_NEW_PSW 0x1d0 +#define ICPT_INSTRUCTION 0x04 +#define IPA0_DIAG 0x8300 +#define PGM_SPECIFICATION 0x06 + +/* Common code for testing single-stepping interruptions. */ +extern char int_handler[]; +asm("int_handler:\n" + "j .\n"); + +static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code, + size_t new_psw_off, uint64_t *new_psw) +{ + struct kvm_guest_debug debug = {}; + struct kvm_regs regs; + struct kvm_vm *vm; + char *lowcore; + + vm = vm_create_with_one_vcpu(vcpu, guest_code); + lowcore = addr_gpa2hva(vm, 0); + new_psw[0] = (*vcpu)->run->psw_mask; + new_psw[1] = (uint64_t)int_handler; + memcpy(lowcore + new_psw_off, new_psw, 16); + vcpu_regs_get(*vcpu, ®s); + regs.gprs[2] = -1; + vcpu_regs_set(*vcpu, ®s); + debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP; + vcpu_guest_debug_set(*vcpu, &debug); + vcpu_run(*vcpu); + + return vm; +} + +static void test_step_int(void *guest_code, size_t new_psw_off) +{ + struct kvm_vcpu *vcpu; + uint64_t new_psw[2]; + struct kvm_vm *vm; + + vm = test_step_int_1(&vcpu, guest_code, new_psw_off, new_psw); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG); + TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]); + TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]); + kvm_vm_free(vm); +} + +/* Test single-stepping "boring" program interruptions. */ +extern char test_step_pgm_guest_code[]; +asm("test_step_pgm_guest_code:\n" + ".insn rr,0x1d00,%r1,%r0 /* dr %r1,%r0 */\n" + "j .\n"); + +static void test_step_pgm(void) +{ + test_step_int(test_step_pgm_guest_code, __LC_PGM_NEW_PSW); +} + +/* + * Test single-stepping program interruptions caused by DIAG. + * Userspace emulation must not interfere with single-stepping. + */ +extern char test_step_pgm_diag_guest_code[]; +asm("test_step_pgm_diag_guest_code:\n" + "diag %r0,%r0,0\n" + "j .\n"); + +static void test_step_pgm_diag(void) +{ + struct kvm_s390_irq irq = { + .type = KVM_S390_PROGRAM_INT, + .u.pgm.code = PGM_SPECIFICATION, + }; + struct kvm_vcpu *vcpu; + uint64_t new_psw[2]; + struct kvm_vm *vm; + + vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code, + __LC_PGM_NEW_PSW, new_psw); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INSTRUCTION); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa & 0xff00, IPA0_DIAG); + vcpu_ioctl(vcpu, KVM_S390_IRQ, &irq); + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG); + TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]); + TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]); + kvm_vm_free(vm); +} + +/* + * Test single-stepping program interruptions caused by ISKE. + * CPUSTAT_KSS handling must not interfere with single-stepping. + */ +extern char test_step_pgm_iske_guest_code[]; +asm("test_step_pgm_iske_guest_code:\n" + "iske %r2,%r2\n" + "j .\n"); + +static void test_step_pgm_iske(void) +{ + test_step_int(test_step_pgm_iske_guest_code, __LC_PGM_NEW_PSW); +} + +/* + * Test single-stepping program interruptions caused by LCTL. + * KVM emulation must not interfere with single-stepping. + */ +extern char test_step_pgm_lctl_guest_code[]; +asm("test_step_pgm_lctl_guest_code:\n" + "lctl %c0,%c0,1\n" + "j .\n"); + +static void test_step_pgm_lctl(void) +{ + test_step_int(test_step_pgm_lctl_guest_code, __LC_PGM_NEW_PSW); +} + +/* Test single-stepping supervisor-call interruptions. */ +extern char test_step_svc_guest_code[]; +asm("test_step_svc_guest_code:\n" + "svc 0\n" + "j .\n"); + +static void test_step_svc(void) +{ + test_step_int(test_step_svc_guest_code, __LC_SVC_NEW_PSW); +} + +/* Run all tests above. */ +static struct testdef { + const char *name; + void (*test)(void); +} testlist[] = { + { "single-step pgm", test_step_pgm }, + { "single-step pgm caused by diag", test_step_pgm_diag }, + { "single-step pgm caused by iske", test_step_pgm_iske }, + { "single-step pgm caused by lctl", test_step_pgm_lctl }, + { "single-step svc", test_step_svc }, +}; + +int main(int argc, char *argv[]) +{ + int idx; + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(testlist)); + for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { + testlist[idx].test(); + ksft_test_result_pass("%s\n", testlist[idx].name); + } + ksft_finished(); +} diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index 8e4b94d7b8dd..bb3ca9a5d731 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -4,7 +4,6 @@ * * Copyright (C) 2019, Red Hat, Inc. */ - #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -279,10 +278,10 @@ enum stage { vcpu_run(__vcpu); \ get_ucall(__vcpu, &uc); \ if (uc.cmd == UCALL_ABORT) { \ - REPORT_GUEST_ASSERT_2(uc, "hints: %lu, %lu"); \ + REPORT_GUEST_ASSERT(uc); \ } \ - ASSERT_EQ(uc.cmd, UCALL_SYNC); \ - ASSERT_EQ(uc.args[1], __stage); \ + TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC); \ + TEST_ASSERT_EQ(uc.args[1], __stage); \ }) \ static void prepare_mem12(void) @@ -469,7 +468,7 @@ static __uint128_t cut_to_size(int size, __uint128_t val) case 16: return val; } - GUEST_ASSERT_1(false, "Invalid size"); + GUEST_FAIL("Invalid size = %u", size); return 0; } @@ -598,7 +597,7 @@ static bool _cmpxchg(int size, void *target, __uint128_t *old_addr, __uint128_t return ret; } } - GUEST_ASSERT_1(false, "Invalid size"); + GUEST_FAIL("Invalid size = %u", size); return 0; } @@ -808,7 +807,7 @@ static void test_termination(void) HOST_SYNC(t.vcpu, STAGE_IDLED); MOP(t.vm, ABSOLUTE, READ, &teid, sizeof(teid), GADDR(prefix + 168)); /* Bits 56, 60, 61 form a code, 0 being the only one allowing for termination */ - ASSERT_EQ(teid & teid_mask, 0); + TEST_ASSERT_EQ(teid & teid_mask, 0); kvm_vm_free(t.kvm_vm); } diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390x/tprot.c index a9a0b76e5fa4..c73f948c9b63 100644 --- a/tools/testing/selftests/kvm/s390x/tprot.c +++ b/tools/testing/selftests/kvm/s390x/tprot.c @@ -4,7 +4,6 @@ * * Copyright IBM Corp. 2021 */ - #include <sys/mman.h> #include "test_util.h" #include "kvm_util.h" @@ -156,7 +155,9 @@ static enum stage perform_next_stage(int *i, bool mapped_0) !mapped_0; if (!skip) { result = test_protection(tests[*i].addr, tests[*i].key); - GUEST_ASSERT_2(result == tests[*i].expected, *i, result); + __GUEST_ASSERT(result == tests[*i].expected, + "Wanted %u, got %u, for i = %u", + tests[*i].expected, result, *i); } } return stage; @@ -190,9 +191,9 @@ static void guest_code(void) vcpu_run(__vcpu); \ get_ucall(__vcpu, &uc); \ if (uc.cmd == UCALL_ABORT) \ - REPORT_GUEST_ASSERT_2(uc, "hints: %lu, %lu"); \ - ASSERT_EQ(uc.cmd, UCALL_SYNC); \ - ASSERT_EQ(uc.args[1], __stage); \ + REPORT_GUEST_ASSERT(uc); \ + TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC); \ + TEST_ASSERT_EQ(uc.args[1], __stage); \ }) #define HOST_SYNC(vcpu, stage) \ diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index a849ce23ca97..b32960189f5f 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -88,7 +88,7 @@ static void *vcpu_worker(void *data) } if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT) - REPORT_GUEST_ASSERT_1(uc, "val = %lu"); + REPORT_GUEST_ASSERT(uc); return NULL; } @@ -156,19 +156,22 @@ static void guest_code_move_memory_region(void) * window where the memslot is invalid is usually quite small. */ val = guest_spin_on_val(0); - GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val); + __GUEST_ASSERT(val == 1 || val == MMIO_VAL, + "Expected '1' or MMIO ('%llx'), got '%llx'", MMIO_VAL, val); /* Spin until the misaligning memory region move completes. */ val = guest_spin_on_val(MMIO_VAL); - GUEST_ASSERT_1(val == 1 || val == 0, val); + __GUEST_ASSERT(val == 1 || val == 0, + "Expected '0' or '1' (no MMIO), got '%llx'", val); /* Spin until the memory region starts to get re-aligned. */ val = guest_spin_on_val(0); - GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val); + __GUEST_ASSERT(val == 1 || val == MMIO_VAL, + "Expected '1' or MMIO ('%llx'), got '%llx'", MMIO_VAL, val); /* Spin until the re-aligning memory region move completes. */ val = guest_spin_on_val(MMIO_VAL); - GUEST_ASSERT_1(val == 1, val); + GUEST_ASSERT_EQ(val, 1); GUEST_DONE(); } @@ -224,15 +227,15 @@ static void guest_code_delete_memory_region(void) /* Spin until the memory region is deleted. */ val = guest_spin_on_val(0); - GUEST_ASSERT_1(val == MMIO_VAL, val); + GUEST_ASSERT_EQ(val, MMIO_VAL); /* Spin until the memory region is recreated. */ val = guest_spin_on_val(MMIO_VAL); - GUEST_ASSERT_1(val == 0, val); + GUEST_ASSERT_EQ(val, 0); /* Spin until the memory region is deleted. */ val = guest_spin_on_val(0); - GUEST_ASSERT_1(val == MMIO_VAL, val); + GUEST_ASSERT_EQ(val, MMIO_VAL); asm("1:\n\t" ".pushsection .rodata\n\t" @@ -249,7 +252,7 @@ static void guest_code_delete_memory_region(void) "final_rip_end: .quad 1b\n\t" ".popsection"); - GUEST_ASSERT_1(0, 0); + GUEST_ASSERT(0); } static void test_delete_memory_region(void) diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index c87f38712073..171adfb2a6cb 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -31,8 +31,8 @@ static uint64_t guest_stolen_time[NR_VCPUS]; static void check_status(struct kvm_steal_time *st) { GUEST_ASSERT(!(READ_ONCE(st->version) & 1)); - GUEST_ASSERT(READ_ONCE(st->flags) == 0); - GUEST_ASSERT(READ_ONCE(st->preempted) == 0); + GUEST_ASSERT_EQ(READ_ONCE(st->flags), 0); + GUEST_ASSERT_EQ(READ_ONCE(st->preempted), 0); } static void guest_code(int cpu) @@ -40,7 +40,7 @@ static void guest_code(int cpu) struct kvm_steal_time *st = st_gva[cpu]; uint32_t version; - GUEST_ASSERT(rdmsr(MSR_KVM_STEAL_TIME) == ((uint64_t)st_gva[cpu] | KVM_MSR_ENABLED)); + GUEST_ASSERT_EQ(rdmsr(MSR_KVM_STEAL_TIME), ((uint64_t)st_gva[cpu] | KVM_MSR_ENABLED)); memset(st, 0, sizeof(*st)); GUEST_SYNC(0); @@ -122,8 +122,8 @@ static int64_t smccc(uint32_t func, uint64_t arg) static void check_status(struct st_time *st) { - GUEST_ASSERT(READ_ONCE(st->rev) == 0); - GUEST_ASSERT(READ_ONCE(st->attr) == 0); + GUEST_ASSERT_EQ(READ_ONCE(st->rev), 0); + GUEST_ASSERT_EQ(READ_ONCE(st->attr), 0); } static void guest_code(int cpu) @@ -132,15 +132,15 @@ static void guest_code(int cpu) int64_t status; status = smccc(SMCCC_ARCH_FEATURES, PV_TIME_FEATURES); - GUEST_ASSERT(status == 0); + GUEST_ASSERT_EQ(status, 0); status = smccc(PV_TIME_FEATURES, PV_TIME_FEATURES); - GUEST_ASSERT(status == 0); + GUEST_ASSERT_EQ(status, 0); status = smccc(PV_TIME_FEATURES, PV_TIME_ST); - GUEST_ASSERT(status == 0); + GUEST_ASSERT_EQ(status, 0); status = smccc(PV_TIME_ST, 0); - GUEST_ASSERT(status != -1); - GUEST_ASSERT(status == (ulong)st_gva[cpu]); + GUEST_ASSERT_NE(status, -1); + GUEST_ASSERT_EQ(status, (ulong)st_gva[cpu]); st = (struct st_time *)status; GUEST_SYNC(0); diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c index 2fc3ad9c887e..3b34d8156d1c 100644 --- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c @@ -35,10 +35,10 @@ static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid) guest_cpuid->entries[i].index, &eax, &ebx, &ecx, &edx); - GUEST_ASSERT(eax == guest_cpuid->entries[i].eax && - ebx == guest_cpuid->entries[i].ebx && - ecx == guest_cpuid->entries[i].ecx && - edx == guest_cpuid->entries[i].edx); + GUEST_ASSERT_EQ(eax, guest_cpuid->entries[i].eax); + GUEST_ASSERT_EQ(ebx, guest_cpuid->entries[i].ebx); + GUEST_ASSERT_EQ(ecx, guest_cpuid->entries[i].ecx); + GUEST_ASSERT_EQ(edx, guest_cpuid->entries[i].edx); } } @@ -51,7 +51,7 @@ static void guest_main(struct kvm_cpuid2 *guest_cpuid) GUEST_SYNC(2); - GUEST_ASSERT(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF) == 0x40000001); + GUEST_ASSERT_EQ(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF), 0x40000001); GUEST_DONE(); } @@ -116,7 +116,7 @@ static void run_vcpu(struct kvm_vcpu *vcpu, int stage) case UCALL_DONE: return; case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); + REPORT_GUEST_ASSERT(uc); default: TEST_ASSERT(false, "Unexpected exit: %s", exit_reason_str(vcpu->run->exit_reason)); @@ -163,6 +163,25 @@ static void set_cpuid_after_run(struct kvm_vcpu *vcpu) ent->eax = eax; } +static void test_get_cpuid2(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent + 1); + int i, r; + + vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid); + TEST_ASSERT(cpuid->nent == vcpu->cpuid->nent, + "KVM didn't update nent on success, wanted %u, got %u\n", + vcpu->cpuid->nent, cpuid->nent); + + for (i = 0; i < vcpu->cpuid->nent; i++) { + cpuid->nent = i; + r = __vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid); + TEST_ASSERT(r && errno == E2BIG, KVM_IOCTL_ERROR(KVM_GET_CPUID2, r)); + TEST_ASSERT(cpuid->nent == i, "KVM modified nent on failure"); + } + free(cpuid); +} + int main(void) { struct kvm_vcpu *vcpu; @@ -183,5 +202,7 @@ int main(void) set_cpuid_after_run(vcpu); + test_get_cpuid2(vcpu); + kvm_vm_free(vm); } diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c new file mode 100644 index 000000000000..634c6bfcd572 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c @@ -0,0 +1,259 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM dirty logging page splitting test + * + * Based on dirty_log_perf.c + * + * Copyright (C) 2018, Red Hat, Inc. + * Copyright (C) 2023, Google, Inc. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <pthread.h> +#include <linux/bitmap.h> + +#include "kvm_util.h" +#include "test_util.h" +#include "memstress.h" +#include "guest_modes.h" + +#define VCPUS 2 +#define SLOTS 2 +#define ITERATIONS 2 + +static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; + +static enum vm_mem_backing_src_type backing_src = VM_MEM_SRC_ANONYMOUS_HUGETLB; + +static u64 dirty_log_manual_caps; +static bool host_quit; +static int iteration; +static int vcpu_last_completed_iteration[KVM_MAX_VCPUS]; + +struct kvm_page_stats { + uint64_t pages_4k; + uint64_t pages_2m; + uint64_t pages_1g; + uint64_t hugepages; +}; + +static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage) +{ + stats->pages_4k = vm_get_stat(vm, "pages_4k"); + stats->pages_2m = vm_get_stat(vm, "pages_2m"); + stats->pages_1g = vm_get_stat(vm, "pages_1g"); + stats->hugepages = stats->pages_2m + stats->pages_1g; + + pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n", + stage, stats->pages_4k, stats->pages_2m, stats->pages_1g, + stats->hugepages); +} + +static void run_vcpu_iteration(struct kvm_vm *vm) +{ + int i; + + iteration++; + for (i = 0; i < VCPUS; i++) { + while (READ_ONCE(vcpu_last_completed_iteration[i]) != + iteration) + ; + } +} + +static void vcpu_worker(struct memstress_vcpu_args *vcpu_args) +{ + struct kvm_vcpu *vcpu = vcpu_args->vcpu; + int vcpu_idx = vcpu_args->vcpu_idx; + + while (!READ_ONCE(host_quit)) { + int current_iteration = READ_ONCE(iteration); + + vcpu_run(vcpu); + + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC); + + vcpu_last_completed_iteration[vcpu_idx] = current_iteration; + + /* Wait for the start of the next iteration to be signaled. */ + while (current_iteration == READ_ONCE(iteration) && + READ_ONCE(iteration) >= 0 && + !READ_ONCE(host_quit)) + ; + } +} + +static void run_test(enum vm_guest_mode mode, void *unused) +{ + struct kvm_vm *vm; + unsigned long **bitmaps; + uint64_t guest_num_pages; + uint64_t host_num_pages; + uint64_t pages_per_slot; + int i; + uint64_t total_4k_pages; + struct kvm_page_stats stats_populated; + struct kvm_page_stats stats_dirty_logging_enabled; + struct kvm_page_stats stats_dirty_pass[ITERATIONS]; + struct kvm_page_stats stats_clear_pass[ITERATIONS]; + struct kvm_page_stats stats_dirty_logging_disabled; + struct kvm_page_stats stats_repopulated; + + vm = memstress_create_vm(mode, VCPUS, guest_percpu_mem_size, + SLOTS, backing_src, false); + + guest_num_pages = (VCPUS * guest_percpu_mem_size) >> vm->page_shift; + guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); + host_num_pages = vm_num_host_pages(mode, guest_num_pages); + pages_per_slot = host_num_pages / SLOTS; + + bitmaps = memstress_alloc_bitmaps(SLOTS, pages_per_slot); + + if (dirty_log_manual_caps) + vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, + dirty_log_manual_caps); + + /* Start the iterations */ + iteration = -1; + host_quit = false; + + for (i = 0; i < VCPUS; i++) + vcpu_last_completed_iteration[i] = -1; + + memstress_start_vcpu_threads(VCPUS, vcpu_worker); + + run_vcpu_iteration(vm); + get_page_stats(vm, &stats_populated, "populating memory"); + + /* Enable dirty logging */ + memstress_enable_dirty_logging(vm, SLOTS); + + get_page_stats(vm, &stats_dirty_logging_enabled, "enabling dirty logging"); + + while (iteration < ITERATIONS) { + run_vcpu_iteration(vm); + get_page_stats(vm, &stats_dirty_pass[iteration - 1], + "dirtying memory"); + + memstress_get_dirty_log(vm, bitmaps, SLOTS); + + if (dirty_log_manual_caps) { + memstress_clear_dirty_log(vm, bitmaps, SLOTS, pages_per_slot); + + get_page_stats(vm, &stats_clear_pass[iteration - 1], "clearing dirty log"); + } + } + + /* Disable dirty logging */ + memstress_disable_dirty_logging(vm, SLOTS); + + get_page_stats(vm, &stats_dirty_logging_disabled, "disabling dirty logging"); + + /* Run vCPUs again to fault pages back in. */ + run_vcpu_iteration(vm); + get_page_stats(vm, &stats_repopulated, "repopulating memory"); + + /* + * Tell the vCPU threads to quit. No need to manually check that vCPUs + * have stopped running after disabling dirty logging, the join will + * wait for them to exit. + */ + host_quit = true; + memstress_join_vcpu_threads(VCPUS); + + memstress_free_bitmaps(bitmaps, SLOTS); + memstress_destroy_vm(vm); + + /* Make assertions about the page counts. */ + total_4k_pages = stats_populated.pages_4k; + total_4k_pages += stats_populated.pages_2m * 512; + total_4k_pages += stats_populated.pages_1g * 512 * 512; + + /* + * Check that all huge pages were split. Since large pages can only + * exist in the data slot, and the vCPUs should have dirtied all pages + * in the data slot, there should be no huge pages left after splitting. + * Splitting happens at dirty log enable time without + * KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 and after the first clear pass + * with that capability. + */ + if (dirty_log_manual_caps) { + TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0); + TEST_ASSERT_EQ(stats_clear_pass[0].pages_4k, total_4k_pages); + TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages); + } else { + TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0); + TEST_ASSERT_EQ(stats_dirty_logging_enabled.pages_4k, total_4k_pages); + } + + /* + * Once dirty logging is disabled and the vCPUs have touched all their + * memory again, the page counts should be the same as they were + * right after initial population of memory. + */ + TEST_ASSERT_EQ(stats_populated.pages_4k, stats_repopulated.pages_4k); + TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m); + TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g); +} + +static void help(char *name) +{ + puts(""); + printf("usage: %s [-h] [-b vcpu bytes] [-s mem type]\n", + name); + puts(""); + printf(" -b: specify the size of the memory region which should be\n" + " dirtied by each vCPU. e.g. 10M or 3G.\n" + " (default: 1G)\n"); + backing_src_help("-s"); + puts(""); +} + +int main(int argc, char *argv[]) +{ + int opt; + + TEST_REQUIRE(get_kvm_param_bool("eager_page_split")); + TEST_REQUIRE(get_kvm_param_bool("tdp_mmu")); + + while ((opt = getopt(argc, argv, "b:hs:")) != -1) { + switch (opt) { + case 'b': + guest_percpu_mem_size = parse_size(optarg); + break; + case 'h': + help(argv[0]); + exit(0); + case 's': + backing_src = parse_backing_src_type(optarg); + break; + default: + help(argv[0]); + exit(1); + } + } + + if (!is_backing_src_hugetlb(backing_src)) { + pr_info("This test will only work reliably with HugeTLB memory. " + "It can work with THP, but that is best effort.\n"); + } + + guest_modes_append_default(); + + dirty_log_manual_caps = 0; + for_each_guest_mode(run_test, NULL); + + dirty_log_manual_caps = + kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); + + if (dirty_log_manual_caps) { + dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | + KVM_DIRTY_LOG_INITIALLY_SET); + for_each_guest_mode(run_test, NULL); + } else { + pr_info("Skipping testing with MANUAL_PROTECT as it is not supported"); + } + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c index e334844d6e1d..6c2e5e0ceb1f 100644 --- a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c +++ b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c @@ -35,7 +35,7 @@ int main(int argc, char *argv[]) vcpu_run(vcpu); handle_flds_emulation_failure_exit(vcpu); vcpu_run(vcpu); - ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); kvm_vm_free(vm); return 0; diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c index 73af44d2167f..e036db1f32b9 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c @@ -8,7 +8,6 @@ * Copyright 2022 Google LLC * Author: Vipin Sharma <vipinsh@google.com> */ - #include "kvm_util.h" #include "processor.h" #include "hyperv.h" @@ -84,7 +83,7 @@ int main(void) switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "arg1 = %ld, arg2 = %ld"); + REPORT_GUEST_ASSERT(uc); break; case UCALL_DONE: break; diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c index 78606de9385d..9f28aa276c4e 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c @@ -53,16 +53,21 @@ static void guest_msr(struct msr_data *msr) vector = rdmsr_safe(msr->idx, &msr_val); if (msr->fault_expected) - GUEST_ASSERT_3(vector == GP_VECTOR, msr->idx, vector, GP_VECTOR); + __GUEST_ASSERT(vector == GP_VECTOR, + "Expected #GP on %sMSR(0x%x), got vector '0x%x'", + msr->idx, msr->write ? "WR" : "RD", vector); else - GUEST_ASSERT_3(!vector, msr->idx, vector, 0); + __GUEST_ASSERT(!vector, + "Expected success on %sMSR(0x%x), got vector '0x%x'", + msr->idx, msr->write ? "WR" : "RD", vector); if (vector || is_write_only_msr(msr->idx)) goto done; if (msr->write) - GUEST_ASSERT_3(msr_val == msr->write_val, msr->idx, - msr_val, msr->write_val); + __GUEST_ASSERT(!vector, + "WRMSR(0x%x) to '0x%llx', RDMSR read '0x%llx'", + msr->idx, msr->write_val, msr_val); /* Invariant TSC bit appears when TSC invariant control MSR is written to */ if (msr->idx == HV_X64_MSR_TSC_INVARIANT_CONTROL) { @@ -82,7 +87,7 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) u64 res, input, output; uint8_t vector; - GUEST_ASSERT(hcall->control); + GUEST_ASSERT_NE(hcall->control, 0); wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa); @@ -96,10 +101,14 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall) vector = __hyperv_hypercall(hcall->control, input, output, &res); if (hcall->ud_expected) { - GUEST_ASSERT_2(vector == UD_VECTOR, hcall->control, vector); + __GUEST_ASSERT(vector == UD_VECTOR, + "Expected #UD for control '%u', got vector '0x%x'", + hcall->control, vector); } else { - GUEST_ASSERT_2(!vector, hcall->control, vector); - GUEST_ASSERT_2(res == hcall->expect, hcall->expect, res); + __GUEST_ASSERT(!vector, + "Expected no exception for control '%u', got vector '0x%x'", + hcall->control, vector); + GUEST_ASSERT_EQ(res, hcall->expect); } GUEST_DONE(); @@ -495,7 +504,7 @@ static void guest_test_msrs_access(void) switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - REPORT_GUEST_ASSERT_3(uc, "MSR = %lx, arg1 = %lx, arg2 = %lx"); + REPORT_GUEST_ASSERT(uc); return; case UCALL_DONE: break; @@ -665,7 +674,7 @@ static void guest_test_hcalls_access(void) switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "arg1 = %lx, arg2 = %lx"); + REPORT_GUEST_ASSERT(uc); return; case UCALL_DONE: break; diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c index f774a9e62858..9e2879af7c20 100644 --- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c +++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c @@ -46,10 +46,10 @@ static void test_msr(struct msr_data *msr) PR_MSR(msr); vector = rdmsr_safe(msr->idx, &ignored); - GUEST_ASSERT_1(vector == GP_VECTOR, vector); + GUEST_ASSERT_EQ(vector, GP_VECTOR); vector = wrmsr_safe(msr->idx, 0); - GUEST_ASSERT_1(vector == GP_VECTOR, vector); + GUEST_ASSERT_EQ(vector, GP_VECTOR); } struct hcall_data { @@ -77,7 +77,7 @@ static void test_hcall(struct hcall_data *hc) PR_HCALL(hc); r = kvm_hypercall(hc->nr, 0, 0, 0, 0); - GUEST_ASSERT(r == -KVM_ENOSYS); + GUEST_ASSERT_EQ(r, -KVM_ENOSYS); } static void guest_main(void) @@ -125,7 +125,7 @@ static void enter_guest(struct kvm_vcpu *vcpu) pr_hcall(&uc); break; case UCALL_ABORT: - REPORT_GUEST_ASSERT_1(uc, "vector = %lu"); + REPORT_GUEST_ASSERT(uc); return; case UCALL_DONE: return; diff --git a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c index 72812644d7f5..80aa3d8b18f8 100644 --- a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c +++ b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c @@ -16,14 +16,25 @@ enum monitor_mwait_testcases { MWAIT_DISABLED = BIT(2), }; +/* + * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD, in all + * other scenarios KVM should emulate them as nops. + */ +#define GUEST_ASSERT_MONITOR_MWAIT(insn, testcase, vector) \ +do { \ + bool fault_wanted = ((testcase) & MWAIT_QUIRK_DISABLED) && \ + ((testcase) & MWAIT_DISABLED); \ + \ + if (fault_wanted) \ + __GUEST_ASSERT((vector) == UD_VECTOR, \ + "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", vector); \ + else \ + __GUEST_ASSERT(!(vector), \ + "Expected success on " insn " for testcase '0x%x', got '0x%x'", vector); \ +} while (0) + static void guest_monitor_wait(int testcase) { - /* - * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD, - * in all other scenarios KVM should emulate them as nops. - */ - bool fault_wanted = (testcase & MWAIT_QUIRK_DISABLED) && - (testcase & MWAIT_DISABLED); u8 vector; GUEST_SYNC(testcase); @@ -33,16 +44,10 @@ static void guest_monitor_wait(int testcase) * intercept checks, so the inputs for MONITOR and MWAIT must be valid. */ vector = kvm_asm_safe("monitor", "a"(guest_monitor_wait), "c"(0), "d"(0)); - if (fault_wanted) - GUEST_ASSERT_2(vector == UD_VECTOR, testcase, vector); - else - GUEST_ASSERT_2(!vector, testcase, vector); + GUEST_ASSERT_MONITOR_MWAIT("MONITOR", testcase, vector); vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0)); - if (fault_wanted) - GUEST_ASSERT_2(vector == UD_VECTOR, testcase, vector); - else - GUEST_ASSERT_2(!vector, testcase, vector); + GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector); } static void guest_code(void) @@ -85,7 +90,7 @@ int main(int argc, char *argv[]) testcase = uc.args[1]; break; case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "testcase = %lx, vector = %ld"); + REPORT_GUEST_ASSERT(uc); goto done; case UCALL_DONE: goto done; diff --git a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c index 6502aa23c2f8..3670331adf21 100644 --- a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c +++ b/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c @@ -180,9 +180,7 @@ static void assert_ucall_vector(struct kvm_vcpu *vcpu, int vector) "Expected L2 to ask for %d, L2 says it's done", vector); break; case UCALL_ABORT: - TEST_FAIL("%s at %s:%ld (0x%lx != 0x%lx)", - (const char *)uc.args[0], __FILE__, uc.args[1], - uc.args[2], uc.args[3]); + REPORT_GUEST_ASSERT(uc); break; default: TEST_FAIL("Expected L2 to ask for %d, got unexpected ucall %lu", vector, uc.cmd); @@ -247,12 +245,12 @@ int main(int argc, char *argv[]) /* Verify the pending events comes back out the same as it went in. */ vcpu_events_get(vcpu, &events); - ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD, - KVM_VCPUEVENT_VALID_PAYLOAD); - ASSERT_EQ(events.exception.pending, true); - ASSERT_EQ(events.exception.nr, SS_VECTOR); - ASSERT_EQ(events.exception.has_error_code, true); - ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE); + TEST_ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD, + KVM_VCPUEVENT_VALID_PAYLOAD); + TEST_ASSERT_EQ(events.exception.pending, true); + TEST_ASSERT_EQ(events.exception.nr, SS_VECTOR); + TEST_ASSERT_EQ(events.exception.has_error_code, true); + TEST_ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE); /* * Run for real with the pending #SS, L1 should get a VM-Exit due to diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c index 251794f83719..7f36c32fa760 100644 --- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c +++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c @@ -226,7 +226,7 @@ static void help(char *name) puts(""); printf("usage: %s [-h] [-p period_ms] [-t token]\n", name); puts(""); - printf(" -p: The NX reclaim period in miliseconds.\n"); + printf(" -p: The NX reclaim period in milliseconds.\n"); printf(" -t: The magic token to indicate environment setup is done.\n"); printf(" -r: The test has reboot permissions and can disable NX huge pages.\n"); puts(""); diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c index 40507ed9fe8a..283cc55597a4 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -27,6 +27,15 @@ #define ARCH_PERFMON_BRANCHES_RETIRED 5 #define NUM_BRANCHES 42 +#define INTEL_PMC_IDX_FIXED 32 + +/* Matches KVM_PMU_EVENT_FILTER_MAX_EVENTS in pmu.c */ +#define MAX_FILTER_EVENTS 300 +#define MAX_TEST_EVENTS 10 + +#define PMU_EVENT_FILTER_INVALID_ACTION (KVM_PMU_EVENT_DENY + 1) +#define PMU_EVENT_FILTER_INVALID_FLAGS (KVM_PMU_EVENT_FLAGS_VALID_MASK << 1) +#define PMU_EVENT_FILTER_INVALID_NEVENTS (MAX_FILTER_EVENTS + 1) /* * This is how the event selector and unit mask are stored in an AMD @@ -69,21 +78,33 @@ #define INST_RETIRED EVENT(0xc0, 0) +struct __kvm_pmu_event_filter { + __u32 action; + __u32 nevents; + __u32 fixed_counter_bitmap; + __u32 flags; + __u32 pad[4]; + __u64 events[MAX_FILTER_EVENTS]; +}; + /* * This event list comprises Intel's eight architectural events plus * AMD's "retired branch instructions" for Zen[123] (and possibly * other AMD CPUs). */ -static const uint64_t event_list[] = { - EVENT(0x3c, 0), - INST_RETIRED, - EVENT(0x3c, 1), - EVENT(0x2e, 0x4f), - EVENT(0x2e, 0x41), - EVENT(0xc4, 0), - EVENT(0xc5, 0), - EVENT(0xa4, 1), - AMD_ZEN_BR_RETIRED, +static const struct __kvm_pmu_event_filter base_event_filter = { + .nevents = ARRAY_SIZE(base_event_filter.events), + .events = { + EVENT(0x3c, 0), + INST_RETIRED, + EVENT(0x3c, 1), + EVENT(0x2e, 0x4f), + EVENT(0x2e, 0x41), + EVENT(0xc4, 0), + EVENT(0xc5, 0), + EVENT(0xa4, 1), + AMD_ZEN_BR_RETIRED, + }, }; struct { @@ -225,48 +246,11 @@ static bool sanity_check_pmu(struct kvm_vcpu *vcpu) return !r; } -static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents) -{ - struct kvm_pmu_event_filter *f; - int size = sizeof(*f) + nevents * sizeof(f->events[0]); - - f = malloc(size); - TEST_ASSERT(f, "Out of memory"); - memset(f, 0, size); - f->nevents = nevents; - return f; -} - - -static struct kvm_pmu_event_filter * -create_pmu_event_filter(const uint64_t event_list[], int nevents, - uint32_t action, uint32_t flags) -{ - struct kvm_pmu_event_filter *f; - int i; - - f = alloc_pmu_event_filter(nevents); - f->action = action; - f->flags = flags; - for (i = 0; i < nevents; i++) - f->events[i] = event_list[i]; - - return f; -} - -static struct kvm_pmu_event_filter *event_filter(uint32_t action) -{ - return create_pmu_event_filter(event_list, - ARRAY_SIZE(event_list), - action, 0); -} - /* * Remove the first occurrence of 'event' (if any) from the filter's * event list. */ -static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f, - uint64_t event) +static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event) { bool found = false; int i; @@ -279,7 +263,6 @@ static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f, } if (found) f->nevents--; - return f; } #define ASSERT_PMC_COUNTING_INSTRUCTIONS() \ @@ -315,66 +298,73 @@ static void test_without_filter(struct kvm_vcpu *vcpu) } static void test_with_filter(struct kvm_vcpu *vcpu, - struct kvm_pmu_event_filter *f) + struct __kvm_pmu_event_filter *__f) { + struct kvm_pmu_event_filter *f = (void *)__f; + vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f); run_vcpu_and_sync_pmc_results(vcpu); } static void test_amd_deny_list(struct kvm_vcpu *vcpu) { - uint64_t event = EVENT(0x1C2, 0); - struct kvm_pmu_event_filter *f; + struct __kvm_pmu_event_filter f = { + .action = KVM_PMU_EVENT_DENY, + .nevents = 1, + .events = { + EVENT(0x1C2, 0), + }, + }; - f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY, 0); - test_with_filter(vcpu, f); - free(f); + test_with_filter(vcpu, &f); ASSERT_PMC_COUNTING_INSTRUCTIONS(); } static void test_member_deny_list(struct kvm_vcpu *vcpu) { - struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY); + struct __kvm_pmu_event_filter f = base_event_filter; - test_with_filter(vcpu, f); - free(f); + f.action = KVM_PMU_EVENT_DENY; + test_with_filter(vcpu, &f); ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS(); } static void test_member_allow_list(struct kvm_vcpu *vcpu) { - struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW); + struct __kvm_pmu_event_filter f = base_event_filter; - test_with_filter(vcpu, f); - free(f); + f.action = KVM_PMU_EVENT_ALLOW; + test_with_filter(vcpu, &f); ASSERT_PMC_COUNTING_INSTRUCTIONS(); } static void test_not_member_deny_list(struct kvm_vcpu *vcpu) { - struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY); + struct __kvm_pmu_event_filter f = base_event_filter; - remove_event(f, INST_RETIRED); - remove_event(f, INTEL_BR_RETIRED); - remove_event(f, AMD_ZEN_BR_RETIRED); - test_with_filter(vcpu, f); - free(f); + f.action = KVM_PMU_EVENT_DENY; + + remove_event(&f, INST_RETIRED); + remove_event(&f, INTEL_BR_RETIRED); + remove_event(&f, AMD_ZEN_BR_RETIRED); + test_with_filter(vcpu, &f); ASSERT_PMC_COUNTING_INSTRUCTIONS(); } static void test_not_member_allow_list(struct kvm_vcpu *vcpu) { - struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW); + struct __kvm_pmu_event_filter f = base_event_filter; + + f.action = KVM_PMU_EVENT_ALLOW; - remove_event(f, INST_RETIRED); - remove_event(f, INTEL_BR_RETIRED); - remove_event(f, AMD_ZEN_BR_RETIRED); - test_with_filter(vcpu, f); - free(f); + remove_event(&f, INST_RETIRED); + remove_event(&f, INTEL_BR_RETIRED); + remove_event(&f, AMD_ZEN_BR_RETIRED); + test_with_filter(vcpu, &f); ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS(); } @@ -569,19 +559,16 @@ static void run_masked_events_test(struct kvm_vcpu *vcpu, const uint64_t masked_events[], const int nmasked_events) { - struct kvm_pmu_event_filter *f; + struct __kvm_pmu_event_filter f = { + .nevents = nmasked_events, + .action = KVM_PMU_EVENT_ALLOW, + .flags = KVM_PMU_EVENT_FLAG_MASKED_EVENTS, + }; - f = create_pmu_event_filter(masked_events, nmasked_events, - KVM_PMU_EVENT_ALLOW, - KVM_PMU_EVENT_FLAG_MASKED_EVENTS); - test_with_filter(vcpu, f); - free(f); + memcpy(f.events, masked_events, sizeof(uint64_t) * nmasked_events); + test_with_filter(vcpu, &f); } -/* Matches KVM_PMU_EVENT_FILTER_MAX_EVENTS in pmu.c */ -#define MAX_FILTER_EVENTS 300 -#define MAX_TEST_EVENTS 10 - #define ALLOW_LOADS BIT(0) #define ALLOW_STORES BIT(1) #define ALLOW_LOADS_STORES BIT(2) @@ -753,21 +740,33 @@ static void test_masked_events(struct kvm_vcpu *vcpu) run_masked_events_tests(vcpu, events, nevents); } -static int run_filter_test(struct kvm_vcpu *vcpu, const uint64_t *events, - int nevents, uint32_t flags) +static int set_pmu_event_filter(struct kvm_vcpu *vcpu, + struct __kvm_pmu_event_filter *__f) { - struct kvm_pmu_event_filter *f; - int r; + struct kvm_pmu_event_filter *f = (void *)__f; - f = create_pmu_event_filter(events, nevents, KVM_PMU_EVENT_ALLOW, flags); - r = __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f); - free(f); + return __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f); +} - return r; +static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event, + uint32_t flags, uint32_t action) +{ + struct __kvm_pmu_event_filter f = { + .nevents = 1, + .flags = flags, + .action = action, + .events = { + event, + }, + }; + + return set_pmu_event_filter(vcpu, &f); } static void test_filter_ioctl(struct kvm_vcpu *vcpu) { + uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); + struct __kvm_pmu_event_filter f; uint64_t e = ~0ul; int r; @@ -775,15 +774,144 @@ static void test_filter_ioctl(struct kvm_vcpu *vcpu) * Unfortunately having invalid bits set in event data is expected to * pass when flags == 0 (bits other than eventsel+umask). */ - r = run_filter_test(vcpu, &e, 1, 0); + r = set_pmu_single_event_filter(vcpu, e, 0, KVM_PMU_EVENT_ALLOW); TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing"); - r = run_filter_test(vcpu, &e, 1, KVM_PMU_EVENT_FLAG_MASKED_EVENTS); + r = set_pmu_single_event_filter(vcpu, e, + KVM_PMU_EVENT_FLAG_MASKED_EVENTS, + KVM_PMU_EVENT_ALLOW); TEST_ASSERT(r != 0, "Invalid PMU Event Filter is expected to fail"); e = KVM_PMU_ENCODE_MASKED_ENTRY(0xff, 0xff, 0xff, 0xf); - r = run_filter_test(vcpu, &e, 1, KVM_PMU_EVENT_FLAG_MASKED_EVENTS); + r = set_pmu_single_event_filter(vcpu, e, + KVM_PMU_EVENT_FLAG_MASKED_EVENTS, + KVM_PMU_EVENT_ALLOW); TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing"); + + f = base_event_filter; + f.action = PMU_EVENT_FILTER_INVALID_ACTION; + r = set_pmu_event_filter(vcpu, &f); + TEST_ASSERT(r, "Set invalid action is expected to fail"); + + f = base_event_filter; + f.flags = PMU_EVENT_FILTER_INVALID_FLAGS; + r = set_pmu_event_filter(vcpu, &f); + TEST_ASSERT(r, "Set invalid flags is expected to fail"); + + f = base_event_filter; + f.nevents = PMU_EVENT_FILTER_INVALID_NEVENTS; + r = set_pmu_event_filter(vcpu, &f); + TEST_ASSERT(r, "Exceeding the max number of filter events should fail"); + + f = base_event_filter; + f.fixed_counter_bitmap = ~GENMASK_ULL(nr_fixed_counters, 0); + r = set_pmu_event_filter(vcpu, &f); + TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed"); +} + +static void intel_run_fixed_counter_guest_code(uint8_t fixed_ctr_idx) +{ + for (;;) { + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); + wrmsr(MSR_CORE_PERF_FIXED_CTR0 + fixed_ctr_idx, 0); + + /* Only OS_EN bit is enabled for fixed counter[idx]. */ + wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, BIT_ULL(4 * fixed_ctr_idx)); + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, + BIT_ULL(INTEL_PMC_IDX_FIXED + fixed_ctr_idx)); + __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); + + GUEST_SYNC(rdmsr(MSR_CORE_PERF_FIXED_CTR0 + fixed_ctr_idx)); + } +} + +static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu, + uint32_t action, uint32_t bitmap) +{ + struct __kvm_pmu_event_filter f = { + .action = action, + .fixed_counter_bitmap = bitmap, + }; + set_pmu_event_filter(vcpu, &f); + + return run_vcpu_to_sync(vcpu); +} + +static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu, + uint32_t action, + uint32_t bitmap) +{ + struct __kvm_pmu_event_filter f = base_event_filter; + + f.action = action; + f.fixed_counter_bitmap = bitmap; + set_pmu_event_filter(vcpu, &f); + + return run_vcpu_to_sync(vcpu); +} + +static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx, + uint8_t nr_fixed_counters) +{ + unsigned int i; + uint32_t bitmap; + uint64_t count; + + TEST_ASSERT(nr_fixed_counters < sizeof(bitmap) * 8, + "Invalid nr_fixed_counters"); + + /* + * Check the fixed performance counter can count normally when KVM + * userspace doesn't set any pmu filter. + */ + count = run_vcpu_to_sync(vcpu); + TEST_ASSERT(count, "Unexpected count value: %ld\n", count); + + for (i = 0; i < BIT(nr_fixed_counters); i++) { + bitmap = BIT(i); + count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_ALLOW, + bitmap); + TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx))); + + count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_DENY, + bitmap); + TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx))); + + /* + * Check that fixed_counter_bitmap has higher priority than + * events[] when both are set. + */ + count = test_set_gp_and_fixed_event_filter(vcpu, + KVM_PMU_EVENT_ALLOW, + bitmap); + TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx))); + + count = test_set_gp_and_fixed_event_filter(vcpu, + KVM_PMU_EVENT_DENY, + bitmap); + TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx))); + } +} + +static void test_fixed_counter_bitmap(void) +{ + uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS); + struct kvm_vm *vm; + struct kvm_vcpu *vcpu; + uint8_t idx; + + /* + * Check that pmu_event_filter works as expected when it's applied to + * fixed performance counters. + */ + for (idx = 0; idx < nr_fixed_counters; idx++) { + vm = vm_create_with_one_vcpu(&vcpu, + intel_run_fixed_counter_guest_code); + vcpu_args_set(vcpu, 1, idx); + __test_fixed_counter_bitmap(vcpu, idx, nr_fixed_counters); + kvm_vm_free(vm); + } } int main(int argc, char *argv[]) @@ -829,6 +957,7 @@ int main(int argc, char *argv[]) kvm_vm_free(vm); test_pmu_config_disable(guest_code); + test_fixed_counter_bitmap(); return 0; } diff --git a/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c b/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c index 4c416ebe7d66..cbc92a862ea9 100644 --- a/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c +++ b/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c @@ -57,7 +57,7 @@ int main(void) for (i = 0; i < KVM_MAX_VCPUS; i++) vcpu_set_msr(vcpus[i], MSR_IA32_APICBASE, LAPIC_X2APIC); - ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0); + TEST_ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0); vcpuN = vcpus[KVM_MAX_VCPUS - 1]; for (t = time(NULL) + TIMEOUT; time(NULL) < t;) { @@ -65,8 +65,8 @@ int main(void) vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_DISABLED); } - ASSERT_EQ(pthread_cancel(thread), 0); - ASSERT_EQ(pthread_join(thread, NULL), 0); + TEST_ASSERT_EQ(pthread_cancel(thread), 0); + TEST_ASSERT_EQ(pthread_join(thread, NULL), 0); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c index b25d7556b638..366cf18600bc 100644 --- a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c +++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c @@ -20,7 +20,7 @@ static void guest_bsp_vcpu(void *arg) { GUEST_SYNC(1); - GUEST_ASSERT(get_bsp_flag() != 0); + GUEST_ASSERT_NE(get_bsp_flag(), 0); GUEST_DONE(); } @@ -29,7 +29,7 @@ static void guest_not_bsp_vcpu(void *arg) { GUEST_SYNC(1); - GUEST_ASSERT(get_bsp_flag() == 0); + GUEST_ASSERT_EQ(get_bsp_flag(), 0); GUEST_DONE(); } @@ -65,7 +65,7 @@ static void run_vcpu(struct kvm_vcpu *vcpu) stage); break; case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); + REPORT_GUEST_ASSERT(uc); default: TEST_ASSERT(false, "Unexpected exit: %s", exit_reason_str(vcpu->run->exit_reason)); diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c index a284fcef6ed7..3610981d9162 100644 --- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c +++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c @@ -22,26 +22,25 @@ #include "kvm_util.h" #include "processor.h" -static void test_cr4_feature_bit(struct kvm_vcpu *vcpu, struct kvm_sregs *orig, - uint64_t feature_bit) -{ - struct kvm_sregs sregs; - int rc; - - /* Skip the sub-test, the feature is supported. */ - if (orig->cr4 & feature_bit) - return; - - memcpy(&sregs, orig, sizeof(sregs)); - sregs.cr4 |= feature_bit; - - rc = _vcpu_sregs_set(vcpu, &sregs); - TEST_ASSERT(rc, "KVM allowed unsupported CR4 bit (0x%lx)", feature_bit); - - /* Sanity check that KVM didn't change anything. */ - vcpu_sregs_get(vcpu, &sregs); - TEST_ASSERT(!memcmp(&sregs, orig, sizeof(sregs)), "KVM modified sregs"); -} +#define TEST_INVALID_CR_BIT(vcpu, cr, orig, bit) \ +do { \ + struct kvm_sregs new; \ + int rc; \ + \ + /* Skip the sub-test, the feature/bit is supported. */ \ + if (orig.cr & bit) \ + break; \ + \ + memcpy(&new, &orig, sizeof(sregs)); \ + new.cr |= bit; \ + \ + rc = _vcpu_sregs_set(vcpu, &new); \ + TEST_ASSERT(rc, "KVM allowed invalid " #cr " bit (0x%lx)", bit); \ + \ + /* Sanity check that KVM didn't change anything. */ \ + vcpu_sregs_get(vcpu, &new); \ + TEST_ASSERT(!memcmp(&new, &orig, sizeof(new)), "KVM modified sregs"); \ +} while (0) static uint64_t calc_supported_cr4_feature_bits(void) { @@ -80,7 +79,7 @@ int main(int argc, char *argv[]) struct kvm_vcpu *vcpu; struct kvm_vm *vm; uint64_t cr4; - int rc; + int rc, i; /* * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and @@ -92,6 +91,7 @@ int main(int argc, char *argv[]) vcpu_sregs_get(vcpu, &sregs); + sregs.cr0 = 0; sregs.cr4 |= calc_supported_cr4_feature_bits(); cr4 = sregs.cr4; @@ -103,16 +103,24 @@ int main(int argc, char *argv[]) sregs.cr4, cr4); /* Verify all unsupported features are rejected by KVM. */ - test_cr4_feature_bit(vcpu, &sregs, X86_CR4_UMIP); - test_cr4_feature_bit(vcpu, &sregs, X86_CR4_LA57); - test_cr4_feature_bit(vcpu, &sregs, X86_CR4_VMXE); - test_cr4_feature_bit(vcpu, &sregs, X86_CR4_SMXE); - test_cr4_feature_bit(vcpu, &sregs, X86_CR4_FSGSBASE); - test_cr4_feature_bit(vcpu, &sregs, X86_CR4_PCIDE); - test_cr4_feature_bit(vcpu, &sregs, X86_CR4_OSXSAVE); - test_cr4_feature_bit(vcpu, &sregs, X86_CR4_SMEP); - test_cr4_feature_bit(vcpu, &sregs, X86_CR4_SMAP); - test_cr4_feature_bit(vcpu, &sregs, X86_CR4_PKE); + TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_UMIP); + TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_LA57); + TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE); + TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMXE); + TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_FSGSBASE); + TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PCIDE); + TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_OSXSAVE); + TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMEP); + TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMAP); + TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PKE); + + for (i = 32; i < 64; i++) + TEST_INVALID_CR_BIT(vcpu, cr0, sregs, BIT(i)); + + /* NW without CD is illegal, as is PG without PE. */ + TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_NW); + TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_PG); + kvm_vm_free(vm); /* Create a "real" VM and verify APIC_BASE can be set. */ diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c index 4e2479716da6..7ee44496cf97 100644 --- a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c +++ b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c @@ -8,7 +8,6 @@ * Copyright (C) 2021, Red Hat, Inc. * */ - #include <stdatomic.h> #include <stdio.h> #include <unistd.h> @@ -34,13 +33,12 @@ static void l2_guest_code_int(void); static void guest_int_handler(struct ex_regs *regs) { int_fired++; - GUEST_ASSERT_2(regs->rip == (unsigned long)l2_guest_code_int, - regs->rip, (unsigned long)l2_guest_code_int); + GUEST_ASSERT_EQ(regs->rip, (unsigned long)l2_guest_code_int); } static void l2_guest_code_int(void) { - GUEST_ASSERT_1(int_fired == 1, int_fired); + GUEST_ASSERT_EQ(int_fired, 1); /* * Same as the vmmcall() function, but with a ud2 sneaked after the @@ -53,7 +51,7 @@ static void l2_guest_code_int(void) : "rbx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"); - GUEST_ASSERT_1(bp_fired == 1, bp_fired); + GUEST_ASSERT_EQ(bp_fired, 1); hlt(); } @@ -66,9 +64,9 @@ static void guest_nmi_handler(struct ex_regs *regs) if (nmi_stage_get() == 1) { vmmcall(); - GUEST_ASSERT(false); + GUEST_FAIL("Unexpected resume after VMMCALL"); } else { - GUEST_ASSERT_1(nmi_stage_get() == 3, nmi_stage_get()); + GUEST_ASSERT_EQ(nmi_stage_get(), 3); GUEST_DONE(); } } @@ -104,7 +102,8 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i } run_guest(vmcb, svm->vmcb_gpa); - GUEST_ASSERT_3(vmcb->control.exit_code == SVM_EXIT_VMMCALL, + __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL, + "Expected VMMCAL #VMEXIT, got '0x%x', info1 = '0x%llx, info2 = '0x%llx'", vmcb->control.exit_code, vmcb->control.exit_info_1, vmcb->control.exit_info_2); @@ -112,7 +111,7 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i clgi(); x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_NMI); - GUEST_ASSERT_1(nmi_stage_get() == 1, nmi_stage_get()); + GUEST_ASSERT_EQ(nmi_stage_get(), 1); nmi_stage_inc(); stgi(); @@ -133,7 +132,8 @@ static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t i vmcb->control.next_rip = vmcb->save.rip + 2; run_guest(vmcb, svm->vmcb_gpa); - GUEST_ASSERT_3(vmcb->control.exit_code == SVM_EXIT_HLT, + __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT, + "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%llx, info2 = '0x%llx'", vmcb->control.exit_code, vmcb->control.exit_info_1, vmcb->control.exit_info_2); @@ -185,7 +185,7 @@ static void run_test(bool is_nmi) switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - REPORT_GUEST_ASSERT_3(uc, "vals = 0x%lx 0x%lx 0x%lx"); + REPORT_GUEST_ASSERT(uc); break; /* NOT REACHED */ case UCALL_DONE: diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c index 2da89fdc2471..00965ba33f73 100644 --- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c +++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c @@ -15,6 +15,7 @@ #include <stdlib.h> #include <string.h> #include <sys/ioctl.h> +#include <pthread.h> #include "test_util.h" #include "kvm_util.h" @@ -80,6 +81,133 @@ static void compare_vcpu_events(struct kvm_vcpu_events *left, #define TEST_SYNC_FIELDS (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS) #define INVALID_SYNC_FIELD 0x80000000 +/* + * Set an exception as pending *and* injected while KVM is processing events. + * KVM is supposed to ignore/drop pending exceptions if userspace is also + * requesting that an exception be injected. + */ +static void *race_events_inj_pen(void *arg) +{ + struct kvm_run *run = (struct kvm_run *)arg; + struct kvm_vcpu_events *events = &run->s.regs.events; + + WRITE_ONCE(events->exception.nr, UD_VECTOR); + + for (;;) { + WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS); + WRITE_ONCE(events->flags, 0); + WRITE_ONCE(events->exception.injected, 1); + WRITE_ONCE(events->exception.pending, 1); + + pthread_testcancel(); + } + + return NULL; +} + +/* + * Set an invalid exception vector while KVM is processing events. KVM is + * supposed to reject any vector >= 32, as well as NMIs (vector 2). + */ +static void *race_events_exc(void *arg) +{ + struct kvm_run *run = (struct kvm_run *)arg; + struct kvm_vcpu_events *events = &run->s.regs.events; + + for (;;) { + WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS); + WRITE_ONCE(events->flags, 0); + WRITE_ONCE(events->exception.nr, UD_VECTOR); + WRITE_ONCE(events->exception.pending, 1); + WRITE_ONCE(events->exception.nr, 255); + + pthread_testcancel(); + } + + return NULL; +} + +/* + * Toggle CR4.PAE while KVM is processing SREGS, EFER.LME=1 with CR4.PAE=0 is + * illegal, and KVM's MMU heavily relies on vCPU state being valid. + */ +static noinline void *race_sregs_cr4(void *arg) +{ + struct kvm_run *run = (struct kvm_run *)arg; + __u64 *cr4 = &run->s.regs.sregs.cr4; + __u64 pae_enabled = *cr4; + __u64 pae_disabled = *cr4 & ~X86_CR4_PAE; + + for (;;) { + WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_SREGS); + WRITE_ONCE(*cr4, pae_enabled); + asm volatile(".rept 512\n\t" + "nop\n\t" + ".endr"); + WRITE_ONCE(*cr4, pae_disabled); + + pthread_testcancel(); + } + + return NULL; +} + +static void race_sync_regs(void *racer) +{ + const time_t TIMEOUT = 2; /* seconds, roughly */ + struct kvm_x86_state *state; + struct kvm_translation tr; + struct kvm_vcpu *vcpu; + struct kvm_run *run; + struct kvm_vm *vm; + pthread_t thread; + time_t t; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + run = vcpu->run; + + run->kvm_valid_regs = KVM_SYNC_X86_SREGS; + vcpu_run(vcpu); + run->kvm_valid_regs = 0; + + /* Save state *before* spawning the thread that mucks with vCPU state. */ + state = vcpu_save_state(vcpu); + + /* + * Selftests run 64-bit guests by default, both EFER.LME and CR4.PAE + * should already be set in guest state. + */ + TEST_ASSERT((run->s.regs.sregs.cr4 & X86_CR4_PAE) && + (run->s.regs.sregs.efer & EFER_LME), + "vCPU should be in long mode, CR4.PAE=%d, EFER.LME=%d", + !!(run->s.regs.sregs.cr4 & X86_CR4_PAE), + !!(run->s.regs.sregs.efer & EFER_LME)); + + TEST_ASSERT_EQ(pthread_create(&thread, NULL, racer, (void *)run), 0); + + for (t = time(NULL) + TIMEOUT; time(NULL) < t;) { + /* + * Reload known good state if the vCPU triple faults, e.g. due + * to the unhandled #GPs being injected. VMX preserves state + * on shutdown, but SVM synthesizes an INIT as the VMCB state + * is architecturally undefined on triple fault. + */ + if (!__vcpu_run(vcpu) && run->exit_reason == KVM_EXIT_SHUTDOWN) + vcpu_load_state(vcpu, state); + + if (racer == race_sregs_cr4) { + tr = (struct kvm_translation) { .linear_address = 0 }; + __vcpu_ioctl(vcpu, KVM_TRANSLATE, &tr); + } + } + + TEST_ASSERT_EQ(pthread_cancel(thread), 0); + TEST_ASSERT_EQ(pthread_join(thread, NULL), 0); + + kvm_x86_state_cleanup(state); + kvm_vm_free(vm); +} + int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; @@ -218,5 +346,9 @@ int main(int argc, char *argv[]) kvm_vm_free(vm); + race_sync_regs(race_sregs_cr4); + race_sync_regs(race_events_exc); + race_sync_regs(race_events_inj_pen); + return 0; } diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c index c9f67702f657..12b0964f4f13 100644 --- a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c +++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c @@ -84,7 +84,7 @@ static void run_vcpu(struct kvm_vcpu *vcpu, int stage) ksft_test_result_pass("stage %d passed\n", stage + 1); return; case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx"); + REPORT_GUEST_ASSERT(uc); default: TEST_ASSERT(false, "Unexpected exit: %s", exit_reason_str(vcpu->run->exit_reason)); @@ -103,39 +103,39 @@ int main(void) vm = vm_create_with_one_vcpu(&vcpu, guest_code); val = 0; - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); /* Guest: writes to MSR_IA32_TSC affect both MSRs. */ run_vcpu(vcpu, 1); val = 1ull * GUEST_STEP; - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs. */ run_vcpu(vcpu, 2); val = 2ull * GUEST_STEP; - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); /* * Host: writes to MSR_IA32_TSC set the host-side offset * and therefore do not change MSR_IA32_TSC_ADJUST. */ vcpu_set_msr(vcpu, MSR_IA32_TSC, HOST_ADJUST + val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); run_vcpu(vcpu, 3); /* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC. */ vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, UNITY * 123456); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); - ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456); /* Restore previous value. */ vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); /* * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the @@ -143,8 +143,8 @@ int main(void) */ run_vcpu(vcpu, 4); val = 3ull * GUEST_STEP; - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val); /* * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side @@ -152,8 +152,8 @@ int main(void) */ run_vcpu(vcpu, 5); val = 4ull * GUEST_STEP; - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); - ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val); + TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST); kvm_vm_free(vm); diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c index 0cb51fa42773..255c50b0dc32 100644 --- a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c +++ b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c @@ -20,8 +20,8 @@ static void guest_ins_port80(uint8_t *buffer, unsigned int count) end = (unsigned long)buffer + 8192; asm volatile("cld; rep; insb" : "+D"(buffer), "+c"(count) : "d"(0x80) : "memory"); - GUEST_ASSERT_1(count == 0, count); - GUEST_ASSERT_2((unsigned long)buffer == end, buffer, end); + GUEST_ASSERT_EQ(count, 0); + GUEST_ASSERT_EQ((unsigned long)buffer, end); } static void guest_code(void) @@ -43,7 +43,9 @@ static void guest_code(void) memset(buffer, 0, sizeof(buffer)); guest_ins_port80(buffer, 8192); for (i = 0; i < 8192; i++) - GUEST_ASSERT_2(buffer[i] == 0xaa, i, buffer[i]); + __GUEST_ASSERT(buffer[i] == 0xaa, + "Expected '0xaa', got '0x%x' at buffer[%u]", + buffer[i], i); GUEST_DONE(); } @@ -91,7 +93,7 @@ int main(int argc, char *argv[]) case UCALL_DONE: break; case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "argN+1 = 0x%lx, argN+2 = 0x%lx"); + REPORT_GUEST_ASSERT(uc); default: TEST_FAIL("Unknown ucall %lu", uc.cmd); } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c index be0bdb8c6f78..a9b827c69f32 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c @@ -50,7 +50,7 @@ static void set_timer(void) timer.it_value.tv_sec = 0; timer.it_value.tv_usec = 200; timer.it_interval = timer.it_value; - ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0); + TEST_ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0); } static void set_or_clear_invalid_guest_state(struct kvm_vcpu *vcpu, bool set) diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c index fa03c8d1ce4e..e710b6e7fb38 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c @@ -116,29 +116,21 @@ static void l1_guest_code(struct vmx_pages *vmx_pages) GUEST_DONE(); } -static void stable_tsc_check_supported(void) +static bool system_has_stable_tsc(void) { + bool tsc_is_stable; FILE *fp; char buf[4]; fp = fopen("/sys/devices/system/clocksource/clocksource0/current_clocksource", "r"); if (fp == NULL) - goto skip_test; + return false; - if (fgets(buf, sizeof(buf), fp) == NULL) - goto close_fp; + tsc_is_stable = fgets(buf, sizeof(buf), fp) && + !strncmp(buf, "tsc", sizeof(buf)); - if (strncmp(buf, "tsc", sizeof(buf))) - goto close_fp; - - fclose(fp); - return; - -close_fp: fclose(fp); -skip_test: - print_skip("Kernel does not use TSC clocksource - assuming that host TSC is not stable"); - exit(KSFT_SKIP); + return tsc_is_stable; } int main(int argc, char *argv[]) @@ -156,7 +148,7 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL)); - stable_tsc_check_supported(); + TEST_REQUIRE(system_has_stable_tsc()); /* * We set L1's scale factor to be a random number from 2 to 10. diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c index 4c90f76930f9..ebbcb0a3f743 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c @@ -10,7 +10,6 @@ * and check it can be retrieved with KVM_GET_MSR, also test * the invalid LBR formats are rejected. */ - #define _GNU_SOURCE /* for program_invocation_short_name */ #include <sys/ioctl.h> @@ -52,23 +51,24 @@ static const union perf_capabilities format_caps = { .pebs_format = -1, }; +static void guest_test_perf_capabilities_gp(uint64_t val) +{ + uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val); + + __GUEST_ASSERT(vector == GP_VECTOR, + "Expected #GP for value '0x%llx', got vector '0x%x'", + val, vector); +} + static void guest_code(uint64_t current_val) { - uint8_t vector; int i; - vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, current_val); - GUEST_ASSERT_2(vector == GP_VECTOR, current_val, vector); - - vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, 0); - GUEST_ASSERT_2(vector == GP_VECTOR, 0, vector); + guest_test_perf_capabilities_gp(current_val); + guest_test_perf_capabilities_gp(0); - for (i = 0; i < 64; i++) { - vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, - current_val ^ BIT_ULL(i)); - GUEST_ASSERT_2(vector == GP_VECTOR, - current_val ^ BIT_ULL(i), vector); - } + for (i = 0; i < 64; i++) + guest_test_perf_capabilities_gp(current_val ^ BIT_ULL(i)); GUEST_DONE(); } @@ -95,7 +95,7 @@ static void test_guest_wrmsr_perf_capabilities(union perf_capabilities host_cap) switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - REPORT_GUEST_ASSERT_2(uc, "val = 0x%lx, vector = %lu"); + REPORT_GUEST_ASSERT(uc); break; case UCALL_DONE: break; @@ -103,7 +103,8 @@ static void test_guest_wrmsr_perf_capabilities(union perf_capabilities host_cap) TEST_FAIL("Unexpected ucall: %lu", uc.cmd); } - ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), host_cap.capabilities); + TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), + host_cap.capabilities); vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c index 396c13f42457..ab75b873a4ad 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c @@ -65,17 +65,17 @@ static void ____test_icr(struct xapic_vcpu *x, uint64_t val) vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic); vcpu_run(vcpu); - ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC); - ASSERT_EQ(uc.args[1], val); + TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC); + TEST_ASSERT_EQ(uc.args[1], val); vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic); icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) | (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32; if (!x->is_x2apic) { val &= (-1u | (0xffull << (32 + 24))); - ASSERT_EQ(icr, val & ~APIC_ICR_BUSY); + TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY); } else { - ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY); + TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY); } } diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c index 905bd5ae4431..77d04a7bdadd 100644 --- a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c @@ -4,7 +4,6 @@ * * Copyright (C) 2022, Google LLC. */ - #include <fcntl.h> #include <stdio.h> #include <stdlib.h> @@ -20,13 +19,14 @@ * Assert that architectural dependency rules are satisfied, e.g. that AVX is * supported if and only if SSE is supported. */ -#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies) \ -do { \ - uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies)); \ - \ - GUEST_ASSERT_3((__supported & (xfeatures)) != (xfeatures) || \ - __supported == ((xfeatures) | (dependencies)), \ - __supported, (xfeatures), (dependencies)); \ +#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies) \ +do { \ + uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies)); \ + \ + __GUEST_ASSERT((__supported & (xfeatures)) != (xfeatures) || \ + __supported == ((xfeatures) | (dependencies)), \ + "supported = 0x%llx, xfeatures = 0x%llx, dependencies = 0x%llx", \ + __supported, (xfeatures), (dependencies)); \ } while (0) /* @@ -41,7 +41,8 @@ do { \ do { \ uint64_t __supported = (supported_xcr0) & (xfeatures); \ \ - GUEST_ASSERT_2(!__supported || __supported == (xfeatures), \ + __GUEST_ASSERT(!__supported || __supported == (xfeatures), \ + "supported = 0x%llx, xfeatures = 0x%llx", \ __supported, (xfeatures)); \ } while (0) @@ -79,14 +80,18 @@ static void guest_code(void) XFEATURE_MASK_XTILE); vector = xsetbv_safe(0, supported_xcr0); - GUEST_ASSERT_2(!vector, supported_xcr0, vector); + __GUEST_ASSERT(!vector, + "Expected success on XSETBV(0x%llx), got vector '0x%x'", + supported_xcr0, vector); for (i = 0; i < 64; i++) { if (supported_xcr0 & BIT_ULL(i)) continue; vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i)); - GUEST_ASSERT_3(vector == GP_VECTOR, supported_xcr0, vector, BIT_ULL(i)); + __GUEST_ASSERT(vector == GP_VECTOR, + "Expected #GP on XSETBV(0x%llx), supported XCR0 = %llx, got vector '0x%x'", + BIT_ULL(i), supported_xcr0, vector); } GUEST_DONE(); @@ -117,7 +122,7 @@ int main(int argc, char *argv[]) switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: - REPORT_GUEST_ASSERT_3(uc, "0x%lx 0x%lx 0x%lx"); + REPORT_GUEST_ASSERT(uc); break; case UCALL_DONE: goto done; diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c index c94cde3b523f..e149d0574961 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c @@ -108,16 +108,16 @@ int main(int argc, char *argv[]) vcpu_run(vcpu); if (run->exit_reason == KVM_EXIT_XEN) { - ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL); - ASSERT_EQ(run->xen.u.hcall.cpl, 0); - ASSERT_EQ(run->xen.u.hcall.longmode, 1); - ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE); - ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1)); - ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2)); - ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3)); - ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4)); - ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5)); - ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6)); + TEST_ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL); + TEST_ASSERT_EQ(run->xen.u.hcall.cpl, 0); + TEST_ASSERT_EQ(run->xen.u.hcall.longmode, 1); + TEST_ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE); + TEST_ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1)); + TEST_ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2)); + TEST_ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3)); + TEST_ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4)); + TEST_ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5)); + TEST_ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6)); run->xen.u.hcall.result = RETVALUE; continue; } diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config index 0f0a65287bac..3dc9e438eab1 100644 --- a/tools/testing/selftests/landlock/config +++ b/tools/testing/selftests/landlock/config @@ -1,7 +1,10 @@ +CONFIG_CGROUPS=y +CONFIG_CGROUP_SCHED=y CONFIG_OVERLAY_FS=y -CONFIG_SECURITY_LANDLOCK=y -CONFIG_SECURITY_PATH=y +CONFIG_PROC_FS=y CONFIG_SECURITY=y +CONFIG_SECURITY_LANDLOCK=y CONFIG_SHMEM=y -CONFIG_TMPFS_XATTR=y +CONFIG_SYSFS=y CONFIG_TMPFS=y +CONFIG_TMPFS_XATTR=y diff --git a/tools/testing/selftests/landlock/config.um b/tools/testing/selftests/landlock/config.um new file mode 100644 index 000000000000..40937c0395d6 --- /dev/null +++ b/tools/testing/selftests/landlock/config.um @@ -0,0 +1 @@ +CONFIG_HOSTFS=y diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index b6c4be3faf7a..251594306d40 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -10,6 +10,7 @@ #define _GNU_SOURCE #include <fcntl.h> #include <linux/landlock.h> +#include <linux/magic.h> #include <sched.h> #include <stdio.h> #include <string.h> @@ -19,6 +20,7 @@ #include <sys/sendfile.h> #include <sys/stat.h> #include <sys/sysmacros.h> +#include <sys/vfs.h> #include <unistd.h> #include "common.h" @@ -107,9 +109,11 @@ static bool fgrep(FILE *const inf, const char *const str) return false; } -static bool supports_overlayfs(void) +static bool supports_filesystem(const char *const filesystem) { - bool res; + char str[32]; + int len; + bool res = true; FILE *const inf = fopen("/proc/filesystems", "r"); /* @@ -119,11 +123,35 @@ static bool supports_overlayfs(void) if (!inf) return true; - res = fgrep(inf, "nodev\toverlay\n"); + /* filesystem can be null for bind mounts. */ + if (!filesystem) + goto out; + + len = snprintf(str, sizeof(str), "nodev\t%s\n", filesystem); + if (len >= sizeof(str)) + /* Ignores too-long filesystem names. */ + goto out; + + res = fgrep(inf, str); + +out: fclose(inf); return res; } +static bool cwd_matches_fs(unsigned int fs_magic) +{ + struct statfs statfs_buf; + + if (!fs_magic) + return true; + + if (statfs(".", &statfs_buf)) + return true; + + return statfs_buf.f_type == fs_magic; +} + static void mkdir_parents(struct __test_metadata *const _metadata, const char *const path) { @@ -206,7 +234,26 @@ out: return err; } -static void prepare_layout(struct __test_metadata *const _metadata) +struct mnt_opt { + const char *const source; + const char *const type; + const unsigned long flags; + const char *const data; +}; + +const struct mnt_opt mnt_tmp = { + .type = "tmpfs", + .data = "size=4m,mode=700", +}; + +static int mount_opt(const struct mnt_opt *const mnt, const char *const target) +{ + return mount(mnt->source ?: mnt->type, target, mnt->type, mnt->flags, + mnt->data); +} + +static void prepare_layout_opt(struct __test_metadata *const _metadata, + const struct mnt_opt *const mnt) { disable_caps(_metadata); umask(0077); @@ -217,12 +264,28 @@ static void prepare_layout(struct __test_metadata *const _metadata) * for tests relying on pivot_root(2) and move_mount(2). */ set_cap(_metadata, CAP_SYS_ADMIN); - ASSERT_EQ(0, unshare(CLONE_NEWNS)); - ASSERT_EQ(0, mount("tmp", TMP_DIR, "tmpfs", 0, "size=4m,mode=700")); + ASSERT_EQ(0, unshare(CLONE_NEWNS | CLONE_NEWCGROUP)); + ASSERT_EQ(0, mount_opt(mnt, TMP_DIR)) + { + TH_LOG("Failed to mount the %s filesystem: %s", mnt->type, + strerror(errno)); + /* + * FIXTURE_TEARDOWN() is not called when FIXTURE_SETUP() + * failed, so we need to explicitly do a minimal cleanup to + * avoid cascading errors with other tests that don't depend on + * the same filesystem. + */ + remove_path(TMP_DIR); + } ASSERT_EQ(0, mount(NULL, TMP_DIR, NULL, MS_PRIVATE | MS_REC, NULL)); clear_cap(_metadata, CAP_SYS_ADMIN); } +static void prepare_layout(struct __test_metadata *const _metadata) +{ + prepare_layout_opt(_metadata, &mnt_tmp); +} + static void cleanup_layout(struct __test_metadata *const _metadata) { set_cap(_metadata, CAP_SYS_ADMIN); @@ -231,6 +294,20 @@ static void cleanup_layout(struct __test_metadata *const _metadata) EXPECT_EQ(0, remove_path(TMP_DIR)); } +/* clang-format off */ +FIXTURE(layout0) {}; +/* clang-format on */ + +FIXTURE_SETUP(layout0) +{ + prepare_layout(_metadata); +} + +FIXTURE_TEARDOWN(layout0) +{ + cleanup_layout(_metadata); +} + static void create_layout1(struct __test_metadata *const _metadata) { create_file(_metadata, file1_s1d1); @@ -248,7 +325,7 @@ static void create_layout1(struct __test_metadata *const _metadata) create_file(_metadata, file1_s3d1); create_directory(_metadata, dir_s3d2); set_cap(_metadata, CAP_SYS_ADMIN); - ASSERT_EQ(0, mount("tmp", dir_s3d2, "tmpfs", 0, "size=4m,mode=700")); + ASSERT_EQ(0, mount_opt(&mnt_tmp, dir_s3d2)); clear_cap(_metadata, CAP_SYS_ADMIN); ASSERT_EQ(0, mkdir(dir_s3d3, 0700)); @@ -262,11 +339,13 @@ static void remove_layout1(struct __test_metadata *const _metadata) EXPECT_EQ(0, remove_path(file1_s1d3)); EXPECT_EQ(0, remove_path(file1_s1d2)); EXPECT_EQ(0, remove_path(file1_s1d1)); + EXPECT_EQ(0, remove_path(dir_s1d3)); EXPECT_EQ(0, remove_path(file2_s2d3)); EXPECT_EQ(0, remove_path(file1_s2d3)); EXPECT_EQ(0, remove_path(file1_s2d2)); EXPECT_EQ(0, remove_path(file1_s2d1)); + EXPECT_EQ(0, remove_path(dir_s2d2)); EXPECT_EQ(0, remove_path(file1_s3d1)); EXPECT_EQ(0, remove_path(dir_s3d3)); @@ -510,7 +589,7 @@ TEST_F_FORK(layout1, file_and_dir_access_rights) ASSERT_EQ(0, close(ruleset_fd)); } -TEST_F_FORK(layout1, unknown_access_rights) +TEST_F_FORK(layout0, unknown_access_rights) { __u64 access_mask; @@ -608,7 +687,7 @@ static void enforce_ruleset(struct __test_metadata *const _metadata, } } -TEST_F_FORK(layout1, proc_nsfs) +TEST_F_FORK(layout0, proc_nsfs) { const struct rule rules[] = { { @@ -657,11 +736,11 @@ TEST_F_FORK(layout1, proc_nsfs) ASSERT_EQ(0, close(path_beneath.parent_fd)); } -TEST_F_FORK(layout1, unpriv) +TEST_F_FORK(layout0, unpriv) { const struct rule rules[] = { { - .path = dir_s1d2, + .path = TMP_DIR, .access = ACCESS_RO, }, {}, @@ -1301,12 +1380,12 @@ TEST_F_FORK(layout1, inherit_superset) ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); } -TEST_F_FORK(layout1, max_layers) +TEST_F_FORK(layout0, max_layers) { int i, err; const struct rule rules[] = { { - .path = dir_s1d2, + .path = TMP_DIR, .access = ACCESS_RO, }, {}, @@ -4030,21 +4109,24 @@ static const char (*merge_sub_files[])[] = { * └── work */ -/* clang-format off */ -FIXTURE(layout2_overlay) {}; -/* clang-format on */ +FIXTURE(layout2_overlay) +{ + bool skip_test; +}; FIXTURE_SETUP(layout2_overlay) { - if (!supports_overlayfs()) - SKIP(return, "overlayfs is not supported"); + if (!supports_filesystem("overlay")) { + self->skip_test = true; + SKIP(return, "overlayfs is not supported (setup)"); + } prepare_layout(_metadata); create_directory(_metadata, LOWER_BASE); set_cap(_metadata, CAP_SYS_ADMIN); /* Creates tmpfs mount points to get deterministic overlayfs. */ - ASSERT_EQ(0, mount("tmp", LOWER_BASE, "tmpfs", 0, "size=4m,mode=700")); + ASSERT_EQ(0, mount_opt(&mnt_tmp, LOWER_BASE)); clear_cap(_metadata, CAP_SYS_ADMIN); create_file(_metadata, lower_fl1); create_file(_metadata, lower_dl1_fl2); @@ -4054,7 +4136,7 @@ FIXTURE_SETUP(layout2_overlay) create_directory(_metadata, UPPER_BASE); set_cap(_metadata, CAP_SYS_ADMIN); - ASSERT_EQ(0, mount("tmp", UPPER_BASE, "tmpfs", 0, "size=4m,mode=700")); + ASSERT_EQ(0, mount_opt(&mnt_tmp, UPPER_BASE)); clear_cap(_metadata, CAP_SYS_ADMIN); create_file(_metadata, upper_fu1); create_file(_metadata, upper_du1_fu2); @@ -4075,8 +4157,8 @@ FIXTURE_SETUP(layout2_overlay) FIXTURE_TEARDOWN(layout2_overlay) { - if (!supports_overlayfs()) - SKIP(return, "overlayfs is not supported"); + if (self->skip_test) + SKIP(return, "overlayfs is not supported (teardown)"); EXPECT_EQ(0, remove_path(lower_do1_fl3)); EXPECT_EQ(0, remove_path(lower_dl1_fl2)); @@ -4109,8 +4191,8 @@ FIXTURE_TEARDOWN(layout2_overlay) TEST_F_FORK(layout2_overlay, no_restriction) { - if (!supports_overlayfs()) - SKIP(return, "overlayfs is not supported"); + if (self->skip_test) + SKIP(return, "overlayfs is not supported (test)"); ASSERT_EQ(0, test_open(lower_fl1, O_RDONLY)); ASSERT_EQ(0, test_open(lower_dl1, O_RDONLY)); @@ -4275,8 +4357,8 @@ TEST_F_FORK(layout2_overlay, same_content_different_file) size_t i; const char *path_entry; - if (!supports_overlayfs()) - SKIP(return, "overlayfs is not supported"); + if (self->skip_test) + SKIP(return, "overlayfs is not supported (test)"); /* Sets rules on base directories (i.e. outside overlay scope). */ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_base); @@ -4423,4 +4505,261 @@ TEST_F_FORK(layout2_overlay, same_content_different_file) } } +FIXTURE(layout3_fs) +{ + bool has_created_dir; + bool has_created_file; + char *dir_path; + bool skip_test; +}; + +FIXTURE_VARIANT(layout3_fs) +{ + const struct mnt_opt mnt; + const char *const file_path; + unsigned int cwd_fs_magic; +}; + +/* clang-format off */ +FIXTURE_VARIANT_ADD(layout3_fs, tmpfs) { + /* clang-format on */ + .mnt = mnt_tmp, + .file_path = file1_s1d1, +}; + +FIXTURE_VARIANT_ADD(layout3_fs, ramfs) { + .mnt = { + .type = "ramfs", + .data = "mode=700", + }, + .file_path = TMP_DIR "/dir/file", +}; + +FIXTURE_VARIANT_ADD(layout3_fs, cgroup2) { + .mnt = { + .type = "cgroup2", + }, + .file_path = TMP_DIR "/test/cgroup.procs", +}; + +FIXTURE_VARIANT_ADD(layout3_fs, proc) { + .mnt = { + .type = "proc", + }, + .file_path = TMP_DIR "/self/status", +}; + +FIXTURE_VARIANT_ADD(layout3_fs, sysfs) { + .mnt = { + .type = "sysfs", + }, + .file_path = TMP_DIR "/kernel/notes", +}; + +FIXTURE_VARIANT_ADD(layout3_fs, hostfs) { + .mnt = { + .source = TMP_DIR, + .flags = MS_BIND, + }, + .file_path = TMP_DIR "/dir/file", + .cwd_fs_magic = HOSTFS_SUPER_MAGIC, +}; + +FIXTURE_SETUP(layout3_fs) +{ + struct stat statbuf; + const char *slash; + size_t dir_len; + + if (!supports_filesystem(variant->mnt.type) || + !cwd_matches_fs(variant->cwd_fs_magic)) { + self->skip_test = true; + SKIP(return, "this filesystem is not supported (setup)"); + } + + slash = strrchr(variant->file_path, '/'); + ASSERT_NE(slash, NULL); + dir_len = (size_t)slash - (size_t)variant->file_path; + ASSERT_LT(0, dir_len); + self->dir_path = malloc(dir_len + 1); + self->dir_path[dir_len] = '\0'; + strncpy(self->dir_path, variant->file_path, dir_len); + + prepare_layout_opt(_metadata, &variant->mnt); + + /* Creates directory when required. */ + if (stat(self->dir_path, &statbuf)) { + set_cap(_metadata, CAP_DAC_OVERRIDE); + EXPECT_EQ(0, mkdir(self->dir_path, 0700)) + { + TH_LOG("Failed to create directory \"%s\": %s", + self->dir_path, strerror(errno)); + free(self->dir_path); + self->dir_path = NULL; + } + self->has_created_dir = true; + clear_cap(_metadata, CAP_DAC_OVERRIDE); + } + + /* Creates file when required. */ + if (stat(variant->file_path, &statbuf)) { + int fd; + + set_cap(_metadata, CAP_DAC_OVERRIDE); + fd = creat(variant->file_path, 0600); + EXPECT_LE(0, fd) + { + TH_LOG("Failed to create file \"%s\": %s", + variant->file_path, strerror(errno)); + } + EXPECT_EQ(0, close(fd)); + self->has_created_file = true; + clear_cap(_metadata, CAP_DAC_OVERRIDE); + } +} + +FIXTURE_TEARDOWN(layout3_fs) +{ + if (self->skip_test) + SKIP(return, "this filesystem is not supported (teardown)"); + + if (self->has_created_file) { + set_cap(_metadata, CAP_DAC_OVERRIDE); + /* + * Don't check for error because the file might already + * have been removed (cf. release_inode test). + */ + unlink(variant->file_path); + clear_cap(_metadata, CAP_DAC_OVERRIDE); + } + + if (self->has_created_dir) { + set_cap(_metadata, CAP_DAC_OVERRIDE); + /* + * Don't check for error because the directory might already + * have been removed (cf. release_inode test). + */ + rmdir(self->dir_path); + clear_cap(_metadata, CAP_DAC_OVERRIDE); + } + free(self->dir_path); + self->dir_path = NULL; + + cleanup_layout(_metadata); +} + +static void layer3_fs_tag_inode(struct __test_metadata *const _metadata, + FIXTURE_DATA(layout3_fs) * self, + const FIXTURE_VARIANT(layout3_fs) * variant, + const char *const rule_path) +{ + const struct rule layer1_allow_read_file[] = { + { + .path = rule_path, + .access = LANDLOCK_ACCESS_FS_READ_FILE, + }, + {}, + }; + const struct landlock_ruleset_attr layer2_deny_everything_attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE, + }; + const char *const dev_null_path = "/dev/null"; + int ruleset_fd; + + if (self->skip_test) + SKIP(return, "this filesystem is not supported (test)"); + + /* Checks without Landlock. */ + EXPECT_EQ(0, test_open(dev_null_path, O_RDONLY | O_CLOEXEC)); + EXPECT_EQ(0, test_open(variant->file_path, O_RDONLY | O_CLOEXEC)); + + ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE, + layer1_allow_read_file); + EXPECT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + EXPECT_EQ(0, close(ruleset_fd)); + + EXPECT_EQ(EACCES, test_open(dev_null_path, O_RDONLY | O_CLOEXEC)); + EXPECT_EQ(0, test_open(variant->file_path, O_RDONLY | O_CLOEXEC)); + + /* Forbids directory reading. */ + ruleset_fd = + landlock_create_ruleset(&layer2_deny_everything_attr, + sizeof(layer2_deny_everything_attr), 0); + EXPECT_LE(0, ruleset_fd); + enforce_ruleset(_metadata, ruleset_fd); + EXPECT_EQ(0, close(ruleset_fd)); + + /* Checks with Landlock and forbidden access. */ + EXPECT_EQ(EACCES, test_open(dev_null_path, O_RDONLY | O_CLOEXEC)); + EXPECT_EQ(EACCES, test_open(variant->file_path, O_RDONLY | O_CLOEXEC)); +} + +/* Matrix of tests to check file hierarchy evaluation. */ + +TEST_F_FORK(layout3_fs, tag_inode_dir_parent) +{ + /* The current directory must not be the root for this test. */ + layer3_fs_tag_inode(_metadata, self, variant, "."); +} + +TEST_F_FORK(layout3_fs, tag_inode_dir_mnt) +{ + layer3_fs_tag_inode(_metadata, self, variant, TMP_DIR); +} + +TEST_F_FORK(layout3_fs, tag_inode_dir_child) +{ + layer3_fs_tag_inode(_metadata, self, variant, self->dir_path); +} + +TEST_F_FORK(layout3_fs, tag_inode_file) +{ + layer3_fs_tag_inode(_metadata, self, variant, variant->file_path); +} + +/* Light version of layout1.release_inodes */ +TEST_F_FORK(layout3_fs, release_inodes) +{ + const struct rule layer1[] = { + { + .path = TMP_DIR, + .access = LANDLOCK_ACCESS_FS_READ_DIR, + }, + {}, + }; + int ruleset_fd; + + if (self->skip_test) + SKIP(return, "this filesystem is not supported (test)"); + + /* Clean up for the teardown to not fail. */ + if (self->has_created_file) + EXPECT_EQ(0, remove_path(variant->file_path)); + + if (self->has_created_dir) + /* Don't check for error because of cgroup specificities. */ + remove_path(self->dir_path); + + ruleset_fd = + create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_DIR, layer1); + ASSERT_LE(0, ruleset_fd); + + /* Unmount the filesystem while it is being used by a ruleset. */ + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(0, umount(TMP_DIR)); + clear_cap(_metadata, CAP_SYS_ADMIN); + + /* Replaces with a new mount point to simplify FIXTURE_TEARDOWN. */ + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(0, mount_opt(&mnt_tmp, TMP_DIR)); + clear_cap(_metadata, CAP_SYS_ADMIN); + + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks that access to the new mount point is denied. */ + ASSERT_EQ(EACCES, test_open(TMP_DIR, O_RDONLY)); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 05400462c779..118e0964bda9 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -44,10 +44,26 @@ endif selfdir = $(realpath $(dir $(filter %/lib.mk,$(MAKEFILE_LIST)))) top_srcdir = $(selfdir)/../../.. -ifeq ($(KHDR_INCLUDES),) -KHDR_INCLUDES := -isystem $(top_srcdir)/usr/include +ifeq ("$(origin O)", "command line") + KBUILD_OUTPUT := $(O) endif +ifneq ($(KBUILD_OUTPUT),) + # Make's built-in functions such as $(abspath ...), $(realpath ...) cannot + # expand a shell special character '~'. We use a somewhat tedious way here. + abs_objtree := $(shell cd $(top_srcdir) && mkdir -p $(KBUILD_OUTPUT) && cd $(KBUILD_OUTPUT) && pwd) + $(if $(abs_objtree),, \ + $(error failed to create output directory "$(KBUILD_OUTPUT)")) + # $(realpath ...) resolves symlinks + abs_objtree := $(realpath $(abs_objtree)) + KHDR_DIR := ${abs_objtree}/usr/include +else + abs_srctree := $(shell cd $(top_srcdir) && pwd) + KHDR_DIR := ${abs_srctree}/usr/include +endif + +KHDR_INCLUDES := -isystem $(KHDR_DIR) + # The following are built by lib.mk common compile rules. # TEST_CUSTOM_PROGS should be used by tests that require # custom build rule and prevent common build rule use. @@ -58,7 +74,25 @@ TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS)) TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED)) TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES)) -all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) +all: kernel_header_files $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) \ + $(TEST_GEN_FILES) + +kernel_header_files: + @ls $(KHDR_DIR)/linux/*.h >/dev/null 2>/dev/null; \ + if [ $$? -ne 0 ]; then \ + RED='\033[1;31m'; \ + NOCOLOR='\033[0m'; \ + echo; \ + echo -e "$${RED}error$${NOCOLOR}: missing kernel header files."; \ + echo "Please run this and try again:"; \ + echo; \ + echo " cd $(top_srcdir)"; \ + echo " make headers"; \ + echo; \ + exit 1; \ + fi + +.PHONY: kernel_header_files define RUN_TESTS BASE_DIR="$(selfdir)"; \ @@ -72,7 +106,7 @@ endef run_tests: all ifdef building_out_of_srctree @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \ - rsync -aLq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT); \ + rsync -aq --copy-unsafe-links $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT); \ fi @if [ "X$(TEST_PROGS)" != "X" ]; then \ $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) \ @@ -86,7 +120,7 @@ endif define INSTALL_SINGLE_RULE $(if $(INSTALL_LIST),@mkdir -p $(INSTALL_PATH)) - $(if $(INSTALL_LIST),rsync -aL $(INSTALL_LIST) $(INSTALL_PATH)/) + $(if $(INSTALL_LIST),rsync -a --copy-unsafe-links $(INSTALL_LIST) $(INSTALL_PATH)/) endef define INSTALL_RULE diff --git a/tools/testing/selftests/media_tests/video_device_test.c b/tools/testing/selftests/media_tests/video_device_test.c index 0f6aef2e2593..2c44e115f2f0 100644 --- a/tools/testing/selftests/media_tests/video_device_test.c +++ b/tools/testing/selftests/media_tests/video_device_test.c @@ -37,45 +37,58 @@ #include <time.h> #include <linux/videodev2.h> -int main(int argc, char **argv) +#define PRIORITY_MAX 4 + +int priority_test(int fd) { - int opt; - char video_dev[256]; - int count; - struct v4l2_tuner vtuner; - struct v4l2_capability vcap; + /* This test will try to update the priority associated with a file descriptor */ + + enum v4l2_priority old_priority, new_priority, priority_to_compare; int ret; - int fd; + int result = 0; - if (argc < 2) { - printf("Usage: %s [-d </dev/videoX>]\n", argv[0]); - exit(-1); + ret = ioctl(fd, VIDIOC_G_PRIORITY, &old_priority); + if (ret < 0) { + printf("Failed to get priority: %s\n", strerror(errno)); + return -1; + } + new_priority = (old_priority + 1) % PRIORITY_MAX; + ret = ioctl(fd, VIDIOC_S_PRIORITY, &new_priority); + if (ret < 0) { + printf("Failed to set priority: %s\n", strerror(errno)); + return -1; + } + ret = ioctl(fd, VIDIOC_G_PRIORITY, &priority_to_compare); + if (ret < 0) { + printf("Failed to get new priority: %s\n", strerror(errno)); + result = -1; + goto cleanup; + } + if (priority_to_compare != new_priority) { + printf("Priority wasn't set - test failed\n"); + result = -1; } - /* Process arguments */ - while ((opt = getopt(argc, argv, "d:")) != -1) { - switch (opt) { - case 'd': - strncpy(video_dev, optarg, sizeof(video_dev) - 1); - video_dev[sizeof(video_dev)-1] = '\0'; - break; - default: - printf("Usage: %s [-d </dev/videoX>]\n", argv[0]); - exit(-1); - } +cleanup: + ret = ioctl(fd, VIDIOC_S_PRIORITY, &old_priority); + if (ret < 0) { + printf("Failed to restore priority: %s\n", strerror(errno)); + return -1; } + return result; +} + +int loop_test(int fd) +{ + int count; + struct v4l2_tuner vtuner; + struct v4l2_capability vcap; + int ret; /* Generate random number of interations */ srand((unsigned int) time(NULL)); count = rand(); - /* Open Video device and keep it open */ - fd = open(video_dev, O_RDWR); - if (fd == -1) { - printf("Video Device open errno %s\n", strerror(errno)); - exit(-1); - } - printf("\nNote:\n" "While test is running, remove the device or unbind\n" "driver and ensure there are no use after free errors\n" @@ -98,4 +111,46 @@ int main(int argc, char **argv) sleep(10); count--; } + return 0; +} + +int main(int argc, char **argv) +{ + int opt; + char video_dev[256]; + int fd; + int test_result; + + if (argc < 2) { + printf("Usage: %s [-d </dev/videoX>]\n", argv[0]); + exit(-1); + } + + /* Process arguments */ + while ((opt = getopt(argc, argv, "d:")) != -1) { + switch (opt) { + case 'd': + strncpy(video_dev, optarg, sizeof(video_dev) - 1); + video_dev[sizeof(video_dev)-1] = '\0'; + break; + default: + printf("Usage: %s [-d </dev/videoX>]\n", argv[0]); + exit(-1); + } + } + + /* Open Video device and keep it open */ + fd = open(video_dev, O_RDWR); + if (fd == -1) { + printf("Video Device open errno %s\n", strerror(errno)); + exit(-1); + } + + test_result = priority_test(fd); + if (!test_result) + printf("Priority test - PASSED\n"); + else + printf("Priority test - FAILED\n"); + + loop_test(fd); } diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c index dba0e8ba002f..3df008677239 100644 --- a/tools/testing/selftests/memfd/memfd_test.c +++ b/tools/testing/selftests/memfd/memfd_test.c @@ -18,6 +18,7 @@ #include <sys/syscall.h> #include <sys/wait.h> #include <unistd.h> +#include <ctype.h> #include "common.h" @@ -43,7 +44,6 @@ */ static size_t mfd_def_size = MFD_DEF_SIZE; static const char *memfd_str = MEMFD_STR; -static pid_t spawn_newpid_thread(unsigned int flags, int (*fn)(void *)); static int newpid_thread_fn2(void *arg); static void join_newpid_thread(pid_t pid); @@ -96,12 +96,12 @@ static void sysctl_assert_write(const char *val) int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC); if (fd < 0) { - printf("open sysctl failed\n"); + printf("open sysctl failed: %m\n"); abort(); } if (write(fd, val, strlen(val)) < 0) { - printf("write sysctl failed\n"); + printf("write sysctl %s failed: %m\n", val); abort(); } } @@ -111,7 +111,7 @@ static void sysctl_fail_write(const char *val) int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC); if (fd < 0) { - printf("open sysctl failed\n"); + printf("open sysctl failed: %m\n"); abort(); } @@ -122,6 +122,33 @@ static void sysctl_fail_write(const char *val) } } +static void sysctl_assert_equal(const char *val) +{ + char *p, buf[128] = {}; + int fd = open("/proc/sys/vm/memfd_noexec", O_RDONLY | O_CLOEXEC); + + if (fd < 0) { + printf("open sysctl failed: %m\n"); + abort(); + } + + if (read(fd, buf, sizeof(buf)) < 0) { + printf("read sysctl failed: %m\n"); + abort(); + } + + /* Strip trailing whitespace. */ + p = buf; + while (!isspace(*p)) + p++; + *p = '\0'; + + if (strcmp(buf, val) != 0) { + printf("unexpected sysctl value: expected %s, got %s\n", val, buf); + abort(); + } +} + static int mfd_assert_reopen_fd(int fd_in) { int fd; @@ -736,7 +763,7 @@ static int idle_thread_fn(void *arg) return 0; } -static pid_t spawn_idle_thread(unsigned int flags) +static pid_t spawn_thread(unsigned int flags, int (*fn)(void *), void *arg) { uint8_t *stack; pid_t pid; @@ -747,10 +774,7 @@ static pid_t spawn_idle_thread(unsigned int flags) abort(); } - pid = clone(idle_thread_fn, - stack + STACK_SIZE, - SIGCHLD | flags, - NULL); + pid = clone(fn, stack + STACK_SIZE, SIGCHLD | flags, arg); if (pid < 0) { printf("clone() failed: %m\n"); abort(); @@ -759,6 +783,33 @@ static pid_t spawn_idle_thread(unsigned int flags) return pid; } +static void join_thread(pid_t pid) +{ + int wstatus; + + if (waitpid(pid, &wstatus, 0) < 0) { + printf("newpid thread: waitpid() failed: %m\n"); + abort(); + } + + if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) { + printf("newpid thread: exited with non-zero error code %d\n", + WEXITSTATUS(wstatus)); + abort(); + } + + if (WIFSIGNALED(wstatus)) { + printf("newpid thread: killed by signal %d\n", + WTERMSIG(wstatus)); + abort(); + } +} + +static pid_t spawn_idle_thread(unsigned int flags) +{ + return spawn_thread(flags, idle_thread_fn, NULL); +} + static void join_idle_thread(pid_t pid) { kill(pid, SIGTERM); @@ -1111,109 +1162,260 @@ static void test_noexec_seal(void) close(fd); } -static void test_sysctl_child(void) +static void test_sysctl_sysctl0(void) { int fd; - int pid; - printf("%s sysctl 0\n", memfd_str); - sysctl_assert_write("0"); - fd = mfd_assert_new("kern_memfd_sysctl_0", + sysctl_assert_equal("0"); + + fd = mfd_assert_new("kern_memfd_sysctl_0_dfl", mfd_def_size, MFD_CLOEXEC | MFD_ALLOW_SEALING); - mfd_assert_mode(fd, 0777); mfd_assert_has_seals(fd, 0); mfd_assert_chmod(fd, 0644); close(fd); +} - printf("%s sysctl 1\n", memfd_str); - sysctl_assert_write("1"); - fd = mfd_assert_new("kern_memfd_sysctl_1", +static void test_sysctl_set_sysctl0(void) +{ + sysctl_assert_write("0"); + test_sysctl_sysctl0(); +} + +static void test_sysctl_sysctl1(void) +{ + int fd; + + sysctl_assert_equal("1"); + + fd = mfd_assert_new("kern_memfd_sysctl_1_dfl", mfd_def_size, MFD_CLOEXEC | MFD_ALLOW_SEALING); + mfd_assert_mode(fd, 0666); + mfd_assert_has_seals(fd, F_SEAL_EXEC); + mfd_fail_chmod(fd, 0777); + close(fd); - printf("%s child ns\n", memfd_str); - pid = spawn_newpid_thread(CLONE_NEWPID, newpid_thread_fn2); - join_newpid_thread(pid); + fd = mfd_assert_new("kern_memfd_sysctl_1_exec", + mfd_def_size, + MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING); + mfd_assert_mode(fd, 0777); + mfd_assert_has_seals(fd, 0); + mfd_assert_chmod(fd, 0644); + close(fd); + fd = mfd_assert_new("kern_memfd_sysctl_1_noexec", + mfd_def_size, + MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING); mfd_assert_mode(fd, 0666); mfd_assert_has_seals(fd, F_SEAL_EXEC); mfd_fail_chmod(fd, 0777); - sysctl_fail_write("0"); close(fd); - - printf("%s sysctl 2\n", memfd_str); - sysctl_assert_write("2"); - mfd_fail_new("kern_memfd_sysctl_2", - MFD_CLOEXEC | MFD_ALLOW_SEALING); - sysctl_fail_write("0"); - sysctl_fail_write("1"); } -static int newpid_thread_fn(void *arg) +static void test_sysctl_set_sysctl1(void) { - test_sysctl_child(); - return 0; + sysctl_assert_write("1"); + test_sysctl_sysctl1(); } -static void test_sysctl_child2(void) +static void test_sysctl_sysctl2(void) { int fd; - sysctl_fail_write("0"); - fd = mfd_assert_new("kern_memfd_sysctl_1", + sysctl_assert_equal("2"); + + fd = mfd_assert_new("kern_memfd_sysctl_2_dfl", mfd_def_size, MFD_CLOEXEC | MFD_ALLOW_SEALING); + mfd_assert_mode(fd, 0666); + mfd_assert_has_seals(fd, F_SEAL_EXEC); + mfd_fail_chmod(fd, 0777); + close(fd); + + mfd_fail_new("kern_memfd_sysctl_2_exec", + MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING); + fd = mfd_assert_new("kern_memfd_sysctl_2_noexec", + mfd_def_size, + MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING); mfd_assert_mode(fd, 0666); mfd_assert_has_seals(fd, F_SEAL_EXEC); mfd_fail_chmod(fd, 0777); close(fd); } -static int newpid_thread_fn2(void *arg) +static void test_sysctl_set_sysctl2(void) +{ + sysctl_assert_write("2"); + test_sysctl_sysctl2(); +} + +static int sysctl_simple_child(void *arg) +{ + int fd; + int pid; + + printf("%s sysctl 0\n", memfd_str); + test_sysctl_set_sysctl0(); + + printf("%s sysctl 1\n", memfd_str); + test_sysctl_set_sysctl1(); + + printf("%s sysctl 0\n", memfd_str); + test_sysctl_set_sysctl0(); + + printf("%s sysctl 2\n", memfd_str); + test_sysctl_set_sysctl2(); + + printf("%s sysctl 1\n", memfd_str); + test_sysctl_set_sysctl1(); + + printf("%s sysctl 0\n", memfd_str); + test_sysctl_set_sysctl0(); + + return 0; +} + +/* + * Test sysctl + * A very basic test to make sure the core sysctl semantics work. + */ +static void test_sysctl_simple(void) +{ + int pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL); + + join_thread(pid); +} + +static int sysctl_nested(void *arg) { - test_sysctl_child2(); + void (*fn)(void) = arg; + + fn(); return 0; } -static pid_t spawn_newpid_thread(unsigned int flags, int (*fn)(void *)) + +static int sysctl_nested_wait(void *arg) { - uint8_t *stack; - pid_t pid; + /* Wait for a SIGCONT. */ + kill(getpid(), SIGSTOP); + return sysctl_nested(arg); +} - stack = malloc(STACK_SIZE); - if (!stack) { - printf("malloc(STACK_SIZE) failed: %m\n"); - abort(); - } +static void test_sysctl_sysctl1_failset(void) +{ + sysctl_fail_write("0"); + test_sysctl_sysctl1(); +} - pid = clone(fn, - stack + STACK_SIZE, - SIGCHLD | flags, - NULL); - if (pid < 0) { - printf("clone() failed: %m\n"); - abort(); - } +static void test_sysctl_sysctl2_failset(void) +{ + sysctl_fail_write("1"); + test_sysctl_sysctl2(); - return pid; + sysctl_fail_write("0"); + test_sysctl_sysctl2(); } -static void join_newpid_thread(pid_t pid) +static int sysctl_nested_child(void *arg) { - waitpid(pid, NULL, 0); + int fd; + int pid; + + printf("%s nested sysctl 0\n", memfd_str); + sysctl_assert_write("0"); + /* A further nested pidns works the same. */ + pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL); + join_thread(pid); + + printf("%s nested sysctl 1\n", memfd_str); + sysctl_assert_write("1"); + /* Child inherits our setting. */ + pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl1); + join_thread(pid); + /* Child cannot raise the setting. */ + pid = spawn_thread(CLONE_NEWPID, sysctl_nested, + test_sysctl_sysctl1_failset); + join_thread(pid); + /* Child can lower the setting. */ + pid = spawn_thread(CLONE_NEWPID, sysctl_nested, + test_sysctl_set_sysctl2); + join_thread(pid); + /* Child lowering the setting has no effect on our setting. */ + test_sysctl_sysctl1(); + + printf("%s nested sysctl 2\n", memfd_str); + sysctl_assert_write("2"); + /* Child inherits our setting. */ + pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl2); + join_thread(pid); + /* Child cannot raise the setting. */ + pid = spawn_thread(CLONE_NEWPID, sysctl_nested, + test_sysctl_sysctl2_failset); + join_thread(pid); + + /* Verify that the rules are actually inherited after fork. */ + printf("%s nested sysctl 0 -> 1 after fork\n", memfd_str); + sysctl_assert_write("0"); + + pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait, + test_sysctl_sysctl1_failset); + sysctl_assert_write("1"); + kill(pid, SIGCONT); + join_thread(pid); + + printf("%s nested sysctl 0 -> 2 after fork\n", memfd_str); + sysctl_assert_write("0"); + + pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait, + test_sysctl_sysctl2_failset); + sysctl_assert_write("2"); + kill(pid, SIGCONT); + join_thread(pid); + + /* + * Verify that the current effective setting is saved on fork, meaning + * that the parent lowering the sysctl doesn't affect already-forked + * children. + */ + printf("%s nested sysctl 2 -> 1 after fork\n", memfd_str); + sysctl_assert_write("2"); + pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait, + test_sysctl_sysctl2); + sysctl_assert_write("1"); + kill(pid, SIGCONT); + join_thread(pid); + + printf("%s nested sysctl 2 -> 0 after fork\n", memfd_str); + sysctl_assert_write("2"); + pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait, + test_sysctl_sysctl2); + sysctl_assert_write("0"); + kill(pid, SIGCONT); + join_thread(pid); + + printf("%s nested sysctl 1 -> 0 after fork\n", memfd_str); + sysctl_assert_write("1"); + pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait, + test_sysctl_sysctl1); + sysctl_assert_write("0"); + kill(pid, SIGCONT); + join_thread(pid); + + return 0; } /* - * Test sysctl - * A very basic sealing test to see whether setting/retrieving seals works. + * Test sysctl with nested pid namespaces + * Make sure that the sysctl nesting semantics work correctly. */ -static void test_sysctl(void) +static void test_sysctl_nested(void) { - int pid = spawn_newpid_thread(CLONE_NEWPID, newpid_thread_fn); + int pid = spawn_thread(CLONE_NEWPID, sysctl_nested_child, NULL); - join_newpid_thread(pid); + join_thread(pid); } /* @@ -1399,6 +1601,9 @@ int main(int argc, char **argv) test_seal_grow(); test_seal_resize(); + test_sysctl_simple(); + test_sysctl_nested(); + test_share_dup("SHARE-DUP", ""); test_share_mmap("SHARE-MMAP", ""); test_share_open("SHARE-OPEN", ""); @@ -1413,8 +1618,6 @@ int main(int argc, char **argv) test_share_fork("SHARE-FORK", SHARED_FT_STR); join_idle_thread(pid); - test_sysctl(); - printf("memfd: DONE\n"); return 0; diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c index 4c88238fc8f0..e949a43a6145 100644 --- a/tools/testing/selftests/mincore/mincore_selftest.c +++ b/tools/testing/selftests/mincore/mincore_selftest.c @@ -150,8 +150,8 @@ TEST(check_huge_pages) MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); if (addr == MAP_FAILED) { - if (errno == ENOMEM) - SKIP(return, "No huge pages available."); + if (errno == ENOMEM || errno == EINVAL) + SKIP(return, "No huge pages available or CONFIG_HUGETLB_PAGE disabled."); else TH_LOG("mmap error: %s", strerror(errno)); } diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index 8917455f4f51..cdc9ce4426b9 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -5,6 +5,7 @@ hugepage-mremap hugepage-shm hugepage-vmemmap hugetlb-madvise +hugetlb-read-hwpoison khugepaged map_hugetlb map_populate @@ -39,3 +40,6 @@ local_config.h local_config.mk ksm_functional_tests mdwe_test +gup_longterm +mkdirty +va_high_addr_switch diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 4f0c50c33ba7..6a9fc5693145 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -32,41 +32,46 @@ endif # LDLIBS. MAKEFLAGS += --no-builtin-rules -CFLAGS = -Wall -I $(top_srcdir) -I $(top_srcdir)/tools/include/uapi $(EXTRA_CFLAGS) $(KHDR_INCLUDES) +CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) LDLIBS = -lrt -lpthread -TEST_GEN_PROGS = cow -TEST_GEN_PROGS += compaction_test -TEST_GEN_PROGS += gup_test -TEST_GEN_PROGS += hmm-tests -TEST_GEN_PROGS += hugetlb-madvise -TEST_GEN_PROGS += hugepage-mmap -TEST_GEN_PROGS += hugepage-mremap -TEST_GEN_PROGS += hugepage-shm -TEST_GEN_PROGS += hugepage-vmemmap -TEST_GEN_PROGS += khugepaged -TEST_GEN_PROGS += madv_populate -TEST_GEN_PROGS += map_fixed_noreplace -TEST_GEN_PROGS += map_hugetlb -TEST_GEN_PROGS += map_populate -TEST_GEN_PROGS += memfd_secret -TEST_GEN_PROGS += migration -TEST_GEN_PROGS += mkdirty -TEST_GEN_PROGS += mlock-random-test -TEST_GEN_PROGS += mlock2-tests -TEST_GEN_PROGS += mrelease_test -TEST_GEN_PROGS += mremap_dontunmap -TEST_GEN_PROGS += mremap_test -TEST_GEN_PROGS += on-fault-limit -TEST_GEN_PROGS += thuge-gen -TEST_GEN_PROGS += transhuge-stress -TEST_GEN_PROGS += uffd-stress -TEST_GEN_PROGS += uffd-unit-tests +TEST_GEN_FILES = cow +TEST_GEN_FILES += compaction_test +TEST_GEN_FILES += gup_longterm +TEST_GEN_FILES += gup_test +TEST_GEN_FILES += hmm-tests +TEST_GEN_FILES += hugetlb-madvise +TEST_GEN_FILES += hugetlb-read-hwpoison +TEST_GEN_FILES += hugepage-mmap +TEST_GEN_FILES += hugepage-mremap +TEST_GEN_FILES += hugepage-shm +TEST_GEN_FILES += hugepage-vmemmap +TEST_GEN_FILES += khugepaged +TEST_GEN_FILES += madv_populate +TEST_GEN_FILES += map_fixed_noreplace +TEST_GEN_FILES += map_hugetlb +TEST_GEN_FILES += map_populate +TEST_GEN_FILES += memfd_secret +TEST_GEN_FILES += migration +TEST_GEN_FILES += mkdirty +TEST_GEN_FILES += mlock-random-test +TEST_GEN_FILES += mlock2-tests +TEST_GEN_FILES += mrelease_test +TEST_GEN_FILES += mremap_dontunmap +TEST_GEN_FILES += mremap_test +TEST_GEN_FILES += on-fault-limit +TEST_GEN_FILES += thuge-gen +TEST_GEN_FILES += transhuge-stress +TEST_GEN_FILES += uffd-stress +TEST_GEN_FILES += uffd-unit-tests +TEST_GEN_FILES += split_huge_page_test +TEST_GEN_FILES += ksm_tests +TEST_GEN_FILES += ksm_functional_tests +TEST_GEN_FILES += mdwe_test + +ifneq ($(ARCH),arm64) TEST_GEN_PROGS += soft-dirty -TEST_GEN_PROGS += split_huge_page_test -TEST_GEN_PROGS += ksm_tests -TEST_GEN_PROGS += ksm_functional_tests -TEST_GEN_PROGS += mdwe_test +endif ifeq ($(ARCH),x86_64) CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_program.c -m32) @@ -82,24 +87,24 @@ CFLAGS += -no-pie endif ifeq ($(CAN_BUILD_I386),1) -TEST_GEN_PROGS += $(BINARIES_32) +TEST_GEN_FILES += $(BINARIES_32) endif ifeq ($(CAN_BUILD_X86_64),1) -TEST_GEN_PROGS += $(BINARIES_64) +TEST_GEN_FILES += $(BINARIES_64) endif else ifneq (,$(findstring $(ARCH),ppc64)) -TEST_GEN_PROGS += protection_keys +TEST_GEN_FILES += protection_keys endif endif ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sparc64 x86_64)) -TEST_GEN_PROGS += va_high_addr_switch -TEST_GEN_PROGS += virtual_address_range -TEST_GEN_PROGS += write_to_hugetlbfs +TEST_GEN_FILES += va_high_addr_switch +TEST_GEN_FILES += virtual_address_range +TEST_GEN_FILES += write_to_hugetlbfs endif TEST_PROGS := run_vmtests.sh @@ -111,6 +116,7 @@ TEST_FILES += va_high_addr_switch.sh include ../lib.mk $(TEST_GEN_PROGS): vm_util.c +$(TEST_GEN_FILES): vm_util.c $(OUTPUT)/uffd-stress: uffd-common.c $(OUTPUT)/uffd-unit-tests: uffd-common.c @@ -167,6 +173,8 @@ endif # IOURING_EXTRA_LIBS may get set in local_config.mk, or it may be left empty. $(OUTPUT)/cow: LDLIBS += $(IOURING_EXTRA_LIBS) +$(OUTPUT)/gup_longterm: LDLIBS += $(IOURING_EXTRA_LIBS) + $(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap $(OUTPUT)/ksm_tests: LDLIBS += -lnuma diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh index a5cb4b09a46c..0899019a7fcb 100644..100755 --- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh @@ -25,7 +25,7 @@ if [[ "$1" == "-cgroup-v2" ]]; then fi if [[ $cgroup2 ]]; then - cgroup_path=$(mount -t cgroup2 | head -1 | awk -e '{print $3}') + cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}') if [[ -z "$cgroup_path" ]]; then cgroup_path=/dev/cgroup/memory mount -t cgroup2 none $cgroup_path @@ -33,7 +33,7 @@ if [[ $cgroup2 ]]; then fi echo "+hugetlb" >$cgroup_path/cgroup.subtree_control else - cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}') + cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}') if [[ -z "$cgroup_path" ]]; then cgroup_path=/dev/cgroup/memory mount -t cgroup memory,hugetlb $cgroup_path diff --git a/tools/testing/selftests/mm/check_config.sh b/tools/testing/selftests/mm/check_config.sh index 3954f4746161..3954f4746161 100644..100755 --- a/tools/testing/selftests/mm/check_config.sh +++ b/tools/testing/selftests/mm/check_config.sh diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index dc9d6fe86028..7324ce5363c0 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -14,8 +14,8 @@ #include <unistd.h> #include <errno.h> #include <fcntl.h> -#include <dirent.h> #include <assert.h> +#include <linux/mman.h> #include <sys/mman.h> #include <sys/ioctl.h> #include <sys/wait.h> @@ -30,13 +30,6 @@ #include "../kselftest.h" #include "vm_util.h" -#ifndef MADV_PAGEOUT -#define MADV_PAGEOUT 21 -#endif -#ifndef MADV_COLLAPSE -#define MADV_COLLAPSE 25 -#endif - static size_t pagesize; static int pagemap_fd; static size_t thpsize; @@ -70,31 +63,6 @@ static void detect_huge_zeropage(void) close(fd); } -static void detect_hugetlbsizes(void) -{ - DIR *dir = opendir("/sys/kernel/mm/hugepages/"); - - if (!dir) - return; - - while (nr_hugetlbsizes < ARRAY_SIZE(hugetlbsizes)) { - struct dirent *entry = readdir(dir); - size_t kb; - - if (!entry) - break; - if (entry->d_type != DT_DIR) - continue; - if (sscanf(entry->d_name, "hugepages-%zukB", &kb) != 1) - continue; - hugetlbsizes[nr_hugetlbsizes] = kb * 1024; - nr_hugetlbsizes++; - ksft_print_msg("[INFO] detected hugetlb size: %zu KiB\n", - kb); - } - closedir(dir); -} - static bool range_is_swapped(void *addr, size_t size) { for (; size; addr += pagesize, size -= pagesize) @@ -1717,7 +1685,8 @@ int main(int argc, char **argv) if (thpsize) ksft_print_msg("[INFO] detected THP size: %zu KiB\n", thpsize / 1024); - detect_hugetlbsizes(); + nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, + ARRAY_SIZE(hugetlbsizes)); detect_huge_zeropage(); ksft_print_header(); diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c new file mode 100644 index 000000000000..d33d3e68ffab --- /dev/null +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -0,0 +1,459 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * GUP long-term page pinning tests. + * + * Copyright 2023, Red Hat, Inc. + * + * Author(s): David Hildenbrand <david@redhat.com> + */ +#define _GNU_SOURCE +#include <stdlib.h> +#include <string.h> +#include <stdbool.h> +#include <stdint.h> +#include <unistd.h> +#include <errno.h> +#include <fcntl.h> +#include <assert.h> +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <sys/vfs.h> +#include <linux/magic.h> +#include <linux/memfd.h> + +#include "local_config.h" +#ifdef LOCAL_CONFIG_HAVE_LIBURING +#include <liburing.h> +#endif /* LOCAL_CONFIG_HAVE_LIBURING */ + +#include "../../../../mm/gup_test.h" +#include "../kselftest.h" +#include "vm_util.h" + +static size_t pagesize; +static int nr_hugetlbsizes; +static size_t hugetlbsizes[10]; +static int gup_fd; + +static __fsword_t get_fs_type(int fd) +{ + struct statfs fs; + int ret; + + do { + ret = fstatfs(fd, &fs); + } while (ret && errno == EINTR); + + return ret ? 0 : fs.f_type; +} + +static bool fs_is_unknown(__fsword_t fs_type) +{ + /* + * We only support some filesystems in our tests when dealing with + * R/W long-term pinning. For these filesystems, we can be fairly sure + * whether they support it or not. + */ + switch (fs_type) { + case TMPFS_MAGIC: + case HUGETLBFS_MAGIC: + case BTRFS_SUPER_MAGIC: + case EXT4_SUPER_MAGIC: + case XFS_SUPER_MAGIC: + return false; + default: + return true; + } +} + +static bool fs_supports_writable_longterm_pinning(__fsword_t fs_type) +{ + assert(!fs_is_unknown(fs_type)); + switch (fs_type) { + case TMPFS_MAGIC: + case HUGETLBFS_MAGIC: + return true; + default: + return false; + } +} + +enum test_type { + TEST_TYPE_RO, + TEST_TYPE_RO_FAST, + TEST_TYPE_RW, + TEST_TYPE_RW_FAST, +#ifdef LOCAL_CONFIG_HAVE_LIBURING + TEST_TYPE_IOURING, +#endif /* LOCAL_CONFIG_HAVE_LIBURING */ +}; + +static void do_test(int fd, size_t size, enum test_type type, bool shared) +{ + __fsword_t fs_type = get_fs_type(fd); + bool should_work; + char *mem; + int ret; + + if (ftruncate(fd, size)) { + ksft_test_result_fail("ftruncate() failed\n"); + return; + } + + if (fallocate(fd, 0, 0, size)) { + if (size == pagesize) + ksft_test_result_fail("fallocate() failed\n"); + else + ksft_test_result_skip("need more free huge pages\n"); + return; + } + + mem = mmap(NULL, size, PROT_READ | PROT_WRITE, + shared ? MAP_SHARED : MAP_PRIVATE, fd, 0); + if (mem == MAP_FAILED) { + if (size == pagesize || shared) + ksft_test_result_fail("mmap() failed\n"); + else + ksft_test_result_skip("need more free huge pages\n"); + return; + } + + /* + * Fault in the page writable such that GUP-fast can eventually pin + * it immediately. + */ + memset(mem, 0, size); + + switch (type) { + case TEST_TYPE_RO: + case TEST_TYPE_RO_FAST: + case TEST_TYPE_RW: + case TEST_TYPE_RW_FAST: { + struct pin_longterm_test args; + const bool fast = type == TEST_TYPE_RO_FAST || + type == TEST_TYPE_RW_FAST; + const bool rw = type == TEST_TYPE_RW || + type == TEST_TYPE_RW_FAST; + + if (gup_fd < 0) { + ksft_test_result_skip("gup_test not available\n"); + break; + } + + if (rw && shared && fs_is_unknown(fs_type)) { + ksft_test_result_skip("Unknown filesystem\n"); + return; + } + /* + * R/O pinning or pinning in a private mapping is always + * expected to work. Otherwise, we expect long-term R/W pinning + * to only succeed for special fielesystems. + */ + should_work = !shared || !rw || + fs_supports_writable_longterm_pinning(fs_type); + + args.addr = (__u64)(uintptr_t)mem; + args.size = size; + args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0; + args.flags |= rw ? PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0; + ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args); + if (ret && errno == EINVAL) { + ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n"); + break; + } else if (ret && errno == EFAULT) { + ksft_test_result(!should_work, "Should have failed\n"); + break; + } else if (ret) { + ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n"); + break; + } + + if (ioctl(gup_fd, PIN_LONGTERM_TEST_STOP)) + ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n"); + + /* + * TODO: if the kernel ever supports long-term R/W pinning on + * some previously unsupported filesystems, we might want to + * perform some additional tests for possible data corruptions. + */ + ksft_test_result(should_work, "Should have worked\n"); + break; + } +#ifdef LOCAL_CONFIG_HAVE_LIBURING + case TEST_TYPE_IOURING: { + struct io_uring ring; + struct iovec iov; + + /* io_uring always pins pages writable. */ + if (shared && fs_is_unknown(fs_type)) { + ksft_test_result_skip("Unknown filesystem\n"); + return; + } + should_work = !shared || + fs_supports_writable_longterm_pinning(fs_type); + + /* Skip on errors, as we might just lack kernel support. */ + ret = io_uring_queue_init(1, &ring, 0); + if (ret < 0) { + ksft_test_result_skip("io_uring_queue_init() failed\n"); + break; + } + /* + * Register the range as a fixed buffer. This will FOLL_WRITE | + * FOLL_PIN | FOLL_LONGTERM the range. + */ + iov.iov_base = mem; + iov.iov_len = size; + ret = io_uring_register_buffers(&ring, &iov, 1); + /* Only new kernels return EFAULT. */ + if (ret && (errno == ENOSPC || errno == EOPNOTSUPP || + errno == EFAULT)) { + ksft_test_result(!should_work, "Should have failed\n"); + } else if (ret) { + /* + * We might just lack support or have insufficient + * MEMLOCK limits. + */ + ksft_test_result_skip("io_uring_register_buffers() failed\n"); + } else { + ksft_test_result(should_work, "Should have worked\n"); + io_uring_unregister_buffers(&ring); + } + + io_uring_queue_exit(&ring); + break; + } +#endif /* LOCAL_CONFIG_HAVE_LIBURING */ + default: + assert(false); + } + + munmap(mem, size); +} + +typedef void (*test_fn)(int fd, size_t size); + +static void run_with_memfd(test_fn fn, const char *desc) +{ + int fd; + + ksft_print_msg("[RUN] %s ... with memfd\n", desc); + + fd = memfd_create("test", 0); + if (fd < 0) { + ksft_test_result_fail("memfd_create() failed\n"); + return; + } + + fn(fd, pagesize); + close(fd); +} + +static void run_with_tmpfile(test_fn fn, const char *desc) +{ + FILE *file; + int fd; + + ksft_print_msg("[RUN] %s ... with tmpfile\n", desc); + + file = tmpfile(); + if (!file) { + ksft_test_result_fail("tmpfile() failed\n"); + return; + } + + fd = fileno(file); + if (fd < 0) { + ksft_test_result_fail("fileno() failed\n"); + return; + } + + fn(fd, pagesize); + fclose(file); +} + +static void run_with_local_tmpfile(test_fn fn, const char *desc) +{ + char filename[] = __FILE__"_tmpfile_XXXXXX"; + int fd; + + ksft_print_msg("[RUN] %s ... with local tmpfile\n", desc); + + fd = mkstemp(filename); + if (fd < 0) { + ksft_test_result_fail("mkstemp() failed\n"); + return; + } + + if (unlink(filename)) { + ksft_test_result_fail("unlink() failed\n"); + goto close; + } + + fn(fd, pagesize); +close: + close(fd); +} + +static void run_with_memfd_hugetlb(test_fn fn, const char *desc, + size_t hugetlbsize) +{ + int flags = MFD_HUGETLB; + int fd; + + ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc, + hugetlbsize / 1024); + + flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT; + + fd = memfd_create("test", flags); + if (fd < 0) { + ksft_test_result_skip("memfd_create() failed\n"); + return; + } + + fn(fd, hugetlbsize); + close(fd); +} + +struct test_case { + const char *desc; + test_fn fn; +}; + +static void test_shared_rw_pin(int fd, size_t size) +{ + do_test(fd, size, TEST_TYPE_RW, true); +} + +static void test_shared_rw_fast_pin(int fd, size_t size) +{ + do_test(fd, size, TEST_TYPE_RW_FAST, true); +} + +static void test_shared_ro_pin(int fd, size_t size) +{ + do_test(fd, size, TEST_TYPE_RO, true); +} + +static void test_shared_ro_fast_pin(int fd, size_t size) +{ + do_test(fd, size, TEST_TYPE_RO_FAST, true); +} + +static void test_private_rw_pin(int fd, size_t size) +{ + do_test(fd, size, TEST_TYPE_RW, false); +} + +static void test_private_rw_fast_pin(int fd, size_t size) +{ + do_test(fd, size, TEST_TYPE_RW_FAST, false); +} + +static void test_private_ro_pin(int fd, size_t size) +{ + do_test(fd, size, TEST_TYPE_RO, false); +} + +static void test_private_ro_fast_pin(int fd, size_t size) +{ + do_test(fd, size, TEST_TYPE_RO_FAST, false); +} + +#ifdef LOCAL_CONFIG_HAVE_LIBURING +static void test_shared_iouring(int fd, size_t size) +{ + do_test(fd, size, TEST_TYPE_IOURING, true); +} + +static void test_private_iouring(int fd, size_t size) +{ + do_test(fd, size, TEST_TYPE_IOURING, false); +} +#endif /* LOCAL_CONFIG_HAVE_LIBURING */ + +static const struct test_case test_cases[] = { + { + "R/W longterm GUP pin in MAP_SHARED file mapping", + test_shared_rw_pin, + }, + { + "R/W longterm GUP-fast pin in MAP_SHARED file mapping", + test_shared_rw_fast_pin, + }, + { + "R/O longterm GUP pin in MAP_SHARED file mapping", + test_shared_ro_pin, + }, + { + "R/O longterm GUP-fast pin in MAP_SHARED file mapping", + test_shared_ro_fast_pin, + }, + { + "R/W longterm GUP pin in MAP_PRIVATE file mapping", + test_private_rw_pin, + }, + { + "R/W longterm GUP-fast pin in MAP_PRIVATE file mapping", + test_private_rw_fast_pin, + }, + { + "R/O longterm GUP pin in MAP_PRIVATE file mapping", + test_private_ro_pin, + }, + { + "R/O longterm GUP-fast pin in MAP_PRIVATE file mapping", + test_private_ro_fast_pin, + }, +#ifdef LOCAL_CONFIG_HAVE_LIBURING + { + "io_uring fixed buffer with MAP_SHARED file mapping", + test_shared_iouring, + }, + { + "io_uring fixed buffer with MAP_PRIVATE file mapping", + test_private_iouring, + }, +#endif /* LOCAL_CONFIG_HAVE_LIBURING */ +}; + +static void run_test_case(struct test_case const *test_case) +{ + int i; + + run_with_memfd(test_case->fn, test_case->desc); + run_with_tmpfile(test_case->fn, test_case->desc); + run_with_local_tmpfile(test_case->fn, test_case->desc); + for (i = 0; i < nr_hugetlbsizes; i++) + run_with_memfd_hugetlb(test_case->fn, test_case->desc, + hugetlbsizes[i]); +} + +static int tests_per_test_case(void) +{ + return 3 + nr_hugetlbsizes; +} + +int main(int argc, char **argv) +{ + int i, err; + + pagesize = getpagesize(); + nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes, + ARRAY_SIZE(hugetlbsizes)); + + ksft_print_header(); + ksft_set_plan(ARRAY_SIZE(test_cases) * tests_per_test_case()); + + gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR); + + for (i = 0; i < ARRAY_SIZE(test_cases); i++) + run_test_case(&test_cases[i]); + + err = ksft_get_fail_cnt(); + if (err) + ksft_exit_fail_msg("%d out of %d tests failed\n", + err, ksft_test_num()); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c index 4adaad1b822f..20294553a5dd 100644 --- a/tools/testing/selftests/mm/hmm-tests.c +++ b/tools/testing/selftests/mm/hmm-tests.c @@ -57,9 +57,14 @@ enum { #define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) /* Just the flags we need, copied from mm.h: */ + +#ifndef FOLL_WRITE #define FOLL_WRITE 0x01 /* check pte is writable */ -#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite */ +#endif +#ifndef FOLL_LONGTERM +#define FOLL_LONGTERM 0x100 /* mapping lifetime is indefinite */ +#endif FIXTURE(hmm) { int fd; diff --git a/tools/testing/selftests/mm/hugepage-shm.c b/tools/testing/selftests/mm/hugepage-shm.c index e2527f32005b..478bb1e989e9 100644 --- a/tools/testing/selftests/mm/hugepage-shm.c +++ b/tools/testing/selftests/mm/hugepage-shm.c @@ -35,10 +35,6 @@ #include <sys/shm.h> #include <sys/mman.h> -#ifndef SHM_HUGETLB -#define SHM_HUGETLB 04000 -#endif - #define LENGTH (256UL*1024*1024) #define dprintf(x) printf(x) diff --git a/tools/testing/selftests/mm/hugepage-vmemmap.c b/tools/testing/selftests/mm/hugepage-vmemmap.c index 557bdbd4f87e..5b354c209e93 100644 --- a/tools/testing/selftests/mm/hugepage-vmemmap.c +++ b/tools/testing/selftests/mm/hugepage-vmemmap.c @@ -13,10 +13,6 @@ #define MAP_LENGTH (2UL * 1024 * 1024) -#ifndef MAP_HUGETLB -#define MAP_HUGETLB 0x40000 /* arch specific */ -#endif - #define PAGE_SIZE 4096 #define PAGE_COMPOUND_HEAD (1UL << 15) diff --git a/tools/testing/selftests/mm/hugetlb-madvise.c b/tools/testing/selftests/mm/hugetlb-madvise.c index 28426e30d9bc..d55322df4b73 100644 --- a/tools/testing/selftests/mm/hugetlb-madvise.c +++ b/tools/testing/selftests/mm/hugetlb-madvise.c @@ -65,11 +65,15 @@ void write_fault_pages(void *addr, unsigned long nr_pages) void read_fault_pages(void *addr, unsigned long nr_pages) { - unsigned long dummy = 0; + volatile unsigned long dummy = 0; unsigned long i; - for (i = 0; i < nr_pages; i++) + for (i = 0; i < nr_pages; i++) { dummy += *((unsigned long *)(addr + (i * huge_page_size))); + + /* Prevent the compiler from optimizing out the entire loop: */ + asm volatile("" : "+r" (dummy)); + } } int main(int argc, char **argv) diff --git a/tools/testing/selftests/mm/hugetlb-read-hwpoison.c b/tools/testing/selftests/mm/hugetlb-read-hwpoison.c new file mode 100644 index 000000000000..ba6cc6f9cabc --- /dev/null +++ b/tools/testing/selftests/mm/hugetlb-read-hwpoison.c @@ -0,0 +1,322 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include <linux/magic.h> +#include <sys/mman.h> +#include <sys/statfs.h> +#include <errno.h> +#include <stdbool.h> + +#include "../kselftest.h" + +#define PREFIX " ... " +#define ERROR_PREFIX " !!! " + +#define MAX_WRITE_READ_CHUNK_SIZE (getpagesize() * 16) +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) + +enum test_status { + TEST_PASSED = 0, + TEST_FAILED = 1, + TEST_SKIPPED = 2, +}; + +static char *status_to_str(enum test_status status) +{ + switch (status) { + case TEST_PASSED: + return "TEST_PASSED"; + case TEST_FAILED: + return "TEST_FAILED"; + case TEST_SKIPPED: + return "TEST_SKIPPED"; + default: + return "TEST_???"; + } +} + +static int setup_filemap(char *filemap, size_t len, size_t wr_chunk_size) +{ + char iter = 0; + + for (size_t offset = 0; offset < len; + offset += wr_chunk_size) { + iter++; + memset(filemap + offset, iter, wr_chunk_size); + } + + return 0; +} + +static bool verify_chunk(char *buf, size_t len, char val) +{ + size_t i; + + for (i = 0; i < len; ++i) { + if (buf[i] != val) { + printf(PREFIX ERROR_PREFIX "check fail: buf[%lu] = %u != %u\n", + i, buf[i], val); + return false; + } + } + + return true; +} + +static bool seek_read_hugepage_filemap(int fd, size_t len, size_t wr_chunk_size, + off_t offset, size_t expected) +{ + char buf[MAX_WRITE_READ_CHUNK_SIZE]; + ssize_t ret_count = 0; + ssize_t total_ret_count = 0; + char val = offset / wr_chunk_size + offset % wr_chunk_size; + + printf(PREFIX PREFIX "init val=%u with offset=0x%lx\n", val, offset); + printf(PREFIX PREFIX "expect to read 0x%lx bytes of data in total\n", + expected); + if (lseek(fd, offset, SEEK_SET) < 0) { + perror(PREFIX ERROR_PREFIX "seek failed"); + return false; + } + + while (offset + total_ret_count < len) { + ret_count = read(fd, buf, wr_chunk_size); + if (ret_count == 0) { + printf(PREFIX PREFIX "read reach end of the file\n"); + break; + } else if (ret_count < 0) { + perror(PREFIX ERROR_PREFIX "read failed"); + break; + } + ++val; + if (!verify_chunk(buf, ret_count, val)) + return false; + + total_ret_count += ret_count; + } + printf(PREFIX PREFIX "actually read 0x%lx bytes of data in total\n", + total_ret_count); + + return total_ret_count == expected; +} + +static bool read_hugepage_filemap(int fd, size_t len, + size_t wr_chunk_size, size_t expected) +{ + char buf[MAX_WRITE_READ_CHUNK_SIZE]; + ssize_t ret_count = 0; + ssize_t total_ret_count = 0; + char val = 0; + + printf(PREFIX PREFIX "expect to read 0x%lx bytes of data in total\n", + expected); + while (total_ret_count < len) { + ret_count = read(fd, buf, wr_chunk_size); + if (ret_count == 0) { + printf(PREFIX PREFIX "read reach end of the file\n"); + break; + } else if (ret_count < 0) { + perror(PREFIX ERROR_PREFIX "read failed"); + break; + } + ++val; + if (!verify_chunk(buf, ret_count, val)) + return false; + + total_ret_count += ret_count; + } + printf(PREFIX PREFIX "actually read 0x%lx bytes of data in total\n", + total_ret_count); + + return total_ret_count == expected; +} + +static enum test_status +test_hugetlb_read(int fd, size_t len, size_t wr_chunk_size) +{ + enum test_status status = TEST_SKIPPED; + char *filemap = NULL; + + if (ftruncate(fd, len) < 0) { + perror(PREFIX ERROR_PREFIX "ftruncate failed"); + return status; + } + + filemap = mmap(NULL, len, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, 0); + if (filemap == MAP_FAILED) { + perror(PREFIX ERROR_PREFIX "mmap for primary mapping failed"); + goto done; + } + + setup_filemap(filemap, len, wr_chunk_size); + status = TEST_FAILED; + + if (read_hugepage_filemap(fd, len, wr_chunk_size, len)) + status = TEST_PASSED; + + munmap(filemap, len); +done: + if (ftruncate(fd, 0) < 0) { + perror(PREFIX ERROR_PREFIX "ftruncate back to 0 failed"); + status = TEST_FAILED; + } + + return status; +} + +static enum test_status +test_hugetlb_read_hwpoison(int fd, size_t len, size_t wr_chunk_size, + bool skip_hwpoison_page) +{ + enum test_status status = TEST_SKIPPED; + char *filemap = NULL; + char *hwp_addr = NULL; + const unsigned long pagesize = getpagesize(); + + if (ftruncate(fd, len) < 0) { + perror(PREFIX ERROR_PREFIX "ftruncate failed"); + return status; + } + + filemap = mmap(NULL, len, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, 0); + if (filemap == MAP_FAILED) { + perror(PREFIX ERROR_PREFIX "mmap for primary mapping failed"); + goto done; + } + + setup_filemap(filemap, len, wr_chunk_size); + status = TEST_FAILED; + + /* + * Poisoned hugetlb page layout (assume hugepagesize=2MB): + * |<---------------------- 1MB ---------------------->| + * |<---- healthy page ---->|<---- HWPOISON page ----->| + * |<------------------- (1MB - 8KB) ----------------->| + */ + hwp_addr = filemap + len / 2 + pagesize; + if (madvise(hwp_addr, pagesize, MADV_HWPOISON) < 0) { + perror(PREFIX ERROR_PREFIX "MADV_HWPOISON failed"); + goto unmap; + } + + if (!skip_hwpoison_page) { + /* + * Userspace should be able to read (1MB + 1 page) from + * the beginning of the HWPOISONed hugepage. + */ + if (read_hugepage_filemap(fd, len, wr_chunk_size, + len / 2 + pagesize)) + status = TEST_PASSED; + } else { + /* + * Userspace should be able to read (1MB - 2 pages) from + * HWPOISONed hugepage. + */ + if (seek_read_hugepage_filemap(fd, len, wr_chunk_size, + len / 2 + MAX(2 * pagesize, wr_chunk_size), + len / 2 - MAX(2 * pagesize, wr_chunk_size))) + status = TEST_PASSED; + } + +unmap: + munmap(filemap, len); +done: + if (ftruncate(fd, 0) < 0) { + perror(PREFIX ERROR_PREFIX "ftruncate back to 0 failed"); + status = TEST_FAILED; + } + + return status; +} + +static int create_hugetlbfs_file(struct statfs *file_stat) +{ + int fd; + + fd = memfd_create("hugetlb_tmp", MFD_HUGETLB); + if (fd < 0) { + perror(PREFIX ERROR_PREFIX "could not open hugetlbfs file"); + return -1; + } + + memset(file_stat, 0, sizeof(*file_stat)); + if (fstatfs(fd, file_stat)) { + perror(PREFIX ERROR_PREFIX "fstatfs failed"); + goto close; + } + if (file_stat->f_type != HUGETLBFS_MAGIC) { + printf(PREFIX ERROR_PREFIX "not hugetlbfs file\n"); + goto close; + } + + return fd; +close: + close(fd); + return -1; +} + +int main(void) +{ + int fd; + struct statfs file_stat; + enum test_status status; + /* Test read() in different granularity. */ + size_t wr_chunk_sizes[] = { + getpagesize() / 2, getpagesize(), + getpagesize() * 2, getpagesize() * 4 + }; + size_t i; + + for (i = 0; i < ARRAY_SIZE(wr_chunk_sizes); ++i) { + printf("Write/read chunk size=0x%lx\n", + wr_chunk_sizes[i]); + + fd = create_hugetlbfs_file(&file_stat); + if (fd < 0) + goto create_failure; + printf(PREFIX "HugeTLB read regression test...\n"); + status = test_hugetlb_read(fd, file_stat.f_bsize, + wr_chunk_sizes[i]); + printf(PREFIX "HugeTLB read regression test...%s\n", + status_to_str(status)); + close(fd); + if (status == TEST_FAILED) + return -1; + + fd = create_hugetlbfs_file(&file_stat); + if (fd < 0) + goto create_failure; + printf(PREFIX "HugeTLB read HWPOISON test...\n"); + status = test_hugetlb_read_hwpoison(fd, file_stat.f_bsize, + wr_chunk_sizes[i], false); + printf(PREFIX "HugeTLB read HWPOISON test...%s\n", + status_to_str(status)); + close(fd); + if (status == TEST_FAILED) + return -1; + + fd = create_hugetlbfs_file(&file_stat); + if (fd < 0) + goto create_failure; + printf(PREFIX "HugeTLB seek then read HWPOISON test...\n"); + status = test_hugetlb_read_hwpoison(fd, file_stat.f_bsize, + wr_chunk_sizes[i], true); + printf(PREFIX "HugeTLB seek then read HWPOISON test...%s\n", + status_to_str(status)); + close(fd); + if (status == TEST_FAILED) + return -1; + } + + return 0; + +create_failure: + printf(ERROR_PREFIX "Abort test: failed to create hugetlbfs file\n"); + return -1; +} diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh index bf2d2a684edf..14d26075c863 100644..100755 --- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh +++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh @@ -20,7 +20,7 @@ fi if [[ $cgroup2 ]]; then - CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk -e '{print $3}') + CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk '{print $3}') if [[ -z "$CGROUP_ROOT" ]]; then CGROUP_ROOT=/dev/cgroup/memory mount -t cgroup2 none $CGROUP_ROOT @@ -28,7 +28,7 @@ if [[ $cgroup2 ]]; then fi echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control else - CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk -e '{print $3}') + CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}') if [[ -z "$CGROUP_ROOT" ]]; then CGROUP_ROOT=/dev/cgroup/memory mount -t cgroup memory,hugetlb $CGROUP_ROOT diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index 97adc0f34f9c..030667cb5533 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -11,6 +11,7 @@ #include <string.h> #include <unistd.h> +#include <linux/mman.h> #include <sys/mman.h> #include <sys/wait.h> #include <sys/types.h> @@ -22,16 +23,6 @@ #include "vm_util.h" -#ifndef MADV_PAGEOUT -#define MADV_PAGEOUT 21 -#endif -#ifndef MADV_POPULATE_READ -#define MADV_POPULATE_READ 22 -#endif -#ifndef MADV_COLLAPSE -#define MADV_COLLAPSE 25 -#endif - #define BASE_ADDR ((void *)(1UL << 30)) static unsigned long hpage_pmd_size; static unsigned long page_size; diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c index 26853badae70..901e950f9138 100644 --- a/tools/testing/selftests/mm/ksm_functional_tests.c +++ b/tools/testing/selftests/mm/ksm_functional_tests.c @@ -27,8 +27,12 @@ #define KiB 1024u #define MiB (1024 * KiB) +static int mem_fd; static int ksm_fd; static int ksm_full_scans_fd; +static int proc_self_ksm_stat_fd; +static int proc_self_ksm_merging_pages_fd; +static int ksm_use_zero_pages_fd; static int pagemap_fd; static size_t pagesize; @@ -59,6 +63,49 @@ static bool range_maps_duplicates(char *addr, unsigned long size) return false; } +static long get_my_ksm_zero_pages(void) +{ + char buf[200]; + char *substr_ksm_zero; + size_t value_pos; + ssize_t read_size; + unsigned long my_ksm_zero_pages; + + if (!proc_self_ksm_stat_fd) + return 0; + + read_size = pread(proc_self_ksm_stat_fd, buf, sizeof(buf) - 1, 0); + if (read_size < 0) + return -errno; + + buf[read_size] = 0; + + substr_ksm_zero = strstr(buf, "ksm_zero_pages"); + if (!substr_ksm_zero) + return 0; + + value_pos = strcspn(substr_ksm_zero, "0123456789"); + my_ksm_zero_pages = strtol(substr_ksm_zero + value_pos, NULL, 10); + + return my_ksm_zero_pages; +} + +static long get_my_merging_pages(void) +{ + char buf[10]; + ssize_t ret; + + if (proc_self_ksm_merging_pages_fd < 0) + return proc_self_ksm_merging_pages_fd; + + ret = pread(proc_self_ksm_merging_pages_fd, buf, sizeof(buf) - 1, 0); + if (ret <= 0) + return -errno; + buf[ret] = 0; + + return strtol(buf, NULL, 10); +} + static long ksm_get_full_scans(void) { char buf[10]; @@ -91,11 +138,30 @@ static int ksm_merge(void) return 0; } -static char *mmap_and_merge_range(char val, unsigned long size, bool use_prctl) +static int ksm_unmerge(void) +{ + if (write(ksm_fd, "2", 1) != 1) + return -errno; + return 0; +} + +static char *mmap_and_merge_range(char val, unsigned long size, int prot, + bool use_prctl) { char *map; int ret; + /* Stabilize accounting by disabling KSM completely. */ + if (ksm_unmerge()) { + ksft_test_result_fail("Disabling (unmerging) KSM failed\n"); + goto unmap; + } + + if (get_my_merging_pages() > 0) { + ksft_test_result_fail("Still pages merged\n"); + goto unmap; + } + map = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); if (map == MAP_FAILED) { @@ -112,6 +178,11 @@ static char *mmap_and_merge_range(char val, unsigned long size, bool use_prctl) /* Make sure each page contains the same values to merge them. */ memset(map, val, size); + if (mprotect(map, size, prot)) { + ksft_test_result_skip("mprotect() failed\n"); + goto unmap; + } + if (use_prctl) { ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0); if (ret < 0 && errno == EINVAL) { @@ -131,6 +202,16 @@ static char *mmap_and_merge_range(char val, unsigned long size, bool use_prctl) ksft_test_result_fail("Running KSM failed\n"); goto unmap; } + + /* + * Check if anything was merged at all. Ignore the zero page that is + * accounted differently (depending on kernel support). + */ + if (val && !get_my_merging_pages()) { + ksft_test_result_fail("No pages got merged\n"); + goto unmap; + } + return map; unmap: munmap(map, size); @@ -144,7 +225,7 @@ static void test_unmerge(void) ksft_print_msg("[RUN] %s\n", __func__); - map = mmap_and_merge_range(0xcf, size, false); + map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false); if (map == MAP_FAILED) return; @@ -159,6 +240,70 @@ unmap: munmap(map, size); } +static void test_unmerge_zero_pages(void) +{ + const unsigned int size = 2 * MiB; + char *map; + unsigned int offs; + unsigned long pages_expected; + + ksft_print_msg("[RUN] %s\n", __func__); + + if (proc_self_ksm_stat_fd < 0) { + ksft_test_result_skip("open(\"/proc/self/ksm_stat\") failed\n"); + return; + } + if (ksm_use_zero_pages_fd < 0) { + ksft_test_result_skip("open \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n"); + return; + } + if (write(ksm_use_zero_pages_fd, "1", 1) != 1) { + ksft_test_result_skip("write \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n"); + return; + } + + /* Let KSM deduplicate zero pages. */ + map = mmap_and_merge_range(0x00, size, PROT_READ | PROT_WRITE, false); + if (map == MAP_FAILED) + return; + + /* Check if ksm_zero_pages is updated correctly after KSM merging */ + pages_expected = size / pagesize; + if (pages_expected != get_my_ksm_zero_pages()) { + ksft_test_result_fail("'ksm_zero_pages' updated after merging\n"); + goto unmap; + } + + /* Try to unmerge half of the region */ + if (madvise(map, size / 2, MADV_UNMERGEABLE)) { + ksft_test_result_fail("MADV_UNMERGEABLE failed\n"); + goto unmap; + } + + /* Check if ksm_zero_pages is updated correctly after unmerging */ + pages_expected /= 2; + if (pages_expected != get_my_ksm_zero_pages()) { + ksft_test_result_fail("'ksm_zero_pages' updated after unmerging\n"); + goto unmap; + } + + /* Trigger unmerging of the other half by writing to the pages. */ + for (offs = size / 2; offs < size; offs += pagesize) + *((unsigned int *)&map[offs]) = offs; + + /* Now we should have no zeropages remaining. */ + if (get_my_ksm_zero_pages()) { + ksft_test_result_fail("'ksm_zero_pages' updated after write fault\n"); + goto unmap; + } + + /* Check if ksm zero pages are really unmerged */ + ksft_test_result(!range_maps_duplicates(map, size), + "KSM zero pages were unmerged\n"); +unmap: + munmap(map, size); +} + static void test_unmerge_discarded(void) { const unsigned int size = 2 * MiB; @@ -166,7 +311,7 @@ static void test_unmerge_discarded(void) ksft_print_msg("[RUN] %s\n", __func__); - map = mmap_and_merge_range(0xcf, size, false); + map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false); if (map == MAP_FAILED) return; @@ -198,7 +343,7 @@ static void test_unmerge_uffd_wp(void) ksft_print_msg("[RUN] %s\n", __func__); - map = mmap_and_merge_range(0xcf, size, false); + map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false); if (map == MAP_FAILED) return; @@ -341,7 +486,7 @@ static void test_prctl_unmerge(void) ksft_print_msg("[RUN] %s\n", __func__); - map = mmap_and_merge_range(0xcf, size, true); + map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, true); if (map == MAP_FAILED) return; @@ -356,9 +501,42 @@ unmap: munmap(map, size); } +static void test_prot_none(void) +{ + const unsigned int size = 2 * MiB; + char *map; + int i; + + ksft_print_msg("[RUN] %s\n", __func__); + + map = mmap_and_merge_range(0x11, size, PROT_NONE, false); + if (map == MAP_FAILED) + goto unmap; + + /* Store a unique value in each page on one half using ptrace */ + for (i = 0; i < size / 2; i += pagesize) { + lseek(mem_fd, (uintptr_t) map + i, SEEK_SET); + if (write(mem_fd, &i, sizeof(i)) != sizeof(i)) { + ksft_test_result_fail("ptrace write failed\n"); + goto unmap; + } + } + + /* Trigger unsharing on the other half. */ + if (madvise(map + size / 2, size / 2, MADV_UNMERGEABLE)) { + ksft_test_result_fail("MADV_UNMERGEABLE failed\n"); + goto unmap; + } + + ksft_test_result(!range_maps_duplicates(map, size), + "Pages were unmerged\n"); +unmap: + munmap(map, size); +} + int main(int argc, char **argv) { - unsigned int tests = 5; + unsigned int tests = 7; int err; #ifdef __NR_userfaultfd @@ -370,6 +548,9 @@ int main(int argc, char **argv) pagesize = getpagesize(); + mem_fd = open("/proc/self/mem", O_RDWR); + if (mem_fd < 0) + ksft_exit_fail_msg("opening /proc/self/mem failed\n"); ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR); if (ksm_fd < 0) ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n"); @@ -379,13 +560,20 @@ int main(int argc, char **argv) pagemap_fd = open("/proc/self/pagemap", O_RDONLY); if (pagemap_fd < 0) ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n"); + proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY); + proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages", + O_RDONLY); + ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR); test_unmerge(); + test_unmerge_zero_pages(); test_unmerge_discarded(); #ifdef __NR_userfaultfd test_unmerge_uffd_wp(); #endif + test_prot_none(); + test_prctl(); test_prctl_fork(); test_prctl_unmerge(); diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c index 435acebdc325..380b691d3eb9 100644 --- a/tools/testing/selftests/mm/ksm_tests.c +++ b/tools/testing/selftests/mm/ksm_tests.c @@ -831,6 +831,7 @@ int main(int argc, char *argv[]) printf("Size must be greater than 0\n"); return KSFT_FAIL; } + break; case 't': { int tmp = atoi(optarg); diff --git a/tools/testing/selftests/mm/madv_populate.c b/tools/testing/selftests/mm/madv_populate.c index 262eae6b58f2..17bcb07f19f3 100644 --- a/tools/testing/selftests/mm/madv_populate.c +++ b/tools/testing/selftests/mm/madv_populate.c @@ -20,13 +20,6 @@ #include "../kselftest.h" #include "vm_util.h" -#ifndef MADV_POPULATE_READ -#define MADV_POPULATE_READ 22 -#endif /* MADV_POPULATE_READ */ -#ifndef MADV_POPULATE_WRITE -#define MADV_POPULATE_WRITE 23 -#endif /* MADV_POPULATE_WRITE */ - /* * For now, we're using 2 MiB of private anonymous memory for all tests. */ @@ -271,14 +264,35 @@ static void test_softdirty(void) munmap(addr, SIZE); } +static int system_has_softdirty(void) +{ + /* + * There is no way to check if the kernel supports soft-dirty, other + * than by writing to a page and seeing if the bit was set. But the + * tests are intended to check that the bit gets set when it should, so + * doing that check would turn a potentially legitimate fail into a + * skip. Fortunately, we know for sure that arm64 does not support + * soft-dirty. So for now, let's just use the arch as a corse guide. + */ +#if defined(__aarch64__) + return 0; +#else + return 1; +#endif +} + int main(int argc, char **argv) { + int nr_tests = 16; int err; pagesize = getpagesize(); + if (system_has_softdirty()) + nr_tests += 5; + ksft_print_header(); - ksft_set_plan(21); + ksft_set_plan(nr_tests); sense_support(); test_prot_read(); @@ -286,7 +300,8 @@ int main(int argc, char **argv) test_holes(); test_populate_read(); test_populate_write(); - test_softdirty(); + if (system_has_softdirty()) + test_softdirty(); err = ksft_get_fail_cnt(); if (err) diff --git a/tools/testing/selftests/mm/map_fixed_noreplace.c b/tools/testing/selftests/mm/map_fixed_noreplace.c index eed44322d1a6..598159f3df1f 100644 --- a/tools/testing/selftests/mm/map_fixed_noreplace.c +++ b/tools/testing/selftests/mm/map_fixed_noreplace.c @@ -13,10 +13,6 @@ #include <stdlib.h> #include <unistd.h> -#ifndef MAP_FIXED_NOREPLACE -#define MAP_FIXED_NOREPLACE 0x100000 -#endif - static void dump_maps(void) { char cmd[32]; diff --git a/tools/testing/selftests/mm/map_hugetlb.c b/tools/testing/selftests/mm/map_hugetlb.c index 312889edb84a..193281560b61 100644 --- a/tools/testing/selftests/mm/map_hugetlb.c +++ b/tools/testing/selftests/mm/map_hugetlb.c @@ -19,18 +19,6 @@ #define LENGTH (256UL*1024*1024) #define PROTECTION (PROT_READ | PROT_WRITE) -#ifndef MAP_HUGETLB -#define MAP_HUGETLB 0x40000 /* arch specific */ -#endif - -#ifndef MAP_HUGE_SHIFT -#define MAP_HUGE_SHIFT 26 -#endif - -#ifndef MAP_HUGE_MASK -#define MAP_HUGE_MASK 0x3f -#endif - /* Only ia64 requires this */ #ifdef __ia64__ #define ADDR (void *)(0x8000000000000000UL) diff --git a/tools/testing/selftests/mm/map_populate.c b/tools/testing/selftests/mm/map_populate.c index 6b8aeaa0bf7a..7945d0754875 100644 --- a/tools/testing/selftests/mm/map_populate.c +++ b/tools/testing/selftests/mm/map_populate.c @@ -17,9 +17,7 @@ #include <string.h> #include <unistd.h> -#ifndef MMAP_SZ #define MMAP_SZ 4096 -#endif #define BUG_ON(condition, description) \ do { \ @@ -79,7 +77,7 @@ int main(int argc, char **argv) unsigned long *smap; ftmp = tmpfile(); - BUG_ON(ftmp == 0, "tmpfile()"); + BUG_ON(!ftmp, "tmpfile()"); ret = ftruncate(fileno(ftmp), MMAP_SZ); BUG_ON(ret, "ftruncate()"); diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c index 1cec8425e3ca..6908569ef406 100644 --- a/tools/testing/selftests/mm/migration.c +++ b/tools/testing/selftests/mm/migration.c @@ -10,12 +10,13 @@ #include <numa.h> #include <numaif.h> #include <sys/mman.h> +#include <sys/prctl.h> #include <sys/types.h> #include <signal.h> #include <time.h> #define TWOMEG (2<<20) -#define RUNTIME (60) +#define RUNTIME (20) #define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) @@ -95,12 +96,15 @@ int migrate(uint64_t *ptr, int n1, int n2) void *access_mem(void *ptr) { - uint64_t y = 0; + volatile uint64_t y = 0; volatile uint64_t *x = ptr; while (1) { pthread_testcancel(); y += *x; + + /* Prevent the compiler from optimizing out the writes to y: */ + asm volatile("" : "+r" (y)); } return NULL; @@ -152,10 +156,15 @@ TEST_F_TIMEOUT(migration, shared_anon, 2*RUNTIME) memset(ptr, 0xde, TWOMEG); for (i = 0; i < self->nthreads - 1; i++) { pid = fork(); - if (!pid) + if (!pid) { + prctl(PR_SET_PDEATHSIG, SIGHUP); + /* Parent may have died before prctl so check now. */ + if (getppid() == 1) + kill(getpid(), SIGHUP); access_mem(ptr); - else + } else { self->pids[i] = pid; + } } ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0); diff --git a/tools/testing/selftests/mm/mkdirty.c b/tools/testing/selftests/mm/mkdirty.c index 6d71d972997b..301abb99e027 100644 --- a/tools/testing/selftests/mm/mkdirty.c +++ b/tools/testing/selftests/mm/mkdirty.c @@ -321,8 +321,8 @@ close_uffd: munmap: munmap(dst, pagesize); free(src); -#endif /* __NR_userfaultfd */ } +#endif /* __NR_userfaultfd */ int main(void) { diff --git a/tools/testing/selftests/mm/mlock-random-test.c b/tools/testing/selftests/mm/mlock-random-test.c index 782ea94dee2f..1fba77df7f62 100644 --- a/tools/testing/selftests/mm/mlock-random-test.c +++ b/tools/testing/selftests/mm/mlock-random-test.c @@ -7,6 +7,7 @@ #include <sys/resource.h> #include <sys/capability.h> #include <sys/mman.h> +#include <linux/mman.h> #include <fcntl.h> #include <string.h> #include <sys/ipc.h> diff --git a/tools/testing/selftests/mm/mlock2-tests.c b/tools/testing/selftests/mm/mlock2-tests.c index 11b2301f3aa3..80cddc0de206 100644 --- a/tools/testing/selftests/mm/mlock2-tests.c +++ b/tools/testing/selftests/mm/mlock2-tests.c @@ -50,7 +50,6 @@ static int get_vm_area(unsigned long addr, struct vm_boundaries *area) printf("cannot parse /proc/self/maps\n"); goto out; } - stop = '\0'; sscanf(line, "%lx", &start); sscanf(end_addr, "%lx", &end); diff --git a/tools/testing/selftests/mm/mlock2.h b/tools/testing/selftests/mm/mlock2.h index 2a6e76c226bc..8e02991b313c 100644 --- a/tools/testing/selftests/mm/mlock2.h +++ b/tools/testing/selftests/mm/mlock2.h @@ -4,14 +4,6 @@ #include <stdio.h> #include <stdlib.h> -#ifndef MLOCK_ONFAULT -#define MLOCK_ONFAULT 1 -#endif - -#ifndef MCL_ONFAULT -#define MCL_ONFAULT (MCL_FUTURE << 1) -#endif - static int mlock2_(void *start, size_t len, int flags) { #ifdef __NR_mlock2 diff --git a/tools/testing/selftests/mm/mrelease_test.c b/tools/testing/selftests/mm/mrelease_test.c index 37b6d33b9e84..d822004a374e 100644 --- a/tools/testing/selftests/mm/mrelease_test.c +++ b/tools/testing/selftests/mm/mrelease_test.c @@ -7,20 +7,13 @@ #include <stdbool.h> #include <stdio.h> #include <stdlib.h> +#include <sys/syscall.h> #include <sys/wait.h> #include <unistd.h> +#include <asm-generic/unistd.h> #include "vm_util.h" - #include "../kselftest.h" -#ifndef __NR_pidfd_open -#define __NR_pidfd_open -1 -#endif - -#ifndef __NR_process_mrelease -#define __NR_process_mrelease -1 -#endif - #define MB(x) (x << 20) #define MAX_SIZE_MB 1024 diff --git a/tools/testing/selftests/mm/mremap_dontunmap.c b/tools/testing/selftests/mm/mremap_dontunmap.c index f01dc4a85b0b..ca2359835e75 100644 --- a/tools/testing/selftests/mm/mremap_dontunmap.c +++ b/tools/testing/selftests/mm/mremap_dontunmap.c @@ -15,10 +15,6 @@ #include "../kselftest.h" -#ifndef MREMAP_DONTUNMAP -#define MREMAP_DONTUNMAP 4 -#endif - unsigned long page_size; char *page_buffer; diff --git a/tools/testing/selftests/mm/on-fault-limit.c b/tools/testing/selftests/mm/on-fault-limit.c index 634d87dfb2a4..b5888d613f34 100644 --- a/tools/testing/selftests/mm/on-fault-limit.c +++ b/tools/testing/selftests/mm/on-fault-limit.c @@ -6,10 +6,6 @@ #include <sys/time.h> #include <sys/resource.h> -#ifndef MCL_ONFAULT -#define MCL_ONFAULT (MCL_FUTURE << 1) -#endif - static int test_limit(void) { int ret = 1; diff --git a/tools/testing/selftests/mm/pkey-powerpc.h b/tools/testing/selftests/mm/pkey-powerpc.h index 1ebb586b2fbc..ae5df26104e5 100644 --- a/tools/testing/selftests/mm/pkey-powerpc.h +++ b/tools/testing/selftests/mm/pkey-powerpc.h @@ -3,9 +3,6 @@ #ifndef _PKEYS_POWERPC_H #define _PKEYS_POWERPC_H -#ifndef SYS_mprotect_key -# define SYS_mprotect_key 386 -#endif #ifndef SYS_pkey_alloc # define SYS_pkey_alloc 384 # define SYS_pkey_free 385 diff --git a/tools/testing/selftests/mm/pkey-x86.h b/tools/testing/selftests/mm/pkey-x86.h index 72c14cd3ddc7..814758e109c0 100644 --- a/tools/testing/selftests/mm/pkey-x86.h +++ b/tools/testing/selftests/mm/pkey-x86.h @@ -5,29 +5,11 @@ #ifdef __i386__ -#ifndef SYS_mprotect_key -# define SYS_mprotect_key 380 -#endif - -#ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 381 -# define SYS_pkey_free 382 -#endif - #define REG_IP_IDX REG_EIP #define si_pkey_offset 0x14 #else -#ifndef SYS_mprotect_key -# define SYS_mprotect_key 329 -#endif - -#ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 330 -# define SYS_pkey_free 331 -#endif - #define REG_IP_IDX REG_RIP #define si_pkey_offset 0x20 @@ -132,7 +114,7 @@ int pkey_reg_xstate_offset(void) unsigned int ecx; unsigned int edx; int xstate_offset; - int xstate_size; + int xstate_size = 0; unsigned long XSTATE_CPUID = 0xd; int leaf; diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c index 0381c34fdd56..48dc151f8fca 100644 --- a/tools/testing/selftests/mm/protection_keys.c +++ b/tools/testing/selftests/mm/protection_keys.c @@ -294,15 +294,6 @@ void pkey_access_deny(int pkey) pkey_disable_set(pkey, PKEY_DISABLE_ACCESS); } -/* Failed address bound checks: */ -#ifndef SEGV_BNDERR -# define SEGV_BNDERR 3 -#endif - -#ifndef SEGV_PKUERR -# define SEGV_PKUERR 4 -#endif - static char *si_code_str(int si_code) { if (si_code == SEGV_MAPERR) @@ -476,7 +467,7 @@ int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, ptr, size, orig_prot, pkey); errno = 0; - sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey); + sret = syscall(__NR_pkey_mprotect, ptr, size, orig_prot, pkey); if (errno) { dprintf2("SYS_mprotect_key sret: %d\n", sret); dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot); @@ -1684,7 +1675,7 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) return; } - sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey); + sret = syscall(__NR_pkey_mprotect, ptr, size, PROT_READ, pkey); pkey_assert(sret < 0); } diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 4893eb60d96d..3e2bc818d566 100644..100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -12,11 +12,14 @@ exitcode=0 usage() { cat <<EOF -usage: ${BASH_SOURCE[0]:-$0} [ -h | -t "<categories>"] +usage: ${BASH_SOURCE[0]:-$0} [ options ] + + -a: run all tests, including extra ones -t: specify specific categories to tests to run -h: display this message -The default behavior is to run all tests. +The default behavior is to run required tests only. If -a is specified, +will run all tests. Alternatively, specific groups tests can be run by passing a string to the -t argument containing one or more of the following categories @@ -24,7 +27,7 @@ separated by spaces: - mmap tests for mmap(2) - gup_test - tests for gup using gup_test interface + tests for gup - userfaultfd tests for userfaultfd(2) - compaction @@ -55,14 +58,27 @@ separated by spaces: test soft dirty page bit semantics - cow test copy-on-write semantics +- thp + test transparent huge pages +- migration + invoke move_pages(2) to exercise the migration entry code + paths in the kernel +- mkdirty + test handling of code that might set PTE/PMD dirty in + read-only VMAs +- mdwe + test prctl(PR_SET_MDWE, ...) + example: ./run_vmtests.sh -t "hmm mmap ksm" EOF exit 0 } +RUN_ALL=false -while getopts "ht:" OPT; do +while getopts "aht:" OPT; do case ${OPT} in + "a") RUN_ALL=true ;; "h") usage ;; "t") VM_SELFTEST_ITEMS=${OPTARG} ;; esac @@ -85,6 +101,30 @@ test_selected() { fi } +run_gup_matrix() { + # -t: thp=on, -T: thp=off, -H: hugetlb=on + local hugetlb_mb=$(( needmem_KB / 1024 )) + + for huge in -t -T "-H -m $hugetlb_mb"; do + # -u: gup-fast, -U: gup-basic, -a: pin-fast, -b: pin-basic, -L: pin-longterm + for test_cmd in -u -U -a -b -L; do + # -w: write=1, -W: write=0 + for write in -w -W; do + # -S: shared + for share in -S " "; do + # -n: How many pages to fetch together? 512 is special + # because it's default thp size (or 2M on x86), 123 to + # just test partial gup when hit a huge in whatever form + for num in "-n 1" "-n 512" "-n 123"; do + CATEGORY="gup_test" run_test ./gup_test \ + $huge $test_cmd $write $share $num + done + done + done + done + done +} + # get huge pagesize and freepages from /proc/meminfo while read -r name size unit; do if [ "$name" = "HugePages_Free:" ]; then @@ -189,12 +229,17 @@ fi CATEGORY="mmap" run_test ./map_fixed_noreplace -# get_user_pages_fast() benchmark -CATEGORY="gup_test" run_test ./gup_test -u -# pin_user_pages_fast() benchmark -CATEGORY="gup_test" run_test ./gup_test -a +if $RUN_ALL; then + run_gup_matrix +else + # get_user_pages_fast() benchmark + CATEGORY="gup_test" run_test ./gup_test -u + # pin_user_pages_fast() benchmark + CATEGORY="gup_test" run_test ./gup_test -a +fi # Dump pages 0, 19, and 4096, using pin_user_pages: CATEGORY="gup_test" run_test ./gup_test -ct -F 0x1 0 19 0x1000 +CATEGORY="gup_test" run_test ./gup_longterm CATEGORY="userfaultfd" run_test ./uffd-unit-tests uffd_stress_bin=./uffd-stress @@ -242,24 +287,28 @@ if [ $VADDR64 -ne 0 ]; then if [ "$ARCH" == "$ARCH_ARM64" ]; then echo 6 > /proc/sys/vm/nr_hugepages fi - CATEGORY="hugevm" run_test ./va_high_addr_switch.sh + CATEGORY="hugevm" run_test bash ./va_high_addr_switch.sh if [ "$ARCH" == "$ARCH_ARM64" ]; then echo $prev_nr_hugepages > /proc/sys/vm/nr_hugepages fi fi # VADDR64 # vmalloc stability smoke test -CATEGORY="vmalloc" run_test ./test_vmalloc.sh smoke +CATEGORY="vmalloc" run_test bash ./test_vmalloc.sh smoke CATEGORY="mremap" run_test ./mremap_dontunmap -CATEGORY="hmm" run_test ./test_hmm.sh smoke +CATEGORY="hmm" run_test bash ./test_hmm.sh smoke # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests CATEGORY="madv_populate" run_test ./madv_populate CATEGORY="memfd_secret" run_test ./memfd_secret +# KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100 +CATEGORY="ksm" run_test ./ksm_tests -H -s 100 +# KSM KSM_MERGE_TIME test with size of 100 +CATEGORY="ksm" run_test ./ksm_tests -P -s 100 # KSM MADV_MERGEABLE test with 10 identical pages CATEGORY="ksm" run_test ./ksm_tests -M -p 10 # KSM unmerge test @@ -288,11 +337,26 @@ then CATEGORY="pkey" run_test ./protection_keys_64 fi -CATEGORY="soft_dirty" run_test ./soft-dirty +if [ -x ./soft-dirty ] +then + CATEGORY="soft_dirty" run_test ./soft-dirty +fi # COW tests CATEGORY="cow" run_test ./cow +CATEGORY="thp" run_test ./khugepaged + +CATEGORY="thp" run_test ./transhuge-stress -d 20 + +CATEGORY="thp" run_test ./split_huge_page_test + +CATEGORY="migration" run_test ./migration + +CATEGORY="mkdirty" run_test ./mkdirty + +CATEGORY="mdwe" run_test ./mdwe_test + echo "SUMMARY: PASS=${count_pass} SKIP=${count_skip} FAIL=${count_fail}" exit $exitcode diff --git a/tools/testing/selftests/mm/settings b/tools/testing/selftests/mm/settings index 9abfc60e9e6f..a953c96aa16e 100644 --- a/tools/testing/selftests/mm/settings +++ b/tools/testing/selftests/mm/settings @@ -1 +1 @@ -timeout=45 +timeout=180 diff --git a/tools/testing/selftests/mm/test_hmm.sh b/tools/testing/selftests/mm/test_hmm.sh index 46e19b5d648d..46e19b5d648d 100644..100755 --- a/tools/testing/selftests/mm/test_hmm.sh +++ b/tools/testing/selftests/mm/test_hmm.sh diff --git a/tools/testing/selftests/mm/test_vmalloc.sh b/tools/testing/selftests/mm/test_vmalloc.sh index d73b846736f1..d73b846736f1 100644..100755 --- a/tools/testing/selftests/mm/test_vmalloc.sh +++ b/tools/testing/selftests/mm/test_vmalloc.sh diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c index 380ab5f0a534..16ed4dfa7359 100644 --- a/tools/testing/selftests/mm/thuge-gen.c +++ b/tools/testing/selftests/mm/thuge-gen.c @@ -139,7 +139,7 @@ void test_mmap(unsigned long size, unsigned flags) before, after, before - after, size); assert(size == getpagesize() || (before - after) == NUM_PAGES); show(size); - err = munmap(map, size); + err = munmap(map, size * NUM_PAGES); assert(!err); } @@ -222,7 +222,7 @@ int main(void) test_mmap(ps, MAP_HUGETLB | arg); } printf("Testing default huge mmap\n"); - test_mmap(default_hps, SHM_HUGETLB); + test_mmap(default_hps, MAP_HUGETLB); puts("Testing non-huge shmget"); test_shmget(getpagesize(), 0); diff --git a/tools/testing/selftests/mm/transhuge-stress.c b/tools/testing/selftests/mm/transhuge-stress.c index ba9d37ad3a89..c61fb9350b8c 100644 --- a/tools/testing/selftests/mm/transhuge-stress.c +++ b/tools/testing/selftests/mm/transhuge-stress.c @@ -25,13 +25,14 @@ int main(int argc, char **argv) { size_t ram, len; void *ptr, *p; - struct timespec a, b; + struct timespec start, a, b; int i = 0; char *name = NULL; double s; uint8_t *map; size_t map_len; int pagemap_fd; + int duration = 0; ram = sysconf(_SC_PHYS_PAGES); if (ram > SIZE_MAX / psize() / 4) @@ -42,9 +43,11 @@ int main(int argc, char **argv) while (++i < argc) { if (!strcmp(argv[i], "-h")) - errx(1, "usage: %s [size in MiB]", argv[0]); + errx(1, "usage: %s [-f <filename>] [-d <duration>] [size in MiB]", argv[0]); else if (!strcmp(argv[i], "-f")) name = argv[++i]; + else if (!strcmp(argv[i], "-d")) + duration = atoi(argv[++i]); else len = atoll(argv[i]) << 20; } @@ -78,6 +81,8 @@ int main(int argc, char **argv) if (!map) errx(2, "map malloc"); + clock_gettime(CLOCK_MONOTONIC, &start); + while (1) { int nr_succeed = 0, nr_failed = 0, nr_pages = 0; @@ -118,5 +123,8 @@ int main(int argc, char **argv) "%4d succeed, %4d failed, %4d different pages", s, s * 1000 / (len >> HPAGE_SHIFT), len / s / (1 << 20), nr_succeed, nr_failed, nr_pages); + + if (duration > 0 && b.tv_sec - start.tv_sec >= duration) + return 0; } } diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c index 61c6250adf93..02b89860e193 100644 --- a/tools/testing/selftests/mm/uffd-common.c +++ b/tools/testing/selftests/mm/uffd-common.c @@ -499,6 +499,9 @@ void *uffd_poll_thread(void *arg) int ret; char tmp_chr; + if (!args->handle_fault) + args->handle_fault = uffd_handle_page_fault; + pollfd[0].fd = uffd; pollfd[0].events = POLLIN; pollfd[1].fd = pipefd[cpu*2]; @@ -527,7 +530,7 @@ void *uffd_poll_thread(void *arg) err("unexpected msg event %u\n", msg.event); break; case UFFD_EVENT_PAGEFAULT: - uffd_handle_page_fault(&msg, args); + args->handle_fault(&msg, args); break; case UFFD_EVENT_FORK: close(uffd); @@ -616,3 +619,62 @@ int copy_page(int ufd, unsigned long offset, bool wp) { return __copy_page(ufd, offset, false, wp); } + +int uffd_open_dev(unsigned int flags) +{ + int fd, uffd; + + fd = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC); + if (fd < 0) + return fd; + uffd = ioctl(fd, USERFAULTFD_IOC_NEW, flags); + close(fd); + + return uffd; +} + +int uffd_open_sys(unsigned int flags) +{ +#ifdef __NR_userfaultfd + return syscall(__NR_userfaultfd, flags); +#else + return -1; +#endif +} + +int uffd_open(unsigned int flags) +{ + int uffd = uffd_open_sys(flags); + + if (uffd < 0) + uffd = uffd_open_dev(flags); + + return uffd; +} + +int uffd_get_features(uint64_t *features) +{ + struct uffdio_api uffdio_api = { .api = UFFD_API, .features = 0 }; + /* + * This should by default work in most kernels; the feature list + * will be the same no matter what we pass in here. + */ + int fd = uffd_open(UFFD_USER_MODE_ONLY); + + if (fd < 0) + /* Maybe the kernel is older than user-only mode? */ + fd = uffd_open(0); + + if (fd < 0) + return fd; + + if (ioctl(fd, UFFDIO_API, &uffdio_api)) { + close(fd); + return -errno; + } + + *features = uffdio_api.features; + close(fd); + + return 0; +} diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h index 6068f2346b86..7c4fa964c3b0 100644 --- a/tools/testing/selftests/mm/uffd-common.h +++ b/tools/testing/selftests/mm/uffd-common.h @@ -77,6 +77,9 @@ struct uffd_args { unsigned long missing_faults; unsigned long wp_faults; unsigned long minor_faults; + + /* A custom fault handler; defaults to uffd_handle_page_fault. */ + void (*handle_fault)(struct uffd_msg *msg, struct uffd_args *args); }; struct uffd_test_ops { @@ -110,6 +113,11 @@ int __copy_page(int ufd, unsigned long offset, bool retry, bool wp); int copy_page(int ufd, unsigned long offset, bool wp); void *uffd_poll_thread(void *arg); +int uffd_open_dev(unsigned int flags); +int uffd_open_sys(unsigned int flags); +int uffd_open(unsigned int flags); +int uffd_get_features(uint64_t *features); + #define TEST_ANON 1 #define TEST_HUGETLB 2 #define TEST_SHMEM 3 diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c index f1ad9eef1c3a..469e0476af26 100644 --- a/tools/testing/selftests/mm/uffd-stress.c +++ b/tools/testing/selftests/mm/uffd-stress.c @@ -53,21 +53,21 @@ pthread_attr_t attr; do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) const char *examples = - "# Run anonymous memory test on 100MiB region with 99999 bounces:\n" - "./userfaultfd anon 100 99999\n\n" - "# Run share memory test on 1GiB region with 99 bounces:\n" - "./userfaultfd shmem 1000 99\n\n" - "# Run hugetlb memory test on 256MiB region with 50 bounces:\n" - "./userfaultfd hugetlb 256 50\n\n" - "# Run the same hugetlb test but using private file:\n" - "./userfaultfd hugetlb-private 256 50\n\n" - "# 10MiB-~6GiB 999 bounces anonymous test, " - "continue forever unless an error triggers\n" - "while ./userfaultfd anon $[RANDOM % 6000 + 10] 999; do true; done\n\n"; + "# Run anonymous memory test on 100MiB region with 99999 bounces:\n" + "./uffd-stress anon 100 99999\n\n" + "# Run share memory test on 1GiB region with 99 bounces:\n" + "./uffd-stress shmem 1000 99\n\n" + "# Run hugetlb memory test on 256MiB region with 50 bounces:\n" + "./uffd-stress hugetlb 256 50\n\n" + "# Run the same hugetlb test but using private file:\n" + "./uffd-stress hugetlb-private 256 50\n\n" + "# 10MiB-~6GiB 999 bounces anonymous test, " + "continue forever unless an error triggers\n" + "while ./uffd-stress anon $[RANDOM % 6000 + 10] 999; do true; done\n\n"; static void usage(void) { - fprintf(stderr, "\nUsage: ./userfaultfd <test type> <MiB> <bounces>\n\n"); + fprintf(stderr, "\nUsage: ./uffd-stress <test type> <MiB> <bounces>\n\n"); fprintf(stderr, "Supported <test type>: anon, hugetlb, " "hugetlb-private, shmem, shmem-private\n\n"); fprintf(stderr, "Examples:\n\n"); @@ -88,16 +88,6 @@ static void uffd_stats_reset(struct uffd_args *args, unsigned long n_cpus) } } -static inline uint64_t uffd_minor_feature(void) -{ - if (test_type == TEST_HUGETLB && map_shared) - return UFFD_FEATURE_MINOR_HUGETLBFS; - else if (test_type == TEST_SHMEM) - return UFFD_FEATURE_MINOR_SHMEM; - else - return 0; -} - static void *locking_thread(void *arg) { unsigned long cpu = (unsigned long) arg; @@ -199,10 +189,8 @@ static int stress(struct uffd_args *args) locking_thread, (void *)cpu)) return 1; if (bounces & BOUNCE_POLL) { - if (pthread_create(&uffd_threads[cpu], &attr, - uffd_poll_thread, - (void *)&args[cpu])) - return 1; + if (pthread_create(&uffd_threads[cpu], &attr, uffd_poll_thread, &args[cpu])) + err("uffd_poll_thread create"); } else { if (pthread_create(&uffd_threads[cpu], &attr, uffd_read_thread, @@ -260,6 +248,8 @@ static int userfaultfd_stress(void) struct uffd_args args[nr_cpus]; uint64_t mem_size = nr_pages * page_size; + memset(args, 0, sizeof(struct uffd_args) * nr_cpus); + if (uffd_test_ctx_init(UFFD_FEATURE_WP_UNPOPULATED, NULL)) err("context init failed"); diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index 269c86768a02..2709a34a39c5 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -109,12 +109,11 @@ static void uffd_test_pass(void) ksft_inc_fail_cnt(); \ } while (0) -#define uffd_test_skip(...) do { \ - printf("skipped [reason: "); \ - printf(__VA_ARGS__); \ - printf("]\n"); \ - ksft_inc_xskip_cnt(); \ - } while (0) +static void uffd_test_skip(const char *message) +{ + printf("skipped [reason: %s]\n", message); + ksft_inc_xskip_cnt(); +} /* * Returns 1 if specific userfaultfd supported, 0 otherwise. Note, we'll @@ -952,6 +951,117 @@ static void uffd_zeropage_test(uffd_test_args_t *args) uffd_test_pass(); } +static void uffd_register_poison(int uffd, void *addr, uint64_t len) +{ + uint64_t ioctls = 0; + uint64_t expected = (1 << _UFFDIO_COPY) | (1 << _UFFDIO_POISON); + + if (uffd_register_with_ioctls(uffd, addr, len, true, + false, false, &ioctls)) + err("poison register fail"); + + if ((ioctls & expected) != expected) + err("registered area doesn't support COPY and POISON ioctls"); +} + +static void do_uffdio_poison(int uffd, unsigned long offset) +{ + struct uffdio_poison uffdio_poison = { 0 }; + int ret; + __s64 res; + + uffdio_poison.range.start = (unsigned long) area_dst + offset; + uffdio_poison.range.len = page_size; + uffdio_poison.mode = 0; + ret = ioctl(uffd, UFFDIO_POISON, &uffdio_poison); + res = uffdio_poison.updated; + + if (ret) + err("UFFDIO_POISON error: %"PRId64, (int64_t)res); + else if (res != page_size) + err("UFFDIO_POISON unexpected size: %"PRId64, (int64_t)res); +} + +static void uffd_poison_handle_fault( + struct uffd_msg *msg, struct uffd_args *args) +{ + unsigned long offset; + + if (msg->event != UFFD_EVENT_PAGEFAULT) + err("unexpected msg event %u", msg->event); + + if (msg->arg.pagefault.flags & + (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR)) + err("unexpected fault type %llu", msg->arg.pagefault.flags); + + offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; + offset &= ~(page_size-1); + + /* Odd pages -> copy zeroed page; even pages -> poison. */ + if (offset & page_size) + copy_page(uffd, offset, false); + else + do_uffdio_poison(uffd, offset); +} + +static void uffd_poison_test(uffd_test_args_t *targs) +{ + pthread_t uffd_mon; + char c; + struct uffd_args args = { 0 }; + struct sigaction act = { 0 }; + unsigned long nr_sigbus = 0; + unsigned long nr; + + fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK); + + uffd_register_poison(uffd, area_dst, nr_pages * page_size); + memset(area_src, 0, nr_pages * page_size); + + args.handle_fault = uffd_poison_handle_fault; + if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args)) + err("uffd_poll_thread create"); + + sigbuf = &jbuf; + act.sa_sigaction = sighndl; + act.sa_flags = SA_SIGINFO; + if (sigaction(SIGBUS, &act, 0)) + err("sigaction"); + + for (nr = 0; nr < nr_pages; ++nr) { + unsigned long offset = nr * page_size; + const char *bytes = (const char *) area_dst + offset; + const char *i; + + if (sigsetjmp(*sigbuf, 1)) { + /* + * Access below triggered a SIGBUS, which was caught by + * sighndl, which then jumped here. Count this SIGBUS, + * and move on to next page. + */ + ++nr_sigbus; + continue; + } + + for (i = bytes; i < bytes + page_size; ++i) { + if (*i) + err("nonzero byte in area_dst (%p) at %p: %u", + area_dst, i, *i); + } + } + + if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) + err("pipe write"); + if (pthread_join(uffd_mon, NULL)) + err("pthread_join()"); + + if (nr_sigbus != nr_pages / 2) + err("expected to receive %lu SIGBUS, actually received %lu", + nr_pages / 2, nr_sigbus); + + uffd_test_pass(); +} + /* * Test the returned uffdio_register.ioctls with different register modes. * Note that _UFFDIO_ZEROPAGE is tested separately in the zeropage test. @@ -1127,6 +1237,12 @@ uffd_test_case_t uffd_tests[] = { UFFD_FEATURE_PAGEFAULT_FLAG_WP | UFFD_FEATURE_WP_HUGETLBFS_SHMEM, }, + { + .name = "poison", + .uffd_fn = uffd_poison_test, + .mem_targets = MEM_ALL, + .uffd_feature_required = UFFD_FEATURE_POISON, + }, }; static void usage(const char *prog) @@ -1149,7 +1265,6 @@ int main(int argc, char *argv[]) uffd_test_case_t *test; mem_type_t *mem_type; uffd_test_args_t args; - char test_name[128]; const char *errmsg; int has_uffd, opt; int i, j; @@ -1192,10 +1307,8 @@ int main(int argc, char *argv[]) mem_type = &mem_types[j]; if (!(test->mem_targets & mem_type->mem_flag)) continue; - snprintf(test_name, sizeof(test_name), - "%s on %s", test->name, mem_type->name); - uffd_test_start(test_name); + uffd_test_start("%s on %s", test->name, mem_type->name); if (!uffd_feature_supported(test)) { uffd_test_skip("feature missing"); continue; diff --git a/tools/testing/selftests/mm/va_high_addr_switch.c b/tools/testing/selftests/mm/va_high_addr_switch.c index 7cfaf4a74c57..cfbc501290d3 100644 --- a/tools/testing/selftests/mm/va_high_addr_switch.c +++ b/tools/testing/selftests/mm/va_high_addr_switch.c @@ -292,7 +292,7 @@ static int supported_arch(void) #elif defined(__x86_64__) return 1; #elif defined(__aarch64__) - return 1; + return getpagesize() == PAGE_SIZE; #else return 0; #endif diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh index 45cae7cab27e..45cae7cab27e 100644..100755 --- a/tools/testing/selftests/mm/va_high_addr_switch.sh +++ b/tools/testing/selftests/mm/va_high_addr_switch.sh diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c index 9b06a5034808..558c9cd8901c 100644 --- a/tools/testing/selftests/mm/vm_util.c +++ b/tools/testing/selftests/mm/vm_util.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <string.h> #include <fcntl.h> +#include <dirent.h> #include <sys/ioctl.h> #include <linux/userfaultfd.h> #include <sys/syscall.h> @@ -198,6 +199,32 @@ unsigned long default_huge_page_size(void) return hps; } +int detect_hugetlb_page_sizes(size_t sizes[], int max) +{ + DIR *dir = opendir("/sys/kernel/mm/hugepages/"); + int count = 0; + + if (!dir) + return 0; + + while (count < max) { + struct dirent *entry = readdir(dir); + size_t kb; + + if (!entry) + break; + if (entry->d_type != DT_DIR) + continue; + if (sscanf(entry->d_name, "hugepages-%zukB", &kb) != 1) + continue; + sizes[count++] = kb * 1024; + ksft_print_msg("[INFO] detected hugetlb page size: %zu KiB\n", + kb); + } + closedir(dir); + return count; +} + /* If `ioctls' non-NULL, the allowed ioctls will be returned into the var */ int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len, bool miss, bool wp, bool minor, uint64_t *ioctls) @@ -242,62 +269,3 @@ int uffd_unregister(int uffd, void *addr, uint64_t len) return ret; } - -int uffd_open_dev(unsigned int flags) -{ - int fd, uffd; - - fd = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC); - if (fd < 0) - return fd; - uffd = ioctl(fd, USERFAULTFD_IOC_NEW, flags); - close(fd); - - return uffd; -} - -int uffd_open_sys(unsigned int flags) -{ -#ifdef __NR_userfaultfd - return syscall(__NR_userfaultfd, flags); -#else - return -1; -#endif -} - -int uffd_open(unsigned int flags) -{ - int uffd = uffd_open_sys(flags); - - if (uffd < 0) - uffd = uffd_open_dev(flags); - - return uffd; -} - -int uffd_get_features(uint64_t *features) -{ - struct uffdio_api uffdio_api = { .api = UFFD_API, .features = 0 }; - /* - * This should by default work in most kernels; the feature list - * will be the same no matter what we pass in here. - */ - int fd = uffd_open(UFFD_USER_MODE_ONLY); - - if (fd < 0) - /* Maybe the kernel is older than user-only mode? */ - fd = uffd_open(0); - - if (fd < 0) - return fd; - - if (ioctl(fd, UFFDIO_API, &uffdio_api)) { - close(fd); - return -errno; - } - - *features = uffdio_api.features; - close(fd); - - return 0; -} diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h index b950bd16083a..c7fa61f0dff8 100644 --- a/tools/testing/selftests/mm/vm_util.h +++ b/tools/testing/selftests/mm/vm_util.h @@ -44,14 +44,11 @@ bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size); bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size); int64_t allocate_transhuge(void *ptr, int pagemap_fd); unsigned long default_huge_page_size(void); +int detect_hugetlb_page_sizes(size_t sizes[], int max); int uffd_register(int uffd, void *addr, uint64_t len, bool miss, bool wp, bool minor); int uffd_unregister(int uffd, void *addr, uint64_t len); -int uffd_open_dev(unsigned int flags); -int uffd_open_sys(unsigned int flags); -int uffd_open(unsigned int flags); -int uffd_get_features(uint64_t *features); int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len, bool miss, bool wp, bool minor, uint64_t *ioctls); diff --git a/tools/testing/selftests/mm/write_hugetlb_memory.sh b/tools/testing/selftests/mm/write_hugetlb_memory.sh index 70a02301f4c2..70a02301f4c2 100644..100755 --- a/tools/testing/selftests/mm/write_hugetlb_memory.sh +++ b/tools/testing/selftests/mm/write_hugetlb_memory.sh diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index f27a7338b60e..2f9d378edec3 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -15,6 +15,7 @@ ip_local_port_range ipsec ipv6_flowlabel ipv6_flowlabel_mgr +log.txt msg_zerocopy nettest psock_fanout @@ -29,6 +30,7 @@ reuseport_bpf_numa reuseport_dualstack rxtimestamp sctp_hello +scm_pidfd sk_bind_sendto_listen sk_connect_zero_addr socket @@ -44,6 +46,7 @@ test_unix_oob timestamping tls toeplitz +tools tun txring_overwrite txtimestamp diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index c12df57d5539..8b017070960d 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -3,6 +3,8 @@ CFLAGS = -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ $(KHDR_INCLUDES) +# Additional include paths needed by kselftest.h +CFLAGS += -I../ TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \ rtnetlink.sh xfrm_policy.sh test_blackhole_dev.sh @@ -38,6 +40,7 @@ TEST_PROGS += srv6_end_dt6_l3vpn_test.sh TEST_PROGS += srv6_hencap_red_l3vpn_test.sh TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh TEST_PROGS += srv6_end_next_csid_l3vpn_test.sh +TEST_PROGS += srv6_end_x_next_csid_l3vpn_test.sh TEST_PROGS += srv6_end_flavors_test.sh TEST_PROGS += vrf_strict_mode_test.sh TEST_PROGS += arp_ndisc_evict_nocarrier.sh @@ -84,6 +87,8 @@ TEST_GEN_FILES += ip_local_port_range TEST_GEN_FILES += bind_wildcard TEST_PROGS += test_vxlan_mdb.sh TEST_PROGS += test_bridge_neigh_suppress.sh +TEST_PROGS += test_vxlan_nolocalbypass.sh +TEST_PROGS += test_bridge_backup_port.sh TEST_FILES := settings @@ -112,7 +117,7 @@ $(MAKE_DIRS): mkdir -p $@ # Get Clang's default includes on this system, as opposed to those seen by -# '-target bpf'. This fixes "missing" files on some architectures/distros, +# '--target=bpf'. This fixes "missing" files on some architectures/distros, # such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc. # # Use '-idirafter': Don't interfere with include mechanics except where the @@ -130,7 +135,7 @@ endif CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) $(OUTPUT)/nat6to4.o: nat6to4.c $(BPFOBJ) | $(MAKE_DIRS) - $(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@ + $(CLANG) -O2 --target=bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ $(APIDIR)/linux/bpf.h \ diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index 1e4b397cece6..221c387a7d7f 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,3 +1,4 @@ -TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect +CFLAGS += $(KHDR_INCLUDES) +TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect scm_pidfd include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c new file mode 100644 index 000000000000..a86222143d79 --- /dev/null +++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c @@ -0,0 +1,430 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +#define _GNU_SOURCE +#include <error.h> +#include <limits.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/socket.h> +#include <linux/socket.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <sys/un.h> +#include <sys/signal.h> +#include <sys/types.h> +#include <sys/wait.h> + +#include "../../kselftest_harness.h" + +#define clean_errno() (errno == 0 ? "None" : strerror(errno)) +#define log_err(MSG, ...) \ + fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", __FILE__, __LINE__, \ + clean_errno(), ##__VA_ARGS__) + +#ifndef SCM_PIDFD +#define SCM_PIDFD 0x04 +#endif + +static void child_die() +{ + exit(1); +} + +static int safe_int(const char *numstr, int *converted) +{ + char *err = NULL; + long sli; + + errno = 0; + sli = strtol(numstr, &err, 0); + if (errno == ERANGE && (sli == LONG_MAX || sli == LONG_MIN)) + return -ERANGE; + + if (errno != 0 && sli == 0) + return -EINVAL; + + if (err == numstr || *err != '\0') + return -EINVAL; + + if (sli > INT_MAX || sli < INT_MIN) + return -ERANGE; + + *converted = (int)sli; + return 0; +} + +static int char_left_gc(const char *buffer, size_t len) +{ + size_t i; + + for (i = 0; i < len; i++) { + if (buffer[i] == ' ' || buffer[i] == '\t') + continue; + + return i; + } + + return 0; +} + +static int char_right_gc(const char *buffer, size_t len) +{ + int i; + + for (i = len - 1; i >= 0; i--) { + if (buffer[i] == ' ' || buffer[i] == '\t' || + buffer[i] == '\n' || buffer[i] == '\0') + continue; + + return i + 1; + } + + return 0; +} + +static char *trim_whitespace_in_place(char *buffer) +{ + buffer += char_left_gc(buffer, strlen(buffer)); + buffer[char_right_gc(buffer, strlen(buffer))] = '\0'; + return buffer; +} + +/* borrowed (with all helpers) from pidfd/pidfd_open_test.c */ +static pid_t get_pid_from_fdinfo_file(int pidfd, const char *key, size_t keylen) +{ + int ret; + char path[512]; + FILE *f; + size_t n = 0; + pid_t result = -1; + char *line = NULL; + + snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", pidfd); + + f = fopen(path, "re"); + if (!f) + return -1; + + while (getline(&line, &n, f) != -1) { + char *numstr; + + if (strncmp(line, key, keylen)) + continue; + + numstr = trim_whitespace_in_place(line + 4); + ret = safe_int(numstr, &result); + if (ret < 0) + goto out; + + break; + } + +out: + free(line); + fclose(f); + return result; +} + +static int cmsg_check(int fd) +{ + struct msghdr msg = { 0 }; + struct cmsghdr *cmsg; + struct iovec iov; + struct ucred *ucred = NULL; + int data = 0; + char control[CMSG_SPACE(sizeof(struct ucred)) + + CMSG_SPACE(sizeof(int))] = { 0 }; + int *pidfd = NULL; + pid_t parent_pid; + int err; + + iov.iov_base = &data; + iov.iov_len = sizeof(data); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + err = recvmsg(fd, &msg, 0); + if (err < 0) { + log_err("recvmsg"); + return 1; + } + + if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { + log_err("recvmsg: truncated"); + return 1; + } + + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; + cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_PIDFD) { + if (cmsg->cmsg_len < sizeof(*pidfd)) { + log_err("CMSG parse: SCM_PIDFD wrong len"); + return 1; + } + + pidfd = (void *)CMSG_DATA(cmsg); + } + + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_CREDENTIALS) { + if (cmsg->cmsg_len < sizeof(*ucred)) { + log_err("CMSG parse: SCM_CREDENTIALS wrong len"); + return 1; + } + + ucred = (void *)CMSG_DATA(cmsg); + } + } + + /* send(pfd, "x", sizeof(char), 0) */ + if (data != 'x') { + log_err("recvmsg: data corruption"); + return 1; + } + + if (!pidfd) { + log_err("CMSG parse: SCM_PIDFD not found"); + return 1; + } + + if (!ucred) { + log_err("CMSG parse: SCM_CREDENTIALS not found"); + return 1; + } + + /* pidfd from SCM_PIDFD should point to the parent process PID */ + parent_pid = + get_pid_from_fdinfo_file(*pidfd, "Pid:", sizeof("Pid:") - 1); + if (parent_pid != getppid()) { + log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid()); + return 1; + } + + return 0; +} + +struct sock_addr { + char sock_name[32]; + struct sockaddr_un listen_addr; + socklen_t addrlen; +}; + +FIXTURE(scm_pidfd) +{ + int server; + pid_t client_pid; + int startup_pipe[2]; + struct sock_addr server_addr; + struct sock_addr *client_addr; +}; + +FIXTURE_VARIANT(scm_pidfd) +{ + int type; + bool abstract; +}; + +FIXTURE_VARIANT_ADD(scm_pidfd, stream_pathname) +{ + .type = SOCK_STREAM, + .abstract = 0, +}; + +FIXTURE_VARIANT_ADD(scm_pidfd, stream_abstract) +{ + .type = SOCK_STREAM, + .abstract = 1, +}; + +FIXTURE_VARIANT_ADD(scm_pidfd, dgram_pathname) +{ + .type = SOCK_DGRAM, + .abstract = 0, +}; + +FIXTURE_VARIANT_ADD(scm_pidfd, dgram_abstract) +{ + .type = SOCK_DGRAM, + .abstract = 1, +}; + +FIXTURE_SETUP(scm_pidfd) +{ + self->client_addr = mmap(NULL, sizeof(*self->client_addr), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(MAP_FAILED, self->client_addr); +} + +FIXTURE_TEARDOWN(scm_pidfd) +{ + close(self->server); + + kill(self->client_pid, SIGKILL); + waitpid(self->client_pid, NULL, 0); + + if (!variant->abstract) { + unlink(self->server_addr.sock_name); + unlink(self->client_addr->sock_name); + } +} + +static void fill_sockaddr(struct sock_addr *addr, bool abstract) +{ + char *sun_path_buf = (char *)&addr->listen_addr.sun_path; + + addr->listen_addr.sun_family = AF_UNIX; + addr->addrlen = offsetof(struct sockaddr_un, sun_path); + snprintf(addr->sock_name, sizeof(addr->sock_name), "scm_pidfd_%d", getpid()); + addr->addrlen += strlen(addr->sock_name); + if (abstract) { + *sun_path_buf = '\0'; + addr->addrlen++; + sun_path_buf++; + } else { + unlink(addr->sock_name); + } + memcpy(sun_path_buf, addr->sock_name, strlen(addr->sock_name)); +} + +static void client(FIXTURE_DATA(scm_pidfd) *self, + const FIXTURE_VARIANT(scm_pidfd) *variant) +{ + int err; + int cfd; + socklen_t len; + struct ucred peer_cred; + int peer_pidfd; + pid_t peer_pid; + int on = 0; + + cfd = socket(AF_UNIX, variant->type, 0); + if (cfd < 0) { + log_err("socket"); + child_die(); + } + + if (variant->type == SOCK_DGRAM) { + fill_sockaddr(self->client_addr, variant->abstract); + + if (bind(cfd, (struct sockaddr *)&self->client_addr->listen_addr, self->client_addr->addrlen)) { + log_err("bind"); + child_die(); + } + } + + if (connect(cfd, (struct sockaddr *)&self->server_addr.listen_addr, + self->server_addr.addrlen) != 0) { + log_err("connect"); + child_die(); + } + + on = 1; + if (setsockopt(cfd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) { + log_err("Failed to set SO_PASSCRED"); + child_die(); + } + + if (setsockopt(cfd, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) { + log_err("Failed to set SO_PASSPIDFD"); + child_die(); + } + + close(self->startup_pipe[1]); + + if (cmsg_check(cfd)) { + log_err("cmsg_check failed"); + child_die(); + } + + /* skip further for SOCK_DGRAM as it's not applicable */ + if (variant->type == SOCK_DGRAM) + return; + + len = sizeof(peer_cred); + if (getsockopt(cfd, SOL_SOCKET, SO_PEERCRED, &peer_cred, &len)) { + log_err("Failed to get SO_PEERCRED"); + child_die(); + } + + len = sizeof(peer_pidfd); + if (getsockopt(cfd, SOL_SOCKET, SO_PEERPIDFD, &peer_pidfd, &len)) { + log_err("Failed to get SO_PEERPIDFD"); + child_die(); + } + + /* pid from SO_PEERCRED should point to the parent process PID */ + if (peer_cred.pid != getppid()) { + log_err("peer_cred.pid != getppid(): %d != %d", peer_cred.pid, getppid()); + child_die(); + } + + peer_pid = get_pid_from_fdinfo_file(peer_pidfd, + "Pid:", sizeof("Pid:") - 1); + if (peer_pid != peer_cred.pid) { + log_err("peer_pid != peer_cred.pid: %d != %d", peer_pid, peer_cred.pid); + child_die(); + } +} + +TEST_F(scm_pidfd, test) +{ + int err; + int pfd; + int child_status = 0; + + self->server = socket(AF_UNIX, variant->type, 0); + ASSERT_NE(-1, self->server); + + fill_sockaddr(&self->server_addr, variant->abstract); + + err = bind(self->server, (struct sockaddr *)&self->server_addr.listen_addr, self->server_addr.addrlen); + ASSERT_EQ(0, err); + + if (variant->type == SOCK_STREAM) { + err = listen(self->server, 1); + ASSERT_EQ(0, err); + } + + err = pipe(self->startup_pipe); + ASSERT_NE(-1, err); + + self->client_pid = fork(); + ASSERT_NE(-1, self->client_pid); + if (self->client_pid == 0) { + close(self->server); + close(self->startup_pipe[0]); + client(self, variant); + exit(0); + } + close(self->startup_pipe[1]); + + if (variant->type == SOCK_STREAM) { + pfd = accept(self->server, NULL, NULL); + ASSERT_NE(-1, pfd); + } else { + pfd = self->server; + } + + /* wait until the child arrives at checkpoint */ + read(self->startup_pipe[0], &err, sizeof(int)); + close(self->startup_pipe[0]); + + if (variant->type == SOCK_DGRAM) { + err = sendto(pfd, "x", sizeof(char), 0, (struct sockaddr *)&self->client_addr->listen_addr, self->client_addr->addrlen); + ASSERT_NE(-1, err); + } else { + err = send(pfd, "x", sizeof(char), 0); + ASSERT_NE(-1, err); + } + + close(pfd); + waitpid(self->client_pid, &child_status, 0); + ASSERT_EQ(0, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/bind_bhash.sh b/tools/testing/selftests/net/bind_bhash.sh index ca0292d4b441..a28563bdaae0 100755 --- a/tools/testing/selftests/net/bind_bhash.sh +++ b/tools/testing/selftests/net/bind_bhash.sh @@ -2,7 +2,7 @@ # SPDX-License-Identifier: GPL-2.0 NR_FILES=32768 -SAVED_NR_FILES=$(ulimit -n) +readonly NETNS="ns-$(mktemp -u XXXXXX)" # default values port=443 @@ -36,21 +36,21 @@ while getopts "ha:p:64" opt; do done setup() { + ip netns add "${NETNS}" + ip -netns "${NETNS}" link add veth0 type veth peer name veth1 + ip -netns "${NETNS}" link set lo up + ip -netns "${NETNS}" link set veth0 up + ip -netns "${NETNS}" link set veth1 up + if [[ "$use_v6" == true ]]; then - ip addr add $addr_v6 nodad dev eth0 + ip -netns "${NETNS}" addr add $addr_v6 nodad dev veth0 else - ip addr add $addr_v4 dev lo + ip -netns "${NETNS}" addr add $addr_v4 dev lo fi - ulimit -n $NR_FILES } cleanup() { - if [[ "$use_v6" == true ]]; then - ip addr del $addr_v6 dev eth0 - else - ip addr del $addr_v4/32 dev lo - fi - ulimit -n $SAVED_NR_FILES + ip netns del "${NETNS}" } if [[ "$addr" != "" ]]; then @@ -59,8 +59,10 @@ if [[ "$addr" != "" ]]; then fi setup if [[ "$use_v6" == true ]] ; then - ./bind_bhash $port "ipv6" $addr_v6 + ip netns exec "${NETNS}" sh -c \ + "ulimit -n ${NR_FILES};./bind_bhash ${port} ipv6 ${addr_v6}" else - ./bind_bhash $port "ipv4" $addr_v4 + ip netns exec "${NETNS}" sh -c \ + "ulimit -n ${NR_FILES};./bind_bhash ${port} ipv4 ${addr_v4}" fi cleanup diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c index 58edfc15d28b..a2662348cdb1 100644 --- a/tools/testing/selftests/net/bind_wildcard.c +++ b/tools/testing/selftests/net/bind_wildcard.c @@ -6,41 +6,91 @@ #include "../kselftest_harness.h" +struct in6_addr in6addr_v4mapped_any = { + .s6_addr = { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 255, 255, + 0, 0, 0, 0 + } +}; + +struct in6_addr in6addr_v4mapped_loopback = { + .s6_addr = { + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 255, 255, + 127, 0, 0, 1 + } +}; + FIXTURE(bind_wildcard) { struct sockaddr_in addr4; struct sockaddr_in6 addr6; - int expected_errno; }; FIXTURE_VARIANT(bind_wildcard) { const __u32 addr4_const; const struct in6_addr *addr6_const; + int expected_errno; }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any) { .addr4_const = INADDR_ANY, .addr6_const = &in6addr_any, + .expected_errno = EADDRINUSE, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_local) { .addr4_const = INADDR_ANY, .addr6_const = &in6addr_loopback, + .expected_errno = 0, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_any) +{ + .addr4_const = INADDR_ANY, + .addr6_const = &in6addr_v4mapped_any, + .expected_errno = EADDRINUSE, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_local) +{ + .addr4_const = INADDR_ANY, + .addr6_const = &in6addr_v4mapped_loopback, + .expected_errno = EADDRINUSE, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any) { .addr4_const = INADDR_LOOPBACK, .addr6_const = &in6addr_any, + .expected_errno = EADDRINUSE, }; FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_local) { .addr4_const = INADDR_LOOPBACK, .addr6_const = &in6addr_loopback, + .expected_errno = 0, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_any) +{ + .addr4_const = INADDR_LOOPBACK, + .addr6_const = &in6addr_v4mapped_any, + .expected_errno = EADDRINUSE, +}; + +FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_local) +{ + .addr4_const = INADDR_LOOPBACK, + .addr6_const = &in6addr_v4mapped_loopback, + .expected_errno = EADDRINUSE, }; FIXTURE_SETUP(bind_wildcard) @@ -52,11 +102,6 @@ FIXTURE_SETUP(bind_wildcard) self->addr6.sin6_family = AF_INET6; self->addr6.sin6_port = htons(0); self->addr6.sin6_addr = *variant->addr6_const; - - if (variant->addr6_const == &in6addr_any) - self->expected_errno = EADDRINUSE; - else - self->expected_errno = 0; } FIXTURE_TEARDOWN(bind_wildcard) @@ -65,6 +110,7 @@ FIXTURE_TEARDOWN(bind_wildcard) void bind_sockets(struct __test_metadata *_metadata, FIXTURE_DATA(bind_wildcard) *self, + int expected_errno, struct sockaddr *addr1, socklen_t addrlen1, struct sockaddr *addr2, socklen_t addrlen2) { @@ -86,9 +132,9 @@ void bind_sockets(struct __test_metadata *_metadata, ASSERT_GT(fd[1], 0); ret = bind(fd[1], addr2, addrlen2); - if (self->expected_errno) { + if (expected_errno) { ASSERT_EQ(ret, -1); - ASSERT_EQ(errno, self->expected_errno); + ASSERT_EQ(errno, expected_errno); } else { ASSERT_EQ(ret, 0); } @@ -99,14 +145,14 @@ void bind_sockets(struct __test_metadata *_metadata, TEST_F(bind_wildcard, v4_v6) { - bind_sockets(_metadata, self, - (struct sockaddr *)&self->addr4, sizeof(self->addr6), + bind_sockets(_metadata, self, variant->expected_errno, + (struct sockaddr *)&self->addr4, sizeof(self->addr4), (struct sockaddr *)&self->addr6, sizeof(self->addr6)); } TEST_F(bind_wildcard, v6_v4) { - bind_sockets(_metadata, self, + bind_sockets(_metadata, self, variant->expected_errno, (struct sockaddr *)&self->addr6, sizeof(self->addr6), (struct sockaddr *)&self->addr4, sizeof(self->addr4)); } diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index d1d421ec10a3..8da562a9ae87 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -50,3 +50,5 @@ CONFIG_CRYPTO_SM4_GENERIC=y CONFIG_AMT=m CONFIG_VXLAN=m CONFIG_IP_SCTP=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_CRYPTO_ARIA=y diff --git a/tools/testing/selftests/net/csum.c b/tools/testing/selftests/net/csum.c index 82a1c1839da6..90eb06fefa59 100644 --- a/tools/testing/selftests/net/csum.c +++ b/tools/testing/selftests/net/csum.c @@ -91,6 +91,8 @@ #include <sys/types.h> #include <unistd.h> +#include "kselftest.h" + static bool cfg_bad_csum; static int cfg_family = PF_INET6; static int cfg_num_pkt = 4; @@ -450,7 +452,7 @@ static void send_packet(int fd, const char *buf, int len) iov[2].iov_len = len; msg.msg_iov = iov; - msg.msg_iovlen = sizeof(iov) / sizeof(iov[0]); + msg.msg_iovlen = ARRAY_SIZE(iov); msg.msg_name = &addr; msg.msg_namelen = sizeof(addr); @@ -505,7 +507,7 @@ static void __recv_prepare_packet_filter(int fd, int off_nexthdr, int off_dport) struct sock_fprog prog = {}; prog.filter = filter; - prog.len = sizeof(filter) / sizeof(struct sock_filter); + prog.len = ARRAY_SIZE(filter); if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) error(1, errno, "setsockopt filter"); } diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index ee6880ac3e5e..d32a14ba069a 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -592,6 +592,20 @@ ipv4_ping_novrf() done # + # out, but don't use gateway if peer is not on link + # + a=${NSB_IP} + log_start + run_cmd ping -c 1 -w 1 -r ${a} + log_test_addr ${a} $? 0 "ping out (don't route), peer on link" + + a=${NSB_LO_IP} + log_start + show_hint "Fails since peer is not on link" + run_cmd ping -c 1 -w 1 -r ${a} + log_test_addr ${a} $? 1 "ping out (don't route), peer not on link" + + # # in # for a in ${NSA_IP} ${NSA_LO_IP} @@ -1105,6 +1119,59 @@ test_ipv4_md5_vrf__global_server__bind_ifindex0() set_sysctl net.ipv4.tcp_l3mdev_accept="$old_tcp_l3mdev_accept" } +ipv4_tcp_dontroute() +{ + local syncookies=$1 + local nsa_syncookies + local nsb_syncookies + local a + + # + # Link local connection tests (SO_DONTROUTE). + # Connections should succeed only when the remote IP address is + # on link (doesn't need to be routed through a gateway). + # + + nsa_syncookies=$(ip netns exec "${NSA}" sysctl -n net.ipv4.tcp_syncookies) + nsb_syncookies=$(ip netns exec "${NSB}" sysctl -n net.ipv4.tcp_syncookies) + ip netns exec "${NSA}" sysctl -wq net.ipv4.tcp_syncookies=${syncookies} + ip netns exec "${NSB}" sysctl -wq net.ipv4.tcp_syncookies=${syncookies} + + # Test with eth1 address (on link). + + a=${NSB_IP} + log_start + do_run_cmd nettest -B -N "${NSA}" -O "${NSB}" -r ${a} --client-dontroute + log_test_addr ${a} $? 0 "SO_DONTROUTE client, syncookies=${syncookies}" + + a=${NSB_IP} + log_start + do_run_cmd nettest -B -N "${NSA}" -O "${NSB}" -r ${a} --server-dontroute + log_test_addr ${a} $? 0 "SO_DONTROUTE server, syncookies=${syncookies}" + + # Test with loopback address (routed). + # + # The client would use the eth1 address as source IP by default. + # Therefore, we need to use the -c option here, to force the use of the + # routed (loopback) address as source IP (so that the server will try + # to respond to a routed address and not a link local one). + + a=${NSB_LO_IP} + log_start + show_hint "Should fail 'Network is unreachable' since server is not on link" + do_run_cmd nettest -B -N "${NSA}" -O "${NSB}" -c "${NSA_LO_IP}" -r ${a} --client-dontroute + log_test_addr ${a} $? 1 "SO_DONTROUTE client, syncookies=${syncookies}" + + a=${NSB_LO_IP} + log_start + show_hint "Should timeout since server cannot respond (client is not on link)" + do_run_cmd nettest -B -N "${NSA}" -O "${NSB}" -c "${NSA_LO_IP}" -r ${a} --server-dontroute + log_test_addr ${a} $? 2 "SO_DONTROUTE server, syncookies=${syncookies}" + + ip netns exec "${NSB}" sysctl -wq net.ipv4.tcp_syncookies=${nsb_syncookies} + ip netns exec "${NSA}" sysctl -wq net.ipv4.tcp_syncookies=${nsa_syncookies} +} + ipv4_tcp_novrf() { local a @@ -1224,6 +1291,9 @@ ipv4_tcp_novrf() log_test_addr ${a} $? 1 "No server, device client, local conn" [ "$fips_enabled" = "1" ] || ipv4_tcp_md5_novrf + + ipv4_tcp_dontroute 0 + ipv4_tcp_dontroute 2 } ipv4_tcp_vrf() @@ -1594,6 +1664,23 @@ ipv4_udp_novrf() log_start run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 2 "No server, device client, local conn" + + # + # Link local connection tests (SO_DONTROUTE). + # Connections should succeed only when the remote IP address is + # on link (doesn't need to be routed through a gateway). + # + + a=${NSB_IP} + log_start + do_run_cmd nettest -B -D -N "${NSA}" -O "${NSB}" -r ${a} --client-dontroute + log_test_addr ${a} $? 0 "SO_DONTROUTE client" + + a=${NSB_LO_IP} + log_start + show_hint "Should fail 'Network is unreachable' since server is not on link" + do_run_cmd nettest -B -D -N "${NSA}" -O "${NSB}" -r ${a} --client-dontroute + log_test_addr ${a} $? 1 "SO_DONTROUTE client" } ipv4_udp_vrf() diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index 0f5e88c8f4ff..a6f2c0b9555d 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -29,6 +29,7 @@ IPV4_TESTS=" ipv4_large_res_grp ipv4_compat_mode ipv4_fdb_grp_fcnal + ipv4_mpath_select ipv4_torture ipv4_res_torture " @@ -42,6 +43,7 @@ IPV6_TESTS=" ipv6_large_res_grp ipv6_compat_mode ipv6_fdb_grp_fcnal + ipv6_mpath_select ipv6_torture ipv6_res_torture " @@ -370,6 +372,27 @@ check_large_res_grp() log_test $? 0 "Dump large (x$buckets) nexthop buckets" } +get_route_dev() +{ + local pfx="$1" + local out + + if out=$($IP -j route get "$pfx" | jq -re ".[0].dev"); then + echo "$out" + fi +} + +check_route_dev() +{ + local pfx="$1" + local expected="$2" + local out + + out=$(get_route_dev "$pfx") + + check_output "$out" "$expected" +} + start_ip_monitor() { local mtype=$1 @@ -575,6 +598,112 @@ ipv4_fdb_grp_fcnal() $IP link del dev vx10 } +ipv4_mpath_select() +{ + local rc dev match h addr + + echo + echo "IPv4 multipath selection" + echo "------------------------" + if [ ! -x "$(command -v jq)" ]; then + echo "SKIP: Could not run test; need jq tool" + return $ksft_skip + fi + + # Use status of existing neighbor entry when determining nexthop for + # multipath routes. + local -A gws + gws=([veth1]=172.16.1.2 [veth3]=172.16.2.2) + local -A other_dev + other_dev=([veth1]=veth3 [veth3]=veth1) + + run_cmd "$IP nexthop add id 1 via ${gws["veth1"]} dev veth1" + run_cmd "$IP nexthop add id 2 via ${gws["veth3"]} dev veth3" + run_cmd "$IP nexthop add id 1001 group 1/2" + run_cmd "$IP ro add 172.16.101.0/24 nhid 1001" + rc=0 + for dev in veth1 veth3; do + match=0 + for h in {1..254}; do + addr="172.16.101.$h" + if [ "$(get_route_dev "$addr")" = "$dev" ]; then + match=1 + break + fi + done + if (( match == 0 )); then + echo "SKIP: Did not find a route using device $dev" + return $ksft_skip + fi + run_cmd "$IP neigh add ${gws[$dev]} dev $dev nud failed" + if ! check_route_dev "$addr" "${other_dev[$dev]}"; then + rc=1 + break + fi + run_cmd "$IP neigh del ${gws[$dev]} dev $dev" + done + log_test $rc 0 "Use valid neighbor during multipath selection" + + run_cmd "$IP neigh add 172.16.1.2 dev veth1 nud incomplete" + run_cmd "$IP neigh add 172.16.2.2 dev veth3 nud incomplete" + run_cmd "$IP route get 172.16.101.1" + # if we did not crash, success + log_test $rc 0 "Multipath selection with no valid neighbor" +} + +ipv6_mpath_select() +{ + local rc dev match h addr + + echo + echo "IPv6 multipath selection" + echo "------------------------" + if [ ! -x "$(command -v jq)" ]; then + echo "SKIP: Could not run test; need jq tool" + return $ksft_skip + fi + + # Use status of existing neighbor entry when determining nexthop for + # multipath routes. + local -A gws + gws=([veth1]=2001:db8:91::2 [veth3]=2001:db8:92::2) + local -A other_dev + other_dev=([veth1]=veth3 [veth3]=veth1) + + run_cmd "$IP nexthop add id 1 via ${gws["veth1"]} dev veth1" + run_cmd "$IP nexthop add id 2 via ${gws["veth3"]} dev veth3" + run_cmd "$IP nexthop add id 1001 group 1/2" + run_cmd "$IP ro add 2001:db8:101::/64 nhid 1001" + rc=0 + for dev in veth1 veth3; do + match=0 + for h in {1..65535}; do + addr=$(printf "2001:db8:101::%x" $h) + if [ "$(get_route_dev "$addr")" = "$dev" ]; then + match=1 + break + fi + done + if (( match == 0 )); then + echo "SKIP: Did not find a route using device $dev" + return $ksft_skip + fi + run_cmd "$IP neigh add ${gws[$dev]} dev $dev nud failed" + if ! check_route_dev "$addr" "${other_dev[$dev]}"; then + rc=1 + break + fi + run_cmd "$IP neigh del ${gws[$dev]} dev $dev" + done + log_test $rc 0 "Use valid neighbor during multipath selection" + + run_cmd "$IP neigh add 2001:db8:91::2 dev veth1 nud incomplete" + run_cmd "$IP neigh add 2001:db8:92::2 dev veth3 nud incomplete" + run_cmd "$IP route get 2001:db8:101::1" + # if we did not crash, success + log_test $rc 0 "Multipath selection with no valid neighbor" +} + ################################################################################ # basic operations (add, delete, replace) on nexthops and nexthop groups # @@ -1981,6 +2110,11 @@ basic() run_cmd "$IP link set dev lo up" + # Dump should not loop endlessly when maximum nexthop ID is configured. + run_cmd "$IP nexthop add id $((2**32-1)) blackhole" + run_cmd "timeout 5 $IP nexthop" + log_test $? 0 "Maximum nexthop ID dump" + # # groups # @@ -2201,6 +2335,11 @@ basic_res() run_cmd "$IP nexthop bucket list fdb" log_test $? 255 "Dump all nexthop buckets with invalid 'fdb' keyword" + # Dump should not loop endlessly when maximum nexthop ID is configured. + run_cmd "$IP nexthop add id $((2**32-1)) group 1/2 type resilient buckets 4" + run_cmd "timeout 5 $IP nexthop bucket" + log_test $? 0 "Maximum nexthop ID dump" + # # resilient nexthop buckets get requests # diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 35d89dfa6f11..e7d2a530618a 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -9,13 +9,17 @@ ret=0 ksft_skip=4 # all tests in this script. Can be overridden with -t option -TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh" +TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \ + ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \ + ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \ + ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \ + ipv4_mpath_list ipv6_mpath_list" VERBOSE=0 PAUSE_ON_FAIL=no PAUSE=no -IP="ip -netns ns1" -NS_EXEC="ip netns exec ns1" +IP="$(which ip) -netns ns1" +NS_EXEC="$(which ip) netns exec ns1" which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping) @@ -747,6 +751,68 @@ fib_notify_test() cleanup &> /dev/null } +fib6_gc_test() +{ + setup + + echo + echo "Fib6 garbage collection test" + set -e + + EXPIRE=3 + + # Check expiration of routes every $EXPIRE seconds (GC) + $NS_EXEC sysctl -wq net.ipv6.route.gc_interval=$EXPIRE + + $IP link add dummy_10 type dummy + $IP link set dev dummy_10 up + $IP -6 address add 2001:10::1/64 dev dummy_10 + + $NS_EXEC sysctl -wq net.ipv6.route.flush=1 + + # Temporary routes + for i in $(seq 1 1000); do + # Expire route after $EXPIRE seconds + $IP -6 route add 2001:20::$i \ + via 2001:10::2 dev dummy_10 expires $EXPIRE + done + sleep $(($EXPIRE * 2)) + N_EXP_SLEEP=$($IP -6 route list |grep expires|wc -l) + if [ $N_EXP_SLEEP -ne 0 ]; then + echo "FAIL: expected 0 routes with expires, got $N_EXP_SLEEP" + ret=1 + else + ret=0 + fi + + # Permanent routes + for i in $(seq 1 5000); do + $IP -6 route add 2001:30::$i \ + via 2001:10::2 dev dummy_10 + done + # Temporary routes + for i in $(seq 1 1000); do + # Expire route after $EXPIRE seconds + $IP -6 route add 2001:20::$i \ + via 2001:10::2 dev dummy_10 expires $EXPIRE + done + sleep $(($EXPIRE * 2)) + N_EXP_SLEEP=$($IP -6 route list |grep expires|wc -l) + if [ $N_EXP_SLEEP -ne 0 ]; then + echo "FAIL: expected 0 routes with expires," \ + "got $N_EXP_SLEEP (5000 permanent routes)" + ret=1 + else + ret=0 + fi + + set +e + + log_test $ret 0 "ipv6 route garbage collection" + + cleanup &> /dev/null +} + fib_suppress_test() { echo @@ -1869,6 +1935,155 @@ ipv4_del_addr_test() cleanup } +ipv6_del_addr_test() +{ + echo + echo "IPv6 delete address route tests" + + setup + + set -e + for i in $(seq 6); do + $IP li add dummy${i} up type dummy + done + + $IP li add red up type vrf table 1111 + $IP ro add vrf red unreachable default + for i in $(seq 4 6); do + $IP li set dummy${i} vrf red + done + + $IP addr add dev dummy1 fe80::1/128 + $IP addr add dev dummy1 2001:db8:101::1/64 + $IP addr add dev dummy1 2001:db8:101::10/64 + $IP addr add dev dummy1 2001:db8:101::11/64 + $IP addr add dev dummy1 2001:db8:101::12/64 + $IP addr add dev dummy1 2001:db8:101::13/64 + $IP addr add dev dummy1 2001:db8:101::14/64 + $IP addr add dev dummy1 2001:db8:101::15/64 + $IP addr add dev dummy2 fe80::1/128 + $IP addr add dev dummy2 2001:db8:101::1/64 + $IP addr add dev dummy2 2001:db8:101::11/64 + $IP addr add dev dummy3 fe80::1/128 + + $IP addr add dev dummy4 2001:db8:101::1/64 + $IP addr add dev dummy4 2001:db8:101::10/64 + $IP addr add dev dummy4 2001:db8:101::11/64 + $IP addr add dev dummy4 2001:db8:101::12/64 + $IP addr add dev dummy4 2001:db8:101::13/64 + $IP addr add dev dummy4 2001:db8:101::14/64 + $IP addr add dev dummy5 2001:db8:101::1/64 + $IP addr add dev dummy5 2001:db8:101::11/64 + + # Single device using src address + $IP route add 2001:db8:110::/64 dev dummy3 src 2001:db8:101::10 + # Two devices with the same source address + $IP route add 2001:db8:111::/64 dev dummy3 src 2001:db8:101::11 + # VRF with single device using src address + $IP route add vrf red 2001:db8:110::/64 dev dummy6 src 2001:db8:101::10 + # VRF with two devices using src address + $IP route add vrf red 2001:db8:111::/64 dev dummy6 src 2001:db8:101::11 + # src address and nexthop dev in same VRF + $IP route add 2001:db8:112::/64 dev dummy3 src 2001:db8:101::12 + $IP route add vrf red 2001:db8:112::/64 dev dummy6 src 2001:db8:101::12 + # src address and nexthop device in different VRF + $IP route add 2001:db8:113::/64 dev lo src 2001:db8:101::13 + $IP route add vrf red 2001:db8:113::/64 dev lo src 2001:db8:101::13 + # table ID 0 + $IP route add table 0 2001:db8:115::/64 via 2001:db8:101::2 src 2001:db8:101::15 + # Link local source route + $IP route add 2001:db8:116::/64 dev dummy2 src fe80::1 + $IP route add 2001:db8:117::/64 dev dummy3 src fe80::1 + set +e + + echo " Single device using src address" + + $IP addr del dev dummy1 2001:db8:101::10/64 + $IP -6 route show | grep -q "src 2001:db8:101::10 " + log_test $? 1 "Prefsrc removed when src address removed on other device" + + echo " Two devices with the same source address" + + $IP addr del dev dummy1 2001:db8:101::11/64 + $IP -6 route show | grep -q "src 2001:db8:101::11 " + log_test $? 0 "Prefsrc not removed when src address exist on other device" + + $IP addr del dev dummy2 2001:db8:101::11/64 + $IP -6 route show | grep -q "src 2001:db8:101::11 " + log_test $? 1 "Prefsrc removed when src address removed on all devices" + + echo " VRF with single device using src address" + + $IP addr del dev dummy4 2001:db8:101::10/64 + $IP -6 route show vrf red | grep -q "src 2001:db8:101::10 " + log_test $? 1 "Prefsrc removed when src address removed on other device" + + echo " VRF with two devices using src address" + + $IP addr del dev dummy4 2001:db8:101::11/64 + $IP -6 route show vrf red | grep -q "src 2001:db8:101::11 " + log_test $? 0 "Prefsrc not removed when src address exist on other device" + + $IP addr del dev dummy5 2001:db8:101::11/64 + $IP -6 route show vrf red | grep -q "src 2001:db8:101::11 " + log_test $? 1 "Prefsrc removed when src address removed on all devices" + + echo " src address and nexthop dev in same VRF" + + $IP addr del dev dummy4 2001:db8:101::12/64 + $IP -6 route show vrf red | grep -q "src 2001:db8:101::12 " + log_test $? 1 "Prefsrc removed from VRF when source address deleted" + $IP -6 route show | grep -q " src 2001:db8:101::12 " + log_test $? 0 "Prefsrc in default VRF not removed" + + $IP addr add dev dummy4 2001:db8:101::12/64 + $IP route replace vrf red 2001:db8:112::/64 dev dummy6 src 2001:db8:101::12 + $IP addr del dev dummy1 2001:db8:101::12/64 + $IP -6 route show vrf red | grep -q "src 2001:db8:101::12 " + log_test $? 0 "Prefsrc not removed from VRF when source address exist" + $IP -6 route show | grep -q " src 2001:db8:101::12 " + log_test $? 1 "Prefsrc in default VRF removed" + + echo " src address and nexthop device in different VRF" + + $IP addr del dev dummy4 2001:db8:101::13/64 + $IP -6 route show vrf red | grep -q "src 2001:db8:101::13 " + log_test $? 0 "Prefsrc not removed from VRF when nexthop dev in diff VRF" + $IP -6 route show | grep -q "src 2001:db8:101::13 " + log_test $? 0 "Prefsrc not removed in default VRF" + + $IP addr add dev dummy4 2001:db8:101::13/64 + $IP addr del dev dummy1 2001:db8:101::13/64 + $IP -6 route show vrf red | grep -q "src 2001:db8:101::13 " + log_test $? 1 "Prefsrc removed from VRF when nexthop dev in diff VRF" + $IP -6 route show | grep -q "src 2001:db8:101::13 " + log_test $? 1 "Prefsrc removed in default VRF" + + echo " Table ID 0" + + $IP addr del dev dummy1 2001:db8:101::15/64 + $IP -6 route show | grep -q "src 2001:db8:101::15" + log_test $? 1 "Prefsrc removed from default VRF when source address deleted" + + echo " Link local source route" + $IP addr del dev dummy1 fe80::1/128 + $IP -6 route show | grep -q "2001:db8:116::/64 dev dummy2 src fe80::1" + log_test $? 0 "Prefsrc not removed when delete ll addr from other dev" + $IP addr del dev dummy2 fe80::1/128 + $IP -6 route show | grep -q "2001:db8:116::/64 dev dummy2 src fe80::1" + log_test $? 1 "Prefsrc removed when delete ll addr" + $IP -6 route show | grep -q "2001:db8:117::/64 dev dummy3 src fe80::1" + log_test $? 0 "Prefsrc not removed when delete ll addr from other dev" + $IP addr add dev dummy1 fe80::1/128 + $IP addr del dev dummy3 fe80::1/128 + $IP -6 route show | grep -q "2001:db8:117::/64 dev dummy3 src fe80::1" + log_test $? 1 "Prefsrc removed even ll addr still exist on other dev" + + for i in $(seq 6); do + $IP li del dummy${i} + done + cleanup +} ipv4_route_v6_gw_test() { @@ -2138,6 +2353,156 @@ ipv4_bcast_neigh_test() cleanup } +mpath_dep_check() +{ + if [ ! -x "$(command -v mausezahn)" ]; then + echo "mausezahn command not found. Skipping test" + return 1 + fi + + if [ ! -x "$(command -v jq)" ]; then + echo "jq command not found. Skipping test" + return 1 + fi + + if [ ! -x "$(command -v bc)" ]; then + echo "bc command not found. Skipping test" + return 1 + fi + + if [ ! -x "$(command -v perf)" ]; then + echo "perf command not found. Skipping test" + return 1 + fi + + perf list fib:* | grep -q fib_table_lookup + if [ $? -ne 0 ]; then + echo "IPv4 FIB tracepoint not found. Skipping test" + return 1 + fi + + perf list fib6:* | grep -q fib6_table_lookup + if [ $? -ne 0 ]; then + echo "IPv6 FIB tracepoint not found. Skipping test" + return 1 + fi + + return 0 +} + +link_stats_get() +{ + local ns=$1; shift + local dev=$1; shift + local dir=$1; shift + local stat=$1; shift + + ip -n $ns -j -s link show dev $dev \ + | jq '.[]["stats64"]["'$dir'"]["'$stat'"]' +} + +list_rcv_eval() +{ + local file=$1; shift + local expected=$1; shift + + local count=$(tail -n 1 $file | jq '.["counter-value"] | tonumber | floor') + local ratio=$(echo "scale=2; $count / $expected" | bc -l) + local res=$(echo "$ratio >= 0.95" | bc) + [[ $res -eq 1 ]] + log_test $? 0 "Multipath route hit ratio ($ratio)" +} + +ipv4_mpath_list_test() +{ + echo + echo "IPv4 multipath list receive tests" + + mpath_dep_check || return 1 + + route_setup + + set -e + run_cmd "ip netns exec ns1 ethtool -K veth1 tcp-segmentation-offload off" + + run_cmd "ip netns exec ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\"" + run_cmd "ip netns exec ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\"" + run_cmd "ip netns exec ns2 ethtool -K veth2 generic-receive-offload on" + run_cmd "ip -n ns2 link add name nh1 up type dummy" + run_cmd "ip -n ns2 link add name nh2 up type dummy" + run_cmd "ip -n ns2 address add 172.16.201.1/24 dev nh1" + run_cmd "ip -n ns2 address add 172.16.202.1/24 dev nh2" + run_cmd "ip -n ns2 neigh add 172.16.201.2 lladdr 00:11:22:33:44:55 nud perm dev nh1" + run_cmd "ip -n ns2 neigh add 172.16.202.2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2" + run_cmd "ip -n ns2 route add 203.0.113.0/24 + nexthop via 172.16.201.2 nexthop via 172.16.202.2" + run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1" + set +e + + local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]') + local tmp_file=$(mktemp) + local cmd="ip netns exec ns1 mausezahn veth1 -a own -b $dmac + -A 172.16.101.1 -B 203.0.113.1 -t udp 'sp=12345,dp=0-65535' -q" + + # Packets forwarded in a list using a multipath route must not reuse a + # cached result so that a flow always hits the same nexthop. In other + # words, the FIB lookup tracepoint needs to be triggered for every + # packet. + local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets) + run_cmd "perf stat -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" + local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets) + local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l) + list_rcv_eval $tmp_file $diff + + rm $tmp_file + route_cleanup +} + +ipv6_mpath_list_test() +{ + echo + echo "IPv6 multipath list receive tests" + + mpath_dep_check || return 1 + + route_setup + + set -e + run_cmd "ip netns exec ns1 ethtool -K veth1 tcp-segmentation-offload off" + + run_cmd "ip netns exec ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\"" + run_cmd "ip netns exec ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\"" + run_cmd "ip netns exec ns2 ethtool -K veth2 generic-receive-offload on" + run_cmd "ip -n ns2 link add name nh1 up type dummy" + run_cmd "ip -n ns2 link add name nh2 up type dummy" + run_cmd "ip -n ns2 -6 address add 2001:db8:201::1/64 dev nh1" + run_cmd "ip -n ns2 -6 address add 2001:db8:202::1/64 dev nh2" + run_cmd "ip -n ns2 -6 neigh add 2001:db8:201::2 lladdr 00:11:22:33:44:55 nud perm dev nh1" + run_cmd "ip -n ns2 -6 neigh add 2001:db8:202::2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2" + run_cmd "ip -n ns2 -6 route add 2001:db8:301::/64 + nexthop via 2001:db8:201::2 nexthop via 2001:db8:202::2" + run_cmd "ip netns exec ns2 sysctl -qw net.ipv6.fib_multipath_hash_policy=1" + set +e + + local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]') + local tmp_file=$(mktemp) + local cmd="ip netns exec ns1 mausezahn -6 veth1 -a own -b $dmac + -A 2001:db8:101::1 -B 2001:db8:301::1 -t udp 'sp=12345,dp=0-65535' -q" + + # Packets forwarded in a list using a multipath route must not reuse a + # cached result so that a flow always hits the same nexthop. In other + # words, the FIB lookup tracepoint needs to be triggered for every + # packet. + local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets) + run_cmd "perf stat -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd" + local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets) + local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l) + list_rcv_eval $tmp_file $diff + + rm $tmp_file + route_cleanup +} + ################################################################################ # usage @@ -2211,12 +2576,16 @@ do ipv6_addr_metric) ipv6_addr_metric_test;; ipv4_addr_metric) ipv4_addr_metric_test;; ipv4_del_addr) ipv4_del_addr_test;; + ipv6_del_addr) ipv6_del_addr_test;; ipv6_route_metrics) ipv6_route_metrics_test;; ipv4_route_metrics) ipv4_route_metrics_test;; ipv4_route_v6_gw) ipv4_route_v6_gw_test;; ipv4_mangle) ipv4_mangle_test;; ipv6_mangle) ipv6_mangle_test;; ipv4_bcast_neigh) ipv4_bcast_neigh_test;; + fib6_gc_test|ipv6_gc) fib6_gc_test;; + ipv4_mpath_list) ipv4_mpath_list_test;; + ipv6_mpath_list) ipv6_mpath_list_test;; help) echo "Test names: $TESTS"; exit 0;; esac diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index a474c60fe348..74e754e266c3 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -64,7 +64,13 @@ TEST_PROGS = bridge_igmp.sh \ q_in_vni_ipv6.sh \ q_in_vni.sh \ router_bridge.sh \ + router_bridge_1d.sh \ + router_bridge_1d_lag.sh \ + router_bridge_lag.sh \ router_bridge_vlan.sh \ + router_bridge_vlan_upper.sh \ + router_bridge_pvid_vlan_upper.sh \ + router_bridge_vlan_upper_pvid.sh \ router_broadcast.sh \ router_mpath_nh_res.sh \ router_mpath_nh.sh \ @@ -83,6 +89,9 @@ TEST_PROGS = bridge_igmp.sh \ tc_chains.sh \ tc_flower_router.sh \ tc_flower.sh \ + tc_flower_l2_miss.sh \ + tc_flower_cfm.sh \ + tc_flower_port_range.sh \ tc_mpls_l2vpn.sh \ tc_police.sh \ tc_shblocks.sh \ diff --git a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh index dc92d32464f6..9af9f6964808 100755 --- a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh +++ b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh @@ -9,6 +9,7 @@ ALL_TESTS=" locked_port_mab_roam locked_port_mab_config locked_port_mab_flush + locked_port_mab_redirect " NUM_NETIFS=4 @@ -319,6 +320,41 @@ locked_port_mab_flush() log_test "Locked port MAB FDB flush" } +# Check that traffic can be redirected from a locked bridge port and that it +# does not create locked FDB entries. +locked_port_mab_redirect() +{ + RET=0 + check_port_mab_support || return 0 + + bridge link set dev $swp1 learning on locked on mab on + tc qdisc add dev $swp1 clsact + tc filter add dev $swp1 ingress protocol all pref 1 handle 101 flower \ + action mirred egress redirect dev $swp2 + + ping_do $h1 192.0.2.2 + check_err $? "Ping did not work with redirection" + + bridge fdb get `mac_get $h1` br br0 vlan 1 2> /dev/null | \ + grep "dev $swp1" | grep -q "locked" + check_fail $? "Locked entry created for redirected traffic" + + tc filter del dev $swp1 ingress protocol all pref 1 handle 101 flower + + ping_do $h1 192.0.2.2 + check_fail $? "Ping worked without redirection" + + bridge fdb get `mac_get $h1` br br0 vlan 1 2> /dev/null | \ + grep "dev $swp1" | grep -q "locked" + check_err $? "Locked entry not created after deleting filter" + + bridge fdb del `mac_get $h1` vlan 1 dev $swp1 master + tc qdisc del dev $swp1 clsact + bridge link set dev $swp1 learning off locked off mab off + + log_test "Locked port MAB redirect" +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh index ae3f9462a2b6..d0c6c499d5da 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh @@ -617,7 +617,7 @@ __cfg_test_port_ip_sg() grep -q "permanent" check_err $? "Entry not added as \"permanent\" when should" bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \ - grep -q "0.00" + grep -q " 0.00" check_err $? "\"permanent\" entry has a pending group timer" bridge mdb del dev br0 port $swp1 $grp_key vid 10 @@ -626,7 +626,7 @@ __cfg_test_port_ip_sg() grep -q "temp" check_err $? "Entry not added as \"temp\" when should" bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \ - grep -q "0.00" + grep -q " 0.00" check_fail $? "\"temp\" entry has an unpending group timer" bridge mdb del dev br0 port $swp1 $grp_key vid 10 @@ -659,7 +659,7 @@ __cfg_test_port_ip_sg() grep -q "permanent" check_err $? "Entry not marked as \"permanent\" after replace" bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \ - grep -q "0.00" + grep -q " 0.00" check_err $? "Entry has a pending group timer after replace" bridge mdb replace dev br0 port $swp1 $grp_key vid 10 temp @@ -667,7 +667,7 @@ __cfg_test_port_ip_sg() grep -q "temp" check_err $? "Entry not marked as \"temp\" after replace" bridge -d -s mdb show dev br0 vid 10 | grep "$grp_key" | \ - grep -q "0.00" + grep -q " 0.00" check_fail $? "Entry has an unpending group timer after replace" bridge mdb del dev br0 port $swp1 $grp_key vid 10 @@ -850,6 +850,7 @@ cfg_test() __fwd_test_host_ip() { local grp=$1; shift + local dmac=$1; shift local src=$1; shift local mode=$1; shift local name @@ -872,27 +873,27 @@ __fwd_test_host_ip() # Packet should only be flooded to multicast router ports when there is # no matching MDB entry. The bridge is not configured as a multicast # router port. - $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q + $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q tc_check_packets "dev br0 ingress" 1 0 check_err $? "Packet locally received after flood" # Install a regular port group entry and expect the packet to not be # locally received. bridge mdb add dev br0 port $swp2 grp $grp temp vid 10 - $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q + $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q tc_check_packets "dev br0 ingress" 1 0 check_err $? "Packet locally received after installing a regular entry" # Add a host entry and expect the packet to be locally received. bridge mdb add dev br0 port br0 grp $grp temp vid 10 - $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q + $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q tc_check_packets "dev br0 ingress" 1 1 check_err $? "Packet not locally received after adding a host entry" # Remove the host entry and expect the packet to not be locally # received. bridge mdb del dev br0 port br0 grp $grp vid 10 - $MZ $mode $h1.10 -c 1 -p 128 -A $src -B $grp -t udp -q + $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q tc_check_packets "dev br0 ingress" 1 1 check_err $? "Packet locally received after removing a host entry" @@ -905,8 +906,8 @@ __fwd_test_host_ip() fwd_test_host_ip() { - __fwd_test_host_ip "239.1.1.1" "192.0.2.1" "-4" - __fwd_test_host_ip "ff0e::1" "2001:db8:1::1" "-6" + __fwd_test_host_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "-4" + __fwd_test_host_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "-6" } fwd_test_host_l2() @@ -966,6 +967,7 @@ fwd_test_host() __fwd_test_port_ip() { local grp=$1; shift + local dmac=$1; shift local valid_src=$1; shift local invalid_src=$1; shift local mode=$1; shift @@ -999,43 +1001,43 @@ __fwd_test_port_ip() vlan_ethtype $eth_type vlan_id 10 dst_ip $grp \ src_ip $invalid_src action drop - $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q + $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q tc_check_packets "dev $h2 ingress" 1 0 check_err $? "Packet from valid source received on H2 before adding entry" - $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q + $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q tc_check_packets "dev $h2 ingress" 2 0 check_err $? "Packet from invalid source received on H2 before adding entry" bridge mdb add dev br0 port $swp2 grp $grp vid 10 \ filter_mode $filter_mode source_list $src_list - $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q + $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q tc_check_packets "dev $h2 ingress" 1 1 check_err $? "Packet from valid source not received on H2 after adding entry" - $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q + $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q tc_check_packets "dev $h2 ingress" 2 0 check_err $? "Packet from invalid source received on H2 after adding entry" bridge mdb replace dev br0 port $swp2 grp $grp vid 10 \ filter_mode exclude - $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q + $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q tc_check_packets "dev $h2 ingress" 1 2 check_err $? "Packet from valid source not received on H2 after allowing all sources" - $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q + $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q tc_check_packets "dev $h2 ingress" 2 1 check_err $? "Packet from invalid source not received on H2 after allowing all sources" bridge mdb del dev br0 port $swp2 grp $grp vid 10 - $MZ $mode $h1.10 -c 1 -p 128 -A $valid_src -B $grp -t udp -q + $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q tc_check_packets "dev $h2 ingress" 1 2 check_err $? "Packet from valid source received on H2 after deleting entry" - $MZ $mode $h1.10 -c 1 -p 128 -A $invalid_src -B $grp -t udp -q + $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q tc_check_packets "dev $h2 ingress" 2 1 check_err $? "Packet from invalid source received on H2 after deleting entry" @@ -1047,11 +1049,11 @@ __fwd_test_port_ip() fwd_test_port_ip() { - __fwd_test_port_ip "239.1.1.1" "192.0.2.1" "192.0.2.2" "-4" "exclude" - __fwd_test_port_ip "ff0e::1" "2001:db8:1::1" "2001:db8:1::2" "-6" \ + __fwd_test_port_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "192.0.2.2" "-4" "exclude" + __fwd_test_port_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "2001:db8:1::2" "-6" \ "exclude" - __fwd_test_port_ip "239.1.1.1" "192.0.2.1" "192.0.2.2" "-4" "include" - __fwd_test_port_ip "ff0e::1" "2001:db8:1::1" "2001:db8:1::2" "-6" \ + __fwd_test_port_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "192.0.2.2" "-4" "include" + __fwd_test_port_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "2001:db8:1::2" "-6" \ "include" } @@ -1127,7 +1129,7 @@ ctrl_igmpv3_is_in_test() filter_mode include source_list 192.0.2.1 # IS_IN ( 192.0.2.2 ) - $MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \ + $MZ $h1.10 -c 1 -a own -b 01:00:5e:01:01:01 -A 192.0.2.1 -B 239.1.1.1 \ -t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -q 192.0.2.2 @@ -1140,7 +1142,7 @@ ctrl_igmpv3_is_in_test() filter_mode include source_list 192.0.2.1 # IS_IN ( 192.0.2.2 ) - $MZ $h1.10 -c 1 -A 192.0.2.1 -B 239.1.1.1 \ + $MZ $h1.10 -a own -b 01:00:5e:01:01:01 -c 1 -A 192.0.2.1 -B 239.1.1.1 \ -t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q bridge -d mdb show dev br0 vid 10 | grep 239.1.1.1 | grep -v "src" | \ @@ -1167,7 +1169,7 @@ ctrl_mldv2_is_in_test() # IS_IN ( 2001:db8:1::2 ) local p=$(mldv2_is_in_get fe80::1 ff0e::1 2001:db8:1::2) - $MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \ + $MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \ -t ip hop=1,next=0,p="$p" -q bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | \ @@ -1181,7 +1183,7 @@ ctrl_mldv2_is_in_test() filter_mode include source_list 2001:db8:1::1 # IS_IN ( 2001:db8:1::2 ) - $MZ -6 $h1.10 -c 1 -A fe80::1 -B ff0e::1 \ + $MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \ -t ip hop=1,next=0,p="$p" -q bridge -d mdb show dev br0 vid 10 | grep ff0e::1 | grep -v "src" | \ @@ -1206,6 +1208,11 @@ ctrl_test() ctrl_mldv2_is_in_test } +if ! bridge mdb help 2>&1 | grep -q "replace"; then + echo "SKIP: iproute2 too old, missing bridge mdb replace support" + exit $ksft_skip +fi + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh index ae255b662ba3..3da9d93ab36f 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh @@ -252,7 +252,8 @@ ctl4_entries_add() local IPs=$(seq -f 192.0.2.%g 1 $((n - 1))) local peer=$(locus_dev_peer $locus) local GRP=239.1.1.${grp} - $MZ $peer -c 1 -A 192.0.2.1 -B $GRP \ + local dmac=01:00:5e:01:01:$(printf "%02x" $grp) + $MZ $peer -a own -b $dmac -c 1 -A 192.0.2.1 -B $GRP \ -t ip proto=2,p=$(igmpv3_is_in_get $GRP $IPs) -q sleep 1 @@ -272,7 +273,8 @@ ctl4_entries_del() local peer=$(locus_dev_peer $locus) local GRP=239.1.1.${grp} - $MZ $peer -c 1 -A 192.0.2.1 -B 224.0.0.2 \ + local dmac=01:00:5e:00:00:02 + $MZ $peer -a own -b $dmac -c 1 -A 192.0.2.1 -B 224.0.0.2 \ -t ip proto=2,p=$(igmpv2_leave_get $GRP) -q sleep 1 ! bridge mdb show dev br0 | grep -q $GRP @@ -289,8 +291,10 @@ ctl6_entries_add() local peer=$(locus_dev_peer $locus) local SIP=fe80::1 local GRP=ff0e::${grp} + local dmac=33:33:00:00:00:$(printf "%02x" $grp) local p=$(mldv2_is_in_get $SIP $GRP $IPs) - $MZ -6 $peer -c 1 -A $SIP -B $GRP -t ip hop=1,next=0,p="$p" -q + $MZ -6 $peer -a own -b $dmac -c 1 -A $SIP -B $GRP \ + -t ip hop=1,next=0,p="$p" -q sleep 1 local nn=$(bridge mdb show dev br0 | grep $GRP | wc -l) @@ -310,8 +314,10 @@ ctl6_entries_del() local peer=$(locus_dev_peer $locus) local SIP=fe80::1 local GRP=ff0e::${grp} + local dmac=33:33:00:00:00:$(printf "%02x" $grp) local p=$(mldv1_done_get $SIP $GRP) - $MZ -6 $peer -c 1 -A $SIP -B $GRP -t ip hop=1,next=0,p="$p" -q + $MZ -6 $peer -a own -b $dmac -c 1 -A $SIP -B $GRP \ + -t ip hop=1,next=0,p="$p" -q sleep 1 ! bridge mdb show dev br0 | grep -q $GRP } @@ -1328,6 +1334,11 @@ test_8021qvs() switch_destroy } +if ! bridge link help 2>&1 | grep -q "mcast_max_groups"; then + echo "SKIP: iproute2 too old, missing bridge \"mcast_max_groups\" support" + exit $ksft_skip +fi + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh b/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh index 5148d97a5df8..68ee92df3e07 100755 --- a/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh +++ b/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh @@ -132,6 +132,7 @@ switch_create() #### BR1 #### ip link add name br1 type bridge vlan_filtering 1 \ vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br1 addrgenmode none # Make sure the bridge uses the MAC address of the local port and not # that of the VxLAN's device. ip link set dev br1 address $(mac_get $swp1) diff --git a/tools/testing/selftests/net/forwarding/ethtool.sh b/tools/testing/selftests/net/forwarding/ethtool.sh index dbb9fcf759e0..aa2eafb7b243 100755 --- a/tools/testing/selftests/net/forwarding/ethtool.sh +++ b/tools/testing/selftests/net/forwarding/ethtool.sh @@ -286,6 +286,8 @@ different_speeds_autoneg_on() ethtool -s $h1 autoneg on } +skip_on_veth + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh index 072faa77f53b..17f89c3b7c02 100755 --- a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh +++ b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh @@ -108,6 +108,8 @@ no_cable() ip link set dev $swp3 down } +skip_on_veth + setup_prepare tests_run diff --git a/tools/testing/selftests/net/forwarding/ethtool_mm.sh b/tools/testing/selftests/net/forwarding/ethtool_mm.sh index c580ad623848..39e736f30322 100755 --- a/tools/testing/selftests/net/forwarding/ethtool_mm.sh +++ b/tools/testing/selftests/net/forwarding/ethtool_mm.sh @@ -258,11 +258,6 @@ h2_destroy() setup_prepare() { - check_ethtool_mm_support - check_tc_fp_support - require_command lldptool - bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually" - h1=${NETIFS[p1]} h2=${NETIFS[p2]} @@ -278,6 +273,19 @@ cleanup() h1_destroy } +check_ethtool_mm_support +check_tc_fp_support +require_command lldptool +bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually" + +for netif in ${NETIFS[@]}; do + ethtool --show-mm $netif 2>&1 &> /dev/null + if [[ $? -ne 0 ]]; then + echo "SKIP: $netif does not support MAC Merge" + exit $ksft_skip + fi +done + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh index eb9ec4a68f84..7594bbb49029 100755 --- a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh +++ b/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh @@ -99,6 +99,8 @@ test_stats_rx() test_stats g2a rx } +skip_on_veth + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh index 9f5b3e2e5e95..49fa94b53a1c 100755 --- a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh +++ b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh @@ -14,6 +14,8 @@ ALL_TESTS=" NUM_NETIFS=4 source lib.sh +require_command $TROUTE6 + h1_create() { simple_if_init $h1 2001:1:1::2/64 diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 9ddb68dd6a08..e37a15eda6c2 100755 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -30,6 +30,7 @@ REQUIRE_MZ=${REQUIRE_MZ:=yes} REQUIRE_MTOOLS=${REQUIRE_MTOOLS:=no} STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no} TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:=} +TROUTE6=${TROUTE6:=traceroute6} relative_path="${BASH_SOURCE%/*}" if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then @@ -163,6 +164,17 @@ check_port_mab_support() fi } +skip_on_veth() +{ + local kind=$(ip -j -d link show dev ${NETIFS[p1]} | + jq -r '.[].linkinfo.info_kind') + + if [[ $kind == veth ]]; then + echo "SKIP: Test cannot be run with veth pairs" + exit $ksft_skip + fi +} + if [[ "$(id -u)" -ne 0 ]]; then echo "SKIP: need root privileges" exit $ksft_skip @@ -225,6 +237,11 @@ create_netif_veth() for ((i = 1; i <= NUM_NETIFS; ++i)); do local j=$((i+1)) + if [ -z ${NETIFS[p$i]} ]; then + echo "SKIP: Cannot create interface. Name not specified" + exit $ksft_skip + fi + ip link show dev ${NETIFS[p$i]} &> /dev/null if [[ $? -ne 0 ]]; then ip link add ${NETIFS[p$i]} type veth \ @@ -1215,6 +1232,15 @@ ping_test() log_test "ping$3" } +ping_test_fails() +{ + RET=0 + + ping_do $1 $2 + check_fail $? + log_test "ping fails$3" +} + ping6_do() { local if_name=$1 @@ -1237,6 +1263,15 @@ ping6_test() log_test "ping6$3" } +ping6_test_fails() +{ + RET=0 + + ping6_do $1 $2 + check_fail $? + log_test "ping6 fails$3" +} + learning_test() { local bridge=$1 diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh index 360ca133bead..6c257ec03756 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh @@ -98,6 +98,7 @@ switch_create() # Bridge between H1 and H2. ip link add name br1 type bridge vlan_filtering 1 + ip link set dev br1 addrgenmode none ip link set dev br1 up ip link set dev $swp1 master br1 diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh index aec752a22e9e..04fd14b0a9b7 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh @@ -65,7 +65,8 @@ setup_prepare() vrf_prepare mirror_gre_topo_create - ip link add name br2 type bridge vlan_filtering 0 + ip link add name br2 address $(mac_get $swp3) \ + type bridge vlan_filtering 0 ip link set dev br2 up ip link set dev $swp3 master br2 diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh index 1b27f2b0f196..f35313c76fac 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh @@ -35,7 +35,8 @@ setup_prepare() vrf_prepare mirror_gre_topo_create - ip link add name br2 type bridge vlan_filtering 0 + ip link add name br2 address $(mac_get $swp3) \ + type bridge vlan_filtering 0 ip link set dev br2 up vlan_create $swp3 555 diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh index 91e431cd919e..c53148b1dc63 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh @@ -140,7 +140,8 @@ switch_create() ip link set dev $swp3 up ip link set dev $swp4 up - ip link add name br1 type bridge vlan_filtering 1 + ip link add name br1 address $(mac_get $swp3) \ + type bridge vlan_filtering 1 team_create lag loadbalance $swp3 $swp4 ip link set dev lag master br1 diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh index aff88f78e339..5ea9d63915f7 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh @@ -72,7 +72,8 @@ test_span_gre_ttl() RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" + mirror_install $swp1 ingress $tundev \ + "prot ip flower $tcflags ip_prot icmp" tc filter add dev $h3 ingress pref 77 prot $prot \ flower skip_hw ip_ttl 50 action pass diff --git a/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh index 04979e5962e7..bb1adbb7b98a 100644 --- a/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh @@ -60,6 +60,7 @@ mirror_topo_switch_create() ip link set dev $swp3 up ip link add name br1 type bridge vlan_filtering 1 + ip link set dev br1 addrgenmode none ip link set dev br1 up ip link set dev $swp1 master br1 diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh index 64fbd211d907..af008fbf2725 100755 --- a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh +++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh @@ -60,7 +60,9 @@ h2_destroy() switch_create() { - ip link add name br1 up type bridge vlan_filtering 1 + ip link add name br1 type bridge vlan_filtering 1 + ip link set dev br1 addrgenmode none + ip link set dev br1 up ip link set dev $swp1 master br1 ip link set dev $swp1 up ip link set dev $swp2 master br1 diff --git a/tools/testing/selftests/net/forwarding/q_in_vni.sh b/tools/testing/selftests/net/forwarding/q_in_vni.sh index 4c50c0234bce..798b13525c02 100755 --- a/tools/testing/selftests/net/forwarding/q_in_vni.sh +++ b/tools/testing/selftests/net/forwarding/q_in_vni.sh @@ -137,6 +137,7 @@ switch_create() { ip link add name br1 type bridge vlan_filtering 1 vlan_protocol 802.1ad \ vlan_default_pvid 0 mcast_snooping 0 + ip link set dev br1 addrgenmode none # Make sure the bridge uses the MAC address of the local port and not # that of the VxLAN's device. ip link set dev br1 address $(mac_get $swp1) diff --git a/tools/testing/selftests/net/forwarding/router_bridge.sh b/tools/testing/selftests/net/forwarding/router_bridge.sh index ebc596a272f7..0182eb2abfa6 100755 --- a/tools/testing/selftests/net/forwarding/router_bridge.sh +++ b/tools/testing/selftests/net/forwarding/router_bridge.sh @@ -1,9 +1,39 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# +------------------------+ +----------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1 | | + $h2 | +# | | 192.0.2.1/28 | | | 192.0.2.130/28 | +# | | 2001:db8:1::1/64 | | | 2001:db8:2::2/64 | +# | | | | | | +# +----|-------------------+ +--|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|-----------------------------+ + $swp2 | +# | | + $swp1 BR1 (802.1q) | 192.0.2.129/28 | +# | | 192.0.2.2/28 | 2001:db8:2::1/64 | +# | | 2001:db8:1::1/64 | | +# | | | | +# | +--------------------------------+ | +# +---------------------------------------------------------------------------+ + ALL_TESTS=" ping_ipv4 ping_ipv6 + config_remaster + ping_ipv4 + ping_ipv6 + config_remove_pvid + ping_ipv4_fails + ping_ipv6_fails + config_add_pvid + ping_ipv4 + ping_ipv6 + config_late_pvid + ping_ipv4 + ping_ipv6 " NUM_NETIFS=4 source lib.sh @@ -38,7 +68,8 @@ h2_destroy() router_create() { - ip link add name br1 type bridge vlan_filtering 1 + ip link add name br1 address $(mac_get $swp1) \ + type bridge vlan_filtering 1 ip link set dev br1 up ip link set dev $swp1 master br1 @@ -61,6 +92,42 @@ router_destroy() ip link del dev br1 } +config_remaster() +{ + log_info "Remaster bridge slave" + + ip link set dev $swp1 nomaster + sleep 2 + ip link set dev $swp1 master br1 +} + +config_remove_pvid() +{ + log_info "Remove PVID from the bridge" + + bridge vlan add dev br1 vid 1 self + sleep 2 +} + +config_add_pvid() +{ + log_info "Add PVID to the bridge" + + bridge vlan add dev br1 vid 1 self pvid untagged + sleep 2 +} + +config_late_pvid() +{ + log_info "Add bridge PVID after enslaving port" + + ip link set dev $swp1 nomaster + ip link set dev br1 type bridge vlan_default_pvid 0 + sleep 2 + ip link set dev $swp1 master br1 + ip link set dev br1 type bridge vlan_default_pvid 1 +} + setup_prepare() { h1=${NETIFS[p1]} @@ -103,6 +170,16 @@ ping_ipv6() ping6_test $h1 2001:db8:2::2 } +ping_ipv4_fails() +{ + ping_test_fails $h1 192.0.2.130 +} + +ping_ipv6_fails() +{ + ping6_test_fails $h1 2001:db8:2::2 +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/router_bridge_1d.sh b/tools/testing/selftests/net/forwarding/router_bridge_1d.sh new file mode 100755 index 000000000000..6d51f2ca72a2 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_bridge_1d.sh @@ -0,0 +1,185 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +---------------------------------------------+ +----------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1.100 + $h1.200 | | + $h2 | +# | | 192.0.2.1/28 | 192.0.2.17/28 | | | 192.0.2.130/28 | +# | | 2001:db8:1::1/64 | 2001:db8:3::1/64 | | | 192.0.2.146/28 | +# | \_________ __________/ | | | 2001:db8:2::2/64 | +# | V | | | 2001:db8:4::2/64 | +# | + $h1 | | | | +# +--------------|------------------------------+ +--|-------------------+ +# | | +# +--------------|----------------------------------------|-------------------+ +# | SW + $swp1 + $swp2 | +# | | 192.0.2.129/28 | +# | | 192.0.2.145/28 | +# | | 2001:db8:2::1/64 | +# | ________^___________________________ 2001:db8:4::1/64 | +# | / \ | +# | +---|------------------------------+ +---|------------------------------+ | +# | | + $swp1.100 BR1 (802.1d) | | + $swp1.200 BR2 (802.1d) | | +# | | 192.0.2.2/28 | | 192.0.2.18/28 | | +# | | 2001:db8:1::2/64 | | 2001:db8:3::2/64 | | +# | | | | | | +# | +----------------------------------+ +----------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + config_remaster + ping_ipv4 + ping_ipv6 +" +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + simple_if_init $h1 + vlan_create $h1 100 v$h1 192.0.2.1/28 2001:db8:1::1/64 + vlan_create $h1 200 v$h1 192.0.2.17/28 2001:db8:3::1/64 + ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2 + ip -4 route add 192.0.2.144/28 vrf v$h1 nexthop via 192.0.2.18 + ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2 + ip -6 route add 2001:db8:4::/64 vrf v$h1 nexthop via 2001:db8:3::2 +} + +h1_destroy() +{ + ip -6 route del 2001:db8:4::/64 vrf v$h1 + ip -6 route del 2001:db8:2::/64 vrf v$h1 + ip -4 route del 192.0.2.144/28 vrf v$h1 + ip -4 route del 192.0.2.128/28 vrf v$h1 + vlan_destroy $h1 200 + vlan_destroy $h1 100 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64 \ + 192.0.2.146/28 2001:db8:4::2/64 + ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129 + ip -4 route add 192.0.2.16/28 vrf v$h2 nexthop via 192.0.2.145 + ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1 + ip -6 route add 2001:db8:3::/64 vrf v$h2 nexthop via 2001:db8:4::1 +} + +h2_destroy() +{ + ip -6 route del 2001:db8:3::/64 vrf v$h2 + ip -6 route del 2001:db8:1::/64 vrf v$h2 + ip -4 route del 192.0.2.16/28 vrf v$h2 + ip -4 route del 192.0.2.0/28 vrf v$h2 + simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64 \ + 192.0.2.146/28 2001:db8:4::2/64 +} + +router_create() +{ + ip link set dev $swp1 up + + vlan_create $swp1 100 + ip link add name br1 type bridge vlan_filtering 0 + ip link set dev br1 address $(mac_get $swp1.100) + ip link set dev $swp1.100 master br1 + __addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64 + ip link set dev br1 up + + vlan_create $swp1 200 + ip link add name br2 type bridge vlan_filtering 0 + ip link set dev br2 address $(mac_get $swp1.200) + ip link set dev $swp1.200 master br2 + __addr_add_del br2 add 192.0.2.18/28 2001:db8:3::2/64 + ip link set dev br2 up + + ip link set dev $swp2 up + __addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64 \ + 192.0.2.145/28 2001:db8:4::1/64 +} + +router_destroy() +{ + __addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64 \ + 192.0.2.145/28 2001:db8:4::1/64 + ip link set dev $swp2 down + + __addr_add_del br2 del 192.0.2.18/28 2001:db8:3::2/64 + ip link set dev $swp1.200 nomaster + ip link del dev br2 + vlan_destroy $swp1 200 + + __addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64 + ip link set dev $swp1.100 nomaster + ip link del dev br1 + vlan_destroy $swp1 100 + + ip link set dev $swp1 down +} + +config_remaster() +{ + log_info "Remaster bridge slaves" + + ip link set dev $swp1.100 nomaster + ip link set dev $swp1.200 nomaster + sleep 2 + ip link set dev $swp1.200 master br2 + ip link set dev $swp1.100 master br1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.130 ": via 100" + ping_test $h1 192.0.2.146 ": via 200" +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 ": via 100" + ping6_test $h1 2001:db8:4::2 ": via 200" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh b/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh new file mode 100755 index 000000000000..e064b946e821 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh @@ -0,0 +1,408 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------------------------------+ +# | H1 (vrf) | +# | | +# | + LAG1.100 + LAG1.200 | +# | | 192.0.2.1/28 | 192.0.2.17/28 | +# | | 2001:db8:1::1/64 | 2001:db8:3:1/64 | +# | \___________ _______/ | +# | v | +# | + LAG1 (team) | +# | | | +# | ____^____ | +# | / \ | +# | + $h1 + $h4 | +# | | | | +# +----------|-----------|---------------------+ +# | | +# +----------|-----------|---------------------+ +# | SW | | | +# | + $swp1 + $swp4 | +# | \____ ____/ | +# | v | +# | LAG2 (team) + | +# | | | +# | _______^______________ | +# | / \ | +# | +------|------------+ +-------|----------+ | +# | | + LAG2.100 | | + LAG2.200 | | +# | | | | | | +# | | BR1 (802.1d) | | BR2 (802.1d) | | +# | | 192.0.2.2/28 | | 192.0.2.18/28 | | +# | | 2001:db8:1::2/64 | | 2001:db8:3:2/64 | | +# | | | | | | +# | +-------------------+ +------------------+ | +# | | +# | + LAG3.100 + LAG3.200 | +# | | 192.0.2.129/28 | 192.0.2.145/28 | +# | | 2001:db8:2::1/64 | 2001:db8:4::1/64 | +# | | | | +# | \_________ ___________/ | +# | v | +# | + LAG3 (team) | +# | ____|____ | +# | / \ | +# | + $swp2 + $swp3 | +# | | | | +# +-------|---------|--------------------------+ +# | | +# +-------|---------|--------------------------+ +# | | | | +# | + $h2 + $h3 | +# | \____ ___/ | +# | | | +# | + LAG4 (team) | +# | | | +# | __________^__________ | +# | / \ | +# | | | | +# | + LAG4.100 + LAG4.200 | +# | 192.0.2.130/28 192.0.2.146/28 | +# | 2001:db8:2::2/64 2001:db8:4::2/64 | +# | | +# | H2 (vrf) | +# +--------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + + $(: exercise remastering of LAG2 slaves ) + config_deslave_swp4 + config_wait + ping_ipv4 + ping_ipv6 + config_enslave_swp4 + config_deslave_swp1 + config_wait + ping_ipv4 + ping_ipv6 + config_deslave_swp4 + config_enslave_swp1 + config_enslave_swp4 + config_wait + ping_ipv4 + ping_ipv6 + + $(: exercise remastering of LAG2 itself ) + config_remaster_lag2 + config_wait + ping_ipv4 + ping_ipv6 + + $(: exercise remastering of LAG3 slaves ) + config_deslave_swp2 + config_wait + ping_ipv4 + ping_ipv6 + config_enslave_swp2 + config_deslave_swp3 + config_wait + ping_ipv4 + ping_ipv6 + config_deslave_swp2 + config_enslave_swp3 + config_enslave_swp2 + config_wait + ping_ipv4 + ping_ipv6 +" +NUM_NETIFS=8 +source lib.sh + +h1_create() +{ + team_create lag1 lacp + ip link set dev lag1 addrgenmode none + ip link set dev lag1 address $(mac_get $h1) + ip link set dev $h1 master lag1 + ip link set dev $h4 master lag1 + simple_if_init lag1 + ip link set dev $h1 up + ip link set dev $h4 up + + vlan_create lag1 100 vlag1 192.0.2.1/28 2001:db8:1::1/64 + vlan_create lag1 200 vlag1 192.0.2.17/28 2001:db8:3::1/64 + + ip -4 route add 192.0.2.128/28 vrf vlag1 nexthop via 192.0.2.2 + ip -6 route add 2001:db8:2::/64 vrf vlag1 nexthop via 2001:db8:1::2 + + ip -4 route add 192.0.2.144/28 vrf vlag1 nexthop via 192.0.2.18 + ip -6 route add 2001:db8:4::/64 vrf vlag1 nexthop via 2001:db8:3::2 +} + +h1_destroy() +{ + ip -6 route del 2001:db8:4::/64 vrf vlag1 + ip -4 route del 192.0.2.144/28 vrf vlag1 + + ip -6 route del 2001:db8:2::/64 vrf vlag1 + ip -4 route del 192.0.2.128/28 vrf vlag1 + + vlan_destroy lag1 200 + vlan_destroy lag1 100 + + ip link set dev $h4 down + ip link set dev $h1 down + simple_if_fini lag1 + ip link set dev $h4 nomaster + ip link set dev $h1 nomaster + team_destroy lag1 +} + +h2_create() +{ + team_create lag4 lacp + ip link set dev lag4 addrgenmode none + ip link set dev lag4 address $(mac_get $h2) + ip link set dev $h2 master lag4 + ip link set dev $h3 master lag4 + simple_if_init lag4 + ip link set dev $h2 up + ip link set dev $h3 up + + vlan_create lag4 100 vlag4 192.0.2.130/28 2001:db8:2::2/64 + vlan_create lag4 200 vlag4 192.0.2.146/28 2001:db8:4::2/64 + + ip -4 route add 192.0.2.0/28 vrf vlag4 nexthop via 192.0.2.129 + ip -6 route add 2001:db8:1::/64 vrf vlag4 nexthop via 2001:db8:2::1 + + ip -4 route add 192.0.2.16/28 vrf vlag4 nexthop via 192.0.2.145 + ip -6 route add 2001:db8:3::/64 vrf vlag4 nexthop via 2001:db8:4::1 +} + +h2_destroy() +{ + ip -6 route del 2001:db8:3::/64 vrf vlag4 + ip -4 route del 192.0.2.16/28 vrf vlag4 + + ip -6 route del 2001:db8:1::/64 vrf vlag4 + ip -4 route del 192.0.2.0/28 vrf vlag4 + + vlan_destroy lag4 200 + vlan_destroy lag4 100 + + ip link set dev $h3 down + ip link set dev $h2 down + simple_if_fini lag4 + ip link set dev $h3 nomaster + ip link set dev $h2 nomaster + team_destroy lag4 +} + +router_create() +{ + team_create lag2 lacp + ip link set dev lag2 addrgenmode none + ip link set dev lag2 address $(mac_get $swp1) + ip link set dev $swp1 master lag2 + ip link set dev $swp4 master lag2 + + vlan_create lag2 100 + vlan_create lag2 200 + + ip link add name br1 type bridge vlan_filtering 0 + ip link set dev br1 address $(mac_get lag2.100) + ip link set dev lag2.100 master br1 + + ip link add name br2 type bridge vlan_filtering 0 + ip link set dev br2 address $(mac_get lag2.200) + ip link set dev lag2.200 master br2 + + ip link set dev $swp1 up + ip link set dev $swp4 up + ip link set dev br1 up + ip link set dev br2 up + + __addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64 + __addr_add_del br2 add 192.0.2.18/28 2001:db8:3::2/64 + + team_create lag3 lacp + ip link set dev lag3 addrgenmode none + ip link set dev lag3 address $(mac_get $swp2) + ip link set dev $swp2 master lag3 + ip link set dev $swp3 master lag3 + ip link set dev $swp2 up + ip link set dev $swp3 up + + vlan_create lag3 100 + vlan_create lag3 200 + + __addr_add_del lag3.100 add 192.0.2.129/28 2001:db8:2::1/64 + __addr_add_del lag3.200 add 192.0.2.145/28 2001:db8:4::1/64 +} + +router_destroy() +{ + __addr_add_del lag3.200 del 192.0.2.145/28 2001:db8:4::1/64 + __addr_add_del lag3.100 del 192.0.2.129/28 2001:db8:2::1/64 + + vlan_destroy lag3 200 + vlan_destroy lag3 100 + + ip link set dev $swp3 down + ip link set dev $swp2 down + ip link set dev $swp3 nomaster + ip link set dev $swp2 nomaster + team_destroy lag3 + + __addr_add_del br2 del 192.0.2.18/28 2001:db8:3::2/64 + __addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64 + + ip link set dev br2 down + ip link set dev br1 down + ip link set dev $swp4 down + ip link set dev $swp1 down + + ip link set dev lag2.200 nomaster + ip link del dev br2 + + ip link set dev lag2.100 nomaster + ip link del dev br1 + + vlan_destroy lag2 200 + vlan_destroy lag2 100 + + ip link set dev $swp4 nomaster + ip link set dev $swp1 nomaster + team_destroy lag2 +} + +config_remaster_lag2() +{ + log_info "Remaster bridge slaves" + + ip link set dev lag2.200 nomaster + ip link set dev lag2.100 nomaster + sleep 2 + ip link set dev lag2.100 master br1 + ip link set dev lag2.200 master br2 +} + +config_deslave() +{ + local netdev=$1; shift + + log_info "Deslave $netdev" + ip link set dev $netdev down + ip link set dev $netdev nomaster + ip link set dev $netdev up +} + +config_deslave_swp1() +{ + config_deslave $swp1 +} + +config_deslave_swp2() +{ + config_deslave $swp2 +} + +config_deslave_swp3() +{ + config_deslave $swp3 +} + +config_deslave_swp4() +{ + config_deslave $swp4 +} + +config_enslave() +{ + local netdev=$1; shift + local master=$1; shift + + log_info "Enslave $netdev to $master" + ip link set dev $netdev down + ip link set dev $netdev master $master + ip link set dev $netdev up +} + +config_enslave_swp1() +{ + config_enslave $swp1 lag2 +} + +config_enslave_swp2() +{ + config_enslave $swp2 lag3 +} + +config_enslave_swp3() +{ + config_enslave $swp3 lag3 +} + +config_enslave_swp4() +{ + config_enslave $swp4 lag2 +} + +config_wait() +{ + setup_wait_dev lag2 + setup_wait_dev lag3 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + h4=${NETIFS[p7]} + swp4=${NETIFS[p8]} + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test lag1.100 192.0.2.130 ": via 100" + ping_test lag1.200 192.0.2.146 ": via 200" +} + +ping_ipv6() +{ + ping6_test lag1.100 2001:db8:2::2 ": via 100" + ping6_test lag1.200 2001:db8:4::2 ": via 200" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router_bridge_lag.sh b/tools/testing/selftests/net/forwarding/router_bridge_lag.sh new file mode 100755 index 000000000000..f05ffe213c46 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_bridge_lag.sh @@ -0,0 +1,323 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +----------------------------+ +--------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | | | | +# | + LAG1 (team) | | + LAG4 (team) | +# | | 192.0.2.1/28 | | | 192.0.2.130/28 | +# | | 2001:db8:1::1/64 | | | 2001:db8:2::2/64 | +# | __^___ | | __^_____ | +# | / \ | | / \ | +# | + $h1 + $h4 | | + $h2 + $h3 | +# | | | | | | | | +# +----|--------|--------------+ +-|----------|-------------+ +# | | | | +# +----|--------|------------------------------------|----------|-------------+ +# | SW | | | | | +# | + $swp1 + $swp4 + $swp2 + $swp3 | +# | \__ ___/ \__ _____/ | +# | v v | +# | +------|-------------------------------+ | | +# | | + LAG2 BR1 (802.1q) | + LAG3 (team) | +# | | (team) 192.0.2.2/28 | 192.0.2.129/28 | +# | | 2001:db8:1::2/64 | 2001:db8:2::1/64 | +# | | | | +# | +--------------------------------------+ | +# +---------------------------------------------------------------------------+ + +: ${ALL_TESTS:=" + ping_ipv4 + ping_ipv6 + + $(: exercise remastering of LAG2 slaves ) + config_deslave_swp4 + config_wait + ping_ipv4 + ping_ipv6 + config_enslave_swp4 + config_deslave_swp1 + config_wait + ping_ipv4 + ping_ipv6 + config_deslave_swp4 + config_enslave_swp1 + config_enslave_swp4 + config_wait + ping_ipv4 + ping_ipv6 + + $(: exercise remastering of LAG2 itself ) + config_remaster_lag2 + config_wait + ping_ipv4 + ping_ipv6 + + $(: exercise remastering of LAG3 slaves ) + config_deslave_swp2 + config_wait + ping_ipv4 + ping_ipv6 + config_enslave_swp2 + config_deslave_swp3 + config_wait + ping_ipv4 + ping_ipv6 + config_deslave_swp2 + config_enslave_swp3 + config_enslave_swp2 + config_wait + ping_ipv4 + ping_ipv6 + + $(: move LAG3 to a bridge and then out ) + config_remaster_lag3 + config_wait + ping_ipv4 + ping_ipv6 + "} +NUM_NETIFS=8 +: ${lib_dir:=.} +source $lib_dir/lib.sh +$EXTRA_SOURCE + +h1_create() +{ + team_create lag1 lacp + ip link set dev lag1 address $(mac_get $h1) + ip link set dev $h1 master lag1 + ip link set dev $h4 master lag1 + simple_if_init lag1 192.0.2.1/28 2001:db8:1::1/64 + ip link set dev $h1 up + ip link set dev $h4 up + ip -4 route add 192.0.2.128/28 vrf vlag1 nexthop via 192.0.2.2 + ip -6 route add 2001:db8:2::/64 vrf vlag1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del 2001:db8:2::/64 vrf vlag1 + ip -4 route del 192.0.2.128/28 vrf vlag1 + ip link set dev $h4 down + ip link set dev $h1 down + simple_if_fini lag1 192.0.2.1/28 2001:db8:1::1/64 + ip link set dev $h4 nomaster + ip link set dev $h1 nomaster + team_destroy lag1 +} + +h2_create() +{ + team_create lag4 lacp + ip link set dev lag4 address $(mac_get $h2) + ip link set dev $h2 master lag4 + ip link set dev $h3 master lag4 + simple_if_init lag4 192.0.2.130/28 2001:db8:2::2/64 + ip link set dev $h2 up + ip link set dev $h3 up + ip -4 route add 192.0.2.0/28 vrf vlag4 nexthop via 192.0.2.129 + ip -6 route add 2001:db8:1::/64 vrf vlag4 nexthop via 2001:db8:2::1 +} + +h2_destroy() +{ + ip -6 route del 2001:db8:1::/64 vrf vlag4 + ip -4 route del 192.0.2.0/28 vrf vlag4 + ip link set dev $h3 down + ip link set dev $h2 down + simple_if_fini lag4 192.0.2.130/28 2001:db8:2::2/64 + ip link set dev $h3 nomaster + ip link set dev $h2 nomaster + team_destroy lag4 +} + +router_create() +{ + team_create lag2 lacp + ip link set dev lag2 address $(mac_get $swp1) + ip link set dev $swp1 master lag2 + ip link set dev $swp4 master lag2 + + ip link add name br1 address $(mac_get lag2) \ + type bridge vlan_filtering 1 + ip link set dev lag2 master br1 + + ip link set dev $swp1 up + ip link set dev $swp4 up + ip link set dev br1 up + + __addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64 + + team_create lag3 lacp + ip link set dev lag3 address $(mac_get $swp2) + ip link set dev $swp2 master lag3 + ip link set dev $swp3 master lag3 + ip link set dev $swp2 up + ip link set dev $swp3 up + __addr_add_del lag3 add 192.0.2.129/28 2001:db8:2::1/64 +} + +router_destroy() +{ + __addr_add_del lag3 del 192.0.2.129/28 2001:db8:2::1/64 + ip link set dev $swp3 down + ip link set dev $swp2 down + ip link set dev $swp3 nomaster + ip link set dev $swp2 nomaster + team_destroy lag3 + + __addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64 + + ip link set dev $swp4 down + ip link set dev $swp1 down + ip link set dev br1 down + + ip link set dev lag2 nomaster + ip link del dev br1 + + ip link set dev $swp4 nomaster + ip link set dev $swp1 nomaster + team_destroy lag2 +} + +config_remaster_lag2() +{ + log_info "Remaster bridge slave" + + ip link set dev lag2 nomaster + sleep 2 + ip link set dev lag2 master br1 +} + +config_remaster_lag3() +{ + log_info "Move lag3 to the bridge, then out again" + + ip link set dev lag3 master br1 + sleep 2 + ip link set dev lag3 nomaster +} + +config_deslave() +{ + local netdev=$1; shift + + log_info "Deslave $netdev" + ip link set dev $netdev down + ip link set dev $netdev nomaster + ip link set dev $netdev up +} + +config_deslave_swp1() +{ + config_deslave $swp1 +} + +config_deslave_swp2() +{ + config_deslave $swp2 +} + +config_deslave_swp3() +{ + config_deslave $swp3 +} + +config_deslave_swp4() +{ + config_deslave $swp4 +} + +config_enslave() +{ + local netdev=$1; shift + local master=$1; shift + + log_info "Enslave $netdev to $master" + ip link set dev $netdev down + ip link set dev $netdev master $master + ip link set dev $netdev up +} + +config_enslave_swp1() +{ + config_enslave $swp1 lag2 +} + +config_enslave_swp2() +{ + config_enslave $swp2 lag3 +} + +config_enslave_swp3() +{ + config_enslave $swp3 lag3 +} + +config_enslave_swp4() +{ + config_enslave $swp4 lag2 +} + +config_wait() +{ + setup_wait_dev lag2 + setup_wait_dev lag3 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + h4=${NETIFS[p7]} + swp4=${NETIFS[p8]} + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test lag1 192.0.2.130 +} + +ping_ipv6() +{ + ping6_test lag1 2001:db8:2::2 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router_bridge_pvid_vlan_upper.sh b/tools/testing/selftests/net/forwarding/router_bridge_pvid_vlan_upper.sh new file mode 100755 index 000000000000..76e4941fef73 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_bridge_pvid_vlan_upper.sh @@ -0,0 +1,155 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +----------------------------+ +# | H1 (vrf) | +# | + $h1.10 | +----------------------+ +# | | 192.0.2.1/28 | | H2 (vrf) | +# | | 2001:db8:1::1/64 | | + $h2 | +# | | | | | 192.0.2.130/28 | +# | + $h1 | | | 2001:db8:2::2/64 | +# +---|------------------------+ +--|-------------------+ +# | | +# +---|--------------------------------------------------|-------------------+ +# | | router (main VRF) | | +# | +-|----------------------------------+ + $swp2 | +# | | + $swp1 BR1 (802.1q, pvid=10) | 192.0.2.129/28 | +# | | 192.0.2.2/28 | 2001:db8:2::1/64 | +# | | 2001:db8:1::2/64 | | +# | +------------------------------------+ | +# +--------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + shuffle_pvid + ping_ipv4 + ping_ipv6 +" +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + simple_if_init $h1 + vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64 + ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del 2001:db8:2::/64 vrf v$h1 + ip -4 route del 192.0.2.128/28 vrf v$h1 + vlan_destroy $h1 10 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64 + ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129 + ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1 +} + +h2_destroy() +{ + ip -6 route del 2001:db8:1::/64 vrf v$h2 + ip -4 route del 192.0.2.0/28 vrf v$h2 + simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64 +} + +router_create() +{ + ip link add name br1 address $(mac_get $swp1) \ + type bridge vlan_filtering 1 vlan_default_pvid 0 + ip link set dev br1 up + __addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64 + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + + ip link set dev $swp2 up + __addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64 + + bridge vlan add dev br1 vid 10 pvid untagged self + bridge vlan add dev $swp1 vid 10 +} + +router_destroy() +{ + bridge vlan del dev $swp1 vid 10 + bridge vlan del dev br1 vid 10 self + + __addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64 + ip link set dev $swp2 down + + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + __addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64 + ip link del dev br1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +shuffle_pvid() +{ + log_info "Add and remove VLAN upper for PVID VLAN" + + # Adding and removing a VLAN upper for the PVID VLAN shouldn't change + # anything. The address is arbitrary, just to make sure it will be an L3 + # netdevice. + vlan_create br1 10 "" 192.0.2.33/28 + sleep 1 + vlan_destroy br1 10 +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.130 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh index fa6a88c50750..b76a4a707a5b 100755 --- a/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh +++ b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh @@ -1,10 +1,43 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# +------------------------------------------------+ +----------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1.555 + $h1.777 | | + $h2 | +# | | 192.0.2.1/28 | 192.0.2.17/28 | | | 192.0.2.130/28 | +# | | 2001:db8:1::1/64 | 2001:db8:3::1/64 | | | 192.0.2.146/28 | +# | | .-----------------' | | | 2001:db8:2::2/64 | +# | |/ | | | 2001:db8:4::2/64 | +# | + $h1 | | | | +# +----|-------------------------------------------+ +--|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|-------------------------------+ + $swp2 | +# | | + $swp1 | 192.0.2.129/28 | +# | | vid 555 777 | 192.0.2.145/28 | +# | | | 2001:db8:2::1/64 | +# | | + BR1 (802.1q) | 2001:db8:4::1/64 | +# | | vid 555 pvid untagged | | +# | | 192.0.2.2/28 | | +# | | 192.0.2.18/28 | | +# | | 2001:db8:1::2/64 | | +# | | 2001:db8:3::2/64 | | +# | +----------------------------------+ | +# +---------------------------------------------------------------------------+ + ALL_TESTS=" ping_ipv4 ping_ipv6 vlan + config_777 + ping_ipv4_fails + ping_ipv6_fails + ping_ipv4_777 + ping_ipv6_777 + config_555 + ping_ipv4 + ping_ipv6 " NUM_NETIFS=4 source lib.sh @@ -12,36 +45,52 @@ source lib.sh h1_create() { simple_if_init $h1 + vlan_create $h1 555 v$h1 192.0.2.1/28 2001:db8:1::1/64 ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2 ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2 + + vlan_create $h1 777 v$h1 192.0.2.17/28 2001:db8:3::1/64 + ip -4 route add 192.0.2.144/28 vrf v$h1 nexthop via 192.0.2.18 + ip -6 route add 2001:db8:4::/64 vrf v$h1 nexthop via 2001:db8:3::2 } h1_destroy() { + ip -6 route del 2001:db8:4::/64 vrf v$h1 + ip -4 route del 192.0.2.144/28 vrf v$h1 + vlan_destroy $h1 777 + ip -6 route del 2001:db8:2::/64 vrf v$h1 ip -4 route del 192.0.2.128/28 vrf v$h1 vlan_destroy $h1 555 + simple_if_fini $h1 } h2_create() { - simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64 + simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64 \ + 192.0.2.146/28 2001:db8:4::2/64 ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129 + ip -4 route add 192.0.2.16/28 vrf v$h2 nexthop via 192.0.2.145 ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1 + ip -6 route add 2001:db8:3::/64 vrf v$h2 nexthop via 2001:db8:4::1 } h2_destroy() { + ip -6 route del 2001:db8:3::/64 vrf v$h2 ip -6 route del 2001:db8:1::/64 vrf v$h2 + ip -4 route del 192.0.2.16/28 vrf v$h2 ip -4 route del 192.0.2.0/28 vrf v$h2 - simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64 + simple_if_fini $h2 192.0.2.146/28 2001:db8:4::2/64 \ + 192.0.2.130/28 2001:db8:2::2/64 } router_create() { - ip link add name br1 type bridge vlan_filtering 1 + ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 ip link set dev br1 up ip link set dev $swp1 master br1 @@ -49,18 +98,23 @@ router_create() bridge vlan add dev br1 vid 555 self pvid untagged bridge vlan add dev $swp1 vid 555 + bridge vlan add dev $swp1 vid 777 __addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64 + __addr_add_del br1 add 192.0.2.18/28 2001:db8:3::2/64 ip link set dev $swp2 up __addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64 + __addr_add_del $swp2 add 192.0.2.145/28 2001:db8:4::1/64 } router_destroy() { + __addr_add_del $swp2 del 192.0.2.145/28 2001:db8:4::1/64 __addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64 ip link set dev $swp2 down + __addr_add_del br1 del 192.0.2.18/28 2001:db8:3::2/64 __addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64 ip link set dev $swp1 down ip link set dev $swp1 nomaster @@ -86,6 +140,24 @@ setup_prepare() forwarding_enable } +config_555() +{ + log_info "Configure VLAN 555 as PVID" + + bridge vlan add dev br1 vid 555 self pvid untagged + bridge vlan del dev br1 vid 777 self + sleep 2 +} + +config_777() +{ + log_info "Configure VLAN 777 as PVID" + + bridge vlan add dev br1 vid 777 self pvid untagged + bridge vlan del dev br1 vid 555 self + sleep 2 +} + cleanup() { pre_cleanup @@ -114,12 +186,32 @@ vlan() ping_ipv4() { - ping_test $h1 192.0.2.130 + ping_test $h1.555 192.0.2.130 } ping_ipv6() { - ping6_test $h1 2001:db8:2::2 + ping6_test $h1.555 2001:db8:2::2 +} + +ping_ipv4_fails() +{ + ping_test_fails $h1.555 192.0.2.130 ": via 555" +} + +ping_ipv6_fails() +{ + ping6_test_fails $h1.555 2001:db8:2::2 ": via 555" +} + +ping_ipv4_777() +{ + ping_test $h1.777 192.0.2.146 ": via 777" +} + +ping_ipv6_777() +{ + ping6_test $h1.777 2001:db8:4::2 ": via 777" } trap cleanup EXIT diff --git a/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper.sh b/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper.sh new file mode 100755 index 000000000000..215309ea1c8c --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper.sh @@ -0,0 +1,169 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +------------------------+ +----------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1.555 | | + $h2.777 | +# | | 192.0.2.1/28 | | | 192.0.2.18/28 | +# | | 2001:db8:1::1/64 | | | 2001:db8:2::2/64 | +# | | | | | | +# | + $h1 | | + $h2 | +# +----|-------------------+ +--|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1q) + $swp2 | | +# | | | | +# | +------+------------------------------------------+---------------------+ | +# | | | | +# | + br1.555 + br1.777 | +# | 192.0.2.2/28 192.0.2.17/28 | +# | 2001:db8:1::2/64 2001:db8:2::1/64 | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + respin_config + ping_ipv4 + ping_ipv6 +" +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + simple_if_init $h1 + vlan_create $h1 555 v$h1 192.0.2.1/28 2001:db8:1::1/64 + ip -4 route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del 2001:db8:2::/64 vrf v$h1 + ip -4 route del 192.0.2.16/28 vrf v$h1 + vlan_destroy $h1 555 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + vlan_create $h2 777 v$h2 192.0.2.18/28 2001:db8:2::2/64 + ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17 + ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1 +} + +h2_destroy() +{ + ip -6 route del 2001:db8:1::/64 vrf v$h2 + ip -4 route del 192.0.2.0/28 vrf v$h2 + vlan_destroy $h2 777 + simple_if_fini $h2 +} + +router_create() +{ + ip link add name br1 address $(mac_get $swp1) \ + type bridge vlan_filtering 1 + ip link set dev br1 up + + ip link set dev $swp1 master br1 + ip link set dev $swp2 master br1 + ip link set dev $swp1 up + ip link set dev $swp2 up + + bridge vlan add dev br1 vid 555 self + bridge vlan add dev br1 vid 777 self + bridge vlan add dev $swp1 vid 555 + bridge vlan add dev $swp2 vid 777 + + vlan_create br1 555 "" 192.0.2.2/28 2001:db8:1::2/64 + vlan_create br1 777 "" 192.0.2.17/28 2001:db8:2::1/64 +} + +router_destroy() +{ + vlan_destroy br1 777 + vlan_destroy br1 555 + + bridge vlan del dev $swp2 vid 777 + bridge vlan del dev $swp1 vid 555 + bridge vlan del dev br1 vid 777 self + bridge vlan del dev br1 vid 555 self + + ip link set dev $swp2 down nomaster + ip link set dev $swp1 down nomaster + + ip link set dev br1 down + ip link del dev br1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.18 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + +respin_config() +{ + log_info "Remaster bridge slave" + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + + sleep 2 + + ip link set dev $swp1 master br1 + ip link set dev $swp2 master br1 + + bridge vlan add dev $swp1 vid 555 + bridge vlan add dev $swp2 vid 777 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper_pvid.sh b/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper_pvid.sh new file mode 100755 index 000000000000..138558452402 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper_pvid.sh @@ -0,0 +1,171 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +----------------------------+ +# | H1 (vrf) | +# | + $h1.10 | +----------------------+ +# | | 192.0.2.1/28 | | H2 (vrf) | +# | | 2001:db8:1::1/64 | | + $h2 | +# | | | | | 192.0.2.130/28 | +# | + $h1 | | | 2001:db8:2::2/64 | +# +---|------------------------+ +--|-------------------+ +# | | +# +---|--------------------------------------------------|-------------------+ +# | | router (main VRF) | | +# | +-|--------------------------+ + $swp2 | +# | | + $swp1 BR1 (802.1q) | 192.0.2.129/28 | +# | +-----+----------------------+ 2001:db8:2::1/64 | +# | | | +# | + br1.10 | +# | 192.0.2.2/28 | +# | 2001:db8:1::2/64 | +# +--------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + ping_ipv6 + pvid_set_unset + ping_ipv4 + ping_ipv6 + pvid_set_move + ping_ipv4 + ping_ipv6 +" +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + simple_if_init $h1 + vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64 + ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2 + ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2 +} + +h1_destroy() +{ + ip -6 route del 2001:db8:2::/64 vrf v$h1 + ip -4 route del 192.0.2.128/28 vrf v$h1 + vlan_destroy $h1 10 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64 + ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129 + ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1 +} + +h2_destroy() +{ + ip -6 route del 2001:db8:1::/64 vrf v$h2 + ip -4 route del 192.0.2.0/28 vrf v$h2 + simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64 +} + +router_create() +{ + ip link add name br1 address $(mac_get $swp1) \ + type bridge vlan_filtering 1 vlan_default_pvid 0 + ip link set dev br1 up + + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + + ip link set dev $swp2 up + __addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64 + + bridge vlan add dev br1 vid 10 self + bridge vlan add dev $swp1 vid 10 + vlan_create br1 10 "" 192.0.2.2/28 2001:db8:1::2/64 +} + +router_destroy() +{ + vlan_destroy br1 10 + bridge vlan del dev $swp1 vid 10 + bridge vlan del dev br1 vid 10 self + + __addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64 + ip link set dev $swp2 down + + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + + ip link del dev br1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + router_create + + forwarding_enable +} + +pvid_set_unset() +{ + log_info "Set and unset PVID on VLAN 10" + + bridge vlan add dev br1 vid 10 pvid self + sleep 1 + bridge vlan add dev br1 vid 10 self +} + +pvid_set_move() +{ + log_info "Set PVID on VLAN 10, then move it to VLAN 20" + + bridge vlan add dev br1 vid 10 pvid self + sleep 1 + bridge vlan add dev br1 vid 20 pvid self +} + +shuffle_vlan() +{ + log_info "" +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + router_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.130 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:2::2 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/settings b/tools/testing/selftests/net/forwarding/settings new file mode 100644 index 000000000000..e7b9417537fb --- /dev/null +++ b/tools/testing/selftests/net/forwarding/settings @@ -0,0 +1 @@ +timeout=0 diff --git a/tools/testing/selftests/net/forwarding/skbedit_priority.sh b/tools/testing/selftests/net/forwarding/skbedit_priority.sh index bde11dc27873..3dd5fcbd3eaa 100755 --- a/tools/testing/selftests/net/forwarding/skbedit_priority.sh +++ b/tools/testing/selftests/net/forwarding/skbedit_priority.sh @@ -54,7 +54,9 @@ h2_destroy() switch_create() { - ip link add name br1 up type bridge vlan_filtering 1 + ip link add name br1 type bridge vlan_filtering 1 + ip link set dev br1 addrgenmode none + ip link set dev br1 up ip link set dev $swp1 master br1 ip link set dev $swp1 up ip link set dev $swp2 master br1 diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh index a96cff8e7219..b0f5e55d2d0b 100755 --- a/tools/testing/selftests/net/forwarding/tc_actions.sh +++ b/tools/testing/selftests/net/forwarding/tc_actions.sh @@ -9,6 +9,8 @@ NUM_NETIFS=4 source tc_common.sh source lib.sh +require_command ncat + tcflags="skip_hw" h1_create() @@ -220,9 +222,9 @@ mirred_egress_to_ingress_tcp_test() ip_proto icmp \ action drop - ip vrf exec v$h1 nc --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2 & + ip vrf exec v$h1 ncat --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2 & local rpid=$! - ip vrf exec v$h1 nc -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1 + ip vrf exec v$h1 ncat -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1 wait -n $rpid cmp -s $mirred_e2i_tf1 $mirred_e2i_tf2 check_err $? "server output check failed" diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh index 683711f41aa9..b1daad19b01e 100755 --- a/tools/testing/selftests/net/forwarding/tc_flower.sh +++ b/tools/testing/selftests/net/forwarding/tc_flower.sh @@ -52,8 +52,8 @@ match_dst_mac_test() tc_check_packets "dev $h2 ingress" 101 1 check_fail $? "Matched on a wrong filter" - tc_check_packets "dev $h2 ingress" 102 1 - check_err $? "Did not match on correct filter" + tc_check_packets "dev $h2 ingress" 102 0 + check_fail $? "Did not match on correct filter" tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower @@ -78,8 +78,8 @@ match_src_mac_test() tc_check_packets "dev $h2 ingress" 101 1 check_fail $? "Matched on a wrong filter" - tc_check_packets "dev $h2 ingress" 102 1 - check_err $? "Did not match on correct filter" + tc_check_packets "dev $h2 ingress" 102 0 + check_fail $? "Did not match on correct filter" tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower diff --git a/tools/testing/selftests/net/forwarding/tc_flower_cfm.sh b/tools/testing/selftests/net/forwarding/tc_flower_cfm.sh new file mode 100755 index 000000000000..3ca20df952eb --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_flower_cfm.sh @@ -0,0 +1,206 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS="match_cfm_opcode match_cfm_level match_cfm_level_and_opcode" +NUM_NETIFS=2 +source tc_common.sh +source lib.sh + +h1_create() +{ + simple_if_init $h1 +} + +h1_destroy() +{ + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 +} + +u8_to_hex() +{ + local u8=$1; shift + + printf "%02x" $u8 +} + +generate_cfm_hdr() +{ + local mdl=$1; shift + local op=$1; shift + local flags=$1; shift + local tlv_offset=$1; shift + + local cfm_hdr=$(: + )"$(u8_to_hex $((mdl << 5))):"$( : MD level and Version + )"$(u8_to_hex $op):"$( : OpCode + )"$(u8_to_hex $flags):"$( : Flags + )"$(u8_to_hex $tlv_offset)"$( : TLV offset + ) + + echo $cfm_hdr +} + +match_cfm_opcode() +{ + local ethtype="89 02"; readonly ethtype + RET=0 + + tc filter add dev $h2 ingress protocol cfm pref 1 handle 101 \ + flower cfm op 47 action drop + tc filter add dev $h2 ingress protocol cfm pref 1 handle 102 \ + flower cfm op 43 action drop + + pkt="$ethtype $(generate_cfm_hdr 7 47 0 32)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + pkt="$ethtype $(generate_cfm_hdr 6 5 0 4)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match on correct opcode" + + tc_check_packets "dev $h2 ingress" 102 0 + check_err $? "Matched on the wrong opcode" + + pkt="$ethtype $(generate_cfm_hdr 0 43 0 12)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Matched on the wrong opcode" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct opcode" + + tc filter del dev $h2 ingress protocol cfm pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol cfm pref 1 handle 102 flower + + log_test "CFM opcode match test" +} + +match_cfm_level() +{ + local ethtype="89 02"; readonly ethtype + RET=0 + + tc filter add dev $h2 ingress protocol cfm pref 1 handle 101 \ + flower cfm mdl 5 action drop + tc filter add dev $h2 ingress protocol cfm pref 1 handle 102 \ + flower cfm mdl 3 action drop + tc filter add dev $h2 ingress protocol cfm pref 1 handle 103 \ + flower cfm mdl 0 action drop + + pkt="$ethtype $(generate_cfm_hdr 5 42 0 12)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + pkt="$ethtype $(generate_cfm_hdr 6 1 0 70)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + pkt="$ethtype $(generate_cfm_hdr 0 1 0 70)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match on correct level" + + tc_check_packets "dev $h2 ingress" 102 0 + check_err $? "Matched on the wrong level" + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Did not match on correct level" + + pkt="$ethtype $(generate_cfm_hdr 3 0 0 4)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Matched on the wrong level" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct level" + + tc_check_packets "dev $h2 ingress" 103 1 + check_err $? "Matched on the wrong level" + + tc filter del dev $h2 ingress protocol cfm pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol cfm pref 1 handle 102 flower + tc filter del dev $h2 ingress protocol cfm pref 1 handle 103 flower + + log_test "CFM level match test" +} + +match_cfm_level_and_opcode() +{ + local ethtype="89 02"; readonly ethtype + RET=0 + + tc filter add dev $h2 ingress protocol cfm pref 1 handle 101 \ + flower cfm mdl 5 op 41 action drop + tc filter add dev $h2 ingress protocol cfm pref 1 handle 102 \ + flower cfm mdl 7 op 42 action drop + + pkt="$ethtype $(generate_cfm_hdr 5 41 0 4)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + pkt="$ethtype $(generate_cfm_hdr 7 3 0 4)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + pkt="$ethtype $(generate_cfm_hdr 3 42 0 12)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Did not match on correct level and opcode" + + tc_check_packets "dev $h2 ingress" 102 0 + check_err $? "Matched on the wrong level and opcode" + + pkt="$ethtype $(generate_cfm_hdr 7 42 0 12)" + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q + + tc_check_packets "dev $h2 ingress" 101 1 + check_err $? "Matched on the wrong level and opcode" + + tc_check_packets "dev $h2 ingress" 102 1 + check_err $? "Did not match on correct level and opcode" + + tc filter del dev $h2 ingress protocol cfm pref 1 handle 101 flower + tc filter del dev $h2 ingress protocol cfm pref 1 handle 102 flower + + log_test "CFM opcode and level match test" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + h2=${NETIFS[p2]} + h1mac=$(mac_get $h1) + h2mac=$(mac_get $h2) + + vrf_prepare + + h1_create + h2_create +} + +cleanup() +{ + pre_cleanup + + h2_destroy + h1_destroy + + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh new file mode 100755 index 000000000000..20a7cb7222b8 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh @@ -0,0 +1,353 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +----------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# | | 2001:db8:1::1/64 | | 2001:db8:1::2/64 | | +# +----|------------------+ +------------------|---+ +# | | +# +----|-------------------------------------------------------------------|---+ +# | SW | | | +# | +-|-------------------------------------------------------------------|-+ | +# | | + $swp1 BR $swp2 + | | +# | +-----------------------------------------------------------------------+ | +# +----------------------------------------------------------------------------+ + +ALL_TESTS=" + test_l2_miss_unicast + test_l2_miss_multicast + test_l2_miss_ll_multicast + test_l2_miss_broadcast +" + +NUM_NETIFS=4 +source lib.sh +source tc_common.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64 +} + +switch_create() +{ + ip link add name br1 up type bridge + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + tc qdisc add dev $swp2 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp2 clsact + + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + ip link del dev br1 +} + +test_l2_miss_unicast() +{ + local dmac=00:01:02:03:04:05 + local dip=192.0.2.2 + local sip=192.0.2.1 + + RET=0 + + # Unknown unicast. + tc filter add dev $swp2 egress protocol ipv4 handle 101 pref 1 \ + flower indev $swp1 l2_miss 1 dst_mac $dmac src_ip $sip \ + dst_ip $dip action pass + # Known unicast. + tc filter add dev $swp2 egress protocol ipv4 handle 102 pref 1 \ + flower indev $swp1 l2_miss 0 dst_mac $dmac src_ip $sip \ + dst_ip $dip action pass + + # Before adding FDB entry. + $MZ $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q + + tc_check_packets "dev $swp2 egress" 101 1 + check_err $? "Unknown unicast filter was not hit before adding FDB entry" + + tc_check_packets "dev $swp2 egress" 102 0 + check_err $? "Known unicast filter was hit before adding FDB entry" + + # Adding FDB entry. + bridge fdb replace $dmac dev $swp2 master static + + $MZ $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q + + tc_check_packets "dev $swp2 egress" 101 1 + check_err $? "Unknown unicast filter was hit after adding FDB entry" + + tc_check_packets "dev $swp2 egress" 102 1 + check_err $? "Known unicast filter was not hit after adding FDB entry" + + # Deleting FDB entry. + bridge fdb del $dmac dev $swp2 master static + + $MZ $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q + + tc_check_packets "dev $swp2 egress" 101 2 + check_err $? "Unknown unicast filter was not hit after deleting FDB entry" + + tc_check_packets "dev $swp2 egress" 102 1 + check_err $? "Known unicast filter was hit after deleting FDB entry" + + tc filter del dev $swp2 egress protocol ipv4 pref 1 handle 102 flower + tc filter del dev $swp2 egress protocol ipv4 pref 1 handle 101 flower + + log_test "L2 miss - Unicast" +} + +test_l2_miss_multicast_common() +{ + local proto=$1; shift + local sip=$1; shift + local dip=$1; shift + local dmac=$1; shift + local mode=$1; shift + local name=$1; shift + + RET=0 + + # Unregistered multicast. + tc filter add dev $swp2 egress protocol $proto handle 101 pref 1 \ + flower indev $swp1 l2_miss 1 src_ip $sip dst_ip $dip \ + action pass + # Registered multicast. + tc filter add dev $swp2 egress protocol $proto handle 102 pref 1 \ + flower indev $swp1 l2_miss 0 src_ip $sip dst_ip $dip \ + action pass + + # Before adding MDB entry. + $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q + + tc_check_packets "dev $swp2 egress" 101 1 + check_err $? "Unregistered multicast filter was not hit before adding MDB entry" + + tc_check_packets "dev $swp2 egress" 102 0 + check_err $? "Registered multicast filter was hit before adding MDB entry" + + # Adding MDB entry. + bridge mdb replace dev br1 port $swp2 grp $dip permanent + + $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q + + tc_check_packets "dev $swp2 egress" 101 1 + check_err $? "Unregistered multicast filter was hit after adding MDB entry" + + tc_check_packets "dev $swp2 egress" 102 1 + check_err $? "Registered multicast filter was not hit after adding MDB entry" + + # Deleting MDB entry. + bridge mdb del dev br1 port $swp2 grp $dip + + $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q + + tc_check_packets "dev $swp2 egress" 101 2 + check_err $? "Unregistered multicast filter was not hit after deleting MDB entry" + + tc_check_packets "dev $swp2 egress" 102 1 + check_err $? "Registered multicast filter was hit after deleting MDB entry" + + tc filter del dev $swp2 egress protocol $proto pref 1 handle 102 flower + tc filter del dev $swp2 egress protocol $proto pref 1 handle 101 flower + + log_test "L2 miss - Multicast ($name)" +} + +test_l2_miss_multicast_ipv4() +{ + local proto="ipv4" + local sip=192.0.2.1 + local dip=239.1.1.1 + local dmac=01:00:5e:01:01:01 + local mode="-4" + local name="IPv4" + + test_l2_miss_multicast_common $proto $sip $dip $dmac $mode $name +} + +test_l2_miss_multicast_ipv6() +{ + local proto="ipv6" + local sip=2001:db8:1::1 + local dip=ff0e::1 + local dmac=33:33:00:00:00:01 + local mode="-6" + local name="IPv6" + + test_l2_miss_multicast_common $proto $sip $dip $dmac $mode $name +} + +test_l2_miss_multicast() +{ + # Configure $swp2 as a multicast router port so that it will forward + # both registered and unregistered multicast traffic. + bridge link set dev $swp2 mcast_router 2 + + # Forwarding according to MDB entries only takes place when the bridge + # detects that there is a valid querier in the network. Set the bridge + # as the querier and assign it a valid IPv6 link-local address to be + # used as the source address for MLD queries. + ip link set dev br1 type bridge mcast_querier 1 + ip -6 address add fe80::1/64 nodad dev br1 + # Wait the default Query Response Interval (10 seconds) for the bridge + # to determine that there are no other queriers in the network. + sleep 10 + + test_l2_miss_multicast_ipv4 + test_l2_miss_multicast_ipv6 + + ip -6 address del fe80::1/64 dev br1 + ip link set dev br1 type bridge mcast_querier 0 + bridge link set dev $swp2 mcast_router 1 +} + +test_l2_miss_multicast_common2() +{ + local name=$1; shift + local dmac=$1; shift + local dip=224.0.0.1 + local sip=192.0.2.1 + +} + +test_l2_miss_ll_multicast_common() +{ + local proto=$1; shift + local dmac=$1; shift + local sip=$1; shift + local dip=$1; shift + local mode=$1; shift + local name=$1; shift + + RET=0 + + tc filter add dev $swp2 egress protocol $proto handle 101 pref 1 \ + flower indev $swp1 l2_miss 1 dst_mac $dmac src_ip $sip \ + dst_ip $dip action pass + + $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q + + tc_check_packets "dev $swp2 egress" 101 1 + check_err $? "Filter was not hit" + + tc filter del dev $swp2 egress protocol $proto pref 1 handle 101 flower + + log_test "L2 miss - Link-local multicast ($name)" +} + +test_l2_miss_ll_multicast_ipv4() +{ + local proto=ipv4 + local dmac=01:00:5e:00:00:01 + local sip=192.0.2.1 + local dip=224.0.0.1 + local mode="-4" + local name="IPv4" + + test_l2_miss_ll_multicast_common $proto $dmac $sip $dip $mode $name +} + +test_l2_miss_ll_multicast_ipv6() +{ + local proto=ipv6 + local dmac=33:33:00:00:00:01 + local sip=2001:db8:1::1 + local dip=ff02::1 + local mode="-6" + local name="IPv6" + + test_l2_miss_ll_multicast_common $proto $dmac $sip $dip $mode $name +} + +test_l2_miss_ll_multicast() +{ + test_l2_miss_ll_multicast_ipv4 + test_l2_miss_ll_multicast_ipv6 +} + +test_l2_miss_broadcast() +{ + local dmac=ff:ff:ff:ff:ff:ff + local smac=00:01:02:03:04:05 + + RET=0 + + tc filter add dev $swp2 egress protocol all handle 101 pref 1 \ + flower l2_miss 1 dst_mac $dmac src_mac $smac \ + action pass + tc filter add dev $swp2 egress protocol all handle 102 pref 1 \ + flower l2_miss 0 dst_mac $dmac src_mac $smac \ + action pass + + $MZ $h1 -a $smac -b $dmac -c 1 -p 100 -q + + tc_check_packets "dev $swp2 egress" 101 0 + check_err $? "L2 miss filter was hit when should not" + + tc_check_packets "dev $swp2 egress" 102 1 + check_err $? "L2 no miss filter was not hit when should" + + tc filter del dev $swp2 egress protocol all pref 1 handle 102 flower + tc filter del dev $swp2 egress protocol all pref 1 handle 101 flower + + log_test "L2 miss - Broadcast" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh new file mode 100755 index 000000000000..3885a2a91f7d --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh @@ -0,0 +1,228 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +----------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# | | 2001:db8:1::1/64 | | 2001:db8:1::2/64 | | +# +----|------------------+ +------------------|---+ +# | | +# +----|-------------------------------------------------------------------|---+ +# | SW | | | +# | +-|-------------------------------------------------------------------|-+ | +# | | + $swp1 BR $swp2 + | | +# | +-----------------------------------------------------------------------+ | +# +----------------------------------------------------------------------------+ + +ALL_TESTS=" + test_port_range_ipv4_udp + test_port_range_ipv4_tcp + test_port_range_ipv6_udp + test_port_range_ipv6_tcp +" + +NUM_NETIFS=4 +source lib.sh +source tc_common.sh + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64 +} + +switch_create() +{ + ip link add name br1 type bridge + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + ip link set dev br1 up + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + ip link set dev br1 down + ip link set dev $swp2 down + ip link set dev $swp2 nomaster + ip link set dev $swp1 down + ip link set dev $swp1 nomaster + ip link del dev br1 +} + +__test_port_range() +{ + local proto=$1; shift + local ip_proto=$1; shift + local sip=$1; shift + local dip=$1; shift + local mode=$1; shift + local name=$1; shift + local dmac=$(mac_get $h2) + local smac=$(mac_get $h1) + local sport_min=100 + local sport_max=200 + local sport_mid=$((sport_min + (sport_max - sport_min) / 2)) + local dport_min=300 + local dport_max=400 + local dport_mid=$((dport_min + (dport_max - dport_min) / 2)) + + RET=0 + + tc filter add dev $swp1 ingress protocol $proto handle 101 pref 1 \ + flower src_ip $sip dst_ip $dip ip_proto $ip_proto \ + src_port $sport_min-$sport_max \ + dst_port $dport_min-$dport_max \ + action pass + tc filter add dev $swp2 egress protocol $proto handle 101 pref 1 \ + flower src_ip $sip dst_ip $dip ip_proto $ip_proto \ + src_port $sport_min-$sport_max \ + dst_port $dport_min-$dport_max \ + action drop + + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$sport_min,dp=$dport_min" + tc_check_packets "dev $swp1 ingress" 101 1 + check_err $? "Ingress filter not hit with minimum ports" + tc_check_packets "dev $swp2 egress" 101 1 + check_err $? "Egress filter not hit with minimum ports" + + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$sport_mid,dp=$dport_mid" + tc_check_packets "dev $swp1 ingress" 101 2 + check_err $? "Ingress filter not hit with middle ports" + tc_check_packets "dev $swp2 egress" 101 2 + check_err $? "Egress filter not hit with middle ports" + + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$sport_max,dp=$dport_max" + tc_check_packets "dev $swp1 ingress" 101 3 + check_err $? "Ingress filter not hit with maximum ports" + tc_check_packets "dev $swp2 egress" 101 3 + check_err $? "Egress filter not hit with maximum ports" + + # Send traffic when both ports are out of range and when only one port + # is out of range. + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$((sport_min - 1)),dp=$dport_min" + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$((sport_max + 1)),dp=$dport_min" + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$sport_min,dp=$((dport_min - 1))" + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$sport_min,dp=$((dport_max + 1))" + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$((sport_max + 1)),dp=$((dport_max + 1))" + tc_check_packets "dev $swp1 ingress" 101 3 + check_err $? "Ingress filter was hit when should not" + tc_check_packets "dev $swp2 egress" 101 3 + check_err $? "Egress filter was hit when should not" + + tc filter del dev $swp2 egress protocol $proto pref 1 handle 101 flower + tc filter del dev $swp1 ingress protocol $proto pref 1 handle 101 flower + + log_test "Port range matching - $name" +} + +test_port_range_ipv4_udp() +{ + local proto=ipv4 + local ip_proto=udp + local sip=192.0.2.1 + local dip=192.0.2.2 + local mode="-4" + local name="IPv4 UDP" + + __test_port_range $proto $ip_proto $sip $dip $mode "$name" +} + +test_port_range_ipv4_tcp() +{ + local proto=ipv4 + local ip_proto=tcp + local sip=192.0.2.1 + local dip=192.0.2.2 + local mode="-4" + local name="IPv4 TCP" + + __test_port_range $proto $ip_proto $sip $dip $mode "$name" +} + +test_port_range_ipv6_udp() +{ + local proto=ipv6 + local ip_proto=udp + local sip=2001:db8:1::1 + local dip=2001:db8:1::2 + local mode="-6" + local name="IPv6 UDP" + + __test_port_range $proto $ip_proto $sip $dip $mode "$name" +} + +test_port_range_ipv6_tcp() +{ + local proto=ipv6 + local ip_proto=tcp + local sip=2001:db8:1::1 + local dip=2001:db8:1::2 + local mode="-6" + local name="IPv6 TCP" + + __test_port_range $proto $ip_proto $sip $dip $mode "$name" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + vrf_cleanup +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh index 5ac184d51809..5a5dd9034819 100755 --- a/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh +++ b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh @@ -104,11 +104,14 @@ tunnel_key_nofrag_test() local i tc filter add dev $swp1 ingress protocol ip pref 100 handle 100 \ - flower ip_flags nofrag action drop + flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \ + ip_flags nofrag action drop tc filter add dev $swp1 ingress protocol ip pref 101 handle 101 \ - flower ip_flags firstfrag action drop + flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \ + ip_flags firstfrag action drop tc filter add dev $swp1 ingress protocol ip pref 102 handle 102 \ - flower ip_flags nofirstfrag action drop + flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \ + ip_flags nofirstfrag action drop # test 'nofrag' set tc filter add dev h1-et egress protocol all pref 1 handle 1 matchall $tcflags \ diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh index df9143538708..1c6457e54625 100755 --- a/tools/testing/selftests/net/hsr/hsr_ping.sh +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -41,61 +41,6 @@ cleanup() done } -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi - -trap cleanup EXIT - -for i in "$ns1" "$ns2" "$ns3" ;do - ip netns add $i || exit $ksft_skip - ip -net $i link set lo up -done - -echo "INFO: preparing interfaces." -# Three HSR nodes. Each node has one link to each of its neighbour, two links in total. -# -# ns1eth1 ----- ns2eth1 -# hsr1 hsr2 -# ns1eth2 ns2eth2 -# | | -# ns3eth1 ns3eth2 -# \ / -# hsr3 -# -# Interfaces -ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2" -ip link add ns1eth2 netns "$ns1" type veth peer name ns3eth1 netns "$ns3" -ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2" - -# HSRv0. -ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version 0 proto 0 -ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version 0 proto 0 -ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version 0 proto 0 - -# IP for HSR -ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1 -ip -net "$ns1" addr add dead:beef:1::1/64 dev hsr1 nodad -ip -net "$ns2" addr add 100.64.0.2/24 dev hsr2 -ip -net "$ns2" addr add dead:beef:1::2/64 dev hsr2 nodad -ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3 -ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad - -# All Links up -ip -net "$ns1" link set ns1eth1 up -ip -net "$ns1" link set ns1eth2 up -ip -net "$ns1" link set hsr1 up - -ip -net "$ns2" link set ns2eth1 up -ip -net "$ns2" link set ns2eth2 up -ip -net "$ns2" link set hsr2 up - -ip -net "$ns3" link set ns3eth1 up -ip -net "$ns3" link set ns3eth2 up -ip -net "$ns3" link set hsr3 up - # $1: IP address is_v6() { @@ -164,93 +109,168 @@ stop_if_error() fi } - -echo "INFO: Initial validation ping." -# Each node has to be able each one. -do_ping "$ns1" 100.64.0.2 -do_ping "$ns2" 100.64.0.1 -do_ping "$ns3" 100.64.0.1 -stop_if_error "Initial validation failed." - -do_ping "$ns1" 100.64.0.3 -do_ping "$ns2" 100.64.0.3 -do_ping "$ns3" 100.64.0.2 - -do_ping "$ns1" dead:beef:1::2 -do_ping "$ns1" dead:beef:1::3 -do_ping "$ns2" dead:beef:1::1 -do_ping "$ns2" dead:beef:1::2 -do_ping "$ns3" dead:beef:1::1 -do_ping "$ns3" dead:beef:1::2 - -stop_if_error "Initial validation failed." +do_complete_ping_test() +{ + echo "INFO: Initial validation ping." + # Each node has to be able each one. + do_ping "$ns1" 100.64.0.2 + do_ping "$ns2" 100.64.0.1 + do_ping "$ns3" 100.64.0.1 + stop_if_error "Initial validation failed." + + do_ping "$ns1" 100.64.0.3 + do_ping "$ns2" 100.64.0.3 + do_ping "$ns3" 100.64.0.2 + + do_ping "$ns1" dead:beef:1::2 + do_ping "$ns1" dead:beef:1::3 + do_ping "$ns2" dead:beef:1::1 + do_ping "$ns2" dead:beef:1::2 + do_ping "$ns3" dead:beef:1::1 + do_ping "$ns3" dead:beef:1::2 + + stop_if_error "Initial validation failed." # Wait until supervisor all supervision frames have been processed and the node # entries have been merged. Otherwise duplicate frames will be observed which is # valid at this stage. -WAIT=5 -while [ ${WAIT} -gt 0 ] -do - grep 00:00:00:00:00:00 /sys/kernel/debug/hsr/hsr*/node_table - if [ $? -ne 0 ] - then - break - fi - sleep 1 - let WAIT = WAIT - 1 -done + WAIT=5 + while [ ${WAIT} -gt 0 ] + do + grep 00:00:00:00:00:00 /sys/kernel/debug/hsr/hsr*/node_table + if [ $? -ne 0 ] + then + break + fi + sleep 1 + let "WAIT = WAIT - 1" + done # Just a safety delay in case the above check didn't handle it. -sleep 1 + sleep 1 + + echo "INFO: Longer ping test." + do_ping_long "$ns1" 100.64.0.2 + do_ping_long "$ns1" dead:beef:1::2 + do_ping_long "$ns1" 100.64.0.3 + do_ping_long "$ns1" dead:beef:1::3 -echo "INFO: Longer ping test." -do_ping_long "$ns1" 100.64.0.2 -do_ping_long "$ns1" dead:beef:1::2 -do_ping_long "$ns1" 100.64.0.3 -do_ping_long "$ns1" dead:beef:1::3 + stop_if_error "Longer ping test failed." -stop_if_error "Longer ping test failed." + do_ping_long "$ns2" 100.64.0.1 + do_ping_long "$ns2" dead:beef:1::1 + do_ping_long "$ns2" 100.64.0.3 + do_ping_long "$ns2" dead:beef:1::2 + stop_if_error "Longer ping test failed." -do_ping_long "$ns2" 100.64.0.1 -do_ping_long "$ns2" dead:beef:1::1 -do_ping_long "$ns2" 100.64.0.3 -do_ping_long "$ns2" dead:beef:1::2 -stop_if_error "Longer ping test failed." + do_ping_long "$ns3" 100.64.0.1 + do_ping_long "$ns3" dead:beef:1::1 + do_ping_long "$ns3" 100.64.0.2 + do_ping_long "$ns3" dead:beef:1::2 + stop_if_error "Longer ping test failed." -do_ping_long "$ns3" 100.64.0.1 -do_ping_long "$ns3" dead:beef:1::1 -do_ping_long "$ns3" 100.64.0.2 -do_ping_long "$ns3" dead:beef:1::2 -stop_if_error "Longer ping test failed." + echo "INFO: Cutting one link." + do_ping_long "$ns1" 100.64.0.3 & -echo "INFO: Cutting one link." -do_ping_long "$ns1" 100.64.0.3 & + sleep 3 + ip -net "$ns3" link set ns3eth1 down + wait -sleep 3 -ip -net "$ns3" link set ns3eth1 down -wait + ip -net "$ns3" link set ns3eth1 up -ip -net "$ns3" link set ns3eth1 up + stop_if_error "Failed with one link down." -stop_if_error "Failed with one link down." + echo "INFO: Delay the link and drop a few packages." + tc -net "$ns3" qdisc add dev ns3eth1 root netem delay 50ms + tc -net "$ns2" qdisc add dev ns2eth1 root netem delay 5ms loss 25% -echo "INFO: Delay the link and drop a few packages." -tc -net "$ns3" qdisc add dev ns3eth1 root netem delay 50ms -tc -net "$ns2" qdisc add dev ns2eth1 root netem delay 5ms loss 25% + do_ping_long "$ns1" 100.64.0.2 + do_ping_long "$ns1" 100.64.0.3 -do_ping_long "$ns1" 100.64.0.2 -do_ping_long "$ns1" 100.64.0.3 + stop_if_error "Failed with delay and packetloss." -stop_if_error "Failed with delay and packetloss." + do_ping_long "$ns2" 100.64.0.1 + do_ping_long "$ns2" 100.64.0.3 -do_ping_long "$ns2" 100.64.0.1 -do_ping_long "$ns2" 100.64.0.3 + stop_if_error "Failed with delay and packetloss." -stop_if_error "Failed with delay and packetloss." + do_ping_long "$ns3" 100.64.0.1 + do_ping_long "$ns3" 100.64.0.2 + stop_if_error "Failed with delay and packetloss." + + echo "INFO: All good." +} + +setup_hsr_interfaces() +{ + local HSRv="$1" + + echo "INFO: preparing interfaces for HSRv${HSRv}." +# Three HSR nodes. Each node has one link to each of its neighbour, two links in total. +# +# ns1eth1 ----- ns2eth1 +# hsr1 hsr2 +# ns1eth2 ns2eth2 +# | | +# ns3eth1 ns3eth2 +# \ / +# hsr3 +# + # Interfaces + ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2" + ip link add ns1eth2 netns "$ns1" type veth peer name ns3eth1 netns "$ns3" + ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2" + + # HSRv0/1 + ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version $HSRv proto 0 + ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version $HSRv proto 0 + ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version $HSRv proto 0 + + # IP for HSR + ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1 + ip -net "$ns1" addr add dead:beef:1::1/64 dev hsr1 nodad + ip -net "$ns2" addr add 100.64.0.2/24 dev hsr2 + ip -net "$ns2" addr add dead:beef:1::2/64 dev hsr2 nodad + ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3 + ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad + + # All Links up + ip -net "$ns1" link set ns1eth1 up + ip -net "$ns1" link set ns1eth2 up + ip -net "$ns1" link set hsr1 up + + ip -net "$ns2" link set ns2eth1 up + ip -net "$ns2" link set ns2eth2 up + ip -net "$ns2" link set hsr2 up + + ip -net "$ns3" link set ns3eth1 up + ip -net "$ns3" link set ns3eth2 up + ip -net "$ns3" link set hsr3 up +} + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +trap cleanup EXIT + +for i in "$ns1" "$ns2" "$ns3" ;do + ip netns add $i || exit $ksft_skip + ip -net $i link set lo up +done + +setup_hsr_interfaces 0 +do_complete_ping_test +cleanup + +for i in "$ns1" "$ns2" "$ns3" ;do + ip netns add $i || exit $ksft_skip + ip -net $i link set lo up +done -do_ping_long "$ns3" 100.64.0.1 -do_ping_long "$ns3" 100.64.0.2 -stop_if_error "Failed with delay and packetloss." +setup_hsr_interfaces 1 +do_complete_ping_test -echo "INFO: All good." exit $ret diff --git a/tools/testing/selftests/net/hwtstamp_config.c b/tools/testing/selftests/net/hwtstamp_config.c index e1fdee841021..170728c96c46 100644 --- a/tools/testing/selftests/net/hwtstamp_config.c +++ b/tools/testing/selftests/net/hwtstamp_config.c @@ -16,6 +16,8 @@ #include <linux/net_tstamp.h> #include <linux/sockios.h> +#include "kselftest.h" + static int lookup_value(const char **names, int size, const char *name) { @@ -50,7 +52,7 @@ static const char *tx_types[] = { TX_TYPE(ONESTEP_SYNC) #undef TX_TYPE }; -#define N_TX_TYPES ((int)(sizeof(tx_types) / sizeof(tx_types[0]))) +#define N_TX_TYPES ((int)(ARRAY_SIZE(tx_types))) static const char *rx_filters[] = { #define RX_FILTER(name) [HWTSTAMP_FILTER_ ## name] = #name @@ -71,7 +73,7 @@ static const char *rx_filters[] = { RX_FILTER(PTP_V2_DELAY_REQ), #undef RX_FILTER }; -#define N_RX_FILTERS ((int)(sizeof(rx_filters) / sizeof(rx_filters[0]))) +#define N_RX_FILTERS ((int)(ARRAY_SIZE(rx_filters))) static void usage(void) { diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 6032f9b23c4c..e317c2e44dae 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -6,6 +6,7 @@ CONFIG_INET_DIAG=m CONFIG_INET_MPTCP_DIAG=m CONFIG_VETH=y CONFIG_NET_SCH_NETEM=m +CONFIG_SYN_COOKIES=y CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NETFILTER_NETLINK=m diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index fa9e09ad97d9..85a8ee9395b3 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -65,12 +65,15 @@ __chk_nr() if [ $nr != $expected ]; then if [ $nr = "$skip" ] && ! mptcp_lib_expect_all_features; then echo "[ skip ] Feature probably not supported" + mptcp_lib_result_skip "${msg}" else echo "[ fail ] expected $expected found $nr" + mptcp_lib_result_fail "${msg}" ret=$test_cnt fi else echo "[ ok ]" + mptcp_lib_result_pass "${msg}" fi test_cnt=$((test_cnt+1)) } @@ -111,12 +114,15 @@ wait_msk_nr() printf "%-50s" "$msg" if [ $i -ge $timeout ]; then echo "[ fail ] timeout while expecting $expected max $max last $nr" + mptcp_lib_result_fail "${msg} # timeout" ret=$test_cnt elif [ $nr != $expected ]; then echo "[ fail ] expected $expected found $nr" + mptcp_lib_result_fail "${msg} # unexpected result" ret=$test_cnt else echo "[ ok ]" + mptcp_lib_result_pass "${msg}" fi test_cnt=$((test_cnt+1)) } @@ -276,4 +282,5 @@ flush_pids chk_msk_inuse 0 "....chk 0 msk in use after flush" +mptcp_lib_result_print_all_tap exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 773dd770a567..b1fc8afd072d 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -7,6 +7,7 @@ time_start=$(date +%s) optstring="S:R:d:e:l:r:h4cm:f:tC" ret=0 +final_ret=0 sin="" sout="" cin_disconnect="" @@ -128,6 +129,7 @@ ns3="ns3-$rndh" ns4="ns4-$rndh" TEST_COUNT=0 +TEST_GROUP="" cleanup() { @@ -285,6 +287,7 @@ check_mptcp_disabled() # net.mptcp.enabled should be enabled by default if [ "$(ip netns exec ${disabled_ns} sysctl net.mptcp.enabled | awk '{ print $3 }')" -ne 1 ]; then echo -e "net.mptcp.enabled sysctl is not 1 by default\t\t[ FAIL ]" + mptcp_lib_result_fail "net.mptcp.enabled sysctl is not 1 by default" ret=1 return 1 fi @@ -297,11 +300,13 @@ check_mptcp_disabled() if [ ${err} -eq 0 ]; then echo -e "New MPTCP socket cannot be blocked via sysctl\t\t[ FAIL ]" + mptcp_lib_result_fail "New MPTCP socket cannot be blocked via sysctl" ret=1 return 1 fi echo -e "New MPTCP socket can be blocked via sysctl\t\t[ OK ]" + mptcp_lib_result_pass "New MPTCP socket can be blocked via sysctl" return 0 } @@ -317,14 +322,16 @@ do_ping() local connector_ns="$2" local connect_addr="$3" local ping_args="-q -c 1" + local rc=0 if is_v6 "${connect_addr}"; then $ipv6 || return 0 ping_args="${ping_args} -6" fi - ip netns exec ${connector_ns} ping ${ping_args} $connect_addr >/dev/null - if [ $? -ne 0 ] ; then + ip netns exec ${connector_ns} ping ${ping_args} $connect_addr >/dev/null || rc=1 + + if [ $rc -ne 0 ] ; then echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2 ret=1 @@ -403,7 +410,9 @@ do_transfer() local addr_port addr_port=$(printf "%s:%d" ${connect_addr} ${port}) - printf "%.3s %-5s -> %.3s (%-20s) %-5s\t" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto} + local result_msg + result_msg="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})" + printf "%s\t" "${result_msg}" if $capture; then local capuser @@ -478,6 +487,7 @@ do_transfer() local duration duration=$((stop-start)) + result_msg+=" # time=${duration}ms" printf "(duration %05sms) " "${duration}" if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then echo "[ FAIL ] client exit code $retc, server $rets" 1>&2 @@ -490,6 +500,7 @@ do_transfer() echo cat "$capout" + mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}" return 1 fi @@ -549,6 +560,9 @@ do_transfer() if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then printf "[ OK ]" + mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}" + else + mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}" fi if [ $cookies -eq 2 ];then @@ -691,6 +705,8 @@ run_test_transparent() local lret=0 local r6flag="" + TEST_GROUP="${msg}" + # skip if we don't want v6 if ! $ipv6 && is_v6 "${connect_addr}"; then return 0 @@ -702,6 +718,7 @@ run_test_transparent() # checking for a specific kernel version. if ! mptcp_lib_kallsyms_has "T __ip_sock_set_tos$"; then echo "INFO: ${msg} not supported by the kernel: SKIP" + mptcp_lib_result_skip "${TEST_GROUP}" return fi @@ -718,6 +735,8 @@ table inet mangle { EOF if [ $? -ne 0 ]; then echo "SKIP: $msg, could not load nft ruleset" + mptcp_lib_fail_if_expected_feature "nft rules" + mptcp_lib_result_skip "${TEST_GROUP}" return fi @@ -733,6 +752,8 @@ EOF if [ $? -ne 0 ]; then ip netns exec "$listener_ns" nft flush ruleset echo "SKIP: $msg, ip $r6flag rule failed" + mptcp_lib_fail_if_expected_feature "ip rule" + mptcp_lib_result_skip "${TEST_GROUP}" return fi @@ -741,6 +762,8 @@ EOF ip netns exec "$listener_ns" nft flush ruleset ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100 echo "SKIP: $msg, ip route add local $local_addr failed" + mptcp_lib_fail_if_expected_feature "ip route" + mptcp_lib_result_skip "${TEST_GROUP}" return fi @@ -770,6 +793,7 @@ run_tests_peekmode() { local peekmode="$1" + TEST_GROUP="peek mode: ${peekmode}" echo "INFO: with peek mode: ${peekmode}" run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-P ${peekmode}" run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}" @@ -777,8 +801,11 @@ run_tests_peekmode() run_tests_mptfo() { + TEST_GROUP="MPTFO" + if ! mptcp_lib_kallsyms_has "mptcp_fastopen_"; then echo "INFO: TFO not supported by the kernel: SKIP" + mptcp_lib_result_skip "${TEST_GROUP}" return fi @@ -802,14 +829,17 @@ run_tests_disconnect() local old_cin=$cin local old_sin=$sin + TEST_GROUP="full disconnect" + if ! mptcp_lib_kallsyms_has "mptcp_pm_data_reset$"; then echo "INFO: Full disconnect not supported: SKIP" + mptcp_lib_result_skip "${TEST_GROUP}" return fi cat $cin $cin $cin > "$cin".disconnect - # force do_transfer to cope with the multiple tranmissions + # force do_transfer to cope with the multiple transmissions sin="$cin.disconnect" cin="$cin.disconnect" cin_disconnect="$old_cin" @@ -834,14 +864,26 @@ display_time() echo "Time: ${time_run} seconds" } -stop_if_error() +log_if_error() { local msg="$1" if [ ${ret} -ne 0 ]; then echo "FAIL: ${msg}" 1>&2 + + final_ret=${ret} + ret=0 + + return ${final_ret} + fi +} + +stop_if_error() +{ + if ! log_if_error "${@}"; then display_time - exit ${ret} + mptcp_lib_result_print_all_tap + exit ${final_ret} fi } @@ -871,6 +913,8 @@ for sender in "$ns1" "$ns2" "$ns3" "$ns4";do do_ping "$ns4" $sender dead:beef:3::1 done +mptcp_lib_result_code "${ret}" "ping tests" + stop_if_error "Could not even run ping tests" [ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms @@ -900,12 +944,15 @@ echo "on ns3eth4" tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder +TEST_GROUP="loopback v4" run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 stop_if_error "Could not even run loopback test" +TEST_GROUP="loopback v6" run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 stop_if_error "Could not even run loopback v6 test" +TEST_GROUP="multihosts" for sender in $ns1 $ns2 $ns3 $ns4;do # ns1<->ns2 is not subject to reordering/tc delays. Use it to test # mptcp syncookie support. @@ -931,23 +978,25 @@ for sender in $ns1 $ns2 $ns3 $ns4;do run_tests "$ns4" $sender 10.0.3.1 run_tests "$ns4" $sender dead:beef:3::1 - stop_if_error "Tests with $sender as a sender have failed" + log_if_error "Tests with $sender as a sender have failed" done run_tests_peekmode "saveWithPeek" run_tests_peekmode "saveAfterPeek" -stop_if_error "Tests with peek mode have failed" +log_if_error "Tests with peek mode have failed" # MPTFO (MultiPath TCP Fatopen tests) run_tests_mptfo -stop_if_error "Tests with MPTFO have failed" +log_if_error "Tests with MPTFO have failed" # connect to ns4 ip address, ns2 should intercept/proxy run_test_transparent 10.0.3.1 "tproxy ipv4" run_test_transparent dead:beef:3::1 "tproxy ipv6" -stop_if_error "Tests with tproxy have failed" +log_if_error "Tests with tproxy have failed" run_tests_disconnect +log_if_error "Tests of the full disconnection have failed" display_time -exit $ret +mptcp_lib_result_print_all_tap +exit ${final_ret} diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 0ae8cafde439..ee1f89a872b3 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -39,6 +39,9 @@ evts_ns1="" evts_ns2="" evts_ns1_pid=0 evts_ns2_pid=0 +last_test_failed=0 +last_test_skipped=0 +last_test_ignored=1 declare -A all_tests declare -a only_tests_ids @@ -46,9 +49,17 @@ declare -a only_tests_names declare -A failed_tests TEST_COUNT=0 TEST_NAME="" -nr_blank=40 - -export FAILING_LINKS="" +nr_blank=6 + +# These var are used only in some tests, make sure they are not already set +unset FAILING_LINKS +unset test_linkfail +unset addr_nr_ns1 +unset addr_nr_ns2 +unset sflags +unset fastclose +unset fullmesh +unset speed # generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) || # (ip6 && (ip6[74] & 0xf0) == 0x30)'" @@ -94,7 +105,6 @@ init_partial() check_invert=0 validate_checksum=$checksum - FAILING_LINKS="" # ns1 ns2 # ns1eth1 ns2eth1 @@ -156,9 +166,7 @@ check_tools() elif ! iptables -V &> /dev/null; then echo "SKIP: Could not run all tests without iptables tool" exit $ksft_skip - fi - - if ! ip6tables -V &> /dev/null; then + elif ! ip6tables -V &> /dev/null; then echo "SKIP: Could not run all tests without ip6tables tool" exit $ksft_skip fi @@ -179,8 +187,8 @@ init() { trap cleanup EXIT - make_file "$cin" "client" 1 - make_file "$sin" "server" 1 + make_file "$cin" "client" 1 >/dev/null + make_file "$sin" "server" 1 >/dev/null } cleanup() @@ -192,10 +200,37 @@ cleanup() cleanup_partial } -# $1: msg print_title() { - printf "%03u %-36s %s" "${TEST_COUNT}" "${TEST_NAME}" "${1}" + printf "%03u %s\n" "${TEST_COUNT}" "${TEST_NAME}" +} + +print_check() +{ + printf "%-${nr_blank}s%-36s" " " "${*}" +} + +print_info() +{ + # It can be empty, no need to print anything then + [ -z "${1}" ] && return + + mptcp_lib_print_info " Info: ${*}" +} + +print_ok() +{ + mptcp_lib_print_ok "[ ok ]${1:+ ${*}}" +} + +print_fail() +{ + mptcp_lib_print_err "[fail]${1:+ ${*}}" +} + +print_skip() +{ + mptcp_lib_print_warn "[skip]${1:+ ${*}}" } # [ $1: fail msg ] @@ -205,8 +240,10 @@ mark_as_skipped() mptcp_lib_fail_if_expected_feature "${msg}" - print_title "[ skip ] ${msg}" - printf "\n" + print_check "${msg}" + print_skip + + last_test_skipped=1 } # $@: condition @@ -239,17 +276,37 @@ skip_test() return 0 } +append_prev_results() +{ + if [ ${last_test_failed} -eq 1 ]; then + mptcp_lib_result_fail "${TEST_NAME}" + elif [ ${last_test_skipped} -eq 1 ]; then + mptcp_lib_result_skip "${TEST_NAME}" + elif [ ${last_test_ignored} -ne 1 ]; then + mptcp_lib_result_pass "${TEST_NAME}" + fi + + last_test_failed=0 + last_test_skipped=0 + last_test_ignored=0 +} + # $1: test name reset() { + append_prev_results + TEST_NAME="${1}" TEST_COUNT=$((TEST_COUNT+1)) if skip_test; then + last_test_ignored=1 return 1 fi + print_title + if [ "${init}" != "1" ]; then init else @@ -430,10 +487,19 @@ reset_with_tcp_filter() fi } +# $1: err msg fail_test() { ret=1 - failed_tests[${TEST_COUNT}]="${TEST_NAME}" + + print_fail "${@}" + + # just in case a test is marked twice as failed + if [ ${last_test_failed} -eq 0 ]; then + failed_tests[${TEST_COUNT}]="${TEST_NAME}" + dump_stats + last_test_failed=1 + fi } get_failed_tests_ids() @@ -448,7 +514,7 @@ get_failed_tests_ids() print_file_err() { ls -l "$1" 1>&2 - echo "Trailing bytes are: " + echo -n "Trailing bytes are: " tail -c 27 "$1" } @@ -466,8 +532,7 @@ check_transfer() # when truncating we must check the size explicitly out_size=$(wc -c $out | awk '{print $1}') if [ $out_size -ne $bytes ]; then - echo "[ FAIL ] $what output file has wrong size ($out_size, $bytes)" - fail_test + fail_test "$what output file has wrong size ($out_size, $bytes)" return 1 fi @@ -482,14 +547,13 @@ check_transfer() cmp -l "$in" "$out" | while read -r i a b; do local sum=$((0${a} + 0${b})) if [ $check_invert -eq 0 ] || [ $sum -ne $((0xff)) ]; then - echo "[ FAIL ] $what does not match (in, out):" + fail_test "$what does not match (in, out):" print_file_err "$in" print_file_err "$out" - fail_test return 1 else - echo "$what has inverted byte at ${i}" + print_info "$what has inverted byte at ${i}" fi done @@ -503,8 +567,7 @@ do_ping() local connect_addr="$3" if ! ip netns exec ${connector_ns} ping -q -c 1 $connect_addr >/dev/null; then - echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2 - fail_test + fail_test "$listener_ns -> $connect_addr connectivity" fi } @@ -584,6 +647,26 @@ wait_rm_addr() done } +rm_sf_count() +{ + get_counter "${1}" "MPTcpExtRmSubflow" +} + +# $1: ns, $2: old rm_sf counter in $ns +wait_rm_sf() +{ + local ns="${1}" + local old_cnt="${2}" + local cnt + + local i + for i in $(seq 10); do + cnt=$(rm_sf_count ${ns}) + [ "$cnt" = "${old_cnt}" ] || break + sleep 0.1 + done +} + wait_mpj() { local ns="${1}" @@ -678,6 +761,7 @@ pm_nl_del_endpoint() local addr=$3 if [ $ip_mptcp -eq 1 ]; then + [ $id -ne 0 ] && addr='' ip -n $ns mptcp endpoint delete id $id $addr else ip netns exec $ns ./pm_nl_ctl del $id $addr @@ -722,10 +806,9 @@ pm_nl_change_endpoint() pm_nl_check_endpoint() { local line expected_line - local need_title=$1 - local msg="$2" - local ns=$3 - local addr=$4 + local msg="$1" + local ns=$2 + local addr=$3 local _flags="" local flags local _port @@ -734,13 +817,9 @@ pm_nl_check_endpoint() local _id local id - if [ "${need_title}" = 1 ]; then - printf "%03u %-36s %s" "${TEST_COUNT}" "${TEST_NAME}" "${msg}" - else - printf "%-${nr_blank}s %s" " " "${msg}" - fi + print_check "${msg}" - shift 4 + shift 3 while [ -n "$1" ]; do if [ $1 = "flags" ]; then _flags=$2 @@ -763,15 +842,16 @@ pm_nl_check_endpoint() done if [ -z "$id" ]; then - echo "[skip] bad test - missing endpoint id" + test_fail "bad test - missing endpoint id" return fi if [ $ip_mptcp -eq 1 ]; then + # get line and trim trailing whitespace line=$(ip -n $ns mptcp endpoint show $id) + line="${line% }" # the dump order is: address id flags port dev - expected_line="$addr" - [ -n "$addr" ] && expected_line="$expected_line $addr" + [ -n "$addr" ] && expected_line="$addr" expected_line="$expected_line $id" [ -n "$_flags" ] && expected_line="$expected_line ${_flags//","/" "}" [ -n "$dev" ] && expected_line="$expected_line $dev" @@ -786,142 +866,28 @@ pm_nl_check_endpoint() [ -n "$_port" ] && expected_line="$expected_line $_port" fi if [ "$line" = "$expected_line" ]; then - echo "[ ok ]" + print_ok else - echo "[fail] expected '$expected_line' found '$line'" - fail_test + fail_test "expected '$expected_line' found '$line'" fi } -do_transfer() +pm_nl_set_endpoint() { local listener_ns="$1" local connector_ns="$2" - local cl_proto="$3" - local srv_proto="$4" - local connect_addr="$5" - local test_link_fail="$6" - local addr_nr_ns1="$7" - local addr_nr_ns2="$8" - local speed="$9" - local sflags="${10}" - - local port=$((10000 + TEST_COUNT - 1)) - local cappid - local userspace_pm=0 - - :> "$cout" - :> "$sout" - :> "$capout" - - if [ $capture -eq 1 ]; then - local capuser - if [ -z $SUDO_USER ] ; then - capuser="" - else - capuser="-Z $SUDO_USER" - fi - - capfile=$(printf "mp_join-%02u-%s.pcap" "$TEST_COUNT" "${listener_ns}") - - echo "Capturing traffic for test $TEST_COUNT into $capfile" - ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & - cappid=$! - - sleep 1 - fi - - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ - nstat -n - NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ - nstat -n - - local extra_args - if [ $speed = "fast" ]; then - extra_args="-j" - elif [ $speed = "slow" ]; then - extra_args="-r 50" - elif [[ $speed = "speed_"* ]]; then - extra_args="-r ${speed:6}" - fi + local connect_addr="$3" - if [[ "${addr_nr_ns1}" = "userspace_"* ]]; then - userspace_pm=1 - addr_nr_ns1=${addr_nr_ns1:10} - fi + local addr_nr_ns1=${addr_nr_ns1:-0} + local addr_nr_ns2=${addr_nr_ns2:-0} + local sflags=${sflags:-""} + local fullmesh=${fullmesh:-""} local flags="subflow" - local extra_cl_args="" - local extra_srv_args="" - local trunc_size="" - if [[ "${addr_nr_ns2}" = "fastclose_"* ]]; then - if [ ${test_link_fail} -le 1 ]; then - echo "fastclose tests need test_link_fail argument" - fail_test - return 1 - fi - - # disconnect - trunc_size=${test_link_fail} - local side=${addr_nr_ns2:10} - - if [ ${side} = "client" ]; then - extra_cl_args="-f ${test_link_fail}" - extra_srv_args="-f -1" - elif [ ${side} = "server" ]; then - extra_srv_args="-f ${test_link_fail}" - extra_cl_args="-f -1" - else - echo "wrong/unknown fastclose spec ${side}" - fail_test - return 1 - fi - addr_nr_ns2=0 - elif [[ "${addr_nr_ns2}" = "userspace_"* ]]; then - userspace_pm=1 - addr_nr_ns2=${addr_nr_ns2:10} - elif [[ "${addr_nr_ns2}" = "fullmesh_"* ]]; then + if [ -n "${fullmesh}" ]; then flags="${flags},fullmesh" - addr_nr_ns2=${addr_nr_ns2:9} - fi - - extra_srv_args="$extra_args $extra_srv_args" - if [ "$test_link_fail" -gt 1 ];then - timeout ${timeout_test} \ - ip netns exec ${listener_ns} \ - ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ - $extra_srv_args "::" < "$sinfail" > "$sout" & - else - timeout ${timeout_test} \ - ip netns exec ${listener_ns} \ - ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ - $extra_srv_args "::" < "$sin" > "$sout" & - fi - local spid=$! - - wait_local_port_listen "${listener_ns}" "${port}" - - extra_cl_args="$extra_args $extra_cl_args" - if [ "$test_link_fail" -eq 0 ];then - timeout ${timeout_test} \ - ip netns exec ${connector_ns} \ - ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ - $extra_cl_args $connect_addr < "$cin" > "$cout" & - elif [ "$test_link_fail" -eq 1 ] || [ "$test_link_fail" -eq 2 ];then - ( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \ - tee "$cinsent" | \ - timeout ${timeout_test} \ - ip netns exec ${connector_ns} \ - ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ - $extra_cl_args $connect_addr > "$cout" & - else - tee "$cinsent" < "$cinfail" | \ - timeout ${timeout_test} \ - ip netns exec ${connector_ns} \ - ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ - $extra_cl_args $connect_addr > "$cout" & + addr_nr_ns2=${fullmesh} fi - local cpid=$! # let the mptcp subflow be established in background before # do endpoint manipulation @@ -933,7 +899,6 @@ do_transfer() local counter=2 local add_nr_ns1=${addr_nr_ns1} local id=10 - local tk while [ $add_nr_ns1 -gt 0 ]; do local addr if is_v6 "${connect_addr}"; then @@ -941,24 +906,7 @@ do_transfer() else addr="10.0.$counter.1" fi - if [ $userspace_pm -eq 0 ]; then - pm_nl_add_endpoint $ns1 $addr flags signal - else - tk=$(grep "type:1," "$evts_ns1" | - sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') - ip netns exec ${listener_ns} ./pm_nl_ctl ann $addr token $tk id $id - sleep 1 - sp=$(grep "type:10" "$evts_ns1" | - sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') - da=$(grep "type:10" "$evts_ns1" | - sed -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') - dp=$(grep "type:10" "$evts_ns1" | - sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q') - ip netns exec ${listener_ns} ./pm_nl_ctl rem token $tk id $id - ip netns exec ${listener_ns} ./pm_nl_ctl dsf lip "::ffff:$addr" \ - lport $sp rip $da rport $dp token $tk - fi - + pm_nl_add_endpoint $ns1 $addr flags signal counter=$((counter + 1)) add_nr_ns1=$((add_nr_ns1 - 1)) id=$((id + 1)) @@ -1003,7 +951,6 @@ do_transfer() local add_nr_ns2=${addr_nr_ns2} local counter=3 local id=20 - local tk da dp sp while [ $add_nr_ns2 -gt 0 ]; do local addr if is_v6 "${connect_addr}"; then @@ -1011,21 +958,7 @@ do_transfer() else addr="10.0.$counter.2" fi - if [ $userspace_pm -eq 0 ]; then - pm_nl_add_endpoint $ns2 $addr flags $flags - else - tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") - da=$(sed -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evts_ns2") - dp=$(sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") - ip netns exec ${connector_ns} ./pm_nl_ctl csf lip $addr lid $id \ - rip $da rport $dp token $tk - sleep 1 - sp=$(grep "type:10" "$evts_ns2" | - sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') - ip netns exec ${connector_ns} ./pm_nl_ctl rem token $tk id $id - ip netns exec ${connector_ns} ./pm_nl_ctl dsf lip $addr lport $sp \ - rip $da rport $dp token $tk - fi + pm_nl_add_endpoint $ns2 $addr flags $flags counter=$((counter + 1)) add_nr_ns2=$((add_nr_ns2 - 1)) id=$((id + 1)) @@ -1094,6 +1027,121 @@ do_transfer() done done fi +} + +do_transfer() +{ + local listener_ns="$1" + local connector_ns="$2" + local cl_proto="$3" + local srv_proto="$4" + local connect_addr="$5" + + local port=$((10000 + TEST_COUNT - 1)) + local cappid + local FAILING_LINKS=${FAILING_LINKS:-""} + local fastclose=${fastclose:-""} + local speed=${speed:-"fast"} + + :> "$cout" + :> "$sout" + :> "$capout" + + if [ $capture -eq 1 ]; then + local capuser + if [ -z $SUDO_USER ] ; then + capuser="" + else + capuser="-Z $SUDO_USER" + fi + + capfile=$(printf "mp_join-%02u-%s.pcap" "$TEST_COUNT" "${listener_ns}") + + echo "Capturing traffic for test $TEST_COUNT into $capfile" + ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & + cappid=$! + + sleep 1 + fi + + NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ + nstat -n + NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ + nstat -n + + local extra_args + if [ $speed = "fast" ]; then + extra_args="-j" + elif [ $speed = "slow" ]; then + extra_args="-r 50" + elif [ $speed -gt 0 ]; then + extra_args="-r ${speed}" + fi + + local extra_cl_args="" + local extra_srv_args="" + local trunc_size="" + if [ -n "${fastclose}" ]; then + if [ ${test_linkfail} -le 1 ]; then + fail_test "fastclose tests need test_linkfail argument" + return 1 + fi + + # disconnect + trunc_size=${test_linkfail} + local side=${fastclose} + + if [ ${side} = "client" ]; then + extra_cl_args="-f ${test_linkfail}" + extra_srv_args="-f -1" + elif [ ${side} = "server" ]; then + extra_srv_args="-f ${test_linkfail}" + extra_cl_args="-f -1" + else + fail_test "wrong/unknown fastclose spec ${side}" + return 1 + fi + fi + + extra_srv_args="$extra_args $extra_srv_args" + if [ "$test_linkfail" -gt 1 ];then + timeout ${timeout_test} \ + ip netns exec ${listener_ns} \ + ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ + $extra_srv_args "::" < "$sinfail" > "$sout" & + else + timeout ${timeout_test} \ + ip netns exec ${listener_ns} \ + ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ + $extra_srv_args "::" < "$sin" > "$sout" & + fi + local spid=$! + + wait_local_port_listen "${listener_ns}" "${port}" + + extra_cl_args="$extra_args $extra_cl_args" + if [ "$test_linkfail" -eq 0 ];then + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_cl_args $connect_addr < "$cin" > "$cout" & + elif [ "$test_linkfail" -eq 1 ] || [ "$test_linkfail" -eq 2 ];then + ( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \ + tee "$cinsent" | \ + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_cl_args $connect_addr > "$cout" & + else + tee "$cinsent" < "$cinfail" | \ + timeout ${timeout_test} \ + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_cl_args $connect_addr > "$cout" & + fi + local cpid=$! + + pm_nl_set_endpoint $listener_ns $connector_ns $connect_addr wait $cpid local retc=$? @@ -1111,7 +1159,7 @@ do_transfer() nstat | grep Tcp > /tmp/${connector_ns}.out if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then - echo " client exit code $retc, server $rets" 1>&2 + fail_test "client exit code $retc, server $rets" echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" cat /tmp/${listener_ns}.out @@ -1120,17 +1168,16 @@ do_transfer() cat /tmp/${connector_ns}.out cat "$capout" - fail_test return 1 fi - if [ "$test_link_fail" -gt 1 ];then + if [ "$test_linkfail" -gt 1 ];then check_transfer $sinfail $cout "file received by client" $trunc_size else check_transfer $sin $cout "file received by client" $trunc_size fi retc=$? - if [ "$test_link_fail" -eq 0 ];then + if [ "$test_linkfail" -eq 0 ];then check_transfer $cin $sout "file received by server" $trunc_size else check_transfer $cinsent $sout "file received by server" $trunc_size @@ -1155,7 +1202,7 @@ make_file() dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" - echo "Created $name (size $size KB) containing data sent by $who" + print_info "Test file (size $size KB) for $who" } run_tests() @@ -1163,13 +1210,9 @@ run_tests() local listener_ns="$1" local connector_ns="$2" local connect_addr="$3" - local test_linkfail="${4:-0}" - local addr_nr_ns1="${5:-0}" - local addr_nr_ns2="${6:-0}" - local speed="${7:-fast}" - local sflags="${8:-""}" local size + local test_linkfail=${test_linkfail:-0} # The values above 2 are reused to make test files # with the given sizes (KB) @@ -1211,8 +1254,7 @@ run_tests() make_file "$sinfail" "server" $size fi - do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \ - ${test_linkfail} ${addr_nr_ns1} ${addr_nr_ns2} ${speed} ${sflags} + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} } dump_stats() @@ -1228,7 +1270,6 @@ chk_csum_nr() local csum_ns1=${1:-0} local csum_ns2=${2:-0} local count - local dump_stats local extra_msg="" local allow_multi_errors_ns1=0 local allow_multi_errors_ns2=0 @@ -1242,39 +1283,34 @@ chk_csum_nr() csum_ns2=${csum_ns2:1} fi - printf "%-${nr_blank}s %s" " " "sum" + print_check "sum" count=$(get_counter ${ns1} "MPTcpExtDataCsumErr") if [ "$count" != "$csum_ns1" ]; then extra_msg="$extra_msg ns1=$count" fi if [ -z "$count" ]; then - echo -n "[skip]" + print_skip elif { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } || { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then - echo "[fail] got $count data checksum error[s] expected $csum_ns1" - fail_test - dump_stats=1 + fail_test "got $count data checksum error[s] expected $csum_ns1" else - echo -n "[ ok ]" + print_ok fi - echo -n " - csum " + print_check "csum" count=$(get_counter ${ns2} "MPTcpExtDataCsumErr") if [ "$count" != "$csum_ns2" ]; then extra_msg="$extra_msg ns2=$count" fi if [ -z "$count" ]; then - echo -n "[skip]" + print_skip elif { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } || { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then - echo "[fail] got $count data checksum error[s] expected $csum_ns2" - fail_test - dump_stats=1 + fail_test "got $count data checksum error[s] expected $csum_ns2" else - echo -n "[ ok ]" + print_ok fi - [ "${dump_stats}" = 1 ] && dump_stats - echo "$extra_msg" + print_info "$extra_msg" } chk_fail_nr() @@ -1283,7 +1319,6 @@ chk_fail_nr() local fail_rx=$2 local ns_invert=${3:-""} local count - local dump_stats local ns_tx=$ns1 local ns_rx=$ns2 local extra_msg="" @@ -1293,7 +1328,7 @@ chk_fail_nr() if [[ $ns_invert = "invert" ]]; then ns_tx=$ns2 ns_rx=$ns1 - extra_msg=" invert" + extra_msg="invert" fi if [[ "${fail_tx}" = "-"* ]]; then @@ -1305,41 +1340,35 @@ chk_fail_nr() fail_rx=${fail_rx:1} fi - printf "%-${nr_blank}s %s" " " "ftx" + print_check "ftx" count=$(get_counter ${ns_tx} "MPTcpExtMPFailTx") if [ "$count" != "$fail_tx" ]; then extra_msg="$extra_msg,tx=$count" fi if [ -z "$count" ]; then - echo -n "[skip]" + print_skip elif { [ "$count" != "$fail_tx" ] && [ $allow_tx_lost -eq 0 ]; } || { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then - echo "[fail] got $count MP_FAIL[s] TX expected $fail_tx" - fail_test - dump_stats=1 + fail_test "got $count MP_FAIL[s] TX expected $fail_tx" else - echo -n "[ ok ]" + print_ok fi - echo -n " - failrx" + print_check "failrx" count=$(get_counter ${ns_rx} "MPTcpExtMPFailRx") if [ "$count" != "$fail_rx" ]; then extra_msg="$extra_msg,rx=$count" fi if [ -z "$count" ]; then - echo -n "[skip]" + print_skip elif { [ "$count" != "$fail_rx" ] && [ $allow_rx_lost -eq 0 ]; } || { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then - echo "[fail] got $count MP_FAIL[s] RX expected $fail_rx" - fail_test - dump_stats=1 + fail_test "got $count MP_FAIL[s] RX expected $fail_rx" else - echo -n "[ ok ]" + print_ok fi - [ "${dump_stats}" = 1 ] && dump_stats - - echo "$extra_msg" + print_info "$extra_msg" } chk_fclose_nr() @@ -1348,46 +1377,39 @@ chk_fclose_nr() local fclose_rx=$2 local ns_invert=$3 local count - local dump_stats local ns_tx=$ns2 local ns_rx=$ns1 - local extra_msg=" " + local extra_msg="" if [[ $ns_invert = "invert" ]]; then ns_tx=$ns1 ns_rx=$ns2 - extra_msg=${extra_msg}"invert" + extra_msg="invert" fi - printf "%-${nr_blank}s %s" " " "ctx" + print_check "ctx" count=$(get_counter ${ns_tx} "MPTcpExtMPFastcloseTx") if [ -z "$count" ]; then - echo -n "[skip]" + print_skip elif [ "$count" != "$fclose_tx" ]; then extra_msg="$extra_msg,tx=$count" - echo "[fail] got $count MP_FASTCLOSE[s] TX expected $fclose_tx" - fail_test - dump_stats=1 + fail_test "got $count MP_FASTCLOSE[s] TX expected $fclose_tx" else - echo -n "[ ok ]" + print_ok fi - echo -n " - fclzrx" + print_check "fclzrx" count=$(get_counter ${ns_rx} "MPTcpExtMPFastcloseRx") if [ -z "$count" ]; then - echo -n "[skip]" + print_skip elif [ "$count" != "$fclose_rx" ]; then extra_msg="$extra_msg,rx=$count" - echo "[fail] got $count MP_FASTCLOSE[s] RX expected $fclose_rx" - fail_test - dump_stats=1 + fail_test "got $count MP_FASTCLOSE[s] RX expected $fclose_rx" else - echo -n "[ ok ]" + print_ok fi - [ "${dump_stats}" = 1 ] && dump_stats - - echo "$extra_msg" + print_info "$extra_msg" } chk_rst_nr() @@ -1396,7 +1418,6 @@ chk_rst_nr() local rst_rx=$2 local ns_invert=${3:-""} local count - local dump_stats local ns_tx=$ns1 local ns_rx=$ns2 local extra_msg="" @@ -1404,36 +1425,30 @@ chk_rst_nr() if [[ $ns_invert = "invert" ]]; then ns_tx=$ns2 ns_rx=$ns1 - extra_msg=" invert" + extra_msg="invert" fi - printf "%-${nr_blank}s %s" " " "rtx" + print_check "rtx" count=$(get_counter ${ns_tx} "MPTcpExtMPRstTx") if [ -z "$count" ]; then - echo -n "[skip]" + print_skip elif [ $count -lt $rst_tx ]; then - echo "[fail] got $count MP_RST[s] TX expected $rst_tx" - fail_test - dump_stats=1 + fail_test "got $count MP_RST[s] TX expected $rst_tx" else - echo -n "[ ok ]" + print_ok fi - echo -n " - rstrx " + print_check "rstrx" count=$(get_counter ${ns_rx} "MPTcpExtMPRstRx") if [ -z "$count" ]; then - echo -n "[skip]" + print_skip elif [ "$count" -lt "$rst_rx" ]; then - echo "[fail] got $count MP_RST[s] RX expected $rst_rx" - fail_test - dump_stats=1 + fail_test "got $count MP_RST[s] RX expected $rst_rx" else - echo -n "[ ok ]" + print_ok fi - [ "${dump_stats}" = 1 ] && dump_stats - - echo "$extra_msg" + print_info "$extra_msg" } chk_infi_nr() @@ -1441,33 +1456,26 @@ chk_infi_nr() local infi_tx=$1 local infi_rx=$2 local count - local dump_stats - printf "%-${nr_blank}s %s" " " "itx" + print_check "itx" count=$(get_counter ${ns2} "MPTcpExtInfiniteMapTx") if [ -z "$count" ]; then - echo -n "[skip]" + print_skip elif [ "$count" != "$infi_tx" ]; then - echo "[fail] got $count infinite map[s] TX expected $infi_tx" - fail_test - dump_stats=1 + fail_test "got $count infinite map[s] TX expected $infi_tx" else - echo -n "[ ok ]" + print_ok fi - echo -n " - infirx" + print_check "infirx" count=$(get_counter ${ns1} "MPTcpExtInfiniteMapRx") if [ -z "$count" ]; then - echo "[skip]" + print_skip elif [ "$count" != "$infi_rx" ]; then - echo "[fail] got $count infinite map[s] RX expected $infi_rx" - fail_test - dump_stats=1 + fail_test "got $count infinite map[s] RX expected $infi_rx" else - echo "[ ok ]" + print_ok fi - - [ "${dump_stats}" = 1 ] && dump_stats } chk_join_nr() @@ -1482,58 +1490,49 @@ chk_join_nr() local infi_nr=${8:-0} local corrupted_pkts=${9:-0} local count - local dump_stats local with_cookie - local title="${TEST_NAME}" if [ "${corrupted_pkts}" -gt 0 ]; then - title+=": ${corrupted_pkts} corrupted pkts" + print_info "${corrupted_pkts} corrupted pkts" fi - printf "%03u %-36s %s" "${TEST_COUNT}" "${title}" "syn" + print_check "syn" count=$(get_counter ${ns1} "MPTcpExtMPJoinSynRx") if [ -z "$count" ]; then - echo -n "[skip]" + print_skip elif [ "$count" != "$syn_nr" ]; then - echo "[fail] got $count JOIN[s] syn expected $syn_nr" - fail_test - dump_stats=1 + fail_test "got $count JOIN[s] syn expected $syn_nr" else - echo -n "[ ok ]" + print_ok fi - echo -n " - synack" + print_check "synack" with_cookie=$(ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies) count=$(get_counter ${ns2} "MPTcpExtMPJoinSynAckRx") if [ -z "$count" ]; then - echo -n "[skip]" + print_skip elif [ "$count" != "$syn_ack_nr" ]; then # simult connections exceeding the limit with cookie enabled could go up to # synack validation as the conn limit can be enforced reliably only after # the subflow creation if [ "$with_cookie" = 2 ] && [ "$count" -gt "$syn_ack_nr" ] && [ "$count" -le "$syn_nr" ]; then - echo -n "[ ok ]" + print_ok else - echo "[fail] got $count JOIN[s] synack expected $syn_ack_nr" - fail_test - dump_stats=1 + fail_test "got $count JOIN[s] synack expected $syn_ack_nr" fi else - echo -n "[ ok ]" + print_ok fi - echo -n " - ack" + print_check "ack" count=$(get_counter ${ns1} "MPTcpExtMPJoinAckRx") if [ -z "$count" ]; then - echo "[skip]" + print_skip elif [ "$count" != "$ack_nr" ]; then - echo "[fail] got $count JOIN[s] ack expected $ack_nr" - fail_test - dump_stats=1 + fail_test "got $count JOIN[s] ack expected $ack_nr" else - echo "[ ok ]" + print_ok fi - [ "${dump_stats}" = 1 ] && dump_stats if [ $validate_checksum -eq 1 ]; then chk_csum_nr $csum_ns1 $csum_ns2 chk_fail_nr $fail_nr $fail_nr @@ -1557,22 +1556,21 @@ chk_stale_nr() local stale_nr local recover_nr - printf "%-${nr_blank}s %-18s" " " "stale" + print_check "stale" stale_nr=$(get_counter ${ns} "MPTcpExtSubflowStale") recover_nr=$(get_counter ${ns} "MPTcpExtSubflowRecover") if [ -z "$stale_nr" ] || [ -z "$recover_nr" ]; then - echo "[skip]" + print_skip elif [ $stale_nr -lt $stale_min ] || { [ $stale_max -gt 0 ] && [ $stale_nr -gt $stale_max ]; } || [ $((stale_nr - recover_nr)) -ne $stale_delta ]; then - echo "[fail] got $stale_nr stale[s] $recover_nr recover[s], " \ + fail_test "got $stale_nr stale[s] $recover_nr recover[s], " \ " expected stale in range [$stale_min..$stale_max]," \ - " stale-recover delta $stale_delta " - fail_test + " stale-recover delta $stale_delta" dump_stats=1 else - echo "[ ok ]" + print_ok fi if [ "${dump_stats}" = 1 ]; then @@ -1593,119 +1591,130 @@ chk_add_nr() local mis_syn_nr=${7:-0} local mis_ack_nr=${8:-0} local count - local dump_stats local timeout timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) - printf "%-${nr_blank}s %s" " " "add" + print_check "add" count=$(get_counter ${ns2} "MPTcpExtAddAddr") if [ -z "$count" ]; then - echo -n "[skip]" + print_skip # if the test configured a short timeout tolerate greater then expected # add addrs options, due to retransmissions elif [ "$count" != "$add_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then - echo "[fail] got $count ADD_ADDR[s] expected $add_nr" - fail_test - dump_stats=1 + fail_test "got $count ADD_ADDR[s] expected $add_nr" else - echo -n "[ ok ]" + print_ok fi - echo -n " - echo " + print_check "echo" count=$(get_counter ${ns1} "MPTcpExtEchoAdd") if [ -z "$count" ]; then - echo -n "[skip]" + print_skip elif [ "$count" != "$echo_nr" ]; then - echo "[fail] got $count ADD_ADDR echo[s] expected $echo_nr" - fail_test - dump_stats=1 + fail_test "got $count ADD_ADDR echo[s] expected $echo_nr" else - echo -n "[ ok ]" + print_ok fi if [ $port_nr -gt 0 ]; then - echo -n " - pt " + print_check "pt" count=$(get_counter ${ns2} "MPTcpExtPortAdd") if [ -z "$count" ]; then - echo "[skip]" + print_skip elif [ "$count" != "$port_nr" ]; then - echo "[fail] got $count ADD_ADDR[s] with a port-number expected $port_nr" - fail_test - dump_stats=1 + fail_test "got $count ADD_ADDR[s] with a port-number expected $port_nr" else - echo "[ ok ]" + print_ok fi - printf "%-${nr_blank}s %s" " " "syn" + print_check "syn" count=$(get_counter ${ns1} "MPTcpExtMPJoinPortSynRx") if [ -z "$count" ]; then - echo -n "[skip]" + print_skip elif [ "$count" != "$syn_nr" ]; then - echo "[fail] got $count JOIN[s] syn with a different \ - port-number expected $syn_nr" - fail_test - dump_stats=1 + fail_test "got $count JOIN[s] syn with a different \ + port-number expected $syn_nr" else - echo -n "[ ok ]" + print_ok fi - echo -n " - synack" + print_check "synack" count=$(get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx") if [ -z "$count" ]; then - echo -n "[skip]" + print_skip elif [ "$count" != "$syn_ack_nr" ]; then - echo "[fail] got $count JOIN[s] synack with a different \ - port-number expected $syn_ack_nr" - fail_test - dump_stats=1 + fail_test "got $count JOIN[s] synack with a different \ + port-number expected $syn_ack_nr" else - echo -n "[ ok ]" + print_ok fi - echo -n " - ack" + print_check "ack" count=$(get_counter ${ns1} "MPTcpExtMPJoinPortAckRx") if [ -z "$count" ]; then - echo "[skip]" + print_skip elif [ "$count" != "$ack_nr" ]; then - echo "[fail] got $count JOIN[s] ack with a different \ - port-number expected $ack_nr" - fail_test - dump_stats=1 + fail_test "got $count JOIN[s] ack with a different \ + port-number expected $ack_nr" else - echo "[ ok ]" + print_ok fi - printf "%-${nr_blank}s %s" " " "syn" + print_check "syn" count=$(get_counter ${ns1} "MPTcpExtMismatchPortSynRx") if [ -z "$count" ]; then - echo -n "[skip]" + print_skip elif [ "$count" != "$mis_syn_nr" ]; then - echo "[fail] got $count JOIN[s] syn with a mismatched \ - port-number expected $mis_syn_nr" - fail_test - dump_stats=1 + fail_test "got $count JOIN[s] syn with a mismatched \ + port-number expected $mis_syn_nr" else - echo -n "[ ok ]" + print_ok fi - echo -n " - ack " + print_check "ack" count=$(get_counter ${ns1} "MPTcpExtMismatchPortAckRx") if [ -z "$count" ]; then - echo "[skip]" + print_skip elif [ "$count" != "$mis_ack_nr" ]; then - echo "[fail] got $count JOIN[s] ack with a mismatched \ - port-number expected $mis_ack_nr" - fail_test - dump_stats=1 + fail_test "got $count JOIN[s] ack with a mismatched \ + port-number expected $mis_ack_nr" else - echo "[ ok ]" + print_ok fi + fi +} + +chk_add_tx_nr() +{ + local add_tx_nr=$1 + local echo_tx_nr=$2 + local timeout + local count + + timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) + + print_check "add TX" + count=$(get_counter ${ns1} "MPTcpExtAddAddrTx") + if [ -z "$count" ]; then + print_skip + # if the test configured a short timeout tolerate greater then expected + # add addrs options, due to retransmissions + elif [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then + fail_test "got $count ADD_ADDR[s] TX, expected $add_tx_nr" else - echo "" + print_ok fi - [ "${dump_stats}" = 1 ] && dump_stats + print_check "echo TX" + count=$(get_counter ${ns2} "MPTcpExtEchoAddTx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$echo_tx_nr" ]; then + fail_test "got $count ADD_ADDR echo[s] TX, expected $echo_tx_nr" + else + print_ok + fi } chk_rm_nr() @@ -1715,7 +1724,6 @@ chk_rm_nr() local invert local simult local count - local dump_stats local addr_ns=$ns1 local subflow_ns=$ns2 local extra_msg="" @@ -1733,25 +1741,23 @@ chk_rm_nr() elif [ $invert = "true" ]; then addr_ns=$ns2 subflow_ns=$ns1 - extra_msg=" invert" + extra_msg="invert" fi - printf "%-${nr_blank}s %s" " " "rm " + print_check "rm" count=$(get_counter ${addr_ns} "MPTcpExtRmAddr") if [ -z "$count" ]; then - echo -n "[skip]" + print_skip elif [ "$count" != "$rm_addr_nr" ]; then - echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr" - fail_test - dump_stats=1 + fail_test "got $count RM_ADDR[s] expected $rm_addr_nr" else - echo -n "[ ok ]" + print_ok fi - echo -n " - rmsf " + print_check "rmsf" count=$(get_counter ${subflow_ns} "MPTcpExtRmSubflow") if [ -z "$count" ]; then - echo -n "[skip]" + print_skip elif [ -n "$simult" ]; then local cnt suffix @@ -1763,23 +1769,32 @@ chk_rm_nr() [ "$count" != "$rm_subflow_nr" ] && suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]" if [ $count -ge "$rm_subflow_nr" ] && \ [ "$count" -le "$((rm_subflow_nr *2 ))" ]; then - echo -n "[ ok ] $suffix" + print_ok "$suffix" else - echo "[fail] got $count RM_SUBFLOW[s] expected in range [$rm_subflow_nr:$((rm_subflow_nr*2))]" - fail_test - dump_stats=1 + fail_test "got $count RM_SUBFLOW[s] expected in range [$rm_subflow_nr:$((rm_subflow_nr*2))]" fi elif [ "$count" != "$rm_subflow_nr" ]; then - echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr" - fail_test - dump_stats=1 + fail_test "got $count RM_SUBFLOW[s] expected $rm_subflow_nr" else - echo -n "[ ok ]" + print_ok fi - [ "${dump_stats}" = 1 ] && dump_stats + print_info "$extra_msg" +} + +chk_rm_tx_nr() +{ + local rm_addr_tx_nr=$1 - echo "$extra_msg" + print_check "rm TX" + count=$(get_counter ${ns2} "MPTcpExtRmAddrTx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$rm_addr_tx_nr" ]; then + fail_test "got $count RM_ADDR[s] expected $rm_addr_tx_nr" + else + print_ok + fi } chk_prio_nr() @@ -1787,105 +1802,84 @@ chk_prio_nr() local mp_prio_nr_tx=$1 local mp_prio_nr_rx=$2 local count - local dump_stats - printf "%-${nr_blank}s %s" " " "ptx" + print_check "ptx" count=$(get_counter ${ns1} "MPTcpExtMPPrioTx") if [ -z "$count" ]; then - echo -n "[skip]" + print_skip elif [ "$count" != "$mp_prio_nr_tx" ]; then - echo "[fail] got $count MP_PRIO[s] TX expected $mp_prio_nr_tx" - fail_test - dump_stats=1 + fail_test "got $count MP_PRIO[s] TX expected $mp_prio_nr_tx" else - echo -n "[ ok ]" + print_ok fi - echo -n " - prx " + print_check "prx" count=$(get_counter ${ns1} "MPTcpExtMPPrioRx") if [ -z "$count" ]; then - echo "[skip]" + print_skip elif [ "$count" != "$mp_prio_nr_rx" ]; then - echo "[fail] got $count MP_PRIO[s] RX expected $mp_prio_nr_rx" - fail_test - dump_stats=1 + fail_test "got $count MP_PRIO[s] RX expected $mp_prio_nr_rx" else - echo "[ ok ]" + print_ok fi - - [ "${dump_stats}" = 1 ] && dump_stats } chk_subflow_nr() { - local need_title="$1" - local msg="$2" - local subflow_nr=$3 + local msg="$1" + local subflow_nr=$2 local cnt1 local cnt2 local dump_stats - if [ -n "${need_title}" ]; then - printf "%03u %-36s %s" "${TEST_COUNT}" "${TEST_NAME}" "${msg}" - else - printf "%-${nr_blank}s %s" " " "${msg}" - fi + print_check "${msg}" cnt1=$(ss -N $ns1 -tOni | grep -c token) cnt2=$(ss -N $ns2 -tOni | grep -c token) if [ "$cnt1" != "$subflow_nr" ] || [ "$cnt2" != "$subflow_nr" ]; then - echo "[fail] got $cnt1:$cnt2 subflows expected $subflow_nr" - fail_test + fail_test "got $cnt1:$cnt2 subflows expected $subflow_nr" dump_stats=1 else - echo "[ ok ]" + print_ok fi if [ "${dump_stats}" = 1 ]; then ss -N $ns1 -tOni ss -N $ns1 -tOni | grep token ip -n $ns1 mptcp endpoint - dump_stats fi } chk_mptcp_info() { - local nr_info=$1 - local info + local info1=$1 + local exp1=$2 + local info2=$3 + local exp2=$4 local cnt1 local cnt2 local dump_stats - if [[ $nr_info = "subflows_"* ]]; then - info="subflows" - nr_info=${nr_info:9} - else - echo "[fail] unsupported argument: $nr_info" - fail_test - return 1 - fi + print_check "mptcp_info ${info1:0:8}=$exp1:$exp2" - printf "%-${nr_blank}s %-30s" " " "mptcp_info $info=$nr_info" - - cnt1=$(ss -N $ns1 -inmHM | grep "$info:" | - sed -n 's/.*\('"$info"':\)\([[:digit:]]*\).*$/\2/p;q') + cnt1=$(ss -N $ns1 -inmHM | grep "$info1:" | + sed -n 's/.*\('"$info1"':\)\([[:digit:]]*\).*$/\2/p;q') + cnt2=$(ss -N $ns2 -inmHM | grep "$info2:" | + sed -n 's/.*\('"$info2"':\)\([[:digit:]]*\).*$/\2/p;q') + # 'ss' only display active connections and counters that are not 0. [ -z "$cnt1" ] && cnt1=0 - cnt2=$(ss -N $ns2 -inmHM | grep "$info:" | - sed -n 's/.*\('"$info"':\)\([[:digit:]]*\).*$/\2/p;q') [ -z "$cnt2" ] && cnt2=0 - if [ "$cnt1" != "$nr_info" ] || [ "$cnt2" != "$nr_info" ]; then - echo "[fail] got $cnt1:$cnt2 $info expected $nr_info" - fail_test + + if [ "$cnt1" != "$exp1" ] || [ "$cnt2" != "$exp2" ]; then + fail_test "got $cnt1:$cnt2 $info1:$info2 expected $exp1:$exp2" dump_stats=1 else - echo "[ ok ]" + print_ok fi if [ "$dump_stats" = 1 ]; then ss -N $ns1 -inmHM ss -N $ns2 -inmHM - dump_stats fi } @@ -1902,13 +1896,12 @@ chk_link_usage() local tx_rate=$((tx_link * 100 / tx_total)) local tolerance=5 - printf "%-${nr_blank}s %-18s" " " "link usage" + print_check "link usage" if [ $tx_rate -lt $((expected_rate - tolerance)) ] || \ [ $tx_rate -gt $((expected_rate + tolerance)) ]; then - echo "[fail] got $tx_rate% usage, expected $expected_rate%" - fail_test + fail_test "got $tx_rate% usage, expected $expected_rate%" else - echo "[ ok ]" + print_ok fi } @@ -2009,7 +2002,8 @@ subflows_error_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 fi @@ -2020,7 +2014,8 @@ subflows_error_tests() pm_nl_set_limits $ns2 0 2 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 fi @@ -2031,7 +2026,8 @@ subflows_error_tests() pm_nl_set_limits $ns2 0 2 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 fi @@ -2043,7 +2039,8 @@ subflows_error_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow & + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 & # mpj subflow will be in TW after the reset wait_attempt_fail $ns2 @@ -2063,6 +2060,7 @@ signal_address_tests() pm_nl_add_endpoint $ns1 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 + chk_add_tx_nr 1 1 chk_add_nr 1 1 fi @@ -2141,7 +2139,8 @@ signal_address_tests() # the peer could possibly miss some addr notification, allow retransmission ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1 - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 # It is not directly linked to the commit introducing this # symbol but for the parent one which is linked anyway. @@ -2173,7 +2172,8 @@ link_failure_tests() pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow - run_tests $ns1 $ns2 10.0.1.1 1 + test_linkfail=1 \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 chk_add_nr 1 1 chk_stale_nr $ns2 1 5 1 @@ -2188,7 +2188,8 @@ link_failure_tests() pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow - run_tests $ns1 $ns2 10.0.1.1 2 + test_linkfail=2 \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 chk_add_nr 1 1 chk_stale_nr $ns2 1 -1 1 @@ -2201,9 +2202,9 @@ link_failure_tests() pm_nl_set_limits $ns1 0 2 pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal pm_nl_set_limits $ns2 1 2 - FAILING_LINKS="1" pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup - run_tests $ns1 $ns2 10.0.1.1 1 + FAILING_LINKS="1" test_linkfail=1 \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 chk_add_nr 1 1 chk_link_usage $ns2 ns2eth3 $cinsent 0 @@ -2217,8 +2218,8 @@ link_failure_tests() pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal pm_nl_set_limits $ns2 1 2 pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup - FAILING_LINKS="1 2" - run_tests $ns1 $ns2 10.0.1.1 1 + FAILING_LINKS="1 2" test_linkfail=1 \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 chk_add_nr 1 1 chk_stale_nr $ns2 2 4 2 @@ -2233,8 +2234,8 @@ link_failure_tests() pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup - FAILING_LINKS="1 2" - run_tests $ns1 $ns2 10.0.1.1 2 + FAILING_LINKS="1 2" test_linkfail=2 \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 chk_add_nr 1 1 chk_stale_nr $ns2 1 -1 2 @@ -2249,8 +2250,10 @@ add_addr_timeout_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 + chk_add_tx_nr 4 4 chk_add_nr 4 0 fi @@ -2259,7 +2262,8 @@ add_addr_timeout_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal - run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow + speed=slow \ + run_tests $ns1 $ns2 dead:beef:1::1 chk_join_nr 1 1 1 chk_add_nr 4 0 fi @@ -2270,7 +2274,8 @@ add_addr_timeout_tests() pm_nl_add_endpoint $ns1 10.0.2.1 flags signal pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_set_limits $ns2 2 2 - run_tests $ns1 $ns2 10.0.1.1 0 0 0 speed_10 + speed=10 \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 chk_add_nr 8 0 fi @@ -2281,7 +2286,8 @@ add_addr_timeout_tests() pm_nl_add_endpoint $ns1 10.0.12.1 flags signal pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_set_limits $ns2 2 2 - run_tests $ns1 $ns2 10.0.1.1 0 0 0 speed_10 + speed=10 \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 8 0 fi @@ -2294,8 +2300,10 @@ remove_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 0 -1 slow + addr_nr_ns2=-1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 + chk_rm_tx_nr 1 chk_rm_nr 1 1 fi @@ -2305,7 +2313,8 @@ remove_tests() pm_nl_set_limits $ns2 0 2 pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 0 -2 slow + addr_nr_ns2=-2 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 chk_rm_nr 2 2 fi @@ -2315,7 +2324,8 @@ remove_tests() pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal pm_nl_set_limits $ns2 1 1 - run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow + addr_nr_ns1=-1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 1 1 chk_rm_nr 1 1 invert @@ -2327,7 +2337,8 @@ remove_tests() pm_nl_add_endpoint $ns1 10.0.2.1 flags signal pm_nl_set_limits $ns2 1 2 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow + addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 chk_add_nr 1 1 chk_rm_nr 1 1 @@ -2340,7 +2351,8 @@ remove_tests() pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 -1 -2 speed_10 + addr_nr_ns1=-1 addr_nr_ns2=-2 speed=10 \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 chk_add_nr 1 1 chk_rm_nr 2 2 @@ -2353,7 +2365,8 @@ remove_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.4.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 0 -3 0 speed_10 + addr_nr_ns1=-3 speed=10 \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 chk_add_nr 3 3 chk_rm_nr 3 3 invert @@ -2366,7 +2379,8 @@ remove_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.14.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 0 -3 0 speed_10 + addr_nr_ns1=-3 speed=10 \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 3 3 chk_rm_nr 3 1 invert @@ -2379,7 +2393,8 @@ remove_tests() pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow + addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 chk_add_nr 1 1 chk_rm_nr 1 3 invert simult @@ -2392,10 +2407,12 @@ remove_tests() pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow + addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 if mptcp_lib_kversion_ge 5.18; then + chk_rm_tx_nr 0 chk_rm_nr 0 3 simult else chk_rm_nr 3 3 @@ -2409,7 +2426,8 @@ remove_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.4.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow + addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 chk_add_nr 3 3 chk_rm_nr 3 3 invert simult @@ -2422,7 +2440,8 @@ remove_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.14.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 0 -8 0 slow + addr_nr_ns1=-8 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 3 3 chk_rm_nr 3 1 invert @@ -2433,7 +2452,8 @@ remove_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 0 -9 slow + addr_nr_ns2=-9 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_rm_nr 1 1 fi @@ -2443,7 +2463,8 @@ remove_tests() pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal pm_nl_set_limits $ns2 1 1 - run_tests $ns1 $ns2 10.0.1.1 0 -9 0 slow + addr_nr_ns1=-9 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 1 1 chk_rm_nr 1 1 invert @@ -2456,7 +2477,8 @@ add_tests() if reset "add single subflow"; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 - run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow + addr_nr_ns2=1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 fi @@ -2464,7 +2486,8 @@ add_tests() if reset "add signal address"; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 1 1 - run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow + addr_nr_ns1=1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 1 1 fi @@ -2473,7 +2496,8 @@ add_tests() if reset "add multiple subflows"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 - run_tests $ns1 $ns2 10.0.1.1 0 0 2 slow + addr_nr_ns2=2 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 fi @@ -2481,7 +2505,8 @@ add_tests() if reset "add multiple subflows IPv6"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 - run_tests $ns1 $ns2 dead:beef:1::1 0 0 2 slow + addr_nr_ns2=2 speed=slow \ + run_tests $ns1 $ns2 dead:beef:1::1 chk_join_nr 2 2 2 fi @@ -2489,7 +2514,8 @@ add_tests() if reset "add multiple addresses IPv6"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 2 2 - run_tests $ns1 $ns2 dead:beef:1::1 0 2 0 slow + addr_nr_ns1=2 speed=slow \ + run_tests $ns1 $ns2 dead:beef:1::1 chk_join_nr 2 2 2 chk_add_nr 2 2 fi @@ -2502,14 +2528,16 @@ ipv6_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow - run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow + speed=slow \ + run_tests $ns1 $ns2 dead:beef:1::1 chk_join_nr 1 1 1 fi # add_address, unused IPv6 if reset "unused signal address IPv6"; then pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal - run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow + speed=slow \ + run_tests $ns1 $ns2 dead:beef:1::1 chk_join_nr 0 0 0 chk_add_nr 1 1 fi @@ -2519,7 +2547,8 @@ ipv6_tests() pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal pm_nl_set_limits $ns2 1 1 - run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow + speed=slow \ + run_tests $ns1 $ns2 dead:beef:1::1 chk_join_nr 1 1 1 chk_add_nr 1 1 fi @@ -2529,7 +2558,8 @@ ipv6_tests() pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal pm_nl_set_limits $ns2 1 1 - run_tests $ns1 $ns2 dead:beef:1::1 0 -1 0 slow + addr_nr_ns1=-1 speed=slow \ + run_tests $ns1 $ns2 dead:beef:1::1 chk_join_nr 1 1 1 chk_add_nr 1 1 chk_rm_nr 1 1 invert @@ -2541,7 +2571,8 @@ ipv6_tests() pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal pm_nl_set_limits $ns2 1 2 pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow - run_tests $ns1 $ns2 dead:beef:1::1 0 -1 -1 slow + addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \ + run_tests $ns1 $ns2 dead:beef:1::1 chk_join_nr 2 2 2 chk_add_nr 1 1 chk_rm_nr 1 1 @@ -2642,7 +2673,8 @@ mixed_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 fi @@ -2652,7 +2684,8 @@ mixed_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns1 10.0.1.1 flags signal - run_tests $ns1 $ns2 dead:beef:2::1 0 0 0 slow + speed=slow \ + run_tests $ns1 $ns2 dead:beef:2::1 chk_join_nr 1 1 1 fi @@ -2663,7 +2696,8 @@ mixed_tests() pm_nl_set_limits $ns2 1 4 pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow,fullmesh pm_nl_add_endpoint $ns1 10.0.1.1 flags signal - run_tests $ns1 $ns2 dead:beef:2::1 0 0 0 slow + speed=slow \ + run_tests $ns1 $ns2 dead:beef:2::1 chk_join_nr 1 1 1 fi @@ -2675,7 +2709,8 @@ mixed_tests() pm_nl_set_limits $ns2 2 4 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal - run_tests $ns1 $ns2 dead:beef:1::1 0 0 fullmesh_1 slow + fullmesh=1 speed=slow \ + run_tests $ns1 $ns2 dead:beef:1::1 chk_join_nr 4 4 4 fi } @@ -2688,7 +2723,8 @@ backup_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup + sflags=nobackup speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_prio_nr 0 1 fi @@ -2699,7 +2735,8 @@ backup_tests() pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal pm_nl_set_limits $ns2 1 1 - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup + sflags=backup speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 1 1 chk_prio_nr 1 1 @@ -2711,7 +2748,8 @@ backup_tests() pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 pm_nl_set_limits $ns2 1 1 - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup + sflags=backup speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 1 1 chk_prio_nr 1 1 @@ -2720,7 +2758,8 @@ backup_tests() if reset "mpc backup" && continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 chk_prio_nr 0 1 fi @@ -2729,7 +2768,8 @@ backup_tests() continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 chk_prio_nr 1 1 fi @@ -2737,7 +2777,8 @@ backup_tests() if reset "mpc switch to backup" && continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup + sflags=backup speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 chk_prio_nr 0 1 fi @@ -2746,7 +2787,8 @@ backup_tests() continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup + sflags=backup speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 chk_prio_nr 1 1 fi @@ -2774,15 +2816,15 @@ verify_listener_events() if [ $e_type = $LISTENER_CREATED ]; then name="LISTENER_CREATED" elif [ $e_type = $LISTENER_CLOSED ]; then - name="LISTENER_CLOSED" + name="LISTENER_CLOSED " else name="$e_type" fi - printf "%-${nr_blank}s %s %s:%s " " " "$name" "$e_saddr" "$e_sport" + print_check "$name $e_saddr:$e_sport" if ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then - printf "[skip]: event not supported\n" + print_skip "event not supported" return fi @@ -2799,11 +2841,10 @@ verify_listener_events() [ $family ] && [ $family = $e_family ] && [ $saddr ] && [ $saddr = $e_saddr ] && [ $sport ] && [ $sport = $e_sport ]; then - echo "[ ok ]" + print_ok return 0 fi - fail_test - echo "[fail]" + fail_test "$e_type:$type $e_family:$family $e_saddr:$saddr $e_sport:$sport" } add_addr_ports_tests() @@ -2835,7 +2876,8 @@ add_addr_ports_tests() pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 pm_nl_set_limits $ns2 1 1 - run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow + addr_nr_ns1=-1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 1 1 1 chk_rm_nr 1 1 invert @@ -2851,7 +2893,8 @@ add_addr_ports_tests() pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 pm_nl_set_limits $ns2 1 2 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 -1 -1 slow + addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 chk_add_nr 1 1 1 chk_rm_nr 1 1 @@ -2864,7 +2907,8 @@ add_addr_ports_tests() pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 -8 -2 slow + addr_nr_ns1=-8 addr_nr_ns2=-2 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 chk_add_nr 1 1 chk_rm_nr 1 3 invert simult @@ -3066,7 +3110,8 @@ fullmesh_tests() pm_nl_set_limits $ns2 1 4 pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,fullmesh pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,fullmesh - run_tests $ns1 $ns2 10.0.1.1 0 1 0 slow + addr_nr_ns1=1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 4 4 4 chk_add_nr 1 1 fi @@ -3078,7 +3123,8 @@ fullmesh_tests() pm_nl_set_limits $ns1 1 3 pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal - run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow + fullmesh=1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 chk_add_nr 1 1 fi @@ -3090,7 +3136,8 @@ fullmesh_tests() pm_nl_set_limits $ns1 2 5 pm_nl_set_limits $ns2 1 5 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal - run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow + fullmesh=2 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 5 5 5 chk_add_nr 1 1 fi @@ -3103,7 +3150,8 @@ fullmesh_tests() pm_nl_set_limits $ns1 2 4 pm_nl_set_limits $ns2 1 4 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal - run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_2 slow + fullmesh=2 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 4 4 4 chk_add_nr 1 1 fi @@ -3114,7 +3162,8 @@ fullmesh_tests() pm_nl_set_limits $ns1 4 4 pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow pm_nl_set_limits $ns2 4 4 - run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow fullmesh + addr_nr_ns2=1 sflags=fullmesh speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 chk_rm_nr 0 1 fi @@ -3125,7 +3174,8 @@ fullmesh_tests() pm_nl_set_limits $ns1 4 4 pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow,fullmesh pm_nl_set_limits $ns2 4 4 - run_tests $ns1 $ns2 10.0.1.1 0 0 fullmesh_1 slow nofullmesh + fullmesh=1 sflags=nofullmesh speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 chk_rm_nr 0 1 fi @@ -3136,7 +3186,8 @@ fullmesh_tests() pm_nl_set_limits $ns1 4 4 pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow pm_nl_set_limits $ns2 4 4 - run_tests $ns1 $ns2 10.0.1.1 0 0 1 slow backup,fullmesh + addr_nr_ns2=1 sflags=backup,fullmesh speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 chk_prio_nr 0 1 chk_rm_nr 0 1 @@ -3148,7 +3199,8 @@ fullmesh_tests() pm_nl_set_limits $ns1 4 4 pm_nl_set_limits $ns2 4 4 pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup,fullmesh - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow nobackup,nofullmesh + sflags=nobackup,nofullmesh speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 chk_prio_nr 0 1 chk_rm_nr 0 1 @@ -3158,14 +3210,16 @@ fullmesh_tests() fastclose_tests() { if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then - run_tests $ns1 $ns2 10.0.1.1 1024 0 fastclose_client + test_linkfail=1024 fastclose=client \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 chk_fclose_nr 1 1 chk_rst_nr 1 1 invert fi if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then - run_tests $ns1 $ns2 10.0.1.1 1024 0 fastclose_server + test_linkfail=1024 fastclose=server \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 chk_fclose_nr 1 1 invert chk_rst_nr 1 1 @@ -3183,7 +3237,8 @@ fail_tests() { # single subflow if reset_with_fail "Infinite map" 1; then - run_tests $ns1 $ns2 10.0.1.1 128 + test_linkfail=128 \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)" chk_fail_nr 1 -1 invert fi @@ -3194,11 +3249,77 @@ fail_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 1024 + test_linkfail=1024 \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 1 0 1 1 0 "$(pedit_action_pkts)" fi } +userspace_pm_add_addr() +{ + local addr=$1 + local id=$2 + local tk + + tk=$(grep "type:1," "$evts_ns1" | + sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') + ip netns exec $ns1 ./pm_nl_ctl ann $addr token $tk id $id + sleep 1 +} + +userspace_pm_rm_sf_addr_ns1() +{ + local addr=$1 + local id=$2 + local tk sp da dp + + tk=$(grep "type:1," "$evts_ns1" | + sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q') + sp=$(grep "type:10" "$evts_ns1" | + sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + da=$(grep "type:10" "$evts_ns1" | + sed -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q') + dp=$(grep "type:10" "$evts_ns1" | + sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q') + ip netns exec $ns1 ./pm_nl_ctl rem token $tk id $id + ip netns exec $ns1 ./pm_nl_ctl dsf lip "::ffff:$addr" \ + lport $sp rip $da rport $dp token $tk + wait_rm_addr $ns1 1 + wait_rm_sf $ns1 1 +} + +userspace_pm_add_sf() +{ + local addr=$1 + local id=$2 + local tk da dp + + tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") + da=$(sed -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evts_ns2") + dp=$(sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") + ip netns exec $ns2 ./pm_nl_ctl csf lip $addr lid $id \ + rip $da rport $dp token $tk + sleep 1 +} + +userspace_pm_rm_sf_addr_ns2() +{ + local addr=$1 + local id=$2 + local tk da dp sp + + tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") + da=$(sed -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evts_ns2") + dp=$(sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2") + sp=$(grep "type:10" "$evts_ns2" | + sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q') + ip netns exec $ns2 ./pm_nl_ctl rem token $tk id $id + ip netns exec $ns2 ./pm_nl_ctl dsf lip $addr lport $sp \ + rip $da rport $dp token $tk + wait_rm_addr $ns2 1 + wait_rm_sf $ns2 1 +} + userspace_tests() { # userspace pm type prevents add_addr @@ -3254,7 +3375,8 @@ userspace_tests() pm_nl_set_limits $ns1 1 1 pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup + sflags=backup speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 0 chk_prio_nr 0 0 fi @@ -3267,7 +3389,8 @@ userspace_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 0 0 -1 slow + addr_nr_ns2=-1 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 chk_rm_nr 0 0 fi @@ -3277,11 +3400,20 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 1 1 - run_tests $ns1 $ns2 10.0.1.1 0 userspace_1 0 slow + speed=10 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + wait_mpj $ns1 + userspace_pm_add_addr 10.0.2.1 10 chk_join_nr 1 1 1 chk_add_nr 1 1 + chk_mptcp_info subflows 1 subflows 1 + chk_mptcp_info add_addr_signal 1 add_addr_accepted 1 + userspace_pm_rm_sf_addr_ns1 10.0.2.1 10 chk_rm_nr 1 1 invert + chk_mptcp_info subflows 0 subflows 0 kill_events_pids + wait $tests_pid fi # userspace pm create destroy subflow @@ -3289,10 +3421,18 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - run_tests $ns1 $ns2 10.0.1.1 0 0 userspace_1 slow + speed=10 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + wait_mpj $ns2 + userspace_pm_add_sf 10.0.3.2 20 chk_join_nr 1 1 1 + chk_mptcp_info subflows 1 subflows 1 + userspace_pm_rm_sf_addr_ns2 10.0.3.2 20 chk_rm_nr 1 1 + chk_mptcp_info subflows 0 subflows 0 kill_events_pids + wait $tests_pid fi } @@ -3305,18 +3445,21 @@ endpoint_tests() pm_nl_set_limits $ns1 2 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal - run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow 2>/dev/null & + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 2>/dev/null & wait_mpj $ns1 - pm_nl_check_endpoint 1 "creation" \ + pm_nl_check_endpoint "creation" \ $ns2 10.0.2.2 id 1 flags implicit + chk_mptcp_info subflows 1 subflows 1 + chk_mptcp_info add_addr_signal 1 add_addr_accepted 1 - pm_nl_add_endpoint $ns2 10.0.2.2 id 33 - pm_nl_check_endpoint 0 "ID change is prevented" \ + pm_nl_add_endpoint $ns2 10.0.2.2 id 33 2>/dev/null + pm_nl_check_endpoint "ID change is prevented" \ $ns2 10.0.2.2 id 1 flags implicit pm_nl_add_endpoint $ns2 10.0.2.2 flags signal - pm_nl_check_endpoint 0 "modif is allowed" \ + pm_nl_check_endpoint "modif is allowed" \ $ns2 10.0.2.2 id 1 flags signal kill_tests_wait fi @@ -3326,21 +3469,22 @@ endpoint_tests() pm_nl_set_limits $ns1 1 1 pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow - run_tests $ns1 $ns2 10.0.1.1 4 0 0 speed_20 2>/dev/null & + test_linkfail=4 speed=20 \ + run_tests $ns1 $ns2 10.0.1.1 2>/dev/null & wait_mpj $ns2 - chk_subflow_nr needtitle "before delete" 2 - chk_mptcp_info subflows_1 + chk_subflow_nr "before delete" 2 + chk_mptcp_info subflows 1 subflows 1 pm_nl_del_endpoint $ns2 2 10.0.2.2 sleep 0.5 - chk_subflow_nr "" "after delete" 1 - chk_mptcp_info subflows_0 + chk_subflow_nr "after delete" 1 + chk_mptcp_info subflows 0 subflows 0 pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow wait_mpj $ns2 - chk_subflow_nr "" "after re-add" 2 - chk_mptcp_info subflows_1 + chk_subflow_nr "after re-add" 2 + chk_mptcp_info subflows 1 subflows 1 kill_tests_wait fi } @@ -3457,4 +3601,7 @@ if [ ${ret} -ne 0 ]; then echo fi +append_prev_results +mptcp_lib_result_print_all_tap + exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index f32045b23b89..92a5befe8039 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -1,9 +1,52 @@ #! /bin/bash # SPDX-License-Identifier: GPL-2.0 +readonly KSFT_PASS=0 readonly KSFT_FAIL=1 readonly KSFT_SKIP=4 +# shellcheck disable=SC2155 # declare and assign separately +readonly KSFT_TEST=$(basename "${0}" | sed 's/\.sh$//g') + +MPTCP_LIB_SUBTESTS=() + +# only if supported (or forced) and not disabled, see no-color.org +if { [ -t 1 ] || [ "${SELFTESTS_MPTCP_LIB_COLOR_FORCE:-}" = "1" ]; } && + [ "${NO_COLOR:-}" != "1" ]; then + readonly MPTCP_LIB_COLOR_RED="\E[1;31m" + readonly MPTCP_LIB_COLOR_GREEN="\E[1;32m" + readonly MPTCP_LIB_COLOR_YELLOW="\E[1;33m" + readonly MPTCP_LIB_COLOR_BLUE="\E[1;34m" + readonly MPTCP_LIB_COLOR_RESET="\E[0m" +else + readonly MPTCP_LIB_COLOR_RED= + readonly MPTCP_LIB_COLOR_GREEN= + readonly MPTCP_LIB_COLOR_YELLOW= + readonly MPTCP_LIB_COLOR_BLUE= + readonly MPTCP_LIB_COLOR_RESET= +fi + +# $1: color, $2: text +mptcp_lib_print_color() { + echo -e "${MPTCP_LIB_START_PRINT:-}${*}${MPTCP_LIB_COLOR_RESET}" +} + +mptcp_lib_print_ok() { + mptcp_lib_print_color "${MPTCP_LIB_COLOR_GREEN}${*}" +} + +mptcp_lib_print_warn() { + mptcp_lib_print_color "${MPTCP_LIB_COLOR_YELLOW}${*}" +} + +mptcp_lib_print_info() { + mptcp_lib_print_color "${MPTCP_LIB_COLOR_BLUE}${*}" +} + +mptcp_lib_print_err() { + mptcp_lib_print_color "${MPTCP_LIB_COLOR_RED}${*}" +} + # SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var can be set when validating all # features using the last version of the kernel and the selftests to make sure # a test is not being skipped by mistake. @@ -102,3 +145,65 @@ mptcp_lib_kversion_ge() { mptcp_lib_fail_if_expected_feature "kernel version ${1} lower than ${v}" } + +__mptcp_lib_result_add() { + local result="${1}" + shift + + local id=$((${#MPTCP_LIB_SUBTESTS[@]} + 1)) + + MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*}") +} + +# $1: test name +mptcp_lib_result_pass() { + __mptcp_lib_result_add "ok" "${1}" +} + +# $1: test name +mptcp_lib_result_fail() { + __mptcp_lib_result_add "not ok" "${1}" +} + +# $1: test name +mptcp_lib_result_skip() { + __mptcp_lib_result_add "ok" "${1} # SKIP" +} + +# $1: result code ; $2: test name +mptcp_lib_result_code() { + local ret="${1}" + local name="${2}" + + case "${ret}" in + "${KSFT_PASS}") + mptcp_lib_result_pass "${name}" + ;; + "${KSFT_FAIL}") + mptcp_lib_result_fail "${name}" + ;; + "${KSFT_SKIP}") + mptcp_lib_result_skip "${name}" + ;; + *) + echo "ERROR: wrong result code: ${ret}" + exit ${KSFT_FAIL} + ;; + esac +} + +mptcp_lib_result_print_all_tap() { + local subtest + + if [ ${#MPTCP_LIB_SUBTESTS[@]} -eq 0 ] || + [ "${SELFTESTS_MPTCP_LIB_NO_TAP:-}" = "1" ]; then + return + fi + + printf "\nTAP version 13\n" + printf "1..%d\n" "${#MPTCP_LIB_SUBTESTS[@]}" + + for subtest in "${MPTCP_LIB_SUBTESTS[@]}"; do + printf "%s\n" "${subtest}" + done +} diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c index b35148edbf02..926b0be87c99 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -51,6 +51,11 @@ struct mptcp_info { __u8 mptcpi_local_addr_used; __u8 mptcpi_local_addr_max; __u8 mptcpi_csum_enabled; + __u32 mptcpi_retransmits; + __u64 mptcpi_bytes_retrans; + __u64 mptcpi_bytes_sent; + __u64 mptcpi_bytes_received; + __u64 mptcpi_bytes_acked; }; struct mptcp_subflow_data { @@ -81,10 +86,41 @@ struct mptcp_subflow_addrs { #define MPTCP_SUBFLOW_ADDRS 3 #endif +#ifndef MPTCP_FULL_INFO +struct mptcp_subflow_info { + __u32 id; + struct mptcp_subflow_addrs addrs; +}; + +struct mptcp_full_info { + __u32 size_tcpinfo_kernel; /* must be 0, set by kernel */ + __u32 size_tcpinfo_user; + __u32 size_sfinfo_kernel; /* must be 0, set by kernel */ + __u32 size_sfinfo_user; + __u32 num_subflows; /* must be 0, set by kernel (real subflow count) */ + __u32 size_arrays_user; /* max subflows that userspace is interested in; + * the buffers at subflow_info/tcp_info + * are respectively at least: + * size_arrays * size_sfinfo_user + * size_arrays * size_tcpinfo_user + * bytes wide + */ + __aligned_u64 subflow_info; + __aligned_u64 tcp_info; + struct mptcp_info mptcp_info; +}; + +#define MPTCP_FULL_INFO 4 +#endif + struct so_state { struct mptcp_info mi; + struct mptcp_info last_sample; + struct tcp_info tcp_info; + struct mptcp_subflow_addrs addrs; uint64_t mptcpi_rcv_delta; uint64_t tcpi_rcv_delta; + bool pkt_stats_avail; }; #ifndef MIN @@ -322,8 +358,9 @@ static void do_getsockopt_mptcp_info(struct so_state *s, int fd, size_t w) if (ret < 0) die_perror("getsockopt MPTCP_INFO"); - assert(olen == sizeof(i)); + s->pkt_stats_avail = olen >= sizeof(i); + s->last_sample = i; if (s->mi.mptcpi_write_seq == 0) s->mi = i; @@ -362,6 +399,8 @@ static void do_getsockopt_tcp_info(struct so_state *s, int fd, size_t r, size_t olen -= sizeof(struct mptcp_subflow_data); assert(olen == ti.d.size_user); + s->tcp_info = ti.ti[0]; + if (ti.ti[0].tcpi_bytes_sent == w && ti.ti[0].tcpi_bytes_received == r) goto done; @@ -383,7 +422,7 @@ done: do_getsockopt_bogus_sf_data(fd, MPTCP_TCPINFO); } -static void do_getsockopt_subflow_addrs(int fd) +static void do_getsockopt_subflow_addrs(struct so_state *s, int fd) { struct sockaddr_storage remote, local; socklen_t olen, rlen, llen; @@ -431,6 +470,7 @@ static void do_getsockopt_subflow_addrs(int fd) assert(memcmp(&local, &addrs.addr[0].ss_local, sizeof(local)) == 0); assert(memcmp(&remote, &addrs.addr[0].ss_remote, sizeof(remote)) == 0); + s->addrs = addrs.addr[0]; memset(&addrs, 0, sizeof(addrs)); @@ -451,13 +491,70 @@ static void do_getsockopt_subflow_addrs(int fd) do_getsockopt_bogus_sf_data(fd, MPTCP_SUBFLOW_ADDRS); } +static void do_getsockopt_mptcp_full_info(struct so_state *s, int fd) +{ + size_t data_size = sizeof(struct mptcp_full_info); + struct mptcp_subflow_info sfinfo[2]; + struct tcp_info tcp_info[2]; + struct mptcp_full_info mfi; + socklen_t olen; + int ret; + + memset(&mfi, 0, data_size); + memset(tcp_info, 0, sizeof(tcp_info)); + memset(sfinfo, 0, sizeof(sfinfo)); + + mfi.size_tcpinfo_user = sizeof(struct tcp_info); + mfi.size_sfinfo_user = sizeof(struct mptcp_subflow_info); + mfi.size_arrays_user = 2; + mfi.subflow_info = (unsigned long)&sfinfo[0]; + mfi.tcp_info = (unsigned long)&tcp_info[0]; + olen = data_size; + + ret = getsockopt(fd, SOL_MPTCP, MPTCP_FULL_INFO, &mfi, &olen); + if (ret < 0) { + if (errno == EOPNOTSUPP) { + perror("MPTCP_FULL_INFO test skipped"); + return; + } + xerror("getsockopt MPTCP_FULL_INFO"); + } + + assert(olen <= data_size); + assert(mfi.size_tcpinfo_kernel > 0); + assert(mfi.size_tcpinfo_user == + MIN(mfi.size_tcpinfo_kernel, sizeof(struct tcp_info))); + assert(mfi.size_sfinfo_kernel > 0); + assert(mfi.size_sfinfo_user == + MIN(mfi.size_sfinfo_kernel, sizeof(struct mptcp_subflow_info))); + assert(mfi.num_subflows == 1); + + /* Tolerate future extension to mptcp_info struct and running newer + * test on top of older kernel. + * Anyway any kernel supporting MPTCP_FULL_INFO must at least include + * the following in mptcp_info. + */ + assert(olen > (socklen_t)__builtin_offsetof(struct mptcp_full_info, tcp_info)); + assert(mfi.mptcp_info.mptcpi_subflows == 0); + assert(mfi.mptcp_info.mptcpi_bytes_sent == s->last_sample.mptcpi_bytes_sent); + assert(mfi.mptcp_info.mptcpi_bytes_received == s->last_sample.mptcpi_bytes_received); + + assert(sfinfo[0].id == 1); + assert(tcp_info[0].tcpi_bytes_sent == s->tcp_info.tcpi_bytes_sent); + assert(tcp_info[0].tcpi_bytes_received == s->tcp_info.tcpi_bytes_received); + assert(!memcmp(&sfinfo->addrs, &s->addrs, sizeof(struct mptcp_subflow_addrs))); +} + static void do_getsockopts(struct so_state *s, int fd, size_t r, size_t w) { do_getsockopt_mptcp_info(s, fd, w); do_getsockopt_tcp_info(s, fd, r, w); - do_getsockopt_subflow_addrs(fd); + do_getsockopt_subflow_addrs(s, fd); + + if (r) + do_getsockopt_mptcp_full_info(s, fd); } static void connect_one_server(int fd, int pipefd) @@ -562,6 +659,23 @@ static void process_one_client(int fd, int pipefd) do_getsockopts(&s, fd, ret, ret2); if (s.mptcpi_rcv_delta != (uint64_t)ret + 1) xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64, s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - ret); + + /* be nice when running on top of older kernel */ + if (s.pkt_stats_avail) { + if (s.last_sample.mptcpi_bytes_sent != ret2) + xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64, + s.last_sample.mptcpi_bytes_sent, ret2, + s.last_sample.mptcpi_bytes_sent - ret2); + if (s.last_sample.mptcpi_bytes_received != ret) + xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64, + s.last_sample.mptcpi_bytes_received, ret, + s.last_sample.mptcpi_bytes_received - ret); + if (s.last_sample.mptcpi_bytes_acked != ret) + xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64, + s.last_sample.mptcpi_bytes_acked, ret2, + s.last_sample.mptcpi_bytes_acked - ret2); + } + close(fd); } diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index f295a371ff14..8c8694f21e7d 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -12,6 +12,8 @@ ksft_skip=4 timeout_poll=30 timeout_test=$((timeout_poll * 2 + 1)) mptcp_connect="" +iptables="iptables" +ip6tables="ip6tables" sec=$(date +%s) rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) @@ -25,7 +27,7 @@ add_mark_rules() local m=$2 local t - for t in iptables ip6tables; do + for t in ${iptables} ${ip6tables}; do # just to debug: check we have multiple subflows connection requests ip netns exec $ns $t -A OUTPUT -p tcp --syn -m mark --mark $m -j ACCEPT @@ -95,14 +97,14 @@ if [ $? -ne 0 ];then exit $ksft_skip fi -iptables -V > /dev/null 2>&1 -if [ $? -ne 0 ];then +# Use the legacy version if available to support old kernel versions +if iptables-legacy -V &> /dev/null; then + iptables="iptables-legacy" + ip6tables="ip6tables-legacy" +elif ! iptables -V &> /dev/null; then echo "SKIP: Could not run all tests without iptables tool" exit $ksft_skip -fi - -ip6tables -V > /dev/null 2>&1 -if [ $? -ne 0 ];then +elif ! ip6tables -V &> /dev/null; then echo "SKIP: Could not run all tests without ip6tables tool" exit $ksft_skip fi @@ -112,10 +114,10 @@ check_mark() local ns=$1 local af=$2 - local tables=iptables + local tables=${iptables} if [ $af -eq 6 ];then - tables=ip6tables + tables=${ip6tables} fi local counters values @@ -126,6 +128,7 @@ check_mark() for v in $values; do if [ $v -ne 0 ]; then echo "FAIL: got $tables $values in ns $ns , not 0 - not all expected packets marked" 1>&2 + ret=1 return 1 fi done @@ -180,11 +183,13 @@ do_transfer() local mptcp_connect="./mptcp_connect -r 20" - local local_addr + local local_addr ip if is_v6 "${connect_addr}"; then local_addr="::" + ip=ipv6 else local_addr="0.0.0.0" + ip=ipv4 fi cmsg="TIMESTAMPNS" @@ -220,22 +225,26 @@ do_transfer() echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" + mptcp_lib_result_fail "transfer ${ip}" + ret=1 return 1 fi if [ $local_addr = "::" ];then - check_mark $listener_ns 6 - check_mark $connector_ns 6 + check_mark $listener_ns 6 || retc=1 + check_mark $connector_ns 6 || retc=1 else - check_mark $listener_ns 4 - check_mark $connector_ns 4 + check_mark $listener_ns 4 || retc=1 + check_mark $connector_ns 4 || retc=1 fi check_transfer $cin $sout "file received by server" - rets=$? + mptcp_lib_result_code "${retc}" "mark ${ip}" + mptcp_lib_result_code "${rets}" "transfer ${ip}" + if [ $retc -eq 0 ] && [ $rets -eq 0 ];then return 0 fi @@ -261,6 +270,7 @@ do_mptcp_sockopt_tests() if ! mptcp_lib_kallsyms_has "mptcp_diag_fill_info$"; then echo "INFO: MPTCP sockopt not supported: SKIP" + mptcp_lib_result_skip "sockopt" return fi @@ -269,18 +279,22 @@ do_mptcp_sockopt_tests() if [ $lret -ne 0 ]; then echo "FAIL: SOL_MPTCP getsockopt" 1>&2 + mptcp_lib_result_fail "sockopt v4" ret=$lret return fi + mptcp_lib_result_pass "sockopt v4" ip netns exec "$ns_sbox" ./mptcp_sockopt -6 lret=$? if [ $lret -ne 0 ]; then echo "FAIL: SOL_MPTCP getsockopt (ipv6)" 1>&2 + mptcp_lib_result_fail "sockopt v6" ret=$lret return fi + mptcp_lib_result_pass "sockopt v6" } run_tests() @@ -307,10 +321,12 @@ do_tcpinq_test() if [ $lret -ne 0 ];then ret=$lret echo "FAIL: mptcp_inq $@" 1>&2 + mptcp_lib_result_fail "TCP_INQ: $*" return $lret fi echo "PASS: TCP_INQ cmsg/ioctl $@" + mptcp_lib_result_pass "TCP_INQ: $*" return $lret } @@ -320,6 +336,7 @@ do_tcpinq_tests() if ! mptcp_lib_kallsyms_has "mptcp_ioctl$"; then echo "INFO: TCP_INQ not supported: SKIP" + mptcp_lib_result_skip "TCP_INQ" return fi @@ -364,4 +381,6 @@ if [ $ret -eq 0 ];then fi do_tcpinq_tests + +mptcp_lib_result_print_all_tap exit $ret diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index d02e0d63a8f9..8f4ff123a7eb 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -58,16 +58,19 @@ check() local out=`$cmd 2>$err` local cmd_ret=$? - printf "%-50s %s" "$msg" + printf "%-50s" "$msg" if [ $cmd_ret -ne 0 ]; then echo "[FAIL] command execution '$cmd' stderr " cat $err + mptcp_lib_result_fail "${msg} # error ${cmd_ret}" ret=1 elif [ "$out" = "$expected" ]; then echo "[ OK ]" + mptcp_lib_result_pass "${msg}" else echo -n "[FAIL] " echo "expected '$expected' got '$out'" + mptcp_lib_result_fail "${msg} # different output" ret=1 fi } @@ -96,7 +99,7 @@ check "ip netns exec $ns1 ./pm_nl_ctl dump" \ "id 1 flags 10.0.1.1 id 3 flags signal,backup 10.0.1.3" "dump addrs after del" -ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 2>/dev/null check "ip netns exec $ns1 ./pm_nl_ctl get 4" "" "duplicate addr" ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 flags signal @@ -124,10 +127,10 @@ id 8 flags signal 10.0.1.8" "id limit" ip netns exec $ns1 ./pm_nl_ctl flush check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "flush addrs" -ip netns exec $ns1 ./pm_nl_ctl limits 9 1 +ip netns exec $ns1 ./pm_nl_ctl limits 9 1 2>/dev/null check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "rcv addrs above hard limit" -ip netns exec $ns1 ./pm_nl_ctl limits 1 9 +ip netns exec $ns1 ./pm_nl_ctl limits 1 9 2>/dev/null check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "subflows above hard limit" ip netns exec $ns1 ./pm_nl_ctl limits 8 8 @@ -193,4 +196,5 @@ subflow 10.0.1.1" " (nofullmesh)" subflow,backup,fullmesh 10.0.1.1" " (backup,fullmesh)" fi +mptcp_lib_result_print_all_tap exit $ret diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index abddf4c63e79..49369c4a5f26 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -66,20 +66,25 @@ static int init_genl_req(char *data, int family, int cmd, int version) return off; } -static void nl_error(struct nlmsghdr *nh) +static int nl_error(struct nlmsghdr *nh) { struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(nh); int len = nh->nlmsg_len - sizeof(*nh); uint32_t off; - if (len < sizeof(struct nlmsgerr)) + if (len < sizeof(struct nlmsgerr)) { error(1, 0, "netlink error message truncated %d min %ld", len, sizeof(struct nlmsgerr)); + return -1; + } - if (!err->error) { + if (err->error) { /* check messages from kernel */ struct rtattr *attrs = (struct rtattr *)NLMSG_DATA(nh); + fprintf(stderr, "netlink error %d (%s)\n", + err->error, strerror(-err->error)); + while (RTA_OK(attrs, len)) { if (attrs->rta_type == NLMSGERR_ATTR_MSG) fprintf(stderr, "netlink ext ack msg: %s\n", @@ -91,9 +96,10 @@ static void nl_error(struct nlmsghdr *nh) } attrs = RTA_NEXT(attrs, len); } - } else { - fprintf(stderr, "netlink error %d", err->error); + return -1; } + + return 0; } static int capture_events(int fd, int event_group) @@ -198,7 +204,7 @@ static int capture_events(int fd, int event_group) return 0; } -/* do a netlink command and, if max > 0, fetch the reply */ +/* do a netlink command and, if max > 0, fetch the reply ; nh's size >1024B */ static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max) { struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; @@ -207,12 +213,16 @@ static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max) int rem, ret; int err = 0; + /* If no expected answer, ask for an ACK to look for errors if any */ + if (max == 0) { + nh->nlmsg_flags |= NLM_F_ACK; + max = 1024; + } + nh->nlmsg_len = len; ret = sendto(fd, data, len, 0, (void *)&nladdr, sizeof(nladdr)); if (ret != len) error(1, errno, "send netlink: %uB != %uB\n", ret, len); - if (max == 0) - return 0; addr_len = sizeof(nladdr); rem = ret = recvfrom(fd, data, max, 0, (void *)&nladdr, &addr_len); @@ -221,10 +231,11 @@ static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max) /* Beware: the NLMSG_NEXT macro updates the 'rem' argument */ for (; NLMSG_OK(nh, rem); nh = NLMSG_NEXT(nh, rem)) { - if (nh->nlmsg_type == NLMSG_ERROR) { - nl_error(nh); + if (nh->nlmsg_type == NLMSG_DONE) + break; + + if (nh->nlmsg_type == NLMSG_ERROR && nl_error(nh)) err = 1; - } } if (err) error(1, 0, "bailing out due to netlink error[s]"); @@ -425,7 +436,7 @@ int dsf(int fd, int pm_family, int argc, char *argv[]) } /* token */ - token = atoi(params[4]); + token = strtoul(params[4], NULL, 10); rta = (void *)(data + off); rta->rta_type = MPTCP_PM_ATTR_TOKEN; rta->rta_len = RTA_LENGTH(4); @@ -551,7 +562,7 @@ int csf(int fd, int pm_family, int argc, char *argv[]) } /* token */ - token = atoi(params[4]); + token = strtoul(params[4], NULL, 10); rta = (void *)(data + off); rta->rta_type = MPTCP_PM_ATTR_TOKEN; rta->rta_len = RTA_LENGTH(4); @@ -598,7 +609,7 @@ int remove_addr(int fd, int pm_family, int argc, char *argv[]) if (++arg >= argc) error(1, 0, " missing token value"); - token = atoi(argv[arg]); + token = strtoul(argv[arg], NULL, 10); rta = (void *)(data + off); rta->rta_type = MPTCP_PM_ATTR_TOKEN; rta->rta_len = RTA_LENGTH(4); @@ -710,7 +721,7 @@ int announce_addr(int fd, int pm_family, int argc, char *argv[]) if (++arg >= argc) error(1, 0, " missing token value"); - token = atoi(argv[arg]); + token = strtoul(argv[arg], NULL, 10); } else error(1, 0, "unknown keyword %s", argv[arg]); } @@ -1347,7 +1358,7 @@ int set_flags(int fd, int pm_family, int argc, char *argv[]) error(1, 0, " missing token value"); /* token */ - token = atoi(argv[arg]); + token = strtoul(argv[arg], NULL, 10); } else if (!strcmp(argv[arg], "flags")) { char *tok, *str; diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 36a3c9d92e20..ce9203b817f8 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -261,6 +261,7 @@ run_test() printf "%-60s" "$msg" do_transfer $small $large $time lret=$? + mptcp_lib_result_code "${lret}" "${msg}" if [ $lret -ne 0 ]; then ret=$lret [ $bail -eq 0 ] || exit $ret @@ -269,6 +270,7 @@ run_test() printf "%-60s" "$msg - reverse direction" do_transfer $large $small $time lret=$? + mptcp_lib_result_code "${lret}" "${msg}" if [ $lret -ne 0 ]; then ret=$lret [ $bail -eq 0 ] || exit $ret @@ -305,4 +307,6 @@ run_test 10 10 1 50 "balanced bwidth with unbalanced delay" run_test 30 10 0 0 "unbalanced bwidth" run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay" run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay" + +mptcp_lib_result_print_all_tap exit $ret diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 98d9e4d2d3fc..b25a3e33eb25 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -1,6 +1,13 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +# Double quotes to prevent globbing and word splitting is recommended in new +# code but we accept it. +#shellcheck disable=SC2086 + +# Some variables are used below but indirectly, see check_expected_one() +#shellcheck disable=SC2034 + . "$(dirname "${0}")/mptcp_lib.sh" mptcp_lib_check_mptcp @@ -11,8 +18,7 @@ if ! mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then exit ${KSFT_SKIP} fi -ip -Version > /dev/null 2>&1 -if [ $? -ne 0 ];then +if ! ip -Version &> /dev/null; then echo "SKIP: Cannot not run test without ip tool" exit ${KSFT_SKIP} fi @@ -52,10 +58,54 @@ sec=$(date +%s) rndh=$(printf %x "$sec")-$(mktemp -u XXXXXX) ns1="ns1-$rndh" ns2="ns2-$rndh" +ret=0 +test_name="" + +_printf() { + stdbuf -o0 -e0 printf "${@}" +} print_title() { - stdbuf -o0 -e0 printf "INFO: %s\n" "${1}" + _printf "INFO: %s\n" "${1}" +} + +# $1: test name +print_test() +{ + test_name="${1}" + + _printf "%-63s" "${test_name}" +} + +print_results() +{ + _printf "[%s]\n" "${1}" +} + +test_pass() +{ + print_results " OK " + mptcp_lib_result_pass "${test_name}" +} + +test_skip() +{ + print_results "SKIP" + mptcp_lib_result_skip "${test_name}" +} + +# $1: msg +test_fail() +{ + print_results "FAIL" + ret=1 + + if [ -n "${1}" ]; then + _printf "\t%s\n" "${1}" + fi + + mptcp_lib_result_fail "${test_name}" } kill_wait() @@ -67,6 +117,8 @@ kill_wait() wait $1 2>/dev/null } +# This function is used in the cleanup trap +#shellcheck disable=SC2317 cleanup() { print_title "Cleanup" @@ -86,7 +138,7 @@ cleanup() rm -rf $file $client_evts $server_evts - stdbuf -o0 -e0 printf "Done\n" + _printf "Done\n" } trap cleanup EXIT @@ -118,7 +170,8 @@ ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth1 nodad ip -net "$ns2" link set ns2eth1 up print_title "Init" -stdbuf -o0 -e0 printf "Created network namespaces ns1, ns2 \t\t\t[OK]\n" +print_test "Created network namespaces ns1, ns2" +test_pass make_file() { @@ -203,16 +256,14 @@ make_connection() server_serverside=$(grep "type:1," "$server_evts" | sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q') - stdbuf -o0 -e0 printf "Established IP%s MPTCP Connection ns2 => ns1 \t\t" $is_v6 + print_test "Established IP${is_v6} MPTCP Connection ns2 => ns1" if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] && [ "$server_serverside" = 1 ] then - stdbuf -o0 -e0 printf "[OK]\n" + test_pass else - stdbuf -o0 -e0 printf "[FAIL]\n" - stdbuf -o0 -e0 printf "\tExpected tokens (c:%s - s:%s) and server (c:%d - s:%d)\n" \ - "${client_token}" "${server_token}" \ - "${client_serverside}" "${server_serverside}" + test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})" + mptcp_lib_result_print_all_tap exit 1 fi @@ -246,10 +297,10 @@ check_expected_one() if [ "${prev_ret}" = "0" ] then - stdbuf -o0 -e0 printf "[FAIL]\n" + test_fail fi - stdbuf -o0 -e0 printf "\tExpected value for '%s': '%s', got '%s'.\n" \ + _printf "\tExpected value for '%s': '%s', got '%s'.\n" \ "${var}" "${!exp}" "${!var}" return 1 } @@ -257,21 +308,21 @@ check_expected_one() # $@: all var names to check check_expected() { - local ret=0 + local rc=0 local var for var in "${@}" do - check_expected_one "${var}" "${ret}" || ret=1 + check_expected_one "${var}" "${rc}" || rc=1 done - if [ ${ret} -eq 0 ] + if [ ${rc} -eq 0 ] then - stdbuf -o0 -e0 printf "[OK]\n" + test_pass return 0 fi - exit 1 + return 1 } verify_announce_event() @@ -317,21 +368,20 @@ test_announce() local type type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") - stdbuf -o0 -e0 printf "ADD_ADDR 10.0.2.2 (ns2) => ns1, invalid token \t\t" + print_test "ADD_ADDR 10.0.2.2 (ns2) => ns1, invalid token" if [ "$type" = "" ] then - stdbuf -o0 -e0 printf "[OK]\n" + test_pass else - stdbuf -o0 -e0 printf "[FAIL]\n\ttype defined: %s\n" "${type}" - exit 1 + test_fail "type defined: ${type}" fi # ADD_ADDR from the client to server machine reusing the subflow port :>"$server_evts" ip netns exec "$ns2"\ ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id $client_addr_id dev\ - ns2eth1 > /dev/null 2>&1 - stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.2 (ns2) => ns1, reuse port \t\t" $client_addr_id + ns2eth1 + print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, reuse port" sleep 0.5 verify_announce_event $server_evts $ANNOUNCED $server4_token "10.0.2.2" $client_addr_id \ "$client4_port" @@ -339,8 +389,8 @@ test_announce() # ADD_ADDR6 from the client to server machine reusing the subflow port :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl ann\ - dead:beef:2::2 token "$client6_token" id $client_addr_id dev ns2eth1 > /dev/null 2>&1 - stdbuf -o0 -e0 printf "ADD_ADDR6 id:%d dead:beef:2::2 (ns2) => ns1, reuse port\t\t" $client_addr_id + dead:beef:2::2 token "$client6_token" id $client_addr_id dev ns2eth1 + print_test "ADD_ADDR6 id:${client_addr_id} dead:beef:2::2 (ns2) => ns1, reuse port" sleep 0.5 verify_announce_event "$server_evts" "$ANNOUNCED" "$server6_token" "dead:beef:2::2"\ "$client_addr_id" "$client6_port" "v6" @@ -349,8 +399,8 @@ test_announce() :>"$server_evts" client_addr_id=$((client_addr_id+1)) ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\ - $client_addr_id dev ns2eth1 port $new4_port > /dev/null 2>&1 - stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.2 (ns2) => ns1, new port \t\t\t" $client_addr_id + $client_addr_id dev ns2eth1 port $new4_port + print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, new port" sleep 0.5 verify_announce_event "$server_evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\ "$client_addr_id" "$new4_port" @@ -360,8 +410,8 @@ test_announce() # ADD_ADDR from the server to client machine reusing the subflow port ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ - $server_addr_id dev ns1eth2 > /dev/null 2>&1 - stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.1 (ns1) => ns2, reuse port \t\t" $server_addr_id + $server_addr_id dev ns1eth2 + print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port" sleep 0.5 verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ "$server_addr_id" "$app4_port" @@ -369,8 +419,8 @@ test_announce() # ADD_ADDR6 from the server to client machine reusing the subflow port :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\ - $server_addr_id dev ns1eth2 > /dev/null 2>&1 - stdbuf -o0 -e0 printf "ADD_ADDR6 id:%d dead:beef:2::1 (ns1) => ns2, reuse port\t\t" $server_addr_id + $server_addr_id dev ns1eth2 + print_test "ADD_ADDR6 id:${server_addr_id} dead:beef:2::1 (ns1) => ns2, reuse port" sleep 0.5 verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "dead:beef:2::1"\ "$server_addr_id" "$app6_port" "v6" @@ -379,8 +429,8 @@ test_announce() :>"$client_evts" server_addr_id=$((server_addr_id+1)) ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ - $server_addr_id dev ns1eth2 port $new4_port > /dev/null 2>&1 - stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.1 (ns1) => ns2, new port \t\t\t" $server_addr_id + $server_addr_id dev ns1eth2 port $new4_port + print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, new port" sleep 0.5 verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\ "$server_addr_id" "$new4_port" @@ -414,37 +464,34 @@ test_remove() local invalid_token=$(( client4_token - 1 )) ip netns exec "$ns2" ./pm_nl_ctl rem token $invalid_token id\ $client_addr_id > /dev/null 2>&1 - stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1, invalid token \t"\ - $client_addr_id + print_test "RM_ADDR id:${client_addr_id} ns2 => ns1, invalid token" local type type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") if [ "$type" = "" ] then - stdbuf -o0 -e0 printf "[OK]\n" + test_pass else - stdbuf -o0 -e0 printf "[FAIL]\n" + test_fail fi # RM_ADDR using an invalid addr id should result in no action local invalid_id=$(( client_addr_id + 1 )) ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ $invalid_id > /dev/null 2>&1 - stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1, invalid id \t"\ - $invalid_id + print_test "RM_ADDR id:${invalid_id} ns2 => ns1, invalid id" type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts") if [ "$type" = "" ] then - stdbuf -o0 -e0 printf "[OK]\n" + test_pass else - stdbuf -o0 -e0 printf "[FAIL]\n" + test_fail fi # RM_ADDR from the client to server machine :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ - $client_addr_id > /dev/null 2>&1 - stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1 \t"\ - $client_addr_id + $client_addr_id + print_test "RM_ADDR id:${client_addr_id} ns2 => ns1" sleep 0.5 verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id" @@ -452,18 +499,16 @@ test_remove() :>"$server_evts" client_addr_id=$(( client_addr_id - 1 )) ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\ - $client_addr_id > /dev/null 2>&1 - stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1 \t"\ - $client_addr_id + $client_addr_id + print_test "RM_ADDR id:${client_addr_id} ns2 => ns1" sleep 0.5 verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id" # RM_ADDR6 from the client to server machine :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl rem token "$client6_token" id\ - $client_addr_id > /dev/null 2>&1 - stdbuf -o0 -e0 printf "RM_ADDR6 id:%d ns2 => ns1 \t"\ - $client_addr_id + $client_addr_id + print_test "RM_ADDR6 id:${client_addr_id} ns2 => ns1" sleep 0.5 verify_remove_event "$server_evts" "$REMOVED" "$server6_token" "$client_addr_id" @@ -472,9 +517,8 @@ test_remove() # RM_ADDR from the server to client machine ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\ - $server_addr_id > /dev/null 2>&1 - stdbuf -o0 -e0 printf "RM_ADDR id:%d ns1 => ns2 \t"\ - $server_addr_id + $server_addr_id + print_test "RM_ADDR id:${server_addr_id} ns1 => ns2" sleep 0.5 verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id" @@ -482,16 +526,16 @@ test_remove() :>"$client_evts" server_addr_id=$(( server_addr_id - 1 )) ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\ - $server_addr_id > /dev/null 2>&1 - stdbuf -o0 -e0 printf "RM_ADDR id:%d ns1 => ns2 \t" $server_addr_id + $server_addr_id + print_test "RM_ADDR id:${server_addr_id} ns1 => ns2" sleep 0.5 verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id" # RM_ADDR6 from the server to client machine :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl rem token "$server6_token" id\ - $server_addr_id > /dev/null 2>&1 - stdbuf -o0 -e0 printf "RM_ADDR6 id:%d ns1 => ns2 \t" $server_addr_id + $server_addr_id + print_test "RM_ADDR6 id:${server_addr_id} ns1 => ns2" sleep 0.5 verify_remove_event "$client_evts" "$REMOVED" "$client6_token" "$server_addr_id" } @@ -518,25 +562,24 @@ verify_subflow_events() local dport local locid local remid + local info + + info="${e_saddr} (${e_from}) => ${e_daddr} (${e_to})" if [ "$e_type" = "$SUB_ESTABLISHED" ] then if [ "$e_family" = "$AF_INET6" ] then - stdbuf -o0 -e0 printf "CREATE_SUBFLOW6 %s (%s) => %s (%s) "\ - "$e_saddr" "$e_from" "$e_daddr" "$e_to" + print_test "CREATE_SUBFLOW6 ${info}" else - stdbuf -o0 -e0 printf "CREATE_SUBFLOW %s (%s) => %s (%s) \t"\ - "$e_saddr" "$e_from" "$e_daddr" "$e_to" + print_test "CREATE_SUBFLOW ${info}" fi else if [ "$e_family" = "$AF_INET6" ] then - stdbuf -o0 -e0 printf "DESTROY_SUBFLOW6 %s (%s) => %s (%s) "\ - "$e_saddr" "$e_from" "$e_daddr" "$e_to" + print_test "DESTROY_SUBFLOW6 ${info}" else - stdbuf -o0 -e0 printf "DESTROY_SUBFLOW %s (%s) => %s (%s) \t"\ - "$e_saddr" "$e_from" "$e_daddr" "$e_to" + print_test "DESTROY_SUBFLOW ${info}" fi fi @@ -567,18 +610,18 @@ test_subflows() # Attempt to add a listener at 10.0.2.2:<subflow-port> ip netns exec "$ns2" ./pm_nl_ctl listen 10.0.2.2\ - "$client4_port" > /dev/null 2>&1 & + "$client4_port" & local listener_pid=$! # ADD_ADDR from client to server machine reusing the subflow port ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\ - $client_addr_id > /dev/null 2>&1 + $client_addr_id sleep 0.5 # CREATE_SUBFLOW from server to client machine :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2\ - rport "$client4_port" token "$server4_token" > /dev/null 2>&1 + rport "$client4_port" token "$server4_token" sleep 0.5 verify_subflow_events $server_evts $SUB_ESTABLISHED $server4_token $AF_INET "10.0.2.1" \ "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2" @@ -592,31 +635,31 @@ test_subflows() # DESTROY_SUBFLOW from server to client machine :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl dsf lip 10.0.2.1 lport "$sport" rip 10.0.2.2 rport\ - "$client4_port" token "$server4_token" > /dev/null 2>&1 + "$client4_port" token "$server4_token" sleep 0.5 verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\ "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2" # RM_ADDR from client to server machine ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\ - "$client4_token" > /dev/null 2>&1 + "$client4_token" sleep 0.5 # Attempt to add a listener at dead:beef:2::2:<subflow-port> ip netns exec "$ns2" ./pm_nl_ctl listen dead:beef:2::2\ - "$client6_port" > /dev/null 2>&1 & + "$client6_port" & listener_pid=$! # ADD_ADDR6 from client to server machine reusing the subflow port :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl ann dead:beef:2::2 token "$client6_token" id\ - $client_addr_id > /dev/null 2>&1 + $client_addr_id sleep 0.5 # CREATE_SUBFLOW6 from server to client machine :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl csf lip dead:beef:2::1 lid 23 rip\ - dead:beef:2::2 rport "$client6_port" token "$server6_token" > /dev/null 2>&1 + dead:beef:2::2 rport "$client6_port" token "$server6_token" sleep 0.5 verify_subflow_events "$server_evts" "$SUB_ESTABLISHED" "$server6_token" "$AF_INET6"\ "dead:beef:2::1" "dead:beef:2::2" "$client6_port" "23"\ @@ -630,7 +673,7 @@ test_subflows() # DESTROY_SUBFLOW6 from server to client machine :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl dsf lip dead:beef:2::1 lport "$sport" rip\ - dead:beef:2::2 rport "$client6_port" token "$server6_token" > /dev/null 2>&1 + dead:beef:2::2 rport "$client6_port" token "$server6_token" sleep 0.5 verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server6_token" "$AF_INET6"\ "dead:beef:2::1" "dead:beef:2::2" "$client6_port" "23"\ @@ -638,24 +681,24 @@ test_subflows() # RM_ADDR from client to server machine ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\ - "$client6_token" > /dev/null 2>&1 + "$client6_token" sleep 0.5 # Attempt to add a listener at 10.0.2.2:<new-port> ip netns exec "$ns2" ./pm_nl_ctl listen 10.0.2.2\ - $new4_port > /dev/null 2>&1 & + $new4_port & listener_pid=$! # ADD_ADDR from client to server machine using a new port :>"$server_evts" ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\ - $client_addr_id port $new4_port > /dev/null 2>&1 + $client_addr_id port $new4_port sleep 0.5 # CREATE_SUBFLOW from server to client machine :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2 rport\ - $new4_port token "$server4_token" > /dev/null 2>&1 + $new4_port token "$server4_token" sleep 0.5 verify_subflow_events "$server_evts" "$SUB_ESTABLISHED" "$server4_token" "$AF_INET"\ "10.0.2.1" "10.0.2.2" "$new4_port" "23"\ @@ -669,32 +712,32 @@ test_subflows() # DESTROY_SUBFLOW from server to client machine :>"$server_evts" ip netns exec "$ns1" ./pm_nl_ctl dsf lip 10.0.2.1 lport "$sport" rip 10.0.2.2 rport\ - $new4_port token "$server4_token" > /dev/null 2>&1 + $new4_port token "$server4_token" sleep 0.5 verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\ "10.0.2.2" "$new4_port" "23" "$client_addr_id" "ns1" "ns2" # RM_ADDR from client to server machine ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\ - "$client4_token" > /dev/null 2>&1 + "$client4_token" # Capture events on the network namespace running the client :>"$client_evts" # Attempt to add a listener at 10.0.2.1:<subflow-port> ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\ - $app4_port > /dev/null 2>&1 & + $app4_port & listener_pid=$! # ADD_ADDR from server to client machine reusing the subflow port ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ - $server_addr_id > /dev/null 2>&1 + $server_addr_id sleep 0.5 # CREATE_SUBFLOW from client to server machine :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\ - $app4_port token "$client4_token" > /dev/null 2>&1 + $app4_port token "$client4_token" sleep 0.5 verify_subflow_events $client_evts $SUB_ESTABLISHED $client4_token $AF_INET "10.0.2.2"\ "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1" @@ -707,31 +750,31 @@ test_subflows() # DESTROY_SUBFLOW from client to server machine :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\ - $app4_port token "$client4_token" > /dev/null 2>&1 + $app4_port token "$client4_token" sleep 0.5 verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\ "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1" # RM_ADDR from server to client machine ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\ - "$server4_token" > /dev/null 2>&1 + "$server4_token" sleep 0.5 # Attempt to add a listener at dead:beef:2::1:<subflow-port> ip netns exec "$ns1" ./pm_nl_ctl listen dead:beef:2::1\ - $app6_port > /dev/null 2>&1 & + $app6_port & listener_pid=$! # ADD_ADDR6 from server to client machine reusing the subflow port :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\ - $server_addr_id > /dev/null 2>&1 + $server_addr_id sleep 0.5 # CREATE_SUBFLOW6 from client to server machine :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl csf lip dead:beef:2::2 lid 23 rip\ - dead:beef:2::1 rport $app6_port token "$client6_token" > /dev/null 2>&1 + dead:beef:2::1 rport $app6_port token "$client6_token" sleep 0.5 verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client6_token"\ "$AF_INET6" "dead:beef:2::2"\ @@ -746,31 +789,31 @@ test_subflows() # DESTROY_SUBFLOW6 from client to server machine :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl dsf lip dead:beef:2::2 lport "$sport" rip\ - dead:beef:2::1 rport $app6_port token "$client6_token" > /dev/null 2>&1 + dead:beef:2::1 rport $app6_port token "$client6_token" sleep 0.5 verify_subflow_events $client_evts $SUB_CLOSED $client6_token $AF_INET6 "dead:beef:2::2"\ "dead:beef:2::1" "$app6_port" "23" "$server_addr_id" "ns2" "ns1" # RM_ADDR6 from server to client machine ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\ - "$server6_token" > /dev/null 2>&1 + "$server6_token" sleep 0.5 # Attempt to add a listener at 10.0.2.1:<new-port> ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\ - $new4_port > /dev/null 2>&1 & + $new4_port & listener_pid=$! # ADD_ADDR from server to client machine using a new port :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\ - $server_addr_id port $new4_port > /dev/null 2>&1 + $server_addr_id port $new4_port sleep 0.5 # CREATE_SUBFLOW from client to server machine :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\ - $new4_port token "$client4_token" > /dev/null 2>&1 + $new4_port token "$client4_token" sleep 0.5 verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client4_token" "$AF_INET"\ "10.0.2.2" "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1" @@ -783,14 +826,14 @@ test_subflows() # DESTROY_SUBFLOW from client to server machine :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\ - $new4_port token "$client4_token" > /dev/null 2>&1 + $new4_port token "$client4_token" sleep 0.5 verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\ "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1" # RM_ADDR from server to client machine ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\ - "$server4_token" > /dev/null 2>&1 + "$server4_token" } test_subflows_v4_v6_mix() @@ -799,15 +842,15 @@ test_subflows_v4_v6_mix() # Attempt to add a listener at 10.0.2.1:<subflow-port> ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\ - $app6_port > /dev/null 2>&1 & + $app6_port & local listener_pid=$! # ADD_ADDR4 from server to client machine reusing the subflow port on # the established v6 connection :>"$client_evts" ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server6_token" id\ - $server_addr_id dev ns1eth2 > /dev/null 2>&1 - stdbuf -o0 -e0 printf "ADD_ADDR4 id:%d 10.0.2.1 (ns1) => ns2, reuse port\t\t" $server_addr_id + $server_addr_id dev ns1eth2 + print_test "ADD_ADDR4 id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port" sleep 0.5 verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "10.0.2.1"\ "$server_addr_id" "$app6_port" @@ -815,7 +858,7 @@ test_subflows_v4_v6_mix() # CREATE_SUBFLOW from client to server machine :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\ - $app6_port token "$client6_token" > /dev/null 2>&1 + $app6_port token "$client6_token" sleep 0.5 verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client6_token"\ "$AF_INET" "10.0.2.2" "10.0.2.1" "$app6_port" "23"\ @@ -829,7 +872,7 @@ test_subflows_v4_v6_mix() # DESTROY_SUBFLOW from client to server machine :>"$client_evts" ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\ - $app6_port token "$client6_token" > /dev/null 2>&1 + $app6_port token "$client6_token" sleep 0.5 verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client6_token" \ "$AF_INET" "10.0.2.2" "10.0.2.1" "$app6_port" "23"\ @@ -837,7 +880,7 @@ test_subflows_v4_v6_mix() # RM_ADDR from server to client machine ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\ - "$server6_token" > /dev/null 2>&1 + "$server6_token" sleep 0.5 } @@ -848,29 +891,27 @@ test_prio() local count # Send MP_PRIO signal from client to server machine - ip netns exec "$ns2" ./pm_nl_ctl set 10.0.1.2 port "$client4_port" flags backup token "$client4_token" rip 10.0.1.1 rport "$server4_port" + ip netns exec "$ns2" ./pm_nl_ctl set 10.0.1.2 port "$client4_port" flags backup token "$client4_token" rip 10.0.1.1 rport "$app4_port" sleep 0.5 # Check TX - stdbuf -o0 -e0 printf "MP_PRIO TX \t" + print_test "MP_PRIO TX" count=$(ip netns exec "$ns2" nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}') [ -z "$count" ] && count=0 if [ $count != 1 ]; then - stdbuf -o0 -e0 printf "[FAIL]\n\tCount != 1: %d\n" "${count}" - exit 1 + test_fail "Count != 1: ${count}" else - stdbuf -o0 -e0 printf "[OK]\n" + test_pass fi # Check RX - stdbuf -o0 -e0 printf "MP_PRIO RX \t" + print_test "MP_PRIO RX" count=$(ip netns exec "$ns1" nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}') [ -z "$count" ] && count=0 if [ $count != 1 ]; then - stdbuf -o0 -e0 printf "[FAIL]\n\tCount != 1: %d\n" "${count}" - exit 1 + test_fail "Count != 1: ${count}" else - stdbuf -o0 -e0 printf "[OK]\n" + test_pass fi } @@ -887,11 +928,9 @@ verify_listener_events() local sport if [ $e_type = $LISTENER_CREATED ]; then - stdbuf -o0 -e0 printf "CREATE_LISTENER %s:%s\t\t\t\t\t"\ - $e_saddr $e_sport + print_test "CREATE_LISTENER $e_saddr:$e_sport" elif [ $e_type = $LISTENER_CLOSED ]; then - stdbuf -o0 -e0 printf "CLOSE_LISTENER %s:%s\t\t\t\t\t"\ - $e_saddr $e_sport + print_test "CLOSE_LISTENER $e_saddr:$e_sport" fi type=$(grep "type:$e_type," $evt | @@ -916,7 +955,8 @@ test_listener() print_title "Listener tests" if ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then - stdbuf -o0 -e0 printf "LISTENER events \t[SKIP] Not supported\n" + print_test "LISTENER events" + test_skip return fi @@ -925,7 +965,7 @@ test_listener() # Attempt to add a listener at 10.0.2.2:<subflow-port> ip netns exec $ns2 ./pm_nl_ctl listen 10.0.2.2\ - $client4_port > /dev/null 2>&1 & + $client4_port & local listener_pid=$! sleep 0.5 @@ -933,12 +973,12 @@ test_listener() # ADD_ADDR from client to server machine reusing the subflow port ip netns exec $ns2 ./pm_nl_ctl ann 10.0.2.2 token $client4_token id\ - $client_addr_id > /dev/null 2>&1 + $client_addr_id sleep 0.5 # CREATE_SUBFLOW from server to client machine ip netns exec $ns1 ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2\ - rport $client4_port token $server4_token > /dev/null 2>&1 + rport $client4_port token $server4_token sleep 0.5 # Delete the listener from the client ns, if one was created @@ -959,4 +999,5 @@ test_subflows_v4_v6_mix test_prio test_listener -exit 0 +mptcp_lib_result_print_all_tap +exit ${ret} diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index ee9a72982705..39a0e01f8554 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -76,7 +76,9 @@ struct sock_args { has_grp:1, has_expected_laddr:1, has_expected_raddr:1, - bind_test_only:1; + bind_test_only:1, + client_dontroute:1, + server_dontroute:1; unsigned short port; @@ -611,6 +613,18 @@ static int set_dsfield(int sd, int version, int dsfield) return 0; } +static int set_dontroute(int sd) +{ + unsigned int one = 1; + + if (setsockopt(sd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0) { + log_err_errno("setsockopt(SO_DONTROUTE)"); + return -1; + } + + return 0; +} + static int str_to_uint(const char *str, int min, int max, unsigned int *value) { int number; @@ -1351,6 +1365,14 @@ static int msock_init(struct sock_args *args, int server) if (set_dsfield(sd, AF_INET, args->dsfield) != 0) goto out_err; + if (server) { + if (args->server_dontroute && set_dontroute(sd) != 0) + goto out_err; + } else { + if (args->client_dontroute && set_dontroute(sd) != 0) + goto out_err; + } + if (args->dev && bind_to_device(sd, args->dev) != 0) goto out_err; else if (args->use_setsockopt && @@ -1482,6 +1504,9 @@ static int lsock_init(struct sock_args *args) if (set_dsfield(sd, args->version, args->dsfield) != 0) goto err; + if (args->server_dontroute && set_dontroute(sd) != 0) + goto err; + if (args->dev && bind_to_device(sd, args->dev) != 0) goto err; else if (args->use_setsockopt && @@ -1698,6 +1723,9 @@ static int connectsock(void *addr, socklen_t alen, struct sock_args *args) if (set_dsfield(sd, args->version, args->dsfield) != 0) goto err; + if (args->client_dontroute && set_dontroute(sd) != 0) + goto err; + if (args->dev && bind_to_device(sd, args->dev) != 0) goto err; else if (args->use_setsockopt && @@ -1905,10 +1933,14 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args) #define GETOPT_STR "sr:l:c:Q:p:t:g:P:DRn:M:X:m:d:I:BN:O:SUCi6xL:0:1:2:3:Fbqf" #define OPT_FORCE_BIND_KEY_IFINDEX 1001 #define OPT_NO_BIND_KEY_IFINDEX 1002 +#define OPT_CLIENT_DONTROUTE 1003 +#define OPT_SERVER_DONTROUTE 1004 static struct option long_opts[] = { {"force-bind-key-ifindex", 0, 0, OPT_FORCE_BIND_KEY_IFINDEX}, {"no-bind-key-ifindex", 0, 0, OPT_NO_BIND_KEY_IFINDEX}, + {"client-dontroute", 0, 0, OPT_CLIENT_DONTROUTE}, + {"server-dontroute", 0, 0, OPT_SERVER_DONTROUTE}, {0, 0, 0, 0} }; @@ -1954,6 +1986,12 @@ static void print_usage(char *prog) " --no-bind-key-ifindex: Force TCP_MD5SIG_FLAG_IFINDEX off\n" " --force-bind-key-ifindex: Force TCP_MD5SIG_FLAG_IFINDEX on\n" " (default: only if -I is passed)\n" + " --client-dontroute: don't use gateways for client socket: send\n" + " packets only if destination is on link (see\n" + " SO_DONTROUTE in socket(7))\n" + " --server-dontroute: don't use gateways for server socket: send\n" + " packets only if destination is on link (see\n" + " SO_DONTROUTE in socket(7))\n" "\n" " -g grp multicast group (e.g., 239.1.1.1)\n" " -i interactive mode (default is echo and terminate)\n" @@ -2076,6 +2114,12 @@ int main(int argc, char *argv[]) case OPT_NO_BIND_KEY_IFINDEX: args.bind_key_ifindex = -1; break; + case OPT_CLIENT_DONTROUTE: + args.client_dontroute = 1; + break; + case OPT_SERVER_DONTROUTE: + args.server_dontroute = 1; + break; case 'X': args.client_pw = optarg; break; diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index 3117a4be0cd0..9c2012d70b08 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -11,8 +11,13 @@ VERBOSE=0 TRACING=0 tests=" + arp_ping eth-arp: Basic arp ping between two NS + ct_connect_v4 ip4-ct-xon: Basic ipv4 tcp connection using ct + connect_v4 ip4-xon: Basic ipv4 ping between two NS + nat_connect_v4 ip4-nat-xon: Basic ipv4 tcp connection via NAT netlink_checks ovsnl: validate netlink attrs and settings - upcall_interfaces ovs: test the upcall interfaces" + upcall_interfaces ovs: test the upcall interfaces + drop_reason drop: test drop reasons are emitted" info() { [ $VERBOSE = 0 ] || echo $* @@ -127,6 +132,35 @@ ovs_add_netns_and_veths () { return 0 } +ovs_add_flow () { + info "Adding flow to DP: sbx:$1 br:$2 flow:$3 act:$4" + ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-flow "$2" "$3" "$4" + if [ $? -ne 0 ]; then + echo "Flow [ $3 : $4 ] failed" >> ${ovs_dir}/debug.log + return 1 + fi + return 0 +} + +ovs_drop_record_and_run () { + local sbx=$1 + shift + + perf record -a -q -e skb:kfree_skb -o ${ovs_dir}/perf.data $* \ + >> ${ovs_dir}/stdout 2>> ${ovs_dir}/stderr + return $? +} + +ovs_drop_reason_count() +{ + local reason=$1 + + local perf_output=`perf script -i ${ovs_dir}/perf.data -F trace:event,trace` + local pattern="skb:kfree_skb:.*reason: $reason" + + return `echo "$perf_output" | grep "$pattern" | wc -l` +} + usage() { echo echo "$0 [OPTIONS] [TEST]..." @@ -141,6 +175,285 @@ usage() { exit 1 } +# drop_reason test +# - drop packets and verify the right drop reason is reported +test_drop_reason() { + which perf >/dev/null 2>&1 || return $ksft_skip + + sbx_add "test_drop_reason" || return $? + + ovs_add_dp "test_drop_reason" dropreason || return 1 + + info "create namespaces" + for ns in client server; do + ovs_add_netns_and_veths "test_drop_reason" "dropreason" "$ns" \ + "${ns:0:1}0" "${ns:0:1}1" || return 1 + done + + # Setup client namespace + ip netns exec client ip addr add 172.31.110.10/24 dev c1 + ip netns exec client ip link set c1 up + + # Setup server namespace + ip netns exec server ip addr add 172.31.110.20/24 dev s1 + ip netns exec server ip link set s1 up + + # Allow ARP + ovs_add_flow "test_drop_reason" dropreason \ + 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1 + ovs_add_flow "test_drop_reason" dropreason \ + 'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1 + + # Allow client ICMP traffic but drop return path + ovs_add_flow "test_drop_reason" dropreason \ + "in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10,proto=1),icmp()" '2' + ovs_add_flow "test_drop_reason" dropreason \ + "in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,proto=1),icmp()" 'drop' + + ovs_drop_record_and_run "test_drop_reason" ip netns exec client ping -c 2 172.31.110.20 + ovs_drop_reason_count 0x30001 # OVS_DROP_FLOW_ACTION + if [[ "$?" -ne "2" ]]; then + info "Did not detect expected drops: $?" + return 1 + fi + + # Drop UDP 6000 traffic with an explicit action and an error code. + ovs_add_flow "test_drop_reason" dropreason \ + "in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10,proto=17),udp(dst=6000)" \ + 'drop(42)' + # Drop UDP 7000 traffic with an explicit action with no error code. + ovs_add_flow "test_drop_reason" dropreason \ + "in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10,proto=17),udp(dst=7000)" \ + 'drop(0)' + + ovs_drop_record_and_run \ + "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 6000 + ovs_drop_reason_count 0x30004 # OVS_DROP_EXPLICIT_ACTION_ERROR + if [[ "$?" -ne "1" ]]; then + info "Did not detect expected explicit error drops: $?" + return 1 + fi + + ovs_drop_record_and_run \ + "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 7000 + ovs_drop_reason_count 0x30003 # OVS_DROP_EXPLICIT_ACTION + if [[ "$?" -ne "1" ]]; then + info "Did not detect expected explicit drops: $?" + return 1 + fi + + return 0 +} + +# arp_ping test +# - client has 1500 byte MTU +# - server has 1500 byte MTU +# - send ARP ping between two ns +test_arp_ping () { + + which arping >/dev/null 2>&1 || return $ksft_skip + + sbx_add "test_arp_ping" || return $? + + ovs_add_dp "test_arp_ping" arpping || return 1 + + info "create namespaces" + for ns in client server; do + ovs_add_netns_and_veths "test_arp_ping" "arpping" "$ns" \ + "${ns:0:1}0" "${ns:0:1}1" || return 1 + done + + # Setup client namespace + ip netns exec client ip addr add 172.31.110.10/24 dev c1 + ip netns exec client ip link set c1 up + HW_CLIENT=`ip netns exec client ip link show dev c1 | grep -E 'link/ether [0-9a-f:]+' | awk '{print $2;}'` + info "Client hwaddr: $HW_CLIENT" + + # Setup server namespace + ip netns exec server ip addr add 172.31.110.20/24 dev s1 + ip netns exec server ip link set s1 up + HW_SERVER=`ip netns exec server ip link show dev s1 | grep -E 'link/ether [0-9a-f:]+' | awk '{print $2;}'` + info "Server hwaddr: $HW_SERVER" + + ovs_add_flow "test_arp_ping" arpping \ + "in_port(1),eth(),eth_type(0x0806),arp(sip=172.31.110.10,tip=172.31.110.20,sha=$HW_CLIENT,tha=ff:ff:ff:ff:ff:ff)" '2' || return 1 + ovs_add_flow "test_arp_ping" arpping \ + "in_port(2),eth(),eth_type(0x0806),arp()" '1' || return 1 + + ovs_sbx "test_arp_ping" ip netns exec client arping -I c1 172.31.110.20 -c 1 || return 1 + + return 0 +} + +# ct_connect_v4 test +# - client has 1500 byte MTU +# - server has 1500 byte MTU +# - use ICMP to ping in each direction +# - only allow CT state stuff to pass through new in c -> s +test_ct_connect_v4 () { + + which nc >/dev/null 2>/dev/null || return $ksft_skip + + sbx_add "test_ct_connect_v4" || return $? + + ovs_add_dp "test_ct_connect_v4" ct4 || return 1 + info "create namespaces" + for ns in client server; do + ovs_add_netns_and_veths "test_ct_connect_v4" "ct4" "$ns" \ + "${ns:0:1}0" "${ns:0:1}1" || return 1 + done + + ip netns exec client ip addr add 172.31.110.10/24 dev c1 + ip netns exec client ip link set c1 up + ip netns exec server ip addr add 172.31.110.20/24 dev s1 + ip netns exec server ip link set s1 up + + # Add forwarding for ARP and ip packets - completely wildcarded + ovs_add_flow "test_ct_connect_v4" ct4 \ + 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1 + ovs_add_flow "test_ct_connect_v4" ct4 \ + 'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1 + ovs_add_flow "test_ct_connect_v4" ct4 \ + 'ct_state(-trk),eth(),eth_type(0x0800),ipv4()' \ + 'ct(commit),recirc(0x1)' || return 1 + ovs_add_flow "test_ct_connect_v4" ct4 \ + 'recirc_id(0x1),ct_state(+trk+new),in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10)' \ + '2' || return 1 + ovs_add_flow "test_ct_connect_v4" ct4 \ + 'recirc_id(0x1),ct_state(+trk+est),in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10)' \ + '2' || return 1 + ovs_add_flow "test_ct_connect_v4" ct4 \ + 'recirc_id(0x1),ct_state(+trk+est),in_port(2),eth(),eth_type(0x0800),ipv4(dst=172.31.110.10)' \ + '1' || return 1 + ovs_add_flow "test_ct_connect_v4" ct4 \ + 'recirc_id(0x1),ct_state(+trk+inv),eth(),eth_type(0x0800),ipv4()' 'drop' || \ + return 1 + + # do a ping + ovs_sbx "test_ct_connect_v4" ip netns exec client ping 172.31.110.20 -c 3 || return 1 + + # create an echo server in 'server' + echo "server" | \ + ovs_netns_spawn_daemon "test_ct_connect_v4" "server" \ + nc -lvnp 4443 + ovs_sbx "test_ct_connect_v4" ip netns exec client nc -i 1 -zv 172.31.110.20 4443 || return 1 + + # Now test in the other direction (should fail) + echo "client" | \ + ovs_netns_spawn_daemon "test_ct_connect_v4" "client" \ + nc -lvnp 4443 + ovs_sbx "test_ct_connect_v4" ip netns exec client nc -i 1 -zv 172.31.110.10 4443 + if [ $? == 0 ]; then + info "ct connect to client was successful" + return 1 + fi + + info "done..." + return 0 +} + +# connect_v4 test +# - client has 1500 byte MTU +# - server has 1500 byte MTU +# - use ICMP to ping in each direction +test_connect_v4 () { + + sbx_add "test_connect_v4" || return $? + + ovs_add_dp "test_connect_v4" cv4 || return 1 + + info "create namespaces" + for ns in client server; do + ovs_add_netns_and_veths "test_connect_v4" "cv4" "$ns" \ + "${ns:0:1}0" "${ns:0:1}1" || return 1 + done + + + ip netns exec client ip addr add 172.31.110.10/24 dev c1 + ip netns exec client ip link set c1 up + ip netns exec server ip addr add 172.31.110.20/24 dev s1 + ip netns exec server ip link set s1 up + + # Add forwarding for ARP and ip packets - completely wildcarded + ovs_add_flow "test_connect_v4" cv4 \ + 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1 + ovs_add_flow "test_connect_v4" cv4 \ + 'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1 + ovs_add_flow "test_connect_v4" cv4 \ + 'in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10)' '2' || return 1 + ovs_add_flow "test_connect_v4" cv4 \ + 'in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20)' '1' || return 1 + + # do a ping + ovs_sbx "test_connect_v4" ip netns exec client ping 172.31.110.20 -c 3 || return 1 + + info "done..." + return 0 +} + +# nat_connect_v4 test +# - client has 1500 byte MTU +# - server has 1500 byte MTU +# - use ICMP to ping in each direction +# - only allow CT state stuff to pass through new in c -> s +test_nat_connect_v4 () { + which nc >/dev/null 2>/dev/null || return $ksft_skip + + sbx_add "test_nat_connect_v4" || return $? + + ovs_add_dp "test_nat_connect_v4" nat4 || return 1 + info "create namespaces" + for ns in client server; do + ovs_add_netns_and_veths "test_nat_connect_v4" "nat4" "$ns" \ + "${ns:0:1}0" "${ns:0:1}1" || return 1 + done + + ip netns exec client ip addr add 172.31.110.10/24 dev c1 + ip netns exec client ip link set c1 up + ip netns exec server ip addr add 172.31.110.20/24 dev s1 + ip netns exec server ip link set s1 up + + ip netns exec client ip route add default via 172.31.110.20 + + ovs_add_flow "test_nat_connect_v4" nat4 \ + 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1 + ovs_add_flow "test_nat_connect_v4" nat4 \ + 'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1 + ovs_add_flow "test_nat_connect_v4" nat4 \ + "ct_state(-trk),in_port(1),eth(),eth_type(0x0800),ipv4(dst=192.168.0.20)" \ + "ct(commit,nat(dst=172.31.110.20)),recirc(0x1)" + ovs_add_flow "test_nat_connect_v4" nat4 \ + "ct_state(-trk),in_port(2),eth(),eth_type(0x0800),ipv4()" \ + "ct(commit,nat),recirc(0x2)" + + ovs_add_flow "test_nat_connect_v4" nat4 \ + "recirc_id(0x1),ct_state(+trk-inv),in_port(1),eth(),eth_type(0x0800),ipv4()" "2" + ovs_add_flow "test_nat_connect_v4" nat4 \ + "recirc_id(0x2),ct_state(+trk-inv),in_port(2),eth(),eth_type(0x0800),ipv4()" "1" + + # do a ping + ovs_sbx "test_nat_connect_v4" ip netns exec client ping 192.168.0.20 -c 3 || return 1 + + # create an echo server in 'server' + echo "server" | \ + ovs_netns_spawn_daemon "test_nat_connect_v4" "server" \ + nc -lvnp 4443 + ovs_sbx "test_nat_connect_v4" ip netns exec client nc -i 1 -zv 192.168.0.20 4443 || return 1 + + # Now test in the other direction (should fail) + echo "client" | \ + ovs_netns_spawn_daemon "test_nat_connect_v4" "client" \ + nc -lvnp 4443 + ovs_sbx "test_nat_connect_v4" ip netns exec client nc -i 1 -zv 172.31.110.10 4443 + if [ $? == 0 ]; then + info "connect to client was successful" + return 1 + fi + + info "done..." + return 0 +} + # netlink_validation # - Create a dp # - check no warning with "old version" simulation @@ -170,6 +483,16 @@ test_netlink_checks () { wc -l) == 2 ] || \ return 1 + ERR_MSG="Flow actions may not be safe on all matching packets" + PRE_TEST=$(dmesg | grep -c "${ERR_MSG}") + ovs_add_flow "test_netlink_checks" nv0 \ + 'in_port(1),eth(),eth_type(0x0806),arp()' 'drop(0),2' \ + &> /dev/null && return 1 + POST_TEST=$(dmesg | grep -c "${ERR_MSG}") + if [ "$PRE_TEST" == "$POST_TEST" ]; then + info "failed - error not generated" + return 1 + fi return 0 } diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 1c8b36bc15d4..912dc8c49085 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -9,9 +9,12 @@ import errno import ipaddress import logging import multiprocessing +import re import struct import sys import time +import types +import uuid try: from pyroute2 import NDB @@ -59,24 +62,207 @@ def macstr(mac): return outstr -def convert_mac(mac_str, mask=False): - if mac_str is None or mac_str == "": - mac_str = "00:00:00:00:00:00" - if mask is True and mac_str != "00:00:00:00:00:00": - mac_str = "FF:FF:FF:FF:FF:FF" - mac_split = mac_str.split(":") - ret = bytearray([int(i, 16) for i in mac_split]) - return bytes(ret) +def strcspn(str1, str2): + tot = 0 + for char in str1: + if str2.find(char) != -1: + return tot + tot += 1 + return tot -def convert_ipv4(ip, mask=False): - if ip is None: - ip = 0 - if mask is True: - if ip != 0: - ip = int(ipaddress.IPv4Address(ip)) & 0xFFFFFFFF +def strspn(str1, str2): + tot = 0 + for char in str1: + if str2.find(char) == -1: + return tot + tot += 1 + return tot - return int(ipaddress.IPv4Address(ip)) + +def intparse(statestr, defmask="0xffffffff"): + totalparse = strspn(statestr, "0123456789abcdefABCDEFx/") + # scan until "/" + count = strspn(statestr, "x0123456789abcdefABCDEF") + + firstnum = statestr[:count] + if firstnum[-1] == "/": + firstnum = firstnum[:-1] + k = int(firstnum, 0) + + m = None + if defmask is not None: + secondnum = defmask + if statestr[count] == "/": + secondnum = statestr[count + 1 :] # this is wrong... + m = int(secondnum, 0) + + return statestr[totalparse + 1 :], k, m + + +def parse_flags(flag_str, flag_vals): + bitResult = 0 + maskResult = 0 + + if len(flag_str) == 0: + return flag_str, bitResult, maskResult + + if flag_str[0].isdigit(): + idx = 0 + while flag_str[idx].isdigit() or flag_str[idx] == "x": + idx += 1 + digits = flag_str[:idx] + flag_str = flag_str[idx:] + + bitResult = int(digits, 0) + maskResult = int(digits, 0) + + while len(flag_str) > 0 and (flag_str[0] == "+" or flag_str[0] == "-"): + if flag_str[0] == "+": + setFlag = True + elif flag_str[0] == "-": + setFlag = False + + flag_str = flag_str[1:] + + flag_len = 0 + while ( + flag_str[flag_len] != "+" + and flag_str[flag_len] != "-" + and flag_str[flag_len] != "," + and flag_str[flag_len] != ")" + ): + flag_len += 1 + + flag = flag_str[0:flag_len] + + if flag in flag_vals: + if maskResult & flag_vals[flag]: + raise KeyError( + "Flag %s set once, cannot be set in multiples" % flag + ) + + if setFlag: + bitResult |= flag_vals[flag] + + maskResult |= flag_vals[flag] + else: + raise KeyError("Missing flag value: %s" % flag) + + flag_str = flag_str[flag_len:] + + return flag_str, bitResult, maskResult + + +def parse_ct_state(statestr): + ct_flags = { + "new": 1 << 0, + "est": 1 << 1, + "rel": 1 << 2, + "rpl": 1 << 3, + "inv": 1 << 4, + "trk": 1 << 5, + "snat": 1 << 6, + "dnat": 1 << 7, + } + + return parse_flags(statestr, ct_flags) + + +def convert_mac(data): + def to_bytes(mac): + mac_split = mac.split(":") + ret = bytearray([int(i, 16) for i in mac_split]) + return bytes(ret) + + mac_str, _, mask_str = data.partition('/') + + if not mac_str: + mac_str = mask_str = "00:00:00:00:00:00" + elif not mask_str: + mask_str = "FF:FF:FF:FF:FF:FF" + + return to_bytes(mac_str), to_bytes(mask_str) + +def convert_ipv4(data): + ip, _, mask = data.partition('/') + + if not ip: + ip = mask = 0 + elif not mask: + mask = 0xFFFFFFFF + elif mask.isdigit(): + mask = (0xFFFFFFFF << (32 - int(mask))) & 0xFFFFFFFF + + return int(ipaddress.IPv4Address(ip)), int(ipaddress.IPv4Address(mask)) + +def convert_int(size): + def convert_int_sized(data): + value, _, mask = data.partition('/') + + if not value: + return 0, 0 + elif not mask: + return int(value, 0), pow(2, size) - 1 + else: + return int(value, 0), int(mask, 0) + + return convert_int_sized + +def parse_starts_block(block_str, scanstr, returnskipped, scanregex=False): + if scanregex: + m = re.search(scanstr, block_str) + if m is None: + if returnskipped: + return block_str + return False + if returnskipped: + block_str = block_str[len(m.group(0)) :] + return block_str + return True + + if block_str.startswith(scanstr): + if returnskipped: + block_str = block_str[len(scanstr) :] + else: + return True + + if returnskipped: + return block_str + + return False + + +def parse_extract_field( + block_str, fieldstr, scanfmt, convert, masked=False, defval=None +): + if fieldstr and not block_str.startswith(fieldstr): + return block_str, defval + + if fieldstr: + str_skiplen = len(fieldstr) + str_skipped = block_str[str_skiplen:] + if str_skiplen == 0: + return str_skipped, defval + else: + str_skiplen = 0 + str_skipped = block_str + + m = re.search(scanfmt, str_skipped) + if m is None: + raise ValueError("Bad fmt string") + + data = m.group(0) + if convert: + data = convert(m.group(0)) + + str_skipped = str_skipped[len(m.group(0)) :] + if masked: + if str_skipped[0] == "/": + raise ValueError("Masking support TBD...") + + str_skipped = str_skipped[strspn(str_skipped, ", ") :] + return str_skipped, data class ovs_dp_msg(genlmsg): @@ -115,6 +301,7 @@ class ovsactions(nla): ("OVS_ACTION_ATTR_CHECK_PKT_LEN", "none"), ("OVS_ACTION_ATTR_ADD_MPLS", "none"), ("OVS_ACTION_ATTR_DEC_TTL", "none"), + ("OVS_ACTION_ATTR_DROP", "uint32"), ) class ctact(nla): @@ -261,6 +448,8 @@ class ovsactions(nla): print_str += "recirc(0x%x)" % int(self.get_attr(field[0])) elif field[0] == "OVS_ACTION_ATTR_TRUNC": print_str += "trunc(%d)" % int(self.get_attr(field[0])) + elif field[0] == "OVS_ACTION_ATTR_DROP": + print_str += "drop(%d)" % int(self.get_attr(field[0])) elif field[1] == "flag": if field[0] == "OVS_ACTION_ATTR_CT_CLEAR": print_str += "ct_clear" @@ -278,6 +467,153 @@ class ovsactions(nla): return print_str + def parse(self, actstr): + while len(actstr) != 0: + parsed = False + if actstr.startswith("drop"): + # If no reason is provided, the implicit drop is used (i.e no + # action). If some reason is given, an explicit action is used. + actstr, reason = parse_extract_field( + actstr, + "drop(", + "([0-9]+)", + lambda x: int(x, 0), + False, + None, + ) + if reason is not None: + self["attrs"].append(["OVS_ACTION_ATTR_DROP", reason]) + parsed = True + else: + return + + elif parse_starts_block(actstr, "^(\d+)", False, True): + actstr, output = parse_extract_field( + actstr, None, "(\d+)", lambda x: int(x), False, "0" + ) + self["attrs"].append(["OVS_ACTION_ATTR_OUTPUT", output]) + parsed = True + elif parse_starts_block(actstr, "recirc(", False): + actstr, recircid = parse_extract_field( + actstr, + "recirc(", + "([0-9a-fA-Fx]+)", + lambda x: int(x, 0), + False, + 0, + ) + self["attrs"].append(["OVS_ACTION_ATTR_RECIRC", recircid]) + parsed = True + + parse_flat_map = ( + ("ct_clear", "OVS_ACTION_ATTR_CT_CLEAR"), + ("pop_vlan", "OVS_ACTION_ATTR_POP_VLAN"), + ("pop_eth", "OVS_ACTION_ATTR_POP_ETH"), + ("pop_nsh", "OVS_ACTION_ATTR_POP_NSH"), + ) + + for flat_act in parse_flat_map: + if parse_starts_block(actstr, flat_act[0], False): + actstr += len(flat_act[0]) + self["attrs"].append([flat_act[1]]) + actstr = actstr[strspn(actstr, ", ") :] + parsed = True + + if parse_starts_block(actstr, "ct(", False): + actstr = actstr[len("ct(") :] + ctact = ovsactions.ctact() + + for scan in ( + ("commit", "OVS_CT_ATTR_COMMIT", None), + ("force_commit", "OVS_CT_ATTR_FORCE_COMMIT", None), + ("zone", "OVS_CT_ATTR_ZONE", int), + ("mark", "OVS_CT_ATTR_MARK", int), + ("helper", "OVS_CT_ATTR_HELPER", lambda x, y: str(x)), + ("timeout", "OVS_CT_ATTR_TIMEOUT", lambda x, y: str(x)), + ): + if actstr.startswith(scan[0]): + actstr = actstr[len(scan[0]) :] + if scan[2] is not None: + if actstr[0] != "=": + raise ValueError("Invalid ct attr") + actstr = actstr[1:] + pos = strcspn(actstr, ",)") + datum = scan[2](actstr[:pos], 0) + ctact["attrs"].append([scan[1], datum]) + actstr = actstr[pos:] + else: + ctact["attrs"].append([scan[1], None]) + actstr = actstr[strspn(actstr, ", ") :] + # it seems strange to put this here, but nat() is a complex + # sub-action and this lets it sit anywhere in the ct() action + if actstr.startswith("nat"): + actstr = actstr[3:] + natact = ovsactions.ctact.natattr() + + if actstr.startswith("("): + t = None + actstr = actstr[1:] + if actstr.startswith("src"): + t = "OVS_NAT_ATTR_SRC" + actstr = actstr[3:] + elif actstr.startswith("dst"): + t = "OVS_NAT_ATTR_DST" + actstr = actstr[3:] + + actstr, ip_block_min = parse_extract_field( + actstr, "=", "([0-9a-fA-F\.]+)", str, False + ) + actstr, ip_block_max = parse_extract_field( + actstr, "-", "([0-9a-fA-F\.]+)", str, False + ) + + actstr, proto_min = parse_extract_field( + actstr, ":", "(\d+)", int, False + ) + actstr, proto_max = parse_extract_field( + actstr, "-", "(\d+)", int, False + ) + + if t is not None: + natact["attrs"].append([t, None]) + + if ip_block_min is not None: + natact["attrs"].append( + ["OVS_NAT_ATTR_IP_MIN", ip_block_min] + ) + if ip_block_max is not None: + natact["attrs"].append( + ["OVS_NAT_ATTR_IP_MAX", ip_block_max] + ) + if proto_min is not None: + natact["attrs"].append( + ["OVS_NAT_ATTR_PROTO_MIN", proto_min] + ) + if proto_max is not None: + natact["attrs"].append( + ["OVS_NAT_ATTR_PROTO_MAX", proto_max] + ) + + for natscan in ( + ("persistent", "OVS_NAT_ATTR_PERSISTENT"), + ("hash", "OVS_NAT_ATTR_PROTO_HASH"), + ("random", "OVS_NAT_ATTR_PROTO_RANDOM"), + ): + if actstr.startswith(natscan[0]): + actstr = actstr[len(natscan[0]) :] + natact["attrs"].append([natscan[1], None]) + actstr = actstr[strspn(actstr, ", ") :] + + ctact["attrs"].append(["OVS_CT_ATTR_NAT", natact]) + actstr = actstr[strspn(actstr, ",) ") :] + + self["attrs"].append(["OVS_ACTION_ATTR_CT", ctact]) + parsed = True + + actstr = actstr[strspn(actstr, "), ") :] + if not parsed: + raise ValueError("Action str: '%s' not supported" % actstr) + class ovskey(nla): nla_flags = NLA_F_NESTED @@ -324,8 +660,10 @@ class ovskey(nla): ) fields_map = ( - ("src", "src", "%d", lambda x: int(x) if x is not None else 0), - ("dst", "dst", "%d", lambda x: int(x) if x is not None else 0), + ("src", "src", "%d", lambda x: int(x) if x else 0, + convert_int(16)), + ("dst", "dst", "%d", lambda x: int(x) if x else 0, + convert_int(16)), ) def __init__( @@ -347,6 +685,49 @@ class ovskey(nla): init=init, ) + def parse(self, flowstr, typeInst): + if not flowstr.startswith(self.proto_str): + return None, None + + k = typeInst() + m = typeInst() + + flowstr = flowstr[len(self.proto_str) :] + if flowstr.startswith("("): + flowstr = flowstr[1:] + + keybits = b"" + maskbits = b"" + for f in self.fields_map: + if flowstr.startswith(f[1]): + # the following assumes that the field looks + # something like 'field.' where '.' is a + # character that we don't exactly care about. + flowstr = flowstr[len(f[1]) + 1 :] + splitchar = 0 + for c in flowstr: + if c == "," or c == ")": + break + splitchar += 1 + data = flowstr[:splitchar] + flowstr = flowstr[splitchar:] + else: + data = "" + + if len(f) > 4: + k[f[0]], m[f[0]] = f[4](data) + else: + k[f[0]] = f[3](data) + m[f[0]] = f[3](data) + + flowstr = flowstr[strspn(flowstr, ", ") :] + if len(flowstr) == 0: + return flowstr, k, m + + flowstr = flowstr[strspn(flowstr, "), ") :] + + return flowstr, k, m + def dpstr(self, masked=None, more=False): outstr = self.proto_str + "(" first = False @@ -441,10 +822,14 @@ class ovskey(nla): int, convert_ipv4, ), - ("proto", "proto", "%d", lambda x: int(x) if x is not None else 0), - ("tos", "tos", "%d", lambda x: int(x) if x is not None else 0), - ("ttl", "ttl", "%d", lambda x: int(x) if x is not None else 0), - ("frag", "frag", "%d", lambda x: int(x) if x is not None else 0), + ("proto", "proto", "%d", lambda x: int(x) if x else 0, + convert_int(8)), + ("tos", "tos", "%d", lambda x: int(x) if x else 0, + convert_int(8)), + ("ttl", "ttl", "%d", lambda x: int(x) if x else 0, + convert_int(8)), + ("frag", "frag", "%d", lambda x: int(x) if x else 0, + convert_int(8)), ) def __init__( @@ -580,8 +965,8 @@ class ovskey(nla): ) fields_map = ( - ("type", "type", "%d", int), - ("code", "code", "%d", int), + ("type", "type", "%d", lambda x: int(x) if x else 0), + ("code", "code", "%d", lambda x: int(x) if x else 0), ) def __init__( @@ -646,7 +1031,7 @@ class ovskey(nla): int, convert_ipv4, ), - ("op", "op", "%d", lambda x: int(x) if x is not None else 0), + ("op", "op", "%d", lambda x: int(x) if x else 0), ( "sha", "sha", @@ -810,6 +1195,81 @@ class ovskey(nla): class ovs_key_mpls(nla): fields = (("lse", ">I"),) + def parse(self, flowstr, mask=None): + for field in ( + ("OVS_KEY_ATTR_PRIORITY", "skb_priority", intparse), + ("OVS_KEY_ATTR_SKB_MARK", "skb_mark", intparse), + ("OVS_KEY_ATTR_RECIRC_ID", "recirc_id", intparse), + ("OVS_KEY_ATTR_DP_HASH", "dp_hash", intparse), + ("OVS_KEY_ATTR_CT_STATE", "ct_state", parse_ct_state), + ("OVS_KEY_ATTR_CT_ZONE", "ct_zone", intparse), + ("OVS_KEY_ATTR_CT_MARK", "ct_mark", intparse), + ("OVS_KEY_ATTR_IN_PORT", "in_port", intparse), + ( + "OVS_KEY_ATTR_ETHERNET", + "eth", + ovskey.ethaddr, + ), + ( + "OVS_KEY_ATTR_ETHERTYPE", + "eth_type", + lambda x: intparse(x, "0xffff"), + ), + ( + "OVS_KEY_ATTR_IPV4", + "ipv4", + ovskey.ovs_key_ipv4, + ), + ( + "OVS_KEY_ATTR_IPV6", + "ipv6", + ovskey.ovs_key_ipv6, + ), + ( + "OVS_KEY_ATTR_ARP", + "arp", + ovskey.ovs_key_arp, + ), + ( + "OVS_KEY_ATTR_TCP", + "tcp", + ovskey.ovs_key_tcp, + ), + ( + "OVS_KEY_ATTR_UDP", + "udp", + ovskey.ovs_key_udp, + ), + ( + "OVS_KEY_ATTR_ICMP", + "icmp", + ovskey.ovs_key_icmp, + ), + ( + "OVS_KEY_ATTR_TCP_FLAGS", + "tcp_flags", + lambda x: parse_flags(x, None), + ), + ): + fld = field[1] + "(" + if not flowstr.startswith(fld): + continue + + if not isinstance(field[2], types.FunctionType): + nk = field[2]() + flowstr, k, m = nk.parse(flowstr, field[2]) + else: + flowstr = flowstr[len(fld) :] + flowstr, k, m = field[2](flowstr) + + if m and mask is not None: + mask["attrs"].append([field[0], m]) + self["attrs"].append([field[0], k]) + + flowstr = flowstr[strspn(flowstr, "),") :] + + return flowstr + def dpstr(self, mask=None, more=False): print_str = "" @@ -1358,11 +1818,92 @@ class OvsFlow(GenericNetlinkSocket): return print_str + def parse(self, flowstr, actstr, dpidx=0): + OVS_UFID_F_OMIT_KEY = 1 << 0 + OVS_UFID_F_OMIT_MASK = 1 << 1 + OVS_UFID_F_OMIT_ACTIONS = 1 << 2 + + self["cmd"] = 0 + self["version"] = 0 + self["reserved"] = 0 + self["dpifindex"] = 0 + + if flowstr.startswith("ufid:"): + count = 5 + while flowstr[count] != ",": + count += 1 + ufidstr = flowstr[5:count] + flowstr = flowstr[count + 1 :] + else: + ufidstr = str(uuid.uuid4()) + uuidRawObj = uuid.UUID(ufidstr).fields + + self["attrs"].append( + [ + "OVS_FLOW_ATTR_UFID", + [ + uuidRawObj[0], + uuidRawObj[1] << 16 | uuidRawObj[2], + uuidRawObj[3] << 24 + | uuidRawObj[4] << 16 + | uuidRawObj[5] & (0xFF << 32) >> 32, + uuidRawObj[5] & (0xFFFFFFFF), + ], + ] + ) + self["attrs"].append( + [ + "OVS_FLOW_ATTR_UFID_FLAGS", + int( + OVS_UFID_F_OMIT_KEY + | OVS_UFID_F_OMIT_MASK + | OVS_UFID_F_OMIT_ACTIONS + ), + ] + ) + + k = ovskey() + m = ovskey() + k.parse(flowstr, m) + self["attrs"].append(["OVS_FLOW_ATTR_KEY", k]) + self["attrs"].append(["OVS_FLOW_ATTR_MASK", m]) + + a = ovsactions() + a.parse(actstr) + self["attrs"].append(["OVS_FLOW_ATTR_ACTIONS", a]) + def __init__(self): GenericNetlinkSocket.__init__(self) self.bind(OVS_FLOW_FAMILY, OvsFlow.ovs_flow_msg) + def add_flow(self, dpifindex, flowmsg): + """ + Send a new flow message to the kernel. + + dpifindex should be a valid datapath obtained by calling + into the OvsDatapath lookup + + flowmsg is a flow object obtained by calling a dpparse + """ + + flowmsg["cmd"] = OVS_FLOW_CMD_NEW + flowmsg["version"] = OVS_DATAPATH_VERSION + flowmsg["reserved"] = 0 + flowmsg["dpifindex"] = dpifindex + + try: + reply = self.nlm_request( + flowmsg, + msg_type=self.prid, + msg_flags=NLM_F_REQUEST | NLM_F_ACK, + ) + reply = reply[0] + except NetlinkError as ne: + print(flowmsg) + raise ne + return reply + def dump(self, dpifindex, flowspec=None): """ Returns a list of messages containing flows. @@ -1514,6 +2055,11 @@ def main(argv): dumpflcmd = subparsers.add_parser("dump-flows") dumpflcmd.add_argument("dumpdp", help="Datapath Name") + addflcmd = subparsers.add_parser("add-flow") + addflcmd.add_argument("flbr", help="Datapath name") + addflcmd.add_argument("flow", help="Flow specification") + addflcmd.add_argument("acts", help="Flow actions") + args = parser.parse_args() if args.verbose > 0: @@ -1589,6 +2135,14 @@ def main(argv): rep = ovsflow.dump(rep["dpifindex"]) for flow in rep: print(flow.dpstr(True if args.verbose > 0 else False)) + elif hasattr(args, "flbr"): + rep = ovsdp.info(args.flbr, 0) + if rep is None: + print("DP '%s' not found." % args.flbr) + return 1 + flow = OvsFlow.ovs_flow_msg() + flow.parse(args.flow, args.acts, rep["dpifindex"]) + ovsflow.add_flow(rep["dpifindex"], flow) return 0 diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index dfe3d287f01d..f838dd370f6a 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -361,6 +361,7 @@ err_buf= tcpdump_pids= nettest_pids= socat_pids= +tmpoutfile= err() { err_buf="${err_buf}${1} @@ -951,6 +952,7 @@ cleanup() { ip link del veth_A-R1 2>/dev/null ovs-vsctl --if-exists del-port vxlan_a 2>/dev/null ovs-vsctl --if-exists del-br ovs_br0 2>/dev/null + rm -f "$tmpoutfile" } mtu() { @@ -1328,6 +1330,39 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() { check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on bridged ${type} interface" pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})" check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on locally bridged ${type} interface" + + tmpoutfile=$(mktemp) + + # Flush Exceptions, retry with TCP + run_cmd ${ns_a} ip route flush cached ${dst} + run_cmd ${ns_b} ip route flush cached ${dst} + run_cmd ${ns_c} ip route flush cached ${dst} + + for target in "${ns_a}" "${ns_c}" ; do + if [ ${family} -eq 4 ]; then + TCPDST=TCP:${dst}:50000 + else + TCPDST="TCP:[${dst}]:50000" + fi + ${ns_b} socat -T 3 -u -6 TCP-LISTEN:50000 STDOUT > $tmpoutfile & + + sleep 1 + + dd if=/dev/zero of=/dev/stdout status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3 + + size=$(du -sb $tmpoutfile) + size=${size%%/tmp/*} + + [ $size -ne 1048576 ] && err "File size $size mismatches exepcted value in locally bridged vxlan test" && return 1 + done + + rm -f "$tmpoutfile" + + # Check that exceptions were created + pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})" + check_pmtu_value ${exp_mtu} "${pmtu}" "tcp: exceeding link layer MTU on bridged ${type} interface" + pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})" + check_pmtu_value ${exp_mtu} "${pmtu}" "tcp exceeding link layer MTU on locally bridged ${type} interface" } test_pmtu_ipv4_br_vxlan4_exception() { diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h index faa884385c45..6e4fef560873 100644 --- a/tools/testing/selftests/net/psock_lib.h +++ b/tools/testing/selftests/net/psock_lib.h @@ -14,6 +14,8 @@ #include <arpa/inet.h> #include <unistd.h> +#include "kselftest.h" + #define DATA_LEN 100 #define DATA_CHAR 'a' #define DATA_CHAR_1 'b' @@ -63,7 +65,7 @@ static __maybe_unused void pair_udp_setfilter(int fd) struct sock_fprog bpf_prog; bpf_prog.filter = bpf_filter; - bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter); + bpf_prog.len = ARRAY_SIZE(bpf_filter); if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog, sizeof(bpf_prog))) { diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 383ac6fc037d..488f4964365e 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -21,6 +21,7 @@ ALL_TESTS=" kci_test_vrf kci_test_encap kci_test_macsec + kci_test_macsec_offload kci_test_ipsec kci_test_ipsec_offload kci_test_fdb_get @@ -643,6 +644,88 @@ kci_test_macsec() echo "PASS: macsec" } +kci_test_macsec_offload() +{ + sysfsd=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/ + sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/ + probed=false + local ret=0 + + ip macsec help 2>&1 | grep -q "^Usage: ip macsec" + if [ $? -ne 0 ]; then + echo "SKIP: macsec: iproute2 too old" + return $ksft_skip + fi + + # setup netdevsim since dummydev doesn't have offload support + if [ ! -w /sys/bus/netdevsim/new_device ] ; then + modprobe -q netdevsim + check_err $? + if [ $ret -ne 0 ]; then + echo "SKIP: macsec_offload can't load netdevsim" + return $ksft_skip + fi + probed=true + fi + + echo "0" > /sys/bus/netdevsim/new_device + while [ ! -d $sysfsnet ] ; do :; done + udevadm settle + dev=`ls $sysfsnet` + + ip link set $dev up + if [ ! -d $sysfsd ] ; then + echo "FAIL: macsec_offload can't create device $dev" + return 1 + fi + + ethtool -k $dev | grep -q 'macsec-hw-offload: on' + if [ $? -eq 1 ] ; then + echo "FAIL: macsec_offload netdevsim doesn't support MACsec offload" + return 1 + fi + + ip link add link $dev kci_macsec1 type macsec port 4 offload mac + check_err $? + + ip link add link $dev kci_macsec2 type macsec address "aa:bb:cc:dd:ee:ff" port 5 offload mac + check_err $? + + ip link add link $dev kci_macsec3 type macsec sci abbacdde01020304 offload mac + check_err $? + + ip link add link $dev kci_macsec4 type macsec port 8 offload mac 2> /dev/null + check_fail $? + + msname=kci_macsec1 + + ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012 + check_err $? + + ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" + check_err $? + + ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on \ + key 00 0123456789abcdef0123456789abcdef + check_err $? + + ip macsec add "$msname" rx port 1235 address "1c:ed:de:ad:be:ef" 2> /dev/null + check_fail $? + + # clean up any leftovers + for msdev in kci_macsec{1,2,3,4} ; do + ip link del $msdev 2> /dev/null + done + echo 0 > /sys/bus/netdevsim/del_device + $probed && rmmod netdevsim + + if [ $ret -ne 0 ]; then + echo "FAIL: macsec_offload" + return 1 + fi + echo "PASS: macsec_offload" +} + #------------------------------------------------------------------- # Example commands # ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \ @@ -860,6 +943,7 @@ EOF fi # clean up any leftovers + echo 0 > /sys/bus/netdevsim/del_device $probed && rmmod netdevsim if [ $ret -ne 0 ]; then diff --git a/tools/testing/selftests/net/so_incoming_cpu.c b/tools/testing/selftests/net/so_incoming_cpu.c index 0e04f9fef986..a14818164102 100644 --- a/tools/testing/selftests/net/so_incoming_cpu.c +++ b/tools/testing/selftests/net/so_incoming_cpu.c @@ -159,7 +159,7 @@ void create_clients(struct __test_metadata *_metadata, /* Make sure SYN will be processed on the i-th CPU * and finally distributed to the i-th listener. */ - sched_setaffinity(0, sizeof(cpu_set), &cpu_set); + ret = sched_setaffinity(0, sizeof(cpu_set), &cpu_set); ASSERT_EQ(ret, 0); for (j = 0; j < CLIENT_PER_SERVER; j++) { diff --git a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh new file mode 100755 index 000000000000..c79cb8ede17f --- /dev/null +++ b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh @@ -0,0 +1,1213 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# author: Andrea Mayer <andrea.mayer@uniroma2.it> +# author: Paolo Lungaroni <paolo.lungaroni@uniroma2.it> +# +# This script is designed for testing the support of NEXT-C-SID flavor for SRv6 +# End.X behavior. +# A basic knowledge of SRv6 architecture [1] and of the compressed SID approach +# [2] is assumed for the reader. +# +# The network topology used in the selftest is depicted hereafter, composed of +# two hosts and four routers. Hosts hs-1 and hs-2 are connected through an +# IPv4/IPv6 L3 VPN service, offered by routers rt-1, rt-2, rt-3 and rt-4 using +# the NEXT-C-SID flavor. The key components for such VPNs are: +# +# i) The SRv6 H.Encaps/H.Encaps.Red behaviors [1] apply SRv6 Policies on +# traffic received by connected hosts, initiating the VPN tunnel; +# +# ii) The SRv6 End.X behavior [1] (Endpoint with L3 cross connect) is a +# variant of SRv6 End behavior. It advances the active SID in the SID +# List carried by the SRH and forwards the packet to an L3 adjacency; +# +# iii) The NEXT-C-SID mechanism [2] offers the possibility of encoding several +# SRv6 segments within a single 128-bit SID address, referred to as a +# Compressed SID (C-SID) container. In this way, the length of the SID +# List can be drastically reduced. +# The NEXT-C-SID is provided as a "flavor" of the SRv6 End.X behavior +# which advances the current C-SID (i.e. the Locator-Node Function defined +# in [2]) with the next one carried in the Argument, if available. +# When no more C-SIDs are available in the Argument, the SRv6 End.X +# behavior will apply the End.X function selecting the next SID in the SID +# List; +# +# iv) The SRv6 End.DT46 behavior [1] is used for removing the SRv6 Policy and, +# thus, it terminates the VPN tunnel. Such a behavior is capable of +# handling, at the same time, both tunneled IPv4 and IPv6 traffic. +# +# [1] https://datatracker.ietf.org/doc/html/rfc8986 +# [2] https://datatracker.ietf.org/doc/html/draft-ietf-spring-srv6-srh-compression +# +# +# cafe::1 cafe::2 +# 10.0.0.1 10.0.0.2 +# +--------+ +--------+ +# | | | | +# | hs-1 | | hs-2 | +# | | | | +# +---+----+ +----+---+ +# cafe::/64 | | cafe::/64 +# 10.0.0.0/24 | | 10.0.0.0/24 +# +---+----+ +----+---+ +# | | fcf0:0:1:2::/64 | | +# | rt-1 +-------------------+ rt-2 | +# | | | | +# +---+----+ +----+---+ +# | . . | +# | fcf0:0:1:3::/64 . | +# | . . | +# | . . | +# fcf0:0:1:4::/64 | . | fcf0:0:2:3::/64 +# | . . | +# | . . | +# | fcf0:0:2:4::/64 . | +# | . . | +# +---+----+ +----+---+ +# | | | | +# | rt-4 +-------------------+ rt-3 | +# | | fcf0:0:3:4::/64 | | +# +---+----+ +----+---+ +# +# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in +# the selftest network. +# +# Local SID/C-SID table +# ===================== +# +# Each SRv6 router is configured with a Local SID/C-SID table in which +# SIDs/C-SIDs are stored. Considering an SRv6 router rt-x, SIDs/C-SIDs are +# configured in the Local SID/C-SIDs table as follows: +# +# Local SID/C-SID table for SRv6 router rt-x +# +-----------------------------------------------------------+ +# |fcff:x::d46 is associated with the non-compressed SRv6 | +# | End.DT46 behavior | +# +-----------------------------------------------------------+ +# |fcbb:0:0x00::/48 is associated with the NEXT-C-SID flavor | +# | of SRv6 End.X behavior | +# +-----------------------------------------------------------+ +# |fcbb:0:0x00:d46::/64 is associated with the SRv6 End.DT46 | +# | behavior when NEXT-C-SID compression is turned on | +# +-----------------------------------------------------------+ +# +# The fcff::/16 prefix is reserved for implementing SRv6 services with regular +# (non compressed) SIDs. Reachability of SIDs is ensured by proper configuration +# of the IPv6 routing tables in the routers. +# Similarly, the fcbb:0::/32 prefix is reserved for implementing SRv6 VPN +# services leveraging the NEXT-C-SID compression mechanism. Indeed, the +# fcbb:0::/32 is used for encoding the Locator-Block while the Locator-Node +# Function is encoded with 16 bits. +# +# Incoming traffic classification and application of SRv6 Policies +# ================================================================ +# +# An SRv6 ingress router applies different SRv6 Policies to the traffic received +# from a connected host, considering the IPv4 or IPv6 destination address. +# SRv6 policy enforcement consists of encapsulating the received traffic into a +# new IPv6 packet with a given SID List contained in the SRH. +# When the SID List contains only one SID, the SRH could be omitted completely +# and that SID is stored directly in the IPv6 Destination Address (DA) (this is +# called "reduced" encapsulation). +# +# Test cases for NEXT-C-SID +# ========================= +# +# We consider two test cases for NEXT-C-SID: i) single SID and ii) double SID. +# +# In the single SID test case we have a number of segments that are all +# contained in a single Compressed SID (C-SID) container. Therefore the +# resulting SID List has only one SID. Using the reduced encapsulation format +# this will result in a packet with no SRH. +# +# In the double SID test case we have one segment carried in a Compressed SID +# (C-SID) container, followed by a regular (non compressed) SID. The resulting +# SID List has two segments and it is possible to test the advance to the next +# SID when all the C-SIDs in a C-SID container have been processed. Using the +# reduced encapsulation format this will result in a packet with an SRH +# containing 1 segment. +# +# For the single SID test case, we use the IPv6 addresses of hs-1 and hs-2, for +# the double SID test case, we use their IPv4 addresses. This is only done to +# simplify the test setup and avoid adding other hosts or multiple addresses on +# the same interface of a host. +# +# Traffic from hs-1 to hs-2 +# ------------------------- +# +# Packets generated from hs-1 and directed towards hs-2 are handled by rt-1 +# which applies the SRv6 Policies as follows: +# +# i) IPv6 DA=cafe::2, H.Encaps.Red with SID List=fcbb:0:0300:0200:d46:: +# ii) IPv4 DA=10.0.0.2, H.Encaps.Red with SID List=fcbb:0:0300::,fcff:2::d46 +# +# ### i) single SID +# +# The router rt-1 is configured to enforce the given Policy through the SRv6 +# H.Encaps.Red behavior which avoids the presence of the SRH at all, since it +# pushes the single SID directly in the IPv6 DA. Such a SID encodes a whole +# C-SID container carrying several C-SIDs (e.g. 0300, 0200, etc). +# +# As the packet reaches the router rt-3, the enabled NEXT-C-SID SRv6 End.X +# behavior (associated with fcbb:0:0300::/48) is triggered. This behavior +# analyzes the IPv6 DA and checks whether the Argument of the C-SID container +# is zero or not. In this case, the Argument is *NOT* zero and the IPv6 DA is +# updated as follows: +# +# +-----------------------------------------------------------------+ +# | Before applying the rt-3 enabled NEXT-C-SID SRv6 End.X behavior | +# +-----------------------------------------------------------------+ +# | +---------- Argument | +# | vvvvvvvvvv | +# | IPv6 DA fcbb:0:0300:0200:d46:: | +# | ^^^^ <-- shifting | +# | | | +# | Locator-Node Function | +# +-----------------------------------------------------------------+ +# | After applying the rt-3 enabled NEXT-C-SID SRv6 End.X behavior | +# +-----------------------------------------------------------------+ +# | +---------- Argument | +# | vvvvvv | +# | IPv6 DA fcbb:0:0200:d46:: | +# | ^^^^ | +# | | | +# | Locator-Node Function | +# +-----------------------------------------------------------------+ +# +# After having applied the enabled NEXT-C-SID SRv6 End.X behavior, the packet +# is sent to rt-4 node using the L3 adjacency address fcf0:0:3:4::4. +# +# The node rt-4 performs a plain IPv6 forward to the rt-2 router according to +# its Local SID table and using the IPv6 DA fcbb:0:0200:d46:: . +# +# The router rt-2 is configured for decapsulating the inner IPv6 packet and, +# for this reason, it applies the SRv6 End.DT46 behavior on the received +# packet. It is worth noting that the SRv6 End.DT46 behavior does not require +# the presence of the SRH: it is fully capable to operate properly on +# IPv4/IPv6-in-IPv6 encapsulations. +# At the end of the decap operation, the packet is sent to the host hs-2. +# +# ### ii) double SID +# +# The router rt-1 is configured to enforce the given Policy through the SRv6 +# H.Encaps.Red. As a result, the first SID fcbb:0:0300:: is stored into the +# IPv6 DA, while the SRH pushed into the packet is made of only one SID, i.e. +# fcff:2::d46. Hence, the packet sent by hs-1 to hs-2 is encapsulated in an +# outer IPv6 header plus the SRH. +# +# As the packet reaches the node rt-3, the router applies the enabled NEXT-C-SID +# SRv6 End.X behavior. +# +# +-----------------------------------------------------------------+ +# | Before applying the rt-3 enabled NEXT-C-SID SRv6 End.X behavior | +# +-----------------------------------------------------------------+ +# | +---------- Argument | +# | vvvv (Argument is all filled with zeros) | +# | IPv6 DA fcbb:0:0300:: | +# | ^^^^ | +# | | | +# | Locator-Node Function | +# +-----------------------------------------------------------------+ +# | After applying the rt-3 enabled NEXT-C-SID SRv6 End.X behavior | +# +-----------------------------------------------------------------+ +# | | +# | IPv6 DA fcff:2::d46 | +# | ^^^^^^^^^^^ | +# | | | +# | SID copied from the SID List contained in the SRH | +# +-----------------------------------------------------------------+ +# +# Since the Argument of the C-SID container is zero, the behavior can not +# update the Locator-Node function with the next C-SID carried in the Argument +# itself. Thus, the enabled NEXT-C-SID SRv6 End.X behavior operates as the +# traditional End.X behavior: it updates the IPv6 DA by copying the next +# available SID in the SID List carried by the SRH. Next, the packet is +# forwarded to the rt-4 node using the L3 adjacency fcf0:3:4::4 previously +# configured for this behavior. +# +# The node rt-4 performs a plain IPv6 forward to the rt-2 router according to +# its Local SID table and using the IPv6 DA fcff:2::d46. +# +# Once the packet is received by rt-2, the router decapsulates the inner IPv4 +# packet using the SRv6 End.DT46 behavior (associated with the SID fcff:2::d46) +# and sends it to the host hs-2. +# +# Traffic from hs-2 to hs-1 +# ------------------------- +# +# Packets generated from hs-2 and directed towards hs-1 are handled by rt-2 +# which applies the SRv6 Policies as follows: +# +# i) IPv6 DA=cafe::1, SID List=fcbb:0:0400:0100:d46:: +# ii) IPv4 DA=10.0.0.1, SID List=fcbb:0:0300::,fcff:1::d46 +# +# ### i) single SID +# +# The node hs-2 sends an IPv6 packet directed to node hs-1. The router rt-2 is +# directly connected to hs-2 and receives the packet. Rt-2 applies the +# H.Encap.Red behavior with policy i) described above. Since there is only one +# SID, the SRH header is omitted and the policy is inserted directly into the DA +# of IPv6 packet. +# +# The packet reaches the router rt-4 and the enabled NEXT-C-SID SRv6 End.X +# behavior (associated with fcbb:0:0400::/48) is triggered. This behavior +# analyzes the IPv6 DA and checks whether the Argument of the C-SID container +# is zero or not. The Argument is *NOT* zero and the C-SID in the IPv6 DA is +# advanced. At this point, the current IPv6 DA is fcbb:0:0100:d46:: . +# The enabled NEXT-C-SID SRv6 End.X behavior is configured with the L3 adjacency +# fcf0:0:1:4::1, used to route traffic to the rt-1 node. +# +# The router rt-1 is configured for decapsulating the inner packet. It applies +# the SRv6 End.DT46 behavior on the received packet. Decapsulation does not +# require the presence of the SRH. At the end of the decap operation, the packet +# is sent to the host hs-1. +# +# ### ii) double SID +# +# The router rt-2 is configured to enforce the given Policy through the SRv6 +# H.Encaps.Red. As a result, the first SID fcbb:0:0300:: is stored into the +# IPv6 DA, while the SRH pushed into the packet is made of only one SID, i.e. +# fcff:1::d46. Hence, the packet sent by hs-2 to hs-1 is encapsulated in an +# outer IPv6 header plus the SRH. +# +# As the packet reaches the node rt-3, the enabled NEXT-C-SID SRv6 End.X +# behavior bound to the SID fcbb:0:0300::/48 is triggered. +# Since the Argument of the C-SID container is zero, the behavior can not +# update the Locator-Node function with the next C-SID carried in the Argument +# itself. Thus, the enabled NEXT-C-SID SRv6 End-X behavior operates as the +# traditional End.X behavior: it updates the IPv6 DA by copying the next +# available SID in the SID List carried by the SRH. After that, the packet is +# forwarded to the rt-4 node using the L3 adjacency (fcf0:3:4::4) previously +# configured for this behavior. +# +# The node rt-4 performs a plain IPv6 forward to the rt-1 router according to +# its Local SID table, considering the IPv6 DA fcff:1::d46. +# +# Once the packet is received by rt-1, the router decapsulates the inner IPv4 +# packet using the SRv6 End.DT46 behavior (associated with the SID fcff:1::d46) +# and sends it to the host hs-1. + +# Kselftest framework requirement - SKIP code is 4. +readonly ksft_skip=4 + +readonly RDMSUFF="$(mktemp -u XXXXXXXX)" +readonly DUMMY_DEVNAME="dum0" +readonly VRF_TID=100 +readonly VRF_DEVNAME="vrf-${VRF_TID}" +readonly RT2HS_DEVNAME="veth-t${VRF_TID}" +readonly LOCALSID_TABLE_ID=90 +readonly IPv6_RT_NETWORK=fcf0:0 +readonly IPv6_HS_NETWORK=cafe +readonly IPv4_HS_NETWORK=10.0.0 +readonly VPN_LOCATOR_SERVICE=fcff +readonly DT46_FUNC=0d46 +readonly HEADEND_ENCAP="encap.red" + +# do not add ':' as separator +readonly LCBLOCK_ADDR=fcbb0000 +readonly LCBLOCK_BLEN=32 +# do not add ':' as separator +readonly LCNODEFUNC_FMT="0%d00" +readonly LCNODEFUNC_BLEN=16 + +readonly LCBLOCK_NODEFUNC_BLEN=$((LCBLOCK_BLEN + LCNODEFUNC_BLEN)) + +readonly CSID_CNTR_PREFIX="dead:beaf::/32" +# ID of the router used for testing the C-SID container cfgs +readonly CSID_CNTR_RT_ID_TEST=1 +# Routing table used for testing the C-SID container cfgs +readonly CSID_CNTR_RT_TABLE=91 + +# C-SID container configurations to be tested +# +# An entry of the array is defined as "a,b,c" where: +# - 'a' and 'b' elements represent respectively the Locator-Block length +# (lblen) in bits and the Locator-Node Function length (nflen) in bits. +# 'a' and 'b' can be set to default values using the placeholder "d" which +# indicates the default kernel values (32 for lblen and 16 for nflen); +# otherwise, any numeric value is accepted; +# - 'c' indicates whether the C-SID configuration provided by the values 'a' +# and 'b' should be considered valid ("y") or invalid ("n"). +declare -ra CSID_CONTAINER_CFGS=( + "d,d,y" + "d,16,y" + "16,d,y" + "16,32,y" + "32,16,y" + "48,8,y" + "8,48,y" + "d,0,n" + "0,d,n" + "32,0,n" + "0,32,n" + "17,d,n" + "d,17,n" + "120,16,n" + "16,120,n" + "0,128,n" + "128,0,n" + "130,0,n" + "0,130,n" + "0,0,n" +) + +PING_TIMEOUT_SEC=4 +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +# IDs of routers and hosts are initialized during the setup of the testing +# network +ROUTERS='' +HOSTS='' + +SETUP_ERR=1 + +ret=${ksft_skip} +nsuccess=0 +nfail=0 + +log_test() +{ + local rc="$1" + local expected="$2" + local msg="$3" + + if [ "${rc}" -eq "${expected}" ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + printf "\nTests passed: %3d\n" "${nsuccess}" + printf "Tests failed: %3d\n" "${nfail}" + + # when a test fails, the value of 'ret' is set to 1 (error code). + # Conversely, when all tests are passed successfully, the 'ret' value + # is set to 0 (success code). + if [ "${ret}" -ne 1 ]; then + ret=0 + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +test_command_or_ksft_skip() +{ + local cmd="$1" + + if [ ! -x "$(command -v "${cmd}")" ]; then + echo "SKIP: Could not run test without \"${cmd}\" tool"; + exit "${ksft_skip}" + fi +} + +get_nodename() +{ + local name="$1" + + echo "${name}-${RDMSUFF}" +} + +get_rtname() +{ + local rtid="$1" + + get_nodename "rt-${rtid}" +} + +get_hsname() +{ + local hsid="$1" + + get_nodename "hs-${hsid}" +} + +__create_namespace() +{ + local name="$1" + + ip netns add "${name}" +} + +create_router() +{ + local rtid="$1" + local nsname + + nsname="$(get_rtname "${rtid}")" + + __create_namespace "${nsname}" + + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 + + ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 + ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 +} + +create_host() +{ + local hsid="$1" + local nsname + + nsname="$(get_hsname "${hsid}")" + + __create_namespace "${nsname}" +} + +cleanup() +{ + local nsname + local i + + # destroy routers + for i in ${ROUTERS}; do + nsname="$(get_rtname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # destroy hosts + for i in ${HOSTS}; do + nsname="$(get_hsname "${i}")" + + ip netns del "${nsname}" &>/dev/null || true + done + + # check whether the setup phase was completed successfully or not. In + # case of an error during the setup phase of the testing environment, + # the selftest is considered as "skipped". + if [ "${SETUP_ERR}" -ne 0 ]; then + echo "SKIP: Setting up the testing environment failed" + exit "${ksft_skip}" + fi + + exit "${ret}" +} + +add_link_rt_pairs() +{ + local rt="$1" + local rt_neighs="$2" + local neigh + local nsname + local neigh_nsname + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + neigh_nsname="$(get_rtname "${neigh}")" + + ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ + type veth peer name "veth-rt-${neigh}-${rt}" \ + netns "${neigh_nsname}" + done +} + +get_network_prefix() +{ + local rt="$1" + local neigh="$2" + local p="${rt}" + local q="${neigh}" + + if [ "${p}" -gt "${q}" ]; then + p="${q}"; q="${rt}" + fi + + echo "${IPv6_RT_NETWORK}:${p}:${q}" +} + +# Setup the basic networking for the routers +setup_rt_networking() +{ + local rt="$1" + local rt_neighs="$2" + local nsname + local net_prefix + local devname + local neigh + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" "${neigh}")" + + ip -netns "${nsname}" addr \ + add "${net_prefix}::${rt}/64" dev "${devname}" nodad + + ip -netns "${nsname}" link set "${devname}" up + done + + ip -netns "${nsname}" link add "${DUMMY_DEVNAME}" type dummy + + ip -netns "${nsname}" link set "${DUMMY_DEVNAME}" up + ip -netns "${nsname}" link set lo up +} + +# build an ipv6 prefix/address based on the input string +# Note that the input string does not contain ':' and '::' which are considered +# to be implicit. +# e.g.: +# - input: fbcc00000400300 +# - output: fbcc:0000:0400:0300:0000:0000:0000:0000 +# ^^^^^^^^^^^^^^^^^^^ +# fill the address with 0s +build_ipv6_addr() +{ + local addr="$1" + local out="" + local strlen="${#addr}" + local padn + local i + + # add ":" every 4 digits (16 bits) + for (( i = 0; i < strlen; i++ )); do + if (( i > 0 && i < 32 && (i % 4) == 0 )); then + out="${out}:" + fi + + out="${out}${addr:$i:1}" + done + + # fill the remaining bits of the address with 0s + padn=$((32 - strlen)) + for (( i = padn; i > 0; i-- )); do + if (( i > 0 && i < 32 && (i % 4) == 0 )); then + out="${out}:" + fi + + out="${out}0" + done + + printf "${out}" +} + +build_csid() +{ + local nodeid="$1" + + printf "${LCNODEFUNC_FMT}" "${nodeid}" +} + +build_lcnode_func_prefix() +{ + local nodeid="$1" + local lcnodefunc + local prefix + local out + + lcnodefunc="$(build_csid "${nodeid}")" + prefix="$(build_ipv6_addr "${LCBLOCK_ADDR}${lcnodefunc}")" + + out="${prefix}/${LCBLOCK_NODEFUNC_BLEN}" + + echo "${out}" +} + +set_end_x_nextcsid() +{ + local rt="$1" + local adj="$2" + + nsname="$(get_rtname "${rt}")" + net_prefix="$(get_network_prefix "${rt}" "${adj}")" + lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")" + + # enabled NEXT-C-SID SRv6 End.X behavior (note that "dev" is the dummy + # dum0 device chosen for the sake of simplicity). + ip -netns "${nsname}" -6 route \ + replace "${lcnode_func_prefix}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End.X nh6 "${net_prefix}::${adj}" \ + flavors next-csid lblen "${LCBLOCK_BLEN}" \ + nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}" +} + +set_underlay_sids_reachability() +{ + local rt="$1" + local rt_neighs="$2" + + nsname="$(get_rtname "${rt}")" + + for neigh in ${rt_neighs}; do + devname="veth-rt-${rt}-${neigh}" + + net_prefix="$(get_network_prefix "${rt}" "${neigh}")" + + # set underlay network routes for SIDs reachability + ip -netns "${nsname}" -6 route \ + replace "${VPN_LOCATOR_SERVICE}:${neigh}::/32" \ + table "${LOCALSID_TABLE_ID}" \ + via "${net_prefix}::${neigh}" dev "${devname}" + + # set the underlay network for C-SIDs reachability + lcnode_func_prefix="$(build_lcnode_func_prefix "${neigh}")" + + ip -netns "${nsname}" -6 route \ + replace "${lcnode_func_prefix}" \ + table "${LOCALSID_TABLE_ID}" \ + via "${net_prefix}::${neigh}" dev "${devname}" + done +} + +# Setup local SIDs for an SRv6 router +setup_rt_local_sids() +{ + local rt="$1" + local rt_neighs="$2" + local net_prefix + local devname + local nsname + local neigh + local lcnode_func_prefix + local lcblock_prefix + + nsname="$(get_rtname "${rt}")" + + set_underlay_sids_reachability "${rt}" "${rt_neighs}" + + # all SIDs for VPNs start with a common locator. Routes and SRv6 + # Endpoint behavior instaces are grouped together in the 'localsid' + # table. + ip -netns "${nsname}" -6 rule \ + add to "${VPN_LOCATOR_SERVICE}::/16" \ + lookup "${LOCALSID_TABLE_ID}" prio 999 + + # common locator block for NEXT-C-SIDS compression mechanism. + lcblock_prefix="$(build_ipv6_addr "${LCBLOCK_ADDR}")" + ip -netns "${nsname}" -6 rule \ + add to "${lcblock_prefix}/${LCBLOCK_BLEN}" \ + lookup "${LOCALSID_TABLE_ID}" prio 999 +} + +# build and install the SRv6 policy into the ingress SRv6 router as well as the +# decap SID in the egress one. +# args: +# $1 - src host (evaluate automatically the ingress router) +# $2 - dst host (evaluate automatically the egress router) +# $3 - SRv6 routers configured for steering traffic (End.X behaviors) +# $4 - single SID or double SID +# $5 - traffic type (IPv6 or IPv4) +__setup_l3vpn() +{ + local src="$1" + local dst="$2" + local end_rts="$3" + local mode="$4" + local traffic="$5" + local nsname + local policy + local container + local decapsid + local lcnfunc + local dt + local n + local rtsrc_nsname + local rtdst_nsname + + rtsrc_nsname="$(get_rtname "${src}")" + rtdst_nsname="$(get_rtname "${dst}")" + + container="${LCBLOCK_ADDR}" + + # build first SID (C-SID container) + for n in ${end_rts}; do + lcnfunc="$(build_csid "${n}")" + + container="${container}${lcnfunc}" + done + + if [ "${mode}" -eq 1 ]; then + # single SID policy + dt="$(build_csid "${dst}")${DT46_FUNC}" + container="${container}${dt}" + # build the full ipv6 address for the container + policy="$(build_ipv6_addr "${container}")" + + # build the decap SID used in the decap node + container="${LCBLOCK_ADDR}${dt}" + decapsid="$(build_ipv6_addr "${container}")" + else + # double SID policy + decapsid="${VPN_LOCATOR_SERVICE}:${dst}::${DT46_FUNC}" + + policy="$(build_ipv6_addr "${container}"),${decapsid}" + fi + + # apply encap policy + if [ "${traffic}" -eq 6 ]; then + ip -netns "${rtsrc_nsname}" -6 route \ + add "${IPv6_HS_NETWORK}::${dst}" vrf "${VRF_DEVNAME}" \ + encap seg6 mode "${HEADEND_ENCAP}" segs "${policy}" \ + dev "${VRF_DEVNAME}" + + ip -netns "${rtsrc_nsname}" -6 neigh \ + add proxy "${IPv6_HS_NETWORK}::${dst}" \ + dev "${RT2HS_DEVNAME}" + else + # "dev" must be different from the one where the packet is + # received, otherwise the proxy arp does not work. + ip -netns "${rtsrc_nsname}" -4 route \ + add "${IPv4_HS_NETWORK}.${dst}" vrf "${VRF_DEVNAME}" \ + encap seg6 mode "${HEADEND_ENCAP}" segs "${policy}" \ + dev "${VRF_DEVNAME}" + fi + + # apply decap + # Local End.DT46 behavior (decap) + ip -netns "${rtdst_nsname}" -6 route \ + add "${decapsid}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End.DT46 vrftable "${VRF_TID}" \ + dev "${VRF_DEVNAME}" +} + +# see __setup_l3vpn() +setup_ipv4_vpn_2sids() +{ + __setup_l3vpn "$1" "$2" "$3" 2 4 +} + +# see __setup_l3vpn() +setup_ipv6_vpn_1sid() +{ + __setup_l3vpn "$1" "$2" "$3" 1 6 +} + +setup_hs() +{ + local hs="$1" + local rt="$2" + local hsname + local rtname + + hsname="$(get_hsname "${hs}")" + rtname="$(get_rtname "${rt}")" + + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 + ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 + + ip -netns "${hsname}" link add veth0 type veth \ + peer name "${RT2HS_DEVNAME}" netns "${rtname}" + + ip -netns "${hsname}" addr \ + add "${IPv6_HS_NETWORK}::${hs}/64" dev veth0 nodad + ip -netns "${hsname}" addr add "${IPv4_HS_NETWORK}.${hs}/24" dev veth0 + + ip -netns "${hsname}" link set veth0 up + ip -netns "${hsname}" link set lo up + + # configure the VRF on the router which is directly connected to the + # source host. + ip -netns "${rtname}" link \ + add "${VRF_DEVNAME}" type vrf table "${VRF_TID}" + ip -netns "${rtname}" link set "${VRF_DEVNAME}" up + + # enslave the veth interface connecting the router with the host to the + # VRF in the access router + ip -netns "${rtname}" link \ + set "${RT2HS_DEVNAME}" master "${VRF_DEVNAME}" + + # set default routes to unreachable for both ipv6 and ipv4 + ip -netns "${rtname}" -6 route \ + add unreachable default metric 4278198272 \ + vrf "${VRF_DEVNAME}" + ip -netns "${rtname}" -4 route \ + add unreachable default metric 4278198272 \ + vrf "${VRF_DEVNAME}" + + ip -netns "${rtname}" addr \ + add "${IPv6_HS_NETWORK}::254/64" dev "${RT2HS_DEVNAME}" nodad + ip -netns "${rtname}" addr \ + add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}" + + ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up + + ip netns exec "${rtname}" \ + sysctl -wq net.ipv6.conf."${RT2HS_DEVNAME}".proxy_ndp=1 + ip netns exec "${rtname}" \ + sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1 + + # disable the rp_filter otherwise the kernel gets confused about how + # to route decap ipv4 packets. + ip netns exec "${rtname}" \ + sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 + + ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" +} + +setup() +{ + local i + + # create routers + ROUTERS="1 2 3 4"; readonly ROUTERS + for i in ${ROUTERS}; do + create_router "${i}" + done + + # create hosts + HOSTS="1 2"; readonly HOSTS + for i in ${HOSTS}; do + create_host "${i}" + done + + # set up the links for connecting routers + add_link_rt_pairs 1 "2 3 4" + add_link_rt_pairs 2 "3 4" + add_link_rt_pairs 3 "4" + + # set up the basic connectivity of routers and routes required for + # reachability of SIDs. + setup_rt_networking 1 "2 3 4" + setup_rt_networking 2 "1 3 4" + setup_rt_networking 3 "1 2 4" + setup_rt_networking 4 "1 2 3" + + # set up the hosts connected to routers + setup_hs 1 1 + setup_hs 2 2 + + # set up default SRv6 Endpoints (i.e. SRv6 End and SRv6 End.DT46) + setup_rt_local_sids 1 "2 3 4" + setup_rt_local_sids 2 "1 3 4" + setup_rt_local_sids 3 "1 2 4" + setup_rt_local_sids 4 "1 2 3" + + # set up SRv6 Policies + + # create an IPv6 VPN between hosts hs-1 and hs-2. + # + # Direction hs-1 -> hs-2 + # - rt-1 encap (H.Encaps.Red) + # - rt-3 SRv6 End.X behavior adj rt-4 (NEXT-C-SID flavor) + # - rt-4 Plain IPv6 Forwarding to rt-2 + # - rt-2 SRv6 End.DT46 behavior + setup_ipv6_vpn_1sid 1 2 "3" + + # Direction hs2 -> hs-1 + # - rt-2 encap (H.Encaps.Red) + # - rt-4 SRv6 End.X behavior adj rt-1 (NEXT-C-SID flavor) + # - rt-1 SRv6 End.DT46 behavior + setup_ipv6_vpn_1sid 2 1 "4" + + # create an IPv4 VPN between hosts hs-1 and hs-2 + # + # Direction hs-1 -> hs-2 + # - rt-1 encap (H.Encaps.Red) + # - rt-3 SRv6 End.X behavior adj rt-4 (NEXT-C-SID flavor) + # - rt-4 Plain IPv6 Forwarding to rt-2 + # - rt-2 SRv6 End.DT46 behavior + setup_ipv4_vpn_2sids 1 2 "3" + + # Direction hs-2 -> hs-1 + # - rt-2 encap (H.Encaps.Red) + # - rt-3 SRv6 End.X behavior adj rt-4 (NEXT-C-SID flavor) + # - rt-4 Plain IPv6 Forwarding to rt-1 + # - rt-1 SRv6 End.DT46 behavior + setup_ipv4_vpn_2sids 2 1 "3" + + # Setup the adjacencies in the SRv6 aware routers + # - rt-3 SRv6 End.X adjacency with rt-4 + # - rt-4 SRv6 End.X adjacency with rt-1 + set_end_x_nextcsid 3 4 + set_end_x_nextcsid 4 1 + + # testing environment was set up successfully + SETUP_ERR=0 +} + +check_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + local prefix + local rtsrc_nsname + + rtsrc_nsname="$(get_rtname "${rtsrc}")" + + prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" + + ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${prefix}::${rtdst}" >/dev/null 2>&1 +} + +check_and_log_rt_connectivity() +{ + local rtsrc="$1" + local rtdst="$2" + + check_rt_connectivity "${rtsrc}" "${rtdst}" + log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}" +} + +check_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 +} + +check_hs_ipv4_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + local hssrc_nsname + + hssrc_nsname="$(get_hsname "${hssrc}")" + + ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ + "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 +} + +check_and_log_hs2gw_connectivity() +{ + local hssrc="$1" + + check_hs_ipv6_connectivity "${hssrc}" 254 + log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw" + + check_hs_ipv4_connectivity "${hssrc}" 254 + log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> gw" +} + +check_and_log_hs_ipv6_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv6_connectivity "${hssrc}" "${hsdst}" + log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +check_and_log_hs_ipv4_connectivity() +{ + local hssrc="$1" + local hsdst="$2" + + check_hs_ipv4_connectivity "${hssrc}" "${hsdst}" + log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}" +} + +router_tests() +{ + local i + local j + + log_section "IPv6 routers connectivity test" + + for i in ${ROUTERS}; do + for j in ${ROUTERS}; do + if [ "${i}" -eq "${j}" ]; then + continue + fi + + check_and_log_rt_connectivity "${i}" "${j}" + done + done +} + +host2gateway_tests() +{ + local hs + + log_section "IPv4/IPv6 connectivity test among hosts and gateways" + + for hs in ${HOSTS}; do + check_and_log_hs2gw_connectivity "${hs}" + done +} + +host_vpn_tests() +{ + log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv6)" + + check_and_log_hs_ipv6_connectivity 1 2 + check_and_log_hs_ipv6_connectivity 2 1 + + log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4)" + + check_and_log_hs_ipv4_connectivity 1 2 + check_and_log_hs_ipv4_connectivity 2 1 +} + +__nextcsid_end_x_behavior_test() +{ + local nsname="$1" + local cmd="$2" + local blen="$3" + local flen="$4" + local layout="" + + if [ "${blen}" != "d" ]; then + layout="${layout} lblen ${blen}" + fi + + if [ "${flen}" != "d" ]; then + layout="${layout} nflen ${flen}" + fi + + ip -netns "${nsname}" -6 route \ + "${cmd}" "${CSID_CNTR_PREFIX}" \ + table "${CSID_CNTR_RT_TABLE}" \ + encap seg6local action End.X nh6 :: \ + flavors next-csid ${layout} \ + dev "${DUMMY_DEVNAME}" &>/dev/null + + return "$?" +} + +rt_x_nextcsid_end_x_behavior_test() +{ + local rt="$1" + local blen="$2" + local flen="$3" + local nsname + local ret + + nsname="$(get_rtname "${rt}")" + + __nextcsid_end_x_behavior_test "${nsname}" "add" "${blen}" "${flen}" + ret="$?" + __nextcsid_end_x_behavior_test "${nsname}" "del" "${blen}" "${flen}" + + return "${ret}" +} + +__parse_csid_container_cfg() +{ + local cfg="$1" + local index="$2" + local out + + echo "${cfg}" | cut -d',' -f"${index}" +} + +csid_container_cfg_tests() +{ + local valid + local blen + local flen + local cfg + local ret + + log_section "C-SID Container config tests (legend: d='kernel default')" + + for cfg in "${CSID_CONTAINER_CFGS[@]}"; do + blen="$(__parse_csid_container_cfg "${cfg}" 1)" + flen="$(__parse_csid_container_cfg "${cfg}" 2)" + valid="$(__parse_csid_container_cfg "${cfg}" 3)" + + rt_x_nextcsid_end_x_behavior_test \ + "${CSID_CNTR_RT_ID_TEST}" \ + "${blen}" \ + "${flen}" + ret="$?" + + if [ "${valid}" == "y" ]; then + log_test "${ret}" 0 \ + "Accept valid C-SID container cfg (lblen=${blen}, nflen=${flen})" + else + log_test "${ret}" 2 \ + "Reject invalid C-SID container cfg (lblen=${blen}, nflen=${flen})" + fi + done +} + +test_iproute2_supp_or_ksft_skip() +{ + if ! ip route help 2>&1 | grep -qo "next-csid"; then + echo "SKIP: Missing SRv6 NEXT-C-SID flavor support in iproute2" + exit "${ksft_skip}" + fi +} + +test_dummy_dev_or_ksft_skip() +{ + local test_netns + + test_netns="dummy-$(mktemp -u XXXXXXXX)" + + if ! ip netns add "${test_netns}"; then + echo "SKIP: Cannot set up netns for testing dummy dev support" + exit "${ksft_skip}" + fi + + modprobe dummy &>/dev/null || true + if ! ip -netns "${test_netns}" link \ + add "${DUMMY_DEVNAME}" type dummy; then + echo "SKIP: dummy dev not supported" + + ip netns del "${test_netns}" + exit "${ksft_skip}" + fi + + ip netns del "${test_netns}" +} + +test_vrf_or_ksft_skip() +{ + modprobe vrf &>/dev/null || true + if [ ! -e /proc/sys/net/vrf/strict_mode ]; then + echo "SKIP: vrf sysctl does not exist" + exit "${ksft_skip}" + fi +} + +if [ "$(id -u)" -ne 0 ]; then + echo "SKIP: Need root privileges" + exit "${ksft_skip}" +fi + +# required programs to carry out this selftest +test_command_or_ksft_skip ip +test_command_or_ksft_skip ping +test_command_or_ksft_skip sysctl +test_command_or_ksft_skip grep +test_command_or_ksft_skip cut + +test_iproute2_supp_or_ksft_skip +test_dummy_dev_or_ksft_skip +test_vrf_or_ksft_skip + +set -e +trap cleanup EXIT + +setup +set +e + +csid_container_cfg_tests + +router_tests +host2gateway_tests +host_vpn_tests + +print_log_test_results diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c index 6e59b1461dcc..4fcce5150850 100644 --- a/tools/testing/selftests/net/tcp_mmap.c +++ b/tools/testing/selftests/net/tcp_mmap.c @@ -153,6 +153,19 @@ static void *mmap_large_buffer(size_t need, size_t *allocated) return buffer; } +static uint32_t tcp_info_get_rcv_mss(int fd) +{ + socklen_t sz = sizeof(struct tcp_info); + struct tcp_info info; + + if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &info, &sz)) { + fprintf(stderr, "Error fetching TCP_INFO\n"); + return 0; + } + + return info.tcpi_rcv_mss; +} + void *child_thread(void *arg) { unsigned char digest[SHA256_DIGEST_LENGTH]; @@ -288,7 +301,7 @@ end: total_usec = 1000000*ru.ru_utime.tv_sec + ru.ru_utime.tv_usec + 1000000*ru.ru_stime.tv_sec + ru.ru_stime.tv_usec; printf("received %lg MB (%lg %% mmap'ed) in %lg s, %lg Gbit\n" - " cpu usage user:%lg sys:%lg, %lg usec per MB, %lu c-switches\n", + " cpu usage user:%lg sys:%lg, %lg usec per MB, %lu c-switches, rcv_mss %u\n", total / (1024.0 * 1024.0), 100.0*total_mmap/total, (double)delta_usec / 1000000.0, @@ -296,7 +309,8 @@ end: (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec / 1000000.0, (double)ru.ru_stime.tv_sec + (double)ru.ru_stime.tv_usec / 1000000.0, (double)total_usec/mb, - ru.ru_nvcsw); + ru.ru_nvcsw, + tcp_info_get_rcv_mss(fd)); } error: munmap(buffer, buffer_sz); diff --git a/tools/testing/selftests/net/test_bridge_backup_port.sh b/tools/testing/selftests/net/test_bridge_backup_port.sh new file mode 100755 index 000000000000..112cfd8a10ad --- /dev/null +++ b/tools/testing/selftests/net/test_bridge_backup_port.sh @@ -0,0 +1,759 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking bridge backup port and backup nexthop ID +# functionality. The topology consists of two bridge (VTEPs) connected using +# VXLAN. The test checks that when the switch port (swp1) is down, traffic is +# redirected to the VXLAN port (vx0). When a backup nexthop ID is configured, +# the test checks that traffic is redirected with the correct nexthop +# information. +# +# +------------------------------------+ +------------------------------------+ +# | + swp1 + vx0 | | + swp1 + vx0 | +# | | | | | | | | +# | | br0 | | | | | | +# | +------------+-----------+ | | +------------+-----------+ | +# | | | | | | +# | | | | | | +# | + | | + | +# | br0 | | br0 | +# | + | | + | +# | | | | | | +# | | | | | | +# | + | | + | +# | br0.10 | | br0.10 | +# | 192.0.2.65/28 | | 192.0.2.66/28 | +# | | | | +# | | | | +# | 192.0.2.33 | | 192.0.2.34 | +# | + lo | | + lo | +# | | | | +# | | | | +# | 192.0.2.49/28 | | 192.0.2.50/28 | +# | veth0 +-------+ veth0 | +# | | | | +# | sw1 | | sw2 | +# +------------------------------------+ +------------------------------------+ + +ret=0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +# All tests in this script. Can be overridden with -t option. +TESTS=" + backup_port + backup_nhid + backup_nhid_invalid + backup_nhid_ping + backup_nhid_torture +" +VERBOSE=0 +PAUSE_ON_FAIL=no +PAUSE=no +PING_TIMEOUT=5 + +################################################################################ +# Utilities + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf "TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf "TEST: %-60s [FAIL]\n" "${msg}" + if [ "$VERBOSE" = "1" ]; then + echo " rc=$rc, expected $expected" + fi + + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi + + if [ "${PAUSE}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + + [ "$VERBOSE" = "1" ] && echo +} + +run_cmd() +{ + local cmd="$1" + local out + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + printf "COMMAND: $cmd\n" + stderr= + fi + + out=$(eval $cmd $stderr) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo " $out" + fi + + return $rc +} + +tc_check_packets() +{ + local ns=$1; shift + local id=$1; shift + local handle=$1; shift + local count=$1; shift + local pkts + + sleep 0.1 + pkts=$(tc -n $ns -j -s filter show $id \ + | jq ".[] | select(.options.handle == $handle) | \ + .options.actions[0].stats.packets") + [[ $pkts == $count ]] +} + +################################################################################ +# Setup + +setup_topo_ns() +{ + local ns=$1; shift + + ip netns add $ns + ip -n $ns link set dev lo up + + ip netns exec $ns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1 + ip netns exec $ns sysctl -qw net.ipv6.conf.all.accept_dad=0 + ip netns exec $ns sysctl -qw net.ipv6.conf.default.accept_dad=0 +} + +setup_topo() +{ + local ns + + for ns in sw1 sw2; do + setup_topo_ns $ns + done + + ip link add name veth0 type veth peer name veth1 + ip link set dev veth0 netns sw1 name veth0 + ip link set dev veth1 netns sw2 name veth0 +} + +setup_sw_common() +{ + local ns=$1; shift + local local_addr=$1; shift + local remote_addr=$1; shift + local veth_addr=$1; shift + local gw_addr=$1; shift + local br_addr=$1; shift + + ip -n $ns address add $local_addr/32 dev lo + + ip -n $ns link set dev veth0 up + ip -n $ns address add $veth_addr/28 dev veth0 + ip -n $ns route add default via $gw_addr + + ip -n $ns link add name br0 up type bridge vlan_filtering 1 \ + vlan_default_pvid 0 mcast_snooping 0 + + ip -n $ns link add link br0 name br0.10 up type vlan id 10 + bridge -n $ns vlan add vid 10 dev br0 self + ip -n $ns address add $br_addr/28 dev br0.10 + + ip -n $ns link add name swp1 up type dummy + ip -n $ns link set dev swp1 master br0 + bridge -n $ns vlan add vid 10 dev swp1 untagged + + ip -n $ns link add name vx0 up master br0 type vxlan \ + local $local_addr dstport 4789 nolearning external + bridge -n $ns link set dev vx0 vlan_tunnel on learning off + + bridge -n $ns vlan add vid 10 dev vx0 + bridge -n $ns vlan add vid 10 dev vx0 tunnel_info id 10010 +} + +setup_sw1() +{ + local ns=sw1 + local local_addr=192.0.2.33 + local remote_addr=192.0.2.34 + local veth_addr=192.0.2.49 + local gw_addr=192.0.2.50 + local br_addr=192.0.2.65 + + setup_sw_common $ns $local_addr $remote_addr $veth_addr $gw_addr \ + $br_addr +} + +setup_sw2() +{ + local ns=sw2 + local local_addr=192.0.2.34 + local remote_addr=192.0.2.33 + local veth_addr=192.0.2.50 + local gw_addr=192.0.2.49 + local br_addr=192.0.2.66 + + setup_sw_common $ns $local_addr $remote_addr $veth_addr $gw_addr \ + $br_addr +} + +setup() +{ + set -e + + setup_topo + setup_sw1 + setup_sw2 + + sleep 5 + + set +e +} + +cleanup() +{ + local ns + + for ns in h1 h2 sw1 sw2; do + ip netns del $ns &> /dev/null + done +} + +################################################################################ +# Tests + +backup_port() +{ + local dmac=00:11:22:33:44:55 + local smac=00:aa:bb:cc:dd:ee + + echo + echo "Backup port" + echo "-----------" + + run_cmd "tc -n sw1 qdisc replace dev swp1 clsact" + run_cmd "tc -n sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" + + run_cmd "tc -n sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" + + run_cmd "bridge -n sw1 fdb replace $dmac dev swp1 master static vlan 10" + + # Initial state - check that packets are forwarded out of swp1 when it + # has a carrier and not forwarded out of any port when it does not have + # a carrier. + run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets sw1 "dev swp1 egress" 101 1 + log_test $? 0 "Forwarding out of swp1" + tc_check_packets sw1 "dev vx0 egress" 101 0 + log_test $? 0 "No forwarding out of vx0" + + run_cmd "ip -n sw1 link set dev swp1 carrier off" + log_test $? 0 "swp1 carrier off" + + run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets sw1 "dev swp1 egress" 101 1 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets sw1 "dev vx0 egress" 101 0 + log_test $? 0 "No forwarding out of vx0" + + run_cmd "ip -n sw1 link set dev swp1 carrier on" + log_test $? 0 "swp1 carrier on" + + # Configure vx0 as the backup port of swp1 and check that packets are + # forwarded out of swp1 when it has a carrier and out of vx0 when swp1 + # does not have a carrier. + run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0" + run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_port vx0\"" + log_test $? 0 "vx0 configured as backup port of swp1" + + run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets sw1 "dev swp1 egress" 101 2 + log_test $? 0 "Forwarding out of swp1" + tc_check_packets sw1 "dev vx0 egress" 101 0 + log_test $? 0 "No forwarding out of vx0" + + run_cmd "ip -n sw1 link set dev swp1 carrier off" + log_test $? 0 "swp1 carrier off" + + run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets sw1 "dev swp1 egress" 101 2 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets sw1 "dev vx0 egress" 101 1 + log_test $? 0 "Forwarding out of vx0" + + run_cmd "ip -n sw1 link set dev swp1 carrier on" + log_test $? 0 "swp1 carrier on" + + run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets sw1 "dev swp1 egress" 101 3 + log_test $? 0 "Forwarding out of swp1" + tc_check_packets sw1 "dev vx0 egress" 101 1 + log_test $? 0 "No forwarding out of vx0" + + # Remove vx0 as the backup port of swp1 and check that packets are no + # longer forwarded out of vx0 when swp1 does not have a carrier. + run_cmd "bridge -n sw1 link set dev swp1 nobackup_port" + run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_port vx0\"" + log_test $? 1 "vx0 not configured as backup port of swp1" + + run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets sw1 "dev swp1 egress" 101 4 + log_test $? 0 "Forwarding out of swp1" + tc_check_packets sw1 "dev vx0 egress" 101 1 + log_test $? 0 "No forwarding out of vx0" + + run_cmd "ip -n sw1 link set dev swp1 carrier off" + log_test $? 0 "swp1 carrier off" + + run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets sw1 "dev swp1 egress" 101 4 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets sw1 "dev vx0 egress" 101 1 + log_test $? 0 "No forwarding out of vx0" +} + +backup_nhid() +{ + local dmac=00:11:22:33:44:55 + local smac=00:aa:bb:cc:dd:ee + + echo + echo "Backup nexthop ID" + echo "-----------------" + + run_cmd "tc -n sw1 qdisc replace dev swp1 clsact" + run_cmd "tc -n sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" + + run_cmd "tc -n sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" + + run_cmd "ip -n sw1 nexthop replace id 1 via 192.0.2.34 fdb" + run_cmd "ip -n sw1 nexthop replace id 2 via 192.0.2.34 fdb" + run_cmd "ip -n sw1 nexthop replace id 10 group 1/2 fdb" + + run_cmd "bridge -n sw1 fdb replace $dmac dev swp1 master static vlan 10" + run_cmd "bridge -n sw1 fdb replace $dmac dev vx0 self static dst 192.0.2.36 src_vni 10010" + + run_cmd "ip -n sw2 address replace 192.0.2.36/32 dev lo" + + # The first filter matches on packets forwarded using the backup + # nexthop ID and the second filter matches on packets forwarded using a + # regular VXLAN FDB entry. + run_cmd "tc -n sw2 qdisc replace dev vx0 clsact" + run_cmd "tc -n sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass" + run_cmd "tc -n sw2 filter replace dev vx0 ingress pref 1 handle 102 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.36 action pass" + + # Configure vx0 as the backup port of swp1 and check that packets are + # forwarded out of swp1 when it has a carrier and out of vx0 when swp1 + # does not have a carrier. When packets are forwarded out of vx0, check + # that they are forwarded by the VXLAN FDB entry. + run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0" + run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_port vx0\"" + log_test $? 0 "vx0 configured as backup port of swp1" + + run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets sw1 "dev swp1 egress" 101 1 + log_test $? 0 "Forwarding out of swp1" + tc_check_packets sw1 "dev vx0 egress" 101 0 + log_test $? 0 "No forwarding out of vx0" + + run_cmd "ip -n sw1 link set dev swp1 carrier off" + log_test $? 0 "swp1 carrier off" + + run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets sw1 "dev swp1 egress" 101 1 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets sw1 "dev vx0 egress" 101 1 + log_test $? 0 "Forwarding out of vx0" + tc_check_packets sw2 "dev vx0 ingress" 101 0 + log_test $? 0 "No forwarding using backup nexthop ID" + tc_check_packets sw2 "dev vx0 ingress" 102 1 + log_test $? 0 "Forwarding using VXLAN FDB entry" + + run_cmd "ip -n sw1 link set dev swp1 carrier on" + log_test $? 0 "swp1 carrier on" + + # Configure nexthop ID 10 as the backup nexthop ID of swp1 and check + # that when packets are forwarded out of vx0, they are forwarded using + # the backup nexthop ID. + run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 10" + run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 10\"" + log_test $? 0 "nexthop ID 10 configured as backup nexthop ID of swp1" + + run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets sw1 "dev swp1 egress" 101 2 + log_test $? 0 "Forwarding out of swp1" + tc_check_packets sw1 "dev vx0 egress" 101 1 + log_test $? 0 "No forwarding out of vx0" + + run_cmd "ip -n sw1 link set dev swp1 carrier off" + log_test $? 0 "swp1 carrier off" + + run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets sw1 "dev swp1 egress" 101 2 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets sw1 "dev vx0 egress" 101 2 + log_test $? 0 "Forwarding out of vx0" + tc_check_packets sw2 "dev vx0 ingress" 101 1 + log_test $? 0 "Forwarding using backup nexthop ID" + tc_check_packets sw2 "dev vx0 ingress" 102 1 + log_test $? 0 "No forwarding using VXLAN FDB entry" + + run_cmd "ip -n sw1 link set dev swp1 carrier on" + log_test $? 0 "swp1 carrier on" + + run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets sw1 "dev swp1 egress" 101 3 + log_test $? 0 "Forwarding out of swp1" + tc_check_packets sw1 "dev vx0 egress" 101 2 + log_test $? 0 "No forwarding out of vx0" + tc_check_packets sw2 "dev vx0 ingress" 101 1 + log_test $? 0 "No forwarding using backup nexthop ID" + tc_check_packets sw2 "dev vx0 ingress" 102 1 + log_test $? 0 "No forwarding using VXLAN FDB entry" + + # Reset the backup nexthop ID to 0 and check that packets are no longer + # forwarded using the backup nexthop ID when swp1 does not have a + # carrier and are instead forwarded by the VXLAN FDB. + run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 0" + run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid\"" + log_test $? 1 "No backup nexthop ID configured for swp1" + + run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets sw1 "dev swp1 egress" 101 4 + log_test $? 0 "Forwarding out of swp1" + tc_check_packets sw1 "dev vx0 egress" 101 2 + log_test $? 0 "No forwarding out of vx0" + tc_check_packets sw2 "dev vx0 ingress" 101 1 + log_test $? 0 "No forwarding using backup nexthop ID" + tc_check_packets sw2 "dev vx0 ingress" 102 1 + log_test $? 0 "No forwarding using VXLAN FDB entry" + + run_cmd "ip -n sw1 link set dev swp1 carrier off" + log_test $? 0 "swp1 carrier off" + + run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets sw1 "dev swp1 egress" 101 4 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets sw1 "dev vx0 egress" 101 3 + log_test $? 0 "Forwarding out of vx0" + tc_check_packets sw2 "dev vx0 ingress" 101 1 + log_test $? 0 "No forwarding using backup nexthop ID" + tc_check_packets sw2 "dev vx0 ingress" 102 2 + log_test $? 0 "Forwarding using VXLAN FDB entry" +} + +backup_nhid_invalid() +{ + local dmac=00:11:22:33:44:55 + local smac=00:aa:bb:cc:dd:ee + local tx_drop + + echo + echo "Backup nexthop ID - invalid IDs" + echo "-------------------------------" + + # Check that when traffic is redirected with an invalid nexthop ID, it + # is forwarded out of the VXLAN port, but dropped by the VXLAN driver + # and does not crash the host. + + run_cmd "tc -n sw1 qdisc replace dev swp1 clsact" + run_cmd "tc -n sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" + + run_cmd "tc -n sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass" + # Drop all other Tx traffic to avoid changes to Tx drop counter. + run_cmd "tc -n sw1 filter replace dev vx0 egress pref 2 handle 102 proto all matchall action drop" + + tx_drop=$(ip -n sw1 -s -j link show dev vx0 | jq '.[]["stats64"]["tx"]["dropped"]') + + run_cmd "ip -n sw1 nexthop replace id 1 via 192.0.2.34 fdb" + run_cmd "ip -n sw1 nexthop replace id 2 via 192.0.2.34 fdb" + run_cmd "ip -n sw1 nexthop replace id 10 group 1/2 fdb" + + run_cmd "bridge -n sw1 fdb replace $dmac dev swp1 master static vlan 10" + + run_cmd "tc -n sw2 qdisc replace dev vx0 clsact" + run_cmd "tc -n sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass" + + # First, check that redirection works. + run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0" + run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_port vx0\"" + log_test $? 0 "vx0 configured as backup port of swp1" + + run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 10" + run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 10\"" + log_test $? 0 "Valid nexthop as backup nexthop" + + run_cmd "ip -n sw1 link set dev swp1 carrier off" + log_test $? 0 "swp1 carrier off" + + run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets sw1 "dev swp1 egress" 101 0 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets sw1 "dev vx0 egress" 101 1 + log_test $? 0 "Forwarding out of vx0" + tc_check_packets sw2 "dev vx0 ingress" 101 1 + log_test $? 0 "Forwarding using backup nexthop ID" + run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $tx_drop'" + log_test $? 0 "No Tx drop increase" + + # Use a non-existent nexthop ID. + run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 20" + run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 20\"" + log_test $? 0 "Non-existent nexthop as backup nexthop" + + run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets sw1 "dev swp1 egress" 101 0 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets sw1 "dev vx0 egress" 101 2 + log_test $? 0 "Forwarding out of vx0" + tc_check_packets sw2 "dev vx0 ingress" 101 1 + log_test $? 0 "No forwarding using backup nexthop ID" + run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 1))'" + log_test $? 0 "Tx drop increased" + + # Use a blckhole nexthop. + run_cmd "ip -n sw1 nexthop replace id 30 blackhole" + run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 30" + run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 30\"" + log_test $? 0 "Blackhole nexthop as backup nexthop" + + run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets sw1 "dev swp1 egress" 101 0 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets sw1 "dev vx0 egress" 101 3 + log_test $? 0 "Forwarding out of vx0" + tc_check_packets sw2 "dev vx0 ingress" 101 1 + log_test $? 0 "No forwarding using backup nexthop ID" + run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 2))'" + log_test $? 0 "Tx drop increased" + + # Non-group FDB nexthop. + run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 1" + run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 1\"" + log_test $? 0 "Non-group FDB nexthop as backup nexthop" + + run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets sw1 "dev swp1 egress" 101 0 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets sw1 "dev vx0 egress" 101 4 + log_test $? 0 "Forwarding out of vx0" + tc_check_packets sw2 "dev vx0 ingress" 101 1 + log_test $? 0 "No forwarding using backup nexthop ID" + run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 3))'" + log_test $? 0 "Tx drop increased" + + # IPv6 address family nexthop. + run_cmd "ip -n sw1 nexthop replace id 100 via 2001:db8:100::1 fdb" + run_cmd "ip -n sw1 nexthop replace id 200 via 2001:db8:100::1 fdb" + run_cmd "ip -n sw1 nexthop replace id 300 group 100/200 fdb" + run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 300" + run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 300\"" + log_test $? 0 "IPv6 address family nexthop as backup nexthop" + + run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets sw1 "dev swp1 egress" 101 0 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets sw1 "dev vx0 egress" 101 5 + log_test $? 0 "Forwarding out of vx0" + tc_check_packets sw2 "dev vx0 ingress" 101 1 + log_test $? 0 "No forwarding using backup nexthop ID" + run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 4))'" + log_test $? 0 "Tx drop increased" +} + +backup_nhid_ping() +{ + local sw1_mac + local sw2_mac + + echo + echo "Backup nexthop ID - ping" + echo "------------------------" + + # Test bidirectional traffic when traffic is redirected in both VTEPs. + sw1_mac=$(ip -n sw1 -j -p link show br0.10 | jq -r '.[]["address"]') + sw2_mac=$(ip -n sw2 -j -p link show br0.10 | jq -r '.[]["address"]') + + run_cmd "bridge -n sw1 fdb replace $sw2_mac dev swp1 master static vlan 10" + run_cmd "bridge -n sw2 fdb replace $sw1_mac dev swp1 master static vlan 10" + + run_cmd "ip -n sw1 neigh replace 192.0.2.66 lladdr $sw2_mac nud perm dev br0.10" + run_cmd "ip -n sw2 neigh replace 192.0.2.65 lladdr $sw1_mac nud perm dev br0.10" + + run_cmd "ip -n sw1 nexthop replace id 1 via 192.0.2.34 fdb" + run_cmd "ip -n sw2 nexthop replace id 1 via 192.0.2.33 fdb" + run_cmd "ip -n sw1 nexthop replace id 10 group 1 fdb" + run_cmd "ip -n sw2 nexthop replace id 10 group 1 fdb" + + run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0" + run_cmd "bridge -n sw2 link set dev swp1 backup_port vx0" + run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 10" + run_cmd "bridge -n sw2 link set dev swp1 backup_nhid 10" + + run_cmd "ip -n sw1 link set dev swp1 carrier off" + run_cmd "ip -n sw2 link set dev swp1 carrier off" + + run_cmd "ip netns exec sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66" + log_test $? 0 "Ping with backup nexthop ID" + + # Reset the backup nexthop ID to 0 and check that ping fails. + run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 0" + run_cmd "bridge -n sw2 link set dev swp1 backup_nhid 0" + + run_cmd "ip netns exec sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66" + log_test $? 1 "Ping after disabling backup nexthop ID" +} + +backup_nhid_add_del_loop() +{ + while true; do + ip -n sw1 nexthop del id 10 + ip -n sw1 nexthop replace id 10 group 1/2 fdb + done >/dev/null 2>&1 +} + +backup_nhid_torture() +{ + local dmac=00:11:22:33:44:55 + local smac=00:aa:bb:cc:dd:ee + local pid1 + local pid2 + local pid3 + + echo + echo "Backup nexthop ID - torture test" + echo "--------------------------------" + + # Continuously send traffic through the backup nexthop while adding and + # deleting the group. The test is considered successful if nothing + # crashed. + + run_cmd "ip -n sw1 nexthop replace id 1 via 192.0.2.34 fdb" + run_cmd "ip -n sw1 nexthop replace id 2 via 192.0.2.34 fdb" + run_cmd "ip -n sw1 nexthop replace id 10 group 1/2 fdb" + + run_cmd "bridge -n sw1 fdb replace $dmac dev swp1 master static vlan 10" + + run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0" + run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 10" + run_cmd "ip -n sw1 link set dev swp1 carrier off" + + backup_nhid_add_del_loop & + pid1=$! + ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 0 & + pid2=$! + + sleep 30 + kill -9 $pid1 $pid2 + wait $pid1 $pid2 2>/dev/null + + log_test 0 0 "Torture test" +} + +################################################################################ +# Usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) + -p Pause on fail + -P Pause after each test before cleanup + -v Verbose mode (show commands and output) + -w Timeout for ping +EOF +} + +################################################################################ +# Main + +trap cleanup EXIT + +while getopts ":t:pPvhw:" opt; do + case $opt in + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + P) PAUSE=yes;; + v) VERBOSE=$(($VERBOSE + 1));; + w) PING_TIMEOUT=$OPTARG;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +# Make sure we don't pause twice. +[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v bridge)" ]; then + echo "SKIP: Could not run test without bridge tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v tc)" ]; then + echo "SKIP: Could not run test without tc tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test without mausezahn tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v jq)" ]; then + echo "SKIP: Could not run test without jq tool" + exit $ksft_skip +fi + +bridge link help 2>&1 | grep -q "backup_nhid" +if [ $? -ne 0 ]; then + echo "SKIP: iproute2 bridge too old, missing backup nexthop ID support" + exit $ksft_skip +fi + +# Start clean. +cleanup + +for t in $TESTS +do + setup; $t; cleanup; +done + +if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} +fi + +exit $ret diff --git a/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh b/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh new file mode 100755 index 000000000000..f75212bf142c --- /dev/null +++ b/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh @@ -0,0 +1,240 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is for checking the [no]localbypass VXLAN device option. The test +# configures two VXLAN devices in the same network namespace and a tc filter on +# the loopback device that drops encapsulated packets. The test sends packets +# from the first VXLAN device and verifies that by default these packets are +# received by the second VXLAN device. The test then enables the nolocalbypass +# option and verifies that packets are no longer received by the second VXLAN +# device. + +ret=0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +TESTS=" + nolocalbypass +" +VERBOSE=0 +PAUSE_ON_FAIL=no +PAUSE=no + +################################################################################ +# Utilities + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf "TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf "TEST: %-60s [FAIL]\n" "${msg}" + if [ "$VERBOSE" = "1" ]; then + echo " rc=$rc, expected $expected" + fi + + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi + + if [ "${PAUSE}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + + [ "$VERBOSE" = "1" ] && echo +} + +run_cmd() +{ + local cmd="$1" + local out + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + printf "COMMAND: $cmd\n" + stderr= + fi + + out=$(eval $cmd $stderr) + rc=$? + if [ "$VERBOSE" = "1" -a -n "$out" ]; then + echo " $out" + fi + + return $rc +} + +tc_check_packets() +{ + local ns=$1; shift + local id=$1; shift + local handle=$1; shift + local count=$1; shift + local pkts + + sleep 0.1 + pkts=$(tc -n $ns -j -s filter show $id \ + | jq ".[] | select(.options.handle == $handle) | \ + .options.actions[0].stats.packets") + [[ $pkts == $count ]] +} + +################################################################################ +# Setup + +setup() +{ + ip netns add ns1 + + ip -n ns1 link set dev lo up + ip -n ns1 address add 192.0.2.1/32 dev lo + ip -n ns1 address add 198.51.100.1/32 dev lo + + ip -n ns1 link add name vx0 up type vxlan id 100 local 198.51.100.1 \ + dstport 4789 nolearning + ip -n ns1 link add name vx1 up type vxlan id 100 dstport 4790 +} + +cleanup() +{ + ip netns del ns1 &> /dev/null +} + +################################################################################ +# Tests + +nolocalbypass() +{ + local smac=00:01:02:03:04:05 + local dmac=00:0a:0b:0c:0d:0e + + run_cmd "bridge -n ns1 fdb add $dmac dev vx0 self static dst 192.0.2.1 port 4790" + + run_cmd "tc -n ns1 qdisc add dev vx1 clsact" + run_cmd "tc -n ns1 filter add dev vx1 ingress pref 1 handle 101 proto all flower src_mac $smac dst_mac $dmac action pass" + + run_cmd "tc -n ns1 qdisc add dev lo clsact" + run_cmd "tc -n ns1 filter add dev lo ingress pref 1 handle 101 proto ip flower ip_proto udp dst_port 4790 action drop" + + run_cmd "ip -n ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'" + log_test $? 0 "localbypass enabled" + + run_cmd "ip netns exec ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q" + + tc_check_packets "ns1" "dev vx1 ingress" 101 1 + log_test $? 0 "Packet received by local VXLAN device - localbypass" + + run_cmd "ip -n ns1 link set dev vx0 type vxlan nolocalbypass" + + run_cmd "ip -n ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == false'" + log_test $? 0 "localbypass disabled" + + run_cmd "ip netns exec ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q" + + tc_check_packets "ns1" "dev vx1 ingress" 101 1 + log_test $? 0 "Packet not received by local VXLAN device - nolocalbypass" + + run_cmd "ip -n ns1 link set dev vx0 type vxlan localbypass" + + run_cmd "ip -n ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'" + log_test $? 0 "localbypass enabled" + + run_cmd "ip netns exec ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q" + + tc_check_packets "ns1" "dev vx1 ingress" 101 2 + log_test $? 0 "Packet received by local VXLAN device - localbypass" +} + +################################################################################ +# Usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) + -p Pause on fail + -P Pause after each test before cleanup + -v Verbose mode (show commands and output) +EOF +} + +################################################################################ +# Main + +trap cleanup EXIT + +while getopts ":t:pPvh" opt; do + case $opt in + t) TESTS=$OPTARG ;; + p) PAUSE_ON_FAIL=yes;; + P) PAUSE=yes;; + v) VERBOSE=$(($VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +# Make sure we don't pause twice. +[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v bridge)" ]; then + echo "SKIP: Could not run test without bridge tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test without mausezahn tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v jq)" ]; then + echo "SKIP: Could not run test without jq tool" + exit $ksft_skip +fi + +ip link help vxlan 2>&1 | grep -q "localbypass" +if [ $? -ne 0 ]; then + echo "SKIP: iproute2 ip too old, missing VXLAN nolocalbypass support" + exit $ksft_skip +fi + +cleanup + +for t in $TESTS +do + setup; $t; cleanup; +done + +if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} +fi + +exit $ret diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index ff36844d14b4..464853a7f982 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -15,6 +15,7 @@ #include <linux/tcp.h> #include <linux/socket.h> +#include <sys/epoll.h> #include <sys/types.h> #include <sys/sendfile.h> #include <sys/socket.h> @@ -29,12 +30,15 @@ static int fips_enabled; struct tls_crypto_info_keys { union { + struct tls_crypto_info crypto_info; struct tls12_crypto_info_aes_gcm_128 aes128; struct tls12_crypto_info_chacha20_poly1305 chacha20; struct tls12_crypto_info_sm4_gcm sm4gcm; struct tls12_crypto_info_sm4_ccm sm4ccm; struct tls12_crypto_info_aes_ccm_128 aesccm128; struct tls12_crypto_info_aes_gcm_256 aesgcm256; + struct tls12_crypto_info_aria_gcm_128 ariagcm128; + struct tls12_crypto_info_aria_gcm_256 ariagcm256; }; size_t len; }; @@ -75,6 +79,16 @@ static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type, tls12->aesgcm256.info.version = tls_version; tls12->aesgcm256.info.cipher_type = cipher_type; break; + case TLS_CIPHER_ARIA_GCM_128: + tls12->len = sizeof(struct tls12_crypto_info_aria_gcm_128); + tls12->ariagcm128.info.version = tls_version; + tls12->ariagcm128.info.cipher_type = cipher_type; + break; + case TLS_CIPHER_ARIA_GCM_256: + tls12->len = sizeof(struct tls12_crypto_info_aria_gcm_256); + tls12->ariagcm256.info.version = tls_version; + tls12->ariagcm256.info.cipher_type = cipher_type; + break; default: break; } @@ -227,6 +241,31 @@ TEST_F(tls_basic, base_base) EXPECT_EQ(memcmp(buf, test_str, send_len), 0); }; +TEST_F(tls_basic, bad_cipher) +{ + struct tls_crypto_info_keys tls12; + + tls12.crypto_info.version = 200; + tls12.crypto_info.cipher_type = TLS_CIPHER_AES_GCM_128; + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1); + + tls12.crypto_info.version = TLS_1_2_VERSION; + tls12.crypto_info.cipher_type = 50; + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1); + + tls12.crypto_info.version = TLS_1_2_VERSION; + tls12.crypto_info.cipher_type = 59; + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1); + + tls12.crypto_info.version = TLS_1_2_VERSION; + tls12.crypto_info.cipher_type = 10; + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1); + + tls12.crypto_info.version = TLS_1_2_VERSION; + tls12.crypto_info.cipher_type = 70; + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1); +} + FIXTURE(tls) { int fd, cfd; @@ -311,6 +350,18 @@ FIXTURE_VARIANT_ADD(tls, 13_nopad) .nopad = true, }; +FIXTURE_VARIANT_ADD(tls, 12_aria_gcm) +{ + .tls_version = TLS_1_2_VERSION, + .cipher_type = TLS_CIPHER_ARIA_GCM_128, +}; + +FIXTURE_VARIANT_ADD(tls, 12_aria_gcm_256) +{ + .tls_version = TLS_1_2_VERSION, + .cipher_type = TLS_CIPHER_ARIA_GCM_256, +}; + FIXTURE_SETUP(tls) { struct tls_crypto_info_keys tls12; @@ -485,6 +536,17 @@ TEST_F(tls, msg_more_unsent) EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_DONTWAIT), -1); } +TEST_F(tls, msg_eor) +{ + char const *test_str = "test_read"; + int send_len = 10; + char buf[10]; + + EXPECT_EQ(send(self->fd, test_str, send_len, MSG_EOR), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_WAITALL), send_len); + EXPECT_EQ(memcmp(buf, test_str, send_len), 0); +} + TEST_F(tls, sendmsg_single) { struct msghdr msg; @@ -551,11 +613,11 @@ TEST_F(tls, sendmsg_large) msg.msg_iov = &vec; msg.msg_iovlen = 1; - EXPECT_EQ(sendmsg(self->cfd, &msg, 0), send_len); + EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len); } while (recvs++ < sends) { - EXPECT_NE(recv(self->fd, mem, send_len, 0), -1); + EXPECT_NE(recv(self->cfd, mem, send_len, 0), -1); } free(mem); @@ -584,9 +646,9 @@ TEST_F(tls, sendmsg_multiple) msg.msg_iov = vec; msg.msg_iovlen = iov_len; - EXPECT_EQ(sendmsg(self->cfd, &msg, 0), total_len); + EXPECT_EQ(sendmsg(self->fd, &msg, 0), total_len); buf = malloc(total_len); - EXPECT_NE(recv(self->fd, buf, total_len, 0), -1); + EXPECT_NE(recv(self->cfd, buf, total_len, 0), -1); for (i = 0; i < iov_len; i++) { EXPECT_EQ(memcmp(test_strs[i], buf + len_cmp, strlen(test_strs[i])), @@ -1460,6 +1522,40 @@ TEST_F(tls, shutdown_reuse) EXPECT_EQ(errno, EISCONN); } +TEST_F(tls, getsockopt) +{ + struct tls_crypto_info_keys expect, get; + socklen_t len; + + /* get only the version/cipher */ + len = sizeof(struct tls_crypto_info); + memrnd(&get, sizeof(get)); + EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), 0); + EXPECT_EQ(len, sizeof(struct tls_crypto_info)); + EXPECT_EQ(get.crypto_info.version, variant->tls_version); + EXPECT_EQ(get.crypto_info.cipher_type, variant->cipher_type); + + /* get the full crypto_info */ + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &expect); + len = expect.len; + memrnd(&get, sizeof(get)); + EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), 0); + EXPECT_EQ(len, expect.len); + EXPECT_EQ(get.crypto_info.version, variant->tls_version); + EXPECT_EQ(get.crypto_info.cipher_type, variant->cipher_type); + EXPECT_EQ(memcmp(&get, &expect, expect.len), 0); + + /* short get should fail */ + len = sizeof(struct tls_crypto_info) - 1; + EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), -1); + EXPECT_EQ(errno, EINVAL); + + /* partial get of the cipher data should fail */ + len = expect.len - 1; + EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), -1); + EXPECT_EQ(errno, EINVAL); +} + FIXTURE(tls_err) { int fd, cfd; @@ -1646,6 +1742,136 @@ TEST_F(tls_err, timeo) } } +TEST_F(tls_err, poll_partial_rec) +{ + struct pollfd pfd = { }; + ssize_t rec_len; + char rec[256]; + char buf[128]; + + if (self->notls) + SKIP(return, "no TLS support"); + + pfd.fd = self->cfd2; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 1), 0); + + memrnd(buf, sizeof(buf)); + EXPECT_EQ(send(self->fd, buf, sizeof(buf), 0), sizeof(buf)); + rec_len = recv(self->cfd, rec, sizeof(rec), 0); + EXPECT_GT(rec_len, sizeof(buf)); + + /* Write 100B, not the full record ... */ + EXPECT_EQ(send(self->fd2, rec, 100, 0), 100); + /* ... no full record should mean no POLLIN */ + pfd.fd = self->cfd2; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 1), 0); + /* Now write the rest, and it should all pop out of the other end. */ + EXPECT_EQ(send(self->fd2, rec + 100, rec_len - 100, 0), rec_len - 100); + pfd.fd = self->cfd2; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 1), 1); + EXPECT_EQ(recv(self->cfd2, rec, sizeof(rec), 0), sizeof(buf)); + EXPECT_EQ(memcmp(buf, rec, sizeof(buf)), 0); +} + +TEST_F(tls_err, epoll_partial_rec) +{ + struct epoll_event ev, events[10]; + ssize_t rec_len; + char rec[256]; + char buf[128]; + int epollfd; + + if (self->notls) + SKIP(return, "no TLS support"); + + epollfd = epoll_create1(0); + ASSERT_GE(epollfd, 0); + + memset(&ev, 0, sizeof(ev)); + ev.events = EPOLLIN; + ev.data.fd = self->cfd2; + ASSERT_GE(epoll_ctl(epollfd, EPOLL_CTL_ADD, self->cfd2, &ev), 0); + + EXPECT_EQ(epoll_wait(epollfd, events, 10, 0), 0); + + memrnd(buf, sizeof(buf)); + EXPECT_EQ(send(self->fd, buf, sizeof(buf), 0), sizeof(buf)); + rec_len = recv(self->cfd, rec, sizeof(rec), 0); + EXPECT_GT(rec_len, sizeof(buf)); + + /* Write 100B, not the full record ... */ + EXPECT_EQ(send(self->fd2, rec, 100, 0), 100); + /* ... no full record should mean no POLLIN */ + EXPECT_EQ(epoll_wait(epollfd, events, 10, 0), 0); + /* Now write the rest, and it should all pop out of the other end. */ + EXPECT_EQ(send(self->fd2, rec + 100, rec_len - 100, 0), rec_len - 100); + EXPECT_EQ(epoll_wait(epollfd, events, 10, 0), 1); + EXPECT_EQ(recv(self->cfd2, rec, sizeof(rec), 0), sizeof(buf)); + EXPECT_EQ(memcmp(buf, rec, sizeof(buf)), 0); + + close(epollfd); +} + +TEST_F(tls_err, poll_partial_rec_async) +{ + struct pollfd pfd = { }; + ssize_t rec_len; + char rec[256]; + char buf[128]; + char token; + int p[2]; + int ret; + + if (self->notls) + SKIP(return, "no TLS support"); + + ASSERT_GE(pipe(p), 0); + + memrnd(buf, sizeof(buf)); + EXPECT_EQ(send(self->fd, buf, sizeof(buf), 0), sizeof(buf)); + rec_len = recv(self->cfd, rec, sizeof(rec), 0); + EXPECT_GT(rec_len, sizeof(buf)); + + ret = fork(); + ASSERT_GE(ret, 0); + + if (ret) { + int status, pid2; + + close(p[1]); + usleep(1000); /* Give child a head start */ + + EXPECT_EQ(send(self->fd2, rec, 100, 0), 100); + + EXPECT_EQ(read(p[0], &token, 1), 1); /* Barrier #1 */ + + EXPECT_EQ(send(self->fd2, rec + 100, rec_len - 100, 0), + rec_len - 100); + + pid2 = wait(&status); + EXPECT_EQ(pid2, ret); + EXPECT_EQ(status, 0); + } else { + close(p[0]); + + /* Child should sleep in poll(), never get a wake */ + pfd.fd = self->cfd2; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 5), 0); + + EXPECT_EQ(write(p[1], &token, 1), 1); /* Barrier #1 */ + + pfd.fd = self->cfd2; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 5), 1); + + exit(!_metadata->passed); + } +} + TEST(non_established) { struct tls12_crypto_info_aes_gcm_256 tls12; struct sockaddr_in addr; diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh index 23cf924754a5..dedc52562b4f 100755 --- a/tools/testing/selftests/net/vrf_route_leaking.sh +++ b/tools/testing/selftests/net/vrf_route_leaking.sh @@ -565,7 +565,7 @@ EOF command -v ping6 > /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping) TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_ttl_asym ipv4_traceroute_asym" -TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_frag ipv6_ping_ttl_asym ipv6_traceroute_asym" +TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_ttl_asym ipv6_traceroute_asym" ret=0 nsuccess=0 diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/netfilter/.gitignore index 4cb887b57413..4b2928e1c19d 100644 --- a/tools/testing/selftests/netfilter/.gitignore +++ b/tools/testing/selftests/netfilter/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only nf-queue connect_close +audit_logread diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 3686bfa6c58d..ef90aca4cc96 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -6,13 +6,14 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \ - conntrack_vrf.sh nft_synproxy.sh rpath.sh + conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh \ + conntrack_sctp_collision.sh HOSTPKG_CONFIG := pkg-config CFLAGS += $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null) LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) -TEST_GEN_FILES = nf-queue connect_close +TEST_GEN_FILES = nf-queue connect_close audit_logread sctp_collision include ../lib.mk diff --git a/tools/testing/selftests/netfilter/audit_logread.c b/tools/testing/selftests/netfilter/audit_logread.c new file mode 100644 index 000000000000..a0a880fc2d9d --- /dev/null +++ b/tools/testing/selftests/netfilter/audit_logread.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <poll.h> +#include <signal.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <unistd.h> +#include <linux/audit.h> +#include <linux/netlink.h> + +static int fd; + +#define MAX_AUDIT_MESSAGE_LENGTH 8970 +struct audit_message { + struct nlmsghdr nlh; + union { + struct audit_status s; + char data[MAX_AUDIT_MESSAGE_LENGTH]; + } u; +}; + +int audit_recv(int fd, struct audit_message *rep) +{ + struct sockaddr_nl addr; + socklen_t addrlen = sizeof(addr); + int ret; + + do { + ret = recvfrom(fd, rep, sizeof(*rep), 0, + (struct sockaddr *)&addr, &addrlen); + } while (ret < 0 && errno == EINTR); + + if (ret < 0 || + addrlen != sizeof(addr) || + addr.nl_pid != 0 || + rep->nlh.nlmsg_type == NLMSG_ERROR) /* short-cut for now */ + return -1; + + return ret; +} + +int audit_send(int fd, uint16_t type, uint32_t key, uint32_t val) +{ + static int seq = 0; + struct audit_message msg = { + .nlh = { + .nlmsg_len = NLMSG_SPACE(sizeof(msg.u.s)), + .nlmsg_type = type, + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK, + .nlmsg_seq = ++seq, + }, + .u.s = { + .mask = key, + .enabled = key == AUDIT_STATUS_ENABLED ? val : 0, + .pid = key == AUDIT_STATUS_PID ? val : 0, + } + }; + struct sockaddr_nl addr = { + .nl_family = AF_NETLINK, + }; + int ret; + + do { + ret = sendto(fd, &msg, msg.nlh.nlmsg_len, 0, + (struct sockaddr *)&addr, sizeof(addr)); + } while (ret < 0 && errno == EINTR); + + if (ret != (int)msg.nlh.nlmsg_len) + return -1; + return 0; +} + +int audit_set(int fd, uint32_t key, uint32_t val) +{ + struct audit_message rep = { 0 }; + int ret; + + ret = audit_send(fd, AUDIT_SET, key, val); + if (ret) + return ret; + + ret = audit_recv(fd, &rep); + if (ret < 0) + return ret; + return 0; +} + +int readlog(int fd) +{ + struct audit_message rep = { 0 }; + int ret = audit_recv(fd, &rep); + const char *sep = ""; + char *k, *v; + + if (ret < 0) + return ret; + + if (rep.nlh.nlmsg_type != AUDIT_NETFILTER_CFG) + return 0; + + /* skip the initial "audit(...): " part */ + strtok(rep.u.data, " "); + + while ((k = strtok(NULL, "="))) { + v = strtok(NULL, " "); + + /* these vary and/or are uninteresting, ignore */ + if (!strcmp(k, "pid") || + !strcmp(k, "comm") || + !strcmp(k, "subj")) + continue; + + /* strip the varying sequence number */ + if (!strcmp(k, "table")) + *strchrnul(v, ':') = '\0'; + + printf("%s%s=%s", sep, k, v); + sep = " "; + } + if (*sep) { + printf("\n"); + fflush(stdout); + } + return 0; +} + +void cleanup(int sig) +{ + audit_set(fd, AUDIT_STATUS_ENABLED, 0); + close(fd); + if (sig) + exit(0); +} + +int main(int argc, char **argv) +{ + struct sigaction act = { + .sa_handler = cleanup, + }; + + fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_AUDIT); + if (fd < 0) { + perror("Can't open netlink socket"); + return -1; + } + + if (sigaction(SIGTERM, &act, NULL) < 0 || + sigaction(SIGINT, &act, NULL) < 0) { + perror("Can't set signal handler"); + close(fd); + return -1; + } + + audit_set(fd, AUDIT_STATUS_ENABLED, 1); + audit_set(fd, AUDIT_STATUS_PID, getpid()); + + while (1) + readlog(fd); +} diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config index 4faf2ce021d9..7c42b1b2c69b 100644 --- a/tools/testing/selftests/netfilter/config +++ b/tools/testing/selftests/netfilter/config @@ -6,3 +6,4 @@ CONFIG_NFT_REDIR=m CONFIG_NFT_MASQ=m CONFIG_NFT_FLOW_OFFLOAD=m CONFIG_NF_CT_NETLINK=m +CONFIG_AUDIT=y diff --git a/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh b/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh new file mode 100755 index 000000000000..a924e595cfd8 --- /dev/null +++ b/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh @@ -0,0 +1,89 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing For SCTP COLLISION SCENARIO as Below: +# +# 14:35:47.655279 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT] [init tag: 2017837359] +# 14:35:48.353250 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT] [init tag: 1187206187] +# 14:35:48.353275 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT ACK] [init tag: 2017837359] +# 14:35:48.353283 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [COOKIE ECHO] +# 14:35:48.353977 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [COOKIE ACK] +# 14:35:48.855335 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT ACK] [init tag: 164579970] +# +# TOPO: SERVER_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) CLIENT_NS + +CLIENT_NS=$(mktemp -u client-XXXXXXXX) +CLIENT_IP="198.51.200.1" +CLIENT_PORT=1234 + +SERVER_NS=$(mktemp -u server-XXXXXXXX) +SERVER_IP="198.51.100.1" +SERVER_PORT=1234 + +ROUTER_NS=$(mktemp -u router-XXXXXXXX) +CLIENT_GW="198.51.200.2" +SERVER_GW="198.51.100.2" + +# setup the topo +setup() { + ip net add $CLIENT_NS + ip net add $SERVER_NS + ip net add $ROUTER_NS + ip -n $SERVER_NS link add link0 type veth peer name link1 netns $ROUTER_NS + ip -n $CLIENT_NS link add link3 type veth peer name link2 netns $ROUTER_NS + + ip -n $SERVER_NS link set link0 up + ip -n $SERVER_NS addr add $SERVER_IP/24 dev link0 + ip -n $SERVER_NS route add $CLIENT_IP dev link0 via $SERVER_GW + + ip -n $ROUTER_NS link set link1 up + ip -n $ROUTER_NS link set link2 up + ip -n $ROUTER_NS addr add $SERVER_GW/24 dev link1 + ip -n $ROUTER_NS addr add $CLIENT_GW/24 dev link2 + ip net exec $ROUTER_NS sysctl -wq net.ipv4.ip_forward=1 + + ip -n $CLIENT_NS link set link3 up + ip -n $CLIENT_NS addr add $CLIENT_IP/24 dev link3 + ip -n $CLIENT_NS route add $SERVER_IP dev link3 via $CLIENT_GW + + # simulate the delay on OVS upcall by setting up a delay for INIT_ACK with + # tc on $SERVER_NS side + tc -n $SERVER_NS qdisc add dev link0 root handle 1: htb + tc -n $SERVER_NS class add dev link0 parent 1: classid 1:1 htb rate 100mbit + tc -n $SERVER_NS filter add dev link0 parent 1: protocol ip u32 match ip protocol 132 \ + 0xff match u8 2 0xff at 32 flowid 1:1 + tc -n $SERVER_NS qdisc add dev link0 parent 1:1 handle 10: netem delay 1200ms + + # simulate the ctstate check on OVS nf_conntrack + ip net exec $ROUTER_NS iptables -A FORWARD -m state --state INVALID,UNTRACKED -j DROP + ip net exec $ROUTER_NS iptables -A INPUT -p sctp -j DROP + + # use a smaller number for assoc's max_retrans to reproduce the issue + modprobe sctp + ip net exec $CLIENT_NS sysctl -wq net.sctp.association_max_retrans=3 +} + +cleanup() { + ip net exec $CLIENT_NS pkill sctp_collision 2>&1 >/dev/null + ip net exec $SERVER_NS pkill sctp_collision 2>&1 >/dev/null + ip net del "$CLIENT_NS" + ip net del "$SERVER_NS" + ip net del "$ROUTER_NS" +} + +do_test() { + ip net exec $SERVER_NS ./sctp_collision server \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT & + ip net exec $CLIENT_NS ./sctp_collision client \ + $CLIENT_IP $CLIENT_PORT $SERVER_IP $SERVER_PORT +} + +# NOTE: one way to work around the issue is set a smaller hb_interval +# ip net exec $CLIENT_NS sysctl -wq net.sctp.hb_interval=3500 + +# run the test case +trap cleanup EXIT +setup && \ +echo "Test for SCTP Collision in nf_conntrack:" && \ +do_test && echo "PASS!" +exit $? diff --git a/tools/testing/selftests/netfilter/nft_audit.sh b/tools/testing/selftests/netfilter/nft_audit.sh new file mode 100755 index 000000000000..bb34329e02a7 --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_audit.sh @@ -0,0 +1,193 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Check that audit logs generated for nft commands are as expected. + +SKIP_RC=4 +RC=0 + +nft --version >/dev/null 2>&1 || { + echo "SKIP: missing nft tool" + exit $SKIP_RC +} + +logfile=$(mktemp) +rulefile=$(mktemp) +echo "logging into $logfile" +./audit_logread >"$logfile" & +logread_pid=$! +trap 'kill $logread_pid; rm -f $logfile $rulefile' EXIT +exec 3<"$logfile" + +do_test() { # (cmd, log) + echo -n "testing for cmd: $1 ... " + cat <&3 >/dev/null + $1 >/dev/null || exit 1 + sleep 0.1 + res=$(diff -a -u <(echo "$2") - <&3) + [ $? -eq 0 ] && { echo "OK"; return; } + echo "FAIL" + grep -v '^\(---\|+++\|@@\)' <<< "$res" + ((RC--)) +} + +nft flush ruleset + +# adding tables, chains and rules + +for table in t1 t2; do + do_test "nft add table $table" \ + "table=$table family=2 entries=1 op=nft_register_table" + + do_test "nft add chain $table c1" \ + "table=$table family=2 entries=1 op=nft_register_chain" + + do_test "nft add chain $table c2; add chain $table c3" \ + "table=$table family=2 entries=2 op=nft_register_chain" + + cmd="add rule $table c1 counter" + + do_test "nft $cmd" \ + "table=$table family=2 entries=1 op=nft_register_rule" + + do_test "nft $cmd; $cmd" \ + "table=$table family=2 entries=2 op=nft_register_rule" + + cmd="" + sep="" + for chain in c2 c3; do + for i in {1..3}; do + cmd+="$sep add rule $table $chain counter" + sep=";" + done + done + do_test "nft $cmd" \ + "table=$table family=2 entries=6 op=nft_register_rule" +done + +for ((i = 0; i < 500; i++)); do + echo "add rule t2 c3 counter accept comment \"rule $i\"" +done >$rulefile +do_test "nft -f $rulefile" \ +'table=t2 family=2 entries=500 op=nft_register_rule' + +# adding sets and elements + +settype='type inet_service; counter' +setelem='{ 22, 80, 443 }' +setblock="{ $settype; elements = $setelem; }" +do_test "nft add set t1 s $setblock" \ +"table=t1 family=2 entries=4 op=nft_register_set" + +do_test "nft add set t1 s2 $setblock; add set t1 s3 { $settype; }" \ +"table=t1 family=2 entries=5 op=nft_register_set" + +do_test "nft add element t1 s3 $setelem" \ +"table=t1 family=2 entries=3 op=nft_register_setelem" + +# adding counters + +do_test 'nft add counter t1 c1' \ +'table=t1 family=2 entries=1 op=nft_register_obj' + +do_test 'nft add counter t2 c1; add counter t2 c2' \ +'table=t2 family=2 entries=2 op=nft_register_obj' + +# adding/updating quotas + +do_test 'nft add quota t1 q1 { 10 bytes }' \ +'table=t1 family=2 entries=1 op=nft_register_obj' + +do_test 'nft add quota t2 q1 { 10 bytes }; add quota t2 q2 { 10 bytes }' \ +'table=t2 family=2 entries=2 op=nft_register_obj' + +# changing the quota value triggers obj update path +do_test 'nft add quota t1 q1 { 20 bytes }' \ +'table=t1 family=2 entries=1 op=nft_register_obj' + +# resetting rules + +do_test 'nft reset rules t1 c2' \ +'table=t1 family=2 entries=3 op=nft_reset_rule' + +do_test 'nft reset rules table t1' \ +'table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule' + +do_test 'nft reset rules t2 c3' \ +'table=t2 family=2 entries=189 op=nft_reset_rule +table=t2 family=2 entries=188 op=nft_reset_rule +table=t2 family=2 entries=126 op=nft_reset_rule' + +do_test 'nft reset rules t2' \ +'table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=186 op=nft_reset_rule +table=t2 family=2 entries=188 op=nft_reset_rule +table=t2 family=2 entries=129 op=nft_reset_rule' + +do_test 'nft reset rules' \ +'table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule +table=t1 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=3 op=nft_reset_rule +table=t2 family=2 entries=180 op=nft_reset_rule +table=t2 family=2 entries=188 op=nft_reset_rule +table=t2 family=2 entries=135 op=nft_reset_rule' + +# resetting sets and elements + +elem=(22 ,80 ,443) +relem="" +for i in {1..3}; do + relem+="${elem[((i - 1))]}" + do_test "nft reset element t1 s { $relem }" \ + "table=t1 family=2 entries=$i op=nft_reset_setelem" +done + +do_test 'nft reset set t1 s' \ +'table=t1 family=2 entries=3 op=nft_reset_setelem' + +# deleting rules + +readarray -t handles < <(nft -a list chain t1 c1 | \ + sed -n 's/.*counter.* handle \(.*\)$/\1/p') + +do_test "nft delete rule t1 c1 handle ${handles[0]}" \ +'table=t1 family=2 entries=1 op=nft_unregister_rule' + +cmd='delete rule t1 c1 handle' +do_test "nft $cmd ${handles[1]}; $cmd ${handles[2]}" \ +'table=t1 family=2 entries=2 op=nft_unregister_rule' + +do_test 'nft flush chain t1 c2' \ +'table=t1 family=2 entries=3 op=nft_unregister_rule' + +do_test 'nft flush table t2' \ +'table=t2 family=2 entries=509 op=nft_unregister_rule' + +# deleting chains + +do_test 'nft delete chain t2 c2' \ +'table=t2 family=2 entries=1 op=nft_unregister_chain' + +# deleting sets and elements + +do_test 'nft delete element t1 s { 22 }' \ +'table=t1 family=2 entries=1 op=nft_unregister_setelem' + +do_test 'nft delete element t1 s { 80, 443 }' \ +'table=t1 family=2 entries=2 op=nft_unregister_setelem' + +do_test 'nft flush set t1 s2' \ +'table=t1 family=2 entries=3 op=nft_unregister_setelem' + +do_test 'nft delete set t1 s2' \ +'table=t1 family=2 entries=1 op=nft_unregister_set' + +do_test 'nft delete set t1 s3' \ +'table=t1 family=2 entries=1 op=nft_unregister_set' + +exit $RC diff --git a/tools/testing/selftests/netfilter/sctp_collision.c b/tools/testing/selftests/netfilter/sctp_collision.c new file mode 100644 index 000000000000..21bb1cfd8a85 --- /dev/null +++ b/tools/testing/selftests/netfilter/sctp_collision.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <arpa/inet.h> + +int main(int argc, char *argv[]) +{ + struct sockaddr_in saddr = {}, daddr = {}; + int sd, ret, len = sizeof(daddr); + struct timeval tv = {25, 0}; + char buf[] = "hello"; + + if (argc != 6 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) { + printf("%s <server|client> <LOCAL_IP> <LOCAL_PORT> <REMOTE_IP> <REMOTE_PORT>\n", + argv[0]); + return -1; + } + + sd = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP); + if (sd < 0) { + printf("Failed to create sd\n"); + return -1; + } + + saddr.sin_family = AF_INET; + saddr.sin_addr.s_addr = inet_addr(argv[2]); + saddr.sin_port = htons(atoi(argv[3])); + + ret = bind(sd, (struct sockaddr *)&saddr, sizeof(saddr)); + if (ret < 0) { + printf("Failed to bind to address\n"); + goto out; + } + + ret = listen(sd, 5); + if (ret < 0) { + printf("Failed to listen on port\n"); + goto out; + } + + daddr.sin_family = AF_INET; + daddr.sin_addr.s_addr = inet_addr(argv[4]); + daddr.sin_port = htons(atoi(argv[5])); + + /* make test shorter than 25s */ + ret = setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); + if (ret < 0) { + printf("Failed to setsockopt SO_RCVTIMEO\n"); + goto out; + } + + if (!strcmp(argv[1], "server")) { + sleep(1); /* wait a bit for client's INIT */ + ret = connect(sd, (struct sockaddr *)&daddr, len); + if (ret < 0) { + printf("Failed to connect to peer\n"); + goto out; + } + ret = recvfrom(sd, buf, sizeof(buf), 0, (struct sockaddr *)&daddr, &len); + if (ret < 0) { + printf("Failed to recv msg %d\n", ret); + goto out; + } + ret = sendto(sd, buf, strlen(buf) + 1, 0, (struct sockaddr *)&daddr, len); + if (ret < 0) { + printf("Failed to send msg %d\n", ret); + goto out; + } + printf("Server: sent! %d\n", ret); + } + + if (!strcmp(argv[1], "client")) { + usleep(300000); /* wait a bit for server's listening */ + ret = connect(sd, (struct sockaddr *)&daddr, len); + if (ret < 0) { + printf("Failed to connect to peer\n"); + goto out; + } + sleep(1); /* wait a bit for server's delayed INIT_ACK to reproduce the issue */ + ret = sendto(sd, buf, strlen(buf) + 1, 0, (struct sockaddr *)&daddr, len); + if (ret < 0) { + printf("Failed to send msg %d\n", ret); + goto out; + } + ret = recvfrom(sd, buf, sizeof(buf), 0, (struct sockaddr *)&daddr, &len); + if (ret < 0) { + printf("Failed to recv msg %d\n", ret); + goto out; + } + printf("Client: rcvd! %d\n", ret); + } + ret = 0; +out: + close(sd); + return ret; +} diff --git a/tools/testing/selftests/nolibc/.gitignore b/tools/testing/selftests/nolibc/.gitignore index 4696df589d68..52f613cdad54 100644 --- a/tools/testing/selftests/nolibc/.gitignore +++ b/tools/testing/selftests/nolibc/.gitignore @@ -1,4 +1,5 @@ /initramfs/ +/libc-test /nolibc-test /run.out /sysroot/ diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index bbce57420465..dfe66776a331 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -14,6 +14,31 @@ include $(srctree)/scripts/subarch.include ARCH = $(SUBARCH) endif +# XARCH extends the kernel's ARCH with a few variants of the same +# architecture that only differ by the configuration, the toolchain +# and the Qemu program used. It is copied as-is into ARCH except for +# a few specific values which are mapped like this: +# +# XARCH | ARCH | config +# -------------|-----------|------------------------- +# ppc | powerpc | 32 bits +# ppc64 | powerpc | 64 bits big endian +# ppc64le | powerpc | 64 bits little endian +# +# It is recommended to only use XARCH, though it does not harm if +# ARCH is already set. For simplicity, ARCH is sufficient for all +# architectures where both are equal. + +# configure default variants for target kernel supported architectures +XARCH_powerpc = ppc +XARCH = $(or $(XARCH_$(ARCH)),$(ARCH)) + +# map from user input variants to their kernel supported architectures +ARCH_ppc = powerpc +ARCH_ppc64 = powerpc +ARCH_ppc64le = powerpc +ARCH := $(or $(ARCH_$(XARCH)),$(XARCH)) + # kernel image names by architecture IMAGE_i386 = arch/x86/boot/bzImage IMAGE_x86_64 = arch/x86/boot/bzImage @@ -21,10 +46,13 @@ IMAGE_x86 = arch/x86/boot/bzImage IMAGE_arm64 = arch/arm64/boot/Image IMAGE_arm = arch/arm/boot/zImage IMAGE_mips = vmlinuz +IMAGE_ppc = vmlinux +IMAGE_ppc64 = vmlinux +IMAGE_ppc64le = arch/powerpc/boot/zImage IMAGE_riscv = arch/riscv/boot/Image IMAGE_s390 = arch/s390/boot/bzImage IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi -IMAGE = $(IMAGE_$(ARCH)) +IMAGE = $(IMAGE_$(XARCH)) IMAGE_NAME = $(notdir $(IMAGE)) # default kernel configurations that appear to be usable @@ -34,10 +62,13 @@ DEFCONFIG_x86 = defconfig DEFCONFIG_arm64 = defconfig DEFCONFIG_arm = multi_v7_defconfig DEFCONFIG_mips = malta_defconfig +DEFCONFIG_ppc = pmac32_defconfig +DEFCONFIG_ppc64 = powernv_be_defconfig +DEFCONFIG_ppc64le = powernv_defconfig DEFCONFIG_riscv = defconfig DEFCONFIG_s390 = defconfig DEFCONFIG_loongarch = defconfig -DEFCONFIG = $(DEFCONFIG_$(ARCH)) +DEFCONFIG = $(DEFCONFIG_$(XARCH)) # optional tests to run (default = all) TEST = @@ -49,10 +80,13 @@ QEMU_ARCH_x86 = x86_64 QEMU_ARCH_arm64 = aarch64 QEMU_ARCH_arm = arm QEMU_ARCH_mips = mipsel # works with malta_defconfig +QEMU_ARCH_ppc = ppc +QEMU_ARCH_ppc64 = ppc64 +QEMU_ARCH_ppc64le = ppc64le QEMU_ARCH_riscv = riscv64 QEMU_ARCH_s390 = s390x QEMU_ARCH_loongarch = loongarch64 -QEMU_ARCH = $(QEMU_ARCH_$(ARCH)) +QEMU_ARCH = $(QEMU_ARCH_$(XARCH)) # QEMU_ARGS : some arch-specific args to pass to qemu QEMU_ARGS_i386 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" @@ -61,10 +95,13 @@ QEMU_ARGS_x86 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $( QEMU_ARGS_arm64 = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_arm = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_mips = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_ppc = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_ppc64 = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_ppc64le = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_s390 = -M s390-ccw-virtio -m 1G -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS = $(QEMU_ARGS_$(ARCH)) +QEMU_ARGS = $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_EXTRA) # OUTPUT is only set when run from the main makefile, otherwise # it defaults to this nolibc directory. @@ -76,17 +113,21 @@ else Q=@ endif -CFLAGS_STACKPROTECTOR = -DNOLIBC_STACKPROTECTOR \ - $(call cc-option,-mstack-protector-guard=global) \ - $(call cc-option,-fstack-protector-all) -CFLAGS_STKP_i386 = $(CFLAGS_STACKPROTECTOR) -CFLAGS_STKP_x86_64 = $(CFLAGS_STACKPROTECTOR) -CFLAGS_STKP_x86 = $(CFLAGS_STACKPROTECTOR) +CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple) +CFLAGS_ppc64 = -m64 -mbig-endian -mno-vsx $(call cc-option,-mmultiple) +CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2) CFLAGS_s390 = -m64 -CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables \ +CFLAGS_mips = -EL +CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all)) +CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \ $(call cc-option,-fno-stack-protector) \ - $(CFLAGS_STKP_$(ARCH)) $(CFLAGS_$(ARCH)) -LDFLAGS := -s + $(CFLAGS_$(XARCH)) $(CFLAGS_STACKPROTECTOR) +LDFLAGS := + +REPORT ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++; print;} /\[SKIPPED\][\r]*$$/{s++} \ + END{ printf("\n%3d test(s): %3d passed, %3d skipped, %3d failed => status: ", p+s+f, p, s, f); \ + if (f) printf("failure\n"); else if (s) printf("warning\n"); else printf("success\n");; \ + printf("\nSee all results in %s\n", ARGV[1]); }' help: @echo "Supported targets under selftests/nolibc:" @@ -94,24 +135,26 @@ help: @echo " help this help" @echo " sysroot create the nolibc sysroot here (uses \$$ARCH)" @echo " nolibc-test build the executable (uses \$$CC and \$$CROSS_COMPILE)" - @echo " run-user runs the executable under QEMU (uses \$$ARCH, \$$TEST)" + @echo " libc-test build an executable using the compiler's default libc instead" + @echo " run-user runs the executable under QEMU (uses \$$XARCH, \$$TEST)" @echo " initramfs prepare the initramfs with nolibc-test" - @echo " defconfig create a fresh new default config (uses \$$ARCH)" - @echo " kernel (re)build the kernel with the initramfs (uses \$$ARCH)" - @echo " run runs the kernel in QEMU after building it (uses \$$ARCH, \$$TEST)" - @echo " rerun runs a previously prebuilt kernel in QEMU (uses \$$ARCH, \$$TEST)" + @echo " defconfig create a fresh new default config (uses \$$XARCH)" + @echo " kernel (re)build the kernel with the initramfs (uses \$$XARCH)" + @echo " run runs the kernel in QEMU after building it (uses \$$XARCH, \$$TEST)" + @echo " rerun runs a previously prebuilt kernel in QEMU (uses \$$XARCH, \$$TEST)" @echo " clean clean the sysroot, initramfs, build and output files" @echo "" @echo "The output file is \"run.out\". Test ranges may be passed using \$$TEST." @echo "" @echo "Currently using the following variables:" @echo " ARCH = $(ARCH)" + @echo " XARCH = $(XARCH)" @echo " CROSS_COMPILE = $(CROSS_COMPILE)" @echo " CC = $(CC)" @echo " OUTPUT = $(OUTPUT)" @echo " TEST = $(TEST)" - @echo " QEMU_ARCH = $(if $(QEMU_ARCH),$(QEMU_ARCH),UNKNOWN_ARCH) [determined from \$$ARCH]" - @echo " IMAGE_NAME = $(if $(IMAGE_NAME),$(IMAGE_NAME),UNKNOWN_ARCH) [determined from \$$ARCH]" + @echo " QEMU_ARCH = $(if $(QEMU_ARCH),$(QEMU_ARCH),UNKNOWN_ARCH) [determined from \$$XARCH]" + @echo " IMAGE_NAME = $(if $(IMAGE_NAME),$(IMAGE_NAME),UNKNOWN_ARCH) [determined from \$$XARCH]" @echo "" all: run @@ -124,14 +167,33 @@ sysroot/$(ARCH)/include: $(Q)$(MAKE) -C ../../../include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone $(Q)mv sysroot/sysroot sysroot/$(ARCH) +ifneq ($(NOLIBC_SYSROOT),0) nolibc-test: nolibc-test.c sysroot/$(ARCH)/include $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \ -nostdlib -static -Isysroot/$(ARCH)/include $< -lgcc +else +nolibc-test: nolibc-test.c + $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \ + -nostdlib -static -include ../../../include/nolibc/nolibc.h $< -lgcc +endif + +libc-test: nolibc-test.c + $(QUIET_CC)$(HOSTCC) -o $@ $< + +# local libc-test +run-libc-test: libc-test + $(Q)./libc-test > "$(CURDIR)/run.out" || : + $(Q)$(REPORT) $(CURDIR)/run.out + +# local nolibc-test +run-nolibc-test: nolibc-test + $(Q)./nolibc-test > "$(CURDIR)/run.out" || : + $(Q)$(REPORT) $(CURDIR)/run.out # qemu user-land test run-user: nolibc-test $(Q)qemu-$(QEMU_ARCH) ./nolibc-test > "$(CURDIR)/run.out" || : - $(Q)grep -w FAIL "$(CURDIR)/run.out" && echo "See all results in $(CURDIR)/run.out" || echo "$$(grep -c ^[0-9].*OK $(CURDIR)/run.out) test(s) passed." + $(Q)$(REPORT) $(CURDIR)/run.out initramfs: nolibc-test $(QUIET_MKDIR)mkdir -p initramfs @@ -147,18 +209,24 @@ kernel: initramfs # run the tests after building the kernel run: kernel $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" - $(Q)grep -w FAIL "$(CURDIR)/run.out" && echo "See all results in $(CURDIR)/run.out" || echo "$$(grep -c ^[0-9].*OK $(CURDIR)/run.out) test(s) passed." + $(Q)$(REPORT) $(CURDIR)/run.out # re-run the tests from an existing kernel rerun: $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(srctree)/$(IMAGE)" -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" - $(Q)grep -w FAIL "$(CURDIR)/run.out" && echo "See all results in $(CURDIR)/run.out" || echo "$$(grep -c ^[0-9].*OK $(CURDIR)/run.out) test(s) passed." + $(Q)$(REPORT) $(CURDIR)/run.out + +# report with existing test log +report: + $(Q)$(REPORT) $(CURDIR)/run.out clean: $(call QUIET_CLEAN, sysroot) $(Q)rm -rf sysroot $(call QUIET_CLEAN, nolibc-test) $(Q)rm -f nolibc-test + $(call QUIET_CLEAN, libc-test) + $(Q)rm -f libc-test $(call QUIET_CLEAN, initramfs) $(Q)rm -rf initramfs $(call QUIET_CLEAN, run.out) diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 21bacc928bf7..fb3bf91462e2 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -1,9 +1,7 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ #define _GNU_SOURCE - -/* platform-specific include files coming from the compiler */ -#include <limits.h> +#define _LARGEFILE64_SOURCE /* libc-specific include files * The program may be built in 3 ways: @@ -17,10 +15,12 @@ #include <string.h> #ifndef _NOLIBC_STDIO_H /* standard libcs need more includes */ -#include <linux/reboot.h> +#include <sys/auxv.h> #include <sys/io.h> #include <sys/ioctl.h> +#include <sys/mman.h> #include <sys/mount.h> +#include <sys/prctl.h> #include <sys/reboot.h> #include <sys/stat.h> #include <sys/syscall.h> @@ -34,17 +34,33 @@ #include <sched.h> #include <signal.h> #include <stdarg.h> +#include <stddef.h> +#include <stdint.h> #include <unistd.h> +#include <limits.h> #endif #endif -/* will be used by nolibc by getenv() */ -char **environ; +/* for the type of int_fast16_t and int_fast32_t, musl differs from glibc and nolibc */ +#define SINT_MAX_OF_TYPE(type) (((type)1 << (sizeof(type) * 8 - 2)) - (type)1 + ((type)1 << (sizeof(type) * 8 - 2))) +#define SINT_MIN_OF_TYPE(type) (-SINT_MAX_OF_TYPE(type) - 1) + +/* will be used to test initialization of environ */ +static char **test_envp; + +/* will be used to test initialization of argv */ +static char **test_argv; + +/* will be used to test initialization of argc */ +static int test_argc; + +/* will be used by some test cases as readable file, please don't write it */ +static const char *argv0; /* definition of a series of tests */ struct test { - const char *name; // test name - int (*func)(int min, int max); // handler + const char *name; /* test name */ + int (*func)(int min, int max); /* handler */ }; #ifndef _NOLIBC_STDLIB_H @@ -64,7 +80,7 @@ char *itoa(int i) /* returns the error name (e.g. "ENOENT") for common errors, "SUCCESS" for 0, * or the decimal value for less common ones. */ -const char *errorname(int err) +static const char *errorname(int err) { switch (err) { case 0: return "SUCCESS"; @@ -103,24 +119,41 @@ const char *errorname(int err) CASE_ERR(EDOM); CASE_ERR(ERANGE); CASE_ERR(ENOSYS); + CASE_ERR(EOVERFLOW); default: return itoa(err); } } -static int pad_spc(int llen, int cnt, const char *fmt, ...) +static void putcharn(char c, size_t n) { - va_list args; - int len; - int ret; + char buf[64]; - for (len = 0; len < cnt - llen; len++) - putchar(' '); + memset(buf, c, n); + buf[n] = '\0'; + fputs(buf, stdout); +} - va_start(args, fmt); - ret = vfprintf(stdout, fmt, args); - va_end(args); - return ret < 0 ? ret : ret + len; +enum RESULT { + OK, + FAIL, + SKIPPED, +}; + +static void result(int llen, enum RESULT r) +{ + const char *msg; + + if (r == OK) + msg = " [OK]"; + else if (r == SKIPPED) + msg = "[SKIPPED]"; + else + msg = "[FAIL]"; + + if (llen < 64) + putcharn(' ', 64 - llen); + puts(msg); } /* The tests below are intended to be used by the macroes, which evaluate @@ -130,289 +163,428 @@ static int pad_spc(int llen, int cnt, const char *fmt, ...) */ #define EXPECT_ZR(cond, expr) \ - do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_zr(expr, llen); } while (0) + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_zr(expr, llen); } while (0) -static int expect_zr(int expr, int llen) +static __attribute__((unused)) +int expect_zr(int expr, int llen) { int ret = !(expr == 0); llen += printf(" = %d ", expr); - pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n"); + result(llen, ret ? FAIL : OK); return ret; } #define EXPECT_NZ(cond, expr, val) \ - do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_nz(expr, llen; } while (0) + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_nz(expr, llen; } while (0) -static int expect_nz(int expr, int llen) +static __attribute__((unused)) +int expect_nz(int expr, int llen) { int ret = !(expr != 0); llen += printf(" = %d ", expr); - pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n"); + result(llen, ret ? FAIL : OK); return ret; } #define EXPECT_EQ(cond, expr, val) \ - do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_eq(expr, llen, val); } while (0) + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_eq(expr, llen, val); } while (0) -static int expect_eq(uint64_t expr, int llen, uint64_t val) +static __attribute__((unused)) +int expect_eq(uint64_t expr, int llen, uint64_t val) { int ret = !(expr == val); - llen += printf(" = %lld ", expr); - pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n"); + llen += printf(" = %lld ", (long long)expr); + result(llen, ret ? FAIL : OK); return ret; } #define EXPECT_NE(cond, expr, val) \ - do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ne(expr, llen, val); } while (0) + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ne(expr, llen, val); } while (0) -static int expect_ne(int expr, int llen, int val) +static __attribute__((unused)) +int expect_ne(int expr, int llen, int val) { int ret = !(expr != val); llen += printf(" = %d ", expr); - pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n"); + result(llen, ret ? FAIL : OK); return ret; } #define EXPECT_GE(cond, expr, val) \ - do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ge(expr, llen, val); } while (0) + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ge(expr, llen, val); } while (0) -static int expect_ge(int expr, int llen, int val) +static __attribute__((unused)) +int expect_ge(int expr, int llen, int val) { int ret = !(expr >= val); llen += printf(" = %d ", expr); - pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n"); + result(llen, ret ? FAIL : OK); return ret; } #define EXPECT_GT(cond, expr, val) \ - do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_gt(expr, llen, val); } while (0) + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_gt(expr, llen, val); } while (0) -static int expect_gt(int expr, int llen, int val) +static __attribute__((unused)) +int expect_gt(int expr, int llen, int val) { int ret = !(expr > val); llen += printf(" = %d ", expr); - pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n"); + result(llen, ret ? FAIL : OK); return ret; } #define EXPECT_LE(cond, expr, val) \ - do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_le(expr, llen, val); } while (0) + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_le(expr, llen, val); } while (0) -static int expect_le(int expr, int llen, int val) +static __attribute__((unused)) +int expect_le(int expr, int llen, int val) { int ret = !(expr <= val); llen += printf(" = %d ", expr); - pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n"); + result(llen, ret ? FAIL : OK); return ret; } #define EXPECT_LT(cond, expr, val) \ - do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_lt(expr, llen, val); } while (0) + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_lt(expr, llen, val); } while (0) -static int expect_lt(int expr, int llen, int val) +static __attribute__((unused)) +int expect_lt(int expr, int llen, int val) { int ret = !(expr < val); llen += printf(" = %d ", expr); - pad_spc(llen, 64, ret ? "[FAIL]\n" : " [OK]\n"); + result(llen, ret ? FAIL : OK); return ret; } #define EXPECT_SYSZR(cond, expr) \ - do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_syszr(expr, llen); } while (0) + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_syszr(expr, llen); } while (0) -static int expect_syszr(int expr, int llen) +static __attribute__((unused)) +int expect_syszr(int expr, int llen) { int ret = 0; if (expr) { ret = 1; llen += printf(" = %d %s ", expr, errorname(errno)); - llen += pad_spc(llen, 64, "[FAIL]\n"); + result(llen, FAIL); } else { llen += printf(" = %d ", expr); - llen += pad_spc(llen, 64, " [OK]\n"); + result(llen, OK); } return ret; } #define EXPECT_SYSEQ(cond, expr, val) \ - do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_syseq(expr, llen, val); } while (0) + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_syseq(expr, llen, val); } while (0) -static int expect_syseq(int expr, int llen, int val) +static __attribute__((unused)) +int expect_syseq(int expr, int llen, int val) { int ret = 0; if (expr != val) { ret = 1; llen += printf(" = %d %s ", expr, errorname(errno)); - llen += pad_spc(llen, 64, "[FAIL]\n"); + result(llen, FAIL); } else { llen += printf(" = %d ", expr); - llen += pad_spc(llen, 64, " [OK]\n"); + result(llen, OK); } return ret; } #define EXPECT_SYSNE(cond, expr, val) \ - do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_sysne(expr, llen, val); } while (0) + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_sysne(expr, llen, val); } while (0) -static int expect_sysne(int expr, int llen, int val) +static __attribute__((unused)) +int expect_sysne(int expr, int llen, int val) { int ret = 0; if (expr == val) { ret = 1; llen += printf(" = %d %s ", expr, errorname(errno)); - llen += pad_spc(llen, 64, "[FAIL]\n"); + result(llen, FAIL); } else { llen += printf(" = %d ", expr); - llen += pad_spc(llen, 64, " [OK]\n"); + result(llen, OK); } return ret; } +#define EXPECT_SYSER2(cond, expr, expret, experr1, experr2) \ + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_syserr2(expr, expret, experr1, experr2, llen); } while (0) + #define EXPECT_SYSER(cond, expr, expret, experr) \ - do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_syserr(expr, expret, experr, llen); } while (0) + EXPECT_SYSER2(cond, expr, expret, experr, 0) -static int expect_syserr(int expr, int expret, int experr, int llen) +static __attribute__((unused)) +int expect_syserr2(int expr, int expret, int experr1, int experr2, int llen) { int ret = 0; int _errno = errno; llen += printf(" = %d %s ", expr, errorname(_errno)); - if (expr != expret || _errno != experr) { + if (expr != expret || (_errno != experr1 && _errno != experr2)) { ret = 1; - llen += printf(" != (%d %s) ", expret, errorname(experr)); - llen += pad_spc(llen, 64, "[FAIL]\n"); + if (experr2 == 0) + llen += printf(" != (%d %s) ", expret, errorname(experr1)); + else + llen += printf(" != (%d %s %s) ", expret, errorname(experr1), errorname(experr2)); + result(llen, FAIL); } else { - llen += pad_spc(llen, 64, " [OK]\n"); + result(llen, OK); } return ret; } #define EXPECT_PTRZR(cond, expr) \ - do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ptrzr(expr, llen); } while (0) + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrzr(expr, llen); } while (0) -static int expect_ptrzr(const void *expr, int llen) +static __attribute__((unused)) +int expect_ptrzr(const void *expr, int llen) { int ret = 0; llen += printf(" = <%p> ", expr); if (expr) { ret = 1; - llen += pad_spc(llen, 64, "[FAIL]\n"); + result(llen, FAIL); } else { - llen += pad_spc(llen, 64, " [OK]\n"); + result(llen, OK); } return ret; } #define EXPECT_PTRNZ(cond, expr) \ - do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_ptrnz(expr, llen); } while (0) + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrnz(expr, llen); } while (0) -static int expect_ptrnz(const void *expr, int llen) +static __attribute__((unused)) +int expect_ptrnz(const void *expr, int llen) { int ret = 0; llen += printf(" = <%p> ", expr); if (!expr) { ret = 1; - llen += pad_spc(llen, 64, "[FAIL]\n"); + result(llen, FAIL); + } else { + result(llen, OK); + } + return ret; +} + +#define EXPECT_PTREQ(cond, expr, cmp) \ + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptreq(expr, llen, cmp); } while (0) + +static __attribute__((unused)) +int expect_ptreq(const void *expr, int llen, const void *cmp) +{ + int ret = 0; + + llen += printf(" = <%p> ", expr); + if (expr != cmp) { + ret = 1; + result(llen, FAIL); + } else { + result(llen, OK); + } + return ret; +} + +#define EXPECT_PTRNE(cond, expr, cmp) \ + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrne(expr, llen, cmp); } while (0) + +static __attribute__((unused)) +int expect_ptrne(const void *expr, int llen, const void *cmp) +{ + int ret = 0; + + llen += printf(" = <%p> ", expr); + if (expr == cmp) { + ret = 1; + result(llen, FAIL); } else { - llen += pad_spc(llen, 64, " [OK]\n"); + result(llen, OK); } return ret; } +#define EXPECT_PTRGE(cond, expr, cmp) \ + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrge(expr, llen, cmp); } while (0) + +static __attribute__((unused)) +int expect_ptrge(const void *expr, int llen, const void *cmp) +{ + int ret = !(expr >= cmp); + + llen += printf(" = <%p> ", expr); + result(llen, ret ? FAIL : OK); + return ret; +} + +#define EXPECT_PTRGT(cond, expr, cmp) \ + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrgt(expr, llen, cmp); } while (0) + +static __attribute__((unused)) +int expect_ptrgt(const void *expr, int llen, const void *cmp) +{ + int ret = !(expr > cmp); + + llen += printf(" = <%p> ", expr); + result(llen, ret ? FAIL : OK); + return ret; +} + + +#define EXPECT_PTRLE(cond, expr, cmp) \ + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrle(expr, llen, cmp); } while (0) + +static __attribute__((unused)) +int expect_ptrle(const void *expr, int llen, const void *cmp) +{ + int ret = !(expr <= cmp); + + llen += printf(" = <%p> ", expr); + result(llen, ret ? FAIL : OK); + return ret; +} + + +#define EXPECT_PTRLT(cond, expr, cmp) \ + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrlt(expr, llen, cmp); } while (0) + +static __attribute__((unused)) +int expect_ptrlt(const void *expr, int llen, const void *cmp) +{ + int ret = !(expr < cmp); + + llen += printf(" = <%p> ", expr); + result(llen, ret ? FAIL : OK); + return ret; +} + +#define EXPECT_PTRER2(cond, expr, expret, experr1, experr2) \ + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrerr2(expr, expret, experr1, experr2, llen); } while (0) + +#define EXPECT_PTRER(cond, expr, expret, experr) \ + EXPECT_PTRER2(cond, expr, expret, experr, 0) + +static __attribute__((unused)) +int expect_ptrerr2(const void *expr, const void *expret, int experr1, int experr2, int llen) +{ + int ret = 0; + int _errno = errno; + + llen += printf(" = <%p> %s ", expr, errorname(_errno)); + if (expr != expret || (_errno != experr1 && _errno != experr2)) { + ret = 1; + if (experr2 == 0) + llen += printf(" != (<%p> %s) ", expret, errorname(experr1)); + else + llen += printf(" != (<%p> %s %s) ", expret, errorname(experr1), errorname(experr2)); + result(llen, FAIL); + } else { + result(llen, OK); + } + return ret; +} #define EXPECT_STRZR(cond, expr) \ - do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_strzr(expr, llen); } while (0) + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_strzr(expr, llen); } while (0) -static int expect_strzr(const char *expr, int llen) +static __attribute__((unused)) +int expect_strzr(const char *expr, int llen) { int ret = 0; llen += printf(" = <%s> ", expr); if (expr) { ret = 1; - llen += pad_spc(llen, 64, "[FAIL]\n"); + result(llen, FAIL); } else { - llen += pad_spc(llen, 64, " [OK]\n"); + result(llen, OK); } return ret; } #define EXPECT_STRNZ(cond, expr) \ - do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_strnz(expr, llen); } while (0) + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_strnz(expr, llen); } while (0) -static int expect_strnz(const char *expr, int llen) +static __attribute__((unused)) +int expect_strnz(const char *expr, int llen) { int ret = 0; llen += printf(" = <%s> ", expr); if (!expr) { ret = 1; - llen += pad_spc(llen, 64, "[FAIL]\n"); + result(llen, FAIL); } else { - llen += pad_spc(llen, 64, " [OK]\n"); + result(llen, OK); } return ret; } #define EXPECT_STREQ(cond, expr, cmp) \ - do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_streq(expr, llen, cmp); } while (0) + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_streq(expr, llen, cmp); } while (0) -static int expect_streq(const char *expr, int llen, const char *cmp) +static __attribute__((unused)) +int expect_streq(const char *expr, int llen, const char *cmp) { int ret = 0; llen += printf(" = <%s> ", expr); if (strcmp(expr, cmp) != 0) { ret = 1; - llen += pad_spc(llen, 64, "[FAIL]\n"); + result(llen, FAIL); } else { - llen += pad_spc(llen, 64, " [OK]\n"); + result(llen, OK); } return ret; } #define EXPECT_STRNE(cond, expr, cmp) \ - do { if (!cond) pad_spc(llen, 64, "[SKIPPED]\n"); else ret += expect_strne(expr, llen, cmp); } while (0) + do { if (!(cond)) result(llen, SKIPPED); else ret += expect_strne(expr, llen, cmp); } while (0) -static int expect_strne(const char *expr, int llen, const char *cmp) +static __attribute__((unused)) +int expect_strne(const char *expr, int llen, const char *cmp) { int ret = 0; llen += printf(" = <%s> ", expr); if (strcmp(expr, cmp) == 0) { ret = 1; - llen += pad_spc(llen, 64, "[FAIL]\n"); + result(llen, FAIL); } else { - llen += pad_spc(llen, 64, " [OK]\n"); + result(llen, OK); } return ret; } @@ -422,6 +594,51 @@ static int expect_strne(const char *expr, int llen, const char *cmp) #define CASE_TEST(name) \ case __LINE__: llen += printf("%d %s", test, #name); +int run_startup(int min, int max) +{ + int test; + int ret = 0; + /* kernel at least passes HOME and TERM, shell passes more */ + int env_total = 2; + /* checking NULL for argv/argv0, environ and _auxv is not enough, let's compare with sbrk(0) or &end */ + extern char end; + char *brk = sbrk(0) != (void *)-1 ? sbrk(0) : &end; + /* differ from nolibc, both glibc and musl have no global _auxv */ + const unsigned long *test_auxv = (void *)-1; +#ifdef NOLIBC + test_auxv = _auxv; +#endif + + for (test = min; test >= 0 && test <= max; test++) { + int llen = 0; /* line length */ + + /* avoid leaving empty lines below, this will insert holes into + * test numbers. + */ + switch (test + __LINE__ + 1) { + CASE_TEST(argc); EXPECT_GE(1, test_argc, 1); break; + CASE_TEST(argv_addr); EXPECT_PTRGT(1, test_argv, brk); break; + CASE_TEST(argv_environ); EXPECT_PTRLT(1, test_argv, environ); break; + CASE_TEST(argv_total); EXPECT_EQ(1, environ - test_argv - 1, test_argc ?: 1); break; + CASE_TEST(argv0_addr); EXPECT_PTRGT(1, argv0, brk); break; + CASE_TEST(argv0_str); EXPECT_STRNZ(1, argv0 > brk ? argv0 : NULL); break; + CASE_TEST(argv0_len); EXPECT_GE(1, argv0 > brk ? strlen(argv0) : 0, 1); break; + CASE_TEST(environ_addr); EXPECT_PTRGT(1, environ, brk); break; + CASE_TEST(environ_envp); EXPECT_PTREQ(1, environ, test_envp); break; + CASE_TEST(environ_auxv); EXPECT_PTRLT(test_auxv != (void *)-1, environ, test_auxv); break; + CASE_TEST(environ_total); EXPECT_GE(test_auxv != (void *)-1, (void *)test_auxv - (void *)environ - 1, env_total); break; + CASE_TEST(environ_HOME); EXPECT_PTRNZ(1, getenv("HOME")); break; + CASE_TEST(auxv_addr); EXPECT_PTRGT(test_auxv != (void *)-1, test_auxv, brk); break; + CASE_TEST(auxv_AT_UID); EXPECT_EQ(1, getauxval(AT_UID), getuid()); break; + CASE_TEST(auxv_AT_PAGESZ); EXPECT_GE(1, getauxval(AT_PAGESZ), 4096); break; + case __LINE__: + return ret; /* must be last */ + /* note: do not set any defaults so as to permit holes above */ + } + } + return ret; +} + /* used by some syscall tests below */ int test_getdents64(const char *dir) @@ -442,9 +659,9 @@ int test_getdents64(const char *dir) return ret; } -static int test_getpagesize(void) +int test_getpagesize(void) { - long x = getpagesize(); + int x = getpagesize(); int c; if (x < 0) @@ -471,11 +688,140 @@ static int test_getpagesize(void) return !c; } +int test_fork(void) +{ + int status; + pid_t pid; + + /* flush the printf buffer to avoid child flush it */ + fflush(stdout); + fflush(stderr); + + pid = fork(); + + switch (pid) { + case -1: + return 1; + + case 0: + exit(123); + + default: + pid = waitpid(pid, &status, 0); + + return pid == -1 || !WIFEXITED(status) || WEXITSTATUS(status) != 123; + } +} + +int test_stat_timestamps(void) +{ + struct stat st; + + if (sizeof(st.st_atim.tv_sec) != sizeof(st.st_atime)) + return 1; + + if (stat("/proc/self/", &st) && stat(argv0, &st) && stat("/", &st)) + return 1; + + if (st.st_atim.tv_sec != st.st_atime || st.st_atim.tv_nsec > 1000000000) + return 1; + + if (st.st_mtim.tv_sec != st.st_mtime || st.st_mtim.tv_nsec > 1000000000) + return 1; + + if (st.st_ctim.tv_sec != st.st_ctime || st.st_ctim.tv_nsec > 1000000000) + return 1; + + return 0; +} + +int test_mmap_munmap(void) +{ + int ret, fd, i, page_size; + void *mem; + size_t file_size, length; + off_t offset, pa_offset; + struct stat stat_buf; + const char * const files[] = { + "/dev/zero", + "/proc/1/exe", "/proc/self/exe", + argv0, + NULL + }; + + page_size = getpagesize(); + if (page_size < 0) + return 1; + + /* find a right file to mmap, existed and accessible */ + for (i = 0; files[i] != NULL; i++) { + ret = fd = open(files[i], O_RDONLY); + if (ret == -1) + continue; + else + break; + } + if (ret == -1) + return 1; + + ret = stat(files[i], &stat_buf); + if (ret == -1) + goto end; + + /* file size of the special /dev/zero is 0, let's assign one manually */ + if (i == 0) + file_size = 3*page_size; + else + file_size = stat_buf.st_size; + + offset = file_size - 1; + if (offset < 0) + offset = 0; + length = file_size - offset; + pa_offset = offset & ~(page_size - 1); + + mem = mmap(NULL, length + offset - pa_offset, PROT_READ, MAP_SHARED, fd, pa_offset); + if (mem == MAP_FAILED) { + ret = 1; + goto end; + } + + ret = munmap(mem, length + offset - pa_offset); + +end: + close(fd); + return !!ret; +} + +int test_pipe(void) +{ + const char *const msg = "hello, nolibc"; + int pipefd[2]; + char buf[32]; + size_t len; + + if (pipe(pipefd) == -1) + return 1; + + write(pipefd[1], msg, strlen(msg)); + close(pipefd[1]); + len = read(pipefd[0], buf, sizeof(buf)); + close(pipefd[0]); + + if (len != strlen(msg)) + return 1; + + return !!memcmp(buf, msg, len); +} + + /* Run syscall tests between IDs <min> and <max>. * Return 0 on success, non-zero on failure. */ int run_syscall(int min, int max) { + struct timeval tv; + struct timezone tz; struct stat stat_buf; int euid0; int proc; @@ -483,6 +829,7 @@ int run_syscall(int min, int max) int tmp; int ret = 0; void *p1, *p2; + int has_gettid = 1; /* <proc> indicates whether or not /proc is mounted */ proc = stat("/proc", &stat_buf) == 0; @@ -490,8 +837,13 @@ int run_syscall(int min, int max) /* this will be used to skip certain tests that can't be run unprivileged */ euid0 = geteuid() == 0; + /* from 2.30, glibc provides gettid() */ +#if defined(__GLIBC_MINOR__) && defined(__GLIBC__) + has_gettid = __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 30); +#endif + for (test = min; test >= 0 && test <= max; test++) { - int llen = 0; // line length + int llen = 0; /* line length */ /* avoid leaving empty lines below, this will insert holes into * test numbers. @@ -499,25 +851,24 @@ int run_syscall(int min, int max) switch (test + __LINE__ + 1) { CASE_TEST(getpid); EXPECT_SYSNE(1, getpid(), -1); break; CASE_TEST(getppid); EXPECT_SYSNE(1, getppid(), -1); break; -#ifdef NOLIBC - CASE_TEST(gettid); EXPECT_SYSNE(1, gettid(), -1); break; -#endif + CASE_TEST(gettid); EXPECT_SYSNE(has_gettid, gettid(), -1); break; CASE_TEST(getpgid_self); EXPECT_SYSNE(1, getpgid(0), -1); break; CASE_TEST(getpgid_bad); EXPECT_SYSER(1, getpgid(-1), -1, ESRCH); break; CASE_TEST(kill_0); EXPECT_SYSZR(1, kill(getpid(), 0)); break; CASE_TEST(kill_CONT); EXPECT_SYSZR(1, kill(getpid(), 0)); break; CASE_TEST(kill_BADPID); EXPECT_SYSER(1, kill(INT_MAX, 0), -1, ESRCH); break; + CASE_TEST(sbrk_0); EXPECT_PTRNE(1, sbrk(0), (void *)-1); break; CASE_TEST(sbrk); if ((p1 = p2 = sbrk(4096)) != (void *)-1) p2 = sbrk(-4096); EXPECT_SYSZR(1, (p2 == (void *)-1) || p2 == p1); break; CASE_TEST(brk); EXPECT_SYSZR(1, brk(sbrk(0))); break; - CASE_TEST(chdir_root); EXPECT_SYSZR(1, chdir("/")); break; + CASE_TEST(chdir_root); EXPECT_SYSZR(1, chdir("/")); chdir(getenv("PWD")); break; CASE_TEST(chdir_dot); EXPECT_SYSZR(1, chdir(".")); break; CASE_TEST(chdir_blah); EXPECT_SYSER(1, chdir("/blah"), -1, ENOENT); break; - CASE_TEST(chmod_net); EXPECT_SYSZR(proc, chmod("/proc/self/net", 0555)); break; + CASE_TEST(chmod_argv0); EXPECT_SYSZR(1, chmod(argv0, 0555)); break; CASE_TEST(chmod_self); EXPECT_SYSER(proc, chmod("/proc/self", 0555), -1, EPERM); break; CASE_TEST(chown_self); EXPECT_SYSER(proc, chown("/proc/self", 0, 0), -1, EPERM); break; CASE_TEST(chroot_root); EXPECT_SYSZR(euid0, chroot("/")); break; CASE_TEST(chroot_blah); EXPECT_SYSER(1, chroot("/proc/self/blah"), -1, ENOENT); break; - CASE_TEST(chroot_exe); EXPECT_SYSER(proc, chroot("/proc/self/exe"), -1, ENOTDIR); break; + CASE_TEST(chroot_exe); EXPECT_SYSER(1, chroot(argv0), -1, ENOTDIR); break; CASE_TEST(close_m1); EXPECT_SYSER(1, close(-1), -1, EBADF); break; CASE_TEST(close_dup); EXPECT_SYSZR(1, close(dup(0))); break; CASE_TEST(dup_0); tmp = dup(0); EXPECT_SYSNE(1, tmp, -1); close(tmp); break; @@ -527,36 +878,40 @@ int run_syscall(int min, int max) CASE_TEST(dup3_0); tmp = dup3(0, 100, 0); EXPECT_SYSNE(1, tmp, -1); close(tmp); break; CASE_TEST(dup3_m1); tmp = dup3(-1, 100, 0); EXPECT_SYSER(1, tmp, -1, EBADF); if (tmp != -1) close(tmp); break; CASE_TEST(execve_root); EXPECT_SYSER(1, execve("/", (char*[]){ [0] = "/", [1] = NULL }, NULL), -1, EACCES); break; + CASE_TEST(fork); EXPECT_SYSZR(1, test_fork()); break; CASE_TEST(getdents64_root); EXPECT_SYSNE(1, test_getdents64("/"), -1); break; CASE_TEST(getdents64_null); EXPECT_SYSER(1, test_getdents64("/dev/null"), -1, ENOTDIR); break; - CASE_TEST(gettimeofday_null); EXPECT_SYSZR(1, gettimeofday(NULL, NULL)); break; -#ifdef NOLIBC - CASE_TEST(gettimeofday_bad1); EXPECT_SYSER(1, gettimeofday((void *)1, NULL), -1, EFAULT); break; - CASE_TEST(gettimeofday_bad2); EXPECT_SYSER(1, gettimeofday(NULL, (void *)1), -1, EFAULT); break; - CASE_TEST(gettimeofday_bad2); EXPECT_SYSER(1, gettimeofday(NULL, (void *)1), -1, EFAULT); break; -#endif + CASE_TEST(gettimeofday_tv); EXPECT_SYSZR(1, gettimeofday(&tv, NULL)); break; + CASE_TEST(gettimeofday_tv_tz);EXPECT_SYSZR(1, gettimeofday(&tv, &tz)); break; CASE_TEST(getpagesize); EXPECT_SYSZR(1, test_getpagesize()); break; CASE_TEST(ioctl_tiocinq); EXPECT_SYSZR(1, ioctl(0, TIOCINQ, &tmp)); break; CASE_TEST(ioctl_tiocinq); EXPECT_SYSZR(1, ioctl(0, TIOCINQ, &tmp)); break; CASE_TEST(link_root1); EXPECT_SYSER(1, link("/", "/"), -1, EEXIST); break; CASE_TEST(link_blah); EXPECT_SYSER(1, link("/proc/self/blah", "/blah"), -1, ENOENT); break; CASE_TEST(link_dir); EXPECT_SYSER(euid0, link("/", "/blah"), -1, EPERM); break; - CASE_TEST(link_cross); EXPECT_SYSER(proc, link("/proc/self/net", "/blah"), -1, EXDEV); break; + CASE_TEST(link_cross); EXPECT_SYSER(proc, link("/proc/self/cmdline", "/blah"), -1, EXDEV); break; CASE_TEST(lseek_m1); EXPECT_SYSER(1, lseek(-1, 0, SEEK_SET), -1, EBADF); break; CASE_TEST(lseek_0); EXPECT_SYSER(1, lseek(0, 0, SEEK_SET), -1, ESPIPE); break; CASE_TEST(mkdir_root); EXPECT_SYSER(1, mkdir("/", 0755), -1, EEXIST); break; + CASE_TEST(mmap_bad); EXPECT_PTRER(1, mmap(NULL, 0, PROT_READ, MAP_PRIVATE, 0, 0), MAP_FAILED, EINVAL); break; + CASE_TEST(munmap_bad); EXPECT_SYSER(1, munmap((void *)1, 0), -1, EINVAL); break; + CASE_TEST(mmap_munmap_good); EXPECT_SYSZR(1, test_mmap_munmap()); break; CASE_TEST(open_tty); EXPECT_SYSNE(1, tmp = open("/dev/null", 0), -1); if (tmp != -1) close(tmp); break; CASE_TEST(open_blah); EXPECT_SYSER(1, tmp = open("/proc/self/blah", 0), -1, ENOENT); if (tmp != -1) close(tmp); break; + CASE_TEST(pipe); EXPECT_SYSZR(1, test_pipe()); break; CASE_TEST(poll_null); EXPECT_SYSZR(1, poll(NULL, 0, 0)); break; CASE_TEST(poll_stdout); EXPECT_SYSNE(1, ({ struct pollfd fds = { 1, POLLOUT, 0}; poll(&fds, 1, 0); }), -1); break; CASE_TEST(poll_fault); EXPECT_SYSER(1, poll((void *)1, 1, 0), -1, EFAULT); break; + CASE_TEST(prctl); EXPECT_SYSER(1, prctl(PR_SET_NAME, (unsigned long)NULL, 0, 0, 0), -1, EFAULT); break; CASE_TEST(read_badf); EXPECT_SYSER(1, read(-1, &tmp, 1), -1, EBADF); break; + CASE_TEST(rmdir_blah); EXPECT_SYSER(1, rmdir("/blah"), -1, ENOENT); break; CASE_TEST(sched_yield); EXPECT_SYSZR(1, sched_yield()); break; CASE_TEST(select_null); EXPECT_SYSZR(1, ({ struct timeval tv = { 0 }; select(0, NULL, NULL, NULL, &tv); })); break; CASE_TEST(select_stdout); EXPECT_SYSNE(1, ({ fd_set fds; FD_ZERO(&fds); FD_SET(1, &fds); select(2, NULL, &fds, NULL, NULL); }), -1); break; CASE_TEST(select_fault); EXPECT_SYSER(1, select(1, (void *)1, NULL, NULL, 0), -1, EFAULT); break; CASE_TEST(stat_blah); EXPECT_SYSER(1, stat("/proc/self/blah", &stat_buf), -1, ENOENT); break; - CASE_TEST(stat_fault); EXPECT_SYSER(1, stat(NULL, &stat_buf), -1, EFAULT); break; + CASE_TEST(stat_fault); EXPECT_SYSER(1, stat((void *)1, &stat_buf), -1, EFAULT); break; + CASE_TEST(stat_timestamps); EXPECT_SYSZR(1, test_stat_timestamps()); break; CASE_TEST(symlink_root); EXPECT_SYSER(1, symlink("/", "/"), -1, EEXIST); break; CASE_TEST(unlink_root); EXPECT_SYSER(1, unlink("/"), -1, EISDIR); break; CASE_TEST(unlink_blah); EXPECT_SYSER(1, unlink("/proc/self/blah"), -1, ENOENT); break; @@ -565,6 +920,8 @@ int run_syscall(int min, int max) CASE_TEST(waitpid_child); EXPECT_SYSER(1, waitpid(getpid(), &tmp, WNOHANG), -1, ECHILD); break; CASE_TEST(write_badf); EXPECT_SYSER(1, write(-1, &tmp, 1), -1, EBADF); break; CASE_TEST(write_zero); EXPECT_SYSZR(1, write(1, &tmp, 0)); break; + CASE_TEST(syscall_noargs); EXPECT_SYSEQ(1, syscall(__NR_getpid), getpid()); break; + CASE_TEST(syscall_args); EXPECT_SYSER(1, syscall(__NR_statx, 0, NULL, 0, 0, NULL), -1, EFAULT); break; case __LINE__: return ret; /* must be last */ /* note: do not set any defaults so as to permit holes above */ @@ -576,12 +933,10 @@ int run_syscall(int min, int max) int run_stdlib(int min, int max) { int test; - int tmp; int ret = 0; - void *p1, *p2; for (test = min; test >= 0 && test <= max; test++) { - int llen = 0; // line length + int llen = 0; /* line length */ /* avoid leaving empty lines below, this will insert holes into * test numbers. @@ -633,32 +988,106 @@ int run_stdlib(int min, int max) CASE_TEST(limit_int_fast8_max); EXPECT_EQ(1, INT_FAST8_MAX, (int_fast8_t) 0x7f); break; CASE_TEST(limit_int_fast8_min); EXPECT_EQ(1, INT_FAST8_MIN, (int_fast8_t) 0x80); break; CASE_TEST(limit_uint_fast8_max); EXPECT_EQ(1, UINT_FAST8_MAX, (uint_fast8_t) 0xff); break; - CASE_TEST(limit_int_fast16_min); EXPECT_EQ(1, INT_FAST16_MIN, (int_fast16_t) INTPTR_MIN); break; - CASE_TEST(limit_int_fast16_max); EXPECT_EQ(1, INT_FAST16_MAX, (int_fast16_t) INTPTR_MAX); break; + CASE_TEST(limit_int_fast16_min); EXPECT_EQ(1, INT_FAST16_MIN, (int_fast16_t) SINT_MIN_OF_TYPE(int_fast16_t)); break; + CASE_TEST(limit_int_fast16_max); EXPECT_EQ(1, INT_FAST16_MAX, (int_fast16_t) SINT_MAX_OF_TYPE(int_fast16_t)); break; CASE_TEST(limit_uint_fast16_max); EXPECT_EQ(1, UINT_FAST16_MAX, (uint_fast16_t) UINTPTR_MAX); break; - CASE_TEST(limit_int_fast32_min); EXPECT_EQ(1, INT_FAST32_MIN, (int_fast32_t) INTPTR_MIN); break; - CASE_TEST(limit_int_fast32_max); EXPECT_EQ(1, INT_FAST32_MAX, (int_fast32_t) INTPTR_MAX); break; + CASE_TEST(limit_int_fast32_min); EXPECT_EQ(1, INT_FAST32_MIN, (int_fast32_t) SINT_MIN_OF_TYPE(int_fast32_t)); break; + CASE_TEST(limit_int_fast32_max); EXPECT_EQ(1, INT_FAST32_MAX, (int_fast32_t) SINT_MAX_OF_TYPE(int_fast32_t)); break; CASE_TEST(limit_uint_fast32_max); EXPECT_EQ(1, UINT_FAST32_MAX, (uint_fast32_t) UINTPTR_MAX); break; - CASE_TEST(limit_int_fast64_min); EXPECT_EQ(1, INT_FAST64_MIN, (int_fast64_t) INTPTR_MIN); break; - CASE_TEST(limit_int_fast64_max); EXPECT_EQ(1, INT_FAST64_MAX, (int_fast64_t) INTPTR_MAX); break; - CASE_TEST(limit_uint_fast64_max); EXPECT_EQ(1, UINT_FAST64_MAX, (uint_fast64_t) UINTPTR_MAX); break; -#if __SIZEOF_LONG__ == 8 - CASE_TEST(limit_intptr_min); EXPECT_EQ(1, INTPTR_MIN, (intptr_t) 0x8000000000000000LL); break; - CASE_TEST(limit_intptr_max); EXPECT_EQ(1, INTPTR_MAX, (intptr_t) 0x7fffffffffffffffLL); break; - CASE_TEST(limit_uintptr_max); EXPECT_EQ(1, UINTPTR_MAX, (uintptr_t) 0xffffffffffffffffULL); break; - CASE_TEST(limit_ptrdiff_min); EXPECT_EQ(1, PTRDIFF_MIN, (ptrdiff_t) 0x8000000000000000LL); break; - CASE_TEST(limit_ptrdiff_max); EXPECT_EQ(1, PTRDIFF_MAX, (ptrdiff_t) 0x7fffffffffffffffLL); break; - CASE_TEST(limit_size_max); EXPECT_EQ(1, SIZE_MAX, (size_t) 0xffffffffffffffffULL); break; -#elif __SIZEOF_LONG__ == 4 - CASE_TEST(limit_intptr_min); EXPECT_EQ(1, INTPTR_MIN, (intptr_t) 0x80000000); break; - CASE_TEST(limit_intptr_max); EXPECT_EQ(1, INTPTR_MAX, (intptr_t) 0x7fffffff); break; - CASE_TEST(limit_uintptr_max); EXPECT_EQ(1, UINTPTR_MAX, (uintptr_t) 0xffffffffU); break; - CASE_TEST(limit_ptrdiff_min); EXPECT_EQ(1, PTRDIFF_MIN, (ptrdiff_t) 0x80000000); break; - CASE_TEST(limit_ptrdiff_max); EXPECT_EQ(1, PTRDIFF_MAX, (ptrdiff_t) 0x7fffffff); break; - CASE_TEST(limit_size_max); EXPECT_EQ(1, SIZE_MAX, (size_t) 0xffffffffU); break; -#else -# warning "__SIZEOF_LONG__ is undefined" -#endif /* __SIZEOF_LONG__ */ + CASE_TEST(limit_int_fast64_min); EXPECT_EQ(1, INT_FAST64_MIN, (int_fast64_t) INT64_MIN); break; + CASE_TEST(limit_int_fast64_max); EXPECT_EQ(1, INT_FAST64_MAX, (int_fast64_t) INT64_MAX); break; + CASE_TEST(limit_uint_fast64_max); EXPECT_EQ(1, UINT_FAST64_MAX, (uint_fast64_t) UINT64_MAX); break; + CASE_TEST(sizeof_long_sane); EXPECT_EQ(1, sizeof(long) == 8 || sizeof(long) == 4, 1); break; + CASE_TEST(limit_intptr_min); EXPECT_EQ(1, INTPTR_MIN, sizeof(long) == 8 ? (intptr_t) 0x8000000000000000LL : (intptr_t) 0x80000000); break; + CASE_TEST(limit_intptr_max); EXPECT_EQ(1, INTPTR_MAX, sizeof(long) == 8 ? (intptr_t) 0x7fffffffffffffffLL : (intptr_t) 0x7fffffff); break; + CASE_TEST(limit_uintptr_max); EXPECT_EQ(1, UINTPTR_MAX, sizeof(long) == 8 ? (uintptr_t) 0xffffffffffffffffULL : (uintptr_t) 0xffffffffU); break; + CASE_TEST(limit_ptrdiff_min); EXPECT_EQ(1, PTRDIFF_MIN, sizeof(long) == 8 ? (ptrdiff_t) 0x8000000000000000LL : (ptrdiff_t) 0x80000000); break; + CASE_TEST(limit_ptrdiff_max); EXPECT_EQ(1, PTRDIFF_MAX, sizeof(long) == 8 ? (ptrdiff_t) 0x7fffffffffffffffLL : (ptrdiff_t) 0x7fffffff); break; + CASE_TEST(limit_size_max); EXPECT_EQ(1, SIZE_MAX, sizeof(long) == 8 ? (size_t) 0xffffffffffffffffULL : (size_t) 0xffffffffU); break; + + case __LINE__: + return ret; /* must be last */ + /* note: do not set any defaults so as to permit holes above */ + } + } + return ret; +} + +#define EXPECT_VFPRINTF(c, expected, fmt, ...) \ + ret += expect_vfprintf(llen, c, expected, fmt, ##__VA_ARGS__) + +static int expect_vfprintf(int llen, int c, const char *expected, const char *fmt, ...) +{ + int ret, fd; + ssize_t w, r; + char buf[100]; + FILE *memfile; + va_list args; + + fd = open("/tmp", O_TMPFILE | O_EXCL | O_RDWR, 0600); + if (fd == -1) { + result(llen, SKIPPED); + return 0; + } + + memfile = fdopen(fd, "w+"); + if (!memfile) { + result(llen, FAIL); + return 1; + } + + va_start(args, fmt); + w = vfprintf(memfile, fmt, args); + va_end(args); + + if (w != c) { + llen += printf(" written(%d) != %d", (int)w, c); + result(llen, FAIL); + return 1; + } + + fflush(memfile); + lseek(fd, 0, SEEK_SET); + + r = read(fd, buf, sizeof(buf) - 1); + + fclose(memfile); + + if (r != w) { + llen += printf(" written(%d) != read(%d)", (int)w, (int)r); + result(llen, FAIL); + return 1; + } + + buf[r] = '\0'; + llen += printf(" \"%s\" = \"%s\"", expected, buf); + ret = strncmp(expected, buf, c); + + result(llen, ret ? FAIL : OK); + return ret; +} + +static int run_vfprintf(int min, int max) +{ + int test; + int ret = 0; + + for (test = min; test >= 0 && test <= max; test++) { + int llen = 0; /* line length */ + + /* avoid leaving empty lines below, this will insert holes into + * test numbers. + */ + switch (test + __LINE__ + 1) { + CASE_TEST(empty); EXPECT_VFPRINTF(0, "", ""); break; + CASE_TEST(simple); EXPECT_VFPRINTF(3, "foo", "foo"); break; + CASE_TEST(string); EXPECT_VFPRINTF(3, "foo", "%s", "foo"); break; + CASE_TEST(number); EXPECT_VFPRINTF(4, "1234", "%d", 1234); break; + CASE_TEST(negnumber); EXPECT_VFPRINTF(5, "-1234", "%d", -1234); break; + CASE_TEST(unsigned); EXPECT_VFPRINTF(5, "12345", "%u", 12345); break; + CASE_TEST(char); EXPECT_VFPRINTF(1, "c", "%c", 'c'); break; + CASE_TEST(hex); EXPECT_VFPRINTF(1, "f", "%x", 0xf); break; + CASE_TEST(pointer); EXPECT_VFPRINTF(3, "0x1", "%p", (void *) 0x1); break; case __LINE__: return ret; /* must be last */ /* note: do not set any defaults so as to permit holes above */ @@ -667,47 +1096,54 @@ int run_stdlib(int min, int max) return ret; } -#if defined(__clang__) -__attribute__((optnone)) -#elif defined(__GNUC__) -__attribute__((optimize("O0"))) -#endif static int smash_stack(void) { char buf[100]; + volatile char *ptr = buf; + size_t i; - for (size_t i = 0; i < 200; i++) - buf[i] = 'P'; + for (i = 0; i < 200; i++) + ptr[i] = 'P'; return 1; } -static int run_protection(int min, int max) +static int run_protection(int min __attribute__((unused)), + int max __attribute__((unused))) { pid_t pid; int llen = 0, status; llen += printf("0 -fstackprotector "); -#if !defined(NOLIBC_STACKPROTECTOR) +#if !defined(_NOLIBC_STACKPROTECTOR) llen += printf("not supported"); - pad_spc(llen, 64, "[SKIPPED]\n"); + result(llen, SKIPPED); return 0; #endif +#if defined(_NOLIBC_STACKPROTECTOR) + if (!__stack_chk_guard) { + llen += printf("__stack_chk_guard not initialized"); + result(llen, FAIL); + return 1; + } +#endif + pid = -1; pid = fork(); switch (pid) { case -1: llen += printf("fork()"); - pad_spc(llen, 64, "[FAIL]\n"); + result(llen, FAIL); return 1; case 0: close(STDOUT_FILENO); close(STDERR_FILENO); + prctl(PR_SET_DUMPABLE, 0, 0, 0, 0); smash_stack(); return 1; @@ -716,10 +1152,10 @@ static int run_protection(int min, int max) if (pid == -1 || !WIFSIGNALED(status) || WTERMSIG(status) != SIGABRT) { llen += printf("waitpid()"); - pad_spc(llen, 64, "[FAIL]\n"); + result(llen, FAIL); return 1; } - pad_spc(llen, 64, " [OK]\n"); + result(llen, OK); return 0; } } @@ -735,11 +1171,13 @@ int prepare(void) */ if (stat("/dev/.", &stat_buf) == 0 || mkdir("/dev", 0755) == 0) { if (stat("/dev/console", &stat_buf) != 0 || - stat("/dev/null", &stat_buf) != 0) { + stat("/dev/null", &stat_buf) != 0 || + stat("/dev/zero", &stat_buf) != 0) { /* try devtmpfs first, otherwise fall back to manual creation */ if (mount("/dev", "/dev", "devtmpfs", 0, 0) != 0) { mknod("/dev/console", 0600 | S_IFCHR, makedev(5, 1)); mknod("/dev/null", 0666 | S_IFCHR, makedev(1, 3)); + mknod("/dev/zero", 0666 | S_IFCHR, makedev(1, 5)); } } } @@ -766,32 +1204,72 @@ int prepare(void) /* try to mount /proc if not mounted. Silently fail otherwise */ if (stat("/proc/.", &stat_buf) == 0 || mkdir("/proc", 0755) == 0) { - if (stat("/proc/self", &stat_buf) != 0) - mount("/proc", "/proc", "proc", 0, 0); + if (stat("/proc/self", &stat_buf) != 0) { + /* If not mountable, remove /proc completely to avoid misuse */ + if (mount("none", "/proc", "proc", 0, 0) != 0) + rmdir("/proc"); + } } + /* some tests rely on a writable /tmp */ + mkdir("/tmp", 0755); + return 0; } /* This is the definition of known test names, with their functions */ static const struct test test_names[] = { /* add new tests here */ + { .name = "startup", .func = run_startup }, { .name = "syscall", .func = run_syscall }, { .name = "stdlib", .func = run_stdlib }, + { .name = "vfprintf", .func = run_vfprintf }, { .name = "protection", .func = run_protection }, { 0 } }; +static int is_setting_valid(char *test) +{ + int idx, len, test_len, valid = 0; + char delimiter; + + if (!test) + return valid; + + test_len = strlen(test); + + for (idx = 0; test_names[idx].name; idx++) { + len = strlen(test_names[idx].name); + if (test_len < len) + continue; + + if (strncmp(test, test_names[idx].name, len) != 0) + continue; + + delimiter = test[len]; + if (delimiter != ':' && delimiter != ',' && delimiter != '\0') + continue; + + valid = 1; + break; + } + + return valid; +} + int main(int argc, char **argv, char **envp) { int min = 0; - int max = __INT_MAX__; + int max = INT_MAX; int ret = 0; int err; int idx; char *test; - environ = envp; + argv0 = argv[0]; + test_argc = argc; + test_argv = argv; + test_envp = envp; /* when called as init, it's possible that no console was opened, for * example if no /dev file system was provided. We'll check that fd#1 @@ -807,10 +1285,10 @@ int main(int argc, char **argv, char **envp) * syscall:5-15[:.*],stdlib:8-10 */ test = argv[1]; - if (!test) + if (!is_setting_valid(test)) test = getenv("NOLIBC_TEST"); - if (test) { + if (is_setting_valid(test)) { char *comma, *colon, *dash, *value; do { @@ -833,7 +1311,7 @@ int main(int argc, char **argv, char **envp) * here, which defaults to the full range. */ do { - min = 0; max = __INT_MAX__; + min = 0; max = INT_MAX; value = colon; if (value && *value) { colon = strchr(value, ':'); @@ -888,18 +1366,14 @@ int main(int argc, char **argv, char **envp) */ printf("Leaving init with final status: %d\n", !!ret); if (ret == 0) - reboot(LINUX_REBOOT_CMD_POWER_OFF); + reboot(RB_POWER_OFF); #if defined(__x86_64__) /* QEMU started with "-device isa-debug-exit -no-reboot" will * exit with status code 2N+1 when N is written to 0x501. We * hard-code the syscall here as it's arch-dependent. */ -#if defined(_NOLIBC_SYS_H) - else if (my_syscall3(__NR_ioperm, 0x501, 1, 1) == 0) -#else - else if (ioperm(0x501, 1, 1) == 0) -#endif - asm volatile ("outb %%al, %%dx" :: "d"(0x501), "a"(0)); + else if (syscall(__NR_ioperm, 0x501, 1, 1) == 0) + __asm__ volatile ("outb %%al, %%dx" :: "d"(0x501), "a"(0)); /* if it does nothing, fall back to the regular panic */ #endif } diff --git a/tools/testing/selftests/openat2/Makefile b/tools/testing/selftests/openat2/Makefile index 843ba56d8e49..254d676a2689 100644 --- a/tools/testing/selftests/openat2/Makefile +++ b/tools/testing/selftests/openat2/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined +CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined -static-libasan TEST_GEN_PROGS := openat2_test resolve_test rename_attack_test include ../lib.mk diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index 6922d6417e1c..88d6830ee004 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -90,7 +90,6 @@ again: } ret = WEXITSTATUS(status); - ksft_print_msg("waitpid WEXITSTATUS=%d\n", ret); return ret; } diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c index 3fd8e903118f..4e86f927880c 100644 --- a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c +++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c @@ -143,6 +143,7 @@ static inline int child_join(struct child *child, struct error *err) r = -1; } + ksft_print_msg("waitpid WEXITSTATUS=%d\n", r); return r; } diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c index e2dd4ed84984..00a07e7c571c 100644 --- a/tools/testing/selftests/pidfd/pidfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_test.c @@ -115,7 +115,8 @@ static int test_pidfd_send_signal_exited_fail(void) pidfd = open(buf, O_DIRECTORY | O_CLOEXEC); - (void)wait_for_pid(pid); + ret = wait_for_pid(pid); + ksft_print_msg("waitpid WEXITSTATUS=%d\n", ret); if (pidfd < 0) ksft_exit_fail_msg( diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index ae2bfc0d822f..7ea42fa02eab 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -17,6 +17,7 @@ SUB_DIRS = alignment \ benchmarks \ cache_shape \ copyloops \ + dexcr \ dscr \ mm \ nx-gzip \ @@ -58,12 +59,11 @@ override define INSTALL_RULE done; endef -override define EMIT_TESTS +emit_tests: +@for TARGET in $(SUB_DIRS); do \ BUILD_TARGET=$(OUTPUT)/$$TARGET; \ - $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests;\ + $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET $@;\ done; -endef override define CLEAN +@for TARGET in $(SUB_DIRS); do \ @@ -76,4 +76,4 @@ endef tags: find . -name '*.c' -o -name '*.h' | xargs ctags -.PHONY: tags $(SUB_DIRS) +.PHONY: tags $(SUB_DIRS) emit_tests diff --git a/tools/testing/selftests/powerpc/copyloops/asm/export.h b/tools/testing/selftests/powerpc/copyloops/linux/export.h index e6b80d5fbd14..e6b80d5fbd14 100644 --- a/tools/testing/selftests/powerpc/copyloops/asm/export.h +++ b/tools/testing/selftests/powerpc/copyloops/linux/export.h diff --git a/tools/testing/selftests/powerpc/dexcr/.gitignore b/tools/testing/selftests/powerpc/dexcr/.gitignore new file mode 100644 index 000000000000..b82f45dd46b9 --- /dev/null +++ b/tools/testing/selftests/powerpc/dexcr/.gitignore @@ -0,0 +1,2 @@ +hashchk_test +lsdexcr diff --git a/tools/testing/selftests/powerpc/dexcr/Makefile b/tools/testing/selftests/powerpc/dexcr/Makefile new file mode 100644 index 000000000000..76210f2bcec3 --- /dev/null +++ b/tools/testing/selftests/powerpc/dexcr/Makefile @@ -0,0 +1,9 @@ +TEST_GEN_PROGS := hashchk_test +TEST_GEN_FILES := lsdexcr + +include ../../lib.mk + +$(OUTPUT)/hashchk_test: CFLAGS += -fno-pie $(call cc-option,-mno-rop-protect) + +$(TEST_GEN_PROGS): ../harness.c ../utils.c ./dexcr.c +$(TEST_GEN_FILES): ../utils.c ./dexcr.c diff --git a/tools/testing/selftests/powerpc/dexcr/dexcr.c b/tools/testing/selftests/powerpc/dexcr/dexcr.c new file mode 100644 index 000000000000..65ec5347de98 --- /dev/null +++ b/tools/testing/selftests/powerpc/dexcr/dexcr.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <errno.h> +#include <setjmp.h> +#include <signal.h> +#include <sys/types.h> +#include <sys/wait.h> + +#include "dexcr.h" +#include "reg.h" +#include "utils.h" + +static jmp_buf generic_signal_jump_buf; + +static void generic_signal_handler(int signum, siginfo_t *info, void *context) +{ + longjmp(generic_signal_jump_buf, 0); +} + +bool dexcr_exists(void) +{ + struct sigaction old; + volatile bool exists; + + old = push_signal_handler(SIGILL, generic_signal_handler); + if (setjmp(generic_signal_jump_buf)) + goto out; + + /* + * If the SPR is not recognised by the hardware it triggers + * a hypervisor emulation interrupt. If the kernel does not + * recognise/try to emulate it, we receive a SIGILL signal. + * + * If we do not receive a signal, assume we have the SPR or the + * kernel is trying to emulate it correctly. + */ + exists = false; + mfspr(SPRN_DEXCR_RO); + exists = true; + +out: + pop_signal_handler(SIGILL, old); + return exists; +} + +/* + * Just test if a bad hashchk triggers a signal, without checking + * for support or if the NPHIE aspect is enabled. + */ +bool hashchk_triggers(void) +{ + struct sigaction old; + volatile bool triggers; + + old = push_signal_handler(SIGILL, generic_signal_handler); + if (setjmp(generic_signal_jump_buf)) + goto out; + + triggers = true; + do_bad_hashchk(); + triggers = false; + +out: + pop_signal_handler(SIGILL, old); + return triggers; +} + +unsigned int get_dexcr(enum dexcr_source source) +{ + switch (source) { + case DEXCR: + return mfspr(SPRN_DEXCR_RO); + case HDEXCR: + return mfspr(SPRN_HDEXCR_RO); + case EFFECTIVE: + return mfspr(SPRN_DEXCR_RO) | mfspr(SPRN_HDEXCR_RO); + default: + FAIL_IF_EXIT_MSG(true, "bad enum dexcr_source"); + } +} + +void await_child_success(pid_t pid) +{ + int wstatus; + + FAIL_IF_EXIT_MSG(pid == -1, "fork failed"); + FAIL_IF_EXIT_MSG(waitpid(pid, &wstatus, 0) == -1, "wait failed"); + FAIL_IF_EXIT_MSG(!WIFEXITED(wstatus), "child did not exit cleanly"); + FAIL_IF_EXIT_MSG(WEXITSTATUS(wstatus) != 0, "child exit error"); +} + +/* + * Perform a hashst instruction. The following components determine the result + * + * 1. The LR value (any register technically) + * 2. The SP value (also any register, but it must be a valid address) + * 3. A secret key managed by the kernel + * + * The result is stored to the address held in SP. + */ +void hashst(unsigned long lr, void *sp) +{ + asm volatile ("addi 31, %0, 0;" /* set r31 (pretend LR) to lr */ + "addi 30, %1, 8;" /* set r30 (pretend SP) to sp + 8 */ + PPC_RAW_HASHST(31, -8, 30) /* compute hash into stack location */ + : : "r" (lr), "r" (sp) : "r31", "r30", "memory"); +} + +/* + * Perform a hashchk instruction. A hash is computed as per hashst(), + * however the result is not stored to memory. Instead the existing + * value is read and compared against the computed hash. + * + * If they match, execution continues. + * If they differ, an interrupt triggers. + */ +void hashchk(unsigned long lr, void *sp) +{ + asm volatile ("addi 31, %0, 0;" /* set r31 (pretend LR) to lr */ + "addi 30, %1, 8;" /* set r30 (pretend SP) to sp + 8 */ + PPC_RAW_HASHCHK(31, -8, 30) /* check hash at stack location */ + : : "r" (lr), "r" (sp) : "r31", "r30", "memory"); +} + +void do_bad_hashchk(void) +{ + unsigned long hash = 0; + + hashst(0, &hash); + hash += 1; + hashchk(0, &hash); +} diff --git a/tools/testing/selftests/powerpc/dexcr/dexcr.h b/tools/testing/selftests/powerpc/dexcr/dexcr.h new file mode 100644 index 000000000000..f55cbbc8643b --- /dev/null +++ b/tools/testing/selftests/powerpc/dexcr/dexcr.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * POWER Dynamic Execution Control Facility (DEXCR) + * + * This header file contains helper functions and macros + * required for all the DEXCR related test cases. + */ +#ifndef _SELFTESTS_POWERPC_DEXCR_DEXCR_H +#define _SELFTESTS_POWERPC_DEXCR_DEXCR_H + +#include <stdbool.h> +#include <sys/types.h> + +#include "reg.h" + +#define DEXCR_PR_BIT(aspect) __MASK(63 - (32 + (aspect))) +#define DEXCR_PR_SBHE DEXCR_PR_BIT(0) +#define DEXCR_PR_IBRTPD DEXCR_PR_BIT(3) +#define DEXCR_PR_SRAPD DEXCR_PR_BIT(4) +#define DEXCR_PR_NPHIE DEXCR_PR_BIT(5) + +#define PPC_RAW_HASH_ARGS(b, i, a) \ + ((((i) >> 3) & 0x1F) << 21 | (a) << 16 | (b) << 11 | (((i) >> 8) & 0x1)) +#define PPC_RAW_HASHST(b, i, a) \ + str(.long (0x7C0005A4 | PPC_RAW_HASH_ARGS(b, i, a));) +#define PPC_RAW_HASHCHK(b, i, a) \ + str(.long (0x7C0005E4 | PPC_RAW_HASH_ARGS(b, i, a));) + +bool dexcr_exists(void); + +bool hashchk_triggers(void); + +enum dexcr_source { + DEXCR, /* Userspace DEXCR value */ + HDEXCR, /* Hypervisor enforced DEXCR value */ + EFFECTIVE, /* Bitwise OR of UDEXCR and ENFORCED DEXCR bits */ +}; + +unsigned int get_dexcr(enum dexcr_source source); + +void await_child_success(pid_t pid); + +void hashst(unsigned long lr, void *sp); + +void hashchk(unsigned long lr, void *sp); + +void do_bad_hashchk(void); + +#endif /* _SELFTESTS_POWERPC_DEXCR_DEXCR_H */ diff --git a/tools/testing/selftests/powerpc/dexcr/hashchk_test.c b/tools/testing/selftests/powerpc/dexcr/hashchk_test.c new file mode 100644 index 000000000000..7d5658c9ebe4 --- /dev/null +++ b/tools/testing/selftests/powerpc/dexcr/hashchk_test.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <sched.h> +#include <setjmp.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/prctl.h> +#include <unistd.h> + +#include "dexcr.h" +#include "utils.h" + +static int require_nphie(void) +{ + SKIP_IF_MSG(!dexcr_exists(), "DEXCR not supported"); + SKIP_IF_MSG(!(get_dexcr(EFFECTIVE) & DEXCR_PR_NPHIE), + "DEXCR[NPHIE] not enabled"); + + return 0; +} + +static jmp_buf hashchk_detected_buf; +static const char *hashchk_failure_msg; + +static void hashchk_handler(int signum, siginfo_t *info, void *context) +{ + if (signum != SIGILL) + hashchk_failure_msg = "wrong signal received"; + else if (info->si_code != ILL_ILLOPN) + hashchk_failure_msg = "wrong signal code received"; + + longjmp(hashchk_detected_buf, 0); +} + +/* + * Check that hashchk triggers when DEXCR[NPHIE] is enabled + * and is detected as such by the kernel exception handler + */ +static int hashchk_detected_test(void) +{ + struct sigaction old; + int err; + + err = require_nphie(); + if (err) + return err; + + old = push_signal_handler(SIGILL, hashchk_handler); + if (setjmp(hashchk_detected_buf)) + goto out; + + hashchk_failure_msg = NULL; + do_bad_hashchk(); + hashchk_failure_msg = "hashchk failed to trigger"; + +out: + pop_signal_handler(SIGILL, old); + FAIL_IF_MSG(hashchk_failure_msg, hashchk_failure_msg); + return 0; +} + +#define HASH_COUNT 8 + +static unsigned long hash_values[HASH_COUNT + 1]; + +static void fill_hash_values(void) +{ + for (unsigned long i = 0; i < HASH_COUNT; i++) + hashst(i, &hash_values[i]); + + /* Used to ensure the checks uses the same addresses as the hashes */ + hash_values[HASH_COUNT] = (unsigned long)&hash_values; +} + +static unsigned int count_hash_values_matches(void) +{ + unsigned long matches = 0; + + for (unsigned long i = 0; i < HASH_COUNT; i++) { + unsigned long orig_hash = hash_values[i]; + hash_values[i] = 0; + + hashst(i, &hash_values[i]); + + if (hash_values[i] == orig_hash) + matches++; + } + + return matches; +} + +static int hashchk_exec_child(void) +{ + ssize_t count; + + fill_hash_values(); + + count = write(STDOUT_FILENO, hash_values, sizeof(hash_values)); + return count == sizeof(hash_values) ? 0 : EOVERFLOW; +} + +static char *hashchk_exec_child_args[] = { "hashchk_exec_child", NULL }; + +/* + * Check that new programs get different keys so a malicious process + * can't recreate a victim's hash values. + */ +static int hashchk_exec_random_key_test(void) +{ + pid_t pid; + int err; + int pipefd[2]; + + err = require_nphie(); + if (err) + return err; + + FAIL_IF_MSG(pipe(pipefd), "failed to create pipe"); + + pid = fork(); + if (pid == 0) { + if (dup2(pipefd[1], STDOUT_FILENO) == -1) + _exit(errno); + + execve("/proc/self/exe", hashchk_exec_child_args, NULL); + _exit(errno); + } + + await_child_success(pid); + FAIL_IF_MSG(read(pipefd[0], hash_values, sizeof(hash_values)) != sizeof(hash_values), + "missing expected child output"); + + /* Verify the child used the same hash_values address */ + FAIL_IF_EXIT_MSG(hash_values[HASH_COUNT] != (unsigned long)&hash_values, + "bad address check"); + + /* If all hashes are the same it means (most likely) same key */ + FAIL_IF_MSG(count_hash_values_matches() == HASH_COUNT, "shared key detected"); + + return 0; +} + +/* + * Check that forks share the same key so that existing hash values + * remain valid. + */ +static int hashchk_fork_share_key_test(void) +{ + pid_t pid; + int err; + + err = require_nphie(); + if (err) + return err; + + fill_hash_values(); + + pid = fork(); + if (pid == 0) { + if (count_hash_values_matches() != HASH_COUNT) + _exit(1); + _exit(0); + } + + await_child_success(pid); + return 0; +} + +#define STACK_SIZE (1024 * 1024) + +static int hashchk_clone_child_fn(void *args) +{ + fill_hash_values(); + return 0; +} + +/* + * Check that threads share the same key so that existing hash values + * remain valid. + */ +static int hashchk_clone_share_key_test(void) +{ + void *child_stack; + pid_t pid; + int err; + + err = require_nphie(); + if (err) + return err; + + child_stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); + + FAIL_IF_MSG(child_stack == MAP_FAILED, "failed to map child stack"); + + pid = clone(hashchk_clone_child_fn, child_stack + STACK_SIZE, + CLONE_VM | SIGCHLD, NULL); + + await_child_success(pid); + FAIL_IF_MSG(count_hash_values_matches() != HASH_COUNT, + "different key detected"); + + return 0; +} + +int main(int argc, char *argv[]) +{ + int err = 0; + + if (argc >= 1 && !strcmp(argv[0], hashchk_exec_child_args[0])) + return hashchk_exec_child(); + + err |= test_harness(hashchk_detected_test, "hashchk_detected"); + err |= test_harness(hashchk_exec_random_key_test, "hashchk_exec_random_key"); + err |= test_harness(hashchk_fork_share_key_test, "hashchk_fork_share_key"); + err |= test_harness(hashchk_clone_share_key_test, "hashchk_clone_share_key"); + + return err; +} diff --git a/tools/testing/selftests/powerpc/dexcr/lsdexcr.c b/tools/testing/selftests/powerpc/dexcr/lsdexcr.c new file mode 100644 index 000000000000..94abbfcc389e --- /dev/null +++ b/tools/testing/selftests/powerpc/dexcr/lsdexcr.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <string.h> + +#include "dexcr.h" +#include "utils.h" + +static unsigned int dexcr; +static unsigned int hdexcr; +static unsigned int effective; + +struct dexcr_aspect { + const char *name; + const char *desc; + unsigned int index; +}; + +static const struct dexcr_aspect aspects[] = { + { + .name = "SBHE", + .desc = "Speculative branch hint enable", + .index = 0, + }, + { + .name = "IBRTPD", + .desc = "Indirect branch recurrent target prediction disable", + .index = 3, + }, + { + .name = "SRAPD", + .desc = "Subroutine return address prediction disable", + .index = 4, + }, + { + .name = "NPHIE", + .desc = "Non-privileged hash instruction enable", + .index = 5, + }, + { + .name = "PHIE", + .desc = "Privileged hash instruction enable", + .index = 6, + }, +}; + +static void print_list(const char *list[], size_t len) +{ + for (size_t i = 0; i < len; i++) { + printf("%s", list[i]); + if (i + 1 < len) + printf(", "); + } +} + +static void print_dexcr(char *name, unsigned int bits) +{ + const char *enabled_aspects[ARRAY_SIZE(aspects) + 1] = {NULL}; + size_t j = 0; + + printf("%s: %08x", name, bits); + + if (bits == 0) { + printf("\n"); + return; + } + + for (size_t i = 0; i < ARRAY_SIZE(aspects); i++) { + unsigned int mask = DEXCR_PR_BIT(aspects[i].index); + + if (bits & mask) { + enabled_aspects[j++] = aspects[i].name; + bits &= ~mask; + } + } + + if (bits) + enabled_aspects[j++] = "unknown"; + + printf(" ("); + print_list(enabled_aspects, j); + printf(")\n"); +} + +static void print_aspect(const struct dexcr_aspect *aspect) +{ + const char *attributes[8] = {NULL}; + size_t j = 0; + unsigned long mask; + + mask = DEXCR_PR_BIT(aspect->index); + if (dexcr & mask) + attributes[j++] = "set"; + if (hdexcr & mask) + attributes[j++] = "set (hypervisor)"; + if (!(effective & mask)) + attributes[j++] = "clear"; + + printf("%12s %c (%d): ", aspect->name, effective & mask ? '*' : ' ', aspect->index); + print_list(attributes, j); + printf(" \t(%s)\n", aspect->desc); +} + +int main(int argc, char *argv[]) +{ + if (!dexcr_exists()) { + printf("DEXCR not detected on this hardware\n"); + return 1; + } + + dexcr = get_dexcr(DEXCR); + hdexcr = get_dexcr(HDEXCR); + effective = dexcr | hdexcr; + + print_dexcr(" DEXCR", dexcr); + print_dexcr(" HDEXCR", hdexcr); + print_dexcr("Effective", effective); + printf("\n"); + + for (size_t i = 0; i < ARRAY_SIZE(aspects); i++) + print_aspect(&aspects[i]); + printf("\n"); + + if (effective & DEXCR_PR_NPHIE) { + printf("DEXCR[NPHIE] enabled: hashst/hashchk "); + if (hashchk_triggers()) + printf("working\n"); + else + printf("failed to trigger\n"); + } else { + printf("DEXCR[NPHIE] disabled: hashst/hashchk "); + if (hashchk_triggers()) + printf("unexpectedly triggered\n"); + else + printf("ignored\n"); + } + + return 0; +} diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c index 0ad4f12b3d43..5876220d8ff2 100644 --- a/tools/testing/selftests/powerpc/harness.c +++ b/tools/testing/selftests/powerpc/harness.c @@ -24,7 +24,7 @@ /* Setting timeout to -1 disables the alarm */ static uint64_t timeout = 120; -int run_test(int (test_function)(void), char *name) +int run_test(int (test_function)(void), const char *name) { bool terminated; int rc, status; @@ -101,7 +101,7 @@ void test_harness_set_timeout(uint64_t time) timeout = time; } -int test_harness(int (test_function)(void), char *name) +int test_harness(int (test_function)(void), const char *name) { int rc; diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h index d5a547f72669..fad09c9d3387 100644 --- a/tools/testing/selftests/powerpc/include/reg.h +++ b/tools/testing/selftests/powerpc/include/reg.h @@ -19,6 +19,8 @@ #define mb() asm volatile("sync" : : : "memory"); #define barrier() asm volatile("" : : : "memory"); +#define SPRN_HDEXCR_RO 455 /* Userspace readonly view of SPRN_HDEXCR (471) */ + #define SPRN_MMCR2 769 #define SPRN_MMCRA 770 #define SPRN_MMCR0 779 @@ -47,6 +49,8 @@ #define SPRN_SDAR 781 #define SPRN_SIER 768 +#define SPRN_DEXCR_RO 812 /* Userspace readonly view of SPRN_DEXCR (828) */ + #define SPRN_TEXASR 0x82 /* Transaction Exception and Status Register */ #define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */ #define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */ diff --git a/tools/testing/selftests/powerpc/include/subunit.h b/tools/testing/selftests/powerpc/include/subunit.h index 068d55fdf80f..b0bb774617c9 100644 --- a/tools/testing/selftests/powerpc/include/subunit.h +++ b/tools/testing/selftests/powerpc/include/subunit.h @@ -6,37 +6,37 @@ #ifndef _SELFTESTS_POWERPC_SUBUNIT_H #define _SELFTESTS_POWERPC_SUBUNIT_H -static inline void test_start(char *name) +static inline void test_start(const char *name) { printf("test: %s\n", name); } -static inline void test_failure_detail(char *name, char *detail) +static inline void test_failure_detail(const char *name, const char *detail) { printf("failure: %s [%s]\n", name, detail); } -static inline void test_failure(char *name) +static inline void test_failure(const char *name) { printf("failure: %s\n", name); } -static inline void test_error(char *name) +static inline void test_error(const char *name) { printf("error: %s\n", name); } -static inline void test_skip(char *name) +static inline void test_skip(const char *name) { printf("skip: %s\n", name); } -static inline void test_success(char *name) +static inline void test_success(const char *name) { printf("success: %s\n", name); } -static inline void test_finish(char *name, int status) +static inline void test_finish(const char *name, int status) { if (status) test_failure(name); @@ -44,7 +44,7 @@ static inline void test_finish(char *name, int status) test_success(name); } -static inline void test_set_git_version(char *value) +static inline void test_set_git_version(const char *value) { printf("tags: git_version:%s\n", value); } diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index 44bfd48b93d6..66d7b2368dd4 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -9,11 +9,18 @@ #define __cacheline_aligned __attribute__((aligned(128))) #include <stdint.h> +#include <stdio.h> #include <stdbool.h> +#include <sys/signal.h> #include <linux/auxvec.h> #include <linux/perf_event.h> #include <asm/cputable.h> #include "reg.h" +#include <unistd.h> + +#ifndef ARRAY_SIZE +# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif /* Avoid headaches with PRI?64 - just use %ll? always */ typedef unsigned long long u64; @@ -25,7 +32,7 @@ typedef uint16_t u16; typedef uint8_t u8; void test_harness_set_timeout(uint64_t time); -int test_harness(int (test_function)(void), char *name); +int test_harness(int (test_function)(void), const char *name); int read_auxv(char *buf, ssize_t buf_size); void *find_auxv_entry(int type, char *auxv); @@ -67,7 +74,6 @@ struct perf_event_read { }; #if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 30) -#include <unistd.h> #include <sys/syscall.h> static inline pid_t gettid(void) @@ -106,6 +112,9 @@ static inline char *auxv_platform(void) bool is_ppc64le(void); int using_hash_mmu(bool *using_hash); +struct sigaction push_signal_handler(int sig, void (*fn)(int, siginfo_t *, void *)); +struct sigaction pop_signal_handler(int sig, struct sigaction old_handler); + /* Yes, this is evil */ #define FAIL_IF(x) \ do { \ @@ -116,6 +125,16 @@ do { \ } \ } while (0) +#define FAIL_IF_MSG(x, msg) \ +do { \ + if ((x)) { \ + fprintf(stderr, \ + "[FAIL] Test FAILED on line %d: %s\n", \ + __LINE__, msg); \ + return 1; \ + } \ +} while (0) + #define FAIL_IF_EXIT(x) \ do { \ if ((x)) { \ @@ -125,6 +144,16 @@ do { \ } \ } while (0) +#define FAIL_IF_EXIT_MSG(x, msg) \ +do { \ + if ((x)) { \ + fprintf(stderr, \ + "[FAIL] Test FAILED on line %d: %s\n", \ + __LINE__, msg); \ + _exit(1); \ + } \ +} while (0) + /* The test harness uses this, yes it's gross */ #define MAGIC_SKIP_RETURN_VALUE 99 diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index 4e1a294eec35..0df1a3afc5e2 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -1,15 +1,16 @@ # SPDX-License-Identifier: GPL-2.0-only +bad_accesses +exec_prot hugetlb_vs_thp_test -subpage_prot -tempfile -prot_sao -segv_errors -wild_bctr large_vm_fork_separation -bad_accesses -tlbie_test +large_vm_gpr_corruption pkey_exec_prot pkey_siginfo +prot_sao +segv_errors stack_expansion_ldst stack_expansion_signal -large_vm_gpr_corruption +subpage_prot +tempfile +tlbie_test +wild_bctr diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile index 2b95e44d20ff..a284fa874a9f 100644 --- a/tools/testing/selftests/powerpc/pmu/Makefile +++ b/tools/testing/selftests/powerpc/pmu/Makefile @@ -30,13 +30,14 @@ override define RUN_TESTS +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests endef -DEFAULT_EMIT_TESTS := $(EMIT_TESTS) -override define EMIT_TESTS - $(DEFAULT_EMIT_TESTS) +emit_tests: + for TEST in $(TEST_GEN_PROGS); do \ + BASENAME_TEST=`basename $$TEST`; \ + echo "$(COLLECTION):$$BASENAME_TEST"; \ + done +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests -endef DEFAULT_INSTALL_RULE := $(INSTALL_RULE) override define INSTALL_RULE @@ -64,4 +65,4 @@ sampling_tests: event_code_tests: TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all -.PHONY: all run_tests ebb sampling_tests event_code_tests +.PHONY: all run_tests ebb sampling_tests event_code_tests emit_tests diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h index 4181755cf5a0..64e25cce1435 100644 --- a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h +++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h @@ -18,8 +18,6 @@ #define MMCR1_RSQ 0x200000000000ULL /* radix scope qual field */ #define BHRB_DISABLE 0x2000000000ULL /* MMCRA BHRB DISABLE bit */ -#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) - extern int ev_mask_pmcxsel, ev_shift_pmcxsel; extern int ev_mask_marked, ev_shift_marked; extern int ev_mask_comb, ev_shift_comb; diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index cbeeaeae8837..1b39b86849da 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -36,6 +36,7 @@ $(TM_TESTS): CFLAGS += -I../tm -mhtm CFLAGS += $(KHDR_INCLUDES) -fno-pie $(OUTPUT)/ptrace-gpr: ptrace-gpr.S +$(OUTPUT)/ptrace-perf-hwbreak: ptrace-perf-asm.S $(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: LDLIBS += -pthread $(TEST_GEN_PROGS): ../harness.c ../utils.c ../lib/reg.S diff --git a/tools/testing/selftests/powerpc/ptrace/child.h b/tools/testing/selftests/powerpc/ptrace/child.h index d7275b7b33dc..df62ff0735f7 100644 --- a/tools/testing/selftests/powerpc/ptrace/child.h +++ b/tools/testing/selftests/powerpc/ptrace/child.h @@ -48,12 +48,12 @@ struct child_sync { } \ } while (0) -#define PARENT_SKIP_IF_UNSUPPORTED(x, sync) \ +#define PARENT_SKIP_IF_UNSUPPORTED(x, sync, msg) \ do { \ if ((x) == -1 && (errno == ENODEV || errno == EINVAL)) { \ (sync)->parent_gave_up = true; \ prod_child(sync); \ - SKIP_IF(1); \ + SKIP_IF_MSG(1, msg); \ } \ } while (0) diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c index f6f8596ce8e1..f6da4cb30cd6 100644 --- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c @@ -266,7 +266,7 @@ static int parent(struct shared_info *info, pid_t pid) * to the child. */ ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3); - PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync); + PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync, "PKEYs not supported"); PARENT_FAIL_IF(ret, &info->child_sync); info->amr = regs[0]; diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c index f75739bbad28..e374c6b7ace6 100644 --- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c @@ -884,7 +884,7 @@ static int perf_hwbreak(void) { srand ( time(NULL) ); - SKIP_IF(!perf_breakpoint_supported()); + SKIP_IF_MSG(!perf_breakpoint_supported(), "Perf breakpoints not supported"); return runtest(); } diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c index 1345e9b9af0f..75d30d61ab0e 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c @@ -64,26 +64,26 @@ static bool dawr_present(struct ppc_debug_info *dbginfo) static void write_var(int len) { - __u8 *pcvar; - __u16 *psvar; - __u32 *pivar; - __u64 *plvar; + volatile __u8 *pcvar; + volatile __u16 *psvar; + volatile __u32 *pivar; + volatile __u64 *plvar; switch (len) { case 1: - pcvar = (__u8 *)&glvar; + pcvar = (volatile __u8 *)&glvar; *pcvar = 0xff; break; case 2: - psvar = (__u16 *)&glvar; + psvar = (volatile __u16 *)&glvar; *psvar = 0xffff; break; case 4: - pivar = (__u32 *)&glvar; + pivar = (volatile __u32 *)&glvar; *pivar = 0xffffffff; break; case 8: - plvar = (__u64 *)&glvar; + plvar = (volatile __u64 *)&glvar; *plvar = 0xffffffffffffffffLL; break; } @@ -98,16 +98,16 @@ static void read_var(int len) switch (len) { case 1: - cvar = (__u8)glvar; + cvar = (volatile __u8)glvar; break; case 2: - svar = (__u16)glvar; + svar = (volatile __u16)glvar; break; case 4: - ivar = (__u32)glvar; + ivar = (volatile __u32)glvar; break; case 8: - lvar = (__u64)glvar; + lvar = (volatile __u64)glvar; break; } } @@ -603,7 +603,7 @@ static int ptrace_hwbreak(void) wait(NULL); get_dbginfo(child_pid, &dbginfo); - SKIP_IF(dbginfo.num_data_bps == 0); + SKIP_IF_MSG(dbginfo.num_data_bps == 0, "No data breakpoints present"); dawr = dawr_present(&dbginfo); run_tests(child_pid, &dbginfo, dawr); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-asm.S b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-asm.S new file mode 100644 index 000000000000..9aa2e58f3189 --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-asm.S @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include <ppc-asm.h> + +.global same_watch_addr_load +.global same_watch_addr_trap + +FUNC_START(same_watch_addr_child) + nop +same_watch_addr_load: + ld 0,0(3) + nop +same_watch_addr_trap: + trap + blr +FUNC_END(same_watch_addr_child) + + +.global perf_then_ptrace_load1 +.global perf_then_ptrace_load2 +.global perf_then_ptrace_trap + +FUNC_START(perf_then_ptrace_child) + nop +perf_then_ptrace_load1: + ld 0,0(3) +perf_then_ptrace_load2: + ld 0,0(4) + nop +perf_then_ptrace_trap: + trap + blr +FUNC_END(perf_then_ptrace_child) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c index 3344e74a97b4..a0a0b9bb5854 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c @@ -1,659 +1,445 @@ // SPDX-License-Identifier: GPL-2.0+ -#include <stdio.h> -#include <string.h> -#include <signal.h> -#include <stdlib.h> -#include <unistd.h> -#include <errno.h> -#include <linux/hw_breakpoint.h> -#include <linux/perf_event.h> + #include <asm/unistd.h> -#include <sys/ptrace.h> +#include <linux/hw_breakpoint.h> +#include <linux/ptrace.h> +#include <memory.h> +#include <stdlib.h> #include <sys/wait.h> -#include "ptrace.h" -char data[16]; +#include "utils.h" -/* Overlapping address range */ -volatile __u64 *ptrace_data1 = (__u64 *)&data[0]; -volatile __u64 *perf_data1 = (__u64 *)&data[4]; +/* + * Child subroutine that performs a load on the address, then traps + */ +void same_watch_addr_child(unsigned long *addr); -/* Non-overlapping address range */ -volatile __u64 *ptrace_data2 = (__u64 *)&data[0]; -volatile __u64 *perf_data2 = (__u64 *)&data[8]; +/* Address of the ld instruction in same_watch_addr_child() */ +extern char same_watch_addr_load[]; -static unsigned long pid_max_addr(void) -{ - FILE *fp; - char *line, *c; - char addr[100]; - size_t len = 0; - - fp = fopen("/proc/kallsyms", "r"); - if (!fp) { - printf("Failed to read /proc/kallsyms. Exiting..\n"); - exit(EXIT_FAILURE); - } +/* Address of the end trap instruction in same_watch_addr_child() */ +extern char same_watch_addr_trap[]; - while (getline(&line, &len, fp) != -1) { - if (!strstr(line, "pid_max") || strstr(line, "pid_max_max") || - strstr(line, "pid_max_min")) - continue; +/* + * Child subroutine that performs a load on the first address, then a load on + * the second address (with no instructions separating this from the first + * load), then traps. + */ +void perf_then_ptrace_child(unsigned long *first_addr, unsigned long *second_addr); - strncpy(addr, line, len < 100 ? len : 100); - c = strchr(addr, ' '); - *c = '\0'; - return strtoul(addr, &c, 16); - } - fclose(fp); - printf("Could not find pix_max. Exiting..\n"); - exit(EXIT_FAILURE); - return -1; -} +/* Address of the first ld instruction in perf_then_ptrace_child() */ +extern char perf_then_ptrace_load1[]; -static void perf_user_event_attr_set(struct perf_event_attr *attr, __u64 addr, __u64 len) -{ - memset(attr, 0, sizeof(struct perf_event_attr)); - attr->type = PERF_TYPE_BREAKPOINT; - attr->size = sizeof(struct perf_event_attr); - attr->bp_type = HW_BREAKPOINT_R; - attr->bp_addr = addr; - attr->bp_len = len; - attr->exclude_kernel = 1; - attr->exclude_hv = 1; -} +/* Address of the second ld instruction in perf_then_ptrace_child() */ +extern char perf_then_ptrace_load2[]; -static void perf_kernel_event_attr_set(struct perf_event_attr *attr) +/* Address of the end trap instruction in perf_then_ptrace_child() */ +extern char perf_then_ptrace_trap[]; + +static inline long sys_ptrace(long request, pid_t pid, unsigned long addr, unsigned long data) { - memset(attr, 0, sizeof(struct perf_event_attr)); - attr->type = PERF_TYPE_BREAKPOINT; - attr->size = sizeof(struct perf_event_attr); - attr->bp_type = HW_BREAKPOINT_R; - attr->bp_addr = pid_max_addr(); - attr->bp_len = sizeof(unsigned long); - attr->exclude_user = 1; - attr->exclude_hv = 1; + return syscall(__NR_ptrace, request, pid, addr, data); } -static int perf_cpu_event_open(int cpu, __u64 addr, __u64 len) +static long ptrace_traceme(void) { - struct perf_event_attr attr; - - perf_user_event_attr_set(&attr, addr, len); - return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); + return sys_ptrace(PTRACE_TRACEME, 0, 0, 0); } -static int perf_thread_event_open(pid_t child_pid, __u64 addr, __u64 len) +static long ptrace_getregs(pid_t pid, struct pt_regs *result) { - struct perf_event_attr attr; - - perf_user_event_attr_set(&attr, addr, len); - return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0); + return sys_ptrace(PTRACE_GETREGS, pid, 0, (unsigned long)result); } -static int perf_thread_cpu_event_open(pid_t child_pid, int cpu, __u64 addr, __u64 len) +static long ptrace_setregs(pid_t pid, struct pt_regs *result) { - struct perf_event_attr attr; - - perf_user_event_attr_set(&attr, addr, len); - return syscall(__NR_perf_event_open, &attr, child_pid, cpu, -1, 0); + return sys_ptrace(PTRACE_SETREGS, pid, 0, (unsigned long)result); } -static int perf_thread_kernel_event_open(pid_t child_pid) +static long ptrace_cont(pid_t pid, long signal) { - struct perf_event_attr attr; - - perf_kernel_event_attr_set(&attr); - return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0); + return sys_ptrace(PTRACE_CONT, pid, 0, signal); } -static int perf_cpu_kernel_event_open(int cpu) +static long ptrace_singlestep(pid_t pid, long signal) { - struct perf_event_attr attr; - - perf_kernel_event_attr_set(&attr); - return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); + return sys_ptrace(PTRACE_SINGLESTEP, pid, 0, signal); } -static int child(void) +static long ppc_ptrace_gethwdbginfo(pid_t pid, struct ppc_debug_info *dbginfo) { - int ret; - - ret = ptrace(PTRACE_TRACEME, 0, NULL, 0); - if (ret) { - printf("Error: PTRACE_TRACEME failed\n"); - return 0; - } - kill(getpid(), SIGUSR1); /* --> parent (SIGUSR1) */ - - return 0; + return sys_ptrace(PPC_PTRACE_GETHWDBGINFO, pid, 0, (unsigned long)dbginfo); } -static void ptrace_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type, - __u64 addr, int len) +static long ppc_ptrace_sethwdbg(pid_t pid, struct ppc_hw_breakpoint *bp_info) { - info->version = 1; - info->trigger_type = type; - info->condition_mode = PPC_BREAKPOINT_CONDITION_NONE; - info->addr = addr; - info->addr2 = addr + len; - info->condition_value = 0; - if (!len) - info->addr_mode = PPC_BREAKPOINT_MODE_EXACT; - else - info->addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE; + return sys_ptrace(PPC_PTRACE_SETHWDEBUG, pid, 0, (unsigned long)bp_info); } -static int ptrace_open(pid_t child_pid, __u64 wp_addr, int len) +static long ppc_ptrace_delhwdbg(pid_t pid, int bp_id) { - struct ppc_hw_breakpoint info; - - ptrace_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len); - return ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info); + return sys_ptrace(PPC_PTRACE_DELHWDEBUG, pid, 0L, bp_id); } -static int test1(pid_t child_pid) +static long ptrace_getreg_pc(pid_t pid, void **pc) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread event by ptrace) - * if (existing cpu event by perf) - * if (addr range overlaps) - * fail; - */ + struct pt_regs regs; + long err; - perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); - if (perf_fd < 0) - return -1; + err = ptrace_getregs(pid, ®s); + if (err) + return err; - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd > 0 || errno != ENOSPC) - ret = -1; + *pc = (void *)regs.nip; - close(perf_fd); - return ret; + return 0; } -static int test2(pid_t child_pid) +static long ptrace_setreg_pc(pid_t pid, void *pc) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread event by ptrace) - * if (existing cpu event by perf) - * if (addr range does not overlaps) - * allow; - */ + struct pt_regs regs; + long err; - perf_fd = perf_cpu_event_open(0, (__u64)perf_data2, sizeof(*perf_data2)); - if (perf_fd < 0) - return -1; - - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); - if (ptrace_fd < 0) { - ret = -1; - goto perf_close; - } - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - -perf_close: - close(perf_fd); - return ret; -} + err = ptrace_getregs(pid, ®s); + if (err) + return err; -static int test3(pid_t child_pid) -{ - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread event by ptrace) - * if (existing thread event by perf on the same thread) - * if (addr range overlaps) - * fail; - */ - perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1, - sizeof(*perf_data1)); - if (perf_fd < 0) - return -1; + regs.nip = (unsigned long)pc; - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd > 0 || errno != ENOSPC) - ret = -1; + err = ptrace_setregs(pid, ®s); + if (err) + return err; - close(perf_fd); - return ret; + return 0; } -static int test4(pid_t child_pid) +static int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, + int group_fd, unsigned long flags) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread event by ptrace) - * if (existing thread event by perf on the same thread) - * if (addr range does not overlaps) - * fail; - */ - perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data2, - sizeof(*perf_data2)); - if (perf_fd < 0) - return -1; - - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); - if (ptrace_fd < 0) { - ret = -1; - goto perf_close; - } - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - -perf_close: - close(perf_fd); - return ret; + return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); } -static int test5(pid_t child_pid) +static void perf_user_event_attr_set(struct perf_event_attr *attr, void *addr, u64 len) { - int perf_fd; - int ptrace_fd; - int cpid; - int ret = 0; - - /* Test: - * if (new per thread event by ptrace) - * if (existing thread event by perf on the different thread) - * allow; - */ - cpid = fork(); - if (!cpid) { - /* Temporary Child */ - pause(); - exit(EXIT_SUCCESS); - } - - perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1)); - if (perf_fd < 0) { - ret = -1; - goto kill_child; - } - - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) { - ret = -1; - goto perf_close; - } + memset(attr, 0, sizeof(struct perf_event_attr)); - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); -perf_close: - close(perf_fd); -kill_child: - kill(cpid, SIGINT); - return ret; + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(struct perf_event_attr); + attr->bp_type = HW_BREAKPOINT_R; + attr->bp_addr = (u64)addr; + attr->bp_len = len; + attr->exclude_kernel = 1; + attr->exclude_hv = 1; } -static int test6(pid_t child_pid) +static int perf_watchpoint_open(pid_t child_pid, void *addr, u64 len) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread kernel event by perf) - * if (existing thread event by ptrace on the same thread) - * allow; - * -- OR -- - * if (new per cpu kernel event by perf) - * if (existing thread event by ptrace) - * allow; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) - return -1; - - perf_fd = perf_thread_kernel_event_open(child_pid); - if (perf_fd < 0) { - ret = -1; - goto ptrace_close; - } - close(perf_fd); - - perf_fd = perf_cpu_kernel_event_open(0); - if (perf_fd < 0) { - ret = -1; - goto ptrace_close; - } - close(perf_fd); + struct perf_event_attr attr; -ptrace_close: - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; + perf_user_event_attr_set(&attr, addr, len); + return perf_event_open(&attr, child_pid, -1, -1, 0); } -static int test7(pid_t child_pid) +static int perf_read_counter(int perf_fd, u64 *count) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread event by perf) - * if (existing thread event by ptrace on the same thread) - * if (addr range overlaps) - * fail; + /* + * A perf counter is retrieved by the read() syscall. It contains + * the current count as 8 bytes that are interpreted as a u64 */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) - return -1; + ssize_t len = read(perf_fd, count, sizeof(*count)); - perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1, - sizeof(*perf_data1)); - if (perf_fd > 0 || errno != ENOSPC) - ret = -1; + if (len != sizeof(*count)) + return -1; - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; + return 0; } -static int test8(pid_t child_pid) +static void ppc_ptrace_init_breakpoint(struct ppc_hw_breakpoint *info, + int type, void *addr, int len) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread event by perf) - * if (existing thread event by ptrace on the same thread) - * if (addr range does not overlaps) - * allow; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); - if (ptrace_fd < 0) - return -1; - - perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data2, - sizeof(*perf_data2)); - if (perf_fd < 0) { - ret = -1; - goto ptrace_close; - } - close(perf_fd); - -ptrace_close: - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; + info->version = 1; + info->trigger_type = type; + info->condition_mode = PPC_BREAKPOINT_CONDITION_NONE; + info->addr = (u64)addr; + info->addr2 = (u64)addr + len; + info->condition_value = 0; + if (!len) + info->addr_mode = PPC_BREAKPOINT_MODE_EXACT; + else + info->addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE; } -static int test9(pid_t child_pid) +/* + * Checks if we can place at least 2 watchpoints on the child process + */ +static int check_watchpoints(pid_t pid) { - int perf_fd; - int ptrace_fd; - int cpid; - int ret = 0; - - /* Test: - * if (new per thread event by perf) - * if (existing thread event by ptrace on the other thread) - * allow; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) - return -1; - - cpid = fork(); - if (!cpid) { - /* Temporary Child */ - pause(); - exit(EXIT_SUCCESS); - } + struct ppc_debug_info dbginfo; - perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1)); - if (perf_fd < 0) { - ret = -1; - goto kill_child; - } - close(perf_fd); + FAIL_IF_MSG(ppc_ptrace_gethwdbginfo(pid, &dbginfo), "PPC_PTRACE_GETHWDBGINFO failed"); + SKIP_IF_MSG(dbginfo.num_data_bps <= 1, "Not enough data watchpoints (need at least 2)"); -kill_child: - kill(cpid, SIGINT); - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; + return 0; } -static int test10(pid_t child_pid) +/* + * Wrapper around a plain fork() call that sets up the child for + * ptrace-ing. Both the parent and child return from this, though + * the child is stopped until ptrace_cont(pid) is run by the parent. + */ +static int ptrace_fork_child(pid_t *pid) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per cpu event by perf) - * if (existing thread event by ptrace on the same thread) - * if (addr range overlaps) - * fail; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) - return -1; + int status; - perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); - if (perf_fd > 0 || errno != ENOSPC) - ret = -1; - - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; -} + *pid = fork(); -static int test11(pid_t child_pid) -{ - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per cpu event by perf) - * if (existing thread event by ptrace on the same thread) - * if (addr range does not overlap) - * allow; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); - if (ptrace_fd < 0) - return -1; + if (*pid < 0) + FAIL_IF_MSG(1, "Failed to fork child"); - perf_fd = perf_cpu_event_open(0, (__u64)perf_data2, sizeof(*perf_data2)); - if (perf_fd < 0) { - ret = -1; - goto ptrace_close; + if (!*pid) { + FAIL_IF_EXIT_MSG(ptrace_traceme(), "PTRACE_TRACEME failed"); + FAIL_IF_EXIT_MSG(raise(SIGSTOP), "Child failed to raise SIGSTOP"); + } else { + /* Synchronise on child SIGSTOP */ + FAIL_IF_MSG(waitpid(*pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); } - close(perf_fd); -ptrace_close: - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; + return 0; } -static int test12(pid_t child_pid) +/* + * Tests the interaction between ptrace and perf watching the same data. + * + * We expect ptrace to take 'priority', as it is has before-execute + * semantics. + * + * The perf counter should not be incremented yet because perf has after-execute + * semantics. E.g., if ptrace changes the child PC, we don't even execute the + * instruction at all. + * + * When the child is stopped for ptrace, we test both continue and single step. + * Both should increment the perf counter. We also test changing the PC somewhere + * different and stepping, which should not increment the perf counter. + */ +int same_watch_addr_test(void) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread and per cpu event by perf) - * if (existing thread event by ptrace on the same thread) - * if (addr range overlaps) - * fail; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) - return -1; + struct ppc_hw_breakpoint bp_info; /* ptrace breakpoint info */ + int bp_id; /* Breakpoint handle of ptrace watchpoint */ + int perf_fd; /* File descriptor of perf performance counter */ + u64 perf_count; /* Most recently fetched perf performance counter value */ + pid_t pid; /* PID of child process */ + void *pc; /* Most recently fetched child PC value */ + int status; /* Stop status of child after waitpid */ + unsigned long value; /* Dummy value to be read/written to by child */ + int err; + + err = ptrace_fork_child(&pid); + if (err) + return err; + + if (!pid) { + same_watch_addr_child(&value); + exit(1); + } - perf_fd = perf_thread_cpu_event_open(child_pid, 0, (__u64)perf_data1, sizeof(*perf_data1)); - if (perf_fd > 0 || errno != ENOSPC) - ret = -1; + err = check_watchpoints(pid); + if (err) + return err; - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; -} + /* Place a perf watchpoint counter on value */ + perf_fd = perf_watchpoint_open(pid, &value, sizeof(value)); + FAIL_IF_MSG(perf_fd < 0, "Failed to open perf performance counter"); -static int test13(pid_t child_pid) -{ - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread and per cpu event by perf) - * if (existing thread event by ptrace on the same thread) - * if (addr range does not overlap) - * allow; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); - if (ptrace_fd < 0) - return -1; + /* Place a ptrace watchpoint on value */ + ppc_ptrace_init_breakpoint(&bp_info, PPC_BREAKPOINT_TRIGGER_READ, &value, sizeof(value)); + bp_id = ppc_ptrace_sethwdbg(pid, &bp_info); + FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint"); - perf_fd = perf_thread_cpu_event_open(child_pid, 0, (__u64)perf_data2, sizeof(*perf_data2)); - if (perf_fd < 0) { - ret = -1; - goto ptrace_close; - } - close(perf_fd); + /* Let the child run. It should stop on the ptrace watchpoint */ + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); -ptrace_close: - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; -} + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load instruction"); -static int test14(pid_t child_pid) -{ - int perf_fd; - int ptrace_fd; - int cpid; - int ret = 0; - - /* Test: - * if (new per thread and per cpu event by perf) - * if (existing thread event by ptrace on the other thread) - * allow; + /* + * We stopped before executing the load, so perf should not have + * recorded any events yet */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) - return -1; - - cpid = fork(); - if (!cpid) { - /* Temporary Child */ - pause(); - exit(EXIT_SUCCESS); - } - - perf_fd = perf_thread_cpu_event_open(cpid, 0, (__u64)perf_data1, - sizeof(*perf_data1)); - if (perf_fd < 0) { - ret = -1; - goto kill_child; - } - close(perf_fd); - -kill_child: - kill(cpid, SIGINT); - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; -} + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 0, "perf recorded unexpected event"); + + /* Single stepping over the load should increment the perf counter */ + FAIL_IF_MSG(ptrace_singlestep(pid, 0), "Failed to single step child"); + + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_load + 4, "Failed to single step load instruction"); + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 1, "perf counter did not increment"); + + /* + * Set up a ptrace watchpoint on the value again and trigger it. + * The perf counter should not have incremented because we do not + * execute the load yet. + */ + FAIL_IF_MSG(ppc_ptrace_delhwdbg(pid, bp_id), "Failed to remove old ptrace watchpoint"); + bp_id = ppc_ptrace_sethwdbg(pid, &bp_info); + FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint"); + FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load), "Failed to set child PC"); + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); + + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load trap"); + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 1, "perf counter should not have changed"); + + /* Continuing over the load should increment the perf counter */ + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); + + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap"); + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 2, "perf counter did not increment"); + + /* + * If we set the child PC back to the load instruction, then continue, + * we should reach the end trap (because ptrace is one-shot) and have + * another perf event. + */ + FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load), "Failed to set child PC"); + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); + + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap"); + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 3, "perf counter did not increment"); + + /* + * If we set the child PC back to the load instruction, set a ptrace + * watchpoint on the load, then continue, we should immediately get + * the ptrace trap without incrementing the perf counter + */ + FAIL_IF_MSG(ppc_ptrace_delhwdbg(pid, bp_id), "Failed to remove old ptrace watchpoint"); + bp_id = ppc_ptrace_sethwdbg(pid, &bp_info); + FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint"); + FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load), "Failed to set child PC"); + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); + + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load instruction"); + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 3, "perf counter should not have changed"); + + /* + * If we change the PC while stopped on the load instruction, we should + * not increment the perf counter (because ptrace is before-execute, + * perf is after-execute). + */ + FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load + 4), "Failed to set child PC"); + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); -static int do_test(const char *msg, int (*fun)(pid_t arg), pid_t arg) -{ - int ret; + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap"); + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 3, "perf counter should not have changed"); - ret = fun(arg); - if (ret) - printf("%s: Error\n", msg); - else - printf("%s: Ok\n", msg); - return ret; -} + /* Clean up child */ + FAIL_IF_MSG(kill(pid, SIGKILL) != 0, "Failed to kill child"); -char *desc[14] = { - "perf cpu event -> ptrace thread event (Overlapping)", - "perf cpu event -> ptrace thread event (Non-overlapping)", - "perf thread event -> ptrace same thread event (Overlapping)", - "perf thread event -> ptrace same thread event (Non-overlapping)", - "perf thread event -> ptrace other thread event", - "ptrace thread event -> perf kernel event", - "ptrace thread event -> perf same thread event (Overlapping)", - "ptrace thread event -> perf same thread event (Non-overlapping)", - "ptrace thread event -> perf other thread event", - "ptrace thread event -> perf cpu event (Overlapping)", - "ptrace thread event -> perf cpu event (Non-overlapping)", - "ptrace thread event -> perf same thread & cpu event (Overlapping)", - "ptrace thread event -> perf same thread & cpu event (Non-overlapping)", - "ptrace thread event -> perf other thread & cpu event", -}; - -static int test(pid_t child_pid) -{ - int ret = TEST_PASS; - - ret |= do_test(desc[0], test1, child_pid); - ret |= do_test(desc[1], test2, child_pid); - ret |= do_test(desc[2], test3, child_pid); - ret |= do_test(desc[3], test4, child_pid); - ret |= do_test(desc[4], test5, child_pid); - ret |= do_test(desc[5], test6, child_pid); - ret |= do_test(desc[6], test7, child_pid); - ret |= do_test(desc[7], test8, child_pid); - ret |= do_test(desc[8], test9, child_pid); - ret |= do_test(desc[9], test10, child_pid); - ret |= do_test(desc[10], test11, child_pid); - ret |= do_test(desc[11], test12, child_pid); - ret |= do_test(desc[12], test13, child_pid); - ret |= do_test(desc[13], test14, child_pid); - - return ret; + return 0; } -static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo) +/* + * Tests the interaction between ptrace and perf when: + * 1. perf watches a value + * 2. ptrace watches a different value + * 3. The perf value is read, then the ptrace value is read immediately after + * + * A breakpoint implementation may accidentally misattribute/skip one of + * the ptrace or perf handlers, as interrupt based work is done after perf + * and before ptrace. + * + * We expect the perf counter to increment before the ptrace watchpoint + * triggers. + */ +int perf_then_ptrace_test(void) { - if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, dbginfo)) { - perror("Can't get breakpoint info"); - exit(-1); + struct ppc_hw_breakpoint bp_info; /* ptrace breakpoint info */ + int bp_id; /* Breakpoint handle of ptrace watchpoint */ + int perf_fd; /* File descriptor of perf performance counter */ + u64 perf_count; /* Most recently fetched perf performance counter value */ + pid_t pid; /* PID of child process */ + void *pc; /* Most recently fetched child PC value */ + int status; /* Stop status of child after waitpid */ + unsigned long perf_value; /* Dummy value to be watched by perf */ + unsigned long ptrace_value; /* Dummy value to be watched by ptrace */ + int err; + + err = ptrace_fork_child(&pid); + if (err) + return err; + + /* + * If we are the child, run a subroutine that reads the perf value, + * then reads the ptrace value with consecutive load instructions + */ + if (!pid) { + perf_then_ptrace_child(&perf_value, &ptrace_value); + exit(0); } -} -static int ptrace_perf_hwbreak(void) -{ - int ret; - pid_t child_pid; - struct ppc_debug_info dbginfo; + err = check_watchpoints(pid); + if (err) + return err; - child_pid = fork(); - if (!child_pid) - return child(); + /* Place a perf watchpoint counter */ + perf_fd = perf_watchpoint_open(pid, &perf_value, sizeof(perf_value)); + FAIL_IF_MSG(perf_fd < 0, "Failed to open perf performance counter"); - /* parent */ - wait(NULL); /* <-- child (SIGUSR1) */ + /* Place a ptrace watchpoint */ + ppc_ptrace_init_breakpoint(&bp_info, PPC_BREAKPOINT_TRIGGER_READ, + &ptrace_value, sizeof(ptrace_value)); + bp_id = ppc_ptrace_sethwdbg(pid, &bp_info); + FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint"); - get_dbginfo(child_pid, &dbginfo); - SKIP_IF(dbginfo.num_data_bps <= 1); + /* Let the child run. It should stop on the ptrace watchpoint */ + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); - ret = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); - SKIP_IF(ret < 0); - close(ret); + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != perf_then_ptrace_load2, "Child did not stop on ptrace load"); - ret = test(child_pid); + /* perf should have recorded the first load */ + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 1, "perf counter did not increment"); - ptrace(PTRACE_CONT, child_pid, NULL, 0); - return ret; + /* Clean up child */ + FAIL_IF_MSG(kill(pid, SIGKILL) != 0, "Failed to kill child"); + + return 0; } int main(int argc, char *argv[]) { - return test_harness(ptrace_perf_hwbreak, "ptrace-perf-hwbreak"); + int err = 0; + + err |= test_harness(same_watch_addr_test, "same_watch_addr"); + err |= test_harness(perf_then_ptrace_test, "perf_then_ptrace"); + + return err; } diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c index bc454f899124..d89474377f11 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c @@ -192,7 +192,7 @@ static int parent(struct shared_info *info, pid_t pid) * to the child. */ ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3); - PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync); + PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync, "PKEYs not supported"); PARENT_FAIL_IF(ret, &info->child_sync); info->amr1 = info->amr2 = regs[0]; diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c index 4436ca9d3caf..14726c77a6ce 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c @@ -79,7 +79,7 @@ int ptrace_tar(void) int ret, status; // TAR was added in v2.07 - SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); + SKIP_IF_MSG(!have_hwcap2(PPC_FEATURE2_ARCH_2_07), "TAR requires ISA 2.07 compatible hardware"); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT); pid = fork(); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c index 5dc152b162df..7c70d62587c2 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c @@ -112,8 +112,8 @@ int ptrace_tm_gpr(void) pid_t pid; int ret, status; - SKIP_IF(!have_htm()); - SKIP_IF(htm_is_synthetic()); + SKIP_IF_MSG(!have_htm(), "Don't have transactional memory"); + SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic"); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT); pid = fork(); if (pid < 0) { diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c index 458cc1a70ccf..6c17ed099969 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c @@ -118,8 +118,8 @@ int ptrace_tm_spd_gpr(void) pid_t pid; int ret, status; - SKIP_IF(!have_htm()); - SKIP_IF(htm_is_synthetic()); + SKIP_IF_MSG(!have_htm(), "Don't have transactional memory"); + SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic"); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT); pid = fork(); if (pid < 0) { diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c index e112a34fbe59..afd8dc2e2097 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c @@ -128,8 +128,8 @@ int ptrace_tm_spd_tar(void) pid_t pid; int ret, status; - SKIP_IF(!have_htm()); - SKIP_IF(htm_is_synthetic()); + SKIP_IF_MSG(!have_htm(), "Don't have transactional memory"); + SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic"); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT); pid = fork(); if (pid == 0) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c index 40133d49fe39..14d2fac8f237 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c @@ -128,8 +128,8 @@ int ptrace_tm_spd_vsx(void) pid_t pid; int ret, status, i; - SKIP_IF(!have_htm()); - SKIP_IF(htm_is_synthetic()); + SKIP_IF_MSG(!have_htm(), "Don't have transactional memory"); + SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic"); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT); for (i = 0; i < 128; i++) { diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c index 880ba6a29a48..e64cdb04cecf 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c @@ -113,8 +113,8 @@ int ptrace_tm_spr(void) pid_t pid; int ret, status; - SKIP_IF(!have_htm()); - SKIP_IF(htm_is_synthetic()); + SKIP_IF_MSG(!have_htm(), "Don't have transactional memory"); + SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic"); shm_id = shmget(IPC_PRIVATE, sizeof(struct shared), 0777|IPC_CREAT); shm_id1 = shmget(IPC_PRIVATE, sizeof(int), 0777|IPC_CREAT); pid = fork(); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c index d0db6df0f0ea..3963d4b0429f 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c @@ -116,8 +116,8 @@ int ptrace_tm_tar(void) pid_t pid; int ret, status; - SKIP_IF(!have_htm()); - SKIP_IF(htm_is_synthetic()); + SKIP_IF_MSG(!have_htm(), "Don't have transactional memory"); + SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic"); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT); pid = fork(); if (pid == 0) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c index 4f05ce4fd282..8c925d734a72 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c @@ -112,8 +112,8 @@ int ptrace_tm_vsx(void) pid_t pid; int ret, status, i; - SKIP_IF(!have_htm()); - SKIP_IF(htm_is_synthetic()); + SKIP_IF_MSG(!have_htm(), "Don't have transactional memory"); + SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic"); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT); for (i = 0; i < 128; i++) { diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c index cb9875f764ca..11bc624574fe 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c @@ -61,7 +61,7 @@ int ptrace_vsx(void) pid_t pid; int ret, status, i; - SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_VSX)); + SKIP_IF_MSG(!have_hwcap(PPC_FEATURE_HAS_VSX), "Don't have VSX"); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT); diff --git a/tools/testing/selftests/powerpc/stringloops/asm/export.h b/tools/testing/selftests/powerpc/stringloops/linux/export.h index 2d14a9b4248c..2d14a9b4248c 100644 --- a/tools/testing/selftests/powerpc/stringloops/asm/export.h +++ b/tools/testing/selftests/powerpc/stringloops/linux/export.h diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c index 252fb4a95e90..e5f2d8735c64 100644 --- a/tools/testing/selftests/powerpc/utils.c +++ b/tools/testing/selftests/powerpc/utils.c @@ -618,3 +618,27 @@ out: fclose(f); return rc; } + +struct sigaction push_signal_handler(int sig, void (*fn)(int, siginfo_t *, void *)) +{ + struct sigaction sa; + struct sigaction old_handler; + + sa.sa_sigaction = fn; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_SIGINFO; + FAIL_IF_EXIT_MSG(sigaction(sig, &sa, &old_handler), + "failed to push signal handler"); + + return old_handler; +} + +struct sigaction pop_signal_handler(int sig, struct sigaction old_handler) +{ + struct sigaction popped; + + FAIL_IF_EXIT_MSG(sigaction(sig, &old_handler, &popped), + "failed to pop signal handler"); + + return popped; +} diff --git a/tools/testing/selftests/powerpc/vphn/asm/lppaca.h b/tools/testing/selftests/powerpc/vphn/asm/lppaca.h deleted file mode 120000 index 942b1d00999c..000000000000 --- a/tools/testing/selftests/powerpc/vphn/asm/lppaca.h +++ /dev/null @@ -1 +0,0 @@ -../../../../../../arch/powerpc/include/asm/lppaca.h
\ No newline at end of file diff --git a/tools/testing/selftests/powerpc/vphn/asm/vphn.h b/tools/testing/selftests/powerpc/vphn/asm/vphn.h new file mode 120000 index 000000000000..3a0b2a00171c --- /dev/null +++ b/tools/testing/selftests/powerpc/vphn/asm/vphn.h @@ -0,0 +1 @@ +../../../../../../arch/powerpc/include/asm/vphn.h
\ No newline at end of file diff --git a/tools/testing/selftests/prctl/.gitignore b/tools/testing/selftests/prctl/.gitignore index 7a657b25f686..05d5e31661df 100644 --- a/tools/testing/selftests/prctl/.gitignore +++ b/tools/testing/selftests/prctl/.gitignore @@ -3,3 +3,4 @@ disable-tsc-ctxt-sw-stress-test disable-tsc-on-off-stress-test disable-tsc-test set-anon-vma-name-test +set-process-name diff --git a/tools/testing/selftests/prctl/Makefile b/tools/testing/selftests/prctl/Makefile index c058b81eeb41..01dc90fbb509 100644 --- a/tools/testing/selftests/prctl/Makefile +++ b/tools/testing/selftests/prctl/Makefile @@ -5,12 +5,10 @@ ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/) ifeq ($(ARCH),x86) TEST_PROGS := disable-tsc-ctxt-sw-stress-test disable-tsc-on-off-stress-test \ - disable-tsc-test set-anon-vma-name-test + disable-tsc-test set-anon-vma-name-test set-process-name all: $(TEST_PROGS) include ../lib.mk -clean: - rm -fr $(TEST_PROGS) endif endif diff --git a/tools/testing/selftests/prctl/set-anon-vma-name-test.c b/tools/testing/selftests/prctl/set-anon-vma-name-test.c index 26d853c5a0c1..4275cb256dce 100644 --- a/tools/testing/selftests/prctl/set-anon-vma-name-test.c +++ b/tools/testing/selftests/prctl/set-anon-vma-name-test.c @@ -97,7 +97,7 @@ TEST_F(vma, renaming) { TH_LOG("Try to pass invalid name (with non-printable character \\1) to rename the VMA"); EXPECT_EQ(rename_vma((unsigned long)self->ptr_anon, AREA_SIZE, BAD_NAME), -EINVAL); - TH_LOG("Try to rename non-anonynous VMA"); + TH_LOG("Try to rename non-anonymous VMA"); EXPECT_EQ(rename_vma((unsigned long) self->ptr_not_anon, AREA_SIZE, GOOD_NAME), -EINVAL); } diff --git a/tools/testing/selftests/prctl/set-process-name.c b/tools/testing/selftests/prctl/set-process-name.c new file mode 100644 index 000000000000..3bc5e0e09eb9 --- /dev/null +++ b/tools/testing/selftests/prctl/set-process-name.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This test covers the PR_SET_NAME functionality of prctl calls + */ + +#include <errno.h> +#include <sys/prctl.h> +#include <string.h> + +#include "../kselftest_harness.h" + +#define CHANGE_NAME "changename" +#define EMPTY_NAME "" +#define TASK_COMM_LEN 16 + +int set_name(char *name) +{ + int res; + + res = prctl(PR_SET_NAME, name, NULL, NULL, NULL); + + if (res < 0) + return -errno; + return res; +} + +int check_is_name_correct(char *check_name) +{ + char name[TASK_COMM_LEN]; + int res; + + res = prctl(PR_GET_NAME, name, NULL, NULL, NULL); + + if (res < 0) + return -errno; + + return !strcmp(name, check_name); +} + +int check_null_pointer(char *check_name) +{ + char *name = NULL; + int res; + + res = prctl(PR_GET_NAME, name, NULL, NULL, NULL); + + return res; +} + +TEST(rename_process) { + + EXPECT_GE(set_name(CHANGE_NAME), 0); + EXPECT_TRUE(check_is_name_correct(CHANGE_NAME)); + + EXPECT_GE(set_name(EMPTY_NAME), 0); + EXPECT_TRUE(check_is_name_correct(EMPTY_NAME)); + + EXPECT_GE(set_name(CHANGE_NAME), 0); + EXPECT_LT(check_null_pointer(CHANGE_NAME), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/proc/proc-empty-vm.c b/tools/testing/selftests/proc/proc-empty-vm.c index 7588428b8fcd..ee71ce52cb6a 100644 --- a/tools/testing/selftests/proc/proc-empty-vm.c +++ b/tools/testing/selftests/proc/proc-empty-vm.c @@ -1,3 +1,4 @@ +#if defined __amd64__ || defined __i386__ /* * Copyright (c) 2022 Alexey Dobriyan <adobriyan@gmail.com> * @@ -37,6 +38,10 @@ #include <sys/wait.h> #include <unistd.h> +#ifdef __amd64__ +#define TEST_VSYSCALL +#endif + /* * 0: vsyscall VMA doesn't exist vsyscall=none * 1: vsyscall VMA is --xp vsyscall=xonly @@ -77,7 +82,7 @@ static const char proc_pid_smaps_vsyscall_1[] = "Swap: 0 kB\n" "SwapPss: 0 kB\n" "Locked: 0 kB\n" -"THPeligible: 0\n" +"THPeligible: 0\n" /* * "ProtectionKey:" field is conditional. It is possible to check it as well, * but I don't have such machine. @@ -107,7 +112,7 @@ static const char proc_pid_smaps_vsyscall_2[] = "Swap: 0 kB\n" "SwapPss: 0 kB\n" "Locked: 0 kB\n" -"THPeligible: 0\n" +"THPeligible: 0\n" /* * "ProtectionKey:" field is conditional. It is possible to check it as well, * but I'm too tired. @@ -119,6 +124,7 @@ static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___) _exit(EXIT_FAILURE); } +#ifdef TEST_VSYSCALL static void sigaction_SIGSEGV_vsyscall(int _, siginfo_t *__, void *___) { _exit(g_vsyscall); @@ -170,6 +176,7 @@ static void vsyscall(void) exit(1); } } +#endif static int test_proc_pid_maps(pid_t pid) { @@ -260,6 +267,7 @@ static const char g_smaps_rollup[] = "Private_Dirty: 0 kB\n" "Referenced: 0 kB\n" "Anonymous: 0 kB\n" +"KSM: 0 kB\n" "LazyFree: 0 kB\n" "AnonHugePages: 0 kB\n" "ShmemPmdMapped: 0 kB\n" @@ -299,7 +307,9 @@ int main(void) { int rv = EXIT_SUCCESS; +#ifdef TEST_VSYSCALL vsyscall(); +#endif switch (g_vsyscall) { case 0: @@ -346,6 +356,14 @@ int main(void) #ifdef __amd64__ munmap(NULL, ((size_t)1 << 47) - 4096); +#elif defined __i386__ + { + size_t len; + + for (len = -4096;; len -= 4096) { + munmap(NULL, len); + } + } #else #error "implement 'unmap everything'" #endif @@ -386,3 +404,9 @@ int main(void) return rv; } +#else +int main(void) +{ + return 4; +} +#endif diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c index cfa9562f3cd8..c9f6cca4feb4 100644 --- a/tools/testing/selftests/ptp/testptp.c +++ b/tools/testing/selftests/ptp/testptp.c @@ -110,7 +110,7 @@ static long ppb_to_scaled_ppm(int ppb) static int64_t pctns(struct ptp_clock_time *t) { - return t->sec * 1000000000LL + t->nsec; + return t->sec * NSEC_PER_SEC + t->nsec; } static void usage(char *progname) @@ -134,6 +134,7 @@ static void usage(char *progname) " 1 - external time stamp\n" " 2 - periodic output\n" " -n val shift the ptp clock time by 'val' nanoseconds\n" + " -o val phase offset (in nanoseconds) to be provided to the PHC servo\n" " -p val enable output with a period of 'val' nanoseconds\n" " -H val set output phase to 'val' nanoseconds (requires -p)\n" " -w val set output pulse width to 'val' nanoseconds (requires -p)\n" @@ -142,8 +143,10 @@ static void usage(char *progname) " -S set the system time from the ptp clock time\n" " -t val shift the ptp clock time by 'val' seconds\n" " -T val set the ptp clock time to 'val' seconds\n" + " -x val get an extended ptp clock time with the desired number of samples (up to %d)\n" + " -X get a ptp clock cross timestamp\n" " -z test combinations of rising/falling external time stamp flags\n", - progname); + progname, PTP_MAX_SAMPLES); } int main(int argc, char *argv[]) @@ -157,6 +160,8 @@ int main(int argc, char *argv[]) struct timex tx; struct ptp_clock_time *pct; struct ptp_sys_offset *sysoff; + struct ptp_sys_offset_extended *soe; + struct ptp_sys_offset_precise *xts; char *progname; unsigned int i; @@ -167,6 +172,7 @@ int main(int argc, char *argv[]) int adjfreq = 0x7fffffff; int adjtime = 0; int adjns = 0; + int adjphase = 0; int capabilities = 0; int extts = 0; int flagtest = 0; @@ -174,6 +180,8 @@ int main(int argc, char *argv[]) int index = 0; int list_pins = 0; int pct_offset = 0; + int getextended = 0; + int getcross = 0; int n_samples = 0; int pin_index = -1, pin_func; int pps = -1; @@ -188,7 +196,7 @@ int main(int argc, char *argv[]) progname = strrchr(argv[0], '/'); progname = progname ? 1+progname : argv[0]; - while (EOF != (c = getopt(argc, argv, "cd:e:f:ghH:i:k:lL:n:p:P:sSt:T:w:z"))) { + while (EOF != (c = getopt(argc, argv, "cd:e:f:ghH:i:k:lL:n:o:p:P:sSt:T:w:x:Xz"))) { switch (c) { case 'c': capabilities = 1; @@ -228,6 +236,9 @@ int main(int argc, char *argv[]) case 'n': adjns = atoi(optarg); break; + case 'o': + adjphase = atoi(optarg); + break; case 'p': perout = atoll(optarg); break; @@ -250,6 +261,18 @@ int main(int argc, char *argv[]) case 'w': pulsewidth = atoi(optarg); break; + case 'x': + getextended = atoi(optarg); + if (getextended < 1 || getextended > PTP_MAX_SAMPLES) { + fprintf(stderr, + "number of extended timestamp samples must be between 1 and %d; was asked for %d\n", + PTP_MAX_SAMPLES, getextended); + return -1; + } + break; + case 'X': + getcross = 1; + break; case 'z': flagtest = 1; break; @@ -287,7 +310,8 @@ int main(int argc, char *argv[]) " %d pulse per second\n" " %d programmable pins\n" " %d cross timestamping\n" - " %d adjust_phase\n", + " %d adjust_phase\n" + " %d maximum phase adjustment (ns)\n", caps.max_adj, caps.n_alarm, caps.n_ext_ts, @@ -295,7 +319,8 @@ int main(int argc, char *argv[]) caps.pps, caps.n_pins, caps.cross_timestamping, - caps.adjust_phase); + caps.adjust_phase, + caps.max_phase_adj); } } @@ -317,7 +342,7 @@ int main(int argc, char *argv[]) tx.time.tv_usec = adjns; while (tx.time.tv_usec < 0) { tx.time.tv_sec -= 1; - tx.time.tv_usec += 1000000000; + tx.time.tv_usec += NSEC_PER_SEC; } if (clock_adjtime(clkid, &tx) < 0) { @@ -327,6 +352,18 @@ int main(int argc, char *argv[]) } } + if (adjphase) { + memset(&tx, 0, sizeof(tx)); + tx.modes = ADJ_OFFSET | ADJ_NANO; + tx.offset = adjphase; + + if (clock_adjtime(clkid, &tx) < 0) { + perror("clock_adjtime"); + } else { + puts("phase adjustment okay"); + } + } + if (gettime) { if (clock_gettime(clkid, &ts)) { perror("clock_gettime"); @@ -516,6 +553,57 @@ int main(int argc, char *argv[]) free(sysoff); } + if (getextended) { + soe = calloc(1, sizeof(*soe)); + if (!soe) { + perror("calloc"); + return -1; + } + + soe->n_samples = getextended; + + if (ioctl(fd, PTP_SYS_OFFSET_EXTENDED, soe)) { + perror("PTP_SYS_OFFSET_EXTENDED"); + } else { + printf("extended timestamp request returned %d samples\n", + getextended); + + for (i = 0; i < getextended; i++) { + printf("sample #%2d: system time before: %lld.%09u\n", + i, soe->ts[i][0].sec, soe->ts[i][0].nsec); + printf(" phc time: %lld.%09u\n", + soe->ts[i][1].sec, soe->ts[i][1].nsec); + printf(" system time after: %lld.%09u\n", + soe->ts[i][2].sec, soe->ts[i][2].nsec); + } + } + + free(soe); + } + + if (getcross) { + xts = calloc(1, sizeof(*xts)); + if (!xts) { + perror("calloc"); + return -1; + } + + if (ioctl(fd, PTP_SYS_OFFSET_PRECISE, xts)) { + perror("PTP_SYS_OFFSET_PRECISE"); + } else { + puts("system and phc crosstimestamping request okay"); + + printf("device time: %lld.%09u\n", + xts->device.sec, xts->device.nsec); + printf("system time: %lld.%09u\n", + xts->sys_realtime.sec, xts->sys_realtime.nsec); + printf("monoraw time: %lld.%09u\n", + xts->sys_monoraw.sec, xts->sys_monoraw.nsec); + } + + free(xts); + } + close(fd); return 0; } diff --git a/tools/testing/selftests/rcutorture/bin/configcheck.sh b/tools/testing/selftests/rcutorture/bin/configcheck.sh index b92dfeb7fbbf..99162d18bad3 100755 --- a/tools/testing/selftests/rcutorture/bin/configcheck.sh +++ b/tools/testing/selftests/rcutorture/bin/configcheck.sh @@ -3,6 +3,8 @@ # # Usage: configcheck.sh .config .config-template # +# Non-empty output if errors detected. +# # Copyright (C) IBM Corporation, 2011 # # Authors: Paul E. McKenney <paulmck@linux.ibm.com> @@ -10,32 +12,35 @@ T="`mktemp -d ${TMPDIR-/tmp}/configcheck.sh.XXXXXX`" trap 'rm -rf $T' 0 -sed -e 's/"//g' < $1 > $T/.config +# function test_kconfig_enabled ( Kconfig-var=val ) +function test_kconfig_enabled () { + if ! grep -q "^$1$" $T/.config + then + echo :$1: improperly set + return 1 + fi + return 0 +} -sed -e 's/"//g' -e 's/\(.*\)=n/# \1 is not set/' -e 's/^#CHECK#//' < $2 | -awk ' -{ - print "if grep -q \"" $0 "\" < '"$T/.config"'"; - print "then"; - print "\t:"; - print "else"; - if ($1 == "#") { - print "\tif grep -q \"" $2 "\" < '"$T/.config"'"; - print "\tthen"; - print "\t\tif test \"$firsttime\" = \"\"" - print "\t\tthen" - print "\t\t\tfirsttime=1" - print "\t\tfi" - print "\t\techo \":" $2 ": improperly set\""; - print "\telse"; - print "\t\t:"; - print "\tfi"; - } else { - print "\tif test \"$firsttime\" = \"\"" - print "\tthen" - print "\t\tfirsttime=1" - print "\tfi" - print "\techo \":" $0 ": improperly set\""; - } - print "fi"; - }' | sh +# function test_kconfig_disabled ( Kconfig-var ) +function test_kconfig_disabled () { + if grep -q "^$1=n$" $T/.config + then + return 0 + fi + if grep -q "^$1=" $T/.config + then + echo :$1=n: improperly set + return 1 + fi + return 0 +} + +sed -e 's/"//g' < $1 > $T/.config +sed -e 's/^#CHECK#//' < $2 > $T/ConfigFragment +grep '^CONFIG_.*=n$' $T/ConfigFragment | + sed -e 's/^/test_kconfig_disabled /' -e 's/=n$//' > $T/kconfig-n.sh +. $T/kconfig-n.sh +grep -v '^CONFIG_.*=n$' $T/ConfigFragment | grep '^CONFIG_' | + sed -e 's/^/test_kconfig_enabled /' > $T/kconfig-not-n.sh +. $T/kconfig-not-n.sh diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh index b52d5069563c..b8e2ea23cb3f 100644 --- a/tools/testing/selftests/rcutorture/bin/functions.sh +++ b/tools/testing/selftests/rcutorture/bin/functions.sh @@ -45,7 +45,7 @@ checkarg () { configfrag_boot_params () { if test -r "$2.boot" then - echo $1 `grep -v '^#' "$2.boot" | tr '\012' ' '` + echo `grep -v '^#' "$2.boot" | tr '\012' ' '` $1 else echo $1 fi @@ -250,7 +250,7 @@ identify_qemu_args () { echo -machine virt,gic-version=host -cpu host ;; qemu-system-ppc64) - echo -enable-kvm -M pseries -nodefaults + echo -M pseries -nodefaults echo -device spapr-vscsi if test -n "$TORTURE_QEMU_INTERACTIVE" -a -n "$TORTURE_QEMU_MAC" then diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh index b582113178ac..f683e424ddd5 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh @@ -40,6 +40,10 @@ awk ' sum += $5 / 1000.; } +/rcu_scale: Grace-period kthread CPU time/ { + cputime = $6; +} + END { newNR = asort(gptimes); if (newNR <= 0) { @@ -78,6 +82,8 @@ END { print "90th percentile grace-period duration: " gptimes[pct90]; print "99th percentile grace-period duration: " gptimes[pct99]; print "Maximum grace-period duration: " gptimes[newNR]; - print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches; + if (cputime != "") + cpustr = " CPU: " cputime; + print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches cpustr; print "Computed from rcuscale printk output."; }' diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index 1df7e695edf7..5be670dd4009 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -16,6 +16,8 @@ T=/tmp/kvm-recheck.sh.$$ trap 'rm -f $T' 0 2 +configerrors=0 + PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH . functions.sh for rd in "$@" @@ -32,7 +34,7 @@ do fi TORTURE_SUITE="`cat $i/../torture_suite`" ; export TORTURE_SUITE configfile=`echo $i | sed -e 's,^.*/,,'` - rm -f $i/console.log.*.diags + rm -f $i/console.log.*.diags $i/ConfigFragment.diags case "${TORTURE_SUITE}" in X*) ;; @@ -49,8 +51,21 @@ do then echo QEMU killed fi - configcheck.sh $i/.config $i/ConfigFragment > $T 2>&1 - cat $T + configcheck.sh $i/.config $i/ConfigFragment > $i/ConfigFragment.diags 2>&1 + if grep -q '^CONFIG_KCSAN=y$' $i/ConfigFragment.input + then + # KCSAN forces a number of Kconfig options, so remove + # complaints about those Kconfig options in KCSAN runs. + mv $i/ConfigFragment.diags $i/ConfigFragment.diags.kcsan + grep -v -E 'CONFIG_PROVE_RCU|CONFIG_PREEMPT_COUNT' $i/ConfigFragment.diags.kcsan > $i/ConfigFragment.diags + fi + if test -s $i/ConfigFragment.diags + then + cat $i/ConfigFragment.diags + configerrors=$((configerrors+1)) + else + rm $i/ConfigFragment.diags + fi if test -r $i/Make.oldconfig.err then cat $i/Make.oldconfig.err @@ -65,7 +80,14 @@ do if test -f "$i/buildonly" then echo Build-only run, no boot/test - configcheck.sh $i/.config $i/ConfigFragment + configcheck.sh $i/.config $i/ConfigFragment > $i/ConfigFragment.diags 2>&1 + if test -s $i/ConfigFragment.diags + then + cat $i/ConfigFragment.diags + configerrors=$((configerrors+1)) + else + rm $i/ConfigFragment.diags + fi parse-build.sh $i/Make.out $configfile elif test -f "$i/qemu-cmd" then @@ -79,10 +101,10 @@ do done if test -f "$rd/kcsan.sum" then - if ! test -f $T + if ! test -f $i/ConfigFragment.diags then : - elif grep -q CONFIG_KCSAN=y $T + elif grep -q CONFIG_KCSAN=y $i/ConfigFragment.diags then echo "Compiler or architecture does not support KCSAN!" echo Did you forget to switch your compiler with '--kmake-arg CC=<cc-that-supports-kcsan>'? @@ -94,17 +116,23 @@ do fi fi done + +if test "$configerrors" -gt 0 +then + echo $configerrors runs with .config errors. + ret=1 +fi EDITOR=echo kvm-find-errors.sh "${@: -1}" > $T 2>&1 builderrors="`tr ' ' '\012' < $T | grep -c '/Make.out.diags'`" if test "$builderrors" -gt 0 then echo $builderrors runs with build errors. - ret=1 + ret=2 fi runerrors="`tr ' ' '\012' < $T | grep -c '/console.log.diags'`" if test "$runerrors" -gt 0 then echo $runerrors runs with runtime errors. - ret=2 + ret=3 fi exit $ret diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh index a2328163eba1..134cdef5a6e0 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh @@ -137,14 +137,20 @@ chmod +x $T/bin/kvm-remote-*.sh # Check first to avoid the need for cleanup for system-name typos for i in $systems do - ncpus="`ssh -o BatchMode=yes $i getconf _NPROCESSORS_ONLN 2> /dev/null`" + ssh -o BatchMode=yes $i getconf _NPROCESSORS_ONLN > $T/ssh.stdout 2> $T/ssh.stderr ret=$? if test "$ret" -ne 0 then - echo System $i unreachable, giving up. | tee -a "$oldrun/remote-log" + echo "System $i unreachable ($ret), giving up." | tee -a "$oldrun/remote-log" + echo ' --- ssh stdout: vvv' | tee -a "$oldrun/remote-log" + cat $T/ssh.stdout | tee -a "$oldrun/remote-log" + echo ' --- ssh stdout: ^^^' | tee -a "$oldrun/remote-log" + echo ' --- ssh stderr: vvv' | tee -a "$oldrun/remote-log" + cat $T/ssh.stderr | tee -a "$oldrun/remote-log" + echo ' --- ssh stderr: ^^^' | tee -a "$oldrun/remote-log" exit 4 fi - echo $i: $ncpus CPUs " " `date` | tee -a "$oldrun/remote-log" + echo $i: `cat $T/ssh.stdout` CPUs " " `date` | tee -a "$oldrun/remote-log" done # Download and expand the tarball on all systems. diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index d2a3710a5f2a..b33cd8753689 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -9,9 +9,10 @@ # # Usage: kvm-test-1-run.sh config resdir seconds qemu-args boot_args_in # -# qemu-args defaults to "-enable-kvm -nographic", along with arguments -# specifying the number of CPUs and other options -# generated from the underlying CPU architecture. +# qemu-args defaults to "-enable-kvm -display none -no-reboot", along +# with arguments specifying the number of CPUs +# and other options generated from the underlying +# CPU architecture. # boot_args_in defaults to value returned by the per_version_boot_params # shell function. # @@ -57,7 +58,6 @@ config_override_param () { cat $T/Kconfig_args >> $resdir/ConfigFragment.input config_override.sh $T/$2 $T/Kconfig_args > $T/$2.tmp mv $T/$2.tmp $T/$2 - # Note that "#CHECK#" is not permitted on commandline. fi } @@ -140,7 +140,7 @@ then fi # Generate -smp qemu argument. -qemu_args="-enable-kvm -nographic $qemu_args" +qemu_args="-enable-kvm -display none -no-reboot $qemu_args" cpu_count=`configNR_CPUS.sh $resdir/ConfigFragment` cpu_count=`configfrag_boot_cpus "$boot_args_in" "$config_template" "$cpu_count"` if test "$cpu_count" -gt "$TORTURE_ALLOTED_CPUS" @@ -163,7 +163,7 @@ boot_args="`configfrag_boot_params "$boot_args_in" "$config_template"`" boot_args="`per_version_boot_params "$boot_args" $resdir/.config $seconds`" if test -n "$TORTURE_BOOT_GDB_ARG" then - boot_args="$boot_args $TORTURE_BOOT_GDB_ARG" + boot_args="$TORTURE_BOOT_GDB_ARG $boot_args" fi # Give bare-metal advice diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 62f3b0f56e4d..b0f36a638a69 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -186,7 +186,7 @@ do fi ;; --kconfig|--kconfigs) - checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\)*$' '^error$' + checkarg --kconfig "(Kconfig options)" $# "$2" '^\(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\( \(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\)*$' '^error$' TORTURE_KCONFIG_ARG="`echo "$TORTURE_KCONFIG_ARG $2" | sed -e 's/^ *//' -e 's/ *$//'`" shift ;; @@ -655,4 +655,4 @@ fi # Control buffer size: --bootargs trace_buf_size=3k # Get trace-buffer dumps on all oopses: --bootargs ftrace_dump_on_oops # Ditto, but dump only the oopsing CPU: --bootargs ftrace_dump_on_oops=orig_cpu -# Heavy-handed way to also dump on warnings: --bootargs panic_on_warn +# Heavy-handed way to also dump on warnings: --bootargs panic_on_warn=1 diff --git a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh index 71f0dfbb2a6d..212c52ca90b5 100755 --- a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh +++ b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh @@ -10,7 +10,6 @@ D=tools/testing/selftests/rcutorture # Prerequisite checks -[ -z "$D" ] && echo >&2 "No argument supplied" && exit 1 if [ ! -d "$D" ]; then echo >&2 "$D does not exist: Malformed kernel source tree?" exit 1 @@ -34,12 +33,16 @@ cat > init.c << '___EOF___' volatile unsigned long delaycount; -int main(int argc, int argv[]) +int main(int argc, char *argv[]) { int i; struct timeval tv; struct timeval tvb; + printf("Torture-test rudimentary init program started, command line:\n"); + for (i = 0; i < argc; i++) + printf(" %s", argv[i]); + printf("\n"); for (;;) { sleep(1); /* Need some userspace time. */ @@ -64,15 +67,23 @@ ___EOF___ # build using nolibc on supported archs (smaller executable) and fall # back to regular glibc on other ones. if echo -e "#if __x86_64__||__i386__||__i486__||__i586__||__i686__" \ - "||__ARM_EABI__||__aarch64__||__s390x__\nyes\n#endif" \ + "||__ARM_EABI__||__aarch64__||__s390x__||__loongarch__\nyes\n#endif" \ | ${CROSS_COMPILE}gcc -E -nostdlib -xc - \ | grep -q '^yes'; then # architecture supported by nolibc ${CROSS_COMPILE}gcc -fno-asynchronous-unwind-tables -fno-ident \ -nostdlib -include ../../../../include/nolibc/nolibc.h \ -s -static -Os -o init init.c -lgcc + ret=$? else ${CROSS_COMPILE}gcc -s -static -Os -o init init.c + ret=$? +fi + +if [ "$ret" -ne 0 ] +then + echo "Failed to create a statically linked C-language initrd" + exit "$ret" fi rm init.c diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index 5a2ae2264403..12b50a4a881a 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -55,6 +55,8 @@ do_kasan=yes do_kcsan=no do_clocksourcewd=yes do_rt=yes +do_rcutasksflavors=yes +do_srcu_lockdep=yes # doyesno - Helper function for yes/no arguments function doyesno () { @@ -73,18 +75,20 @@ usage () { echo " --configs-locktorture \"config-file list w/ repeat factor (10*LOCK01)\"" echo " --configs-scftorture \"config-file list w/ repeat factor (2*CFLIST)\"" echo " --do-all" - echo " --do-allmodconfig / --do-no-allmodconfig" - echo " --do-clocksourcewd / --do-no-clocksourcewd" - echo " --do-kasan / --do-no-kasan" - echo " --do-kcsan / --do-no-kcsan" - echo " --do-kvfree / --do-no-kvfree" - echo " --do-locktorture / --do-no-locktorture" + echo " --do-allmodconfig / --do-no-allmodconfig / --no-allmodconfig" + echo " --do-clocksourcewd / --do-no-clocksourcewd / --no-clocksourcewd" + echo " --do-kasan / --do-no-kasan / --no-kasan" + echo " --do-kcsan / --do-no-kcsan / --no-kcsan" + echo " --do-kvfree / --do-no-kvfree / --no-kvfree" + echo " --do-locktorture / --do-no-locktorture / --no-locktorture" echo " --do-none" - echo " --do-rcuscale / --do-no-rcuscale" - echo " --do-rcutorture / --do-no-rcutorture" - echo " --do-refscale / --do-no-refscale" - echo " --do-rt / --do-no-rt" - echo " --do-scftorture / --do-no-scftorture" + echo " --do-rcuscale / --do-no-rcuscale / --no-rcuscale" + echo " --do-rcutasksflavors / --do-no-rcutasksflavors / --no-rcutasksflavors" + echo " --do-rcutorture / --do-no-rcutorture / --no-rcutorture" + echo " --do-refscale / --do-no-refscale / --no-refscale" + echo " --do-rt / --do-no-rt / --no-rt" + echo " --do-scftorture / --do-no-scftorture / --no-scftorture" + echo " --do-srcu-lockdep / --do-no-srcu-lockdep / --no-srcu-lockdep" echo " --duration [ <minutes> | <hours>h | <days>d ]" echo " --kcsan-kmake-arg kernel-make-arguments" exit 1 @@ -115,6 +119,7 @@ do ;; --do-all|--doall) do_allmodconfig=yes + do_rcutasksflavor=yes do_rcutorture=yes do_locktorture=yes do_scftorture=yes @@ -125,27 +130,29 @@ do do_kasan=yes do_kcsan=yes do_clocksourcewd=yes + do_srcu_lockdep=yes ;; - --do-allmodconfig|--do-no-allmodconfig) + --do-allmodconfig|--do-no-allmodconfig|--no-allmodconfig) do_allmodconfig=`doyesno "$1" --do-allmodconfig` ;; - --do-clocksourcewd|--do-no-clocksourcewd) + --do-clocksourcewd|--do-no-clocksourcewd|--no-clocksourcewd) do_clocksourcewd=`doyesno "$1" --do-clocksourcewd` ;; - --do-kasan|--do-no-kasan) + --do-kasan|--do-no-kasan|--no-kasan) do_kasan=`doyesno "$1" --do-kasan` ;; - --do-kcsan|--do-no-kcsan) + --do-kcsan|--do-no-kcsan|--no-kcsan) do_kcsan=`doyesno "$1" --do-kcsan` ;; - --do-kvfree|--do-no-kvfree) + --do-kvfree|--do-no-kvfree|--no-kvfree) do_kvfree=`doyesno "$1" --do-kvfree` ;; - --do-locktorture|--do-no-locktorture) + --do-locktorture|--do-no-locktorture|--no-locktorture) do_locktorture=`doyesno "$1" --do-locktorture` ;; --do-none|--donone) do_allmodconfig=no + do_rcutasksflavors=no do_rcutorture=no do_locktorture=no do_scftorture=no @@ -156,22 +163,29 @@ do do_kasan=no do_kcsan=no do_clocksourcewd=no + do_srcu_lockdep=no ;; - --do-rcuscale|--do-no-rcuscale) + --do-rcuscale|--do-no-rcuscale|--no-rcuscale) do_rcuscale=`doyesno "$1" --do-rcuscale` ;; - --do-rcutorture|--do-no-rcutorture) + --do-rcutasksflavors|--do-no-rcutasksflavors|--no-rcutasksflavors) + do_rcutasksflavors=`doyesno "$1" --do-rcutasksflavors` + ;; + --do-rcutorture|--do-no-rcutorture|--no-rcutorture) do_rcutorture=`doyesno "$1" --do-rcutorture` ;; - --do-refscale|--do-no-refscale) + --do-refscale|--do-no-refscale|--no-refscale) do_refscale=`doyesno "$1" --do-refscale` ;; - --do-rt|--do-no-rt) + --do-rt|--do-no-rt|--no-rt) do_rt=`doyesno "$1" --do-rt` ;; - --do-scftorture|--do-no-scftorture) + --do-scftorture|--do-no-scftorture|--no-scftorture) do_scftorture=`doyesno "$1" --do-scftorture` ;; + --do-srcu-lockdep|--do-no-srcu-lockdep|--no-srcu-lockdep) + do_srcu_lockdep=`doyesno "$1" --do-srcu-lockdep` + ;; --duration) checkarg --duration "(minutes)" $# "$2" '^[0-9][0-9]*\(m\|h\|d\|\)$' '^error' mult=1 @@ -361,6 +375,40 @@ then fi fi +# Test building RCU Tasks flavors in isolation, both SMP and !SMP +if test "$do_rcutasksflavors" = "yes" +then + echo " --- rcutasksflavors:" Start `date` | tee -a $T/log + rtfdir="tools/testing/selftests/rcutorture/res/$ds/results-rcutasksflavors" + mkdir -p "$rtfdir" + cat > $T/rcutasksflavors << __EOF__ +#CHECK#CONFIG_TASKS_RCU=n +#CHECK#CONFIG_TASKS_RUDE_RCU=n +#CHECK#CONFIG_TASKS_TRACE_RCU=n +__EOF__ + for flavor in CONFIG_TASKS_RCU CONFIG_TASKS_RUDE_RCU CONFIG_TASKS_TRACE_RCU + do + forceflavor="`echo $flavor | sed -e 's/^CONFIG/CONFIG_FORCE/'`" + deselectedflavors="`grep -v $flavor $T/rcutasksflavors | tr '\012' ' ' | tr -s ' ' | sed -e 's/ *$//'`" + echo " --- Running RCU Tasks Trace flavor $flavor `date`" >> $rtfdir/log + tools/testing/selftests/rcutorture/bin/kvm.sh --datestamp "$ds/results-rcutasksflavors/$flavor" --buildonly --configs "TINY01 TREE04" --kconfig "CONFIG_RCU_EXPERT=y CONFIG_RCU_SCALE_TEST=y $forceflavor=y $deselectedflavors" --trust-make > $T/$flavor.out 2>&1 + retcode=$? + if test "$retcode" -ne 0 + then + break + fi + done + if test "$retcode" -eq 0 + then + echo "rcutasksflavors($retcode)" $rtfdir >> $T/successes + echo Success >> $rtfdir/log + else + echo "rcutasksflavors($retcode)" $rtfdir >> $T/failures + echo " --- rcutasksflavors Test summary:" >> $rtfdir/log + echo " --- Summary: Exit code $retcode from $flavor, see Make.out" >> $rtfdir/log + fi +fi + # --torture rcu if test "$do_rcutorture" = "yes" then @@ -376,8 +424,10 @@ fi if test "$do_scftorture" = "yes" then + # Scale memory based on the number of CPUs. + scfmem=$((2+HALF_ALLOTED_CPUS/16)) torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot csdlock_debug=1" - torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make + torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory ${scfmem}G --trust-make fi if test "$do_rt" = "yes" @@ -391,6 +441,23 @@ then torture_set "rcurttorture-exp" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make fi +if test "$do_srcu_lockdep" = "yes" +then + echo " --- do-srcu-lockdep:" Start `date` | tee -a $T/log + tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh --datestamp "$ds/results-srcu-lockdep" > $T/srcu_lockdep.sh.out 2>&1 + retcode=$? + cp $T/srcu_lockdep.sh.out "tools/testing/selftests/rcutorture/res/$ds/results-srcu-lockdep/log" + if test "$retcode" -eq 0 + then + echo "srcu_lockdep($retcode)" "tools/testing/selftests/rcutorture/res/$ds/results-srcu-lockdep" >> $T/successes + echo Success >> "tools/testing/selftests/rcutorture/res/$ds/results-srcu-lockdep/log" + else + echo "srcu_lockdep($retcode)" "tools/testing/selftests/rcutorture/res/$ds/results-srcu-lockdep" >> $T/failures + echo " --- srcu_lockdep Test Summary:" >> "tools/testing/selftests/rcutorture/res/$ds/results-srcu-lockdep/log" + echo " --- Summary: Exit code $retcode from srcu_lockdep.sh, see ds/results-srcu-lockdep" >> "tools/testing/selftests/rcutorture/res/$ds/results-srcu-lockdep/log" + fi +fi + if test "$do_refscale" = yes then primlist="`grep '\.name[ ]*=' kernel/rcu/refscale.c | sed -e 's/^[^"]*"//' -e 's/".*$//'`" @@ -541,11 +608,23 @@ then fi echo Started at $startdate, ended at `date`, duration `get_starttime_duration $starttime`. | tee -a $T/log echo Summary: Successes: $nsuccesses Failures: $nfailures. | tee -a $T/log +tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`" +find "$tdir" -name 'ConfigFragment.diags' -print > $T/configerrors +find "$tdir" -name 'Make.out.diags' -print > $T/builderrors +if test -s "$T/configerrors" +then + echo " Scenarios with .config errors: `wc -l "$T/configerrors" | awk '{ print $1 }'`" + nonkcsanbug="yes" +fi +if test -s "$T/builderrors" +then + echo " Scenarios with build errors: `wc -l "$T/builderrors" | awk '{ print $1 }'`" + nonkcsanbug="yes" +fi if test -z "$nonkcsanbug" && test -s "$T/failuresum" then echo " All bugs were KCSAN failures." fi -tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`" if test -n "$tdir" && test $compress_concurrency -gt 0 then # KASAN vmlinux files can approach 1GB in size, so compress them. diff --git a/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh index d3e4b2971f92..e7bb32709d78 100644 --- a/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh @@ -22,8 +22,9 @@ locktorture_param_onoff () { # # Adds per-version torture-module parameters to kernels supporting them. per_version_boot_params () { - echo $1 `locktorture_param_onoff "$1" "$2"` \ + echo `locktorture_param_onoff "$1" "$2"` \ locktorture.stat_interval=15 \ locktorture.shutdown_secs=$3 \ - locktorture.verbose=1 + locktorture.verbose=1 \ + $1 } diff --git a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot index f57720c52c0f..84f6bb98ce99 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot @@ -5,4 +5,4 @@ rcutree.gp_init_delay=3 rcutree.gp_cleanup_delay=3 rcutree.kthread_prio=2 threadirqs -tree.use_softirq=0 +rcutree.use_softirq=0 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 index dea26c568678..2ef2fb69c360 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 @@ -6,6 +6,5 @@ CONFIG_PREEMPT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=y -#CHECK#CONFIG_RCU_EXPERT=n CONFIG_TASKS_RCU=y CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 index 04831ef1f9b5..8ae41d5f81a3 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 @@ -15,4 +15,3 @@ CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y -CONFIG_BOOTPARAM_HOTPLUG_CPU0=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot index 64f864f1f361..8e50bfd4b710 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot @@ -4,4 +4,4 @@ rcutree.gp_init_delay=3 rcutree.gp_cleanup_delay=3 rcutree.kthread_prio=2 threadirqs -tree.use_softirq=0 +rcutree.use_softirq=0 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh index e2bc99c785e7..c044df386876 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh @@ -46,10 +46,11 @@ rcutorture_param_stat_interval () { # # Adds per-version torture-module parameters to kernels supporting them. per_version_boot_params () { - echo $1 `rcutorture_param_onoff "$1" "$2"` \ + echo `rcutorture_param_onoff "$1" "$2"` \ `rcutorture_param_n_barrier_cbs "$1"` \ `rcutorture_param_stat_interval "$1"` \ rcutorture.shutdown_secs=$3 \ rcutorture.test_no_idle_hz=1 \ - rcutorture.verbose=1 + rcutorture.verbose=1 \ + $1 } diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon index 6a00157bee5b..b1ffd7c67604 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon @@ -2,5 +2,7 @@ CONFIG_RCU_SCALE_TEST=y CONFIG_PRINTK_TIME=y CONFIG_FORCE_TASKS_RCU=y #CHECK#CONFIG_TASKS_RCU=y +CONFIG_FORCE_TASKS_RUDE_RCU=y +#CHECK#CONFIG_TASKS_RUDE_RCU=y CONFIG_FORCE_TASKS_TRACE_RCU=y #CHECK#CONFIG_TASKS_TRACE_RCU=y diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 index 227aba7783af..0059592c7408 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 @@ -2,6 +2,8 @@ CONFIG_SMP=y CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n +CONFIG_PREEMPT_DYNAMIC=n +#CHECK#CONFIG_TREE_RCU=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh index ffbe15109f0d..28070b43f017 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh @@ -11,6 +11,7 @@ # # Adds per-version torture-module parameters to kernels supporting them. per_version_boot_params () { - echo $1 rcuscale.shutdown=1 \ - rcuscale.verbose=0 + echo rcuscale.shutdown=1 \ + rcuscale.verbose=0 \ + $1 } diff --git a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT index ef2b501a6971..67f9d2998afd 100644 --- a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT +++ b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT @@ -2,6 +2,7 @@ CONFIG_SMP=y CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n +CONFIG_PREEMPT_DYNAMIC=n #CHECK#CONFIG_PREEMPT_RCU=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y diff --git a/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh index f81fa2c541a6..748465627601 100644 --- a/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh @@ -11,6 +11,7 @@ # # Adds per-version torture-module parameters to kernels supporting them. per_version_boot_params () { - echo $1 refscale.shutdown=1 \ - refscale.verbose=0 + echo refscale.shutdown=1 \ + refscale.verbose=0 \ + $1 } diff --git a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT index 3a59346b3de7..6133f54ce2a7 100644 --- a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT +++ b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT @@ -2,6 +2,8 @@ CONFIG_SMP=y CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n +CONFIG_PREEMPT_DYNAMIC=n +#CHECK#CONFIG_PREEMPT_RCU=n CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=y diff --git a/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh index 2d949e58f5a5..7637f68ef0ce 100644 --- a/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh @@ -22,8 +22,9 @@ scftorture_param_onoff () { # # Adds per-version torture-module parameters to kernels supporting them. per_version_boot_params () { - echo $1 `scftorture_param_onoff "$1" "$2"` \ + echo `scftorture_param_onoff "$1" "$2"` \ scftorture.stat_interval=15 \ scftorture.shutdown_secs=$3 \ - scftorture.verbose=1 + scftorture.verbose=1 \ + $1 } diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile deleted file mode 100644 index 4bed0b678f8b..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -all: srcu.c store_buffering - -LINUX_SOURCE = ../../../../../.. - -modified_srcu_input = $(LINUX_SOURCE)/include/linux/srcu.h \ - $(LINUX_SOURCE)/kernel/rcu/srcu.c - -modified_srcu_output = include/linux/srcu.h srcu.c - -include/linux/srcu.h: srcu.c - -srcu.c: modify_srcu.awk Makefile $(modified_srcu_input) - awk -f modify_srcu.awk $(modified_srcu_input) $(modified_srcu_output) - -store_buffering: - @cd tests/store_buffering; make diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h +++ /dev/null diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h +++ /dev/null diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h +++ /dev/null diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h +++ /dev/null diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h +++ /dev/null diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h +++ /dev/null diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h +++ /dev/null diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h +++ /dev/null diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h +++ /dev/null diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h +++ /dev/null diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h deleted file mode 100644 index f2860dd1b407..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h +++ /dev/null @@ -1 +0,0 @@ -#include <LINUX_SOURCE/linux/kconfig.h> diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h deleted file mode 100644 index 8bc960e5e713..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h +++ /dev/null @@ -1,152 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This header has been modifies to remove definitions of types that - * are defined in standard userspace headers or are problematic for some - * other reason. - */ - -#ifndef _LINUX_TYPES_H -#define _LINUX_TYPES_H - -#define __EXPORTED_HEADERS__ -#include <uapi/linux/types.h> - -#ifndef __ASSEMBLY__ - -#define DECLARE_BITMAP(name, bits) \ - unsigned long name[BITS_TO_LONGS(bits)] - -typedef __u32 __kernel_dev_t; - -/* bsd */ -typedef unsigned char u_char; -typedef unsigned short u_short; -typedef unsigned int u_int; -typedef unsigned long u_long; - -/* sysv */ -typedef unsigned char unchar; -typedef unsigned short ushort; -typedef unsigned int uint; -typedef unsigned long ulong; - -#ifndef __BIT_TYPES_DEFINED__ -#define __BIT_TYPES_DEFINED__ - -typedef __u8 u_int8_t; -typedef __s8 int8_t; -typedef __u16 u_int16_t; -typedef __s16 int16_t; -typedef __u32 u_int32_t; -typedef __s32 int32_t; - -#endif /* !(__BIT_TYPES_DEFINED__) */ - -typedef __u8 uint8_t; -typedef __u16 uint16_t; -typedef __u32 uint32_t; - -/* this is a special 64bit data type that is 8-byte aligned */ -#define aligned_u64 __u64 __attribute__((aligned(8))) -#define aligned_be64 __be64 __attribute__((aligned(8))) -#define aligned_le64 __le64 __attribute__((aligned(8))) - -/** - * The type used for indexing onto a disc or disc partition. - * - * Linux always considers sectors to be 512 bytes long independently - * of the devices real block size. - * - * blkcnt_t is the type of the inode's block count. - */ -typedef u64 sector_t; - -/* - * The type of an index into the pagecache. - */ -#define pgoff_t unsigned long - -/* - * A dma_addr_t can hold any valid DMA address, i.e., any address returned - * by the DMA API. - * - * If the DMA API only uses 32-bit addresses, dma_addr_t need only be 32 - * bits wide. Bus addresses, e.g., PCI BARs, may be wider than 32 bits, - * but drivers do memory-mapped I/O to ioremapped kernel virtual addresses, - * so they don't care about the size of the actual bus addresses. - */ -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT -typedef u64 dma_addr_t; -#else -typedef u32 dma_addr_t; -#endif - -#ifdef CONFIG_PHYS_ADDR_T_64BIT -typedef u64 phys_addr_t; -#else -typedef u32 phys_addr_t; -#endif - -typedef phys_addr_t resource_size_t; - -/* - * This type is the placeholder for a hardware interrupt number. It has to be - * big enough to enclose whatever representation is used by a given platform. - */ -typedef unsigned long irq_hw_number_t; - -typedef struct { - int counter; -} atomic_t; - -#ifdef CONFIG_64BIT -typedef struct { - long counter; -} atomic64_t; -#endif - -struct list_head { - struct list_head *next, *prev; -}; - -struct hlist_head { - struct hlist_node *first; -}; - -struct hlist_node { - struct hlist_node *next, **pprev; -}; - -/** - * struct callback_head - callback structure for use with RCU and task_work - * @next: next update requests in a list - * @func: actual update function to call after the grace period. - * - * The struct is aligned to size of pointer. On most architectures it happens - * naturally due ABI requirements, but some architectures (like CRIS) have - * weird ABI and we need to ask it explicitly. - * - * The alignment is required to guarantee that bits 0 and 1 of @next will be - * clear under normal conditions -- as long as we use call_rcu() or - * call_srcu() to queue callback. - * - * This guarantee is important for few reasons: - * - future call_rcu_lazy() will make use of lower bits in the pointer; - * - the structure shares storage spacer in struct page with @compound_head, - * which encode PageTail() in bit 0. The guarantee is needed to avoid - * false-positive PageTail(). - */ -struct callback_head { - struct callback_head *next; - void (*func)(struct callback_head *head); -} __attribute__((aligned(sizeof(void *)))); -#define rcu_head callback_head - -typedef void (*rcu_callback_t)(struct rcu_head *head); -typedef void (*call_rcu_func_t)(struct rcu_head *head, rcu_callback_t func); - -/* clocksource cycle base type */ -typedef u64 cycle_t; - -#endif /* __ASSEMBLY__ */ -#endif /* _LINUX_TYPES_H */ diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk deleted file mode 100755 index e05182d3e47d..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk +++ /dev/null @@ -1,376 +0,0 @@ -#!/usr/bin/awk -f -# SPDX-License-Identifier: GPL-2.0 - -# Modify SRCU for formal verification. The first argument should be srcu.h and -# the second should be srcu.c. Outputs modified srcu.h and srcu.c into the -# current directory. - -BEGIN { - if (ARGC != 5) { - print "Usange: input.h input.c output.h output.c" > "/dev/stderr"; - exit 1; - } - h_output = ARGV[3]; - c_output = ARGV[4]; - ARGC = 3; - - # Tokenize using FS and not RS as FS supports regular expressions. Each - # record is one line of source, except that backslashed lines are - # combined. Comments are treated as field separators, as are quotes. - quote_regexp="\"([^\\\\\"]|\\\\.)*\""; - comment_regexp="\\/\\*([^*]|\\*+[^*/])*\\*\\/|\\/\\/.*(\n|$)"; - FS="([ \\\\\t\n\v\f;,.=(){}+*/<>&|^-]|\\[|\\]|" comment_regexp "|" quote_regexp ")+"; - - inside_srcu_struct = 0; - inside_srcu_init_def = 0; - srcu_init_param_name = ""; - in_macro = 0; - brace_nesting = 0; - paren_nesting = 0; - - # Allow the manipulation of the last field separator after has been - # seen. - last_fs = ""; - # Whether the last field separator was intended to be output. - last_fs_print = 0; - - # rcu_batches stores the initialization for each instance of struct - # rcu_batch - - in_comment = 0; - - outputfile = ""; -} - -{ - prev_outputfile = outputfile; - if (FILENAME ~ /\.h$/) { - outputfile = h_output; - if (FNR != NR) { - print "Incorrect file order" > "/dev/stderr"; - exit 1; - } - } - else - outputfile = c_output; - - if (prev_outputfile && outputfile != prev_outputfile) { - new_outputfile = outputfile; - outputfile = prev_outputfile; - update_fieldsep("", 0); - outputfile = new_outputfile; - } -} - -# Combine the next line into $0. -function combine_line() { - ret = getline next_line; - if (ret == 0) { - # Don't allow two consecutive getlines at the end of the file - if (eof_found) { - print "Error: expected more input." > "/dev/stderr"; - exit 1; - } else { - eof_found = 1; - } - } else if (ret == -1) { - print "Error reading next line of file" FILENAME > "/dev/stderr"; - exit 1; - } - $0 = $0 "\n" next_line; -} - -# Combine backslashed lines and multiline comments. -function combine_backslashes() { - while (/\\$|\/\*([^*]|\*+[^*\/])*\**$/) { - combine_line(); - } -} - -function read_line() { - combine_line(); - combine_backslashes(); -} - -# Print out field separators and update variables that depend on them. Only -# print if p is true. Call with sep="" and p=0 to print out the last field -# separator. -function update_fieldsep(sep, p) { - # Count braces - sep_tmp = sep; - gsub(quote_regexp "|" comment_regexp, "", sep_tmp); - while (1) - { - if (sub("[^{}()]*\\{", "", sep_tmp)) { - brace_nesting++; - continue; - } - if (sub("[^{}()]*\\}", "", sep_tmp)) { - brace_nesting--; - if (brace_nesting < 0) { - print "Unbalanced braces!" > "/dev/stderr"; - exit 1; - } - continue; - } - if (sub("[^{}()]*\\(", "", sep_tmp)) { - paren_nesting++; - continue; - } - if (sub("[^{}()]*\\)", "", sep_tmp)) { - paren_nesting--; - if (paren_nesting < 0) { - print "Unbalanced parenthesis!" > "/dev/stderr"; - exit 1; - } - continue; - } - - break; - } - - if (last_fs_print) - printf("%s", last_fs) > outputfile; - last_fs = sep; - last_fs_print = p; -} - -# Shifts the fields down by n positions. Calls next if there are no more. If p -# is true then print out field separators. -function shift_fields(n, p) { - do { - if (match($0, FS) > 0) { - update_fieldsep(substr($0, RSTART, RLENGTH), p); - if (RSTART + RLENGTH <= length()) - $0 = substr($0, RSTART + RLENGTH); - else - $0 = ""; - } else { - update_fieldsep("", 0); - print "" > outputfile; - next; - } - } while (--n > 0); -} - -# Shifts and prints the first n fields. -function print_fields(n) { - do { - update_fieldsep("", 0); - printf("%s", $1) > outputfile; - shift_fields(1, 1); - } while (--n > 0); -} - -{ - combine_backslashes(); -} - -# Print leading FS -{ - if (match($0, "^(" FS ")+") > 0) { - update_fieldsep(substr($0, RSTART, RLENGTH), 1); - if (RSTART + RLENGTH <= length()) - $0 = substr($0, RSTART + RLENGTH); - else - $0 = ""; - } -} - -# Parse the line. -{ - while (NF > 0) { - if ($1 == "struct" && NF < 3) { - read_line(); - continue; - } - - if (FILENAME ~ /\.h$/ && !inside_srcu_struct && - brace_nesting == 0 && paren_nesting == 0 && - $1 == "struct" && $2 == "srcu_struct" && - $0 ~ "^struct(" FS ")+srcu_struct(" FS ")+\\{") { - inside_srcu_struct = 1; - print_fields(2); - continue; - } - if (inside_srcu_struct && brace_nesting == 0 && - paren_nesting == 0) { - inside_srcu_struct = 0; - update_fieldsep("", 0); - for (name in rcu_batches) - print "extern struct rcu_batch " name ";" > outputfile; - } - - if (inside_srcu_struct && $1 == "struct" && $2 == "rcu_batch") { - # Move rcu_batches outside of the struct. - rcu_batches[$3] = ""; - shift_fields(3, 1); - sub(/;[[:space:]]*$/, "", last_fs); - continue; - } - - if (FILENAME ~ /\.h$/ && !inside_srcu_init_def && - $1 == "#define" && $2 == "__SRCU_STRUCT_INIT") { - inside_srcu_init_def = 1; - srcu_init_param_name = $3; - in_macro = 1; - print_fields(3); - continue; - } - if (inside_srcu_init_def && brace_nesting == 0 && - paren_nesting == 0) { - inside_srcu_init_def = 0; - in_macro = 0; - continue; - } - - if (inside_srcu_init_def && brace_nesting == 1 && - paren_nesting == 0 && last_fs ~ /\.[[:space:]]*$/ && - $1 ~ /^[[:alnum:]_]+$/) { - name = $1; - if (name in rcu_batches) { - # Remove the dot. - sub(/\.[[:space:]]*$/, "", last_fs); - - old_record = $0; - do - shift_fields(1, 0); - while (last_fs !~ /,/ || paren_nesting > 0); - end_loc = length(old_record) - length($0); - end_loc += index(last_fs, ",") - length(last_fs); - - last_fs = substr(last_fs, index(last_fs, ",") + 1); - last_fs_print = 1; - - match(old_record, "^"name"("FS")+="); - start_loc = RSTART + RLENGTH; - - len = end_loc - start_loc; - initializer = substr(old_record, start_loc, len); - gsub(srcu_init_param_name "\\.", "", initializer); - rcu_batches[name] = initializer; - continue; - } - } - - # Don't include a nonexistent file - if (!in_macro && $1 == "#include" && /^#include[[:space:]]+"rcu\.h"/) { - update_fieldsep("", 0); - next; - } - - # Ignore most preprocessor stuff. - if (!in_macro && $1 ~ /#/) { - break; - } - - if (brace_nesting > 0 && $1 ~ "^[[:alnum:]_]+$" && NF < 2) { - read_line(); - continue; - } - if (brace_nesting > 0 && - $0 ~ "^[[:alnum:]_]+[[:space:]]*(\\.|->)[[:space:]]*[[:alnum:]_]+" && - $2 in rcu_batches) { - # Make uses of rcu_batches global. Somewhat unreliable. - shift_fields(1, 0); - print_fields(1); - continue; - } - - if ($1 == "static" && NF < 3) { - read_line(); - continue; - } - if ($1 == "static" && ($2 == "bool" && $3 == "try_check_zero" || - $2 == "void" && $3 == "srcu_flip")) { - shift_fields(1, 1); - print_fields(2); - continue; - } - - # Distinguish between read-side and write-side memory barriers. - if ($1 == "smp_mb" && NF < 2) { - read_line(); - continue; - } - if (match($0, /^smp_mb[[:space:]();\/*]*[[:alnum:]]/)) { - barrier_letter = substr($0, RLENGTH, 1); - if (barrier_letter ~ /A|D/) - new_barrier_name = "sync_smp_mb"; - else if (barrier_letter ~ /B|C/) - new_barrier_name = "rs_smp_mb"; - else { - print "Unrecognized memory barrier." > "/dev/null"; - exit 1; - } - - shift_fields(1, 1); - printf("%s", new_barrier_name) > outputfile; - continue; - } - - # Skip definition of rcu_synchronize, since it is already - # defined in misc.h. Only present in old versions of srcu. - if (brace_nesting == 0 && paren_nesting == 0 && - $1 == "struct" && $2 == "rcu_synchronize" && - $0 ~ "^struct(" FS ")+rcu_synchronize(" FS ")+\\{") { - shift_fields(2, 0); - while (brace_nesting) { - if (NF < 2) - read_line(); - shift_fields(1, 0); - } - } - - # Skip definition of wakeme_after_rcu for the same reason - if (brace_nesting == 0 && $1 == "static" && $2 == "void" && - $3 == "wakeme_after_rcu") { - while (NF < 5) - read_line(); - shift_fields(3, 0); - do { - while (NF < 3) - read_line(); - shift_fields(1, 0); - } while (paren_nesting || brace_nesting); - } - - if ($1 ~ /^(unsigned|long)$/ && NF < 3) { - read_line(); - continue; - } - - # Give srcu_batches_completed the correct type for old SRCU. - if (brace_nesting == 0 && $1 == "long" && - $2 == "srcu_batches_completed") { - update_fieldsep("", 0); - printf("unsigned ") > outputfile; - print_fields(2); - continue; - } - if (brace_nesting == 0 && $1 == "unsigned" && $2 == "long" && - $3 == "srcu_batches_completed") { - print_fields(3); - continue; - } - - # Just print out the input code by default. - print_fields(1); - } - update_fieldsep("", 0); - print > outputfile; - next; -} - -END { - update_fieldsep("", 0); - - if (brace_nesting != 0) { - print "Unbalanced braces!" > "/dev/stderr"; - exit 1; - } - - # Define the rcu_batches - for (name in rcu_batches) - print "struct rcu_batch " name " = " rcu_batches[name] ";" > c_output; -} diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h deleted file mode 100644 index 570a49d9da7e..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ASSUME_H -#define ASSUME_H - -/* Provide an assumption macro that can be disabled for gcc. */ -#ifdef RUN -#define assume(x) \ - do { \ - /* Evaluate x to suppress warnings. */ \ - (void) (x); \ - } while (0) - -#else -#define assume(x) __CPROVER_assume(x) -#endif - -#endif diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h deleted file mode 100644 index 3f95a768a03b..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h +++ /dev/null @@ -1,41 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef BARRIERS_H -#define BARRIERS_H - -#define barrier() __asm__ __volatile__("" : : : "memory") - -#ifdef RUN -#define smp_mb() __sync_synchronize() -#define smp_mb__after_unlock_lock() __sync_synchronize() -#else -/* - * Copied from CBMC's implementation of __sync_synchronize(), which - * seems to be disabled by default. - */ -#define smp_mb() __CPROVER_fence("WWfence", "RRfence", "RWfence", "WRfence", \ - "WWcumul", "RRcumul", "RWcumul", "WRcumul") -#define smp_mb__after_unlock_lock() __CPROVER_fence("WWfence", "RRfence", "RWfence", "WRfence", \ - "WWcumul", "RRcumul", "RWcumul", "WRcumul") -#endif - -/* - * Allow memory barriers to be disabled in either the read or write side - * of SRCU individually. - */ - -#ifndef NO_SYNC_SMP_MB -#define sync_smp_mb() smp_mb() -#else -#define sync_smp_mb() do {} while (0) -#endif - -#ifndef NO_READ_SIDE_SMP_MB -#define rs_smp_mb() smp_mb() -#else -#define rs_smp_mb() do {} while (0) -#endif - -#define READ_ONCE(x) (*(volatile typeof(x) *) &(x)) -#define WRITE_ONCE(x) ((*(volatile typeof(x) *) &(x)) = (val)) - -#endif diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h deleted file mode 100644 index 5e7912c6a521..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef BUG_ON_H -#define BUG_ON_H - -#include <assert.h> - -#define BUG() assert(0) -#define BUG_ON(x) assert(!(x)) - -/* Does it make sense to treat warnings as errors? */ -#define WARN() BUG() -#define WARN_ON(x) (BUG_ON(x), false) - -#endif diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c deleted file mode 100644 index e67ee5b3dd7c..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <config.h> - -/* Include all source files. */ - -#include "include_srcu.c" - -#include "preempt.c" -#include "misc.c" - -/* Used by test.c files */ -#include <pthread.h> -#include <stdlib.h> -#include <linux/srcu.h> diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h deleted file mode 100644 index 283d7103334f..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* "Cheater" definitions based on restricted Kconfig choices. */ - -#undef CONFIG_TINY_RCU -#undef __CHECKER__ -#undef CONFIG_DEBUG_LOCK_ALLOC -#undef CONFIG_DEBUG_OBJECTS_RCU_HEAD -#undef CONFIG_HOTPLUG_CPU -#undef CONFIG_MODULES -#undef CONFIG_NO_HZ_FULL_SYSIDLE -#undef CONFIG_PREEMPT_COUNT -#undef CONFIG_PREEMPT_RCU -#undef CONFIG_PROVE_RCU -#undef CONFIG_RCU_NOCB_CPU -#undef CONFIG_RCU_NOCB_CPU_ALL -#undef CONFIG_RCU_STALL_COMMON -#undef CONFIG_RCU_TRACE -#undef CONFIG_RCU_USER_QS -#undef CONFIG_TASKS_RCU -#define CONFIG_TREE_RCU - -#define CONFIG_GENERIC_ATOMIC64 - -#if NR_CPUS > 1 -#define CONFIG_SMP -#else -#undef CONFIG_SMP -#endif diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c deleted file mode 100644 index e5202d4cff30..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c +++ /dev/null @@ -1,32 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <config.h> - -#include <assert.h> -#include <errno.h> -#include <inttypes.h> -#include <pthread.h> -#include <stddef.h> -#include <string.h> -#include <sys/types.h> - -#include "int_typedefs.h" - -#include "barriers.h" -#include "bug_on.h" -#include "locks.h" -#include "misc.h" -#include "preempt.h" -#include "percpu.h" -#include "workqueues.h" - -#ifdef USE_SIMPLE_SYNC_SRCU -#define synchronize_srcu(sp) synchronize_srcu_original(sp) -#endif - -#include <srcu.c> - -#ifdef USE_SIMPLE_SYNC_SRCU -#undef synchronize_srcu - -#include "simple_sync_srcu.c" -#endif diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h deleted file mode 100644 index 0dd27aa517a7..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h +++ /dev/null @@ -1,34 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef INT_TYPEDEFS_H -#define INT_TYPEDEFS_H - -#include <inttypes.h> - -typedef int8_t s8; -typedef uint8_t u8; -typedef int16_t s16; -typedef uint16_t u16; -typedef int32_t s32; -typedef uint32_t u32; -typedef int64_t s64; -typedef uint64_t u64; - -typedef int8_t __s8; -typedef uint8_t __u8; -typedef int16_t __s16; -typedef uint16_t __u16; -typedef int32_t __s32; -typedef uint32_t __u32; -typedef int64_t __s64; -typedef uint64_t __u64; - -#define S8_C(x) INT8_C(x) -#define U8_C(x) UINT8_C(x) -#define S16_C(x) INT16_C(x) -#define U16_C(x) UINT16_C(x) -#define S32_C(x) INT32_C(x) -#define U32_C(x) UINT32_C(x) -#define S64_C(x) INT64_C(x) -#define U64_C(x) UINT64_C(x) - -#endif diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h deleted file mode 100644 index 1e24827f96f1..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h +++ /dev/null @@ -1,221 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef LOCKS_H -#define LOCKS_H - -#include <limits.h> -#include <pthread.h> -#include <stdbool.h> - -#include "assume.h" -#include "bug_on.h" -#include "preempt.h" - -int nondet_int(void); - -#define __acquire(x) -#define __acquires(x) -#define __release(x) -#define __releases(x) - -/* Only use one lock mechanism. Select which one. */ -#ifdef PTHREAD_LOCK -struct lock_impl { - pthread_mutex_t mutex; -}; - -static inline void lock_impl_lock(struct lock_impl *lock) -{ - BUG_ON(pthread_mutex_lock(&lock->mutex)); -} - -static inline void lock_impl_unlock(struct lock_impl *lock) -{ - BUG_ON(pthread_mutex_unlock(&lock->mutex)); -} - -static inline bool lock_impl_trylock(struct lock_impl *lock) -{ - int err = pthread_mutex_trylock(&lock->mutex); - - if (!err) - return true; - else if (err == EBUSY) - return false; - BUG(); -} - -static inline void lock_impl_init(struct lock_impl *lock) -{ - pthread_mutex_init(&lock->mutex, NULL); -} - -#define LOCK_IMPL_INITIALIZER {.mutex = PTHREAD_MUTEX_INITIALIZER} - -#else /* !defined(PTHREAD_LOCK) */ -/* Spinlock that assumes that it always gets the lock immediately. */ - -struct lock_impl { - bool locked; -}; - -static inline bool lock_impl_trylock(struct lock_impl *lock) -{ -#ifdef RUN - /* TODO: Should this be a test and set? */ - return __sync_bool_compare_and_swap(&lock->locked, false, true); -#else - __CPROVER_atomic_begin(); - bool old_locked = lock->locked; - lock->locked = true; - __CPROVER_atomic_end(); - - /* Minimal barrier to prevent accesses leaking out of lock. */ - __CPROVER_fence("RRfence", "RWfence"); - - return !old_locked; -#endif -} - -static inline void lock_impl_lock(struct lock_impl *lock) -{ - /* - * CBMC doesn't support busy waiting, so just assume that the - * lock is available. - */ - assume(lock_impl_trylock(lock)); - - /* - * If the lock was already held by this thread then the assumption - * is unsatisfiable (deadlock). - */ -} - -static inline void lock_impl_unlock(struct lock_impl *lock) -{ -#ifdef RUN - BUG_ON(!__sync_bool_compare_and_swap(&lock->locked, true, false)); -#else - /* Minimal barrier to prevent accesses leaking out of lock. */ - __CPROVER_fence("RWfence", "WWfence"); - - __CPROVER_atomic_begin(); - bool old_locked = lock->locked; - lock->locked = false; - __CPROVER_atomic_end(); - - BUG_ON(!old_locked); -#endif -} - -static inline void lock_impl_init(struct lock_impl *lock) -{ - lock->locked = false; -} - -#define LOCK_IMPL_INITIALIZER {.locked = false} - -#endif /* !defined(PTHREAD_LOCK) */ - -/* - * Implement spinlocks using the lock mechanism. Wrap the lock to prevent mixing - * locks of different types. - */ -typedef struct { - struct lock_impl internal_lock; -} spinlock_t; - -#define SPIN_LOCK_UNLOCKED {.internal_lock = LOCK_IMPL_INITIALIZER} -#define __SPIN_LOCK_UNLOCKED(x) SPIN_LOCK_UNLOCKED -#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED - -static inline void spin_lock_init(spinlock_t *lock) -{ - lock_impl_init(&lock->internal_lock); -} - -static inline void spin_lock(spinlock_t *lock) -{ - /* - * Spin locks also need to be removed in order to eliminate all - * memory barriers. They are only used by the write side anyway. - */ -#ifndef NO_SYNC_SMP_MB - preempt_disable(); - lock_impl_lock(&lock->internal_lock); -#endif -} - -static inline void spin_unlock(spinlock_t *lock) -{ -#ifndef NO_SYNC_SMP_MB - lock_impl_unlock(&lock->internal_lock); - preempt_enable(); -#endif -} - -/* Don't bother with interrupts */ -#define spin_lock_irq(lock) spin_lock(lock) -#define spin_unlock_irq(lock) spin_unlock(lock) -#define spin_lock_irqsave(lock, flags) spin_lock(lock) -#define spin_unlock_irqrestore(lock, flags) spin_unlock(lock) - -/* - * This is supposed to return an int, but I think that a bool should work as - * well. - */ -static inline bool spin_trylock(spinlock_t *lock) -{ -#ifndef NO_SYNC_SMP_MB - preempt_disable(); - return lock_impl_trylock(&lock->internal_lock); -#else - return true; -#endif -} - -struct completion { - /* Hopefully this won't overflow. */ - unsigned int count; -}; - -#define COMPLETION_INITIALIZER(x) {.count = 0} -#define DECLARE_COMPLETION(x) struct completion x = COMPLETION_INITIALIZER(x) -#define DECLARE_COMPLETION_ONSTACK(x) DECLARE_COMPLETION(x) - -static inline void init_completion(struct completion *c) -{ - c->count = 0; -} - -static inline void wait_for_completion(struct completion *c) -{ - unsigned int prev_count = __sync_fetch_and_sub(&c->count, 1); - - assume(prev_count); -} - -static inline void complete(struct completion *c) -{ - unsigned int prev_count = __sync_fetch_and_add(&c->count, 1); - - BUG_ON(prev_count == UINT_MAX); -} - -/* This function probably isn't very useful for CBMC. */ -static inline bool try_wait_for_completion(struct completion *c) -{ - BUG(); -} - -static inline bool completion_done(struct completion *c) -{ - return c->count; -} - -/* TODO: Implement complete_all */ -static inline void complete_all(struct completion *c) -{ - BUG(); -} - -#endif diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c deleted file mode 100644 index 9440cc39e3c6..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c +++ /dev/null @@ -1,12 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <config.h> - -#include "misc.h" -#include "bug_on.h" - -struct rcu_head; - -void wakeme_after_rcu(struct rcu_head *head) -{ - BUG(); -} diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h deleted file mode 100644 index aca50030f954..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h +++ /dev/null @@ -1,58 +0,0 @@ -#ifndef MISC_H -#define MISC_H - -#include "assume.h" -#include "int_typedefs.h" -#include "locks.h" - -#include <linux/types.h> - -/* Probably won't need to deal with bottom halves. */ -static inline void local_bh_disable(void) {} -static inline void local_bh_enable(void) {} - -#define MODULE_ALIAS(X) -#define module_param(...) -#define EXPORT_SYMBOL_GPL(x) - -#define container_of(ptr, type, member) ({ \ - const typeof(((type *)0)->member) *__mptr = (ptr); \ - (type *)((char *)__mptr - offsetof(type, member)); \ -}) - -#ifndef USE_SIMPLE_SYNC_SRCU -/* Abuse udelay to make sure that busy loops terminate. */ -#define udelay(x) assume(0) - -#else - -/* The simple custom synchronize_srcu is ok with try_check_zero failing. */ -#define udelay(x) do { } while (0) -#endif - -#define trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \ - do { } while (0) - -#define notrace - -/* Avoid including rcupdate.h */ -struct rcu_synchronize { - struct rcu_head head; - struct completion completion; -}; - -void wakeme_after_rcu(struct rcu_head *head); - -#define rcu_lock_acquire(a) do { } while (0) -#define rcu_lock_release(a) do { } while (0) -#define rcu_lockdep_assert(c, s) do { } while (0) -#define RCU_LOCKDEP_WARN(c, s) do { } while (0) - -/* Let CBMC non-deterministically choose switch between normal and expedited. */ -bool rcu_gp_is_normal(void); -bool rcu_gp_is_expedited(void); - -/* Do the same for old versions of rcu. */ -#define rcu_expedited (rcu_gp_is_expedited()) - -#endif diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h deleted file mode 100644 index 27e67a3f291f..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h +++ /dev/null @@ -1,93 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef PERCPU_H -#define PERCPU_H - -#include <stddef.h> -#include "bug_on.h" -#include "preempt.h" - -#define __percpu - -/* Maximum size of any percpu data. */ -#define PERCPU_OFFSET (4 * sizeof(long)) - -/* Ignore alignment, as CBMC doesn't care about false sharing. */ -#define alloc_percpu(type) __alloc_percpu(sizeof(type), 1) - -static inline void *__alloc_percpu(size_t size, size_t align) -{ - BUG(); - return NULL; -} - -static inline void free_percpu(void *ptr) -{ - BUG(); -} - -#define per_cpu_ptr(ptr, cpu) \ - ((typeof(ptr)) ((char *) (ptr) + PERCPU_OFFSET * cpu)) - -#define __this_cpu_inc(pcp) __this_cpu_add(pcp, 1) -#define __this_cpu_dec(pcp) __this_cpu_sub(pcp, 1) -#define __this_cpu_sub(pcp, n) __this_cpu_add(pcp, -(typeof(pcp)) (n)) - -#define this_cpu_inc(pcp) this_cpu_add(pcp, 1) -#define this_cpu_dec(pcp) this_cpu_sub(pcp, 1) -#define this_cpu_sub(pcp, n) this_cpu_add(pcp, -(typeof(pcp)) (n)) - -/* Make CBMC use atomics to work around bug. */ -#ifdef RUN -#define THIS_CPU_ADD_HELPER(ptr, x) (*(ptr) += (x)) -#else -/* - * Split the atomic into a read and a write so that it has the least - * possible ordering. - */ -#define THIS_CPU_ADD_HELPER(ptr, x) \ - do { \ - typeof(ptr) this_cpu_add_helper_ptr = (ptr); \ - typeof(ptr) this_cpu_add_helper_x = (x); \ - typeof(*ptr) this_cpu_add_helper_temp; \ - __CPROVER_atomic_begin(); \ - this_cpu_add_helper_temp = *(this_cpu_add_helper_ptr); \ - __CPROVER_atomic_end(); \ - this_cpu_add_helper_temp += this_cpu_add_helper_x; \ - __CPROVER_atomic_begin(); \ - *(this_cpu_add_helper_ptr) = this_cpu_add_helper_temp; \ - __CPROVER_atomic_end(); \ - } while (0) -#endif - -/* - * For some reason CBMC needs an atomic operation even though this is percpu - * data. - */ -#define __this_cpu_add(pcp, n) \ - do { \ - BUG_ON(preemptible()); \ - THIS_CPU_ADD_HELPER(per_cpu_ptr(&(pcp), thread_cpu_id), \ - (typeof(pcp)) (n)); \ - } while (0) - -#define this_cpu_add(pcp, n) \ - do { \ - int this_cpu_add_impl_cpu = get_cpu(); \ - THIS_CPU_ADD_HELPER(per_cpu_ptr(&(pcp), this_cpu_add_impl_cpu), \ - (typeof(pcp)) (n)); \ - put_cpu(); \ - } while (0) - -/* - * This will cause a compiler warning because of the cast from char[][] to - * type*. This will cause a compile time error if type is too big. - */ -#define DEFINE_PER_CPU(type, name) \ - char name[NR_CPUS][PERCPU_OFFSET]; \ - typedef char percpu_too_big_##name \ - [sizeof(type) > PERCPU_OFFSET ? -1 : 1] - -#define for_each_possible_cpu(cpu) \ - for ((cpu) = 0; (cpu) < NR_CPUS; ++(cpu)) - -#endif diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c deleted file mode 100644 index b4083ae348fb..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c +++ /dev/null @@ -1,79 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <config.h> - -#include "preempt.h" - -#include "assume.h" -#include "locks.h" - -/* Support NR_CPUS of at most 64 */ -#define CPU_PREEMPTION_LOCKS_INIT0 LOCK_IMPL_INITIALIZER -#define CPU_PREEMPTION_LOCKS_INIT1 \ - CPU_PREEMPTION_LOCKS_INIT0, CPU_PREEMPTION_LOCKS_INIT0 -#define CPU_PREEMPTION_LOCKS_INIT2 \ - CPU_PREEMPTION_LOCKS_INIT1, CPU_PREEMPTION_LOCKS_INIT1 -#define CPU_PREEMPTION_LOCKS_INIT3 \ - CPU_PREEMPTION_LOCKS_INIT2, CPU_PREEMPTION_LOCKS_INIT2 -#define CPU_PREEMPTION_LOCKS_INIT4 \ - CPU_PREEMPTION_LOCKS_INIT3, CPU_PREEMPTION_LOCKS_INIT3 -#define CPU_PREEMPTION_LOCKS_INIT5 \ - CPU_PREEMPTION_LOCKS_INIT4, CPU_PREEMPTION_LOCKS_INIT4 - -/* - * Simulate disabling preemption by locking a particular cpu. NR_CPUS - * should be the actual number of cpus, not just the maximum. - */ -struct lock_impl cpu_preemption_locks[NR_CPUS] = { - CPU_PREEMPTION_LOCKS_INIT0 -#if (NR_CPUS - 1) & 1 - , CPU_PREEMPTION_LOCKS_INIT0 -#endif -#if (NR_CPUS - 1) & 2 - , CPU_PREEMPTION_LOCKS_INIT1 -#endif -#if (NR_CPUS - 1) & 4 - , CPU_PREEMPTION_LOCKS_INIT2 -#endif -#if (NR_CPUS - 1) & 8 - , CPU_PREEMPTION_LOCKS_INIT3 -#endif -#if (NR_CPUS - 1) & 16 - , CPU_PREEMPTION_LOCKS_INIT4 -#endif -#if (NR_CPUS - 1) & 32 - , CPU_PREEMPTION_LOCKS_INIT5 -#endif -}; - -#undef CPU_PREEMPTION_LOCKS_INIT0 -#undef CPU_PREEMPTION_LOCKS_INIT1 -#undef CPU_PREEMPTION_LOCKS_INIT2 -#undef CPU_PREEMPTION_LOCKS_INIT3 -#undef CPU_PREEMPTION_LOCKS_INIT4 -#undef CPU_PREEMPTION_LOCKS_INIT5 - -__thread int thread_cpu_id; -__thread int preempt_disable_count; - -void preempt_disable(void) -{ - BUG_ON(preempt_disable_count < 0 || preempt_disable_count == INT_MAX); - - if (preempt_disable_count++) - return; - - thread_cpu_id = nondet_int(); - assume(thread_cpu_id >= 0); - assume(thread_cpu_id < NR_CPUS); - lock_impl_lock(&cpu_preemption_locks[thread_cpu_id]); -} - -void preempt_enable(void) -{ - BUG_ON(preempt_disable_count < 1); - - if (--preempt_disable_count) - return; - - lock_impl_unlock(&cpu_preemption_locks[thread_cpu_id]); -} diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h deleted file mode 100644 index f8b762cd214c..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h +++ /dev/null @@ -1,59 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef PREEMPT_H -#define PREEMPT_H - -#include <stdbool.h> - -#include "bug_on.h" - -/* This flag contains garbage if preempt_disable_count is 0. */ -extern __thread int thread_cpu_id; - -/* Support recursive preemption disabling. */ -extern __thread int preempt_disable_count; - -void preempt_disable(void); -void preempt_enable(void); - -static inline void preempt_disable_notrace(void) -{ - preempt_disable(); -} - -static inline void preempt_enable_no_resched(void) -{ - preempt_enable(); -} - -static inline void preempt_enable_notrace(void) -{ - preempt_enable(); -} - -static inline int preempt_count(void) -{ - return preempt_disable_count; -} - -static inline bool preemptible(void) -{ - return !preempt_count(); -} - -static inline int get_cpu(void) -{ - preempt_disable(); - return thread_cpu_id; -} - -static inline void put_cpu(void) -{ - preempt_enable(); -} - -static inline void might_sleep(void) -{ - BUG_ON(preempt_disable_count); -} - -#endif diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c deleted file mode 100644 index 97f592048e0b..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <config.h> - -#include <assert.h> -#include <errno.h> -#include <inttypes.h> -#include <pthread.h> -#include <stddef.h> -#include <string.h> -#include <sys/types.h> - -#include "int_typedefs.h" - -#include "barriers.h" -#include "bug_on.h" -#include "locks.h" -#include "misc.h" -#include "preempt.h" -#include "percpu.h" -#include "workqueues.h" - -#include <linux/srcu.h> - -/* Functions needed from modify_srcu.c */ -bool try_check_zero(struct srcu_struct *sp, int idx, int trycount); -void srcu_flip(struct srcu_struct *sp); - -/* Simpler implementation of synchronize_srcu that ignores batching. */ -void synchronize_srcu(struct srcu_struct *sp) -{ - int idx; - /* - * This code assumes that try_check_zero will succeed anyway, - * so there is no point in multiple tries. - */ - const int trycount = 1; - - might_sleep(); - - /* Ignore the lock, as multiple writers aren't working yet anyway. */ - - idx = 1 ^ (sp->completed & 1); - - /* For comments see srcu_advance_batches. */ - - assume(try_check_zero(sp, idx, trycount)); - - srcu_flip(sp); - - assume(try_check_zero(sp, idx^1, trycount)); -} diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h deleted file mode 100644 index 28b960300971..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h +++ /dev/null @@ -1,103 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef WORKQUEUES_H -#define WORKQUEUES_H - -#include <stdbool.h> - -#include "barriers.h" -#include "bug_on.h" -#include "int_typedefs.h" - -#include <linux/types.h> - -/* Stub workqueue implementation. */ - -struct work_struct; -typedef void (*work_func_t)(struct work_struct *work); -void delayed_work_timer_fn(unsigned long __data); - -struct work_struct { -/* atomic_long_t data; */ - unsigned long data; - - struct list_head entry; - work_func_t func; -#ifdef CONFIG_LOCKDEP - struct lockdep_map lockdep_map; -#endif -}; - -struct timer_list { - struct hlist_node entry; - unsigned long expires; - void (*function)(unsigned long); - unsigned long data; - u32 flags; - int slack; -}; - -struct delayed_work { - struct work_struct work; - struct timer_list timer; - - /* target workqueue and CPU ->timer uses to queue ->work */ - struct workqueue_struct *wq; - int cpu; -}; - - -static inline bool schedule_work(struct work_struct *work) -{ - BUG(); - return true; -} - -static inline bool schedule_work_on(int cpu, struct work_struct *work) -{ - BUG(); - return true; -} - -static inline bool queue_work(struct workqueue_struct *wq, - struct work_struct *work) -{ - BUG(); - return true; -} - -static inline bool queue_delayed_work(struct workqueue_struct *wq, - struct delayed_work *dwork, - unsigned long delay) -{ - BUG(); - return true; -} - -#define INIT_WORK(w, f) \ - do { \ - (w)->data = 0; \ - (w)->func = (f); \ - } while (0) - -#define INIT_DELAYED_WORK(w, f) INIT_WORK(&(w)->work, (f)) - -#define __WORK_INITIALIZER(n, f) { \ - .data = 0, \ - .entry = { &(n).entry, &(n).entry }, \ - .func = f \ - } - -/* Don't bother initializing timer. */ -#define __DELAYED_WORK_INITIALIZER(n, f, tflags) { \ - .work = __WORK_INITIALIZER((n).work, (f)), \ - } - -#define DECLARE_WORK(n, f) \ - struct workqueue_struct n = __WORK_INITIALIZER - -#define DECLARE_DELAYED_WORK(n, f) \ - struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, 0) - -#define system_power_efficient_wq ((struct workqueue_struct *) NULL) - -#endif diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile deleted file mode 100644 index ad21b925fbb4..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -CBMC_FLAGS = -I../.. -I../../src -I../../include -I../../empty_includes -32 -pointer-check -mm pso - -all: - for i in ./*.pass; do \ - echo $$i ; \ - CBMC_FLAGS="$(CBMC_FLAGS)" sh ../test_script.sh --should-pass $$i > $$i.out 2>&1 ; \ - done - for i in ./*.fail; do \ - echo $$i ; \ - CBMC_FLAGS="$(CBMC_FLAGS)" sh ../test_script.sh --should-fail $$i > $$i.out 2>&1 ; \ - done diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail deleted file mode 100644 index 40c8075919d1..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail +++ /dev/null @@ -1 +0,0 @@ -test_cbmc_options="-DASSERT_END" diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail deleted file mode 100644 index ada5baf0b60d..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail +++ /dev/null @@ -1 +0,0 @@ -test_cbmc_options="-DFORCE_FAILURE" diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail deleted file mode 100644 index 8fe00c8db466..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail +++ /dev/null @@ -1 +0,0 @@ -test_cbmc_options="-DFORCE_FAILURE_2" diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail deleted file mode 100644 index 612ed6772844..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail +++ /dev/null @@ -1 +0,0 @@ -test_cbmc_options="-DFORCE_FAILURE_3" diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass +++ /dev/null diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c deleted file mode 100644 index 2ce2016f7871..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c +++ /dev/null @@ -1,73 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <src/combined_source.c> - -int x; -int y; - -int __unbuffered_tpr_x; -int __unbuffered_tpr_y; - -DEFINE_SRCU(ss); - -void rcu_reader(void) -{ - int idx; - -#ifndef FORCE_FAILURE_3 - idx = srcu_read_lock(&ss); -#endif - might_sleep(); - - __unbuffered_tpr_y = READ_ONCE(y); -#ifdef FORCE_FAILURE - srcu_read_unlock(&ss, idx); - idx = srcu_read_lock(&ss); -#endif - WRITE_ONCE(x, 1); - -#ifndef FORCE_FAILURE_3 - srcu_read_unlock(&ss, idx); -#endif - might_sleep(); -} - -void *thread_update(void *arg) -{ - WRITE_ONCE(y, 1); -#ifndef FORCE_FAILURE_2 - synchronize_srcu(&ss); -#endif - might_sleep(); - __unbuffered_tpr_x = READ_ONCE(x); - - return NULL; -} - -void *thread_process_reader(void *arg) -{ - rcu_reader(); - - return NULL; -} - -int main(int argc, char *argv[]) -{ - pthread_t tu; - pthread_t tpr; - - if (pthread_create(&tu, NULL, thread_update, NULL)) - abort(); - if (pthread_create(&tpr, NULL, thread_process_reader, NULL)) - abort(); - if (pthread_join(tu, NULL)) - abort(); - if (pthread_join(tpr, NULL)) - abort(); - assert(__unbuffered_tpr_y != 0 || __unbuffered_tpr_x != 0); - -#ifdef ASSERT_END - assert(0); -#endif - - return 0; -} diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh deleted file mode 100755 index 2fe1f0339b4f..000000000000 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh +++ /dev/null @@ -1,103 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 - -# This script expects a mode (either --should-pass or --should-fail) followed by -# an input file. The script uses the following environment variables. The test C -# source file is expected to be named test.c in the directory containing the -# input file. -# -# CBMC: The command to run CBMC. Default: cbmc -# CBMC_FLAGS: Additional flags to pass to CBMC -# NR_CPUS: Number of cpus to run tests with. Default specified by the test -# SYNC_SRCU_MODE: Choose implementation of synchronize_srcu. Defaults to simple. -# kernel: Version included in the linux kernel source. -# simple: Use try_check_zero directly. -# -# The input file is a script that is sourced by this file. It can define any of -# the following variables to configure the test. -# -# test_cbmc_options: Extra options to pass to CBMC. -# min_cpus_fail: Minimum number of CPUs (NR_CPUS) for verification to fail. -# The test is expected to pass if it is run with fewer. (Only -# useful for .fail files) -# default_cpus: Quantity of CPUs to use for the test, if not specified on the -# command line. Default: Larger of 2 and MIN_CPUS_FAIL. - -set -e - -if test "$#" -ne 2; then - echo "Expected one option followed by an input file" 1>&2 - exit 99 -fi - -if test "x$1" = "x--should-pass"; then - should_pass="yes" -elif test "x$1" = "x--should-fail"; then - should_pass="no" -else - echo "Unrecognized argument '$1'" 1>&2 - - # Exit code 99 indicates a hard error. - exit 99 -fi - -CBMC=${CBMC:-cbmc} - -SYNC_SRCU_MODE=${SYNC_SRCU_MODE:-simple} - -case ${SYNC_SRCU_MODE} in -kernel) sync_srcu_mode_flags="" ;; -simple) sync_srcu_mode_flags="-DUSE_SIMPLE_SYNC_SRCU" ;; - -*) - echo "Unrecognized argument '${SYNC_SRCU_MODE}'" 1>&2 - exit 99 - ;; -esac - -min_cpus_fail=1 - -c_file=`dirname "$2"`/test.c - -# Source the input file. -. $2 - -if test ${min_cpus_fail} -gt 2; then - default_default_cpus=${min_cpus_fail} -else - default_default_cpus=2 -fi -default_cpus=${default_cpus:-${default_default_cpus}} -cpus=${NR_CPUS:-${default_cpus}} - -# Check if there are two few cpus to make the test fail. -if test $cpus -lt ${min_cpus_fail:-0}; then - should_pass="yes" -fi - -cbmc_opts="-DNR_CPUS=${cpus} ${sync_srcu_mode_flags} ${test_cbmc_options} ${CBMC_FLAGS}" - -echo "Running CBMC: ${CBMC} ${cbmc_opts} ${c_file}" -if ${CBMC} ${cbmc_opts} "${c_file}"; then - # Verification successful. Make sure that it was supposed to verify. - test "x${should_pass}" = xyes -else - cbmc_exit_status=$? - - # An exit status of 10 indicates a failed verification. - # (see cbmc_parse_optionst::do_bmc in the CBMC source code) - if test ${cbmc_exit_status} -eq 10 && test "x${should_pass}" = xno; then - : - else - echo "CBMC returned ${cbmc_exit_status} exit status" 1>&2 - - # Parse errors have exit status 6. Any other type of error - # should be considered a hard error. - if test ${cbmc_exit_status} -ne 6 && \ - test ${cbmc_exit_status} -ne 10; then - exit 99 - else - exit 1 - fi - fi -fi diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile index 73d53257df42..5073dbc96125 100644 --- a/tools/testing/selftests/resctrl/Makefile +++ b/tools/testing/selftests/resctrl/Makefile @@ -7,4 +7,4 @@ TEST_GEN_PROGS := resctrl_tests include ../lib.mk -$(OUTPUT)/resctrl_tests: $(wildcard *.c) +$(OUTPUT)/resctrl_tests: $(wildcard *.[ch]) diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c index 8a4fe8693be6..d3cbb829ff6a 100644 --- a/tools/testing/selftests/resctrl/cache.c +++ b/tools/testing/selftests/resctrl/cache.c @@ -87,21 +87,19 @@ static int reset_enable_llc_perf(pid_t pid, int cpu_no) static int get_llc_perf(unsigned long *llc_perf_miss) { __u64 total_misses; + int ret; /* Stop counters after one span to get miss rate */ ioctl(fd_lm, PERF_EVENT_IOC_DISABLE, 0); - if (read(fd_lm, &rf_cqm, sizeof(struct read_format)) == -1) { + ret = read(fd_lm, &rf_cqm, sizeof(struct read_format)); + if (ret == -1) { perror("Could not get llc misses through perf"); - return -1; } total_misses = rf_cqm.values[0].value; - - close(fd_lm); - *llc_perf_miss = total_misses; return 0; @@ -212,7 +210,7 @@ int measure_cache_vals(struct resctrl_val_param *param, int bm_pid) */ int cat_val(struct resctrl_val_param *param) { - int malloc_and_init_memory = 1, memflush = 1, operation = 0, ret = 0; + int memflush = 1, operation = 0, ret = 0; char *resctrl_val = param->resctrl_val; pid_t bm_pid; @@ -232,40 +230,38 @@ int cat_val(struct resctrl_val_param *param) if (ret) return ret; - if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) - initialize_llc_perf(); + initialize_llc_perf(); /* Test runs until the callback setup() tells the test to stop. */ while (1) { - if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) { - ret = param->setup(1, param); - if (ret == END_OF_TESTS) { - ret = 0; - break; - } - if (ret < 0) - break; - ret = reset_enable_llc_perf(bm_pid, param->cpu_no); - if (ret) - break; - - if (run_fill_buf(param->span, malloc_and_init_memory, - memflush, operation, resctrl_val)) { - fprintf(stderr, "Error-running fill buffer\n"); - ret = -1; - break; - } - - sleep(1); - ret = measure_cache_vals(param, bm_pid); - if (ret) - break; - } else { + ret = param->setup(param); + if (ret == END_OF_TESTS) { + ret = 0; + break; + } + if (ret < 0) + break; + ret = reset_enable_llc_perf(bm_pid, param->cpu_no); + if (ret) break; + + if (run_fill_buf(param->span, memflush, operation, true)) { + fprintf(stderr, "Error-running fill buffer\n"); + ret = -1; + goto pe_close; } + + sleep(1); + ret = measure_cache_vals(param, bm_pid); + if (ret) + goto pe_close; } return ret; + +pe_close: + close(fd_lm); + return ret; } /* @@ -282,7 +278,7 @@ int cat_val(struct resctrl_val_param *param) * Return: 0 on success. non-zero on failure. */ int show_cache_info(unsigned long sum_llc_val, int no_of_bits, - unsigned long cache_span, unsigned long max_diff, + size_t cache_span, unsigned long max_diff, unsigned long max_diff_percent, unsigned long num_of_runs, bool platform, bool cmt) { @@ -291,7 +287,7 @@ int show_cache_info(unsigned long sum_llc_val, int no_of_bits, long avg_diff = 0; int ret; - avg_llc_val = sum_llc_val / (num_of_runs - 1); + avg_llc_val = sum_llc_val / num_of_runs; avg_diff = (long)abs(cache_span - avg_llc_val); diff_percent = ((float)cache_span - avg_llc_val) / cache_span * 100; @@ -304,7 +300,7 @@ int show_cache_info(unsigned long sum_llc_val, int no_of_bits, ksft_print_msg("Percent diff=%d\n", abs((int)diff_percent)); ksft_print_msg("Number of bits: %d\n", no_of_bits); ksft_print_msg("Average LLC val: %lu\n", avg_llc_val); - ksft_print_msg("Cache span (%s): %lu\n", cmt ? "bytes" : "lines", + ksft_print_msg("Cache span (%s): %zu\n", cmt ? "bytes" : "lines", cache_span); return ret; diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c index fb1443f888c4..3848dfb46aba 100644 --- a/tools/testing/selftests/resctrl/cat_test.c +++ b/tools/testing/selftests/resctrl/cat_test.c @@ -17,27 +17,16 @@ #define MAX_DIFF_PERCENT 4 #define MAX_DIFF 1000000 -static int count_of_bits; -static char cbm_mask[256]; -static unsigned long long_mask; -static unsigned long cache_size; - /* * Change schemata. Write schemata to specified * con_mon grp, mon_grp in resctrl FS. * Run 5 times in order to get average values. */ -static int cat_setup(int num, ...) +static int cat_setup(struct resctrl_val_param *p) { - struct resctrl_val_param *p; char schemata[64]; - va_list param; int ret = 0; - va_start(param, num); - p = va_arg(param, struct resctrl_val_param *); - va_end(param); - /* Run NUM_OF_RUNS times */ if (p->num_of_runs >= NUM_OF_RUNS) return END_OF_TESTS; @@ -88,7 +77,7 @@ static int check_results(struct resctrl_val_param *param) no_of_bits = count_bits(param->mask); return show_cache_info(sum_llc_perf_miss, no_of_bits, param->span / 64, - MAX_DIFF, MAX_DIFF_PERCENT, NUM_OF_RUNS, + MAX_DIFF, MAX_DIFF_PERCENT, runs - 1, get_vendor() == ARCH_INTEL, false); } @@ -102,14 +91,12 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type) { unsigned long l_mask, l_mask_1; int ret, pipefd[2], sibling_cpu_no; + unsigned long cache_size = 0; + unsigned long long_mask; + char cbm_mask[256]; + int count_of_bits; char pipe_message; - cache_size = 0; - - ret = remount_resctrlfs(true); - if (ret) - return ret; - /* Get default cbm mask for L3/L2 cache */ ret = get_cbm_mask(cache_type, cbm_mask); if (ret) @@ -144,7 +131,6 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type) struct resctrl_val_param param = { .resctrl_val = CAT_STR, .cpu_no = cpu_no, - .mum_resctrlfs = false, .setup = cat_setup, }; @@ -227,8 +213,6 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type) out: cat_test_cleanup(); - if (bm_pid) - umount_resctrlfs(); return ret; } diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c index af71b2141271..cb2197647c6c 100644 --- a/tools/testing/selftests/resctrl/cmt_test.c +++ b/tools/testing/selftests/resctrl/cmt_test.c @@ -16,20 +16,8 @@ #define MAX_DIFF 2000000 #define MAX_DIFF_PERCENT 15 -static int count_of_bits; -static char cbm_mask[256]; -static unsigned long long_mask; -static unsigned long cache_size; - -static int cmt_setup(int num, ...) +static int cmt_setup(struct resctrl_val_param *p) { - struct resctrl_val_param *p; - va_list param; - - va_start(param, num); - p = va_arg(param, struct resctrl_val_param *); - va_end(param); - /* Run NUM_OF_RUNS times */ if (p->num_of_runs >= NUM_OF_RUNS) return END_OF_TESTS; @@ -71,7 +59,7 @@ static int check_results(struct resctrl_val_param *param, int no_of_bits) fclose(fp); return show_cache_info(sum_llc_occu_resc, no_of_bits, param->span, - MAX_DIFF, MAX_DIFF_PERCENT, NUM_OF_RUNS, + MAX_DIFF, MAX_DIFF_PERCENT, runs - 1, true, true); } @@ -82,14 +70,12 @@ void cmt_test_cleanup(void) int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd) { + unsigned long cache_size = 0; + unsigned long long_mask; + char cbm_mask[256]; + int count_of_bits; int ret; - cache_size = 0; - - ret = remount_resctrlfs(true); - if (ret) - return ret; - if (!validate_resctrl_feature_request(CMT_STR)) return -1; @@ -117,7 +103,6 @@ int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd) .ctrlgrp = "c1", .mongrp = "m1", .cpu_no = cpu_no, - .mum_resctrlfs = false, .filename = RESULT_FILE_NAME, .mask = ~(long_mask << n) & long_mask, .span = cache_size * n / count_of_bits, @@ -126,7 +111,7 @@ int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd) }; if (strcmp(benchmark_cmd[0], "fill_buf") == 0) - sprintf(benchmark_cmd[1], "%lu", param.span); + sprintf(benchmark_cmd[1], "%zu", param.span); remove(RESULT_FILE_NAME); diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c index 341cc93ca84c..0d425f26583a 100644 --- a/tools/testing/selftests/resctrl/fill_buf.c +++ b/tools/testing/selftests/resctrl/fill_buf.c @@ -22,8 +22,6 @@ #define PAGE_SIZE (4 * 1024) #define MB (1024 * 1024) -static unsigned char *startptr; - static void sb(void) { #if defined(__i386) || defined(__x86_64) @@ -40,32 +38,32 @@ static void cl_flush(void *p) #endif } -static void mem_flush(void *p, size_t s) +static void mem_flush(unsigned char *buf, size_t buf_size) { - char *cp = (char *)p; + unsigned char *cp = buf; size_t i = 0; - s = s / CL_SIZE; /* mem size in cache llines */ + buf_size = buf_size / CL_SIZE; /* mem size in cache lines */ - for (i = 0; i < s; i++) + for (i = 0; i < buf_size; i++) cl_flush(&cp[i * CL_SIZE]); sb(); } -static void *malloc_and_init_memory(size_t s) +static void *malloc_and_init_memory(size_t buf_size) { void *p = NULL; uint64_t *p64; size_t s64; int ret; - ret = posix_memalign(&p, PAGE_SIZE, s); + ret = posix_memalign(&p, PAGE_SIZE, buf_size); if (ret < 0) return NULL; p64 = (uint64_t *)p; - s64 = s / sizeof(uint64_t); + s64 = buf_size / sizeof(uint64_t); while (s64 > 0) { *p64 = (uint64_t)rand(); @@ -76,12 +74,13 @@ static void *malloc_and_init_memory(size_t s) return p; } -static int fill_one_span_read(unsigned char *start_ptr, unsigned char *end_ptr) +static int fill_one_span_read(unsigned char *buf, size_t buf_size) { + unsigned char *end_ptr = buf + buf_size; unsigned char sum, *p; sum = 0; - p = start_ptr; + p = buf; while (p < end_ptr) { sum += *p; p += (CL_SIZE / 2); @@ -90,27 +89,26 @@ static int fill_one_span_read(unsigned char *start_ptr, unsigned char *end_ptr) return sum; } -static -void fill_one_span_write(unsigned char *start_ptr, unsigned char *end_ptr) +static void fill_one_span_write(unsigned char *buf, size_t buf_size) { + unsigned char *end_ptr = buf + buf_size; unsigned char *p; - p = start_ptr; + p = buf; while (p < end_ptr) { *p = '1'; p += (CL_SIZE / 2); } } -static int fill_cache_read(unsigned char *start_ptr, unsigned char *end_ptr, - char *resctrl_val) +static int fill_cache_read(unsigned char *buf, size_t buf_size, bool once) { int ret = 0; FILE *fp; while (1) { - ret = fill_one_span_read(start_ptr, end_ptr); - if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) + ret = fill_one_span_read(buf, buf_size); + if (once) break; } @@ -126,75 +124,52 @@ static int fill_cache_read(unsigned char *start_ptr, unsigned char *end_ptr, return 0; } -static int fill_cache_write(unsigned char *start_ptr, unsigned char *end_ptr, - char *resctrl_val) +static int fill_cache_write(unsigned char *buf, size_t buf_size, bool once) { while (1) { - fill_one_span_write(start_ptr, end_ptr); - if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) + fill_one_span_write(buf, buf_size); + if (once) break; } return 0; } -static int -fill_cache(unsigned long long buf_size, int malloc_and_init, int memflush, - int op, char *resctrl_val) +static int fill_cache(size_t buf_size, int memflush, int op, bool once) { - unsigned char *start_ptr, *end_ptr; - unsigned long long i; + unsigned char *buf; int ret; - if (malloc_and_init) - start_ptr = malloc_and_init_memory(buf_size); - else - start_ptr = malloc(buf_size); - - if (!start_ptr) + buf = malloc_and_init_memory(buf_size); + if (!buf) return -1; - startptr = start_ptr; - end_ptr = start_ptr + buf_size; - - /* - * It's better to touch the memory once to avoid any compiler - * optimizations - */ - if (!malloc_and_init) { - for (i = 0; i < buf_size; i++) - *start_ptr++ = (unsigned char)rand(); - } - - start_ptr = startptr; - /* Flush the memory before using to avoid "cache hot pages" effect */ if (memflush) - mem_flush(start_ptr, buf_size); + mem_flush(buf, buf_size); if (op == 0) - ret = fill_cache_read(start_ptr, end_ptr, resctrl_val); + ret = fill_cache_read(buf, buf_size, once); else - ret = fill_cache_write(start_ptr, end_ptr, resctrl_val); + ret = fill_cache_write(buf, buf_size, once); + + free(buf); if (ret) { printf("\n Error in fill cache read/write...\n"); return -1; } - free(startptr); return 0; } -int run_fill_buf(unsigned long span, int malloc_and_init_memory, - int memflush, int op, char *resctrl_val) +int run_fill_buf(size_t span, int memflush, int op, bool once) { - unsigned long long cache_size = span; + size_t cache_size = span; int ret; - ret = fill_cache(cache_size, malloc_and_init_memory, memflush, op, - resctrl_val); + ret = fill_cache(cache_size, memflush, op, once); if (ret) { printf("\n Error in fill cache\n"); return -1; diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c index cde3781a9ab0..4d2f145804b8 100644 --- a/tools/testing/selftests/resctrl/mba_test.c +++ b/tools/testing/selftests/resctrl/mba_test.c @@ -22,18 +22,12 @@ * con_mon grp, mon_grp in resctrl FS. * For each allocation, run 5 times in order to get average values. */ -static int mba_setup(int num, ...) +static int mba_setup(struct resctrl_val_param *p) { static int runs_per_allocation, allocation = 100; - struct resctrl_val_param *p; char allocation_str[64]; - va_list param; int ret; - va_start(param, num); - p = va_arg(param, struct resctrl_val_param *); - va_end(param); - if (runs_per_allocation >= NUM_OF_RUNS) runs_per_allocation = 0; @@ -154,7 +148,6 @@ int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd) .ctrlgrp = "c1", .mongrp = "m1", .cpu_no = cpu_no, - .mum_resctrlfs = true, .filename = RESULT_FILE_NAME, .bw_report = bw_report, .setup = mba_setup diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c index 538d35a6485a..c7de6f5977f6 100644 --- a/tools/testing/selftests/resctrl/mbm_test.c +++ b/tools/testing/selftests/resctrl/mbm_test.c @@ -15,7 +15,7 @@ #define NUM_OF_RUNS 5 static int -show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, int span) +show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, size_t span) { unsigned long avg_bw_imc = 0, avg_bw_resc = 0; unsigned long sum_bw_imc = 0, sum_bw_resc = 0; @@ -40,14 +40,14 @@ show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, int span) ksft_print_msg("%s Check MBM diff within %d%%\n", ret ? "Fail:" : "Pass:", MAX_DIFF_PERCENT); ksft_print_msg("avg_diff_per: %d%%\n", avg_diff_per); - ksft_print_msg("Span (MB): %d\n", span); + ksft_print_msg("Span (MB): %zu\n", span / MB); ksft_print_msg("avg_bw_imc: %lu\n", avg_bw_imc); ksft_print_msg("avg_bw_resc: %lu\n", avg_bw_resc); return ret; } -static int check_results(int span) +static int check_results(size_t span) { unsigned long bw_imc[NUM_OF_RUNS], bw_resc[NUM_OF_RUNS]; char temp[1024], *token_array[8]; @@ -86,16 +86,10 @@ static int check_results(int span) return ret; } -static int mbm_setup(int num, ...) +static int mbm_setup(struct resctrl_val_param *p) { - struct resctrl_val_param *p; - va_list param; int ret = 0; - va_start(param, num); - p = va_arg(param, struct resctrl_val_param *); - va_end(param); - /* Run NUM_OF_RUNS times */ if (p->num_of_runs >= NUM_OF_RUNS) return END_OF_TESTS; @@ -115,7 +109,7 @@ void mbm_test_cleanup(void) remove(RESULT_FILE_NAME); } -int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd) +int mbm_bw_change(size_t span, int cpu_no, char *bw_report, char **benchmark_cmd) { struct resctrl_val_param param = { .resctrl_val = MBM_STR, @@ -123,7 +117,6 @@ int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd) .mongrp = "m1", .span = span, .cpu_no = cpu_no, - .mum_resctrlfs = true, .filename = RESULT_FILE_NAME, .bw_report = bw_report, .setup = mbm_setup diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h index 87e39456dee0..838d1a438f33 100644 --- a/tools/testing/selftests/resctrl/resctrl.h +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -3,7 +3,6 @@ #ifndef RESCTRL_H #define RESCTRL_H #include <stdio.h> -#include <stdarg.h> #include <math.h> #include <errno.h> #include <sched.h> @@ -43,6 +42,7 @@ do { \ perror(err_msg); \ kill(ppid, SIGKILL); \ + umount_resctrlfs(); \ exit(EXIT_FAILURE); \ } while (0) @@ -53,7 +53,6 @@ * @mongrp: Name of the monitor group (mon grp) * @cpu_no: CPU number to which the benchmark would be binded * @span: Memory bytes accessed in each benchmark iteration - * @mum_resctrlfs: Should the resctrl FS be remounted? * @filename: Name of file to which the o/p should be written * @bw_report: Bandwidth report type (reads vs writes) * @setup: Call back function to setup test environment @@ -63,13 +62,12 @@ struct resctrl_val_param { char ctrlgrp[64]; char mongrp[64]; int cpu_no; - unsigned long span; - bool mum_resctrlfs; + size_t span; char filename[64]; char *bw_report; unsigned long mask; int num_of_runs; - int (*setup)(int num, ...); + int (*setup)(struct resctrl_val_param *param); }; #define MBM_STR "mbm" @@ -84,8 +82,8 @@ extern char llc_occup_path[1024]; int get_vendor(void); bool check_resctrlfs_support(void); int filter_dmesg(void); -int remount_resctrlfs(bool mum_resctrlfs); int get_resource_id(int cpu_no, int *resource_id); +int mount_resctrlfs(void); int umount_resctrlfs(void); int validate_bw_report_request(char *bw_report); bool validate_resctrl_feature_request(const char *resctrl_val); @@ -98,10 +96,9 @@ int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp, char *resctrl_val); int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags); -int run_fill_buf(unsigned long span, int malloc_and_init_memory, int memflush, - int op, char *resctrl_va); +int run_fill_buf(size_t span, int memflush, int op, bool once); int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param); -int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd); +int mbm_bw_change(size_t span, int cpu_no, char *bw_report, char **benchmark_cmd); void tests_cleanup(void); void mbm_test_cleanup(void); int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd); @@ -120,7 +117,7 @@ void cmt_test_cleanup(void); int get_core_sibling(int cpu_no); int measure_cache_vals(struct resctrl_val_param *param, int bm_pid); int show_cache_info(unsigned long sum_llc_val, int no_of_bits, - unsigned long cache_span, unsigned long max_diff, + size_t cache_span, unsigned long max_diff, unsigned long max_diff_percent, unsigned long num_of_runs, bool platform, bool cmt); diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c index 9b9751206e1c..d511daeb6851 100644 --- a/tools/testing/selftests/resctrl/resctrl_tests.c +++ b/tools/testing/selftests/resctrl/resctrl_tests.c @@ -70,60 +70,81 @@ void tests_cleanup(void) cat_test_cleanup(); } -static void run_mbm_test(bool has_ben, char **benchmark_cmd, int span, +static void run_mbm_test(char **benchmark_cmd, size_t span, int cpu_no, char *bw_report) { int res; ksft_print_msg("Starting MBM BW change ...\n"); + res = mount_resctrlfs(); + if (res) { + ksft_exit_fail_msg("Failed to mount resctrl FS\n"); + return; + } + if (!validate_resctrl_feature_request(MBM_STR) || (get_vendor() != ARCH_INTEL)) { ksft_test_result_skip("Hardware does not support MBM or MBM is disabled\n"); - return; + goto umount; } - if (!has_ben) - sprintf(benchmark_cmd[5], "%s", MBA_STR); res = mbm_bw_change(span, cpu_no, bw_report, benchmark_cmd); ksft_test_result(!res, "MBM: bw change\n"); if ((get_vendor() == ARCH_INTEL) && res) ksft_print_msg("Intel MBM may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n"); + +umount: + umount_resctrlfs(); } -static void run_mba_test(bool has_ben, char **benchmark_cmd, int span, - int cpu_no, char *bw_report) +static void run_mba_test(char **benchmark_cmd, int cpu_no, char *bw_report) { int res; ksft_print_msg("Starting MBA Schemata change ...\n"); + res = mount_resctrlfs(); + if (res) { + ksft_exit_fail_msg("Failed to mount resctrl FS\n"); + return; + } + if (!validate_resctrl_feature_request(MBA_STR) || (get_vendor() != ARCH_INTEL)) { ksft_test_result_skip("Hardware does not support MBA or MBA is disabled\n"); - return; + goto umount; } - if (!has_ben) - sprintf(benchmark_cmd[1], "%d", span); res = mba_schemata_change(cpu_no, bw_report, benchmark_cmd); ksft_test_result(!res, "MBA: schemata change\n"); + +umount: + umount_resctrlfs(); } -static void run_cmt_test(bool has_ben, char **benchmark_cmd, int cpu_no) +static void run_cmt_test(char **benchmark_cmd, int cpu_no) { int res; ksft_print_msg("Starting CMT test ...\n"); + + res = mount_resctrlfs(); + if (res) { + ksft_exit_fail_msg("Failed to mount resctrl FS\n"); + return; + } + if (!validate_resctrl_feature_request(CMT_STR)) { ksft_test_result_skip("Hardware does not support CMT or CMT is disabled\n"); - return; + goto umount; } - if (!has_ben) - sprintf(benchmark_cmd[5], "%s", CMT_STR); res = cmt_resctrl_val(cpu_no, 5, benchmark_cmd); ksft_test_result(!res, "CMT: test\n"); if ((get_vendor() == ARCH_INTEL) && res) ksft_print_msg("Intel CMT may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n"); + +umount: + umount_resctrlfs(); } static void run_cat_test(int cpu_no, int no_of_bits) @@ -132,22 +153,32 @@ static void run_cat_test(int cpu_no, int no_of_bits) ksft_print_msg("Starting CAT test ...\n"); + res = mount_resctrlfs(); + if (res) { + ksft_exit_fail_msg("Failed to mount resctrl FS\n"); + return; + } + if (!validate_resctrl_feature_request(CAT_STR)) { ksft_test_result_skip("Hardware does not support CAT or CAT is disabled\n"); - return; + goto umount; } res = cat_perf_miss_val(cpu_no, no_of_bits, "L3"); ksft_test_result(!res, "CAT: test\n"); + +umount: + umount_resctrlfs(); } int main(int argc, char **argv) { bool has_ben = false, mbm_test = true, mba_test = true, cmt_test = true; - int c, cpu_no = 1, span = 250, argc_new = argc, i, no_of_bits = 0; char *benchmark_cmd[BENCHMARK_ARGS], bw_report[64], bm_type[64]; char benchmark_cmd_area[BENCHMARK_ARGS][BENCHMARK_ARG_SIZE]; + int c, cpu_no = 1, argc_new = argc, i, no_of_bits = 0; int ben_ind, ben_count, tests = 0; + size_t span = 250 * MB; bool cat_test = true; for (i = 0; i < argc; i++) { @@ -232,16 +263,15 @@ int main(int argc, char **argv) benchmark_cmd[ben_count] = NULL; } else { /* If no benchmark is given by "-b" argument, use fill_buf. */ - for (i = 0; i < 6; i++) + for (i = 0; i < 5; i++) benchmark_cmd[i] = benchmark_cmd_area[i]; strcpy(benchmark_cmd[0], "fill_buf"); - sprintf(benchmark_cmd[1], "%d", span); + sprintf(benchmark_cmd[1], "%zu", span); strcpy(benchmark_cmd[2], "1"); - strcpy(benchmark_cmd[3], "1"); - strcpy(benchmark_cmd[4], "0"); - strcpy(benchmark_cmd[5], ""); - benchmark_cmd[6] = NULL; + strcpy(benchmark_cmd[3], "0"); + strcpy(benchmark_cmd[4], "false"); + benchmark_cmd[5] = NULL; } sprintf(bw_report, "reads"); @@ -250,23 +280,24 @@ int main(int argc, char **argv) if (!check_resctrlfs_support()) return ksft_exit_skip("resctrl FS does not exist. Enable X86_CPU_RESCTRL config option.\n"); + if (umount_resctrlfs()) + return ksft_exit_skip("resctrl FS unmount failed.\n"); + filter_dmesg(); ksft_set_plan(tests ? : 4); if (mbm_test) - run_mbm_test(has_ben, benchmark_cmd, span, cpu_no, bw_report); + run_mbm_test(benchmark_cmd, span, cpu_no, bw_report); if (mba_test) - run_mba_test(has_ben, benchmark_cmd, span, cpu_no, bw_report); + run_mba_test(benchmark_cmd, cpu_no, bw_report); if (cmt_test) - run_cmt_test(has_ben, benchmark_cmd, cpu_no); + run_cmt_test(benchmark_cmd, cpu_no); if (cat_test) run_cat_test(cpu_no, no_of_bits); - umount_resctrlfs(); - ksft_finished(); } diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c index ab1eab1e7ff6..f0f6c5f6e98b 100644 --- a/tools/testing/selftests/resctrl/resctrl_val.c +++ b/tools/testing/selftests/resctrl/resctrl_val.c @@ -648,10 +648,6 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param) return ret; } - ret = remount_resctrlfs(param->mum_resctrlfs); - if (ret) - return ret; - /* * If benchmark wasn't successfully started by child, then child should * kill parent, so save parent's pid @@ -763,7 +759,7 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param) /* Test runs until the callback setup() tells the test to stop. */ while (1) { - ret = param->setup(1, param); + ret = param->setup(param); if (ret == END_OF_TESTS) { ret = 0; break; @@ -788,7 +784,6 @@ unregister: signal_handler_unregister(); out: kill(bm_pid, SIGKILL); - umount_resctrlfs(); return ret; } diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c index fb00245dee92..bd36ee206602 100644 --- a/tools/testing/selftests/resctrl/resctrlfs.c +++ b/tools/testing/selftests/resctrl/resctrlfs.c @@ -48,29 +48,20 @@ static int find_resctrl_mount(char *buffer) } /* - * remount_resctrlfs - Remount resctrl FS at /sys/fs/resctrl - * @mum_resctrlfs: Should the resctrl FS be remounted? + * mount_resctrlfs - Mount resctrl FS at /sys/fs/resctrl * - * If not mounted, mount it. - * If mounted and mum_resctrlfs then remount resctrl FS. - * If mounted and !mum_resctrlfs then noop + * Mounts resctrl FS. Fails if resctrl FS is already mounted to avoid + * pre-existing settings interfering with the test results. * * Return: 0 on success, non-zero on failure */ -int remount_resctrlfs(bool mum_resctrlfs) +int mount_resctrlfs(void) { - char mountpoint[256]; int ret; - ret = find_resctrl_mount(mountpoint); - if (ret) - strcpy(mountpoint, RESCTRL_PATH); - - if (!ret && mum_resctrlfs && umount(mountpoint)) - ksft_print_msg("Fail: unmounting \"%s\"\n", mountpoint); - - if (!ret && !mum_resctrlfs) - return 0; + ret = find_resctrl_mount(NULL); + if (ret != -ENOENT) + return -1; ksft_print_msg("Mounting resctrl to \"%s\"\n", RESCTRL_PATH); ret = mount("resctrl", RESCTRL_PATH, "resctrl", 0, NULL); @@ -82,10 +73,16 @@ int remount_resctrlfs(bool mum_resctrlfs) int umount_resctrlfs(void) { - if (find_resctrl_mount(NULL)) + char mountpoint[256]; + int ret; + + ret = find_resctrl_mount(mountpoint); + if (ret == -ENOENT) return 0; + if (ret) + return ret; - if (umount(RESCTRL_PATH)) { + if (umount(mountpoint)) { perror("# Unable to umount resctrl"); return errno; @@ -305,10 +302,10 @@ int taskset_benchmark(pid_t bm_pid, int cpu_no) */ void run_benchmark(int signum, siginfo_t *info, void *ucontext) { - int operation, ret, malloc_and_init_memory, memflush; - unsigned long span, buffer_span; + int operation, ret, memflush; char **benchmark_cmd; - char resctrl_val[64]; + size_t span; + bool once; FILE *fp; benchmark_cmd = info->si_ptr; @@ -324,18 +321,16 @@ void run_benchmark(int signum, siginfo_t *info, void *ucontext) if (strcmp(benchmark_cmd[0], "fill_buf") == 0) { /* Execute default fill_buf benchmark */ span = strtoul(benchmark_cmd[1], NULL, 10); - malloc_and_init_memory = atoi(benchmark_cmd[2]); - memflush = atoi(benchmark_cmd[3]); - operation = atoi(benchmark_cmd[4]); - sprintf(resctrl_val, "%s", benchmark_cmd[5]); - - if (strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) - buffer_span = span * MB; + memflush = atoi(benchmark_cmd[2]); + operation = atoi(benchmark_cmd[3]); + if (!strcmp(benchmark_cmd[4], "true")) + once = true; + else if (!strcmp(benchmark_cmd[4], "false")) + once = false; else - buffer_span = span; + PARENT_EXIT("Invalid once parameter"); - if (run_fill_buf(buffer_span, malloc_and_init_memory, memflush, - operation, resctrl_val)) + if (run_fill_buf(span, memflush, operation, once)) fprintf(stderr, "Error in running fill buffer\n"); } else { /* Execute specified benchmark */ @@ -611,7 +606,8 @@ char *fgrep(FILE *inf, const char *str) * validate_resctrl_feature_request - Check if requested feature is valid. * @resctrl_val: Requested feature * - * Return: True if the feature is supported, else false + * Return: True if the feature is supported, else false. False is also + * returned if resctrl FS is not mounted. */ bool validate_resctrl_feature_request(const char *resctrl_val) { @@ -619,11 +615,13 @@ bool validate_resctrl_feature_request(const char *resctrl_val) bool found = false; char *res; FILE *inf; + int ret; if (!resctrl_val) return false; - if (remount_resctrlfs(false)) + ret = find_resctrl_mount(NULL); + if (ret) return false; if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) { diff --git a/tools/testing/selftests/riscv/Makefile b/tools/testing/selftests/riscv/Makefile index 32a72902d045..4a9ff515a3a0 100644 --- a/tools/testing/selftests/riscv/Makefile +++ b/tools/testing/selftests/riscv/Makefile @@ -5,7 +5,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),riscv)) -RISCV_SUBTARGETS ?= hwprobe +RISCV_SUBTARGETS ?= hwprobe vector mm else RISCV_SUBTARGETS := endif @@ -43,7 +43,7 @@ run_tests: all done # Avoid any output on non riscv on emit_tests -emit_tests: all +emit_tests: @for DIR in $(RISCV_SUBTARGETS); do \ BUILD_TARGET=$(OUTPUT)/$$DIR; \ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@; \ diff --git a/tools/testing/selftests/riscv/hwprobe/.gitignore b/tools/testing/selftests/riscv/hwprobe/.gitignore new file mode 100644 index 000000000000..8113dc3bdd03 --- /dev/null +++ b/tools/testing/selftests/riscv/hwprobe/.gitignore @@ -0,0 +1 @@ +hwprobe diff --git a/tools/testing/selftests/riscv/mm/.gitignore b/tools/testing/selftests/riscv/mm/.gitignore new file mode 100644 index 000000000000..5c2c57cb950c --- /dev/null +++ b/tools/testing/selftests/riscv/mm/.gitignore @@ -0,0 +1,2 @@ +mmap_bottomup +mmap_default diff --git a/tools/testing/selftests/riscv/mm/Makefile b/tools/testing/selftests/riscv/mm/Makefile new file mode 100644 index 000000000000..c333263f2b27 --- /dev/null +++ b/tools/testing/selftests/riscv/mm/Makefile @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2021 ARM Limited +# Originally tools/testing/arm64/abi/Makefile + +# Additional include paths needed by kselftest.h and local headers +CFLAGS += -D_GNU_SOURCE -std=gnu99 -I. + +TEST_GEN_FILES := mmap_default mmap_bottomup + +TEST_PROGS := run_mmap.sh + +include ../../lib.mk + +$(OUTPUT)/mm: mmap_default.c mmap_bottomup.c mmap_tests.h + $(CC) -o$@ $(CFLAGS) $(LDFLAGS) $^ diff --git a/tools/testing/selftests/riscv/mm/mmap_bottomup.c b/tools/testing/selftests/riscv/mm/mmap_bottomup.c new file mode 100644 index 000000000000..1757d19ca89b --- /dev/null +++ b/tools/testing/selftests/riscv/mm/mmap_bottomup.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <sys/mman.h> +#include <mmap_test.h> + +#include "../../kselftest_harness.h" + +TEST(infinite_rlimit) +{ +// Only works on 64 bit +#if __riscv_xlen == 64 + struct addresses mmap_addresses; + + EXPECT_EQ(BOTTOM_UP, memory_layout()); + + do_mmaps(&mmap_addresses); + + EXPECT_NE(MAP_FAILED, mmap_addresses.no_hint); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_37_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_38_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_46_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_47_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_55_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_56_addr); + + EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.no_hint); + EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_37_addr); + EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_38_addr); + EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_46_addr); + EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_47_addr); + EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_55_addr); + EXPECT_GT(1UL << 56, (unsigned long)mmap_addresses.on_56_addr); +#endif +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/riscv/mm/mmap_default.c b/tools/testing/selftests/riscv/mm/mmap_default.c new file mode 100644 index 000000000000..c63c60b9397e --- /dev/null +++ b/tools/testing/selftests/riscv/mm/mmap_default.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <sys/mman.h> +#include <mmap_test.h> + +#include "../../kselftest_harness.h" + +TEST(default_rlimit) +{ +// Only works on 64 bit +#if __riscv_xlen == 64 + struct addresses mmap_addresses; + + EXPECT_EQ(TOP_DOWN, memory_layout()); + + do_mmaps(&mmap_addresses); + + EXPECT_NE(MAP_FAILED, mmap_addresses.no_hint); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_37_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_38_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_46_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_47_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_55_addr); + EXPECT_NE(MAP_FAILED, mmap_addresses.on_56_addr); + + EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.no_hint); + EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_37_addr); + EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_38_addr); + EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_46_addr); + EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_47_addr); + EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_55_addr); + EXPECT_GT(1UL << 56, (unsigned long)mmap_addresses.on_56_addr); +#endif +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/riscv/mm/mmap_test.h b/tools/testing/selftests/riscv/mm/mmap_test.h new file mode 100644 index 000000000000..9b8434f62f57 --- /dev/null +++ b/tools/testing/selftests/riscv/mm/mmap_test.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef _TESTCASES_MMAP_TEST_H +#define _TESTCASES_MMAP_TEST_H +#include <sys/mman.h> +#include <sys/resource.h> +#include <stddef.h> + +#define TOP_DOWN 0 +#define BOTTOM_UP 1 + +struct addresses { + int *no_hint; + int *on_37_addr; + int *on_38_addr; + int *on_46_addr; + int *on_47_addr; + int *on_55_addr; + int *on_56_addr; +}; + +static inline void do_mmaps(struct addresses *mmap_addresses) +{ + /* + * Place all of the hint addresses on the boundaries of mmap + * sv39, sv48, sv57 + * User addresses end at 1<<38, 1<<47, 1<<56 respectively + */ + void *on_37_bits = (void *)(1UL << 37); + void *on_38_bits = (void *)(1UL << 38); + void *on_46_bits = (void *)(1UL << 46); + void *on_47_bits = (void *)(1UL << 47); + void *on_55_bits = (void *)(1UL << 55); + void *on_56_bits = (void *)(1UL << 56); + + int prot = PROT_READ | PROT_WRITE; + int flags = MAP_PRIVATE | MAP_ANONYMOUS; + + mmap_addresses->no_hint = + mmap(NULL, 5 * sizeof(int), prot, flags, 0, 0); + mmap_addresses->on_37_addr = + mmap(on_37_bits, 5 * sizeof(int), prot, flags, 0, 0); + mmap_addresses->on_38_addr = + mmap(on_38_bits, 5 * sizeof(int), prot, flags, 0, 0); + mmap_addresses->on_46_addr = + mmap(on_46_bits, 5 * sizeof(int), prot, flags, 0, 0); + mmap_addresses->on_47_addr = + mmap(on_47_bits, 5 * sizeof(int), prot, flags, 0, 0); + mmap_addresses->on_55_addr = + mmap(on_55_bits, 5 * sizeof(int), prot, flags, 0, 0); + mmap_addresses->on_56_addr = + mmap(on_56_bits, 5 * sizeof(int), prot, flags, 0, 0); +} + +static inline int memory_layout(void) +{ + int prot = PROT_READ | PROT_WRITE; + int flags = MAP_PRIVATE | MAP_ANONYMOUS; + + void *value1 = mmap(NULL, sizeof(int), prot, flags, 0, 0); + void *value2 = mmap(NULL, sizeof(int), prot, flags, 0, 0); + + return value2 > value1; +} +#endif /* _TESTCASES_MMAP_TEST_H */ diff --git a/tools/testing/selftests/riscv/mm/run_mmap.sh b/tools/testing/selftests/riscv/mm/run_mmap.sh new file mode 100755 index 000000000000..ca5ad7c48bad --- /dev/null +++ b/tools/testing/selftests/riscv/mm/run_mmap.sh @@ -0,0 +1,12 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +original_stack_limit=$(ulimit -s) + +./mmap_default + +# Force mmap_bottomup to be ran with bottomup memory due to +# the unlimited stack +ulimit -s unlimited +./mmap_bottomup +ulimit -s $original_stack_limit diff --git a/tools/testing/selftests/riscv/vector/.gitignore b/tools/testing/selftests/riscv/vector/.gitignore new file mode 100644 index 000000000000..9ae7964491d5 --- /dev/null +++ b/tools/testing/selftests/riscv/vector/.gitignore @@ -0,0 +1,3 @@ +vstate_exec_nolibc +vstate_prctl +v_initval_nolibc diff --git a/tools/testing/selftests/riscv/vector/Makefile b/tools/testing/selftests/riscv/vector/Makefile new file mode 100644 index 000000000000..bfff0ff4f3be --- /dev/null +++ b/tools/testing/selftests/riscv/vector/Makefile @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2021 ARM Limited +# Originally tools/testing/arm64/abi/Makefile + +TEST_GEN_PROGS := vstate_prctl v_initval_nolibc +TEST_GEN_PROGS_EXTENDED := vstate_exec_nolibc + +include ../../lib.mk + +$(OUTPUT)/vstate_prctl: vstate_prctl.c ../hwprobe/sys_hwprobe.S + $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^ + +$(OUTPUT)/vstate_exec_nolibc: vstate_exec_nolibc.c + $(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \ + -Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc + +$(OUTPUT)/v_initval_nolibc: v_initval_nolibc.c + $(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \ + -Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc diff --git a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c new file mode 100644 index 000000000000..66764edb0d52 --- /dev/null +++ b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "../../kselftest.h" +#define MAX_VSIZE (8192 * 32) + +void dump(char *ptr, int size) +{ + int i = 0; + + for (i = 0; i < size; i++) { + if (i != 0) { + if (i % 16 == 0) + printf("\n"); + else if (i % 8 == 0) + printf(" "); + } + printf("%02x ", ptr[i]); + } + printf("\n"); +} + +int main(void) +{ + int i; + unsigned long vl; + char *datap, *tmp; + + datap = malloc(MAX_VSIZE); + if (!datap) { + ksft_test_result_fail("fail to allocate memory for size = %lu\n", MAX_VSIZE); + exit(-1); + } + + tmp = datap; + asm volatile ( + ".option push\n\t" + ".option arch, +v\n\t" + "vsetvli %0, x0, e8, m8, ta, ma\n\t" + "vse8.v v0, (%2)\n\t" + "add %1, %2, %0\n\t" + "vse8.v v8, (%1)\n\t" + "add %1, %1, %0\n\t" + "vse8.v v16, (%1)\n\t" + "add %1, %1, %0\n\t" + "vse8.v v24, (%1)\n\t" + ".option pop\n\t" + : "=&r" (vl), "=r" (tmp) : "r" (datap) : "memory"); + + ksft_print_msg("vl = %lu\n", vl); + + if (datap[0] != 0x00 && datap[0] != 0xff) { + ksft_test_result_fail("v-regesters are not properly initialized\n"); + dump(datap, vl * 4); + exit(-1); + } + + for (i = 1; i < vl * 4; i++) { + if (datap[i] != datap[0]) { + ksft_test_result_fail("detect stale values on v-regesters\n"); + dump(datap, vl * 4); + exit(-2); + } + } + + free(datap); + ksft_exit_pass(); + return 0; +} diff --git a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c new file mode 100644 index 000000000000..2c0d2b1126c1 --- /dev/null +++ b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define THIS_PROGRAM "./vstate_exec_nolibc" + +int main(int argc, char **argv) +{ + int rc, pid, status, test_inherit = 0; + long ctrl, ctrl_c; + char *exec_argv[2], *exec_envp[2]; + + if (argc > 1) + test_inherit = 1; + + ctrl = my_syscall1(__NR_prctl, PR_RISCV_V_GET_CONTROL); + if (ctrl < 0) { + puts("PR_RISCV_V_GET_CONTROL is not supported\n"); + return ctrl; + } + + if (test_inherit) { + pid = fork(); + if (pid == -1) { + puts("fork failed\n"); + exit(-1); + } + + /* child */ + if (!pid) { + exec_argv[0] = THIS_PROGRAM; + exec_argv[1] = NULL; + exec_envp[0] = NULL; + exec_envp[1] = NULL; + /* launch the program again to check inherit */ + rc = execve(THIS_PROGRAM, exec_argv, exec_envp); + if (rc) { + puts("child execve failed\n"); + exit(-1); + } + } + + } else { + pid = fork(); + if (pid == -1) { + puts("fork failed\n"); + exit(-1); + } + + if (!pid) { + rc = my_syscall1(__NR_prctl, PR_RISCV_V_GET_CONTROL); + if (rc != ctrl) { + puts("child's vstate_ctrl not equal to parent's\n"); + exit(-1); + } + asm volatile (".option push\n\t" + ".option arch, +v\n\t" + "vsetvli x0, x0, e32, m8, ta, ma\n\t" + ".option pop\n\t" + ); + exit(ctrl); + } + } + + rc = waitpid(-1, &status, 0); + + if (WIFEXITED(status) && WEXITSTATUS(status) == -1) { + puts("child exited abnormally\n"); + exit(-1); + } + + if (WIFSIGNALED(status)) { + if (WTERMSIG(status) != SIGILL) { + puts("child was terminated by unexpected signal\n"); + exit(-1); + } + + if ((ctrl & PR_RISCV_V_VSTATE_CTRL_CUR_MASK) != PR_RISCV_V_VSTATE_CTRL_OFF) { + puts("child signaled by illegal V access but vstate_ctrl is not off\n"); + exit(-1); + } + + /* child terminated, and its vstate_ctrl is off */ + exit(ctrl); + } + + ctrl_c = WEXITSTATUS(status); + if (test_inherit) { + if (ctrl & PR_RISCV_V_VSTATE_CTRL_INHERIT) { + if (!(ctrl_c & PR_RISCV_V_VSTATE_CTRL_INHERIT)) { + puts("parent has inherit bit, but child has not\n"); + exit(-1); + } + } + rc = (ctrl & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) >> 2; + if (rc != PR_RISCV_V_VSTATE_CTRL_DEFAULT) { + if (rc != (ctrl_c & PR_RISCV_V_VSTATE_CTRL_CUR_MASK)) { + puts("parent's next setting does not equal to child's\n"); + exit(-1); + } + + if (!(ctrl & PR_RISCV_V_VSTATE_CTRL_INHERIT)) { + if ((ctrl_c & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) != + PR_RISCV_V_VSTATE_CTRL_DEFAULT) { + puts("must clear child's next vstate_ctrl if !inherit\n"); + exit(-1); + } + } + } + } + return ctrl; +} diff --git a/tools/testing/selftests/riscv/vector/vstate_prctl.c b/tools/testing/selftests/riscv/vector/vstate_prctl.c new file mode 100644 index 000000000000..b348b475be57 --- /dev/null +++ b/tools/testing/selftests/riscv/vector/vstate_prctl.c @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <sys/prctl.h> +#include <unistd.h> +#include <asm/hwprobe.h> +#include <errno.h> +#include <sys/wait.h> + +#include "../../kselftest.h" + +/* + * Rather than relying on having a new enough libc to define this, just do it + * ourselves. This way we don't need to be coupled to a new-enough libc to + * contain the call. + */ +long riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, + size_t cpu_count, unsigned long *cpus, unsigned int flags); + +#define NEXT_PROGRAM "./vstate_exec_nolibc" +static int launch_test(int test_inherit) +{ + char *exec_argv[3], *exec_envp[1]; + int rc, pid, status; + + pid = fork(); + if (pid < 0) { + ksft_test_result_fail("fork failed %d", pid); + return -1; + } + + if (!pid) { + exec_argv[0] = NEXT_PROGRAM; + exec_argv[1] = test_inherit != 0 ? "x" : NULL; + exec_argv[2] = NULL; + exec_envp[0] = NULL; + /* launch the program again to check inherit */ + rc = execve(NEXT_PROGRAM, exec_argv, exec_envp); + if (rc) { + perror("execve"); + ksft_test_result_fail("child execve failed %d\n", rc); + exit(-1); + } + } + + rc = waitpid(-1, &status, 0); + if (rc < 0) { + ksft_test_result_fail("waitpid failed\n"); + return -3; + } + + if ((WIFEXITED(status) && WEXITSTATUS(status) == -1) || + WIFSIGNALED(status)) { + ksft_test_result_fail("child exited abnormally\n"); + return -4; + } + + return WEXITSTATUS(status); +} + +int test_and_compare_child(long provided, long expected, int inherit) +{ + int rc; + + rc = prctl(PR_RISCV_V_SET_CONTROL, provided); + if (rc != 0) { + ksft_test_result_fail("prctl with provided arg %lx failed with code %d\n", + provided, rc); + return -1; + } + rc = launch_test(inherit); + if (rc != expected) { + ksft_test_result_fail("Test failed, check %d != %d\n", rc, + expected); + return -2; + } + return 0; +} + +#define PR_RISCV_V_VSTATE_CTRL_CUR_SHIFT 0 +#define PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT 2 + +int main(void) +{ + struct riscv_hwprobe pair; + long flag, expected; + long rc; + + pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0; + rc = riscv_hwprobe(&pair, 1, 0, NULL, 0); + if (rc < 0) { + ksft_test_result_fail("hwprobe() failed with %d\n", rc); + return -1; + } + + if (pair.key != RISCV_HWPROBE_KEY_IMA_EXT_0) { + ksft_test_result_fail("hwprobe cannot probe RISCV_HWPROBE_KEY_IMA_EXT_0\n"); + return -2; + } + + if (!(pair.value & RISCV_HWPROBE_IMA_V)) { + rc = prctl(PR_RISCV_V_GET_CONTROL); + if (rc != -1 || errno != EINVAL) { + ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without V\n"); + return -3; + } + + rc = prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON); + if (rc != -1 || errno != EINVAL) { + ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without V\n"); + return -4; + } + + ksft_test_result_skip("Vector not supported\n"); + return 0; + } + + flag = PR_RISCV_V_VSTATE_CTRL_ON; + rc = prctl(PR_RISCV_V_SET_CONTROL, flag); + if (rc != 0) { + ksft_test_result_fail("Enabling V for current should always success\n"); + return -5; + } + + flag = PR_RISCV_V_VSTATE_CTRL_OFF; + rc = prctl(PR_RISCV_V_SET_CONTROL, flag); + if (rc != -1 || errno != EPERM) { + ksft_test_result_fail("Disabling current's V alive must fail with EPERM(%d)\n", + errno); + return -5; + } + + /* Turn on next's vector explicitly and test */ + flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; + if (test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_ON, 0)) + return -6; + + /* Turn off next's vector explicitly and test */ + flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; + if (test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_OFF, 0)) + return -7; + + /* Turn on next's vector explicitly and test inherit */ + flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; + flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT; + expected = flag | PR_RISCV_V_VSTATE_CTRL_ON; + if (test_and_compare_child(flag, expected, 0)) + return -8; + + if (test_and_compare_child(flag, expected, 1)) + return -9; + + /* Turn off next's vector explicitly and test inherit */ + flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT; + flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT; + expected = flag | PR_RISCV_V_VSTATE_CTRL_OFF; + if (test_and_compare_child(flag, expected, 0)) + return -10; + + if (test_and_compare_child(flag, expected, 1)) + return -11; + + /* arguments should fail with EINVAL */ + rc = prctl(PR_RISCV_V_SET_CONTROL, 0xff0); + if (rc != -1 || errno != EINVAL) { + ksft_test_result_fail("Undefined control argument should return EINVAL\n"); + return -12; + } + + rc = prctl(PR_RISCV_V_SET_CONTROL, 0x3); + if (rc != -1 || errno != EINVAL) { + ksft_test_result_fail("Undefined control argument should return EINVAL\n"); + return -12; + } + + rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc); + if (rc != -1 || errno != EINVAL) { + ksft_test_result_fail("Undefined control argument should return EINVAL\n"); + return -12; + } + + rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc); + if (rc != -1 || errno != EINVAL) { + ksft_test_result_fail("Undefined control argument should return EINVAL\n"); + return -12; + } + + ksft_test_result_pass("tests for riscv_v_vstate_ctrl pass\n"); + ksft_exit_pass(); + return 0; +} diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile index b357ba24af06..5a3432fceb58 100644 --- a/tools/testing/selftests/rseq/Makefile +++ b/tools/testing/selftests/rseq/Makefile @@ -4,8 +4,10 @@ ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),) CLANG_FLAGS += -no-integrated-as endif +top_srcdir = ../../../.. + CFLAGS += -O2 -Wall -g -I./ $(KHDR_INCLUDES) -L$(OUTPUT) -Wl,-rpath=./ \ - $(CLANG_FLAGS) + $(CLANG_FLAGS) -I$(top_srcdir)/tools/include LDLIBS += -lpthread -ldl # Own dependencies because we only want to build against 1st prerequisite, but @@ -31,7 +33,7 @@ $(OUTPUT)/%: %.c $(TEST_GEN_PROGS_EXTENDED) rseq.h rseq-*.h $(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@ $(OUTPUT)/basic_percpu_ops_mm_cid_test: basic_percpu_ops_test.c $(TEST_GEN_PROGS_EXTENDED) rseq.h rseq-*.h - $(CC) $(CFLAGS) -DBUILDOPT_RSEQ_PERCPU_MM_CID_ID $< $(LDLIBS) -lrseq -o $@ + $(CC) $(CFLAGS) -DBUILDOPT_RSEQ_PERCPU_MM_CID $< $(LDLIBS) -lrseq -o $@ $(OUTPUT)/param_test_benchmark: param_test.c $(TEST_GEN_PROGS_EXTENDED) \ rseq.h rseq-*.h diff --git a/tools/testing/selftests/rseq/compiler.h b/tools/testing/selftests/rseq/compiler.h index f47092bddeba..49d62fbd6dda 100644 --- a/tools/testing/selftests/rseq/compiler.h +++ b/tools/testing/selftests/rseq/compiler.h @@ -33,4 +33,30 @@ #define RSEQ_COMBINE_TOKENS(_tokena, _tokenb) \ RSEQ__COMBINE_TOKENS(_tokena, _tokenb) +#ifdef __cplusplus +#define rseq_unqual_scalar_typeof(x) \ + std::remove_cv<std::remove_reference<decltype(x)>::type>::type +#else +#define rseq_scalar_type_to_expr(type) \ + unsigned type: (unsigned type)0, \ + signed type: (signed type)0 + +/* + * Use C11 _Generic to express unqualified type from expression. This removes + * volatile qualifier from expression type. + */ +#define rseq_unqual_scalar_typeof(x) \ + __typeof__( \ + _Generic((x), \ + char: (char)0, \ + rseq_scalar_type_to_expr(char), \ + rseq_scalar_type_to_expr(short), \ + rseq_scalar_type_to_expr(int), \ + rseq_scalar_type_to_expr(long), \ + rseq_scalar_type_to_expr(long long), \ + default: (x) \ + ) \ + ) +#endif + #endif /* RSEQ_COMPILER_H_ */ diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h index 8414fc3eac15..d887b3bbe257 100644 --- a/tools/testing/selftests/rseq/rseq-arm.h +++ b/tools/testing/selftests/rseq/rseq-arm.h @@ -66,7 +66,7 @@ #define rseq_smp_load_acquire(p) \ __extension__ ({ \ - __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \ + rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \ rseq_smp_mb(); \ ____p1; \ }) @@ -76,7 +76,7 @@ __extension__ ({ \ #define rseq_smp_store_release(p, v) \ do { \ rseq_smp_mb(); \ - RSEQ_WRITE_ONCE(*p, v); \ + RSEQ_WRITE_ONCE(*(p), v); \ } while (0) #define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \ diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h index 85b90977e7e6..21e1626a7235 100644 --- a/tools/testing/selftests/rseq/rseq-arm64.h +++ b/tools/testing/selftests/rseq/rseq-arm64.h @@ -27,59 +27,61 @@ #define rseq_smp_load_acquire(p) \ __extension__ ({ \ - __typeof(*p) ____p1; \ - switch (sizeof(*p)) { \ + union { rseq_unqual_scalar_typeof(*(p)) __val; char __c[sizeof(*(p))]; } __u; \ + switch (sizeof(*(p))) { \ case 1: \ - asm volatile ("ldarb %w0, %1" \ - : "=r" (*(__u8 *)p) \ - : "Q" (*p) : "memory"); \ + __asm__ __volatile__ ("ldarb %w0, %1" \ + : "=r" (*(__u8 *)__u.__c) \ + : "Q" (*(p)) : "memory"); \ break; \ case 2: \ - asm volatile ("ldarh %w0, %1" \ - : "=r" (*(__u16 *)p) \ - : "Q" (*p) : "memory"); \ + __asm__ __volatile__ ("ldarh %w0, %1" \ + : "=r" (*(__u16 *)__u.__c) \ + : "Q" (*(p)) : "memory"); \ break; \ case 4: \ - asm volatile ("ldar %w0, %1" \ - : "=r" (*(__u32 *)p) \ - : "Q" (*p) : "memory"); \ + __asm__ __volatile__ ("ldar %w0, %1" \ + : "=r" (*(__u32 *)__u.__c) \ + : "Q" (*(p)) : "memory"); \ break; \ case 8: \ - asm volatile ("ldar %0, %1" \ - : "=r" (*(__u64 *)p) \ - : "Q" (*p) : "memory"); \ + __asm__ __volatile__ ("ldar %0, %1" \ + : "=r" (*(__u64 *)__u.__c) \ + : "Q" (*(p)) : "memory"); \ break; \ } \ - ____p1; \ + (rseq_unqual_scalar_typeof(*(p)))__u.__val; \ }) #define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb() #define rseq_smp_store_release(p, v) \ do { \ - switch (sizeof(*p)) { \ + union { rseq_unqual_scalar_typeof(*(p)) __val; char __c[sizeof(*(p))]; } __u = \ + { .__val = (rseq_unqual_scalar_typeof(*(p))) (v) }; \ + switch (sizeof(*(p))) { \ case 1: \ - asm volatile ("stlrb %w1, %0" \ - : "=Q" (*p) \ - : "r" ((__u8)v) \ + __asm__ __volatile__ ("stlrb %w1, %0" \ + : "=Q" (*(p)) \ + : "r" (*(__u8 *)__u.__c) \ : "memory"); \ break; \ case 2: \ - asm volatile ("stlrh %w1, %0" \ - : "=Q" (*p) \ - : "r" ((__u16)v) \ + __asm__ __volatile__ ("stlrh %w1, %0" \ + : "=Q" (*(p)) \ + : "r" (*(__u16 *)__u.__c) \ : "memory"); \ break; \ case 4: \ - asm volatile ("stlr %w1, %0" \ - : "=Q" (*p) \ - : "r" ((__u32)v) \ + __asm__ __volatile__ ("stlr %w1, %0" \ + : "=Q" (*(p)) \ + : "r" (*(__u32 *)__u.__c) \ : "memory"); \ break; \ case 8: \ - asm volatile ("stlr %1, %0" \ - : "=Q" (*p) \ - : "r" ((__u64)v) \ + __asm__ __volatile__ ("stlr %1, %0" \ + : "=Q" (*(p)) \ + : "r" (*(__u64 *)__u.__c) \ : "memory"); \ break; \ } \ diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h index 50b950cf9585..42ef8e946693 100644 --- a/tools/testing/selftests/rseq/rseq-mips.h +++ b/tools/testing/selftests/rseq/rseq-mips.h @@ -45,7 +45,7 @@ #define rseq_smp_load_acquire(p) \ __extension__ ({ \ - __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \ + rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \ rseq_smp_mb(); \ ____p1; \ }) @@ -55,7 +55,7 @@ __extension__ ({ \ #define rseq_smp_store_release(p, v) \ do { \ rseq_smp_mb(); \ - RSEQ_WRITE_ONCE(*p, v); \ + RSEQ_WRITE_ONCE(*(p), v); \ } while (0) #if _MIPS_SZLONG == 64 diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h index dc9190facee9..57b160597189 100644 --- a/tools/testing/selftests/rseq/rseq-ppc.h +++ b/tools/testing/selftests/rseq/rseq-ppc.h @@ -23,7 +23,7 @@ #define rseq_smp_load_acquire(p) \ __extension__ ({ \ - __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \ + rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \ rseq_smp_lwsync(); \ ____p1; \ }) @@ -33,7 +33,7 @@ __extension__ ({ \ #define rseq_smp_store_release(p, v) \ do { \ rseq_smp_lwsync(); \ - RSEQ_WRITE_ONCE(*p, v); \ + RSEQ_WRITE_ONCE(*(p), v); \ } while (0) /* diff --git a/tools/testing/selftests/rseq/rseq-riscv.h b/tools/testing/selftests/rseq/rseq-riscv.h index 17932a79e066..37e598d0a365 100644 --- a/tools/testing/selftests/rseq/rseq-riscv.h +++ b/tools/testing/selftests/rseq/rseq-riscv.h @@ -36,8 +36,8 @@ #define rseq_smp_load_acquire(p) \ __extension__ ({ \ - __typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \ - RISCV_FENCE(r, rw) \ + rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \ + RISCV_FENCE(r, rw); \ ____p1; \ }) @@ -46,7 +46,7 @@ __extension__ ({ \ #define rseq_smp_store_release(p, v) \ do { \ RISCV_FENCE(rw, w); \ - RSEQ_WRITE_ONCE(*(p), v); \ + RSEQ_WRITE_ONCE(*(p), v); \ } while (0) #define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \ diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h index 46c92598acc7..33baaa9f9997 100644 --- a/tools/testing/selftests/rseq/rseq-s390.h +++ b/tools/testing/selftests/rseq/rseq-s390.h @@ -15,7 +15,7 @@ #define rseq_smp_load_acquire(p) \ __extension__ ({ \ - __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \ + rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \ rseq_barrier(); \ ____p1; \ }) @@ -25,7 +25,7 @@ __extension__ ({ \ #define rseq_smp_store_release(p, v) \ do { \ rseq_barrier(); \ - RSEQ_WRITE_ONCE(*p, v); \ + RSEQ_WRITE_ONCE(*(p), v); \ } while (0) #ifdef __s390x__ diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h index fb65ef54b0fb..a2aa428ba151 100644 --- a/tools/testing/selftests/rseq/rseq-x86.h +++ b/tools/testing/selftests/rseq/rseq-x86.h @@ -42,7 +42,7 @@ #define rseq_smp_load_acquire(p) \ __extension__ ({ \ - __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \ + rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \ rseq_barrier(); \ ____p1; \ }) @@ -52,7 +52,7 @@ __extension__ ({ \ #define rseq_smp_store_release(p, v) \ do { \ rseq_barrier(); \ - RSEQ_WRITE_ONCE(*p, v); \ + RSEQ_WRITE_ONCE(*(p), v); \ } while (0) #define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \ diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 4e4aa006004c..96e812bdf8a4 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -31,12 +31,22 @@ #include <sys/auxv.h> #include <linux/auxvec.h> +#include <linux/compiler.h> + #include "../kselftest.h" #include "rseq.h" -static const ptrdiff_t *libc_rseq_offset_p; -static const unsigned int *libc_rseq_size_p; -static const unsigned int *libc_rseq_flags_p; +/* + * Define weak versions to play nice with binaries that are statically linked + * against a libc that doesn't support registering its own rseq. + */ +__weak ptrdiff_t __rseq_offset; +__weak unsigned int __rseq_size; +__weak unsigned int __rseq_flags; + +static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset; +static const unsigned int *libc_rseq_size_p = &__rseq_size; +static const unsigned int *libc_rseq_flags_p = &__rseq_flags; /* Offset from the thread pointer to the rseq area. */ ptrdiff_t rseq_offset; @@ -155,9 +165,17 @@ unsigned int get_rseq_feature_size(void) static __attribute__((constructor)) void rseq_init(void) { - libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); - libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); - libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); + /* + * If the libc's registered rseq size isn't already valid, it may be + * because the binary is dynamically linked and not necessarily due to + * libc not having registered a restartable sequence. Try to find the + * symbols if that's the case. + */ + if (!*libc_rseq_size_p) { + libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset"); + libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size"); + libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags"); + } if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p && *libc_rseq_size_p != 0) { /* rseq registration owned by glibc */ diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh index 97165a83df63..92743980e553 100755 --- a/tools/testing/selftests/run_kselftest.sh +++ b/tools/testing/selftests/run_kselftest.sh @@ -26,6 +26,7 @@ Usage: $0 [OPTIONS] -l | --list List the available collection:test entries -d | --dry-run Don't actually run any tests -h | --help Show this usage info + -o | --override-timeout Number of seconds after which we timeout EOF exit $1 } @@ -33,6 +34,7 @@ EOF COLLECTIONS="" TESTS="" dryrun="" +kselftest_override_timeout="" while true; do case "$1" in -s | --summary) @@ -51,6 +53,9 @@ while true; do -d | --dry-run) dryrun="echo" shift ;; + -o | --override-timeout) + kselftest_override_timeout="$2" + shift 2 ;; -h | --help) usage 0 ;; "") @@ -85,7 +90,7 @@ if [ -n "$TESTS" ]; then available="$(echo "$valid" | sed -e 's/ /\n/g')" fi -collections=$(echo "$available" | cut -d: -f1 | uniq) +collections=$(echo "$available" | cut -d: -f1 | sort | uniq) for collection in $collections ; do [ -w /dev/kmsg ] && echo "kselftest: Running tests in $collection" >> /dev/kmsg tests=$(echo "$available" | grep "^$collection:" | cut -d: -f2) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 43ec36b179dc..38f651469968 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -2184,6 +2184,9 @@ FIXTURE_TEARDOWN(TRACE_syscall) TEST(negative_ENOSYS) { +#if defined(__arm__) + SKIP(return, "arm32 does not support calling syscall -1"); +#endif /* * There should be no difference between an "internal" skip * and userspace asking for syscall "-1". @@ -3072,7 +3075,8 @@ TEST(syscall_restart) timeout.tv_sec = 1; errno = 0; EXPECT_EQ(0, nanosleep(&timeout, NULL)) { - TH_LOG("Call to nanosleep() failed (errno %d)", errno); + TH_LOG("Call to nanosleep() failed (errno %d: %s)", + errno, strerror(errno)); } /* Read final sync from parent. */ @@ -3908,6 +3912,9 @@ TEST(user_notification_filter_empty) TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); } + if (__NR_clone3 < 0) + SKIP(return, "Test not built with clone3 support"); + pid = sys_clone3(&args, sizeof(args)); ASSERT_GE(pid, 0); @@ -3962,6 +3969,9 @@ TEST(user_notification_filter_empty_threaded) TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); } + if (__NR_clone3 < 0) + SKIP(return, "Test not built with clone3 support"); + pid = sys_clone3(&args, sizeof(args)); ASSERT_GE(pid, 0); @@ -4255,6 +4265,61 @@ TEST(user_notification_addfd_rlimit) close(memfd); } +#ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP +#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0) +#define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64) +#endif + +TEST(user_notification_sync) +{ + struct seccomp_notif req = {}; + struct seccomp_notif_resp resp = {}; + int status, listener; + pid_t pid; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + /* Try to set invalid flags. */ + EXPECT_SYSCALL_RETURN(-EINVAL, + ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, 0xffffffff, 0)); + + ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, + SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) { + ret = syscall(__NR_getppid); + ASSERT_EQ(ret, USER_NOTIF_MAGIC) { + _exit(1); + } + _exit(0); + } + + req.pid = 0; + ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + + ASSERT_EQ(req.data.nr, __NR_getppid); + + resp.id = req.id; + resp.error = 0; + resp.val = USER_NOTIF_MAGIC; + resp.flags = 0; + ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + + ASSERT_EQ(waitpid(pid, &status, 0), pid); + ASSERT_EQ(status, 0); +} + + /* Make sure PTRACE_O_SUSPEND_SECCOMP requires CAP_SYS_ADMIN. */ FIXTURE(O_SUSPEND_SECCOMP) { pid_t pid; diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh index bfc54b422f25..444b2befda82 100755 --- a/tools/testing/selftests/sysctl/sysctl.sh +++ b/tools/testing/selftests/sysctl/sysctl.sh @@ -14,23 +14,27 @@ TEST_FILE=$(mktemp) # This represents # -# TEST_ID:TEST_COUNT:ENABLED:TARGET +# TEST_ID:TEST_COUNT:ENABLED:TARGET:SKIP_NO_TARGET # # TEST_ID: is the test id number # TEST_COUNT: number of times we should run the test # ENABLED: 1 if enabled, 0 otherwise # TARGET: test target file required on the test_sysctl module +# SKIP_NO_TARGET: 1 skip if TARGET not there +# 0 run eventhough TARGET not there # # Once these are enabled please leave them as-is. Write your own test, # we have tons of space. -ALL_TESTS="0001:1:1:int_0001" -ALL_TESTS="$ALL_TESTS 0002:1:1:string_0001" -ALL_TESTS="$ALL_TESTS 0003:1:1:int_0002" -ALL_TESTS="$ALL_TESTS 0004:1:1:uint_0001" -ALL_TESTS="$ALL_TESTS 0005:3:1:int_0003" -ALL_TESTS="$ALL_TESTS 0006:50:1:bitmap_0001" -ALL_TESTS="$ALL_TESTS 0007:1:1:boot_int" -ALL_TESTS="$ALL_TESTS 0008:1:1:match_int" +ALL_TESTS="0001:1:1:int_0001:1" +ALL_TESTS="$ALL_TESTS 0002:1:1:string_0001:1" +ALL_TESTS="$ALL_TESTS 0003:1:1:int_0002:1" +ALL_TESTS="$ALL_TESTS 0004:1:1:uint_0001:1" +ALL_TESTS="$ALL_TESTS 0005:3:1:int_0003:1" +ALL_TESTS="$ALL_TESTS 0006:50:1:bitmap_0001:1" +ALL_TESTS="$ALL_TESTS 0007:1:1:boot_int:1" +ALL_TESTS="$ALL_TESTS 0008:1:1:match_int:1" +ALL_TESTS="$ALL_TESTS 0009:1:1:unregister_error:0" +ALL_TESTS="$ALL_TESTS 0010:1:1:mnt/mnt_error:0" function allow_user_defaults() { @@ -613,7 +617,6 @@ target_exists() TEST_ID="$2" if [ ! -f ${TARGET} ] ; then - echo "Target for test $TEST_ID: $TARGET not exist, skipping test ..." return 0 fi return 1 @@ -730,7 +733,7 @@ sysctl_test_0005() sysctl_test_0006() { - TARGET="${SYSCTL}/bitmap_0001" + TARGET="${SYSCTL}/$(get_test_target 0006)" reset_vals ORIG="" run_bitmaptest @@ -738,7 +741,7 @@ sysctl_test_0006() sysctl_test_0007() { - TARGET="${SYSCTL}/boot_int" + TARGET="${SYSCTL}/$(get_test_target 0007)" if [ ! -f $TARGET ]; then echo "Skipping test for $TARGET as it is not present ..." return $ksft_skip @@ -778,7 +781,7 @@ sysctl_test_0007() sysctl_test_0008() { - TARGET="${SYSCTL}/match_int" + TARGET="${SYSCTL}/$(get_test_target 0008)" if [ ! -f $TARGET ]; then echo "Skipping test for $TARGET as it is not present ..." return $ksft_skip @@ -797,6 +800,34 @@ sysctl_test_0008() return 0 } +sysctl_test_0009() +{ + TARGET="${SYSCTL}/$(get_test_target 0009)" + echo -n "Testing if $TARGET unregistered correctly ..." + if [ -d $TARGET ]; then + echo "TEST FAILED" + rc=1 + test_rc + fi + + echo "ok" + return 0 +} + +sysctl_test_0010() +{ + TARGET="${SYSCTL}/$(get_test_target 0010)" + echo -n "Testing that $TARGET was not created ..." + if [ -d $TARGET ]; then + echo "TEST FAILED" + rc=1 + test_rc + fi + + echo "ok" + return 0 +} + list_tests() { echo "Test ID list:" @@ -813,6 +844,8 @@ list_tests() echo "0006 x $(get_test_count 0006) - tests proc_do_large_bitmap()" echo "0007 x $(get_test_count 0007) - tests setting sysctl from kernel boot param" echo "0008 x $(get_test_count 0008) - tests sysctl macro values match" + echo "0009 x $(get_test_count 0009) - tests sysct unregister" + echo "0010 x $(get_test_count 0010) - tests sysct mount point" } usage() @@ -857,38 +890,65 @@ function test_num() usage fi } +function remove_leading_zeros() +{ + echo $1 | sed 's/^0*//' +} function get_test_count() { test_num $1 - TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}') + awk_field=$(remove_leading_zeros $1) + TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$awk_field'}') echo ${TEST_DATA} | awk -F":" '{print $2}' } function get_test_enabled() { test_num $1 - TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}') + awk_field=$(remove_leading_zeros $1) + TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$awk_field'}') echo ${TEST_DATA} | awk -F":" '{print $3}' } function get_test_target() { test_num $1 - TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}') + awk_field=$(remove_leading_zeros $1) + TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$awk_field'}') echo ${TEST_DATA} | awk -F":" '{print $4}' } +function get_test_skip_no_target() +{ + test_num $1 + awk_field=$(remove_leading_zeros $1) + TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$awk_field'}') + echo ${TEST_DATA} | awk -F":" '{print $5}' +} + +function skip_test() +{ + TEST_ID=$1 + TEST_TARGET=$2 + if target_exists $TEST_TARGET $TEST_ID; then + TEST_SKIP=$(get_test_skip_no_target $TEST_ID) + if [[ $TEST_SKIP -eq "1" ]]; then + echo "Target for test $TEST_ID: $TEST_TARGET not exist, skipping test ..." + return 0 + fi + fi + return 1 +} + function run_all_tests() { for i in $ALL_TESTS ; do - TEST_ID=${i%:*:*:*} + TEST_ID=${i%:*:*:*:*} ENABLED=$(get_test_enabled $TEST_ID) TEST_COUNT=$(get_test_count $TEST_ID) TEST_TARGET=$(get_test_target $TEST_ID) - if target_exists $TEST_TARGET $TEST_ID; then - continue - fi + if [[ $ENABLED -eq "1" ]]; then test_case $TEST_ID $TEST_COUNT $TEST_TARGET fi @@ -923,18 +983,19 @@ function watch_case() function test_case() { + TEST_ID=$1 NUM_TESTS=$2 + TARGET=$3 - i=0 - - if target_exists $3 $1; then - continue + if skip_test $TEST_ID $TARGET; then + return fi + i=0 while [ $i -lt $NUM_TESTS ]; do - test_num $1 - watch_log $i ${TEST_NAME}_test_$1 noclear - RUN_TEST=${TEST_NAME}_test_$1 + test_num $TEST_ID + watch_log $i ${TEST_NAME}_test_${TEST_ID} noclear + RUN_TEST=${TEST_NAME}_test_${TEST_ID} $RUN_TEST let i=$i+1 done diff --git a/tools/testing/selftests/tc-testing/Makefile b/tools/testing/selftests/tc-testing/Makefile index cb553eac9f41..3c4b7fa05075 100644 --- a/tools/testing/selftests/tc-testing/Makefile +++ b/tools/testing/selftests/tc-testing/Makefile @@ -24,7 +24,7 @@ CLANG_FLAGS = -I. -I$(APIDIR) \ $(OUTPUT)/%.o: %.c $(CLANG) $(CLANG_FLAGS) \ - -O2 -target bpf -emit-llvm -c $< -o - | \ + -O2 --target=bpf -emit-llvm -c $< -o - | \ $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@ TEST_PROGS += ./tdc.sh diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index 6e73b09c20c8..5aa8705751f0 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -5,6 +5,8 @@ CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_MARK=y CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CONNTRACK_LABELS=y +CONFIG_NF_CONNTRACK_PROCFS=y +CONFIG_NF_FLOW_TABLE=m CONFIG_NF_NAT=m CONFIG_NETFILTER_XT_TARGET_LOG=m @@ -94,10 +96,11 @@ CONFIG_NET_SCH_FIFO=y CONFIG_NET_SCH_ETS=m CONFIG_NET_SCH_RED=m CONFIG_NET_SCH_FQ_PIE=m -CONFIG_NETDEVSIM=m # ## Network testing # CONFIG_CAN=m CONFIG_ATM=y +CONFIG_NETDEVSIM=m +CONFIG_PTP_1588_CLOCK_MOCK=m diff --git a/tools/testing/selftests/tc-testing/settings b/tools/testing/selftests/tc-testing/settings new file mode 100644 index 000000000000..e2206265f67c --- /dev/null +++ b/tools/testing/selftests/tc-testing/settings @@ -0,0 +1 @@ +timeout=900 diff --git a/tools/testing/selftests/tc-testing/taprio_wait_for_admin.sh b/tools/testing/selftests/tc-testing/taprio_wait_for_admin.sh new file mode 100755 index 000000000000..f5335e8ad6b4 --- /dev/null +++ b/tools/testing/selftests/tc-testing/taprio_wait_for_admin.sh @@ -0,0 +1,16 @@ +#!/bin/bash + +TC="$1"; shift +ETH="$1"; shift + +# The taprio architecture changes the admin schedule from a hrtimer and not +# from process context, so we need to wait in order to make sure that any +# schedule change actually took place. +while :; do + has_admin="$($TC -j qdisc show dev $ETH root | jq '.[].options | has("admin")')" + if [ "$has_admin" = "false" ]; then + break; + fi + + sleep 1 +done diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/filter.json b/tools/testing/selftests/tc-testing/tc-tests/infra/filter.json new file mode 100644 index 000000000000..c4c778e83da2 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/filter.json @@ -0,0 +1,25 @@ +[ + { + "id": "c2b4", + "name": "soft lockup alarm will be not generated after delete the prio 0 filter of the chain", + "category": [ + "filter", + "chain" + ], + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$TC qdisc add dev $DUMMY root handle 1: htb default 1", + "$TC chain add dev $DUMMY", + "$TC filter del dev $DUMMY chain 0 parent 1: prio 0" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY chain 0 parent 1:", + "expExitCode": "2", + "verifyCmd": "$TC chain ls dev $DUMMY", + "matchPattern": "chain parent 1: chain 0", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1: htb default 1", + "$IP link del dev $DUMMY type dummy" + ] + } +] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json index 44fbfc6caec7..e3d2de5c184f 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json @@ -155,5 +155,28 @@ "teardown": [ "echo \"1\" > /sys/bus/netdevsim/del_device" ] - } + }, + { + "id": "0531", + "name": "Replace mq with invalid parent ID", + "category": [ + "qdisc", + "mq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 16\" > /sys/bus/netdevsim/new_device", + "$TC qdisc add dev $ETH root handle ffff: mq" + ], + "cmdUnderTest": "$TC qdisc replace dev $ETH parent ffff:fff1 handle ffff: mq", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent ffff", + "matchCount": "16", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/qfq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/qfq.json index 147899a868d3..976dffda4654 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/qfq.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/qfq.json @@ -213,5 +213,91 @@ "$TC qdisc del dev $DUMMY handle 1: root", "$IP link del dev $DUMMY type dummy" ] + }, + { + "id": "85ee", + "name": "QFQ with big MTU", + "category": [ + "qdisc", + "qfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$IP link set dev $DUMMY mtu 2147483647 || /bin/true", + "$TC qdisc add dev $DUMMY handle 1: root qfq" + ], + "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 qfq weight 100", + "expExitCode": "2", + "verifyCmd": "$TC class show dev $DUMMY", + "matchPattern": "class qfq 1:", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "ddfa", + "name": "QFQ with small MTU", + "category": [ + "qdisc", + "qfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$IP link set dev $DUMMY mtu 256 || /bin/true", + "$TC qdisc add dev $DUMMY handle 1: root qfq" + ], + "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 qfq weight 100", + "expExitCode": "2", + "verifyCmd": "$TC class show dev $DUMMY", + "matchPattern": "class qfq 1:", + "matchCount": "0", + "teardown": [ + "$IP link del dev $DUMMY type dummy" + ] + }, + { + "id": "5993", + "name": "QFQ with stab overhead greater than max packet len", + "category": [ + "qdisc", + "qfq", + "scapy" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$IP link add dev $DUMMY type dummy || /bin/true", + "$IP link set dev $DUMMY up || /bin/true", + "$TC qdisc add dev $DUMMY handle 1: stab mtu 2048 tsize 512 mpu 0 overhead 999999999 linklayer ethernet root qfq", + "$TC class add dev $DUMMY parent 1: classid 1:1 qfq weight 100", + "$TC qdisc add dev $DEV1 clsact", + "$TC filter add dev $DEV1 ingress protocol ip flower dst_ip 1.3.3.7/32 action mirred egress mirror dev $DUMMY" + ], + "cmdUnderTest": "$TC filter add dev $DUMMY parent 1: matchall classid 1:1", + "scapy": [ + { + "iface": "$DEV0", + "count": 22, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='1.3.3.7')/TCP(sport=5000,dport=10)" + } + ], + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc ls dev $DUMMY", + "matchPattern": "dropped 22", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root qfq" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json index a44455372646..0599635c4bc6 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json @@ -104,7 +104,7 @@ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: taprio num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@0 1@0 base-time 1000000000 sched-entry S 01 300000 flags 0x1 clockid CLOCK_TAI", "expExitCode": "0", "verifyCmd": "$TC class show dev $ETH", - "matchPattern": "class taprio 1:[0-9]+ root leaf 1:", + "matchPattern": "class taprio 1:[0-9]+ root", "matchCount": "8", "teardown": [ "echo \"1\" > /sys/bus/netdevsim/del_device" @@ -131,5 +131,130 @@ "teardown": [ "echo \"1\" > /sys/bus/netdevsim/del_device" ] + }, + { + "id": "3e1e", + "name": "Add taprio Qdisc with an invalid cycle-time", + "category": [ + "qdisc", + "taprio" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 8\" > /sys/bus/netdevsim/new_device", + "$TC qdisc add dev $ETH root handle 1: taprio num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@0 1@0 base-time 1000000000 sched-entry S 01 300000 flags 0x1 clockid CLOCK_TAI cycle-time 4294967296 || /bin/true", + "$IP link set dev $ETH up", + "$IP addr add 10.10.10.10/24 dev $ETH" + ], + "cmdUnderTest": "/bin/true", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $ETH", + "matchPattern": "qdisc taprio 1: root refcnt", + "matchCount": "0", + "teardown": [ + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "39b4", + "name": "Reject grafting taprio as child qdisc of software taprio", + "category": [ + "qdisc", + "taprio" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 8\" > /sys/bus/netdevsim/new_device", + "$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 clockid CLOCK_TAI", + "./taprio_wait_for_admin.sh $TC $ETH" + ], + "cmdUnderTest": "$TC qdisc replace dev $ETH parent 8001:7 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 200 sched-entry S ff 20000000 clockid CLOCK_TAI", + "expExitCode": "2", + "verifyCmd": "bash -c \"./taprio_wait_for_admin.sh $TC $ETH && $TC -j qdisc show dev $ETH root | jq '.[].options.base_time'\"", + "matchPattern": "0", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $ETH root", + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "e8a1", + "name": "Reject grafting taprio as child qdisc of offloaded taprio", + "category": [ + "qdisc", + "taprio" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 8\" > /sys/bus/netdevsim/new_device", + "$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 flags 0x2", + "./taprio_wait_for_admin.sh $TC $ETH" + ], + "cmdUnderTest": "$TC qdisc replace dev $ETH parent 8001:7 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 200 sched-entry S ff 20000000 flags 0x2", + "expExitCode": "2", + "verifyCmd": "bash -c \"./taprio_wait_for_admin.sh $TC $ETH && $TC -j qdisc show dev $ETH root | jq '.[].options.base_time'\"", + "matchPattern": "0", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $ETH root", + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "a7bf", + "name": "Graft cbs as child of software taprio", + "category": [ + "qdisc", + "taprio", + "cbs" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 8\" > /sys/bus/netdevsim/new_device", + "$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 clockid CLOCK_TAI" + ], + "cmdUnderTest": "$TC qdisc replace dev $ETH handle 8002: parent 8001:8 cbs idleslope 20000 sendslope -980000 hicredit 30 locredit -1470", + "expExitCode": "0", + "verifyCmd": "$TC -d qdisc show dev $ETH", + "matchPattern": "qdisc cbs 8002: parent 8001:8 hicredit 30 locredit -1470 sendslope -980000 idleslope 20000 offload 0", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $ETH root", + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] + }, + { + "id": "6a83", + "name": "Graft cbs as child of offloaded taprio", + "category": [ + "qdisc", + "taprio", + "cbs" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "echo \"1 1 8\" > /sys/bus/netdevsim/new_device", + "$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 flags 0x2" + ], + "cmdUnderTest": "$TC qdisc replace dev $ETH handle 8002: parent 8001:8 cbs idleslope 20000 sendslope -980000 hicredit 30 locredit -1470", + "expExitCode": "0", + "verifyCmd": "$TC -d qdisc show dev $ETH", + "matchPattern": "qdisc cbs 8002: parent 8001:8 refcnt 2 hicredit 30 locredit -1470 sendslope -980000 idleslope 20000 offload 0", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $ETH root", + "echo \"1\" > /sys/bus/netdevsim/del_device" + ] } ] diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c index 5beceeed0d11..6eba203f9da7 100644 --- a/tools/testing/selftests/timers/raw_skew.c +++ b/tools/testing/selftests/timers/raw_skew.c @@ -129,8 +129,7 @@ int main(int argc, char **argv) printf("%lld.%i(est)", eppm/1000, abs((int)(eppm%1000))); /* Avg the two actual freq samples adjtimex gave us */ - ppm = (tx1.freq + tx2.freq) * 1000 / 2; - ppm = (long long)tx1.freq * 1000; + ppm = (long long)(tx1.freq + tx2.freq) * 1000 / 2; ppm = shift_right(ppm, 16); printf(" %lld.%i(act)", ppm/1000, abs((int)(ppm%1000))); diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore b/tools/testing/selftests/tty/.gitignore index 24e27957efcc..fe70462a4aad 100644 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore +++ b/tools/testing/selftests/tty/.gitignore @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -srcu.c +tty_tstamp_update diff --git a/tools/testing/selftests/tty/Makefile b/tools/testing/selftests/tty/Makefile new file mode 100644 index 000000000000..50d7027b2ae3 --- /dev/null +++ b/tools/testing/selftests/tty/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS = -O2 -Wall +TEST_GEN_PROGS := tty_tstamp_update + +include ../lib.mk diff --git a/tools/testing/selftests/tty/tty_tstamp_update.c b/tools/testing/selftests/tty/tty_tstamp_update.c new file mode 100644 index 000000000000..0ee97943dccc --- /dev/null +++ b/tools/testing/selftests/tty/tty_tstamp_update.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <errno.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <unistd.h> +#include <linux/limits.h> + +#include "../kselftest.h" + +#define MIN_TTY_PATH_LEN 8 + +static bool tty_valid(char *tty) +{ + if (strlen(tty) < MIN_TTY_PATH_LEN) + return false; + + if (strncmp(tty, "/dev/tty", MIN_TTY_PATH_LEN) == 0 || + strncmp(tty, "/dev/pts", MIN_TTY_PATH_LEN) == 0) + return true; + + return false; +} + +static int write_dev_tty(void) +{ + FILE *f; + int r = 0; + + f = fopen("/dev/tty", "r+"); + if (!f) + return -errno; + + r = fprintf(f, "hello, world!\n"); + if (r != strlen("hello, world!\n")) + r = -EIO; + + fclose(f); + return r; +} + +int main(int argc, char **argv) +{ + int r; + char tty[PATH_MAX] = {}; + struct stat st1, st2; + + ksft_print_header(); + ksft_set_plan(1); + + r = readlink("/proc/self/fd/0", tty, PATH_MAX); + if (r < 0) + ksft_exit_fail_msg("readlink on /proc/self/fd/0 failed: %m\n"); + + if (!tty_valid(tty)) + ksft_exit_skip("invalid tty path '%s'\n", tty); + + r = stat(tty, &st1); + if (r < 0) + ksft_exit_fail_msg("stat failed on tty path '%s': %m\n", tty); + + /* We need to wait at least 8 seconds in order to observe timestamp change */ + /* https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=fbf47635315ab308c9b58a1ea0906e711a9228de */ + sleep(10); + + r = write_dev_tty(); + if (r < 0) + ksft_exit_fail_msg("failed to write to /dev/tty: %s\n", + strerror(-r)); + + r = stat(tty, &st2); + if (r < 0) + ksft_exit_fail_msg("stat failed on tty path '%s': %m\n", tty); + + /* We wrote to the terminal so timestamps should have been updated */ + if (st1.st_atim.tv_sec == st2.st_atim.tv_sec && + st1.st_mtim.tv_sec == st2.st_mtim.tv_sec) { + ksft_test_result_fail("tty timestamps not updated\n"); + ksft_exit_fail(); + } + + ksft_test_result_pass( + "timestamps of terminal '%s' updated after write to /dev/tty\n", tty); + return EXIT_SUCCESS; +} diff --git a/tools/testing/selftests/user_events/Makefile b/tools/testing/selftests/user_events/Makefile index 9e95bd41b0b4..10fcd0066203 100644 --- a/tools/testing/selftests/user_events/Makefile +++ b/tools/testing/selftests/user_events/Makefile @@ -2,14 +2,6 @@ CFLAGS += -Wl,-no-as-needed -Wall $(KHDR_INCLUDES) LDLIBS += -lrt -lpthread -lm -# Note: -# This test depends on <linux/user_events.h> exported in uapi -# The following commit removed user_events.h out of uapi: -# commit 5cfff569cab8bf544bab62c911c5d6efd5af5e05 -# tracing: Move user_events.h temporarily out of include/uapi -# This test will not compile until user_events.h is added -# back to uapi. - TEST_GEN_PROGS = ftrace_test dyn_test perf_test abi_test TEST_FILES := settings diff --git a/tools/testing/selftests/user_events/abi_test.c b/tools/testing/selftests/user_events/abi_test.c index 5125c42efe65..8202f1327c39 100644 --- a/tools/testing/selftests/user_events/abi_test.c +++ b/tools/testing/selftests/user_events/abi_test.c @@ -19,6 +19,7 @@ #include <asm/unistd.h> #include "../kselftest_harness.h" +#include "user_events_selftests.h" const char *data_file = "/sys/kernel/tracing/user_events_data"; const char *enable_file = "/sys/kernel/tracing/events/user_events/__abi_event/enable"; @@ -90,14 +91,18 @@ static int reg_disable(long *enable, int bit) FIXTURE(user) { long check; + bool umount; }; FIXTURE_SETUP(user) { + USER_EVENT_FIXTURE_SETUP(return, self->umount); + change_event(false); self->check = 0; } FIXTURE_TEARDOWN(user) { + USER_EVENT_FIXTURE_TEARDOWN(self->umount); } TEST_F(user, enablement) { diff --git a/tools/testing/selftests/user_events/config b/tools/testing/selftests/user_events/config new file mode 100644 index 000000000000..64f7a9a90cec --- /dev/null +++ b/tools/testing/selftests/user_events/config @@ -0,0 +1 @@ +CONFIG_USER_EVENTS=y diff --git a/tools/testing/selftests/user_events/dyn_test.c b/tools/testing/selftests/user_events/dyn_test.c index d6979a48478f..a85980190bea 100644 --- a/tools/testing/selftests/user_events/dyn_test.c +++ b/tools/testing/selftests/user_events/dyn_test.c @@ -15,6 +15,7 @@ #include <unistd.h> #include "../kselftest_harness.h" +#include "user_events_selftests.h" const char *abi_file = "/sys/kernel/tracing/user_events_data"; const char *enable_file = "/sys/kernel/tracing/events/user_events/__test_event/enable"; @@ -143,12 +144,16 @@ do { \ FIXTURE(user) { int check; + bool umount; }; FIXTURE_SETUP(user) { + USER_EVENT_FIXTURE_SETUP(return, self->umount); } FIXTURE_TEARDOWN(user) { + USER_EVENT_FIXTURE_TEARDOWN(self->umount); + wait_for_delete(); } @@ -217,6 +222,18 @@ TEST_F(user, matching) { /* Types don't match */ TEST_NMATCH("__test_event u64 a; u64 b", "__test_event u32 a; u32 b"); + + /* Struct name and size matches */ + TEST_MATCH("__test_event struct my_struct a 20", + "__test_event struct my_struct a 20"); + + /* Struct name don't match */ + TEST_NMATCH("__test_event struct my_struct a 20", + "__test_event struct my_struct b 20"); + + /* Struct size don't match */ + TEST_NMATCH("__test_event struct my_struct a 20", + "__test_event struct my_struct a 21"); } int main(int argc, char **argv) diff --git a/tools/testing/selftests/user_events/ftrace_test.c b/tools/testing/selftests/user_events/ftrace_test.c index eb6904d89f14..dcd7509fe2e0 100644 --- a/tools/testing/selftests/user_events/ftrace_test.c +++ b/tools/testing/selftests/user_events/ftrace_test.c @@ -16,6 +16,7 @@ #include <unistd.h> #include "../kselftest_harness.h" +#include "user_events_selftests.h" const char *data_file = "/sys/kernel/tracing/user_events_data"; const char *status_file = "/sys/kernel/tracing/user_events_status"; @@ -203,9 +204,12 @@ FIXTURE(user) { int data_fd; int enable_fd; int check; + bool umount; }; FIXTURE_SETUP(user) { + USER_EVENT_FIXTURE_SETUP(return, self->umount); + self->status_fd = open(status_file, O_RDONLY); ASSERT_NE(-1, self->status_fd); @@ -216,6 +220,8 @@ FIXTURE_SETUP(user) { } FIXTURE_TEARDOWN(user) { + USER_EVENT_FIXTURE_TEARDOWN(self->umount); + close(self->status_fd); close(self->data_fd); @@ -324,6 +330,10 @@ TEST_F(user, write_events) { io[0].iov_base = ®.write_index; io[0].iov_len = sizeof(reg.write_index); + /* Write should return -EBADF when event is not enabled */ + ASSERT_EQ(-1, writev(self->data_fd, (const struct iovec *)io, 3)); + ASSERT_EQ(EBADF, errno); + /* Enable event */ self->enable_fd = open(enable_file, O_RDWR); ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1"))) @@ -400,6 +410,10 @@ TEST_F(user, write_fault) { ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, ®)); ASSERT_EQ(0, reg.write_index); + /* Enable event */ + self->enable_fd = open(enable_file, O_RDWR); + ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1"))) + /* Write should work normally */ ASSERT_NE(-1, writev(self->data_fd, (const struct iovec *)io, 2)); diff --git a/tools/testing/selftests/user_events/perf_test.c b/tools/testing/selftests/user_events/perf_test.c index 8b09be566fa2..5288e768b207 100644 --- a/tools/testing/selftests/user_events/perf_test.c +++ b/tools/testing/selftests/user_events/perf_test.c @@ -17,6 +17,7 @@ #include <asm/unistd.h> #include "../kselftest_harness.h" +#include "user_events_selftests.h" const char *data_file = "/sys/kernel/tracing/user_events_data"; const char *id_file = "/sys/kernel/tracing/events/user_events/__test_event/id"; @@ -110,14 +111,19 @@ static int clear(int *check) FIXTURE(user) { int data_fd; int check; + bool umount; }; FIXTURE_SETUP(user) { + USER_EVENT_FIXTURE_SETUP(return, self->umount); + self->data_fd = open(data_file, O_RDWR); ASSERT_NE(-1, self->data_fd); } FIXTURE_TEARDOWN(user) { + USER_EVENT_FIXTURE_TEARDOWN(self->umount); + close(self->data_fd); if (clear(&self->check) != 0) diff --git a/tools/testing/selftests/user_events/user_events_selftests.h b/tools/testing/selftests/user_events/user_events_selftests.h new file mode 100644 index 000000000000..e1c3c063c031 --- /dev/null +++ b/tools/testing/selftests/user_events/user_events_selftests.h @@ -0,0 +1,114 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _USER_EVENTS_SELFTESTS_H +#define _USER_EVENTS_SELFTESTS_H + +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/mount.h> +#include <unistd.h> +#include <errno.h> + +#include "../kselftest.h" + +static inline void tracefs_unmount(void) +{ + umount("/sys/kernel/tracing"); +} + +static inline bool tracefs_enabled(char **message, bool *fail, bool *umount) +{ + struct stat buf; + int ret; + + *message = ""; + *fail = false; + *umount = false; + + /* Ensure tracefs is installed */ + ret = stat("/sys/kernel/tracing", &buf); + + if (ret == -1) { + *message = "Tracefs is not installed"; + return false; + } + + /* Ensure mounted tracefs */ + ret = stat("/sys/kernel/tracing/README", &buf); + + if (ret == -1 && errno == ENOENT) { + if (mount(NULL, "/sys/kernel/tracing", "tracefs", 0, NULL) != 0) { + *message = "Cannot mount tracefs"; + *fail = true; + return false; + } + + *umount = true; + + ret = stat("/sys/kernel/tracing/README", &buf); + } + + if (ret == -1) { + *message = "Cannot access tracefs"; + *fail = true; + return false; + } + + return true; +} + +static inline bool user_events_enabled(char **message, bool *fail, bool *umount) +{ + struct stat buf; + int ret; + + *message = ""; + *fail = false; + *umount = false; + + if (getuid() != 0) { + *message = "Must be run as root"; + *fail = true; + return false; + } + + if (!tracefs_enabled(message, fail, umount)) + return false; + + /* Ensure user_events is installed */ + ret = stat("/sys/kernel/tracing/user_events_data", &buf); + + if (ret == -1) { + switch (errno) { + case ENOENT: + *message = "user_events is not installed"; + return false; + + default: + *message = "Cannot access user_events_data"; + *fail = true; + return false; + } + } + + return true; +} + +#define USER_EVENT_FIXTURE_SETUP(statement, umount) do { \ + char *message; \ + bool fail; \ + if (!user_events_enabled(&message, &fail, &(umount))) { \ + if (fail) { \ + TH_LOG("Setup failed due to: %s", message); \ + ASSERT_FALSE(fail); \ + } \ + SKIP(statement, "Skipping due to: %s", message); \ + } \ +} while (0) + +#define USER_EVENT_FIXTURE_TEARDOWN(umount) do { \ + if ((umount)) \ + tracefs_unmount(); \ +} while (0) + +#endif /* _USER_EVENTS_SELFTESTS_H */ diff --git a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c index 15dcee16ff72..38d46a8bf7cb 100644 --- a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c +++ b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c @@ -84,12 +84,12 @@ static inline int vdso_test_clock(unsigned int clock_id) int main(int argc, char **argv) { - int ret; + int ret = 0; #if _POSIX_TIMERS > 0 #ifdef CLOCK_REALTIME - ret = vdso_test_clock(CLOCK_REALTIME); + ret += vdso_test_clock(CLOCK_REALTIME); #endif #ifdef CLOCK_BOOTTIME diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index 69c7796c7ca9..405ff262ca93 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -514,10 +514,32 @@ n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter' n1 ping -W 1 -c 1 192.168.241.2 [[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.3:1" ]] -ip1 link del veth1 -ip1 link del veth3 -ip1 link del wg0 -ip2 link del wg0 +ip1 link del dev veth3 +ip1 link del dev wg0 +ip2 link del dev wg0 + +# Make sure persistent keep alives are sent when an adapter comes up +ip1 link add dev wg0 type wireguard +n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" endpoint 10.0.0.1:1 persistent-keepalive 1 +read _ _ tx_bytes < <(n1 wg show wg0 transfer) +[[ $tx_bytes -eq 0 ]] +ip1 link set dev wg0 up +read _ _ tx_bytes < <(n1 wg show wg0 transfer) +[[ $tx_bytes -gt 0 ]] +ip1 link del dev wg0 +# This should also happen even if the private key is set later +ip1 link add dev wg0 type wireguard +n1 wg set wg0 peer "$pub2" endpoint 10.0.0.1:1 persistent-keepalive 1 +read _ _ tx_bytes < <(n1 wg show wg0 transfer) +[[ $tx_bytes -eq 0 ]] +ip1 link set dev wg0 up +read _ _ tx_bytes < <(n1 wg show wg0 transfer) +[[ $tx_bytes -eq 0 ]] +n1 wg set wg0 private-key <(echo "$key1") +read _ _ tx_bytes < <(n1 wg show wg0 transfer) +[[ $tx_bytes -gt 0 ]] +ip1 link del dev veth1 +ip1 link del dev wg0 # We test that Netlink/IPC is working properly by doing things that usually cause split responses ip0 link add dev wg0 type wireguard diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index 6327c9c400e0..507555714b1d 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -41,7 +41,6 @@ CONFIG_KALLSYMS=y CONFIG_BUG=y CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y CONFIG_JUMP_LABEL=y -CONFIG_EMBEDDED=n CONFIG_BASE_FULL=y CONFIG_FUTEX=y CONFIG_SHMEM=y diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 598135d3162b..7e8c937627dd 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -18,7 +18,7 @@ TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \ - corrupt_xstate_header amx lam + corrupt_xstate_header amx lam test_shadow_stack # Some selftests require 32bit support enabled also on 64bit systems TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall diff --git a/tools/testing/selftests/x86/test_shadow_stack.c b/tools/testing/selftests/x86/test_shadow_stack.c new file mode 100644 index 000000000000..757e6527f67e --- /dev/null +++ b/tools/testing/selftests/x86/test_shadow_stack.c @@ -0,0 +1,884 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This program test's basic kernel shadow stack support. It enables shadow + * stack manual via the arch_prctl(), instead of relying on glibc. It's + * Makefile doesn't compile with shadow stack support, so it doesn't rely on + * any particular glibc. As a result it can't do any operations that require + * special glibc shadow stack support (longjmp(), swapcontext(), etc). Just + * stick to the basics and hope the compiler doesn't do anything strange. + */ + +#define _GNU_SOURCE + +#include <sys/syscall.h> +#include <asm/mman.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <stdbool.h> +#include <x86intrin.h> +#include <asm/prctl.h> +#include <sys/prctl.h> +#include <stdint.h> +#include <signal.h> +#include <pthread.h> +#include <sys/ioctl.h> +#include <linux/userfaultfd.h> +#include <setjmp.h> +#include <sys/ptrace.h> +#include <sys/signal.h> +#include <linux/elf.h> + +/* + * Define the ABI defines if needed, so people can run the tests + * without building the headers. + */ +#ifndef __NR_map_shadow_stack +#define __NR_map_shadow_stack 453 + +#define SHADOW_STACK_SET_TOKEN (1ULL << 0) + +#define ARCH_SHSTK_ENABLE 0x5001 +#define ARCH_SHSTK_DISABLE 0x5002 +#define ARCH_SHSTK_LOCK 0x5003 +#define ARCH_SHSTK_UNLOCK 0x5004 +#define ARCH_SHSTK_STATUS 0x5005 + +#define ARCH_SHSTK_SHSTK (1ULL << 0) +#define ARCH_SHSTK_WRSS (1ULL << 1) + +#define NT_X86_SHSTK 0x204 +#endif + +#define SS_SIZE 0x200000 +#define PAGE_SIZE 0x1000 + +#if (__GNUC__ < 8) || (__GNUC__ == 8 && __GNUC_MINOR__ < 5) +int main(int argc, char *argv[]) +{ + printf("[SKIP]\tCompiler does not support CET.\n"); + return 0; +} +#else +void write_shstk(unsigned long *addr, unsigned long val) +{ + asm volatile("wrssq %[val], (%[addr])\n" + : "=m" (addr) + : [addr] "r" (addr), [val] "r" (val)); +} + +static inline unsigned long __attribute__((always_inline)) get_ssp(void) +{ + unsigned long ret = 0; + + asm volatile("xor %0, %0; rdsspq %0" : "=r" (ret)); + return ret; +} + +/* + * For use in inline enablement of shadow stack. + * + * The program can't return from the point where shadow stack gets enabled + * because there will be no address on the shadow stack. So it can't use + * syscall() for enablement, since it is a function. + * + * Based on code from nolibc.h. Keep a copy here because this can't pull in all + * of nolibc.h. + */ +#define ARCH_PRCTL(arg1, arg2) \ +({ \ + long _ret; \ + register long _num asm("eax") = __NR_arch_prctl; \ + register long _arg1 asm("rdi") = (long)(arg1); \ + register long _arg2 asm("rsi") = (long)(arg2); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a"(_ret) \ + : "r"(_arg1), "r"(_arg2), \ + "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +void *create_shstk(void *addr) +{ + return (void *)syscall(__NR_map_shadow_stack, addr, SS_SIZE, SHADOW_STACK_SET_TOKEN); +} + +void *create_normal_mem(void *addr) +{ + return mmap(addr, SS_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); +} + +void free_shstk(void *shstk) +{ + munmap(shstk, SS_SIZE); +} + +int reset_shstk(void *shstk) +{ + return madvise(shstk, SS_SIZE, MADV_DONTNEED); +} + +void try_shstk(unsigned long new_ssp) +{ + unsigned long ssp; + + printf("[INFO]\tnew_ssp = %lx, *new_ssp = %lx\n", + new_ssp, *((unsigned long *)new_ssp)); + + ssp = get_ssp(); + printf("[INFO]\tchanging ssp from %lx to %lx\n", ssp, new_ssp); + + asm volatile("rstorssp (%0)\n":: "r" (new_ssp)); + asm volatile("saveprevssp"); + printf("[INFO]\tssp is now %lx\n", get_ssp()); + + /* Switch back to original shadow stack */ + ssp -= 8; + asm volatile("rstorssp (%0)\n":: "r" (ssp)); + asm volatile("saveprevssp"); +} + +int test_shstk_pivot(void) +{ + void *shstk = create_shstk(0); + + if (shstk == MAP_FAILED) { + printf("[FAIL]\tError creating shadow stack: %d\n", errno); + return 1; + } + try_shstk((unsigned long)shstk + SS_SIZE - 8); + free_shstk(shstk); + + printf("[OK]\tShadow stack pivot\n"); + return 0; +} + +int test_shstk_faults(void) +{ + unsigned long *shstk = create_shstk(0); + + /* Read shadow stack, test if it's zero to not get read optimized out */ + if (*shstk != 0) + goto err; + + /* Wrss memory that was already read. */ + write_shstk(shstk, 1); + if (*shstk != 1) + goto err; + + /* Page out memory, so we can wrss it again. */ + if (reset_shstk((void *)shstk)) + goto err; + + write_shstk(shstk, 1); + if (*shstk != 1) + goto err; + + printf("[OK]\tShadow stack faults\n"); + return 0; + +err: + return 1; +} + +unsigned long saved_ssp; +unsigned long saved_ssp_val; +volatile bool segv_triggered; + +void __attribute__((noinline)) violate_ss(void) +{ + saved_ssp = get_ssp(); + saved_ssp_val = *(unsigned long *)saved_ssp; + + /* Corrupt shadow stack */ + printf("[INFO]\tCorrupting shadow stack\n"); + write_shstk((void *)saved_ssp, 0); +} + +void segv_handler(int signum, siginfo_t *si, void *uc) +{ + printf("[INFO]\tGenerated shadow stack violation successfully\n"); + + segv_triggered = true; + + /* Fix shadow stack */ + write_shstk((void *)saved_ssp, saved_ssp_val); +} + +int test_shstk_violation(void) +{ + struct sigaction sa = {}; + + sa.sa_sigaction = segv_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGSEGV, &sa, NULL)) + return 1; + + segv_triggered = false; + + /* Make sure segv_triggered is set before violate_ss() */ + asm volatile("" : : : "memory"); + + violate_ss(); + + signal(SIGSEGV, SIG_DFL); + + printf("[OK]\tShadow stack violation test\n"); + + return !segv_triggered; +} + +/* Gup test state */ +#define MAGIC_VAL 0x12345678 +bool is_shstk_access; +void *shstk_ptr; +int fd; + +void reset_test_shstk(void *addr) +{ + if (shstk_ptr) + free_shstk(shstk_ptr); + shstk_ptr = create_shstk(addr); +} + +void test_access_fix_handler(int signum, siginfo_t *si, void *uc) +{ + printf("[INFO]\tViolation from %s\n", is_shstk_access ? "shstk access" : "normal write"); + + segv_triggered = true; + + /* Fix shadow stack */ + if (is_shstk_access) { + reset_test_shstk(shstk_ptr); + return; + } + + free_shstk(shstk_ptr); + create_normal_mem(shstk_ptr); +} + +bool test_shstk_access(void *ptr) +{ + is_shstk_access = true; + segv_triggered = false; + write_shstk(ptr, MAGIC_VAL); + + asm volatile("" : : : "memory"); + + return segv_triggered; +} + +bool test_write_access(void *ptr) +{ + is_shstk_access = false; + segv_triggered = false; + *(unsigned long *)ptr = MAGIC_VAL; + + asm volatile("" : : : "memory"); + + return segv_triggered; +} + +bool gup_write(void *ptr) +{ + unsigned long val; + + lseek(fd, (unsigned long)ptr, SEEK_SET); + if (write(fd, &val, sizeof(val)) < 0) + return 1; + + return 0; +} + +bool gup_read(void *ptr) +{ + unsigned long val; + + lseek(fd, (unsigned long)ptr, SEEK_SET); + if (read(fd, &val, sizeof(val)) < 0) + return 1; + + return 0; +} + +int test_gup(void) +{ + struct sigaction sa = {}; + int status; + pid_t pid; + + sa.sa_sigaction = test_access_fix_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGSEGV, &sa, NULL)) + return 1; + + segv_triggered = false; + + fd = open("/proc/self/mem", O_RDWR); + if (fd == -1) + return 1; + + reset_test_shstk(0); + if (gup_read(shstk_ptr)) + return 1; + if (test_shstk_access(shstk_ptr)) + return 1; + printf("[INFO]\tGup read -> shstk access success\n"); + + reset_test_shstk(0); + if (gup_write(shstk_ptr)) + return 1; + if (test_shstk_access(shstk_ptr)) + return 1; + printf("[INFO]\tGup write -> shstk access success\n"); + + reset_test_shstk(0); + if (gup_read(shstk_ptr)) + return 1; + if (!test_write_access(shstk_ptr)) + return 1; + printf("[INFO]\tGup read -> write access success\n"); + + reset_test_shstk(0); + if (gup_write(shstk_ptr)) + return 1; + if (!test_write_access(shstk_ptr)) + return 1; + printf("[INFO]\tGup write -> write access success\n"); + + close(fd); + + /* COW/gup test */ + reset_test_shstk(0); + pid = fork(); + if (!pid) { + fd = open("/proc/self/mem", O_RDWR); + if (fd == -1) + exit(1); + + if (gup_write(shstk_ptr)) { + close(fd); + exit(1); + } + close(fd); + exit(0); + } + waitpid(pid, &status, 0); + if (WEXITSTATUS(status)) { + printf("[FAIL]\tWrite in child failed\n"); + return 1; + } + if (*(unsigned long *)shstk_ptr == MAGIC_VAL) { + printf("[FAIL]\tWrite in child wrote through to shared memory\n"); + return 1; + } + + printf("[INFO]\tCow gup write -> write access success\n"); + + free_shstk(shstk_ptr); + + signal(SIGSEGV, SIG_DFL); + + printf("[OK]\tShadow gup test\n"); + + return 0; +} + +int test_mprotect(void) +{ + struct sigaction sa = {}; + + sa.sa_sigaction = test_access_fix_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGSEGV, &sa, NULL)) + return 1; + + segv_triggered = false; + + /* mprotect a shadow stack as read only */ + reset_test_shstk(0); + if (mprotect(shstk_ptr, SS_SIZE, PROT_READ) < 0) { + printf("[FAIL]\tmprotect(PROT_READ) failed\n"); + return 1; + } + + /* try to wrss it and fail */ + if (!test_shstk_access(shstk_ptr)) { + printf("[FAIL]\tShadow stack access to read-only memory succeeded\n"); + return 1; + } + + /* + * The shadow stack was reset above to resolve the fault, make the new one + * read-only. + */ + if (mprotect(shstk_ptr, SS_SIZE, PROT_READ) < 0) { + printf("[FAIL]\tmprotect(PROT_READ) failed\n"); + return 1; + } + + /* then back to writable */ + if (mprotect(shstk_ptr, SS_SIZE, PROT_WRITE | PROT_READ) < 0) { + printf("[FAIL]\tmprotect(PROT_WRITE) failed\n"); + return 1; + } + + /* then wrss to it and succeed */ + if (test_shstk_access(shstk_ptr)) { + printf("[FAIL]\tShadow stack access to mprotect() writable memory failed\n"); + return 1; + } + + free_shstk(shstk_ptr); + + signal(SIGSEGV, SIG_DFL); + + printf("[OK]\tmprotect() test\n"); + + return 0; +} + +char zero[4096]; + +static void *uffd_thread(void *arg) +{ + struct uffdio_copy req; + int uffd = *(int *)arg; + struct uffd_msg msg; + int ret; + + while (1) { + ret = read(uffd, &msg, sizeof(msg)); + if (ret > 0) + break; + else if (errno == EAGAIN) + continue; + return (void *)1; + } + + req.dst = msg.arg.pagefault.address; + req.src = (__u64)zero; + req.len = 4096; + req.mode = 0; + + if (ioctl(uffd, UFFDIO_COPY, &req)) + return (void *)1; + + return (void *)0; +} + +int test_userfaultfd(void) +{ + struct uffdio_register uffdio_register; + struct uffdio_api uffdio_api; + struct sigaction sa = {}; + pthread_t thread; + void *res; + int uffd; + + sa.sa_sigaction = test_access_fix_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGSEGV, &sa, NULL)) + return 1; + + uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); + if (uffd < 0) { + printf("[SKIP]\tUserfaultfd unavailable.\n"); + return 0; + } + + reset_test_shstk(0); + + uffdio_api.api = UFFD_API; + uffdio_api.features = 0; + if (ioctl(uffd, UFFDIO_API, &uffdio_api)) + goto err; + + uffdio_register.range.start = (__u64)shstk_ptr; + uffdio_register.range.len = 4096; + uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) + goto err; + + if (pthread_create(&thread, NULL, &uffd_thread, &uffd)) + goto err; + + reset_shstk(shstk_ptr); + test_shstk_access(shstk_ptr); + + if (pthread_join(thread, &res)) + goto err; + + if (test_shstk_access(shstk_ptr)) + goto err; + + free_shstk(shstk_ptr); + + signal(SIGSEGV, SIG_DFL); + + if (!res) + printf("[OK]\tUserfaultfd test\n"); + return !!res; +err: + free_shstk(shstk_ptr); + close(uffd); + signal(SIGSEGV, SIG_DFL); + return 1; +} + +/* Simple linked list for keeping track of mappings in test_guard_gap() */ +struct node { + struct node *next; + void *mapping; +}; + +/* + * This tests whether mmap will place other mappings in a shadow stack's guard + * gap. The steps are: + * 1. Finds an empty place by mapping and unmapping something. + * 2. Map a shadow stack in the middle of the known empty area. + * 3. Map a bunch of PAGE_SIZE mappings. These will use the search down + * direction, filling any gaps until it encounters the shadow stack's + * guard gap. + * 4. When a mapping lands below the shadow stack from step 2, then all + * of the above gaps are filled. The search down algorithm will have + * looked at the shadow stack gaps. + * 5. See if it landed in the gap. + */ +int test_guard_gap(void) +{ + void *free_area, *shstk, *test_map = (void *)0xFFFFFFFFFFFFFFFF; + struct node *head = NULL, *cur; + + free_area = mmap(0, SS_SIZE * 3, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + munmap(free_area, SS_SIZE * 3); + + shstk = create_shstk(free_area + SS_SIZE); + if (shstk == MAP_FAILED) + return 1; + + while (test_map > shstk) { + test_map = mmap(0, PAGE_SIZE, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (test_map == MAP_FAILED) + return 1; + cur = malloc(sizeof(*cur)); + cur->mapping = test_map; + + cur->next = head; + head = cur; + } + + while (head) { + cur = head; + head = cur->next; + munmap(cur->mapping, PAGE_SIZE); + free(cur); + } + + free_shstk(shstk); + + if (shstk - test_map - PAGE_SIZE != PAGE_SIZE) + return 1; + + printf("[OK]\tGuard gap test\n"); + + return 0; +} + +/* + * Too complicated to pull it out of the 32 bit header, but also get the + * 64 bit one needed above. Just define a copy here. + */ +#define __NR_compat_sigaction 67 + +/* + * Call 32 bit signal handler to get 32 bit signals ABI. Make sure + * to push the registers that will get clobbered. + */ +int sigaction32(int signum, const struct sigaction *restrict act, + struct sigaction *restrict oldact) +{ + register long syscall_reg asm("eax") = __NR_compat_sigaction; + register long signum_reg asm("ebx") = signum; + register long act_reg asm("ecx") = (long)act; + register long oldact_reg asm("edx") = (long)oldact; + int ret = 0; + + asm volatile ("int $0x80;" + : "=a"(ret), "=m"(oldact) + : "r"(syscall_reg), "r"(signum_reg), "r"(act_reg), + "r"(oldact_reg) + : "r8", "r9", "r10", "r11" + ); + + return ret; +} + +sigjmp_buf jmp_buffer; + +void segv_gp_handler(int signum, siginfo_t *si, void *uc) +{ + segv_triggered = true; + + /* + * To work with old glibc, this can't rely on siglongjmp working with + * shadow stack enabled, so disable shadow stack before siglongjmp(). + */ + ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK); + siglongjmp(jmp_buffer, -1); +} + +/* + * Transition to 32 bit mode and check that a #GP triggers a segfault. + */ +int test_32bit(void) +{ + struct sigaction sa = {}; + struct sigaction *sa32; + + /* Create sigaction in 32 bit address range */ + sa32 = mmap(0, 4096, PROT_READ | PROT_WRITE, + MAP_32BIT | MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + sa32->sa_flags = SA_SIGINFO; + + sa.sa_sigaction = segv_gp_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGSEGV, &sa, NULL)) + return 1; + + + segv_triggered = false; + + /* Make sure segv_triggered is set before triggering the #GP */ + asm volatile("" : : : "memory"); + + /* + * Set handler to somewhere in 32 bit address space + */ + sa32->sa_handler = (void *)sa32; + if (sigaction32(SIGUSR1, sa32, NULL)) + return 1; + + if (!sigsetjmp(jmp_buffer, 1)) + raise(SIGUSR1); + + if (segv_triggered) + printf("[OK]\t32 bit test\n"); + + return !segv_triggered; +} + +void segv_handler_ptrace(int signum, siginfo_t *si, void *uc) +{ + /* The SSP adjustment caused a segfault. */ + exit(0); +} + +int test_ptrace(void) +{ + unsigned long saved_ssp, ssp = 0; + struct sigaction sa= {}; + struct iovec iov; + int status; + int pid; + + iov.iov_base = &ssp; + iov.iov_len = sizeof(ssp); + + pid = fork(); + if (!pid) { + ssp = get_ssp(); + + sa.sa_sigaction = segv_handler_ptrace; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGSEGV, &sa, NULL)) + return 1; + + ptrace(PTRACE_TRACEME, NULL, NULL, NULL); + /* + * The parent will tweak the SSP and return from this function + * will #CP. + */ + raise(SIGTRAP); + + exit(1); + } + + while (waitpid(pid, &status, 0) != -1 && WSTOPSIG(status) != SIGTRAP); + + if (ptrace(PTRACE_GETREGSET, pid, NT_X86_SHSTK, &iov)) { + printf("[INFO]\tFailed to PTRACE_GETREGS\n"); + goto out_kill; + } + + if (!ssp) { + printf("[INFO]\tPtrace child SSP was 0\n"); + goto out_kill; + } + + saved_ssp = ssp; + + iov.iov_len = 0; + if (!ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) { + printf("[INFO]\tToo small size accepted via PTRACE_SETREGS\n"); + goto out_kill; + } + + iov.iov_len = sizeof(ssp) + 1; + if (!ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) { + printf("[INFO]\tToo large size accepted via PTRACE_SETREGS\n"); + goto out_kill; + } + + ssp += 1; + if (!ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) { + printf("[INFO]\tUnaligned SSP written via PTRACE_SETREGS\n"); + goto out_kill; + } + + ssp = 0xFFFFFFFFFFFF0000; + if (!ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) { + printf("[INFO]\tKernel range SSP written via PTRACE_SETREGS\n"); + goto out_kill; + } + + /* + * Tweak the SSP so the child with #CP when it resumes and returns + * from raise() + */ + ssp = saved_ssp + 8; + iov.iov_len = sizeof(ssp); + if (ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) { + printf("[INFO]\tFailed to PTRACE_SETREGS\n"); + goto out_kill; + } + + if (ptrace(PTRACE_DETACH, pid, NULL, NULL)) { + printf("[INFO]\tFailed to PTRACE_DETACH\n"); + goto out_kill; + } + + waitpid(pid, &status, 0); + if (WEXITSTATUS(status)) + return 1; + + printf("[OK]\tPtrace test\n"); + return 0; + +out_kill: + kill(pid, SIGKILL); + return 1; +} + +int main(int argc, char *argv[]) +{ + int ret = 0; + + if (ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_SHSTK)) { + printf("[SKIP]\tCould not enable Shadow stack\n"); + return 1; + } + + if (ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK)) { + ret = 1; + printf("[FAIL]\tDisabling shadow stack failed\n"); + } + + if (ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_SHSTK)) { + printf("[SKIP]\tCould not re-enable Shadow stack\n"); + return 1; + } + + if (ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_WRSS)) { + printf("[SKIP]\tCould not enable WRSS\n"); + ret = 1; + goto out; + } + + /* Should have succeeded if here, but this is a test, so double check. */ + if (!get_ssp()) { + printf("[FAIL]\tShadow stack disabled\n"); + return 1; + } + + if (test_shstk_pivot()) { + ret = 1; + printf("[FAIL]\tShadow stack pivot\n"); + goto out; + } + + if (test_shstk_faults()) { + ret = 1; + printf("[FAIL]\tShadow stack fault test\n"); + goto out; + } + + if (test_shstk_violation()) { + ret = 1; + printf("[FAIL]\tShadow stack violation test\n"); + goto out; + } + + if (test_gup()) { + ret = 1; + printf("[FAIL]\tShadow shadow stack gup\n"); + goto out; + } + + if (test_mprotect()) { + ret = 1; + printf("[FAIL]\tShadow shadow mprotect test\n"); + goto out; + } + + if (test_userfaultfd()) { + ret = 1; + printf("[FAIL]\tUserfaultfd test\n"); + goto out; + } + + if (test_guard_gap()) { + ret = 1; + printf("[FAIL]\tGuard gap test\n"); + goto out; + } + + if (test_ptrace()) { + ret = 1; + printf("[FAIL]\tptrace test\n"); + } + + if (test_32bit()) { + ret = 1; + printf("[FAIL]\t32 bit test\n"); + goto out; + } + + return ret; + +out: + /* + * Disable shadow stack before the function returns, or there will be a + * shadow stack violation. + */ + if (ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK)) { + ret = 1; + printf("[FAIL]\tDisabling shadow stack failed\n"); + } + + return ret; +} +#endif diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile index 43a254f0e14d..21a98ba565ab 100644 --- a/tools/testing/vsock/Makefile +++ b/tools/testing/vsock/Makefile @@ -8,5 +8,5 @@ vsock_perf: vsock_perf.o CFLAGS += -g -O2 -Werror -Wall -I. -I../../include -I../../../usr/include -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -D_GNU_SOURCE .PHONY: all test clean clean: - ${RM} *.o *.d vsock_test vsock_diag_test + ${RM} *.o *.d vsock_test vsock_diag_test vsock_perf -include *.d diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index ac1bd3ac1533..90718c2fd4ea 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -255,35 +255,142 @@ static void test_stream_multiconn_server(const struct test_opts *opts) close(fds[i]); } -static void test_stream_msg_peek_client(const struct test_opts *opts) +#define MSG_PEEK_BUF_LEN 64 + +static void test_msg_peek_client(const struct test_opts *opts, + bool seqpacket) { + unsigned char buf[MSG_PEEK_BUF_LEN]; + ssize_t send_size; int fd; + int i; + + if (seqpacket) + fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + else + fd = vsock_stream_connect(opts->peer_cid, 1234); - fd = vsock_stream_connect(opts->peer_cid, 1234); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); } - send_byte(fd, 1, 0); + for (i = 0; i < sizeof(buf); i++) + buf[i] = rand() & 0xFF; + + control_expectln("SRVREADY"); + + send_size = send(fd, buf, sizeof(buf), 0); + + if (send_size < 0) { + perror("send"); + exit(EXIT_FAILURE); + } + + if (send_size != sizeof(buf)) { + fprintf(stderr, "Invalid send size %zi\n", send_size); + exit(EXIT_FAILURE); + } + close(fd); } -static void test_stream_msg_peek_server(const struct test_opts *opts) +static void test_msg_peek_server(const struct test_opts *opts, + bool seqpacket) { + unsigned char buf_half[MSG_PEEK_BUF_LEN / 2]; + unsigned char buf_normal[MSG_PEEK_BUF_LEN]; + unsigned char buf_peek[MSG_PEEK_BUF_LEN]; + ssize_t res; int fd; - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + if (seqpacket) + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL); + else + fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); } - recv_byte(fd, 1, MSG_PEEK); - recv_byte(fd, 1, 0); + /* Peek from empty socket. */ + res = recv(fd, buf_peek, sizeof(buf_peek), MSG_PEEK | MSG_DONTWAIT); + if (res != -1) { + fprintf(stderr, "expected recv(2) failure, got %zi\n", res); + exit(EXIT_FAILURE); + } + + if (errno != EAGAIN) { + perror("EAGAIN expected"); + exit(EXIT_FAILURE); + } + + control_writeln("SRVREADY"); + + /* Peek part of data. */ + res = recv(fd, buf_half, sizeof(buf_half), MSG_PEEK); + if (res != sizeof(buf_half)) { + fprintf(stderr, "recv(2) + MSG_PEEK, expected %zu, got %zi\n", + sizeof(buf_half), res); + exit(EXIT_FAILURE); + } + + /* Peek whole data. */ + res = recv(fd, buf_peek, sizeof(buf_peek), MSG_PEEK); + if (res != sizeof(buf_peek)) { + fprintf(stderr, "recv(2) + MSG_PEEK, expected %zu, got %zi\n", + sizeof(buf_peek), res); + exit(EXIT_FAILURE); + } + + /* Compare partial and full peek. */ + if (memcmp(buf_half, buf_peek, sizeof(buf_half))) { + fprintf(stderr, "Partial peek data mismatch\n"); + exit(EXIT_FAILURE); + } + + if (seqpacket) { + /* This type of socket supports MSG_TRUNC flag, + * so check it with MSG_PEEK. We must get length + * of the message. + */ + res = recv(fd, buf_half, sizeof(buf_half), MSG_PEEK | + MSG_TRUNC); + if (res != sizeof(buf_peek)) { + fprintf(stderr, + "recv(2) + MSG_PEEK | MSG_TRUNC, exp %zu, got %zi\n", + sizeof(buf_half), res); + exit(EXIT_FAILURE); + } + } + + res = recv(fd, buf_normal, sizeof(buf_normal), 0); + if (res != sizeof(buf_normal)) { + fprintf(stderr, "recv(2), expected %zu, got %zi\n", + sizeof(buf_normal), res); + exit(EXIT_FAILURE); + } + + /* Compare full peek and normal read. */ + if (memcmp(buf_peek, buf_normal, sizeof(buf_peek))) { + fprintf(stderr, "Full peek data mismatch\n"); + exit(EXIT_FAILURE); + } + close(fd); } +static void test_stream_msg_peek_client(const struct test_opts *opts) +{ + return test_msg_peek_client(opts, false); +} + +static void test_stream_msg_peek_server(const struct test_opts *opts) +{ + return test_msg_peek_server(opts, false); +} + #define SOCK_BUF_SIZE (2 * 1024 * 1024) #define MAX_MSG_SIZE (32 * 1024) @@ -1053,6 +1160,16 @@ static void test_stream_virtio_skb_merge_server(const struct test_opts *opts) close(fd); } +static void test_seqpacket_msg_peek_client(const struct test_opts *opts) +{ + return test_msg_peek_client(opts, true); +} + +static void test_seqpacket_msg_peek_server(const struct test_opts *opts) +{ + return test_msg_peek_server(opts, true); +} + static struct test_case test_cases[] = { { .name = "SOCK_STREAM connection reset", @@ -1128,6 +1245,11 @@ static struct test_case test_cases[] = { .run_client = test_stream_virtio_skb_merge_client, .run_server = test_stream_virtio_skb_merge_server, }, + { + .name = "SOCK_SEQPACKET MSG_PEEK", + .run_client = test_seqpacket_msg_peek_client, + .run_server = test_seqpacket_msg_peek_server, + }, {}, }; |
