diff options
| author | Ankur Arora <ankur.a.arora@oracle.com> | 2025-09-17 08:24:11 -0700 |
|---|---|---|
| committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2025-09-19 12:43:59 -0300 |
| commit | a8f0992998af9ea1135ee6415c68c1d84cb5ad22 (patch) | |
| tree | da51f8393ca48c82b9f5bf0a80cc2ba0d69c395a | |
| parent | perf bench mem: Refactor mem_options (diff) | |
| download | linux-a8f0992998af9ea1135ee6415c68c1d84cb5ad22.tar.gz linux-a8f0992998af9ea1135ee6415c68c1d84cb5ad22.zip | |
perf bench mem: Add mmap() workloads
Add two mmap() workloads: one that eagerly populates a region and
another that demand faults it in.
The intent is to probe the memory subsytem performance incurred
by mmap().
$ perf bench mem mmap -s 4gb -p 4kb -l 10 -f populate
# Running 'mem/mmap' benchmark:
# function 'populate' (Eagerly populated map())
# Copying 4gb bytes ...
1.811691 GB/sec
$ perf bench mem mmap -s 4gb -p 2mb -l 10 -f populate
# Running 'mem/mmap' benchmark:
# function 'populate' (Eagerly populated mmap())
# Copying 4gb bytes ...
12.272017 GB/sec
$ perf bench mem mmap -s 4gb -p 1gb -l 10 -f populate
# Running 'mem/mmap' benchmark:
# function 'populate' (Eagerly populated mmap())
# Copying 4gb bytes ...
17.085927 GB/sec
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Cc: Mateusz Guzik <mjguzik@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Raghavendra K T <raghavendra.kt@amd.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
| -rw-r--r-- | tools/perf/Documentation/perf-bench.txt | 34 | ||||
| -rw-r--r-- | tools/perf/bench/bench.h | 1 | ||||
| -rw-r--r-- | tools/perf/bench/mem-functions.c | 96 | ||||
| -rw-r--r-- | tools/perf/builtin-bench.c | 1 |
4 files changed, 132 insertions, 0 deletions
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt index 3d1455d880c3..1160224cb718 100644 --- a/tools/perf/Documentation/perf-bench.txt +++ b/tools/perf/Documentation/perf-bench.txt @@ -240,6 +240,40 @@ Repeat memset invocation this number of times. --cycles:: Use perf's cpu-cycles event instead of gettimeofday syscall. +*mmap*:: +Suite for evaluating memory subsystem performance for mmap()'d memory. + +Options of *mmap* +^^^^^^^^^^^^^^^^^ +-s:: +--size:: +Specify size of memory to set (default: 1MB). +Available units are B, KB, MB, GB and TB (case insensitive). + +-p:: +--page:: +Specify page-size for mapping memory buffers (default: 4KB). +Available values are 4KB, 2MB, 1GB (case insensitive). + +-r:: +--randomize:: +Specify seed to randomize page access offset (default: 0, or not randomized). + +-f:: +--function:: +Specify function to set (default: all). +Available functions are 'demand' and 'populate', with the first +demand faulting pages in the region and the second using an eager +mapping. + +-l:: +--nr_loops:: +Repeat mmap() invocation this number of times. + +-c:: +--cycles:: +Use perf's cpu-cycles event instead of gettimeofday syscall. + SUITES FOR 'numa' ~~~~~~~~~~~~~~~~~ *mem*:: diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 9f736423af53..8519eb5a42fa 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -28,6 +28,7 @@ int bench_syscall_fork(int argc, const char **argv); int bench_syscall_execve(int argc, const char **argv); int bench_mem_memcpy(int argc, const char **argv); int bench_mem_memset(int argc, const char **argv); +int bench_mem_mmap(int argc, const char **argv); int bench_mem_find_bit(int argc, const char **argv); int bench_futex_hash(int argc, const char **argv); int bench_futex_wake(int argc, const char **argv); diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c index 2a23bed8c2d3..2908a3a796c9 100644 --- a/tools/perf/bench/mem-functions.c +++ b/tools/perf/bench/mem-functions.c @@ -40,6 +40,7 @@ static const char *chunk_size_str = "0"; static unsigned int nr_loops = 1; static bool use_cycles; static int cycles_fd; +static unsigned int seed; static const struct option bench_common_options[] = { OPT_STRING('s', "size", &size_str, "1MB", @@ -81,6 +82,7 @@ struct bench_params { size_t chunk_size; unsigned int nr_loops; unsigned int page_shift; + unsigned int seed; }; struct bench_mem_info { @@ -98,6 +100,7 @@ typedef void (*mem_fini_t)(struct bench_mem_info *, struct bench_params *, void **, void **); typedef void *(*memcpy_t)(void *, const void *, size_t); typedef void *(*memset_t)(void *, int, size_t); +typedef void (*mmap_op_t)(void *, size_t, unsigned int, bool); struct function { const char *name; @@ -108,6 +111,7 @@ struct function { union { memcpy_t memcpy; memset_t memset; + mmap_op_t mmap_op; }; } fn; }; @@ -160,6 +164,14 @@ static union bench_clock clock_diff(union bench_clock *s, union bench_clock *e) return t; } +static void clock_accum(union bench_clock *a, union bench_clock *b) +{ + if (use_cycles) + a->cycles += b->cycles; + else + timeradd(&a->tv, &b->tv, &a->tv); +} + static double timeval2double(struct timeval *ts) { return (double)ts->tv_sec + (double)ts->tv_usec / (double)USEC_PER_SEC; @@ -271,6 +283,8 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info * } p.page_shift = ilog2(page_size); + p.seed = seed; + if (!strncmp(function_str, "all", 3)) { for (i = 0; info->functions[i].name; i++) __bench_mem_function(info, &p, i); @@ -465,3 +479,85 @@ int bench_mem_memset(int argc, const char **argv) return bench_mem_common(argc, argv, &info); } + +static void mmap_page_touch(void *dst, size_t size, unsigned int page_shift, bool random) +{ + unsigned long npages = size / (1 << page_shift); + unsigned long offset = 0, r = 0; + + for (unsigned long i = 0; i < npages; i++) { + if (random) + r = rand() % (1 << page_shift); + + *((char *)dst + offset + r) = *(char *)(dst + offset + r) + i; + offset += 1 << page_shift; + } +} + +static int do_mmap(const struct function *r, struct bench_params *p, + void *src __maybe_unused, void *dst __maybe_unused, + union bench_clock *accum) +{ + union bench_clock start, end, diff; + mmap_op_t fn = r->fn.mmap_op; + bool populate = strcmp(r->name, "populate") == 0; + + if (p->seed) + srand(p->seed); + + for (unsigned int i = 0; i < p->nr_loops; i++) { + clock_get(&start); + dst = bench_mmap(p->size, populate, p->page_shift); + if (!dst) + goto out; + + fn(dst, p->size, p->page_shift, p->seed); + clock_get(&end); + diff = clock_diff(&start, &end); + clock_accum(accum, &diff); + + bench_munmap(dst, p->size); + } + + return 0; +out: + printf("# Memory allocation failed - maybe size (%s) %s?\n", size_str, + p->page_shift != PAGE_SHIFT_4KB ? "has insufficient hugepages" : "is too large"); + return -1; +} + +static const char * const bench_mem_mmap_usage[] = { + "perf bench mem mmap <options>", + NULL +}; + +static const struct function mmap_functions[] = { + { .name = "demand", + .desc = "Demand loaded mmap()", + .fn.mmap_op = mmap_page_touch }, + + { .name = "populate", + .desc = "Eagerly populated mmap()", + .fn.mmap_op = mmap_page_touch }, + + { .name = NULL, } +}; + +int bench_mem_mmap(int argc, const char **argv) +{ + static const struct option bench_mmap_options[] = { + OPT_UINTEGER('r', "randomize", &seed, + "Seed to randomize page access offset."), + OPT_PARENT(bench_common_options), + OPT_END() + }; + + struct bench_mem_info info = { + .functions = mmap_functions, + .do_op = do_mmap, + .usage = bench_mem_mmap_usage, + .options = bench_mmap_options, + }; + + return bench_mem_common(argc, argv, &info); +} diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index 2c1a9f3d847a..02dea1b88228 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -65,6 +65,7 @@ static struct bench mem_benchmarks[] = { { "memcpy", "Benchmark for memcpy() functions", bench_mem_memcpy }, { "memset", "Benchmark for memset() functions", bench_mem_memset }, { "find_bit", "Benchmark for find_bit() functions", bench_mem_find_bit }, + { "mmap", "Benchmark for mmap() mappings", bench_mem_mmap }, { "all", "Run all memory access benchmarks", NULL }, { NULL, NULL, NULL } }; |
