diff options
| author | Ingo Molnar <mingo@kernel.org> | 2017-12-06 17:20:04 +0100 |
|---|---|---|
| committer | Ingo Molnar <mingo@kernel.org> | 2017-12-06 17:20:04 +0100 |
| commit | c32909741fe93032705e6da29b564b8fec84f415 (patch) | |
| tree | 5f82c3f5f1c573f304feeff7c8c5428200e696dd /tools/perf/util | |
| parent | Merge branch 'perf/urgent' into perf/core, to pick up fixes (diff) | |
| parent | perf tools: Rename 'backward' to 'overwrite' in evlist, mmap and record (diff) | |
| download | linux-c32909741fe93032705e6da29b564b8fec84f415.tar.gz linux-c32909741fe93032705e6da29b564b8fec84f415.zip | |
Merge tag 'perf-core-for-mingo-4.16-20171206' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core
Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo:
- Improve build messages for files needed by Intel-PT, originally copied
from the kernel sources, that drifted from its original (Adrian Hunter)
- Allow computing 'perf stat' style metrics in 'perf script' (Andi Kleen)
- Fix 'perf report -D' output for user metadata events (Arnaldo Carvalho de Melo)
- Add feature test for pthread_barrier_t availability (Arnaldo Carvalho de Melo)
- Allow again using x86's asm.h when building for the 'bpf' clang target,
making some 'perf test' LLVM/BPF entries work again (Arnaldo Carvalho de Melo)
- Use cpumaps in 'perf bench futex', eliminating some code duplication
(Davidlohr Bueso)
- Improve PMU infrastructure to support amp64's ThunderX2 implementation
defined core events (Ganapatrao Kulkarni)
- Add hint about how to add USDT probes for Node.js (Hansuk Hong)
- s/390 needs -fPIC to be incrementally linked or linked to shared
libraries (Hendrik Brueckner)
- Use pthread_barrier to synch 'perf bench futex wake-parallel' waker
threads (James Yang)
- Fix up build in hardened environments, such as Fedora 27 (Jiri Olsa)
- Add a tip about cacheline events in 'perf c2c' (Sangwon Hong)
- Set browser mode right before setup_browser(), because we may have
errors printed before that, which were getting lost (Seokho Song)
- s390x doesn't support PERF_TYPE_BREAKPOINT, so disable 'perf test'
cases 19 and 20 on s390x, that tests that feature (Thomas Richter)
- Fix unnecessary memory allocation for s390x 'perf annotate' objdump
parsing, which could lead to thousands of needless entries in the
instruction handling array (Thomas Richter)
- Fix objdump comment parsing for Intel mov dissassembly (Thomas Richter)
- Clarify usage of 'overwrite' and 'backward' in the evlist/mmap code,
removing the 'overwrite' parameter from several functions as it was
always used it as 'false' (Wang Nan)
- Fix 'perf record' backward recording, it wasn't doing what was
expected: overwriting records when the ring buffer gets full (Wang Nan)
- Use more flexible pattern matching for CPU identification for perf
vendor event's mapfile.csv, removing the need for a new perf binary
for a sligthly different chip revision that shares the same set of
counters (William Cohen)
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'tools/perf/util')
| -rw-r--r-- | tools/perf/util/annotate.c | 8 | ||||
| -rw-r--r-- | tools/perf/util/evlist.c | 53 | ||||
| -rw-r--r-- | tools/perf/util/evlist.h | 8 | ||||
| -rw-r--r-- | tools/perf/util/header.c | 68 | ||||
| -rw-r--r-- | tools/perf/util/header.h | 8 | ||||
| -rw-r--r-- | tools/perf/util/intel-pt-decoder/Build | 24 | ||||
| -rw-r--r-- | tools/perf/util/metricgroup.c | 8 | ||||
| -rw-r--r-- | tools/perf/util/mmap.c | 73 | ||||
| -rw-r--r-- | tools/perf/util/mmap.h | 4 | ||||
| -rw-r--r-- | tools/perf/util/pmu.c | 87 | ||||
| -rw-r--r-- | tools/perf/util/pmu.h | 2 | ||||
| -rw-r--r-- | tools/perf/util/python.c | 2 | ||||
| -rw-r--r-- | tools/perf/util/rblist.c | 19 | ||||
| -rw-r--r-- | tools/perf/util/rblist.h | 1 | ||||
| -rw-r--r-- | tools/perf/util/session.c | 3 | ||||
| -rw-r--r-- | tools/perf/util/stat-shadow.c | 12 | ||||
| -rw-r--r-- | tools/perf/util/thread_map.c | 22 | ||||
| -rw-r--r-- | tools/perf/util/thread_map.h | 1 |
18 files changed, 275 insertions, 128 deletions
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 22ea7936d92f..facad1e279a8 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -322,6 +322,8 @@ static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep) return 0; *addrp = strtoull(comment, &endptr, 16); + if (endptr == comment) + return 0; name = strchr(endptr, '<'); if (name == NULL) return -1; @@ -435,8 +437,8 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map *m return 0; comment = ltrim(comment); - comment__symbol(ops->source.raw, comment, &ops->source.addr, &ops->source.name); - comment__symbol(ops->target.raw, comment, &ops->target.addr, &ops->target.name); + comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name); + comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); return 0; @@ -480,7 +482,7 @@ static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops return 0; comment = ltrim(comment); - comment__symbol(ops->target.raw, comment, &ops->target.addr, &ops->target.name); + comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); return 0; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 199bb82efbcd..3570355bcf39 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -125,7 +125,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist) void perf_evlist__exit(struct perf_evlist *evlist) { zfree(&evlist->mmap); - zfree(&evlist->backward_mmap); + zfree(&evlist->overwrite_mmap); fdarray__exit(&evlist->pollfd); } @@ -675,11 +675,11 @@ static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value) { int i; - if (!evlist->backward_mmap) + if (!evlist->overwrite_mmap) return 0; for (i = 0; i < evlist->nr_mmaps; i++) { - int fd = evlist->backward_mmap[i].fd; + int fd = evlist->overwrite_mmap[i].fd; int err; if (fd < 0) @@ -711,7 +711,7 @@ union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int * No need for read-write ring buffer: kernel stop outputting when * it hit md->prev (perf_mmap__consume()). */ - return perf_mmap__read_forward(md, evlist->overwrite); + return perf_mmap__read_forward(md); } union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx) @@ -738,7 +738,7 @@ void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx) void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) { - perf_mmap__consume(&evlist->mmap[idx], evlist->overwrite); + perf_mmap__consume(&evlist->mmap[idx], false); } static void perf_evlist__munmap_nofree(struct perf_evlist *evlist) @@ -749,16 +749,16 @@ static void perf_evlist__munmap_nofree(struct perf_evlist *evlist) for (i = 0; i < evlist->nr_mmaps; i++) perf_mmap__munmap(&evlist->mmap[i]); - if (evlist->backward_mmap) + if (evlist->overwrite_mmap) for (i = 0; i < evlist->nr_mmaps; i++) - perf_mmap__munmap(&evlist->backward_mmap[i]); + perf_mmap__munmap(&evlist->overwrite_mmap[i]); } void perf_evlist__munmap(struct perf_evlist *evlist) { perf_evlist__munmap_nofree(evlist); zfree(&evlist->mmap); - zfree(&evlist->backward_mmap); + zfree(&evlist->overwrite_mmap); } static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist) @@ -800,7 +800,7 @@ perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused, static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, struct mmap_params *mp, int cpu_idx, - int thread, int *_output, int *_output_backward) + int thread, int *_output, int *_output_overwrite) { struct perf_evsel *evsel; int revent; @@ -812,18 +812,20 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, int fd; int cpu; + mp->prot = PROT_READ | PROT_WRITE; if (evsel->attr.write_backward) { - output = _output_backward; - maps = evlist->backward_mmap; + output = _output_overwrite; + maps = evlist->overwrite_mmap; if (!maps) { maps = perf_evlist__alloc_mmap(evlist); if (!maps) return -1; - evlist->backward_mmap = maps; + evlist->overwrite_mmap = maps; if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY) perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); } + mp->prot &= ~PROT_WRITE; } if (evsel->system_wide && thread) @@ -884,14 +886,14 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, pr_debug2("perf event ring buffer mmapped per cpu\n"); for (cpu = 0; cpu < nr_cpus; cpu++) { int output = -1; - int output_backward = -1; + int output_overwrite = -1; auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu, true); for (thread = 0; thread < nr_threads; thread++) { if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu, - thread, &output, &output_backward)) + thread, &output, &output_overwrite)) goto out_unmap; } } @@ -912,13 +914,13 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, pr_debug2("perf event ring buffer mmapped per thread\n"); for (thread = 0; thread < nr_threads; thread++) { int output = -1; - int output_backward = -1; + int output_overwrite = -1; auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread, false); if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread, - &output, &output_backward)) + &output, &output_overwrite)) goto out_unmap; } @@ -1052,15 +1054,18 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, * Return: %0 on success, negative error code otherwise. */ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, - bool overwrite, unsigned int auxtrace_pages, + unsigned int auxtrace_pages, bool auxtrace_overwrite) { struct perf_evsel *evsel; const struct cpu_map *cpus = evlist->cpus; const struct thread_map *threads = evlist->threads; - struct mmap_params mp = { - .prot = PROT_READ | (overwrite ? 0 : PROT_WRITE), - }; + /* + * Delay setting mp.prot: set it before calling perf_mmap__mmap. + * Its value is decided by evsel's write_backward. + * So &mp should not be passed through const pointer. + */ + struct mmap_params mp; if (!evlist->mmap) evlist->mmap = perf_evlist__alloc_mmap(evlist); @@ -1070,7 +1075,6 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) return -ENOMEM; - evlist->overwrite = overwrite; evlist->mmap_len = perf_evlist__mmap_size(pages); pr_debug("mmap size %zuB\n", evlist->mmap_len); mp.mask = evlist->mmap_len - page_size - 1; @@ -1091,10 +1095,9 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, return perf_evlist__mmap_per_cpu(evlist, &mp); } -int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, - bool overwrite) +int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages) { - return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false); + return perf_evlist__mmap_ex(evlist, pages, 0, false); } int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) @@ -1750,7 +1753,7 @@ void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, RESUME, } action = NONE; - if (!evlist->backward_mmap) + if (!evlist->overwrite_mmap) return; switch (old_state) { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 4e8131dacbd7..75160666d305 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -31,7 +31,6 @@ struct perf_evlist { int nr_entries; int nr_groups; int nr_mmaps; - bool overwrite; bool enabled; bool has_user_cpus; size_t mmap_len; @@ -45,7 +44,7 @@ struct perf_evlist { } workload; struct fdarray pollfd; struct perf_mmap *mmap; - struct perf_mmap *backward_mmap; + struct perf_mmap *overwrite_mmap; struct thread_map *threads; struct cpu_map *cpus; struct perf_evsel *selected; @@ -169,10 +168,9 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, unsigned long perf_event_mlock_kb_in_pages(void); int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, - bool overwrite, unsigned int auxtrace_pages, + unsigned int auxtrace_pages, bool auxtrace_overwrite); -int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, - bool overwrite); +int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages); void perf_evlist__munmap(struct perf_evlist *evlist); size_t perf_evlist__mmap_size(unsigned long pages); diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 7c0e9d587bfa..5890e08e0754 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3258,6 +3258,74 @@ int perf_event__synthesize_attrs(struct perf_tool *tool, return err; } +static bool has_unit(struct perf_evsel *counter) +{ + return counter->unit && *counter->unit; +} + +static bool has_scale(struct perf_evsel *counter) +{ + return counter->scale != 1; +} + +int perf_event__synthesize_extra_attr(struct perf_tool *tool, + struct perf_evlist *evsel_list, + perf_event__handler_t process, + bool is_pipe) +{ + struct perf_evsel *counter; + int err; + + /* + * Synthesize other events stuff not carried within + * attr event - unit, scale, name + */ + evlist__for_each_entry(evsel_list, counter) { + if (!counter->supported) + continue; + + /* + * Synthesize unit and scale only if it's defined. + */ + if (has_unit(counter)) { + err = perf_event__synthesize_event_update_unit(tool, counter, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel unit.\n"); + return err; + } + } + + if (has_scale(counter)) { + err = perf_event__synthesize_event_update_scale(tool, counter, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel counter.\n"); + return err; + } + } + + if (counter->own_cpus) { + err = perf_event__synthesize_event_update_cpus(tool, counter, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel cpus.\n"); + return err; + } + } + + /* + * Name is needed only for pipe output, + * perf.data carries event names. + */ + if (is_pipe) { + err = perf_event__synthesize_event_update_name(tool, counter, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel name.\n"); + return err; + } + } + } + return 0; +} + int perf_event__process_attr(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_evlist **pevlist) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 29ccbfdf8724..317fb901e47f 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -9,6 +9,7 @@ #include <linux/types.h> #include "event.h" #include "env.h" +#include "pmu.h" enum { HEADER_RESERVED = 0, /* always cleared */ @@ -107,6 +108,11 @@ int perf_event__synthesize_features(struct perf_tool *tool, struct perf_evlist *evlist, perf_event__handler_t process); +int perf_event__synthesize_extra_attr(struct perf_tool *tool, + struct perf_evlist *evsel_list, + perf_event__handler_t process, + bool is_pipe); + int perf_event__process_feature(struct perf_tool *tool, union perf_event *event, struct perf_session *session); @@ -166,5 +172,5 @@ int write_padded(struct feat_fd *fd, const void *bf, */ int get_cpuid(char *buffer, size_t sz); -char *get_cpuid_str(void); +char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused); #endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build index 10e0814bb8d2..1b704fbea9de 100644 --- a/tools/perf/util/intel-pt-decoder/Build +++ b/tools/perf/util/intel-pt-decoder/Build @@ -11,15 +11,21 @@ $(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_table $(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/intel-pt-insn-decoder.c util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c @(diff -I 2>&1 | grep -q 'option requires an argument' && \ - test -d ../../kernel -a -d ../../tools -a -d ../perf && (( \ - diff -B -I'^#include' util/intel-pt-decoder/insn.c ../../arch/x86/lib/insn.c >/dev/null && \ - diff -B -I'^#include' util/intel-pt-decoder/inat.c ../../arch/x86/lib/inat.c >/dev/null && \ - diff -B util/intel-pt-decoder/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null && \ - diff -B util/intel-pt-decoder/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null && \ - diff -B -I'^#include' util/intel-pt-decoder/insn.h ../../arch/x86/include/asm/insn.h >/dev/null && \ - diff -B -I'^#include' util/intel-pt-decoder/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \ - diff -B -I'^#include' util/intel-pt-decoder/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \ - || echo "Warning: Intel PT: x86 instruction decoder differs from kernel" >&2 )) || true + test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \ + ((diff -B -I'^#include' util/intel-pt-decoder/insn.c ../../arch/x86/lib/insn.c >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder C file at 'tools/perf/util/intel-pt-decoder/insn.c' differs from latest version at 'arch/x86/lib/insn.c'" >&2)) && \ + ((diff -B -I'^#include' util/intel-pt-decoder/inat.c ../../arch/x86/lib/inat.c >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder C file at 'tools/perf/util/intel-pt-decoder/inat.c' differs from latest version at 'arch/x86/lib/inat.c'" >&2)) && \ + ((diff -B util/intel-pt-decoder/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder map file at 'tools/perf/util/intel-pt-decoder/x86-opcode-map.txt' differs from latest version at 'arch/x86/lib/x86-opcode-map.txt'" >&2)) && \ + ((diff -B util/intel-pt-decoder/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder script at 'tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk' differs from latest version at 'arch/x86/tools/gen-insn-attr-x86.awk'" >&2)) && \ + ((diff -B -I'^#include' util/intel-pt-decoder/insn.h ../../arch/x86/include/asm/insn.h >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/insn.h' differs from latest version at 'arch/x86/include/asm/insn.h'" >&2)) && \ + ((diff -B -I'^#include' util/intel-pt-decoder/inat.h ../../arch/x86/include/asm/inat.h >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/inat.h' differs from latest version at 'arch/x86/include/asm/inat.h'" >&2)) && \ + ((diff -B -I'^#include' util/intel-pt-decoder/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/inat_types.h' differs from latest version at 'arch/x86/include/asm/inat_types.h'" >&2)))) || true $(call rule_mkdir) $(call if_changed_dep,cc_o_c) diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 0ddd9c199227..e48410c99b39 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -38,6 +38,10 @@ struct metric_event *metricgroup__lookup(struct rblist *metric_events, struct metric_event me = { .evsel = evsel }; + + if (!metric_events) + return NULL; + nd = rblist__find(metric_events, &me); if (nd) return container_of(nd, struct metric_event, nd); @@ -270,7 +274,7 @@ static void metricgroup__print_strlist(struct strlist *metrics, bool raw) void metricgroup__print(bool metrics, bool metricgroups, char *filter, bool raw) { - struct pmu_events_map *map = perf_pmu__find_map(); + struct pmu_events_map *map = perf_pmu__find_map(NULL); struct pmu_event *pe; int i; struct rblist groups; @@ -368,7 +372,7 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, static int metricgroup__add_metric(const char *metric, struct strbuf *events, struct list_head *group_list) { - struct pmu_events_map *map = perf_pmu__find_map(); + struct pmu_events_map *map = perf_pmu__find_map(NULL); struct pmu_event *pe; int ret = -EINVAL; int i, j; diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 9fe5f9c7d577..05076e683938 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -21,33 +21,13 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map) } /* When check_messup is true, 'end' must points to a good entry */ -static union perf_event *perf_mmap__read(struct perf_mmap *map, bool check_messup, +static union perf_event *perf_mmap__read(struct perf_mmap *map, u64 start, u64 end, u64 *prev) { unsigned char *data = map->base + page_size; union perf_event *event = NULL; int diff = end - start; - if (check_messup) { - /* - * If we're further behind than half the buffer, there's a chance - * the writer will bite our tail and mess up the samples under us. - * - * If we somehow ended up ahead of the 'end', we got messed up. - * - * In either case, truncate and restart at 'end'. - */ - if (diff > map->mask / 2 || diff < 0) { - fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); - - /* - * 'end' points to a known good entry, start there. - */ - start = end; - diff = 0; - } - } - if (diff >= (int)sizeof(event->header)) { size_t size; @@ -89,7 +69,7 @@ broken_event: return event; } -union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup) +union perf_event *perf_mmap__read_forward(struct perf_mmap *map) { u64 head; u64 old = map->prev; @@ -102,7 +82,7 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_mess head = perf_mmap__read_head(map); - return perf_mmap__read(map, check_messup, old, head, &map->prev); + return perf_mmap__read(map, old, head, &map->prev); } union perf_event *perf_mmap__read_backward(struct perf_mmap *map) @@ -138,7 +118,7 @@ union perf_event *perf_mmap__read_backward(struct perf_mmap *map) else end = head + map->mask + 1; - return perf_mmap__read(map, false, start, end, &map->prev); + return perf_mmap__read(map, start, end, &map->prev); } void perf_mmap__read_catchup(struct perf_mmap *map) @@ -254,18 +234,18 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd) return 0; } -static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end) +static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end) { struct perf_event_header *pheader; u64 evt_head = head; int size = mask + 1; - pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head); + pr_debug2("overwrite_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head); pheader = (struct perf_event_header *)(buf + (head & mask)); *start = head; while (true) { if (evt_head - head >= (unsigned int)size) { - pr_debug("Finished reading backward ring buffer: rewind\n"); + pr_debug("Finished reading overwrite ring buffer: rewind\n"); if (evt_head - head > (unsigned int)size) evt_head -= pheader->size; *end = evt_head; @@ -275,7 +255,7 @@ static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 pheader = (struct perf_event_header *)(buf + (evt_head & mask)); if (pheader->size == 0) { - pr_debug("Finished reading backward ring buffer: get start\n"); + pr_debug("Finished reading overwrite ring buffer: get start\n"); *end = evt_head; return 0; } @@ -287,19 +267,7 @@ static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 return -1; } -static int rb_find_range(void *data, int mask, u64 head, u64 old, - u64 *start, u64 *end, bool backward) -{ - if (!backward) { - *start = old; - *end = head; - return 0; - } - - return backward_rb_find_range(data, mask, head, start, end); -} - -int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward, +int perf_mmap__push(struct perf_mmap *md, bool overwrite, void *to, int push(void *to, void *buf, size_t size)) { u64 head = perf_mmap__read_head(md); @@ -310,19 +278,28 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward, void *buf; int rc = 0; - if (rb_find_range(data, md->mask, head, old, &start, &end, backward)) - return -1; + start = overwrite ? head : old; + end = overwrite ? old : head; if (start == end) return 0; size = end - start; if (size > (unsigned long)(md->mask) + 1) { - WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); + if (!overwrite) { + WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); - md->prev = head; - perf_mmap__consume(md, overwrite || backward); - return 0; + md->prev = head; + perf_mmap__consume(md, overwrite); + return 0; + } + + /* + * Backward ring buffer is full. We still have a chance to read + * most of data from it. + */ + if (overwrite_rb_find_range(data, md->mask, head, &start, &end)) + return -1; } if ((start & md->mask) + size != (end & md->mask)) { @@ -346,7 +323,7 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward, } md->prev = head; - perf_mmap__consume(md, overwrite || backward); + perf_mmap__consume(md, overwrite); out: return rc; } diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index efd78b827b05..d640273b7762 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -86,10 +86,10 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) pc->data_tail = tail; } -union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup); +union perf_event *perf_mmap__read_forward(struct perf_mmap *map); union perf_event *perf_mmap__read_backward(struct perf_mmap *map); -int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward, +int perf_mmap__push(struct perf_mmap *md, bool backward, void *to, int push(void *to, void *buf, size_t size)); size_t perf_mmap__mmap_len(struct perf_mmap *map); diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 80fb1593913a..57e38fdf0b34 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -12,6 +12,7 @@ #include <dirent.h> #include <api/fs/fs.h> #include <locale.h> +#include <regex.h> #include "util.h" #include "pmu.h" #include "parse-events.h" @@ -537,17 +538,45 @@ static bool pmu_is_uncore(const char *name) } /* + * PMU CORE devices have different name other than cpu in sysfs on some + * platforms. looking for possible sysfs files to identify as core device. + */ +static int is_pmu_core(const char *name) +{ + struct stat st; + char path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return 0; + + /* Look for cpu sysfs (x86 and others) */ + scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu", sysfs); + if ((stat(path, &st) == 0) && + (strncmp(name, "cpu", strlen("cpu")) == 0)) + return 1; + + /* Look for cpu sysfs (specific to arm) */ + scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/cpus", + sysfs, name); + if (stat(path, &st) == 0) + return 1; + + return 0; +} + +/* * Return the CPU id as a raw string. * * Each architecture should provide a more precise id string that * can be use to match the architecture's "mapfile". */ -char * __weak get_cpuid_str(void) +char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused) { return NULL; } -static char *perf_pmu__getcpuid(void) +static char *perf_pmu__getcpuid(struct perf_pmu *pmu) { char *cpuid; static bool printed; @@ -556,7 +585,7 @@ static char *perf_pmu__getcpuid(void) if (cpuid) cpuid = strdup(cpuid); if (!cpuid) - cpuid = get_cpuid_str(); + cpuid = get_cpuid_str(pmu); if (!cpuid) return NULL; @@ -567,22 +596,45 @@ static char *perf_pmu__getcpuid(void) return cpuid; } -struct pmu_events_map *perf_pmu__find_map(void) +struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) { struct pmu_events_map *map; - char *cpuid = perf_pmu__getcpuid(); + char *cpuid = perf_pmu__getcpuid(pmu); int i; + /* on some platforms which uses cpus map, cpuid can be NULL for + * PMUs other than CORE PMUs. + */ + if (!cpuid) + return NULL; + i = 0; for (;;) { + regex_t re; + regmatch_t pmatch[1]; + int match; + map = &pmu_events_map[i++]; if (!map->table) { map = NULL; break; } - if (!strcmp(map->cpuid, cpuid)) + if (regcomp(&re, map->cpuid, REG_EXTENDED) != 0) { + /* Warn unable to generate match particular string. */ + pr_info("Invalid regular expression %s\n", map->cpuid); break; + } + + match = !regexec(&re, cpuid, 1, pmatch, 0); + regfree(&re); + if (match) { + size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so); + + /* Verify the entire string matched. */ + if (match_len == strlen(cpuid)) + break; + } } free(cpuid); return map; @@ -593,13 +645,14 @@ struct pmu_events_map *perf_pmu__find_map(void) * to the current running CPU. Then, add all PMU events from that table * as aliases. */ -static void pmu_add_cpu_aliases(struct list_head *head, const char *name) +static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) { int i; struct pmu_events_map *map; struct pmu_event *pe; + const char *name = pmu->name; - map = perf_pmu__find_map(); + map = perf_pmu__find_map(pmu); if (!map) return; @@ -608,7 +661,6 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name) */ i = 0; while (1) { - const char *pname; pe = &map->table[i++]; if (!pe->name) { @@ -617,9 +669,13 @@ static void pmu_add_cpu_aliases(struct list_head *head, const char *name) break; } - pname = pe->pmu ? pe->pmu : "cpu"; - if (strncmp(pname, name, strlen(pname))) - continue; + if (!is_pmu_core(name)) { + /* check for uncore devices */ + if (pe->pmu == NULL) + continue; + if (strncmp(pe->pmu, name, strlen(pe->pmu))) + continue; + } /* need type casts to override 'const' */ __perf_pmu__new_alias(head, NULL, (char *)pe->name, @@ -661,21 +717,20 @@ static struct perf_pmu *pmu_lookup(const char *name) if (pmu_aliases(name, &aliases)) return NULL; - pmu_add_cpu_aliases(&aliases, name); pmu = zalloc(sizeof(*pmu)); if (!pmu) return NULL; pmu->cpus = pmu_cpumask(name); - + pmu->name = strdup(name); + pmu->type = type; pmu->is_uncore = pmu_is_uncore(name); + pmu_add_cpu_aliases(&aliases, pmu); INIT_LIST_HEAD(&pmu->format); INIT_LIST_HEAD(&pmu->aliases); list_splice(&format, &pmu->format); list_splice(&aliases, &pmu->aliases); - pmu->name = strdup(name); - pmu->type = type; list_add_tail(&pmu->list, &pmus); pmu->default_config = perf_pmu__get_default_config(pmu); diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 27c75e635866..76fecec7b3f9 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -92,6 +92,6 @@ int perf_pmu__test(void); struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu); -struct pmu_events_map *perf_pmu__find_map(void); +struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); #endif /* __PMU_H */ diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 8e49d9cafcfc..b1e999bd21ef 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -864,7 +864,7 @@ static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist, &pages, &overwrite)) return NULL; - if (perf_evlist__mmap(evlist, pages, overwrite) < 0) { + if (perf_evlist__mmap(evlist, pages) < 0) { PyErr_SetFromErrno(PyExc_OSError); return NULL; } diff --git a/tools/perf/util/rblist.c b/tools/perf/util/rblist.c index 0dfe27d99458..0efc3258c648 100644 --- a/tools/perf/util/rblist.c +++ b/tools/perf/util/rblist.c @@ -101,16 +101,21 @@ void rblist__init(struct rblist *rblist) return; } +void rblist__exit(struct rblist *rblist) +{ + struct rb_node *pos, *next = rb_first(&rblist->entries); + + while (next) { + pos = next; + next = rb_next(pos); + rblist__remove_node(rblist, pos); + } +} + void rblist__delete(struct rblist *rblist) { if (rblist != NULL) { - struct rb_node *pos, *next = rb_first(&rblist->entries); - - while (next) { - pos = next; - next = rb_next(pos); - rblist__remove_node(rblist, pos); - } + rblist__exit(rblist); free(rblist); } } diff --git a/tools/perf/util/rblist.h b/tools/perf/util/rblist.h index 4c8638a22571..76df15c27f5f 100644 --- a/tools/perf/util/rblist.h +++ b/tools/perf/util/rblist.h @@ -29,6 +29,7 @@ struct rblist { }; void rblist__init(struct rblist *rblist); +void rblist__exit(struct rblist *rblist); void rblist__delete(struct rblist *rblist); int rblist__add_node(struct rblist *rblist, const void *new_entry); void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index df2857137908..54e30f1bcbd7 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1348,10 +1348,11 @@ static s64 perf_session__process_user_event(struct perf_session *session, { struct ordered_events *oe = &session->ordered_events; struct perf_tool *tool = session->tool; + struct perf_sample sample = { .time = 0, }; int fd = perf_data__fd(session->data); int err; - dump_event(session->evlist, event, file_offset, NULL); + dump_event(session->evlist, event, file_offset, &sample); /* These events are processed right away */ switch (event->header.type) { diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 855e35cbb1dc..57ec22513971 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -87,6 +87,16 @@ static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused, return &nd->rb_node; } +static void saved_value_delete(struct rblist *rblist __maybe_unused, + struct rb_node *rb_node) +{ + struct saved_value *v; + + BUG_ON(!rb_node); + v = container_of(rb_node, struct saved_value, rb_node); + free(v); +} + static struct saved_value *saved_value_lookup(struct perf_evsel *evsel, int cpu, bool create) @@ -114,7 +124,7 @@ void perf_stat__init_shadow_stats(void) rblist__init(&runtime_saved_values); runtime_saved_values.node_cmp = saved_value_cmp; runtime_saved_values.node_new = saved_value_new; - /* No delete for now */ + runtime_saved_values.node_delete = saved_value_delete; } static int evsel_context(struct perf_evsel *evsel) diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index be0d5a736dea..2b653853eec2 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -92,7 +92,7 @@ struct thread_map *thread_map__new_by_tid(pid_t tid) return threads; } -struct thread_map *thread_map__new_by_uid(uid_t uid) +static struct thread_map *__thread_map__new_all_cpus(uid_t uid) { DIR *proc; int max_threads = 32, items, i; @@ -113,7 +113,6 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) while ((dirent = readdir(proc)) != NULL) { char *end; bool grow = false; - struct stat st; pid_t pid = strtol(dirent->d_name, &end, 10); if (*end) /* only interested in proper numerical dirents */ @@ -121,11 +120,12 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) snprintf(path, sizeof(path), "/proc/%s", dirent->d_name); - if (stat(path, &st) != 0) - continue; + if (uid != UINT_MAX) { + struct stat st; - if (st.st_uid != uid) - continue; + if (stat(path, &st) != 0 || st.st_uid != uid) + continue; + } snprintf(path, sizeof(path), "/proc/%d/task", pid); items = scandir(path, &namelist, filter, NULL); @@ -178,6 +178,16 @@ out_free_closedir: goto out_closedir; } +struct thread_map *thread_map__new_all_cpus(void) +{ + return __thread_map__new_all_cpus(UINT_MAX); +} + +struct thread_map *thread_map__new_by_uid(uid_t uid) +{ + return __thread_map__new_all_cpus(uid); +} + struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid) { if (pid != -1) diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index f15803985435..07a765fb22bb 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -23,6 +23,7 @@ struct thread_map *thread_map__new_dummy(void); struct thread_map *thread_map__new_by_pid(pid_t pid); struct thread_map *thread_map__new_by_tid(pid_t tid); struct thread_map *thread_map__new_by_uid(uid_t uid); +struct thread_map *thread_map__new_all_cpus(void); struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid); struct thread_map *thread_map__new_event(struct thread_map_event *event); |
