diff options
Diffstat (limited to 'grep.c')
| -rw-r--r-- | grep.c | 1174 |
1 files changed, 535 insertions, 639 deletions
@@ -1,6 +1,7 @@ #include "cache.h" #include "config.h" #include "grep.h" +#include "hex.h" #include "object-store.h" #include "userdiff.h" #include "xdiff-interface.h" @@ -11,62 +12,26 @@ #include "help.h" static int grep_source_load(struct grep_source *gs); -static int grep_source_is_binary(struct grep_source *gs); +static int grep_source_is_binary(struct grep_source *gs, + struct index_state *istate); -static struct grep_opt grep_defaults; +static void std_output(struct grep_opt *opt, const void *buf, size_t size) +{ + fwrite(buf, size, 1, stdout); +} static const char *color_grep_slots[] = { [GREP_COLOR_CONTEXT] = "context", [GREP_COLOR_FILENAME] = "filename", [GREP_COLOR_FUNCTION] = "function", [GREP_COLOR_LINENO] = "lineNumber", + [GREP_COLOR_COLUMNNO] = "column", [GREP_COLOR_MATCH_CONTEXT] = "matchContext", [GREP_COLOR_MATCH_SELECTED] = "matchSelected", [GREP_COLOR_SELECTED] = "selected", [GREP_COLOR_SEP] = "separator", }; -static void std_output(struct grep_opt *opt, const void *buf, size_t size) -{ - fwrite(buf, size, 1, stdout); -} - -static void color_set(char *dst, const char *color_bytes) -{ - xsnprintf(dst, COLOR_MAXLEN, "%s", color_bytes); -} - -/* - * Initialize the grep_defaults template with hardcoded defaults. - * We could let the compiler do this, but without C99 initializers - * the code gets unwieldy and unreadable, so... - */ -void init_grep_defaults(void) -{ - struct grep_opt *opt = &grep_defaults; - static int run_once; - - if (run_once) - return; - run_once++; - - memset(opt, 0, sizeof(*opt)); - opt->relative = 1; - opt->pathname = 1; - opt->max_depth = -1; - opt->pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED; - color_set(opt->colors[GREP_COLOR_CONTEXT], ""); - color_set(opt->colors[GREP_COLOR_FILENAME], ""); - color_set(opt->colors[GREP_COLOR_FUNCTION], ""); - color_set(opt->colors[GREP_COLOR_LINENO], ""); - color_set(opt->colors[GREP_COLOR_MATCH_CONTEXT], GIT_COLOR_BOLD_RED); - color_set(opt->colors[GREP_COLOR_MATCH_SELECTED], GIT_COLOR_BOLD_RED); - color_set(opt->colors[GREP_COLOR_SELECTED], ""); - color_set(opt->colors[GREP_COLOR_SEP], GIT_COLOR_CYAN); - opt->color = -1; - opt->output = std_output; -} - static int parse_pattern_type_arg(const char *opt, const char *arg) { if (!strcmp(arg, "default")) @@ -90,7 +55,7 @@ define_list_config_array_extra(color_grep_slots, {"match"}); */ int grep_config(const char *var, const char *value, void *cb) { - struct grep_opt *opt = &grep_defaults; + struct grep_opt *opt = cb; const char *slot; if (userdiff_config(var, value) < 0) @@ -110,6 +75,10 @@ int grep_config(const char *var, const char *value, void *cb) opt->linenum = git_config_bool(var, value); return 0; } + if (!strcmp(var, "grep.column")) { + opt->columnnum = git_config_bool(var, value); + return 0; + } if (!strcmp(var, "grep.fullname")) { opt->relative = !git_config_bool(var, value); @@ -137,99 +106,14 @@ int grep_config(const char *var, const char *value, void *cb) return 0; } -/* - * Initialize one instance of grep_opt and copy the - * default values from the template we read the configuration - * information in an earlier call to git_config(grep_config). - */ -void grep_init(struct grep_opt *opt, const char *prefix) +void grep_init(struct grep_opt *opt, struct repository *repo) { - struct grep_opt *def = &grep_defaults; - int i; + struct grep_opt blank = GREP_OPT_INIT; + memcpy(opt, &blank, sizeof(*opt)); - memset(opt, 0, sizeof(*opt)); - opt->prefix = prefix; - opt->prefix_length = (prefix && *prefix) ? strlen(prefix) : 0; + opt->repo = repo; opt->pattern_tail = &opt->pattern_list; opt->header_tail = &opt->header_list; - - opt->color = def->color; - opt->extended_regexp_option = def->extended_regexp_option; - opt->pattern_type_option = def->pattern_type_option; - opt->linenum = def->linenum; - opt->max_depth = def->max_depth; - opt->pathname = def->pathname; - opt->relative = def->relative; - opt->output = def->output; - - for (i = 0; i < NR_GREP_COLORS; i++) - color_set(opt->colors[i], def->colors[i]); -} - -static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct grep_opt *opt) -{ - /* - * When committing to the pattern type by setting the relevant - * fields in grep_opt it's generally not necessary to zero out - * the fields we're not choosing, since they won't have been - * set by anything. The extended_regexp_option field is the - * only exception to this. - * - * This is because in the process of parsing grep.patternType - * & grep.extendedRegexp we set opt->pattern_type_option and - * opt->extended_regexp_option, respectively. We then - * internally use opt->extended_regexp_option to see if we're - * compiling an ERE. It must be unset if that's not actually - * the case. - */ - if (pattern_type != GREP_PATTERN_TYPE_ERE && - opt->extended_regexp_option) - opt->extended_regexp_option = 0; - - switch (pattern_type) { - case GREP_PATTERN_TYPE_UNSPECIFIED: - /* fall through */ - - case GREP_PATTERN_TYPE_BRE: - break; - - case GREP_PATTERN_TYPE_ERE: - opt->extended_regexp_option = 1; - break; - - case GREP_PATTERN_TYPE_FIXED: - opt->fixed = 1; - break; - - case GREP_PATTERN_TYPE_PCRE: -#ifdef USE_LIBPCRE2 - opt->pcre2 = 1; -#else - /* - * It's important that pcre1 always be assigned to - * even when there's no USE_LIBPCRE* defined. We still - * call the PCRE stub function, it just dies with - * "cannot use Perl-compatible regexes[...]". - */ - opt->pcre1 = 1; -#endif - break; - } -} - -void grep_commit_pattern_type(enum grep_pattern_type pattern_type, struct grep_opt *opt) -{ - if (pattern_type != GREP_PATTERN_TYPE_UNSPECIFIED) - grep_set_pattern_type_option(pattern_type, opt); - else if (opt->pattern_type_option != GREP_PATTERN_TYPE_UNSPECIFIED) - grep_set_pattern_type_option(opt->pattern_type_option, opt); - else if (opt->extended_regexp_option) - /* - * This branch *must* happen after setting from the - * opt->pattern_type_option above, we don't want - * grep.extendedRegexp to override grep.patternType! - */ - grep_set_pattern_type_option(GREP_PATTERN_TYPE_ERE, opt); } static struct grep_pat *create_grep_pat(const char *pat, size_t patlen, @@ -356,152 +240,99 @@ static int is_fixed(const char *s, size_t len) return 1; } -static int has_null(const char *s, size_t len) -{ - /* - * regcomp cannot accept patterns with NULs so when using it - * we consider any pattern containing a NUL fixed. - */ - if (memchr(s, 0, len)) - return 1; - - return 0; -} +#ifdef USE_LIBPCRE2 +#define GREP_PCRE2_DEBUG_MALLOC 0 -#ifdef USE_LIBPCRE1 -static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt) +static void *pcre2_malloc(PCRE2_SIZE size, MAYBE_UNUSED void *memory_data) { - const char *error; - int erroffset; - int options = PCRE_MULTILINE; - - if (opt->ignore_case) { - if (has_non_ascii(p->pattern)) - p->pcre1_tables = pcre_maketables(); - options |= PCRE_CASELESS; - } - if (is_utf8_locale() && has_non_ascii(p->pattern)) - options |= PCRE_UTF8; - - p->pcre1_regexp = pcre_compile(p->pattern, options, &error, &erroffset, - p->pcre1_tables); - if (!p->pcre1_regexp) - compile_regexp_failed(p, error); - - p->pcre1_extra_info = pcre_study(p->pcre1_regexp, GIT_PCRE_STUDY_JIT_COMPILE, &error); - if (!p->pcre1_extra_info && error) - die("%s", error); - -#ifdef GIT_PCRE1_USE_JIT - pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on); - if (p->pcre1_jit_on == 1) { - p->pcre1_jit_stack = pcre_jit_stack_alloc(1, 1024 * 1024); - if (!p->pcre1_jit_stack) - die("Couldn't allocate PCRE JIT stack"); - pcre_assign_jit_stack(p->pcre1_extra_info, NULL, p->pcre1_jit_stack); - } else if (p->pcre1_jit_on != 0) { - BUG("The pcre1_jit_on variable should be 0 or 1, not %d", - p->pcre1_jit_on); - } + void *pointer = malloc(size); +#if GREP_PCRE2_DEBUG_MALLOC + static int count = 1; + fprintf(stderr, "PCRE2:%p -> #%02d: alloc(%lu)\n", pointer, count++, size); #endif + return pointer; } -static int pcre1match(struct grep_pat *p, const char *line, const char *eol, - regmatch_t *match, int eflags) +static void pcre2_free(void *pointer, MAYBE_UNUSED void *memory_data) { - int ovector[30], ret, flags = 0; - - if (eflags & REG_NOTBOL) - flags |= PCRE_NOTBOL; - -#ifdef GIT_PCRE1_USE_JIT - if (p->pcre1_jit_on) { - ret = pcre_jit_exec(p->pcre1_regexp, p->pcre1_extra_info, line, - eol - line, 0, flags, ovector, - ARRAY_SIZE(ovector), p->pcre1_jit_stack); - } else +#if GREP_PCRE2_DEBUG_MALLOC + static int count = 1; + if (pointer) + fprintf(stderr, "PCRE2:%p -> #%02d: free()\n", pointer, count++); #endif - { - ret = pcre_exec(p->pcre1_regexp, p->pcre1_extra_info, line, - eol - line, 0, flags, ovector, - ARRAY_SIZE(ovector)); - } - - if (ret < 0 && ret != PCRE_ERROR_NOMATCH) - die("pcre_exec failed with error code %d", ret); - if (ret > 0) { - ret = 0; - match->rm_so = ovector[0]; - match->rm_eo = ovector[1]; - } - - return ret; + free(pointer); } -static void free_pcre1_regexp(struct grep_pat *p) -{ - pcre_free(p->pcre1_regexp); -#ifdef GIT_PCRE1_USE_JIT - if (p->pcre1_jit_on) { - pcre_free_study(p->pcre1_extra_info); - pcre_jit_stack_free(p->pcre1_jit_stack); - } else -#endif - { - pcre_free(p->pcre1_extra_info); - } - pcre_free((void *)p->pcre1_tables); -} -#else /* !USE_LIBPCRE1 */ -static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt) +static int pcre2_jit_functional(void) { - die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE"); -} + static int jit_working = -1; + pcre2_code *code; + size_t off; + int err; -static int pcre1match(struct grep_pat *p, const char *line, const char *eol, - regmatch_t *match, int eflags) -{ - return 1; -} + if (jit_working != -1) + return jit_working; -static void free_pcre1_regexp(struct grep_pat *p) -{ + /* + * Try to JIT compile a simple pattern to probe if the JIT is + * working in general. It might fail for systems where creating + * memory mappings for runtime code generation is restricted. + */ + code = pcre2_compile((PCRE2_SPTR)".", 1, 0, &err, &off, NULL); + if (!code) + return 0; + + jit_working = pcre2_jit_compile(code, PCRE2_JIT_COMPLETE) == 0; + pcre2_code_free(code); + + return jit_working; } -#endif /* !USE_LIBPCRE1 */ -#ifdef USE_LIBPCRE2 static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt) { int error; PCRE2_UCHAR errbuf[256]; PCRE2_SIZE erroffset; int options = PCRE2_MULTILINE; - const uint8_t *character_tables = NULL; int jitret; int patinforet; size_t jitsizearg; + int literal = !opt->ignore_case && (p->fixed || p->is_fixed); - assert(opt->pcre2); - - p->pcre2_compile_context = NULL; + /* + * Call pcre2_general_context_create() before calling any + * other pcre2_*(). It sets up our malloc()/free() functions + * with which everything else is allocated. + */ + p->pcre2_general_context = pcre2_general_context_create( + pcre2_malloc, pcre2_free, NULL); + if (!p->pcre2_general_context) + die("Couldn't allocate PCRE2 general context"); if (opt->ignore_case) { - if (has_non_ascii(p->pattern)) { - character_tables = pcre2_maketables(NULL); - p->pcre2_compile_context = pcre2_compile_context_create(NULL); - pcre2_set_character_tables(p->pcre2_compile_context, character_tables); + if (!opt->ignore_locale && has_non_ascii(p->pattern)) { + p->pcre2_tables = pcre2_maketables(p->pcre2_general_context); + p->pcre2_compile_context = pcre2_compile_context_create(p->pcre2_general_context); + pcre2_set_character_tables(p->pcre2_compile_context, + p->pcre2_tables); } options |= PCRE2_CASELESS; } - if (is_utf8_locale() && has_non_ascii(p->pattern)) - options |= PCRE2_UTF; + if (!opt->ignore_locale && is_utf8_locale() && !literal) + options |= (PCRE2_UTF | PCRE2_UCP | PCRE2_MATCH_INVALID_UTF); + +#ifndef GIT_PCRE2_VERSION_10_36_OR_HIGHER + /* Work around https://bugs.exim.org/show_bug.cgi?id=2642 fixed in 10.36 */ + if (PCRE2_MATCH_INVALID_UTF && options & (PCRE2_UTF | PCRE2_CASELESS)) + options |= PCRE2_NO_START_OPTIMIZE; +#endif p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern, p->patternlen, options, &error, &erroffset, p->pcre2_compile_context); if (p->pcre2_pattern) { - p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, NULL); + p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, p->pcre2_general_context); if (!p->pcre2_match_data) die("Couldn't allocate PCRE2 match data"); } else { @@ -510,10 +341,31 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt } pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on); - if (p->pcre2_jit_on == 1) { + if (p->pcre2_jit_on) { jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE); - if (jitret) - die("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n", p->pattern, jitret); + if (jitret == PCRE2_ERROR_NOMEMORY && !pcre2_jit_functional()) { + /* + * Even though pcre2_config(PCRE2_CONFIG_JIT, ...) + * indicated JIT support, the library might still + * fail to generate JIT code for various reasons, + * e.g. when SELinux's 'deny_execmem' or PaX's + * MPROTECT prevent creating W|X memory mappings. + * + * Instead of faling hard, fall back to interpreter + * mode, just as if the pattern was prefixed with + * '(*NO_JIT)'. + */ + p->pcre2_jit_on = 0; + return; + } else if (jitret) { + int need_clip = p->patternlen > 64; + int clip_len = need_clip ? 64 : p->patternlen; + die("Couldn't JIT the PCRE2 pattern '%.*s'%s, got '%d'%s", + clip_len, p->pattern, need_clip ? "..." : "", jitret, + pcre2_jit_functional() + ? "\nPerhaps prefix (*NO_JIT) to your pattern?" + : ""); + } /* * The pcre2_config(PCRE2_CONFIG_JIT, ...) call just @@ -537,17 +389,6 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt p->pcre2_jit_on = 0; return; } - - p->pcre2_jit_stack = pcre2_jit_stack_create(1, 1024 * 1024, NULL); - if (!p->pcre2_jit_stack) - die("Couldn't allocate PCRE2 JIT stack"); - p->pcre2_match_context = pcre2_match_context_create(NULL); - if (!p->pcre2_match_context) - die("Couldn't allocate PCRE2 match context"); - pcre2_jit_stack_assign(p->pcre2_match_context, NULL, p->pcre2_jit_stack); - } else if (p->pcre2_jit_on != 0) { - BUG("The pcre2_jit_on variable should be 0 or 1, not %d", - p->pcre1_jit_on); } } @@ -591,17 +432,16 @@ static void free_pcre2_pattern(struct grep_pat *p) pcre2_compile_context_free(p->pcre2_compile_context); pcre2_code_free(p->pcre2_pattern); pcre2_match_data_free(p->pcre2_match_data); - pcre2_jit_stack_free(p->pcre2_jit_stack); - pcre2_match_context_free(p->pcre2_match_context); +#ifdef GIT_PCRE2_VERSION_10_34_OR_HIGHER + pcre2_maketables_free(p->pcre2_general_context, p->pcre2_tables); +#else + free((void *)p->pcre2_tables); +#endif + pcre2_general_context_free(p->pcre2_general_context); } #else /* !USE_LIBPCRE2 */ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt) { - /* - * Unreachable until USE_LIBPCRE2 becomes synonymous with - * USE_LIBPCRE. See the sibling comment in - * grep_set_pattern_type_option(). - */ die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE"); } @@ -614,7 +454,6 @@ static int pcre2match(struct grep_pat *p, const char *line, const char *eol, static void free_pcre2_pattern(struct grep_pat *p) { } -#endif /* !USE_LIBPCRE2 */ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) { @@ -626,8 +465,6 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) if (opt->ignore_case) regflags |= REG_ICASE; err = regcomp(&p->regexp, sb.buf, regflags); - if (opt->debug) - fprintf(stderr, "fixed %s\n", sb.buf); strbuf_release(&sb); if (err) { char errbuf[1024]; @@ -635,62 +472,82 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt) compile_regexp_failed(p, errbuf); } } +#endif /* !USE_LIBPCRE2 */ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { - int ascii_only; int err; int regflags = REG_NEWLINE; + if (opt->pattern_type_option == GREP_PATTERN_TYPE_UNSPECIFIED) + opt->pattern_type_option = (opt->extended_regexp_option + ? GREP_PATTERN_TYPE_ERE + : GREP_PATTERN_TYPE_BRE); + p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; - ascii_only = !has_non_ascii(p->pattern); + p->fixed = opt->pattern_type_option == GREP_PATTERN_TYPE_FIXED; - /* - * Even when -F (fixed) asks us to do a non-regexp search, we - * may not be able to correctly case-fold when -i - * (ignore-case) is asked (in which case, we'll synthesize a - * regexp to match the pattern that matches regexp special - * characters literally, while ignoring case differences). On - * the other hand, even without -F, if the pattern does not - * have any regexp special characters and there is no need for - * case-folding search, we can internally turn it into a - * simple string match using kws. p->fixed tells us if we - * want to use kws. - */ - if (opt->fixed || - has_null(p->pattern, p->patternlen) || - is_fixed(p->pattern, p->patternlen)) - p->fixed = !p->ignore_case || ascii_only; - - if (p->fixed) { - p->kws = kwsalloc(p->ignore_case ? tolower_trans_tbl : NULL); - kwsincr(p->kws, p->pattern, p->patternlen); - kwsprep(p->kws); - return; - } else if (opt->fixed) { - /* - * We come here when the pattern has the non-ascii - * characters we cannot case-fold, and asked to - * ignore-case. - */ + if (opt->pattern_type_option != GREP_PATTERN_TYPE_PCRE && + memchr(p->pattern, 0, p->patternlen)) + die(_("given pattern contains NULL byte (via -f <file>). This is only supported with -P under PCRE v2")); + + p->is_fixed = is_fixed(p->pattern, p->patternlen); +#ifdef USE_LIBPCRE2 + if (!p->fixed && !p->is_fixed) { + const char *no_jit = "(*NO_JIT)"; + const int no_jit_len = strlen(no_jit); + if (starts_with(p->pattern, no_jit) && + is_fixed(p->pattern + no_jit_len, + p->patternlen - no_jit_len)) + p->is_fixed = 1; + } +#endif + if (p->fixed || p->is_fixed) { +#ifdef USE_LIBPCRE2 + if (p->is_fixed) { + compile_pcre2_pattern(p, opt); + } else { + /* + * E.g. t7811-grep-open.sh relies on the + * pattern being restored. + */ + char *old_pattern = p->pattern; + size_t old_patternlen = p->patternlen; + struct strbuf sb = STRBUF_INIT; + + /* + * There is the PCRE2_LITERAL flag, but it's + * only in PCRE v2 10.30 and later. Needing to + * ifdef our way around that and dealing with + * it + PCRE2_MULTILINE being an error is more + * complex than just quoting this ourselves. + */ + strbuf_add(&sb, "\\Q", 2); + strbuf_add(&sb, p->pattern, p->patternlen); + strbuf_add(&sb, "\\E", 2); + + p->pattern = sb.buf; + p->patternlen = sb.len; + compile_pcre2_pattern(p, opt); + p->pattern = old_pattern; + p->patternlen = old_patternlen; + strbuf_release(&sb); + } +#else /* !USE_LIBPCRE2 */ compile_fixed_regexp(p, opt); +#endif /* !USE_LIBPCRE2 */ return; } - if (opt->pcre2) { + if (opt->pattern_type_option == GREP_PATTERN_TYPE_PCRE) { compile_pcre2_pattern(p, opt); return; } - if (opt->pcre1) { - compile_pcre1_regexp(p, opt); - return; - } - if (p->ignore_case) regflags |= REG_ICASE; - if (opt->extended_regexp_option) + if (opt->pattern_type_option == GREP_PATTERN_TYPE_ERE) regflags |= REG_EXTENDED; err = regcomp(&p->regexp, p->pattern, regflags); if (err) { @@ -700,6 +557,35 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) } } +static struct grep_expr *grep_not_expr(struct grep_expr *expr) +{ + struct grep_expr *z = xcalloc(1, sizeof(*z)); + z->node = GREP_NODE_NOT; + z->u.unary = expr; + return z; +} + +static struct grep_expr *grep_binexp(enum grep_expr_node kind, + struct grep_expr *left, + struct grep_expr *right) +{ + struct grep_expr *z = xcalloc(1, sizeof(*z)); + z->node = kind; + z->u.binary.left = left; + z->u.binary.right = right; + return z; +} + +static struct grep_expr *grep_or_expr(struct grep_expr *left, struct grep_expr *right) +{ + return grep_binexp(GREP_NODE_OR, left, right); +} + +static struct grep_expr *grep_and_expr(struct grep_expr *left, struct grep_expr *right) +{ + return grep_binexp(GREP_NODE_AND, left, right); +} + static struct grep_expr *compile_pattern_or(struct grep_pat **); static struct grep_expr *compile_pattern_atom(struct grep_pat **list) { @@ -713,7 +599,7 @@ static struct grep_expr *compile_pattern_atom(struct grep_pat **list) case GREP_PATTERN: /* atom */ case GREP_PATTERN_HEAD: case GREP_PATTERN_BODY: - x = xcalloc(1, sizeof (struct grep_expr)); + CALLOC_ARRAY(x, 1); x->node = GREP_NODE_ATOM; x->u.atom = p; *list = p->next; @@ -743,12 +629,10 @@ static struct grep_expr *compile_pattern_not(struct grep_pat **list) if (!p->next) die("--not not followed by pattern expression"); *list = p->next; - x = xcalloc(1, sizeof (struct grep_expr)); - x->node = GREP_NODE_NOT; - x->u.unary = compile_pattern_not(list); - if (!x->u.unary) + x = compile_pattern_not(list); + if (!x) die("--not followed by non pattern expression"); - return x; + return grep_not_expr(x); default: return compile_pattern_atom(list); } @@ -757,22 +641,20 @@ static struct grep_expr *compile_pattern_not(struct grep_pat **list) static struct grep_expr *compile_pattern_and(struct grep_pat **list) { struct grep_pat *p; - struct grep_expr *x, *y, *z; + struct grep_expr *x, *y; x = compile_pattern_not(list); p = *list; if (p && p->token == GREP_AND) { + if (!x) + die("--and not preceded by pattern expression"); if (!p->next) die("--and not followed by pattern expression"); *list = p->next; y = compile_pattern_and(list); if (!y) die("--and not followed by pattern expression"); - z = xcalloc(1, sizeof (struct grep_expr)); - z->node = GREP_NODE_AND; - z->u.binary.left = x; - z->u.binary.right = y; - return z; + return grep_and_expr(x, y); } return x; } @@ -780,7 +662,7 @@ static struct grep_expr *compile_pattern_and(struct grep_pat **list) static struct grep_expr *compile_pattern_or(struct grep_pat **list) { struct grep_pat *p; - struct grep_expr *x, *y, *z; + struct grep_expr *x, *y; x = compile_pattern_and(list); p = *list; @@ -788,11 +670,7 @@ static struct grep_expr *compile_pattern_or(struct grep_pat **list) y = compile_pattern_or(list); if (!y) die("not a pattern expression %s", p->pattern); - z = xcalloc(1, sizeof (struct grep_expr)); - z->node = GREP_NODE_OR; - z->u.binary.left = x; - z->u.binary.right = y; - return z; + return grep_or_expr(x, y); } return x; } @@ -802,87 +680,6 @@ static struct grep_expr *compile_pattern_expr(struct grep_pat **list) return compile_pattern_or(list); } -static void indent(int in) -{ - while (in-- > 0) - fputc(' ', stderr); -} - -static void dump_grep_pat(struct grep_pat *p) -{ - switch (p->token) { - case GREP_AND: fprintf(stderr, "*and*"); break; - case GREP_OPEN_PAREN: fprintf(stderr, "*(*"); break; - case GREP_CLOSE_PAREN: fprintf(stderr, "*)*"); break; - case GREP_NOT: fprintf(stderr, "*not*"); break; - case GREP_OR: fprintf(stderr, "*or*"); break; - - case GREP_PATTERN: fprintf(stderr, "pattern"); break; - case GREP_PATTERN_HEAD: fprintf(stderr, "pattern_head"); break; - case GREP_PATTERN_BODY: fprintf(stderr, "pattern_body"); break; - } - - switch (p->token) { - default: break; - case GREP_PATTERN_HEAD: - fprintf(stderr, "<head %d>", p->field); break; - case GREP_PATTERN_BODY: - fprintf(stderr, "<body>"); break; - } - switch (p->token) { - default: break; - case GREP_PATTERN_HEAD: - case GREP_PATTERN_BODY: - case GREP_PATTERN: - fprintf(stderr, "%.*s", (int)p->patternlen, p->pattern); - break; - } - fputc('\n', stderr); -} - -static void dump_grep_expression_1(struct grep_expr *x, int in) -{ - indent(in); - switch (x->node) { - case GREP_NODE_TRUE: - fprintf(stderr, "true\n"); - break; - case GREP_NODE_ATOM: - dump_grep_pat(x->u.atom); - break; - case GREP_NODE_NOT: - fprintf(stderr, "(not\n"); - dump_grep_expression_1(x->u.unary, in+1); - indent(in); - fprintf(stderr, ")\n"); - break; - case GREP_NODE_AND: - fprintf(stderr, "(and\n"); - dump_grep_expression_1(x->u.binary.left, in+1); - dump_grep_expression_1(x->u.binary.right, in+1); - indent(in); - fprintf(stderr, ")\n"); - break; - case GREP_NODE_OR: - fprintf(stderr, "(or\n"); - dump_grep_expression_1(x->u.binary.left, in+1); - dump_grep_expression_1(x->u.binary.right, in+1); - indent(in); - fprintf(stderr, ")\n"); - break; - } -} - -static void dump_grep_expression(struct grep_opt *opt) -{ - struct grep_expr *x = opt->pattern_expression; - - if (opt->all_match) - fprintf(stderr, "[all-match]\n"); - dump_grep_expression_1(x, 0); - fflush(NULL); -} - static struct grep_expr *grep_true_expr(void) { struct grep_expr *z = xcalloc(1, sizeof(*z)); @@ -890,15 +687,6 @@ static struct grep_expr *grep_true_expr(void) return z; } -static struct grep_expr *grep_or_expr(struct grep_expr *left, struct grep_expr *right) -{ - struct grep_expr *z = xcalloc(1, sizeof(*z)); - z->node = GREP_NODE_OR; - z->u.binary.left = left; - z->u.binary.right = right; - return z; -} - static struct grep_expr *prep_header_patterns(struct grep_opt *opt) { struct grep_pat *p; @@ -963,10 +751,11 @@ static struct grep_expr *grep_splice_or(struct grep_expr *x, struct grep_expr *y return z; } -static void compile_grep_patterns_real(struct grep_opt *opt) +void compile_grep_patterns(struct grep_opt *opt) { struct grep_pat *p; struct grep_expr *header_expr = prep_header_patterns(opt); + int extended = 0; for (p = opt->pattern_list; p; p = p->next) { switch (p->token) { @@ -976,14 +765,14 @@ static void compile_grep_patterns_real(struct grep_opt *opt) compile_regexp(p, opt); break; default: - opt->extended = 1; + extended = 1; break; } } - if (opt->all_match || header_expr) - opt->extended = 1; - else if (!opt->extended && !opt->debug) + if (opt->all_match || opt->no_body_match || header_expr) + extended = 1; + else if (!extended) return; p = opt->pattern_list; @@ -992,6 +781,9 @@ static void compile_grep_patterns_real(struct grep_opt *opt) if (p) die("incomplete pattern expression: %s", p->pattern); + if (opt->no_body_match && opt->pattern_expression) + opt->pattern_expression = grep_not_expr(opt->pattern_expression); + if (!header_expr) return; @@ -1006,13 +798,6 @@ static void compile_grep_patterns_real(struct grep_opt *opt) opt->all_match = 1; } -void compile_grep_patterns(struct grep_opt *opt) -{ - compile_grep_patterns_real(opt); - if (opt->debug) - dump_grep_expression(opt); -} - static void free_pattern_expr(struct grep_expr *x) { switch (x->node) { @@ -1031,21 +816,17 @@ static void free_pattern_expr(struct grep_expr *x) free(x); } -void free_grep_patterns(struct grep_opt *opt) +static void free_grep_pat(struct grep_pat *pattern) { struct grep_pat *p, *n; - for (p = opt->pattern_list; p; p = n) { + for (p = pattern; p; p = n) { n = p->next; switch (p->token) { case GREP_PATTERN: /* atom */ case GREP_PATTERN_HEAD: case GREP_PATTERN_BODY: - if (p->kws) - kwsfree(p->kws); - else if (p->pcre1_regexp) - free_pcre1_regexp(p); - else if (p->pcre2_pattern) + if (p->pcre2_pattern) free_pcre2_pattern(p); else regfree(&p->regexp); @@ -1056,13 +837,18 @@ void free_grep_patterns(struct grep_opt *opt) } free(p); } +} - if (!opt->extended) - return; - free_pattern_expr(opt->pattern_expression); +void free_grep_patterns(struct grep_opt *opt) +{ + free_grep_pat(opt->pattern_list); + free_grep_pat(opt->header_list); + + if (opt->pattern_expression) + free_pattern_expr(opt->pattern_expression); } -static char *end_of_line(char *cp, unsigned long *left) +static const char *end_of_line(const char *cp, unsigned long *left) { unsigned long l = *left; while (l && *cp != '\n') { @@ -1103,31 +889,13 @@ static void show_name(struct grep_opt *opt, const char *name) opt->output(opt, opt->null_following_name ? "\0" : "\n", 1); } -static int fixmatch(struct grep_pat *p, char *line, char *eol, - regmatch_t *match) -{ - struct kwsmatch kwsm; - size_t offset = kwsexec(p->kws, line, eol - line, &kwsm); - if (offset == -1) { - match->rm_so = match->rm_eo = -1; - return REG_NOMATCH; - } else { - match->rm_so = offset; - match->rm_eo = match->rm_so + kwsm.size[0]; - return 0; - } -} - -static int patmatch(struct grep_pat *p, char *line, char *eol, +static int patmatch(struct grep_pat *p, + const char *line, const char *eol, regmatch_t *match, int eflags) { int hit; - if (p->fixed) - hit = !fixmatch(p, line, eol, match); - else if (p->pcre1_regexp) - hit = !pcre1match(p, line, eol, match, eflags); - else if (p->pcre2_pattern) + if (p->pcre2_pattern) hit = !pcre2match(p, line, eol, match, eflags); else hit = !regexec_buf(&p->regexp, line, eol - line, 1, match, @@ -1136,20 +904,16 @@ static int patmatch(struct grep_pat *p, char *line, char *eol, return hit; } -static int strip_timestamp(char *bol, char **eol_p) +static void strip_timestamp(const char *bol, const char **eol_p) { - char *eol = *eol_p; - int ch; + const char *eol = *eol_p; while (bol < --eol) { if (*eol != '>') continue; *eol_p = ++eol; - ch = *eol; - *eol = '\0'; - return ch; + break; } - return 0; } static struct { @@ -1161,37 +925,18 @@ static struct { { "reflog ", 7 }, }; -static int match_one_pattern(struct grep_pat *p, char *bol, char *eol, - enum grep_context ctx, - regmatch_t *pmatch, int eflags) +static int headerless_match_one_pattern(struct grep_pat *p, + const char *bol, const char *eol, + enum grep_context ctx, + regmatch_t *pmatch, int eflags) { int hit = 0; - int saved_ch = 0; const char *start = bol; if ((p->token != GREP_PATTERN) && ((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD))) return 0; - if (p->token == GREP_PATTERN_HEAD) { - const char *field; - size_t len; - assert(p->field < ARRAY_SIZE(header_field)); - field = header_field[p->field].field; - len = header_field[p->field].len; - if (strncmp(bol, field, len)) - return 0; - bol += len; - switch (p->field) { - case GREP_HEADER_AUTHOR: - case GREP_HEADER_COMMITTER: - saved_ch = strip_timestamp(bol, &eol); - break; - default: - break; - } - } - again: hit = patmatch(p, bol, eol, pmatch, eflags); @@ -1235,8 +980,6 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol, goto again; } } - if (p->token == GREP_PATTERN_HEAD && saved_ch) - *eol = saved_ch; if (hit) { pmatch[0].rm_so += bol - start; pmatch[0].rm_eo += bol - start; @@ -1244,38 +987,96 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol, return hit; } -static int match_expr_eval(struct grep_expr *x, char *bol, char *eol, - enum grep_context ctx, int collect_hits) +static int match_one_pattern(struct grep_pat *p, + const char *bol, const char *eol, + enum grep_context ctx, regmatch_t *pmatch, + int eflags) +{ + const char *field; + size_t len; + + if (p->token == GREP_PATTERN_HEAD) { + assert(p->field < ARRAY_SIZE(header_field)); + field = header_field[p->field].field; + len = header_field[p->field].len; + if (strncmp(bol, field, len)) + return 0; + bol += len; + + switch (p->field) { + case GREP_HEADER_AUTHOR: + case GREP_HEADER_COMMITTER: + strip_timestamp(bol, &eol); + break; + default: + break; + } + } + + return headerless_match_one_pattern(p, bol, eol, ctx, pmatch, eflags); +} + + +static int match_expr_eval(struct grep_opt *opt, struct grep_expr *x, + const char *bol, const char *eol, + enum grep_context ctx, ssize_t *col, + ssize_t *icol, int collect_hits) { int h = 0; - regmatch_t match; - if (!x) - die("Not a valid grep expression"); switch (x->node) { case GREP_NODE_TRUE: h = 1; break; case GREP_NODE_ATOM: - h = match_one_pattern(x->u.atom, bol, eol, ctx, &match, 0); + { + regmatch_t tmp; + h = match_one_pattern(x->u.atom, bol, eol, ctx, + &tmp, 0); + if (h && (*col < 0 || tmp.rm_so < *col)) + *col = tmp.rm_so; + } + if (x->u.atom->token == GREP_PATTERN_BODY) + opt->body_hit |= h; break; case GREP_NODE_NOT: - h = !match_expr_eval(x->u.unary, bol, eol, ctx, 0); + /* + * Upon visiting a GREP_NODE_NOT, col and icol become swapped. + */ + h = !match_expr_eval(opt, x->u.unary, bol, eol, ctx, icol, col, + 0); break; case GREP_NODE_AND: - if (!match_expr_eval(x->u.binary.left, bol, eol, ctx, 0)) - return 0; - h = match_expr_eval(x->u.binary.right, bol, eol, ctx, 0); + h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col, + icol, 0); + if (h || opt->columnnum) { + /* + * Don't short-circuit AND when given --column, since a + * NOT earlier in the tree may turn this into an OR. In + * this case, see the below comment. + */ + h &= match_expr_eval(opt, x->u.binary.right, bol, eol, + ctx, col, icol, 0); + } break; case GREP_NODE_OR: - if (!collect_hits) - return (match_expr_eval(x->u.binary.left, - bol, eol, ctx, 0) || - match_expr_eval(x->u.binary.right, - bol, eol, ctx, 0)); - h = match_expr_eval(x->u.binary.left, bol, eol, ctx, 0); - x->u.binary.left->hit |= h; - h |= match_expr_eval(x->u.binary.right, bol, eol, ctx, 1); + if (!(collect_hits || opt->columnnum)) { + /* + * Don't short-circuit OR when given --column (or + * collecting hits) to ensure we don't skip a later + * child that would produce an earlier match. + */ + return (match_expr_eval(opt, x->u.binary.left, bol, eol, + ctx, col, icol, 0) || + match_expr_eval(opt, x->u.binary.right, bol, + eol, ctx, col, icol, 0)); + } + h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col, + icol, 0); + if (collect_hits) + x->u.binary.left->hit |= h; + h |= match_expr_eval(opt, x->u.binary.right, bol, eol, ctx, col, + icol, collect_hits); break; default: die("Unexpected node type (internal error) %d", x->node); @@ -1285,37 +1086,56 @@ static int match_expr_eval(struct grep_expr *x, char *bol, char *eol, return h; } -static int match_expr(struct grep_opt *opt, char *bol, char *eol, - enum grep_context ctx, int collect_hits) +static int match_expr(struct grep_opt *opt, + const char *bol, const char *eol, + enum grep_context ctx, ssize_t *col, + ssize_t *icol, int collect_hits) { struct grep_expr *x = opt->pattern_expression; - return match_expr_eval(x, bol, eol, ctx, collect_hits); + return match_expr_eval(opt, x, bol, eol, ctx, col, icol, collect_hits); } -static int match_line(struct grep_opt *opt, char *bol, char *eol, +static int match_line(struct grep_opt *opt, + const char *bol, const char *eol, + ssize_t *col, ssize_t *icol, enum grep_context ctx, int collect_hits) { struct grep_pat *p; - regmatch_t match; + int hit = 0; - if (opt->extended) - return match_expr(opt, bol, eol, ctx, collect_hits); + if (opt->pattern_expression) + return match_expr(opt, bol, eol, ctx, col, icol, + collect_hits); /* we do not call with collect_hits without being extended */ for (p = opt->pattern_list; p; p = p->next) { - if (match_one_pattern(p, bol, eol, ctx, &match, 0)) - return 1; + regmatch_t tmp; + if (match_one_pattern(p, bol, eol, ctx, &tmp, 0)) { + hit |= 1; + if (!opt->columnnum) { + /* + * Without --column, any single match on a line + * is enough to know that it needs to be + * printed. With --column, scan _all_ patterns + * to find the earliest. + */ + break; + } + if (*col < 0 || tmp.rm_so < *col) + *col = tmp.rm_so; + } } - return 0; + return hit; } -static int match_next_pattern(struct grep_pat *p, char *bol, char *eol, +static int match_next_pattern(struct grep_pat *p, + const char *bol, const char *eol, enum grep_context ctx, regmatch_t *pmatch, int eflags) { regmatch_t match; - if (!match_one_pattern(p, bol, eol, ctx, &match, eflags)) + if (!headerless_match_one_pattern(p, bol, eol, ctx, &match, eflags)) return 0; if (match.rm_so < 0 || match.rm_eo < 0) return 0; @@ -1330,18 +1150,26 @@ static int match_next_pattern(struct grep_pat *p, char *bol, char *eol, return 1; } -static int next_match(struct grep_opt *opt, char *bol, char *eol, - enum grep_context ctx, regmatch_t *pmatch, int eflags) +int grep_next_match(struct grep_opt *opt, + const char *bol, const char *eol, + enum grep_context ctx, regmatch_t *pmatch, + enum grep_header_field field, int eflags) { struct grep_pat *p; int hit = 0; pmatch->rm_so = pmatch->rm_eo = -1; if (bol < eol) { - for (p = opt->pattern_list; p; p = p->next) { + for (p = ((ctx == GREP_CONTEXT_HEAD) + ? opt->header_list : opt->pattern_list); + p; p = p->next) { switch (p->token) { - case GREP_PATTERN: /* atom */ case GREP_PATTERN_HEAD: + if ((field != GREP_HEADER_FIELD_MAX) && + (p->field != field)) + continue; + /* fall thru */ + case GREP_PATTERN: /* atom */ case GREP_PATTERN_BODY: hit |= match_next_pattern(p, bol, eol, ctx, pmatch, eflags); @@ -1354,11 +1182,45 @@ static int next_match(struct grep_opt *opt, char *bol, char *eol, return hit; } -static void show_line(struct grep_opt *opt, char *bol, char *eol, - const char *name, unsigned lno, char sign) +static void show_line_header(struct grep_opt *opt, const char *name, + unsigned lno, ssize_t cno, char sign) +{ + if (opt->heading && opt->last_shown == 0) { + output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]); + opt->output(opt, "\n", 1); + } + opt->last_shown = lno; + + if (!opt->heading && opt->pathname) { + output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]); + output_sep(opt, sign); + } + if (opt->linenum) { + char buf[32]; + xsnprintf(buf, sizeof(buf), "%d", lno); + output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_LINENO]); + output_sep(opt, sign); + } + /* + * Treat 'cno' as the 1-indexed offset from the start of a non-context + * line to its first match. Otherwise, 'cno' is 0 indicating that we are + * being called with a context line. + */ + if (opt->columnnum && cno) { + char buf[32]; + xsnprintf(buf, sizeof(buf), "%"PRIuMAX, (uintmax_t)cno); + output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_COLUMNNO]); + output_sep(opt, sign); + } +} + +static void show_line(struct grep_opt *opt, + const char *bol, const char *eol, + const char *name, unsigned lno, ssize_t cno, char sign) { int rest = eol - bol; - const char *match_color, *line_color = NULL; + const char *match_color = NULL; + const char *line_color = NULL; if (opt->file_break && opt->last_shown == 0) { if (opt->show_hunk_mark) @@ -1374,57 +1236,55 @@ static void show_line(struct grep_opt *opt, char *bol, char *eol, opt->output(opt, "\n", 1); } } - if (opt->heading && opt->last_shown == 0) { - output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]); - opt->output(opt, "\n", 1); - } - opt->last_shown = lno; - - if (!opt->heading && opt->pathname) { - output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]); - output_sep(opt, sign); - } - if (opt->linenum) { - char buf[32]; - xsnprintf(buf, sizeof(buf), "%d", lno); - output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_LINENO]); - output_sep(opt, sign); + if (!opt->only_matching) { + /* + * In case the line we're being called with contains more than + * one match, leave printing each header to the loop below. + */ + show_line_header(opt, name, lno, cno, sign); } - if (opt->color) { + if (opt->color || opt->only_matching) { regmatch_t match; enum grep_context ctx = GREP_CONTEXT_BODY; - int ch = *eol; int eflags = 0; - if (sign == ':') - match_color = opt->colors[GREP_COLOR_MATCH_SELECTED]; - else - match_color = opt->colors[GREP_COLOR_MATCH_CONTEXT]; - if (sign == ':') - line_color = opt->colors[GREP_COLOR_SELECTED]; - else if (sign == '-') - line_color = opt->colors[GREP_COLOR_CONTEXT]; - else if (sign == '=') - line_color = opt->colors[GREP_COLOR_FUNCTION]; - *eol = '\0'; - while (next_match(opt, bol, eol, ctx, &match, eflags)) { + if (opt->color) { + if (sign == ':') + match_color = opt->colors[GREP_COLOR_MATCH_SELECTED]; + else + match_color = opt->colors[GREP_COLOR_MATCH_CONTEXT]; + if (sign == ':') + line_color = opt->colors[GREP_COLOR_SELECTED]; + else if (sign == '-') + line_color = opt->colors[GREP_COLOR_CONTEXT]; + else if (sign == '=') + line_color = opt->colors[GREP_COLOR_FUNCTION]; + } + while (grep_next_match(opt, bol, eol, ctx, &match, + GREP_HEADER_FIELD_MAX, eflags)) { if (match.rm_so == match.rm_eo) break; - output_color(opt, bol, match.rm_so, line_color); + if (opt->only_matching) + show_line_header(opt, name, lno, cno, sign); + else + output_color(opt, bol, match.rm_so, line_color); output_color(opt, bol + match.rm_so, match.rm_eo - match.rm_so, match_color); + if (opt->only_matching) + opt->output(opt, "\n", 1); bol += match.rm_eo; + cno += match.rm_eo; rest -= match.rm_eo; eflags = REG_NOTBOL; } - *eol = ch; } - output_color(opt, bol, rest, line_color); - opt->output(opt, "\n", 1); + if (!opt->only_matching) { + output_color(opt, bol, rest, line_color); + opt->output(opt, "\n", 1); + } } -#ifndef NO_PTHREADS int grep_use_locks; /* @@ -1445,21 +1305,12 @@ static inline void grep_attr_unlock(void) pthread_mutex_unlock(&grep_attr_mutex); } -/* - * Same as git_attr_mutex, but protecting the thread-unsafe object db access. - */ -pthread_mutex_t grep_read_mutex; - -#else -#define grep_attr_lock() -#define grep_attr_unlock() -#endif - -static int match_funcname(struct grep_opt *opt, struct grep_source *gs, char *bol, char *eol) +static int match_funcname(struct grep_opt *opt, struct grep_source *gs, + const char *bol, const char *eol) { xdemitconf_t *xecfg = opt->priv; if (xecfg && !xecfg->find_func) { - grep_source_load_driver(gs); + grep_source_load_driver(gs, opt->repo->index); if (gs->driver->funcname.pattern) { const struct userdiff_funcname *pe = &gs->driver->funcname; xdiff_set_find_func(xecfg, pe->pattern, pe->cflags); @@ -1482,10 +1333,10 @@ static int match_funcname(struct grep_opt *opt, struct grep_source *gs, char *bo } static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs, - char *bol, unsigned lno) + const char *bol, unsigned lno) { while (bol > gs->buf) { - char *eol = --bol; + const char *eol = --bol; while (bol > gs->buf && bol[-1] != '\n') bol--; @@ -1495,7 +1346,7 @@ static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs, break; if (match_funcname(opt, gs, bol, eol)) { - show_line(opt, bol, eol, gs->name, lno, '='); + show_line(opt, bol, eol, gs->name, lno, 0, '='); break; } } @@ -1504,7 +1355,7 @@ static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs, static int is_empty_line(const char *bol, const char *eol); static void show_pre_context(struct grep_opt *opt, struct grep_source *gs, - char *bol, char *end, unsigned lno) + const char *bol, const char *end, unsigned lno) { unsigned cur = lno, from = 1, funcname_lno = 0, orig_from; int funcname_needed = !!opt->funcname, comment_needed = 0; @@ -1524,8 +1375,8 @@ static void show_pre_context(struct grep_opt *opt, struct grep_source *gs, /* Rewind. */ while (bol > gs->buf && cur > from) { - char *next_bol = bol; - char *eol = --bol; + const char *next_bol = bol; + const char *eol = --bol; while (bol > gs->buf && bol[-1] != '\n') bol--; @@ -1556,11 +1407,11 @@ static void show_pre_context(struct grep_opt *opt, struct grep_source *gs, /* Back forward. */ while (cur < lno) { - char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-'; + const char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-'; while (*eol != '\n') eol++; - show_line(opt, bol, eol, gs->name, cur, sign); + show_line(opt, bol, eol, gs->name, cur, 0, sign); bol = eol + 1; cur++; } @@ -1570,7 +1421,7 @@ static int should_lookahead(struct grep_opt *opt) { struct grep_pat *p; - if (opt->extended) + if (opt->pattern_expression) return 0; /* punt for too complex stuff */ if (opt->invert) return 0; @@ -1584,12 +1435,12 @@ static int should_lookahead(struct grep_opt *opt) static int look_ahead(struct grep_opt *opt, unsigned long *left_p, unsigned *lno_p, - char **bol_p) + const char **bol_p) { unsigned lno = *lno_p; - char *bol = *bol_p; + const char *bol = *bol_p; struct grep_pat *p; - char *sp, *last_bol; + const char *sp, *last_bol; regoff_t earliest = -1; for (p = opt->pattern_list; p; p = p->next) { @@ -1622,7 +1473,8 @@ static int look_ahead(struct grep_opt *opt, return 0; } -static int fill_textconv_grep(struct userdiff_driver *driver, +static int fill_textconv_grep(struct repository *r, + struct userdiff_driver *driver, struct grep_source *gs) { struct diff_filespec *df; @@ -1643,20 +1495,27 @@ static int fill_textconv_grep(struct userdiff_driver *driver, fill_filespec(df, gs->identifier, 1, 0100644); break; case GREP_SOURCE_FILE: - fill_filespec(df, &null_oid, 0, 0100644); + fill_filespec(df, null_oid(), 0, 0100644); break; default: BUG("attempt to textconv something without a path?"); } /* - * fill_textconv is not remotely thread-safe; it may load objects - * behind the scenes, and it modifies the global diff tempfile - * structure. + * fill_textconv is not remotely thread-safe; it modifies the global + * diff tempfile structure, writes to the_repo's odb and might + * internally call thread-unsafe functions such as the + * prepare_packed_git() lazy-initializator. Because of the last two, we + * must ensure mutual exclusion between this call and the object reading + * API, thus we use obj_read_lock() here. + * + * TODO: allowing text conversion to run in parallel with object + * reading operations might increase performance in the multithreaded + * non-worktreee git-grep with --textconv. */ - grep_read_lock(); - size = fill_textconv(driver, df, &buf); - grep_read_unlock(); + obj_read_lock(); + size = fill_textconv(r, driver, df, &buf); + obj_read_unlock(); free_filespec(df); /* @@ -1683,8 +1542,8 @@ static int is_empty_line(const char *bol, const char *eol) static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits) { - char *bol; - char *peek_bol = NULL; + const char *bol; + const char *peek_bol = NULL; unsigned long left; unsigned lno = 1; unsigned last_hit = 0; @@ -1696,6 +1555,10 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle enum grep_context ctx = GREP_CONTEXT_HEAD; xdemitconf_t xecfg; + if (!opt->status_only && gs->name == NULL) + BUG("grep call which could print a name requires " + "grep_source.name be non-NULL"); + if (!opt->output) opt->output = std_output; @@ -1715,13 +1578,18 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle opt->last_shown = 0; if (opt->allow_textconv) { - grep_source_load_driver(gs); + grep_source_load_driver(gs, opt->repo->index); /* * We might set up the shared textconv cache data here, which - * is not thread-safe. + * is not thread-safe. Also, get_oid_with_context() and + * parse_object() might be internally called. As they are not + * currently thread-safe and might be racy with object reading, + * obj_read_lock() must be called. */ grep_attr_lock(); - textconv = userdiff_get_textconv(gs->driver); + obj_read_lock(); + textconv = userdiff_get_textconv(opt->repo, gs->driver); + obj_read_unlock(); grep_attr_unlock(); } @@ -1732,11 +1600,11 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle if (!textconv) { switch (opt->binary) { case GREP_BINARY_DEFAULT: - if (grep_source_is_binary(gs)) + if (grep_source_is_binary(gs, opt->repo->index)) binary_match_only = 1; break; case GREP_BINARY_NOMATCH: - if (grep_source_is_binary(gs)) + if (grep_source_is_binary(gs, opt->repo->index)) return 0; /* Assume unmatch */ break; case GREP_BINARY_TEXT: @@ -1751,14 +1619,16 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle try_lookahead = should_lookahead(opt); - if (fill_textconv_grep(textconv, gs) < 0) + if (fill_textconv_grep(opt->repo, textconv, gs) < 0) return 0; bol = gs->buf; left = gs->size; while (left) { - char *eol, ch; + const char *eol; int hit; + ssize_t cno; + ssize_t col = -1, icol = -1; /* * look_ahead() skips quickly to the line that possibly @@ -1776,14 +1646,11 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle && look_ahead(opt, &left, &lno, &bol)) break; eol = end_of_line(bol, &left); - ch = *eol; - *eol = 0; if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol)) ctx = GREP_CONTEXT_BODY; - hit = match_line(opt, bol, eol, ctx, collect_hits); - *eol = ch; + hit = match_line(opt, bol, eol, &col, &icol, ctx, collect_hits); if (collect_hits) goto next_line; @@ -1799,7 +1666,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle return 0; goto next_line; } - if (hit) { + if (hit && (opt->max_count < 0 || count < opt->max_count)) { count++; if (opt->status_only) return 1; @@ -1823,7 +1690,18 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle show_pre_context(opt, gs, bol, eol, lno); else if (opt->funcname) show_funcname_line(opt, gs, bol, lno); - show_line(opt, bol, eol, gs->name, lno, ':'); + cno = opt->invert ? icol : col; + if (cno < 0) { + /* + * A negative cno indicates that there was no + * match on the line. We are thus inverted and + * being asked to show all lines that _don't_ + * match a given expression. Therefore, set cno + * to 0 to suggest the whole line matches. + */ + cno = 0; + } + show_line(opt, bol, eol, gs->name, lno, cno + 1, ':'); last_hit = lno; if (opt->funcbody) show_function = 1; @@ -1831,7 +1709,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle } if (show_function && (!peek_bol || peek_bol < bol)) { unsigned long peek_left = left; - char *peek_eol = eol; + const char *peek_eol = eol; /* * Trailing empty lines are not interesting. @@ -1852,7 +1730,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle /* If the last hit is within the post context, * we need to show this line. */ - show_line(opt, bol, eol, gs->name, lno, '-'); + show_line(opt, bol, eol, gs->name, lno, col + 1, '-'); } next_line: @@ -1928,29 +1806,43 @@ int grep_source(struct grep_opt *opt, struct grep_source *gs) * we do not have to do the two-pass grep when we do not check * buffer-wide "all-match". */ - if (!opt->all_match) + if (!opt->all_match && !opt->no_body_match) return grep_source_1(opt, gs, 0); /* Otherwise the toplevel "or" terms hit a bit differently. * We first clear hit markers from them. */ clr_hit_marker(opt->pattern_expression); + opt->body_hit = 0; grep_source_1(opt, gs, 1); - if (!chk_hit_marker(opt->pattern_expression)) + if (opt->all_match && !chk_hit_marker(opt->pattern_expression)) + return 0; + if (opt->no_body_match && opt->body_hit) return 0; return grep_source_1(opt, gs, 0); } -int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size) +static void grep_source_init_buf(struct grep_source *gs, + const char *buf, + unsigned long size) +{ + gs->type = GREP_SOURCE_BUF; + gs->name = NULL; + gs->path = NULL; + gs->buf = buf; + gs->size = size; + gs->driver = NULL; + gs->identifier = NULL; +} + +int grep_buffer(struct grep_opt *opt, const char *buf, unsigned long size) { struct grep_source gs; int r; - grep_source_init(&gs, GREP_SOURCE_BUF, NULL, NULL, NULL); - gs.buf = buf; - gs.size = size; + grep_source_init_buf(&gs, buf, size); r = grep_source(opt, &gs); @@ -1958,28 +1850,30 @@ int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size) return r; } -void grep_source_init(struct grep_source *gs, enum grep_source_type type, - const char *name, const char *path, - const void *identifier) +void grep_source_init_file(struct grep_source *gs, const char *name, + const char *path) { - gs->type = type; + gs->type = GREP_SOURCE_FILE; gs->name = xstrdup_or_null(name); gs->path = xstrdup_or_null(path); gs->buf = NULL; gs->size = 0; gs->driver = NULL; + gs->identifier = xstrdup(path); +} - switch (type) { - case GREP_SOURCE_FILE: - gs->identifier = xstrdup(identifier); - break; - case GREP_SOURCE_OID: - gs->identifier = oiddup(identifier); - break; - case GREP_SOURCE_BUF: - gs->identifier = NULL; - break; - } +void grep_source_init_oid(struct grep_source *gs, const char *name, + const char *path, const struct object_id *oid, + struct repository *repo) +{ + gs->type = GREP_SOURCE_OID; + gs->name = xstrdup_or_null(name); + gs->path = xstrdup_or_null(path); + gs->buf = NULL; + gs->size = 0; + gs->driver = NULL; + gs->identifier = oiddup(oid); + gs->repo = repo; } void grep_source_clear(struct grep_source *gs) @@ -1995,7 +1889,9 @@ void grep_source_clear_data(struct grep_source *gs) switch (gs->type) { case GREP_SOURCE_FILE: case GREP_SOURCE_OID: - FREE_AND_NULL(gs->buf); + /* these types own the buffer */ + free((char *)gs->buf); + gs->buf = NULL; gs->size = 0; break; case GREP_SOURCE_BUF: @@ -2008,10 +1904,8 @@ static int grep_source_load_oid(struct grep_source *gs) { enum object_type type; - grep_read_lock(); - gs->buf = read_object_file(gs->identifier, &type, &gs->size); - grep_read_unlock(); - + gs->buf = repo_read_object_file(gs->repo, gs->identifier, &type, + &gs->size); if (!gs->buf) return error(_("'%s': unable to read %s"), gs->name, @@ -2069,22 +1963,24 @@ static int grep_source_load(struct grep_source *gs) BUG("invalid grep_source type to load"); } -void grep_source_load_driver(struct grep_source *gs) +void grep_source_load_driver(struct grep_source *gs, + struct index_state *istate) { if (gs->driver) return; grep_attr_lock(); if (gs->path) - gs->driver = userdiff_find_by_path(gs->path); + gs->driver = userdiff_find_by_path(istate, gs->path); if (!gs->driver) gs->driver = userdiff_find_by_name("default"); grep_attr_unlock(); } -static int grep_source_is_binary(struct grep_source *gs) +static int grep_source_is_binary(struct grep_source *gs, + struct index_state *istate) { - grep_source_load_driver(gs); + grep_source_load_driver(gs, istate); if (gs->driver->binary != -1) return gs->driver->binary; |
