aboutsummaryrefslogtreecommitdiffstats
path: root/grep.c
diff options
context:
space:
mode:
Diffstat (limited to 'grep.c')
-rw-r--r--grep.c1174
1 files changed, 535 insertions, 639 deletions
diff --git a/grep.c b/grep.c
index 7e2f440955..68e9328dfd 100644
--- a/grep.c
+++ b/grep.c
@@ -1,6 +1,7 @@
#include "cache.h"
#include "config.h"
#include "grep.h"
+#include "hex.h"
#include "object-store.h"
#include "userdiff.h"
#include "xdiff-interface.h"
@@ -11,62 +12,26 @@
#include "help.h"
static int grep_source_load(struct grep_source *gs);
-static int grep_source_is_binary(struct grep_source *gs);
+static int grep_source_is_binary(struct grep_source *gs,
+ struct index_state *istate);
-static struct grep_opt grep_defaults;
+static void std_output(struct grep_opt *opt, const void *buf, size_t size)
+{
+ fwrite(buf, size, 1, stdout);
+}
static const char *color_grep_slots[] = {
[GREP_COLOR_CONTEXT] = "context",
[GREP_COLOR_FILENAME] = "filename",
[GREP_COLOR_FUNCTION] = "function",
[GREP_COLOR_LINENO] = "lineNumber",
+ [GREP_COLOR_COLUMNNO] = "column",
[GREP_COLOR_MATCH_CONTEXT] = "matchContext",
[GREP_COLOR_MATCH_SELECTED] = "matchSelected",
[GREP_COLOR_SELECTED] = "selected",
[GREP_COLOR_SEP] = "separator",
};
-static void std_output(struct grep_opt *opt, const void *buf, size_t size)
-{
- fwrite(buf, size, 1, stdout);
-}
-
-static void color_set(char *dst, const char *color_bytes)
-{
- xsnprintf(dst, COLOR_MAXLEN, "%s", color_bytes);
-}
-
-/*
- * Initialize the grep_defaults template with hardcoded defaults.
- * We could let the compiler do this, but without C99 initializers
- * the code gets unwieldy and unreadable, so...
- */
-void init_grep_defaults(void)
-{
- struct grep_opt *opt = &grep_defaults;
- static int run_once;
-
- if (run_once)
- return;
- run_once++;
-
- memset(opt, 0, sizeof(*opt));
- opt->relative = 1;
- opt->pathname = 1;
- opt->max_depth = -1;
- opt->pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED;
- color_set(opt->colors[GREP_COLOR_CONTEXT], "");
- color_set(opt->colors[GREP_COLOR_FILENAME], "");
- color_set(opt->colors[GREP_COLOR_FUNCTION], "");
- color_set(opt->colors[GREP_COLOR_LINENO], "");
- color_set(opt->colors[GREP_COLOR_MATCH_CONTEXT], GIT_COLOR_BOLD_RED);
- color_set(opt->colors[GREP_COLOR_MATCH_SELECTED], GIT_COLOR_BOLD_RED);
- color_set(opt->colors[GREP_COLOR_SELECTED], "");
- color_set(opt->colors[GREP_COLOR_SEP], GIT_COLOR_CYAN);
- opt->color = -1;
- opt->output = std_output;
-}
-
static int parse_pattern_type_arg(const char *opt, const char *arg)
{
if (!strcmp(arg, "default"))
@@ -90,7 +55,7 @@ define_list_config_array_extra(color_grep_slots, {"match"});
*/
int grep_config(const char *var, const char *value, void *cb)
{
- struct grep_opt *opt = &grep_defaults;
+ struct grep_opt *opt = cb;
const char *slot;
if (userdiff_config(var, value) < 0)
@@ -110,6 +75,10 @@ int grep_config(const char *var, const char *value, void *cb)
opt->linenum = git_config_bool(var, value);
return 0;
}
+ if (!strcmp(var, "grep.column")) {
+ opt->columnnum = git_config_bool(var, value);
+ return 0;
+ }
if (!strcmp(var, "grep.fullname")) {
opt->relative = !git_config_bool(var, value);
@@ -137,99 +106,14 @@ int grep_config(const char *var, const char *value, void *cb)
return 0;
}
-/*
- * Initialize one instance of grep_opt and copy the
- * default values from the template we read the configuration
- * information in an earlier call to git_config(grep_config).
- */
-void grep_init(struct grep_opt *opt, const char *prefix)
+void grep_init(struct grep_opt *opt, struct repository *repo)
{
- struct grep_opt *def = &grep_defaults;
- int i;
+ struct grep_opt blank = GREP_OPT_INIT;
+ memcpy(opt, &blank, sizeof(*opt));
- memset(opt, 0, sizeof(*opt));
- opt->prefix = prefix;
- opt->prefix_length = (prefix && *prefix) ? strlen(prefix) : 0;
+ opt->repo = repo;
opt->pattern_tail = &opt->pattern_list;
opt->header_tail = &opt->header_list;
-
- opt->color = def->color;
- opt->extended_regexp_option = def->extended_regexp_option;
- opt->pattern_type_option = def->pattern_type_option;
- opt->linenum = def->linenum;
- opt->max_depth = def->max_depth;
- opt->pathname = def->pathname;
- opt->relative = def->relative;
- opt->output = def->output;
-
- for (i = 0; i < NR_GREP_COLORS; i++)
- color_set(opt->colors[i], def->colors[i]);
-}
-
-static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, struct grep_opt *opt)
-{
- /*
- * When committing to the pattern type by setting the relevant
- * fields in grep_opt it's generally not necessary to zero out
- * the fields we're not choosing, since they won't have been
- * set by anything. The extended_regexp_option field is the
- * only exception to this.
- *
- * This is because in the process of parsing grep.patternType
- * & grep.extendedRegexp we set opt->pattern_type_option and
- * opt->extended_regexp_option, respectively. We then
- * internally use opt->extended_regexp_option to see if we're
- * compiling an ERE. It must be unset if that's not actually
- * the case.
- */
- if (pattern_type != GREP_PATTERN_TYPE_ERE &&
- opt->extended_regexp_option)
- opt->extended_regexp_option = 0;
-
- switch (pattern_type) {
- case GREP_PATTERN_TYPE_UNSPECIFIED:
- /* fall through */
-
- case GREP_PATTERN_TYPE_BRE:
- break;
-
- case GREP_PATTERN_TYPE_ERE:
- opt->extended_regexp_option = 1;
- break;
-
- case GREP_PATTERN_TYPE_FIXED:
- opt->fixed = 1;
- break;
-
- case GREP_PATTERN_TYPE_PCRE:
-#ifdef USE_LIBPCRE2
- opt->pcre2 = 1;
-#else
- /*
- * It's important that pcre1 always be assigned to
- * even when there's no USE_LIBPCRE* defined. We still
- * call the PCRE stub function, it just dies with
- * "cannot use Perl-compatible regexes[...]".
- */
- opt->pcre1 = 1;
-#endif
- break;
- }
-}
-
-void grep_commit_pattern_type(enum grep_pattern_type pattern_type, struct grep_opt *opt)
-{
- if (pattern_type != GREP_PATTERN_TYPE_UNSPECIFIED)
- grep_set_pattern_type_option(pattern_type, opt);
- else if (opt->pattern_type_option != GREP_PATTERN_TYPE_UNSPECIFIED)
- grep_set_pattern_type_option(opt->pattern_type_option, opt);
- else if (opt->extended_regexp_option)
- /*
- * This branch *must* happen after setting from the
- * opt->pattern_type_option above, we don't want
- * grep.extendedRegexp to override grep.patternType!
- */
- grep_set_pattern_type_option(GREP_PATTERN_TYPE_ERE, opt);
}
static struct grep_pat *create_grep_pat(const char *pat, size_t patlen,
@@ -356,152 +240,99 @@ static int is_fixed(const char *s, size_t len)
return 1;
}
-static int has_null(const char *s, size_t len)
-{
- /*
- * regcomp cannot accept patterns with NULs so when using it
- * we consider any pattern containing a NUL fixed.
- */
- if (memchr(s, 0, len))
- return 1;
-
- return 0;
-}
+#ifdef USE_LIBPCRE2
+#define GREP_PCRE2_DEBUG_MALLOC 0
-#ifdef USE_LIBPCRE1
-static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
+static void *pcre2_malloc(PCRE2_SIZE size, MAYBE_UNUSED void *memory_data)
{
- const char *error;
- int erroffset;
- int options = PCRE_MULTILINE;
-
- if (opt->ignore_case) {
- if (has_non_ascii(p->pattern))
- p->pcre1_tables = pcre_maketables();
- options |= PCRE_CASELESS;
- }
- if (is_utf8_locale() && has_non_ascii(p->pattern))
- options |= PCRE_UTF8;
-
- p->pcre1_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
- p->pcre1_tables);
- if (!p->pcre1_regexp)
- compile_regexp_failed(p, error);
-
- p->pcre1_extra_info = pcre_study(p->pcre1_regexp, GIT_PCRE_STUDY_JIT_COMPILE, &error);
- if (!p->pcre1_extra_info && error)
- die("%s", error);
-
-#ifdef GIT_PCRE1_USE_JIT
- pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
- if (p->pcre1_jit_on == 1) {
- p->pcre1_jit_stack = pcre_jit_stack_alloc(1, 1024 * 1024);
- if (!p->pcre1_jit_stack)
- die("Couldn't allocate PCRE JIT stack");
- pcre_assign_jit_stack(p->pcre1_extra_info, NULL, p->pcre1_jit_stack);
- } else if (p->pcre1_jit_on != 0) {
- BUG("The pcre1_jit_on variable should be 0 or 1, not %d",
- p->pcre1_jit_on);
- }
+ void *pointer = malloc(size);
+#if GREP_PCRE2_DEBUG_MALLOC
+ static int count = 1;
+ fprintf(stderr, "PCRE2:%p -> #%02d: alloc(%lu)\n", pointer, count++, size);
#endif
+ return pointer;
}
-static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
- regmatch_t *match, int eflags)
+static void pcre2_free(void *pointer, MAYBE_UNUSED void *memory_data)
{
- int ovector[30], ret, flags = 0;
-
- if (eflags & REG_NOTBOL)
- flags |= PCRE_NOTBOL;
-
-#ifdef GIT_PCRE1_USE_JIT
- if (p->pcre1_jit_on) {
- ret = pcre_jit_exec(p->pcre1_regexp, p->pcre1_extra_info, line,
- eol - line, 0, flags, ovector,
- ARRAY_SIZE(ovector), p->pcre1_jit_stack);
- } else
+#if GREP_PCRE2_DEBUG_MALLOC
+ static int count = 1;
+ if (pointer)
+ fprintf(stderr, "PCRE2:%p -> #%02d: free()\n", pointer, count++);
#endif
- {
- ret = pcre_exec(p->pcre1_regexp, p->pcre1_extra_info, line,
- eol - line, 0, flags, ovector,
- ARRAY_SIZE(ovector));
- }
-
- if (ret < 0 && ret != PCRE_ERROR_NOMATCH)
- die("pcre_exec failed with error code %d", ret);
- if (ret > 0) {
- ret = 0;
- match->rm_so = ovector[0];
- match->rm_eo = ovector[1];
- }
-
- return ret;
+ free(pointer);
}
-static void free_pcre1_regexp(struct grep_pat *p)
-{
- pcre_free(p->pcre1_regexp);
-#ifdef GIT_PCRE1_USE_JIT
- if (p->pcre1_jit_on) {
- pcre_free_study(p->pcre1_extra_info);
- pcre_jit_stack_free(p->pcre1_jit_stack);
- } else
-#endif
- {
- pcre_free(p->pcre1_extra_info);
- }
- pcre_free((void *)p->pcre1_tables);
-}
-#else /* !USE_LIBPCRE1 */
-static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
+static int pcre2_jit_functional(void)
{
- die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
-}
+ static int jit_working = -1;
+ pcre2_code *code;
+ size_t off;
+ int err;
-static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
- regmatch_t *match, int eflags)
-{
- return 1;
-}
+ if (jit_working != -1)
+ return jit_working;
-static void free_pcre1_regexp(struct grep_pat *p)
-{
+ /*
+ * Try to JIT compile a simple pattern to probe if the JIT is
+ * working in general. It might fail for systems where creating
+ * memory mappings for runtime code generation is restricted.
+ */
+ code = pcre2_compile((PCRE2_SPTR)".", 1, 0, &err, &off, NULL);
+ if (!code)
+ return 0;
+
+ jit_working = pcre2_jit_compile(code, PCRE2_JIT_COMPLETE) == 0;
+ pcre2_code_free(code);
+
+ return jit_working;
}
-#endif /* !USE_LIBPCRE1 */
-#ifdef USE_LIBPCRE2
static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
{
int error;
PCRE2_UCHAR errbuf[256];
PCRE2_SIZE erroffset;
int options = PCRE2_MULTILINE;
- const uint8_t *character_tables = NULL;
int jitret;
int patinforet;
size_t jitsizearg;
+ int literal = !opt->ignore_case && (p->fixed || p->is_fixed);
- assert(opt->pcre2);
-
- p->pcre2_compile_context = NULL;
+ /*
+ * Call pcre2_general_context_create() before calling any
+ * other pcre2_*(). It sets up our malloc()/free() functions
+ * with which everything else is allocated.
+ */
+ p->pcre2_general_context = pcre2_general_context_create(
+ pcre2_malloc, pcre2_free, NULL);
+ if (!p->pcre2_general_context)
+ die("Couldn't allocate PCRE2 general context");
if (opt->ignore_case) {
- if (has_non_ascii(p->pattern)) {
- character_tables = pcre2_maketables(NULL);
- p->pcre2_compile_context = pcre2_compile_context_create(NULL);
- pcre2_set_character_tables(p->pcre2_compile_context, character_tables);
+ if (!opt->ignore_locale && has_non_ascii(p->pattern)) {
+ p->pcre2_tables = pcre2_maketables(p->pcre2_general_context);
+ p->pcre2_compile_context = pcre2_compile_context_create(p->pcre2_general_context);
+ pcre2_set_character_tables(p->pcre2_compile_context,
+ p->pcre2_tables);
}
options |= PCRE2_CASELESS;
}
- if (is_utf8_locale() && has_non_ascii(p->pattern))
- options |= PCRE2_UTF;
+ if (!opt->ignore_locale && is_utf8_locale() && !literal)
+ options |= (PCRE2_UTF | PCRE2_UCP | PCRE2_MATCH_INVALID_UTF);
+
+#ifndef GIT_PCRE2_VERSION_10_36_OR_HIGHER
+ /* Work around https://bugs.exim.org/show_bug.cgi?id=2642 fixed in 10.36 */
+ if (PCRE2_MATCH_INVALID_UTF && options & (PCRE2_UTF | PCRE2_CASELESS))
+ options |= PCRE2_NO_START_OPTIMIZE;
+#endif
p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern,
p->patternlen, options, &error, &erroffset,
p->pcre2_compile_context);
if (p->pcre2_pattern) {
- p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, NULL);
+ p->pcre2_match_data = pcre2_match_data_create_from_pattern(p->pcre2_pattern, p->pcre2_general_context);
if (!p->pcre2_match_data)
die("Couldn't allocate PCRE2 match data");
} else {
@@ -510,10 +341,31 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
}
pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
- if (p->pcre2_jit_on == 1) {
+ if (p->pcre2_jit_on) {
jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE);
- if (jitret)
- die("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n", p->pattern, jitret);
+ if (jitret == PCRE2_ERROR_NOMEMORY && !pcre2_jit_functional()) {
+ /*
+ * Even though pcre2_config(PCRE2_CONFIG_JIT, ...)
+ * indicated JIT support, the library might still
+ * fail to generate JIT code for various reasons,
+ * e.g. when SELinux's 'deny_execmem' or PaX's
+ * MPROTECT prevent creating W|X memory mappings.
+ *
+ * Instead of faling hard, fall back to interpreter
+ * mode, just as if the pattern was prefixed with
+ * '(*NO_JIT)'.
+ */
+ p->pcre2_jit_on = 0;
+ return;
+ } else if (jitret) {
+ int need_clip = p->patternlen > 64;
+ int clip_len = need_clip ? 64 : p->patternlen;
+ die("Couldn't JIT the PCRE2 pattern '%.*s'%s, got '%d'%s",
+ clip_len, p->pattern, need_clip ? "..." : "", jitret,
+ pcre2_jit_functional()
+ ? "\nPerhaps prefix (*NO_JIT) to your pattern?"
+ : "");
+ }
/*
* The pcre2_config(PCRE2_CONFIG_JIT, ...) call just
@@ -537,17 +389,6 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
p->pcre2_jit_on = 0;
return;
}
-
- p->pcre2_jit_stack = pcre2_jit_stack_create(1, 1024 * 1024, NULL);
- if (!p->pcre2_jit_stack)
- die("Couldn't allocate PCRE2 JIT stack");
- p->pcre2_match_context = pcre2_match_context_create(NULL);
- if (!p->pcre2_match_context)
- die("Couldn't allocate PCRE2 match context");
- pcre2_jit_stack_assign(p->pcre2_match_context, NULL, p->pcre2_jit_stack);
- } else if (p->pcre2_jit_on != 0) {
- BUG("The pcre2_jit_on variable should be 0 or 1, not %d",
- p->pcre1_jit_on);
}
}
@@ -591,17 +432,16 @@ static void free_pcre2_pattern(struct grep_pat *p)
pcre2_compile_context_free(p->pcre2_compile_context);
pcre2_code_free(p->pcre2_pattern);
pcre2_match_data_free(p->pcre2_match_data);
- pcre2_jit_stack_free(p->pcre2_jit_stack);
- pcre2_match_context_free(p->pcre2_match_context);
+#ifdef GIT_PCRE2_VERSION_10_34_OR_HIGHER
+ pcre2_maketables_free(p->pcre2_general_context, p->pcre2_tables);
+#else
+ free((void *)p->pcre2_tables);
+#endif
+ pcre2_general_context_free(p->pcre2_general_context);
}
#else /* !USE_LIBPCRE2 */
static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
{
- /*
- * Unreachable until USE_LIBPCRE2 becomes synonymous with
- * USE_LIBPCRE. See the sibling comment in
- * grep_set_pattern_type_option().
- */
die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
}
@@ -614,7 +454,6 @@ static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
static void free_pcre2_pattern(struct grep_pat *p)
{
}
-#endif /* !USE_LIBPCRE2 */
static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
{
@@ -626,8 +465,6 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
if (opt->ignore_case)
regflags |= REG_ICASE;
err = regcomp(&p->regexp, sb.buf, regflags);
- if (opt->debug)
- fprintf(stderr, "fixed %s\n", sb.buf);
strbuf_release(&sb);
if (err) {
char errbuf[1024];
@@ -635,62 +472,82 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
compile_regexp_failed(p, errbuf);
}
}
+#endif /* !USE_LIBPCRE2 */
static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
{
- int ascii_only;
int err;
int regflags = REG_NEWLINE;
+ if (opt->pattern_type_option == GREP_PATTERN_TYPE_UNSPECIFIED)
+ opt->pattern_type_option = (opt->extended_regexp_option
+ ? GREP_PATTERN_TYPE_ERE
+ : GREP_PATTERN_TYPE_BRE);
+
p->word_regexp = opt->word_regexp;
p->ignore_case = opt->ignore_case;
- ascii_only = !has_non_ascii(p->pattern);
+ p->fixed = opt->pattern_type_option == GREP_PATTERN_TYPE_FIXED;
- /*
- * Even when -F (fixed) asks us to do a non-regexp search, we
- * may not be able to correctly case-fold when -i
- * (ignore-case) is asked (in which case, we'll synthesize a
- * regexp to match the pattern that matches regexp special
- * characters literally, while ignoring case differences). On
- * the other hand, even without -F, if the pattern does not
- * have any regexp special characters and there is no need for
- * case-folding search, we can internally turn it into a
- * simple string match using kws. p->fixed tells us if we
- * want to use kws.
- */
- if (opt->fixed ||
- has_null(p->pattern, p->patternlen) ||
- is_fixed(p->pattern, p->patternlen))
- p->fixed = !p->ignore_case || ascii_only;
-
- if (p->fixed) {
- p->kws = kwsalloc(p->ignore_case ? tolower_trans_tbl : NULL);
- kwsincr(p->kws, p->pattern, p->patternlen);
- kwsprep(p->kws);
- return;
- } else if (opt->fixed) {
- /*
- * We come here when the pattern has the non-ascii
- * characters we cannot case-fold, and asked to
- * ignore-case.
- */
+ if (opt->pattern_type_option != GREP_PATTERN_TYPE_PCRE &&
+ memchr(p->pattern, 0, p->patternlen))
+ die(_("given pattern contains NULL byte (via -f <file>). This is only supported with -P under PCRE v2"));
+
+ p->is_fixed = is_fixed(p->pattern, p->patternlen);
+#ifdef USE_LIBPCRE2
+ if (!p->fixed && !p->is_fixed) {
+ const char *no_jit = "(*NO_JIT)";
+ const int no_jit_len = strlen(no_jit);
+ if (starts_with(p->pattern, no_jit) &&
+ is_fixed(p->pattern + no_jit_len,
+ p->patternlen - no_jit_len))
+ p->is_fixed = 1;
+ }
+#endif
+ if (p->fixed || p->is_fixed) {
+#ifdef USE_LIBPCRE2
+ if (p->is_fixed) {
+ compile_pcre2_pattern(p, opt);
+ } else {
+ /*
+ * E.g. t7811-grep-open.sh relies on the
+ * pattern being restored.
+ */
+ char *old_pattern = p->pattern;
+ size_t old_patternlen = p->patternlen;
+ struct strbuf sb = STRBUF_INIT;
+
+ /*
+ * There is the PCRE2_LITERAL flag, but it's
+ * only in PCRE v2 10.30 and later. Needing to
+ * ifdef our way around that and dealing with
+ * it + PCRE2_MULTILINE being an error is more
+ * complex than just quoting this ourselves.
+ */
+ strbuf_add(&sb, "\\Q", 2);
+ strbuf_add(&sb, p->pattern, p->patternlen);
+ strbuf_add(&sb, "\\E", 2);
+
+ p->pattern = sb.buf;
+ p->patternlen = sb.len;
+ compile_pcre2_pattern(p, opt);
+ p->pattern = old_pattern;
+ p->patternlen = old_patternlen;
+ strbuf_release(&sb);
+ }
+#else /* !USE_LIBPCRE2 */
compile_fixed_regexp(p, opt);
+#endif /* !USE_LIBPCRE2 */
return;
}
- if (opt->pcre2) {
+ if (opt->pattern_type_option == GREP_PATTERN_TYPE_PCRE) {
compile_pcre2_pattern(p, opt);
return;
}
- if (opt->pcre1) {
- compile_pcre1_regexp(p, opt);
- return;
- }
-
if (p->ignore_case)
regflags |= REG_ICASE;
- if (opt->extended_regexp_option)
+ if (opt->pattern_type_option == GREP_PATTERN_TYPE_ERE)
regflags |= REG_EXTENDED;
err = regcomp(&p->regexp, p->pattern, regflags);
if (err) {
@@ -700,6 +557,35 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
}
}
+static struct grep_expr *grep_not_expr(struct grep_expr *expr)
+{
+ struct grep_expr *z = xcalloc(1, sizeof(*z));
+ z->node = GREP_NODE_NOT;
+ z->u.unary = expr;
+ return z;
+}
+
+static struct grep_expr *grep_binexp(enum grep_expr_node kind,
+ struct grep_expr *left,
+ struct grep_expr *right)
+{
+ struct grep_expr *z = xcalloc(1, sizeof(*z));
+ z->node = kind;
+ z->u.binary.left = left;
+ z->u.binary.right = right;
+ return z;
+}
+
+static struct grep_expr *grep_or_expr(struct grep_expr *left, struct grep_expr *right)
+{
+ return grep_binexp(GREP_NODE_OR, left, right);
+}
+
+static struct grep_expr *grep_and_expr(struct grep_expr *left, struct grep_expr *right)
+{
+ return grep_binexp(GREP_NODE_AND, left, right);
+}
+
static struct grep_expr *compile_pattern_or(struct grep_pat **);
static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
{
@@ -713,7 +599,7 @@ static struct grep_expr *compile_pattern_atom(struct grep_pat **list)
case GREP_PATTERN: /* atom */
case GREP_PATTERN_HEAD:
case GREP_PATTERN_BODY:
- x = xcalloc(1, sizeof (struct grep_expr));
+ CALLOC_ARRAY(x, 1);
x->node = GREP_NODE_ATOM;
x->u.atom = p;
*list = p->next;
@@ -743,12 +629,10 @@ static struct grep_expr *compile_pattern_not(struct grep_pat **list)
if (!p->next)
die("--not not followed by pattern expression");
*list = p->next;
- x = xcalloc(1, sizeof (struct grep_expr));
- x->node = GREP_NODE_NOT;
- x->u.unary = compile_pattern_not(list);
- if (!x->u.unary)
+ x = compile_pattern_not(list);
+ if (!x)
die("--not followed by non pattern expression");
- return x;
+ return grep_not_expr(x);
default:
return compile_pattern_atom(list);
}
@@ -757,22 +641,20 @@ static struct grep_expr *compile_pattern_not(struct grep_pat **list)
static struct grep_expr *compile_pattern_and(struct grep_pat **list)
{
struct grep_pat *p;
- struct grep_expr *x, *y, *z;
+ struct grep_expr *x, *y;
x = compile_pattern_not(list);
p = *list;
if (p && p->token == GREP_AND) {
+ if (!x)
+ die("--and not preceded by pattern expression");
if (!p->next)
die("--and not followed by pattern expression");
*list = p->next;
y = compile_pattern_and(list);
if (!y)
die("--and not followed by pattern expression");
- z = xcalloc(1, sizeof (struct grep_expr));
- z->node = GREP_NODE_AND;
- z->u.binary.left = x;
- z->u.binary.right = y;
- return z;
+ return grep_and_expr(x, y);
}
return x;
}
@@ -780,7 +662,7 @@ static struct grep_expr *compile_pattern_and(struct grep_pat **list)
static struct grep_expr *compile_pattern_or(struct grep_pat **list)
{
struct grep_pat *p;
- struct grep_expr *x, *y, *z;
+ struct grep_expr *x, *y;
x = compile_pattern_and(list);
p = *list;
@@ -788,11 +670,7 @@ static struct grep_expr *compile_pattern_or(struct grep_pat **list)
y = compile_pattern_or(list);
if (!y)
die("not a pattern expression %s", p->pattern);
- z = xcalloc(1, sizeof (struct grep_expr));
- z->node = GREP_NODE_OR;
- z->u.binary.left = x;
- z->u.binary.right = y;
- return z;
+ return grep_or_expr(x, y);
}
return x;
}
@@ -802,87 +680,6 @@ static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
return compile_pattern_or(list);
}
-static void indent(int in)
-{
- while (in-- > 0)
- fputc(' ', stderr);
-}
-
-static void dump_grep_pat(struct grep_pat *p)
-{
- switch (p->token) {
- case GREP_AND: fprintf(stderr, "*and*"); break;
- case GREP_OPEN_PAREN: fprintf(stderr, "*(*"); break;
- case GREP_CLOSE_PAREN: fprintf(stderr, "*)*"); break;
- case GREP_NOT: fprintf(stderr, "*not*"); break;
- case GREP_OR: fprintf(stderr, "*or*"); break;
-
- case GREP_PATTERN: fprintf(stderr, "pattern"); break;
- case GREP_PATTERN_HEAD: fprintf(stderr, "pattern_head"); break;
- case GREP_PATTERN_BODY: fprintf(stderr, "pattern_body"); break;
- }
-
- switch (p->token) {
- default: break;
- case GREP_PATTERN_HEAD:
- fprintf(stderr, "<head %d>", p->field); break;
- case GREP_PATTERN_BODY:
- fprintf(stderr, "<body>"); break;
- }
- switch (p->token) {
- default: break;
- case GREP_PATTERN_HEAD:
- case GREP_PATTERN_BODY:
- case GREP_PATTERN:
- fprintf(stderr, "%.*s", (int)p->patternlen, p->pattern);
- break;
- }
- fputc('\n', stderr);
-}
-
-static void dump_grep_expression_1(struct grep_expr *x, int in)
-{
- indent(in);
- switch (x->node) {
- case GREP_NODE_TRUE:
- fprintf(stderr, "true\n");
- break;
- case GREP_NODE_ATOM:
- dump_grep_pat(x->u.atom);
- break;
- case GREP_NODE_NOT:
- fprintf(stderr, "(not\n");
- dump_grep_expression_1(x->u.unary, in+1);
- indent(in);
- fprintf(stderr, ")\n");
- break;
- case GREP_NODE_AND:
- fprintf(stderr, "(and\n");
- dump_grep_expression_1(x->u.binary.left, in+1);
- dump_grep_expression_1(x->u.binary.right, in+1);
- indent(in);
- fprintf(stderr, ")\n");
- break;
- case GREP_NODE_OR:
- fprintf(stderr, "(or\n");
- dump_grep_expression_1(x->u.binary.left, in+1);
- dump_grep_expression_1(x->u.binary.right, in+1);
- indent(in);
- fprintf(stderr, ")\n");
- break;
- }
-}
-
-static void dump_grep_expression(struct grep_opt *opt)
-{
- struct grep_expr *x = opt->pattern_expression;
-
- if (opt->all_match)
- fprintf(stderr, "[all-match]\n");
- dump_grep_expression_1(x, 0);
- fflush(NULL);
-}
-
static struct grep_expr *grep_true_expr(void)
{
struct grep_expr *z = xcalloc(1, sizeof(*z));
@@ -890,15 +687,6 @@ static struct grep_expr *grep_true_expr(void)
return z;
}
-static struct grep_expr *grep_or_expr(struct grep_expr *left, struct grep_expr *right)
-{
- struct grep_expr *z = xcalloc(1, sizeof(*z));
- z->node = GREP_NODE_OR;
- z->u.binary.left = left;
- z->u.binary.right = right;
- return z;
-}
-
static struct grep_expr *prep_header_patterns(struct grep_opt *opt)
{
struct grep_pat *p;
@@ -963,10 +751,11 @@ static struct grep_expr *grep_splice_or(struct grep_expr *x, struct grep_expr *y
return z;
}
-static void compile_grep_patterns_real(struct grep_opt *opt)
+void compile_grep_patterns(struct grep_opt *opt)
{
struct grep_pat *p;
struct grep_expr *header_expr = prep_header_patterns(opt);
+ int extended = 0;
for (p = opt->pattern_list; p; p = p->next) {
switch (p->token) {
@@ -976,14 +765,14 @@ static void compile_grep_patterns_real(struct grep_opt *opt)
compile_regexp(p, opt);
break;
default:
- opt->extended = 1;
+ extended = 1;
break;
}
}
- if (opt->all_match || header_expr)
- opt->extended = 1;
- else if (!opt->extended && !opt->debug)
+ if (opt->all_match || opt->no_body_match || header_expr)
+ extended = 1;
+ else if (!extended)
return;
p = opt->pattern_list;
@@ -992,6 +781,9 @@ static void compile_grep_patterns_real(struct grep_opt *opt)
if (p)
die("incomplete pattern expression: %s", p->pattern);
+ if (opt->no_body_match && opt->pattern_expression)
+ opt->pattern_expression = grep_not_expr(opt->pattern_expression);
+
if (!header_expr)
return;
@@ -1006,13 +798,6 @@ static void compile_grep_patterns_real(struct grep_opt *opt)
opt->all_match = 1;
}
-void compile_grep_patterns(struct grep_opt *opt)
-{
- compile_grep_patterns_real(opt);
- if (opt->debug)
- dump_grep_expression(opt);
-}
-
static void free_pattern_expr(struct grep_expr *x)
{
switch (x->node) {
@@ -1031,21 +816,17 @@ static void free_pattern_expr(struct grep_expr *x)
free(x);
}
-void free_grep_patterns(struct grep_opt *opt)
+static void free_grep_pat(struct grep_pat *pattern)
{
struct grep_pat *p, *n;
- for (p = opt->pattern_list; p; p = n) {
+ for (p = pattern; p; p = n) {
n = p->next;
switch (p->token) {
case GREP_PATTERN: /* atom */
case GREP_PATTERN_HEAD:
case GREP_PATTERN_BODY:
- if (p->kws)
- kwsfree(p->kws);
- else if (p->pcre1_regexp)
- free_pcre1_regexp(p);
- else if (p->pcre2_pattern)
+ if (p->pcre2_pattern)
free_pcre2_pattern(p);
else
regfree(&p->regexp);
@@ -1056,13 +837,18 @@ void free_grep_patterns(struct grep_opt *opt)
}
free(p);
}
+}
- if (!opt->extended)
- return;
- free_pattern_expr(opt->pattern_expression);
+void free_grep_patterns(struct grep_opt *opt)
+{
+ free_grep_pat(opt->pattern_list);
+ free_grep_pat(opt->header_list);
+
+ if (opt->pattern_expression)
+ free_pattern_expr(opt->pattern_expression);
}
-static char *end_of_line(char *cp, unsigned long *left)
+static const char *end_of_line(const char *cp, unsigned long *left)
{
unsigned long l = *left;
while (l && *cp != '\n') {
@@ -1103,31 +889,13 @@ static void show_name(struct grep_opt *opt, const char *name)
opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
}
-static int fixmatch(struct grep_pat *p, char *line, char *eol,
- regmatch_t *match)
-{
- struct kwsmatch kwsm;
- size_t offset = kwsexec(p->kws, line, eol - line, &kwsm);
- if (offset == -1) {
- match->rm_so = match->rm_eo = -1;
- return REG_NOMATCH;
- } else {
- match->rm_so = offset;
- match->rm_eo = match->rm_so + kwsm.size[0];
- return 0;
- }
-}
-
-static int patmatch(struct grep_pat *p, char *line, char *eol,
+static int patmatch(struct grep_pat *p,
+ const char *line, const char *eol,
regmatch_t *match, int eflags)
{
int hit;
- if (p->fixed)
- hit = !fixmatch(p, line, eol, match);
- else if (p->pcre1_regexp)
- hit = !pcre1match(p, line, eol, match, eflags);
- else if (p->pcre2_pattern)
+ if (p->pcre2_pattern)
hit = !pcre2match(p, line, eol, match, eflags);
else
hit = !regexec_buf(&p->regexp, line, eol - line, 1, match,
@@ -1136,20 +904,16 @@ static int patmatch(struct grep_pat *p, char *line, char *eol,
return hit;
}
-static int strip_timestamp(char *bol, char **eol_p)
+static void strip_timestamp(const char *bol, const char **eol_p)
{
- char *eol = *eol_p;
- int ch;
+ const char *eol = *eol_p;
while (bol < --eol) {
if (*eol != '>')
continue;
*eol_p = ++eol;
- ch = *eol;
- *eol = '\0';
- return ch;
+ break;
}
- return 0;
}
static struct {
@@ -1161,37 +925,18 @@ static struct {
{ "reflog ", 7 },
};
-static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
- enum grep_context ctx,
- regmatch_t *pmatch, int eflags)
+static int headerless_match_one_pattern(struct grep_pat *p,
+ const char *bol, const char *eol,
+ enum grep_context ctx,
+ regmatch_t *pmatch, int eflags)
{
int hit = 0;
- int saved_ch = 0;
const char *start = bol;
if ((p->token != GREP_PATTERN) &&
((p->token == GREP_PATTERN_HEAD) != (ctx == GREP_CONTEXT_HEAD)))
return 0;
- if (p->token == GREP_PATTERN_HEAD) {
- const char *field;
- size_t len;
- assert(p->field < ARRAY_SIZE(header_field));
- field = header_field[p->field].field;
- len = header_field[p->field].len;
- if (strncmp(bol, field, len))
- return 0;
- bol += len;
- switch (p->field) {
- case GREP_HEADER_AUTHOR:
- case GREP_HEADER_COMMITTER:
- saved_ch = strip_timestamp(bol, &eol);
- break;
- default:
- break;
- }
- }
-
again:
hit = patmatch(p, bol, eol, pmatch, eflags);
@@ -1235,8 +980,6 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
goto again;
}
}
- if (p->token == GREP_PATTERN_HEAD && saved_ch)
- *eol = saved_ch;
if (hit) {
pmatch[0].rm_so += bol - start;
pmatch[0].rm_eo += bol - start;
@@ -1244,38 +987,96 @@ static int match_one_pattern(struct grep_pat *p, char *bol, char *eol,
return hit;
}
-static int match_expr_eval(struct grep_expr *x, char *bol, char *eol,
- enum grep_context ctx, int collect_hits)
+static int match_one_pattern(struct grep_pat *p,
+ const char *bol, const char *eol,
+ enum grep_context ctx, regmatch_t *pmatch,
+ int eflags)
+{
+ const char *field;
+ size_t len;
+
+ if (p->token == GREP_PATTERN_HEAD) {
+ assert(p->field < ARRAY_SIZE(header_field));
+ field = header_field[p->field].field;
+ len = header_field[p->field].len;
+ if (strncmp(bol, field, len))
+ return 0;
+ bol += len;
+
+ switch (p->field) {
+ case GREP_HEADER_AUTHOR:
+ case GREP_HEADER_COMMITTER:
+ strip_timestamp(bol, &eol);
+ break;
+ default:
+ break;
+ }
+ }
+
+ return headerless_match_one_pattern(p, bol, eol, ctx, pmatch, eflags);
+}
+
+
+static int match_expr_eval(struct grep_opt *opt, struct grep_expr *x,
+ const char *bol, const char *eol,
+ enum grep_context ctx, ssize_t *col,
+ ssize_t *icol, int collect_hits)
{
int h = 0;
- regmatch_t match;
- if (!x)
- die("Not a valid grep expression");
switch (x->node) {
case GREP_NODE_TRUE:
h = 1;
break;
case GREP_NODE_ATOM:
- h = match_one_pattern(x->u.atom, bol, eol, ctx, &match, 0);
+ {
+ regmatch_t tmp;
+ h = match_one_pattern(x->u.atom, bol, eol, ctx,
+ &tmp, 0);
+ if (h && (*col < 0 || tmp.rm_so < *col))
+ *col = tmp.rm_so;
+ }
+ if (x->u.atom->token == GREP_PATTERN_BODY)
+ opt->body_hit |= h;
break;
case GREP_NODE_NOT:
- h = !match_expr_eval(x->u.unary, bol, eol, ctx, 0);
+ /*
+ * Upon visiting a GREP_NODE_NOT, col and icol become swapped.
+ */
+ h = !match_expr_eval(opt, x->u.unary, bol, eol, ctx, icol, col,
+ 0);
break;
case GREP_NODE_AND:
- if (!match_expr_eval(x->u.binary.left, bol, eol, ctx, 0))
- return 0;
- h = match_expr_eval(x->u.binary.right, bol, eol, ctx, 0);
+ h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
+ icol, 0);
+ if (h || opt->columnnum) {
+ /*
+ * Don't short-circuit AND when given --column, since a
+ * NOT earlier in the tree may turn this into an OR. In
+ * this case, see the below comment.
+ */
+ h &= match_expr_eval(opt, x->u.binary.right, bol, eol,
+ ctx, col, icol, 0);
+ }
break;
case GREP_NODE_OR:
- if (!collect_hits)
- return (match_expr_eval(x->u.binary.left,
- bol, eol, ctx, 0) ||
- match_expr_eval(x->u.binary.right,
- bol, eol, ctx, 0));
- h = match_expr_eval(x->u.binary.left, bol, eol, ctx, 0);
- x->u.binary.left->hit |= h;
- h |= match_expr_eval(x->u.binary.right, bol, eol, ctx, 1);
+ if (!(collect_hits || opt->columnnum)) {
+ /*
+ * Don't short-circuit OR when given --column (or
+ * collecting hits) to ensure we don't skip a later
+ * child that would produce an earlier match.
+ */
+ return (match_expr_eval(opt, x->u.binary.left, bol, eol,
+ ctx, col, icol, 0) ||
+ match_expr_eval(opt, x->u.binary.right, bol,
+ eol, ctx, col, icol, 0));
+ }
+ h = match_expr_eval(opt, x->u.binary.left, bol, eol, ctx, col,
+ icol, 0);
+ if (collect_hits)
+ x->u.binary.left->hit |= h;
+ h |= match_expr_eval(opt, x->u.binary.right, bol, eol, ctx, col,
+ icol, collect_hits);
break;
default:
die("Unexpected node type (internal error) %d", x->node);
@@ -1285,37 +1086,56 @@ static int match_expr_eval(struct grep_expr *x, char *bol, char *eol,
return h;
}
-static int match_expr(struct grep_opt *opt, char *bol, char *eol,
- enum grep_context ctx, int collect_hits)
+static int match_expr(struct grep_opt *opt,
+ const char *bol, const char *eol,
+ enum grep_context ctx, ssize_t *col,
+ ssize_t *icol, int collect_hits)
{
struct grep_expr *x = opt->pattern_expression;
- return match_expr_eval(x, bol, eol, ctx, collect_hits);
+ return match_expr_eval(opt, x, bol, eol, ctx, col, icol, collect_hits);
}
-static int match_line(struct grep_opt *opt, char *bol, char *eol,
+static int match_line(struct grep_opt *opt,
+ const char *bol, const char *eol,
+ ssize_t *col, ssize_t *icol,
enum grep_context ctx, int collect_hits)
{
struct grep_pat *p;
- regmatch_t match;
+ int hit = 0;
- if (opt->extended)
- return match_expr(opt, bol, eol, ctx, collect_hits);
+ if (opt->pattern_expression)
+ return match_expr(opt, bol, eol, ctx, col, icol,
+ collect_hits);
/* we do not call with collect_hits without being extended */
for (p = opt->pattern_list; p; p = p->next) {
- if (match_one_pattern(p, bol, eol, ctx, &match, 0))
- return 1;
+ regmatch_t tmp;
+ if (match_one_pattern(p, bol, eol, ctx, &tmp, 0)) {
+ hit |= 1;
+ if (!opt->columnnum) {
+ /*
+ * Without --column, any single match on a line
+ * is enough to know that it needs to be
+ * printed. With --column, scan _all_ patterns
+ * to find the earliest.
+ */
+ break;
+ }
+ if (*col < 0 || tmp.rm_so < *col)
+ *col = tmp.rm_so;
+ }
}
- return 0;
+ return hit;
}
-static int match_next_pattern(struct grep_pat *p, char *bol, char *eol,
+static int match_next_pattern(struct grep_pat *p,
+ const char *bol, const char *eol,
enum grep_context ctx,
regmatch_t *pmatch, int eflags)
{
regmatch_t match;
- if (!match_one_pattern(p, bol, eol, ctx, &match, eflags))
+ if (!headerless_match_one_pattern(p, bol, eol, ctx, &match, eflags))
return 0;
if (match.rm_so < 0 || match.rm_eo < 0)
return 0;
@@ -1330,18 +1150,26 @@ static int match_next_pattern(struct grep_pat *p, char *bol, char *eol,
return 1;
}
-static int next_match(struct grep_opt *opt, char *bol, char *eol,
- enum grep_context ctx, regmatch_t *pmatch, int eflags)
+int grep_next_match(struct grep_opt *opt,
+ const char *bol, const char *eol,
+ enum grep_context ctx, regmatch_t *pmatch,
+ enum grep_header_field field, int eflags)
{
struct grep_pat *p;
int hit = 0;
pmatch->rm_so = pmatch->rm_eo = -1;
if (bol < eol) {
- for (p = opt->pattern_list; p; p = p->next) {
+ for (p = ((ctx == GREP_CONTEXT_HEAD)
+ ? opt->header_list : opt->pattern_list);
+ p; p = p->next) {
switch (p->token) {
- case GREP_PATTERN: /* atom */
case GREP_PATTERN_HEAD:
+ if ((field != GREP_HEADER_FIELD_MAX) &&
+ (p->field != field))
+ continue;
+ /* fall thru */
+ case GREP_PATTERN: /* atom */
case GREP_PATTERN_BODY:
hit |= match_next_pattern(p, bol, eol, ctx,
pmatch, eflags);
@@ -1354,11 +1182,45 @@ static int next_match(struct grep_opt *opt, char *bol, char *eol,
return hit;
}
-static void show_line(struct grep_opt *opt, char *bol, char *eol,
- const char *name, unsigned lno, char sign)
+static void show_line_header(struct grep_opt *opt, const char *name,
+ unsigned lno, ssize_t cno, char sign)
+{
+ if (opt->heading && opt->last_shown == 0) {
+ output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
+ opt->output(opt, "\n", 1);
+ }
+ opt->last_shown = lno;
+
+ if (!opt->heading && opt->pathname) {
+ output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
+ output_sep(opt, sign);
+ }
+ if (opt->linenum) {
+ char buf[32];
+ xsnprintf(buf, sizeof(buf), "%d", lno);
+ output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_LINENO]);
+ output_sep(opt, sign);
+ }
+ /*
+ * Treat 'cno' as the 1-indexed offset from the start of a non-context
+ * line to its first match. Otherwise, 'cno' is 0 indicating that we are
+ * being called with a context line.
+ */
+ if (opt->columnnum && cno) {
+ char buf[32];
+ xsnprintf(buf, sizeof(buf), "%"PRIuMAX, (uintmax_t)cno);
+ output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_COLUMNNO]);
+ output_sep(opt, sign);
+ }
+}
+
+static void show_line(struct grep_opt *opt,
+ const char *bol, const char *eol,
+ const char *name, unsigned lno, ssize_t cno, char sign)
{
int rest = eol - bol;
- const char *match_color, *line_color = NULL;
+ const char *match_color = NULL;
+ const char *line_color = NULL;
if (opt->file_break && opt->last_shown == 0) {
if (opt->show_hunk_mark)
@@ -1374,57 +1236,55 @@ static void show_line(struct grep_opt *opt, char *bol, char *eol,
opt->output(opt, "\n", 1);
}
}
- if (opt->heading && opt->last_shown == 0) {
- output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
- opt->output(opt, "\n", 1);
- }
- opt->last_shown = lno;
-
- if (!opt->heading && opt->pathname) {
- output_color(opt, name, strlen(name), opt->colors[GREP_COLOR_FILENAME]);
- output_sep(opt, sign);
- }
- if (opt->linenum) {
- char buf[32];
- xsnprintf(buf, sizeof(buf), "%d", lno);
- output_color(opt, buf, strlen(buf), opt->colors[GREP_COLOR_LINENO]);
- output_sep(opt, sign);
+ if (!opt->only_matching) {
+ /*
+ * In case the line we're being called with contains more than
+ * one match, leave printing each header to the loop below.
+ */
+ show_line_header(opt, name, lno, cno, sign);
}
- if (opt->color) {
+ if (opt->color || opt->only_matching) {
regmatch_t match;
enum grep_context ctx = GREP_CONTEXT_BODY;
- int ch = *eol;
int eflags = 0;
- if (sign == ':')
- match_color = opt->colors[GREP_COLOR_MATCH_SELECTED];
- else
- match_color = opt->colors[GREP_COLOR_MATCH_CONTEXT];
- if (sign == ':')
- line_color = opt->colors[GREP_COLOR_SELECTED];
- else if (sign == '-')
- line_color = opt->colors[GREP_COLOR_CONTEXT];
- else if (sign == '=')
- line_color = opt->colors[GREP_COLOR_FUNCTION];
- *eol = '\0';
- while (next_match(opt, bol, eol, ctx, &match, eflags)) {
+ if (opt->color) {
+ if (sign == ':')
+ match_color = opt->colors[GREP_COLOR_MATCH_SELECTED];
+ else
+ match_color = opt->colors[GREP_COLOR_MATCH_CONTEXT];
+ if (sign == ':')
+ line_color = opt->colors[GREP_COLOR_SELECTED];
+ else if (sign == '-')
+ line_color = opt->colors[GREP_COLOR_CONTEXT];
+ else if (sign == '=')
+ line_color = opt->colors[GREP_COLOR_FUNCTION];
+ }
+ while (grep_next_match(opt, bol, eol, ctx, &match,
+ GREP_HEADER_FIELD_MAX, eflags)) {
if (match.rm_so == match.rm_eo)
break;
- output_color(opt, bol, match.rm_so, line_color);
+ if (opt->only_matching)
+ show_line_header(opt, name, lno, cno, sign);
+ else
+ output_color(opt, bol, match.rm_so, line_color);
output_color(opt, bol + match.rm_so,
match.rm_eo - match.rm_so, match_color);
+ if (opt->only_matching)
+ opt->output(opt, "\n", 1);
bol += match.rm_eo;
+ cno += match.rm_eo;
rest -= match.rm_eo;
eflags = REG_NOTBOL;
}
- *eol = ch;
}
- output_color(opt, bol, rest, line_color);
- opt->output(opt, "\n", 1);
+ if (!opt->only_matching) {
+ output_color(opt, bol, rest, line_color);
+ opt->output(opt, "\n", 1);
+ }
}
-#ifndef NO_PTHREADS
int grep_use_locks;
/*
@@ -1445,21 +1305,12 @@ static inline void grep_attr_unlock(void)
pthread_mutex_unlock(&grep_attr_mutex);
}
-/*
- * Same as git_attr_mutex, but protecting the thread-unsafe object db access.
- */
-pthread_mutex_t grep_read_mutex;
-
-#else
-#define grep_attr_lock()
-#define grep_attr_unlock()
-#endif
-
-static int match_funcname(struct grep_opt *opt, struct grep_source *gs, char *bol, char *eol)
+static int match_funcname(struct grep_opt *opt, struct grep_source *gs,
+ const char *bol, const char *eol)
{
xdemitconf_t *xecfg = opt->priv;
if (xecfg && !xecfg->find_func) {
- grep_source_load_driver(gs);
+ grep_source_load_driver(gs, opt->repo->index);
if (gs->driver->funcname.pattern) {
const struct userdiff_funcname *pe = &gs->driver->funcname;
xdiff_set_find_func(xecfg, pe->pattern, pe->cflags);
@@ -1482,10 +1333,10 @@ static int match_funcname(struct grep_opt *opt, struct grep_source *gs, char *bo
}
static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs,
- char *bol, unsigned lno)
+ const char *bol, unsigned lno)
{
while (bol > gs->buf) {
- char *eol = --bol;
+ const char *eol = --bol;
while (bol > gs->buf && bol[-1] != '\n')
bol--;
@@ -1495,7 +1346,7 @@ static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs,
break;
if (match_funcname(opt, gs, bol, eol)) {
- show_line(opt, bol, eol, gs->name, lno, '=');
+ show_line(opt, bol, eol, gs->name, lno, 0, '=');
break;
}
}
@@ -1504,7 +1355,7 @@ static void show_funcname_line(struct grep_opt *opt, struct grep_source *gs,
static int is_empty_line(const char *bol, const char *eol);
static void show_pre_context(struct grep_opt *opt, struct grep_source *gs,
- char *bol, char *end, unsigned lno)
+ const char *bol, const char *end, unsigned lno)
{
unsigned cur = lno, from = 1, funcname_lno = 0, orig_from;
int funcname_needed = !!opt->funcname, comment_needed = 0;
@@ -1524,8 +1375,8 @@ static void show_pre_context(struct grep_opt *opt, struct grep_source *gs,
/* Rewind. */
while (bol > gs->buf && cur > from) {
- char *next_bol = bol;
- char *eol = --bol;
+ const char *next_bol = bol;
+ const char *eol = --bol;
while (bol > gs->buf && bol[-1] != '\n')
bol--;
@@ -1556,11 +1407,11 @@ static void show_pre_context(struct grep_opt *opt, struct grep_source *gs,
/* Back forward. */
while (cur < lno) {
- char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-';
+ const char *eol = bol, sign = (cur == funcname_lno) ? '=' : '-';
while (*eol != '\n')
eol++;
- show_line(opt, bol, eol, gs->name, cur, sign);
+ show_line(opt, bol, eol, gs->name, cur, 0, sign);
bol = eol + 1;
cur++;
}
@@ -1570,7 +1421,7 @@ static int should_lookahead(struct grep_opt *opt)
{
struct grep_pat *p;
- if (opt->extended)
+ if (opt->pattern_expression)
return 0; /* punt for too complex stuff */
if (opt->invert)
return 0;
@@ -1584,12 +1435,12 @@ static int should_lookahead(struct grep_opt *opt)
static int look_ahead(struct grep_opt *opt,
unsigned long *left_p,
unsigned *lno_p,
- char **bol_p)
+ const char **bol_p)
{
unsigned lno = *lno_p;
- char *bol = *bol_p;
+ const char *bol = *bol_p;
struct grep_pat *p;
- char *sp, *last_bol;
+ const char *sp, *last_bol;
regoff_t earliest = -1;
for (p = opt->pattern_list; p; p = p->next) {
@@ -1622,7 +1473,8 @@ static int look_ahead(struct grep_opt *opt,
return 0;
}
-static int fill_textconv_grep(struct userdiff_driver *driver,
+static int fill_textconv_grep(struct repository *r,
+ struct userdiff_driver *driver,
struct grep_source *gs)
{
struct diff_filespec *df;
@@ -1643,20 +1495,27 @@ static int fill_textconv_grep(struct userdiff_driver *driver,
fill_filespec(df, gs->identifier, 1, 0100644);
break;
case GREP_SOURCE_FILE:
- fill_filespec(df, &null_oid, 0, 0100644);
+ fill_filespec(df, null_oid(), 0, 0100644);
break;
default:
BUG("attempt to textconv something without a path?");
}
/*
- * fill_textconv is not remotely thread-safe; it may load objects
- * behind the scenes, and it modifies the global diff tempfile
- * structure.
+ * fill_textconv is not remotely thread-safe; it modifies the global
+ * diff tempfile structure, writes to the_repo's odb and might
+ * internally call thread-unsafe functions such as the
+ * prepare_packed_git() lazy-initializator. Because of the last two, we
+ * must ensure mutual exclusion between this call and the object reading
+ * API, thus we use obj_read_lock() here.
+ *
+ * TODO: allowing text conversion to run in parallel with object
+ * reading operations might increase performance in the multithreaded
+ * non-worktreee git-grep with --textconv.
*/
- grep_read_lock();
- size = fill_textconv(driver, df, &buf);
- grep_read_unlock();
+ obj_read_lock();
+ size = fill_textconv(r, driver, df, &buf);
+ obj_read_unlock();
free_filespec(df);
/*
@@ -1683,8 +1542,8 @@ static int is_empty_line(const char *bol, const char *eol)
static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits)
{
- char *bol;
- char *peek_bol = NULL;
+ const char *bol;
+ const char *peek_bol = NULL;
unsigned long left;
unsigned lno = 1;
unsigned last_hit = 0;
@@ -1696,6 +1555,10 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
enum grep_context ctx = GREP_CONTEXT_HEAD;
xdemitconf_t xecfg;
+ if (!opt->status_only && gs->name == NULL)
+ BUG("grep call which could print a name requires "
+ "grep_source.name be non-NULL");
+
if (!opt->output)
opt->output = std_output;
@@ -1715,13 +1578,18 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
opt->last_shown = 0;
if (opt->allow_textconv) {
- grep_source_load_driver(gs);
+ grep_source_load_driver(gs, opt->repo->index);
/*
* We might set up the shared textconv cache data here, which
- * is not thread-safe.
+ * is not thread-safe. Also, get_oid_with_context() and
+ * parse_object() might be internally called. As they are not
+ * currently thread-safe and might be racy with object reading,
+ * obj_read_lock() must be called.
*/
grep_attr_lock();
- textconv = userdiff_get_textconv(gs->driver);
+ obj_read_lock();
+ textconv = userdiff_get_textconv(opt->repo, gs->driver);
+ obj_read_unlock();
grep_attr_unlock();
}
@@ -1732,11 +1600,11 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
if (!textconv) {
switch (opt->binary) {
case GREP_BINARY_DEFAULT:
- if (grep_source_is_binary(gs))
+ if (grep_source_is_binary(gs, opt->repo->index))
binary_match_only = 1;
break;
case GREP_BINARY_NOMATCH:
- if (grep_source_is_binary(gs))
+ if (grep_source_is_binary(gs, opt->repo->index))
return 0; /* Assume unmatch */
break;
case GREP_BINARY_TEXT:
@@ -1751,14 +1619,16 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
try_lookahead = should_lookahead(opt);
- if (fill_textconv_grep(textconv, gs) < 0)
+ if (fill_textconv_grep(opt->repo, textconv, gs) < 0)
return 0;
bol = gs->buf;
left = gs->size;
while (left) {
- char *eol, ch;
+ const char *eol;
int hit;
+ ssize_t cno;
+ ssize_t col = -1, icol = -1;
/*
* look_ahead() skips quickly to the line that possibly
@@ -1776,14 +1646,11 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
&& look_ahead(opt, &left, &lno, &bol))
break;
eol = end_of_line(bol, &left);
- ch = *eol;
- *eol = 0;
if ((ctx == GREP_CONTEXT_HEAD) && (eol == bol))
ctx = GREP_CONTEXT_BODY;
- hit = match_line(opt, bol, eol, ctx, collect_hits);
- *eol = ch;
+ hit = match_line(opt, bol, eol, &col, &icol, ctx, collect_hits);
if (collect_hits)
goto next_line;
@@ -1799,7 +1666,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
return 0;
goto next_line;
}
- if (hit) {
+ if (hit && (opt->max_count < 0 || count < opt->max_count)) {
count++;
if (opt->status_only)
return 1;
@@ -1823,7 +1690,18 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
show_pre_context(opt, gs, bol, eol, lno);
else if (opt->funcname)
show_funcname_line(opt, gs, bol, lno);
- show_line(opt, bol, eol, gs->name, lno, ':');
+ cno = opt->invert ? icol : col;
+ if (cno < 0) {
+ /*
+ * A negative cno indicates that there was no
+ * match on the line. We are thus inverted and
+ * being asked to show all lines that _don't_
+ * match a given expression. Therefore, set cno
+ * to 0 to suggest the whole line matches.
+ */
+ cno = 0;
+ }
+ show_line(opt, bol, eol, gs->name, lno, cno + 1, ':');
last_hit = lno;
if (opt->funcbody)
show_function = 1;
@@ -1831,7 +1709,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
}
if (show_function && (!peek_bol || peek_bol < bol)) {
unsigned long peek_left = left;
- char *peek_eol = eol;
+ const char *peek_eol = eol;
/*
* Trailing empty lines are not interesting.
@@ -1852,7 +1730,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
/* If the last hit is within the post context,
* we need to show this line.
*/
- show_line(opt, bol, eol, gs->name, lno, '-');
+ show_line(opt, bol, eol, gs->name, lno, col + 1, '-');
}
next_line:
@@ -1928,29 +1806,43 @@ int grep_source(struct grep_opt *opt, struct grep_source *gs)
* we do not have to do the two-pass grep when we do not check
* buffer-wide "all-match".
*/
- if (!opt->all_match)
+ if (!opt->all_match && !opt->no_body_match)
return grep_source_1(opt, gs, 0);
/* Otherwise the toplevel "or" terms hit a bit differently.
* We first clear hit markers from them.
*/
clr_hit_marker(opt->pattern_expression);
+ opt->body_hit = 0;
grep_source_1(opt, gs, 1);
- if (!chk_hit_marker(opt->pattern_expression))
+ if (opt->all_match && !chk_hit_marker(opt->pattern_expression))
+ return 0;
+ if (opt->no_body_match && opt->body_hit)
return 0;
return grep_source_1(opt, gs, 0);
}
-int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size)
+static void grep_source_init_buf(struct grep_source *gs,
+ const char *buf,
+ unsigned long size)
+{
+ gs->type = GREP_SOURCE_BUF;
+ gs->name = NULL;
+ gs->path = NULL;
+ gs->buf = buf;
+ gs->size = size;
+ gs->driver = NULL;
+ gs->identifier = NULL;
+}
+
+int grep_buffer(struct grep_opt *opt, const char *buf, unsigned long size)
{
struct grep_source gs;
int r;
- grep_source_init(&gs, GREP_SOURCE_BUF, NULL, NULL, NULL);
- gs.buf = buf;
- gs.size = size;
+ grep_source_init_buf(&gs, buf, size);
r = grep_source(opt, &gs);
@@ -1958,28 +1850,30 @@ int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size)
return r;
}
-void grep_source_init(struct grep_source *gs, enum grep_source_type type,
- const char *name, const char *path,
- const void *identifier)
+void grep_source_init_file(struct grep_source *gs, const char *name,
+ const char *path)
{
- gs->type = type;
+ gs->type = GREP_SOURCE_FILE;
gs->name = xstrdup_or_null(name);
gs->path = xstrdup_or_null(path);
gs->buf = NULL;
gs->size = 0;
gs->driver = NULL;
+ gs->identifier = xstrdup(path);
+}
- switch (type) {
- case GREP_SOURCE_FILE:
- gs->identifier = xstrdup(identifier);
- break;
- case GREP_SOURCE_OID:
- gs->identifier = oiddup(identifier);
- break;
- case GREP_SOURCE_BUF:
- gs->identifier = NULL;
- break;
- }
+void grep_source_init_oid(struct grep_source *gs, const char *name,
+ const char *path, const struct object_id *oid,
+ struct repository *repo)
+{
+ gs->type = GREP_SOURCE_OID;
+ gs->name = xstrdup_or_null(name);
+ gs->path = xstrdup_or_null(path);
+ gs->buf = NULL;
+ gs->size = 0;
+ gs->driver = NULL;
+ gs->identifier = oiddup(oid);
+ gs->repo = repo;
}
void grep_source_clear(struct grep_source *gs)
@@ -1995,7 +1889,9 @@ void grep_source_clear_data(struct grep_source *gs)
switch (gs->type) {
case GREP_SOURCE_FILE:
case GREP_SOURCE_OID:
- FREE_AND_NULL(gs->buf);
+ /* these types own the buffer */
+ free((char *)gs->buf);
+ gs->buf = NULL;
gs->size = 0;
break;
case GREP_SOURCE_BUF:
@@ -2008,10 +1904,8 @@ static int grep_source_load_oid(struct grep_source *gs)
{
enum object_type type;
- grep_read_lock();
- gs->buf = read_object_file(gs->identifier, &type, &gs->size);
- grep_read_unlock();
-
+ gs->buf = repo_read_object_file(gs->repo, gs->identifier, &type,
+ &gs->size);
if (!gs->buf)
return error(_("'%s': unable to read %s"),
gs->name,
@@ -2069,22 +1963,24 @@ static int grep_source_load(struct grep_source *gs)
BUG("invalid grep_source type to load");
}
-void grep_source_load_driver(struct grep_source *gs)
+void grep_source_load_driver(struct grep_source *gs,
+ struct index_state *istate)
{
if (gs->driver)
return;
grep_attr_lock();
if (gs->path)
- gs->driver = userdiff_find_by_path(gs->path);
+ gs->driver = userdiff_find_by_path(istate, gs->path);
if (!gs->driver)
gs->driver = userdiff_find_by_name("default");
grep_attr_unlock();
}
-static int grep_source_is_binary(struct grep_source *gs)
+static int grep_source_is_binary(struct grep_source *gs,
+ struct index_state *istate)
{
- grep_source_load_driver(gs);
+ grep_source_load_driver(gs, istate);
if (gs->driver->binary != -1)
return gs->driver->binary;