about summary refs log tree commit diff stats
path: root/grep.c
diff options
context:
space:
mode:
Diffstat (limited to 'grep.c')
-rw-r--r-- grep.c 332
1 files changed, 55 insertions, 277 deletions
diff --git a/grep.c b/grep.c
index 54af9f813e..aabfaaa4c3 100644
--- a/grep.c
+++ b/grep.c
@@ -14,7 +14,31 @@ static int grep_source_load(struct grep_source *gs);
static int grep_source_is_binary(struct grep_source *gs,
struct index_state *istate);
-static struct grep_opt grep_defaults;
+static void std_output(struct grep_opt *opt, const void *buf, size_t size)
+{
+ fwrite(buf, size, 1, stdout);
+}
+
+static struct grep_opt grep_defaults = {
+ .relative = 1,
+ .pathname = 1,
+ .max_depth = -1,
+ .pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED,
+ .colors = {
+ [GREP_COLOR_CONTEXT] = "",
+ [GREP_COLOR_FILENAME] = "",
+ [GREP_COLOR_FUNCTION] = "",
+ [GREP_COLOR_LINENO] = "",
+ [GREP_COLOR_COLUMNNO] = "",
+ [GREP_COLOR_MATCH_CONTEXT] = GIT_COLOR_BOLD_RED,
+ [GREP_COLOR_MATCH_SELECTED] = GIT_COLOR_BOLD_RED,
+ [GREP_COLOR_SELECTED] = "",
+ [GREP_COLOR_SEP] = GIT_COLOR_CYAN,
+ },
+ .only_matching = 0,
+ .color = -1,
+ .output = std_output,
+};
#ifdef USE_LIBPCRE2
static pcre2_general_context *pcre2_global_context;
@@ -42,50 +66,6 @@ static const char *color_grep_slots[] = {
[GREP_COLOR_SEP] = "separator",
};
-static void std_output(struct grep_opt *opt, const void *buf, size_t size)
-{
- fwrite(buf, size, 1, stdout);
-}
-
-static void color_set(char *dst, const char *color_bytes)
-{
- xsnprintf(dst, COLOR_MAXLEN, "%s", color_bytes);
-}
-
-/*
- * Initialize the grep_defaults template with hardcoded defaults.
- * We could let the compiler do this, but without C99 initializers
- * the code gets unwieldy and unreadable, so...
- */
-void init_grep_defaults(struct repository *repo)
-{
- struct grep_opt *opt = &grep_defaults;
- static int run_once;
-
- if (run_once)
- return;
- run_once++;
-
- memset(opt, 0, sizeof(*opt));
- opt->repo = repo;
- opt->relative = 1;
- opt->pathname = 1;
- opt->max_depth = -1;
- opt->pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED;
- color_set(opt->colors[GREP_COLOR_CONTEXT], "");
- color_set(opt->colors[GREP_COLOR_FILENAME], "");
- color_set(opt->colors[GREP_COLOR_FUNCTION], "");
- color_set(opt->colors[GREP_COLOR_LINENO], "");
- color_set(opt->colors[GREP_COLOR_COLUMNNO], "");
- color_set(opt->colors[GREP_COLOR_MATCH_CONTEXT], GIT_COLOR_BOLD_RED);
- color_set(opt->colors[GREP_COLOR_MATCH_SELECTED], GIT_COLOR_BOLD_RED);
- color_set(opt->colors[GREP_COLOR_SELECTED], "");
- color_set(opt->colors[GREP_COLOR_SEP], GIT_COLOR_CYAN);
- opt->only_matching = 0;
- opt->color = -1;
- opt->output = std_output;
-}
-
static int parse_pattern_type_arg(const char *opt, const char *arg)
{
if (!strcmp(arg, "default"))
@@ -115,6 +95,14 @@ int grep_config(const char *var, const char *value, void *cb)
if (userdiff_config(var, value) < 0)
return -1;
+ /*
+ * The instance of grep_opt that we set up here is copied by
+ * grep_init() to be used by each individual invocation.
+ * When populating a new field of this structure here, be
+ * sure to think about ownership -- e.g., you might need to
+ * override the shallow copy in grep_init() with a deep copy.
+ */
+
if (!strcmp(var, "grep.extendedregexp")) {
opt->extended_regexp_option = git_config_bool(var, value);
return 0;
@@ -172,40 +160,19 @@ int grep_config(const char *var, const char *value, void *cb)
*/
void grep_init(struct grep_opt *opt, struct repository *repo, const char *prefix)
{
- struct grep_opt *def = &grep_defaults;
- int i;
-
#if defined(USE_LIBPCRE2)
if (!pcre2_global_context)
pcre2_global_context = pcre2_general_context_create(
pcre2_malloc, pcre2_free, NULL);
#endif
-#ifdef USE_LIBPCRE1
- pcre_malloc = malloc;
- pcre_free = free;
-#endif
+ *opt = grep_defaults;
- memset(opt, 0, sizeof(*opt));
opt->repo = repo;
opt->prefix = prefix;
opt->prefix_length = (prefix && *prefix) ? strlen(prefix) : 0;
opt->pattern_tail = &opt->pattern_list;
opt->header_tail = &opt->header_list;
-
- opt->only_matching = def->only_matching;
- opt->color = def->color;
- opt->extended_regexp_option = def->extended_regexp_option;
- opt->pattern_type_option = def->pattern_type_option;
- opt->linenum = def->linenum;
- opt->columnnum = def->columnnum;
- opt->max_depth = def->max_depth;
- opt->pathname = def->pathname;
- opt->relative = def->relative;
- opt->output = def->output;
-
- for (i = 0; i < NR_GREP_COLORS; i++)
- color_set(opt->colors[i], def->colors[i]);
}
void grep_destroy(void)
@@ -251,17 +218,7 @@ static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, st
break;
case GREP_PATTERN_TYPE_PCRE:
-#ifdef USE_LIBPCRE2
opt->pcre2 = 1;
-#else
- /*
- * It's important that pcre1 always be assigned to
- * even when there's no USE_LIBPCRE* defined. We still
- * call the PCRE stub function, it just dies with
- * "cannot use Perl-compatible regexes[...]".
- */
- opt->pcre1 = 1;
-#endif
break;
}
}
@@ -405,92 +362,6 @@ static int is_fixed(const char *s, size_t len)
return 1;
}
-#ifdef USE_LIBPCRE1
-static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
-{
- const char *error;
- int erroffset;
- int options = PCRE_MULTILINE;
- int study_options = 0;
-
- if (opt->ignore_case) {
- if (!opt->ignore_locale && has_non_ascii(p->pattern))
- p->pcre1_tables = pcre_maketables();
- options |= PCRE_CASELESS;
- }
- if (!opt->ignore_locale && is_utf8_locale() && has_non_ascii(p->pattern))
- options |= PCRE_UTF8;
-
- p->pcre1_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
- p->pcre1_tables);
- if (!p->pcre1_regexp)
- compile_regexp_failed(p, error);
-
-#if defined(PCRE_CONFIG_JIT) && !defined(NO_LIBPCRE1_JIT)
- pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
- if (opt->debug)
- fprintf(stderr, "pcre1_jit_on=%d\n", p->pcre1_jit_on);
-
- if (p->pcre1_jit_on)
- study_options = PCRE_STUDY_JIT_COMPILE;
-#endif
-
- p->pcre1_extra_info = pcre_study(p->pcre1_regexp, study_options, &error);
- if (!p->pcre1_extra_info && error)
- die("%s", error);
-}
-
-static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
- regmatch_t *match, int eflags)
-{
- int ovector[30], ret, flags = PCRE_NO_UTF8_CHECK;
-
- if (eflags & REG_NOTBOL)
- flags |= PCRE_NOTBOL;
-
- ret = pcre_exec(p->pcre1_regexp, p->pcre1_extra_info, line,
- eol - line, 0, flags, ovector,
- ARRAY_SIZE(ovector));
-
- if (ret < 0 && ret != PCRE_ERROR_NOMATCH)
- die("pcre_exec failed with error code %d", ret);
- if (ret > 0) {
- ret = 0;
- match->rm_so = ovector[0];
- match->rm_eo = ovector[1];
- }
-
- return ret;
-}
-
-static void free_pcre1_regexp(struct grep_pat *p)
-{
- pcre_free(p->pcre1_regexp);
-#ifdef PCRE_CONFIG_JIT
- if (p->pcre1_jit_on)
- pcre_free_study(p->pcre1_extra_info);
- else
-#endif
- pcre_free(p->pcre1_extra_info);
- pcre_free((void *)p->pcre1_tables);
-}
-#else /* !USE_LIBPCRE1 */
-static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
-{
- die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
-}
-
-static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
- regmatch_t *match, int eflags)
-{
- return 1;
-}
-
-static void free_pcre1_regexp(struct grep_pat *p)
-{
-}
-#endif /* !USE_LIBPCRE1 */
-
#ifdef USE_LIBPCRE2
static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
{
@@ -520,7 +391,23 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
}
if (!opt->ignore_locale && is_utf8_locale() && has_non_ascii(p->pattern) &&
!(!opt->ignore_case && (p->fixed || p->is_fixed)))
- options |= PCRE2_UTF;
+ options |= (PCRE2_UTF | PCRE2_MATCH_INVALID_UTF);
+
+ /* Work around https://bugs.exim.org/show_bug.cgi?id=2642 fixed in 10.36 */
+ if (PCRE2_MATCH_INVALID_UTF && options & (PCRE2_UTF | PCRE2_CASELESS)) {
+ struct strbuf buf;
+ int len;
+ int err;
+
+ if ((len = pcre2_config(PCRE2_CONFIG_VERSION, NULL)) < 0)
+ BUG("pcre2_config(..., NULL) failed: %d", len);
+ strbuf_init(&buf, len + 1);
+ if ((err = pcre2_config(PCRE2_CONFIG_VERSION, buf.buf)) < 0)
+ BUG("pcre2_config(..., buf.buf) failed: %d", err);
+ if (versioncmp(buf.buf, "10.36") < 0)
+ options |= PCRE2_NO_START_OPTIMIZE;
+ strbuf_release(&buf);
+ }
p->pcre2_pattern = pcre2_compile((PCRE2_SPTR)p->pattern,
p->patternlen, options, &error, &erroffset,
@@ -536,8 +423,6 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
}
pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
- if (opt->debug)
- fprintf(stderr, "pcre2_jit_on=%d\n", p->pcre2_jit_on);
if (p->pcre2_jit_on) {
jitret = pcre2_jit_compile(p->pcre2_pattern, PCRE2_JIT_COMPLETE);
if (jitret)
@@ -563,9 +448,6 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
BUG("pcre2_pattern_info() failed: %d", patinforet);
if (jitsizearg == 0) {
p->pcre2_jit_on = 0;
- if (opt->debug)
- fprintf(stderr, "pcre2_jit_on=%d: (*NO_JIT) in regex\n",
- p->pcre2_jit_on);
return;
}
}
@@ -616,11 +498,6 @@ static void free_pcre2_pattern(struct grep_pat *p)
#else /* !USE_LIBPCRE2 */
static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt)
{
- /*
- * Unreachable until USE_LIBPCRE2 becomes synonymous with
- * USE_LIBPCRE. See the sibling comment in
- * grep_set_pattern_type_option().
- */
die("cannot use Perl-compatible regexes when not compiled with USE_LIBPCRE");
}
@@ -644,8 +521,6 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
if (opt->ignore_case)
regflags |= REG_ICASE;
err = regcomp(&p->regexp, sb.buf, regflags);
- if (opt->debug)
- fprintf(stderr, "fixed %s\n", sb.buf);
strbuf_release(&sb);
if (err) {
char errbuf[1024];
@@ -721,11 +596,6 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
return;
}
- if (opt->pcre1) {
- compile_pcre1_regexp(p, opt);
- return;
- }
-
if (p->ignore_case)
regflags |= REG_ICASE;
if (opt->extended_regexp_option)
@@ -840,87 +710,6 @@ static struct grep_expr *compile_pattern_expr(struct grep_pat **list)
return compile_pattern_or(list);
}
-static void indent(int in)
-{
- while (in-- > 0)
- fputc(' ', stderr);
-}
-
-static void dump_grep_pat(struct grep_pat *p)
-{
- switch (p->token) {
- case GREP_AND: fprintf(stderr, "*and*"); break;
- case GREP_OPEN_PAREN: fprintf(stderr, "*(*"); break;
- case GREP_CLOSE_PAREN: fprintf(stderr, "*)*"); break;
- case GREP_NOT: fprintf(stderr, "*not*"); break;
- case GREP_OR: fprintf(stderr, "*or*"); break;
-
- case GREP_PATTERN: fprintf(stderr, "pattern"); break;
- case GREP_PATTERN_HEAD: fprintf(stderr, "pattern_head"); break;
- case GREP_PATTERN_BODY: fprintf(stderr, "pattern_body"); break;
- }
-
- switch (p->token) {
- default: break;
- case GREP_PATTERN_HEAD:
- fprintf(stderr, "<head %d>", p->field); break;
- case GREP_PATTERN_BODY:
- fprintf(stderr, "<body>"); break;
- }
- switch (p->token) {
- default: break;
- case GREP_PATTERN_HEAD:
- case GREP_PATTERN_BODY:
- case GREP_PATTERN:
- fprintf(stderr, "%.*s", (int)p->patternlen, p->pattern);
- break;
- }
- fputc('\n', stderr);
-}
-
-static void dump_grep_expression_1(struct grep_expr *x, int in)
-{
- indent(in);
- switch (x->node) {
- case GREP_NODE_TRUE:
- fprintf(stderr, "true\n");
- break;
- case GREP_NODE_ATOM:
- dump_grep_pat(x->u.atom);
- break;
- case GREP_NODE_NOT:
- fprintf(stderr, "(not\n");
- dump_grep_expression_1(x->u.unary, in+1);
- indent(in);
- fprintf(stderr, ")\n");
- break;
- case GREP_NODE_AND:
- fprintf(stderr, "(and\n");
- dump_grep_expression_1(x->u.binary.left, in+1);
- dump_grep_expression_1(x->u.binary.right, in+1);
- indent(in);
- fprintf(stderr, ")\n");
- break;
- case GREP_NODE_OR:
- fprintf(stderr, "(or\n");
- dump_grep_expression_1(x->u.binary.left, in+1);
- dump_grep_expression_1(x->u.binary.right, in+1);
- indent(in);
- fprintf(stderr, ")\n");
- break;
- }
-}
-
-static void dump_grep_expression(struct grep_opt *opt)
-{
- struct grep_expr *x = opt->pattern_expression;
-
- if (opt->all_match)
- fprintf(stderr, "[all-match]\n");
- dump_grep_expression_1(x, 0);
- fflush(NULL);
-}
-
static struct grep_expr *grep_true_expr(void)
{
struct grep_expr *z = xcalloc(1, sizeof(*z));
@@ -1001,7 +790,7 @@ static struct grep_expr *grep_splice_or(struct grep_expr *x, struct grep_expr *y
return z;
}
-static void compile_grep_patterns_real(struct grep_opt *opt)
+void compile_grep_patterns(struct grep_opt *opt)
{
struct grep_pat *p;
struct grep_expr *header_expr = prep_header_patterns(opt);
@@ -1021,7 +810,7 @@ static void compile_grep_patterns_real(struct grep_opt *opt)
if (opt->all_match || header_expr)
opt->extended = 1;
- else if (!opt->extended && !opt->debug)
+ else if (!opt->extended)
return;
p = opt->pattern_list;
@@ -1044,13 +833,6 @@ static void compile_grep_patterns_real(struct grep_opt *opt)
opt->all_match = 1;
}
-void compile_grep_patterns(struct grep_opt *opt)
-{
- compile_grep_patterns_real(opt);
- if (opt->debug)
- dump_grep_expression(opt);
-}
-
static void free_pattern_expr(struct grep_expr *x)
{
switch (x->node) {
@@ -1079,9 +861,7 @@ void free_grep_patterns(struct grep_opt *opt)
case GREP_PATTERN: /* atom */
case GREP_PATTERN_HEAD:
case GREP_PATTERN_BODY:
- if (p->pcre1_regexp)
- free_pcre1_regexp(p);
- else if (p->pcre2_pattern)
+ if (p->pcre2_pattern)
free_pcre2_pattern(p);
else
regfree(&p->regexp);
@@ -1144,9 +924,7 @@ static int patmatch(struct grep_pat *p, char *line, char *eol,
{
int hit;
- if (p->pcre1_regexp)
- hit = !pcre1match(p, line, eol, match, eflags);
- else if (p->pcre2_pattern)
+ if (p->pcre2_pattern)
hit = !pcre2match(p, line, eol, match, eflags);
else
hit = !regexec_buf(&p->regexp, line, eol - line, 1, match,