aboutsummaryrefslogtreecommitdiffstats
path: root/userdiff.c
diff options
context:
space:
mode:
Diffstat (limited to 'userdiff.c')
-rw-r--r--userdiff.c124
1 files changed, 98 insertions, 26 deletions
diff --git a/userdiff.c b/userdiff.c
index d9b2ba752f..92ef649c99 100644
--- a/userdiff.c
+++ b/userdiff.c
@@ -1,18 +1,44 @@
-#include "cache.h"
+#include "git-compat-util.h"
#include "config.h"
#include "userdiff.h"
#include "attr.h"
+#include "strbuf.h"
+#include "environment.h"
static struct userdiff_driver *drivers;
static int ndrivers;
static int drivers_alloc;
-#define PATTERNS(name, pattern, word_regex) \
- { name, NULL, -1, { pattern, REG_EXTENDED }, \
- word_regex "|[^[:space:]]|[\xc0-\xff][\x80-\xbf]+" }
-#define IPATTERN(name, pattern, word_regex) \
- { name, NULL, -1, { pattern, REG_EXTENDED | REG_ICASE }, \
- word_regex "|[^[:space:]]|[\xc0-\xff][\x80-\xbf]+" }
+#define PATTERNS(lang, rx, wrx) { \
+ .name = lang, \
+ .binary = -1, \
+ .funcname = { \
+ .pattern = rx, \
+ .cflags = REG_EXTENDED, \
+ }, \
+ .word_regex = wrx "|[^[:space:]]|[\xc0-\xff][\x80-\xbf]+", \
+ .word_regex_multi_byte = wrx "|[^[:space:]]", \
+}
+#define IPATTERN(lang, rx, wrx) { \
+ .name = lang, \
+ .binary = -1, \
+ .funcname = { \
+ .pattern = rx, \
+ .cflags = REG_EXTENDED | REG_ICASE, \
+ }, \
+ .word_regex = wrx "|[^[:space:]]|[\xc0-\xff][\x80-\xbf]+", \
+ .word_regex_multi_byte = wrx "|[^[:space:]]", \
+}
+
+/*
+ * Built-in drivers for various languages, sorted by their names
+ * (except that the "default" is left at the end).
+ *
+ * When writing or updating patterns, assume that the contents these
+ * patterns are applied to are syntactically correct. The patterns
+ * can be simple without implementing all syntactical corner cases, as
+ * long as they are sufficiently permissive.
+ */
static struct userdiff_driver builtin_drivers[] = {
IPATTERN("ada",
"!^(.*[ \t])?(is[ \t]+new|renames|is[ \t]+separate)([ \t].*)?$\n"
@@ -54,9 +80,15 @@ PATTERNS("cpp",
/* functions/methods, variables, and compounds at top level */
"^((::[[:space:]]*)?[A-Za-z_].*)$",
/* -- */
+ /* identifiers and keywords */
"[a-zA-Z_][a-zA-Z0-9_]*"
- "|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lLuU]*"
- "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->\\*?|\\.\\*"),
+ /* decimal and octal integers as well as floatingpoint numbers */
+ "|[0-9][0-9.]*([Ee][-+]?[0-9]+)?[fFlLuU]*"
+ /* hexadecimal and binary integers */
+ "|0[xXbB][0-9a-fA-F]+[lLuU]*"
+ /* floatingpoint numbers that begin with a decimal point */
+ "|\\.[0-9][0-9]*([Ee][-+]?[0-9]+)?[fFlL]?"
+ "|[-+*/<>%&^|=!]=|--|\\+\\+|<<=?|>>=?|&&|\\|\\||::|->\\*?|\\.\\*|<=>"),
PATTERNS("csharp",
/* Keywords */
"!^[ \t]*(do|while|for|if|else|instanceof|new|return|switch|case|throw|catch|using)\n"
@@ -142,12 +174,28 @@ PATTERNS("html",
"[^<>= \t]+"),
PATTERNS("java",
"!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n"
- "^[ \t]*(([A-Za-z_][A-Za-z_0-9]*[ \t]+)+[A-Za-z_][A-Za-z_0-9]*[ \t]*\\([^;]*)$",
+ /* Class, enum, interface, and record declarations */
+ "^[ \t]*(([a-z-]+[ \t]+)*(class|enum|interface|record)[ \t]+.*)$\n"
+ /* Method definitions; note that constructor signatures are not */
+ /* matched because they are indistinguishable from method calls. */
+ "^[ \t]*(([A-Za-z_<>&][][?&<>.,A-Za-z_0-9]*[ \t]+)+[A-Za-z_][A-Za-z_0-9]*[ \t]*\\([^;]*)$",
/* -- */
"[a-zA-Z_][a-zA-Z0-9_]*"
"|[-+0-9.e]+[fFlL]?|0[xXbB]?[0-9a-fA-F]+[lL]?"
"|[-+*/<>%&^|=!]="
"|--|\\+\\+|<<=?|>>>?=?|&&|\\|\\|"),
+PATTERNS("kotlin",
+ "^[ \t]*(([a-z]+[ \t]+)*(fun|class|interface)[ \t]+.*)$",
+ /* -- */
+ "[a-zA-Z_][a-zA-Z0-9_]*"
+ /* hexadecimal and binary numbers */
+ "|0[xXbB][0-9a-fA-F_]+[lLuU]*"
+ /* integers and floats */
+ "|[0-9][0-9_]*([.][0-9_]*)?([Ee][-+]?[0-9]+)?[fFlLuU]*"
+ /* floating point numbers beginning with decimal point */
+ "|[.][0-9][0-9_]*([Ee][-+]?[0-9]+)?[fFlLuU]?"
+ /* unary and binary operators */
+ "|[-+*/<>%&^|=!]==?|--|\\+\\+|<<=|>>=|&&|\\|\\||->|\\.\\*|!!|[?:.][.:]"),
PATTERNS("markdown",
"^ {0,3}#{1,6}[ \t].*",
/* -- */
@@ -214,7 +262,7 @@ PATTERNS("perl",
"|<<|<>|<=>|>>"),
PATTERNS("php",
"^[\t ]*(((public|protected|private|static|abstract|final)[\t ]+)*function.*)$\n"
- "^[\t ]*((((final|abstract)[\t ]+)?class|interface|trait).*)$",
+ "^[\t ]*((((final|abstract)[\t ]+)?class|enum|interface|trait).*)$",
/* -- */
"[a-zA-Z_][a-zA-Z0-9_]*"
"|[-+0-9.e]+|0[xXbB]?[0-9a-fA-F]+"
@@ -248,24 +296,20 @@ PATTERNS("scheme",
/* All other words should be delimited by spaces or parentheses */
"|([^][)(}{[ \t])+"),
PATTERNS("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$",
- "\\\\[a-zA-Z@]+|\\\\.|[a-zA-Z0-9\x80-\xff]+"),
-{ "default", NULL, -1, { NULL, 0 } },
+ "\\\\[a-zA-Z@]+|\\\\.|([a-zA-Z0-9]|[^\x01-\x7f])+"),
+{ "default", NULL, NULL, -1, { NULL, 0 } },
};
#undef PATTERNS
#undef IPATTERN
static struct userdiff_driver driver_true = {
- "diff=true",
- NULL,
- 0,
- { NULL, 0 }
+ .name = "diff=true",
+ .binary = 0,
};
static struct userdiff_driver driver_false = {
- "!diff",
- NULL,
- 1,
- { NULL, 0 }
+ .name = "!diff",
+ .binary = 1,
};
struct find_by_namelen_data {
@@ -275,18 +319,37 @@ struct find_by_namelen_data {
};
static int userdiff_find_by_namelen_cb(struct userdiff_driver *driver,
- enum userdiff_driver_type type, void *priv)
+ enum userdiff_driver_type type UNUSED,
+ void *priv)
{
struct find_by_namelen_data *cb_data = priv;
- if (!strncmp(driver->name, cb_data->name, cb_data->len) &&
- !driver->name[cb_data->len]) {
+ if (!xstrncmpz(driver->name, cb_data->name, cb_data->len)) {
cb_data->driver = driver;
return 1; /* tell the caller to stop iterating */
}
return 0;
}
+static int regexec_supports_multi_byte_chars(void)
+{
+ static const char not_space[] = "[^[:space:]]";
+ static const char utf8_multi_byte_char[] = "\xc2\xa3";
+ regex_t re;
+ regmatch_t match;
+ static int result = -1;
+
+ if (result != -1)
+ return result;
+ if (regcomp(&re, not_space, REG_EXTENDED))
+ BUG("invalid regular expression: %s", not_space);
+ result = !regexec(&re, utf8_multi_byte_char, 1, &match, 0) &&
+ match.rm_so == 0 &&
+ match.rm_eo == strlen(utf8_multi_byte_char);
+ regfree(&re);
+ return result;
+}
+
static struct userdiff_driver *userdiff_find_by_namelen(const char *name, size_t len)
{
struct find_by_namelen_data udcbdata = {
@@ -353,6 +416,8 @@ int userdiff_config(const char *k, const char *v)
return parse_bool(&drv->textconv_want_cache, k, v);
if (!strcmp(type, "wordregex"))
return git_config_string(&drv->word_regex, k, v);
+ if (!strcmp(type, "algorithm"))
+ return git_config_string(&drv->algorithm, k, v);
return 0;
}
@@ -360,7 +425,13 @@ int userdiff_config(const char *k, const char *v)
struct userdiff_driver *userdiff_find_by_name(const char *name)
{
int len = strlen(name);
- return userdiff_find_by_namelen(name, len);
+ struct userdiff_driver *driver = userdiff_find_by_namelen(name, len);
+ if (driver && driver->word_regex_multi_byte) {
+ if (regexec_supports_multi_byte_chars())
+ driver->word_regex = driver->word_regex_multi_byte;
+ driver->word_regex_multi_byte = NULL;
+ }
+ return driver;
}
struct userdiff_driver *userdiff_find_by_path(struct index_state *istate,
@@ -389,7 +460,8 @@ struct userdiff_driver *userdiff_get_textconv(struct repository *r,
if (!driver->textconv)
return NULL;
- if (driver->textconv_want_cache && !driver->textconv_cache) {
+ if (driver->textconv_want_cache && !driver->textconv_cache &&
+ have_git_dir()) {
struct notes_cache *c = xmalloc(sizeof(*c));
struct strbuf name = STRBUF_INIT;