about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Antonin Delpeuch <antonin@delpeuch.eu> 2025-11-06 22:41:54 +0000
committer Junio C Hamano <gitster@pobox.com> 2025-11-07 07:32:52 -0800
commit bed7812b6a6366f302590ef9e7d9dcb5a3b787f0 (patch)
tree 3f3f35426eaed6fe3d43eae285956984553ec6b9
parent xdiff: add 'minimal' to XDF_DIFF_ALGORITHM_MASK (diff)
download git-bed7812b6a6366f302590ef9e7d9dcb5a3b787f0.tar.gz
git-bed7812b6a6366f302590ef9e7d9dcb5a3b787f0.zip
blame: make diff algorithm configurable
The diff algorithm used in 'git-blame(1)' is set to 'myers', without the possibility to change it aside from the `--minimal` option. There has been long-standing interest in changing the default diff algorithm to "histogram", and Git 3.0 was floated as a possible occasion for taking some steps towards that: https://lore.kernel.org/git/xmqqed873vgn.fsf@gitster.g/ As a preparation for this move, it is worth making sure that the diff algorithm is configurable where useful. Make it configurable in the `git-blame(1)` command by introducing the `--diff-algorithm` option and make it honor the `diff.algorithm` config variable. Keep Myers diff as the default. Signed-off-by: Antonin Delpeuch <antonin@delpeuch.eu> Signed-off-by: Junio C Hamano <gitster@pobox.com>
-rw-r--r-- Documentation/diff-algorithm-option.adoc 20
-rw-r--r-- Documentation/diff-options.adoc 21
-rw-r--r-- Documentation/git-blame.adoc 2
-rw-r--r-- builtin/blame.c 52
-rw-r--r-- t/meson.build 1
-rwxr-xr-x t/t8015-blame-diff-algorithm.sh 203
6 files changed, 278 insertions, 21 deletions
diff --git a/Documentation/diff-algorithm-option.adoc b/Documentation/diff-algorithm-option.adoc
new file mode 100644
index 0000000000..8e3a0b63d7
--- /dev/null
+++ b/Documentation/diff-algorithm-option.adoc
@@ -0,0 +1,20 @@
+`--diff-algorithm=(patience|minimal|histogram|myers)`::
+ Choose a diff algorithm. The variants are as follows:
++
+--
+ `default`;;
+ `myers`;;
+ The basic greedy diff algorithm. Currently, this is the default.
+ `minimal`;;
+ Spend extra time to make sure the smallest possible diff is
+ produced.
+ `patience`;;
+ Use "patience diff" algorithm when generating patches.
+ `histogram`;;
+ This algorithm extends the patience algorithm to "support
+ low-occurrence common elements".
+--
++
+For instance, if you configured the `diff.algorithm` variable to a
+non-default value and want to use the default one, then you
+have to use `--diff-algorithm=default` option.
diff --git a/Documentation/diff-options.adoc b/Documentation/diff-options.adoc
index ae31520f7f..9cdad6f72a 100644
--- a/Documentation/diff-options.adoc
+++ b/Documentation/diff-options.adoc
@@ -197,26 +197,7 @@ and starts with _<text>_, this algorithm attempts to prevent it from
appearing as a deletion or addition in the output. It uses the "patience
diff" algorithm internally.
-`--diff-algorithm=(patience|minimal|histogram|myers)`::
- Choose a diff algorithm. The variants are as follows:
-+
---
- `default`;;
- `myers`;;
- The basic greedy diff algorithm. Currently, this is the default.
- `minimal`;;
- Spend extra time to make sure the smallest possible diff is
- produced.
- `patience`;;
- Use "patience diff" algorithm when generating patches.
- `histogram`;;
- This algorithm extends the patience algorithm to "support
- low-occurrence common elements".
---
-+
-For instance, if you configured the `diff.algorithm` variable to a
-non-default value and want to use the default one, then you
-have to use `--diff-algorithm=default` option.
+include::diff-algorithm-option.adoc[]
`--stat[=<width>[,<name-width>[,<count>]]]`::
Generate a diffstat. By default, as much space as necessary
diff --git a/Documentation/git-blame.adoc b/Documentation/git-blame.adoc
index e438d28625..adcbb6f5dc 100644
--- a/Documentation/git-blame.adoc
+++ b/Documentation/git-blame.adoc
@@ -85,6 +85,8 @@ include::blame-options.adoc[]
Ignore whitespace when comparing the parent's version and
the child's to find where the lines came from.
+include::diff-algorithm-option.adoc[]
+
--abbrev=<n>::
Instead of using the default 7+1 hexadecimal digits as the
abbreviated object name, use <m>+1 digits, where <m> is at
diff --git a/builtin/blame.c b/builtin/blame.c
index 2703820258..27b513d27f 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -779,6 +779,19 @@ static int git_blame_config(const char *var, const char *value,
}
}
+ if (!strcmp(var, "diff.algorithm")) {
+ long diff_algorithm;
+ if (!value)
+ return config_error_nonbool(var);
+ diff_algorithm = parse_algorithm_value(value);
+ if (diff_algorithm < 0)
+ return error(_("unknown value for config '%s': %s"),
+ var, value);
+ xdl_opts &= ~XDF_DIFF_ALGORITHM_MASK;
+ xdl_opts |= diff_algorithm;
+ return 0;
+ }
+
if (git_diff_heuristic_config(var, value, cb) < 0)
return -1;
if (userdiff_config(var, value) < 0)
@@ -824,6 +837,38 @@ static int blame_move_callback(const struct option *option, const char *arg, int
return 0;
}
+static int blame_diff_algorithm_minimal(const struct option *option,
+ const char *arg, int unset)
+{
+ int *opt = option->value;
+
+ BUG_ON_OPT_ARG(arg);
+
+ *opt &= ~XDF_DIFF_ALGORITHM_MASK;
+ if (!unset)
+ *opt |= XDF_NEED_MINIMAL;
+
+ return 0;
+}
+
+static int blame_diff_algorithm_callback(const struct option *option,
+ const char *arg, int unset)
+{
+ int *opt = option->value;
+ long value = parse_algorithm_value(arg);
+
+ BUG_ON_OPT_NEG(unset);
+
+ if (value < 0)
+ return error(_("option diff-algorithm accepts \"myers\", "
+ "\"minimal\", \"patience\" and \"histogram\""));
+
+ *opt &= ~XDF_DIFF_ALGORITHM_MASK;
+ *opt |= value;
+
+ return 0;
+}
+
static int is_a_rev(const char *name)
{
struct object_id oid;
@@ -915,11 +960,16 @@ int cmd_blame(int argc,
OPT_BIT('s', NULL, &output_option, N_("suppress author name and timestamp (Default: off)"), OUTPUT_NO_AUTHOR),
OPT_BIT('e', "show-email", &output_option, N_("show author email instead of name (Default: off)"), OUTPUT_SHOW_EMAIL),
OPT_BIT('w', NULL, &xdl_opts, N_("ignore whitespace differences"), XDF_IGNORE_WHITESPACE),
+ OPT_CALLBACK_F(0, "diff-algorithm", &xdl_opts, N_("<algorithm>"),
+ N_("choose a diff algorithm"),
+ PARSE_OPT_NONEG, blame_diff_algorithm_callback),
OPT_STRING_LIST(0, "ignore-rev", &ignore_rev_list, N_("rev"), N_("ignore <rev> when blaming")),
OPT_STRING_LIST(0, "ignore-revs-file", &ignore_revs_file_list, N_("file"), N_("ignore revisions from <file>")),
OPT_BIT(0, "color-lines", &output_option, N_("color redundant metadata from previous line differently"), OUTPUT_COLOR_LINE),
OPT_BIT(0, "color-by-age", &output_option, N_("color lines by age"), OUTPUT_SHOW_AGE_WITH_COLOR),
- OPT_BIT(0, "minimal", &xdl_opts, N_("spend extra cycles to find better match"), XDF_NEED_MINIMAL),
+ OPT_CALLBACK_F(0, "minimal", &xdl_opts, NULL,
+ N_("spend extra cycles to find a better match"),
+ PARSE_OPT_NOARG | PARSE_OPT_HIDDEN, blame_diff_algorithm_minimal),
OPT_STRING('S', NULL, &revs_file, N_("file"), N_("use revisions from <file> instead of calling git-rev-list")),
OPT_STRING(0, "contents", &contents_from, N_("file"), N_("use <file>'s contents as the final image")),
OPT_CALLBACK_F('C', NULL, &opt, N_("score"), N_("find line copies within and across files"), PARSE_OPT_OPTARG, blame_copy_callback),
diff --git a/t/meson.build b/t/meson.build
index 401b24e50e..9f2fe7af8b 100644
--- a/t/meson.build
+++ b/t/meson.build
@@ -955,6 +955,7 @@ integration_tests = [
't8012-blame-colors.sh',
't8013-blame-ignore-revs.sh',
't8014-blame-ignore-fuzzy.sh',
+ 't8015-blame-diff-algorithm.sh',
't8020-last-modified.sh',
't9001-send-email.sh',
't9002-column.sh',
diff --git a/t/t8015-blame-diff-algorithm.sh b/t/t8015-blame-diff-algorithm.sh
new file mode 100755
index 0000000000..55e1d540dc
--- /dev/null
+++ b/t/t8015-blame-diff-algorithm.sh
@@ -0,0 +1,203 @@
+#!/bin/sh
+
+test_description='git blame with specific diff algorithm'
+
+. ./test-lib.sh
+
+test_expect_success setup '
+ cat >file.c <<-\EOF &&
+ int f(int x, int y)
+ {
+ if (x == 0)
+ {
+ return y;
+ }
+ return x;
+ }
+
+ int g(size_t u)
+ {
+ while (u < 30)
+ {
+ u++;
+ }
+ return u;
+ }
+ EOF
+ test_write_lines x x x x >file.txt &&
+ git add file.c file.txt &&
+ GIT_AUTHOR_NAME=Commit_1 git commit -m Commit_1 &&
+
+ cat >file.c <<-\EOF &&
+ int g(size_t u)
+ {
+ while (u < 30)
+ {
+ u++;
+ }
+ return u;
+ }
+
+ int h(int x, int y, int z)
+ {
+ if (z == 0)
+ {
+ return x;
+ }
+ return y;
+ }
+ EOF
+ test_write_lines x x x A B C D x E F G >file.txt &&
+ git add file.c file.txt &&
+ GIT_AUTHOR_NAME=Commit_2 git commit -m Commit_2
+'
+
+test_expect_success 'blame uses Myers diff algorithm by default' '
+ cat >expected <<-\EOF &&
+ Commit_2 int g(size_t u)
+ Commit_1 {
+ Commit_2 while (u < 30)
+ Commit_1 {
+ Commit_2 u++;
+ Commit_1 }
+ Commit_2 return u;
+ Commit_1 }
+ Commit_1
+ Commit_2 int h(int x, int y, int z)
+ Commit_1 {
+ Commit_2 if (z == 0)
+ Commit_1 {
+ Commit_2 return x;
+ Commit_1 }
+ Commit_2 return y;
+ Commit_1 }
+ EOF
+
+ git blame file.c > output &&
+ sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output > without_varying_parts &&
+ sed -e "s/ *$//g" without_varying_parts > actual &&
+ test_cmp expected actual
+'
+
+test_expect_success 'blame honors --diff-algorithm option' '
+ cat >expected <<-\EOF &&
+ Commit_1 int g(size_t u)
+ Commit_1 {
+ Commit_1 while (u < 30)
+ Commit_1 {
+ Commit_1 u++;
+ Commit_1 }
+ Commit_1 return u;
+ Commit_1 }
+ Commit_2
+ Commit_2 int h(int x, int y, int z)
+ Commit_2 {
+ Commit_2 if (z == 0)
+ Commit_2 {
+ Commit_2 return x;
+ Commit_2 }
+ Commit_2 return y;
+ Commit_2 }
+ EOF
+
+ git blame file.c --diff-algorithm histogram > output &&
+ sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output > without_varying_parts &&
+ sed -e "s/ *$//g" without_varying_parts > actual &&
+ test_cmp expected actual
+'
+
+test_expect_success 'blame honors diff.algorithm config variable' '
+ cat >expected <<-\EOF &&
+ Commit_1 int g(size_t u)
+ Commit_1 {
+ Commit_1 while (u < 30)
+ Commit_1 {
+ Commit_1 u++;
+ Commit_1 }
+ Commit_1 return u;
+ Commit_1 }
+ Commit_2
+ Commit_2 int h(int x, int y, int z)
+ Commit_2 {
+ Commit_2 if (z == 0)
+ Commit_2 {
+ Commit_2 return x;
+ Commit_2 }
+ Commit_2 return y;
+ Commit_2 }
+ EOF
+
+ git -c diff.algorithm=histogram blame file.c > output &&
+ sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" \
+ -e "s/ *$//g" output > actual &&
+ test_cmp expected actual
+'
+
+test_expect_success 'blame gives priority to --diff-algorithm over diff.algorithm' '
+ cat >expected <<-\EOF &&
+ Commit_1 int g(size_t u)
+ Commit_1 {
+ Commit_1 while (u < 30)
+ Commit_1 {
+ Commit_1 u++;
+ Commit_1 }
+ Commit_1 return u;
+ Commit_1 }
+ Commit_2
+ Commit_2 int h(int x, int y, int z)
+ Commit_2 {
+ Commit_2 if (z == 0)
+ Commit_2 {
+ Commit_2 return x;
+ Commit_2 }
+ Commit_2 return y;
+ Commit_2 }
+ EOF
+
+ git -c diff.algorithm=myers blame file.c --diff-algorithm histogram > output &&
+ sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" \
+ -e "s/ *$//g" output > actual &&
+ test_cmp expected actual
+'
+
+test_expect_success 'blame honors --minimal option' '
+ cat >expected <<-\EOF &&
+ Commit_1 x
+ Commit_1 x
+ Commit_1 x
+ Commit_2 A
+ Commit_2 B
+ Commit_2 C
+ Commit_2 D
+ Commit_1 x
+ Commit_2 E
+ Commit_2 F
+ Commit_2 G
+ EOF
+
+ git blame file.txt --minimal > output &&
+ sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output > actual &&
+ test_cmp expected actual
+'
+
+test_expect_success 'blame respects the order of diff options' '
+ cat >expected <<-\EOF &&
+ Commit_1 x
+ Commit_1 x
+ Commit_1 x
+ Commit_2 A
+ Commit_2 B
+ Commit_2 C
+ Commit_2 D
+ Commit_2 x
+ Commit_2 E
+ Commit_2 F
+ Commit_2 G
+ EOF
+
+ git blame file.txt --minimal --diff-algorithm myers > output &&
+ sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output > actual &&
+ test_cmp expected actual
+'
+
+test_done