Skip to content

Commit bed7812

Browse files
wetneb authored and gitster committed
blame: make diff algorithm configurable
The diff algorithm used in 'git-blame(1)' is set to 'myers', without the possibility to change it aside from the `--minimal` option.

There has been long-standing interest in changing the default diff algorithm to "histogram", and Git 3.0 was floated as a possible occasion for taking some steps towards that:

https://lore.kernel.org/git/[email protected]/

As a preparation for this move, it is worth making sure that the diff algorithm is configurable where useful.

Make it configurable in the `git-blame(1)` command by introducing the `--diff-algorithm` option and make it honor the `diff.algorithm` config variable. Keep Myers diff as the default.

Signed-off-by: Antonin Delpeuch <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent bd7255c commit bed7812

File tree

6 files changed

+278
-21
lines changed

6 files changed

+278
-21
lines changed
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
`--diff-algorithm=(patience|minimal|histogram|myers)`::
2+
Choose a diff algorithm. The variants are as follows:
3+
+
4+
--
5+
`default`;;
6+
`myers`;;
7+
The basic greedy diff algorithm. Currently, this is the default.
8+
`minimal`;;
9+
Spend extra time to make sure the smallest possible diff is
10+
produced.
11+
`patience`;;
12+
Use "patience diff" algorithm when generating patches.
13+
`histogram`;;
14+
This algorithm extends the patience algorithm to "support
15+
low-occurrence common elements".
16+
--
17+
+
18+
For instance, if you configured the `diff.algorithm` variable to a
19+
non-default value and want to use the default one, then you
20+
have to use `--diff-algorithm=default` option.

Documentation/diff-options.adoc

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -197,26 +197,7 @@ and starts with _<text>_, this algorithm attempts to prevent it from
197197
appearing as a deletion or addition in the output. It uses the "patience
198198
diff" algorithm internally.
199199

200-
`--diff-algorithm=(patience|minimal|histogram|myers)`::
201-
Choose a diff algorithm. The variants are as follows:
202-
+
203-
--
204-
`default`;;
205-
`myers`;;
206-
The basic greedy diff algorithm. Currently, this is the default.
207-
`minimal`;;
208-
Spend extra time to make sure the smallest possible diff is
209-
produced.
210-
`patience`;;
211-
Use "patience diff" algorithm when generating patches.
212-
`histogram`;;
213-
This algorithm extends the patience algorithm to "support
214-
low-occurrence common elements".
215-
--
216-
+
217-
For instance, if you configured the `diff.algorithm` variable to a
218-
non-default value and want to use the default one, then you
219-
have to use `--diff-algorithm=default` option.
200+
include::diff-algorithm-option.adoc[]
220201

221202
`--stat[=<width>[,<name-width>[,<count>]]]`::
222203
Generate a diffstat. By default, as much space as necessary

Documentation/git-blame.adoc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ include::blame-options.adoc[]
8585
Ignore whitespace when comparing the parent's version and
8686
the child's to find where the lines came from.
8787

88+
include::diff-algorithm-option.adoc[]
89+
8890
--abbrev=<n>::
8991
Instead of using the default 7+1 hexadecimal digits as the
9092
abbreviated object name, use <m>+1 digits, where <m> is at

builtin/blame.c

Lines changed: 51 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -779,6 +779,19 @@ static int git_blame_config(const char *var, const char *value,
779779
}
780780
}
781781

782+
if (!strcmp(var, "diff.algorithm")) {
783+
long diff_algorithm;
784+
if (!value)
785+
return config_error_nonbool(var);
786+
diff_algorithm = parse_algorithm_value(value);
787+
if (diff_algorithm < 0)
788+
return error(_("unknown value for config '%s': %s"),
789+
var, value);
790+
xdl_opts &= ~XDF_DIFF_ALGORITHM_MASK;
791+
xdl_opts |= diff_algorithm;
792+
return 0;
793+
}
794+
782795
if (git_diff_heuristic_config(var, value, cb) < 0)
783796
return -1;
784797
if (userdiff_config(var, value) < 0)
@@ -824,6 +837,38 @@ static int blame_move_callback(const struct option *option, const char *arg, int
824837
return 0;
825838
}
826839

840+
/*
 * Option callback behind the `--minimal` flag.
 *
 * `option->value` is used as a pointer to an int of option bits. The bits
 * in XDF_DIFF_ALGORITHM_MASK are cleared first; XDF_NEED_MINIMAL is then
 * set unless the option was given in its negated form (`unset` non-zero).
 * Always returns 0.
 */
static int blame_diff_algorithm_minimal(const struct option *option,
841+
const char *arg, int unset)
842+
{
843+
int *opt = option->value;
844+
845+
/* The flag takes no value; `arg` is only passed to the BUG_ON_OPT_ARG check. */
BUG_ON_OPT_ARG(arg);
846+
847+
/* Clear the algorithm bits before applying this flag. */
*opt &= ~XDF_DIFF_ALGORITHM_MASK;
848+
if (!unset)
849+
*opt |= XDF_NEED_MINIMAL;
850+
851+
return 0;
852+
}
853+
854+
/*
 * Option callback behind `--diff-algorithm=<algorithm>`.
 *
 * `arg` is handed to parse_algorithm_value(). A negative result is
 * reported through error() and that call's return value is propagated.
 * Otherwise the bits in XDF_DIFF_ALGORITHM_MASK are cleared in the int
 * that `option->value` points to, the parsed value is OR-ed in, and 0 is
 * returned.
 */
static int blame_diff_algorithm_callback(const struct option *option,
855+
const char *arg, int unset)
856+
{
857+
int *opt = option->value;
858+
long value = parse_algorithm_value(arg);
859+
860+
/* `unset` is only passed to the BUG_ON_OPT_NEG check. */
BUG_ON_OPT_NEG(unset);
861+
862+
/*
 * NOTE(review): the message lists four names, not "default" --
 * confirm whether parse_algorithm_value() also accepts "default".
 */
if (value < 0)
863+
return error(_("option diff-algorithm accepts \"myers\", "
864+
"\"minimal\", \"patience\" and \"histogram\""));
865+
866+
/* Replace, rather than add to, any previously recorded algorithm bits. */
*opt &= ~XDF_DIFF_ALGORITHM_MASK;
867+
*opt |= value;
868+
869+
return 0;
870+
}
871+
827872
static int is_a_rev(const char *name)
828873
{
829874
struct object_id oid;
@@ -915,11 +960,16 @@ int cmd_blame(int argc,
915960
OPT_BIT('s', NULL, &output_option, N_("suppress author name and timestamp (Default: off)"), OUTPUT_NO_AUTHOR),
916961
OPT_BIT('e', "show-email", &output_option, N_("show author email instead of name (Default: off)"), OUTPUT_SHOW_EMAIL),
917962
OPT_BIT('w', NULL, &xdl_opts, N_("ignore whitespace differences"), XDF_IGNORE_WHITESPACE),
963+
OPT_CALLBACK_F(0, "diff-algorithm", &xdl_opts, N_("<algorithm>"),
964+
N_("choose a diff algorithm"),
965+
PARSE_OPT_NONEG, blame_diff_algorithm_callback),
918966
OPT_STRING_LIST(0, "ignore-rev", &ignore_rev_list, N_("rev"), N_("ignore <rev> when blaming")),
919967
OPT_STRING_LIST(0, "ignore-revs-file", &ignore_revs_file_list, N_("file"), N_("ignore revisions from <file>")),
920968
OPT_BIT(0, "color-lines", &output_option, N_("color redundant metadata from previous line differently"), OUTPUT_COLOR_LINE),
921969
OPT_BIT(0, "color-by-age", &output_option, N_("color lines by age"), OUTPUT_SHOW_AGE_WITH_COLOR),
922-
OPT_BIT(0, "minimal", &xdl_opts, N_("spend extra cycles to find better match"), XDF_NEED_MINIMAL),
970+
OPT_CALLBACK_F(0, "minimal", &xdl_opts, NULL,
971+
N_("spend extra cycles to find a better match"),
972+
PARSE_OPT_NOARG | PARSE_OPT_HIDDEN, blame_diff_algorithm_minimal),
923973
OPT_STRING('S', NULL, &revs_file, N_("file"), N_("use revisions from <file> instead of calling git-rev-list")),
924974
OPT_STRING(0, "contents", &contents_from, N_("file"), N_("use <file>'s contents as the final image")),
925975
OPT_CALLBACK_F('C', NULL, &opt, N_("score"), N_("find line copies within and across files"), PARSE_OPT_OPTARG, blame_copy_callback),

t/meson.build

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -955,6 +955,7 @@ integration_tests = [
955955
't8012-blame-colors.sh',
956956
't8013-blame-ignore-revs.sh',
957957
't8014-blame-ignore-fuzzy.sh',
958+
't8015-blame-diff-algorithm.sh',
958959
't8020-last-modified.sh',
959960
't9001-send-email.sh',
960961
't9002-column.sh',

t/t8015-blame-diff-algorithm.sh

Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
#!/bin/sh
2+
3+
test_description='git blame with specific diff algorithm'
4+
5+
. ./test-lib.sh
6+
7+
test_expect_success setup '
8+
cat >file.c <<-\EOF &&
9+
int f(int x, int y)
10+
{
11+
if (x == 0)
12+
{
13+
return y;
14+
}
15+
return x;
16+
}
17+
18+
int g(size_t u)
19+
{
20+
while (u < 30)
21+
{
22+
u++;
23+
}
24+
return u;
25+
}
26+
EOF
27+
test_write_lines x x x x >file.txt &&
28+
git add file.c file.txt &&
29+
GIT_AUTHOR_NAME=Commit_1 git commit -m Commit_1 &&
30+
31+
cat >file.c <<-\EOF &&
32+
int g(size_t u)
33+
{
34+
while (u < 30)
35+
{
36+
u++;
37+
}
38+
return u;
39+
}
40+
41+
int h(int x, int y, int z)
42+
{
43+
if (z == 0)
44+
{
45+
return x;
46+
}
47+
return y;
48+
}
49+
EOF
50+
test_write_lines x x x A B C D x E F G >file.txt &&
51+
git add file.c file.txt &&
52+
GIT_AUTHOR_NAME=Commit_2 git commit -m Commit_2
53+
'
54+
55+
test_expect_success 'blame uses Myers diff algorithm by default' '
56+
cat >expected <<-\EOF &&
57+
Commit_2 int g(size_t u)
58+
Commit_1 {
59+
Commit_2 while (u < 30)
60+
Commit_1 {
61+
Commit_2 u++;
62+
Commit_1 }
63+
Commit_2 return u;
64+
Commit_1 }
65+
Commit_1
66+
Commit_2 int h(int x, int y, int z)
67+
Commit_1 {
68+
Commit_2 if (z == 0)
69+
Commit_1 {
70+
Commit_2 return x;
71+
Commit_1 }
72+
Commit_2 return y;
73+
Commit_1 }
74+
EOF
75+
76+
git blame file.c > output &&
77+
sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output > without_varying_parts &&
78+
sed -e "s/ *$//g" without_varying_parts > actual &&
79+
test_cmp expected actual
80+
'
81+
82+
test_expect_success 'blame honors --diff-algorithm option' '
83+
cat >expected <<-\EOF &&
84+
Commit_1 int g(size_t u)
85+
Commit_1 {
86+
Commit_1 while (u < 30)
87+
Commit_1 {
88+
Commit_1 u++;
89+
Commit_1 }
90+
Commit_1 return u;
91+
Commit_1 }
92+
Commit_2
93+
Commit_2 int h(int x, int y, int z)
94+
Commit_2 {
95+
Commit_2 if (z == 0)
96+
Commit_2 {
97+
Commit_2 return x;
98+
Commit_2 }
99+
Commit_2 return y;
100+
Commit_2 }
101+
EOF
102+
103+
git blame file.c --diff-algorithm histogram > output &&
104+
sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output > without_varying_parts &&
105+
sed -e "s/ *$//g" without_varying_parts > actual &&
106+
test_cmp expected actual
107+
'
108+
109+
test_expect_success 'blame honors diff.algorithm config variable' '
110+
cat >expected <<-\EOF &&
111+
Commit_1 int g(size_t u)
112+
Commit_1 {
113+
Commit_1 while (u < 30)
114+
Commit_1 {
115+
Commit_1 u++;
116+
Commit_1 }
117+
Commit_1 return u;
118+
Commit_1 }
119+
Commit_2
120+
Commit_2 int h(int x, int y, int z)
121+
Commit_2 {
122+
Commit_2 if (z == 0)
123+
Commit_2 {
124+
Commit_2 return x;
125+
Commit_2 }
126+
Commit_2 return y;
127+
Commit_2 }
128+
EOF
129+
130+
git -c diff.algorithm=histogram blame file.c > output &&
131+
sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" \
132+
-e "s/ *$//g" output > actual &&
133+
test_cmp expected actual
134+
'
135+
136+
test_expect_success 'blame gives priority to --diff-algorithm over diff.algorithm' '
137+
cat >expected <<-\EOF &&
138+
Commit_1 int g(size_t u)
139+
Commit_1 {
140+
Commit_1 while (u < 30)
141+
Commit_1 {
142+
Commit_1 u++;
143+
Commit_1 }
144+
Commit_1 return u;
145+
Commit_1 }
146+
Commit_2
147+
Commit_2 int h(int x, int y, int z)
148+
Commit_2 {
149+
Commit_2 if (z == 0)
150+
Commit_2 {
151+
Commit_2 return x;
152+
Commit_2 }
153+
Commit_2 return y;
154+
Commit_2 }
155+
EOF
156+
157+
git -c diff.algorithm=myers blame file.c --diff-algorithm histogram > output &&
158+
sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" \
159+
-e "s/ *$//g" output > actual &&
160+
test_cmp expected actual
161+
'
162+
163+
# Blame file.txt with --minimal. The expected output keeps the "x" that
# sits between "D" and "E" attributed to Commit_1; only the inserted
# letters A-G are attributed to Commit_2. The sed expression replaces the
# leading "<hash> (<author> ...)" prefix of each blame line with just the
# author name so the comparison does not depend on hashes or dates.
test_expect_success 'blame honors --minimal option' '
164+
cat >expected <<-\EOF &&
165+
Commit_1 x
166+
Commit_1 x
167+
Commit_1 x
168+
Commit_2 A
169+
Commit_2 B
170+
Commit_2 C
171+
Commit_2 D
172+
Commit_1 x
173+
Commit_2 E
174+
Commit_2 F
175+
Commit_2 G
176+
EOF
177+
178+
git blame file.txt --minimal > output &&
179+
sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output > actual &&
180+
test_cmp expected actual
181+
'
182+
183+
# Pass --minimal first and --diff-algorithm myers second. The expected
# output attributes the "x" between "D" and "E" to Commit_2, unlike the
# --minimal-only expectation where it stays with Commit_1; i.e. the test
# expects the option given later on the command line to take effect.
test_expect_success 'blame respects the order of diff options' '
184+
cat >expected <<-\EOF &&
185+
Commit_1 x
186+
Commit_1 x
187+
Commit_1 x
188+
Commit_2 A
189+
Commit_2 B
190+
Commit_2 C
191+
Commit_2 D
192+
Commit_2 x
193+
Commit_2 E
194+
Commit_2 F
195+
Commit_2 G
196+
EOF
197+
198+
git blame file.txt --minimal --diff-algorithm myers > output &&
199+
sed -e "s/^[^ ]* (\([^ ]*\) [^)]*)/\1/g" output > actual &&
200+
test_cmp expected actual
201+
'
202+
203+
test_done

0 commit comments

Comments
 (0)