#1 improve git-diff documentation and A...B handling
Opened 3 years ago by ddevault. Modified 3 years ago

file modified
+16 -4
@@ -11,15 +11,17 @@ 

  [verse]

  'git diff' [<options>] [<commit>] [--] [<path>...]

  'git diff' [<options>] --cached [<commit>] [--] [<path>...]

- 'git diff' [<options>] <commit> <commit> [--] [<path>...]

+ 'git diff' [<options>] <commit> [<commit>...] <commit> [--] [<path>...]

+ 'git diff' [<options>] <commit>...<commit> [--] [<path>...]

  'git diff' [<options>] <blob> <blob>

  'git diff' [<options>] --no-index [--] <path> <path>

  

  DESCRIPTION

  -----------

  Show changes between the working tree and the index or a tree, changes

- between the index and a tree, changes between two trees, changes between

- two blob objects, or changes between two files on disk.

+ between the index and a tree, changes between two trees, changes resulting

+ from a merge, changes between two blob objects, or changes between two

+ files on disk.

  

  'git diff' [<options>] [--] [<path>...]::

  
@@ -67,6 +69,15 @@ 

  	one side is omitted, it will have the same effect as

  	using HEAD instead.

  

+ 'git diff' [<options>] <commit> [<commit>...] <commit> [--] [<path>...]::

+ 

+ 	This form is to view the results of a merge commit.  The first

+ 	listed <commit> must be the merge itself; the remaining two or

+ 	more commits should be its parents.  A convenient way to produce

+ 	the desired set of revisions is to use the {caret}@ suffix.

+ 	For instance, if `master` names a merge commit, `git diff master

+ 	master^@` gives the same combined diff as `git show master`.

+ 

  'git diff' [<options>] <commit>\...<commit> [--] [<path>...]::

  

  	This form is to view the changes on the branch containing
@@ -196,7 +207,8 @@ 

  linkgit:git-log[1],

  linkgit:gitdiffcore[7],

  linkgit:git-format-patch[1],

- linkgit:git-apply[1]

+ linkgit:git-apply[1],

+ linkgit:git-show[1]

  

  GIT

  ---

file modified
+118 -14
@@ -6,6 +6,7 @@ 

  #define USE_THE_INDEX_COMPATIBILITY_MACROS

  #include "cache.h"

  #include "config.h"

+ #include "ewah/ewok.h"

  #include "lockfile.h"

  #include "color.h"

  #include "commit.h"
@@ -23,7 +24,13 @@ 

  #define DIFF_NO_INDEX_IMPLICIT 2

  

  static const char builtin_diff_usage[] =

- "git diff [<options>] [<commit> [<commit>]] [--] [<path>...]";

+ "git diff [<options>] [<commit>] [--] [<path>...]\n"

+ "   or: git diff [<options>] --cached [<commit>] [--] [<path>...]\n"

+ "   or: git diff [<options>] <commit> [<commit>...] <commit> [--] [<path>...]\n"

+ "   or: git diff [<options>] <commit>...<commit>] [--] [<path>...]\n"

+ "   or: git diff [<options>] <blob> <blob>]\n"

+ "   or: git diff [<options>] --no-index [--] <path> <path>]\n"

+ COMMON_DIFF_OPTIONS_HELP;

  

  static const char *blob_path(struct object_array_entry *entry)

  {
@@ -254,6 +261,108 @@ 

  	return run_diff_files(revs, options);

  }

  

+ struct symdiff {

+ 	struct bitmap *skip;

+ 	int warn;

+ 	const char *base, *left, *right;

+ };

+ 

+ /*

+  * Check for symmetric-difference arguments, and if present, arrange

+  * everything we need to know to handle them correctly.  As a bonus,

+  * weed out all bogus range-based revision specifications, e.g.,

+  * "git diff A..B C..D" or "git diff A..B C" get rejected.

+  *

+  * For an actual symmetric diff, *symdiff is set this way:

+  *

+  *  - its skip is non-NULL and marks *all* rev->pending.objects[i]

+  *    indices that the caller should ignore (extra merge bases, of

+  *    which there might be many, and A in A...B).  Note that the

+  *    chosen merge base and right side are NOT marked.

+  *  - warn is set if there are multiple merge bases.

+  *  - base, left, and right point to the names to use in a

+  *    warning about multiple merge bases.

+  *

+  * If there is no symmetric diff argument, sym->skip is NULL and

+  * sym->warn is cleared.  The remaining fields are not set.

+  */

+ static void symdiff_prepare(struct rev_info *rev, struct symdiff *sym)

+ {

+ 	int i, is_symdiff = 0, basecount = 0, othercount = 0;

+ 	int lpos = -1, rpos = -1, basepos = -1;

+ 	struct bitmap *map = NULL;

+ 

+ 	/*

+ 	 * Use the whence fields to find merge bases and left and

+ 	 * right parts of symmetric difference, so that we do not

+ 	 * depend on the order that revisions are parsed.  If there

+ 	 * are any revs that aren't from these sources, we have a

+ 	 * "git diff C A...B" or "git diff A...B C" case.  Or we

+ 	 * could even get "git diff A...B C...E", for instance.

+ 	 *

+ 	 * If we don't have just one merge base, we pick one

+ 	 * at random.

+ 	 *

+ 	 * NB: REV_CMD_LEFT, REV_CMD_RIGHT are also used for A..B,

+ 	 * so we must check for SYMMETRIC_LEFT too.  The two arrays

+ 	 * rev->pending.objects and rev->cmdline.rev are parallel.

+ 	 */

+ 	for (i = 0; i < rev->cmdline.nr; i++) {

+ 		struct object *obj = rev->pending.objects[i].item;

+ 		switch (rev->cmdline.rev[i].whence) {

+ 		case REV_CMD_MERGE_BASE:

+ 			if (basepos < 0)

+ 				basepos = i;

+ 			basecount++;

+ 			break;		/* do mark all bases */

+ 		case REV_CMD_LEFT:

+ 			if (lpos >= 0)

+ 				usage(builtin_diff_usage);

+ 			lpos = i;

+ 			if (obj->flags & SYMMETRIC_LEFT) {

+ 				is_symdiff = 1;

+ 				break;	/* do mark A */

+ 			}

+ 			continue;

+ 		case REV_CMD_RIGHT:

+ 			if (rpos >= 0)

+ 				usage(builtin_diff_usage);

+ 			rpos = i;

+ 			continue;	/* don't mark B */

+ 		case REV_CMD_PARENTS_ONLY:

+ 		case REV_CMD_REF:

+ 		case REV_CMD_REV:

+ 			othercount++;

+ 			continue;

+ 		}

+ 		if (map == NULL)

+ 			map = bitmap_new();

+ 		bitmap_set(map, i);

+ 	}

+ 

+ 	/*

+ 	 * Forbid any additional revs for both A...B and A..B.

+ 	 */

+ 	if (lpos >= 0 && othercount > 0)

+ 		usage(builtin_diff_usage);

+ 

+ 	if (!is_symdiff) {

+ 		bitmap_free(map);

+ 		sym->warn = 0;

+ 		sym->skip = NULL;

+ 		return;

+ 	}

+ 

+ 	sym->left = rev->pending.objects[lpos].name;

+ 	sym->right = rev->pending.objects[rpos].name;

+ 	sym->base = rev->pending.objects[basepos].name;

+ 	if (basecount == 0)

+ 		die(_("%s...%s: no merge base"), sym->left, sym->right);

+ 	bitmap_unset(map, basepos);	/* unmark the base we want */

+ 	sym->warn = basecount > 1;

+ 	sym->skip = map;

+ }

+ 

  int cmd_diff(int argc, const char **argv, const char *prefix)

  {

  	int i;
@@ -263,6 +372,7 @@ 

  	struct object_array_entry *blob[2];

  	int nongit = 0, no_index = 0;

  	int result = 0;

+ 	struct symdiff sdiff;

  

  	/*

  	 * We could get N tree-ish in the rev.pending_objects list.
@@ -382,6 +492,7 @@ 

  		}

  	}

  

+ 	symdiff_prepare(&rev, &sdiff);

  	for (i = 0; i < rev.pending.nr; i++) {

  		struct object_array_entry *entry = &rev.pending.objects[i];

  		struct object *obj = entry->item;
@@ -396,6 +507,8 @@ 

  			obj = &get_commit_tree(((struct commit *)obj))->object;

  

  		if (obj->type == OBJ_TREE) {

+ 			if (sdiff.skip && bitmap_get(sdiff.skip, i))

+ 				continue;

  			obj->flags |= flags;

  			add_object_array(obj, name, &ent);

  		} else if (obj->type == OBJ_BLOB) {
@@ -437,21 +550,12 @@ 

  		usage(builtin_diff_usage);

  	else if (ent.nr == 1)

  		result = builtin_diff_index(&rev, argc, argv);

- 	else if (ent.nr == 2)

+ 	else if (ent.nr == 2) {

+ 		if (sdiff.warn)

+ 			warning(_("%s...%s: multiple merge bases, using %s"),

+ 				sdiff.left, sdiff.right, sdiff.base);

  		result = builtin_diff_tree(&rev, argc, argv,

  					   &ent.objects[0], &ent.objects[1]);

- 	else if (ent.objects[0].item->flags & UNINTERESTING) {

- 		/*

- 		 * diff A...B where there is at least one merge base

- 		 * between A and B.  We have ent.objects[0] ==

- 		 * merge-base, ent.objects[ents-2] == A, and

- 		 * ent.objects[ents-1] == B.  Show diff between the

- 		 * base and B.  Note that we pick one merge base at

- 		 * random if there are more than one.

- 		 */

- 		result = builtin_diff_tree(&rev, argc, argv,

- 					   &ent.objects[0],

- 					   &ent.objects[ent.nr-1]);

  	} else

  		result = builtin_diff_combined(&rev, argc, argv,

  					       ent.objects, ent.nr);

file modified
+1 -1
@@ -420,7 +420,7 @@ 

  	git commit --fixup B B.t &&

"Chris Torek via GitGitGadget" gitgitgadget@gmail.com writes:

From: Chris Torek chris.torek@gmail.com

According to the documentation, "git diff" takes at most two commit-ish,
or an A..B style range, or an A...B style symmetric difference range.
The autosquash-and-exec test relied on "git diff HEAD^!", which works
fine for ordinary commits as the revision parse produces two commit-ish,
namely ^HEAD^ and HEAD.

For merge commits, however, this test makes use of an undocumented
feature:

s/undocumented feature/undefined behaviour/;

The show.sh scripts wants to compute the diff against first parent,
and it uses a range notation HEAD^! which happens to mean
HEAD^..HEAD for a single parent commit, but it forgets that the
commit it may get fed could be a merge. What the code happens to do
when given "git diff ^HEAD^2 HEAD^..HEAD" is undefined behaviour and
does not even ...

the resulting revision parse has all the parents as UNINTERESTING
followed by the HEAD commit. This looks identical to a symmetric
diff parse, which lists the merge bases as UNINTERESTING, followed by
the A (UNINTERESTING) and B revs. So the diff winds up treating it
as one, using the first oid (i.e., HEAD^) and the last (i.e., HEAD).
The documentation, however, says nothing about this usage.

...deserve to be explained in a paragraph like this, I would think.

Since diff actually just uses HEAD^ and HEAD, call for these directly
here. That makes it possible to improve the diff code's handling of
symmetric difference arguments.

Yes, the resulting code expresses the intent much better.

Signed-off-by: Chris Torek chris.torek@gmail.com

t/t3430-rebase-merges.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/t/t3430-rebase-merges.sh b/t/t3430-rebase-merges.sh
index a1bc3e2..b454f40 100755
--- a/t/t3430-rebase-merges.sh
+++ b/t/t3430-rebase-merges.sh
@@ -420,7 +420,7 @@ test_expect_success 'with --autosquash and --exec' '
git commit --fixup B B.t &&
write_script show.sh <<-\EOF &&
subject="$(git show -s --format=%s HEAD)"
- content="$(git diff HEAD^! | tail -n 1)"
+ content="$(git diff HEAD^ HEAD | tail -n 1)"
echo "$subject: $content"
EOF
test_tick &&

  	write_script show.sh <<-\EOF &&

  	subject="$(git show -s --format=%s HEAD)"

- 	content="$(git diff HEAD^! | tail -n 1)"

+ 	content="$(git diff HEAD^ HEAD | tail -n 1)"

  	echo "$subject: $content"

  	EOF

  	test_tick &&

@@ -0,0 +1,91 @@ 

+ #!/bin/sh

+ 

+ test_description='behavior of diff with symmetric-diff setups'

+ 

+ . ./test-lib.sh

+ 

+ # build these situations:

+ #  - normal merge with one merge base (br1...b2r);

+ #  - criss-cross merge ie 2 merge bases (br1...master);

+ #  - disjoint subgraph (orphan branch, br3...master).

+ #

+ #     B---E   <-- master

+ #    / \ /

+ #   A   X

+ #    \ / \

+ #     C---D--G   <-- br1

+ #      \    /

+ #       ---F   <-- br2

+ #

+ #  H  <-- br3

+ #

+ # We put files into a few commits so that we can verify the

+ # output as well.

+ 

+ test_expect_success setup '

+ 	git commit --allow-empty -m A &&

+ 	echo b >b &&

+ 	git add b &&

+ 	git commit -m B &&

+ 	git checkout -b br1 HEAD^ &&

+ 	echo c >c &&

+ 	git add c &&

+ 	git commit -m C &&

+ 	git tag commit-C &&

+ 	git merge -m D master &&

+ 	git tag commit-D &&

+ 	git checkout master &&

+ 	git merge -m E commit-C &&

+ 	git checkout -b br2 commit-C &&

+ 	echo f >f &&

+ 	git add f &&

+ 	git commit -m F &&

+ 	git checkout br1 &&

+ 	git merge -m G br2 &&

+ 	git checkout --orphan br3 &&

+ 	git commit -m H

+ '

+ 

+ test_expect_success 'diff with one merge base' '

+ 	git diff commit-D...br1 >tmp &&

+ 	tail -n 1 tmp >actual &&

+ 	echo +f >expect &&

+ 	test_cmp expect actual

+ '

+ 

+ # The output (in tmp) can have +b or +c depending

+ # on which merge base (commit B or C) is picked.

+ # It should have one of those two, which comes out

+ # to seven lines.

+ test_expect_success 'diff with two merge bases' '

+ 	git diff br1...master >tmp 2>err &&

+ 	test_line_count = 7 tmp &&

+ 	test_line_count = 1 err

+ '

+ 

+ test_expect_success 'diff with no merge bases' '

+ 	test_must_fail git diff br2...br3 >tmp 2>err &&

+ 	test_i18ngrep "fatal: br2...br3: no merge base" err

+ '

+ 

+ test_expect_success 'diff with too many symmetric differences' '

+ 	test_must_fail git diff br1...master br2...br3 >tmp 2>err &&

+ 	test_i18ngrep "usage" err

+ '

+ 

+ test_expect_success 'diff with symmetric difference and extraneous arg' '

+ 	test_must_fail git diff master br1...master >tmp 2>err &&

+ 	test_i18ngrep "usage" err

+ '

+ 

+ test_expect_success 'diff with two ranges' '

+ 	test_must_fail git diff master br1..master br2..br3 >tmp 2>err &&

+ 	test_i18ngrep "usage" err

+ '

+ 

+ test_expect_success 'diff with ranges and extra arg' '

+ 	test_must_fail git diff master br1..master commit-D >tmp 2>err &&

+ 	test_i18ngrep "usage" err

+ '

+ 

+ test_done

git diff -h help is succinct, but perhaps too much so.

The symmetric-diff syntax, git diff A...B, is defined by the documentation
to compare the merge base of A and B to commit B. It does so just fine when
there is a merge base. It compares A and B directly if there is no merge
base, and it is overly forgiving of bad arguments after which it can produce
nonsensical diffs.

The first patch simply adjusts a test that will fail if the second patch is
accepted. The second patch adds special handling for the symmetric diff
syntax so that the option parsing works, plus a small test suite. The third
patch just updates the SYNOPSIS section of the documentation and makes the
help output more verbose (to match the SYNOPSIS and provide common diff
options like git-diff-files, for instance).

Welcome to GitGitGadget

Hi chris3torek, and welcome to GitGitGadget, the GitHub App to send patch series to the Git mailing list from GitHub Pull Requests.

Please make sure that your Pull Request has a good description, as it will be used as cover letter.

Also, it is a good idea to review the commit messages one last time, as the Git project expects them in a quite specific form:

  • the lines should not exceed 76 columns,
  • the first line should be like a header and typically start with a prefix like "tests:" or "commit:", and
  • the commit messages' body should be describing the "why?" of the change.
  • Finally, the commit messages should end in a Signed-off-by: line matching the commits' author.

It is in general a good idea to await the automated test ("Checks") in this Pull Request before contributing the patches, e.g. to avoid trivial issues such as unportable code.

Contributing the patches

Before you can contribute the patches, your GitHub username needs to be added to the list of permitted users. Any already-permitted user can do that, by adding a comment to your PR of the form /allow. A good way to find other contributors is to locate recent pull requests where someone has been /allowed:

Both the person who commented /allow and the PR author are able to /allow you.

An alternative is the channel #git-devel on the FreeNode IRC network:

<newcontributor> I've just created my first PR, could someone please /allow me? https://github.com/gitgitgadget/git/pull/12345
<veteran> newcontributor: it is done
<newcontributor> thanks!

Once on the list of permitted usernames, you can contribute the patches to the Git mailing list by adding a PR comment /submit.

If you want to see what email(s) would be sent for a /submit request, add a PR comment /preview to have the email(s) sent to you. You must have a public GitHub email address for this.

After you submit, GitGitGadget will respond with another comment that contains the link to the cover letter mail in the Git mailing list archive. Please make sure to monitor the discussion in that thread and to address comments and suggestions (while the comments and suggestions will be mirrored into the PR by GitGitGadget, you will still want to reply via mail).

If you do not want to subscribe to the Git mailing list just to be able to respond to a mail, you can download the mbox from the Git mailing list archive (click the (raw) link), then import it into your mail program. If you use GMail, you can do this via:

curl -g --user "<EMailAddress>:<Password>" \
    --url "imaps://imap.gmail.com/INBOX" -T /path/to/raw.txt

To iterate on your change, i.e. send a revised patch or patch series, you will first want to (force-)push to the same branch. You probably also want to modify your Pull Request description (or title). It is a good idea to summarize the revision by adding something like this to the cover letter (read: by editing the first comment on the PR, i.e. the PR description):

Changes since v1:
- Fixed a typo in the commit message (found by ...)
- Added a code comment to ... as suggested by ...
...

To send a new iteration, just add another PR comment with the contents: /submit.

Need help?

New contributors who want advice are encouraged to join git-mentoring@googlegroups.com, where volunteers who regularly contribute to Git are willing to answer newbie questions, give advice, or otherwise provide mentoring to interested contributors. You must join in order to post or view messages, but anyone can join.

You may also be able to find help in real time in the developer IRC channel, #git-devel on Freenode. Remember that IRC does not support offline messaging, so if you send someone a private message and log out, they cannot respond to you. The scrollback of #git-devel is archived, though.

User chris3torek is now allowed to use GitGitGadget.

Preview email sent as pull.804.git.git.1591650208.gitgitgadget@gmail.com

/preview

Preview email sent as pull.804.git.git.1591652002.gitgitgadget@gmail.com

Submitted as pull.804.git.git.1591661021.gitgitgadget@gmail.com

"Chris Torek via GitGitGadget" gitgitgadget@gmail.com writes:

From: Chris Torek chris.torek@gmail.com

According to the documentation, "git diff" takes at most two commit-ish,
or an A..B style range, or an A...B style symmetric difference range.
The autosquash-and-exec test relied on "git diff HEAD^!", which works
fine for ordinary commits as the revision parse produces two commit-ish,
namely ^HEAD^ and HEAD.

For merge commits, however, this test makes use of an undocumented
feature:

s/undocumented feature/undefined behaviour/;

The show.sh scripts wants to compute the diff against first parent,
and it uses a range notation HEAD^! which happens to mean
HEAD^..HEAD for a single parent commit, but it forgets that the
commit it may get fed could be a merge. What the code happens to do
when given "git diff ^HEAD^2 HEAD^..HEAD" is undefined behaviour and
does not even ...

the resulting revision parse has all the parents as UNINTERESTING
followed by the HEAD commit. This looks identical to a symmetric
diff parse, which lists the merge bases as UNINTERESTING, followed by
the A (UNINTERESTING) and B revs. So the diff winds up treating it
as one, using the first oid (i.e., HEAD^) and the last (i.e., HEAD).
The documentation, however, says nothing about this usage.

...deserve to be explained in a paragraph like this, I would think.

Since diff actually just uses HEAD^ and HEAD, call for these directly
here. That makes it possible to improve the diff code's handling of
symmetric difference arguments.

Yes, the resulting code expresses the intent much better.

Signed-off-by: Chris Torek chris.torek@gmail.com

t/t3430-rebase-merges.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/t/t3430-rebase-merges.sh b/t/t3430-rebase-merges.sh
index a1bc3e2..b454f40 100755
--- a/t/t3430-rebase-merges.sh
+++ b/t/t3430-rebase-merges.sh
@@ -420,7 +420,7 @@ test_expect_success 'with --autosquash and --exec' '
git commit --fixup B B.t &&
write_script show.sh <<-\EOF &&
subject="$(git show -s --format=%s HEAD)"
- content="$(git diff HEAD^! | tail -n 1)"
+ content="$(git diff HEAD^ HEAD | tail -n 1)"
echo "$subject: $content"
EOF
test_tick &&

"Chris Torek via GitGitGadget" gitgitgadget@gmail.com writes:

+struct symdiff {
+ struct bitmap skip; / bitmap of commit indices to skip, or NULL /
+ int warn; /
true if there were multiple merge bases /
+ int base, left, right; /
index of chosen merge base and left&right /
+};
+
+/

+ * Check for symmetric-difference arguments, and if present, arrange
+ * everything we need to know to handle them correctly.
+
+ * For an actual symmetric diff,
symdiff is set this way:
+
+ * - its skip is non-NULL and marks
all rev->pending.objects[i]
+ * indices that the caller should ignore (extra merge bases, of
+ * which there might be many, and A in A...B). Note that the
+ * chosen merge base and right side are NOT marked.
+ * - warn is set if there are multiple merge bases.
+ * - base, left, and right hold the merge base and left and
+ * right side indices, for warnings or errors.
+

+ * If there is no symmetric diff argument, sym->skip is NULL and
+ * sym->warn is cleared. The remaining fields are not set.
+
+ * If the user provides a symmetric diff with no merge base, or
+ * more than one range, we do a usage-exit.
+
/
+static void builtin_diff_symdiff(struct rev_info rev, struct symdiff sym)

The function name feels quite suboptimal. At least I thought that
by the time a call to this function returns, we would have already
produced a symmetric diff output from its name, but apparently that
is not what is being done. Calling it symdiff_prepare() may be a
vast improvement, perhaps.

+{
+ int i, lcount = 0, rcount = 0, basecount = 0;
+ int lpos = -1, rpos = -1, basepos = -1;
+ struct bitmap map = NULL;
+
+ /

+ * Use the whence fields to find merge bases and left and
+ * right parts of symmetric difference, so that we do not
+ * depend on the order that revisions are parsed. If there
+ * are any revs that aren't from these sources, we have a
+ * "git diff C A...B" or "git diff A...B C" case. Or we
+ * could even get "git diff A...B C...E", for instance.
+
+ * If we don't have just one merge base, we pick one
+ * at random.
+

+ * NB: REV_CMD_LEFT, REV_CMD_RIGHT are also used for A..B,
+ * so we must check for SYMMETRIC_LEFT too. The two arrays
+ * rev->pending.objects and rev->cmdline.rev are parallel.
+ /
+ for (i = 0; i < rev->cmdline.nr; i++) {
+ struct object
obj = rev->pending.objects[i].item;
+ switch (rev->cmdline.rev[i].whence) {
+ case REV_CMD_MERGE_BASE:
+ if (basepos < 0)
+ basepos = i;
+ basecount++;
+ break; / do mark all bases /
+ case REV_CMD_LEFT:
+ if (obj->flags & SYMMETRIC_LEFT) {
+ lpos = i;
+ lcount++;
+ break; / do mark A /
+ }
+ continue;
+ case REV_CMD_RIGHT:
+ rpos = i;
+ rcount++;

Even though, unlike lcount, you allow arbitrary number of rcount,
and rpos uses "the last one wins" semantics. Can we describe in the
comment above what use case benefits from this looseness (as opposed
to erroring out when rcount is NOT 1, like done for lcount)?

  • continue; / don't mark B /
  • default:
  • continue;
  • }
  • if (map == NULL)
  • map = bitmap_new();
  • bitmap_set(map, i);
  • }
    +
  • if (lcount == 0) { / not a symmetric difference /
  • bitmap_free(map);
  • sym->warn = 0;
  • sym->skip = NULL;
  • return;
  • }
    +
  • if (lcount != 1)
  • die(_("cannot use more than one symmetric difference"));
    +
  • if (basecount == 0) {
  • const char *lname = rev->pending.objects[lpos].name;
  • const char *rname = rev->pending.objects[rpos].name;
  • die(_("%s...%s: no merge base"), lname, rname);
  • }
  • bitmap_unset(map, basepos); / unmark the base we want /
  • sym->base = basepos;
  • sym->left = lpos;
  • sym->right = rpos;
  • sym->warn = basecount > 1;
  • sym->skip = map;
    +}
    +
    int cmd_diff(int argc, const char argv, const char *prefix)
    {
    int i;
    @@ -263,6 +361,7 @@ int cmd_diff(int argc, const char
    argv, const char prefix)
    struct object_array_entry
    blob[2];
    int nongit = 0, no_index = 0;
    int result = 0;
  • struct symdiff sdiff;

/
* We could get N tree-ish in the rev.pending_objects list.
@@ -382,6 +481,7 @@ int cmd_diff(int argc, const char
argv, const char prefix)
}
}

  • builtin_diff_symdiff(&rev, &sdiff);
    for (i = 0; i < rev.pending.nr; i++) {
    struct object_array_entry entry = &rev.pending.objects[i];
    struct object
    obj = entry->item;
    @@ -394,8 +494,9 @@ int cmd_diff(int argc, const char argv, const char prefix)
    die(_("invalid object '%s' given."), name);
    if (obj->type == OBJ_COMMIT)
    obj = &get_commit_tree(((struct commit
    )obj))->object;
    -
    if (obj->type == OBJ_TREE) {
  • if (sdiff.skip && bitmap_get(sdiff.skip, i))
  • continue;
    obj->flags |= flags;
    add_object_array(obj, name, &ent);
    } else if (obj->type == OBJ_BLOB) {
    @@ -437,24 +538,22 @@ int cmd_diff(int argc, const char *argv, const char prefix)
    usage(builtin_diff_usage);
    else if (ent.nr == 1)
    result = builtin_diff_index(&rev, argc, argv);
  • else if (ent.nr == 2)
  • else if (ent.nr == 2) {
  • if (sdiff.warn) {
  • const char *lname = rev.pending.objects[sdiff.left].name;
  • const char *rname = rev.pending.objects[sdiff.right].name;
  • const char *basename = rev.pending.objects[sdiff.base].name;
  • warning(_("%s...%s: multiple merge bases, using %s"),
  • lname, rname, basename);
  • }
    result = builtin_diff_tree(&rev, argc, argv,
    &ent.objects[0], &ent.objects[1]);
  • else if (ent.objects[0].item->flags & UNINTERESTING) {
  • /*
    • diff A...B where there is at least one merge base
    • between A and B. We have ent.objects[0] ==
    • merge-base, ent.objects[ents-2] == A, and
    • ent.objects[ents-1] == B. Show diff between the
    • base and B. Note that we pick one merge base at
    • random if there are more than one.
  • */
  • result = builtin_diff_tree(&rev, argc, argv,
  • &ent.objects[0],
  • &ent.objects[ent.nr-1]);
  • } else
  • } else {
  • if (sdiff.skip)
  • usage(builtin_diff_usage);

sdiff.skip being non-NULL means symdiff_prepare() saw one A...B that
produced two ents and the fact that we have more than two ents mean
that the command line gave us other tree-ishes, e.g. "git diff A...B C"
and it is rejected here. OK.

  result = builtin_diff_combined(&rev, argc, argv,
                     ent.objects, ent.nr);
  • }

"Chris Torek via GitGitGadget" gitgitgadget@gmail.com writes:

'git diff' [<options>] [<commit>] [--] [<path>...]
'git diff' [<options>] --cached [<commit>] [--] [<path>...]
'git diff' [<options>] <commit> <commit> [--] [<path>...]
+'git diff' [<options>] <commit>..<commit> [--] [<path>...]
+'git diff' [<options>] <commit>...<commit> [--] [<path>...]
'git diff' [<options>] <blob> <blob>
'git diff' [<options>] --no-index [--] <path> <path>

We actually are trying to wean users off of saying "diff A..B" which
is a nonsense notation, so I'd rather not to see it added here.
Describing "diff A...B" is a good idea, though.

While we have our attention on this part of the documentation, would
it make sense to also add description on invoking the combined diff
as well?

rebased onto 944d51c

3 years ago

Preview email sent as pull.804.v2.git.git.1591724505.gitgitgadget@gmail.com

Submitted as pull.804.v2.git.git.1591729224.gitgitgadget@gmail.com

This must have been sent to me by mistake.

"Chris Torek via GitGitGadget" <gitgitgadget@gmail.com> writes:

> +static void symdiff_prepare(struct rev_info *rev, struct symdiff *sym)
> +{
> + int i, lcount = 0, rcount = 0, basecount = 0;
> + int lpos = -1, rpos = -1, basepos = -1;
> + struct bitmap *map = NULL;

The logic around rcount and rpos in this function still smells
fishy.  For example, rcount is counted up from 0 but its value is
never consulted, so we should be able to get rid of it.

For that matter, lcount and lpos look somewhat redundant.  lpos
begins with -1 to signal "have not seen any left end of symmetric
range yet", and we won't allow more than two symmetric ranges
anyway, so we should be able to get rid of lcount and base the error
condition purely on lpos by

 * when we see SYMMETRIC_LEFT, check if lpos is already non-negative,
   and die otherwise right there.  We don't need "if lcount != 1, die"
   after the loop.

 * after the loop, if lpos is still -1, we know we didn't see
   symmetric difference.

> + /*
> +  * Use the whence fields to find merge bases and left and
> +  * right parts of symmetric difference, so that we do not
> +  * depend on the order that revisions are parsed.  If there
> +  * are any revs that aren't from these sources, we have a
> +  * "git diff C A...B" or "git diff A...B C" case.  Or we
> +  * could even get "git diff A...B C...E", for instance.
> +  *
> +  * If we don't have just one merge base, we pick one
> +  * at random.
> +  *
> +  * NB: REV_CMD_LEFT, REV_CMD_RIGHT are also used for A..B,
> +  * so we must check for SYMMETRIC_LEFT too.  The two arrays
> +  * rev->pending.objects and rev->cmdline.rev are parallel.
> +  */
> + for (i = 0; i < rev->cmdline.nr; i++) {
> +     struct object *obj = rev->pending.objects[i].item;
> +     switch (rev->cmdline.rev[i].whence) {
> +     case REV_CMD_MERGE_BASE:
> +         if (basepos < 0)
> +             basepos = i;
> +         basecount++;
> +         break;      /* do mark all bases */
> +     case REV_CMD_LEFT:
> +         if (obj->flags & SYMMETRIC_LEFT) {
> +             lpos = i;
> +             lcount++;
> +             break;  /* do mark A */
> +         }
> +         continue;
> +     case REV_CMD_RIGHT:
> +         rpos = i;
> +         rcount++;
> +         continue;   /* don't mark B */

It is unclear if we want to allow "git diff A..B C..D" (or
alternatively "git diff A...B C..D") and if so why.  

It appears that you are allowing both, but I am not sure if that is
a good idea.  Read a bit further below.

> +     default:
> +         continue;
> +     }
> +     if (map == NULL)
> +         map = bitmap_new();
> +     bitmap_set(map, i);
> + }
> +
> + if (lcount == 0) {  /* not a symmetric difference */
> +     bitmap_free(map);
> +     sym->warn = 0;
> +     sym->skip = NULL;
> +     return;
> + }
> +
> + if (lcount != 1)
> +     die(_("cannot use more than one symmetric difference"));
> +
> + if (basecount == 0) {
> +     const char *lname = rev->pending.objects[lpos].name;
> +     const char *rname = rev->pending.objects[rpos].name;
> +     die(_("%s...%s: no merge base"), lname, rname);

When "git diff A...B C..D" is given, what do we want to do?  A is
the only element marked with REV_CMD_LEFT, which is pointed at by
lpos and lcount gets incremented to become one.  When we see B, we
make rpos point at it, but then later, when we see D, wouldn't rpos
get updated to point at it?  What error message would we give when
we see no merge base then?  We would want to say the symdiff between
A and B has no merge bases, but wouldn't we end up mentioning D
instead of B?

> + }
> + bitmap_unset(map, basepos); /* unmark the base we want */
> + sym->base = basepos;
> + sym->left = lpos;
> + sym->right = rpos;
> + sym->warn = basecount > 1;
> + sym->skip = map;
> +}

> @@ -394,8 +494,9 @@ int cmd_diff(int argc, const char **argv, const char *prefix)
>           die(_("invalid object '%s' given."), name);
>       if (obj->type == OBJ_COMMIT)
>           obj = &get_commit_tree(((struct commit *)obj))->object;
> -

Do not lose this blank line.  It does not make a difference to the
compiler, but it is semantically significant to human readers.

>       if (obj->type == OBJ_TREE) {
> +         if (sdiff.skip && bitmap_get(sdiff.skip, i))
> +             continue;
>           obj->flags |= flags;
>           add_object_array(obj, name, &ent);
>       } else if (obj->type == OBJ_BLOB) {

> diff --git a/t/t4068-diff-symmetric.sh b/t/t4068-diff-symmetric.sh
> new file mode 100755
> index 00000000000..7b5988933da
> --- /dev/null
> +++ b/t/t4068-diff-symmetric.sh
> @@ -0,0 +1,81 @@
> +#!/bin/sh
> +...
> +test_expect_success setup '
> + git commit --allow-empty -m A &&
> + echo b >b &&
> + git add b &&
> + git commit -m B &&
> + git checkout -b br1 HEAD^ &&
> + echo c >c &&
> + git add c &&
> + git commit -m C &&
> + git tag commit-C &&
> + git merge -m D master &&
> + git tag commit-D &&
> + git checkout master &&
> + git merge -m E commit-C &&
> + git checkout -b br2 commit-C &&
> + echo f >f &&
> + git add f &&
> + git commit -m F &&
> + git checkout br1 &&
> + git merge -m G br2 &&
> + git checkout --orphan br3 &&
> + git commit -m H
> +'
> +
> +test_expect_success 'diff with one merge base' '
> + git diff commit-D...br1 >tmp &&
> + tail -1 tmp >actual &&

Let's make sure we spell "tail -n 1" (same for "head -1" -> "head -n 1").
I know there are a handful of existing offenders, but that does not mean
it is OK to make things worse.

> + echo +f >expect &&
> + test_cmp expect actual
"Chris Torek via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Chris Torek <chris.torek@gmail.com>
>
> Update the manual page synopsis to include the three-dot notation
> and the combined-diff option

Surely.  That is "tweak ... slightly".  Full-stop is missing here,
by the way.

> Make "git diff -h" print the same usage summary as the manual
> page synopsis, minus the "A..B" form, which is now discouraged.

Good.

> Document the usage for producing combined commits.

Yup, that is "while we are at it".  The new text reads well, but it
appears that it is the more significant part of the change in this
patch now ;-)

> diff --git a/Documentation/git-diff.txt b/Documentation/git-diff.txt
> index 37781cf1755..0bce278652a 100644
> --- a/Documentation/git-diff.txt
> +++ b/Documentation/git-diff.txt
> @@ -11,15 +11,17 @@ SYNOPSIS
>  [verse]
>  'git diff' [<options>] [<commit>] [--] [<path>...]
>  'git diff' [<options>] --cached [<commit>] [--] [<path>...]
> -'git diff' [<options>] <commit> <commit> [--] [<path>...]
> +'git diff' [<options>] <commit> [<commit>...] <commit> [--] [<path>...]
> +'git diff' [<options>] <commit>...<commit> [--] [<path>...]
>  'git diff' [<options>] <blob> <blob>
>  'git diff' [<options>] --no-index [--] <path> <path>
>  
>  DESCRIPTION
>  -----------
>  Show changes between the working tree and the index or a tree, changes
> -between the index and a tree, changes between two trees, changes between
> -two blob objects, or changes between two files on disk.
> +between the index and a tree, changes between two trees, changes resulting
> +from a merge, changes between two blob objects, or changes between two
> +files on disk.
>  
>  'git diff' [<options>] [--] [<path>...]::
>  
> @@ -67,6 +69,16 @@ two blob objects, or changes between two files on disk.
>   one side is omitted, it will have the same effect as
>   using HEAD instead.
>  
> +'git diff' [<options>] <commit> [<commit>...] <commit> [--] [<path>...]::
> +
> + This form is to view the results of a merge commit.  The first
> + listed <commit> must be the merge itself; the remaining two or
> + more commits should be its parents.  A convenient way to produce
> + the desired set of revisions is to use the {caret}@ suffix, i.e.,
> + "git diff master master^@".  This is equivalent to running "git

Don't we usually use `git diff master master^@` to mark up literal
examples with tt, instead of "git diff..." with double-quotes?

> + show --format=" on the merge commit, e.g., "git show --format=
> + master".

Likewise.

But more importantly, I think giving the exact equivalent is much
less important than keeping the explanation concise, simple and
clear, and the "empty format to omit the log part" is distracting
(after all, teaching how to squelch the log message part in the
"show" command is not the topic of this manpage).

    For a merge commit `master`, this gives the same combined diff
    as `git show master` does.

perhaps?

Thanks.

rebased onto e9860da

3 years ago

Submitted as pull.804.v3.git.git.1591888511.gitgitgadget@gmail.com

On Thu, Jun 11, 2020 at 8:15 AM Chris Torek via GitGitGadget
<gitgitgadget@gmail.com> wrote:
 > +                       if (lpos > 0)

Ugh, this and the rpos test are supposed to be >= not >.  Will fix these for v4.
Otherwise seems review-able.

Also, I forgot to update the GitGitGadget "changes since",
It's changes since v2, not since v1, of course.

Chris

rebased onto 36cca09

3 years ago