Skip to content

Commit 1389d9d

Browse files
author
Junio C Hamano
committed
reflog expire --fix-stale
The logic in an earlier round to detect reflog entries that point at a broken commit was not sufficient. Just like we do not trust the presence of a commit during pack transfer (we trust only our refs), we should not trust a commit's presence, even if the tree of that commit is complete. A repository that had reflog enabled on some refs that were rewound, and on which git-repack or git-prune from older versions of git was then run, can have reflog entries that point at a commit that still exists but lacks commits (or trees and blobs needed for that commit) between it and some commit that is reachable from one of the refs. This revamps the logic -- the definition of "broken commit" becomes: a commit that is not reachable from any of the refs and there is a missing object among the commit, tree, or blob objects reachable from it that is not reachable from any of the refs. Entries in the reflog that refer to such a commit are expired. Since this computation involves traversing all the reachable objects, i.e. it has the same cost as 'git prune', it is enabled only when a new option, --stale-fix, is given. Fortunately, once this is run, we should not have to ever worry about missing objects, because the current prune and pack-objects know about reflogs and protect objects referred to by them. Unfortunately, this will be absolutely necessary to help people migrate to the newer prune and repack. Signed-off-by: Junio C Hamano <junkio@cox.net>
1 parent 9442147 commit 1389d9d

File tree

2 files changed

+314
-21
lines changed

2 files changed

+314
-21
lines changed

builtin-reflog.c

Lines changed: 138 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,34 @@
44
#include "refs.h"
55
#include "dir.h"
66
#include "tree-walk.h"
7+
#include "diff.h"
8+
#include "revision.h"
9+
#include "reachable.h"
10+
11+
/*
12+
* reflog expire
13+
*/
14+
15+
/*
 * Usage text for "git-reflog expire".  The option names listed here must
 * match the ones the argument parser compares against.
 */
static const char reflog_expire_usage[] =
"git-reflog expire [--verbose] [--dry-run] [--stale-fix] [--expire=<time>] [--expire-unreachable=<time>] [--all] <refs>...";
717

818
static unsigned long default_reflog_expire;
919
static unsigned long default_reflog_expire_unreachable;
1020

21+
struct cmd_reflog_expire_cb {
22+
struct rev_info revs;
23+
int dry_run;
24+
int stalefix;
25+
int verbose;
26+
unsigned long expire_total;
27+
unsigned long expire_unreachable;
28+
};
29+
1130
/*
 * Per-ref state passed to the reflog entry callback while one ref's
 * reflog is being examined.
 */
struct expire_reflog_cb {
	/* where surviving entries are written; when NULL nothing is written */
	FILE *newlog;
	/* name of the ref whose reflog is being processed */
	const char *ref;
	/* commit the ref points at, or NULL; used for the merge-base test */
	struct commit *ref_commit;
	/* command-wide options (expiry limits, stalefix, verbose) */
	struct cmd_reflog_expire_cb *cmd;
};
1836

1937
static int tree_is_complete(const unsigned char *sha1)
@@ -43,6 +61,83 @@ static int tree_is_complete(const unsigned char *sha1)
4361
return 1;
4462
}
4563

64+
/* Object flag: set on a commit found to lack objects needed to complete it. */
#define INCOMPLETE (1u<<10)
65+
/*
 * Object flag: marks a commit queued during a commit_is_complete() walk;
 * it is cleared again before that function returns.
 */
#define STUDYING (1u<<11)
66+
67+
static int commit_is_complete(struct commit *commit)
68+
{
69+
struct object_array study;
70+
struct object_array found;
71+
int is_incomplete = 0;
72+
int i;
73+
74+
/* early return */
75+
if (commit->object.flags & SEEN)
76+
return 1;
77+
if (commit->object.flags & INCOMPLETE)
78+
return 0;
79+
/*
80+
* Find all commits that are reachable and are not marked as
81+
* SEEN. Then make sure the trees and blobs contained are
82+
* complete. After that, mark these commits also as SEEN.
83+
* If some of the objects that are needed to complete this
84+
* commit are missing, mark this commit as INCOMPLETE.
85+
*/
86+
memset(&study, 0, sizeof(study));
87+
memset(&found, 0, sizeof(found));
88+
add_object_array(&commit->object, NULL, &study);
89+
add_object_array(&commit->object, NULL, &found);
90+
commit->object.flags |= STUDYING;
91+
while (study.nr) {
92+
struct commit *c;
93+
struct commit_list *parent;
94+
95+
c = (struct commit *)study.objects[--study.nr].item;
96+
if (!c->object.parsed && !parse_object(c->object.sha1))
97+
c->object.flags |= INCOMPLETE;
98+
99+
if (c->object.flags & INCOMPLETE) {
100+
is_incomplete = 1;
101+
break;
102+
}
103+
else if (c->object.flags & SEEN)
104+
continue;
105+
for (parent = c->parents; parent; parent = parent->next) {
106+
struct commit *p = parent->item;
107+
if (p->object.flags & STUDYING)
108+
continue;
109+
p->object.flags |= STUDYING;
110+
add_object_array(&p->object, NULL, &study);
111+
add_object_array(&p->object, NULL, &found);
112+
}
113+
}
114+
if (!is_incomplete) {
115+
/* make sure all commits in found have all the
116+
* necessary objects.
117+
*/
118+
for (i = 0; !is_incomplete && i < found.nr; i++) {
119+
struct commit *c =
120+
(struct commit *)found.objects[i].item;
121+
if (!tree_is_complete(c->tree->object.sha1))
122+
is_incomplete = 1;
123+
}
124+
if (!is_incomplete) {
125+
/* mark all found commits as complete, iow SEEN */
126+
for (i = 0; i < found.nr; i++)
127+
found.objects[i].item->flags |= SEEN;
128+
}
129+
}
130+
/* clear flags from the objects we traversed */
131+
for (i = 0; i < found.nr; i++)
132+
found.objects[i].item->flags &= ~STUDYING;
133+
if (is_incomplete)
134+
commit->object.flags |= INCOMPLETE;
135+
/* free object arrays */
136+
free(study.objects);
137+
free(found.objects);
138+
return !is_incomplete;
139+
}
140+
46141
static int keep_entry(struct commit **it, unsigned char *sha1)
47142
{
48143
struct commit *commit;
@@ -54,9 +149,15 @@ static int keep_entry(struct commit **it, unsigned char *sha1)
54149
if (!commit)
55150
return 0;
56151

57-
/* Make sure everything in this commit exists. */
58-
parse_object(commit->object.sha1);
59-
if (!tree_is_complete(commit->tree->object.sha1))
152+
/*
153+
* Make sure everything in this commit exists.
154+
*
155+
* We have walked all the objects reachable from the refs
156+
* and cache earlier. The commits reachable by this commit
157+
* must meet SEEN commits -- and then we should mark them as
158+
* SEEN as well.
159+
*/
160+
if (!commit_is_complete(commit))
60161
return 0;
61162
*it = commit;
62163
return 1;
@@ -76,13 +177,14 @@ static int expire_reflog_ent(unsigned char *osha1, unsigned char *nsha1,
76177
timestamp = strtoul(cp, &ep, 10);
77178
if (*ep != ' ')
78179
goto prune;
79-
if (timestamp < cb->expire_total)
180+
if (timestamp < cb->cmd->expire_total)
80181
goto prune;
81182

82-
if (!keep_entry(&old, osha1) || !keep_entry(&new, nsha1))
183+
if (cb->cmd->stalefix &&
184+
(!keep_entry(&old, osha1) || !keep_entry(&new, nsha1)))
83185
goto prune;
84186

85-
if ((timestamp < cb->expire_unreachable) &&
187+
if ((timestamp < cb->cmd->expire_unreachable) &&
86188
(!cb->ref_commit ||
87189
(old && !in_merge_bases(old, cb->ref_commit)) ||
88190
(new && !in_merge_bases(new, cb->ref_commit))))
@@ -91,19 +193,15 @@ static int expire_reflog_ent(unsigned char *osha1, unsigned char *nsha1,
91193
if (cb->newlog)
92194
fprintf(cb->newlog, "%s %s %s",
93195
sha1_to_hex(osha1), sha1_to_hex(nsha1), data);
196+
if (cb->cmd->verbose)
197+
printf("keep %s", data);
94198
return 0;
95199
prune:
96-
if (!cb->newlog)
97-
fprintf(stderr, "would prune %s", data);
200+
if (!cb->newlog || cb->cmd->verbose)
201+
printf("%sprune %s", cb->newlog ? "" : "would ", data);
98202
return 0;
99203
}
100204

101-
struct cmd_reflog_expire_cb {
102-
int dry_run;
103-
unsigned long expire_total;
104-
unsigned long expire_unreachable;
105-
};
106-
107205
static int expire_reflog(const char *ref, const unsigned char *sha1, int unused, void *cb_data)
108206
{
109207
struct cmd_reflog_expire_cb *cmd = cb_data;
@@ -134,8 +232,7 @@ static int expire_reflog(const char *ref, const unsigned char *sha1, int unused,
134232
fprintf(stderr,
135233
"warning: ref '%s' does not point at a commit\n", ref);
136234
cb.ref = ref;
137-
cb.expire_total = cmd->expire_total;
138-
cb.expire_unreachable = cmd->expire_unreachable;
235+
cb.cmd = cmd;
139236
for_each_reflog_ent(ref, expire_reflog_ent, &cb);
140237
finish:
141238
if (cb.newlog) {
@@ -164,9 +261,6 @@ static int reflog_expire_config(const char *var, const char *value)
164261
return 0;
165262
}
166263

167-
static const char reflog_expire_usage[] =
168-
"git-reflog expire [--dry-run] [--expire=<time>] [--expire-unreachable=<time>] [--all] <refs>...";
169-
170264
static int cmd_reflog_expire(int argc, const char **argv, const char *prefix)
171265
{
172266
struct cmd_reflog_expire_cb cb;
@@ -186,6 +280,12 @@ static int cmd_reflog_expire(int argc, const char **argv, const char *prefix)
186280
cb.expire_total = default_reflog_expire;
187281
cb.expire_unreachable = default_reflog_expire_unreachable;
188282

283+
/*
284+
* We can trust the commits and objects reachable from refs
285+
* even in older repository. We cannot trust what's reachable
286+
* from reflog if the repository was pruned with older git.
287+
*/
288+
189289
for (i = 1; i < argc; i++) {
190290
const char *arg = argv[i];
191291
if (!strcmp(arg, "--dry-run") || !strcmp(arg, "-n"))
@@ -194,8 +294,12 @@ static int cmd_reflog_expire(int argc, const char **argv, const char *prefix)
194294
cb.expire_total = approxidate(arg + 9);
195295
else if (!strncmp(arg, "--expire-unreachable=", 21))
196296
cb.expire_unreachable = approxidate(arg + 21);
297+
else if (!strcmp(arg, "--stale-fix"))
298+
cb.stalefix = 1;
197299
else if (!strcmp(arg, "--all"))
198300
do_all = 1;
301+
else if (!strcmp(arg, "--verbose"))
302+
cb.verbose = 1;
199303
else if (!strcmp(arg, "--")) {
200304
i++;
201305
break;
@@ -205,6 +309,15 @@ static int cmd_reflog_expire(int argc, const char **argv, const char *prefix)
205309
else
206310
break;
207311
}
312+
if (cb.stalefix) {
313+
init_revisions(&cb.revs, prefix);
314+
if (cb.verbose)
315+
printf("Marking reachable objects...");
316+
mark_reachable_objects(&cb.revs, 0);
317+
if (cb.verbose)
318+
putchar('\n');
319+
}
320+
208321
if (do_all)
209322
status |= for_each_ref(expire_reflog, &cb);
210323
while (i < argc) {
@@ -219,6 +332,10 @@ static int cmd_reflog_expire(int argc, const char **argv, const char *prefix)
219332
return status;
220333
}
221334

335+
/*
336+
* main "reflog"
337+
*/
338+
222339
/* Usage text for the top-level "git-reflog" command. */
static const char reflog_usage[] =
223340
"git-reflog (expire | ...)";
224341

0 commit comments

Comments
 (0)