Skip to content

Commit 49e28ee

Browse files
authored
Merge pull request #3578 from pre-commit/store-gc-refactor
refactor gc into store
2 parents 17cf886 + d5c273a commit 49e28ee

File tree

4 files changed

+101
-111
lines changed

4 files changed

+101
-111
lines changed

pre_commit/commands/gc.py

Lines changed: 1 addition & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -1,89 +1,9 @@
11
from __future__ import annotations
22

3-
import os.path
4-
from typing import Any
5-
6-
import pre_commit.constants as C
73
from pre_commit import output
8-
from pre_commit.clientlib import InvalidConfigError
9-
from pre_commit.clientlib import InvalidManifestError
10-
from pre_commit.clientlib import load_config
11-
from pre_commit.clientlib import load_manifest
12-
from pre_commit.clientlib import LOCAL
13-
from pre_commit.clientlib import META
144
from pre_commit.store import Store
155

166

17-
def _mark_used_repos(
18-
store: Store,
19-
all_repos: dict[tuple[str, str], str],
20-
unused_repos: set[tuple[str, str]],
21-
repo: dict[str, Any],
22-
) -> None:
23-
if repo['repo'] == META:
24-
return
25-
elif repo['repo'] == LOCAL:
26-
for hook in repo['hooks']:
27-
deps = hook.get('additional_dependencies')
28-
unused_repos.discard((
29-
store.db_repo_name(repo['repo'], deps), C.LOCAL_REPO_VERSION,
30-
))
31-
else:
32-
key = (repo['repo'], repo['rev'])
33-
path = all_repos.get(key)
34-
# can't inspect manifest if it isn't cloned
35-
if path is None:
36-
return
37-
38-
try:
39-
manifest = load_manifest(os.path.join(path, C.MANIFEST_FILE))
40-
except InvalidManifestError:
41-
return
42-
else:
43-
unused_repos.discard(key)
44-
by_id = {hook['id']: hook for hook in manifest}
45-
46-
for hook in repo['hooks']:
47-
if hook['id'] not in by_id:
48-
continue
49-
50-
deps = hook.get(
51-
'additional_dependencies',
52-
by_id[hook['id']]['additional_dependencies'],
53-
)
54-
unused_repos.discard((
55-
store.db_repo_name(repo['repo'], deps), repo['rev'],
56-
))
57-
58-
59-
def _gc_repos(store: Store) -> int:
60-
configs = store.select_all_configs()
61-
repos = store.select_all_repos()
62-
63-
# delete config paths which do not exist
64-
dead_configs = [p for p in configs if not os.path.exists(p)]
65-
live_configs = [p for p in configs if os.path.exists(p)]
66-
67-
all_repos = {(repo, ref): path for repo, ref, path in repos}
68-
unused_repos = set(all_repos)
69-
for config_path in live_configs:
70-
try:
71-
config = load_config(config_path)
72-
except InvalidConfigError:
73-
dead_configs.append(config_path)
74-
continue
75-
else:
76-
for repo in config['repos']:
77-
_mark_used_repos(store, all_repos, unused_repos, repo)
78-
79-
store.delete_configs(dead_configs)
80-
for db_repo_name, ref in unused_repos:
81-
store.delete_repo(db_repo_name, ref, all_repos[(db_repo_name, ref)])
82-
return len(unused_repos)
83-
84-
857
def gc(store: Store) -> int:
86-
with store.exclusive_lock():
87-
repos_removed = _gc_repos(store)
88-
output.write_line(f'{repos_removed} repo(s) removed.')
8+
output.write_line(f'{store.gc()} repo(s) removed.')
899
return 0

pre_commit/store.py

Lines changed: 76 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from collections.abc import Callable
99
from collections.abc import Generator
1010
from collections.abc import Sequence
11+
from typing import Any
1112

1213
import pre_commit.constants as C
1314
from pre_commit import clientlib
@@ -96,7 +97,7 @@ def __init__(self, directory: str | None = None) -> None:
9697
' PRIMARY KEY (repo, ref)'
9798
');',
9899
)
99-
self._create_config_table(db)
100+
self._create_configs_table(db)
100101

101102
# Atomic file move
102103
os.replace(tmpfile, self.db_path)
@@ -215,7 +216,7 @@ def make_local(self, deps: Sequence[str]) -> str:
215216
'local', C.LOCAL_REPO_VERSION, deps, _make_local_repo,
216217
)
217218

218-
def _create_config_table(self, db: sqlite3.Connection) -> None:
219+
def _create_configs_table(self, db: sqlite3.Connection) -> None:
219220
db.executescript(
220221
'CREATE TABLE IF NOT EXISTS configs ('
221222
' path TEXT NOT NULL,'
@@ -232,28 +233,83 @@ def mark_config_used(self, path: str) -> None:
232233
return
233234
with self.connect() as db:
234235
# TODO: eventually remove this and only create in _create
235-
self._create_config_table(db)
236+
self._create_configs_table(db)
236237
db.execute('INSERT OR IGNORE INTO configs VALUES (?)', (path,))
237238

238-
def select_all_configs(self) -> list[str]:
239-
with self.connect() as db:
240-
self._create_config_table(db)
241-
rows = db.execute('SELECT path FROM configs').fetchall()
242-
return [path for path, in rows]
239+
def _mark_used_repos(
240+
self,
241+
all_repos: dict[tuple[str, str], str],
242+
unused_repos: set[tuple[str, str]],
243+
repo: dict[str, Any],
244+
) -> None:
245+
if repo['repo'] == clientlib.META:
246+
return
247+
elif repo['repo'] == clientlib.LOCAL:
248+
for hook in repo['hooks']:
249+
deps = hook.get('additional_dependencies')
250+
unused_repos.discard((
251+
self.db_repo_name(repo['repo'], deps),
252+
C.LOCAL_REPO_VERSION,
253+
))
254+
else:
255+
key = (repo['repo'], repo['rev'])
256+
path = all_repos.get(key)
257+
# can't inspect manifest if it isn't cloned
258+
if path is None:
259+
return
243260

244-
def delete_configs(self, configs: list[str]) -> None:
245-
with self.connect() as db:
246-
rows = [(path,) for path in configs]
247-
db.executemany('DELETE FROM configs WHERE path = ?', rows)
261+
try:
262+
manifest = clientlib.load_manifest(
263+
os.path.join(path, C.MANIFEST_FILE),
264+
)
265+
except clientlib.InvalidManifestError:
266+
return
267+
else:
268+
unused_repos.discard(key)
269+
by_id = {hook['id']: hook for hook in manifest}
248270

249-
def select_all_repos(self) -> list[tuple[str, str, str]]:
250-
with self.connect() as db:
251-
return db.execute('SELECT repo, ref, path from repos').fetchall()
271+
for hook in repo['hooks']:
272+
if hook['id'] not in by_id:
273+
continue
252274

253-
def delete_repo(self, db_repo_name: str, ref: str, path: str) -> None:
254-
with self.connect() as db:
255-
db.execute(
275+
deps = hook.get(
276+
'additional_dependencies',
277+
by_id[hook['id']]['additional_dependencies'],
278+
)
279+
unused_repos.discard((
280+
self.db_repo_name(repo['repo'], deps), repo['rev'],
281+
))
282+
283+
def gc(self) -> int:
284+
with self.exclusive_lock(), self.connect() as db:
285+
self._create_configs_table(db)
286+
287+
repos = db.execute('SELECT repo, ref, path FROM repos').fetchall()
288+
all_repos = {(repo, ref): path for repo, ref, path in repos}
289+
unused_repos = set(all_repos)
290+
291+
configs_rows = db.execute('SELECT path FROM configs').fetchall()
292+
configs = [path for path, in configs_rows]
293+
294+
dead_configs = []
295+
for config_path in configs:
296+
try:
297+
config = clientlib.load_config(config_path)
298+
except clientlib.InvalidConfigError:
299+
dead_configs.append(config_path)
300+
continue
301+
else:
302+
for repo in config['repos']:
303+
self._mark_used_repos(all_repos, unused_repos, repo)
304+
305+
paths = [(path,) for path in dead_configs]
306+
db.executemany('DELETE FROM configs WHERE path = ?', paths)
307+
308+
db.executemany(
256309
'DELETE FROM repos WHERE repo = ? and ref = ?',
257-
(db_repo_name, ref),
310+
sorted(unused_repos),
258311
)
259-
rmtree(path)
312+
for k in unused_repos:
313+
rmtree(all_repos[k])
314+
315+
return len(unused_repos)

tests/commands/gc_test.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,13 @@
1919

2020

2121
def _repo_count(store):
22-
return len(store.select_all_repos())
22+
with store.connect() as db:
23+
return db.execute('SELECT COUNT(1) FROM repos').fetchone()[0]
2324

2425

2526
def _config_count(store):
26-
return len(store.select_all_configs())
27+
with store.connect() as db:
28+
return db.execute('SELECT COUNT(1) FROM configs').fetchone()[0]
2729

2830

2931
def _remove_config_assert_cleared(store, cap_out):
@@ -153,7 +155,8 @@ def test_invalid_manifest_gcd(tempdir_factory, store, in_git_dir, cap_out):
153155
install_hooks(C.CONFIG_FILE, store)
154156

155157
# we'll "break" the manifest to simulate an old version clone
156-
(_, _, path), = store.select_all_repos()
158+
with store.connect() as db:
159+
path, = db.execute('SELECT path FROM repos').fetchone()
157160
os.remove(os.path.join(path, C.MANIFEST_FILE))
158161

159162
assert _config_count(store) == 1

tests/store_test.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,17 @@
2222
from testing.util import xfailif_windows
2323

2424

25+
def _select_all_configs(store: Store) -> list[str]:
26+
with store.connect() as db:
27+
rows = db.execute('SELECT * FROM configs').fetchall()
28+
return [path for path, in rows]
29+
30+
31+
def _select_all_repos(store: Store) -> list[tuple[str, str, str]]:
32+
with store.connect() as db:
33+
return db.execute('SELECT repo, ref, path FROM repos').fetchall()
34+
35+
2536
def test_our_session_fixture_works():
2637
"""There's a session fixture which makes `Store` invariantly raise to
2738
prevent writing to the home directory.
@@ -91,7 +102,7 @@ def test_clone(store, tempdir_factory, caplog):
91102
assert git.head_rev(ret) == rev
92103

93104
# Assert there's an entry in the sqlite db for this
94-
assert store.select_all_repos() == [(path, rev, ret)]
105+
assert _select_all_repos(store) == [(path, rev, ret)]
95106

96107

97108
def test_warning_for_deprecated_stages_on_init(store, tempdir_factory, caplog):
@@ -217,7 +228,7 @@ def fake_shallow_clone(self, *args, **kwargs):
217228
assert git.head_rev(ret) == rev
218229

219230
# Assert there's an entry in the sqlite db for this
220-
assert store.select_all_repos() == [(path, rev, ret)]
231+
assert _select_all_repos(store) == [(path, rev, ret)]
221232

222233

223234
def test_clone_tag_not_on_mainline(store, tempdir_factory):
@@ -265,7 +276,7 @@ def test_mark_config_as_used(store, tmpdir):
265276
with tmpdir.as_cwd():
266277
f = tmpdir.join('f').ensure()
267278
store.mark_config_used('f')
268-
assert store.select_all_configs() == [f.strpath]
279+
assert _select_all_configs(store) == [f.strpath]
269280

270281

271282
def test_mark_config_as_used_idempotent(store, tmpdir):
@@ -275,17 +286,17 @@ def test_mark_config_as_used_idempotent(store, tmpdir):
275286

276287
def test_mark_config_as_used_does_not_exist(store):
277288
store.mark_config_used('f')
278-
assert store.select_all_configs() == []
289+
assert _select_all_configs(store) == []
279290

280291

281292
def _simulate_pre_1_14_0(store):
282293
with store.connect() as db:
283294
db.executescript('DROP TABLE configs')
284295

285296

286-
def test_select_all_configs_roll_forward(store):
297+
def test_gc_roll_forward(store):
287298
_simulate_pre_1_14_0(store)
288-
assert store.select_all_configs() == []
299+
assert store.gc() == 0
289300

290301

291302
def test_mark_config_as_used_roll_forward(store, tmpdir):
@@ -314,7 +325,7 @@ def _chmod_minus_w(p):
314325
assert store.readonly
315326
# should be skipped due to readonly
316327
store.mark_config_used(str(cfg))
317-
assert store.select_all_configs() == []
328+
assert _select_all_configs(store) == []
318329

319330

320331
def test_clone_with_recursive_submodules(store, tmp_path):

0 commit comments

Comments
 (0)