Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 1 addition & 81 deletions pre_commit/commands/gc.py
Original file line number Diff line number Diff line change
@@ -1,89 +1,9 @@
from __future__ import annotations

import os.path
from typing import Any

import pre_commit.constants as C
from pre_commit import output
from pre_commit.clientlib import InvalidConfigError
from pre_commit.clientlib import InvalidManifestError
from pre_commit.clientlib import load_config
from pre_commit.clientlib import load_manifest
from pre_commit.clientlib import LOCAL
from pre_commit.clientlib import META
from pre_commit.store import Store


def _mark_used_repos(
        store: Store,
        all_repos: dict[tuple[str, str], str],
        unused_repos: set[tuple[str, str]],
        repo: dict[str, Any],
) -> None:
    """Discard from ``unused_repos`` the entries referenced by ``repo``.

    ``repo`` is one entry of a config's ``repos`` list.  ``all_repos`` maps
    ``(name, ref)`` keys to on-disk paths; ``unused_repos`` is mutated in
    place and nothing is returned.

    * ``META`` repos are ignored.
    * ``LOCAL`` repos discard one key per hook, built from
      ``store.db_repo_name`` and ``C.LOCAL_REPO_VERSION``.
    * Any other repo is only processed when its ``(repo, rev)`` key is in
      ``all_repos`` and its manifest loads without ``InvalidManifestError``.
    """
    repo_name = repo['repo']

    if repo_name == META:
        return

    if repo_name == LOCAL:
        for hook in repo['hooks']:
            local_key = (
                store.db_repo_name(
                    repo_name, hook.get('additional_dependencies'),
                ),
                C.LOCAL_REPO_VERSION,
            )
            unused_repos.discard(local_key)
        return

    rev = repo['rev']
    clone_key = (repo_name, rev)
    clone_path = all_repos.get(clone_key)
    if clone_path is None:
        # without a clone there is no manifest to inspect
        return

    manifest_path = os.path.join(clone_path, C.MANIFEST_FILE)
    try:
        manifest = load_manifest(manifest_path)
    except InvalidManifestError:
        return

    unused_repos.discard(clone_key)
    manifest_hooks = {entry['id']: entry for entry in manifest}

    for hook in repo['hooks']:
        hook_id = hook['id']
        if hook_id in manifest_hooks:
            # the manifest's value is looked up unconditionally, even when
            # the config hook supplies its own `additional_dependencies`
            manifest_deps = manifest_hooks[hook_id]['additional_dependencies']
            deps = hook.get('additional_dependencies', manifest_deps)
            unused_repos.discard((store.db_repo_name(repo_name, deps), rev))


def _gc_repos(store: Store) -> int:
configs = store.select_all_configs()
repos = store.select_all_repos()

# delete config paths which do not exist
dead_configs = [p for p in configs if not os.path.exists(p)]
live_configs = [p for p in configs if os.path.exists(p)]

all_repos = {(repo, ref): path for repo, ref, path in repos}
unused_repos = set(all_repos)
for config_path in live_configs:
try:
config = load_config(config_path)
except InvalidConfigError:
dead_configs.append(config_path)
continue
else:
for repo in config['repos']:
_mark_used_repos(store, all_repos, unused_repos, repo)

store.delete_configs(dead_configs)
for db_repo_name, ref in unused_repos:
store.delete_repo(db_repo_name, ref, all_repos[(db_repo_name, ref)])
return len(unused_repos)


def gc(store: Store) -> int:
with store.exclusive_lock():
repos_removed = _gc_repos(store)
output.write_line(f'{repos_removed} repo(s) removed.')
output.write_line(f'{store.gc()} repo(s) removed.')
return 0
96 changes: 76 additions & 20 deletions pre_commit/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from collections.abc import Callable
from collections.abc import Generator
from collections.abc import Sequence
from typing import Any

import pre_commit.constants as C
from pre_commit import clientlib
Expand Down Expand Up @@ -96,7 +97,7 @@ def __init__(self, directory: str | None = None) -> None:
' PRIMARY KEY (repo, ref)'
');',
)
self._create_config_table(db)
self._create_configs_table(db)

# Atomic file move
os.replace(tmpfile, self.db_path)
Expand Down Expand Up @@ -215,7 +216,7 @@ def make_local(self, deps: Sequence[str]) -> str:
'local', C.LOCAL_REPO_VERSION, deps, _make_local_repo,
)

def _create_config_table(self, db: sqlite3.Connection) -> None:
def _create_configs_table(self, db: sqlite3.Connection) -> None:
db.executescript(
'CREATE TABLE IF NOT EXISTS configs ('
' path TEXT NOT NULL,'
Expand All @@ -232,28 +233,83 @@ def mark_config_used(self, path: str) -> None:
return
with self.connect() as db:
# TODO: eventually remove this and only create in _create
self._create_config_table(db)
self._create_configs_table(db)
db.execute('INSERT OR IGNORE INTO configs VALUES (?)', (path,))

def select_all_configs(self) -> list[str]:
with self.connect() as db:
self._create_config_table(db)
rows = db.execute('SELECT path FROM configs').fetchall()
return [path for path, in rows]
def _mark_used_repos(
self,
all_repos: dict[tuple[str, str], str],
unused_repos: set[tuple[str, str]],
repo: dict[str, Any],
) -> None:
if repo['repo'] == clientlib.META:
return
elif repo['repo'] == clientlib.LOCAL:
for hook in repo['hooks']:
deps = hook.get('additional_dependencies')
unused_repos.discard((
self.db_repo_name(repo['repo'], deps),
C.LOCAL_REPO_VERSION,
))
else:
key = (repo['repo'], repo['rev'])
path = all_repos.get(key)
# can't inspect manifest if it isn't cloned
if path is None:
return

def delete_configs(self, configs: list[str]) -> None:
with self.connect() as db:
rows = [(path,) for path in configs]
db.executemany('DELETE FROM configs WHERE path = ?', rows)
try:
manifest = clientlib.load_manifest(
os.path.join(path, C.MANIFEST_FILE),
)
except clientlib.InvalidManifestError:
return
else:
unused_repos.discard(key)
by_id = {hook['id']: hook for hook in manifest}

def select_all_repos(self) -> list[tuple[str, str, str]]:
with self.connect() as db:
return db.execute('SELECT repo, ref, path from repos').fetchall()
for hook in repo['hooks']:
if hook['id'] not in by_id:
continue

def delete_repo(self, db_repo_name: str, ref: str, path: str) -> None:
with self.connect() as db:
db.execute(
deps = hook.get(
'additional_dependencies',
by_id[hook['id']]['additional_dependencies'],
)
unused_repos.discard((
self.db_repo_name(repo['repo'], deps), repo['rev'],
))

def gc(self) -> int:
with self.exclusive_lock(), self.connect() as db:
self._create_configs_table(db)

repos = db.execute('SELECT repo, ref, path FROM repos').fetchall()
all_repos = {(repo, ref): path for repo, ref, path in repos}
unused_repos = set(all_repos)

configs_rows = db.execute('SELECT path FROM configs').fetchall()
configs = [path for path, in configs_rows]

dead_configs = []
for config_path in configs:
try:
config = clientlib.load_config(config_path)
except clientlib.InvalidConfigError:
dead_configs.append(config_path)
continue
else:
for repo in config['repos']:
self._mark_used_repos(all_repos, unused_repos, repo)

paths = [(path,) for path in dead_configs]
db.executemany('DELETE FROM configs WHERE path = ?', paths)

db.executemany(
'DELETE FROM repos WHERE repo = ? and ref = ?',
(db_repo_name, ref),
sorted(unused_repos),
)
rmtree(path)
for k in unused_repos:
rmtree(all_repos[k])

return len(unused_repos)
9 changes: 6 additions & 3 deletions tests/commands/gc_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@


def _repo_count(store):
return len(store.select_all_repos())
with store.connect() as db:
return db.execute('SELECT COUNT(1) FROM repos').fetchone()[0]


def _config_count(store):
return len(store.select_all_configs())
with store.connect() as db:
return db.execute('SELECT COUNT(1) FROM configs').fetchone()[0]


def _remove_config_assert_cleared(store, cap_out):
Expand Down Expand Up @@ -153,7 +155,8 @@ def test_invalid_manifest_gcd(tempdir_factory, store, in_git_dir, cap_out):
install_hooks(C.CONFIG_FILE, store)

# we'll "break" the manifest to simulate an old version clone
(_, _, path), = store.select_all_repos()
with store.connect() as db:
path, = db.execute('SELECT path FROM repos').fetchone()
os.remove(os.path.join(path, C.MANIFEST_FILE))

assert _config_count(store) == 1
Expand Down
25 changes: 18 additions & 7 deletions tests/store_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,17 @@
from testing.util import xfailif_windows


def _select_all_configs(store: Store) -> list[str]:
with store.connect() as db:
rows = db.execute('SELECT * FROM configs').fetchall()
return [path for path, in rows]


def _select_all_repos(store: Store) -> list[tuple[str, str, str]]:
with store.connect() as db:
return db.execute('SELECT repo, ref, path FROM repos').fetchall()


def test_our_session_fixture_works():
"""There's a session fixture which makes `Store` invariantly raise to
prevent writing to the home directory.
Expand Down Expand Up @@ -91,7 +102,7 @@ def test_clone(store, tempdir_factory, caplog):
assert git.head_rev(ret) == rev

# Assert there's an entry in the sqlite db for this
assert store.select_all_repos() == [(path, rev, ret)]
assert _select_all_repos(store) == [(path, rev, ret)]


def test_warning_for_deprecated_stages_on_init(store, tempdir_factory, caplog):
Expand Down Expand Up @@ -217,7 +228,7 @@ def fake_shallow_clone(self, *args, **kwargs):
assert git.head_rev(ret) == rev

# Assert there's an entry in the sqlite db for this
assert store.select_all_repos() == [(path, rev, ret)]
assert _select_all_repos(store) == [(path, rev, ret)]


def test_clone_tag_not_on_mainline(store, tempdir_factory):
Expand Down Expand Up @@ -265,7 +276,7 @@ def test_mark_config_as_used(store, tmpdir):
with tmpdir.as_cwd():
f = tmpdir.join('f').ensure()
store.mark_config_used('f')
assert store.select_all_configs() == [f.strpath]
assert _select_all_configs(store) == [f.strpath]


def test_mark_config_as_used_idempotent(store, tmpdir):
Expand All @@ -275,17 +286,17 @@ def test_mark_config_as_used_idempotent(store, tmpdir):

def test_mark_config_as_used_does_not_exist(store):
store.mark_config_used('f')
assert store.select_all_configs() == []
assert _select_all_configs(store) == []


def _simulate_pre_1_14_0(store):
with store.connect() as db:
db.executescript('DROP TABLE configs')


def test_select_all_configs_roll_forward(store):
def test_gc_roll_forward(store):
_simulate_pre_1_14_0(store)
assert store.select_all_configs() == []
assert store.gc() == 0


def test_mark_config_as_used_roll_forward(store, tmpdir):
Expand Down Expand Up @@ -314,7 +325,7 @@ def _chmod_minus_w(p):
assert store.readonly
# should be skipped due to readonly
store.mark_config_used(str(cfg))
assert store.select_all_configs() == []
assert _select_all_configs(store) == []


def test_clone_with_recursive_submodules(store, tmp_path):
Expand Down