Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 90 additions & 1 deletion pre_commit/commands/gc.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,98 @@
from __future__ import annotations

import os.path
from typing import Any

import pre_commit.constants as C
from pre_commit import output
from pre_commit.clientlib import InvalidConfigError
from pre_commit.clientlib import InvalidManifestError
from pre_commit.clientlib import load_config
from pre_commit.clientlib import load_manifest
from pre_commit.clientlib import LOCAL
from pre_commit.clientlib import META
from pre_commit.store import Store
from pre_commit.util import rmtree


def _mark_used_repos(
    store: Store,
    all_repos: dict[tuple[str, str], str],
    unused_repos: set[tuple[str, str]],
    repo: dict[str, Any],
) -> None:
    """Discard from ``unused_repos`` the store entries that ``repo`` uses.

    ``all_repos`` maps ``(repo, ref)`` keys to a path on disk and
    ``unused_repos`` is the set of such keys still considered unused; it is
    mutated in place.  ``repo`` is a single entry of a config's ``repos``
    list.  Nothing is returned.
    """
    source = repo['repo']

    # nothing is marked for meta repos
    if source == META:
        return

    if source == LOCAL:
        # local hooks are keyed by their additional dependencies together
        # with the fixed local repo version
        for local_hook in repo['hooks']:
            extra_deps = local_hook.get('additional_dependencies')
            db_name = store.db_repo_name(source, extra_deps)
            unused_repos.discard((db_name, C.LOCAL_REPO_VERSION))
        return

    rev = repo['rev']
    key = (source, rev)
    clone_path = all_repos.get(key)
    # can't inspect manifest if it isn't cloned
    if clone_path is None:
        return

    manifest_file = os.path.join(clone_path, C.MANIFEST_FILE)
    try:
        manifest = load_manifest(manifest_file)
    except InvalidManifestError:
        # an unreadable manifest leaves the repo in `unused_repos`
        return

    unused_repos.discard(key)
    manifest_hooks = {entry['id']: entry for entry in manifest}

    for configured in repo['hooks']:
        hook_id = configured['id']
        if hook_id in manifest_hooks:
            # the config's dependencies win; otherwise use the manifest's
            manifest_deps = manifest_hooks[hook_id]['additional_dependencies']
            deps = configured.get('additional_dependencies', manifest_deps)
            unused_repos.discard((store.db_repo_name(source, deps), rev))


def _gc(store: Store) -> int:
    """Delete repos that no loadable config uses; return how many.

    Runs under the store's exclusive lock and a database connection.
    Configs that fail to load with ``InvalidConfigError`` are removed from
    the ``configs`` table.  Each unused repo is removed from the ``repos``
    table and its directory is deleted with ``rmtree``.
    """
    with store.exclusive_lock(), store.connect() as db:
        store._create_configs_table(db)

        repo_rows = db.execute('SELECT repo, ref, path FROM repos').fetchall()
        all_repos = {(name, ref): path for name, ref, path in repo_rows}
        # every repo starts out unused until a config marks it
        unused_repos = set(all_repos)

        config_rows = db.execute('SELECT path FROM configs').fetchall()

        dead_config_params = []
        for (config_path,) in config_rows:
            try:
                config = load_config(config_path)
            except InvalidConfigError:
                dead_config_params.append((config_path,))
                continue

            for repo in config['repos']:
                _mark_used_repos(store, all_repos, unused_repos, repo)

        db.executemany(
            'DELETE FROM configs WHERE path = ?',
            dead_config_params,
        )

        db.executemany(
            'DELETE FROM repos WHERE repo = ? and ref = ?',
            sorted(unused_repos),
        )
        for key in unused_repos:
            rmtree(all_repos[key])

        return len(unused_repos)


def gc(store: Store) -> int:
    """Run garbage collection on ``store`` and report the result.

    Writes a single ``'<n> repo(s) removed.'`` line, where ``<n>`` is the
    value returned by ``_gc(store)``, and always returns ``0``.
    """
    # Only the module-level `_gc` is called: also calling `store.gc()` would
    # run the collection a second time and write a second output line.
    output.write_line(f'{_gc(store)} repo(s) removed.')
    return 0
80 changes: 0 additions & 80 deletions pre_commit/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from collections.abc import Callable
from collections.abc import Generator
from collections.abc import Sequence
from typing import Any

import pre_commit.constants as C
from pre_commit import clientlib
Expand All @@ -18,7 +17,6 @@
from pre_commit.util import clean_path_on_failure
from pre_commit.util import cmd_output_b
from pre_commit.util import resource_text
from pre_commit.util import rmtree


logger = logging.getLogger('pre_commit')
Expand Down Expand Up @@ -235,81 +233,3 @@ def mark_config_used(self, path: str) -> None:
# TODO: eventually remove this and only create in _create
self._create_configs_table(db)
db.execute('INSERT OR IGNORE INTO configs VALUES (?)', (path,))

def _mark_used_repos(
    self,
    all_repos: dict[tuple[str, str], str],
    unused_repos: set[tuple[str, str]],
    repo: dict[str, Any],
) -> None:
    """Discard from ``unused_repos`` the entries that ``repo`` uses.

    ``all_repos`` maps ``(repo, ref)`` keys to a path on disk;
    ``unused_repos`` holds the keys still considered unused and is mutated
    in place.  ``repo`` is a single entry of a config's ``repos`` list.
    """
    repo_name = repo['repo']

    if repo_name == clientlib.META:
        # nothing is marked for meta repos
        return

    if repo_name == clientlib.LOCAL:
        for hook in repo['hooks']:
            local_deps = hook.get('additional_dependencies')
            local_name = self.db_repo_name(repo_name, local_deps)
            unused_repos.discard((local_name, C.LOCAL_REPO_VERSION))
        return

    ref = repo['rev']
    key = (repo_name, ref)
    path = all_repos.get(key)
    # can't inspect manifest if it isn't cloned
    if path is None:
        return

    manifest_path = os.path.join(path, C.MANIFEST_FILE)
    try:
        manifest = clientlib.load_manifest(manifest_path)
    except clientlib.InvalidManifestError:
        # an unreadable manifest leaves the repo in `unused_repos`
        return

    unused_repos.discard(key)
    by_id = {manifest_hook['id']: manifest_hook for manifest_hook in manifest}

    for hook in repo['hooks']:
        hook_id = hook['id']
        if hook_id not in by_id:
            continue

        # dependencies from the config take precedence over the manifest's
        default_deps = by_id[hook_id]['additional_dependencies']
        deps = hook.get('additional_dependencies', default_deps)
        unused_repos.discard((self.db_repo_name(repo_name, deps), ref))

def gc(self) -> int:
    """Delete repos that no loadable config uses; return how many.

    Runs under the exclusive lock and a database connection.  Configs that
    fail to load with ``InvalidConfigError`` are removed from the
    ``configs`` table.  Each unused repo is removed from the ``repos``
    table and its directory is deleted with ``rmtree``.
    """
    with self.exclusive_lock(), self.connect() as db:
        self._create_configs_table(db)

        repo_rows = db.execute('SELECT repo, ref, path FROM repos').fetchall()
        all_repos = {(name, ref): path for name, ref, path in repo_rows}
        # every repo starts out unused until a config marks it
        unused_repos = set(all_repos)

        config_rows = db.execute('SELECT path FROM configs').fetchall()

        dead_config_params = []
        for (config_path,) in config_rows:
            try:
                config = clientlib.load_config(config_path)
            except clientlib.InvalidConfigError:
                dead_config_params.append((config_path,))
                continue

            for repo in config['repos']:
                self._mark_used_repos(all_repos, unused_repos, repo)

        db.executemany(
            'DELETE FROM configs WHERE path = ?',
            dead_config_params,
        )

        db.executemany(
            'DELETE FROM repos WHERE repo = ? and ref = ?',
            sorted(unused_repos),
        )
        for key in unused_repos:
            rmtree(all_repos[key])

        return len(unused_repos)
8 changes: 8 additions & 0 deletions tests/commands/gc_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,3 +165,11 @@ def test_invalid_manifest_gcd(tempdir_factory, store, in_git_dir, cap_out):
assert _config_count(store) == 1
assert _repo_count(store) == 0
assert cap_out.get().splitlines()[-1] == '1 repo(s) removed.'


def test_gc_pre_1_14_roll_forward(store, cap_out):
    """``gc`` succeeds and removes nothing after ``configs`` is dropped."""
    # simulate pre-1.14.0
    with store.connect() as conn:
        conn.executescript('DROP TABLE configs')

    exit_code = gc(store)
    assert not exit_code

    captured = cap_out.get()
    assert captured == '0 repo(s) removed.\n'
13 changes: 2 additions & 11 deletions tests/store_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,18 +289,9 @@ def test_mark_config_as_used_does_not_exist(store):
assert _select_all_configs(store) == []


def _simulate_pre_1_14_0(store):
    """Drop the ``configs`` table from the store's database."""
    with store.connect() as conn:
        conn.executescript('DROP TABLE configs')


def test_gc_roll_forward(store):
    """``store.gc()`` reports zero removals once ``configs`` is dropped."""
    _simulate_pre_1_14_0(store)
    removed = store.gc()
    assert removed == 0


def test_mark_config_as_used_roll_forward(store, tmpdir):
    """Re-run ``test_mark_config_as_used`` after ``configs`` is dropped."""
    # The table is dropped exactly once: a second `DROP TABLE configs`
    # (without `IF EXISTS`) would fail because the table no longer exists.
    with store.connect() as db:  # simulate pre-1.14.0
        db.executescript('DROP TABLE configs')
    test_mark_config_as_used(store, tmpdir)


Expand Down