move logic for gc back to commands.gc

This commit is contained in:
Anthony Sottile 2025-11-19 14:29:50 -05:00
parent 1b32c50bc7
commit 66278a9a0b
4 changed files with 100 additions and 92 deletions

View file

@ -1,9 +1,98 @@
from __future__ import annotations from __future__ import annotations
import os.path
from typing import Any
import pre_commit.constants as C
from pre_commit import output from pre_commit import output
from pre_commit.clientlib import InvalidConfigError
from pre_commit.clientlib import InvalidManifestError
from pre_commit.clientlib import load_config
from pre_commit.clientlib import load_manifest
from pre_commit.clientlib import LOCAL
from pre_commit.clientlib import META
from pre_commit.store import Store from pre_commit.store import Store
from pre_commit.util import rmtree
def _mark_used_repos(
        store: Store,
        all_repos: dict[tuple[str, str], str],
        unused_repos: set[tuple[str, str]],
        repo: dict[str, Any],
) -> None:
    """Discard from ``unused_repos`` every key that ``repo`` references.

    ``repo`` is one entry of a loaded config's ``repos`` list.

    * a ``META`` repo discards nothing
    * a ``LOCAL`` repo discards, for each hook, the key made of
      ``store.db_repo_name(...)`` and ``C.LOCAL_REPO_VERSION``
    * any other repo discards its own ``(repo, rev)`` key and, for each
      configured hook found in the manifest, the key built from that
      hook's ``additional_dependencies`` (falling back to the manifest's
      value when the config does not set one)

    A repo that is absent from ``all_repos`` or whose manifest raises
    ``InvalidManifestError`` is left untouched in ``unused_repos``.
    """
    repo_url = repo['repo']

    if repo_url == META:
        return

    if repo_url == LOCAL:
        for local_hook in repo['hooks']:
            local_name = store.db_repo_name(
                repo_url, local_hook.get('additional_dependencies'),
            )
            unused_repos.discard((local_name, C.LOCAL_REPO_VERSION))
        return

    rev = repo['rev']
    repo_key = (repo_url, rev)
    clone_path = all_repos.get(repo_key)
    # can't inspect manifest if it isn't cloned
    if clone_path is None:
        return

    try:
        manifest = load_manifest(os.path.join(clone_path, C.MANIFEST_FILE))
    except InvalidManifestError:
        return

    unused_repos.discard(repo_key)
    manifest_hooks = {entry['id']: entry for entry in manifest}

    for configured in repo['hooks']:
        hook_id = configured['id']
        if hook_id in manifest_hooks:
            # the manifest default is looked up eagerly, exactly as before
            deps = configured.get(
                'additional_dependencies',
                manifest_hooks[hook_id]['additional_dependencies'],
            )
            unused_repos.discard((store.db_repo_name(repo_url, deps), rev))
# Garbage-collect the store: delete the database rows and on-disk
# directories of every repo that no loadable tracked config references,
# forget configs that fail to load, and return the number of repos removed.
# NOTE(review): indentation was lost in this rendering -- confirm which of
# the trailing statements (rmtree loop, return) run while the lock and
# connection are still held.
def _gc(store: Store) -> int:
with store.exclusive_lock(), store.connect() as db:
# presumably ensures the `configs` table exists for stores created before
# 1.14.0 (the roll-forward test drops it first) -- confirm against Store
store._create_configs_table(db)
repos = db.execute('SELECT repo, ref, path FROM repos').fetchall()
# (repo, ref) -> path on disk for every repo recorded in the database
all_repos = {(repo, ref): path for repo, ref, path in repos}
# start by assuming every repo is unused; _mark_used_repos discards the
# ones that a config still references
unused_repos = set(all_repos)
configs_rows = db.execute('SELECT path FROM configs').fetchall()
# each row is a 1-tuple; unpack it to the bare path
configs = [path for path, in configs_rows]
dead_configs = []
for config_path in configs:
try:
config = load_config(config_path)
except InvalidConfigError:
# a config that cannot be loaded is dropped from the table below
dead_configs.append(config_path)
continue
else:
for repo in config['repos']:
_mark_used_repos(store, all_repos, unused_repos, repo)
# executemany wants one parameter tuple per statement execution
paths = [(path,) for path in dead_configs]
db.executemany('DELETE FROM configs WHERE path = ?', paths)
db.executemany(
'DELETE FROM repos WHERE repo = ? and ref = ?',
sorted(unused_repos),
)
# remove the directory recorded for each repo that is still unused
for k in unused_repos:
rmtree(all_repos[k])
return len(unused_repos)
# `gc` command entry point: writes "<count> repo(s) removed." via
# output.write_line and always returns 0.  Each line below shows the old
# and new side of the diff; the count comes from store.gc() on the old
# side and from _gc(store) on the new side.
def gc(store: Store) -> int: def gc(store: Store) -> int:
output.write_line(f'{store.gc()} repo(s) removed.') output.write_line(f'{_gc(store)} repo(s) removed.')
return 0 return 0

View file

@ -8,7 +8,6 @@ import tempfile
from collections.abc import Callable from collections.abc import Callable
from collections.abc import Generator from collections.abc import Generator
from collections.abc import Sequence from collections.abc import Sequence
from typing import Any
import pre_commit.constants as C import pre_commit.constants as C
from pre_commit import clientlib from pre_commit import clientlib
@ -18,7 +17,6 @@ from pre_commit.util import CalledProcessError
from pre_commit.util import clean_path_on_failure from pre_commit.util import clean_path_on_failure
from pre_commit.util import cmd_output_b from pre_commit.util import cmd_output_b
from pre_commit.util import resource_text from pre_commit.util import resource_text
from pre_commit.util import rmtree
logger = logging.getLogger('pre_commit') logger = logging.getLogger('pre_commit')
@ -235,81 +233,3 @@ class Store:
# TODO: eventually remove this and only create in _create # TODO: eventually remove this and only create in _create
self._create_configs_table(db) self._create_configs_table(db)
db.execute('INSERT OR IGNORE INTO configs VALUES (?)', (path,)) db.execute('INSERT OR IGNORE INTO configs VALUES (?)', (path,))
def _mark_used_repos(
        self,
        all_repos: dict[tuple[str, str], str],
        unused_repos: set[tuple[str, str]],
        repo: dict[str, Any],
) -> None:
    """Discard from ``unused_repos`` every key that ``repo`` references.

    ``clientlib.META`` repos discard nothing.  ``clientlib.LOCAL`` repos
    discard one key per hook, paired with ``C.LOCAL_REPO_VERSION``.  Any
    other repo discards its ``(repo, rev)`` key plus one key per
    configured hook that exists in the manifest.  Repos that are missing
    from ``all_repos``, or whose manifest raises
    ``clientlib.InvalidManifestError``, are left untouched.
    """
    name = repo['repo']

    if name == clientlib.META:
        return

    if name == clientlib.LOCAL:
        for local_hook in repo['hooks']:
            local_deps = local_hook.get('additional_dependencies')
            local_key = (
                self.db_repo_name(name, local_deps),
                C.LOCAL_REPO_VERSION,
            )
            unused_repos.discard(local_key)
        return

    rev = repo['rev']
    cloned_to = all_repos.get((name, rev))
    # can't inspect manifest if it isn't cloned
    if cloned_to is None:
        return

    manifest_path = os.path.join(cloned_to, C.MANIFEST_FILE)
    try:
        manifest = clientlib.load_manifest(manifest_path)
    except clientlib.InvalidManifestError:
        return

    unused_repos.discard((name, rev))
    known_hooks = {item['id']: item for item in manifest}

    for wanted in repo['hooks']:
        wanted_id = wanted['id']
        if wanted_id not in known_hooks:
            continue

        # the manifest value is read eagerly, matching the original call
        manifest_deps = known_hooks[wanted_id]['additional_dependencies']
        deps = wanted.get('additional_dependencies', manifest_deps)
        unused_repos.discard((self.db_repo_name(name, deps), rev))
# Garbage-collect this store: delete the database rows and on-disk
# directories of every repo that no loadable tracked config references,
# forget configs that fail to load, and return the number of repos removed.
# NOTE(review): indentation was lost in this rendering -- confirm which of
# the trailing statements (rmtree loop, return) run while the lock and
# connection are still held.
def gc(self) -> int:
with self.exclusive_lock(), self.connect() as db:
self._create_configs_table(db)
repos = db.execute('SELECT repo, ref, path FROM repos').fetchall()
# (repo, ref) -> path on disk for every repo recorded in the database
all_repos = {(repo, ref): path for repo, ref, path in repos}
# start by assuming every repo is unused; _mark_used_repos discards the
# ones that a config still references
unused_repos = set(all_repos)
configs_rows = db.execute('SELECT path FROM configs').fetchall()
# each row is a 1-tuple; unpack it to the bare path
configs = [path for path, in configs_rows]
dead_configs = []
for config_path in configs:
try:
config = clientlib.load_config(config_path)
except clientlib.InvalidConfigError:
# a config that cannot be loaded is dropped from the table below
dead_configs.append(config_path)
continue
else:
for repo in config['repos']:
self._mark_used_repos(all_repos, unused_repos, repo)
# executemany wants one parameter tuple per statement execution
paths = [(path,) for path in dead_configs]
db.executemany('DELETE FROM configs WHERE path = ?', paths)
db.executemany(
'DELETE FROM repos WHERE repo = ? and ref = ?',
sorted(unused_repos),
)
# remove the directory recorded for each repo that is still unused
for k in unused_repos:
rmtree(all_repos[k])
return len(unused_repos)

View file

@ -165,3 +165,11 @@ def test_invalid_manifest_gcd(tempdir_factory, store, in_git_dir, cap_out):
assert _config_count(store) == 1 assert _config_count(store) == 1
assert _repo_count(store) == 0 assert _repo_count(store) == 0
assert cap_out.get().splitlines()[-1] == '1 repo(s) removed.' assert cap_out.get().splitlines()[-1] == '1 repo(s) removed.'
def test_gc_pre_1_14_roll_forward(store, cap_out):
    """`gc` still succeeds and reports zero removals when the `configs`
    table has been dropped beforehand.
    """
    # simulate pre-1.14.0
    with store.connect() as conn:
        conn.executescript('DROP TABLE configs')

    retcode = gc(store)
    printed = cap_out.get()

    assert not retcode
    assert printed == '0 repo(s) removed.\n'

View file

@ -289,18 +289,9 @@ def test_mark_config_as_used_does_not_exist(store):
assert _select_all_configs(store) == [] assert _select_all_configs(store) == []
def _simulate_pre_1_14_0(store):
    """Drop the `configs` table from the store's database."""
    with store.connect() as conn:
        conn.executescript('DROP TABLE configs')
def test_gc_roll_forward(store):
    """`Store.gc` removes nothing after the `configs` table was dropped."""
    _simulate_pre_1_14_0(store)
    removed = store.gc()
    assert removed == 0
# Re-runs test_mark_config_as_used against a store whose `configs` table
# has been dropped first.  Lines below show the old and new side of the
# diff: the old side calls _simulate_pre_1_14_0(store), the new side
# inlines the DROP TABLE statement.
def test_mark_config_as_used_roll_forward(store, tmpdir): def test_mark_config_as_used_roll_forward(store, tmpdir):
_simulate_pre_1_14_0(store) with store.connect() as db: # simulate pre-1.14.0
db.executescript('DROP TABLE configs')
test_mark_config_as_used(store, tmpdir) test_mark_config_as_used(store, tmpdir)