move logic for gc back to commands.gc

This commit is contained in:
Anthony Sottile 2025-11-19 14:29:50 -05:00
parent 1b32c50bc7
commit 66278a9a0b
4 changed files with 100 additions and 92 deletions

View file

@ -1,9 +1,98 @@
from __future__ import annotations
import os.path
from typing import Any
import pre_commit.constants as C
from pre_commit import output
from pre_commit.clientlib import InvalidConfigError
from pre_commit.clientlib import InvalidManifestError
from pre_commit.clientlib import load_config
from pre_commit.clientlib import load_manifest
from pre_commit.clientlib import LOCAL
from pre_commit.clientlib import META
from pre_commit.store import Store
from pre_commit.util import rmtree
def _mark_used_repos(
store: Store,
all_repos: dict[tuple[str, str], str],
unused_repos: set[tuple[str, str]],
repo: dict[str, Any],
) -> None:
if repo['repo'] == META:
return
elif repo['repo'] == LOCAL:
for hook in repo['hooks']:
deps = hook.get('additional_dependencies')
unused_repos.discard((
store.db_repo_name(repo['repo'], deps),
C.LOCAL_REPO_VERSION,
))
else:
key = (repo['repo'], repo['rev'])
path = all_repos.get(key)
# can't inspect manifest if it isn't cloned
if path is None:
return
try:
manifest = load_manifest(os.path.join(path, C.MANIFEST_FILE))
except InvalidManifestError:
return
else:
unused_repos.discard(key)
by_id = {hook['id']: hook for hook in manifest}
for hook in repo['hooks']:
if hook['id'] not in by_id:
continue
deps = hook.get(
'additional_dependencies',
by_id[hook['id']]['additional_dependencies'],
)
unused_repos.discard((
store.db_repo_name(repo['repo'], deps), repo['rev'],
))
def _gc(store: Store) -> int:
with store.exclusive_lock(), store.connect() as db:
store._create_configs_table(db)
repos = db.execute('SELECT repo, ref, path FROM repos').fetchall()
all_repos = {(repo, ref): path for repo, ref, path in repos}
unused_repos = set(all_repos)
configs_rows = db.execute('SELECT path FROM configs').fetchall()
configs = [path for path, in configs_rows]
dead_configs = []
for config_path in configs:
try:
config = load_config(config_path)
except InvalidConfigError:
dead_configs.append(config_path)
continue
else:
for repo in config['repos']:
_mark_used_repos(store, all_repos, unused_repos, repo)
paths = [(path,) for path in dead_configs]
db.executemany('DELETE FROM configs WHERE path = ?', paths)
db.executemany(
'DELETE FROM repos WHERE repo = ? and ref = ?',
sorted(unused_repos),
)
for k in unused_repos:
rmtree(all_repos[k])
return len(unused_repos)
def gc(store: Store) -> int:
output.write_line(f'{store.gc()} repo(s) removed.')
output.write_line(f'{_gc(store)} repo(s) removed.')
return 0

View file

@ -8,7 +8,6 @@ import tempfile
from collections.abc import Callable
from collections.abc import Generator
from collections.abc import Sequence
from typing import Any
import pre_commit.constants as C
from pre_commit import clientlib
@ -18,7 +17,6 @@ from pre_commit.util import CalledProcessError
from pre_commit.util import clean_path_on_failure
from pre_commit.util import cmd_output_b
from pre_commit.util import resource_text
from pre_commit.util import rmtree
logger = logging.getLogger('pre_commit')
@ -235,81 +233,3 @@ class Store:
# TODO: eventually remove this and only create in _create
self._create_configs_table(db)
db.execute('INSERT OR IGNORE INTO configs VALUES (?)', (path,))
def _mark_used_repos(
self,
all_repos: dict[tuple[str, str], str],
unused_repos: set[tuple[str, str]],
repo: dict[str, Any],
) -> None:
if repo['repo'] == clientlib.META:
return
elif repo['repo'] == clientlib.LOCAL:
for hook in repo['hooks']:
deps = hook.get('additional_dependencies')
unused_repos.discard((
self.db_repo_name(repo['repo'], deps),
C.LOCAL_REPO_VERSION,
))
else:
key = (repo['repo'], repo['rev'])
path = all_repos.get(key)
# can't inspect manifest if it isn't cloned
if path is None:
return
try:
manifest = clientlib.load_manifest(
os.path.join(path, C.MANIFEST_FILE),
)
except clientlib.InvalidManifestError:
return
else:
unused_repos.discard(key)
by_id = {hook['id']: hook for hook in manifest}
for hook in repo['hooks']:
if hook['id'] not in by_id:
continue
deps = hook.get(
'additional_dependencies',
by_id[hook['id']]['additional_dependencies'],
)
unused_repos.discard((
self.db_repo_name(repo['repo'], deps), repo['rev'],
))
def gc(self) -> int:
with self.exclusive_lock(), self.connect() as db:
self._create_configs_table(db)
repos = db.execute('SELECT repo, ref, path FROM repos').fetchall()
all_repos = {(repo, ref): path for repo, ref, path in repos}
unused_repos = set(all_repos)
configs_rows = db.execute('SELECT path FROM configs').fetchall()
configs = [path for path, in configs_rows]
dead_configs = []
for config_path in configs:
try:
config = clientlib.load_config(config_path)
except clientlib.InvalidConfigError:
dead_configs.append(config_path)
continue
else:
for repo in config['repos']:
self._mark_used_repos(all_repos, unused_repos, repo)
paths = [(path,) for path in dead_configs]
db.executemany('DELETE FROM configs WHERE path = ?', paths)
db.executemany(
'DELETE FROM repos WHERE repo = ? and ref = ?',
sorted(unused_repos),
)
for k in unused_repos:
rmtree(all_repos[k])
return len(unused_repos)

View file

@ -165,3 +165,11 @@ def test_invalid_manifest_gcd(tempdir_factory, store, in_git_dir, cap_out):
assert _config_count(store) == 1
assert _repo_count(store) == 0
assert cap_out.get().splitlines()[-1] == '1 repo(s) removed.'
def test_gc_pre_1_14_roll_forward(store, cap_out):
with store.connect() as db: # simulate pre-1.14.0
db.executescript('DROP TABLE configs')
assert not gc(store)
assert cap_out.get() == '0 repo(s) removed.\n'

View file

@ -289,18 +289,9 @@ def test_mark_config_as_used_does_not_exist(store):
assert _select_all_configs(store) == []
def _simulate_pre_1_14_0(store):
with store.connect() as db:
db.executescript('DROP TABLE configs')
def test_gc_roll_forward(store):
_simulate_pre_1_14_0(store)
assert store.gc() == 0
def test_mark_config_as_used_roll_forward(store, tmpdir):
_simulate_pre_1_14_0(store)
with store.connect() as db: # simulate pre-1.14.0
db.executescript('DROP TABLE configs')
test_mark_config_as_used(store, tmpdir)