refactor gc into store

This will make later refactoring easier and limits the API surface of Store.
This commit is contained in:
Anthony Sottile 2025-11-09 16:57:40 -05:00
parent 17cf886473
commit d5c273a2ba
4 changed files with 101 additions and 111 deletions

View file

@ -1,89 +1,9 @@
from __future__ import annotations
import os.path
from typing import Any
import pre_commit.constants as C
from pre_commit import output
from pre_commit.clientlib import InvalidConfigError
from pre_commit.clientlib import InvalidManifestError
from pre_commit.clientlib import load_config
from pre_commit.clientlib import load_manifest
from pre_commit.clientlib import LOCAL
from pre_commit.clientlib import META
from pre_commit.store import Store
def _mark_used_repos(
        store: Store,
        all_repos: dict[tuple[str, str], str],
        unused_repos: set[tuple[str, str]],
        repo: dict[str, Any],
) -> None:
    """Discard from ``unused_repos`` every key that ``repo`` still refers to.

    ``repo`` is a single entry of a config's ``repos`` list.  ``all_repos``
    maps ``(repo, ref)`` keys to the path of the corresponding clone and
    ``unused_repos`` is mutated in place; nothing is returned.
    """
    repo_name = repo['repo']

    # meta repos: nothing is discarded for them
    if repo_name == META:
        return

    # local repos: one entry per hook, keyed by the db repo name built from
    # the hook's additional dependencies and the fixed local version
    if repo_name == LOCAL:
        for local_hook in repo['hooks']:
            local_deps = local_hook.get('additional_dependencies')
            local_name = store.db_repo_name(repo_name, local_deps)
            unused_repos.discard((local_name, C.LOCAL_REPO_VERSION))
        return

    rev = repo['rev']
    key = (repo_name, rev)
    clone_path = all_repos.get(key)
    # can't inspect manifest if it isn't cloned
    if clone_path is None:
        return

    try:
        manifest = load_manifest(os.path.join(clone_path, C.MANIFEST_FILE))
    except InvalidManifestError:
        # the key is left in ``unused_repos`` when the manifest is invalid
        return

    unused_repos.discard(key)
    manifest_hooks = {entry['id']: entry for entry in manifest}

    for configured_hook in repo['hooks']:
        hook_id = configured_hook['id']
        if hook_id not in manifest_hooks:
            continue

        # the config's dependencies take precedence over the manifest's
        manifest_deps = manifest_hooks[hook_id]['additional_dependencies']
        deps = configured_hook.get('additional_dependencies', manifest_deps)
        unused_repos.discard((store.db_repo_name(repo_name, deps), rev))
def _gc_repos(store: Store) -> int:
    """Delete stale configs and unreferenced repos from ``store``.

    Every config path known to the store is checked; paths that no longer
    exist or fail to load as a config are deleted from the store.  Each
    repo that no remaining config refers to is deleted as well.

    Returns the number of repos that were deleted.
    """
    configs = store.select_all_configs()
    repos = store.select_all_repos()

    all_repos = {(repo, ref): path for repo, ref, path in repos}
    unused_repos = set(all_repos)

    # Classify each path exactly once: checking existence in two separate
    # sweeps could put a path in both the dead and live sets (or in neither)
    # if the file appeared or disappeared in between.
    dead_configs = []
    for config_path in configs:
        # delete config paths which do not exist
        if not os.path.exists(config_path):
            dead_configs.append(config_path)
            continue

        try:
            config = load_config(config_path)
        except InvalidConfigError:
            dead_configs.append(config_path)
            continue

        for repo in config['repos']:
            _mark_used_repos(store, all_repos, unused_repos, repo)

    store.delete_configs(dead_configs)
    for db_repo_name, ref in unused_repos:
        store.delete_repo(db_repo_name, ref, all_repos[(db_repo_name, ref)])
    return len(unused_repos)
# Entry point of the gc command: reports the number of removed repos through
# ``output.write_line`` and returns 0.
# NOTE(review): this listing carries two bodies at once -- the
# ``exclusive_lock`` / ``_gc_repos`` lines and the single ``store.gc()`` line.
# Taken together they would collect and report twice; presumably only one set
# of lines is current -- confirm before relying on either.
def gc(store: Store) -> int:
with store.exclusive_lock():
repos_removed = _gc_repos(store)
output.write_line(f'{repos_removed} repo(s) removed.')
output.write_line(f'{store.gc()} repo(s) removed.')
return 0

View file

@ -8,6 +8,7 @@ import tempfile
from collections.abc import Callable
from collections.abc import Generator
from collections.abc import Sequence
from typing import Any
import pre_commit.constants as C
from pre_commit import clientlib
@ -96,7 +97,7 @@ class Store:
' PRIMARY KEY (repo, ref)'
');',
)
self._create_config_table(db)
self._create_configs_table(db)
# Atomic file move
os.replace(tmpfile, self.db_path)
@ -215,7 +216,7 @@ class Store:
'local', C.LOCAL_REPO_VERSION, deps, _make_local_repo,
)
def _create_config_table(self, db: sqlite3.Connection) -> None:
def _create_configs_table(self, db: sqlite3.Connection) -> None:
db.executescript(
'CREATE TABLE IF NOT EXISTS configs ('
' path TEXT NOT NULL,'
@ -232,28 +233,83 @@ class Store:
return
with self.connect() as db:
# TODO: eventually remove this and only create in _create
self._create_config_table(db)
self._create_configs_table(db)
db.execute('INSERT OR IGNORE INTO configs VALUES (?)', (path,))
def select_all_configs(self) -> list[str]:
    """Return the ``path`` column of every row in the ``configs`` table."""
    with self.connect() as db:
        # make sure the table exists before it is queried
        self._create_config_table(db)
        cursor = db.execute('SELECT path FROM configs')
        fetched = cursor.fetchall()
    # each row is a 1-tuple: unpack it to the bare path
    return [config_path for (config_path,) in fetched]
# Discards from ``unused_repos`` every (repo, ref) key that the config entry
# ``repo`` still refers to; ``all_repos`` maps those keys to clone paths.
# ``unused_repos`` is mutated in place and nothing is returned.
# NOTE(review): lines belonging to ``delete_configs``, ``select_all_repos``
# and ``delete_repo`` are interleaved with this method's body in this
# listing -- presumably diff residue; confirm before editing.
def _mark_used_repos(
self,
all_repos: dict[tuple[str, str], str],
unused_repos: set[tuple[str, str]],
repo: dict[str, Any],
) -> None:
# meta repos: nothing is discarded for them
if repo['repo'] == clientlib.META:
return
# local repos: one key per hook, built from the hook's additional
# dependencies and the fixed local repo version
elif repo['repo'] == clientlib.LOCAL:
for hook in repo['hooks']:
deps = hook.get('additional_dependencies')
unused_repos.discard((
self.db_repo_name(repo['repo'], deps),
C.LOCAL_REPO_VERSION,
))
else:
key = (repo['repo'], repo['rev'])
path = all_repos.get(key)
# can't inspect manifest if it isn't cloned
if path is None:
return
def delete_configs(self, configs: list[str]) -> None:
with self.connect() as db:
rows = [(path,) for path in configs]
db.executemany('DELETE FROM configs WHERE path = ?', rows)
try:
manifest = clientlib.load_manifest(
os.path.join(path, C.MANIFEST_FILE),
)
# an invalid manifest leaves the key in ``unused_repos``
except clientlib.InvalidManifestError:
return
else:
unused_repos.discard(key)
by_id = {hook['id']: hook for hook in manifest}
def select_all_repos(self) -> list[tuple[str, str, str]]:
with self.connect() as db:
return db.execute('SELECT repo, ref, path from repos').fetchall()
for hook in repo['hooks']:
# hooks that the manifest does not define are skipped
if hook['id'] not in by_id:
continue
def delete_repo(self, db_repo_name: str, ref: str, path: str) -> None:
with self.connect() as db:
db.execute(
# the config's dependencies take precedence over the manifest's
deps = hook.get(
'additional_dependencies',
by_id[hook['id']]['additional_dependencies'],
)
unused_repos.discard((
self.db_repo_name(repo['repo'], deps), repo['rev'],
))
# Garbage-collects the store while holding the exclusive lock and one db
# connection: config rows whose file raises ``InvalidConfigError`` on load
# are deleted, every repo row not marked as used by a loadable config is
# deleted, those repos' paths are removed with ``rmtree`` and the number of
# removed repos is returned.
# NOTE(review): ``(db_repo_name, ref),`` and ``rmtree(path)`` below look like
# leftovers of ``delete_repo`` interleaved in this listing -- confirm.
def gc(self) -> int:
with self.exclusive_lock(), self.connect() as db:
self._create_configs_table(db)
repos = db.execute('SELECT repo, ref, path FROM repos').fetchall()
all_repos = {(repo, ref): path for repo, ref, path in repos}
# every repo starts out unused; ``_mark_used_repos`` discards the used ones
unused_repos = set(all_repos)
configs_rows = db.execute('SELECT path FROM configs').fetchall()
configs = [path for path, in configs_rows]
dead_configs = []
for config_path in configs:
try:
config = clientlib.load_config(config_path)
except clientlib.InvalidConfigError:
dead_configs.append(config_path)
continue
else:
for repo in config['repos']:
self._mark_used_repos(all_repos, unused_repos, repo)
paths = [(path,) for path in dead_configs]
db.executemany('DELETE FROM configs WHERE path = ?', paths)
db.executemany(
'DELETE FROM repos WHERE repo = ? and ref = ?',
(db_repo_name, ref),
sorted(unused_repos),
)
rmtree(path)
for k in unused_repos:
rmtree(all_repos[k])
return len(unused_repos)