diff --git a/pre_commit/commands/gc.py b/pre_commit/commands/gc.py index 6892e097..d1941e4b 100644 --- a/pre_commit/commands/gc.py +++ b/pre_commit/commands/gc.py @@ -1,89 +1,9 @@ from __future__ import annotations -import os.path -from typing import Any - -import pre_commit.constants as C from pre_commit import output -from pre_commit.clientlib import InvalidConfigError -from pre_commit.clientlib import InvalidManifestError -from pre_commit.clientlib import load_config -from pre_commit.clientlib import load_manifest -from pre_commit.clientlib import LOCAL -from pre_commit.clientlib import META from pre_commit.store import Store -def _mark_used_repos( - store: Store, - all_repos: dict[tuple[str, str], str], - unused_repos: set[tuple[str, str]], - repo: dict[str, Any], -) -> None: - if repo['repo'] == META: - return - elif repo['repo'] == LOCAL: - for hook in repo['hooks']: - deps = hook.get('additional_dependencies') - unused_repos.discard(( - store.db_repo_name(repo['repo'], deps), C.LOCAL_REPO_VERSION, - )) - else: - key = (repo['repo'], repo['rev']) - path = all_repos.get(key) - # can't inspect manifest if it isn't cloned - if path is None: - return - - try: - manifest = load_manifest(os.path.join(path, C.MANIFEST_FILE)) - except InvalidManifestError: - return - else: - unused_repos.discard(key) - by_id = {hook['id']: hook for hook in manifest} - - for hook in repo['hooks']: - if hook['id'] not in by_id: - continue - - deps = hook.get( - 'additional_dependencies', - by_id[hook['id']]['additional_dependencies'], - ) - unused_repos.discard(( - store.db_repo_name(repo['repo'], deps), repo['rev'], - )) - - -def _gc_repos(store: Store) -> int: - configs = store.select_all_configs() - repos = store.select_all_repos() - - # delete config paths which do not exist - dead_configs = [p for p in configs if not os.path.exists(p)] - live_configs = [p for p in configs if os.path.exists(p)] - - all_repos = {(repo, ref): path for repo, ref, path in repos} - unused_repos = set(all_repos) - for config_path in live_configs: - try: - config = load_config(config_path) - except InvalidConfigError: - dead_configs.append(config_path) - continue - else: - for repo in config['repos']: - _mark_used_repos(store, all_repos, unused_repos, repo) - - store.delete_configs(dead_configs) - for db_repo_name, ref in unused_repos: - store.delete_repo(db_repo_name, ref, all_repos[(db_repo_name, ref)]) - return len(unused_repos) - - def gc(store: Store) -> int: - with store.exclusive_lock(): - repos_removed = _gc_repos(store) - output.write_line(f'{repos_removed} repo(s) removed.') + output.write_line(f'{store.gc()} repo(s) removed.') return 0 diff --git a/pre_commit/store.py b/pre_commit/store.py index 9e3b4048..34c5f0d9 100644 --- a/pre_commit/store.py +++ b/pre_commit/store.py @@ -8,6 +8,7 @@ import tempfile from collections.abc import Callable from collections.abc import Generator from collections.abc import Sequence +from typing import Any import pre_commit.constants as C from pre_commit import clientlib @@ -96,7 +97,7 @@ class Store: ' PRIMARY KEY (repo, ref)' ');', ) - self._create_config_table(db) + self._create_configs_table(db) # Atomic file move os.replace(tmpfile, self.db_path) @@ -215,7 +216,7 @@ class Store: 'local', C.LOCAL_REPO_VERSION, deps, _make_local_repo, ) - def _create_config_table(self, db: sqlite3.Connection) -> None: + def _create_configs_table(self, db: sqlite3.Connection) -> None: db.executescript( 'CREATE TABLE IF NOT EXISTS configs (' ' path TEXT NOT NULL,' @@ -232,28 +233,83 @@ class Store: return with self.connect() as db: # TODO: eventually remove this and only create in _create - self._create_config_table(db) + self._create_configs_table(db) db.execute('INSERT OR IGNORE INTO configs VALUES (?)', (path,)) - def select_all_configs(self) -> list[str]: - with self.connect() as db: - self._create_config_table(db) - rows = db.execute('SELECT path FROM configs').fetchall() - return [path for path, in rows] + def _mark_used_repos( + self, + all_repos: dict[tuple[str, str], str], + unused_repos: set[tuple[str, str]], + repo: dict[str, Any], + ) -> None: + if repo['repo'] == clientlib.META: + return + elif repo['repo'] == clientlib.LOCAL: + for hook in repo['hooks']: + deps = hook.get('additional_dependencies') + unused_repos.discard(( + self.db_repo_name(repo['repo'], deps), + C.LOCAL_REPO_VERSION, + )) + else: + key = (repo['repo'], repo['rev']) + path = all_repos.get(key) + # can't inspect manifest if it isn't cloned + if path is None: + return - def delete_configs(self, configs: list[str]) -> None: - with self.connect() as db: - rows = [(path,) for path in configs] - db.executemany('DELETE FROM configs WHERE path = ?', rows) + try: + manifest = clientlib.load_manifest( + os.path.join(path, C.MANIFEST_FILE), + ) + except clientlib.InvalidManifestError: + return + else: + unused_repos.discard(key) + by_id = {hook['id']: hook for hook in manifest} - def select_all_repos(self) -> list[tuple[str, str, str]]: - with self.connect() as db: - return db.execute('SELECT repo, ref, path from repos').fetchall() + for hook in repo['hooks']: + if hook['id'] not in by_id: + continue - def delete_repo(self, db_repo_name: str, ref: str, path: str) -> None: - with self.connect() as db: - db.execute( + deps = hook.get( + 'additional_dependencies', + by_id[hook['id']]['additional_dependencies'], + ) + unused_repos.discard(( + self.db_repo_name(repo['repo'], deps), repo['rev'], + )) + + def gc(self) -> int: + with self.exclusive_lock(), self.connect() as db: + self._create_configs_table(db) + + repos = db.execute('SELECT repo, ref, path FROM repos').fetchall() + all_repos = {(repo, ref): path for repo, ref, path in repos} + unused_repos = set(all_repos) + + configs_rows = db.execute('SELECT path FROM configs').fetchall() + configs = [path for path, in configs_rows] + + dead_configs = [] + for config_path in configs: + try: + config = clientlib.load_config(config_path) + except clientlib.InvalidConfigError: + dead_configs.append(config_path) + continue + else: + for repo in config['repos']: + self._mark_used_repos(all_repos, unused_repos, repo) + + paths = [(path,) for path in dead_configs] + db.executemany('DELETE FROM configs WHERE path = ?', paths) + + db.executemany( 'DELETE FROM repos WHERE repo = ? and ref = ?', - (db_repo_name, ref), + sorted(unused_repos), ) - rmtree(path) + for k in unused_repos: + rmtree(all_repos[k]) + + return len(unused_repos) diff --git a/tests/commands/gc_test.py b/tests/commands/gc_test.py index 95113ed5..85e66977 100644 --- a/tests/commands/gc_test.py +++ b/tests/commands/gc_test.py @@ -19,11 +19,13 @@ from testing.util import git_commit def _repo_count(store): - return len(store.select_all_repos()) + with store.connect() as db: + return db.execute('SELECT COUNT(1) FROM repos').fetchone()[0] def _config_count(store): - return len(store.select_all_configs()) + with store.connect() as db: + return db.execute('SELECT COUNT(1) FROM configs').fetchone()[0] def _remove_config_assert_cleared(store, cap_out): @@ -153,7 +155,8 @@ def test_invalid_manifest_gcd(tempdir_factory, store, in_git_dir, cap_out): install_hooks(C.CONFIG_FILE, store) # we'll "break" the manifest to simulate an old version clone - (_, _, path), = store.select_all_repos() + with store.connect() as db: + path, = db.execute('SELECT path FROM repos').fetchone() os.remove(os.path.join(path, C.MANIFEST_FILE)) assert _config_count(store) == 1 diff --git a/tests/store_test.py b/tests/store_test.py index 7d4dffb0..4b04a8e7 100644 --- a/tests/store_test.py +++ b/tests/store_test.py @@ -22,6 +22,17 @@ from testing.util import git_commit from testing.util import xfailif_windows +def _select_all_configs(store: Store) -> list[str]: + with store.connect() as db: + rows = db.execute('SELECT * FROM configs').fetchall() + return [path for path, in rows] + + +def _select_all_repos(store: Store) -> list[tuple[str, str, str]]: + with store.connect() as db: + return db.execute('SELECT repo, ref, path FROM repos').fetchall() + + def test_our_session_fixture_works(): """There's a session fixture which makes `Store` invariantly raise to prevent writing to the home directory. @@ -91,7 +102,7 @@ def test_clone(store, tempdir_factory, caplog): assert git.head_rev(ret) == rev # Assert there's an entry in the sqlite db for this - assert store.select_all_repos() == [(path, rev, ret)] + assert _select_all_repos(store) == [(path, rev, ret)] def test_warning_for_deprecated_stages_on_init(store, tempdir_factory, caplog): @@ -217,7 +228,7 @@ def test_clone_shallow_failure_fallback_to_complete( assert git.head_rev(ret) == rev # Assert there's an entry in the sqlite db for this - assert store.select_all_repos() == [(path, rev, ret)] + assert _select_all_repos(store) == [(path, rev, ret)] def test_clone_tag_not_on_mainline(store, tempdir_factory): @@ -265,7 +276,7 @@ def test_mark_config_as_used(store, tmpdir): with tmpdir.as_cwd(): f = tmpdir.join('f').ensure() store.mark_config_used('f') - assert store.select_all_configs() == [f.strpath] + assert _select_all_configs(store) == [f.strpath] def test_mark_config_as_used_idempotent(store, tmpdir): @@ -275,7 +286,7 @@ def test_mark_config_as_used_idempotent(store, tmpdir): def test_mark_config_as_used_does_not_exist(store): store.mark_config_used('f') - assert store.select_all_configs() == [] + assert _select_all_configs(store) == [] def _simulate_pre_1_14_0(store): @@ -283,9 +294,9 @@ def _simulate_pre_1_14_0(store): db.executescript('DROP TABLE configs') -def test_select_all_configs_roll_forward(store): +def test_gc_roll_forward(store): _simulate_pre_1_14_0(store) - assert store.select_all_configs() == [] + assert store.gc() == 0 def test_mark_config_as_used_roll_forward(store, tmpdir): @@ -314,7 +325,7 @@ def test_mark_config_as_used_readonly(tmpdir): assert store.readonly # should be skipped due to readonly store.mark_config_used(str(cfg)) - assert store.select_all_configs() == [] + assert _select_all_configs(store) == [] def test_clone_with_recursive_submodules(store, tmp_path):