diff --git a/pre_commit/commands/gc.py b/pre_commit/commands/gc.py index d1941e4b..975d5e4c 100644 --- a/pre_commit/commands/gc.py +++ b/pre_commit/commands/gc.py @@ -1,9 +1,98 @@ from __future__ import annotations +import os.path +from typing import Any + +import pre_commit.constants as C from pre_commit import output +from pre_commit.clientlib import InvalidConfigError +from pre_commit.clientlib import InvalidManifestError +from pre_commit.clientlib import load_config +from pre_commit.clientlib import load_manifest +from pre_commit.clientlib import LOCAL +from pre_commit.clientlib import META from pre_commit.store import Store +from pre_commit.util import rmtree + + +def _mark_used_repos( + store: Store, + all_repos: dict[tuple[str, str], str], + unused_repos: set[tuple[str, str]], + repo: dict[str, Any], +) -> None: + if repo['repo'] == META: + return + elif repo['repo'] == LOCAL: + for hook in repo['hooks']: + deps = hook.get('additional_dependencies') + unused_repos.discard(( + store.db_repo_name(repo['repo'], deps), + C.LOCAL_REPO_VERSION, + )) + else: + key = (repo['repo'], repo['rev']) + path = all_repos.get(key) + # can't inspect manifest if it isn't cloned + if path is None: + return + + try: + manifest = load_manifest(os.path.join(path, C.MANIFEST_FILE)) + except InvalidManifestError: + return + else: + unused_repos.discard(key) + by_id = {hook['id']: hook for hook in manifest} + + for hook in repo['hooks']: + if hook['id'] not in by_id: + continue + + deps = hook.get( + 'additional_dependencies', + by_id[hook['id']]['additional_dependencies'], + ) + unused_repos.discard(( + store.db_repo_name(repo['repo'], deps), repo['rev'], + )) + + +def _gc(store: Store) -> int: + with store.exclusive_lock(), store.connect() as db: + store._create_configs_table(db) + + repos = db.execute('SELECT repo, ref, path FROM repos').fetchall() + all_repos = {(repo, ref): path for repo, ref, path in repos} + unused_repos = set(all_repos) + + configs_rows = db.execute('SELECT path FROM configs').fetchall() + configs = [path for path, in configs_rows] + + dead_configs = [] + for config_path in configs: + try: + config = load_config(config_path) + except InvalidConfigError: + dead_configs.append(config_path) + continue + else: + for repo in config['repos']: + _mark_used_repos(store, all_repos, unused_repos, repo) + + paths = [(path,) for path in dead_configs] + db.executemany('DELETE FROM configs WHERE path = ?', paths) + + db.executemany( + 'DELETE FROM repos WHERE repo = ? and ref = ?', + sorted(unused_repos), + ) + for k in unused_repos: + rmtree(all_repos[k]) + + return len(unused_repos) def gc(store: Store) -> int: - output.write_line(f'{store.gc()} repo(s) removed.') + output.write_line(f'{_gc(store)} repo(s) removed.') return 0 diff --git a/pre_commit/store.py b/pre_commit/store.py index 34c5f0d9..dc90c051 100644 --- a/pre_commit/store.py +++ b/pre_commit/store.py @@ -8,7 +8,6 @@ import tempfile from collections.abc import Callable from collections.abc import Generator from collections.abc import Sequence -from typing import Any import pre_commit.constants as C from pre_commit import clientlib @@ -18,7 +17,6 @@ from pre_commit.util import CalledProcessError from pre_commit.util import clean_path_on_failure from pre_commit.util import cmd_output_b from pre_commit.util import resource_text -from pre_commit.util import rmtree logger = logging.getLogger('pre_commit') @@ -235,81 +233,3 @@ class Store: # TODO: eventually remove this and only create in _create self._create_configs_table(db) db.execute('INSERT OR IGNORE INTO configs VALUES (?)', (path,)) - - def _mark_used_repos( - self, - all_repos: dict[tuple[str, str], str], - unused_repos: set[tuple[str, str]], - repo: dict[str, Any], - ) -> None: - if repo['repo'] == clientlib.META: - return - elif repo['repo'] == clientlib.LOCAL: - for hook in repo['hooks']: - deps = hook.get('additional_dependencies') - unused_repos.discard(( - self.db_repo_name(repo['repo'], deps), - C.LOCAL_REPO_VERSION, - )) - else: - key = (repo['repo'], repo['rev']) - path = all_repos.get(key) - # can't inspect manifest if it isn't cloned - if path is None: - return - - try: - manifest = clientlib.load_manifest( - os.path.join(path, C.MANIFEST_FILE), - ) - except clientlib.InvalidManifestError: - return - else: - unused_repos.discard(key) - by_id = {hook['id']: hook for hook in manifest} - - for hook in repo['hooks']: - if hook['id'] not in by_id: - continue - - deps = hook.get( - 'additional_dependencies', - by_id[hook['id']]['additional_dependencies'], - ) - unused_repos.discard(( - self.db_repo_name(repo['repo'], deps), repo['rev'], - )) - - def gc(self) -> int: - with self.exclusive_lock(), self.connect() as db: - self._create_configs_table(db) - - repos = db.execute('SELECT repo, ref, path FROM repos').fetchall() - all_repos = {(repo, ref): path for repo, ref, path in repos} - unused_repos = set(all_repos) - - configs_rows = db.execute('SELECT path FROM configs').fetchall() - configs = [path for path, in configs_rows] - - dead_configs = [] - for config_path in configs: - try: - config = clientlib.load_config(config_path) - except clientlib.InvalidConfigError: - dead_configs.append(config_path) - continue - else: - for repo in config['repos']: - self._mark_used_repos(all_repos, unused_repos, repo) - - paths = [(path,) for path in dead_configs] - db.executemany('DELETE FROM configs WHERE path = ?', paths) - - db.executemany( - 'DELETE FROM repos WHERE repo = ? and ref = ?', - sorted(unused_repos), - ) - for k in unused_repos: - rmtree(all_repos[k]) - - return len(unused_repos) diff --git a/tests/commands/gc_test.py b/tests/commands/gc_test.py index 85e66977..992b02f3 100644 --- a/tests/commands/gc_test.py +++ b/tests/commands/gc_test.py @@ -165,3 +165,11 @@ def test_invalid_manifest_gcd(tempdir_factory, store, in_git_dir, cap_out): assert _config_count(store) == 1 assert _repo_count(store) == 0 assert cap_out.get().splitlines()[-1] == '1 repo(s) removed.' + + +def test_gc_pre_1_14_roll_forward(store, cap_out): + with store.connect() as db: # simulate pre-1.14.0 + db.executescript('DROP TABLE configs') + + assert not gc(store) + assert cap_out.get() == '0 repo(s) removed.\n' diff --git a/tests/store_test.py b/tests/store_test.py index 4b04a8e7..13f198ea 100644 --- a/tests/store_test.py +++ b/tests/store_test.py @@ -289,18 +289,9 @@ def test_mark_config_as_used_does_not_exist(store): assert _select_all_configs(store) == [] -def _simulate_pre_1_14_0(store): - with store.connect() as db: - db.executescript('DROP TABLE configs') - - -def test_gc_roll_forward(store): - _simulate_pre_1_14_0(store) - assert store.gc() == 0 - - def test_mark_config_as_used_roll_forward(store, tmpdir): - _simulate_pre_1_14_0(store) + with store.connect() as db: # simulate pre-1.14.0 + db.executescript('DROP TABLE configs') test_mark_config_as_used(store, tmpdir)