mirror of
https://github.com/pre-commit/pre-commit.git
synced 2026-02-17 00:04:42 +04:00
Merge pull request #3578 from pre-commit/store-gc-refactor
Some checks are pending
Some checks are pending
refactor gc into store
This commit is contained in:
commit
49e28eea48
4 changed files with 101 additions and 111 deletions
|
|
@ -1,89 +1,9 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import os.path
|
||||
from typing import Any
|
||||
|
||||
import pre_commit.constants as C
|
||||
from pre_commit import output
|
||||
from pre_commit.clientlib import InvalidConfigError
|
||||
from pre_commit.clientlib import InvalidManifestError
|
||||
from pre_commit.clientlib import load_config
|
||||
from pre_commit.clientlib import load_manifest
|
||||
from pre_commit.clientlib import LOCAL
|
||||
from pre_commit.clientlib import META
|
||||
from pre_commit.store import Store
|
||||
|
||||
|
||||
def _mark_used_repos(
|
||||
store: Store,
|
||||
all_repos: dict[tuple[str, str], str],
|
||||
unused_repos: set[tuple[str, str]],
|
||||
repo: dict[str, Any],
|
||||
) -> None:
|
||||
if repo['repo'] == META:
|
||||
return
|
||||
elif repo['repo'] == LOCAL:
|
||||
for hook in repo['hooks']:
|
||||
deps = hook.get('additional_dependencies')
|
||||
unused_repos.discard((
|
||||
store.db_repo_name(repo['repo'], deps), C.LOCAL_REPO_VERSION,
|
||||
))
|
||||
else:
|
||||
key = (repo['repo'], repo['rev'])
|
||||
path = all_repos.get(key)
|
||||
# can't inspect manifest if it isn't cloned
|
||||
if path is None:
|
||||
return
|
||||
|
||||
try:
|
||||
manifest = load_manifest(os.path.join(path, C.MANIFEST_FILE))
|
||||
except InvalidManifestError:
|
||||
return
|
||||
else:
|
||||
unused_repos.discard(key)
|
||||
by_id = {hook['id']: hook for hook in manifest}
|
||||
|
||||
for hook in repo['hooks']:
|
||||
if hook['id'] not in by_id:
|
||||
continue
|
||||
|
||||
deps = hook.get(
|
||||
'additional_dependencies',
|
||||
by_id[hook['id']]['additional_dependencies'],
|
||||
)
|
||||
unused_repos.discard((
|
||||
store.db_repo_name(repo['repo'], deps), repo['rev'],
|
||||
))
|
||||
|
||||
|
||||
def _gc_repos(store: Store) -> int:
|
||||
configs = store.select_all_configs()
|
||||
repos = store.select_all_repos()
|
||||
|
||||
# delete config paths which do not exist
|
||||
dead_configs = [p for p in configs if not os.path.exists(p)]
|
||||
live_configs = [p for p in configs if os.path.exists(p)]
|
||||
|
||||
all_repos = {(repo, ref): path for repo, ref, path in repos}
|
||||
unused_repos = set(all_repos)
|
||||
for config_path in live_configs:
|
||||
try:
|
||||
config = load_config(config_path)
|
||||
except InvalidConfigError:
|
||||
dead_configs.append(config_path)
|
||||
continue
|
||||
else:
|
||||
for repo in config['repos']:
|
||||
_mark_used_repos(store, all_repos, unused_repos, repo)
|
||||
|
||||
store.delete_configs(dead_configs)
|
||||
for db_repo_name, ref in unused_repos:
|
||||
store.delete_repo(db_repo_name, ref, all_repos[(db_repo_name, ref)])
|
||||
return len(unused_repos)
|
||||
|
||||
|
||||
def gc(store: Store) -> int:
|
||||
with store.exclusive_lock():
|
||||
repos_removed = _gc_repos(store)
|
||||
output.write_line(f'{repos_removed} repo(s) removed.')
|
||||
output.write_line(f'{store.gc()} repo(s) removed.')
|
||||
return 0
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import tempfile
|
|||
from collections.abc import Callable
|
||||
from collections.abc import Generator
|
||||
from collections.abc import Sequence
|
||||
from typing import Any
|
||||
|
||||
import pre_commit.constants as C
|
||||
from pre_commit import clientlib
|
||||
|
|
@ -96,7 +97,7 @@ class Store:
|
|||
' PRIMARY KEY (repo, ref)'
|
||||
');',
|
||||
)
|
||||
self._create_config_table(db)
|
||||
self._create_configs_table(db)
|
||||
|
||||
# Atomic file move
|
||||
os.replace(tmpfile, self.db_path)
|
||||
|
|
@ -215,7 +216,7 @@ class Store:
|
|||
'local', C.LOCAL_REPO_VERSION, deps, _make_local_repo,
|
||||
)
|
||||
|
||||
def _create_config_table(self, db: sqlite3.Connection) -> None:
|
||||
def _create_configs_table(self, db: sqlite3.Connection) -> None:
|
||||
db.executescript(
|
||||
'CREATE TABLE IF NOT EXISTS configs ('
|
||||
' path TEXT NOT NULL,'
|
||||
|
|
@ -232,28 +233,83 @@ class Store:
|
|||
return
|
||||
with self.connect() as db:
|
||||
# TODO: eventually remove this and only create in _create
|
||||
self._create_config_table(db)
|
||||
self._create_configs_table(db)
|
||||
db.execute('INSERT OR IGNORE INTO configs VALUES (?)', (path,))
|
||||
|
||||
def select_all_configs(self) -> list[str]:
|
||||
with self.connect() as db:
|
||||
self._create_config_table(db)
|
||||
rows = db.execute('SELECT path FROM configs').fetchall()
|
||||
return [path for path, in rows]
|
||||
def _mark_used_repos(
|
||||
self,
|
||||
all_repos: dict[tuple[str, str], str],
|
||||
unused_repos: set[tuple[str, str]],
|
||||
repo: dict[str, Any],
|
||||
) -> None:
|
||||
if repo['repo'] == clientlib.META:
|
||||
return
|
||||
elif repo['repo'] == clientlib.LOCAL:
|
||||
for hook in repo['hooks']:
|
||||
deps = hook.get('additional_dependencies')
|
||||
unused_repos.discard((
|
||||
self.db_repo_name(repo['repo'], deps),
|
||||
C.LOCAL_REPO_VERSION,
|
||||
))
|
||||
else:
|
||||
key = (repo['repo'], repo['rev'])
|
||||
path = all_repos.get(key)
|
||||
# can't inspect manifest if it isn't cloned
|
||||
if path is None:
|
||||
return
|
||||
|
||||
def delete_configs(self, configs: list[str]) -> None:
|
||||
with self.connect() as db:
|
||||
rows = [(path,) for path in configs]
|
||||
db.executemany('DELETE FROM configs WHERE path = ?', rows)
|
||||
try:
|
||||
manifest = clientlib.load_manifest(
|
||||
os.path.join(path, C.MANIFEST_FILE),
|
||||
)
|
||||
except clientlib.InvalidManifestError:
|
||||
return
|
||||
else:
|
||||
unused_repos.discard(key)
|
||||
by_id = {hook['id']: hook for hook in manifest}
|
||||
|
||||
def select_all_repos(self) -> list[tuple[str, str, str]]:
|
||||
with self.connect() as db:
|
||||
return db.execute('SELECT repo, ref, path from repos').fetchall()
|
||||
for hook in repo['hooks']:
|
||||
if hook['id'] not in by_id:
|
||||
continue
|
||||
|
||||
def delete_repo(self, db_repo_name: str, ref: str, path: str) -> None:
|
||||
with self.connect() as db:
|
||||
db.execute(
|
||||
deps = hook.get(
|
||||
'additional_dependencies',
|
||||
by_id[hook['id']]['additional_dependencies'],
|
||||
)
|
||||
unused_repos.discard((
|
||||
self.db_repo_name(repo['repo'], deps), repo['rev'],
|
||||
))
|
||||
|
||||
def gc(self) -> int:
|
||||
with self.exclusive_lock(), self.connect() as db:
|
||||
self._create_configs_table(db)
|
||||
|
||||
repos = db.execute('SELECT repo, ref, path FROM repos').fetchall()
|
||||
all_repos = {(repo, ref): path for repo, ref, path in repos}
|
||||
unused_repos = set(all_repos)
|
||||
|
||||
configs_rows = db.execute('SELECT path FROM configs').fetchall()
|
||||
configs = [path for path, in configs_rows]
|
||||
|
||||
dead_configs = []
|
||||
for config_path in configs:
|
||||
try:
|
||||
config = clientlib.load_config(config_path)
|
||||
except clientlib.InvalidConfigError:
|
||||
dead_configs.append(config_path)
|
||||
continue
|
||||
else:
|
||||
for repo in config['repos']:
|
||||
self._mark_used_repos(all_repos, unused_repos, repo)
|
||||
|
||||
paths = [(path,) for path in dead_configs]
|
||||
db.executemany('DELETE FROM configs WHERE path = ?', paths)
|
||||
|
||||
db.executemany(
|
||||
'DELETE FROM repos WHERE repo = ? and ref = ?',
|
||||
(db_repo_name, ref),
|
||||
sorted(unused_repos),
|
||||
)
|
||||
rmtree(path)
|
||||
for k in unused_repos:
|
||||
rmtree(all_repos[k])
|
||||
|
||||
return len(unused_repos)
|
||||
|
|
|
|||
|
|
@ -19,11 +19,13 @@ from testing.util import git_commit
|
|||
|
||||
|
||||
def _repo_count(store):
|
||||
return len(store.select_all_repos())
|
||||
with store.connect() as db:
|
||||
return db.execute('SELECT COUNT(1) FROM repos').fetchone()[0]
|
||||
|
||||
|
||||
def _config_count(store):
|
||||
return len(store.select_all_configs())
|
||||
with store.connect() as db:
|
||||
return db.execute('SELECT COUNT(1) FROM configs').fetchone()[0]
|
||||
|
||||
|
||||
def _remove_config_assert_cleared(store, cap_out):
|
||||
|
|
@ -153,7 +155,8 @@ def test_invalid_manifest_gcd(tempdir_factory, store, in_git_dir, cap_out):
|
|||
install_hooks(C.CONFIG_FILE, store)
|
||||
|
||||
# we'll "break" the manifest to simulate an old version clone
|
||||
(_, _, path), = store.select_all_repos()
|
||||
with store.connect() as db:
|
||||
path, = db.execute('SELECT path FROM repos').fetchone()
|
||||
os.remove(os.path.join(path, C.MANIFEST_FILE))
|
||||
|
||||
assert _config_count(store) == 1
|
||||
|
|
|
|||
|
|
@ -22,6 +22,17 @@ from testing.util import git_commit
|
|||
from testing.util import xfailif_windows
|
||||
|
||||
|
||||
def _select_all_configs(store: Store) -> list[str]:
|
||||
with store.connect() as db:
|
||||
rows = db.execute('SELECT * FROM configs').fetchall()
|
||||
return [path for path, in rows]
|
||||
|
||||
|
||||
def _select_all_repos(store: Store) -> list[tuple[str, str, str]]:
|
||||
with store.connect() as db:
|
||||
return db.execute('SELECT repo, ref, path FROM repos').fetchall()
|
||||
|
||||
|
||||
def test_our_session_fixture_works():
|
||||
"""There's a session fixture which makes `Store` invariantly raise to
|
||||
prevent writing to the home directory.
|
||||
|
|
@ -91,7 +102,7 @@ def test_clone(store, tempdir_factory, caplog):
|
|||
assert git.head_rev(ret) == rev
|
||||
|
||||
# Assert there's an entry in the sqlite db for this
|
||||
assert store.select_all_repos() == [(path, rev, ret)]
|
||||
assert _select_all_repos(store) == [(path, rev, ret)]
|
||||
|
||||
|
||||
def test_warning_for_deprecated_stages_on_init(store, tempdir_factory, caplog):
|
||||
|
|
@ -217,7 +228,7 @@ def test_clone_shallow_failure_fallback_to_complete(
|
|||
assert git.head_rev(ret) == rev
|
||||
|
||||
# Assert there's an entry in the sqlite db for this
|
||||
assert store.select_all_repos() == [(path, rev, ret)]
|
||||
assert _select_all_repos(store) == [(path, rev, ret)]
|
||||
|
||||
|
||||
def test_clone_tag_not_on_mainline(store, tempdir_factory):
|
||||
|
|
@ -265,7 +276,7 @@ def test_mark_config_as_used(store, tmpdir):
|
|||
with tmpdir.as_cwd():
|
||||
f = tmpdir.join('f').ensure()
|
||||
store.mark_config_used('f')
|
||||
assert store.select_all_configs() == [f.strpath]
|
||||
assert _select_all_configs(store) == [f.strpath]
|
||||
|
||||
|
||||
def test_mark_config_as_used_idempotent(store, tmpdir):
|
||||
|
|
@ -275,7 +286,7 @@ def test_mark_config_as_used_idempotent(store, tmpdir):
|
|||
|
||||
def test_mark_config_as_used_does_not_exist(store):
|
||||
store.mark_config_used('f')
|
||||
assert store.select_all_configs() == []
|
||||
assert _select_all_configs(store) == []
|
||||
|
||||
|
||||
def _simulate_pre_1_14_0(store):
|
||||
|
|
@ -283,9 +294,9 @@ def _simulate_pre_1_14_0(store):
|
|||
db.executescript('DROP TABLE configs')
|
||||
|
||||
|
||||
def test_select_all_configs_roll_forward(store):
|
||||
def test_gc_roll_forward(store):
|
||||
_simulate_pre_1_14_0(store)
|
||||
assert store.select_all_configs() == []
|
||||
assert store.gc() == 0
|
||||
|
||||
|
||||
def test_mark_config_as_used_roll_forward(store, tmpdir):
|
||||
|
|
@ -314,7 +325,7 @@ def test_mark_config_as_used_readonly(tmpdir):
|
|||
assert store.readonly
|
||||
# should be skipped due to readonly
|
||||
store.mark_config_used(str(cfg))
|
||||
assert store.select_all_configs() == []
|
||||
assert _select_all_configs(store) == []
|
||||
|
||||
|
||||
def test_clone_with_recursive_submodules(store, tmp_path):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue