mirror of
https://github.com/pre-commit/pre-commit.git
synced 2026-02-17 00:04:42 +04:00
Merge pull request #3578 from pre-commit/store-gc-refactor
Some checks are pending
Some checks are pending
refactor gc into store
This commit is contained in:
commit
49e28eea48
4 changed files with 101 additions and 111 deletions
|
|
@ -1,89 +1,9 @@
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import os.path
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
import pre_commit.constants as C
|
|
||||||
from pre_commit import output
|
from pre_commit import output
|
||||||
from pre_commit.clientlib import InvalidConfigError
|
|
||||||
from pre_commit.clientlib import InvalidManifestError
|
|
||||||
from pre_commit.clientlib import load_config
|
|
||||||
from pre_commit.clientlib import load_manifest
|
|
||||||
from pre_commit.clientlib import LOCAL
|
|
||||||
from pre_commit.clientlib import META
|
|
||||||
from pre_commit.store import Store
|
from pre_commit.store import Store
|
||||||
|
|
||||||
|
|
||||||
def _mark_used_repos(
|
|
||||||
store: Store,
|
|
||||||
all_repos: dict[tuple[str, str], str],
|
|
||||||
unused_repos: set[tuple[str, str]],
|
|
||||||
repo: dict[str, Any],
|
|
||||||
) -> None:
|
|
||||||
if repo['repo'] == META:
|
|
||||||
return
|
|
||||||
elif repo['repo'] == LOCAL:
|
|
||||||
for hook in repo['hooks']:
|
|
||||||
deps = hook.get('additional_dependencies')
|
|
||||||
unused_repos.discard((
|
|
||||||
store.db_repo_name(repo['repo'], deps), C.LOCAL_REPO_VERSION,
|
|
||||||
))
|
|
||||||
else:
|
|
||||||
key = (repo['repo'], repo['rev'])
|
|
||||||
path = all_repos.get(key)
|
|
||||||
# can't inspect manifest if it isn't cloned
|
|
||||||
if path is None:
|
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
|
||||||
manifest = load_manifest(os.path.join(path, C.MANIFEST_FILE))
|
|
||||||
except InvalidManifestError:
|
|
||||||
return
|
|
||||||
else:
|
|
||||||
unused_repos.discard(key)
|
|
||||||
by_id = {hook['id']: hook for hook in manifest}
|
|
||||||
|
|
||||||
for hook in repo['hooks']:
|
|
||||||
if hook['id'] not in by_id:
|
|
||||||
continue
|
|
||||||
|
|
||||||
deps = hook.get(
|
|
||||||
'additional_dependencies',
|
|
||||||
by_id[hook['id']]['additional_dependencies'],
|
|
||||||
)
|
|
||||||
unused_repos.discard((
|
|
||||||
store.db_repo_name(repo['repo'], deps), repo['rev'],
|
|
||||||
))
|
|
||||||
|
|
||||||
|
|
||||||
def _gc_repos(store: Store) -> int:
|
|
||||||
configs = store.select_all_configs()
|
|
||||||
repos = store.select_all_repos()
|
|
||||||
|
|
||||||
# delete config paths which do not exist
|
|
||||||
dead_configs = [p for p in configs if not os.path.exists(p)]
|
|
||||||
live_configs = [p for p in configs if os.path.exists(p)]
|
|
||||||
|
|
||||||
all_repos = {(repo, ref): path for repo, ref, path in repos}
|
|
||||||
unused_repos = set(all_repos)
|
|
||||||
for config_path in live_configs:
|
|
||||||
try:
|
|
||||||
config = load_config(config_path)
|
|
||||||
except InvalidConfigError:
|
|
||||||
dead_configs.append(config_path)
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
for repo in config['repos']:
|
|
||||||
_mark_used_repos(store, all_repos, unused_repos, repo)
|
|
||||||
|
|
||||||
store.delete_configs(dead_configs)
|
|
||||||
for db_repo_name, ref in unused_repos:
|
|
||||||
store.delete_repo(db_repo_name, ref, all_repos[(db_repo_name, ref)])
|
|
||||||
return len(unused_repos)
|
|
||||||
|
|
||||||
|
|
||||||
def gc(store: Store) -> int:
|
def gc(store: Store) -> int:
|
||||||
with store.exclusive_lock():
|
output.write_line(f'{store.gc()} repo(s) removed.')
|
||||||
repos_removed = _gc_repos(store)
|
|
||||||
output.write_line(f'{repos_removed} repo(s) removed.')
|
|
||||||
return 0
|
return 0
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ import tempfile
|
||||||
from collections.abc import Callable
|
from collections.abc import Callable
|
||||||
from collections.abc import Generator
|
from collections.abc import Generator
|
||||||
from collections.abc import Sequence
|
from collections.abc import Sequence
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
import pre_commit.constants as C
|
import pre_commit.constants as C
|
||||||
from pre_commit import clientlib
|
from pre_commit import clientlib
|
||||||
|
|
@ -96,7 +97,7 @@ class Store:
|
||||||
' PRIMARY KEY (repo, ref)'
|
' PRIMARY KEY (repo, ref)'
|
||||||
');',
|
');',
|
||||||
)
|
)
|
||||||
self._create_config_table(db)
|
self._create_configs_table(db)
|
||||||
|
|
||||||
# Atomic file move
|
# Atomic file move
|
||||||
os.replace(tmpfile, self.db_path)
|
os.replace(tmpfile, self.db_path)
|
||||||
|
|
@ -215,7 +216,7 @@ class Store:
|
||||||
'local', C.LOCAL_REPO_VERSION, deps, _make_local_repo,
|
'local', C.LOCAL_REPO_VERSION, deps, _make_local_repo,
|
||||||
)
|
)
|
||||||
|
|
||||||
def _create_config_table(self, db: sqlite3.Connection) -> None:
|
def _create_configs_table(self, db: sqlite3.Connection) -> None:
|
||||||
db.executescript(
|
db.executescript(
|
||||||
'CREATE TABLE IF NOT EXISTS configs ('
|
'CREATE TABLE IF NOT EXISTS configs ('
|
||||||
' path TEXT NOT NULL,'
|
' path TEXT NOT NULL,'
|
||||||
|
|
@ -232,28 +233,83 @@ class Store:
|
||||||
return
|
return
|
||||||
with self.connect() as db:
|
with self.connect() as db:
|
||||||
# TODO: eventually remove this and only create in _create
|
# TODO: eventually remove this and only create in _create
|
||||||
self._create_config_table(db)
|
self._create_configs_table(db)
|
||||||
db.execute('INSERT OR IGNORE INTO configs VALUES (?)', (path,))
|
db.execute('INSERT OR IGNORE INTO configs VALUES (?)', (path,))
|
||||||
|
|
||||||
def select_all_configs(self) -> list[str]:
|
def _mark_used_repos(
|
||||||
with self.connect() as db:
|
self,
|
||||||
self._create_config_table(db)
|
all_repos: dict[tuple[str, str], str],
|
||||||
rows = db.execute('SELECT path FROM configs').fetchall()
|
unused_repos: set[tuple[str, str]],
|
||||||
return [path for path, in rows]
|
repo: dict[str, Any],
|
||||||
|
) -> None:
|
||||||
|
if repo['repo'] == clientlib.META:
|
||||||
|
return
|
||||||
|
elif repo['repo'] == clientlib.LOCAL:
|
||||||
|
for hook in repo['hooks']:
|
||||||
|
deps = hook.get('additional_dependencies')
|
||||||
|
unused_repos.discard((
|
||||||
|
self.db_repo_name(repo['repo'], deps),
|
||||||
|
C.LOCAL_REPO_VERSION,
|
||||||
|
))
|
||||||
|
else:
|
||||||
|
key = (repo['repo'], repo['rev'])
|
||||||
|
path = all_repos.get(key)
|
||||||
|
# can't inspect manifest if it isn't cloned
|
||||||
|
if path is None:
|
||||||
|
return
|
||||||
|
|
||||||
def delete_configs(self, configs: list[str]) -> None:
|
try:
|
||||||
with self.connect() as db:
|
manifest = clientlib.load_manifest(
|
||||||
rows = [(path,) for path in configs]
|
os.path.join(path, C.MANIFEST_FILE),
|
||||||
db.executemany('DELETE FROM configs WHERE path = ?', rows)
|
|
||||||
|
|
||||||
def select_all_repos(self) -> list[tuple[str, str, str]]:
|
|
||||||
with self.connect() as db:
|
|
||||||
return db.execute('SELECT repo, ref, path from repos').fetchall()
|
|
||||||
|
|
||||||
def delete_repo(self, db_repo_name: str, ref: str, path: str) -> None:
|
|
||||||
with self.connect() as db:
|
|
||||||
db.execute(
|
|
||||||
'DELETE FROM repos WHERE repo = ? and ref = ?',
|
|
||||||
(db_repo_name, ref),
|
|
||||||
)
|
)
|
||||||
rmtree(path)
|
except clientlib.InvalidManifestError:
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
unused_repos.discard(key)
|
||||||
|
by_id = {hook['id']: hook for hook in manifest}
|
||||||
|
|
||||||
|
for hook in repo['hooks']:
|
||||||
|
if hook['id'] not in by_id:
|
||||||
|
continue
|
||||||
|
|
||||||
|
deps = hook.get(
|
||||||
|
'additional_dependencies',
|
||||||
|
by_id[hook['id']]['additional_dependencies'],
|
||||||
|
)
|
||||||
|
unused_repos.discard((
|
||||||
|
self.db_repo_name(repo['repo'], deps), repo['rev'],
|
||||||
|
))
|
||||||
|
|
||||||
|
def gc(self) -> int:
|
||||||
|
with self.exclusive_lock(), self.connect() as db:
|
||||||
|
self._create_configs_table(db)
|
||||||
|
|
||||||
|
repos = db.execute('SELECT repo, ref, path FROM repos').fetchall()
|
||||||
|
all_repos = {(repo, ref): path for repo, ref, path in repos}
|
||||||
|
unused_repos = set(all_repos)
|
||||||
|
|
||||||
|
configs_rows = db.execute('SELECT path FROM configs').fetchall()
|
||||||
|
configs = [path for path, in configs_rows]
|
||||||
|
|
||||||
|
dead_configs = []
|
||||||
|
for config_path in configs:
|
||||||
|
try:
|
||||||
|
config = clientlib.load_config(config_path)
|
||||||
|
except clientlib.InvalidConfigError:
|
||||||
|
dead_configs.append(config_path)
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
for repo in config['repos']:
|
||||||
|
self._mark_used_repos(all_repos, unused_repos, repo)
|
||||||
|
|
||||||
|
paths = [(path,) for path in dead_configs]
|
||||||
|
db.executemany('DELETE FROM configs WHERE path = ?', paths)
|
||||||
|
|
||||||
|
db.executemany(
|
||||||
|
'DELETE FROM repos WHERE repo = ? and ref = ?',
|
||||||
|
sorted(unused_repos),
|
||||||
|
)
|
||||||
|
for k in unused_repos:
|
||||||
|
rmtree(all_repos[k])
|
||||||
|
|
||||||
|
return len(unused_repos)
|
||||||
|
|
|
||||||
|
|
@ -19,11 +19,13 @@ from testing.util import git_commit
|
||||||
|
|
||||||
|
|
||||||
def _repo_count(store):
|
def _repo_count(store):
|
||||||
return len(store.select_all_repos())
|
with store.connect() as db:
|
||||||
|
return db.execute('SELECT COUNT(1) FROM repos').fetchone()[0]
|
||||||
|
|
||||||
|
|
||||||
def _config_count(store):
|
def _config_count(store):
|
||||||
return len(store.select_all_configs())
|
with store.connect() as db:
|
||||||
|
return db.execute('SELECT COUNT(1) FROM configs').fetchone()[0]
|
||||||
|
|
||||||
|
|
||||||
def _remove_config_assert_cleared(store, cap_out):
|
def _remove_config_assert_cleared(store, cap_out):
|
||||||
|
|
@ -153,7 +155,8 @@ def test_invalid_manifest_gcd(tempdir_factory, store, in_git_dir, cap_out):
|
||||||
install_hooks(C.CONFIG_FILE, store)
|
install_hooks(C.CONFIG_FILE, store)
|
||||||
|
|
||||||
# we'll "break" the manifest to simulate an old version clone
|
# we'll "break" the manifest to simulate an old version clone
|
||||||
(_, _, path), = store.select_all_repos()
|
with store.connect() as db:
|
||||||
|
path, = db.execute('SELECT path FROM repos').fetchone()
|
||||||
os.remove(os.path.join(path, C.MANIFEST_FILE))
|
os.remove(os.path.join(path, C.MANIFEST_FILE))
|
||||||
|
|
||||||
assert _config_count(store) == 1
|
assert _config_count(store) == 1
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,17 @@ from testing.util import git_commit
|
||||||
from testing.util import xfailif_windows
|
from testing.util import xfailif_windows
|
||||||
|
|
||||||
|
|
||||||
|
def _select_all_configs(store: Store) -> list[str]:
|
||||||
|
with store.connect() as db:
|
||||||
|
rows = db.execute('SELECT * FROM configs').fetchall()
|
||||||
|
return [path for path, in rows]
|
||||||
|
|
||||||
|
|
||||||
|
def _select_all_repos(store: Store) -> list[tuple[str, str, str]]:
|
||||||
|
with store.connect() as db:
|
||||||
|
return db.execute('SELECT repo, ref, path FROM repos').fetchall()
|
||||||
|
|
||||||
|
|
||||||
def test_our_session_fixture_works():
|
def test_our_session_fixture_works():
|
||||||
"""There's a session fixture which makes `Store` invariantly raise to
|
"""There's a session fixture which makes `Store` invariantly raise to
|
||||||
prevent writing to the home directory.
|
prevent writing to the home directory.
|
||||||
|
|
@ -91,7 +102,7 @@ def test_clone(store, tempdir_factory, caplog):
|
||||||
assert git.head_rev(ret) == rev
|
assert git.head_rev(ret) == rev
|
||||||
|
|
||||||
# Assert there's an entry in the sqlite db for this
|
# Assert there's an entry in the sqlite db for this
|
||||||
assert store.select_all_repos() == [(path, rev, ret)]
|
assert _select_all_repos(store) == [(path, rev, ret)]
|
||||||
|
|
||||||
|
|
||||||
def test_warning_for_deprecated_stages_on_init(store, tempdir_factory, caplog):
|
def test_warning_for_deprecated_stages_on_init(store, tempdir_factory, caplog):
|
||||||
|
|
@ -217,7 +228,7 @@ def test_clone_shallow_failure_fallback_to_complete(
|
||||||
assert git.head_rev(ret) == rev
|
assert git.head_rev(ret) == rev
|
||||||
|
|
||||||
# Assert there's an entry in the sqlite db for this
|
# Assert there's an entry in the sqlite db for this
|
||||||
assert store.select_all_repos() == [(path, rev, ret)]
|
assert _select_all_repos(store) == [(path, rev, ret)]
|
||||||
|
|
||||||
|
|
||||||
def test_clone_tag_not_on_mainline(store, tempdir_factory):
|
def test_clone_tag_not_on_mainline(store, tempdir_factory):
|
||||||
|
|
@ -265,7 +276,7 @@ def test_mark_config_as_used(store, tmpdir):
|
||||||
with tmpdir.as_cwd():
|
with tmpdir.as_cwd():
|
||||||
f = tmpdir.join('f').ensure()
|
f = tmpdir.join('f').ensure()
|
||||||
store.mark_config_used('f')
|
store.mark_config_used('f')
|
||||||
assert store.select_all_configs() == [f.strpath]
|
assert _select_all_configs(store) == [f.strpath]
|
||||||
|
|
||||||
|
|
||||||
def test_mark_config_as_used_idempotent(store, tmpdir):
|
def test_mark_config_as_used_idempotent(store, tmpdir):
|
||||||
|
|
@ -275,7 +286,7 @@ def test_mark_config_as_used_idempotent(store, tmpdir):
|
||||||
|
|
||||||
def test_mark_config_as_used_does_not_exist(store):
|
def test_mark_config_as_used_does_not_exist(store):
|
||||||
store.mark_config_used('f')
|
store.mark_config_used('f')
|
||||||
assert store.select_all_configs() == []
|
assert _select_all_configs(store) == []
|
||||||
|
|
||||||
|
|
||||||
def _simulate_pre_1_14_0(store):
|
def _simulate_pre_1_14_0(store):
|
||||||
|
|
@ -283,9 +294,9 @@ def _simulate_pre_1_14_0(store):
|
||||||
db.executescript('DROP TABLE configs')
|
db.executescript('DROP TABLE configs')
|
||||||
|
|
||||||
|
|
||||||
def test_select_all_configs_roll_forward(store):
|
def test_gc_roll_forward(store):
|
||||||
_simulate_pre_1_14_0(store)
|
_simulate_pre_1_14_0(store)
|
||||||
assert store.select_all_configs() == []
|
assert store.gc() == 0
|
||||||
|
|
||||||
|
|
||||||
def test_mark_config_as_used_roll_forward(store, tmpdir):
|
def test_mark_config_as_used_roll_forward(store, tmpdir):
|
||||||
|
|
@ -314,7 +325,7 @@ def test_mark_config_as_used_readonly(tmpdir):
|
||||||
assert store.readonly
|
assert store.readonly
|
||||||
# should be skipped due to readonly
|
# should be skipped due to readonly
|
||||||
store.mark_config_used(str(cfg))
|
store.mark_config_used(str(cfg))
|
||||||
assert store.select_all_configs() == []
|
assert _select_all_configs(store) == []
|
||||||
|
|
||||||
|
|
||||||
def test_clone_with_recursive_submodules(store, tmp_path):
|
def test_clone_with_recursive_submodules(store, tmp_path):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue