From d5c273a2ba0c712659640e9487adb38bd7da68f6 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Sun, 9 Nov 2025 16:57:40 -0500 Subject: [PATCH 01/14] refactor gc into store this will make refactoring this easier later and limits the api surface of Store --- pre_commit/commands/gc.py | 82 +-------------------------------- pre_commit/store.py | 96 +++++++++++++++++++++++++++++++-------- tests/commands/gc_test.py | 9 ++-- tests/store_test.py | 25 +++++++--- 4 files changed, 101 insertions(+), 111 deletions(-) diff --git a/pre_commit/commands/gc.py b/pre_commit/commands/gc.py index 6892e097..d1941e4b 100644 --- a/pre_commit/commands/gc.py +++ b/pre_commit/commands/gc.py @@ -1,89 +1,9 @@ from __future__ import annotations -import os.path -from typing import Any - -import pre_commit.constants as C from pre_commit import output -from pre_commit.clientlib import InvalidConfigError -from pre_commit.clientlib import InvalidManifestError -from pre_commit.clientlib import load_config -from pre_commit.clientlib import load_manifest -from pre_commit.clientlib import LOCAL -from pre_commit.clientlib import META from pre_commit.store import Store -def _mark_used_repos( - store: Store, - all_repos: dict[tuple[str, str], str], - unused_repos: set[tuple[str, str]], - repo: dict[str, Any], -) -> None: - if repo['repo'] == META: - return - elif repo['repo'] == LOCAL: - for hook in repo['hooks']: - deps = hook.get('additional_dependencies') - unused_repos.discard(( - store.db_repo_name(repo['repo'], deps), C.LOCAL_REPO_VERSION, - )) - else: - key = (repo['repo'], repo['rev']) - path = all_repos.get(key) - # can't inspect manifest if it isn't cloned - if path is None: - return - - try: - manifest = load_manifest(os.path.join(path, C.MANIFEST_FILE)) - except InvalidManifestError: - return - else: - unused_repos.discard(key) - by_id = {hook['id']: hook for hook in manifest} - - for hook in repo['hooks']: - if hook['id'] not in by_id: - continue - - deps = hook.get( - 'additional_dependencies', - by_id[hook['id']]['additional_dependencies'], - ) - unused_repos.discard(( - store.db_repo_name(repo['repo'], deps), repo['rev'], - )) - - -def _gc_repos(store: Store) -> int: - configs = store.select_all_configs() - repos = store.select_all_repos() - - # delete config paths which do not exist - dead_configs = [p for p in configs if not os.path.exists(p)] - live_configs = [p for p in configs if os.path.exists(p)] - - all_repos = {(repo, ref): path for repo, ref, path in repos} - unused_repos = set(all_repos) - for config_path in live_configs: - try: - config = load_config(config_path) - except InvalidConfigError: - dead_configs.append(config_path) - continue - else: - for repo in config['repos']: - _mark_used_repos(store, all_repos, unused_repos, repo) - - store.delete_configs(dead_configs) - for db_repo_name, ref in unused_repos: - store.delete_repo(db_repo_name, ref, all_repos[(db_repo_name, ref)]) - return len(unused_repos) - - def gc(store: Store) -> int: - with store.exclusive_lock(): - repos_removed = _gc_repos(store) - output.write_line(f'{repos_removed} repo(s) removed.') + output.write_line(f'{store.gc()} repo(s) removed.') return 0 diff --git a/pre_commit/store.py b/pre_commit/store.py index 9e3b4048..34c5f0d9 100644 --- a/pre_commit/store.py +++ b/pre_commit/store.py @@ -8,6 +8,7 @@ import tempfile from collections.abc import Callable from collections.abc import Generator from collections.abc import Sequence +from typing import Any import pre_commit.constants as C from pre_commit import clientlib @@ -96,7 +97,7 @@ class Store: ' PRIMARY KEY (repo, ref)' ');', ) - self._create_config_table(db) + self._create_configs_table(db) # Atomic file move os.replace(tmpfile, self.db_path) @@ -215,7 +216,7 @@ class Store: 'local', C.LOCAL_REPO_VERSION, deps, _make_local_repo, ) - def _create_config_table(self, db: sqlite3.Connection) -> None: + def _create_configs_table(self, db: sqlite3.Connection) -> None: db.executescript( 'CREATE TABLE IF NOT EXISTS configs (' ' path TEXT NOT NULL,' @@ -232,28 +233,83 @@ class Store: return with self.connect() as db: # TODO: eventually remove this and only create in _create - self._create_config_table(db) + self._create_configs_table(db) db.execute('INSERT OR IGNORE INTO configs VALUES (?)', (path,)) - def select_all_configs(self) -> list[str]: - with self.connect() as db: - self._create_config_table(db) - rows = db.execute('SELECT path FROM configs').fetchall() - return [path for path, in rows] + def _mark_used_repos( + self, + all_repos: dict[tuple[str, str], str], + unused_repos: set[tuple[str, str]], + repo: dict[str, Any], + ) -> None: + if repo['repo'] == clientlib.META: + return + elif repo['repo'] == clientlib.LOCAL: + for hook in repo['hooks']: + deps = hook.get('additional_dependencies') + unused_repos.discard(( + self.db_repo_name(repo['repo'], deps), + C.LOCAL_REPO_VERSION, + )) + else: + key = (repo['repo'], repo['rev']) + path = all_repos.get(key) + # can't inspect manifest if it isn't cloned + if path is None: + return - def delete_configs(self, configs: list[str]) -> None: - with self.connect() as db: - rows = [(path,) for path in configs] - db.executemany('DELETE FROM configs WHERE path = ?', rows) + try: + manifest = clientlib.load_manifest( + os.path.join(path, C.MANIFEST_FILE), + ) + except clientlib.InvalidManifestError: + return + else: + unused_repos.discard(key) + by_id = {hook['id']: hook for hook in manifest} - def select_all_repos(self) -> list[tuple[str, str, str]]: - with self.connect() as db: - return db.execute('SELECT repo, ref, path from repos').fetchall() + for hook in repo['hooks']: + if hook['id'] not in by_id: + continue - def delete_repo(self, db_repo_name: str, ref: str, path: str) -> None: - with self.connect() as db: - db.execute( + deps = hook.get( + 'additional_dependencies', + by_id[hook['id']]['additional_dependencies'], + ) + unused_repos.discard(( + self.db_repo_name(repo['repo'], deps), repo['rev'], + )) + + def gc(self) -> int: + with self.exclusive_lock(), self.connect() as db: + self._create_configs_table(db) + + repos = db.execute('SELECT repo, ref, path FROM repos').fetchall() + all_repos = {(repo, ref): path for repo, ref, path in repos} + unused_repos = set(all_repos) + + configs_rows = db.execute('SELECT path FROM configs').fetchall() + configs = [path for path, in configs_rows] + + dead_configs = [] + for config_path in configs: + try: + config = clientlib.load_config(config_path) + except clientlib.InvalidConfigError: + dead_configs.append(config_path) + continue + else: + for repo in config['repos']: + self._mark_used_repos(all_repos, unused_repos, repo) + + paths = [(path,) for path in dead_configs] + db.executemany('DELETE FROM configs WHERE path = ?', paths) + + db.executemany( 'DELETE FROM repos WHERE repo = ? and ref = ?', - (db_repo_name, ref), + sorted(unused_repos), ) - rmtree(path) + for k in unused_repos: + rmtree(all_repos[k]) + + return len(unused_repos) diff --git a/tests/commands/gc_test.py b/tests/commands/gc_test.py index 95113ed5..85e66977 100644 --- a/tests/commands/gc_test.py +++ b/tests/commands/gc_test.py @@ -19,11 +19,13 @@ from testing.util import git_commit def _repo_count(store): - return len(store.select_all_repos()) + with store.connect() as db: + return db.execute('SELECT COUNT(1) FROM repos').fetchone()[0] def _config_count(store): - return len(store.select_all_configs()) + with store.connect() as db: + return db.execute('SELECT COUNT(1) FROM configs').fetchone()[0] def _remove_config_assert_cleared(store, cap_out): @@ -153,7 +155,8 @@ def test_invalid_manifest_gcd(tempdir_factory, store, in_git_dir, cap_out): install_hooks(C.CONFIG_FILE, store) # we'll "break" the manifest to simulate an old version clone - (_, _, path), = store.select_all_repos() + with store.connect() as db: + path, = db.execute('SELECT path FROM repos').fetchone() os.remove(os.path.join(path, C.MANIFEST_FILE)) assert _config_count(store) == 1 diff --git a/tests/store_test.py b/tests/store_test.py index 7d4dffb0..4b04a8e7 100644 --- a/tests/store_test.py +++ b/tests/store_test.py @@ -22,6 +22,17 @@ from testing.util import git_commit from testing.util import xfailif_windows +def _select_all_configs(store: Store) -> list[str]: + with store.connect() as db: + rows = db.execute('SELECT * FROM configs').fetchall() + return [path for path, in rows] + + +def _select_all_repos(store: Store) -> list[tuple[str, str, str]]: + with store.connect() as db: + return db.execute('SELECT repo, ref, path FROM repos').fetchall() + + def test_our_session_fixture_works(): """There's a session fixture which makes `Store` invariantly raise to prevent writing to the home directory. @@ -91,7 +102,7 @@ def test_clone(store, tempdir_factory, caplog): assert git.head_rev(ret) == rev # Assert there's an entry in the sqlite db for this - assert store.select_all_repos() == [(path, rev, ret)] + assert _select_all_repos(store) == [(path, rev, ret)] def test_warning_for_deprecated_stages_on_init(store, tempdir_factory, caplog): @@ -217,7 +228,7 @@ def test_clone_shallow_failure_fallback_to_complete( assert git.head_rev(ret) == rev # Assert there's an entry in the sqlite db for this - assert store.select_all_repos() == [(path, rev, ret)] + assert _select_all_repos(store) == [(path, rev, ret)] def test_clone_tag_not_on_mainline(store, tempdir_factory): @@ -265,7 +276,7 @@ def test_mark_config_as_used(store, tmpdir): with tmpdir.as_cwd(): f = tmpdir.join('f').ensure() store.mark_config_used('f') - assert store.select_all_configs() == [f.strpath] + assert _select_all_configs(store) == [f.strpath] def test_mark_config_as_used_idempotent(store, tmpdir): @@ -275,7 +286,7 @@ def test_mark_config_as_used_idempotent(store, tmpdir): def test_mark_config_as_used_does_not_exist(store): store.mark_config_used('f') - assert store.select_all_configs() == [] + assert _select_all_configs(store) == [] def _simulate_pre_1_14_0(store): @@ -283,9 +294,9 @@ def _simulate_pre_1_14_0(store): db.executescript('DROP TABLE configs') -def test_select_all_configs_roll_forward(store): +def test_gc_roll_forward(store): _simulate_pre_1_14_0(store) - assert store.select_all_configs() == [] + assert store.gc() == 0 def test_mark_config_as_used_roll_forward(store, tmpdir): @@ -314,7 +325,7 @@ def test_mark_config_as_used_readonly(tmpdir): assert store.readonly # should be skipped due to readonly store.mark_config_used(str(cfg)) - assert store.select_all_configs() == [] + assert _select_all_configs(store) == [] def test_clone_with_recursive_submodules(store, tmp_path): From 063229aee77ba2da3e9ed5c8217070b4128234fd Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 10 Nov 2025 20:59:54 +0000 Subject: [PATCH 02/14] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/asottile/pyupgrade: v3.21.0 → v3.21.1](https://github.com/asottile/pyupgrade/compare/v3.21.0...v3.21.1) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fa077365..e47d56ca 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ repos: hooks: - id: add-trailing-comma - repo: https://github.com/asottile/pyupgrade - rev: v3.21.0 + rev: v3.21.1 hooks: - id: pyupgrade args: [--py310-plus] From 66278a9a0b69a69fde820d2b85a7e198eae52981 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Wed, 19 Nov 2025 14:29:50 -0500 Subject: [PATCH 03/14] move logic for gc back to commands.gc --- pre_commit/commands/gc.py | 91 ++++++++++++++++++++++++++++++++++++++- pre_commit/store.py | 80 ---------------------------------- tests/commands/gc_test.py | 8 ++++ tests/store_test.py | 13 +----- 4 files changed, 100 insertions(+), 92 deletions(-) diff --git a/pre_commit/commands/gc.py b/pre_commit/commands/gc.py index d1941e4b..975d5e4c 100644 --- a/pre_commit/commands/gc.py +++ b/pre_commit/commands/gc.py @@ -1,9 +1,98 @@ from __future__ import annotations +import os.path +from typing import Any + +import pre_commit.constants as C from pre_commit import output +from pre_commit.clientlib import InvalidConfigError +from pre_commit.clientlib import InvalidManifestError +from pre_commit.clientlib import load_config +from pre_commit.clientlib import load_manifest +from pre_commit.clientlib import LOCAL +from pre_commit.clientlib import META from pre_commit.store import Store +from pre_commit.util import rmtree + + +def _mark_used_repos( + store: Store, + all_repos: dict[tuple[str, str], str], + unused_repos: set[tuple[str, str]], + repo: dict[str, Any], +) -> None: + if repo['repo'] == META: + return + elif repo['repo'] == LOCAL: + for hook in repo['hooks']: + deps = hook.get('additional_dependencies') + unused_repos.discard(( + store.db_repo_name(repo['repo'], deps), + C.LOCAL_REPO_VERSION, + )) + else: + key = (repo['repo'], repo['rev']) + path = all_repos.get(key) + # can't inspect manifest if it isn't cloned + if path is None: + return + + try: + manifest = load_manifest(os.path.join(path, C.MANIFEST_FILE)) + except InvalidManifestError: + return + else: + unused_repos.discard(key) + by_id = {hook['id']: hook for hook in manifest} + + for hook in repo['hooks']: + if hook['id'] not in by_id: + continue + + deps = hook.get( + 'additional_dependencies', + by_id[hook['id']]['additional_dependencies'], + ) + unused_repos.discard(( + store.db_repo_name(repo['repo'], deps), repo['rev'], + )) + + +def _gc(store: Store) -> int: + with store.exclusive_lock(), store.connect() as db: + store._create_configs_table(db) + + repos = db.execute('SELECT repo, ref, path FROM repos').fetchall() + all_repos = {(repo, ref): path for repo, ref, path in repos} + unused_repos = set(all_repos) + + configs_rows = db.execute('SELECT path FROM configs').fetchall() + configs = [path for path, in configs_rows] + + dead_configs = [] + for config_path in configs: + try: + config = load_config(config_path) + except InvalidConfigError: + dead_configs.append(config_path) + continue + else: + for repo in config['repos']: + _mark_used_repos(store, all_repos, unused_repos, repo) + + paths = [(path,) for path in dead_configs] + db.executemany('DELETE FROM configs WHERE path = ?', paths) + + db.executemany( + 'DELETE FROM repos WHERE repo = ? and ref = ?', + sorted(unused_repos), + ) + for k in unused_repos: + rmtree(all_repos[k]) + + return len(unused_repos) def gc(store: Store) -> int: - output.write_line(f'{store.gc()} repo(s) removed.') + output.write_line(f'{_gc(store)} repo(s) removed.') return 0 diff --git a/pre_commit/store.py b/pre_commit/store.py index 34c5f0d9..dc90c051 100644 --- a/pre_commit/store.py +++ b/pre_commit/store.py @@ -8,7 +8,6 @@ import tempfile from collections.abc import Callable from collections.abc import Generator from collections.abc import Sequence -from typing import Any import pre_commit.constants as C from pre_commit import clientlib @@ -18,7 +17,6 @@ from pre_commit.util import CalledProcessError from pre_commit.util import clean_path_on_failure from pre_commit.util import cmd_output_b from pre_commit.util import resource_text -from pre_commit.util import rmtree logger = logging.getLogger('pre_commit') @@ -235,81 +233,3 @@ class Store: # TODO: eventually remove this and only create in _create self._create_configs_table(db) db.execute('INSERT OR IGNORE INTO configs VALUES (?)', (path,)) - - def _mark_used_repos( - self, - all_repos: dict[tuple[str, str], str], - unused_repos: set[tuple[str, str]], - repo: dict[str, Any], - ) -> None: - if repo['repo'] == clientlib.META: - return - elif repo['repo'] == clientlib.LOCAL: - for hook in repo['hooks']: - deps = hook.get('additional_dependencies') - unused_repos.discard(( - self.db_repo_name(repo['repo'], deps), - C.LOCAL_REPO_VERSION, - )) - else: - key = (repo['repo'], repo['rev']) - path = all_repos.get(key) - # can't inspect manifest if it isn't cloned - if path is None: - return - - try: - manifest = clientlib.load_manifest( - os.path.join(path, C.MANIFEST_FILE), - ) - except clientlib.InvalidManifestError: - return - else: - unused_repos.discard(key) - by_id = {hook['id']: hook for hook in manifest} - - for hook in repo['hooks']: - if hook['id'] not in by_id: - continue - - deps = hook.get( - 'additional_dependencies', - by_id[hook['id']]['additional_dependencies'], - ) - unused_repos.discard(( - self.db_repo_name(repo['repo'], deps), repo['rev'], - )) - - def gc(self) -> int: - with self.exclusive_lock(), self.connect() as db: - self._create_configs_table(db) - - repos = db.execute('SELECT repo, ref, path FROM repos').fetchall() - all_repos = {(repo, ref): path for repo, ref, path in repos} - unused_repos = set(all_repos) - - configs_rows = db.execute('SELECT path FROM configs').fetchall() - configs = [path for path, in configs_rows] - - dead_configs = [] - for config_path in configs: - try: - config = clientlib.load_config(config_path) - except clientlib.InvalidConfigError: - dead_configs.append(config_path) - continue - else: - for repo in config['repos']: - self._mark_used_repos(all_repos, unused_repos, repo) - - paths = [(path,) for path in dead_configs] - db.executemany('DELETE FROM configs WHERE path = ?', paths) - - db.executemany( - 'DELETE FROM repos WHERE repo = ? and ref = ?', - sorted(unused_repos), - ) - for k in unused_repos: - rmtree(all_repos[k]) - - return len(unused_repos) diff --git a/tests/commands/gc_test.py b/tests/commands/gc_test.py index 85e66977..992b02f3 100644 --- a/tests/commands/gc_test.py +++ b/tests/commands/gc_test.py @@ -165,3 +165,11 @@ def test_invalid_manifest_gcd(tempdir_factory, store, in_git_dir, cap_out): assert _config_count(store) == 1 assert _repo_count(store) == 0 assert cap_out.get().splitlines()[-1] == '1 repo(s) removed.' + + +def test_gc_pre_1_14_roll_forward(store, cap_out): + with store.connect() as db: # simulate pre-1.14.0 + db.executescript('DROP TABLE configs') + + assert not gc(store) + assert cap_out.get() == '0 repo(s) removed.\n' diff --git a/tests/store_test.py b/tests/store_test.py index 4b04a8e7..13f198ea 100644 --- a/tests/store_test.py +++ b/tests/store_test.py @@ -289,18 +289,9 @@ def test_mark_config_as_used_does_not_exist(store): assert _select_all_configs(store) == [] -def _simulate_pre_1_14_0(store): - with store.connect() as db: - db.executescript('DROP TABLE configs') - - -def test_gc_roll_forward(store): - _simulate_pre_1_14_0(store) - assert store.gc() == 0 - - def test_mark_config_as_used_roll_forward(store, tmpdir): - _simulate_pre_1_14_0(store) + with store.connect() as db: # simulate pre-1.14.0 + db.executescript('DROP TABLE configs') test_mark_config_as_used(store, tmpdir) From 844dacc168d68a32553ecf8a99178ab395fdb11e Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Wed, 19 Nov 2025 14:57:01 -0500 Subject: [PATCH 04/14] add forward-compat error message --- pre_commit/clientlib.py | 11 ++++++++++- tests/clientlib_test.py | 17 +++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/pre_commit/clientlib.py b/pre_commit/clientlib.py index eb0fd4d6..51f14d26 100644 --- a/pre_commit/clientlib.py +++ b/pre_commit/clientlib.py @@ -270,10 +270,19 @@ class InvalidManifestError(FatalError): pass +def _load_manifest_forward_compat(contents: str) -> object: + obj = yaml_load(contents) + if isinstance(obj, dict): + check_min_version('5') + raise AssertionError('unreachable') + else: + return obj + + load_manifest = functools.partial( cfgv.load_from_filename, schema=MANIFEST_SCHEMA, - load_strategy=yaml_load, + load_strategy=_load_manifest_forward_compat, exc_tp=InvalidManifestError, ) diff --git a/tests/clientlib_test.py b/tests/clientlib_test.py index 93c698f7..2c42b80c 100644 --- a/tests/clientlib_test.py +++ b/tests/clientlib_test.py @@ -12,6 +12,8 @@ from pre_commit.clientlib import CONFIG_HOOK_DICT from pre_commit.clientlib import CONFIG_REPO_DICT from pre_commit.clientlib import CONFIG_SCHEMA from pre_commit.clientlib import DEFAULT_LANGUAGE_VERSION +from pre_commit.clientlib import InvalidManifestError +from pre_commit.clientlib import load_manifest from pre_commit.clientlib import MANIFEST_HOOK_DICT from pre_commit.clientlib import MANIFEST_SCHEMA from pre_commit.clientlib import META_HOOK_DICT @@ -588,3 +590,18 @@ def test_config_hook_stages_defaulting(): 'id': 'fake-hook', 'stages': ['commit-msg', 'pre-push', 'pre-commit', 'pre-merge-commit'], } + + +def test_manifest_v5_forward_compat(tmp_path): + manifest = tmp_path.joinpath('.pre-commit-hooks.yaml') + manifest.write_text('hooks: {}') + + with pytest.raises(InvalidManifestError) as excinfo: + load_manifest(manifest) + assert str(excinfo.value) == ( + f'\n' + f'==> File {manifest}\n' + f'=====> \n' + f'=====> pre-commit version 5 is required but version {C.VERSION} ' + f'is installed. Perhaps run `pip install --upgrade pre-commit`.' + ) From 8d34f95308fc4c14dea3d3e90153acfdaf55e2de Mon Sep 17 00:00:00 2001 From: anthony sottile Date: Fri, 21 Nov 2025 15:09:41 -0500 Subject: [PATCH 05/14] use ExitStack instead of start + stop --- tests/main_test.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/main_test.py b/tests/main_test.py index 945349fa..325792d8 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -1,6 +1,7 @@ from __future__ import annotations import argparse +import contextlib import os.path from unittest import mock @@ -97,11 +98,9 @@ CMDS = tuple(fn.replace('_', '-') for fn in FNS) @pytest.fixture def mock_commands(): - mcks = {fn: mock.patch.object(main, fn).start() for fn in FNS} - ret = auto_namedtuple(**mcks) - yield ret - for mck in ret: - mck.stop() + with contextlib.ExitStack() as ctx: + mcks = {f: ctx.enter_context(mock.patch.object(main, f)) for f in FNS} + yield auto_namedtuple(**mcks) @pytest.fixture From bdf68790b78158268bbc8482f76491a61d75809a Mon Sep 17 00:00:00 2001 From: anthony sottile Date: Fri, 21 Nov 2025 16:38:55 -0500 Subject: [PATCH 06/14] add pre-commit hazmat --- pre_commit/commands/hazmat.py | 95 +++++++++++++++++++++++++++++++++ pre_commit/lang_base.py | 6 ++- pre_commit/main.py | 10 +++- tests/commands/hazmat_test.py | 99 +++++++++++++++++++++++++++++++++++ tests/lang_base_test.py | 12 +++++ tests/main_test.py | 12 +++++ tests/repository_test.py | 11 ++++ 7 files changed, 243 insertions(+), 2 deletions(-) create mode 100644 pre_commit/commands/hazmat.py create mode 100644 tests/commands/hazmat_test.py diff --git a/pre_commit/commands/hazmat.py b/pre_commit/commands/hazmat.py new file mode 100644 index 00000000..01b27ce6 --- /dev/null +++ b/pre_commit/commands/hazmat.py @@ -0,0 +1,95 @@ +from __future__ import annotations + +import argparse +import subprocess +from collections.abc import Sequence + +from pre_commit.parse_shebang import normalize_cmd + + +def add_parsers(parser: argparse.ArgumentParser) -> None: + subparsers = parser.add_subparsers(dest='tool') + + cd_parser = subparsers.add_parser( + 'cd', help='cd to a subdir and run the command', + ) + cd_parser.add_argument('subdir') + cd_parser.add_argument('cmd', nargs=argparse.REMAINDER) + + ignore_exit_code_parser = subparsers.add_parser( + 'ignore-exit-code', help='run the command but ignore the exit code', + ) + ignore_exit_code_parser.add_argument('cmd', nargs=argparse.REMAINDER) + + n1_parser = subparsers.add_parser( + 'n1', help='run the command once per filename', + ) + n1_parser.add_argument('cmd', nargs=argparse.REMAINDER) + + +def _cmd_filenames(cmd: tuple[str, ...]) -> tuple[ + tuple[str, ...], + tuple[str, ...], +]: + for idx, val in enumerate(reversed(cmd)): + if val == '--': + split = len(cmd) - idx + break + else: + raise SystemExit('hazmat entry must end with `--`') + + return cmd[:split - 1], cmd[split:] + + +def cd(subdir: str, cmd: tuple[str, ...]) -> int: + cmd, filenames = _cmd_filenames(cmd) + + prefix = f'{subdir}/' + new_filenames = [] + for filename in filenames: + if not filename.startswith(prefix): + raise SystemExit(f'unexpected file without {prefix=}: {filename}') + else: + new_filenames.append(filename.removeprefix(prefix)) + + cmd = normalize_cmd(cmd) + return subprocess.call((*cmd, *new_filenames), cwd=subdir) + + +def ignore_exit_code(cmd: tuple[str, ...]) -> int: + cmd = normalize_cmd(cmd) + subprocess.call(cmd) + return 0 + + +def n1(cmd: tuple[str, ...]) -> int: + cmd, filenames = _cmd_filenames(cmd) + cmd = normalize_cmd(cmd) + ret = 0 + for filename in filenames: + ret |= subprocess.call((*cmd, filename)) + return ret + + +def impl(args: argparse.Namespace) -> int: + args.cmd = tuple(args.cmd) + if args.tool == 'cd': + return cd(args.subdir, args.cmd) + elif args.tool == 'ignore-exit-code': + return ignore_exit_code(args.cmd) + elif args.tool == 'n1': + return n1(args.cmd) + else: + raise NotImplementedError(f'unexpected tool: {args.tool}') + + +def main(argv: Sequence[str] | None = None) -> int: + parser = argparse.ArgumentParser() + add_parsers(parser) + args = parser.parse_args(argv) + + return impl(args) + + +if __name__ == '__main__': + raise SystemExit(main()) diff --git a/pre_commit/lang_base.py b/pre_commit/lang_base.py index 95be7b9b..198e9365 100644 --- a/pre_commit/lang_base.py +++ b/pre_commit/lang_base.py @@ -5,6 +5,7 @@ import os import random import re import shlex +import sys from collections.abc import Generator from collections.abc import Sequence from typing import Any @@ -171,7 +172,10 @@ def run_xargs( def hook_cmd(entry: str, args: Sequence[str]) -> tuple[str, ...]: - return (*shlex.split(entry), *args) + cmd = shlex.split(entry) + if cmd[:2] == ['pre-commit', 'hazmat']: + cmd = [sys.executable, '-m', 'pre_commit.commands.hazmat', *cmd[2:]] + return (*cmd, *args) def basic_run_hook( diff --git a/pre_commit/main.py b/pre_commit/main.py index c33fbfda..0c3eefda 100644 --- a/pre_commit/main.py +++ b/pre_commit/main.py @@ -10,6 +10,7 @@ import pre_commit.constants as C from pre_commit import clientlib from pre_commit import git from pre_commit.color import add_color_option +from pre_commit.commands import hazmat from pre_commit.commands.autoupdate import autoupdate from pre_commit.commands.clean import clean from pre_commit.commands.gc import gc @@ -41,7 +42,7 @@ os.environ.pop('__PYVENV_LAUNCHER__', None) os.environ.pop('PYTHONEXECUTABLE', None) COMMANDS_NO_GIT = { - 'clean', 'gc', 'init-templatedir', 'sample-config', + 'clean', 'gc', 'hazmat', 'init-templatedir', 'sample-config', 'validate-config', 'validate-manifest', } @@ -245,6 +246,11 @@ def main(argv: Sequence[str] | None = None) -> int: _add_cmd('gc', help='Clean unused cached repos.') + hazmat_parser = _add_cmd( + 'hazmat', help='Composable tools for rare use in hook `entry`.', + ) + hazmat.add_parsers(hazmat_parser) + init_templatedir_parser = _add_cmd( 'init-templatedir', help=( @@ -389,6 +395,8 @@ def main(argv: Sequence[str] | None = None) -> int: return clean(store) elif args.command == 'gc': return gc(store) + elif args.command == 'hazmat': + return hazmat.impl(args) elif args.command == 'hook-impl': return hook_impl( store, diff --git a/tests/commands/hazmat_test.py b/tests/commands/hazmat_test.py new file mode 100644 index 00000000..df957e36 --- /dev/null +++ b/tests/commands/hazmat_test.py @@ -0,0 +1,99 @@ +from __future__ import annotations + +import sys + +import pytest + +from pre_commit.commands.hazmat import _cmd_filenames +from pre_commit.commands.hazmat import main +from testing.util import cwd + + +def test_cmd_filenames_no_dash_dash(): + with pytest.raises(SystemExit) as excinfo: + _cmd_filenames(('no', 'dashdash', 'here')) + msg, = excinfo.value.args + assert msg == 'hazmat entry must end with `--`' + + +def test_cmd_filenames_no_filenames(): + cmd, filenames = _cmd_filenames(('hello', 'world', '--')) + assert cmd == ('hello', 'world') + assert filenames == () + + +def test_cmd_filenames_some_filenames(): + cmd, filenames = _cmd_filenames(('hello', 'world', '--', 'f1', 'f2')) + assert cmd == ('hello', 'world') + assert filenames == ('f1', 'f2') + + +def test_cmd_filenames_multiple_dashdash(): + cmd, filenames = _cmd_filenames(('hello', '--', 'arg', '--', 'f1', 'f2')) + assert cmd == ('hello', '--', 'arg') + assert filenames == ('f1', 'f2') + + +def test_cd_unexpected_filename(): + with pytest.raises(SystemExit) as excinfo: + main(('cd', 'subdir', 'cmd', '--', 'subdir/1', 'not-subdir/2')) + msg, = excinfo.value.args + assert msg == "unexpected file without prefix='subdir/': not-subdir/2" + + +def _norm(out): + return out.replace('\r\n', '\n') + + +def test_cd(tmp_path, capfd): + subdir = tmp_path.joinpath('subdir') + subdir.mkdir() + subdir.joinpath('a').write_text('a') + subdir.joinpath('b').write_text('b') + + with cwd(tmp_path): + ret = main(( + 'cd', 'subdir', + sys.executable, '-c', + 'import os; print(os.getcwd());' + 'import sys; [print(open(f).read()) for f in sys.argv[1:]]', + '--', + 'subdir/a', 'subdir/b', + )) + + assert ret == 0 + out, err = capfd.readouterr() + assert _norm(out) == f'{subdir}\na\nb\n' + assert err == '' + + +def test_ignore_exit_code(capfd): + ret = main(( + 'ignore-exit-code', sys.executable, '-c', 'raise SystemExit("bye")', + )) + assert ret == 0 + out, err = capfd.readouterr() + assert out == '' + assert _norm(err) == 'bye\n' + + +def test_n1(capfd): + ret = main(( + 'n1', sys.executable, '-c', 'import sys; print(sys.argv[1:])', + '--', + 'foo', 'bar', 'baz', + )) + assert ret == 0 + out, err = capfd.readouterr() + assert _norm(out) == "['foo']\n['bar']\n['baz']\n" + assert err == '' + + +def test_n1_some_error_code(): + ret = main(( + 'n1', sys.executable, '-c', + 'import sys; raise SystemExit(sys.argv[1] == "error")', + '--', + 'ok', 'error', 'ok', + )) + assert ret == 1 diff --git a/tests/lang_base_test.py b/tests/lang_base_test.py index da289aef..9fac83da 100644 --- a/tests/lang_base_test.py +++ b/tests/lang_base_test.py @@ -164,3 +164,15 @@ def test_basic_run_hook(tmp_path): assert ret == 0 out = out.replace(b'\r\n', b'\n') assert out == b'hi hello file file file\n' + + +def test_hook_cmd(): + assert lang_base.hook_cmd('echo hi', ()) == ('echo', 'hi') + + +def test_hook_cmd_hazmat(): + ret = lang_base.hook_cmd('pre-commit hazmat cd a echo -- b', ()) + assert ret == ( + sys.executable, '-m', 'pre_commit.commands.hazmat', + 'cd', 'a', 'echo', '--', 'b', + ) diff --git a/tests/main_test.py b/tests/main_test.py index 945349fa..eb9ea18d 100644 --- a/tests/main_test.py +++ b/tests/main_test.py @@ -8,6 +8,7 @@ import pytest import pre_commit.constants as C from pre_commit import main +from pre_commit.commands import hazmat from pre_commit.errors import FatalError from pre_commit.util import cmd_output from testing.auto_namedtuple import auto_namedtuple @@ -158,6 +159,17 @@ def test_all_cmds(command, mock_commands, mock_store_dir): assert_only_one_mock_called(mock_commands) +def test_hazmat(mock_store_dir): + with mock.patch.object(hazmat, 'impl') as mck: + main.main(('hazmat', 'cd', 'subdir', '--', 'cmd', '--', 'f1', 'f2')) + assert mck.call_count == 1 + (arg,), dct = mck.call_args + assert dct == {} + assert arg.tool == 'cd' + assert arg.subdir == 'subdir' + assert arg.cmd == ['cmd', '--', 'f1', 'f2'] + + def test_try_repo(mock_store_dir): with mock.patch.object(main, 'try_repo') as patch: main.main(('try-repo', '.')) diff --git a/tests/repository_test.py b/tests/repository_test.py index b1c7a002..5d71c3e4 100644 --- a/tests/repository_test.py +++ b/tests/repository_test.py @@ -506,3 +506,14 @@ def test_args_with_spaces_and_quotes(tmp_path): expected = b"['i have spaces', 'and\"\\'quotes', '$and !this']\n" assert ret == (0, expected) + + +def test_hazmat(tmp_path): + ret = run_language( + tmp_path, unsupported, + f'pre-commit hazmat ignore-exit-code {shlex.quote(sys.executable)} ' + f"-c 'import sys; raise SystemExit(sys.argv[1:])'", + ('f1', 'f2'), + ) + expected = b"['f1', 'f2']\n" + assert ret == (0, expected) From 1af6c8fa9502336c6977c2ff3e79185bd97a6e57 Mon Sep 17 00:00:00 2001 From: anthony sottile Date: Sat, 22 Nov 2025 16:02:16 -0500 Subject: [PATCH 07/14] v4.5.0 --- CHANGELOG.md | 7 +++++++ setup.cfg | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b27af5e7..1434728d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +4.5.0 - 2025-11-22 +================== + +### Features +- Add `pre-commit hazmat`. + - #3585 PR by @asottile. + 4.4.0 - 2025-11-08 ================== diff --git a/setup.cfg b/setup.cfg index be031c3e..00c71759 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = pre_commit -version = 4.4.0 +version = 4.5.0 description = A framework for managing and maintaining multi-language pre-commit hooks. long_description = file: README.md long_description_content_type = text/markdown From 8ea2b790d817088444b2328ff6cfe6742260070f Mon Sep 17 00:00:00 2001 From: anthony sottile Date: Sat, 22 Nov 2025 16:06:27 -0500 Subject: [PATCH 08/14] remove sha256 file from zipapp script github displays the checksum for us now! --- testing/zipapp/make | 3 --- 1 file changed, 3 deletions(-) diff --git a/testing/zipapp/make b/testing/zipapp/make index 165046f6..43bb4373 100755 --- a/testing/zipapp/make +++ b/testing/zipapp/make @@ -107,9 +107,6 @@ def main() -> int: shebang = '/usr/bin/env python3' zipapp.create_archive(tmpdir, filename, interpreter=shebang) - with open(f'{filename}.sha256sum', 'w') as f: - subprocess.check_call(('sha256sum', filename), stdout=f) - return 0 From 465192d7de58d569776eaaa818c94cb2b962d436 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 24 Nov 2025 20:53:38 +0000 Subject: [PATCH 09/14] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/asottile/pyupgrade: v3.21.1 → v3.21.2](https://github.com/asottile/pyupgrade/compare/v3.21.1...v3.21.2) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e47d56ca..50893030 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ repos: hooks: - id: add-trailing-comma - repo: https://github.com/asottile/pyupgrade - rev: v3.21.1 + rev: v3.21.2 hooks: - id: pyupgrade args: [--py310-plus] From 48953556d06f8cdb4248002c1a0044e69e0916b3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 1 Dec 2025 21:05:15 +0000 Subject: [PATCH 10/14] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/mirrors-mypy: v1.18.2 → v1.19.0](https://github.com/pre-commit/mirrors-mypy/compare/v1.18.2...v1.19.0) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 50893030..cedeae5e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -37,7 +37,7 @@ repos: hooks: - id: flake8 - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.18.2 + rev: v1.19.0 hooks: - id: mypy additional_dependencies: [types-pyyaml] From c251e6b6d011b3b262339dc8e109de29b0ff8db1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Dec 2025 20:48:45 +0000 Subject: [PATCH 11/14] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/mirrors-mypy: v1.19.0 → v1.19.1](https://github.com/pre-commit/mirrors-mypy/compare/v1.19.0...v1.19.1) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index cedeae5e..83ff03f3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -37,7 +37,7 @@ repos: hooks: - id: flake8 - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.19.0 + rev: v1.19.1 hooks: - id: mypy additional_dependencies: [types-pyyaml] From 51592eececd13b99c40ec477ad8f810799147227 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Tue, 16 Dec 2025 15:45:01 -0500 Subject: [PATCH 12/14] fix python local template when artifact dirs are present --- pre_commit/resources/empty_template_setup.py | 2 +- tests/languages/python_test.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/pre_commit/resources/empty_template_setup.py b/pre_commit/resources/empty_template_setup.py index ef05eef8..e8b1ff02 100644 --- a/pre_commit/resources/empty_template_setup.py +++ b/pre_commit/resources/empty_template_setup.py @@ -1,4 +1,4 @@ from setuptools import setup -setup(name='pre-commit-placeholder-package', version='0.0.0') +setup(name='pre-commit-placeholder-package', version='0.0.0', py_modules=[]) diff --git a/tests/languages/python_test.py b/tests/languages/python_test.py index 565525a4..593634b7 100644 --- a/tests/languages/python_test.py +++ b/tests/languages/python_test.py @@ -10,6 +10,8 @@ import pre_commit.constants as C from pre_commit.envcontext import envcontext from pre_commit.languages import python from pre_commit.prefix import Prefix +from pre_commit.store import _make_local_repo +from pre_commit.util import cmd_output_b from pre_commit.util import make_executable from pre_commit.util import win_exe from testing.auto_namedtuple import auto_namedtuple @@ -351,3 +353,15 @@ def test_python_hook_weird_setup_cfg(tmp_path): ret = run_language(tmp_path, python, 'socks', [os.devnull]) assert ret == (0, f'[{os.devnull!r}]\nhello hello\n'.encode()) + + +def test_local_repo_with_other_artifacts(tmp_path): + cmd_output_b('git', 'init', tmp_path) + _make_local_repo(str(tmp_path)) + # pretend a rust install also ran here + tmp_path.joinpath('target').mkdir() + + ret, out = run_language(tmp_path, python, 'python --version') + + assert ret == 0 + assert out.startswith(b'Python ') From 8a0630ca1aa7f6d5665effe674ebe2022af17919 Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Tue, 16 Dec 2025 16:13:56 -0500 Subject: [PATCH 13/14] v4.5.1 --- CHANGELOG.md | 7 +++++++ setup.cfg | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1434728d..879ae073 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +4.5.1 - 2025-12-16 +================== + +### Fixes +- Fix `language: python` with `repo: local` without `additional_dependencies`. + - #3597 PR by @asottile. + 4.5.0 - 2025-11-22 ================== diff --git a/setup.cfg b/setup.cfg index 00c71759..a95ee447 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = pre_commit -version = 4.5.0 +version = 4.5.1 description = A framework for managing and maintaining multi-language pre-commit hooks. long_description = file: README.md long_description_content_type = text/markdown From 37a879e65ee00d8375d12f053ef76e0024a0ed55 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 22 Dec 2025 20:26:26 +0000 Subject: [PATCH 14/14] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/asottile/setup-cfg-fmt: v3.1.0 → v3.2.0](https://github.com/asottile/setup-cfg-fmt/compare/v3.1.0...v3.2.0) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 83ff03f3..3654066f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,7 +10,7 @@ repos: - id: name-tests-test - id: requirements-txt-fixer - repo: https://github.com/asottile/setup-cfg-fmt - rev: v3.1.0 + rev: v3.2.0 hooks: - id: setup-cfg-fmt - repo: https://github.com/asottile/reorder-python-imports