From 5d692d7e06606ec34ef3a6acf4a0fa7fef158983 Mon Sep 17 00:00:00 2001 From: Max R Date: Sat, 9 Sep 2023 21:51:59 -0400 Subject: [PATCH] Short-circuit hooks --- pre_commit/commands/run.py | 48 +++++++++---------- pre_commit/meta_hooks/check_hooks_apply.py | 2 +- .../meta_hooks/check_useless_excludes.py | 14 +++--- tests/commands/run_test.py | 16 +++---- 4 files changed, 41 insertions(+), 39 deletions(-) diff --git a/pre_commit/commands/run.py b/pre_commit/commands/run.py index c867799e..38d80db3 100644 --- a/pre_commit/commands/run.py +++ b/pre_commit/commands/run.py @@ -10,7 +10,8 @@ import subprocess import time import unicodedata from typing import Any -from typing import Collection +from typing import Generator +from typing import Iterable from typing import MutableMapping from typing import Sequence @@ -57,20 +58,20 @@ def _full_msg( def filter_by_include_exclude( - names: Collection[str], + names: Iterable[str], include: str, exclude: str, -) -> list[str]: +) -> Generator[str, None, None]: include_re, exclude_re = re.compile(include), re.compile(exclude) - return [ + return ( filename for filename in names if include_re.search(filename) if not exclude_re.search(filename) - ] + ) class Classifier: - def __init__(self, filenames: Collection[str]) -> None: + def __init__(self, filenames: Iterable[str]) -> None: self.filenames = [f for f in filenames if os.path.lexists(f)] @functools.lru_cache(maxsize=None) @@ -79,15 +80,14 @@ class Classifier: def by_types( self, - names: Sequence[str], - types: Collection[str], - types_or: Collection[str], - exclude_types: Collection[str], - ) -> list[str]: + names: Iterable[str], + types: Iterable[str], + types_or: Iterable[str], + exclude_types: Iterable[str], + ) -> Generator[str, None, None]: types = frozenset(types) types_or = frozenset(types_or) exclude_types = frozenset(exclude_types) - ret = [] for filename in names: tags = self._types_for_file(filename) if ( @@ -95,24 +95,24 @@ class Classifier: (not types_or or tags & types_or) and not tags & exclude_types ): - ret.append(filename) - return ret + yield filename - def filenames_for_hook(self, hook: Hook) -> tuple[str, ...]: - names = self.filenames - names = filter_by_include_exclude(names, hook.files, hook.exclude) - names = self.by_types( - names, + def filenames_for_hook(self, hook: Hook) -> Generator[str, None, None]: + return self.by_types( + filter_by_include_exclude( + self.filenames, + hook.files, + hook.exclude, + ), hook.types, hook.types_or, hook.exclude_types, ) - return tuple(names) @classmethod def from_config( cls, - filenames: Collection[str], + filenames: Iterable[str], include: str, exclude: str, ) -> Classifier: @@ -121,7 +121,7 @@ class Classifier: # this also makes improperly quoted shell-based hooks work better # see #1173 if os.altsep == '/' and os.sep == '\\': - filenames = [f.replace(os.sep, os.altsep) for f in filenames] + filenames = (f.replace(os.sep, os.altsep) for f in filenames) filenames = filter_by_include_exclude(filenames, include, exclude) return Classifier(filenames) @@ -148,7 +148,7 @@ def _run_single_hook( verbose: bool, use_color: bool, ) -> tuple[bool, bytes]: - filenames = classifier.filenames_for_hook(hook) + filenames = tuple(classifier.filenames_for_hook(hook)) if hook.id in skips or hook.alias in skips: output.write( @@ -250,7 +250,7 @@ def _compute_cols(hooks: Sequence[Hook]) -> int: return max(cols, 80) -def _all_filenames(args: argparse.Namespace) -> Collection[str]: +def _all_filenames(args: argparse.Namespace) -> Iterable[str]: # these hooks do not operate on files if args.hook_stage in { 'post-checkout', 'post-commit', 'post-merge', 'post-rewrite', diff --git a/pre_commit/meta_hooks/check_hooks_apply.py b/pre_commit/meta_hooks/check_hooks_apply.py index b05a7050..7f491a20 100644 --- a/pre_commit/meta_hooks/check_hooks_apply.py +++ b/pre_commit/meta_hooks/check_hooks_apply.py @@ -21,7 +21,7 @@ def check_all_hooks_match_files(config_file: str) -> int: for hook in all_hooks(config, Store()): if hook.always_run or hook.language == 'fail': continue - elif not classifier.filenames_for_hook(hook): + elif not any(classifier.filenames_for_hook(hook)): print(f'{hook.id} does not apply to this repository') retv = 1 diff --git a/pre_commit/meta_hooks/check_useless_excludes.py b/pre_commit/meta_hooks/check_useless_excludes.py index 0a8249b8..8b0c106a 100644 --- a/pre_commit/meta_hooks/check_useless_excludes.py +++ b/pre_commit/meta_hooks/check_useless_excludes.py @@ -2,6 +2,7 @@ from __future__ import annotations import argparse import re +from typing import Iterable from typing import Sequence from cfgv import apply_defaults @@ -14,7 +15,7 @@ from pre_commit.commands.run import Classifier def exclude_matches_any( - filenames: Sequence[str], + filenames: Iterable[str], include: str, exclude: str, ) -> bool: @@ -50,11 +51,12 @@ def check_useless_excludes(config_file: str) -> int: # Not actually a manifest dict, but this more accurately reflects # the defaults applied during runtime hook = apply_defaults(hook, MANIFEST_HOOK_DICT) - names = classifier.filenames - types = hook['types'] - types_or = hook['types_or'] - exclude_types = hook['exclude_types'] - names = classifier.by_types(names, types, types_or, exclude_types) + names = classifier.by_types( + classifier.filenames, + hook['types'], + hook['types_or'], + hook['exclude_types'], + ) include, exclude = hook['files'], hook['exclude'] if not exclude_matches_any(names, include, exclude): print( diff --git a/tests/commands/run_test.py b/tests/commands/run_test.py index dd15b94c..8d89815b 100644 --- a/tests/commands/run_test.py +++ b/tests/commands/run_test.py @@ -1127,8 +1127,8 @@ def test_classifier_empty_types_or(tmpdir): types_or=[], exclude_types=[], ) - assert for_symlink == ['foo'] - assert for_file == ['bar'] + assert tuple(for_symlink) == ('foo',) + assert tuple(for_file) == ('bar',) @pytest.fixture @@ -1142,33 +1142,33 @@ def some_filenames(): def test_include_exclude_base_case(some_filenames): ret = filter_by_include_exclude(some_filenames, '', '^$') - assert ret == [ + assert tuple(ret) == ( '.pre-commit-hooks.yaml', 'pre_commit/git.py', 'pre_commit/main.py', - ] + ) def test_matches_broken_symlink(tmpdir): with tmpdir.as_cwd(): os.symlink('does-not-exist', 'link') ret = filter_by_include_exclude({'link'}, '', '^$') - assert ret == ['link'] + assert tuple(ret) == ('link',) def test_include_exclude_total_match(some_filenames): ret = filter_by_include_exclude(some_filenames, r'^.*\.py$', '^$') - assert ret == ['pre_commit/git.py', 'pre_commit/main.py'] + assert tuple(ret) == ('pre_commit/git.py', 'pre_commit/main.py') def test_include_exclude_does_search_instead_of_match(some_filenames): ret = filter_by_include_exclude(some_filenames, r'\.yaml$', '^$') - assert ret == ['.pre-commit-hooks.yaml'] + assert tuple(ret) == ('.pre-commit-hooks.yaml',) def test_include_exclude_exclude_removes_files(some_filenames): ret = filter_by_include_exclude(some_filenames, '', r'\.py$') - assert ret == ['.pre-commit-hooks.yaml'] + assert tuple(ret) == ('.pre-commit-hooks.yaml',) def test_args_hook_only(cap_out, store, repo_with_passing_hook):