Merge pull request #2998 from pre-commit/short-circuit

Short-circuit `check-hooks-apply` and `check-useless-excludes` hooks
This commit is contained in:
Anthony Sottile 2023-09-11 19:52:51 -04:00 committed by GitHub
commit f56b75dd77
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 41 additions and 39 deletions

View file

@ -10,7 +10,8 @@ import subprocess
import time import time
import unicodedata import unicodedata
from typing import Any from typing import Any
from typing import Collection from typing import Generator
from typing import Iterable
from typing import MutableMapping from typing import MutableMapping
from typing import Sequence from typing import Sequence
@ -57,20 +58,20 @@ def _full_msg(
def filter_by_include_exclude( def filter_by_include_exclude(
names: Collection[str], names: Iterable[str],
include: str, include: str,
exclude: str, exclude: str,
) -> list[str]: ) -> Generator[str, None, None]:
include_re, exclude_re = re.compile(include), re.compile(exclude) include_re, exclude_re = re.compile(include), re.compile(exclude)
return [ return (
filename for filename in names filename for filename in names
if include_re.search(filename) if include_re.search(filename)
if not exclude_re.search(filename) if not exclude_re.search(filename)
] )
class Classifier: class Classifier:
def __init__(self, filenames: Collection[str]) -> None: def __init__(self, filenames: Iterable[str]) -> None:
self.filenames = [f for f in filenames if os.path.lexists(f)] self.filenames = [f for f in filenames if os.path.lexists(f)]
@functools.lru_cache(maxsize=None) @functools.lru_cache(maxsize=None)
@ -79,15 +80,14 @@ class Classifier:
def by_types( def by_types(
self, self,
names: Sequence[str], names: Iterable[str],
types: Collection[str], types: Iterable[str],
types_or: Collection[str], types_or: Iterable[str],
exclude_types: Collection[str], exclude_types: Iterable[str],
) -> list[str]: ) -> Generator[str, None, None]:
types = frozenset(types) types = frozenset(types)
types_or = frozenset(types_or) types_or = frozenset(types_or)
exclude_types = frozenset(exclude_types) exclude_types = frozenset(exclude_types)
ret = []
for filename in names: for filename in names:
tags = self._types_for_file(filename) tags = self._types_for_file(filename)
if ( if (
@ -95,24 +95,24 @@ class Classifier:
(not types_or or tags & types_or) and (not types_or or tags & types_or) and
not tags & exclude_types not tags & exclude_types
): ):
ret.append(filename) yield filename
return ret
def filenames_for_hook(self, hook: Hook) -> tuple[str, ...]: def filenames_for_hook(self, hook: Hook) -> Generator[str, None, None]:
names = self.filenames return self.by_types(
names = filter_by_include_exclude(names, hook.files, hook.exclude) filter_by_include_exclude(
names = self.by_types( self.filenames,
names, hook.files,
hook.exclude,
),
hook.types, hook.types,
hook.types_or, hook.types_or,
hook.exclude_types, hook.exclude_types,
) )
return tuple(names)
@classmethod @classmethod
def from_config( def from_config(
cls, cls,
filenames: Collection[str], filenames: Iterable[str],
include: str, include: str,
exclude: str, exclude: str,
) -> Classifier: ) -> Classifier:
@ -121,7 +121,7 @@ class Classifier:
# this also makes improperly quoted shell-based hooks work better # this also makes improperly quoted shell-based hooks work better
# see #1173 # see #1173
if os.altsep == '/' and os.sep == '\\': if os.altsep == '/' and os.sep == '\\':
filenames = [f.replace(os.sep, os.altsep) for f in filenames] filenames = (f.replace(os.sep, os.altsep) for f in filenames)
filenames = filter_by_include_exclude(filenames, include, exclude) filenames = filter_by_include_exclude(filenames, include, exclude)
return Classifier(filenames) return Classifier(filenames)
@ -148,7 +148,7 @@ def _run_single_hook(
verbose: bool, verbose: bool,
use_color: bool, use_color: bool,
) -> tuple[bool, bytes]: ) -> tuple[bool, bytes]:
filenames = classifier.filenames_for_hook(hook) filenames = tuple(classifier.filenames_for_hook(hook))
if hook.id in skips or hook.alias in skips: if hook.id in skips or hook.alias in skips:
output.write( output.write(
@ -250,7 +250,7 @@ def _compute_cols(hooks: Sequence[Hook]) -> int:
return max(cols, 80) return max(cols, 80)
def _all_filenames(args: argparse.Namespace) -> Collection[str]: def _all_filenames(args: argparse.Namespace) -> Iterable[str]:
# these hooks do not operate on files # these hooks do not operate on files
if args.hook_stage in { if args.hook_stage in {
'post-checkout', 'post-commit', 'post-merge', 'post-rewrite', 'post-checkout', 'post-commit', 'post-merge', 'post-rewrite',

View file

@ -21,7 +21,7 @@ def check_all_hooks_match_files(config_file: str) -> int:
for hook in all_hooks(config, Store()): for hook in all_hooks(config, Store()):
if hook.always_run or hook.language == 'fail': if hook.always_run or hook.language == 'fail':
continue continue
elif not classifier.filenames_for_hook(hook): elif not any(classifier.filenames_for_hook(hook)):
print(f'{hook.id} does not apply to this repository') print(f'{hook.id} does not apply to this repository')
retv = 1 retv = 1

View file

@ -2,6 +2,7 @@ from __future__ import annotations
import argparse import argparse
import re import re
from typing import Iterable
from typing import Sequence from typing import Sequence
from cfgv import apply_defaults from cfgv import apply_defaults
@ -14,7 +15,7 @@ from pre_commit.commands.run import Classifier
def exclude_matches_any( def exclude_matches_any(
filenames: Sequence[str], filenames: Iterable[str],
include: str, include: str,
exclude: str, exclude: str,
) -> bool: ) -> bool:
@ -50,11 +51,12 @@ def check_useless_excludes(config_file: str) -> int:
# Not actually a manifest dict, but this more accurately reflects # Not actually a manifest dict, but this more accurately reflects
# the defaults applied during runtime # the defaults applied during runtime
hook = apply_defaults(hook, MANIFEST_HOOK_DICT) hook = apply_defaults(hook, MANIFEST_HOOK_DICT)
names = classifier.filenames names = classifier.by_types(
types = hook['types'] classifier.filenames,
types_or = hook['types_or'] hook['types'],
exclude_types = hook['exclude_types'] hook['types_or'],
names = classifier.by_types(names, types, types_or, exclude_types) hook['exclude_types'],
)
include, exclude = hook['files'], hook['exclude'] include, exclude = hook['files'], hook['exclude']
if not exclude_matches_any(names, include, exclude): if not exclude_matches_any(names, include, exclude):
print( print(

View file

@ -1127,8 +1127,8 @@ def test_classifier_empty_types_or(tmpdir):
types_or=[], types_or=[],
exclude_types=[], exclude_types=[],
) )
assert for_symlink == ['foo'] assert tuple(for_symlink) == ('foo',)
assert for_file == ['bar'] assert tuple(for_file) == ('bar',)
@pytest.fixture @pytest.fixture
@ -1142,33 +1142,33 @@ def some_filenames():
def test_include_exclude_base_case(some_filenames): def test_include_exclude_base_case(some_filenames):
ret = filter_by_include_exclude(some_filenames, '', '^$') ret = filter_by_include_exclude(some_filenames, '', '^$')
assert ret == [ assert tuple(ret) == (
'.pre-commit-hooks.yaml', '.pre-commit-hooks.yaml',
'pre_commit/git.py', 'pre_commit/git.py',
'pre_commit/main.py', 'pre_commit/main.py',
] )
def test_matches_broken_symlink(tmpdir): def test_matches_broken_symlink(tmpdir):
with tmpdir.as_cwd(): with tmpdir.as_cwd():
os.symlink('does-not-exist', 'link') os.symlink('does-not-exist', 'link')
ret = filter_by_include_exclude({'link'}, '', '^$') ret = filter_by_include_exclude({'link'}, '', '^$')
assert ret == ['link'] assert tuple(ret) == ('link',)
def test_include_exclude_total_match(some_filenames): def test_include_exclude_total_match(some_filenames):
ret = filter_by_include_exclude(some_filenames, r'^.*\.py$', '^$') ret = filter_by_include_exclude(some_filenames, r'^.*\.py$', '^$')
assert ret == ['pre_commit/git.py', 'pre_commit/main.py'] assert tuple(ret) == ('pre_commit/git.py', 'pre_commit/main.py')
def test_include_exclude_does_search_instead_of_match(some_filenames): def test_include_exclude_does_search_instead_of_match(some_filenames):
ret = filter_by_include_exclude(some_filenames, r'\.yaml$', '^$') ret = filter_by_include_exclude(some_filenames, r'\.yaml$', '^$')
assert ret == ['.pre-commit-hooks.yaml'] assert tuple(ret) == ('.pre-commit-hooks.yaml',)
def test_include_exclude_exclude_removes_files(some_filenames): def test_include_exclude_exclude_removes_files(some_filenames):
ret = filter_by_include_exclude(some_filenames, '', r'\.py$') ret = filter_by_include_exclude(some_filenames, '', r'\.py$')
assert ret == ['.pre-commit-hooks.yaml'] assert tuple(ret) == ('.pre-commit-hooks.yaml',)
def test_args_hook_only(cap_out, store, repo_with_passing_hook): def test_args_hook_only(cap_out, store, repo_with_passing_hook):