Merge pull request #2998 from pre-commit/short-circuit

Short-circuit `check-hooks-apply` and `check-useless-excludes` hooks
This commit is contained in:
Anthony Sottile 2023-09-11 19:52:51 -04:00 committed by GitHub
commit f56b75dd77
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 41 additions and 39 deletions

View file

@ -10,7 +10,8 @@ import subprocess
import time
import unicodedata
from typing import Any
from typing import Collection
from typing import Generator
from typing import Iterable
from typing import MutableMapping
from typing import Sequence
@ -57,20 +58,20 @@ def _full_msg(
def filter_by_include_exclude(
names: Collection[str],
names: Iterable[str],
include: str,
exclude: str,
) -> list[str]:
) -> Generator[str, None, None]:
include_re, exclude_re = re.compile(include), re.compile(exclude)
return [
return (
filename for filename in names
if include_re.search(filename)
if not exclude_re.search(filename)
]
)
class Classifier:
def __init__(self, filenames: Collection[str]) -> None:
def __init__(self, filenames: Iterable[str]) -> None:
self.filenames = [f for f in filenames if os.path.lexists(f)]
@functools.lru_cache(maxsize=None)
@ -79,15 +80,14 @@ class Classifier:
def by_types(
self,
names: Sequence[str],
types: Collection[str],
types_or: Collection[str],
exclude_types: Collection[str],
) -> list[str]:
names: Iterable[str],
types: Iterable[str],
types_or: Iterable[str],
exclude_types: Iterable[str],
) -> Generator[str, None, None]:
types = frozenset(types)
types_or = frozenset(types_or)
exclude_types = frozenset(exclude_types)
ret = []
for filename in names:
tags = self._types_for_file(filename)
if (
@ -95,24 +95,24 @@ class Classifier:
(not types_or or tags & types_or) and
not tags & exclude_types
):
ret.append(filename)
return ret
yield filename
def filenames_for_hook(self, hook: Hook) -> tuple[str, ...]:
names = self.filenames
names = filter_by_include_exclude(names, hook.files, hook.exclude)
names = self.by_types(
names,
def filenames_for_hook(self, hook: Hook) -> Generator[str, None, None]:
return self.by_types(
filter_by_include_exclude(
self.filenames,
hook.files,
hook.exclude,
),
hook.types,
hook.types_or,
hook.exclude_types,
)
return tuple(names)
@classmethod
def from_config(
cls,
filenames: Collection[str],
filenames: Iterable[str],
include: str,
exclude: str,
) -> Classifier:
@ -121,7 +121,7 @@ class Classifier:
# this also makes improperly quoted shell-based hooks work better
# see #1173
if os.altsep == '/' and os.sep == '\\':
filenames = [f.replace(os.sep, os.altsep) for f in filenames]
filenames = (f.replace(os.sep, os.altsep) for f in filenames)
filenames = filter_by_include_exclude(filenames, include, exclude)
return Classifier(filenames)
@ -148,7 +148,7 @@ def _run_single_hook(
verbose: bool,
use_color: bool,
) -> tuple[bool, bytes]:
filenames = classifier.filenames_for_hook(hook)
filenames = tuple(classifier.filenames_for_hook(hook))
if hook.id in skips or hook.alias in skips:
output.write(
@ -250,7 +250,7 @@ def _compute_cols(hooks: Sequence[Hook]) -> int:
return max(cols, 80)
def _all_filenames(args: argparse.Namespace) -> Collection[str]:
def _all_filenames(args: argparse.Namespace) -> Iterable[str]:
# these hooks do not operate on files
if args.hook_stage in {
'post-checkout', 'post-commit', 'post-merge', 'post-rewrite',

View file

@ -21,7 +21,7 @@ def check_all_hooks_match_files(config_file: str) -> int:
for hook in all_hooks(config, Store()):
if hook.always_run or hook.language == 'fail':
continue
elif not classifier.filenames_for_hook(hook):
elif not any(classifier.filenames_for_hook(hook)):
print(f'{hook.id} does not apply to this repository')
retv = 1

View file

@ -2,6 +2,7 @@ from __future__ import annotations
import argparse
import re
from typing import Iterable
from typing import Sequence
from cfgv import apply_defaults
@ -14,7 +15,7 @@ from pre_commit.commands.run import Classifier
def exclude_matches_any(
filenames: Sequence[str],
filenames: Iterable[str],
include: str,
exclude: str,
) -> bool:
@ -50,11 +51,12 @@ def check_useless_excludes(config_file: str) -> int:
# Not actually a manifest dict, but this more accurately reflects
# the defaults applied during runtime
hook = apply_defaults(hook, MANIFEST_HOOK_DICT)
names = classifier.filenames
types = hook['types']
types_or = hook['types_or']
exclude_types = hook['exclude_types']
names = classifier.by_types(names, types, types_or, exclude_types)
names = classifier.by_types(
classifier.filenames,
hook['types'],
hook['types_or'],
hook['exclude_types'],
)
include, exclude = hook['files'], hook['exclude']
if not exclude_matches_any(names, include, exclude):
print(

View file

@ -1127,8 +1127,8 @@ def test_classifier_empty_types_or(tmpdir):
types_or=[],
exclude_types=[],
)
assert for_symlink == ['foo']
assert for_file == ['bar']
assert tuple(for_symlink) == ('foo',)
assert tuple(for_file) == ('bar',)
@pytest.fixture
@ -1142,33 +1142,33 @@ def some_filenames():
def test_include_exclude_base_case(some_filenames):
ret = filter_by_include_exclude(some_filenames, '', '^$')
assert ret == [
assert tuple(ret) == (
'.pre-commit-hooks.yaml',
'pre_commit/git.py',
'pre_commit/main.py',
]
)
def test_matches_broken_symlink(tmpdir):
with tmpdir.as_cwd():
os.symlink('does-not-exist', 'link')
ret = filter_by_include_exclude({'link'}, '', '^$')
assert ret == ['link']
assert tuple(ret) == ('link',)
def test_include_exclude_total_match(some_filenames):
ret = filter_by_include_exclude(some_filenames, r'^.*\.py$', '^$')
assert ret == ['pre_commit/git.py', 'pre_commit/main.py']
assert tuple(ret) == ('pre_commit/git.py', 'pre_commit/main.py')
def test_include_exclude_does_search_instead_of_match(some_filenames):
ret = filter_by_include_exclude(some_filenames, r'\.yaml$', '^$')
assert ret == ['.pre-commit-hooks.yaml']
assert tuple(ret) == ('.pre-commit-hooks.yaml',)
def test_include_exclude_exclude_removes_files(some_filenames):
ret = filter_by_include_exclude(some_filenames, '', r'\.py$')
assert ret == ['.pre-commit-hooks.yaml']
assert tuple(ret) == ('.pre-commit-hooks.yaml',)
def test_args_hook_only(cap_out, store, repo_with_passing_hook):