def files_contain(
        self,
        names: Sequence[str],
        contains: str,
) -> List[str]:
    """Return the entries of ``names`` whose file content includes ``contains``.

    ``contains`` is a literal substring, not a regular expression.  It is
    encoded with the default codec (UTF-8) and searched for in the raw bytes
    of each file.

    :param names: paths of the candidate files; every path must be openable
        for reading, otherwise the ``OSError`` from ``open`` propagates.
    :param contains: text to look for.  An empty string disables the filter
        and every name is returned.
    :returns: a new list with the matching names, in their original order.
    """
    if not contains:
        return list(names)

    # Encode once; the needle is the same for every file.
    needle = contains.encode()

    ret = []
    for filename in names:
        # Read-only binary mode: the search never writes, so read-only
        # files must not be rejected (the previous 'r+' mode required
        # write permission).
        with open(filename, 'rb') as f:
            # mmap raises ValueError for zero-length files; an empty file
            # cannot contain a non-empty needle, so skip it.
            if os.fstat(f.fileno()).st_size == 0:
                continue
            with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
                if mm.find(needle) != -1:
                    ret.append(filename)
    return ret
def test_classifier_files_contain(tmpdir):
    """``files_contain`` keeps only the files whose content has the needle."""
    fixtures = {
        'ignored': 'We\nIgnore\nThis\nFile\n',
        'matched': 'We\nMatch\nThis\nFile\n',
    }
    for basename, text in fixtures.items():
        tmpdir.join(basename).ensure().write(text)

    # The classifier is given relative names, so run from inside tmpdir.
    with tmpdir.as_cwd():
        classifier = Classifier(('ignored', 'matched'))
        kept = classifier.files_contain(
            classifier.filenames,
            contains='Match',
        )

    assert kept == ['matched']