diff --git a/pre_commit/clientlib.py b/pre_commit/clientlib.py index bceecaa6..6da6db25 100644 --- a/pre_commit/clientlib.py +++ b/pre_commit/clientlib.py @@ -5,6 +5,7 @@ import argparse import functools from aspy.yaml import ordered_load +from identify.identify import ALL_TAGS import pre_commit.constants as C from pre_commit import schema @@ -19,6 +20,14 @@ def check_language(v): ) +def check_type_tag(tag): + if tag not in ALL_TAGS: + raise schema.ValidationError( + 'Type tag {!r} is not recognized. ' + 'Try upgrading identify and pre-commit?'.format(tag), + ) + + def _make_argparser(filenames_help): parser = argparse.ArgumentParser() parser.add_argument('filenames', nargs='*', help=filenames_help) @@ -36,10 +45,17 @@ MANIFEST_HOOK_DICT = schema.Map( 'language', schema.check_and(schema.check_string, check_language), ), - schema.Conditional( + schema.Optional( 'files', schema.check_and(schema.check_string, schema.check_regex), - condition_key='always_run', condition_value=False, + '', ), + schema.Optional( + 'exclude', + schema.check_and(schema.check_string, schema.check_regex), + '^$', + ), + schema.Optional('types', schema.check_array(check_type_tag), ['file']), + schema.Optional('exclude_types', schema.check_array(check_type_tag), []), schema.Optional( 'additional_dependencies', schema.check_array(schema.check_string), [], @@ -48,11 +64,6 @@ MANIFEST_HOOK_DICT = schema.Map( schema.Optional('always_run', schema.check_bool, False), schema.Optional('pass_filenames', schema.check_bool, True), schema.Optional('description', schema.check_string, ''), - schema.Optional( - 'exclude', - schema.check_and(schema.check_string, schema.check_regex), - '^$', - ), schema.Optional('language_version', schema.check_string, 'default'), schema.Optional('log_file', schema.check_string, ''), schema.Optional('minimum_pre_commit_version', schema.check_string, '0'), diff --git a/pre_commit/commands/run.py b/pre_commit/commands/run.py index a8e61193..99d3a189 100644 --- a/pre_commit/commands/run.py +++ b/pre_commit/commands/run.py @@ -6,18 +6,24 @@ import os import subprocess import sys +from identify.identify import tags_from_path + from pre_commit import color from pre_commit import git from pre_commit import output from pre_commit.output import get_hook_message from pre_commit.staged_files_only import staged_files_only from pre_commit.util import cmd_output +from pre_commit.util import memoize_by_cwd from pre_commit.util import noop_context logger = logging.getLogger('pre_commit') +tags_from_path = memoize_by_cwd(tags_from_path) + + def _get_skips(environ): skips = environ.get('SKIP', '') return {skip.strip() for skip in skips.split(',') if skip.strip()} @@ -37,6 +43,16 @@ def get_changed_files(new, old): )[1].splitlines() +def filter_filenames_by_types(filenames, types, exclude_types): + types, exclude_types = frozenset(types), frozenset(exclude_types) + ret = [] + for filename in filenames: + tags = tags_from_path(filename) + if tags >= types and not tags & exclude_types: + ret.append(filename) + return tuple(ret) + + def get_filenames(args, include_expr, exclude_expr): if args.origin and args.source: getter = git.get_files_matching( @@ -58,7 +74,10 @@ NO_FILES = '(no files to check)' def _run_single_hook(hook, repo, args, skips, cols): - filenames = get_filenames(args, hook.get('files', '^$'), hook['exclude']) + filenames = get_filenames(args, hook['files'], hook['exclude']) + filenames = filter_filenames_by_types( + filenames, hook['types'], hook['exclude_types'], + ) if hook['id'] in skips: output.write(get_hook_message( _hook_msg_start(hook, args.verbose), diff --git a/pre_commit/parse_shebang.py b/pre_commit/parse_shebang.py index be38d15f..4419cbfc 100644 --- a/pre_commit/parse_shebang.py +++ b/pre_commit/parse_shebang.py @@ -1,13 +1,9 @@ from __future__ import absolute_import from __future__ import unicode_literals -import io import os.path -import shlex -import string - -printable = frozenset(string.printable) +from identify.identify import parse_shebang_from_file class ExecutableNotFoundError(OSError): @@ -15,34 +11,11 @@ class ExecutableNotFoundError(OSError): return (1, self.args[0].encode('UTF-8'), b'') -def parse_bytesio(bytesio): - """Parse the shebang from a file opened for reading binary.""" - if bytesio.read(2) != b'#!': - return () - first_line = bytesio.readline() - try: - first_line = first_line.decode('US-ASCII') - except UnicodeDecodeError: - return () - - # Require only printable ascii - for c in first_line: - if c not in printable: - return () - - cmd = tuple(shlex.split(first_line)) - if cmd[0] == '/usr/bin/env': - cmd = cmd[1:] - return cmd - - def parse_filename(filename): - """Parse the shebang given a filename.""" - if not os.path.exists(filename) or not os.access(filename, os.X_OK): + if not os.path.exists(filename): return () - - with io.open(filename, 'rb') as f: - return parse_bytesio(f) + else: + return parse_shebang_from_file(filename) def find_executable(exe, _environ=None): diff --git a/setup.py b/setup.py index 0b8bcb7d..1ec3c6ff 100644 --- a/setup.py +++ b/setup.py @@ -41,6 +41,7 @@ setup( install_requires=[ 'aspy.yaml', 'cached-property', + 'identify>=1.0.0', 'nodeenv>=0.11.1', 'pyyaml', 'six', diff --git a/testing/resources/exclude_types_repo/.pre-commit-hooks.yaml b/testing/resources/exclude_types_repo/.pre-commit-hooks.yaml new file mode 100644 index 00000000..ed8794fb --- /dev/null +++ b/testing/resources/exclude_types_repo/.pre-commit-hooks.yaml @@ -0,0 +1,6 @@ +- id: python-files + name: Python files + entry: bin/hook.sh + language: script + types: [python] + exclude_types: [python3] diff --git a/testing/resources/exclude_types_repo/bin/hook.sh b/testing/resources/exclude_types_repo/bin/hook.sh new file mode 100755 index 00000000..bdade513 --- /dev/null +++ b/testing/resources/exclude_types_repo/bin/hook.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +echo $@ +exit 1 diff --git a/testing/resources/types_repo/.pre-commit-hooks.yaml b/testing/resources/types_repo/.pre-commit-hooks.yaml new file mode 100644 index 00000000..2e5e4a6c --- /dev/null +++ b/testing/resources/types_repo/.pre-commit-hooks.yaml @@ -0,0 +1,5 @@ +- id: python-files + name: Python files + entry: bin/hook.sh + language: script + types: [python] diff --git a/testing/resources/types_repo/bin/hook.sh b/testing/resources/types_repo/bin/hook.sh new file mode 100755 index 00000000..bdade513 --- /dev/null +++ b/testing/resources/types_repo/bin/hook.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +echo $@ +exit 1 diff --git a/tests/clientlib_test.py b/tests/clientlib_test.py index 454824a9..65209a64 100644 --- a/tests/clientlib_test.py +++ b/tests/clientlib_test.py @@ -4,6 +4,7 @@ import pytest from pre_commit import schema from pre_commit.clientlib import check_language +from pre_commit.clientlib import check_type_tag from pre_commit.clientlib import CONFIG_HOOK_DICT from pre_commit.clientlib import CONFIG_SCHEMA from pre_commit.clientlib import is_local_repo @@ -27,6 +28,12 @@ def test_check_language_failures(value): check_language(value) +@pytest.mark.parametrize('value', ('definitely-not-a-tag', 'fiel')) +def test_check_type_tag_failures(value): + with pytest.raises(schema.ValidationError): + check_type_tag(value) + + @pytest.mark.parametrize('value', ('python', 'node', 'pcre')) def test_check_language_ok(value): check_language(value) diff --git a/tests/commands/run_test.py b/tests/commands/run_test.py index d8522da4..1643cbb8 100644 --- a/tests/commands/run_test.py +++ b/tests/commands/run_test.py @@ -20,6 +20,7 @@ from pre_commit.commands.run import run from pre_commit.runner import Runner from pre_commit.util import cmd_output from pre_commit.util import cwd +from pre_commit.util import make_executable from testing.auto_namedtuple import auto_namedtuple from testing.fixtures import add_config_to_repo from testing.fixtures import make_consuming_repo @@ -43,7 +44,7 @@ def repo_with_failing_hook(tempdir_factory): def stage_a_file(filename='foo.py'): - cmd_output('touch', filename) + open(filename, 'a').close() cmd_output('git', 'add', filename) @@ -153,6 +154,35 @@ def test_hook_that_modifies_but_returns_zero( ) +def test_types_hook_repository( + cap_out, tempdir_factory, mock_out_store_directory, +): + git_path = make_consuming_repo(tempdir_factory, 'types_repo') + with cwd(git_path): + stage_a_file('bar.py') + stage_a_file('bar.notpy') + ret, printed = _do_run(cap_out, git_path, _get_opts()) + assert ret == 1 + assert b'bar.py' in printed + assert b'bar.notpy' not in printed + + +def test_exclude_types_hook_repository( + cap_out, tempdir_factory, mock_out_store_directory, +): + git_path = make_consuming_repo(tempdir_factory, 'exclude_types_repo') + with cwd(git_path): + with io.open('exe', 'w') as exe: + exe.write('#!/usr/bin/env python3\n') + make_executable('exe') + cmd_output('git', 'add', 'exe') + stage_a_file('bar.py') + ret, printed = _do_run(cap_out, git_path, _get_opts()) + assert ret == 1 + assert b'bar.py' in printed + assert b'exe' not in printed + + def test_show_diff_on_failure( capfd, cap_out, tempdir_factory, mock_out_store_directory, ): diff --git a/tests/manifest_test.py b/tests/manifest_test.py index 47e7fa32..7db886c5 100644 --- a/tests/manifest_test.py +++ b/tests/manifest_test.py @@ -34,6 +34,8 @@ def test_manifest_contents(manifest): 'name': 'Bash hook', 'pass_filenames': True, 'stages': [], + 'types': ['file'], + 'exclude_types': [], }] @@ -54,6 +56,8 @@ def test_hooks(manifest): 'name': 'Bash hook', 'pass_filenames': True, 'stages': [], + 'types': ['file'], + 'exclude_types': [], } diff --git a/tests/parse_shebang_test.py b/tests/parse_shebang_test.py index 46ca2db8..3f87aea8 100644 --- a/tests/parse_shebang_test.py +++ b/tests/parse_shebang_test.py @@ -15,36 +15,10 @@ from pre_commit.envcontext import Var from pre_commit.util import make_executable -@pytest.mark.parametrize( - ('s', 'expected'), - ( - (b'', ()), - (b'#!/usr/bin/python', ('/usr/bin/python',)), - (b'#!/usr/bin/env python', ('python',)), - (b'#! /usr/bin/python', ('/usr/bin/python',)), - (b'#!/usr/bin/foo python', ('/usr/bin/foo', 'python')), - (b'\xf9\x93\x01\x42\xcd', ()), - (b'#!\xf9\x93\x01\x42\xcd', ()), - (b'#!\x00\x00\x00\x00', ()), - ), -) -def test_parse_bytesio(s, expected): - assert parse_shebang.parse_bytesio(io.BytesIO(s)) == expected - - def test_file_doesnt_exist(): assert parse_shebang.parse_filename('herp derp derp') == () -@pytest.mark.xfail( - sys.platform == 'win32', reason='Windows says everything is X_OK', -) -def test_file_not_executable(tmpdir): - x = tmpdir.join('f') - x.write_text('#!/usr/bin/env python', encoding='UTF-8') - assert parse_shebang.parse_filename(x.strpath) == () - - def test_simple_case(tmpdir): x = tmpdir.join('f') x.write_text('#!/usr/bin/env python', encoding='UTF-8')