From 120eecaf89e84d755f52b657288b98ac46146f51 Mon Sep 17 00:00:00 2001 From: Chris Kuehl Date: Wed, 6 Jan 2016 16:52:00 -0800 Subject: [PATCH] Target files by type as well as path regex --- pre_commit/commands/run.py | 21 ++++++++-- pre_commit/git.py | 86 +++++++++++++++++++++++++++++++++++--- tests/git_test.py | 38 +++++++++++------ 3 files changed, 122 insertions(+), 23 deletions(-) diff --git a/pre_commit/commands/run.py b/pre_commit/commands/run.py index f45e7089..11fbfbce 100644 --- a/pre_commit/commands/run.py +++ b/pre_commit/commands/run.py @@ -55,24 +55,37 @@ def get_changed_files(new, old): )[1].splitlines() -def get_filenames(args, include_expr, exclude_expr): +def get_filenames(args, include_expr, exclude_expr, types): + """Return a list of file names and modes to consider for this run. + + :return: a list of tuples of the of the form (path, mode). + """ if args.origin and args.source: getter = git.get_files_matching( lambda: get_changed_files(args.origin, args.source), ) elif args.files: - getter = git.get_files_matching(lambda: args.files) + files = [ + (path, git.guess_git_type_for_file(path)) + for path in args.files + ] + getter = git.get_files_matching(lambda: files) elif args.all_files: getter = git.get_all_files_matching elif git.is_in_merge_conflict(): getter = git.get_conflicted_files_matching else: getter = git.get_staged_files_matching - return getter(include_expr, exclude_expr) + return getter(include_expr, exclude_expr, types) def _run_single_hook(hook, repo, args, write, skips=frozenset()): - filenames = get_filenames(args, hook['files'], hook['exclude']) + filenames = get_filenames( + args, + hook['files'], + hook['exclude'], + frozenset(hook['types']) if 'types' in hook else frozenset(['file']), + ) if hook['id'] in skips: _print_user_skipped(hook, write, args) return 0 diff --git a/pre_commit/git.py b/pre_commit/git.py index 2d37d344..abddb026 100644 --- a/pre_commit/git.py +++ b/pre_commit/git.py @@ -56,6 +56,7 @@ def parse_merge_msg_for_conflicts(merge_msg): @memoize_by_cwd def get_conflicted_files(): + """Return a list of file names and types for conflicted files.""" logger.info('Checking merge-conflict files only.') # Need to get the conflicted files from the MERGE_MSG because they could # have resolved the conflict by choosing one side or the other @@ -69,32 +70,105 @@ def get_conflicted_files(): merge_diff_filenames = cmd_output( 'git', 'diff', '-m', tree_hash, 'HEAD', 'MERGE_HEAD', '--name-only', )[1].splitlines() - return set(merge_conflict_filenames) | set(merge_diff_filenames) + + return [ + (path, get_git_type_for_file(path)) + for path + in set(merge_conflict_filenames) | set(merge_diff_filenames) + ] + + +def get_git_type_for_file(path): + """Return the git type of a file which is in this git repository. + + Because the file is in this git repository, we can use `git ls-files` to + read its type directly. + """ + # TODO: call this function once with a list of paths for more speed? + _, mode = _parse_git_ls_line( + cmd_output('git', 'ls-files', '--stage', '--', path)[1], + ) + return mode + + +def guess_git_type_for_file(path): + """Return a guessed git type of a file which is not in this git repository. + + Because the file isn't in git, we must guess the file type. This is + necessary when using `pre-commit run` or `pre-commit identify` and listing + files (which might not be in a repo). + """ + if os.path.islink(path): + return GIT_MODE_SYMLINK + elif os.path.isfile(path): + # determine if executable + if os.stat(path).st_mode & 0o111: + return GIT_MODE_EXECUTABLE + else: + return GIT_MODE_FILE + elif os.path.isdir(path): + # git doesn't track directories, so if it *is* one, it's a submodule + return GIT_MODE_SUBMODULE + else: + raise ValueError('Unable to determine type of `{0}`'.format(path)) @memoize_by_cwd def get_staged_files(): - return cmd_output('git', 'diff', '--staged', '--name-only')[1].splitlines() + """Return a list of paths in the repo which have been added/modified.""" + return [ + (path, get_git_type_for_file(path)) + for path + in cmd_output( + 'git', 'diff', + '--diff-filter=ACMRTUXB', # all types except D ("Deleted") + '--staged', + '--name-only', + )[1].splitlines() + ] + + +# The output format of the command is: +# [file mode] [object hash] [stage number]\t[file path] +# (We only care about the mode and path.) +_split_git_ls_line_regex = re.compile('^([0-7]{6}) [0-9a-f]{40} [0-9]+\t(.+)$') + + +def _parse_git_ls_line(line): + """Split a line of `git ls-files` into a tuple (path, type).""" + match = _split_git_ls_line_regex.match(line) + return match.group(2), int(match.group(1), 8) @memoize_by_cwd def get_all_files(): - return cmd_output('git', 'ls-files')[1].splitlines() + """Return a list of all files (and their types) in the repository. + + :return: list of (path, type) tuples + """ + return [ + _parse_git_ls_line(line) + for line in cmd_output('git', 'ls-files', '--stage')[1].splitlines() + ] def get_files_matching(all_file_list_strategy): @functools.wraps(all_file_list_strategy) @memoize_by_cwd - def wrapper(include_expr, exclude_expr): + def wrapper(include_expr, exclude_expr, types): + # TODO: how to avoid this? + from pre_commit.file_classifier.classifier import classify + include_regex = re.compile(include_expr) exclude_regex = re.compile(exclude_expr) return set( filename - for filename in all_file_list_strategy() + for filename, mode in all_file_list_strategy() if ( include_regex.search(filename) and not exclude_regex.search(filename) and - os.path.lexists(filename) + os.path.lexists(filename) and + classify(filename, mode).intersection(types) ) ) return wrapper diff --git a/tests/git_test.py b/tests/git_test.py index c4e01450..1e832a76 100644 --- a/tests/git_test.py +++ b/tests/git_test.py @@ -58,18 +58,18 @@ def test_cherry_pick_conflict(in_merge_conflict): def get_files_matching_func(): def get_filenames(): return ( - 'pre_commit/main.py', - 'pre_commit/git.py', - 'im_a_file_that_doesnt_exist.py', - 'testing/test_symlink', - 'hooks.yaml', + ('pre_commit/main.py', git.GIT_MODE_FILE), + ('pre_commit/git.py', git.GIT_MODE_FILE), + ('im_a_file_that_doesnt_exist.py', git.GIT_MODE_FILE), + ('testing/test_symlink', git.GIT_MODE_SYMLINK), + ('hooks.yaml', git.GIT_MODE_FILE), ) return git.get_files_matching(get_filenames) def test_get_files_matching_base(get_files_matching_func): - ret = get_files_matching_func('', '^$') + ret = get_files_matching_func('', '^$', frozenset(['file'])) assert ret == set([ 'pre_commit/main.py', 'pre_commit/git.py', @@ -79,7 +79,7 @@ def test_get_files_matching_base(get_files_matching_func): def test_get_files_matching_total_match(get_files_matching_func): - ret = get_files_matching_func('^.*\\.py$', '^$') + ret = get_files_matching_func('^.*\\.py$', '^$', frozenset(['file'])) assert ret == set([ 'pre_commit/main.py', 'pre_commit/git.py', @@ -87,18 +87,23 @@ def test_get_files_matching_total_match(get_files_matching_func): def test_does_search_instead_of_match(get_files_matching_func): - ret = get_files_matching_func('\\.yaml$', '^$') + ret = get_files_matching_func('\\.yaml$', '^$', frozenset(['file'])) assert ret == set(['hooks.yaml']) -def test_does_not_include_deleted_fileS(get_files_matching_func): - ret = get_files_matching_func('exist.py', '^$') +def test_does_not_include_deleted_files(get_files_matching_func): + ret = get_files_matching_func('exist.py', '^$', frozenset(['file'])) assert ret == set() def test_exclude_removes_files(get_files_matching_func): +<<<<<<< HEAD ret = get_files_matching_func('', '\\.py$') assert ret == set(['hooks.yaml', 'testing/test_symlink']) +======= + ret = get_files_matching_func('', '\\.py$', frozenset(['file'])) + assert ret == set(['hooks.yaml']) +>>>>>>> Target files by type as well as path regex def resolve_conflict(): @@ -114,12 +119,17 @@ def test_get_conflicted_files(in_merge_conflict): cmd_output('git', 'add', 'other_file') ret = set(git.get_conflicted_files()) - assert ret == set(('conflict_file', 'other_file')) + assert ret == set([ + ('conflict_file', git.GIT_MODE_FILE), + ('other_file', git.GIT_MODE_FILE), + ]) def test_get_conflicted_files_in_submodule(in_conflicting_submodule): resolve_conflict() - assert set(git.get_conflicted_files()) == set(('conflict_file',)) + assert set(git.get_conflicted_files()) == set([ + ('conflict_file', git.GIT_MODE_FILE)], + ) def test_get_conflicted_files_unstaged_files(in_merge_conflict): @@ -132,7 +142,9 @@ def test_get_conflicted_files_unstaged_files(in_merge_conflict): bar_only_file.write('new contents!\n') ret = set(git.get_conflicted_files()) - assert ret == set(('conflict_file',)) + assert ret == set([ + ('conflict_file', git.GIT_MODE_FILE), + ]) MERGE_MSG = "Merge branch 'foo' into bar\n\nConflicts:\n\tconflict_file\n"