Target files by type as well as path regex

This commit is contained in:
Chris Kuehl 2016-01-06 16:52:00 -08:00 committed by Chris Kuehl
parent a4e9394cc7
commit 81b66cda74
3 changed files with 118 additions and 24 deletions

View file

@ -55,24 +55,37 @@ def get_changed_files(new, old):
)[1].splitlines()
def get_filenames(args, include_expr, exclude_expr):
def get_filenames(args, include_expr, exclude_expr, types):
"""Return a list of file names and modes to consider for this run.
:return: a list of tuples of the of the form (path, mode).
"""
if args.origin and args.source:
getter = git.get_files_matching(
lambda: get_changed_files(args.origin, args.source),
)
elif args.files:
getter = git.get_files_matching(lambda: args.files)
files = [
(path, git.guess_git_type_for_file(path))
for path in args.files
]
getter = git.get_files_matching(lambda: files)
elif args.all_files:
getter = git.get_all_files_matching
elif git.is_in_merge_conflict():
getter = git.get_conflicted_files_matching
else:
getter = git.get_staged_files_matching
return getter(include_expr, exclude_expr)
return getter(include_expr, exclude_expr, types)
def _run_single_hook(hook, repo, args, write, skips=frozenset()):
filenames = get_filenames(args, hook['files'], hook['exclude'])
filenames = get_filenames(
args,
hook['files'],
hook['exclude'],
frozenset(hook['types']) if 'types' in hook else frozenset(['file']),
)
if hook['id'] in skips:
_print_user_skipped(hook, write, args)
return 0

View file

@ -56,6 +56,7 @@ def parse_merge_msg_for_conflicts(merge_msg):
@memoize_by_cwd
def get_conflicted_files():
"""Return a list of file names and types for conflicted files."""
logger.info('Checking merge-conflict files only.')
# Need to get the conflicted files from the MERGE_MSG because they could
# have resolved the conflict by choosing one side or the other
@ -69,32 +70,105 @@ def get_conflicted_files():
merge_diff_filenames = cmd_output(
'git', 'diff', '-m', tree_hash, 'HEAD', 'MERGE_HEAD', '--name-only',
)[1].splitlines()
return set(merge_conflict_filenames) | set(merge_diff_filenames)
return [
(path, get_git_type_for_file(path))
for path
in set(merge_conflict_filenames) | set(merge_diff_filenames)
]
def get_git_type_for_file(path):
"""Return the git type of a file which is in this git repository.
Because the file is in this git repository, we can use `git ls-files` to
read its type directly.
"""
# TODO: call this function once with a list of paths for more speed?
_, mode = _parse_git_ls_line(
cmd_output('git', 'ls-files', '--stage', '--', path)[1],
)
return mode
def guess_git_type_for_file(path):
"""Return a guessed git type of a file which is not in this git repository.
Because the file isn't in git, we must guess the file type. This is
necessary when using `pre-commit run` or `pre-commit identify` and listing
files (which might not be in a repo).
"""
if os.path.islink(path):
return GIT_MODE_SYMLINK
elif os.path.isfile(path):
# determine if executable
if os.stat(path).st_mode & 0o111:
return GIT_MODE_EXECUTABLE
else:
return GIT_MODE_FILE
elif os.path.isdir(path):
# git doesn't track directories, so if it *is* one, it's a submodule
return GIT_MODE_SUBMODULE
else:
raise ValueError('Unable to determine type of `{0}`'.format(path))
@memoize_by_cwd
def get_staged_files():
return cmd_output('git', 'diff', '--staged', '--name-only')[1].splitlines()
"""Return a list of paths in the repo which have been added/modified."""
return [
(path, get_git_type_for_file(path))
for path
in cmd_output(
'git', 'diff',
'--diff-filter=ACMRTUXB', # all types except D ("Deleted")
'--staged',
'--name-only',
)[1].splitlines()
]
# The output format of the command is:
# [file mode] [object hash] [stage number]\t[file path]
# (We only care about the mode and path.)
_split_git_ls_line_regex = re.compile('^([0-7]{6}) [0-9a-f]{40} [0-9]+\t(.+)$')
def _parse_git_ls_line(line):
"""Split a line of `git ls-files` into a tuple (path, type)."""
match = _split_git_ls_line_regex.match(line)
return match.group(2), int(match.group(1), 8)
@memoize_by_cwd
def get_all_files():
return cmd_output('git', 'ls-files')[1].splitlines()
"""Return a list of all files (and their types) in the repository.
:return: list of (path, type) tuples
"""
return [
_parse_git_ls_line(line)
for line in cmd_output('git', 'ls-files', '--stage')[1].splitlines()
]
def get_files_matching(all_file_list_strategy):
@functools.wraps(all_file_list_strategy)
@memoize_by_cwd
def wrapper(include_expr, exclude_expr):
def wrapper(include_expr, exclude_expr, types):
# TODO: how to avoid this?
from pre_commit.file_classifier.classifier import classify
include_regex = re.compile(include_expr)
exclude_regex = re.compile(exclude_expr)
return set(
filename
for filename in all_file_list_strategy()
for filename, mode in all_file_list_strategy()
if (
include_regex.search(filename) and
not exclude_regex.search(filename) and
os.path.lexists(filename)
os.path.lexists(filename) and
classify(filename, mode).intersection(types)
)
)
return wrapper

View file

@ -58,18 +58,18 @@ def test_cherry_pick_conflict(in_merge_conflict):
def get_files_matching_func():
def get_filenames():
return (
'pre_commit/main.py',
'pre_commit/git.py',
'im_a_file_that_doesnt_exist.py',
'testing/test_symlink',
'hooks.yaml',
('pre_commit/main.py', git.GIT_MODE_FILE),
('pre_commit/git.py', git.GIT_MODE_FILE),
('im_a_file_that_doesnt_exist.py', git.GIT_MODE_FILE),
('testing/test_symlink', git.GIT_MODE_SYMLINK),
('hooks.yaml', git.GIT_MODE_FILE),
)
return git.get_files_matching(get_filenames)
def test_get_files_matching_base(get_files_matching_func):
ret = get_files_matching_func('', '^$')
ret = get_files_matching_func('', '^$', frozenset(['file', 'symlink']))
assert ret == set([
'pre_commit/main.py',
'pre_commit/git.py',
@ -79,7 +79,7 @@ def test_get_files_matching_base(get_files_matching_func):
def test_get_files_matching_total_match(get_files_matching_func):
ret = get_files_matching_func('^.*\\.py$', '^$')
ret = get_files_matching_func('^.*\\.py$', '^$', frozenset(['file']))
assert ret == set([
'pre_commit/main.py',
'pre_commit/git.py',
@ -87,17 +87,17 @@ def test_get_files_matching_total_match(get_files_matching_func):
def test_does_search_instead_of_match(get_files_matching_func):
ret = get_files_matching_func('\\.yaml$', '^$')
ret = get_files_matching_func('\\.yaml$', '^$', frozenset(['file']))
assert ret == set(['hooks.yaml'])
def test_does_not_include_deleted_fileS(get_files_matching_func):
ret = get_files_matching_func('exist.py', '^$')
def test_does_not_include_deleted_files(get_files_matching_func):
ret = get_files_matching_func('exist.py', '^$', frozenset(['file']))
assert ret == set()
def test_exclude_removes_files(get_files_matching_func):
ret = get_files_matching_func('', '\\.py$')
ret = get_files_matching_func('', '\\.py$', frozenset(['file', 'symlink']))
assert ret == set(['hooks.yaml', 'testing/test_symlink'])
@ -114,12 +114,17 @@ def test_get_conflicted_files(in_merge_conflict):
cmd_output('git', 'add', 'other_file')
ret = set(git.get_conflicted_files())
assert ret == set(('conflict_file', 'other_file'))
assert ret == set([
('conflict_file', git.GIT_MODE_FILE),
('other_file', git.GIT_MODE_FILE),
])
def test_get_conflicted_files_in_submodule(in_conflicting_submodule):
resolve_conflict()
assert set(git.get_conflicted_files()) == set(('conflict_file',))
assert set(git.get_conflicted_files()) == set([
('conflict_file', git.GIT_MODE_FILE)],
)
def test_get_conflicted_files_unstaged_files(in_merge_conflict):
@ -132,7 +137,9 @@ def test_get_conflicted_files_unstaged_files(in_merge_conflict):
bar_only_file.write('new contents!\n')
ret = set(git.get_conflicted_files())
assert ret == set(('conflict_file',))
assert ret == set([
('conflict_file', git.GIT_MODE_FILE),
])
MERGE_MSG = "Merge branch 'foo' into bar\n\nConflicts:\n\tconflict_file\n"