Target files by type as well as path regex

This commit is contained in:
Chris Kuehl 2016-01-06 16:52:00 -08:00
parent 9b44f54241
commit 120eecaf89
3 changed files with 122 additions and 23 deletions

View file

@@ -56,6 +56,7 @@ def parse_merge_msg_for_conflicts(merge_msg):
@memoize_by_cwd
def get_conflicted_files():
"""Return a list of file names and types for conflicted files."""
logger.info('Checking merge-conflict files only.')
# Need to get the conflicted files from the MERGE_MSG because they could
# have resolved the conflict by choosing one side or the other
@@ -69,32 +70,105 @@ def get_conflicted_files():
merge_diff_filenames = cmd_output(
'git', 'diff', '-m', tree_hash, 'HEAD', 'MERGE_HEAD', '--name-only',
)[1].splitlines()
return set(merge_conflict_filenames) | set(merge_diff_filenames)
return [
(path, get_git_type_for_file(path))
for path
in set(merge_conflict_filenames) | set(merge_diff_filenames)
]
def get_git_type_for_file(path):
    """Look up the git type (file mode) of a path tracked in this repository.

    The path is expected to be known to git, so its mode is read from
    `git ls-files --stage` rather than guessed from the filesystem.

    :param path: path of a file in the current git repository
    :return: the mode component parsed from the `git ls-files` output
    """
    # TODO: call this function once with a list of paths for more speed?
    # Element 1 of the cmd_output result holds the text that gets parsed
    # (presumably the command's stdout -- confirm against cmd_output).
    ls_files_output = cmd_output('git', 'ls-files', '--stage', '--', path)[1]
    parsed = _parse_git_ls_line(ls_files_output)
    # The parsed tuple is (path, mode); only the mode is of interest here.
    return parsed[1]
def guess_git_type_for_file(path):
    """Guess the git type of a path which is not tracked by this repository.

    Git cannot be asked about an untracked path, so the type is derived from
    the filesystem instead. This is needed when `pre-commit run` or
    `pre-commit identify` is given an explicit list of files, which might not
    be in a repo.

    :param path: filesystem path to inspect
    :return: one of the GIT_MODE_* constants
    :raises ValueError: if the path is not a symlink, regular file or
        directory
    """
    # Symlinks are tested first so that a link is reported as a link
    # instead of as whatever it points at.
    if os.path.islink(path):
        return GIT_MODE_SYMLINK

    if os.path.isfile(path):
        # Any execute bit (user, group or other) marks the file executable.
        has_exec_bit = os.stat(path).st_mode & 0o111
        return GIT_MODE_EXECUTABLE if has_exec_bit else GIT_MODE_FILE

    if os.path.isdir(path):
        # git doesn't track directories, so if it *is* one, it's a submodule
        return GIT_MODE_SUBMODULE

    raise ValueError('Unable to determine type of `{0}`'.format(path))
@memoize_by_cwd
def get_staged_files():
    """Return a list of paths in the repo which have been added/modified.

    Deleted files are filtered out, so every returned path is still present
    in the index and can be looked up with `get_git_type_for_file`.

    :return: list of (path, type) tuples
    """
    # A leftover bare-path `return` used to sit here; it made the
    # tuple-returning code below unreachable, while consumers such as
    # `get_files_matching` unpack (filename, mode) pairs.
    staged_paths = cmd_output(
        'git', 'diff',
        '--diff-filter=ACMRTUXB',  # all types except D ("Deleted")
        '--staged',
        '--name-only',
    )[1].splitlines()
    return [(path, get_git_type_for_file(path)) for path in staged_paths]
# The output format of the command is:
# [file mode] [object hash] [stage number]\t[file path]
# (We only care about the mode and path.)
_split_git_ls_line_regex = re.compile('^([0-7]{6}) [0-9a-f]{40} [0-9]+\t(.+)$')
def _parse_git_ls_line(line):
"""Split a line of `git ls-files` into a tuple (path, type)."""
match = _split_git_ls_line_regex.match(line)
return match.group(2), int(match.group(1), 8)
@memoize_by_cwd
def get_all_files():
    """Return a list of all files (and their types) in the repository.

    Each line of `git ls-files --stage` is parsed into its path and mode.

    :return: list of (path, type) tuples
    """
    # A leftover bare-path `return` used to sit here; it made the `--stage`
    # based implementation unreachable, while consumers such as
    # `get_files_matching` unpack (filename, mode) pairs.
    stage_lines = cmd_output('git', 'ls-files', '--stage')[1].splitlines()
    return [_parse_git_ls_line(line) for line in stage_lines]
def get_files_matching(all_file_list_strategy):
    """Build a memoized file filter on top of a file-listing strategy.

    :param all_file_list_strategy: zero-argument callable returning an
        iterable of (filename, mode) tuples
    :return: a function `wrapper(include_expr, exclude_expr, types)` which
        returns the set of filenames that match `include_expr`, do not match
        `exclude_expr`, still exist on disk, and whose classification shares
        at least one entry with `types`
    """
    @functools.wraps(all_file_list_strategy)
    @memoize_by_cwd
    def wrapper(include_expr, exclude_expr, types):
        # TODO: how to avoid this?
        from pre_commit.file_classifier.classifier import classify
        include_regex = re.compile(include_expr)
        exclude_regex = re.compile(exclude_expr)
        return set(
            filename
            for filename, mode in all_file_list_strategy()
            if (
                include_regex.search(filename) and
                not exclude_regex.search(filename) and
                # lexists (not exists) so that broken symlinks are kept
                os.path.lexists(filename) and
                # NOTE(review): assumes classify() returns a set-like object
                # of type tags -- confirm against the classifier module
                classify(filename, mode).intersection(types)
            )
        )
    return wrapper