mirror of
https://github.com/pre-commit/pre-commit.git
synced 2026-04-15 01:51:46 +04:00
Target files by type as well as path regex
This commit is contained in:
parent
a4e9394cc7
commit
81b66cda74
3 changed files with 118 additions and 24 deletions
|
|
@ -55,24 +55,37 @@ def get_changed_files(new, old):
|
||||||
)[1].splitlines()
|
)[1].splitlines()
|
||||||
|
|
||||||
|
|
||||||
def get_filenames(args, include_expr, exclude_expr):
|
def get_filenames(args, include_expr, exclude_expr, types):
|
||||||
|
"""Return a list of file names and modes to consider for this run.
|
||||||
|
|
||||||
|
:return: a list of tuples of the of the form (path, mode).
|
||||||
|
"""
|
||||||
if args.origin and args.source:
|
if args.origin and args.source:
|
||||||
getter = git.get_files_matching(
|
getter = git.get_files_matching(
|
||||||
lambda: get_changed_files(args.origin, args.source),
|
lambda: get_changed_files(args.origin, args.source),
|
||||||
)
|
)
|
||||||
elif args.files:
|
elif args.files:
|
||||||
getter = git.get_files_matching(lambda: args.files)
|
files = [
|
||||||
|
(path, git.guess_git_type_for_file(path))
|
||||||
|
for path in args.files
|
||||||
|
]
|
||||||
|
getter = git.get_files_matching(lambda: files)
|
||||||
elif args.all_files:
|
elif args.all_files:
|
||||||
getter = git.get_all_files_matching
|
getter = git.get_all_files_matching
|
||||||
elif git.is_in_merge_conflict():
|
elif git.is_in_merge_conflict():
|
||||||
getter = git.get_conflicted_files_matching
|
getter = git.get_conflicted_files_matching
|
||||||
else:
|
else:
|
||||||
getter = git.get_staged_files_matching
|
getter = git.get_staged_files_matching
|
||||||
return getter(include_expr, exclude_expr)
|
return getter(include_expr, exclude_expr, types)
|
||||||
|
|
||||||
|
|
||||||
def _run_single_hook(hook, repo, args, write, skips=frozenset()):
|
def _run_single_hook(hook, repo, args, write, skips=frozenset()):
|
||||||
filenames = get_filenames(args, hook['files'], hook['exclude'])
|
filenames = get_filenames(
|
||||||
|
args,
|
||||||
|
hook['files'],
|
||||||
|
hook['exclude'],
|
||||||
|
frozenset(hook['types']) if 'types' in hook else frozenset(['file']),
|
||||||
|
)
|
||||||
if hook['id'] in skips:
|
if hook['id'] in skips:
|
||||||
_print_user_skipped(hook, write, args)
|
_print_user_skipped(hook, write, args)
|
||||||
return 0
|
return 0
|
||||||
|
|
|
||||||
|
|
@ -56,6 +56,7 @@ def parse_merge_msg_for_conflicts(merge_msg):
|
||||||
|
|
||||||
@memoize_by_cwd
|
@memoize_by_cwd
|
||||||
def get_conflicted_files():
|
def get_conflicted_files():
|
||||||
|
"""Return a list of file names and types for conflicted files."""
|
||||||
logger.info('Checking merge-conflict files only.')
|
logger.info('Checking merge-conflict files only.')
|
||||||
# Need to get the conflicted files from the MERGE_MSG because they could
|
# Need to get the conflicted files from the MERGE_MSG because they could
|
||||||
# have resolved the conflict by choosing one side or the other
|
# have resolved the conflict by choosing one side or the other
|
||||||
|
|
@ -69,32 +70,105 @@ def get_conflicted_files():
|
||||||
merge_diff_filenames = cmd_output(
|
merge_diff_filenames = cmd_output(
|
||||||
'git', 'diff', '-m', tree_hash, 'HEAD', 'MERGE_HEAD', '--name-only',
|
'git', 'diff', '-m', tree_hash, 'HEAD', 'MERGE_HEAD', '--name-only',
|
||||||
)[1].splitlines()
|
)[1].splitlines()
|
||||||
return set(merge_conflict_filenames) | set(merge_diff_filenames)
|
|
||||||
|
return [
|
||||||
|
(path, get_git_type_for_file(path))
|
||||||
|
for path
|
||||||
|
in set(merge_conflict_filenames) | set(merge_diff_filenames)
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def get_git_type_for_file(path):
|
||||||
|
"""Return the git type of a file which is in this git repository.
|
||||||
|
|
||||||
|
Because the file is in this git repository, we can use `git ls-files` to
|
||||||
|
read its type directly.
|
||||||
|
"""
|
||||||
|
# TODO: call this function once with a list of paths for more speed?
|
||||||
|
_, mode = _parse_git_ls_line(
|
||||||
|
cmd_output('git', 'ls-files', '--stage', '--', path)[1],
|
||||||
|
)
|
||||||
|
return mode
|
||||||
|
|
||||||
|
|
||||||
|
def guess_git_type_for_file(path):
|
||||||
|
"""Return a guessed git type of a file which is not in this git repository.
|
||||||
|
|
||||||
|
Because the file isn't in git, we must guess the file type. This is
|
||||||
|
necessary when using `pre-commit run` or `pre-commit identify` and listing
|
||||||
|
files (which might not be in a repo).
|
||||||
|
"""
|
||||||
|
if os.path.islink(path):
|
||||||
|
return GIT_MODE_SYMLINK
|
||||||
|
elif os.path.isfile(path):
|
||||||
|
# determine if executable
|
||||||
|
if os.stat(path).st_mode & 0o111:
|
||||||
|
return GIT_MODE_EXECUTABLE
|
||||||
|
else:
|
||||||
|
return GIT_MODE_FILE
|
||||||
|
elif os.path.isdir(path):
|
||||||
|
# git doesn't track directories, so if it *is* one, it's a submodule
|
||||||
|
return GIT_MODE_SUBMODULE
|
||||||
|
else:
|
||||||
|
raise ValueError('Unable to determine type of `{0}`'.format(path))
|
||||||
|
|
||||||
|
|
||||||
@memoize_by_cwd
|
@memoize_by_cwd
|
||||||
def get_staged_files():
|
def get_staged_files():
|
||||||
return cmd_output('git', 'diff', '--staged', '--name-only')[1].splitlines()
|
"""Return a list of paths in the repo which have been added/modified."""
|
||||||
|
return [
|
||||||
|
(path, get_git_type_for_file(path))
|
||||||
|
for path
|
||||||
|
in cmd_output(
|
||||||
|
'git', 'diff',
|
||||||
|
'--diff-filter=ACMRTUXB', # all types except D ("Deleted")
|
||||||
|
'--staged',
|
||||||
|
'--name-only',
|
||||||
|
)[1].splitlines()
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
# The output format of the command is:
|
||||||
|
# [file mode] [object hash] [stage number]\t[file path]
|
||||||
|
# (We only care about the mode and path.)
|
||||||
|
_split_git_ls_line_regex = re.compile('^([0-7]{6}) [0-9a-f]{40} [0-9]+\t(.+)$')
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_git_ls_line(line):
|
||||||
|
"""Split a line of `git ls-files` into a tuple (path, type)."""
|
||||||
|
match = _split_git_ls_line_regex.match(line)
|
||||||
|
return match.group(2), int(match.group(1), 8)
|
||||||
|
|
||||||
|
|
||||||
@memoize_by_cwd
|
@memoize_by_cwd
|
||||||
def get_all_files():
|
def get_all_files():
|
||||||
return cmd_output('git', 'ls-files')[1].splitlines()
|
"""Return a list of all files (and their types) in the repository.
|
||||||
|
|
||||||
|
:return: list of (path, type) tuples
|
||||||
|
"""
|
||||||
|
return [
|
||||||
|
_parse_git_ls_line(line)
|
||||||
|
for line in cmd_output('git', 'ls-files', '--stage')[1].splitlines()
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def get_files_matching(all_file_list_strategy):
|
def get_files_matching(all_file_list_strategy):
|
||||||
@functools.wraps(all_file_list_strategy)
|
@functools.wraps(all_file_list_strategy)
|
||||||
@memoize_by_cwd
|
@memoize_by_cwd
|
||||||
def wrapper(include_expr, exclude_expr):
|
def wrapper(include_expr, exclude_expr, types):
|
||||||
|
# TODO: how to avoid this?
|
||||||
|
from pre_commit.file_classifier.classifier import classify
|
||||||
|
|
||||||
include_regex = re.compile(include_expr)
|
include_regex = re.compile(include_expr)
|
||||||
exclude_regex = re.compile(exclude_expr)
|
exclude_regex = re.compile(exclude_expr)
|
||||||
return set(
|
return set(
|
||||||
filename
|
filename
|
||||||
for filename in all_file_list_strategy()
|
for filename, mode in all_file_list_strategy()
|
||||||
if (
|
if (
|
||||||
include_regex.search(filename) and
|
include_regex.search(filename) and
|
||||||
not exclude_regex.search(filename) and
|
not exclude_regex.search(filename) and
|
||||||
os.path.lexists(filename)
|
os.path.lexists(filename) and
|
||||||
|
classify(filename, mode).intersection(types)
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
return wrapper
|
return wrapper
|
||||||
|
|
|
||||||
|
|
@ -58,18 +58,18 @@ def test_cherry_pick_conflict(in_merge_conflict):
|
||||||
def get_files_matching_func():
|
def get_files_matching_func():
|
||||||
def get_filenames():
|
def get_filenames():
|
||||||
return (
|
return (
|
||||||
'pre_commit/main.py',
|
('pre_commit/main.py', git.GIT_MODE_FILE),
|
||||||
'pre_commit/git.py',
|
('pre_commit/git.py', git.GIT_MODE_FILE),
|
||||||
'im_a_file_that_doesnt_exist.py',
|
('im_a_file_that_doesnt_exist.py', git.GIT_MODE_FILE),
|
||||||
'testing/test_symlink',
|
('testing/test_symlink', git.GIT_MODE_SYMLINK),
|
||||||
'hooks.yaml',
|
('hooks.yaml', git.GIT_MODE_FILE),
|
||||||
)
|
)
|
||||||
|
|
||||||
return git.get_files_matching(get_filenames)
|
return git.get_files_matching(get_filenames)
|
||||||
|
|
||||||
|
|
||||||
def test_get_files_matching_base(get_files_matching_func):
|
def test_get_files_matching_base(get_files_matching_func):
|
||||||
ret = get_files_matching_func('', '^$')
|
ret = get_files_matching_func('', '^$', frozenset(['file', 'symlink']))
|
||||||
assert ret == set([
|
assert ret == set([
|
||||||
'pre_commit/main.py',
|
'pre_commit/main.py',
|
||||||
'pre_commit/git.py',
|
'pre_commit/git.py',
|
||||||
|
|
@ -79,7 +79,7 @@ def test_get_files_matching_base(get_files_matching_func):
|
||||||
|
|
||||||
|
|
||||||
def test_get_files_matching_total_match(get_files_matching_func):
|
def test_get_files_matching_total_match(get_files_matching_func):
|
||||||
ret = get_files_matching_func('^.*\\.py$', '^$')
|
ret = get_files_matching_func('^.*\\.py$', '^$', frozenset(['file']))
|
||||||
assert ret == set([
|
assert ret == set([
|
||||||
'pre_commit/main.py',
|
'pre_commit/main.py',
|
||||||
'pre_commit/git.py',
|
'pre_commit/git.py',
|
||||||
|
|
@ -87,17 +87,17 @@ def test_get_files_matching_total_match(get_files_matching_func):
|
||||||
|
|
||||||
|
|
||||||
def test_does_search_instead_of_match(get_files_matching_func):
|
def test_does_search_instead_of_match(get_files_matching_func):
|
||||||
ret = get_files_matching_func('\\.yaml$', '^$')
|
ret = get_files_matching_func('\\.yaml$', '^$', frozenset(['file']))
|
||||||
assert ret == set(['hooks.yaml'])
|
assert ret == set(['hooks.yaml'])
|
||||||
|
|
||||||
|
|
||||||
def test_does_not_include_deleted_fileS(get_files_matching_func):
|
def test_does_not_include_deleted_files(get_files_matching_func):
|
||||||
ret = get_files_matching_func('exist.py', '^$')
|
ret = get_files_matching_func('exist.py', '^$', frozenset(['file']))
|
||||||
assert ret == set()
|
assert ret == set()
|
||||||
|
|
||||||
|
|
||||||
def test_exclude_removes_files(get_files_matching_func):
|
def test_exclude_removes_files(get_files_matching_func):
|
||||||
ret = get_files_matching_func('', '\\.py$')
|
ret = get_files_matching_func('', '\\.py$', frozenset(['file', 'symlink']))
|
||||||
assert ret == set(['hooks.yaml', 'testing/test_symlink'])
|
assert ret == set(['hooks.yaml', 'testing/test_symlink'])
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -114,12 +114,17 @@ def test_get_conflicted_files(in_merge_conflict):
|
||||||
cmd_output('git', 'add', 'other_file')
|
cmd_output('git', 'add', 'other_file')
|
||||||
|
|
||||||
ret = set(git.get_conflicted_files())
|
ret = set(git.get_conflicted_files())
|
||||||
assert ret == set(('conflict_file', 'other_file'))
|
assert ret == set([
|
||||||
|
('conflict_file', git.GIT_MODE_FILE),
|
||||||
|
('other_file', git.GIT_MODE_FILE),
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
def test_get_conflicted_files_in_submodule(in_conflicting_submodule):
|
def test_get_conflicted_files_in_submodule(in_conflicting_submodule):
|
||||||
resolve_conflict()
|
resolve_conflict()
|
||||||
assert set(git.get_conflicted_files()) == set(('conflict_file',))
|
assert set(git.get_conflicted_files()) == set([
|
||||||
|
('conflict_file', git.GIT_MODE_FILE)],
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_get_conflicted_files_unstaged_files(in_merge_conflict):
|
def test_get_conflicted_files_unstaged_files(in_merge_conflict):
|
||||||
|
|
@ -132,7 +137,9 @@ def test_get_conflicted_files_unstaged_files(in_merge_conflict):
|
||||||
bar_only_file.write('new contents!\n')
|
bar_only_file.write('new contents!\n')
|
||||||
|
|
||||||
ret = set(git.get_conflicted_files())
|
ret = set(git.get_conflicted_files())
|
||||||
assert ret == set(('conflict_file',))
|
assert ret == set([
|
||||||
|
('conflict_file', git.GIT_MODE_FILE),
|
||||||
|
])
|
||||||
|
|
||||||
|
|
||||||
MERGE_MSG = "Merge branch 'foo' into bar\n\nConflicts:\n\tconflict_file\n"
|
MERGE_MSG = "Merge branch 'foo' into bar\n\nConflicts:\n\tconflict_file\n"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue