mirror of
https://github.com/pre-commit/pre-commit.git
synced 2026-04-15 18:11:48 +04:00
180 lines
5.5 KiB
Python
180 lines
5.5 KiB
Python
# -*- coding: utf-8 -*-
|
|
from __future__ import unicode_literals
|
|
|
|
import functools
|
|
import logging
|
|
import os
|
|
import os.path
|
|
import re
|
|
|
|
from pre_commit.errors import FatalError
|
|
from pre_commit.util import CalledProcessError
|
|
from pre_commit.util import cmd_output
|
|
from pre_commit.util import memoize_by_cwd
|
|
|
|
GIT_MODE_FILE = 0o100644
|
|
GIT_MODE_EXECUTABLE = 0o100755
|
|
GIT_MODE_SYMLINK = 0o120000
|
|
GIT_MODE_SUBMODULE = 0o160000
|
|
|
|
|
|
logger = logging.getLogger('pre_commit')
|
|
|
|
|
|
def get_root():
|
|
try:
|
|
return cmd_output('git', 'rev-parse', '--show-toplevel')[1].strip()
|
|
except CalledProcessError:
|
|
raise FatalError(
|
|
'Called from outside of the gits. Please cd to a git repository.'
|
|
)
|
|
|
|
|
|
def get_git_dir(git_root):
|
|
return os.path.normpath(os.path.join(
|
|
git_root,
|
|
cmd_output('git', 'rev-parse', '--git-dir', cwd=git_root)[1].strip(),
|
|
))
|
|
|
|
|
|
def is_in_merge_conflict():
|
|
git_dir = get_git_dir('.')
|
|
return (
|
|
os.path.exists(os.path.join(git_dir, 'MERGE_MSG')) and
|
|
os.path.exists(os.path.join(git_dir, 'MERGE_HEAD'))
|
|
)
|
|
|
|
|
|
def parse_merge_msg_for_conflicts(merge_msg):
|
|
# Conflicted files start with tabs
|
|
return [
|
|
line.lstrip('#').strip()
|
|
for line in merge_msg.splitlines()
|
|
# '#\t' for git 2.4.1
|
|
if line.startswith(('\t', '#\t'))
|
|
]
|
|
|
|
|
|
@memoize_by_cwd
|
|
def get_conflicted_files():
|
|
"""Return a list of file names and types for conflicted files."""
|
|
logger.info('Checking merge-conflict files only.')
|
|
# Need to get the conflicted files from the MERGE_MSG because they could
|
|
# have resolved the conflict by choosing one side or the other
|
|
merge_msg = open(os.path.join(get_git_dir('.'), 'MERGE_MSG')).read()
|
|
merge_conflict_filenames = parse_merge_msg_for_conflicts(merge_msg)
|
|
|
|
# This will get the rest of the changes made after the merge.
|
|
# If they resolved the merge conflict by choosing a mesh of both sides
|
|
# this will also include the conflicted files
|
|
tree_hash = cmd_output('git', 'write-tree')[1].strip()
|
|
merge_diff_filenames = cmd_output(
|
|
'git', 'diff', '-m', tree_hash, 'HEAD', 'MERGE_HEAD', '--name-only',
|
|
)[1].splitlines()
|
|
|
|
return [
|
|
(path, get_git_type_for_file(path))
|
|
for path
|
|
in set(merge_conflict_filenames) | set(merge_diff_filenames)
|
|
]
|
|
|
|
|
|
def get_git_type_for_file(path):
|
|
"""Return the git type of a file which is in this git repository.
|
|
|
|
Because the file is in this git repository, we can use `git ls-files` to
|
|
read its type directly.
|
|
"""
|
|
# TODO: call this function once with a list of paths for more speed?
|
|
_, mode = _parse_git_ls_line(
|
|
cmd_output('git', 'ls-files', '--stage', '--', path)[1],
|
|
)
|
|
return mode
|
|
|
|
|
|
def guess_git_type_for_file(path):
|
|
"""Return a guessed git type of a file which is not in this git repository.
|
|
|
|
Because the file isn't in git, we must guess the file type. This is
|
|
necessary when using `pre-commit run` or `pre-commit identify` and listing
|
|
files (which might not be in a repo).
|
|
"""
|
|
if os.path.islink(path):
|
|
return GIT_MODE_SYMLINK
|
|
elif os.path.isfile(path):
|
|
# determine if executable
|
|
if os.access(path, os.X_OK):
|
|
return GIT_MODE_EXECUTABLE
|
|
else:
|
|
return GIT_MODE_FILE
|
|
elif os.path.isdir(path):
|
|
# git doesn't track directories, so if it *is* one, it's a submodule
|
|
return GIT_MODE_SUBMODULE
|
|
else:
|
|
raise ValueError('Unable to determine type of `{0}`'.format(path))
|
|
|
|
|
|
@memoize_by_cwd
|
|
def get_staged_files():
|
|
"""Return a list of paths in the repo which have been added/modified."""
|
|
return [
|
|
(path, get_git_type_for_file(path))
|
|
for path
|
|
in cmd_output(
|
|
'git', 'diff',
|
|
'--diff-filter=ACMRTUXB', # all types except D ("Deleted")
|
|
'--staged',
|
|
'--name-only',
|
|
)[1].splitlines()
|
|
]
|
|
|
|
|
|
# The output format of the command is:
|
|
# [file mode] [object hash] [stage number]\t[file path]
|
|
# (We only care about the mode and path.)
|
|
_split_git_ls_line_regex = re.compile('^([0-7]{6}) [0-9a-f]{40} [0-9]+\t(.+)$')
|
|
|
|
|
|
def _parse_git_ls_line(line):
|
|
"""Split a line of `git ls-files` into a tuple (path, type)."""
|
|
match = _split_git_ls_line_regex.match(line)
|
|
return match.group(2), int(match.group(1), 8)
|
|
|
|
|
|
@memoize_by_cwd
|
|
def get_all_files():
|
|
"""Return a list of all files (and their types) in the repository.
|
|
|
|
:return: list of (path, type) tuples
|
|
"""
|
|
return [
|
|
_parse_git_ls_line(line)
|
|
for line in cmd_output('git', 'ls-files', '--stage')[1].splitlines()
|
|
]
|
|
|
|
|
|
def get_files_matching(all_file_list_strategy):
|
|
@functools.wraps(all_file_list_strategy)
|
|
@memoize_by_cwd
|
|
def wrapper(include_expr, exclude_expr, types):
|
|
# TODO: how to avoid this?
|
|
from pre_commit.file_classifier.classifier import classify
|
|
|
|
include_regex = re.compile(include_expr)
|
|
exclude_regex = re.compile(exclude_expr)
|
|
return set(
|
|
filename
|
|
for filename, mode in all_file_list_strategy()
|
|
if (
|
|
include_regex.search(filename) and
|
|
not exclude_regex.search(filename) and
|
|
os.path.lexists(filename) and
|
|
classify(filename, mode).intersection(types)
|
|
)
|
|
)
|
|
return wrapper
|
|
|
|
|
|
get_staged_files_matching = get_files_matching(get_staged_files)
|
|
get_all_files_matching = get_files_matching(get_all_files)
|
|
get_conflicted_files_matching = get_files_matching(get_conflicted_files)
|