pre-commit/pre_commit/git.py
2015-07-29 01:54:54 -04:00

121 lines
3.7 KiB
Python

from __future__ import unicode_literals
import functools
import logging
import os
import os.path
import re
from pre_commit.errors import FatalError
from pre_commit.util import cmd_output
from pre_commit.util import memoize_by_cwd
# Octal constants for git file modes, i.e. the values that appear in the
# first column of `git ls-files --stage` output.
GIT_MODE_FILE = 0o100644  # regular (non-executable) file
GIT_MODE_EXECUTABLE = 0o100755  # executable file
GIT_MODE_SYMLINK = 0o120000  # symbolic link
GIT_MODE_SUBMODULE = 0o160000  # submodule entry
# Logger registered under the 'pre_commit' name.
logger = logging.getLogger('pre_commit')
def get_root():
    """Return the nearest directory at or above the cwd that contains `.git`.

    The search starts at the current working directory and walks upwards
    one parent at a time. Every directory on the way is inspected,
    including the filesystem root itself.

    Raises:
        FatalError: if no directory on the way up contains a `.git` entry.
    """
    path = os.getcwd()
    while True:
        # `.git` may be a directory or a file, so only test for existence.
        if os.path.exists(os.path.join(path, '.git')):
            return path
        parent = os.path.normpath(os.path.join(path, os.pardir))
        if parent == path:
            # The parent of the filesystem root is the root itself: we have
            # inspected every ancestor and found nothing.
            break
        path = parent
    raise FatalError(
        'Called from outside of the gits. '
        'Please cd to a git repository.'
    )
def is_in_merge_conflict():
    """Tell whether both merge marker files exist under `./.git`.

    The paths are relative, so the answer depends on the current working
    directory. Returns True only when `.git/MERGE_MSG` and `.git/MERGE_HEAD`
    are both present.
    """
    marker_names = ('MERGE_MSG', 'MERGE_HEAD')
    return all(
        os.path.exists(os.path.join('.git', marker_name))
        for marker_name in marker_names
    )
def parse_merge_msg_for_conflicts(merge_msg):
    """Extract the conflicted file names from the text of a MERGE_MSG.

    Only lines that begin with a tab, or with '#' followed by a tab, are
    kept. Leading '#' characters and surrounding whitespace are removed
    from each kept line. The names are returned as a list in the order
    they appear.
    """
    # Conflicted files start with tabs ('#\t' for git 2.4.1)
    conflict_prefixes = ('\t', '#\t')
    conflicted = []
    for raw_line in merge_msg.splitlines():
        if not raw_line.startswith(conflict_prefixes):
            continue
        conflicted.append(raw_line.lstrip('#').strip())
    return conflicted
@memoize_by_cwd
def get_conflicted_files():
    """Return the set of file names involved in the merge in progress.

    The result is the union of two sources:

    * the file names listed as conflicted in `.git/MERGE_MSG`, and
    * the names reported by `git diff -m <tree> HEAD MERGE_HEAD --name-only`,
      where `<tree>` is the output of `git write-tree`.

    `.git/MERGE_MSG` is opened relative to the current working directory.
    """
    logger.info('Checking merge-conflict files only.')
    # Need to get the conflicted files from the MERGE_MSG because they could
    # have resolved the conflict by choosing one side or the other
    # Use a context manager so the file handle is closed deterministically.
    with open(os.path.join('.git', 'MERGE_MSG')) as merge_msg_file:
        merge_msg = merge_msg_file.read()
    merge_conflict_filenames = parse_merge_msg_for_conflicts(merge_msg)

    # This will get the rest of the changes made after the merge.
    # If they resolved the merge conflict by choosing a mesh of both sides
    # this will also include the conflicted files
    # Element 1 of cmd_output's result is used as the command's output text.
    tree_hash = cmd_output('git', 'write-tree')[1].strip()
    merge_diff_filenames = cmd_output(
        'git', 'diff', '-m', tree_hash, 'HEAD', 'MERGE_HEAD', '--name-only',
    )[1].splitlines()
    return set(merge_conflict_filenames) | set(merge_diff_filenames)
@memoize_by_cwd
def get_staged_files():
    """Return the lines printed by `git diff --staged --name-only` as a list."""
    diff_result = cmd_output('git', 'diff', '--staged', '--name-only')
    # Element 1 of cmd_output's result is used as the command's output text.
    staged_listing = diff_result[1]
    return staged_listing.splitlines()
@memoize_by_cwd
def get_all_files():
    """Return a list of all actual files in the git repository.

    There are some types of content we want to exclude. In order to exclude
    submodules, which git tracks similarly to files, we call `git ls-files
    --stage` and grep out entries with the special submodule file mode.

    http://stackoverflow.com/a/24122304

    Object names of 40 hex digits (SHA-1) and 64 hex digits (SHA-256) are
    both accepted. A line that does not match the expected format still
    raises AttributeError, because the failed match is None.
    """
    # The output format of the command is:
    # [file mode] [object hash] [stage number]\t[file path]
    split_regex = re.compile(
        r'^([0-7]{6}) [0-9a-f]{40}(?:[0-9a-f]{24})? [0-9]+\t(.+)$'
    )

    def split(line):
        """Return the (integer mode, path) pair parsed from one line."""
        match = split_regex.match(line)
        # The mode is printed in octal, so parse it base 8 to make it
        # comparable with the GIT_MODE_* constants.
        return int(match.group(1), 8), match.group(2)

    output = cmd_output('git', 'ls-files', '--stage')[1]
    entries = (split(line) for line in output.splitlines())
    return [path for mode, path in entries if mode != GIT_MODE_SUBMODULE]
def get_files_matching(all_file_list_strategy):
    """Build a filter function on top of a file-listing strategy.

    `all_file_list_strategy` is called with no arguments and must return an
    iterable of file names. The returned function takes `include_expr` and
    `exclude_expr` (regular expression sources) and returns the set of
    listed names that:

    * match `include_expr` somewhere in the name,
    * do not match `exclude_expr` anywhere in the name, and
    * exist on disk (checked via `os.path.exists`).

    The returned function is wrapped by `memoize_by_cwd` and carries the
    strategy's metadata via `functools.wraps`.
    """
    @functools.wraps(all_file_list_strategy)
    @memoize_by_cwd
    def wrapper(include_expr, exclude_expr):
        include_regex = re.compile(include_expr)
        exclude_regex = re.compile(exclude_expr)

        def is_selected(filename):
            # The cheap regex checks run first; the filesystem is only
            # touched for names that pass both patterns.
            if not include_regex.search(filename):
                return False
            if exclude_regex.search(filename):
                return False
            return os.path.exists(filename)

        return set(filter(is_selected, all_file_list_strategy()))

    return wrapper
# Ready-made filters, one per file-listing strategy. Each is called as
# f(include_expr, exclude_expr) and returns the set of listed file names that
# match the include pattern, do not match the exclude pattern, and exist.
get_staged_files_matching = get_files_matching(get_staged_files)
get_all_files_matching = get_files_matching(get_all_files)
get_conflicted_files_matching = get_files_matching(get_conflicted_files)