Merge pull request #131 from pre-commit/non_utf8_diff

Treat diffs as maybe-not-utf8.
This commit is contained in:
Anthony Sottile 2014-06-23 07:50:34 -07:00
commit d02f41920f
3 changed files with 32 additions and 13 deletions

View file

@@ -56,24 +56,24 @@ class PrefixedCommandRunner(object):
if not os.path.exists(self.prefix_dir): if not os.path.exists(self.prefix_dir):
self.__makedirs(self.prefix_dir) self.__makedirs(self.prefix_dir)
def run(self, cmd, retcode=0, stdin=None, **kwargs): def run(self, cmd, retcode=0, stdin=None, encoding='UTF-8', **kwargs):
popen_kwargs = { popen_kwargs = {
'stdin': subprocess.PIPE, 'stdin': subprocess.PIPE,
'stdout': subprocess.PIPE, 'stdout': subprocess.PIPE,
'stderr': subprocess.PIPE, 'stderr': subprocess.PIPE,
} }
if stdin is not None: if stdin is not None:
stdin = stdin.encode('utf-8') stdin = stdin.encode('UTF-8')
popen_kwargs.update(kwargs) popen_kwargs.update(kwargs)
self._create_path_if_not_exists() self._create_path_if_not_exists()
replaced_cmd = _replace_cmd(cmd, prefix=self.prefix_dir) replaced_cmd = _replace_cmd(cmd, prefix=self.prefix_dir)
proc = self.__popen(replaced_cmd, **popen_kwargs) proc = self.__popen(replaced_cmd, **popen_kwargs)
stdout, stderr = proc.communicate(stdin) stdout, stderr = proc.communicate(stdin)
if isinstance(stdout, bytes): if encoding is not None:
stdout = stdout.decode('UTF-8') stdout = stdout.decode(encoding)
if isinstance(stderr, bytes): if encoding is not None:
stderr = stderr.decode('UTF-8') stderr = stderr.decode(encoding)
returncode = proc.returncode returncode = proc.returncode
if retcode is not None and retcode != returncode: if retcode is not None and retcode != returncode:

View file

@@ -20,19 +20,20 @@ def staged_files_only(cmd_runner):
cmd_runner - PrefixedCommandRunner cmd_runner - PrefixedCommandRunner
""" """
# Determine if there are unstaged files # Determine if there are unstaged files
retcode, diff_stdout, _ = cmd_runner.run( retcode, diff_stdout_binary, _ = cmd_runner.run(
['git', 'diff', '--ignore-submodules', '--binary', '--exit-code'], ['git', 'diff', '--ignore-submodules', '--binary', '--exit-code'],
retcode=None, retcode=None,
encoding=None,
) )
if retcode and diff_stdout.strip(): if retcode and diff_stdout_binary.strip():
patch_filename = cmd_runner.path('patch{0}'.format(int(time.time()))) patch_filename = cmd_runner.path('patch{0}'.format(int(time.time())))
logger.warning('Unstaged files detected.') logger.warning('Unstaged files detected.')
logger.info( logger.info(
'Stashing unstaged files to {0}.'.format(patch_filename), 'Stashing unstaged files to {0}.'.format(patch_filename),
) )
# Save the current unstaged changes as a patch # Save the current unstaged changes as a patch
with io.open(patch_filename, 'w', encoding='utf-8') as patch_file: with io.open(patch_filename, 'wb') as patch_file:
patch_file.write(diff_stdout) patch_file.write(diff_stdout_binary)
# Clear the working directory of unstaged changes # Clear the working directory of unstaged changes
cmd_runner.run(['git', 'checkout', '--', '.']) cmd_runner.run(['git', 'checkout', '--', '.'])

View file

@@ -1,3 +1,4 @@
# -*- coding: UTF-8 -*-
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import unicode_literals from __future__ import unicode_literals
@@ -34,9 +35,14 @@ def foo_staged(tmpdir_factory):
yield auto_namedtuple(path=path, foo_filename=foo_filename) yield auto_namedtuple(path=path, foo_filename=foo_filename)
def _test_foo_state(path, foo_contents=FOO_CONTENTS, status='A'): def _test_foo_state(
path,
foo_contents=FOO_CONTENTS,
status='A',
encoding='UTF-8',
):
assert os.path.exists(path.foo_filename) assert os.path.exists(path.foo_filename)
assert io.open(path.foo_filename, encoding='utf-8').read() == foo_contents assert io.open(path.foo_filename, encoding=encoding).read() == foo_contents
actual_status = get_short_git_status()['foo'] actual_status = get_short_git_status()['foo']
assert status == actual_status assert status == actual_status
@@ -246,10 +252,22 @@ def test_diff_returns_1_no_diff_though(fake_logging_handler, foo_staged):
def test_stage_utf8_changes(foo_staged, cmd_runner): def test_stage_utf8_changes(foo_staged, cmd_runner):
contents = '\u2603' contents = '\u2603'
with io.open('foo', 'w', encoding='utf-8') as foo_file: with io.open('foo', 'w', encoding='UTF-8') as foo_file:
foo_file.write(contents) foo_file.write(contents)
_test_foo_state(foo_staged, contents, 'AM') _test_foo_state(foo_staged, contents, 'AM')
with staged_files_only(cmd_runner): with staged_files_only(cmd_runner):
_test_foo_state(foo_staged) _test_foo_state(foo_staged)
_test_foo_state(foo_staged, contents, 'AM') _test_foo_state(foo_staged, contents, 'AM')
def test_stage_non_utf8_changes(foo_staged, cmd_runner):
contents = 'ú'
# Produce a latin-1 diff
with io.open('foo', 'w', encoding='latin-1') as foo_file:
foo_file.write(contents)
_test_foo_state(foo_staged, contents, 'AM', encoding='latin-1')
with staged_files_only(cmd_runner):
_test_foo_state(foo_staged)
_test_foo_state(foo_staged, contents, 'AM', encoding='latin-1')