diff --git a/pre_commit/xargs.py b/pre_commit/xargs.py index eea3acdb..2fe8a454 100644 --- a/pre_commit/xargs.py +++ b/pre_commit/xargs.py @@ -1,41 +1,63 @@ from __future__ import absolute_import from __future__ import unicode_literals +import sys + +import six + from pre_commit import parse_shebang from pre_commit.util import cmd_output -# Limit used previously to avoid "xargs ... Bad file number" on windows -# This is slightly less than the posix mandated minimum -MAX_LENGTH = 4000 +# TODO: properly compute max_length value +def _get_platform_max_length(): + # posix minimum + return 4 * 1024 + + +def _command_length(*cmd): + full_cmd = ' '.join(cmd) + + # win32 uses the amount of characters, more details at: + # https://github.com/pre-commit/pre-commit/pull/839 + if sys.platform == 'win32': + # the python2.x apis require bytes, we encode as UTF-8 + if six.PY2: + return len(full_cmd.encode('utf-8')) + else: + return len(full_cmd.encode('utf-16le')) // 2 + else: + return len(full_cmd.encode(sys.getfilesystemencoding())) class ArgumentTooLongError(RuntimeError): pass -def partition(cmd, varargs, _max_length=MAX_LENGTH): +def partition(cmd, varargs, _max_length=None): + _max_length = _max_length or _get_platform_max_length() cmd = tuple(cmd) ret = [] ret_cmd = [] - total_len = len(' '.join(cmd)) # Reversed so arguments are in order varargs = list(reversed(varargs)) + total_length = _command_length(*cmd) while varargs: arg = varargs.pop() - if total_len + 1 + len(arg) <= _max_length: + arg_length = _command_length(arg) + 1 + if total_length + arg_length <= _max_length: ret_cmd.append(arg) - total_len += len(arg) + total_length += arg_length elif not ret_cmd: raise ArgumentTooLongError(arg) else: # We've exceeded the length, yield a command ret.append(cmd + tuple(ret_cmd)) ret_cmd = [] - total_len = len(' '.join(cmd)) + total_length = _command_length(*cmd) varargs.append(arg) ret.append(cmd + tuple(ret_cmd)) diff --git a/tests/xargs_test.py b/tests/xargs_test.py index 529eb197..bf685e16 100644 --- a/tests/xargs_test.py +++ b/tests/xargs_test.py @@ -1,11 +1,39 @@ +# -*- coding: utf-8 -*- from __future__ import absolute_import from __future__ import unicode_literals +import sys + +import mock import pytest +import six from pre_commit import xargs +@pytest.fixture +def win32_py2_mock(): + with mock.patch.object(sys, 'getfilesystemencoding', return_value='utf-8'): + with mock.patch.object(sys, 'platform', 'win32'): + with mock.patch.object(six, 'PY2', True): + yield + + +@pytest.fixture +def win32_py3_mock(): + with mock.patch.object(sys, 'getfilesystemencoding', return_value='utf-8'): + with mock.patch.object(sys, 'platform', 'win32'): + with mock.patch.object(six, 'PY2', False): + yield + + +@pytest.fixture +def linux_mock(): + with mock.patch.object(sys, 'getfilesystemencoding', return_value='utf-8'): + with mock.patch.object(sys, 'platform', 'linux'): + yield + + def test_partition_trivial(): assert xargs.partition(('cmd',), ()) == (('cmd',),) @@ -35,6 +63,35 @@ def test_partition_limits(): ) +def test_partition_limit_win32_py3(win32_py3_mock): + cmd = ('ninechars',) + # counted as half because of utf-16 encode + varargs = ('😑' * 5,) + ret = xargs.partition(cmd, varargs, _max_length=20) + assert ret == (cmd + varargs,) + + +def test_partition_limit_win32_py2(win32_py2_mock): + cmd = ('ninechars',) + varargs = ('😑' * 5,) # 4 bytes * 5 + ret = xargs.partition(cmd, varargs, _max_length=30) + assert ret == (cmd + varargs,) + + +def test_partition_limit_linux(linux_mock): + cmd = ('ninechars',) + varargs = ('😑' * 5,) + ret = xargs.partition(cmd, varargs, _max_length=30) + assert ret == (cmd + varargs,) + + +def test_argument_too_long_with_large_unicode(linux_mock): + cmd = ('ninechars',) + varargs = ('😑' * 10,) # 4 bytes * 10 + with pytest.raises(xargs.ArgumentTooLongError): + xargs.partition(cmd, varargs, _max_length=20) + + def test_argument_too_long(): with pytest.raises(xargs.ArgumentTooLongError): xargs.partition(('a' * 5,), ('a' * 5,), _max_length=10)