From fa4c03da655654a9818e3bcb95577a0eac4cf1ef Mon Sep 17 00:00:00 2001 From: "George Y. Kussumoto" Date: Fri, 5 Oct 2018 11:54:31 -0300 Subject: [PATCH] Update xargs.partition with platform information Change how xargs.partition computes the command length (including arguments) depending on the plataform. More specifically, 'win32' uses the amount of characters while posix system uses the byte count. --- pre_commit/xargs.py | 29 +++++++++++++++++++++-------- tests/xargs_test.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 8 deletions(-) diff --git a/pre_commit/xargs.py b/pre_commit/xargs.py index eea3acdb..1b237a38 100644 --- a/pre_commit/xargs.py +++ b/pre_commit/xargs.py @@ -1,41 +1,54 @@ from __future__ import absolute_import from __future__ import unicode_literals +import sys + from pre_commit import parse_shebang from pre_commit.util import cmd_output -# Limit used previously to avoid "xargs ... Bad file number" on windows -# This is slightly less than the posix mandated minimum -MAX_LENGTH = 4000 +# TODO: properly compute max_length value +def _get_platform_max_length(): + # posix minimum + return 4 * 1024 + + +def _get_command_length(command, arg): + parts = command + (arg,) + full_cmd = ' '.join(parts) + + # win32 uses the amount of characters, more details at: + # https://blogs.msdn.microsoft.com/oldnewthing/20031210-00/?p=41553/ + if sys.platform == 'win32': + return len(full_cmd) + + return len(full_cmd.encode(sys.getdefaultencoding())) class ArgumentTooLongError(RuntimeError): pass -def partition(cmd, varargs, _max_length=MAX_LENGTH): +def partition(cmd, varargs, _max_length=None): + _max_length = _max_length or _get_platform_max_length() cmd = tuple(cmd) ret = [] ret_cmd = [] - total_len = len(' '.join(cmd)) # Reversed so arguments are in order varargs = list(reversed(varargs)) while varargs: arg = varargs.pop() - if total_len + 1 + len(arg) <= _max_length: + if _get_command_length(cmd + tuple(ret_cmd), arg) <= _max_length: ret_cmd.append(arg) - total_len += len(arg) elif not ret_cmd: raise ArgumentTooLongError(arg) else: # We've exceeded the length, yield a command ret.append(cmd + tuple(ret_cmd)) ret_cmd = [] - total_len = len(' '.join(cmd)) varargs.append(arg) ret.append(cmd + tuple(ret_cmd)) diff --git a/tests/xargs_test.py b/tests/xargs_test.py index 529eb197..84d4899b 100644 --- a/tests/xargs_test.py +++ b/tests/xargs_test.py @@ -1,11 +1,29 @@ from __future__ import absolute_import from __future__ import unicode_literals +from unittest import mock + import pytest from pre_commit import xargs +@pytest.fixture +def sys_win32_mock(): + return mock.Mock( + platform='win32', + getdefaultencoding=mock.Mock(return_value='utf-8'), + ) + + +@pytest.fixture +def sys_linux_mock(): + return mock.Mock( + platform='linux', + getdefaultencoding=mock.Mock(return_value='utf-8'), + ) + + def test_partition_trivial(): assert xargs.partition(('cmd',), ()) == (('cmd',),) @@ -35,6 +53,32 @@ def test_partition_limits(): ) +def test_partition_limit_win32(sys_win32_mock): + cmd = ('ninechars',) + varargs = ('😑' * 10,) + with mock.patch('pre_commit.xargs.sys', sys_win32_mock): + ret = xargs.partition(cmd, varargs, _max_length=20) + + assert ret == (cmd + varargs,) + + +def test_partition_limit_linux(sys_linux_mock): + cmd = ('ninechars',) + varargs = ('😑' * 5,) + with mock.patch('pre_commit.xargs.sys', sys_linux_mock): + ret = xargs.partition(cmd, varargs, _max_length=30) + + assert ret == (cmd + varargs,) + + +def test_argument_too_long_with_large_unicode(sys_linux_mock): + cmd = ('ninechars',) + varargs = ('😑' * 10,) # 4 bytes * 10 + with mock.patch('pre_commit.xargs.sys', sys_linux_mock): + with pytest.raises(xargs.ArgumentTooLongError): + xargs.partition(cmd, varargs, _max_length=20) + + def test_argument_too_long(): with pytest.raises(xargs.ArgumentTooLongError): xargs.partition(('a' * 5,), ('a' * 5,), _max_length=10)