From fa4c03da655654a9818e3bcb95577a0eac4cf1ef Mon Sep 17 00:00:00 2001 From: "George Y. Kussumoto" Date: Fri, 5 Oct 2018 11:54:31 -0300 Subject: [PATCH 1/9] Update xargs.partition with platform information Change how xargs.partition computes the command length (including arguments) depending on the plataform. More specifically, 'win32' uses the amount of characters while posix system uses the byte count. --- pre_commit/xargs.py | 29 +++++++++++++++++++++-------- tests/xargs_test.py | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 8 deletions(-) diff --git a/pre_commit/xargs.py b/pre_commit/xargs.py index eea3acdb..1b237a38 100644 --- a/pre_commit/xargs.py +++ b/pre_commit/xargs.py @@ -1,41 +1,54 @@ from __future__ import absolute_import from __future__ import unicode_literals +import sys + from pre_commit import parse_shebang from pre_commit.util import cmd_output -# Limit used previously to avoid "xargs ... Bad file number" on windows -# This is slightly less than the posix mandated minimum -MAX_LENGTH = 4000 +# TODO: properly compute max_length value +def _get_platform_max_length(): + # posix minimum + return 4 * 1024 + + +def _get_command_length(command, arg): + parts = command + (arg,) + full_cmd = ' '.join(parts) + + # win32 uses the amount of characters, more details at: + # https://blogs.msdn.microsoft.com/oldnewthing/20031210-00/?p=41553/ + if sys.platform == 'win32': + return len(full_cmd) + + return len(full_cmd.encode(sys.getdefaultencoding())) class ArgumentTooLongError(RuntimeError): pass -def partition(cmd, varargs, _max_length=MAX_LENGTH): +def partition(cmd, varargs, _max_length=None): + _max_length = _max_length or _get_platform_max_length() cmd = tuple(cmd) ret = [] ret_cmd = [] - total_len = len(' '.join(cmd)) # Reversed so arguments are in order varargs = list(reversed(varargs)) while varargs: arg = varargs.pop() - if total_len + 1 + len(arg) <= _max_length: + if _get_command_length(cmd + tuple(ret_cmd), arg) <= _max_length: ret_cmd.append(arg) - total_len += len(arg) elif not ret_cmd: raise ArgumentTooLongError(arg) else: # We've exceeded the length, yield a command ret.append(cmd + tuple(ret_cmd)) ret_cmd = [] - total_len = len(' '.join(cmd)) varargs.append(arg) ret.append(cmd + tuple(ret_cmd)) diff --git a/tests/xargs_test.py b/tests/xargs_test.py index 529eb197..84d4899b 100644 --- a/tests/xargs_test.py +++ b/tests/xargs_test.py @@ -1,11 +1,29 @@ from __future__ import absolute_import from __future__ import unicode_literals +from unittest import mock + import pytest from pre_commit import xargs +@pytest.fixture +def sys_win32_mock(): + return mock.Mock( + platform='win32', + getdefaultencoding=mock.Mock(return_value='utf-8'), + ) + + +@pytest.fixture +def sys_linux_mock(): + return mock.Mock( + platform='linux', + getdefaultencoding=mock.Mock(return_value='utf-8'), + ) + + def test_partition_trivial(): assert xargs.partition(('cmd',), ()) == (('cmd',),) @@ -35,6 +53,32 @@ def test_partition_limits(): ) +def test_partition_limit_win32(sys_win32_mock): + cmd = ('ninechars',) + varargs = ('😑' * 10,) + with mock.patch('pre_commit.xargs.sys', sys_win32_mock): + ret = xargs.partition(cmd, varargs, _max_length=20) + + assert ret == (cmd + varargs,) + + +def test_partition_limit_linux(sys_linux_mock): + cmd = ('ninechars',) + varargs = ('😑' * 5,) + with mock.patch('pre_commit.xargs.sys', sys_linux_mock): + ret = xargs.partition(cmd, varargs, _max_length=30) + + assert ret == (cmd + varargs,) + + +def test_argument_too_long_with_large_unicode(sys_linux_mock): + cmd = ('ninechars',) + varargs = ('😑' * 10,) # 4 bytes * 10 + with mock.patch('pre_commit.xargs.sys', sys_linux_mock): + with pytest.raises(xargs.ArgumentTooLongError): + xargs.partition(cmd, varargs, _max_length=20) + + def test_argument_too_long(): with pytest.raises(xargs.ArgumentTooLongError): xargs.partition(('a' * 5,), ('a' * 5,), _max_length=10) From df5d171cd7709db434f93474591aa867a69abe81 Mon Sep 17 00:00:00 2001 From: "George Y. Kussumoto" Date: Fri, 5 Oct 2018 14:33:32 -0300 Subject: [PATCH 2/9] Fix xargs.partition tests in python2.7 (pytest-mock) --- requirements-dev.txt | 1 + tests/xargs_test.py | 27 +++++++++++++-------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 157f287d..bd7f8411 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,3 +5,4 @@ flake8 mock pytest pytest-env +pytest-mock diff --git a/tests/xargs_test.py b/tests/xargs_test.py index 84d4899b..e68f46c5 100644 --- a/tests/xargs_test.py +++ b/tests/xargs_test.py @@ -1,26 +1,25 @@ +# -*- coding: utf-8 -*- from __future__ import absolute_import from __future__ import unicode_literals -from unittest import mock - import pytest from pre_commit import xargs @pytest.fixture -def sys_win32_mock(): - return mock.Mock( +def sys_win32_mock(mocker): + return mocker.Mock( platform='win32', - getdefaultencoding=mock.Mock(return_value='utf-8'), + getdefaultencoding=mocker.Mock(return_value='utf-8'), ) @pytest.fixture -def sys_linux_mock(): - return mock.Mock( +def sys_linux_mock(mocker): + return mocker.Mock( platform='linux', - getdefaultencoding=mock.Mock(return_value='utf-8'), + getdefaultencoding=mocker.Mock(return_value='utf-8'), ) @@ -53,28 +52,28 @@ def test_partition_limits(): ) -def test_partition_limit_win32(sys_win32_mock): +def test_partition_limit_win32(mocker, sys_win32_mock): cmd = ('ninechars',) varargs = ('😑' * 10,) - with mock.patch('pre_commit.xargs.sys', sys_win32_mock): + with mocker.mock_module.patch('pre_commit.xargs.sys', sys_win32_mock): ret = xargs.partition(cmd, varargs, _max_length=20) assert ret == (cmd + varargs,) -def test_partition_limit_linux(sys_linux_mock): +def test_partition_limit_linux(mocker, sys_linux_mock): cmd = ('ninechars',) varargs = ('😑' * 5,) - with mock.patch('pre_commit.xargs.sys', sys_linux_mock): + with mocker.mock_module.patch('pre_commit.xargs.sys', sys_linux_mock): ret = xargs.partition(cmd, varargs, _max_length=30) assert ret == (cmd + varargs,) -def test_argument_too_long_with_large_unicode(sys_linux_mock): +def test_argument_too_long_with_large_unicode(mocker, sys_linux_mock): cmd = ('ninechars',) varargs = ('😑' * 10,) # 4 bytes * 10 - with mock.patch('pre_commit.xargs.sys', sys_linux_mock): + with mocker.mock_module.patch('pre_commit.xargs.sys', sys_linux_mock): with pytest.raises(xargs.ArgumentTooLongError): xargs.partition(cmd, varargs, _max_length=20) From 2ad69e12ce781c4b9242673893eacb3734d4afde Mon Sep 17 00:00:00 2001 From: "George Y. Kussumoto" Date: Fri, 5 Oct 2018 16:39:49 -0300 Subject: [PATCH 3/9] Fix xargs.partition: use sys.getfilesystemencoding The previous `sys.getdefaultencoding` almost always fallsback to `ascii` while `sys.getfilesystemencoding` is utf-8 once in utf-8 mode. --- pre_commit/xargs.py | 2 +- tests/xargs_test.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pre_commit/xargs.py b/pre_commit/xargs.py index 1b237a38..2cbd6c39 100644 --- a/pre_commit/xargs.py +++ b/pre_commit/xargs.py @@ -22,7 +22,7 @@ def _get_command_length(command, arg): if sys.platform == 'win32': return len(full_cmd) - return len(full_cmd.encode(sys.getdefaultencoding())) + return len(full_cmd.encode(sys.getfilesystemencoding())) class ArgumentTooLongError(RuntimeError): diff --git a/tests/xargs_test.py b/tests/xargs_test.py index e68f46c5..73ba9bc6 100644 --- a/tests/xargs_test.py +++ b/tests/xargs_test.py @@ -11,7 +11,7 @@ from pre_commit import xargs def sys_win32_mock(mocker): return mocker.Mock( platform='win32', - getdefaultencoding=mocker.Mock(return_value='utf-8'), + getfilesystemencoding=mocker.Mock(return_value='utf-8'), ) @@ -19,7 +19,7 @@ def sys_win32_mock(mocker): def sys_linux_mock(mocker): return mocker.Mock( platform='linux', - getdefaultencoding=mocker.Mock(return_value='utf-8'), + getfilesystemencoding=mocker.Mock(return_value='utf-8'), ) From bb6b1c33ae439889b08f82c49ab374f144d9a35b Mon Sep 17 00:00:00 2001 From: "George Y. Kussumoto" Date: Sat, 6 Oct 2018 19:57:30 -0300 Subject: [PATCH 4/9] Remove pytest-mock --- requirements-dev.txt | 1 - tests/xargs_test.py | 25 +++++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index bd7f8411..157f287d 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -5,4 +5,3 @@ flake8 mock pytest pytest-env -pytest-mock diff --git a/tests/xargs_test.py b/tests/xargs_test.py index 73ba9bc6..de16a012 100644 --- a/tests/xargs_test.py +++ b/tests/xargs_test.py @@ -2,24 +2,25 @@ from __future__ import absolute_import from __future__ import unicode_literals +import mock import pytest from pre_commit import xargs @pytest.fixture -def sys_win32_mock(mocker): - return mocker.Mock( +def sys_win32_mock(): + return mock.Mock( platform='win32', - getfilesystemencoding=mocker.Mock(return_value='utf-8'), + getfilesystemencoding=mock.Mock(return_value='utf-8'), ) @pytest.fixture -def sys_linux_mock(mocker): - return mocker.Mock( +def sys_linux_mock(): + return mock.Mock( platform='linux', - getfilesystemencoding=mocker.Mock(return_value='utf-8'), + getfilesystemencoding=mock.Mock(return_value='utf-8'), ) @@ -52,28 +53,28 @@ def test_partition_limits(): ) -def test_partition_limit_win32(mocker, sys_win32_mock): +def test_partition_limit_win32(sys_win32_mock): cmd = ('ninechars',) varargs = ('😑' * 10,) - with mocker.mock_module.patch('pre_commit.xargs.sys', sys_win32_mock): + with mock.patch('pre_commit.xargs.sys', sys_win32_mock): ret = xargs.partition(cmd, varargs, _max_length=20) assert ret == (cmd + varargs,) -def test_partition_limit_linux(mocker, sys_linux_mock): +def test_partition_limit_linux(sys_linux_mock): cmd = ('ninechars',) varargs = ('😑' * 5,) - with mocker.mock_module.patch('pre_commit.xargs.sys', sys_linux_mock): + with mock.patch('pre_commit.xargs.sys', sys_linux_mock): ret = xargs.partition(cmd, varargs, _max_length=30) assert ret == (cmd + varargs,) -def test_argument_too_long_with_large_unicode(mocker, sys_linux_mock): +def test_argument_too_long_with_large_unicode(sys_linux_mock): cmd = ('ninechars',) varargs = ('😑' * 10,) # 4 bytes * 10 - with mocker.mock_module.patch('pre_commit.xargs.sys', sys_linux_mock): + with mock.patch('pre_commit.xargs.sys', sys_linux_mock): with pytest.raises(xargs.ArgumentTooLongError): xargs.partition(cmd, varargs, _max_length=20) From 333ea75e45e631d3c76521646f88a80333938b45 Mon Sep 17 00:00:00 2001 From: "George Y. Kussumoto" Date: Sat, 6 Oct 2018 20:04:17 -0300 Subject: [PATCH 5/9] Refactor xargs.partition: _command_length usage --- pre_commit/xargs.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pre_commit/xargs.py b/pre_commit/xargs.py index 2cbd6c39..89a134d2 100644 --- a/pre_commit/xargs.py +++ b/pre_commit/xargs.py @@ -13,9 +13,8 @@ def _get_platform_max_length(): return 4 * 1024 -def _get_command_length(command, arg): - parts = command + (arg,) - full_cmd = ' '.join(parts) +def _command_length(*cmd): + full_cmd = ' '.join(cmd) # win32 uses the amount of characters, more details at: # https://blogs.msdn.microsoft.com/oldnewthing/20031210-00/?p=41553/ @@ -38,17 +37,21 @@ def partition(cmd, varargs, _max_length=None): # Reversed so arguments are in order varargs = list(reversed(varargs)) + total_length = _command_length(*cmd) while varargs: arg = varargs.pop() - if _get_command_length(cmd + tuple(ret_cmd), arg) <= _max_length: + arg_length = _command_length(arg) + 1 + if total_length + arg_length <= _max_length: ret_cmd.append(arg) + total_length += arg_length elif not ret_cmd: raise ArgumentTooLongError(arg) else: # We've exceeded the length, yield a command ret.append(cmd + tuple(ret_cmd)) ret_cmd = [] + total_length = _command_length(*cmd) varargs.append(arg) ret.append(cmd + tuple(ret_cmd)) From 2560280d21bcdc646674214e24f7352861d7dcf8 Mon Sep 17 00:00:00 2001 From: "George Y. Kussumoto" Date: Mon, 8 Oct 2018 19:42:59 -0300 Subject: [PATCH 6/9] Fix xargs.partition tests: explicity set unicode chars --- tests/xargs_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/xargs_test.py b/tests/xargs_test.py index de16a012..2d2a4ba2 100644 --- a/tests/xargs_test.py +++ b/tests/xargs_test.py @@ -55,7 +55,7 @@ def test_partition_limits(): def test_partition_limit_win32(sys_win32_mock): cmd = ('ninechars',) - varargs = ('😑' * 10,) + varargs = (u'😑' * 10,) with mock.patch('pre_commit.xargs.sys', sys_win32_mock): ret = xargs.partition(cmd, varargs, _max_length=20) @@ -64,7 +64,7 @@ def test_partition_limit_win32(sys_win32_mock): def test_partition_limit_linux(sys_linux_mock): cmd = ('ninechars',) - varargs = ('😑' * 5,) + varargs = (u'😑' * 5,) with mock.patch('pre_commit.xargs.sys', sys_linux_mock): ret = xargs.partition(cmd, varargs, _max_length=30) @@ -73,7 +73,7 @@ def test_partition_limit_linux(sys_linux_mock): def test_argument_too_long_with_large_unicode(sys_linux_mock): cmd = ('ninechars',) - varargs = ('😑' * 10,) # 4 bytes * 10 + varargs = (u'😑' * 10,) # 4 bytes * 10 with mock.patch('pre_commit.xargs.sys', sys_linux_mock): with pytest.raises(xargs.ArgumentTooLongError): xargs.partition(cmd, varargs, _max_length=20) From c9e297ddb62b30c19e2d6908cc6b0075823d83ee Mon Sep 17 00:00:00 2001 From: "George Y. Kussumoto" Date: Tue, 9 Oct 2018 22:54:41 -0300 Subject: [PATCH 7/9] Fix xargs.partition: win32 new string length computation --- pre_commit/xargs.py | 4 ++-- tests/xargs_test.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pre_commit/xargs.py b/pre_commit/xargs.py index 89a134d2..8a632008 100644 --- a/pre_commit/xargs.py +++ b/pre_commit/xargs.py @@ -17,9 +17,9 @@ def _command_length(*cmd): full_cmd = ' '.join(cmd) # win32 uses the amount of characters, more details at: - # https://blogs.msdn.microsoft.com/oldnewthing/20031210-00/?p=41553/ + # https://github.com/pre-commit/pre-commit/pull/839 if sys.platform == 'win32': - return len(full_cmd) + return len(full_cmd.encode('utf-16le')) // 2 return len(full_cmd.encode(sys.getfilesystemencoding())) diff --git a/tests/xargs_test.py b/tests/xargs_test.py index 2d2a4ba2..de16a012 100644 --- a/tests/xargs_test.py +++ b/tests/xargs_test.py @@ -55,7 +55,7 @@ def test_partition_limits(): def test_partition_limit_win32(sys_win32_mock): cmd = ('ninechars',) - varargs = (u'😑' * 10,) + varargs = ('😑' * 10,) with mock.patch('pre_commit.xargs.sys', sys_win32_mock): ret = xargs.partition(cmd, varargs, _max_length=20) @@ -64,7 +64,7 @@ def test_partition_limit_win32(sys_win32_mock): def test_partition_limit_linux(sys_linux_mock): cmd = ('ninechars',) - varargs = (u'😑' * 5,) + varargs = ('😑' * 5,) with mock.patch('pre_commit.xargs.sys', sys_linux_mock): ret = xargs.partition(cmd, varargs, _max_length=30) @@ -73,7 +73,7 @@ def test_partition_limit_linux(sys_linux_mock): def test_argument_too_long_with_large_unicode(sys_linux_mock): cmd = ('ninechars',) - varargs = (u'😑' * 10,) # 4 bytes * 10 + varargs = ('😑' * 10,) # 4 bytes * 10 with mock.patch('pre_commit.xargs.sys', sys_linux_mock): with pytest.raises(xargs.ArgumentTooLongError): xargs.partition(cmd, varargs, _max_length=20) From 3d573d8736eb1bd7df39af7333dadc71da698d45 Mon Sep 17 00:00:00 2001 From: "George Y. Kussumoto" Date: Tue, 9 Oct 2018 23:32:46 -0300 Subject: [PATCH 8/9] Fix xargs.partion: win32 test --- tests/xargs_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/xargs_test.py b/tests/xargs_test.py index de16a012..65336c58 100644 --- a/tests/xargs_test.py +++ b/tests/xargs_test.py @@ -55,7 +55,8 @@ def test_partition_limits(): def test_partition_limit_win32(sys_win32_mock): cmd = ('ninechars',) - varargs = ('😑' * 10,) + # counted as half because of utf-16 encode + varargs = ('😑' * 5,) with mock.patch('pre_commit.xargs.sys', sys_win32_mock): ret = xargs.partition(cmd, varargs, _max_length=20) From ead906aed066d66c216308a891e93596e85ec09c Mon Sep 17 00:00:00 2001 From: Anthony Sottile Date: Tue, 23 Oct 2018 22:02:48 -0700 Subject: [PATCH 9/9] Compute win32 python2 length according to encoded size --- pre_commit/xargs.py | 12 +++++++--- tests/xargs_test.py | 56 +++++++++++++++++++++++++++------------------ 2 files changed, 43 insertions(+), 25 deletions(-) diff --git a/pre_commit/xargs.py b/pre_commit/xargs.py index 8a632008..2fe8a454 100644 --- a/pre_commit/xargs.py +++ b/pre_commit/xargs.py @@ -3,6 +3,8 @@ from __future__ import unicode_literals import sys +import six + from pre_commit import parse_shebang from pre_commit.util import cmd_output @@ -19,9 +21,13 @@ def _command_length(*cmd): # win32 uses the amount of characters, more details at: # https://github.com/pre-commit/pre-commit/pull/839 if sys.platform == 'win32': - return len(full_cmd.encode('utf-16le')) // 2 - - return len(full_cmd.encode(sys.getfilesystemencoding())) + # the python2.x apis require bytes, we encode as UTF-8 + if six.PY2: + return len(full_cmd.encode('utf-8')) + else: + return len(full_cmd.encode('utf-16le')) // 2 + else: + return len(full_cmd.encode(sys.getfilesystemencoding())) class ArgumentTooLongError(RuntimeError): diff --git a/tests/xargs_test.py b/tests/xargs_test.py index 65336c58..bf685e16 100644 --- a/tests/xargs_test.py +++ b/tests/xargs_test.py @@ -2,26 +2,36 @@ from __future__ import absolute_import from __future__ import unicode_literals +import sys + import mock import pytest +import six from pre_commit import xargs @pytest.fixture -def sys_win32_mock(): - return mock.Mock( - platform='win32', - getfilesystemencoding=mock.Mock(return_value='utf-8'), - ) +def win32_py2_mock(): + with mock.patch.object(sys, 'getfilesystemencoding', return_value='utf-8'): + with mock.patch.object(sys, 'platform', 'win32'): + with mock.patch.object(six, 'PY2', True): + yield @pytest.fixture -def sys_linux_mock(): - return mock.Mock( - platform='linux', - getfilesystemencoding=mock.Mock(return_value='utf-8'), - ) +def win32_py3_mock(): + with mock.patch.object(sys, 'getfilesystemencoding', return_value='utf-8'): + with mock.patch.object(sys, 'platform', 'win32'): + with mock.patch.object(six, 'PY2', False): + yield + + +@pytest.fixture +def linux_mock(): + with mock.patch.object(sys, 'getfilesystemencoding', return_value='utf-8'): + with mock.patch.object(sys, 'platform', 'linux'): + yield def test_partition_trivial(): @@ -53,31 +63,33 @@ def test_partition_limits(): ) -def test_partition_limit_win32(sys_win32_mock): +def test_partition_limit_win32_py3(win32_py3_mock): cmd = ('ninechars',) # counted as half because of utf-16 encode varargs = ('😑' * 5,) - with mock.patch('pre_commit.xargs.sys', sys_win32_mock): - ret = xargs.partition(cmd, varargs, _max_length=20) - + ret = xargs.partition(cmd, varargs, _max_length=20) assert ret == (cmd + varargs,) -def test_partition_limit_linux(sys_linux_mock): +def test_partition_limit_win32_py2(win32_py2_mock): + cmd = ('ninechars',) + varargs = ('😑' * 5,) # 4 bytes * 5 + ret = xargs.partition(cmd, varargs, _max_length=30) + assert ret == (cmd + varargs,) + + +def test_partition_limit_linux(linux_mock): cmd = ('ninechars',) varargs = ('😑' * 5,) - with mock.patch('pre_commit.xargs.sys', sys_linux_mock): - ret = xargs.partition(cmd, varargs, _max_length=30) - + ret = xargs.partition(cmd, varargs, _max_length=30) assert ret == (cmd + varargs,) -def test_argument_too_long_with_large_unicode(sys_linux_mock): +def test_argument_too_long_with_large_unicode(linux_mock): cmd = ('ninechars',) varargs = ('😑' * 10,) # 4 bytes * 10 - with mock.patch('pre_commit.xargs.sys', sys_linux_mock): - with pytest.raises(xargs.ArgumentTooLongError): - xargs.partition(cmd, varargs, _max_length=20) + with pytest.raises(xargs.ArgumentTooLongError): + xargs.partition(cmd, varargs, _max_length=20) def test_argument_too_long():