Merge pull request #851 from chriskuehl/concurrent-execution

Implement concurrent execution of individual hooks
This commit is contained in:
Anthony Sottile 2018-11-01 18:46:13 -07:00 committed by GitHub
commit 76e0f11916
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
18 changed files with 188 additions and 47 deletions

View file

@ -56,6 +56,7 @@ MANIFEST_HOOK_DICT = cfgv.Map(
cfgv.Optional('language_version', cfgv.check_string, 'default'), cfgv.Optional('language_version', cfgv.check_string, 'default'),
cfgv.Optional('log_file', cfgv.check_string, ''), cfgv.Optional('log_file', cfgv.check_string, ''),
cfgv.Optional('minimum_pre_commit_version', cfgv.check_string, '0'), cfgv.Optional('minimum_pre_commit_version', cfgv.check_string, '0'),
cfgv.Optional('require_serial', cfgv.check_bool, False),
cfgv.Optional('stages', cfgv.check_array(cfgv.check_one_of(C.STAGES)), []), cfgv.Optional('stages', cfgv.check_array(cfgv.check_one_of(C.STAGES)), []),
cfgv.Optional('verbose', cfgv.check_bool, False), cfgv.Optional('verbose', cfgv.check_bool, False),
) )

View file

@ -9,7 +9,6 @@ from pre_commit.languages import helpers
from pre_commit.util import CalledProcessError from pre_commit.util import CalledProcessError
from pre_commit.util import clean_path_on_failure from pre_commit.util import clean_path_on_failure
from pre_commit.util import cmd_output from pre_commit.util import cmd_output
from pre_commit.xargs import xargs
ENVIRONMENT_DIR = 'docker' ENVIRONMENT_DIR = 'docker'
@ -97,4 +96,4 @@ def run_hook(prefix, hook, file_args): # pragma: windows no cover
entry_tag = ('--entrypoint', entry_exe, docker_tag(prefix)) entry_tag = ('--entrypoint', entry_exe, docker_tag(prefix))
cmd = docker_cmd() + entry_tag + cmd_rest cmd = docker_cmd() + entry_tag + cmd_rest
return xargs(cmd, file_args) return helpers.run_xargs(hook, cmd, file_args)

View file

@ -4,7 +4,6 @@ from __future__ import unicode_literals
from pre_commit.languages import helpers from pre_commit.languages import helpers
from pre_commit.languages.docker import assert_docker_available from pre_commit.languages.docker import assert_docker_available
from pre_commit.languages.docker import docker_cmd from pre_commit.languages.docker import docker_cmd
from pre_commit.xargs import xargs
ENVIRONMENT_DIR = None ENVIRONMENT_DIR = None
@ -16,4 +15,4 @@ install_environment = helpers.no_install
def run_hook(prefix, hook, file_args): # pragma: windows no cover def run_hook(prefix, hook, file_args): # pragma: windows no cover
assert_docker_available() assert_docker_available()
cmd = docker_cmd() + helpers.to_cmd(hook) cmd = docker_cmd() + helpers.to_cmd(hook)
return xargs(cmd, file_args) return helpers.run_xargs(hook, cmd, file_args)

View file

@ -11,7 +11,6 @@ from pre_commit.languages import helpers
from pre_commit.util import clean_path_on_failure from pre_commit.util import clean_path_on_failure
from pre_commit.util import cmd_output from pre_commit.util import cmd_output
from pre_commit.util import rmtree from pre_commit.util import rmtree
from pre_commit.xargs import xargs
ENVIRONMENT_DIR = 'golangenv' ENVIRONMENT_DIR = 'golangenv'
@ -81,4 +80,4 @@ def install_environment(prefix, version, additional_dependencies):
def run_hook(prefix, hook, file_args): def run_hook(prefix, hook, file_args):
with in_env(prefix): with in_env(prefix):
return xargs(helpers.to_cmd(hook), file_args) return helpers.run_xargs(hook, helpers.to_cmd(hook), file_args)

View file

@ -1,8 +1,11 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import multiprocessing
import os
import shlex import shlex
from pre_commit.util import cmd_output from pre_commit.util import cmd_output
from pre_commit.xargs import xargs
def run_setup_cmd(prefix, cmd): def run_setup_cmd(prefix, cmd):
@ -45,3 +48,21 @@ def basic_healthy(prefix, language_version):
def no_install(prefix, version, additional_dependencies): def no_install(prefix, version, additional_dependencies):
raise AssertionError('This type is not installable') raise AssertionError('This type is not installable')
def target_concurrency(hook):
if hook['require_serial'] or 'PRE_COMMIT_NO_CONCURRENCY' in os.environ:
return 1
else:
# Travis appears to have a bunch of CPUs, but we can't use them all.
if 'TRAVIS' in os.environ:
return 2
else:
try:
return multiprocessing.cpu_count()
except NotImplementedError:
return 1
def run_xargs(hook, cmd, file_args):
return xargs(cmd, file_args, target_concurrency=target_concurrency(hook))

View file

@ -10,7 +10,6 @@ from pre_commit.languages import helpers
from pre_commit.languages.python import bin_dir from pre_commit.languages.python import bin_dir
from pre_commit.util import clean_path_on_failure from pre_commit.util import clean_path_on_failure
from pre_commit.util import cmd_output from pre_commit.util import cmd_output
from pre_commit.xargs import xargs
ENVIRONMENT_DIR = 'node_env' ENVIRONMENT_DIR = 'node_env'
@ -71,4 +70,4 @@ def install_environment(prefix, version, additional_dependencies):
def run_hook(prefix, hook, file_args): def run_hook(prefix, hook, file_args):
with in_env(prefix, hook['language_version']): with in_env(prefix, hook['language_version']):
return xargs(helpers.to_cmd(hook), file_args) return helpers.run_xargs(hook, helpers.to_cmd(hook), file_args)

View file

@ -12,7 +12,6 @@ from pre_commit.parse_shebang import find_executable
from pre_commit.util import CalledProcessError from pre_commit.util import CalledProcessError
from pre_commit.util import clean_path_on_failure from pre_commit.util import clean_path_on_failure
from pre_commit.util import cmd_output from pre_commit.util import cmd_output
from pre_commit.xargs import xargs
ENVIRONMENT_DIR = 'py_env' ENVIRONMENT_DIR = 'py_env'
@ -127,7 +126,7 @@ def py_interface(_dir, _make_venv):
def run_hook(prefix, hook, file_args): def run_hook(prefix, hook, file_args):
with in_env(prefix, hook['language_version']): with in_env(prefix, hook['language_version']):
return xargs(helpers.to_cmd(hook), file_args) return helpers.run_xargs(hook, helpers.to_cmd(hook), file_args)
def install_environment(prefix, version, additional_dependencies): def install_environment(prefix, version, additional_dependencies):
additional_dependencies = tuple(additional_dependencies) additional_dependencies = tuple(additional_dependencies)

View file

@ -12,7 +12,6 @@ from pre_commit.languages import helpers
from pre_commit.util import CalledProcessError from pre_commit.util import CalledProcessError
from pre_commit.util import clean_path_on_failure from pre_commit.util import clean_path_on_failure
from pre_commit.util import resource_bytesio from pre_commit.util import resource_bytesio
from pre_commit.xargs import xargs
ENVIRONMENT_DIR = 'rbenv' ENVIRONMENT_DIR = 'rbenv'
@ -126,4 +125,4 @@ def install_environment(
def run_hook(prefix, hook, file_args): # pragma: windows no cover def run_hook(prefix, hook, file_args): # pragma: windows no cover
with in_env(prefix, hook['language_version']): with in_env(prefix, hook['language_version']):
return xargs(helpers.to_cmd(hook), file_args) return helpers.run_xargs(hook, helpers.to_cmd(hook), file_args)

View file

@ -10,7 +10,6 @@ from pre_commit.envcontext import Var
from pre_commit.languages import helpers from pre_commit.languages import helpers
from pre_commit.util import clean_path_on_failure from pre_commit.util import clean_path_on_failure
from pre_commit.util import cmd_output from pre_commit.util import cmd_output
from pre_commit.xargs import xargs
ENVIRONMENT_DIR = 'rustenv' ENVIRONMENT_DIR = 'rustenv'
@ -91,4 +90,4 @@ def install_environment(prefix, version, additional_dependencies):
def run_hook(prefix, hook, file_args): def run_hook(prefix, hook, file_args):
with in_env(prefix): with in_env(prefix):
return xargs(helpers.to_cmd(hook), file_args) return helpers.run_xargs(hook, helpers.to_cmd(hook), file_args)

View file

@ -1,7 +1,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from pre_commit.languages import helpers from pre_commit.languages import helpers
from pre_commit.xargs import xargs
ENVIRONMENT_DIR = None ENVIRONMENT_DIR = None
@ -13,4 +12,4 @@ install_environment = helpers.no_install
def run_hook(prefix, hook, file_args): def run_hook(prefix, hook, file_args):
cmd = helpers.to_cmd(hook) cmd = helpers.to_cmd(hook)
cmd = (prefix.path(cmd[0]),) + cmd[1:] cmd = (prefix.path(cmd[0]),) + cmd[1:]
return xargs(cmd, file_args) return helpers.run_xargs(hook, cmd, file_args)

View file

@ -8,7 +8,6 @@ from pre_commit.envcontext import Var
from pre_commit.languages import helpers from pre_commit.languages import helpers
from pre_commit.util import clean_path_on_failure from pre_commit.util import clean_path_on_failure
from pre_commit.util import cmd_output from pre_commit.util import cmd_output
from pre_commit.xargs import xargs
ENVIRONMENT_DIR = 'swift_env' ENVIRONMENT_DIR = 'swift_env'
get_default_version = helpers.basic_get_default_version get_default_version = helpers.basic_get_default_version
@ -53,4 +52,4 @@ def install_environment(
def run_hook(prefix, hook, file_args): # pragma: windows no cover def run_hook(prefix, hook, file_args): # pragma: windows no cover
with in_env(prefix): with in_env(prefix):
return xargs(helpers.to_cmd(hook), file_args) return helpers.run_xargs(hook, helpers.to_cmd(hook), file_args)

View file

@ -1,7 +1,6 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from pre_commit.languages import helpers from pre_commit.languages import helpers
from pre_commit.xargs import xargs
ENVIRONMENT_DIR = None ENVIRONMENT_DIR = None
@ -11,4 +10,4 @@ install_environment = helpers.no_install
def run_hook(prefix, hook, file_args): def run_hook(prefix, hook, file_args):
return xargs(helpers.to_cmd(hook), file_args) return helpers.run_xargs(hook, helpers.to_cmd(hook), file_args)

View file

@ -1,8 +1,12 @@
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division
from __future__ import unicode_literals from __future__ import unicode_literals
import contextlib
import math
import sys import sys
import concurrent.futures
import six import six
from pre_commit import parse_shebang from pre_commit import parse_shebang
@ -34,8 +38,13 @@ class ArgumentTooLongError(RuntimeError):
pass pass
def partition(cmd, varargs, _max_length=None): def partition(cmd, varargs, target_concurrency, _max_length=None):
_max_length = _max_length or _get_platform_max_length() _max_length = _max_length or _get_platform_max_length()
# Generally, we try to partition evenly into at least `target_concurrency`
# partitions, but we don't want a bunch of tiny partitions.
max_args = max(4, math.ceil(len(varargs) / target_concurrency))
cmd = tuple(cmd) cmd = tuple(cmd)
ret = [] ret = []
@ -48,7 +57,10 @@ def partition(cmd, varargs, _max_length=None):
arg = varargs.pop() arg = varargs.pop()
arg_length = _command_length(arg) + 1 arg_length = _command_length(arg) + 1
if total_length + arg_length <= _max_length: if (
total_length + arg_length <= _max_length
and len(ret_cmd) < max_args
):
ret_cmd.append(arg) ret_cmd.append(arg)
total_length += arg_length total_length += arg_length
elif not ret_cmd: elif not ret_cmd:
@ -65,12 +77,23 @@ def partition(cmd, varargs, _max_length=None):
return tuple(ret) return tuple(ret)
@contextlib.contextmanager
def _thread_mapper(maxsize):
if maxsize == 1:
yield map
else:
with concurrent.futures.ThreadPoolExecutor(maxsize) as ex:
yield ex.map
def xargs(cmd, varargs, **kwargs): def xargs(cmd, varargs, **kwargs):
"""A simplified implementation of xargs. """A simplified implementation of xargs.
negate: Make nonzero successful and zero a failure negate: Make nonzero successful and zero a failure
target_concurrency: Target number of partitions to run concurrently
""" """
negate = kwargs.pop('negate', False) negate = kwargs.pop('negate', False)
target_concurrency = kwargs.pop('target_concurrency', 1)
retcode = 0 retcode = 0
stdout = b'' stdout = b''
stderr = b'' stderr = b''
@ -80,22 +103,28 @@ def xargs(cmd, varargs, **kwargs):
except parse_shebang.ExecutableNotFoundError as e: except parse_shebang.ExecutableNotFoundError as e:
return e.to_output() return e.to_output()
for run_cmd in partition(cmd, varargs, **kwargs): partitions = partition(cmd, varargs, target_concurrency, **kwargs)
proc_retcode, proc_out, proc_err = cmd_output(
*run_cmd, encoding=None, retcode=None def run_cmd_partition(run_cmd):
) return cmd_output(*run_cmd, encoding=None, retcode=None)
# This is *slightly* too clever so I'll explain it.
# First the xor boolean table: threads = min(len(partitions), target_concurrency)
# T | F | with _thread_mapper(threads) as thread_map:
# +-------+ results = thread_map(run_cmd_partition, partitions)
# T | F | T |
# --+-------+ for proc_retcode, proc_out, proc_err in results:
# F | T | F | # This is *slightly* too clever so I'll explain it.
# --+-------+ # First the xor boolean table:
# When negate is True, it has the effect of flipping the return code # T | F |
# Otherwise, the retuncode is unchanged # +-------+
retcode |= bool(proc_retcode) ^ negate # T | F | T |
stdout += proc_out # --+-------+
stderr += proc_err # F | T | F |
# --+-------+
# When negate is True, it has the effect of flipping the return
# code. Otherwise, the returncode is unchanged.
retcode |= bool(proc_retcode) ^ negate
stdout += proc_out
stderr += proc_err
return retcode, stdout, stderr return retcode, stdout, stderr

View file

@ -47,7 +47,10 @@ setup(
'toml', 'toml',
'virtualenv', 'virtualenv',
], ],
extras_require={':python_version<"3.7"': ['importlib-resources']}, extras_require={
':python_version<"3.2"': ['futures'],
':python_version<"3.7"': ['importlib-resources'],
},
entry_points={ entry_points={
'console_scripts': [ 'console_scripts': [
'pre-commit = pre_commit.main:main', 'pre-commit = pre_commit.main:main',

View file

@ -1,8 +1,11 @@
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import unicode_literals from __future__ import unicode_literals
import multiprocessing
import os
import sys import sys
import mock
import pytest import pytest
from pre_commit.languages import helpers from pre_commit.languages import helpers
@ -28,3 +31,34 @@ def test_failed_setup_command_does_not_unicode_error():
# an assertion that this does not raise `UnicodeError` # an assertion that this does not raise `UnicodeError`
with pytest.raises(CalledProcessError): with pytest.raises(CalledProcessError):
helpers.run_setup_cmd(Prefix('.'), (sys.executable, '-c', script)) helpers.run_setup_cmd(Prefix('.'), (sys.executable, '-c', script))
def test_target_concurrency_normal():
with mock.patch.object(multiprocessing, 'cpu_count', return_value=123):
with mock.patch.dict(os.environ, {}, clear=True):
assert helpers.target_concurrency({'require_serial': False}) == 123
def test_target_concurrency_cpu_count_require_serial_true():
with mock.patch.dict(os.environ, {}, clear=True):
assert helpers.target_concurrency({'require_serial': True}) == 1
def test_target_concurrency_testing_env_var():
with mock.patch.dict(
os.environ, {'PRE_COMMIT_NO_CONCURRENCY': '1'}, clear=True,
):
assert helpers.target_concurrency({'require_serial': False}) == 1
def test_target_concurrency_on_travis():
with mock.patch.dict(os.environ, {'TRAVIS': '1'}, clear=True):
assert helpers.target_concurrency({'require_serial': False}) == 2
def test_target_concurrency_cpu_count_not_implemented():
with mock.patch.object(
multiprocessing, 'cpu_count', side_effect=NotImplementedError,
):
with mock.patch.dict(os.environ, {}, clear=True):
assert helpers.target_concurrency({'require_serial': False}) == 1

View file

@ -837,6 +837,7 @@ def test_manifest_hooks(tempdir_factory, store):
'minimum_pre_commit_version': '0', 'minimum_pre_commit_version': '0',
'name': 'Bash hook', 'name': 'Bash hook',
'pass_filenames': True, 'pass_filenames': True,
'require_serial': False,
'stages': [], 'stages': [],
'types': ['file'], 'types': ['file'],
'exclude_types': [], 'exclude_types': [],

View file

@ -3,7 +3,9 @@ from __future__ import absolute_import
from __future__ import unicode_literals from __future__ import unicode_literals
import sys import sys
import time
import concurrent.futures
import mock import mock
import pytest import pytest
import six import six
@ -35,11 +37,11 @@ def linux_mock():
def test_partition_trivial(): def test_partition_trivial():
assert xargs.partition(('cmd',), ()) == (('cmd',),) assert xargs.partition(('cmd',), (), 1) == (('cmd',),)
def test_partition_simple(): def test_partition_simple():
assert xargs.partition(('cmd',), ('foo',)) == (('cmd', 'foo'),) assert xargs.partition(('cmd',), ('foo',), 1) == (('cmd', 'foo'),)
def test_partition_limits(): def test_partition_limits():
@ -53,6 +55,7 @@ def test_partition_limits():
'.' * 5, '.' * 5,
'.' * 6, '.' * 6,
), ),
1,
_max_length=20, _max_length=20,
) )
assert ret == ( assert ret == (
@ -67,21 +70,21 @@ def test_partition_limit_win32_py3(win32_py3_mock):
cmd = ('ninechars',) cmd = ('ninechars',)
# counted as half because of utf-16 encode # counted as half because of utf-16 encode
varargs = ('😑' * 5,) varargs = ('😑' * 5,)
ret = xargs.partition(cmd, varargs, _max_length=20) ret = xargs.partition(cmd, varargs, 1, _max_length=20)
assert ret == (cmd + varargs,) assert ret == (cmd + varargs,)
def test_partition_limit_win32_py2(win32_py2_mock): def test_partition_limit_win32_py2(win32_py2_mock):
cmd = ('ninechars',) cmd = ('ninechars',)
varargs = ('😑' * 5,) # 4 bytes * 5 varargs = ('😑' * 5,) # 4 bytes * 5
ret = xargs.partition(cmd, varargs, _max_length=30) ret = xargs.partition(cmd, varargs, 1, _max_length=30)
assert ret == (cmd + varargs,) assert ret == (cmd + varargs,)
def test_partition_limit_linux(linux_mock): def test_partition_limit_linux(linux_mock):
cmd = ('ninechars',) cmd = ('ninechars',)
varargs = ('😑' * 5,) varargs = ('😑' * 5,)
ret = xargs.partition(cmd, varargs, _max_length=30) ret = xargs.partition(cmd, varargs, 1, _max_length=30)
assert ret == (cmd + varargs,) assert ret == (cmd + varargs,)
@ -89,12 +92,39 @@ def test_argument_too_long_with_large_unicode(linux_mock):
cmd = ('ninechars',) cmd = ('ninechars',)
varargs = ('😑' * 10,) # 4 bytes * 10 varargs = ('😑' * 10,) # 4 bytes * 10
with pytest.raises(xargs.ArgumentTooLongError): with pytest.raises(xargs.ArgumentTooLongError):
xargs.partition(cmd, varargs, _max_length=20) xargs.partition(cmd, varargs, 1, _max_length=20)
def test_partition_target_concurrency():
ret = xargs.partition(
('foo',), ('A',) * 22,
4,
_max_length=50,
)
assert ret == (
('foo',) + ('A',) * 6,
('foo',) + ('A',) * 6,
('foo',) + ('A',) * 6,
('foo',) + ('A',) * 4,
)
def test_partition_target_concurrency_wont_make_tiny_partitions():
ret = xargs.partition(
('foo',), ('A',) * 10,
4,
_max_length=50,
)
assert ret == (
('foo',) + ('A',) * 4,
('foo',) + ('A',) * 4,
('foo',) + ('A',) * 2,
)
def test_argument_too_long(): def test_argument_too_long():
with pytest.raises(xargs.ArgumentTooLongError): with pytest.raises(xargs.ArgumentTooLongError):
xargs.partition(('a' * 5,), ('a' * 5,), _max_length=10) xargs.partition(('a' * 5,), ('a' * 5,), 1, _max_length=10)
def test_xargs_smoke(): def test_xargs_smoke():
@ -132,3 +162,34 @@ def test_xargs_retcode_normal():
ret, _, _ = xargs.xargs(exit_cmd, ('0', '1'), _max_length=max_length) ret, _, _ = xargs.xargs(exit_cmd, ('0', '1'), _max_length=max_length)
assert ret == 1 assert ret == 1
def test_xargs_concurrency():
bash_cmd = ('bash', '-c')
print_pid = ('sleep 0.5 && echo $$',)
start = time.time()
ret, stdout, _ = xargs.xargs(
bash_cmd, print_pid * 5,
target_concurrency=5,
_max_length=len(' '.join(bash_cmd + print_pid)),
)
elapsed = time.time() - start
assert ret == 0
pids = stdout.splitlines()
assert len(pids) == 5
# It would take 0.5*5=2.5 seconds ot run all of these in serial, so if it
# takes less, they must have run concurrently.
assert elapsed < 2.5
def test_thread_mapper_concurrency_uses_threadpoolexecutor_map():
with xargs._thread_mapper(10) as thread_map:
assert isinstance(
thread_map.__self__, concurrent.futures.ThreadPoolExecutor,
) is True
def test_thread_mapper_concurrency_uses_regular_map():
with xargs._thread_mapper(1) as thread_map:
assert thread_map is map

View file

@ -27,3 +27,4 @@ env =
GIT_AUTHOR_EMAIL=test@example.com GIT_AUTHOR_EMAIL=test@example.com
GIT_COMMITTER_EMAIL=test@example.com GIT_COMMITTER_EMAIL=test@example.com
VIRTUALENV_NO_DOWNLOAD=1 VIRTUALENV_NO_DOWNLOAD=1
PRE_COMMIT_NO_CONCURRENCY=1