add --jobs option to autoupdate

This commit is contained in:
Anthony Sottile 2023-04-29 15:05:17 -04:00
parent bab5f70a38
commit ddbee32ad0
4 changed files with 72 additions and 43 deletions

View file

@ -1,5 +1,6 @@
from __future__ import annotations from __future__ import annotations
import concurrent.futures
import os.path import os.path
import re import re
import tempfile import tempfile
@ -10,6 +11,7 @@ from typing import Sequence
import pre_commit.constants as C import pre_commit.constants as C
from pre_commit import git from pre_commit import git
from pre_commit import output from pre_commit import output
from pre_commit import xargs
from pre_commit.clientlib import InvalidManifestError from pre_commit.clientlib import InvalidManifestError
from pre_commit.clientlib import load_config from pre_commit.clientlib import load_config
from pre_commit.clientlib import load_manifest from pre_commit.clientlib import load_manifest
@ -71,7 +73,7 @@ class RevInfo(NamedTuple):
try: try:
manifest = load_manifest(os.path.join(tmp, C.MANIFEST_FILE)) manifest = load_manifest(os.path.join(tmp, C.MANIFEST_FILE))
except InvalidManifestError as e: except InvalidManifestError as e:
raise RepositoryCannotBeUpdatedError(str(e)) raise RepositoryCannotBeUpdatedError(f'[{self.repo}] {e}')
else: else:
hook_ids = frozenset(hook['id'] for hook in manifest) hook_ids = frozenset(hook['id'] for hook in manifest)
@ -91,11 +93,24 @@ def _check_hooks_still_exist_at_rev(
hooks_missing = hooks - info.hook_ids hooks_missing = hooks - info.hook_ids
if hooks_missing: if hooks_missing:
raise RepositoryCannotBeUpdatedError( raise RepositoryCannotBeUpdatedError(
f'Cannot update because the update target is missing these ' f'[{info.repo}] Cannot update because the update target is '
f'hooks:\n{", ".join(sorted(hooks_missing))}', f'missing these hooks: {", ".join(sorted(hooks_missing))}',
) )
def _update_one(
        i: int,
        repo: dict[str, Any],
        *,
        tags_only: bool,
        freeze: bool,
) -> tuple[int, RevInfo, RevInfo]:
    """Compute the updated revision for a single repository entry.

    :param i: position of ``repo`` in the caller's list; it is returned
        untouched so results that complete out of order can be matched
        back to their slot.
    :param repo: one repository entry from the loaded configuration.
    :param tags_only: forwarded to ``RevInfo.update``.
    :param freeze: forwarded to ``RevInfo.update``.
    :returns: ``(i, old, new)`` where ``old`` is the revision info built
        from the config entry and ``new`` is the result of updating it.
    :raises RepositoryCannotBeUpdatedError: when the update target no
        longer provides every hook the config entry uses.
    """
    old = RevInfo.from_config(repo)
    new = old.update(tags_only=tags_only, freeze=freeze)
    # validate before returning so a bad target is never handed back
    _check_hooks_still_exist_at_rev(repo, new)
    return i, old, new
REV_LINE_RE = re.compile(r'^(\s+)rev:(\s*)([\'"]?)([^\s#]+)(.*)(\r?\n)$') REV_LINE_RE = re.compile(r'^(\s+)rev:(\s*)([\'"]?)([^\s#]+)(.*)(\r?\n)$')
@ -147,45 +162,50 @@ def autoupdate(
tags_only: bool, tags_only: bool,
freeze: bool, freeze: bool,
repos: Sequence[str] = (), repos: Sequence[str] = (),
jobs: int = 1,
) -> int: ) -> int:
"""Auto-update the pre-commit config to the latest versions of repos.""" """Auto-update the pre-commit config to the latest versions of repos."""
migrate_config(config_file, quiet=True) migrate_config(config_file, quiet=True)
retv = 0
rev_infos: list[RevInfo | None] = []
changed = False changed = False
retv = 0
config = load_config(config_file) config_repos = [
for repo_config in config['repos']: repo for repo in load_config(config_file)['repos']
if repo_config['repo'] in {LOCAL, META}: if repo['repo'] not in {LOCAL, META}
continue ]
info = RevInfo.from_config(repo_config) rev_infos: list[RevInfo | None] = [None] * len(config_repos)
if repos and info.repo not in repos: jobs = jobs or xargs.cpu_count() # 0 => number of cpus
rev_infos.append(None) jobs = min(jobs, len(repos) or len(config_repos)) # max 1-per-thread
continue jobs = max(jobs, 1) # at least one thread
with concurrent.futures.ThreadPoolExecutor(jobs) as exe:
output.write(f'Updating {info.repo} ... ') futures = [
try: exe.submit(
new_info = info.update(tags_only=tags_only, freeze=freeze) _update_one,
_check_hooks_still_exist_at_rev(repo_config, new_info) i, repo, tags_only=tags_only, freeze=freeze,
except RepositoryCannotBeUpdatedError as error: )
output.write_line(error.args[0]) for i, repo in enumerate(config_repos)
rev_infos.append(None) if not repos or repo['repo'] in repos
retv = 1 ]
continue for future in concurrent.futures.as_completed(futures):
try:
if new_info.rev != info.rev: i, old, new = future.result()
changed = True except RepositoryCannotBeUpdatedError as e:
if new_info.frozen: output.write_line(str(e))
updated_to = f'{new_info.frozen} (frozen)' retv = 1
else: else:
updated_to = new_info.rev if new.rev != old.rev:
msg = f'updating {info.rev} -> {updated_to}.' changed = True
output.write_line(msg) if new.frozen:
rev_infos.append(new_info) new_s = f'{new.frozen} (frozen)'
else: else:
output.write_line('already up to date.') new_s = new.rev
rev_infos.append(None) msg = f'updating {old.rev} -> {new_s}'
rev_infos[i] = new
else:
msg = 'already up to date!'
output.write_line(f'[{old.repo}] {msg}')
if changed: if changed:
_write_new_config(config_file, rev_infos) _write_new_config(config_file, rev_infos)

View file

@ -1,7 +1,6 @@
from __future__ import annotations from __future__ import annotations
import contextlib import contextlib
import multiprocessing
import os import os
import random import random
import re import re
@ -15,9 +14,9 @@ from typing import Sequence
import pre_commit.constants as C import pre_commit.constants as C
from pre_commit import parse_shebang from pre_commit import parse_shebang
from pre_commit import xargs
from pre_commit.prefix import Prefix from pre_commit.prefix import Prefix
from pre_commit.util import cmd_output_b from pre_commit.util import cmd_output_b
from pre_commit.xargs import xargs
FIXED_RANDOM_SEED = 1542676187 FIXED_RANDOM_SEED = 1542676187
@ -140,10 +139,7 @@ def target_concurrency() -> int:
if 'TRAVIS' in os.environ: if 'TRAVIS' in os.environ:
return 2 return 2
else: else:
try: return xargs.cpu_count()
return multiprocessing.cpu_count()
except NotImplementedError:
return 1
def _shuffled(seq: Sequence[str]) -> list[str]: def _shuffled(seq: Sequence[str]) -> list[str]:
@ -171,7 +167,7 @@ def run_xargs(
# ordering. # ordering.
file_args = _shuffled(file_args) file_args = _shuffled(file_args)
jobs = target_concurrency() jobs = target_concurrency()
return xargs(cmd, file_args, target_concurrency=jobs, color=color) return xargs.xargs(cmd, file_args, target_concurrency=jobs, color=color)
def hook_cmd(entry: str, args: Sequence[str]) -> tuple[str, ...]: def hook_cmd(entry: str, args: Sequence[str]) -> tuple[str, ...]:

View file

@ -226,9 +226,13 @@ def main(argv: Sequence[str] | None = None) -> int:
help='Store "frozen" hashes in `rev` instead of tag names', help='Store "frozen" hashes in `rev` instead of tag names',
) )
autoupdate_parser.add_argument( autoupdate_parser.add_argument(
'--repo', dest='repos', action='append', metavar='REPO', '--repo', dest='repos', action='append', metavar='REPO', default=[],
help='Only update this repository -- may be specified multiple times.', help='Only update this repository -- may be specified multiple times.',
) )
autoupdate_parser.add_argument(
'-j', '--jobs', type=int, default=1,
help='Number of threads to use. (default %(default)s).',
)
_add_cmd('clean', help='Clean out pre-commit files.') _add_cmd('clean', help='Clean out pre-commit files.')
@ -372,6 +376,7 @@ def main(argv: Sequence[str] | None = None) -> int:
tags_only=not args.bleeding_edge, tags_only=not args.bleeding_edge,
freeze=args.freeze, freeze=args.freeze,
repos=args.repos, repos=args.repos,
jobs=args.jobs,
) )
elif args.command == 'clean': elif args.command == 'clean':
return clean(store) return clean(store)

View file

@ -3,6 +3,7 @@ from __future__ import annotations
import concurrent.futures import concurrent.futures
import contextlib import contextlib
import math import math
import multiprocessing
import os import os
import subprocess import subprocess
import sys import sys
@ -22,6 +23,13 @@ TArg = TypeVar('TArg')
TRet = TypeVar('TRet') TRet = TypeVar('TRet')
def cpu_count() -> int:
    """Return the number of CPUs, or 1 when it cannot be determined."""
    try:
        return multiprocessing.cpu_count()
    except NotImplementedError:
        # the platform could not report a count: assume a single cpu
        return 1
def _environ_size(_env: MutableMapping[str, str] | None = None) -> int: def _environ_size(_env: MutableMapping[str, str] | None = None) -> int:
environ = _env if _env is not None else getattr(os, 'environb', os.environ) environ = _env if _env is not None else getattr(os, 'environb', os.environ)
size = 8 * len(environ) # number of pointers in `envp` size = 8 * len(environ) # number of pointers in `envp`