Allow parallelization of autoupdate

Signed-off-by: Bernát Gábor <bgabor8@bloomberg.net>
This commit is contained in:
Bernát Gábor 2023-04-11 08:52:51 -07:00
parent 5027592625
commit 0911240821
No known key found for this signature in database
GPG key ID: EEDA44E02406AF79
2 changed files with 71 additions and 33 deletions

View file

@ -3,9 +3,14 @@ from __future__ import annotations
import os.path import os.path
import re import re
import tempfile import tempfile
from concurrent.futures import as_completed
from concurrent.futures import ThreadPoolExecutor
from typing import Any from typing import Any
from typing import cast
from typing import List
from typing import NamedTuple from typing import NamedTuple
from typing import Sequence from typing import Sequence
from typing import Union
import pre_commit.constants as C import pre_commit.constants as C
from pre_commit import git from pre_commit import git
@ -149,32 +154,65 @@ def autoupdate(
tags_only: bool, tags_only: bool,
freeze: bool, freeze: bool,
repos: Sequence[str] = (), repos: Sequence[str] = (),
jobs: int = 1,
) -> int: ) -> int:
"""Auto-update the pre-commit config to the latest versions of repos.""" """Auto-update the pre-commit config to the latest versions of repos."""
migrate_config(config_file, quiet=True) migrate_config(config_file, quiet=True)
retv = 0 retv = 0
rev_infos: list[RevInfo | None] = []
changed = False changed = False
config = load_config(config_file) config = load_config(config_file)
for repo_config in config['repos']: rev_infos: list[RevInfo | None | object] = [None] * len(config['repos'])
with ThreadPoolExecutor(max_workers=jobs) as pool:
futures = {}
for at, repo_config in enumerate(config['repos']):
future = pool.submit(
_run_one, repo_config, store, tags_only, freeze, repos,
jobs != 1,
)
futures[future] = at
for future in as_completed(futures):
try:
change, new_info = future.result()
except RepositoryCannotBeUpdatedError:
retv = 1
else:
changed = changed or change
rev_infos[futures[future]] = new_info
if changed:
info = cast(
List[Union[RevInfo, None]],
[i for i in rev_infos if i is not object],
)
_write_new_config(config_file, info)
return retv
def _run_one(
repo_config: dict[str, str],
store: Store,
tags_only: bool,
freeze: bool,
repos: Sequence[str] = (),
parallel: bool = False,
) -> tuple[bool, RevInfo | None | object]:
if repo_config['repo'] in {LOCAL, META}: if repo_config['repo'] in {LOCAL, META}:
continue return False, object
info = RevInfo.from_config(repo_config) info = RevInfo.from_config(repo_config)
if repos and info.repo not in repos: if repos and info.repo not in repos:
rev_infos.append(None) return False, None
continue
output.write(f'Updating {info.repo} ... ') pref = f'Updating {info.repo} ... '
if not parallel:
output.write(pref)
new_info = info.update(tags_only=tags_only, freeze=freeze) new_info = info.update(tags_only=tags_only, freeze=freeze)
try: try:
_check_hooks_still_exist_at_rev(repo_config, new_info, store) _check_hooks_still_exist_at_rev(repo_config, new_info, store)
except RepositoryCannotBeUpdatedError as error: except RepositoryCannotBeUpdatedError as error:
output.write_line(error.args[0]) output.write_line(f'{pref if parallel else ""}{error.args[0]}')
rev_infos.append(None) raise
retv = 1
continue
if new_info.rev != info.rev: if new_info.rev != info.rev:
changed = True changed = True
@ -182,14 +220,9 @@ def autoupdate(
updated_to = f'{new_info.frozen} (frozen)' updated_to = f'{new_info.frozen} (frozen)'
else: else:
updated_to = new_info.rev updated_to = new_info.rev
msg = f'updating {info.rev} -> {updated_to}.' msg = f'{pref if parallel else ""}updating {info.rev} -> {updated_to}.'
output.write_line(msg) output.write_line(msg)
rev_infos.append(new_info)
else: else:
output.write_line('already up to date.') changed = False
rev_infos.append(None) output.write_line(f'{pref if parallel else ""}already up to date.')
return changed, new_info
if changed:
_write_new_config(config_file, rev_infos)
return retv

View file

@ -229,6 +229,10 @@ def main(argv: Sequence[str] | None = None) -> int:
'--repo', dest='repos', action='append', metavar='REPO', '--repo', dest='repos', action='append', metavar='REPO',
help='Only update this repository -- may be specified multiple times.', help='Only update this repository -- may be specified multiple times.',
) )
autoupdate_parser.add_argument(
'-j', type=int, dest='jobs', default=1, metavar='COUNT',
help='Parallelization level to use.',
)
_add_cmd('clean', help='Clean out pre-commit files.') _add_cmd('clean', help='Clean out pre-commit files.')
@ -372,6 +376,7 @@ def main(argv: Sequence[str] | None = None) -> int:
tags_only=not args.bleeding_edge, tags_only=not args.bleeding_edge,
freeze=args.freeze, freeze=args.freeze,
repos=args.repos, repos=args.repos,
jobs=args.jobs,
) )
elif args.command == 'clean': elif args.command == 'clean':
return clean(store) return clean(store)