mirror of
https://github.com/pre-commit/pre-commit.git
synced 2026-04-14 17:41:45 +04:00
pre-commit currently passes selected filenames to hooks via argv.
For large changesets (or --all-files), argv length limits are hit and
filenames are partitioned, causing multiple hook invocations.
This means there is currently no built-in way to pass filenames to an
underlying hook in one shot without chunking / re-running. The only practical
workaround is to set pass_filenames: false and run custom git operations in
hook code to reconstruct the file set, which is expensive and duplicates
pre-commit's own file-selection logic.
This change adds a hook option:
pass_filenames_via_stdin: true
When enabled, pre-commit sends filenames as NUL-delimited bytes on stdin and
runs the hook in a single invocation (no argv chunking).
Why NUL-delimited stdin:
- safe for filenames containing spaces/newlines
- matches established -0 conventions in unix tooling
Usage for hook authors:
- shell:
while IFS= read -r -d '' filename; do
...
done
- python:
data = sys.stdin.buffer.read()
filenames = [os.fsdecode(p) for p in data.split(b'\0') if p]
Behavior notes:
- default remains argv-based passing
- pass_filenames: false still disables filename passing entirely
Implementation includes schema/runtime wiring, shared NUL encode/decode
helpers, and tests covering defaulting and runtime behavior.
28 lines
733 B
Python
28 lines
733 B
Python
from __future__ import annotations
|
|
|
|
from collections.abc import Sequence
|
|
|
|
from pre_commit import lang_base
|
|
from pre_commit.prefix import Prefix
|
|
|
|
# Module-level language bindings. ENVIRONMENT_DIR is None, and the remaining
# names are direct aliases of helpers from lang_base:
#   get_default_version -> lang_base.basic_get_default_version
#   health_check        -> lang_base.basic_health_check
#   install_environment -> lang_base.no_install
#   in_env              -> lang_base.no_env
# NOTE(review): presumably these names are looked up on the module by the
# code that dispatches to language implementations -- confirm against caller.
ENVIRONMENT_DIR = None
|
|
get_default_version = lang_base.basic_get_default_version
|
|
health_check = lang_base.basic_health_check
|
|
install_environment = lang_base.no_install
|
|
in_env = lang_base.no_env
|
|
|
|
|
|
def run_hook(
        prefix: Prefix,
        entry: str,
        args: Sequence[str],
        file_args: Sequence[str],
        *,
        is_local: bool,
        require_serial: bool,
        color: bool,
        pass_filenames_via_stdin: bool = False,
) -> tuple[int, bytes]:
    """Report ``entry`` and the given filenames, always with status 1.

    Only ``entry`` and ``file_args`` contribute to the result; ``prefix``,
    ``args`` and every keyword-only option are accepted but not used.

    Returns:
        A ``(1, output)`` tuple. ``output`` is the encoded ``entry``, a blank
        line, then one encoded filename per line with a trailing newline.
    """
    header = f'{entry}\n\n'.encode()
    # Each filename is encoded on its own, then joined with newlines.
    listing = b'\n'.join(name.encode() for name in file_args)
    return 1, header + listing + b'\n'
|