mirror of
https://github.com/pre-commit/pre-commit.git
synced 2026-04-15 01:51:46 +04:00
add pass_filenames_via_stdin for large changesets
pre-commit currently passes selected filenames to hooks via argv.
For large changesets (or --all-files), argv length limits are hit and
filenames are partitioned, causing multiple hook invocations.
This means there is currently no built-in way to pass filenames to an
underlying hook in one shot without chunking / re-running. The only practical
workaround is to set pass_filenames: false and run custom git operations in
hook code to reconstruct the file set, which is expensive and duplicates
pre-commit's own file-selection logic.
This change adds a hook option:
pass_filenames_via_stdin: true
When enabled, pre-commit sends filenames as NUL-delimited bytes on stdin and
runs the hook in a single invocation (no argv chunking).
Why NUL-delimited stdin:
- safe for filenames containing spaces/newlines
- matches established -0 conventions in unix tooling
Usage for hook authors:
- shell:
    while IFS= read -r -d '' filename; do
        ...
    done
- python:
    data = sys.stdin.buffer.read()
    filenames = [os.fsdecode(p) for p in data.split(b'\0') if p]
Behavior notes:
- default remains argv-based passing
- pass_filenames: false still disables filename passing entirely
Implementation includes schema/runtime wiring, shared NUL encode/decode
helpers, and tests covering defaulting and runtime behavior.
This commit is contained in:
parent
8416413a0e
commit
635912514d
18 changed files with 147 additions and 2 deletions
|
|
@ -22,6 +22,7 @@ from pre_commit.util import cmd_output_b
|
|||
# Constant seed value; presumably used to make file-order shuffling
# deterministic (see _shuffled) -- TODO confirm against the full module.
FIXED_RANDOM_SEED = 1542676187
|
||||
|
||||
# Matches a path segment named `shims` bounded on each side by `/` or `\`.
SHIMS_RE = re.compile(r'[/\\]shims[/\\]')
|
||||
# Separator byte used to join and split NUL-delimited filename lists.
NUL = b'\0'
|
||||
|
||||
|
||||
class Language(Protocol):
|
||||
|
|
@ -56,6 +57,7 @@ class Language(Protocol):
|
|||
is_local: bool,
|
||||
require_serial: bool,
|
||||
color: bool,
|
||||
pass_filenames_via_stdin: bool = False,
|
||||
) -> tuple[int, bytes]:
|
||||
...
|
||||
|
||||
|
|
@ -153,13 +155,33 @@ def _shuffled(seq: Sequence[str]) -> list[str]:
|
|||
return seq
|
||||
|
||||
|
||||
def to_nul_delimited_filenames(file_args: Sequence[str]) -> bytes:
    """Encode filenames as NUL-delimited bytes.

    Each filename is converted with ``os.fsencode`` and the results are
    joined with ``NUL``.  A trailing ``NUL`` is appended whenever the joined
    payload is non-empty; an empty payload is returned as-is.
    """
    payload = NUL.join(map(os.fsencode, file_args))
    if not payload:
        # nothing to terminate: keep the stream empty
        return payload
    return payload + NUL
|
||||
|
||||
|
||||
def from_nul_delimited_filenames(filenames: bytes) -> list[str]:
    """Decode NUL-delimited bytes into a list of filenames.

    The input is split on ``NUL``; empty segments (such as the one that
    follows a trailing ``NUL``) are discarded and every remaining segment
    is converted with ``os.fsdecode``.
    """
    non_empty_parts = filter(None, filenames.split(NUL))
    return list(map(os.fsdecode, non_empty_parts))
|
||||
|
||||
|
||||
def run_xargs(
|
||||
cmd: tuple[str, ...],
|
||||
file_args: Sequence[str],
|
||||
*,
|
||||
require_serial: bool,
|
||||
color: bool,
|
||||
pass_filenames_via_stdin: bool = False,
|
||||
) -> tuple[int, bytes]:
|
||||
if pass_filenames_via_stdin:
|
||||
stdin = to_nul_delimited_filenames(file_args)
|
||||
return xargs.xargs(
|
||||
cmd,
|
||||
(),
|
||||
target_concurrency=1,
|
||||
color=color,
|
||||
input=stdin,
|
||||
)
|
||||
|
||||
if require_serial:
|
||||
jobs = 1
|
||||
else:
|
||||
|
|
@ -187,10 +209,12 @@ def basic_run_hook(
|
|||
is_local: bool,
|
||||
require_serial: bool,
|
||||
color: bool,
|
||||
pass_filenames_via_stdin: bool = False,
|
||||
) -> tuple[int, bytes]:
|
||||
return run_xargs(
|
||||
hook_cmd(entry, args),
|
||||
file_args,
|
||||
require_serial=require_serial,
|
||||
color=color,
|
||||
pass_filenames_via_stdin=pass_filenames_via_stdin,
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue