add pass_filenames_via_stdin for large changesets

pre-commit currently passes selected filenames to hooks via argv.
For large changesets (or --all-files), argv length limits are hit and
filenames are partitioned, causing multiple hook invocations.

As a result, there is currently no built-in way to hand the complete set of
filenames to an underlying hook in a single invocation: large sets are
chunked and the hook is re-run once per chunk. The only practical
workaround is to set pass_filenames: false and run custom git operations in
hook code to reconstruct the file set, which is expensive and duplicates
pre-commit's own file-selection logic.

This change adds a hook option:

    pass_filenames_via_stdin: true

When enabled, pre-commit sends filenames as NUL-delimited bytes on stdin and
runs the hook in a single invocation (no argv chunking).

Why NUL-delimited stdin:
- safe for filenames containing spaces/newlines (NUL cannot appear in a path)
- matches established NUL-delimiter conventions in unix tooling
  (e.g. `xargs -0`, `find -print0`, `git ls-files -z`)

Usage for hook authors:
- shell (requires a shell whose `read` supports `-d`, such as bash):

    while IFS= read -r -d '' filename; do
        ...
    done

- python:

    data = sys.stdin.buffer.read()
    filenames = [os.fsdecode(p) for p in data.split(b'\0') if p]

Behavior notes:
- default remains argv-based passing
- pass_filenames: false still disables filename passing entirely

Implementation includes schema/runtime wiring, shared NUL encode/decode
helpers, and tests covering defaulting and runtime behavior.
This commit is contained in:
Sharmila Jesupaul 2026-02-18 18:01:55 -08:00
parent 8416413a0e
commit 635912514d
18 changed files with 147 additions and 2 deletions

View file

@ -165,6 +165,7 @@ def run_hook(
is_local: bool,
require_serial: bool,
color: bool,
pass_filenames_via_stdin: bool = False,
) -> tuple[int, bytes]: # pragma: win32 no cover
# Rebuild the docker image in case it has gone missing, as many people do
# automated cleanup of docker images.
@ -178,4 +179,5 @@ def run_hook(
file_args,
require_serial=require_serial,
color=color,
pass_filenames_via_stdin=pass_filenames_via_stdin,
)

View file

@ -22,6 +22,7 @@ def run_hook(
is_local: bool,
require_serial: bool,
color: bool,
pass_filenames_via_stdin: bool = False,
) -> tuple[int, bytes]: # pragma: win32 no cover
cmd = docker_cmd(color=color) + lang_base.hook_cmd(entry, args)
return lang_base.run_xargs(
@ -29,4 +30,5 @@ def run_hook(
file_args,
require_serial=require_serial,
color=color,
pass_filenames_via_stdin=pass_filenames_via_stdin,
)

View file

@ -21,6 +21,7 @@ def run_hook(
is_local: bool,
require_serial: bool,
color: bool,
pass_filenames_via_stdin: bool = False,
) -> tuple[int, bytes]:
out = f'{entry}\n\n'.encode()
out += b'\n'.join(f.encode() for f in file_args) + b'\n'

View file

@ -27,6 +27,7 @@ def run_hook(
is_local: bool,
require_serial: bool,
color: bool,
pass_filenames_via_stdin: bool = False,
) -> tuple[int, bytes]:
# `entry` is a (hook-repo relative) file followed by (optional) args, e.g.
# `bin/id.jl` or `bin/hook.jl --arg1 --arg2` so we
@ -43,6 +44,7 @@ def run_hook(
file_args,
require_serial=require_serial,
color=color,
pass_filenames_via_stdin=pass_filenames_via_stdin,
)

View file

@ -96,8 +96,12 @@ def run_hook(
is_local: bool,
require_serial: bool,
color: bool,
pass_filenames_via_stdin: bool = False,
) -> tuple[int, bytes]:
cmd = (sys.executable, '-m', __name__, *args, entry)
if pass_filenames_via_stdin:
stdin = lang_base.to_nul_delimited_filenames(file_args)
return xargs(cmd, (), color=color, input=stdin)
return xargs(cmd, file_args, color=color)
@ -116,6 +120,11 @@ def main(argv: Sequence[str] | None = None) -> int:
parser.add_argument('filenames', nargs='*')
args = parser.parse_args(argv)
if not args.filenames:
stdin = sys.stdin.buffer.read()
if stdin:
args.filenames = lang_base.from_nul_delimited_filenames(stdin)
flags = re.IGNORECASE if args.ignore_case else 0
if args.multiline:
flags |= re.MULTILINE | re.DOTALL

View file

@ -268,6 +268,7 @@ def run_hook(
is_local: bool,
require_serial: bool,
color: bool,
pass_filenames_via_stdin: bool = False,
) -> tuple[int, bytes]:
cmd = _cmd_from_hook(prefix, entry, args, is_local=is_local)
return lang_base.run_xargs(
@ -275,4 +276,5 @@ def run_hook(
file_args,
require_serial=require_serial,
color=color,
pass_filenames_via_stdin=pass_filenames_via_stdin,
)

View file

@ -21,6 +21,7 @@ def run_hook(
is_local: bool,
require_serial: bool,
color: bool,
pass_filenames_via_stdin: bool = False,
) -> tuple[int, bytes]:
cmd = lang_base.hook_cmd(entry, args)
cmd = (prefix.path(cmd[0]), *cmd[1:])
@ -29,4 +30,5 @@ def run_hook(
file_args,
require_serial=require_serial,
color=color,
pass_filenames_via_stdin=pass_filenames_via_stdin,
)