mirror of
https://github.com/pre-commit/pre-commit.git
synced 2026-04-14 17:41:45 +04:00
pre-commit currently passes selected filenames to hooks via argv.
For large changesets (or --all-files), argv length limits are hit and
filenames are partitioned, causing multiple hook invocations.
This means there is currently no built-in way to pass filenames to an
underlying hook in one shot without chunking / re-running. The only practical
workaround is to set pass_filenames: false and run custom git operations in
hook code to reconstruct the file set, which is expensive and duplicates
pre-commit's own file-selection logic.
This change adds a hook option:
    pass_filenames_via_stdin: true
When enabled, pre-commit sends filenames as NUL-delimited bytes on stdin and
runs the hook in a single invocation (no argv chunking).
Why NUL-delimited stdin:
- safe for filenames containing spaces/newlines
- matches established -0 conventions in unix tooling
Usage for hook authors:
- shell:
    while IFS= read -r -d '' filename; do
        ...
    done
- python:
    data = sys.stdin.buffer.read()
    filenames = [os.fsdecode(p) for p in data.split(b'\0') if p]
Behavior notes:
- default remains argv-based passing
- pass_filenames: false still disables filename passing entirely
Implementation includes schema/runtime wiring, shared NUL encode/decode
helpers, and tests covering defaulting and runtime behavior.
142 lines
4.1 KiB
Python
142 lines
4.1 KiB
Python
from __future__ import annotations
|
|
|
|
import argparse
|
|
import re
|
|
import sys
|
|
from collections.abc import Sequence
|
|
from re import Pattern
|
|
from typing import NamedTuple
|
|
|
|
from pre_commit import lang_base
|
|
from pre_commit import output
|
|
from pre_commit.prefix import Prefix
|
|
from pre_commit.xargs import xargs
|
|
|
|
# Language-plugin hooks for this module.  Everything is delegated to the
# generic implementations in ``lang_base``; judging by their names these are
# the "nothing to install / no environment" variants -- NOTE(review): confirm
# against ``lang_base``.
ENVIRONMENT_DIR = None
get_default_version = lang_base.basic_get_default_version
health_check = lang_base.basic_health_check
install_environment = lang_base.no_install
in_env = lang_base.no_env
|
|
|
|
|
|
def _process_filename_by_line(pattern: Pattern[bytes], filename: str) -> int:
|
|
retv = 0
|
|
with open(filename, 'rb') as f:
|
|
for line_no, line in enumerate(f, start=1):
|
|
if pattern.search(line):
|
|
retv = 1
|
|
output.write(f'{filename}:{line_no}:')
|
|
output.write_line_b(line.rstrip(b'\r\n'))
|
|
return retv
|
|
|
|
|
|
def _process_filename_at_once(pattern: Pattern[bytes], filename: str) -> int:
|
|
retv = 0
|
|
with open(filename, 'rb') as f:
|
|
contents = f.read()
|
|
match = pattern.search(contents)
|
|
if match:
|
|
retv = 1
|
|
line_no = contents[:match.start()].count(b'\n')
|
|
output.write(f'{filename}:{line_no + 1}:')
|
|
|
|
matched_lines = match[0].split(b'\n')
|
|
matched_lines[0] = contents.split(b'\n')[line_no]
|
|
|
|
output.write_line_b(b'\n'.join(matched_lines))
|
|
return retv
|
|
|
|
|
|
def _process_filename_by_line_negated(
|
|
pattern: Pattern[bytes],
|
|
filename: str,
|
|
) -> int:
|
|
with open(filename, 'rb') as f:
|
|
for line in f:
|
|
if pattern.search(line):
|
|
return 0
|
|
else:
|
|
output.write_line(filename)
|
|
return 1
|
|
|
|
|
|
def _process_filename_at_once_negated(
|
|
pattern: Pattern[bytes],
|
|
filename: str,
|
|
) -> int:
|
|
with open(filename, 'rb') as f:
|
|
contents = f.read()
|
|
match = pattern.search(contents)
|
|
if match:
|
|
return 0
|
|
else:
|
|
output.write_line(filename)
|
|
return 1
|
|
|
|
|
|
class Choice(NamedTuple):
    """Hashable pair of command-line switches used to pick a search routine."""

    # Value of the ``--multiline`` flag.
    multiline: bool
    # Value of the ``--negate`` flag.
    negate: bool
|
|
|
|
|
|
# Dispatch table: one per-file search routine for every combination of the
# ``multiline`` / ``negate`` switches.  Each routine takes
# ``(pattern, filename)`` and returns an int status (0 or 1).
FNS = {
    Choice(multiline=True, negate=True): _process_filename_at_once_negated,
    Choice(multiline=True, negate=False): _process_filename_at_once,
    Choice(multiline=False, negate=True): _process_filename_by_line_negated,
    Choice(multiline=False, negate=False): _process_filename_by_line,
}
|
|
|
|
|
|
def run_hook(
        prefix: Prefix,
        entry: str,
        args: Sequence[str],
        file_args: Sequence[str],
        *,
        is_local: bool,
        require_serial: bool,
        color: bool,
        pass_filenames_via_stdin: bool = False,
) -> tuple[int, bytes]:
    """Run this module as a subprocess (``python -m <this module>``).

    The command line is ``args`` followed by ``entry``; with ``main`` in this
    module that makes ``entry`` the positional regex pattern (assuming
    ``args`` holds only option flags).

    When ``pass_filenames_via_stdin`` is true the filenames are encoded with
    ``lang_base.to_nul_delimited_filenames`` and handed to ``xargs`` as
    ``input`` with no file arguments on the command line.  Otherwise they
    are given to ``xargs`` as the file arguments.

    ``prefix``, ``is_local`` and ``require_serial`` are accepted but not used
    here.

    Returns whatever ``xargs`` returns.
    """
    cmd = (sys.executable, '-m', __name__, *args, entry)

    if not pass_filenames_via_stdin:
        return xargs(cmd, file_args, color=color)

    payload = lang_base.to_nul_delimited_filenames(file_args)
    return xargs(cmd, (), color=color, input=payload)
|
|
|
|
|
|
def main(argv: Sequence[str] | None = None) -> int:
    """Command-line entry point: search files for a python regex.

    Filenames come from the command line; when none are given, stdin is read
    and, if non-empty, decoded with
    ``lang_base.from_nul_delimited_filenames``.

    ``--multiline`` searches each file as one buffer with ``re.MULTILINE``
    and ``re.DOTALL``; ``--negate`` selects the routines that report files
    without a match; ``-i`` adds ``re.IGNORECASE``.

    Returns the bitwise OR of the per-file statuses (0 when there are no
    files).
    """
    parser = argparse.ArgumentParser(
        description=(
            'grep-like finder using python regexes. Unlike grep, this tool '
            'returns nonzero when it finds a match and zero otherwise. The '
            'idea here being that matches are "problems".'
        ),
    )
    parser.add_argument('-i', '--ignore-case', action='store_true')
    parser.add_argument('--multiline', action='store_true')
    parser.add_argument('--negate', action='store_true')
    parser.add_argument('pattern', help='python regex pattern.')
    parser.add_argument('filenames', nargs='*')
    args = parser.parse_args(argv)

    # Fall back to stdin only when no filenames were given on the command
    # line.
    filenames = args.filenames
    if not filenames:
        piped = sys.stdin.buffer.read()
        if piped:
            filenames = lang_base.from_nul_delimited_filenames(piped)

    flags = 0
    if args.ignore_case:
        flags |= re.IGNORECASE
    if args.multiline:
        flags |= re.MULTILINE | re.DOTALL

    # Files are opened in binary mode, so the pattern must be bytes too.
    compiled = re.compile(args.pattern.encode(), flags)

    handler = FNS[Choice(multiline=args.multiline, negate=args.negate)]
    status = 0
    for name in filenames:
        status |= handler(compiled, name)
    return status
|
|
|
|
|
|
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    sys.exit(main())
|