diff --git a/pre_commit/languages/pygrep.py b/pre_commit/languages/pygrep.py
index 40adba0f..05061f4f 100644
--- a/pre_commit/languages/pygrep.py
+++ b/pre_commit/languages/pygrep.py
@@ -17,13 +17,35 @@ healthy = helpers.basic_healthy
 install_environment = helpers.no_install
 
 
+def _column_marker(line: bytes, byte_offset: Optional[int]) -> str:
+    """Return the ``N:`` column marker, or ``''`` if undecodable.
+
+    The column is the number of characters (not bytes) obtained by decoding
+    ``line[:byte_offset]``; ``None`` decodes the whole of ``line``.
+    """
+    try:
+        char_offset = len(line[0:byte_offset].decode())
+    except UnicodeDecodeError:
+        # bytes.decode() raises UnicodeDecodeError (not UnicodeEncodeError);
+        # omit the column rather than crash on non-UTF-8 content.
+        retv = ''
+    else:
+        retv = f'{char_offset}:'
+
+    return retv
+
+
 def _process_filename_by_line(pattern: Pattern[bytes], filename: str) -> int:
     retv = 0
     with open(filename, 'rb') as f:
         for line_no, line in enumerate(f, start=1):
-            if pattern.search(line):
+            match = pattern.search(line)
+            if match:
                 retv = 1
-                output.write(f'{filename}:{line_no}:')
+                output.write(
+                    f'{filename}:{line_no}:' +
+                    _column_marker(line, match.start()),
+                )
                 output.write_line_b(line.rstrip(b'\r\n'))
     return retv
 
@@ -36,7 +58,11 @@ def _process_filename_at_once(pattern: Pattern[bytes], filename: str) -> int:
         if match:
             retv = 1
             line_no = contents[:match.start()].count(b'\n')
-            output.write(f'{filename}:{line_no + 1}:')
+            line_start = contents.rfind(b'\n', 0, match.start()) + 1
+            output.write(
+                f'{filename}:{line_no + 1}:' +
+                _column_marker(contents[line_start:match.start()], None),
+            )
 
             matched_lines = match[0].split(b'\n')
             matched_lines[0] = contents.split(b'\n')[line_no]
diff --git a/tests/languages/pygrep_test.py b/tests/languages/pygrep_test.py
index cabea22e..06c24b98 100644
--- a/tests/languages/pygrep_test.py
+++ b/tests/languages/pygrep_test.py
@@ -17,10 +17,10 @@ def some_files(tmpdir):
     ('pattern', 'expected_retcode', 'expected_out'),
     (
         ('baz', 0, ''),
-        ('foo', 1, 'f1:1:foo\n'),
-        ('bar', 1, 'f1:2:bar\n'),
-        (r'(?i)\[info\]', 1, 'f2:1:[INFO] hi\n'),
-        ("h'q", 1, "f3:1:with'quotes\n"),
+        ('foo', 1, 'f1:1:0:foo\n'),
+        ('bar', 1, 'f1:2:0:bar\n'),
+        (r'(?i)\[info\]', 1, 'f2:1:0:[INFO] hi\n'),
+        ("h'q", 1, "f3:1:3:with'quotes\n"),
     ),
 )
 def test_main(some_files, cap_out, pattern, expected_retcode, expected_out):
@@ -34,32 +34,32 @@ def test_ignore_case(some_files, cap_out):
     ret = pygrep.main(('--ignore-case', 'info', 'f1', 'f2', 'f3'))
     out = cap_out.get()
     assert ret == 1
-    assert out == 'f2:1:[INFO] hi\n'
+    assert out == 'f2:1:1:[INFO] hi\n'
 
 
 def test_multiline(some_files, cap_out):
     ret = pygrep.main(('--multiline', r'foo\nbar', 'f1', 'f2', 'f3'))
     out = cap_out.get()
     assert ret == 1
-    assert out == 'f1:1:foo\nbar\n'
+    assert out == 'f1:1:0:foo\nbar\n'
 
 
 def test_multiline_line_number(some_files, cap_out):
     ret = pygrep.main(('--multiline', r'ar', 'f1', 'f2', 'f3'))
     out = cap_out.get()
     assert ret == 1
-    assert out == 'f1:2:bar\n'
+    assert out == 'f1:2:1:bar\n'
 
 
 def test_multiline_dotall_flag_is_enabled(some_files, cap_out):
     ret = pygrep.main(('--multiline', r'o.*bar', 'f1', 'f2', 'f3'))
     out = cap_out.get()
     assert ret == 1
-    assert out == 'f1:1:foo\nbar\n'
+    assert out == 'f1:1:1:foo\nbar\n'
 
 
 def test_multiline_multiline_flag_is_enabled(some_files, cap_out):
     ret = pygrep.main(('--multiline', r'foo$.*bar', 'f1', 'f2', 'f3'))
     out = cap_out.get()
     assert ret == 1
-    assert out == 'f1:1:foo\nbar\n'
+    assert out == 'f1:1:0:foo\nbar\n'