Include column number in pygrep output

2026-04-16 02:21:46 +04:00 · 2020-02-09 12:19:51 +02:00 · 2020-02-09 12:19:51 +02:00 · 83fcb22f3e
commit 83fcb22f3e
parent 524bdaeb33
2 changed files with 31 additions and 12 deletions
--- a/pre_commit/languages/pygrep.py
+++ b/pre_commit/languages/pygrep.py
@ -17,13 +17,28 @@ healthy = helpers.basic_healthy
 install_environment = helpers.no_install


+def _column_marker(line: bytes, byte_offset: Optional[int]) -> str:
+    """Return '<col>:' (0-based character column), or '' if undecodable."""
+    try:
+        # Count characters, not bytes, so multi-byte text gets a true column.
+        char_offset = len(line[0:byte_offset].decode())
+    except UnicodeDecodeError:
+        # bytes.decode() raises UnicodeDecodeError (never UnicodeEncodeError).
+        return ''
+    return f'{char_offset}:'
+
+
 def _process_filename_by_line(pattern: Pattern[bytes], filename: str) -> int:
    retv = 0
    with open(filename, 'rb') as f:
        for line_no, line in enumerate(f, start=1):
-            if pattern.search(line):
+            match = pattern.search(line)
+            if match:
                retv = 1
-                output.write(f'{filename}:{line_no}:')
+                output.write(
+                    f'{filename}:{line_no}:' +
+                    _column_marker(line, match.start()),
+                )
                output.write_line_b(line.rstrip(b'\r\n'))
    return retv

@ -36,7 +51,11 @@ def _process_filename_at_once(pattern: Pattern[bytes], filename: str) -> int:
        if match:
            retv = 1
            line_no = contents[:match.start()].count(b'\n')
-            output.write(f'{filename}:{line_no + 1}:')
+            line_start = contents.rfind(b'\n', 0, match.start()) + 1
+            output.write(
+                f'{filename}:{line_no + 1}:' +
+                _column_marker(contents[line_start:match.start()], None),
+            )

            matched_lines = match[0].split(b'\n')
            matched_lines[0] = contents.split(b'\n')[line_no]
--- a/tests/languages/pygrep_test.py
+++ b/tests/languages/pygrep_test.py
@ -17,10 +17,10 @@ def some_files(tmpdir):
    ('pattern', 'expected_retcode', 'expected_out'),
    (
        ('baz', 0, ''),
-        ('foo', 1, 'f1:1:foo\n'),
-        ('bar', 1, 'f1:2:bar\n'),
-        (r'(?i)\[info\]', 1, 'f2:1:[INFO] hi\n'),
-        ("h'q", 1, "f3:1:with'quotes\n"),
+        ('foo', 1, 'f1:1:0:foo\n'),
+        ('bar', 1, 'f1:2:0:bar\n'),
+        (r'(?i)\[info\]', 1, 'f2:1:0:[INFO] hi\n'),
+        ("h'q", 1, "f3:1:3:with'quotes\n"),
    ),
 )
 def test_main(some_files, cap_out, pattern, expected_retcode, expected_out):
@ -34,32 +34,32 @@ def test_ignore_case(some_files, cap_out):
    ret = pygrep.main(('--ignore-case', 'info', 'f1', 'f2', 'f3'))
    out = cap_out.get()
    assert ret == 1
-    assert out == 'f2:1:[INFO] hi\n'
+    assert out == 'f2:1:1:[INFO] hi\n'


 def test_multiline(some_files, cap_out):
    ret = pygrep.main(('--multiline', r'foo\nbar', 'f1', 'f2', 'f3'))
    out = cap_out.get()
    assert ret == 1
-    assert out == 'f1:1:foo\nbar\n'
+    assert out == 'f1:1:0:foo\nbar\n'


 def test_multiline_line_number(some_files, cap_out):
    ret = pygrep.main(('--multiline', r'ar', 'f1', 'f2', 'f3'))
    out = cap_out.get()
    assert ret == 1
-    assert out == 'f1:2:bar\n'
+    assert out == 'f1:2:1:bar\n'


 def test_multiline_dotall_flag_is_enabled(some_files, cap_out):
    ret = pygrep.main(('--multiline', r'o.*bar', 'f1', 'f2', 'f3'))
    out = cap_out.get()
    assert ret == 1
-    assert out == 'f1:1:foo\nbar\n'
+    assert out == 'f1:1:1:foo\nbar\n'


 def test_multiline_multiline_flag_is_enabled(some_files, cap_out):
    ret = pygrep.main(('--multiline', r'foo$.*bar', 'f1', 'f2', 'f3'))
    out = cap_out.get()
    assert ret == 1
-    assert out == 'f1:1:foo\nbar\n'
+    assert out == 'f1:1:0:foo\nbar\n'