From 69333fa2277deaf30be41f7b04a196b0b5a8b101 Mon Sep 17 00:00:00 2001 From: Thierry Deo Date: Mon, 26 Feb 2018 10:17:21 +0100 Subject: [PATCH 1/7] Add multiline mode to pygrep --- pre_commit/languages/pygrep.py | 15 ++++++++++++++- tests/languages/pygrep_test.py | 6 ++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/pre_commit/languages/pygrep.py b/pre_commit/languages/pygrep.py index 878f57d0..34d77da1 100644 --- a/pre_commit/languages/pygrep.py +++ b/pre_commit/languages/pygrep.py @@ -26,6 +26,15 @@ def _process_filename_by_line(pattern, filename): output.write_line(line.rstrip(b'\r\n')) return retv +def _process_filename_at_once(pattern, filename): + retv = 0 + with open(filename, 'rb') as f: + match = pattern.search(f.read()) + if match: + retv = 1 + output.write('{}:'.format(filename)) + output.write_line(match.group()) + return retv def run_hook(prefix, hook, file_args): exe = (sys.executable, '-m', __name__) @@ -42,6 +51,7 @@ def main(argv=None): ), ) parser.add_argument('-i', '--ignore-case', action='store_true') + parser.add_argument('-z', '--null-data', action='store_true') parser.add_argument('pattern', help='python regex pattern.') parser.add_argument('filenames', nargs='*') args = parser.parse_args(argv) @@ -51,7 +61,10 @@ def main(argv=None): retv = 0 for filename in args.filenames: - retv |= _process_filename_by_line(pattern, filename) + if args.null_data: + retv |= _process_filename_at_once(pattern, filename) + else: + retv |= _process_filename_by_line(pattern, filename) return retv diff --git a/tests/languages/pygrep_test.py b/tests/languages/pygrep_test.py index 048a5908..ece454f9 100644 --- a/tests/languages/pygrep_test.py +++ b/tests/languages/pygrep_test.py @@ -38,3 +38,9 @@ def test_ignore_case(some_files, cap_out): out = cap_out.get() assert ret == 1 assert out == 'f2:1:[INFO] hi\n' + +def test_null_data(some_files, cap_out): + ret = pygrep.main(('--null-data', r'foo.*bar', 'f1', 'f2', 'f3')) + out = cap_out.get() + assert 
ret == 1 + assert out == 'f1:foobar\n' From 2d57068f498807fdf5c8a36bdadbe59beb9d2a62 Mon Sep 17 00:00:00 2001 From: Thierry Deo Date: Mon, 26 Feb 2018 13:29:40 +0100 Subject: [PATCH 2/7] Remove newlines from file contents --- pre_commit/languages/pygrep.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pre_commit/languages/pygrep.py b/pre_commit/languages/pygrep.py index 34d77da1..0447837a 100644 --- a/pre_commit/languages/pygrep.py +++ b/pre_commit/languages/pygrep.py @@ -29,7 +29,7 @@ def _process_filename_by_line(pattern, filename): def _process_filename_at_once(pattern, filename): retv = 0 with open(filename, 'rb') as f: - match = pattern.search(f.read()) + match = pattern.search(f.read().decode('utf-8').replace('\n','')) if match: retv = 1 output.write('{}:'.format(filename)) From 3793bc32c039550014bf3646a6e78e11ded35c89 Mon Sep 17 00:00:00 2001 From: Thierry Deo Date: Mon, 26 Feb 2018 15:46:33 +0100 Subject: [PATCH 3/7] Fix linters --- pre_commit/languages/pygrep.py | 4 +++- tests/languages/pygrep_test.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pre_commit/languages/pygrep.py b/pre_commit/languages/pygrep.py index 0447837a..bc01208a 100644 --- a/pre_commit/languages/pygrep.py +++ b/pre_commit/languages/pygrep.py @@ -26,16 +26,18 @@ def _process_filename_by_line(pattern, filename): output.write_line(line.rstrip(b'\r\n')) return retv + def _process_filename_at_once(pattern, filename): retv = 0 with open(filename, 'rb') as f: - match = pattern.search(f.read().decode('utf-8').replace('\n','')) + match = pattern.search(f.read().decode('utf-8').replace('\n', '')) if match: retv = 1 output.write('{}:'.format(filename)) output.write_line(match.group()) return retv + def run_hook(prefix, hook, file_args): exe = (sys.executable, '-m', __name__) exe += tuple(hook['args']) + (hook['entry'],) diff --git a/tests/languages/pygrep_test.py b/tests/languages/pygrep_test.py index ece454f9..33250e4a 100644 --- 
a/tests/languages/pygrep_test.py +++ b/tests/languages/pygrep_test.py @@ -39,6 +39,7 @@ def test_ignore_case(some_files, cap_out): assert ret == 1 assert out == 'f2:1:[INFO] hi\n' + def test_null_data(some_files, cap_out): ret = pygrep.main(('--null-data', r'foo.*bar', 'f1', 'f2', 'f3')) out = cap_out.get() From 25c06e65259f858e099f995f2f302bf1e8ff9efb Mon Sep 17 00:00:00 2001 From: Thierry Deo Date: Wed, 7 Mar 2018 09:24:56 +0100 Subject: [PATCH 4/7] Remove encoding dependence --- pre_commit/languages/pygrep.py | 7 +++++-- tests/languages/pygrep_test.py | 18 ++++++++++++++++-- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/pre_commit/languages/pygrep.py b/pre_commit/languages/pygrep.py index bc01208a..b1af2f20 100644 --- a/pre_commit/languages/pygrep.py +++ b/pre_commit/languages/pygrep.py @@ -30,10 +30,10 @@ def _process_filename_by_line(pattern, filename): def _process_filename_at_once(pattern, filename): retv = 0 with open(filename, 'rb') as f: - match = pattern.search(f.read().decode('utf-8').replace('\n', '')) + match = pattern.search(f.read()) if match: retv = 1 - output.write('{}:'.format(filename)) + output.write('{}:{}-{}:'.format(filename, match.start(), match.end())) output.write_line(match.group()) return retv @@ -59,6 +59,9 @@ def main(argv=None): args = parser.parse_args(argv) flags = re.IGNORECASE if args.ignore_case else 0 + if args.null_data: + flags = flags | re.MULTILINE | re.DOTALL + pattern = re.compile(args.pattern.encode(), flags) retv = 0 diff --git a/tests/languages/pygrep_test.py b/tests/languages/pygrep_test.py index 33250e4a..e2063a95 100644 --- a/tests/languages/pygrep_test.py +++ b/tests/languages/pygrep_test.py @@ -41,7 +41,21 @@ def test_ignore_case(some_files, cap_out): def test_null_data(some_files, cap_out): - ret = pygrep.main(('--null-data', r'foo.*bar', 'f1', 'f2', 'f3')) + ret = pygrep.main(('--null-data', r'foo\nbar', 'f1', 'f2', 'f3')) out = cap_out.get() assert ret == 1 - assert out == 'f1:foobar\n' + 
assert out == 'f1:0-7:foo\nbar\n' + + +def test_null_data_dotall_flag_is_enabled(some_files, cap_out): + ret = pygrep.main(('--null-data', r'o.*bar', 'f1', 'f2', 'f3')) + out = cap_out.get() + assert ret == 1 + assert out == 'f1:1-7:oo\nbar\n' + + +def test_null_data_multiline_flag_is_enabled(some_files, cap_out): + ret = pygrep.main(('--null-data', r'foo$.*bar', 'f1', 'f2', 'f3')) + out = cap_out.get() + assert ret == 1 + assert out == 'f1:0-7:foo\nbar\n' From 19075371fa1770fc8f29e407111a275046c617ab Mon Sep 17 00:00:00 2001 From: Thierry Deo Date: Wed, 7 Mar 2018 09:35:08 +0100 Subject: [PATCH 5/7] Pre-commit compliance --- pre_commit/languages/pygrep.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pre_commit/languages/pygrep.py b/pre_commit/languages/pygrep.py index b1af2f20..36755dd7 100644 --- a/pre_commit/languages/pygrep.py +++ b/pre_commit/languages/pygrep.py @@ -33,7 +33,9 @@ def _process_filename_at_once(pattern, filename): match = pattern.search(f.read()) if match: retv = 1 - output.write('{}:{}-{}:'.format(filename, match.start(), match.end())) + output.write( + '{}:{}-{}:'.format(filename, match.start(), match.end()), + ) output.write_line(match.group()) return retv From 55c74c10d95af5719f771cd1db894a57fa801ea2 Mon Sep 17 00:00:00 2001 From: Thierry Deo Date: Thu, 8 Mar 2018 09:42:32 +0100 Subject: [PATCH 6/7] Rename option to --multiline and improve output --- pre_commit/languages/pygrep.py | 22 +++++++++++++++------- tests/languages/pygrep_test.py | 25 ++++++++++++++++--------- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/pre_commit/languages/pygrep.py b/pre_commit/languages/pygrep.py index 36755dd7..a1d496b5 100644 --- a/pre_commit/languages/pygrep.py +++ b/pre_commit/languages/pygrep.py @@ -30,13 +30,21 @@ def _process_filename_by_line(pattern, filename): def _process_filename_at_once(pattern, filename): retv = 0 with open(filename, 'rb') as f: - match = pattern.search(f.read()) + contents = f.read() + match = 
pattern.search(contents) if match: retv = 1 - output.write( - '{}:{}-{}:'.format(filename, match.start(), match.end()), + line_no = len( + re.compile('\n'.encode()).findall(contents, 0, match.start()), ) - output.write_line(match.group()) + output.write( + '{}:{}:'.format(filename, line_no + 1), + ) + + matched_lines = match.group().split('\n') + matched_lines[0] = contents.split('\n')[line_no] + + output.write_line('\n'.join(matched_lines)) return retv @@ -55,20 +63,20 @@ def main(argv=None): ), ) parser.add_argument('-i', '--ignore-case', action='store_true') - parser.add_argument('-z', '--null-data', action='store_true') + parser.add_argument('--multiline', action='store_true') parser.add_argument('pattern', help='python regex pattern.') parser.add_argument('filenames', nargs='*') args = parser.parse_args(argv) flags = re.IGNORECASE if args.ignore_case else 0 - if args.null_data: + if args.multiline: flags = flags | re.MULTILINE | re.DOTALL pattern = re.compile(args.pattern.encode(), flags) retv = 0 for filename in args.filenames: - if args.null_data: + if args.multiline: retv |= _process_filename_at_once(pattern, filename) else: retv |= _process_filename_by_line(pattern, filename) diff --git a/tests/languages/pygrep_test.py b/tests/languages/pygrep_test.py index e2063a95..d91363e2 100644 --- a/tests/languages/pygrep_test.py +++ b/tests/languages/pygrep_test.py @@ -40,22 +40,29 @@ def test_ignore_case(some_files, cap_out): assert out == 'f2:1:[INFO] hi\n' -def test_null_data(some_files, cap_out): - ret = pygrep.main(('--null-data', r'foo\nbar', 'f1', 'f2', 'f3')) +def test_multiline(some_files, cap_out): + ret = pygrep.main(('--multiline', r'foo\nbar', 'f1', 'f2', 'f3')) out = cap_out.get() assert ret == 1 - assert out == 'f1:0-7:foo\nbar\n' + assert out == 'f1:1:foo\nbar\n' -def test_null_data_dotall_flag_is_enabled(some_files, cap_out): - ret = pygrep.main(('--null-data', r'o.*bar', 'f1', 'f2', 'f3')) +def test_multiline_line_number(some_files, cap_out): + ret 
= pygrep.main(('--multiline', r'ar', 'f1', 'f2', 'f3')) out = cap_out.get() assert ret == 1 - assert out == 'f1:1-7:oo\nbar\n' + assert out == 'f1:2:bar\n' -def test_null_data_multiline_flag_is_enabled(some_files, cap_out): - ret = pygrep.main(('--null-data', r'foo$.*bar', 'f1', 'f2', 'f3')) +def test_multiline_dotall_flag_is_enabled(some_files, cap_out): + ret = pygrep.main(('--multiline', r'o.*bar', 'f1', 'f2', 'f3')) out = cap_out.get() assert ret == 1 - assert out == 'f1:0-7:foo\nbar\n' + assert out == 'f1:1:foo\nbar\n' + + +def test_multiline_multiline_flag_is_enabled(some_files, cap_out): + ret = pygrep.main(('--multiline', r'foo$.*bar', 'f1', 'f2', 'f3')) + out = cap_out.get() + assert ret == 1 + assert out == 'f1:1:foo\nbar\n' From 55ef3ce96058b723c5f557aa16255b99e26f7ce9 Mon Sep 17 00:00:00 2001 From: Thierry Deo Date: Fri, 9 Mar 2018 09:22:34 +0100 Subject: [PATCH 7/7] Address review comments --- pre_commit/languages/pygrep.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/pre_commit/languages/pygrep.py b/pre_commit/languages/pygrep.py index a1d496b5..7eead9e1 100644 --- a/pre_commit/languages/pygrep.py +++ b/pre_commit/languages/pygrep.py @@ -34,17 +34,13 @@ def _process_filename_at_once(pattern, filename): match = pattern.search(contents) if match: retv = 1 - line_no = len( - re.compile('\n'.encode()).findall(contents, 0, match.start()), - ) - output.write( - '{}:{}:'.format(filename, line_no + 1), - ) + line_no = contents[:match.start()].count(b'\n') + output.write('{}:{}:'.format(filename, line_no + 1)) - matched_lines = match.group().split('\n') - matched_lines[0] = contents.split('\n')[line_no] + matched_lines = match.group().split(b'\n') + matched_lines[0] = contents.split(b'\n')[line_no] - output.write_line('\n'.join(matched_lines)) + output.write_line(b'\n'.join(matched_lines)) return retv @@ -70,7 +66,7 @@ def main(argv=None): flags = re.IGNORECASE if args.ignore_case else 0 if args.multiline: - flags = flags 
| re.MULTILINE | re.DOTALL + flags |= re.MULTILINE | re.DOTALL pattern = re.compile(args.pattern.encode(), flags)