From 54b2d982f32ec724c5d9a96c9116f09b796b23a9 Mon Sep 17 00:00:00 2001
From: Emilio Graff <1@emil.io>
Date: Fri, 28 Jul 2023 12:36:33 -0700
Subject: [PATCH] Fix for cases where commands output UTF8 in Windows.

---
 pre_commit/util.py |  9 +++++++++
 tests/util_test.py | 10 ++++++++++
 2 files changed, 19 insertions(+)

diff --git a/pre_commit/util.py b/pre_commit/util.py
index 4f8e8357..8fd38136 100644
--- a/pre_commit/util.py
+++ b/pre_commit/util.py
@@ -93,6 +93,15 @@ def cmd_output_b(
 ) -> tuple[int, bytes, bytes | None]:
     _setdefault_kwargs(kwargs)
 
+    if sys.platform == 'win32':
+        # In windows, pipes use CP1252 by default, and you'll get an
+        # exception if the command being run outputs unicode. So we
+        # set the pipe encoding to utf-8 instead.
+        #
+        # See https://stackoverflow.com/a/74607949/149506
+        import os
+        os.environ['PYTHONIOENCODING'] = 'utf-8'
+
     try:
         cmd = parse_shebang.normalize_cmd(cmd, env=kwargs.get('env'))
     except parse_shebang.ExecutableNotFoundError as e:
diff --git a/tests/util_test.py b/tests/util_test.py
index 5b262113..70810498 100644
--- a/tests/util_test.py
+++ b/tests/util_test.py
@@ -3,6 +3,7 @@ from __future__ import annotations
 import os.path
 import stat
 import subprocess
+import sys
 
 import pytest
 
@@ -106,3 +107,12 @@ def test_rmtree_read_only_directories(tmpdir):
     tmpdir.join('x/y/z').chmod(mode_no_w)
     tmpdir.join('x/y/z').chmod(mode_no_w)
     rmtree(str(tmpdir.join('x')))
+
+
+@pytest.mark.parametrize('fn', (cmd_output_b, cmd_output_p))
+def test_cmd_output_utf8(fn):
+    """Makes sure `cmd_output_*` works if the command being
+    run outputs UTF8 characters."""
+    ret, out, _ = fn(f'{sys.executable}', '-c', 'print("❤")')
+    assert ret == 0
+    assert out.strip().decode() == '❤'