diff options
author | Joris Roovers <joris.roovers@gmail.com> | 2023-02-07 08:27:27 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-02-07 08:27:27 +0000 |
commit | ad0dfae525fb036747ff99d68fdcc4228022d15d (patch) | |
tree | f97cd7f524df4b5bce27685a0f1bb117b67c3096 | |
parent | f1093bc69594b3b5e46b8adff70bc7d239669831 (diff) |
Separate FILE_ENCODING from TERMINAL_ENCODING (#424)
Distinguish between terminal encoding (now TERMINAL_ENCODING,
renamed from DEFAULT_ENCODING) and file encoding (FILE_ENCODING).
Previously, gitlint always decoded files using the terminal encoding
(DEFAULT_ENCODING), which can be problematic when the terminal
encoding is not UTF-8.
File encoding is hard-coded to UTF-8.
-rw-r--r-- | gitlint-core/gitlint/cli.py | 4 | ||||
-rw-r--r-- | gitlint-core/gitlint/config.py | 4 | ||||
-rw-r--r-- | gitlint-core/gitlint/hooks.py | 4 | ||||
-rw-r--r-- | gitlint-core/gitlint/shell.py | 4 | ||||
-rw-r--r-- | gitlint-core/gitlint/tests/base.py | 6 | ||||
-rw-r--r-- | gitlint-core/gitlint/tests/cli/test_cli.py | 8 | ||||
-rw-r--r-- | gitlint-core/gitlint/tests/cli/test_cli_hooks.py | 10 | ||||
-rw-r--r-- | gitlint-core/gitlint/tests/test_utils.py | 4 | ||||
-rw-r--r-- | gitlint-core/gitlint/utils.py | 27 | ||||
-rw-r--r-- | qa/test_gitlint.py | 8 |
10 files changed, 45 insertions, 34 deletions
diff --git a/gitlint-core/gitlint/cli.py b/gitlint-core/gitlint/cli.py index d28d373..1caddf6 100644 --- a/gitlint-core/gitlint/cli.py +++ b/gitlint-core/gitlint/cli.py @@ -68,7 +68,7 @@ def log_system_info(): LOG.debug("Git version: %s", git_version()) LOG.debug("Gitlint version: %s", gitlint.__version__) LOG.debug("GITLINT_USE_SH_LIB: %s", os.environ.get("GITLINT_USE_SH_LIB", "[NOT SET]")) - LOG.debug("DEFAULT_ENCODING: %s", gitlint.utils.DEFAULT_ENCODING) + LOG.debug("DEFAULT_ENCODING: %s", gitlint.utils.TERMINAL_ENCODING) def build_config( @@ -264,7 +264,7 @@ class ContextObj: @click.option("--ignore", envvar="GITLINT_IGNORE", default="", help="Ignore rules (comma-separated by id or name).") @click.option("--contrib", envvar="GITLINT_CONTRIB", default="", help="Contrib rules to enable (comma-separated by id or name).") -@click.option("--msg-filename", type=click.File(encoding=gitlint.utils.DEFAULT_ENCODING), +@click.option("--msg-filename", type=click.File(encoding=gitlint.utils.FILE_ENCODING), help="Path to a file containing a commit-msg.") @click.option("--ignore-stdin", envvar="GITLINT_IGNORE_STDIN", is_flag=True, help="Ignore any stdin data. 
Useful for running in CI server.") diff --git a/gitlint-core/gitlint/config.py b/gitlint-core/gitlint/config.py index 72f283c..4205ce1 100644 --- a/gitlint-core/gitlint/config.py +++ b/gitlint-core/gitlint/config.py @@ -13,7 +13,7 @@ from gitlint import ( ) from gitlint.contrib import rules as contrib_rules from gitlint.exception import GitlintError -from gitlint.utils import DEFAULT_ENCODING +from gitlint.utils import FILE_ENCODING def handle_option_error(func): @@ -468,7 +468,7 @@ class LintConfigBuilder: try: parser = ConfigParser() - with open(filename, encoding=DEFAULT_ENCODING) as config_file: + with open(filename, encoding=FILE_ENCODING) as config_file: parser.read_file(config_file, filename) for section_name in parser.sections(): diff --git a/gitlint-core/gitlint/hooks.py b/gitlint-core/gitlint/hooks.py index bdc459e..91756d5 100644 --- a/gitlint-core/gitlint/hooks.py +++ b/gitlint-core/gitlint/hooks.py @@ -4,7 +4,7 @@ import stat from gitlint.exception import GitlintError from gitlint.git import git_hooks_dir -from gitlint.utils import DEFAULT_ENCODING +from gitlint.utils import FILE_ENCODING COMMIT_MSG_HOOK_SRC_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), "files", "commit-msg") COMMIT_MSG_HOOK_DST_PATH = "commit-msg" @@ -52,7 +52,7 @@ class GitHookInstaller: if not os.path.exists(dest_path): raise GitHookInstallerError(f"There is no commit-msg hook present in {dest_path}.") - with open(dest_path, encoding=DEFAULT_ENCODING) as fp: + with open(dest_path, encoding=FILE_ENCODING) as fp: lines = fp.readlines() if len(lines) < 2 or lines[1] != GITLINT_HOOK_IDENTIFIER: msg = ( diff --git a/gitlint-core/gitlint/shell.py b/gitlint-core/gitlint/shell.py index a96c517..bab66d6 100644 --- a/gitlint-core/gitlint/shell.py +++ b/gitlint-core/gitlint/shell.py @@ -6,7 +6,7 @@ capabilities wrt dealing with more edge-case environments on *nix systems that a import subprocess -from gitlint.utils import DEFAULT_ENCODING, USE_SH_LIB +from gitlint.utils 
import TERMINAL_ENCODING, USE_SH_LIB def shell(cmd): @@ -64,7 +64,7 @@ else: raise CommandNotFound from e exit_code = p.returncode - stdout = result[0].decode(DEFAULT_ENCODING) + stdout = result[0].decode(TERMINAL_ENCODING) stderr = result[1] # 'sh' does not decode the stderr bytes to unicode full_cmd = "" if args is None else " ".join(args) diff --git a/gitlint-core/gitlint/tests/base.py b/gitlint-core/gitlint/tests/base.py index 0dbb57b..326c7cb 100644 --- a/gitlint-core/gitlint/tests/base.py +++ b/gitlint-core/gitlint/tests/base.py @@ -13,7 +13,7 @@ from gitlint.config import LintConfig from gitlint.deprecation import LOG as DEPRECATION_LOG from gitlint.deprecation import Deprecation from gitlint.git import GitChangedFileStats, GitContext -from gitlint.utils import DEFAULT_ENCODING, LOG_FORMAT +from gitlint.utils import FILE_ENCODING, LOG_FORMAT EXPECTED_REGEX_STYLE_SEARCH_DEPRECATION_WARNING = ( "WARNING: gitlint.deprecated.regex_style_search {0} - {1}: gitlint will be switching from using " @@ -95,7 +95,7 @@ class BaseTestCase(unittest.TestCase): def get_sample(filename=""): """Read and return the contents of a file in gitlint/tests/samples""" sample_path = BaseTestCase.get_sample_path(filename) - return Path(sample_path).read_text(encoding=DEFAULT_ENCODING) + return Path(sample_path).read_text(encoding=FILE_ENCODING) @staticmethod def patch_input(side_effect): @@ -109,7 +109,7 @@ class BaseTestCase(unittest.TestCase): """Utility method to read an expected file from gitlint/tests/expected and return it as a string. 
Optionally replace template variables specified by variable_dict.""" expected_path = os.path.join(BaseTestCase.EXPECTED_DIR, filename) - expected = Path(expected_path).read_text(encoding=DEFAULT_ENCODING) + expected = Path(expected_path).read_text(encoding=FILE_ENCODING) if variable_dict: expected = expected.format(**variable_dict) diff --git a/gitlint-core/gitlint/tests/cli/test_cli.py b/gitlint-core/gitlint/tests/cli/test_cli.py index b4577c8..b519fac 100644 --- a/gitlint-core/gitlint/tests/cli/test_cli.py +++ b/gitlint-core/gitlint/tests/cli/test_cli.py @@ -9,7 +9,7 @@ from click.testing import CliRunner from gitlint import __version__, cli from gitlint.shell import CommandNotFound from gitlint.tests.base import BaseTestCase -from gitlint.utils import DEFAULT_ENCODING +from gitlint.utils import FILE_ENCODING, TERMINAL_ENCODING class CLITests(BaseTestCase): @@ -39,7 +39,7 @@ class CLITests(BaseTestCase): "gitlint_version": __version__, "GITLINT_USE_SH_LIB": BaseTestCase.GITLINT_USE_SH_LIB, "target": os.path.realpath(os.getcwd()), - "DEFAULT_ENCODING": DEFAULT_ENCODING, + "DEFAULT_ENCODING": TERMINAL_ENCODING, } def test_version(self): @@ -315,7 +315,7 @@ class CLITests(BaseTestCase): with self.tempdir() as tmpdir: msg_filename = os.path.join(tmpdir, "msg") - with open(msg_filename, "w", encoding=DEFAULT_ENCODING) as f: + with open(msg_filename, "w", encoding=FILE_ENCODING) as f: f.write("WIP: msg-filename tïtle\n") with patch("gitlint.display.stderr", new=StringIO()) as stderr: @@ -370,7 +370,7 @@ class CLITests(BaseTestCase): with self.tempdir() as tmpdir: msg_filename = os.path.join(tmpdir, "msg") - with open(msg_filename, "w", encoding=DEFAULT_ENCODING) as f: + with open(msg_filename, "w", encoding=FILE_ENCODING) as f: f.write("Commït title\n") with patch("gitlint.display.stderr", new=StringIO()) as stderr: diff --git a/gitlint-core/gitlint/tests/cli/test_cli_hooks.py b/gitlint-core/gitlint/tests/cli/test_cli_hooks.py index cf8298a..c9e4eba 100644 --- 
a/gitlint-core/gitlint/tests/cli/test_cli_hooks.py +++ b/gitlint-core/gitlint/tests/cli/test_cli_hooks.py @@ -6,7 +6,7 @@ from click.testing import CliRunner from gitlint import cli, config, hooks from gitlint.shell import ErrorReturnCode from gitlint.tests.base import BaseTestCase -from gitlint.utils import DEFAULT_ENCODING +from gitlint.utils import FILE_ENCODING class CLIHookTests(BaseTestCase): @@ -102,7 +102,7 @@ class CLIHookTests(BaseTestCase): with self.tempdir() as tmpdir: msg_filename = os.path.join(tmpdir, "hür") - with open(msg_filename, "w", encoding=DEFAULT_ENCODING) as f: + with open(msg_filename, "w", encoding=FILE_ENCODING) as f: f.write("WIP: tïtle\n") with patch("gitlint.display.stderr", new=StringIO()) as stderr: @@ -130,7 +130,7 @@ class CLIHookTests(BaseTestCase): with self.patch_input(["e", "e", "n"]), self.tempdir() as tmpdir: msg_filename = os.path.realpath(os.path.join(tmpdir, "hür")) - with open(msg_filename, "w", encoding=DEFAULT_ENCODING) as f: + with open(msg_filename, "w", encoding=FILE_ENCODING) as f: f.write(commit_messages[i] + "\n") with patch("gitlint.display.stderr", new=StringIO()) as stderr: @@ -158,7 +158,7 @@ class CLIHookTests(BaseTestCase): with self.patch_input(["n"]), self.tempdir() as tmpdir: msg_filename = os.path.join(tmpdir, "hür") - with open(msg_filename, "w", encoding=DEFAULT_ENCODING) as f: + with open(msg_filename, "w", encoding=FILE_ENCODING) as f: f.write("WIP: höok no\n") with patch("gitlint.display.stderr", new=StringIO()) as stderr: @@ -175,7 +175,7 @@ class CLIHookTests(BaseTestCase): """Test for run-hook subcommand, answering 'y(es)' after commit-hook""" with self.patch_input(["y"]), self.tempdir() as tmpdir: msg_filename = os.path.join(tmpdir, "hür") - with open(msg_filename, "w", encoding=DEFAULT_ENCODING) as f: + with open(msg_filename, "w", encoding=FILE_ENCODING) as f: f.write("WIP: höok yes\n") with patch("gitlint.display.stderr", new=StringIO()) as stderr: diff --git 
a/gitlint-core/gitlint/tests/test_utils.py b/gitlint-core/gitlint/tests/test_utils.py index 27036d3..d21ec3f 100644 --- a/gitlint-core/gitlint/tests/test_utils.py +++ b/gitlint-core/gitlint/tests/test_utils.py @@ -27,7 +27,7 @@ class UtilsTests(BaseTestCase): self.assertEqual(utils.use_sh_library(), False) @patch("gitlint.utils.locale") - def test_default_encoding_non_windows(self, mocked_locale): + def test_terminal_encoding_non_windows(self, mocked_locale): utils.PLATFORM_IS_WINDOWS = False mocked_locale.getpreferredencoding.return_value = "foöbar" self.assertEqual(utils.getpreferredencoding(), "foöbar") @@ -37,7 +37,7 @@ class UtilsTests(BaseTestCase): self.assertEqual(utils.getpreferredencoding(), "UTF-8") @patch("os.environ") - def test_default_encoding_windows(self, patched_env): + def test_terminal_encoding_windows(self, patched_env): utils.PLATFORM_IS_WINDOWS = True # Mock out os.environ mock_env = {} diff --git a/gitlint-core/gitlint/utils.py b/gitlint-core/gitlint/utils.py index 33b0584..3ccb78b 100644 --- a/gitlint-core/gitlint/utils.py +++ b/gitlint-core/gitlint/utils.py @@ -38,27 +38,28 @@ def use_sh_library(): USE_SH_LIB = use_sh_library() ######################################################################################################################## -# DEFAULT_ENCODING +# TERMINAL_ENCODING +# Encoding used for terminal encoding/decoding. def getpreferredencoding(): """Modified version of local.getpreferredencoding() that takes into account LC_ALL, LC_CTYPE, LANG env vars on windows and falls back to UTF-8.""" fallback_encoding = "UTF-8" - default_encoding = locale.getpreferredencoding() or fallback_encoding + preferred_encoding = locale.getpreferredencoding() or fallback_encoding # On Windows, we mimic git/linux by trying to read the LC_ALL, LC_CTYPE, LANG env vars manually # (on Linux/MacOS the `getpreferredencoding()` call will take care of this). 
# We fallback to UTF-8 if PLATFORM_IS_WINDOWS: - default_encoding = fallback_encoding + preferred_encoding = fallback_encoding for env_var in ["LC_ALL", "LC_CTYPE", "LANG"]: encoding = os.environ.get(env_var, False) if encoding: # Support dotted (C.UTF-8) and non-dotted (C or UTF-8) charsets: # If encoding contains a dot: split and use second part, otherwise use everything dot_index = encoding.find(".") - default_encoding = encoding[dot_index + 1 :] if dot_index != -1 else encoding + preferred_encoding = encoding[dot_index + 1 :] if dot_index != -1 else encoding break # We've determined what encoding the user *wants*, let's now check if it's actually a valid encoding on the @@ -66,11 +67,21 @@ def getpreferredencoding(): # This scenario is fairly common on Windows where git sets LC_CTYPE=C when invoking the commit-msg hook, which # is not a valid encoding in Python on Windows. try: - codecs.lookup(default_encoding) + codecs.lookup(preferred_encoding) except LookupError: - default_encoding = fallback_encoding + preferred_encoding = fallback_encoding - return default_encoding + return preferred_encoding -DEFAULT_ENCODING = getpreferredencoding() +TERMINAL_ENCODING = getpreferredencoding() + +######################################################################################################################## +# FILE_ENCODING +# Gitlint assumes UTF-8 encoding for all file operations: +# - reading/writing its own hook and config files +# - reading/writing git commit messages +# Git does have i18n.commitEncoding and i18n.logOutputEncoding options which we might want to take into account, +# but that's not supported today. 
+ +FILE_ENCODING = "UTF-8" diff --git a/qa/test_gitlint.py b/qa/test_gitlint.py index 45110c0..7a04a39 100644 --- a/qa/test_gitlint.py +++ b/qa/test_gitlint.py @@ -2,7 +2,7 @@ import os from qa.base import BaseTestCase from qa.shell import echo, git, gitlint -from qa.utils import DEFAULT_ENCODING +from qa.utils import FILE_ENCODING class IntegrationTests(BaseTestCase): @@ -58,7 +58,7 @@ class IntegrationTests(BaseTestCase): self.assertEqualStdout(output, expected) # Make a small modification to the commit and commit it using fixup commit - with open(os.path.join(self.tmp_git_repo, test_filename), "a", encoding=DEFAULT_ENCODING) as fh: + with open(os.path.join(self.tmp_git_repo, test_filename), "a", encoding=FILE_ENCODING) as fh: fh.write("Appending söme stuff\n") git("add", test_filename, _cwd=self.tmp_git_repo) @@ -87,7 +87,7 @@ class IntegrationTests(BaseTestCase): self.assertEqualStdout(output, expected) # Make a small modification to the commit and commit it using fixup=amend commit - with open(os.path.join(self.tmp_git_repo, test_filename), "a", encoding=DEFAULT_ENCODING) as fh: + with open(os.path.join(self.tmp_git_repo, test_filename), "a", encoding=FILE_ENCODING) as fh: fh.write("Appending söme stuff\n") git("add", test_filename, _cwd=self.tmp_git_repo) @@ -133,7 +133,7 @@ class IntegrationTests(BaseTestCase): self.assertEqualStdout(output, expected) # Make a small modification to the commit and commit it using squash commit - with open(os.path.join(self.tmp_git_repo, test_filename), "a", encoding=DEFAULT_ENCODING) as fh: + with open(os.path.join(self.tmp_git_repo, test_filename), "a", encoding=FILE_ENCODING) as fh: # Wanted to write a unicode string, but that's obnoxious if you want to do it across Python 2 and 3. # https://stackoverflow.com/questions/22392377/ # error-writing-a-file-with-file-write-in-python-unicodeencodeerror |