summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoris Roovers <joris.roovers@gmail.com>2023-02-07 08:27:27 +0000
committerGitHub <noreply@github.com>2023-02-07 08:27:27 +0000
commitad0dfae525fb036747ff99d68fdcc4228022d15d (patch)
treef97cd7f524df4b5bce27685a0f1bb117b67c3096
parentf1093bc69594b3b5e46b8adff70bc7d239669831 (diff)
Separate FILE_ENCODING from TERMINAL_ENCODING (#424)
Distinguish between terminal encoding (now TERMINAL_ENCODING, renamed from DEFAULT_ENCODING) and file encoding (FILE_ENCODING). Previously, gitlint always decoded files using the terminal encoding (DEFAULT_ENCODING), which can be problematic when the terminal encoding is not UTF-8. The file encoding is now hard-coded to UTF-8.
-rw-r--r--gitlint-core/gitlint/cli.py4
-rw-r--r--gitlint-core/gitlint/config.py4
-rw-r--r--gitlint-core/gitlint/hooks.py4
-rw-r--r--gitlint-core/gitlint/shell.py4
-rw-r--r--gitlint-core/gitlint/tests/base.py6
-rw-r--r--gitlint-core/gitlint/tests/cli/test_cli.py8
-rw-r--r--gitlint-core/gitlint/tests/cli/test_cli_hooks.py10
-rw-r--r--gitlint-core/gitlint/tests/test_utils.py4
-rw-r--r--gitlint-core/gitlint/utils.py27
-rw-r--r--qa/test_gitlint.py8
10 files changed, 45 insertions, 34 deletions
diff --git a/gitlint-core/gitlint/cli.py b/gitlint-core/gitlint/cli.py
index d28d373..1caddf6 100644
--- a/gitlint-core/gitlint/cli.py
+++ b/gitlint-core/gitlint/cli.py
@@ -68,7 +68,7 @@ def log_system_info():
LOG.debug("Git version: %s", git_version())
LOG.debug("Gitlint version: %s", gitlint.__version__)
LOG.debug("GITLINT_USE_SH_LIB: %s", os.environ.get("GITLINT_USE_SH_LIB", "[NOT SET]"))
- LOG.debug("DEFAULT_ENCODING: %s", gitlint.utils.DEFAULT_ENCODING)
+ LOG.debug("DEFAULT_ENCODING: %s", gitlint.utils.TERMINAL_ENCODING)
def build_config(
@@ -264,7 +264,7 @@ class ContextObj:
@click.option("--ignore", envvar="GITLINT_IGNORE", default="", help="Ignore rules (comma-separated by id or name).")
@click.option("--contrib", envvar="GITLINT_CONTRIB", default="",
help="Contrib rules to enable (comma-separated by id or name).")
-@click.option("--msg-filename", type=click.File(encoding=gitlint.utils.DEFAULT_ENCODING),
+@click.option("--msg-filename", type=click.File(encoding=gitlint.utils.FILE_ENCODING),
help="Path to a file containing a commit-msg.")
@click.option("--ignore-stdin", envvar="GITLINT_IGNORE_STDIN", is_flag=True,
help="Ignore any stdin data. Useful for running in CI server.")
diff --git a/gitlint-core/gitlint/config.py b/gitlint-core/gitlint/config.py
index 72f283c..4205ce1 100644
--- a/gitlint-core/gitlint/config.py
+++ b/gitlint-core/gitlint/config.py
@@ -13,7 +13,7 @@ from gitlint import (
)
from gitlint.contrib import rules as contrib_rules
from gitlint.exception import GitlintError
-from gitlint.utils import DEFAULT_ENCODING
+from gitlint.utils import FILE_ENCODING
def handle_option_error(func):
@@ -468,7 +468,7 @@ class LintConfigBuilder:
try:
parser = ConfigParser()
- with open(filename, encoding=DEFAULT_ENCODING) as config_file:
+ with open(filename, encoding=FILE_ENCODING) as config_file:
parser.read_file(config_file, filename)
for section_name in parser.sections():
diff --git a/gitlint-core/gitlint/hooks.py b/gitlint-core/gitlint/hooks.py
index bdc459e..91756d5 100644
--- a/gitlint-core/gitlint/hooks.py
+++ b/gitlint-core/gitlint/hooks.py
@@ -4,7 +4,7 @@ import stat
from gitlint.exception import GitlintError
from gitlint.git import git_hooks_dir
-from gitlint.utils import DEFAULT_ENCODING
+from gitlint.utils import FILE_ENCODING
COMMIT_MSG_HOOK_SRC_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), "files", "commit-msg")
COMMIT_MSG_HOOK_DST_PATH = "commit-msg"
@@ -52,7 +52,7 @@ class GitHookInstaller:
if not os.path.exists(dest_path):
raise GitHookInstallerError(f"There is no commit-msg hook present in {dest_path}.")
- with open(dest_path, encoding=DEFAULT_ENCODING) as fp:
+ with open(dest_path, encoding=FILE_ENCODING) as fp:
lines = fp.readlines()
if len(lines) < 2 or lines[1] != GITLINT_HOOK_IDENTIFIER:
msg = (
diff --git a/gitlint-core/gitlint/shell.py b/gitlint-core/gitlint/shell.py
index a96c517..bab66d6 100644
--- a/gitlint-core/gitlint/shell.py
+++ b/gitlint-core/gitlint/shell.py
@@ -6,7 +6,7 @@ capabilities wrt dealing with more edge-case environments on *nix systems that a
import subprocess
-from gitlint.utils import DEFAULT_ENCODING, USE_SH_LIB
+from gitlint.utils import TERMINAL_ENCODING, USE_SH_LIB
def shell(cmd):
@@ -64,7 +64,7 @@ else:
raise CommandNotFound from e
exit_code = p.returncode
- stdout = result[0].decode(DEFAULT_ENCODING)
+ stdout = result[0].decode(TERMINAL_ENCODING)
stderr = result[1] # 'sh' does not decode the stderr bytes to unicode
full_cmd = "" if args is None else " ".join(args)
diff --git a/gitlint-core/gitlint/tests/base.py b/gitlint-core/gitlint/tests/base.py
index 0dbb57b..326c7cb 100644
--- a/gitlint-core/gitlint/tests/base.py
+++ b/gitlint-core/gitlint/tests/base.py
@@ -13,7 +13,7 @@ from gitlint.config import LintConfig
from gitlint.deprecation import LOG as DEPRECATION_LOG
from gitlint.deprecation import Deprecation
from gitlint.git import GitChangedFileStats, GitContext
-from gitlint.utils import DEFAULT_ENCODING, LOG_FORMAT
+from gitlint.utils import FILE_ENCODING, LOG_FORMAT
EXPECTED_REGEX_STYLE_SEARCH_DEPRECATION_WARNING = (
"WARNING: gitlint.deprecated.regex_style_search {0} - {1}: gitlint will be switching from using "
@@ -95,7 +95,7 @@ class BaseTestCase(unittest.TestCase):
def get_sample(filename=""):
"""Read and return the contents of a file in gitlint/tests/samples"""
sample_path = BaseTestCase.get_sample_path(filename)
- return Path(sample_path).read_text(encoding=DEFAULT_ENCODING)
+ return Path(sample_path).read_text(encoding=FILE_ENCODING)
@staticmethod
def patch_input(side_effect):
@@ -109,7 +109,7 @@ class BaseTestCase(unittest.TestCase):
"""Utility method to read an expected file from gitlint/tests/expected and return it as a string.
Optionally replace template variables specified by variable_dict."""
expected_path = os.path.join(BaseTestCase.EXPECTED_DIR, filename)
- expected = Path(expected_path).read_text(encoding=DEFAULT_ENCODING)
+ expected = Path(expected_path).read_text(encoding=FILE_ENCODING)
if variable_dict:
expected = expected.format(**variable_dict)
diff --git a/gitlint-core/gitlint/tests/cli/test_cli.py b/gitlint-core/gitlint/tests/cli/test_cli.py
index b4577c8..b519fac 100644
--- a/gitlint-core/gitlint/tests/cli/test_cli.py
+++ b/gitlint-core/gitlint/tests/cli/test_cli.py
@@ -9,7 +9,7 @@ from click.testing import CliRunner
from gitlint import __version__, cli
from gitlint.shell import CommandNotFound
from gitlint.tests.base import BaseTestCase
-from gitlint.utils import DEFAULT_ENCODING
+from gitlint.utils import FILE_ENCODING, TERMINAL_ENCODING
class CLITests(BaseTestCase):
@@ -39,7 +39,7 @@ class CLITests(BaseTestCase):
"gitlint_version": __version__,
"GITLINT_USE_SH_LIB": BaseTestCase.GITLINT_USE_SH_LIB,
"target": os.path.realpath(os.getcwd()),
- "DEFAULT_ENCODING": DEFAULT_ENCODING,
+ "DEFAULT_ENCODING": TERMINAL_ENCODING,
}
def test_version(self):
@@ -315,7 +315,7 @@ class CLITests(BaseTestCase):
with self.tempdir() as tmpdir:
msg_filename = os.path.join(tmpdir, "msg")
- with open(msg_filename, "w", encoding=DEFAULT_ENCODING) as f:
+ with open(msg_filename, "w", encoding=FILE_ENCODING) as f:
f.write("WIP: msg-filename tïtle\n")
with patch("gitlint.display.stderr", new=StringIO()) as stderr:
@@ -370,7 +370,7 @@ class CLITests(BaseTestCase):
with self.tempdir() as tmpdir:
msg_filename = os.path.join(tmpdir, "msg")
- with open(msg_filename, "w", encoding=DEFAULT_ENCODING) as f:
+ with open(msg_filename, "w", encoding=FILE_ENCODING) as f:
f.write("Commït title\n")
with patch("gitlint.display.stderr", new=StringIO()) as stderr:
diff --git a/gitlint-core/gitlint/tests/cli/test_cli_hooks.py b/gitlint-core/gitlint/tests/cli/test_cli_hooks.py
index cf8298a..c9e4eba 100644
--- a/gitlint-core/gitlint/tests/cli/test_cli_hooks.py
+++ b/gitlint-core/gitlint/tests/cli/test_cli_hooks.py
@@ -6,7 +6,7 @@ from click.testing import CliRunner
from gitlint import cli, config, hooks
from gitlint.shell import ErrorReturnCode
from gitlint.tests.base import BaseTestCase
-from gitlint.utils import DEFAULT_ENCODING
+from gitlint.utils import FILE_ENCODING
class CLIHookTests(BaseTestCase):
@@ -102,7 +102,7 @@ class CLIHookTests(BaseTestCase):
with self.tempdir() as tmpdir:
msg_filename = os.path.join(tmpdir, "hür")
- with open(msg_filename, "w", encoding=DEFAULT_ENCODING) as f:
+ with open(msg_filename, "w", encoding=FILE_ENCODING) as f:
f.write("WIP: tïtle\n")
with patch("gitlint.display.stderr", new=StringIO()) as stderr:
@@ -130,7 +130,7 @@ class CLIHookTests(BaseTestCase):
with self.patch_input(["e", "e", "n"]), self.tempdir() as tmpdir:
msg_filename = os.path.realpath(os.path.join(tmpdir, "hür"))
- with open(msg_filename, "w", encoding=DEFAULT_ENCODING) as f:
+ with open(msg_filename, "w", encoding=FILE_ENCODING) as f:
f.write(commit_messages[i] + "\n")
with patch("gitlint.display.stderr", new=StringIO()) as stderr:
@@ -158,7 +158,7 @@ class CLIHookTests(BaseTestCase):
with self.patch_input(["n"]), self.tempdir() as tmpdir:
msg_filename = os.path.join(tmpdir, "hür")
- with open(msg_filename, "w", encoding=DEFAULT_ENCODING) as f:
+ with open(msg_filename, "w", encoding=FILE_ENCODING) as f:
f.write("WIP: höok no\n")
with patch("gitlint.display.stderr", new=StringIO()) as stderr:
@@ -175,7 +175,7 @@ class CLIHookTests(BaseTestCase):
"""Test for run-hook subcommand, answering 'y(es)' after commit-hook"""
with self.patch_input(["y"]), self.tempdir() as tmpdir:
msg_filename = os.path.join(tmpdir, "hür")
- with open(msg_filename, "w", encoding=DEFAULT_ENCODING) as f:
+ with open(msg_filename, "w", encoding=FILE_ENCODING) as f:
f.write("WIP: höok yes\n")
with patch("gitlint.display.stderr", new=StringIO()) as stderr:
diff --git a/gitlint-core/gitlint/tests/test_utils.py b/gitlint-core/gitlint/tests/test_utils.py
index 27036d3..d21ec3f 100644
--- a/gitlint-core/gitlint/tests/test_utils.py
+++ b/gitlint-core/gitlint/tests/test_utils.py
@@ -27,7 +27,7 @@ class UtilsTests(BaseTestCase):
self.assertEqual(utils.use_sh_library(), False)
@patch("gitlint.utils.locale")
- def test_default_encoding_non_windows(self, mocked_locale):
+ def test_terminal_encoding_non_windows(self, mocked_locale):
utils.PLATFORM_IS_WINDOWS = False
mocked_locale.getpreferredencoding.return_value = "foöbar"
self.assertEqual(utils.getpreferredencoding(), "foöbar")
@@ -37,7 +37,7 @@ class UtilsTests(BaseTestCase):
self.assertEqual(utils.getpreferredencoding(), "UTF-8")
@patch("os.environ")
- def test_default_encoding_windows(self, patched_env):
+ def test_terminal_encoding_windows(self, patched_env):
utils.PLATFORM_IS_WINDOWS = True
# Mock out os.environ
mock_env = {}
diff --git a/gitlint-core/gitlint/utils.py b/gitlint-core/gitlint/utils.py
index 33b0584..3ccb78b 100644
--- a/gitlint-core/gitlint/utils.py
+++ b/gitlint-core/gitlint/utils.py
@@ -38,27 +38,28 @@ def use_sh_library():
USE_SH_LIB = use_sh_library()
########################################################################################################################
-# DEFAULT_ENCODING
+# TERMINAL_ENCODING
+# Encoding used for terminal encoding/decoding.
def getpreferredencoding():
"""Modified version of local.getpreferredencoding() that takes into account LC_ALL, LC_CTYPE, LANG env vars
on windows and falls back to UTF-8."""
fallback_encoding = "UTF-8"
- default_encoding = locale.getpreferredencoding() or fallback_encoding
+ preferred_encoding = locale.getpreferredencoding() or fallback_encoding
# On Windows, we mimic git/linux by trying to read the LC_ALL, LC_CTYPE, LANG env vars manually
# (on Linux/MacOS the `getpreferredencoding()` call will take care of this).
# We fallback to UTF-8
if PLATFORM_IS_WINDOWS:
- default_encoding = fallback_encoding
+ preferred_encoding = fallback_encoding
for env_var in ["LC_ALL", "LC_CTYPE", "LANG"]:
encoding = os.environ.get(env_var, False)
if encoding:
# Support dotted (C.UTF-8) and non-dotted (C or UTF-8) charsets:
# If encoding contains a dot: split and use second part, otherwise use everything
dot_index = encoding.find(".")
- default_encoding = encoding[dot_index + 1 :] if dot_index != -1 else encoding
+ preferred_encoding = encoding[dot_index + 1 :] if dot_index != -1 else encoding
break
# We've determined what encoding the user *wants*, let's now check if it's actually a valid encoding on the
@@ -66,11 +67,21 @@ def getpreferredencoding():
# This scenario is fairly common on Windows where git sets LC_CTYPE=C when invoking the commit-msg hook, which
# is not a valid encoding in Python on Windows.
try:
- codecs.lookup(default_encoding)
+ codecs.lookup(preferred_encoding)
except LookupError:
- default_encoding = fallback_encoding
+ preferred_encoding = fallback_encoding
- return default_encoding
+ return preferred_encoding
-DEFAULT_ENCODING = getpreferredencoding()
+TERMINAL_ENCODING = getpreferredencoding()
+
+########################################################################################################################
+# FILE_ENCODING
+# Gitlint assumes UTF-8 encoding for all file operations:
+# - reading/writing its own hook and config files
+# - reading/writing git commit messages
+# Git does have i18n.commitEncoding and i18n.logOutputEncoding options which we might want to take into account,
+# but that's not supported today.
+
+FILE_ENCODING = "UTF-8"
diff --git a/qa/test_gitlint.py b/qa/test_gitlint.py
index 45110c0..7a04a39 100644
--- a/qa/test_gitlint.py
+++ b/qa/test_gitlint.py
@@ -2,7 +2,7 @@ import os
from qa.base import BaseTestCase
from qa.shell import echo, git, gitlint
-from qa.utils import DEFAULT_ENCODING
+from qa.utils import FILE_ENCODING
class IntegrationTests(BaseTestCase):
@@ -58,7 +58,7 @@ class IntegrationTests(BaseTestCase):
self.assertEqualStdout(output, expected)
# Make a small modification to the commit and commit it using fixup commit
- with open(os.path.join(self.tmp_git_repo, test_filename), "a", encoding=DEFAULT_ENCODING) as fh:
+ with open(os.path.join(self.tmp_git_repo, test_filename), "a", encoding=FILE_ENCODING) as fh:
fh.write("Appending söme stuff\n")
git("add", test_filename, _cwd=self.tmp_git_repo)
@@ -87,7 +87,7 @@ class IntegrationTests(BaseTestCase):
self.assertEqualStdout(output, expected)
# Make a small modification to the commit and commit it using fixup=amend commit
- with open(os.path.join(self.tmp_git_repo, test_filename), "a", encoding=DEFAULT_ENCODING) as fh:
+ with open(os.path.join(self.tmp_git_repo, test_filename), "a", encoding=FILE_ENCODING) as fh:
fh.write("Appending söme stuff\n")
git("add", test_filename, _cwd=self.tmp_git_repo)
@@ -133,7 +133,7 @@ class IntegrationTests(BaseTestCase):
self.assertEqualStdout(output, expected)
# Make a small modification to the commit and commit it using squash commit
- with open(os.path.join(self.tmp_git_repo, test_filename), "a", encoding=DEFAULT_ENCODING) as fh:
+ with open(os.path.join(self.tmp_git_repo, test_filename), "a", encoding=FILE_ENCODING) as fh:
# Wanted to write a unicode string, but that's obnoxious if you want to do it across Python 2 and 3.
# https://stackoverflow.com/questions/22392377/
# error-writing-a-file-with-file-write-in-python-unicodeencodeerror