summaryrefslogtreecommitdiffstats
path: root/tools/codespell.py
blob: df23663872cc80f31d3c16c95fe35866a6d32afe (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import itertools
import logging
import os
import subprocess
import sys
import typing

import githelper


def run_codespell_on_lines(
    rootdir, filename, lines, codespell_args, ignore_matches
):
    """
    Run codespell on a file and report findings on the requested lines.

    codespell is run on the whole file. Each finding it reports is printed
    to stdout verbatim if its line number is in ``lines`` and its
    misspelled word is not in ``ignore_matches``.

    Args:
        rootdir: Currently unused; kept for interface compatibility.
        filename: Path of the file to check, passed to codespell as-is.
        lines: Container of line numbers (ints) that should be reported.
        codespell_args: Extra command line arguments passed to codespell.
        ignore_matches: Container of misspelled words that are ignored.

    Returns 1 if any finding was printed, else 0.
    """
    logger = logging.getLogger(__name__)
    cmd = ["codespell", *codespell_args, "--", filename]
    logger.debug("Running command: %r", cmd)

    try:
        subprocess.check_output(cmd)
    except subprocess.CalledProcessError as e:
        # The findings are read from the stdout of the failed run.
        # Undecodable bytes must not abort the whole check.
        output = e.output.decode(errors="replace").strip()
        if not output:
            logger.warning(
                "codespell exited with status %d without any output",
                e.returncode,
            )
    else:
        return 0

    result = 0
    for line in output.splitlines():
        # Expected format: "<filename>:<linenum>: <word> ==> <suggestions>"
        location, _, content = line.partition(": ")
        matched_fname, _, linenum = location.rpartition(":")
        if matched_fname != filename or not linenum.isdecimal():
            # Extra arguments may add lines that are not findings
            # (e.g. a summary); skip them instead of crashing.
            logger.debug("Skipping unexpected codespell output: %r", line)
            continue
        if int(linenum) not in lines:
            continue

        match, _, _ = content.partition("==>")
        if match.strip() in ignore_matches:
            continue

        result = 1
        print(line)

    return result


def get_ignore_matches(fp):
    """
    Yield the words to ignore from an iterable of text lines.

    Everything from the first "#" on a line is treated as a comment.
    Surrounding whitespace is stripped and lines that end up empty are
    skipped.
    """
    for raw_line in fp:
        word = raw_line.split("#", 1)[0].strip()
        if word:
            yield word


def main(argv: typing.Optional[typing.List[str]] = None) -> int:
    """
    Run codespell on the added lines of the changed files.

    Arguments that are not recognized by this script are passed on to
    codespell. When ``--from-ref``/``--to-ref`` are not given, they are
    read from the ``PRE_COMMIT_*`` environment variables.

    Args:
        argv: Command line arguments; ``None`` lets argparse use
            ``sys.argv``.

    Returns 1 if codespell reported a finding on any added line, else 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--from-ref", help="use changes since commit")
    parser.add_argument("--to-ref", help="use changes until commit")
    parser.add_argument(
        "--ignore-file",
        type=argparse.FileType("r"),
        help="ignore matches (one per line)",
    )
    parser.add_argument("--files", nargs="*", help="only check these files")
    parser.add_argument(
        "-v", "--verbose", action="store_true", help="Be verbose"
    )
    args, codespell_args = parser.parse_known_args(argv)

    logging.basicConfig(
        format="[%(levelname)s] %(message)s",
        level=logging.DEBUG if args.verbose else logging.INFO,
    )

    if not args.from_ref:
        args.from_ref = os.getenv("PRE_COMMIT_FROM_REF") or os.getenv(
            "PRE_COMMIT_SOURCE"
        )

    if not args.to_ref:
        args.to_ref = os.getenv("PRE_COMMIT_TO_REF") or os.getenv(
            "PRE_COMMIT_ORIGIN"
        )

    # Filter filenames
    rootdir = githelper.get_toplevel_path()

    files_with_added_lines = githelper.get_changed_lines(
        from_ref=args.from_ref,
        to_ref=args.to_ref,
        filter_lines=lambda line: line.added,
        include_files=args.files,
    )

    ignore_matches = set()
    if args.ignore_file:
        try:
            ignore_matches = set(get_ignore_matches(args.ignore_file))
        finally:
            # argparse.FileType opened the file but never closes it.
            # "-" maps to sys.stdin, which must stay open.
            if args.ignore_file is not sys.stdin:
                args.ignore_file.close()

    # A bare "--" separator must not be forwarded; the codespell command
    # line adds its own before the filename.
    forwarded_args = [arg for arg in codespell_args if arg != "--"]

    result = 0
    # NOTE(review): groupby only merges consecutive items, so this assumes
    # get_changed_lines yields the lines grouped by file - TODO confirm.
    for filename, file_lines in itertools.groupby(
        files_with_added_lines, key=lambda line: line.sourcefile
    ):
        lines = {line.number for line in file_lines}
        result |= run_codespell_on_lines(
            rootdir,
            filename,
            lines,
            forwarded_args,
            ignore_matches,
        )

    return result


# When run as a script, use the return value of main() as the exit status.
if __name__ == "__main__":
    sys.exit(main())