summaryrefslogtreecommitdiffstats
path: root/crate_db/src/stopwords.rs
blob: b779e3a7b7a526b975a312361da518553bdf033a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
use lazy_static::lazy_static;
use std::collections::{HashMap, HashSet};

lazy_static! {
    /// Words and phrases to ignore as keywords.
    ///
    /// The set is mostly common English filler, plus a handful of
    /// ecosystem/boilerplate terms (e.g. "crate", "crates.io", "travis") and a
    /// few multi-word phrases (e.g. "build status"). Every entry is written in
    /// lowercase — NOTE(review): presumably candidates are lowercased before
    /// being looked up here; confirm against callers.
    pub(crate) static ref STOPWORDS: HashSet<&'static str> = [
        "a", "sys", "ffi", "placeholder", "app", "loops", "master", "rs",
        "accidentally", "additional", "adds", "against", "all", "allow", "allows",
        "already", "also", "alternative", "always", "an", "and", "any", "appropriate",
        "arbitrary", "are", "as", "at", "available", "based", "be", "because", "been",
        "both", "but", "by", "can", "certain", "changes", "comes", "contains", "core", "cost",
        "crate", "crates.io", "current", "currently", "custom", "dependencies",
        "dependency", "developers", "do", "don't", "e.g", "easily", "easy", "either",
        "enables", "etc", "even", "every", "example", "examples", "features", "feel",
        "files", "for", "from", "fully", "function", "get", "given", "had", "has",
        "have", "here", "if", "implementing", "implements", "in", "includes",
        "including", "incurring", "installation", "interested", "into", "is", "it",
        "it's", "its", "itself", "just", "known", "large", "later", "library",
        "license", "lightweight", "like", "made", "main", "make", "makes", "many",
        "may", "me", "means", "method", "minimal", "mit", "more", "mostly", "much",
        "need", "needed", "never", "new", "no", "noop", "not", "of", "on", "one",
        "only", "or", "other", "over", "plausible", "please", "possible", "program",
        "provides", "put", "readme", "release", "runs", "rust", "rust's", "same",
        "see", "selected", "should", "similar", "simple", "simply", "since", "small", "so",
        "some", "specific", "still", "stuff", "such", "take", "than", "that", "the",
        "their", "them", "then", "there", "therefore", "these", "they", "things",
        "this", "those", "to", "todo", "too", "travis", "two", "under", "us",
        "usable", "use", "used", "useful", "using", "v1", "v2", "v3", "v4", "various",
        "very", "via", "want", "way", "well", "we'll", "what", "when", "where", "which",
        "while", "will", "wip", "with", "without", "working", "works", "writing",
        "written", "yet", "you", "your", "build status", "meritbadge", "common",
        "file was generated", "easy to use",
    ]
    .iter()
    // `iter()` yields `&&'static str`; `copied()` strips the outer reference
    // so the set stores the `&'static str` literals themselves.
    .copied()
    .collect();

    /// If one is present, ignore the others
    pub(crate) static ref COND_STOPWORDS: HashMap<&'static str, Option<&'static [&'static str]>> = [
        ("game-engine", Some(&["game", "ffi"][..])),
        ("game-engines", Some(&["game", "ffi"])),
        ("game-dev", Some(&["game", "games"])),
        ("gamedev", Some(&["game", "games"])),
        ("game", Some(&["wasm", "webassembly"])), // wasm games are nice, but should be in games category
        ("opengl", Some(&["terminal", "console"])),
        ("protocol", Some(&["game", "games", "container"])),
        ("framework", Some(&["game", "games"])),
        ("engine", Some(&["ffi"])),
        ("mock", Some(&["macro", "derive", "plugin", "cargo"])),

        ("caching", Some(&["allocator"])),
        ("aws", Some(&["ecs"])), // not game engine
        ("raspberry", Some(&["osx", "windows"])),
        ("linux", Some(&["windows", "winsdk", "macos", "mac", "osx"])),
        ("cross-platform", Some(&["windows", "winsdk", "macos", "mac", "osx", "linux", "unix", "gnu"])),
        ("portable", Some(&["windows", "winsdk", "macos", "mac", "osx", "linux", "unix", "gnu"])),
        ("winapi", Some(&["target", "windows", "gnu", "x86", "i686", "64", "pc"])),
        ("windows", Some(&["gnu"])),
        ("iterator", Some(&["window"])),
        ("web", Some(&["windows", "macos", "mac", "osx", "linux"])),
        ("macos", Some(&["core"])),
        ("wasm", Some(&["embedded", "javascript", "no-std", "no_std", "feature:no_std"])),
        ("javascript", Some(&["embedded", "no-std", "no_std", "feature:no_std"])),
        ("webassembly", Some(&["embedded", "javascript", "no-std", "no_std", "feature:no_std"])),
        ("deep-learning", Some(&["math", "statistics"])),
        ("machine-learning", Some(&["math", "statistics"])),
        ("neural-networks", Some(&["math", "statistics", "network"])),
        ("neural", Some(&["network"])),
        ("database", Some(&["embedded"])),
        ("robotics", Some(&["localization"])),
        ("thread", Some(&["storage"])),
        ("bitcoin", Some(&["http", "day", "database", "key-value", "network", "wasm"])),
        ("exonum", Some(&["http", "day", "database", "key-value", "network", "wasm"])),
        ("blockchain", Some(&["database", "key-value", "network", "wasm", "nosql", "orm", "driver"])),
        ("ethereum", Some(&["http", "day", "nosql", "log", "generic", "network", "wasm", "key-value", "orm", "database"])),
        ("iter", Some(&["math"])),
        ("macro", Some(&["no-std", "no_std", "feature:no_std"])),
        ("macros", Some(&["no-std", "no_std", "feature:no_std"])),
        ("embedded", Some(&["no-std", "no_std", "feature:no_std"])),
        ("arm", Some(&["no-std", "no_std", "feature:no_std"])),
        ("float", Some(&["math"])),
        ("c64", Some(&["terminal", "core"])),
        ("emulator", Some(&["6502", "core", "gpu", "color", "timer"])),
        ("garbage", Some(&["tracing"])),
        ("terminal", Some(&["math", "emulator"])),
        ("terminal-emulator", Some(&["math", "emulator"])),
        ("editor", Some(&["terminal"])),
        ("build", Some(&["logic"])), // confuses categorization
        ("messaging", Some(&["matrix"])), // confuses categorization
        ("chat", Some(&["matrix"])), // confuses categorization
        ("math", Some(&["num", "symbolic", "algorithms", "algorithm", "utils"])), // confuses categorization
        ("mathematics", Some(&["num", "numeric", "symbolic", "algorithms", "algorithm", "utils"])), // confuses categorization
        ("cuda", Some(&["nvidia"])), // confuses categorization
        ("subcommand", Some(&["plugin"])),
        ("lint", Some(&["plugin"])),
        ("email", Some(&["validator", "validation"])),
        ("e-mail", Some(&["validator", "validation"])),
        ("template", Some(&["derive"])),
        ("dsl", Some(&["template"])),
        ("syn", Some(&["nom"])),
        ("cargo", Some(&["plugin"])),
        ("git", Some(&["terminal"])),
        ("wide", Some(&["windows", "win32"])),
        ("i18n", Some(&["text", "format", "message", "json", "ffi"