diff options
Diffstat (limited to 'src/syntax_mapping')
33 files changed, 311 insertions, 0 deletions
diff --git a/src/syntax_mapping/builtin.rs b/src/syntax_mapping/builtin.rs new file mode 100644 index 00000000..1822be57 --- /dev/null +++ b/src/syntax_mapping/builtin.rs @@ -0,0 +1,91 @@ +use std::env; + +use globset::GlobMatcher; +use once_cell::sync::Lazy; + +use crate::syntax_mapping::{make_glob_matcher, MappingTarget}; + +// Static syntax mappings generated from /src/syntax_mapping/builtins/ by the +// build script (/build/syntax_mapping.rs). +include!(concat!( + env!("OUT_DIR"), + "/codegen_static_syntax_mappings.rs" +)); + +// The defined matcher strings are analysed at compile time and converted into +// lazily-compiled `GlobMatcher`s. This is so that the string searches are moved +// from run time to compile time, thus improving startup performance. +// +// To any future maintainer (including possibly myself) wondering why there is +// not a `BuiltinMatcher` enum that looks like this: +// +// ``` +// enum BuiltinMatcher { +// Fixed(&'static str), +// Dynamic(Lazy<Option<String>>), +// } +// ``` +// +// Because there was. I tried it and threw it out. +// +// Naively looking at the problem from a distance, this may seem like a good +// design (strongly typed etc. etc.). It would also save on compiled size by +// extracting out common behaviour into functions. But while actually +// implementing the lazy matcher compilation logic, I realised that it's most +// convenient for `BUILTIN_MAPPINGS` to have the following type: +// +// `[(Lazy<Option<GlobMatcher>>, MappingTarget); N]` +// +// The benefit of this is that operations like listing all builtin mappings +// would be effectively memoised. The caller would not have to compile another +// `GlobMatcher` for rules that they have previously visited. +// +// Unfortunately, this means we are going to have to store a distinct closure +// for each rule anyway, which makes a `BuiltinMatcher` enum a pointless layer +// of indirection. 
+// +// In the current implementation, the closure within each generated rule simply +// calls either `build_matcher_fixed` or `build_matcher_dynamic`, depending on +// whether the defined matcher contains dynamic segments or not. + +/// Compile a fixed glob string into a glob matcher. +/// +/// A failure to compile is a fatal error. +/// +/// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure. +fn build_matcher_fixed(from: &str) -> GlobMatcher { + make_glob_matcher(from).expect("A builtin fixed glob matcher failed to compile") +} + +/// Join a list of matcher segments to create a glob string, replacing all +/// environment variables, then compile to a glob matcher. +/// +/// Returns `None` if any replacement fails, or if the joined glob string fails +/// to compile. +/// +/// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure. +fn build_matcher_dynamic(segs: &[MatcherSegment]) -> Option<GlobMatcher> { + // join segments + let mut buf = String::new(); + for seg in segs { + match seg { + MatcherSegment::Text(s) => buf.push_str(s), + MatcherSegment::Env(var) => { + let replaced = env::var(var).ok()?; + buf.push_str(&replaced); + } + } + } + // compile glob matcher + let matcher = make_glob_matcher(&buf).ok()?; + Some(matcher) +} + +/// A segment of a dynamic builtin matcher. +/// +/// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure. +#[derive(Clone, Debug)] +enum MatcherSegment { + Text(&'static str), + Env(&'static str), +} diff --git a/src/syntax_mapping/builtins/README.md b/src/syntax_mapping/builtins/README.md new file mode 100644 index 00000000..29cf43ee --- /dev/null +++ b/src/syntax_mapping/builtins/README.md @@ -0,0 +1,116 @@ +# `/src/syntax_mapping/builtins` + +The files in this directory define path/name-based syntax mappings, which amend +and take precedence over the extension/content-based syntax mappings provided by +[syntect](https://github.com/trishume/syntect). 
+ +## File organisation + +Each TOML file should describe the syntax mappings of a single application, or +otherwise a set of logically-related rules. + +What defines "a single application" here is deliberately vague, since the +file-splitting is purely for maintainability reasons. (Technically, we could +just as well use a single TOML file.) So just use common sense. + +TOML files should reside in the corresponding subdirectory of the platform(s) +that they intend to target. At compile time, the build script will go through +each subdirectory that is applicable to the compilation target, collect the +syntax mappings defined by all TOML files, and embed them into the binary. + +## File syntax + +Each TOML file should contain a single section named `mappings`, with each of +its keys being a language identifier (first column of `bat -L`; also referred to +as "target"). + +The value of each key should be an array of strings, with each item being a glob +matcher. We will call each of these items a "rule". + +For example, if `foo-application` uses both TOML and YAML configuration files, +we could write something like this: + +```toml +# 30-foo-application.toml +[mappings] +"TOML" = [ + # rules for TOML syntax go here + "/usr/share/foo-application/toml-config/*.conf", + "/etc/foo-application/toml-config/*.conf", +] +"YAML" = [ + # rules for YAML syntax go here + # ... +] +``` + +### Dynamic environment variable replacement + +In addition to the standard glob matcher syntax, rules also support dynamic +replacement of environment variables at runtime. This allows us to concisely +handle things like [XDG](https://specifications.freedesktop.org/basedir-spec/latest/). + +All environment variables intended to be replaced at runtime must be enclosed in +`${}`, for example `"/foo/*/${YOUR_ENV}-suffix/*.log"`. 
Note that this is the +**only** admissible syntax; other variable substitution syntaxes are not +supported and will either cause a compile time error, or be treated as plain +text. + +For example, if `foo-application` also supports per-user configuration files, we +could write something like this: + +```toml +# 30-foo-application.toml +[mappings] +"TOML" = [ + # rules for TOML syntax go here + "/usr/share/foo-application/toml-config/*.conf", + "/etc/foo-application/toml-config/*.conf", + "${XDG_CONFIG_HOME}/foo-application/toml-config/*.conf", + "${HOME}/.config/foo-application/toml-config/*.conf", +] +"YAML" = [ + # rules for YAML syntax go here + # ... +] +``` + +If any environment variable replacement in a rule fails (for example when a +variable is unset), or if the glob string after replacements is invalid, the +entire rule will be ignored. + +### Explicitly mapping to unknown + +Sometimes it may be necessary to "unset" a particular syntect mapping - perhaps +a syntax's matching rules are "too greedy", and are claiming files that they should +not. In this case, there are two special identifiers: +`MappingTarget::MapToUnknown` and `MappingTarget::MapExtensionToUnknown` +(corresponding to the two variants of the `syntax_mapping::MappingTarget` enum). + +An example of this would be `*.conf` files in general. So we may write something +like this: + +```toml +# 99-unset-ambiguous-extensions.toml +[mappings] +"MappingTarget::MapExtensionToUnknown" = [ + "*.conf", +] +``` + +## Ordering + +At compile time, all TOML files applicable to the target are processed in +lexicographical filename order. So `00-foo.toml` takes precedence over +`10-bar.toml`, which takes precedence over `20-baz.toml`, and so on. Note that +**only** the filenames of the TOML files are taken into account; the +subdirectories they are placed in have no influence on ordering. 
+ +This behaviour can be occasionally useful for creating high/low priority rules, +such as in the aforementioned example of explicitly mapping `*.conf` files to +unknown. Generally this should not be much of a concern though, since rules +should be written as specifically as possible for each application. + +Rules within each TOML file are processed (and therefore matched) in the order +in which they are defined. At runtime, the syntax selection algorithm will +short-circuit and return the target of the first matching rule. diff --git a/src/syntax_mapping/builtins/bsd-family/.gitkeep b/src/syntax_mapping/builtins/bsd-family/.gitkeep new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/syntax_mapping/builtins/bsd-family/.gitkeep diff --git a/src/syntax_mapping/builtins/bsd-family/50-os-release.toml b/src/syntax_mapping/builtins/bsd-family/50-os-release.toml new file mode 100644 index 00000000..91b003d7 --- /dev/null +++ b/src/syntax_mapping/builtins/bsd-family/50-os-release.toml @@ -0,0 +1,2 @@ +[mappings] +"Bourne Again Shell (bash)" = ["/etc/os-release", "/var/run/os-release"] diff --git a/src/syntax_mapping/builtins/common/.gitkeep b/src/syntax_mapping/builtins/common/.gitkeep new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/syntax_mapping/builtins/common/.gitkeep diff --git a/src/syntax_mapping/builtins/common/50-apache.toml b/src/syntax_mapping/builtins/common/50-apache.toml new file mode 100644 index 00000000..0e557aff --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-apache.toml @@ -0,0 +1,2 @@ +[mappings] +"Apache Conf" = ["httpd.conf"] diff --git a/src/syntax_mapping/builtins/common/50-bat.toml b/src/syntax_mapping/builtins/common/50-bat.toml new file mode 100644 index 00000000..e70b6b09 --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-bat.toml @@ -0,0 +1,2 @@ +[mappings] +"Bourne Again Shell (bash)" = ["**/bat/config"] diff --git a/src/syntax_mapping/builtins/common/50-container.toml 
b/src/syntax_mapping/builtins/common/50-container.toml new file mode 100644 index 00000000..ad48c29b --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-container.toml @@ -0,0 +1,2 @@ +[mappings] +"Dockerfile" = ["Containerfile"] diff --git a/src/syntax_mapping/builtins/common/50-cpp.toml b/src/syntax_mapping/builtins/common/50-cpp.toml new file mode 100644 index 00000000..99d8a32b --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-cpp.toml @@ -0,0 +1,6 @@ +[mappings] +"C++" = [ + # probably better than the default Objective C mapping #877 + "*.h", +] +"YAML" = [".clang-format"] diff --git a/src/syntax_mapping/builtins/common/50-f-sharp.toml b/src/syntax_mapping/builtins/common/50-f-sharp.toml new file mode 100644 index 00000000..a39e7ebd --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-f-sharp.toml @@ -0,0 +1,2 @@ +[mappings] +"F#" = ["*.fs"] diff --git a/src/syntax_mapping/builtins/common/50-git.toml b/src/syntax_mapping/builtins/common/50-git.toml new file mode 100644 index 00000000..44a49a25 --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-git.toml @@ -0,0 +1,10 @@ +# Global git config files rooted in `$XDG_CONFIG_HOME/git/` or `$HOME/.config/git/` +# See e.g. 
https://git-scm.com/docs/git-config#FILES + +[mappings] +"Git Config" = ["${XDG_CONFIG_HOME}/git/config", "${HOME}/.config/git/config"] +"Git Ignore" = ["${XDG_CONFIG_HOME}/git/ignore", "${HOME}/.config/git/ignore"] +"Git Attributes" = [ + "${XDG_CONFIG_HOME}/git/attributes", + "${HOME}/.config/git/attributes", +] diff --git a/src/syntax_mapping/builtins/common/50-jsonl.toml b/src/syntax_mapping/builtins/common/50-jsonl.toml new file mode 100644 index 00000000..4b70a4d0 --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-jsonl.toml @@ -0,0 +1,3 @@ +# JSON Lines is a simple variation of JSON #2535 +[mappings] +"JSON" = ["*.jsonl"] diff --git a/src/syntax_mapping/builtins/common/50-nginx.toml b/src/syntax_mapping/builtins/common/50-nginx.toml new file mode 100644 index 00000000..305418bb --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-nginx.toml @@ -0,0 +1,2 @@ +[mappings] +"nginx" = ["nginx.conf", "mime.types"] diff --git a/src/syntax_mapping/builtins/common/50-nmap.toml b/src/syntax_mapping/builtins/common/50-nmap.toml new file mode 100644 index 00000000..f79a5e97 --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-nmap.toml @@ -0,0 +1,3 @@ +[mappings] +# See #2151, https://nmap.org/book/nse-language.html +"Lua" = ["*.nse"] diff --git a/src/syntax_mapping/builtins/common/50-proxy-auto-config.toml b/src/syntax_mapping/builtins/common/50-proxy-auto-config.toml new file mode 100644 index 00000000..70e51c92 --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-proxy-auto-config.toml @@ -0,0 +1,3 @@ +# 1515 +[mappings] +"JavaScript (Babel)" = ["*.pac"] diff --git a/src/syntax_mapping/builtins/common/50-ron.toml b/src/syntax_mapping/builtins/common/50-ron.toml new file mode 100644 index 00000000..bc04221b --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-ron.toml @@ -0,0 +1,3 @@ +# Rusty Object Notation #2427 +[mappings] +"Rust" = ["*.ron"] diff --git a/src/syntax_mapping/builtins/common/50-sarif.toml 
b/src/syntax_mapping/builtins/common/50-sarif.toml new file mode 100644 index 00000000..2542b9cd --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-sarif.toml @@ -0,0 +1,3 @@ +# SARIF is a format for reporting static analysis results #2695 +[mappings] +"JSON" = ["*.sarif"] diff --git a/src/syntax_mapping/builtins/common/50-ssh.toml b/src/syntax_mapping/builtins/common/50-ssh.toml new file mode 100644 index 00000000..6ec24050 --- /dev/null +++ b/src/syntax_mapping/builtins/common/50-ssh.toml @@ -0,0 +1,2 @@ +[mappings] +"SSH Config" = ["**/.ssh/config"] diff --git a/src/syntax_mapping/builtins/common/99-unset-ambiguous-extensions.toml b/src/syntax_mapping/builtins/common/99-unset-ambiguous-extensions.toml new file mode 100644 index 00000000..d87537d7 --- /dev/null +++ b/src/syntax_mapping/builtins/common/99-unset-ambiguous-extensions.toml @@ -0,0 +1,5 @@ +[mappings] +"MappingTarget::MapExtensionToUnknown" = [ + # common extension used for all kinds of formats + "*.conf", +] diff --git a/src/syntax_mapping/builtins/common/99-unset-ambiguous-filenames.toml b/src/syntax_mapping/builtins/common/99-unset-ambiguous-filenames.toml new file mode 100644 index 00000000..21941ebc --- /dev/null +++ b/src/syntax_mapping/builtins/common/99-unset-ambiguous-filenames.toml @@ -0,0 +1,7 @@ +[mappings] +"MappingTarget::MapToUnknown" = [ + # "NAnt Build File" should only match *.build files, not files named "build" + "build", + # "bin/rails" scripts in a Ruby project misidentified as HTML (Rails) #1008 + "rails", +] diff --git a/src/syntax_mapping/builtins/linux/.gitkeep b/src/syntax_mapping/builtins/linux/.gitkeep new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/syntax_mapping/builtins/linux/.gitkeep diff --git a/src/syntax_mapping/builtins/linux/50-os-release.toml b/src/syntax_mapping/builtins/linux/50-os-release.toml new file mode 100644 index 00000000..791599aa --- /dev/null +++ b/src/syntax_mapping/builtins/linux/50-os-release.toml @@ -0,0 +1,7 @@ 
+[mappings] +"Bourne Again Shell (bash)" = [ + "/etc/os-release", + "/usr/lib/os-release", + "/etc/initrd-release", + "/usr/lib/extension-release.d/extension-release.*", +] diff --git a/src/syntax_mapping/builtins/linux/50-pacman.toml b/src/syntax_mapping/builtins/linux/50-pacman.toml new file mode 100644 index 00000000..655118c5 --- /dev/null +++ b/src/syntax_mapping/builtins/linux/50-pacman.toml @@ -0,0 +1,3 @@ +[mappings] +# pacman hooks +"INI" = ["/usr/share/libalpm/hooks/*.hook", "/etc/pacman.d/hooks/*.hook"] diff --git a/src/syntax_mapping/builtins/linux/50-systemd.toml b/src/syntax_mapping/builtins/linux/50-systemd.toml new file mode 100644 index 00000000..6f91b0be --- /dev/null +++ b/src/syntax_mapping/builtins/linux/50-systemd.toml @@ -0,0 +1,21 @@ +[mappings] +"INI" = [ + "**/systemd/**/*.conf", + "**/systemd/**/*.example", + "*.automount", + "*.device", + "*.dnssd", + "*.link", + "*.mount", + "*.netdev", + "*.network", + "*.nspawn", + "*.path", + "*.service", + "*.scope", + "*.slice", + "*.socket", + "*.swap", + "*.target", + "*.timer", +] diff --git a/src/syntax_mapping/builtins/macos/.gitkeep b/src/syntax_mapping/builtins/macos/.gitkeep new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/syntax_mapping/builtins/macos/.gitkeep diff --git a/src/syntax_mapping/builtins/unix-family/.gitkeep b/src/syntax_mapping/builtins/unix-family/.gitkeep new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/syntax_mapping/builtins/unix-family/.gitkeep diff --git a/src/syntax_mapping/builtins/unix-family/50-apache.toml b/src/syntax_mapping/builtins/unix-family/50-apache.toml new file mode 100644 index 00000000..dfb920f3 --- /dev/null +++ b/src/syntax_mapping/builtins/unix-family/50-apache.toml @@ -0,0 +1,2 @@ +[mappings] +"Apache Conf" = ["/etc/apache2/**/*.conf", "/etc/apache2/sites-*/**/*"] diff --git a/src/syntax_mapping/builtins/unix-family/50-fish-shell.toml b/src/syntax_mapping/builtins/unix-family/50-fish-shell.toml new file mode 
100644 index 00000000..f2a9e224 --- /dev/null +++ b/src/syntax_mapping/builtins/unix-family/50-fish-shell.toml @@ -0,0 +1,2 @@ +[mappings] +"YAML" = ["fish_history"] diff --git a/src/syntax_mapping/builtins/unix-family/50-korn-shell.toml b/src/syntax_mapping/builtins/unix-family/50-korn-shell.toml new file mode 100644 index 00000000..6c788d1d --- /dev/null +++ b/src/syntax_mapping/builtins/unix-family/50-korn-shell.toml @@ -0,0 +1,3 @@ +# KornShell is backward-compatible with the Bourne shell #2633 +[mappings] +"Bourne Again Shell (bash)" = ["*.ksh"] diff --git a/src/syntax_mapping/builtins/unix-family/50-mail-spool.toml b/src/syntax_mapping/builtins/unix-family/50-mail-spool.toml new file mode 100644 index 00000000..c798358c --- /dev/null +++ b/src/syntax_mapping/builtins/unix-family/50-mail-spool.toml @@ -0,0 +1,2 @@ +[mappings] +"Email" = ["/var/spool/mail/*", "/var/mail/*"] diff --git a/src/syntax_mapping/builtins/unix-family/50-nginx.toml b/src/syntax_mapping/builtins/unix-family/50-nginx.toml new file mode 100644 index 00000000..580b65d8 --- /dev/null +++ b/src/syntax_mapping/builtins/unix-family/50-nginx.toml @@ -0,0 +1,2 @@ +[mappings] +"nginx" = ["/etc/nginx/**/*.conf", "/etc/nginx/sites-*/**/*"] diff --git a/src/syntax_mapping/builtins/unix-family/50-shell.toml b/src/syntax_mapping/builtins/unix-family/50-shell.toml new file mode 100644 index 00000000..d015ca81 --- /dev/null +++ b/src/syntax_mapping/builtins/unix-family/50-shell.toml @@ -0,0 +1,5 @@ +[mappings] +"Bourne Again Shell (bash)" = [ + # used by lots of shells + "/etc/profile", +] diff --git a/src/syntax_mapping/builtins/windows/.gitkeep b/src/syntax_mapping/builtins/windows/.gitkeep new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/src/syntax_mapping/builtins/windows/.gitkeep |