summaryrefslogtreecommitdiffstats
path: root/src/syntax_mapping
diff options
context:
space:
mode:
Diffstat (limited to 'src/syntax_mapping')
-rw-r--r--src/syntax_mapping/builtin.rs91
-rw-r--r--src/syntax_mapping/builtins/README.md116
-rw-r--r--src/syntax_mapping/builtins/bsd-family/.gitkeep0
-rw-r--r--src/syntax_mapping/builtins/bsd-family/50-os-release.toml2
-rw-r--r--src/syntax_mapping/builtins/common/.gitkeep0
-rw-r--r--src/syntax_mapping/builtins/common/50-apache.toml2
-rw-r--r--src/syntax_mapping/builtins/common/50-bat.toml2
-rw-r--r--src/syntax_mapping/builtins/common/50-container.toml2
-rw-r--r--src/syntax_mapping/builtins/common/50-cpp.toml6
-rw-r--r--src/syntax_mapping/builtins/common/50-f-sharp.toml2
-rw-r--r--src/syntax_mapping/builtins/common/50-git.toml10
-rw-r--r--src/syntax_mapping/builtins/common/50-jsonl.toml3
-rw-r--r--src/syntax_mapping/builtins/common/50-nginx.toml2
-rw-r--r--src/syntax_mapping/builtins/common/50-nmap.toml3
-rw-r--r--src/syntax_mapping/builtins/common/50-proxy-auto-config.toml3
-rw-r--r--src/syntax_mapping/builtins/common/50-ron.toml3
-rw-r--r--src/syntax_mapping/builtins/common/50-sarif.toml3
-rw-r--r--src/syntax_mapping/builtins/common/50-ssh.toml2
-rw-r--r--src/syntax_mapping/builtins/common/99-unset-ambiguous-extensions.toml5
-rw-r--r--src/syntax_mapping/builtins/common/99-unset-ambiguous-filenames.toml7
-rw-r--r--src/syntax_mapping/builtins/linux/.gitkeep0
-rw-r--r--src/syntax_mapping/builtins/linux/50-os-release.toml7
-rw-r--r--src/syntax_mapping/builtins/linux/50-pacman.toml3
-rw-r--r--src/syntax_mapping/builtins/linux/50-systemd.toml21
-rw-r--r--src/syntax_mapping/builtins/macos/.gitkeep0
-rw-r--r--src/syntax_mapping/builtins/unix-family/.gitkeep0
-rw-r--r--src/syntax_mapping/builtins/unix-family/50-apache.toml2
-rw-r--r--src/syntax_mapping/builtins/unix-family/50-fish-shell.toml2
-rw-r--r--src/syntax_mapping/builtins/unix-family/50-korn-shell.toml3
-rw-r--r--src/syntax_mapping/builtins/unix-family/50-mail-spool.toml2
-rw-r--r--src/syntax_mapping/builtins/unix-family/50-nginx.toml2
-rw-r--r--src/syntax_mapping/builtins/unix-family/50-shell.toml5
-rw-r--r--src/syntax_mapping/builtins/windows/.gitkeep0
33 files changed, 311 insertions, 0 deletions
diff --git a/src/syntax_mapping/builtin.rs b/src/syntax_mapping/builtin.rs
new file mode 100644
index 00000000..1822be57
--- /dev/null
+++ b/src/syntax_mapping/builtin.rs
@@ -0,0 +1,91 @@
+use std::env;
+
+use globset::GlobMatcher;
+use once_cell::sync::Lazy;
+
+use crate::syntax_mapping::{make_glob_matcher, MappingTarget};
+
+// Static syntax mappings generated from /src/syntax_mapping/builtins/ by the
+// build script (/build/syntax_mapping.rs).
+include!(concat!(
+ env!("OUT_DIR"),
+ "/codegen_static_syntax_mappings.rs"
+));
+
+// The defined matcher strings are analysed at compile time and converted into
+// lazily-compiled `GlobMatcher`s. This is so that the string searches are moved
+// from run time to compile time, thus improving startup performance.
+//
+// To any future maintainer (including possibly myself) wondering why there is
+// not a `BuiltinMatcher` enum that looks like this:
+//
+// ```
+// enum BuiltinMatcher {
+// Fixed(&'static str),
+// Dynamic(Lazy<Option<String>>),
+// }
+// ```
+//
+// Because there was. I tried it and threw it out.
+//
+// Naively looking at the problem from a distance, this may seem like a good
+// design (strongly typed etc. etc.). It would also save on compiled size by
+// extracting out common behaviour into functions. But while actually
+// implementing the lazy matcher compilation logic, I realised that it's most
+// convenient for `BUILTIN_MAPPINGS` to have the following type:
+//
+// `[(Lazy<Option<GlobMatcher>>, MappingTarget); N]`
+//
+// The benefit of this is that operations like listing all builtin mappings
+// would be effectively memoised. The caller would not have to compile another
+// `GlobMatcher` for rules that they have previously visited.
+//
+// Unfortunately, this means we are going to have to store a distinct closure
+// for each rule anyway, which makes a `BuiltinMatcher` enum a pointless layer
+// of indirection.
+//
+// In the current implementation, the closure within each generated rule simply
+// calls either `build_matcher_fixed` or `build_matcher_dynamic`, depending on
+// whether the defined matcher contains dynamic segments or not.
+
+/// Compile a fixed glob string (one without dynamic segments) into a matcher.
+///
+/// Panics if compilation fails; a broken builtin fixed glob is a fatal error.
+///
+/// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure.
+fn build_matcher_fixed(from: &str) -> GlobMatcher {
+ make_glob_matcher(from).expect("A builtin fixed glob matcher failed to compile")
+}
+
+/// Concatenate matcher segments into a glob string, substituting the current
+/// value of each `Env` segment's variable, then compile to a glob matcher.
+///
+/// Returns `None` if any `env::var` lookup fails, or if the joined glob string
+/// fails to compile.
+///
+/// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure.
+fn build_matcher_dynamic(segs: &[MatcherSegment]) -> Option<GlobMatcher> {
+ // join segments; env values are inserted as-is, without any glob escaping
+ let mut buf = String::new();
+ for seg in segs {
+ match seg {
+ MatcherSegment::Text(s) => buf.push_str(s),
+ MatcherSegment::Env(var) => {
+ let replaced = env::var(var).ok()?;
+ buf.push_str(&replaced);
+ }
+ }
+ }
+ // compile glob matcher; a compile error is swallowed and yields `None`
+ let matcher = make_glob_matcher(&buf).ok()?;
+ Some(matcher)
+}
+
+/// A segment of a dynamic builtin matcher: `Text` is literal glob text, and
+/// `Env` is the name of an environment variable replaced with its value.
+/// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure.
+#[derive(Clone, Debug)]
+enum MatcherSegment {
+ Text(&'static str),
+ Env(&'static str),
+}
diff --git a/src/syntax_mapping/builtins/README.md b/src/syntax_mapping/builtins/README.md
new file mode 100644
index 00000000..29cf43ee
--- /dev/null
+++ b/src/syntax_mapping/builtins/README.md
@@ -0,0 +1,116 @@
+# `/src/syntax_mapping/builtins`
+
+The files in this directory define path/name-based syntax mappings, which amend
+and take precedence over the extension/content-based syntax mappings provided by
+[syntect](https://github.com/trishume/syntect).
+
+## File organisation
+
+Each TOML file should describe the syntax mappings of a single application, or
+otherwise a set of logically-related rules.
+
+What defines "a single application" here is deliberately vague, since the
+file-splitting is purely for maintainability reasons. (Technically, we could
+just as well use a single TOML file.) So just use common sense.
+
+TOML files should reside in the corresponding subdirectory of the platform(s)
+that they intend to target. At compile time, the build script will go through
+each subdirectory that is applicable to the compilation target, collect the
+syntax mappings defined by all TOML files, and embed them into the binary.
+
+## File syntax
+
+Each TOML file should contain a single section named `mappings`, with each of
+its keys being a language identifier (first column of `bat -L`; also referred to
+as "target").
+
+The value of each key should be an array of strings, with each item being a glob
+matcher. We will call each of these items a "rule".
+
+For example, if `foo-application` uses both TOML and YAML configuration files,
+we could write something like this:
+
+```toml
+# 30-foo-application.toml
+[mappings]
+"TOML" = [
+ # rules for TOML syntax go here
+ "/usr/share/foo-application/toml-config/*.conf",
+ "/etc/foo-application/toml-config/*.conf",
+]
+"YAML" = [
+ # rules for YAML syntax go here
+ # ...
+]
+```
+
+### Dynamic environment variable replacement
+
+In addition to the standard glob matcher syntax, rules also support dynamic
+replacement of environment variables at runtime. This allows us to concisely
+handle things like [XDG](https://specifications.freedesktop.org/basedir-spec/latest/).
+
+All environment variables intended to be replaced at runtime must be enclosed in
+`${}`, for example `"/foo/*/${YOUR_ENV}-suffix/*.log"`. Note that this is the
+**only** admissible syntax; other variable substitution syntaxes are not
+supported and will either cause a compile time error, or be treated as plain
+text.
+
+For example, if `foo-application` also supports per-user configuration files, we
+could write something like this:
+
+```toml
+# 30-foo-application.toml
+[mappings]
+"TOML" = [
+ # rules for TOML syntax go here
+ "/usr/share/foo-application/toml-config/*.conf",
+ "/etc/foo-application/toml-config/*.conf",
+ "${XDG_CONFIG_HOME}/foo-application/toml-config/*.conf",
+ "${HOME}/.config/foo-application/toml-config/*.conf",
+]
+"YAML" = [
+ # rules for YAML syntax go here
+ # ...
+]
+```
+
+If any environment variable replacement in a rule fails (for example when a
+variable is unset), or if the glob string after replacements is invalid, the
+entire rule will be ignored.
+
+### Explicitly mapping to unknown
+
+Sometimes it may be necessary to "unset" a particular syntect mapping - perhaps
+a syntax's matching rules are "too greedy", and it is claiming files that it
+should not. In this case, there are two special identifiers:
+`MappingTarget::MapToUnknown` and `MappingTarget::MapExtensionToUnknown`
+(corresponding to the two variants of the `syntax_mapping::MappingTarget` enum).
+
+An example of this would be `*.conf` files in general. So we may write something
+like this:
+
+```toml
+# 99-unset-ambiguous-extensions.toml
+[mappings]
+"MappingTarget::MapExtensionToUnknown" = [
+ "*.conf",
+]
+```
+
+## Ordering
+
+At compile time, all TOML files applicable to the target are processed in
+lexicographical filename order. So `00-foo.toml` takes precedence over
+`10-bar.toml`, which takes precedence over `20-baz.toml`, and so on. Note that
+**only** the filenames of the TOML files are taken into account; the
+subdirectories they are placed in have no influence on ordering.
+
+This behaviour can be occasionally useful for creating high/low priority rules,
+such as in the aforementioned example of explicitly mapping `*.conf` files to
+unknown. Generally this should not be much of a concern though, since rules
+should be written as specifically as possible for each application.
+
+Rules within each TOML file are processed (and therefore matched) in the order
+in which they are defined. At runtime, the syntax selection algorithm will
+short-circuit and return the target of the first matching rule.
diff --git a/src/syntax_mapping/builtins/bsd-family/.gitkeep b/src/syntax_mapping/builtins/bsd-family/.gitkeep
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/syntax_mapping/builtins/bsd-family/.gitkeep
diff --git a/src/syntax_mapping/builtins/bsd-family/50-os-release.toml b/src/syntax_mapping/builtins/bsd-family/50-os-release.toml
new file mode 100644
index 00000000..91b003d7
--- /dev/null
+++ b/src/syntax_mapping/builtins/bsd-family/50-os-release.toml
@@ -0,0 +1,2 @@
+[mappings]
+"Bourne Again Shell (bash)" = ["/etc/os-release", "/var/run/os-release"]
diff --git a/src/syntax_mapping/builtins/common/.gitkeep b/src/syntax_mapping/builtins/common/.gitkeep
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/syntax_mapping/builtins/common/.gitkeep
diff --git a/src/syntax_mapping/builtins/common/50-apache.toml b/src/syntax_mapping/builtins/common/50-apache.toml
new file mode 100644
index 00000000..0e557aff
--- /dev/null
+++ b/src/syntax_mapping/builtins/common/50-apache.toml
@@ -0,0 +1,2 @@
+[mappings]
+"Apache Conf" = ["httpd.conf"]
diff --git a/src/syntax_mapping/builtins/common/50-bat.toml b/src/syntax_mapping/builtins/common/50-bat.toml
new file mode 100644
index 00000000..e70b6b09
--- /dev/null
+++ b/src/syntax_mapping/builtins/common/50-bat.toml
@@ -0,0 +1,2 @@
+[mappings]
+"Bourne Again Shell (bash)" = ["**/bat/config"]
diff --git a/src/syntax_mapping/builtins/common/50-container.toml b/src/syntax_mapping/builtins/common/50-container.toml
new file mode 100644
index 00000000..ad48c29b
--- /dev/null
+++ b/src/syntax_mapping/builtins/common/50-container.toml
@@ -0,0 +1,2 @@
+[mappings]
+"Dockerfile" = ["Containerfile"]
diff --git a/src/syntax_mapping/builtins/common/50-cpp.toml b/src/syntax_mapping/builtins/common/50-cpp.toml
new file mode 100644
index 00000000..99d8a32b
--- /dev/null
+++ b/src/syntax_mapping/builtins/common/50-cpp.toml
@@ -0,0 +1,6 @@
+[mappings]
+"C++" = [
+ # probably better than the default Objective C mapping #877
+ "*.h",
+]
+"YAML" = [".clang-format"]
diff --git a/src/syntax_mapping/builtins/common/50-f-sharp.toml b/src/syntax_mapping/builtins/common/50-f-sharp.toml
new file mode 100644
index 00000000..a39e7ebd
--- /dev/null
+++ b/src/syntax_mapping/builtins/common/50-f-sharp.toml
@@ -0,0 +1,2 @@
+[mappings]
+"F#" = ["*.fs"]
diff --git a/src/syntax_mapping/builtins/common/50-git.toml b/src/syntax_mapping/builtins/common/50-git.toml
new file mode 100644
index 00000000..44a49a25
--- /dev/null
+++ b/src/syntax_mapping/builtins/common/50-git.toml
@@ -0,0 +1,10 @@
+# Global git config files rooted in `$XDG_CONFIG_HOME/git/` or `$HOME/.config/git/`
+# See e.g. https://git-scm.com/docs/git-config#FILES
+
+[mappings]
+"Git Config" = ["${XDG_CONFIG_HOME}/git/config", "${HOME}/.config/git/config"]
+"Git Ignore" = ["${XDG_CONFIG_HOME}/git/ignore", "${HOME}/.config/git/ignore"]
+"Git Attributes" = [
+ "${XDG_CONFIG_HOME}/git/attributes",
+ "${HOME}/.config/git/attributes",
+]
diff --git a/src/syntax_mapping/builtins/common/50-jsonl.toml b/src/syntax_mapping/builtins/common/50-jsonl.toml
new file mode 100644
index 00000000..4b70a4d0
--- /dev/null
+++ b/src/syntax_mapping/builtins/common/50-jsonl.toml
@@ -0,0 +1,3 @@
+# JSON Lines is a simple variation of JSON #2535
+[mappings]
+"JSON" = ["*.jsonl"]
diff --git a/src/syntax_mapping/builtins/common/50-nginx.toml b/src/syntax_mapping/builtins/common/50-nginx.toml
new file mode 100644
index 00000000..305418bb
--- /dev/null
+++ b/src/syntax_mapping/builtins/common/50-nginx.toml
@@ -0,0 +1,2 @@
+[mappings]
+"nginx" = ["nginx.conf", "mime.types"]
diff --git a/src/syntax_mapping/builtins/common/50-nmap.toml b/src/syntax_mapping/builtins/common/50-nmap.toml
new file mode 100644
index 00000000..f79a5e97
--- /dev/null
+++ b/src/syntax_mapping/builtins/common/50-nmap.toml
@@ -0,0 +1,3 @@
+[mappings]
+# See #2151, https://nmap.org/book/nse-language.html
+"Lua" = ["*.nse"]
diff --git a/src/syntax_mapping/builtins/common/50-proxy-auto-config.toml b/src/syntax_mapping/builtins/common/50-proxy-auto-config.toml
new file mode 100644
index 00000000..70e51c92
--- /dev/null
+++ b/src/syntax_mapping/builtins/common/50-proxy-auto-config.toml
@@ -0,0 +1,3 @@
+# Proxy auto-config (PAC) files are JavaScript #1515
+[mappings]
+"JavaScript (Babel)" = ["*.pac"]
diff --git a/src/syntax_mapping/builtins/common/50-ron.toml b/src/syntax_mapping/builtins/common/50-ron.toml
new file mode 100644
index 00000000..bc04221b
--- /dev/null
+++ b/src/syntax_mapping/builtins/common/50-ron.toml
@@ -0,0 +1,3 @@
+# Rusty Object Notation #2427
+[mappings]
+"Rust" = ["*.ron"]
diff --git a/src/syntax_mapping/builtins/common/50-sarif.toml b/src/syntax_mapping/builtins/common/50-sarif.toml
new file mode 100644
index 00000000..2542b9cd
--- /dev/null
+++ b/src/syntax_mapping/builtins/common/50-sarif.toml
@@ -0,0 +1,3 @@
+# SARIF is a format for reporting static analysis results #2695
+[mappings]
+"JSON" = ["*.sarif"]
diff --git a/src/syntax_mapping/builtins/common/50-ssh.toml b/src/syntax_mapping/builtins/common/50-ssh.toml
new file mode 100644
index 00000000..6ec24050
--- /dev/null
+++ b/src/syntax_mapping/builtins/common/50-ssh.toml
@@ -0,0 +1,2 @@
+[mappings]
+"SSH Config" = ["**/.ssh/config"]
diff --git a/src/syntax_mapping/builtins/common/99-unset-ambiguous-extensions.toml b/src/syntax_mapping/builtins/common/99-unset-ambiguous-extensions.toml
new file mode 100644
index 00000000..d87537d7
--- /dev/null
+++ b/src/syntax_mapping/builtins/common/99-unset-ambiguous-extensions.toml
@@ -0,0 +1,5 @@
+[mappings]
+"MappingTarget::MapExtensionToUnknown" = [
+ # common extension used for all kinds of formats
+ "*.conf",
+]
diff --git a/src/syntax_mapping/builtins/common/99-unset-ambiguous-filenames.toml b/src/syntax_mapping/builtins/common/99-unset-ambiguous-filenames.toml
new file mode 100644
index 00000000..21941ebc
--- /dev/null
+++ b/src/syntax_mapping/builtins/common/99-unset-ambiguous-filenames.toml
@@ -0,0 +1,7 @@
+[mappings]
+"MappingTarget::MapToUnknown" = [
+ # "NAnt Build File" should only match *.build files, not files named "build"
+ "build",
+ # "bin/rails" scripts in a Ruby project misidentified as HTML (Rails) #1008
+ "rails",
+]
diff --git a/src/syntax_mapping/builtins/linux/.gitkeep b/src/syntax_mapping/builtins/linux/.gitkeep
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/syntax_mapping/builtins/linux/.gitkeep
diff --git a/src/syntax_mapping/builtins/linux/50-os-release.toml b/src/syntax_mapping/builtins/linux/50-os-release.toml
new file mode 100644
index 00000000..791599aa
--- /dev/null
+++ b/src/syntax_mapping/builtins/linux/50-os-release.toml
@@ -0,0 +1,7 @@
+[mappings]
+"Bourne Again Shell (bash)" = [
+ "/etc/os-release",
+ "/usr/lib/os-release",
+ "/etc/initrd-release",
+ "/usr/lib/extension-release.d/extension-release.*",
+]
diff --git a/src/syntax_mapping/builtins/linux/50-pacman.toml b/src/syntax_mapping/builtins/linux/50-pacman.toml
new file mode 100644
index 00000000..655118c5
--- /dev/null
+++ b/src/syntax_mapping/builtins/linux/50-pacman.toml
@@ -0,0 +1,3 @@
+[mappings]
+# pacman hooks
+"INI" = ["/usr/share/libalpm/hooks/*.hook", "/etc/pacman.d/hooks/*.hook"]
diff --git a/src/syntax_mapping/builtins/linux/50-systemd.toml b/src/syntax_mapping/builtins/linux/50-systemd.toml
new file mode 100644
index 00000000..6f91b0be
--- /dev/null
+++ b/src/syntax_mapping/builtins/linux/50-systemd.toml
@@ -0,0 +1,21 @@
+[mappings]
+"INI" = [
+ "**/systemd/**/*.conf",
+ "**/systemd/**/*.example",
+ "*.automount",
+ "*.device",
+ "*.dnssd",
+ "*.link",
+ "*.mount",
+ "*.netdev",
+ "*.network",
+ "*.nspawn",
+ "*.path",
+ "*.service",
+ "*.scope",
+ "*.slice",
+ "*.socket",
+ "*.swap",
+ "*.target",
+ "*.timer",
+]
diff --git a/src/syntax_mapping/builtins/macos/.gitkeep b/src/syntax_mapping/builtins/macos/.gitkeep
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/syntax_mapping/builtins/macos/.gitkeep
diff --git a/src/syntax_mapping/builtins/unix-family/.gitkeep b/src/syntax_mapping/builtins/unix-family/.gitkeep
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/syntax_mapping/builtins/unix-family/.gitkeep
diff --git a/src/syntax_mapping/builtins/unix-family/50-apache.toml b/src/syntax_mapping/builtins/unix-family/50-apache.toml
new file mode 100644
index 00000000..dfb920f3
--- /dev/null
+++ b/src/syntax_mapping/builtins/unix-family/50-apache.toml
@@ -0,0 +1,2 @@
+[mappings]
+"Apache Conf" = ["/etc/apache2/**/*.conf", "/etc/apache2/sites-*/**/*"]
diff --git a/src/syntax_mapping/builtins/unix-family/50-fish-shell.toml b/src/syntax_mapping/builtins/unix-family/50-fish-shell.toml
new file mode 100644
index 00000000..f2a9e224
--- /dev/null
+++ b/src/syntax_mapping/builtins/unix-family/50-fish-shell.toml
@@ -0,0 +1,2 @@
+[mappings]
+"YAML" = ["fish_history"]
diff --git a/src/syntax_mapping/builtins/unix-family/50-korn-shell.toml b/src/syntax_mapping/builtins/unix-family/50-korn-shell.toml
new file mode 100644
index 00000000..6c788d1d
--- /dev/null
+++ b/src/syntax_mapping/builtins/unix-family/50-korn-shell.toml
@@ -0,0 +1,3 @@
+# KornShell is backward-compatible with the Bourne shell #2633
+[mappings]
+"Bourne Again Shell (bash)" = ["*.ksh"]
diff --git a/src/syntax_mapping/builtins/unix-family/50-mail-spool.toml b/src/syntax_mapping/builtins/unix-family/50-mail-spool.toml
new file mode 100644
index 00000000..c798358c
--- /dev/null
+++ b/src/syntax_mapping/builtins/unix-family/50-mail-spool.toml
@@ -0,0 +1,2 @@
+[mappings]
+"Email" = ["/var/spool/mail/*", "/var/mail/*"]
diff --git a/src/syntax_mapping/builtins/unix-family/50-nginx.toml b/src/syntax_mapping/builtins/unix-family/50-nginx.toml
new file mode 100644
index 00000000..580b65d8
--- /dev/null
+++ b/src/syntax_mapping/builtins/unix-family/50-nginx.toml
@@ -0,0 +1,2 @@
+[mappings]
+"nginx" = ["/etc/nginx/**/*.conf", "/etc/nginx/sites-*/**/*"]
diff --git a/src/syntax_mapping/builtins/unix-family/50-shell.toml b/src/syntax_mapping/builtins/unix-family/50-shell.toml
new file mode 100644
index 00000000..d015ca81
--- /dev/null
+++ b/src/syntax_mapping/builtins/unix-family/50-shell.toml
@@ -0,0 +1,5 @@
+[mappings]
+"Bourne Again Shell (bash)" = [
+ # used by lots of shells
+ "/etc/profile",
+]
diff --git a/src/syntax_mapping/builtins/windows/.gitkeep b/src/syntax_mapping/builtins/windows/.gitkeep
new file mode 100644
index 00000000..e69de29b
--- /dev/null
+++ b/src/syntax_mapping/builtins/windows/.gitkeep