summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Nordholts <enselic@gmail.com>2021-09-15 07:59:33 +0200
committerGitHub <noreply@github.com>2021-09-15 07:59:33 +0200
commite84b702309471e31621cf0bc06f8754511a5cbbc (patch)
tree0418911d3f6315562394096e67a0836a9246d9b0
parent6226eba52a05f53026abb927500629074279ef51 (diff)
Extract some private submodules from 'bat::assets' (#1850)
-rw-r--r--src/assets.rs142
-rw-r--r--src/assets/assets_metadata.rs (renamed from src/assets_metadata.rs)0
-rw-r--r--src/assets/build_assets.rs (renamed from src/build_assets.rs)3
-rw-r--r--src/assets/ignored_suffixes.rs42
-rw-r--r--src/assets/minimal_assets.rs72
-rw-r--r--src/assets/serialized_syntax_set.rs27
-rw-r--r--src/lib.rs6
7 files changed, 166 insertions, 126 deletions
diff --git a/src/assets.rs b/src/assets.rs
index 8f794483..3f844bc4 100644
--- a/src/assets.rs
+++ b/src/assets.rs
@@ -1,7 +1,6 @@
-use std::collections::HashMap;
use std::ffi::OsStr;
use std::fs;
-use std::path::{Path, PathBuf};
+use std::path::Path;
use lazycell::LazyCell;
@@ -15,17 +14,26 @@ use crate::error::*;
use crate::input::{InputReader, OpenedInput, OpenedInputKind};
use crate::syntax_mapping::{MappingTarget, SyntaxMapping};
+use ignored_suffixes::*;
+use minimal_assets::*;
+use serialized_syntax_set::*;
+
+#[cfg(feature = "build-assets")]
+pub use crate::assets::build_assets::*;
+
+pub(crate) mod assets_metadata;
+#[cfg(feature = "build-assets")]
+mod build_assets;
+mod ignored_suffixes;
+mod minimal_assets;
+mod serialized_syntax_set;
+
#[derive(Debug)]
pub struct HighlightingAssets {
syntax_set_cell: LazyCell<SyntaxSet>,
serialized_syntax_set: SerializedSyntaxSet,
- minimal_syntaxes: MinimalSyntaxes,
-
- /// Lazily load serialized [SyntaxSet]s from [Self.minimal_syntaxes]. The
- /// index in this vec matches the index in
- /// [Self.minimal_syntaxes.serialized_syntax_sets]
- deserialized_minimal_syntaxes: Vec<LazyCell<SyntaxSet>>,
+ minimal_assets: MinimalAssets,
theme_set: ThemeSet,
fallback_theme: Option<&'static str>,
@@ -37,22 +45,6 @@ pub struct SyntaxReferenceInSet<'a> {
pub syntax_set: &'a SyntaxSet,
}
-/// Stores and allows lookup of minimal [SyntaxSet]s. The [SyntaxSet]s are
-/// stored in serialized form, and are deserialized on-demand. This gives good
-/// startup performance since only the necessary [SyntaxReference]s needs to be
-/// deserialized.
-#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
-pub(crate) struct MinimalSyntaxes {
- /// Lookup the index into `serialized_syntax_sets` of a [SyntaxSet] by the
- /// name of any [SyntaxReference] inside the [SyntaxSet]
- /// (We will later add `by_extension`, `by_first_line`, etc.)
- pub(crate) by_name: HashMap<String, usize>,
-
- /// Serialized [SyntaxSet]s. Whether or not this data is compressed is
- /// decided by [COMPRESS_SERIALIZED_MINIMAL_SYNTAXES]
- pub(crate) serialized_syntax_sets: Vec<Vec<u8>>,
-}
-
// Compress for size of ~700 kB instead of ~4600 kB at the cost of ~30% longer deserialization time
pub(crate) const COMPRESS_SYNTAXES: bool = true;
@@ -70,41 +62,16 @@ pub(crate) const COMPRESS_SERIALIZED_MINIMAL_SYNTAXES: bool = true;
// efficient byte-by-byte copy of `serialized_syntax_sets`.
pub(crate) const COMPRESS_MINIMAL_SYNTAXES: bool = false;
-const IGNORED_SUFFIXES: [&str; 13] = [
- // Editor etc backups
- "~",
- ".bak",
- ".old",
- ".orig",
- // Debian and derivatives apt/dpkg/ucf backups
- ".dpkg-dist",
- ".dpkg-old",
- ".ucf-dist",
- ".ucf-new",
- ".ucf-old",
- // Red Hat and derivatives rpm backups
- ".rpmnew",
- ".rpmorig",
- ".rpmsave",
- // Build system input/template files
- ".in",
-];
-
impl HighlightingAssets {
fn new(
serialized_syntax_set: SerializedSyntaxSet,
minimal_syntaxes: MinimalSyntaxes,
theme_set: ThemeSet,
) -> Self {
- // Prepare so we can lazily load minimal syntaxes without a mut reference
- let deserialized_minimal_syntaxes =
- vec![LazyCell::new(); minimal_syntaxes.serialized_syntax_sets.len()];
-
HighlightingAssets {
syntax_set_cell: LazyCell::new(),
serialized_syntax_set,
- deserialized_minimal_syntaxes,
- minimal_syntaxes,
+ minimal_assets: MinimalAssets::new(minimal_syntaxes),
theme_set,
fallback_theme: None,
}
@@ -167,37 +134,12 @@ impl HighlightingAssets {
/// tries to find a minimal [SyntaxSet]. If none is found, returns the
/// [SyntaxSet] that contains all syntaxes.
fn get_syntax_set_by_name(&self, name: &str) -> Result<&SyntaxSet> {
- let minimal_syntax_set = self
- .minimal_syntaxes
- .by_name
- .get(&name.to_ascii_lowercase())
- .and_then(|index| self.get_minimal_syntax_set_with_index(*index));
-
- match minimal_syntax_set {
+ match self.minimal_assets.get_syntax_set_by_name(name) {
Some(syntax_set) => Ok(syntax_set),
None => self.get_syntax_set(),
}
}
- fn load_minimal_syntax_set_with_index(&self, index: usize) -> Result<SyntaxSet> {
- let serialized_syntax_set = &self.minimal_syntaxes.serialized_syntax_sets[index];
- asset_from_contents(
- &serialized_syntax_set[..],
- &format!("minimal syntax set {}", index),
- COMPRESS_SERIALIZED_MINIMAL_SYNTAXES,
- )
- .map_err(|_| format!("Could not parse minimal syntax set {}", index).into())
- }
-
- fn get_minimal_syntax_set_with_index(&self, index: usize) -> Option<&SyntaxSet> {
- self.deserialized_minimal_syntaxes
- .get(index)
- .and_then(|cell| {
- cell.try_borrow_with(|| self.load_minimal_syntax_set_with_index(index))
- .ok()
- })
- }
-
/// Use [Self::get_syntax_for_file_name] instead
#[deprecated]
pub fn syntax_for_file_name(
@@ -319,7 +261,9 @@ impl HighlightingAssets {
syntax = self.find_syntax_by_file_name_extension(file_name)?;
}
if syntax.is_none() {
- syntax = self.get_extension_syntax_with_stripped_suffix(file_name)?;
+ syntax = try_with_stripped_suffix(file_name, |stripped_file_name| {
+ self.get_extension_syntax(stripped_file_name) // Note: recursion
+ })?;
}
Ok(syntax)
}
@@ -340,25 +284,6 @@ impl HighlightingAssets {
)
}
- /// If we find an ignored suffix on the file name, e.g. '~', we strip it and
- /// then try again to find a syntax without it. Note that we do this recursively.
- fn get_extension_syntax_with_stripped_suffix(
- &self,
- file_name: &OsStr,
- ) -> Result<Option<SyntaxReferenceInSet>> {
- let file_path = Path::new(file_name);
- let mut syntax = None;
- if let Some(file_str) = file_path.to_str() {
- for suffix in &IGNORED_SUFFIXES {
- if let Some(stripped_filename) = file_str.strip_suffix(suffix) {
- syntax = self.get_extension_syntax(OsStr::new(stripped_filename))?;
- break;
- }
- }
- }
- Ok(syntax)
- }
-
fn get_first_line_syntax(
&self,
reader: &mut InputReader,
@@ -371,31 +296,6 @@ impl HighlightingAssets {
}
}
-#[cfg(feature = "build-assets")]
-pub use crate::build_assets::build_assets as build;
-
-/// A SyntaxSet in serialized form, i.e. bincoded and flate2 compressed.
-/// We keep it in this format since we want to load it lazily.
-#[derive(Debug)]
-enum SerializedSyntaxSet {
- /// The data comes from a user-generated cache file.
- FromFile(PathBuf),
-
- /// The data to use is embedded into the bat binary.
- FromBinary(&'static [u8]),
-}
-
-impl SerializedSyntaxSet {
- fn deserialize(&self) -> Result<SyntaxSet> {
- match self {
- SerializedSyntaxSet::FromBinary(data) => Ok(from_binary(data, COMPRESS_SYNTAXES)),
- SerializedSyntaxSet::FromFile(ref path) => {
- asset_from_cache(path, "syntax set", COMPRESS_SYNTAXES)
- }
- }
- }
-}
-
pub(crate) fn get_serialized_integrated_syntaxset() -> &'static [u8] {
include_bytes!("../assets/syntaxes.bin")
}
diff --git a/src/assets_metadata.rs b/src/assets/assets_metadata.rs
index 5dc2dd2c..5dc2dd2c 100644
--- a/src/assets_metadata.rs
+++ b/src/assets/assets_metadata.rs
diff --git a/src/build_assets.rs b/src/assets/build_assets.rs
index 75e6d5bc..e88890c5 100644
--- a/src/build_assets.rs
+++ b/src/assets/build_assets.rs
@@ -7,7 +7,6 @@ use syntect::parsing::syntax_definition::{
use syntect::parsing::{Scope, SyntaxSet, SyntaxSetBuilder};
use crate::assets::*;
-use crate::error::*;
type SyntaxName = String;
@@ -27,7 +26,7 @@ enum Dependency {
ByScope(Scope),
}
-pub fn build_assets(
+pub fn build(
source_dir: &Path,
include_integrated_assets: bool,
target_dir: &Path,
diff --git a/src/assets/ignored_suffixes.rs b/src/assets/ignored_suffixes.rs
new file mode 100644
index 00000000..f653e3a0
--- /dev/null
+++ b/src/assets/ignored_suffixes.rs
@@ -0,0 +1,42 @@
+use std::ffi::OsStr;
+use std::path::Path;
+
+use crate::error::*;
+
+const IGNORED_SUFFIXES: [&str; 13] = [
+ // Backup copies left behind by editors and similar tools
+ "~",
+ ".bak",
+ ".old",
+ ".orig",
+ // Backups created by apt/dpkg/ucf on Debian and its derivatives
+ ".dpkg-dist",
+ ".dpkg-old",
+ ".ucf-dist",
+ ".ucf-new",
+ ".ucf-old",
+ // Backups created by rpm on Red Hat and its derivatives
+ ".rpmnew",
+ ".rpmorig",
+ ".rpmsave",
+ // Input/template files used by build systems
+ ".in",
+];
+
+/// Strips the first entry of [IGNORED_SUFFIXES] that `file_name` ends with (e.g. '~') and returns
+/// `func` applied to the remainder; `Ok(None)` if no suffix matches or the name is not valid UTF-8.
+pub fn try_with_stripped_suffix<T, F>(file_name: &OsStr, func: F) -> Result<Option<T>>
+where
+ F: Fn(&OsStr) -> Result<Option<T>>,
+{
+ let mut from_stripped = None;
+ if let Some(file_str) = Path::new(file_name).to_str() {
+ for suffix in &IGNORED_SUFFIXES {
+ if let Some(stripped_filename) = file_str.strip_suffix(suffix) {
+ from_stripped = func(OsStr::new(stripped_filename))?; // an `Err` from `func` is propagated to the caller
+ break; // only the first matching suffix is tried
+ }
+ }
+ }
+ Ok(from_stripped)
+}
diff --git a/src/assets/minimal_assets.rs b/src/assets/minimal_assets.rs
new file mode 100644
index 00000000..6bd33d08
--- /dev/null
+++ b/src/assets/minimal_assets.rs
@@ -0,0 +1,72 @@
+use std::collections::HashMap;
+
+use lazycell::LazyCell;
+
+use syntect::parsing::SyntaxSet;
+
+use super::*;
+
+#[derive(Debug)]
+pub(crate) struct MinimalAssets {
+ minimal_syntaxes: MinimalSyntaxes, // the serialized minimal syntax sets and their by-name index
+
+ /// Lazily deserialized [SyntaxSet]s, one cell per entry of
+ /// [Self.minimal_syntaxes.serialized_syntax_sets]. The cell at a given
+ /// index holds the deserialized form of the serialized set at that index.
+ deserialized_minimal_syntaxes: Vec<LazyCell<SyntaxSet>>,
+}
+
+/// Stores and allows lookup of minimal [SyntaxSet]s. The [SyntaxSet]s are
+/// stored in serialized form, and are deserialized on demand. This gives good
+/// startup performance since only the necessary [SyntaxReference]s need to be
+/// deserialized.
+#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
+pub(crate) struct MinimalSyntaxes {
+ /// Look up the index into `serialized_syntax_sets` of a [SyntaxSet] by the
+ /// name of any [SyntaxReference] inside it. Lookups use the ASCII-lowercased name.
+ /// (We will later add `by_extension`, `by_first_line`, etc.)
+ pub(crate) by_name: HashMap<String, usize>,
+
+ /// Serialized [SyntaxSet]s. Whether or not this data is compressed is
+ /// decided by [COMPRESS_SERIALIZED_MINIMAL_SYNTAXES]
+ pub(crate) serialized_syntax_sets: Vec<Vec<u8>>,
+}
+
+impl MinimalAssets {
+ pub(crate) fn new(minimal_syntaxes: MinimalSyntaxes) -> Self {
+ // One empty cell per serialized set, so each set can be loaded lazily without a mut reference
+ let deserialized_minimal_syntaxes =
+ vec![LazyCell::new(); minimal_syntaxes.serialized_syntax_sets.len()];
+
+ Self {
+ minimal_syntaxes,
+ deserialized_minimal_syntaxes,
+ }
+ }
+
+ pub fn get_syntax_set_by_name(&self, name: &str) -> Option<&SyntaxSet> {
+ self.minimal_syntaxes
+ .by_name
+ .get(&name.to_ascii_lowercase()) // the lookup key is the ASCII-lowercased name
+ .and_then(|index| self.get_minimal_syntax_set_with_index(*index)) // `None` if the name is unknown or the set fails to load
+ }
+
+ fn load_minimal_syntax_set_with_index(&self, index: usize) -> Result<SyntaxSet> {
+ let serialized_syntax_set = &self.minimal_syntaxes.serialized_syntax_sets[index]; // plain indexing: panics if `index` is out of range
+ asset_from_contents(
+ &serialized_syntax_set[..],
+ &format!("minimal syntax set {}", index),
+ COMPRESS_SERIALIZED_MINIMAL_SYNTAXES,
+ )
+ .map_err(|_| format!("Could not parse minimal syntax set {}", index).into()) // the underlying error is replaced by this message
+ }
+
+ fn get_minimal_syntax_set_with_index(&self, index: usize) -> Option<&SyntaxSet> {
+ self.deserialized_minimal_syntaxes
+ .get(index) // bounds-checked: an out-of-range index yields `None`
+ .and_then(|cell| {
+ cell.try_borrow_with(|| self.load_minimal_syntax_set_with_index(index)) // the loader closure is handed to the cell
+ .ok() // a load error is turned into `None` rather than reported
+ })
+ }
+}
diff --git a/src/assets/serialized_syntax_set.rs b/src/assets/serialized_syntax_set.rs
new file mode 100644
index 00000000..46099e32
--- /dev/null
+++ b/src/assets/serialized_syntax_set.rs
@@ -0,0 +1,27 @@
+use std::path::PathBuf;
+
+use syntect::parsing::SyntaxSet;
+
+use super::*;
+
+/// A [SyntaxSet] in serialized form, i.e. bincoded and flate2 compressed.
+/// It is kept in this form so that it can be loaded lazily via [Self::deserialize].
+#[derive(Debug)]
+pub enum SerializedSyntaxSet {
+ /// The data comes from a user-generated cache file at the given path.
+ FromFile(PathBuf),
+
+ /// The data is embedded into the bat binary.
+ FromBinary(&'static [u8]),
+}
+
+impl SerializedSyntaxSet {
+ pub fn deserialize(&self) -> Result<SyntaxSet> { // turns the serialized data into a `SyntaxSet`
+ match self {
+ SerializedSyntaxSet::FromBinary(data) => Ok(from_binary(data, COMPRESS_SYNTAXES)), // embedded data: always wrapped in `Ok` here
+ SerializedSyntaxSet::FromFile(ref path) => {
+ asset_from_cache(path, "syntax set", COMPRESS_SYNTAXES) // cache file: the helper's `Result` is returned unchanged
+ }
+ }
+ }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 950d0967..86bb5804 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -22,9 +22,9 @@
mod macros;
pub mod assets;
-pub mod assets_metadata;
-#[cfg(feature = "build-assets")]
-mod build_assets;
+pub mod assets_metadata { // keeps a public `assets_metadata` module at the crate root now that the file lives under `assets`
+ pub use super::assets::assets_metadata::*; // re-exports everything public from the relocated module
+}
pub mod config;
pub mod controller;
mod decorations;