summary | refs | log | tree | commit | diff | stats
path: root/build.rs
diff options
context:
space:
mode:
author: Manos Pitsidianakis <el13635@mail.ntua.gr> 2019-08-31 15:37:46 +0300
committer: Manos Pitsidianakis <el13635@mail.ntua.gr> 2019-08-31 15:37:46 +0300
commit: 31bf144ecdfa3c1da9e38e1f6338329aab996680 (patch)
tree: b0aaaa045905bb78adc6e60af16b867e93c4e371 /build.rs
Initial commit
Diffstat (limited to 'build.rs')
-rw-r--r-- build.rs 73
1 files changed, 73 insertions, 0 deletions
diff --git a/build.rs b/build.rs
new file mode 100644
index 0000000..b77aa40
--- /dev/null
+++ b/build.rs
@@ -0,0 +1,73 @@
+/// Location of the Unicode Character Database line-break property table that
+/// `main` downloads. It is fetched over HTTPS because the downloaded data is
+/// turned into Rust source, so it must not be alterable in transit.
+const LINE_BREAK_TABLE_URL: &str = "https://www.unicode.org/Public/UCD/latest/ucd/LineBreak.txt";
+use std::fs::File;
+use std::io::prelude::*;
+use std::io::BufReader;
+use std::path::PathBuf;
+use std::process::Command;
+
+include!("src/ui/text_processing/types.rs");
+
+/// Generates `src/ui/text_processing/tables.rs` from the Unicode `LineBreak.txt`
+/// data file.
+///
+/// If the table module already exists a notice is printed and nothing else is
+/// done. Otherwise a temporary directory is created with `mktemp -d`, the data
+/// file is downloaded into it with `curl`, every entry is parsed, and the
+/// module is written out. The temporary directory is removed whether or not
+/// generation succeeded.
+///
+/// # Errors
+///
+/// Returns an error if `mktemp` or `curl` cannot be run or exit unsuccessfully,
+/// if the downloaded file cannot be read or contains a malformed entry, or if
+/// the table module cannot be written.
+fn main() -> Result<(), std::io::Error> {
+    let mod_path = PathBuf::from("src/ui/text_processing/tables.rs");
+    if mod_path.exists() {
+        eprintln!(
+            "{} already exists, delete it if you want to replace it.",
+            mod_path.display()
+        );
+        return Ok(());
+    }
+
+    let mktemp = Command::new("mktemp").arg("-d").output()?;
+    if !mktemp.status.success() {
+        return Err(other_error(format!("`mktemp -d` failed: {}", mktemp.status)));
+    }
+    let tmpdir = std::str::from_utf8(&mktemp.stdout)
+        .map_err(|err| other_error(format!("`mktemp -d` printed a non-UTF-8 path: {}", err)))?
+        .trim();
+    if tmpdir.is_empty() {
+        return Err(other_error("`mktemp -d` printed no path".to_string()));
+    }
+    let tmpdir_path = PathBuf::from(tmpdir);
+
+    // All fallible work happens in a helper so that the temporary directory is
+    // removed on every path, not only on success.
+    let generated = fetch_and_generate(&tmpdir_path.join("LineBreak.txt"), &mod_path);
+    let cleaned_up = std::fs::remove_dir_all(&tmpdir_path);
+    // A generation error takes precedence over a clean-up error.
+    generated.and(cleaned_up)
+}
+
+/// Builds an `io::Error` carrying a free-form message.
+fn other_error(msg: String) -> std::io::Error {
+    std::io::Error::new(std::io::ErrorKind::Other, msg)
+}
+
+/// Downloads `LineBreak.txt` to `data_path`, parses it and writes the table
+/// module to `mod_path`.
+fn fetch_and_generate(
+    data_path: &std::path::Path,
+    mod_path: &std::path::Path,
+) -> Result<(), std::io::Error> {
+    // `--fail` turns HTTP errors into a non-zero exit status so that an error
+    // page is never mistaken for the data file; `--location` follows redirects.
+    let status = Command::new("curl")
+        .args(&["--fail", "--silent", "--show-error", "--location", "-o"])
+        .arg(data_path)
+        .arg(LINE_BREAK_TABLE_URL)
+        .status()?;
+    if !status.success() {
+        return Err(other_error(format!(
+            "downloading {} failed: {}",
+            LINE_BREAK_TABLE_URL, status
+        )));
+    }
+
+    let reader = BufReader::new(File::open(data_path)?);
+    let mut line_break_table: Vec<(u32, u32, LineBreakClass)> = Vec::with_capacity(3800);
+    for line in reader.lines() {
+        let line = line?;
+        if let Some((first, last, class)) = parse_line_break_entry(&line)? {
+            line_break_table.push((first, last, LineBreakClass::from(class)));
+        }
+    }
+
+    // The whole table is parsed before the module is created. If writing still
+    // fails, the partial file is removed: `main` skips generation whenever the
+    // module exists, so a truncated file would otherwise stick around forever.
+    let written = write_table_module(mod_path, &line_break_table);
+    if written.is_err() {
+        let _ = std::fs::remove_file(mod_path);
+    }
+    written
+}
+
+/// Parses one line of `LineBreak.txt`.
+///
+/// Returns `Ok(None)` for blank lines, comment lines and lines starting with a
+/// space. For a data line, returns the first and last code point of the range
+/// (equal for a single code point) and the line-break class name. Whitespace
+/// around the `;` separator and a trailing `# comment` are tolerated, and the
+/// class name is returned in full rather than cut to two characters (`ZWJ` is
+/// three).
+fn parse_line_break_entry(line: &str) -> Result<Option<(u32, u32, &str)>, std::io::Error> {
+    if line.starts_with(' ') {
+        return Ok(None);
+    }
+    // Everything after `#` is commentary.
+    let data = line.split('#').next().unwrap_or("").trim();
+    if data.is_empty() {
+        return Ok(None);
+    }
+    let malformed = || other_error(format!("malformed LineBreak.txt entry: {:?}", line));
+
+    let semicolon_idx = data.find(';').ok_or_else(&malformed)?;
+    let range = data[..semicolon_idx].trim();
+    let class = data[semicolon_idx + 1..].trim();
+    if class.is_empty() {
+        return Err(malformed());
+    }
+
+    // Either a single code point (`0009`) or an inclusive range (`0000..0008`).
+    let mut codepoints = range.splitn(2, "..");
+    let first = u32::from_str_radix(codepoints.next().unwrap_or(""), 16)
+        .map_err(|_| malformed())?;
+    let last = match codepoints.next() {
+        Some(hex) => u32::from_str_radix(hex, 16).map_err(|_| malformed())?,
+        None => first,
+    };
+    Ok(Some((first, last, class)))
+}
+
+/// Writes the generated Rust module containing the line-break table to
+/// `mod_path`.
+fn write_table_module(
+    mod_path: &std::path::Path,
+    table: &[(u32, u32, LineBreakClass)],
+) -> Result<(), std::io::Error> {
+    let mut out = std::io::BufWriter::new(File::create(mod_path)?);
+    out.write_all(b"use crate::types::LineBreakClass::*;\n")?;
+    out.write_all(b"use crate::types::LineBreakClass;\n\n")?;
+    out.write_all(b"const line_break_rules: &'static [(u32, u32, LineBreakClass)] = &[\n")?;
+    for (first, last, class) in table {
+        writeln!(out, " (0x{:X}, 0x{:X}, {:?}),", first, last, class)?;
+    }
+    out.write_all(b"];")?;
+    // Flush explicitly: errors during the implicit flush on drop are discarded.
+    out.flush()
+}