summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Sharif Haason <ssh128@scarletmail.rutgers.edu> 2023-06-13 14:47:44 -0400
committer: Sharif Haason <ssh128@scarletmail.rutgers.edu> 2023-12-10 13:21:23 -0500
commit: 6a230d34a4681aebfe90a738a253dca22aea694b (patch)
tree: 2dacb5ab77dde1b7d44ae092950303ef0dfca1b8 /src
parent: 663e4c19ffb79ff12f5e336cda1d064d0e8fb623 (diff)
Implement character table control and codepage 437 option
Diffstat (limited to 'src')
-rw-r--r-- src/lib.rs 72
-rw-r--r-- src/main.rs 25
2 files changed, 86 insertions, 11 deletions
diff --git a/src/lib.rs b/src/lib.rs
index 1c0c62e..e52e182 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -21,6 +21,29 @@ const COLOR_ASCII_OTHER: &[u8] = colors::Green::ANSI_FG.as_bytes();
const COLOR_NONASCII: &[u8] = colors::Yellow::ANSI_FG.as_bytes();
const COLOR_RESET: &[u8] = colors::Default::ANSI_FG.as_bytes();
+#[rustfmt::skip]
+const CP437: [char; 256] = [ // one display glyph per byte value; `Byte::as_char` indexes it with the raw byte
+ // Glyphs follow https://en.wikipedia.org/w/index.php?title=Code_page_437&oldid=978947122
+ // rather than ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT,
+ // because we want the graphic versions of 01h–1Fh + 7Fh.
+ '⋄','☺','☻','♥','♦','♣','♠','•','◘','○','◙','♂','♀','♪','♫','☼', // 0x00–0x0F (0x00 reuses '⋄', the glyph the ASCII-only mapping shows for Null)
+ '►','◄','↕','‼','¶','§','▬','↨','↑','↓','→','←','∟','↔','▲','▼', // 0x10–0x1F
+ ' ','!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/', // 0x20–0x2F
+ '0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?', // 0x30–0x3F
+ '@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O', // 0x40–0x4F
+ 'P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_', // 0x50–0x5F
+ '`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o', // 0x60–0x6F
+ 'p','q','r','s','t','u','v','w','x','y','z','{','|','}','~','⌂', // 0x70–0x7F
+ 'Ç','ü','é','â','ä','à','å','ç','ê','ë','è','ï','î','ì','Ä','Å', // 0x80–0x8F
+ 'É','æ','Æ','ô','ö','ò','û','ù','ÿ','Ö','Ü','¢','£','¥','₧','ƒ', // 0x90–0x9F
+ 'á','í','ó','ú','ñ','Ñ','ª','º','¿','⌐','¬','½','¼','¡','«','»', // 0xA0–0xAF
+ '░','▒','▓','│','┤','╡','╢','╖','╕','╣','║','╗','╝','╜','╛','┐', // 0xB0–0xBF
+ '└','┴','┬','├','─','┼','╞','╟','╚','╔','╩','╦','╠','═','╬','╧', // 0xC0–0xCF
+ '╨','╤','╥','╙','╘','╒','╓','╫','╪','┘','┌','█','▄','▌','▐','▀', // 0xD0–0xDF
+ 'α','ß','Γ','π','Σ','σ','µ','τ','Φ','Θ','Ω','δ','∞','φ','ε','∩', // 0xE0–0xEF
+ '≡','±','≥','≤','⌠','⌡','÷','≈','°','∙','·','√','ⁿ','²','■',' ', // 0xF0–0xFF
+];
+
#[derive(Copy, Clone)]
pub enum ByteCategory {
Null,
@@ -31,6 +54,12 @@ pub enum ByteCategory {
}
#[derive(Copy, Clone)]
+pub enum CharTable { // selects how `Byte::as_char` turns a byte value into a display character
+ AsciiOnly, // category-based mapping: the ASCII char itself, or one of the placeholders '⋄', ' ', '_', '•', '×'
+ CP437, // look the byte up in the 256-entry `CP437` table
+}
+
+#[derive(Copy, Clone)]
pub enum Endianness {
Little,
Big,
@@ -74,17 +103,27 @@ impl Byte {
}
}
- fn as_char(self) -> char {
+ fn as_char(self, char_table: CharTable) -> char { // returns the glyph displayed for this byte; `char_table` picks the mapping
use crate::ByteCategory::*;
-
- match self.category() {
- Null => '⋄',
- AsciiPrintable => self.0 as char,
- AsciiWhitespace if self.0 == 0x20 => ' ',
- AsciiWhitespace => '_',
- AsciiOther => '•',
- NonAscii => '×',
+ match char_table {
+ CharTable::AsciiOnly => match self.category() { // same arms as the body this change removes
+ Null => '⋄',
+ AsciiPrintable => self.0 as char,
+ AsciiWhitespace if self.0 == 0x20 => ' ', // a plain space is shown as itself
+ AsciiWhitespace => '_', // every other byte categorised as ASCII whitespace
+ AsciiOther => '•',
+ NonAscii => '×',
+ },
+ CharTable::CP437 => CP437[self.0.to_ne_bytes()[0] as usize], // NOTE(review): callers build `Byte` from a u8, so `to_ne_bytes()[0]` looks equivalent to `self.0`; a u8 index cannot exceed the 256-entry table
}
+ // match self.category() { // NOTE(review): from here to the closing `// }` is a commented-out copy of the pre-change body (dead code)
+ // Null => '⋄',
+ // AsciiPrintable => self.0 as char,
+ // AsciiWhitespace if self.0 == 0x20 => ' ',
+ // AsciiWhitespace => '_',
+ // AsciiOther => '•',
+ // NonAscii => '×',
+ // }
}
}
@@ -167,6 +206,7 @@ pub struct PrinterBuilder<'a, Writer: Write> {
group_size: u8,
base: Base,
endianness: Endianness,
+ char_table: CharTable,
}
impl<'a, Writer: Write> PrinterBuilder<'a, Writer> {
@@ -182,6 +222,7 @@ impl<'a, Writer: Write> PrinterBuilder<'a, Writer> {
group_size: 1,
base: Base::Hexadecimal,
endianness: Endianness::Big,
+ char_table: CharTable::AsciiOnly,
}
}
@@ -230,6 +271,11 @@ impl<'a, Writer: Write> PrinterBuilder<'a, Writer> {
self
}
+ pub fn char_table(mut self, char_table: CharTable) -> Self { // builder setter: stores the character table that `build` passes on to `Printer::new`
+ self.char_table = char_table;
+ self // hand the builder back by value so calls can be chained
+ }
+
pub fn build(self) -> Printer<'a, Writer> {
Printer::new(
self.writer,
@@ -242,6 +288,7 @@ impl<'a, Writer: Write> PrinterBuilder<'a, Writer> {
self.group_size,
self.base,
self.endianness,
+ self.char_table,
)
}
}
@@ -285,6 +332,7 @@ impl<'a, Writer: Write> Printer<'a, Writer> {
group_size: u8,
base: Base,
endianness: Endianness,
+ char_table: CharTable,
) -> Printer<'a, Writer> {
Printer {
idx: 0,
@@ -304,7 +352,7 @@ impl<'a, Writer: Write> Printer<'a, Writer> {
})
.collect(),
byte_char_panel: (0u8..=u8::MAX)
- .map(|i| format!("{}", Byte(i).as_char()))
+ .map(|i| format!("{}", Byte(i).as_char(char_table)))
.collect(),
byte_hex_panel_g: (0u8..=u8::MAX).map(|i| format!("{i:02x}")).collect(),
squeezer: if use_squeeze {
@@ -732,6 +780,7 @@ mod tests {
1,
Base::Hexadecimal,
Endianness::Big,
+ CharTable::AsciiOnly,
);
printer.print_all(input).unwrap();
@@ -787,6 +836,7 @@ mod tests {
1,
Base::Hexadecimal,
Endianness::Big,
+ CharTable::AsciiOnly,
);
printer.display_offset(0xdeadbeef);
@@ -821,6 +871,7 @@ mod tests {
1,
Base::Hexadecimal,
Endianness::Big,
+ CharTable::AsciiOnly,
);
printer.print_all(input).unwrap();
@@ -881,6 +932,7 @@ mod tests {
1,
Base::Hexadecimal,
Endianness::Big,
+ CharTable::AsciiOnly,
);
printer.print_all(input).unwrap();
diff --git a/src/main.rs b/src/main.rs
index 5e022f3..02ecf0b 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -17,7 +17,7 @@ use thiserror::Error as ThisError;
use terminal_size::terminal_size;
-use hexyl::{Base, BorderStyle, Endianness, Input, PrinterBuilder};
+use hexyl::{Base, BorderStyle, CharTable, Endianness, Input, PrinterBuilder};
#[cfg(test)]
mod tests;
@@ -213,6 +213,17 @@ fn run() -> Result<()> {
.help("An alias for '--endianness=little'."),
)
.arg(
+ Arg::new("character_table") // CLI option choosing the glyph table for the character panel; read back under the same id
+ .long("character_table")
+ .value_name("FORMAT")
+ .value_parser(["codepage-437", "ascii-only"]) // keep in sync with the match that maps these strings to `CharTable`
+ .help(
+ "The character table that should be used. 'ascii-only' \
+ will show dots for non-ASCII characters, and 'codepage-437' \
+ will use Code page 437 for those characters."
+ ),
+ )
+ .arg(
Arg::new("base")
.short('b')
.long("base")
@@ -469,6 +480,17 @@ fn run() -> Result<()> {
("big", _) => Endianness::Big,
_ => unreachable!(),
};
+
+ let char_table = match matches
+ .get_one::<String>("character_table")
+ .unwrap_or(&String::from("ascii-only"))
+ .as_ref()
+ {
+ "ascii-only" => CharTable::AsciiOnly,
+ "codepage-437" => CharTable::CP437,
+ _ => unreachable!(),
+ };
+
let stdout = io::stdout();
let mut stdout_lock = BufWriter::new(stdout.lock());
@@ -482,6 +504,7 @@ fn run() -> Result<()> {
.group_size(group_size)
.with_base(base)
.endianness(endianness)
+ .char_table(char_table)
.build();
printer.display_offset(skip_offset + display_offset);
printer.print_all(&mut reader).map_err(|e| anyhow!(e))?;