diff options
author | Sharif Haason <ssh128@scarletmail.rutgers.edu> | 2023-06-13 14:47:44 -0400 |
---|---|---|
committer | Sharif Haason <ssh128@scarletmail.rutgers.edu> | 2023-12-10 13:21:23 -0500 |
commit | 6a230d34a4681aebfe90a738a253dca22aea694b (patch) | |
tree | 2dacb5ab77dde1b7d44ae092950303ef0dfca1b8 /src | |
parent | 663e4c19ffb79ff12f5e336cda1d064d0e8fb623 (diff) |
Implement character table control and codepage 437 option
Diffstat (limited to 'src')
-rw-r--r-- | src/lib.rs | 72 | ||||
-rw-r--r-- | src/main.rs | 25 |
2 files changed, 86 insertions, 11 deletions
@@ -21,6 +21,29 @@ const COLOR_ASCII_OTHER: &[u8] = colors::Green::ANSI_FG.as_bytes();
 const COLOR_NONASCII: &[u8] = colors::Yellow::ANSI_FG.as_bytes();
 const COLOR_RESET: &[u8] = colors::Default::ANSI_FG.as_bytes();
 
+#[rustfmt::skip]
+const CP437: [char; 256] = [
+    // use https://en.wikipedia.org/w/index.php?title=Code_page_437&oldid=978947122
+    // not ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT
+    // because we want the graphic versions of 01h–1Fh + 7Fh
+    '⋄','☺','☻','♥','♦','♣','♠','•','◘','○','◙','♂','♀','♪','♫','☼',
+    '►','◄','↕','‼','¶','§','▬','↨','↑','↓','→','←','∟','↔','▲','▼',
+    ' ','!','"','#','$','%','&','\'','(',')','*','+',',','-','.','/',
+    '0','1','2','3','4','5','6','7','8','9',':',';','<','=','>','?',
+    '@','A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
+    'P','Q','R','S','T','U','V','W','X','Y','Z','[','\\',']','^','_',
+    '`','a','b','c','d','e','f','g','h','i','j','k','l','m','n','o',
+    'p','q','r','s','t','u','v','w','x','y','z','{','|','}','~','⌂',
+    'Ç','ü','é','â','ä','à','å','ç','ê','ë','è','ï','î','ì','Ä','Å',
+    'É','æ','Æ','ô','ö','ò','û','ù','ÿ','Ö','Ü','¢','£','¥','₧','ƒ',
+    'á','í','ó','ú','ñ','Ñ','ª','º','¿','⌐','¬','½','¼','¡','«','»',
+    '░','▒','▓','│','┤','╡','╢','╖','╕','╣','║','╗','╝','╜','╛','┐',
+    '└','┴','┬','├','─','┼','╞','╟','╚','╔','╩','╦','╠','═','╬','╧',
+    '╨','╤','╥','╙','╘','╒','╓','╫','╪','┘','┌','█','▄','▌','▐','▀',
+    'α','ß','Γ','π','Σ','σ','µ','τ','Φ','Θ','Ω','δ','∞','φ','ε','∩',
+    '≡','±','≥','≤','⌠','⌡','÷','≈','°','∙','·','√','ⁿ','²','■',' ',
+];
+
 #[derive(Copy, Clone)]
 pub enum ByteCategory {
     Null,
@@ -31,6 +54,12 @@ pub enum ByteCategory {
 }
 
 #[derive(Copy, Clone)]
+pub enum CharTable {
+    AsciiOnly,
+    CP437,
+}
+
+#[derive(Copy, Clone)]
 pub enum Endianness {
     Little,
     Big,
@@ -74,17 +103,27 @@ impl Byte {
         }
     }
 
-    fn as_char(self) -> char {
+    fn as_char(self, char_table: CharTable) -> char {
         use crate::ByteCategory::*;
-
-        match self.category() {
-            Null => '⋄',
-            AsciiPrintable => self.0 as char,
-            AsciiWhitespace if self.0 == 0x20 => ' ',
-            AsciiWhitespace => '_',
-            AsciiOther => '•',
-            NonAscii => '×',
+        match char_table {
+            CharTable::AsciiOnly => match self.category() {
+                Null => '⋄',
+                AsciiPrintable => self.0 as char,
+                AsciiWhitespace if self.0 == 0x20 => ' ',
+                AsciiWhitespace => '_',
+                AsciiOther => '•',
+                NonAscii => '×',
+            },
+            CharTable::CP437 => CP437[self.0.to_ne_bytes()[0] as usize],
         }
+        // match self.category() {
+        //     Null => '⋄',
+        //     AsciiPrintable => self.0 as char,
+        //     AsciiWhitespace if self.0 == 0x20 => ' ',
+        //     AsciiWhitespace => '_',
+        //     AsciiOther => '•',
+        //     NonAscii => '×',
+        // }
     }
 }
 
@@ -167,6 +206,7 @@ pub struct PrinterBuilder<'a, Writer: Write> {
     group_size: u8,
     base: Base,
     endianness: Endianness,
+    char_table: CharTable,
 }
 
 impl<'a, Writer: Write> PrinterBuilder<'a, Writer> {
@@ -182,6 +222,7 @@ impl<'a, Writer: Write> PrinterBuilder<'a, Writer> {
             group_size: 1,
             base: Base::Hexadecimal,
             endianness: Endianness::Big,
+            char_table: CharTable::AsciiOnly,
         }
     }
 
@@ -230,6 +271,11 @@ impl<'a, Writer: Write> PrinterBuilder<'a, Writer> {
         self
     }
 
+    pub fn char_table(mut self, char_table: CharTable) -> Self {
+        self.char_table = char_table;
+        self
+    }
+
     pub fn build(self) -> Printer<'a, Writer> {
         Printer::new(
             self.writer,
@@ -242,6 +288,7 @@ impl<'a, Writer: Write> PrinterBuilder<'a, Writer> {
             self.group_size,
             self.base,
             self.endianness,
+            self.char_table,
         )
     }
 }
@@ -285,6 +332,7 @@ impl<'a, Writer: Write> Printer<'a, Writer> {
         group_size: u8,
         base: Base,
         endianness: Endianness,
+        char_table: CharTable,
     ) -> Printer<'a, Writer> {
         Printer {
             idx: 0,
@@ -304,7 +352,7 @@ impl<'a, Writer: Write> Printer<'a, Writer> {
                 })
                 .collect(),
             byte_char_panel: (0u8..=u8::MAX)
-                .map(|i| format!("{}", Byte(i).as_char()))
+                .map(|i| format!("{}", Byte(i).as_char(char_table)))
                 .collect(),
             byte_hex_panel_g: (0u8..=u8::MAX).map(|i| format!("{i:02x}")).collect(),
             squeezer: if use_squeeze {
@@ -732,6 +780,7 @@ mod tests {
             1,
             Base::Hexadecimal,
             Endianness::Big,
+            CharTable::AsciiOnly,
         );
 
         printer.print_all(input).unwrap();
@@ -787,6 +836,7 @@ mod tests {
             1,
             Base::Hexadecimal,
             Endianness::Big,
+            CharTable::AsciiOnly,
         );
 
         printer.display_offset(0xdeadbeef);
@@ -821,6 +871,7 @@ mod tests {
             1,
             Base::Hexadecimal,
             Endianness::Big,
+            CharTable::AsciiOnly,
         );
 
         printer.print_all(input).unwrap();
@@ -881,6 +932,7 @@ mod tests {
             1,
             Base::Hexadecimal,
             Endianness::Big,
+            CharTable::AsciiOnly,
         );
 
         printer.print_all(input).unwrap();
diff --git a/src/main.rs b/src/main.rs
index 5e022f3..02ecf0b 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -17,7 +17,7 @@ use thiserror::Error as ThisError;
 
 use terminal_size::terminal_size;
 
-use hexyl::{Base, BorderStyle, Endianness, Input, PrinterBuilder};
+use hexyl::{Base, BorderStyle, CharTable, Endianness, Input, PrinterBuilder};
 
 #[cfg(test)]
 mod tests;
@@ -213,6 +213,17 @@ fn run() -> Result<()> {
                 .help("An alias for '--endianness=little'."),
         )
         .arg(
+            Arg::new("character_table")
+                .long("character_table")
+                .value_name("FORMAT")
+                .value_parser(["codepage-437", "ascii-only"])
+                .help(
+                    "The character table that should be used. 'ascii-only' \
+                    will show dots for non-ASCII characters, and 'codepage-437 \
+                    will use Code page 437 for those characters."
+                ),
+        )
+        .arg(
             Arg::new("base")
                 .short('b')
                 .long("base")
@@ -469,6 +480,17 @@ fn run() -> Result<()> {
         ("big", _) => Endianness::Big,
         _ => unreachable!(),
     };
+
+    let char_table = match matches
+        .get_one::<String>("character_table")
+        .unwrap_or(&String::from("ascii-only"))
+        .as_ref()
+    {
+        "ascii-only" => CharTable::AsciiOnly,
+        "codepage-437" => CharTable::CP437,
+        _ => unreachable!(),
+    };
+
     let stdout = io::stdout();
     let mut stdout_lock = BufWriter::new(stdout.lock());
 
@@ -482,6 +504,7 @@ fn run() -> Result<()> {
         .group_size(group_size)
         .with_base(base)
         .endianness(endianness)
+        .char_table(char_table)
         .build();
     printer.display_offset(skip_offset + display_offset);
     printer.print_all(&mut reader).map_err(|e| anyhow!(e))?;