// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// spell-checker:ignore (ToDOs) corasick memchr Roff trunc oset iset CHARCLASS
use clap::{crate_version, Arg, ArgAction, Command};
use regex::Regex;
use std::cmp;
use std::collections::{BTreeSet, HashMap, HashSet};
use std::error::Error;
use std::fmt::{Display, Formatter, Write as FmtWrite};
use std::fs::File;
use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Read, Write};
use std::num::ParseIntError;
use uucore::display::Quotable;
use uucore::error::{FromIo, UError, UResult};
use uucore::{format_usage, help_about, help_usage};
/// Usage text, obtained at compile time through uucore's `help_usage!` macro
/// with "ptx.md" as its argument.
const USAGE: &str = help_usage!("ptx.md");
/// About text, obtained at compile time through uucore's `help_about!` macro
/// with "ptx.md" as its argument.
const ABOUT: &str = help_about!("ptx.md");
/// The four characters `^`, `-`, `]` and `\` (the backslash is escaped in the
/// literal).
// NOTE(review): presumably the characters that must be escaped when placed
// inside a regex character class (`[...]`); the use site is not in this part
// of the file -- confirm.
const REGEX_CHARCLASS: &str = "^-]\\";
/// Output format selector, stored in `Config::format`.
#[derive(Debug)]
enum OutFormat {
// The format chosen by `Config::default()`.
// NOTE(review): presumably plain output without typesetting markup -- confirm.
Dumb,
// NOTE(review): presumably roff typesetter output -- confirm against the writer code.
Roff,
// NOTE(review): presumably TeX typesetter output -- confirm against the writer code.
Tex,
}
/// Run-time settings for the program; the baseline values come from the
/// `Default` implementation.
#[derive(Debug)]
struct Config {
// Which output format to produce (default: `OutFormat::Dumb`).
format: OutFormat,
// Default `true`. When `true`, a break-character file supplies the complete
// set of break characters; when `false`, space, tab and newline are always
// part of that set as well (see `WordFilter::new`).
gnu_ext: bool,
// Default `false`. NOTE(review): presumably enables automatically generated
// references -- confirm against the option parsing.
auto_ref: bool,
// Default `false`. NOTE(review): presumably means references are taken from
// the input itself -- confirm.
input_ref: bool,
// Default `false`. NOTE(review): presumably places references on the right
// side of the output -- confirm.
right_ref: bool,
// Default `false`. NOTE(review): presumably makes word comparison
// case-insensitive -- confirm.
ignore_case: bool,
// Default "xx". NOTE(review): presumably the macro name emitted in
// typesetter output -- confirm.
macro_name: String,
// Default "/". NOTE(review): presumably the marker inserted where text is
// truncated -- confirm.
trunc_str: String,
// Default "\w+" (a regular expression source string).
// NOTE(review): its exact role is not visible in this part of the file.
context_regex: String,
// Default 72. NOTE(review): presumably the output line width in columns -- confirm.
line_width: usize,
// Default 3. NOTE(review): presumably the gap between output fields in
// columns -- confirm.
gap_size: usize,
}
impl Default for Config {
    /// Returns the baseline configuration: `Dumb` output format, `gnu_ext`
    /// switched on, every other flag switched off, and the stock strings and
    /// sizes listed below.
    fn default() -> Self {
        Self {
            format: OutFormat::Dumb,

            // Boolean switches: only `gnu_ext` starts out enabled.
            gnu_ext: true,
            auto_ref: false,
            input_ref: false,
            right_ref: false,
            ignore_case: false,

            // String-valued settings.
            macro_name: String::from("xx"),
            trunc_str: String::from("/"),
            context_regex: String::from("\\w+"),

            // Numeric settings.
            line_width: 72,
            gap_size: 3,
        }
    }
}
/// Reads the file named by the command-line option `option` and returns its
/// lines as a set of unique words.
///
/// Every line of the file becomes one entry of the set; duplicate lines
/// collapse into a single entry.
///
/// # Errors
///
/// Returns the underlying I/O error if the file cannot be opened or if
/// reading any line fails. Reading stops at the first failing line.
///
/// # Panics
///
/// Panics if `matches` holds no value for `option`.
fn read_word_filter_file(
    matches: &clap::ArgMatches,
    option: &str,
) -> std::io::Result<HashSet<String>> {
    // Borrow the file name straight out of `matches`: `File::open` only needs
    // something path-like, so no owned copy of the string is required.
    let filename = matches
        .get_one::<String>(option)
        .expect("parsing options failed!");
    let file = File::open(filename)?;
    // Collecting into `io::Result<HashSet<_>>` short-circuits on the first
    // line that fails to read, exactly like a loop using `?` would.
    BufReader::new(file).lines().collect()
}
/// Loads the file named by the command-line option `option` and returns the
/// set of distinct characters it contains (used with the break-file option).
///
/// Fails with the underlying I/O error if the file cannot be opened or read
/// into a string; panics if `matches` holds no value for `option`.
fn read_char_filter_file(
    matches: &clap::ArgMatches,
    option: &str,
) -> std::io::Result<HashSet<char>> {
    let path = matches
        .get_one::<String>(option)
        .expect("parsing options failed!");

    // Pull the entire file into memory as text.
    let mut contents = String::new();
    File::open(path)?.read_to_string(&mut contents)?;

    // Gather every character; the set takes care of de-duplication.
    let mut unique_chars = HashSet::new();
    unique_chars.extend(contents.chars());
    Ok(unique_chars)
}
/// Word-selection settings built by `WordFilter::new` from the parsed
/// command-line arguments and the `Config`.
// NOTE(review): the part of `WordFilter::new` that fills these fields is not
// in view here; the field notes below are inferred from names and from the
// visible start of the constructor -- confirm.
#[derive(Debug)]
struct WordFilter {
// Presumably `true` when an only-file was given on the command line.
only_specified: bool,
// Presumably `true` when an ignore-file was given on the command line.
ignore_specified: bool,
// Presumably the words read (one per line) from the only-file.
only_set: HashSet<String>,
// Presumably the words read (one per line) from the ignore-file.
ignore_set: HashSet<String>,
// Presumably the source text of the regular expression that recognizes a word.
word_regex: String,
}
impl WordFilter {
#[allow(clippy::cognitive_complexity)]
fn new(matches: &clap::ArgMatches, config: &Config) -> UResult<Self> {
let (o, oset): (bool, HashSet<String>) = if matches.contains_id(options::ONLY_FILE) {
let words =
read_word_filter_file(matches, options::ONLY_FILE).map_err_context(String::new)?;
(true, words)
} else {
(false, HashSet::new())
};
let (i, iset): (bool, HashSet<String>) = if matches.contains_id(options::IGNORE_FILE) {
let words = read_word_filter_file(matches, options::IGNORE_FILE)
.map_err_context(String::new)?;
(true, words)
} else {
(false, HashSet::new())
};
let break_set: Option<HashSet<char>> = if matches.contains_id(options::BREAK_FILE)
&& !matches.contains_id(options::WORD_REGEXP)
{
let chars =
read_char_filter_file(matches, options::BREAK_FILE).map_err_context(String::new)?;
let mut hs: HashSet<char> = if config.gnu_ext {
HashSet::new() // really only chars found in file
} else {
// GNU off means at least these are considered
[' ', '\t', '\n'].iter().cloned().collect()
};
hs.extend(chars);
Some(hs)
} else {
// if -W takes precedence or default
None
};
// Ignore empty string regex from cmd-line-args
let arg_reg: Option<String> = if matches.contains_id(options::WORD_REGEXP) {
match matches.get_one::<String>(options::WORD_REGEXP) {
Some(v) =>