diff --git a/examples/bin2h.rs b/examples/bin2h.rs index 774a177..16f9b07 100644 --- a/examples/bin2h.rs +++ b/examples/bin2h.rs @@ -203,19 +203,22 @@ pub fn main() -> ExitCode { // Read & parse arguments from the command line, store results into the above structure enum Arg { Out, Bin, Txt, Whitespace, Help } const OPTIONS: Opts = Opts::new(&[ + Opt::flag(Arg::Help, &["--help", "-h"], "Show this help message and exit"), Opt::positional_required(Arg::Out, "out", "Path to generated header file"), Opt::value(Arg::Bin, &["--bin", "-b"], "data.bin", "Add a binary file"), Opt::value(Arg::Txt, &["--txt", "-t"], "text.txt", "Add a text file"), - Opt::value(Arg::Whitespace, &["--whitespace"], "\"string\"", "Emitted indentation (Default: \"\\t\")"), - Opt::flag(Arg::Help, &["--help", "-h"], "Show this help message and exit"), + Opt::value(Arg::Whitespace, &["--whitespace"], "\" \"", "Emitted indentation (Default: \"\\t\")"), ]); - match OPTIONS.parse_env(|id, _opt, _name, arg| { + match OPTIONS.parse_easy(|program_name, id, _opt, _name, arg| { match id { Arg::Out => { arguments.out = arg.into(); } Arg::Bin => { jobs.push(Job { job_type: JobType::Binary, path: arg.into() }); } Arg::Txt => { jobs.push(Job { job_type: JobType::Text, path: arg.into() }); } Arg::Whitespace => { arguments.whitespace = arg.into(); } - Arg::Help => { todo!(); } + Arg::Help => { + OPTIONS.print_full_help(program_name); + return Ok(ParseControl::Quit); + } } Ok(ParseControl::Continue) }) { diff --git a/src/argparse.rs b/src/argparse.rs index b845f0a..fd9e608 100644 --- a/src/argparse.rs +++ b/src/argparse.rs @@ -25,7 +25,7 @@ pub enum ParseControl { } /// Result type used by the handler passed to the parser -type HandlerResult<'a, T> = Result>; +type HandlerResult<'a, T> = core::result::Result>; #[derive(Debug)] pub enum ParseError<'a> { @@ -110,19 +110,19 @@ impl Default for ParserState { impl Opts { /// Parse an iterator of strings as arguments pub fn parse<'a, S: AsRef + 'a, I: Iterator>(&self, 
program_name: &str, args: I, - mut handler: impl FnMut(&ID, &Opt, &str, &str) -> HandlerResult<'a, ParseControl>, - error: impl FnOnce(ParseError), + mut handler: impl FnMut(&str, &ID, &Opt, &str, &str) -> HandlerResult<'a, ParseControl>, + error: impl FnOnce(&str, ParseError), ) -> ParseResult { let mut state = ParserState::default(); for arg in args { // Fetch the next token - match self.next(&mut state, arg.as_ref(), &mut handler) { + match self.next(&mut state, arg.as_ref(), program_name, &mut handler) { Ok(ParseControl::Continue) => {} Ok(ParseControl::Stop) => { break; } Ok(ParseControl::Quit) => { return ParseResult::ExitSuccess; } Err(err) => { // Call the error handler - error(err); + error(program_name, err); return ParseResult::ExitError; } } @@ -130,7 +130,7 @@ impl Opts { // Ensure that value options are provided a value if let Some((name, _)) = state.expects_arg.take() { - error(ParseError::ExpectArgument(name)); + error(program_name, ParseError::ExpectArgument(name)); return ParseResult::ExitError; } @@ -138,8 +138,8 @@ impl Opts { // Ensure that all required positional arguments have been provided for option in self.options[state.positional_index..].iter() { - if option.r#type == OptType::Positional && option.required { - error(ParseError::RequiredPositional(option.first_name())); + if matches!(option.r#type, OptType::Positional) && option.required { + error(program_name, ParseError::RequiredPositional(option.first_name())); return ParseResult::ExitError; } } @@ -149,12 +149,12 @@ impl Opts { } /// Parse the next token in the argument stream - fn next<'a, 'b>(&self, state: &mut ParserState, token: &'b str, - handler: &mut impl FnMut(&ID, &Opt, &str, &str) -> HandlerResult<'a, ParseControl> + fn next<'a, 'b>(&self, state: &mut ParserState, token: &'b str, program_name: &str, + handler: &mut impl FnMut(&str, &ID, &Opt, &str, &str) -> HandlerResult<'a, ParseControl> ) -> HandlerResult<'b, ParseControl> where 'a: 'b { let mut call_handler = |option: 
&Opt, name, value| { - match handler(&option.id, option, name, value) { - // HACK: ensure the string fields are set properly, because coerced + match handler(program_name, &option.id, option, name, value) { + // HACK: Ensure the string fields are set properly, because coerced // ParseIntError/ParseFloatError will have the string fields blanked. Err(ParseError::ArgumentError("", "", kind)) => Err(ParseError::ArgumentError(name, token, kind)), @@ -178,7 +178,7 @@ impl Opts { // Match a suitable option by name (ignoring the first flag character & skipping positional arguments) let (name, option) = self.options.iter() - .filter(|opt| opt.r#type != OptType::Positional) + .filter(|opt| matches!(opt.r#type, OptType::Flag | OptType::Value)) .find_map(|opt| opt.match_name(option_str, 1).map(|name| (name, opt))) .ok_or(ParseError::UnknownOption(option_str))?; @@ -200,8 +200,8 @@ impl Opts { } else { // Find the next positional argument for (i, option) in self.options[state.positional_index..].iter().enumerate() { - if option.r#type == OptType::Positional { - handler(&option.id, option, option.first_name(), token)?; + if matches!(option.r#type, OptType::Positional) { + handler(program_name, &option.id, option, option.first_name(), token)?; state.positional_index += i + 1; return Ok(ParseControl::Continue); } diff --git a/src/const_utf8.rs b/src/const_utf8.rs new file mode 100644 index 0000000..a3fc667 --- /dev/null +++ b/src/const_utf8.rs @@ -0,0 +1,125 @@ +/* jaarg - Argument parser + * SPDX-FileCopyrightText: (C) 2025 Gay Pizza Specifications + * SPDX-License-Identifier: MIT + */ + +/// Fully const fn nostd UTF-8 character iterator. +/// Assumes a well-formed UTF-8 input string. Doesn't take into account graphemes. +pub(crate) struct CharIterator<'a> { + bytes: &'a [u8], + index: usize +} + +impl<'a> CharIterator<'a> { + /// Create a char iterator from an immutable string slice. 
+ #[inline] + pub(crate) const fn from(value: &'a str) -> Self { + Self { + bytes: value.as_bytes(), + index: 0, + } + } + +} + +impl CharIterator<'_> { + /// Gets a count of the number of Unicode characters (not graphemes) in the string. + pub(crate) const fn count(self) -> usize { + let len = self.bytes.len(); + let mut count = 0; + let mut i = 0; + while i < len { + // Count all bytes that don't start with 0b10xx_xxxx (UTF-8 continuation byte) + if (self.bytes[i] as i8) >= -64 { + count += 1; + } + i += 1; + } + count + } + + /// Gets the next character in a well-formed UTF-8 string, or None for end of string or errors. + pub(crate) const fn next(&mut self) -> Option { + /// UTF-8 2-byte flag bits + const MULTIBYTE_2: u8 = 0b1100_0000; + /// UTF-8 3-byte flag bits + const MULTIBYTE_3: u8 = 0b1110_0000; + /// UTF-8 4-byte flag bits + const MULTIBYTE_4: u8 = 0b1111_0000; + + /// Mask for UTF-8 2-byte flag bits + const MULTIBYTE_2_MASK: u8 = 0b1110_0000; + /// Mask for UTF-8 3-byte flag bits + const MULTIBYTE_3_MASK: u8 = 0b1111_0000; + /// Mask for UTF-8 4-byte flag bits + const MULTIBYTE_4_MASK: u8 = 0b1111_1000; + + /// UTF-8 continuation flag bits + const CONTINUATION: u8 = 0b1000_0000; + /// Mask for the UTF-8 continuation flag bits + const CONTINUATION_MASK: u8 = 0b1100_0000; + + /// Checks if a byte begins with the UTF-8 continuation bits + #[inline] const fn is_continuation(b: u8) -> bool { b & CONTINUATION_MASK == CONTINUATION } + /// Gets the value bits of a UTF-8 continuation byte as u32 + #[inline] const fn cont_bits(b: u8) -> u32 { (b & !CONTINUATION_MASK) as u32 } + + // Return early if we reached the end of the string + if self.index >= self.bytes.len() { + return None; + } + + let byte0 = self.bytes[self.index]; + + // Get the length of the next multibyte UTF-8 character + let len = match byte0 { + ..0x80 => 1, + _ if (byte0 & MULTIBYTE_2_MASK) == MULTIBYTE_2 => 2, + _ if (byte0 & MULTIBYTE_3_MASK) == MULTIBYTE_3 => 3, + _ if (byte0 & 
MULTIBYTE_4_MASK) == MULTIBYTE_4 => 4, + _ => { + return None; + } + }; + + // Return early for incomplete sequences + if len > self.bytes.len() - self.index { + return None; + } + + // Try to read the next multibyte character + let Some(result) = (match len { + 1 => Some(byte0 as char), + 2 if is_continuation(self.bytes[self.index + 1]) + => { + let cp = (((byte0 & !MULTIBYTE_2_MASK) as u32) << 6) | cont_bits(self.bytes[self.index + 1]); + char::from_u32(cp) + }, + 3 if is_continuation(self.bytes[self.index + 1]) + && is_continuation(self.bytes[self.index + 2]) + => { + let cp = (((byte0 & !MULTIBYTE_3_MASK) as u32) << 12) + | (cont_bits(self.bytes[self.index + 1]) << 6) + | cont_bits(self.bytes[self.index + 2]); + char::from_u32(cp) + } + 4 if is_continuation(self.bytes[self.index + 1]) + && is_continuation(self.bytes[self.index + 2]) + && is_continuation(self.bytes[self.index + 3]) + => { + let cp = (((byte0 & !MULTIBYTE_4_MASK) as u32) << 18) + | (cont_bits(self.bytes[self.index + 1]) << 12) + | (cont_bits(self.bytes[self.index + 2]) << 6) + | cont_bits(self.bytes[self.index + 3]); + char::from_u32(cp) + } + _ => None, + }) else { + return None + }; + + // Advance the internal character index and return success + self.index += len; + Some(result) + } +} diff --git a/src/help.rs b/src/help.rs new file mode 100644 index 0000000..984929c --- /dev/null +++ b/src/help.rs @@ -0,0 +1,177 @@ +/* jaarg - Argument parser + * SPDX-FileCopyrightText: (C) 2025 Gay Pizza Specifications + * SPDX-License-Identifier: MIT + */ + +pub struct HelpWriterContext<'a, ID: 'static> { + pub options: &'a Opts, + pub program_name: &'a str, +} + +pub trait HelpWriter<'a, ID: 'static>: core::fmt::Display { + fn new(ctx: HelpWriterContext<'a, ID>) -> Self; +} + +pub struct StandardShortUsageWriter<'a, ID: 'static>(HelpWriterContext<'a, ID>); + +impl<'a, ID: 'static> HelpWriter<'a, ID> for StandardShortUsageWriter<'a, ID> { + fn new(ctx: HelpWriterContext<'a, ID>) -> Self { Self(ctx) } +} + 
+impl core::fmt::Display for StandardShortUsageWriter<'_, ID> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "Usage: {}", self.0.program_name)?; + + // Write option parameter arguments + for option in self.0.options.options.iter() + .filter(|o| matches!(o.r#type, OptType::Value | OptType::Flag)) { + write!(f, " {}", if option.required { '<' } else { '[' })?; + match (option.first_short_name(), option.first_long_name()) { + (Some(short_name), Some(long_name)) => write!(f, "{short_name}|{long_name}")?, + (Some(short_name), None) => f.write_str(short_name)?, + (None, Some(long_name)) => f.write_str(long_name)?, + _ => unreachable!(), + } + if let Some(value_name) = option.value_name { + write!(f, " {value_name}")?; + } + write!(f, "{}", if option.required { '>' } else { ']' })?; + } + + // Write positional arguments + for option in self.0.options.options.iter() + .filter(|o| matches!(o.r#type, OptType::Positional)) { + let name = option.first_name(); + match option.required { + true => write!(f, " <{name}>")?, + false => write!(f, " [{name}]")?, + } + } + Ok(()) + } +} + +pub struct StandardFullHelpWriter<'a, ID: 'static>(HelpWriterContext<'a, ID>); + +impl<'a, ID: 'static> HelpWriter<'a, ID> for StandardFullHelpWriter<'a, ID> { + fn new(ctx: HelpWriterContext<'a, ID>) -> Self { Self(ctx) } +} + +impl core::fmt::Display for StandardFullHelpWriter<'_, ID> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + use core::fmt::Write; + + // Base usage + write!(f, "Usage: {}", self.0.program_name)?; + let short_flag = self.0.options.flag_chars.chars().next().unwrap(); + + // Write optional short options + let mut first = true; + for option in self.0.options.options { + if let (OptType::Flag | OptType::Value, false) = (option.r#type, option.required) { + if let Some(c) = option.first_short_name_char() { + if first { + write!(f, " [{short_flag}")?; + first = false; + } + f.write_char(c)?; + } + } + } + if !first { 
+ f.write_char(']')?; + } + + // Write required short options + first = true; + for option in self.0.options.options { + if let (OptType::Flag | OptType::Value, true) = (option.r#type, option.required) { + if let Some(c) = option.first_short_name_char() { + if first { + write!(f, " <{short_flag}")?; + first = false; + } + f.write_char(c)?; + } + } + } + if !first { + f.write_char('>')?; + } + + // Write positional arguments + for option in self.0.options.options.iter() + .filter(|o| matches!(o.r#type, OptType::Positional)) { + let name = option.first_name(); + match option.required { + true => write!(f, " <{name}>")?, + false => write!(f, " [{name}]")?, + } + } + writeln!(f)?; + + fn calculate_left_pad(option: &Opt) -> usize { + (match option.names { + OptIdentifier::Single(name) => name.chars().count(), + OptIdentifier::Multi(names) => (names.len() - 1) * 3 + names.iter() + .fold(0, |accum, name| accum + name.chars().count()), + }) + option.value_name.map_or(0, |v| v.len() + 3) + } + + // Determine the alignment width from the longest option parameter + let align_width = 2 + self.0.options.options.iter() + .map(|o| calculate_left_pad(o)).max().unwrap_or(0); + + // Write positional argument descriptions + first = true; + for option in self.0.options.options.iter() + .filter(|o| matches!(o.r#type, OptType::Positional)) { + if first { + // Write separator and positional section header + writeln!(f)?; + writeln!(f, "Positional arguments:")?; + first = false; + } + + // Write positional argument line + writeln!(f, " {name} {:. { + write!(f, " {name}")?; + } + OptIdentifier::Multi(names) => for (i, name) in names.iter().enumerate() { + write!(f, "{prefix}{name}", prefix = if i == 0 { " " } else { " | " })?; + } + } + + // Write value argument for value options parameters + if let Some(value_name) = option.value_name { + write!(f, " <{value_name}>")?; + } + + // Write padding and help text + writeln!(f, " {:. 
{ required: bool, } +// TODO: Improve this interface by making the name field take AsOptIdentifier when const traits are stabilised impl Opt { /// A positional argument that is parsed sequentially without being invoked by an option flag pub const fn positional(id: ID, name: &'static str, help_string: &'static str) -> Self { @@ -36,7 +37,7 @@ impl Opt { pub const fn positional_required(id: ID, name: &'static str, help_string: &'static str) -> Self { Self { id, names: OptIdentifier::Single(name), value_name: None, help_string, r#type: OptType::Positional, required: true } } - /// An flag-type option that takes no value + /// A flag-type option that takes no value pub const fn flag(id: ID, names: &'static[&'static str], help_string: &'static str) -> Self { Self { id, names: OptIdentifier::Multi(names), value_name: None, help_string, r#type: OptType::Flag, required: false } } @@ -56,14 +57,90 @@ impl Opt { impl Opt { /// Get the first name of the option - fn first_name(&self) -> &str { + const fn first_name(&self) -> &str { match self.names { OptIdentifier::Single(name) => name, OptIdentifier::Multi(names) => names.first().unwrap(), } } - /// Search for a matching name in the option, offset allows to skip the first characters in the comparsion + /// Get the first long option name, if one exists + const fn first_long_name(&self) -> Option<&'static str> { + match self.names { + OptIdentifier::Single(name) => if name.len() >= 3 { Some(name) } else { None }, + // Can be replaced with `find_map` once iterators are const fn + OptIdentifier::Multi(names) => { + let mut i = 0; + while i < names.len() { + if const_utf8::CharIterator::from(names[i]).count() >= 3 { + return Some(names[i]); + } + i += 1; + } + None + } + } + } + + /// Get the first short option name, if one exists + const fn first_short_name(&self) -> Option<&'static str> { + const fn predicate(name: &str) -> bool { + let mut chars = const_utf8::CharIterator::from(name); + if let Some(first) = chars.next() { + if 
let Some(c) = chars.next() { + if c != first && chars.next().is_none() { + return true + } + } + } + false + } + match self.names { + OptIdentifier::Single(name) => if predicate(&name) { Some(name) } else { None }, + // Can be replaced with `find_map` once iterators are const fn + OptIdentifier::Multi(names) => { + let mut i = 0; + while i < names.len() { + if predicate(names[i]) { + return Some(names[i]); + } + i += 1; + } + None + } + } + } + + /// Get the first applicable short option's flag character, if one exists + const fn first_short_name_char(&self) -> Option { + const fn predicate(name: &str) -> Option { + let mut chars = const_utf8::CharIterator::from(name); + if let Some(first) = chars.next() { + if let Some(c) = chars.next() { + if c != first && chars.next().is_none() { + return Some(c) + } + } + } + None + } + match self.names { + OptIdentifier::Single(name) => predicate(&name), + // Can be replaced with `find_map` once iterators are const fn + OptIdentifier::Multi(names) => { + let mut i = 0; + while i < names.len() { + if let Some(c) = predicate(names[i]) { + return Some(c); + } + i += 1; + } + None + } + } + } + + /// Search for a matching name in the option, offset allows to skip the first characters in the comparison fn match_name(&self, string: &str, offset: usize) -> Option<&'static str> { match self.names { OptIdentifier::Single(name) => diff --git a/src/std.rs b/src/std.rs index bd85f1e..728dfb5 100644 --- a/src/std.rs +++ b/src/std.rs @@ -5,17 +5,47 @@ extern crate std; -use crate::{HandlerResult, Opt, Opts, ParseControl, ParseResult}; +use std::{env, eprintln, println}; +use std::path::Path; +use crate::{HandlerResult, Opt, Opts, ParseControl, ParseResult, StandardShortUsageWriter, HelpWriterContext, StandardFullHelpWriter, HelpWriter}; impl Opts { - /// Wrapper around `parse` that gathers arguments from the command line and prints errors to stderr. 
+ /// Wrapper around `Opts::parse` that gathers arguments from the command line and prints errors to stderr. + /// The errors are formatted in a standard user-friendly format. /// - /// Requires std - pub fn parse_env<'a>(&self, handler: impl FnMut(&ID, &Opt, &str, &str) -> HandlerResult<'a, ParseControl> + /// Requires features = [std] + pub fn parse_easy<'a>(&self, handler: impl FnMut(&str, &ID, &Opt, &str, &str) -> HandlerResult<'a, ParseControl> ) -> ParseResult { - let mut argv = std::env::args(); + let mut argv = env::args(); let argv0 = argv.next().unwrap(); - let program_name = std::path::Path::new(&argv0).file_name().unwrap().to_string_lossy(); - self.parse(&program_name, argv, handler, |e| { std::eprintln!("error: {e}") }) + let program_name = Path::new(&argv0).file_name().unwrap().to_string_lossy(); + self.parse(&program_name, argv, handler, |program_name, e| { + eprintln!("{program_name}: {e}"); + self.eprint_help::>(program_name); + eprintln!("Run '{program_name} --help' to view all available options."); + }) + } + + /// Prints full help text for the options using the standard full help writer + /// + /// Requires features = [std] + pub fn print_full_help(&self, program_name: &str) { + self.print_help::>(program_name); + } + + /// Print help text to stdout using the provided help writer + /// + /// Requires features = [std] + pub fn print_help<'a, W: HelpWriter<'a, ID>>(&'a self, program_name: &'a str) { + let ctx = HelpWriterContext { options: self, program_name }; + println!("{}", W::new(ctx)); + } + + /// Print help text to stderr using the provided help writer + /// + /// Requires features = [std] + pub fn eprint_help<'a, W: HelpWriter<'a, ID>>(&'a self, program_name: &'a str) { + let ctx = HelpWriterContext { options: self, program_name }; + eprintln!("{}", W::new(ctx)); } }