From 3030c5cad9a32b8e399c40cc64606327049155bd Mon Sep 17 00:00:00 2001 From: a dinosaur Date: Tue, 28 Oct 2025 18:19:51 +1100 Subject: [PATCH] Initial parser implementation and bin2h example program --- .editorconfig | 17 ++++ .gitignore | 9 ++ Cargo.toml | 9 ++ LICENSE | 21 +++++ examples/bin2h.rs | 235 ++++++++++++++++++++++++++++++++++++++++++++++ src/argparse.rs | 213 +++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 13 +++ src/option.rs | 75 +++++++++++++++ src/options.rs | 22 +++++ src/std.rs | 21 +++++ 10 files changed, 635 insertions(+) create mode 100644 .editorconfig create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 LICENSE create mode 100644 examples/bin2h.rs create mode 100644 src/argparse.rs create mode 100644 src/lib.rs create mode 100644 src/option.rs create mode 100644 src/options.rs create mode 100644 src/std.rs diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..91ffc01 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,17 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +insert_final_newline = true +tab_width = 4 + +[*.rs] +indent_style = space +indent_size = 2 +trim_trailing_whitespace = true + +[*.py] +indent_style = tab +indent_size = tab +trim_trailing_whitespace = true diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2d194b4 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +.idea/ +.vs/ +.vscode/ + +Cargo.lock +target/ + +.DS_Store +Thumbs.db diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..f74ffb4 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "jaarg" +version = "0.0.1" +edition = "2021" +description = "It can parse your arguments you should use it it's called jaarg" + +[features] +default = ["std"] +std = [] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..499d8f5 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Gay Pizza Specifications + +Permission is hereby 
granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/examples/bin2h.rs b/examples/bin2h.rs new file mode 100644 index 0000000..774a177 --- /dev/null +++ b/examples/bin2h.rs @@ -0,0 +1,235 @@ +/* bin2h - jaarg example application + * SPDX-FileCopyrightText: (C) 2025 Gay Pizza Specifications + * SPDX-License-Identifier: MIT + */ + +use jaarg::{Opt, Opts, ParseControl, ParseResult}; +use std::fs::File; +use std::io::Write; +use std::io::{BufRead, BufReader, Seek, SeekFrom}; +use std::path::PathBuf; +use std::process::ExitCode; + +/// Strip disallowed characters from a C preprocessor label: digits are skipped while the output is still empty, non-alphanumeric characters become underscores, and runs of underscores are collapsed +fn sanitise_label(ident: &str) -> String { + let mut out = String::new(); + out.reserve(ident.len()); + // Prevent leading underscore + let mut last = '_'; + for mut i in ident.chars() { + if !out.is_empty() || !i.is_ascii_digit() { + if !i.is_alphanumeric() { + i = '_'; + } + if i != '_' || last != '_' { + out.push(i); + } + last = i; + } + } + // Prevent trailing underscore + if last == '_' { + out.pop(); + } + out +} + +/// Turn a filename into an include guard label: `BIN2H_` prefix, upper-cased sanitised name, and a guaranteed `_H` suffix +fn guard_name(name: &str) -> String { + let mut out = "BIN2H_".to_owned(); + out.reserve(name.len()); + out.extend(sanitise_label(name).chars().flat_map(|c| c.to_uppercase())); + // Ensure guard ends with _H + if !out.ends_with("_H") { + out += "_H"; + } + out +} + +/// Whether the job is for a plain text file or a binary file +enum JobType { + Binary, + Text, +} + +/// Structure for reading jobs, containing the path and type of job +struct Job { + job_type: JobType, + path: PathBuf +} + +struct Arguments { + out: PathBuf, + whitespace: String, +} + +impl Default for Arguments { + fn default() -> Self { + Self { + out: PathBuf::new(), + whitespace: "\t".into(), + } + } +} + +/// Write a `_SIZE` define and an `unsigned char` array from a binary file +fn bin2h(name: &str, mut file: File, out: &mut File, whitespace: &str) -> std::io::Result<()> { + let ident = sanitise_label(name); + + // Write length + let length = file.seek(SeekFrom::End(0))?; + file.seek(SeekFrom::Start(0))?; + writeln!(out, 
"#define {}_SIZE {}", ident.to_uppercase(), length)?; + + // Write signature + writeln!(out, "static const unsigned char {ident}[{length}] = {{")?; + + // Write values; the reader's 16-byte capacity caps each row at 16 bytes + let mut reader = BufReader::with_capacity(16, file); + let mut first_line = true; + loop { + // Get the next row of bytes + let bytes = reader.fill_buf()?; + let bytes_len = bytes.len(); + if bytes.is_empty() { + writeln!(out)?; + break; + } + + // Terminate the previous row + if first_line { + first_line = false; + } else { + writeln!(out, ",")?; + } + + // Write row as hex bytes + for (col, byte) in bytes.iter().enumerate() { + let prefix = if col == 0 { whitespace } else { ", " }; + write!(out, "{prefix}0x{byte:02X}")?; + } + reader.consume(bytes_len); + } + + // Write array terminator + writeln!(out, "}};")?; + Ok(()) +} + +/// Write a C-string from a plain text file, emitted as one quoted literal per input line +fn txt2h(name: &str, file: File, out: &mut File, whitespace: &str) -> std::io::Result<()> { + let ident = sanitise_label(name); + + // Write signature + writeln!(out, "static const char* const {ident} =")?; + + // Write lines + let mut reader = BufReader::new(file); + let mut line = String::new(); + let mut first_line = true; + loop { + if reader.read_line(&mut line)? 
== 0 { + // End of file + writeln!(out, ";")?; + break; + } + + // Separate lines + if first_line { + first_line = false; + } else { + writeln!(out)?; + } + + // Write line + write!(out, "{whitespace}\"")?; + for c in line.chars() { + match c { + // Escape backslash and double-quotes + '\\' => write!(out, "\\\\")?, + '"' => write!(out, "\\\"")?, + // Write control codes as character escapes + '\x07' => write!(out, "\\a")?, + '\x08' => write!(out, "\\b")?, + '\x0C' => write!(out, "\\f")?, + '\n' => write!(out, "\\n")?, + '\r' => write!(out, "\\r")?, + '\t' => write!(out, "\\t")?, + '\x0B' => write!(out, "\\v")?, + // Write ASCII control codes that don't have C character escapes as hex codes (NOTE(review): C hex escapes have no length limit, so a hex-digit character immediately after one would be absorbed into the escape; confirm and consider octal escapes) + _ if c.is_ascii_control() => write!(out, "\\x{:02X}", c as u32)?, + // Write remaining ASCII characters verbatim + _ if c.is_ascii() => write!(out, "{c}")?, + // Write non-ASCII characters as unicode escapes + ..'\u{10000}' => write!(out, "\\u{:04X}", c as u32)?, + _ => write!(out, "\\U{:08X}", c as u32)?, + } + } + write!(out, "\"")?; + line.clear(); + } + Ok(()) +} + +/// Generates and writes out a header file; panics if the output path has no file name or a job path has no file stem +fn write_h<'a, I: Iterator>(opt: &Arguments, jobs: I) -> std::io::Result<()> { + let mut out = File::create(&opt.out)?; + let guard = guard_name(&opt.out.file_name().unwrap().to_string_lossy()); + writeln!(out, "/*DO NOT EDIT")?; + writeln!(out, " * Autogenerated by bin2h")?; + writeln!(out, " */")?; + writeln!(out)?; + writeln!(out, "#ifndef {guard}")?; + writeln!(out, "#define {guard}")?; + writeln!(out)?; + for job in jobs { + let name = job.path.file_stem().unwrap().to_string_lossy(); + let file = File::open(&job.path)?; + match job.job_type { + JobType::Binary => bin2h(&name, file, &mut out, &opt.whitespace)?, + JobType::Text => txt2h(&name, file, &mut out, &opt.whitespace)?, + } + writeln!(out)?; + } + writeln!(out, "#endif/*{guard}*/")?; + Ok(()) +} + +pub fn main() -> ExitCode { + // Program arguments + let mut arguments = Arguments::default(); + let mut jobs = 
vec![]; + + // Read & parse arguments from the command line, store results into the structures above (NOTE: the help flag is not implemented yet and reaches todo!()) + enum Arg { Out, Bin, Txt, Whitespace, Help } + const OPTIONS: Opts = Opts::new(&[ + Opt::positional_required(Arg::Out, "out", "Path to generated header file"), + Opt::value(Arg::Bin, &["--bin", "-b"], "data.bin", "Add a binary file"), + Opt::value(Arg::Txt, &["--txt", "-t"], "text.txt", "Add a text file"), + Opt::value(Arg::Whitespace, &["--whitespace"], "\"string\"", "Emitted indentation (Default: \"\\t\")"), + Opt::flag(Arg::Help, &["--help", "-h"], "Show this help message and exit"), + ]); + match OPTIONS.parse_env(|id, _opt, _name, arg| { + match id { + Arg::Out => { arguments.out = arg.into(); } + Arg::Bin => { jobs.push(Job { job_type: JobType::Binary, path: arg.into() }); } + Arg::Txt => { jobs.push(Job { job_type: JobType::Text, path: arg.into() }); } + Arg::Whitespace => { arguments.whitespace = arg.into(); } + Arg::Help => { todo!(); } + } + Ok(ParseControl::Continue) + }) { + ParseResult::ContinueSuccess => { + // Generate header + match write_h(&arguments, jobs.iter()) { + Ok(_) => ExitCode::SUCCESS, + Err(err) => { + eprintln!("error: {err}"); + ExitCode::FAILURE + } + } + }, + ParseResult::ExitSuccess => { ExitCode::SUCCESS } + ParseResult::ExitError => { ExitCode::FAILURE } + } +} diff --git a/src/argparse.rs b/src/argparse.rs new file mode 100644 index 0000000..3bec8e5 --- /dev/null +++ b/src/argparse.rs @@ -0,0 +1,213 @@ +/* jaarg - Argument parser + * SPDX-FileCopyrightText: (C) 2025 Gay Pizza Specifications + * SPDX-License-Identifier: MIT + */ + +/// Enum describing the result of parsing arguments, and how the program should behave. 
+#[derive(Debug)] +pub enum ParseResult { + /// Parsing succeeded and program execution should continue + ContinueSuccess, + /// Parsing succeeded and program should exit with success (e.g. std::process::ExitCode::SUCCESS) + ExitSuccess, + /// There was an error while parsing and program should exit with failure (e.g. std::process::ExitCode::FAILURE) + ExitError, +} + +/// Execution control for the parser handler +pub enum ParseControl { + /// Continue parsing arguments + Continue, + /// Tell the parser to stop consuming tokens (treat as end of token stream); the end of parsing checks still run + Stop, + /// Tell the parser to stop parsing and quit early; this skips the end of parsing checks and `parse` returns `ExitSuccess` + Quit, +} + +/// Result type used by the handler passed to the parser +type HandlerResult<'a, T> = Result>; + +#[derive(Debug)] +pub enum ParseError<'a> { + UnknownOption(&'a str), + UnexpectedToken(&'a str), + ExpectArgument(&'a str), + UnexpectedArgument(&'a str), + ArgumentError(&'static str, &'a str, ParseErrorKind), + //TODO + //Exclusive(&'static str, &'a str), + RequiredPositional(&'static str), +} + +/// The kind of value conversion failure carried by `ParseError::ArgumentError` +#[derive(Debug)] +pub enum ParseErrorKind { + IntegerEmpty, + IntegerRange, + InvalidInteger, + InvalidFloat, +} + +impl core::fmt::Display for ParseError<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + Self::UnknownOption(o) => write!(f, "Unrecognised option '{o}'"), + Self::UnexpectedToken(t) => write!(f, "Unexpected positional argument '{t}'"), + Self::ExpectArgument(o) => write!(f, "Option '{o}' requires an argument"), + Self::UnexpectedArgument(o) => write!(f, "Flag '{o}' doesn't take an argument"), + Self::ArgumentError(o, a, ParseErrorKind::IntegerRange) + => write!(f, "Argument '{a}' out of range for option '{o}'"), + Self::ArgumentError(o, a, ParseErrorKind::InvalidInteger | ParseErrorKind::InvalidFloat) + => write!(f, "Invalid argument '{a}' for option '{o}'"), + Self::ArgumentError(o, _, ParseErrorKind::IntegerEmpty) + => 
write!(f, "Argument for option '{o}' cannot be empty"), + //Self::Exclusive(l, r) => write!(f, "Argument {l}: not allowed with argument {r}"), + Self::RequiredPositional(o) => write!(f, "Missing required positional argument '{o}'"), + } + } +} + +/// Convenience coercion for dealing with integer parsing errors +impl From for ParseError<'_> { + fn from(err: core::num::ParseIntError) -> Self { + use core::num::IntErrorKind; + // HACK: The empty option & argument fields will be fixed up by the parser + Self::ArgumentError("", "", match err.kind() { + IntErrorKind::Empty => ParseErrorKind::IntegerEmpty, + IntErrorKind::PosOverflow | IntErrorKind::NegOverflow | IntErrorKind::Zero + => ParseErrorKind::IntegerRange, + IntErrorKind::InvalidDigit | _ => ParseErrorKind::InvalidInteger, + }) + } +} + +/// Convenience coercion for dealing with floating-point parsing errors +impl From for ParseError<'_> { + fn from(_err: core::num::ParseFloatError) -> Self { + // HACK: The empty option & argument fields will be fixed up by the parser + // NOTE: Unlike ParseIntError, ParseFloatError does not expose kind publicly yet + Self::ArgumentError("", "", ParseErrorKind::InvalidFloat) + } +} + +impl core::error::Error for ParseError<'_> {} + +/// Internal state tracked by the parser: where the positional search resumes in the option list, and the value option (if any) still waiting for its argument +struct ParserState { + positional_index: usize, + expects_arg: Option<(&'static str, &'static Opt)>, +} + +impl Default for ParserState { + fn default() -> Self { + Self { + positional_index: 0, + expects_arg: None + } + } +} + +impl Opts { + /// Parse an iterator of strings as arguments, calling `handler` for each matched option and `error` with the first failure (`program_name` is not used by this function yet) + pub fn parse<'a, S: AsRef + 'a, I: Iterator>(&self, program_name: &str, args: I, + mut handler: impl FnMut(&ID, &Opt, &str, &str) -> HandlerResult<'a, ParseControl>, + error: impl FnOnce(ParseError), + ) -> ParseResult { + let mut state = ParserState::default(); + for arg in args { + // Parse the next token + match self.next(&mut state, arg.as_ref(), &mut handler) { + Ok(ParseControl::Continue) => {} + 
Ok(ParseControl::Stop) => { break; } + Ok(ParseControl::Quit) => { return ParseResult::ExitSuccess; } + Err(err) => { + // Call the error handler + error(err); + return ParseResult::ExitError; + } + } + } + + // Ensure that value options are provided a value + if let Some((name, _)) = state.expects_arg.take() { + error(ParseError::ExpectArgument(name)); + return ParseResult::ExitError; + } + + //TODO: Ensure all required parameter arguments have been provided + + // Ensure that all required positional arguments have been provided (NOTE(review): this scans every remaining entry without checking its type, so a required flag or value option listed after the last consumed positional is also reported here; confirm intended) + for option in self.options[state.positional_index..].iter() { + if option.required { + error(ParseError::RequiredPositional(option.first_name())); + return ParseResult::ExitError; + } + } + + // All arguments parsed successfully + ParseResult::ContinueSuccess + } + + /// Parse the next token in the argument stream + fn next<'a, 'b>(&self, state: &mut ParserState, token: &'b str, + handler: &mut impl FnMut(&ID, &Opt, &str, &str) -> HandlerResult<'a, ParseControl> + ) -> HandlerResult<'b, ParseControl> where 'a: 'b { + let mut call_handler = |option: &Opt, name, value| { + match handler(&option.id, option, name, value) { + // HACK: ensure the string fields are set properly, because coerced + // ParseIntError/ParseFloatError will have the string fields blanked. (NOTE(review): the whole token is reported as the argument, which for the equals-sign form includes the option name; confirm) + Err(ParseError::ArgumentError("", "", kind)) + => Err(ParseError::ArgumentError(name, token, kind)), + Err(err) => Err(err), + Ok(ctl) => Ok(ctl), + } + }; + + // If the previous token is expecting an argument, i.e. a value option + // was matched and didn't have an equals sign separating a value, + // then call the handler here. + if let Some((name, option)) = state.expects_arg.take() { + call_handler(option, name, token) + } else { + // Check if the next argument token starts with an option flag + if self.flag_chars.chars().any(|c| token.starts_with(c)) { + // Value options can have their value delineated by an equals sign or with whitespace. 
+ // In the latter case, the value will be in the next token. + let (option_str, value_str) = token.split_once("=") + .map_or((token, None), |(k, v)| (k, Some(v))); + + // Match a suitable option by name (ignoring the first flag character & skipping positional arguments) + let (name, option) = self.options.iter() + .filter(|opt| opt.r#type != OptType::Positional) + .find_map(|opt| opt.match_name(option_str, 1).map(|name| (name, opt))) + .ok_or(ParseError::UnknownOption(option_str))?; + + match (&option.r#type, value_str) { + // Call handler for flag-only options + (OptType::Flag, None) => call_handler(option, name, ""), + // Value was provided this token, so call the handler right now + (OptType::Value, Some(value)) => call_handler(option, name, value), + // No value available in this token, delay handling to next token + (OptType::Value, None) => { + state.expects_arg = Some((name, option)); + Ok(ParseControl::Continue) + } + // Flag-only options do not support arguments + (OptType::Flag, Some(_)) => Err(ParseError::UnexpectedArgument(option_str)), + // Positional arguments are filtered out so this is impossible + (OptType::Positional, _) => unreachable!("Won't parse a positional argument as an option"), + } + } else { + // Find the next positional argument (NOTE(review): the ParseControl returned by the handler is discarded here, so Stop or Quit from a positional has no effect, and the ArgumentError fix-up done by call_handler is bypassed; confirm intended) + for (i, option) in self.options[state.positional_index..].iter().enumerate() { + if option.r#type == OptType::Positional { + handler(&option.id, option, option.first_name(), token)?; + state.positional_index += i + 1; + return Ok(ParseControl::Continue); + } + } + Err(ParseError::UnexpectedToken(token)) + } + } + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..9756c36 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,13 @@ +/* jaarg - Argument parser + * SPDX-FileCopyrightText: (C) 2025 Gay Pizza Specifications + * SPDX-License-Identifier: MIT + */ + +#![no_std] + +include!("option.rs"); +include!("options.rs"); +include!("argparse.rs"); + +#[cfg(feature = "std")] +pub mod std; diff 
--git a/src/option.rs b/src/option.rs new file mode 100644 index 0000000..92797c5 --- /dev/null +++ b/src/option.rs @@ -0,0 +1,75 @@ +/* jaarg - Argument parser + * SPDX-FileCopyrightText: (C) 2025 Gay Pizza Specifications + * SPDX-License-Identifier: MIT + */ + +#[derive(Debug, PartialEq)] +enum OptType { + Positional, + Flag, + Value, +} + +#[derive(Debug)] +enum OptIdentifier { + Single(&'static str), + Multi(&'static[&'static str]), +} + +/// Represents an option argument or positional argument to be parsed +#[derive(Debug)] +pub struct Opt { + id: ID, + names: OptIdentifier, + value_name: Option<&'static str>, + help_string: &'static str, + r#type: OptType, + required: bool, +} + +impl Opt { + /// A positional argument that is parsed sequentially without being invoked by an option flag + pub const fn positional(id: ID, name: &'static str, help_string: &'static str) -> Self { + Self { id, names: OptIdentifier::Single(name), value_name: None, help_string, r#type: OptType::Positional, required: false } + } + /// A required positional argument that is parsed sequentially without being invoked by an option flag + pub const fn positional_required(id: ID, name: &'static str, help_string: &'static str) -> Self { + Self { id, names: OptIdentifier::Single(name), value_name: None, help_string, r#type: OptType::Positional, required: true } + } + /// A flag-type option that takes no value + pub const fn flag(id: ID, names: &'static[&'static str], help_string: &'static str) -> Self { + Self { id, names: OptIdentifier::Multi(names), value_name: None, help_string, r#type: OptType::Flag, required: false } + } + /// A required flag-type option that takes no value + pub const fn flag_required(id: ID, names: &'static[&'static str], help_string: &'static str) -> Self { + Self { id, names: OptIdentifier::Multi(names), value_name: None, help_string, r#type: OptType::Flag, required: true } + } + /// An option argument that takes a value + pub const fn value(id: ID, names: 
&'static[&'static str], value_name: &'static str, help_string: &'static str) -> Self { + Self { id, names: OptIdentifier::Multi(names), value_name: Some(value_name), help_string, r#type: OptType::Value, required: false } + } + /// A required option argument that takes a value + pub const fn value_required(id: ID, names: &'static[&'static str], value_name: &'static str, help_string: &'static str) -> Self { + Self { id, names: OptIdentifier::Multi(names), value_name: Some(value_name), help_string, r#type: OptType::Value, required: true } + } +} + +impl Opt { + /// Get the first name of the option (panics if a multi-name option was declared with an empty name list) + fn first_name(&self) -> &str { + match self.names { + OptIdentifier::Single(name) => name, + OptIdentifier::Multi(names) => names.first().unwrap(), + } + } + + /// Search for a matching name in the option; `offset` is the number of leading bytes skipped in both strings before the comparison (the slicing panics if either string is shorter than `offset` or `offset` is not on a char boundary) + fn match_name(&self, string: &str, offset: usize) -> Option<&'static str> { + match self.names { + OptIdentifier::Single(name) => + if name[offset..] == string[offset..] { Some(name) } else { None }, + OptIdentifier::Multi(names) => + names.iter().find(|name| name[offset..] 
== string[offset..]).map(|v| &**v), + } + } +} diff --git a/src/options.rs b/src/options.rs new file mode 100644 index 0000000..1cfba3c --- /dev/null +++ b/src/options.rs @@ -0,0 +1,22 @@ +/* jaarg - Argument parser + * SPDX-FileCopyrightText: (C) 2025 Gay Pizza Specifications + * SPDX-License-Identifier: MIT + */ + +/// Static structure that contains instructions for parsing command-line arguments +pub struct Opts { + /// String containing single characters that match option prefixes + flag_chars: &'static str, + /// List of options + options: &'static[Opt], +} + +impl Opts { + /// Build argument parser options with the default flag character of '-' + pub const fn new(options: &'static[Opt]) -> Self { + Self { flag_chars: "-", options } + } + pub const fn new_flag(flag_chars: &'static str, options: &'static[Opt]) -> Self { + Self { flag_chars, options } + } +} diff --git a/src/std.rs b/src/std.rs new file mode 100644 index 0000000..bd85f1e --- /dev/null +++ b/src/std.rs @@ -0,0 +1,21 @@ +/* jaarg - Argument parser + * SPDX-FileCopyrightText: (C) 2025 Gay Pizza Specifications + * SPDX-License-Identifier: MIT + */ + +extern crate std; + +use crate::{HandlerResult, Opt, Opts, ParseControl, ParseResult}; + +impl Opts { + /// Wrapper around `parse` that gathers arguments from the command line and prints errors to stderr. + /// + /// Requires std. Panics if the argument list is empty or its first entry has no file name component. + pub fn parse_env<'a>(&self, handler: impl FnMut(&ID, &Opt, &str, &str) -> HandlerResult<'a, ParseControl> + ) -> ParseResult { + let mut argv = std::env::args(); + let argv0 = argv.next().unwrap(); + let program_name = std::path::Path::new(&argv0).file_name().unwrap().to_string_lossy(); + self.parse(&program_name, argv, handler, |e| { std::eprintln!("error: {e}") }) + } +}