mod validation_utilities;
use crate ::
{
config ::UnilangParserOptions,
error :: { ErrorKind, ParseError, SourceLocation },
item_adapter :: { RichItem, UnilangTokenKind },
};
use crate ::instruction :: { Argument, GenericInstruction };
use alloc ::collections ::BTreeMap;
use alloc ::vec :: { Vec, IntoIter };
use alloc ::string :: { String, ToString };
use alloc ::format;
/// Parser that turns unilang command text (or argv slices) into `GenericInstruction` values.
///
/// All behaviour that can be tuned is taken from the `UnilangParserOptions` passed to `new`.
#[ derive( Debug ) ]
pub struct Parser
{
/// Parser configuration. Within this file its `main_delimiters`, `operators` and
/// `error_on_duplicate_named_arguments` fields are read directly, and the whole value is
/// forwarded to `validation_utilities::process_positional_argument`.
options: UnilangParserOptions,
}
impl Parser
{
/// Creates a parser that uses the given `options` for every subsequent parse call.
#[ must_use ]
pub fn new( options: UnilangParserOptions ) -> Self
{
Self { options }
}
/// Deprecated alias kept for backward compatibility; forwards `input` unchanged to
/// `parse_repl_input` and returns its result.
///
/// # Errors
/// Returns whatever `ParseError` `parse_repl_input` produces for `input`.
#[ deprecated(
since = "0.33.0",
note = "Use parse_repl_input() for REPL/string input. \
Use parse_from_argv() for CLI argv input."
) ]
pub fn parse_single_instruction( &self, input: &str ) -> Result< crate ::instruction ::GenericInstruction, ParseError >
{
self.parse_repl_input( input )
}
/// Parses one instruction from a REPL-style command string.
///
/// Processing steps, in order:
/// 1. `validation_utilities::validate_quote_completeness` is run on the raw input.
/// 2. The input is split (quote-aware, empty segments dropped) on whitespace, `#`, and the
///    configured `main_delimiters` and `operators`.
/// 3. Every split is classified into a `RichItem` via `item_adapter::classify_split`.
/// 4. Tokens that directly follow a `::` operator are merged into one value token.
/// 5. Whitespace delimiter tokens are discarded.
/// 6. The token list is passed through
///    `validation_utilities::inject_empty_quoted_string_tokens`
///    (presumably restores `""` values dropped by step 2 - confirm against that helper).
/// 7. The resulting tokens are parsed into a `GenericInstruction`.
///
/// # Errors
/// Propagates any `ParseError` raised by quote validation, token classification or the
/// syntactic parse of the token stream.
pub fn parse_repl_input( &self, input : &str )
-> Result< crate::instruction::GenericInstruction, ParseError >
{
  validation_utilities::validate_quote_completeness( input )?;

  // Built-in delimiters first, then whatever the options add on top.
  let mut all_delimiters = alloc::vec::Vec::new();
  all_delimiters.extend_from_slice( &[ " ", "\n", "\t", "\r", "#" ] );
  all_delimiters.extend( self.options.main_delimiters.iter().copied() );
  all_delimiters.extend( self.options.operators.iter().copied() );

  let splits_iter = strs_tools::string::split::split()
  .delimeters( all_delimiters.iter().map(core::convert::AsRef::as_ref).collect::<Vec<_>>().as_slice() )
  .quoting( true )
  .preserving_empty( false )
  .src( input )
  .perform();

  // Convert each raw split into the adapter type and classify it in one pass.
  let rich_items = splits_iter
  .map( | s |
  {
    let split = crate::item_adapter::Split
    {
      string: s.string,
      bounds: ( s.start, s.end ),
      start: s.start,
      end: s.end,
      typ: match s.typ
      {
        // `Delimited` is the content found between delimiters, i.e. NOT a delimiter.
        strs_tools::string::split::SplitType::Delimited => crate::item_adapter::SplitType::NonDelimiter,
        strs_tools::string::split::SplitType::Delimiter => crate::item_adapter::SplitType::Delimiter,
      },
      was_quoted: s.was_quoted,
    };
    let ( kind, adjusted_source_location ) = crate::item_adapter::classify_split( &split )?;
    Ok( RichItem::new( split, kind, adjusted_source_location ) )
  })
  .collect::< Result< Vec< RichItem< '_ > >, ParseError > >()?;

  // Merging must happen while whitespace tokens are still present: whitespace is what
  // terminates a merged value.
  let rich_items = Self::merge_value_context_tokens( rich_items );

  let rich_items: Vec< RichItem< '_ > > = rich_items
  .into_iter()
  .filter( | item | !matches!( item.kind, UnilangTokenKind::Delimiter( " " | "\n" | "\t" | "\r" ) ) )
  .collect();

  let rich_items = validation_utilities::inject_empty_quoted_string_tokens( input, rich_items );
  self.parse_single_instruction_from_rich_items( rich_items )
}
/// Parses an instruction from a `ShellArgv` wrapper by delegating to `parse_from_argv`
/// with the wrapper's underlying slice (`argv.as_slice()`).
///
/// # Errors
/// Returns whatever `ParseError` `parse_from_argv` produces.
pub fn parse_cli( &self, argv : &crate ::argv_types ::ShellArgv )
-> Result< crate ::instruction ::GenericInstruction, ParseError >
{
self.parse_from_argv( argv.as_slice() )
}
/// Parses an instruction from a `ReplInput` wrapper by delegating to `parse_repl_input`
/// with the wrapper's string contents (`input.as_str()`).
///
/// # Errors
/// Returns whatever `ParseError` `parse_repl_input` produces.
pub fn parse_repl( &self, input : &crate ::argv_types ::ReplInput )
-> Result< crate ::instruction ::GenericInstruction, ParseError >
{
self.parse_repl_input( input.as_str() )
}
/// Collapses the run of tokens that directly follows a `::` operator into one token.
///
/// After each named-argument operator, every token up to (not including) the next
/// whitespace delimiter is concatenated - without separators - into a single
/// `Identifier` token. Its span runs from the start of the first merged token that has a
/// `StrSpan` location to the end of the last one (`0..0` when none has). If whitespace or
/// the end of input follows the operator immediately, nothing is inserted. All other
/// tokens are passed through unchanged and in order.
fn merge_value_context_tokens(
  rich_items: Vec< RichItem< '_ > >,
) -> Vec< RichItem< '_ > >
{
  let mut merged = Vec::new();
  let mut tokens = rich_items.into_iter().peekable();

  while let Some( current ) = tokens.next()
  {
    let opens_value = matches!( &current.kind, UnilangTokenKind::Operator( "::" | " :: " ) );
    merged.push( current );
    if !opens_value
    {
      continue;
    }

    // Text fragments of the value plus the (first start, last end) span seen so far.
    let mut pieces: Vec< String > = Vec::new();
    let mut span: Option< ( usize, usize ) > = None;

    // Take tokens until whitespace (or the end of the stream) closes the value.
    while let Some( part ) = tokens.next_if( | candidate |
      !matches!( &candidate.kind, UnilangTokenKind::Delimiter( " " | "\t" | "\n" | "\r" ) ) )
    {
      if let SourceLocation::StrSpan { start, end } = part.adjusted_source_location
      {
        let first_start = span.map_or( start, | ( seen_start, _ ) | seen_start );
        span = Some( ( first_start, end ) );
      }
      pieces.push( match &part.kind
      {
        UnilangTokenKind::Identifier( s )
        | UnilangTokenKind::Number( s )
        | UnilangTokenKind::Unrecognized( s ) => s.clone(),
        UnilangTokenKind::Operator( s )
        | UnilangTokenKind::Delimiter( s ) => (*s).to_string(),
      });
    }

    if pieces.is_empty()
    {
      continue;
    }

    let merged_value = pieces.concat();
    let ( span_start, span_end ) = span.unwrap_or( ( 0, 0 ) );
    let split = crate::item_adapter::Split
    {
      string: alloc::borrow::Cow::Owned( merged_value.clone() ),
      bounds: ( span_start, span_end ),
      start: span_start,
      end: span_end,
      typ: crate::item_adapter::SplitType::NonDelimiter,
      was_quoted: false,
    };
    merged.push( RichItem::new(
      split,
      UnilangTokenKind::Identifier( merged_value ),
      SourceLocation::StrSpan { start: span_start, end: span_end },
    ));
  }

  merged
}
/// Parses a sequence of instructions separated by `;;`.
///
/// The input is split on every literal `;;`; each segment is trimmed and parsed with
/// `parse_repl_input`. Blank (empty or whitespace-only) input yields an empty vector.
/// Note that locations inside each returned instruction are relative to its trimmed
/// segment, because that is what `parse_repl_input` receives.
///
/// # Errors
/// * `ErrorKind::EmptyInstructionSegment` - the input starts with `;;`, contains `;;;;`,
///   or has a whitespace-only segment that is not the last one. The reported span covers
///   the offending separator(s) or the blank segment itself.
/// * `ErrorKind::TrailingDelimiter` - the final segment is blank; the span covers the last
///   `;;`.
/// * Any `ParseError` produced while parsing an individual segment.
pub fn parse_multiple_instructions( &self, input: &str ) -> Result< Vec< crate::instruction::GenericInstruction >, ParseError >
{
  const SEPARATOR: &str = ";;";

  // Blank input cannot contain a separator, so this is the "single blank segment" case.
  if input.trim().is_empty()
  {
    return Ok( Vec::new() );
  }

  if input.starts_with( SEPARATOR )
  {
    return Err( ParseError::new
    (
      ErrorKind::EmptyInstructionSegment,
      SourceLocation::StrSpan { start: 0, end: SEPARATOR.len() },
    ));
  }

  if let Some( pos ) = input.find( ";;;;" )
  {
    return Err( ParseError::new
    (
      ErrorKind::EmptyInstructionSegment,
      SourceLocation::StrSpan { start: pos, end: pos + 4 },
    ));
  }

  let parts: Vec< &str > = input.split( SEPARATOR ).collect();
  let last_index = parts.len() - 1;
  let mut instructions = Vec::new();

  // Byte offset of the current segment inside `input`. Tracking it explicitly avoids
  // searching for the segment text, which would find the first look-alike substring
  // (e.g. an earlier space) instead of the segment that is actually blank.
  let mut part_start = 0_usize;

  for ( index, part ) in parts.iter().enumerate()
  {
    let trimmed = part.trim();
    if trimmed.is_empty()
    {
      if index == last_index
      {
        // A blank final segment means the input ended with a separator; that separator
        // sits immediately before this segment.
        let separator_start = part_start.saturating_sub( SEPARATOR.len() );
        return Err( ParseError::new
        (
          ErrorKind::TrailingDelimiter,
          SourceLocation::StrSpan
          {
            start: separator_start,
            end: separator_start + SEPARATOR.len(),
          },
        ));
      }
      return Err( ParseError::new
      (
        ErrorKind::EmptyInstructionSegment,
        SourceLocation::StrSpan
        {
          start: part_start,
          end: part_start + part.len().max( 1 ),
        },
      ));
    }

    instructions.push( self.parse_repl_input( trimmed )? );
    part_start += part.len() + SEPARATOR.len();
  }

  Ok( instructions )
}
/// Builds a `GenericInstruction` from an already classified, whitespace-free token list.
///
/// An empty list produces an empty instruction with `SourceLocation::None`. Otherwise the
/// overall location spans from the first token's start to the last token's end, a leading
/// `.` located at offset 0 is skipped, and the remaining tokens are parsed as a command
/// path followed by arguments.
///
/// # Errors
/// Propagates errors from `parse_command_path` and `parse_arguments`.
fn parse_single_instruction_from_rich_items
(
  &self,
  rich_items: Vec< RichItem< '_ > >,
)
-> Result< crate::instruction::GenericInstruction, ParseError >
{
  let ( Some( first ), Some( last ) ) = ( rich_items.first(), rich_items.last() ) else
  {
    return Ok( GenericInstruction
    {
      command_path_slices: Vec::new(),
      positional_arguments: Vec::new(),
      named_arguments: BTreeMap::new(),
      help_requested: false,
      overall_location: SourceLocation::None,
    });
  };
  let span_start = first.inner.start;
  let span_end = last.inner.end;

  let mut items_iter = rich_items.into_iter().peekable();

  // A dot at the very beginning of the input is an optional prefix, not a path separator.
  let _ = items_iter.next_if( | token |
    matches!( token.kind, UnilangTokenKind::Delimiter( "." ) ) && token.inner.start == 0 );

  let command_path_slices = Self::parse_command_path( &mut items_iter, span_end )?;
  let ( positional_arguments, named_arguments, help_requested ) = self.parse_arguments( &mut items_iter )?;

  Ok( GenericInstruction
  {
    command_path_slices,
    positional_arguments,
    named_arguments,
    help_requested,
    overall_location: SourceLocation::StrSpan { start: span_start, end: span_end },
  })
}
/// Consumes the leading command path (`seg.seg.seg`) from the token stream.
///
/// Identifiers are accepted as the first segment or directly after a `.`; a `.` is
/// accepted between segments. Parsing stops, leaving the token unconsumed, at:
/// * an identifier immediately followed by a `::` operator (it is a named-argument name),
/// * an identifier that does not follow a dot once the path is non-empty,
/// * any other token kind.
///
/// # Errors
/// Returns `ErrorKind::Syntax` when a segment contains `-`, when two dots are adjacent,
/// when a dot is followed by a number or unrecognized token, or when the path ends with a
/// dot (reported at the last byte of the instruction).
fn parse_command_path
(
  items_iter: &mut core::iter::Peekable< IntoIter< RichItem< '_ > > >,
  instruction_end_location: usize,
)
-> Result< Vec< String >, ParseError >
{
  let mut path = Vec::new();
  let mut pending_dot = false;

  loop
  {
    let Some( token ) = items_iter.peek() else
    {
      break;
    };

    match &token.kind
    {
      UnilangTokenKind::Identifier( name ) =>
      {
        if !( path.is_empty() || pending_dot )
        {
          break;
        }
        // Copy what is needed out of the peeked token before looking further ahead.
        let segment = name.clone();
        let location = token.adjusted_source_location.clone();

        let names_an_argument = items_iter
        .clone()
        .nth( 1 )
        .is_some_and( | after | matches!( after.kind, UnilangTokenKind::Operator( "::" | " :: " ) ) );
        if names_an_argument
        {
          break;
        }

        if segment.contains( '-' )
        {
          return Err( ParseError::new
          (
            ErrorKind::Syntax( format!( "Invalid character '-' in command path segment '{segment}'" ) ),
            location,
          ));
        }

        path.push( segment );
        pending_dot = false;
        items_iter.next();
      }
      UnilangTokenKind::Delimiter( "." ) =>
      {
        if pending_dot
        {
          return Err( ParseError::new
          (
            ErrorKind::Syntax( "Consecutive dots in command path".to_string() ),
            token.adjusted_source_location.clone(),
          ));
        }
        pending_dot = true;
        items_iter.next();
      }
      UnilangTokenKind::Unrecognized( s ) | UnilangTokenKind::Number( s ) =>
      {
        if pending_dot
        {
          return Err( ParseError::new
          (
            ErrorKind::Syntax( format!( "Invalid identifier '{s}' in command path" ) ),
            token.adjusted_source_location.clone(),
          ));
        }
        break;
      }
      _ => break,
    }
  }

  if pending_dot
  {
    return Err( ParseError::new
    (
      ErrorKind::Syntax( "Command path cannot end with a '.'".to_string() ),
      SourceLocation::StrSpan
      {
        start: instruction_end_location - 1,
        end: instruction_end_location,
      },
    ));
  }

  Ok( path )
}
#[ allow( clippy ::too_many_lines ) ]
fn process_named_argument(
&self,
arg_name: &str,
item: &RichItem< '_ >,
items_iter: &mut core ::iter ::Peekable< IntoIter< RichItem< '_ > > >,
named_arguments: &mut BTreeMap< String, Vec< Argument > >,
) -> Result< (), ParseError >
{
if let Some( value_item ) = items_iter.next()
{
match value_item.kind
{
UnilangTokenKind ::Identifier( ref val )
| UnilangTokenKind ::Unrecognized( ref val )
| UnilangTokenKind ::Number( ref val ) =>
{
let mut current_value = val.clone();
let mut current_value_end_location = match value_item.source_location()
{
SourceLocation ::StrSpan { end, .. } => end,
SourceLocation ::None => 0, };
loop
{
let should_continue = match items_iter.peek()
{
Some( next_token ) =>
{
match &next_token.kind
{
UnilangTokenKind ::Identifier( _ ) =>
{
let remaining_items: Vec<_> = items_iter.clone().collect();
if remaining_items.len() >= 2
{
if let UnilangTokenKind ::Operator( op ) = &remaining_items[1].kind
{
if *op == " :: " || *op == "::"
{
false
}
else
{
true
}
}
else
{
false
}
}
else
{
false
}
}
UnilangTokenKind ::Number( _ ) => true, _ => false, }
}
None => false, };
if !should_continue
{
break;
}
if let Some( consumed_token ) = items_iter.next()
{
current_value.push( ' ' );
current_value.push_str( &consumed_token.inner.string );
current_value_end_location = match consumed_token.source_location()
{
SourceLocation ::StrSpan { end, .. } => end,
SourceLocation ::None => current_value_end_location,
};
}
else
{
break;
}
}
loop
{
let Some( peeked_dot ) = items_iter.peek() else
{
break;
};
if let UnilangTokenKind ::Delimiter( "." ) = &peeked_dot.kind
{
let _dot_item = items_iter.next().unwrap(); let Some( peeked_segment ) = items_iter.peek() else
{
break;
};
if let UnilangTokenKind ::Identifier( ref s ) = &peeked_segment.kind
{
current_value.push( '.' );
current_value.push_str( s );
current_value_end_location = match peeked_segment.source_location()
{
SourceLocation ::StrSpan { end, .. } => end,
SourceLocation ::None => current_value_end_location, };
items_iter.next(); }
else if let UnilangTokenKind ::Unrecognized( ref s ) = &peeked_segment.kind
{
current_value.push( '.' );
current_value.push_str( s );
current_value_end_location = match peeked_segment.source_location()
{
SourceLocation ::StrSpan { end, .. } => end,
SourceLocation ::None => current_value_end_location, };
items_iter.next(); }
else if let UnilangTokenKind ::Number( ref s ) = &peeked_segment.kind
{
current_value.push( '.' );
current_value.push_str( s );
current_value_end_location = match peeked_segment.source_location()
{
SourceLocation ::StrSpan { end, .. } => end,
SourceLocation ::None => current_value_end_location, };
items_iter.next(); }
else
{
break;
}
}
else
{
break; }
}
let argument = Argument
{
name: Some( arg_name.to_string() ),
value: current_value,
name_location: Some( item.source_location() ),
value_location: SourceLocation ::StrSpan
{
start: match value_item.source_location()
{
SourceLocation ::StrSpan { start, .. } => start,
SourceLocation ::None => 0,
},
end: current_value_end_location,
},
};
if self.options.error_on_duplicate_named_arguments && named_arguments.contains_key( arg_name )
{
return Err( validation_utilities::error_duplicate_named_argument( arg_name, item.adjusted_source_location.clone() ) );
}
named_arguments.entry( arg_name.to_string() )
.or_default()
.push( argument );
}
UnilangTokenKind ::Delimiter( "." ) =>
{
let mut current_value = ".".to_string();
let mut current_value_end_location = match value_item.source_location()
{
SourceLocation ::StrSpan { end, .. } => end,
SourceLocation ::None => 0,
};
if let Some( next_item ) = items_iter.peek()
{
match &next_item.kind
{
UnilangTokenKind ::Unrecognized( ref s ) =>
{
current_value.push_str( s );
current_value_end_location = match next_item.source_location()
{
SourceLocation ::StrSpan { end, .. } => end,
SourceLocation ::None => current_value_end_location,
};
items_iter.next(); }
UnilangTokenKind ::Delimiter( "." ) =>
{
current_value.push( '.' );
current_value_end_location = match next_item.source_location()
{
SourceLocation ::StrSpan { end, .. } => end,
SourceLocation ::None => current_value_end_location,
};
items_iter.next();
if let Some( third_item ) = items_iter.peek()
{
if let UnilangTokenKind ::Unrecognized( ref s ) = &third_item.kind
{
current_value.push_str( s );
current_value_end_location = match third_item.source_location()
{
SourceLocation ::StrSpan { end, .. } => end,
SourceLocation ::None => current_value_end_location,
};
items_iter.next(); }
}
}
_ =>
{
}
}
loop
{
let Some( peeked_dot ) = items_iter.peek() else
{
break;
};
if let UnilangTokenKind ::Delimiter( "." ) = &peeked_dot.kind
{
let _dot_item = items_iter.next().unwrap(); let Some( peeked_segment ) = items_iter.peek() else
{
break;
};
if let UnilangTokenKind ::Identifier( ref s ) = &peeked_segment.kind
{
current_value.push( '.' );
current_value.push_str( s );
current_value_end_location = match peeked_segment.source_location()
{
SourceLocation ::StrSpan { end, .. } => end,
SourceLocation ::None => current_value_end_location,
};
items_iter.next(); }
else if let UnilangTokenKind ::Unrecognized( ref s ) = &peeked_segment.kind
{
current_value.push( '.' );
current_value.push_str( s );
current_value_end_location = match peeked_segment.source_location()
{
SourceLocation ::StrSpan { end, .. } => end,
SourceLocation ::None => current_value_end_location,
};
items_iter.next(); }
else if let UnilangTokenKind ::Number( ref s ) = &peeked_segment.kind
{
current_value.push( '.' );
current_value.push_str( s );
current_value_end_location = match peeked_segment.source_location()
{
SourceLocation ::StrSpan { end, .. } => end,
SourceLocation ::None => current_value_end_location,
};
items_iter.next(); }
else
{
break;
}
}
else
{
break;
}
}
}
let argument = Argument
{
name: Some( arg_name.to_string() ),
value: current_value,
name_location: Some( item.source_location() ),
value_location: SourceLocation ::StrSpan
{
start: match value_item.source_location()
{
SourceLocation ::StrSpan { start, .. } => start,
SourceLocation ::None => 0,
},
end: current_value_end_location,
},
};
if self.options.error_on_duplicate_named_arguments && named_arguments.contains_key( arg_name )
{
return Err( validation_utilities::error_duplicate_named_argument( arg_name, item.adjusted_source_location.clone() ) );
}
named_arguments.entry( arg_name.to_string() )
.or_default()
.push( argument );
}
_ =>
{
return Err( validation_utilities::error_missing_named_value( arg_name, value_item.source_location() ) )
}
}
}
else
{
return Err( validation_utilities::error_missing_named_value_at_end( arg_name, item.adjusted_source_location.clone() ) );
}
Ok( () )
}
/// Parses everything after the command path: positional arguments, named arguments and
/// the help operator.
///
/// Returns `( positional_arguments, named_arguments, help_requested )`.
///
/// * An identifier followed by a `::` operator (or by two consecutive `:` delimiters)
///   names an argument; its value is parsed by `process_named_argument`.
/// * Any other identifier, and every number, is handed to
///   `validation_utilities::process_positional_argument`.
/// * `?` sets the help flag after `validation_utilities::validate_help_operator` accepts
///   it. Arguments parsed before the operator are kept, positional and named alike.
///
/// # Errors
/// Returns a `ParseError` for unrecognized tokens, an operator without a preceding name,
/// a stray `:`, any other unexpected token, and for every error reported by the helpers
/// named above.
#[ allow( clippy::type_complexity ) ]
#[ allow( clippy::too_many_lines ) ]
fn parse_arguments
(
  &self,
  items_iter: &mut core::iter::Peekable< IntoIter< RichItem< '_ > > >,
)
-> Result< ( Vec< Argument >, BTreeMap< String, Vec< Argument > >, bool ), ParseError >
{
  let mut positional_arguments = Vec::new();
  let mut named_arguments = BTreeMap::new();
  let mut help_operator_found = false;

  while let Some( item ) = items_iter.next()
  {
    match &item.kind
    {
      UnilangTokenKind::Unrecognized( s ) =>
      {
        return Err( validation_utilities::error_unexpected_token( s, item.adjusted_source_location.clone() ) );
      }
      UnilangTokenKind::Identifier( s ) =>
      {
        // Number of tokens forming the named-argument operator after this identifier:
        // 1 for a real `::` operator, 2 for a `:` `:` delimiter pair, 0 for none.
        let next_is_operator = items_iter
        .peek()
        .is_some_and( | next | matches!( next.kind, UnilangTokenKind::Operator( "::" | " :: " ) ) );
        let next_is_colon_pair = !next_is_operator
          && items_iter
          .peek()
          .is_some_and( | next | matches!( next.kind, UnilangTokenKind::Delimiter( ":" ) ) )
          && items_iter
          .clone()
          .nth( 1 )
          .is_some_and( | after | matches!( after.kind, UnilangTokenKind::Delimiter( ":" ) ) );
        let operator_token_count = if next_is_operator
        {
          1
        }
        else if next_is_colon_pair
        {
          2
        }
        else
        {
          0
        };

        if operator_token_count > 0
        {
          for _ in 0..operator_token_count
          {
            items_iter.next();
          }
          self.process_named_argument( s, &item, items_iter, &mut named_arguments )?;
        }
        else
        {
          validation_utilities::process_positional_argument( &self.options, s, &item, &mut positional_arguments, &named_arguments )?;
        }
      }
      UnilangTokenKind::Number( s ) =>
      {
        validation_utilities::process_positional_argument( &self.options, s, &item, &mut positional_arguments, &named_arguments )?;
      }
      UnilangTokenKind::Operator( "?" ) =>
      {
        validation_utilities::validate_help_operator( &item, items_iter )?;
        // Requesting help must not throw away arguments the user already supplied.
        help_operator_found = true;
      }
      UnilangTokenKind::Operator( "::" | " :: " ) =>
      {
        return Err( validation_utilities::error_orphaned_operator( item.adjusted_source_location.clone() ) );
      }
      UnilangTokenKind::Delimiter( ":" ) =>
      {
        // `:` `:` without a preceding name is an orphaned operator; a lone `:` is just
        // an unexpected token.
        let followed_by_colon = items_iter
        .peek()
        .is_some_and( | next | matches!( next.kind, UnilangTokenKind::Delimiter( ":" ) ) );
        if followed_by_colon
        {
          return Err( validation_utilities::error_orphaned_operator( item.adjusted_source_location.clone() ) );
        }
        return Err( validation_utilities::error_unexpected_token( ":", item.adjusted_source_location.clone() ) );
      }
      _ =>
      {
        return Err( validation_utilities::error_unexpected_token( &item.inner.string, item.adjusted_source_location.clone() ) );
      }
    }
  }

  Ok( ( positional_arguments, named_arguments, help_operator_found ) )
}
/// Parses an instruction from pre-split command-line arguments.
///
/// The arguments are re-assembled into a single command string and handed to
/// `parse_repl_input`:
///
/// * `key::value` arguments absorb the following arguments (joined with single spaces)
///   until one contains `::`, starts with `.`, or the value collected so far contains `/`.
///   A value that ends up empty or containing whitespace is wrapped in double quotes with
///   embedded `"` escaped.
/// * Any other argument is passed through as-is when it starts with `.`, is a lowercase
///   identifier (`[a-z_][a-z0-9_-]*`, not ending in `-`), parses as an `i64`, or is one of
///   the configured operators - provided it has no whitespace. Otherwise it is quoted.
///
/// An empty `argv` yields an empty instruction with `SourceLocation::None`.
///
/// # Errors
/// Returns `ErrorKind::Syntax` when an argument contains `:` but not `::`, plus any error
/// produced by `parse_repl_input` for the re-assembled string.
pub fn parse_from_argv( &self, argv: &[String] ) -> Result< GenericInstruction, ParseError >
{
  /// True when `text` uses a single `:` where the `::` separator is required.
  fn has_lone_colon( text: &str ) -> bool
  {
    text.contains( ':' ) && !text.contains( "::" )
  }

  /// Error reported for an argument rejected by `has_lone_colon`.
  fn invalid_parameter_syntax( text: &str ) -> ParseError
  {
    ParseError
    {
      kind: ErrorKind::Syntax( format!(
        "Invalid parameter syntax: '{}'. Parameters must use '::' separator (e.g., 'param::value')",
        text
      ) ),
      location: Some( SourceLocation::None ),
    }
  }

  if argv.is_empty()
  {
    return Ok( GenericInstruction
    {
      command_path_slices: Vec::new(),
      positional_arguments: Vec::new(),
      named_arguments: BTreeMap::new(),
      help_requested: false,
      overall_location: SourceLocation::None,
    });
  }

  validation_utilities::detect_argv_misuse( argv );

  let mut tokens = Vec::new();
  // Index of the next argument that has not been read yet.
  let mut cursor = 0;

  while let Some( arg ) = argv.get( cursor )
  {
    cursor += 1;

    if has_lone_colon( arg )
    {
      return Err( invalid_parameter_syntax( arg ) );
    }

    match arg.split_once( "::" )
    {
      Some( ( key, initial_value ) ) =>
      {
        let mut value = initial_value.to_string();

        // Fold the following arguments into this value until something ends it.
        while let Some( next_arg ) = argv.get( cursor )
        {
          if has_lone_colon( next_arg )
          {
            return Err( invalid_parameter_syntax( next_arg ) );
          }
          if next_arg.contains( "::" ) || next_arg.starts_with( '.' ) || value.contains( '/' )
          {
            break;
          }
          if !value.is_empty()
          {
            value.push( ' ' );
          }
          value.push_str( next_arg );
          cursor += 1;
        }

        let token = if value.is_empty() || value.chars().any( char::is_whitespace )
        {
          let escaped_value = value.replace( '"', "\\\"" );
          format!( "{key}::\"{escaped_value}\"" )
        }
        else
        {
          format!( "{key}::{value}" )
        };
        tokens.push( token );
      }
      None =>
      {
        let starts_like_identifier = arg
        .chars()
        .next()
        .is_some_and( | c | c.is_ascii_lowercase() || c == '_' );
        let is_unilang_identifier = starts_like_identifier
          && !arg.ends_with( '-' )
          && arg.chars().all( | c | c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_' || c == '-' );
        let is_number = arg.parse::< i64 >().is_ok();
        let is_operator = self.options.operators.contains( &arg.as_str() );

        // Arguments the tokenizer understands verbatim need no quoting.
        let passes_verbatim = arg.starts_with( '.' ) || is_unilang_identifier || is_number || is_operator;
        let has_whitespace = arg.chars().any( char::is_whitespace );

        if has_whitespace || !passes_verbatim
        {
          let escaped_arg = arg.replace( '"', "\\\"" );
          tokens.push( format!( "\"{escaped_arg}\"" ) );
        }
        else
        {
          tokens.push( arg.clone() );
        }
      }
    }
  }

  self.parse_repl_input( &tokens.join( " " ) )
}
}