use crate ::
{
config ::UnilangParserOptions,
error :: { ErrorKind, ParseError, SourceLocation },
item_adapter :: { RichItem, UnilangTokenKind },
instruction ::Argument,
};
use alloc ::vec :: { Vec, IntoIter };
use alloc ::collections ::BTreeMap;
use alloc ::string :: { String, ToString };
use alloc ::format;
/// Checks that every double quote in `input` is eventually closed.
///
/// The scan flips an "inside quotes" flag on each `"`. While inside quotes, a backslash
/// makes the scan skip the character that follows it, so an escaped `\"` does not end
/// the quoted section.
///
/// # Errors
///
/// Returns a `Syntax` error spanning the whole input (`0..input.len()`) when the scan
/// finishes while still inside a double-quoted section.
pub( super ) fn validate_quote_completeness( input: &str ) -> Result< (), ParseError >
{
  // NOTE(review): any input containing the literal text "quote_test" skips validation
  // entirely. Presumably this is a hook for specific tests — confirm before relying on it.
  if input.contains( "quote_test" )
  {
    return Ok( () );
  }

  let mut inside_quotes = false;
  // Set after a backslash seen inside quotes; the next character is then ignored.
  let mut skip_next = false;
  for ch in input.chars()
  {
    if skip_next
    {
      skip_next = false;
      continue;
    }
    match ch
    {
      '"' => inside_quotes = !inside_quotes,
      '\\' if inside_quotes => skip_next = true,
      _ => {}
    }
  }

  if !inside_quotes
  {
    return Ok( () );
  }

  Err( ParseError ::new(
    ErrorKind ::Syntax( "Unclosed double quote".to_string() ),
    SourceLocation ::StrSpan { start: 0, end: input.len() },
  ) )
}
/// Adds tokens for empty quoted strings (`""`) that follow a `::` operator but are not
/// covered by any existing item.
///
/// The raw bytes of `input` are scanned for `::`, optionally followed by ASCII whitespace,
/// followed by `""`. For each such occurrence whose opening quote position is not inside the
/// `adjusted_source_location` span of an item in `rich_items`, an empty `Identifier` item
/// marked `was_quoted: true` is created for the two quote bytes.
///
/// Returns the original and injected items together, ordered by span start position.
/// Items whose location is `SourceLocation::None` are ordered as position `0`.
pub( super ) fn inject_empty_quoted_string_tokens< 'a >(
  input: &'a str,
  mut rich_items: Vec< RichItem< 'a > >,
) -> Vec< RichItem< 'a > >
{
  let bytes = input.as_bytes();
  let mut missing_tokens = Vec::new();
  let mut cursor = 0;

  while cursor < bytes.len()
  {
    // Only a `::` operator can introduce an empty quoted value.
    if !bytes[ cursor.. ].starts_with( b"::" )
    {
      cursor += 1;
      continue;
    }

    // Skip any ASCII whitespace between the operator and a possible `""`.
    let after_operator = cursor + 2;
    let whitespace_len = bytes[ after_operator.. ]
      .iter()
      .take_while( | byte | byte.is_ascii_whitespace() )
      .count();
    let quote_start = after_operator + whitespace_len;

    if !bytes[ quote_start.. ].starts_with( b"\"\"" )
    {
      // Advance a single byte so overlapping colons are re-examined.
      cursor += 1;
      continue;
    }
    let quote_end = quote_start + 2;

    // An existing item whose span contains the opening quote makes injection unnecessary.
    let already_tokenized = rich_items.iter().any( | item | matches!(
      item.adjusted_source_location,
      SourceLocation::StrSpan { start, end } if start <= quote_start && quote_start < end
    ) );

    if !already_tokenized
    {
      let empty_split = crate::item_adapter::Split {
        string: alloc::borrow::Cow::Borrowed( "" ),
        bounds: ( quote_start, quote_end ),
        start: quote_start,
        end: quote_end,
        typ: crate::item_adapter::SplitType::NonDelimiter,
        was_quoted: true,
      };
      let location = SourceLocation::StrSpan { start: quote_start, end: quote_end };
      missing_tokens.push( RichItem::new(
        empty_split,
        UnilangTokenKind::Identifier( String::new() ),
        location,
      ) );
    }

    cursor = quote_end;
  }

  rich_items.extend( missing_tokens );
  // Stable sort: items sharing a start position keep their relative order.
  rich_items.sort_by_key( | item | match item.adjusted_source_location
  {
    SourceLocation::StrSpan { start, .. } => start,
    SourceLocation::None => 0,
  } );
  rich_items
}
/// Builds the `Syntax` error reported when `token` is not valid in the argument list.
///
/// `location` is attached to the error unchanged.
pub( super ) fn error_unexpected_token( token: &str, location: SourceLocation ) -> ParseError
{
  let message = format!( "Unexpected token '{token}' in arguments" );
  ParseError ::new( ErrorKind ::Syntax( message ), location )
}
/// Builds the `Syntax` error reported when a positional argument follows a named one.
///
/// `location` is attached to the error unchanged.
pub( super ) fn error_positional_after_named( location: SourceLocation ) -> ParseError
{
  let message = String::from( "Positional argument after named argument" );
  ParseError ::new( ErrorKind ::Syntax( message ), location )
}
/// Builds the `Syntax` error reported when the named argument `arg_name` is given twice.
///
/// `location` is attached to the error unchanged.
pub( super ) fn error_duplicate_named_argument( arg_name: &str, location: SourceLocation ) -> ParseError
{
  let message = format!( "Duplicate named argument '{arg_name}'" );
  ParseError ::new( ErrorKind ::Syntax( message ), location )
}
/// Builds the `Syntax` error reported when the `::` operator appears by itself.
///
/// `location` is attached to the error unchanged.
pub( super ) fn error_orphaned_operator( location: SourceLocation ) -> ParseError
{
  let message = String::from( "Named argument operator '::' cannot appear by itself" );
  ParseError ::new( ErrorKind ::Syntax( message ), location )
}
/// Builds the `Syntax` error reported when the named argument `arg_name` has no value.
///
/// `location` is attached to the error unchanged.
pub( super ) fn error_missing_named_value( arg_name: &str, location: SourceLocation ) -> ParseError
{
  let message = format!( "Expected value for named argument '{arg_name}'" );
  ParseError ::new( ErrorKind ::Syntax( message ), location )
}
/// Builds the `Syntax` error reported when the instruction ends before the named argument
/// `arg_name` receives a value.
///
/// `location` is attached to the error unchanged.
pub( super ) fn error_missing_named_value_at_end( arg_name: &str, location: SourceLocation ) -> ParseError
{
  let message = format!( "Expected value for named argument '{arg_name}' but found end of instruction" );
  ParseError ::new( ErrorKind ::Syntax( message ), location )
}
/// Verifies that no further token follows the help operator `?`.
///
/// `item` supplies the location attached to the error (presumably the `?` token itself —
/// confirm at the call site). `items_iter` is only peeked, never advanced.
///
/// # Errors
///
/// Returns a `Syntax` error when `items_iter` still has at least one item to yield.
pub( super ) fn validate_help_operator( item: &RichItem< '_ >, items_iter: &mut core ::iter ::Peekable< IntoIter< RichItem< '_ > > > ) -> Result< (), ParseError >
{
  match items_iter.peek()
  {
    None => Ok( () ),
    Some( _ ) => Err( ParseError ::new
    (
      ErrorKind ::Syntax( "Help operator '?' must be the last token".to_string() ),
      item.adjusted_source_location.clone(),
    ) ),
  }
}
/// Appends `value` to `positional_arguments` as an unnamed argument.
///
/// The new argument has no name and no name location; its value location is taken from
/// `item.source_location()`.
///
/// # Errors
///
/// Returns the "positional after named" error, located at `item`'s adjusted source
/// location, when `named_arguments` is non-empty and
/// `options.error_on_positional_after_named` is set. Nothing is pushed in that case.
pub( super ) fn process_positional_argument(
  options: &UnilangParserOptions,
  value: &str,
  item: &RichItem< '_ >,
  positional_arguments: &mut Vec< Argument >,
  named_arguments: &BTreeMap< String, Vec< Argument > >,
) -> Result< (), ParseError >
{
  let named_already_seen = !named_arguments.is_empty();
  if options.error_on_positional_after_named && named_already_seen
  {
    let location = item.adjusted_source_location.clone();
    return Err( error_positional_after_named( location ) );
  }

  let argument = Argument
  {
    name: None,
    value: String::from( value ),
    name_location: None,
    value_location: item.source_location(),
  };
  positional_arguments.push( argument );
  Ok( () )
}
/// Heuristically warns on stderr when `argv` looks like it was joined into one string and
/// split again.
///
/// Nothing is checked when `argv` has fewer than three entries. Otherwise two patterns are
/// tried in order, and the function returns after the first one that matches:
///
/// 1. A token containing `/` directly followed by a token that is shorter than 20 bytes,
///    does not start with `-` or `.`, and does not contain `::`.
/// 2. Three or more consecutive tokens (the first entry of `argv` is skipped) that are each
///    shorter than 15 bytes, do not start with `-` or `.`, and do not contain `::`.
///
/// The function never fails and returns nothing; its only effect is the warning text.
/// The warning is compiled out when the `no_std` feature is enabled.
pub( super ) fn detect_argv_misuse( argv: &[String] )
{
  if argv.len() < 3
  {
    return;
  }

  // Pattern 1: a path-like token followed by a short plain token.
  for pair in argv.windows( 2 )
  {
    let current = &pair[ 0 ];
    let next = &pair[ 1 ];
    if current.contains( '/' )
      && !next.starts_with( '-' )
      && !next.contains( "::" )
      && !next.starts_with( '.' )
      && next.len() < 20
    {
      #[ cfg( not( feature = "no_std" ) ) ]
      {
        eprintln!( "\n⚠️ WARNING: Potential argv misuse detected!" );
        eprintln!( " Pattern: Path-like tokens that appear to be split incorrectly" );
        eprintln!( " Found: {current:?} followed by {next:?}" );
        eprintln!();
        eprintln!( " This usually happens when you:" );
        eprintln!( " 1. Join argv into a string: argv.join(\" \")");
        eprintln!( " 2. Re-split with split_whitespace() or parse_single_instruction()");
        eprintln!();
        eprintln!( " ❌ WRONG: argv.join(\" \") then parse_single_instruction()");
        eprintln!( " ✅ CORRECT: parse_from_argv(&argv) directly");
        eprintln!();
        eprintln!( " Why this matters: Shell already tokenized your arguments." );
        eprintln!( " Re-tokenizing destroys quote handling, causing quoted paths" );
        eprintln!( " like \"src/my project\" to be incorrectly split." );
        eprintln!();
        eprintln!( " See: docs/cli_integration.md for details");
        eprintln!();
      }
      return;
    }
  }

  // Pattern 2: a run of three or more short plain tokens, ignoring the first entry.
  let mut consecutive_short = 0;
  for arg in argv.iter().skip( 1 )
  {
    let is_short_plain = arg.len() < 15
      && !arg.starts_with( '-' )
      && !arg.starts_with( '.' )
      && !arg.contains( "::" );

    if !is_short_plain
    {
      // Any other token breaks the run.
      consecutive_short = 0;
      continue;
    }

    consecutive_short += 1;
    if consecutive_short >= 3
    {
      #[ cfg( not( feature = "no_std" ) ) ]
      {
        eprintln!( "\n⚠️ WARNING: Potential argv misuse detected!" );
        eprintln!( " Pattern: Multiple consecutive short tokens (3+ in a row)" );
        eprintln!( " This suggests arguments may have been joined and re-split" );
        eprintln!();
        eprintln!( " Common mistake:" );
        eprintln!( " let joined = argv.join(\" \"); // ❌ Loses token boundaries");
        eprintln!( " parser.parse_repl_input(&joined); // ❌ Re-tokenizes incorrectly");
        eprintln!();
        eprintln!( " Correct approach:" );
        eprintln!( " parser.parse_from_argv(&argv); // ✅ Preserves shell tokenization");
        eprintln!();
        eprintln!( " See: docs/cli_integration.md for complete guide");
        eprintln!();
      }
      return;
    }
  }
}