use crate::parser::Parser;
use crate::types::Opt;
use bstr::ByteSlice;
use ecow::{EcoString, EcoVec};
use memchr::memchr;
use rayon::prelude::*;
/// Unit struct used purely as a namespace: it carries no data, and all of its
/// functionality is exposed through associated functions (none take `self`).
pub struct Layout;
impl Layout {
/// Splits `content` into option blocks and parses each block into [`Opt`] values.
///
/// Blocks are produced by `split_into_blocks_fast`; every block is passed to
/// `Parser::parse_line` and all resulting options are gathered into one `EcoVec`.
/// Inputs with more than four blocks are processed through rayon's `par_iter`;
/// smaller inputs are processed with a plain sequential iterator.
pub fn parse_blockwise(content: &str) -> EcoVec<Opt> {
    let blocks = Self::split_into_blocks_fast(content);

    // Four blocks or fewer: handle them on the calling thread.
    if blocks.len() <= 4 {
        return blocks
            .iter()
            .flat_map(|block| Parser::parse_line(block))
            .collect();
    }

    // Larger inputs: parse blocks in parallel, materialising each block's
    // options as a `Vec` so rayon can flatten them, then move the combined
    // result into the `EcoVec` return type.
    let parsed: Vec<_> = blocks
        .par_iter()
        .flat_map(|block| Parser::parse_line(block).into_iter().collect::<Vec<_>>())
        .collect();
    parsed.into_iter().collect()
}
/// Splits `content` into option blocks and runs `Parser::preprocess` on each.
///
/// Returns every pair produced by `Parser::preprocess`, gathered into a single
/// `EcoVec`. As soon as there are more than `SEQUENTIAL_LIMIT` blocks the work
/// is distributed with rayon's `par_iter`; otherwise it runs sequentially.
pub fn preprocess_blockwise(content: &str) -> EcoVec<(EcoString, EcoString)> {
    // Largest block count that is still processed without rayon.
    const SEQUENTIAL_LIMIT: usize = 4;

    let blocks = Self::split_into_blocks_fast(content);
    let run_parallel = blocks.len() > SEQUENTIAL_LIMIT;

    if run_parallel {
        // Each block's pairs are collected into a `Vec` so rayon can flatten
        // them; the merged `Vec` is then converted into the `EcoVec` result.
        let merged: Vec<_> = blocks
            .par_iter()
            .flat_map(|block| Parser::preprocess(block).into_iter().collect::<Vec<_>>())
            .collect();
        merged.into_iter().collect()
    } else {
        blocks
            .iter()
            .flat_map(|block| Parser::preprocess(block))
            .collect()
    }
}
/// Extracts the usage / synopsis section from help text.
///
/// The section starts at the first line that contains `usage` or `synopsis`
/// (compared case-insensitively) together with a `:`. It continues over the
/// following lines for as long as each line is non-empty and either starts
/// with a space or contains a `:`. The collected lines are joined with `\n`.
///
/// Returns an empty string when no such section exists.
pub fn parse_usage(content: &str) -> EcoString {
    let headers = ["usage", "synopsis"];
    let bytes = content.as_bytes();

    // Cheap rejection: neither keyword can occur without one of its initials.
    let no_initials = [b'u', b's', b'U', b'S']
        .iter()
        .all(|&initial| memchr(initial, bytes).is_none());
    if no_initials {
        return EcoString::new();
    }

    // Whole-text pre-filter: for at least one keyword, its first occurrence
    // in the lowercased text must be followed (anywhere later) by a ':'.
    let folded = content.to_lowercase();
    let header_plausible = headers.iter().any(|kw| {
        folded
            .find(kw)
            .map_or(false, |at| folded[at..].contains(':'))
    });
    if !header_plausible {
        return EcoString::new();
    }

    let lines: Vec<&str> = bytes
        .lines()
        .filter_map(|raw| std::str::from_utf8(raw).ok())
        .collect();

    // First line that mentions a keyword and carries a ':'.
    let header_idx = lines.iter().position(|line| {
        let folded_line = line.to_lowercase();
        headers.iter().any(|kw| folded_line.contains(kw)) && folded_line.contains(':')
    });

    match header_idx {
        None => EcoString::new(),
        Some(idx) => {
            // The header line is always taken; subsequent lines are taken until
            // an empty line, or an unindented line without ':', is reached.
            let continuation = lines[idx + 1..]
                .iter()
                .take_while(|l| !l.is_empty() && (l.starts_with(' ') || l.contains(':')));

            let mut section = String::with_capacity(256);
            section.push_str(lines[idx]);
            for l in continuation {
                section.push('\n');
                section.push_str(l);
            }
            EcoString::from(section)
        }
    }
}
/// Groups the lines of `content` into option blocks.
///
/// A block opens at a line whose first non-whitespace character is `-` and
/// absorbs every following line until a blank (empty or whitespace-only) line
/// closes it. Lines inside a block are joined with `\n` and keep their original
/// leading whitespace. Lines outside any block are dropped.
///
/// Returns an empty vector when `content` contains no `-` at all.
fn split_into_blocks_fast(content: &str) -> EcoVec<EcoString> {
    let raw = content.as_bytes();
    let mut blocks = EcoVec::new();

    // Without a single dash there cannot be any option line.
    if memchr(b'-', raw).is_none() {
        return blocks;
    }

    let mut pending = String::with_capacity(256);
    let mut collecting = false;

    for raw_line in raw.lines() {
        // SAFETY: `raw` is the byte view of a valid `&str`, and bstr's `lines`
        // only cuts at `\n` / `\r\n`. Those are ASCII bytes and never occur
        // inside a multi-byte UTF-8 sequence, so each piece is valid UTF-8.
        let text = unsafe { std::str::from_utf8_unchecked(raw_line) };
        let body = text.trim_start();

        if body.is_empty() {
            // Blank line: close the block currently being collected, if any.
            if collecting && !pending.is_empty() {
                blocks.push(EcoString::from(pending.as_str()));
                pending.clear();
                collecting = false;
            }
            continue;
        }

        // Outside a block, only a line starting with '-' may open one.
        if !(collecting || body.starts_with('-')) {
            continue;
        }

        if !pending.is_empty() {
            pending.push('\n');
        }
        pending.push_str(text);
        collecting = true;
    }

    // Flush a block that runs up to the end of the input.
    if !pending.is_empty() {
        blocks.push(EcoString::from(pending));
    }
    blocks
}
/// Returns the dominant indentation offsets of option lines in `s`.
///
/// The result holds the most frequent offset of short-option lines (if any),
/// followed by the most frequent offset of long-option lines (if any). When
/// both are present and equal, the value appears only once.
pub fn get_option_offsets(s: &str) -> EcoVec<usize> {
    let short = Self::get_short_option_offset(s);
    let long = Self::get_long_option_offset(s);

    // Short offset first; the long offset follows unless it duplicates it.
    short
        .into_iter()
        .chain(long.filter(|&candidate| Some(candidate) != short))
        .collect()
}
fn get_option_locations(s: &str, predicate: fn(&str) -> bool) -> EcoVec<(usize, usize)> {
let bytes = s.as_bytes();
bytes
.lines()
.enumerate()
.filter_map(|(i, line)| {
let line_str = std::str::from_utf8(line).ok()?;
let trimmed = line_str.trim_start();
if !trimmed.is_empty() && predicate(trimmed) {
let offset = line_str.len() - trimmed.len();
Some((i, offset))
} else {
None
}
})
.collect()
}
/// Most frequent indentation offset among lines that begin with `--`
/// (after leading whitespace), or `None` when there is no such line.
fn get_long_option_offset(s: &str) -> Option<usize> {
    /// True for a left-trimmed line that opens with a long-option prefix.
    fn is_long(line: &str) -> bool {
        line.starts_with("--")
    }

    let hits = Self::get_option_locations(s, is_long);
    Self::get_most_frequent_offset(&hits)
}
/// Most frequent indentation offset among lines that begin with a single `-`
/// but not `--` (after leading whitespace), or `None` when there is no such line.
fn get_short_option_offset(s: &str) -> Option<usize> {
    /// True for a left-trimmed line that opens with exactly one dash.
    fn is_short(line: &str) -> bool {
        matches!(line.strip_prefix('-'), Some(rest) if !rest.starts_with('-'))
    }

    let hits = Self::get_option_locations(s, is_short);
    Self::get_most_frequent_offset(&hits)
}
/// Returns the offset that occurs most often in `locations`.
///
/// `locations` holds `(line_index, offset)` pairs; only the offset component
/// is counted. Returns `None` for an empty slice.
///
/// When several offsets share the highest count, the smallest offset wins.
/// Without this rule the winner would depend on `HashMap` iteration order,
/// which is not stable, so identical input could give different answers.
fn get_most_frequent_offset(locations: &[(usize, usize)]) -> Option<usize> {
    let mut counts = std::collections::HashMap::with_capacity(locations.len());
    for &(_, offset) in locations {
        *counts.entry(offset).or_insert(0usize) += 1;
    }

    counts
        .into_iter()
        // Rank by count first; `Reverse` makes the smaller offset the greater
        // key among equal counts, so every key is unique and the maximum is
        // independent of iteration order.
        .max_by_key(|&(offset, count)| (count, std::cmp::Reverse(offset)))
        .map(|(offset, _)| offset)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A `usage:` header line must yield a non-empty usage section.
    #[test]
    fn test_parse_usage() {
        let help = "usage: command [options]\n\ndescription";
        assert!(!Layout::parse_usage(help).is_empty());
    }

    /// Two option blocks separated by a blank line are both parsed and
    /// both show up in the preprocessed pairs.
    #[test]
    fn test_parse_and_preprocess_blockwise() {
        let help = "\
-a, --all show all\n\
\n\
--verbose be verbose\n";

        assert_eq!(Layout::parse_blockwise(help).len(), 2);

        let pairs = Layout::preprocess_blockwise(help);
        for needle in ["-a", "--verbose"] {
            assert!(pairs.iter().any(|(opt, _)| opt.contains(needle)));
        }
    }

    /// Short and long options at the same column collapse into one offset.
    #[test]
    fn test_get_option_offsets() {
        let help = "\
-a, --all show all\n\
--verbose be verbose\n";

        let found = Layout::get_option_offsets(help);
        assert!(!found.is_empty());
        assert_eq!(found.len(), 1);
    }
}