use super::{CommandDefinition, CommandRegistryError, ParsedCommand};
use pulldown_cmark::{Event, Parser, Tag, TagEnd};
use regex::{Regex, RegexBuilder};
use std::path::{Path, PathBuf};
use std::time::SystemTime;
use ahash::AHashMap;
use terraphim_automata::{Matched, find_matches};
use terraphim_types::{NormalizedTerm, NormalizedTermValue, Thesaurus};
/// A parsed command paired with optional content-analysis results.
#[derive(Debug, Clone)]
pub struct EnrichedParsedCommand {
/// The command as parsed from frontmatter + markdown body.
pub parsed_command: ParsedCommand,
/// Analysis output; `None` when analysis was not performed.
pub enriched_content: Option<EnrichedContent>,
}
/// Results of analyzing a command's markdown body against the technical
/// thesaurus and keyword heuristics.
#[derive(Debug, Clone)]
pub struct EnrichedContent {
/// Terms found via `find_matches` (empty when no thesaurus is configured).
pub matched_terms: Vec<Matched>,
/// Each positioned match paired with the paragraph of text surrounding it.
pub contextual_paragraphs: Vec<(Matched, String)>,
/// Up to 20 lowercased, sorted, deduplicated technical keywords.
pub extracted_keywords: Vec<String>,
/// Up to 15 merged concepts drawn from matches and keywords.
pub related_concepts: Vec<String>,
/// Size/structure statistics for the content.
pub complexity_metrics: ContentMetrics,
}
/// Simple size and structure statistics for a command body.
#[derive(Debug, Clone)]
pub struct ContentMetrics {
/// Whitespace-separated word count.
pub word_count: usize,
/// Number of thesaurus matches found in the content.
pub technical_term_count: usize,
/// Number of fenced code blocks (pairs of ``` markers).
pub code_block_count: usize,
/// Number of lines beginning with '#'.
pub heading_count: usize,
/// Blend of term density (60%) and structure (40%), clamped to [0, 1].
pub richness_score: f64,
}
/// Parses markdown command files with YAML frontmatter, optionally enriching
/// them with thesaurus-based term analysis.
#[derive(Debug)]
pub struct MarkdownCommandParser {
/// Splits a document into frontmatter (capture 1) and body (capture 2).
frontmatter_regex: Regex,
/// Optional thesaurus used by content analysis for term matching.
technical_thesaurus: Option<Thesaurus>,
/// Technical terms harvested from parsed content, keyed by lowercase form.
learned_terms: AHashMap<String, NormalizedTermValue>,
}
impl MarkdownCommandParser {
/// Creates a parser with no technical thesaurus attached.
///
/// # Errors
/// Returns a parse error if the hard-coded frontmatter regex fails to
/// compile (which should not happen in practice).
pub fn new() -> Result<Self, CommandRegistryError> {
    // Capture 1 = YAML frontmatter, capture 2 = markdown body.
    let mut builder = RegexBuilder::new(r"^---\s*\n(.*?)\n---\s*\n(.*)$");
    builder.dot_matches_new_line(true);
    let frontmatter_regex = builder
        .build()
        .map_err(|e| CommandRegistryError::parse_error("regex", e.to_string()))?;
    Ok(Self {
        frontmatter_regex,
        technical_thesaurus: None,
        learned_terms: AHashMap::new(),
    })
}
pub fn with_technical_thesaurus(thesaurus: Thesaurus) -> Result<Self, CommandRegistryError> {
let frontmatter_regex = RegexBuilder::new(r"^---\s*\n(.*?)\n---\s*\n(.*)$")
.dot_matches_new_line(true)
.build()
.map_err(|e| CommandRegistryError::parse_error("regex", e.to_string()))?;
Ok(Self {
frontmatter_regex,
technical_thesaurus: Some(thesaurus),
learned_terms: AHashMap::new(),
})
}
/// Attaches (or replaces) the technical thesaurus used by content analysis.
pub fn set_technical_thesaurus(&mut self, thesaurus: Thesaurus) {
self.technical_thesaurus = Some(thesaurus);
}
/// Reads and parses a single markdown command file.
///
/// # Errors
/// * `FileNotFound` when the file cannot be read.
/// * `IoError` when metadata cannot be retrieved.
/// * Any error propagated from [`Self::parse_content`].
pub async fn parse_file(
&self,
file_path: impl AsRef<Path>,
) -> Result<ParsedCommand, CommandRegistryError> {
let path = file_path.as_ref();
let content = tokio::fs::read_to_string(path)
.await
// NOTE(review): every read failure (permissions, invalid UTF-8, ...)
// is reported as FileNotFound — confirm this coarse mapping is intended.
.map_err(|_e| CommandRegistryError::FileNotFound(path.to_string_lossy().to_string()))?;
let metadata = tokio::fs::metadata(path)
.await
.map_err(CommandRegistryError::IoError)?;
// Fall back to the epoch on platforms that cannot report mtime.
let modified = metadata.modified().unwrap_or(SystemTime::UNIX_EPOCH);
self.parse_content(&content, path.to_path_buf(), modified)
}
/// Splits `content` into YAML frontmatter and markdown body, deserializes
/// the frontmatter into a `CommandDefinition`, validates it, and normalizes
/// the body through the markdown renderer.
///
/// # Errors
/// `InvalidFrontmatter` when the frontmatter is missing, fails YAML
/// deserialization, or fails validation.
pub fn parse_content(
&self,
content: &str,
source_path: PathBuf,
modified: SystemTime,
) -> Result<ParsedCommand, CommandRegistryError> {
let captures = self.frontmatter_regex.captures(content).ok_or_else(|| {
CommandRegistryError::invalid_frontmatter(
&source_path,
"No valid YAML frontmatter found. Expected format: ---\\nyaml\\n---\\ncontent",
)
})?;
// Both capture groups are unconditional in the pattern, so they always
// exist whenever the regex matched — the unwraps cannot fail.
let frontmatter_yaml = captures.get(1).unwrap().as_str().trim();
let markdown_content = captures.get(2).unwrap().as_str().trim();
let definition: CommandDefinition =
serde_yaml::from_str(frontmatter_yaml).map_err(|e| {
CommandRegistryError::invalid_frontmatter(
&source_path,
format!("YAML parsing error: {}", e),
)
})?;
self.validate_definition(&definition, &source_path)?;
// Shadowing: `content` now holds the normalized markdown body only.
let content = self.extract_markdown_content(markdown_content);
Ok(ParsedCommand {
definition,
content,
source_path,
modified,
})
}
/// Parses `content` like [`Self::parse_content`] and additionally analyzes
/// the body, learning new technical terms from it as a side effect (hence
/// `&mut self`).
pub fn parse_content_with_analysis(
    &mut self,
    content: &str,
    source_path: PathBuf,
    modified: SystemTime,
) -> Result<EnrichedParsedCommand, CommandRegistryError> {
    let parsed_command = self.parse_content(content, source_path.clone(), modified)?;
    let body = parsed_command.content.as_str();
    let analysis = self.analyze_content(body)?;
    self.learn_terms_from_content(body);
    Ok(EnrichedParsedCommand {
        enriched_content: Some(analysis),
        parsed_command,
    })
}
/// Runs all content analyses: thesaurus matching, keyword extraction,
/// complexity metrics, contextual paragraphs, and related concepts.
fn analyze_content(&self, content: &str) -> Result<EnrichedContent, CommandRegistryError> {
let matched_terms = if let Some(ref thesaurus) = self.technical_thesaurus {
// `find_matches` takes the thesaurus by value, so every call pays a
// full clone — consider sharing (e.g. Arc) if this shows up in profiles.
find_matches(content, thesaurus.clone(), true)
.map_err(|e| CommandRegistryError::AutomataError(e.to_string()))?
} else {
// No thesaurus configured: analysis proceeds with zero matched terms.
Vec::new()
};
let extracted_keywords = self.extract_keywords_from_text(content);
let complexity_metrics = self.calculate_complexity_metrics(content, &matched_terms);
let contextual_paragraphs = self.extract_contextual_paragraphs(content, &matched_terms);
let related_concepts = self.identify_related_concepts(&matched_terms, &extracted_keywords);
Ok(EnrichedContent {
matched_terms,
contextual_paragraphs,
extracted_keywords,
related_concepts,
complexity_metrics,
})
}
/// Pulls up to 20 technical keywords out of `text`, lowercased, sorted
/// alphabetically, and deduplicated.
///
/// Stop-word and technical-term checks run on the *original-case* word;
/// only qualifying words are lowercased for the result.
fn extract_keywords_from_text(&self, text: &str) -> Vec<String> {
    const PUNCT: &[char] = &[
        ':', ',', '.', ';', '(', ')', '[', ']', '{', '}', '"', '\'', '!', '?', '-', '_',
    ];
    let mut keywords: Vec<String> = text
        .split_whitespace()
        .map(|word| word.trim_matches(PUNCT))
        .filter(|w| w.len() >= 3 && !self.is_stop_word(w) && self.is_technical_term(w))
        .map(str::to_lowercase)
        .collect();
    keywords.sort();
    keywords.dedup();
    // Cap the result; truncation happens after sorting, as in the original.
    keywords.truncate(20);
    keywords
}
/// Heuristically decides whether `word` looks like a technical term.
///
/// A word qualifies if it appears in a fixed infra/dev vocabulary, carries a
/// common technical suffix or substring, contains an underscore
/// (snake_case), or is mixed-case and longer than four characters.
fn is_technical_term(&self, word: &str) -> bool {
    // Fixed vocabulary of infrastructure / development terms. "helm"
    // appeared twice in the original list and was deduplicated — no
    // behavioral change.
    const TECH_INDICATORS: &[&str] = &[
        "config", "deploy", "build", "test", "api", "http", "json", "yaml", "docker",
        "kubernetes", "service", "database", "cache", "queue", "server", "client",
        "request", "response", "endpoint", "route", "handler", "middleware", "auth",
        "token", "session", "cluster", "node", "container", "pod", "namespace", "helm",
        "terraform", "ansible", "ci", "cd", "pipeline", "github", "gitlab", "jenkins",
        "artifact", "registry", "monitoring", "logging", "metrics", "alerting",
        "grafana", "prometheus", "kibana", "elasticsearch", "redis", "postgresql",
        "mysql", "mongodb", "cassandra", "kafka", "rabbitmq", "nginx", "apache", "ssl",
        "certificates", "tls", "https", "cert", "encryption", "hash", "charts",
        "configmaps", "microservice", "deploys", "deployments",
    ];
    let word_lower = word.to_lowercase();
    if TECH_INDICATORS.contains(&word_lower.as_str()) {
        return true;
    }
    // Common technical suffixes. The original also tested
    // `ends_with("config")`, which is already implied by the
    // `contains("config")` check below and was dropped.
    const TECH_SUFFIXES: &[&str] = &[
        "service", "server", "client", "manager", "handler", "worker", "process",
        "thread",
    ];
    if TECH_SUFFIXES.iter().any(|s| word_lower.ends_with(s)) {
        return true;
    }
    // Technical substrings anywhere in the word.
    const TECH_SUBSTRINGS: &[&str] = &["config", "deploy", "build", "test"];
    if TECH_SUBSTRINGS.iter().any(|s| word_lower.contains(s)) {
        return true;
    }
    // snake_case: a '_' guarantees `split('_')` yields at least two pieces,
    // so the original's extra `count() > 1` check was redundant.
    if word.contains('_') {
        return true;
    }
    // Mixed-case words longer than four characters (e.g. "APIendpoint").
    word.chars().any(|c| c.is_uppercase()) && word.len() > 4
}
/// Returns `true` for common English function words that should never be
/// treated as keywords.
///
/// Matching is exact and case-sensitive, so capitalized occurrences
/// (e.g. "This") are not filtered — callers pass words as they appear.
fn is_stop_word(&self, word: &str) -> bool {
    // "most" appeared twice in the original list; deduplicated with no
    // behavioral change.
    const STOP_WORDS: &[&str] = &[
        "the", "and", "or", "but", "in", "on", "at", "to", "for", "of", "with", "by",
        "from", "up", "about", "into", "through", "during", "before", "after", "above",
        "below", "is", "are", "was", "were", "be", "been", "being", "have", "has",
        "had", "do", "does", "did", "will", "would", "could", "should", "may", "might",
        "must", "can", "this", "that", "these", "those", "i", "you", "he", "she", "it",
        "we", "they", "me", "him", "her", "us", "them", "my", "your", "his", "its",
        "our", "their", "a", "an", "as", "if", "when", "where", "why", "how", "what",
        "which", "who", "whom", "whose", "all", "any", "both", "each", "every", "few",
        "many", "most", "other", "some", "such", "only", "own", "same", "so", "than",
        "too", "very", "just", "now", "also", "here", "there", "more",
    ];
    STOP_WORDS.contains(&word)
}
/// Computes simple size/structure metrics for a command body.
///
/// `richness_score` blends technical-term density (weight 0.6) with a
/// structural score (weight 0.4), clamped to at most 1.0.
fn calculate_complexity_metrics(
    &self,
    content: &str,
    matched_terms: &[Matched],
) -> ContentMetrics {
    let word_count = content.split_whitespace().count();
    let technical_term_count = matched_terms.len();
    // Each fenced block contributes an opening and a closing "```" marker.
    let code_block_count = content.matches("```").count() / 2;
    let heading_count = content
        .lines()
        .filter(|line| line.starts_with('#'))
        .count();
    let term_density = match word_count {
        0 => 0.0,
        n => technical_term_count as f64 / n as f64,
    };
    // Roughly one point per ten structural elements (headings + code blocks).
    let structural_score = (code_block_count + heading_count) as f64 / 10.0;
    let richness_score = (term_density * 0.6 + structural_score * 0.4).min(1.0);
    ContentMetrics {
        word_count,
        technical_term_count,
        code_block_count,
        heading_count,
        richness_score,
    }
}
/// Extracts, for every positioned match, the paragraph of `content` that
/// surrounds it (paragraphs are delimited by blank lines).
///
/// Matches without position information are skipped. Spans that do not fall
/// on UTF-8 character boundaries are also skipped — the original indexed
/// with `&content[a..b]`, which panics in that case.
fn extract_contextual_paragraphs(
    &self,
    content: &str,
    matched_terms: &[Matched],
) -> Vec<(Matched, String)> {
    let mut paragraphs = Vec::new();
    for matched in matched_terms {
        if let Some((start, _)) = matched.pos {
            let paragraph_start = self.find_paragraph_start(content, start);
            // `start + 20` nudges the end-search past the matched term itself;
            // assumes match offsets are byte positions — TODO confirm upstream.
            let paragraph_end = self.find_paragraph_end(content, start + 20);
            if paragraph_start < paragraph_end {
                // `get` returns None (instead of panicking) for out-of-range
                // or mid-character indices.
                if let Some(paragraph) = content.get(paragraph_start..paragraph_end) {
                    paragraphs.push((matched.clone(), paragraph.trim().to_string()));
                }
            }
        }
    }
    paragraphs
}
/// Walks backwards from byte offset `pos` to the start of the enclosing
/// paragraph: the position just after the nearest blank line ("\n\n"), or 0.
///
/// Operates on bytes: '\n' is a single byte that can never occur inside a
/// multi-byte UTF-8 sequence, so byte-wise scanning is safe. The original
/// called `chars().nth()` inside the loop — O(n) per step (O(n^2) overall)
/// and it conflated character indices with the byte offsets supplied by the
/// matcher.
fn find_paragraph_start(&self, content: &str, pos: usize) -> usize {
    let bytes = content.as_bytes();
    let mut start = pos.min(bytes.len());
    while start > 0 {
        // A "\n\n" pair immediately before `start` marks the boundary.
        if start > 1 && bytes[start - 1] == b'\n' && bytes[start - 2] == b'\n' {
            break;
        }
        start -= 1;
    }
    start
}
/// Walks forwards from byte offset `pos` to the end of the enclosing
/// paragraph: just past the next blank line ("\n\n"), or the end of
/// `content`.
///
/// Byte-wise counterpart of `find_paragraph_start`; replaces the original
/// O(n^2) `chars().nth()` scan and its char/byte index conflation.
fn find_paragraph_end(&self, content: &str, pos: usize) -> usize {
    let bytes = content.as_bytes();
    let len = bytes.len();
    let mut end = pos;
    while end < len {
        // Include both newline bytes of the "\n\n" pair, as the original did.
        if bytes[end] == b'\n' && end + 1 < len && bytes[end + 1] == b'\n' {
            end += 2;
            break;
        }
        end += 1;
    }
    end.min(len)
}
/// Merges matched thesaurus terms with extracted keywords into a sorted,
/// deduplicated list of at most 15 related concepts.
fn identify_related_concepts(
    &self,
    matched_terms: &[Matched],
    keywords: &[String],
) -> Vec<String> {
    let mut concepts: Vec<String> = Vec::new();
    // Thesaurus matches first: only terms longer than four characters,
    // lowercased, added once each.
    for lowered in matched_terms
        .iter()
        .filter(|m| m.term.len() > 4)
        .map(|m| m.term.to_lowercase())
    {
        if !concepts.contains(&lowered) {
            concepts.push(lowered);
        }
    }
    // Then up to ten keywords that are not already present.
    for keyword in keywords.iter().take(10) {
        if !concepts.contains(keyword) {
            concepts.push(keyword.clone());
        }
    }
    concepts.sort();
    concepts.truncate(15);
    concepts
}
/// Scans `content` and records every technical-looking word longer than
/// four characters in the learned-terms map, keyed by its lowercase form.
fn learn_terms_from_content(&mut self, content: &str) {
    // Note: unlike `extract_keywords_from_text`, '-' and '_' are NOT
    // trimmed here, so snake_case words survive intact.
    const PUNCT: &[char] = &[
        ':', ',', '.', ';', '(', ')', '[', ']', '{', '}', '"', '\'', '!', '?',
    ];
    for word in content.split_whitespace() {
        let clean_word = word.trim_matches(PUNCT);
        if clean_word.len() > 4 && self.is_technical_term(clean_word) {
            // Lowercase once and reuse for both key and value; the original
            // recomputed `to_lowercase()` twice per term.
            let key = clean_word.to_lowercase();
            let normalized = NormalizedTermValue::from(key.clone());
            self.learned_terms.insert(key, normalized);
        }
    }
}
/// Read-only view of all terms learned so far, keyed by lowercase form.
pub fn get_learned_terms(&self) -> &AHashMap<String, NormalizedTermValue> {
&self.learned_terms
}
/// Builds a `Thesaurus` from all learned terms, or `None` when nothing has
/// been learned yet.
///
/// Ids are assigned sequentially in map-iteration order (arbitrary for a
/// hash map, as in the original); each entry carries a synthetic
/// `learned-term:` URL recording its origin.
pub fn build_technical_thesaurus(&self) -> Option<Thesaurus> {
    if self.learned_terms.is_empty() {
        return None;
    }
    let mut thesaurus = Thesaurus::new("learned_technical_terms".to_string());
    for (id, (term, normalized_term)) in (1u64..).zip(self.learned_terms.iter()) {
        let entry = NormalizedTerm {
            id,
            value: normalized_term.clone(),
            display_value: None,
            url: Some(format!("learned-term:{}", term)),
        };
        thesaurus.insert(normalized_term.clone(), entry);
    }
    Some(thesaurus)
}
/// Recursively parses all `.md` command files under `dir_path`.
///
/// Individual files and subdirectories that fail to parse are skipped with
/// a warning on stderr; only a failure to read the initial directory fails
/// the whole call. Recursion depth is capped at 10.
pub async fn parse_directory(
&self,
dir_path: impl AsRef<Path>,
) -> Result<Vec<ParsedCommand>, CommandRegistryError> {
self.parse_directory_recursive(dir_path, 0).await
}
/// Depth-limited recursive worker behind [`Self::parse_directory`].
///
/// Collects parsed commands from `.md` files; errors in children are logged
/// to stderr and skipped so one bad file cannot abort the whole walk.
async fn parse_directory_recursive(
&self,
dir_path: impl AsRef<Path>,
depth: usize,
) -> Result<Vec<ParsedCommand>, CommandRegistryError> {
// Hard stop: anything deeper than 10 levels is silently ignored.
if depth > 10 {
return Ok(Vec::new());
}
let mut commands = Vec::new();
let mut entries = tokio::fs::read_dir(dir_path)
.await
.map_err(CommandRegistryError::IoError)?;
while let Some(entry) = entries
.next_entry()
.await
.map_err(CommandRegistryError::IoError)?
{
let path = entry.path();
// NOTE(review): `path.is_dir()` is a blocking stat call inside an
// async fn — likely fine for small trees, but worth confirming.
if path.is_dir() {
// Box::pin is required: a recursive async fn would otherwise
// produce an infinitely-sized future type.
match Box::pin(self.parse_directory_recursive(&path, depth + 1)).await {
Ok(mut sub_commands) => commands.append(&mut sub_commands),
Err(e) => {
eprintln!(
"Warning: Failed to parse directory {}: {}",
path.display(),
e
);
}
}
} else if path.extension().and_then(|s| s.to_str()) == Some("md") {
match self.parse_file(&path).await {
Ok(command) => commands.push(command),
Err(e) => {
eprintln!(
"Warning: Failed to parse command file {}: {}",
path.display(),
e
);
}
}
}
}
Ok(commands)
}
/// Renders a pulldown-cmark event stream back into normalized markdown text.
///
/// Headings, fenced code blocks, list items, inline code, bold and italic
/// are re-emitted with their markdown markers. Link URLs are dropped: only
/// the link text survives (it arrives as a plain `Text` event). All other
/// events are ignored.
fn extract_markdown_content(&self, markdown_content: &str) -> String {
let parser = Parser::new(markdown_content);
let mut output = String::new();
// Remembers the opening fence (e.g. "```bash") so the closing fence
// emitted at End(CodeBlock) matches it.
let mut code_block_fence = String::new();
for event in parser {
match event {
Event::Start(Tag::Heading { level, .. }) => {
// Re-emit "#" repeated `level` times (HeadingLevel casts to 1..=6).
output.push_str(&"#".repeat(level as usize));
output.push(' ');
}
Event::End(TagEnd::Heading(_)) => {
output.push('\n');
}
Event::Start(Tag::CodeBlock(kind)) => {
// Preserve the language tag on fenced blocks; indented blocks
// become plain "```" fences.
code_block_fence = match kind {
pulldown_cmark::CodeBlockKind::Fenced(fence) => {
if fence.is_empty() {
"```".to_string()
} else {
format!("```{}", fence)
}
}
_ => "```".to_string(),
};
output.push_str(&code_block_fence);
output.push('\n');
}
Event::End(TagEnd::CodeBlock) => {
// NOTE(review): the closing fence repeats the language tag
// (e.g. "```bash"), unlike standard markdown — confirm intended.
output.push_str(&code_block_fence);
output.push('\n');
}
Event::Start(Tag::List(..)) => {
// List start emits nothing; items carry their own markers.
}
Event::End(TagEnd::List(_)) => {
output.push('\n');
}
Event::Start(Tag::Item) => {
output.push_str("- ");
}
Event::End(TagEnd::Item) => {
output.push('\n');
}
Event::Text(text) => {
output.push_str(&text);
}
Event::Code(code) => {
output.push('`');
output.push_str(&code);
output.push('`');
}
Event::Start(Tag::Strong) => {
output.push_str("**");
}
Event::End(TagEnd::Strong) => {
output.push_str("**");
}
Event::Start(Tag::Emphasis) => {
output.push('*');
}
Event::End(TagEnd::Emphasis) => {
output.push('*');
}
Event::SoftBreak | Event::HardBreak => {
output.push('\n');
}
// Everything else (links, images, tables, blockquotes, ...) is dropped.
_ => {}
}
}
output.trim().to_string()
}
fn validate_definition(
&self,
definition: &CommandDefinition,
source_path: &Path,
) -> Result<(), CommandRegistryError> {
if definition.name.is_empty() {
return Err(CommandRegistryError::invalid_frontmatter(
source_path,
"Command name cannot be empty",
));
}
let name_regex = regex::Regex::new(r"^[a-zA-Z][a-zA-Z0-9_-]*$").unwrap();
if !name_regex.is_match(&definition.name) {
return Err(CommandRegistryError::invalid_frontmatter(
source_path,
format!(
"Invalid command name '{}'. Must start with letter and contain only alphanumeric characters, hyphens, and underscores",
definition.name
),
));
}
let param_name_regex = regex::Regex::new(r"^[a-zA-Z][a-zA-Z0-9_]*$").unwrap();
for param in &definition.parameters {
if !param_name_regex.is_match(¶m.name) {
return Err(CommandRegistryError::invalid_frontmatter(
source_path,
format!(
"Invalid parameter name '{}'. Must start with letter and contain only alphanumeric characters and underscores",
param.name
),
));
}
}
for param in &definition.parameters {
if param.required && param.default_value.is_some() {
return Err(CommandRegistryError::invalid_frontmatter(
source_path,
format!(
"Required parameter '{}' cannot have a default value",
param.name
),
));
}
}
if let Some(timeout) = definition.timeout {
if timeout == 0 {
return Err(CommandRegistryError::invalid_frontmatter(
source_path,
"Timeout cannot be zero",
));
}
}
if let Some(ref limits) = definition.resource_limits {
if let Some(max_memory) = limits.max_memory_mb {
if max_memory == 0 {
return Err(CommandRegistryError::invalid_frontmatter(
source_path,
"Max memory limit cannot be zero",
));
}
}
if let Some(max_cpu) = limits.max_cpu_time {
if max_cpu == 0 {
return Err(CommandRegistryError::invalid_frontmatter(
source_path,
"Max CPU time cannot be zero",
));
}
}
if let Some(max_disk) = limits.max_disk_mb {
if max_disk == 0 {
return Err(CommandRegistryError::invalid_frontmatter(
source_path,
"Max disk limit cannot be zero",
));
}
}
}
Ok(())
}
}
impl Default for MarkdownCommandParser {
/// Panics only if the hard-coded frontmatter regex fails to compile,
/// which cannot happen in practice.
fn default() -> Self {
Self::new().expect("Failed to create MarkdownCommandParser")
}
}
/// Convenience helper: builds a fresh parser and parses a single file.
///
/// # Errors
/// Propagates parser-construction and file-parsing errors.
pub async fn parse_markdown_command(
file_path: impl AsRef<Path>,
) -> Result<ParsedCommand, CommandRegistryError> {
let parser = MarkdownCommandParser::new()?;
parser.parse_file(file_path).await
}
#[cfg(test)]
mod tests {
// Unit tests for frontmatter parsing, validation, markdown normalization,
// and the content-analysis / term-learning pipeline.
use super::*;
use std::path::PathBuf;
use std::time::SystemTime;
#[test]
fn test_parse_simple_command() {
let parser = MarkdownCommandParser::new().unwrap();
let markdown = r#"---
name: "hello"
description: "Say hello to someone"
parameters:
- name: "name"
type: "string"
required: true
description: "Name of person to greet"
execution_mode: "local"
risk_level: "low"
---
# Hello Command
This command says hello to someone with a friendly message.
## Usage
Just provide a name and get a greeting!
"#;
let result =
parser.parse_content(markdown, PathBuf::from("hello.md"), SystemTime::UNIX_EPOCH);
assert!(result.is_ok());
let parsed = result.unwrap();
assert_eq!(parsed.definition.name, "hello");
assert_eq!(parsed.definition.description, "Say hello to someone");
assert_eq!(parsed.definition.parameters.len(), 1);
assert_eq!(parsed.definition.parameters[0].name, "name");
assert!(parsed.definition.parameters[0].required);
}
#[test]
fn test_invalid_command_name() {
let parser = MarkdownCommandParser::new().unwrap();
let markdown = r#"---
name: "123invalid"
description: "Invalid command name"
execution_mode: "local"
---
Content here
"#;
let result = parser.parse_content(
markdown,
PathBuf::from("invalid.md"),
SystemTime::UNIX_EPOCH,
);
assert!(result.is_err());
let error = result.unwrap_err();
match error {
CommandRegistryError::InvalidFrontmatter(_, msg) => {
assert!(msg.contains("Invalid command name"));
}
_ => panic!("Expected InvalidFrontmatter error"),
}
}
#[test]
fn test_missing_frontmatter() {
let parser = MarkdownCommandParser::new().unwrap();
let markdown = r#"This is just plain markdown
without any frontmatter.
"#;
let result = parser.parse_content(
markdown,
PathBuf::from("no-frontmatter.md"),
SystemTime::UNIX_EPOCH,
);
assert!(result.is_err());
let error = result.unwrap_err();
match error {
CommandRegistryError::InvalidFrontmatter(_, msg) => {
assert!(msg.contains("No valid YAML frontmatter"));
}
_ => panic!("Expected InvalidFrontmatter error"),
}
}
#[test]
fn test_description_extraction() {
let parser = MarkdownCommandParser::new().unwrap();
let markdown = r#"---
name: "test"
description: "Test command"
execution_mode: "local"
---
# Test Command
This is a **bold** description with *italic* text and `code` blocks.
Here's a [link](https://example.com) that should be removed.
## Subheading
Some additional content that might be included.
"#;
let result =
parser.parse_content(markdown, PathBuf::from("test.md"), SystemTime::UNIX_EPOCH);
assert!(result.is_ok());
let parsed = result.unwrap();
assert!(parsed.content.contains("Test Command"));
assert!(
parsed
.content
.contains("**bold** description with *italic* text and `code` blocks")
);
// Link URLs are dropped by the markdown renderer; only link text remains.
assert!(!parsed.content.contains("https://example.com"));
}
#[test]
fn test_markdown_content_preservation() {
let parser = MarkdownCommandParser::new().unwrap();
let markdown = r#"---
name: "test-command"
description: "Test command with markdown"
execution_mode: "local"
risk_level: "low"
---
# Test Command
This is a **bold** description with *italic* text and `code` blocks.
## Examples
```bash
test-command --input "hello"
```
### Sub-section
Some additional content here.
- List item 1
- List item 2
- List item 3
"#;
let result =
parser.parse_content(markdown, PathBuf::from("test.md"), SystemTime::UNIX_EPOCH);
assert!(result.is_ok());
let parsed = result.unwrap();
assert!(parsed.content.contains("# Test Command"));
assert!(parsed.content.contains("## Examples"));
assert!(parsed.content.contains("### Sub-section"));
assert!(parsed.content.contains("```bash"));
assert!(parsed.content.contains("- List item 1"));
assert!(parsed.content.contains("- List item 2"));
assert!(parsed.content.contains("- List item 3"));
assert!(parsed.content.contains("This is a **bold** description"));
assert!(parsed.content.contains("test-command --input \"hello\""));
let lines: Vec<&str> = parsed.content.lines().collect();
assert!(lines.len() > 5);
}
#[test]
fn test_technical_term_identification() {
let parser = MarkdownCommandParser::new().unwrap();
assert!(parser.is_technical_term("database"));
assert!(parser.is_technical_term("APIendpoint"));
assert!(parser.is_technical_term("docker_config"));
assert!(parser.is_technical_term("build_service"));
assert!(parser.is_technical_term("kubernetes_cluster"));
assert!(!parser.is_technical_term("hello"));
assert!(!parser.is_technical_term("world"));
assert!(!parser.is_technical_term("simple"));
assert!(!parser.is_technical_term("basic"));
}
#[test]
fn test_keyword_extraction() {
let parser = MarkdownCommandParser::new().unwrap();
let text = "This command configures the nginx server and sets up SSL certificates for HTTPS connections. It also manages the PostgreSQL database cluster.";
let keywords = parser.extract_keywords_from_text(text);
assert!(keywords.contains(&"nginx".to_string()));
assert!(keywords.contains(&"server".to_string()));
assert!(keywords.contains(&"ssl".to_string()));
assert!(keywords.contains(&"certificates".to_string()));
assert!(keywords.contains(&"https".to_string()));
assert!(keywords.contains(&"postgresql".to_string()));
assert!(keywords.contains(&"database".to_string()));
assert!(keywords.contains(&"cluster".to_string()));
assert!(!keywords.contains(&"this".to_string()));
assert!(!keywords.contains(&"and".to_string()));
assert!(!keywords.contains(&"for".to_string()));
assert!(!keywords.contains(&"the".to_string()));
}
#[test]
fn test_content_complexity_metrics() {
let parser = MarkdownCommandParser::new().unwrap();
let content = r#"# Complex Command
This is a detailed command with multiple paragraphs.
## Technical Details
The service uses Docker containers and Kubernetes for orchestration.
```bash
docker build -t myapp .
kubectl apply -f deployment.yaml
```
## Configuration
Set up the database connection and cache layer."#;
let metrics = parser.calculate_complexity_metrics(content, &[]);
assert!(metrics.word_count > 0);
assert_eq!(metrics.code_block_count, 1);
assert_eq!(metrics.heading_count, 3);
assert!(metrics.richness_score > 0.0);
assert!(metrics.richness_score <= 1.0);
}
#[test]
fn test_paragraph_extraction() {
let parser = MarkdownCommandParser::new().unwrap();
let content = "First paragraph with some content.
Second paragraph that contains important technical terms like database and server.
Third paragraph with more information.";
let matched_term = Matched {
term: "database".to_string(),
normalized_term: NormalizedTerm::new(1u64, NormalizedTermValue::from("database")),
pos: Some((70, 78)),
};
let paragraphs = parser.extract_contextual_paragraphs(content, &[matched_term]);
assert_eq!(paragraphs.len(), 1);
// Fixed mojibake: the original read `¶graphs[0]`, an HTML-entity
// corruption of `&paragraphs[0]` that does not compile.
let (_, paragraph) = &paragraphs[0];
assert!(paragraph.contains("Second paragraph"));
assert!(paragraph.contains("technical terms"));
assert!(paragraph.contains("database"));
assert!(paragraph.contains("server"));
}
#[test]
fn test_related_concepts_identification() {
let parser = MarkdownCommandParser::new().unwrap();
let matched_terms = vec![
Matched {
term: "kubernetes".to_string(),
normalized_term: NormalizedTerm::new(1u64, NormalizedTermValue::from("kubernetes")),
pos: Some((0, 10)),
},
Matched {
term: "database".to_string(),
normalized_term: NormalizedTerm::new(2u64, NormalizedTermValue::from("database")),
pos: Some((20, 28)),
},
];
let keywords = vec![
"server".to_string(),
"cluster".to_string(),
"deployment".to_string(),
"cache".to_string(),
];
let concepts = parser.identify_related_concepts(&matched_terms, &keywords);
assert!(!concepts.is_empty());
assert!(concepts.contains(&"kubernetes".to_string()));
assert!(concepts.contains(&"database".to_string()));
assert!(concepts.contains(&"server".to_string()));
assert!(concepts.contains(&"cluster".to_string()));
}
#[test]
fn test_term_learning() {
let mut parser = MarkdownCommandParser::new().unwrap();
let content = "This script deploys the microservice to the Kubernetes cluster using Helm charts and ConfigMaps.";
parser.learn_terms_from_content(content);
let learned_terms = parser.get_learned_terms();
assert!(learned_terms.contains_key("deploys"));
assert!(learned_terms.contains_key("microservice"));
assert!(learned_terms.contains_key("kubernetes"));
assert!(learned_terms.contains_key("cluster"));
assert!(learned_terms.contains_key("charts"));
assert!(learned_terms.contains_key("configmaps"));
}
#[test]
fn test_technical_thesaurus_building() {
let mut parser = MarkdownCommandParser::new().unwrap();
parser.learn_terms_from_content("Deploy the microservice to the cluster");
parser.learn_terms_from_content("Configure the database connection");
let thesaurus = parser.build_technical_thesaurus();
assert!(thesaurus.is_some());
let thesaurus = thesaurus.unwrap();
assert_eq!(thesaurus.name(), "learned_technical_terms");
assert!(!thesaurus.is_empty());
assert!(
thesaurus
.get(&NormalizedTermValue::from("deploy"))
.is_some()
);
assert!(
thesaurus
.get(&NormalizedTermValue::from("microservice"))
.is_some()
);
assert!(
thesaurus
.get(&NormalizedTermValue::from("cluster"))
.is_some()
);
assert!(
thesaurus
.get(&NormalizedTermValue::from("database"))
.is_some()
);
}
#[tokio::test]
async fn test_enhanced_parsing_workflow() {
let mut parser = MarkdownCommandParser::new().unwrap();
let markdown = r#"---
name: "deploy-service"
description: "Deploy microservice to Kubernetes cluster with database and cache"
execution_mode: "local"
parameters:
- name: "environment"
type: "string"
required: true
description: "Target deployment environment"
---
# Deploy Service Command
This command deploys a microservice to the Kubernetes cluster using Helm charts.
It sets up the PostgreSQL database and Redis cache configuration.
## Usage
```bash
deploy-service --environment production
```
## Configuration
The service requires proper database configuration and SSL certificates for secure connections."#;
let result = parser.parse_content_with_analysis(
markdown,
PathBuf::from("deploy-service.md"),
SystemTime::UNIX_EPOCH,
);
assert!(result.is_ok());
let enriched_command = result.unwrap();
assert_eq!(
enriched_command.parsed_command.definition.name,
"deploy-service"
);
assert!(
enriched_command
.parsed_command
.content
.contains("Deploy Service Command")
);
assert!(enriched_command.enriched_content.is_some());
let enriched = enriched_command.enriched_content.unwrap();
assert!(!enriched.extracted_keywords.is_empty());
assert!(
enriched
.extracted_keywords
.contains(&"microservice".to_string())
);
assert!(
enriched
.extracted_keywords
.contains(&"kubernetes".to_string())
);
assert!(
enriched
.extracted_keywords
.contains(&"database".to_string())
);
assert!(enriched.complexity_metrics.word_count > 0);
assert!(!parser.get_learned_terms().is_empty());
}
#[test]
fn test_parser_with_technical_thesaurus() {
let mut thesaurus = Thesaurus::new("test_technical".to_string());
thesaurus.insert(
NormalizedTermValue::from("database"),
NormalizedTerm {
id: 1u64,
value: NormalizedTermValue::from("database"),
display_value: None,
url: Some("concept:database".to_string()),
},
);
thesaurus.insert(
NormalizedTermValue::from("kubernetes"),
NormalizedTerm {
id: 2u64,
value: NormalizedTermValue::from("kubernetes"),
display_value: None,
url: Some("concept:kubernetes".to_string()),
},
);
let parser = MarkdownCommandParser::with_technical_thesaurus(thesaurus).unwrap();
let content = "This command manages the database and Kubernetes cluster.";
let analysis = parser.analyze_content(content).unwrap();
assert!(!analysis.matched_terms.is_empty());
assert!(analysis.matched_terms.iter().any(|m| m.term == "database"));
assert!(
analysis
.matched_terms
.iter()
.any(|m| m.term == "kubernetes")
);
}
}