use anyhow::{Context, Result};
use crate::pattern::Pattern;
use crate::wordlist::HostnameStructure;
use serde::{Deserialize, Serialize};
use std::time::Duration;
use std::io::{self, Write};
use std::collections::{HashMap, HashSet};
use regex::Regex;
use once_cell::sync::Lazy;
use std::sync::Arc;
use indicatif::ProgressBar;
/// Flavour of LLM HTTP API that a base URL is treated as.
#[derive(Debug, Clone, PartialEq)]
enum ApiType {
    /// Endpoints addressed as `<base>/models` and `<base>/chat/completions`.
    OpenAICompatible,
    /// Ollama server; its model listing is read from `/api/tags`.
    Ollama,
    /// Anthropic endpoint.
    Anthropic,
    /// Unclassified endpoint.
    // NOTE(review): no code visible in this file constructs this variant — confirm it is still needed.
    Unknown,
}
/// Regexes that mark a URL as an Ollama endpoint: the product name, the
/// `:11434` port, an `/api/` segment followed by a character other than `v`,
/// or a URL ending in `/api`. Compiled once on first use.
static OLLAMA_PATTERNS: Lazy<Vec<Regex>> = Lazy::new(|| {
    let sources = [r"(?i)ollama", r":11434", r"/api/[^v]", r"/api$"];
    sources
        .iter()
        .map(|source| Regex::new(source).unwrap())
        .collect()
});
/// Regexes that mark a URL as an Anthropic endpoint: the vendor name
/// (case-insensitive) or the `api.anthropic.com` host. Compiled once on
/// first use.
static ANTHROPIC_PATTERNS: Lazy<Vec<Regex>> = Lazy::new(|| {
    let sources = [r"(?i)anthropic", r"api\.anthropic\.com"];
    sources
        .iter()
        .map(|source| Regex::new(source).unwrap())
        .collect()
});
/// Regexes that mark a URL as an OpenAI-style endpoint: the vendor name
/// (case-insensitive), a `/v1` path segment, or the `api.openai.com` host.
/// Compiled once on first use.
static OPENAI_PATTERNS: Lazy<Vec<Regex>> = Lazy::new(|| {
    let sources = [r"(?i)openai", r"/v1(?:/|$)", r"api\.openai\.com"];
    sources
        .iter()
        .map(|source| Regex::new(source).unwrap())
        .collect()
});
/// Classifies `url` by matching its lowercased form against the pattern sets.
///
/// A URL containing `/api/v1` is OpenAI-compatible outright. Otherwise the
/// Ollama, Anthropic and OpenAI pattern sets are tried in that order and the
/// first set with a match wins. A URL matching nothing is treated as
/// OpenAI-compatible.
fn detect_api_type(url: &str) -> ApiType {
    let normalized = url.to_lowercase();
    if normalized.contains("/api/v1") {
        return ApiType::OpenAICompatible;
    }

    // Precedence matters: an earlier entry shadows every later one.
    let precedence = [
        (&OLLAMA_PATTERNS, ApiType::Ollama),
        (&ANTHROPIC_PATTERNS, ApiType::Anthropic),
        (&OPENAI_PATTERNS, ApiType::OpenAICompatible),
    ];

    precedence
        .iter()
        .find(|entry| entry.0.iter().any(|re| re.is_match(&normalized)))
        .map(|entry| entry.1.clone())
        .unwrap_or(ApiType::OpenAICompatible)
}
/// Lists the model names offered by the endpoint at `base_url`.
///
/// * Ollama endpoints are queried at `/api/tags`; if that request returns a
///   non-success status the OpenAI-style `/models` listing is tried instead.
/// * Anthropic endpoints are not queried and yield an empty list.
/// * Everything else is queried through the OpenAI-style `/models` listing.
///
/// # Errors
/// Fails when the HTTP client cannot be built, the request cannot be sent,
/// the response body cannot be decoded, or the OpenAI-style listing returns a
/// non-success status.
pub async fn fetch_available_models(base_url: &str) -> Result<Vec<String>> {
    #[derive(Debug, Deserialize)]
    struct OllamaModelsResponse {
        models: Vec<OllamaModelInfo>,
    }
    #[derive(Debug, Deserialize)]
    struct OllamaModelInfo {
        name: String,
    }

    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(10))
        .build()?;

    let models: Vec<String> = match detect_api_type(base_url) {
        ApiType::Ollama => {
            // Normalise trailing slashes first so ".../v1/" and ".../api/" are
            // recognised, and only rewrite the *suffix*: a blanket
            // `replace("/v1", ..)` would also mangle any earlier "/v1" in the URL.
            let root = base_url.trim_end_matches('/');
            let url = if let Some(host) = root.strip_suffix("/v1") {
                format!("{}/api/tags", host)
            } else if root.ends_with("/api") {
                format!("{}/tags", root)
            } else {
                format!("{}/api/tags", root)
            };

            let resp = client.get(&url).send().await?;
            if !resp.status().is_success() {
                // The server may only expose the OpenAI-style listing.
                return fetch_openai_models(&client, base_url).await;
            }
            let listing: OllamaModelsResponse = resp.json().await?;
            listing.models.into_iter().map(|m| m.name).collect()
        }
        // No listing request is made for Anthropic endpoints.
        ApiType::Anthropic => Vec::new(),
        ApiType::OpenAICompatible | ApiType::Unknown => {
            fetch_openai_models(&client, base_url).await?
        }
    };
    Ok(models)
}
/// Fetches `<base_url>/models` with `client` and returns the `id` of every
/// listed model.
///
/// # Errors
/// Fails when the request cannot be sent, the status is not a success, or the
/// body is not a valid `ModelsResponse`.
async fn fetch_openai_models(client: &reqwest::Client, base_url: &str) -> Result<Vec<String>> {
    // Insert a separator only when the base URL does not already end in one.
    let separator = if base_url.ends_with('/') { "" } else { "/" };
    let endpoint = format!("{}{}models", base_url, separator);

    let response = client.get(&endpoint).send().await?;
    if !response.status().is_success() {
        anyhow::bail!("Failed to fetch models: {}", response.status());
    }

    let listing: ModelsResponse = response.json().await?;
    let ids: Vec<String> = listing.data.into_iter().map(|entry| entry.id).collect();
    Ok(ids)
}
/// Prints a numbered menu of `models` on stdout, reads one line from stdin
/// and returns the model whose 1-based number was entered.
///
/// # Errors
/// Fails when `models` is empty, stdout/stdin I/O fails, the input is not a
/// number, or the number is outside `1..=models.len()`.
pub fn prompt_model_selection(models: &[String]) -> Result<String> {
    if models.is_empty() {
        anyhow::bail!("No models available. Please specify a model with --model");
    }

    println!("\nAvailable models:");
    for (number, name) in (1..).zip(models.iter()) {
        println!(" {}. {}", number, name);
    }
    print!("\nSelect a model (1-{}): ", models.len());
    // The prompt has no trailing newline, so push it out before blocking on stdin.
    io::stdout().flush()?;

    let mut line = String::new();
    io::stdin().read_line(&mut line)?;
    let choice = line
        .trim()
        .parse::<usize>()
        .context("Invalid selection. Please enter a number.")?;

    // `checked_sub` rejects 0, `get` rejects anything past the end.
    match choice.checked_sub(1).and_then(|index| models.get(index)) {
        Some(selected) => Ok(selected.clone()),
        None => anyhow::bail!("Selection out of range. Please choose 1-{}", models.len()),
    }
}
/// One message of a chat request, serialized as `{"role": ..., "content": ...}`.
#[derive(Debug, Serialize)]
struct ChatMessage {
    /// Speaker of the message; this file only ever sends `"user"`.
    role: String,
    /// Message text (the prompt).
    content: String,
}
/// JSON body posted to the chat-completions endpoint.
#[derive(Debug, Serialize)]
struct ChatRequest {
    /// Name of the model to run.
    model: String,
    /// Conversation to complete.
    messages: Vec<ChatMessage>,
    /// Sampling temperature forwarded to the endpoint.
    temperature: f64,
}
/// Subset of the chat-completions response that this module reads.
#[derive(Debug, Deserialize)]
struct ChatResponse {
    /// Returned completions; callers in this file use only the first one.
    choices: Vec<ChatChoice>,
}
/// A single completion inside a [`ChatResponse`].
#[derive(Debug, Deserialize)]
struct ChatChoice {
    /// The message produced for this completion.
    message: ChatMessageResponse,
}
/// Message payload of a [`ChatChoice`].
#[derive(Debug, Deserialize)]
struct ChatMessageResponse {
    /// Text of the reply; passed to `extract_json` by the callers in this file.
    content: String,
}
/// Response of the `<base>/models` listing endpoint.
#[derive(Debug, Deserialize)]
struct ModelsResponse {
    /// Listed models.
    data: Vec<ModelInfo>,
}
/// One entry of a [`ModelsResponse`].
#[derive(Debug, Deserialize)]
struct ModelInfo {
    /// Model identifier; this is the value compared against the requested model name.
    id: String,
}
/// Verifies that the endpoint at `base_url` is reachable and offers `model`.
///
/// The model listing is fetched from `/api/tags` for Ollama endpoints and from
/// `<base_url>/models` otherwise; `model` must equal one of the listed names
/// exactly.
///
/// # Errors
/// Fails when the HTTP client cannot be built, the request cannot be sent,
/// the endpoint answers with a non-success status, the body cannot be
/// decoded, or `model` is not in the listing.
pub async fn preflight_check(base_url: &str, model: &str) -> Result<()> {
    #[derive(Debug, Deserialize)]
    struct OllamaModelsResponse {
        models: Vec<OllamaModelInfo>,
    }
    #[derive(Debug, Deserialize)]
    struct OllamaModelInfo {
        name: String,
    }

    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(10))
        .build()?;
    let is_ollama = detect_api_type(base_url) == ApiType::Ollama;

    let models_url = if is_ollama {
        // Normalise trailing slashes first so ".../v1/" and ".../api/" are
        // recognised, and only rewrite the *suffix*: a blanket
        // `replace("/v1", ..)` would also mangle any earlier "/v1" in the URL.
        let root = base_url.trim_end_matches('/');
        if let Some(host) = root.strip_suffix("/v1") {
            format!("{}/api/tags", host)
        } else if root.ends_with("/api") {
            format!("{}/tags", root)
        } else {
            format!("{}/api/tags", root)
        }
    } else if base_url.ends_with('/') {
        format!("{}models", base_url)
    } else {
        format!("{}/models", base_url)
    };

    let resp = client.get(&models_url).send().await?;
    if !resp.status().is_success() {
        anyhow::bail!("LLM endpoint returned {}", resp.status());
    }

    // The two listing formats name the model differently (`name` vs `id`).
    let available_models: Vec<String> = if is_ollama {
        let listing: OllamaModelsResponse = resp.json().await?;
        listing.models.into_iter().map(|m| m.name).collect()
    } else {
        let listing: ModelsResponse = resp.json().await?;
        listing.data.into_iter().map(|m| m.id).collect()
    };

    if !available_models.iter().any(|m| m == model) {
        anyhow::bail!("Model '{}' not found in available models", model);
    }
    Ok(())
}
/// Derives find/replace patterns that turn `front` into `back` without
/// consulting an LLM.
///
/// Trailing dots are ignored on both hostnames. Patterns are emitted, in this
/// order, for:
///
/// 1. `back` = *prefix* + `front`, where the prefix ends in `.` or `-`
///    (`find: ""`, position `"prefix"`);
/// 2. `back` = `front` + *suffix*, where the suffix starts with `.` or `-`
///    (`find: ""`, position `"suffix"`);
/// 3. both hostnames have the same number of labels (at least two) and the
///    same last two labels, but a different first label: the first label is
///    swapped (`find: "<front label>."`, position `"prefix"`);
/// 4. in case 3, when the back label contains the front label after a prefix
///    ending in `-`, that prefix alone is also offered as a generic prepend.
///
/// Every pattern appears at most once; an empty vector means nothing could be
/// derived.
pub fn derive_patterns_deterministic(front: &str, back: &str) -> Vec<Pattern> {
    /// Characters that may join an added prefix/suffix to the original name.
    const SEPARATORS: &[char] = &['.', '-'];

    /// Appends the pattern unless an identical one is already present.
    fn push_unique(patterns: &mut Vec<Pattern>, find: String, replace: String, position: &str) {
        let duplicate = patterns
            .iter()
            .any(|p| p.find == find && p.replace == replace && p.position == position);
        if !duplicate {
            patterns.push(Pattern {
                find,
                replace,
                position: position.to_string(),
            });
        }
    }

    let mut patterns = Vec::new();
    let front = front.trim_end_matches('.');
    let back = back.trim_end_matches('.');

    // 1. Whole-hostname prepend. An empty prefix cannot end with a separator,
    //    so `back == front` is rejected implicitly.
    if let Some(prefix) = back.strip_suffix(front) {
        if prefix.ends_with(SEPARATORS) {
            push_unique(&mut patterns, String::new(), prefix.to_string(), "prefix");
        }
    }

    // 2. Whole-hostname append.
    if let Some(suffix) = back.strip_prefix(front) {
        if suffix.starts_with(SEPARATORS) {
            push_unique(&mut patterns, String::new(), suffix.to_string(), "suffix");
        }
    }

    let front_labels: Vec<&str> = front.split('.').collect();
    let back_labels: Vec<&str> = back.split('.').collect();

    // 3./4. Only the first label may differ: same depth, same last two labels.
    let same_shape = front_labels.len() >= 2
        && front_labels.len() == back_labels.len()
        && front_labels[front_labels.len() - 2..] == back_labels[back_labels.len() - 2..];
    if same_shape {
        let front_first = front_labels[0];
        let back_first = back_labels[0];
        if front_first != back_first {
            push_unique(
                &mut patterns,
                format!("{}.", front_first),
                format!("{}.", back_first),
                "prefix",
            );

            // When the back label embeds the front label ("x-api" vs "api"),
            // the part in front of it is a reusable prepend. It can coincide
            // with the prefix found in step 1, hence the de-duplication.
            if let Some(pos) = back_first.find(front_first) {
                let label_prefix = &back_first[..pos];
                if label_prefix.ends_with(SEPARATORS) {
                    push_unique(
                        &mut patterns,
                        String::new(),
                        label_prefix.to_string(),
                        "prefix",
                    );
                }
            }
        }
    }

    patterns
}
/// Derives `"label_template"` patterns from several (front, back) pairs.
///
/// For each pair whose last two labels agree, the text wrapped around the
/// front's first label inside the back hostname is recorded as a
/// (prefix, suffix) pair. Templates are produced only when at least two pairs
/// are given, every recorded prefix is the same non-empty string, and at least
/// two distinct suffixes were seen. One pattern is returned per distinct
/// suffix, in sorted suffix order, with `find` = `<prefix>{label}<suffix>` and
/// `replace` = `<prefix><suffix>`.
pub fn derive_multi_pair_templates(pairs: &[(String, String)]) -> Vec<Pattern> {
    /// Returns the (prefix, suffix) that `back` wraps around the first label
    /// of `front`, or `None` when the pair does not fit a supported shape.
    fn label_affixes(front: &str, back: &str) -> Option<(String, String)> {
        let front_labels: Vec<&str> = front.trim_end_matches('.').split('.').collect();
        let back_labels: Vec<&str> = back.trim_end_matches('.').split('.').collect();
        let (front_len, back_len) = (front_labels.len(), back_labels.len());

        if front_len < 2 || back_len < 2 {
            return None;
        }
        // Both hostnames must share their last two labels.
        if front_labels[front_len - 2..] != back_labels[back_len - 2..] {
            return None;
        }

        let original = front_labels[0];
        if back_len == front_len {
            // Same depth: the first label was decorated in place.
            let decorated = back_labels[0];
            let at = decorated.find(original)?;
            Some((
                decorated[..at].to_string(),
                decorated[at + original.len()..].to_string(),
            ))
        } else if back_len == front_len + 1 {
            // One extra leading label: it becomes part of the prefix.
            let decorated = back_labels[1];
            if decorated == original {
                return None;
            }
            let at = decorated.find(original)?;
            Some((
                format!("{}.{}", back_labels[0], &decorated[..at]),
                decorated[at + original.len()..].to_string(),
            ))
        } else {
            None
        }
    }

    if pairs.len() < 2 {
        return Vec::new();
    }

    let affixes: Vec<(String, String)> = pairs
        .iter()
        .filter_map(|(front, back)| label_affixes(front, back))
        .collect();

    // All pairs must agree on one prefix, otherwise there is no template.
    let shared_prefix = match affixes.first() {
        Some((first, _)) if affixes.iter().all(|(prefix, _)| prefix == first) => first.as_str(),
        _ => return Vec::new(),
    };

    // A BTreeSet both de-duplicates the suffixes and yields them sorted.
    let suffixes: std::collections::BTreeSet<&str> =
        affixes.iter().map(|(_, suffix)| suffix.as_str()).collect();
    if shared_prefix.is_empty() || suffixes.len() < 2 {
        return Vec::new();
    }

    suffixes
        .into_iter()
        .map(|suffix| Pattern {
            find: format!("{}{{label}}{}", shared_prefix, suffix),
            replace: format!("{}{}", shared_prefix, suffix),
            position: "label_template".to_string(),
        })
        .collect()
}
pub fn apply_label_templates(targets: &[String], templates: &[Pattern]) -> Vec<String> {
let mut candidates = std::collections::HashSet::new();
let templates: Vec<&Pattern> = templates.iter()
.filter(|p| p.position == "label_template")
.collect();
if templates.is_empty() {
return Vec::new();
}
for target in targets {
let labels: Vec<&str> = target.split('.').collect();
if labels.len() < 2 {
continue;
}
let first_label = labels[0];
let rest = labels[1..].join(".");
for template in &templates {
if let Some(label_pos) = template.find.find("{label}") {
let prefix = &template.find[..label_pos];
let suffix = &template.find[label_pos + 7..]; let new_label = format!("{}{}{}", prefix, first_label, suffix);
let candidate = format!("{}.{}", new_label, rest);
candidates.insert(candidate);
}
}
}
let mut result: Vec<String> = candidates.into_iter().collect();
result.sort();
result
}
/// Returns clones of the patterns that, applied to `front`, produce exactly
/// `back`. Patterns that do not apply at all are dropped as well; the input
/// order is preserved.
pub fn validate_patterns(patterns: &[Pattern], front: &str, back: &str) -> Vec<Pattern> {
    patterns
        .iter()
        .filter(|candidate| apply_single_pattern(front, candidate).as_deref() == Some(back))
        .cloned()
        .collect()
}
/// Applies one find/replace `pattern` to `target`.
///
/// * `"prefix"`: `find` must be at the start of `target`; an empty `find` prepends `replace`.
/// * `"suffix"`: `find` must be at the end of `target`; an empty `find` appends `replace`.
/// * `"contains"`: the first occurrence of `find` is replaced.
///
/// Returns `None` when `find` does not match or the position is not one of
/// the three above.
fn apply_single_pattern(target: &str, pattern: &Pattern) -> Option<String> {
    let find = pattern.find.as_str();
    let replace = pattern.replace.as_str();

    match pattern.position.as_str() {
        // Stripping an empty `find` leaves the whole target, so this also
        // covers the plain "prepend" case.
        "prefix" => target
            .strip_prefix(find)
            .map(|rest| format!("{}{}", replace, rest)),
        "suffix" => target
            .strip_suffix(find)
            .map(|head| format!("{}{}", head, replace)),
        "contains" => target.find(find).map(|at| {
            let after = at + find.len();
            format!("{}{}{}", &target[..at], replace, &target[after..])
        }),
        _ => None,
    }
}
/// Derives patterns that transform `front` into `back`, asking the LLM only
/// when the deterministic derivation finds nothing.
///
/// LLM-proposed patterns are validated against the seed pair; the validated
/// subset is returned when it is non-empty. If none validates, a warning is
/// printed and the unvalidated LLM patterns are returned as a last resort.
///
/// # Errors
/// Fails when the HTTP client cannot be built, the request fails or returns a
/// non-success status, the response has no choices, or the reply is not valid
/// pattern JSON.
pub async fn query_patterns(base_url: &str, model: &str, front: &str, back: &str) -> Result<Vec<Pattern>> {
    let deterministic = derive_patterns_deterministic(front, back);
    if !deterministic.is_empty() {
        return Ok(deterministic);
    }
    // From here on the deterministic derivation is known to be empty for this
    // pair, so there is nothing to fall back to and it is not recomputed.

    let prompt = format!(
        r#"You are a hostname pattern analyzer. Given a CDN frontend hostname and its backend origin, derive find/replace rules to transform OTHER frontend hostnames into potential backend origins.
Position types:
- "prefix": match/replace at the START of the hostname. Use find="" to prepend text.
- "suffix": match/replace at the END of the hostname. Use find="" to append text.
- "contains": match/replace ANYWHERE in the hostname (first occurrence only).
Examples:
Frontend: www.example.com → Backend: origin.www.example.com
Answer: {{"patterns": [{{"find": "", "replace": "origin.", "position": "prefix"}}]}}
Frontend: cdn.example.com → Backend: origin.example.com
Answer: {{"patterns": [{{"find": "cdn.", "replace": "origin.", "position": "prefix"}}]}}
Frontend: api.example.com → Backend: api-origin.example.com
Answer: {{"patterns": [{{"find": "api.", "replace": "api-origin.", "position": "prefix"}}]}}
Now analyze this pair. The pattern must transform the frontend into the backend:
Frontend: {front}
Backend: {back}
Respond ONLY with JSON: {{"patterns": [{{"find": "...", "replace": "...", "position": "prefix|suffix|contains"}}]}}"#,
        front = front, back = back
    );

    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(30))
        .build()?;
    let request = ChatRequest {
        model: model.to_string(),
        messages: vec![ChatMessage {
            role: "user".to_string(),
            content: prompt,
        }],
        // Low temperature: the answer should be as reproducible as possible.
        temperature: 0.1,
    };

    let root = base_url.trim_end_matches('/');
    let url = match detect_api_type(base_url) {
        ApiType::Ollama => {
            if root.ends_with("/v1") {
                format!("{}/chat/completions", root)
            } else {
                // The request/response types here follow the OpenAI chat
                // schema, which Ollama serves under `/v1`; its native
                // `/api/generate` uses a different schema, so a base URL
                // ending in `/api` is redirected to the sibling `/v1` route.
                let host = root.strip_suffix("/api").unwrap_or(root);
                format!("{}/v1/chat/completions", host)
            }
        }
        ApiType::OpenAICompatible | ApiType::Anthropic | ApiType::Unknown => {
            format!("{}/chat/completions", root)
        }
    };

    let resp = client.post(&url).json(&request).send().await?;
    if !resp.status().is_success() {
        anyhow::bail!("LLM API returned status: {}", resp.status());
    }
    let chat_response: ChatResponse = resp.json().await?;
    let content = chat_response
        .choices
        .first()
        .map(|choice| choice.message.content.clone())
        .context("No response content from LLM")?;

    let json_content = extract_json(&content)?;
    let pattern_response: PatternResponse = match serde_json::from_str(&json_content) {
        Ok(parsed) => parsed,
        Err(e) => {
            eprintln!("Warning: Failed to parse LLM JSON response: {}. Falling back to deterministic patterns.", e);
            return Err(anyhow::anyhow!("Failed to parse LLM response as JSON and deterministic fallback produced no patterns"));
        }
    };

    let validated = validate_patterns(&pattern_response.patterns, front, back);
    if !validated.is_empty() {
        return Ok(validated);
    }

    // Both warnings are kept so stderr output is unchanged for this path.
    eprintln!("Warning: LLM-derived patterns failed validation against seed pair. Using deterministic fallback.");
    eprintln!("Warning: No deterministic patterns could be derived either. Using unvalidated LLM patterns.");
    Ok(pattern_response.patterns)
}
/// JSON object the LLM is asked to return from the pattern prompt:
/// `{"patterns": [...]}`.
#[derive(Debug, Deserialize)]
struct PatternResponse {
    /// Patterns proposed by the LLM, not yet validated.
    patterns: Vec<Pattern>,
}
pub async fn expand_position_words(
base_url: &str,
model: &str,
words: &[String],
position: usize,
total_positions: usize,
hostname_context: &str,
) -> Result<Vec<String>> {
if words.is_empty() {
return Ok(Vec::new());
}
let position_context = if position == 0 {
"service/application name"
} else if position == total_positions - 1 {
"organization/team identifier"
} else if words.iter().any(|w| ["dev", "prod", "test", "staging", "qa", "uat"].contains(&w.as_str())) {
"environment identifier"
} else if words.iter().any(|w| ["api", "app", "web", "backend"].contains(&w.as_str())) {
"service type"
} else {
"subdomain segment"
};
let prompt = format!(
r#"You are helping discover backend server hostnames. Given these words from position {} (context: {}) in hostname "{}":
{:?}
Generate related words that might appear in the same position in other backend/internal server hostnames.
Think about:
- For environment positions: dev -> prod, test, staging, qa, uat, preprod, sandbox
- For service positions: api -> rest, graphql, rpc, service, gateway, app, web
- For organization positions: corp -> internal, int, private, team, org
- Synonyms and variations
- Common abbreviations
- Related concepts
Respond ONLY with a JSON array of unique strings, no duplicates, no explanations:
["word1", "word2", "word3", ...]"#,
position, position_context, hostname_context, words
);
let client = reqwest::Client::builder()
.timeout(Duration::from_secs(60))
.build()?;
let request = ChatRequest {
model: model.to_string(),
messages: vec![ChatMessage {
role: "user".to_string(),
content: prompt,
}],
temperature: 0.7, };
let api_type = detect_api_type(base_url);
let url = match api_type {
ApiType::Ollama => {
if base_url.ends_with("/v1") {
format!("{}/chat/completions", base_url)
} else if base_url.ends_with("/api") {
format!("{}/generate", base_url)
} else {
format!("{}/v1/chat/completions", base_url.trim_end_matches('/'))
}
}
_ => {
if base_url.ends_with('/') {
format!("{}chat/completions", base_url)
} else {
format!("{}/chat/completions", base_url)
}
}
};
let resp = client.post(&url).json(&request).send().await?;
if !resp.status().is_success() {
anyhow::bail!("LLM API returned status: {}", resp.status());
}
let chat_response: ChatResponse = resp.json().await?;
let content = chat_response
.choices
.first()
.and_then(|c| Some(c.message.content.clone()))
.context("No response content from LLM")?;
let json_content = extract_json(&content)?;
let mut words_result: Vec<String> = serde_json::from_str(&json_content)
.unwrap_or_else(|_| Vec::new());
for word in words {
if !words_result.contains(word) {
words_result.push(word.clone());
}
}
let cleaned: Vec<String> = words_result
.into_iter()
.map(|w| w.to_lowercase().trim().to_string())
.filter(|w| !w.is_empty() && w.len() < 50 && !w.contains(' ') && !w.contains('.'))
.collect();
Ok(cleaned)
}
/// Expands every subdomain segment of every structure through the LLM,
/// batching the distinct words of each label position.
///
/// The result has one entry per structure and, inside it, one word list per
/// subdomain segment. A segment whose batch failed, or for which the LLM
/// returned nothing, maps to a list containing just the segment itself.
/// After each batch the progress bar (if any) is set to the number of batches
/// completed so far.
///
/// `batch_size` is the maximum number of distinct words sent per request;
/// `0` is treated as `1`. The first structure supplies the example hostname
/// and the "last position" used for the prompt context.
///
/// # Errors
/// Batch failures are reported on stderr and do not abort the run; the
/// function itself currently always returns `Ok`.
pub async fn batch_expand_positions(
    base_url: &str,
    model: &str,
    structures: &[HostnameStructure],
    progress_bar: Option<Arc<ProgressBar>>,
    batch_size: usize,
) -> Result<Vec<Vec<Vec<String>>>> {
    let sample_structure = match structures.first() {
        Some(first) => first,
        None => return Ok(Vec::new()),
    };

    let max_positions = structures
        .iter()
        .map(|s| s.subdomain_segments.len())
        .max()
        .unwrap_or(0);
    if max_positions == 0 {
        return Ok(structures.iter().map(|_| Vec::new()).collect());
    }

    // `chunks(0)` panics, so never batch by fewer than one word.
    let batch_size = batch_size.max(1);

    // For each label position: distinct word -> every (structure, position) using it.
    let mut position_words: Vec<HashMap<String, Vec<(usize, usize)>>> =
        vec![HashMap::new(); max_positions];
    for (struct_idx, structure) in structures.iter().enumerate() {
        for (pos_idx, segment) in structure.subdomain_segments.iter().enumerate() {
            position_words[pos_idx]
                .entry(segment.clone())
                .or_default()
                .push((struct_idx, pos_idx));
        }
    }

    // Loop-invariant prompt context taken from the first structure.
    let context_hostname = format!(
        "{}.{}",
        sample_structure.subdomain_segments.join("."),
        sample_structure.base_domain
    );
    // `None` when the first structure has no segments (avoids `0 - 1`).
    let last_sample_position = sample_structure.subdomain_segments.len().checked_sub(1);

    let mut expansion_map: HashMap<(usize, usize), Vec<String>> = HashMap::new();
    let mut completed = 0usize;

    for (pos_idx, words_map) in position_words.iter().enumerate() {
        let mut unique_words: Vec<String> = words_map.keys().cloned().collect();
        // HashMap iteration order is arbitrary; sort so batches are reproducible.
        unique_words.sort();

        let position_context = if pos_idx == 0 {
            "service/application name"
        } else if Some(pos_idx) == last_sample_position {
            "organization/team identifier"
        } else if unique_words.iter().any(|w| ["dev", "prod", "test", "staging", "qa", "uat"].contains(&w.as_str())) {
            "environment identifier"
        } else if unique_words.iter().any(|w| ["api", "app", "web", "backend"].contains(&w.as_str())) {
            "service type"
        } else {
            "subdomain segment"
        };

        for batch in unique_words.chunks(batch_size) {
            let outcome = expand_position_words_batch(
                base_url,
                model,
                batch,
                pos_idx,
                max_positions,
                position_context,
                &context_hostname,
            )
            .await;

            match outcome {
                Ok(expanded_map) => {
                    for word in batch {
                        if let (Some(expanded), Some(slots)) =
                            (expanded_map.get(word), words_map.get(word))
                        {
                            for &slot in slots {
                                expansion_map.insert(slot, expanded.clone());
                            }
                        }
                    }
                }
                Err(e) => {
                    eprintln!("Warning: Failed to expand batch at position {}: {}", pos_idx, e);
                    // Keep the original word so the structure stays usable.
                    for word in batch {
                        if let Some(slots) = words_map.get(word) {
                            for &slot in slots {
                                expansion_map.insert(slot, vec![word.clone()]);
                            }
                        }
                    }
                }
            }

            completed += 1;
            if let Some(pb) = progress_bar.as_ref() {
                pb.set_position(completed as u64);
            }
        }
    }

    let results: Vec<Vec<Vec<String>>> = structures
        .iter()
        .enumerate()
        .map(|(struct_idx, structure)| {
            structure
                .subdomain_segments
                .iter()
                .enumerate()
                .map(|(pos_idx, segment)| {
                    expansion_map
                        .get(&(struct_idx, pos_idx))
                        .cloned()
                        .unwrap_or_else(|| vec![segment.clone()])
                })
                .collect::<Vec<_>>()
        })
        .collect();
    Ok(results)
}
/// Asks the LLM, in a single request, for related words for each entry of
/// `words` (all taken from label position `position`).
///
/// Returns a map holding an entry for every input word. Each of those lists
/// starts from the LLM's suggestions (the input word is put first when the LLM
/// omitted it), is lowercased and trimmed, drops entries that are empty, 50
/// bytes or longer, or contain a space or a dot, and keeps the first
/// occurrence of each word in order. Keys the LLM returned that are not input
/// words are passed through untouched. An unparsable reply yields lists that
/// contain only the input word.
///
/// # Errors
/// Fails when the HTTP client cannot be built, the request fails or returns a
/// non-success status, or the response has no choices.
async fn expand_position_words_batch(
    base_url: &str,
    model: &str,
    words: &[String],
    position: usize,
    total_positions: usize,
    position_context: &str,
    hostname_context: &str,
) -> Result<HashMap<String, Vec<String>>> {
    if words.is_empty() {
        return Ok(HashMap::new());
    }

    let prompt = format!(
        r#"You are helping discover backend server hostnames. Given these words from position {} (context: {}) in hostname "{}":
{:?}
For EACH word in the list, generate related words that might appear in the same position in other backend/internal server hostnames.
Think about:
- For environment positions: dev -> prod, test, staging, qa, uat, preprod, sandbox
- For service positions: api -> rest, graphql, rpc, service, gateway, app, web
- For organization positions: corp -> internal, int, private, team, org
- Synonyms and variations
- Common abbreviations
- Related concepts
Respond ONLY with a JSON object mapping each input word to an array of related words:
{{"word1": ["related1", "related2", ...], "word2": ["related1", "related2", ...], ...}}
Include the original word in each array. No explanations, only the JSON."#,
        position, position_context, hostname_context, words
    );

    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(120))
        .build()?;
    let request = ChatRequest {
        model: model.to_string(),
        messages: vec![ChatMessage {
            role: "user".to_string(),
            content: prompt,
        }],
        temperature: 0.7,
    };

    let root = base_url.trim_end_matches('/');
    let url = match detect_api_type(base_url) {
        ApiType::Ollama => {
            if root.ends_with("/v1") {
                format!("{}/chat/completions", root)
            } else {
                // The request/response types here follow the OpenAI chat
                // schema, which Ollama serves under `/v1`; its native
                // `/api/generate` uses a different schema, so a base URL
                // ending in `/api` is redirected to the sibling `/v1` route.
                let host = root.strip_suffix("/api").unwrap_or(root);
                format!("{}/v1/chat/completions", host)
            }
        }
        ApiType::OpenAICompatible | ApiType::Anthropic | ApiType::Unknown => {
            format!("{}/chat/completions", root)
        }
    };

    let resp = client.post(&url).json(&request).send().await?;
    if !resp.status().is_success() {
        anyhow::bail!("LLM API returned status: {}", resp.status());
    }
    let chat_response: ChatResponse = resp.json().await?;
    let content = chat_response
        .choices
        .first()
        .map(|choice| choice.message.content.clone())
        .context("No response content from LLM")?;

    let json_content = extract_json(&content)?;
    // Best effort: an unparsable reply leaves only the input words.
    let mut result_map: HashMap<String, Vec<String>> =
        serde_json::from_str(&json_content).unwrap_or_default();

    for word in words {
        let entry = result_map.entry(word.clone()).or_default();
        if !entry.contains(word) {
            entry.insert(0, word.clone());
        }

        // Order-preserving de-duplication: collecting through a HashSet would
        // shuffle the list on every run and lose the "original word first" order.
        let mut seen = HashSet::new();
        let raw = std::mem::take(entry);
        *entry = raw
            .into_iter()
            .map(|w| w.to_lowercase().trim().to_string())
            .filter(|w| !w.is_empty() && w.len() < 50 && !w.contains(' ') && !w.contains('.'))
            .filter(|w| seen.insert(w.clone()))
            .collect();
    }
    Ok(result_map)
}
/// Asks the LLM for about `count_per_word` related words for each seed word.
///
/// Returns the LLM's words lowercased and trimmed, dropping entries that are
/// empty, 50 bytes or longer, or contain a space. The seed words themselves
/// are not added. A reply that is not a JSON string array yields an empty
/// list. Empty `seed_words` or `count_per_word == 0` returns an empty list
/// without contacting the LLM.
///
/// # Errors
/// Fails when the HTTP client cannot be built, the request fails or returns a
/// non-success status, or the response has no choices.
pub async fn expand_words_with_llm(base_url: &str, model: &str, seed_words: &[String], count_per_word: usize) -> Result<Vec<String>> {
    if seed_words.is_empty() || count_per_word == 0 {
        return Ok(Vec::new());
    }

    let prompt = format!(
        r#"You are helping discover backend server hostnames. Given these subdomain words extracted from a known backend URL: {:?}
For each word, generate {} related words that might appear in backend/internal server hostnames.
Think about:
- Synonyms (api -> rest, graphql, rpc)
- Versions (api -> api-v1, api-v2, api2)
- Related concepts (dev -> staging, test, qa, uat)
- Common abbreviations (development -> dev, production -> prod)
- Internal naming patterns (backend -> origin, internal, int)
Respond ONLY with a JSON array of unique strings, no duplicates, no explanations:
["word1", "word2", "word3", ...]"#,
        seed_words, count_per_word
    );

    let client = reqwest::Client::builder()
        .timeout(Duration::from_secs(60))
        .build()?;
    let request = ChatRequest {
        model: model.to_string(),
        messages: vec![ChatMessage {
            role: "user".to_string(),
            content: prompt,
        }],
        temperature: 0.7,
    };

    let root = base_url.trim_end_matches('/');
    let url = match detect_api_type(base_url) {
        ApiType::Ollama => {
            if root.ends_with("/v1") {
                format!("{}/chat/completions", root)
            } else {
                // The request/response types here follow the OpenAI chat
                // schema, which Ollama serves under `/v1`; its native
                // `/api/generate` uses a different schema, so a base URL
                // ending in `/api` is redirected to the sibling `/v1` route.
                let host = root.strip_suffix("/api").unwrap_or(root);
                format!("{}/v1/chat/completions", host)
            }
        }
        ApiType::OpenAICompatible | ApiType::Anthropic | ApiType::Unknown => {
            format!("{}/chat/completions", root)
        }
    };

    let resp = client.post(&url).json(&request).send().await?;
    if !resp.status().is_success() {
        anyhow::bail!("LLM API returned status: {}", resp.status());
    }
    let chat_response: ChatResponse = resp.json().await?;
    let content = chat_response
        .choices
        .first()
        .map(|choice| choice.message.content.clone())
        .context("No response content from LLM")?;

    let json_content = extract_json(&content)?;
    // Best effort: an unparsable reply yields no words rather than an error.
    let words: Vec<String> = serde_json::from_str(&json_content).unwrap_or_default();

    let cleaned: Vec<String> = words
        .into_iter()
        .map(|w| w.to_lowercase().trim().to_string())
        .filter(|w| !w.is_empty() && w.len() < 50 && !w.contains(' '))
        .collect();
    Ok(cleaned)
}
/// Pulls the JSON payload out of an LLM reply.
///
/// Tried in order:
/// 1. If the reply starts with a Markdown code fence, the lines between the
///    opening fence and the next fence line are returned. Text after the
///    closing fence is ignored.
/// 2. The span from the first `{` to the last `}`.
/// 3. The span from the first `[` to the last `]`, for array replies wrapped
///    in prose.
/// 4. Otherwise the trimmed reply itself.
///
/// The result is not validated as JSON; parsing is left to the caller. This
/// function currently never returns `Err`.
fn extract_json(content: &str) -> Result<String> {
    let trimmed = content.trim();

    if trimmed.starts_with("```") {
        // The first line is the opening fence ("```" or "```json"). Every later
        // line up to the next fence belongs to the block; stopping there keeps
        // trailing commentary out of the result.
        let fenced: Vec<&str> = trimmed
            .lines()
            .skip(1)
            .take_while(|line| !line.trim().starts_with("```"))
            .collect();
        if !fenced.is_empty() {
            return Ok(fenced.join("\n"));
        }
    }

    // Objects are tried before arrays so replies that already worked keep
    // producing the same span.
    for &(open, close) in [('{', '}'), ('[', ']')].iter() {
        if let (Some(start), Some(end)) = (trimmed.find(open), trimmed.rfind(close)) {
            if end > start {
                return Ok(trimmed[start..=end].to_string());
            }
        }
    }

    Ok(trimmed.to_string())
}