use std::path::Path;
use chrono::Utc;
use serde_json::Value;
use crate::fetch::browser_v2;
use crate::session::{event::SessionEvent, layout, log, wiki};
/// Upper bound on how many catalog hits are turned into wiki pages for one URL.
/// When a search returns more hits than this, only the first `MAX_SEED_PER_URL`
/// are processed and a `SkipReason::Truncated` entry is added to the report.
pub const MAX_SEED_PER_URL: usize = 3;
/// Timeout handed to `browser_v2::call_actionbook_tool` for both the catalog
/// search and the manual fetch (milliseconds, going by the `_MS` suffix).
const DEFAULT_CATALOG_TIMEOUT_MS: u64 = 15_000;
/// Options that control how seeding treats pages that already exist.
#[derive(Debug, Clone, Default)]
pub struct SeedOpts {
/// When `false` (the default), a page whose `<slug>.md` file already exists in
/// the wiki directory is skipped before any fetch or write. The flag is also
/// forwarded unchanged to `wiki::seed_manual_page_in`.
pub reseed: bool,
}
/// Outcome of one seeding run: what was written and what was left out.
#[derive(Debug, Clone, Default)]
pub struct SeedReport {
/// Pages that were written successfully, in the order they were processed.
pub seeded: Vec<SeededPage>,
/// Entries that were not seeded, each paired with the reason. The string is the
/// page slug, except for `SkipReason::Truncated`, where it is the host.
pub skipped: Vec<(String, SkipReason)>,
}
/// Description of one wiki page that was seeded from the catalog.
#[derive(Debug, Clone)]
pub struct SeededPage {
/// Slug of the page as produced by `page_slug_for`; the file is `<slug>.md`.
pub page_slug: String,
/// Catalog `site` identifier the page was seeded for.
pub site: String,
/// Optional catalog `group` within the site.
pub group: Option<String>,
/// Optional catalog `action` within the group.
pub action: Option<String>,
/// Length of the page body in bytes (`body.len()`), not counting frontmatter.
pub bytes: u64,
}
/// Why an entry ended up in `SeedReport::skipped`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SkipReason {
/// The page file already existed and `reseed` was off, or the wiki layer
/// reported `WikiError::AlreadyExists`.
AlreadySeeded,
/// Fetching the manual failed, or writing the page failed with an error other
/// than `WikiError::AlreadyExists`.
ManualFailed,
/// The catalog returned more than `MAX_SEED_PER_URL` hits; the extra ones were dropped.
Truncated,
}
/// One entry parsed out of a catalog search response.
#[derive(Debug, Clone)]
struct CatalogHit {
/// Value of the hit's `site` field (hits without a string `site` are discarded by the parser).
site: String,
/// Value of the hit's `group` field; `None` when missing, not a string, or empty.
group: Option<String>,
/// Value of the hit's `action` field; `None` when missing, not a string, or empty.
action: Option<String>,
}
/// Seeds catalog manual pages for `url` into the session's own wiki directory.
///
/// Thin convenience wrapper: resolves the directory with
/// `layout::session_wiki_dir(session_slug)` and delegates to [`seed_for_url_in`].
pub fn seed_for_url(url: &str, session_slug: &str, opts: SeedOpts) -> SeedReport {
    let wiki_dir = layout::session_wiki_dir(session_slug);
    seed_for_url_in(url, session_slug, &wiki_dir, opts)
}
/// Seeds catalog manual pages for `url` into `wiki_dir` and reports the outcome.
///
/// The run is best-effort: every failure path returns the report built so far
/// instead of an error. An empty report is returned when
/// - no host can be extracted from `url`,
/// - the `ACTIONBOOK_BACKEND` environment variable equals `v1-cli`,
/// - `browser_v2::is_api_key_set()` is false,
/// - the catalog search call fails, cannot be parsed, or yields no hits.
///
/// Otherwise at most [`MAX_SEED_PER_URL`] hits are processed. For each hit the
/// manual is fetched and written through `wiki::seed_manual_page_in`. Pages whose
/// file already exists are skipped unless `opts.reseed` is set.
pub fn seed_for_url_in(
    url: &str,
    session_slug: &str,
    wiki_dir: &Path,
    opts: SeedOpts,
) -> SeedReport {
    let mut report = SeedReport::default();

    let Some(host) = extract_host(url).filter(|h| !h.is_empty()) else {
        return report;
    };

    // Backend / credential gates, checked in this order.
    let v1_cli_backend = matches!(
        std::env::var("ACTIONBOOK_BACKEND").as_deref(),
        Ok("v1-cli")
    );
    if v1_cli_backend || !browser_v2::is_api_key_set() {
        return report;
    }

    // Ask the catalog which manuals exist for this host.
    let query = build_search_cmd(&host);
    let Ok(search_resp) =
        browser_v2::call_actionbook_tool(&query, session_slug, DEFAULT_CATALOG_TIMEOUT_MS)
    else {
        return report;
    };
    let Ok(hits) = parse_search_response(&search_resp) else {
        return report;
    };

    let total = hits.len();
    if total == 0 {
        return report;
    }
    if total > MAX_SEED_PER_URL {
        eprintln!(
            "catalog: truncated to {MAX_SEED_PER_URL} of {total} hits for host={host}"
        );
        // The truncation entry is keyed by host because it is not tied to one page.
        report.skipped.push((host.clone(), SkipReason::Truncated));
    }

    for hit in hits.into_iter().take(MAX_SEED_PER_URL) {
        let page_slug = page_slug_for(&hit.site, hit.group.as_deref(), hit.action.as_deref());

        let already_on_disk = wiki_dir.join(format!("{page_slug}.md")).exists();
        if already_on_disk && !opts.reseed {
            report.skipped.push((page_slug, SkipReason::AlreadySeeded));
            continue;
        }

        let manual_cmd = build_manual_cmd(&hit);
        let Ok(raw) = browser_v2::call_actionbook_tool(
            &manual_cmd,
            session_slug,
            DEFAULT_CATALOG_TIMEOUT_MS,
        ) else {
            report.skipped.push((page_slug, SkipReason::ManualFailed));
            continue;
        };
        let body = extract_manual_body(&raw);

        // Frontmatter: fixed keys first, optional group/action, then the query.
        let fetched_at = Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Secs, true);
        let mut fm: Vec<(&str, String)> = vec![
            ("kind", "actionbook-manual".to_string()),
            ("source", "catalog".to_string()),
            ("fetched_at", fetched_at),
            ("host", host.clone()),
            ("site", hit.site.clone()),
        ];
        fm.extend(hit.group.iter().map(|g| ("group", g.clone())));
        fm.extend(hit.action.iter().map(|a| ("action", a.clone())));
        fm.push(("catalog_query", host.clone()));

        match wiki::seed_manual_page_in(wiki_dir, &page_slug, &fm, &body, opts.reseed) {
            Ok(_) => report.seeded.push(SeededPage {
                bytes: body.len() as u64,
                page_slug,
                site: hit.site,
                group: hit.group,
                action: hit.action,
            }),
            Err(wiki::WikiError::AlreadyExists(existing)) => {
                report.skipped.push((existing, SkipReason::AlreadySeeded));
            }
            Err(_) => report.skipped.push((page_slug, SkipReason::ManualFailed)),
        }
    }

    report
}
/// Seeds a single manual page from a `body` the caller already has, then logs it.
///
/// The page slug is derived from `site`/`group`/`action`. Returns `None` without
/// writing when the page file already exists and `opts.reseed` is off, or when
/// `wiki::seed_manual_page_in` fails. On success a `SessionEvent::WikiSeeded`
/// event with an `actionbook://site[/group[/action]]` pseudo-URL is appended to
/// the session log (append errors are ignored) and the seeded page is returned.
pub fn seed_explicit(
    session_slug: &str,
    wiki_dir: &Path,
    host: &str,
    site: &str,
    group: Option<&str>,
    action: Option<&str>,
    body: &str,
    opts: SeedOpts,
) -> Option<SeededPage> {
    let page_slug = page_slug_for(site, group, action);

    let page_exists = wiki_dir.join(format!("{page_slug}.md")).exists();
    if page_exists && !opts.reseed {
        return None;
    }

    let fetched_at = Utc::now().to_rfc3339_opts(chrono::SecondsFormat::Secs, true);
    let mut fm: Vec<(&str, String)> = vec![
        ("kind", "actionbook-manual".to_string()),
        ("source", "catalog".to_string()),
        ("fetched_at", fetched_at),
        ("host", host.to_string()),
        ("site", site.to_string()),
    ];
    fm.extend(group.map(|g| ("group", g.to_string())));
    fm.extend(action.map(|a| ("action", a.to_string())));
    fm.push(("catalog_query", host.to_string()));

    // Any write failure maps to `None`; the error value itself is discarded.
    wiki::seed_manual_page_in(wiki_dir, &page_slug, &fm, body, opts.reseed).ok()?;

    // An action without a group falls back to the bare site URL.
    let pseudo_url = match (group, action) {
        (Some(g), Some(a)) => format!("actionbook://{site}/{g}/{a}"),
        (Some(g), None) => format!("actionbook://{site}/{g}"),
        _ => format!("actionbook://{site}"),
    };

    let seeded = SeededPage {
        page_slug,
        site: site.to_string(),
        group: group.map(str::to_string),
        action: action.map(str::to_string),
        bytes: body.len() as u64,
    };

    let ev = SessionEvent::WikiSeeded {
        timestamp: Utc::now(),
        url: pseudo_url,
        host: host.to_string(),
        site: seeded.site.clone(),
        group: seeded.group.clone(),
        action: seeded.action.clone(),
        page: seeded.page_slug.clone(),
        bytes: seeded.bytes,
        source: "catalog".to_string(),
        note: None,
    };
    let _ = log::append(session_slug, &ev);

    Some(seeded)
}
pub fn log_seed_events(session_slug: &str, url: &str, host: &str, report: &SeedReport) {
for page in &report.seeded {
let ev = SessionEvent::WikiSeeded {
timestamp: Utc::now(),
url: url.to_string(),
host: host.to_string(),
site: page.site.clone(),
group: page.group.clone(),
action: page.action.clone(),
page: page.page_slug.clone(),
bytes: page.bytes,
source: "catalog".to_string(),
note: None,
};
let _ = log::append(session_slug, &ev);
}
}
pub fn page_slug_for(site: &str, group: Option<&str>, action: Option<&str>) -> String {
let mut parts: Vec<String> = vec![slug_part(site)];
if let Some(g) = group {
let s = slug_part(g);
if !s.is_empty() {
parts.push(s);
}
}
if let Some(a) = action {
let s = slug_part(a);
if !s.is_empty() {
parts.push(s);
}
}
let joined = parts.join("-");
collapse_dashes(&joined)
}
/// Normalises one slug component.
///
/// ASCII letters are lowercased and ASCII digits are kept. Every other
/// character, non-ASCII included, becomes `-`. Runs of dashes are then
/// collapsed and edge dashes stripped by `collapse_dashes`.
fn slug_part(raw: &str) -> String {
    let dashed: String = raw
        .chars()
        .map(|ch| {
            if ch.is_ascii_alphanumeric() {
                ch.to_ascii_lowercase()
            } else {
                '-'
            }
        })
        .collect();
    collapse_dashes(&dashed)
}
/// Collapses every run of `-` into a single dash and strips leading and
/// trailing dashes.
///
/// Equivalently: the non-empty segments between dashes, re-joined with `-`.
fn collapse_dashes(s: &str) -> String {
    s.split('-')
        .filter(|segment| !segment.is_empty())
        .collect::<Vec<_>>()
        .join("-")
}
/// Builds the catalog search command for `host`: the host is used both as the
/// double-quoted query string and as the `--host` argument.
fn build_search_cmd(host: &str) -> String {
    ["actionbook search \"", host, "\" --host ", host].concat()
}
/// Builds the `actionbook manual <site> [group] [action]` command for a hit.
///
/// `group` and `action` are appended, space-separated, only when present.
fn build_manual_cmd(hit: &CatalogHit) -> String {
    let mut words = vec!["actionbook", "manual", hit.site.as_str()];
    words.extend(hit.group.as_deref());
    words.extend(hit.action.as_deref());
    words.join(" ")
}
fn parse_search_response(text: &str) -> Result<Vec<CatalogHit>, String> {
let start = text.find(['{', '[']).ok_or("no JSON in search response")?;
let json_part = &text[start..];
let parsed: Value = serde_json::from_str(json_part)
.map_err(|e| format!("JSON parse: {e}"))?;
let arr = match &parsed {
Value::Array(a) => a.clone(),
Value::Object(o) => {
if let Some(Value::Array(a)) = o.get("hits") {
a.clone()
} else if let Some(Value::Array(a)) = o.get("results") {
a.clone()
} else if let Some(Value::Object(result_obj)) = o.get("result") {
if let Some(Value::Array(a)) = result_obj.get("hits") {
a.clone()
} else if let Some(Value::Array(a)) = result_obj.get("results") {
a.clone()
} else if let Some(Value::Array(a)) = result_obj.get("content") {
a.clone()
} else {
Vec::new()
}
} else {
Vec::new()
}
}
_ => Vec::new(),
};
let mut hits = Vec::new();
for item in arr {
let Value::Object(obj) = item else { continue };
let Some(site) = obj.get("site").and_then(Value::as_str) else {
continue;
};
let group = obj
.get("group")
.and_then(Value::as_str)
.map(str::to_string);
let action = obj
.get("action")
.and_then(Value::as_str)
.map(str::to_string);
hits.push(CatalogHit {
site: site.to_string(),
group: group.filter(|s| !s.is_empty()),
action: action.filter(|s| !s.is_empty()),
});
}
Ok(hits)
}
/// Strips the two-line tool envelope from a response, if there is one.
///
/// An envelope is recognised when the text has at least three lines, the first
/// starts with `[` and ends with `]`, and the second starts with `ok ` or
/// `error `. In that case the remaining lines are returned joined with `\n`.
/// Otherwise the input is returned unchanged.
fn extract_manual_body(text: &str) -> String {
    let lines: Vec<&str> = text.lines().collect();
    match lines.as_slice() {
        [tag, status, body @ ..]
            if !body.is_empty()
                && tag.starts_with('[')
                && tag.ends_with(']')
                && (status.starts_with("ok ") || status.starts_with("error ")) =>
        {
            body.join("\n")
        }
        _ => text.to_owned(),
    }
}
/// Extracts the lowercased host from an `http://` or `https://` URL.
///
/// Returns `None` for any other scheme, for an empty host, and for a bracketed
/// IPv6 literal that is empty or missing its closing `]`.
///
/// The authority component ends at the first `/`, `?` or `#`, so a URL without
/// a path (`https://example.com?x=1`, `https://example.com#top`) still yields
/// just the host, and an `@` inside the query or fragment is never mistaken
/// for the userinfo separator. Userinfo (`user:pw@`) and the port are dropped.
/// A bracketed IPv6 literal is returned with its brackets, e.g. `[::1]`.
fn extract_host(url: &str) -> Option<String> {
    let rest = url
        .strip_prefix("https://")
        .or_else(|| url.strip_prefix("http://"))?;

    // Authority = everything up to the first path, query or fragment delimiter.
    let authority = rest.split(['/', '?', '#']).next()?;
    // Drop `userinfo@`; the last '@' wins because userinfo may itself contain '@'.
    let host_port = authority.rsplit_once('@').map_or(authority, |(_, h)| h);

    let host = match host_port.strip_prefix('[') {
        // IPv6 literal: its colons are part of the address, so cut at ']' instead of ':'.
        Some(bracketed) => {
            let (addr, _port) = bracketed.split_once(']')?;
            if addr.is_empty() {
                return None;
            }
            format!("[{addr}]")
        }
        None => host_port.split(':').next()?.to_string(),
    };

    Some(host.to_ascii_lowercase()).filter(|h| !h.is_empty())
}
// Unit tests for the pure helpers in this file: slug construction, host
// extraction, search-response parsing, envelope stripping and command building.
// Nothing here calls the catalog service or writes to the wiki directory.
#[cfg(test)]
mod tests {
use super::*;
// All three components are lowercased; `_` and `.` turn into single dashes.
#[test]
fn page_slug_full_triplet_lowercases_and_dashes() {
let slug = page_slug_for("X_Com", Some("Search.API"), Some("search__timeline"));
assert_eq!(slug, "x-com-search-api-search-timeline");
}
// With no group or action the slug is just the normalised site.
#[test]
fn page_slug_with_only_site() {
assert_eq!(page_slug_for("x_com", None, None), "x-com");
}
// A missing action simply leaves the slug at site + group.
#[test]
fn page_slug_with_site_and_group_no_action() {
assert_eq!(page_slug_for("x_com", Some("search"), None), "x-com-search");
}
// Leading, trailing and repeated dashes are all collapsed away.
#[test]
fn page_slug_dedupes_consecutive_dashes() {
let s = page_slug_for("--x---com--", Some("a__b"), None);
assert_eq!(s, "x-com-a-b");
}
// Dots in a hostname-like site are treated as separators.
#[test]
fn page_slug_dots_become_dashes() {
assert_eq!(page_slug_for("api.example.com", None, None), "api-example-com");
}
// Trailing separators in the inputs must not leak into the slug.
#[test]
fn page_slug_does_not_end_with_dash() {
let s = page_slug_for("x_com_", Some("_search_"), None);
assert!(!s.ends_with('-'));
assert!(!s.contains("--"));
}
// Pins the public cap so a change to it is a deliberate, visible edit.
#[test]
fn max_seed_per_url_is_three() {
assert_eq!(MAX_SEED_PER_URL, 3);
}
// http/https hosts are lowercased and stripped of port; other schemes give None.
#[test]
fn extract_host_basic() {
assert_eq!(extract_host("https://x.com/explore").as_deref(), Some("x.com"));
assert_eq!(
extract_host("http://Sub.Example.Com:8080/foo").as_deref(),
Some("sub.example.com"),
);
assert_eq!(extract_host("file:///tmp/x.html"), None);
assert_eq!(extract_host("data:text/plain,hi"), None);
}
// A bare top-level JSON array is accepted as the list of hits.
#[test]
fn parse_search_response_array_form() {
let text = r#"[{"site":"x_com","group":"search","action":"search_timeline"}]"#;
let hits = parse_search_response(text).unwrap();
assert_eq!(hits.len(), 1);
assert_eq!(hits[0].site, "x_com");
assert_eq!(hits[0].group.as_deref(), Some("search"));
assert_eq!(hits[0].action.as_deref(), Some("search_timeline"));
}
// An object with a `hits` array is accepted; missing group/action become None.
#[test]
fn parse_search_response_hits_envelope() {
let text = r#"{"hits":[{"site":"x_com"},{"site":"y_com","group":"feed"}]}"#;
let hits = parse_search_response(text).unwrap();
assert_eq!(hits.len(), 2);
assert_eq!(hits[0].site, "x_com");
assert!(hits[0].group.is_none());
assert_eq!(hits[1].site, "y_com");
assert_eq!(hits[1].group.as_deref(), Some("feed"));
}
// An empty array is a successful parse with zero hits, not an error.
#[test]
fn parse_search_response_empty_array_returns_empty() {
assert!(parse_search_response("[]").unwrap().is_empty());
}
// Text with no `{` or `[` at all is reported as an error.
#[test]
fn parse_search_response_non_json_errors() {
assert!(parse_search_response("not json at all").is_err());
}
// The `[tag]` + `ok …` header lines are removed, leaving only the body.
#[test]
fn extract_manual_body_strips_envelope() {
let text = "[t1]\nok actionbook manual x_com\n# Manual Header\nbody line";
assert_eq!(extract_manual_body(text), "# Manual Header\nbody line");
}
// Text without the envelope is returned unchanged.
#[test]
fn extract_manual_body_fallback_returns_input() {
let text = "# Just a markdown body\nno envelope";
assert_eq!(extract_manual_body(text), text);
}
// The host appears twice: quoted as the query and bare after `--host`.
#[test]
fn build_search_cmd_quotes_host() {
assert_eq!(
build_search_cmd("x.com"),
"actionbook search \"x.com\" --host x.com"
);
}
// Site, group and action are appended in that order, space-separated.
#[test]
fn build_manual_cmd_with_all_parts() {
let h = CatalogHit {
site: "x_com".into(),
group: Some("search".into()),
action: Some("search_timeline".into()),
};
assert_eq!(build_manual_cmd(&h), "actionbook manual x_com search search_timeline");
}
// With no group or action the command ends right after the site.
#[test]
fn build_manual_cmd_with_site_only() {
let h = CatalogHit {
site: "x_com".into(),
group: None,
action: None,
};
assert_eq!(build_manual_cmd(&h), "actionbook manual x_com");
}
}