Skip to main content

hematite/tools/
workspace_workflow.rs

1use regex::Regex;
2use serde::{Deserialize, Serialize};
3use serde_json::{Map, Value};
4use std::fs::{self, OpenOptions};
5use std::io::{Read, Seek, SeekFrom};
6use std::path::{Path, PathBuf};
7use std::process::Stdio;
8use std::time::{Duration, SystemTime, UNIX_EPOCH};
9
/// Fallback timeout for workflows that have no more specific default (10 minutes).
const DEFAULT_WORKFLOW_TIMEOUT_MS: u64 = 10 * 60 * 1_000;
/// Default timeout for the build/test/lint/fix workflows (30 minutes).
const DEFAULT_VERIFY_TIMEOUT_MS: u64 = 30 * 60 * 1_000;
/// Default time a freshly started website server gets to answer its first probe (2 minutes).
const DEFAULT_WEBSITE_BOOT_TIMEOUT_MS: u64 = 2 * 60 * 1_000;
/// Default per-request timeout for website probes (5 seconds).
const DEFAULT_WEBSITE_REQUEST_TIMEOUT_MS: u64 = 5 * 1_000;
/// Default per-request timeout used by website validation (30 seconds).
const DEFAULT_WEBSITE_VALIDATE_TIMEOUT_MS: u64 = 30 * 1_000;
/// Byte budget for website log tails (4 KiB).
// NOTE(review): the consumer of this constant is outside this part of the file — confirm usage.
const WEBSITE_LOG_TAIL_BYTES: u64 = 4 * 1_024;
16
17pub async fn run_workspace_workflow(args: &Value) -> Result<String, String> {
18    let root = require_project_workspace_root()?;
19    let workflow = required_string(args, "workflow")?;
20
21    match workflow {
22        "website_start" => start_website_server(args, &root).await,
23        "website_probe" => probe_website_server(args, &root).await,
24        "website_validate" => validate_website_server(args, &root).await,
25        "website_status" => website_server_status(args, &root).await,
26        "website_stop" => stop_website_server(args, &root).await,
27        _ => {
28            let invocation = WorkspaceInvocation::from_args(args, &root)?;
29            let output = crate::tools::shell::execute_command_in_dir(
30                &invocation.command,
31                &root,
32                invocation.timeout_ms,
33                false,
34                1000000,
35            )
36            .await?;
37
38            Ok(format!(
39                "Workspace workflow: {}\nWorkspace root: {}\nCommand: {}\n\n{}",
40                invocation.workflow_label,
41                root.display(),
42                invocation.command,
43                output.trim()
44            ))
45        }
46    }
47}
48
/// A non-website workflow resolved to a concrete shell command.
#[derive(Debug, Clone, PartialEq, Eq)]
struct WorkspaceInvocation {
    /// The `workflow` argument as supplied by the caller (trimmed).
    workflow_label: String,
    /// Shell command line that will be executed from the workspace root.
    command: String,
    /// Timeout in milliseconds: the `timeout_ms` argument, or the per-workflow default.
    timeout_ms: u64,
}
55
/// Everything needed to launch a website dev/preview server and wait for it.
#[derive(Debug, Clone, PartialEq, Eq)]
struct WebsiteLaunchPlan {
    /// Lower-cased launch mode from the `mode` argument (defaults to `dev`).
    mode: String,
    /// Name of the package.json script that starts the server.
    script: String,
    /// Package-manager command line that runs `script`.
    command: String,
    /// URL that is probed to decide whether the server is ready.
    url: String,
    /// Framework label inferred from package.json (e.g. `vite`, `next`).
    framework_hint: String,
    /// How long the server may take to become ready, in milliseconds.
    boot_timeout_ms: u64,
    /// Timeout of each individual readiness probe request, in milliseconds.
    request_timeout_ms: u64,
}
66
/// Serializable record of a website server started by `website_start`.
///
/// It is saved to a per-label state path after the server becomes ready and is
/// loaded again by the other `website_*` workflows to locate the server.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
struct WebsiteServerState {
    /// Caller-chosen label identifying the server (defaults to `default`).
    label: String,
    /// Launch mode the server was started with.
    mode: String,
    /// package.json script that was run.
    script: String,
    /// Full command line that was spawned.
    command: String,
    /// URL the server is expected to answer on.
    url: String,
    /// Framework label inferred from package.json.
    framework_hint: String,
    /// Process id reported for the spawned child process.
    pid: u32,
    /// Path of the file receiving the server's stdout and stderr.
    log_path: String,
    /// Workspace root the server was started from.
    workspace_root: String,
    /// Start time in milliseconds since the Unix epoch.
    started_at_epoch_ms: u64,
}
80
/// Condensed result of one HTTP probe against a website URL.
#[derive(Debug, Clone, PartialEq, Eq)]
struct WebsiteProbeSummary {
    /// URL reported for the probe.
    url: String,
    /// HTTP status code; rendered as `HTTP <status>` in workflow output.
    status: u16,
    /// Content type of the response, when known; validation checks it for `text/html`.
    content_type: Option<String>,
    /// Page title, when one was found; validation flags HTML routes without it.
    title: Option<String>,
    /// Preview of the response body.
    // NOTE(review): how the preview is produced is decided by the probe helper, which is not shown here.
    body_preview: String,
}
89
/// A probe summary together with the response body it was derived from.
#[derive(Debug, Clone, PartialEq, Eq)]
struct WebsiteResponseSnapshot {
    /// Condensed status/title/content-type information for the response.
    summary: WebsiteProbeSummary,
    /// Response body text; validation scans it for local asset URLs.
    body: String,
}
95
96impl WorkspaceInvocation {
97    fn from_args(args: &Value, root: &Path) -> Result<Self, String> {
98        let workflow = required_string(args, "workflow")?;
99        let timeout_ms = args
100            .get("timeout_ms")
101            .and_then(|value| value.as_u64())
102            .unwrap_or(default_timeout_ms(workflow));
103
104        let command = match workflow {
105            "build" => default_command_for_action(root, "build")?,
106            "test" => default_command_for_action(root, "test")?,
107            "lint" => default_command_for_action(root, "lint")?,
108            "fix" => default_command_for_action(root, "fix")?,
109            "package_script" => build_package_script_command(root, required_string(args, "name")?)?,
110            "task" => format!("task {}", required_string(args, "name")?),
111            "just" => format!("just {}", required_string(args, "name")?),
112            "make" => format!("make {}", required_string(args, "name")?),
113            "script_path" => build_script_path_command(root, required_string(args, "path")?)?,
114            "command" => required_string(args, "command")?.to_string(),
115            other => {
116                return Err(format!(
117                    "Unknown workflow '{}'. Use one of: build, test, lint, fix, package_script, task, just, make, script_path, command, website_start, website_probe, website_validate, website_status, website_stop.",
118                    other
119                ))
120            }
121        };
122
123        Ok(Self {
124            workflow_label: workflow.to_string(),
125            command,
126            timeout_ms,
127        })
128    }
129}
130
131fn require_project_workspace_root() -> Result<PathBuf, String> {
132    Ok(crate::tools::file_ops::workspace_root())
133}
134
135fn required_string<'a>(args: &'a Value, key: &str) -> Result<&'a str, String> {
136    args.get(key)
137        .and_then(|value| value.as_str())
138        .map(str::trim)
139        .filter(|value| !value.is_empty())
140        .ok_or_else(|| format!("Missing required argument: '{}'", key))
141}
142
143fn optional_string<'a>(args: &'a Value, key: &str) -> Option<&'a str> {
144    args.get(key)
145        .and_then(|value| value.as_str())
146        .map(str::trim)
147        .filter(|value| !value.is_empty())
148}
149
150fn optional_string_vec(args: &Value, key: &str) -> Vec<String> {
151    args.get(key)
152        .and_then(|value| value.as_array())
153        .into_iter()
154        .flat_map(|items| items.iter())
155        .filter_map(|value| value.as_str())
156        .map(str::trim)
157        .filter(|value| !value.is_empty())
158        .map(ToOwned::to_owned)
159        .collect()
160}
161
162fn default_timeout_ms(workflow: &str) -> u64 {
163    match workflow {
164        "build" | "test" | "lint" | "fix" => DEFAULT_VERIFY_TIMEOUT_MS,
165        "website_start" => DEFAULT_WEBSITE_BOOT_TIMEOUT_MS,
166        "website_probe" | "website_status" => DEFAULT_WEBSITE_REQUEST_TIMEOUT_MS,
167        "website_validate" => DEFAULT_WEBSITE_VALIDATE_TIMEOUT_MS,
168        _ => DEFAULT_WORKFLOW_TIMEOUT_MS,
169    }
170}
171
172fn default_command_for_action(root: &Path, action: &str) -> Result<String, String> {
173    let profile = crate::agent::workspace_profile::load_workspace_profile(root)
174        .unwrap_or_else(|| crate::agent::workspace_profile::detect_workspace_profile(root));
175
176    match action {
177        "build" => profile
178            .build_hint
179            .ok_or_else(|| missing_workspace_command_message(action, root)),
180        "test" => profile
181            .test_hint
182            .ok_or_else(|| missing_workspace_command_message(action, root)),
183        "lint" => detect_lint_command(root),
184        "fix" => detect_fix_command(root),
185        other => Err(format!("Unsupported workspace action '{}'.", other)),
186    }
187}
188
/// Builds the user-facing error shown when no command could be inferred for
/// `action` in the workspace at `root`.
fn missing_workspace_command_message(action: &str, root: &Path) -> String {
    let workspace = root.display();
    format!(
        "Hematite could not infer a `{action}` command for the locked workspace at {workspace}. Add a workspace verify profile in `.hematite/settings.json`, or ask for an explicit command/script instead."
    )
}
196
197fn detect_lint_command(root: &Path) -> Result<String, String> {
198    if root.join("Cargo.toml").exists() {
199        Ok("cargo clippy --all-targets --all-features -- -D warnings".to_string())
200    } else if root.join("package.json").exists() {
201        Ok(format!(
202            "{} run lint --if-present",
203            detect_node_package_manager(root)
204        ))
205    } else if root.join("go.mod").exists() {
206        Err(missing_workspace_command_message("lint", root))
207    } else {
208        Err(missing_workspace_command_message("lint", root))
209    }
210}
211
212fn detect_fix_command(root: &Path) -> Result<String, String> {
213    if root.join("Cargo.toml").exists() {
214        Ok("cargo fmt".to_string())
215    } else if root.join("package.json").exists() {
216        Ok(format!(
217            "{} run fix --if-present",
218            detect_node_package_manager(root)
219        ))
220    } else {
221        Err(missing_workspace_command_message("fix", root))
222    }
223}
224
225fn build_package_script_command(root: &Path, name: &str) -> Result<String, String> {
226    let package = read_package_json(root)?;
227    let has_script = package
228        .get("scripts")
229        .and_then(|value| value.get(name))
230        .is_some();
231    if !has_script {
232        return Err(format!(
233            "package.json does not define a script named `{}` in {}.",
234            name,
235            root.display()
236        ));
237    }
238
239    let package_manager = detect_node_package_manager(root);
240    let command = match package_manager.as_str() {
241        "yarn" => format!("yarn {}", name),
242        "bun" => format!("bun run {}", name),
243        manager => format!("{} run {}", manager, name),
244    };
245    Ok(command)
246}
247
248fn read_package_json(root: &Path) -> Result<Value, String> {
249    let package_json = root.join("package.json");
250    if !package_json.exists() {
251        return Err(format!(
252            "This workflow requires package.json in the locked workspace root ({}).",
253            root.display()
254        ));
255    }
256
257    let content = fs::read_to_string(&package_json)
258        .map_err(|e| format!("Failed to read {}: {}", package_json.display(), e))?;
259    serde_json::from_str(&content)
260        .map_err(|e| format!("Failed to parse {}: {}", package_json.display(), e))
261}
262
263fn package_scripts(package: &Value) -> Map<String, Value> {
264    package
265        .get("scripts")
266        .and_then(|value| value.as_object())
267        .cloned()
268        .unwrap_or_default()
269}
270
271fn build_script_path_command(root: &Path, relative_path: &str) -> Result<String, String> {
272    let candidate = root.join(relative_path);
273    let canonical_root = root
274        .canonicalize()
275        .map_err(|e| format!("Failed to resolve workspace root {}: {}", root.display(), e))?;
276    let canonical_path = candidate.canonicalize().map_err(|e| {
277        format!(
278            "Could not resolve script path `{}` from workspace root {}: {}",
279            relative_path,
280            root.display(),
281            e
282        )
283    })?;
284    if !canonical_path.starts_with(&canonical_root) {
285        return Err(format!(
286            "Script path `{}` resolves outside the locked workspace root {}.",
287            relative_path,
288            root.display()
289        ));
290    }
291
292    let display_path = normalize_relative_path(&canonical_path, &canonical_root)?;
293    let lower = display_path.to_ascii_lowercase();
294    if lower.ends_with(".ps1") {
295        Ok(format!(
296            "pwsh -ExecutionPolicy Bypass -File {}",
297            quote_command_arg(&display_path)
298        ))
299    } else if lower.ends_with(".cmd") || lower.ends_with(".bat") {
300        Ok(format!("cmd /C {}", quote_command_arg(&display_path)))
301    } else if lower.ends_with(".sh") {
302        Ok(format!("bash {}", quote_command_arg(&display_path)))
303    } else if lower.ends_with(".py") {
304        Ok(format!("python {}", quote_command_arg(&display_path)))
305    } else if lower.ends_with(".js") || lower.ends_with(".cjs") || lower.ends_with(".mjs") {
306        Ok(format!("node {}", quote_command_arg(&display_path)))
307    } else {
308        Ok(display_path)
309    }
310}
311
/// Renders `path` relative to `root` with a leading `.` and the platform's
/// main separator (e.g. `./scripts/run.sh` on Unix).
///
/// # Errors
/// Fails when `path` does not start with `root`.
fn normalize_relative_path(path: &Path, root: &Path) -> Result<String, String> {
    match path.strip_prefix(root) {
        Ok(relative) => {
            let separator = std::path::MAIN_SEPARATOR;
            Ok(format!(".{}{}", separator, relative.display()))
        }
        Err(e) => Err(format!("Failed to normalize script path: {}", e)),
    }
}
322
/// Wraps `value` in double quotes, prefixing every embedded `"` with a backslash.
fn quote_command_arg(value: &str) -> String {
    let mut quoted = String::with_capacity(value.len() + 2);
    quoted.push('"');
    for ch in value.chars() {
        if ch == '"' {
            quoted.push('\\');
        }
        quoted.push(ch);
    }
    quoted.push('"');
    quoted
}
326
/// Detects the Node package manager from lockfiles in the workspace root.
///
/// Lockfiles are checked in priority order (pnpm, yarn, bun); when none is
/// present the result is `npm`.
fn detect_node_package_manager(root: &Path) -> String {
    // (lockfile name, package manager) in priority order.
    const LOCKFILES: [(&str, &str); 4] = [
        ("pnpm-lock.yaml", "pnpm"),
        ("yarn.lock", "yarn"),
        ("bun.lockb", "bun"),
        ("bun.lock", "bun"),
    ];

    LOCKFILES
        .iter()
        .find(|(lockfile, _)| root.join(lockfile).exists())
        .map(|&(_, manager)| manager)
        .unwrap_or("npm")
        .to_string()
}
338
339async fn start_website_server(args: &Value, root: &Path) -> Result<String, String> {
340    let label = optional_string(args, "label").unwrap_or("default");
341    let state_path = website_state_path(root, label);
342    if let Some(existing) = load_website_server_state(&state_path)? {
343        if is_process_alive(existing.pid).await {
344            return Err(format!(
345                "A website server labeled `{}` is already running.\nURL: {}\nPID: {}\nLog: {}\nUse workflow=website_status or workflow=website_stop first.",
346                existing.label, existing.url, existing.pid, existing.log_path
347            ));
348        }
349        let _ = fs::remove_file(&state_path);
350    }
351
352    let plan = detect_website_launch_plan(args, root)?;
353    let runtime_dir = website_runtime_dir(root);
354    fs::create_dir_all(&runtime_dir)
355        .map_err(|e| format!("Failed to create {}: {}", runtime_dir.display(), e))?;
356    let log_path = website_log_path(root, label);
357    let stdout_log = OpenOptions::new()
358        .create(true)
359        .truncate(true)
360        .write(true)
361        .open(&log_path)
362        .map_err(|e| format!("Failed to create {}: {}", log_path.display(), e))?;
363    let stderr_log = stdout_log
364        .try_clone()
365        .map_err(|e| format!("Failed to clone log handle {}: {}", log_path.display(), e))?;
366
367    let mut command = build_shell_command(&plan.command).await;
368    command
369        .current_dir(root)
370        .stdout(Stdio::from(stdout_log))
371        .stderr(Stdio::from(stderr_log));
372
373    let sandbox_root = crate::tools::file_ops::hematite_dir().join("sandbox");
374    let _ = fs::create_dir_all(&sandbox_root);
375    command.env("HOME", &sandbox_root);
376    command.env("TMPDIR", &sandbox_root);
377    command.env("CI", "1");
378    command.env("BROWSER", "none");
379
380    let mut child = command
381        .spawn()
382        .map_err(|e| format!("Failed to start website server: {}", e))?;
383    let pid = child
384        .id()
385        .ok_or_else(|| "Website server started without a visible process id.".to_string())?;
386
387    let started_at_epoch_ms = SystemTime::now()
388        .duration_since(UNIX_EPOCH)
389        .map_err(|e| format!("Clock error: {}", e))?
390        .as_millis() as u64;
391
392    let state = WebsiteServerState {
393        label: label.to_string(),
394        mode: plan.mode.clone(),
395        script: plan.script.clone(),
396        command: plan.command.clone(),
397        url: plan.url.clone(),
398        framework_hint: plan.framework_hint.clone(),
399        pid,
400        log_path: log_path.display().to_string(),
401        workspace_root: root.display().to_string(),
402        started_at_epoch_ms,
403    };
404
405    let probe = wait_for_website_readiness(
406        &mut child,
407        &plan.url,
408        plan.boot_timeout_ms,
409        plan.request_timeout_ms,
410        &log_path,
411    )
412    .await
413    .map_err(|message| {
414        let _ = fs::remove_file(&state_path);
415        message
416    })?;
417
418    save_website_server_state(&state_path, &state)?;
419
420    Ok(format!(
421        "Workspace workflow: website_start\nWorkspace root: {}\nMode: {}\nLabel: {}\nScript: {}\nCommand: {}\nFramework hint: {}\nURL: {}\nPID: {}\nLog: {}\n\nReady: HTTP {}{}\n{}",
422        root.display(),
423        state.mode,
424        state.label,
425        state.script,
426        state.command,
427        state.framework_hint,
428        state.url,
429        state.pid,
430        state.log_path,
431        probe.status,
432        probe
433            .title
434            .as_ref()
435            .map(|title| format!(" ({title})"))
436            .unwrap_or_default(),
437        format_probe_details(&probe)
438    ))
439}
440
441fn detect_website_launch_plan(args: &Value, root: &Path) -> Result<WebsiteLaunchPlan, String> {
442    let package = read_package_json(root)?;
443    let scripts = package_scripts(&package);
444    let runtime_contract = load_runtime_contract(root);
445    let mode = optional_string(args, "mode")
446        .unwrap_or("dev")
447        .to_ascii_lowercase();
448    let script = if let Some(explicit) = optional_string(args, "script") {
449        explicit.to_string()
450    } else {
451        detect_website_script_name(&scripts, &mode)?
452    };
453    if !scripts.contains_key(&script) {
454        return Err(format!(
455            "package.json does not define a website script named `{}` in {}.",
456            script,
457            root.display()
458        ));
459    }
460
461    let framework_hint = infer_website_framework(&package);
462    let port = args
463        .get("port")
464        .and_then(|value| value.as_u64())
465        .and_then(|value| u16::try_from(value).ok())
466        .or_else(|| infer_website_default_port(&package, &mode));
467    let host = optional_string(args, "host").unwrap_or("127.0.0.1");
468    let url = if let Some(explicit_url) = optional_string(args, "url") {
469        normalize_http_url(explicit_url)
470    } else if let Some(url_hint) = runtime_contract
471        .as_ref()
472        .and_then(|contract| contract.local_url_hint.clone())
473    {
474        url_hint
475    } else {
476        let inferred_port = port.unwrap_or(if mode == "preview" { 4173 } else { 3000 });
477        format!("http://{}:{}/", host, inferred_port)
478    };
479
480    Ok(WebsiteLaunchPlan {
481        mode,
482        script: script.clone(),
483        command: build_package_script_command(root, &script)?,
484        url,
485        framework_hint,
486        boot_timeout_ms: args
487            .get("timeout_ms")
488            .and_then(|value| value.as_u64())
489            .unwrap_or(DEFAULT_WEBSITE_BOOT_TIMEOUT_MS),
490        request_timeout_ms: args
491            .get("request_timeout_ms")
492            .and_then(|value| value.as_u64())
493            .unwrap_or(DEFAULT_WEBSITE_REQUEST_TIMEOUT_MS),
494    })
495}
496
497fn detect_website_script_name(scripts: &Map<String, Value>, mode: &str) -> Result<String, String> {
498    let candidates = match mode {
499        "dev" => ["dev", "start", "serve"],
500        "preview" => ["preview", "serve", "start"],
501        "start" => ["start", "serve", "dev"],
502        other => {
503            return Err(format!(
504                "Unknown website mode `{}`. Use one of: dev, preview, start.",
505                other
506            ))
507        }
508    };
509
510    candidates
511        .iter()
512        .find(|candidate| scripts.contains_key(**candidate))
513        .map(|candidate| candidate.to_string())
514        .ok_or_else(|| {
515            format!(
516                "Could not infer a website {} script from package.json. Define one of [{}], or pass `script` explicitly.",
517                mode,
518                candidates.join(", ")
519            )
520        })
521}
522
523fn infer_website_framework(package: &Value) -> String {
524    let deps = dependency_names(package);
525    let script_text = package_scripts(package)
526        .into_values()
527        .filter_map(|value| value.as_str().map(|text| text.to_ascii_lowercase()))
528        .collect::<Vec<_>>()
529        .join("\n");
530
531    if deps.contains("next") || script_text.contains("next ") {
532        "next".to_string()
533    } else if deps.contains("vite") || script_text.contains("vite") {
534        "vite".to_string()
535    } else if deps.contains("astro") || script_text.contains("astro ") {
536        "astro".to_string()
537    } else if deps.contains("@angular/core") || script_text.contains("ng serve") {
538        "angular".to_string()
539    } else if deps.contains("gatsby") || script_text.contains("gatsby ") {
540        "gatsby".to_string()
541    } else if deps.contains("react-scripts") || script_text.contains("react-scripts") {
542        "react-scripts".to_string()
543    } else if deps.contains("@sveltejs/kit") || script_text.contains("svelte-kit") {
544        "sveltekit".to_string()
545    } else if deps.contains("nuxt") || script_text.contains("nuxt ") {
546        "nuxt".to_string()
547    } else {
548        "generic-node-site".to_string()
549    }
550}
551
552fn infer_website_default_port(package: &Value, mode: &str) -> Option<u16> {
553    match infer_website_framework(package).as_str() {
554        "vite" | "sveltekit" => Some(if mode == "preview" { 4173 } else { 5173 }),
555        "astro" => Some(4321),
556        "gatsby" => Some(8000),
557        "angular" => Some(4200),
558        "next" | "react-scripts" | "nuxt" => Some(3000),
559        _ => None,
560    }
561}
562
563fn dependency_names(package: &Value) -> std::collections::BTreeSet<String> {
564    let mut deps = std::collections::BTreeSet::new();
565    for field in ["dependencies", "devDependencies", "peerDependencies"] {
566        if let Some(map) = package.get(field).and_then(|value| value.as_object()) {
567            for name in map.keys() {
568                deps.insert(name.to_ascii_lowercase());
569            }
570        }
571    }
572    deps
573}
574
575async fn wait_for_website_readiness(
576    child: &mut tokio::process::Child,
577    url: &str,
578    boot_timeout_ms: u64,
579    request_timeout_ms: u64,
580    log_path: &Path,
581) -> Result<WebsiteProbeSummary, String> {
582    let deadline = tokio::time::Instant::now() + Duration::from_millis(boot_timeout_ms);
583    let client = reqwest::Client::builder()
584        .timeout(Duration::from_millis(request_timeout_ms))
585        .redirect(reqwest::redirect::Policy::limited(5))
586        .build()
587        .map_err(|e| format!("Failed to build readiness probe client: {}", e))?;
588
589    loop {
590        let probe_error = match probe_website_once(&client, url).await {
591            Ok(summary) => return Ok(summary),
592            Err(err) => err,
593        };
594
595        match child.try_wait() {
596            Ok(Some(status)) => {
597                return Err(format!(
598                    "Website server exited before it became ready (status: {}).\nLast probe error: {}\n{}",
599                    status,
600                    probe_error,
601                    format_log_tail_for_path("Recent log tail", Some(log_path))
602                ));
603            }
604            Ok(None) => {}
605            Err(err) => {
606                return Err(format!("Failed to inspect website server status: {}", err));
607            }
608        }
609
610        if tokio::time::Instant::now() >= deadline {
611            let _ = child.kill().await;
612            return Err(format!(
613                "Website server did not become ready within {} ms.\nLast probe error: {}\n{}",
614                boot_timeout_ms,
615                probe_error,
616                format_log_tail_for_path("Recent log tail", Some(log_path))
617            ));
618        }
619
620        tokio::time::sleep(Duration::from_millis(750)).await;
621    }
622}
623
624async fn probe_website_server(args: &Value, root: &Path) -> Result<String, String> {
625    let label = optional_string(args, "label").unwrap_or("default");
626    let state = load_website_server_state(&website_state_path(root, label))?;
627    let (url, log_path) = if let Some(state) = state {
628        (state.url, Some(state.log_path))
629    } else if let Some(url) = optional_string(args, "url") {
630        (normalize_http_url(url), None)
631    } else {
632        return Err(format!(
633            "No tracked website server labeled `{}`. Pass `url` to probe an arbitrary local site, or start one with workflow=website_start.",
634            label
635        ));
636    };
637
638    let request_timeout_ms = args
639        .get("timeout_ms")
640        .and_then(|value| value.as_u64())
641        .unwrap_or(DEFAULT_WEBSITE_REQUEST_TIMEOUT_MS);
642    let client = reqwest::Client::builder()
643        .timeout(Duration::from_millis(request_timeout_ms))
644        .redirect(reqwest::redirect::Policy::limited(5))
645        .build()
646        .map_err(|e| format!("Failed to build probe client: {}", e))?;
647    let probe = probe_website_once(&client, &url).await.map_err(|e| {
648        if let Some(path) = log_path.as_deref() {
649            format!("{}\n{}", e, format_log_tail("Recent log tail", Some(path)))
650        } else {
651            e
652        }
653    })?;
654
655    Ok(format!(
656        "Workspace workflow: website_probe\nWorkspace root: {}\nURL: {}\n\nHTTP {}{}\n{}",
657        root.display(),
658        probe.url,
659        probe.status,
660        probe
661            .title
662            .as_ref()
663            .map(|title| format!(" ({title})"))
664            .unwrap_or_default(),
665        format_probe_details(&probe)
666    ))
667}
668
669async fn validate_website_server(args: &Value, root: &Path) -> Result<String, String> {
670    let label = optional_string(args, "label").unwrap_or("default");
671    let (base_url, log_path) = resolve_website_target(args, root, label)?;
672    let routes = default_website_routes(args, root);
673    let asset_limit = args
674        .get("asset_limit")
675        .and_then(|value| value.as_u64())
676        .unwrap_or(8)
677        .min(24) as usize;
678    let request_timeout_ms = args
679        .get("timeout_ms")
680        .and_then(|value| value.as_u64())
681        .unwrap_or(DEFAULT_WEBSITE_VALIDATE_TIMEOUT_MS);
682    let client = reqwest::Client::builder()
683        .timeout(Duration::from_millis(request_timeout_ms))
684        .redirect(reqwest::redirect::Policy::limited(5))
685        .build()
686        .map_err(|e| format!("Failed to build validation client: {}", e))?;
687
688    let mut route_lines = Vec::new();
689    let mut asset_lines = Vec::new();
690    let mut issues = Vec::new();
691    let mut assets = std::collections::BTreeSet::new();
692
693    for route in &routes {
694        let route_url = resolve_website_url(&base_url, route)?;
695        match fetch_website_snapshot(&client, &route_url).await {
696            Ok(snapshot) => {
697                let summary = &snapshot.summary;
698                route_lines.push(format!(
699                    "- {} -> HTTP {}{}",
700                    route,
701                    summary.status,
702                    summary
703                        .title
704                        .as_ref()
705                        .map(|title| format!(" ({title})"))
706                        .unwrap_or_default()
707                ));
708                let content_type = summary.content_type.as_deref().unwrap_or_default();
709                if content_type.contains("text/html") {
710                    if summary.title.is_none() {
711                        issues.push(format!("Route {} returned HTML without a <title>.", route));
712                    }
713                    for asset in extract_local_asset_urls(&route_url, &snapshot.body)
714                        .into_iter()
715                        .take(asset_limit)
716                    {
717                        assets.insert(asset);
718                    }
719                }
720            }
721            Err(err) => {
722                issues.push(format!("Route {} failed validation: {}", route, err));
723            }
724        }
725    }
726
727    for asset_url in assets.iter().take(asset_limit) {
728        match probe_website_once(&client, asset_url).await {
729            Ok(summary) => asset_lines.push(format!(
730                "- {} -> HTTP {} ({})",
731                asset_url,
732                summary.status,
733                summary
734                    .content_type
735                    .as_deref()
736                    .unwrap_or("unknown content type")
737            )),
738            Err(err) => issues.push(format!("Asset {} failed validation: {}", asset_url, err)),
739        }
740    }
741
742    let result = if issues.is_empty() { "PASS" } else { "FAIL" };
743    let mut out = format!(
744        "Workspace workflow: website_validate\nWorkspace root: {}\nBase URL: {}\nRoutes checked: {}\nAssets checked: {}\nResult: {}",
745        root.display(),
746        base_url,
747        routes.len(),
748        asset_lines.len(),
749        result
750    );
751    if !route_lines.is_empty() {
752        out.push_str("\n\nRoutes\n");
753        out.push_str(&route_lines.join("\n"));
754    }
755    if !asset_lines.is_empty() {
756        out.push_str("\n\nAssets\n");
757        out.push_str(&asset_lines.join("\n"));
758    }
759    if !issues.is_empty() {
760        out.push_str("\n\nIssues\n");
761        out.push_str(
762            &issues
763                .into_iter()
764                .map(|issue| format!("- {}", issue))
765                .collect::<Vec<_>>()
766                .join("\n"),
767        );
768    }
769    if let Some(path) = log_path.as_deref() {
770        out.push_str("\n\n");
771        out.push_str(&format_log_tail("Recent log tail", Some(path)));
772    }
773    Ok(out)
774}
775
776fn resolve_website_target(
777    args: &Value,
778    root: &Path,
779    label: &str,
780) -> Result<(String, Option<String>), String> {
781    let state = load_website_server_state(&website_state_path(root, label))?;
782    if let Some(state) = state {
783        return Ok((state.url, Some(state.log_path)));
784    }
785    if let Some(url) = optional_string(args, "url") {
786        return Ok((normalize_http_url(url), None));
787    }
788    if let Some(url_hint) = load_runtime_contract(root).and_then(|contract| contract.local_url_hint)
789    {
790        return Ok((url_hint, None));
791    }
792    Err(format!(
793        "No tracked website server labeled `{}` and no explicit url. Start the site with workflow=website_start or pass `url`.",
794        label
795    ))
796}
797
798fn default_website_routes(args: &Value, root: &Path) -> Vec<String> {
799    let mut routes = optional_string_vec(args, "routes");
800    if !routes.is_empty() {
801        return normalize_route_hints(routes);
802    }
803    if let Some(contract) = load_runtime_contract(root) {
804        routes = contract.route_hints;
805    }
806    if routes.is_empty() {
807        routes.push("/".to_string());
808    }
809    normalize_route_hints(routes)
810}
811
/// Normalizes route hints into a sorted list without duplicates.
///
/// Entries are trimmed and blank ones dropped. Absolute `http://`/`https://`
/// URLs and paths starting with `/` are kept as-is; anything else gets a
/// leading `/`. An empty result is replaced by the single route `/`.
fn normalize_route_hints(routes: Vec<String>) -> Vec<String> {
    let mut unique: std::collections::BTreeSet<String> = routes
        .iter()
        .map(|route| route.trim())
        .filter(|route| !route.is_empty())
        .map(|route| {
            let is_absolute_url = route.starts_with("http://") || route.starts_with("https://");
            if is_absolute_url || route.starts_with('/') {
                route.to_string()
            } else {
                format!("/{}", route)
            }
        })
        .collect();

    if unique.is_empty() {
        unique.insert("/".to_string());
    }
    unique.into_iter().collect()
}
832
833fn resolve_website_url(base_url: &str, route: &str) -> Result<String, String> {
834    if route.starts_with("http://") || route.starts_with("https://") {
835        return Ok(route.to_string());
836    }
837    let base = reqwest::Url::parse(base_url)
838        .map_err(|e| format!("Invalid base URL {}: {}", base_url, e))?;
839    base.join(route).map(|url| url.to_string()).map_err(|e| {
840        format!(
841            "Failed to resolve route {} against {}: {}",
842            route, base_url, e
843        )
844    })
845}
846
847async fn website_server_status(args: &Value, root: &Path) -> Result<String, String> {
848    let label = optional_string(args, "label").unwrap_or("default");
849    let state_path = website_state_path(root, label);
850    let Some(state) = load_website_server_state(&state_path)? else {
851        return Err(format!(
852            "No tracked website server labeled `{}`. Start one with workflow=website_start.",
853            label
854        ));
855    };
856
857    let alive = is_process_alive(state.pid).await;
858    let request_timeout_ms = args
859        .get("timeout_ms")
860        .and_then(|value| value.as_u64())
861        .unwrap_or(DEFAULT_WEBSITE_REQUEST_TIMEOUT_MS);
862    let client = reqwest::Client::builder()
863        .timeout(Duration::from_millis(request_timeout_ms))
864        .redirect(reqwest::redirect::Policy::limited(5))
865        .build()
866        .map_err(|e| format!("Failed to build status probe client: {}", e))?;
867    let probe = probe_website_once(&client, &state.url).await.ok();
868
869    let mut out = format!(
870        "Workspace workflow: website_status\nWorkspace root: {}\nLabel: {}\nMode: {}\nScript: {}\nCommand: {}\nFramework hint: {}\nURL: {}\nPID: {}\nAlive: {}\nLog: {}",
871        root.display(),
872        state.label,
873        state.mode,
874        state.script,
875        state.command,
876        state.framework_hint,
877        state.url,
878        state.pid,
879        if alive { "yes" } else { "no" },
880        state.log_path
881    );
882    if let Some(probe) = probe {
883        out.push_str(&format!(
884            "\n\nHTTP {}{}\n{}",
885            probe.status,
886            probe
887                .title
888                .as_ref()
889                .map(|title| format!(" ({title})"))
890                .unwrap_or_default(),
891            format_probe_details(&probe)
892        ));
893    } else {
894        out.push_str("\n\nHTTP probe: unavailable");
895    }
896    out.push_str("\n");
897    out.push_str(&format_log_tail("Recent log tail", Some(&state.log_path)));
898    Ok(out)
899}
900
901async fn stop_website_server(args: &Value, root: &Path) -> Result<String, String> {
902    let label = optional_string(args, "label").unwrap_or("default");
903    let state_path = website_state_path(root, label);
904    let Some(state) = load_website_server_state(&state_path)? else {
905        return Err(format!(
906            "No tracked website server labeled `{}`. Nothing to stop.",
907            label
908        ));
909    };
910
911    let was_alive = is_process_alive(state.pid).await;
912    if was_alive {
913        kill_process(state.pid).await?;
914    }
915    let _ = fs::remove_file(&state_path);
916
917    Ok(format!(
918        "Workspace workflow: website_stop\nWorkspace root: {}\nLabel: {}\nPID: {}\nWas alive: {}\nURL: {}\nLog: {}\n\n{}",
919        root.display(),
920        state.label,
921        state.pid,
922        if was_alive { "yes" } else { "no" },
923        state.url,
924        state.log_path,
925        format_log_tail("Recent log tail", Some(&state.log_path))
926    ))
927}
928
929async fn probe_website_once(
930    client: &reqwest::Client,
931    url: &str,
932) -> Result<WebsiteProbeSummary, String> {
933    Ok(fetch_website_snapshot(client, url).await?.summary)
934}
935
936async fn fetch_website_snapshot(
937    client: &reqwest::Client,
938    url: &str,
939) -> Result<WebsiteResponseSnapshot, String> {
940    let response = client
941        .get(url)
942        .send()
943        .await
944        .map_err(|e| format!("HTTP probe failed for {}: {}", url, e))?;
945    let status = response.status();
946    let content_type = response
947        .headers()
948        .get(reqwest::header::CONTENT_TYPE)
949        .and_then(|value| value.to_str().ok())
950        .map(|value| value.to_string());
951    let body = response
952        .text()
953        .await
954        .map_err(|e| format!("Failed to read response body from {}: {}", url, e))?;
955    if !status.is_success() {
956        return Err(format!(
957            "HTTP probe returned {} for {}.",
958            status.as_u16(),
959            url
960        ));
961    }
962
963    Ok(WebsiteResponseSnapshot {
964        summary: WebsiteProbeSummary {
965            url: url.to_string(),
966            status: status.as_u16(),
967            content_type,
968            title: extract_html_title(&body),
969            body_preview: html_preview_text(&body),
970        },
971        body,
972    })
973}
974
975fn extract_html_title(body: &str) -> Option<String> {
976    let re = Regex::new(r"(?is)<title[^>]*>(.*?)</title>").ok()?;
977    re.captures(body)
978        .and_then(|captures| captures.get(1).map(|value| value.as_str()))
979        .map(compact_whitespace)
980        .filter(|title| !title.is_empty())
981}
982
983fn html_preview_text(body: &str) -> String {
984    let strip_re = Regex::new(r"(?is)<script[^>]*>.*?</script>|<style[^>]*>.*?</style>|<[^>]+>")
985        .expect("valid strip regex");
986    let stripped = strip_re.replace_all(body, " ");
987    let compact = compact_whitespace(&stripped);
988    compact.chars().take(240).collect()
989}
990
991fn compact_whitespace(input: &str) -> String {
992    input.split_whitespace().collect::<Vec<_>>().join(" ")
993}
994
995fn format_probe_details(probe: &WebsiteProbeSummary) -> String {
996    let mut lines = Vec::new();
997    if let Some(content_type) = probe.content_type.as_deref() {
998        lines.push(format!("Content-Type: {}", content_type));
999    }
1000    if let Some(title) = probe.title.as_deref() {
1001        lines.push(format!("Title: {}", title));
1002    }
1003    if !probe.body_preview.is_empty() {
1004        lines.push(format!("Body preview: {}", probe.body_preview));
1005    }
1006    if lines.is_empty() {
1007        "(no probe details)".to_string()
1008    } else {
1009        lines.join("\n")
1010    }
1011}
1012
/// Ensures `url` carries an HTTP scheme.
///
/// The input is trimmed. If it already starts with `http://` or `https://`
/// it is returned as-is; otherwise `http://` is prepended. URL schemes are
/// case-insensitive, so the check ignores ASCII case: `HTTP://host` is
/// returned untouched rather than becoming `http://HTTP://host`.
fn normalize_http_url(url: &str) -> String {
    let trimmed = url.trim();
    let has_http_scheme = ["http://", "https://"].iter().any(|scheme| {
        // `get` returns `None` for short inputs and for cuts that would land
        // inside a multi-byte character, so this cannot panic.
        trimmed
            .get(..scheme.len())
            .map_or(false, |prefix| prefix.eq_ignore_ascii_case(scheme))
    });
    if has_http_scheme {
        trimmed.to_string()
    } else {
        format!("http://{}", trimmed)
    }
}
1021
1022fn extract_local_asset_urls(page_url: &str, body: &str) -> Vec<String> {
1023    let Ok(page) = reqwest::Url::parse(page_url) else {
1024        return Vec::new();
1025    };
1026    let regex = Regex::new(r#"(?is)(?:src|href)=["']([^"'#]+)["']"#).expect("valid asset regex");
1027    let mut assets = std::collections::BTreeSet::new();
1028    for captures in regex.captures_iter(body) {
1029        let Some(raw) = captures.get(1).map(|value| value.as_str().trim()) else {
1030            continue;
1031        };
1032        let lower = raw.to_ascii_lowercase();
1033        if lower.starts_with("http://")
1034            || lower.starts_with("https://")
1035            || lower.starts_with("data:")
1036            || lower.starts_with("mailto:")
1037            || lower.starts_with("tel:")
1038            || lower.starts_with("javascript:")
1039        {
1040            continue;
1041        }
1042        if !looks_like_static_asset(raw) {
1043            continue;
1044        }
1045        if let Ok(joined) = page.join(raw) {
1046            assets.insert(joined.to_string());
1047        }
1048    }
1049    assets.into_iter().collect()
1050}
1051
/// Heuristic: does `path` mention a well-known static asset extension?
///
/// The comparison ignores ASCII case and matches the extension anywhere in the
/// string, so values such as `app.js?v=3` are accepted as well.
fn looks_like_static_asset(path: &str) -> bool {
    const ASSET_MARKERS: [&str; 15] = [
        ".css",
        ".js",
        ".mjs",
        ".ico",
        ".png",
        ".jpg",
        ".jpeg",
        ".svg",
        ".webp",
        ".gif",
        ".woff",
        ".woff2",
        ".map",
        ".json",
        ".webmanifest",
    ];

    let lowered = path.to_ascii_lowercase();
    for marker in &ASSET_MARKERS {
        if lowered.contains(marker) {
            return true;
        }
    }
    false
}
1074
1075fn load_runtime_contract(root: &Path) -> Option<crate::agent::workspace_profile::RuntimeContract> {
1076    crate::agent::workspace_profile::load_workspace_profile(root)
1077        .unwrap_or_else(|| crate::agent::workspace_profile::detect_workspace_profile(root))
1078        .runtime_contract
1079}
1080
1081fn website_runtime_dir(root: &Path) -> PathBuf {
1082    if crate::tools::file_ops::is_os_shortcut_directory(root) {
1083        crate::tools::file_ops::hematite_dir().join("website-runtime")
1084    } else {
1085        root.join(".hematite").join("website-runtime")
1086    }
1087}
1088
1089fn website_state_path(root: &Path, label: &str) -> PathBuf {
1090    website_runtime_dir(root).join(format!("{}.json", slugify_label(label)))
1091}
1092
1093fn website_log_path(root: &Path, label: &str) -> PathBuf {
1094    website_runtime_dir(root).join(format!("{}.log", slugify_label(label)))
1095}
1096
/// Turns an arbitrary label into a file-name-safe slug.
///
/// ASCII letters and digits are kept (lowercased). Every run of other
/// characters becomes a single `-`, and no `-` is left at either end. A label
/// without any ASCII alphanumerics maps to `"default"`.
fn slugify_label(input: &str) -> String {
    // Splitting on the separators and re-joining the non-empty pieces yields
    // exactly one dash between alphanumeric runs and none at the edges.
    let slug = input
        .split(|ch: char| !ch.is_ascii_alphanumeric())
        .filter(|segment| !segment.is_empty())
        .map(|segment| segment.to_ascii_lowercase())
        .collect::<Vec<_>>()
        .join("-");
    if slug.is_empty() {
        "default".to_string()
    } else {
        slug
    }
}
1117
1118fn save_website_server_state(path: &Path, state: &WebsiteServerState) -> Result<(), String> {
1119    if let Some(parent) = path.parent() {
1120        fs::create_dir_all(parent)
1121            .map_err(|e| format!("Failed to create {}: {}", parent.display(), e))?;
1122    }
1123    let payload = serde_json::to_string_pretty(state)
1124        .map_err(|e| format!("Failed to encode website state: {}", e))?;
1125    fs::write(path, payload).map_err(|e| format!("Failed to write {}: {}", path.display(), e))
1126}
1127
1128fn load_website_server_state(path: &Path) -> Result<Option<WebsiteServerState>, String> {
1129    if !path.exists() {
1130        return Ok(None);
1131    }
1132    let raw = fs::read_to_string(path)
1133        .map_err(|e| format!("Failed to read {}: {}", path.display(), e))?;
1134    let state = serde_json::from_str(&raw)
1135        .map_err(|e| format!("Failed to parse {}: {}", path.display(), e))?;
1136    Ok(Some(state))
1137}
1138
1139fn format_log_tail(label: &str, path: Option<&str>) -> String {
1140    match path {
1141        Some(path) => match read_log_tail(Path::new(path)) {
1142            Ok(tail) if tail.is_empty() => format!("{}: (empty)", label),
1143            Ok(tail) => format!("{}:\n{}", label, tail),
1144            Err(err) => format!("{}: unavailable ({})", label, err),
1145        },
1146        None => format!("{}: unavailable", label),
1147    }
1148}
1149
1150fn format_log_tail_for_path(label: &str, path: Option<&Path>) -> String {
1151    match path {
1152        Some(path) => match read_log_tail(path) {
1153            Ok(tail) if tail.is_empty() => format!("{}: (empty)", label),
1154            Ok(tail) => format!("{}:\n{}", label, tail),
1155            Err(err) => format!("{}: unavailable ({})", label, err),
1156        },
1157        None => format!("{}: unavailable", label),
1158    }
1159}
1160
1161fn read_log_tail(path: &Path) -> Result<String, String> {
1162    let mut file =
1163        fs::File::open(path).map_err(|e| format!("failed to open {}: {}", path.display(), e))?;
1164    let len = file
1165        .metadata()
1166        .map_err(|e| format!("failed to inspect {}: {}", path.display(), e))?
1167        .len();
1168    let start = len.saturating_sub(WEBSITE_LOG_TAIL_BYTES);
1169    file.seek(SeekFrom::Start(start))
1170        .map_err(|e| format!("failed to seek {}: {}", path.display(), e))?;
1171    let mut buffer = String::new();
1172    file.read_to_string(&mut buffer)
1173        .map_err(|e| format!("failed to read {}: {}", path.display(), e))?;
1174    Ok(buffer.trim().to_string())
1175}
1176
1177async fn build_shell_command(command: &str) -> tokio::process::Command {
1178    #[cfg(target_os = "windows")]
1179    {
1180        let normalized = command
1181            .replace("/dev/null", "$null")
1182            .replace("1>/dev/null", "2>$null")
1183            .replace("2>/dev/null", "2>$null");
1184
1185        if which("pwsh").await {
1186            let mut cmd = tokio::process::Command::new("pwsh");
1187            cmd.args(["-NoProfile", "-NonInteractive", "-Command", &normalized]);
1188            cmd
1189        } else {
1190            let mut cmd = tokio::process::Command::new("powershell");
1191            cmd.args(["-NoProfile", "-NonInteractive", "-Command", &normalized]);
1192            cmd
1193        }
1194    }
1195    #[cfg(not(target_os = "windows"))]
1196    {
1197        let mut cmd = tokio::process::Command::new("sh");
1198        cmd.args(["-c", command]);
1199        cmd
1200    }
1201}
1202
/// Reports whether `<name>.exe` can be found on the PATH, by running the
/// Windows `where` utility with its output discarded.
///
/// Returns `false` when `where` cannot be started or exits unsuccessfully.
/// This helper is only compiled on Windows.
#[cfg(target_os = "windows")]
async fn which(name: &str) -> bool {
    let check = format!("{}.exe", name);

    tokio::process::Command::new("where")
        .arg(check)
        .stdout(Stdio::null())
        .stderr(Stdio::null())
        .status()
        .await
        .map(|status| status.success())
        .unwrap_or(false)
}
1219
1220async fn is_process_alive(pid: u32) -> bool {
1221    #[cfg(target_os = "windows")]
1222    {
1223        tokio::process::Command::new("tasklist")
1224            .args(["/FI", &format!("PID eq {}", pid)])
1225            .stdout(Stdio::piped())
1226            .stderr(Stdio::null())
1227            .output()
1228            .await
1229            .ok()
1230            .map(|output| {
1231                let text = String::from_utf8_lossy(&output.stdout);
1232                text.lines().any(|line| {
1233                    line.split_whitespace()
1234                        .any(|token| token == pid.to_string())
1235                })
1236            })
1237            .unwrap_or(false)
1238    }
1239    #[cfg(not(target_os = "windows"))]
1240    {
1241        tokio::process::Command::new("kill")
1242            .args(["-0", &pid.to_string()])
1243            .stdout(Stdio::null())
1244            .stderr(Stdio::null())
1245            .status()
1246            .await
1247            .map(|status| status.success())
1248            .unwrap_or(false)
1249    }
1250}
1251
1252async fn kill_process(pid: u32) -> Result<(), String> {
1253    #[cfg(target_os = "windows")]
1254    {
1255        let output = tokio::process::Command::new("taskkill")
1256            .args(["/PID", &pid.to_string(), "/T", "/F"])
1257            .output()
1258            .await
1259            .map_err(|e| format!("Failed to stop PID {}: {}", pid, e))?;
1260        if output.status.success() {
1261            Ok(())
1262        } else {
1263            Err(format!(
1264                "Failed to stop PID {}: {}",
1265                pid,
1266                String::from_utf8_lossy(&output.stderr).trim()
1267            ))
1268        }
1269    }
1270    #[cfg(not(target_os = "windows"))]
1271    {
1272        let status = tokio::process::Command::new("kill")
1273            .args(["-TERM", &pid.to_string()])
1274            .status()
1275            .await
1276            .map_err(|e| format!("Failed to stop PID {}: {}", pid, e))?;
1277        if status.success() {
1278            Ok(())
1279        } else {
1280            Err(format!("Failed to stop PID {}.", pid))
1281        }
1282    }
1283}
1284
#[cfg(test)]
mod tests {
    use super::*;

    /// Writes `json` as the `package.json` of the workspace at `root`.
    fn write_package(root: &Path, json: &str) {
        fs::write(root.join("package.json"), json).unwrap();
    }

    /// A `pnpm-lock.yaml` next to `package.json` selects pnpm as the runner.
    #[test]
    fn package_script_uses_detected_package_manager() {
        let package_root = std::env::temp_dir().join(format!(
            "hematite-workspace-workflow-node-{}",
            std::process::id()
        ));
        std::fs::create_dir_all(&package_root).unwrap();
        std::fs::write(
            package_root.join("package.json"),
            r#"{ "scripts": { "dev": "vite" } }"#,
        )
        .unwrap();
        std::fs::write(package_root.join("pnpm-lock.yaml"), "").unwrap();

        let command = build_package_script_command(&package_root, "dev").unwrap();
        assert_eq!(command, "pnpm run dev");

        // Best-effort cleanup of the hand-made temp directory.
        let _ = std::fs::remove_file(package_root.join("package.json"));
        let _ = std::fs::remove_file(package_root.join("pnpm-lock.yaml"));
        let _ = std::fs::remove_dir(package_root);
    }

    /// A relative `.ps1` script inside the workspace is run through `pwsh`.
    #[test]
    fn script_path_stays_inside_workspace_root() {
        let script_dir = std::env::temp_dir().join(format!(
            "hematite-workspace-workflow-scripts-{}",
            std::process::id()
        ));
        std::fs::create_dir_all(script_dir.join("scripts")).unwrap();
        std::fs::write(script_dir.join("scripts").join("dev.ps1"), "Write-Host hi").unwrap();

        let command = build_script_path_command(&script_dir, "scripts/dev.ps1").unwrap();
        assert!(command.contains("pwsh -ExecutionPolicy Bypass -File"));

        // Best-effort cleanup of the hand-made temp directory.
        let _ = std::fs::remove_file(script_dir.join("scripts").join("dev.ps1"));
        let _ = std::fs::remove_dir(script_dir.join("scripts"));
        let _ = std::fs::remove_dir(script_dir);
    }

    /// With both `dev` and `preview` scripts present, `dev` is chosen and the
    /// Vite dev-server port is used for the URL.
    #[test]
    fn detect_website_launch_plan_prefers_dev_script_and_vite_port() {
        let dir = tempfile::tempdir().unwrap();
        write_package(
            dir.path(),
            r#"{
                "scripts": { "dev": "vite", "preview": "vite preview" },
                "devDependencies": { "vite": "^5.0.0" }
            }"#,
        );
        std::fs::write(dir.path().join("pnpm-lock.yaml"), "").unwrap();

        let plan = detect_website_launch_plan(&serde_json::json!({}), dir.path()).unwrap();
        assert_eq!(plan.script, "dev");
        assert_eq!(plan.command, "pnpm run dev");
        assert_eq!(plan.framework_hint, "vite");
        assert_eq!(plan.url, "http://127.0.0.1:5173/");
    }

    /// `mode = "preview"` selects the `preview` script and the Vite preview port.
    #[test]
    fn detect_website_launch_plan_honors_preview_mode() {
        let dir = tempfile::tempdir().unwrap();
        write_package(
            dir.path(),
            r#"{
                "scripts": { "preview": "vite preview" },
                "devDependencies": { "vite": "^5.0.0" }
            }"#,
        );

        let plan =
            detect_website_launch_plan(&serde_json::json!({ "mode": "preview" }), dir.path())
                .unwrap();
        assert_eq!(plan.script, "preview");
        assert_eq!(plan.url, "http://127.0.0.1:4173/");
    }

    /// The title is whitespace-compacted and the preview omits script bodies.
    #[test]
    fn extract_html_title_and_preview_are_clean() {
        let html = r#"
            <html>
              <head><title>  Demo Site  </title></head>
              <body><h1>Hello</h1><script>ignore()</script><p>Readable preview text.</p></body>
            </html>
        "#;
        assert_eq!(extract_html_title(html).as_deref(), Some("Demo Site"));
        let preview = html_preview_text(html);
        assert!(preview.contains("Hello"));
        assert!(preview.contains("Readable preview text."));
        assert!(!preview.contains("ignore()"));
    }

    /// Relative and root-relative assets resolve against the page URL, while
    /// absolute external links are left out.
    #[test]
    fn extract_local_asset_urls_resolves_relative_assets() {
        let html = r#"
            <html>
              <head>
                <link rel="stylesheet" href="/assets/app.css">
                <script src="./main.js"></script>
              </head>
              <body>
                <img src="images/logo.png">
                <a href="https://example.com">external</a>
              </body>
            </html>
        "#;
        let assets = extract_local_asset_urls("http://127.0.0.1:5173/about/", html);
        assert!(assets
            .iter()
            .any(|asset| asset == "http://127.0.0.1:5173/assets/app.css"));
        assert!(assets
            .iter()
            .any(|asset| asset == "http://127.0.0.1:5173/about/main.js"));
        assert!(assets
            .iter()
            .any(|asset| asset == "http://127.0.0.1:5173/about/images/logo.png"));
        assert!(!assets.iter().any(|asset| asset.contains("example.com")));
    }

    /// Blank hints are dropped, missing leading slashes are added, and
    /// duplicates collapse.
    #[test]
    fn normalize_route_hints_deduplicates_and_prefixes_slashes() {
        let routes = normalize_route_hints(vec![
            "".to_string(),
            "pricing".to_string(),
            "/pricing".to_string(),
            "/".to_string(),
        ]);
        assert_eq!(routes, vec!["/".to_string(), "/pricing".to_string()]);
    }

    /// Probes a one-shot local HTTP server and checks the parsed summary.
    #[tokio::test]
    async fn probe_website_once_reads_local_title() {
        let listener = std::net::TcpListener::bind("127.0.0.1:0").unwrap();
        let addr = listener.local_addr().unwrap();
        std::thread::spawn(move || {
            if let Ok((mut stream, _)) = listener.accept() {
                use std::io::Read;
                let body = "<html><head><title>Probe Test</title></head><body>hello</body></html>";
                // Derive Content-Length from the body so the header can never
                // disagree with the payload and truncate the document.
                let response = format!(
                    "HTTP/1.1 200 OK\r\nContent-Type: text/html\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}",
                    body.len(),
                    body
                );
                let mut request = [0_u8; 1024];
                let _ = stream.read(&mut request);
                use std::io::Write;
                let _ = stream.write_all(response.as_bytes());
            }
        });

        let client = reqwest::Client::builder()
            .timeout(Duration::from_secs(2))
            .build()
            .unwrap();
        let probe = probe_website_once(&client, &format!("http://{}/", addr))
            .await
            .unwrap();
        assert_eq!(probe.status, 200);
        assert_eq!(probe.title.as_deref(), Some("Probe Test"));
        assert!(probe.body_preview.contains("hello"));
    }
}