cortex_runtime/live/
perceive.rs1use crate::cartography::feature_encoder;
4use crate::cartography::page_classifier;
5use crate::extraction::loader::{ExtractionLoader, ExtractionResult};
6use crate::renderer::{NavigationResult, RenderContext};
7use anyhow::Result;
8use serde::{Deserialize, Serialize};
9
10#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct PerceiveResult {
13 pub url: String,
15 pub final_url: String,
17 pub page_type: u8,
19 pub confidence: f32,
21 pub features: Vec<(usize, f32)>,
23 pub content: Option<String>,
25 pub load_time_ms: u64,
27}
28
29pub async fn perceive(
31 context: &mut dyn RenderContext,
32 url: &str,
33 include_content: bool,
34) -> Result<PerceiveResult> {
35 let nav_result = context.navigate(url, 30_000).await?;
37
38 let extraction = run_extraction(context).await.unwrap_or_default();
40
41 let (page_type, confidence) =
43 page_classifier::classify_page(&extraction, &nav_result.final_url);
44
45 let features = feature_encoder::encode_features(
47 &extraction,
48 &nav_result,
49 &nav_result.final_url,
50 page_type,
51 confidence,
52 );
53
54 let sparse_features: Vec<(usize, f32)> = features
56 .iter()
57 .enumerate()
58 .filter(|(_, &v)| v != 0.0)
59 .map(|(i, &v)| (i, v))
60 .collect();
61
62 let content = if include_content {
64 extract_text_content(context).await.ok()
65 } else {
66 None
67 };
68
69 Ok(PerceiveResult {
70 url: url.to_string(),
71 final_url: nav_result.final_url,
72 page_type: page_type as u8,
73 confidence,
74 features: sparse_features,
75 content,
76 load_time_ms: nav_result.load_time_ms,
77 })
78}
79
80async fn run_extraction(context: &dyn RenderContext) -> Result<ExtractionResult> {
82 let loader = ExtractionLoader::new()?;
83 loader.inject_and_run(context).await
84}
85
86async fn extract_text_content(context: &dyn RenderContext) -> Result<String> {
88 let result = context
89 .execute_js("document.body ? document.body.innerText : ''")
90 .await?;
91 Ok(result.as_str().unwrap_or("").to_string())
92}