Skip to main content

gid_core/harness/
context.rs

1//! Context assembler: build minimal, precise context for each sub-agent.
2//!
3//! Resolves graph metadata to actual file content — feature docs, design
4//! sections, requirements goals, and project guards.
5
6use std::path::Path;
7use std::collections::{HashSet, VecDeque};
8use anyhow::Result;
9use crate::graph::Graph;
10use super::types::{TaskContext, TaskInfo};
11
12/// Assemble context for a task by resolving docs via the feature node.
13///
14/// Resolution chain:
15/// 1. Task → `implements` edge → feature node
16/// 2. Feature node → `metadata.design_doc` → `.gid/features/{name}/design.md` + `requirements.md`
17/// 3. Task `design_ref` → extract matching section from design.md
18/// 4. Task `satisfies` → resolve GOAL lines from requirements.md
19/// 5. Graph root `metadata.guards` → inject into context
20///
21/// If the feature has no `design_doc`, falls back to `.gid/design.md` and `.gid/requirements.md`.
22/// Missing files produce warnings (logged via tracing) but don't fail the assembly.
23pub fn assemble_task_context(
24    graph: &Graph,
25    task_id: &str,
26    gid_root: &Path,
27) -> Result<TaskContext> {
28    let node = graph.get_node(task_id)
29        .ok_or_else(|| anyhow::anyhow!("Task node '{}' not found in graph", task_id))?;
30
31    // Extract TaskInfo
32    let task_info = extract_task_info_from_node(node, graph);
33
34    // Resolve feature node via `implements` edge
35    let feature_node_id = graph.edges.iter()
36        .find(|e| e.from == task_id && e.relation == "implements")
37        .map(|e| e.to.as_str());
38
39    // Determine doc paths from feature node
40    let (design_path, requirements_path) = resolve_doc_paths(graph, feature_node_id, gid_root);
41
42    // Extract design excerpt from design_ref
43    let design_excerpt = if let Some(ref design_ref) = task_info.design_ref {
44        match &design_path {
45            Some(path) if path.exists() => {
46                match std::fs::read_to_string(path) {
47                    Ok(content) => extract_design_section(&content, design_ref),
48                    Err(e) => {
49                        tracing::warn!("Failed to read design doc {}: {}", path.display(), e);
50                        None
51                    }
52                }
53            }
54            Some(path) => {
55                tracing::warn!("Design doc not found: {}", path.display());
56                None
57            }
58            None => None,
59        }
60    } else {
61        None
62    };
63
64    // Resolve GOAL text from requirements.md
65    let goals_text = if !task_info.satisfies.is_empty() {
66        match &requirements_path {
67            Some(path) if path.exists() => {
68                match std::fs::read_to_string(path) {
69                    Ok(content) => resolve_goals(&content, &task_info.satisfies),
70                    Err(e) => {
71                        tracing::warn!("Failed to read requirements {}: {}", path.display(), e);
72                        Vec::new()
73                    }
74                }
75            }
76            Some(path) => {
77                tracing::warn!("Requirements not found: {}", path.display());
78                Vec::new()
79            }
80            None => Vec::new(),
81        }
82    } else {
83        Vec::new()
84    };
85
86    // Collect dependency interface descriptions
87    let dependency_interfaces = resolve_dependency_interfaces(graph, &task_info);
88
89    // Inject guards from graph root metadata
90    let guards = extract_guards(graph);
91
92    Ok(TaskContext {
93        task_info,
94        goals_text,
95        design_excerpt,
96        dependency_interfaces,
97        guards,
98    })
99}
100
101/// Resolve design.md and requirements.md paths from the feature node.
102///
103/// If the feature has `metadata.design_doc`, maps to `.gid/features/{name}/`.
104/// Otherwise falls back to `.gid/design.md` and `.gid/requirements.md`.
105fn resolve_doc_paths(
106    graph: &Graph,
107    feature_node_id: Option<&str>,
108    gid_root: &Path,
109) -> (Option<std::path::PathBuf>, Option<std::path::PathBuf>) {
110    if let Some(feature_id) = feature_node_id {
111        if let Some(feature_node) = graph.get_node(feature_id) {
112            if let Some(design_doc) = feature_node.metadata.get("design_doc")
113                .and_then(|v| v.as_str())
114            {
115                let feature_dir = gid_root.join("features").join(design_doc);
116                return (
117                    Some(feature_dir.join("design.md")),
118                    Some(feature_dir.join("requirements.md")),
119                );
120            }
121        }
122    }
123
124    // Fallback to root-level docs
125    (
126        Some(gid_root.join("design.md")),
127        Some(gid_root.join("requirements.md")),
128    )
129}
130
/// Extract a section from a markdown document by section reference.
///
/// Finds the first heading whose number prefix matches `design_ref`
/// (e.g. "3.2"), then captures every line up to, but not including, the next
/// heading of the same or a higher level (i.e. with the same or fewer `#`s).
///
/// - "3.2" matches "### 3.2 Execution Planner" or "## 3.2 Something".
/// - "3" matches "## 3 Components" and captures its subsections (3.1, 3.2, …)
///   because they are deeper headings; it does NOT match "## 3.2 Something".
/// - Lines inside fenced code blocks (``` or ~~~) are never treated as
///   headings, so a `# comment` in a code sample does not end the section.
/// - A missing section, or a section that is empty after trimming, yields `None`.
fn extract_design_section(content: &str, design_ref: &str) -> Option<String> {
    let lines: Vec<&str> = content.lines().collect();
    let headings = headings_outside_fences(&lines);

    // First heading whose numeric prefix is exactly `design_ref`.
    let (start, start_level) = headings.iter().enumerate().find_map(|(idx, parsed)| match parsed {
        Some((level, text)) if heading_starts_with_ref(text, design_ref) => Some((idx, *level)),
        _ => None,
    })?;

    // Section runs until the next heading at the same or a shallower depth.
    let end = headings[start + 1..]
        .iter()
        .position(|parsed| matches!(parsed, Some((level, _)) if *level <= start_level))
        .map_or(lines.len(), |offset| start + 1 + offset);

    let section = lines[start..end].join("\n");
    let section = section.trim();
    if section.is_empty() {
        None
    } else {
        Some(section.to_string())
    }
}

/// For every line, the parsed heading if the line is a heading that sits
/// outside any fenced code block, otherwise `None`.
///
/// A fence opens on a line starting (after indentation) with ``` or ~~~ and
/// closes on the next line starting with the same marker.
fn headings_outside_fences<'a>(lines: &[&'a str]) -> Vec<Option<(usize, &'a str)>> {
    let mut open_fence: Option<char> = None;
    lines
        .iter()
        .map(|&line| {
            let marker = fence_marker(line);
            match (open_fence, marker) {
                (None, Some(m)) => {
                    open_fence = Some(m);
                    None
                }
                (Some(open), Some(m)) if open == m => {
                    open_fence = None;
                    None
                }
                // Anything else while a fence is open is code, not structure.
                (Some(_), _) => None,
                (None, None) => parse_heading(line),
            }
        })
        .collect()
}

/// Returns the fence character if `line` opens or closes a fenced code block.
fn fence_marker(line: &str) -> Option<char> {
    let stripped = line.trim_start();
    if stripped.starts_with("```") {
        Some('`')
    } else if stripped.starts_with("~~~") {
        Some('~')
    } else {
        None
    }
}

/// Parse a markdown ATX heading line. Returns `(level, text after the #s)`.
///
/// Leading whitespace is ignored, 1–6 `#` characters are accepted, and the
/// `#`s must be followed by a space. The returned text is trimmed.
fn parse_heading(line: &str) -> Option<(usize, &str)> {
    let stripped = line.trim_start();
    let level = stripped.bytes().take_while(|&b| b == b'#').count();
    if !(1..=6).contains(&level) {
        return None;
    }
    // Must have a space after the #s (standard markdown).
    stripped[level..]
        .strip_prefix(' ')
        .map(|text| (level, text.trim()))
}

/// Check if a heading text starts with the given section reference as a
/// complete number prefix.
///
/// - "3.2" matches "3.2 Execution Planner" and "3.2. Something".
/// - "3" matches "3 Components" and "3. Components".
/// - "3" does NOT match "3.2 Something" or "30 Things": after the reference
///   the heading must end, continue with a space, or continue with a period
///   that is not followed by another digit.
fn heading_starts_with_ref(heading: &str, design_ref: &str) -> bool {
    let rest = match heading.strip_prefix(design_ref) {
        Some(rest) => rest,
        None => return false,
    };
    let mut following = rest.chars();
    match following.next() {
        None | Some(' ') => true,
        // "3." is the section itself; "3.2" is a different (sub)section.
        Some('.') => !following.next().map_or(false, |c| c.is_ascii_digit()),
        Some(_) => false,
    }
}
210
/// Resolve GOAL IDs to their full text from requirements.md content.
///
/// For each ID in `goal_ids` (in order), returns the first line of `content`
/// that mentions that ID, trimmed. IDs that are not found contribute nothing.
///
/// The ID must appear as a whole token: "GOAL-1.1" does not match a line
/// that only mentions "GOAL-1.10", "GOAL-1.1a" or "GOAL-1.1.2". Trailing
/// punctuation such as "GOAL-1.1:" or "GOAL-1.1." is fine. Empty IDs are
/// skipped, since they would otherwise match every line.
fn resolve_goals(content: &str, goal_ids: &[String]) -> Vec<String> {
    /// True when `id` occurs in `line` with clean boundaries on both sides.
    fn mentions(line: &str, id: &str) -> bool {
        line.match_indices(id).any(|(start, _)| {
            let before = line[..start].chars().next_back();
            let mut after = line[start + id.len()..].chars();

            let clean_start = !before.map_or(false, |c| c.is_alphanumeric());
            let clean_end = match after.next() {
                Some(c) if c.is_alphanumeric() => false,
                // ".<digit>" continues the ID (a sub-goal); a bare "." is punctuation.
                Some('.') => !after.next().map_or(false, |c| c.is_ascii_digit()),
                _ => true,
            };
            clean_start && clean_end
        })
    }

    goal_ids
        .iter()
        .filter(|id| !id.is_empty())
        .filter_map(|id| {
            content
                .lines()
                .find(|line| mentions(line, id))
                .map(|line| line.trim().to_string())
        })
        .collect()
}
227
228/// Extract interface/description info from completed dependency tasks.
229fn resolve_dependency_interfaces(graph: &Graph, task_info: &TaskInfo) -> Vec<String> {
230    let mut interfaces = Vec::new();
231    for dep_id in &task_info.depends_on {
232        if let Some(dep_node) = graph.get_node(dep_id) {
233            let mut info = format!("[{}] {}", dep_node.id, dep_node.title);
234            if let Some(ref desc) = dep_node.description {
235                let truncated: String = desc.chars().take(200).collect();
236                info.push_str(&format!(": {}", truncated));
237            }
238            interfaces.push(info);
239        }
240    }
241    interfaces
242}
243
244/// Extract project-level guards from graph metadata.
245///
246/// Guards are stored in any node's `metadata.guards` as an array of strings.
247/// Convention: the first node with guards (typically a root/project node).
248fn extract_guards(graph: &Graph) -> Vec<String> {
249    for node in &graph.nodes {
250        if let Some(guards_val) = node.metadata.get("guards") {
251            if let Some(arr) = guards_val.as_array() {
252                return arr.iter()
253                    .filter_map(|v| v.as_str().map(|s| s.to_string()))
254                    .collect();
255            }
256        }
257    }
258    Vec::new()
259}
260
261/// Extract TaskInfo from a graph Node.
262fn extract_task_info_from_node(node: &crate::graph::Node, graph: &Graph) -> TaskInfo {
263    let description = node.description.clone().unwrap_or_default();
264
265    let verify = node.metadata.get("verify")
266        .and_then(|v| v.as_str())
267        .map(|s| s.to_string());
268
269    let estimated_turns = node.metadata.get("estimated_turns")
270        .and_then(|v| v.as_u64())
271        .unwrap_or(15) as u32;
272
273    let design_ref = node.metadata.get("design_ref")
274        .and_then(|v| v.as_str())
275        .map(|s| s.to_string());
276
277    let satisfies = node.metadata.get("satisfies")
278        .and_then(|v| v.as_array())
279        .map(|arr| {
280            arr.iter()
281                .filter_map(|v| v.as_str().map(|s| s.to_string()))
282                .collect()
283        })
284        .unwrap_or_default();
285
286    let goals = node.metadata.get("goals")
287        .and_then(|v| v.as_array())
288        .map(|arr| {
289            arr.iter()
290                .filter_map(|v| v.as_str().map(|s| s.to_string()))
291                .collect()
292        })
293        .unwrap_or_default();
294
295    let depends_on: Vec<String> = graph.edges.iter()
296        .filter(|e| e.from == node.id && e.relation == "depends_on")
297        .map(|e| e.to.clone())
298        .collect();
299
300    TaskInfo {
301        id: node.id.clone(),
302        title: node.title.clone(),
303        description,
304        goals,
305        verify,
306        estimated_turns,
307        depends_on,
308        design_ref,
309        satisfies,
310    }
311}
312
313// =============================================================================
314// §5 Relevance Scoring — Edge-Relation-Based 5-Tier Ranking (GOAL-4.4)
315// =============================================================================
316
/// An unscored node found while walking the graph outward from the targets.
///
/// Holds everything `score_candidate` and the budget-fitting code read:
/// identity, optional textual payloads, how far from a target it was found,
/// and which edge relation led to it.
#[derive(Debug, Clone)]
pub struct Candidate {
    /// Graph node ID.
    pub node_id: String,
    /// Node type label.
    pub node_type: String,
    /// File path associated with the node, if any.
    pub file_path: Option<String>,
    /// Signature text, if any.
    pub signature: Option<String>,
    /// Doc comment text, if any.
    pub doc_comment: Option<String>,
    /// Free-form description, if any.
    pub description: Option<String>,
    /// Source code text, if any.
    pub source_code: Option<String>,
    /// Number of hops from the nearest target node.
    pub hop_distance: u32,
    /// Modification timestamp, if known (unit not specified here).
    pub modified_at: Option<i64>,
    /// The edge relation through which traversal reached this node.
    pub connecting_relation: String,
    /// Token estimate carried with the candidate.
    pub token_estimate: usize,
}
336
337/// A candidate with a computed relevance score.
338#[derive(Debug, Clone)]
339pub struct ScoredCandidate {
340    pub candidate: Candidate,
341    pub score: f64,
342    pub token_estimate: usize,
343}
344
/// GOAL-4.4: map an edge relation to its relevance tier (1 = most relevant).
///
/// | Rank | Category       | Relations                                  |
/// |------|----------------|--------------------------------------------|
/// | 1    | Direct call    | calls, imports                             |
/// | 2    | Type reference | type_reference, inherits, implements, uses |
/// | 3    | Same-file      | contains, defined_in                       |
/// | 4    | Structural     | depends_on, part_of, blocks, tests_for     |
/// | 5    | Transitive     | anything not listed above                  |
///
/// Matching is exact and case-sensitive.
pub fn relation_rank(relation: &str) -> u8 {
    const TIERS: [(u8, &[&str]); 4] = [
        (1, &["calls", "imports"]),
        (2, &["type_reference", "inherits", "implements", "uses"]),
        (3, &["contains", "defined_in"]),
        (4, &["depends_on", "part_of", "blocks", "tests_for"]),
    ];

    TIERS
        .iter()
        .find(|(_, names)| names.iter().any(|&known| known == relation))
        .map_or(5, |(rank, _)| *rank)
}
363
364/// Map rank to a [0.0, 1.0] score: rank 1 → 1.0, rank 5 → 0.2.
365pub fn relation_score(relation: &str) -> f64 {
366    match relation_rank(relation) {
367        1 => 1.0,
368        2 => 0.8,
369        3 => 0.6,
370        4 => 0.4,
371        5 => 0.2,
372        _ => 0.1,
373    }
374}
375
/// Weight of the relation-tier score in the composite relevance score
/// (v1 constant; the three weights below sum to 1.0).
const W_RELATION: f64 = 0.6;
/// Weight of hop proximity in the composite relevance score.
const W_PROXIMITY: f64 = 0.3;
/// Weight of the edge-weight factor in the composite relevance score.
const W_WEIGHT: f64 = 0.1;

/// Minimum useful token count for truncated inclusion.
#[allow(dead_code)]
const MIN_USEFUL_TOKENS: usize = 20;
384
/// Rough token count for a piece of text.
///
/// Uses the design.md §9 heuristic of one token per four bytes. Empty text
/// is 0 tokens; any non-empty text counts as at least 1.
fn estimate_tokens_str(text: &str) -> usize {
    match text.len() {
        0 => 0,
        byte_len => std::cmp::max(byte_len / 4, 1),
    }
}
391
392/// Estimate tokens for a candidate node.
393fn estimate_tokens_for_candidate(c: &Candidate) -> usize {
394    let mut bytes = 0;
395    if let Some(ref sc) = c.source_code { bytes += sc.len(); }
396    if let Some(ref sig) = c.signature { bytes += sig.len(); }
397    if let Some(ref desc) = c.description { bytes += desc.len(); }
398    if let Some(ref dc) = c.doc_comment { bytes += dc.len(); }
399    bytes += 30; // overhead
400    (bytes / 4).max(1)
401}
402
403/// Score a single candidate. **[GOAL-4.4, 4.5]**
404///
405/// Composite score = (W_RELATION * relation_score + W_PROXIMITY * proximity + W_WEIGHT * weight_factor)
406///                   * transitive_penalty
407pub fn score_candidate(candidate: &Candidate) -> ScoredCandidate {
408    // Relation-based score (primary factor).
409    let rel_score = relation_score(&candidate.connecting_relation);
410
411    // Proximity: inverse of hop distance.
412    // hop 1 → 1.0, hop 2 → 0.5, hop 3 → 0.33.
413    let proximity = if candidate.hop_distance == 0 {
414        1.0
415    } else {
416        1.0 / (candidate.hop_distance as f64)
417    };
418
419    // Weight: from edge weight (default 1.0) — could incorporate edge.weight in future.
420    let weight_factor = 1.0;
421
422    // Transitive penalty: candidates at hop > 1 are penalized (GOAL-4.4 tier 5).
423    let transitive_penalty = if candidate.hop_distance > 1 { 0.8 } else { 1.0 };
424
425    let mut score = (W_RELATION * rel_score
426                   + W_PROXIMITY * proximity
427                   + W_WEIGHT * weight_factor)
428                   * transitive_penalty;
429
430    // NaN guard (FINDING-13).
431    if score.is_nan() { score = 0.0; }
432
433    let token_estimate = estimate_tokens_for_candidate(candidate);
434
435    ScoredCandidate {
436        candidate: candidate.clone(),
437        score,
438        token_estimate,
439    }
440}
441
442/// Score and sort a list of candidates by descending relevance.
443pub fn score_candidates(candidates: &[Candidate]) -> Vec<ScoredCandidate> {
444    let mut scored: Vec<ScoredCandidate> = candidates.iter().map(score_candidate).collect();
445    scored.sort_by(|a, b| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal));
446    scored
447}
448
449// =============================================================================
450// §6 Token Budget Management — Category-Based Truncation (GOAL-4.3)
451// =============================================================================
452
453/// Context for a target node — NEVER truncated. **[GOAL-4.1a, 4.3]**
454#[derive(Debug, Clone, serde::Serialize)]
455pub struct TargetContext {
456    /// Node ID.
457    pub node_id: String,
458    /// Node title.
459    pub title: Option<String>,
460    /// File path on disk (for source loading).
461    pub file_path: Option<String>,
462    /// Function/class signature.
463    pub signature: Option<String>,
464    /// Doc comment.
465    pub doc_comment: Option<String>,
466    /// Description.
467    pub description: Option<String>,
468    /// Source code loaded from disk.
469    pub source_code: Option<String>,
470    /// Estimated tokens for this target.
471    pub token_estimate: usize,
472}
473
474impl TargetContext {
475    /// Create a TargetContext with pre-computed token estimate.
476    pub fn new(
477        node_id: String,
478        title: Option<String>,
479        file_path: Option<String>,
480        signature: Option<String>,
481        doc_comment: Option<String>,
482        description: Option<String>,
483        source_code: Option<String>,
484    ) -> Self {
485        let token_estimate = estimate_tokens_for_target_fields(
486            title.as_deref(),
487            description.as_deref(),
488            signature.as_deref(),
489            doc_comment.as_deref(),
490            source_code.as_deref(),
491        );
492        Self {
493            node_id, title, file_path, signature, doc_comment,
494            description, source_code, token_estimate,
495        }
496    }
497}
498
/// Rough token count for the textual fields of a target.
///
/// Sums the byte lengths of the fields that are present, adds 50 bytes of
/// overhead for headers/formatting, and divides by four (never less than 1).
fn estimate_tokens_for_target_fields(
    title: Option<&str>,
    description: Option<&str>,
    signature: Option<&str>,
    doc_comment: Option<&str>,
    source_code: Option<&str>,
) -> usize {
    let content_bytes: usize = [title, description, signature, doc_comment, source_code]
        .iter()
        .flatten()
        .map(|text| text.len())
        .sum();

    ((content_bytes + 50) / 4).max(1)
}
516
517/// A single non-target item in the assembled context. **[GOAL-4.11]**
518#[derive(Debug, Clone, serde::Serialize)]
519pub struct ContextItem {
520    /// Source node ID.
521    pub node_id: String,
522    /// Node type (file, function, class, etc.).
523    pub node_type: String,
524    /// File path (if available).
525    pub file_path: Option<String>,
526    /// Function/class signature (if available).
527    pub signature: Option<String>,
528    /// Doc comment (if available).
529    pub doc_comment: Option<String>,
530    /// Description or source code content.
531    pub content: Option<String>,
532    /// The edge relation that connects this node to the target. **[GOAL-4.11]**
533    pub connecting_relation: String,
534    /// Estimated token count for this item.
535    pub token_estimate: usize,
536    /// Relevance score (visible per GOAL-4.5).
537    pub score: f64,
538    /// Whether this item was truncated to fit the budget.
539    pub truncated: bool,
540}
541
542impl ContextItem {
543    /// Create a ContextItem from a ScoredCandidate (full inclusion).
544    fn from_scored(sc: &ScoredCandidate, truncated: bool) -> Self {
545        let content = sc.candidate.source_code.clone()
546            .or_else(|| sc.candidate.description.clone());
547        Self {
548            node_id: sc.candidate.node_id.clone(),
549            node_type: sc.candidate.node_type.clone(),
550            file_path: sc.candidate.file_path.clone(),
551            signature: sc.candidate.signature.clone(),
552            doc_comment: sc.candidate.doc_comment.clone(),
553            content,
554            connecting_relation: sc.candidate.connecting_relation.clone(),
555            token_estimate: sc.token_estimate,
556            score: sc.score,
557            truncated,
558        }
559    }
560
561    /// Create a truncated ContextItem that fits within `max_tokens`.
562    fn from_scored_truncated(sc: &ScoredCandidate, max_tokens: usize) -> Self {
563        let full_content = sc.candidate.source_code.as_deref()
564            .or(sc.candidate.description.as_deref())
565            .unwrap_or("");
566
567        let truncated_content = truncate_text(full_content, max_tokens);
568        let actual_tokens = estimate_tokens_str(&truncated_content);
569
570        Self {
571            node_id: sc.candidate.node_id.clone(),
572            node_type: sc.candidate.node_type.clone(),
573            file_path: sc.candidate.file_path.clone(),
574            signature: sc.candidate.signature.clone(),
575            doc_comment: sc.candidate.doc_comment.clone(),
576            content: Some(truncated_content),
577            connecting_relation: sc.candidate.connecting_relation.clone(),
578            token_estimate: actual_tokens,
579            score: sc.score,
580            truncated: true,
581        }
582    }
583}
584
585/// Metadata about truncation decisions. **[GOAL-4.3]**
586#[derive(Debug, Clone, Default, serde::Serialize)]
587pub struct TruncationInfo {
588    /// Number of items that were truncated (partially included).
589    pub truncated_count: usize,
590    /// Number of items that were dropped entirely.
591    pub dropped_count: usize,
592    /// Tokens actually consumed by this category.
593    pub budget_used: usize,
594}
595
596impl TruncationInfo {
597    fn merge(&mut self, other: &TruncationInfo) {
598        self.truncated_count += other.truncated_count;
599        self.dropped_count += other.dropped_count;
600        self.budget_used += other.budget_used;
601    }
602}
603
604/// The assembled context result — categorized output. **[GOAL-4.1]**
605#[derive(Debug, Clone, serde::Serialize)]
606pub struct ContextResult {
607    /// GOAL-4.1a: Full target node details (never truncated).
608    pub targets: Vec<TargetContext>,
609    /// GOAL-4.1c,d: Direct + transitive dependencies, sorted by relevance.
610    pub dependencies: Vec<ContextItem>,
611    /// GOAL-4.1e: Callers of target nodes.
612    pub callers: Vec<ContextItem>,
613    /// GOAL-4.1f: Related test nodes.
614    pub tests: Vec<ContextItem>,
615    /// GOAL-4.10: Total estimated tokens in the output.
616    pub estimated_tokens: usize,
617    /// GOAL-4.3: Truncation info.
618    pub truncation_info: TruncationInfo,
619}
620
621impl ContextResult {
622    /// Total number of items included across all categories.
623    pub fn total_included(&self) -> usize {
624        self.targets.len() + self.dependencies.len() + self.callers.len() + self.tests.len()
625    }
626}
627
/// Smallest remaining budget (in tokens) at which a partially-fitting item is
/// still worth including in truncated form; below this it is dropped instead.
const MIN_USEFUL_TOKENS_TRUNC: usize = 32;
630
631/// Category-based budget allocation. **[GOAL-4.3]**
632///
633/// Priority order (GOAL-4.3):
634/// 1. Targets — NEVER truncated
635/// 2. Direct dependencies (hop == 1)
636/// 3. Callers
637/// 4. Tests
638/// 5. Transitive dependencies (furthest hops dropped first)
639pub fn budget_fit_by_category(
640    targets: &[TargetContext],
641    deps: Vec<ScoredCandidate>,
642    callers: Vec<ScoredCandidate>,
643    tests: Vec<ScoredCandidate>,
644    budget: usize,
645) -> ContextResult {
646    let mut remaining = budget;
647    let mut truncation = TruncationInfo::default();
648
649    // 1. Targets — always included, never truncated.
650    let target_tokens: usize = targets.iter().map(|t| t.token_estimate).sum();
651    remaining = remaining.saturating_sub(target_tokens);
652
653    // Separate direct deps from transitive deps.
654    let (direct_deps, transitive_deps): (Vec<_>, Vec<_>) =
655        deps.into_iter().partition(|d| d.candidate.hop_distance == 1);
656
657    // 2. Direct dependencies — fill as much as budget allows.
658    let (included_direct, direct_trunc) = greedy_fill(&direct_deps, remaining);
659    remaining = remaining.saturating_sub(direct_trunc.budget_used);
660    truncation.merge(&direct_trunc);
661
662    // 3. Callers.
663    let (included_callers, caller_trunc) = greedy_fill(&callers, remaining);
664    remaining = remaining.saturating_sub(caller_trunc.budget_used);
665    truncation.merge(&caller_trunc);
666
667    // 4. Tests.
668    let (included_tests, test_trunc) = greedy_fill(&tests, remaining);
669    remaining = remaining.saturating_sub(test_trunc.budget_used);
670    truncation.merge(&test_trunc);
671
672    // 5. Transitive deps — sorted by hop distance ascending (closest first),
673    //    within same hop: sorted by score descending (highest relevance first).
674    //    This means furthest hops are dropped first when budget runs out.
675    let mut trans_sorted = transitive_deps;
676    trans_sorted.sort_by(|a, b| {
677        a.candidate.hop_distance.cmp(&b.candidate.hop_distance)
678            .then_with(|| b.score.partial_cmp(&a.score).unwrap_or(std::cmp::Ordering::Equal))
679    });
680    let (included_transitive, trans_trunc) = greedy_fill(&trans_sorted, remaining);
681    remaining = remaining.saturating_sub(trans_trunc.budget_used);
682    truncation.merge(&trans_trunc);
683
684    let total_tokens = budget - remaining;
685
686    ContextResult {
687        targets: targets.to_vec(),
688        dependencies: [included_direct, included_transitive].concat(),
689        callers: included_callers,
690        tests: included_tests,
691        estimated_tokens: total_tokens,
692        truncation_info: truncation,
693    }
694}
695
696/// Greedy knapsack: consume items in order until budget exhausted.
697///
698/// Items that fully fit are included as-is. Items that partially fit are
699/// truncated if the remaining budget exceeds `MIN_USEFUL_TOKENS_TRUNC`.
700/// Items that don't fit at all are dropped and counted.
701fn greedy_fill(
702    items: &[ScoredCandidate],
703    budget: usize,
704) -> (Vec<ContextItem>, TruncationInfo) {
705    let mut included = Vec::new();
706    let mut remaining = budget;
707    let mut info = TruncationInfo::default();
708
709    for sc in items {
710        if remaining == 0 {
711            info.dropped_count += 1;
712            continue;
713        }
714
715        if sc.token_estimate <= remaining {
716            // Fully fits.
717            included.push(ContextItem::from_scored(sc, false));
718            remaining -= sc.token_estimate;
719        } else if remaining >= MIN_USEFUL_TOKENS_TRUNC {
720            // Partially fits — truncate content.
721            let truncated = ContextItem::from_scored_truncated(sc, remaining);
722            remaining = remaining.saturating_sub(truncated.token_estimate);
723            included.push(truncated);
724            info.truncated_count += 1;
725        } else {
726            // Remaining budget too small to be useful.
727            info.dropped_count += 1;
728        }
729    }
730
731    info.budget_used = budget - remaining;
732    (included, info)
733}
734
735// =============================================================================
736// §7 Truncation Strategy — UTF-8 Safe Text Truncation (GOAL-4.3)
737// =============================================================================
738
/// Truncate text content to fit within `max_tokens` tokens. **[GOAL-4.3]**
///
/// The byte budget is `max_tokens * 4`. Text that already fits is returned
/// unchanged. Otherwise:
///
/// 1. UTF-8 safety: the cut always lands on a valid char boundary.
/// 2. Line preference: if the kept prefix contains a newline, the cut moves
///    back to the last newline so only complete lines remain.
/// 3. Truncation marker: `\n... [truncated]` is appended, and room for it is
///    reserved inside the byte budget.
/// 4. Head-biased: the beginning of the content is what survives
///    (imports/signatures first).
///
/// If the byte budget is too small to hold even the marker, a plain prefix of
/// at most `max_tokens * 4` bytes is returned without the marker, so the
/// result never exceeds the budget.
pub fn truncate_text(text: &str, max_tokens: usize) -> String {
    const MARKER: &str = "\n... [truncated]";

    /// Largest char boundary of `s` that is `<= idx` (requires `idx <= s.len()`).
    fn floor_boundary(s: &str, mut idx: usize) -> usize {
        // Index 0 is always a boundary, so this cannot underflow.
        while !s.is_char_boundary(idx) {
            idx -= 1;
        }
        idx
    }

    let max_bytes = max_tokens.saturating_mul(4);
    if text.len() <= max_bytes {
        return text.to_string();
    }

    // Not even the marker fits: honour the budget with a bare prefix.
    if max_bytes < MARKER.len() {
        return text[..floor_boundary(text, max_bytes)].to_string();
    }

    // Reserve space for the marker, then back off to a char boundary.
    let safe_end = floor_boundary(text, max_bytes - MARKER.len());

    // Prefer ending on a complete line when the prefix contains one.
    let cut_point = text[..safe_end].rfind('\n').unwrap_or(safe_end);

    format!("{}{}", &text[..cut_point], MARKER)
}
776
777// =============================================================================
778// §8 Source Code Loading from Disk (GOAL-4.1b)
779// =============================================================================
780
781/// Result of loading source code from disk.
782#[derive(Debug, Clone, serde::Serialize)]
783pub struct SourceLoadResult {
784    /// The loaded source code (possibly a line range extract).
785    pub source: String,
786    /// Whether the source was loaded from a line range (start_line..end_line).
787    pub is_range: bool,
788    /// Starting line (1-indexed) if range was used.
789    pub start_line: Option<usize>,
790    /// Ending line (1-indexed, inclusive) if range was used.
791    pub end_line: Option<usize>,
792    /// Total lines in the loaded source.
793    pub line_count: usize,
794}
795
796/// Load source code from disk for a node. **[GOAL-4.1b]**
797///
798/// If `start_line` and `end_line` are both provided, loads only that range.
799/// If only `start_line` is provided, loads from that line to end-of-file.
800/// If neither is provided, loads the entire file.
801///
802/// Returns `None` if:
803/// - `file_path` is None
804/// - The file doesn't exist or can't be read
805/// - The file path is not under `project_root` (security check)
806///
807/// Lines are 1-indexed (matching typical IDE conventions).
808pub fn load_source_from_disk(
809    file_path: Option<&str>,
810    start_line: Option<usize>,
811    end_line: Option<usize>,
812    project_root: &Path,
813) -> Option<SourceLoadResult> {
814    let file_path = file_path?;
815
816    // Resolve relative to project_root
817    let path = if Path::new(file_path).is_absolute() {
818        std::path::PathBuf::from(file_path)
819    } else {
820        project_root.join(file_path)
821    };
822
823    // Security: ensure the resolved path is under project_root
824    let canonical_root = project_root.canonicalize().ok()?;
825    let canonical_path = path.canonicalize().ok()?;
826    if !canonical_path.starts_with(&canonical_root) {
827        tracing::warn!(
828            "Refusing to load source outside project root: {} (root: {})",
829            canonical_path.display(), canonical_root.display()
830        );
831        return None;
832    }
833
834    // Read the file
835    let content = std::fs::read_to_string(&canonical_path).ok()?;
836    let lines: Vec<&str> = content.lines().collect();
837
838    match (start_line, end_line) {
839        (Some(start), Some(end)) if start >= 1 && end >= start => {
840            // Range extract: 1-indexed, inclusive
841            let start_idx = start.saturating_sub(1);
842            let end_idx = end.min(lines.len());
843            if start_idx >= lines.len() {
844                // start_line beyond file length
845                return None;
846            }
847            let selected: Vec<&str> = lines[start_idx..end_idx].to_vec();
848            let source = selected.join("\n");
849            Some(SourceLoadResult {
850                line_count: selected.len(),
851                source,
852                is_range: true,
853                start_line: Some(start),
854                end_line: Some(end_idx),
855            })
856        }
857        (Some(start), None) if start >= 1 => {
858            // From start_line to EOF
859            let start_idx = start.saturating_sub(1);
860            if start_idx >= lines.len() {
861                return None;
862            }
863            let selected: Vec<&str> = lines[start_idx..].to_vec();
864            let source = selected.join("\n");
865            Some(SourceLoadResult {
866                line_count: selected.len(),
867                source,
868                is_range: true,
869                start_line: Some(start),
870                end_line: Some(lines.len()),
871            })
872        }
873        _ => {
874            // Full file
875            let line_count = lines.len();
876            Some(SourceLoadResult {
877                source: content,
878                is_range: false,
879                start_line: None,
880                end_line: None,
881                line_count,
882            })
883        }
884    }
885}
886
887// =============================================================================
888// §9 Context Query & Assembly Pipeline (GOAL-4.1–4.13)
889// =============================================================================
890
/// Filters that narrow which candidate nodes are eligible. **[GOAL-4.8]**
///
/// An empty/default value lets every candidate through.
#[derive(Debug, Clone, Default)]
pub struct ContextFilters {
    /// GOAL-4.8: --include patterns. Supports file path globs (e.g., "*.rs")
    /// and node type filters (e.g., "type:function").
    /// Semantics: if non-empty, a candidate must match at least one pattern.
    /// A glob pattern can only match candidates that have a file path.
    pub include_patterns: Vec<String>,
    /// Node IDs to exclude. Compared for exact equality with the candidate's
    /// node ID (no glob or substring matching).
    pub exclude_ids: Vec<String>,
    /// Drop candidates whose modification timestamp is earlier than this
    /// value (epoch secs). Candidates without a timestamp are kept.
    pub modified_after: Option<i64>,
}
903
/// GOAL-4.9: Selects how assembled context is rendered.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OutputFormat {
    /// Human-readable markdown sections (default).
    Markdown,
    /// Machine-parseable JSON.
    Json,
    /// Same structure as JSON but in YAML syntax.
    Yaml,
}

impl Default for OutputFormat {
    /// Markdown is the format used when none is requested.
    fn default() -> Self {
        OutputFormat::Markdown
    }
}

impl std::str::FromStr for OutputFormat {
    type Err = String;

    /// Parse a format name case-insensitively.
    ///
    /// Accepts `markdown`/`md`, `json`, and `yaml`/`yml`. On failure the error
    /// message quotes the lower-cased input.
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        let normalized = s.to_lowercase();
        let parsed = match normalized.as_str() {
            "markdown" | "md" => Some(OutputFormat::Markdown),
            "json" => Some(OutputFormat::Json),
            "yaml" | "yml" => Some(OutputFormat::Yaml),
            _ => None,
        };
        parsed.ok_or_else(|| {
            format!("unknown format '{}': expected markdown, json, or yaml", normalized)
        })
    }
}

impl std::fmt::Display for OutputFormat {
    /// Write the canonical lower-case name (`markdown`, `json`, `yaml`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let name = match self {
            OutputFormat::Markdown => "markdown",
            OutputFormat::Json => "json",
            OutputFormat::Yaml => "yaml",
        };
        f.write_str(name)
    }
}
940
/// A request for assembled context. **[GOAL-4.1, 4.6]**
///
/// `Default` yields no targets, an 8000-token budget, depth 2, no filters,
/// markdown output and no project root.
#[derive(Debug, Clone)]
pub struct ContextQuery {
    /// GOAL-4.6: One or more target node IDs whose context we are assembling.
    /// At least one target must be specified; `assemble_context` returns an
    /// error when this is empty.
    pub targets: Vec<String>,
    /// Maximum token budget for the assembled output. **[GOAL-4.2]**
    /// Default: 8000.
    pub token_budget: usize,
    /// Maximum traversal depth (hops from any target). **[GOAL-4.7]**
    /// Default: 2.
    pub depth: u32,
    /// Optional filters to narrow candidates. **[GOAL-4.8]**
    pub filters: ContextFilters,
    /// Output format. **[GOAL-4.9]**
    pub format: OutputFormat,
    /// Project root for source code loading. **[GOAL-4.1b]**
    /// When `None`, no source code is read from disk.
    pub project_root: Option<std::path::PathBuf>,
}
959
960impl Default for ContextQuery {
961    fn default() -> Self {
962        Self {
963            targets: Vec::new(),
964            token_budget: 8000,
965            depth: 2,
966            filters: ContextFilters::default(),
967            format: OutputFormat::default(),
968            project_root: None,
969        }
970    }
971}
972
/// Traversal statistics for observability. **[GOAL-4.13]**
#[derive(Debug, Clone, Default, serde::Serialize)]
pub struct ContextStats {
    /// Total nodes visited during traversal: targets, dependency candidates
    /// (kept and filtered out), callers and tests.
    pub nodes_visited: usize,
    /// Nodes included in the final output.
    pub nodes_included: usize,
    /// Dependency candidates rejected by the query's filters.
    pub nodes_excluded_by_filter: usize,
    /// Tokens used in the assembled context (estimated).
    pub budget_used: usize,
    /// Total token budget available.
    pub budget_total: usize,
    /// Elapsed time in milliseconds.
    pub elapsed_ms: u64,
}
989
/// Extended context result with statistics. **[GOAL-4.1, 4.13]**
///
/// Returned by `assemble_context` and rendered by `format_context`.
#[derive(Debug, Clone, serde::Serialize)]
pub struct AssembledContext {
    /// The categorized context result.
    /// Flattened on serialization: its fields appear at the same level as
    /// `stats` instead of under a `result` key.
    #[serde(flatten)]
    pub result: ContextResult,
    /// Traversal statistics (GOAL-4.13).
    pub stats: ContextStats,
}
999
1000/// Top-level entry point for context assembly. **[GOAL-4.2, 4.3, 4.12]**
1001///
1002/// This is the library function that CLI/MCP/LSP all call.
1003/// Takes a `Graph` (already loaded) and a `ContextQuery`, returns
1004/// a structured `AssembledContext` with stats.
1005pub fn assemble_context(
1006    graph: &Graph,
1007    query: &ContextQuery,
1008) -> Result<AssembledContext> {
1009    let start = std::time::Instant::now();
1010
1011    // Validate: at least one target (GOAL-4.6).
1012    if query.targets.is_empty() {
1013        return Err(anyhow::anyhow!("--targets: at least one target node ID required"));
1014    }
1015
1016    let mut stats = ContextStats {
1017        budget_total: query.token_budget,
1018        ..Default::default()
1019    };
1020    let mut filter_excluded = 0usize;
1021
1022    // Stage 1: Gather target node details + source code from disk.
1023    let targets = gather_targets(graph, &query.targets, query.project_root.as_deref())?;
1024    stats.nodes_visited += targets.len();
1025
1026    // Stage 2: Multi-source BFS — gather dependency candidates.
1027    let (dep_candidates, dep_filtered) = gather_dependencies(
1028        graph, &query.targets, query.depth, &query.filters, query.project_root.as_deref(),
1029    );
1030    stats.nodes_visited += dep_candidates.len() + dep_filtered;
1031    filter_excluded += dep_filtered;
1032
1033    // Stage 3: Reverse-edge traversal — gather callers and tests.
1034    let (caller_candidates, test_candidates) = gather_callers_and_tests(
1035        graph, &query.targets, query.project_root.as_deref(),
1036    );
1037    stats.nodes_visited += caller_candidates.len() + test_candidates.len();
1038
1039    // Stage 4: Score all candidates by edge-relation relevance (GOAL-4.4).
1040    let scored_deps = score_candidates(&dep_candidates);
1041    let scored_callers = score_candidates(&caller_candidates);
1042    let scored_tests = score_candidates(&test_candidates);
1043
1044    // Stage 5: Category-based budget allocation (GOAL-4.3).
1045    let context_result = budget_fit_by_category(
1046        &targets,
1047        scored_deps,
1048        scored_callers,
1049        scored_tests,
1050        query.token_budget,
1051    );
1052
1053    // Stage 6: Record stats (GOAL-4.13).
1054    stats.nodes_included = context_result.total_included();
1055    stats.nodes_excluded_by_filter = filter_excluded;
1056    stats.budget_used = context_result.estimated_tokens;
1057    stats.elapsed_ms = start.elapsed().as_millis() as u64;
1058
1059    // Log traversal stats to stderr (GOAL-4.13).
1060    tracing::info!(
1061        visited = stats.nodes_visited,
1062        included = stats.nodes_included,
1063        excluded_filter = stats.nodes_excluded_by_filter,
1064        budget = %format!("{}/{}", stats.budget_used, stats.budget_total),
1065        elapsed_ms = stats.elapsed_ms,
1066        "context assembly complete"
1067    );
1068
1069    Ok(AssembledContext {
1070        result: context_result,
1071        stats,
1072    })
1073}
1074
1075/// Stage 1: Gather full details for each target node. **[GOAL-4.1, 4.1b]**
1076fn gather_targets(
1077    graph: &Graph,
1078    target_ids: &[String],
1079    project_root: Option<&Path>,
1080) -> Result<Vec<TargetContext>> {
1081    let mut targets = Vec::new();
1082
1083    for id in target_ids {
1084        let node = graph.get_node(id)
1085            .ok_or_else(|| anyhow::anyhow!("target node not found: {}", id))?;
1086
1087        // GOAL-4.1b: Read source code from disk if file_path + line range available.
1088        let source_code = if let Some(root) = project_root {
1089            load_source_from_disk(
1090                node.file_path.as_deref(),
1091                node.start_line,
1092                node.end_line,
1093                root,
1094            ).map(|r| r.source)
1095        } else {
1096            None
1097        };
1098
1099        targets.push(TargetContext::new(
1100            node.id.clone(),
1101            Some(node.title.clone()),
1102            node.file_path.clone(),
1103            node.signature.clone(),
1104            node.doc_comment.clone(),
1105            node.description.clone(),
1106            source_code,
1107        ));
1108    }
1109
1110    Ok(targets)
1111}
1112
1113/// Stage 2: Multi-source BFS with depth limit for dependencies. **[GOAL-4.7, 4.8]**
1114///
1115/// Returns (candidates, filtered_count) — filtered_count is nodes excluded by filters.
1116fn gather_dependencies(
1117    graph: &Graph,
1118    root_ids: &[String],
1119    max_depth: u32,
1120    filters: &ContextFilters,
1121    project_root: Option<&Path>,
1122) -> (Vec<Candidate>, usize) {
1123    let mut visited: HashSet<String> = HashSet::new();
1124    let mut queue: VecDeque<(String, u32, String)> = VecDeque::new();
1125    let mut results: Vec<Candidate> = Vec::new();
1126    let mut filtered_count = 0usize;
1127
1128    // Initialize: mark roots as visited, enqueue their outgoing neighbors at hop 1.
1129    for root_id in root_ids {
1130        visited.insert(root_id.clone());
1131    }
1132
1133    for root_id in root_ids {
1134        for edge in &graph.edges {
1135            if edge.from == *root_id {
1136                if !visited.contains(&edge.to) {
1137                    visited.insert(edge.to.clone());
1138                    queue.push_back((edge.to.clone(), 1, edge.relation.clone()));
1139                }
1140            }
1141        }
1142    }
1143
1144    while let Some((current_id, hop, relation)) = queue.pop_front() {
1145        if hop > max_depth { continue; }
1146
1147        let node = match graph.get_node(&current_id) {
1148            Some(n) => n,
1149            None => continue,
1150        };
1151
1152        let source_code = if let Some(root) = project_root {
1153            load_source_from_disk(
1154                node.file_path.as_deref(),
1155                node.start_line,
1156                node.end_line,
1157                root,
1158            ).map(|r| r.source)
1159        } else {
1160            None
1161        };
1162
1163        let candidate = Candidate {
1164            node_id: current_id.clone(),
1165            node_type: node.node_type.clone().unwrap_or_default(),
1166            file_path: node.file_path.clone(),
1167            signature: node.signature.clone(),
1168            doc_comment: node.doc_comment.clone(),
1169            description: node.description.clone(),
1170            source_code,
1171            hop_distance: hop,
1172            modified_at: None,
1173            connecting_relation: relation,
1174            token_estimate: 0, // computed in scoring
1175        };
1176
1177        // GOAL-4.8: Apply --include filters.
1178        if passes_filters(&candidate, filters) {
1179            results.push(candidate);
1180        } else {
1181            filtered_count += 1;
1182        }
1183
1184        // Expand forward for next hop.
1185        if hop < max_depth {
1186            for edge in &graph.edges {
1187                if edge.from == current_id && !visited.contains(&edge.to) {
1188                    visited.insert(edge.to.clone());
1189                    queue.push_back((edge.to.clone(), hop + 1, edge.relation.clone()));
1190                }
1191            }
1192        }
1193    }
1194
1195    (results, filtered_count)
1196}
1197
1198/// Stage 3: Reverse-edge traversal for callers and tests. **[GOAL-4.1e, 4.1f]**
1199fn gather_callers_and_tests(
1200    graph: &Graph,
1201    target_ids: &[String],
1202    project_root: Option<&Path>,
1203) -> (Vec<Candidate>, Vec<Candidate>) {
1204    let mut callers = Vec::new();
1205    let mut tests = Vec::new();
1206    let target_set: HashSet<&str> = target_ids.iter().map(|s| s.as_str()).collect();
1207    let mut seen: HashSet<String> = HashSet::new();
1208
1209    for target_id in target_ids {
1210        // Find edges where to == target_id (incoming edges).
1211        for edge in &graph.edges {
1212            if edge.to != *target_id { continue; }
1213            if target_set.contains(edge.from.as_str()) { continue; }
1214            if seen.contains(&edge.from) { continue; }
1215            seen.insert(edge.from.clone());
1216
1217            let node = match graph.get_node(&edge.from) {
1218                Some(n) => n,
1219                None => continue,
1220            };
1221
1222            let source_code = if let Some(root) = project_root {
1223                load_source_from_disk(
1224                    node.file_path.as_deref(),
1225                    node.start_line,
1226                    node.end_line,
1227                    root,
1228                ).map(|r| r.source)
1229            } else {
1230                None
1231            };
1232
1233            let candidate = Candidate {
1234                node_id: node.id.clone(),
1235                node_type: node.node_type.clone().unwrap_or_default(),
1236                file_path: node.file_path.clone(),
1237                signature: node.signature.clone(),
1238                doc_comment: node.doc_comment.clone(),
1239                description: node.description.clone(),
1240                source_code,
1241                hop_distance: 1,
1242                modified_at: None,
1243                connecting_relation: edge.relation.clone(),
1244                token_estimate: 0,
1245            };
1246
1247            // Categorize: tests_for → test, everything else → caller.
1248            match edge.relation.as_str() {
1249                "tests_for" => tests.push(candidate),
1250                _ => callers.push(candidate),
1251            }
1252        }
1253    }
1254
1255    (callers, tests)
1256}
1257
1258/// GOAL-4.8: Filter by --include patterns.
1259///
1260/// If `include_patterns` is empty, all candidates pass.
1261/// Patterns prefixed with "type:" match node_type (e.g., "type:function").
1262/// Other patterns match file_path as a glob.
1263fn passes_filters(candidate: &Candidate, filters: &ContextFilters) -> bool {
1264    // Check exclude_ids
1265    if filters.exclude_ids.iter().any(|ex| candidate.node_id == *ex) {
1266        return false;
1267    }
1268
1269    // Check modified_after
1270    if let Some(threshold) = filters.modified_after {
1271        if let Some(modified) = candidate.modified_at {
1272            if modified < threshold { return false; }
1273        }
1274        // If no modified_at on node, keep it (don't filter on missing data).
1275    }
1276
1277    // Check include_patterns (any-match semantics).
1278    if filters.include_patterns.is_empty() { return true; }
1279
1280    for pattern in &filters.include_patterns {
1281        if let Some(type_filter) = pattern.strip_prefix("type:") {
1282            // Match by node_type.
1283            if candidate.node_type == type_filter { return true; }
1284        } else {
1285            // Match by file path glob.
1286            if let Some(ref path) = candidate.file_path {
1287                if simple_glob_match(pattern, path) { return true; }
1288            }
1289        }
1290    }
1291
1292    false // No pattern matched.
1293}
1294
/// Simple glob matching for --include patterns.
///
/// Supports:
/// - `*` matches any sequence of non-`/` characters
/// - `?` matches a single non-`/` character
/// - `**/` at the start of the pattern or right after a `/` matches zero or
///   more whole directories: `**/foo.rs` matches `foo.rs` and `a/b/foo.rs`,
///   but not `a/barfoo.rs`
/// - any other `**` matches any sequence of characters, including `/`
///   (e.g. `**.rs` matches every path ending in `.rs`)
/// - a trailing `dir/**` matches `dir` itself and everything below it
/// - Literal characters match exactly
///
/// The whole of `text` must be matched. The part of the pattern in front of
/// a `**` is itself a glob anchored at the current position, so `src/**` does
/// not match `srcfoo/x.rs` and `src/*/gen/**` works as expected. A pattern may
/// contain any number of `**`.
fn simple_glob_match(pattern: &str, text: &str) -> bool {
    // No `**`: plain segment-level matching.
    let globstar_at = match pattern.find("**") {
        Some(idx) => idx,
        None => return simple_glob_segment(pattern, text),
    };

    let head = &pattern[..globstar_at];
    // A run of three or more stars means the same as `**`.
    let tail = pattern[globstar_at..].trim_start_matches('*');
    // `**` only gets whole-directory semantics when it starts a path component.
    let at_dir_boundary = head.is_empty() || head.ends_with('/');

    // `dir/**` also accepts `dir` itself.
    if tail.is_empty() {
        if let Some(dir) = head.strip_suffix('/') {
            if simple_glob_segment(dir, text) {
                return true;
            }
        }
    }

    // Try every split of `text` into "matched by head" + "left for `**` and tail".
    text.char_indices()
        .map(|(idx, _)| idx)
        .chain(std::iter::once(text.len()))
        .any(|cut| {
            simple_glob_segment(head, &text[..cut])
                && globstar_tail_matches(tail, &text[cut..], at_dir_boundary)
        })
}

/// Match `**` followed by `tail` against `rest`, the text left after the part
/// of the pattern in front of the `**` has been matched.
fn globstar_tail_matches(tail: &str, rest: &str, at_dir_boundary: bool) -> bool {
    // Trailing `**` swallows whatever is left.
    if tail.is_empty() {
        return true;
    }

    if at_dir_boundary {
        if let Some(after_slash) = tail.strip_prefix('/') {
            // `**/`: zero or more whole directories, so matching may resume
            // at `rest` itself or right after any `/` inside it.
            return simple_glob_match(after_slash, rest)
                || rest
                    .match_indices('/')
                    .any(|(idx, _)| simple_glob_match(after_slash, &rest[idx + 1..]));
        }
    }

    // Free-standing `**`: may swallow any run of characters, `/` included.
    rest.char_indices()
        .map(|(idx, _)| idx)
        .chain(std::iter::once(rest.len()))
        .any(|skip| simple_glob_match(tail, &rest[skip..]))
}

/// Match a glob pattern segment (without **) against text.
///
/// `*` matches zero or more non-`/` characters, `?` exactly one non-`/`
/// character, anything else literally. The whole of `text` must be consumed.
fn simple_glob_segment(pattern: &str, text: &str) -> bool {
    let mut pat = pattern.chars();
    let mut txt = text.chars();

    loop {
        match pat.next() {
            // Pattern exhausted: succeed only if the text is exhausted too.
            None => return txt.next().is_none(),
            Some('*') => {
                let rest_pattern = pat.as_str();
                if rest_pattern.is_empty() {
                    // Trailing * — match rest if no slashes.
                    return !txt.any(|c| c == '/');
                }
                let rest_text = txt.as_str();
                // Let * absorb 0, 1, 2, … characters, stopping at the first `/`.
                if simple_glob_segment(rest_pattern, rest_text) {
                    return true;
                }
                for (idx, c) in rest_text.char_indices() {
                    // * doesn't match across /.
                    if c == '/' {
                        return false;
                    }
                    if simple_glob_segment(rest_pattern, &rest_text[idx + c.len_utf8()..]) {
                        return true;
                    }
                }
                return false;
            }
            Some('?') => match txt.next() {
                Some(c) if c != '/' => {}
                _ => return false,
            },
            Some(expected) => match txt.next() {
                Some(actual) if actual == expected => {}
                _ => return false,
            },
        }
    }
}
1382
1383// =============================================================================
1384// §10 Output Formatting (GOAL-4.9)
1385// =============================================================================
1386
1387/// Format the assembled context as a string in the requested format. **[GOAL-4.9]**
1388pub fn format_context(ctx: &AssembledContext, format: OutputFormat) -> String {
1389    match format {
1390        OutputFormat::Json => {
1391            serde_json::to_string_pretty(ctx).unwrap_or_else(|e| format!("{{\"error\": \"{}\"}}", e))
1392        }
1393        OutputFormat::Yaml => {
1394            serde_yaml::to_string(ctx).unwrap_or_else(|e| format!("error: {}", e))
1395        }
1396        OutputFormat::Markdown => format_context_markdown(ctx),
1397    }
1398}
1399
1400/// Render context as human-readable markdown. **[GOAL-4.9]**
1401fn format_context_markdown(ctx: &AssembledContext) -> String {
1402    let mut out = String::new();
1403
1404    // Header with stats.
1405    out.push_str(&format!(
1406        "# Context Assembly\n\n\
1407         **Tokens**: {}/{} | **Nodes**: {} visited, {} included, {} filtered\n\
1408         **Elapsed**: {}ms\n\n",
1409        ctx.stats.budget_used, ctx.stats.budget_total,
1410        ctx.stats.nodes_visited, ctx.stats.nodes_included,
1411        ctx.stats.nodes_excluded_by_filter,
1412        ctx.stats.elapsed_ms,
1413    ));
1414
1415    // Targets.
1416    if !ctx.result.targets.is_empty() {
1417        out.push_str("## Targets\n\n");
1418        for target in &ctx.result.targets {
1419            out.push_str(&format!("### `{}`", target.node_id));
1420            if let Some(ref title) = target.title {
1421                out.push_str(&format!(" — {}", title));
1422            }
1423            out.push('\n');
1424            if let Some(ref fp) = target.file_path {
1425                out.push_str(&format!("**File**: `{}`\n", fp));
1426            }
1427            if let Some(ref sig) = target.signature {
1428                out.push_str(&format!("**Signature**: `{}`\n", sig));
1429            }
1430            if let Some(ref dc) = target.doc_comment {
1431                out.push_str(&format!("**Doc**: {}\n", dc.lines().next().unwrap_or("")));
1432            }
1433            if let Some(ref src) = target.source_code {
1434                let preview: String = src.lines().take(20).collect::<Vec<_>>().join("\n");
1435                out.push_str(&format!("\n```\n{}\n```\n", preview));
1436            }
1437            out.push_str(&format!("*~{} tokens*\n\n", target.token_estimate));
1438        }
1439    }
1440
1441    // Dependencies.
1442    if !ctx.result.dependencies.is_empty() {
1443        out.push_str("## Dependencies\n\n");
1444        for item in &ctx.result.dependencies {
1445            format_context_item(&mut out, item);
1446        }
1447    }
1448
1449    // Callers.
1450    if !ctx.result.callers.is_empty() {
1451        out.push_str("## Callers\n\n");
1452        for item in &ctx.result.callers {
1453            format_context_item(&mut out, item);
1454        }
1455    }
1456
1457    // Tests.
1458    if !ctx.result.tests.is_empty() {
1459        out.push_str("## Tests\n\n");
1460        for item in &ctx.result.tests {
1461            format_context_item(&mut out, item);
1462        }
1463    }
1464
1465    // Truncation summary.
1466    let trunc = &ctx.result.truncation_info;
1467    if trunc.truncated_count > 0 || trunc.dropped_count > 0 {
1468        out.push_str(&format!(
1469            "---\n*Truncation: {} items truncated, {} items dropped*\n",
1470            trunc.truncated_count, trunc.dropped_count,
1471        ));
1472    }
1473
1474    out
1475}
1476
1477/// Format a single ContextItem for markdown output.
1478fn format_context_item(out: &mut String, item: &ContextItem) {
1479    out.push_str(&format!("- **`{}`**", item.node_id));
1480    if let Some(ref fp) = item.file_path {
1481        out.push_str(&format!(" (`{}`)", fp));
1482    }
1483    out.push_str(&format!(
1484        " — {} | score: {:.2}{}",
1485        item.connecting_relation,
1486        item.score,
1487        if item.truncated { " ⚠️truncated" } else { "" },
1488    ));
1489    out.push('\n');
1490    if let Some(ref sig) = item.signature {
1491        out.push_str(&format!("  Sig: `{}`\n", sig));
1492    }
1493    if let Some(ref content) = item.content {
1494        let preview: String = content.lines().take(5).collect::<Vec<_>>().join("\n  ");
1495        if !preview.is_empty() {
1496            out.push_str(&format!("  {}\n", preview));
1497        }
1498    }
1499}
1500
1501#[cfg(test)]
1502mod tests {
1503    use super::*;
1504    use crate::graph::{Node, Edge, NodeStatus};
1505    use tempfile::TempDir;
1506    use std::fs;
1507
1508    fn make_task(id: &str, title: &str) -> Node {
1509        let mut n = Node::new(id, title);
1510        n.node_type = Some("task".to_string());
1511        n
1512    }
1513
1514    fn make_feature(id: &str, title: &str, design_doc: &str) -> Node {
1515        let mut n = Node::new(id, title);
1516        n.node_type = Some("feature".to_string());
1517        n.metadata.insert("design_doc".to_string(), serde_json::json!(design_doc));
1518        n
1519    }
1520
1521    fn setup_gid_dir() -> TempDir {
1522        let tmp = TempDir::new().unwrap();
1523        fs::write(tmp.path().join("design.md"), "# 1 Overview\nFallback design.\n").unwrap();
1524        fs::write(tmp.path().join("requirements.md"), "- GOAL-1: Basic requirement\n").unwrap();
1525        tmp
1526    }
1527
1528    fn setup_feature_docs(gid_root: &Path, feature_name: &str) {
1529        let feature_dir = gid_root.join("features").join(feature_name);
1530        fs::create_dir_all(&feature_dir).unwrap();
1531        fs::write(feature_dir.join("design.md"), concat!(
1532            "# Design\n\n",
1533            "## 3 Components\n\n",
1534            "### 3.1 Topology Analyzer\n\n",
1535            "Validates graph structure and computes layers.\n\n",
1536            "### 3.2 Execution Planner\n\n",
1537            "Generates ExecutionPlan from topology.\n",
1538            "Key interface: `create_plan(graph) -> ExecutionPlan`\n\n",
1539            "### 3.3 Context Assembler\n\n",
1540            "Builds task context from graph metadata.\n\n",
1541            "## 4 Data Models\n\n",
1542            "Data model definitions.\n",
1543        )).unwrap();
1544
1545        fs::write(feature_dir.join("requirements.md"), concat!(
1546            "# Requirements\n\n",
1547            "- GOAL-1.1: Detect cycles in dependency graph\n",
1548            "- GOAL-1.2: Compute parallelizable layers\n",
1549            "- GOAL-1.3: Find critical path\n",
1550            "- GOAL-2.1: Generate execution plan from graph\n",
1551            "- GOAL-2.2: Support parallel task execution\n",
1552        )).unwrap();
1553    }
1554
1555    #[test]
1556    fn test_feature_doc_resolution() {
1557        let gid_root = setup_gid_dir();
1558        setup_feature_docs(gid_root.path(), "task-harness");
1559
1560        let mut graph = Graph::new();
1561        let mut task = make_task("topo", "Implement topology analyzer");
1562        task.metadata.insert("design_ref".to_string(), serde_json::json!("3.1"));
1563        task.metadata.insert("satisfies".to_string(), serde_json::json!(["GOAL-1.1", "GOAL-1.2"]));
1564        graph.add_node(task);
1565        graph.add_node(make_feature("harness-feature", "Task Harness", "task-harness"));
1566        graph.add_edge(Edge::new("topo", "harness-feature", "implements"));
1567
1568        let ctx = assemble_task_context(&graph, "topo", gid_root.path()).unwrap();
1569
1570        assert!(ctx.design_excerpt.is_some());
1571        let excerpt = ctx.design_excerpt.unwrap();
1572        assert!(excerpt.contains("Topology Analyzer"), "excerpt: {}", excerpt);
1573        assert!(excerpt.contains("Validates graph structure"));
1574        assert!(!excerpt.contains("Execution Planner"), "excerpt leaked into next section");
1575
1576        assert_eq!(ctx.goals_text.len(), 2);
1577        assert!(ctx.goals_text[0].contains("GOAL-1.1"));
1578        assert!(ctx.goals_text[1].contains("GOAL-1.2"));
1579    }
1580
1581    #[test]
1582    fn test_design_ref_captures_subsections() {
1583        let content = concat!(
1584            "## 3 Components\n\n",
1585            "### 3.1 First\n\n",
1586            "Content of 3.1.\n\n",
1587            "### 3.2 Second\n\n",
1588            "Content of 3.2.\n\n",
1589            "## 4 Other\n",
1590        );
1591        let section = extract_design_section(content, "3").unwrap();
1592        assert!(section.contains("Components"));
1593        assert!(section.contains("3.1 First"));
1594        assert!(section.contains("3.2 Second"));
1595        assert!(!section.contains("4 Other"));
1596    }
1597
1598    #[test]
1599    fn test_design_ref_missing_section() {
1600        let content = "# 1 Overview\nSome content.\n## 2 Architecture\nMore content.";
1601        assert!(extract_design_section(content, "5.3").is_none());
1602    }
1603
1604    #[test]
1605    fn test_fallback_to_root_docs() {
1606        let gid_root = setup_gid_dir();
1607
1608        let mut graph = Graph::new();
1609        let mut task = make_task("standalone", "Standalone task");
1610        task.metadata.insert("design_ref".to_string(), serde_json::json!("1"));
1611        task.metadata.insert("satisfies".to_string(), serde_json::json!(["GOAL-1"]));
1612        graph.add_node(task);
1613
1614        let ctx = assemble_task_context(&graph, "standalone", gid_root.path()).unwrap();
1615        assert!(ctx.design_excerpt.is_some());
1616        assert!(ctx.design_excerpt.unwrap().contains("Fallback design"));
1617        assert_eq!(ctx.goals_text.len(), 1);
1618        assert!(ctx.goals_text[0].contains("GOAL-1"));
1619    }
1620
1621    #[test]
1622    fn test_guards_injection() {
1623        let gid_root = setup_gid_dir();
1624
1625        let mut graph = Graph::new();
1626        let mut root = Node::new("project-root", "Project");
1627        root.node_type = Some("root".to_string());
1628        root.metadata.insert("guards".to_string(), serde_json::json!([
1629            "GUARD-1: All file writes are atomic",
1630            "GUARD-2: Auth tokens never logged"
1631        ]));
1632        graph.add_node(root);
1633        graph.add_node(make_task("task-a", "Task A"));
1634
1635        let ctx = assemble_task_context(&graph, "task-a", gid_root.path()).unwrap();
1636        assert_eq!(ctx.guards.len(), 2);
1637        assert!(ctx.guards[0].contains("GUARD-1"));
1638        assert!(ctx.guards[1].contains("GUARD-2"));
1639    }
1640
1641    #[test]
1642    fn test_dependency_interfaces() {
1643        let gid_root = setup_gid_dir();
1644
1645        let mut graph = Graph::new();
1646        let mut dep = make_task("dep-task", "Dependency Task");
1647        dep.description = Some("Provides auth module with login() interface".to_string());
1648        dep.status = NodeStatus::Done;
1649        graph.add_node(dep);
1650        graph.add_node(make_task("main-task", "Main Task"));
1651        graph.add_edge(Edge::depends_on("main-task", "dep-task"));
1652
1653        let ctx = assemble_task_context(&graph, "main-task", gid_root.path()).unwrap();
1654        assert_eq!(ctx.dependency_interfaces.len(), 1);
1655        assert!(ctx.dependency_interfaces[0].contains("Dependency Task"));
1656        assert!(ctx.dependency_interfaces[0].contains("auth module"));
1657    }
1658
1659    #[test]
1660    fn test_missing_task_node() {
1661        let gid_root = setup_gid_dir();
1662        let graph = Graph::new();
1663        let result = assemble_task_context(&graph, "nonexistent", gid_root.path());
1664        assert!(result.is_err());
1665        assert!(result.unwrap_err().to_string().contains("not found"));
1666    }
1667
1668    #[test]
1669    fn test_missing_feature_docs_graceful() {
1670        let gid_root = setup_gid_dir();
1671
1672        let mut graph = Graph::new();
1673        let mut task = make_task("task-x", "Task X");
1674        task.metadata.insert("design_ref".to_string(), serde_json::json!("3.1"));
1675        task.metadata.insert("satisfies".to_string(), serde_json::json!(["GOAL-99"]));
1676        graph.add_node(task);
1677        graph.add_node(make_feature("feat", "Feature", "nonexistent-feature"));
1678        graph.add_edge(Edge::new("task-x", "feat", "implements"));
1679
1680        let ctx = assemble_task_context(&graph, "task-x", gid_root.path()).unwrap();
1681        assert!(ctx.design_excerpt.is_none());
1682        assert!(ctx.goals_text.is_empty());
1683    }
1684
1685    #[test]
1686    fn test_context_deterministic() {
1687        let gid_root = setup_gid_dir();
1688        setup_feature_docs(gid_root.path(), "test-feature");
1689
1690        let mut graph = Graph::new();
1691        let mut task = make_task("det-task", "Deterministic");
1692        task.metadata.insert("design_ref".to_string(), serde_json::json!("3.2"));
1693        task.metadata.insert("satisfies".to_string(), serde_json::json!(["GOAL-2.1"]));
1694        graph.add_node(task);
1695        graph.add_node(make_feature("feat", "Feature", "test-feature"));
1696        graph.add_edge(Edge::new("det-task", "feat", "implements"));
1697
1698        let ctx1 = assemble_task_context(&graph, "det-task", gid_root.path()).unwrap();
1699        let ctx2 = assemble_task_context(&graph, "det-task", gid_root.path()).unwrap();
1700
1701        assert_eq!(
1702            serde_json::to_string(&ctx1).unwrap(),
1703            serde_json::to_string(&ctx2).unwrap(),
1704            "assemble_task_context must be deterministic (GUARD-2)"
1705        );
1706    }
1707
1708    #[test]
1709    fn test_heading_parser() {
1710        assert_eq!(parse_heading("## 3.2 Title"), Some((2, "3.2 Title")));
1711        assert_eq!(parse_heading("### 3.2.1 Sub"), Some((3, "3.2.1 Sub")));
1712        assert_eq!(parse_heading("# Top"), Some((1, "Top")));
1713        assert_eq!(parse_heading("Not a heading"), None);
1714        assert_eq!(parse_heading("#NoSpace"), None);
1715    }
1716
1717    #[test]
1718    fn test_heading_ref_matching() {
1719        assert!(heading_starts_with_ref("3.2 Execution Planner", "3.2"));
1720        assert!(heading_starts_with_ref("3.2. Execution Planner", "3.2"));
1721        assert!(heading_starts_with_ref("3 Components", "3"));
1722        assert!(!heading_starts_with_ref("3.2 Execution Planner", "3.20"));
1723        assert!(!heading_starts_with_ref("13 Something", "3"));
1724    }
1725
1726    // =========================================================================
1727    // §5 Relevance Scoring Tests — GOAL-4.4 5-Tier Ranking Verification
1728    // =========================================================================
1729
1730    /// Helper: create a minimal candidate with given relation and hop distance.
1731    fn make_candidate(relation: &str, hop: u32) -> Candidate {
1732        Candidate {
1733            node_id: format!("node-{}-{}", relation, hop),
1734            node_type: "function".to_string(),
1735            file_path: None,
1736            signature: None,
1737            doc_comment: None,
1738            description: None,
1739            source_code: None,
1740            hop_distance: hop,
1741            modified_at: None,
1742            connecting_relation: relation.to_string(),
1743            token_estimate: 0,
1744        }
1745    }
1746
1747    /// Helper: create a candidate with source/signature content for token estimation.
1748    fn make_candidate_with_content(relation: &str, hop: u32, source: &str, sig: &str) -> Candidate {
1749        Candidate {
1750            node_id: format!("node-{}-{}", relation, hop),
1751            node_type: "function".to_string(),
1752            file_path: Some("/src/lib.rs".to_string()),
1753            signature: Some(sig.to_string()),
1754            doc_comment: Some("/// A function".to_string()),
1755            description: Some("Does stuff".to_string()),
1756            source_code: Some(source.to_string()),
1757            hop_distance: hop,
1758            modified_at: None,
1759            connecting_relation: relation.to_string(),
1760            token_estimate: 0,
1761        }
1762    }
1763
1764    // --- Tier 1: Direct Call (calls, imports) → rank 1, score 1.0 ---
1765
1766    #[test]
1767    fn test_rank_tier1_calls() {
1768        assert_eq!(relation_rank("calls"), 1);
1769        assert_eq!(relation_score("calls"), 1.0);
1770    }
1771
1772    #[test]
1773    fn test_rank_tier1_imports() {
1774        assert_eq!(relation_rank("imports"), 1);
1775        assert_eq!(relation_score("imports"), 1.0);
1776    }
1777
1778    // --- Tier 2: Type Reference (type_reference, inherits, implements, uses) → rank 2, score 0.8 ---
1779
1780    #[test]
1781    fn test_rank_tier2_type_reference() {
1782        assert_eq!(relation_rank("type_reference"), 2);
1783        assert_eq!(relation_score("type_reference"), 0.8);
1784    }
1785
1786    #[test]
1787    fn test_rank_tier2_inherits() {
1788        assert_eq!(relation_rank("inherits"), 2);
1789        assert_eq!(relation_score("inherits"), 0.8);
1790    }
1791
1792    #[test]
1793    fn test_rank_tier2_implements() {
1794        assert_eq!(relation_rank("implements"), 2);
1795        assert_eq!(relation_score("implements"), 0.8);
1796    }
1797
1798    #[test]
1799    fn test_rank_tier2_uses() {
1800        assert_eq!(relation_rank("uses"), 2);
1801        assert_eq!(relation_score("uses"), 0.8);
1802    }
1803
1804    // --- Tier 3: Same-file (contains, defined_in) → rank 3, score 0.6 ---
1805
1806    #[test]
1807    fn test_rank_tier3_contains() {
1808        assert_eq!(relation_rank("contains"), 3);
1809        assert_eq!(relation_score("contains"), 0.6);
1810    }
1811
1812    #[test]
1813    fn test_rank_tier3_defined_in() {
1814        assert_eq!(relation_rank("defined_in"), 3);
1815        assert_eq!(relation_score("defined_in"), 0.6);
1816    }
1817
1818    // --- Tier 4: Structural (depends_on, part_of, blocks, tests_for) → rank 4, score 0.4 ---
1819
1820    #[test]
1821    fn test_rank_tier4_depends_on() {
1822        assert_eq!(relation_rank("depends_on"), 4);
1823        assert_eq!(relation_score("depends_on"), 0.4);
1824    }
1825
1826    #[test]
1827    fn test_rank_tier4_part_of() {
1828        assert_eq!(relation_rank("part_of"), 4);
1829        assert_eq!(relation_score("part_of"), 0.4);
1830    }
1831
1832    #[test]
1833    fn test_rank_tier4_blocks() {
1834        assert_eq!(relation_rank("blocks"), 4);
1835        assert_eq!(relation_score("blocks"), 0.4);
1836    }
1837
1838    #[test]
1839    fn test_rank_tier4_tests_for() {
1840        assert_eq!(relation_rank("tests_for"), 4);
1841        assert_eq!(relation_score("tests_for"), 0.4);
1842    }
1843
1844    // --- Tier 5: Transitive / Unknown → rank 5, score 0.2 ---
1845
1846    #[test]
1847    fn test_rank_tier5_unknown_relations() {
1848        // Any unrecognized relation falls to tier 5
1849        for rel in &["relates_to", "references", "mentions", "foobar", "", "CALLS", "Imports"] {
1850            assert_eq!(relation_rank(rel), 5,
1851                "Expected tier 5 for unknown relation '{}'", rel);
1852            assert_eq!(relation_score(rel), 0.2,
1853                "Expected score 0.2 for unknown relation '{}'", rel);
1854        }
1855    }
1856
1857    // --- Score monotonicity: higher-tier relations → higher scores ---
1858
1859    #[test]
1860    fn test_scores_monotonically_decreasing_by_tier() {
1861        let tier1 = relation_score("calls");
1862        let tier2 = relation_score("type_reference");
1863        let tier3 = relation_score("contains");
1864        let tier4 = relation_score("depends_on");
1865        let tier5 = relation_score("unknown");
1866
1867        assert!(tier1 > tier2, "Tier 1 ({}) must be > Tier 2 ({})", tier1, tier2);
1868        assert!(tier2 > tier3, "Tier 2 ({}) must be > Tier 3 ({})", tier2, tier3);
1869        assert!(tier3 > tier4, "Tier 3 ({}) must be > Tier 4 ({})", tier3, tier4);
1870        assert!(tier4 > tier5, "Tier 4 ({}) must be > Tier 5 ({})", tier4, tier5);
1871        assert!(tier5 > 0.0, "Tier 5 ({}) must be > 0", tier5);
1872    }
1873
1874    #[test]
1875    fn test_all_scores_in_valid_range() {
1876        let all_relations = [
1877            "calls", "imports",
1878            "type_reference", "inherits", "implements", "uses",
1879            "contains", "defined_in",
1880            "depends_on", "part_of", "blocks", "tests_for",
1881            "unknown", "foobar",
1882        ];
1883        for rel in &all_relations {
1884            let s = relation_score(rel);
1885            assert!(s > 0.0 && s <= 1.0,
1886                "Score for '{}' is {} — must be in (0.0, 1.0]", rel, s);
1887        }
1888    }
1889
1890    // --- Composite scoring tests ---
1891
1892    #[test]
1893    fn test_score_candidate_hop1_calls() {
1894        let c = make_candidate("calls", 1);
1895        let scored = score_candidate(&c);
1896
1897        // hop=1 → no transitive penalty
1898        // score = (0.60 * 1.0 + 0.30 * 1.0 + 0.10 * 1.0) * 1.0 = 1.0
1899        assert!((scored.score - 1.0).abs() < 1e-10,
1900            "calls at hop 1 should score 1.0, got {}", scored.score);
1901    }
1902
1903    #[test]
1904    fn test_score_candidate_hop1_depends_on() {
1905        let c = make_candidate("depends_on", 1);
1906        let scored = score_candidate(&c);
1907
1908        // hop=1 → no transitive penalty
1909        // score = (0.60 * 0.4 + 0.30 * 1.0 + 0.10 * 1.0) * 1.0 = 0.24 + 0.30 + 0.10 = 0.64
1910        assert!((scored.score - 0.64).abs() < 1e-10,
1911            "depends_on at hop 1 should score 0.64, got {}", scored.score);
1912    }
1913
1914    #[test]
1915    fn test_score_candidate_hop2_transitive_penalty() {
1916        let c = make_candidate("calls", 2);
1917        let scored = score_candidate(&c);
1918
1919        // hop=2 → proximity = 0.5, transitive_penalty = 0.8
1920        // score = (0.60 * 1.0 + 0.30 * 0.5 + 0.10 * 1.0) * 0.8
1921        //       = (0.60 + 0.15 + 0.10) * 0.8 = 0.85 * 0.8 = 0.68
1922        assert!((scored.score - 0.68).abs() < 1e-10,
1923            "calls at hop 2 should score 0.68, got {}", scored.score);
1924    }
1925
1926    #[test]
1927    fn test_score_candidate_hop3_high_penalty() {
1928        let c = make_candidate("unknown", 3);
1929        let scored = score_candidate(&c);
1930
1931        // hop=3 → proximity = 1/3, transitive_penalty = 0.8
1932        // score = (0.60 * 0.2 + 0.30 * (1/3) + 0.10 * 1.0) * 0.8
1933        //       = (0.12 + 0.10 + 0.10) * 0.8 = 0.32 * 0.8 = 0.256
1934        assert!((scored.score - 0.256).abs() < 1e-10,
1935            "unknown at hop 3 should score 0.256, got {}", scored.score);
1936    }
1937
1938    #[test]
1939    fn test_calls_hop1_beats_type_ref_hop1() {
1940        let calls = score_candidate(&make_candidate("calls", 1));
1941        let type_ref = score_candidate(&make_candidate("type_reference", 1));
1942
1943        assert!(calls.score > type_ref.score,
1944            "calls ({}) at hop 1 must beat type_reference ({}) at hop 1",
1945            calls.score, type_ref.score);
1946    }
1947
1948    #[test]
1949    fn test_calls_hop2_vs_type_ref_hop1() {
1950        // calls at hop 2 (penalized) should still be meaningfully scored
1951        let calls_h2 = score_candidate(&make_candidate("calls", 2));
1952        let type_ref_h1 = score_candidate(&make_candidate("type_reference", 1));
1953
1954        // calls@hop2 = 0.68, type_ref@hop1 = (0.60*0.8 + 0.30*1.0 + 0.10*1.0) = 0.88
1955        // So type_ref at hop 1 beats calls at hop 2 — proximity matters
1956        assert!(type_ref_h1.score > calls_h2.score,
1957            "type_ref at hop 1 ({}) should beat calls at hop 2 ({}) because proximity matters",
1958            type_ref_h1.score, calls_h2.score);
1959    }
1960
1961    #[test]
1962    fn test_same_relation_closer_hop_wins() {
1963        let hop1 = score_candidate(&make_candidate("imports", 1));
1964        let hop2 = score_candidate(&make_candidate("imports", 2));
1965        let hop3 = score_candidate(&make_candidate("imports", 3));
1966
1967        assert!(hop1.score > hop2.score, "hop1 ({}) > hop2 ({})", hop1.score, hop2.score);
1968        assert!(hop2.score > hop3.score, "hop2 ({}) > hop3 ({})", hop2.score, hop3.score);
1969    }
1970
1971    #[test]
1972    fn test_hop0_proximity_no_division_by_zero() {
1973        // hop_distance = 0 should not panic or produce NaN/Infinity
1974        let c = make_candidate("calls", 0);
1975        let scored = score_candidate(&c);
1976        assert!(scored.score.is_finite(), "hop 0 must not produce NaN/Infinity");
1977        assert!(scored.score > 0.0, "hop 0 must produce positive score");
1978    }
1979
1980    #[test]
1981    fn test_nan_guard() {
1982        // Create a candidate where hop = 0 (which we handle) 
1983        // The NaN guard should catch any edge case
1984        let c = make_candidate("calls", 0);
1985        let scored = score_candidate(&c);
1986        assert!(!scored.score.is_nan(), "Score must never be NaN");
1987        assert!(scored.score.is_finite(), "Score must be finite");
1988    }
1989
1990    // --- score_candidates: batch scoring and sorting ---
1991
1992    #[test]
1993    fn test_score_candidates_sorted_descending() {
1994        let candidates = vec![
1995            make_candidate("unknown", 3),     // lowest score
1996            make_candidate("calls", 1),       // highest score
1997            make_candidate("depends_on", 2),  // mid-low
1998            make_candidate("contains", 1),    // mid
1999        ];
2000
2001        let scored = score_candidates(&candidates);
2002        for i in 1..scored.len() {
2003            assert!(scored[i-1].score >= scored[i].score,
2004                "Candidates not sorted descending: index {} ({}) < index {} ({})",
2005                i-1, scored[i-1].score, i, scored[i].score);
2006        }
2007
2008        // First should be calls@hop1 (highest)
2009        assert_eq!(scored[0].candidate.connecting_relation, "calls");
2010        // Last should be unknown@hop3 (lowest)
2011        assert_eq!(scored.last().unwrap().candidate.connecting_relation, "unknown");
2012    }
2013
2014    #[test]
2015    fn test_score_candidates_empty_input() {
2016        let scored = score_candidates(&[]);
2017        assert!(scored.is_empty());
2018    }
2019
2020    #[test]
2021    fn test_score_candidates_single_item() {
2022        let scored = score_candidates(&[make_candidate("imports", 1)]);
2023        assert_eq!(scored.len(), 1);
2024        assert_eq!(scored[0].candidate.connecting_relation, "imports");
2025    }
2026
2027    #[test]
2028    fn test_score_candidates_preserves_all() {
2029        let candidates = vec![
2030            make_candidate("calls", 1),
2031            make_candidate("imports", 1),
2032            make_candidate("type_reference", 2),
2033            make_candidate("contains", 1),
2034            make_candidate("depends_on", 3),
2035        ];
2036        let scored = score_candidates(&candidates);
2037        assert_eq!(scored.len(), 5, "All candidates must be preserved after scoring");
2038    }
2039
2040    // --- Token estimation ---
2041
2042    #[test]
2043    fn test_token_estimation_empty_candidate() {
2044        let c = make_candidate("calls", 1);
2045        let tokens = estimate_tokens_for_candidate(&c);
2046        // No content → only overhead (30 bytes) → 30/4 = 7, but max(1) → 7
2047        assert_eq!(tokens, 7, "Empty candidate with 30B overhead → 7 tokens");
2048    }
2049
2050    #[test]
2051    fn test_token_estimation_with_content() {
2052        let source = "fn main() { println!(\"hello\"); }";
2053        let sig = "fn main()";
2054        let desc = "Does stuff";
2055        let doc = "/// A function";
2056        let c = make_candidate_with_content("calls", 1, source, sig);
2057        let tokens = estimate_tokens_for_candidate(&c);
2058        // source + signature + description + doc_comment + overhead(30), all / 4
2059        let expected_bytes = source.len() + sig.len() + desc.len() + doc.len() + 30;
2060        let expected_tokens = (expected_bytes / 4).max(1);
2061        assert_eq!(tokens, expected_tokens,
2062            "bytes: source={} + sig={} + desc={} + doc={} + overhead=30 = {}, /4 = {}",
2063            source.len(), sig.len(), desc.len(), doc.len(), expected_bytes, expected_tokens);
2064    }
2065
2066    #[test]
2067    fn test_estimate_tokens_str_empty() {
2068        assert_eq!(estimate_tokens_str(""), 0);
2069    }
2070
2071    #[test]
2072    fn test_estimate_tokens_str_short() {
2073        assert_eq!(estimate_tokens_str("ab"), 1); // 2/4 = 0 → max(1) = 1
2074    }
2075
2076    #[test]
2077    fn test_estimate_tokens_str_exact() {
2078        assert_eq!(estimate_tokens_str("abcd"), 1); // 4/4 = 1
2079        assert_eq!(estimate_tokens_str("abcdefgh"), 2); // 8/4 = 2
2080    }
2081
2082    // --- Design compliance: all relations from GOAL-4.4 mapped ---
2083
2084    #[test]
2085    fn test_goal_4_4_tier1_complete() {
2086        // GOAL-4.4 Tier 1: calls, imports
2087        let tier1_relations = ["calls", "imports"];
2088        for rel in &tier1_relations {
2089            assert_eq!(relation_rank(rel), 1,
2090                "GOAL-4.4 requires '{}' in Tier 1 (rank 1)", rel);
2091        }
2092    }
2093
2094    #[test]
2095    fn test_goal_4_4_tier2_complete() {
2096        // GOAL-4.4 Tier 2: type_reference, inherits, implements, uses
2097        let tier2_relations = ["type_reference", "inherits", "implements", "uses"];
2098        for rel in &tier2_relations {
2099            assert_eq!(relation_rank(rel), 2,
2100                "GOAL-4.4 requires '{}' in Tier 2 (rank 2)", rel);
2101        }
2102    }
2103
2104    #[test]
2105    fn test_goal_4_4_tier3_complete() {
2106        // GOAL-4.4 Tier 3: contains, defined_in
2107        let tier3_relations = ["contains", "defined_in"];
2108        for rel in &tier3_relations {
2109            assert_eq!(relation_rank(rel), 3,
2110                "GOAL-4.4 requires '{}' in Tier 3 (rank 3)", rel);
2111        }
2112    }
2113
2114    #[test]
2115    fn test_goal_4_4_tier4_complete() {
2116        // GOAL-4.4 Tier 4: depends_on, part_of, blocks, tests_for
2117        let tier4_relations = ["depends_on", "part_of", "blocks", "tests_for"];
2118        for rel in &tier4_relations {
2119            assert_eq!(relation_rank(rel), 4,
2120                "GOAL-4.4 requires '{}' in Tier 4 (rank 4)", rel);
2121        }
2122    }
2123
2124    #[test]
2125    fn test_goal_4_4_tier5_fallback() {
2126        // GOAL-4.4 Tier 5: anything not in tiers 1-4
2127        let unknown_relations = ["unknown", "relates_to", "belongs_to", "subtask_of", ""];
2128        for rel in &unknown_relations {
2129            assert_eq!(relation_rank(rel), 5,
2130                "GOAL-4.4 requires '{}' to fall to Tier 5 (rank 5)", rel);
2131        }
2132    }
2133
2134    // --- Case sensitivity (relations are case-sensitive) ---
2135
2136    #[test]
2137    fn test_relations_case_sensitive() {
2138        // "calls" is tier 1, but "Calls" or "CALLS" should fall to tier 5
2139        assert_eq!(relation_rank("Calls"), 5);
2140        assert_eq!(relation_rank("CALLS"), 5);
2141        assert_eq!(relation_rank("Imports"), 5);
2142        assert_eq!(relation_rank("IMPORTS"), 5);
2143        assert_eq!(relation_rank("Contains"), 5);
2144        assert_eq!(relation_rank("DEPENDS_ON"), 5);
2145    }
2146
2147    // --- Composite score: weight verification ---
2148
2149    #[test]
2150    fn test_scoring_weights_sum_to_one() {
2151        let sum = W_RELATION + W_PROXIMITY + W_WEIGHT;
2152        assert!((sum - 1.0).abs() < 1e-10,
2153            "Scoring weights should sum to 1.0 for normalized output, got {}", sum);
2154    }
2155
2156    #[test]
2157    fn test_relation_is_dominant_factor() {
2158        // W_RELATION (0.60) is the largest weight — relation tier should be the
2159        // primary differentiator, not hop distance alone
2160        assert!(W_RELATION > W_PROXIMITY,
2161            "W_RELATION ({}) must be > W_PROXIMITY ({})", W_RELATION, W_PROXIMITY);
2162        assert!(W_RELATION > W_WEIGHT,
2163            "W_RELATION ({}) must be > W_WEIGHT ({})", W_RELATION, W_WEIGHT);
2164    }
2165
2166    // --- Sorting stability: same-scored candidates maintain relative order ---
2167
2168    #[test]
2169    fn test_score_candidates_stable_ordering_same_tier_same_hop() {
2170        // Two tier-1 relations at same hop → same score → order preserved
2171        let candidates = vec![
2172            make_candidate("calls", 1),
2173            make_candidate("imports", 1),
2174        ];
2175        let scored = score_candidates(&candidates);
2176        assert_eq!(scored.len(), 2);
2177        // Both have identical scores
2178        assert!((scored[0].score - scored[1].score).abs() < 1e-10);
2179    }
2180
2181    // --- Real-world scenario: mixed tiers and hops ---
2182
2183    #[test]
2184    fn test_realistic_scoring_scenario() {
2185        // Simulating a real context assembly:
2186        // Target: fn parse_config()
2187        // - Neighbor via "calls" at hop 1: fn validate_config()  → highest
2188        // - Neighbor via "imports" at hop 1: mod config_types     → highest
2189        // - Neighbor via "type_reference" at hop 1: struct Config → high
2190        // - Neighbor via "defined_in" at hop 1: file config.rs    → medium
2191        // - Neighbor via "depends_on" at hop 1: task impl-config  → low
2192        // - Neighbor via "calls" at hop 2: fn read_file()         → penalized
2193        // - Neighbor via "unknown" at hop 3: some-node            → lowest
2194
2195        let candidates = vec![
2196            make_candidate("calls", 1),
2197            make_candidate("imports", 1),
2198            make_candidate("type_reference", 1),
2199            make_candidate("defined_in", 1),
2200            make_candidate("depends_on", 1),
2201            make_candidate("calls", 2),
2202            make_candidate("unknown", 3),
2203        ];
2204
2205        let scored = score_candidates(&candidates);
2206
2207        // Verify ordering: calls@1 ≥ imports@1 > type_ref@1 > defined_in@1 > calls@2 > depends_on@1 > unknown@3
2208        // (Note: calls@1 == imports@1 in score)
2209        assert_eq!(scored.len(), 7);
2210
2211        // calls@1 and imports@1 should be at the top (both score 1.0)
2212        let top_two_relations: Vec<&str> = scored[..2].iter()
2213            .map(|s| s.candidate.connecting_relation.as_str())
2214            .collect();
2215        assert!(top_two_relations.contains(&"calls") && top_two_relations.contains(&"imports"),
2216            "Top 2 should be calls and imports, got {:?}", top_two_relations);
2217
2218        // type_reference@1 should be 3rd
2219        assert_eq!(scored[2].candidate.connecting_relation, "type_reference");
2220
2221        // unknown@3 should be last
2222        assert_eq!(scored[6].candidate.connecting_relation, "unknown");
2223
2224        // Verify all scores are positive and in descending order
2225        for i in 1..scored.len() {
2226            assert!(scored[i-1].score >= scored[i].score,
2227                "Not descending at index {}: {} vs {}", i, scored[i-1].score, scored[i].score);
2228            assert!(scored[i].score > 0.0, "Score at index {} should be > 0", i);
2229        }
2230    }
2231
2232    // --- Edge case: very deep hops ---
2233
2234    #[test]
2235    fn test_deep_hop_still_positive_score() {
2236        for hop in [5, 10, 50, 100] {
2237            let c = make_candidate("calls", hop);
2238            let scored = score_candidate(&c);
2239            assert!(scored.score > 0.0,
2240                "Score at hop {} must be > 0, got {}", hop, scored.score);
2241            assert!(scored.score.is_finite(),
2242                "Score at hop {} must be finite, got {}", hop, scored.score);
2243        }
2244    }
2245
2246    #[test]
2247    fn test_score_decreases_with_hop_for_same_relation() {
2248        let hops: Vec<u32> = (1..=5).collect();
2249        let scores: Vec<f64> = hops.iter()
2250            .map(|&h| score_candidate(&make_candidate("calls", h)).score)
2251            .collect();
2252
2253        for i in 1..scores.len() {
2254            assert!(scores[i-1] > scores[i],
2255                "Score at hop {} ({}) should be > score at hop {} ({})",
2256                hops[i-1], scores[i-1], hops[i], scores[i]);
2257        }
2258    }
2259
2260    // =========================================================================
2261    // §6 Tests: Category-Based Truncation (GOAL-4.3)
2262    // =========================================================================
2263
2264    /// Helper: make a ScoredCandidate with known token estimate.
2265    fn make_scored(id: &str, relation: &str, hop: u32, tokens: usize) -> ScoredCandidate {
2266        let c = Candidate {
2267            node_id: id.to_string(),
2268            node_type: "function".to_string(),
2269            file_path: Some(format!("/src/{}.rs", id)),
2270            signature: Some(format!("fn {}()", id)),
2271            doc_comment: None,
2272            description: Some(format!("Description of {}", id)),
2273            source_code: Some("x".repeat(tokens * 4)), // ~tokens tokens
2274            hop_distance: hop,
2275            modified_at: None,
2276            connecting_relation: relation.to_string(),
2277            token_estimate: 0,
2278        };
2279        ScoredCandidate {
2280            score: score_candidate(&c).score,
2281            token_estimate: tokens,
2282            candidate: c,
2283        }
2284    }
2285
2286    /// Helper: make a TargetContext with known token estimate.
2287    fn make_target(id: &str, tokens: usize) -> TargetContext {
2288        TargetContext {
2289            node_id: id.to_string(),
2290            title: Some(format!("Target {}", id)),
2291            file_path: Some(format!("/src/{}.rs", id)),
2292            signature: Some(format!("fn {}()", id)),
2293            doc_comment: None,
2294            description: Some(format!("Target desc {}", id)),
2295            source_code: Some("t".repeat(tokens.saturating_sub(20) * 4)),
2296            token_estimate: tokens,
2297        }
2298    }
2299
2300    // --- truncate_text tests ---
2301
2302    #[test]
2303    fn test_truncate_text_short_text_no_truncation() {
2304        let text = "fn foo() { 42 }";
2305        let result = truncate_text(text, 100);
2306        assert_eq!(result, text, "Short text should be returned as-is");
2307    }
2308
2309    #[test]
2310    fn test_truncate_text_exact_boundary() {
2311        let text = "abcd"; // 4 bytes = 1 token
2312        let result = truncate_text(text, 1);
2313        assert_eq!(result, text, "Text that exactly fits should not be truncated");
2314    }
2315
2316    #[test]
2317    fn test_truncate_text_adds_marker() {
2318        // 100 bytes of content, budget of 10 tokens = 40 bytes
2319        let text = "a".repeat(100);
2320        let result = truncate_text(&text, 10);
2321        assert!(result.ends_with("... [truncated]"),
2322            "Truncated text must end with marker, got: {}", result);
2323    }
2324
2325    #[test]
2326    fn test_truncate_text_prefers_line_boundary() {
2327        let text = "line 1\nline 2\nline 3\nline 4\nline 5\nline 6\nline 7\nline 8\nline 9\nline 10";
2328        // Budget of 10 tokens = 40 bytes, marker is 16 bytes → ~24 usable bytes
2329        let result = truncate_text(text, 10);
2330        assert!(result.contains("... [truncated]"));
2331        // Should cut at a newline, not in the middle of "line"
2332        let before_marker = result.split("\n... [truncated]").next().unwrap();
2333        // Each line is ~7 chars, 24 usable bytes → should keep ~3 lines
2334        assert!(before_marker.ends_with(char::is_numeric) || before_marker.ends_with('\n') || before_marker.contains("line"),
2335            "Should truncate at line boundary, got: '{}'", before_marker);
2336    }
2337
2338    #[test]
2339    fn test_truncate_text_utf8_safety() {
2340        // Multi-byte chars: each emoji is 4 bytes
2341        let text = "🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥"; // 10 emojis = 40 bytes
2342        // Budget = 5 tokens = 20 bytes, marker = 16 bytes → 4 usable = 1 emoji
2343        let result = truncate_text(&text, 5);
2344        // Must be valid UTF-8 (String guarantees this)
2345        assert!(result.ends_with("... [truncated]"));
2346        // Must not panic or produce invalid string
2347        for c in result.chars() {
2348            assert!(c.len_utf8() <= 4);
2349        }
2350    }
2351
2352    #[test]
2353    fn test_truncate_text_chinese_chars() {
2354        // Chinese chars are 3 bytes each
2355        let text = "这是一个测试字符串用于验证中文截断功能是否正确工作";
2356        // 17 chars × 3 bytes = 51 bytes total
2357        let result = truncate_text(text, 5); // 20 bytes budget
2358        assert!(result.ends_with("... [truncated]"));
2359        // Verify we can iterate chars without panicking
2360        let _ = result.chars().count();
2361    }
2362
2363    #[test]
2364    fn test_truncate_text_empty_input() {
2365        let result = truncate_text("", 100);
2366        assert_eq!(result, "");
2367    }
2368
2369    #[test]
2370    fn test_truncate_text_zero_budget() {
2371        let text = "some content";
2372        let result = truncate_text(text, 0);
2373        // 0 tokens = 0 bytes, marker = 16 bytes → saturating_sub → 0 usable
2374        // Should truncate to empty + marker, or just marker
2375        assert!(result.contains("... [truncated]") || result.is_empty());
2376    }
2377
2378    #[test]
2379    fn test_truncate_text_result_within_budget() {
2380        let text = "a\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np";
2381        let budget_tokens = 5;
2382        let result = truncate_text(text, budget_tokens);
2383        let result_tokens = estimate_tokens_str(&result);
2384        // Result tokens should be ≤ budget (or very close due to rounding)
2385        assert!(result_tokens <= budget_tokens + 1,
2386            "Result {} tokens should be ≤ budget {} tokens", result_tokens, budget_tokens);
2387    }
2388
2389    #[test]
2390    fn test_truncate_text_head_biased() {
2391        let lines: Vec<String> = (1..=20).map(|i| format!("line {}", i)).collect();
2392        let text = lines.join("\n");
2393        let result = truncate_text(&text, 15);
2394        assert!(result.starts_with("line 1"), "Must preserve beginning (head-biased)");
2395        assert!(!result.contains("line 20"), "Must drop end content");
2396    }
2397
2398    // --- greedy_fill tests ---
2399
2400    #[test]
2401    fn test_greedy_fill_all_fit() {
2402        let items = vec![
2403            make_scored("a", "calls", 1, 100),
2404            make_scored("b", "calls", 1, 100),
2405            make_scored("c", "calls", 1, 100),
2406        ];
2407        let (included, info) = greedy_fill(&items, 1000);
2408        assert_eq!(included.len(), 3, "All 3 should fit in 1000 budget");
2409        assert_eq!(info.truncated_count, 0);
2410        assert_eq!(info.dropped_count, 0);
2411        assert_eq!(info.budget_used, 300);
2412    }
2413
2414    #[test]
2415    fn test_greedy_fill_partial_fit() {
2416        let items = vec![
2417            make_scored("a", "calls", 1, 100),
2418            make_scored("b", "calls", 1, 100),
2419            make_scored("c", "calls", 1, 100),
2420        ];
2421        let (included, info) = greedy_fill(&items, 250);
2422        // First two fully fit (200), third has 50 remaining ≥ MIN_USEFUL_TOKENS_TRUNC (32)
2423        assert_eq!(included.len(), 3, "Third item should be truncated, not dropped");
2424        assert_eq!(info.truncated_count, 1);
2425        assert_eq!(info.dropped_count, 0);
2426        assert!(included[2].truncated, "Third item should be marked truncated");
2427    }
2428
2429    #[test]
2430    fn test_greedy_fill_drop_when_budget_too_small() {
2431        let items = vec![
2432            make_scored("a", "calls", 1, 100),
2433            make_scored("b", "calls", 1, 100),
2434        ];
2435        // Budget only fits first item with 10 left over (< MIN_USEFUL_TOKENS_TRUNC)
2436        let (included, info) = greedy_fill(&items, 110);
2437        assert_eq!(included.len(), 1, "Only first should fit");
2438        assert_eq!(info.dropped_count, 1, "Second should be dropped (10 < 32 min)");
2439        assert_eq!(info.truncated_count, 0);
2440    }
2441
2442    #[test]
2443    fn test_greedy_fill_empty_input() {
2444        let items: Vec<ScoredCandidate> = vec![];
2445        let (included, info) = greedy_fill(&items, 1000);
2446        assert!(included.is_empty());
2447        assert_eq!(info.budget_used, 0);
2448    }
2449
2450    #[test]
2451    fn test_greedy_fill_zero_budget() {
2452        let items = vec![
2453            make_scored("a", "calls", 1, 100),
2454        ];
2455        let (included, info) = greedy_fill(&items, 0);
2456        assert!(included.is_empty());
2457        assert_eq!(info.dropped_count, 1);
2458    }
2459
2460    #[test]
2461    fn test_greedy_fill_preserves_order() {
2462        let items = vec![
2463            make_scored("first", "calls", 1, 50),
2464            make_scored("second", "imports", 1, 50),
2465            make_scored("third", "type_reference", 1, 50),
2466        ];
2467        let (included, _) = greedy_fill(&items, 1000);
2468        assert_eq!(included[0].node_id, "first");
2469        assert_eq!(included[1].node_id, "second");
2470        assert_eq!(included[2].node_id, "third");
2471    }
2472
2473    #[test]
2474    fn test_greedy_fill_truncated_item_has_reduced_tokens() {
2475        let items = vec![
2476            make_scored("big", "calls", 1, 500),
2477        ];
2478        let (included, info) = greedy_fill(&items, 100);
2479        assert_eq!(included.len(), 1);
2480        assert!(included[0].truncated);
2481        assert!(included[0].token_estimate <= 100,
2482            "Truncated item tokens {} should be ≤ budget 100", included[0].token_estimate);
2483        assert_eq!(info.truncated_count, 1);
2484    }
2485
2486    #[test]
2487    fn test_greedy_fill_many_small_items() {
2488        // 20 items × 10 tokens = 200 total, budget 150
2489        let items: Vec<ScoredCandidate> = (0..20)
2490            .map(|i| make_scored(&format!("item-{}", i), "calls", 1, 10))
2491            .collect();
2492        let (included, info) = greedy_fill(&items, 150);
2493        assert_eq!(included.len(), 15, "Should fit exactly 15 items (150/10)");
2494        assert_eq!(info.dropped_count, 5);
2495        assert_eq!(info.budget_used, 150);
2496    }
2497
2498    // --- budget_fit_by_category tests ---
2499
2500    #[test]
2501    fn test_budget_targets_never_truncated() {
2502        // Target consumes most of the budget
2503        let targets = vec![make_target("t1", 800)];
2504        let deps = vec![make_scored("d1", "calls", 1, 100)];
2505        let callers = vec![make_scored("c1", "calls", 1, 100)];
2506        let tests = vec![make_scored("test1", "tests_for", 1, 100)];
2507
2508        let result = budget_fit_by_category(&targets, deps, callers, tests, 1000);
2509
2510        // Targets always included
2511        assert_eq!(result.targets.len(), 1);
2512        assert_eq!(result.targets[0].node_id, "t1");
2513        // Only 200 budget remaining for deps+callers+tests (300 needed)
2514        let non_target_count = result.dependencies.len() + result.callers.len() + result.tests.len();
2515        assert!(non_target_count <= 3, "Some items may be truncated or dropped");
2516    }
2517
2518    #[test]
2519    fn test_budget_priority_deps_before_callers() {
2520        let targets = vec![make_target("t1", 100)];
2521        // 400 budget - 100 target = 300 remaining
2522        // 2 deps × 100 = 200, 2 callers × 100 = 200 → only 300 available
2523        let deps = vec![
2524            make_scored("d1", "calls", 1, 100),
2525            make_scored("d2", "imports", 1, 100),
2526        ];
2527        let callers = vec![
2528            make_scored("c1", "calls", 1, 100),
2529            make_scored("c2", "calls", 1, 100),
2530        ];
2531        let tests: Vec<ScoredCandidate> = vec![];
2532
2533        let result = budget_fit_by_category(&targets, deps, callers, tests, 400);
2534
2535        // Both direct deps should be fully included (200 tokens)
2536        assert_eq!(result.dependencies.len(), 2, "Both deps should fit");
2537        // Callers get remaining 100 — one fits, one truncated or dropped
2538        assert!(result.callers.len() >= 1, "At least one caller should fit");
2539        // Total non-target tokens ≤ 300
2540        let dep_tokens: usize = result.dependencies.iter().map(|d| d.token_estimate).sum();
2541        let caller_tokens: usize = result.callers.iter().map(|c| c.token_estimate).sum();
2542        assert!(dep_tokens + caller_tokens <= 300);
2543    }
2544
2545    #[test]
2546    fn test_budget_priority_callers_before_tests() {
2547        let targets = vec![make_target("t1", 50)];
2548        // 200 budget - 50 target = 150 remaining
2549        let deps: Vec<ScoredCandidate> = vec![]; // no deps
2550        let callers = vec![make_scored("c1", "calls", 1, 100)];
2551        let tests = vec![make_scored("test1", "tests_for", 1, 100)];
2552
2553        let result = budget_fit_by_category(&targets, deps, callers, tests, 200);
2554
2555        // Caller should be fully included (100)
2556        assert_eq!(result.callers.len(), 1);
2557        assert!(!result.callers[0].truncated, "Caller should not be truncated");
2558        // Test gets remaining 50 → truncated or dropped
2559        if !result.tests.is_empty() {
2560            assert!(result.tests[0].truncated || result.tests[0].token_estimate <= 50);
2561        }
2562    }
2563
2564    #[test]
2565    fn test_budget_priority_tests_before_transitive() {
2566        let targets = vec![make_target("t1", 50)];
2567        // 300 budget - 50 = 250 remaining
2568        let deps = vec![
2569            make_scored("direct", "calls", 1, 100),
2570            make_scored("trans", "calls", 2, 100),  // hop=2 → transitive
2571        ];
2572        let callers: Vec<ScoredCandidate> = vec![];
2573        let tests = vec![make_scored("test1", "tests_for", 1, 100)];
2574
2575        let result = budget_fit_by_category(&targets, deps, callers, tests, 300);
2576
2577        // Direct dep should be included first
2578        let has_direct = result.dependencies.iter().any(|d| d.node_id == "direct");
2579        assert!(has_direct, "Direct dep should be included");
2580
2581        // Test should be included before transitive dep
2582        assert_eq!(result.tests.len(), 1, "Test should be included");
2583        assert!(!result.tests[0].truncated, "Test should not be truncated");
2584    }
2585
2586    #[test]
2587    fn test_budget_transitive_furthest_dropped_first() {
2588        let targets = vec![make_target("t1", 50)];
2589        // 200 budget - 50 = 150 remaining
2590        let deps = vec![
2591            make_scored("hop2", "calls", 2, 80),
2592            make_scored("hop3", "calls", 3, 80),
2593            make_scored("hop4", "calls", 4, 80),
2594        ];
2595        let callers: Vec<ScoredCandidate> = vec![];
2596        let tests: Vec<ScoredCandidate> = vec![];
2597
2598        let result = budget_fit_by_category(&targets, deps, callers, tests, 200);
2599
2600        // With 150 budget: hop2 (80) fits, hop3 (80) → remaining 70 ≥ 32 → truncated
2601        // hop4 may be dropped
2602        let dep_ids: Vec<&str> = result.dependencies.iter().map(|d| d.node_id.as_str()).collect();
2603        assert!(dep_ids.contains(&"hop2"), "Closest transitive should be included");
2604        // hop4 (furthest) should be dropped or at least last
2605        if dep_ids.contains(&"hop4") {
2606            // If hop4 included, it must be after hop2 and hop3
2607            let pos4 = dep_ids.iter().position(|&id| id == "hop4").unwrap();
2608            let pos2 = dep_ids.iter().position(|&id| id == "hop2").unwrap();
2609            assert!(pos4 > pos2, "hop4 should be after hop2");
2610        }
2611    }
2612
2613    #[test]
2614    fn test_budget_everything_fits() {
2615        let targets = vec![make_target("t1", 100)];
2616        let deps = vec![
2617            make_scored("d1", "calls", 1, 100),
2618            make_scored("d2", "imports", 2, 100),
2619        ];
2620        let callers = vec![make_scored("c1", "calls", 1, 100)];
2621        let tests = vec![make_scored("test1", "tests_for", 1, 100)];
2622
2623        let result = budget_fit_by_category(&targets, deps, callers, tests, 10000);
2624
2625        // Everything should fit with no truncation
2626        assert_eq!(result.targets.len(), 1);
2627        assert_eq!(result.dependencies.len(), 2);
2628        assert_eq!(result.callers.len(), 1);
2629        assert_eq!(result.tests.len(), 1);
2630        assert_eq!(result.truncation_info.truncated_count, 0);
2631        assert_eq!(result.truncation_info.dropped_count, 0);
2632    }
2633
2634    #[test]
2635    fn test_budget_empty_categories() {
2636        let targets = vec![make_target("t1", 100)];
2637        let deps: Vec<ScoredCandidate> = vec![];
2638        let callers: Vec<ScoredCandidate> = vec![];
2639        let tests: Vec<ScoredCandidate> = vec![];
2640
2641        let result = budget_fit_by_category(&targets, deps, callers, tests, 500);
2642
2643        assert_eq!(result.targets.len(), 1);
2644        assert!(result.dependencies.is_empty());
2645        assert!(result.callers.is_empty());
2646        assert!(result.tests.is_empty());
2647        assert_eq!(result.truncation_info.truncated_count, 0);
2648        assert_eq!(result.truncation_info.dropped_count, 0);
2649    }
2650
2651    #[test]
2652    fn test_budget_multiple_targets() {
2653        let targets = vec![
2654            make_target("t1", 200),
2655            make_target("t2", 200),
2656            make_target("t3", 200),
2657        ];
2658        let deps = vec![make_scored("d1", "calls", 1, 100)];
2659        let callers: Vec<ScoredCandidate> = vec![];
2660        let tests: Vec<ScoredCandidate> = vec![];
2661
2662        // Budget = 700 → targets use 600, dep gets 100
2663        let result = budget_fit_by_category(&targets, deps, callers, tests, 700);
2664
2665        assert_eq!(result.targets.len(), 3, "All targets must be included");
2666        assert_eq!(result.dependencies.len(), 1, "Dep should fit in remaining 100");
2667    }
2668
2669    #[test]
2670    fn test_budget_target_exceeds_budget() {
2671        // Target alone is 500, budget is 300 — targets are NEVER truncated
2672        let targets = vec![make_target("big-target", 500)];
2673        let deps = vec![make_scored("d1", "calls", 1, 100)];
2674        let callers: Vec<ScoredCandidate> = vec![];
2675        let tests: Vec<ScoredCandidate> = vec![];
2676
2677        let result = budget_fit_by_category(&targets, deps, callers, tests, 300);
2678
2679        // Target MUST be included regardless
2680        assert_eq!(result.targets.len(), 1);
2681        assert_eq!(result.targets[0].node_id, "big-target");
2682        // Remaining is 0 (saturating_sub), so dep is dropped
2683        assert!(result.dependencies.is_empty() || result.dependencies[0].truncated,
2684            "Dep should be dropped or truncated when target exceeds budget");
2685    }
2686
2687    // --- ContextResult tests ---
2688
2689    #[test]
2690    fn test_context_result_total_included() {
2691        let targets = vec![make_target("t1", 100)];
2692        let deps = vec![
2693            make_scored("d1", "calls", 1, 50),
2694            make_scored("d2", "imports", 1, 50),
2695        ];
2696        let callers = vec![make_scored("c1", "calls", 1, 50)];
2697        let tests = vec![make_scored("test1", "tests_for", 1, 50)];
2698
2699        let result = budget_fit_by_category(&targets, deps, callers, tests, 10000);
2700        assert_eq!(result.total_included(), 5); // 1 target + 2 deps + 1 caller + 1 test
2701    }
2702
2703    #[test]
2704    fn test_context_result_estimated_tokens() {
2705        let targets = vec![make_target("t1", 100)];
2706        let deps = vec![make_scored("d1", "calls", 1, 200)];
2707        let callers: Vec<ScoredCandidate> = vec![];
2708        let tests: Vec<ScoredCandidate> = vec![];
2709
2710        let result = budget_fit_by_category(&targets, deps, callers, tests, 10000);
2711        // estimated_tokens = budget - remaining = tokens actually used
2712        assert!(result.estimated_tokens > 0);
2713        assert!(result.estimated_tokens <= 10000);
2714    }
2715
2716    // --- TruncationInfo tests ---
2717
2718    #[test]
2719    fn test_truncation_info_merge() {
2720        let mut a = TruncationInfo { truncated_count: 1, dropped_count: 2, budget_used: 100 };
2721        let b = TruncationInfo { truncated_count: 3, dropped_count: 4, budget_used: 200 };
2722        a.merge(&b);
2723        assert_eq!(a.truncated_count, 4);
2724        assert_eq!(a.dropped_count, 6);
2725        assert_eq!(a.budget_used, 300);
2726    }
2727
2728    #[test]
2729    fn test_truncation_info_default() {
2730        let info = TruncationInfo::default();
2731        assert_eq!(info.truncated_count, 0);
2732        assert_eq!(info.dropped_count, 0);
2733        assert_eq!(info.budget_used, 0);
2734    }
2735
2736    // --- TargetContext tests ---
2737
2738    #[test]
2739    fn test_target_context_token_estimate() {
2740        let t = TargetContext::new(
2741            "t1".into(),
2742            Some("My Function".into()),
2743            Some("/src/lib.rs".into()),
2744            Some("fn my_func() -> i32".into()),
2745            Some("/// Does something".into()),
2746            Some("A function that does something".into()),
2747            Some("fn my_func() -> i32 { 42 }".into()),
2748        );
2749        assert!(t.token_estimate > 0, "Token estimate should be positive");
2750        // Total bytes: "My Function" + "A function..." + "fn my_func..." + "/// Does..." + "fn my_func...{42}" + 50 overhead
2751        // = 11 + 30 + 20 + 18 + 26 + 50 = 155 bytes → 155/4 = 38 tokens
2752        assert!(t.token_estimate >= 30, "Should be at least 30 tokens");
2753    }
2754
2755    #[test]
2756    fn test_target_context_empty_fields() {
2757        let t = TargetContext::new(
2758            "t1".into(), None, None, None, None, None, None,
2759        );
2760        // Only 50 bytes overhead → 50/4 = 12 tokens
2761        assert!(t.token_estimate >= 1, "Even empty target has overhead tokens");
2762    }
2763
2764    // --- ContextItem tests ---
2765
2766    #[test]
2767    fn test_context_item_from_scored_not_truncated() {
2768        let sc = make_scored("func1", "calls", 1, 100);
2769        let item = ContextItem::from_scored(&sc, false);
2770        assert_eq!(item.node_id, "func1");
2771        assert_eq!(item.connecting_relation, "calls");
2772        assert!(!item.truncated);
2773        assert!(item.content.is_some());
2774    }
2775
2776    #[test]
2777    fn test_context_item_from_scored_truncated() {
2778        let sc = make_scored("big-func", "calls", 1, 500);
2779        let item = ContextItem::from_scored_truncated(&sc, 50);
2780        assert_eq!(item.node_id, "big-func");
2781        assert!(item.truncated);
2782        assert!(item.token_estimate <= 50,
2783            "Truncated item should have ≤ budget tokens, got {}", item.token_estimate);
2784    }
2785
2786    // --- Integration: realistic scenario ---
2787
2788    #[test]
2789    fn test_realistic_truncation_scenario() {
2790        // Simulates a real context assembly with 1 target, mixed deps/callers/tests
2791        let targets = vec![make_target("parse_yaml", 150)];
2792
2793        let deps = vec![
2794            make_scored("load_file", "calls", 1, 80),       // direct dep
2795            make_scored("validate", "calls", 1, 60),          // direct dep
2796            make_scored("serde_yaml", "imports", 1, 40),       // direct dep
2797            make_scored("deep_util", "calls", 3, 100),         // transitive
2798        ];
2799        let callers = vec![
2800            make_scored("main_cli", "calls", 1, 120),
2801            make_scored("api_handler", "calls", 1, 80),
2802        ];
2803        let tests = vec![
2804            make_scored("test_parse", "tests_for", 1, 70),
2805            make_scored("test_parse_edge", "tests_for", 1, 50),
2806        ];
2807
2808        // Budget: 150 (target) + ~500 for others = 650
2809        let result = budget_fit_by_category(&targets, deps, callers, tests, 650);
2810
2811        // Target always included
2812        assert_eq!(result.targets.len(), 1);
2813        assert_eq!(result.targets[0].node_id, "parse_yaml");
2814
2815        // Check priority: direct deps should be included before transitive
2816        let dep_ids: Vec<&str> = result.dependencies.iter().map(|d| d.node_id.as_str()).collect();
2817        // All 3 direct deps (80+60+40=180) should fit
2818        assert!(dep_ids.contains(&"load_file"), "Direct dep should be included");
2819        assert!(dep_ids.contains(&"validate"), "Direct dep should be included");
2820        assert!(dep_ids.contains(&"serde_yaml"), "Direct dep should be included");
2821
2822        // Verify total doesn't exceed budget
2823        assert!(result.estimated_tokens <= 650,
2824            "Total tokens {} should be ≤ budget 650", result.estimated_tokens);
2825
2826        // Verify truncation info is consistent
2827        let _total_in = result.total_included();
2828        let total_possible = 4 + 2 + 2; // deps + callers + tests (excluding target)
2829        let items_included = result.dependencies.len() + result.callers.len() + result.tests.len();
2830        // items_included + dropped = total_possible
2831        assert_eq!(
2832            items_included + result.truncation_info.dropped_count,
2833            total_possible,
2834            "included ({}) + dropped ({}) should equal total possible ({})",
2835            items_included, result.truncation_info.dropped_count, total_possible,
2836        );
2837        // truncated_count should match items with truncated=true
2838        let actually_truncated = result.dependencies.iter().filter(|d| d.truncated).count()
2839            + result.callers.iter().filter(|c| c.truncated).count()
2840            + result.tests.iter().filter(|t| t.truncated).count();
2841        assert_eq!(
2842            result.truncation_info.truncated_count, actually_truncated,
2843            "Truncation info count should match actual truncated items",
2844        );
2845    }
2846
2847    #[test]
2848    fn test_budget_direct_deps_separated_from_transitive() {
2849        // Verify that hop=1 goes to direct deps and hop>1 goes to transitive
2850        let targets = vec![make_target("t1", 50)];
2851        let deps = vec![
2852            make_scored("hop1a", "calls", 1, 30),   // direct
2853            make_scored("hop1b", "imports", 1, 30),  // direct
2854            make_scored("hop2a", "calls", 2, 30),    // transitive
2855            make_scored("hop3a", "calls", 3, 30),    // transitive
2856        ];
2857        let callers: Vec<ScoredCandidate> = vec![];
2858        let tests: Vec<ScoredCandidate> = vec![];
2859
2860        let result = budget_fit_by_category(&targets, deps, callers, tests, 10000);
2861
2862        // All 4 should be in dependencies
2863        assert_eq!(result.dependencies.len(), 4);
2864        // First two should be the direct deps (hop=1), then transitive sorted by hop
2865        // Direct deps come first because they're filled first by budget_fit_by_category
2866        let ids: Vec<&str> = result.dependencies.iter().map(|d| d.node_id.as_str()).collect();
2867        // Direct deps (hop1a, hop1b) should appear before transitive (hop2a, hop3a)
2868        let pos_1a = ids.iter().position(|&id| id == "hop1a").unwrap();
2869        let pos_1b = ids.iter().position(|&id| id == "hop1b").unwrap();
2870        let pos_2a = ids.iter().position(|&id| id == "hop2a").unwrap();
2871        let pos_3a = ids.iter().position(|&id| id == "hop3a").unwrap();
2872        assert!(pos_1a < pos_2a, "Direct dep hop1a should be before transitive hop2a");
2873        assert!(pos_1b < pos_3a, "Direct dep hop1b should be before transitive hop3a");
2874        assert!(pos_2a < pos_3a, "Closer transitive (hop2) should be before further (hop3)");
2875    }
2876
2877    #[test]
2878    fn test_budget_stress_many_items() {
2879        let targets = vec![make_target("t1", 50)];
2880        // 50 deps, 20 callers, 10 tests
2881        let deps: Vec<ScoredCandidate> = (0..50)
2882            .map(|i| make_scored(&format!("dep-{}", i), "calls", (i / 10 + 1) as u32, 20))
2883            .collect();
2884        let callers: Vec<ScoredCandidate> = (0..20)
2885            .map(|i| make_scored(&format!("caller-{}", i), "calls", 1, 15))
2886            .collect();
2887        let tests: Vec<ScoredCandidate> = (0..10)
2888            .map(|i| make_scored(&format!("test-{}", i), "tests_for", 1, 25))
2889            .collect();
2890
2891        // Budget fits target (50) + some deps/callers/tests but not all
2892        // Total possible: 50 + 50*20 + 20*15 + 10*25 = 50 + 1000 + 300 + 250 = 1600
2893        let result = budget_fit_by_category(&targets, deps, callers, tests, 500);
2894
2895        assert_eq!(result.targets.len(), 1);
2896        // Should have some items in each category but not all
2897        assert!(result.estimated_tokens <= 500,
2898            "Tokens {} should be ≤ 500", result.estimated_tokens);
2899        assert!(result.truncation_info.dropped_count > 0,
2900            "Some items should be dropped with tight budget");
2901    }
2902
2903    // --- estimate_tokens_for_target_fields ---
2904
2905    #[test]
2906    fn test_estimate_tokens_target_all_none() {
2907        let tokens = estimate_tokens_for_target_fields(None, None, None, None, None);
2908        // 0 + 50 overhead = 50 bytes → 50/4 = 12
2909        assert_eq!(tokens, 12);
2910    }
2911
2912    #[test]
2913    fn test_estimate_tokens_target_with_content() {
2914        let tokens = estimate_tokens_for_target_fields(
2915            Some("title"),           // 5
2916            Some("description"),     // 11
2917            Some("fn foo()"),        // 8
2918            Some("/// doc"),         // 7
2919            Some("fn foo() { 42 }"), // 16
2920        );
2921        // 5 + 11 + 8 + 7 + 16 + 50 = 97 bytes → 97/4 = 24
2922        assert_eq!(tokens, 24);
2923    }
2924
2925    // =========================================================================
2926    // §8 Tests: Source Code Loading from Disk (GOAL-4.1b)
2927    // =========================================================================
2928
2929    /// Helper: create a temp dir with a source file.
2930    fn setup_source_file(filename: &str, content: &str) -> TempDir {
2931        let tmp = TempDir::new().unwrap();
2932        let src_dir = tmp.path().join("src");
2933        fs::create_dir_all(&src_dir).unwrap();
2934        fs::write(src_dir.join(filename), content).unwrap();
2935        tmp
2936    }
2937
2938    #[test]
2939    fn test_load_source_full_file() {
2940        let content = "line 1\nline 2\nline 3\nline 4\nline 5\n";
2941        let tmp = setup_source_file("lib.rs", content);
2942
2943        let result = load_source_from_disk(
2944            Some("src/lib.rs"), None, None, tmp.path()
2945        );
2946        assert!(result.is_some());
2947        let r = result.unwrap();
2948        assert!(!r.is_range);
2949        assert_eq!(r.start_line, None);
2950        assert_eq!(r.end_line, None);
2951        assert_eq!(r.line_count, 5);
2952        assert!(r.source.contains("line 1"));
2953        assert!(r.source.contains("line 5"));
2954    }
2955
2956    #[test]
2957    fn test_load_source_line_range() {
2958        let content = "line 1\nline 2\nline 3\nline 4\nline 5\n";
2959        let tmp = setup_source_file("lib.rs", content);
2960
2961        let result = load_source_from_disk(
2962            Some("src/lib.rs"), Some(2), Some(4), tmp.path()
2963        );
2964        assert!(result.is_some());
2965        let r = result.unwrap();
2966        assert!(r.is_range);
2967        assert_eq!(r.start_line, Some(2));
2968        assert_eq!(r.end_line, Some(4));
2969        assert_eq!(r.line_count, 3); // lines 2, 3, 4 (inclusive range)
2970        assert!(r.source.contains("line 2"));
2971        assert!(r.source.contains("line 3"));
2972        assert!(r.source.contains("line 4"));
2973        assert!(!r.source.contains("line 1"));
2974        assert!(!r.source.contains("line 5"));
2975    }
2976
2977    #[test]
2978    fn test_load_source_from_start_line_to_eof() {
2979        let content = "line 1\nline 2\nline 3\nline 4\nline 5\n";
2980        let tmp = setup_source_file("lib.rs", content);
2981
2982        let result = load_source_from_disk(
2983            Some("src/lib.rs"), Some(3), None, tmp.path()
2984        );
2985        assert!(result.is_some());
2986        let r = result.unwrap();
2987        assert!(r.is_range);
2988        assert_eq!(r.start_line, Some(3));
2989        assert!(r.source.contains("line 3"));
2990        assert!(r.source.contains("line 4"));
2991        assert!(r.source.contains("line 5"));
2992        assert!(!r.source.contains("line 1"));
2993    }
2994
2995    #[test]
2996    fn test_load_source_none_file_path() {
2997        let tmp = TempDir::new().unwrap();
2998        let result = load_source_from_disk(None, None, None, tmp.path());
2999        assert!(result.is_none());
3000    }
3001
3002    #[test]
3003    fn test_load_source_nonexistent_file() {
3004        let tmp = TempDir::new().unwrap();
3005        let result = load_source_from_disk(
3006            Some("src/nonexistent.rs"), None, None, tmp.path()
3007        );
3008        assert!(result.is_none());
3009    }
3010
3011    #[test]
3012    fn test_load_source_start_line_beyond_file() {
3013        let content = "line 1\nline 2\n";
3014        let tmp = setup_source_file("lib.rs", content);
3015
3016        let result = load_source_from_disk(
3017            Some("src/lib.rs"), Some(100), Some(200), tmp.path()
3018        );
3019        assert!(result.is_none(), "start_line beyond file should return None");
3020    }
3021
3022    #[test]
3023    fn test_load_source_single_line_range() {
3024        let content = "fn foo() {}\nfn bar() {}\nfn baz() {}\n";
3025        let tmp = setup_source_file("lib.rs", content);
3026
3027        let result = load_source_from_disk(
3028            Some("src/lib.rs"), Some(2), Some(2), tmp.path()
3029        );
3030        // start=2, end=2 → end_idx = min(2, 3) = 2, range [1..2] = 1 line
3031        assert!(result.is_some());
3032        let r = result.unwrap();
3033        assert_eq!(r.line_count, 1);
3034        assert!(r.source.contains("fn bar()"));
3035        assert!(!r.source.contains("fn foo()"));
3036        assert!(!r.source.contains("fn baz()"));
3037    }
3038
3039    #[test]
3040    fn test_load_source_end_line_clamped_to_file_length() {
3041        let content = "line 1\nline 2\nline 3\n";
3042        let tmp = setup_source_file("lib.rs", content);
3043
3044        // end_line = 1000 but file only has 3 lines
3045        let result = load_source_from_disk(
3046            Some("src/lib.rs"), Some(1), Some(1000), tmp.path()
3047        );
3048        assert!(result.is_some());
3049        let r = result.unwrap();
3050        assert_eq!(r.line_count, 3);
3051        assert!(r.source.contains("line 1"));
3052        assert!(r.source.contains("line 3"));
3053    }
3054
3055    #[test]
3056    fn test_load_source_security_outside_root() {
3057        let tmp = setup_source_file("lib.rs", "safe content");
3058        // Try to escape using ../
3059        let result = load_source_from_disk(
3060            Some("../../etc/passwd"), None, None, tmp.path()
3061        );
3062        // On macOS/Linux, /etc/passwd exists but is outside project root
3063        // canonicalize will resolve the path and starts_with check will reject
3064        assert!(result.is_none(), "Should reject path outside project root");
3065    }
3066
3067    #[test]
3068    fn test_load_source_absolute_path_under_root() {
3069        let content = "fn absolute() {}";
3070        let tmp = setup_source_file("lib.rs", content);
3071        let abs_path = tmp.path().join("src/lib.rs");
3072        let abs_str = abs_path.to_str().unwrap();
3073
3074        let result = load_source_from_disk(
3075            Some(abs_str), None, None, tmp.path()
3076        );
3077        assert!(result.is_some());
3078        assert!(result.unwrap().source.contains("fn absolute()"));
3079    }
3080
3081    #[test]
3082    fn test_load_source_empty_file() {
3083        let tmp = setup_source_file("empty.rs", "");
3084
3085        let result = load_source_from_disk(
3086            Some("src/empty.rs"), None, None, tmp.path()
3087        );
3088        assert!(result.is_some());
3089        let r = result.unwrap();
3090        assert_eq!(r.source, "");
3091        assert_eq!(r.line_count, 0);
3092    }
3093
3094    #[test]
3095    fn test_load_source_unicode_content() {
3096        let content = "// 中文注释\nfn 函数() -> String {\n    \"こんにちは\".into()\n}\n";
3097        let tmp = setup_source_file("unicode.rs", content);
3098
3099        let result = load_source_from_disk(
3100            Some("src/unicode.rs"), None, None, tmp.path()
3101        );
3102        assert!(result.is_some());
3103        let r = result.unwrap();
3104        assert!(r.source.contains("中文注释"));
3105        assert!(r.source.contains("こんにちは"));
3106        assert_eq!(r.line_count, 4);
3107    }
3108
3109    #[test]
3110    fn test_load_source_line_range_with_unicode() {
3111        let content = "line 1\n中文行2\nline 3\n日本語行4\nline 5\n";
3112        let tmp = setup_source_file("mixed.rs", content);
3113
3114        let result = load_source_from_disk(
3115            Some("src/mixed.rs"), Some(2), Some(4), tmp.path()
3116        );
3117        assert!(result.is_some());
3118        let r = result.unwrap();
3119        assert!(r.source.contains("中文行2"));
3120        assert!(r.source.contains("line 3"));
3121        assert!(!r.source.contains("line 1"));
3122    }
3123
3124    #[test]
3125    fn test_load_source_result_fields() {
3126        let content = "a\nb\nc\nd\ne\n";
3127        let tmp = setup_source_file("test.rs", content);
3128
3129        // Full file
3130        let r1 = load_source_from_disk(Some("src/test.rs"), None, None, tmp.path()).unwrap();
3131        assert!(!r1.is_range);
3132        assert_eq!(r1.start_line, None);
3133        assert_eq!(r1.end_line, None);
3134
3135        // Range
3136        let r2 = load_source_from_disk(Some("src/test.rs"), Some(2), Some(4), tmp.path()).unwrap();
3137        assert!(r2.is_range);
3138        assert_eq!(r2.start_line, Some(2));
3139        // end_line is min(4, 5) = 4
3140        assert!(r2.end_line.unwrap() <= 5);
3141    }
3142
3143    #[test]
3144    fn test_load_source_start_line_zero_falls_through() {
3145        let content = "line 1\nline 2\n";
3146        let tmp = setup_source_file("lib.rs", content);
3147
3148        // start_line = 0 doesn't match the guard `start >= 1`, falls to full file
3149        let result = load_source_from_disk(
3150            Some("src/lib.rs"), Some(0), Some(2), tmp.path()
3151        );
3152        assert!(result.is_some());
3153        let r = result.unwrap();
3154        // Falls through to full file since start=0 doesn't match range guard
3155        assert!(!r.is_range);
3156    }
3157
3158    #[test]
3159    fn test_load_source_nested_directory() {
3160        let tmp = TempDir::new().unwrap();
3161        let deep_dir = tmp.path().join("src").join("module").join("sub");
3162        fs::create_dir_all(&deep_dir).unwrap();
3163        fs::write(deep_dir.join("deep.rs"), "fn deep() {}").unwrap();
3164
3165        let result = load_source_from_disk(
3166            Some("src/module/sub/deep.rs"), None, None, tmp.path()
3167        );
3168        assert!(result.is_some());
3169        assert!(result.unwrap().source.contains("fn deep()"));
3170    }
3171
3172    // =========================================================================
3173    // §9 Integration Tests: Scoring + Truncation + Source Loading + Traversal
3174    // =========================================================================
3175
3176    #[test]
3177    fn test_integration_score_then_truncate() {
3178        // Build candidates, score them, then budget-fit with truncation
3179        let c1 = make_candidate_with_content("calls", 1, &"x".repeat(400), "fn called()");
3180        let c2 = make_candidate_with_content("imports", 1, &"y".repeat(200), "use crate::dep");
3181        let c3 = make_candidate_with_content("depends_on", 2, &"z".repeat(300), "fn transitive()");
3182
3183        let scored = score_candidates(&[c1, c2, c3]);
3184        // calls and imports should score highest (tier 1)
3185        assert!(scored[0].score >= scored[1].score);
3186        assert!(scored[1].score >= scored[2].score);
3187
3188        // Now feed into budget_fit_by_category
3189        let targets = vec![make_target("main_fn", 50)];
3190
3191        // Partition scored into direct deps and transitive
3192        let (direct, trans): (Vec<_>, Vec<_>) = scored.into_iter()
3193            .partition(|s| s.candidate.hop_distance == 1);
3194
3195        let result = budget_fit_by_category(&targets, 
3196            [direct, trans].concat(),
3197            vec![], vec![], 200);
3198
3199        // Target always present
3200        assert_eq!(result.targets.len(), 1);
3201        // Some deps should be included, some may be truncated
3202        assert!(!result.dependencies.is_empty());
3203        assert!(result.estimated_tokens <= 200);
3204    }
3205
3206    #[test]
3207    fn test_integration_source_loading_feeds_target_context() {
3208        // Source loading → TargetContext → budget_fit
3209        let tmp = setup_source_file("main.rs", "fn main() {\n    println!(\"hello\");\n}\n");
3210
3211        let loaded = load_source_from_disk(
3212            Some("src/main.rs"), None, None, tmp.path()
3213        ).unwrap();
3214
3215        let target = TargetContext::new(
3216            "main_fn".into(),
3217            Some("main".into()),
3218            Some("src/main.rs".into()),
3219            Some("fn main()".into()),
3220            None,
3221            None,
3222            Some(loaded.source.clone()),
3223        );
3224        assert!(target.token_estimate > 0);
3225        assert!(target.source_code.as_ref().unwrap().contains("println!"));
3226
3227        // Budget fit with this target
3228        let deps = vec![make_scored("dep1", "calls", 1, 30)];
3229        let result = budget_fit_by_category(&[target], deps, vec![], vec![], 500);
3230        assert_eq!(result.targets.len(), 1);
3231        assert!(result.targets[0].source_code.as_ref().unwrap().contains("println!"));
3232    }
3233
3234    #[test]
3235    fn test_integration_source_range_loading() {
3236        // Load a range, verify it gets correct lines for TargetContext
3237        let content = "use std::io;\n\nfn important() -> Result<()> {\n    let x = 42;\n    Ok(())\n}\n\nfn other() {}\n";
3238        let tmp = setup_source_file("lib.rs", content);
3239
3240        let loaded = load_source_from_disk(
3241            Some("src/lib.rs"), Some(3), Some(6), tmp.path()
3242        ).unwrap();
3243        assert!(loaded.source.contains("fn important()"));
3244        assert!(loaded.source.contains("Ok(())"));
3245        assert!(!loaded.source.contains("fn other()"));
3246        assert!(!loaded.source.contains("use std::io"));
3247    }
3248
3249    #[test]
3250    fn test_integration_edge_traversal_categories() {
3251        // Simulate an edge traversal that categorizes nodes correctly
3252        // This tests the full pipeline: candidates → scoring → categorization → budget
3253
3254        // Target
3255        let targets = vec![make_target("parse_fn", 100)];
3256
3257        // Direct deps (hop 1, various relations)
3258        let direct_calls = make_scored("called_fn", "calls", 1, 80);
3259        let direct_import = make_scored("dep_module", "imports", 1, 50);
3260
3261        // Callers
3262        let caller = make_scored("caller_fn", "calls", 1, 60);
3263
3264        // Tests
3265        let test_fn = make_scored("test_parse", "tests_for", 1, 70);
3266
3267        // Transitive (hop 2+)
3268        let trans1 = make_scored("deep_dep", "calls", 2, 90);
3269        let trans2 = make_scored("deeper_dep", "calls", 3, 90);
3270
3271        let all_deps = vec![direct_calls, direct_import, trans1, trans2];
3272
3273        let result = budget_fit_by_category(
3274            &targets, all_deps, vec![caller], vec![test_fn], 400
3275        );
3276
3277        // Verify priority: target (100) + direct deps first, then callers, tests, transitive
3278        assert_eq!(result.targets.len(), 1);
3279
3280        // Direct deps (hop=1) should appear before transitive in dependencies
3281        let dep_ids: Vec<&str> = result.dependencies.iter()
3282            .map(|d| d.node_id.as_str()).collect();
3283        if dep_ids.contains(&"called_fn") && dep_ids.contains(&"deep_dep") {
3284            let pos_direct = dep_ids.iter().position(|&id| id == "called_fn").unwrap();
3285            let pos_trans = dep_ids.iter().position(|&id| id == "deep_dep").unwrap();
3286            assert!(pos_direct < pos_trans);
3287        }
3288
3289        assert!(result.estimated_tokens <= 400);
3290    }
3291
3292    #[test]
3293    fn test_integration_truncation_preserves_structure() {
3294        // Large content that gets truncated — verify structure is maintained
3295        let big_source = (0..100).map(|i| format!("fn func_{}() {{ /* impl */ }}", i))
3296            .collect::<Vec<_>>().join("\n");
3297
3298        let targets = vec![TargetContext::new(
3299            "big_module".into(),
3300            Some("Big Module".into()),
3301            Some("src/big.rs".into()),
3302            Some("mod big".into()),
3303            None,
3304            None,
3305            Some(big_source.clone()),
3306        )];
3307
3308        let deps: Vec<ScoredCandidate> = (0..10).map(|i| {
3309            let source = format!("fn dep_{}() {{ /* dep impl {} */ }}", i, i);
3310            let mut sc = make_scored(&format!("dep-{}", i), "calls", 1, 40);
3311            sc.candidate.source_code = Some(source);
3312            sc
3313        }).collect();
3314
3315        let result = budget_fit_by_category(&targets, deps, vec![], vec![], 500);
3316
3317        // Target always included regardless of size
3318        assert_eq!(result.targets.len(), 1);
3319        assert_eq!(result.targets[0].node_id, "big_module");
3320
3321        // Some deps included, some may be truncated/dropped
3322        for dep in &result.dependencies {
3323            assert!(!dep.node_id.is_empty());
3324            assert_eq!(dep.connecting_relation, "calls");
3325            // Score should be visible per GOAL-4.5
3326            assert!(dep.score > 0.0);
3327        }
3328    }
3329
3330    #[test]
3331    fn test_integration_full_pipeline_realistic() {
3332        // End-to-end: create source files, load them, build targets + deps, budget fit
3333        let tmp = TempDir::new().unwrap();
3334        let src_dir = tmp.path().join("src");
3335        fs::create_dir_all(&src_dir).unwrap();
3336
3337        // Target source
3338        fs::write(src_dir.join("parser.rs"), concat!(
3339            "use crate::lexer::Token;\n",
3340            "\n",
3341            "pub struct Parser {\n",
3342            "    tokens: Vec<Token>,\n",
3343            "    pos: usize,\n",
3344            "}\n",
3345            "\n",
3346            "impl Parser {\n",
3347            "    pub fn new(tokens: Vec<Token>) -> Self {\n",
3348            "        Self { tokens, pos: 0 }\n",
3349            "    }\n",
3350            "\n",
3351            "    pub fn parse(&mut self) -> Ast {\n",
3352            "        // parsing logic\n",
3353            "        todo!()\n",
3354            "    }\n",
3355            "}\n",
3356        )).unwrap();
3357
3358        // Dep source
3359        fs::write(src_dir.join("lexer.rs"), concat!(
3360            "pub enum Token {\n",
3361            "    Ident(String),\n",
3362            "    Number(i64),\n",
3363            "    Punct(char),\n",
3364            "}\n",
3365            "\n",
3366            "pub fn tokenize(input: &str) -> Vec<Token> {\n",
3367            "    vec![] // stub\n",
3368            "}\n",
3369        )).unwrap();
3370
3371        // Load target source
3372        let target_source = load_source_from_disk(
3373            Some("src/parser.rs"), Some(8), Some(16), tmp.path()
3374        ).unwrap();
3375        assert!(target_source.source.contains("impl Parser"));
3376
3377        // Build target
3378        let target = TargetContext::new(
3379            "parser::Parser::parse".into(),
3380            Some("Parser::parse".into()),
3381            Some("src/parser.rs".into()),
3382            Some("pub fn parse(&mut self) -> Ast".into()),
3383            Some("/// Parses tokens into AST".into()),
3384            None,
3385            Some(target_source.source),
3386        );
3387
3388        // Build deps (lexer is called by parser)
3389        let lexer_source = load_source_from_disk(
3390            Some("src/lexer.rs"), None, None, tmp.path()
3391        ).unwrap();
3392
3393        let mut lexer_candidate = make_scored("lexer::tokenize", "calls", 1, 30);
3394        lexer_candidate.candidate.source_code = Some(lexer_source.source);
3395        lexer_candidate.candidate.file_path = Some("src/lexer.rs".to_string());
3396
3397        // Budget fit
3398        let result = budget_fit_by_category(
3399            &[target], vec![lexer_candidate], vec![], vec![], 500
3400        );
3401
3402        // Verify full pipeline output
3403        assert_eq!(result.targets.len(), 1);
3404        assert_eq!(result.targets[0].node_id, "parser::Parser::parse");
3405        assert!(result.targets[0].source_code.as_ref().unwrap().contains("impl Parser"));
3406
3407        assert!(!result.dependencies.is_empty());
3408        assert_eq!(result.dependencies[0].node_id, "lexer::tokenize");
3409        assert_eq!(result.dependencies[0].connecting_relation, "calls");
3410        assert!(result.dependencies[0].score > 0.0, "GOAL-4.5: score visible");
3411
3412        assert!(result.estimated_tokens <= 500);
3413        assert_eq!(result.truncation_info.dropped_count, 0);
3414    }
3415
3416    #[test]
3417    fn test_integration_score_ordering_matches_budget_priority() {
3418        // Verify that the scoring order (calls > type_ref > structural > unknown)
3419        // aligns with budget priority (direct deps filled first)
3420        let high = make_scored("caller", "calls", 1, 50);       // score ≈ 0.90
3421        let med = make_scored("type_dep", "type_reference", 1, 50); // score ≈ 0.78
3422        let low = make_scored("struct_dep", "depends_on", 1, 50);   // score ≈ 0.64
3423
3424        // Score ordering
3425        assert!(high.score > med.score, "calls should score higher than type_reference");
3426        assert!(med.score > low.score, "type_reference should score higher than depends_on");
3427
3428        // All three as direct deps, tight budget
3429        let targets = vec![make_target("t", 50)];
3430        let result = budget_fit_by_category(
3431            &targets, vec![high.clone(), med.clone(), low.clone()], vec![], vec![], 200
3432        );
3433
3434        // With 150 budget for deps (200 - 50 target), all three fit (3 × 50 = 150)
3435        assert_eq!(result.dependencies.len(), 3);
3436        // Order preserved from input (greedy_fill preserves order)
3437        assert_eq!(result.dependencies[0].node_id, "caller");
3438        assert_eq!(result.dependencies[1].node_id, "type_dep");
3439        assert_eq!(result.dependencies[2].node_id, "struct_dep");
3440    }
3441
3442    // =========================================================================
3443    // §9 Tests: ContextQuery + Pipeline (assemble_context)
3444    // =========================================================================
3445
3446    fn make_code_node(id: &str, file_path: &str, sig: Option<&str>) -> Node {
3447        let mut n = Node::new(id, id);
3448        n.node_type = Some("function".to_string());
3449        n.file_path = Some(file_path.to_string());
3450        n.signature = sig.map(|s| s.to_string());
3451        n
3452    }
3453
3454    fn make_graph_with_deps() -> Graph {
3455        // target -> dep1 -> dep2 (transitive)
3456        // caller -> target (reverse)
3457        // test -> target (tests_for)
3458        let mut g = Graph::default();
3459        let mut target = Node::new("target", "Target Function");
3460        target.node_type = Some("function".to_string());
3461        target.file_path = Some("src/lib.rs".to_string());
3462        target.signature = Some("fn target() -> i32".to_string());
3463        g.nodes.push(target);
3464
3465        let mut dep1 = Node::new("dep1", "Direct Dependency");
3466        dep1.node_type = Some("function".to_string());
3467        dep1.file_path = Some("src/dep.rs".to_string());
3468        dep1.signature = Some("fn dep1() -> bool".to_string());
3469        g.nodes.push(dep1);
3470
3471        let mut dep2 = Node::new("dep2", "Transitive Dependency");
3472        dep2.node_type = Some("function".to_string());
3473        dep2.file_path = Some("src/deep.rs".to_string());
3474        g.nodes.push(dep2);
3475
3476        let mut caller = Node::new("caller1", "A Caller");
3477        caller.node_type = Some("function".to_string());
3478        caller.file_path = Some("src/main.rs".to_string());
3479        g.nodes.push(caller);
3480
3481        let mut test = Node::new("test1", "Test for Target");
3482        test.node_type = Some("function".to_string());
3483        test.file_path = Some("tests/test_target.rs".to_string());
3484        g.nodes.push(test);
3485
3486        // Edges
3487        g.edges.push(Edge::new("target", "dep1", "calls"));
3488        g.edges.push(Edge::new("dep1", "dep2", "calls"));
3489        g.edges.push(Edge::new("caller1", "target", "calls"));
3490        g.edges.push(Edge::new("test1", "target", "tests_for"));
3491
3492        g
3493    }
3494
3495    #[test]
3496    fn test_assemble_context_basic() {
3497        let graph = make_graph_with_deps();
3498        let query = ContextQuery {
3499            targets: vec!["target".into()],
3500            token_budget: 10000,
3501            depth: 2,
3502            ..Default::default()
3503        };
3504        let result = assemble_context(&graph, &query).unwrap();
3505        assert_eq!(result.result.targets.len(), 1);
3506        assert_eq!(result.result.targets[0].node_id, "target");
3507        assert!(!result.result.dependencies.is_empty(), "should have deps");
3508        assert!(!result.result.callers.is_empty(), "should have callers");
3509        assert!(!result.result.tests.is_empty(), "should have tests");
3510        assert!(result.stats.nodes_visited > 0);
3511        assert!(result.stats.nodes_included > 0);
3512        assert!(result.stats.budget_used > 0);
3513        assert_eq!(result.stats.budget_total, 10000);
3514    }
3515
3516    #[test]
3517    fn test_assemble_context_empty_targets_errors() {
3518        let graph = make_graph_with_deps();
3519        let query = ContextQuery {
3520            targets: vec![],
3521            ..Default::default()
3522        };
3523        let err = assemble_context(&graph, &query).unwrap_err();
3524        assert!(err.to_string().contains("at least one target"));
3525    }
3526
3527    #[test]
3528    fn test_assemble_context_nonexistent_target_errors() {
3529        let graph = make_graph_with_deps();
3530        let query = ContextQuery {
3531            targets: vec!["nonexistent".into()],
3532            ..Default::default()
3533        };
3534        let err = assemble_context(&graph, &query).unwrap_err();
3535        assert!(err.to_string().contains("not found"));
3536    }
3537
3538    #[test]
3539    fn test_assemble_context_depth_1() {
3540        let graph = make_graph_with_deps();
3541        let query = ContextQuery {
3542            targets: vec!["target".into()],
3543            token_budget: 10000,
3544            depth: 1,
3545            ..Default::default()
3546        };
3547        let result = assemble_context(&graph, &query).unwrap();
3548        // Only direct dep (dep1), not transitive (dep2).
3549        let dep_ids: Vec<&str> = result.result.dependencies.iter()
3550            .map(|d| d.node_id.as_str()).collect();
3551        assert!(dep_ids.contains(&"dep1"), "should include direct dep");
3552        assert!(!dep_ids.contains(&"dep2"), "should NOT include transitive dep at depth=1");
3553    }
3554
3555    #[test]
3556    fn test_assemble_context_depth_2_includes_transitive() {
3557        let graph = make_graph_with_deps();
3558        let query = ContextQuery {
3559            targets: vec!["target".into()],
3560            token_budget: 10000,
3561            depth: 2,
3562            ..Default::default()
3563        };
3564        let result = assemble_context(&graph, &query).unwrap();
3565        let dep_ids: Vec<&str> = result.result.dependencies.iter()
3566            .map(|d| d.node_id.as_str()).collect();
3567        assert!(dep_ids.contains(&"dep1"), "should include direct dep");
3568        assert!(dep_ids.contains(&"dep2"), "should include transitive dep at depth=2");
3569    }
3570
3571    #[test]
3572    fn test_assemble_context_include_filter_type() {
3573        let mut graph = make_graph_with_deps();
3574        // Add a class node as a dep.
3575        let mut cls = Node::new("class1", "MyClass");
3576        cls.node_type = Some("class".to_string());
3577        cls.file_path = Some("src/class.rs".to_string());
3578        graph.nodes.push(cls);
3579        graph.edges.push(Edge::new("target", "class1", "uses"));
3580
3581        let query = ContextQuery {
3582            targets: vec!["target".into()],
3583            token_budget: 10000,
3584            depth: 2,
3585            filters: ContextFilters {
3586                include_patterns: vec!["type:function".into()],
3587                ..Default::default()
3588            },
3589            ..Default::default()
3590        };
3591        let result = assemble_context(&graph, &query).unwrap();
3592        // class1 should be excluded by the type filter.
3593        let dep_ids: Vec<&str> = result.result.dependencies.iter()
3594            .map(|d| d.node_id.as_str()).collect();
3595        assert!(!dep_ids.contains(&"class1"), "class should be filtered out by type:function");
3596        assert!(dep_ids.contains(&"dep1"), "functions should pass");
3597    }
3598
3599    #[test]
3600    fn test_assemble_context_include_filter_glob() {
3601        let graph = make_graph_with_deps();
3602        let query = ContextQuery {
3603            targets: vec!["target".into()],
3604            token_budget: 10000,
3605            depth: 2,
3606            filters: ContextFilters {
3607                include_patterns: vec!["src/dep.rs".into()],
3608                ..Default::default()
3609            },
3610            ..Default::default()
3611        };
3612        let result = assemble_context(&graph, &query).unwrap();
3613        let dep_ids: Vec<&str> = result.result.dependencies.iter()
3614            .map(|d| d.node_id.as_str()).collect();
3615        assert!(dep_ids.contains(&"dep1"), "dep1 (src/dep.rs) should pass");
3616        assert!(!dep_ids.contains(&"dep2"), "dep2 (src/deep.rs) should be filtered");
3617        assert!(result.stats.nodes_excluded_by_filter > 0);
3618    }
3619
3620    #[test]
3621    fn test_assemble_context_multiple_targets() {
3622        let graph = make_graph_with_deps();
3623        let query = ContextQuery {
3624            targets: vec!["target".into(), "dep1".into()],
3625            token_budget: 10000,
3626            depth: 2,
3627            ..Default::default()
3628        };
3629        let result = assemble_context(&graph, &query).unwrap();
3630        assert_eq!(result.result.targets.len(), 2);
3631        let target_ids: Vec<&str> = result.result.targets.iter()
3632            .map(|t| t.node_id.as_str()).collect();
3633        assert!(target_ids.contains(&"target"));
3634        assert!(target_ids.contains(&"dep1"));
3635    }
3636
3637    #[test]
3638    fn test_assemble_context_tight_budget() {
3639        let graph = make_graph_with_deps();
3640        let query = ContextQuery {
3641            targets: vec!["target".into()],
3642            token_budget: 50, // Very tight budget.
3643            depth: 2,
3644            ..Default::default()
3645        };
3646        let result = assemble_context(&graph, &query).unwrap();
3647        // Targets always included, but deps/callers may be dropped.
3648        assert_eq!(result.result.targets.len(), 1);
3649        assert!(result.result.estimated_tokens <= 50 + result.result.targets[0].token_estimate,
3650            "budget should be approximately respected");
3651    }
3652
3653    #[test]
3654    fn test_assemble_context_stats_populated() {
3655        let graph = make_graph_with_deps();
3656        let query = ContextQuery {
3657            targets: vec!["target".into()],
3658            token_budget: 10000,
3659            depth: 2,
3660            ..Default::default()
3661        };
3662        let result = assemble_context(&graph, &query).unwrap();
3663        assert!(result.stats.nodes_visited >= 4, "should visit target + dep1 + dep2 + caller + test");
3664        assert!(result.stats.nodes_included >= 4, "should include target + dep1 + dep2 + caller + test");
3665        assert_eq!(result.stats.nodes_excluded_by_filter, 0, "no filter applied");
3666        assert_eq!(result.stats.budget_total, 10000);
3667        assert!(result.stats.budget_used > 0);
3668        // elapsed_ms might be 0 on fast machines, that's ok.
3669    }
3670
3671    // =========================================================================
3672    // §9.1 Tests: ContextFilters + passes_filters
3673    // =========================================================================
3674
3675    #[test]
3676    fn test_passes_filters_empty_filters() {
3677        let c = Candidate {
3678            node_id: "x".into(), node_type: "function".into(),
3679            file_path: Some("src/lib.rs".into()), signature: None,
3680            doc_comment: None, description: None, source_code: None,
3681            hop_distance: 1, modified_at: None,
3682            connecting_relation: "calls".into(), token_estimate: 10,
3683        };
3684        let filters = ContextFilters::default();
3685        assert!(passes_filters(&c, &filters));
3686    }
3687
3688    #[test]
3689    fn test_passes_filters_type_match() {
3690        let c = Candidate {
3691            node_id: "x".into(), node_type: "function".into(),
3692            file_path: None, signature: None,
3693            doc_comment: None, description: None, source_code: None,
3694            hop_distance: 1, modified_at: None,
3695            connecting_relation: "calls".into(), token_estimate: 10,
3696        };
3697        let filters = ContextFilters {
3698            include_patterns: vec!["type:function".into()],
3699            ..Default::default()
3700        };
3701        assert!(passes_filters(&c, &filters));
3702    }
3703
3704    #[test]
3705    fn test_passes_filters_type_no_match() {
3706        let c = Candidate {
3707            node_id: "x".into(), node_type: "class".into(),
3708            file_path: None, signature: None,
3709            doc_comment: None, description: None, source_code: None,
3710            hop_distance: 1, modified_at: None,
3711            connecting_relation: "calls".into(), token_estimate: 10,
3712        };
3713        let filters = ContextFilters {
3714            include_patterns: vec!["type:function".into()],
3715            ..Default::default()
3716        };
3717        assert!(!passes_filters(&c, &filters));
3718    }
3719
3720    #[test]
3721    fn test_passes_filters_glob_match() {
3722        let c = Candidate {
3723            node_id: "x".into(), node_type: "file".into(),
3724            file_path: Some("src/lib.rs".into()), signature: None,
3725            doc_comment: None, description: None, source_code: None,
3726            hop_distance: 1, modified_at: None,
3727            connecting_relation: "calls".into(), token_estimate: 10,
3728        };
3729        let filters = ContextFilters {
3730            include_patterns: vec!["**/*.rs".into()],
3731            ..Default::default()
3732        };
3733        assert!(passes_filters(&c, &filters));
3734    }
3735
3736    #[test]
3737    fn test_passes_filters_glob_no_match() {
3738        let c = Candidate {
3739            node_id: "x".into(), node_type: "file".into(),
3740            file_path: Some("src/lib.rs".into()), signature: None,
3741            doc_comment: None, description: None, source_code: None,
3742            hop_distance: 1, modified_at: None,
3743            connecting_relation: "calls".into(), token_estimate: 10,
3744        };
3745        let filters = ContextFilters {
3746            include_patterns: vec!["**/*.py".into()],
3747            ..Default::default()
3748        };
3749        assert!(!passes_filters(&c, &filters));
3750    }
3751
3752    #[test]
3753    fn test_passes_filters_exclude_ids() {
3754        let c = Candidate {
3755            node_id: "excluded-node".into(), node_type: "function".into(),
3756            file_path: None, signature: None,
3757            doc_comment: None, description: None, source_code: None,
3758            hop_distance: 1, modified_at: None,
3759            connecting_relation: "calls".into(), token_estimate: 10,
3760        };
3761        let filters = ContextFilters {
3762            exclude_ids: vec!["excluded-node".into()],
3763            ..Default::default()
3764        };
3765        assert!(!passes_filters(&c, &filters));
3766    }
3767
3768    #[test]
3769    fn test_passes_filters_any_match_semantics() {
3770        let c = Candidate {
3771            node_id: "x".into(), node_type: "function".into(),
3772            file_path: Some("src/lib.rs".into()), signature: None,
3773            doc_comment: None, description: None, source_code: None,
3774            hop_distance: 1, modified_at: None,
3775            connecting_relation: "calls".into(), token_estimate: 10,
3776        };
3777        // One pattern doesn't match, the other does — should pass.
3778        let filters = ContextFilters {
3779            include_patterns: vec!["*.py".into(), "type:function".into()],
3780            ..Default::default()
3781        };
3782        assert!(passes_filters(&c, &filters));
3783    }
3784
3785    // =========================================================================
3786    // §9.2 Tests: simple_glob_match
3787    // =========================================================================
3788
3789    #[test]
3790    fn test_glob_exact() {
3791        assert!(simple_glob_match("foo.rs", "foo.rs"));
3792        assert!(!simple_glob_match("foo.rs", "bar.rs"));
3793    }
3794
3795    #[test]
3796    fn test_glob_star() {
3797        assert!(simple_glob_match("*.rs", "lib.rs"));
3798        assert!(simple_glob_match("*.rs", "main.rs"));
3799        assert!(!simple_glob_match("*.rs", "src/lib.rs")); // * doesn't cross /
3800        assert!(!simple_glob_match("*.py", "lib.rs"));
3801    }
3802
3803    #[test]
3804    fn test_glob_doublestar() {
3805        assert!(simple_glob_match("src/**", "src/lib.rs"));
3806        assert!(simple_glob_match("src/**", "src/a/b/c.rs"));
3807        assert!(!simple_glob_match("src/**", "tests/lib.rs"));
3808    }
3809
3810    #[test]
3811    fn test_glob_doublestar_suffix() {
3812        assert!(simple_glob_match("**/*.rs", "src/lib.rs"));
3813        assert!(simple_glob_match("**/*.rs", "a/b/c.rs"));
3814        assert!(!simple_glob_match("**/*.py", "src/lib.rs"));
3815    }
3816
3817    #[test]
3818    fn test_glob_question_mark() {
3819        assert!(simple_glob_match("?.rs", "a.rs"));
3820        assert!(!simple_glob_match("?.rs", "ab.rs"));
3821    }
3822
3823    // =========================================================================
3824    // §9.3 Tests: OutputFormat
3825    // =========================================================================
3826
3827    #[test]
3828    fn test_output_format_parse() {
3829        assert_eq!("json".parse::<OutputFormat>().unwrap(), OutputFormat::Json);
3830        assert_eq!("markdown".parse::<OutputFormat>().unwrap(), OutputFormat::Markdown);
3831        assert_eq!("md".parse::<OutputFormat>().unwrap(), OutputFormat::Markdown);
3832        assert_eq!("yaml".parse::<OutputFormat>().unwrap(), OutputFormat::Yaml);
3833        assert_eq!("yml".parse::<OutputFormat>().unwrap(), OutputFormat::Yaml);
3834        assert!("xml".parse::<OutputFormat>().is_err());
3835    }
3836
3837    #[test]
3838    fn test_output_format_display() {
3839        assert_eq!(OutputFormat::Json.to_string(), "json");
3840        assert_eq!(OutputFormat::Markdown.to_string(), "markdown");
3841        assert_eq!(OutputFormat::Yaml.to_string(), "yaml");
3842    }
3843
3844    #[test]
3845    fn test_output_format_default() {
3846        assert_eq!(OutputFormat::default(), OutputFormat::Markdown);
3847    }
3848
3849    // =========================================================================
3850    // §9.4 Tests: format_context
3851    // =========================================================================
3852
3853    #[test]
3854    fn test_format_context_json() {
3855        let graph = make_graph_with_deps();
3856        let query = ContextQuery {
3857            targets: vec!["target".into()],
3858            token_budget: 10000,
3859            depth: 2,
3860            ..Default::default()
3861        };
3862        let assembled = assemble_context(&graph, &query).unwrap();
3863        let json_str = format_context(&assembled, OutputFormat::Json);
3864        let parsed: serde_json::Value = serde_json::from_str(&json_str).expect("valid JSON");
3865        assert!(parsed.get("targets").is_some());
3866        assert!(parsed.get("dependencies").is_some());
3867        assert!(parsed.get("callers").is_some());
3868        assert!(parsed.get("tests").is_some());
3869        assert!(parsed.get("estimated_tokens").is_some());
3870        assert!(parsed.get("stats").is_some());
3871    }
3872
3873    #[test]
3874    fn test_format_context_yaml() {
3875        let graph = make_graph_with_deps();
3876        let query = ContextQuery {
3877            targets: vec!["target".into()],
3878            token_budget: 10000,
3879            depth: 2,
3880            ..Default::default()
3881        };
3882        let assembled = assemble_context(&graph, &query).unwrap();
3883        let yaml_str = format_context(&assembled, OutputFormat::Yaml);
3884        assert!(yaml_str.contains("targets:"), "YAML should contain targets key");
3885        assert!(yaml_str.contains("stats:"), "YAML should contain stats key");
3886    }
3887
3888    #[test]
3889    fn test_format_context_markdown() {
3890        let graph = make_graph_with_deps();
3891        let query = ContextQuery {
3892            targets: vec!["target".into()],
3893            token_budget: 10000,
3894            depth: 2,
3895            ..Default::default()
3896        };
3897        let assembled = assemble_context(&graph, &query).unwrap();
3898        let md_str = format_context(&assembled, OutputFormat::Markdown);
3899        assert!(md_str.contains("# Context Assembly"), "markdown should have header");
3900        assert!(md_str.contains("## Targets"), "markdown should have targets section");
3901        assert!(md_str.contains("`target`"), "markdown should reference target node");
3902        assert!(md_str.contains("## Dependencies"), "markdown should have deps section");
3903    }
3904
3905    // =========================================================================
3906    // §9.5 Tests: gather_targets
3907    // =========================================================================
3908
3909    #[test]
3910    fn test_gather_targets_basic() {
3911        let graph = make_graph_with_deps();
3912        let targets = gather_targets(&graph, &["target".into()], None).unwrap();
3913        assert_eq!(targets.len(), 1);
3914        assert_eq!(targets[0].node_id, "target");
3915        assert_eq!(targets[0].title.as_deref(), Some("Target Function"));
3916        assert_eq!(targets[0].file_path.as_deref(), Some("src/lib.rs"));
3917        assert_eq!(targets[0].signature.as_deref(), Some("fn target() -> i32"));
3918    }
3919
3920    #[test]
3921    fn test_gather_targets_nonexistent() {
3922        let graph = make_graph_with_deps();
3923        let err = gather_targets(&graph, &["missing".into()], None).unwrap_err();
3924        assert!(err.to_string().contains("not found"));
3925    }
3926
3927    // =========================================================================
3928    // §9.6 Tests: gather_dependencies
3929    // =========================================================================
3930
3931    #[test]
3932    fn test_gather_deps_depth_1() {
3933        let graph = make_graph_with_deps();
3934        let (deps, filtered) = gather_dependencies(
3935            &graph, &["target".into()], 1, &ContextFilters::default(), None,
3936        );
3937        assert_eq!(filtered, 0);
3938        let ids: Vec<&str> = deps.iter().map(|d| d.node_id.as_str()).collect();
3939        assert!(ids.contains(&"dep1"));
3940        assert!(!ids.contains(&"dep2"), "depth=1 should not include transitive");
3941    }
3942
3943    #[test]
3944    fn test_gather_deps_depth_2() {
3945        let graph = make_graph_with_deps();
3946        let (deps, _) = gather_dependencies(
3947            &graph, &["target".into()], 2, &ContextFilters::default(), None,
3948        );
3949        let ids: Vec<&str> = deps.iter().map(|d| d.node_id.as_str()).collect();
3950        assert!(ids.contains(&"dep1"));
3951        assert!(ids.contains(&"dep2"), "depth=2 should include transitive");
3952    }
3953
3954    #[test]
3955    fn test_gather_deps_with_filter() {
3956        let graph = make_graph_with_deps();
3957        let filters = ContextFilters {
3958            include_patterns: vec!["src/dep.rs".into()],
3959            ..Default::default()
3960        };
3961        let (deps, filtered) = gather_dependencies(
3962            &graph, &["target".into()], 2, &filters, None,
3963        );
3964        assert_eq!(deps.len(), 1, "only dep1 should pass filter");
3965        assert_eq!(deps[0].node_id, "dep1");
3966        assert_eq!(filtered, 1, "dep2 should be filtered out");
3967    }
3968
3969    #[test]
3970    fn test_gather_deps_no_self_loops() {
3971        let graph = make_graph_with_deps();
3972        let (deps, _) = gather_dependencies(
3973            &graph, &["target".into()], 2, &ContextFilters::default(), None,
3974        );
3975        // Target itself should not appear as a dependency.
3976        assert!(!deps.iter().any(|d| d.node_id == "target"));
3977    }
3978
3979    // =========================================================================
3980    // §9.7 Tests: gather_callers_and_tests
3981    // =========================================================================
3982
3983    #[test]
3984    fn test_gather_callers_and_tests() {
3985        let graph = make_graph_with_deps();
3986        let (callers, tests) = gather_callers_and_tests(&graph, &["target".into()], None);
3987        assert_eq!(callers.len(), 1);
3988        assert_eq!(callers[0].node_id, "caller1");
3989        assert_eq!(callers[0].connecting_relation, "calls");
3990        assert_eq!(tests.len(), 1);
3991        assert_eq!(tests[0].node_id, "test1");
3992        assert_eq!(tests[0].connecting_relation, "tests_for");
3993    }
3994
3995    #[test]
3996    fn test_gather_callers_excludes_targets() {
3997        // If target A calls target B, B calling A should not show A as a caller.
3998        let graph = make_graph_with_deps();
3999        let (callers, _) = gather_callers_and_tests(
4000            &graph, &["target".into(), "caller1".into()], None,
4001        );
4002        // caller1 is also a target, so it should not appear as a caller.
4003        assert!(!callers.iter().any(|c| c.node_id == "caller1"));
4004    }
4005
4006    // =========================================================================
4007    // §9.8 Tests: ContextQuery defaults
4008    // =========================================================================
4009
4010    #[test]
4011    fn test_context_query_defaults() {
4012        let q = ContextQuery::default();
4013        assert!(q.targets.is_empty());
4014        assert_eq!(q.token_budget, 8000);
4015        assert_eq!(q.depth, 2);
4016        assert!(q.filters.include_patterns.is_empty());
4017        assert!(q.filters.exclude_ids.is_empty());
4018        assert_eq!(q.format, OutputFormat::Markdown);
4019        assert!(q.project_root.is_none());
4020    }
4021
4022    // =========================================================================
4023    // §9.9 Tests: ContextStats
4024    // =========================================================================
4025
4026    #[test]
4027    fn test_context_stats_default() {
4028        let s = ContextStats::default();
4029        assert_eq!(s.nodes_visited, 0);
4030        assert_eq!(s.nodes_included, 0);
4031        assert_eq!(s.nodes_excluded_by_filter, 0);
4032        assert_eq!(s.budget_used, 0);
4033        assert_eq!(s.budget_total, 0);
4034        assert_eq!(s.elapsed_ms, 0);
4035    }
4036
4037    #[test]
4038    fn test_context_stats_serializable() {
4039        let s = ContextStats {
4040            nodes_visited: 10,
4041            nodes_included: 5,
4042            nodes_excluded_by_filter: 2,
4043            budget_used: 3000,
4044            budget_total: 8000,
4045            elapsed_ms: 42,
4046        };
4047        let json = serde_json::to_string(&s).unwrap();
4048        assert!(json.contains("\"nodes_visited\":10"));
4049        assert!(json.contains("\"elapsed_ms\":42"));
4050    }
4051
4052    // =========================================================================
4053    // §9.10 Tests: assemble_context edge cases
4054    // =========================================================================
4055
4056    #[test]
4057    fn test_assemble_context_no_deps_no_callers() {
4058        // Isolated node with no edges.
4059        let mut g = Graph::default();
4060        let mut n = Node::new("lonely", "Lonely Node");
4061        n.node_type = Some("function".to_string());
4062        g.nodes.push(n);
4063
4064        let query = ContextQuery {
4065            targets: vec!["lonely".into()],
4066            token_budget: 10000,
4067            depth: 2,
4068            ..Default::default()
4069        };
4070        let result = assemble_context(&g, &query).unwrap();
4071        assert_eq!(result.result.targets.len(), 1);
4072        assert!(result.result.dependencies.is_empty());
4073        assert!(result.result.callers.is_empty());
4074        assert!(result.result.tests.is_empty());
4075    }
4076
4077    #[test]
4078    fn test_assemble_context_diamond_dedup() {
4079        // target -> A, target -> B, A -> C, B -> C
4080        // C should appear only once.
4081        let mut g = Graph::default();
4082        g.nodes.push(Node::new("target", "T"));
4083        g.nodes.push(Node::new("a", "A"));
4084        g.nodes.push(Node::new("b", "B"));
4085        g.nodes.push(Node::new("c", "C"));
4086        g.edges.push(Edge::new("target", "a", "calls"));
4087        g.edges.push(Edge::new("target", "b", "calls"));
4088        g.edges.push(Edge::new("a", "c", "calls"));
4089        g.edges.push(Edge::new("b", "c", "calls"));
4090
4091        let query = ContextQuery {
4092            targets: vec!["target".into()],
4093            token_budget: 10000,
4094            depth: 3,
4095            ..Default::default()
4096        };
4097        let result = assemble_context(&g, &query).unwrap();
4098        let dep_ids: Vec<&str> = result.result.dependencies.iter()
4099            .map(|d| d.node_id.as_str()).collect();
4100        // C should appear exactly once (BFS deduplication).
4101        assert_eq!(dep_ids.iter().filter(|&&id| id == "c").count(), 1);
4102    }
4103}