rag-plusplus-core 0.1.0

//! Branch Resolution Module
//!
//! Provides algorithms for finding and recovering "lost" branches
//! in trajectory DAGs. This is the key solution to the problem where
//! valuable exploration paths become inaccessible over time.

use std::collections::{HashSet, VecDeque};
use crate::trajectory::graph::{NodeId, TrajectoryGraph};
use super::operations::{BranchId, BranchStatus, BranchError};
use super::state_machine::BranchStateMachine;

/// Strategy for recovering a lost branch.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum RecoveryStrategy {
    /// Reactivate the branch without modification
    Reactivate,
    /// Create a copy of the branch
    Copy,
    /// Merge into another branch
    MergeInto(BranchId),
    /// Split from parent and make independent
    SplitIndependent,
}

/// A branch that can potentially be recovered.
#[derive(Debug, Clone)]
pub struct RecoverableBranch {
    /// ID of the branch (if it exists) or generated ID
    pub branch_id: Option<BranchId>,
    /// Fork point where this branch diverges
    pub fork_point: NodeId,
    /// The first node of this branch path
    pub entry_node: NodeId,
    /// All nodes in this branch
    pub nodes: Vec<NodeId>,
    /// Head (deepest leaf) of this branch
    pub head: NodeId,
    /// Depth of the branch
    pub depth: u32,
    /// Why this branch is considered "lost"
    pub lost_reason: LostReason,
    /// Score indicating how valuable this branch might be (higher = more valuable)
    pub recovery_score: f32,
    /// Suggested recovery strategy
    pub suggested_strategy: RecoveryStrategy,
}

/// Reasons why a branch might be considered "lost".
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LostReason {
    /// Branch was archived and forgotten
    Archived,
    /// Branch has no explicit tracking (exists in DAG but not in state machine)
    Untracked,
    /// Branch's parent was deleted
    OrphanedByDeletion,
    /// Branch was created by regeneration but not selected
    UnselectedRegeneration,
    /// Branch was explicitly abandoned
    Abandoned,
    /// Branch diverged during exploration
    ExplorationDivergence,
}

/// Resolver for finding and recovering lost branches.
///
/// The resolver analyzes a trajectory DAG and state machine to find
/// branches that exist in the data but are not actively tracked or
/// have become inaccessible.
///
/// # Algorithm
///
/// 1. **Discovery**: Find all paths in the DAG that aren't part of active branches
/// 2. **Scoring**: Rank branches by potential value (length, depth, content)
/// 3. **Strategy**: Suggest recovery strategies for each branch
/// 4. **Recovery**: Execute the chosen recovery strategy
///
/// # Example
///
/// ```ignore
/// let resolver = BranchResolver::new(&machine);
///
/// // Find all recoverable branches
/// let lost = resolver.find_recoverable_branches();
///
/// for branch in lost {
///     println!("Found lost branch at {:?} with {} nodes",
///              branch.fork_point, branch.nodes.len());
///
///     // Recover using suggested strategy
///     resolver.recover(&mut machine, &branch)?;
/// }
/// ```
pub struct BranchResolver<'a> {
    machine: &'a BranchStateMachine,
}

impl<'a> BranchResolver<'a> {
    /// Create a new resolver for a state machine.
    pub fn new(machine: &'a BranchStateMachine) -> Self {
        Self { machine }
    }

    /// Find all branches that could be recovered.
    ///
    /// This analyzes the DAG to find paths that aren't actively tracked
    /// in the state machine.
    pub fn find_recoverable_branches(&self) -> Vec<RecoverableBranch> {
        let mut recoverable = Vec::new();
        let graph = self.machine.graph();

        // Collect all nodes that are already tracked
        let tracked_nodes: HashSet<NodeId> = self.machine.all_branches()
            .flat_map(|b| b.nodes.iter().copied())
            .collect();

        // Find fork points in the graph
        let fork_points: Vec<NodeId> = graph.find_branch_points()
            .iter()
            .map(|bp| bp.branch_point)
            .collect();

        // For each fork point, check if any children are untracked
        for fork_point in fork_points {
            if let Some(episode) = graph.get_node(fork_point) {
                for &child_id in &episode.children {
                    let subtree = self.collect_subtree(graph, child_id);

                    // Check if this subtree is untracked or partially tracked
                    let untracked: Vec<NodeId> = subtree.iter()
                        .filter(|n| !tracked_nodes.contains(n))
                        .copied()
                        .collect();

                    if !untracked.is_empty() {
                        // Found a recoverable branch
                        let branch = self.create_recoverable_branch(
                            graph,
                            fork_point,
                            child_id,
                            untracked,
                        );
                        recoverable.push(branch);
                    }
                }
            }
        }

        // Also find archived branches that could be recovered
        for branch in self.machine.all_branches() {
            if branch.status == BranchStatus::Archived {
                let recoverable_branch = RecoverableBranch {
                    branch_id: Some(branch.id),
                    fork_point: branch.fork_point,
                    entry_node: branch.nodes.first().copied().unwrap_or(branch.fork_point),
                    nodes: branch.nodes.clone(),
                    head: branch.head,
                    depth: self.compute_depth(graph, branch.head),
                    lost_reason: LostReason::Archived,
                    recovery_score: self.compute_recovery_score(graph, &branch.nodes),
                    suggested_strategy: RecoveryStrategy::Reactivate,
                };
                recoverable.push(recoverable_branch);
            }
        }

        // Sort by recovery score (highest first)
        recoverable.sort_by(|a, b| {
            b.recovery_score.partial_cmp(&a.recovery_score).unwrap_or(std::cmp::Ordering::Equal)
        });

        recoverable
    }

    /// Find branches created by regeneration that weren't selected.
    pub fn find_unselected_regenerations(&self) -> Vec<RecoverableBranch> {
        let graph = self.machine.graph();
        let mut unselected = Vec::new();

        // Find fork points that represent regenerations
        for fork in self.machine.fork_points() {
            let selected = fork.selected_child;

            for &child_id in &fork.children {
                // Skip the selected child
                if Some(child_id) == selected {
                    continue;
                }

                // Check if this branch exists and is active
                if let Some(branch) = self.machine.get_branch(child_id) {
                    if branch.is_active() {
                        continue;
                    }
                }

                // This is an unselected regeneration
                let subtree = self.collect_subtree(graph, fork.node_id);
                let child_nodes: Vec<NodeId> = subtree.into_iter()
                    .filter(|&n| {
                        self.is_descendant_of(graph, n, child_id) || n == child_id
                    })
                    .collect();

                if !child_nodes.is_empty() {
                    let recoverable = RecoverableBranch {
                        branch_id: None,
                        fork_point: fork.node_id,
                        entry_node: child_id,
                        nodes: child_nodes.clone(),
                        head: self.find_deepest_leaf(graph, &child_nodes),
                        depth: fork.depth + 1,
                        lost_reason: LostReason::UnselectedRegeneration,
                        recovery_score: self.compute_recovery_score(graph, &child_nodes),
                        suggested_strategy: RecoveryStrategy::SplitIndependent,
                    };
                    unselected.push(recoverable);
                }
            }
        }

        unselected
    }

    /// Recover a lost branch using its suggested strategy.
    pub fn recover(
        &self,
        machine: &mut BranchStateMachine,
        recoverable: &RecoverableBranch,
    ) -> Result<BranchId, BranchError> {
        match &recoverable.suggested_strategy {
            RecoveryStrategy::Reactivate => {
                if let Some(branch_id) = recoverable.branch_id {
                    self.reactivate_branch(machine, branch_id)
                } else {
                    Err(BranchError::InvalidState("No branch ID for reactivation".to_string()))
                }
            }
            RecoveryStrategy::Copy => {
                self.copy_as_new_branch(machine, recoverable)
            }
            RecoveryStrategy::MergeInto(target) => {
                if let Some(branch_id) = recoverable.branch_id {
                    machine.merge(branch_id, *target)?;
                    Ok(*target)
                } else {
                    Err(BranchError::InvalidState("No branch ID for merge".to_string()))
                }
            }
            RecoveryStrategy::SplitIndependent => {
                self.create_independent_branch(machine, recoverable)
            }
        }
    }

    // =========================================================================
    // RECOVERY IMPLEMENTATIONS
    // =========================================================================

    fn reactivate_branch(
        &self,
        machine: &mut BranchStateMachine,
        branch_id: BranchId,
    ) -> Result<BranchId, BranchError> {
        // Get mutable access to the branch and recover it
        // Note: This requires interior mutability or returning the ID
        // For now, we'll use a simple approach
        let _branch = machine.get_branch(branch_id)
            .ok_or(BranchError::BranchNotFound(branch_id))?;

        // Record the recovery operation
        // machine.recover_branch(branch_id)?;

        Ok(branch_id)
    }

    fn copy_as_new_branch(
        &self,
        machine: &mut BranchStateMachine,
        recoverable: &RecoverableBranch,
    ) -> Result<BranchId, BranchError> {
        // Split at the fork point to create a new branch
        let result = machine.split(recoverable.entry_node)?;
        Ok(result.new_branch)
    }

    fn create_independent_branch(
        &self,
        machine: &mut BranchStateMachine,
        recoverable: &RecoverableBranch,
    ) -> Result<BranchId, BranchError> {
        // Split at the entry node
        let result = machine.split(recoverable.entry_node)?;
        Ok(result.new_branch)
    }

    // =========================================================================
    // HELPER METHODS
    // =========================================================================

    fn create_recoverable_branch(
        &self,
        graph: &TrajectoryGraph,
        fork_point: NodeId,
        entry_node: NodeId,
        nodes: Vec<NodeId>,
    ) -> RecoverableBranch {
        let head = self.find_deepest_leaf(graph, &nodes);
        let depth = self.compute_depth(graph, head);
        let score = self.compute_recovery_score(graph, &nodes);

        RecoverableBranch {
            branch_id: None,
            fork_point,
            entry_node,
            nodes,
            head,
            depth,
            lost_reason: LostReason::Untracked,
            recovery_score: score,
            suggested_strategy: RecoveryStrategy::SplitIndependent,
        }
    }

    fn collect_subtree(&self, graph: &TrajectoryGraph, root: NodeId) -> Vec<NodeId> {
        let mut nodes = Vec::new();
        let mut stack = vec![root];
        let mut visited = HashSet::new();

        while let Some(node_id) = stack.pop() {
            if visited.contains(&node_id) {
                continue;
            }
            visited.insert(node_id);
            nodes.push(node_id);

            if let Some(episode) = graph.get_node(node_id) {
                for &child in &episode.children {
                    stack.push(child);
                }
            }
        }

        nodes
    }

    fn compute_depth(&self, graph: &TrajectoryGraph, node_id: NodeId) -> u32 {
        graph.depth(node_id).unwrap_or(0) as u32
    }

    fn find_deepest_leaf(&self, graph: &TrajectoryGraph, nodes: &[NodeId]) -> NodeId {
        nodes.iter()
            .filter(|&&n| graph.get_node(n).map_or(false, |e| e.is_leaf()))
            .max_by_key(|&&n| self.compute_depth(graph, n))
            .copied()
            .unwrap_or_else(|| nodes.first().copied().unwrap_or(0))
    }

    fn is_descendant_of(&self, graph: &TrajectoryGraph, node: NodeId, ancestor: NodeId) -> bool {
        if node == ancestor {
            return true;
        }

        // BFS from ancestor to find node
        let mut queue = VecDeque::new();
        queue.push_back(ancestor);
        let mut visited = HashSet::new();

        while let Some(current) = queue.pop_front() {
            if current == node {
                return true;
            }
            if visited.contains(&current) {
                continue;
            }
            visited.insert(current);

            if let Some(episode) = graph.get_node(current) {
                for &child in &episode.children {
                    queue.push_back(child);
                }
            }
        }

        false
    }

    /// Compute a score indicating how valuable a branch might be.
    ///
    /// Higher scores indicate more valuable branches to recover.
    /// Factors:
    /// - Length (more nodes = more content)
    /// - Depth (deeper = more exploration)
    /// - Content richness (based on episode metadata)
    fn compute_recovery_score(&self, graph: &TrajectoryGraph, nodes: &[NodeId]) -> f32 {
        let length_factor = (nodes.len() as f32).ln_1p();

        let max_depth = nodes.iter()
            .map(|&n| self.compute_depth(graph, n))
            .max()
            .unwrap_or(0);
        let depth_factor = (max_depth as f32).sqrt();

        // Content richness: sum of content lengths
        let content_factor: f32 = nodes.iter()
            .filter_map(|&n| graph.get_node(n))
            .map(|e| (e.content_length as f32).ln_1p())
            .sum::<f32>()
            / nodes.len().max(1) as f32;

        // Check for positive feedback
        let feedback_factor: f32 = nodes.iter()
            .filter_map(|&n| graph.get_node(n))
            .filter(|e| e.has_thumbs_up)
            .count() as f32;

        // Combine factors
        0.3 * length_factor + 0.3 * depth_factor + 0.2 * content_factor + 0.2 * feedback_factor
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::trajectory::graph::{Edge, EdgeType};

    fn make_branching_graph() -> TrajectoryGraph {
        // 1 -> 2 -> 3
        //        -> 4 (regeneration - potentially lost)
        //   -> 5 (branch - potentially lost)
        let edges = vec![
            Edge { parent: 1, child: 2, edge_type: EdgeType::Continuation },
            Edge { parent: 2, child: 3, edge_type: EdgeType::Regeneration },
            Edge { parent: 2, child: 4, edge_type: EdgeType::Regeneration },
            Edge { parent: 1, child: 5, edge_type: EdgeType::Branch },
        ];
        TrajectoryGraph::from_edges(edges.into_iter())
    }

    #[test]
    fn test_resolver_creation() {
        let graph = make_branching_graph();
        let machine = BranchStateMachine::from_graph(graph);
        let resolver = BranchResolver::new(&machine);

        // Should be able to find recoverable branches
        let recoverable = resolver.find_recoverable_branches();
        // The exact number depends on how branches were initialized
        assert!(recoverable.len() >= 0);
    }

    #[test]
    fn test_recovery_score() {
        let graph = make_branching_graph();
        let machine = BranchStateMachine::from_graph(graph.clone());
        let resolver = BranchResolver::new(&machine);

        // Compute score for a set of nodes
        let nodes = vec![1, 2, 3];
        let score = resolver.compute_recovery_score(&graph, &nodes);

        // Score should be positive
        assert!(score >= 0.0);
    }

    #[test]
    fn test_collect_subtree() {
        let graph = make_branching_graph();
        let machine = BranchStateMachine::from_graph(graph.clone());
        let resolver = BranchResolver::new(&machine);

        // Collect subtree from node 2
        let subtree = resolver.collect_subtree(&graph, 2);

        // Should include node 2 and its children (3 and 4)
        assert!(subtree.contains(&2));
        assert!(subtree.contains(&3));
        assert!(subtree.contains(&4));
    }

    #[test]
    fn test_is_descendant() {
        let graph = make_branching_graph();
        let machine = BranchStateMachine::from_graph(graph.clone());
        let resolver = BranchResolver::new(&machine);

        // Node 3 is a descendant of node 1
        assert!(resolver.is_descendant_of(&graph, 3, 1));
        assert!(resolver.is_descendant_of(&graph, 3, 2));

        // Node 1 is not a descendant of node 3
        assert!(!resolver.is_descendant_of(&graph, 1, 3));
    }

    #[test]
    fn test_recovery_strategy() {
        let graph = make_branching_graph();
        let machine = BranchStateMachine::from_graph(graph);
        let resolver = BranchResolver::new(&machine);

        let recoverable = resolver.find_recoverable_branches();

        // Each recoverable branch should have a suggested strategy
        for branch in recoverable {
            match branch.suggested_strategy {
                RecoveryStrategy::Reactivate |
                RecoveryStrategy::Copy |
                RecoveryStrategy::SplitIndependent |
                RecoveryStrategy::MergeInto(_) => {
                    // Valid strategy
                }
            }
        }
    }
}