gobby-wiki 0.7.0

Gobby wiki CLI shell
use std::collections::BTreeMap;

use gobby_core::graph_analytics::{
    AnalyticsEdge, AnalyticsGraph, AnalyticsNode, CentralityScore, Community, EdgeRef,
    GraphAnalytics, Hotspot, NodeRef, analyze, weight_for_kind,
};

use super::{
    MemoryWikiGraph, WikiGraphFacts, WikiGraphLinkTarget, citation_node, document_id,
    document_kind, source_node_id, unresolved_target_id,
};

/// Errors produced while assembling an analytics graph from wiki graph facts.
#[derive(Debug, Clone, PartialEq)]
pub enum GraphAnalyticsError {
    /// A node id was registered a second time with a kind or weight that
    /// differs from the node already recorded under that id.
    DuplicateNode {
        /// Id shared by the two conflicting registrations.
        id: String,
        /// Kind of the node that was recorded first.
        existing_kind: String,
        /// Kind supplied by the later, rejected registration.
        duplicate_kind: String,
        /// Weight of the node that was recorded first.
        existing_weight: f64,
        /// Weight supplied by the later, rejected registration.
        duplicate_weight: f64,
    },
}

// Human-readable rendering of the error: one line naming the node id and both
// conflicting (kind, weight) pairs.
impl std::fmt::Display for GraphAnalyticsError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // The bindings keep their field names because the format string
            // captures them inline by name.
            Self::DuplicateNode {
                id,
                existing_kind,
                duplicate_kind,
                existing_weight,
                duplicate_weight,
            } => {
                let message = format_args!(
                    "duplicate graph node `{id}` has conflicting metadata: existing kind `{existing_kind}` weight {existing_weight}, duplicate kind `{duplicate_kind}` weight {duplicate_weight}"
                );
                f.write_fmt(message)
            }
        }
    }
}

// Lets the error be used wherever `std::error::Error` is expected; the empty
// body relies entirely on the trait's default methods.
impl std::error::Error for GraphAnalyticsError {}

/// Serializable export form of the core `GraphAnalytics` result.
///
/// Each field is produced by converting the identically named field of the
/// core result into its export counterpart.
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
pub struct GraphExportAnalytics {
    /// Communities reported by the core analysis.
    pub communities: Vec<GraphExportCommunity>,
    /// Per-node centrality entries reported by the core analysis.
    pub centrality: Vec<GraphExportCentrality>,
    /// Nodes the core analysis reported under `bridges`.
    pub bridges: Vec<GraphExportNodeRef>,
    /// Nodes the core analysis reported under `god_nodes`.
    pub god_nodes: Vec<GraphExportNodeRef>,
    /// Edges the core analysis reported under `unexpected_links`.
    pub unexpected_links: Vec<GraphExportEdgeRef>,
    /// Hotspot entries reported by the core analysis.
    pub hotspots: Vec<GraphExportHotspot>,
}

/// Serializable export form of a core `Community`.
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
pub struct GraphExportCommunity {
    /// Community id, copied from the core community.
    pub id: String,
    /// Member nodes, converted from the core community's node references.
    pub nodes: Vec<GraphExportNodeRef>,
    /// Community weight, copied from the core community.
    pub weight: f64,
}

/// Serializable export form of a core `CentralityScore`.
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
pub struct GraphExportCentrality {
    /// Node the entry refers to.
    pub node: GraphExportNodeRef,
    /// Degree value, copied from the core score.
    pub degree: usize,
    /// Score value, copied from the core score.
    pub score: f64,
}

/// Serializable reference to a graph node, converted from a core `NodeRef`.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)]
pub struct GraphExportNodeRef {
    /// Node id, copied from the core reference.
    pub id: String,
    /// Node kind, copied from the core reference.
    pub kind: String,
}

/// Serializable reference to a graph edge, converted from a core `EdgeRef`.
#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize)]
pub struct GraphExportEdgeRef {
    /// Id of the edge's source node, copied from the core reference.
    pub source: String,
    /// Id of the edge's target node, copied from the core reference.
    pub target: String,
    /// Edge kind, copied from the core reference.
    pub kind: String,
}

/// Serializable export form of a core `Hotspot`.
#[derive(Debug, Clone, PartialEq, serde::Serialize)]
pub struct GraphExportHotspot {
    /// Node the hotspot refers to.
    pub node: GraphExportNodeRef,
    /// Frequency value, copied from the core hotspot.
    pub frequency: usize,
    /// Weight value, copied from the core hotspot.
    pub weight: f64,
}

/// Runs the core graph analysis over `facts` and returns the exportable result.
///
/// # Errors
///
/// Returns the [`GraphAnalyticsError`] raised by [`analytics_graph_from_facts`]
/// when the facts cannot be turned into a graph.
pub fn analyze_facts(facts: &WikiGraphFacts) -> Result<GraphExportAnalytics, GraphAnalyticsError> {
    let graph = analytics_graph_from_facts(facts)?;
    let core_analytics = analyze(&graph);
    Ok(GraphExportAnalytics::from_core(core_analytics))
}

/// Builds the core analytics graph from the facts held by an in-memory wiki graph.
///
/// # Errors
///
/// Returns the [`GraphAnalyticsError`] raised by [`analytics_graph_from_facts`].
#[allow(dead_code, reason = "reserved gwiki CLI/API split")]
pub fn analytics_graph_from_memory(
    graph: &MemoryWikiGraph,
) -> Result<AnalyticsGraph, GraphAnalyticsError> {
    let facts = &graph.facts;
    analytics_graph_from_facts(facts)
}

pub fn analytics_graph_from_facts(
    facts: &WikiGraphFacts,
) -> Result<AnalyticsGraph, GraphAnalyticsError> {
    let mut nodes = BTreeMap::new();
    let mut edges = Vec::new();

    for document in &facts.documents {
        insert_node(
            &mut nodes,
            document_id(&document.scope, &document.path),
            document_kind(&document.path),
            1.0,
        )?;
    }

    for source in &facts.sources {
        let source_id = source_node_id(&source.scope, &source.source_path);
        insert_node(&mut nodes, source_id.clone(), "source", 0.5)?;

        let citation_id = citation_node(source).id;
        insert_node(&mut nodes, citation_id.clone(), "citation", 0.25)?;

        edges.push(AnalyticsEdge {
            source: source_id,
            target: document_id(&source.scope, &source.document_path),
            kind: "supports".to_string(),
            weight: weight_for_kind("supports"),
        });
        edges.push(AnalyticsEdge {
            source: citation_id,
            target: source_node_id(&source.scope, &source.source_path),
            kind: "cites".to_string(),
            weight: weight_for_kind("cites"),
        });
    }

    for link in &facts.links {
        let target = match &link.target {
            WikiGraphLinkTarget::Resolved(path) => {
                let node_id = document_id(&link.scope, path);
                insert_node(&mut nodes, node_id.clone(), document_kind(path), 1.0)?;
                node_id
            }
            WikiGraphLinkTarget::Unresolved(target) => {
                let node_id = unresolved_target_id(&link.scope, target);
                insert_node(&mut nodes, node_id.clone(), "unresolved_target", 0.25)?;
                node_id
            }
        };
        edges.push(AnalyticsEdge {
            source: document_id(&link.scope, &link.source_path),
            target,
            kind: "links".to_string(),
            weight: weight_for_kind("links"),
        });
    }

    Ok(AnalyticsGraph {
        nodes: nodes.into_values().collect(),
        edges,
    })
}

/// Registers a node under `id`, tolerating exact repeats.
///
/// A first registration stores the node. A repeat with the same kind and weight
/// is a no-op. A repeat whose kind or weight differs is rejected.
///
/// # Errors
///
/// Returns [`GraphAnalyticsError::DuplicateNode`] carrying both the stored and
/// the rejected kind/weight when the repeat conflicts with the stored node.
fn insert_node(
    nodes: &mut BTreeMap<String, AnalyticsNode>,
    id: String,
    kind: impl Into<String>,
    weight: f64,
) -> Result<(), GraphAnalyticsError> {
    let kind = kind.into();
    match nodes.get(&id) {
        None => {
            nodes.insert(id.clone(), AnalyticsNode { id, kind, weight });
            Ok(())
        }
        // Identical metadata: nothing to do.
        Some(stored) if stored.kind == kind && stored.weight == weight => Ok(()),
        Some(stored) => Err(GraphAnalyticsError::DuplicateNode {
            id,
            existing_kind: stored.kind.clone(),
            duplicate_kind: kind,
            existing_weight: stored.weight,
            duplicate_weight: weight,
        }),
    }
}

impl GraphExportAnalytics {
    fn from_core(analytics: GraphAnalytics) -> Self {
        Self {
            communities: analytics
                .communities
                .into_iter()
                .map(GraphExportCommunity::from_core)
                .collect(),
            centrality: analytics
                .centrality
                .into_iter()
                .map(GraphExportCentrality::from_core)
                .collect(),
            bridges: analytics
                .bridges
                .into_iter()
                .map(GraphExportNodeRef::from)
                .collect(),
            god_nodes: analytics
                .god_nodes
                .into_iter()
                .map(GraphExportNodeRef::from)
                .collect(),
            unexpected_links: analytics
                .unexpected_links
                .into_iter()
                .map(GraphExportEdgeRef::from)
                .collect(),
            hotspots: analytics
                .hotspots
                .into_iter()
                .map(GraphExportHotspot::from_core)
                .collect(),
        }
    }
}

impl GraphExportCommunity {
    fn from_core(community: Community) -> Self {
        Self {
            id: community.id,
            nodes: community
                .nodes
                .into_iter()
                .map(GraphExportNodeRef::from)
                .collect(),
            weight: community.weight,
        }
    }
}

impl GraphExportCentrality {
    fn from_core(score: CentralityScore) -> Self {
        Self {
            node: GraphExportNodeRef::from(score.node),
            degree: score.degree,
            score: score.score,
        }
    }
}

impl GraphExportHotspot {
    fn from_core(hotspot: Hotspot) -> Self {
        Self {
            node: GraphExportNodeRef::from(hotspot.node),
            frequency: hotspot.frequency,
            weight: hotspot.weight,
        }
    }
}

impl From<NodeRef> for GraphExportNodeRef {
    fn from(node: NodeRef) -> Self {
        Self {
            id: node.id,
            kind: node.kind,
        }
    }
}

impl From<EdgeRef> for GraphExportEdgeRef {
    fn from(edge: EdgeRef) -> Self {
        Self {
            source: edge.source,
            target: edge.target,
            kind: edge.kind,
        }
    }
}

#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use crate::graph::{
        MemoryWikiGraph, WikiGraphDocument, WikiGraphFacts, WikiGraphLink, WikiGraphLinkTarget,
    };
    use crate::search::SearchScope;

    use super::*;

    // Paths of the two topic pages shared by the fixtures below.
    const PAGE_A: &str = "knowledge/topics/a.md";
    const PAGE_B: &str = "knowledge/topics/b.md";

    // An untitled document at `path` within `scope`.
    fn page(scope: &SearchScope, path: &'static str) -> WikiGraphDocument {
        WikiGraphDocument {
            scope: scope.clone(),
            path: path.into(),
            title: None,
        }
    }

    // A link from page A to page B that has already been resolved.
    fn resolved_link_a_to_b(scope: SearchScope) -> WikiGraphLink {
        WikiGraphLink {
            scope,
            source_path: PAGE_A.into(),
            raw_target: "B".to_string(),
            target: WikiGraphLinkTarget::Resolved(PAGE_B.into()),
        }
    }

    // An in-memory graph holding the given documents and links and nothing else.
    fn memory_graph(
        documents: Vec<WikiGraphDocument>,
        links: Vec<WikiGraphLink>,
    ) -> MemoryWikiGraph {
        let mut graph = MemoryWikiGraph::default();
        graph.replace_facts(WikiGraphFacts {
            documents,
            links,
            sources: Vec::new(),
            code_edges: Vec::new(),
        });
        graph
    }

    #[test]
    fn graph_analytics_converts_memory_graph_to_core_graph() {
        let scope = SearchScope::project("project-1");
        let documents = vec![page(&scope, PAGE_A), page(&scope, PAGE_B)];
        let graph = memory_graph(documents, vec![resolved_link_a_to_b(scope)]);

        let analytics_graph = analytics_graph_from_memory(&graph).expect("analytics graph");

        assert_eq!(analytics_graph.nodes.len(), 2);
        assert_eq!(analytics_graph.edges.len(), 1);
        assert_eq!(analytics_graph.edges[0].kind, "links");
    }

    #[test]
    fn graph_analytics_adds_placeholder_for_missing_resolved_target() {
        let scope = SearchScope::project("project-1");
        // Page B is linked to but deliberately absent from the documents.
        let graph = memory_graph(
            vec![page(&scope, PAGE_A)],
            vec![resolved_link_a_to_b(scope.clone())],
        );

        let analytics_graph = analytics_graph_from_memory(&graph).expect("analytics graph");
        let target_id = document_id(&scope, &PathBuf::from(PAGE_B));

        let is_placeholder = |node: &AnalyticsNode| {
            node.id == target_id && node.kind == "wiki_page" && node.weight == 1.0
        };
        assert!(analytics_graph.nodes.iter().any(is_placeholder));
    }

    #[test]
    fn graph_analytics_rejects_duplicate_node_metadata() {
        let mut nodes = BTreeMap::new();
        insert_node(&mut nodes, "node-1".to_string(), "topic", 1.0).expect("first insert");

        let error = insert_node(&mut nodes, "node-1".to_string(), "source", 0.5)
            .expect_err("duplicate node must fail");

        match error {
            GraphAnalyticsError::DuplicateNode {
                existing_kind,
                duplicate_kind,
                ..
            } => {
                assert!(existing_kind == "topic" && duplicate_kind == "source");
            }
        }
    }
}