gobby-wiki 0.7.0

Gobby wiki CLI shell
use std::collections::{BTreeSet, HashMap};
use std::path::Path;

use gobby_core::config::{CODE_GRAPH_NAME, FalkorConfig};
use gobby_core::falkor::GraphClient;

use crate::graph::{WikiGraphCodeEdge, WikiGraphDocument};
use crate::search::SearchScope;
use crate::support::config::SharedCodeGraphLimits;

use super::query::{optional_row_string, optional_row_usize};
use super::{
    CODE_CALL_EDGE_TRUNCATION_COMPONENT, CODE_GRAPH_PROVENANCE,
    CODE_IMPORT_EDGE_TRUNCATION_COMPONENT, CODE_TOTAL_EDGE_TRUNCATION_COMPONENT,
    MAX_TOTAL_CODE_EDGES, SharedCodeGraphEdges, SharedCodeGraphTruncation,
};

/// Edges produced by one code graph query, together with a flag telling
/// whether the query returned more rows than the requested limit.
struct LimitedCodeGraphEdges {
    /// Edges built from the rows that were kept after cutting to the limit.
    edges: Vec<WikiGraphCodeEdge>,
    /// `true` when rows beyond the limit were dropped.
    truncated: bool,
}

/// Loads call and import edges from the shared code graph for wiki code documents.
///
/// Only documents whose path has the form `code/files/<source path>.md` (as
/// decided by `code_doc_source_path`) are queried; all other documents are
/// skipped. For each code document the call edges are fetched first, then the
/// import edges. Every query is capped by the matching limit in `limits` and by
/// whatever is left of the overall `MAX_TOTAL_CODE_EDGES` budget.
///
/// Whenever a query returns more rows than its cap, or the overall budget is
/// used up, a truncation component is recorded; the collected components are
/// handed to `SharedCodeGraphTruncation::from_components`.
///
/// # Errors
///
/// Propagates failures from creating the `GraphClient`, from building the
/// query parameters, and from running either query.
pub(crate) fn load_code_graph_edges(
    config: &FalkorConfig,
    project_id: &str,
    documents: &[WikiGraphDocument],
    limits: SharedCodeGraphLimits,
) -> anyhow::Result<SharedCodeGraphEdges> {
    let mut client = GraphClient::from_config(config, CODE_GRAPH_NAME)?;
    let mut edges = Vec::new();
    let mut truncated_components = BTreeSet::new();
    let mut remaining_edges = MAX_TOTAL_CODE_EDGES;
    // Label recorded whenever the overall edge budget stops the scan.
    let total_budget_component =
        || truncation_component(CODE_TOTAL_EDGE_TRUNCATION_COMPONENT, MAX_TOTAL_CODE_EDGES);

    for document in documents {
        // Borrow scope and path straight from `documents`; the queries only
        // need references, so no per-document clone or staging vector is needed.
        let Some(file_path) = code_doc_source_path(&document.path) else {
            continue;
        };

        // `None` means the overall budget is already spent.
        let Some(call_limit) = remaining_code_edge_limit(limits.call_edge_limit, remaining_edges)
        else {
            truncated_components.insert(total_budget_component());
            break;
        };
        let call_edges = code_call_edges(
            &mut client,
            project_id,
            &document.scope,
            &document.path,
            &file_path,
            call_limit,
        )?;
        if call_edges.truncated {
            record_code_edge_truncation(
                &mut truncated_components,
                CODE_CALL_EDGE_TRUNCATION_COMPONENT,
                limits.call_edge_limit,
                call_limit,
            );
        }
        remaining_edges = remaining_edges.saturating_sub(call_edges.edges.len());
        edges.extend(call_edges.edges);

        // If the call edges consumed the rest of the budget this yields `None`,
        // which records the total-budget truncation and stops the scan.
        let Some(import_limit) =
            remaining_code_edge_limit(limits.import_edge_limit, remaining_edges)
        else {
            truncated_components.insert(total_budget_component());
            break;
        };
        let import_edges = code_import_edges(
            &mut client,
            project_id,
            &document.scope,
            &document.path,
            &file_path,
            import_limit,
        )?;
        if import_edges.truncated {
            record_code_edge_truncation(
                &mut truncated_components,
                CODE_IMPORT_EDGE_TRUNCATION_COMPONENT,
                limits.import_edge_limit,
                import_limit,
            );
        }
        remaining_edges = remaining_edges.saturating_sub(import_edges.edges.len());
        edges.extend(import_edges.edges);

        // Checked here (not only at the top of the next iteration) so that a
        // budget used up by the final code document is still reported.
        if remaining_edges == 0 {
            truncated_components.insert(total_budget_component());
            break;
        }
    }

    Ok(SharedCodeGraphEdges {
        edges,
        truncation: SharedCodeGraphTruncation::from_components(truncated_components),
    })
}

/// Runs the call-edge query for `file_path` and converts the rows into edges.
///
/// At most `limit` rows are kept; `truncated` in the result is set when the
/// query returned more than that. Missing row values fall back to `file_path`
/// for the source file, `"external"` for the target file and `"unknown"` for
/// either symbol name. An edge is reported as incoming (`"callers"`) when its
/// target file is `file_path` and its source file is not; every other edge is
/// outgoing (`"calls"`).
fn code_call_edges(
    client: &mut GraphClient,
    project_id: &str,
    scope: &SearchScope,
    document_path: &Path,
    file_path: &str,
    limit: usize,
) -> anyhow::Result<LimitedCodeGraphEdges> {
    let query = code_call_edges_query();
    let params = code_edge_query_params(project_id, file_path, limit)?;
    let mut rows = client.query(query, Some(params))?;
    let truncated = truncate_to_limit(&mut rows, limit);

    let mut edges = Vec::with_capacity(rows.len());
    for row in rows {
        let source_file =
            optional_row_string(&row, "source_file_path").unwrap_or_else(|| file_path.to_string());
        let source_name =
            optional_row_string(&row, "source_name").unwrap_or_else(|| String::from("unknown"));
        let target_file = optional_row_string(&row, "target_file_path")
            .unwrap_or_else(|| String::from("external"));
        let target_name =
            optional_row_string(&row, "target_name").unwrap_or_else(|| String::from("unknown"));

        // Incoming means: something in another file calls into this file.
        let (kind, direction) = if target_file == file_path && source_file != file_path {
            ("callers", "incoming")
        } else {
            ("calls", "outgoing")
        };

        edges.push(WikiGraphCodeEdge {
            scope: scope.clone(),
            document_path: document_path.to_path_buf(),
            source: code_endpoint(&source_file, &source_name),
            target: code_endpoint(&target_file, &target_name),
            kind: kind.to_string(),
            direction: direction.to_string(),
            line: optional_row_usize(&row, "line"),
            provenance: CODE_GRAPH_PROVENANCE.to_string(),
        });
    }

    Ok(LimitedCodeGraphEdges { edges, truncated })
}

/// Returns the query text that selects `CALLS` relationships whose source or
/// target symbol lives in the file bound to `$path`, within `$project`,
/// ordered deterministically and capped by `$limit`.
pub(super) fn code_call_edges_query() -> &'static str {
    concat!(
        "MATCH (source:CodeSymbol {project: $project})-[r:CALLS]->(target {project: $project}) ",
        "WHERE source.file_path = $path OR (target:CodeSymbol AND target.file_path = $path) ",
        "RETURN source.file_path AS source_file_path, source.name AS source_name, ",
        "target.file_path AS target_file_path, target.name AS target_name, r.line AS line ",
        "ORDER BY source.file_path, source.name, target.file_path, target.name, r.line ",
        "LIMIT $limit"
    )
}

/// Runs the import-edge query for `file_path` and converts the rows into edges.
///
/// At most `limit` rows are kept; `truncated` in the result is set when the
/// query returned more than that. Every edge is an outgoing `"imports"` edge
/// without a line number. A missing source path falls back to `file_path` and
/// a missing module name to `"unknown"`.
fn code_import_edges(
    client: &mut GraphClient,
    project_id: &str,
    scope: &SearchScope,
    document_path: &Path,
    file_path: &str,
    limit: usize,
) -> anyhow::Result<LimitedCodeGraphEdges> {
    let query = code_import_edges_query();
    let params = code_edge_query_params(project_id, file_path, limit)?;
    let mut rows = client.query(query, Some(params))?;
    let truncated = truncate_to_limit(&mut rows, limit);

    let mut edges = Vec::with_capacity(rows.len());
    for row in rows {
        let importing_file =
            optional_row_string(&row, "source_file_path").unwrap_or_else(|| file_path.to_string());
        let imported_module =
            optional_row_string(&row, "target_name").unwrap_or_else(|| String::from("unknown"));

        edges.push(WikiGraphCodeEdge {
            scope: scope.clone(),
            document_path: document_path.to_path_buf(),
            source: importing_file,
            target: imported_module,
            kind: String::from("imports"),
            direction: String::from("outgoing"),
            line: None,
            provenance: CODE_GRAPH_PROVENANCE.to_string(),
        });
    }

    Ok(LimitedCodeGraphEdges { edges, truncated })
}

/// Returns the query text that selects the modules imported by the file bound
/// to `$path` within `$project`, ordered by file path and module name and
/// capped by `$limit`.
pub(super) fn code_import_edges_query() -> &'static str {
    concat!(
        "MATCH (file:CodeFile {path: $path, project: $project})-[r:IMPORTS]->(module:CodeModule {project: $project}) ",
        "RETURN file.path AS source_file_path, module.name AS target_name ",
        "ORDER BY file.path, module.name ",
        "LIMIT $limit"
    )
}

/// Builds the `project`, `path` and `limit` parameters shared by the edge queries.
///
/// `project_id` and `file_path` are passed through
/// `gobby_core::falkor::escape_string`. The `limit` parameter is `limit + 1`,
/// so a query can return one row more than the caller intends to keep.
///
/// # Errors
///
/// Fails when `limit + 1` does not fit in a `usize`.
pub(super) fn code_edge_query_params(
    project_id: &str,
    file_path: &str,
    limit: usize,
) -> anyhow::Result<HashMap<String, String>> {
    let mut params = HashMap::with_capacity(3);
    params.insert(
        String::from("project"),
        gobby_core::falkor::escape_string(project_id),
    );
    params.insert(
        String::from("path"),
        gobby_core::falkor::escape_string(file_path),
    );
    let row_cap = sentinel_limit(limit)?;
    params.insert(String::from("limit"), row_cap.to_string());
    Ok(params)
}

/// Returns `limit + 1`, the row count to request so that one extra row can
/// reveal that more than `limit` rows exist.
///
/// # Errors
///
/// Fails when the addition would overflow `usize`.
fn sentinel_limit(limit: usize) -> anyhow::Result<usize> {
    match limit.checked_add(1) {
        Some(with_sentinel) => Ok(with_sentinel),
        None => Err(anyhow::anyhow!(
            "shared code graph edge limit is too large: {limit}"
        )),
    }
}

/// Shortens `rows` to at most `limit` entries.
///
/// Returns `true` when entries were dropped, i.e. when `rows` held more than
/// `limit` entries on entry; otherwise `rows` is left untouched.
pub(super) fn truncate_to_limit<T>(rows: &mut Vec<T>, limit: usize) -> bool {
    if rows.len() <= limit {
        return false;
    }
    rows.truncate(limit);
    true
}

/// Picks the cap for the next edge query.
///
/// Returns `None` when `remaining_edges` is zero; otherwise the smaller of
/// `configured_limit` and `remaining_edges`.
pub(super) fn remaining_code_edge_limit(
    configured_limit: usize,
    remaining_edges: usize,
) -> Option<usize> {
    match remaining_edges {
        0 => None,
        budget => Some(budget.min(configured_limit)),
    }
}

/// Records which limit caused a truncated edge query.
///
/// When the query ran with a cap below `configured_limit`, the overall edge
/// budget was the binding limit, so the total-edge component with
/// `MAX_TOTAL_CODE_EDGES` is recorded. Otherwise `component` is recorded with
/// `configured_limit`.
pub(super) fn record_code_edge_truncation(
    components: &mut BTreeSet<String>,
    component: &str,
    configured_limit: usize,
    query_limit: usize,
) {
    let budget_was_binding = query_limit < configured_limit;
    let label = if budget_was_binding {
        truncation_component(CODE_TOTAL_EDGE_TRUNCATION_COMPONENT, MAX_TOTAL_CODE_EDGES)
    } else {
        truncation_component(component, configured_limit)
    };
    components.insert(label);
}

/// Formats a truncation label as `<component>><limit>`, e.g. `calls>10`.
pub(super) fn truncation_component(component: &str, limit: usize) -> String {
    let mut label = String::from(component);
    label.push('>');
    label.push_str(&limit.to_string());
    label
}

/// Maps a wiki document path of the form `code/files/<source path>.md` to
/// `<source path>`.
///
/// Backslashes are treated as path separators. Returns `None` when the path
/// lacks the `code/files/` prefix or the `.md` suffix.
pub(super) fn code_doc_source_path(path: &Path) -> Option<String> {
    let normalized = normalize_graph_path(path);
    let below_files_dir = normalized.strip_prefix("code/files/")?;
    let source_path = below_files_dir.strip_suffix(".md")?;
    Some(source_path.to_owned())
}

/// Builds an edge endpoint label: `<file_path>:<symbol>`, or just
/// `<file_path>` when `symbol` is empty.
fn code_endpoint(file_path: &str, symbol: &str) -> String {
    match symbol {
        "" => file_path.to_owned(),
        name => [file_path, name].join(":"),
    }
}

/// Renders `path` as a string (lossily for non-UTF-8 data) with every
/// backslash turned into a forward slash.
fn normalize_graph_path(path: &Path) -> String {
    path.to_string_lossy()
        .chars()
        .map(|ch| if ch == '\\' { '/' } else { ch })
        .collect()
}