paceflow 0.2.1 - Docs.rs

use anyhow::Result;
use chrono::TimeZone;
use rusqlite::{Connection, OpenFlags, OptionalExtension, TransactionBehavior, params};
use serde_json::Value;
use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::{Path, PathBuf};
use std::time::UNIX_EPOCH;

use crate::change_intel::line_hash::{diff_with_hashes, hash_line};
use crate::change_intel::path_resolver::{detect_repo_root, to_rel_path};
use crate::change_intel::pipeline::ProviderCodeChangeSummary;
use crate::change_intel::storage;
use crate::change_intel::types::{
    ChangeOpCandidate, LineHashCount, LineSide, ParseError, SessionInfo, WriteMode,
};
use crate::cursor_paths::cursor_state_path;
use crate::ingest_progress::IngestProgressObserver;
use crate::path_utils::{normalize_filesystem_path, path_to_string};
use crate::providers::cursor::shared::{
    CursorSessionGraph, load_cursor_session_graphs_with_observer, resolve_tool_call_edits,
};

/// Namespace under which this module reads and writes its per-source ingest
/// cursor (file mtime + size) through `storage::get_ingest_cursor` /
/// `storage::upsert_ingest_cursor`.
const CURSOR_CURSOR_NAMESPACE: &str = "cursor_core_v1";
/// Parser name recorded with parse errors raised while building inline undo ops.
const INLINE_PARSER_NAME: &str = "cursor_inline_undo_v1";
/// Parser name recorded with parse errors from the old-schema partial fates fallback.
const PARTIAL_PARSER_NAME: &str = "cursor_partial_fates_v1";
/// Parser name recorded with parse errors from the new-schema partial fates pass.
const NEW_SCHEMA_PARSER_NAME: &str = "cursor_new_schema_partial_fates_v1";
// NOTE(review): the three legacy parser names below are presumably used by the
// legacy code-block fallback, which is not visible in this part of the file — confirm.
const LEGACY_DIFF_PARSER_NAME: &str = "cursor_legacy_code_block_diff_v1";
const LEGACY_CONTENT_PARSER_NAME: &str = "cursor_legacy_code_block_content_v1";
const LEGACY_TOTALS_PARSER_NAME: &str = "cursor_legacy_session_totals_v1";
/// 512 KiB size limit.
// NOTE(review): presumably files larger than this are skipped when the repo
// content index is built — the consumer is not visible here; confirm.
const REPO_CONTENT_MAX_FILE_BYTES: u64 = 512 * 1024;
/// Directory names listed for skipping.
// NOTE(review): presumably excluded while walking a repo to build the content
// index (VCS metadata, dependency and build output dirs) — confirm at the use site.
const REPO_CONTENT_SKIP_DIRS: &[&str] = &[
    ".git",
    "node_modules",
    "target",
    "dist",
    "build",
    ".next",
    ".venv",
    "coverage",
];

/// `(call_id, timestamp, [(partial_id, payload)])` accumulated per abs_path
/// when grouping pathless partial fates that resolve to the same file.
///
/// This is the value type of the per-session `grouped_by_path` map built in
/// `ingest_new_schema_partial_fates`; the map key is the resolved path.
type NewPartialGroup = (String, Option<String>, Vec<(String, Value)>);

/// Ranking tuple returned by `score_new_schema_inline_hint`. Ordered
/// lexicographically so higher values win in a `max_by_key` / sort.
///
/// `resolve_new_schema_partial_path` treats a non-zero value in any of the
/// first four components as a non-weak signal when it accepts a unique best
/// hint.
type InlineHintScore = (usize, usize, usize, usize, usize);

/// Working copy of one Cursor session that passed
/// `CursorSessionGraph::is_candidate_edit_session`, built by
/// `collect_candidate_sessions_from_graphs` from the session's graph.
#[derive(Debug, Clone)]
struct CandidateSession {
    /// Session row that is upserted through `storage::upsert_change_session`.
    info: SessionInfo,
    /// Copied from the graph's `checkpoint_paths`. When non-empty, inline undo
    /// rows whose path is not in this set are skipped.
    checkpoint_paths: HashSet<String>,
    /// Copied from the graph. Together with `checkpoint_paths` these form the
    /// "strong" candidates when resolving a pathless partial fate.
    strong_path_hints: HashSet<String>,
    /// Copied from the graph. Only consulted for resolution when no strong
    /// candidate remains.
    weak_path_hints: HashSet<String>,
    /// Path -> content for every original file state of the graph that
    /// carries content.
    original_state_contents: HashMap<String, String>,
    /// Session-level totals copied from the graph.
    // NOTE(review): presumably consumed by the legacy session-totals parser — confirm.
    total_lines_added: Option<i64>,
    total_lines_removed: Option<i64>,
    /// Partial fates that already carry an explicit path; handled by
    /// `ingest_partial_fates_fallback` and excluded from the new-schema pass.
    partial_targets: Vec<PartialTarget>,
    /// Legacy code-block entries copied from the graph.
    legacy_targets: Vec<LegacyTarget>,
}

/// A partial-fates entry whose target file is already known.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct PartialTarget {
    /// Id used to build the `codeBlockPartialInlineDiffFates:{composer}:{id}`
    /// lookup key in the vscdb.
    partial_id: String,
    /// Path of the file the resulting change op is attributed to.
    abs_path: String,
}

/// Local copy of one inline hint from a session graph, used to map pathless
/// new-schema partial fates onto a file.
#[derive(Debug, Clone)]
struct NewInlineHint {
    /// Call id reported for a partial that resolves through this hint.
    call_id: String,
    /// File path the hint points at.
    abs_path: String,
    /// Timestamp reported for a partial that resolves through this hint.
    timestamp: Option<String>,
    /// Copied verbatim from the graph hint.
    // NOTE(review): presumably the pre-edit text lines, compared against
    // removed lines during scoring — the scorer is not visible here; confirm.
    original_text_lines: Vec<String>,
}

/// Successful outcome of resolving a pathless new-schema partial fate.
#[derive(Debug, Clone)]
struct ResolvedNewSchemaPartial {
    /// Resolved file path; used as the grouping key in
    /// `ingest_new_schema_partial_fates`.
    abs_path: String,
    /// Call id to attribute the grouped op to.
    call_id: String,
    /// Timestamp to attribute the grouped op to, when one is known.
    timestamp: Option<String>,
}

/// Per-repo-root content index, cached in a map of indexes that is shared
/// across partials while resolving new-schema partial fates.
#[derive(Debug, Clone, Default)]
struct RepoContentIndex {
    // NOTE(review): presumably maps a line hash to the `(path, occurrence count)`
    // pairs of repo files containing that line — the builder is not visible
    // here; confirm.
    line_hash_hits: HashMap<String, Vec<(String, i64)>>,
}

/// Score card for one candidate path considered by the content-based
/// resolver. Every counter is "higher is better".
#[derive(Debug, Clone, Default, PartialEq, Eq)]
struct ContentMatchScore {
    abs_path: String,
    historical_removed_overlap: usize,
    live_added_overlap: usize,
    live_removed_overlap: usize,
    checkpoint_match: usize,
    strong_hint_match: usize,
    weak_hint_match: usize,
    basename_match: usize,
    path_proximity: usize,
}

impl ContentMatchScore {
    /// Returns all counters in field declaration order (the path itself is
    /// excluded) so that candidates can be compared lexicographically.
    fn ranking_tuple(&self) -> (usize, usize, usize, usize, usize, usize, usize, usize) {
        let Self {
            abs_path: _,
            historical_removed_overlap,
            live_added_overlap,
            live_removed_overlap,
            checkpoint_match,
            strong_hint_match,
            weak_hint_match,
            basename_match,
            path_proximity,
        } = self;

        (
            *historical_removed_overlap,
            *live_added_overlap,
            *live_removed_overlap,
            *checkpoint_match,
            *strong_hint_match,
            *weak_hint_match,
            *basename_match,
            *path_proximity,
        )
    }

    /// Weighted sum of the three content-overlap counters
    /// (weights 100 / 10 / 1).
    fn primary_score(&self) -> usize {
        let historical = self.historical_removed_overlap * 100;
        let added = self.live_added_overlap * 10;
        historical + added + self.live_removed_overlap
    }

    /// Weighted sum of the five path-hint counters
    /// (weights 100 / 10 / 5 / 2 / 1).
    fn secondary_score(&self) -> usize {
        let checkpoint = self.checkpoint_match * 100;
        let strong = self.strong_hint_match * 10;
        let weak = self.weak_hint_match * 5;
        let basename = self.basename_match * 2;
        checkpoint + strong + weak + basename + self.path_proximity
    }
}

/// Outcome of the content-based resolver
/// (`resolve_new_schema_partial_with_content`).
#[derive(Debug, Clone)]
enum ContentResolverDecision {
    /// A single path was chosen; the caller returns it as the resolution.
    Resolved(ResolvedNewSchemaPartial),
    /// Content was inspected but no single path could be chosen; the caller
    /// reports an ambiguity error that mentions `candidate_count`.
    Unresolved { candidate_count: usize },
    /// Nothing usable to compare (no plausible repo root, no line hashes, or
    /// no candidate paths); the caller falls back to its own ambiguity error.
    NoSignal,
}

/// Parse error captured for later reporting.
// NOTE(review): the fields mirror the call id / timestamp / parser name /
// message arguments passed to `insert_parse_error`; presumably queued by the
// legacy code-block fallback, which is not visible here — confirm.
#[derive(Debug, Clone)]
struct LegacyDeferredParseError {
    call_id: String,
    timestamp: Option<String>,
    parser_name: &'static str,
    error: String,
}

/// One legacy code-block entry copied out of a session graph.
#[derive(Debug, Clone)]
struct LegacyTarget {
    diff_id: Option<String>,
    abs_path: String,
    version: i32,
    code_block_idx: i32,
    timestamp: Option<String>,
    content: Option<String>,
    bubble_id: Option<String>,
}

impl LegacyTarget {
    /// Identifier for this entry: the diff id when present, otherwise the
    /// bubble id, otherwise a synthetic
    /// `legacy-content:{path}:{version}:{code_block_idx}` key.
    fn call_id(&self) -> String {
        match (&self.diff_id, &self.bubble_id) {
            (Some(diff_id), _) => diff_id.clone(),
            (None, Some(bubble_id)) => bubble_id.clone(),
            (None, None) => format!(
                "legacy-content:{}:{}:{}",
                self.abs_path, self.version, self.code_block_idx
            ),
        }
    }

    /// Combines version and code-block index as
    /// `version * 1000 + max(code_block_idx, 0)`, saturating at the `i32`
    /// bounds instead of overflowing.
    fn op_index(&self) -> i32 {
        // Negative block indices are clamped to zero before being added.
        let block_offset = self.code_block_idx.max(0);
        let version_base = self.version.saturating_mul(1000);
        version_base.saturating_add(block_offset)
    }
}

/// Ingests Cursor code changes from the vscdb located by `cursor_vscdb_path`.
///
/// When no vscdb is found, nothing is ingested and an otherwise-default
/// summary labelled with the `"cursor"` provider is returned.
///
/// # Errors
///
/// Propagates failures from `cursor_vscdb_path` and from the path-based
/// ingest.
#[allow(dead_code)]
pub fn ingest_cursor_code_changes(
    conn: &mut Connection,
    verbose: bool,
) -> Result<ProviderCodeChangeSummary> {
    match cursor_vscdb_path()? {
        Some(vscdb_path) => ingest_cursor_code_changes_from_path(conn, &vscdb_path, verbose),
        None => Ok(ProviderCodeChangeSummary {
            provider: "cursor".to_string(),
            ..ProviderCodeChangeSummary::default()
        }),
    }
}

/// Ingests every vscdb in `sources`, in order, and returns the sum of the
/// per-source summaries under the `"cursor"` provider label.
///
/// `progress`, when supplied, is lent to each per-source ingest in turn.
///
/// # Errors
///
/// Stops at, and returns, the first per-source failure.
pub fn ingest_cursor_code_changes_from_sources(
    conn: &mut Connection,
    sources: &[PathBuf],
    verbose: bool,
    mut progress: Option<&mut dyn IngestProgressObserver>,
) -> Result<ProviderCodeChangeSummary> {
    /// Adds every counter of `part` onto `total`.
    fn absorb(total: &mut ProviderCodeChangeSummary, part: ProviderCodeChangeSummary) {
        total.sources_discovered += part.sources_discovered;
        total.sources_skipped += part.sources_skipped;
        total.tool_calls_inspected += part.tool_calls_inspected;
        total.ops_upserted += part.ops_upserted;
        total.parse_errors += part.parse_errors;
        total.legacy_sessions_considered += part.legacy_sessions_considered;
        total.legacy_entries_inspected += part.legacy_entries_inspected;
        total.legacy_diff_rows_found += part.legacy_diff_rows_found;
        total.legacy_ops_upserted += part.legacy_ops_upserted;
        total.legacy_parse_errors += part.legacy_parse_errors;
    }

    let mut combined = ProviderCodeChangeSummary {
        provider: "cursor".to_string(),
        ..ProviderCodeChangeSummary::default()
    };

    for source in sources {
        // Reborrow the observer for this iteration only, so the same observer
        // can be handed to the next source as well.
        let observer: Option<&mut dyn IngestProgressObserver> = match progress.as_mut() {
            Some(inner) => Some(&mut **inner),
            None => None,
        };
        let part =
            ingest_cursor_code_changes_from_path_with_progress(conn, source, verbose, observer)?;
        absorb(&mut combined, part);
    }

    Ok(combined)
}

pub(crate) fn ingest_cursor_code_changes_from_path(
    conn: &mut Connection,
    vscdb_path: &Path,
    verbose: bool,
) -> Result<ProviderCodeChangeSummary> {
    ingest_cursor_code_changes_from_path_with_progress(conn, vscdb_path, verbose, None)
}

/// Ingests the code changes recorded in one Cursor vscdb file into `conn`.
///
/// Steps, in order:
/// 1. Compare the file's signature (mtime, size) with the stored ingest
///    cursor; on an exact match the source is counted as skipped and nothing
///    else happens.
/// 2. Open the vscdb read-only and start an immediate transaction on `conn`.
/// 3. Load the session graphs, derive the candidate sessions and upsert them.
/// 4. Run the extraction passes — tool writes, inline undo rows, old-schema
///    partial fates, new-schema partial fates, legacy code blocks — which
///    share `seen_paths_by_session` and append to one `ops` list.
/// 5. Reconcile the collected ops, store the new ingest cursor and commit.
///
/// `progress`, when supplied, receives a `set_phase` before and an `advance`
/// after each step.
///
/// # Errors
///
/// Any failure returns early through `?` before `tx.commit()`, so the
/// transaction is dropped without being committed.
fn ingest_cursor_code_changes_from_path_with_progress(
    conn: &mut Connection,
    vscdb_path: &Path,
    verbose: bool,
    mut progress: Option<&mut dyn IngestProgressObserver>,
) -> Result<ProviderCodeChangeSummary> {
    let mut summary = ProviderCodeChangeSummary {
        provider: "cursor".to_string(),
        sources_discovered: 1,
        ..ProviderCodeChangeSummary::default()
    };

    let source_file = vscdb_path.to_string_lossy().to_string();
    let sig = file_signature(vscdb_path)?;

    // Fast path: an unchanged (mtime, size) pair means this file was already
    // ingested, so skip it entirely.
    if let Some((mtime, size)) = sig {
        let cursor = storage::get_ingest_cursor(conn, CURSOR_CURSOR_NAMESPACE, &source_file)?;
        if let Some(cursor) = cursor
            && cursor.file_mtime == mtime
            && cursor.file_size == size
        {
            summary.sources_skipped += 1;
            if let Some(observer) = progress.as_mut() {
                observer.advance("cursor changes (cached)");
            }
            return Ok(summary);
        }
    }

    if let Some(observer) = progress.as_mut() {
        observer.set_phase("opening vscdb");
    }
    // The Cursor database is only ever read from here.
    let vscdb = Connection::open_with_flags(
        vscdb_path,
        OpenFlags::SQLITE_OPEN_READ_ONLY | OpenFlags::SQLITE_OPEN_NO_MUTEX,
    )?;
    if let Some(observer) = progress.as_mut() {
        observer.advance("cursor vscdb opened");
    }
    // All writes below (sessions, parse errors, ops, ingest cursor) go through
    // this single transaction on the destination database.
    let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;

    if let Some(observer) = progress.as_mut() {
        observer.set_phase("session graphs");
    }
    // The observer is reborrowed so it stays usable for the later phases.
    let graphs = match progress.as_mut() {
        Some(obs) => {
            load_cursor_session_graphs_with_observer(&vscdb, &source_file, Some(&mut **obs))?
        }
        None => load_cursor_session_graphs_with_observer(&vscdb, &source_file, None)?,
    };
    if let Some(observer) = progress.as_mut() {
        observer.advance("cursor session graphs");
    }

    if let Some(observer) = progress.as_mut() {
        observer.set_phase("candidate sessions");
    }
    // Index the graphs by composer id; that id doubles as the session id.
    let graph_by_session: HashMap<String, CursorSessionGraph> = graphs
        .into_iter()
        .map(|graph| (graph.composer_id.clone(), graph))
        .collect();
    let sessions = collect_candidate_sessions_from_graphs(&graph_by_session);
    for session in sessions.values() {
        storage::upsert_change_session(&tx, &session.info)?;
    }
    if let Some(observer) = progress.as_mut() {
        observer.advance("cursor candidate sessions");
    }

    // Paths that already produced an op, per session. The fallback passes
    // consult this so they do not emit a second op for the same file.
    let mut seen_paths_by_session: HashMap<String, HashSet<String>> = HashMap::new();
    // Every pass appends here; the list is reconciled in one go at the end.
    let mut ops = Vec::new();

    if let Some(observer) = progress.as_mut() {
        observer.set_phase("tool writes");
    }
    ingest_tool_call_writes(
        &sessions,
        &graph_by_session,
        &mut seen_paths_by_session,
        &mut ops,
        &mut summary,
    );
    if let Some(observer) = progress.as_mut() {
        observer.advance("cursor tool writes");
    }

    if let Some(observer) = progress.as_mut() {
        observer.set_phase("inline undo rows");
    }
    ingest_inline_undo_rows(
        &tx,
        &source_file,
        &sessions,
        &graph_by_session,
        &mut seen_paths_by_session,
        &mut ops,
        &mut summary,
        verbose,
    )?;
    if let Some(observer) = progress.as_mut() {
        observer.advance("cursor inline undo rows");
    }

    if let Some(observer) = progress.as_mut() {
        observer.set_phase("partial fates (old)");
    }
    // This is the only pass that queries the vscdb directly; the others work
    // from the already loaded graphs.
    ingest_partial_fates_fallback(
        &tx,
        &vscdb,
        &source_file,
        &sessions,
        &mut seen_paths_by_session,
        &mut ops,
        &mut summary,
        verbose,
    )?;
    if let Some(observer) = progress.as_mut() {
        observer.advance("cursor partial fates (old)");
    }

    if let Some(observer) = progress.as_mut() {
        observer.set_phase("partial fates (new)");
    }
    ingest_new_schema_partial_fates(
        &tx,
        &source_file,
        &sessions,
        &graph_by_session,
        &mut seen_paths_by_session,
        &mut ops,
        &mut summary,
        verbose,
    )?;
    if let Some(observer) = progress.as_mut() {
        observer.advance("cursor partial fates (new)");
    }

    if let Some(observer) = progress.as_mut() {
        observer.set_phase("legacy code blocks");
    }
    ingest_legacy_code_block_fallback(
        &tx,
        &graph_by_session,
        &source_file,
        &sessions,
        &mut seen_paths_by_session,
        &mut ops,
        &mut summary,
        verbose,
    )?;
    if let Some(observer) = progress.as_mut() {
        observer.advance("cursor legacy code blocks");
    }

    if let Some(observer) = progress.as_mut() {
        observer.set_phase("reconciling + commit");
    }
    let reconcile = storage::reconcile_source_tool_writes(&tx, "cursor", &source_file, &ops)?;
    summary.ops_upserted += reconcile.ops_upserted;

    // Record the signature that was read before ingesting, so an unchanged
    // file is skipped on the next run.
    if let Some((mtime, size)) = sig {
        storage::upsert_ingest_cursor(&tx, CURSOR_CURSOR_NAMESPACE, &source_file, mtime, size)?;
    }

    tx.commit()?;
    if let Some(observer) = progress.as_mut() {
        observer.advance("cursor committed");
    }

    Ok(summary)
}

/// Builds a `CandidateSession` for every graph that reports itself as a
/// candidate edit session, keyed by the same session id as `graphs`.
///
/// All data is cloned out of the graph; graphs that are not candidates are
/// left out of the result.
fn collect_candidate_sessions_from_graphs(
    graphs: &HashMap<String, CursorSessionGraph>,
) -> HashMap<String, CandidateSession> {
    graphs
        .iter()
        .filter(|(_, graph)| graph.is_candidate_edit_session())
        .map(|(session_id, graph)| {
            // Keep only the original file states that actually carry content.
            let mut original_state_contents = HashMap::new();
            for (path, state) in graph.original_file_states.iter() {
                if let Some(content) = state.content.as_ref() {
                    original_state_contents.insert(path.clone(), content.clone());
                }
            }

            let partial_targets = graph
                .partial_targets
                .iter()
                .map(|source| PartialTarget {
                    partial_id: source.partial_id.clone(),
                    abs_path: source.abs_path.clone(),
                })
                .collect();

            let legacy_targets = graph
                .legacy_targets
                .iter()
                .map(|source| LegacyTarget {
                    diff_id: source.diff_id.clone(),
                    abs_path: source.abs_path.clone(),
                    version: source.version,
                    code_block_idx: source.code_block_idx,
                    timestamp: source.timestamp.clone(),
                    content: source.content.clone(),
                    bubble_id: source.bubble_id.clone(),
                })
                .collect();

            let info = SessionInfo {
                provider: "cursor".to_string(),
                session_id: session_id.clone(),
                source_file: graph.source_file.clone(),
                session_cwd: None,
                last_seen_at: graph.last_seen_at(),
            };

            let candidate = CandidateSession {
                info,
                checkpoint_paths: graph.checkpoint_paths.clone(),
                strong_path_hints: graph.strong_path_hints.clone(),
                weak_path_hints: graph.weak_path_hints.clone(),
                original_state_contents,
                total_lines_added: graph.total_lines_added,
                total_lines_removed: graph.total_lines_removed,
                partial_targets,
                legacy_targets,
            };

            (session_id.clone(), candidate)
        })
        .collect()
}

/// Turns the resolved tool-call edits of every candidate session into change
/// op candidates.
///
/// For each edit this bumps `summary.tool_calls_inspected`, appends one op to
/// `ops`, and records the edited path in `seen_paths_by_session`. Sessions
/// without resolved edits get no entry in `seen_paths_by_session`.
fn ingest_tool_call_writes(
    sessions: &HashMap<String, CandidateSession>,
    graphs: &HashMap<String, CursorSessionGraph>,
    seen_paths_by_session: &mut HashMap<String, HashSet<String>>,
    ops: &mut Vec<ChangeOpCandidate>,
    summary: &mut ProviderCodeChangeSummary,
) {
    for (session_id, graph) in graphs {
        let session = match sessions.get(session_id) {
            Some(found) => found,
            None => continue,
        };

        let edits = resolve_tool_call_edits(graph);
        if edits.is_empty() {
            continue;
        }

        let session_seen_paths = seen_paths_by_session.entry(session_id.clone()).or_default();
        for edit in edits {
            summary.tool_calls_inspected += 1;

            let candidate = build_change_op_candidate(
                session,
                edit.call_id,
                edit.op_index,
                edit.timestamp,
                &edit.abs_path,
                edit.write_mode,
                edit.before_known,
                edit.added_lines,
                edit.removed_lines,
                &edit.parser_name,
                edit.line_hashes,
            );
            ops.push(candidate);

            // The path is no longer needed by the edit, so hand it to the set.
            session_seen_paths.insert(edit.abs_path);
        }
    }
}

/// Converts the inline undo rows of every candidate session into change ops.
///
/// Every row counts as an inspected tool call. A row is skipped when the
/// session has checkpoint paths and the row's path is not one of them. Rows
/// that fail to build are recorded through `insert_parse_error` (using the
/// row timestamp, or the session's `last_seen_at` when the row has none);
/// rows that succeed are appended to `ops` and their path is marked as seen.
///
/// # Errors
///
/// Returns an error only when recording a parse error fails.
#[expect(
    clippy::too_many_arguments,
    reason = "cursor ingestion threads database handles, mutable accumulators, and source metadata through one helper"
)]
fn ingest_inline_undo_rows(
    conn: &Connection,
    source_file: &str,
    sessions: &HashMap<String, CandidateSession>,
    graphs: &HashMap<String, CursorSessionGraph>,
    seen_paths_by_session: &mut HashMap<String, HashSet<String>>,
    ops: &mut Vec<ChangeOpCandidate>,
    summary: &mut ProviderCodeChangeSummary,
    verbose: bool,
) -> Result<()> {
    for (composer_id, graph) in graphs {
        let session = match sessions.get(composer_id) {
            Some(found) => found,
            None => continue,
        };

        for row in &graph.inline_undo_rows {
            summary.tool_calls_inspected += 1;

            // An empty checkpoint set places no restriction on the path.
            let path_allowed = session.checkpoint_paths.is_empty()
                || session.checkpoint_paths.contains(&row.abs_path);
            if !path_allowed {
                continue;
            }

            match build_inline_change_op(&row.call_id, &row.payload, &row.abs_path, session) {
                Ok(op) => {
                    ops.push(op);
                    seen_paths_by_session
                        .entry(composer_id.clone())
                        .or_default()
                        .insert(row.abs_path.clone());
                }
                Err(err) => {
                    let timestamp = row
                        .timestamp
                        .clone()
                        .or_else(|| session.info.last_seen_at.clone());
                    insert_parse_error(
                        conn,
                        summary,
                        &session.info.session_id,
                        source_file,
                        &row.call_id,
                        timestamp,
                        INLINE_PARSER_NAME,
                        err,
                        verbose,
                    )?;
                }
            }
        }
    }

    Ok(())
}

/// Old-schema fallback: builds change ops from the
/// `codeBlockPartialInlineDiffFates:{composer_id}:{partial_id}` rows of the
/// vscdb for every explicit partial target of every candidate session.
///
/// Every target counts as an inspected tool call. A target is skipped when
/// its path already produced an op in this session, or when its partial id
/// was already handled for this session. A missing row, invalid JSON, or a
/// builder failure is recorded through `insert_parse_error` and the target is
/// skipped. Successful ops are appended to `ops` and their path is marked as
/// seen.
///
/// * `conn` — destination database, used for recording parse errors.
/// * `vscdb` — Cursor database the payload rows are read from.
///
/// # Errors
///
/// Returns an error when the vscdb lookup itself fails or when recording a
/// parse error fails.
#[expect(
    clippy::too_many_arguments,
    reason = "cursor fallback ingestion needs shared mutable state plus parser context in one place"
)]
fn ingest_partial_fates_fallback(
    conn: &Connection,
    vscdb: &Connection,
    source_file: &str,
    sessions: &HashMap<String, CandidateSession>,
    seen_paths_by_session: &mut HashMap<String, HashSet<String>>,
    ops: &mut Vec<ChangeOpCandidate>,
    summary: &mut ProviderCodeChangeSummary,
    verbose: bool,
) -> Result<()> {
    for (composer_id, session) in sessions {
        // The lookup key is `prefix:{composer_id}:{partial_id}` and the
        // composer id is fixed inside this loop, so de-duplicating on the
        // borrowed partial id is equivalent to de-duplicating on the full key
        // and avoids cloning the key for every target.
        let mut seen_partial_ids: HashSet<&str> = HashSet::new();

        for target in &session.partial_targets {
            summary.tool_calls_inspected += 1;

            if seen_paths_by_session
                .get(composer_id)
                .is_some_and(|set| set.contains(&target.abs_path))
            {
                continue;
            }

            if !seen_partial_ids.insert(target.partial_id.as_str()) {
                continue;
            }

            let partial_key = format!(
                "codeBlockPartialInlineDiffFates:{}:{}",
                composer_id, target.partial_id
            );

            // `prepare_cached` compiles the statement once per connection and
            // reuses it for every further target instead of re-preparing the
            // same SQL text on each lookup. It is still prepared lazily, so a
            // run without targets never touches the table.
            let raw_payload: Option<String> = vscdb
                .prepare_cached("SELECT value FROM cursorDiskKV WHERE key = ?1")?
                .query_row(params![partial_key], |row| row.get(0))
                .optional()?;

            let Some(raw_payload) = raw_payload else {
                insert_parse_error(
                    conn,
                    summary,
                    composer_id,
                    source_file,
                    &partial_key,
                    session.info.last_seen_at.clone(),
                    PARTIAL_PARSER_NAME,
                    "partial fates key not found".to_string(),
                    verbose,
                )?;
                continue;
            };

            let parsed: Value = match serde_json::from_str(&raw_payload) {
                Ok(v) => v,
                Err(e) => {
                    insert_parse_error(
                        conn,
                        summary,
                        composer_id,
                        source_file,
                        &partial_key,
                        session.info.last_seen_at.clone(),
                        PARTIAL_PARSER_NAME,
                        format!("invalid partial fates JSON: {}", e),
                        verbose,
                    )?;
                    continue;
                }
            };

            let op = match build_partial_fates_op(&partial_key, &parsed, &target.abs_path, session)
            {
                Ok(op) => op,
                Err(err) => {
                    insert_parse_error(
                        conn,
                        summary,
                        composer_id,
                        source_file,
                        &partial_key,
                        session.info.last_seen_at.clone(),
                        PARTIAL_PARSER_NAME,
                        err,
                        verbose,
                    )?;
                    continue;
                }
            };

            // `Ok(None)` means the payload produced nothing to record.
            let Some(op) = op else {
                continue;
            };

            ops.push(op);

            seen_paths_by_session
                .entry(composer_id.clone())
                .or_default()
                .insert(target.abs_path.clone());
        }
    }

    Ok(())
}

/// New-schema pass: handles partial fates that carry no explicit path.
///
/// For every candidate session, each partial fate that is not already listed
/// in the session's explicit `partial_targets` is resolved to a file with
/// `resolve_new_schema_partial_path`. Partials that resolve to the same path
/// are grouped and turned into a single op per path. Resolution and build
/// failures are collected per session, de-duplicated by error message, and
/// recorded through `insert_parse_error` after the session's ops were built.
///
/// # Errors
///
/// Returns an error only when recording a parse error fails.
#[expect(
    clippy::too_many_arguments,
    reason = "new-schema cursor fallback needs the same shared mutable parser state as the legacy paths"
)]
fn ingest_new_schema_partial_fates(
    conn: &Connection,
    source_file: &str,
    sessions: &HashMap<String, CandidateSession>,
    graphs: &HashMap<String, CursorSessionGraph>,
    seen_paths_by_session: &mut HashMap<String, HashSet<String>>,
    ops: &mut Vec<ChangeOpCandidate>,
    summary: &mut ProviderCodeChangeSummary,
    verbose: bool,
) -> Result<()> {
    let inline_hints_by_session = collect_new_inline_hints_from_graphs(graphs, sessions);
    let partials_by_session = collect_new_partial_fates_from_graphs(graphs, sessions);
    // Shared across all sessions and partials, keyed by a string.
    // NOTE(review): presumably the repo root, so each repo is indexed once — confirm.
    let mut repo_content_indexes: HashMap<String, RepoContentIndex> = HashMap::new();

    for (composer_id, session) in sessions {
        let Some(partials) = partials_by_session.get(composer_id) else {
            continue;
        };

        // Partials with an explicit target belong to the old-schema fallback.
        let explicit_partial_ids: HashSet<&str> = session
            .partial_targets
            .iter()
            .map(|target| target.partial_id.as_str())
            .collect();
        let inline_hints = inline_hints_by_session
            .get(composer_id)
            .map(Vec::as_slice)
            .unwrap_or(&[]);
        // Snapshot of the paths seen by earlier passes. Resolution works
        // against this snapshot; the live map is only consulted (and updated)
        // in the op-building loop further down.
        let prior_seen_paths = seen_paths_by_session
            .get(composer_id)
            .cloned()
            .unwrap_or_default();

        let mut grouped_by_path: HashMap<String, NewPartialGroup> = HashMap::new();
        // Error message -> (call id, timestamp) of the first occurrence, so a
        // repeated message is reported once per session.
        let mut deferred_errors: HashMap<String, (String, Option<String>)> = HashMap::new();

        for (partial_id, payload) in partials {
            if explicit_partial_ids.contains(partial_id.as_str()) {
                continue;
            }

            summary.tool_calls_inspected += 1;

            match resolve_new_schema_partial_path(
                session,
                partial_id,
                payload,
                inline_hints,
                &prior_seen_paths,
                &mut repo_content_indexes,
            ) {
                Ok(Some(resolved)) => {
                    // The first partial for a path seeds the group's call id
                    // and timestamp.
                    let entry = grouped_by_path.entry(resolved.abs_path).or_insert_with(|| {
                        (
                            resolved.call_id.clone(),
                            resolved.timestamp.clone(),
                            Vec::new(),
                        )
                    });
                    // Fill in a timestamp if the group does not have one yet.
                    if entry.1.is_none() {
                        entry.1 = resolved.timestamp.clone();
                    }
                    // If the group's call id equals this partial's own id,
                    // replace it with the differing call id this resolution
                    // supplied.
                    if entry.0 == *partial_id && resolved.call_id != *partial_id {
                        entry.0 = resolved.call_id;
                    }
                    entry.2.push((partial_id.clone(), payload.clone()));
                }
                // Nothing to record for this partial.
                Ok(None) => {}
                Err(err) => {
                    deferred_errors
                        .entry(err)
                        .or_insert_with(|| (partial_id.clone(), session.info.last_seen_at.clone()));
                }
            }
        }

        for (abs_path, (call_id, timestamp, grouped_partials)) in grouped_by_path {
            // Checked against the live map, not the snapshot taken above.
            if seen_paths_by_session
                .get(composer_id)
                .is_some_and(|set| set.contains(&abs_path))
            {
                continue;
            }

            let op = match build_new_schema_partial_fates_op(
                &call_id,
                &grouped_partials,
                &abs_path,
                timestamp,
                session,
            ) {
                Ok(op) => op,
                Err(err) => {
                    deferred_errors
                        .entry(err)
                        .or_insert_with(|| (call_id.clone(), session.info.last_seen_at.clone()));
                    continue;
                }
            };

            // `Ok(None)` means the group produced nothing to record.
            let Some(op) = op else {
                continue;
            };

            ops.push(op);
            seen_paths_by_session
                .entry(composer_id.clone())
                .or_default()
                .insert(abs_path);
        }

        // Report the de-duplicated errors for this session.
        for (error, (call_id, timestamp)) in deferred_errors {
            insert_parse_error(
                conn,
                summary,
                composer_id,
                source_file,
                &call_id,
                timestamp,
                NEW_SCHEMA_PARSER_NAME,
                error,
                verbose,
            )?;
        }
    }

    Ok(())
}

/// Copies the inline hints of every graph that has a candidate session into
/// local `NewInlineHint` values, keyed by composer id.
///
/// Graphs without a matching entry in `sessions` are left out; a candidate
/// session whose graph has no hints still gets an (empty) entry.
fn collect_new_inline_hints_from_graphs(
    graphs: &HashMap<String, CursorSessionGraph>,
    sessions: &HashMap<String, CandidateSession>,
) -> HashMap<String, Vec<NewInlineHint>> {
    graphs
        .iter()
        .filter(|(composer_id, _)| sessions.contains_key(*composer_id))
        .map(|(composer_id, graph)| {
            let mut hints: Vec<NewInlineHint> = Vec::new();
            for source in graph.inline_hints.iter() {
                hints.push(NewInlineHint {
                    call_id: source.call_id.clone(),
                    abs_path: source.abs_path.clone(),
                    timestamp: source.timestamp.clone(),
                    original_text_lines: source.original_text_lines.clone(),
                });
            }
            (composer_id.clone(), hints)
        })
        .collect()
}

/// Copies the `(partial_id, payload)` pairs of every graph that has a
/// candidate session, keyed by composer id.
///
/// Graphs without a matching entry in `sessions` are left out; a candidate
/// session whose graph has no partial fates still gets an (empty) entry.
fn collect_new_partial_fates_from_graphs(
    graphs: &HashMap<String, CursorSessionGraph>,
    sessions: &HashMap<String, CandidateSession>,
) -> HashMap<String, Vec<(String, Value)>> {
    graphs
        .iter()
        .filter(|(composer_id, _)| sessions.contains_key(*composer_id))
        .map(|(composer_id, graph)| {
            let mut fates: Vec<(String, Value)> = Vec::new();
            for (partial_id, payload) in graph.partial_fates.iter() {
                fates.push((partial_id.clone(), payload.clone()));
            }
            (composer_id.clone(), fates)
        })
        .collect()
}

/// Decides which file a pathless new-schema partial fate belongs to.
///
/// The first rule that applies wins:
/// 1. The payload yields no added and no removed lines -> `Ok(None)`.
/// 2. Exactly one inline hint points at a path not in `prior_seen_paths`
///    -> that hint.
/// 3. Scoring the remaining hints yields a unique best score with a non-zero
///    value in one of its first four components -> that hint.
/// 4. Exactly one unseen checkpoint / strong-hint path -> that path.
/// 5. All remaining inline hints share one path -> that path.
/// 6. No unseen strong candidate and exactly one unseen weak hint
///    -> that path.
/// 7. Otherwise a candidate set is built and handed to the content resolver;
///    an empty candidate set yields `Ok(None)`.
///
/// # Errors
///
/// Returns a message when `accepted_partial_fate_lines` or the content
/// resolver fails, or when the mapping stays ambiguous after step 7.
fn resolve_new_schema_partial_path(
    session: &CandidateSession,
    partial_id: &str,
    payload: &Value,
    inline_hints: &[NewInlineHint],
    prior_seen_paths: &HashSet<String>,
    repo_content_indexes: &mut HashMap<String, RepoContentIndex>,
) -> std::result::Result<Option<ResolvedNewSchemaPartial>, String> {
    let (added_lines, removed_lines) = accepted_partial_fate_lines(payload)?;
    if added_lines.is_empty() && removed_lines.is_empty() {
        return Ok(None);
    }

    // Hints whose file was already covered by an earlier pass are ignored.
    let inline_candidates: Vec<&NewInlineHint> = inline_hints
        .iter()
        .filter(|hint| !prior_seen_paths.contains(&hint.abs_path))
        .collect();

    // A single remaining hint is accepted without looking at its content.
    if inline_candidates.len() == 1 {
        let hint = inline_candidates[0];
        return Ok(Some(ResolvedNewSchemaPartial {
            abs_path: hint.abs_path.clone(),
            call_id: hint.call_id.clone(),
            timestamp: hint.timestamp.clone(),
        }));
    }

    let scored_inline: Vec<(InlineHintScore, &NewInlineHint)> = inline_candidates
        .iter()
        .map(|hint| {
            (
                score_new_schema_inline_hint(
                    session,
                    hint,
                    &removed_lines,
                    removed_lines.is_empty(),
                ),
                *hint,
            )
        })
        .collect();

    if let Some((score, hint)) = scored_inline.iter().max_by_key(|(score, _)| *score) {
        let best_score = *score;
        // A tie for the best score means the hint is not a unique winner.
        let best_count = scored_inline
            .iter()
            .filter(|(other, _)| *other == best_score)
            .count();
        // The fifth score component alone is not enough to accept a hint.
        let has_non_weak_signal =
            best_score.0 > 0 || best_score.1 > 0 || best_score.2 > 0 || best_score.3 > 0;
        if best_count == 1 && has_non_weak_signal {
            return Ok(Some(ResolvedNewSchemaPartial {
                abs_path: hint.abs_path.clone(),
                call_id: hint.call_id.clone(),
                timestamp: hint.timestamp.clone(),
            }));
        }
    }

    // Checkpoint paths and strong hints together form the strong candidates.
    let strong_candidates: HashSet<String> = session
        .checkpoint_paths
        .iter()
        .chain(session.strong_path_hints.iter())
        .filter(|path| !prior_seen_paths.contains(*path))
        .cloned()
        .collect();
    if strong_candidates.len() == 1 {
        let abs_path = strong_candidates
            .into_iter()
            .next()
            .expect("len checked above");
        return Ok(Some(resolve_partial_with_path(
            partial_id,
            &abs_path,
            inline_hints,
            session,
        )));
    }

    // Several hints may still agree on one file.
    let inline_paths: HashSet<String> = inline_candidates
        .iter()
        .map(|hint| hint.abs_path.clone())
        .collect();
    if inline_paths.len() == 1 {
        let abs_path = inline_paths.into_iter().next().expect("len checked above");
        return Ok(Some(resolve_partial_with_path(
            partial_id,
            &abs_path,
            inline_hints,
            session,
        )));
    }

    let weak_candidates: HashSet<String> = session
        .weak_path_hints
        .iter()
        .filter(|path| !prior_seen_paths.contains(*path))
        .cloned()
        .collect();
    // A lone weak hint is only trusted when no strong candidate competes.
    if strong_candidates.is_empty() && weak_candidates.len() == 1 {
        let abs_path = weak_candidates
            .into_iter()
            .next()
            .expect("len checked above");
        return Ok(Some(resolve_partial_with_path(
            partial_id,
            &abs_path,
            inline_hints,
            session,
        )));
    }

    // Candidate set for the content resolver: prefer inline hints with a
    // non-zero first or second score component; otherwise fall back to every
    // unseen hinted path for which `is_cursor_plan_path` is false.
    let overlapping_inline_paths: HashSet<String> = scored_inline
        .iter()
        .filter(|(score, _)| score.0 > 0 || score.1 > 0)
        .map(|(_, hint)| hint.abs_path.clone())
        .collect();
    let candidate_paths: HashSet<String> = if !overlapping_inline_paths.is_empty() {
        overlapping_inline_paths
    } else {
        inline_hints
            .iter()
            .map(|hint| hint.abs_path.clone())
            .chain(session.checkpoint_paths.iter().cloned())
            .chain(session.strong_path_hints.iter().cloned())
            .chain(session.weak_path_hints.iter().cloned())
            .filter(|path| !prior_seen_paths.contains(path))
            .filter(|path| !is_cursor_plan_path(path))
            .collect()
    };

    if candidate_paths.is_empty() {
        return Ok(None);
    }

    match resolve_new_schema_partial_with_content(
        session,
        partial_id,
        &added_lines,
        &removed_lines,
        inline_hints,
        &candidate_paths,
        repo_content_indexes,
    )? {
        ContentResolverDecision::Resolved(resolved) => return Ok(Some(resolved)),
        // Fall through to the generic ambiguity error below.
        ContentResolverDecision::NoSignal => {}
        // The resolver reports its own candidate count, which can differ from
        // `candidate_paths.len()` because it may add paths of its own.
        ContentResolverDecision::Unresolved { candidate_count } => {
            return Err(format!(
                "ambiguous new-schema partial fate mapping: {} candidate paths for 1 partial rows",
                candidate_count
            ));
        }
    }

    Err(format!(
        "ambiguous new-schema partial fate mapping: {} candidate paths for 1 partial rows",
        candidate_paths.len()
    ))
}

fn resolve_new_schema_partial_with_content(
    session: &CandidateSession,
    partial_id: &str,
    added_lines: &[String],
    removed_lines: &[String],
    inline_hints: &[NewInlineHint],
    export_candidate_paths: &HashSet<String>,
    repo_content_indexes: &mut HashMap<String, RepoContentIndex>,
) -> std::result::Result<ContentResolverDecision, String> {
    let Some(repo_root) = plausible_repo_root(session, inline_hints, export_candidate_paths) else {
        return Ok(ContentResolverDecision::NoSignal);
    };

    let repo_content_index = load_repo_content_index(&repo_root, repo_content_indexes)?;
    let normalized_export_candidate_paths: HashSet<String> = export_candidate_paths
        .iter()
        .map(|path| normalize_repo_candidate_path(&repo_root, path))
        .collect();
    let added_hashes = hash_count_map_for_lines(added_lines);
    let removed_hashes = hash_count_map_for_lines(removed_lines);

    if added_hashes.is_empty() && removed_hashes.is_empty() {
        return Ok(ContentResolverDecision::NoSignal);
    }

    let mut candidate_paths = normalized_export_candidate_paths.clone();
    for path in repo_candidate_paths_from_added_lines(repo_content_index, &added_hashes) {
        candidate_paths.insert(normalize_repo_candidate_path(&repo_root, &path));
    }

    if candidate_paths.is_empty() {
        return Ok(ContentResolverDecision::NoSignal);
    }

    let mut scores: Vec<ContentMatchScore> = candidate_paths
        .iter()
        .map(|abs_path| {
            score_new_schema_content_candidate(
                session,
                abs_path,
                inline_hints,
                &removed_hashes,
                &added_hashes,
                repo_content_index,
                &normalized_export_candidate_paths,
                &repo_root,
            )
        })
        .collect();

    scores.sort_by(|a, b| {
        b.ranking_tuple()
            .cmp(&a.ranking_tuple())
            .then_with(|| a.abs_path.cmp(&b.abs_path))
    });

    let Some(winner) = scores.first() else {
        return Ok(ContentResolverDecision::NoSignal);
    };
    let runner = scores.get(1);
    if !content_match_is_confident(winner, runner) {
        return Ok(ContentResolverDecision::Unresolved {
            candidate_count: candidate_paths.len(),
        });
    }

    Ok(ContentResolverDecision::Resolved(
        resolve_partial_with_path(partial_id, &winner.abs_path, inline_hints, session),
    ))
}

/// Infers the repository root shared by every path known for this partial.
///
/// Collects the repo root detected for each export candidate, checkpoint
/// path, strong/weak path hint and inline-hint path (paths with no detectable
/// root are ignored). Returns that root only when exactly one distinct root
/// was found; zero or several roots yield `None`.
fn plausible_repo_root(
    session: &CandidateSession,
    inline_hints: &[NewInlineHint],
    export_candidate_paths: &HashSet<String>,
) -> Option<String> {
    let known_paths = export_candidate_paths
        .iter()
        .chain(session.checkpoint_paths.iter())
        .chain(session.strong_path_hints.iter())
        .chain(session.weak_path_hints.iter())
        .chain(inline_hints.iter().map(|hint| &hint.abs_path));

    let mut distinct_roots: HashSet<String> = HashSet::new();
    for path in known_paths {
        if let Some(root) = detect_repo_root(Path::new(path)) {
            distinct_roots.insert(path_to_string(&root));
        }
    }

    let mut roots = distinct_roots.into_iter();
    match (roots.next(), roots.next()) {
        (Some(only_root), None) => Some(only_root),
        _ => None,
    }
}

/// Returns the content index for `repo_root`, building it and storing it in
/// `repo_content_indexes` the first time that root is requested.
///
/// # Errors
/// Propagates the error from building the index, or reports that the index
/// is missing if the lookup after insertion fails.
fn load_repo_content_index<'a>(
    repo_root: &str,
    repo_content_indexes: &'a mut HashMap<String, RepoContentIndex>,
) -> std::result::Result<&'a RepoContentIndex, String> {
    let already_indexed = repo_content_indexes.contains_key(repo_root);
    if !already_indexed {
        let fresh_index = build_repo_content_index(repo_root)?;
        repo_content_indexes.insert(repo_root.to_string(), fresh_index);
    }

    match repo_content_indexes.get(repo_root) {
        Some(index) => Ok(index),
        None => Err(format!("repo content index missing for {}", repo_root)),
    }
}

/// Builds a fresh content index by walking the directory tree under
/// `repo_root`.
///
/// # Errors
/// Propagates the error string from the directory walk.
fn build_repo_content_index(repo_root: &str) -> std::result::Result<RepoContentIndex, String> {
    let mut index = RepoContentIndex::default();
    index_repo_content_dir(Path::new(repo_root), &mut index).map(|()| index)
}

/// Recursively walks `dir` and records, for each file it can read, how often
/// every line hash occurs in that file (appended to `index.line_hash_hits`).
///
/// Skipped silently: directory entries that cannot be inspected, directories
/// named in `REPO_CONTENT_SKIP_DIRS` (or whose name is not valid UTF-8),
/// anything that is not a regular file, files larger than
/// `REPO_CONTENT_MAX_FILE_BYTES`, files `fs::read_to_string` cannot read, and
/// files that produce no line hashes.
///
/// # Errors
/// Returns an error string when `dir` itself, or any directory descended
/// into, cannot be listed.
fn index_repo_content_dir(
    dir: &Path,
    index: &mut RepoContentIndex,
) -> std::result::Result<(), String> {
    let listing = match fs::read_dir(dir) {
        Ok(listing) => listing,
        Err(e) => {
            return Err(format!(
                "failed to read repo content directory {}: {}",
                dir.display(),
                e
            ));
        }
    };

    // `flatten` drops entries that failed to load, mirroring a best-effort scan.
    for entry in listing.flatten() {
        let Ok(kind) = entry.file_type() else {
            continue;
        };
        let entry_path = entry.path();

        if kind.is_dir() {
            let dir_name = entry_path.file_name().and_then(|name| name.to_str());
            match dir_name {
                Some(name) if !REPO_CONTENT_SKIP_DIRS.contains(&name) => {
                    index_repo_content_dir(&entry_path, index)?;
                }
                _ => {}
            }
            continue;
        }

        if kind.is_symlink() || !kind.is_file() {
            continue;
        }

        let within_size_limit = entry
            .metadata()
            .is_ok_and(|metadata| metadata.len() <= REPO_CONTENT_MAX_FILE_BYTES);
        if !within_size_limit {
            continue;
        }

        let Ok(text) = fs::read_to_string(&entry_path) else {
            continue;
        };

        let hashes = hash_count_map_for_text(&text);
        if hashes.is_empty() {
            continue;
        }

        let abs_path = path_to_string(&entry_path);
        for (line_hash, occurrences) in hashes {
            let hits = index.line_hash_hits.entry(line_hash).or_default();
            hits.push((abs_path.clone(), occurrences));
        }
    }

    Ok(())
}

/// Collects every indexed file path that contains at least one of the added
/// line hashes with a positive occurrence count.
fn repo_candidate_paths_from_added_lines(
    repo_content_index: &RepoContentIndex,
    added_hashes: &HashMap<String, i64>,
) -> HashSet<String> {
    added_hashes
        .keys()
        .filter_map(|line_hash| repo_content_index.line_hash_hits.get(line_hash))
        .flatten()
        .filter(|(_, count)| *count > 0)
        .map(|(path, _)| path.clone())
        .collect()
}

/// Scores how well the file at `abs_path` explains a partial's line hashes.
///
/// The score combines:
/// - `historical_removed_overlap`: the larger of the removed-line overlap
///   with any inline hint for this file and with the session's recorded
///   original-state content for this file;
/// - `live_added_overlap` / `live_removed_overlap`: overlap of the added and
///   removed hashes with the file's entries in the repo content index;
/// - 0/1 flags for whether the file is a checkpoint path, strong hint or
///   weak hint of the session;
/// - basename and directory proximity to the *other* export candidates.
///
/// Session and hint paths are normalized against `repo_root` before being
/// compared with `abs_path`.
#[allow(clippy::too_many_arguments)]
fn score_new_schema_content_candidate(
    session: &CandidateSession,
    abs_path: &str,
    inline_hints: &[NewInlineHint],
    removed_hashes: &HashMap<String, i64>,
    added_hashes: &HashMap<String, i64>,
    repo_content_index: &RepoContentIndex,
    export_candidate_paths: &HashSet<String>,
    repo_root: &str,
) -> ContentMatchScore {
    // True when `path`, once normalized, names the candidate being scored.
    let is_candidate = |path: &str| normalize_repo_candidate_path(repo_root, path) == abs_path;

    let mut inline_removed_overlap = 0usize;
    for hint in inline_hints {
        if !is_candidate(&hint.abs_path) {
            continue;
        }
        let hint_hashes = hash_count_map_for_lines(&hint.original_text_lines);
        let overlap = overlap_hash_counts(removed_hashes, &hint_hashes);
        inline_removed_overlap = inline_removed_overlap.max(overlap);
    }

    // Only the first original-state entry that maps to this file is used.
    let original_state_removed_overlap = session
        .original_state_contents
        .iter()
        .find(|(path, _)| is_candidate(path))
        .map(|(_, content)| overlap_hash_counts(removed_hashes, &hash_count_map_for_text(content)))
        .unwrap_or(0);

    let historical_removed_overlap = inline_removed_overlap.max(original_state_removed_overlap);
    let live_added_overlap =
        overlap_repo_index_hash_counts(repo_content_index, abs_path, added_hashes);
    let live_removed_overlap =
        overlap_repo_index_hash_counts(repo_content_index, abs_path, removed_hashes);

    let basename_match = basename_match_against_export_candidates(abs_path, export_candidate_paths);
    let path_proximity =
        path_proximity_against_export_candidates(repo_root, abs_path, export_candidate_paths);

    let checkpoint_match =
        usize::from(session.checkpoint_paths.iter().any(|path| is_candidate(path)));
    let strong_hint_match =
        usize::from(session.strong_path_hints.iter().any(|path| is_candidate(path)));
    let weak_hint_match =
        usize::from(session.weak_path_hints.iter().any(|path| is_candidate(path)));

    ContentMatchScore {
        abs_path: abs_path.to_string(),
        historical_removed_overlap,
        live_added_overlap,
        live_removed_overlap,
        checkpoint_match,
        strong_hint_match,
        weak_hint_match,
        basename_match,
        path_proximity,
    }
}

/// Decides whether the top-ranked candidate can be trusted over the runner-up.
///
/// The winner is never trusted when its primary score is zero or when its
/// ranking tuple equals the runner-up's. With no runner-up, or with a
/// strictly higher primary score, the winner needs any historical removed
/// overlap, any live added overlap, or at least two live removed overlaps.
/// On a primary-score tie it must also beat the runner-up's secondary score,
/// and the live added overlap threshold rises to two.
fn content_match_is_confident(
    winner: &ContentMatchScore,
    runner: Option<&ContentMatchScore>,
) -> bool {
    let winner_primary = winner.primary_score();
    if winner_primary == 0 {
        return false;
    }

    let has_history = winner.historical_removed_overlap > 0;
    let has_two_live_removed = winner.live_removed_overlap >= 2;
    let clear_lead_evidence =
        has_history || winner.live_added_overlap > 0 || has_two_live_removed;
    let tie_break_evidence =
        has_history || winner.live_added_overlap >= 2 || has_two_live_removed;

    let runner = match runner {
        Some(runner) => runner,
        None => return clear_lead_evidence,
    };

    if winner.ranking_tuple() == runner.ranking_tuple() {
        return false;
    }

    let runner_primary = runner.primary_score();
    if winner_primary > runner_primary {
        return clear_lead_evidence;
    }
    if winner_primary != runner_primary {
        return false;
    }

    winner.secondary_score() > runner.secondary_score() && tie_break_evidence
}

/// Counts how many of the queried line occurrences are present in the
/// indexed file `abs_path`.
///
/// For each query hash, the contribution is the smaller of the query count
/// and the count recorded for `abs_path` in the index; hashes the file does
/// not contain contribute nothing.
///
/// A negative count on either side contributes zero. Previously the signed
/// minimum was cast straight to `usize`, so a negative value wrapped into an
/// enormous overlap.
fn overlap_repo_index_hash_counts(
    repo_content_index: &RepoContentIndex,
    abs_path: &str,
    query_hashes: &HashMap<String, i64>,
) -> usize {
    let mut matched = 0usize;
    for (line_hash, query_count) in query_hashes {
        let Some(hits) = repo_content_index.line_hash_hits.get(line_hash) else {
            continue;
        };
        let Some((_path, file_count)) = hits.iter().find(|(path, _)| path == abs_path) else {
            continue;
        };
        // Clamp before the cast so a negative count cannot wrap around.
        matched += (*query_count).min(*file_count).max(0) as usize;
    }
    matched
}

/// Counts how many queried line occurrences are also present in
/// `candidate_hashes`.
///
/// For each hash in `query_hashes`, the contribution is the smaller of the
/// two counts; hashes missing from `candidate_hashes` contribute nothing.
///
/// A negative count on either side contributes zero. Previously the signed
/// minimum was cast straight to `usize`, so a negative value wrapped into an
/// enormous overlap.
fn overlap_hash_counts(
    query_hashes: &HashMap<String, i64>,
    candidate_hashes: &HashMap<String, i64>,
) -> usize {
    query_hashes
        .iter()
        .map(|(line_hash, query_count)| {
            candidate_hashes
                .get(line_hash)
                // Clamp before the cast so a negative count cannot wrap around.
                .map(|candidate_count| (*query_count).min(*candidate_count).max(0) as usize)
                .unwrap_or(0)
        })
        .sum()
}

/// Returns 1 when some export candidate *other than* `abs_path` has the same
/// file name as `abs_path`, and 0 otherwise (including when `abs_path` has no
/// UTF-8 file name).
fn basename_match_against_export_candidates(
    abs_path: &str,
    export_candidate_paths: &HashSet<String>,
) -> usize {
    /// Final path component as UTF-8, if there is one.
    fn basename(path: &str) -> Option<&str> {
        Path::new(path).file_name()?.to_str()
    }

    match basename(abs_path) {
        None => 0,
        Some(wanted) => {
            let shared_elsewhere = export_candidate_paths
                .iter()
                .filter(|candidate| candidate.as_str() != abs_path)
                .any(|candidate| basename(candidate) == Some(wanted));
            if shared_elsewhere { 1 } else { 0 }
        }
    }
}

/// Returns the largest number of leading repo-relative path components that
/// `abs_path` shares with any export candidate other than itself, or 0 when
/// there is no other candidate.
fn path_proximity_against_export_candidates(
    repo_root: &str,
    abs_path: &str,
    export_candidate_paths: &HashSet<String>,
) -> usize {
    let mut closest = 0usize;
    for candidate in export_candidate_paths {
        if candidate.as_str() == abs_path {
            continue;
        }
        let shared = shared_repo_relative_components(repo_root, abs_path, candidate);
        closest = closest.max(shared);
    }
    closest
}

/// Counts the leading path components that `left` and `right` have in common
/// once `repo_root` has been stripped from both. Returns 0 if either path
/// does not start with `repo_root`.
fn shared_repo_relative_components(repo_root: &str, left: &str, right: &str) -> usize {
    let root = Path::new(repo_root);
    let (left_rel, right_rel) = match (
        Path::new(left).strip_prefix(root),
        Path::new(right).strip_prefix(root),
    ) {
        (Ok(left_rel), Ok(right_rel)) => (left_rel, right_rel),
        _ => return 0,
    };

    let mut shared = 0usize;
    let mut right_components = right_rel.components();
    for left_component in left_rel.components() {
        match right_components.next() {
            Some(right_component) if right_component == left_component => shared += 1,
            _ => break,
        }
    }
    shared
}

/// Re-expresses `abs_path` as `repo_root` joined with its repo-relative path.
/// When no relative path can be derived, `abs_path` is returned unchanged.
fn normalize_repo_candidate_path(repo_root: &str, abs_path: &str) -> String {
    let root = Path::new(repo_root);
    match to_rel_path(Some(root), &PathBuf::from(abs_path)) {
        Some(rel_path) => path_to_string(&root.join(rel_path)),
        None => abs_path.to_string(),
    }
}

/// Builds the resolution record for a partial that has been pinned to
/// `abs_path`.
///
/// The call id and timestamp come from the first inline hint whose path
/// equals `abs_path`; with no such hint, the partial id stands in as the call
/// id and the session's `last_seen_at` as the timestamp.
fn resolve_partial_with_path(
    partial_id: &str,
    abs_path: &str,
    inline_hints: &[NewInlineHint],
    session: &CandidateSession,
) -> ResolvedNewSchemaPartial {
    let matching_hint = inline_hints.iter().find(|hint| hint.abs_path == abs_path);

    let (call_id, timestamp) = match matching_hint {
        Some(hint) => (hint.call_id.clone(), hint.timestamp.clone()),
        None => (partial_id.to_string(), session.info.last_seen_at.clone()),
    };

    ResolvedNewSchemaPartial {
        abs_path: abs_path.to_string(),
        call_id,
        timestamp,
    }
}

/// Ranks an inline hint as the likely target of a partial.
///
/// Returns `(removed_overlap, new_file_match, checkpoint_match,
/// strong_hint_match, weak_hint_match)`:
/// - `removed_overlap`: how many of `removed_lines` appear in the hint's
///   original text lines;
/// - `new_file_match`: 1 when the partial only adds lines and the hint has no
///   original text lines;
/// - the last three are 0/1 flags for the hint's path being among the
///   session's checkpoint paths, strong hints and weak hints.
fn score_new_schema_inline_hint(
    session: &CandidateSession,
    hint: &NewInlineHint,
    removed_lines: &[String],
    partial_has_only_additions: bool,
) -> (usize, usize, usize, usize, usize) {
    let mut removed_overlap = 0usize;
    for removed in removed_lines {
        let seen_in_original = hint
            .original_text_lines
            .iter()
            .any(|original| original == removed);
        if seen_in_original {
            removed_overlap += 1;
        }
    }

    let looks_like_new_file = partial_has_only_additions && hint.original_text_lines.is_empty();
    let as_flag = |present: bool| usize::from(present);

    (
        removed_overlap,
        as_flag(looks_like_new_file),
        as_flag(session.checkpoint_paths.contains(&hint.abs_path)),
        as_flag(session.strong_path_hints.contains(&hint.abs_path)),
        as_flag(session.weak_path_hints.contains(&hint.abs_path)),
    )
}

/// Emits change ops for legacy code-block targets that earlier parsers did
/// not already cover.
///
/// For every session that has legacy targets and a matching graph, targets
/// are processed in a fixed order (path, version, code block index, diff id).
/// Each target is tried as a legacy diff first and, when that yields nothing
/// or fails, as a content diff against the previous content seen for the same
/// path. Failures are deferred per path and dropped again if a later target
/// for that path succeeds. When exactly one path is left with a deferred
/// error, a session-totals fallback op is attempted for it. Whatever deferred
/// errors remain are recorded through `insert_legacy_parse_error`.
///
/// Produced ops are appended to `ops`, counters are updated on `summary`, and
/// a path covered by the totals fallback is added to `seen_paths_by_session`.
///
/// # Errors
/// Propagates any error returned by `insert_legacy_parse_error`.
#[expect(
    clippy::too_many_arguments,
    reason = "legacy fallback ingestion carries the same shared parser state as the other cursor ingestion helpers"
)]
fn ingest_legacy_code_block_fallback(
    conn: &Connection,
    graphs: &HashMap<String, CursorSessionGraph>,
    source_file: &str,
    sessions: &HashMap<String, CandidateSession>,
    seen_paths_by_session: &mut HashMap<String, HashSet<String>>,
    ops: &mut Vec<ChangeOpCandidate>,
    summary: &mut ProviderCodeChangeSummary,
    verbose: bool,
) -> Result<()> {
    for (composer_id, session) in sessions {
        // Sessions without legacy targets or without a graph are not counted.
        if session.legacy_targets.is_empty() {
            continue;
        }
        let Some(graph) = graphs.get(composer_id) else {
            continue;
        };

        summary.legacy_sessions_considered += 1;

        // Sort so that targets for one path are visited together, ordered by
        // version; the content fallback below diffs against the previously
        // visited content for the same path.
        let mut targets = session.legacy_targets.clone();
        targets.sort_by(|a, b| {
            a.abs_path
                .cmp(&b.abs_path)
                .then(a.version.cmp(&b.version))
                .then(a.code_block_idx.cmp(&b.code_block_idx))
                .then(a.diff_id.cmp(&b.diff_id))
        });

        // Latest content seen so far for each path.
        let mut previous_content_by_path: HashMap<String, String> = HashMap::new();
        // Paths for which a diff or content op was produced in this session.
        let mut exact_legacy_paths: HashSet<String> = HashSet::new();
        // First unresolved failure per path; removed when a later target succeeds.
        let mut deferred_errors_by_path: HashMap<String, LegacyDeferredParseError> = HashMap::new();

        for target in targets {
            // Paths already marked as seen for this session produce no op
            // here, but their content still becomes the "previous" baseline.
            if seen_paths_by_session
                .get(composer_id)
                .is_some_and(|set| set.contains(&target.abs_path))
            {
                if let Some(content) = &target.content {
                    previous_content_by_path.insert(target.abs_path.clone(), content.clone());
                }
                continue;
            }

            summary.tool_calls_inspected += 1;
            summary.legacy_entries_inspected += 1;

            let op = match build_legacy_diff_op(graph, &target, session) {
                // The diff row produced an op.
                Ok(Some(op)) => {
                    summary.legacy_diff_rows_found += 1;
                    deferred_errors_by_path.remove(&target.abs_path);
                    Some(op)
                }
                // The diff produced nothing and no error: fall back to a
                // content diff against the previous content for this path.
                Ok(None) => match build_legacy_content_fallback_op(
                    previous_content_by_path
                        .get(&target.abs_path)
                        .map(String::as_str),
                    &target,
                    session,
                ) {
                    Ok(Some(op)) => {
                        deferred_errors_by_path.remove(&target.abs_path);
                        Some(op)
                    }
                    Ok(None) => None,
                    Err(content_err) => {
                        // `or_insert_with` keeps an earlier deferred error for
                        // this path if one is already recorded.
                        deferred_errors_by_path
                            .entry(target.abs_path.clone())
                            .or_insert_with(|| LegacyDeferredParseError {
                                call_id: target.call_id(),
                                timestamp: target
                                    .timestamp
                                    .clone()
                                    .or_else(|| session.info.last_seen_at.clone()),
                                parser_name: LEGACY_CONTENT_PARSER_NAME,
                                error: content_err,
                            });
                        None
                    }
                },
                // The diff failed: still try the content fallback before
                // deferring an error.
                Err(diff_err) => match build_legacy_content_fallback_op(
                    previous_content_by_path
                        .get(&target.abs_path)
                        .map(String::as_str),
                    &target,
                    session,
                ) {
                    Ok(Some(op)) => {
                        deferred_errors_by_path.remove(&target.abs_path);
                        Some(op)
                    }
                    // The fallback had nothing to offer: defer the diff error.
                    Ok(None) => {
                        deferred_errors_by_path
                            .entry(target.abs_path.clone())
                            .or_insert_with(|| LegacyDeferredParseError {
                                call_id: target.call_id(),
                                timestamp: target
                                    .timestamp
                                    .clone()
                                    .or_else(|| session.info.last_seen_at.clone()),
                                parser_name: LEGACY_DIFF_PARSER_NAME,
                                error: diff_err,
                            });
                        None
                    }
                    // Both attempts failed: defer a combined message.
                    Err(content_err) => {
                        deferred_errors_by_path
                            .entry(target.abs_path.clone())
                            .or_insert_with(|| LegacyDeferredParseError {
                                call_id: target.call_id(),
                                timestamp: target
                                    .timestamp
                                    .clone()
                                    .or_else(|| session.info.last_seen_at.clone()),
                                parser_name: LEGACY_CONTENT_PARSER_NAME,
                                error: format!("{}; fallback failed: {}", diff_err, content_err),
                            });
                        None
                    }
                },
            };

            if let Some(op) = op {
                exact_legacy_paths.insert(target.abs_path.clone());
                ops.push(op);
                summary.legacy_ops_upserted += 1;
            }

            // Record this target's content as the baseline for the next
            // target on the same path, whether or not an op was produced.
            if let Some(content) = &target.content {
                previous_content_by_path.insert(target.abs_path.clone(), content.clone());
            }
        }

        // Session-level totals are only attributed when exactly one path is
        // left with a deferred error.
        if deferred_errors_by_path.len() == 1 {
            let (abs_path, deferred) = deferred_errors_by_path
                .iter()
                .next()
                .map(|(path, deferred)| (path.clone(), deferred.clone()))
                .expect("len checked above");
            // Skip the totals fallback when the path already has an exact op
            // or was marked as seen for this session.
            if !exact_legacy_paths.contains(&abs_path)
                && !seen_paths_by_session
                    .get(composer_id)
                    .is_some_and(|set| set.contains(&abs_path))
                && let Some(op) =
                    build_legacy_session_totals_fallback_op(session, &abs_path, &deferred)
            {
                ops.push(op);
                summary.legacy_ops_upserted += 1;
                seen_paths_by_session
                    .entry(composer_id.clone())
                    .or_default()
                    .insert(abs_path);
                // The fallback op stands in for the failure, so no parse
                // error is recorded for it.
                deferred_errors_by_path.clear();
            }
        }

        // Record whatever failures are still unresolved.
        for deferred in deferred_errors_by_path.into_values() {
            insert_legacy_parse_error(
                conn,
                summary,
                &session.info.session_id,
                source_file,
                &deferred.call_id,
                deferred.timestamp,
                deferred.parser_name,
                deferred.error,
                verbose,
            )?;
        }
    }

    Ok(())
}

fn build_legacy_diff_op(
    graph: &CursorSessionGraph,
    target: &LegacyTarget,
    session: &CandidateSession,
) -> std::result::Result<Option<ChangeOpCandidate>, String> {
    let Some(diff_id) = &target.diff_id else {
        return Ok(None);
    };

    let Some(payload) = graph.legacy_diff_payloads.get(diff_id) else {
        return Err("legacy codeBlockDiff row not found".to_string());
    };

    let removed_lines = extract_legacy_diff_lines(payload, "originalModelDiffWrtV0")?;
    let added_lines = extract_legacy_diff_lines(payload, "newModelDiffWrtV0")?;

    if added_lines.is_empty() && removed_lines.is_empty() {
        return Ok(None);
    }

    let mut line_hashes = hash_counts_for_lines(&added_lines, LineSide::Added);
    line_hashes.extend(hash_counts_for_lines(&removed_lines, LineSide::Removed));

    Ok(Some(build_change_op_candidate(
        session,
        diff_id.to_string(),
        target.op_index(),
        target.timestamp.clone(),
        &target.abs_path,
        WriteMode::Patch,
        true,
        added_lines.len() as i64,
        removed_lines.len() as i64,
        LEGACY_DIFF_PARSER_NAME,
        line_hashes,
    )))
}

/// Builds a change op by diffing `previous_content` against the target's own
/// content.
///
/// Returns `Ok(None)` when either side is missing or when the diff reports
/// no added and no removed lines. The visible code never produces `Err`.
fn build_legacy_content_fallback_op(
    previous_content: Option<&str>,
    target: &LegacyTarget,
    session: &CandidateSession,
) -> std::result::Result<Option<ChangeOpCandidate>, String> {
    let (Some(before), Some(after)) = (previous_content, target.content.as_deref()) else {
        return Ok(None);
    };

    let diff = diff_with_hashes(before, after);
    let is_unchanged = diff.added_lines == 0 && diff.removed_lines == 0;
    if is_unchanged {
        return Ok(None);
    }

    let op = build_change_op_candidate(
        session,
        target.call_id(),
        target.op_index(),
        target.timestamp.clone(),
        &target.abs_path,
        WriteMode::Patch,
        true,
        diff.added_lines,
        diff.removed_lines,
        LEGACY_CONTENT_PARSER_NAME,
        diff.line_hashes,
    );
    Ok(Some(op))
}

fn extract_legacy_diff_lines(
    payload: &Value,
    field: &str,
) -> std::result::Result<Vec<String>, String> {
    let hunks = payload
        .get(field)
        .and_then(|v| v.as_array())
        .ok_or_else(|| format!("legacy diff payload missing '{}' array", field))?;

    let mut lines = Vec::new();
    for hunk in hunks {
        let Some(hunk_obj) = hunk.as_object() else {
            continue;
        };
        let Some(modified) = hunk_obj.get("modified").and_then(|v| v.as_array()) else {
            continue;
        };
        for line in modified {
            let Some(line) = line.as_str() else {
                return Err(format!(
                    "legacy diff '{}' modified entry was not a string",
                    field
                ));
            };
            lines.push(line.to_string());
        }
    }

    Ok(lines)
}

/// Assembles a cursor `ChangeOpCandidate` from already-parsed change fields.
///
/// The repo root is detected from `abs_path` and the relative path is derived
/// from it. A missing `timestamp` falls back to the session's `last_seen_at`.
#[expect(
    clippy::too_many_arguments,
    reason = "the parsed change fields are not materialized anywhere else before constructing the candidate"
)]
fn build_change_op_candidate(
    session: &CandidateSession,
    call_id: String,
    op_index: i32,
    timestamp: Option<String>,
    abs_path: &str,
    write_mode: WriteMode,
    before_known: bool,
    added_lines: i64,
    removed_lines: i64,
    parser_name: &str,
    line_hashes: Vec<LineHashCount>,
) -> ChangeOpCandidate {
    let target_path = PathBuf::from(abs_path);
    let detected_root = detect_repo_root(&target_path);
    let rel_path = to_rel_path(detected_root.as_deref(), &target_path);
    let repo_root = detected_root.as_deref().map(path_to_string);

    let timestamp = match timestamp {
        Some(explicit) => Some(explicit),
        None => session.info.last_seen_at.clone(),
    };

    ChangeOpCandidate {
        provider: "cursor".to_string(),
        session_id: session.info.session_id.clone(),
        source_file: session.info.source_file.clone(),
        call_id,
        op_index,
        timestamp,
        repo_root,
        abs_path: abs_path.to_string(),
        rel_path,
        write_mode,
        before_known,
        added_lines,
        removed_lines,
        parser_name: parser_name.to_string(),
        line_hashes,
    }
}

fn build_inline_change_op(
    call_id: &str,
    row: &Value,
    abs_path: &str,
    session: &CandidateSession,
) -> std::result::Result<ChangeOpCandidate, String> {
    let changes = row
        .get("changes")
        .and_then(|v| v.as_array())
        .ok_or_else(|| "inline row missing 'changes' array".to_string())?;

    let new_text_lines = parse_string_array(row.get("newTextLines"));

    let mut added_lines: Vec<String> = Vec::new();
    let mut removed_lines: Vec<String> = Vec::new();

    for change in changes {
        let Some(change_obj) = change.as_object() else {
            continue;
        };

        if let Some(removed) = change_obj.get("removedTextLines") {
            removed_lines.extend(parse_string_array(Some(removed)));
        }

        if let Some(added_text_lines) = change_obj.get("addedTextLines") {
            added_lines.extend(parse_string_array(Some(added_text_lines)));
            continue;
        }

        let Some(added_range) = change_obj.get("addedRange").and_then(|v| v.as_object()) else {
            continue;
        };

        let start = added_range
            .get("startLineNumber")
            .and_then(|v| v.as_i64())
            .ok_or_else(|| "inline change missing addedRange.startLineNumber".to_string())?;
        let end = added_range
            .get("endLineNumberExclusive")
            .and_then(|v| v.as_i64())
            .ok_or_else(|| "inline change missing addedRange.endLineNumberExclusive".to_string())?;

        if start < 1 || end < start {
            return Err(format!(
                "invalid addedRange bounds start={} end={}",
                start, end
            ));
        }

        let start_idx = (start - 1) as usize;
        let end_idx = (end - 1) as usize;

        if end_idx > new_text_lines.len() || start_idx > end_idx {
            return Err(format!(
                "addedRange out of bounds start={} end={} newTextLines={}",
                start,
                end,
                new_text_lines.len()
            ));
        }

        added_lines.extend(new_text_lines[start_idx..end_idx].iter().cloned());
    }

    if added_lines.is_empty() && removed_lines.is_empty() {
        return Err("inline row produced zero added/removed lines".to_string());
    }

    let mut line_hashes = hash_counts_for_lines(&added_lines, LineSide::Added);
    line_hashes.extend(hash_counts_for_lines(&removed_lines, LineSide::Removed));

    let abs_path_buf = PathBuf::from(abs_path);
    let repo_root = detect_repo_root(&abs_path_buf);
    let rel_path = to_rel_path(repo_root.as_deref(), &abs_path_buf);

    Ok(ChangeOpCandidate {
        provider: "cursor".to_string(),
        session_id: session.info.session_id.clone(),
        source_file: session.info.source_file.clone(),
        call_id: call_id.to_string(),
        op_index: 0,
        timestamp: row
            .get("createdAt")
            .and_then(|v| v.as_i64())
            .and_then(ms_to_iso)
            .or_else(|| session.info.last_seen_at.clone()),
        repo_root: repo_root.as_deref().map(path_to_string),
        abs_path: abs_path.to_string(),
        rel_path,
        write_mode: WriteMode::Patch,
        before_known: true,
        added_lines: added_lines.len() as i64,
        removed_lines: removed_lines.len() as i64,
        parser_name: INLINE_PARSER_NAME.to_string(),
        line_hashes,
    })
}

fn build_partial_fates_op(
    call_id: &str,
    payload: &Value,
    abs_path: &str,
    session: &CandidateSession,
) -> std::result::Result<Option<ChangeOpCandidate>, String> {
    let (added_lines, removed_lines) = accepted_partial_fate_lines(payload)?;

    if added_lines.is_empty() && removed_lines.is_empty() {
        return Ok(None);
    }

    let mut line_hashes = hash_counts_for_lines(&added_lines, LineSide::Added);
    line_hashes.extend(hash_counts_for_lines(&removed_lines, LineSide::Removed));

    let abs_path_buf = PathBuf::from(abs_path);
    let repo_root = detect_repo_root(&abs_path_buf);
    let rel_path = to_rel_path(repo_root.as_deref(), &abs_path_buf);

    Ok(Some(ChangeOpCandidate {
        provider: "cursor".to_string(),
        session_id: session.info.session_id.clone(),
        source_file: session.info.source_file.clone(),
        call_id: call_id.to_string(),
        op_index: 0,
        timestamp: session.info.last_seen_at.clone(),
        repo_root: repo_root.as_deref().map(path_to_string),
        abs_path: abs_path.to_string(),
        rel_path,
        write_mode: WriteMode::Patch,
        before_known: true,
        added_lines: added_lines.len() as i64,
        removed_lines: removed_lines.len() as i64,
        parser_name: PARTIAL_PARSER_NAME.to_string(),
        line_hashes,
    }))
}

/// Builds one patch-mode change op from the accepted fates of several
/// new-schema partial payloads that all resolved to `abs_path`.
///
/// Lines are accumulated across `partials` in slice order. A missing
/// `timestamp` falls back to the session's `last_seen_at`. Returns `Ok(None)`
/// when no added or removed line was accumulated.
///
/// # Errors
/// Stops at, and propagates, the first payload whose fates cannot be read.
fn build_new_schema_partial_fates_op(
    call_id: &str,
    partials: &[(String, Value)],
    abs_path: &str,
    timestamp: Option<String>,
    session: &CandidateSession,
) -> std::result::Result<Option<ChangeOpCandidate>, String> {
    let mut all_added = Vec::new();
    let mut all_removed = Vec::new();
    for (_, payload) in partials {
        let (accepted_added, accepted_removed) = accepted_partial_fate_lines(payload)?;
        all_added.extend(accepted_added);
        all_removed.extend(accepted_removed);
    }

    if all_added.is_empty() && all_removed.is_empty() {
        return Ok(None);
    }

    let mut line_hashes = hash_counts_for_lines(&all_added, LineSide::Added);
    line_hashes.extend(hash_counts_for_lines(&all_removed, LineSide::Removed));

    let target_path = PathBuf::from(abs_path);
    let detected_root = detect_repo_root(&target_path);
    let rel_path = to_rel_path(detected_root.as_deref(), &target_path);

    let timestamp = match timestamp {
        Some(explicit) => Some(explicit),
        None => session.info.last_seen_at.clone(),
    };

    let op = ChangeOpCandidate {
        provider: "cursor".to_string(),
        session_id: session.info.session_id.clone(),
        source_file: session.info.source_file.clone(),
        call_id: call_id.to_string(),
        op_index: 0,
        timestamp,
        repo_root: detected_root.as_deref().map(path_to_string),
        abs_path: abs_path.to_string(),
        rel_path,
        write_mode: WriteMode::Patch,
        before_known: true,
        added_lines: all_added.len() as i64,
        removed_lines: all_removed.len() as i64,
        parser_name: NEW_SCHEMA_PARSER_NAME.to_string(),
        line_hashes,
    };
    Ok(Some(op))
}

fn accepted_partial_fate_lines(
    payload: &Value,
) -> std::result::Result<(Vec<String>, Vec<String>), String> {
    let fates = payload
        .get("fates")
        .and_then(|v| v.as_array())
        .ok_or_else(|| "partial fates payload missing 'fates' array".to_string())?;

    let mut added_lines: Vec<String> = Vec::new();
    let mut removed_lines: Vec<String> = Vec::new();

    for fate in fates {
        let Some(fate_obj) = fate.as_object() else {
            continue;
        };
        if fate_obj.get("fate").and_then(|v| v.as_str()) != Some("accepted") {
            continue;
        }

        if let Some(added) = fate_obj.get("addedLines") {
            added_lines.extend(parse_string_array(Some(added)));
        }
        if let Some(removed) = fate_obj.get("removedLines") {
            removed_lines.extend(parse_string_array(Some(removed)));
        }
    }

    Ok((added_lines, removed_lines))
}

/// Last-resort legacy op built from the session-wide line totals.
///
/// Produces a candidate only when `abs_path` is known to the session
/// (either as a weak path hint or as a key of the recorded original-state
/// contents) and at least one of the session totals is non-zero. The op
/// carries no line hashes, is flagged as `before_known = false`, and takes
/// its call id and timestamp from the deferred parse error.
fn build_legacy_session_totals_fallback_op(
    session: &CandidateSession,
    abs_path: &str,
    deferred: &LegacyDeferredParseError,
) -> Option<ChangeOpCandidate> {
    if !session.weak_path_hints.contains(abs_path)
        && !session.original_state_contents.contains_key(abs_path)
    {
        return None;
    }

    // Missing totals are treated as zero.
    let total_added = session.total_lines_added.unwrap_or(0);
    let total_removed = session.total_lines_removed.unwrap_or(0);
    let no_totals = total_added == 0 && total_removed == 0;
    if no_totals {
        return None;
    }

    let candidate = build_change_op_candidate(
        session,
        deferred.call_id.clone(),
        0,
        deferred.timestamp.clone(),
        abs_path,
        WriteMode::Patch,
        false,
        total_added,
        total_removed,
        LEGACY_TOTALS_PARSER_NAME,
        Vec::new(),
    );
    Some(candidate)
}

/// Persists one Cursor parse error and bumps `summary.parse_errors`.
///
/// With `verbose` set, the parser name, call id and error text are also
/// written to stderr before the record is stored. The counter is only
/// incremented after the storage insert succeeded.
///
/// # Errors
/// Propagates any failure reported by `storage::insert_parse_error`.
#[expect(
    clippy::too_many_arguments,
    reason = "parse errors are recorded directly from the callsite context without allocating an intermediate struct"
)]
fn insert_parse_error(
    conn: &Connection,
    summary: &mut ProviderCodeChangeSummary,
    session_id: &str,
    source_file: &str,
    call_id: &str,
    timestamp: Option<String>,
    parser_name: &str,
    error: String,
    verbose: bool,
) -> Result<()> {
    if verbose {
        eprintln!("[change-intel] {} {} {}", parser_name, call_id, error);
    }

    let record = ParseError {
        provider: "cursor".to_owned(),
        session_id: session_id.to_owned(),
        source_file: source_file.to_owned(),
        call_id: call_id.to_owned(),
        timestamp,
        parser_name: parser_name.to_owned(),
        error,
    };
    storage::insert_parse_error(conn, &record)?;

    summary.parse_errors += 1;
    Ok(())
}

/// Records a parse error raised by one of the legacy parsers.
///
/// Delegates to `insert_parse_error` (which stores the record and bumps
/// `summary.parse_errors`) and, once that succeeded, additionally bumps
/// `summary.legacy_parse_errors`.
///
/// # Errors
/// Propagates any error returned by `insert_parse_error`; in that case the
/// legacy counter is left untouched.
#[expect(
    clippy::too_many_arguments,
    reason = "legacy parse errors reuse the shared helper while incrementing legacy-specific summary counters"
)]
fn insert_legacy_parse_error(
    conn: &Connection,
    summary: &mut ProviderCodeChangeSummary,
    session_id: &str,
    source_file: &str,
    call_id: &str,
    timestamp: Option<String>,
    parser_name: &str,
    error: String,
    verbose: bool,
) -> Result<()> {
    insert_parse_error(
        conn,
        summary,
        session_id,
        source_file,
        call_id,
        timestamp,
        parser_name,
        error,
        verbose,
    )?;
    // Only counted when the shared insert succeeded.
    summary.legacy_parse_errors += 1;
    Ok(())
}

fn parse_string_array(value: Option<&Value>) -> Vec<String> {
    let Some(value) = value else {
        return Vec::new();
    };

    let Some(arr) = value.as_array() else {
        return Vec::new();
    };

    arr.iter()
        .filter_map(|v| v.as_str().map(ToOwned::to_owned))
        .collect()
}

/// Counts how often each line hash occurs in `lines` and tags every count
/// with `side`.
///
/// Every line is hashed with `hash_line`; no filtering is applied. The
/// result holds one entry per distinct hash, ordered by `line_hash`.
fn hash_counts_for_lines(lines: &[String], side: LineSide) -> Vec<LineHashCount> {
    let mut counts: HashMap<String, i64> = HashMap::new();
    for line in lines {
        *counts.entry(hash_line(line)).or_insert(0) += 1;
    }

    let mut out: Vec<LineHashCount> = counts
        .into_iter()
        .map(|(line_hash, count)| LineHashCount {
            side,
            line_hash,
            count,
        })
        .collect();

    // `HashMap` iteration order is randomised per process; sort so the
    // emitted order is reproducible between runs. Hashes are unique keys,
    // so an unstable sort is still fully deterministic.
    out.sort_unstable_by(|a, b| a.line_hash.cmp(&b.line_hash));
    out
}

/// Builds a `line hash -> occurrence count` map for the contentful lines
/// of `lines`.
///
/// Lines rejected by `is_contentful_content_match_line` are ignored.
fn hash_count_map_for_lines(lines: &[String]) -> HashMap<String, i64> {
    lines
        .iter()
        .map(String::as_str)
        .filter(|line| is_contentful_content_match_line(line))
        .fold(HashMap::<String, i64>::new(), |mut tally, line| {
            *tally.entry(hash_line(line)).or_insert(0) += 1;
            tally
        })
}

/// Builds a `line hash -> occurrence count` map for the contentful lines
/// of `text`, splitting it with `str::lines`.
///
/// Lines rejected by `is_contentful_content_match_line` are ignored.
fn hash_count_map_for_text(text: &str) -> HashMap<String, i64> {
    text.lines()
        .filter(|line| is_contentful_content_match_line(line))
        .fold(HashMap::<String, i64>::new(), |mut tally, line| {
            *tally.entry(hash_line(line)).or_insert(0) += 1;
            tally
        })
}

/// Decides whether a line takes part in content matching.
///
/// After trimming surrounding whitespace the line must be at least three
/// bytes long and contain at least one alphanumeric character.
fn is_contentful_content_match_line(line: &str) -> bool {
    let core = line.trim();
    // Length is measured in bytes, not characters.
    if core.len() < 3 {
        return false;
    }
    core.chars().any(|ch| ch.is_alphanumeric())
}

/// Reports whether `path`, once passed through `normalize_filesystem_path`,
/// is the `.cursor/plans` directory itself or contains a `/.cursor/plans/`
/// segment.
fn is_cursor_plan_path(path: &str) -> bool {
    let canonical = normalize_filesystem_path(path);
    if canonical.ends_with("/.cursor/plans") {
        return true;
    }
    canonical.contains("/.cursor/plans/")
}

/// Returns the `(mtime_seconds, size_bytes)` signature of a regular file.
///
/// Yields `Ok(None)` when `path` cannot be stat-ed or does not refer to a
/// regular file (symlinks are followed). The metadata is read once, so a
/// file that disappears concurrently is reported as `None` rather than as
/// an error from a second lookup.
///
/// A modification time before the Unix epoch is reported as `0`; values
/// that do not fit in `i64` saturate at `i64::MAX` instead of wrapping.
///
/// # Errors
/// Fails only when the platform cannot report a modification time.
fn file_signature(path: &Path) -> Result<Option<(i64, i64)>> {
    let md = match std::fs::metadata(path) {
        Ok(md) if md.is_file() => md,
        _ => return Ok(None),
    };

    let size = i64::try_from(md.len()).unwrap_or(i64::MAX);
    let mtime_secs = md
        .modified()?
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_secs();
    let mtime = i64::try_from(mtime_secs).unwrap_or(i64::MAX);

    Ok(Some((mtime, size)))
}

/// Converts a millisecond Unix timestamp to an RFC 3339 string in UTC.
///
/// Returns `None` when chrono cannot map `ms` to a single instant.
fn ms_to_iso(ms: i64) -> Option<String> {
    let instant = chrono::Utc.timestamp_millis_opt(ms).single()?;
    Some(instant.to_rfc3339())
}

/// Crate-visible accessor for the Cursor state database location.
///
/// Forwards directly to `cursor_state_path`; the `Ok(None)` and error
/// cases are whatever that function reports.
pub(crate) fn cursor_vscdb_path() -> Result<Option<PathBuf>> {
    cursor_state_path()
}

#[cfg(test)]
mod tests {
    use super::*;
    use rusqlite::Connection;
    use serde_json::{Map, json};
    use std::fs;
    use std::process::Command;
    use std::time::{SystemTime, UNIX_EPOCH};

    use crate::change_intel::schema::init_change_intel_schema;
    use crate::db::init_app_schema;
    use crate::providers::cursor::ingest_planned_sessions_from_source;

    /// Path for a throwaway SQLite file inside the system temp directory.
    ///
    /// The file name embeds `label` and the current time in nanoseconds
    /// since the Unix epoch (0 if the clock is before the epoch). Nothing
    /// is created on disk.
    fn temp_db_path(label: &str) -> PathBuf {
        let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
            Ok(elapsed) => elapsed.as_nanos(),
            Err(_) => 0,
        };
        let mut path = std::env::temp_dir();
        path.push(format!("paceflow_cursor_{}_{}.db", label, nanos));
        path
    }

    /// Creates a SQLite file at `path` containing a `cursorDiskKV` table
    /// and inserts every `(key, value)` pair of `rows` into it.
    ///
    /// Fails on the first SQLite error, including a duplicate key.
    fn create_cursor_db(path: &Path, rows: &[(String, String)]) -> Result<()> {
        const INSERT_SQL: &str = "INSERT INTO cursorDiskKV (key, value) VALUES (?1, ?2)";

        let db = Connection::open(path)?;
        db.execute_batch(
            "CREATE TABLE cursorDiskKV (
                key   TEXT PRIMARY KEY,
                value TEXT
            );",
        )?;

        for row in rows {
            let (key, value) = row;
            db.execute(INSERT_SQL, params![key, value])?;
        }

        Ok(())
    }

    /// Best-effort removal of a temporary file created by a test.
    ///
    /// Any error from `fs::remove_file` is deliberately ignored so that
    /// cleanup never fails a test.
    fn cleanup(path: &Path) {
        let _ = fs::remove_file(path);
    }

    /// Path for a throwaway repository directory inside the system temp
    /// directory.
    ///
    /// The directory name embeds `label` and the current time in
    /// nanoseconds since the Unix epoch (0 if the clock is before the
    /// epoch). Nothing is created on disk.
    fn temp_repo_path(label: &str) -> PathBuf {
        let nanos = match SystemTime::now().duration_since(UNIX_EPOCH) {
            Ok(elapsed) => elapsed.as_nanos(),
            Err(_) => 0,
        };
        let mut path = std::env::temp_dir();
        path.push(format!("paceflow_cursor_repo_{}_{}", label, nanos));
        path
    }

    /// Best-effort recursive removal of a temporary directory created by a
    /// test.
    ///
    /// Any error from `fs::remove_dir_all` is deliberately ignored so that
    /// cleanup never fails a test.
    fn cleanup_dir(path: &Path) {
        let _ = fs::remove_dir_all(path);
    }

    /// Runs `git` with `args` inside `cwd` and waits for it to finish.
    ///
    /// # Errors
    /// Fails when the process cannot be spawned or exits unsuccessfully;
    /// the latter error names the arguments and the working directory.
    fn git(args: &[&str], cwd: &Path) -> Result<()> {
        let mut cmd = Command::new("git");
        cmd.current_dir(cwd).args(args);

        let outcome = cmd.status()?;
        if !outcome.success() {
            anyhow::bail!("git {:?} failed in {}", args, cwd.display());
        }
        Ok(())
    }

    /// Initialises a git repository at `root` and writes `files` into it.
    ///
    /// Each entry is `(path relative to root, file content)`; missing
    /// parent directories are created. The files are only written, not
    /// staged or committed.
    fn create_git_repo(root: &Path, files: &[(&str, &str)]) -> Result<()> {
        fs::create_dir_all(root)?;
        git(&["init", "-q"], root)?;

        for &(rel_path, content) in files {
            let target = root.join(rel_path);
            if let Some(dir) = target.parent() {
                fs::create_dir_all(dir)?;
            }
            fs::write(&target, content)?;
        }

        Ok(())
    }

    /// Renders `path` as a `file://` URI by prefixing the string produced
    /// by `path_to_string`; this helper itself adds no further escaping.
    fn file_uri(path: &Path) -> String {
        format!("file://{}", path_to_string(path))
    }

    /// Builds a JSON object keyed by the `file://` URI of each path, whose
    /// values are `{ "content": <content> }` objects.
    ///
    /// A later entry for the same path replaces an earlier one.
    fn original_file_states(entries: &[(&Path, &str)]) -> Map<String, Value> {
        entries
            .iter()
            .map(|(path, content)| (file_uri(path), json!({ "content": content })))
            .collect()
    }

    /// Resolves `path` against its detected repository root and returns
    /// the result of `normalize_repo_candidate_path` for that pair.
    ///
    /// # Panics
    /// Panics when no repository root can be detected for `path`.
    fn canonical_repo_path(path: &Path) -> String {
        let root = detect_repo_root(path).expect("temporary git repo should have a repo root");
        let root_str = path_to_string(&root);
        let path_str = path_to_string(path);
        normalize_repo_candidate_path(&root_str, &path_str)
    }

    /// Ingests a source containing one composer session, one checkpoint and
    /// one `inlineDiffUndoRedo` row, then ingests the same source again.
    ///
    /// Expected: the first run upserts a single `tool_write` op attributed
    /// to `INLINE_PARSER_NAME` with 1 added / 1 removed line; the second
    /// run reports the source as skipped and leaves exactly one row.
    #[test]
    fn inline_diff_ingest_and_idempotent_skip() -> Result<()> {
        let mut analytics = Connection::open_in_memory()?;
        init_change_intel_schema(&analytics)?;

        let source = temp_db_path("inline_idempotent");
        let rows = vec![
            (
                "composerData:c1".to_string(),
                json!({
                    "composerId": "c1",
                    "createdAt": 1772694178155i64,
                    "lastUpdatedAt": 1772694194894i64,
                    "filesChangedCount": 1,
                    "totalLinesAdded": 2,
                    "totalLinesRemoved": 1,
                    "codeBlockData": {}
                })
                .to_string(),
            ),
            (
                "checkpointId:c1:chk1".to_string(),
                json!({
                    "files": [
                        {"uri": {"scheme": "file", "fsPath": "/tmp/repo/README.md", "external": "file:///tmp/repo/README.md"}}
                    ]
                })
                .to_string(),
            ),
            (
                "inlineDiffUndoRedo-1".to_string(),
                json!({
                    "composerMetadata": {"composerId": "c1"},
                    "uri": {"scheme": "file", "fsPath": "/tmp/repo/README.md", "external": "file:///tmp/repo/README.md"},
                    "createdAt": 1772694300812i64,
                    "newTextLines": ["new line"],
                    "changes": [
                        {
                            "removedTextLines": ["old line"],
                            "addedRange": {"startLineNumber": 1, "endLineNumberExclusive": 2}
                        }
                    ]
                })
                .to_string(),
            ),
        ];

        create_cursor_db(&source, &rows)?;

        // First pass: the source is new, so it is processed.
        let summary1 = ingest_cursor_code_changes_from_path(&mut analytics, &source, false)?;
        assert_eq!(summary1.provider, "cursor");
        assert_eq!(summary1.sources_discovered, 1);
        assert_eq!(summary1.sources_skipped, 0);
        assert_eq!(summary1.ops_upserted, 1);

        let op_row: (String, i64, i64) = analytics.query_row(
            "SELECT parser_name, lines_added, lines_removed
             FROM fact_session_code_change
             WHERE provider='cursor' AND source_kind = 'tool_write'",
            [],
            |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
        )?;
        assert_eq!(op_row.0, INLINE_PARSER_NAME);
        assert_eq!(op_row.1, 1);
        assert_eq!(op_row.2, 1);

        // Second pass over the unchanged source must be skipped.
        let summary2 = ingest_cursor_code_changes_from_path(&mut analytics, &source, false)?;
        assert_eq!(summary2.sources_skipped, 1);

        let count: i64 = analytics.query_row(
            "SELECT COUNT(*)
             FROM fact_session_code_change
             WHERE provider='cursor' AND source_kind = 'tool_write'",
            [],
            |r| r.get(0),
        )?;
        assert_eq!(count, 1);

        cleanup(&source);
        Ok(())
    }

    /// Ingests an inline diff whose URI carries only a percent-encoded
    /// Windows `external` form (`file:///c%3A/...`) and no `fsPath`.
    ///
    /// Expected: one op is upserted and its stored `abs_path` is the
    /// decoded path `C:/dev/paceflow/README.md`.
    #[test]
    fn inline_diff_ingest_decodes_percent_encoded_windows_file_uri_paths() -> Result<()> {
        let mut analytics = Connection::open_in_memory()?;
        init_change_intel_schema(&analytics)?;

        let source = temp_db_path("inline_windows_uri");
        let rows = vec![
            (
                "composerData:c_windows".to_string(),
                json!({
                    "composerId": "c_windows",
                    "createdAt": 1772694178155i64,
                    "lastUpdatedAt": 1772694194894i64,
                    "filesChangedCount": 1,
                    "codeBlockData": {}
                })
                .to_string(),
            ),
            (
                "inlineDiffUndoRedo-windows".to_string(),
                json!({
                    "composerMetadata": {"composerId": "c_windows"},
                    "uri": {"scheme": "file", "external": "file:///c%3A/dev/paceflow/README.md"},
                    "createdAt": 1772694300812i64,
                    "newTextLines": ["new line"],
                    "changes": [
                        {
                            "removedTextLines": ["old line"],
                            "addedRange": {"startLineNumber": 1, "endLineNumberExclusive": 2}
                        }
                    ]
                })
                .to_string(),
            ),
        ];

        create_cursor_db(&source, &rows)?;

        let summary = ingest_cursor_code_changes_from_path(&mut analytics, &source, false)?;
        assert_eq!(summary.ops_upserted, 1);

        let abs_path: String = analytics.query_row(
            "SELECT abs_path
             FROM fact_session_code_change
             WHERE provider='cursor' AND source_kind = 'tool_write'",
            [],
            |r| r.get(0),
        )?;
        // `%3A` is decoded to `:` and the drive letter is stored upper-cased.
        assert_eq!(abs_path, "C:/dev/paceflow/README.md");

        cleanup(&source);
        Ok(())
    }

    /// Ingests a session whose only edit is a completed `edit_file_v2`
    /// tool call referenced through `originalFileStates.firstEditBubbleId`.
    ///
    /// Expected: one `tool_write` op for `/tmp/repo/src/plan.ts`, attributed
    /// to the `cursor_tool_edit_file_v2_v1` parser, with 2 added and
    /// 1 removed line (matching the `streamingContent` patch).
    #[test]
    fn tool_call_edit_file_v2_ingest_creates_tool_write() -> Result<()> {
        let mut analytics = Connection::open_in_memory()?;
        init_change_intel_schema(&analytics)?;

        let source = temp_db_path("tool_edit_file");
        let rows = vec![
            (
                "composerData:tool_edit".to_string(),
                json!({
                    "composerId": "tool_edit",
                    "conversation": [{"type": 1, "text": "refactor"}],
                    "filesChangedCount": 1,
                    "totalLinesAdded": 2,
                    "totalLinesRemoved": 1,
                    "originalFileStates": {
                        "file:///tmp/repo/src/plan.ts": {
                            "firstEditBubbleId": "bubble-plan",
                            "content": "old\n"
                        }
                    }
                })
                .to_string(),
            ),
            (
                "bubbleId:tool_edit:bubble-plan".to_string(),
                json!({
                    "type": 2,
                    "text": "",
                    "toolFormerData": {
                        "status": "completed",
                        "name": "edit_file_v2",
                        "toolCallId": "call-plan",
                        "params": "{\"relativeWorkspacePath\":\"/tmp/repo/src/plan.ts\",\"streamingContent\":\"@@\\n-old\\n+new\\n+another\\n\"}"
                    }
                })
                .to_string(),
            ),
        ];

        create_cursor_db(&source, &rows)?;

        let summary = ingest_cursor_code_changes_from_path(&mut analytics, &source, false)?;
        assert_eq!(summary.ops_upserted, 1);

        let row: (String, String, i64, i64) = analytics.query_row(
            "SELECT abs_path, parser_name, lines_added, lines_removed
             FROM fact_session_code_change
             WHERE provider='cursor' AND session_id='tool_edit' AND source_kind='tool_write'",
            [],
            |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?, row.get(3)?)),
        )?;
        assert_eq!(row.0, "/tmp/repo/src/plan.ts");
        assert_eq!(row.1, "cursor_tool_edit_file_v2_v1");
        assert_eq!(row.2, 2);
        assert_eq!(row.3, 1);

        cleanup(&source);
        Ok(())
    }

    /// Runs both ingestion paths over the same two-file `edit_file_v2`
    /// session: first `ingest_planned_sessions_from_source`, then
    /// `ingest_cursor_code_changes_from_path`.
    ///
    /// Expected: the sorted `abs_path` list of `accepted_change` rows equals
    /// the sorted list of `tool_write` rows for session `agree`.
    #[test]
    fn session_ingest_and_change_intel_agree_on_latest_tool_edit_files() -> Result<()> {
        let analytics = Connection::open_in_memory()?;
        // This test uses the full app schema rather than only the
        // change-intel schema, since both ingestion paths write to it.
        init_app_schema(&analytics)?;

        let source = temp_db_path("tool_edit_agreement");
        let rows = vec![
            (
                "composerData:agree".to_string(),
                json!({
                    "composerId": "agree",
                    "conversation": [{"type": 1, "text": "refactor these files"}],
                    "filesChangedCount": 2,
                    "totalLinesAdded": 3,
                    "totalLinesRemoved": 2,
                    "originalFileStates": {
                        "file:///tmp/repo/src/plan.ts": {
                            "firstEditBubbleId": "bubble-plan",
                            "content": "old\n"
                        },
                        "file:///tmp/repo/src/assignment.ts": {
                            "firstEditBubbleId": "bubble-assign",
                            "content": "before\n"
                        }
                    }
                })
                .to_string(),
            ),
            (
                "bubbleId:agree:bubble-plan".to_string(),
                json!({
                    "type": 2,
                    "text": "",
                    "toolFormerData": {
                        "status": "completed",
                        "name": "edit_file_v2",
                        "toolCallId": "call-plan",
                        "params": "{\"relativeWorkspacePath\":\"/tmp/repo/src/plan.ts\",\"streamingContent\":\"@@\\n-old\\n+new\\n+another\\n\"}"
                    }
                })
                .to_string(),
            ),
            (
                "bubbleId:agree:bubble-assign".to_string(),
                json!({
                    "type": 2,
                    "text": "",
                    "toolFormerData": {
                        "status": "completed",
                        "name": "edit_file_v2",
                        "toolCallId": "call-assign",
                        "params": "{\"relativeWorkspacePath\":\"/tmp/repo/src/assignment.ts\",\"streamingContent\":\"@@\\n-before\\n+after\\n\"}"
                    }
                })
                .to_string(),
            ),
        ];

        create_cursor_db(&source, &rows)?;
        let source_conn = Connection::open(&source)?;

        // Session-level ingest is fed only the composer row; the bubble
        // rows stay reachable through `source_conn`.
        ingest_planned_sessions_from_source(
            &analytics,
            &source_conn,
            source.to_string_lossy().as_ref(),
            &[("composerData:agree".to_string(), rows[0].1.clone())],
            &HashMap::new(),
            false,
            None,
        )?;

        // The change-intel ingest needs `&mut Connection`, so rebind.
        let mut analytics_for_change_intel = analytics;
        ingest_cursor_code_changes_from_path(&mut analytics_for_change_intel, &source, false)?;

        let session_paths = analytics_for_change_intel
            .prepare(
                "SELECT abs_path
                 FROM fact_session_code_change
                 WHERE provider='cursor' AND session_id='agree' AND source_kind='accepted_change'
                 ORDER BY abs_path",
            )?
            .query_map([], |row| row.get::<_, String>(0))?
            .collect::<std::result::Result<Vec<_>, _>>()?;
        let tool_paths = analytics_for_change_intel
            .prepare(
                "SELECT abs_path
                 FROM fact_session_code_change
                 WHERE provider='cursor' AND session_id='agree' AND source_kind='tool_write'
                 ORDER BY abs_path",
            )?
            .query_map([], |row| row.get::<_, String>(0))?
            .collect::<std::result::Result<Vec<_>, _>>()?;

        assert_eq!(session_paths, tool_paths);

        cleanup(&source);
        Ok(())
    }

    /// Ingests an inline diff whose `addedRange` (lines 1 up to, but not
    /// including, 3) is wider than the single entry in `newTextLines`.
    ///
    /// Expected: no op is upserted, the summary reports one parse error,
    /// and one row is stored in `change_parse_errors` under
    /// `INLINE_PARSER_NAME`.
    #[test]
    fn invalid_added_range_records_parse_error() -> Result<()> {
        let mut analytics = Connection::open_in_memory()?;
        init_change_intel_schema(&analytics)?;

        let source = temp_db_path("invalid_range");
        let rows = vec![
            (
                "composerData:c2".to_string(),
                json!({
                    "composerId": "c2",
                    "createdAt": 1772694178155i64,
                    "lastUpdatedAt": 1772694194894i64,
                    "filesChangedCount": 1,
                    "codeBlockData": {}
                })
                .to_string(),
            ),
            (
                "inlineDiffUndoRedo-2".to_string(),
                json!({
                    "composerMetadata": {"composerId": "c2"},
                    "uri": {"scheme": "file", "fsPath": "/tmp/repo/README.md"},
                    "createdAt": 1772694300812i64,
                    "newTextLines": ["only one"],
                    "changes": [
                        {
                            "removedTextLines": ["old"],
                            "addedRange": {"startLineNumber": 1, "endLineNumberExclusive": 3}
                        }
                    ]
                })
                .to_string(),
            ),
        ];

        create_cursor_db(&source, &rows)?;

        let summary = ingest_cursor_code_changes_from_path(&mut analytics, &source, false)?;
        assert_eq!(summary.ops_upserted, 0);
        assert_eq!(summary.parse_errors, 1);

        let parse_errs: i64 = analytics.query_row(
            "SELECT COUNT(*) FROM change_parse_errors WHERE parser_name = ?1",
            params![INLINE_PARSER_NAME],
            |r| r.get(0),
        )?;
        assert_eq!(parse_errs, 1);

        cleanup(&source);
        Ok(())
    }

    /// Ingests two sessions that have no inline diff rows, only partial
    /// fates: `list_sess` stores its `codeBlockData` entry as an array,
    /// `dict_sess` as an object keyed by block id.
    ///
    /// Expected: both shapes yield one `tool_write` op attributed to
    /// `PARTIAL_PARSER_NAME`. For `dict_sess` the rejected fate is not
    /// counted (2 added / 1 removed); `list_sess` yields 1 added / 1 removed.
    #[test]
    fn partial_fallback_supports_list_and_dict_shapes() -> Result<()> {
        let mut analytics = Connection::open_in_memory()?;
        init_change_intel_schema(&analytics)?;

        let source = temp_db_path("partial_shapes");
        let rows = vec![
            (
                "composerData:list_sess".to_string(),
                json!({
                    "composerId": "list_sess",
                    "createdAt": 1772694178155i64,
                    "lastUpdatedAt": 1772694194894i64,
                    "filesChangedCount": 1,
                    "codeBlockData": {
                        "file:///tmp/repo/list.txt": [
                            {"status": "accepted", "partialInlineDiffFatesId": "p_list"}
                        ]
                    }
                })
                .to_string(),
            ),
            (
                "composerData:dict_sess".to_string(),
                json!({
                    "composerId": "dict_sess",
                    "createdAt": 1772694178155i64,
                    "lastUpdatedAt": 1772694194894i64,
                    "filesChangedCount": 1,
                    "codeBlockData": {
                        "file:///tmp/repo/dict.txt": {
                            "block-1": {
                                "status": "accepted",
                                "partialInlineDiffFatesId": "p_dict",
                                "uri": {"scheme": "file", "fsPath": "/tmp/repo/dict.txt"}
                            }
                        }
                    }
                })
                .to_string(),
            ),
            (
                "codeBlockPartialInlineDiffFates:list_sess:p_list".to_string(),
                json!({
                    "fates": [
                        {"fate": "accepted", "removedLines": ["old list"], "addedLines": ["new list"]}
                    ]
                })
                .to_string(),
            ),
            (
                "codeBlockPartialInlineDiffFates:dict_sess:p_dict".to_string(),
                json!({
                    "fates": [
                        {"fate": "rejected", "removedLines": ["x"], "addedLines": ["y"]},
                        {"fate": "accepted", "removedLines": ["old dict"], "addedLines": ["new dict 1", "new dict 2"]}
                    ]
                })
                .to_string(),
            ),
        ];

        create_cursor_db(&source, &rows)?;

        let summary = ingest_cursor_code_changes_from_path(&mut analytics, &source, false)?;
        assert_eq!(summary.ops_upserted, 2);

        // Ordered by session id, so `dict_sess` precedes `list_sess`.
        let mut stmt = analytics.prepare(
            "SELECT session_id, parser_name, lines_added, lines_removed
             FROM fact_session_code_change
             WHERE provider='cursor' AND source_kind = 'tool_write'
             ORDER BY session_id",
        )?;
        let rows = stmt.query_map([], |r| {
            Ok((
                r.get::<_, String>(0)?,
                r.get::<_, String>(1)?,
                r.get::<_, i64>(2)?,
                r.get::<_, i64>(3)?,
            ))
        })?;
        let collected = rows.collect::<std::result::Result<Vec<_>, _>>()?;

        assert_eq!(collected.len(), 2);
        assert_eq!(
            collected[0],
            (
                "dict_sess".to_string(),
                PARTIAL_PARSER_NAME.to_string(),
                2,
                1
            )
        );
        assert_eq!(
            collected[1],
            (
                "list_sess".to_string(),
                PARTIAL_PARSER_NAME.to_string(),
                1,
                1
            )
        );

        cleanup(&source);
        Ok(())
    }

    /// Ingests a session in which the same file is described by both an
    /// `inlineDiffUndoRedo` row and an accepted partial-fates row.
    ///
    /// Expected: only one op is upserted for the file and it is the one
    /// attributed to `INLINE_PARSER_NAME` (1 added / 1 removed), i.e. the
    /// partial-fates data does not produce a second op.
    #[test]
    fn inline_wins_over_partial_for_same_file() -> Result<()> {
        let mut analytics = Connection::open_in_memory()?;
        init_change_intel_schema(&analytics)?;

        let source = temp_db_path("inline_wins");
        let rows = vec![
            (
                "composerData:same_sess".to_string(),
                json!({
                    "composerId": "same_sess",
                    "createdAt": 1772694178155i64,
                    "lastUpdatedAt": 1772694194894i64,
                    "filesChangedCount": 1,
                    "codeBlockData": {
                        "file:///tmp/repo/README.md": {
                            "b1": {
                                "status": "accepted",
                                "partialInlineDiffFatesId": "p_same",
                                "uri": {"scheme": "file", "fsPath": "/tmp/repo/README.md"}
                            }
                        }
                    }
                })
                .to_string(),
            ),
            (
                "checkpointId:same_sess:chk1".to_string(),
                json!({
                    "files": [
                        {"uri": {"scheme": "file", "fsPath": "/tmp/repo/README.md"}}
                    ]
                })
                .to_string(),
            ),
            (
                "inlineDiffUndoRedo-same".to_string(),
                json!({
                    "composerMetadata": {"composerId": "same_sess"},
                    "uri": {"scheme": "file", "fsPath": "/tmp/repo/README.md"},
                    "createdAt": 1772694300812i64,
                    "newTextLines": ["new inline"],
                    "changes": [
                        {
                            "removedTextLines": ["old inline"],
                            "addedRange": {"startLineNumber": 1, "endLineNumberExclusive": 2}
                        }
                    ]
                })
                .to_string(),
            ),
            (
                "codeBlockPartialInlineDiffFates:same_sess:p_same".to_string(),
                json!({
                    "fates": [
                        {"fate": "accepted", "removedLines": ["old partial"], "addedLines": ["new partial"]}
                    ]
                })
                .to_string(),
            ),
        ];

        create_cursor_db(&source, &rows)?;

        let summary = ingest_cursor_code_changes_from_path(&mut analytics, &source, false)?;
        assert_eq!(summary.ops_upserted, 1);

        let op_row: (String, i64, i64) = analytics.query_row(
            "SELECT parser_name, lines_added, lines_removed
             FROM fact_session_code_change
             WHERE provider='cursor' AND source_kind = 'tool_write' AND session_id='same_sess'",
            [],
            |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
        )?;
        assert_eq!(op_row.0, INLINE_PARSER_NAME);
        assert_eq!(op_row.1, 1);
        assert_eq!(op_row.2, 1);

        cleanup(&source);
        Ok(())
    }

    /// Ingests a legacy session whose accepted code block references a
    /// `codeBlockDiff` row through `diffId`.
    ///
    /// Expected: the legacy summary counters each report one considered
    /// session / inspected entry / diff row / upserted op and no parse
    /// errors; the stored op is attributed to `LEGACY_DIFF_PARSER_NAME`
    /// with 2 added / 1 removed line, and at least one line-hash row is
    /// linked to it.
    #[test]
    fn legacy_code_block_diff_ingest_creates_tool_write_hashes() -> Result<()> {
        let mut analytics = Connection::open_in_memory()?;
        init_change_intel_schema(&analytics)?;

        let source = temp_db_path("legacy_diff");
        let rows = vec![
            (
                "composerData:legacy_diff".to_string(),
                json!({
                    "composerId": "legacy_diff",
                    "createdAt": 1772694178155i64,
                    "lastUpdatedAt": 1772694194894i64,
                    "codeBlockData": {
                        "file:///tmp/repo/legacy.txt": [
                            {
                                "status": "accepted",
                                "diffId": "legacy-d1",
                                "version": 0,
                                "codeBlockIdx": 0,
                                "uri": {"scheme": "file", "fsPath": "/tmp/repo/legacy.txt"}
                            }
                        ]
                    }
                })
                .to_string(),
            ),
            (
                "codeBlockDiff:legacy_diff:legacy-d1".to_string(),
                json!({
                    "originalModelDiffWrtV0": [
                        {
                            "original": {"startLineNumber": 1, "endLineNumberExclusive": 2},
                            "modified": ["old line"]
                        }
                    ],
                    "newModelDiffWrtV0": [
                        {
                            "original": {"startLineNumber": 1, "endLineNumberExclusive": 2},
                            "modified": ["new line 1", "new line 2"]
                        }
                    ]
                })
                .to_string(),
            ),
        ];

        create_cursor_db(&source, &rows)?;

        let summary = ingest_cursor_code_changes_from_path(&mut analytics, &source, false)?;
        assert_eq!(summary.ops_upserted, 1);
        assert_eq!(summary.legacy_sessions_considered, 1);
        assert_eq!(summary.legacy_entries_inspected, 1);
        assert_eq!(summary.legacy_diff_rows_found, 1);
        assert_eq!(summary.legacy_ops_upserted, 1);
        assert_eq!(summary.legacy_parse_errors, 0);

        let op_row: (String, i64, i64) = analytics.query_row(
            "SELECT parser_name, lines_added, lines_removed
             FROM fact_session_code_change
             WHERE provider='cursor' AND source_kind = 'tool_write' AND session_id='legacy_diff'",
            [],
            |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
        )?;
        assert_eq!(op_row.0, LEGACY_DIFF_PARSER_NAME);
        assert_eq!(op_row.1, 2);
        assert_eq!(op_row.2, 1);

        // The exact number of hash rows is not pinned, only their presence.
        let hash_count: i64 = analytics.query_row(
            "SELECT COUNT(*)
             FROM fact_session_code_change_line_hashes h
             JOIN fact_session_code_change c ON c.id = h.code_change_id
             WHERE c.provider='cursor' AND c.session_id='legacy_diff'",
            [],
            |r| r.get(0),
        )?;
        assert!(hash_count > 0);

        cleanup(&source);
        Ok(())
    }

    /// Ingests a legacy session with two accepted versions of the same
    /// code block that carry full `content` but no `diffId`.
    ///
    /// Expected: a single op attributed to `LEGACY_CONTENT_PARSER_NAME`
    /// with 1 added / 1 removed line, which is the difference between the
    /// version 0 and version 1 contents (`same line` is unchanged).
    #[test]
    fn legacy_content_fallback_uses_previous_accepted_content() -> Result<()> {
        let mut analytics = Connection::open_in_memory()?;
        init_change_intel_schema(&analytics)?;

        let source = temp_db_path("legacy_content");
        let rows = vec![(
            "composerData:legacy_content".to_string(),
            json!({
                "composerId": "legacy_content",
                "createdAt": 1772694178155i64,
                "lastUpdatedAt": 1772694194894i64,
                "codeBlockData": {
                    "file:///tmp/repo/legacy.txt": [
                        {
                            "status": "accepted",
                            "version": 0,
                            "codeBlockIdx": 0,
                            "bubbleId": "b0",
                            "content": "old line\nsame line\n",
                            "uri": {"scheme": "file", "fsPath": "/tmp/repo/legacy.txt"}
                        },
                        {
                            "status": "accepted",
                            "version": 1,
                            "codeBlockIdx": 0,
                            "bubbleId": "b1",
                            "content": "new line\nsame line\n",
                            "uri": {"scheme": "file", "fsPath": "/tmp/repo/legacy.txt"}
                        }
                    ]
                }
            })
            .to_string(),
        )];

        create_cursor_db(&source, &rows)?;

        let summary = ingest_cursor_code_changes_from_path(&mut analytics, &source, false)?;
        assert_eq!(summary.ops_upserted, 1);
        assert_eq!(summary.legacy_ops_upserted, 1);

        let op_row: (String, i64, i64) = analytics.query_row(
            "SELECT parser_name, lines_added, lines_removed
             FROM fact_session_code_change
             WHERE provider='cursor' AND source_kind = 'tool_write' AND session_id='legacy_content'",
            [],
            |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?)),
        )?;
        assert_eq!(op_row.0, LEGACY_CONTENT_PARSER_NAME);
        assert_eq!(op_row.1, 1);
        assert_eq!(op_row.2, 1);

        cleanup(&source);
        Ok(())
    }

    /// When an `inlineDiffUndoRedo` row and a legacy `codeBlockDiff` row both
    /// describe `/tmp/repo/README.md` for the same composer, the test expects
    /// exactly one stored op, attributed to `INLINE_PARSER_NAME`, and zero
    /// legacy ops in the summary.
    #[test]
    fn inline_wins_over_legacy_for_same_file() -> Result<()> {
        let mut analytics_db = Connection::open_in_memory()?;
        init_change_intel_schema(&analytics_db)?;

        let cursor_db = temp_db_path("inline_wins_legacy");

        // Composer metadata pointing at the legacy diff id.
        let composer = json!({
            "composerId": "legacy_same",
            "createdAt": 1772694178155i64,
            "lastUpdatedAt": 1772694194894i64,
            "filesChangedCount": 1,
            "codeBlockData": {
                "file:///tmp/repo/README.md": [
                    {
                        "status": "accepted",
                        "diffId": "legacy-same-d1",
                        "version": 0,
                        "codeBlockIdx": 0,
                        "uri": {"scheme": "file", "fsPath": "/tmp/repo/README.md"}
                    }
                ]
            }
        });
        // Inline undo/redo record for the same file and composer.
        let inline_undo = json!({
            "composerMetadata": {"composerId": "legacy_same"},
            "uri": {"scheme": "file", "fsPath": "/tmp/repo/README.md"},
            "createdAt": 1772694300812i64,
            "newTextLines": ["new inline"],
            "changes": [
                {
                    "removedTextLines": ["old inline"],
                    "addedRange": {"startLineNumber": 1, "endLineNumberExclusive": 2}
                }
            ]
        });
        // Legacy diff row referenced by the composer's `diffId`.
        let legacy_diff = json!({
            "originalModelDiffWrtV0": [
                {
                    "original": {"startLineNumber": 1, "endLineNumberExclusive": 2},
                    "modified": ["old legacy"]
                }
            ],
            "newModelDiffWrtV0": [
                {
                    "original": {"startLineNumber": 1, "endLineNumberExclusive": 2},
                    "modified": ["new legacy"]
                }
            ]
        });

        let fixture = vec![
            ("composerData:legacy_same".to_string(), composer.to_string()),
            (
                "inlineDiffUndoRedo-legacy-same".to_string(),
                inline_undo.to_string(),
            ),
            (
                "codeBlockDiff:legacy_same:legacy-same-d1".to_string(),
                legacy_diff.to_string(),
            ),
        ];
        create_cursor_db(&cursor_db, &fixture)?;

        let report = ingest_cursor_code_changes_from_path(&mut analytics_db, &cursor_db, false)?;
        assert_eq!(report.ops_upserted, 1);
        assert_eq!(report.legacy_ops_upserted, 0);

        let parser_name: String = analytics_db.query_row(
            "SELECT parser_name
             FROM fact_session_code_change
             WHERE provider='cursor' AND source_kind='tool_write' AND session_id='legacy_same'",
            [],
            |row| row.get(0),
        )?;
        assert_eq!(parser_name, INLINE_PARSER_NAME);

        cleanup(&cursor_db);
        Ok(())
    }

    /// A pathless `codeBlockPartialInlineDiffFates` row whose removed line
    /// ("old config") matches the `originalTextLines` of only one of two
    /// `inlineDiff` rows is expected to be stored against that row's file
    /// (`/tmp/repo/src/config.py`) under `NEW_SCHEMA_PARSER_NAME`, with one
    /// line added and one removed and no parse errors.
    #[test]
    fn new_schema_pathless_partial_prefers_inline_removed_line_overlap() -> Result<()> {
        let mut analytics = Connection::open_in_memory()?;
        init_change_intel_schema(&analytics)?;

        let source = temp_db_path("new_schema_overlap");
        let rows = vec![
            (
                "composerData:new_schema_overlap".to_string(),
                json!({
                    "composerId": "new_schema_overlap",
                    "createdAt": 1772694178155i64,
                    "lastUpdatedAt": 1772694194894i64,
                    "filesChangedCount": 2,
                    "originalFileStates": {
                        "file:///tmp/repo/src/config.py": {"content": "old config\n"},
                        "file:///tmp/repo/docs/readme.md": {"content": "other old\n"}
                    },
                    "codeBlockData": {}
                })
                .to_string(),
            ),
            (
                "inlineDiff:overlap:config".to_string(),
                json!({
                    "diffId": "diff-config",
                    "uri": {"scheme": "file", "fsPath": "/tmp/repo/src/config.py", "external": "file:///tmp/repo/src/config.py"},
                    "originalTextLines": ["old config"],
                    "composerMetadata": {
                        "composerId": "new_schema_overlap",
                        "toolCallId": "tool-config"
                    }
                })
                .to_string(),
            ),
            (
                "inlineDiff:overlap:readme".to_string(),
                json!({
                    "diffId": "diff-readme",
                    "uri": {"scheme": "file", "fsPath": "/tmp/repo/docs/readme.md", "external": "file:///tmp/repo/docs/readme.md"},
                    "originalTextLines": ["other old"],
                    "composerMetadata": {
                        "composerId": "new_schema_overlap",
                        "toolCallId": "tool-readme"
                    }
                })
                .to_string(),
            ),
            (
                // The partial carries no path; only its removed line ties it to a file.
                "codeBlockPartialInlineDiffFates:new_schema_overlap:partial-1".to_string(),
                json!({
                    "fates": [
                        {"fate": "accepted", "removedLines": ["old config"], "addedLines": ["new config"]}
                    ]
                })
                .to_string(),
            ),
        ];

        create_cursor_db(&source, &rows)?;

        let summary = ingest_cursor_code_changes_from_path(&mut analytics, &source, false)?;
        assert_eq!(summary.ops_upserted, 1);
        assert_eq!(summary.parse_errors, 0);

        let op_row: (String, String, i64, i64) = analytics.query_row(
            "SELECT abs_path, parser_name, lines_added, lines_removed
             FROM fact_session_code_change
             WHERE provider='cursor' AND source_kind='tool_write' AND session_id='new_schema_overlap'",
            [],
            |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?)),
        )?;
        assert_eq!(op_row.0, "/tmp/repo/src/config.py");
        // Compare against the shared constant, as the sibling tests do, so the
        // expectation cannot drift from the parser's actual name.
        assert_eq!(op_row.1, NEW_SCHEMA_PARSER_NAME);
        assert_eq!(op_row.2, 1);
        assert_eq!(op_row.3, 1);

        cleanup(&source);
        Ok(())
    }

    /// An add-only pathless partial (no removed lines) is ingested alongside
    /// two `inlineDiff` rows: one with empty `originalTextLines` (new file) and
    /// one with existing content. The test expects the single stored op to
    /// point at `/tmp/repo/src/new_file.py`, with no parse errors.
    #[test]
    fn new_schema_pathless_partial_prefers_new_file_inline_hint() -> Result<()> {
        let mut analytics_db = Connection::open_in_memory()?;
        init_change_intel_schema(&analytics_db)?;

        let cursor_db = temp_db_path("new_schema_new_file");

        let composer = json!({
            "composerId": "new_schema_new_file",
            "createdAt": 1772694178155i64,
            "lastUpdatedAt": 1772694194894i64,
            "filesChangedCount": 2,
            "originalFileStates": {
                "file:///tmp/repo/src/new_file.py": {"content": ""},
                "file:///tmp/repo/src/existing.py": {"content": "old existing\n"}
            },
            "codeBlockData": {}
        });
        // Inline hint for the brand-new file: nothing in `originalTextLines`.
        let new_file_hint = json!({
            "diffId": "diff-new",
            "uri": {"scheme": "file", "fsPath": "/tmp/repo/src/new_file.py", "external": "file:///tmp/repo/src/new_file.py"},
            "originalTextLines": [],
            "composerMetadata": {
                "composerId": "new_schema_new_file",
                "toolCallId": "tool-new"
            }
        });
        // Inline hint for a file that already had content.
        let existing_file_hint = json!({
            "diffId": "diff-existing",
            "uri": {"scheme": "file", "fsPath": "/tmp/repo/src/existing.py", "external": "file:///tmp/repo/src/existing.py"},
            "originalTextLines": ["old existing"],
            "composerMetadata": {
                "composerId": "new_schema_new_file",
                "toolCallId": "tool-existing"
            }
        });
        let partial = json!({
            "fates": [
                {"fate": "accepted", "removedLines": [], "addedLines": ["print('hello')"]}
            ]
        });

        let fixture = vec![
            (
                "composerData:new_schema_new_file".to_string(),
                composer.to_string(),
            ),
            (
                "inlineDiff:new-file:new".to_string(),
                new_file_hint.to_string(),
            ),
            (
                "inlineDiff:new-file:existing".to_string(),
                existing_file_hint.to_string(),
            ),
            (
                "codeBlockPartialInlineDiffFates:new_schema_new_file:partial-1".to_string(),
                partial.to_string(),
            ),
        ];
        create_cursor_db(&cursor_db, &fixture)?;

        let report = ingest_cursor_code_changes_from_path(&mut analytics_db, &cursor_db, false)?;
        assert_eq!(report.ops_upserted, 1);
        assert_eq!(report.parse_errors, 0);

        let stored_path: String = analytics_db.query_row(
            "SELECT abs_path
             FROM fact_session_code_change
             WHERE provider='cursor' AND source_kind='tool_write' AND session_id='new_schema_new_file'",
            [],
            |row| row.get(0),
        )?;
        assert_eq!(stored_path, "/tmp/repo/src/new_file.py");

        cleanup(&cursor_db);
        Ok(())
    }

    /// An add-only pathless partial with no inline hints: the added line
    /// appears only in the `src/alpha.py` content handed to `create_git_repo`,
    /// and the test expects the stored op to resolve to that file's canonical
    /// path with no parse errors.
    #[test]
    fn new_schema_add_only_partial_uses_repo_content_search() -> Result<()> {
        let mut analytics_db = Connection::open_in_memory()?;
        init_change_intel_schema(&analytics_db)?;

        let cursor_db = temp_db_path("new_schema_repo_add");
        let repo_dir = temp_repo_path("new_schema_repo_add");

        // Only alpha.py contains the marker line in the repo fixture.
        let repo_files = [
            (
                "src/alpha.py",
                "def alpha():\n    return 1\nUNIQUE_REPO_ADDED_MARKER = True\n",
            ),
            ("src/beta.py", "def beta():\n    return 2\n"),
        ];
        create_git_repo(&repo_dir, &repo_files)?;

        let alpha_path = repo_dir.join("src/alpha.py");
        let beta_path = repo_dir.join("src/beta.py");
        // Neither original state contains the marker line.
        let original_states = original_file_states(&[
            (&alpha_path, "def alpha():\n    return 1\n"),
            (&beta_path, "def beta():\n    return 2\n"),
        ]);

        let composer = json!({
            "composerId": "new_schema_repo_add",
            "createdAt": 1772694178155i64,
            "lastUpdatedAt": 1772694194894i64,
            "filesChangedCount": 2,
            "originalFileStates": original_states,
            "codeBlockData": {}
        });
        let partial = json!({
            "fates": [
                {"fate": "accepted", "removedLines": [], "addedLines": ["UNIQUE_REPO_ADDED_MARKER = True"]}
            ]
        });
        let fixture = vec![
            (
                "composerData:new_schema_repo_add".to_string(),
                composer.to_string(),
            ),
            (
                "codeBlockPartialInlineDiffFates:new_schema_repo_add:partial-1".to_string(),
                partial.to_string(),
            ),
        ];
        create_cursor_db(&cursor_db, &fixture)?;

        let report = ingest_cursor_code_changes_from_path(&mut analytics_db, &cursor_db, false)?;
        assert_eq!(report.ops_upserted, 1);
        assert_eq!(report.parse_errors, 0);

        let stored_path: String = analytics_db.query_row(
            "SELECT abs_path
             FROM fact_session_code_change
             WHERE provider='cursor' AND source_kind='tool_write' AND session_id='new_schema_repo_add'",
            [],
            |row| row.get(0),
        )?;
        assert_eq!(stored_path, canonical_repo_path(&alpha_path));

        cleanup(&cursor_db);
        cleanup_dir(&repo_dir);
        Ok(())
    }

    /// A removal-only pathless partial: the removed line is present only in
    /// the `originalFileStates` content recorded for `src/alpha.py` (not in
    /// either file's repo content). The test expects the stored op to resolve
    /// to alpha's canonical path with no parse errors.
    #[test]
    fn new_schema_removal_only_partial_uses_original_file_states_history() -> Result<()> {
        let mut analytics_db = Connection::open_in_memory()?;
        init_change_intel_schema(&analytics_db)?;

        let cursor_db = temp_db_path("new_schema_original_state_remove");
        let repo_dir = temp_repo_path("new_schema_original_state_remove");

        let repo_files = [
            ("src/alpha.py", "def alpha():\n    return current_alpha\n"),
            ("src/beta.py", "def beta():\n    return current_beta\n"),
        ];
        create_git_repo(&repo_dir, &repo_files)?;

        let alpha_path = repo_dir.join("src/alpha.py");
        let beta_path = repo_dir.join("src/beta.py");
        // "shared line" is in both histories; the unique removed line only in alpha's.
        let original_states = original_file_states(&[
            (&alpha_path, "legacy_unique_removed_line\nshared line\n"),
            (&beta_path, "different line\nshared line\n"),
        ]);

        let composer = json!({
            "composerId": "new_schema_original_state_remove",
            "createdAt": 1772694178155i64,
            "lastUpdatedAt": 1772694194894i64,
            "filesChangedCount": 2,
            "originalFileStates": original_states,
            "codeBlockData": {}
        });
        let partial = json!({
            "fates": [
                {"fate": "accepted", "removedLines": ["legacy_unique_removed_line"], "addedLines": []}
            ]
        });
        let fixture = vec![
            (
                "composerData:new_schema_original_state_remove".to_string(),
                composer.to_string(),
            ),
            (
                "codeBlockPartialInlineDiffFates:new_schema_original_state_remove:partial-1".to_string(),
                partial.to_string(),
            ),
        ];
        create_cursor_db(&cursor_db, &fixture)?;

        let report = ingest_cursor_code_changes_from_path(&mut analytics_db, &cursor_db, false)?;
        assert_eq!(report.ops_upserted, 1);
        assert_eq!(report.parse_errors, 0);

        let stored_path: String = analytics_db.query_row(
            "SELECT abs_path
             FROM fact_session_code_change
             WHERE provider='cursor' AND source_kind='tool_write' AND session_id='new_schema_original_state_remove'",
            [],
            |row| row.get(0),
        )?;
        assert_eq!(stored_path, canonical_repo_path(&alpha_path));

        cleanup(&cursor_db);
        cleanup_dir(&repo_dir);
        Ok(())
    }

    /// A pathless partial with both a removed and an added line: the removed
    /// line occurs only in alpha's `originalFileStates` content and the added
    /// line only in alpha's repo content. The test expects the stored op to
    /// resolve to alpha's canonical path with no parse errors.
    #[test]
    fn new_schema_mixed_partial_combines_history_and_repo_content() -> Result<()> {
        let mut analytics_db = Connection::open_in_memory()?;
        init_change_intel_schema(&analytics_db)?;

        let cursor_db = temp_db_path("new_schema_repo_mixed");
        let repo_dir = temp_repo_path("new_schema_repo_mixed");

        let repo_files = [
            (
                "src/alpha.py",
                "def alpha():\n    return 1\nMIXED_ADDED_MARKER = True\n",
            ),
            ("src/beta.py", "def beta():\n    return 2\n"),
        ];
        create_git_repo(&repo_dir, &repo_files)?;

        let alpha_path = repo_dir.join("src/alpha.py");
        let beta_path = repo_dir.join("src/beta.py");
        let original_states = original_file_states(&[
            (&alpha_path, "legacy_mixed_removed_line\nalpha stable\n"),
            (&beta_path, "beta stable\n"),
        ]);

        let composer = json!({
            "composerId": "new_schema_repo_mixed",
            "createdAt": 1772694178155i64,
            "lastUpdatedAt": 1772694194894i64,
            "filesChangedCount": 2,
            "originalFileStates": original_states,
            "codeBlockData": {}
        });
        let partial = json!({
            "fates": [
                {
                    "fate": "accepted",
                    "removedLines": ["legacy_mixed_removed_line"],
                    "addedLines": ["MIXED_ADDED_MARKER = True"]
                }
            ]
        });
        let fixture = vec![
            (
                "composerData:new_schema_repo_mixed".to_string(),
                composer.to_string(),
            ),
            (
                "codeBlockPartialInlineDiffFates:new_schema_repo_mixed:partial-1".to_string(),
                partial.to_string(),
            ),
        ];
        create_cursor_db(&cursor_db, &fixture)?;

        let report = ingest_cursor_code_changes_from_path(&mut analytics_db, &cursor_db, false)?;
        assert_eq!(report.ops_upserted, 1);
        assert_eq!(report.parse_errors, 0);

        let stored_path: String = analytics_db.query_row(
            "SELECT abs_path
             FROM fact_session_code_change
             WHERE provider='cursor' AND source_kind='tool_write' AND session_id='new_schema_repo_mixed'",
            [],
            |row| row.get(0),
        )?;
        assert_eq!(stored_path, canonical_repo_path(&alpha_path));

        cleanup(&cursor_db);
        cleanup_dir(&repo_dir);
        Ok(())
    }

    /// An add-only pathless partial whose added line appears in the repo
    /// content of both candidate files. The test expects no op to be stored
    /// and exactly one parse error to be counted in the summary.
    #[test]
    fn new_schema_repo_content_stays_unresolved_for_boilerplate_matches() -> Result<()> {
        let mut analytics_db = Connection::open_in_memory()?;
        init_change_intel_schema(&analytics_db)?;

        let cursor_db = temp_db_path("new_schema_repo_boilerplate");
        let repo_dir = temp_repo_path("new_schema_repo_boilerplate");

        // Both files carry the identical line, so it cannot single out one of them.
        let repo_files = [
            ("src/alpha.py", "COMMON_SHARED_CONFIG = True\n"),
            ("src/beta.py", "COMMON_SHARED_CONFIG = True\n"),
        ];
        create_git_repo(&repo_dir, &repo_files)?;

        let alpha_path = repo_dir.join("src/alpha.py");
        let beta_path = repo_dir.join("src/beta.py");
        let original_states =
            original_file_states(&[(&alpha_path, "alpha\n"), (&beta_path, "beta\n")]);

        let composer = json!({
            "composerId": "new_schema_repo_boilerplate",
            "createdAt": 1772694178155i64,
            "lastUpdatedAt": 1772694194894i64,
            "filesChangedCount": 2,
            "originalFileStates": original_states,
            "codeBlockData": {}
        });
        let partial = json!({
            "fates": [
                {"fate": "accepted", "removedLines": [], "addedLines": ["COMMON_SHARED_CONFIG = True"]}
            ]
        });
        let fixture = vec![
            (
                "composerData:new_schema_repo_boilerplate".to_string(),
                composer.to_string(),
            ),
            (
                "codeBlockPartialInlineDiffFates:new_schema_repo_boilerplate:partial-1".to_string(),
                partial.to_string(),
            ),
        ];
        create_cursor_db(&cursor_db, &fixture)?;

        let report = ingest_cursor_code_changes_from_path(&mut analytics_db, &cursor_db, false)?;
        assert_eq!(report.ops_upserted, 0);
        assert_eq!(report.parse_errors, 1);

        let stored_writes: i64 = analytics_db.query_row(
            "SELECT COUNT(*)
             FROM fact_session_code_change
             WHERE provider='cursor' AND source_kind='tool_write' AND session_id='new_schema_repo_boilerplate'",
            [],
            |row| row.get(0),
        )?;
        assert_eq!(stored_writes, 0);

        cleanup(&cursor_db);
        cleanup_dir(&repo_dir);
        Ok(())
    }

    /// The partial's added line is absent from every file in the repo fixture
    /// (the repo has "drifted"), but its removed line occurs only in alpha's
    /// `originalFileStates` content. The test expects the stored op to resolve
    /// to alpha's canonical path with no parse errors.
    #[test]
    fn new_schema_repo_drift_falls_back_to_historical_removed_lines() -> Result<()> {
        let mut analytics_db = Connection::open_in_memory()?;
        init_change_intel_schema(&analytics_db)?;

        let cursor_db = temp_db_path("new_schema_repo_drift");
        let repo_dir = temp_repo_path("new_schema_repo_drift");

        let repo_files = [
            ("src/alpha.py", "def alpha():\n    return drifted_alpha\n"),
            ("src/beta.py", "def beta():\n    return drifted_beta\n"),
        ];
        create_git_repo(&repo_dir, &repo_files)?;

        let alpha_path = repo_dir.join("src/alpha.py");
        let beta_path = repo_dir.join("src/beta.py");
        let original_states = original_file_states(&[
            (&alpha_path, "legacy_unique_drift_line\nalpha stable\n"),
            (&beta_path, "beta stable\n"),
        ]);

        let composer = json!({
            "composerId": "new_schema_repo_drift",
            "createdAt": 1772694178155i64,
            "lastUpdatedAt": 1772694194894i64,
            "filesChangedCount": 2,
            "originalFileStates": original_states,
            "codeBlockData": {}
        });
        let partial = json!({
            "fates": [
                {
                    "fate": "accepted",
                    "removedLines": ["legacy_unique_drift_line"],
                    "addedLines": ["ADDED_LINE_MISSING_FROM_REPO = True"]
                }
            ]
        });
        let fixture = vec![
            (
                "composerData:new_schema_repo_drift".to_string(),
                composer.to_string(),
            ),
            (
                "codeBlockPartialInlineDiffFates:new_schema_repo_drift:partial-1".to_string(),
                partial.to_string(),
            ),
        ];
        create_cursor_db(&cursor_db, &fixture)?;

        let report = ingest_cursor_code_changes_from_path(&mut analytics_db, &cursor_db, false)?;
        assert_eq!(report.ops_upserted, 1);
        assert_eq!(report.parse_errors, 0);

        let stored_path: String = analytics_db.query_row(
            "SELECT abs_path
             FROM fact_session_code_change
             WHERE provider='cursor' AND source_kind='tool_write' AND session_id='new_schema_repo_drift'",
            [],
            |row| row.get(0),
        )?;
        assert_eq!(stored_path, canonical_repo_path(&alpha_path));

        cleanup(&cursor_db);
        cleanup_dir(&repo_dir);
        Ok(())
    }

    /// Two pathless partials in one session: `partial-1` removes "old config",
    /// which matches a single inline hint, while `partial-2` removes
    /// "shared old", which matches the hints for both `alpha.md` and
    /// `beta.md`. The test expects one stored op, one parse error in the
    /// summary, and one `change_parse_errors` row recorded under
    /// `NEW_SCHEMA_PARSER_NAME`.
    #[test]
    fn new_schema_ambiguous_partial_records_error_without_dropping_resolved_rows() -> Result<()> {
        let mut analytics = Connection::open_in_memory()?;
        init_change_intel_schema(&analytics)?;

        let source = temp_db_path("new_schema_partial_mix");
        let rows = vec![
            (
                "composerData:new_schema_partial_mix".to_string(),
                json!({
                    "composerId": "new_schema_partial_mix",
                    "createdAt": 1772694178155i64,
                    "lastUpdatedAt": 1772694194894i64,
                    "filesChangedCount": 3,
                    "originalFileStates": {
                        "file:///tmp/repo/src/config.py": {"content": "old config\n"},
                        "file:///tmp/repo/docs/alpha.md": {"content": "shared old\n"},
                        "file:///tmp/repo/docs/beta.md": {"content": "shared old\n"}
                    },
                    "codeBlockData": {}
                })
                .to_string(),
            ),
            (
                "inlineDiff:mix:config".to_string(),
                json!({
                    "diffId": "diff-config",
                    "uri": {"scheme": "file", "fsPath": "/tmp/repo/src/config.py", "external": "file:///tmp/repo/src/config.py"},
                    "originalTextLines": ["old config"],
                    "composerMetadata": {
                        "composerId": "new_schema_partial_mix",
                        "toolCallId": "tool-config"
                    }
                })
                .to_string(),
            ),
            (
                "inlineDiff:mix:alpha".to_string(),
                json!({
                    "diffId": "diff-alpha",
                    "uri": {"scheme": "file", "fsPath": "/tmp/repo/docs/alpha.md", "external": "file:///tmp/repo/docs/alpha.md"},
                    "originalTextLines": ["shared old"],
                    "composerMetadata": {
                        "composerId": "new_schema_partial_mix",
                        "toolCallId": "tool-alpha"
                    }
                })
                .to_string(),
            ),
            (
                "inlineDiff:mix:beta".to_string(),
                json!({
                    "diffId": "diff-beta",
                    "uri": {"scheme": "file", "fsPath": "/tmp/repo/docs/beta.md", "external": "file:///tmp/repo/docs/beta.md"},
                    "originalTextLines": ["shared old"],
                    "composerMetadata": {
                        "composerId": "new_schema_partial_mix",
                        "toolCallId": "tool-beta"
                    }
                })
                .to_string(),
            ),
            (
                // Matches only the config.py hint.
                "codeBlockPartialInlineDiffFates:new_schema_partial_mix:partial-1".to_string(),
                json!({
                    "fates": [
                        {"fate": "accepted", "removedLines": ["old config"], "addedLines": ["new config"]}
                    ]
                })
                .to_string(),
            ),
            (
                // Matches both alpha.md and beta.md hints equally.
                "codeBlockPartialInlineDiffFates:new_schema_partial_mix:partial-2".to_string(),
                json!({
                    "fates": [
                        {"fate": "accepted", "removedLines": ["shared old"], "addedLines": ["shared new"]}
                    ]
                })
                .to_string(),
            ),
        ];

        create_cursor_db(&source, &rows)?;

        let summary = ingest_cursor_code_changes_from_path(&mut analytics, &source, false)?;
        assert_eq!(summary.ops_upserted, 1);
        assert_eq!(summary.parse_errors, 1);

        let writes: i64 = analytics.query_row(
            "SELECT COUNT(*)
             FROM fact_session_code_change
             WHERE provider='cursor' AND source_kind='tool_write' AND session_id='new_schema_partial_mix'",
            [],
            |r| r.get(0),
        )?;
        assert_eq!(writes, 1);

        // Bind the parser name from the shared constant instead of repeating
        // the literal inside the SQL text, so the query follows any rename.
        let parse_errs: i64 = analytics.query_row(
            "SELECT COUNT(*)
             FROM change_parse_errors
             WHERE provider='cursor' AND session_id='new_schema_partial_mix' AND parser_name=?1",
            [NEW_SCHEMA_PARSER_NAME],
            |r| r.get(0),
        )?;
        assert_eq!(parse_errs, 1);

        cleanup(&source);
        Ok(())
    }

    /// Calls `resolve_new_schema_partial_path` twice against the same session
    /// and the same `repo_content_indexes` map. The first partial's removed
    /// line matches one inline hint, and the test expects the map to stay
    /// empty afterwards. The second partial is add-only, and the test expects
    /// it to resolve to alpha's canonical path with exactly one entry in the
    /// map.
    #[test]
    fn new_schema_content_index_is_built_only_for_ambiguous_rows() -> Result<()> {
        let repo_dir = temp_repo_path("new_schema_cache");
        let repo_files = [
            (
                "src/alpha.py",
                "def alpha():\n    return 1\nCACHE_SCAN_TARGET = True\n",
            ),
            ("src/beta.py", "def beta():\n    return 2\n"),
        ];
        create_git_repo(&repo_dir, &repo_files)?;

        let alpha_path = repo_dir.join("src/alpha.py");
        let beta_path = repo_dir.join("src/beta.py");

        // Session that only knows the two files as weak path hints.
        let candidate = CandidateSession {
            info: SessionInfo {
                provider: "cursor".to_string(),
                session_id: "cache_session".to_string(),
                source_file: "/tmp/cache.vscdb".to_string(),
                session_cwd: None,
                last_seen_at: None,
            },
            checkpoint_paths: HashSet::new(),
            strong_path_hints: HashSet::new(),
            weak_path_hints: [path_to_string(&alpha_path), path_to_string(&beta_path)]
                .into_iter()
                .collect(),
            original_state_contents: HashMap::new(),
            total_lines_added: None,
            total_lines_removed: None,
            partial_targets: Vec::new(),
            legacy_targets: Vec::new(),
        };
        let hints = vec![
            NewInlineHint {
                call_id: "tool-alpha".to_string(),
                abs_path: path_to_string(&alpha_path),
                timestamp: None,
                original_text_lines: vec!["old alpha".to_string()],
            },
            NewInlineHint {
                call_id: "tool-beta".to_string(),
                abs_path: path_to_string(&beta_path),
                timestamp: None,
                original_text_lines: vec!["old beta".to_string()],
            },
        ];
        let seen_paths = HashSet::new();
        let mut content_indexes = HashMap::new();

        // First call: the removed line matches the alpha hint's original text.
        let hinted_payload = json!({
            "fates": [
                {
                    "fate": "accepted",
                    "removedLines": ["old alpha"],
                    "addedLines": ["new alpha"]
                }
            ]
        });
        let hinted = resolve_new_schema_partial_path(
            &candidate,
            "partial-resolved",
            &hinted_payload,
            &hints,
            &seen_paths,
            &mut content_indexes,
        )
        .map_err(anyhow::Error::msg)?;
        assert_eq!(
            hinted.map(|hit| hit.abs_path),
            Some(path_to_string(&alpha_path))
        );
        assert!(content_indexes.is_empty());

        // Second call: add-only payload whose line matches neither hint's original text.
        let scanned_payload = json!({
            "fates": [
                {
                    "fate": "accepted",
                    "removedLines": [],
                    "addedLines": ["CACHE_SCAN_TARGET = True"]
                }
            ]
        });
        let scanned = resolve_new_schema_partial_path(
            &candidate,
            "partial-ambiguous",
            &scanned_payload,
            &hints,
            &seen_paths,
            &mut content_indexes,
        )
        .map_err(anyhow::Error::msg)?;
        assert_eq!(
            scanned.map(|hit| hit.abs_path),
            Some(canonical_repo_path(&alpha_path))
        );
        assert_eq!(content_indexes.len(), 1);

        cleanup_dir(&repo_dir);
        Ok(())
    }

    /// The composer references a `diffId` ("missing-diff-row") for which the
    /// fixture contains no `codeBlockDiff` row, and touches a single file. The
    /// test expects one op stored under `LEGACY_TOTALS_PARSER_NAME` carrying
    /// the composer's `totalLinesAdded` / `totalLinesRemoved` (5 / 2), with
    /// `before_known` = 0 and no line-hash rows.
    #[test]
    fn single_file_missing_legacy_diff_uses_session_totals_fallback() -> Result<()> {
        let mut analytics = Connection::open_in_memory()?;
        init_change_intel_schema(&analytics)?;

        let source = temp_db_path("legacy_single_file_totals");
        let rows = vec![(
            "composerData:legacy_single_file_totals".to_string(),
            json!({
                "composerId": "legacy_single_file_totals",
                "createdAt": 1772694178155i64,
                "lastUpdatedAt": 1772694194894i64,
                "totalLinesAdded": 5,
                "totalLinesRemoved": 2,
                "originalFileStates": {
                    "file:///tmp/repo/legacy.txt": {"content": "before"}
                },
                "codeBlockData": {
                    "file:///tmp/repo/legacy.txt": [
                        {
                            "status": "accepted",
                            "diffId": "missing-diff-row",
                            "version": 0,
                            "codeBlockIdx": 0,
                            "uri": {"scheme": "file", "fsPath": "/tmp/repo/legacy.txt"}
                        }
                    ]
                }
            })
            .to_string(),
        )];

        create_cursor_db(&source, &rows)?;

        let summary = ingest_cursor_code_changes_from_path(&mut analytics, &source, false)?;
        assert_eq!(summary.ops_upserted, 1);
        assert_eq!(summary.parse_errors, 0);

        let op_row: (String, i64, i64, i64) = analytics.query_row(
            "SELECT parser_name, lines_added, lines_removed, before_known
             FROM fact_session_code_change
             WHERE provider='cursor' AND source_kind='tool_write' AND session_id='legacy_single_file_totals'",
            [],
            |r| Ok((r.get(0)?, r.get(1)?, r.get(2)?, r.get(3)?)),
        )?;
        // Compare against the shared constant, as the sibling tests do, so the
        // expectation cannot drift from the parser's actual name.
        assert_eq!(op_row.0, LEGACY_TOTALS_PARSER_NAME);
        assert_eq!(op_row.1, 5);
        assert_eq!(op_row.2, 2);
        assert_eq!(op_row.3, 0);

        let hash_count: i64 = analytics.query_row(
            "SELECT COUNT(*)
             FROM fact_session_code_change_line_hashes h
             JOIN fact_session_code_change c ON c.id = h.code_change_id
             WHERE c.provider='cursor' AND c.session_id='legacy_single_file_totals'",
            [],
            |r| r.get(0),
        )?;
        assert_eq!(hash_count, 0);

        cleanup(&source);
        Ok(())
    }

    /// When a session spans two files and none of the referenced legacy diff ids
    /// exist in the fixture, no write op is stored and the ingest reports one
    /// legacy-diff parse error per file (two here, although three code blocks
    /// are listed).
    #[test]
    fn multi_file_missing_legacy_diffs_record_one_parse_error_per_file() -> Result<()> {
        let mut conn = Connection::open_in_memory()?;
        init_change_intel_schema(&conn)?;

        let db_path = temp_db_path("legacy_multi_file_missing");

        // Builds one accepted code-block entry; only diff id, version and path vary.
        let accepted_block = |diff_id: &str, version: i64, fs_path: &str| {
            json!({
                "status": "accepted",
                "diffId": diff_id,
                "version": version,
                "codeBlockIdx": 0,
                "uri": {"scheme": "file", "fsPath": fs_path}
            })
        };
        let composer = json!({
            "composerId": "legacy_multi_file_missing",
            "createdAt": 1772694178155i64,
            "lastUpdatedAt": 1772694194894i64,
            "totalLinesAdded": 7,
            "totalLinesRemoved": 3,
            "originalFileStates": {
                "file:///tmp/repo/a.txt": {"content": "before a"},
                "file:///tmp/repo/b.txt": {"content": "before b"}
            },
            "codeBlockData": {
                "file:///tmp/repo/a.txt": [
                    accepted_block("missing-a-1", 0, "/tmp/repo/a.txt"),
                    accepted_block("missing-a-2", 1, "/tmp/repo/a.txt")
                ],
                "file:///tmp/repo/b.txt": [
                    accepted_block("missing-b-1", 0, "/tmp/repo/b.txt")
                ]
            }
        });
        let fixture = vec![(
            String::from("composerData:legacy_multi_file_missing"),
            composer.to_string(),
        )];
        create_cursor_db(&db_path, &fixture)?;

        let outcome = ingest_cursor_code_changes_from_path(&mut conn, &db_path, false)?;
        assert_eq!(outcome.ops_upserted, 0);
        assert_eq!(outcome.parse_errors, 2);
        assert_eq!(outcome.legacy_parse_errors, 2);

        // No tool_write op may be stored for this session.
        let stored_writes = conn.query_row(
            "SELECT COUNT(*)
             FROM fact_session_code_change
             WHERE provider='cursor' AND source_kind='tool_write' AND session_id='legacy_multi_file_missing'",
            [],
            |row| row.get::<_, i64>(0),
        )?;
        assert_eq!(stored_writes, 0);

        // The persisted parse errors must match the summary: one per file.
        let stored_parse_errors = conn.query_row(
            "SELECT COUNT(*)
             FROM change_parse_errors
             WHERE provider='cursor' AND session_id='legacy_multi_file_missing' AND parser_name=?1",
            params![LEGACY_DIFF_PARSER_NAME],
            |row| row.get::<_, i64>(0),
        )?;
        assert_eq!(stored_parse_errors, 2);

        cleanup(&db_path);
        Ok(())
    }

    /// With neither session totals nor original file states in the composer
    /// payload, a code block whose legacy diff id is absent from the fixture
    /// produces no op and exactly one legacy-diff parse error.
    #[test]
    fn missing_legacy_diff_records_parse_error_when_no_content_fallback_exists() -> Result<()> {
        let mut conn = Connection::open_in_memory()?;
        init_change_intel_schema(&conn)?;

        let db_path = temp_db_path("legacy_missing_diff");

        // Payload deliberately omits totalLines* and originalFileStates.
        let composer = json!({
            "composerId": "legacy_missing",
            "createdAt": 1772694178155i64,
            "lastUpdatedAt": 1772694194894i64,
            "codeBlockData": {
                "file:///tmp/repo/legacy.txt": [
                    {
                        "status": "accepted",
                        "diffId": "missing-diff-row",
                        "version": 0,
                        "codeBlockIdx": 0,
                        "uri": {"scheme": "file", "fsPath": "/tmp/repo/legacy.txt"}
                    }
                ]
            }
        });
        let fixture = vec![(
            String::from("composerData:legacy_missing"),
            composer.to_string(),
        )];
        create_cursor_db(&db_path, &fixture)?;

        let outcome = ingest_cursor_code_changes_from_path(&mut conn, &db_path, false)?;
        assert_eq!(outcome.ops_upserted, 0);
        assert_eq!(outcome.parse_errors, 1);
        assert_eq!(outcome.legacy_parse_errors, 1);

        // The analytics DB is fresh, so the count is not scoped by session.
        let stored_parse_errors = conn.query_row(
            "SELECT COUNT(*) FROM change_parse_errors WHERE parser_name = ?1",
            params![LEGACY_DIFF_PARSER_NAME],
            |row| row.get::<_, i64>(0),
        )?;
        assert_eq!(stored_parse_errors, 1);

        cleanup(&db_path);
        Ok(())
    }
}