use anyhow::{Context, Result};
use rusqlite::{Connection, params};
use serde::Serialize;
use std::path::Path;
use crate::search;
use crate::store::Store;
/// Top-level export document: built by [`export`] and serialized by [`write_json`].
#[derive(Debug, Clone, Serialize)]
pub struct Export {
/// Value reported by the store's `schema_version()` when the export was built.
pub schema_version: i64,
/// System-clock time at which the export was built, in whole seconds since the
/// Unix epoch (0 if the clock reads earlier than the epoch).
pub exported_at: i64,
/// Copy of the filter that selected `sources`.
pub filter: FilterSnapshot,
/// Selected sources, ordered by `ingested_at` descending, then `id` descending.
pub sources: Vec<ExportedSource>,
}
/// Serializable record of the [`ExportFilter`] an export was produced with.
#[derive(Debug, Clone, Serialize)]
pub struct FilterSnapshot {
/// Copy of `ExportFilter::path_contains`; `None` when no path filter was given.
pub path: Option<String>,
/// Copy of `ExportFilter::query` exactly as supplied (not the derived FTS
/// expression); `None` when no query was given.
pub query: Option<String>,
}
/// One row of the `sources` table together with all of its chunks.
#[derive(Debug, Clone, Serialize)]
pub struct ExportedSource {
/// `sources.id` column.
pub source_id: String,
/// `sources.uri` column.
pub uri: String,
/// `sources.path` column; nullable in the database.
pub path: Option<String>,
/// `sources.kind` column.
pub kind: String,
/// `sources.bytes` column (presumably the size of the source content — confirm
/// against the ingest code).
pub bytes: i64,
/// `sources.content_sha256` column.
pub content_sha256: String,
/// `sources.mtime_unix` column; nullable (presumably a modification time in
/// Unix seconds — confirm against the ingest code).
pub mtime_unix: Option<i64>,
/// `sources.ingested_at` column; the primary sort key used when selecting
/// sources for export.
pub ingested_at: i64,
/// Every chunk whose `source_id` is this source, ordered by `ordinal`.
pub chunks: Vec<ExportedChunk>,
}
/// One row of the `chunks` table.
///
/// The optional metadata fields are left out of the serialized output entirely
/// when they are `None`, rather than being written as `null`.
#[derive(Debug, Clone, Serialize)]
pub struct ExportedChunk {
/// `chunks.id` column.
pub chunk_id: String,
/// `chunks.ordinal` column; chunks of a source are exported in this order.
pub ordinal: i64,
/// `chunks.byte_start` column (presumably an offset into the source content;
/// whether the range is half-open is not visible here — confirm).
pub byte_start: i64,
/// `chunks.byte_end` column (see `byte_start`).
pub byte_end: i64,
/// `chunks.char_count` column.
pub char_count: i64,
/// `chunks.sha256` column.
pub sha256: String,
/// `chunks.text` column.
pub text: String,
/// `chunks.role` column; omitted from the output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub role: Option<String>,
/// `chunks.session_id` column; omitted from the output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub session_id: Option<String>,
/// `chunks.turn_id` column; omitted from the output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub turn_id: Option<String>,
/// `chunks.tool_name` column; omitted from the output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub tool_name: Option<String>,
/// `chunks.timestamp_unix` column; omitted from the output when `None`.
#[serde(skip_serializing_if = "Option::is_none")]
pub timestamp_unix: Option<i64>,
}
/// Criteria restricting which sources [`export`] includes.
///
/// Both criteria are optional; when both are set a source must satisfy both.
/// The `Default` value (both `None`) selects every source.
#[derive(Debug, Clone, Default)]
pub struct ExportFilter {
/// Keep only sources whose `path` or `uri` contains this text (matched with
/// SQL `LIKE`).
pub path_contains: Option<String>,
/// Keep only sources having at least one chunk that matches this query in the
/// `chunks_fts` index. The text is first converted by `search::build_fts_query`;
/// if that yields an empty string, nothing is selected.
pub query: Option<String>,
}
/// Builds an [`Export`] holding every source in `store` that passes `filter`.
///
/// Matching source ids are resolved first, then each source is loaded with its
/// chunks. The schema version and the export timestamp are read only after all
/// sources have loaded successfully.
///
/// # Errors
/// Fails on the first database error raised while selecting ids, loading a
/// source, or reading the schema version.
pub fn export(store: &Store, filter: &ExportFilter) -> Result<Export> {
    let conn = store.conn();

    // Collecting into `Result` stops at the first source that fails to load.
    let sources = select_source_ids(conn, filter)?
        .iter()
        .map(|source_id| load_source(conn, source_id))
        .collect::<Result<Vec<_>>>()?;

    let schema_version = store.schema_version()?;
    let exported_at = now_unix();
    let snapshot = FilterSnapshot {
        path: filter.path_contains.clone(),
        query: filter.query.clone(),
    };

    Ok(Export {
        schema_version,
        exported_at,
        filter: snapshot,
        sources,
    })
}
pub fn write_json(export: &Export, output: Option<&Path>) -> Result<()> {
let json = serde_json::to_string_pretty(export)?;
match output {
Some(path) => std::fs::write(path, format!("{json}\n"))
.with_context(|| format!("writing export to {}", path.display()))?,
None => println!("{json}"),
}
Ok(())
}
/// Resolves `filter` to the ids of the matching sources.
///
/// Results are ordered by `ingested_at` descending, then `id` descending.
///
/// * `filter.path_contains` keeps sources whose `path` or `uri` contains the
///   given text. The text is matched literally: `%`, `_` and `\` in it are
///   escaped so they are not treated as `LIKE` wildcards.
/// * `filter.query` is converted with `search::build_fts_query` and keeps
///   sources that have at least one chunk matching it in `chunks_fts`. If the
///   converted query is empty, no source is selected.
/// * With neither criterion set, every source is returned.
///
/// # Errors
/// Propagates any error from preparing or running the SQL statement.
fn select_source_ids(conn: &Connection, filter: &ExportFilter) -> Result<Vec<String>> {
    /// Escapes the `LIKE` metacharacters in `raw` for use with `ESCAPE '\'`.
    fn escape_like(raw: &str) -> String {
        let mut escaped = String::with_capacity(raw.len());
        for ch in raw.chars() {
            if matches!(ch, '\\' | '%' | '_') {
                escaped.push('\\');
            }
            escaped.push(ch);
        }
        escaped
    }

    let fts_query = filter.query.as_deref().map(search::build_fts_query);
    // An empty FTS expression is treated as "matches nothing" instead of being
    // sent to SQLite.
    if matches!(fts_query.as_deref(), Some("")) {
        return Ok(Vec::new());
    }

    // Wrap the escaped text in wildcards so the comparison is a substring test.
    let path_like = filter
        .path_contains
        .as_deref()
        .map(|needle| format!("%{}%", escape_like(needle)));

    let ids = match (path_like.as_deref(), fts_query.as_deref()) {
        (None, None) => collect_ids(
            conn,
            "SELECT id FROM sources ORDER BY ingested_at DESC, id DESC",
            params![],
        )?,
        (Some(like), None) => collect_ids(
            conn,
            "SELECT id FROM sources
             WHERE (path LIKE ?1 ESCAPE '\\' OR uri LIKE ?1 ESCAPE '\\')
             ORDER BY ingested_at DESC, id DESC",
            params![like],
        )?,
        (None, Some(fts)) => collect_ids(
            conn,
            "SELECT s.id FROM sources s
             WHERE EXISTS (
                 SELECT 1 FROM chunks c
                 JOIN chunks_fts ON chunks_fts.rowid = c.rowid
                 WHERE c.source_id = s.id AND chunks_fts MATCH ?1
             )
             ORDER BY s.ingested_at DESC, s.id DESC",
            params![fts],
        )?,
        (Some(like), Some(fts)) => collect_ids(
            conn,
            "SELECT s.id FROM sources s
             WHERE (s.path LIKE ?1 ESCAPE '\\' OR s.uri LIKE ?1 ESCAPE '\\')
             AND EXISTS (
                 SELECT 1 FROM chunks c
                 JOIN chunks_fts ON chunks_fts.rowid = c.rowid
                 WHERE c.source_id = s.id AND chunks_fts MATCH ?2
             )
             ORDER BY s.ingested_at DESC, s.id DESC",
            params![like, fts],
        )?,
    };
    Ok(ids)
}
/// Runs `sql` with `params` and returns the first column of every row as a `String`.
///
/// # Errors
/// Fails if the statement cannot be prepared or executed, or if any row's first
/// column cannot be read as a `String`; the first such error aborts collection.
fn collect_ids(
    conn: &Connection,
    sql: &str,
    params: &[&dyn rusqlite::ToSql],
) -> Result<Vec<String>> {
    let mut statement = conn.prepare(sql)?;
    let mut ids = Vec::new();
    for id in statement.query_map(params, |row| row.get::<_, String>(0))? {
        ids.push(id?);
    }
    Ok(ids)
}
/// Loads the source row with the given `id` together with all of its chunks.
///
/// Chunks are returned ordered by `ordinal`.
///
/// Both statements go through the connection's prepared-statement cache, so a
/// caller that loads many sources in a row does not re-prepare the same SQL for
/// each one.
///
/// # Errors
/// Fails if no source has this `id` or if any query or column read fails. The
/// error is annotated with the source id; the underlying `rusqlite` error stays
/// in the chain.
pub(crate) fn load_source(conn: &Connection, id: &str) -> Result<ExportedSource> {
    let mut source_stmt = conn.prepare_cached(
        "SELECT id, uri, path, kind, bytes, content_sha256, mtime_unix, ingested_at
         FROM sources WHERE id = ?1",
    )?;
    let mut source = source_stmt
        .query_row(params![id], |row| {
            Ok(ExportedSource {
                source_id: row.get(0)?,
                uri: row.get(1)?,
                path: row.get(2)?,
                kind: row.get(3)?,
                bytes: row.get(4)?,
                content_sha256: row.get(5)?,
                mtime_unix: row.get(6)?,
                ingested_at: row.get(7)?,
                // Filled in below once the chunk rows have been read.
                chunks: Vec::new(),
            })
        })
        .with_context(|| format!("loading source {id}"))?;

    let mut chunk_stmt = conn.prepare_cached(
        "SELECT id, ordinal, byte_start, byte_end, char_count, sha256, text,
                role, session_id, turn_id, tool_name, timestamp_unix
         FROM chunks WHERE source_id = ?1 ORDER BY ordinal",
    )?;
    source.chunks = chunk_stmt
        .query_map(params![id], |row| {
            Ok(ExportedChunk {
                chunk_id: row.get(0)?,
                ordinal: row.get(1)?,
                byte_start: row.get(2)?,
                byte_end: row.get(3)?,
                char_count: row.get(4)?,
                sha256: row.get(5)?,
                text: row.get(6)?,
                role: row.get(7)?,
                session_id: row.get(8)?,
                turn_id: row.get(9)?,
                tool_name: row.get(10)?,
                timestamp_unix: row.get(11)?,
            })
        })
        .and_then(|rows| rows.collect::<Result<Vec<_>, _>>())
        .with_context(|| format!("loading chunks of source {id}"))?;

    Ok(source)
}
/// Returns the current system-clock time as whole seconds since the Unix epoch.
///
/// Yields 0 when the system clock reads earlier than the epoch.
fn now_unix() -> i64 {
    match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs() as i64,
        Err(_) => 0,
    }
}