patina-ai 0.23.0

Context orchestration for AI development - captures and evolves patterns over time
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
//! Shared eventlog infrastructure for Patina
//!
//! The eventlog is an append-only, immutable source of truth for all structured
//! events in Patina. Multiple commands write events (scrape, session, scry);
//! multiple commands read them (scry, eval, assay). No single command owns the pipe.
//!
//! Following the LiveStore pattern:
//! - eventlog table (immutable source of truth)
//! - materialized views (derived, rebuildable)
//!
//! This module was extracted from `commands/scrape/database.rs` to make the
//! eventlog accessible as shared infrastructure across all commands.

use anyhow::Result;
use rusqlite::Connection;
use std::path::Path;

/// Path to unified database
pub const PATINA_DB: &str = ".patina/local/data/patina.db";

/// Check if a path is within a ref repo (external reference repository).
///
/// Ref repos live in `~/.patina/cache/repos/` and use lean storage:
/// - Git/code data: direct insert (no eventlog) - rebuilds from source
/// - Forge data: eventlog with dedup - caches expensive API data
///
/// See: layer/surface/build/spec-ref-repo-storage.md
pub fn is_ref_repo(path: &Path) -> bool {
    // Try the path directly first
    if path.to_string_lossy().contains(".patina/cache/repos") {
        return true;
    }
    // If path is relative, check current working directory
    if let Ok(cwd) = std::env::current_dir() {
        if cwd.to_string_lossy().contains(".patina/cache/repos") {
            return true;
        }
        // Also check canonical path
        if let Ok(canonical) = cwd.join(path).canonicalize() {
            return canonical.to_string_lossy().contains(".patina/cache/repos");
        }
    }
    false
}

/// Initialize the unified patina.db with eventlog table and indexes
pub fn initialize(db_path: &Path) -> Result<Connection> {
    // Ensure parent directory exists
    if let Some(parent) = db_path.parent() {
        std::fs::create_dir_all(parent)?;
    }

    let conn = Connection::open(db_path)?;

    // Create eventlog table (LiveStore pattern - immutable source of truth)
    conn.execute_batch(
        r#"
        -- Eventlog: Unified source of truth for ALL events
        CREATE TABLE IF NOT EXISTS eventlog (
            seq INTEGER PRIMARY KEY AUTOINCREMENT,  -- Global ordering
            event_type TEXT NOT NULL,                -- e.g. 'git.commit', 'session.decision'
            timestamp TEXT NOT NULL,                 -- ISO8601 when event occurred
            source_id TEXT NOT NULL,                 -- sha, session_id, function_name, etc
            source_file TEXT,                        -- Original file path
            data TEXT NOT NULL,                      -- Event-specific JSON payload
            CHECK(json_valid(data))
        );

        -- Indexes for common queries
        CREATE INDEX IF NOT EXISTS idx_eventlog_type ON eventlog(event_type);
        CREATE INDEX IF NOT EXISTS idx_eventlog_timestamp ON eventlog(timestamp);
        CREATE INDEX IF NOT EXISTS idx_eventlog_source ON eventlog(source_id);
        CREATE INDEX IF NOT EXISTS idx_eventlog_type_time ON eventlog(event_type, timestamp);

        -- Scrape metadata (track last processed for incremental updates)
        CREATE TABLE IF NOT EXISTS scrape_meta (
            key TEXT PRIMARY KEY,
            value TEXT
        );

        -- FTS5 virtual table for exact-match lexical search (code)
        CREATE VIRTUAL TABLE IF NOT EXISTS code_fts USING fts5(
            symbol_name,
            file_path,
            content,
            event_type,
            tokenize='porter unicode61'
        );

        -- FTS5 virtual table for commit message search (git narrative)
        CREATE VIRTUAL TABLE IF NOT EXISTS commits_fts USING fts5(
            sha,
            message,
            author_name,
            tokenize='porter unicode61'
        );

        -- Moments table for derived temporal signals (assay derive)
        CREATE TABLE IF NOT EXISTS moments (
            sha TEXT PRIMARY KEY,
            moment_type TEXT NOT NULL,
            file_count INTEGER,
            timestamp TEXT,
            message TEXT
        );

        CREATE INDEX IF NOT EXISTS idx_moments_type ON moments(moment_type);
        CREATE INDEX IF NOT EXISTS idx_moments_timestamp ON moments(timestamp);
        "#,
    )?;

    Ok(conn)
}

/// Insert an event into the unified eventlog
pub fn insert_event(
    conn: &Connection,
    event_type: &str,
    timestamp: &str,
    source_id: &str,
    source_file: Option<&str>,
    data: &str,
) -> Result<i64> {
    let seq = conn.execute(
        "INSERT INTO eventlog (event_type, timestamp, source_id, source_file, data)
         VALUES (?1, ?2, ?3, ?4, ?5)",
        rusqlite::params![event_type, timestamp, source_id, source_file, data],
    )?;
    Ok(seq as i64)
}

/// Get the last processed value for a scraper (for incremental updates)
pub fn get_last_processed(conn: &Connection, scraper: &str) -> Result<Option<String>> {
    let key = format!("last_processed_{}", scraper);
    let result: Result<String, _> = conn.query_row(
        "SELECT value FROM scrape_meta WHERE key = ?1",
        [&key],
        |row| row.get(0),
    );

    match result {
        Ok(value) => Ok(Some(value)),
        Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
        Err(e) => Err(e.into()),
    }
}

/// Update the last processed value for a scraper
pub fn set_last_processed(conn: &Connection, scraper: &str, value: &str) -> Result<()> {
    let key = format!("last_processed_{}", scraper);
    conn.execute(
        "INSERT OR REPLACE INTO scrape_meta (key, value) VALUES (?1, ?2)",
        rusqlite::params![&key, value],
    )?;
    Ok(())
}

// ============================================================================
// Feedback Loop Views (Phase 3)
// ============================================================================

/// Create SQL views for feedback loop analysis
///
/// These views correlate scry queries with subsequent commits to measure
/// retrieval precision in real-world usage.
pub fn create_feedback_views(conn: &Connection) -> Result<()> {
    conn.execute_batch(
        r#"
        -- View: Queries made during each session
        CREATE VIEW IF NOT EXISTS feedback_session_queries AS
        SELECT
            json_extract(data, '$.session_id') as session_id,
            json_extract(data, '$.query_id') as query_id,
            json_extract(data, '$.query') as query,
            json_extract(data, '$.mode') as mode,
            json_extract(data, '$.results') as results,
            timestamp
        FROM eventlog
        WHERE event_type = 'scry.query'
          AND json_extract(data, '$.session_id') IS NOT NULL;

        -- View: Files committed during each session (from latest scrape only)
        -- Uses window function to deduplicate by (sha, file_path) keeping latest scrape
        CREATE VIEW IF NOT EXISTS feedback_commit_files AS
        SELECT
            session_id,
            sha,
            file_path,
            change_type,
            timestamp
        FROM (
            SELECT
                json_extract(data, '$.session_id') as session_id,
                json_extract(data, '$.sha') as sha,
                json_extract(f.value, '$.path') as file_path,
                json_extract(f.value, '$.change_type') as change_type,
                timestamp,
                ROW_NUMBER() OVER (
                    PARTITION BY json_extract(data, '$.sha'), json_extract(f.value, '$.path')
                    ORDER BY seq DESC
                ) as rn
            FROM eventlog, json_each(json_extract(data, '$.files')) as f
            WHERE event_type = 'git.commit'
              AND json_extract(data, '$.session_id') IS NOT NULL
        )
        WHERE rn = 1;

        -- View: Query results matched to committed files
        -- A "hit" is when a retrieved doc_id matches a file that was committed.
        -- doc_id may contain '::' suffixes (e.g. "src/main.rs::fn:main" from
        -- SemanticOracle) — strip those before matching. Also strip leading "./".
        CREATE VIEW IF NOT EXISTS feedback_query_hits AS
        SELECT
            q.session_id,
            q.query,
            q.mode,
            q.timestamp as query_time,
            json_extract(r.value, '$.doc_id') as retrieved_doc_id,
            json_extract(r.value, '$.rank') as rank,
            json_extract(r.value, '$.score') as score,
            CASE
                WHEN EXISTS (
                    SELECT 1 FROM feedback_commit_files cf
                    WHERE cf.session_id = q.session_id
                      AND (
                        -- Normalize: strip '::...' suffix and './' prefix from doc_id,
                        -- then check if the file path matches
                        cf.file_path = REPLACE(
                            CASE
                                WHEN INSTR(json_extract(r.value, '$.doc_id'), '::') > 0
                                THEN SUBSTR(json_extract(r.value, '$.doc_id'), 1,
                                     INSTR(json_extract(r.value, '$.doc_id'), '::') - 1)
                                ELSE json_extract(r.value, '$.doc_id')
                            END,
                            './', '')
                        OR cf.file_path LIKE '%/' || REPLACE(
                            CASE
                                WHEN INSTR(json_extract(r.value, '$.doc_id'), '::') > 0
                                THEN SUBSTR(json_extract(r.value, '$.doc_id'), 1,
                                     INSTR(json_extract(r.value, '$.doc_id'), '::') - 1)
                                ELSE json_extract(r.value, '$.doc_id')
                            END,
                            './', '')
                      )
                ) THEN 1
                ELSE 0
            END as is_hit
        FROM feedback_session_queries q,
             json_each(q.results) as r;

        -- View: scry.use events (Phase 3) - explicit result usage from agents
        CREATE VIEW IF NOT EXISTS feedback_usage AS
        SELECT
            json_extract(data, '$.query_id') as query_id,
            json_extract(data, '$.result_used') as doc_id,
            json_extract(data, '$.rank') as rank,
            json_extract(data, '$.session_id') as session_id,
            timestamp
        FROM eventlog
        WHERE event_type = 'scry.use';

        -- View: scry.feedback events (Phase 3) - explicit good/bad ratings
        CREATE VIEW IF NOT EXISTS feedback_ratings AS
        SELECT
            json_extract(data, '$.query_id') as query_id,
            json_extract(data, '$.signal') as signal,
            json_extract(data, '$.comment') as comment,
            json_extract(data, '$.session_id') as session_id,
            timestamp
        FROM eventlog
        WHERE event_type = 'scry.feedback';

        -- View: Combined query analysis with usage and feedback
        CREATE VIEW IF NOT EXISTS feedback_query_analysis AS
        SELECT
            q.session_id,
            json_extract(q.data, '$.query_id') as query_id,
            json_extract(q.data, '$.query') as query,
            json_extract(q.data, '$.mode') as mode,
            q.timestamp as query_time,
            (SELECT COUNT(*) FROM eventlog u
             WHERE u.event_type = 'scry.use'
               AND json_extract(u.data, '$.query_id') = json_extract(q.data, '$.query_id')
            ) as use_count,
            (SELECT json_group_array(json_extract(u.data, '$.rank'))
             FROM eventlog u
             WHERE u.event_type = 'scry.use'
               AND json_extract(u.data, '$.query_id') = json_extract(q.data, '$.query_id')
            ) as used_ranks,
            (SELECT json_extract(f.data, '$.signal')
             FROM eventlog f
             WHERE f.event_type = 'scry.feedback'
               AND json_extract(f.data, '$.query_id') = json_extract(q.data, '$.query_id')
             LIMIT 1
            ) as feedback_signal
        FROM eventlog q
        WHERE q.event_type = 'scry.query';
        "#,
    )?;

    Ok(())
}

#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Count events by type (test helper)
    fn count_events_by_type(conn: &Connection, event_type: &str) -> Result<i64> {
        let count: i64 = conn.query_row(
            "SELECT COUNT(*) FROM eventlog WHERE event_type = ?1",
            [event_type],
            |row| row.get(0),
        )?;
        Ok(count)
    }

    /// Get total event count (test helper)
    fn count_total_events(conn: &Connection) -> Result<i64> {
        let count: i64 = conn.query_row("SELECT COUNT(*) FROM eventlog", [], |row| row.get(0))?;
        Ok(count)
    }

    #[test]
    fn test_initialize_creates_tables() -> Result<()> {
        let dir = tempdir()?;
        let db_path = dir.path().join("test.db");
        let conn = initialize(&db_path)?;

        // Check eventlog table exists
        let tables: Vec<String> = conn
            .prepare("SELECT name FROM sqlite_master WHERE type='table'")?
            .query_map([], |row| row.get(0))?
            .collect::<Result<_, _>>()?;

        assert!(tables.contains(&"eventlog".to_string()));
        assert!(tables.contains(&"scrape_meta".to_string()));

        Ok(())
    }

    #[test]
    fn test_insert_and_count_events() -> Result<()> {
        let dir = tempdir()?;
        let db_path = dir.path().join("test.db");
        let conn = initialize(&db_path)?;

        // Insert a test event
        let data = r#"{"message": "test commit", "author": "test"}"#;
        insert_event(
            &conn,
            "git.commit",
            "2025-11-21T12:00:00Z",
            "abc123",
            Some("test.rs"),
            data,
        )?;

        // Count events
        assert_eq!(count_total_events(&conn)?, 1);
        assert_eq!(count_events_by_type(&conn, "git.commit")?, 1);
        assert_eq!(count_events_by_type(&conn, "session.decision")?, 0);

        Ok(())
    }

    #[test]
    fn test_last_processed_tracking() -> Result<()> {
        let dir = tempdir()?;
        let db_path = dir.path().join("test.db");
        let conn = initialize(&db_path)?;

        // Initially no value
        assert_eq!(get_last_processed(&conn, "git")?, None);

        // Set value
        set_last_processed(&conn, "git", "abc123")?;
        assert_eq!(
            get_last_processed(&conn, "git")?,
            Some("abc123".to_string())
        );

        // Update value
        set_last_processed(&conn, "git", "def456")?;
        assert_eq!(
            get_last_processed(&conn, "git")?,
            Some("def456".to_string())
        );

        Ok(())
    }

    #[test]
    fn test_json_validation() -> Result<()> {
        let dir = tempdir()?;
        let db_path = dir.path().join("test.db");
        let conn = initialize(&db_path)?;

        // Valid JSON should work
        let valid_json = r#"{"key": "value"}"#;
        assert!(insert_event(
            &conn,
            "test.event",
            "2025-11-21T12:00:00Z",
            "test1",
            None,
            valid_json
        )
        .is_ok());

        // Invalid JSON should fail
        let invalid_json = r#"{not valid json"#;
        assert!(insert_event(
            &conn,
            "test.event",
            "2025-11-21T12:00:00Z",
            "test2",
            None,
            invalid_json
        )
        .is_err());

        Ok(())
    }
}