SCHEMA_SQL

Constant SCHEMA_SQL 

Source
pub const SCHEMA_SQL: &str = r#"
-- ============================================================================
-- RALL TELEMETRY SCHEMA v1
-- ReasonKit Adaptive Learning Loop - Local Telemetry Storage
-- ============================================================================

-- Schema version tracking
CREATE TABLE IF NOT EXISTS schema_version (
    version INTEGER PRIMARY KEY,
    applied_at TEXT NOT NULL DEFAULT (datetime('now')),
    description TEXT
);

-- Insert current version if not exists
INSERT OR IGNORE INTO schema_version (version, description)
VALUES (1, 'Initial RALL telemetry schema');

-- ============================================================================
-- CORE TABLES
-- ============================================================================

-- Sessions table: One entry per CLI session
CREATE TABLE IF NOT EXISTS sessions (
    id TEXT PRIMARY KEY,                    -- UUID
    started_at TEXT NOT NULL,               -- ISO 8601 timestamp
    ended_at TEXT,                          -- ISO 8601 timestamp (NULL if active)
    duration_ms INTEGER,                    -- Session duration in milliseconds
    tool_count INTEGER DEFAULT 0,           -- Number of tools used
    query_count INTEGER DEFAULT 0,          -- Number of queries
    feedback_count INTEGER DEFAULT 0,       -- Number of feedback events
    profile TEXT,                           -- Reasoning profile used (quick/balanced/deep/etc)
    success_rate REAL,                      -- Overall success rate (0.0-1.0)
    client_version TEXT,                    -- ReasonKit CLI version
    os_family TEXT                          -- Operating system (sanitized)
);

-- Queries table: Individual query events (PII-stripped)
CREATE TABLE IF NOT EXISTS queries (
    id TEXT PRIMARY KEY,                    -- UUID
    session_id TEXT NOT NULL,               -- FK to sessions
    timestamp TEXT NOT NULL,                -- ISO 8601 timestamp

    -- Query metadata (NO raw query text - privacy)
    query_hash TEXT NOT NULL,               -- SHA-256 hash of normalized query
    query_length INTEGER NOT NULL,          -- Character count
    query_token_count INTEGER,              -- Token count (if available)
    query_type TEXT,                        -- Classification: search/reason/code/general

    -- Execution metrics
    latency_ms INTEGER NOT NULL,            -- Total execution time
    tool_calls INTEGER DEFAULT 0,           -- Number of tool calls
    retrieval_count INTEGER DEFAULT 0,      -- Number of documents retrieved

    -- Results (anonymized)
    result_count INTEGER,                   -- Number of results returned
    result_quality_score REAL,              -- Self-assessed quality (0.0-1.0)
    error_occurred INTEGER DEFAULT 0,       -- Boolean: did an error occur?
    error_category TEXT,                    -- Error classification (if error)

    -- Context
    profile_used TEXT,                      -- Reasoning profile
    tools_used TEXT,                        -- JSON array of tool names used

    FOREIGN KEY (session_id) REFERENCES sessions(id)
);

-- Create indexes for common queries
CREATE INDEX IF NOT EXISTS idx_queries_session ON queries(session_id);
CREATE INDEX IF NOT EXISTS idx_queries_timestamp ON queries(timestamp);
CREATE INDEX IF NOT EXISTS idx_queries_type ON queries(query_type);
CREATE INDEX IF NOT EXISTS idx_queries_hash ON queries(query_hash);

-- Feedback table: User feedback events
CREATE TABLE IF NOT EXISTS feedback (
    id TEXT PRIMARY KEY,                    -- UUID
    session_id TEXT NOT NULL,               -- FK to sessions
    query_id TEXT,                          -- FK to queries (optional)
    timestamp TEXT NOT NULL,                -- ISO 8601 timestamp

    -- Feedback data
    feedback_type TEXT NOT NULL,            -- thumbs_up/thumbs_down/explicit/implicit
    rating INTEGER,                         -- 1-5 star rating (if explicit)
    category TEXT,                          -- accuracy/relevance/speed/format/other

    -- Context (anonymized)
    context_hash TEXT,                      -- Hash of surrounding context

    FOREIGN KEY (session_id) REFERENCES sessions(id),
    FOREIGN KEY (query_id) REFERENCES queries(id)
);

CREATE INDEX IF NOT EXISTS idx_feedback_session ON feedback(session_id);
CREATE INDEX IF NOT EXISTS idx_feedback_query ON feedback(query_id);
CREATE INDEX IF NOT EXISTS idx_feedback_type ON feedback(feedback_type);

-- Tool usage table: Track tool invocations
CREATE TABLE IF NOT EXISTS tool_usage (
    id TEXT PRIMARY KEY,                    -- UUID
    session_id TEXT NOT NULL,               -- FK to sessions
    query_id TEXT,                          -- FK to queries
    timestamp TEXT NOT NULL,                -- ISO 8601 timestamp

    -- Tool metadata
    tool_name TEXT NOT NULL,                -- Tool identifier
    tool_category TEXT,                     -- search/file/shell/mcp/reasoning

    -- Execution metrics
    execution_ms INTEGER NOT NULL,          -- Execution time
    success INTEGER NOT NULL,               -- Boolean: did it succeed?
    error_type TEXT,                        -- Error classification (if failed)

    -- Input/output stats (NO content - privacy)
    input_size_bytes INTEGER,               -- Size of input
    output_size_bytes INTEGER,              -- Size of output

    FOREIGN KEY (session_id) REFERENCES sessions(id),
    FOREIGN KEY (query_id) REFERENCES queries(id)
);

CREATE INDEX IF NOT EXISTS idx_tool_usage_session ON tool_usage(session_id);
CREATE INDEX IF NOT EXISTS idx_tool_usage_tool ON tool_usage(tool_name);
CREATE INDEX IF NOT EXISTS idx_tool_usage_category ON tool_usage(tool_category);

-- Reasoning traces table: ThinkTool execution traces
CREATE TABLE IF NOT EXISTS reasoning_traces (
    id TEXT PRIMARY KEY,                    -- UUID
    session_id TEXT NOT NULL,               -- FK to sessions
    query_id TEXT,                          -- FK to queries
    timestamp TEXT NOT NULL,                -- ISO 8601 timestamp

    -- Trace metadata
    thinktool_name TEXT NOT NULL,           -- GigaThink/LaserLogic/etc
    step_count INTEGER NOT NULL,            -- Number of reasoning steps

    -- Execution metrics
    total_ms INTEGER NOT NULL,              -- Total execution time
    avg_step_ms REAL,                       -- Average time per step

    -- Quality metrics (computed, not user-provided)
    coherence_score REAL,                   -- Self-consistency check (0.0-1.0)
    depth_score REAL,                       -- Reasoning depth metric

    -- Anonymized structure
    step_types TEXT,                        -- JSON array of step type names

    FOREIGN KEY (session_id) REFERENCES sessions(id),
    FOREIGN KEY (query_id) REFERENCES queries(id)
);

CREATE INDEX IF NOT EXISTS idx_traces_session ON reasoning_traces(session_id);
CREATE INDEX IF NOT EXISTS idx_traces_thinktool ON reasoning_traces(thinktool_name);

-- ============================================================================
-- AGGREGATION TABLES (for ML training data)
-- ============================================================================

-- Daily aggregates: Pre-computed daily statistics
CREATE TABLE IF NOT EXISTS daily_aggregates (
    date TEXT PRIMARY KEY,                  -- YYYY-MM-DD
    computed_at TEXT NOT NULL,              -- When this was computed

    -- Volume metrics
    session_count INTEGER DEFAULT 0,
    query_count INTEGER DEFAULT 0,
    feedback_count INTEGER DEFAULT 0,
    tool_invocations INTEGER DEFAULT 0,

    -- Performance metrics
    avg_latency_ms REAL,
    p50_latency_ms REAL,
    p95_latency_ms REAL,
    p99_latency_ms REAL,

    -- Quality metrics
    avg_success_rate REAL,
    positive_feedback_ratio REAL,
    error_rate REAL,

    -- Tool distribution (JSON)
    tool_distribution TEXT,

    -- Query type distribution (JSON)
    query_type_distribution TEXT
);

-- Query clusters: K-means clustering results for pattern detection
CREATE TABLE IF NOT EXISTS query_clusters (
    id INTEGER PRIMARY KEY,
    computed_at TEXT NOT NULL,

    -- Cluster metadata
    cluster_count INTEGER NOT NULL,         -- K value used
    silhouette_score REAL,                  -- Clustering quality

    -- Cluster data (JSON)
    centroids TEXT,                         -- JSON array of centroid embeddings
    cluster_sizes TEXT,                     -- JSON array of cluster sizes
    representative_hashes TEXT              -- JSON array of representative query hashes
);

-- ============================================================================
-- PRIVACY CONTROLS
-- ============================================================================

-- Privacy consent tracking
CREATE TABLE IF NOT EXISTS privacy_consent (
    id TEXT PRIMARY KEY,
    timestamp TEXT NOT NULL,

    -- Consent levels
    local_telemetry INTEGER NOT NULL,       -- Boolean: allow local storage
    aggregated_sharing INTEGER NOT NULL,    -- Boolean: allow aggregated sharing
    community_contribution INTEGER NOT NULL, -- Boolean: contribute to community model

    -- Metadata
    consent_version INTEGER NOT NULL,       -- Consent form version
    ip_hash TEXT                            -- Hashed IP for legal compliance
);

-- Redaction log: Track what was redacted (for audit)
CREATE TABLE IF NOT EXISTS redaction_log (
    id TEXT PRIMARY KEY,
    timestamp TEXT NOT NULL,

    -- Redaction metadata
    source_table TEXT NOT NULL,
    source_id TEXT NOT NULL,
    redaction_type TEXT NOT NULL,           -- pii/sensitive/custom
    pattern_matched TEXT                    -- Regex pattern that triggered redaction
);

CREATE INDEX IF NOT EXISTS idx_redaction_source ON redaction_log(source_table, source_id);

-- ============================================================================
-- VIEWS for common queries
-- ============================================================================

-- Recent session summary
CREATE VIEW IF NOT EXISTS v_recent_sessions AS
SELECT
    s.id,
    s.started_at,
    s.duration_ms,
    s.query_count,
    s.tool_count,
    s.success_rate,
    COUNT(DISTINCT f.id) as feedback_items,
    AVG(CASE WHEN f.feedback_type = 'thumbs_up' THEN 1.0
             WHEN f.feedback_type = 'thumbs_down' THEN 0.0
             ELSE NULL END) as feedback_score
FROM sessions s
LEFT JOIN feedback f ON s.id = f.session_id
WHERE s.started_at > datetime('now', '-30 days')
GROUP BY s.id
ORDER BY s.started_at DESC;

-- Tool performance summary
CREATE VIEW IF NOT EXISTS v_tool_performance AS
SELECT
    tool_name,
    tool_category,
    COUNT(*) as invocation_count,
    AVG(execution_ms) as avg_execution_ms,
    SUM(CASE WHEN success = 1 THEN 1 ELSE 0 END) * 1.0 / COUNT(*) as success_rate,
    SUM(input_size_bytes) as total_input_bytes,
    SUM(output_size_bytes) as total_output_bytes
FROM tool_usage
WHERE timestamp > datetime('now', '-30 days')
GROUP BY tool_name, tool_category
ORDER BY invocation_count DESC;

-- ThinkTool effectiveness
CREATE VIEW IF NOT EXISTS v_thinktool_stats AS
SELECT
    thinktool_name,
    COUNT(*) as usage_count,
    AVG(step_count) as avg_steps,
    AVG(total_ms) as avg_execution_ms,
    AVG(coherence_score) as avg_coherence,
    AVG(depth_score) as avg_depth
FROM reasoning_traces
WHERE timestamp > datetime('now', '-30 days')
GROUP BY thinktool_name
ORDER BY usage_count DESC;
"#;
Expand description

SQLite schema for telemetry database