1use crate::telemetry::TELEMETRY_SCHEMA_VERSION;
6
7pub const SCHEMA_SQL: &str = r#"
9-- ============================================================================
10-- RALL TELEMETRY SCHEMA v1
11-- ReasonKit Adaptive Learning Loop - Local Telemetry Storage
12-- ============================================================================
13
14-- Schema version tracking
15CREATE TABLE IF NOT EXISTS schema_version (
16 version INTEGER PRIMARY KEY,
17 applied_at TEXT NOT NULL DEFAULT (datetime('now')),
18 description TEXT
19);
20
21-- Insert current version if not exists
22INSERT OR IGNORE INTO schema_version (version, description)
23VALUES (1, 'Initial RALL telemetry schema');
24
25-- ============================================================================
26-- CORE TABLES
27-- ============================================================================
28
29-- Sessions table: One entry per CLI session
30CREATE TABLE IF NOT EXISTS sessions (
31 id TEXT PRIMARY KEY, -- UUID
32 started_at TEXT NOT NULL, -- ISO 8601 timestamp
33 ended_at TEXT, -- ISO 8601 timestamp (NULL if active)
34 duration_ms INTEGER, -- Session duration in milliseconds
35 tool_count INTEGER DEFAULT 0, -- Number of tools used
36 query_count INTEGER DEFAULT 0, -- Number of queries
37 feedback_count INTEGER DEFAULT 0, -- Number of feedback events
38 profile TEXT, -- Reasoning profile used (quick/balanced/deep/etc)
39 success_rate REAL, -- Overall success rate (0.0-1.0)
40 client_version TEXT, -- ReasonKit CLI version
41 os_family TEXT -- Operating system (sanitized)
42);
43
44-- Queries table: Individual query events (PII-stripped)
45CREATE TABLE IF NOT EXISTS queries (
46 id TEXT PRIMARY KEY, -- UUID
47 session_id TEXT NOT NULL, -- FK to sessions
48 timestamp TEXT NOT NULL, -- ISO 8601 timestamp
49
50 -- Query metadata (NO raw query text - privacy)
51 query_hash TEXT NOT NULL, -- SHA-256 hash of normalized query
52 query_length INTEGER NOT NULL, -- Character count
53 query_token_count INTEGER, -- Token count (if available)
54 query_type TEXT, -- Classification: search/reason/code/general
55
56 -- Execution metrics
57 latency_ms INTEGER NOT NULL, -- Total execution time
58 tool_calls INTEGER DEFAULT 0, -- Number of tool calls
59 retrieval_count INTEGER DEFAULT 0, -- Number of documents retrieved
60
61 -- Results (anonymized)
62 result_count INTEGER, -- Number of results returned
63 result_quality_score REAL, -- Self-assessed quality (0.0-1.0)
64 error_occurred INTEGER DEFAULT 0, -- Boolean: did an error occur?
65 error_category TEXT, -- Error classification (if error)
66
67 -- Context
68 profile_used TEXT, -- Reasoning profile
69 tools_used TEXT, -- JSON array of tool names used
70
71 FOREIGN KEY (session_id) REFERENCES sessions(id)
72);
73
74-- Create indexes for common queries
75CREATE INDEX IF NOT EXISTS idx_queries_session ON queries(session_id);
76CREATE INDEX IF NOT EXISTS idx_queries_timestamp ON queries(timestamp);
77CREATE INDEX IF NOT EXISTS idx_queries_type ON queries(query_type);
78CREATE INDEX IF NOT EXISTS idx_queries_hash ON queries(query_hash);
79
80-- Feedback table: User feedback events
81CREATE TABLE IF NOT EXISTS feedback (
82 id TEXT PRIMARY KEY, -- UUID
83 session_id TEXT NOT NULL, -- FK to sessions
84 query_id TEXT, -- FK to queries (optional)
85 timestamp TEXT NOT NULL, -- ISO 8601 timestamp
86
87 -- Feedback data
88 feedback_type TEXT NOT NULL, -- thumbs_up/thumbs_down/explicit/implicit
89 rating INTEGER, -- 1-5 star rating (if explicit)
90 category TEXT, -- accuracy/relevance/speed/format/other
91
92 -- Context (anonymized)
93 context_hash TEXT, -- Hash of surrounding context
94
95 FOREIGN KEY (session_id) REFERENCES sessions(id),
96 FOREIGN KEY (query_id) REFERENCES queries(id)
97);
98
99CREATE INDEX IF NOT EXISTS idx_feedback_session ON feedback(session_id);
100CREATE INDEX IF NOT EXISTS idx_feedback_query ON feedback(query_id);
101CREATE INDEX IF NOT EXISTS idx_feedback_type ON feedback(feedback_type);
102
103-- Tool usage table: Track tool invocations
104CREATE TABLE IF NOT EXISTS tool_usage (
105 id TEXT PRIMARY KEY, -- UUID
106 session_id TEXT NOT NULL, -- FK to sessions
107 query_id TEXT, -- FK to queries
108 timestamp TEXT NOT NULL, -- ISO 8601 timestamp
109
110 -- Tool metadata
111 tool_name TEXT NOT NULL, -- Tool identifier
112 tool_category TEXT, -- search/file/shell/mcp/reasoning
113
114 -- Execution metrics
115 execution_ms INTEGER NOT NULL, -- Execution time
116 success INTEGER NOT NULL, -- Boolean: did it succeed?
117 error_type TEXT, -- Error classification (if failed)
118
119 -- Input/output stats (NO content - privacy)
120 input_size_bytes INTEGER, -- Size of input
121 output_size_bytes INTEGER, -- Size of output
122
123 FOREIGN KEY (session_id) REFERENCES sessions(id),
124 FOREIGN KEY (query_id) REFERENCES queries(id)
125);
126
127CREATE INDEX IF NOT EXISTS idx_tool_usage_session ON tool_usage(session_id);
128CREATE INDEX IF NOT EXISTS idx_tool_usage_tool ON tool_usage(tool_name);
129CREATE INDEX IF NOT EXISTS idx_tool_usage_category ON tool_usage(tool_category);
130
131-- Reasoning traces table: ThinkTool execution traces
132CREATE TABLE IF NOT EXISTS reasoning_traces (
133 id TEXT PRIMARY KEY, -- UUID
134 session_id TEXT NOT NULL, -- FK to sessions
135 query_id TEXT, -- FK to queries
136 timestamp TEXT NOT NULL, -- ISO 8601 timestamp
137
138 -- Trace metadata
139 thinktool_name TEXT NOT NULL, -- GigaThink/LaserLogic/etc
140 step_count INTEGER NOT NULL, -- Number of reasoning steps
141
142 -- Execution metrics
143 total_ms INTEGER NOT NULL, -- Total execution time
144 avg_step_ms REAL, -- Average time per step
145
146 -- Quality metrics (computed, not user-provided)
147 coherence_score REAL, -- Self-consistency check (0.0-1.0)
148 depth_score REAL, -- Reasoning depth metric
149
150 -- Anonymized structure
151 step_types TEXT, -- JSON array of step type names
152
153 FOREIGN KEY (session_id) REFERENCES sessions(id),
154 FOREIGN KEY (query_id) REFERENCES queries(id)
155);
156
157CREATE INDEX IF NOT EXISTS idx_traces_session ON reasoning_traces(session_id);
158CREATE INDEX IF NOT EXISTS idx_traces_thinktool ON reasoning_traces(thinktool_name);
159
160-- ============================================================================
161-- AGGREGATION TABLES (for ML training data)
162-- ============================================================================
163
164-- Daily aggregates: Pre-computed daily statistics
165CREATE TABLE IF NOT EXISTS daily_aggregates (
166 date TEXT PRIMARY KEY, -- YYYY-MM-DD
167 computed_at TEXT NOT NULL, -- When this was computed
168
169 -- Volume metrics
170 session_count INTEGER DEFAULT 0,
171 query_count INTEGER DEFAULT 0,
172 feedback_count INTEGER DEFAULT 0,
173 tool_invocations INTEGER DEFAULT 0,
174
175 -- Performance metrics
176 avg_latency_ms REAL,
177 p50_latency_ms REAL,
178 p95_latency_ms REAL,
179 p99_latency_ms REAL,
180
181 -- Quality metrics
182 avg_success_rate REAL,
183 positive_feedback_ratio REAL,
184 error_rate REAL,
185
186 -- Tool distribution (JSON)
187 tool_distribution TEXT,
188
189 -- Query type distribution (JSON)
190 query_type_distribution TEXT
191);
192
193-- Query clusters: K-means clustering results for pattern detection
194CREATE TABLE IF NOT EXISTS query_clusters (
195 id INTEGER PRIMARY KEY,
196 computed_at TEXT NOT NULL,
197
198 -- Cluster metadata
199 cluster_count INTEGER NOT NULL, -- K value used
200 silhouette_score REAL, -- Clustering quality
201
202 -- Cluster data (JSON)
203 centroids TEXT, -- JSON array of centroid embeddings
204 cluster_sizes TEXT, -- JSON array of cluster sizes
205 representative_hashes TEXT -- JSON array of representative query hashes
206);
207
208-- ============================================================================
209-- PRIVACY CONTROLS
210-- ============================================================================
211
212-- Privacy consent tracking
213CREATE TABLE IF NOT EXISTS privacy_consent (
214 id TEXT PRIMARY KEY,
215 timestamp TEXT NOT NULL,
216
217 -- Consent levels
218 local_telemetry INTEGER NOT NULL, -- Boolean: allow local storage
219 aggregated_sharing INTEGER NOT NULL, -- Boolean: allow aggregated sharing
220 community_contribution INTEGER NOT NULL, -- Boolean: contribute to community model
221
222 -- Metadata
223 consent_version INTEGER NOT NULL, -- Consent form version
224 ip_hash TEXT -- Hashed IP for legal compliance
225);
226
227-- Redaction log: Track what was redacted (for audit)
228CREATE TABLE IF NOT EXISTS redaction_log (
229 id TEXT PRIMARY KEY,
230 timestamp TEXT NOT NULL,
231
232 -- Redaction metadata
233 source_table TEXT NOT NULL,
234 source_id TEXT NOT NULL,
235 redaction_type TEXT NOT NULL, -- pii/sensitive/custom
236 pattern_matched TEXT -- Regex pattern that triggered redaction
237);
238
239CREATE INDEX IF NOT EXISTS idx_redaction_source ON redaction_log(source_table, source_id);
240
241-- ============================================================================
242-- VIEWS for common queries
243-- ============================================================================
244
245-- Recent session summary
246CREATE VIEW IF NOT EXISTS v_recent_sessions AS
247SELECT
248 s.id,
249 s.started_at,
250 s.duration_ms,
251 s.query_count,
252 s.tool_count,
253 s.success_rate,
254 COUNT(DISTINCT f.id) as feedback_items,
255 AVG(CASE WHEN f.feedback_type = 'thumbs_up' THEN 1.0
256 WHEN f.feedback_type = 'thumbs_down' THEN 0.0
257 ELSE NULL END) as feedback_score
258FROM sessions s
259LEFT JOIN feedback f ON s.id = f.session_id
260WHERE s.started_at > datetime('now', '-30 days')
261GROUP BY s.id
262ORDER BY s.started_at DESC;
263
264-- Tool performance summary
265CREATE VIEW IF NOT EXISTS v_tool_performance AS
266SELECT
267 tool_name,
268 tool_category,
269 COUNT(*) as invocation_count,
270 AVG(execution_ms) as avg_execution_ms,
271 SUM(CASE WHEN success = 1 THEN 1 ELSE 0 END) * 1.0 / COUNT(*) as success_rate,
272 SUM(input_size_bytes) as total_input_bytes,
273 SUM(output_size_bytes) as total_output_bytes
274FROM tool_usage
275WHERE timestamp > datetime('now', '-30 days')
276GROUP BY tool_name, tool_category
277ORDER BY invocation_count DESC;
278
279-- ThinkTool effectiveness
280CREATE VIEW IF NOT EXISTS v_thinktool_stats AS
281SELECT
282 thinktool_name,
283 COUNT(*) as usage_count,
284 AVG(step_count) as avg_steps,
285 AVG(total_ms) as avg_execution_ms,
286 AVG(coherence_score) as avg_coherence,
287 AVG(depth_score) as avg_depth
288FROM reasoning_traces
289WHERE timestamp > datetime('now', '-30 days')
290GROUP BY thinktool_name
291ORDER BY usage_count DESC;
292"#;
293
294pub fn current_version() -> u32 {
296 TELEMETRY_SCHEMA_VERSION
297}
298
299pub fn get_migration_sql(_from_version: u32, _to_version: u32) -> Option<&'static str> {
301 None
302}
303
304#[cfg(test)]
305mod tests {
306 use super::*;
307
308 #[test]
309 fn test_schema_version() {
310 assert_eq!(current_version(), 1);
311 }
312
313 #[test]
314 fn test_schema_sql_not_empty() {
315 assert!(!SCHEMA_SQL.is_empty());
316 assert!(SCHEMA_SQL.contains("CREATE TABLE"));
317 }
318}