1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
//! Ingestion operations for Lambda context
use crate::ingestion::config::SavedConfig;
use crate::ingestion::core::IngestionRequest;
use crate::ingestion::progress::ProgressService;
use crate::ingestion::simple_service::SimpleIngestionService;
use crate::ingestion::{IngestionConfig, IngestionError, IngestionProgress, IngestionResponse};
use serde_json::Value;
use super::context::LambdaContext;
impl LambdaContext {
/// Check whether a JSON payload is acceptable for ingestion, without ingesting it.
///
/// Builds a [`SimpleIngestionService`] from the environment-derived
/// [`IngestionConfig`] and runs the service's input validation on `json_data`.
///
/// # Arguments
///
/// * `json_data` - The JSON payload to validate
///
/// # Errors
///
/// Returns an [`IngestionError`] if the configuration cannot be loaded from the
/// environment, if the service cannot be constructed, or if the service rejects
/// the payload.
///
/// # Example
///
/// ```ignore
/// use datafold::lambda::LambdaContext;
/// use serde_json::json;
///
/// async fn handler() -> Result<(), Box<dyn std::error::Error>> {
///     let data = json!({"key": "value"});
///     LambdaContext::validate_json(data).await?;
///     println!("JSON is valid");
///     Ok(())
/// }
/// ```
pub async fn validate_json(json_data: Value) -> Result<(), IngestionError> {
    let validator = SimpleIngestionService::new(IngestionConfig::from_env()?)?;
    validator.validate_input(&json_data)
}
/// Report the current status of the ingestion service.
///
/// The configuration is read with `IngestionConfig::from_env_allow_empty()`.
/// The returned JSON object has three keys:
///
/// * `enabled` - the configuration's `enabled` flag
/// * `configured` - the result of the configuration's `is_ready()` check
/// * `provider` - the `Debug` rendering of the configured provider
///
/// # Example
///
/// ```ignore
/// use datafold::lambda::LambdaContext;
///
/// async fn handler() -> Result<(), Box<dyn std::error::Error>> {
///     let status = LambdaContext::get_ingestion_status().await?;
///     println!("Ingestion enabled: {:?}", status);
///     Ok(())
/// }
/// ```
pub async fn get_ingestion_status() -> Result<Value, IngestionError> {
    let settings = IngestionConfig::from_env_allow_empty();
    let ready = settings.is_ready();
    let provider_name = format!("{:?}", settings.provider);

    let status = serde_json::json!({
        "enabled": settings.enabled,
        "configured": ready,
        "provider": provider_name,
    });
    Ok(status)
}
/// Get ingestion progress by ID.
///
/// # Arguments
///
/// * `progress_id` - The progress ID from an ingestion operation
///
/// # Returns
///
/// Returns `Some(IngestionProgress)` if found, or `None` if the ID is not found.
///
/// # Example
///
/// ```ignore
/// use datafold::lambda::LambdaContext;
///
/// async fn check_progress(progress_id: &str) -> Result<(), Box<dyn std::error::Error>> {
/// if let Some(progress) = LambdaContext::get_progress(progress_id)? {
/// println!("Current step: {:?}", progress.current_step);
/// println!("Completed: {}", progress.completed);
/// }
/// Ok(())
/// }
/// ```
pub async fn get_progress(
progress_id: &str,
) -> Result<Option<IngestionProgress>, IngestionError> {
let ctx = Self::get()?;
let tracker = ctx.progress_tracker.clone();
match tracker.load(progress_id).await {
Ok(Some(job)) => Ok(Some(job.into())),
Ok(None) => Ok(None),
Err(e) => {
log::error!("Failed to load progress {}: {}", progress_id, e);
Ok(None)
}
}
}
/// Get all active ingestion progress
///
/// Returns all current ingestion operations and their progress.
///
/// # Example
///
/// ```ignore
/// use datafold::lambda::LambdaContext;
///
/// async fn handler() -> Result<(), Box<dyn std::error::Error>> {
/// let all_progress = LambdaContext::get_all_progress()?;
/// println!("Active ingestions: {}", all_progress.len());
/// Ok(())
/// }
/// ```
pub async fn get_all_progress() -> Result<Vec<IngestionProgress>, IngestionError> {
let ctx = Self::get()?;
let tracker = ctx.progress_tracker.clone();
let user_id = crate::logging::core::get_current_user_id().unwrap_or_else(|| "default".to_string());
match tracker.list_by_user(&user_id).await {
Ok(jobs) => Ok(jobs.into_iter().map(|j| j.into()).collect()),
Err(e) => {
log::error!("Failed to list progress for user {}: {}", user_id, e);
Ok(Vec::new())
}
}
}
/// Ingest JSON data in the background and return a progress ID immediately.
///
/// The ingestion itself runs in a spawned Tokio task, executed under
/// `run_with_user` for `user_id`. The returned ID can be used to look up the
/// progress record while the task runs.
///
/// # Arguments
///
/// * `json_data` - The JSON data to ingest (array of objects or single object)
/// * `auto_execute` - Whether to execute mutations after generation
/// * `trust_distance` - Trust distance for mutations
/// * `pub_key` - Public key for mutations
/// * `user_id` - User whose node is used and under whom the task runs
///
/// # Returns
///
/// The newly generated progress ID (a UUID v4 string).
///
/// # Errors
///
/// Returns an [`IngestionError`] if the Lambda context or the user's node
/// cannot be obtained, or if the ingestion configuration cannot be loaded
/// from the environment. The configuration is loaded *before* the progress
/// record is started, so these failures never leave a progress record behind.
///
/// Failures inside the background task (service construction, processing)
/// are not returned; they are logged and recorded on the progress record via
/// `fail_progress`.
///
/// # Example
///
/// ```ignore
/// use datafold::lambda::LambdaContext;
/// use serde_json::json;
///
/// async fn handler() -> Result<(), Box<dyn std::error::Error>> {
///     let data = json!([
///         {"id": 1, "name": "Alice"},
///         {"id": 2, "name": "Bob"}
///     ]);
///
///     let progress_id = LambdaContext::ingest_json(
///         data,
///         true,
///         0,
///         "my_pub_key".to_string(),
///         "user_123".to_string(),
///     )
///     .await?;
///
///     println!("Started ingestion: {}", progress_id);
///     Ok(())
/// }
/// ```
pub async fn ingest_json(
    json_data: Value,
    auto_execute: bool,
    trust_distance: u32,
    pub_key: String,
    user_id: String,
) -> Result<String, IngestionError> {
    let ctx = Self::get()?;
    let node = Self::get_node(&user_id).await?; // Use user-specific node

    // Load the configuration before any progress record exists: if this fails
    // we return early, and a record started beforehand would never be failed
    // or completed.
    let config = IngestionConfig::from_env()?;

    // Generate a unique progress ID and start tracking it.
    let progress_id = uuid::Uuid::new_v4().to_string();
    let progress_service = ProgressService::new(ctx.progress_tracker.clone());
    progress_service.start_progress(progress_id.clone()).await;

    // The task needs its own copy of the ID because the original is returned
    // to the caller. Every other input is owned and unused afterwards, so it
    // is moved into the task instead of being cloned.
    let task_progress_id = progress_id.clone();

    // Spawn the background ingestion task.
    tokio::spawn(async move {
        use crate::lambda::logging::run_with_user;
        run_with_user(&user_id, async move {
            // Create ingestion service
            let service = match SimpleIngestionService::new(config) {
                Ok(service) => service,
                Err(e) => {
                    let error_msg = format!("Failed to create ingestion service: {}", e);
                    log::error!("{}", error_msg);
                    progress_service
                        .fail_progress(&task_progress_id, error_msg)
                        .await;
                    return;
                }
            };

            // Create ingestion request
            let request = IngestionRequest {
                data: json_data,
                auto_execute: Some(auto_execute),
                trust_distance: Some(trust_distance),
                pub_key: Some(pub_key),
                source_file_name: None,
            };

            // Process ingestion while holding the node lock.
            let node_guard = node.lock().await;
            let outcome = service
                .process_json_with_node_and_progress(
                    request,
                    &*node_guard,
                    &progress_service,
                    task_progress_id.clone(),
                )
                .await;

            match outcome {
                Ok(_) => {
                    log::info!(
                        "Ingestion completed successfully for id: {}",
                        task_progress_id
                    );
                }
                Err(e) => {
                    let error_msg = format!("Ingestion failed: {}", e);
                    log::error!("{}", error_msg);
                    progress_service
                        .fail_progress(&task_progress_id, error_msg)
                        .await;
                }
            }
        })
        .await;
    });

    Ok(progress_id)
}
/// Ingest JSON data synchronously (waits for completion)
///
/// This function processes JSON data and waits for completion before returning.
/// Use this when you need the full ingestion results immediately.
///
/// # Arguments
///
/// * `json_data` - The JSON data to ingest (array of objects or single object)
/// * `auto_execute` - Whether to execute mutations after generation
/// * `trust_distance` - Trust distance for mutations (default: 0)
/// * `pub_key` - Public key for mutations
///
/// # Example
///
/// ```ignore
/// use datafold::lambda::LambdaContext;
/// use serde_json::json;
///
/// async fn handler() -> Result<(), Box<dyn std::error::Error>> {
/// let data = json!([
/// {"id": 1, "name": "Alice"},
/// {"id": 2, "name": "Bob"}
/// ]);
///
/// let response = LambdaContext::ingest_json_sync(data, true, 0, "user_123".to_string()).await?;
///
/// println!("Ingested {} mutations", response.mutations_executed);
/// Ok(())
/// }
/// ```
pub async fn ingest_json_sync(
json_data: Value,
auto_execute: bool,
trust_distance: u32,
pub_key: String,
user_id: String,
) -> Result<IngestionResponse, IngestionError> {
let ctx = Self::get()?;
let node = Self::get_node(&user_id).await?; // Use user-specific node
let progress_tracker = ctx.progress_tracker.clone();
// Generate unique progress ID
let progress_id = uuid::Uuid::new_v4().to_string();
// Start progress tracking
let progress_service = ProgressService::new(progress_tracker);
progress_service.start_progress(progress_id.clone()).await;
// Load ingestion config
let config = IngestionConfig::from_env()?;
// Create ingestion service
let service = SimpleIngestionService::new(config)?;
// Create ingestion request
let request = IngestionRequest {
data: json_data,
auto_execute: Some(auto_execute),
trust_distance: Some(trust_distance),
pub_key: Some(pub_key),
source_file_name: None,
};
// Process synchronously
use crate::lambda::logging::run_with_user;
run_with_user(&user_id, async {
let node_guard = node.lock().await;
service
.process_json_with_node_and_progress(
request,
&*node_guard,
&progress_service,
progress_id,
)
.await
})
.await
}
/// Health check for the ingestion service.
///
/// The configuration is read with `IngestionConfig::from_env_allow_empty()`
/// and its `is_ready()` result decides the outcome. The returned JSON object
/// contains:
///
/// * `status` - `"healthy"` when the configuration is ready, otherwise `"unhealthy"`
/// * `service` - always `"ingestion"`
/// * `details` - an object with `enabled` (the configuration flag),
///   `configured` (the readiness result) and `provider` (the `Debug`
///   rendering of the configured provider)
///
/// An unhealthy service is reported inside the `Ok` value, not as an error.
pub async fn health_check() -> Result<Value, IngestionError> {
    let settings = IngestionConfig::from_env_allow_empty();
    let ready = settings.is_ready();
    let status = if ready { "healthy" } else { "unhealthy" };

    Ok(serde_json::json!({
        "status": status,
        "service": "ingestion",
        "details": {
            "enabled": settings.enabled,
            "configured": ready,
            "provider": format!("{:?}", settings.provider)
        }
    }))
}
/// Get the ingestion configuration with the OpenRouter API key masked.
///
/// The configuration is read with `IngestionConfig::from_env_allow_empty()`.
/// A non-empty `openrouter.api_key` is replaced by the placeholder
/// `"***configured***"` so the real key is not returned; an empty key is left
/// empty. No other field is altered here.
pub async fn get_ingestion_config() -> Result<IngestionConfig, IngestionError> {
    let mut settings = IngestionConfig::from_env_allow_empty();

    let api_key = &mut settings.openrouter.api_key;
    if !api_key.is_empty() {
        *api_key = "***configured***".to_string();
    }

    Ok(settings)
}
/// Persist an ingestion configuration via `IngestionConfig::save_to_file`.
///
/// # Errors
///
/// Any failure to save is returned as `IngestionError::InvalidInput` with the
/// message `Failed to save config: <cause>`.
pub async fn save_ingestion_config(config: SavedConfig) -> Result<(), IngestionError> {
    if let Err(cause) = IngestionConfig::save_to_file(&config) {
        return Err(IngestionError::InvalidInput(format!(
            "Failed to save config: {}",
            cause
        )));
    }
    Ok(())
}
}