use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use ares_core::job::ScrapeJob;
use ares_core::models::Extraction;
/// Deserializable payload describing a job to create.
// NOTE(review): the handler that consumes this type is not in this file;
// "request body" is inferred from the name and the `Deserialize`/`ToSchema` derives.
#[derive(Debug, Deserialize, utoipa::ToSchema)]
pub struct CreateJobRequest {
/// URL the job refers to.
pub url: String,
/// Name of the schema to use.
pub schema_name: String,
/// Schema definition, carried as arbitrary JSON.
pub schema: serde_json::Value,
/// Model identifier (required).
pub model: String,
// NOTE(review): presumably the base URL of the model endpoint, not of the
// scraped site — confirm against the handler.
/// Base URL (required).
pub base_url: String,
// NOTE(review): the default applied when this is `None` is decided elsewhere.
/// Optional retry limit; may be omitted.
pub max_retries: Option<u32>,
}
/// Serializable result of a job creation: the job's id and a status string.
#[derive(Debug, Serialize, utoipa::ToSchema)]
pub struct CreateJobResponse {
/// Identifier of the job.
pub job_id: Uuid,
// NOTE(review): the set of possible status strings is not visible in this file.
/// Status of the job, as text.
pub status: String,
}
/// Serializable view of a `ScrapeJob`.
///
/// Built from a `ScrapeJob` through the `From<ScrapeJob>` conversion; every
/// field is copied across unchanged except `status`, which is rendered to text.
#[derive(Debug, Serialize, utoipa::ToSchema)]
pub struct JobResponse {
/// Identifier of the job.
pub id: Uuid,
/// URL the job refers to.
pub url: String,
/// Name of the schema used by the job.
pub schema_name: String,
/// Schema definition, carried as arbitrary JSON.
pub schema: serde_json::Value,
/// Model identifier.
pub model: String,
// NOTE(review): presumably the model endpoint's base URL — confirm.
/// Base URL recorded on the job.
pub base_url: String,
/// Job status as text (the job's status rendered with `to_string()`).
pub status: String,
/// Creation timestamp (UTC).
pub created_at: DateTime<Utc>,
/// Last-update timestamp (UTC).
pub updated_at: DateTime<Utc>,
/// Start timestamp (UTC), if set.
pub started_at: Option<DateTime<Utc>>,
/// Completion timestamp (UTC), if set.
pub completed_at: Option<DateTime<Utc>>,
/// Retry counter recorded on the job.
pub retry_count: u32,
/// Retry limit recorded on the job.
pub max_retries: u32,
/// Timestamp (UTC) of the next retry, if set.
pub next_retry_at: Option<DateTime<Utc>>,
/// Error message, if set.
pub error_message: Option<String>,
/// Identifier of the associated extraction, if set.
pub extraction_id: Option<Uuid>,
/// Identifier of the worker, if set.
pub worker_id: Option<String>,
/// Identifier of the crawl session, if set.
pub crawl_session_id: Option<Uuid>,
/// Identifier of the parent job, if set.
pub parent_job_id: Option<Uuid>,
/// Depth value recorded on the job.
pub depth: u32,
/// Maximum depth recorded on the job.
pub max_depth: u32,
}
/// Builds the API-facing [`JobResponse`] from a core [`ScrapeJob`].
///
/// Every field is moved across as-is; the only transformation is `status`,
/// which is rendered to a `String` with `to_string()`.
impl From<ScrapeJob> for JobResponse {
    fn from(job: ScrapeJob) -> Self {
        // The single derived value: the job status rendered as text.
        let status = job.status.to_string();

        Self {
            // Identity and crawl lineage.
            id: job.id,
            crawl_session_id: job.crawl_session_id,
            parent_job_id: job.parent_job_id,
            depth: job.depth,
            max_depth: job.max_depth,

            // Target URL plus schema/model settings.
            url: job.url,
            schema_name: job.schema_name,
            schema: job.schema,
            model: job.model,
            base_url: job.base_url,

            // Status, timestamps and worker.
            status,
            created_at: job.created_at,
            updated_at: job.updated_at,
            started_at: job.started_at,
            completed_at: job.completed_at,
            worker_id: job.worker_id,

            // Retry bookkeeping and outcome.
            retry_count: job.retry_count,
            max_retries: job.max_retries,
            next_retry_at: job.next_retry_at,
            error_message: job.error_message,
            extraction_id: job.extraction_id,
        }
    }
}
/// Deserializable parameters for listing jobs; every field is optional.
// NOTE(review): defaults for absent `limit`/`offset` and the accepted `status`
// values are decided by the handler, which is not in this file.
#[derive(Debug, Deserialize, utoipa::IntoParams)]
pub struct ListJobsQuery {
// NOTE(review): presumably restricts the listing to jobs with this status — confirm.
/// Optional status value.
pub status: Option<String>,
/// Optional page size.
pub limit: Option<usize>,
/// Optional starting offset.
pub offset: Option<usize>,
}
/// Serializable list of jobs together with paging values.
#[derive(Debug, Serialize, utoipa::ToSchema)]
pub struct JobListResponse {
/// Jobs included in this response.
pub jobs: Vec<JobResponse>,
// NOTE(review): whether this counts all matching jobs or only `jobs.len()`
// is determined by the handler — confirm.
/// Total count reported alongside the list.
pub total: usize,
/// Limit value reported with this response.
pub limit: usize,
/// Offset value reported with this response.
pub offset: usize,
}
/// Deserializable parameters for an extraction-history lookup.
///
/// `url` and `schema_name` are required; `limit` and `offset` are optional.
#[derive(Debug, Deserialize, utoipa::IntoParams)]
pub struct ExtractionHistoryQuery {
/// URL to look up.
pub url: String,
/// Schema name to look up.
pub schema_name: String,
// NOTE(review): defaults for absent `limit`/`offset` are chosen by the handler.
/// Optional page size.
pub limit: Option<usize>,
/// Optional starting offset.
pub offset: Option<usize>,
}
/// Serializable view of an `Extraction`.
///
/// Built from an `Extraction` through the `From<Extraction>` conversion, which
/// copies every field across unchanged.
#[derive(Debug, Serialize, utoipa::ToSchema)]
pub struct ExtractionResponse {
/// Identifier of the extraction.
pub id: Uuid,
/// URL recorded on the extraction.
pub url: String,
/// Schema name recorded on the extraction.
pub schema_name: String,
/// Extracted data, carried as arbitrary JSON.
pub extracted_data: serde_json::Value,
// NOTE(review): hash algorithm and what exactly is hashed are not visible here.
/// Content hash recorded on the extraction.
pub content_hash: String,
/// Data hash recorded on the extraction.
pub data_hash: String,
/// Model identifier recorded on the extraction.
pub model: String,
/// Creation timestamp (UTC).
pub created_at: DateTime<Utc>,
}
/// Builds the API-facing [`ExtractionResponse`] from a core [`Extraction`].
///
/// A plain field-for-field move; no value is transformed.
impl From<Extraction> for ExtractionResponse {
    fn from(extraction: Extraction) -> Self {
        Self {
            // Identity and timestamp.
            id: extraction.id,
            created_at: extraction.created_at,

            // Source URL plus schema/model settings.
            url: extraction.url,
            schema_name: extraction.schema_name,
            model: extraction.model,

            // Payload and its hashes.
            extracted_data: extraction.extracted_data,
            content_hash: extraction.content_hash,
            data_hash: extraction.data_hash,
        }
    }
}
/// Serializable list of extractions together with paging values.
#[derive(Debug, Serialize, utoipa::ToSchema)]
pub struct ExtractionHistoryResponse {
/// Extractions included in this response.
pub extractions: Vec<ExtractionResponse>,
// NOTE(review): whether this is the overall match count or the length of
// `extractions` is determined by the handler — confirm.
/// Total count reported alongside the list.
pub total: usize,
/// Limit value reported with this response.
pub limit: usize,
/// Offset value reported with this response.
pub offset: usize,
}
/// Serializable list of extractions with a total count (no paging fields).
// NOTE(review): presumably the extractions produced by one crawl session —
// inferred from the name only; confirm against the handler.
#[derive(Debug, Serialize, utoipa::ToSchema)]
pub struct CrawlResultsResponse {
/// Extractions included in this response.
pub extractions: Vec<ExtractionResponse>,
/// Total count reported alongside the list.
pub total: usize,
}
/// Serializable health report made of two static strings.
// NOTE(review): the concrete strings used (e.g. for healthy/unhealthy) are
// chosen by the handler and are not visible in this file.
#[derive(Debug, Serialize, utoipa::ToSchema)]
pub struct HealthResponse {
/// Overall status, as a static string.
pub status: &'static str,
/// Database status, as a static string.
pub database: &'static str,
}
/// Deserializable payload describing a scrape.
///
/// `url`, `schema` and `schema_name` are required; `model`, `base_url` and
/// `save` may be omitted.
// NOTE(review): the fallbacks used when the optional fields are absent are
// decided by the handler, which is not in this file.
#[derive(Debug, Deserialize, utoipa::ToSchema)]
pub struct ScrapeRequest {
/// URL to scrape.
pub url: String,
/// Schema definition, carried as arbitrary JSON.
pub schema: serde_json::Value,
/// Name of the schema.
pub schema_name: String,
/// Optional model identifier.
pub model: Option<String>,
// NOTE(review): presumably the model endpoint's base URL — confirm.
/// Optional base URL.
pub base_url: Option<String>,
// NOTE(review): presumably controls whether the result is persisted — confirm.
/// Optional save flag.
pub save: Option<bool>,
}
/// Serializable result of a scrape.
#[derive(Debug, Serialize, utoipa::ToSchema)]
pub struct ScrapeResponse {
/// Extracted data, carried as arbitrary JSON.
pub extracted_data: serde_json::Value,
// NOTE(review): hash algorithm and hashed input are not visible here.
/// Content hash for this result.
pub content_hash: String,
/// Data hash for this result.
pub data_hash: String,
// NOTE(review): presumably "changed relative to the previous extraction for
// the same URL/schema" — confirm against the handler.
/// Change flag for this result.
pub changed: bool,
/// Identifier of the associated extraction, if any.
pub extraction_id: Option<Uuid>,
}
/// Serializable list of schema entries.
#[derive(Debug, Serialize, utoipa::ToSchema)]
pub struct SchemaListResponse {
/// One entry per listed schema.
pub schemas: Vec<SchemaEntryResponse>,
}
/// Serializable summary of one schema: its name and version strings.
#[derive(Debug, Serialize, utoipa::ToSchema)]
pub struct SchemaEntryResponse {
/// Schema name.
pub name: String,
// NOTE(review): how "latest" is determined (ordering of version strings)
// is not visible in this file.
/// Latest version string.
pub latest_version: String,
/// Version strings listed for this schema.
pub versions: Vec<String>,
}
/// Serializable schema detail: name, version and the schema body.
#[derive(Debug, Serialize, utoipa::ToSchema)]
pub struct SchemaDetailResponse {
/// Schema name.
pub name: String,
/// Version string.
pub version: String,
/// Schema definition, carried as arbitrary JSON.
pub schema: serde_json::Value,
}
/// Deserializable payload carrying only a schema body.
// NOTE(review): which schema/version gets updated is presumably identified
// outside this payload (e.g. by the route) — confirm.
#[derive(Debug, Deserialize, utoipa::ToSchema)]
pub struct UpdateSchemaRequest {
/// Schema definition, carried as arbitrary JSON.
pub schema: serde_json::Value,
}
/// Deserializable payload describing a schema to create; all fields are required.
#[derive(Debug, Deserialize, utoipa::ToSchema)]
pub struct CreateSchemaRequest {
/// Schema name.
pub name: String,
// NOTE(review): the expected version format is not visible in this file.
/// Version string.
pub version: String,
/// Schema definition, carried as arbitrary JSON.
pub schema: serde_json::Value,
}
/// Serializable result of a schema creation: the schema's name and version.
#[derive(Debug, Serialize, utoipa::ToSchema)]
pub struct CreateSchemaResponse {
/// Schema name.
pub name: String,
/// Version string.
pub version: String,
}
/// Serializable error payload made of two strings.
// NOTE(review): presumably `error` is a short machine-readable code and
// `message` the human-readable detail — confirm against where this is built.
#[derive(Debug, Serialize, utoipa::ToSchema)]
pub struct ErrorResponse {
/// Error identifier, as text.
pub error: String,
/// Error message, as text.
pub message: String,
}
/// Deserializable payload describing a crawl to start.
///
/// Everything is required except `max_pages` and `allowed_domains`.
#[derive(Debug, Deserialize, utoipa::ToSchema)]
pub struct CrawlRequest {
// NOTE(review): presumably the crawl's starting URL — confirm.
/// URL for the crawl.
pub url: String,
/// Name of the schema to use.
pub schema_name: String,
/// Schema definition, carried as arbitrary JSON.
pub schema: serde_json::Value,
/// Model identifier (required).
pub model: String,
// NOTE(review): presumably the model endpoint's base URL — confirm.
/// Base URL (required).
pub base_url: String,
/// Maximum depth (required).
pub max_depth: u32,
// NOTE(review): behaviour when absent (unlimited vs. a default cap) is
// decided by the handler.
/// Optional page limit.
pub max_pages: Option<u32>,
// NOTE(review): behaviour when absent is decided by the handler.
/// Optional list of allowed domains.
pub allowed_domains: Option<Vec<String>>,
}
/// Serializable result of starting a crawl: a session id and a status string.
#[derive(Debug, Serialize, utoipa::ToSchema)]
pub struct CrawlResponse {
/// Identifier of the crawl session.
pub session_id: Uuid,
// NOTE(review): the set of possible status strings is not visible in this file.
/// Status, as text.
pub status: String,
}
/// Serializable per-session job counts for a crawl.
// NOTE(review): whether `total_jobs` always equals the sum of the other four
// counters depends on the set of job states, which is not visible here.
#[derive(Debug, Serialize, utoipa::ToSchema)]
pub struct CrawlStatusResponse {
/// Identifier of the crawl session.
pub session_id: Uuid,
/// Total number of jobs.
pub total_jobs: usize,
/// Number of pending jobs.
pub pending_jobs: usize,
/// Number of running jobs.
pub running_jobs: usize,
/// Number of completed jobs.
pub completed_jobs: usize,
/// Number of failed jobs.
pub failed_jobs: usize,
}