Skip to main content

aicx_parser/
timeline.rs

//! Shared timeline and segmentation data types.
//!
//! Vibecrafted with AI Agents by VetCoders (c)2026 VetCoders
4
5use chrono::{DateTime, Utc};
6#[cfg(feature = "json-schema")]
7use schemars::JsonSchema;
8use serde::{Deserialize, Serialize};
9use std::fmt;
10use std::path::PathBuf;
11
12/// Canonical kind for a session segment in the store.
13///
14/// Kind determines the subdirectory under `<project>/<date>/` and is part
15/// of the canonical store path. Classification is conservative: when in
16/// doubt, segments fall through to `Other`.
17#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Hash, Serialize, Deserialize)]
18#[serde(rename_all = "lowercase")]
19pub enum Kind {
20    Conversations,
21    Plans,
22    Reports,
23    #[default]
24    Other,
25}
26
27impl Kind {
28    /// Directory name used in the canonical store layout.
29    pub fn dir_name(self) -> &'static str {
30        match self {
31            Self::Conversations => "conversations",
32            Self::Plans => "plans",
33            Self::Reports => "reports",
34            Self::Other => "other",
35        }
36    }
37
38    /// Parse from a string (case-insensitive, accepts both singular and plural).
39    pub fn parse(s: &str) -> Option<Self> {
40        match s.to_ascii_lowercase().as_str() {
41            "conversations" | "conversation" => Some(Self::Conversations),
42            "plans" | "plan" => Some(Self::Plans),
43            "reports" | "report" => Some(Self::Reports),
44            "other" => Some(Self::Other),
45            _ => None,
46        }
47    }
48}
49
50impl fmt::Display for Kind {
51    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
52        f.write_str(self.dir_name())
53    }
54}
55
56/// Canonical stream/frame classification for a timeline entry or stored chunk.
57///
58/// This axis is intentionally orthogonal to `role`: source formats drift in how
59/// they spell assistant reasoning or tool payloads, but downstream retrieval
60/// needs one stable vocabulary for "which channel is this?".
61#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
62#[cfg_attr(feature = "json-schema", derive(JsonSchema))]
63#[serde(rename_all = "snake_case")]
64pub enum FrameKind {
65    UserMsg,
66    AgentReply,
67    InternalThought,
68    ToolCall,
69}
70
71impl FrameKind {
72    pub fn as_str(self) -> &'static str {
73        match self {
74            Self::UserMsg => "user_msg",
75            Self::AgentReply => "agent_reply",
76            Self::InternalThought => "internal_thought",
77            Self::ToolCall => "tool_call",
78        }
79    }
80
81    pub fn parse(value: &str) -> Option<Self> {
82        match value.trim().to_ascii_lowercase().as_str() {
83            "user_msg" | "user" => Some(Self::UserMsg),
84            "agent_reply" | "assistant" | "reply" => Some(Self::AgentReply),
85            "internal_thought" | "thought" | "thinking" | "reasoning" => {
86                Some(Self::InternalThought)
87            }
88            "tool_call" | "tool" | "tool_result" | "function_call" => Some(Self::ToolCall),
89            _ => None,
90        }
91    }
92}
93
94impl fmt::Display for FrameKind {
95    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
96        f.write_str(self.as_str())
97    }
98}
99
100/// Unified timeline entry from any AI agent source.
101#[derive(Debug, Clone, Serialize, Deserialize)]
102pub struct TimelineEntry {
103    pub timestamp: DateTime<Utc>,
104    pub agent: String,
105    pub session_id: String,
106    pub role: String,
107    pub message: String,
108    #[serde(default, skip_serializing_if = "Option::is_none")]
109    pub frame_kind: Option<FrameKind>,
110    #[serde(skip_serializing_if = "Option::is_none")]
111    pub branch: Option<String>,
112    #[serde(skip_serializing_if = "Option::is_none")]
113    pub cwd: Option<String>,
114}
115
116/// Denoised conversation message — the canonical projection of a TimelineEntry
117/// containing only user/assistant messages with repo-centric identity.
118///
119/// This is the primary unit for "recover the conversation" workflows.
120/// Tool calls, tool results, reasoning/thoughts, system noise, and artifact
121/// payloads are excluded. Artifact paths may appear as references only.
122#[derive(Debug, Clone, Serialize, Deserialize)]
123pub struct ConversationMessage {
124    pub timestamp: DateTime<Utc>,
125    pub agent: String,
126    pub session_id: String,
127    /// Only "user" or "assistant" — reasoning and system roles are excluded.
128    pub role: String,
129    /// Raw, untrimmed, untruncated message body.
130    pub message: String,
131    /// Canonical project/repo identity (derived from cwd + project filter).
132    pub repo_project: String,
133    /// Secondary provenance: source working directory path.
134    #[serde(skip_serializing_if = "Option::is_none")]
135    pub source_path: Option<String>,
136    /// Git branch at time of message (when available).
137    #[serde(skip_serializing_if = "Option::is_none")]
138    pub branch: Option<String>,
139}
140
141/// Configuration for extraction.
142#[derive(Debug, Clone)]
143pub struct ExtractionConfig {
144    pub project_filter: Vec<String>,
145    pub cutoff: DateTime<Utc>,
146    pub include_assistant: bool,
147    pub watermark: Option<DateTime<Utc>>,
148}
149
150/// Info about an available source directory/file.
151#[derive(Debug, Clone, Serialize)]
152pub struct SourceInfo {
153    pub agent: String,
154    pub path: PathBuf,
155    pub sessions: usize,
156    pub size_bytes: u64,
157    pub protected_by_git: bool,
158    pub protection_backend: String,
159    pub protection_root: Option<PathBuf>,
160    pub git_remote_count: usize,
161    pub git_remotes: Vec<String>,
162    pub protection_warning: Option<String>,
163}
164
165/// Explicit trust tier for a repo identity signal.
166///
167/// Not all evidence for "which repo is this?" is equal. A git remote URL
168/// is canonical truth; a directory layout is a strong hint; a hex hash is
169/// opaque noise. This enum makes the distinction machine-readable so the
170/// store can decide whether to assert identity or route to fallback.
171#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
172pub enum SourceTier {
173    /// Git remote URL or explicit GitHub/GitLab link in message text.
174    /// The strongest signal — the repo literally named itself.
175    Primary,
176    /// Local git repo discovered on disk (via `.git/` traversal + known layout),
177    /// or a projectHash resolved through a trustworthy local mapping file.
178    Secondary,
179    /// Known directory layout (e.g. `~/hosted/<org>/<repo>`) without a `.git/`
180    /// directory or remote confirmation. Plausible but not proven.
181    Fallback,
182    /// Hex hash, opaque identifier, or source that is explicitly not a
183    /// conversation (e.g. `.pb` protobuf, step-output). Must never assert
184    /// repo identity on its own.
185    Opaque,
186}
187
188impl SourceTier {
189    /// Whether this tier is strong enough to assert repo identity for
190    /// canonical store placement (under `store/<org>/<repo>/`).
191    pub fn is_assertable(self) -> bool {
192        matches!(self, Self::Primary | Self::Secondary)
193    }
194}
195
/// Repository identity expressed as an organization/repository pair.
///
/// Equality and hashing cover both fields and are case-sensitive, since they
/// are derived from the underlying `String`s.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct RepoIdentity {
    /// Organization (owner) component of the identity.
    pub organization: String,
    /// Repository component of the identity.
    pub repository: String,
}
201
202impl RepoIdentity {
203    pub fn slug(&self) -> String {
204        format!("{}/{}", self.organization, self.repository)
205    }
206}
207
208#[derive(Debug, Clone)]
209pub struct SemanticSegment {
210    pub repo: Option<RepoIdentity>,
211    /// The trust tier of the strongest signal that produced `repo`.
212    /// `None` when `repo` is `None`.
213    pub source_tier: Option<SourceTier>,
214    pub kind: Kind,
215    pub agent: String,
216    pub session_id: String,
217    pub entries: Vec<TimelineEntry>,
218}
219
220impl SemanticSegment {
221    pub fn project_label(&self) -> String {
222        self.repo
223            .as_ref()
224            .map(RepoIdentity::slug)
225            .unwrap_or_else(|| "non-repository-contexts".to_string())
226    }
227
228    /// Whether the repo identity is strong enough for canonical store placement.
229    /// Returns `false` for `None` repo or Fallback/Opaque tiers.
230    pub fn has_assertable_identity(&self) -> bool {
231        self.source_tier.is_some_and(SourceTier::is_assertable)
232    }
233}