Skip to main content

bkb_core/
model.rs

1use std::fmt;
2
3use chrono::{DateTime, NaiveDate, Utc};
4use serde::{Deserialize, Serialize};
5
6/// Parse a date/datetime string flexibly, accepting both full RFC 3339
7/// timestamps (e.g., `2023-01-01T00:00:00Z`) and plain ISO 8601 dates
8/// (e.g., `2023-01-01`).
9pub fn parse_datetime(s: &str) -> Option<DateTime<Utc>> {
10	// Try full RFC 3339 first.
11	if let Ok(dt) = DateTime::parse_from_rfc3339(s) {
12		return Some(dt.with_timezone(&Utc));
13	}
14
15	// Fall back to plain YYYY-MM-DD date, interpreting as midnight UTC.
16	if let Ok(date) = NaiveDate::parse_from_str(s, "%Y-%m-%d") {
17		return date.and_hms_opt(0, 0, 0).map(|naive| naive.and_utc());
18	}
19
20	None
21}
22
23/// The type of source a document originated from.
24#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
25#[serde(rename_all = "snake_case")]
26pub enum SourceType {
27	GithubIssue,
28	GithubPr,
29	GithubComment,
30	GithubReview,
31	GithubReviewComment,
32	GithubDiscussion,
33	GithubDiscussionComment,
34	Commit,
35	MailingListMsg,
36	IrcLog,
37	DelvingTopic,
38	DelvingPost,
39	Bip,
40	Bolt,
41	Blip,
42	Lud,
43	Nut,
44	OptechNewsletter,
45	OptechTopic,
46	OptechBlog,
47	BitcointalkTopic,
48	BitcointalkPost,
49}
50
51impl SourceType {
52	pub fn as_str(&self) -> &'static str {
53		match self {
54			Self::GithubIssue => "github_issue",
55			Self::GithubPr => "github_pr",
56			Self::GithubComment => "github_comment",
57			Self::GithubReview => "github_review",
58			Self::GithubReviewComment => "github_review_comment",
59			Self::GithubDiscussion => "github_discussion",
60			Self::GithubDiscussionComment => "github_discussion_comment",
61			Self::Commit => "commit",
62			Self::MailingListMsg => "mailing_list_msg",
63			Self::IrcLog => "irc_log",
64			Self::DelvingTopic => "delving_topic",
65			Self::DelvingPost => "delving_post",
66			Self::Bip => "bip",
67			Self::Bolt => "bolt",
68			Self::Blip => "blip",
69			Self::Lud => "lud",
70			Self::Nut => "nut",
71			Self::OptechNewsletter => "optech_newsletter",
72			Self::OptechTopic => "optech_topic",
73			Self::OptechBlog => "optech_blog",
74			Self::BitcointalkTopic => "bitcointalk_topic",
75			Self::BitcointalkPost => "bitcointalk_post",
76		}
77	}
78
79	pub fn from_str(s: &str) -> Option<Self> {
80		match s {
81			"github_issue" => Some(Self::GithubIssue),
82			"github_pr" => Some(Self::GithubPr),
83			"github_comment" => Some(Self::GithubComment),
84			"github_review" => Some(Self::GithubReview),
85			"github_review_comment" => Some(Self::GithubReviewComment),
86			"github_discussion" => Some(Self::GithubDiscussion),
87			"github_discussion_comment" => Some(Self::GithubDiscussionComment),
88			"commit" => Some(Self::Commit),
89			"mailing_list_msg" => Some(Self::MailingListMsg),
90			"irc_log" => Some(Self::IrcLog),
91			"delving_topic" => Some(Self::DelvingTopic),
92			"delving_post" => Some(Self::DelvingPost),
93			"bip" => Some(Self::Bip),
94			"bolt" => Some(Self::Bolt),
95			"blip" => Some(Self::Blip),
96			"lud" => Some(Self::Lud),
97			"nut" => Some(Self::Nut),
98			"optech_newsletter" => Some(Self::OptechNewsletter),
99			"optech_topic" => Some(Self::OptechTopic),
100			"optech_blog" => Some(Self::OptechBlog),
101			"bitcointalk_topic" => Some(Self::BitcointalkTopic),
102			"bitcointalk_post" => Some(Self::BitcointalkPost),
103			_ => None,
104		}
105	}
106}
107
108impl fmt::Display for SourceType {
109	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
110		f.write_str(self.as_str())
111	}
112}
113
114/// A normalized document from any source.
115#[derive(Debug, Clone, Serialize, Deserialize)]
116pub struct Document {
117	pub id: String,
118	pub source_type: SourceType,
119	pub source_repo: Option<String>,
120	pub source_id: String,
121	pub title: Option<String>,
122	pub body: Option<String>,
123	pub author: Option<String>,
124	pub author_id: Option<String>,
125	pub created_at: DateTime<Utc>,
126	pub updated_at: Option<DateTime<Utc>>,
127	pub parent_id: Option<String>,
128	pub metadata: Option<serde_json::Value>,
129	pub seq: Option<i64>,
130}
131
132impl Document {
133	/// Build the canonical document ID from its components.
134	pub fn make_id(source_type: &SourceType, source_repo: Option<&str>, source_id: &str) -> String {
135		match source_repo {
136			Some(repo) => format!("{}:{}:{}", source_type, repo, source_id),
137			None => format!("{}:{}", source_type, source_id),
138		}
139	}
140
141	/// Return the stored URL from metadata, if present.
142	fn metadata_url(&self) -> Option<String> {
143		self.metadata
144			.as_ref()
145			.and_then(|m| m.get("url"))
146			.and_then(|v| v.as_str())
147			.map(|s| s.to_string())
148	}
149
150	/// Derive the canonical URL for this document.
151	///
152	/// For spec types (BIP, BOLT, bLIP, LUD, NUT), the URL is stored in
153	/// metadata at ingestion time since the filename is known then and
154	/// cannot be reliably reconstructed from the spec number alone.
155	pub fn url(&self) -> Option<String> {
156		match self.source_type {
157			SourceType::GithubIssue => Some(format!(
158				"https://github.com/{}/issues/{}",
159				self.source_repo.as_deref()?,
160				self.source_id
161			)),
162			SourceType::GithubPr => Some(format!(
163				"https://github.com/{}/pull/{}",
164				self.source_repo.as_deref()?,
165				self.source_id
166			)),
167			SourceType::GithubComment => {
168				// Extract the issue/PR number from parent_id (e.g. "github_issue:owner/repo:123")
169				// to build a proper permalink. GitHub redirects /issues/N to /pull/N for PRs.
170				let issue_num = self
171					.parent_id
172					.as_deref()
173					.and_then(|pid| pid.rsplit(':').next())
174					.filter(|n| n.chars().all(|c| c.is_ascii_digit()));
175				match issue_num {
176					Some(num) => Some(format!(
177						"https://github.com/{}/issues/{}#issuecomment-{}",
178						self.source_repo.as_deref()?,
179						num,
180						self.source_id
181					)),
182					None => None,
183				}
184			},
185			SourceType::Commit => Some(format!(
186				"https://github.com/{}/commit/{}",
187				self.source_repo.as_deref()?,
188				self.source_id
189			)),
190			// Spec types: use stored URL from metadata (set during ingestion).
191			SourceType::Bip
192			| SourceType::Bolt
193			| SourceType::Blip
194			| SourceType::Lud
195			| SourceType::Nut => self.metadata_url(),
196			SourceType::DelvingTopic => {
197				Some(format!("https://delvingbitcoin.org/t/{}", self.source_id))
198			},
199			SourceType::DelvingPost => {
200				Some(format!("https://delvingbitcoin.org/p/{}", self.source_id))
201			},
202			SourceType::OptechNewsletter => {
203				// The slug (e.g. "2023-03-01-newsletter") is stored in metadata.
204				// The URL format is /newsletters/YYYY/MM/DD/.
205				let slug =
206					self.metadata.as_ref().and_then(|m| m.get("slug")).and_then(|s| s.as_str());
207				slug.and_then(|s| {
208					let parts: Vec<&str> = s.splitn(4, '-').collect();
209					if parts.len() >= 3 {
210						Some(format!(
211							"https://bitcoinops.org/en/newsletters/{}/{}/{}/",
212							parts[0], parts[1], parts[2]
213						))
214					} else {
215						None
216					}
217				})
218			},
219			SourceType::OptechTopic => {
220				Some(format!("https://bitcoinops.org/en/topics/{}/", self.source_id))
221			},
222			SourceType::BitcointalkTopic => {
223				Some(format!("https://bitcointalk.org/index.php?topic={}.0", self.source_id))
224			},
225			SourceType::BitcointalkPost => {
226				// Extract topic_id from parent_id (e.g., "bitcointalk_topic::{topic_id}")
227				let topic_id = self
228					.parent_id
229					.as_deref()
230					.and_then(|pid| pid.strip_prefix("bitcointalk_topic::"));
231				match topic_id {
232					Some(tid) => Some(format!(
233						"https://bitcointalk.org/index.php?topic={}.msg{}#msg{}",
234						tid, self.source_id, self.source_id
235					)),
236					None => None,
237				}
238			},
239			_ => None,
240		}
241	}
242}
243
244/// A cross-reference between documents.
245#[derive(Debug, Clone, Serialize, Deserialize)]
246pub struct Reference {
247	pub id: Option<i64>,
248	pub from_doc_id: String,
249	pub to_doc_id: Option<String>,
250	pub ref_type: RefType,
251	pub to_external: Option<String>,
252	pub context: Option<String>,
253}
254
255/// Type of cross-reference.
256#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
257#[serde(rename_all = "snake_case")]
258pub enum RefType {
259	MentionsIssue,
260	MentionsPr,
261	Fixes,
262	ReferencesCommit,
263	ReferencesBip,
264	ReferencesBolt,
265	ReferencesBlip,
266	ReferencesLud,
267	ReferencesNut,
268	RepliesTo,
269}
270
271impl RefType {
272	pub fn as_str(&self) -> &'static str {
273		match self {
274			Self::MentionsIssue => "mentions_issue",
275			Self::MentionsPr => "mentions_pr",
276			Self::Fixes => "fixes",
277			Self::ReferencesCommit => "references_commit",
278			Self::ReferencesBip => "references_bip",
279			Self::ReferencesBolt => "references_bolt",
280			Self::ReferencesBlip => "references_blip",
281			Self::ReferencesLud => "references_lud",
282			Self::ReferencesNut => "references_nut",
283			Self::RepliesTo => "replies_to",
284		}
285	}
286
287	pub fn from_str(s: &str) -> Option<Self> {
288		match s {
289			"mentions_issue" => Some(Self::MentionsIssue),
290			"mentions_pr" => Some(Self::MentionsPr),
291			"fixes" => Some(Self::Fixes),
292			"references_commit" => Some(Self::ReferencesCommit),
293			"references_bip" => Some(Self::ReferencesBip),
294			"references_bolt" => Some(Self::ReferencesBolt),
295			"references_blip" => Some(Self::ReferencesBlip),
296			"references_lud" => Some(Self::ReferencesLud),
297			"references_nut" => Some(Self::ReferencesNut),
298			"replies_to" => Some(Self::RepliesTo),
299			_ => None,
300		}
301	}
302}
303
304impl fmt::Display for RefType {
305	fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
306		f.write_str(self.as_str())
307	}
308}
309
310/// Parameters for a search query.
311#[derive(Debug, Clone, Default, Serialize, Deserialize)]
312pub struct SearchParams {
313	pub query: String,
314	pub source_type: Option<Vec<SourceType>>,
315	pub source_repo: Option<Vec<String>>,
316	pub author: Option<String>,
317	pub after: Option<DateTime<Utc>>,
318	pub before: Option<DateTime<Utc>>,
319	pub semantic: bool,
320	pub limit: Option<u32>,
321}
322
323/// A single search result.
324#[derive(Debug, Clone, Serialize, Deserialize)]
325pub struct SearchResult {
326	pub id: String,
327	pub source_type: SourceType,
328	pub source_repo: Option<String>,
329	pub title: Option<String>,
330	pub snippet: Option<String>,
331	pub author: Option<String>,
332	pub created_at: DateTime<Utc>,
333	pub score: f64,
334	pub url: Option<String>,
335	pub concepts: Vec<String>,
336}
337
338/// Container for search results with total count.
339#[derive(Debug, Clone, Serialize, Deserialize)]
340pub struct SearchResults {
341	pub results: Vec<SearchResult>,
342	pub total_count: u32,
343}
344
345/// Full document context returned by `get_document`.
346#[derive(Debug, Clone, Serialize, Deserialize)]
347pub struct DocumentContext {
348	pub document: Document,
349	pub url: Option<String>,
350	pub outgoing_refs: Vec<Reference>,
351	pub incoming_refs: Vec<Reference>,
352	pub concepts: Vec<String>,
353}
354
355/// A timeline event for a concept.
356#[derive(Debug, Clone, Serialize, Deserialize)]
357pub struct TimelineEvent {
358	pub date: String,
359	#[serde(rename = "type")]
360	pub source_type: SourceType,
361	pub title: Option<String>,
362	pub id: String,
363	pub url: Option<String>,
364}
365
366/// Timeline of a concept across all sources.
367#[derive(Debug, Clone, Serialize, Deserialize)]
368pub struct Timeline {
369	pub concept: String,
370	pub events: Vec<TimelineEvent>,
371}
372
373/// Context for a commit search result.
374#[derive(Debug, Clone, Serialize, Deserialize)]
375pub struct CommitContext {
376	pub document: Document,
377	pub url: Option<String>,
378	pub associated_prs: Vec<SearchResult>,
379}
380
381/// Sync state for a source.
382#[derive(Debug, Clone, Serialize, Deserialize)]
383pub struct SyncState {
384	pub source_id: String,
385	pub source_type: String,
386	pub source_repo: Option<String>,
387	pub last_cursor: Option<String>,
388	pub last_synced_at: Option<DateTime<Utc>>,
389	pub next_run_at: Option<DateTime<Utc>>,
390	pub status: SyncStatus,
391	pub error_message: Option<String>,
392	pub retry_count: i32,
393	pub items_found: i32,
394}
395
396#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
397#[serde(rename_all = "snake_case")]
398pub enum SyncStatus {
399	Pending,
400	Running,
401	Ok,
402	Error,
403}
404
405impl SyncStatus {
406	pub fn as_str(&self) -> &'static str {
407		match self {
408			Self::Pending => "pending",
409			Self::Running => "running",
410			Self::Ok => "ok",
411			Self::Error => "error",
412		}
413	}
414
415	pub fn from_str(s: &str) -> Self {
416		match s {
417			"running" => Self::Running,
418			"ok" => Self::Ok,
419			"error" => Self::Error,
420			_ => Self::Pending,
421		}
422	}
423}
424
#[cfg(test)]
mod tests {
	use super::*;

	/// Bare document for URL tests: only the identifying fields are filled in.
	fn make_doc(source_type: SourceType, source_repo: Option<&str>, source_id: &str) -> Document {
		let id = Document::make_id(&source_type, source_repo, source_id);
		Document {
			id,
			source_type,
			source_repo: source_repo.map(str::to_owned),
			source_id: source_id.to_owned(),
			title: None,
			body: None,
			author: None,
			author_id: None,
			created_at: chrono::Utc::now(),
			updated_at: None,
			parent_id: None,
			metadata: None,
			seq: None,
		}
	}

	/// Repo-less document carrying the given JSON metadata.
	fn doc_with_metadata(
		source_type: SourceType, source_id: &str, metadata: serde_json::Value,
	) -> Document {
		Document { metadata: Some(metadata), ..make_doc(source_type, None, source_id) }
	}

	/// Document whose `parent_id` is set to `parent`.
	fn doc_with_parent(
		source_type: SourceType, source_repo: Option<&str>, source_id: &str, parent: &str,
	) -> Document {
		Document {
			parent_id: Some(parent.to_owned()),
			..make_doc(source_type, source_repo, source_id)
		}
	}

	/// UTC timestamp on 2023-06-01 at `hour`:00:00.
	fn june_first_2023(hour: u32) -> chrono::DateTime<chrono::Utc> {
		chrono::NaiveDate::from_ymd_opt(2023, 6, 1)
			.unwrap()
			.and_hms_opt(hour, 0, 0)
			.unwrap()
			.and_utc()
	}

	#[test]
	fn test_comment_url_with_parent_id() {
		let doc = doc_with_parent(
			SourceType::GithubComment,
			Some("lightningdevkit/ldk-sample"),
			"2135734193",
			"github_issue:lightningdevkit/ldk-sample:133",
		);
		assert_eq!(
			doc.url().as_deref(),
			Some("https://github.com/lightningdevkit/ldk-sample/issues/133#issuecomment-2135734193")
		);
	}

	#[test]
	fn test_comment_url_without_parent_id() {
		let doc =
			make_doc(SourceType::GithubComment, Some("lightningdevkit/ldk-sample"), "2135734193");
		assert_eq!(doc.url(), None);
	}

	#[test]
	fn test_issue_url() {
		let doc = make_doc(SourceType::GithubIssue, Some("bitcoin/bitcoin"), "12345");
		assert_eq!(doc.url().as_deref(), Some("https://github.com/bitcoin/bitcoin/issues/12345"));
	}

	#[test]
	fn test_bip_url() {
		let doc = doc_with_metadata(
			SourceType::Bip,
			"340",
			serde_json::json!({ "format": "mediawiki", "url": "https://github.com/bitcoin/bips/blob/master/bip-0340.mediawiki" }),
		);
		assert_eq!(
			doc.url().as_deref(),
			Some("https://github.com/bitcoin/bips/blob/master/bip-0340.mediawiki")
		);
	}

	#[test]
	fn test_bip_url_md_format() {
		let doc = doc_with_metadata(
			SourceType::Bip,
			"388",
			serde_json::json!({ "format": "md", "url": "https://github.com/bitcoin/bips/blob/master/bip-0388.md" }),
		);
		assert_eq!(
			doc.url().as_deref(),
			Some("https://github.com/bitcoin/bips/blob/master/bip-0388.md")
		);
	}

	#[test]
	fn test_bip_url_without_metadata() {
		let doc = make_doc(SourceType::Bip, None, "340");
		assert_eq!(doc.url(), None);
	}

	#[test]
	fn test_optech_newsletter_url_with_slug() {
		let doc = doc_with_metadata(
			SourceType::OptechNewsletter,
			"240",
			serde_json::json!({ "slug": "2023-03-01-newsletter" }),
		);
		assert_eq!(doc.url().as_deref(), Some("https://bitcoinops.org/en/newsletters/2023/03/01/"));
	}

	#[test]
	fn test_optech_newsletter_url_without_slug() {
		let doc = make_doc(SourceType::OptechNewsletter, None, "151");
		assert_eq!(doc.url(), None);
	}

	#[test]
	fn test_bolt_url() {
		let doc = doc_with_metadata(
			SourceType::Bolt,
			"1",
			serde_json::json!({ "url": "https://github.com/lightning/bolts/blob/master/01-messaging.md" }),
		);
		assert_eq!(
			doc.url().as_deref(),
			Some("https://github.com/lightning/bolts/blob/master/01-messaging.md")
		);
	}

	#[test]
	fn test_bolt_url_without_metadata() {
		let doc = make_doc(SourceType::Bolt, None, "1");
		assert_eq!(doc.url(), None);
	}

	#[test]
	fn test_blip_url() {
		let doc = doc_with_metadata(
			SourceType::Blip,
			"1",
			serde_json::json!({ "url": "https://github.com/lightning/blips/blob/master/blip-0001.md" }),
		);
		assert_eq!(
			doc.url().as_deref(),
			Some("https://github.com/lightning/blips/blob/master/blip-0001.md")
		);
	}

	#[test]
	fn test_lud_url() {
		let doc = doc_with_metadata(
			SourceType::Lud,
			"6",
			serde_json::json!({ "url": "https://github.com/lnurl/luds/blob/luds/06.md" }),
		);
		assert_eq!(doc.url().as_deref(), Some("https://github.com/lnurl/luds/blob/luds/06.md"));
	}

	#[test]
	fn test_nut_url() {
		let doc = doc_with_metadata(
			SourceType::Nut,
			"0",
			serde_json::json!({ "url": "https://github.com/cashubtc/nuts/blob/main/00.md" }),
		);
		assert_eq!(doc.url().as_deref(), Some("https://github.com/cashubtc/nuts/blob/main/00.md"));
	}

	#[test]
	fn test_bitcointalk_topic_url() {
		let doc = make_doc(SourceType::BitcointalkTopic, None, "5");
		assert_eq!(doc.url().as_deref(), Some("https://bitcointalk.org/index.php?topic=5.0"));
	}

	#[test]
	fn test_bitcointalk_post_url_with_parent() {
		let doc =
			doc_with_parent(SourceType::BitcointalkPost, None, "12345", "bitcointalk_topic::5");
		assert_eq!(
			doc.url().as_deref(),
			Some("https://bitcointalk.org/index.php?topic=5.msg12345#msg12345")
		);
	}

	#[test]
	fn test_bitcointalk_post_url_without_parent() {
		let doc = make_doc(SourceType::BitcointalkPost, None, "12345");
		assert_eq!(doc.url(), None);
	}

	#[test]
	fn test_parse_datetime_rfc3339() {
		assert_eq!(parse_datetime("2023-06-01T00:00:00Z"), Some(june_first_2023(0)));
	}

	#[test]
	fn test_parse_datetime_rfc3339_with_offset() {
		// 12:00 at +02:00 is 10:00 UTC.
		assert_eq!(parse_datetime("2023-06-01T12:00:00+02:00"), Some(june_first_2023(10)));
	}

	#[test]
	fn test_parse_datetime_plain_date() {
		assert_eq!(parse_datetime("2023-06-01"), Some(june_first_2023(0)));
	}

	#[test]
	fn test_parse_datetime_invalid() {
		for bad in ["not-a-date", "2023/06/01", ""].iter() {
			assert!(parse_datetime(bad).is_none());
		}
	}
}