1use async_trait::async_trait;
7use chrono::{DateTime, Utc};
8use serde::{Deserialize, Serialize};
9
10use crate::error::PunchResult;
11
12#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
14#[serde(rename_all = "snake_case")]
15pub enum LinkContentType {
16 Article,
18 Documentation,
20 Repository,
22 SocialMedia,
24 Video,
26 Image,
28 Other,
30}
31
32#[derive(Debug, Clone, Serialize, Deserialize)]
34pub struct LinkMetadata {
35 pub author: Option<String>,
37 pub published_at: Option<DateTime<Utc>>,
39 pub word_count: usize,
41 pub language: Option<String>,
43 pub description: Option<String>,
45}
46
47impl LinkMetadata {
48 pub fn empty() -> Self {
50 Self {
51 author: None,
52 published_at: None,
53 word_count: 0,
54 language: None,
55 description: None,
56 }
57 }
58}
59
60#[derive(Debug, Clone, Serialize, Deserialize)]
62pub struct LinkContent {
63 pub url: String,
65 pub title: Option<String>,
67 pub content: String,
69 pub content_type: LinkContentType,
71 pub metadata: LinkMetadata,
73}
74
75impl LinkContent {
76 pub fn new(
78 url: impl Into<String>,
79 content: impl Into<String>,
80 content_type: LinkContentType,
81 ) -> Self {
82 let content = content.into();
83 let word_count = content.split_whitespace().count();
84 Self {
85 url: url.into(),
86 title: None,
87 content,
88 content_type,
89 metadata: LinkMetadata {
90 author: None,
91 published_at: None,
92 word_count,
93 language: None,
94 description: None,
95 },
96 }
97 }
98
99 pub fn with_title(mut self, title: impl Into<String>) -> Self {
101 self.title = Some(title.into());
102 self
103 }
104
105 pub fn with_metadata(mut self, metadata: LinkMetadata) -> Self {
107 self.metadata = metadata;
108 self
109 }
110}
111
112#[async_trait]
114pub trait LinkExtractor: Send + Sync {
115 async fn extract(&self, url: &str) -> PunchResult<LinkContent>;
117
118 fn supports_url(&self, url: &str) -> bool;
120}
121
#[cfg(test)]
mod tests {
    use super::*;

    /// `new` + `with_title` populate the url, title, type and body.
    #[test]
    fn test_content_construction() {
        let content = LinkContent::new(
            "https://example.com/article",
            "This is a test article about fighting techniques.",
            LinkContentType::Article,
        )
        .with_title("Fighting Techniques");

        assert_eq!(content.url, "https://example.com/article");
        assert_eq!(content.title, Some("Fighting Techniques".to_string()));
        assert_eq!(content.content_type, LinkContentType::Article);
        assert!(!content.content.is_empty());
    }

    /// Every variant survives a JSON round trip, and names are snake_case.
    #[test]
    fn test_content_type_classification() {
        // A fixed-size array is enough here; no heap allocation needed.
        let types = [
            LinkContentType::Article,
            LinkContentType::Documentation,
            LinkContentType::Repository,
            LinkContentType::SocialMedia,
            LinkContentType::Video,
            LinkContentType::Image,
            LinkContentType::Other,
        ];

        for ct in &types {
            let json = serde_json::to_string(ct).expect("serialize content type");
            let deser: LinkContentType =
                serde_json::from_str(&json).expect("deserialize content type");
            assert_eq!(&deser, ct);
        }

        assert_eq!(
            serde_json::to_string(&LinkContentType::SocialMedia).expect("social media"),
            "\"social_media\""
        );
    }

    /// Metadata fields survive a JSON round trip.
    #[test]
    fn test_metadata() {
        let metadata = LinkMetadata {
            author: Some("The Champion".to_string()),
            published_at: Some(Utc::now()),
            word_count: 1500,
            language: Some("en".to_string()),
            description: Some("A guide to winning".to_string()),
        };

        let json = serde_json::to_string(&metadata).expect("serialize metadata");
        let deser: LinkMetadata = serde_json::from_str(&json).expect("deserialize metadata");

        assert_eq!(deser.author, Some("The Champion".to_string()));
        assert!(deser.published_at.is_some());
        assert_eq!(deser.word_count, 1500);
        assert_eq!(deser.language, Some("en".to_string()));
        assert_eq!(deser.description, Some("A guide to winning".to_string()));
    }

    /// Exercises a simple, test-local URL classifier.
    #[test]
    fn test_url_support_check() {
        let github_url = "https://github.com/humancto/punch";
        let docs_url = "https://docs.rs/serde/latest";
        let random_url = "https://example.com/page";

        fn classify_url(url: &str) -> LinkContentType {
            if url.contains("github.com") {
                LinkContentType::Repository
            } else if url.contains("docs.") {
                // "docs." also matches "docs.rs", so one check covers both.
                LinkContentType::Documentation
            } else {
                LinkContentType::Other
            }
        }

        assert_eq!(classify_url(github_url), LinkContentType::Repository);
        assert_eq!(classify_url(docs_url), LinkContentType::Documentation);
        assert_eq!(classify_url(random_url), LinkContentType::Other);
    }

    /// `new` counts whitespace-separated words, including the empty case.
    #[test]
    fn test_word_count() {
        let content = LinkContent::new(
            "https://example.com",
            "one two three four five six seven eight nine ten",
            LinkContentType::Article,
        );

        assert_eq!(content.metadata.word_count, 10);

        let empty_content = LinkContent::new("https://example.com", "", LinkContentType::Other);
        assert_eq!(empty_content.metadata.word_count, 0);

        // Runs of mixed whitespace separate words but are not counted themselves.
        let spaced = LinkContent::new(
            "https://example.com",
            "  spaced \n\t out  ",
            LinkContentType::Other,
        );
        assert_eq!(spaced.metadata.word_count, 2);
    }

    /// `with_metadata` stores the supplied metadata as-is, replacing the
    /// word count that `new` computed.
    #[test]
    fn test_with_metadata_replaces_computed_word_count() {
        let content = LinkContent::new(
            "https://example.com",
            "one two three",
            LinkContentType::Article,
        )
        .with_metadata(LinkMetadata::empty());

        assert_eq!(content.metadata.word_count, 0);
        assert!(content.metadata.author.is_none());
    }
}