1use crate::error::Result;
2use crate::parser::CodeAst;
3use regex::Regex;
4use std::path::Path;
5
/// Stateless entry point for the metadata heuristics in this module.
///
/// The type carries no data; all functionality is exposed through associated
/// functions such as [`MetadataExtractor::extract`].
#[derive(Debug, Clone, Copy, Default)]
pub struct MetadataExtractor;
8
/// Metadata gathered from a single document by [`MetadataExtractor::extract`].
///
/// Optional fields are `None` and list fields are empty when nothing was
/// found, which is also what [`Default`] produces.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ExtractedMetadata {
    /// Document title, if one could be determined.
    pub title: Option<String>,
    /// Short description of the document, if one was found.
    pub description: Option<String>,
    /// Keywords declared by the document itself.
    pub keywords: Vec<String>,
    /// Coarse topic labels (for example `database` or `api`) inferred from the text.
    pub concepts: Vec<String>,
    /// License name or identifier, if one was detected.
    pub license: Option<String>,
    /// Authors and contributors declared by the document.
    pub authors: Vec<String>,
    /// Tags listed after `@tag:` / `@tags:` markers.
    pub tags: Vec<String>,
    /// Names of frameworks mentioned in the text (for example `react` or `django`).
    pub detected_frameworks: Vec<String>,
}
20
21impl MetadataExtractor {
22 pub fn extract(content: &str, file_path: Option<&Path>) -> Result<ExtractedMetadata> {
24 let mut metadata = ExtractedMetadata {
25 title: Self::extract_title(content, file_path),
26 description: Self::extract_description(content),
27 keywords: Self::extract_keywords(content),
28 concepts: Self::extract_concepts(content),
29 license: Self::detect_license(content),
30 authors: Self::extract_authors(content),
31 tags: Self::extract_tags(content),
32 detected_frameworks: Self::detect_frameworks(content),
33 };
34
35 if metadata.title.is_none() {
37 if let Some(path) = file_path {
38 if let Some(name) = path.file_stem() {
39 metadata.title = Some(name.to_string_lossy().to_string());
40 }
41 }
42 }
43
44 Ok(metadata)
45 }
46
47 fn extract_title(content: &str, file_path: Option<&Path>) -> Option<String> {
48 if let Some(line) = content.lines().find(|l| l.starts_with("# ")) {
50 return Some(line.trim_start_matches("# ").trim().to_string());
51 }
52
53 if let Ok(value) = serde_json::from_str::<serde_json::Value>(content) {
55 if let Some(name) = value.get("name").and_then(|v| v.as_str()) {
56 return Some(name.to_string());
57 }
58 }
59
60 if let Ok(table) = toml::from_str::<toml::Table>(content) {
62 if let Some(name) = table.get("package")
63 .and_then(|p| p.get("name"))
64 .and_then(|n| n.as_str())
65 {
66 return Some(name.to_string());
67 }
68 }
69
70 file_path.and_then(|p| p.file_stem()?.to_str().map(|s| s.to_string()))
72 }
73
74 fn extract_description(content: &str) -> Option<String> {
75 let mut lines = content.lines();
77 while let Some(line) = lines.next() {
78 if line.starts_with("# ") {
79 for desc in lines.by_ref() {
81 if !desc.trim().is_empty() && !desc.starts_with("#") {
82 return Some(desc.trim().to_string());
83 }
84 }
85 }
86 }
87
88 if let Ok(value) = serde_json::from_str::<serde_json::Value>(content) {
90 if let Some(desc) = value.get("description").and_then(|v| v.as_str()) {
91 return Some(desc.to_string());
92 }
93 }
94
95 None
96 }
97
98 fn extract_keywords(content: &str) -> Vec<String> {
99 let mut keywords = Vec::new();
100
101 if let Ok(value) = serde_json::from_str::<serde_json::Value>(content) {
103 if let Some(kw) = value.get("keywords").and_then(|v| v.as_array()) {
104 for item in kw {
105 if let Some(s) = item.as_str() {
106 keywords.push(s.to_string());
107 }
108 }
109 }
110 }
111
112 keywords
113 }
114
/// Infers coarse topic labels from the terms that occur in `content`.
///
/// A label is reported when any of its terms occurs anywhere in the text,
/// ignoring ASCII case, so `API`, `PostgreSQL` and `S3` count just like their
/// lowercase spellings. Labels are returned in table order, each at most once.
///
/// Matching is by plain substring, so short terms also match inside longer
/// words (for example `ui` inside `build`); treat the result as a hint.
fn extract_concepts(content: &str) -> Vec<String> {
    // Terms are lowercase ASCII; the text is lowercased once to compare.
    const CONCEPT_TERMS: [(&str, &[&str]); 8] = [
        ("database", &["db", "postgres", "mysql", "mongodb", "redis"]),
        ("api", &["api", "rest", "graphql", "rpc"]),
        ("ui", &["ui", "component", "react", "vue", "angular"]),
        ("testing", &["test", "spec", "jest", "mocha", "unittest"]),
        ("async", &["async", "await", "promise", "future"]),
        ("concurrency", &["thread", "concurrent", "parallel", "mutex"]),
        ("cli", &["cli", "command", "argv", "argument"]),
        ("storage", &["storage", "cache", "file", "s3"]),
    ];

    let haystack = content.to_ascii_lowercase();

    CONCEPT_TERMS
        .iter()
        .filter(|(_, terms)| terms.iter().any(|term| haystack.contains(*term)))
        .map(|(concept, _)| (*concept).to_string())
        .collect()
}
139
140 fn detect_license(content: &str) -> Option<String> {
141 let licenses = [
143 ("MIT", "MIT"),
144 ("Apache", "Apache-2.0"),
145 ("GPL", "GPL-3.0"),
146 ("BSD", "BSD-2-Clause"),
147 ("ISC", "ISC"),
148 ];
149
150 for (pattern, license) in &licenses {
151 if content.contains(pattern) {
152 return Some(license.to_string());
153 }
154 }
155
156 if let Ok(value) = serde_json::from_str::<serde_json::Value>(content) {
158 if let Some(license) = value.get("license").and_then(|v| v.as_str()) {
159 return Some(license.to_string());
160 }
161 }
162
163 None
164 }
165
166 fn extract_authors(content: &str) -> Vec<String> {
167 let mut authors = Vec::new();
168
169 if let Ok(value) = serde_json::from_str::<serde_json::Value>(content) {
171 if let Some(author) = value.get("author").and_then(|v| v.as_str()) {
172 authors.push(author.to_string());
173 }
174 if let Some(contributors) = value.get("contributors").and_then(|v| v.as_array()) {
175 for item in contributors {
176 if let Some(s) = item.as_str() {
177 authors.push(s.to_string());
178 }
179 }
180 }
181 }
182
183 if let Ok(table) = toml::from_str::<toml::Table>(content) {
185 if let Some(authors_arr) = table.get("package")
186 .and_then(|p| p.get("authors"))
187 .and_then(|a| a.as_array())
188 {
189 for item in authors_arr {
190 if let Some(s) = item.as_str() {
191 authors.push(s.to_string());
192 }
193 }
194 }
195 }
196
197 authors
198 }
199
200 fn extract_tags(content: &str) -> Vec<String> {
201 let mut tags = Vec::new();
202
203 let tag_pattern = Regex::new(r"@tags?\s*:\s*([^\n]+)").ok();
205 if let Some(re) = tag_pattern {
206 for cap in re.captures_iter(content) {
207 if let Some(tag_str) = cap.get(1) {
208 let parts: Vec<&str> = tag_str.as_str().split(',').collect();
209 for part in parts {
210 tags.push(part.trim().to_string());
211 }
212 }
213 }
214 }
215
216 tags
217 }
218
/// Lists the frameworks whose name occurs in `content`.
///
/// Each framework is recognised by its all-lowercase or its conventionally
/// capitalised spelling; other casings (for example `REACT`) are not matched.
/// Names are returned in table order, each at most once.
///
/// Matching is by plain substring, so a name also matches inside a longer
/// word (for example `express` inside `expression`); treat the result as a
/// hint.
fn detect_frameworks(content: &str) -> Vec<String> {
    // Plain substring checks give the same answers as the literal
    // alternations they replace, without compiling a regex per entry on
    // every call.
    const FRAMEWORK_SPELLINGS: [(&str, [&str; 2]); 12] = [
        ("react", ["react", "React"]),
        ("vue", ["vue", "Vue"]),
        ("angular", ["angular", "Angular"]),
        ("svelte", ["svelte", "Svelte"]),
        ("next.js", ["next", "Next"]),
        ("express", ["express", "Express"]),
        ("fastapi", ["fastapi", "FastAPI"]),
        ("django", ["django", "Django"]),
        ("rails", ["rails", "Rails"]),
        ("spring", ["spring", "Spring"]),
        ("actix", ["actix", "Actix"]),
        ("axum", ["axum", "Axum"]),
    ];

    FRAMEWORK_SPELLINGS
        .iter()
        .filter(|(_, spellings)| spellings.iter().any(|spelling| content.contains(*spelling)))
        .map(|(framework, _)| (*framework).to_string())
        .collect()
}
247}
248
249pub fn extract_api_surface(ast: &CodeAst) -> ApiSurface {
251 ApiSurface {
252 functions: ast.functions.clone(),
253 structs: ast.structs.clone(),
254 traits: ast.traits.clone(),
255 classes: ast.classes.clone(),
256 interfaces: ast.interfaces.clone(),
257 exports: extract_public_api(ast),
258 }
259}
260
/// Names that make up the API of a parsed source file, grouped by kind.
///
/// All lists are empty in the [`Default`] value.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ApiSurface {
    /// Function names.
    pub functions: Vec<String>,
    /// Struct names.
    pub structs: Vec<String>,
    /// Trait names.
    pub traits: Vec<String>,
    /// Class names.
    pub classes: Vec<String>,
    /// Interface names.
    pub interfaces: Vec<String>,
    /// Combined list of exported names across all kinds.
    pub exports: Vec<String>,
}
270
271fn extract_public_api(ast: &CodeAst) -> Vec<String> {
272 let mut api = Vec::new();
274 api.extend(ast.functions.clone());
275 api.extend(ast.structs.clone());
276 api.extend(ast.classes.clone());
277 api.extend(ast.interfaces.clone());
278 api.extend(ast.traits.clone());
279 api
280}