provenant/parsers/
readme.rs1use crate::models::PackageData;
28use crate::models::{DatasourceId, PackageType};
29use crate::parser_warn as warn;
30use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
31use std::path::Path;
32
33use super::PackageParser;
34use super::metadata::ParserMetadata;
35
36pub struct ReadmeParser;
41
42impl PackageParser for ReadmeParser {
43 const PACKAGE_TYPE: PackageType = PackageType::Readme;
44
45 fn metadata() -> Vec<ParserMetadata> {
46 vec![ParserMetadata {
47 description: "Third-party attribution README files",
48 file_patterns: &[
49 "**/README.android",
50 "**/README.chromium",
51 "**/README.facebook",
52 "**/README.google",
53 "**/README.thirdparty",
54 ],
55 package_type: "readme",
56 primary_language: "",
57 documentation_url: Some(
58 "https://github.com/chromium/chromium/blob/main/docs/contributing.md#third_party-components",
59 ),
60 }]
61 }
62
63 fn is_match(path: &Path) -> bool {
64 path.file_name().is_some_and(|name| {
65 let name = name.to_string_lossy().to_lowercase();
66 matches!(
67 name.as_str(),
68 "readme.android"
69 | "readme.chromium"
70 | "readme.facebook"
71 | "readme.google"
72 | "readme.thirdparty"
73 )
74 })
75 }
76
77 fn extract_packages(path: &Path) -> Vec<PackageData> {
78 let content = match read_file_to_string(path, None) {
79 Ok(content) => content,
80 Err(e) => {
81 warn!("Failed to read README file at {:?}: {}", path, e);
82 return vec![default_package_data()];
83 }
84 };
85
86 let mut pkg = default_package_data();
87
88 for line in content.lines().take(MAX_ITERATION_COUNT) {
90 let line = line.trim();
91 if line.is_empty() {
92 continue;
93 }
94
95 let split_colon = line.split_once(':');
96 let split_equals = line.split_once('=');
97
98 let (key, value) = match (split_colon, split_equals) {
99 (Some((ck, cv)), Some((ek, _))) if ck.len() <= ek.len() => (ck.trim(), cv.trim()),
100 (_, Some((ek, ev))) => (ek.trim(), ev.trim()),
101 (Some((ck, cv)), None) => (ck.trim(), cv.trim()),
102 (None, None) => continue,
103 };
104
105 if key.is_empty() || value.is_empty() {
106 continue;
107 }
108
109 let key_lower = key.to_lowercase();
111 match key_lower.as_str() {
112 "name" | "project" => {
113 pkg.name = Some(truncate_field(value.to_string()));
114 }
115 "version" => {
116 pkg.version = Some(truncate_field(value.to_string()));
117 }
118 "copyright" => {
119 pkg.copyright = Some(truncate_field(value.to_string()));
120 }
121 "download link" | "downloaded from" => {
122 pkg.download_url = Some(truncate_field(value.to_string()));
123 }
124 "homepage" | "website" | "repo" | "source" | "upstream" | "url" | "project url" => {
125 pkg.homepage_url = Some(truncate_field(value.to_string()));
126 }
127 "licence" | "license" => {
128 pkg.extracted_license_statement = Some(truncate_field(value.to_string()));
129 }
130 _ => {
131 }
133 }
134 }
135
136 if pkg.name.is_none()
138 && let Some(parent) = path.parent()
139 && let Some(parent_name) = parent.file_name()
140 {
141 pkg.name = Some(truncate_field(parent_name.to_string_lossy().to_string()));
142 }
143
144 vec![pkg]
145 }
146}
147
148fn default_package_data() -> PackageData {
149 PackageData {
150 package_type: Some(ReadmeParser::PACKAGE_TYPE),
151 datasource_id: Some(DatasourceId::Readme),
152 ..Default::default()
153 }
154}
155
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Runs `ReadmeParser::is_match` on a path given as a plain string.
    fn matches(candidate: &str) -> bool {
        ReadmeParser::is_match(&PathBuf::from(candidate))
    }

    // ---- file-name matching -------------------------------------------------

    #[test]
    fn test_is_match_android() {
        assert!(matches("/some/path/README.android"));
    }

    #[test]
    fn test_is_match_chromium() {
        assert!(matches("/some/path/README.chromium"));
    }

    #[test]
    fn test_is_match_facebook() {
        assert!(matches("/some/path/README.facebook"));
    }

    #[test]
    fn test_is_match_google() {
        assert!(matches("/some/path/README.google"));
    }

    #[test]
    fn test_is_match_thirdparty() {
        assert!(matches("/some/path/README.thirdparty"));
    }

    #[test]
    fn test_is_match_case_insensitive() {
        for spelling in ["/some/path/README.CHROMIUM", "/some/path/README.ChRoMiUm"] {
            assert!(matches(spelling), "{} should match", spelling);
        }
    }

    #[test]
    fn test_is_match_negative_cases() {
        let rejected = [
            "/some/path/README.md",
            "/some/path/README.txt",
            "/some/path/README",
            "/some/path/INSTALL.txt",
        ];

        for candidate in rejected {
            assert!(!matches(candidate), "{} should not match", candidate);
        }
    }

    // ---- field extraction (reads fixtures under testdata/readme) ------------

    #[test]
    fn test_extract_chromium_format() {
        let fixture =
            PathBuf::from("testdata/readme/chromium/third_party/example/README.chromium");
        let pkg = ReadmeParser::extract_first_package(&fixture);

        assert_eq!(pkg.package_type, Some(PackageType::Readme));
        assert_eq!(pkg.name.as_deref(), Some("Example Library"));
        assert_eq!(pkg.version.as_deref(), Some("2.1.0"));
        assert_eq!(pkg.homepage_url.as_deref(), Some("https://example.com"));
        assert_eq!(pkg.extracted_license_statement.as_deref(), Some("MIT"));
        assert_eq!(pkg.datasource_id, Some(DatasourceId::Readme));
    }

    #[test]
    fn test_extract_android_format() {
        let fixture = PathBuf::from("testdata/readme/android/third_party/example/README.android");
        let pkg = ReadmeParser::extract_first_package(&fixture);

        assert_eq!(pkg.name.as_deref(), Some("Android Example"));
        assert_eq!(pkg.version.as_deref(), Some("1.0"));
        assert_eq!(
            pkg.homepage_url.as_deref(),
            Some("https://android.example.com")
        );
        assert_eq!(pkg.copyright.as_deref(), Some("2024 Google Inc."));
    }

    #[test]
    fn test_extract_facebook_format() {
        let fixture =
            PathBuf::from("testdata/readme/facebook/third_party/example/README.facebook");
        let pkg = ReadmeParser::extract_first_package(&fixture);

        assert_eq!(pkg.name.as_deref(), Some("FB Library"));
        assert_eq!(
            pkg.download_url.as_deref(),
            Some("https://github.com/example/fb-lib")
        );
        assert_eq!(
            pkg.extracted_license_statement.as_deref(),
            Some("BSD-3-Clause")
        );
    }

    #[test]
    fn test_extract_parent_dir_fallback() {
        let fixture =
            PathBuf::from("testdata/readme/no-name/third_party/mylib/README.thirdparty");
        let pkg = ReadmeParser::extract_first_package(&fixture);

        // The expected name equals the fixture's parent directory, `mylib`.
        assert_eq!(pkg.name.as_deref(), Some("mylib"));
        assert_eq!(pkg.homepage_url.as_deref(), Some("https://example.com"));
        assert_eq!(pkg.version.as_deref(), Some("3.0"));
    }

    #[test]
    fn test_extract_equals_separator() {
        let fixture =
            PathBuf::from("testdata/readme/equals-separator/third_party/eqlib/README.google");
        let pkg = ReadmeParser::extract_first_package(&fixture);

        assert_eq!(pkg.name.as_deref(), Some("Google Lib"));
        assert_eq!(
            pkg.homepage_url.as_deref(),
            Some("https://google.example.com")
        );
        assert_eq!(
            pkg.extracted_license_statement.as_deref(),
            Some("Apache-2.0")
        );
    }

    #[test]
    fn test_case_insensitive_field_names() {
        let fixture =
            PathBuf::from("testdata/readme/chromium/third_party/example/README.chromium");
        let pkg = ReadmeParser::extract_first_package(&fixture);

        assert!(pkg.name.is_some(), "name should be extracted");
        assert!(pkg.version.is_some(), "version should be extracted");
        assert!(pkg.homepage_url.is_some(), "homepage_url should be extracted");
        assert!(
            pkg.extracted_license_statement.is_some(),
            "extracted_license_statement should be extracted"
        );
    }

    // ---- error handling -----------------------------------------------------

    #[test]
    fn test_invalid_file() {
        let missing = PathBuf::from("testdata/readme/nonexistent/README.chromium");
        let pkg = ReadmeParser::extract_first_package(&missing);

        // An unreadable file still yields a record tagged with type and datasource.
        assert_eq!(pkg.package_type, Some(PackageType::Readme));
        assert_eq!(pkg.datasource_id, Some(DatasourceId::Readme));
    }
}