provenant/parsers/
readme.rs1use crate::models::PackageData;
28use crate::models::{DatasourceId, PackageType};
29use crate::parser_warn as warn;
30use crate::parsers::utils::{MAX_ITERATION_COUNT, read_file_to_string, truncate_field};
31use std::path::Path;
32
33use super::PackageParser;
34
/// Parser for third-party attribution README files such as `README.chromium`.
///
/// This is a stateless marker type: it carries no data, and all of its
/// behavior is provided through its [`PackageParser`] implementation.
#[derive(Debug, Clone, Copy, Default)]
pub struct ReadmeParser;
40
41impl PackageParser for ReadmeParser {
42 const PACKAGE_TYPE: PackageType = PackageType::Readme;
43
44 fn is_match(path: &Path) -> bool {
45 path.file_name().is_some_and(|name| {
46 let name = name.to_string_lossy().to_lowercase();
47 matches!(
48 name.as_str(),
49 "readme.android"
50 | "readme.chromium"
51 | "readme.facebook"
52 | "readme.google"
53 | "readme.thirdparty"
54 )
55 })
56 }
57
58 fn extract_packages(path: &Path) -> Vec<PackageData> {
59 let content = match read_file_to_string(path, None) {
60 Ok(content) => content,
61 Err(e) => {
62 warn!("Failed to read README file at {:?}: {}", path, e);
63 return vec![default_package_data()];
64 }
65 };
66
67 let mut pkg = default_package_data();
68
69 for line in content.lines().take(MAX_ITERATION_COUNT) {
71 let line = line.trim();
72 if line.is_empty() {
73 continue;
74 }
75
76 let split_colon = line.split_once(':');
77 let split_equals = line.split_once('=');
78
79 let (key, value) = match (split_colon, split_equals) {
80 (Some((ck, cv)), Some((ek, _))) if ck.len() <= ek.len() => (ck.trim(), cv.trim()),
81 (_, Some((ek, ev))) => (ek.trim(), ev.trim()),
82 (Some((ck, cv)), None) => (ck.trim(), cv.trim()),
83 (None, None) => continue,
84 };
85
86 if key.is_empty() || value.is_empty() {
87 continue;
88 }
89
90 let key_lower = key.to_lowercase();
92 match key_lower.as_str() {
93 "name" | "project" => {
94 pkg.name = Some(truncate_field(value.to_string()));
95 }
96 "version" => {
97 pkg.version = Some(truncate_field(value.to_string()));
98 }
99 "copyright" => {
100 pkg.copyright = Some(truncate_field(value.to_string()));
101 }
102 "download link" | "downloaded from" => {
103 pkg.download_url = Some(truncate_field(value.to_string()));
104 }
105 "homepage" | "website" | "repo" | "source" | "upstream" | "url" | "project url" => {
106 pkg.homepage_url = Some(truncate_field(value.to_string()));
107 }
108 "licence" | "license" => {
109 pkg.extracted_license_statement = Some(truncate_field(value.to_string()));
110 }
111 _ => {
112 }
114 }
115 }
116
117 if pkg.name.is_none()
119 && let Some(parent) = path.parent()
120 && let Some(parent_name) = parent.file_name()
121 {
122 pkg.name = Some(truncate_field(parent_name.to_string_lossy().to_string()));
123 }
124
125 vec![pkg]
126 }
127}
128
129fn default_package_data() -> PackageData {
130 PackageData {
131 package_type: Some(ReadmeParser::PACKAGE_TYPE),
132 datasource_id: Some(DatasourceId::Readme),
133 ..Default::default()
134 }
135}
136
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;

    // --- File name matching -------------------------------------------------

    #[test]
    fn test_is_match_android() {
        assert!(ReadmeParser::is_match(Path::new(
            "/some/path/README.android"
        )));
    }

    #[test]
    fn test_is_match_chromium() {
        assert!(ReadmeParser::is_match(Path::new(
            "/some/path/README.chromium"
        )));
    }

    #[test]
    fn test_is_match_facebook() {
        assert!(ReadmeParser::is_match(Path::new(
            "/some/path/README.facebook"
        )));
    }

    #[test]
    fn test_is_match_google() {
        assert!(ReadmeParser::is_match(Path::new(
            "/some/path/README.google"
        )));
    }

    #[test]
    fn test_is_match_thirdparty() {
        assert!(ReadmeParser::is_match(Path::new(
            "/some/path/README.thirdparty"
        )));
    }

    #[test]
    fn test_is_match_case_insensitive() {
        // Both an all-caps and a mixed-case extension must be accepted.
        for candidate in ["/some/path/README.CHROMIUM", "/some/path/README.ChRoMiUm"] {
            assert!(ReadmeParser::is_match(Path::new(candidate)));
        }
    }

    #[test]
    fn test_is_match_negative_cases() {
        // Generic READMEs and unrelated files are not attribution READMEs.
        let rejected = [
            "/some/path/README.md",
            "/some/path/README.txt",
            "/some/path/README",
            "/some/path/INSTALL.txt",
        ];
        for candidate in rejected {
            assert!(!ReadmeParser::is_match(Path::new(candidate)));
        }
    }

    // --- Field extraction from fixtures -------------------------------------

    #[test]
    fn test_extract_chromium_format() {
        let pkg = ReadmeParser::extract_first_package(Path::new(
            "testdata/readme/chromium/third_party/example/README.chromium",
        ));

        assert_eq!(pkg.package_type, Some(PackageType::Readme));
        assert_eq!(pkg.name.as_deref(), Some("Example Library"));
        assert_eq!(pkg.version.as_deref(), Some("2.1.0"));
        assert_eq!(pkg.homepage_url.as_deref(), Some("https://example.com"));
        assert_eq!(pkg.extracted_license_statement.as_deref(), Some("MIT"));
        assert_eq!(pkg.datasource_id, Some(DatasourceId::Readme));
    }

    #[test]
    fn test_extract_android_format() {
        let pkg = ReadmeParser::extract_first_package(Path::new(
            "testdata/readme/android/third_party/example/README.android",
        ));

        assert_eq!(pkg.name.as_deref(), Some("Android Example"));
        assert_eq!(pkg.version.as_deref(), Some("1.0"));
        assert_eq!(
            pkg.homepage_url.as_deref(),
            Some("https://android.example.com")
        );
        assert_eq!(pkg.copyright.as_deref(), Some("2024 Google Inc."));
    }

    #[test]
    fn test_extract_facebook_format() {
        let pkg = ReadmeParser::extract_first_package(Path::new(
            "testdata/readme/facebook/third_party/example/README.facebook",
        ));

        assert_eq!(pkg.name.as_deref(), Some("FB Library"));
        assert_eq!(
            pkg.download_url.as_deref(),
            Some("https://github.com/example/fb-lib")
        );
        assert_eq!(
            pkg.extracted_license_statement.as_deref(),
            Some("BSD-3-Clause")
        );
    }

    #[test]
    fn test_extract_parent_dir_fallback() {
        let pkg = ReadmeParser::extract_first_package(Path::new(
            "testdata/readme/no-name/third_party/mylib/README.thirdparty",
        ));

        // The expected name is the README's containing directory, `mylib`.
        assert_eq!(pkg.name.as_deref(), Some("mylib"));
        assert_eq!(pkg.homepage_url.as_deref(), Some("https://example.com"));
        assert_eq!(pkg.version.as_deref(), Some("3.0"));
    }

    #[test]
    fn test_extract_equals_separator() {
        let pkg = ReadmeParser::extract_first_package(Path::new(
            "testdata/readme/equals-separator/third_party/eqlib/README.google",
        ));

        assert_eq!(pkg.name.as_deref(), Some("Google Lib"));
        assert_eq!(
            pkg.homepage_url.as_deref(),
            Some("https://google.example.com")
        );
        assert_eq!(
            pkg.extracted_license_statement.as_deref(),
            Some("Apache-2.0")
        );
    }

    #[test]
    fn test_case_insensitive_field_names() {
        let pkg = ReadmeParser::extract_first_package(Path::new(
            "testdata/readme/chromium/third_party/example/README.chromium",
        ));

        // Only presence is checked here; exact values are pinned by
        // `test_extract_chromium_format`.
        assert!(pkg.name.is_some());
        assert!(pkg.version.is_some());
        assert!(pkg.homepage_url.is_some());
        assert!(pkg.extracted_license_statement.is_some());
    }

    // --- Error handling -----------------------------------------------------

    #[test]
    fn test_invalid_file() {
        let pkg = ReadmeParser::extract_first_package(Path::new(
            "testdata/readme/nonexistent/README.chromium",
        ));

        // An unreadable file still yields a package carrying the parser's
        // type and datasource id.
        assert_eq!(pkg.package_type, Some(PackageType::Readme));
        assert_eq!(pkg.datasource_id, Some(DatasourceId::Readme));
    }
}
287
// Metadata handed to the crate-level `register_parser!` macro for this parser.
// The arguments are, in order:
//   1. a human-readable description of the supported files,
//   2. the glob patterns for the README file names handled here,
//   3. the string "readme" (presumably the package type identifier — confirm
//      against the macro definition),
//   4. an empty string (NOTE(review): the meaning of this slot is not visible
//      in this file — confirm against the macro definition),
//   5. a documentation URL wrapped in `Some`.
crate::register_parser!(
    "Third-party attribution README files",
    &[
        "**/README.android",
        "**/README.chromium",
        "**/README.facebook",
        "**/README.google",
        "**/README.thirdparty"
    ],
    "readme",
    "",
    Some(
        "https://github.com/chromium/chromium/blob/main/docs/contributing.md#third_party-components"
    ),
);