provenant/parsers/
readme.rs1use crate::models::PackageData;
25use crate::models::{DatasourceId, PackageType};
26use crate::parsers::utils::read_file_to_string;
27use log::warn;
28use std::path::Path;
29
30use super::PackageParser;
31
/// Parser for third-party attribution README files such as `README.chromium`,
/// `README.android`, `README.facebook`, `README.google` and `README.thirdparty`.
///
/// The type carries no state; all behaviour lives in its `PackageParser` impl.
pub struct ReadmeParser;
37
38impl PackageParser for ReadmeParser {
39 const PACKAGE_TYPE: PackageType = PackageType::Readme;
40
41 fn is_match(path: &Path) -> bool {
42 path.file_name().is_some_and(|name| {
43 let name = name.to_string_lossy().to_lowercase();
44 matches!(
45 name.as_str(),
46 "readme.android"
47 | "readme.chromium"
48 | "readme.facebook"
49 | "readme.google"
50 | "readme.thirdparty"
51 )
52 })
53 }
54
55 fn extract_packages(path: &Path) -> Vec<PackageData> {
56 let content = match read_file_to_string(path) {
57 Ok(content) => content,
58 Err(e) => {
59 warn!("Failed to read README file at {:?}: {}", path, e);
60 return vec![default_package_data()];
61 }
62 };
63
64 let mut pkg = default_package_data();
65
66 for line in content.lines() {
68 let line = line.trim();
69 if line.is_empty() {
70 continue;
71 }
72
73 let split_colon = line.split_once(':');
74 let split_equals = line.split_once('=');
75
76 let (key, value) = match (split_colon, split_equals) {
77 (Some((ck, cv)), Some((ek, _))) if ck.len() <= ek.len() => (ck.trim(), cv.trim()),
78 (_, Some((ek, ev))) => (ek.trim(), ev.trim()),
79 (Some((ck, cv)), None) => (ck.trim(), cv.trim()),
80 (None, None) => continue,
81 };
82
83 if key.is_empty() || value.is_empty() {
84 continue;
85 }
86
87 let key_lower = key.to_lowercase();
89 match key_lower.as_str() {
90 "name" | "project" => {
91 pkg.name = Some(value.to_string());
92 }
93 "version" => {
94 pkg.version = Some(value.to_string());
95 }
96 "copyright" => {
97 pkg.copyright = Some(value.to_string());
98 }
99 "download link" | "downloaded from" => {
100 pkg.download_url = Some(value.to_string());
101 }
102 "homepage" | "website" | "repo" | "source" | "upstream" | "url" | "project url" => {
103 pkg.homepage_url = Some(value.to_string());
104 }
105 "licence" | "license" => {
106 pkg.extracted_license_statement = Some(value.to_string());
107 }
108 _ => {
109 }
111 }
112 }
113
114 if pkg.name.is_none()
116 && let Some(parent) = path.parent()
117 && let Some(parent_name) = parent.file_name()
118 {
119 pkg.name = Some(parent_name.to_string_lossy().to_string());
120 }
121
122 vec![pkg]
123 }
124}
125
126fn default_package_data() -> PackageData {
127 PackageData {
128 package_type: Some(ReadmeParser::PACKAGE_TYPE),
129 datasource_id: Some(DatasourceId::Readme),
130 ..Default::default()
131 }
132}
133
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Returns whether `ReadmeParser` accepts the given path.
    fn claims(path: &str) -> bool {
        ReadmeParser::is_match(&PathBuf::from(path))
    }

    #[test]
    fn test_is_match_android() {
        assert!(claims("/some/path/README.android"));
    }

    #[test]
    fn test_is_match_chromium() {
        assert!(claims("/some/path/README.chromium"));
    }

    #[test]
    fn test_is_match_facebook() {
        assert!(claims("/some/path/README.facebook"));
    }

    #[test]
    fn test_is_match_google() {
        assert!(claims("/some/path/README.google"));
    }

    #[test]
    fn test_is_match_thirdparty() {
        assert!(claims("/some/path/README.thirdparty"));
    }

    #[test]
    fn test_is_match_case_insensitive() {
        for accepted in ["/some/path/README.CHROMIUM", "/some/path/README.ChRoMiUm"] {
            assert!(claims(accepted), "{accepted} should match");
        }
    }

    #[test]
    fn test_is_match_negative_cases() {
        let rejected_paths = [
            "/some/path/README.md",
            "/some/path/README.txt",
            "/some/path/README",
            "/some/path/INSTALL.txt",
        ];

        for rejected in rejected_paths {
            assert!(!claims(rejected), "{rejected} should not match");
        }
    }

    #[test]
    fn test_extract_chromium_format() {
        let path = PathBuf::from("testdata/readme/chromium/third_party/example/README.chromium");
        let pkg = ReadmeParser::extract_first_package(&path);

        assert_eq!(pkg.package_type, Some(PackageType::Readme));
        assert_eq!(pkg.name.as_deref(), Some("Example Library"));
        assert_eq!(pkg.version.as_deref(), Some("2.1.0"));
        assert_eq!(pkg.homepage_url.as_deref(), Some("https://example.com"));
        assert_eq!(pkg.extracted_license_statement.as_deref(), Some("MIT"));
        assert_eq!(pkg.datasource_id, Some(DatasourceId::Readme));
    }

    #[test]
    fn test_extract_android_format() {
        let path = PathBuf::from("testdata/readme/android/third_party/example/README.android");
        let pkg = ReadmeParser::extract_first_package(&path);

        assert_eq!(pkg.name.as_deref(), Some("Android Example"));
        assert_eq!(pkg.version.as_deref(), Some("1.0"));
        assert_eq!(
            pkg.homepage_url.as_deref(),
            Some("https://android.example.com")
        );
        assert_eq!(pkg.copyright.as_deref(), Some("2024 Google Inc."));
    }

    #[test]
    fn test_extract_facebook_format() {
        let path = PathBuf::from("testdata/readme/facebook/third_party/example/README.facebook");
        let pkg = ReadmeParser::extract_first_package(&path);

        assert_eq!(pkg.name.as_deref(), Some("FB Library"));
        assert_eq!(
            pkg.download_url.as_deref(),
            Some("https://github.com/example/fb-lib")
        );
        assert_eq!(
            pkg.extracted_license_statement.as_deref(),
            Some("BSD-3-Clause")
        );
    }

    #[test]
    fn test_extract_parent_dir_fallback() {
        let path = PathBuf::from("testdata/readme/no-name/third_party/mylib/README.thirdparty");
        let pkg = ReadmeParser::extract_first_package(&path);

        // The expected name "mylib" is the fixture's parent directory name.
        assert_eq!(pkg.name.as_deref(), Some("mylib"));
        assert_eq!(pkg.homepage_url.as_deref(), Some("https://example.com"));
        assert_eq!(pkg.version.as_deref(), Some("3.0"));
    }

    #[test]
    fn test_extract_equals_separator() {
        let path =
            PathBuf::from("testdata/readme/equals-separator/third_party/eqlib/README.google");
        let pkg = ReadmeParser::extract_first_package(&path);

        assert_eq!(pkg.name.as_deref(), Some("Google Lib"));
        assert_eq!(
            pkg.homepage_url.as_deref(),
            Some("https://google.example.com")
        );
        assert_eq!(
            pkg.extracted_license_statement.as_deref(),
            Some("Apache-2.0")
        );
    }

    #[test]
    fn test_case_insensitive_field_names() {
        let path = PathBuf::from("testdata/readme/chromium/third_party/example/README.chromium");
        let pkg = ReadmeParser::extract_first_package(&path);

        assert!(pkg.name.is_some());
        assert!(pkg.version.is_some());
        assert!(pkg.homepage_url.is_some());
        assert!(pkg.extracted_license_statement.is_some());
    }

    #[test]
    fn test_invalid_file() {
        let nonexistent = PathBuf::from("testdata/readme/nonexistent/README.chromium");
        let pkg = ReadmeParser::extract_first_package(&nonexistent);

        // An unreadable file still yields a package tagged with type and datasource.
        assert_eq!(pkg.package_type, Some(PackageType::Readme));
        assert_eq!(pkg.datasource_id, Some(DatasourceId::Readme));
    }
}
284
285crate::register_parser!(
286 "Third-party attribution README files",
287 &[
288 "**/README.android",
289 "**/README.chromium",
290 "**/README.facebook",
291 "**/README.google",
292 "**/README.thirdparty"
293 ],
294 "readme",
295 "",
296 Some(
297 "https://chromium.googlesource.com/chromium/src/+/HEAD/docs/contributing.md#third_party-components"
298 ),
299);