1#[cfg(not(feature = "std"))]
29use alloc::{
30 string::{String, ToString},
31 vec::Vec,
32};
33
34use crate::{bzz_new::bzz_decode, error::BzzError};
35
36#[derive(Debug, thiserror::Error)]
40pub enum MetadataError {
41 #[error("bzz decode failed: {0}")]
43 Bzz(#[from] BzzError),
44
45 #[error("metadata chunk is not valid UTF-8")]
47 InvalidUtf8,
48}
49
/// Document metadata collected from a `(metadata ...)` S-expression.
///
/// Every well-known field is `None` until a matching key is seen; keys are
/// matched case-insensitively. Keys that are not recognised are preserved in
/// [`DjVuMetadata::extra`].
#[derive(Clone, Debug, Default, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct DjVuMetadata {
    /// Value of the `title` key.
    pub title: Option<String>,
    /// Value of the `author` key.
    pub author: Option<String>,
    /// Value of the `subject` key (or its alias `description`).
    pub subject: Option<String>,
    /// Value of the `publisher` key.
    pub publisher: Option<String>,
    /// Value of the `year` key (or its alias `date`), kept as free-form text.
    pub year: Option<String>,
    /// Value of the `keywords` key (or its alias `keyword`).
    pub keywords: Option<String>,
    /// All other `(key, value)` pairs, in the order they were encountered and
    /// with the key spelled exactly as in the input.
    pub extra: Vec<(String, String)>,
}
75
76pub fn parse_metadata(data: &[u8]) -> Result<DjVuMetadata, MetadataError> {
83 let text = core::str::from_utf8(data).map_err(|_| MetadataError::InvalidUtf8)?;
84 Ok(parse_metadata_text(text))
85}
86
87pub fn parse_metadata_bzz(data: &[u8]) -> Result<DjVuMetadata, MetadataError> {
91 let decoded = bzz_decode(data)?;
92 parse_metadata(&decoded)
93}
94
95fn parse_metadata_text(text: &str) -> DjVuMetadata {
98 let tokens = tokenize(text);
99 let sexprs = parse_sexprs(&tokens);
100
101 let mut meta = DjVuMetadata::default();
102
103 for expr in &sexprs {
105 if let SExpr::List(items) = expr
106 && let Some(SExpr::Atom(head)) = items.first()
107 {
108 if !head.eq_ignore_ascii_case("metadata") {
109 continue;
110 }
111 for item in &items[1..] {
112 if let SExpr::List(pair) = item
113 && let (Some(SExpr::Atom(key)), Some(SExpr::Atom(val))) =
114 (pair.first(), pair.get(1))
115 {
116 store_kv(&mut meta, key, val);
117 }
118 }
119 }
120 }
121
122 meta
123}
124
125fn store_kv(meta: &mut DjVuMetadata, key: &str, value: &str) {
126 match key.to_lowercase().as_str() {
127 "title" => meta.title = Some(value.to_string()),
128 "author" => meta.author = Some(value.to_string()),
129 "subject" | "description" => meta.subject = Some(value.to_string()),
130 "publisher" => meta.publisher = Some(value.to_string()),
131 "year" | "date" => meta.year = Some(value.to_string()),
132 "keywords" | "keyword" => meta.keywords = Some(value.to_string()),
133 _ => meta.extra.push((key.to_string(), value.to_string())),
134 }
135}
136
/// Lexical token produced by [`tokenize`].
#[derive(Debug)]
enum Token<'a> {
    /// An opening parenthesis `(`.
    LParen,
    /// A closing parenthesis `)`.
    RParen,
    /// A bare (unquoted) word, borrowed from the input text.
    Atom(&'a str),
    /// The contents of a double-quoted string with backslashes resolved.
    Quoted(String),
}

/// Returns `true` for the ASCII bytes that end a bare atom.
fn ends_atom(byte: u8) -> bool {
    matches!(byte, b'(' | b')' | b'"' | b' ' | b'\t' | b'\n' | b'\r')
}

/// Reads the body of a quoted string that starts at byte offset `start`
/// (the position just after the opening `"`).
///
/// Returns the decoded text and the byte offset at which scanning should
/// resume: just past the closing quote, or `input.len()` when the string is
/// unterminated. A backslash makes the following character literal; a
/// backslash that is the very last character of the input is kept as-is.
///
/// The string is walked character by character rather than byte by byte so
/// that multi-byte UTF-8 sequences are copied intact.
fn read_quoted(input: &str, start: usize) -> (String, usize) {
    let mut text = String::new();
    // `start` follows an ASCII quote, so it is always a char boundary.
    let mut chars = input.get(start..).unwrap_or("").char_indices();

    while let Some((offset, ch)) = chars.next() {
        match ch {
            '"' => return (text, start + offset + 1),
            '\\' => match chars.next() {
                Some((_, escaped)) => text.push(escaped),
                None => text.push('\\'),
            },
            other => text.push(other),
        }
    }

    (text, input.len())
}

/// Splits S-expression text into [`Token`]s.
///
/// * `(` and `)` become [`Token::LParen`] / [`Token::RParen`].
/// * `"..."` becomes [`Token::Quoted`]; an unterminated string runs to the
///   end of the input.
/// * Space, tab, `\n` and `\r` separate tokens.
/// * A `;` at the start of a token begins a comment that lasts until the
///   next `\n`.
/// * Anything else starts a [`Token::Atom`] that extends up to the next
///   parenthesis, quote or whitespace byte.
fn tokenize(input: &str) -> Vec<Token<'_>> {
    let bytes = input.as_bytes();
    let mut tokens = Vec::new();
    let mut i = 0;

    // All structural characters are ASCII, so scanning bytes is safe: a
    // UTF-8 continuation byte can never be mistaken for one of them.
    while let Some(&byte) = bytes.get(i) {
        match byte {
            b'(' => {
                tokens.push(Token::LParen);
                i += 1;
            }
            b')' => {
                tokens.push(Token::RParen);
                i += 1;
            }
            b'"' => {
                let (text, next) = read_quoted(input, i + 1);
                tokens.push(Token::Quoted(text));
                i = next;
            }
            b' ' | b'\t' | b'\n' | b'\r' => i += 1,
            b';' => {
                // The newline itself is consumed as whitespace afterwards.
                while bytes.get(i).is_some_and(|&b| b != b'\n') {
                    i += 1;
                }
            }
            _ => {
                let start = i;
                while bytes.get(i).is_some_and(|&b| !ends_atom(b)) {
                    i += 1;
                }
                // Both ends sit next to ASCII bytes (or the input edges), so
                // the range is always on char boundaries.
                if let Some(word) = input.get(start..i) {
                    tokens.push(Token::Atom(word));
                }
            }
        }
    }

    tokens
}
218
219#[derive(Debug)]
220enum SExpr {
221 Atom(String),
222 List(Vec<SExpr>),
223}
224
225fn parse_sexprs(tokens: &[Token<'_>]) -> Vec<SExpr> {
226 let mut result = Vec::new();
227 let mut pos = 0usize;
228 while pos < tokens.len() {
229 if let Some(expr) = parse_one(tokens, &mut pos) {
230 result.push(expr);
231 }
232 }
233 result
234}
235
236fn parse_one(tokens: &[Token<'_>], pos: &mut usize) -> Option<SExpr> {
237 match tokens.get(*pos) {
238 Some(Token::LParen) => {
239 *pos += 1;
240 let mut items = Vec::new();
241 loop {
242 match tokens.get(*pos) {
243 Some(Token::RParen) => {
244 *pos += 1;
245 break;
246 }
247 None => break,
248 _ => {
249 if let Some(child) = parse_one(tokens, pos) {
250 items.push(child);
251 } else {
252 break;
253 }
254 }
255 }
256 }
257 Some(SExpr::List(items))
258 }
259 Some(Token::RParen) => {
260 *pos += 1;
261 None
262 }
263 Some(Token::Atom(s)) => {
264 let s = s.to_string();
265 *pos += 1;
266 Some(SExpr::Atom(s))
267 }
268 Some(Token::Quoted(s)) => {
269 let s = s.clone();
270 *pos += 1;
271 Some(SExpr::Atom(s))
272 }
273 None => None,
274 }
275}
276
/// Unit tests for the public parsing entry point.
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `src` and panics if the parser reports an error.
    fn parse(src: &[u8]) -> DjVuMetadata {
        parse_metadata(src).expect("input should parse")
    }

    #[test]
    fn empty_input_returns_default() {
        assert_eq!(parse(b""), DjVuMetadata::default());
    }

    #[test]
    fn basic_metadata_block() {
        let meta = parse(br#"(metadata (title "My Book") (author "Jane Doe") (year "2023"))"#);
        assert_eq!(meta.title.as_deref(), Some("My Book"));
        assert_eq!(meta.author.as_deref(), Some("Jane Doe"));
        assert_eq!(meta.year.as_deref(), Some("2023"));
        assert!(meta.subject.is_none());
    }

    #[test]
    fn subject_and_keywords() {
        let meta = parse(br#"(metadata (subject "Science") (keywords "physics, chemistry"))"#);
        assert_eq!(meta.subject.as_deref(), Some("Science"));
        assert_eq!(meta.keywords.as_deref(), Some("physics, chemistry"));
    }

    #[test]
    fn description_alias_maps_to_subject() {
        let meta = parse(br#"(metadata (description "A long description"))"#);
        assert_eq!(meta.subject.as_deref(), Some("A long description"));
    }

    #[test]
    fn date_alias_maps_to_year() {
        let meta = parse(br#"(metadata (date "2020-01-15"))"#);
        assert_eq!(meta.year.as_deref(), Some("2020-01-15"));
    }

    #[test]
    fn extra_keys_go_to_extra_vec() {
        let meta = parse(br#"(metadata (custom-field "value1") (another "value2"))"#);
        let expected = [
            ("custom-field".to_string(), "value1".to_string()),
            ("another".to_string(), "value2".to_string()),
        ];
        assert_eq!(meta.extra, expected);
    }

    #[test]
    fn publisher_field() {
        let meta = parse(br#"(metadata (publisher "Oxford University Press"))"#);
        assert_eq!(meta.publisher.as_deref(), Some("Oxford University Press"));
    }

    #[test]
    fn case_insensitive_keys() {
        let meta = parse(br#"(metadata (TITLE "Upper") (Author "Mixed"))"#);
        assert_eq!(meta.title.as_deref(), Some("Upper"));
        assert_eq!(meta.author.as_deref(), Some("Mixed"));
    }

    #[test]
    fn escaped_quotes_in_value() {
        let meta = parse(br#"(metadata (title "Book with \"quotes\""))"#);
        assert_eq!(meta.title.as_deref(), Some(r#"Book with "quotes""#));
    }

    #[test]
    fn no_metadata_wrapper_returns_default() {
        // A well-formed list whose head is not `metadata` contributes nothing.
        assert_eq!(parse(br#"(background #ffffff)"#), DjVuMetadata::default());
    }

    #[test]
    fn multiline_metadata() {
        let meta = parse(b"(metadata\n (title \"Line1\")\n (author \"Line2\")\n)");
        assert_eq!(meta.title.as_deref(), Some("Line1"));
        assert_eq!(meta.author.as_deref(), Some("Line2"));
    }

    #[test]
    fn invalid_utf8_returns_error() {
        let outcome = parse_metadata(b"\xFF\xFE");
        assert!(matches!(outcome, Err(MetadataError::InvalidUtf8)));
    }
}