Skip to main content

irontide_core/
metainfo.rs

1use bytes::Bytes;
2use serde::de::{self, Deserializer};
3use serde::{Deserialize, Serialize};
4
5use crate::error::Error;
6use crate::hash::Id20;
7
8/// Deserialize a list of raw 20-byte binary strings into `Vec<Id20>`, silently
9/// dropping entries that are not exactly 20 bytes.
10///
11/// BEP 38 `similar` is a list of info hashes (raw 20-byte binary).  Rather than
12/// rejecting the entire torrent when a single entry has the wrong length, we
13/// keep only the valid ones — a robustness-over-strictness choice consistent
14/// with Postel's law.
15fn deserialize_similar<'de, D: Deserializer<'de>>(deserializer: D) -> Result<Vec<Id20>, D::Error> {
16    struct SimilarVisitor;
17
18    impl<'de> de::Visitor<'de> for SimilarVisitor {
19        type Value = Vec<Id20>;
20
21        fn expecting(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
22            f.write_str("a list of 20-byte binary strings")
23        }
24
25        fn visit_seq<A: de::SeqAccess<'de>>(self, mut seq: A) -> Result<Vec<Id20>, A::Error> {
26            let mut hashes = Vec::new();
27            // Each element is a raw byte string; accept via serde_bytes.
28            while let Some(bytes) = seq.next_element::<serde_bytes::ByteBuf>()? {
29                if let Ok(id) = Id20::from_bytes(bytes.as_ref()) {
30                    hashes.push(id);
31                }
32                // Silently drop entries that are not exactly 20 bytes.
33            }
34            Ok(hashes)
35        }
36    }
37
38    deserializer.deserialize_seq(SimilarVisitor)
39}
40
/// Owned list of URLs decoded from the `url-list` key.
///
/// The key may hold either one string or a list of strings; both shapes are
/// normalised to a `Vec<String>` (a lone string becomes a one-element vector).
#[derive(Clone, Debug, Default)]
pub struct UrlList(pub Vec<String>);
44
45impl<'de> Deserialize<'de> for UrlList {
46    fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
47        struct UrlListVisitor;
48
49        impl<'de> de::Visitor<'de> for UrlListVisitor {
50            type Value = UrlList;
51
52            fn expecting(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
53                f.write_str("a string or list of strings")
54            }
55
56            fn visit_str<E: de::Error>(self, v: &str) -> Result<UrlList, E> {
57                Ok(UrlList(vec![v.to_owned()]))
58            }
59
60            fn visit_bytes<E: de::Error>(self, v: &[u8]) -> Result<UrlList, E> {
61                let s = std::str::from_utf8(v).map_err(de::Error::custom)?;
62                Ok(UrlList(vec![s.to_owned()]))
63            }
64
65            fn visit_seq<A: de::SeqAccess<'de>>(self, mut seq: A) -> Result<UrlList, A::Error> {
66                let mut urls = Vec::new();
67                while let Some(url) = seq.next_element::<String>()? {
68                    urls.push(url);
69                }
70                Ok(UrlList(urls))
71            }
72        }
73
74        deserializer.deserialize_any(UrlListVisitor)
75    }
76}
77
78/// Parsed .torrent file (BEP 3 metainfo, v1).
79#[derive(Debug, Clone)]
80pub struct TorrentMetaV1 {
81    /// The info hash (SHA1 of the raw "info" dict bytes).
82    pub info_hash: Id20,
83    /// Primary announce URL.
84    pub announce: Option<String>,
85    /// Announce list (BEP 12) — list of tracker tiers.
86    pub announce_list: Option<Vec<Vec<String>>>,
87    /// Comment.
88    pub comment: Option<String>,
89    /// Created by.
90    pub created_by: Option<String>,
91    /// Creation date (unix timestamp).
92    pub creation_date: Option<i64>,
93    /// Info dictionary.
94    pub info: InfoDict,
95    /// BEP 19 web seed URLs (GetRight-style).
96    pub url_list: Vec<String>,
97    /// BEP 17 HTTP seed URLs (Hoffman-style).
98    pub httpseeds: Vec<String>,
99    /// Raw info dict bytes for BEP 9 metadata serving.
100    pub info_bytes: Option<Bytes>,
101    /// PEM-encoded SSL CA certificate from the info dict, if present.
102    pub ssl_cert: Option<Vec<u8>>,
103}
104
105/// The "info" dictionary from a .torrent file.
106#[derive(Debug, Clone, Deserialize, Serialize)]
107pub struct InfoDict {
108    /// Suggested file/directory name.
109    pub name: String,
110    /// Piece length in bytes.
111    #[serde(rename = "piece length")]
112    pub piece_length: u64,
113    /// Concatenated SHA1 hashes of each piece (20 bytes each).
114    #[serde(with = "serde_bytes")]
115    pub pieces: Vec<u8>,
116    /// Length in bytes (single-file mode).
117    #[serde(skip_serializing_if = "Option::is_none", default)]
118    pub length: Option<u64>,
119    /// Files (multi-file mode).
120    #[serde(skip_serializing_if = "Option::is_none", default)]
121    pub files: Option<Vec<FileEntry>>,
122    /// Private flag.
123    #[serde(skip_serializing_if = "Option::is_none", default)]
124    pub private: Option<i64>,
125    /// Source tag (private tracker identification).
126    #[serde(skip_serializing_if = "Option::is_none", default)]
127    pub source: Option<String>,
128    /// BEP 35 / SSL torrent: PEM-encoded X.509 CA certificate.
129    /// When present, all peer connections must use TLS with certs chaining to this CA.
130    #[serde(rename = "ssl-cert", skip_serializing_if = "Option::is_none", default)]
131    #[serde(with = "serde_bytes")]
132    pub ssl_cert: Option<Vec<u8>>,
133    /// BEP 38: info hashes of similar/related torrents (raw 20-byte binary strings).
134    ///
135    /// Entries that are not exactly 20 bytes are silently dropped during parsing.
136    #[serde(
137        default,
138        skip_serializing_if = "Vec::is_empty",
139        deserialize_with = "deserialize_similar"
140    )]
141    pub similar: Vec<Id20>,
142    /// BEP 38: collection names this torrent belongs to.
143    #[serde(default, skip_serializing_if = "Vec::is_empty")]
144    pub collections: Vec<String>,
145}
146
147/// A file entry in multi-file mode.
148#[derive(Debug, Clone, Deserialize, Serialize)]
149pub struct FileEntry {
150    /// File length in bytes.
151    pub length: u64,
152    /// Path components (e.g., `["dir", "file.txt"]`).
153    pub path: Vec<String>,
154    /// BEP 47 file attributes ("p"=pad, "h"=hidden, "x"=executable, "l"=symlink).
155    #[serde(skip_serializing_if = "Option::is_none", default)]
156    pub attr: Option<String>,
157    /// File modification time (unix timestamp).
158    #[serde(skip_serializing_if = "Option::is_none", default)]
159    pub mtime: Option<i64>,
160    /// Symlink target path components.
161    #[serde(
162        rename = "symlink path",
163        skip_serializing_if = "Option::is_none",
164        default
165    )]
166    pub symlink_path: Option<Vec<String>>,
167}
168
/// Mode-independent description of one payload file, as returned by
/// `InfoDict::files` for both single-file and multi-file torrents.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct FileInfo {
    /// Relative path, one element per component.
    pub path: Vec<String>,
    /// Size of the file in bytes.
    pub length: u64,
}
177
178/// Raw top-level torrent structure for serde deserialization.
179#[derive(Deserialize)]
180struct RawTorrent {
181    announce: Option<String>,
182    #[serde(rename = "announce-list")]
183    announce_list: Option<Vec<Vec<String>>>,
184    comment: Option<String>,
185    #[serde(rename = "created by")]
186    created_by: Option<String>,
187    #[serde(rename = "creation date")]
188    creation_date: Option<i64>,
189    info: InfoDict,
190    /// BEP 19: web seed URL(s) — single string or list.
191    #[serde(rename = "url-list", default)]
192    url_list: UrlList,
193    /// BEP 17: HTTP seed URLs.
194    #[serde(default)]
195    httpseeds: Vec<String>,
196}
197
198/// Parse a .torrent file from raw bytes.
199///
200/// Computes the info-hash by finding the raw byte span of the "info" key
201/// and SHA1-hashing it directly (not the re-serialized form).
202///
203/// # Errors
204///
205/// Returns an error if the data is not a valid v1 torrent file.
206pub fn torrent_from_bytes(data: &[u8]) -> Result<TorrentMetaV1, Error> {
207    // Step 1: Find the raw info dict span for hashing
208    let info_span = irontide_bencode::find_dict_key_span(data, "info")?;
209    let info_hash = crate::sha1(&data[info_span.clone()]);
210    let info_raw = Bytes::copy_from_slice(&data[info_span]);
211
212    // Step 2: Deserialize the full structure
213    let raw: RawTorrent = irontide_bencode::from_bytes(data)?;
214
215    // Step 3: Validate the info dict
216    validate_info(&raw.info)?;
217
218    let ssl_cert = raw.info.ssl_cert.clone();
219
220    Ok(TorrentMetaV1 {
221        info_hash,
222        announce: raw.announce,
223        announce_list: raw.announce_list,
224        comment: raw.comment,
225        created_by: raw.created_by,
226        creation_date: raw.creation_date,
227        info: raw.info,
228        url_list: raw.url_list.0,
229        httpseeds: raw.httpseeds,
230        info_bytes: Some(info_raw),
231        ssl_cert,
232    })
233}
234
235fn validate_info(info: &InfoDict) -> Result<(), Error> {
236    if info.piece_length == 0 {
237        return Err(Error::InvalidTorrent("piece length is 0".into()));
238    }
239
240    if !info.pieces.len().is_multiple_of(20) {
241        return Err(Error::InvalidTorrent(format!(
242            "pieces length {} is not a multiple of 20",
243            info.pieces.len()
244        )));
245    }
246
247    if info.length.is_none() && info.files.is_none() {
248        return Err(Error::InvalidTorrent(
249            "neither 'length' nor 'files' present".into(),
250        ));
251    }
252
253    if info.length.is_some() && info.files.is_some() {
254        return Err(Error::InvalidTorrent(
255            "both 'length' and 'files' present".into(),
256        ));
257    }
258
259    Ok(())
260}
261
262impl InfoDict {
263    /// Total size of all files in bytes.
264    #[must_use]
265    pub fn total_length(&self) -> u64 {
266        if let Some(length) = self.length {
267            length
268        } else if let Some(ref files) = self.files {
269            files.iter().map(|f| f.length).sum()
270        } else {
271            0
272        }
273    }
274
275    /// Number of pieces.
276    #[must_use]
277    pub fn num_pieces(&self) -> usize {
278        self.pieces.len() / 20
279    }
280
281    /// Get the SHA1 hash for a specific piece.
282    #[must_use]
283    pub fn piece_hash(&self, index: usize) -> Option<Id20> {
284        let start = index * 20;
285        if start + 20 > self.pieces.len() {
286            return None;
287        }
288        let mut hash = [0u8; 20];
289        hash.copy_from_slice(&self.pieces[start..start + 20]);
290        Some(Id20(hash))
291    }
292
293    /// Get file info in a unified format.
294    #[must_use]
295    pub fn files(&self) -> Vec<FileInfo> {
296        if let Some(length) = self.length {
297            vec![FileInfo {
298                path: vec![self.name.clone()],
299                length,
300            }]
301        } else if let Some(ref files) = self.files {
302            files
303                .iter()
304                .map(|f| {
305                    let mut path = vec![self.name.clone()];
306                    path.extend(f.path.clone());
307                    FileInfo {
308                        path,
309                        length: f.length,
310                    }
311                })
312                .collect()
313        } else {
314            vec![]
315        }
316    }
317}
318
#[cfg(test)]
mod tests {
    use super::*;

    /// Sorted leading entries shared by every fixture info dict, up to and
    /// including the `pieces` length prefix; the 20 hash bytes follow it.
    const INFO_HEAD: &[u8] = b"6:lengthi1048576e4:name4:test12:piece lengthi262144e6:pieces20:";

    /// Assemble a minimal bencoded torrent around a fixed info dict
    /// (length, name, piece length, one all-zero piece hash).
    ///
    /// `before_info` holds already-encoded entries whose keys sort before
    /// "info" (e.g. "httpseeds"); `after_info` holds entries whose keys sort
    /// after it (e.g. "url-list").
    fn make_torrent_bytes_sorted(before_info: &[u8], after_info: &[u8]) -> Vec<u8> {
        let parts: [&[u8]; 9] = [
            b"d",
            before_info,
            b"4:info",
            b"d",
            INFO_HEAD,
            &[0u8; 20],
            b"e",
            after_info,
            b"e",
        ];
        parts.concat()
    }

    /// Assemble a torrent whose info dict optionally carries the BEP 38
    /// `collections` and `similar` entries, each given as an already-encoded
    /// bencode value.  Key order stays bencode-sorted: "collections" precedes
    /// "length", and "similar" follows "pieces".
    fn make_torrent_with_bep38(similar: Option<&[u8]>, collections: Option<&[u8]>) -> Vec<u8> {
        // Encoded `key` + `value` when the value is supplied, nothing otherwise.
        fn optional_entry(key: &[u8], value: Option<&[u8]>) -> Vec<u8> {
            value.map(|v| [key, v].concat()).unwrap_or_default()
        }

        let collections_entry = optional_entry(b"11:collections", collections);
        let similar_entry = optional_entry(b"7:similar", similar);

        let parts: [&[u8]; 8] = [
            b"d4:info",
            b"d",
            &collections_entry,
            INFO_HEAD,
            &[0u8; 20],
            &similar_entry,
            b"e",
            b"e",
        ];
        parts.concat()
    }

    #[test]
    fn url_list_single_string() {
        // "url-list" sorts after "info", so it goes in the trailing slot.
        let torrent = make_torrent_bytes_sorted(b"", b"8:url-list24:http://example.com/files");
        let parsed = torrent_from_bytes(&torrent).unwrap();
        assert_eq!(parsed.url_list, ["http://example.com/files"]);
    }

    #[test]
    fn url_list_multiple() {
        let torrent = make_torrent_bytes_sorted(
            b"",
            b"8:url-listl24:http://example.com/files26:http://mirror.example.com/e",
        );
        let parsed = torrent_from_bytes(&torrent).unwrap();
        assert_eq!(
            parsed.url_list,
            ["http://example.com/files", "http://mirror.example.com/"]
        );
    }

    #[test]
    fn url_list_absent() {
        let parsed = torrent_from_bytes(&make_torrent_bytes_sorted(b"", b"")).unwrap();
        assert!(parsed.url_list.is_empty());
    }

    #[test]
    fn httpseeds_present() {
        // "httpseeds" sorts before "info", so it goes in the leading slot.
        let torrent =
            make_torrent_bytes_sorted(b"9:httpseedsl28:http://seed.example.com/seede", b"");
        let parsed = torrent_from_bytes(&torrent).unwrap();
        assert_eq!(parsed.httpseeds, ["http://seed.example.com/seed"]);
    }

    #[test]
    fn httpseeds_absent() {
        let parsed = torrent_from_bytes(&make_torrent_bytes_sorted(b"", b"")).unwrap();
        assert!(parsed.httpseeds.is_empty());
    }

    #[test]
    fn torrent_from_bytes_stores_raw_info_bytes() {
        let parsed = torrent_from_bytes(&make_torrent_bytes_sorted(b"", b"")).unwrap();
        assert!(parsed.info_bytes.is_some());
        let stored = parsed.info_bytes.unwrap();
        // Hashing the retained bytes again must reproduce the info hash.
        assert_eq!(crate::sha1(&stored), parsed.info_hash);
    }

    #[test]
    fn ssl_cert_parsed_from_info_dict() {
        let cert_pem: &[u8] = b"-----BEGIN CERTIFICATE-----\nMIIBtest\n-----END CERTIFICATE-----\n";
        let cert_prefix = format!("8:ssl-cert{}:", cert_pem.len());

        // Same minimal info dict as the other fixtures, with "ssl-cert"
        // appended after "pieces" so the keys stay sorted.
        let parts: [&[u8]; 8] = [
            b"d4:info",
            b"d",
            INFO_HEAD,
            &[0u8; 20],
            cert_prefix.as_bytes(),
            cert_pem,
            b"e",
            b"e",
        ];
        let torrent = parts.concat();

        let parsed = torrent_from_bytes(&torrent).unwrap();
        assert_eq!(parsed.ssl_cert.as_deref(), Some(cert_pem));
        assert_eq!(parsed.info.ssl_cert.as_deref(), Some(cert_pem));
    }

    #[test]
    fn ssl_cert_absent_by_default() {
        let parsed = torrent_from_bytes(&make_torrent_bytes_sorted(b"", b"")).unwrap();
        assert!(parsed.ssl_cert.is_none());
        assert!(parsed.info.ssl_cert.is_none());
    }

    #[test]
    fn parse_similar_torrents_from_info() {
        let hash_a = [0xAAu8; 20];
        let hash_b = [0xBBu8; 20];

        // l 20:<hash_a> 20:<hash_b> e
        let parts: [&[u8]; 6] = [b"l", b"20:", &hash_a, b"20:", &hash_b, b"e"];
        let similar_list = parts.concat();

        let torrent = make_torrent_with_bep38(Some(similar_list.as_slice()), None);
        let parsed = torrent_from_bytes(&torrent).expect("parse should succeed");

        assert_eq!(parsed.info.similar, [Id20(hash_a), Id20(hash_b)]);
    }

    #[test]
    fn parse_collections_from_info() {
        let collections_list: &[u8] = b"l6:movies6:sci-fie";

        let torrent = make_torrent_with_bep38(None, Some(collections_list));
        let parsed = torrent_from_bytes(&torrent).expect("parse should succeed");

        assert_eq!(parsed.info.collections, ["movies", "sci-fi"]);
    }

    #[test]
    fn similar_empty_when_absent() {
        let torrent = make_torrent_bytes_sorted(b"", b"");
        let parsed = torrent_from_bytes(&torrent).expect("parse should succeed");
        assert!(parsed.info.similar.is_empty());
        assert!(parsed.info.collections.is_empty());
    }

    #[test]
    fn similar_ignores_wrong_length_hashes() {
        let too_short = [0xDDu8; 19];
        let valid_hash = [0xCCu8; 20];
        let too_long = [0xEEu8; 21];

        // A 19-byte entry, a valid 20-byte entry, then a 21-byte entry.
        let parts: [&[u8]; 8] = [
            b"l",
            b"19:",
            &too_short,
            b"20:",
            &valid_hash,
            b"21:",
            &too_long,
            b"e",
        ];
        let similar_list = parts.concat();

        let torrent = make_torrent_with_bep38(Some(similar_list.as_slice()), None);
        let parsed = torrent_from_bytes(&torrent).expect("parse should succeed");

        // Only the 20-byte entry survives.
        assert_eq!(parsed.info.similar, [Id20(valid_hash)]);
    }
}