Skip to main content

irontide_bencode/
span.rs

1use crate::error::{Error, Result};
2
3/// Find the raw byte span of a value for a given key in a bencoded dictionary.
4///
5/// This is critical for info-hash computation: the info-hash is the SHA1 of the
6/// *original raw bytes* of the "info" dictionary value, not a re-serialized copy.
7///
8/// Returns the byte range `start..end` of the value associated with `key`.
9///
10/// # Example
11///
12/// ```
13/// use irontide_bencode::find_dict_key_span;
14///
15/// let data = b"d4:infod4:name4:test12:piece lengthi1024eee";
16/// let span = find_dict_key_span(data, "info").unwrap();
17/// assert_eq!(&data[span.clone()], b"d4:name4:test12:piece lengthi1024ee");
18/// ```
19pub fn find_dict_key_span(data: &[u8], key: &str) -> Result<std::ops::Range<usize>> {
20    let mut pos = 0;
21
22    // Expect dict start
23    if data.get(pos) != Some(&b'd') {
24        return Err(Error::NotADictionary { position: pos });
25    }
26    pos += 1;
27
28    let key_bytes = key.as_bytes();
29
30    loop {
31        // Check for dict end
32        if data.get(pos) == Some(&b'e') {
33            return Err(Error::KeyNotFound {
34                key: key.to_string(),
35            });
36        }
37
38        if pos >= data.len() {
39            return Err(Error::UnexpectedEof {
40                position: pos,
41                context: "while scanning dict for key".into(),
42            });
43        }
44
45        // Parse key (byte string)
46        let parsed_key = parse_byte_string(data, &mut pos)?;
47
48        // Record value start
49        let value_start = pos;
50
51        // Skip value
52        skip_value(data, &mut pos)?;
53
54        // Check if this was our target key
55        if parsed_key == key_bytes {
56            return Ok(value_start..pos);
57        }
58    }
59}
60
61/// Parse a bencode byte string, returning the string data and advancing `pos`.
62fn parse_byte_string<'a>(data: &'a [u8], pos: &mut usize) -> Result<&'a [u8]> {
63    let start = *pos;
64
65    // Find colon
66    let colon = data[*pos..]
67        .iter()
68        .position(|&b| b == b':')
69        .ok_or(Error::InvalidByteString {
70            position: start,
71            detail: "missing ':'".into(),
72        })?;
73
74    let len_str =
75        std::str::from_utf8(&data[*pos..*pos + colon]).map_err(|_| Error::InvalidByteString {
76            position: start,
77            detail: "non-ASCII length".into(),
78        })?;
79
80    let len: usize =
81        len_str
82            .parse()
83            .map_err(|e: std::num::ParseIntError| Error::InvalidByteString {
84                position: start,
85                detail: e.to_string(),
86            })?;
87
88    *pos += colon + 1;
89
90    if *pos + len > data.len() {
91        return Err(Error::UnexpectedEof {
92            position: *pos,
93            context: format!("byte string needs {len} bytes"),
94        });
95    }
96
97    let result = &data[*pos..*pos + len];
98    *pos += len;
99    Ok(result)
100}
101
102/// Skip over a complete bencode value, advancing `pos` past it.
103fn skip_value(data: &[u8], pos: &mut usize) -> Result<()> {
104    match data.get(*pos) {
105        Some(b'i') => {
106            *pos += 1;
107            let end = data[*pos..]
108                .iter()
109                .position(|&b| b == b'e')
110                .ok_or(Error::UnexpectedEof {
111                    position: *pos,
112                    context: "unterminated integer".into(),
113                })?;
114            *pos += end + 1;
115            Ok(())
116        }
117        Some(b'l') => {
118            *pos += 1;
119            while data.get(*pos) != Some(&b'e') {
120                if *pos >= data.len() {
121                    return Err(Error::UnexpectedEof {
122                        position: *pos,
123                        context: "unterminated list".into(),
124                    });
125                }
126                skip_value(data, pos)?;
127            }
128            *pos += 1; // skip 'e'
129            Ok(())
130        }
131        Some(b'd') => {
132            *pos += 1;
133            while data.get(*pos) != Some(&b'e') {
134                if *pos >= data.len() {
135                    return Err(Error::UnexpectedEof {
136                        position: *pos,
137                        context: "unterminated dict".into(),
138                    });
139                }
140                parse_byte_string(data, pos)?; // key
141                skip_value(data, pos)?; // value
142            }
143            *pos += 1; // skip 'e'
144            Ok(())
145        }
146        Some(b'0'..=b'9') => {
147            parse_byte_string(data, pos)?;
148            Ok(())
149        }
150        Some(&byte) => Err(Error::UnexpectedByte {
151            byte,
152            position: *pos,
153            expected: "bencode value",
154        }),
155        None => Err(Error::UnexpectedEof {
156            position: *pos,
157            context: "expected value".into(),
158        }),
159    }
160}
161
#[cfg(test)]
mod tests {
    use super::*;

    /// Look up `key` in `data` and return the raw bytes of its value.
    ///
    /// Panics if the lookup fails, which fails the calling test.
    fn value_bytes<'a>(data: &'a [u8], key: &str) -> &'a [u8] {
        let range = find_dict_key_span(data, key).unwrap();
        &data[range]
    }

    // The value of the first key is returned even when more entries follow it.
    #[test]
    fn find_info_key() {
        let torrent = b"d4:infod4:name4:test12:piece lengthi1024ee8:url-list4:httpe";
        assert_eq!(
            value_bytes(torrent, "info"),
            b"d4:name4:test12:piece lengthi1024ee"
        );
    }

    // Earlier entries are skipped until the requested key is reached.
    #[test]
    fn find_last_key() {
        assert_eq!(value_bytes(b"d1:ai1e1:bi2e1:ci3ee", "c"), b"i3e");
    }

    // Reaching the closing 'e' without a match is reported as `KeyNotFound`.
    #[test]
    fn key_not_found() {
        let outcome = find_dict_key_span(b"d1:ai1ee", "z");
        assert!(matches!(outcome, Err(Error::KeyNotFound { .. })));
    }

    // Input whose top-level value is not a dict is rejected up front.
    #[test]
    fn not_a_dict() {
        let outcome = find_dict_key_span(b"i42e", "info");
        assert!(matches!(outcome, Err(Error::NotADictionary { .. })));
    }

    // A dict-valued entry is returned whole, including its own closing 'e'.
    #[test]
    fn nested_dict_value() {
        assert_eq!(
            value_bytes(b"d5:outerd5:inner3:valee", "outer"),
            b"d5:inner3:vale"
        );
    }
}