1mod dict;
2mod end;
3mod int;
4mod list;
5mod node;
6mod stack_frame;
7mod str;
8mod utils;
9mod macros;
10mod commons;
11mod token;
12
13use std::{borrow::Cow, collections::HashMap, sync::Arc};
14
15use commons::limits::{self, BUFFER_MAX_OFFSET, DEFAULT_DEPTH_LIMIT, DEFAULT_TOKEN_LIMIT};
16use stack_frame::{StackFrame, StackFrameBuilder};
17use token::{BdecodeToken, BdecodeTokenType};
18use utils::{check_integer, gen_item_indexes, parse_uint};
19
20pub use {dict::*, end::*, int::*, list::*, node::*, str::*};
21
22use crate::{BdecodeError, BdecodeResult};
23
24#[derive(Clone)]
26pub enum BdecodeNode {
27 Dict(Dict),
28 List(List),
29 Str(Str),
30 Int(Int),
31 End(End),
32}
33
34impl BdecodeNode {
35 pub fn new(
36 token_idx: u32,
37 tokens: Arc<Vec<BdecodeToken>>,
38 buffer: Arc<Vec<u8>>,
39 ) -> BdecodeNode {
40 let token = &tokens[token_idx as usize];
41 let node = match token.node_type() {
42 BdecodeTokenType::Str => {
43 let v = Str::new(buffer, tokens, token_idx);
44 BdecodeNode::Str(v)
45 }
46 BdecodeTokenType::Int => {
47 let v = Int::new(buffer, tokens, token_idx);
48 BdecodeNode::Int(v)
49 }
50 BdecodeTokenType::List => {
51 let (item_indexes, len) = gen_item_indexes(&tokens, token_idx as usize);
52 let v = List::new(buffer, tokens, token_idx, item_indexes, len);
53
54 BdecodeNode::List(v)
55 }
56 BdecodeTokenType::Dict => {
57 let (item_indexes, len) = gen_item_indexes(&tokens, token_idx as usize);
58 let v = Dict::new(buffer, tokens, token_idx, item_indexes, len);
59
60 BdecodeNode::Dict(v)
61 }
62 BdecodeTokenType::End => {
63 let v = End::new(buffer, tokens, token_idx);
64 BdecodeNode::End(v)
65 }
66 };
67 node
68 }
69
70 pub fn as_int(&self) -> BdecodeResult<i64> {
71 let BdecodeNode::Int(inner_node) = self else {
72 panic!("not a Int node")
73 };
74
75 inner_node.value()
76 }
77
78 pub fn as_str(&self) -> Cow<[u8]> {
79 let BdecodeNode::Str(inner_node) = self else {
80 panic!("not a Str node")
81 };
82
83 inner_node.value()
84 }
85
86 pub fn len(&self) -> usize {
87 use BdecodeNode::*;
88
89 match self {
90 List(inner_node) => inner_node.len(),
91 Dict(inner_node) => inner_node.len(),
92 _ => panic!("not a List or Dict node"),
93 }
94 }
95
96 pub fn list_item(&self, index: usize) -> BdecodeNode {
97 let BdecodeNode::List(inner_node) = self else {
98 panic!("not a List node")
99 };
100
101 inner_node.item(index)
102 }
103
104 pub fn list_item_as_int(&self, index: usize) -> BdecodeResult<i64> {
105 let BdecodeNode::List(inner_node) = self else {
106 panic!("not a List node")
107 };
108
109 inner_node.as_int(index)
110 }
111
112 pub fn list_item_as_str(&self, index: usize) -> Cow<[u8]> {
113 let BdecodeNode::List(inner_node) = self else {
114 panic!("not a List node")
115 };
116
117 inner_node.as_str(index)
118 }
119
120 pub fn dict_item(&self, index: usize) -> (BdecodeNode, BdecodeNode) {
121 let BdecodeNode::Dict(inner_node) = self else {
122 panic!("not a Dict node")
123 };
124
125 inner_node.item(index)
126 }
127
128 pub fn dict_find(&self, key: &[u8]) -> Option<BdecodeNode> {
129 let BdecodeNode::Dict(inner_node) = self else {
130 panic!("not a Dict node")
131 };
132
133 inner_node.find(key)
134 }
135
136 pub fn dict_find_as_str(&self, key: &[u8]) -> Option<Cow<[u8]>> {
137 let BdecodeNode::Dict(inner_node) = self else {
138 panic!("not a Dict node")
139 };
140
141 inner_node.find_as_str(key)
142 }
143
144 pub fn dict_find_as_int(&self, key: &[u8]) -> Option<i64> {
145 let BdecodeNode::Dict(inner_node) = self else {
146 panic!("not a Dict node")
147 };
148
149 inner_node.find_as_int(key)
150 }
151
152 pub fn dict_find_as_list(&self, key: &[u8]) -> Option<Vec<BdecodeNode>> {
153 let BdecodeNode::Dict(inner_node) = self else {
154 panic!("not a Dict node")
155 };
156
157 inner_node.find_as_list(key)
158 }
159
160 pub fn dict_find_as_dict(&self, key: &[u8]) -> Option<HashMap<Cow<[u8]>, BdecodeNode>> {
161 let BdecodeNode::Dict(inner_node) = self else {
162 panic!("not a Dict node")
163 };
164
165 inner_node.find_as_dict(key)
166 }
167
168 pub fn parse(
169 buffer: Vec<u8>,
170 depth_limit: Option<usize>,
171 token_limit: Option<i32>,
172 ) -> BdecodeResult<Self> {
173 let depth_limit = depth_limit.unwrap_or(DEFAULT_DEPTH_LIMIT);
174 let mut token_limit = token_limit.unwrap_or(DEFAULT_TOKEN_LIMIT as i32);
175
176 let mut tokens = Vec::<BdecodeToken>::new();
177
178 if buffer.len() > BUFFER_MAX_OFFSET as usize {
179 Err(BdecodeError::LimitExceeded(buffer.len()))?
180 }
181
182 let mut start = 0;
183 let end = buffer.len();
184
185 let mut stack = Vec::<StackFrame>::with_capacity(depth_limit as usize);
188
189 let mut current_frame_ptr: Option<* mut StackFrame> = None;
191
192 if end == 0 {
193 Err(BdecodeError::UnexpectedEof(0))?
194 }
195
196 while start <= end {
197 if stack.len() >= depth_limit as usize {
198 Err(BdecodeError::DepthExceeded(depth_limit as usize))?
199 }
200
201 token_limit -= 1;
202 if token_limit < 0 {
203 Err(BdecodeError::LimitExceeded(DEFAULT_TOKEN_LIMIT as usize))?
204 }
205
206 let Some(t) = buffer.get(start) else {
208 Err(BdecodeError::UnexpectedEof(start))?
209 };
210
211 if let Some(stack_frame_ptr) = current_frame_ptr {
213 let stack_frame = unsafe { *stack_frame_ptr };
214 if tokens[stack_frame.token() as usize].node_type() == BdecodeTokenType::Dict
216 && stack_frame.state() == 0
218 && !t.is_ascii_digit()
220 && *t != b'e'
222 {
223 Err(BdecodeError::ExpectedDigit(start))?
224 }
225 }
226
227 match t {
228 b'd' => {
229 let frame = StackFrameBuilder::new()
230 .with_token(tokens.len() as u32)
231 .build();
232 stack.push(frame);
233 tokens.push(BdecodeToken::new_dict(start as u32, 0));
235
236 start += 1;
237 }
238 b'l' => {
239 let frame = StackFrameBuilder::new()
240 .with_token(tokens.len() as u32)
241 .build();
242 stack.push(frame);
243 tokens.push(BdecodeToken::new_list(start as u32, 0));
245
246 start += 1;
247 }
248 b'i' => {
249 let int_start = start;
250 start = check_integer(buffer.as_ref(), start + 1 as usize)?;
251 tokens.push(BdecodeToken::new_int(int_start as u32));
252
253 assert!(buffer[start] == b'e');
254
255 start += 1;
257 }
258 b'e' => {
259 if stack.is_empty() {
260 return Err(BdecodeError::UnexpectedEof(start));
261 }
262
263 if let Some(stack_frame) = stack.last() {
265 if tokens[stack_frame.token() as usize].node_type() == BdecodeTokenType::Dict
267 && stack_frame.state() == 1
269 {
270 Err(BdecodeError::ExpectedValue(start))?
271 }
272 }
273
274 tokens.push(BdecodeToken::new_end(start as u32));
276
277 let top = stack.last().expect("stack is empty").token() as usize;
281 let next_item = tokens.len() - top;
282
283 if next_item > limits::MAX_NEXT_ITEM {
284 return Err(BdecodeError::LimitExceeded(limits::MAX_NEXT_ITEM));
285 }
286
287 tokens[top].set_next_item(next_item as u32);
289
290 stack.pop();
291 start += 1;
292 }
293 _ => {
295 if !t.is_ascii_digit() {
296 return Err(BdecodeError::ExpectedDigit(start));
297 }
298
299 let mut len = (t - b'0') as i64;
300 let str_start = start;
301 start += 1;
302
303 if start >= end {
304 return Err(BdecodeError::UnexpectedEof(start));
305 }
306
307 start = parse_uint(buffer.as_ref(), start, b':', &mut len)?;
309
310 if start == end {
311 return Err(BdecodeError::ExpectedColon(str_start, end));
312 }
313
314 let buff_size = (end - start - 1) as i64;
316 if len > buff_size {
317 return Err(BdecodeError::UnexpectedEof(start));
318 }
319
320 start += 1;
322 if start > end {
323 return Err(BdecodeError::UnexpectedEof(start));
324 }
325
326 let header_size = start - str_start - 1;
338 if header_size > limits::MAX_HEADER_SIZE {
339 return Err(BdecodeError::LimitExceeded(limits::MAX_HEADER_SIZE));
340 }
341
342 tokens.push(BdecodeToken::new_str(str_start as u32, header_size as u8));
343 start += len as usize;
345 }
346 }
347
348
349 if let Some(stack_frame_prt) = current_frame_ptr {
350 let stack_frame = unsafe { stack_frame_prt.as_mut_unchecked() };
351 if tokens[stack_frame.token() as usize].node_type() == BdecodeTokenType::Dict {
367 let _state = stack_frame.state();
369 stack_frame.set_state(!stack_frame.state());
370 }
371 }
372
373 current_frame_ptr = stack.last_mut().map(|frame_ref| {
375 frame_ref as *mut StackFrame
376 });
377
378 if stack.is_empty() {
380 break;
381 }
382 } tokens.push(BdecodeToken::new_end(start as u32));
386
387 Ok(BdecodeNode::new(0, Arc::new(tokens), Arc::new(buffer)))
388 }
389
390 pub fn parse_buffer(buffer: Vec<u8>) -> BdecodeResult<Self> {
391 Self::parse(buffer, None, None)
392 }
393
394 pub fn to_json(&self) -> String {
395 match self {
396 BdecodeNode::Dict(inner_node) => inner_node.to_json(),
397 BdecodeNode::List(inner_node) => inner_node.to_json(),
398 BdecodeNode::Str(inner_node) => inner_node.to_json(),
399 BdecodeNode::Int(inner_node) => inner_node.to_json(),
400 BdecodeNode::End(inner_node) => inner_node.to_json(),
401 }
402 }
403}
404
405impl core::fmt::Debug for BdecodeNode {
406 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
407 match self {
408 BdecodeNode::Dict(inner_node) => {
409 f.debug_struct("Dict")
410 .field("token_idx", &inner_node.token_index)
411 .field("item_indexes", &inner_node.item_indexes)
412 .field("len", &inner_node.len())
413 .field("tokens", &inner_node.tokens)
414 .field("buffer", &bytes::Bytes::copy_from_slice(&inner_node.buffer))
415 .finish()
416 }
417 BdecodeNode::List(inner_node) => {
418 f.debug_struct("List")
419 .field("token_idx", &inner_node.token_index)
420 .field("item_indexes", &inner_node.item_indexes)
421 .field("len", &inner_node.len())
422 .field("tokens", &inner_node.tokens)
423 .field("buffer", &bytes::Bytes::copy_from_slice(&inner_node.buffer))
424 .finish()
425 }
426 BdecodeNode::Str(inner_node) => {
427 f.debug_struct("Str")
428 .field("token_idx", &inner_node.token_index)
429 .field("tokens", &inner_node.tokens)
430 .field("buffer", &bytes::Bytes::copy_from_slice(&inner_node.buffer))
431 .finish()
432 }
433 BdecodeNode::Int(inner_node) => {
434 f.debug_struct("Int")
435 .field("token_idx", &inner_node.token_index)
436 .field("tokens", &inner_node.tokens)
437 .field("buffer", &bytes::Bytes::copy_from_slice(&inner_node.buffer))
438 .finish()
439 }
440 BdecodeNode::End(inner_node) => {
441 f.debug_struct("End")
442 .field("token_idx", &inner_node.token_index)
443 .field("tokens", &inner_node.tokens)
444 .field("buffer", &bytes::Bytes::copy_from_slice(&inner_node.buffer))
445 .finish()
446 }
447 }
448 }
449}
450
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses a bencoded literal with the default limits.
    ///
    /// Spaces in `spec` only separate tokens for readability and are removed
    /// before parsing.
    fn decode(spec: &str) -> BdecodeNode {
        BdecodeNode::parse_buffer(spec.replace(' ', "").into()).unwrap()
    }

    /// Unwraps a `Dict` node or fails the test.
    fn expect_dict(node: BdecodeNode) -> Dict {
        match node {
            BdecodeNode::Dict(dict) => dict,
            _ => panic!("not a Dict node"),
        }
    }

    /// Unwraps a `List` node or fails the test.
    fn expect_list(node: BdecodeNode) -> List {
        match node {
            BdecodeNode::List(list) => list,
            _ => panic!("not a List node"),
        }
    }

    #[test]
    fn test_print() {
        let root = decode("d 2:k1 2:v1 2:k2 d 2:k3 2:v3 2:k4 i9e e 2:k5 l i7e i8e e 2:k6 2:v6 e");
        println!("{}", root.to_json());
    }

    #[test]
    fn test_new_bdecode_node() {
        // A single string: the string token plus the trailing end token.
        match decode("2:k1") {
            BdecodeNode::Str(string) => assert_eq!(2, string.tokens.len()),
            _ => panic!("not a Str node"),
        }

        // A single integer: the int token plus the trailing end token.
        match decode("i19e") {
            BdecodeNode::Int(int) => assert_eq!(2, int.tokens.len()),
            _ => panic!("not a Int node"),
        }

        let list = expect_list(decode("l i19e 2:ab e"));
        assert_eq!(5, list.tokens.len());
        assert_eq!(2, list.len());

        // (input, expected token count, expected number of dict entries)
        let dict_cases: [(&str, usize, usize); 6] = [
            ("d 1:a 1:b 2:cd 3:foo 4:baro i9e e", 9, 3),
            (
                "d 2:k1 2:v1 2:k2 d 2:k3 2:v3 2:k4 i9e e 2:k5 l i7e i8e e 2:k6 2:v6 e",
                19,
                4,
            ),
            ("d 10:k111111111 2:v1 2:k2 d 2:k3 i9e e e", 10, 2),
            ("d 2:k1 l i9e e 2:k2 i2e e", 9, 2),
            ("d 2:k1 d 2:k2 i9e e 2:k3 i3e e", 10, 2),
            ("d 2:k1 d 2:k2 d 2:k3 l i9e e e e 2:k4 1:4 e", 15, 2),
        ];

        for (spec, token_count, entry_count) in dict_cases {
            let dict = expect_dict(decode(spec));
            assert_eq!(token_count, dict.tokens.len());
            assert_eq!(entry_count, dict.len());
        }
    }

    #[test]
    fn test_list_at() {
        let root = decode("l i19e 2:ab d 2:k1 2:v1 2:k2 l i1e i2e e e e");

        assert_eq!(19, root.list_item(0).as_int().unwrap());
        assert_eq!(19, root.list_item_as_int(0).unwrap());
        assert_eq!(b"ab", root.list_item(1).as_str().as_ref());

        let third = root.list_item(2);
        assert!(matches!(third, BdecodeNode::Dict(_)));
        assert_eq!(2, root.list_item(2).len());

        assert_eq!(b"ab", root.list_item_as_str(1).as_ref());
    }

    #[test]
    fn test_dict_item() {
        let root = decode("l i19e 2:ab d 2:k1 2:v1 2:k2 l i1e i2e e e e");
        assert_eq!(3, root.len());

        let dict = root.list_item(2);
        assert_eq!(2, dict.len());

        let (first_key, first_val) = dict.dict_item(0);
        assert_eq!(b"k1", first_key.as_str().as_ref());
        assert_eq!(b"v1", first_val.as_str().as_ref());

        let (second_key, second_val) = dict.dict_item(1);
        assert_eq!(b"k2", second_key.as_str().as_ref());

        let inner_list = expect_list(second_val);
        assert_eq!(7, inner_list.token_index());
        assert_eq!(2, inner_list.len());
        assert_eq!(&vec![8, 9], inner_list.item_indexes.as_ref());
    }

    #[test]
    fn test_dict_find() {
        let root = decode("d 2:k1 2:v1 2:k2 l i1e i2e e 3:k03 i3e 2:k4 d 2:k5 i5e 2:k6 i6e e e");
        assert_eq!(4, root.len());

        // Lookups that return nodes.
        let found_k1 = root.dict_find(b"k1").unwrap();
        assert_eq!(b"v1", found_k1.as_str().as_ref());

        let found_k03 = root.dict_find(b"k03").unwrap();
        assert_eq!(3, found_k03.as_int().unwrap());

        let found_k2 = root.dict_find(b"k2").unwrap();
        assert!(matches!(found_k2, BdecodeNode::List(_)));
        let k2_list = expect_list(found_k2);
        assert_eq!(4, k2_list.token_index());
        assert_eq!(2, k2_list.len());
        assert_eq!(1, k2_list.item(0).as_int().unwrap());
        assert_eq!(2, k2_list.item(1).as_int().unwrap());

        // Typed lookups.
        assert_eq!(b"v1", root.dict_find_as_str(b"k1").unwrap().as_ref());
        assert_eq!(3, root.dict_find_as_int(b"k03").unwrap());

        let k2_items = root.dict_find_as_list(b"k2").unwrap();
        match &k2_items[0] {
            BdecodeNode::Int(int) => assert_eq!(5, int.token_index()),
            _ => panic!("not a Int node"),
        }
        match &k2_items[1] {
            BdecodeNode::Int(int) => assert_eq!(6, int.token_index()),
            _ => panic!("not a Int node"),
        }

        let k4_map = root.dict_find_as_dict(b"k4").unwrap();
        let k5 = k4_map.get(b"k5".as_ref()).unwrap();
        assert_eq!(5, k5.as_int().unwrap());
        let k6 = k4_map.get(b"k6".as_ref()).unwrap();
        assert_eq!(6, k6.as_int().unwrap());
    }

    #[test]
    #[should_panic(expected = "index out of range")]
    fn test_panic_list_at() {
        let root = decode("l i19e 2:ab 2:cd 2:ef e");
        let _ = root.list_item(4);
    }

    #[test]
    fn test_string_value() {
        let root = decode("11:k1000000012");
        assert_eq!(root.as_str().as_ref(), b"k1000000012");
    }

    #[test]
    fn test_int_value() {
        let root = decode("i19e");
        assert_eq!(root.as_int().unwrap(), 19);
    }

    #[test]
    fn test_node_type() {
        let root = decode("2:k1");
        assert!(matches!(root, BdecodeNode::Str(_)))
    }
}