1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
use serde::{Deserialize, Serialize};
/// Describes where a chunk was built from.
///
/// Serialized through serde with the variant name stored under a `"type"`
/// key (`#[serde(tag = "type")]`), so renaming a variant or a field changes
/// the serialized form.
#[derive(Clone, Debug, Deserialize, Eq, Hash, PartialEq, Serialize)]
#[serde(tag = "type")]
pub enum ChunkSource {
/// The chunk was built directly from a file:
/// it is the `index`-th chunk of the file at `path`.
File {
/// Path of the source file. This is a relative path.
path: String,
/// Position of the chunk within the file. It is 0-based: `render` shows
/// index 0 as the 1st chunk.
index: usize,
/// If the chunk is from a PDF file, the page the chunk is from.
/// NOTE(review): presumably `None` for every other kind of file — confirm
/// against the file readers that fill this in.
page: Option<usize>,
},
}
impl ChunkSource {
    /// Returns the string that is hashed to identify this instance.
    ///
    /// The layout is `{path}{index}`, followed by `p{page}` when a page is
    /// present. The value is fed directly to the hash, so the layout is kept
    /// exactly as-is; changing it would presumably change every hash derived
    /// from it.
    pub fn hash_str(&self) -> String {
        match self {
            ChunkSource::File { path, index, page } => {
                let page_suffix = match page {
                    Some(page) => format!("p{page}"),
                    None => String::new(),
                };

                format!("{path}{index}{page_suffix}")
            }
        }
    }

    /// Replaces the path of the source file with `new_path`.
    /// The index and the page are left untouched.
    pub fn set_path(&mut self, new_path: String) {
        match self {
            ChunkSource::File { path, .. } => *path = new_path,
        }
    }

    /// Returns the (0-based) index of the chunk within its source.
    pub fn unwrap_index(&self) -> usize {
        match self {
            ChunkSource::File { index, .. } => *index,
        }
    }

    /// Returns a key whose lexicographic order follows `path` first and
    /// `index` second (the index is zero-padded to 9 digits).
    ///
    /// The page number is deliberately ignored because
    /// 1. `index` is mandatory but `page` is optional, and
    /// 2. `index` is guaranteed to be unique and sequential, while `page`
    ///    can have arbitrary values (it's up to the file readers).
    pub fn sortable_string(&self) -> String {
        match self {
            ChunkSource::File { path, index, page: _ } => format!("file: {path}-{index:09}"),
        }
    }

    /// Renders a human-readable description, e.g. `"21st chunk of a.md (page 3)"`.
    ///
    /// The stored `index` is 0-based and is shown as a 1-based English
    /// ordinal. The page, when present, is appended verbatim (it is already
    /// 1-based).
    pub fn render(&self) -> String {
        /// Formats a 1-based position as an English ordinal ("1st", "22nd", "113th").
        fn ordinal(position: usize) -> String {
            let suffix = match (position % 10, position % 100) {
                // 11, 12 and 13 (and 111, 112, ...) are irregular: always "th".
                (_, 11..=13) => "th",
                (1, _) => "st",
                (2, _) => "nd",
                (3, _) => "rd",
                _ => "th",
            };

            format!("{position}{suffix}")
        }

        match self {
            ChunkSource::File { path, index, page } => {
                // `index` is 0-based, people count from 1.
                let nth = ordinal(*index + 1);
                let page_note = match page {
                    Some(page) => format!(" (page {page})"),
                    None => String::new(),
                };

                format!("{nth} chunk of {path}{page_note}")
            }
        }
    }
}