obsidian_parser/vault/
vault_duplicates.rs1use std::collections::HashSet;
4
5use super::Vault;
6use crate::note::Note;
7
8impl<N> Vault<N>
9where
10 N: Note,
11{
12 #[must_use]
20 #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(path = %self.path.display(), count_notes = %self.notes.len())))]
21 pub fn get_duplicates_notes_by_name(&self) -> Vec<&N> {
22 #[cfg(feature = "tracing")]
23 tracing::debug!("Get duplicates notes by name...");
24
25 let mut duplicated_notes = Vec::new();
26 let mut viewed = HashSet::new();
27 for note in self.notes() {
28 if let Some(note_name) = note.note_name() {
29 let already_have = !viewed.insert(note_name);
30
31 if already_have {
32 duplicated_notes.push(note);
33 }
34 }
35 }
36
37 #[cfg(feature = "tracing")]
38 tracing::debug!("Found {} duplicated notes", duplicated_notes.len());
39
40 duplicated_notes
41 }
42
43 #[must_use]
54 pub fn have_duplicates_notes_by_name(&self) -> bool {
55 !self.get_duplicates_notes_by_name().is_empty()
56 }
57
58 #[cfg(feature = "digest")]
60 #[cfg_attr(docsrs, doc(cfg(feature = "digest")))]
61 #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(path = %self.path.display(), count_notes = %self.notes.len())))]
62 pub fn get_duplicates_notes_by_content<D>(&self) -> Result<Vec<&N>, N::Error>
63 where
64 D: digest::Digest,
65 {
66 #[cfg(feature = "tracing")]
67 tracing::debug!("Get duplicates notes by content");
68
69 let hashed = {
70 let mut hashed = Vec::with_capacity(self.count_notes());
71 for i in 0..self.count_notes() {
72 let content = self.notes()[i].content()?;
73 let hash = D::digest(content.as_bytes());
74
75 hashed.push(hash);
76 }
77
78 hashed
79 };
80
81 let mut duplicated_notes = Vec::new();
82 let mut viewed = HashSet::new();
83 for (note, hash_content) in self.notes().iter().zip(hashed) {
84 let already_have = !viewed.insert(hash_content);
85
86 if already_have {
87 duplicated_notes.push(note);
88 }
89 }
90
91 #[cfg(feature = "tracing")]
92 tracing::debug!("Found {} duplicated notes", duplicated_notes.len());
93
94 Ok(duplicated_notes)
95 }
96
97 #[cfg(feature = "digest")]
99 #[cfg_attr(docsrs, doc(cfg(feature = "digest")))]
100 pub fn have_duplicates_notes_by_content<D>(&self) -> Result<bool, N::Error>
101 where
102 D: digest::Digest,
103 {
104 Ok(!self.get_duplicates_notes_by_content::<D>()?.is_empty())
105 }
106}
107
#[cfg(test)]
mod tests {
    use crate::{
        note::{Note, NoteFromFile},
        prelude::{IteratorVaultBuilder, NoteInMemory, VaultBuilder, VaultOptions},
        vault::Vault,
    };
    use serde::de::DeserializeOwned;
    use std::fs::{self, File};
    use tempfile::TempDir;

    /// Builds a vault from the notes under `temp_dir`, using
    /// `include_hidden(true)` and unwrapping every note-loading result.
    fn build_vault_from_dir<F>(temp_dir: &TempDir) -> Vault<F>
    where
        F: NoteFromFile,
        F::Error: From<std::io::Error>,
        F::Properties: DeserializeOwned,
    {
        let options = VaultOptions::new(temp_dir);

        VaultBuilder::new(&options)
            .include_hidden(true)
            .into_iter()
            .map(Result::unwrap)
            .build_vault(&options)
    }

    /// Creates a vault holding `file.md` and `folder/file.md`, both containing
    /// the text `same text`, so they collide by name and by content.
    ///
    /// The `TempDir` is returned alongside the vault so the caller keeps the
    /// directory alive for the duration of the test.
    fn create_vault_with_duplicates_files<F>() -> (Vault<F>, TempDir)
    where
        F: NoteFromFile,
        F::Error: From<std::io::Error>,
        F::Properties: DeserializeOwned,
    {
        let temp_dir = TempDir::new().unwrap();

        fs::write(temp_dir.path().join("file.md"), b"same text").unwrap();

        let path_to_duplicate_file = temp_dir.path().join("folder");
        fs::create_dir(&path_to_duplicate_file).unwrap();
        fs::write(path_to_duplicate_file.join("file.md"), b"same text").unwrap();

        let vault = build_vault_from_dir(&temp_dir);

        (vault, temp_dir)
    }

    /// Creates a vault holding a single empty `file.md`, so nothing can be
    /// reported as a duplicate.
    ///
    /// The `TempDir` is returned alongside the vault so the caller keeps the
    /// directory alive for the duration of the test.
    fn create_vault_without_duplicates_files<F>() -> (Vault<F>, TempDir)
    where
        F: NoteFromFile,
        F::Error: From<std::io::Error>,
        F::Properties: DeserializeOwned,
    {
        let temp_dir = TempDir::new().unwrap();

        File::create(temp_dir.path().join("file.md")).unwrap();

        let vault = build_vault_from_dir(&temp_dir);

        (vault, temp_dir)
    }

    #[cfg_attr(feature = "tracing", tracing_test::traced_test)]
    #[test]
    fn with_duplicates_notes_by_name() {
        let (vault, _path) = create_vault_with_duplicates_files::<NoteInMemory>();

        let duplicated_notes: Vec<_> = vault
            .get_duplicates_notes_by_name()
            .into_iter()
            .map(|note| note.note_name().unwrap())
            .collect();

        // Only the second occurrence of the name is reported.
        assert_eq!(duplicated_notes, ["file".to_string()]);
        assert!(vault.have_duplicates_notes_by_name());
    }

    #[cfg_attr(feature = "tracing", tracing_test::traced_test)]
    #[test]
    fn without_duplicates_notes_by_name() {
        let (vault, _path) = create_vault_without_duplicates_files::<NoteInMemory>();

        let duplicated_notes: Vec<_> = vault
            .get_duplicates_notes_by_name()
            .into_iter()
            .map(|note| note.note_name().unwrap())
            .collect();

        assert!(duplicated_notes.is_empty());
        assert!(!vault.have_duplicates_notes_by_name());
    }

    #[cfg_attr(feature = "tracing", tracing_test::traced_test)]
    #[test]
    #[cfg(feature = "digest")]
    fn with_duplicates_notes_by_content() {
        let (vault, _path) = create_vault_with_duplicates_files::<NoteInMemory>();

        let duplicated_notes: Vec<_> = vault
            .get_duplicates_notes_by_content::<sha2::Sha256>()
            .unwrap()
            .into_iter()
            .map(|note| note.note_name().unwrap())
            .collect();

        // Only the second note with the same digest is reported.
        assert_eq!(duplicated_notes, ["file".to_string()]);

        assert!(
            vault
                .have_duplicates_notes_by_content::<sha2::Sha256>()
                .unwrap()
        );
    }

    #[cfg_attr(feature = "tracing", tracing_test::traced_test)]
    #[test]
    #[cfg(feature = "digest")]
    fn without_duplicates_notes_by_content() {
        let (vault, _path) = create_vault_without_duplicates_files::<NoteInMemory>();

        let duplicated_notes: Vec<_> = vault
            .get_duplicates_notes_by_content::<sha2::Sha256>()
            .unwrap()
            .into_iter()
            .map(|note| note.note_name().unwrap())
            .collect();

        assert!(duplicated_notes.is_empty());
        assert!(
            !vault
                .have_duplicates_notes_by_content::<sha2::Sha256>()
                .unwrap()
        );
    }
}