// ankit_engine/media.rs
//! Media audit and cleanup operations.
//!
//! This module provides workflows for auditing media files
//! and cleaning up orphaned or missing references.
5
6use crate::Result;
7use ankit::AnkiClient;
8use serde::Serialize;
9use std::collections::HashSet;
10
11/// Result of a media audit.
12#[derive(Debug, Clone, Default, Serialize)]
13pub struct MediaAudit {
14 /// Total number of media files.
15 pub total_files: usize,
16 /// Total size of media files in bytes.
17 pub total_size_bytes: u64,
18 /// Media files not referenced by any note.
19 pub orphaned: Vec<String>,
20 /// Media references in notes that don't exist.
21 pub missing: Vec<MissingMedia>,
22 /// Media files by type.
23 pub by_type: MediaByType,
24}
25
26/// Missing media reference.
27#[derive(Debug, Clone, Serialize)]
28pub struct MissingMedia {
29 /// The note ID referencing this media.
30 pub note_id: i64,
31 /// The missing filename.
32 pub filename: String,
33}
34
35/// Media file counts by type.
36#[derive(Debug, Clone, Default, Serialize)]
37pub struct MediaByType {
38 /// Number of image files.
39 pub images: usize,
40 /// Number of audio files.
41 pub audio: usize,
42 /// Number of video files.
43 pub video: usize,
44 /// Number of other files.
45 pub other: usize,
46}
47
48/// Result of a cleanup operation.
49#[derive(Debug, Clone, Default, Serialize)]
50pub struct CleanupReport {
51 /// Number of files deleted.
52 pub files_deleted: usize,
53 /// Bytes freed.
54 pub bytes_freed: u64,
55 /// Files that failed to delete.
56 pub failed: Vec<String>,
57}
58
59/// Media workflow engine.
60#[derive(Debug)]
61pub struct MediaEngine<'a> {
62 client: &'a AnkiClient,
63}
64
65impl<'a> MediaEngine<'a> {
66 pub(crate) fn new(client: &'a AnkiClient) -> Self {
67 Self { client }
68 }
69
70 /// Audit media files in the collection.
71 ///
72 /// Identifies orphaned files (not referenced by notes) and
73 /// missing references (notes referencing non-existent files).
74 ///
75 /// # Example
76 ///
77 /// ```no_run
78 /// # use ankit_engine::Engine;
79 /// # async fn example() -> ankit_engine::Result<()> {
80 /// let engine = Engine::new();
81 /// let audit = engine.media().audit().await?;
82 /// println!("Found {} orphaned files", audit.orphaned.len());
83 /// println!("Found {} missing references", audit.missing.len());
84 /// # Ok(())
85 /// # }
86 /// ```
87 pub async fn audit(&self) -> Result<MediaAudit> {
88 // Get all media files
89 let all_files = self.client.media().list("*").await?;
90
91 let mut audit = MediaAudit {
92 total_files: all_files.len(),
93 ..Default::default()
94 };
95
96 // Categorize by type
97 for file in &all_files {
98 let lower = file.to_lowercase();
99 if lower.ends_with(".jpg")
100 || lower.ends_with(".jpeg")
101 || lower.ends_with(".png")
102 || lower.ends_with(".gif")
103 || lower.ends_with(".webp")
104 || lower.ends_with(".svg")
105 {
106 audit.by_type.images += 1;
107 } else if lower.ends_with(".mp3")
108 || lower.ends_with(".wav")
109 || lower.ends_with(".ogg")
110 || lower.ends_with(".m4a")
111 || lower.ends_with(".flac")
112 {
113 audit.by_type.audio += 1;
114 } else if lower.ends_with(".mp4")
115 || lower.ends_with(".webm")
116 || lower.ends_with(".mkv")
117 || lower.ends_with(".avi")
118 {
119 audit.by_type.video += 1;
120 } else {
121 audit.by_type.other += 1;
122 }
123 }
124
125 // Get all notes and check for media references
126 let all_notes = self.client.notes().find("*").await?;
127
128 if all_notes.is_empty() {
129 // No notes, all media is orphaned
130 audit.orphaned = all_files;
131 return Ok(audit);
132 }
133
134 // Get note info in batches
135 let mut referenced_files: HashSet<String> = HashSet::new();
136 let batch_size = 100;
137
138 for chunk in all_notes.chunks(batch_size) {
139 let infos = self.client.notes().info(chunk).await?;
140 for info in infos {
141 for field in info.fields.values() {
142 // Extract media references from field content
143 // Matches [sound:filename] and <img src="filename">
144 for filename in extract_media_references(&field.value) {
145 referenced_files.insert(filename);
146 }
147 }
148 }
149 }
150
151 // Find orphaned files
152 let file_set: HashSet<_> = all_files.iter().cloned().collect();
153 audit.orphaned = all_files
154 .iter()
155 .filter(|f| !referenced_files.contains(*f))
156 .cloned()
157 .collect();
158
159 // Find missing references
160 for filename in &referenced_files {
161 if !file_set.contains(filename) {
162 // Find which note references this
163 // For now, just record the filename without the note ID
164 audit.missing.push(MissingMedia {
165 note_id: 0, // Would need to track this during extraction
166 filename: filename.clone(),
167 });
168 }
169 }
170
171 Ok(audit)
172 }
173
174 /// Delete orphaned media files.
175 ///
176 /// # Arguments
177 ///
178 /// * `dry_run` - If true, only report what would be deleted
179 ///
180 /// # Example
181 ///
182 /// ```no_run
183 /// # use ankit_engine::Engine;
184 /// # async fn example() -> ankit_engine::Result<()> {
185 /// let engine = Engine::new();
186 ///
187 /// // Preview what would be deleted
188 /// let preview = engine.media().cleanup_orphaned(true).await?;
189 /// println!("Would delete {} files", preview.files_deleted);
190 ///
191 /// // Actually delete
192 /// let report = engine.media().cleanup_orphaned(false).await?;
193 /// # Ok(())
194 /// # }
195 /// ```
196 pub async fn cleanup_orphaned(&self, dry_run: bool) -> Result<CleanupReport> {
197 let audit = self.audit().await?;
198
199 if dry_run || audit.orphaned.is_empty() {
200 return Ok(CleanupReport {
201 files_deleted: audit.orphaned.len(),
202 ..Default::default()
203 });
204 }
205
206 let mut report = CleanupReport::default();
207
208 for filename in audit.orphaned {
209 match self.client.media().delete(&filename).await {
210 Ok(_) => report.files_deleted += 1,
211 Err(_) => report.failed.push(filename),
212 }
213 }
214
215 Ok(report)
216 }
217
218 /// List media files matching a pattern.
219 ///
220 /// # Arguments
221 ///
222 /// * `pattern` - Glob pattern (e.g., "*.mp3", "image_*")
223 ///
224 /// # Example
225 ///
226 /// ```no_run
227 /// # use ankit_engine::Engine;
228 /// # async fn example() -> ankit_engine::Result<()> {
229 /// let engine = Engine::new();
230 /// let audio_files = engine.media().list("*.mp3").await?;
231 /// # Ok(())
232 /// # }
233 /// ```
234 pub async fn list(&self, pattern: &str) -> Result<Vec<String>> {
235 Ok(self.client.media().list(pattern).await?)
236 }
237}
238
239/// Extract media filenames from HTML field content.
240fn extract_media_references(html: &str) -> Vec<String> {
241 let mut files = Vec::new();
242
243 // Match [sound:filename]
244 let sound_pattern = regex_lite::Regex::new(r"\[sound:([^\]]+)\]").unwrap();
245 for cap in sound_pattern.captures_iter(html) {
246 if let Some(m) = cap.get(1) {
247 files.push(m.as_str().to_string());
248 }
249 }
250
251 // Match <img src="filename">
252 let img_pattern = regex_lite::Regex::new(r#"<img[^>]+src="([^"]+)"[^>]*>"#).unwrap();
253 for cap in img_pattern.captures_iter(html) {
254 if let Some(m) = cap.get(1) {
255 let src = m.as_str();
256 // Skip external URLs
257 if !src.starts_with("http://") && !src.starts_with("https://") {
258 files.push(src.to_string());
259 }
260 }
261 }
262
263 files
264}