mbr-markdown-browser 0.4.5

A fast, featureful markdown viewer, browser, and (optional) static site generator
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
//! Tag index module for tracking tagged pages.
//!
//! This module provides a thread-safe index of pages organized by tag source and value.
//! Tags are normalized (lowercase, spaces as underscores) for consistent lookup,
//! while preserving the original display form.

use papaya::HashMap;
use serde::Serialize;
use std::collections::HashSet;

use crate::wikilink::normalize_tag_value;

/// Information about a page tagged with a specific tag.
///
/// Instances are stored by `TagIndex`, grouped under a normalized
/// `source:value` key, and handed back (cloned) by `TagIndex::get_pages`.
/// The type is serializable; `description` is left out of the serialized
/// form entirely when it is `None`.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct TaggedPage {
    /// URL path to the page (e.g., "/docs/rust-guide/").
    ///
    /// `TagIndex::add_page` uses this field to detect duplicates: two entries
    /// with the same `url_path` under one tag are treated as the same page.
    pub url_path: String,
    /// Page title (from frontmatter or filename)
    pub title: String,
    /// Optional page description (from frontmatter).
    /// Skipped during serialization when absent.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    /// The original tag value as it appears on this page (preserves case/spacing),
    /// i.e. the form before any normalization is applied for lookup.
    pub original_tag_value: String,
}

impl TaggedPage {
    /// Builds a `TaggedPage` without a description.
    ///
    /// # Arguments
    ///
    /// * `url_path` - URL path of the page
    /// * `title` - Page title
    /// * `original_tag_value` - The tag exactly as written on the page
    ///
    /// Each argument is converted into an owned `String`; `description`
    /// is set to `None`.
    pub fn new(
        url_path: impl Into<String>,
        title: impl Into<String>,
        original_tag_value: impl Into<String>,
    ) -> Self {
        let url_path = url_path.into();
        let title = title.into();
        let original_tag_value = original_tag_value.into();

        Self {
            url_path,
            title,
            description: None,
            original_tag_value,
        }
    }

    /// Builds a `TaggedPage` that carries a description.
    ///
    /// Takes the same arguments as [`TaggedPage::new`] plus `description`,
    /// which is stored as `Some(..)`. Note the argument order: the
    /// description comes before the original tag value.
    pub fn with_description(
        url_path: impl Into<String>,
        title: impl Into<String>,
        description: impl Into<String>,
        original_tag_value: impl Into<String>,
    ) -> Self {
        let url_path = url_path.into();
        let title = title.into();
        let description = Some(description.into());
        let original_tag_value = original_tag_value.into();

        Self {
            url_path,
            title,
            description,
            original_tag_value,
        }
    }
}

/// A single tag with its normalized key and display value.
///
/// This is the summary record returned by `TagIndex::get_all_tags`: one
/// entry per distinct tag of a source, without the list of pages itself.
#[derive(Debug, Clone, Serialize, PartialEq, Eq)]
pub struct TagInfo {
    /// Normalized tag value (lowercase, underscores for spaces) - used in URLs.
    /// This is the tag's index key with the `source:` prefix removed.
    pub normalized: String,
    /// Display value (first occurrence's original form, e.g., "Joshua Jay").
    /// Falls back to the normalized value when no display form was recorded.
    pub display: String,
    /// Number of pages with this tag
    pub count: usize,
}

/// Thread-safe index of tagged pages.
///
/// Uses papaya concurrent HashMap for lock-free reads and writes, which is
/// why every method on the index takes `&self` rather than `&mut self`.
/// Keys are normalized (lowercase source, lowercase+underscore value),
/// while display forms are preserved from the first occurrence.
///
/// The three maps below are updated one after another, not as a single
/// transaction, so a concurrent reader can briefly observe one map updated
/// before the others.
pub struct TagIndex {
    /// Map of (source, tag_value) -> Vec<TaggedPage>
    /// Key format: "{normalized_source}:{normalized_value}"
    index: HashMap<String, Vec<TaggedPage>>,
    /// Map of normalized tag key -> display value (first occurrence wins).
    /// Uses the same "{normalized_source}:{normalized_value}" keys as `index`.
    display_values: HashMap<String, String>,
    /// Map of every source that has at least one tag:
    /// normalized source name -> display form of the source name.
    sources: HashMap<String, String>,
}

impl Default for TagIndex {
    fn default() -> Self {
        Self::new()
    }
}

impl TagIndex {
    /// Creates a new empty TagIndex.
    pub fn new() -> Self {
        Self {
            index: HashMap::new(),
            display_values: HashMap::new(),
            sources: HashMap::new(),
        }
    }

    /// Normalizes a source name for use as a key (lowercases it).
    pub fn normalize_source(source: &str) -> String {
        source.to_lowercase()
    }

    /// Normalizes a tag value for use as a key.
    ///
    /// Delegates to [`normalize_tag_value`] so tag lookups use the same
    /// normalization as the wikilink module.
    pub fn normalize_value(value: &str) -> String {
        normalize_tag_value(value)
    }

    /// Builds a cache key from an already-normalized source and value.
    ///
    /// The key has the form `"{source}:{value}"`.
    ///
    /// NOTE(review): `get_all_tags` selects a source's tags by the
    /// `"{source}:"` prefix, so a source name that itself contains `:` could
    /// overlap with another source's keys — confirm source names never
    /// contain a colon.
    fn make_key(normalized_source: &str, normalized_value: &str) -> String {
        format!("{}:{}", normalized_source, normalized_value)
    }

    /// Normalizes a raw source/value pair and builds the corresponding key.
    fn lookup_key(source: &str, value: &str) -> String {
        Self::make_key(
            &Self::normalize_source(source),
            &Self::normalize_value(value),
        )
    }

    /// Adds a page to the index under the given source and tag value.
    ///
    /// The display forms of `source` and `value` are recorded the first time
    /// their normalized key is seen and are never overwritten afterwards.
    /// A page whose `url_path` is already present under the tag is ignored.
    ///
    /// # Arguments
    ///
    /// * `source` - The tag source (e.g., "tags", "performers")
    /// * `value` - The tag value (e.g., "rust", "Joshua Jay")
    /// * `page` - The tagged page information
    pub fn add_page(&self, source: &str, value: &str, page: TaggedPage) {
        let norm_source = Self::normalize_source(source);
        let norm_value = Self::normalize_value(value);
        let key = Self::make_key(&norm_source, &norm_value);

        // Track the source (first occurrence wins for display).
        // The `contains_key` check is only a fast path that skips the
        // allocation for already-known sources; `try_insert` is what makes
        // "first wins" hold when several threads race on a new source, since
        // it never replaces an existing entry.
        let sources_guard = self.sources.pin();
        if !sources_guard.contains_key(&norm_source) {
            // An `Err` just means another writer got there first.
            let _ = sources_guard.try_insert(norm_source, source.to_string());
        }

        // Track display value (first occurrence wins), same scheme as above.
        let display_guard = self.display_values.pin();
        if !display_guard.contains_key(&key) {
            let _ = display_guard.try_insert(key.clone(), value.to_string());
        }

        // Add page to the index atomically to avoid TOCTOU race under parallel insertion.
        let guard = self.index.pin();
        let page_for_insert = page.clone();
        guard.update_or_insert_with(
            key,
            // The update closure receives the current list and returns its
            // replacement; it clones `page` rather than moving it because it
            // is not a one-shot closure.
            move |existing| {
                let mut pages = existing.clone();
                // Avoid duplicate pages (same url_path)
                if !pages.iter().any(|p| p.url_path == page.url_path) {
                    pages.push(page.clone());
                }
                pages
            },
            || vec![page_for_insert],
        );
    }

    /// Gets all pages tagged with the given source and value.
    ///
    /// `source` and `value` are normalized before lookup, so any
    /// case/spacing variant of the tag finds the same entry.
    ///
    /// Returns an empty vector if no pages have this tag.
    pub fn get_pages(&self, source: &str, value: &str) -> Vec<TaggedPage> {
        let key = Self::lookup_key(source, value);

        self.index.pin().get(&key).cloned().unwrap_or_default()
    }

    /// Gets all unique tags for a given source.
    ///
    /// Returns a vector of TagInfo with normalized key, display value, and
    /// count, sorted by display name (case-insensitive). A tag with no
    /// recorded display value falls back to its normalized form.
    pub fn get_all_tags(&self, source: &str) -> Vec<TagInfo> {
        let prefix = format!("{}:", Self::normalize_source(source));

        let guard = self.index.pin();
        let display_guard = self.display_values.pin();

        let mut tags: Vec<TagInfo> = guard
            .iter()
            .filter_map(|(key, pages)| {
                // Keys that do not belong to this source are skipped.
                let norm_value = key.strip_prefix(prefix.as_str())?;
                let display = display_guard
                    .get(key)
                    .cloned()
                    .unwrap_or_else(|| norm_value.to_string());
                Some(TagInfo {
                    normalized: norm_value.to_string(),
                    display,
                    count: pages.len(),
                })
            })
            .collect();

        // Sort by display name (case-insensitive); the lowercased key is
        // computed once per tag instead of once per comparison.
        tags.sort_by_cached_key(|tag| tag.display.to_lowercase());

        tags
    }

    /// Gets all sources that have at least one tag.
    ///
    /// Returns a set of normalized source names.
    pub fn get_all_sources(&self) -> HashSet<String> {
        self.sources.pin().iter().map(|(k, _)| k.clone()).collect()
    }

    /// Gets the display name for a source.
    ///
    /// Unlike most other lookups, the argument must already be normalized;
    /// it is used as the map key as-is.
    pub fn get_source_display(&self, normalized_source: &str) -> Option<String> {
        self.sources.pin().get(normalized_source).cloned()
    }

    /// Gets the display name for a tag value.
    ///
    /// Returns `None` if the tag has never been added.
    pub fn get_tag_display(&self, source: &str, value: &str) -> Option<String> {
        let key = Self::lookup_key(source, value);

        self.display_values.pin().get(&key).cloned()
    }

    /// Checks if a tag exists in the index.
    pub fn has_tag(&self, source: &str, value: &str) -> bool {
        let key = Self::lookup_key(source, value);

        self.index.pin().contains_key(&key)
    }

    /// Checks if a source has any tags.
    pub fn has_source(&self, source: &str) -> bool {
        let norm_source = Self::normalize_source(source);
        self.sources.pin().contains_key(&norm_source)
    }

    /// Returns the total number of unique tags across all sources.
    pub fn total_tags(&self) -> usize {
        self.index.pin().len()
    }

    /// Returns the total number of sources.
    pub fn total_sources(&self) -> usize {
        self.sources.pin().len()
    }

    /// Clear all indexed tags and display values.
    ///
    /// Call this when the underlying files have changed and the index
    /// needs to be rebuilt on next scan.
    ///
    /// The three internal maps are cleared one after another, not as a
    /// single atomic step.
    pub fn clear(&self) {
        self.index.pin().clear();
        self.display_values.pin().clear();
        self.sources.pin().clear();
    }
}

#[cfg(test)]
mod tests {
    //! Unit tests for `TagIndex`, `TaggedPage` and `TagInfo`.
    //!
    //! Each test builds its own fresh index, so tests are independent of
    //! one another.

    use super::*;

    // --- Normalization -----------------------------------------------------

    #[test]
    fn test_normalize_source() {
        assert_eq!(TagIndex::normalize_source("Tags"), "tags");
        assert_eq!(TagIndex::normalize_source("PERFORMERS"), "performers");
        assert_eq!(TagIndex::normalize_source("taxonomy.tags"), "taxonomy.tags");
    }

    #[test]
    fn test_normalize_value() {
        assert_eq!(TagIndex::normalize_value("rust"), "rust");
        assert_eq!(TagIndex::normalize_value("Rust"), "rust");
        assert_eq!(TagIndex::normalize_value("Joshua Jay"), "joshua_jay");
    }

    // --- Adding and looking up pages ---------------------------------------

    #[test]
    fn test_add_and_get_page() {
        let index = TagIndex::new();

        let page = TaggedPage::new("/docs/rust-guide/", "Rust Guide", "rust");
        index.add_page("tags", "rust", page);

        let pages = index.get_pages("tags", "rust");
        assert_eq!(pages.len(), 1);
        assert_eq!(pages[0].url_path, "/docs/rust-guide/");
    }

    #[test]
    fn test_case_insensitive_source() {
        let index = TagIndex::new();

        let page = TaggedPage::new("/page/", "Page", "rust");
        index.add_page("Tags", "rust", page);

        // All these should find the same tag
        assert_eq!(index.get_pages("tags", "rust").len(), 1);
        assert_eq!(index.get_pages("Tags", "rust").len(), 1);
        assert_eq!(index.get_pages("TAGS", "rust").len(), 1);
    }

    #[test]
    fn test_case_insensitive_value() {
        let index = TagIndex::new();

        let page = TaggedPage::new("/page/", "Page", "Rust");
        index.add_page("tags", "Rust", page);

        // All these should find the same tag
        assert_eq!(index.get_pages("tags", "rust").len(), 1);
        assert_eq!(index.get_pages("tags", "Rust").len(), 1);
        assert_eq!(index.get_pages("tags", "RUST").len(), 1);
    }

    #[test]
    fn test_value_with_spaces() {
        let index = TagIndex::new();

        let page = TaggedPage::new("/performer/", "Page", "Joshua Jay");
        index.add_page("performers", "Joshua Jay", page);

        // Can be found by normalized or original form
        assert_eq!(index.get_pages("performers", "Joshua Jay").len(), 1);
        assert_eq!(index.get_pages("performers", "joshua_jay").len(), 1);
        assert_eq!(index.get_pages("performers", "joshua jay").len(), 1);
    }

    #[test]
    fn test_multiple_pages_same_tag() {
        let index = TagIndex::new();

        let page1 = TaggedPage::new("/page1/", "Page 1", "rust");
        let page2 = TaggedPage::new("/page2/", "Page 2", "Rust"); // Different case

        index.add_page("tags", "rust", page1);
        index.add_page("tags", "Rust", page2); // Should go to same tag

        let pages = index.get_pages("tags", "rust");
        assert_eq!(pages.len(), 2);
    }

    #[test]
    fn test_no_duplicate_pages() {
        let index = TagIndex::new();

        let page = TaggedPage::new("/page/", "Page", "rust");

        index.add_page("tags", "rust", page.clone());
        index.add_page("tags", "rust", page); // Same page, should not duplicate

        let pages = index.get_pages("tags", "rust");
        assert_eq!(pages.len(), 1);
    }

    // --- Listing tags and sources ------------------------------------------

    #[test]
    fn test_get_all_tags() {
        let index = TagIndex::new();

        index.add_page("tags", "rust", TaggedPage::new("/p1/", "P1", "rust"));
        index.add_page("tags", "Python", TaggedPage::new("/p2/", "P2", "Python"));
        index.add_page("tags", "go", TaggedPage::new("/p3/", "P3", "go"));
        index.add_page("tags", "rust", TaggedPage::new("/p4/", "P4", "Rust")); // Another rust page

        let tags = index.get_all_tags("tags");
        assert_eq!(tags.len(), 3);

        // Result is ordered by display name, ignoring case
        let displays: Vec<&str> = tags.iter().map(|t| t.display.as_str()).collect();
        assert_eq!(displays, ["go", "Python", "rust"]);

        // Find the rust tag
        let rust_tag = tags.iter().find(|t| t.normalized == "rust").unwrap();
        assert_eq!(rust_tag.count, 2);
        assert_eq!(rust_tag.display, "rust"); // First occurrence
    }

    #[test]
    fn test_get_all_sources() {
        let index = TagIndex::new();

        index.add_page("tags", "rust", TaggedPage::new("/p1/", "P1", "rust"));
        index.add_page(
            "performers",
            "Joshua Jay",
            TaggedPage::new("/p2/", "P2", "Joshua Jay"),
        );

        let sources = index.get_all_sources();
        assert_eq!(sources.len(), 2);
        assert!(sources.contains("tags"));
        assert!(sources.contains("performers"));
    }

    // --- Display forms -----------------------------------------------------

    #[test]
    fn test_source_display() {
        let index = TagIndex::new();

        index.add_page("Tags", "rust", TaggedPage::new("/p/", "P", "rust"));

        let display = index.get_source_display("tags");
        assert_eq!(display, Some("Tags".to_string()));
    }

    #[test]
    fn test_tag_display() {
        let index = TagIndex::new();

        index.add_page(
            "performers",
            "Joshua Jay",
            TaggedPage::new("/p/", "P", "Joshua Jay"),
        );

        let display = index.get_tag_display("performers", "joshua_jay");
        assert_eq!(display, Some("Joshua Jay".to_string()));
    }

    // --- Existence checks and counts ---------------------------------------

    #[test]
    fn test_has_tag() {
        let index = TagIndex::new();

        index.add_page("tags", "rust", TaggedPage::new("/p/", "P", "rust"));

        assert!(index.has_tag("tags", "rust"));
        assert!(index.has_tag("Tags", "Rust")); // Case insensitive
        assert!(!index.has_tag("tags", "python"));
        assert!(!index.has_tag("category", "rust"));
    }

    #[test]
    fn test_has_source() {
        let index = TagIndex::new();

        index.add_page("tags", "rust", TaggedPage::new("/p/", "P", "rust"));

        assert!(index.has_source("tags"));
        assert!(index.has_source("Tags")); // Case insensitive
        assert!(!index.has_source("performers"));
    }

    #[test]
    fn test_totals() {
        let index = TagIndex::new();

        assert_eq!(index.total_tags(), 0);
        assert_eq!(index.total_sources(), 0);

        index.add_page("tags", "rust", TaggedPage::new("/p1/", "P1", "rust"));
        index.add_page("tags", "python", TaggedPage::new("/p2/", "P2", "python"));
        index.add_page(
            "performers",
            "Joshua Jay",
            TaggedPage::new("/p3/", "P3", "Joshua Jay"),
        );

        assert_eq!(index.total_tags(), 3);
        assert_eq!(index.total_sources(), 2);
    }

    #[test]
    fn test_empty_results() {
        let index = TagIndex::new();

        let pages = index.get_pages("tags", "nonexistent");
        assert!(pages.is_empty());

        let tags = index.get_all_tags("nonexistent");
        assert!(tags.is_empty());
    }

    // --- Clearing ----------------------------------------------------------

    #[test]
    fn test_clear() {
        let index = TagIndex::new();

        index.add_page("Tags", "Rust", TaggedPage::new("/p/", "P", "Rust"));
        assert!(index.has_tag("tags", "rust"));

        index.clear();

        // Pages, display values and sources are all gone
        assert_eq!(index.total_tags(), 0);
        assert_eq!(index.total_sources(), 0);
        assert!(!index.has_tag("tags", "rust"));
        assert!(!index.has_source("tags"));
        assert!(index.get_pages("tags", "rust").is_empty());
        assert_eq!(index.get_tag_display("tags", "rust"), None);
        assert_eq!(index.get_source_display("tags"), None);

        // After a clear, the next occurrence becomes the "first" again
        index.add_page("tags", "rust", TaggedPage::new("/p/", "P", "rust"));
        assert_eq!(
            index.get_tag_display("tags", "rust"),
            Some("rust".to_string())
        );
        assert_eq!(index.get_source_display("tags"), Some("tags".to_string()));
    }

    // --- TaggedPage constructors -------------------------------------------

    #[test]
    fn test_tagged_page_with_description() {
        let page =
            TaggedPage::with_description("/page/", "Page Title", "This is a description", "rust");

        // Every argument must land in its own field (guards against mix-ups
        // between the four string parameters)
        assert_eq!(page.url_path, "/page/");
        assert_eq!(page.title, "Page Title");
        assert_eq!(page.description, Some("This is a description".to_string()));
        assert_eq!(page.original_tag_value, "rust");
    }
}