terraphim_automata 1.16.34

Automata for searching and processing knowledge graphs
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
//! Fast text matching and autocomplete engine for knowledge graphs.
//!
//! `terraphim_automata` provides high-performance text processing using Aho-Corasick
//! automata and finite state transducers (FST). It powers Terraphim's autocomplete
//! and knowledge graph linking features.
//!
//! # Features
//!
//! - **Fast Autocomplete**: Prefix-based search with fuzzy matching (Levenshtein/Jaro-Winkler)
//! - **Text Matching**: Find and replace terms using Aho-Corasick automata
//! - **Link Generation**: Convert matched terms to Markdown, HTML, or Wiki links
//! - **Paragraph Extraction**: Extract context around matched terms
//! - **WASM Support**: Browser-compatible autocomplete with TypeScript bindings
//! - **Remote Loading**: Async loading of thesaurus files from HTTP (feature-gated)
//!
//! # Architecture
//!
//! - **Autocomplete Index**: FST-based prefix search with metadata
//! - **Aho-Corasick Matcher**: Multi-pattern matching for link generation
//! - **Thesaurus Builder**: Parse knowledge graphs from JSON/Markdown
//!
//! # Cargo Features
//!
//! - `remote-loading`: Enable async HTTP loading of thesaurus files (requires tokio)
//! - `tokio-runtime`: Add tokio runtime support
//! - `typescript`: Generate TypeScript definitions via tsify
//! - `wasm`: Enable WebAssembly compilation
//! - `medical`: Enable the medical entity extraction modules (SNOMED CT and UMLS)
//!
//! # Examples
//!
//! ## Autocomplete with Fuzzy Matching
//!
//! ```rust
//! use terraphim_automata::{build_autocomplete_index, fuzzy_autocomplete_search};
//! use terraphim_types::{Thesaurus, NormalizedTermValue, NormalizedTerm};
//!
//! // Create a simple thesaurus
//! let mut thesaurus = Thesaurus::new("programming".to_string());
//! thesaurus.insert(
//!     NormalizedTermValue::from("rust"),
//!     NormalizedTerm::new(1, NormalizedTermValue::from("rust"))
//! );
//! thesaurus.insert(
//!     NormalizedTermValue::from("rust async"),
//!     NormalizedTerm::new(2, NormalizedTermValue::from("rust async"))
//! );
//!
//! // Build autocomplete index
//! let index = build_autocomplete_index(thesaurus, None).unwrap();
//!
//! // Fuzzy search (returns Result)
//! let results = fuzzy_autocomplete_search(&index, "rast", 0.8, Some(5)).unwrap();
//! assert!(!results.is_empty());
//! ```
//!
//! ## Text Matching and Link Generation
//!
//! ```rust
//! use terraphim_automata::{load_thesaurus_from_json, replace_matches, LinkType};
//!
//! let json = r#"{
//!   "name": "test",
//!   "data": {
//!     "rust": {
//!       "id": 1,
//!       "nterm": "rust programming",
//!       "url": "https://rust-lang.org"
//!     }
//!   }
//! }"#;
//!
//! let thesaurus = load_thesaurus_from_json(json).unwrap();
//! let text = "I love rust!";
//!
//! // Replace matches with Markdown links
//! let linked = replace_matches(text, thesaurus, LinkType::MarkdownLinks).unwrap();
//! let result = String::from_utf8(linked).unwrap();
//! println!("{}", result); // "I love [rust](https://rust-lang.org)!"
//! ```
//!
//! ## Loading Thesaurus Files
//!
//! ```no_run
//! use terraphim_automata::{AutomataPath, load_thesaurus};
//!
//! # #[cfg(feature = "remote-loading")]
//! # async fn example() {
//! // Load from local file
//! let local_path = AutomataPath::from_local("thesaurus.json");
//! let thesaurus = load_thesaurus(&local_path).await.unwrap();
//!
//! // Load from remote URL (requires 'remote-loading' feature)
//! let remote_path = AutomataPath::from_remote("https://example.com/thesaurus.json").unwrap();
//! let thesaurus = load_thesaurus(&remote_path).await.unwrap();
//! # }
//! ```
//!
//! # WASM Support
//!
//! Build for WebAssembly:
//!
//! ```bash
//! wasm-pack build --target web --features wasm
//! ```
//!
//! See the [WASM package](wasm/) for browser usage.

pub use self::builder::{Logseq, ThesaurusBuilder};
pub mod autocomplete;
pub mod builder;
pub mod markdown_directives;
pub mod matcher;
pub mod url_protector;

// Medical entity extraction modules (SNOMED CT and UMLS)
#[cfg(feature = "medical")]
pub mod medical_artifact;
#[cfg(feature = "medical")]
pub mod medical_extractor;
#[cfg(feature = "medical")]
pub mod sharded_extractor;
#[cfg(feature = "medical")]
pub mod snomed;
#[cfg(feature = "medical")]
pub mod umls;
#[cfg(feature = "medical")]
pub mod umls_extractor;

pub use autocomplete::{
    AutocompleteConfig, AutocompleteIndex, AutocompleteMetadata, AutocompleteResult,
    autocomplete_search, build_autocomplete_index, deserialize_autocomplete_index,
    fuzzy_autocomplete_search, fuzzy_autocomplete_search_levenshtein, serialize_autocomplete_index,
};
pub use markdown_directives::{
    MarkdownDirectiveWarning, MarkdownDirectivesParseResult, parse_markdown_directives_dir,
};
pub use matcher::{
    LinkType, Matched, extract_paragraphs_from_automata, find_matches, replace_matches,
};

// Medical entity extraction re-exports
#[cfg(feature = "medical")]
pub use medical_extractor::{EntityExtractor, ExtractedEntity};
#[cfg(feature = "medical")]
pub use sharded_extractor::ShardedUmlsExtractor;
#[cfg(feature = "medical")]
pub use snomed::{SemanticType, SnomedConcept, SnomedMatch};
#[cfg(feature = "medical")]
pub use umls::{UmlsConcept, UmlsDataset, UmlsStats};
#[cfg(feature = "medical")]
pub use umls_extractor::{UmlsExtractor, UmlsExtractorStats, UmlsMatch};

// Re-export helpers for metadata iteration to support graph-embedding expansions in consumers.
// Both functions are thin wrappers that forward to methods on `AutocompleteIndex`.
pub mod autocomplete_helpers {
    use super::autocomplete::{AutocompleteIndex, AutocompleteMetadata};
    /// Iterates over the `(term, metadata)` pairs of `index`.
    ///
    /// Forwards directly to `AutocompleteIndex::metadata_iter`; the yielded references
    /// borrow from `index`.
    pub fn iter_metadata(
        index: &AutocompleteIndex,
    ) -> impl Iterator<Item = (&str, &AutocompleteMetadata)> {
        index.metadata_iter()
    }
    /// Looks up the metadata stored for `term` in `index`.
    ///
    /// Forwards directly to `AutocompleteIndex::metadata_get`. The returned reference
    /// borrows from `index` (lifetime `'a`), not from `term`.
    pub fn get_metadata<'a>(
        index: &'a AutocompleteIndex,
        term: &str,
    ) -> Option<&'a AutocompleteMetadata> {
        index.metadata_get(term)
    }
}

#[cfg(feature = "remote-loading")]
pub use autocomplete::load_autocomplete_index;
use std::collections::HashMap;
use std::fmt::Display;
use std::fs;
use std::path::PathBuf;

use serde::{Deserialize, Serialize};

#[cfg(feature = "typescript")]
use tsify::Tsify;

use terraphim_types::{NormalizedTerm, NormalizedTermValue, Thesaurus};

/// Errors that can occur when working with automata and thesaurus operations.
///
/// Variants carrying `#[from]` can be produced implicitly by the `?` operator from
/// their source error type; the `String`-carrying variants are built explicitly.
#[derive(thiserror::Error, Debug)]
pub enum TerraphimAutomataError {
    /// Invalid thesaurus format or structure.
    ///
    /// Also used in this file for thesaurus loading failures (missing local file,
    /// failed remote fetch, unsupported remote loading).
    #[error("Invalid thesaurus: {0}")]
    InvalidThesaurus(String),

    /// JSON serialization/deserialization error (converted from `serde_json::Error`).
    #[error("Serde deserialization error: {0}")]
    Serde(#[from] serde_json::Error),

    /// Dictionary-related error.
    ///
    /// In this file it is returned by `AutomataPath::from_remote` for an unsupported
    /// URL scheme.
    #[error("Dict error: {0}")]
    Dict(String),

    /// File I/O error (converted from `std::io::Error`).
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),

    /// Aho-Corasick automata construction error (converted from `aho_corasick::BuildError`).
    #[error("Aho-Corasick build error: {0}")]
    AhoCorasick(#[from] aho_corasick::BuildError),

    /// Finite state transducer (FST) error (converted from `fst::Error`).
    #[error("FST error: {0}")]
    Fst(#[from] fst::Error),
}

/// Crate-wide result type: `std::result::Result` with the error fixed to
/// [`TerraphimAutomataError`].
pub type Result<T> = std::result::Result<T, TerraphimAutomataError>;

/// Path to a thesaurus/automata file, either local or remote.
///
/// Supports loading thesaurus files from local filesystem or HTTP URLs.
/// Remote loading requires the `remote-loading` feature to be enabled.
///
/// The type is serde-serializable; with the `typescript` feature it additionally
/// derives `Tsify` with WASM ABI conversions in both directions.
///
/// # Examples
///
/// ```
/// use terraphim_automata::AutomataPath;
///
/// // Local file path
/// let local = AutomataPath::from_local("thesaurus.json");
///
/// // Remote URL (requires 'remote-loading' feature)
/// let remote = AutomataPath::from_remote("https://example.com/thesaurus.json").unwrap();
/// ```
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[cfg_attr(feature = "typescript", derive(Tsify))]
#[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))]
pub enum AutomataPath {
    /// Local filesystem path
    Local(PathBuf),
    /// Remote HTTP/HTTPS URL, stored as the string it was created from
    Remote(String),
}

impl Display for AutomataPath {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            AutomataPath::Local(path) => write!(f, "Local Path: {:?}", path),
            AutomataPath::Remote(url) => write!(f, "Remote URL: {:?}", url),
        }
    }
}

impl AutomataPath {
    /// Builds an [`AutomataPath::Remote`] from `url`.
    ///
    /// # Errors
    ///
    /// Returns [`TerraphimAutomataError::Dict`] unless `url` starts with `http://` or
    /// `https://`. The check is a plain, case-sensitive prefix match; the URL is not
    /// otherwise validated.
    pub fn from_remote(url: &str) -> Result<Self> {
        let has_supported_scheme = url.starts_with("http://") || url.starts_with("https://");
        if has_supported_scheme {
            Ok(Self::Remote(url.to_owned()))
        } else {
            Err(TerraphimAutomataError::Dict(format!(
                "Invalid URL scheme. Only `http` and `https` are supported right now. Got {}",
                url
            )))
        }
    }

    /// Builds an [`AutomataPath::Local`] from anything path-like. The path is stored
    /// as given; it is not checked for existence.
    pub fn from_local<P: AsRef<std::path::Path>>(file: P) -> Self {
        Self::Local(std::path::PathBuf::from(file.as_ref()))
    }

    /// Path to the small example thesaurus used in tests.
    ///
    /// The relative path is picked from the current working directory's trailing
    /// component; the file itself is not checked for existence.
    pub fn local_example() -> Self {
        // Working directories from which the fixture is reached via `../../`.
        const NESTED_CRATE_DIRS: [&str; 5] = [
            "terraphim_automata",
            "terraphim_kg_orchestration",
            "terraphim_task_decomposition",
            "terraphim_kg_agents",
            "terraphim_agent_registry",
        ];

        log::debug!("Current folder {:?}", std::env::current_dir());
        let working_dir =
            std::env::current_dir().unwrap_or_else(|_| std::path::PathBuf::from("."));

        let in_nested_crate = NESTED_CRATE_DIRS
            .iter()
            .any(|dir| working_dir.ends_with(dir));
        let at_workspace_root = working_dir
            .file_name()
            .is_some_and(|name| name == "terraphim-ai");

        let fixture = if in_nested_crate {
            "../../test-fixtures/term_to_id_simple.json"
        } else if at_workspace_root {
            "test-fixtures/term_to_id_simple.json"
        } else {
            "data/term_to_id_simple.json" // fallback to old path
        };
        Self::from_local(fixture)
    }

    /// Path to the full example thesaurus used in tests.
    ///
    /// Probes a fixed list of relative locations against the current working
    /// directory and returns the first one that exists, or the workspace-root
    /// location when none does.
    pub fn local_example_full() -> Self {
        let working_dir =
            std::env::current_dir().unwrap_or_else(|_| std::path::PathBuf::from("."));

        // Candidate locations, probed in order.
        let candidates = [
            "test-fixtures/term_to_id.json",       // from workspace root
            "../../test-fixtures/term_to_id.json", // from crate dir
            "../test-fixtures/term_to_id.json",    // from crates/ dir
            "data/term_to_id.json",                // legacy fallback
        ];

        let chosen = candidates
            .iter()
            .copied()
            .find(|candidate| working_dir.join(candidate).exists())
            .unwrap_or("test-fixtures/term_to_id.json");

        Self::from_local(chosen)
    }

    /// Sample remote [`AutomataPath`] for tests.
    pub fn remote_example() -> Self {
        const SAMPLE_URL: &str = "https://staging-storage.terraphim.io/thesaurus_Default.json";
        // The constant starts with `https://`, so `from_remote` accepts it.
        Self::from_remote(SAMPLE_URL).unwrap()
    }
}

/// Load thesaurus from JSON string (sync version for WASM compatibility)
pub fn load_thesaurus_from_json(json_str: &str) -> Result<Thesaurus> {
    let thesaurus: Thesaurus = serde_json::from_str(json_str)?;
    Ok(thesaurus)
}

/// Parse a thesaurus from `json_str` and run `replace_matches` over `content` with it.
///
/// Returns the rewritten content as bytes, with matched terms rendered according to
/// `link_type`.
///
/// # Errors
///
/// Fails if the JSON cannot be loaded or if `replace_matches` reports an error.
pub fn load_thesaurus_from_json_and_replace(
    json_str: &str,
    content: &str,
    link_type: LinkType,
) -> Result<Vec<u8>> {
    let linked = replace_matches(content, load_thesaurus_from_json(json_str)?, link_type)?;
    Ok(linked)
}

/// Load thesaurus from JSON string (async version for compatibility)
///
/// Only available with the `remote-loading` feature. The body contains no `.await`:
/// it simply forwards to [`load_thesaurus_from_json`] and returns its result.
#[cfg(feature = "remote-loading")]
pub async fn load_thesaurus_from_json_async(json_str: &str) -> Result<Thesaurus> {
    load_thesaurus_from_json(json_str)
}

/// Load thesaurus from JSON string and replace terms using streaming matcher (async version)
///
/// Only available with the `remote-loading` feature. The body contains no `.await`:
/// it simply forwards to [`load_thesaurus_from_json_and_replace`] and returns its result.
#[cfg(feature = "remote-loading")]
pub async fn load_thesaurus_from_json_and_replace_async(
    json_str: &str,
    content: &str,
    link_type: LinkType,
) -> Result<Vec<u8>> {
    load_thesaurus_from_json_and_replace(json_str, content, link_type)
}

/// Parse thesaurus JSON supporting both new and legacy formats.
///
/// New format: `{"name": "...", "data": {"term": {"id": N, "nterm": "..."}, ...}}`
/// Legacy format: `{"term": {"id": N, "nterm": "..."}, ...}` (flat map)
///
/// The new format is tried first. A legacy thesaurus is named `"imported"`; each of its
/// entries is keyed by the map key, gets an auto-assigned id, and uses `display_value`
/// (falling back to `nterm`) as its display value.
///
/// # Errors
///
/// Returns [`TerraphimAutomataError::InvalidThesaurus`] when `contents` matches neither
/// format. The message includes the serde error from both attempts so the caller can
/// see where and why parsing failed.
fn parse_thesaurus_json(contents: &str) -> Result<Thesaurus> {
    #[derive(Deserialize)]
    struct ThesaurusFormat {
        name: String,
        data: HashMap<String, NormalizedTerm>,
    }

    #[derive(Deserialize)]
    struct LegacyTerm {
        // Required by the legacy schema but never read: ids are re-assigned on import.
        #[allow(dead_code)]
        id: u64,
        nterm: String,
        #[serde(default)]
        display_value: Option<String>,
        #[serde(default)]
        url: Option<String>,
    }

    // Try new format first
    let new_format_err = match serde_json::from_str::<ThesaurusFormat>(contents) {
        Ok(parsed) => {
            log::debug!("Parsed thesaurus in new format with name: {}", parsed.name);
            let mut thesaurus = Thesaurus::new(parsed.name);
            for (key, term) in parsed.data {
                thesaurus.insert(NormalizedTermValue::from(key.as_str()), term);
            }
            return Ok(thesaurus);
        }
        Err(e) => {
            log::debug!(
                "Failed to parse as new Thesaurus format: {}, trying legacy format",
                e
            );
            e
        }
    };

    // Try legacy format (flat map of terms)
    let legacy_format_err = match serde_json::from_str::<HashMap<String, LegacyTerm>>(contents) {
        Ok(legacy) => {
            log::info!(
                "Parsed thesaurus in legacy flat format with {} terms",
                legacy.len()
            );
            let mut thesaurus = Thesaurus::new("imported".to_string());
            for (key, term) in legacy {
                // `term` is owned, so its fields can be moved out without cloning.
                let normalized =
                    NormalizedTerm::with_auto_id(NormalizedTermValue::from(key.as_str()))
                        .with_display_value(term.display_value.unwrap_or(term.nterm))
                        .with_url(term.url.unwrap_or_default());
                thesaurus.insert(NormalizedTermValue::from(key.as_str()), normalized);
            }
            return Ok(thesaurus);
        }
        Err(e) => {
            log::warn!("Failed to parse thesaurus JSON in either format: {}", e);
            e
        }
    };

    // Surface both underlying causes instead of only logging them.
    Err(TerraphimAutomataError::InvalidThesaurus(format!(
        "Could not parse thesaurus JSON in either new or legacy format \
         (new format: {new_format_err}; legacy format: {legacy_format_err})"
    )))
}

/// Load a thesaurus from a file or URL
///
/// Note: Remote loading requires the "remote-loading" feature to be enabled.
///
/// Local paths are read from disk; remote URLs are fetched over HTTP with a 30-second
/// timeout. The content is then parsed with `parse_thesaurus_json`, which accepts both
/// the new and the legacy thesaurus JSON layout.
///
/// # Errors
///
/// - [`TerraphimAutomataError::InvalidThesaurus`] if a local file does not exist, the
///   remote request fails, the server answers with a non-success HTTP status, the
///   response body cannot be read, or the content is not a valid thesaurus.
/// - [`TerraphimAutomataError::Io`] if reading an existing local file fails.
#[cfg(feature = "remote-loading")]
pub async fn load_thesaurus(automata_path: &AutomataPath) -> Result<Thesaurus> {
    /// Fetches `url` and returns the response body as text.
    async fn read_url(url: &str) -> Result<String> {
        log::debug!("Reading thesaurus from remote: {url}");
        let response = reqwest::Client::builder()
            .timeout(std::time::Duration::from_secs(30))
            .user_agent("Terraphim-Automata/1.0")
            .build()
            // Fall back to a default client if the configured one cannot be built.
            .unwrap_or_else(|_| reqwest::Client::new())
            .get(url)
            .header("Accept", "application/json")
            .send()
            .await
            .map_err(|e| {
                TerraphimAutomataError::InvalidThesaurus(format!(
                    "Failed to fetch thesaurus from remote {url}. Error: {e:#?}",
                ))
            })?;

        // A non-2xx response (e.g. a 404 page) is not a thesaurus. Fail here with the
        // HTTP status rather than letting the body reach the JSON parser, where it
        // would surface as a misleading parse error.
        let status = response.status();
        if !status.is_success() {
            return Err(TerraphimAutomataError::InvalidThesaurus(format!(
                "Failed to fetch thesaurus from remote {url}. Status: {status}",
            )));
        }

        // Captured before `text()` consumes the response, for error reporting only.
        let headers = response.headers().clone();
        response.text().await.map_err(|e| {
            TerraphimAutomataError::InvalidThesaurus(format!(
                "Failed to read thesaurus from remote {url}. Status: {status}. Headers: {headers:#?}. Error: {e:#?}",
            ))
        })
    }

    let contents = match automata_path {
        AutomataPath::Local(path) => {
            // Report a missing file as an invalid thesaurus with the offending path,
            // rather than as a bare I/O error.
            if !path.exists() {
                return Err(TerraphimAutomataError::InvalidThesaurus(format!(
                    "Thesaurus file not found: {}",
                    path.display()
                )));
            }
            fs::read_to_string(path)?
        }
        AutomataPath::Remote(url) => read_url(url).await?,
    };

    parse_thesaurus_json(&contents)
}

/// Load a thesaurus from a local file only (WASM-compatible version)
///
/// Compiled when the `remote-loading` feature is disabled; synchronous, so no async
/// runtime is needed.
///
/// # Errors
///
/// - [`TerraphimAutomataError::InvalidThesaurus`] for any [`AutomataPath::Remote`]
///   path, or when the file content is not a valid thesaurus.
/// - [`TerraphimAutomataError::Io`] when the local file cannot be read.
#[cfg(not(feature = "remote-loading"))]
pub fn load_thesaurus(automata_path: &AutomataPath) -> Result<Thesaurus> {
    match automata_path {
        AutomataPath::Remote(_) => Err(TerraphimAutomataError::InvalidThesaurus(
            "Remote loading is not supported. Enable the 'remote-loading' feature.".to_string(),
        )),
        AutomataPath::Local(path) => {
            let raw = fs::read_to_string(path)?;
            parse_thesaurus_json(&raw)
        }
    }
}

// Unit tests for thesaurus loading (file, URL, JSON string), link replacement and
// `AutomataPath::from_remote` validation.
#[cfg(test)]
mod tests {
    use terraphim_types::NormalizedTermValue;

    use super::*;

    // Loads the simple local fixture through the async loader and checks the three
    // expected terms; `baz` is expected to carry the same id (1) as `foo`.
    #[cfg(feature = "remote-loading")]
    #[tokio::test]
    async fn test_load_thesaurus_from_file() {
        let automata_path = AutomataPath::local_example();
        let thesaurus = load_thesaurus(&automata_path).await.unwrap();
        assert_eq!(thesaurus.len(), 3);
        assert_eq!(
            thesaurus.get(&NormalizedTermValue::from("foo")).unwrap().id,
            1u64
        );
        assert_eq!(
            thesaurus.get(&NormalizedTermValue::from("bar")).unwrap().id,
            2u64
        );
        assert_eq!(
            thesaurus.get(&NormalizedTermValue::from("baz")).unwrap().id,
            1u64
        );
    }

    // Network test against the sample remote thesaurus; ignored by default.
    // The expected size and id are tied to the content served at that URL.
    #[cfg(feature = "remote-loading")]
    #[tokio::test]
    #[ignore]
    async fn test_load_thesaurus_from_url() {
        let automata_path = AutomataPath::remote_example();
        let thesaurus = load_thesaurus(&automata_path).await.unwrap();
        assert_eq!(thesaurus.len(), 1725);
        assert_eq!(
            thesaurus
                .get(&NormalizedTermValue::from("@risk a user guide"))
                .unwrap()
                .id,
            661u64
        );
    }

    // Same fixture check as above, for the synchronous loader that is compiled when
    // the `remote-loading` feature is disabled.
    #[cfg(not(feature = "remote-loading"))]
    #[test]
    fn test_load_thesaurus_from_file_sync() {
        let automata_path = AutomataPath::local_example();
        let thesaurus = load_thesaurus(&automata_path).unwrap();
        assert_eq!(thesaurus.len(), 3);
        assert_eq!(
            thesaurus.get(&NormalizedTermValue::from("foo")).unwrap().id,
            1
        );
        assert_eq!(
            thesaurus.get(&NormalizedTermValue::from("bar")).unwrap().id,
            2
        );
        assert_eq!(
            thesaurus.get(&NormalizedTermValue::from("baz")).unwrap().id,
            1
        );
    }

    // NOTE(review): this performs the same checks as `test_load_thesaurus_from_file`
    // (only the literal suffixes differ) — confirm whether both are needed.
    #[cfg(feature = "remote-loading")]
    #[tokio::test]
    async fn test_load_thesaurus_from_file_async() {
        let automata_path = AutomataPath::local_example();
        let thesaurus = load_thesaurus(&automata_path).await.unwrap();
        assert_eq!(thesaurus.len(), 3);
        assert_eq!(
            thesaurus.get(&NormalizedTermValue::from("foo")).unwrap().id,
            1
        );
        assert_eq!(
            thesaurus.get(&NormalizedTermValue::from("bar")).unwrap().id,
            2
        );
        assert_eq!(
            thesaurus.get(&NormalizedTermValue::from("baz")).unwrap().id,
            1
        );
    }

    // Parses a new-format thesaurus from an inline JSON string and checks ids and URLs.
    #[test]
    fn test_load_thesaurus_from_json() {
        let json_str = r#"
{
  "name": "Engineering",
  "data": {
    "project management framework tailoring": {
      "id": 1,
      "nterm": "project tailoring strategy",
      "url": "https://example.com/project-tailoring-strategy"
    },
    "strategy documents": {
      "id": 2,
      "nterm": "strategy documents",
      "url": "https://example.com/strategy-documents"
    },
    "project constraints": {
      "id": 3,
      "nterm": "project constraints",
      "url": "https://example.com/project-constraints"
    }
  }
}"#;

        let thesaurus = load_thesaurus_from_json(json_str).unwrap();
        assert_eq!(thesaurus.len(), 3);
        assert_eq!(
            thesaurus
                .get(&NormalizedTermValue::from(
                    "project management framework tailoring"
                ))
                .unwrap()
                .id,
            1
        );
        assert_eq!(
            thesaurus
                .get(&NormalizedTermValue::from("strategy documents"))
                .unwrap()
                .id,
            2
        );
        assert_eq!(
            thesaurus
                .get(&NormalizedTermValue::from("project constraints"))
                .unwrap()
                .id,
            3
        );
        assert_eq!(
            thesaurus
                .get(&NormalizedTermValue::from(
                    "project management framework tailoring"
                ))
                .unwrap()
                .url,
            Some("https://example.com/project-tailoring-strategy".to_string())
        );
        assert_eq!(
            thesaurus
                .get(&NormalizedTermValue::from("strategy documents"))
                .unwrap()
                .url,
            Some("https://example.com/strategy-documents".to_string())
        );
    }

    // Runs the same inline thesaurus through link replacement and checks the exact
    // output for each link style (Markdown, HTML, Wiki).
    #[test]
    fn test_load_thesaurus_from_json_and_replace() {
        let json_str = r#"
{
  "name": "Engineering",
  "data": {
    "project management framework tailoring": {
      "id": 1,
      "nterm": "project tailoring strategy",
      "url": "https://example.com/project-tailoring-strategy"
    },
    "strategy documents": {
      "id": 2,
      "nterm": "strategy documents",
      "url": "https://example.com/strategy-documents"
    },
    "project constraints": {
      "id": 3,
      "nterm": "project constraints",
      "url": "https://example.com/project-constraints"
    }
  }
}"#;

        let content = "I like project constraints and strategy documents.";
        let replaced =
            load_thesaurus_from_json_and_replace(json_str, content, LinkType::MarkdownLinks)
                .unwrap();
        let replaced_str = String::from_utf8(replaced).unwrap();
        assert_eq!(
            replaced_str,
            "I like [project constraints](https://example.com/project-constraints) and [strategy documents](https://example.com/strategy-documents)."
        );

        // Test HTMLLinks
        let replaced =
            load_thesaurus_from_json_and_replace(json_str, content, LinkType::HTMLLinks).unwrap();
        let replaced_str = String::from_utf8(replaced).unwrap();
        assert_eq!(
            replaced_str,
            "I like <a href=\"https://example.com/project-constraints\">project constraints</a> and <a href=\"https://example.com/strategy-documents\">strategy documents</a>."
        );

        // Test WikiLinks
        let replaced =
            load_thesaurus_from_json_and_replace(json_str, content, LinkType::WikiLinks).unwrap();
        let replaced_str = String::from_utf8(replaced).unwrap();
        assert_eq!(
            replaced_str,
            "I like [[project constraints]] and [[strategy documents]]."
        );
    }

    // Malformed JSON must produce an error rather than a thesaurus.
    #[test]
    fn test_load_thesaurus_from_json_invalid() {
        let invalid_json = "{invalid_json}";
        let result = load_thesaurus_from_json(invalid_json);
        assert!(result.is_err());
    }

    // `from_remote` accepts an `https://` URL and stores it unchanged.
    #[test]
    fn test_from_remote_accepts_https() {
        let result = AutomataPath::from_remote("https://example.com/thesaurus.json");
        assert!(result.is_ok());
        match result.unwrap() {
            AutomataPath::Remote(url) => {
                assert_eq!(url, "https://example.com/thesaurus.json");
            }
            AutomataPath::Local(_) => panic!("Expected Remote variant"),
        }
    }

    // `from_remote` accepts an `http://` URL and stores it unchanged.
    #[test]
    fn test_from_remote_accepts_http() {
        let result = AutomataPath::from_remote("http://example.com/thesaurus.json");
        assert!(result.is_ok());
        match result.unwrap() {
            AutomataPath::Remote(url) => {
                assert_eq!(url, "http://example.com/thesaurus.json");
            }
            AutomataPath::Local(_) => panic!("Expected Remote variant"),
        }
    }

    // Schemes other than http/https are rejected.
    #[test]
    fn test_from_remote_rejects_ftp() {
        let result = AutomataPath::from_remote("ftp://example.com/thesaurus.json");
        assert!(result.is_err());
    }

    // A filesystem path is not a valid remote URL.
    #[test]
    fn test_from_remote_rejects_file_path() {
        let result = AutomataPath::from_remote("/tmp/thesaurus.json");
        assert!(result.is_err());
    }

    // The empty string has no scheme and is rejected.
    #[test]
    fn test_from_remote_rejects_empty() {
        let result = AutomataPath::from_remote("");
        assert!(result.is_err());
    }
}