formal-ai 0.156.0

Formal symbolic AI implementation with OpenAI-compatible APIs
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
//! Free-function helpers extracted from `solver.rs` to keep that module under
//! the 1000-line cap enforced by `scripts/check-file-size.rs`. These helpers
//! are pure: they do not access any solver state. Items are declared `pub`
//! inside the `pub(crate)` module so the universal solver in `crate::solver`
//! can call them directly without exposing them outside the crate.
//!
//! Arithmetic evaluation lives in [`crate::arithmetic`] and the offline
//! concept knowledge base lives in [`crate::concepts`]; this module
//! re-exports nothing — callers import those modules directly.

use crate::engine::{ExecutionStatus, ProgramSpec, SelectedRule};
use crate::event_log::EventLog;
use crate::language::{detect as detect_language, Language};

/// One fragment of a compound prompt, as produced by [`record_decomposition`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DecomposedSubImpulse {
    /// Identifier returned by the event log when [`record_decomposition`]
    /// appended this fragment as a `sub_impulse` event.
    pub id: String,
    /// The fragment text; [`record_decomposition`] stores it trimmed of
    /// surrounding whitespace.
    pub text: String,
}

/// Score the solver's confidence in `rule`.
///
/// A recorded validation always yields `1.0`. Without one, an unknown rule
/// scores `0.0`, an unsupported write-program rule scores `0.4`, and every
/// other rule scores `1.0`.
pub const fn confidence_for(rule: &SelectedRule, validation: Option<&ValidationChoice>) -> f32 {
    match validation {
        Some(_) => 1.0,
        None => match rule {
            SelectedRule::Unknown => 0.0,
            SelectedRule::UnsupportedWriteProgram { .. } => 0.4,
            _ => 1.0,
        },
    }
}

/// Return true when the normalized prompt contains a phrase asking for
/// open-ended, never-terminating work (substring match).
pub fn is_unbounded_autonomy(normalized: &str) -> bool {
    const MARKERS: [&str; 7] = [
        "forever",
        "continuously",
        "non-stop",
        "nonstop",
        "indefinitely",
        "without stopping",
        "until i tell you to stop",
    ];
    for marker in MARKERS {
        if normalized.contains(marker) {
            return true;
        }
    }
    false
}

/// Return true when the normalized prompt starts with `forget`, contains
/// `forget ` anywhere, or asks to delete the greeting concept.
pub fn is_forget_request(normalized: &str) -> bool {
    if normalized.starts_with("forget") {
        return true;
    }
    ["forget ", "delete the greeting concept"]
        .iter()
        .any(|phrase| normalized.contains(*phrase))
}

/// Return true when the normalized prompt mentions `cache` together with
/// either `flush` or `clear`.
pub fn is_cache_flush_request(normalized: &str) -> bool {
    if !normalized.contains("cache") {
        return false;
    }
    ["flush", "clear"]
        .iter()
        .any(|verb| normalized.contains(*verb))
}

/// Return true when the normalized prompt contains one of the agent-mode
/// markers: `[agent]`, `enable agent`, or `agent mode`.
pub fn is_agent_request(normalized: &str) -> bool {
    const MARKERS: [&str; 3] = ["[agent]", "enable agent", "agent mode"];
    MARKERS.iter().any(|marker| normalized.contains(*marker))
}

/// Return true when the normalized prompt opts in to agent behaviour via
/// `[agent]`, `enable agent`, or `agent mode` (the same markers that
/// `is_agent_request` checks).
pub fn is_agent_opt_in(normalized: &str) -> bool {
    for marker in ["[agent]", "enable agent", "agent mode"] {
        if normalized.contains(marker) {
            return true;
        }
    }
    false
}

/// Return true when the normalized prompt contains one of the known
/// destructive command phrases (substring match).
pub fn is_destructive_action(normalized: &str) -> bool {
    const DANGEROUS: [&str; 5] = [
        "rm -rf",
        "delete the .git",
        "drop table",
        "delete /",
        "delete the database",
    ];
    DANGEROUS
        .iter()
        .copied()
        .any(|phrase| normalized.contains(phrase))
}

/// Return true when the normalized prompt describes a loop without a bound:
/// `while true`, `infinite loop`, `for one hour`, or `forever`.
pub fn is_unbounded_loop(normalized: &str) -> bool {
    const MARKERS: [&str; 4] = ["while true", "infinite loop", "for one hour", "forever"];
    MARKERS.iter().any(|marker| normalized.contains(*marker))
}

/// Return true when the normalized prompt contains a listed Russian or
/// English vulgarity.
///
/// Matching is by plain substring, so a listed term also matches when it
/// appears inside a longer word.
pub fn is_inappropriate_content(normalized: &str) -> bool {
    // Russian obscene vocabulary (mat), lowercase Cyrillic.
    const RU_VULGAR: &[&str] = &[
        "ебать",
        "ебёт",
        "ебал",
        "ёб",
        "еблан",
        "пизда",
        "пиздец",
        "пиздёж",
        "хуй",
        "хуёв",
        "хуйня",
        "блядь",
        "блядство",
        "залупа",
        "мудак",
        "мудила",
        "шлюха",
        "проститутка",
        "ублюдок",
        "сука",
        "пидор",
        "пидорас",
    ];
    // English profanity / NSFW phrases.
    const EN_VULGAR: &[&str] = &[
        "fuck you",
        "fuckyou",
        "suck my",
        "suck my dick",
        "suck my cock",
        "you suck",
        "eat shit",
        "go to hell",
        "asshole",
        "motherfucker",
        "you fucking",
        "piece of shit",
    ];
    RU_VULGAR
        .iter()
        .chain(EN_VULGAR.iter())
        .any(|term| normalized.contains(*term))
}

/// Return true when the prompt (compared case-insensitively) contains one of
/// the phrases treated as needing an external knowledge lookup.
pub fn requires_external_lookup(prompt: &str) -> bool {
    const CUES: [&str; 5] = [
        "capital of",
        "cite a definition",
        "define associative memory",
        "from wikipedia",
        "born in",
    ];
    let lowered = prompt.to_lowercase();
    CUES.iter().any(|cue| lowered.contains(*cue))
}

/// Split a compound prompt into sub-impulses and record each one in `log`.
///
/// Returns an empty list when `max_depth` is zero or when the lowercased
/// prompt contains none of the compound triggers. Otherwise the original
/// prompt is split on `,`, `;`, `" and "`, and `" with "`; each piece is
/// trimmed, empty pieces are dropped, and every remaining piece is appended
/// to the log as a `sub_impulse` event whose returned id is kept alongside
/// the text.
pub fn record_decomposition(
    log: &mut EventLog,
    prompt: &str,
    max_depth: u8,
) -> Vec<DecomposedSubImpulse> {
    const TRIGGERS: [&str; 4] = [" and ", " with tests", " with benchmarks", "; "];

    if max_depth == 0 {
        return Vec::new();
    }
    let lowered = prompt.to_lowercase();
    let is_compound = TRIGGERS.iter().any(|trigger| lowered.contains(*trigger));
    if !is_compound {
        return Vec::new();
    }

    prompt
        .split([',', ';'])
        .flat_map(|piece| piece.split(" and "))
        .flat_map(|piece| piece.split(" with "))
        .map(str::trim)
        .filter(|piece| !piece.is_empty())
        .map(|piece| DecomposedSubImpulse {
            id: log.append("sub_impulse", piece.to_owned()),
            text: piece.to_owned(),
        })
        .collect()
}

/// Append `candidate` events to `log` for the given prompt.
///
/// Name-suggestion prompts record three fixed names, "pick a" / "choose a"
/// prompts record `primary` and `secondary`, and every other prompt records
/// the supplied `intent` as the single candidate.
pub fn record_candidates(log: &mut EventLog, prompt: &str, intent: &str) {
    let lowered = prompt.to_lowercase();
    let mentions_either = |first: &str, second: &str| {
        lowered.contains(first) || lowered.contains(second)
    };

    let fallback = [intent];
    let candidates: &[&str] = if mentions_either("suggest a name", "suggest names") {
        &["LinkLight", "Doublet", "FormalLeaf"]
    } else if mentions_either("pick a", "choose a") {
        &["primary", "secondary"]
    } else {
        &fallback
    };

    for candidate in candidates {
        log.append("candidate", (*candidate).to_owned());
    }
}

/// Answer selected by a validation pass; see [`record_validation`].
#[derive(Debug, Clone)]
pub struct ValidationChoice {
    /// The validated answer as text. [`record_validation`] fills it with the
    /// decimal rendering of the first prime found in the requested range.
    pub answer: String,
}

/// Validate a "prime in a range" prompt and record the outcome in `log`.
///
/// Prompts that do not mention `prime` are ignored and yield `None` without
/// touching the log. Otherwise the range is read with `extract_range`; the
/// smallest prime inside it becomes the answer and a
/// `prime_between_{low}_and_{high}` validation event is logged. When no
/// range can be read, or the range holds no prime, a `no_prime_in_range`
/// event is logged and `None` is returned.
pub fn record_validation(log: &mut EventLog, prompt: &str) -> Option<ValidationChoice> {
    let lowered = prompt.to_lowercase();
    if !lowered.contains("prime") {
        return None;
    }

    let hit = extract_range(&lowered).and_then(|(low, high)| {
        (low..=high)
            .find(|candidate| is_prime(*candidate))
            .map(|prime| (prime, low, high))
    });

    match hit {
        Some((prime, low, high)) => {
            log.append("validation", format!("prime_between_{low}_and_{high}"));
            Some(ValidationChoice {
                answer: prime.to_string(),
            })
        }
        None => {
            log.append("validation", "no_prime_in_range".to_owned());
            None
        }
    }
}

/// Read an inclusive `(low, high)` range from free text.
///
/// Every maximal run of ASCII digits that parses as `u64` counts as a
/// number (runs that overflow are skipped). The result is `Some` only when
/// exactly two numbers are present and the first does not exceed the second.
pub fn extract_range(lower: &str) -> Option<(u64, u64)> {
    let mut numbers = lower
        .split(|character: char| !character.is_ascii_digit())
        .filter_map(|digits| digits.parse::<u64>().ok());
    let low = numbers.next()?;
    let high = numbers.next()?;
    if numbers.next().is_some() || low > high {
        return None;
    }
    Some((low, high))
}

/// Trial-division primality test usable in const contexts.
///
/// Values below 2 are not prime. Every divisor from 2 up to the integer
/// square root of `value` is tried in turn.
pub const fn is_prime(value: u64) -> bool {
    if value < 2 {
        return false;
    }
    let mut candidate: u64 = 2;
    // `candidate <= value / candidate` holds exactly when candidate squared
    // is at most `value`, and cannot overflow.
    while candidate <= value / candidate {
        if value % candidate == 0 {
            return false;
        }
        candidate += 1;
    }
    true
}

/// Return the text between the first matching pair of quote characters.
///
/// Delimiter styles are tried in order: single quotes, double quotes,
/// backticks, then guillemets. A style is skipped when its opening
/// character is absent or has no closing character after it. The returned
/// phrase may be empty.
pub fn extract_quoted_phrase(text: &str) -> Option<String> {
    const DELIMITERS: [(char, char); 4] = [('\'', '\''), ('"', '"'), ('`', '`'), ('«', '»')];
    DELIMITERS.iter().find_map(|&(open, close)| {
        let (_, after_open) = text.split_once(open)?;
        let (inner, _) = after_open.split_once(close)?;
        Some(inner.to_owned())
    })
}

/// Return the text between the first two backticks, or `None` when the text
/// holds fewer than two backticks.
pub fn extract_backticked(text: &str) -> Option<String> {
    let mut pieces = text.splitn(3, '`');
    pieces.next()?;
    let span = pieces.next()?;
    // A third piece exists only when a closing backtick was found.
    pieces.next()?;
    Some(span.to_owned())
}

/// Find a name the user introduced, preferring the current prompt.
///
/// When the prompt itself yields no name, the `prior_turn:user` events in
/// the log are scanned in log order and the first one that introduces a
/// name wins, so recall works across multi-turn conversations.
pub fn recall_name_from_history(log: &EventLog, prompt: &str) -> Option<String> {
    extract_introduced_name(prompt).or_else(|| {
        log.events()
            .iter()
            .filter(|event| event.kind == "prior_turn:user")
            .find_map(|event| extract_introduced_name(&event.payload))
    })
}

/// Return the payload of the most recent `prior_turn:user` event in the
/// log, if any. Used by "what did I just ask?" style recall handlers.
pub fn last_user_turn(log: &EventLog) -> Option<&str> {
    log.events().iter().rev().find_map(|event| {
        if event.kind == "prior_turn:user" {
            Some(event.payload.as_str())
        } else {
            None
        }
    })
}

/// Return the payload of the most recent `prior_turn:assistant` event in
/// the log, if any. Follow-up handlers such as "how it works?" use it to
/// infer the topic from the previous reply.
pub fn last_assistant_turn(log: &EventLog) -> Option<&str> {
    let latest = log
        .events()
        .iter()
        .rfind(|event| event.kind == "prior_turn:assistant")?;
    Some(latest.payload.as_str())
}

/// Extract the name a user introduces themselves with.
///
/// The prompt is searched, case-insensitively for ASCII letters, for the
/// cues `my name is`, `i am called`, `call me`, `i'm`, and `i am ` in that
/// priority order. The first token after a cue (delimited by whitespace or
/// `. , ! ? ; :`) is stripped of leading and trailing non-alphanumeric
/// characters and returned when it starts with a letter. If the token is
/// rejected, later occurrences of the same cue are tried before moving on
/// to the next cue.
///
/// Returns `None` when no cue is followed by a usable token.
pub fn extract_introduced_name(prompt: &str) -> Option<String> {
    const NEEDLES: [&str; 5] = ["my name is", "i am called", "call me", "i'm", "i am "];

    // ASCII-only lowercasing never changes byte lengths, so offsets found in
    // `lower` are valid char-boundary offsets into `prompt`. Full Unicode
    // lowercasing can grow the text (U+0130 becomes two code points), which
    // would make slicing `prompt` with those offsets panic or misalign. The
    // cues are pure ASCII, so nothing is lost by the narrower folding.
    let lower = prompt.to_ascii_lowercase();

    for needle in NEEDLES {
        let mut search_from = 0;
        while let Some(offset) = lower[search_from..].find(needle) {
            let absolute = search_from + offset + needle.len();
            let first_token = prompt[absolute..]
                .split(|c: char| c.is_whitespace() || matches!(c, '.' | ',' | '!' | '?' | ';' | ':'))
                .find(|token| !token.is_empty());
            let token = match first_token {
                Some(token) => token,
                // Nothing follows this cue, so it cannot occur again later;
                // fall through to the next cue instead of giving up.
                None => break,
            };
            let cleaned = token.trim_matches(|c: char| !c.is_alphanumeric());
            if !cleaned.is_empty() && cleaned.chars().next().is_some_and(char::is_alphabetic) {
                return Some(cleaned.to_owned());
            }
            search_from = absolute;
        }
    }
    None
}

/// Detect a `from <language> ... to <language>` pair in a normalized prompt.
///
/// Returns `(source, target)` slugs when both a `from <lang>` and a
/// `to <lang>` mention are present, otherwise `None`. Languages are tried in
/// table order. A mention only counts when the language name is not
/// immediately followed by another letter, so `to c` no longer matches
/// "to cobol" or "to create"; digits and symbols are still accepted
/// ("python3", "c++"). `golang` is recognised as an alias of `go`.
pub fn detect_program_languages(normalized: &str) -> Option<(&'static str, &'static str)> {
    // (surface form, canonical slug)
    const LANGS: [(&str, &str); 9] = [
        ("python", "python"),
        ("rust", "rust"),
        ("javascript", "javascript"),
        ("typescript", "typescript"),
        ("golang", "go"),
        ("go", "go"),
        ("java", "java"),
        ("c", "c"),
        ("ruby", "ruby"),
    ];

    /// True when `"{keyword} {surface}"` occurs and is not followed by a letter.
    fn names_language(haystack: &str, keyword: &str, surface: &str) -> bool {
        let needle = format!("{keyword} {surface}");
        haystack.match_indices(needle.as_str()).any(|(start, hit)| {
            !haystack[start + hit.len()..]
                .chars()
                .next()
                .is_some_and(char::is_alphabetic)
        })
    }

    let pick = |keyword: &str| {
        LANGS
            .iter()
            .find(|&&(surface, _)| names_language(normalized, keyword, surface))
            .map(|&(_, slug)| slug)
    };

    let from = pick("from")?;
    let to = pick("to")?;
    Some((from, to))
}

/// Translate a tiny program between languages, or report a translation gap.
///
/// Only the `add` function is known: Python code starting with `def add`
/// becomes its Rust form, and Rust code containing `fn add` becomes its
/// Python form. Everything else yields a one-line comment naming the gap,
/// written in the comment syntax of the Python-to-Rust or Rust-to-Python
/// target, or `//` for any other language pair.
pub fn translate_program(code: &str, source: &str, target: &str) -> String {
    let snippet = code.trim();

    if source == "python" && target == "rust" {
        return if snippet.starts_with("def add") {
            "fn add(a: i32, b: i32) -> i32 {\n    a + b\n}".to_owned()
        } else {
            format!("// translation gap for `{snippet}` from python to rust")
        };
    }

    if source == "rust" && target == "python" {
        return if snippet.contains("fn add") {
            "def add(a, b):\n    return a + b".to_owned()
        } else {
            format!("# translation gap for `{snippet}` from rust to python")
        };
    }

    format!("// translation gap from {source} to {target}: {snippet}")
}

/// Reduce code to a comparison key: only ASCII letters and digits are kept,
/// and letters are lowercased.
pub fn normalize_code_meaning(code: &str) -> String {
    let mut key = String::with_capacity(code.len());
    for character in code.chars() {
        if character.is_ascii_alphanumeric() {
            key.push(character.to_ascii_lowercase());
        }
    }
    key
}

/// Normalize a surface fragment into a deterministic key for hashing into a
/// meaning id.
///
/// Each character is lowercased and only alphanumeric characters are kept,
/// so the key is stable across whitespace, casing, and punctuation
/// differences. The result is then passed through
/// [`canonical_meaning_token`]. An earlier implementation consulted a
/// hand-curated registry; that lookup is gone (the real meaning id now
/// comes from Wikidata via the translation pipeline), and this
/// normalization step remains so the legacy hash stays stable.
pub fn normalize_meaning(surface: &str) -> String {
    let mut folded = String::with_capacity(surface.len());
    for character in surface.chars() {
        folded.extend(
            character
                .to_lowercase()
                .filter(|lowered| lowered.is_alphanumeric()),
        );
    }
    canonical_meaning_token(&folded)
}

/// Return the canonical meaning token for an already-normalized surface.
///
/// With the offline registry removed this is the identity function: the
/// input is returned as an owned string. The translation pipeline supplies
/// the language-neutral meaning id when a translation request fires, and
/// callers that only need a hash key feed the normalized surface directly.
pub fn canonical_meaning_token(raw: &str) -> String {
    raw.to_owned()
}

/// Guess the source-language slug of a translation prompt.
///
/// The quoted phrase (or, failing that, the unquoted translation surface)
/// is run through language detection first; any result other than
/// `Language::Unknown` decides the slug. Otherwise the lowercased prompt is
/// checked for Russian, Hindi, and Chinese instruction keywords, in that
/// order, and `"en"` is the final fallback.
pub fn infer_source_from_prompt(prompt: &str) -> &'static str {
    // (instruction keywords, slug), checked in order.
    const KEYWORD_CUES: [(&[&str], &str); 3] = [
        (&["переведи", "опиши"], "ru"),
        (&["अनुवाद"], "hi"),
        (&["翻译", "翻譯"], "zh"),
    ];

    let detected = extract_quoted_phrase(prompt)
        .or_else(|| crate::translation::extract_unquoted_translation_surface(prompt))
        .map(|surface| detect_language(&surface))
        .filter(|language| *language != Language::Unknown);
    if let Some(language) = detected {
        return language.slug();
    }

    let lowered = prompt.to_lowercase();
    KEYWORD_CUES
        .iter()
        .find(|&&(keywords, _)| keywords.iter().any(|keyword| lowered.contains(*keyword)))
        .map_or("en", |&(_, slug)| slug)
}

/// Infer `(source, target)` language slugs from a code snippet and the
/// normalized prompt that accompanies it.
///
/// The source language is guessed from markers in the trimmed code.
/// Definition keywords are checked first (`fn ` for Rust, `def ` for
/// Python, `function ` / `console.log` for JavaScript), and only then the
/// weaker markers (`let ` / `-> ` for Rust, `print(` for Python). This
/// order keeps type-annotated Python (`def f() -> int:`) and JavaScript
/// using `let` from being mistaken for Rust.
///
/// The target is the first language in table order mentioned as
/// `to <lang>` in `normalized`, where the name must not be followed by
/// another letter (so `to c` does not match "to cobol"). `golang` is
/// accepted as an alias of `go`.
///
/// Returns `None` when either side cannot be determined.
pub fn infer_program_languages_from_code(
    code: &str,
    normalized: &str,
) -> Option<(&'static str, &'static str)> {
    // (surface form, canonical slug)
    const TARGETS: [(&str, &str); 9] = [
        ("python", "python"),
        ("rust", "rust"),
        ("javascript", "javascript"),
        ("typescript", "typescript"),
        ("golang", "go"),
        ("go", "go"),
        ("java", "java"),
        ("c", "c"),
        ("ruby", "ruby"),
    ];

    let snippet = code.trim();
    let has = |marker: &str| snippet.contains(marker);
    let source = if has("fn ") {
        "rust"
    } else if has("def ") {
        "python"
    } else if has("function ") || has("console.log") {
        "javascript"
    } else if has("let ") || has("-> ") {
        "rust"
    } else if has("print(") {
        "python"
    } else {
        return None;
    };

    let names_target = |surface: &str| {
        let needle = format!("to {surface}");
        normalized.match_indices(needle.as_str()).any(|(start, hit)| {
            !normalized[start + hit.len()..]
                .chars()
                .next()
                .is_some_and(char::is_alphabetic)
        })
    };
    let target = TARGETS
        .iter()
        .find(|&&(surface, _)| names_target(surface))
        .map(|&(_, slug)| slug)?;
    Some((source, target))
}

/// Translate `surface` and return the full pipeline result so callers can
/// inspect the meaning id, candidate list, and provenance trail.
///
/// This is a direct pass-through to
/// [`crate::translation::translate_via_default_pipeline`]; `source` and
/// `target` are forwarded unchanged.
///
/// The caller is responsible for matching the source's leading case and
/// terminal punctuation; see [`crate::translation::match_source_formatting`].
///
/// # Errors
///
/// Returns whatever [`crate::translation::HttpError`] the default pipeline
/// reports.
pub fn translate_surface_detailed(
    surface: &str,
    source: &str,
    target: &str,
) -> Result<crate::translation::Translation, crate::translation::HttpError> {
    crate::translation::translate_via_default_pipeline(surface, source, target)
}

/// Return the quoted concept from an introspection query.
///
/// The prompt must contain `what do you know about` or `introspect`
/// (case-insensitively); the concept is then the first quoted phrase in the
/// original prompt. Returns `None` when the prompt is not such a query or
/// holds no quoted phrase.
pub fn extract_concept_from_query(prompt: &str) -> Option<String> {
    let lowered = prompt.to_lowercase();
    let is_introspection = ["what do you know about", "introspect"]
        .iter()
        .any(|cue| lowered.contains(*cue));
    if is_introspection {
        extract_quoted_phrase(prompt)
    } else {
        None
    }
}

/// Pick the programming language an algorithm request asks for.
///
/// Cues are tried in table order and the first hit wins; `"python"` is the
/// fallback. Full language names match as substrings. The short aliases
/// `py`, `rs`, and `go` must appear as whole words (delimited by
/// non-alphanumeric characters or the ends of the text), so "algo",
/// "ago", or "django" no longer select Go, while "in go" at the very end
/// of the prompt now does.
pub fn detect_algorithm_language(normalized: &str) -> &'static str {
    // (needle, must match as a whole word, slug)
    const CUES: [(&str, bool, &str); 10] = [
        ("python", false, "python"),
        ("py", true, "python"),
        ("rust", false, "rust"),
        ("rs", true, "rust"),
        ("javascript", false, "javascript"),
        ("typescript", false, "typescript"),
        ("go", true, "go"),
        ("golang", false, "go"),
        ("java", false, "java"),
        ("ruby", false, "ruby"),
    ];

    let has_word = |word: &str| {
        normalized
            .split(|c: char| !c.is_alphanumeric())
            .any(|token| token == word)
    };

    CUES.iter()
        .find(|&&(needle, whole_word, _)| {
            if whole_word {
                has_word(needle)
            } else {
                normalized.contains(needle)
            }
        })
        .map_or("python", |&(_, _, slug)| slug)
}

/// Build the canned "sorting algorithm" answer for `lang`.
///
/// Rust, JavaScript, and TypeScript have dedicated snippets; every other
/// language falls back to the Python snippet. The heading names the
/// language of the snippet that is actually shown, so a request for an
/// unsupported language is answered as "in python" rather than claiming
/// the Python code is written in the requested language. Output for the
/// supported languages is unchanged.
///
/// When `with_tests` is true a second fenced block with a test is included.
pub fn build_sorting_algorithm_answer(lang: &str, with_tests: bool) -> String {
    // `shown` is both the fence tag and the language named in the heading.
    let (shown, code, tests) = match lang {
        "rust" => (
            "rust",
            "fn sort(values: &mut Vec<i32>) {\n    values.sort();\n}",
            "#[test]\nfn test_sort_ascending() {\n    let mut v = vec![3, 1, 2];\n    sort(&mut v);\n    assert_eq!(v, vec![1, 2, 3]);\n}",
        ),
        "javascript" | "typescript" => (
            lang,
            "function sort(values) {\n  return [...values].sort((a, b) => a - b);\n}",
            "function test_sort_ascending() {\n  assert.deepEqual(sort([3,1,2]), [1,2,3]);\n}",
        ),
        _ => (
            "python",
            "def sort(values):\n    return sorted(values)\n",
            "def test_sort_ascending():\n    assert sort([3, 1, 2]) == [1, 2, 3]\n",
        ),
    };

    if with_tests {
        format!(
            "Here is a reviewable sorting algorithm in {shown} with a test:\n\n```{shown}\n{code}\n```\n\nTests:\n```{shown}\n{tests}\n```\n\nExecution status: unavailable in this runtime. The snippet is intended to be copy-paste reviewable."
        )
    } else {
        format!(
            "Here is a reviewable sorting algorithm in {shown}:\n\n```{shown}\n{code}\n```\n\nExecution status: unavailable in this runtime. The snippet is intended to be copy-paste reviewable."
        )
    }
}

/// Extract a JavaScript program from a prompt that asks the solver to run it.
///
/// The prompt must contain one of the run/execute/evaluate phrases
/// (case-insensitively); otherwise `None` is returned. The program body is
/// then taken from the first source that yields one: a fenced block tagged
/// `javascript` or `js` (or untagged), the first single-backtick span, or
/// the first quoted phrase.
pub fn extract_javascript_program(prompt: &str) -> Option<String> {
    const RUN_PHRASES: [&str; 8] = [
        "run this javascript",
        "run this js",
        "execute this javascript",
        "execute this js",
        "run the following javascript",
        "run the following js",
        "evaluate this javascript",
        "evaluate this js",
    ];

    let lowered = prompt.to_lowercase();
    if !RUN_PHRASES.iter().any(|phrase| lowered.contains(*phrase)) {
        return None;
    }

    extract_fenced_block(prompt, &["javascript", "js"])
        .or_else(|| extract_backticked(prompt))
        .or_else(|| extract_quoted_phrase(prompt))
}

/// Render a percent-encoded URL in its readable IRI form (RFC 3987).
///
/// Every `%XX` escape is decoded except those that stand for a reserved URI
/// delimiter (`; / ? : @ & = + $ , #`), which are copied through still
/// encoded so query strings and fragments keep resolving. A `%` that is not
/// followed by two hex digits is copied as-is. The input is returned
/// unchanged when it contains no `%` or when the decoded bytes are not
/// valid UTF-8.
///
/// Mirrors the JavaScript `decodeURI` semantics used in
/// `src/web/formal_ai_worker.js::humanizeUrl` so Wikipedia source links render
/// identically across every formal-ai surface (issue #21).
#[must_use]
pub fn humanize_url(url: &str) -> String {
    if !url.contains('%') {
        return url.to_owned();
    }
    let raw = url.as_bytes();
    let mut decoded: Vec<u8> = Vec::with_capacity(raw.len());
    let mut pos = 0usize;
    while let Some(&current) = raw.get(pos) {
        // A well-formed escape is `%` followed by two hex digits.
        let escape = match raw.get(pos..pos + 3) {
            Some([b'%', high, low]) => hex_nibble(*high).zip(hex_nibble(*low)),
            _ => None,
        };
        match escape {
            Some((high, low)) => {
                let value = (high << 4) | low;
                if is_reserved_uri_delimiter(value) {
                    decoded.extend_from_slice(&raw[pos..pos + 3]);
                } else {
                    decoded.push(value);
                }
                pos += 3;
            }
            None => {
                decoded.push(current);
                pos += 1;
            }
        }
    }
    String::from_utf8(decoded).unwrap_or_else(|_| url.to_owned())
}

/// Convert one ASCII hex digit (either case) to its value 0-15, or `None`
/// when the byte is not a hex digit.
const fn hex_nibble(byte: u8) -> Option<u8> {
    match byte.to_ascii_lowercase() {
        digit @ b'0'..=b'9' => Some(digit - b'0'),
        letter @ b'a'..=b'f' => Some(letter - b'a' + 10),
        _ => None,
    }
}

/// Return true when `byte` is one of the reserved URI delimiters
/// `; / ? : @ & = + $ , #`.
const fn is_reserved_uri_delimiter(byte: u8) -> bool {
    const RESERVED: &[u8] = b";/?:@&=+$,#";
    let mut index = 0;
    while index < RESERVED.len() {
        if RESERVED[index] == byte {
            return true;
        }
        index += 1;
    }
    false
}

/// Find a fenced code block whose info string matches one of the supplied
/// languages and return its body.
///
/// The info string is the rest of the line after the opening fence,
/// trimmed. A block matches when its info string is empty or equals one of
/// `languages`, compared ASCII case-insensitively on both sides. Blocks
/// with a different info string are skipped and the search continues after
/// their closing fence.
///
/// The body is returned with trailing line terminators removed, both `\n`
/// and `\r`, so prompts with Windows line endings do not leave a stray
/// carriage return at the end of the program.
///
/// Returns `None` when there is no matching block or when an opening fence
/// has no closing fence.
pub fn extract_fenced_block(text: &str, languages: &[&str]) -> Option<String> {
    const FENCE: &str = "```";
    let mut cursor = 0usize;
    while let Some(open_offset) = text[cursor..].find(FENCE) {
        let info_start = cursor + open_offset + FENCE.len();
        let info_end = text[info_start..]
            .find('\n')
            .map_or(text.len(), |newline| info_start + newline);
        let info = text[info_start..info_end].trim();
        // Skip the newline that ends the info line, unless the text ends there.
        let body_start = (info_end + 1).min(text.len());
        let body_end = body_start + text[body_start..].find(FENCE)?;
        let matches_language = info.is_empty()
            || languages
                .iter()
                .any(|language| info.eq_ignore_ascii_case(language));
        if matches_language {
            let body = text[body_start..body_end].trim_end_matches(['\r', '\n']);
            return Some(body.to_owned());
        }
        cursor = body_end + FENCE.len();
    }
    None
}

/// Return true when the normalized prompt is a "write a script/code in
/// <language>" request in English, Russian, Hindi, or Chinese.
///
/// Prompts mentioning both `hello` and `world`, or mentioning `program`,
/// are excluded so the parametric write-program route keeps handling them.
pub fn is_write_script_request(normalized: &str) -> bool {
    let has = |needle: &str| normalized.contains(needle);

    // Hello-world and "write a program" prompts belong to the parametric
    // write-program formalization.
    let is_hello_world = has("hello") && has("world");
    if is_hello_world || has("program") {
        return false;
    }

    // English: "write" plus "script" or "code".
    let english = has("write") && (has("script") || has("code"));
    // Russian: "напиши" / "написать" (write) plus "скрипт" or "код".
    let russian = (has("напиши") || has("написать")) && (has("скрипт") || has("код"));
    // Hindi: "लिखो" / "लिखें" (write); no noun is required.
    let hindi = has("लिखो") || has("लिखें");
    // Chinese: "写一个" (write one) or "帮我写" (help me write).
    let chinese = has("写一个") || has("帮我写");

    english || russian || hindi || chinese
}

/// Render the execution summary text for a generated program.
///
/// The text is built from `program.language.execution` and
/// `program.task.output` and contains, in order:
/// 1. the status label and the environment,
/// 2. the check command (only when one is present) followed by the run command,
/// 3. the task output in a fenced `text` block, headed "Output" when the status
///    is `ExecutionStatus::Verified` and "Expected output after verification"
///    otherwise,
/// 4. the execution notes.
pub fn format_write_script_execution(program: ProgramSpec) -> String {
    let execution = &program.language.execution;

    // The run command is always shown; the check command precedes it when set.
    let run_line = format!("Run command: `{}`", execution.run_command);
    let command_lines = match execution.check_command {
        Some(check) => format!("Check command: `{check}`\n{run_line}"),
        None => run_line,
    };

    // Only a verified run may present the output as an observed fact.
    let output_heading = match execution.status {
        ExecutionStatus::Verified => "Output",
        _ => "Expected output after verification",
    };

    format!(
        "Execution status: {} in {}.\n{}\n{}:\n```text\n{}\n```\n{}",
        execution.status.label(),
        execution.environment,
        command_lines,
        output_heading,
        program.task.output,
        execution.notes
    )
}

// Unit tests for the solver helpers: configuration defaults, intent routing,
// concept extraction/lookup, fenced-block extraction, and `humanize_url`.
#[cfg(test)]
mod tests {
    use super::{extract_fenced_block, extract_javascript_program, humanize_url, is_prime};
    use crate::concepts::{extract_concept_query, lookup_concept_query, ConceptQuery};
    use crate::solver::{SolverConfig, UniversalSolver};

    // True when `lookup_concept_query` finds something for `term` with no
    // context attached.
    fn lookup_term(term: &str) -> bool {
        lookup_concept_query(&ConceptQuery {
            term: term.to_owned(),
            context: None,
        })
        .is_some()
    }

    // Runs `extract_concept_query` on `prompt` and keeps only the term,
    // discarding any context.
    fn extract_term(prompt: &str) -> Option<String> {
        extract_concept_query(prompt).map(|q| q.term)
    }

    // The default config has agent, diagnostic, and offline modes switched
    // off and a decomposition depth limit of 4.
    #[test]
    fn defaults_are_bounded_and_offline_capable() {
        let config = SolverConfig::default();
        assert!(!config.agent_mode);
        assert!(!config.diagnostic_mode);
        assert!(!config.offline);
        assert_eq!(config.max_decomposition_depth, 4);
    }

    // A bare greeting must be classified as "greeting" and carry evidence
    // links with the `impulse:`, `search:local`, and `trace:` prefixes.
    #[test]
    fn greeting_walks_the_universal_loop() {
        let response = UniversalSolver::default().solve("Hi");
        assert_eq!(response.intent, "greeting");
        assert!(response
            .evidence_links
            .iter()
            .any(|link| link.starts_with("impulse:")));
        assert!(response
            .evidence_links
            .iter()
            .any(|link| link.starts_with("search:local")));
        assert!(response
            .evidence_links
            .iter()
            .any(|link| link.starts_with("trace:")));
    }

    // Regression guard for the keyword/token split in intent-routing.lino:
    // before the fix, "hello" was a greeting keyword matched via `contains_token`,
    // so any multi-word prompt that mentioned "hello" (like a hello-world request)
    // got misrouted to greeting. After the fix, keywords must match the whole
    // prompt exactly, and only the dedicated `token "greet"` uses contains.
    #[test]
    fn hello_world_request_is_not_routed_to_greeting() {
        let response = UniversalSolver::default().solve("Write me hello world program in Rust");
        assert_ne!(
            response.intent, "greeting",
            "answer was: {}",
            response.answer
        );
        assert_eq!(response.intent, "write_program");
        assert!(
            response.answer.to_lowercase().contains("rust"),
            "expected Rust hello world, got: {}",
            response.answer
        );
    }

    // 17 is the only prime in 14..=18, so the answer must mention it.
    #[test]
    fn prime_validation_picks_seventeen_in_range() {
        let response = UniversalSolver::default().solve("Pick a prime number between 14 and 18");
        assert!(response.answer.contains("17"));
    }

    // Direct check of the primality helper on one prime and one composite.
    #[test]
    fn prime_check_recognizes_seventeen() {
        assert!(is_prime(17));
        assert!(!is_prime(15));
    }

    // Known terms resolve (including one with a leading article); an unknown
    // term does not.
    #[test]
    fn concept_lookup_finds_seeded_terms() {
        assert!(lookup_term("Wikipedia"));
        assert!(lookup_term("links notation"));
        assert!(lookup_term("the event log"));
        assert!(lookup_term("doublet link"));
        assert!(lookup_term("WebAssembly"));
        assert!(!lookup_term("unknown-concept-xyz"));
    }

    // English question prefixes are stripped and the term comes back
    // lowercased; a bare greeting yields no term at all.
    #[test]
    fn concept_extraction_handles_common_prefixes() {
        assert_eq!(
            extract_term("What is Wikipedia?").as_deref(),
            Some("wikipedia"),
        );
        assert_eq!(
            extract_term("Tell me about Links Notation").as_deref(),
            Some("links notation"),
        );
        assert_eq!(
            extract_term("What does Wikidata mean?").as_deref(),
            Some("wikidata"),
        );
        assert_eq!(extract_term("Hi"), None);
        assert_eq!(extract_term("What is 2 + 2?").as_deref(), Some("2 + 2"));
    }

    // Both "who is X" and the inverted "who X is" word order are covered.
    #[test]
    fn concept_extraction_handles_who_is_variants() {
        assert_eq!(
            extract_term("Tell me, who is Trump").as_deref(),
            Some("trump"),
        );
        assert_eq!(extract_term("Who Trump is").as_deref(), Some("trump"));
    }

    // Russian, Hindi, and Chinese question forms, including the Chinese
    // pattern where the question words follow the term.
    #[test]
    fn concept_extraction_handles_multilingual_prefixes() {
        assert_eq!(
            extract_term("Что такое Википедия?").as_deref(),
            Some("википедия"),
        );
        assert_eq!(
            extract_term("Расскажи про Links Notation").as_deref(),
            Some("links notation"),
        );
        assert_eq!(
            extract_term("विकिपीडिया क्या है?").as_deref(),
            Some("विकिपीडिया"),
        );
        assert_eq!(extract_term("维基百科是什么?").as_deref(), Some("维基百科"),);
        assert_eq!(extract_term("什么是 Rust?").as_deref(), Some("rust"));
    }

    // Non-English spellings and alternative names resolve as well.
    #[test]
    fn concept_lookup_finds_multilingual_aliases() {
        assert!(lookup_term("Википедия"));
        assert!(lookup_term("विकिपीडिया"));
        assert!(lookup_term("维基百科"));
        assert!(lookup_term("recursive digital filter"));
        assert!(lookup_term("IIR滤波器"));
    }

    // "X in Y" splits into term X and context Y, both lowercased.
    #[test]
    fn concept_query_splits_term_and_context() {
        let query = extract_concept_query("what is IIR in ML?").expect("should extract");
        assert_eq!(query.term, "iir");
        assert_eq!(query.context.as_deref(), Some("ml"));
    }

    // The Russian preposition "в" acts as the term/context delimiter.
    #[test]
    fn concept_query_handles_russian_context_delimiter() {
        let query = extract_concept_query("что такое iir в ml").expect("should extract");
        assert_eq!(query.term, "iir");
        assert_eq!(query.context.as_deref(), Some("ml"));
    }

    #[test]
    fn concept_query_handles_hindi_context_first() {
        let query = extract_concept_query("ML में IIR क्या है").expect("should extract");
        // Hindi puts context before the concept; the parser captures it as
        // the lexical term half. The lookup_concept_query swaps order as
        // needed when ranking against records.
        assert!(query.term == "ml" || query.term == "iir");
        assert!(query.context.is_some());
    }

    // Chinese also places the context first; as in the Hindi case, either
    // half may be reported as the term as long as a context is present.
    #[test]
    fn concept_query_handles_chinese_context_first() {
        let query = extract_concept_query("ML中的IIR是什么").expect("should extract");
        assert!(query.term == "ml" || query.term == "iir");
        assert!(query.context.is_some());
    }

    // A prompt that asks to run the code yields the fenced block's body.
    #[test]
    fn javascript_extraction_finds_fenced_program() {
        let prompt = "Please run this javascript:\n```js\nconsole.log(1 + 2);\n```";
        let body = extract_javascript_program(prompt).expect("should extract");
        assert_eq!(body, "console.log(1 + 2);");
    }

    // The same kind of fenced block without a run request yields nothing.
    #[test]
    fn javascript_extraction_requires_explicit_request() {
        let prompt = "Here is some javascript:\n```js\nconsole.log(1);\n```";
        assert_eq!(extract_javascript_program(prompt), None);
    }

    // The earlier python block is skipped in favour of the requested js block.
    #[test]
    fn fenced_block_picks_matching_language() {
        let text = "intro\n```python\nprint(1)\n```\nthen\n```js\nconsole.log(2)\n```";
        assert_eq!(
            extract_fenced_block(text, &["js"]).as_deref(),
            Some("console.log(2)"),
        );
    }

    // 7 * (3 + 4) = 49; the response must be a "calculation" with matching
    // evidence.
    #[test]
    fn universal_solver_answers_arithmetic_via_evaluator() {
        let response = UniversalSolver::default().solve("What is 7 * (3 + 4)?");
        assert_eq!(response.intent, "calculation");
        assert!(response.answer.contains("49"));
        assert!(response
            .evidence_links
            .iter()
            .any(|link| link.starts_with("calculation")));
    }

    // A name introduced in an earlier user turn is recalled from history.
    #[test]
    fn universal_solver_recalls_introduced_name() {
        // NOTE(review): `UniversalSolver` is already imported at module level;
        // only `ConversationTurn` is new in this local import.
        use crate::solver::{ConversationTurn, UniversalSolver};
        let history = [ConversationTurn::user("My name is Ada.")];
        let response = UniversalSolver::default().solve_with_history("What is my name?", &history);
        assert_eq!(response.intent, "recall_name");
        assert!(response.answer.contains("Ada"));
    }

    // A "What is X?" prompt for a known concept routes to concept lookup.
    #[test]
    fn universal_solver_looks_up_concept() {
        let response = UniversalSolver::default().solve("What is Wikipedia?");
        assert_eq!(response.intent, "concept_lookup");
        assert!(response.answer.to_lowercase().contains("wikipedia"));
    }

    // NOTE(review): both assertions here are already made by
    // `defaults_are_bounded_and_offline_capable`; consider merging the two.
    #[test]
    fn solver_config_default_is_offline_capable() {
        let config = SolverConfig::default();
        assert!(!config.offline);
        assert!(!config.agent_mode);
    }

    // ---------------------------------------------------------------------
    // Issue #21: humanize_url renders percent-encoded URLs as readable IRIs
    // across every language while preserving query strings and falling back
    // gracefully on malformed input.
    // ---------------------------------------------------------------------

    // Two-byte UTF-8 sequences (Cyrillic) are decoded into readable text.
    #[test]
    fn humanize_url_decodes_cyrillic_percent_escapes() {
        let encoded = "https://ru.wikipedia.org/wiki/%D0%98%D0%B7%D1%83%D0%BC%D1%80%D1%83%D0%B4";
        assert_eq!(
            humanize_url(encoded),
            "https://ru.wikipedia.org/wiki/Изумруд",
        );
    }

    // Three-byte UTF-8 sequences (Devanagari, with combining marks) decode too.
    #[test]
    fn humanize_url_decodes_devanagari_percent_escapes() {
        let encoded =
            "https://hi.wikipedia.org/wiki/%E0%A4%A8%E0%A4%AE%E0%A4%B8%E0%A5%8D%E0%A4%A4%E0%A5%87";
        assert_eq!(humanize_url(encoded), "https://hi.wikipedia.org/wiki/नमस्ते");
    }

    // Three-byte UTF-8 sequences for CJK characters decode as well.
    #[test]
    fn humanize_url_decodes_chinese_percent_escapes() {
        let encoded = "https://zh.wikipedia.org/wiki/%E4%BD%A0%E5%A5%BD";
        assert_eq!(humanize_url(encoded), "https://zh.wikipedia.org/wiki/你好");
    }

    #[test]
    fn humanize_url_preserves_reserved_uri_delimiters() {
        // `?`, `&`, `=`, `#`, `/`, `:` must remain percent-encoded so that
        // structural meaning of the URI is not disturbed during display.
        let encoded = "https://example.com/path?a%3Db%26c%3Dd#frag%2Fpart";
        assert_eq!(humanize_url(encoded), encoded);
    }

    #[test]
    fn humanize_url_preserves_query_string_values_around_decoded_path() {
        // The path segment is decoded; the query stays encoded.
        let encoded = "https://ru.wikipedia.org/wiki/%D0%98%D0%B7%D1%83%D0%BC%D1%80%D1%83%D0%B4?utm_source=demo&page=1";
        assert_eq!(
            humanize_url(encoded),
            "https://ru.wikipedia.org/wiki/Изумруд?utm_source=demo&page=1",
        );
    }

    // A URL without any `%` escape comes back unchanged.
    #[test]
    fn humanize_url_returns_original_when_no_percent_escapes_present() {
        let url = "https://en.wikipedia.org/wiki/Albert_Einstein";
        assert_eq!(humanize_url(url), url);
    }

    #[test]
    fn humanize_url_passes_through_malformed_escapes() {
        // `%ZZ` is not a valid escape — leave the bytes as-is rather than
        // throwing. This matches the JS `decodeURI` fallback strategy
        // (catch URIError → return original).
        let url = "https://example.com/%ZZbroken";
        assert_eq!(humanize_url(url), url);
    }

    // A `%` with no hex digits after it (end of input) is left untouched.
    #[test]
    fn humanize_url_handles_truncated_trailing_percent() {
        let url = "https://example.com/path%";
        assert_eq!(humanize_url(url), url);
    }

    // Hex digits in escapes may be lower-case or mixed-case.
    #[test]
    fn humanize_url_accepts_lowercase_hex_digits() {
        let encoded = "https://ru.wikipedia.org/wiki/%d0%98%d0%b7%d1%83%d0%bc%d1%80%d1%83%d0%b4";
        assert_eq!(
            humanize_url(encoded),
            "https://ru.wikipedia.org/wiki/Изумруд",
        );
    }

    #[test]
    fn humanize_url_decodes_only_invalid_utf8_returns_original() {
        // A lone continuation byte (0x80) is not valid UTF-8; fall back to
        // the original URL rather than emitting broken text.
        let url = "https://example.com/%80";
        assert_eq!(humanize_url(url), url);
    }

    // Text that is already decoded passes through while the remaining
    // escapes in the same path are still decoded.
    #[test]
    fn humanize_url_decodes_mixed_already_decoded_and_encoded_path() {
        let encoded = "https://ru.wikipedia.org/wiki/Изумруд_%28минерал%29";
        // `(` is `%28` and not a reserved delimiter, so it decodes.
        assert_eq!(
            humanize_url(encoded),
            "https://ru.wikipedia.org/wiki/Изумруд_(минерал)",
        );
    }
}