Skip to main content

groundmodels_core/
soil_description.rs

1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3
/// Default minimum similarity score a candidate must reach to be accepted by
/// [`fuzzy_match`]; `classify_word` passes this value when spell-correcting.
pub const DEFAULT_FUZZY_THRESHOLD: f64 = 0.8;
5
/// Broad material category assigned to a parsed description.
///
/// Serialized with kebab-case variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum MaterialType {
    /// The description refers to a soil.
    Soil,
    /// The description refers to a rock.
    Rock,
}
12
/// Consistency term recognised in a description, including the range terms
/// ("soft to firm", ...) that span two adjacent single terms.
///
/// Serialized with kebab-case variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Hash)]
#[serde(rename_all = "kebab-case")]
pub enum Consistency {
    VerySoft,
    Soft,
    Firm,
    Stiff,
    VeryStiff,
    Hard,
    /// Range term "soft to firm".
    SoftToFirm,
    /// Range term "firm to stiff".
    FirmToStiff,
    /// Range term "stiff to very stiff".
    StiffToVeryStiff,
}
26
/// Density term recognised in a description ("very loose" .. "very dense").
///
/// Serialized with kebab-case variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Hash)]
#[serde(rename_all = "kebab-case")]
pub enum Density {
    VeryLoose,
    Loose,
    MediumDense,
    Dense,
    VeryDense,
}
36
/// Primary soil type recognised in a description.
///
/// `is_cohesive` and `is_granular` split these variants into two groups.
/// Serialized with kebab-case variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Hash)]
#[serde(rename_all = "kebab-case")]
pub enum SoilType {
    Clay,
    Silt,
    Sand,
    Gravel,
    Peat,
    Organic,
    Cobbles,
    Boulders,
}
49
/// Primary rock type recognised in a description.
///
/// Serialized with kebab-case variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Hash)]
#[serde(rename_all = "kebab-case")]
pub enum RockType {
    Limestone,
    Sandstone,
    Mudstone,
    Shale,
    Granite,
    Basalt,
}
60
/// Moisture term recognised in a description ("dry", "moist", "wet", "saturated").
///
/// Serialized with kebab-case variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum MoistureContent {
    Dry,
    Moist,
    Wet,
    Saturated,
}
69
/// Plasticity term recognised in a description.
///
/// Unlike the other enums in this file, each variant is serialized under the
/// explicit human-readable name given by its `#[serde(rename = ...)]` attribute.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum Plasticity {
    #[serde(rename = "Non-Plastic")]
    NonPlastic,
    #[serde(rename = "Low Plasticity")]
    LowPlasticity,
    #[serde(rename = "Intermediate Plasticity")]
    IntermediatePlasticity,
    #[serde(rename = "High Plasticity")]
    HighPlasticity,
    #[serde(rename = "Extremely High Plasticity")]
    ExtremelyHighPlasticity,
}
83
/// Kind of strength parameter carried by a [`StrengthParameters`] value.
///
/// Serialized with kebab-case variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum StrengthParameterType {
    /// Emitted by `derive_strength_parameters` for rock, with units "MPa".
    Ucs,
    /// Emitted by `derive_strength_parameters` for cohesive soil, with units "kPa".
    UndrainedShear,
    /// Emitted by `derive_strength_parameters` for granular soil, with units "blows".
    SptNValue,
    /// Not produced by any code visible in this part of the file.
    FrictionAngle,
}
92
/// A secondary constituent of a soil, taken from an adjective token such as
/// "sandy", optionally qualified by a preceding proportion word.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SecondaryConstituent {
    /// Proportion word (e.g. "slightly") seen before the adjective, if any.
    pub amount: Option<String>,
    /// Text of the adjective token (e.g. "sandy").
    pub soil_type: String,
}
98
/// Numeric range for a strength parameter; the unit is given by the
/// `units` field of the enclosing [`StrengthParameters`].
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct StrengthRange {
    /// Lower end of the range.
    pub lower_bound: f64,
    /// Upper end of the range.
    pub upper_bound: f64,
    /// Representative value within the range.
    pub typical_value: f64,
}
105
/// A strength estimate attached to a parsed description.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StrengthParameters {
    /// Which parameter the range describes.
    pub parameter_type: StrengthParameterType,
    /// Estimated range of the parameter.
    pub value_range: StrengthRange,
    /// Confidence in the estimate; `derive_strength_parameters` always uses 0.8.
    pub confidence: f64,
    /// Unit label for `value_range` (e.g. "kPa", "MPa", "blows").
    pub units: String,
}
113
/// Record of one word that was replaced during tokenization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SpellingCorrection {
    /// The word as it appeared in the input.
    pub original: String,
    /// The term it was replaced with.
    pub corrected: String,
    /// Similarity score of the replacement; 1.0 for typo-dictionary hits.
    pub score: f64,
}
120
/// Structured result of parsing a free-text soil or rock description.
///
/// Produced from a [`SoilDescriptionBuilder`] by `build_soil_description`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SoilDescription {
    /// The text that was parsed, unmodified.
    pub raw_description: String,
    /// Soil or rock; the parser starts from soil and switches on rock tokens.
    pub material_type: Option<MaterialType>,
    /// Parse confidence; reduced for spelling corrections and for a missing primary type.
    pub confidence: f64,
    /// Validity flag; `parse_soil_description` sets it to `true`.
    pub is_valid: bool,
    /// Last consistency term found, if any.
    pub consistency: Option<Consistency>,
    /// Last density term found, if any.
    pub density: Option<Density>,
    /// Last soil type found, if any.
    pub primary_soil_type: Option<SoilType>,
    /// Rock strength term with spaces replaced by hyphens.
    pub rock_strength: Option<String>,
    /// Weathering term with spaces replaced by hyphens.
    pub weathering_grade: Option<String>,
    /// Text of the last "rock-structure" token, if any.
    pub rock_structure: Option<String>,
    /// Last rock type found, if any.
    pub primary_rock_type: Option<RockType>,
    /// One entry per adjective token, in input order.
    pub secondary_constituents: Vec<SecondaryConstituent>,
    /// Text of the last colour token, if any.
    pub color: Option<String>,
    /// Last moisture term found, if any.
    pub moisture_content: Option<MoistureContent>,
    /// Text of the last particle-size token, if any.
    pub particle_size: Option<String>,
    /// Last plasticity term found, if any.
    pub plasticity: Option<Plasticity>,
    /// Strength estimates derived from the parsed fields.
    pub strength_parameters: Vec<StrengthParameters>,
    /// Every spelling correction applied while tokenizing.
    pub spelling_corrections: Vec<SpellingCorrection>,
    /// Free-text warnings collected during parsing.
    pub warnings: Vec<String>,
}
143
/// Mutable working state used while parsing a description.
///
/// Holds exactly the same fields as [`SoilDescription`]; `create_empty_builder`
/// creates one and `build_soil_description` converts it field-for-field.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SoilDescriptionBuilder {
    pub raw_description: String,
    pub material_type: Option<MaterialType>,
    pub confidence: f64,
    pub is_valid: bool,
    pub consistency: Option<Consistency>,
    pub density: Option<Density>,
    pub primary_soil_type: Option<SoilType>,
    pub rock_strength: Option<String>,
    pub weathering_grade: Option<String>,
    pub rock_structure: Option<String>,
    pub primary_rock_type: Option<RockType>,
    pub secondary_constituents: Vec<SecondaryConstituent>,
    pub color: Option<String>,
    pub moisture_content: Option<MoistureContent>,
    pub particle_size: Option<String>,
    pub plasticity: Option<Plasticity>,
    pub strength_parameters: Vec<StrengthParameters>,
    pub spelling_corrections: Vec<SpellingCorrection>,
    pub warnings: Vec<String>,
}
166
167pub fn create_empty_builder(raw_description: &str) -> SoilDescriptionBuilder {
168    SoilDescriptionBuilder {
169        raw_description: raw_description.to_string(),
170        material_type: None,
171        confidence: 0.0,
172        is_valid: false,
173        consistency: None,
174        density: None,
175        primary_soil_type: None,
176        rock_strength: None,
177        weathering_grade: None,
178        rock_structure: None,
179        primary_rock_type: None,
180        secondary_constituents: Vec::new(),
181        color: None,
182        moisture_content: None,
183        particle_size: None,
184        plasticity: None,
185        strength_parameters: Vec::new(),
186        spelling_corrections: Vec::new(),
187        warnings: Vec::new(),
188    }
189}
190
191pub fn build_soil_description(builder: SoilDescriptionBuilder) -> SoilDescription {
192    SoilDescription {
193        raw_description: builder.raw_description,
194        material_type: builder.material_type,
195        confidence: builder.confidence,
196        is_valid: builder.is_valid,
197        consistency: builder.consistency,
198        density: builder.density,
199        primary_soil_type: builder.primary_soil_type,
200        rock_strength: builder.rock_strength,
201        weathering_grade: builder.weathering_grade,
202        rock_structure: builder.rock_structure,
203        primary_rock_type: builder.primary_rock_type,
204        secondary_constituents: builder.secondary_constituents,
205        color: builder.color,
206        moisture_content: builder.moisture_content,
207        particle_size: builder.particle_size,
208        plasticity: builder.plasticity,
209        strength_parameters: builder.strength_parameters,
210        spelling_corrections: builder.spelling_corrections,
211        warnings: builder.warnings,
212    }
213}
214
215pub fn is_cohesive(soil_type: SoilType) -> bool {
216    matches!(
217        soil_type,
218        SoilType::Clay | SoilType::Silt | SoilType::Peat | SoilType::Organic
219    )
220}
221
222pub fn is_granular(soil_type: SoilType) -> bool {
223    matches!(
224        soil_type,
225        SoilType::Sand | SoilType::Gravel | SoilType::Cobbles | SoilType::Boulders
226    )
227}
228
/// One lexical unit produced by `tokenize`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Token {
    /// Category label such as "consistency", "soil-type", "color" or "unknown".
    pub token_type: String,
    /// Token text; for spell-corrected words this is the corrected term.
    pub value: String,
    /// Byte offset in the input where the token starts.
    pub start: usize,
    /// Byte offset in the input just past the end of the token.
    pub end: usize,
    /// The original input word when `value` was spell-corrected.
    pub corrected_from: Option<String>,
    /// Similarity score of the correction, when one was applied.
    pub similarity_score: Option<f64>,
}
238
/// Best candidate found by [`fuzzy_match`].
#[derive(Debug, Clone, Copy)]
pub struct FuzzyMatchResult<'a> {
    /// The winning option, borrowed from the slice passed to `fuzzy_match`.
    pub matched: &'a str,
    /// Similarity score of `matched` against the target.
    pub score: f64,
}
244
/// Computes the case-insensitive Levenshtein edit distance between `a` and `b`.
///
/// Both inputs are lowercased first, and the distance is measured in Unicode
/// scalar values (`char`s): the number of single-character insertions,
/// deletions or substitutions needed to turn one string into the other.
///
/// Rows are sized and indexed by character count. Sizing them by byte length
/// would leave stale cells at the end of each row whenever `b` contains
/// multi-byte characters, and reading the last of those cells yields a wrong
/// distance.
pub fn levenshtein_distance(a: &str, b: &str) -> usize {
    let a: Vec<char> = a.to_lowercase().chars().collect();
    let b: Vec<char> = b.to_lowercase().chars().collect();

    if a == b {
        return 0;
    }
    if a.is_empty() {
        return b.len();
    }
    if b.is_empty() {
        return a.len();
    }

    // Two-row dynamic programme: `prev` is the row for the previous character
    // of `a`, `curr` the row being filled in.
    let mut prev: Vec<usize> = (0..=b.len()).collect();
    let mut curr = vec![0; b.len() + 1];

    for (i, ca) in a.iter().enumerate() {
        curr[0] = i + 1;
        for (j, cb) in b.iter().enumerate() {
            let cost = usize::from(ca != cb);
            let deletion = prev[j + 1] + 1;
            let insertion = curr[j] + 1;
            let substitution = prev[j] + cost;
            curr[j + 1] = deletion.min(insertion).min(substitution);
        }
        // Every cell of `curr` is overwritten on the next pass, so swapping
        // the rows avoids a copy.
        std::mem::swap(&mut prev, &mut curr);
    }

    prev[b.len()]
}
274
275pub fn similarity(a: &str, b: &str) -> f64 {
276    let max_len = std::cmp::max(a.len(), b.len());
277    if max_len == 0 {
278        return 1.0;
279    }
280    let distance = levenshtein_distance(a, b) as f64;
281    1.0 - (distance / max_len as f64)
282}
283
284pub fn fuzzy_match<'a>(
285    target: &str,
286    options: &'a [String],
287    threshold: f64,
288) -> Option<FuzzyMatchResult<'a>> {
289    let mut best: Option<FuzzyMatchResult<'a>> = None;
290    for opt in options {
291        let score = similarity(target, opt);
292        if score >= threshold {
293            match best {
294                Some(b) if score <= b.score => {}
295                _ => {
296                    best = Some(FuzzyMatchResult {
297                        matched: opt,
298                        score,
299                    })
300                }
301            }
302        }
303    }
304    best
305}
306
/// Builds the table of known misspellings, keyed by the lowercase misspelt
/// word and mapping to its corrected form.
///
/// A fresh map is constructed on every call.
fn typo_dictionary() -> HashMap<&'static str, &'static str> {
    const CORRECTIONS: &[(&str, &str)] = &[
        // Consistency and proportion words.
        ("firn", "firm"),
        ("frim", "firm"),
        ("stif", "stiff"),
        ("stiif", "stiff"),
        ("vrey", "very"),
        ("vrry", "very"),
        // Soil types and adjectives.
        ("clai", "clay"),
        ("caly", "clay"),
        ("claey", "clay"),
        ("snad", "sand"),
        ("snda", "sand"),
        ("gravle", "gravel"),
        ("gravelley", "gravelly"),
        // Rock types.
        ("limstone", "limestone"),
        ("limstn", "limestone"),
        ("sandston", "sandstone"),
        ("mudstn", "mudstone"),
        ("mudston", "mudstone"),
        ("granit", "granite"),
        // Weathering and qualifiers.
        ("weatherd", "weathered"),
        ("wethered", "weathered"),
        ("slighly", "slightly"),
        ("moderatly", "moderately"),
        // Density and particle size.
        ("dens", "dense"),
        ("loos", "loose"),
        ("medim", "medium"),
    ];

    CORRECTIONS.iter().copied().collect()
}
337
338fn check_typo_dictionary(word: &str) -> Option<&'static str> {
339    let lower = word.to_lowercase();
340    typo_dictionary().get(lower.as_str()).copied()
341}
342
/// Single-word colour terms; a matching word becomes a "color" token.
const SINGLE_COLORS: [&str; 14] = [
    "brown", "gray", "grey", "red", "yellow", "orange", "black", "white", "green", "blue", "pink",
    "purple", "tan", "buff",
];

/// Qualifier words; a matching word becomes a "proportion" token.
const PROPORTION_WORDS: [&str; 3] = ["slightly", "moderately", "very"];
/// Secondary-constituent adjectives; a matching word becomes an "adjective" token.
const ADJECTIVE_WORDS: [&str; 4] = ["sandy", "silty", "gravelly", "clayey"];
/// Moisture terms; a matching word becomes a "moisture-content" token.
const MOISTURE_TERMS: [&str; 4] = ["dry", "moist", "wet", "saturated"];
/// Single-word particle sizes; a matching word becomes a "particle-size" token.
const PARTICLE_SIZE_TERMS: [&str; 3] = ["fine", "medium", "coarse"];
/// Consistency terms; a matching word becomes a "consistency" token.
// NOTE(review): `classify_word` compares these against a single scanned word,
// which never contains a space, so the two-word entries cannot match there.
const CONSISTENCY_TERMS: [&str; 6] = ["very soft", "soft", "firm", "stiff", "very stiff", "hard"];
/// Density terms; a matching word becomes a "density" token.
// NOTE(review): as with `CONSISTENCY_TERMS`, the two-word entries cannot
// match a single scanned word.
const DENSITY_TERMS: [&str; 5] = ["very loose", "loose", "medium dense", "dense", "very dense"];
/// Soil type names; a matching word becomes a "soil-type" token.
const SOIL_TYPE_TERMS: [&str; 8] = [
    "clay", "silt", "sand", "gravel", "peat", "organic", "cobbles", "boulders",
];
/// Rock type names; a matching word becomes a "rock-type" token.
const ROCK_TYPE_TERMS: [&str; 6] = [
    "limestone",
    "sandstone",
    "mudstone",
    "shale",
    "granite",
    "basalt",
];
365
/// A fixed phrase the tokenizer recognises as a single token, together with
/// the token type it produces.
#[derive(Debug, Clone, Copy)]
struct MultiWordPattern {
    /// Lowercase phrase to look for in the input.
    pattern: &'static str,
    /// Token type assigned when the phrase matches.
    token_type: &'static str,
}

impl MultiWordPattern {
    /// Compile-time constructor that keeps the pattern tables compact.
    const fn new(pattern: &'static str, token_type: &'static str) -> Self {
        Self {
            pattern,
            token_type,
        }
    }
}

/// Consistency ranges ("X to Y").
const CONSISTENCY_RANGE_PATTERNS: [MultiWordPattern; 5] = [
    MultiWordPattern::new("soft to firm", "consistency-range"),
    MultiWordPattern::new("firm to stiff", "consistency-range"),
    MultiWordPattern::new("stiff to very stiff", "consistency-range"),
    MultiWordPattern::new("very soft to soft", "consistency-range"),
    MultiWordPattern::new("very stiff to hard", "consistency-range"),
];
/// Two-word consistency terms.
const CONSISTENCY_PATTERNS: [MultiWordPattern; 2] = [
    MultiWordPattern::new("very soft", "consistency"),
    MultiWordPattern::new("very stiff", "consistency"),
];
/// Two-word density terms.
const DENSITY_PATTERNS: [MultiWordPattern; 3] = [
    MultiWordPattern::new("very loose", "density"),
    MultiWordPattern::new("medium dense", "density"),
    MultiWordPattern::new("very dense", "density"),
];
/// Two-word rock strength terms.
const ROCK_STRENGTH_PATTERNS: [MultiWordPattern; 5] = [
    MultiWordPattern::new("extremely strong", "rock-strength"),
    MultiWordPattern::new("very strong", "rock-strength"),
    MultiWordPattern::new("very weak", "rock-strength"),
    MultiWordPattern::new("moderately strong", "rock-strength"),
    MultiWordPattern::new("moderately weak", "rock-strength"),
];
/// Weathering grade terms.
const WEATHERING_PATTERNS: [MultiWordPattern; 5] = [
    MultiWordPattern::new("completely weathered", "weathering-grade"),
    MultiWordPattern::new("highly weathered", "weathering-grade"),
    MultiWordPattern::new("moderately weathered", "weathering-grade"),
    MultiWordPattern::new("slightly weathered", "weathering-grade"),
    MultiWordPattern::new("fresh", "weathering-grade"),
];
/// Particle size ranges.
const PARTICLE_SIZE_PATTERNS: [MultiWordPattern; 3] = [
    MultiWordPattern::new("fine to medium", "particle-size"),
    MultiWordPattern::new("medium to coarse", "particle-size"),
    MultiWordPattern::new("fine to coarse", "particle-size"),
];
/// Compound colour terms.
const COLOR_PATTERNS: [MultiWordPattern; 13] = [
    MultiWordPattern::new("dark brown", "color"),
    MultiWordPattern::new("light brown", "color"),
    MultiWordPattern::new("reddish brown", "color"),
    MultiWordPattern::new("yellowish brown", "color"),
    MultiWordPattern::new("greyish brown", "color"),
    MultiWordPattern::new("grayish brown", "color"),
    MultiWordPattern::new("dark gray", "color"),
    MultiWordPattern::new("dark grey", "color"),
    MultiWordPattern::new("light gray", "color"),
    MultiWordPattern::new("light grey", "color"),
    MultiWordPattern::new("brownish gray", "color"),
    MultiWordPattern::new("brownish grey", "color"),
    MultiWordPattern::new("yellowish", "color"),
];
/// Plasticity terms; longer phrases come first so that "extremely high
/// plasticity" is tried before "high plasticity".
const PLASTICITY_PATTERNS: [MultiWordPattern; 6] = [
    MultiWordPattern::new("extremely high plasticity", "plasticity"),
    MultiWordPattern::new("high plasticity", "plasticity"),
    MultiWordPattern::new("intermediate plasticity", "plasticity"),
    MultiWordPattern::new("low plasticity", "plasticity"),
    MultiWordPattern::new("non-plastic", "plasticity"),
    MultiWordPattern::new("non plastic", "plasticity"),
];
556
/// Tests whether `pattern` occurs in `input` starting at byte offset `pos`,
/// ignoring ASCII case, and ends on a word boundary.
///
/// A boundary is the end of the input, a whitespace character, or one of
/// `, ; . )`. On success returns the byte offset just past the match.
///
/// Returns `None` (rather than panicking) when `pos` is past the end of the
/// input or falls inside a multi-byte character.
///
/// The comparison is done on bytes in place, for two reasons: lowercasing the
/// whole input first costs O(len) per probe, and it can shift byte offsets for
/// non-ASCII text so that `pos` no longer addresses the same character.
fn match_pattern(input: &str, pos: usize, pattern: &str) -> Option<usize> {
    let end = pos.checked_add(pattern.len())?;
    let candidate = input.as_bytes().get(pos..end)?;
    if !candidate.eq_ignore_ascii_case(pattern.as_bytes()) {
        return None;
    }

    // `end` is a byte offset, so inspect the character that starts there
    // instead of treating it as a character index.
    match input.get(end..)?.chars().next() {
        None => Some(end),
        Some(c) if c.is_whitespace() || ",;.)".contains(c) => Some(end),
        Some(_) => None,
    }
}
570
571fn try_multi_word_patterns(input: &str, pos: usize) -> Option<(Token, usize)> {
572    fn try_group(input: &str, pos: usize, group: &[MultiWordPattern]) -> Option<(Token, usize)> {
573        for pattern in group.iter() {
574            if let Some(end) = match_pattern(input, pos, pattern.pattern) {
575                return Some((
576                    Token {
577                        token_type: pattern.token_type.to_string(),
578                        value: input[pos..end].to_string(),
579                        start: pos,
580                        end,
581                        corrected_from: None,
582                        similarity_score: None,
583                    },
584                    end,
585                ));
586            }
587        }
588        None
589    }
590
591    try_group(input, pos, &CONSISTENCY_RANGE_PATTERNS)
592        .or_else(|| try_group(input, pos, &PLASTICITY_PATTERNS))
593        .or_else(|| try_group(input, pos, &ROCK_STRENGTH_PATTERNS))
594        .or_else(|| try_group(input, pos, &CONSISTENCY_PATTERNS))
595        .or_else(|| try_group(input, pos, &DENSITY_PATTERNS))
596        .or_else(|| try_group(input, pos, &WEATHERING_PATTERNS))
597        .or_else(|| try_group(input, pos, &PARTICLE_SIZE_PATTERNS))
598        .or_else(|| try_group(input, pos, &COLOR_PATTERNS))
599}
600
601fn classify_word(word: &str) -> (String, Option<String>, Option<f64>) {
602    let lower = word.to_lowercase();
603
604    if let Some(typo) = check_typo_dictionary(&lower) {
605        let (token_type, _, _) = classify_word(typo);
606        return (token_type, Some(typo.to_string()), Some(1.0));
607    }
608
609    if CONSISTENCY_TERMS.iter().any(|t| *t == lower) {
610        return ("consistency".to_string(), None, None);
611    }
612    if DENSITY_TERMS.iter().any(|t| *t == lower) {
613        return ("density".to_string(), None, None);
614    }
615    if lower == "strong" || lower == "weak" {
616        return ("rock-strength".to_string(), None, None);
617    }
618    if SINGLE_COLORS.iter().any(|t| *t == lower) {
619        return ("color".to_string(), None, None);
620    }
621    if PROPORTION_WORDS.iter().any(|t| *t == lower) {
622        return ("proportion".to_string(), None, None);
623    }
624    if ADJECTIVE_WORDS.iter().any(|t| *t == lower) {
625        return ("adjective".to_string(), None, None);
626    }
627    if MOISTURE_TERMS.iter().any(|t| *t == lower) {
628        return ("moisture-content".to_string(), None, None);
629    }
630    if PARTICLE_SIZE_TERMS.iter().any(|t| *t == lower) {
631        return ("particle-size".to_string(), None, None);
632    }
633    if SOIL_TYPE_TERMS.iter().any(|t| *t == lower) {
634        return ("soil-type".to_string(), None, None);
635    }
636    if ROCK_TYPE_TERMS.iter().any(|t| *t == lower) {
637        return ("rock-type".to_string(), None, None);
638    }
639
640    let all_terms: Vec<String> = SOIL_TYPE_TERMS
641        .iter()
642        .chain(ROCK_TYPE_TERMS.iter())
643        .chain(SINGLE_COLORS.iter())
644        .chain(PROPORTION_WORDS.iter())
645        .chain(ADJECTIVE_WORDS.iter())
646        .chain(MOISTURE_TERMS.iter())
647        .chain(PARTICLE_SIZE_TERMS.iter())
648        .map(|s| (*s).to_string())
649        .collect();
650
651    if let Some(result) = fuzzy_match(&lower, &all_terms, DEFAULT_FUZZY_THRESHOLD) {
652        let (token_type, _, _) = classify_word(result.matched);
653        return (
654            token_type,
655            Some(result.matched.to_string()),
656            Some(result.score),
657        );
658    }
659
660    ("unknown".to_string(), None, None)
661}
662
663pub fn tokenize(input: &str, max_tokens: Option<usize>) -> Vec<Token> {
664    let mut tokens = Vec::new();
665    let mut pos = 0;
666    let len = input.len();
667    let bytes = input.as_bytes();
668
669    while pos < len {
670        if let Some(max) = max_tokens {
671            if tokens.len() >= max {
672                break;
673            }
674        }
675
676        while pos < len {
677            let ch = bytes[pos] as char;
678            if !ch.is_whitespace() {
679                break;
680            }
681            pos += 1;
682        }
683
684        if pos >= len {
685            break;
686        }
687
688        let current_char = bytes[pos] as char;
689        if ",;().".contains(current_char) {
690            pos += 1;
691            continue;
692        }
693
694        if let Some((token, end)) = try_multi_word_patterns(input, pos) {
695            tokens.push(token);
696            pos = end;
697            continue;
698        }
699
700        let mut end = pos;
701        while end < len {
702            let c = bytes[end] as char;
703            if c.is_ascii_alphabetic() || c == '-' {
704                end += 1;
705            } else {
706                break;
707            }
708        }
709
710        if end > pos {
711            let word = &input[pos..end];
712            let (token_type, corrected_value, score) = classify_word(word);
713            let mut token = Token {
714                token_type,
715                value: word.to_string(),
716                start: pos,
717                end,
718                corrected_from: None,
719                similarity_score: None,
720            };
721
722            if let Some(corrected) = corrected_value {
723                token.corrected_from = Some(word.to_string());
724                token.value = corrected;
725            }
726            if let Some(sc) = score {
727                token.similarity_score = Some(sc);
728            }
729
730            tokens.push(token);
731            pos = end;
732            continue;
733        }
734
735        pos += 1;
736    }
737
738    tokens
739}
740
741fn consistency_from_string(s: &str) -> Option<Consistency> {
742    match s.to_lowercase().as_str() {
743        "very soft" => Some(Consistency::VerySoft),
744        "soft" => Some(Consistency::Soft),
745        "firm" => Some(Consistency::Firm),
746        "stiff" => Some(Consistency::Stiff),
747        "very stiff" => Some(Consistency::VeryStiff),
748        "hard" => Some(Consistency::Hard),
749        "soft to firm" => Some(Consistency::SoftToFirm),
750        "firm to stiff" => Some(Consistency::FirmToStiff),
751        "stiff to very stiff" => Some(Consistency::StiffToVeryStiff),
752        _ => None,
753    }
754}
755
756fn density_from_string(s: &str) -> Option<Density> {
757    match s.to_lowercase().as_str() {
758        "very loose" => Some(Density::VeryLoose),
759        "loose" => Some(Density::Loose),
760        "medium dense" => Some(Density::MediumDense),
761        "dense" => Some(Density::Dense),
762        "very dense" => Some(Density::VeryDense),
763        _ => None,
764    }
765}
766
767fn soil_type_from_string(s: &str) -> Option<SoilType> {
768    match s.to_lowercase().as_str() {
769        "clay" => Some(SoilType::Clay),
770        "silt" => Some(SoilType::Silt),
771        "sand" => Some(SoilType::Sand),
772        "gravel" => Some(SoilType::Gravel),
773        "peat" => Some(SoilType::Peat),
774        "organic" => Some(SoilType::Organic),
775        "cobbles" => Some(SoilType::Cobbles),
776        "boulders" => Some(SoilType::Boulders),
777        _ => None,
778    }
779}
780
781fn rock_type_from_string(s: &str) -> Option<RockType> {
782    match s.to_lowercase().as_str() {
783        "limestone" => Some(RockType::Limestone),
784        "sandstone" => Some(RockType::Sandstone),
785        "mudstone" => Some(RockType::Mudstone),
786        "shale" => Some(RockType::Shale),
787        "granite" => Some(RockType::Granite),
788        "basalt" => Some(RockType::Basalt),
789        _ => None,
790    }
791}
792
793pub fn consistency_to_display(c: Consistency) -> String {
794    format!("{:?}", c).to_lowercase().replace('_', " ")
795}
796
797pub fn parse_soil_description(description: &str) -> SoilDescription {
798    let mut builder = create_empty_builder(description);
799    builder.material_type = Some(MaterialType::Soil);
800    builder.confidence = 1.0;
801    builder.is_valid = true;
802
803    if description.trim().is_empty() {
804        builder.confidence = 0.5;
805        return build_soil_description(builder);
806    }
807
808    let tokens = tokenize(description, None);
809    let mut pending_proportion: Option<String> = None;
810
811    for tok in tokens {
812        let tok_value = tok.value;
813        match tok.token_type.as_str() {
814            "consistency" | "consistency-range" => {
815                if let Some(c) = consistency_from_string(&tok_value) {
816                    builder.consistency = Some(c);
817                }
818            }
819            "density" => {
820                if let Some(d) = density_from_string(&tok_value) {
821                    builder.density = Some(d);
822                }
823            }
824            "proportion" => {
825                pending_proportion = Some(tok_value.clone());
826            }
827            "adjective" => {
828                if let Some(amount) = pending_proportion.take() {
829                    builder.secondary_constituents.push(SecondaryConstituent {
830                        amount: Some(amount),
831                        soil_type: tok_value.clone(),
832                    });
833                } else {
834                    builder.secondary_constituents.push(SecondaryConstituent {
835                        amount: None,
836                        soil_type: tok_value.clone(),
837                    });
838                }
839            }
840            "soil-type" => {
841                if let Some(st) = soil_type_from_string(&tok_value) {
842                    builder.primary_soil_type = Some(st);
843                }
844                builder.material_type = Some(MaterialType::Soil);
845            }
846            "rock-type" => {
847                if let Some(rt) = rock_type_from_string(&tok_value) {
848                    builder.primary_rock_type = Some(rt);
849                }
850                builder.material_type = Some(MaterialType::Rock);
851            }
852            "rock-strength" => {
853                builder.rock_strength = Some(tok_value.replace(' ', "-"));
854                builder.material_type = Some(MaterialType::Rock);
855            }
856            "weathering-grade" => {
857                builder.weathering_grade = Some(tok_value.replace(' ', "-"));
858            }
859            "rock-structure" => {
860                builder.rock_structure = Some(tok_value.clone());
861                builder.material_type = Some(MaterialType::Rock);
862            }
863            "color" => {
864                builder.color = Some(tok_value.clone());
865            }
866            "moisture-content" => {
867                builder.moisture_content = match tok_value.to_lowercase().as_str() {
868                    "dry" => Some(MoistureContent::Dry),
869                    "moist" => Some(MoistureContent::Moist),
870                    "wet" => Some(MoistureContent::Wet),
871                    "saturated" => Some(MoistureContent::Saturated),
872                    _ => None,
873                };
874            }
875            "plasticity" => {
876                builder.plasticity = match tok_value.to_lowercase().as_str() {
877                    "non-plastic" | "non plastic" => Some(Plasticity::NonPlastic),
878                    "low plasticity" => Some(Plasticity::LowPlasticity),
879                    "intermediate plasticity" => Some(Plasticity::IntermediatePlasticity),
880                    "high plasticity" => Some(Plasticity::HighPlasticity),
881                    "extremely high plasticity" => Some(Plasticity::ExtremelyHighPlasticity),
882                    _ => None,
883                };
884            }
885            "particle-size" => {
886                builder.particle_size = Some(tok_value.clone());
887            }
888            _ => {}
889        }
890
891        if let Some(original) = tok.corrected_from {
892            builder.spelling_corrections.push(SpellingCorrection {
893                original,
894                corrected: tok_value.clone(),
895                score: tok.similarity_score.unwrap_or(1.0),
896            });
897        }
898    }
899
900    let mut confidence = 1.0;
901    for _ in &builder.spelling_corrections {
902        confidence *= 0.8;
903    }
904    if builder.primary_soil_type.is_none() && builder.primary_rock_type.is_none() {
905        confidence *= 0.7;
906    }
907    builder.confidence = confidence;
908
909    if let Some(strength) = derive_strength_parameters(&builder) {
910        builder.strength_parameters.push(strength);
911    }
912
913    build_soil_description(builder)
914}
915
916pub fn derive_strength_parameters(builder: &SoilDescriptionBuilder) -> Option<StrengthParameters> {
917    if builder.material_type == Some(MaterialType::Soil) {
918        if let Some(soil) = builder.primary_soil_type {
919            if is_cohesive(soil) {
920                if let Some(consistency) = builder.consistency {
921                    if let Some(range) = get_consistency_range_value(consistency) {
922                        return Some(StrengthParameters {
923                            parameter_type: StrengthParameterType::UndrainedShear,
924                            value_range: range,
925                            confidence: 0.8,
926                            units: "kPa".to_string(),
927                        });
928                    }
929                }
930            }
931
932            if is_granular(soil) {
933                if let Some(density) = builder.density {
934                    if let Some(range) = density_to_spt_range(density) {
935                        return Some(StrengthParameters {
936                            parameter_type: StrengthParameterType::SptNValue,
937                            value_range: range,
938                            confidence: 0.8,
939                            units: "blows".to_string(),
940                        });
941                    }
942                }
943            }
944        }
945    }
946
947    if builder.material_type == Some(MaterialType::Rock) {
948        if let Some(strength) = &builder.rock_strength {
949            if let Some(range) = rock_strength_to_ucs_range(strength) {
950                return Some(StrengthParameters {
951                    parameter_type: StrengthParameterType::Ucs,
952                    value_range: range,
953                    confidence: 0.8,
954                    units: "MPa".to_string(),
955                });
956            }
957        }
958    }
959
960    None
961}
962
963fn get_consistency_range_value(consistency: Consistency) -> Option<StrengthRange> {
964    match consistency {
965        Consistency::SoftToFirm => {
966            let soft = consistency_range(Consistency::Soft)?;
967            let firm = consistency_range(Consistency::Firm)?;
968            Some(StrengthRange {
969                lower_bound: soft.lower_bound,
970                upper_bound: firm.upper_bound,
971                typical_value: (soft.typical_value + firm.typical_value) / 2.0,
972            })
973        }
974        Consistency::FirmToStiff => {
975            let firm = consistency_range(Consistency::Firm)?;
976            let stiff = consistency_range(Consistency::Stiff)?;
977            Some(StrengthRange {
978                lower_bound: firm.lower_bound,
979                upper_bound: stiff.upper_bound,
980                typical_value: (firm.typical_value + stiff.typical_value) / 2.0,
981            })
982        }
983        Consistency::StiffToVeryStiff => {
984            let stiff = consistency_range(Consistency::Stiff)?;
985            let very = consistency_range(Consistency::VeryStiff)?;
986            Some(StrengthRange {
987                lower_bound: stiff.lower_bound,
988                upper_bound: very.upper_bound,
989                typical_value: (stiff.typical_value + very.typical_value) / 2.0,
990            })
991        }
992        _ => consistency_range(consistency),
993    }
994}
995
996fn consistency_range(consistency: Consistency) -> Option<StrengthRange> {
997    match consistency {
998        Consistency::VerySoft => Some(StrengthRange {
999            lower_bound: 0.0,
1000            upper_bound: 12.0,
1001            typical_value: 6.0,
1002        }),
1003        Consistency::Soft => Some(StrengthRange {
1004            lower_bound: 12.0,
1005            upper_bound: 25.0,
1006            typical_value: 18.0,
1007        }),
1008        Consistency::Firm => Some(StrengthRange {
1009            lower_bound: 25.0,
1010            upper_bound: 50.0,
1011            typical_value: 37.0,
1012        }),
1013        Consistency::Stiff => Some(StrengthRange {
1014            lower_bound: 50.0,
1015            upper_bound: 100.0,
1016            typical_value: 75.0,
1017        }),
1018        Consistency::VeryStiff => Some(StrengthRange {
1019            lower_bound: 100.0,
1020            upper_bound: 200.0,
1021            typical_value: 150.0,
1022        }),
1023        Consistency::Hard => Some(StrengthRange {
1024            lower_bound: 200.0,
1025            upper_bound: 400.0,
1026            typical_value: 300.0,
1027        }),
1028        _ => None,
1029    }
1030}
1031
1032fn density_to_spt_range(density: Density) -> Option<StrengthRange> {
1033    match density {
1034        Density::VeryLoose => Some(StrengthRange {
1035            lower_bound: 0.0,
1036            upper_bound: 4.0,
1037            typical_value: 2.0,
1038        }),
1039        Density::Loose => Some(StrengthRange {
1040            lower_bound: 4.0,
1041            upper_bound: 10.0,
1042            typical_value: 7.0,
1043        }),
1044        Density::MediumDense => Some(StrengthRange {
1045            lower_bound: 10.0,
1046            upper_bound: 30.0,
1047            typical_value: 20.0,
1048        }),
1049        Density::Dense => Some(StrengthRange {
1050            lower_bound: 30.0,
1051            upper_bound: 50.0,
1052            typical_value: 40.0,
1053        }),
1054        Density::VeryDense => Some(StrengthRange {
1055            lower_bound: 50.0,
1056            upper_bound: 100.0,
1057            typical_value: 75.0,
1058        }),
1059    }
1060}
1061
1062fn rock_strength_to_ucs_range(strength: &str) -> Option<StrengthRange> {
1063    match strength {
1064        "very-weak" => Some(StrengthRange {
1065            lower_bound: 0.25,
1066            upper_bound: 1.0,
1067            typical_value: 0.6,
1068        }),
1069        "weak" => Some(StrengthRange {
1070            lower_bound: 1.0,
1071            upper_bound: 5.0,
1072            typical_value: 2.5,
1073        }),
1074        "moderately-weak" => Some(StrengthRange {
1075            lower_bound: 5.0,
1076            upper_bound: 12.5,
1077            typical_value: 8.0,
1078        }),
1079        "moderately-strong" => Some(StrengthRange {
1080            lower_bound: 12.5,
1081            upper_bound: 50.0,
1082            typical_value: 25.0,
1083        }),
1084        "moderately" => Some(StrengthRange {
1085            lower_bound: 12.5,
1086            upper_bound: 50.0,
1087            typical_value: 25.0,
1088        }),
1089        "strong" => Some(StrengthRange {
1090            lower_bound: 50.0,
1091            upper_bound: 100.0,
1092            typical_value: 75.0,
1093        }),
1094        "very-strong" => Some(StrengthRange {
1095            lower_bound: 100.0,
1096            upper_bound: 200.0,
1097            typical_value: 150.0,
1098        }),
1099        "extremely-strong" => Some(StrengthRange {
1100            lower_bound: 200.0,
1101            upper_bound: 500.0,
1102            typical_value: 300.0,
1103        }),
1104        _ => None,
1105    }
1106}
1107
/// A single problem found while validating a soil description.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidationError {
    /// Upper-case identifier for the kind of problem, e.g. `"INVALID_CONFIDENCE"` or
    /// `"STRICT_WARNING"`.
    pub code: String,
    /// Human-readable explanation of the problem.
    pub message: String,
    /// Name or path of the offending field (e.g. `"density"` or
    /// `"strength_parameters[0].confidence"`); `None` for warnings promoted to errors in
    /// strict mode.
    pub field: Option<String>,
}
1114
/// Outcome of `validate_soil_description`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ValidationResult {
    /// `true` when `errors` is empty.
    pub is_valid: bool,
    /// Problems that make the description invalid. In strict mode this also holds every
    /// warning, converted to an error with code `"STRICT_WARNING"`.
    pub errors: Vec<ValidationError>,
    /// Advisory messages that do not affect `is_valid`. Always empty in strict mode.
    pub warnings: Vec<String>,
}
1121
/// Switches controlling which checks `validate_soil_description` runs and how severe
/// their findings are.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
pub struct ValidationOptions {
    /// Convert every warning into a `"STRICT_WARNING"` error.
    pub strict: bool,
    /// When no strength parameters are present, warn if consistency, density or rock
    /// strength is available to derive them from.
    pub require_strength_params: bool,
    /// Report an error when the primary soil/rock type matching the material type is
    /// missing.
    pub require_primary_type: bool,
    /// Compare strength parameter typical values against the ranges expected for the
    /// description's consistency, density or rock strength term.
    pub check_correlations: bool,
}
1129
1130impl Default for ValidationOptions {
1131    fn default() -> Self {
1132        ValidationOptions {
1133            strict: false,
1134            require_strength_params: false,
1135            require_primary_type: false,
1136            check_correlations: true,
1137        }
1138    }
1139}
1140
/// Expected value band used by the correlation checks; a typical value is flagged only
/// when it is strictly below `min` or strictly above `max`.
#[derive(Debug, Clone, Copy)]
struct RangeMinMax {
    /// Lowest value that is not reported as "low".
    min: f64,
    /// Highest value that is not reported as "high".
    max: f64,
}
1146
1147fn consistency_cu_ranges() -> HashMap<Consistency, RangeMinMax> {
1148    HashMap::from([
1149        (
1150            Consistency::VerySoft,
1151            RangeMinMax {
1152                min: 0.0,
1153                max: 12.0,
1154            },
1155        ),
1156        (
1157            Consistency::Soft,
1158            RangeMinMax {
1159                min: 12.0,
1160                max: 25.0,
1161            },
1162        ),
1163        (
1164            Consistency::SoftToFirm,
1165            RangeMinMax {
1166                min: 20.0,
1167                max: 40.0,
1168            },
1169        ),
1170        (
1171            Consistency::Firm,
1172            RangeMinMax {
1173                min: 25.0,
1174                max: 50.0,
1175            },
1176        ),
1177        (
1178            Consistency::FirmToStiff,
1179            RangeMinMax {
1180                min: 40.0,
1181                max: 75.0,
1182            },
1183        ),
1184        (
1185            Consistency::Stiff,
1186            RangeMinMax {
1187                min: 50.0,
1188                max: 100.0,
1189            },
1190        ),
1191        (
1192            Consistency::StiffToVeryStiff,
1193            RangeMinMax {
1194                min: 75.0,
1195                max: 150.0,
1196            },
1197        ),
1198        (
1199            Consistency::VeryStiff,
1200            RangeMinMax {
1201                min: 100.0,
1202                max: 200.0,
1203            },
1204        ),
1205        (
1206            Consistency::Hard,
1207            RangeMinMax {
1208                min: 200.0,
1209                max: 500.0,
1210            },
1211        ),
1212    ])
1213}
1214
1215fn density_spt_ranges() -> HashMap<Density, RangeMinMax> {
1216    HashMap::from([
1217        (Density::VeryLoose, RangeMinMax { min: 0.0, max: 4.0 }),
1218        (
1219            Density::Loose,
1220            RangeMinMax {
1221                min: 4.0,
1222                max: 10.0,
1223            },
1224        ),
1225        (
1226            Density::MediumDense,
1227            RangeMinMax {
1228                min: 10.0,
1229                max: 30.0,
1230            },
1231        ),
1232        (
1233            Density::Dense,
1234            RangeMinMax {
1235                min: 30.0,
1236                max: 50.0,
1237            },
1238        ),
1239        (
1240            Density::VeryDense,
1241            RangeMinMax {
1242                min: 50.0,
1243                max: 100.0,
1244            },
1245        ),
1246    ])
1247}
1248
1249fn rock_strength_ucs_ranges() -> HashMap<&'static str, RangeMinMax> {
1250    HashMap::from([
1251        (
1252            "extremely-weak",
1253            RangeMinMax {
1254                min: 0.25,
1255                max: 1.0,
1256            },
1257        ),
1258        ("very-weak", RangeMinMax { min: 1.0, max: 5.0 }),
1259        (
1260            "weak",
1261            RangeMinMax {
1262                min: 5.0,
1263                max: 25.0,
1264            },
1265        ),
1266        (
1267            "medium-strong",
1268            RangeMinMax {
1269                min: 25.0,
1270                max: 50.0,
1271            },
1272        ),
1273        (
1274            "strong",
1275            RangeMinMax {
1276                min: 50.0,
1277                max: 100.0,
1278            },
1279        ),
1280        (
1281            "very-strong",
1282            RangeMinMax {
1283                min: 100.0,
1284                max: 250.0,
1285            },
1286        ),
1287        (
1288            "extremely-strong",
1289            RangeMinMax {
1290                min: 250.0,
1291                max: 500.0,
1292            },
1293        ),
1294    ])
1295}
1296
1297fn validate_strength_parameters(
1298    params: &StrengthParameters,
1299    index: usize,
1300) -> (Vec<ValidationError>, Vec<String>) {
1301    let mut errors = Vec::new();
1302    let warnings = Vec::new();
1303    let field_prefix = format!("strength_parameters[{}]", index);
1304
1305    if params.value_range.lower_bound > params.value_range.upper_bound {
1306        errors.push(ValidationError {
1307            code: "INVALID_STRENGTH_RANGE".to_string(),
1308            message: format!(
1309                "Strength parameter {}: lower_bound exceeds upper_bound",
1310                index
1311            ),
1312            field: Some(format!("{}.value_range", field_prefix)),
1313        });
1314    }
1315
1316    let StrengthRange {
1317        lower_bound,
1318        upper_bound,
1319        typical_value,
1320    } = params.value_range;
1321    if typical_value < lower_bound || typical_value > upper_bound {
1322        errors.push(ValidationError {
1323            code: "TYPICAL_VALUE_OUT_OF_RANGE".to_string(),
1324            message: format!(
1325                "Strength parameter {}: typical_value not within bounds",
1326                index
1327            ),
1328            field: Some(format!("{}.value_range.typical_value", field_prefix)),
1329        });
1330    }
1331
1332    if params.confidence < 0.0 || params.confidence > 1.0 {
1333        errors.push(ValidationError {
1334            code: "INVALID_STRENGTH_CONFIDENCE".to_string(),
1335            message: format!("Strength parameter {}: confidence must be 0-1", index),
1336            field: Some(format!("{}.confidence", field_prefix)),
1337        });
1338    }
1339
1340    (errors, warnings)
1341}
1342
1343fn check_correlations(desc: &SoilDescription) -> (Vec<ValidationError>, Vec<String>) {
1344    let errors = Vec::new();
1345    let mut warnings = Vec::new();
1346
1347    for param in &desc.strength_parameters {
1348        match param.parameter_type {
1349            StrengthParameterType::UndrainedShear => {
1350                if let Some(consistency) = desc.consistency {
1351                    if let Some(range) = consistency_cu_ranges().get(&consistency) {
1352                        let typical = param.value_range.typical_value;
1353                        if typical < range.min {
1354                            warnings.push(format!(
1355                                "Cu value ({:.1} kPa) seems low for {:?} consistency (expected {:.0}-{:.0} kPa)",
1356                                typical, consistency, range.min, range.max
1357                            ));
1358                        } else if typical > range.max {
1359                            warnings.push(format!(
1360                                "Cu value ({:.1} kPa) seems high for {:?} consistency (expected {:.0}-{:.0} kPa)",
1361                                typical, consistency, range.min, range.max
1362                            ));
1363                        }
1364                    }
1365                }
1366            }
1367            StrengthParameterType::SptNValue => {
1368                if let Some(density) = desc.density {
1369                    if let Some(range) = density_spt_ranges().get(&density) {
1370                        let typical = param.value_range.typical_value;
1371                        if typical < range.min {
1372                            warnings.push(format!(
1373                                "SPT N-value ({:.1}) seems low for {:?} density (expected {:.0}-{:.0})",
1374                                typical, density, range.min, range.max
1375                            ));
1376                        } else if typical > range.max {
1377                            warnings.push(format!(
1378                                "SPT N-value ({:.1}) seems high for {:?} density (expected {:.0}-{:.0})",
1379                                typical, density, range.min, range.max
1380                            ));
1381                        }
1382                    }
1383                }
1384            }
1385            StrengthParameterType::Ucs => {
1386                if let Some(strength) = &desc.rock_strength {
1387                    if let Some(range) = rock_strength_ucs_ranges().get(strength.as_str()) {
1388                        let typical = param.value_range.typical_value;
1389                        if typical < range.min {
1390                            warnings.push(format!(
1391                                "UCS value ({:.1} MPa) seems low for {} rock (expected {:.1}-{:.1} MPa)",
1392                                typical, strength, range.min, range.max
1393                            ));
1394                        } else if typical > range.max {
1395                            warnings.push(format!(
1396                                "UCS value ({:.1} MPa) seems high for {} rock (expected {:.1}-{:.1} MPa)",
1397                                typical, strength, range.min, range.max
1398                            ));
1399                        }
1400                    }
1401                }
1402            }
1403            _ => {}
1404        }
1405    }
1406
1407    (errors, warnings)
1408}
1409
1410fn check_unusual_combinations(desc: &SoilDescription) -> Vec<String> {
1411    let mut warnings = Vec::new();
1412
1413    if let Some(soil) = desc.primary_soil_type {
1414        if is_granular(soil) {
1415            if let Some(plasticity) = desc.plasticity {
1416                if matches!(
1417                    plasticity,
1418                    Plasticity::HighPlasticity | Plasticity::ExtremelyHighPlasticity
1419                ) {
1420                    warnings.push(format!(
1421                        "Unusual combination: high plasticity in granular soil ({:?})",
1422                        soil
1423                    ));
1424                }
1425            }
1426        }
1427    }
1428
1429    if let Some(weathering) = &desc.weathering_grade {
1430        if weathering == "fresh" {
1431            if let Some(strength) = &desc.rock_strength {
1432                if ["extremely-weak", "very-weak", "weak"].contains(&strength.as_str()) {
1433                    warnings.push(format!(
1434                        "Unusual combination: fresh rock with {} strength",
1435                        strength
1436                    ));
1437                }
1438            }
1439        }
1440        if weathering == "completely-weathered" {
1441            if let Some(strength) = &desc.rock_strength {
1442                if ["strong", "very-strong", "extremely-strong"].contains(&strength.as_str()) {
1443                    warnings.push(format!(
1444                        "Unusual combination: completely weathered rock with {} strength",
1445                        strength
1446                    ));
1447                }
1448            }
1449        }
1450    }
1451
1452    warnings
1453}
1454
1455pub fn validate_soil_description(
1456    desc: &SoilDescription,
1457    opts: ValidationOptions,
1458) -> ValidationResult {
1459    let mut errors = Vec::new();
1460    let mut warnings = Vec::new();
1461
1462    if desc.material_type == Some(MaterialType::Soil) {
1463        if let Some(soil) = desc.primary_soil_type {
1464            if is_cohesive(soil) {
1465                if desc.density.is_some() {
1466                    errors.push(ValidationError {
1467                        code: "COHESIVE_WITH_DENSITY".to_string(),
1468                        message: format!("Cohesive soil ({:?}) should not have density", soil),
1469                        field: Some("density".to_string()),
1470                    });
1471                }
1472                if desc.consistency.is_none() {
1473                    warnings.push(format!(
1474                        "Cohesive soil ({:?}) should have consistency",
1475                        soil
1476                    ));
1477                }
1478            }
1479            if is_granular(soil) {
1480                if desc.consistency.is_some() {
1481                    errors.push(ValidationError {
1482                        code: "GRANULAR_WITH_CONSISTENCY".to_string(),
1483                        message: format!("Granular soil ({:?}) should not have consistency", soil),
1484                        field: Some("consistency".to_string()),
1485                    });
1486                }
1487                if desc.density.is_none() {
1488                    warnings.push(format!("Granular soil ({:?}) should have density", soil));
1489                }
1490            }
1491        }
1492    }
1493
1494    if desc.material_type == Some(MaterialType::Rock) {
1495        if desc.consistency.is_some() {
1496            errors.push(ValidationError {
1497                code: "ROCK_WITH_CONSISTENCY".to_string(),
1498                message: "Rock should not have consistency".to_string(),
1499                field: Some("consistency".to_string()),
1500            });
1501        }
1502        if desc.density.is_some() {
1503            errors.push(ValidationError {
1504                code: "ROCK_WITH_DENSITY".to_string(),
1505                message: "Rock should not have density".to_string(),
1506                field: Some("density".to_string()),
1507            });
1508        }
1509        if desc.primary_soil_type.is_some() {
1510            errors.push(ValidationError {
1511                code: "ROCK_WITH_SOIL_TYPE".to_string(),
1512                message: "Rock should not have soil type".to_string(),
1513                field: Some("primary_soil_type".to_string()),
1514            });
1515        }
1516    }
1517
1518    if desc.material_type == Some(MaterialType::Soil) {
1519        if desc.rock_strength.is_some() {
1520            errors.push(ValidationError {
1521                code: "SOIL_WITH_ROCK_STRENGTH".to_string(),
1522                message: "Soil should not have rock strength".to_string(),
1523                field: Some("rock_strength".to_string()),
1524            });
1525        }
1526        if desc.primary_rock_type.is_some() {
1527            errors.push(ValidationError {
1528                code: "SOIL_WITH_ROCK_TYPE".to_string(),
1529                message: "Soil should not have rock type".to_string(),
1530                field: Some("primary_rock_type".to_string()),
1531            });
1532        }
1533        if desc.weathering_grade.is_some() {
1534            errors.push(ValidationError {
1535                code: "SOIL_WITH_WEATHERING".to_string(),
1536                message: "Soil should not have weathering grade".to_string(),
1537                field: Some("weathering_grade".to_string()),
1538            });
1539        }
1540        if desc.rock_structure.is_some() {
1541            errors.push(ValidationError {
1542                code: "SOIL_WITH_ROCK_STRUCTURE".to_string(),
1543                message: "Soil should not have rock structure".to_string(),
1544                field: Some("rock_structure".to_string()),
1545            });
1546        }
1547    }
1548
1549    if opts.require_primary_type {
1550        match desc.material_type {
1551            Some(MaterialType::Soil) if desc.primary_soil_type.is_none() => {
1552                errors.push(ValidationError {
1553                    code: "MISSING_SOIL_TYPE".to_string(),
1554                    message: "Soil description missing primary soil type".to_string(),
1555                    field: Some("primary_soil_type".to_string()),
1556                })
1557            }
1558            Some(MaterialType::Rock) if desc.primary_rock_type.is_none() => {
1559                errors.push(ValidationError {
1560                    code: "MISSING_ROCK_TYPE".to_string(),
1561                    message: "Rock description missing primary rock type".to_string(),
1562                    field: Some("primary_rock_type".to_string()),
1563                })
1564            }
1565            _ => {}
1566        }
1567    }
1568
1569    if opts.require_strength_params {
1570        if desc.strength_parameters.is_empty() {
1571            let can_derive = desc.consistency.is_some()
1572                || desc.density.is_some()
1573                || desc.rock_strength.is_some();
1574            if can_derive {
1575                warnings.push(
1576                    "Strength parameters could be derived from description but are missing"
1577                        .to_string(),
1578                );
1579            }
1580        }
1581    }
1582
1583    if desc.confidence < 0.0 || desc.confidence > 1.0 {
1584        errors.push(ValidationError {
1585            code: "INVALID_CONFIDENCE".to_string(),
1586            message: "Confidence must be between 0 and 1".to_string(),
1587            field: Some("confidence".to_string()),
1588        });
1589    }
1590
1591    for (i, param) in desc.strength_parameters.iter().enumerate() {
1592        let (mut e, mut w) = validate_strength_parameters(param, i);
1593        errors.append(&mut e);
1594        warnings.append(&mut w);
1595    }
1596
1597    if opts.check_correlations {
1598        let (mut e, mut w) = check_correlations(desc);
1599        errors.append(&mut e);
1600        warnings.append(&mut w);
1601    }
1602
1603    warnings.extend(check_unusual_combinations(desc));
1604
1605    if opts.strict {
1606        let mut strict_errors = Vec::new();
1607        for warning in warnings {
1608            strict_errors.push(ValidationError {
1609                code: "STRICT_WARNING".to_string(),
1610                message: warning,
1611                field: None,
1612            });
1613        }
1614        return ValidationResult {
1615            is_valid: strict_errors.is_empty() && errors.is_empty(),
1616            errors: [errors, strict_errors].concat(),
1617            warnings: Vec::new(),
1618        };
1619    }
1620
1621    ValidationResult {
1622        is_valid: errors.is_empty(),
1623        errors,
1624        warnings,
1625    }
1626}
1627
1628pub fn is_valid_description(desc: &SoilDescription, opts: ValidationOptions) -> bool {
1629    validate_soil_description(desc, opts).is_valid
1630}
1631
1632pub fn generate_description(desc: &SoilDescription) -> String {
1633    let mut parts: Vec<String> = Vec::new();
1634
1635    if desc.material_type == Some(MaterialType::Rock) {
1636        if let Some(rs) = &desc.rock_strength {
1637            parts.push(rs.replace('-', " "));
1638        }
1639        if let Some(w) = &desc.weathering_grade {
1640            parts.push(w.replace('-', " "));
1641        }
1642        if let Some(s) = &desc.rock_structure {
1643            parts.push(s.to_string());
1644        }
1645        if let Some(c) = &desc.color {
1646            parts.push(c.to_string());
1647        }
1648        if let Some(rt) = &desc.primary_rock_type {
1649            parts.push(format!("{:?}", rt).to_uppercase());
1650        }
1651    } else {
1652        if let Some(c) = &desc.consistency {
1653            parts.push(consistency_to_display(*c));
1654        }
1655        if let Some(d) = &desc.density {
1656            parts.push(format!("{:?}", d).to_lowercase().replace('_', " "));
1657        }
1658        if let Some(c) = &desc.color {
1659            parts.push(c.to_string());
1660        }
1661        for sec in &desc.secondary_constituents {
1662            if let Some(amount) = &sec.amount {
1663                parts.push(format!("{} {}", amount, sec.soil_type));
1664            } else {
1665                parts.push(sec.soil_type.to_string());
1666            }
1667        }
1668        if let Some(m) = &desc.moisture_content {
1669            parts.push(format!("{:?}", m).to_lowercase());
1670        }
1671        if let Some(p) = &desc.plasticity {
1672            parts.push(format!("{:?}", p));
1673        }
1674        if let Some(p) = &desc.particle_size {
1675            parts.push(p.to_string());
1676        }
1677        if let Some(st) = &desc.primary_soil_type {
1678            parts.push(format!("{:?}", st).to_uppercase());
1679        }
1680    }
1681
1682    parts.join(" ")
1683}