furtif_core/structs/structures/lattice/
taxonomy.rs

1// This program is free software: you can redistribute it and/or modify
2// it under the terms of the Lesser GNU General Public License as published
3// by the Free Software Foundation, either version 3 of the License, or
4// (at your option) any later version.
5
6// This program is distributed in the hope that it will be useful,
7// but WITHOUT ANY WARRANTY; without even the implied warranty of
8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
9// Lesser GNU General Public License for more details.
10
11// You should have received a copy of the Lesser GNU General Public License
12// along with this program.  If not, see <https://www.gnu.org/licenses/>.
13
14// Copyright 2024 Frederic Dambreville, Jean Dezert Developers.
15
16
17use core::fmt::Debug;
18use std::{ 
19    collections::{ HashSet, HashMap, BTreeMap, hash_map, BTreeSet, },
20    vec, iter::once,
21};
22use hashed_type_def::{HashedTypeDef, add_hash_fnv1a};
23#[cfg(feature = "serde")] use serde::{ Serialize as SerdeSerialize, Deserialize as SerdeDeserialize, };
24#[cfg(feature = "rkyv")] use rkyv::{ Archive, Serialize as RkyvSerialize, Deserialize as RkyvDeserialize, };
25use rand::prelude::*;
26use rand_distr::{ Open01, Poisson, Distribution, };
27// #[cfg(feature = "silx-types")] use silx_types::{ u128slx, IntoSlx, SlxInto, f64slx, };
28// #[cfg(not(feature = "silx-types"))] use crate::fake_slx::{f64slx, u128slx,FakeSlx};
29
30use crate::{
31    types::{ u128slx, f64slx, SlxInto, IntoSlx, },
32    traits::{Lattice, IterableLattice, LatticeWithLeaves}, 
33    structs::SafeElement
34};
35
36#[derive(HashedTypeDef, Clone, Debug)]
37#[cfg_attr(feature = "rkyv", derive(Archive,RkyvSerialize,RkyvDeserialize))]
38/// Code table for a taxonomy (to be computed By Taxonomy contructor)
39pub struct TaxonCoder(HashMap<u128slx,u128slx>);
40
41impl TaxonCoder { // worst case for taxonomy depth depends on the number of subtaxon by taxon.
42    // maximum depth is 3 (including top and bottom) when the maximum of taxons is directly put under top 
43    //      (2^121+2 taxons, including top and bot and 2^121 subtaxons of top)
44    // maximum depth is 129 (including top and bottom) for a binary taxonomy (or for subtaxonomy of a binary one)
45    // with a maximum of  2^122 + 1 taxons (including top and bottom)
46    // If the number of subtaxons by taxon is limited to 8, then the maximal taxonomy depth should be 42 (2^3=8 and 121/3=40)
47
48    fn new(map: HashMap<u128slx,u128slx>) -> TaxonCoder { TaxonCoder(map) }
49
50    #[inline(always)]
51    fn min(a:u8,b:u8,c:u8) -> u8 {
52        if a < b {
53            if c < a { c }
54            else { a }
55        } else {
56            if c < b { c }
57            else { b }
58        }
59    }
60    #[inline(always)]
61    fn to_u8u128(w: u128) -> (u8, u128) { (((w>>121) as u8), (w & 0x01FFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFFu128)) }
62    #[inline(always)]
63    fn to_u128((h,l): (u8, u128)) -> u128 { ((h as u128) << 121) | l }
64    #[inline(always)]
65    fn to_u128slx((h,l): (u8, u128slx)) -> u128slx {  Self::to_u128((h,l.unslx())).slx() }
66    #[inline(always)]
67    pub fn above_top_u128() -> u128 { 0x02000000_00000000_00000000_00000000u128 }
68    #[inline(always)]
69    pub fn above_top_u128slx() -> u128slx { Self::above_top_u128().slx() }
70    #[inline(always)]
71    pub fn top_u128() -> u128 { 0x01FFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFFu128 }
72    #[inline(always)]
73    pub fn top_u128slx() -> u128slx { Self::top_u128().slx() }
74    #[inline(always)]
75    pub fn bottom_u128() -> u128 { 0x7Fu128 << 121 }
76    #[inline(always)]
77    pub fn bottom_u128slx() -> u128slx { Self::bottom_u128().slx() }
78    #[inline(always)]
79    fn bottom_u8u128() -> (u8, u128) { (0x7Fu8,0x0u128) }
80    #[inline(always)]
81    fn meet_u8u128(a: (u8, u128), b: (u8, u128)) -> (u8, u128) {
82        match (a,b) {
83            ((ah,al), (bh,bl)) if (ah < bh) && (al == bl | (Self::top_u128() >> ah)) => (bh,bl),
84            ((ah,al), (bh,bl)) if (ah > bh) && (bl == al | (Self::top_u128() >> bh)) => (ah,al),
85            (a, b) if a == b => a,
86            _ => Self::bottom_u8u128(),
87        } 
88    }
89    #[inline(always)]
90    fn join_u8u128(a: (u8, u128), b: (u8, u128)) -> (u8, u128) {
91        match (a,b) {
92            ((0x7Fu8,0x0u128),a) => a,
93            (a,(0x7Fu8,0x0u128)) => a,
94            ((ah,al), (bh,bl)) => {
95                let depth_join = Self::min((((al^bl).leading_zeros() as i8) - 7i8) as u8, ah, bh);
96                (depth_join, al | Self::top_u128()>>depth_join)        
97            }
98        }
99    }
100
101    /// Meet operator for this taxon coder for native codes
102    /// * `a: u128` : left operand
103    /// * `b: u128` : right operand
104    /// * Output: meet result
105    #[inline(always)]
106    pub fn meet(&self, a: u128, b: u128) -> u128 { 
107        Self::to_u128(Self::meet_u8u128(Self::to_u8u128(a), Self::to_u8u128(b))) 
108    }
109    /// Join operator for this taxon coder for native codes
110    /// * `a: u128` : left operand
111    /// * `b: u128` : right operand
112    /// * Output: join result
113    #[inline(always)]
114    pub fn join(&self, a: u128, b: u128) -> u128 {
115        let idx = Self::to_u128(Self::join_u8u128(Self::to_u8u128(a), Self::to_u8u128(b))).slx();
116        match self.0.get(&idx) { Some(&v) => v, _ => idx, }.unslx()
117    }
118
119    /// Meet operator for this taxon coder for slx codes
120    /// * `a: u128slx` : left operand
121    /// * `b: u128slx` : right operand
122    /// * Output: meet result
123    #[inline(always)]
124    pub fn meet_slx(&self, a: u128slx, b: u128slx) -> u128slx { self.meet(a.unslx(), b.unslx()).slx() }
125    /// Join operator for this taxon coder for slx codes
126    /// * `a: u128slx` : left operand
127    /// * `b: u128slx` : right operand
128    /// * Output: join result
129    #[inline(always)]
130    pub fn join_slx(&self, a: u128slx, b: u128slx) -> u128slx {
131        let idx = Self::to_u128(Self::join_u8u128(
132            Self::to_u8u128(a.unslx()), Self::to_u8u128(b.unslx())
133        )).slx();
134        match self.0.get(&idx) { Some(&v) => v, _ => idx, }
135    }
136    
137}
138
139/// Some useful functions for intern computation
140trait RandomTaxon {
141    const TAXO_NAMES: [&'static str;26]= [
142        "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
143        "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
144    ];
145    #[inline] fn bot_str() -> &'static str { "\u{22A5}" } 
146    #[inline] fn bot_string() -> String { Self::bot_str().to_string() } 
147    #[inline] fn max_leaf() -> usize { Self::TAXO_NAMES.len() } 
148    #[inline] fn rand_letter<R: Rng>(rng: &mut R) -> &'static str { Self::TAXO_NAMES[rng.gen_range(0..26)] }
149    #[inline] fn append(a: &str, b: &str) -> String { format!("{a}{b}") }
150    #[inline] fn prefix(a: &str, b: &str) -> String {
151        let z = Self::bot_str();
152        if a == z { b.to_string() } else { 
153            if b == z { a.to_string() } else { 
154                a.chars().zip(b.chars()).take_while(|(x, y)| x == y).map(|(x,_)|x).collect() 
155            } 
156        }
157    }
158    #[inline] fn meet_string(a:&str, b:&str) -> String {
159        let common = a.chars().zip(b.chars()).take_while(|(x, y)| x == y).count();
160        if common == a.len() { b.to_string() } else { 
161            if common == b.len() { a.to_string() } else { Self::bot_string() } 
162        }
163    }
164} 
165impl RandomTaxon for () { }
166
167// Rkyv serialization is not possible here (easily) since the type is recursive
168#[derive(Debug,Clone,PartialEq, PartialOrd,)]
169#[cfg_attr(feature = "serde", derive(SerdeSerialize, SerdeDeserialize))]
170/// Taxonomy builder: definition of a taxonomy by means of main taxon and sorted children
171/// * This builder is not suitable for Lattice implementation, and Taxonomy should be inited from it
172pub enum TaxonomyBuilder { // a Taxon is characterized by its UNIQUE name
173    /// Node case
174    Node {
175        /// Name of the taxon of this node
176        name: String, 
177        /// List of children of the taxon of this node
178        children: BTreeSet<TaxonomyBuilder>,    
179    },
180    /// Leaf case
181    Leaf {
182        /// Name of the taxon of this Leaf
183        name: String,
184        /// Weight of the Leaf
185        /// * This weight is used typically for pignistic probability computation
186        weight: f64,
187    }
188}
189
190impl Eq for TaxonomyBuilder {}
191
192impl Ord for TaxonomyBuilder {
193    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
194        match self {
195            TaxonomyBuilder::Leaf { name, .. } | TaxonomyBuilder::Node { name, .. } => {
196                name.cmp(match other {
197                    TaxonomyBuilder::Leaf { name, .. } | TaxonomyBuilder::Node { name, .. } => name,
198                })
199            }
200        }
201    }
202}
203
204#[derive(HashedTypeDef, Clone, Debug, PartialEq,)]
205#[cfg_attr(feature = "rkyv", derive(Archive,RkyvSerialize,RkyvDeserialize))]
206/// Taxon code-based definition
207/// * For this type, children are mapped by means of an extern code table
208/// * For this reason, `Taxon` should be used within type `Taxons`
209pub enum Taxon { // a Taxon is characterized by its UNIQUE name
210    /// Node case
211    Node {
212        /// Name of the taxon of this Node
213        name: String, 
214        /// List of children of the taxon of this node
215        children: Vec<u128slx>,    
216    },
217    Leaf {
218        /// Name of the taxon of this Leaf
219        name: String,
220        /// Weight of the Leaf
221        /// * This weight is used typically for pignistic probability computation
222        weight: f64slx,
223    }
224}
225
226#[derive(HashedTypeDef, Clone, Debug, PartialEq,)]
227#[cfg_attr(feature = "rkyv", derive(Archive,RkyvSerialize,RkyvDeserialize))]
228/// Taxonomy described by a list of taxa and their codes
229pub struct Taxons { // a Taxon is characterized by its UNIQUE name
230    /// Code table for the taxa
231    taxons: HashMap<u128slx,Taxon>,
232    /// Code of root taxon
233    root: u128slx,
234}
235
236#[derive(Clone, Debug, PartialEq,)]
237/// taxon with sorted children: for internal computation
238enum TaxonOrd<D> {
239    Node {
240        index: u128,
241        name: D, 
242        children: BTreeMap<(u8,u128),TaxonOrd<D>>,     
243    },
244    Leaf {
245        index: u128,
246        name: D,
247        weight: f64,
248    }
249}
250
251
252#[derive(HashedTypeDef, Clone, Debug,)]
253#[cfg_attr(feature = "rkyv", derive(Archive,RkyvSerialize,RkyvDeserialize))]
254/// Structure of a taxonomy lattice
255pub struct Taxonomy {
256    taxons: Taxons,
257    top: SafeElement<u128slx>,
258    bottom: SafeElement<u128slx>,
259    coder: TaxonCoder,
260    tags: HashMap<u128slx,String,>,
261    untags: HashMap<String,u128slx,>,
262    leaves: Vec<u128slx>,
263    weighted_leaves: HashMap<u128slx,f64slx>,
264    top_to_bottom: Vec<u128slx>,
265}
266
267// implementation of Serde serialization
268#[cfg(feature = "serde")] mod serding {
269    use super::{ 
270        Taxonomy as SerdingTaxonomy, SerdeSerialize, SerdeDeserialize, TaxonomyBuilder,
271    };
272    impl<'de> SerdeDeserialize<'de> for SerdingTaxonomy {
273        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
274        where D: serde::Deserializer<'de> {
275            let taxons_builder = TaxonomyBuilder::deserialize(deserializer)?;
276            match Self::new(&taxons_builder) {
277                Ok(taxonomy) => Ok(taxonomy),
278                Err{..} => { // build empty taxonomy
279                    Ok(Self::empty())
280                },
281            }
282        }
283    }
284    impl SerdeSerialize for SerdingTaxonomy {
285        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
286        where S: serde::Serializer {
287            let taxon_builder = self.into_taxon_builder();
288            taxon_builder.serialize(serializer)
289        }
290    }
291}
292
293impl TaxonomyBuilder {
294    /// Create a new node of taxonomy builder
295    /// * `name: String` : name of the taxon node
296    /// * `children: I` : children of the taxon node
297    /// * `I` : type of collection
298    /// * Output: a taxonomy builder node
299    pub fn new_node<I>(name: String, children: I,) -> Result<Self,String> 
300                                where I: IntoIterator<Item = TaxonomyBuilder> {
301        let children = children.into_iter().collect::<BTreeSet<_>>(); 
302        if children.is_empty() {
303            Err("Node needs at least one child".to_string())
304        } else {
305            Ok(Self::Node { name, children, }) 
306        }
307    }
308
309    /// Create a new leaf of taxonomy builder
310    /// * `name: String` : name of the taxon leaf
311    /// * `weight: f64` : weight of the taxon leaf
312    /// * Output: a taxonomy builder leaf
313    pub fn new_leaf(name: String, weight: f64,) -> Self {
314        Self::Leaf { name, weight }
315    }
316
317    /// Internal use: compute into TaxonOrd
318    fn inner_into_taxonord<D, F: Fn(&str) -> D + Copy>(&self, next_index: &mut u128, f: F) -> TaxonOrd<D> {
319        let index = *next_index;
320        *next_index += 1;
321        match self {
322            TaxonomyBuilder::Node { name, children } => {
323                TaxonOrd::Node { name: f(name), 
324                    children: children.into_iter().map(|r| {
325                        let index = *next_index;
326                        ((0,index),Self::inner_into_taxonord(r, next_index, f))
327                    } ).collect(),
328                    index,
329                }
330            },
331            TaxonomyBuilder::Leaf { name, weight } => {
332                TaxonOrd::Leaf { name: f(name), weight: *weight, index }
333            },
334        }
335    }
336    
337    /// Internal use: produce TaxonOrd from TaxonomyBuilder together with the next free index 
338    fn into_taxonord<D, F: Fn(&str) -> D + Copy>(&self, f: F) -> (TaxonOrd<D>,u128) {
339        let mut next_index = 0u128;
340        (self.inner_into_taxonord(&mut next_index, f),next_index)
341    }
342    
343    /// Internal use: compute into code-to-taxon map
344    fn inner_into_taxons(&self, next_index: &mut u128slx, taxons: &mut HashMap<u128slx,Taxon>) -> u128slx {
345        let index = *next_index;
346        *next_index += 1u128.slx();
347        match self {
348            TaxonomyBuilder::Node { name, children } => {
349                let children = children.iter().map(|tb| {
350                    tb.inner_into_taxons(next_index,taxons)
351                }).collect();
352                taxons.insert(index, Taxon::Node { name: name.to_string(), children }).expect("unexpected error");
353            },
354            TaxonomyBuilder::Leaf { name, weight } => {
355                taxons.insert(index, Taxon::Leaf { name: name.to_string(), weight: (*weight).slx() });
356            },
357        }
358        index
359    }
360    
361    /// Convert taxonomy builder into taxons
362    /// * Output: taxons
363    pub fn into_taxons(&self) -> Taxons {
364        let mut next_index = 0u128.slx();
365        let root = next_index;
366        let mut taxons = HashMap::new();
367        self.inner_into_taxons(&mut next_index, &mut taxons);
368        Taxons { taxons, root, }
369    }
370    
371    /// Build random taxonomy builder
372    /// * `prefix: &str` : common prefix to the names of all taxa of the builder
373    /// * `rng: &mut R` : random number generator
374    /// * `rate: &Poisson<f32>` : law for sampling the umber of children
375    /// * `depth: i8` : maximal depth of the taxonomy
376    /// * `R: Rng` : type of random number generator
377    /// * Output: random taxonomy builder
378    pub fn rand_taxon_builder<R: Rng>(
379        prefix: &str, rng: &mut R, rate: &Poisson<f32>, depth: i8,
380    ) -> Self {
381        let nb_children = if depth > 0 { rate.sample(rng) as usize } else { 0usize };
382        if nb_children == 0 {
383            Self::Leaf { name: prefix.to_string(), weight: Open01.sample(rng), }
384        } else {
385            // using set prevents name repetition
386            let letters = (0..nb_children).map(|_| <()>::rand_letter(rng)).collect::<HashSet<_>>();
387            let children = letters.into_iter().map(|postfix| 
388                Self::rand_taxon_builder(&<()>::append(prefix,postfix),rng,rate, depth - 1)
389            ).collect();
390            Self::Node { name: prefix.to_string(), children, }
391        }
392    }        
393}
394
395impl Taxons {
396    /// Internal use: get back taxonomy builder from code-to-taxon map
397    fn inner_into_taxons_builder(taxons: &HashMap<u128slx,Taxon>, root: &u128slx) -> TaxonomyBuilder {
398        let taxon_root = &taxons[root];
399        match taxon_root {
400            Taxon::Node { name, children } => {
401                let children = children.iter().map(
402                    |idx| Self::inner_into_taxons_builder(taxons, idx)
403                ).collect();
404                TaxonomyBuilder::Node { name: name.to_string(), children, }
405            },
406            Taxon::Leaf { name, weight } => TaxonomyBuilder::Leaf { 
407                name: name.to_string(), weight: (*weight).unslx() 
408            },
409        }
410    }
411    /// Get back the taxonomy builder for these taxons
412    /// * Output: taxonomy builder
413    pub fn into_taxons_builder(&self) -> TaxonomyBuilder {
414        let Self { taxons, root } = self;
415        Self::inner_into_taxons_builder(taxons, root)
416    }
417}
418
419impl Taxonomy {
420    /// Internal use: taxonord is made into a binary tree by adding fake node (With None name)
421    /// the process runs by priorizing subtrees with small deep (that why reordering is necessary)
422    fn build_binary(rt: &mut TaxonOrd<(u8,Option<String>)>, next_index: &mut u128) -> bool {
423        match rt {
424            TaxonOrd::Node { children, .. } => { // compute on children
425                let mut new_children = BTreeMap::new();
426                std::mem::swap(children, &mut new_children);
427                // children upper indexes to be recomputed:
428                for((_,k), mut txn) in new_children {
429                    if Self::build_binary(&mut txn, next_index) { // Thus, the subtree is computed
430                        let h = match txn {
431                            TaxonOrd::Node { name: (h,_), .. } | TaxonOrd::Leaf { name: (h,_), .. } => h,
432                        }; 
433                        children.insert((h,k),txn); // <- and reordered with upper index 
434                    } else { return false; }
435                }
436            },
437            TaxonOrd::Leaf { .. } => (), // if no children, do nothing (leaf keep index 0)
438        }
439        match rt {
440            TaxonOrd::Node { name, children, .. } => {
441                while children.len() >= 3 {
442                    // take two childrens with least level
443                    let mut kys = children.keys().copied();
444                    let k1 = kys.next().unwrap();
445                    let k2 = kys.next().unwrap();
446                    let v1 = children.remove(&k1).unwrap();
447                    let v2 = children.remove(&k2).unwrap();
448                    // get the levels
449                    let h1 = match v1 {
450                        TaxonOrd::Node { name: (h,_), .. } | TaxonOrd::Leaf { name: (h,_), .. } => h,
451                    };
452                    let h2 = match v2 {
453                        TaxonOrd::Node { name: (h,_), .. } | TaxonOrd::Leaf { name: (h,_), .. } => h,
454                    };
455                    let hf = if h1 > h2 { h1 + 1 } else { h2 + 1 }; // increment best level by 1
456                    if hf > 121u8 { return false; }; // result level cannot exceed u128 capacity
457                    // Now buil a fake uppeer node
458                    let index = *next_index; *next_index += 1; 
459                    let btx = TaxonOrd::Node { 
460                        index, name: (hf,None), 
461                        children: [(k1,v1),(k2,v2)].into_iter().collect() // key/value should be Ok
462                    }; 
463                    children.insert((hf,index),btx);
464                }
465                // update taxon name
466                let new_h = children.keys().map(|(h,_)| *h).max().expect(
467                    "unexpected error: children cannot be empty!"
468                ) + 1;
469                name.0 = new_h;
470            },
471            TaxonOrd::Leaf { .. } => (), // nothing changes for leaf
472        }
473        true
474    }
475    /// Internal use: build coder from binary tree
476    fn build_code(
477        level: u8, code: u128slx, up_full_code: u128slx, rt: TaxonOrd<(u8,Option<String>)>,
478        rcoder_0: &mut HashMap::<u128slx,u128slx>, pre_taxons: &mut BTreeMap<(u8,u128slx),Taxon>,
479        in_father: &mut Vec<u128slx>,
480    ) {
481        match rt {
482            TaxonOrd::Node { name: (_, Some(name)), mut children, .. } => {
483                let full_code = TaxonCoder::to_u128slx((level,code));
484                // taxon is not fake:
485                let up_full_code = full_code;
486                // complete coder
487                rcoder_0.insert(full_code,up_full_code);
488                // prepare pre_taxons completion
489                let pre_taxons_key = (level,full_code);
490                let mut pre_taxons_children = Vec::new();
491                // insert this in father's list
492                in_father.push(full_code);
493                // this node becomes new father
494                let in_father = &mut pre_taxons_children;
495                // get the children of current ord_taxon (should be binary at this time!)
496                let mut keys = children.keys().cloned();
497                let ok0 = keys.next(); let ok1 = keys.next();
498                // a new level for the children
499                let level = level+1;
500                match (ok0,ok1) {
501                    // two children
502                    (Some(k0), Some(k1)) => {
503                        let mask = (!(TaxonCoder::above_top_u128() >> level)).slx();
504                        Self::build_code(
505                            level,code & mask, up_full_code, children.remove(&k0).unwrap(), 
506                            rcoder_0, pre_taxons, in_father
507                        );
508                        Self::build_code(
509                            level,code, up_full_code, children.remove(&k1).unwrap(),
510                            rcoder_0, pre_taxons, in_father
511                        );
512                    },
513                    // one children
514                    (Some(k0), _) => {
515                        let mask = (!(TaxonCoder::above_top_u128() >> level)).slx();
516                        Self::build_code(
517                            level,code & mask, up_full_code, children.remove(&k0).unwrap(),
518                            rcoder_0, pre_taxons, in_father
519                        );
520                    },
521                    _ => panic!("unexpected error: node without children"),
522                };
523                // complete pre_taxons
524                pre_taxons.insert(pre_taxons_key, Taxon::Node { name, children: pre_taxons_children });
525            },
526            TaxonOrd::Node { name: (_, None), mut children, .. } => {
527                let full_code = TaxonCoder::to_u128slx((level,code));
528                // taxon is fake; up_full_code is unchanged
529                // complete coder
530                rcoder_0.insert(full_code,up_full_code);
531                // this is fake and is not inserted in father's list
532                // get the children of current ord_taxon (should be binary at this time!)
533                let mut keys = children.keys().cloned();
534                let ok0 = keys.next(); let ok1 = keys.next();
535                // a new level for the children
536                let level = level+1;
537                match (ok0,ok1) {
538                    // two children
539                    (Some(k0), Some(k1)) => {
540                        let mask = (!(TaxonCoder::above_top_u128() >> level)).slx();
541                        Self::build_code(
542                            level,code & mask, up_full_code, children.remove(&k0).unwrap(), 
543                            rcoder_0, pre_taxons, in_father
544                        );
545                        Self::build_code(
546                            level,code, up_full_code, children.remove(&k1).unwrap(),
547                            rcoder_0, pre_taxons, in_father
548                        );
549                    },
550                    // one children
551                    (Some(k0), _) => {
552                        let mask = (!(TaxonCoder::above_top_u128() >> level)).slx();
553                        Self::build_code(
554                            level,code & mask, up_full_code, children.remove(&k0).unwrap(),
555                            rcoder_0, pre_taxons, in_father
556                        );
557                    },
558                    _ => panic!("unexpected error: node without children"),
559                };
560                // pre_taxons is unchanged
561            },
562            TaxonOrd::Leaf { name: (_,Some(name)), weight, .. } => {
563                let full_code = TaxonCoder::to_u128slx((level,code));
564                // taxon is not fake:
565                let up_full_code = full_code;
566                // complete coder
567                rcoder_0.insert(full_code,up_full_code);
568                // insert this in father's list
569                in_father.push(full_code);
570                // complete pre_taxons
571                pre_taxons.insert((level,full_code), Taxon::Leaf { name, weight: weight.slx(), });
572            },
573            TaxonOrd::Leaf { name: (_,None), .. } => {
574                panic!("unexpected error: leaf with fake marker")
575            },
576        }
577    }
578
579    /// Internal use: compute code-to-taxon map to taxonomy builder
580    fn inner_into_taxon_builder(node: &u128slx, taxons: &HashMap<u128slx,Taxon>) -> TaxonomyBuilder {
581        match taxons.get(node) {
582            Some(Taxon::Node { name, children }) => {
583                let children = children.iter().map(
584                    |node| Self::inner_into_taxon_builder(node, taxons)
585                ).collect();
586                TaxonomyBuilder::Node { name: name.to_string(), children }
587            },
588            Some(Taxon::Leaf { name, weight }) => {
589                TaxonomyBuilder::Leaf { name: name.to_string(), weight: (*weight).unslx() }
590            },
591            None => panic!("unexpected error"),
592        }
593    }
594
595    /// Derive taxonomy builder from the taxonomy
596    /// * Output: taxonomy builder
597    pub fn into_taxon_builder(&self) -> TaxonomyBuilder {
598        let Taxons { taxons, root } = &self.taxons;
599        Self::inner_into_taxon_builder(root, taxons)
600    }
601
602    /// Build Taxonomy from taxonomy builder
603    /// * `root: &TaxonomyBuilder` : taxonomy builder of the root taxon
604    /// * Output: a taxonomy or an error
605    pub fn new(root: &TaxonomyBuilder) -> Result<Taxonomy,String> {
606        // construction d'une taxonomie de calcul en vue de construire la structure de donnée
607        // (u8,u128) sera le code du Taxon
608        // bool indiquera un taxon actif (true) ou instrumental (embranchement pour une taxonomie non binaire)
609        // u128 indique le poids de la taxonomie dont le taxon est racine
610        let (
611            mut root_comp, mut next_index
612        ) = root.into_taxonord(|t|(0,Some(t.to_string())));
613        if Self::build_binary(&mut root_comp, &mut next_index) {
614            let bottom = TaxonCoder::bottom_u128slx();
615            let top = TaxonCoder::top_u128slx();
616            let mut coder = TaxonCoder::new(HashMap::<u128slx,u128slx>::new());
617            let mut pre_taxons = BTreeMap::new();
618            let mut root_wrapper = vec![];
619            Self::build_code( // nota: root level is 0
620                              // code and up_full_code of root are top
621                0u8,top,top,root_comp,
622                              // root does not have father, but we fake it
623                &mut coder.0, &mut pre_taxons, &mut root_wrapper,
624            );
625            // a small process control
626            if root_wrapper.len() != 1 || top != root_wrapper[0] {
627                panic!("root value mismatch: [{top}] =/= {:?}",root_wrapper);
628            }
629            let taxons = {
630                let root = top;
631                let taxons = pre_taxons.clone().into_iter()
632                                                            .map(|((_,e), t,)| (e,t) ).collect();
633                Taxons { taxons, root }
634            };
635            let (tags,untags) = pre_taxons.iter().map(|((_,e),t)| {
636                match t {
637                    Taxon::Node { name, .. } | Taxon::Leaf { name, .. } => {
638                        ((*e,name.to_string()),(name.to_string(),*e))       
639                    },
640                }
641            }).chain(once((
642                (bottom, <() as RandomTaxon>::bot_string()),(<() as RandomTaxon>::bot_string(),bottom)
643            ))).unzip();
644            let (leaves,weighted_leaves) = pre_taxons.iter().filter_map(|((_,e),t)| {
645                match t {
646                    Taxon::Node { .. } => None, Taxon::Leaf { weight, .. } => Some((*e,(*e,*weight))),
647                }
648            }).unzip();
649            let top_to_bottom = pre_taxons.into_iter()
650                                .map(|((_,e),_)| e).chain(once(bottom)).collect();
651            let lattice_hash = {
652                let mut lattice_hash = Taxonomy::TYPE_HASH_NATIVE;
653                lattice_hash = add_hash_fnv1a(&coder.0.len().to_le_bytes(), lattice_hash);
654                let coder = coder.0.iter().collect::<BTreeMap<_,_>>();
655                for (u,v) in coder {
656                    lattice_hash = add_hash_fnv1a(&(*u).unslx().to_le_bytes(), lattice_hash);
657                    lattice_hash = add_hash_fnv1a(&(*v).unslx().to_le_bytes(), lattice_hash);
658                }
659                let taxons = taxons.taxons.iter().collect::<BTreeMap<_,_>>();
660                for (u, taxon) in taxons {
661                    lattice_hash = add_hash_fnv1a(&(*u).unslx().to_le_bytes(), lattice_hash);
662                    match taxon {
663                        Taxon::Node { name, children } => {
664                            lattice_hash = add_hash_fnv1a(b"Node", lattice_hash);
665                            lattice_hash = add_hash_fnv1a(name.as_bytes(), lattice_hash);
666                            lattice_hash = add_hash_fnv1a(&children.len().to_le_bytes(), lattice_hash);
667                            let children = children.iter().collect::<BTreeSet<_>>();
668                            for child in children {
669                                lattice_hash = add_hash_fnv1a(&child.unslx().to_le_bytes(), lattice_hash);
670                            }
671                        },
672                        Taxon::Leaf { name, weight } => {
673                            lattice_hash = add_hash_fnv1a(b"Leaf", lattice_hash);
674                            lattice_hash = add_hash_fnv1a(name.as_bytes(), lattice_hash);
675                            lattice_hash = add_hash_fnv1a(&(*weight).unslx().to_le_bytes(), lattice_hash);
676                            
677                        },
678                    }
679                }
680                lattice_hash.slx()
681            };
682            let bottom = SafeElement { code: bottom, lattice_hash };
683            let top = SafeElement { code: top, lattice_hash};
684            Ok(Taxonomy { taxons, top, bottom, coder, tags, untags, leaves, weighted_leaves, top_to_bottom, })    
685        } else { Err("Failed to build taxonomy".to_string()) }
686    }
687
688    #[cfg(feature = "serde")] 
689    /// internal use for serde: an empty taxonomy -> produced in case of deserialization error
690    fn empty() -> Self {
691        let zero = 0u128.slx();
692        let taxons = Taxons { taxons: HashMap::new(), root: zero };
693        let top = SafeElement { code: zero, lattice_hash: zero };
694        let bottom = top;
695        let coder = TaxonCoder(HashMap::new());
696        let tags = HashMap::new();
697        let untags = HashMap::new();
698        let weighted_leaves = HashMap::new();
699        let leaves = Vec::new();
700        let top_to_bottom = Vec::new();
701        Taxonomy { 
702            taxons, top, bottom, coder, tags, untags, leaves, weighted_leaves, top_to_bottom, 
703        }
704    }
705
706    /// Generate random taxonomy
707    /// * `rng: &mut R` : random nuber generator
708    /// * `R: Rng` : type of random number generator
709    /// * Output: a random taxonomy
710    pub fn rand_taxonomy<R: Rng>(rng: &mut R) -> Taxonomy {
711        let rate = Poisson::new(3f32).unwrap();
712        let root = TaxonomyBuilder::rand_taxon_builder(
713            <()>::rand_letter(rng),rng,&rate,5
714        );
715        Self::new(&root).expect("unexpected taxonomy build failure")
716    }
717}
718
719impl Lattice for Taxonomy {
720    type Item = u128slx;
721
722    fn rand_lattice<R: Rng>(rng: &mut R) -> Self { Self::rand_taxonomy(rng) }
723
724    fn rand_element<R: Rng>(&self, rng: &mut R) -> SafeElement<Self::Item> {
725        let Self { bottom, top_to_bottom, .. } = self;
726        let element = top_to_bottom[rng.gen_range(0..top_to_bottom.len())];
727        let lattice_hash = bottom.lattice_hash;
728        SafeElement { code: element, lattice_hash }
729    }
730
731    fn ref_lattice_hash(&self) -> &u128slx { &self.bottom.lattice_hash }
732
733    fn contains(&self, element: &Self::Item) -> bool { self.taxons.taxons.contains_key(element) }
734
735    fn ref_bottom(&self) -> &crate::structs::SafeElement<Self::Item> { &self.bottom }
736
737    fn ref_top(&self) -> &crate::structs::SafeElement<Self::Item> { &self.top }
738
739    unsafe fn unsafe_meet(&self, element_left: &Self::Item, element_right: &Self::Item) -> Self::Item {
740        self.coder.meet_slx(*element_left, *element_right)
741    }
742
743    unsafe fn unsafe_join(&self, element_left: &Self::Item, element_right: &Self::Item) -> Self::Item {
744        self.coder.join_slx(*element_left, *element_right)
745    }
746
747    fn from_str(&self, s: &str) -> Result<crate::structs::SafeElement<Self::Item>,String> {
748        let element = match self.untags.get(s) {
749            Some(e) => *e,
750            None => return Err(format!("Taxon {s} is unknown")),
751        };
752        let lattice_hash = self.bottom.lattice_hash;
753        Ok(SafeElement { code: element, lattice_hash })
754    }
755
756    fn to_string(&self, element: &crate::structs::SafeElement<Self::Item>) -> Result<String,String> {
757        let SafeElement { code: element, lattice_hash } = element;
758        if lattice_hash == &self.bottom.lattice_hash {
759            match self.tags.get(element) {
760                Some(s) => Ok(s.to_string()),
761                None => Err("Unexpected: element is not within lattice, although lattice hashes are same".to_string()),
762            }
763        } else {
764            Err("Lattice hash mismatch: element is not within lattice".to_string())
765        }
766    }
767}
768
769impl IterableLattice for Taxonomy {
770    type IntoIterUp = vec::IntoIter<u128slx>;
771
772    type IntoIterDown = vec::IntoIter<u128slx>;
773
774    unsafe fn unsafe_bottom_to_top(&self) -> Result<Self::IntoIterUp,String> {
775        Ok(self.top_to_bottom.iter().rev().copied().collect::<Vec<_>>().into_iter())
776    }
777
778    unsafe fn unsafe_top_to_bottom(&self) -> Result<Self::IntoIterDown,String> {
779        Ok(self.top_to_bottom.clone().into_iter())
780    }
781}
782
783impl LatticeWithLeaves for Taxonomy {
784    type IntoIterLeaves = hash_map::IntoIter<Self::Item, f64slx>;
785
786    unsafe fn unsafe_leaves(&self) -> Result<Self::IntoIterLeaves,String> {
787        Ok(self.weighted_leaves.clone().into_iter())
788    }
789
790    unsafe fn unsafe_leaf(&self, u: usize) -> Result<&Self::Item,String> {
791        match self.leaves.get(u) {
792            Some(x) => Ok(x),
793            None => Err(format!("Leaf of index {u} is not found within lattice")),
794        }
795    }
796
797    unsafe fn unsafe_weighted_leaf(&self, u: usize) -> Result<(&Self::Item,&f64slx),String> {
798        let leaf = self.unsafe_leaf(u)?;
799        Ok((leaf,&self.weighted_leaves[leaf]))
800    }
801}
802
803pub mod experiment {
804    use rand::Rng;
805    use rand_distr::Poisson;
806
807    use crate::{structs::{ TaxonomyBuilder, Taxonomy }, traits::Lattice};
808    use super::RandomTaxon;
809
810    fn is_node(tb: &TaxonomyBuilder, prop: &str,) -> bool {
811        match tb {
812            TaxonomyBuilder::Node { name, .. } | TaxonomyBuilder::Leaf { name, .. } => name == prop,
813        }
814    }
815
816    fn contain(tb: &TaxonomyBuilder, prop: &str,) -> bool {
817        match tb {
818            TaxonomyBuilder::Node { name, children } => {
819                if name == prop { true } else {
820                    for tb in children {
821                        if contain(tb, prop) { return true; }
822                    }
823                    false
824                }
825            },
826            TaxonomyBuilder::Leaf { name, .. } => name == prop,
827        }
828    }
829
830    fn find_meet(tb: &TaxonomyBuilder, left: &str, right: &str) -> Option<String> {
831        match (left == <()>::bot_str(), right == <()>::bot_str()) {
832            (false, false) => {
833                match (is_node(tb, left), is_node(tb, right), contain(tb, left), contain(tb, right)) {
834                    (_, _, true, false) | (_, _, false, true) | (_, _, false, false)  => None,
835                    (_, true, _, _) => Some(left.to_string()),
836                    (true, false, _, _) => Some(right.to_string()),
837                    (false, false, _, _) => {
838                        match tb {
839                            TaxonomyBuilder::Node { children, .. } => {
840                                for child in children  {
841                                    match (contain(child, left),contain(child, right)) {
842                                        (true, true) => return find_meet(child,left,right),
843                                        (false, false) => (),
844                                        _ => return Some(<()>::bot_str().to_string()),                                    }
845                                }
846                                panic!("unexpected error")
847                            },
848                            TaxonomyBuilder::Leaf { .. } => panic!("unexpected error"),
849                        }
850                    },
851                }
852            },
853            (true,true) => Some(<()>::bot_str().to_string()),
854            (false, true) => if contain(tb, left) { Some(<()>::bot_str().to_string()) } else { None },
855            (true, false) => if contain(tb, right) { Some(<()>::bot_str().to_string()) } else { None },
856        }
857    }
858
859    fn find_join(tb: &TaxonomyBuilder, left: &str, right: &str) -> Option<String> {
860        match (left == <()>::bot_str(), right == <()>::bot_str()) {
861            (false, false) => {
862                match (is_node(tb, left), is_node(tb, right), contain(tb, left), contain(tb, right)) {
863                    (_, _, true, false) | (_, _, false, true) | (_, _, false, false)  => None,
864                    (true, _, _, _) => Some(left.to_string()),
865                    (false, true, _, _) => Some(right.to_string()),
866                    (false, false, _, _) => {
867                        match tb {
868                            TaxonomyBuilder::Node { name, children } => {
869                                for child in children  {
870                                    match (contain(child, left),contain(child, right)) {
871                                        (true, true) => return find_join(child,left,right),
872                                        (false, false) => (),
873                                        _ => return Some(name.to_string()),                                    }
874                                }
875                                panic!("unexpected error")
876                            },
877                            TaxonomyBuilder::Leaf { .. } => panic!("unexpected error"),
878                        }
879                    },
880                }
881            },
882            (false, true) => if contain(tb, left) { Some(left.to_string()) } else { None },
883            (true, false) => if contain(tb, right) { Some(right.to_string()) } else { None },
884            (true, true) => Some(<()>::bot_str().to_string()),
885        }
886    }
887
888    /// Experimentation 1 with taxonomy
889    pub fn exp_taxonomy_1() -> Result<(),String> { // OK
890        let taxon_e = TaxonomyBuilder::new_leaf("E".to_string(), 0.1);
891        let taxon_f = TaxonomyBuilder::new_leaf("F".to_string(), 0.05);
892        let taxon_g = TaxonomyBuilder::new_leaf("G".to_string(), 0.1);
893        let taxon_h = TaxonomyBuilder::new_leaf("H".to_string(), 0.15);
894        let taxon_i = TaxonomyBuilder::new_leaf("I".to_string(), 0.2);
895        let taxon_k = TaxonomyBuilder::new_leaf("K".to_string(), 0.05);
896        let taxon_l = TaxonomyBuilder::new_leaf("L".to_string(), 0.1);
897        let taxon_m = TaxonomyBuilder::new_leaf("M".to_string(), 0.05);
898        let taxon_n = TaxonomyBuilder::new_leaf("N".to_string(), 0.05);
899        let taxon_o = TaxonomyBuilder::new_leaf("O".to_string(), 0.15);
900        let taxon_b = TaxonomyBuilder::new_node(
901            "B".to_string(), vec![taxon_e,taxon_f,taxon_g],
902        )?;
903        let taxon_c = TaxonomyBuilder::new_node(
904            "C".to_string(), vec![taxon_h],
905        )?;
906        let taxon_j = TaxonomyBuilder::new_node(
907            "J".to_string(), vec![taxon_m,taxon_n,taxon_o],
908        )?;
909        let taxon_d = TaxonomyBuilder::new_node(
910            "D".to_string(), vec![taxon_i,taxon_j,taxon_k,taxon_l],
911        )?;
912        let taxon_a = TaxonomyBuilder::new_node(
913            "A".to_string(), vec![taxon_b,taxon_c,taxon_d],
914        )?;
915        let taxonomy = Taxonomy::new(&taxon_a)?;
916        println!("taxon_a: {:#?}", taxon_a);
917        println!("taxonomy.into_taaxon_builder(): {:#?}", taxonomy.into_taxon_builder());
918        println!("-------- meet ---");
919        let tx_names = taxonomy.tags
920            .iter().map(|(_,n)|n.to_string()).collect::<Vec<_>>();
921        let mut fail_meet = 0;
922        let mut success_meet = 0;
923        for n in &tx_names {
924            let c_n = &taxonomy.untags[n]; 
925            for m in &tx_names {
926                let c_m = &taxonomy.untags[m]; 
927                let meet = &taxonomy.tags[&unsafe { taxonomy.unsafe_meet(c_n, c_m) }];
928                let theoric_meet = find_meet(&taxon_a, &n, &m).unwrap();
929                if meet == &theoric_meet { success_meet += 1; } else { fail_meet +=1; }
930                println!("{n} & {m} -> {meet} ; theoric: {theoric_meet}");
931            }
932        }
933        println!("-------- join ---");
934        let mut fail_join = 0;
935        let mut success_join = 0;
936        for n in &tx_names {
937            let c_n = &taxonomy.untags[n]; 
938            for m in &tx_names {
939                let c_m = &taxonomy.untags[m]; 
940                let join = &taxonomy.tags[&unsafe { taxonomy.unsafe_join(c_n, c_m) }];
941                let theoric_join = find_join(&taxon_a, &n, &m).unwrap();
942                if join == &theoric_join { success_join += 1; } else { fail_join +=1; }
943                println!("{n} | {m} -> {join} ; theoric: {theoric_join}");
944            }
945        }
946        println!("------- stats ---");
947        println!("meet: fail / success = {fail_meet} / {success_meet}");
948        println!("join: fail / success = {fail_join} / {success_join}");
949        Ok(())
950    }
951
952    /// Experimentation 2 with taxonomy
953    pub fn exp_taxonomy_2() -> Result<(),String> { // OK
954        let mut rng = rand::thread_rng();
955        let mut fail_meet = 0;
956        let mut success_meet = 0;
957        let mut fail_join = 0;
958        let mut success_join = 0;
959        for u in 0..100 {
960            let rate = Poisson::new(3f32).unwrap();
961            let depth = rng.gen_range(2..7);
962            let root = TaxonomyBuilder::rand_taxon_builder(
963                <()>::rand_letter(&mut rng),&mut rng,&rate,depth
964            );
965            let taxonomy = Taxonomy::new(&root)?;
966            println!("taxonomy {u} ; size: {}", taxonomy.top_to_bottom.len());
967            let tx_names = taxonomy.tags
968                .iter().map(|(_,n)|n.to_string()).collect::<Vec<_>>();
969            for n in &tx_names {
970                let c_n = &taxonomy.untags[n]; 
971                for m in &tx_names {
972                    let c_m = &taxonomy.untags[m]; 
973                    let meet = &taxonomy.tags[&unsafe { taxonomy.unsafe_meet(c_n, c_m) }];
974                    let theoric_meet = find_meet(&root, &n, &m).unwrap();
975                    if meet == &theoric_meet { success_meet += 1; } else { fail_meet +=1; }
976                }
977            }
978            for n in &tx_names {
979                let c_n = &taxonomy.untags[n]; 
980                for m in &tx_names {
981                    let c_m = &taxonomy.untags[m]; 
982                    let join = &taxonomy.tags[&unsafe { taxonomy.unsafe_join(c_n, c_m) }];
983                    let theoric_join = find_join(&root, &n, &m).unwrap();
984                    if join == &theoric_join { success_join += 1; } else { fail_join +=1; }
985                }
986            }
987        }
988        println!("------- stats ---");
989        println!("meet: fail / success = {fail_meet} / {success_meet}");
990        println!("join: fail / success = {fail_join} / {success_join}");
991        Ok(())
992    }
993}