Skip to main content

ebi_objects/ebi_objects/
finite_stochastic_language.rs

1#[cfg(any(test, feature = "testactivities"))]
2use ebi_activity_key::TestActivityKey;
3use crate::{
4    Activity, ActivityKey, ActivityKeyTranslator, Exportable, HasActivityKey, Importable, Infoable,
5    IntoTraceProbabilityIterator, TranslateActivityKey,
6    constants::ebi_object::EbiObject,
7    iterators::{
8        parallel_ref_probability_trace_iterator::ParallelRefProbabilityTraceIterator,
9        parallel_ref_trace_iterator::ParallelRefTraceIterator,
10        ref_trace_iterator::RefTraceIterator,
11    },
12    line_reader::LineReader,
13    traits::{
14        importable::{ImporterParameter, ImporterParameterValues, from_string},
15        number_of_traces::NumberOfTraces,
16        trace_iterators::{
17            IntoRefProbabilityIterator, IntoRefTraceIterator, IntoRefTraceProbabilityIterator,
18        },
19    },
20};
21use anyhow::{Context, Result, anyhow};
22use ebi_arithmetic::{Fraction, One, Signed, Zero};
23use ebi_derive::ActivityKey;
24use rayon::iter::{IntoParallelIterator, IntoParallelRefIterator};
25use std::{
26    collections::{
27        HashMap,
28        hash_map::{IntoIter, Iter},
29    },
30    fmt,
31    io::{BufRead, Write},
32};
33
/// Header line identifying the finite stochastic language file format.
///
/// `import` requires the first line it reads to equal this value, and the
/// `Display` implementation writes it as the first line of its output.
pub const HEADER: &str = "finite stochastic language";
35
/// A finite set of traces, each associated with a probability.
///
/// Constructors and the importer in this file do not force the probabilities
/// to sum to one; call `normalise` to rescale them.
#[derive(Clone, Debug, ActivityKey)]
pub struct FiniteStochasticLanguage {
    /// Activity key that the `Activity` values inside `traces` belong to.
    pub activity_key: ActivityKey,
    /// Maps each trace (a sequence of activities) to its probability.
    pub traces: HashMap<Vec<Activity>, Fraction>,
}
41
42impl FiniteStochasticLanguage {
43    /**
44     * Does not normalise the distribution.
45     */
46    pub fn new_raw(traces: HashMap<Vec<Activity>, Fraction>, activity_key: ActivityKey) -> Self {
47        Self {
48            activity_key: activity_key,
49            traces: traces,
50        }
51    }
52
53    pub fn normalise_before(traces: &mut HashMap<Vec<String>, Fraction>) {
54        if traces.len() != 0 {
55            let sum = traces.values().fold(Fraction::zero(), |mut x, y| {
56                x += y;
57                x
58            });
59            log::info!("the extracted traces cover a sum of {}", sum);
60            traces.retain(|_, v| {
61                *v /= &sum;
62                true
63            });
64        }
65    }
66
67    pub fn normalise(&mut self) {
68        if self.number_of_traces() != 0 {
69            let sum = self.traces.values().fold(Fraction::zero(), |mut x, y| {
70                x += y;
71                x
72            });
73            log::info!("the extracted traces cover a sum of {}", sum);
74            self.traces.retain(|_, v| {
75                *v /= &sum;
76                true
77            });
78        }
79    }
80
81    fn contains(&self, atrace_b: Vec<&str>, probability_b: &Fraction) -> bool {
82        for trace_a in self.traces.iter() {
83            let atrace_a = self.activity_key.deprocess_trace(&trace_a.0);
84
85            if atrace_a == atrace_b && trace_a.1 == probability_b {
86                return true;
87            }
88        }
89        return false;
90    }
91}
92
93impl TranslateActivityKey for FiniteStochasticLanguage {
94    fn translate_using_activity_key(&mut self, to_activity_key: &mut ActivityKey) {
95        let translator = ActivityKeyTranslator::new(&self.activity_key, to_activity_key);
96
97        //a hashmap needs to be rebuilt, unfortunately
98        let translated_traces: HashMap<Vec<Activity>, Fraction> = self
99            .traces
100            .drain() // `drain` is used to take ownership of the original traces (use `into_iter()` or `drain()` if we want to consume)
101            .map(|(trace, fraction)| (translator.translate_trace(&trace), fraction))
102            .collect();
103
104        // Update the traces in the language with the translated ones
105        self.traces = translated_traces;
106
107        self.activity_key = to_activity_key.clone();
108    }
109}
110
111impl NumberOfTraces for FiniteStochasticLanguage {
112    fn number_of_traces(&self) -> usize {
113        self.traces.len()
114    }
115
116    fn number_of_events(&self) -> usize {
117        self.traces.iter().map(|(t, _)| t.len()).sum()
118    }
119}
120
121impl Eq for FiniteStochasticLanguage {}
122
123impl PartialEq for FiniteStochasticLanguage {
124    fn eq(&self, other: &Self) -> bool {
125        if self.traces.len() != other.traces.len() {
126            return false;
127        }
128        for trace_b in other.traces.iter() {
129            let atrace_b = other.activity_key.deprocess_trace(&trace_b.0);
130            if !self.contains(atrace_b, trace_b.1) {
131                return false;
132            }
133        }
134
135        return true;
136    }
137}
138
139impl Importable for FiniteStochasticLanguage {
140    const FILE_FORMAT_SPECIFICATION_LATEX: &str = "A finite language is a line-based structure. Lines starting with a \\# are ignored.
141    This first line is exactly `finite stochastic language'.
142    The second line is the number of traces in the language.
143    For each trace, the first line is the probability of the trace as a positive fraction or a decimal value.
144    The second line contains the number of events in the trace.
145    Then, each subsequent line contains the activity name of one event.
146
147    The sum of the probabilities of the traces in the language needs to be $\\leq$ 1.
148    
149    For instance:
150    \\lstinputlisting[language=ebilines, style=boxed]{../testfiles/aa-ab-ba.slang}";
151
152    const IMPORTER_PARAMETERS: &[ImporterParameter] = &[];
153
154    fn import_as_object(
155        reader: &mut dyn BufRead,
156        parameter_values: &ImporterParameterValues,
157    ) -> Result<EbiObject> {
158        Ok(EbiObject::FiniteStochasticLanguage(Self::import(
159            reader,
160            parameter_values,
161        )?))
162    }
163
164    fn import(reader: &mut dyn BufRead, _: &ImporterParameterValues) -> Result<Self> {
165        let mut lreader = LineReader::new(reader);
166
167        let head = lreader
168            .next_line_string()
169            .with_context(|| format!("failed to read header, which should be `{}`", HEADER))?;
170        if head != HEADER {
171            return Err(anyhow!(
172                "first line should be exactly `{}`, but found `{}`",
173                HEADER,
174                head
175            ));
176        }
177
178        let number_of_traces = lreader
179            .next_line_index()
180            .context("failed to read number of places")?;
181
182        let mut traces = HashMap::new();
183        let mut sum = Fraction::zero();
184        let mut activity_key = ActivityKey::new();
185        for trace_i in 0..number_of_traces {
186            let probability = lreader.next_line_weight().with_context(|| {
187                format!(
188                    "failed to read weight for trace {} at line {}",
189                    trace_i,
190                    lreader.get_last_line_number()
191                )
192            })?;
193
194            if !probability.is_positive() {
195                return Err(anyhow!(
196                    "trace {} at line {} has non-positive probability",
197                    trace_i,
198                    lreader.get_last_line_number()
199                ));
200            } else if probability > Fraction::one() {
201                return Err(anyhow!(
202                    "trace {} at line {} has a probability higher than 1",
203                    trace_i,
204                    lreader.get_last_line_number()
205                ));
206            }
207
208            sum += probability.clone();
209
210            let number_of_events = lreader.next_line_index().with_context(|| {
211                format!(
212                    "failed to read number of events for trace {} at line {}",
213                    trace_i,
214                    lreader.get_last_line_number()
215                )
216            })?;
217
218            let mut trace = vec![];
219            trace.reserve_exact(number_of_events);
220
221            for event_i in 0..number_of_events {
222                let event = lreader.next_line_string().with_context(|| {
223                    format!(
224                        "failed to read event {} of trace {} at line {}",
225                        event_i,
226                        trace_i,
227                        lreader.get_last_line_number()
228                    )
229                })?;
230                trace.push(event);
231            }
232
233            let trace = activity_key.process_trace(&trace);
234            if traces.insert(trace, probability).is_some() {
235                return Err(anyhow!(
236                    "trace {} ending at line {} appears twice in language",
237                    trace_i,
238                    lreader.get_last_line_number()
239                ));
240            }
241        }
242
243        if sum > Fraction::one() && !sum.is_one() {
244            //avoid rounding errors in approximate mode
245            return Err(anyhow!(
246                "probabilities in stochastic language sum to {}, which is greater than 1",
247                sum
248            ));
249        }
250
251        Ok(Self {
252            activity_key: activity_key,
253            traces: traces,
254        })
255    }
256}
// NOTE(review): presumably generates the `FromStr` implementation on top of
// `Importable::import` (the tests in this file call
// `.parse::<FiniteStochasticLanguage>()`) — confirm against the macro definition.
from_string!(FiniteStochasticLanguage);
258
259impl Exportable for FiniteStochasticLanguage {
260    fn export_from_object(object: EbiObject, f: &mut dyn Write) -> Result<()> {
261        match object {
262            EbiObject::FiniteStochasticLanguage(slang) => slang.export(f),
263            EbiObject::EventLog(log) => Into::<Self>::into(log).export(f),
264            EbiObject::EventLogTraceAttributes(log) => Into::<Self>::into(log).export(f),
265            EbiObject::EventLogXes(log) => Into::<Self>::into(log).export(f),
266            EbiObject::EventLogCsv(log) => Into::<Self>::into(log).export(f),
267            _ => Err(anyhow!("Cannot export as finite stochastic language.")),
268        }
269    }
270
271    fn export(&self, f: &mut dyn std::io::Write) -> Result<()> {
272        Ok(write!(f, "{}", self)?)
273    }
274}
275
276impl Infoable for FiniteStochasticLanguage {
277    fn info(&self, f: &mut impl std::io::Write) -> Result<()> {
278        writeln!(f, "Number of traces\t{}", self.traces.len())?;
279        writeln!(
280            f,
281            "Number of events\t{}",
282            self.traces.iter().map(|t| t.0.len()).sum::<usize>()
283        )?;
284        writeln!(
285            f,
286            "Number of activities\t{}",
287            self.activity_key().get_number_of_activities()
288        )?;
289        writeln!(
290            f,
291            "Sum of probabilities\t{:.4}",
292            self.traces.values().fold(Fraction::zero(), |mut x, y| {
293                x += y;
294                x
295            })
296        )?;
297
298        writeln!(f, "")?;
299        self.activity_key().info(f)?;
300
301        Ok(writeln!(f, "")?)
302    }
303}
304
305impl fmt::Display for FiniteStochasticLanguage {
306    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
307        writeln!(f, "{}", HEADER)?;
308        writeln!(f, "# number of traces\n{}", self.traces.len())?;
309
310        for (pos, (trace, probability)) in self.traces.iter().enumerate() {
311            writeln!(f, "# trace {}", pos)?;
312
313            writeln!(f, "# probability\n{}", probability)?;
314
315            writeln!(f, "# number of events\n{}", trace.len())?;
316            for event in trace {
317                writeln!(f, "{}", self.activity_key.get_activity_label(event))?;
318            }
319        }
320
321        write!(f, "")
322    }
323}
324
325impl IntoRefTraceIterator for FiniteStochasticLanguage {
326    fn iter_traces(&self) -> RefTraceIterator<'_> {
327        RefTraceIterator::Keys(self.traces.keys())
328    }
329
330    fn par_iter_traces(&self) -> ParallelRefTraceIterator<'_> {
331        ParallelRefTraceIterator::HashMap((&self.traces).into())
332    }
333}
334
335impl IntoRefProbabilityIterator for FiniteStochasticLanguage {
336    fn iter_probabilities(
337        &self,
338    ) -> std::collections::hash_map::Values<'_, Vec<Activity>, Fraction> {
339        self.traces.values()
340    }
341}
342
343impl IntoRefTraceProbabilityIterator for FiniteStochasticLanguage {
344    fn iter_traces_probabilities(
345        &'_ self,
346    ) -> std::collections::hash_map::Iter<'_, Vec<Activity>, Fraction> {
347        self.traces.iter()
348    }
349
350    fn par_iter_traces_probabilities(&'_ self) -> ParallelRefProbabilityTraceIterator<'_> {
351        self.into()
352    }
353}
354
355impl IntoTraceProbabilityIterator for FiniteStochasticLanguage {
356    fn into_iter_trace_probabilities(
357        self,
358    ) -> std::collections::hash_map::IntoIter<Vec<Activity>, Fraction> {
359        self.traces.into_iter()
360    }
361
362    fn into_par_iter_trace_probabilities(
363        self,
364    ) -> rayon::collections::hash_map::IntoIter<Vec<Activity>, Fraction> {
365        self.traces.into_par_iter()
366    }
367}
368
369impl IntoIterator for FiniteStochasticLanguage {
370    type Item = (Vec<Activity>, Fraction);
371    type IntoIter = IntoIter<Vec<Activity>, Fraction>;
372
373    fn into_iter(self) -> Self::IntoIter {
374        self.traces.into_iter()
375    }
376}
377
378impl<'a> IntoIterator for &'a FiniteStochasticLanguage {
379    type Item = (&'a Vec<Activity>, &'a Fraction);
380    type IntoIter = Iter<'a, Vec<Activity>, Fraction>;
381
382    fn into_iter(self) -> Self::IntoIter {
383        self.traces.iter()
384    }
385}
386
387impl IntoParallelIterator for FiniteStochasticLanguage {
388    type Iter = rayon::collections::hash_map::IntoIter<Vec<Activity>, Fraction>;
389    type Item = (Vec<Activity>, Fraction);
390
391    fn into_par_iter(self) -> Self::Iter {
392        self.traces.into_par_iter()
393    }
394}
395
396impl<'a> IntoParallelIterator for &'a FiniteStochasticLanguage {
397    type Iter = rayon::collections::hash_map::Iter<'a, Vec<Activity>, Fraction>;
398    type Item = (&'a Vec<Activity>, &'a Fraction);
399
400    fn into_par_iter(self) -> Self::Iter {
401        self.traces.par_iter()
402    }
403}
404
#[cfg(any(test, feature = "testactivities"))]
impl TestActivityKey for FiniteStochasticLanguage {
    /// Passes every activity of every trace to `assert_activity_is_of_key`
    /// of this language's activity key.
    fn test_activity_key(&self) {
        for trace in self.traces.keys() {
            for activity in trace {
                self.activity_key().assert_activity_is_of_key(activity);
            }
        }
    }
}
415
#[cfg(test)]
mod tests {
    use crate::{
        StochasticLabelledPetriNet, StochasticNondeterministicFiniteAutomaton,
        ebi_objects::finite_stochastic_language::FiniteStochasticLanguage,
        traits::number_of_traces::NumberOfTraces,
    };
    use ebi_arithmetic::{Fraction, Zero};
    use std::fs;

    /// The `testfiles/empty.slang` file must parse, survive normalisation and
    /// end up with no traces and a zero probability sum.
    #[test]
    fn empty_slang() {
        let contents = fs::read_to_string("testfiles/empty.slang").unwrap();
        let mut language = contents.parse::<FiniteStochasticLanguage>().unwrap();
        language.normalise();

        assert_eq!(language.number_of_traces(), 0);

        let mut total = Fraction::zero();
        for probability in language.traces.values() {
            total += probability;
        }
        assert_eq!(total, Fraction::zero());
    }

    /// Smoke test: converting a language to an automaton and then to a Petri
    /// net, rendering each step to a string, must not panic.
    #[test]
    fn slang_to_slpn_via_snfa() {
        let contents = fs::read_to_string("testfiles/ba-aa-ab.slang").unwrap();
        let language: FiniteStochasticLanguage = contents.parse().unwrap();
        language.to_string();

        let automaton = StochasticNondeterministicFiniteAutomaton::from(language);
        automaton.to_string();

        let net = StochasticLabelledPetriNet::from(automaton);
        net.to_string();
    }
}