1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
// Copyright (C) 2015 Élisabeth HENRY.
//
// This file is part of Caribon.
//
// Caribon is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published
// by the Free Software Foundation, either version 2.1 of the License, or
// (at your option) any later version.
//
// Caribon is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with Caribon.  If not, see <http://www.gnu.org/licenses/>.

/// `Word` type: the inner representation of one chunk of text used by `Parser`.
///
/// Each variant owns the original string it was built from; only `Tracked`
/// carries extra data.
///
/// You probably should not use this type directly.
#[repr(C)]
#[derive(Debug,Clone)]
pub enum Word {
    /// A String which is not part of the text (typically whitespace, HTML formatting, ...)
    Untracked(String),
    /// A word that is ignored, either because it is in `parser.ignored` or because it is
    /// a proper noun and `ignore_proper` has been set to `true`.
    Ignored(String),
    /// Tracked string. The fields are, in order:
    ///
    /// 1. the string itself,
    /// 2. the stemmed variant of the string (see `set_stemmed`),
    /// 3. some value corresponding to the degree of repetitions (see `set_count`),
    /// 4. an optional highlighting colour.
    Tracked(String, String, f32, Option<&'static str>),
}

impl Word {
    /// Replaces the stemmed variant stored in a `Tracked` word.
    ///
    /// `Untracked` and `Ignored` words carry no stem, so for them the call
    /// does nothing and `s` is simply dropped.
    pub fn set_stemmed(&mut self, s: String) {
        match *self {
            Word::Tracked(_, ref mut stem, _, _) => *stem = s,
            Word::Untracked(_) | Word::Ignored(_) => {}
        }
    }

    /// Replaces the repetition value stored in a `Tracked` word.
    ///
    /// `Untracked` and `Ignored` words carry no such value, so for them the
    /// call does nothing.
    pub fn set_count(&mut self, x: f32) {
        match *self {
            Word::Tracked(_, _, ref mut count, _) => *count = x,
            Word::Untracked(_) | Word::Ignored(_) => {}
        }
    }
}

#[repr(C)]
#[derive(Debug,Clone)]
/// The internal representation of the document.
///
/// Technically the name AST is not really well chosen, since it is not a tree but mainly a
/// vector of `Word`s plus some additional information for HTML parsing; the idea is that it
/// is the internal representation of the document.
pub struct Ast {
    /// Vector of `Word`s. The main data of the structure.
    pub words: Vec<Word>,
    /// Position (index into `words`) of the `<head>` tag, if any.
    pub begin_head: Option<usize>,
    /// Position (index into `words`) of the `<body>` tag, if any.
    pub begin_body: Option<usize>,
    /// Position (index into `words`) of the `</body>` tag, if any.
    pub end_body: Option<usize>,
}

impl Ast {
    /// Creates a new, empty AST.
    ///
    /// `words` is empty and none of the tag positions is set.
    pub fn new() -> Ast {
        Ast {
            words: vec![],
            begin_head: None,
            begin_body: None,
            end_body: None,
        }
    }

    /// Sets `begin_head` to the current length of `words`.
    ///
    /// This should be called *before* inserting the corresponding element, so that
    /// the stored index is the one that element will occupy.
    ///
    /// Only the first call has an effect: once `begin_head` is set it is left untouched.
    pub fn mark_begin_head(&mut self) {
        if self.begin_head.is_none() {
            self.begin_head = Some(self.words.len());
        }
    }

    /// Sets `begin_body` to the current length of `words`.
    ///
    /// This should be called *before* inserting the corresponding element, so that
    /// the stored index is the one that element will occupy.
    ///
    /// Only the first call has an effect: once `begin_body` is set it is left untouched.
    pub fn mark_begin_body(&mut self) {
        if self.begin_body.is_none() {
            self.begin_body = Some(self.words.len());
        }
    }

    /// Sets `end_body` to the current length of `words`.
    ///
    /// This should be called *before* inserting the corresponding element, so that
    /// the stored index is the one that element will occupy.
    ///
    /// Unlike the `mark_begin_*` methods, every call overwrites the previous value.
    pub fn mark_end_body(&mut self) {
        self.end_body = Some(self.words.len());
    }

    /// Computes the half-open index range `(start, end)` of the words strictly between
    /// the `<body>` and `</body>` markers.
    ///
    /// Returns `None` when either marker is missing, when `begin_body` is not before
    /// `end_body`, or when `end_body` lies past the end of `words` (possible because
    /// all fields are public), so that callers never slice out of bounds.
    fn body_bounds(&self) -> Option<(usize, usize)> {
        match (self.begin_body, self.end_body) {
            (Some(begin), Some(end)) if begin < end && end <= self.words.len() => {
                // `begin` is the index of the `<body>` element itself, hence the `+ 1`.
                // `begin < end` guarantees `begin + 1 <= end`, so the range is well formed.
                Some((begin + 1, end))
            }
            _ => None,
        }
    }

    /// Get only the words contained between `<body>` and `</body>`.
    ///
    /// If `begin_body` and `end_body` are both set, the first one is before the second
    /// and both lie within `words`, returns a slice that contains only the words in this
    /// part (the `<body>` element itself is excluded); else, returns all words.
    pub fn get_body(&self) -> &[Word] {
        match self.body_bounds() {
            Some((start, end)) => &self.words[start..end],
            None => &self.words[..],
        }
    }

    /// Get only the words contained between `<body>` and `</body>`, mutable version.
    ///
    /// Same as `get_body`, but takes and returns a mutable reference.
    pub fn get_body_mut(&mut self) -> &mut [Word] {
        match self.body_bounds() {
            Some((start, end)) => &mut self.words[start..end],
            None => &mut self.words[..],
        }
    }
}