1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
//! # Doc Chunks
//!
//! `Documentation` is a representation of one or multiple documents.
//!
//! A `literal` is a token provided by the `proc_macro2` or `ra_ap_syntax` crate, which is then
//! converted by means of `TrimmedLiteral` using `Cluster`ing into a `CheckableChunk` (mostly
//! named just `chunk`).
//!
//! A `CheckableChunk` can consist of multiple fragments, where each fragment can
//! span multiple lines, yet each fragment covers a contiguous `Span` in
//! the originating content. Each fragment also has a direct mapping to the
//! `CheckableChunk`'s internal string representation.
//!
//! A `Documentation` holds one or more `CheckableChunk`s per content origin (usually a file path).

#![deny(unused_crate_dependencies)]

// contains test helpers
pub mod span;
pub mod testcase;
pub use self::span::Span;
pub use proc_macro2::LineColumn;

pub mod util;
use self::util::{load_span_from, sub_char_range};

use indexmap::IndexMap;
use proc_macro2::TokenTree;
use rayon::prelude::*;
use serde::Deserialize;
use std::path::PathBuf;
use toml::Spanned;

/// Half-open range over `usize`, used as a shorthand throughout the crate.
pub type Range = core::ops::Range<usize>;

/// Shift `range` to the right by `offset`.
///
/// Both `start` and `end` are moved by the same amount. Each bound saturates
/// at `usize::MAX` instead of overflowing.
pub fn apply_offset(range: &mut Range, offset: usize) {
    let shifted = |bound: usize| bound.saturating_add(offset);
    *range = shifted(range.start)..shifted(range.end);
}

pub mod chunk;
pub mod cluster;
mod developer;
pub mod errors;
pub mod literal;
pub mod literalset;
pub mod markdown;

pub use chunk::*;
pub use cluster::*;
pub use errors::*;
pub use literal::*;
pub use literalset::*;
pub use markdown::*;

/// Collection of all the documentation entries across the project.
///
/// Entries are grouped by the `ContentOrigin` they were extracted from; the
/// backing `IndexMap` keeps origins in insertion order.
#[derive(Debug, Clone)]
pub struct Documentation {
    /// Mapping of a content origin to the checkable chunks extracted from it.
    index: IndexMap<ContentOrigin, Vec<CheckableChunk>>,
}

impl Documentation {
    /// Create a new and empty doc.
    pub fn new() -> Self {
        Self {
            index: IndexMap::with_capacity(64),
        }
    }

    /// Check if a particular key is contained.
    pub fn contains_key(&self, key: &ContentOrigin) -> bool {
        self.index.contains_key(key)
    }

    /// Check if the document contains any checkable items.
    #[inline(always)]
    pub fn is_empty(&self) -> bool {
        self.index.is_empty()
    }

    /// Borrowing iterator across content origins and associated sets of chunks.
    #[inline(always)]
    pub fn iter(&self) -> impl Iterator<Item = (&ContentOrigin, &Vec<CheckableChunk>)> {
        self.index.iter()
    }

    /// Borrowing iterator across content origins and associated sets of chunks.
    pub fn par_iter(&self) -> impl ParallelIterator<Item = (&ContentOrigin, &Vec<CheckableChunk>)> {
        self.index.par_iter()
    }

    /// Consuming iterator across content origins and associated sets of chunks.
    pub fn into_par_iter(
        self,
    ) -> impl ParallelIterator<Item = (ContentOrigin, Vec<CheckableChunk>)> {
        self.index.into_par_iter()
    }

    /// Extend `self` by joining in other `Documentation`s.
    pub fn extend<I, J>(&mut self, other: I)
    where
        I: IntoIterator<Item = (ContentOrigin, Vec<CheckableChunk>), IntoIter = J>,
        J: Iterator<Item = (ContentOrigin, Vec<CheckableChunk>)>,
    {
        other
            .into_iter()
            .for_each(|(origin, chunks): (_, Vec<CheckableChunk>)| {
                let _ = self.add_inner(origin, chunks);
            });
    }

    /// Adds a set of `CheckableChunk`s to the documentation to be checked.
    pub fn add_inner(&mut self, origin: ContentOrigin, mut chunks: Vec<CheckableChunk>) {
        self.index
            .entry(origin)
            .and_modify(|acc: &mut Vec<CheckableChunk>| {
                acc.append(&mut chunks);
            })
            .or_insert_with(|| chunks);
        // Ok(()) TODO make this failable
    }

    /// Adds a rust content str to the documentation.
    pub fn add_rust(
        &mut self,
        origin: ContentOrigin,
        content: &str,
        doc_comments: bool,
        dev_comments: bool,
    ) -> Result<()> {
        let cluster = Clusters::load_from_str(content, doc_comments, dev_comments)?;

        let chunks = Vec::<CheckableChunk>::from(cluster);
        self.add_inner(origin, chunks);
        Ok(())
    }

    /// Adds a content string to the documentation sourced from the
    /// `description` field in a `Cargo.toml` manifest.
    pub fn add_cargo_manifest_description(
        &mut self,
        path: PathBuf,
        manifest_content: &str,
    ) -> Result<()> {
        fn extract_range_of_description(manifest_content: &str) -> Result<Range> {
            #[derive(Deserialize, Debug)]
            struct Manifest {
                package: Spanned<Package>,
            }

            #[derive(Deserialize, Debug)]
            struct Package {
                description: Spanned<String>,
            }

            let value: Manifest = toml::from_str(manifest_content)?;
            let d = value.package.into_inner().description;
            let range = d.span();
            Ok(range)
        }

        let mut range = extract_range_of_description(&manifest_content)?;
        let description = sub_char_range(&manifest_content, range.clone());

        // Attention: `description` does include `\"\"\"` as well as `\\\n`, the latter is not a big issue,
        // but the trailing start and end delimiters are.
        // TODO: split into multiple on `\\\n` and create multiple range/span mappings.
        let description = if range.len() > 6 {
            if description.starts_with("\"\"\"") {
                range.start += 3;
                range.end -= 3;
                assert!(!range.is_empty());
            }
            dbg!(&description[3..(description.len()) - 3])
        } else {
            description
        };

        fn convert_range_to_span(content: &str, range: Range) -> Option<Span> {
            let mut line = 0_usize;
            let mut column = 0_usize;
            let mut prev = '\n';
            let mut start = None;
            for (offset, c) in content.chars().enumerate() {
                if prev == '\n' {
                    column = 0;
                    line += 1;
                }
                prev = c;

                if offset == range.start {
                    start = Some(LineColumn { line, column });
                    continue;
                }
                // take care of inclusivity
                if offset + 1 == range.end {
                    let end = LineColumn { line, column };
                    return Some(Span {
                        start: start.unwrap(),
                        end,
                    });
                }
                column += 1;
            }
            None
        }

        let span = convert_range_to_span(manifest_content, range.clone()).expect(
            "Description is part of the manifest since it was parsed from the same source. qed",
        );
        let origin = ContentOrigin::CargoManifestDescription(path);
        let source_mapping = dbg!(indexmap::indexmap! {
            range => span
        });
        self.add_inner(
            origin,
            vec![CheckableChunk::from_str(
                description,
                source_mapping,
                CommentVariant::TomlEntry,
            )],
        );
        Ok(())
    }

    /// Adds a common mark content str to the documentation.
    pub fn add_commonmark(&mut self, origin: ContentOrigin, content: &str) -> Result<()> {
        // extract the full content span and range
        let start = LineColumn { line: 1, column: 0 };
        let end = content
            .lines()
            .enumerate()
            .last()
            .map(|(idx, linecontent)| (idx + 1, linecontent))
            .map(|(linenumber, linecontent)| LineColumn {
                line: linenumber,
                column: linecontent.chars().count().saturating_sub(1),
            })
            .ok_or_else(|| {
                Error::Span(
                    "Common mark / markdown file does not contain a single line".to_string(),
                )
            })?;

        let span = Span { start, end };
        let source_mapping = indexmap::indexmap! {
            0..content.chars().count() => span
        };
        self.add_inner(
            origin,
            vec![CheckableChunk::from_str(
                content,
                source_mapping,
                CommentVariant::CommonMark,
            )],
        );
        Ok(())
    }

    /// Obtain the set of chunks for a particular origin.
    #[inline(always)]
    pub fn get(&self, origin: &ContentOrigin) -> Option<&[CheckableChunk]> {
        self.index.get(origin).map(AsRef::as_ref)
    }

    /// Count the number of origins.
    #[inline(always)]
    pub fn entry_count(&self) -> usize {
        self.index.len()
    }

    /// Load a document from a single string with a defined origin.
    pub fn load_from_str(
        origin: ContentOrigin,
        content: &str,
        doc_comments: bool,
        dev_comments: bool,
    ) -> Self {
        let mut docs = Documentation::new();

        match origin.clone() {
            ContentOrigin::RustDocTest(_path, span) => {
                if let Ok(excerpt) = load_span_from(&mut content.as_bytes(), span.clone()) {
                    docs.add_rust(origin.clone(), excerpt.as_str(), doc_comments, dev_comments)
                } else {
                    // TODO
                    Ok(())
                }
            }
            origin @ ContentOrigin::RustSourceFile(_) => {
                docs.add_rust(origin, content, doc_comments, dev_comments)
            }
            ContentOrigin::CargoManifestDescription(path) => {
                docs.add_cargo_manifest_description(path, content)
            }
            origin @ ContentOrigin::CommonMarkFile(_) => docs.add_commonmark(origin, content),
            origin @ ContentOrigin::TestEntityRust => {
                docs.add_rust(origin, content, doc_comments, dev_comments)
            }
            origin @ ContentOrigin::TestEntityCommonMark => docs.add_commonmark(origin, content),
        }
        .unwrap_or_else(move |e| {
            log::warn!(
                "BUG: Failed to load content from {origin} (dev_comments={dev_comments:?}): {e:?}",
            );
        });
        docs
    }

    pub fn len(&self) -> usize {
        self.index.len()
    }
}

/// Consuming iteration over `(origin, chunks)` pairs, in the order of the
/// underlying index.
impl IntoIterator for Documentation {
    type Item = (ContentOrigin, Vec<CheckableChunk>);
    type IntoIter = indexmap::map::IntoIter<ContentOrigin, Vec<CheckableChunk>>;

    /// Hand out the owning iterator of the wrapped index map.
    fn into_iter(self) -> Self::IntoIter {
        IntoIterator::into_iter(self.index)
    }
}