// tg_bot_api/parser.rs

mod sentence;
mod tags;

use crate::{
    extractor::{
        Extracted, RawArgument, RawDescription, RawField, RawMethod, RawObject, RawObjectData,
    },
    parser::sentence::Sentence,
    util::{ElementRefExt, StrExt},
    BOT_API_DOCS_URL,
};
use chrono::NaiveDate;
use ego_tree::iter::Edge;
use itertools::Itertools;
use logos::Span;
use scraper::{node::Element, ElementRef, Node};
use semver::Version;
use sentence::{Pattern, SentenceRef, Sentences};
use std::{num::ParseIntError, ops::Deref, str::ParseBoolError};
use tags::TagsHandlerFactory;

22type Result<T, E = ParseError> = std::result::Result<T, E>;
23
24#[derive(Debug, thiserror::Error)]
25pub enum ParseError {
26    #[error("Invalid Required: {0}")]
27    InvalidRequired(String),
28    #[error("Failed to extract type from description: {0:?}")]
29    TypeExtractionFailed(String),
30    #[error("chrono: {0}")]
31    ChronoParse(
32        #[from]
33        #[source]
34        chrono::ParseError,
35    ),
36    #[error("Missing `href` attribute")]
37    MissingHref,
38    #[error("Missing `alt` attribute")]
39    MissingAlt,
40    #[error("SemVer: {0}")]
41    SemVer(
42        #[from]
43        #[source]
44        semver::Error,
45    ),
46    #[error("Integer parsing: {0}")]
47    ParseInt(
48        #[from]
49        #[source]
50        ParseIntError,
51    ),
52    #[error("Boolean parsing: {0}")]
53    ParseBool(
54        #[from]
55        #[source]
56        ParseBoolError,
57    ),
58    #[error("Lexer error: {lexeme:?} ({span:?}) in {input:?}")]
59    Lexer {
60        input: String,
61        lexeme: String,
62        span: Span,
63    },
64}
65
66pub fn parse(raw: Extracted) -> Result<Parsed> {
67    let recent_changes = NaiveDate::parse_from_str(&raw.recent_changes, "%B %e, %Y")?;
68    let version = parse_version(raw.version)?;
69    let objects = raw
70        .objects
71        .into_iter()
72        .map(parse_object)
73        .collect::<Result<_>>()?;
74    let methods = raw
75        .methods
76        .into_iter()
77        .map(parse_method)
78        .collect::<Result<_>>()?;
79
80    Ok(Parsed {
81        recent_changes,
82        version,
83        methods,
84        objects,
85    })
86}
87
88fn parse_version(version: ElementRef) -> Result<Version> {
89    let version = version
90        .plain_text()
91        .chars()
92        .skip_while(|c| !c.is_ascii_digit())
93        .collect::<String>()
94        .trim_end_matches('.')
95        .to_string()
96        + ".0";
97    Ok(Version::parse(&version)?)
98}
99
100fn parse_object(raw_object: RawObject) -> Result<Object> {
101    let name = raw_object.name.plain_text();
102    let description = raw_object.description.markdown();
103    let data = match raw_object.data {
104        RawObjectData::Fields(fields) if !fields.is_empty() => {
105            ObjectData::Fields(fields.into_iter().map(parse_field).collect::<Result<_>>()?)
106        }
107        RawObjectData::Fields(_) => ObjectData::Unknown,
108        RawObjectData::Elements(elements) => ObjectData::Elements(
109            elements
110                .into_iter()
111                .map(|elem| elem.plain_text())
112                .map(|s| Type::new(&s))
113                .collect(),
114        ),
115    };
116    let docs_link = raw_object.name.a_href().map(make_url_from_fragment)?;
117    Ok(Object {
118        name,
119        description,
120        data,
121        docs_link,
122    })
123}
124
125fn parse_field(raw_field: RawField) -> Result<Field> {
126    let plain_description = raw_field.description.plain_text();
127    let required = !plain_description.starts_with("Optional.");
128    let kind = Type::new_with_description(
129        &raw_field.kind,
130        TypeParsingUnit::Element(&raw_field.description),
131    )?;
132
133    Ok(Field {
134        name: raw_field.name,
135        kind,
136        required,
137        description: raw_field.description.markdown(),
138    })
139}
140
141fn parse_method(raw_method: RawMethod) -> Result<Method> {
142    let name = raw_method.name.plain_text();
143    let docs_link = raw_method.name.a_href().map(make_url_from_fragment)?;
144    let return_type =
145        Type::extract_from_text(TypeParsingUnit::Description(&raw_method.description))?;
146    let args = raw_method
147        .args
148        .into_iter()
149        .map(parse_argument)
150        .collect::<Result<_>>()?;
151    Ok(Method {
152        name,
153        description: raw_method.description.markdown(),
154        args: MethodArgs::new(args),
155        return_type,
156        docs_link,
157    })
158}
159
160fn parse_argument(raw_arg: RawArgument) -> Result<Argument> {
161    let kind = Type::new_with_description(
162        &raw_arg.kind,
163        TypeParsingUnit::Element(&raw_arg.description),
164    )?;
165    let required = parse_required(raw_arg.required)?;
166    Ok(Argument {
167        name: raw_arg.name,
168        kind,
169        required,
170        description: raw_arg.description.markdown(),
171    })
172}
173
174fn parse_required(s: String) -> Result<bool> {
175    match s.as_str() {
176        "Yes" => Ok(true),
177        "Optional" => Ok(false),
178        _ => Err(ParseError::InvalidRequired(s)),
179    }
180}
181
182#[derive(Debug, Clone)]
183pub struct Parsed {
184    pub recent_changes: NaiveDate,
185    pub version: Version,
186    pub methods: Vec<Method>,
187    pub objects: Vec<Object>,
188}
189
/// A type of a field, argument or return value, together with any constraints that were
/// recognised in the accompanying description.
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum Type {
    /// An integer with an optional default, inclusive-looking bounds and allowed values.
    Integer {
        default: Option<i64>,
        min: Option<i64>,
        max: Option<i64>,
        /// Allowed values; empty when no list was recognised.
        one_of: Vec<i64>,
    },
    /// A string with an optional default, length bounds and allowed values.
    String {
        default: Option<String>,
        min_len: Option<u64>,
        max_len: Option<u64>,
        /// Allowed values; empty when no list was recognised.
        one_of: Vec<String>,
    },
    /// A boolean with an optional default.
    Bool {
        default: Option<bool>,
    },
    /// A floating point number.
    Float,
    /// Any one of the listed types.
    Or(Vec<Type>),
    /// An array whose elements have the boxed type.
    Array(Box<Type>),
    /// Any other type, identified by its name.
    Object(String),
}

213impl Type {
214    // this function parses types from `Type` column in docs
215    fn new(s: &str) -> Self {
216        const ARRAY_OF: &[&str] = &["Array", "of"];
217
218        fn types_from_sentence_ref(sentence: &SentenceRef) -> Vec<Type> {
219            sentence
220                .parts()
221                .iter()
222                .filter(|part| !part.as_inner().is_first_letter_lowercase())
223                .map(|part| part.as_inner().as_str())
224                .map(Type::new)
225                .collect()
226        }
227
228        match s {
229            "Integer" | "Int" => Self::Integer {
230                default: None,
231                min: None,
232                max: None,
233                one_of: vec![],
234            },
235            "String" => Self::String {
236                default: None,
237                min_len: None,
238                max_len: None,
239                one_of: vec![],
240            },
241            "Boolean" => Self::Bool { default: None },
242            "True" => Self::Bool {
243                default: Some(true),
244            },
245            "Float" | "Float number" => Self::Float,
246            _ => {
247                let parser = Sentences::parse(s);
248                if let Some(sentence) = parser.find(&["or"]) {
249                    let types = types_from_sentence_ref(sentence);
250                    Self::Or(types)
251                } else if let Some(sentence) = parser.find_and_crop(ARRAY_OF) {
252                    let sentence = &sentence[2..];
253                    let ty = if sentence.len() == 1 {
254                        Self::new(sentence.parts()[0].as_inner())
255                    } else if sentence.starts_with(ARRAY_OF) {
256                        Self::new(
257                            &sentence
258                                .parts()
259                                .iter()
260                                .map(|part| part.as_inner())
261                                .join(" "),
262                        )
263                    } else {
264                        Self::Or(types_from_sentence_ref(sentence))
265                    };
266                    Self::Array(Box::new(ty))
267                } else {
268                    Self::Object(s.to_string())
269                }
270            }
271        }
272    }
273
274    fn new_with_description(s: &str, description: TypeParsingUnit) -> Result<Self> {
275        let default = sentence::parse_type_custom(Pattern::Default, description, |sentence| {
276            sentence.parts().first().map(|part| part.as_inner().clone())
277        })?;
278        let min_max = sentence::parse_type_custom(Pattern::MinMax, description, |sentence| {
279            let values = sentence.parts().first()?.as_inner();
280            let mut split = values.split('-');
281            let min = split.next()?.to_string();
282            let max = split.next()?.to_string();
283            Some((min, max))
284        })?;
285        let one_of = sentence::parse_type_custom(Pattern::OneOf, description, |sentence| {
286            Some(
287                sentence
288                    .parts()
289                    .iter()
290                    .filter(|part| {
291                        part.has_quotes()
292                            || part.is_italic()
293                            || part.as_inner().chars().all(|c| c.is_ascii_digit())
294                    })
295                    .map(|part| part.as_inner())
296                    .cloned()
297                    .dedup()
298                    .collect::<Vec<_>>(),
299            )
300        })?;
301
302        let (min, max) = if let Some((min, max)) = min_max {
303            (Some(min), Some(max))
304        } else {
305            (None, None)
306        };
307
308        let ty = match Type::new(s) {
309            Type::Integer {
310                default: type_default,
311                min: type_min,
312                max: type_max,
313                one_of: type_one_of,
314            } => {
315                let one_of = if let Some(one_of) = one_of {
316                    one_of
317                        .into_iter()
318                        .map(|x| x.parse::<i64>())
319                        .collect::<Result<_, ParseIntError>>()?
320                } else {
321                    type_one_of
322                };
323
324                Type::Integer {
325                    default: default
326                        .as_deref()
327                        .map(str::parse)
328                        .transpose()?
329                        .or(type_default),
330                    min: min.as_deref().map(str::parse).transpose()?.or(type_min),
331                    max: max.as_deref().map(str::parse).transpose()?.or(type_max),
332                    one_of,
333                }
334            }
335            Type::Bool {
336                default: type_default,
337            } => Type::Bool {
338                default: default
339                    .as_deref()
340                    .map(str::to_lowercase)
341                    .as_deref()
342                    .map(str::parse)
343                    .transpose()?
344                    .or(type_default),
345            },
346            Type::String {
347                default: type_default,
348                min_len: type_min_len,
349                max_len: type_max_len,
350                one_of: type_one_if,
351            } if default.is_some() || min.is_some() || max.is_some() || one_of.is_some() => {
352                Type::String {
353                    default: default.or(type_default),
354                    min_len: min.as_deref().map(str::parse).transpose()?.or(type_min_len),
355                    max_len: max.as_deref().map(str::parse).transpose()?.or(type_max_len),
356                    one_of: one_of.unwrap_or(type_one_if),
357                }
358            }
359            x => x,
360        };
361
362        Ok(ty)
363    }
364
365    pub fn extract_from_text(text: TypeParsingUnit) -> Result<Self> {
366        fn strip_plural_ending(mut s: &str) -> &str {
367            if s.ends_with("es") {
368                s = s.strip_suffix('s').unwrap_or(s);
369            }
370
371            s
372        }
373
374        fn extract_type(sentence: &SentenceRef) -> Option<Type> {
375            const ARRAY: &str = "Array";
376            const AN_ARRAY_OF: &[&str] = &["an", "array", "of"];
377            const OTHERWISE: &[&str] = &["otherwise"];
378
379            if sentence.contains(OTHERWISE) {
380                let types = sentence
381                    .parts()
382                    .iter()
383                    .filter(|part| !part.as_inner().is_first_letter_lowercase())
384                    .map(SentenceRef::from_part)
385                    .map(extract_type)
386                    .collect::<Option<_>>()?;
387                Some(Type::Or(types))
388            } else {
389                let (pos, part) = sentence
390                    .parts()
391                    .iter()
392                    .find_position(|part| !part.as_inner().is_first_letter_lowercase())?;
393                let ty = part.as_inner();
394                let ty = strip_plural_ending(ty);
395
396                if ty == ARRAY {
397                    let sentence = &sentence[pos + 1..];
398                    let ty = extract_type(sentence)?;
399                    Some(Type::Array(Box::new(ty)))
400                } else if sentence[pos.saturating_sub(AN_ARRAY_OF.len())..].starts_with(AN_ARRAY_OF)
401                {
402                    let sentence = &sentence[pos..];
403                    let ty = extract_type(sentence)?;
404                    Some(Type::Array(Box::new(ty)))
405                } else {
406                    Some(Type::new(ty))
407                }
408            }
409        }
410
411        sentence::parse_type_custom(Pattern::ReturnType, text, extract_type)
412            .transpose()
413            .ok_or_else(|| ParseError::TypeExtractionFailed(text.plain_text()))?
414    }
415
416    pub fn maybe_file_to_send(&self) -> bool {
417        match self {
418            Type::Integer { .. } | Type::String { .. } | Type::Bool { .. } | Type::Float => false,
419            Type::Or(types) => types.iter().any(Self::maybe_file_to_send),
420            Type::Array(ty) => ty.maybe_file_to_send(),
421            Type::Object(object) => object.starts_with("Input"),
422        }
423    }
424}
425
426#[derive(Debug, Copy, Clone)]
427pub enum TypeParsingUnit<'a> {
428    Element(&'a ElementRef<'a>),
429    Description(&'a RawDescription<'a>),
430}
431
432impl TypeParsingUnit<'_> {
433    fn sentences(self) -> Result<Vec<Sentence>> {
434        match self {
435            TypeParsingUnit::Element(elem) => elem.sentences(),
436            TypeParsingUnit::Description(description) => description.sentences(),
437        }
438    }
439
440    fn plain_text(self) -> String {
441        match self {
442            TypeParsingUnit::Element(elem) => elem.plain_text(),
443            TypeParsingUnit::Description(description) => description.plain_text(),
444        }
445    }
446}
447
448#[derive(Debug, Clone)]
449pub struct Object {
450    pub name: String,
451    pub description: String,
452    pub data: ObjectData,
453    pub docs_link: String,
454}
455
456#[derive(Debug, Clone)]
457pub enum ObjectData {
458    Fields(Vec<Field>),
459    Elements(Vec<Type>),
460    /// Object without fields or elements
461    /// So we don't know what it will be in the future
462    Unknown,
463}
464
465#[derive(Debug, Clone)]
466pub struct Field {
467    pub name: String,
468    pub kind: Type,
469    pub required: bool,
470    pub description: String,
471}
472
473#[derive(Debug, Clone)]
474pub struct Method {
475    pub name: String,
476    pub description: String,
477    pub args: MethodArgs,
478    pub return_type: Type,
479    pub docs_link: String,
480}
481
482#[derive(Debug, Clone)]
483pub enum MethodArgs {
484    No,
485    Yes(Vec<Argument>),
486    WithMultipart(Vec<Argument>),
487}
488
489impl MethodArgs {
490    fn new(args: Vec<Argument>) -> Self {
491        if args.iter().any(|arg| arg.kind.maybe_file_to_send()) {
492            Self::WithMultipart(args)
493        } else if args.is_empty() {
494            Self::No
495        } else {
496            Self::Yes(args)
497        }
498    }
499}
500
501#[derive(Debug, Clone)]
502pub struct Argument {
503    pub name: String,
504    pub kind: Type,
505    pub required: bool,
506    pub description: String,
507}
508
509fn make_url_from_fragment(fragment: String) -> String {
510    assert!(fragment.starts_with('#'));
511    format!("{}{}", BOT_API_DOCS_URL, fragment)
512}
513
514trait RawDescriptionExt {
515    fn sentences(&self) -> Result<Vec<Sentence>>;
516
517    fn markdown(&self) -> String;
518
519    fn plain_text(&self) -> String;
520}
521
522impl RawDescriptionExt for RawDescription<'_> {
523    fn sentences(&self) -> Result<Vec<Sentence>> {
524        self.0
525            .iter()
526            .map(ElementRef::sentences)
527            .try_fold(Vec::new(), |mut acc, x| {
528                acc.extend(x?);
529                Ok(acc)
530            })
531    }
532
533    fn markdown(&self) -> String {
534        html2md::parse_html_custom(
535            &self.0.iter().map(ElementRef::html).join("\n"),
536            &TagsHandlerFactory::new_in_map(),
537        )
538    }
539
540    fn plain_text(&self) -> String {
541        self.0.iter().map(ElementRef::plain_text).join("\n")
542    }
543}
544
545trait ElementRefParserExt {
546    fn sentences(&self) -> Result<Vec<Sentence>>;
547
548    fn markdown(&self) -> String;
549
550    fn a_href(&self) -> Result<String>;
551}
552
553impl ElementRefParserExt for ElementRef<'_> {
554    fn sentences(&self) -> Result<Vec<Sentence>> {
555        sentence::parse_node(*self.deref())
556    }
557
558    fn markdown(&self) -> String {
559        html2md::parse_html_custom(&self.html(), &TagsHandlerFactory::new_in_map())
560    }
561
562    fn a_href(&self) -> Result<String> {
563        for edge in self.traverse() {
564            if let Edge::Open(node) = edge {
565                if let Node::Element(elem) = node.value() {
566                    if elem.name() == "a" {
567                        return elem.a_href();
568                    }
569                }
570            }
571        }
572
573        Err(ParseError::MissingHref)
574    }
575}
576
577trait ElementExt {
578    fn a_href(&self) -> Result<String>;
579}
580
581impl ElementExt for Element {
582    fn a_href(&self) -> Result<String> {
583        self.attr("href")
584            .map(str::to_string)
585            .ok_or(ParseError::MissingHref)
586    }
587}
588
#[cfg(test)]
mod tests {
    use super::*;

    /// Names that `Type::new` matches literally, without the sentence parser.
    #[test]
    fn primitive_types() {
        assert_eq!(Type::new("Float number"), Type::Float);
        assert_eq!(Type::new("Boolean"), Type::Bool { default: None });
        assert_eq!(
            Type::new("True"),
            Type::Bool {
                default: Some(true)
            }
        );
    }

    #[test]
    fn or_type() {
        let ty = Type::new("Integer or String");
        assert_eq!(
            ty,
            Type::Or(vec![
                Type::Integer {
                    default: None,
                    min: None,
                    max: None,
                    one_of: vec![],
                },
                Type::String {
                    default: None,
                    min_len: None,
                    max_len: None,
                    one_of: vec![]
                }
            ])
        )
    }

    #[test]
    fn array_of_type() {
        let ty = Type::new("Array of PhotoSize");
        assert_eq!(
            ty,
            Type::Array(Box::new(Type::Object("PhotoSize".to_string())))
        );
    }

    #[test]
    fn array_of_array_type() {
        let ty = Type::new("Array of Array of PhotoSize");
        assert_eq!(
            ty,
            Type::Array(Box::new(Type::Array(Box::new(Type::Object(
                "PhotoSize".to_string()
            )))))
        );
    }
}