1mod sentence;
2mod tags;
3
4use crate::{
5 extractor::{
6 Extracted, RawArgument, RawDescription, RawField, RawMethod, RawObject, RawObjectData,
7 },
8 parser::sentence::Sentence,
9 util::{ElementRefExt, StrExt},
10 BOT_API_DOCS_URL,
11};
12use chrono::NaiveDate;
13use ego_tree::iter::Edge;
14use itertools::Itertools;
15use logos::Span;
16use scraper::{node::Element, ElementRef, Node};
17use semver::Version;
18use sentence::{Pattern, SentenceRef, Sentences};
19use std::{num::ParseIntError, ops::Deref, str::ParseBoolError};
20use tags::TagsHandlerFactory;
21
22type Result<T, E = ParseError> = std::result::Result<T, E>;
23
24#[derive(Debug, thiserror::Error)]
25pub enum ParseError {
26 #[error("Invalid Required: {0}")]
27 InvalidRequired(String),
28 #[error("Failed to extract type from description: {0:?}")]
29 TypeExtractionFailed(String),
30 #[error("chrono: {0}")]
31 ChronoParse(
32 #[from]
33 #[source]
34 chrono::ParseError,
35 ),
36 #[error("Missing `href` attribute")]
37 MissingHref,
38 #[error("Missing `alt` attribute")]
39 MissingAlt,
40 #[error("SemVer: {0}")]
41 SemVer(
42 #[from]
43 #[source]
44 semver::Error,
45 ),
46 #[error("Integer parsing: {0}")]
47 ParseInt(
48 #[from]
49 #[source]
50 ParseIntError,
51 ),
52 #[error("Boolean parsing: {0}")]
53 ParseBool(
54 #[from]
55 #[source]
56 ParseBoolError,
57 ),
58 #[error("Lexer error: {lexeme:?} ({span:?}) in {input:?}")]
59 Lexer {
60 input: String,
61 lexeme: String,
62 span: Span,
63 },
64}
65
66pub fn parse(raw: Extracted) -> Result<Parsed> {
67 let recent_changes = NaiveDate::parse_from_str(&raw.recent_changes, "%B %e, %Y")?;
68 let version = parse_version(raw.version)?;
69 let objects = raw
70 .objects
71 .into_iter()
72 .map(parse_object)
73 .collect::<Result<_>>()?;
74 let methods = raw
75 .methods
76 .into_iter()
77 .map(parse_method)
78 .collect::<Result<_>>()?;
79
80 Ok(Parsed {
81 recent_changes,
82 version,
83 methods,
84 objects,
85 })
86}
87
88fn parse_version(version: ElementRef) -> Result<Version> {
89 let version = version
90 .plain_text()
91 .chars()
92 .skip_while(|c| !c.is_ascii_digit())
93 .collect::<String>()
94 .trim_end_matches('.')
95 .to_string()
96 + ".0";
97 Ok(Version::parse(&version)?)
98}
99
100fn parse_object(raw_object: RawObject) -> Result<Object> {
101 let name = raw_object.name.plain_text();
102 let description = raw_object.description.markdown();
103 let data = match raw_object.data {
104 RawObjectData::Fields(fields) if !fields.is_empty() => {
105 ObjectData::Fields(fields.into_iter().map(parse_field).collect::<Result<_>>()?)
106 }
107 RawObjectData::Fields(_) => ObjectData::Unknown,
108 RawObjectData::Elements(elements) => ObjectData::Elements(
109 elements
110 .into_iter()
111 .map(|elem| elem.plain_text())
112 .map(|s| Type::new(&s))
113 .collect(),
114 ),
115 };
116 let docs_link = raw_object.name.a_href().map(make_url_from_fragment)?;
117 Ok(Object {
118 name,
119 description,
120 data,
121 docs_link,
122 })
123}
124
125fn parse_field(raw_field: RawField) -> Result<Field> {
126 let plain_description = raw_field.description.plain_text();
127 let required = !plain_description.starts_with("Optional.");
128 let kind = Type::new_with_description(
129 &raw_field.kind,
130 TypeParsingUnit::Element(&raw_field.description),
131 )?;
132
133 Ok(Field {
134 name: raw_field.name,
135 kind,
136 required,
137 description: raw_field.description.markdown(),
138 })
139}
140
141fn parse_method(raw_method: RawMethod) -> Result<Method> {
142 let name = raw_method.name.plain_text();
143 let docs_link = raw_method.name.a_href().map(make_url_from_fragment)?;
144 let return_type =
145 Type::extract_from_text(TypeParsingUnit::Description(&raw_method.description))?;
146 let args = raw_method
147 .args
148 .into_iter()
149 .map(parse_argument)
150 .collect::<Result<_>>()?;
151 Ok(Method {
152 name,
153 description: raw_method.description.markdown(),
154 args: MethodArgs::new(args),
155 return_type,
156 docs_link,
157 })
158}
159
160fn parse_argument(raw_arg: RawArgument) -> Result<Argument> {
161 let kind = Type::new_with_description(
162 &raw_arg.kind,
163 TypeParsingUnit::Element(&raw_arg.description),
164 )?;
165 let required = parse_required(raw_arg.required)?;
166 Ok(Argument {
167 name: raw_arg.name,
168 kind,
169 required,
170 description: raw_arg.description.markdown(),
171 })
172}
173
174fn parse_required(s: String) -> Result<bool> {
175 match s.as_str() {
176 "Yes" => Ok(true),
177 "Optional" => Ok(false),
178 _ => Err(ParseError::InvalidRequired(s)),
179 }
180}
181
182#[derive(Debug, Clone)]
183pub struct Parsed {
184 pub recent_changes: NaiveDate,
185 pub version: Version,
186 pub methods: Vec<Method>,
187 pub objects: Vec<Object>,
188}
189
/// Type of a field, an argument or a method return value.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum Type {
    /// Integer (`Integer` or `Int` in the documentation), optionally
    /// constrained by information found in the description.
    Integer {
        /// Default value, if the description names one.
        default: Option<i64>,
        /// Lower bound, if the description names a range.
        min: Option<i64>,
        /// Upper bound, if the description names a range.
        max: Option<i64>,
        /// Allowed values; empty when the description lists none.
        one_of: Vec<i64>,
    },
    /// String, optionally constrained by information found in the description.
    String {
        /// Default value, if the description names one.
        default: Option<String>,
        /// Lower length bound, if the description names a range.
        min_len: Option<u64>,
        /// Upper length bound, if the description names a range.
        max_len: Option<u64>,
        /// Allowed values; empty when the description lists none.
        one_of: Vec<String>,
    },
    /// Boolean (`Boolean`, or `True` which carries a default of `true`).
    Bool {
        /// Default value, if known.
        default: Option<bool>,
    },
    /// Floating point number (`Float` or `Float number`).
    Float,
    /// One of several alternative types.
    Or(Vec<Type>),
    /// Array whose items have the boxed type.
    Array(Box<Type>),
    /// Any other type, referenced by name.
    Object(String),
}
212
213impl Type {
214 fn new(s: &str) -> Self {
216 const ARRAY_OF: &[&str] = &["Array", "of"];
217
218 fn types_from_sentence_ref(sentence: &SentenceRef) -> Vec<Type> {
219 sentence
220 .parts()
221 .iter()
222 .filter(|part| !part.as_inner().is_first_letter_lowercase())
223 .map(|part| part.as_inner().as_str())
224 .map(Type::new)
225 .collect()
226 }
227
228 match s {
229 "Integer" | "Int" => Self::Integer {
230 default: None,
231 min: None,
232 max: None,
233 one_of: vec![],
234 },
235 "String" => Self::String {
236 default: None,
237 min_len: None,
238 max_len: None,
239 one_of: vec![],
240 },
241 "Boolean" => Self::Bool { default: None },
242 "True" => Self::Bool {
243 default: Some(true),
244 },
245 "Float" | "Float number" => Self::Float,
246 _ => {
247 let parser = Sentences::parse(s);
248 if let Some(sentence) = parser.find(&["or"]) {
249 let types = types_from_sentence_ref(sentence);
250 Self::Or(types)
251 } else if let Some(sentence) = parser.find_and_crop(ARRAY_OF) {
252 let sentence = &sentence[2..];
253 let ty = if sentence.len() == 1 {
254 Self::new(sentence.parts()[0].as_inner())
255 } else if sentence.starts_with(ARRAY_OF) {
256 Self::new(
257 &sentence
258 .parts()
259 .iter()
260 .map(|part| part.as_inner())
261 .join(" "),
262 )
263 } else {
264 Self::Or(types_from_sentence_ref(sentence))
265 };
266 Self::Array(Box::new(ty))
267 } else {
268 Self::Object(s.to_string())
269 }
270 }
271 }
272 }
273
274 fn new_with_description(s: &str, description: TypeParsingUnit) -> Result<Self> {
275 let default = sentence::parse_type_custom(Pattern::Default, description, |sentence| {
276 sentence.parts().first().map(|part| part.as_inner().clone())
277 })?;
278 let min_max = sentence::parse_type_custom(Pattern::MinMax, description, |sentence| {
279 let values = sentence.parts().first()?.as_inner();
280 let mut split = values.split('-');
281 let min = split.next()?.to_string();
282 let max = split.next()?.to_string();
283 Some((min, max))
284 })?;
285 let one_of = sentence::parse_type_custom(Pattern::OneOf, description, |sentence| {
286 Some(
287 sentence
288 .parts()
289 .iter()
290 .filter(|part| {
291 part.has_quotes()
292 || part.is_italic()
293 || part.as_inner().chars().all(|c| c.is_ascii_digit())
294 })
295 .map(|part| part.as_inner())
296 .cloned()
297 .dedup()
298 .collect::<Vec<_>>(),
299 )
300 })?;
301
302 let (min, max) = if let Some((min, max)) = min_max {
303 (Some(min), Some(max))
304 } else {
305 (None, None)
306 };
307
308 let ty = match Type::new(s) {
309 Type::Integer {
310 default: type_default,
311 min: type_min,
312 max: type_max,
313 one_of: type_one_of,
314 } => {
315 let one_of = if let Some(one_of) = one_of {
316 one_of
317 .into_iter()
318 .map(|x| x.parse::<i64>())
319 .collect::<Result<_, ParseIntError>>()?
320 } else {
321 type_one_of
322 };
323
324 Type::Integer {
325 default: default
326 .as_deref()
327 .map(str::parse)
328 .transpose()?
329 .or(type_default),
330 min: min.as_deref().map(str::parse).transpose()?.or(type_min),
331 max: max.as_deref().map(str::parse).transpose()?.or(type_max),
332 one_of,
333 }
334 }
335 Type::Bool {
336 default: type_default,
337 } => Type::Bool {
338 default: default
339 .as_deref()
340 .map(str::to_lowercase)
341 .as_deref()
342 .map(str::parse)
343 .transpose()?
344 .or(type_default),
345 },
346 Type::String {
347 default: type_default,
348 min_len: type_min_len,
349 max_len: type_max_len,
350 one_of: type_one_if,
351 } if default.is_some() || min.is_some() || max.is_some() || one_of.is_some() => {
352 Type::String {
353 default: default.or(type_default),
354 min_len: min.as_deref().map(str::parse).transpose()?.or(type_min_len),
355 max_len: max.as_deref().map(str::parse).transpose()?.or(type_max_len),
356 one_of: one_of.unwrap_or(type_one_if),
357 }
358 }
359 x => x,
360 };
361
362 Ok(ty)
363 }
364
365 pub fn extract_from_text(text: TypeParsingUnit) -> Result<Self> {
366 fn strip_plural_ending(mut s: &str) -> &str {
367 if s.ends_with("es") {
368 s = s.strip_suffix('s').unwrap_or(s);
369 }
370
371 s
372 }
373
374 fn extract_type(sentence: &SentenceRef) -> Option<Type> {
375 const ARRAY: &str = "Array";
376 const AN_ARRAY_OF: &[&str] = &["an", "array", "of"];
377 const OTHERWISE: &[&str] = &["otherwise"];
378
379 if sentence.contains(OTHERWISE) {
380 let types = sentence
381 .parts()
382 .iter()
383 .filter(|part| !part.as_inner().is_first_letter_lowercase())
384 .map(SentenceRef::from_part)
385 .map(extract_type)
386 .collect::<Option<_>>()?;
387 Some(Type::Or(types))
388 } else {
389 let (pos, part) = sentence
390 .parts()
391 .iter()
392 .find_position(|part| !part.as_inner().is_first_letter_lowercase())?;
393 let ty = part.as_inner();
394 let ty = strip_plural_ending(ty);
395
396 if ty == ARRAY {
397 let sentence = &sentence[pos + 1..];
398 let ty = extract_type(sentence)?;
399 Some(Type::Array(Box::new(ty)))
400 } else if sentence[pos.saturating_sub(AN_ARRAY_OF.len())..].starts_with(AN_ARRAY_OF)
401 {
402 let sentence = &sentence[pos..];
403 let ty = extract_type(sentence)?;
404 Some(Type::Array(Box::new(ty)))
405 } else {
406 Some(Type::new(ty))
407 }
408 }
409 }
410
411 sentence::parse_type_custom(Pattern::ReturnType, text, extract_type)
412 .transpose()
413 .ok_or_else(|| ParseError::TypeExtractionFailed(text.plain_text()))?
414 }
415
416 pub fn maybe_file_to_send(&self) -> bool {
417 match self {
418 Type::Integer { .. } | Type::String { .. } | Type::Bool { .. } | Type::Float => false,
419 Type::Or(types) => types.iter().any(Self::maybe_file_to_send),
420 Type::Array(ty) => ty.maybe_file_to_send(),
421 Type::Object(object) => object.starts_with("Input"),
422 }
423 }
424}
425
426#[derive(Debug, Copy, Clone)]
427pub enum TypeParsingUnit<'a> {
428 Element(&'a ElementRef<'a>),
429 Description(&'a RawDescription<'a>),
430}
431
432impl TypeParsingUnit<'_> {
433 fn sentences(self) -> Result<Vec<Sentence>> {
434 match self {
435 TypeParsingUnit::Element(elem) => elem.sentences(),
436 TypeParsingUnit::Description(description) => description.sentences(),
437 }
438 }
439
440 fn plain_text(self) -> String {
441 match self {
442 TypeParsingUnit::Element(elem) => elem.plain_text(),
443 TypeParsingUnit::Description(description) => description.plain_text(),
444 }
445 }
446}
447
448#[derive(Debug, Clone)]
449pub struct Object {
450 pub name: String,
451 pub description: String,
452 pub data: ObjectData,
453 pub docs_link: String,
454}
455
456#[derive(Debug, Clone)]
457pub enum ObjectData {
458 Fields(Vec<Field>),
459 Elements(Vec<Type>),
460 Unknown,
463}
464
465#[derive(Debug, Clone)]
466pub struct Field {
467 pub name: String,
468 pub kind: Type,
469 pub required: bool,
470 pub description: String,
471}
472
473#[derive(Debug, Clone)]
474pub struct Method {
475 pub name: String,
476 pub description: String,
477 pub args: MethodArgs,
478 pub return_type: Type,
479 pub docs_link: String,
480}
481
482#[derive(Debug, Clone)]
483pub enum MethodArgs {
484 No,
485 Yes(Vec<Argument>),
486 WithMultipart(Vec<Argument>),
487}
488
489impl MethodArgs {
490 fn new(args: Vec<Argument>) -> Self {
491 if args.iter().any(|arg| arg.kind.maybe_file_to_send()) {
492 Self::WithMultipart(args)
493 } else if args.is_empty() {
494 Self::No
495 } else {
496 Self::Yes(args)
497 }
498 }
499}
500
501#[derive(Debug, Clone)]
502pub struct Argument {
503 pub name: String,
504 pub kind: Type,
505 pub required: bool,
506 pub description: String,
507}
508
509fn make_url_from_fragment(fragment: String) -> String {
510 assert!(fragment.starts_with('#'));
511 format!("{}{}", BOT_API_DOCS_URL, fragment)
512}
513
514trait RawDescriptionExt {
515 fn sentences(&self) -> Result<Vec<Sentence>>;
516
517 fn markdown(&self) -> String;
518
519 fn plain_text(&self) -> String;
520}
521
522impl RawDescriptionExt for RawDescription<'_> {
523 fn sentences(&self) -> Result<Vec<Sentence>> {
524 self.0
525 .iter()
526 .map(ElementRef::sentences)
527 .try_fold(Vec::new(), |mut acc, x| {
528 acc.extend(x?);
529 Ok(acc)
530 })
531 }
532
533 fn markdown(&self) -> String {
534 html2md::parse_html_custom(
535 &self.0.iter().map(ElementRef::html).join("\n"),
536 &TagsHandlerFactory::new_in_map(),
537 )
538 }
539
540 fn plain_text(&self) -> String {
541 self.0.iter().map(ElementRef::plain_text).join("\n")
542 }
543}
544
545trait ElementRefParserExt {
546 fn sentences(&self) -> Result<Vec<Sentence>>;
547
548 fn markdown(&self) -> String;
549
550 fn a_href(&self) -> Result<String>;
551}
552
553impl ElementRefParserExt for ElementRef<'_> {
554 fn sentences(&self) -> Result<Vec<Sentence>> {
555 sentence::parse_node(*self.deref())
556 }
557
558 fn markdown(&self) -> String {
559 html2md::parse_html_custom(&self.html(), &TagsHandlerFactory::new_in_map())
560 }
561
562 fn a_href(&self) -> Result<String> {
563 for edge in self.traverse() {
564 if let Edge::Open(node) = edge {
565 if let Node::Element(elem) = node.value() {
566 if elem.name() == "a" {
567 return elem.a_href();
568 }
569 }
570 }
571 }
572
573 Err(ParseError::MissingHref)
574 }
575}
576
577trait ElementExt {
578 fn a_href(&self) -> Result<String>;
579}
580
581impl ElementExt for Element {
582 fn a_href(&self) -> Result<String> {
583 self.attr("href")
584 .map(str::to_string)
585 .ok_or(ParseError::MissingHref)
586 }
587}
588
#[cfg(test)]
mod tests {
    use super::*;

    /// Integer without any constraints.
    fn plain_integer() -> Type {
        Type::Integer {
            default: None,
            min: None,
            max: None,
            one_of: vec![],
        }
    }

    /// String without any constraints.
    fn plain_string() -> Type {
        Type::String {
            default: None,
            min_len: None,
            max_len: None,
            one_of: vec![],
        }
    }

    /// Array with the given item type.
    fn array_of(item: Type) -> Type {
        Type::Array(Box::new(item))
    }

    /// Object with the given name.
    fn object(name: &str) -> Type {
        Type::Object(name.to_string())
    }

    #[test]
    fn or_type() {
        let expected = Type::Or(vec![plain_integer(), plain_string()]);
        assert_eq!(Type::new("Integer or String"), expected)
    }

    #[test]
    fn array_of_type() {
        let expected = array_of(object("PhotoSize"));
        assert_eq!(Type::new("Array of PhotoSize"), expected);
    }

    #[test]
    fn array_of_array_type() {
        let expected = array_of(array_of(object("PhotoSize")));
        assert_eq!(Type::new("Array of Array of PhotoSize"), expected);
    }
}