1use crate::attribute::Attribute;
2use crate::parse::Parse;
3use crate::prolog::subset::entity::entity_value::EntityValue;
4use crate::prolog::subset::Subset;
5use crate::Name;
6
7use crate::config::Config;
8use crate::prolog::subset::entity::EntitySource;
9use crate::prolog::textdecl::TextDecl;
10use crate::reference::Reference;
11use crate::{error::Error, Document};
12use encoding_rs::*;
13use nom::branch::alt;
14use nom::combinator::{map, opt};
15
16use nom::multi::many1;
17
18use std::cell::RefCell;
19use std::collections::HashMap;
20use std::io::BufReader;
21
22use std::rc::Rc;
23use std::{fs::File, io::Read};
24
25pub fn read_file(file: &mut File) -> std::io::Result<String> {
27 let mut reader = BufReader::new(file);
28 let mut bytes = vec![];
29
30 reader.read_to_end(&mut bytes)?;
31
32 let (encoding, bom_length) = match Encoding::for_bom(&bytes) {
33 Some((enc, len)) => (enc, len),
34 None => (UTF_8, 0),
35 };
36 let (decoded_str, _, _) = encoding.decode(&bytes[bom_length..]);
37
38 let mut data = decoded_str.into_owned();
39
40 data = data.replace("\r\n", "\n").replace('\r', "\n");
41
42 Ok(data)
43}
44
45pub fn parse_entire_file(
49 file: &mut File,
50 config: &Config,
51) -> Result<Document, Box<dyn std::error::Error>> {
52 let data = read_file(file)?;
53
54 let parse_result = Document::parse(&data, config);
55 match parse_result {
56 Ok((_, document)) => Ok(document),
57 Err(nom::Err::Error(e) | nom::Err::Failure(e)) => {
58 Err(Error::NomError(nom::error::Error::new(
60 e.to_string(),
61 nom::error::ErrorKind::Fail,
62 ))
63 .into())
64 }
65 Err(nom::Err::Incomplete(_)) => Err(Error::NomError(nom::error::Error::new(
66 "parse_file: Incomplete parsing".to_string(),
67 nom::error::ErrorKind::Fail,
68 ))
69 .into()),
70 }
71}
72
73pub(crate) fn parse_external_entity_file(
74 file: &mut File,
75 config: &Config,
76 external_entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
77) -> Result<(Vec<EntityValue>, Option<Vec<Subset>>), Box<dyn std::error::Error>> {
78 let mut data = read_file(file)?;
79 data = data.replace("\r\n", "\n").replace('\r', "\n");
80 let (input, _text_decl) = opt(|i| TextDecl::parse(i, ()))(data.as_str())?;
81 let args = (
83 external_entity_references.clone(),
84 config,
85 EntitySource::External,
86 );
87 let (input, subsets) = match Subset::parse(input, args) {
88 Ok((input, subsets)) => {
89 if subsets.is_empty() {
90 (input, None)
91 } else {
92 (input, Some(subsets))
93 }
94 }
95 _ => (input, None),
96 };
97
98 let (_, entity_values) = alt((
99 many1(map(
100 |i| Reference::parse(i, EntitySource::External),
101 EntityValue::Reference,
102 )),
103 map(
104 |i| Document::parse_content(i, &external_entity_references, EntitySource::External),
105 |doc| vec![EntityValue::Document(doc)],
106 ),
107 ))(input)
108 .map_err(|err| match err {
109 nom::Err::Error(_e) | nom::Err::Failure(_e) => Box::new(Error::NomError(
110 nom::error::Error::new(input.to_string(), nom::error::ErrorKind::Fail),
111 )),
112 nom::Err::Incomplete(_) => Box::new(Error::NomError(nom::error::Error::new(
113 "parse_external_ent_file: Incomplete input.".to_string(),
114 nom::error::ErrorKind::Fail,
115 ))),
116 })?;
117 Ok((entity_values, subsets))
118}