nom_xml/
io.rs

1use crate::attribute::Attribute;
2use crate::parse::Parse;
3use crate::prolog::subset::entity::entity_value::EntityValue;
4use crate::prolog::subset::Subset;
5use crate::Name;
6
7use crate::config::Config;
8use crate::prolog::subset::entity::EntitySource;
9use crate::prolog::textdecl::TextDecl;
10use crate::reference::Reference;
11use crate::{error::Error, Document};
12use encoding_rs::*;
13use nom::branch::alt;
14use nom::combinator::{map, opt};
15
16use nom::multi::many1;
17
18use std::cell::RefCell;
19use std::collections::HashMap;
20use std::io::BufReader;
21
22use std::rc::Rc;
23use std::{fs::File, io::Read};
24
25/// Read the file and decode the contents into a String
26pub fn read_file(file: &mut File) -> std::io::Result<String> {
27    let mut reader = BufReader::new(file);
28    let mut bytes = vec![];
29
30    reader.read_to_end(&mut bytes)?;
31
32    let (encoding, bom_length) = match Encoding::for_bom(&bytes) {
33        Some((enc, len)) => (enc, len),
34        None => (UTF_8, 0),
35    };
36    let (decoded_str, _, _) = encoding.decode(&bytes[bom_length..]);
37
38    let mut data = decoded_str.into_owned();
39
40    data = data.replace("\r\n", "\n").replace('\r', "\n");
41
42    Ok(data)
43}
44
45/// Parse the entire file into a Document
46///
47/// Note: Beware using for extremely large files as it will load the entire file into memory
48pub fn parse_entire_file(
49    file: &mut File,
50    config: &Config,
51) -> Result<Document, Box<dyn std::error::Error>> {
52    let data = read_file(file)?;
53
54    let parse_result = Document::parse(&data, config);
55    match parse_result {
56        Ok((_, document)) => Ok(document),
57        Err(nom::Err::Error(e) | nom::Err::Failure(e)) => {
58            // Handle Nom parsing errors
59            Err(Error::NomError(nom::error::Error::new(
60                e.to_string(),
61                nom::error::ErrorKind::Fail,
62            ))
63            .into())
64        }
65        Err(nom::Err::Incomplete(_)) => Err(Error::NomError(nom::error::Error::new(
66            "parse_file: Incomplete parsing".to_string(),
67            nom::error::ErrorKind::Fail,
68        ))
69        .into()),
70    }
71}
72
73pub(crate) fn parse_external_entity_file(
74    file: &mut File,
75    config: &Config,
76    external_entity_references: Rc<RefCell<HashMap<(Name, EntitySource), EntityValue>>>,
77) -> Result<(Vec<EntityValue>, Option<Vec<Subset>>), Box<dyn std::error::Error>> {
78    let mut data = read_file(file)?;
79    data = data.replace("\r\n", "\n").replace('\r', "\n");
80    let (input, _text_decl) = opt(|i| TextDecl::parse(i, ()))(data.as_str())?;
81    //TODO: handle the text_decl such that if the encoding being used to parse the file is different, then the encoding is handled accordingly, i.e file being parsed again with the proper decoding
82    let args = (
83        external_entity_references.clone(),
84        config,
85        EntitySource::External,
86    );
87    let (input, subsets) = match Subset::parse(input, args) {
88        Ok((input, subsets)) => {
89            if subsets.is_empty() {
90                (input, None)
91            } else {
92                (input, Some(subsets))
93            }
94        }
95        _ => (input, None),
96    };
97
98    let (_, entity_values) = alt((
99        many1(map(
100            |i| Reference::parse(i, EntitySource::External),
101            EntityValue::Reference,
102        )),
103        map(
104            |i| Document::parse_content(i, &external_entity_references, EntitySource::External),
105            |doc| vec![EntityValue::Document(doc)],
106        ),
107    ))(input)
108    .map_err(|err| match err {
109        nom::Err::Error(_e) | nom::Err::Failure(_e) => Box::new(Error::NomError(
110            nom::error::Error::new(input.to_string(), nom::error::ErrorKind::Fail),
111        )),
112        nom::Err::Incomplete(_) => Box::new(Error::NomError(nom::error::Error::new(
113            "parse_external_ent_file: Incomplete input.".to_string(),
114            nom::error::ErrorKind::Fail,
115        ))),
116    })?;
117    Ok((entity_values, subsets))
118}