Skip to main content

vastlint_core/
inspect.rs

1use quick_xml::{events::Event, Reader, XmlVersion};
2
/// Kind of ad container detected while inspecting a VAST document.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum InspectAdType {
    /// A `<Wrapper>` element was seen.
    Wrapper,
    /// An `<InLine>` element was seen.
    InLine,
    /// Neither container element was seen.
    Unknown,
}

impl InspectAdType {
    /// Returns the variant's name as a static string
    /// (`"Wrapper"`, `"InLine"` or `"Unknown"`).
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Wrapper => "Wrapper",
            Self::InLine => "InLine",
            Self::Unknown => "Unknown",
        }
    }
}
19
/// One `<MediaFile>` entry captured by `inspect_document`.
///
/// Every field is kept as the raw string found in the document; attribute
/// fields are empty when the attribute was not present on the element.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct InspectMediaFile {
    /// Trimmed text content of the element.
    pub url: String,
    /// Value of the `type` attribute.
    pub mime_type: String,
    /// Value of the `delivery` attribute.
    pub delivery: String,
    /// Value of the `width` attribute.
    pub width: String,
    /// Value of the `height` attribute.
    pub height: String,
    /// Value of the `bitrate` attribute.
    pub bitrate: String,
}
29
30#[derive(Debug, Clone, PartialEq, Eq)]
31pub struct InspectDocumentMeta {
32    pub ad_type: InspectAdType,
33    pub ad_system: String,
34    pub ad_title: String,
35    pub duration: String,
36    pub impression_count: usize,
37    pub tracking_event_count: usize,
38    pub media_files: Vec<InspectMediaFile>,
39    pub companion_count: usize,
40    pub wrapper_uri: Option<String>,
41}
42
/// Destination for the next non-blank text chunk read from the document.
///
/// The scanner arms one of these when it opens an element whose text it
/// wants; `apply_text` routes the chunk accordingly and goes back to `None`.
enum TextTarget {
    /// No capture is pending; text is ignored.
    None,
    /// Next chunk is stored as the ad system.
    AdSystem,
    /// Next chunk is stored as the ad title.
    AdTitle,
    /// Next chunk is stored as the duration.
    Duration,
    /// Next chunk is stored as the wrapper's downstream tag URI.
    WrapperUri,
    /// Next chunk is the URL of the media file whose attributes are pending.
    MediaFileUrl,
}
51
52pub fn inspect_document(xml: &str) -> InspectDocumentMeta {
53    let mut meta = InspectDocumentMeta {
54        ad_type: InspectAdType::Unknown,
55        ad_system: String::new(),
56        ad_title: String::new(),
57        duration: String::new(),
58        impression_count: 0,
59        tracking_event_count: 0,
60        media_files: Vec::new(),
61        companion_count: 0,
62        wrapper_uri: None,
63    };
64    let mut reader = Reader::from_str(xml);
65    let mut target = TextTarget::None;
66    let mut pending_media_file: Option<(String, String, String, String, String)> = None;
67
68    loop {
69        match reader.read_event() {
70            Ok(Event::Eof) | Err(_) => break,
71            Ok(Event::Start(element)) => {
72                let name = std::str::from_utf8(element.name().as_ref())
73                    .unwrap_or("")
74                    .to_owned();
75                match name.as_str() {
76                    "InLine" => meta.ad_type = InspectAdType::InLine,
77                    "Wrapper" => meta.ad_type = InspectAdType::Wrapper,
78                    "Impression" => meta.impression_count += 1,
79                    "Tracking" => meta.tracking_event_count += 1,
80                    "Companion" => meta.companion_count += 1,
81                    "AdSystem" => target = TextTarget::AdSystem,
82                    "AdTitle" => target = TextTarget::AdTitle,
83                    "Duration" => target = TextTarget::Duration,
84                    "VASTAdTagURI" => target = TextTarget::WrapperUri,
85                    "MediaFile" => {
86                        let mut mime_type = String::new();
87                        let mut delivery = String::new();
88                        let mut width = String::new();
89                        let mut height = String::new();
90                        let mut bitrate = String::new();
91                        for attr in element.attributes().flatten() {
92                            let key = std::str::from_utf8(attr.key.as_ref())
93                                .unwrap_or("")
94                                .to_owned();
95                            let value = attr
96                                .decoded_and_normalized_value(
97                                    XmlVersion::Implicit1_0,
98                                    reader.decoder(),
99                                )
100                                .map(|value| value.into_owned())
101                                .unwrap_or_default();
102                            match key.as_str() {
103                                "type" => mime_type = value,
104                                "delivery" => delivery = value,
105                                "width" => width = value,
106                                "height" => height = value,
107                                "bitrate" => bitrate = value,
108                                _ => {}
109                            }
110                        }
111                        pending_media_file = Some((mime_type, delivery, width, height, bitrate));
112                        target = TextTarget::MediaFileUrl;
113                    }
114                    _ => {}
115                }
116            }
117            Ok(Event::Text(text)) => {
118                if let Ok(value) = text.xml10_content() {
119                    apply_text(
120                        value.trim(),
121                        &mut meta,
122                        &mut target,
123                        &mut pending_media_file,
124                    );
125                }
126            }
127            Ok(Event::CData(text)) => {
128                let bytes = text.into_inner();
129                if let Ok(value) = std::str::from_utf8(&bytes) {
130                    apply_text(
131                        value.trim(),
132                        &mut meta,
133                        &mut target,
134                        &mut pending_media_file,
135                    );
136                }
137            }
138            _ => {}
139        }
140    }
141
142    meta
143}
144
145fn apply_text(
146    value: &str,
147    meta: &mut InspectDocumentMeta,
148    target: &mut TextTarget,
149    pending_media_file: &mut Option<(String, String, String, String, String)>,
150) {
151    if value.is_empty() {
152        return;
153    }
154
155    match target {
156        TextTarget::AdSystem => {
157            meta.ad_system = value.to_string();
158            *target = TextTarget::None;
159        }
160        TextTarget::AdTitle => {
161            meta.ad_title = value.to_string();
162            *target = TextTarget::None;
163        }
164        TextTarget::Duration => {
165            meta.duration = value.to_string();
166            *target = TextTarget::None;
167        }
168        TextTarget::WrapperUri => {
169            meta.wrapper_uri = Some(value.to_string());
170            *target = TextTarget::None;
171        }
172        TextTarget::MediaFileUrl => {
173            if let Some((mime_type, delivery, width, height, bitrate)) = pending_media_file.take() {
174                meta.media_files.push(InspectMediaFile {
175                    url: value.to_string(),
176                    mime_type,
177                    delivery,
178                    width,
179                    height,
180                    bitrate,
181                });
182            }
183            *target = TextTarget::None;
184        }
185        TextTarget::None => {}
186    }
187}
188
#[cfg(test)]
mod tests {
    use super::{inspect_document, InspectAdType};

    /// Wrapper ad with one impression, one tracking event, one media file
    /// and one companion.
    const WRAPPER_XML: &str = r#"<VAST version="4.2">
  <Ad>
    <Wrapper>
      <AdSystem>Wrapper Co</AdSystem>
      <AdTitle>Wrapper title</AdTitle>
      <Impression>https://example.com/imp</Impression>
      <VASTAdTagURI><![CDATA[https://ads.example.com/downstream.xml]]></VASTAdTagURI>
      <Creatives>
        <Creative>
          <Linear>
            <TrackingEvents>
                            <Tracking event="start">https://example.com/start</Tracking>
            </TrackingEvents>
            <MediaFiles>
                            <MediaFile delivery="progressive" type="video/mp4" width="640" height="360" bitrate="800">
                https://cdn.example.com/ad.mp4
              </MediaFile>
            </MediaFiles>
          </Linear>
        </Creative>
      </Creatives>
      <CompanionAds>
                <Companion width="300" height="250">
                    <StaticResource creativeType="image/png">https://cdn.example.com/companion.png</StaticResource>
        </Companion>
      </CompanionAds>
    </Wrapper>
  </Ad>
</VAST>"#;

    #[test]
    fn extracts_wrapper_metadata() {
        let summary = inspect_document(WRAPPER_XML);

        // Container kind and captured text fields.
        assert_eq!(summary.ad_type, InspectAdType::Wrapper);
        assert_eq!(summary.ad_system, "Wrapper Co");
        assert_eq!(summary.ad_title, "Wrapper title");
        assert_eq!(
            summary.wrapper_uri.as_deref(),
            Some("https://ads.example.com/downstream.xml")
        );

        // Element counters.
        assert_eq!(summary.impression_count, 1);
        assert_eq!(summary.tracking_event_count, 1);
        assert_eq!(summary.companion_count, 1);

        // The single media file, with its URL trimmed of surrounding whitespace.
        assert_eq!(summary.media_files.len(), 1);
        let media = &summary.media_files[0];
        assert_eq!(media.url, "https://cdn.example.com/ad.mp4");
        assert_eq!(media.mime_type, "video/mp4");
    }
}