1use scraper::element_ref::ElementRef;
64use scraper::{Html, Selector};
65use std::collections::HashMap;
66
/// Maps the content of each header cell to the zero-based position of its
/// column within the header row.
///
/// The map is empty for a table that has no header row.
pub type Headers = HashMap<String, usize>;
80
81#[derive(Clone, Debug, Eq, PartialEq)]
85pub struct Table {
86 headers: Headers,
87 data: Vec<Vec<String>>,
88}
89
90impl Table {
91 pub fn find_first(html: &str) -> Option<Table> {
93 let html = Html::parse_fragment(html);
94 html.select(&css("table")).next().map(Table::new)
95 }
96
97 pub fn find_by_id(html: &str, id: &str) -> Option<Table> {
99 let html = Html::parse_fragment(html);
100 let selector = format!("table#{}", id);
101 Selector::parse(&selector)
102 .ok()
103 .as_ref()
104 .map(|s| html.select(s))
105 .and_then(|mut s| s.next())
106 .map(Table::new)
107 }
108
109 pub fn find_by_headers<T>(html: &str, headers: &[T]) -> Option<Table>
115 where
116 T: AsRef<str>,
117 {
118 if headers.is_empty() {
119 return Table::find_first(html);
120 }
121
122 let sel_table = css("table");
123 let sel_tr = css("tr");
124 let sel_th = css("th");
125
126 let html = Html::parse_fragment(html);
127 html.select(&sel_table)
128 .find(|table| {
129 table.select(&sel_tr).next().map_or(false, |tr| {
130 let cells = select_cells(tr, &sel_th);
131 headers.iter().all(|h| contains_str(&cells, h.as_ref()))
132 })
133 })
134 .map(Table::new)
135 }
136
137 pub fn headers(&self) -> &Headers {
142 &self.headers
143 }
144
145 pub fn iter(&self) -> Iter {
152 Iter {
153 headers: &self.headers,
154 iter: self.data.iter(),
155 }
156 }
157
158 fn new(element: ElementRef) -> Table {
159 let sel_tr = css("tr");
160 let sel_th = css("th");
161 let sel_td = css("td");
162
163 let mut headers = HashMap::new();
164 let mut rows = element.select(&sel_tr).peekable();
165 if let Some(tr) = rows.peek() {
166 for (i, th) in tr.select(&sel_th).enumerate() {
167 headers.insert(cell_content(th), i);
168 }
169 }
170 if !headers.is_empty() {
171 rows.next();
172 }
173 let data = rows.map(|tr| select_cells(tr, &sel_td)).collect();
174
175 Table { headers, data }
176 }
177}
178
179impl<'a> IntoIterator for &'a Table {
180 type Item = Row<'a>;
181 type IntoIter = Iter<'a>;
182
183 fn into_iter(self) -> Self::IntoIter {
184 self.iter()
185 }
186}
187
188pub struct Iter<'a> {
190 headers: &'a Headers,
191 iter: std::slice::Iter<'a, Vec<String>>,
192}
193
194impl<'a> Iterator for Iter<'a> {
195 type Item = Row<'a>;
196
197 fn next(&mut self) -> Option<Self::Item> {
198 let headers = self.headers;
199 self.iter.next().map(|cells| Row { headers, cells })
200 }
201}
202
203#[derive(Clone, Copy, Debug, Eq, PartialEq)]
214pub struct Row<'a> {
215 headers: &'a Headers,
216 cells: &'a [String],
217}
218
219impl<'a> Row<'a> {
220 pub fn len(&self) -> usize {
222 self.cells.len()
223 }
224
225 pub fn is_empty(&self) -> bool {
227 self.cells.is_empty()
228 }
229
230 pub fn get(&self, header: &str) -> Option<&'a str> {
235 self.headers
236 .get(header)
237 .and_then(|&i| self.cells.get(i).map(String::as_str))
238 }
239
240 pub fn as_slice(&self) -> &'a [String] {
242 self.cells
243 }
244
245 pub fn iter(&self) -> std::slice::Iter<String> {
247 self.cells.iter()
248 }
249}
250
251impl<'a> IntoIterator for Row<'a> {
252 type Item = &'a String;
253 type IntoIter = std::slice::Iter<'a, String>;
254
255 fn into_iter(self) -> Self::IntoIter {
256 self.cells.iter()
257 }
258}
259
260fn css(selector: &'static str) -> Selector {
261 Selector::parse(selector).unwrap()
262}
263
264fn select_cells(element: ElementRef, selector: &Selector) -> Vec<String> {
265 element.select(selector).map(cell_content).collect()
266}
267
268fn cell_content(element: ElementRef) -> String {
269 element.inner_html().trim().to_string()
270}
271
/// Returns `true` if any string in `slice` is exactly equal to `item`.
fn contains_str(slice: &[String], item: &str) -> bool {
    slice
        .iter()
        .map(String::as_str)
        .any(|candidate| candidate == item)
}
275
#[cfg(test)]
mod tests {
    use super::*;

    // HTML fixtures. The names describe the cell kinds of each row, e.g.
    // `TABLE_TH_TD` is a header row followed by one data row.

    /// A table element without any rows.
    const TABLE_EMPTY: &str = r#"
<table></table>
"#;

    /// A table consisting of a header row only.
    const TABLE_TH: &str = r#"
<table>
    <tr><th>Name</th><th>Age</th></tr>
</table>
"#;

    /// A table consisting of a single data row and no header row.
    const TABLE_TD: &str = r#"
<table>
    <tr><td>Name</td><td>Age</td></tr>
</table>
"#;

    /// A header row followed by one data row.
    const TABLE_TH_TD: &str = r#"
<table>
    <tr><th>Name</th><th>Age</th></tr>
    <tr><td>John</td><td>20</td></tr>
</table>
"#;

    /// Two data rows and no header row.
    const TABLE_TD_TD: &str = r#"
<table>
    <tr><td>Name</td><td>Age</td></tr>
    <tr><td>John</td><td>20</td></tr>
</table>
"#;

    /// Two rows that both use header cells.
    const TABLE_TH_TH: &str = r#"
<table>
    <tr><th>Name</th><th>Age</th></tr>
    <tr><th>John</th><th>20</th></tr>
</table>
"#;

    /// A header row followed by rows that are shorter than, equal to, empty,
    /// and longer than the header row.
    const TABLE_COMPLEX: &str = r#"
<table>
    <tr><th>Name</th><th>Age</th><th>Extra</th></tr>
    <tr><td>John</td><td>20</td></tr>
    <tr><td>May</td><td>30</td><td>foo</td></tr>
    <tr></tr>
    <tr><td>a</td><td>b</td><td>c</td><td>d</td></tr>
</table>
"#;

    /// A complete document that contains no table.
    const HTML_NO_TABLE: &str = r#"
<!doctype HTML>
<html>
    <head><title>foo</title></head>
    <body><p>Hi.</p></body>
</html>
"#;

    /// A complete document containing two tables with distinct ids.
    const HTML_TWO_TABLES: &str = r#"
<!doctype HTML>
<html>
    <head><title>foo</title></head>
    <body>
        <table id="first">
            <tr><th>Name</th><th>Age</th></tr>
            <tr><td>John</td><td>20</td></tr>
        </table>
        <table id="second">
            <tr><th>Name</th><th>Weight</th></tr>
            <tr><td>John</td><td>150</td></tr>
        </table>
    </body>
</html>
"#;

    /// The tail of a document: a table followed by unmatched closing tags.
    const HTML_TABLE_FRAGMENT: &str = r#"
        <table id="first">
            <tr><th>Name</th><th>Age</th></tr>
            <tr><td>John</td><td>20</td></tr>
        </table>
    </body>
</html>
"#;

    #[test]
    fn test_find_first_none() {
        assert_eq!(None, Table::find_first(""));
        assert_eq!(None, Table::find_first("foo"));
        assert_eq!(None, Table::find_first(HTML_NO_TABLE));
    }

    #[test]
    fn test_find_first_empty() {
        let empty = Table {
            headers: HashMap::new(),
            data: Vec::new(),
        };
        assert_eq!(Some(empty), Table::find_first(TABLE_EMPTY));
    }

    #[test]
    fn test_find_first_some() {
        assert!(Table::find_first(TABLE_TH).is_some());
        assert!(Table::find_first(TABLE_TD).is_some());
    }

    #[test]
    fn test_find_by_id_none() {
        assert_eq!(None, Table::find_by_id("", ""));
        assert_eq!(None, Table::find_by_id("foo", "id"));
        assert_eq!(None, Table::find_by_id(HTML_NO_TABLE, "id"));

        assert_eq!(None, Table::find_by_id(TABLE_EMPTY, "id"));
        assert_eq!(None, Table::find_by_id(TABLE_TH, "id"));
        assert_eq!(None, Table::find_by_id(TABLE_TH, ""));
        assert_eq!(None, Table::find_by_id(HTML_TWO_TABLES, "id"));
    }

    #[test]
    fn test_find_by_id_some() {
        assert!(Table::find_by_id(HTML_TWO_TABLES, "first").is_some());
        assert!(Table::find_by_id(HTML_TWO_TABLES, "second").is_some());
    }

    #[test]
    fn test_find_by_headers_empty() {
        let headers: [&str; 0] = [];

        assert_eq!(None, Table::find_by_headers("", &headers));
        assert_eq!(None, Table::find_by_headers("foo", &headers));
        assert_eq!(None, Table::find_by_headers(HTML_NO_TABLE, &headers));

        assert!(Table::find_by_headers(TABLE_EMPTY, &headers).is_some());
        assert!(Table::find_by_headers(HTML_TWO_TABLES, &headers).is_some());
    }

    #[test]
    fn test_find_by_headers_none() {
        let headers = ["Name", "Age"];
        let bad_headers = ["Name", "BAD"];

        assert_eq!(None, Table::find_by_headers("", &headers));
        assert_eq!(None, Table::find_by_headers("foo", &headers));
        assert_eq!(None, Table::find_by_headers(HTML_NO_TABLE, &headers));

        assert_eq!(None, Table::find_by_headers(TABLE_EMPTY, &bad_headers));
        assert_eq!(None, Table::find_by_headers(TABLE_TH, &bad_headers));

        assert_eq!(None, Table::find_by_headers(TABLE_TD, &headers));
        assert_eq!(None, Table::find_by_headers(TABLE_TD, &bad_headers));
    }

    #[test]
    fn test_find_by_headers_some() {
        let headers: [&str; 0] = [];
        assert!(Table::find_by_headers(TABLE_TH, &headers).is_some());
        assert!(Table::find_by_headers(TABLE_TH_TD, &headers).is_some());
        assert!(Table::find_by_headers(HTML_TWO_TABLES, &headers).is_some());

        let headers = ["Name"];
        assert!(Table::find_by_headers(TABLE_TH, &headers).is_some());
        assert!(Table::find_by_headers(TABLE_TH_TD, &headers).is_some());
        assert!(Table::find_by_headers(HTML_TWO_TABLES, &headers).is_some());

        let headers = ["Age", "Name"];
        assert!(Table::find_by_headers(TABLE_TH, &headers).is_some());
        assert!(Table::find_by_headers(TABLE_TH_TD, &headers).is_some());
        assert!(Table::find_by_headers(HTML_TWO_TABLES, &headers).is_some());
    }

    #[test]
    fn test_find_first_incomplete_fragment() {
        assert!(Table::find_first(HTML_TABLE_FRAGMENT).is_some());
    }

    #[test]
    fn test_headers_empty() {
        let empty = HashMap::new();
        assert_eq!(&empty, Table::find_first(TABLE_TD).unwrap().headers());
        assert_eq!(&empty, Table::find_first(TABLE_TD_TD).unwrap().headers());
    }

    #[test]
    fn test_headers_nonempty() {
        let mut headers = HashMap::new();
        headers.insert("Name".to_string(), 0);
        headers.insert("Age".to_string(), 1);

        assert_eq!(&headers, Table::find_first(TABLE_TH).unwrap().headers());
        assert_eq!(&headers, Table::find_first(TABLE_TH_TD).unwrap().headers());
        assert_eq!(&headers, Table::find_first(TABLE_TH_TH).unwrap().headers());

        headers.insert("Extra".to_string(), 2);
        assert_eq!(
            &headers,
            Table::find_first(TABLE_COMPLEX).unwrap().headers()
        );
    }

    #[test]
    fn test_iter_empty() {
        assert_eq!(0, Table::find_first(TABLE_EMPTY).unwrap().iter().count());
        assert_eq!(0, Table::find_first(TABLE_TH).unwrap().iter().count());
    }

    #[test]
    fn test_iter_nonempty() {
        assert_eq!(1, Table::find_first(TABLE_TD).unwrap().iter().count());
        assert_eq!(1, Table::find_first(TABLE_TH_TD).unwrap().iter().count());
        assert_eq!(2, Table::find_first(TABLE_TD_TD).unwrap().iter().count());
        assert_eq!(1, Table::find_first(TABLE_TH_TH).unwrap().iter().count());
        assert_eq!(4, Table::find_first(TABLE_COMPLEX).unwrap().iter().count());
    }

    #[test]
    fn test_row_is_empty() {
        let table = Table::find_first(TABLE_TD).unwrap();
        assert_eq!(
            vec![false],
            table.iter().map(|r| r.is_empty()).collect::<Vec<_>>()
        );

        let table = Table::find_first(TABLE_COMPLEX).unwrap();
        assert_eq!(
            vec![false, false, true, false],
            table.iter().map(|r| r.is_empty()).collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_row_len() {
        let table = Table::find_first(TABLE_TD).unwrap();
        assert_eq!(vec![2], table.iter().map(|r| r.len()).collect::<Vec<_>>());

        let table = Table::find_first(TABLE_COMPLEX).unwrap();
        assert_eq!(
            vec![2, 3, 0, 4],
            table.iter().map(|r| r.len()).collect::<Vec<_>>()
        );
    }

    #[test]
    fn test_row_get_without_headers() {
        let table = Table::find_first(TABLE_TD).unwrap();
        let mut iter = table.iter();
        let row = iter.next().unwrap();

        assert_eq!(None, row.get(""));
        assert_eq!(None, row.get("foo"));
        assert_eq!(None, row.get("Name"));
        assert_eq!(None, row.get("Age"));

        assert_eq!(None, iter.next());
    }

    #[test]
    fn test_row_get_with_headers() {
        let table = Table::find_first(TABLE_TH_TD).unwrap();
        let mut iter = table.iter();
        let row = iter.next().unwrap();

        assert_eq!(None, row.get(""));
        assert_eq!(None, row.get("foo"));
        assert_eq!(Some("John"), row.get("Name"));
        assert_eq!(Some("20"), row.get("Age"));

        assert_eq!(None, iter.next());
    }

    #[test]
    fn test_row_get_complex() {
        let table = Table::find_first(TABLE_COMPLEX).unwrap();
        let mut iter = table.iter();

        // Row shorter than the header row: the missing column yields `None`.
        let row = iter.next().unwrap();
        assert_eq!(Some("John"), row.get("Name"));
        assert_eq!(Some("20"), row.get("Age"));
        assert_eq!(None, row.get("Extra"));

        let row = iter.next().unwrap();
        assert_eq!(Some("May"), row.get("Name"));
        assert_eq!(Some("30"), row.get("Age"));
        assert_eq!(Some("foo"), row.get("Extra"));

        // Empty row.
        let row = iter.next().unwrap();
        assert_eq!(None, row.get("Name"));
        assert_eq!(None, row.get("Age"));
        assert_eq!(None, row.get("Extra"));

        // Row longer than the header row: the extra cell has no header.
        let row = iter.next().unwrap();
        assert_eq!(Some("a"), row.get("Name"));
        assert_eq!(Some("b"), row.get("Age"));
        assert_eq!(Some("c"), row.get("Extra"));

        assert_eq!(None, iter.next());
    }

    #[test]
    fn test_row_as_slice_without_headers() {
        let table = Table::find_first(TABLE_TD).unwrap();
        let mut iter = table.iter();

        assert_eq!(&["Name", "Age"], iter.next().unwrap().as_slice());
        assert_eq!(None, iter.next());
    }

    #[test]
    fn test_row_as_slice_with_headers() {
        let table = Table::find_first(TABLE_TH_TD).unwrap();
        let mut iter = table.iter();

        assert_eq!(&["John", "20"], iter.next().unwrap().as_slice());
        assert_eq!(None, iter.next());
    }

    #[test]
    fn test_row_as_slice_complex() {
        let table = Table::find_first(TABLE_COMPLEX).unwrap();
        let mut iter = table.iter();
        let empty: [&str; 0] = [];

        assert_eq!(&["John", "20"], iter.next().unwrap().as_slice());
        assert_eq!(&["May", "30", "foo"], iter.next().unwrap().as_slice());
        assert_eq!(&empty, iter.next().unwrap().as_slice());
        assert_eq!(&["a", "b", "c", "d"], iter.next().unwrap().as_slice());
        assert_eq!(None, iter.next());
    }

    #[test]
    fn test_row_iter_simple() {
        let table = Table::find_first(TABLE_TD).unwrap();
        let row = table.iter().next().unwrap();
        let mut iter = row.iter();

        assert_eq!(Some("Name"), iter.next().map(String::as_str));
        assert_eq!(Some("Age"), iter.next().map(String::as_str));
        assert_eq!(None, iter.next());
    }

    #[test]
    fn test_row_iter_complex() {
        let table = Table::find_first(TABLE_COMPLEX).unwrap();
        let mut table_iter = table.iter();

        let row = table_iter.next().unwrap();
        let mut iter = row.iter();
        assert_eq!(Some("John"), iter.next().map(String::as_str));
        assert_eq!(Some("20"), iter.next().map(String::as_str));
        assert_eq!(None, iter.next());

        let row = table_iter.next().unwrap();
        let mut iter = row.iter();
        assert_eq!(Some("May"), iter.next().map(String::as_str));
        assert_eq!(Some("30"), iter.next().map(String::as_str));
        assert_eq!(Some("foo"), iter.next().map(String::as_str));
        assert_eq!(None, iter.next());

        let row = table_iter.next().unwrap();
        let mut iter = row.iter();
        assert_eq!(None, iter.next());

        let row = table_iter.next().unwrap();
        let mut iter = row.iter();
        assert_eq!(Some("a"), iter.next().map(String::as_str));
        assert_eq!(Some("b"), iter.next().map(String::as_str));
        assert_eq!(Some("c"), iter.next().map(String::as_str));
        assert_eq!(Some("d"), iter.next().map(String::as_str));
        assert_eq!(None, iter.next());
    }
}