use std::collections::BTreeMap;
use acick_util::{regex, select};
use anyhow::Context as _;
use once_cell::sync::Lazy;
use regex::Regex;
use reqwest::blocking::Client;
use reqwest::Url;
use scraper::{ElementRef, Html, Selector};
use crate::config::SessionConfig;
use crate::model::{ContestId, ProblemId, Sample};
use crate::page::{FetchRestricted, BASE_URL};
use crate::service::scrape::{parse_zenkaku_digits, ElementRefExt as _, HasUrl, Scrape};
use crate::{Console, Result};
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TasksPrintPageBuilder<'a> {
contest_id: &'a ContestId,
session: &'a SessionConfig,
}
impl<'a> TasksPrintPageBuilder<'a> {
pub fn new(contest_id: &'a ContestId, session: &'a SessionConfig) -> Self {
Self {
contest_id,
session,
}
}
pub fn build(self, client: &Client, cnsl: &mut Console) -> Result<TasksPrintPage<'a>> {
self.fetch_restricted(client, self.session, cnsl)
.map(|html| TasksPrintPage {
builder: self,
content: html,
})
}
}
impl HasUrl for TasksPrintPageBuilder<'_> {
fn url(&self) -> Result<Url> {
let path = format!("/contests/{}/tasks_print", self.contest_id);
BASE_URL
.join(&path)
.context(format!("Could not parse url path: {}", path))
}
}
impl FetchRestricted for TasksPrintPageBuilder<'_> {}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TasksPrintPage<'a> {
builder: TasksPrintPageBuilder<'a>,
content: Html,
}
impl TasksPrintPage<'_> {
pub fn extract_samples_map(&self) -> Result<BTreeMap<ProblemId, Vec<Sample>>> {
let mut samples_map = BTreeMap::new();
for elem in self.select_problems() {
let (id, _) = elem.extract_id_name()?;
let samples = elem.select_statement()?.extract_samples();
samples_map.insert(id, samples);
}
Ok(samples_map)
}
fn select_problems(&self) -> impl Iterator<Item = ProblemElem> {
self.content
.select(select!(
"#main-container > .row > .col-sm-12:not(.next-page)"
))
.map(ProblemElem)
}
}
impl HasUrl for TasksPrintPage<'_> {
fn url(&self) -> Result<Url> {
self.builder.url()
}
}
impl Scrape for TasksPrintPage<'_> {
fn elem(&self) -> ElementRef {
self.content.root_element()
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
struct ProblemElem<'a>(ElementRef<'a>);
impl ProblemElem<'_> {
fn extract_id_name(&self) -> Result<(ProblemId, String)> {
let title = self
.find_first(select!(".h2"))
.context("Could not find problem title")?
.inner_text();
let mut id_name = title.splitn(2, '-');
let id = id_name.next().context("Could not find problem id")?.trim();
let name = id_name
.next()
.context("Could not find problem name")?
.trim();
Ok((id.into(), name.to_owned()))
}
fn select_statement(&self) -> Result<StatementElem> {
self.find_first(select!("#task-statement"))
.context("Could not find task statement")
.map(StatementElem)
}
}
impl Scrape for ProblemElem<'_> {
fn elem(&self) -> ElementRef {
self.0
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
struct StatementElem<'a>(ElementRef<'a>);
impl StatementElem<'_> {
fn extract_samples(&self) -> Vec<Sample> {
static IN_OUT_REGEXS: &[(&Lazy<Regex>, &Lazy<Regex>)] = &[
(
regex!(r"\ASample Input\s?([0-9]{1,2}).*\z"),
regex!(r"\ASample Output\s?([0-9]{1,2}).*\z"),
),
(
regex!(r"\A[\s\n]*入力例\s*(\d{1,2})[.\n]*\z"),
regex!(r"\A[\s\n]*出力例\s*(\d{1,2})[.\n]*\z"),
),
];
static PS: &[&Lazy<Selector>] = &[
select!("span.lang > span.lang-ja > div.part > section > h3, span.lang > span.lang-ja > div.part > section > pre"),
select!("span.lang > span.lang-en > div.part > section > h3, span.lang > span.lang-en > div.part > section > pre"),
select!("div.part > section > h3, div.part > section > pre"),
select!("div.part > h3, div.part > section > pre"),
select!("h3, section > pre"),
select!("section > h3, section > pre"),
select!("span.lang > span.lang-ja > section > h3, span.lang > span.lang-ja > section > pre"),
select!("span.lang > span.lang-ja > div.part > h3, span.lang > span.lang-ja > div.part > section > pre"),
select!("h3, pre"),
];
for p in PS {
for (re_in, re_out) in IN_OUT_REGEXS {
if let Some(samples) = self.try_extract_samples(p, re_in, re_out) {
return samples;
}
}
}
return vec![];
}
fn try_extract_samples(
&self,
selector: &'static Selector,
re_input: &'static Regex,
re_output: &'static Regex,
) -> Option<Vec<Sample>> {
let mut inputs = BTreeMap::<usize, _>::new();
let mut outputs = BTreeMap::<usize, _>::new();
let mut next = None;
for elem in self.0.select(&selector) {
let elem_name = elem.value().name();
if elem_name == "h3" {
let text = elem.inner_text();
if let Some(caps) = re_input.captures(&text) {
next = Some((true, parse_zenkaku_digits(&caps[1]).ok()?));
} else if let Some(caps) = re_output.captures(&text) {
next = Some((false, parse_zenkaku_digits(&caps[1]).ok()?));
}
} else if ["pre", "section"].contains(&elem_name) {
if let Some((is_input, n)) = next {
let text = elem.inner_text();
if is_input {
inputs.insert(n, text);
} else {
outputs.insert(n, text);
}
}
next = None;
}
}
let mut samples = vec![];
for (i, input) in inputs {
if let Some(output) = outputs.remove(&i) {
samples.push(Sample::new(i.to_string(), input, output));
}
}
if samples.is_empty() {
None
} else {
Some(samples)
}
}
}
impl Scrape for StatementElem<'_> {
fn elem(&self) -> ElementRef {
self.0
}
}