use std::collections::BTreeMap;
use std::fmt::{Display, Formatter};
use crate::{IndexUrl, UrlError};
/// Identifier of an adapter, stored as an owned string.
///
/// Ordering, equality and hashing are all derived from the wrapped string.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct AdapterId(String);

impl AdapterId {
    /// Builds an identifier from anything convertible into a `String`.
    #[must_use]
    pub fn new(input: impl Into<String>) -> Self {
        let raw: String = input.into();
        Self(raw)
    }

    /// Borrows the identifier text.
    #[must_use]
    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}

impl Display for AdapterId {
    /// Writes the identifier text verbatim; width and padding flags are not applied.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self(raw) = self;
        f.write_str(raw)
    }
}
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct IndexDocument {
pub title: String,
pub nodes: Vec<IndexNode>,
pub metadata: Metadata,
}
impl IndexDocument {
#[must_use]
pub fn titled(title: impl Into<String>) -> Self {
Self {
title: title.into(),
nodes: Vec::new(),
metadata: Metadata::default(),
}
}
pub fn push(&mut self, node: IndexNode) {
self.nodes.push(node);
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.nodes.iter().all(IndexNode::is_layout_only)
}
}
/// Optional descriptive fields carried by an `IndexDocument`.
///
/// Every field is an `Option`, so the derived `Default` leaves all of them `None`.
/// NOTE(review): the per-field notes below are inferred from the field names; the code
/// that fills them in is not part of this file.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct Metadata {
/// Canonical URL, kept as an unparsed string.
pub canonical_url: Option<String>,
/// Author name.
pub author: Option<String>,
/// Language of the document — presumably a language tag; confirm against the producer.
pub language: Option<String>,
/// Short description of the document.
pub description: Option<String>,
/// Open Graph title — presumably taken from `og:title`; confirm against the producer.
pub open_graph_title: Option<String>,
/// Open Graph description — presumably taken from `og:description`; confirm against the producer.
pub open_graph_description: Option<String>,
/// Identifier of the adapter associated with the document, if any.
pub adapter_id: Option<AdapterId>,
/// Quality assessment of the document, if one was made.
pub quality: Option<DocumentQuality>,
}
/// Category assigned to a document quality assessment.
///
/// The derived ordering follows declaration order: `Adapter` compares lowest and
/// `Failed` highest.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum DocumentQualityCategory {
    Adapter,
    StrongGeneric,
    PartialGeneric,
    Fallback,
    Failed,
}

impl DocumentQualityCategory {
    /// Returns the kebab-case label of the category.
    #[must_use]
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::Failed => "failed",
            Self::Fallback => "fallback",
            Self::PartialGeneric => "partial-generic",
            Self::StrongGeneric => "strong-generic",
            Self::Adapter => "adapter",
        }
    }
}

impl Display for DocumentQualityCategory {
    /// Writes the same label as [`DocumentQualityCategory::as_str`], without padding.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let label = self.as_str();
        f.write_str(label)
    }
}
/// A quality assessment: a category, a numeric score and the reasons behind it.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DocumentQuality {
    /// Category of the assessment.
    pub category: DocumentQualityCategory,
    /// Score; [`DocumentQuality::new`] never stores a value above 100.
    pub score: u8,
    /// Reasons, in the order they were supplied.
    pub reasons: Vec<String>,
}

impl DocumentQuality {
    /// Builds an assessment, lowering any `score` above 100 to 100 and converting each
    /// reason into an owned `String`.
    #[must_use]
    pub fn new(
        category: DocumentQualityCategory,
        score: u8,
        reasons: impl IntoIterator<Item = impl Into<String>>,
    ) -> Self {
        let capped = if score > 100 { 100 } else { score };
        let reasons: Vec<String> = reasons.into_iter().map(|reason| reason.into()).collect();
        Self {
            category,
            score: capped,
            reasons,
        }
    }
}
/// A node in an `IndexDocument`'s content tree (`Section` nodes nest further nodes).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IndexNode {
/// Heading text with a numeric level.
Heading {
level: u8,
text: String,
},
/// A paragraph of text.
Paragraph(String),
/// A hyperlink (see `Link`).
Link(Link),
/// A list of text items, flagged as ordered or not.
List {
ordered: bool,
items: Vec<String>,
},
/// A block of code with an optional language tag.
CodeBlock {
language: Option<String>,
code: String,
},
/// A table stored as rows of cell strings.
Table {
rows: Vec<Vec<String>>,
},
/// Spacing of `lines` lines; always treated as layout-only by `is_layout_only`.
Spacer {
lines: u8,
},
/// A group of child nodes with a role, an optional title and a collapsed flag.
/// Treated as layout-only when the title is missing or blank and every child is layout-only.
Section {
role: SectionRole,
title: Option<String>,
collapsed: bool,
nodes: Vec<IndexNode>,
},
/// An image reference with alt text and an optional source string.
Image {
alt: String,
src: Option<String>,
},
/// A form (see `Form`).
Form(Form),
/// An error message carried as a node.
/// NOTE(review): who produces and renders this variant is not visible in this file.
Error(String),
}
impl IndexNode {
fn is_layout_only(&self) -> bool {
match self {
Self::Spacer { .. } => true,
Self::Section { title, nodes, .. } => {
title.as_deref().unwrap_or_default().trim().is_empty()
&& nodes.iter().all(Self::is_layout_only)
}
_ => false,
}
}
}
/// Role attached to a `Section` node.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SectionRole {
    Main,
    Navigation,
    Aside,
    Footer,
    Comments,
    Related,
    Unknown,
}

impl SectionRole {
    /// Returns the lower-case label of the role; `Unknown` is labelled `"section"`.
    #[must_use]
    pub const fn as_str(self) -> &'static str {
        match self {
            Self::Unknown => "section",
            Self::Related => "related",
            Self::Comments => "comments",
            Self::Footer => "footer",
            Self::Aside => "aside",
            Self::Navigation => "navigation",
            Self::Main => "main",
        }
    }
}

impl Display for SectionRole {
    /// Writes the same label as [`SectionRole::as_str`], without padding.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let label = self.as_str();
        f.write_str(label)
    }
}
/// A hyperlink: display text plus the target, both kept as plain strings.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Link {
    /// Text shown for the link.
    pub text: String,
    /// Link target, stored exactly as given (no parsing or validation happens here).
    pub href: String,
}

impl Link {
    /// Builds a link from anything convertible into owned strings.
    #[must_use]
    pub fn new(text: impl Into<String>, href: impl Into<String>) -> Self {
        let (text, href) = (text.into(), href.into());
        Self { text, href }
    }
}
/// A form description from which `Form::submit` builds a submission request.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Form {
/// Form name; not consulted by `Form::submit`.
pub name: String,
/// Raw method string, interpreted with `FormMethod::parse` when submitting.
pub method: String,
/// Raw action string; either an absolute URL or one resolved against the base URL passed to `submit`.
pub action: String,
/// Declared inputs, visited in order when submission fields are collected.
pub inputs: Vec<Input>,
/// Declared buttons; not consulted by `Form::submit`.
pub buttons: Vec<ButtonAction>,
}
/// One declared input of a `Form`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Input {
/// Field name; inputs with an empty name are left out of submissions.
pub name: String,
/// Input kind string; `button`, `submit`, `reset` and `image` (ASCII case-insensitive,
/// surrounding whitespace ignored) are left out of submissions.
pub kind: String,
/// Default value, used when the caller supplies no override for `name`.
pub value: Option<String>,
/// When `true`, submission fails if the resolved value is empty.
pub required: bool,
}
/// A button declared on a `Form`.
/// NOTE(review): nothing in this file reads these fields; the notes below are inferred
/// from the field names and should be confirmed against the code that uses them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ButtonAction {
/// Optional field name of the button.
pub name: Option<String>,
/// Optional value of the button.
pub value: Option<String>,
/// Label of the button.
pub label: String,
}
/// Method used to submit a form.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FormMethod {
    Get,
    Post,
}

impl FormMethod {
    /// Parses a raw method string.
    ///
    /// Surrounding whitespace and ASCII case are ignored. `"post"` in any casing yields
    /// [`FormMethod::Post`]; every other input, including the empty string, yields
    /// [`FormMethod::Get`].
    #[must_use]
    pub fn parse(input: &str) -> Self {
        // Compare in place instead of allocating an upper-cased copy of the input.
        if input.trim().eq_ignore_ascii_case("POST") {
            Self::Post
        } else {
            Self::Get
        }
    }

    /// Returns the upper-case method name (`"GET"` or `"POST"`).
    #[must_use]
    pub const fn as_str(&self) -> &'static str {
        match self {
            Self::Get => "GET",
            Self::Post => "POST",
        }
    }
}
/// Outcome of `Form::validate`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ValidationState {
/// No required input resolved to an empty value.
Valid,
/// A required input resolved to an empty value; carries that input's name.
MissingRequiredField(String),
}
/// The request produced by `Form::submit`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FormSubmission {
/// Method parsed from the form's raw method string.
pub method: FormMethod,
/// Absolute target URL; for GET submissions its query string holds the encoded fields.
pub action: IndexUrl,
/// URL-encoded fields for POST submissions; `None` for GET submissions.
pub body: Option<String>,
}
/// Reasons why building a form submission can fail.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FormSubmitError {
    /// A required input resolved to an empty value; carries the input's name.
    MissingRequiredField(String),
    /// The action is not an absolute URL and no base URL was supplied; carries the raw action.
    RelativeActionWithoutBase(String),
    /// The action (or the base URL it was joined with) could not be parsed.
    InvalidAction(UrlError),
}

impl Display for FormSubmitError {
    /// Writes a one-line, human-readable description of the failure.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InvalidAction(error) => {
                f.write_fmt(format_args!("form action is invalid: {error}"))
            }
            Self::RelativeActionWithoutBase(action) => {
                f.write_fmt(format_args!("form action requires a base URL: {action}"))
            }
            Self::MissingRequiredField(name) => {
                f.write_fmt(format_args!("required form field is missing: {name}"))
            }
        }
    }
}

impl std::error::Error for FormSubmitError {}
impl Form {
    /// Returns the submission method obtained by parsing the raw `method` string.
    #[must_use]
    pub fn form_method(&self) -> FormMethod {
        FormMethod::parse(&self.method)
    }

    /// Builds the request that submitting this form with `values` would send.
    ///
    /// `values` holds `(name, value)` pairs that override input defaults or add extra
    /// fields. `base_url` is only needed when `action` is not an absolute URL.
    ///
    /// * GET: the encoded fields become the query string of the returned action URL,
    ///   replacing any query already present in `action`; `body` is `None`.
    /// * POST: the action URL is returned as resolved and the fields are URL-encoded
    ///   into `body`.
    ///
    /// # Errors
    ///
    /// * [`FormSubmitError::MissingRequiredField`] when a required input resolves to an
    ///   empty value.
    /// * [`FormSubmitError::RelativeActionWithoutBase`] when `action` is not absolute and
    ///   `base_url` is `None`.
    /// * [`FormSubmitError::InvalidAction`] when the action cannot be parsed or joined.
    pub fn submit(
        &self,
        base_url: Option<&IndexUrl>,
        values: &[(&str, &str)],
    ) -> Result<FormSubmission, FormSubmitError> {
        // Field validation runs first, so a missing required field is reported even when
        // the action would also fail to resolve.
        let fields = self.submission_fields(values)?;
        let method = self.form_method();
        let action = resolve_action(&self.action, base_url)?;
        match method {
            FormMethod::Get => {
                let mut url = ::url::Url::parse(action.as_str()).map_err(|error| {
                    FormSubmitError::InvalidAction(UrlError::Invalid(error.to_string()))
                })?;
                {
                    let mut pairs = url.query_pairs_mut();
                    // The HTML form-submission algorithm sets the action URL's query to
                    // the encoded form data, so a query written into `action` itself is
                    // dropped rather than merged with the fields.
                    pairs.clear();
                    for (name, value) in &fields {
                        pairs.append_pair(name, value);
                    }
                    // `pairs` must go out of scope before `url` is read again.
                }
                Ok(FormSubmission {
                    method,
                    action: IndexUrl::parse(url.as_str())
                        .map_err(FormSubmitError::InvalidAction)?,
                    body: None,
                })
            }
            FormMethod::Post => {
                let mut serializer = ::url::form_urlencoded::Serializer::new(String::new());
                for (name, value) in &fields {
                    serializer.append_pair(name, value);
                }
                Ok(FormSubmission {
                    method,
                    action,
                    body: Some(serializer.finish()),
                })
            }
        }
    }

    /// Checks `values` against the required inputs without building a request.
    ///
    /// Returns [`ValidationState::MissingRequiredField`] naming the first required input
    /// (in declaration order) whose resolved value is empty, otherwise
    /// [`ValidationState::Valid`].
    pub fn validate(&self, values: &[(&str, &str)]) -> ValidationState {
        match self.submission_fields(values) {
            Err(FormSubmitError::MissingRequiredField(name)) => {
                ValidationState::MissingRequiredField(name)
            }
            // `submission_fields` produces no other error kind; any other failure would
            // not be a field-validation problem, so it is reported as valid.
            Ok(_) | Err(_) => ValidationState::Valid,
        }
    }

    /// Resolves the `(name, value)` pairs to submit.
    ///
    /// Declared inputs come first, in declaration order, skipping inputs with an empty
    /// name or a button-like kind. Each takes its override from `values` if present,
    /// else its default, else the empty string. Overrides that match no emitted input
    /// are appended afterwards, sorted by name. When `values` repeats a name, the last
    /// occurrence wins.
    fn submission_fields(
        &self,
        values: &[(&str, &str)],
    ) -> Result<Vec<(String, String)>, FormSubmitError> {
        let overrides = values
            .iter()
            .map(|(name, value)| ((*name).to_owned(), (*value).to_owned()))
            .collect::<BTreeMap<_, _>>();
        let mut fields = Vec::new();
        for input in &self.inputs {
            if input.name.is_empty() || is_button_like(&input.kind) {
                continue;
            }
            let value = overrides
                .get(&input.name)
                .cloned()
                .or_else(|| input.value.clone())
                .unwrap_or_default();
            if input.required && value.is_empty() {
                return Err(FormSubmitError::MissingRequiredField(input.name.clone()));
            }
            fields.push((input.name.clone(), value));
        }
        // Remaining overrides are extra fields the form did not declare (or declared
        // only as a skipped input); map iteration yields them in sorted name order.
        for (name, value) in overrides {
            if !fields.iter().any(|(field_name, _)| field_name == &name) {
                fields.push((name, value));
            }
        }
        Ok(fields)
    }
}
fn resolve_action(action: &str, base_url: Option<&IndexUrl>) -> Result<IndexUrl, FormSubmitError> {
if let Ok(url) = IndexUrl::parse(action) {
return Ok(url);
}
let Some(base_url) = base_url else {
return Err(FormSubmitError::RelativeActionWithoutBase(
action.to_owned(),
));
};
let base = ::url::Url::parse(base_url.as_str())
.map_err(|error| FormSubmitError::InvalidAction(UrlError::Invalid(error.to_string())))?;
let joined = base
.join(action)
.map_err(|error| FormSubmitError::InvalidAction(UrlError::Invalid(error.to_string())))?;
IndexUrl::parse(joined.as_str()).map_err(FormSubmitError::InvalidAction)
}
/// Reports whether an input `kind` names a button-style control.
///
/// Matches `button`, `submit`, `reset` and `image`, ignoring ASCII case and surrounding
/// whitespace. Inputs of these kinds are left out of the submitted fields.
fn is_button_like(kind: &str) -> bool {
    const BUTTON_KINDS: [&str; 4] = ["button", "submit", "reset", "image"];
    let kind = kind.trim();
    // Compare in place instead of allocating a lower-cased copy of `kind`.
    BUTTON_KINDS
        .iter()
        .any(|candidate| kind.eq_ignore_ascii_case(candidate))
}
#[cfg(test)]
mod tests {
    use super::{
        AdapterId, DocumentQuality, DocumentQualityCategory, Form, FormMethod, FormSubmitError,
        IndexDocument, IndexNode, Input, Link, SectionRole, ValidationState,
    };
    use crate::IndexUrl;

    /// Return type of the tests that propagate failures with `?`.
    type TestResult = Result<(), Box<dyn std::error::Error>>;

    /// Builds an [`Input`] from borrowed parts.
    fn input(name: &str, kind: &str, value: Option<&str>, required: bool) -> Input {
        Input {
            name: name.to_owned(),
            kind: kind.to_owned(),
            value: value.map(str::to_owned),
            required,
        }
    }

    /// Builds a button-less [`Form`] from borrowed parts.
    fn form(name: &str, method: &str, action: &str, inputs: Vec<Input>) -> Form {
        Form {
            name: name.to_owned(),
            method: method.to_owned(),
            action: action.to_owned(),
            inputs,
            buttons: Vec::new(),
        }
    }

    /// A freshly titled document keeps its title and has no content.
    #[test]
    fn document_starts_empty() {
        let document = IndexDocument::titled("Example");
        assert_eq!(document.title, "Example");
        assert!(document.is_empty());
    }

    /// Pushing a paragraph makes the document non-empty.
    #[test]
    fn document_accepts_nodes() {
        let mut document = IndexDocument::titled("Example");
        document.push(IndexNode::Paragraph(String::from("Hello")));
        assert!(!document.is_empty());
    }

    /// Spacers alone do not count as content.
    #[test]
    fn document_with_only_layout_spacers_is_empty() {
        let mut document = IndexDocument::titled("Example");
        document.push(IndexNode::Spacer { lines: 2 });
        assert!(document.is_empty());
    }

    /// An untitled section holding only a spacer does not count as content.
    #[test]
    fn document_with_only_empty_section_is_empty() {
        let section = IndexNode::Section {
            role: SectionRole::Aside,
            title: None,
            collapsed: true,
            nodes: vec![IndexNode::Spacer { lines: 1 }],
        };
        let mut document = IndexDocument::titled("Example");
        document.push(section);
        assert!(document.is_empty());
    }

    /// `as_str` and `Display` agree on every role label.
    #[test]
    fn section_role_names_are_stable() {
        let expected = [
            (SectionRole::Main, "main"),
            (SectionRole::Navigation, "navigation"),
            (SectionRole::Aside, "aside"),
            (SectionRole::Footer, "footer"),
            (SectionRole::Comments, "comments"),
            (SectionRole::Related, "related"),
            (SectionRole::Unknown, "section"),
        ];
        for (role, label) in expected {
            assert_eq!(role.as_str(), label);
            assert_eq!(role.to_string(), label);
        }
    }

    /// The constructor stores both strings untouched.
    #[test]
    fn link_constructor_preserves_text_and_href() {
        let docs = Link::new("Docs", "https://example.com/docs");
        assert_eq!(docs.text, "Docs");
        assert_eq!(docs.href, "https://example.com/docs");
    }

    /// `as_str` and `Display` both yield the wrapped identifier.
    #[test]
    fn adapter_id_displays_stable_value() {
        let adapter = AdapterId::new("github.repository");
        assert_eq!(adapter.as_str(), "github.repository");
        assert_eq!(adapter.to_string(), "github.repository");
    }

    /// `as_str` and `Display` agree on every category label.
    #[test]
    fn document_quality_category_names_are_stable() {
        let expected = [
            (DocumentQualityCategory::Adapter, "adapter"),
            (DocumentQualityCategory::StrongGeneric, "strong-generic"),
            (DocumentQualityCategory::PartialGeneric, "partial-generic"),
            (DocumentQualityCategory::Fallback, "fallback"),
            (DocumentQualityCategory::Failed, "failed"),
        ];
        for (category, name) in expected {
            assert_eq!(category.as_str(), name);
            assert_eq!(category.to_string(), name);
        }
    }

    /// Scores above 100 are stored as 100 and reasons are kept in order.
    #[test]
    fn document_quality_clamps_score() {
        let assessed = DocumentQuality::new(
            DocumentQualityCategory::StrongGeneric,
            250,
            ["readable body"],
        );
        assert_eq!(assessed.score, 100);
        assert_eq!(assessed.reasons, vec!["readable body".to_owned()]);
    }

    /// A relative GET action is resolved against the base and carries the encoded field.
    #[test]
    fn get_form_submission_resolves_query_url() -> TestResult {
        let search = form(
            "search",
            "GET",
            "/search",
            vec![input("q", "search", None, true)],
        );
        let base = IndexUrl::parse("https://example.com/docs/")?;
        let sent = search.submit(Some(&base), &[("q", "index browser")])?;
        assert_eq!(sent.method, FormMethod::Get);
        assert_eq!(
            sent.action.as_str(),
            "https://example.com/search?q=index+browser"
        );
        assert_eq!(sent.body, None);
        Ok(())
    }

    /// POST keeps the action untouched and places defaults plus extras in the body.
    #[test]
    fn post_form_submission_uses_encoded_body() -> TestResult {
        let login = form(
            "login",
            "POST",
            "https://example.com/login",
            vec![input("token", "hidden", Some("abc"), false)],
        );
        let sent = login.submit(None, &[("user", "ada")])?;
        assert_eq!(sent.method, FormMethod::Post);
        assert_eq!(sent.action.as_str(), "https://example.com/login");
        assert_eq!(sent.body.as_deref(), Some("token=abc&user=ada"));
        Ok(())
    }

    /// Overrides replace input defaults; defaults apply when no override is given.
    #[test]
    fn form_submission_uses_default_field_values_and_allows_overrides() -> TestResult {
        let filters = form(
            "filters",
            "GET",
            "https://example.com/search",
            vec![
                input("q", "search", None, true),
                input("sort", "select", Some("recent"), false),
            ],
        );
        let overridden = filters.submit(None, &[("q", "index"), ("sort", "relevance")])?;
        assert_eq!(
            overridden.action.as_str(),
            "https://example.com/search?q=index&sort=relevance"
        );
        let defaulted = filters.submit(None, &[("q", "index")])?;
        assert_eq!(
            defaulted.action.as_str(),
            "https://example.com/search?q=index&sort=recent"
        );
        Ok(())
    }

    /// Both `validate` and `submit` name the required field that was left empty.
    #[test]
    fn form_submission_reports_missing_required_field() {
        let search = form(
            "search",
            "GET",
            "https://example.com/search",
            vec![input("q", "search", None, true)],
        );
        assert_eq!(
            search.validate(&[]),
            ValidationState::MissingRequiredField("q".to_owned())
        );
        assert_eq!(
            search.submit(None, &[]),
            Err(FormSubmitError::MissingRequiredField("q".to_owned()))
        );
    }

    /// A relative action with no base URL yields a dedicated error carrying the action.
    #[test]
    fn relative_action_without_base_is_diagnostic() {
        let search = form("search", "GET", "/search", Vec::new());
        assert_eq!(
            search.submit(None, &[]),
            Err(FormSubmitError::RelativeActionWithoutBase(
                "/search".to_owned()
            ))
        );
    }
}