#![allow(
clippy::expect_used,
clippy::unwrap_used,
clippy::panic,
clippy::missing_panics_doc
)]
use std::time::Duration;
use serde_json::json;
use stygian_graph::adapters::rest_api::{RestApiAdapter, RestApiConfig};
use stygian_graph::domain::error::{ServiceError, StygianError};
use stygian_graph::ports::{ScrapingService, ServiceInput};
/// Builds a [`RestApiAdapter`] with retries disabled so every test issues
/// exactly one HTTP request (15 s timeout, no retry back-off).
fn no_retry_adapter() -> RestApiAdapter {
    let config = RestApiConfig {
        timeout: Duration::from_secs(15),
        max_retries: 0,
        ..Default::default()
    };
    RestApiAdapter::with_config(config)
}
/// Wraps a URL in a [`ServiceInput`] carrying an empty JSON params object.
fn input(url: &str) -> ServiceInput {
    let params = json!({});
    ServiceInput {
        url: url.to_owned(),
        params,
    }
}
#[tokio::test]
#[ignore = "requires network access to crawllab.dev"]
async fn status_200_returns_ok() {
    // A 2xx response must map to Ok, with the body carried in `data`.
    let adapter = no_retry_adapter();
    let result = adapter.execute(input("https://crawllab.dev/200")).await;
    assert!(result.is_ok(), "expected Ok for HTTP 200, got: {result:?}");
    let out = result.unwrap();
    assert!(
        !out.data.is_empty(),
        "200 response body should be non-empty"
    );
}
#[tokio::test]
#[ignore = "requires network access to crawllab.dev"]
async fn status_404_returns_unavailable_error() {
    // Client-error statuses are expected to surface as Unavailable,
    // with the numeric status embedded in the display message.
    let adapter = no_retry_adapter();
    let err = adapter
        .execute(input("https://crawllab.dev/404"))
        .await
        .expect_err("expected Err for HTTP 404");
    let is_unavailable = matches!(err, StygianError::Service(ServiceError::Unavailable(_)));
    assert!(
        is_unavailable,
        "expected ServiceError::Unavailable, got: {err:?}"
    );
    let msg = err.to_string();
    assert!(
        msg.contains("404"),
        "error should mention status 404, got: {msg}"
    );
}
#[tokio::test]
#[ignore = "requires network access to crawllab.dev"]
async fn status_429_returns_rate_limited_error() {
    // Throttling gets its own variant so callers can choose to back off.
    let adapter = no_retry_adapter();
    let err = adapter
        .execute(input("https://crawllab.dev/429"))
        .await
        .expect_err("expected Err for HTTP 429");
    let rate_limited = matches!(err, StygianError::Service(ServiceError::RateLimited { .. }));
    assert!(
        rate_limited,
        "expected ServiceError::RateLimited, got: {err:?}"
    );
}
#[tokio::test]
#[ignore = "requires network access to crawllab.dev"]
async fn status_500_returns_unavailable_error() {
    // Server errors map to Unavailable, same as client errors, with the
    // status number present in the message.
    let adapter = no_retry_adapter();
    let err = adapter
        .execute(input("https://crawllab.dev/500"))
        .await
        .expect_err("expected Err for HTTP 500");
    let is_unavailable = matches!(err, StygianError::Service(ServiceError::Unavailable(_)));
    assert!(
        is_unavailable,
        "expected ServiceError::Unavailable, got: {err:?}"
    );
    let msg = err.to_string();
    assert!(
        msg.contains("500"),
        "error should mention status 500, got: {msg}"
    );
}
#[tokio::test]
#[ignore = "requires network access to crawllab.dev"]
async fn temporary_redirect_follows_to_200() {
    // The HTTP client is expected to chase the temporary redirect to the
    // final 200 without the adapter seeing an error.
    let adapter = no_retry_adapter();
    let result = adapter
        .execute(input("https://crawllab.dev/redirect/temporary-to-200"))
        .await;
    assert!(
        result.is_ok(),
        "reqwest should follow 302 transparently; got: {result:?}"
    );
}
#[tokio::test]
#[ignore = "requires network access to crawllab.dev"]
async fn permanent_redirect_follows_to_200() {
    // Same as the temporary-redirect case, but for a permanent redirect.
    let adapter = no_retry_adapter();
    let result = adapter
        .execute(input("https://crawllab.dev/redirect/permanent-to-200"))
        .await;
    assert!(
        result.is_ok(),
        "reqwest should follow 301 transparently; got: {result:?}"
    );
}
#[tokio::test]
#[ignore = "requires network access to crawllab.dev"]
async fn redirect_cycle_returns_error() {
    // An endless redirect chain should trip the client's redirect limit
    // and surface as Unavailable rather than hanging.
    let adapter = no_retry_adapter();
    let err = adapter
        .execute(input("https://crawllab.dev/redirect/cycle-a"))
        .await
        .expect_err("infinite redirect cycle should produce an error");
    let is_unavailable = matches!(err, StygianError::Service(ServiceError::Unavailable(_)));
    assert!(
        is_unavailable,
        "expected ServiceError::Unavailable for redirect loop, got: {err:?}"
    );
}
#[tokio::test]
#[ignore = "requires network access to crawllab.dev"]
async fn json_endpoint_returns_non_empty_data() {
    // Happy path: response bytes must make it into `out.data`.
    let adapter = no_retry_adapter();
    let out = adapter
        .execute(input("https://crawllab.dev/json"))
        .await
        .expect("json endpoint should succeed");
    assert!(!out.data.is_empty(), "/json body should be non-empty");
}
#[tokio::test]
#[ignore = "requires network access to crawllab.dev"]
async fn text_endpoint_returns_non_empty_data() {
    // Plain-text responses should also populate `out.data`.
    let adapter = no_retry_adapter();
    let out = adapter
        .execute(input("https://crawllab.dev/text"))
        .await
        .expect("text endpoint should succeed");
    assert!(!out.data.is_empty(), "/text body should be non-empty");
}
#[tokio::test]
#[ignore = "requires network access to crawllab.dev"]
async fn empty_204_response_is_ok() {
    // A body-less 2xx must still count as success, not be mistaken for
    // a failed fetch.
    let adapter = no_retry_adapter();
    let result = adapter.execute(input("https://crawllab.dev/empty")).await;
    assert!(
        result.is_ok(),
        "204 No Content is 2xx and must not be treated as an error, got: {result:?}"
    );
}
#[tokio::test]
#[ignore = "requires network access to crawllab.dev"]
async fn random_content_endpoint_succeeds() {
    // Content varies per request; we only assert success and non-emptiness.
    let adapter = no_retry_adapter();
    let out = adapter
        .execute(input("https://crawllab.dev/random"))
        .await
        .expect("/random endpoint should succeed");
    assert!(!out.data.is_empty(), "/random body should be non-empty");
}
#[tokio::test]
#[ignore = "requires network access to crawllab.dev"]
async fn forum_page_html_body_is_non_empty() {
    // A single fetch should yield a non-empty HTML body and record exactly
    // one page in the adapter's metadata.
    let adapter = no_retry_adapter();
    let out = adapter
        .execute(input("https://crawllab.dev/forum?page=1"))
        .await
        .expect("forum page 1 should succeed");
    assert!(
        !out.data.is_empty(),
        "/forum?page=1 HTML body should not be empty"
    );
    let page_count = out
        .metadata
        .get("page_count")
        .and_then(serde_json::Value::as_u64);
    assert_eq!(
        page_count,
        Some(1),
        "single request should record page_count=1"
    );
}