/// Builds an asynchronous processing pipeline over a collection of URLs.
///
/// Public form: `chain!(urls, ctx => step_a -> step_b -> ...)`.
///
/// The macro expands to an `async move` block. Inside it, `ctx` is wrapped in an `Arc`,
/// every URL is turned into a `(url, url)` pair, and the steps run in order inside
/// `$crate::types::CTX.scope(..)` and `$crate::types::FETCH_CACHE.scope(..)`.
/// The block evaluates to the item vector left after the last step:
/// `Vec<(String, String)>`, or `Vec<(String, Vec<String>)>` when the last step is an
/// `extract_*` step.
///
/// Recognised step names:
/// - `clean_urls`: hands all payloads to `tools::clean::clean_urls` and re-pairs every
///   returned value with itself.
/// - `clean_emails` / `clean_phones`: flatten per-URL lists, hand them to the matching
///   `tools::clean` function, and re-pair every returned value with itself.
/// - `extract_emails` / `extract_phones`: run the matching `tools::extract` function per
///   item through `tools::batch::batch`, keeping the URL next to the extracted list.
/// - `map_children` / `map_page`: run the matching `tools::map` function per item; every
///   returned entry becomes its own `(entry, entry)` pair.
/// - `clean_html`: runs `tools::clean::clean_html` per item, keeping the URL.
/// - any other identifier: called as `step(&data).await`; `.ok()` is applied to the
///   outcome and only the values that survive the final `flatten()` are kept.
///
/// The `@process*` arms are internal plumbing for the recursion. Arm order matters: the
/// named steps must stay ahead of the catch-all identifier arm.
#[macro_export]
macro_rules! chain {
    // Terminal case: no steps remain, so the pipeline evaluates to the items as they are.
    (@process $items:expr, $ctx:expr $(,)?) => { $items };

    // Helper: pass every payload (second tuple field) to `$fn` as one list, then pair each
    // returned value with itself. The original keys are discarded.
    (@process_list_dedupe $items:expr, $ctx:expr, $fn:expr $(, $rest:ident)*) => {{
        let payloads: Vec<String> = $items
            .iter()
            .map(|(_, payload)| payload.clone())
            .collect();
        let kept = $fn(&payloads);
        let next: Vec<(String, String)> = kept
            .into_iter()
            .map(|value| (value.clone(), value))
            .collect();
        $crate::chain!(@process next, $ctx $(, $rest)*)
    }};

    // Helper: apply `$fn` to each URL's list separately, keeping the URL attached.
    (@process_per_url_list $items:expr, $ctx:expr, $fn:expr $(, $rest:ident)*) => {{
        let mut per_url = Vec::new();
        for (source, entries) in $items {
            let tidy = $fn(&entries);
            per_url.push((source, tidy));
        }
        let next: Vec<(String, Vec<String>)> = per_url;
        $crate::chain!(@process next, $ctx $(, $rest)*)
    }};

    // Helper: merge all per-URL lists into one list, pass it to `$fn`, then pair each
    // returned value with itself. The URLs are discarded.
    (@process_flatten_and_clean $items:expr, $ctx:expr, $fn:expr $(, $rest:ident)*) => {{
        let flat: Vec<String> = $items
            .into_iter()
            .flat_map(|(_, entries): (String, Vec<String>)| entries)
            .collect();
        let kept = $fn(&flat);
        let next: Vec<(String, String)> = kept
            .into_iter()
            .map(|value| (value.clone(), value))
            .collect();
        $crate::chain!(@process next, $ctx $(, $rest)*)
    }};

    // Helper: run the async extractor `$fn` on every payload through `batch`, passing
    // `$ctx.concurrency` as its second argument, and keep `(url, extracted)` pairs.
    (@process_extract $items:expr, $ctx:expr, $fn:expr $(, $rest:ident)*) => {{
        let limit = $ctx.concurrency;
        let batched = $crate::tools::batch::batch(
            $items,
            limit,
            |(source, body): (String, String)| async move {
                let found = $fn(&body).await;
                Some((source, found))
            }
        ).await;
        let next: Vec<(String, Vec<String>)> = batched.into_iter().flatten().collect();
        $crate::chain!(@process next, $ctx $(, $rest)*)
    }};

    // Named steps that forward to one of the helpers above.
    (@process $items:expr, $ctx:expr, clean_urls $(, $rest:ident)*) => {{
        $crate::chain!(
            @process_list_dedupe $items, $ctx, $crate::tools::clean::clean_urls $(, $rest)*
        )
    }};
    (@process $items:expr, $ctx:expr, clean_emails $(, $rest:ident)*) => {{
        $crate::chain!(
            @process_flatten_and_clean $items, $ctx, $crate::tools::clean::clean_emails $(, $rest)*
        )
    }};
    (@process $items:expr, $ctx:expr, clean_phones $(, $rest:ident)*) => {{
        $crate::chain!(
            @process_flatten_and_clean $items, $ctx, $crate::tools::clean::clean_phones $(, $rest)*
        )
    }};
    (@process $items:expr, $ctx:expr, extract_emails $(, $rest:ident)*) => {{
        $crate::chain!(
            @process_extract $items, $ctx, $crate::tools::extract::extract_emails $(, $rest)*
        )
    }};
    (@process $items:expr, $ctx:expr, extract_phones $(, $rest:ident)*) => {{
        $crate::chain!(
            @process_extract $items, $ctx, $crate::tools::extract::extract_phones $(, $rest)*
        )
    }};

    // `map_children`: every entry returned for an item becomes its own `(entry, entry)` pair.
    (@process $items:expr, $ctx:expr, map_children $(, $rest:ident)*) => {{
        let limit = $ctx.concurrency;
        let batched = $crate::tools::batch::batch(
            $items,
            limit,
            |(source, page): (String, String)| async move {
                let found = $crate::tools::map::map_children(&page, &source).await;
                found.into_iter()
                    .map(|entry| (entry.clone(), entry))
                    .collect::<Vec<(String, String)>>()
            }
        ).await;
        let next: Vec<(String, String)> = batched.into_iter().flatten().collect();
        $crate::chain!(@process next, $ctx $(, $rest)*)
    }};

    // `map_page`: same shape as `map_children`, but calls `tools::map::map_page`.
    (@process $items:expr, $ctx:expr, map_page $(, $rest:ident)*) => {{
        let limit = $ctx.concurrency;
        let batched = $crate::tools::batch::batch(
            $items,
            limit,
            |(source, page): (String, String)| async move {
                let found = $crate::tools::map::map_page(&page, &source).await;
                found.into_iter()
                    .map(|entry| (entry.clone(), entry))
                    .collect::<Vec<(String, String)>>()
            }
        ).await;
        let next: Vec<(String, String)> = batched.into_iter().flatten().collect();
        $crate::chain!(@process next, $ctx $(, $rest)*)
    }};

    // `clean_html`: replace each payload with the cleaner's output, keeping the URL.
    (@process $items:expr, $ctx:expr, clean_html $(, $rest:ident)*) => {{
        let limit = $ctx.concurrency;
        let batched = $crate::tools::batch::batch(
            $items,
            limit,
            |(source, body): (String, String)| async move {
                let tidy = $crate::tools::clean::clean_html(&body).await;
                Some((source, tidy))
            }
        ).await;
        let next: Vec<(String, String)> = batched.into_iter().flatten().collect();
        $crate::chain!(@process next, $ctx $(, $rest)*)
    }};

    // Catch-all step: any other identifier is called as an async function on the payload.
    // This arm must stay after every named step, otherwise it would capture them.
    (@process $items:expr, $ctx:expr, $fn:ident $(, $rest:ident)*) => {{
        let limit = $ctx.concurrency;
        let batched = $crate::tools::batch::batch(
            $items,
            limit,
            |(source, body): (String, String)| async move {
                $fn(&body).await.ok().map(|value| (source, value))
            }
        ).await;
        let next: Vec<(String, String)> = batched.into_iter().flatten().collect();
        $crate::chain!(@process next, $ctx $(, $rest)*)
    }};

    // Public entry point: `chain!(urls, ctx => first -> second -> ...)`.
    ($urls:expr, $ctx:expr => $first:ident $(-> $rest:ident)*) => {{
        async move {
            use std::sync::Arc;
            let shared = Arc::new($ctx);
            // Every URL starts out as both the key and the payload of its item.
            let seeds: Vec<(String, String)> = $urls
                .into_iter()
                .map(|seed| (seed.clone(), seed))
                .collect();
            let cache = $crate::types::fetch_cache_new();
            $crate::types::CTX.scope(shared.clone(), async move {
                $crate::types::FETCH_CACHE.scope(cache, async move {
                    $crate::chain!(@process seeds, shared, $first $(, $rest)*)
                }).await
            }).await
        }
    }};
}
/// Concatenates one or more collections into a single `Vec`.
///
/// Every argument is appended with `extend`, in the order written, so each argument
/// must be something `Vec::extend` accepts. A trailing comma is allowed.
#[macro_export]
macro_rules! merge {
    ($($vec:expr),+ $(,)?) => {{
        // Absolute path: `macro_rules!` hygiene does not cover type names, so a bare `Vec`
        // would resolve to whatever item named `Vec` is in scope at the call site.
        let mut merged = ::std::vec::Vec::new();
        $( merged.extend($vec); )+
        merged
    }};
}
/// Runs a processor and prints its result with `$crate::cli::print_json`.
///
/// Every arm expands to a block that evaluates to `()`. The leading `@tag` selects how
/// the input is obtained and how the processor is invoked:
///
/// - `@vec_async input, processor, args...`: calls `processor(&input, args...)` and
///   blocks on the returned future with `$crate::runtime::block_on`.
/// - `@vec input, processor, args...`: calls `processor(&input, args...)` synchronously.
/// - `@template input, processor, ctx`: calls `processor(vec![input.to_string()], ctx)`
///   and blocks on the returned future. When `ctx` is omitted,
///   `$crate::types::Context::auto()` is used.
/// - `@async ctx, input, [first, second]`: reads the data with
///   `$crate::cli::read_input(&input, ctx)`, awaits `first(&data)`, then passes a
///   reference to that value to `second`, whose return value is not awaited.
/// - `@async ctx, input, processor, args...`: reads the data the same way and blocks on
///   `processor(&data, args...)`.
#[macro_export]
macro_rules! run {
    // Async processor applied directly to the given input.
    (@vec_async $input:expr, $processor:expr $(, $arg:expr)* $(,)?) => {{
        let output = $crate::runtime::block_on($processor(&$input $(, $arg)*));
        $crate::cli::print_json(&output);
    }};

    // Synchronous processor applied directly to the given input.
    (@vec $input:expr, $processor:expr $(, $arg:expr)* $(,)?) => {{
        let output = $processor(&$input $(, $arg)*);
        $crate::cli::print_json(&output);
    }};

    // Template processor with an explicit context; the input becomes a one-element list.
    (@template $input:expr, $processor:expr, $ctx:expr $(,)?) => {{
        let target = $input;
        let output = $crate::runtime::block_on($processor(vec![target.to_string()], $ctx));
        $crate::cli::print_json(&output);
    }};

    // Template processor with the default context.
    (@template $input:expr, $processor:expr $(,)?) => {{
        $crate::run!(@template $input, $processor, $crate::types::Context::auto())
    }};

    // Two-stage form. This arm must precede the general `@async` arm, because
    // `[first, second]` would otherwise be parsed as a single array expression.
    (@async $ctx:expr, $input:expr, [$first:expr, $second:expr] $(,)?) => {{
        let payload = $crate::cli::read_input(&$input, $ctx);
        let output = $crate::runtime::block_on(async move {
            let staged = $first(&payload).await;
            $second(&staged)
        });
        $crate::cli::print_json(&output);
    }};

    // Single async processor applied to the data that was read.
    (@async $ctx:expr, $input:expr, $processor:expr $(, $arg:expr)* $(,)?) => {{
        let payload = $crate::cli::read_input(&$input, $ctx);
        let output = $crate::runtime::block_on($processor(&payload $(, $arg)*));
        $crate::cli::print_json(&output);
    }};
}
/// Removes duplicates from an iterable while keeping first-seen order.
///
/// - `dedupe!(list)`: keeps the first occurrence of every item. Items are cloned into a
///   `HashSet` for the membership check, so they must be `Clone + Hash + Eq`.
/// - `dedupe!(list, f)`: applies `f` to every item, discards results for which
///   `is_empty()` is true, and keeps the first occurrence of every remaining result.
///   The returned `Vec` holds the processed values, not the original items.
///
/// Standard-library types are named through absolute `::std` paths, because
/// `macro_rules!` hygiene does not cover paths: a relative `std::…` or a bare `Vec`
/// would resolve against whatever the call site has in scope.
#[macro_export]
macro_rules! dedupe {
    ($list:expr) => {{
        let mut seen = ::std::collections::HashSet::new();
        let mut unique = ::std::vec::Vec::new();
        for item in $list {
            // `insert` returns true only the first time a value is seen.
            if seen.insert(item.clone()) {
                unique.push(item);
            }
        }
        unique
    }};
    ($list:expr, $fn:expr) => {{
        let mut seen = ::std::collections::HashSet::new();
        let mut unique = ::std::vec::Vec::new();
        for item in $list {
            let processed = $fn(item);
            // Empty results are dropped before they can be recorded as seen.
            if !processed.is_empty() && seen.insert(processed.clone()) {
                unique.push(processed);
            }
        }
        unique
    }};
}