use crate::runtime::Runtime;
use crate::script::support::into_vec_of_strings;
use crate::support::W;
use crate::support::text::trim_if_needed;
use crate::{Result, support};
use html_helpers::Elem;
use mlua::{IntoLua, Lua, Table, Value};
/// Builds the Lua table for the HTML module.
///
/// The returned table carries four entries:
/// - `slim`: backed by `html_slim`
/// - `select`: backed by `html_select`
/// - `to_md`: backed by `html_to_md`
/// - `prune_to_content`: the very same Lua function object as `slim`
///
/// The `_runtime` argument is accepted but not used here.
///
/// # Errors
///
/// Propagates any failure from creating the table, creating a function,
/// or setting a key on the table.
pub fn init_module(lua: &Lua, _runtime: &Runtime) -> Result<Table> {
    let module = lua.create_table()?;

    // Kept in a local because the same function is registered a second time
    // under `prune_to_content` at the end.
    let slim = lua.create_function(html_slim)?;
    module.set("slim", slim.clone())?;

    // Lua passes the two arguments as a tuple; unpack and forward them.
    let select = lua.create_function(|lua, args: (String, Value)| {
        let (html_content, selectors) = args;
        html_select(lua, html_content, selectors)
    })?;
    module.set("select", select)?;

    let to_md = lua.create_function(html_to_md)?;
    module.set("to_md", to_md)?;

    // Second name for `slim`.
    module.set("prune_to_content", slim)?;

    Ok(module)
}
fn html_slim(_lua: &Lua, html_content: String) -> mlua::Result<String> {
support::html::slim(html_content)
.map_err(|err| mlua::Error::RuntimeError(format!("Failed to prune HTML content: {err}")))
}
fn html_select(lua: &Lua, html_content: String, selectors: Value) -> mlua::Result<Value> {
if selectors.is_nil() {
let seq = lua.create_sequence_from(Vec::<String>::new())?;
return Ok(Value::Table(seq));
}
let selectors = into_vec_of_strings(selectors, "aip.html.select")?;
let els = html_helpers::select(&html_content, &selectors)
.map_err(|err| crate::Error::custom(format!("Cannot apply selector '{selectors:?}'.\nCause: {err}")))?;
let els: Vec<mlua::Value> = els
.into_iter()
.map(|el| W(el).into_lua(lua))
.collect::<mlua::Result<Vec<_>>>()
.map_err(|err| {
crate::Error::custom(format!("aip.html.select cannot make elem into Lua object. Cause {err}"))
})?;
let seq = lua.create_sequence_from(els)?;
Ok(Value::Table(seq))
}
fn html_to_md(_lua: &Lua, html_content: String) -> mlua::Result<String> {
support::html::to_md(html_content)
.map_err(|err| mlua::Error::RuntimeError(format!("Failed to convert HTML to Markdown: {err}")))
}
impl IntoLua for W<Elem> {
fn into_lua(self, lua: &Lua) -> mlua::Result<Value> {
let el = self.0;
let table = lua.create_table()?;
table.set("tag", el.tag)?;
if let Some(attrs) = el.attrs {
table.set("attrs", attrs)?;
}
table.set("text", el.text.map(trim_if_needed))?;
table.set("inner_html", el.inner_html.map(trim_if_needed))?;
Ok(Value::Table(table))
}
}
// Tests for the Lua-facing HTML functions. Each test builds a Lua state with
// this module registered under "html", evaluates a small Lua script, and
// checks the value the script returns.
#[cfg(test)]
mod tests {
// Boxed-error result so `?` works on both string errors and library errors.
type Result<T> = core::result::Result<T, Box<dyn std::error::Error>>;
use crate::_test_support::{eval_lua, setup_lua};
use crate::script::aip_modules::aip_html;
use value_ext::JsonValueExt;
// `aip.html.slim` should drop <script>, <style> and comment nodes while
// keeping the regular body content.
#[tokio::test]
async fn test_lua_html_slim_ok() -> Result<()> {
// -- Setup & Fixtures
let lua = setup_lua(aip_html::init_module, "html").await?;
let fx_script = r#"
local html_content = [[
<!DOCTYPE html>
<html>
<head>
<script>alert('test');</script>
<style>body { color: red; }</style>
</head>
<body>
<div class="content">Hello World</div>
<!-- comment -->
</body>
</html>
]]
return aip.html.slim(html_content)
"#;
// -- Exec
let res = eval_lua(&lua, fx_script)?;
// -- Check
let cleaned_html = res.as_str().ok_or("Should have res")?;
assert!(!cleaned_html.contains("<script>"));
assert!(!cleaned_html.contains("<style>"));
assert!(!cleaned_html.contains("<!-- comment -->"));
assert!(cleaned_html.contains(r#"<div class="content">Hello World</div>"#));
Ok(())
}
// `aip.html.select` with a single class selector: expects two matches and
// checks tag, attrs, text and inner_html of each.
#[tokio::test]
async fn test_lua_html_select_simple() -> Result<()> {
// -- Setup & Fixtures
let lua = setup_lua(aip_html::init_module, "html").await?;
let fx_script = r#"
local html_content = [[
<div>First div<div>
<li class="me">Bullet One </li>
<section>
<p>Some text</p>
<DIV class="me other " TITLE = " Some Title" > Div <strong>Two </strong></DIV>
</section>
]]
return aip.html.select(html_content, ".me")
"#;
// -- Exec
let res = eval_lua(&lua, fx_script)?;
// -- Check
let res = res.as_array().ok_or("Should be array")?;
assert_eq!(res.len(), 2);
// First match: the <li>; text and inner_html come back without the trailing space.
let el = res.first().ok_or("Should have at least one")?;
assert_eq!(el.x_get_str("tag")?, "li");
assert_eq!(el.x_get_str("/attrs/class")?, "me");
assert_eq!(el.x_get_str("text")?, "Bullet One");
assert_eq!(el.x_get_str("inner_html")?, "Bullet One");
// Second match: the upper-case <DIV>; tag and attribute names are expected
// in lower case, while attribute values keep their original whitespace.
let el = res.get(1).ok_or("Should have at least two")?;
assert_eq!(el.x_get_str("tag")?, "div");
assert_eq!(el.x_get_str("/attrs/class")?, "me other ");
assert_eq!(el.x_get_str("/attrs/title")?, " Some Title");
assert_eq!(el.x_get_str("text")?, "Div Two");
assert_eq!(el.x_get_str("inner_html")?, "Div <strong>Two </strong>");
Ok(())
}
// `aip.html.to_md` on a heading, a paragraph with bold text, and a list.
#[tokio::test]
async fn test_lua_html_to_md_ok() -> Result<()> {
// -- Setup & Fixtures
let lua = setup_lua(aip_html::init_module, "html").await?;
let fx_script = r#"
local html_content = "<h1>Title</h1><p>Some <strong>bold</strong> text.</p><ul><li>Item 1</li><li>Item 2</li></ul>"
return aip.html.to_md(html_content)
"#;
// -- Exec
let res = eval_lua(&lua, fx_script)?;
// -- Check
let md_content = res.as_str().ok_or("Result should be string")?;
let expected_md = "# Title\n\nSome **bold** text.\n\n- Item 1\n- Item 2";
assert_eq!(md_content, expected_md);
Ok(())
}
// `aip.html.to_md` on an empty string should return an empty string.
#[tokio::test]
async fn test_lua_html_to_md_empty_input() -> Result<()> {
// -- Setup & Fixtures
let lua = setup_lua(aip_html::init_module, "html").await?;
let fx_script = r#"
return aip.html.to_md("")
"#;
// -- Exec
let res = eval_lua(&lua, fx_script)?;
// -- Check
let md_content = res.as_str().ok_or("Should have res")?;
assert_eq!(md_content, "");
Ok(())
}
}