aipack 0.7.7-WIP

Command Agent runner to accelerate production coding with genai.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
//! Defines the `md` module, used in the lua engine.
//!
//! ---
//!
//! ## Lua documentation
//!
//! The `md` module exposes functions that process markdown content. Useful for
//! processing LLM responses.
//!
//! ### Functions
//!
//! - `aip.md.extract_blocks(md_content: string, lang?: string) -> Vec<MdBlock>`
//! - `aip.md.extract_blocks(md_content: string, {lang?: string, extrude: "content"}) -> Vec<MdBlock>, extruded_content`
//! - `aip.md.extract_meta(md_content: string) -> Table, string`
//! - `aip.md.outer_block_content_or_raw(md_content: string) -> string`

use crate::Result;
use crate::runtime::Runtime;
use crate::support::md::{self};
use crate::support::{Extrude, W};
use crate::types::MdBlock;
use mlua::{IntoLua, Lua, LuaSerdeExt, MultiValue, Table, Value};

/// Builds the Lua table that backs the `md` module.
///
/// The returned table exposes `extract_blocks`, `extract_meta` and
/// `outer_block_content_or_raw`, each bound to the function of the same name in this file.
/// The runtime argument is not used by this module.
pub fn init_module(lua: &Lua, _runtime: &Runtime) -> Result<Table> {
	let module = lua.create_table()?;

	// Lua-visible name paired with its Rust implementation.
	let registrations = [
		("extract_blocks", lua.create_function(extract_blocks)?),
		("extract_meta", lua.create_function(extract_meta)?),
		(
			"outer_block_content_or_raw",
			lua.create_function(outer_block_content_or_raw)?,
		),
	];

	for (name, function) in registrations {
		module.set(name, function)?;
	}

	Ok(module)
}

/// ## Lua Documentation
///
/// Extracts markdown blocks from a string, optionally filtering by language or extracting remaining content.
///
/// ```lua
/// -- API Signatures
/// aip.md.extract_blocks(md_content: string): Vec<MdBlock>
/// aip.md.extract_blocks(md_content: string, lang: string): Vec<MdBlock>
/// aip.md.extract_blocks(md_content: string, {lang?: string, extrude: "content"}): Vec<MdBlock>, string
/// ```
///
/// ### Arguments
///
/// - `md_content: string`: The markdown content to process.
/// - `options: string | table (optional)`:
///   - If a string, it's treated as the language filter.
///   - If a table:
///     - `lang: string (optional)`:  Filters blocks by this language.
///     - `extrude: "content" (optional)`:  If present, extracts the content outside the blocks.
///
/// ### Returns
///
/// When `extrude = "content"` is not specified:
///
/// ```ts
/// MdBlock[]
/// ```
///
/// When `extrude = "content"` is specified:
///
/// ```ts
/// [MdBlock[], string]
/// ```
///
/// - `MdBlock[]`: A list of markdown blocks matching the specified criteria.
/// - `string`: The content of the markdown outside of the extracted blocks.
///
/// ### Error
///
/// Returns an error if the `extrude` option is not equal to `"content"`.
fn extract_blocks(lua: &Lua, (md_content, options): (String, Option<Value>)) -> mlua::Result<MultiValue> {
	let (lang, extrude): (Option<String>, Option<Extrude>) = match options {
		// if options is of type string, then, just lang name
		Some(Value::String(string)) => (Some(string.to_string_lossy()), None),
		// if it is a table
		Some(Value::Table(table)) => {
			let lang = table.get::<Option<Value>>("lang")?;
			let lang = lang
				.map(|v| {
					v.to_string()
						.map_err(|_err| crate::Error::custom("md_extract_blocks lang options must be of type string"))
				})
				.transpose()?;

			let extrude = table.get::<Option<Value>>("extrude")?;
			let extrude = extrude
				.map(|extrude| match extrude {
					Value::String(extrude) => {
						if extrude.to_str().unwrap() == "content" {
							Ok(Some(Extrude::Content))
						} else {
							Err(crate::Error::custom(
								"md_extract_blocks extrude must be = to 'content' for now",
							))
						}
					}
					_ => Ok(None),
				})
				.transpose()?
				.flatten();

			(lang, extrude)
		}
		// TODO: Probably need to send error
		_ => (None, None),
	};

	let blocks_it = md::MdBlockIter::new(&md_content, lang.as_deref(), extrude);
	let mut values = MultiValue::new();

	match extrude {
		Some(Extrude::Content) => {
			let (blocks, content) = blocks_it.collect_blocks_and_extruded_content();
			values.push_back(blocks.into_lua(lua)?);
			let content = lua.create_string(&content)?;
			values.push_back(Value::String(content));
		}
		_ => {
			let blocks: Vec<MdBlock> = blocks_it.collect();
			values.push_back(blocks.into_lua(lua)?)
		}
	}

	Ok(values)
}

/// ## Lua Documentation
///
/// Pulls the meta blocks out of a markdown string, returning the merged meta values together
/// with the markdown that remains once those blocks are removed.
///
/// ```lua
/// -- API Signature
/// aip.md.extract_meta(md_content: string): Table, string
/// ```
///
/// ### Arguments
///
/// - `md_content: string`: The markdown content to process.
///
/// ### Returns
///
/// ```ts
/// [Table, string]
/// ```
///
/// - `Table`:  A Lua table holding the merged values of the meta blocks.
/// - `string`: The markdown content left after the meta blocks have been removed.
///
/// ### Error
///
/// Propagates any error raised while extracting the meta or converting the results to Lua values.
fn extract_meta(lua: &Lua, md_content: String) -> mlua::Result<MultiValue> {
	let (meta, remaining) = md::extract_meta(&md_content)?;

	// First return value: the merged meta, serialized into a Lua value.
	let meta_lua = lua.to_value(&meta)?;
	// Second return value: the remaining markdown.
	let remaining_lua = W(remaining).into_lua(lua)?;

	Ok(MultiValue::from_vec(vec![meta_lua, remaining_lua]))
}

/// ## Lua Documentation
///
/// Returns the content of the outermost code block, or the raw content when there is no such block.
///
/// ```lua
/// -- API Signature
/// aip.md.outer_block_content_or_raw(md_content: string): string
/// ```
///
/// ### Arguments
///
/// - `md_content: string`: The markdown content to process.
///
/// ### Returns
///
/// ```ts
/// string
/// ```
///
/// When the markdown starts with a code block (triple back tick), the first and last code block
/// markers are removed and the content between them is returned. Otherwise the original markdown
/// content is returned unchanged.
///
/// > Note: This is handy in the GenAI context, since LLMs often wrap their whole answer in a top
/// >       block (e.g., markdown, Rust). Handling it in the prompt is preferable, but gpt-4o-mini
/// >       and other models still tend to add that markdown block.
fn outer_block_content_or_raw(_lua: &Lua, md_content: String) -> mlua::Result<String> {
	// The helper may hand back a borrowed slice of the input; Lua needs an owned string.
	Ok(md::outer_block_content_or_raw(&md_content).into_owned())
}

// region:    --- Tests

/// Tests for the `aip.md` Lua functions, run either through a reflective agent or a bare Lua
/// instance that only has the `md` module loaded.
#[cfg(test)]
mod tests {
	type Result<T> = core::result::Result<T, Box<dyn std::error::Error>>; // For tests.

	use crate::_test_support::{assert_contains, assert_not_contains, eval_lua, run_reflective_agent, setup_lua};
	use serde_json::Value;
	use value_ext::JsonValueExt;

	/// Filtering by `lang = "lua"` on a fixture agent file returns only the lua blocks.
	#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
	async fn test_lua_md_extract_blocks_simple() -> Result<()> {
		// -- Setup & Fixtures
		// NOTE: the [[ ]] for multi line in lua breaks with the ``` for code block, so, reading files.
		let fx_script = r#"
local file = aip.file.load("agent-script/agent-before-all-inputs-gen.aip")
return aip.md.extract_blocks(file.content, {lang = "lua"})
		"#;

		// -- Exec
		let res = run_reflective_agent(fx_script, None).await?;

		// -- Check
		assert!(res.is_array());
		let blocks = res.as_array().ok_or("Res should be array")?;
		assert_eq!(blocks.len(), 4, "Should have found 4 lua blocks");

		// Check first block
		let first_block = &blocks[0];
		assert_eq!(first_block.x_get_str("lang")?, "lua");
		assert!(first_block.x_get_str("content")?.contains("before_all_response"));

		// Check second block
		let second_block = &blocks[1];
		assert_eq!(second_block.x_get_str("lang")?, "lua");
		assert!(second_block.x_get_str("content")?.contains("Data with input"));

		Ok(())
	}

	/// With both `lang` and `extrude = "content"`, only the matching block is extracted and the
	/// non-matching (rust) block stays in the extruded content.
	#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
	async fn test_lua_md_extract_blocks_with_lang_and_extruded_content() -> Result<()> {
		// -- Setup & Fixtures
		// NOTE: the [[ ]] for multi line in lua breaks when line starts with ```, so work around
		let fx_script = r#"
local content = "This is some content\n"
content = content .. "\n```lua\n--some lua \n```\n"
content = content .. "and other block\n\n```rust\n//! some rust block \n```\n"
content = content .. "The end"

local blocks, extruded_content = aip.md.extract_blocks(content, {lang = "lua", extrude = "content"})
return {
		blocks = blocks,
		extruded_content = extruded_content
}
		"#;

		// -- Exec
		let res = run_reflective_agent(fx_script, None).await?;

		// -- Check Blocks
		let blocks = res.pointer("/blocks").ok_or("Should have blocks")?;
		assert!(blocks.is_array());
		let blocks = blocks.as_array().unwrap();
		assert_eq!(blocks.len(), 1, "Should have found 1 lua blocks");

		// Check first and only block
		let first_block = &blocks[0];
		assert_eq!(first_block.x_get_str("lang")?, "lua");
		assert!(first_block.x_get_str("content")?.contains("some lua"));

		// -- Check Extruded Content
		// The rust block was not extracted, so its fences must still be in the content.
		let content = res.x_get_str("extruded_content")?;
		assert_contains(content, "This is some content");
		assert_contains(content, "and other block");
		assert_contains(content, "```rust");
		assert_contains(content, "```\n");
		assert_contains(content, "The end");

		Ok(())
	}

	/// With `extrude = "content"` and no `lang`, every block is extracted and no code fence
	/// remains in the extruded content.
	#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
	async fn test_lua_md_extract_blocks_with_all_lang_and_extruded_content() -> Result<()> {
		// -- Setup & Fixtures
		// NOTE: the [[ ]] for multi line in lua breaks when line starts with ```, so work around
		let fx_script = r#"
local content = "This is some content\n"
content = content .. "\n```lua\n--some lua \n```\n"
content = content .. "and other block\n\n```rust\n//! some rust block \n```\n"
content = content .. "The end"

local blocks, extruded_content = aip.md.extract_blocks(content, {extrude = "content"})
return {
		blocks = blocks,
		extruded_content = extruded_content
}
		"#;

		// -- Exec
		let res = run_reflective_agent(fx_script, None).await?;

		// -- Check Blocks
		let blocks = res.pointer("/blocks").ok_or("Should have blocks")?;
		assert!(blocks.is_array());
		let blocks = blocks.as_array().unwrap();
		assert_eq!(blocks.len(), 2, "Should have found 2 blocks, lua and rust");

		// Check first block
		let block = &blocks[0];
		assert_eq!(block.x_get_str("lang")?, "lua");
		assert!(block.x_get_str("content")?.contains("some lua"));
		// Check second block
		let block = &blocks[1];
		assert_eq!(block.x_get_str("lang")?, "rust");
		assert!(block.x_get_str("content")?.contains("some rust"));

		// -- Check Extruded Content
		let content = res.x_get_str("extruded_content")?;
		assert_contains(content, "This is some content");
		assert_contains(content, "and other block");
		assert_not_contains(content, "```lua");
		assert_not_contains(content, "```rust");
		assert_not_contains(content, "```");
		assert_contains(content, "The end");

		Ok(())
	}

	/// Two `#!meta` toml blocks are merged into one meta table and removed from the remaining
	/// content.
	#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
	async fn test_lua_md_extract_meta() -> Result<()> {
		// -- Setup & Fixtures
		let lua = setup_lua(super::init_module, "md")?;
		let lua_code = r#"
local content = [[
Some content
```toml
#!meta
some = "stuff"
```
some more content
```toml
#!meta
# Another meta block
num = 123
```
And this is the end
]]
local meta, remain = aip.md.extract_meta(content)
return {
   meta   = meta,
	 remain = remain
}
		"#;

		// -- Exec
		let res: Value = eval_lua(&lua, lua_code)?;

		// -- Check meta
		let meta = res.get("meta").ok_or("Should have meta")?;
		assert_eq!(meta.x_get_str("some")?, "stuff");
		assert_eq!(meta.x_get_i64("num")?, 123);

		// -- Check remain
		let remain = res.x_get_str("remain")?;
		assert_contains(remain, "Some content");
		assert_contains(remain, "some more content");
		assert_contains(remain, "And this is the end");
		assert_not_contains(remain, "Another meta block");
		assert_not_contains(remain, "num = 123");
		assert_not_contains(remain, "#!meta");

		Ok(())
	}

	/// The outer code fence is stripped while nested fences are kept; content without any
	/// fence is returned as is.
	#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
	async fn test_lua_md_outer_block_content_or_raw() -> Result<()> {
		// -- Setup & Fixtures
		// NOTE: Here we put the ``` out of the multiline [[ ]]
		let fx_script = r#"        
local content = "```" .. [[rust
fn main() {
    // Some nested blocks
    let example = ```typescript
    const x = 42;
    ```;
    println!("Hello!");
}
]] .. "```"

return aip.md.outer_block_content_or_raw(content)
		"#;

		// -- Exec
		let res = run_reflective_agent(fx_script, None).await?;

		// -- Check
		let content = res.as_str().unwrap();
		assert!(content.contains("fn main()"));
		assert!(content.contains("const x = 42"));
		assert!(!content.contains("```rust")); // Should not contain the outer markers

		// Test with raw content (no blocks)
		let fx_script_raw = r#"
local content = [[Just some plain
text without any code blocks]]

return aip.md.outer_block_content_or_raw(content)
		"#;

		let res_raw = run_reflective_agent(fx_script_raw, None).await?;
		let content_raw = res_raw.as_str().unwrap();
		assert_eq!(content_raw, "Just some plain\ntext without any code blocks");

		Ok(())
	}
}

// endregion: --- Tests