1use serde_json::Value;
2
3pub async fn scientific_compute(args: &Value) -> Result<String, String> {
4 let mode = args["mode"].as_str().ok_or(
5 "Missing 'mode' (symbolic, units, complexity, ledger, dataset, regression, hypothesis, matrix)",
6 )?;
7
8 match mode {
9 "symbolic" => solve_symbolic(args).await,
10 "units" => verify_units(args).await,
11 "complexity" => audit_complexity(args).await,
12 "ledger" => manage_ledger(args).await,
13 "dataset" => calculate_on_dataset(args).await,
14 "regression" => run_regression(args).await,
15 "hypothesis" => run_hypothesis(args).await,
16 "matrix" => run_matrix(args).await,
17 _ => Err(format!("Unknown scientific mode: {}", mode)),
18 }
19}
20
21async fn solve_symbolic(args: &Value) -> Result<String, String> {
22 let expr = args["expr"]
23 .as_str()
24 .ok_or("Missing 'expr' for symbolic mode")?;
25 let target = args["target"].as_str().unwrap_or("solve"); let latex = args["latex"].as_bool().unwrap_or(false);
27
28 let python_script = format!(
29 "import sympy\n\
30 from sympy import symbols, solve, simplify, integrate, diff, Eq, latex\n\
31 # Attempt to find symbols automatically\n\
32 import re\n\
33 raw_expr = r\"{}\"\n\
34 # Extract likely symbols (single letters or words starting with letter)\n\
35 sym_names = set(re.findall(r'\\b[a-zA-Z][a-zA-Z0-9]*\\b', raw_expr))\n\
36 # Remove common functions\n\
37 sym_names -= {{'sin', 'cos', 'tan', 'exp', 'log', 'sqrt', 'pi', 'E', 'oo', 'solve', 'simplify', 'integrate', 'diff'}}\n\
38 sym_dict = {{name: symbols(name) for name in sym_names}}\n\
39 \n\
40 try:\n\
41 if \"=\" in raw_expr and \"{}\" == \"solve\":\n\
42 lhs, rhs = raw_expr.split(\"=\")\n\
43 result = solve(Eq(eval(lhs, {{'__builtins__': None}}, sym_dict), eval(rhs, {{'__builtins__': None}}, sym_dict)))\n\
44 else:\n\
45 expr_obj = eval(raw_expr, {{'__builtins__': None}}, sym_dict)\n\
46 if \"{}\" == \"simplify\": result = simplify(expr_obj)\n\
47 elif \"{}\" == \"integrate\": result = integrate(expr_obj)\n\
48 elif \"{}\" == \"diff\": result = diff(expr_obj)\n\
49 else: result = solve(expr_obj)\n\
50 \n\
51 print(f\"RESULT: {{result}}\")\n\
52 if {}:\n\
53 print(f\"LATEX: {{latex(result)}}\")\n\
54 except Exception as e:\n\
55 print(f\"ERROR: {{e}}\")\n",
56 expr, target, target, target, target, latex
57 );
58
59 execute_in_sandbox(&python_script).await
60}
61
62async fn verify_units(args: &Value) -> Result<String, String> {
63 let calculation = args["calculation"]
64 .as_str()
65 .ok_or("Missing 'calculation' for units mode")?;
66
67 let python_script = format!(
68 "try:\n\
69 # Simple Unit System (SI focus)\n\
70 class UnitValue:\n\
71 def __init__(self, val, dims):\n\
72 self.val = val\n\
73 self.dims = dims # {{'m': 1, 's': -1, etc}}\n\
74 def __add__(self, other):\n\
75 if self.dims != other.dims: raise ValueError(f\"Dimension mismatch: {{self.dims}} vs {{other.dims}}\")\n\
76 return UnitValue(self.val + other.val, self.dims)\n\
77 def __mul__(self, other):\n\
78 new_dims = self.dims.copy()\n\
79 for k, v in other.dims.items(): new_dims[k] = new_dims.get(k, 0) + v\n\
80 return UnitValue(self.val * other.val, new_dims)\n\
81 def __truediv__(self, other):\n\
82 new_dims = self.dims.copy()\n\
83 for k, v in other.dims.items(): new_dims[k] = new_dims.get(k, 0) - v\n\
84 return UnitValue(self.val / other.val, new_dims)\n\
85 def __repr__(self): return f\"{{self.val}} ({{self.dims}})\"\n\
86 \n\
87 # Helper to parse strings like '10m'\n\
88 def u(s):\n\
89 m = __import__('re').match(r'([\\d\\.]+)([a-zA-Z]+)', s)\n\
90 val = float(m.group(1))\n\
91 unit = m.group(2)\n\
92 return UnitValue(val, {{unit: 1}})\n\
93 \n\
94 # Executing the calculation with unit objects\n\
95 # User input is expected to use u('10m') etc.\n\
96 raw_calc = r\"{}\"\n\
97 # Basic auto-wrap for units in the expression if they look like 10m\n\
98 wrapped = __import__('re').sub(r'(\\d+)([a-z]+)', r\"u('\\1\\2')\", raw_calc)\n\
99 result = eval(wrapped, {{'u': u}})\n\
100 print(f\"RESULT: {{result}}\")\n\
101 except Exception as e:\n\
102 print(f\"ERROR: {{e}}\")\n",
103 calculation
104 );
105
106 execute_in_sandbox(&python_script).await
107}
108
109async fn audit_complexity(args: &Value) -> Result<String, String> {
110 let snippet = args["snippet"]
111 .as_str()
112 .ok_or("Missing 'snippet' for complexity mode")?;
113
114 let python_script = format!(
115 "import time\n\
116 import math\n\
117 def run_target(n):\n\
118 {}\n\
119 \n\
120 samples = [10, 50, 100, 200, 500]\n\
121 times = []\n\
122 for n in samples:\n\
123 start = time.perf_counter()\n\
124 run_target(n)\n\
125 times.append(time.perf_counter() - start)\n\
126 \n\
127 # Simplified regression to guess Big-O\n\
128 # Compare growth rates: t/n, t/n^2, t/log(n)\n\
129 ratios_n = [t/n for t, n in zip(times, samples) if n > 0]\n\
130 ratios_n2 = [t/(n**2) for t, n in zip(times, samples) if n > 0]\n\
131 \n\
132 def variance(data):\n\
133 if not data: return 1.0\n\
134 avg = sum(data)/len(data)\n\
135 return sum((x-avg)**2 for x in data)/len(data)\n\
136 \n\
137 v_n = variance(ratios_n)\n\
138 v_n2 = variance(ratios_n2)\n\
139 \n\
140 if v_n < v_n2: complexity = \"O(N)\"\n\
141 elif v_n2 < v_n: complexity = \"O(N^2)\"\n\
142 else: complexity = \"O(Unknown)\"\n\
143 \n\
144 print(f\"RESULT: Empirically detected {{complexity}}\")\n\
145 print(f\"STATS: n={{samples}}, times={{[f'{{t:.6f}}s' for t in times]}}\")\n",
146 snippet.replace("\n", "\n ")
147 );
148
149 execute_in_sandbox(&python_script).await
150}
151
152pub async fn analyze_dataset(path_str: &str) -> Result<String, String> {
158 if path_str.trim().is_empty() {
159 return Err("Missing file path for --analyze.".into());
160 }
161
162 let safe_path = path_str.replace('\\', "\\\\").replace('"', "\\\"");
165
166 let script = format!(
167 r####"import os, sys, csv as _csv, sqlite3 as _sql3
168from collections import Counter
169
170_path = "{safe_path}"
171_ext = os.path.splitext(_path)[1].lower().lstrip('.')
172_data = []
173_col_order = None
174
175if _ext in ('csv', 'tsv'):
176 _delim = '\t' if _ext == 'tsv' else ','
177 try:
178 with open(_path, encoding='utf-8-sig', errors='replace', newline='') as _fh:
179 _rdr = _csv.DictReader(_fh, delimiter=_delim)
180 _col_order = list(_rdr.fieldnames) if _rdr.fieldnames else []
181 for _i, _row in enumerate(_rdr):
182 if _i >= 5000: break
183 _data.append(dict(_row))
184 except Exception as _e:
185 print("ERROR loading file: " + str(_e))
186 sys.exit(1)
187elif _ext == 'json':
188 try:
189 with open(_path, encoding='utf-8') as _fh:
190 _raw = json.load(_fh)
191 if isinstance(_raw, list):
192 _data = _raw[:5000]
193 elif isinstance(_raw, dict):
194 for _v in _raw.values():
195 if isinstance(_v, list):
196 _data = _v[:5000]
197 break
198 except Exception as _e:
199 print("ERROR loading file: " + str(_e))
200 sys.exit(1)
201elif _ext in ('db', 'sqlite', 'sqlite3'):
202 try:
203 with _sql3.connect(_path) as _con:
204 _cur = _con.cursor()
205 _cur.execute("SELECT name FROM sqlite_master WHERE type='table' LIMIT 1")
206 _tbl = _cur.fetchone()
207 if _tbl:
208 _cur.execute("SELECT * FROM [%s] LIMIT 5000" % _tbl[0])
209 _col_order = [_d[0] for _d in _cur.description]
210 _data = [dict(zip(_col_order, _r)) for _r in _cur.fetchall()]
211 except Exception as _e:
212 print("ERROR loading file: " + str(_e))
213 sys.exit(1)
214else:
215 print("ERROR: unsupported format '." + _ext + "'. Supported: csv, tsv, json, db/sqlite/sqlite3.")
216 sys.exit(1)
217
218if not _data:
219 print("No data found in: " + _path)
220 sys.exit(0)
221
222columns = _col_order if _col_order else list(_data[0].keys())
223row_count = len(_data)
224data = _data
225
226def _try_num(v):
227 if v is None: return None
228 try: return float(str(v).replace(',', '').replace('$', '').replace('%', '').strip())
229 except (ValueError, TypeError): return None
230
231def _ncol(c):
232 return [f for r in data for f in (_try_num(r.get(c)),) if f is not None]
233
234def _quart(vals, q):
235 s = sorted(vals)
236 n = len(s)
237 if n == 0: return float('nan')
238 if n == 1: return s[0]
239 idx = q * (n - 1)
240 lo, hi = int(idx), min(int(idx) + 1, n - 1)
241 return s[lo] + (idx - lo) * (s[hi] - s[lo])
242
243num_cols = []
244txt_cols = []
245for c in columns:
246 _nonempty = [r.get(c) for r in data
247 if r.get(c) is not None and str(r.get(c, '')).strip() != '']
248 if not _nonempty:
249 txt_cols.append(c)
250 continue
251 _s = _nonempty[:min(200, len(_nonempty))]
252 _hits = sum(1 for v in _s if _try_num(v) is not None)
253 (num_cols if _hits >= len(_s) * 0.8 else txt_cols).append(c)
254
255_miss = [(c, sum(1 for r in data
256 if r.get(c) is None or str(r.get(c, '')).strip() == ''))
257 for c in columns]
258_miss = [(c, n) for c, n in _miss if n > 0]
259
260_sample_note = " (5000-row sample)" if row_count == 5000 else ""
261_fname = os.path.basename(_path)
262_H2 = "##"
263_H3 = "###"
264_out = []
265_out.append(_H2 + " Dataset Profile: " + _fname)
266_out.append("")
267_out.append("**File:** " + _path)
268_out.append("**Shape:** " + str(row_count) + " rows" + _sample_note + " x " + str(len(columns)) + " columns")
269_out.append("**Numeric (%d):** %s" % (len(num_cols), ", ".join(num_cols) if num_cols else "none"))
270_out.append("**Text/Mixed (%d):** %s" % (len(txt_cols), ", ".join(txt_cols) if txt_cols else "none"))
271_out.append("")
272
273if _miss:
274 _total_miss = sum(n for _, n in _miss)
275 _out.append("**Missing values:** " + str(_total_miss) + " cell(s) across " + str(len(_miss)) + " column(s)")
276 for c, n in _miss:
277 _pct = round(n * 100.0 / row_count, 1)
278 _out.append(" - " + c + ": " + str(n) + " missing (" + str(_pct) + "%)")
279 _out.append("")
280
281if num_cols:
282 _out.append(_H3 + " Numeric Column Statistics")
283 _out.append("")
284 _hdr = "%-22s %6s %10s %10s %10s %10s %10s %10s %10s %8s" % (
285 "Column", "N", "Min", "P25", "Median", "P75", "Max", "Mean", "Std Dev", "Outliers")
286 _out.append(_hdr)
287 _out.append("-" * len(_hdr))
288 for c in num_cols:
289 _vals = _ncol(c)
290 if not _vals:
291 _out.append("%-22s (no numeric values)" % c[:22])
292 continue
293 _mn, _mx = min(_vals), max(_vals)
294 _mean = sum(_vals) / len(_vals)
295 _med = statistics.median(_vals)
296 _std = statistics.stdev(_vals) if len(_vals) >= 2 else 0.0
297 _q1 = _quart(_vals, 0.25)
298 _q3 = _quart(_vals, 0.75)
299 _iqr = _q3 - _q1
300 _otl = sum(1 for v in _vals if v < _q1 - 1.5 * _iqr or v > _q3 + 1.5 * _iqr)
301 _out.append("%-22s %6d %10.4g %10.4g %10.4g %10.4g %10.4g %10.4g %10.4g %8d" % (
302 c[:22], len(_vals), _mn, _q1, _med, _q3, _mx, _mean, _std, _otl))
303 _out.append("")
304
305if txt_cols:
306 _out.append(_H3 + " Text Column Statistics")
307 _out.append("")
308 for c in txt_cols:
309 _vals = [str(r.get(c, '') or '').strip() for r in data
310 if r.get(c) is not None and str(r.get(c, '')).strip() != '']
311 if not _vals:
312 _out.append("**" + c + "**: (all missing)")
313 _out.append("")
314 continue
315 _uniq = len(set(_vals))
316 _card = round(_uniq * 100.0 / len(_vals), 1)
317 _out.append("**" + c + "**: " + str(len(_vals)) + " non-null, " +
318 str(_uniq) + " unique (" + str(_card) + "% cardinality)")
319 for _v, _n in Counter(_vals).most_common(5):
320 _short = (_v[:42] + "...") if len(_v) > 42 else _v
321 _vpct = round(_n * 100.0 / len(_vals), 1)
322 _out.append(" - `" + _short + "`: " + str(_n) + " (" + str(_vpct) + "%)")
323 _out.append("")
324
325if HAS_NUMPY and len(num_cols) >= 2:
326 try:
327 import pandas as pd
328 _df = pd.DataFrame(data)[num_cols]
329 for _c in _df.columns:
330 _df[_c] = pd.to_numeric(_df[_c], errors='coerce')
331 _corr = _df.corr()
332 _out.append(_H3 + " Correlation Matrix")
333 _out.append("")
334 _heads = [c[:10] for c in num_cols]
335 _out.append(" " + "".join(" %10s" % h for h in _heads))
336 for _i, c in enumerate(num_cols):
337 _rs = "%12s" % _heads[_i]
338 for _j in range(len(num_cols)):
339 _rs += " %10.3f" % _corr.iloc[_i, _j]
340 _out.append(_rs)
341 _out.append("")
342 except Exception:
343 pass
344
345_out.append(_H3 + " Sample Rows (first 5)")
346_out.append("")
347_out.append(" | ".join(columns))
348_out.append(" | ".join("---" for _ in columns))
349for _row in data[:5]:
350 _out.append(" | ".join(str(_row.get(c, '') or '')[:20] for c in columns))
351
352print("\n".join(_out))
353"####,
354 safe_path = safe_path,
355 );
356
357 let sandbox_args = serde_json::json!({
358 "language": "python",
359 "code": script,
360 "timeout_seconds": 30
361 });
362 crate::tools::code_sandbox::execute(&sandbox_args).await
363}
364
365async fn execute_in_sandbox(script: &str) -> Result<String, String> {
366 let sandbox_args = serde_json::json!({
367 "language": "python",
368 "code": script
369 });
370
371 crate::tools::code_sandbox::execute(&sandbox_args).await
372}
373
374async fn manage_ledger(args: &Value) -> Result<String, String> {
375 let action = args["action"]
376 .as_str()
377 .ok_or("Missing 'action' (read, append)")?;
378 let ledger_path = std::path::Path::new(".hematite/docs/scientific_ledger.md");
379
380 if let Some(parent) = ledger_path.parent() {
381 std::fs::create_dir_all(parent).map_err(|e| e.to_string())?;
382 }
383
384 match action {
385 "read" => {
386 if !ledger_path.exists() {
387 return Ok("Scientific Ledger is currently empty.".to_string());
388 }
389 std::fs::read_to_string(ledger_path).map_err(|e| e.to_string())
390 }
391 "append" => {
392 let content = args["content"]
393 .as_str()
394 .ok_or("Missing 'content' to append")?;
395 let timestamp = chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string();
396 let entry = format!(
397 "\n### [{}] Scientific Derivation\n{}\n---\n",
398 timestamp, content
399 );
400
401 use std::io::Write;
402 let mut file = std::fs::OpenOptions::new()
403 .create(true)
404 .append(true)
405 .open(ledger_path)
406 .map_err(|e| e.to_string())?;
407
408 file.write_all(entry.as_bytes())
409 .map_err(|e| e.to_string())?;
410 Ok("Derivation successfully persisted to Scientific Ledger (RAG-indexed).".to_string())
411 }
412 _ => Err(format!("Unknown ledger action: {}", action)),
413 }
414}
415
416async fn calculate_on_dataset(args: &Value) -> Result<String, String> {
417 let path_str = args["path"].as_str().ok_or("Missing 'path' to dataset")?;
418 let sql = args["sql"]
419 .as_str()
420 .unwrap_or("SELECT * FROM data LIMIT 10000");
421 let python_op = args["python_op"]
422 .as_str()
423 .unwrap_or("print(f'{row_count} rows loaded. Columns: {columns}')");
424
425 let path = std::path::PathBuf::from(path_str);
426 let data = crate::tools::data_query::query_to_json_helper(&path, sql).await?;
427 let data_json = serde_json::to_string(&data).map_err(|e| e.to_string())?;
428
429 let python_script = format!(
437 r#"import json, math, statistics, datetime, decimal, re
438from collections import Counter, defaultdict
439
440data = {data_json}
441columns = list(data[0].keys()) if data else []
442row_count = len(data)
443
444def col(name):
445 """All values for a named column."""
446 return [row.get(name) for row in data]
447
448def ncol(name):
449 """Numeric-only values for a named column (skips None/blank/non-numeric)."""
450 out = []
451 for row in data:
452 v = row.get(name)
453 if v is not None and v != '':
454 try:
455 out.append(float(v))
456 except (ValueError, TypeError):
457 pass
458 return out
459
460def top(n=10, by=None):
461 """Top N rows sorted descending by column name."""
462 key = by or (columns[0] if columns else None)
463 def _key(r):
464 try: return float(r.get(key, 0) or 0)
465 except: return 0.0
466 return sorted(data, key=_key, reverse=True)[:n]
467
468def group_sum(group_col, value_col):
469 """Sum value_col grouped by group_col. Returns dict sorted by value desc."""
470 acc = defaultdict(float)
471 for row in data:
472 k = row.get(group_col, 'unknown') or 'unknown'
473 try: acc[k] += float(row.get(value_col, 0) or 0)
474 except (ValueError, TypeError): pass
475 return dict(sorted(acc.items(), key=lambda x: x[1], reverse=True))
476
477def group_count(group_col):
478 """Count rows per unique value in group_col."""
479 return dict(Counter(str(row.get(group_col, '')) for row in data).most_common())
480
481def group_mean(group_col, value_col):
482 """Mean of value_col grouped by group_col."""
483 acc = defaultdict(list)
484 for row in data:
485 k = row.get(group_col, 'unknown') or 'unknown'
486 try: acc[k].append(float(row.get(value_col, 0) or 0))
487 except (ValueError, TypeError): pass
488 return {{k: statistics.mean(v) for k, v in acc.items() if v}}
489
490def missing(name):
491 """Count of missing/None/blank values in a column."""
492 return sum(1 for row in data if row.get(name) is None or row.get(name) == '')
493
494try:
495 import pandas as pd
496 import numpy as np
497 df = pd.DataFrame(data)
498 for c in df.columns:
499 try: df[c] = pd.to_numeric(df[c])
500 except (ValueError, TypeError): pass
501 HAS_PANDAS = True
502except ImportError:
503 HAS_PANDAS = False
504
505print(f"Loaded: {{row_count}} rows x {{len(columns)}} columns")
506print(f"Columns: {{columns}}")
507print(f"Pandas: {{HAS_PANDAS}}")
508print()
509
510{python_op}
511"#,
512 data_json = data_json,
513 python_op = python_op
514 );
515
516 execute_in_sandbox(&python_script).await
517}
518
519async fn run_regression(args: &Value) -> Result<String, String> {
520 let path_str = args["path"]
521 .as_str()
522 .ok_or("Missing 'path' for regression mode")?;
523 let y_col = args["y"]
524 .as_str()
525 .ok_or("Missing 'y' (target column) for regression mode")?;
526
527 let x_cols: Vec<String> = match &args["x"] {
528 Value::String(s) => vec![s.clone()],
529 Value::Array(arr) => arr
530 .iter()
531 .filter_map(|v| v.as_str().map(|s| s.to_string()))
532 .collect(),
533 _ => return Err("Missing 'x' (predictor column(s)) for regression mode".into()),
534 };
535 if x_cols.is_empty() {
536 return Err("'x' must specify at least one predictor column".into());
537 }
538
539 let reg_type = args["type"].as_str().unwrap_or("linear");
540 let degree = args["degree"].as_u64().unwrap_or(2).min(10) as usize;
541
542 let safe_path = path_str.replace('\\', "\\\\").replace('"', "\\\"");
543 let safe_y = y_col.replace('"', "\\\"");
544 let x_json = serde_json::to_string(&x_cols).unwrap_or_else(|_| "[]".to_string());
545
546 let script = format!(
547 r####"import os, sys, csv as _csv, sqlite3 as _sql3, math
548
549_path = "{safe_path}"
550_xcols = {x_json}
551_ycol = "{safe_y}"
552_rtype = "{reg_type}"
553_degree = {degree}
554_ext = os.path.splitext(_path)[1].lower().lstrip('.')
555_data = []
556
557if _ext in ('csv', 'tsv'):
558 _delim = '\t' if _ext == 'tsv' else ','
559 try:
560 with open(_path, encoding='utf-8-sig', errors='replace', newline='') as _fh:
561 _rdr = _csv.DictReader(_fh, delimiter=_delim)
562 for _i, _row in enumerate(_rdr):
563 if _i >= 5000: break
564 _data.append(dict(_row))
565 except Exception as _e:
566 print("ERROR loading file: " + str(_e))
567 sys.exit(1)
568elif _ext == 'json':
569 try:
570 with open(_path, encoding='utf-8') as _fh:
571 _raw = json.load(_fh)
572 if isinstance(_raw, list):
573 _data = _raw[:5000]
574 elif isinstance(_raw, dict):
575 for _v in _raw.values():
576 if isinstance(_v, list):
577 _data = _v[:5000]
578 break
579 except Exception as _e:
580 print("ERROR loading file: " + str(_e))
581 sys.exit(1)
582elif _ext in ('db', 'sqlite', 'sqlite3'):
583 try:
584 with _sql3.connect(_path) as _con:
585 _cur = _con.cursor()
586 _cur.execute("SELECT name FROM sqlite_master WHERE type='table' LIMIT 1")
587 _tbl = _cur.fetchone()
588 if _tbl:
589 _cur.execute("SELECT * FROM [%s] LIMIT 5000" % _tbl[0])
590 _col_order = [_d[0] for _d in _cur.description]
591 _data = [dict(zip(_col_order, _r)) for _r in _cur.fetchall()]
592 except Exception as _e:
593 print("ERROR loading file: " + str(_e))
594 sys.exit(1)
595else:
596 print("ERROR: unsupported format '." + _ext + "'. Supported: csv, tsv, json, db/sqlite/sqlite3.")
597 sys.exit(1)
598
599if not _data:
600 print("No data found in: " + _path)
601 sys.exit(0)
602
603def _tryf(v):
604 if v is None: return None
605 try: return float(str(v).replace(',', '').replace('$', '').replace('%', '').strip())
606 except: return None
607
608_yx = []
609for _row in _data:
610 _yv = _tryf(_row.get(_ycol))
611 if _yv is None: continue
612 _xvs = [_tryf(_row.get(_xc)) for _xc in _xcols]
613 if any(v is None for v in _xvs): continue
614 _yx.append((_yv, _xvs))
615
616_n = len(_yx)
617if _n < 3:
618 print("ERROR: insufficient numeric data (need >=3 valid rows, got %d)" % _n)
619 sys.exit(1)
620
621_ys = [p[0] for p in _yx]
622_xmat = [p[1] for p in _yx]
623_ym = sum(_ys) / _n
624
625_out = []
626_out.append("## Regression Results")
627_out.append("")
628_out.append("**File:** " + os.path.basename(_path))
629_out.append("**Y (target):** " + _ycol)
630_out.append("**X (predictors):** " + ", ".join(_xcols))
631_out.append("**N (valid rows):** %d" % _n)
632_out.append("")
633
634if len(_xcols) == 1 and _rtype == "linear":
635 _xv = [r[0] for r in _xmat]
636 _xm = sum(_xv) / _n
637 _ssxy = sum((_x - _xm) * (_y - _ym) for _x, _y in zip(_xv, _ys))
638 _ssx = sum((_x - _xm)**2 for _x in _xv)
639 _ssy = sum((_y - _ym)**2 for _y in _ys)
640 if _ssx == 0:
641 print("ERROR: predictor has zero variance.")
642 sys.exit(1)
643 _slope = _ssxy / _ssx
644 _inter = _ym - _slope * _xm
645 _preds = [_slope * _x + _inter for _x in _xv]
646 _res = [_y - _p for _y, _p in zip(_ys, _preds)]
647 _sse = sum(r**2 for r in _res)
648 _r2 = 1.0 - _sse / _ssy if _ssy > 0 else 0.0
649 _rmse = math.sqrt(_sse / _n)
650 _pr = _ssxy / math.sqrt(_ssx * _ssy) if _ssx > 0 and _ssy > 0 else 0.0
651 _rm = sum(_res) / _n
652 _rstd = math.sqrt(sum((r - _rm)**2 for r in _res) / _n)
653 _out.append("**Type:** Simple Linear Regression (pure-Python OLS)")
654 _out.append("**Equation:** y = %+.6g x %+.6g" % (_slope, _inter))
655 _out.append("**R-squared:** %.4f" % _r2)
656 _out.append("**RMSE:** %.4g" % _rmse)
657 _out.append("**Pearson r:** %.4f" % _pr)
658 _out.append("**Residuals:** min=%.4g max=%.4g mean=%.4g std=%.4g" % (
659 min(_res), max(_res), _rm, _rstd))
660elif HAS_NUMPY:
661 import numpy as _np
662 if _rtype == "polynomial" and len(_xcols) == 1:
663 _xv = _np.array([r[0] for r in _xmat])
664 _ya = _np.array(_ys)
665 _coeffs = _np.polyfit(_xv, _ya, _degree)
666 _preds = _np.polyval(_coeffs, _xv)
667 _res = _ya - _preds
668 _sse = float(_np.sum(_res**2))
669 _sst = float(_np.sum((_ya - _ym)**2))
670 _r2 = 1.0 - _sse / _sst if _sst > 0 else 0.0
671 _rmse = float(_np.sqrt(_np.mean(_res**2)))
672 _out.append("**Type:** Polynomial Regression degree=%d (numpy polyfit)" % _degree)
673 _out.append("**Coefficients (highest power first):** " + ", ".join("%.6g" % c for c in _coeffs))
674 _out.append("**R-squared:** %.4f" % _r2)
675 _out.append("**RMSE:** %.4g" % _rmse)
676 _out.append("**Residuals:** min=%.4g max=%.4g mean=%.4g std=%.4g" % (
677 float(_np.min(_res)), float(_np.max(_res)),
678 float(_np.mean(_res)), float(_np.std(_res))))
679 else:
680 _Xm = _np.column_stack([_np.ones(_n)] + [[r[i] for r in _xmat] for i in range(len(_xcols))])
681 _ya = _np.array(_ys)
682 _coeffs, _, _, _ = _np.linalg.lstsq(_Xm, _ya, rcond=None)
683 _preds = _Xm @ _coeffs
684 _res = _ya - _preds
685 _sse = float(_np.sum(_res**2))
686 _sst = float(_np.sum((_ya - _ym)**2))
687 _r2 = 1.0 - _sse / _sst if _sst > 0 else 0.0
688 _rmse = float(_np.sqrt(_np.mean(_res**2)))
689 _rm = float(_np.mean(_res))
690 _rstd = float(_np.std(_res))
691 _out.append("**Type:** Multiple Linear Regression (numpy lstsq OLS)")
692 _out.append("**Intercept:** %.6g" % _coeffs[0])
693 for _i, _xc in enumerate(_xcols):
694 _out.append("**%s coeff:** %.6g" % (_xc, _coeffs[_i + 1]))
695 _out.append("**R-squared:** %.4f" % _r2)
696 _out.append("**RMSE:** %.4g" % _rmse)
697 _out.append("**Residuals:** min=%.4g max=%.4g mean=%.4g std=%.4g" % (
698 float(_np.min(_res)), float(_np.max(_res)), _rm, _rstd))
699else:
700 _out.append("**Type:** Multiple/Polynomial Regression requires numpy.")
701 _out.append("Use a single predictor with type=linear for pure-Python OLS, or install numpy.")
702
703print("\n".join(_out))
704"####,
705 safe_path = safe_path,
706 x_json = x_json,
707 safe_y = safe_y,
708 reg_type = reg_type,
709 degree = degree,
710 );
711
712 let sandbox_args = serde_json::json!({
713 "language": "python",
714 "code": script,
715 "timeout_seconds": 30
716 });
717 crate::tools::code_sandbox::execute(&sandbox_args).await
718}
719
720pub async fn compute_expr(expr: &str) -> Result<String, String> {
724 if expr.trim().is_empty() {
725 return Err("No expression provided.".into());
726 }
727 let safe_expr = expr.replace('\\', "\\\\").replace('"', "\\\"");
728
729 let script = format!(
730 r####"from math import *
731import statistics as _stat, re as _re, sys
732
733# ── Physical & mathematical constants ────────────────────────────────
734c_light = 299_792_458.0 # m/s — speed of light (exact)
735h_planck = 6.62607015e-34 # J·s — Planck constant (exact)
736hbar = h_planck / (2 * pi) # J·s — reduced Planck constant
737G_grav = 6.67430e-11 # m³/(kg·s²) — gravitational constant
738k_B = 1.380649e-23 # J/K — Boltzmann constant (exact)
739N_A = 6.02214076e23 # /mol — Avogadro's number (exact)
740R_gas = 8.314462618 # J/(mol·K) — molar gas constant
741g_std = 9.80665 # m/s² — standard gravity (exact)
742e_q = 1.602176634e-19 # C — elementary charge (exact)
743m_e = 9.1093837015e-31 # kg — electron mass
744m_p = 1.67262192369e-27 # kg — proton mass
745sigma_SB = 5.670374419e-8 # W/(m²·K⁴) — Stefan-Boltzmann
746eps_0 = 8.8541878128e-12 # F/m — vacuum permittivity
747mu_0 = 1.25663706212e-6 # H/m — vacuum permeability
748alpha_fs = 7.2973525693e-3 # — fine-structure constant
749atm = 101_325.0 # Pa — standard atmosphere
750
751# ── Statistics helpers ────────────────────────────────────────────────
752mean = _stat.mean
753median = _stat.median
754stdev = _stat.stdev
755variance = _stat.variance
756try: mode = _stat.mode
757except Exception: pass
758
759# ── Financial functions ───────────────────────────────────────────────
760def pmt(rate, nper, pv, fv=0, when=0):
761 """Periodic loan payment. pmt(0.05/12, 360, 300000)"""
762 if rate == 0: return -(pv + fv) / nper
763 pvif = (1 + rate) ** nper
764 r = rate / (pvif - 1) * -(pv * pvif + fv)
765 return r / (1 + rate) if when == 1 else r
766
767def fv(rate, nper, pmt_v, pv=0, when=0):
768 """Future value. fv(0.06/12, 120, -500)"""
769 if rate == 0: return -pv - pmt_v * nper
770 pvif = (1 + rate) ** nper
771 return -(pv * pvif + pmt_v * (1 + rate * when) * (pvif - 1) / rate)
772
773def pv(rate, nper, pmt_v, fv=0, when=0):
774 """Present value. pv(0.05/12, 360, -1500)"""
775 if rate == 0: return -fv - pmt_v * nper
776 pvif = (1 + rate) ** nper
777 return -(fv + pmt_v * (1 + rate * when) * (pvif - 1) / rate) / pvif
778
779def npv(rate, cashflows):
780 """Net present value. npv(0.1, [-1000, 200, 300, 400, 500])"""
781 return sum(cf / (1 + rate) ** t for t, cf in enumerate(cashflows))
782
783def irr(cashflows, guess=0.1):
784 """Internal rate of return (Newton-Raphson). irr([-1000, 300, 400, 500])"""
785 r = guess
786 for _ in range(200):
787 f = sum(cf / (1 + r) ** t for t, cf in enumerate(cashflows))
788 df = sum(-t * cf / (1 + r) ** (t + 1) for t, cf in enumerate(cashflows))
789 if df == 0: break
790 r2 = r - f / df
791 if abs(r2 - r) < 1e-10: return r2
792 r = r2
793 return r
794
795def compound(principal, rate, n=1, t=1):
796 """Compound interest. compound(1000, 0.05, 12, 10)"""
797 return principal * (1 + rate / n) ** (n * t)
798
799def cagr(start, end, years):
800 """Compound annual growth rate. cagr(1000, 2000, 5) -> 0.1487"""
801 return (end / start) ** (1.0 / years) - 1
802
803def roi(gain, cost):
804 """Return on investment %. roi(1500, 1000) -> 50.0"""
805 return (gain - cost) / cost * 100.0
806
807def breakeven(fixed, price, var_cost):
808 """Break-even units. breakeven(10000, 25, 15) -> 1000"""
809 return fixed / (price - var_cost)
810
811def _fmt(v):
812 if isinstance(v, bool): return str(v)
813 if isinstance(v, int): return str(v)
814 if isinstance(v, float):
815 if isnan(v): return "nan"
816 if isinf(v): return "inf" if v > 0 else "-inf"
817 if v == int(v) and abs(v) < 1e15:
818 return str(int(v))
819 return "%.10g" % v
820 if isinstance(v, complex): return str(v)
821 if isinstance(v, (list, tuple)):
822 return "[" + ", ".join(_fmt(x) for x in v) + "]"
823 return str(v)
824
825_raw = "{safe_expr}"
826_clean = _raw.strip()
827if _clean.endswith('='): _clean = _clean[:-1].strip()
828_clean = _clean.replace('^', '**').replace('×', '*').replace('÷', '/')
829
830# "X% of Y" — e.g. "15% of 89.99"
831_pm = _re.match(r'^([\d.]+)\s*(?:%%|percent)\s+of\s+([\d,. ]+)$', _clean, _re.I)
832if _pm:
833 print(_fmt(float(_pm.group(1)) / 100.0 *
834 float(_pm.group(2).replace(',','').replace(' ',''))))
835 sys.exit(0)
836
837try:
838 _r = eval(_clean)
839 print(_fmt(_r))
840except SyntaxError as _se:
841 print("Syntax error: " + str(_se))
842 sys.exit(1)
843except Exception as _e:
844 print("Error: " + str(_e))
845 sys.exit(1)
846"####,
847 safe_expr = safe_expr,
848 );
849
850 let sandbox_args = serde_json::json!({
851 "language": "python",
852 "code": script,
853 "timeout_seconds": 15
854 });
855 crate::tools::code_sandbox::execute(&sandbox_args).await
856}
857
858async fn run_hypothesis(args: &Value) -> Result<String, String> {
859 let test_type = args["test"].as_str().unwrap_or("ttest_ind");
860 let alpha = args["alpha"].as_f64().unwrap_or(0.05);
861 let mu = args["mu"].as_f64().unwrap_or(0.0);
862
863 let a_json = match &args["a"] {
864 Value::Array(arr) => serde_json::to_string(arr).unwrap_or_else(|_| "None".to_string()),
865 _ => "None".to_string(),
866 };
867 let b_json = match &args["b"] {
868 Value::Array(arr) => serde_json::to_string(arr).unwrap_or_else(|_| "None".to_string()),
869 _ => "None".to_string(),
870 };
871 let safe_path = args["path"]
872 .as_str()
873 .unwrap_or("")
874 .replace('\\', "\\\\")
875 .replace('"', "\\\"");
876 let col_a = args["column_a"]
877 .as_str()
878 .unwrap_or("a")
879 .replace('"', "\\\"");
880 let col_b = args["column_b"].as_str().unwrap_or("").replace('"', "\\\"");
881
882 let script = format!(
883 r####"import math, sys, os
884
885_test = "{test_type}"
886_alpha = {alpha}
887_mu = {mu}
888_a = {a_json}
889_b = {b_json}
890_path = "{safe_path}"
891_col_a = "{col_a}"
892_col_b = "{col_b}"
893
894if _a is None and _path:
895 import csv as _csv, sqlite3 as _sql3
896 _ext = os.path.splitext(_path)[1].lower().lstrip('.')
897 _rows = []
898 if _ext in ('csv', 'tsv'):
899 _delim = '\t' if _ext == 'tsv' else ','
900 with open(_path, encoding='utf-8-sig', errors='replace', newline='') as _fh:
901 for _r in _csv.DictReader(_fh, delimiter=_delim):
902 _rows.append(_r)
903 elif _ext in ('db', 'sqlite', 'sqlite3'):
904 with _sql3.connect(_path) as _con:
905 _cur = _con.cursor()
906 _cur.execute("SELECT name FROM sqlite_master WHERE type='table' LIMIT 1")
907 _t = _cur.fetchone()
908 if _t:
909 _cur.execute("SELECT * FROM [%s]" % _t[0])
910 _cs = [_d[0] for _d in _cur.description]
911 _rows = [dict(zip(_cs, _r)) for _r in _cur.fetchall()]
912 def _tryf(v):
913 try: return float(str(v or '').replace(',','').strip())
914 except: return None
915 _a = [_tryf(_r.get(_col_a)) for _r in _rows]
916 _a = [v for v in _a if v is not None]
917 if _col_b:
918 _b = [_tryf(_r.get(_col_b)) for _r in _rows]
919 _b = [v for v in _b if v is not None]
920
921if not _a:
922 print("ERROR: no numeric data found for group A")
923 sys.exit(1)
924
925_na = len(_a)
926_nb = len(_b) if _b else 0
927
928try:
929 from scipy import stats as _sc
930 _HAS_SCI = True
931except ImportError:
932 _HAS_SCI = False
933
934def _betainc(a, b, x):
935 if x <= 0: return 0.0
936 if x >= 1: return 1.0
937 if x > (a + 1.0) / (a + b + 2.0):
938 return 1.0 - _betainc(b, a, 1.0 - x)
939 TINY = 1e-30; EPS = 3e-7
940 lbeta = math.lgamma(a) + math.lgamma(b) - math.lgamma(a + b)
941 front = math.exp(a*math.log(x) + b*math.log(1.0-x) - lbeta) / a
942 f = 1.0; C = 1.0
943 D = 1.0 - (a+b)*x/(a+1.0)
944 if abs(D) < TINY: D = TINY
945 D = 1.0/D; f = D
946 for m in range(1, 201):
947 n1 = m*(b-m)*x/((a+2*m-1)*(a+2*m))
948 D = 1.0+n1*D; C = 1.0+n1/C
949 if abs(D) < TINY: D = TINY
950 if abs(C) < TINY: C = TINY
951 D = 1.0/D; f *= D*C
952 n2 = -(a+m)*(a+b+m)*x/((a+2*m)*(a+2*m+1))
953 D = 1.0+n2*D; C = 1.0+n2/C
954 if abs(D) < TINY: D = TINY
955 if abs(C) < TINY: C = TINY
956 D = 1.0/D; delta = D*C; f *= delta
957 if abs(delta-1.0) < EPS: break
958 return front * f
959
960def _t2p(t, df):
961 return _betainc(df/2.0, 0.5, df/(df + t*t))
962
963def _gammaincc(a, x):
964 if x <= 0: return 1.0
965 if x < a + 1:
966 _ap = a; _s = 1.0/a; _d = 1.0/a
967 for _ in range(200):
968 _ap += 1; _d *= x/_ap; _s += _d
969 if abs(_d) < abs(_s)*3e-7: break
970 return 1.0 - _s*math.exp(-x + a*math.log(x) - math.lgamma(a))
971 _b2 = x+1-a; _c = 1e30; _d = 1.0/_b2; _h = _d
972 for i in range(1, 201):
973 _an = -i*(i-a); _b2 += 2
974 _d = _an*_d + _b2
975 if abs(_d) < 1e-30: _d = 1e-30
976 _c = _b2 + _an/_c
977 if abs(_c) < 1e-30: _c = 1e-30
978 _d = 1.0/_d; _del = _d*_c; _h *= _del
979 if abs(_del-1.0) < 3e-7: break
980 return math.exp(-x + a*math.log(x) - math.lgamma(a)) * _h
981
982_stat_v = None; _p_val = None; _extra = []; _test_name = ""; _n_info = ""
983
984if _test == "ttest_1samp":
985 _test_name = "One-Sample t-Test"
986 _ma = sum(_a)/_na
987 _sd = math.sqrt(sum((x-_ma)**2 for x in _a)/(_na-1)) if _na>1 else 0.0
988 _se = _sd/math.sqrt(_na)
989 _stat_v = (_ma - _mu)/_se if _se > 0 else 0.0
990 _df = _na - 1
991 _n_info = "n=%d H0: mean=%.6g" % (_na, _mu)
992 if _HAS_SCI:
993 _res = _sc.ttest_1samp(_a, _mu)
994 _stat_v, _p_val = float(_res.statistic), float(_res.pvalue)
995 else:
996 _p_val = _t2p(abs(_stat_v), _df)
997 _extra = ["Sample mean: %.6g" % _ma, "Sample std dev: %.6g" % _sd, "df: %d" % _df]
998
999elif _test == "ttest_ind":
1000 _test_name = "Independent-Samples t-Test (Welch)"
1001 if not _b:
1002 print("ERROR: ttest_ind requires two groups — provide 'a' and 'b'"); sys.exit(1)
1003 _ma = sum(_a)/_na; _mb = sum(_b)/_nb
1004 _va = sum((x-_ma)**2 for x in _a)/(_na-1) if _na>1 else 0.0
1005 _vb = sum((x-_mb)**2 for x in _b)/(_nb-1) if _nb>1 else 0.0
1006 _se = math.sqrt(_va/_na + _vb/_nb)
1007 _stat_v = (_ma - _mb)/_se if _se > 0 else 0.0
1008 _df_n = (_va/_na + _vb/_nb)**2
1009 _df_d = (_va/_na)**2/(_na-1) + (_vb/_nb)**2/(_nb-1) if _na>1 and _nb>1 else 1
1010 _df = _df_n/_df_d if _df_d > 0 else 1.0
1011 _n_info = "n_a=%d n_b=%d" % (_na, _nb)
1012 if _HAS_SCI:
1013 _res = _sc.ttest_ind(_a, _b, equal_var=False)
1014 _stat_v, _p_val = float(_res.statistic), float(_res.pvalue)
1015 else:
1016 _p_val = _t2p(abs(_stat_v), _df)
1017 _extra = ["Mean A: %.6g" % _ma, "Mean B: %.6g" % _mb,
1018 "Std Dev A: %.6g" % math.sqrt(_va),
1019 "Std Dev B: %.6g" % math.sqrt(_vb),
1020 "df (Welch): %.1f" % _df]
1021
1022elif _test == "ttest_rel":
1023 _test_name = "Paired t-Test"
1024 if not _b:
1025 print("ERROR: ttest_rel requires two paired groups — provide 'a' and 'b'"); sys.exit(1)
1026 _np2 = min(_na, _nb)
1027 _diffs = [_a[i]-_b[i] for i in range(_np2)]
1028 _md = sum(_diffs)/_np2
1029 _sd = math.sqrt(sum((d-_md)**2 for d in _diffs)/(_np2-1)) if _np2>1 else 0.0
1030 _se = _sd/math.sqrt(_np2) if _np2>0 else 0.0
1031 _stat_v = _md/_se if _se > 0 else 0.0
1032 _df = _np2 - 1
1033 _n_info = "n_pairs=%d" % _np2
1034 if _HAS_SCI:
1035 _res = _sc.ttest_rel(_a[:_np2], _b[:_np2])
1036 _stat_v, _p_val = float(_res.statistic), float(_res.pvalue)
1037 else:
1038 _p_val = _t2p(abs(_stat_v), _df)
1039 _extra = ["Mean difference: %.6g" % _md,
1040 "Std dev of diffs: %.6g" % _sd, "df: %d" % _df]
1041
1042elif _test == "mannwhitney":
1043 _test_name = "Mann-Whitney U Test (non-parametric)"
1044 if not _b:
1045 print("ERROR: mannwhitney requires two groups — provide 'a' and 'b'"); sys.exit(1)
1046 _n_info = "n_a=%d n_b=%d" % (_na, _nb)
1047 if _HAS_SCI:
1048 _res = _sc.mannwhitneyu(_a, _b, alternative='two-sided')
1049 _stat_v, _p_val = float(_res.statistic), float(_res.pvalue)
1050 else:
1051 _U = sum(1 if x>y else 0.5 if x==y else 0 for x in _a for y in _b)
1052 _stat_v = _U
1053 _mu_U = _na*_nb/2.0
1054 _sg_U = math.sqrt(_na*_nb*(_na+_nb+1)/12.0)
1055 _z = (_U - _mu_U)/_sg_U if _sg_U > 0 else 0.0
1056 _p_val = math.erfc(abs(_z)/math.sqrt(2))
1057 _extra.append("(Normal approximation — install scipy for exact result)")
1058
1059elif _test == "chi2":
1060 _test_name = "Chi-Squared Goodness-of-Fit"
1061 _n_info = "k=%d bins" % _na
1062 _expected = list(_b) if _b else [sum(_a)/_na]*_na
1063 if len(_expected) != _na:
1064 print("ERROR: 'a' (observed) and 'b' (expected) must have equal length"); sys.exit(1)
1065 if _HAS_SCI:
1066 _res = _sc.chisquare(_a, f_exp=_expected)
1067 _stat_v, _p_val = float(_res.statistic), float(_res.pvalue)
1068 else:
1069 _stat_v = sum((o-e)**2/e for o, e in zip(_a, _expected) if e > 0)
1070 _df2 = _na - 1
1071 _p_val = _gammaincc(_df2/2.0, _stat_v/2.0)
1072 _extra.append("df=%d" % _df2)
1073else:
1074 print("ERROR: unknown test '%s'. Supported: ttest_1samp, ttest_ind, ttest_rel, mannwhitney, chi2" % _test)
1075 sys.exit(1)
1076
1077_H2 = "##"
1078_out = []
1079_out.append(_H2 + " Hypothesis Test Results")
1080_out.append("")
1081_out.append("**Test:** " + _test_name)
1082_out.append("**Alpha:** %.3g" % _alpha)
1083_out.append("**Samples:** " + _n_info)
1084for _ex in _extra:
1085 _out.append(" - " + _ex)
1086_out.append("")
1087if _stat_v is not None:
1088 _out.append("**Test Statistic:** %.6g" % _stat_v)
1089if _p_val is not None:
1090 _out.append("**p-value:** %.6g" % _p_val)
1091 _out.append("")
1092 if _p_val < _alpha:
1093 _out.append("**Result: REJECT H0** (p=%.5f < alpha=%.3g)" % (_p_val, _alpha))
1094 _out.append("Statistically significant — unlikely under the null hypothesis.")
1095 else:
1096 _out.append("**Result: FAIL TO REJECT H0** (p=%.5f >= alpha=%.3g)" % (_p_val, _alpha))
1097 _out.append("Insufficient evidence to reject the null hypothesis.")
1098_out.append("")
1099_out.append("*Engine: %s*" % ("scipy.stats" if _HAS_SCI else "pure-Python (Lentz CF)"))
1100print("\n".join(_out))
1101"####,
1102 test_type = test_type,
1103 alpha = alpha,
1104 mu = mu,
1105 a_json = a_json,
1106 b_json = b_json,
1107 safe_path = safe_path,
1108 col_a = col_a,
1109 col_b = col_b,
1110 );
1111
1112 let sandbox_args = serde_json::json!({
1113 "language": "python",
1114 "code": script,
1115 "timeout_seconds": 30
1116 });
1117 crate::tools::code_sandbox::execute(&sandbox_args).await
1118}
1119
1120async fn run_matrix(args: &Value) -> Result<String, String> {
1123 let operation = args["operation"].as_str().unwrap_or("det");
1124
1125 let a_json = match &args["a"] {
1126 Value::Array(arr) => serde_json::to_string(arr).unwrap_or_else(|_| "None".to_string()),
1127 _ => return Err("Missing 'a' (matrix as nested array) for matrix mode".into()),
1128 };
1129 let b_json = match &args["b"] {
1130 Value::Array(arr) => serde_json::to_string(arr).unwrap_or_else(|_| "None".to_string()),
1131 _ => "None".to_string(),
1132 };
1133
1134 let script = format!(
1135 r####"import sys, math
1136
1137_op = "{operation}"
1138_a = {a_json}
1139_b = {b_json}
1140
1141try:
1142 import numpy as _np
1143 _HAS_NP = True
1144except ImportError:
1145 _HAS_NP = False
1146
1147_A = _np.array(_a, dtype=float) if _HAS_NP else _a
1148_B = _np.array(_b, dtype=float) if (_HAS_NP and _b is not None) else _b
1149
1150_H2 = "##"
1151_out = []
1152
1153def _fmt_row(row):
1154 return " " + " ".join("%12.6g" % float(x) for x in row)
1155
1156def _pp(M):
1157 if _HAS_NP:
1158 if M.ndim == 1:
1159 _out.append(" [" + ", ".join("%.6g" % x for x in M) + "]")
1160 else:
1161 for row in M: _out.append(_fmt_row(row))
1162 else:
1163 if isinstance(M[0], list):
1164 for row in M: _out.append(_fmt_row(row))
1165 else:
1166 _out.append(" [" + ", ".join("%.6g" % x for x in M) + "]")
1167
1168def _det_py(m):
1169 n = len(m); m = [list(r) for r in m]; sign = 1
1170 for i in range(n):
1171 p = max(range(i, n), key=lambda r: abs(m[r][i]))
1172 if abs(m[p][i]) < 1e-12: return 0.0
1173 if p != i: m[i], m[p] = m[p], m[i]; sign *= -1
1174 for j in range(i+1, n):
1175 f = m[j][i] / m[i][i]
1176 for k in range(i, n): m[j][k] -= f * m[i][k]
1177 d = sign
1178 for i in range(n): d *= m[i][i]
1179 return d
1180
1181def _matmul_py(A, B):
1182 n, m = len(A), len(A[0])
1183 if isinstance(B[0], list):
1184 p = len(B[0])
1185 return [[sum(A[i][k]*B[k][j] for k in range(m)) for j in range(p)] for i in range(n)]
1186 return [sum(A[i][k]*B[k] for k in range(m)) for i in range(n)]
1187
1188if _op == "det":
1189 _out.append(_H2 + " Determinant")
1190 _out.append("")
1191 _d = float(_np.linalg.det(_A)) if _HAS_NP else _det_py(_a)
1192 _out.append("det(A) = %.10g" % _d)
1193 if _HAS_NP:
1194 _out.append("Shape: %dx%d" % (_A.shape[0], _A.shape[1]))
1195
1196elif _op == "invert":
1197 if not _HAS_NP:
1198 print("ERROR: invert requires numpy (pip install numpy)"); sys.exit(1)
1199 _out.append(_H2 + " Matrix Inverse")
1200 _out.append("")
1201 try:
1202 _R = _np.linalg.inv(_A)
1203 _pp(_R)
1204 _out.append("")
1205 _out.append("Condition number: %.4g" % _np.linalg.cond(_A))
1206 except _np.linalg.LinAlgError as _e:
1207 print("ERROR: " + str(_e)); sys.exit(1)
1208
1209elif _op == "eigenvalues":
1210 if not _HAS_NP:
1211 print("ERROR: eigenvalues requires numpy (pip install numpy)"); sys.exit(1)
1212 _out.append(_H2 + " Eigenvalues & Eigenvectors")
1213 _out.append("")
1214 _evals, _evecs = _np.linalg.eig(_A)
1215 for i, (ev, vec) in enumerate(zip(_evals, _evecs.T)):
1216 if abs(ev.imag) < 1e-10:
1217 _out.append("lambda_%d = %.8g" % (i+1, ev.real))
1218 else:
1219 _out.append("lambda_%d = %.6g + %.6gi" % (i+1, ev.real, ev.imag))
1220 _out.append(" eigenvector: [" + ", ".join("%.4f" % x.real for x in vec) + "]")
1221
1222elif _op == "solve":
1223 if _b is None:
1224 print("ERROR: solve requires 'b' (right-hand side vector or matrix)"); sys.exit(1)
1225 if not _HAS_NP:
1226 print("ERROR: solve requires numpy (pip install numpy)"); sys.exit(1)
1227 _out.append(_H2 + " Solution to Ax = b")
1228 _out.append("")
1229 try:
1230 _x = _np.linalg.solve(_A, _B.flatten() if _B.ndim > 1 else _B)
1231 _out.append("x = [" + ", ".join("%.8g" % v for v in _x) + "]")
1232 _out.append("")
1233 _out.append("Residual ||Ax-b||: %.2e" % float(_np.linalg.norm(_A @ _x - _B.flatten())))
1234 except _np.linalg.LinAlgError as _e:
1235 print("ERROR: " + str(_e)); sys.exit(1)
1236
1237elif _op == "transpose":
1238 _out.append(_H2 + " Transpose")
1239 _out.append("")
1240 if _HAS_NP:
1241 _pp(_A.T)
1242 else:
1243 _pp([[_a[j][i] for j in range(len(_a))] for i in range(len(_a[0]))])
1244
1245elif _op == "multiply":
1246 if _b is None:
1247 print("ERROR: multiply requires both 'a' and 'b'"); sys.exit(1)
1248 _out.append(_H2 + " Matrix Product (A @ B)")
1249 _out.append("")
1250 if _HAS_NP:
1251 _pp(_A @ _B)
1252 else:
1253 _pp(_matmul_py(_a, _b))
1254
1255elif _op == "rank":
1256 if not _HAS_NP:
1257 print("ERROR: rank requires numpy (pip install numpy)"); sys.exit(1)
1258 _out.append(_H2 + " Matrix Rank")
1259 _out.append("")
1260 _out.append("rank(A) = %d" % _np.linalg.matrix_rank(_A))
1261 _out.append("Shape: %dx%d" % (_A.shape[0], _A.shape[1]))
1262
1263elif _op == "svd":
1264 if not _HAS_NP:
1265 print("ERROR: SVD requires numpy (pip install numpy)"); sys.exit(1)
1266 _out.append(_H2 + " Singular Value Decomposition")
1267 _out.append("")
1268 _U, _S, _Vt = _np.linalg.svd(_A)
1269 _out.append("Singular values: [" + ", ".join("%.6g" % s for s in _S) + "]")
1270 _out.append("Rank (numerical): %d" % _np.linalg.matrix_rank(_A))
1271 _out.append("")
1272 _out.append("U (%dx%d):" % (_U.shape[0], _U.shape[1]))
1273 _pp(_U)
1274 _out.append("Vt (%dx%d):" % (_Vt.shape[0], _Vt.shape[1]))
1275 _pp(_Vt)
1276
1277else:
1278 print("ERROR: unknown operation '%s'. Supported: det, invert, eigenvalues, solve, transpose, multiply, rank, svd" % _op)
1279 sys.exit(1)
1280
1281print("\n".join(_out))
1282"####,
1283 operation = operation,
1284 a_json = a_json,
1285 b_json = b_json,
1286 );
1287
1288 let sandbox_args = serde_json::json!({
1289 "language": "python",
1290 "code": script,
1291 "timeout_seconds": 20
1292 });
1293 crate::tools::code_sandbox::execute(&sandbox_args).await
1294}
1295
/// Python source for the unit registry and converter, meant to be prepended to generated
/// scripts.
///
/// It defines:
/// - `_U`: lower-cased unit name -> `(factor, category)`, where `factor` scales to the
///   category's base unit (temperature entries carry a `('temp', scale)` marker instead).
/// - `_ALT`: registrations that were displaced in `_U` by a later same-named unit of a
///   different category (lower-cased `kn` is both knot and kilonewton).
/// - `_convert(val, from_u, to_u)`: returns `(result, None)` on success or
///   `(None, message)` on failure.
const UNIT_TABLE_PY: &str = r####"
# Unit registry: name -> (factor to the category's base unit, category).
_U = {}
# Registrations displaced in _U by a later same-named unit of another category.
_ALT = {}
def _r(names, factor, cat):
    for n in names:
        _prev = _U.get(n)
        if _prev is not None and _prev[1] != cat:
            # Keep the earlier meaning reachable instead of silently dropping it.
            _ALT.setdefault(n, []).append(_prev)
        _U[n] = (factor, cat)

# Length (SI base: metre)
_r(['m','meter','meters','metre','metres'], 1.0, 'length')
_r(['km','kilometer','kilometers','kilometre','kilometres'], 1e3, 'length')
_r(['cm','centimeter','centimeters'], 1e-2, 'length')
_r(['mm','millimeter','millimeters'], 1e-3, 'length')
_r(['um','micrometer','micron','microns'], 1e-6, 'length')
_r(['nm','nanometer','nanometers'], 1e-9, 'length')
_r(['pm','picometer'], 1e-12, 'length')
_r(['in','inch','inches'], 0.0254, 'length')
_r(['ft','foot','feet'], 0.3048, 'length')
_r(['yd','yard','yards'], 0.9144, 'length')
_r(['mi','mile','miles'], 1609.344, 'length')
_r(['nmi','nautical_mile','nautical_miles'], 1852.0, 'length')
_r(['ly','lightyear','light_year','lightyears'], 9.4607304725808e15, 'length')
_r(['au','astronomical_unit'], 1.495978707e11, 'length')
_r(['pc','parsec','parsecs'], 3.085677581e16, 'length')
_r(['ang','angstrom'], 1e-10, 'length')
_r(['fathom','fathoms'], 1.8288, 'length')
# Mass (SI base: kilogram)
_r(['kg','kilogram','kilograms'], 1.0, 'mass')
_r(['g','gram','grams'], 1e-3, 'mass')
_r(['mg','milligram','milligrams'], 1e-6, 'mass')
_r(['ug','microgram','micrograms'], 1e-9, 'mass')
_r(['t','tonne','metric_ton','metric_tons'], 1e3, 'mass')
_r(['lb','lbs','pound','pounds'], 0.45359237, 'mass')
_r(['oz','ounce','ounces'], 0.028349523125, 'mass')
_r(['ton','short_ton'], 907.18474, 'mass')
_r(['long_ton'], 1016.0469088, 'mass')
_r(['stone','stones'], 6.35029318, 'mass')
_r(['slug','slugs'], 14.593903, 'mass')
_r(['carat','carats','ct'], 2e-4, 'mass')
# Time (SI base: second)
_r(['s','sec','second','seconds'], 1.0, 'time')
_r(['ms','millisecond','milliseconds'], 1e-3, 'time')
_r(['us','microsecond','microseconds'], 1e-6, 'time')
_r(['ns','nanosecond','nanoseconds'], 1e-9, 'time')
_r(['min','minute','minutes'], 60.0, 'time')
_r(['h','hr','hour','hours'], 3600.0, 'time')
_r(['d','day','days'], 86400.0, 'time')
_r(['wk','week','weeks'], 604800.0, 'time')
_r(['month','months'], 2629746.0, 'time')
_r(['yr','year','years'], 31556952.0, 'time')
_r(['decade','decades'], 315569520.0, 'time')
_r(['century','centuries'], 3155695200.0, 'time')
# Speed (SI base: m/s)
_r(['m/s','mps','meters_per_second'], 1.0, 'speed')
_r(['km/h','kph','kmh','kilometers_per_hour'], 1.0/3.6, 'speed')
_r(['mph','miles_per_hour'], 0.44704, 'speed')
_r(['knot','knots','kn'], 0.514444, 'speed')
_r(['ft/s','fps','feet_per_second'], 0.3048, 'speed')
_r(['mach'], 340.29, 'speed')
_r(['c_speed','speed_of_light'], 299792458.0, 'speed')
# Energy (SI base: joule)
_r(['j','joule','joules'], 1.0, 'energy')
_r(['kj','kilojoule','kilojoules'], 1e3, 'energy')
_r(['mj','megajoule','megajoules'], 1e6, 'energy')
_r(['gj','gigajoule','gigajoules'], 1e9, 'energy')
_r(['cal','calorie','calories'], 4.184, 'energy')
_r(['kcal','kilocalorie','kilocalories','cal_food'], 4184.0, 'energy')
_r(['kwh','kw*h','kilowatt_hour','kilowatt_hours'], 3.6e6, 'energy')
_r(['mwh','megawatt_hour'], 3.6e9, 'energy')
_r(['ev','electronvolt','electronvolts'], 1.602176634e-19, 'energy')
_r(['btu','british_thermal_unit'], 1055.06, 'energy')
_r(['erg','ergs'], 1e-7, 'energy')
_r(['therm'], 1.05506e8, 'energy')
# Power (SI base: watt)
_r(['w','watt','watts'], 1.0, 'power')
_r(['kw','kilowatt','kilowatts'], 1e3, 'power')
_r(['mw','megawatt','megawatts'], 1e6, 'power')
_r(['gw','gigawatt','gigawatts'], 1e9, 'power')
_r(['hp','horsepower'], 745.69987, 'power')
_r(['ps','metric_horsepower'], 735.49875, 'power')
_r(['btu/h','btu_per_hour'], 0.293071, 'power')
# Pressure (SI base: pascal)
_r(['pa','pascal','pascals'], 1.0, 'pressure')
_r(['kpa','kilopascal','kilopascals'], 1e3, 'pressure')
_r(['mpa','megapascal','megapascals'], 1e6, 'pressure')
_r(['gpa','gigapascal','gigapascals'], 1e9, 'pressure')
_r(['atm','atmosphere','atmospheres'], 101325.0, 'pressure')
_r(['bar','bars'], 1e5, 'pressure')
_r(['mbar','millibar','millibars'], 100.0, 'pressure')
_r(['psi','pounds_per_square_inch'], 6894.757, 'pressure')
_r(['mmhg','torr'], 133.322, 'pressure')
_r(['inhg','inches_of_mercury'], 3386.39, 'pressure')
_r(['atm_tech','at','technical_atmosphere'], 98066.5, 'pressure')
# Temperature — special (handled separately, marker category)
_r(['c','celsius','degc','deg_c'], ('temp', 'C'), 'temperature')
_r(['f','fahrenheit','degf','deg_f'], ('temp', 'F'), 'temperature')
_r(['k','kelvin','degk','deg_k'], ('temp', 'K'), 'temperature')
_r(['r','rankine','degr','deg_r'], ('temp', 'R'), 'temperature')
# Volume (SI base: litre)
_r(['l','liter','liters','litre','litres'], 1.0, 'volume')
_r(['ml','milliliter','milliliters'], 1e-3, 'volume')
_r(['cl','centiliter','centiliters'], 1e-2, 'volume')
_r(['dl','deciliter','deciliters'], 0.1, 'volume')
_r(['ul','microliter','microliters'], 1e-6, 'volume')
_r(['m3','cubic_meter','cubic_meters'], 1e3, 'volume')
_r(['cm3','cc','cubic_centimeter'], 1e-3, 'volume')
_r(['mm3','cubic_millimeter'], 1e-6, 'volume')
_r(['gal','gallon','gallons','us_gal'], 3.785411784, 'volume')
_r(['qt','quart','quarts'], 0.946352946, 'volume')
_r(['pt','pint','pints'], 0.473176473, 'volume')
_r(['cup','cups'], 0.2365882365, 'volume')
_r(['fl_oz','fluid_ounce','fluid_ounces'], 0.0295735296, 'volume')
_r(['tsp','teaspoon','teaspoons'], 0.00492892, 'volume')
_r(['tbsp','tablespoon','tablespoons'], 0.01478676, 'volume')
_r(['imp_gal','imperial_gallon','imperial_gallons'], 4.54609, 'volume')
_r(['barrel','bbl'], 158.9873, 'volume')
# Area (SI base: square metre)
_r(['m2','sq_m','square_meter','square_meters'], 1.0, 'area')
_r(['km2','square_kilometer','square_kilometers'], 1e6, 'area')
_r(['cm2','square_centimeter'], 1e-4, 'area')
_r(['mm2','square_millimeter'], 1e-6, 'area')
_r(['ft2','sq_ft','square_foot','square_feet'], 0.09290304, 'area')
_r(['in2','sq_in','square_inch','square_inches'], 6.4516e-4, 'area')
_r(['yd2','sq_yd','square_yard','square_yards'], 0.83612736, 'area')
_r(['mi2','square_mile','square_miles'], 2589988.11, 'area')
_r(['acre','acres'], 4046.8564224, 'area')
_r(['ha','hectare','hectares'], 1e4, 'area')
# Digital storage (SI base: byte)
_r(['bit','bits'], 0.125, 'digital')
_r(['b','byte','bytes'], 1.0, 'digital')
_r(['kb','kilobyte','kilobytes'], 1e3, 'digital')
_r(['mb','megabyte','megabytes'], 1e6, 'digital')
_r(['gb','gigabyte','gigabytes'], 1e9, 'digital')
_r(['tb','terabyte','terabytes'], 1e12, 'digital')
_r(['pb','petabyte','petabytes'], 1e15, 'digital')
_r(['kib','kibibyte','kibibytes'], 1024.0, 'digital')
_r(['mib','mebibyte','mebibytes'], 1048576.0, 'digital')
_r(['gib','gibibyte','gibibytes'], 1073741824.0, 'digital')
_r(['tib','tebibyte','tebibytes'], 1099511627776.0, 'digital')
# Force (SI base: newton)
_r(['n','newton','newtons'], 1.0, 'force')
_r(['kn','kilonewton','kilonewtons'], 1e3, 'force')
_r(['mn_force','meganewton'], 1e6, 'force')
_r(['lbf','pound_force','pounds_force'], 4.44822, 'force')
_r(['kgf','kilogram_force'], 9.80665, 'force')
_r(['dyn','dyne','dynes'], 1e-5, 'force')
# Frequency (SI base: Hz)
_r(['hz','hertz'], 1.0, 'frequency')
_r(['khz','kilohertz'], 1e3, 'frequency')
_r(['mhz','megahertz'], 1e6, 'frequency')
_r(['ghz','gigahertz'], 1e9, 'frequency')
_r(['thz','terahertz'], 1e12, 'frequency')
_r(['rpm','rev_per_min','revolutions_per_minute'], 1.0/60, 'frequency')
# Angle (SI base: radian)
_r(['rad','radian','radians'], 1.0, 'angle')
_r(['deg','degree','degrees'], 3.14159265358979/180, 'angle')
_r(['grad','gradian','gradians'], 3.14159265358979/200, 'angle')
_r(['arcmin','arcminute','arcminutes'], 3.14159265358979/10800, 'angle')
_r(['arcsec','arcsecond','arcseconds'], 3.14159265358979/648000, 'angle')
_r(['rev','revolution','revolutions','turn','turns'], 2*3.14159265358979, 'angle')

def _to_celsius(v, scale):
    if scale=='C': return v
    if scale=='F': return (v-32)*5/9
    if scale=='K': return v-273.15
    if scale=='R': return (v-491.67)*5/9
    return None

def _from_celsius(c, scale):
    if scale=='C': return c
    if scale=='F': return c*9/5+32
    if scale=='K': return c+273.15
    if scale=='R': return (c+273.15)*9/5
    return None

# All meanings of a unit name: the _U entry first, then any displaced ones.
def _lookup(key):
    _p = _U.get(key)
    if _p is None: return []
    return [_p] + _ALT.get(key, [])

# Convert val between two unit names; returns (result, None) or (None, error message).
def _convert(val, from_u, to_u):
    _fk = from_u.lower().strip().replace(' ','_')
    _tk = to_u.lower().strip().replace(' ','_')
    _fc = _lookup(_fk)
    _tc = _lookup(_tk)
    if not _fc: return None, "Unknown unit: " + from_u
    if not _tc: return None, "Unknown unit: " + to_u
    _fi, _ti = _fc[0], _tc[0]
    if _fi[1] != _ti[1]:
        # The primary meanings disagree; see whether an alternative meaning of
        # either name gives a same-category pair (e.g. 'kn' as knot vs 'mph').
        for _f2 in _fc:
            _m2 = [_t2 for _t2 in _tc if _t2[1] == _f2[1]]
            if _m2:
                _fi, _ti = _f2, _m2[0]
                break
    if _fi[1] == 'temperature' or _ti[1] == 'temperature':
        if _fi[1] != 'temperature' or _ti[1] != 'temperature':
            return None, "Cannot mix temperature and non-temperature units"
        _c = _to_celsius(val, _fi[0][1])
        return _from_celsius(_c, _ti[0][1]), None
    if _fi[1] != _ti[1]:
        return None, "Dimension mismatch: %s (%s) vs %s (%s)" % (from_u, _fi[1], to_u, _ti[1])
    return val * _fi[0] / _ti[0], None
"####;
1487
1488pub async fn convert_units(expr: &str) -> Result<String, String> {
1489 if expr.trim().is_empty() {
1490 return Err("No expression provided.".into());
1491 }
1492 let safe_expr = expr.replace('\\', "\\\\").replace('"', "\\\"");
1493
1494 let script = format!(
1495 r####"{unit_table}
1496import re as _re, sys, math
1497
1498_raw = "{safe_expr}"
1499_expr = _raw.strip()
1500
1501# ── Number base conversion (prefix check) ────────────────────────────
1502_bm = _re.match(
1503 r'^(0x[0-9a-fA-F]+|0b[01]+|0o[0-7]+|\d+)\s+to\s+(hex(?:adecimal)?|dec(?:imal)?|bin(?:ary)?|oct(?:al)?)\s*$',
1504 _expr, _re.I)
1505if _bm:
1506 _bv, _bt = _bm.group(1), _bm.group(2).lower()
1507 try:
1508 _n = int(_bv, 0)
1509 if _bt.startswith('hex'): _out = hex(_n)
1510 elif _bt.startswith('bin'): _out = bin(_n)
1511 elif _bt.startswith('oct'): _out = oct(_n)
1512 else: _out = str(_n)
1513 print("%s = %s" % (_bv, _out))
1514 except ValueError as _e:
1515 print("Error: " + str(_e)); sys.exit(1)
1516 sys.exit(0)
1517
1518_m = _re.match(
1519 r'^([\d.,eE+\-]+)\s+(.+?)\s+(?:to|->|=|in)\s+(.+)$', _expr, _re.I)
1520if not _m:
1521 print("Format: VALUE UNIT to UNIT")
1522 print("Examples: 100 mph to km/h | 72 F to C | 1 lightyear to km | 5 kg to lbs")
1523 sys.exit(1)
1524
1525_val = float(_m.group(1).replace(',',''))
1526_from = _m.group(2).strip()
1527_to = _m.group(3).strip()
1528
1529_result, _err = _convert(_val, _from, _to)
1530if _err:
1531 print("Error: " + _err)
1532 sys.exit(1)
1533
1534def _fmtv(v):
1535 if v == 0: return "0"
1536 if abs(v) >= 1e12 or (abs(v) < 1e-4 and abs(v) > 0):
1537 return "%.6e" % v
1538 if v == int(v) and abs(v) < 1e15: return str(int(v))
1539 return "%.10g" % v
1540
1541print("%s %s = %s %s" % (_fmtv(_val), _from, _fmtv(_result), _to))
1542"####,
1543 unit_table = UNIT_TABLE_PY,
1544 safe_expr = safe_expr,
1545 );
1546
1547 let sandbox_args = serde_json::json!({
1548 "language": "python",
1549 "code": script,
1550 "timeout_seconds": 15
1551 });
1552 crate::tools::code_sandbox::execute(&sandbox_args).await
1553}
1554
1555pub async fn plot_dataset(
1558 path_str: &str,
1559 plot_type: &str,
1560 x_col: &str,
1561 y_col: &str,
1562 out_path: &str,
1563) -> Result<String, String> {
1564 let safe_path = path_str.replace('\\', "\\\\").replace('"', "\\\"");
1565 let safe_out = out_path.replace('\\', "\\\\").replace('"', "\\\"");
1566 let safe_x = x_col.replace('"', "\\\"");
1567 let safe_y = y_col.replace('"', "\\\"");
1568
1569 let script = format!(
1570 r####"import os, sys, csv as _csv, sqlite3 as _sql3
1571
1572os.environ['MPLBACKEND'] = 'Agg'
1573os.environ['MPLCONFIGDIR'] = os.environ.get('TEMP', os.environ.get('TMP', '/tmp')) + '/hematite_mpl'
1574
1575_path = "{safe_path}"
1576_out_path = "{safe_out}"
1577_plot_type = "{plot_type}"
1578_x_col = "{safe_x}"
1579_y_col = "{safe_y}"
1580_ext = os.path.splitext(_path)[1].lower().lstrip('.')
1581_data = []
1582
1583if _ext in ('csv', 'tsv'):
1584 _delim = '\t' if _ext == 'tsv' else ','
1585 with open(_path, encoding='utf-8-sig', errors='replace', newline='') as _fh:
1586 _rdr = _csv.DictReader(_fh, delimiter=_delim)
1587 for _i, _r in enumerate(_rdr):
1588 if _i >= 10000: break
1589 _data.append(_r)
1590elif _ext == 'json':
1591 with open(_path, encoding='utf-8') as _fh:
1592 _raw2 = json.load(_fh)
1593 _data = _raw2[:10000] if isinstance(_raw2, list) else list(_raw2.values())[0][:10000] if isinstance(_raw2, dict) else []
1594elif _ext in ('db','sqlite','sqlite3'):
1595 with _sql3.connect(_path) as _con:
1596 _cur = _con.cursor()
1597 _cur.execute("SELECT name FROM sqlite_master WHERE type='table' LIMIT 1")
1598 _t = _cur.fetchone()
1599 if _t:
1600 _cur.execute("SELECT * FROM [%s] LIMIT 10000" % _t[0])
1601 _cs = [_d[0] for _d in _cur.description]
1602 _data = [dict(zip(_cs, _r)) for _r in _cur.fetchall()]
1603else:
1604 print("ERROR: unsupported format"); sys.exit(1)
1605
1606if not _data:
1607 print("No data found."); sys.exit(1)
1608
1609_cols = list(_data[0].keys())
1610
1611def _tryf(v):
1612 try: return float(str(v or '').replace(',','').strip())
1613 except: return None
1614
1615_num_cols = []
1616for _c in _cols:
1617 _s = [_tryf(_r.get(_c)) for _r in _data[:200]]
1618 if sum(1 for x in _s if x is not None) >= len(_s)*0.8: _num_cols.append(_c)
1619
1620_x_col2 = _x_col or (_num_cols[0] if _num_cols else _cols[0])
1621_y_col2 = _y_col or (_num_cols[1] if len(_num_cols) > 1 else None)
1622
1623_x_vals = [_tryf(_r.get(_x_col2)) for _r in _data]
1624_x_vals = [v for v in _x_vals if v is not None]
1625_y_vals = []
1626if _y_col2:
1627 _y_vals = [_tryf(_r.get(_y_col2)) for _r in _data]
1628 _y_vals = [v for v in _y_vals if v is not None]
1629
1630_title = os.path.basename(_path)
1631if _y_col2:
1632 _sub = "%s vs %s" % (_x_col2, _y_col2)
1633else:
1634 _sub = _x_col2
1635
1636# ── Attempt matplotlib ────────────────────────────────────────────────
1637_used_mpl = False
1638_svg_str = ""
1639try:
1640 import matplotlib
1641 matplotlib.use('Agg')
1642 import matplotlib.pyplot as _plt
1643 _fig, _ax = _plt.subplots(figsize=(8, 5))
1644 _fig.patch.set_facecolor('#0d0d1a')
1645 _ax.set_facecolor('#16213e')
1646 for _sp in _ax.spines.values(): _sp.set_color('#444')
1647 _ax.tick_params(colors='#999', labelsize=9)
1648 _ax.xaxis.label.set_color('#bbb')
1649 _ax.yaxis.label.set_color('#bbb')
1650 _ax.title.set_color('#7fc3ff')
1651 _C = '#4a9eff'
1652 if _plot_type == 'histogram':
1653 _ax.hist(_x_vals, bins=min(40, max(10, int(len(_x_vals)**0.5)+1)),
1654 color=_C, alpha=0.85, edgecolor='#0d0d1a')
1655 _ax.set_xlabel(_x_col2); _ax.set_ylabel('Count')
1656 _ax.set_title('Histogram — ' + _x_col2)
1657 elif _plot_type in ('scatter',''):
1658 _nx = min(len(_x_vals), len(_y_vals))
1659 _ax.scatter(_x_vals[:_nx], _y_vals[:_nx], color=_C, alpha=0.6, s=15)
1660 _ax.set_xlabel(_x_col2); _ax.set_ylabel(_y_col2 or '')
1661 _ax.set_title('Scatter — ' + _sub)
1662 elif _plot_type == 'line':
1663 _pairs = sorted(zip(_x_vals, _y_vals))
1664 _ax.plot([p[0] for p in _pairs], [p[1] for p in _pairs], color=_C, lw=1.5)
1665 _ax.set_xlabel(_x_col2); _ax.set_ylabel(_y_col2 or '')
1666 _ax.set_title('Line — ' + _sub)
1667 elif _plot_type == 'bar':
1668 from collections import Counter as _Ctr
1669 _raw_x = [str(_r.get(_x_col2, '') or '').strip() for _r in _data if _r.get(_x_col2)]
1670 _ct = _Ctr(_raw_x)
1671 _lbls = [k for k, _ in _ct.most_common(20)]
1672 _vals2 = [_ct[k] for k in _lbls]
1673 _ax.bar(range(len(_lbls)), _vals2, color=_C, alpha=0.85)
1674 _ax.set_xticks(list(range(len(_lbls))))
1675 _ax.set_xticklabels(_lbls, rotation=40, ha='right', fontsize=8)
1676 _ax.set_title('Bar — ' + _x_col2)
1677 from io import StringIO as _SIO
1678 _buf = _SIO()
1679 _fig.tight_layout(pad=1.2)
1680 _fig.savefig(_buf, format='svg', bbox_inches='tight', facecolor=_fig.get_facecolor())
1681 _plt.close(_fig)
1682 _sv = _buf.getvalue()
1683 _svg_str = _sv[_sv.find('<svg'):]
1684 _used_mpl = True
1685except Exception:
1686 pass
1687
1688# ── Pure-Python SVG fallback ──────────────────────────────────────────
1689if not _used_mpl:
1690 def _hist_svg(vals, lbl, W=640, H=380):
1691 if not vals: return ""
1692 mn, mx = min(vals), max(vals)
1693 if mn == mx: mn -= 0.5; mx += 0.5
1694 nb = min(30, max(8, int(len(vals)**0.5)+1))
1695 bw2 = (mx-mn)/nb
1696 bins = [0]*nb
1697 for v in vals:
1698 i = min(int((v-mn)/bw2), nb-1)
1699 bins[i] += 1
1700 mc = max(bins) or 1
1701 P=50; PW=W-2*P; PH=H-2*P
1702 rects = ''.join(
1703 '<rect x="%.1f" y="%.1f" width="%.1f" height="%.1f" fill="#4a9eff" opacity=".82"/>' %
1704 (P+i*PW/nb, P+PH-bins[i]/mc*PH, max(PW/nb-1,1), bins[i]/mc*PH)
1705 for i in range(nb))
1706 xt = ''.join('<text x="%.1f" y="%d" text-anchor="middle" font-size="10" fill="#888">%.3g</text>' %
1707 (P+k*PW/4, H-8, mn+(mx-mn)*k/4) for k in range(5))
1708 yt = ''.join('<text x="%d" y="%.1f" text-anchor="end" font-size="10" fill="#888">%d</text>' %
1709 (P-4, P+PH-k*PH/4+4, int(mc*k/4)) for k in range(5))
1710 axs = '<line x1="%d" y1="%d" x2="%d" y2="%d" stroke="#444"/><line x1="%d" y1="%d" x2="%d" y2="%d" stroke="#444"/>'%(P,P,P,P+PH,P,P+PH,P+PW,P+PH)
1711 ttl = '<text x="%d" y="22" text-anchor="middle" font-size="13" fill="#7fc3ff" font-weight="bold">Histogram — %s</text>'%(W//2,lbl[:50])
1712 return '<svg xmlns="http://www.w3.org/2000/svg" width="%d" height="%d" style="background:#16213e">%s%s%s%s%s</svg>'%(W,H,ttl,axs,rects,xt,yt)
1713
1714 def _scatter_svg(xs, ys, xl, yl, W=640, H=400):
1715 if not xs or not ys: return ""
1716 xmn,xmx=min(xs),max(xs); ymn,ymx=min(ys),max(ys)
1717 if xmn==xmx: xmn-=1;xmx+=1
1718 if ymn==ymx: ymn-=1;ymx+=1
1719 P=60; PW=W-2*P; PH=H-2*P
1720 def xp(v): return P+(v-xmn)/(xmx-xmn)*PW
1721 def yp(v): return P+PH-(v-ymn)/(ymx-ymn)*PH
1722 dots=''.join('<circle cx="%.1f" cy="%.1f" r="3" fill="#4a9eff" opacity=".65"/>'%(xp(x),yp(y)) for x,y in zip(xs[:3000],ys[:3000]))
1723 axs='<line x1="%d" y1="%d" x2="%d" y2="%d" stroke="#444"/><line x1="%d" y1="%d" x2="%d" y2="%d" stroke="#444"/>'%(P,P,P,P+PH,P,P+PH,P+PW,P+PH)
1724 xt=''.join('<text x="%.1f" y="%d" text-anchor="middle" font-size="10" fill="#888">%.3g</text>'%(P+k*PW/4,P+PH+16,xmn+(xmx-xmn)*k/4) for k in range(5))
1725 yt=''.join('<text x="%d" y="%.1f" text-anchor="end" font-size="10" fill="#888">%.3g</text>'%(P-4,P+PH-k*PH/4+4,ymn+(ymx-ymn)*k/4) for k in range(5))
1726 xl2='<text x="%d" y="%d" text-anchor="middle" font-size="11" fill="#bbb">%s</text>'%(W//2,H-2,xl[:40])
1727 ttl='<text x="%d" y="20" text-anchor="middle" font-size="13" fill="#7fc3ff" font-weight="bold">Scatter — %s vs %s</text>'%(W//2,xl[:25],yl[:25])
1728 return '<svg xmlns="http://www.w3.org/2000/svg" width="%d" height="%d" style="background:#16213e">%s%s%s%s%s%s</svg>'%(W,H,ttl,axs,dots,xt,yt,xl2)
1729
1730 if _plot_type in ('scatter','line') and _x_vals and _y_vals:
1731 _nx = min(len(_x_vals), len(_y_vals))
1732 _svg_str = _scatter_svg(_x_vals[:_nx], _y_vals[:_nx], _x_col2, _y_col2 or '')
1733 else:
1734 _svg_str = _hist_svg(_x_vals, _x_col2)
1735
1736# ── Write HTML ────────────────────────────────────────────────────────
1737_engine = "matplotlib" if _used_mpl else "pure-Python SVG"
1738_html = (
1739 "<!DOCTYPE html><html><head><meta charset='utf-8'><title>" + _title + "</title>"
1740 "<style>body{{background:#0d0d1a;color:#e0e0e0;font-family:monospace;padding:24px;margin:0}}"
1741 "h2{{color:#7fc3ff;margin-bottom:4px}}p{{color:#666;font-size:.85em;margin:0 0 20px}}"
1742 ".chart{{display:block;margin:0 auto;max-width:700px}}</style></head><body>"
1743 "<h2>" + _title + " — " + _sub + "</h2>"
1744 "<p>Generated by Hematite · engine: " + _engine + " · n=" + str(len(_x_vals)) + " rows</p>"
1745 "<div class='chart'>" + _svg_str + "</div>"
1746 "</body></html>"
1747)
1748os.makedirs(os.path.dirname(_out_path), exist_ok=True)
1749with open(_out_path, 'w', encoding='utf-8') as _f:
1750 _f.write(_html)
1751print(_out_path)
1752"####,
1753 safe_path = safe_path,
1754 safe_out = safe_out,
1755 plot_type = plot_type,
1756 safe_x = safe_x,
1757 safe_y = safe_y,
1758 );
1759
1760 let sandbox_args = serde_json::json!({
1761 "language": "python",
1762 "code": script,
1763 "timeout_seconds": 30
1764 });
1765 crate::tools::code_sandbox::execute(&sandbox_args).await
1766}
1767
1768pub async fn query_data(file_path: &str, sql: &str) -> Result<String, String> {
1771 if file_path.trim().is_empty() {
1772 return Err("No data file specified.".into());
1773 }
1774 if sql.trim().is_empty() {
1775 return Err("No SQL query specified.".into());
1776 }
1777 let safe_path = file_path.replace('\\', "\\\\").replace('"', "\\\"");
1778 let sql_hex: String = sql.bytes().map(|b| format!("{:02x}", b)).collect();
1780
1781 let script = format!(
1782 r####"import sqlite3 as _sq, csv as _csv, json as _js, sys, os
1783
1784_path = "{safe_path}"
1785_sql = bytes.fromhex("{sql_hex}").decode()
1786_ext = os.path.splitext(_path)[1].lower().lstrip('.')
1787_con = _sq.connect(':memory:')
1788
1789def _load_csv(path, delim):
1790 with open(path, encoding='utf-8-sig', errors='replace', newline='') as _fh:
1791 _rdr = _csv.DictReader(_fh, delimiter=delim)
1792 _rows = list(_rdr)
1793 if not _rows:
1794 print("No data in file."); sys.exit(1)
1795 _cols = list(_rows[0].keys())
1796 _con.execute('CREATE TABLE data (' + ', '.join('"' + c + '"' for c in _cols) + ')')
1797 _con.executemany(
1798 'INSERT INTO data VALUES (' + ','.join(['?'] * len(_cols)) + ')',
1799 [tuple(_r.get(c, '') for c in _cols) for _r in _rows])
1800
1801def _load_json(path):
1802 with open(path, encoding='utf-8') as _fh:
1803 _d = _js.load(_fh)
1804 _rows = _d if isinstance(_d, list) else next(iter(_d.values()), []) if isinstance(_d, dict) else []
1805 if not _rows:
1806 print("No rows found in JSON."); sys.exit(1)
1807 _cols = list(_rows[0].keys()) if isinstance(_rows[0], dict) else [str(i) for i in range(len(_rows[0]))]
1808 _con.execute('CREATE TABLE data (' + ', '.join('"' + c + '"' for c in _cols) + ')')
1809 _con.executemany(
1810 'INSERT INTO data VALUES (' + ','.join(['?'] * len(_cols)) + ')',
1811 [tuple(str(_r.get(c, '') if isinstance(_r, dict) else _r[i]) for i, c in enumerate(_cols)) for _r in _rows])
1812
1813try:
1814 if _ext == 'csv': _load_csv(_path, ',')
1815 elif _ext == 'tsv': _load_csv(_path, '\t')
1816 elif _ext == 'json': _load_json(_path)
1817 elif _ext in ('db','sqlite','sqlite3'):
1818 _src = _sq.connect(_path); _src.backup(_con); _src.close()
1819 else:
1820 print("Unsupported format: " + _ext + ". Use csv, tsv, json, or sqlite.")
1821 sys.exit(1)
1822except Exception as _e:
1823 print("Load error: " + str(_e), file=sys.stderr); sys.exit(1)
1824
1825try:
1826 _cur = _con.execute(_sql)
1827except Exception as _e:
1828 print("Query error: " + str(_e), file=sys.stderr); sys.exit(1)
1829
1830_hdrs = [_d[0] for _d in _cur.description] if _cur.description else []
1831_rows2 = _cur.fetchall()
1832_con.close()
1833
1834if not _rows2:
1835 print("(no rows returned)")
1836 sys.exit(0)
1837
1838_rs = [[str(c) if c is not None else 'NULL' for c in _r] for _r in _rows2[:2000]]
1839_ws = [max(len(_h), max((len(_r[_i]) for _r in _rs), default=0))
1840 for _i, _h in enumerate(_hdrs)]
1841_sep = '+-' + '-+-'.join('-' * _w for _w in _ws) + '-+'
1842_hr = '| ' + ' | '.join(_h.ljust(_ws[_i]) for _i, _h in enumerate(_hdrs)) + ' |'
1843print(_sep)
1844print(_hr)
1845print(_sep)
1846for _r in _rs:
1847 print('| ' + ' | '.join(_r[_i].ljust(_ws[_i]) for _i in range(len(_hdrs))) + ' |')
1848print(_sep)
1849_total = len(_rows2)
1850_label = str(_total) + (' rows' if _total != 1 else ' row')
1851if _total > 2000: _label += ' (showing first 2000)'
1852print('(' + _label + ')')
1853"####,
1854 safe_path = safe_path,
1855 sql_hex = sql_hex,
1856 );
1857
1858 let sandbox_args = serde_json::json!({
1859 "language": "python",
1860 "code": script,
1861 "timeout_seconds": 30
1862 });
1863 crate::tools::code_sandbox::execute(&sandbox_args).await
1864}
1865
/// Periodic-table records, one element per line, as nine `|`-separated fields:
/// atomic number, symbol, name, atomic mass, category key, period, group,
/// electronegativity, and state (`S`, `L` or `G`).
///
/// [`lookup_element`] gives three values a special rendering: a group of `0`
/// (shown as a series label or a dash), an electronegativity of `0` (shown as
/// "N/A"), and a mass without a decimal point (labelled "most stable isotope").
const ELEMENTS_DATA: &str = r#"1|H|Hydrogen|1.008|nonmetal|1|1|2.20|G
2|He|Helium|4.003|noble|1|18|0|G
3|Li|Lithium|6.941|alkali|2|1|0.98|S
4|Be|Beryllium|9.012|alkaline|2|2|1.57|S
5|B|Boron|10.811|metalloid|2|13|2.04|S
6|C|Carbon|12.011|nonmetal|2|14|2.55|S
7|N|Nitrogen|14.007|nonmetal|2|15|3.04|G
8|O|Oxygen|15.999|nonmetal|2|16|3.44|G
9|F|Fluorine|18.998|halogen|2|17|3.98|G
10|Ne|Neon|20.180|noble|2|18|0|G
11|Na|Sodium|22.990|alkali|3|1|0.93|S
12|Mg|Magnesium|24.305|alkaline|3|2|1.31|S
13|Al|Aluminium|26.982|post-trans|3|13|1.61|S
14|Si|Silicon|28.085|metalloid|3|14|1.90|S
15|P|Phosphorus|30.974|nonmetal|3|15|2.19|S
16|S|Sulfur|32.06|nonmetal|3|16|2.58|S
17|Cl|Chlorine|35.45|halogen|3|17|3.16|G
18|Ar|Argon|39.948|noble|3|18|0|G
19|K|Potassium|39.098|alkali|4|1|0.82|S
20|Ca|Calcium|40.078|alkaline|4|2|1.00|S
21|Sc|Scandium|44.956|transition|4|3|1.36|S
22|Ti|Titanium|47.867|transition|4|4|1.54|S
23|V|Vanadium|50.942|transition|4|5|1.63|S
24|Cr|Chromium|51.996|transition|4|6|1.66|S
25|Mn|Manganese|54.938|transition|4|7|1.55|S
26|Fe|Iron|55.845|transition|4|8|1.83|S
27|Co|Cobalt|58.933|transition|4|9|1.88|S
28|Ni|Nickel|58.693|transition|4|10|1.91|S
29|Cu|Copper|63.546|transition|4|11|1.90|S
30|Zn|Zinc|65.38|transition|4|12|1.65|S
31|Ga|Gallium|69.723|post-trans|4|13|1.81|S
32|Ge|Germanium|72.630|metalloid|4|14|2.01|S
33|As|Arsenic|74.922|metalloid|4|15|2.18|S
34|Se|Selenium|78.971|nonmetal|4|16|2.55|S
35|Br|Bromine|79.904|halogen|4|17|2.96|L
36|Kr|Krypton|83.798|noble|4|18|3.00|G
37|Rb|Rubidium|85.468|alkali|5|1|0.82|S
38|Sr|Strontium|87.62|alkaline|5|2|0.95|S
39|Y|Yttrium|88.906|transition|5|3|1.22|S
40|Zr|Zirconium|91.224|transition|5|4|1.33|S
41|Nb|Niobium|92.906|transition|5|5|1.60|S
42|Mo|Molybdenum|95.96|transition|5|6|2.16|S
43|Tc|Technetium|98|transition|5|7|1.90|S
44|Ru|Ruthenium|101.07|transition|5|8|2.20|S
45|Rh|Rhodium|102.906|transition|5|9|2.28|S
46|Pd|Palladium|106.42|transition|5|10|2.20|S
47|Ag|Silver|107.868|transition|5|11|1.93|S
48|Cd|Cadmium|112.414|transition|5|12|1.69|S
49|In|Indium|114.818|post-trans|5|13|1.78|S
50|Sn|Tin|118.710|post-trans|5|14|1.96|S
51|Sb|Antimony|121.760|metalloid|5|15|2.05|S
52|Te|Tellurium|127.60|metalloid|5|16|2.10|S
53|I|Iodine|126.904|halogen|5|17|2.66|S
54|Xe|Xenon|131.293|noble|5|18|2.60|G
55|Cs|Caesium|132.905|alkali|6|1|0.79|S
56|Ba|Barium|137.327|alkaline|6|2|0.89|S
57|La|Lanthanum|138.905|lanthanide|6|0|1.10|S
58|Ce|Cerium|140.116|lanthanide|6|0|1.12|S
59|Pr|Praseodymium|140.908|lanthanide|6|0|1.13|S
60|Nd|Neodymium|144.242|lanthanide|6|0|1.14|S
61|Pm|Promethium|145|lanthanide|6|0|0|S
62|Sm|Samarium|150.36|lanthanide|6|0|1.17|S
63|Eu|Europium|151.964|lanthanide|6|0|0|S
64|Gd|Gadolinium|157.25|lanthanide|6|0|1.20|S
65|Tb|Terbium|158.925|lanthanide|6|0|0|S
66|Dy|Dysprosium|162.500|lanthanide|6|0|1.22|S
67|Ho|Holmium|164.930|lanthanide|6|0|1.23|S
68|Er|Erbium|167.259|lanthanide|6|0|1.24|S
69|Tm|Thulium|168.934|lanthanide|6|0|1.25|S
70|Yb|Ytterbium|173.054|lanthanide|6|0|0|S
71|Lu|Lutetium|174.967|lanthanide|6|0|1.27|S
72|Hf|Hafnium|178.49|transition|6|4|1.30|S
73|Ta|Tantalum|180.948|transition|6|5|1.50|S
74|W|Tungsten|183.84|transition|6|6|2.36|S
75|Re|Rhenium|186.207|transition|6|7|1.90|S
76|Os|Osmium|190.23|transition|6|8|2.20|S
77|Ir|Iridium|192.217|transition|6|9|2.20|S
78|Pt|Platinum|195.084|transition|6|10|2.28|S
79|Au|Gold|196.967|transition|6|11|2.54|S
80|Hg|Mercury|200.592|transition|6|12|2.00|L
81|Tl|Thallium|204.38|post-trans|6|13|1.62|S
82|Pb|Lead|207.2|post-trans|6|14|2.33|S
83|Bi|Bismuth|208.980|post-trans|6|15|2.02|S
84|Po|Polonium|209|metalloid|6|16|2.00|S
85|At|Astatine|210|halogen|6|17|2.20|S
86|Rn|Radon|222|noble|6|18|0|G
87|Fr|Francium|223|alkali|7|1|0.70|S
88|Ra|Radium|226|alkaline|7|2|0.90|S
89|Ac|Actinium|227|actinide|7|0|1.10|S
90|Th|Thorium|232.038|actinide|7|0|1.30|S
91|Pa|Protactinium|231.036|actinide|7|0|1.50|S
92|U|Uranium|238.029|actinide|7|0|1.38|S
93|Np|Neptunium|237|actinide|7|0|1.36|S
94|Pu|Plutonium|244|actinide|7|0|1.28|S
95|Am|Americium|243|actinide|7|0|1.30|S
96|Cm|Curium|247|actinide|7|0|1.30|S
97|Bk|Berkelium|247|actinide|7|0|1.30|S
98|Cf|Californium|251|actinide|7|0|1.30|S
99|Es|Einsteinium|252|actinide|7|0|1.30|S
100|Fm|Fermium|257|actinide|7|0|1.30|S
101|Md|Mendelevium|258|actinide|7|0|1.30|S
102|No|Nobelium|259|actinide|7|0|1.30|S
103|Lr|Lawrencium|266|actinide|7|0|0|S
104|Rf|Rutherfordium|267|transition|7|4|0|S
105|Db|Dubnium|268|transition|7|5|0|S
106|Sg|Seaborgium|271|transition|7|6|0|S
107|Bh|Bohrium|272|transition|7|7|0|S
108|Hs|Hassium|270|transition|7|8|0|S
109|Mt|Meitnerium|276|transition|7|9|0|S
110|Ds|Darmstadtium|281|transition|7|10|0|S
111|Rg|Roentgenium|280|transition|7|11|0|S
112|Cn|Copernicium|285|transition|7|12|0|S
113|Nh|Nihonium|284|post-trans|7|13|0|S
114|Fl|Flerovium|289|post-trans|7|14|0|S
115|Mc|Moscovium|288|post-trans|7|15|0|S
116|Lv|Livermorium|293|post-trans|7|16|0|S
117|Ts|Tennessine|294|halogen|7|17|0|S
118|Og|Oganesson|294|noble|7|18|0|G"#;

/// One parsed line of [`ELEMENTS_DATA`]; all fields borrow from the table.
struct ElementRecord<'a> {
    z: u32,
    symbol: &'a str,
    name: &'a str,
    mass: &'a str,
    category: &'a str,
    period: &'a str,
    group: &'a str,
    electronegativity: &'a str,
    state: &'a str,
}

/// Splits one table line into its nine fields; returns `None` for short lines.
fn parse_element_line(line: &str) -> Option<ElementRecord<'_>> {
    let mut fields = line.splitn(9, '|');
    Some(ElementRecord {
        z: fields.next()?.parse().unwrap_or(0),
        symbol: fields.next()?,
        name: fields.next()?,
        mass: fields.next()?,
        category: fields.next()?,
        period: fields.next()?,
        group: fields.next()?,
        electronegativity: fields.next()?,
        state: fields.next()?,
    })
}

/// Renders the multi-line description card for a single element.
fn describe_element(rec: &ElementRecord<'_>) -> String {
    let sym = rec.symbol;
    let name = rec.name;
    let z = rec.z;
    let period = rec.period;

    let category = match rec.category {
        "alkali" => "Alkali Metal",
        "alkaline" => "Alkaline Earth Metal",
        "transition" => "Transition Metal",
        "post-trans" => "Post-Transition Metal",
        "metalloid" => "Metalloid",
        "nonmetal" => "Nonmetal",
        "halogen" => "Halogen",
        "noble" => "Noble Gas",
        "lanthanide" => "Lanthanide",
        "actinide" => "Actinide",
        other => other,
    };
    // Group "0" is the table's marker for "no numbered group".
    let group_disp = if rec.group == "0" {
        match rec.category {
            "lanthanide" => "La series",
            "actinide" => "Ac series",
            _ => "\u{2014}",
        }
    } else {
        rec.group
    };
    // Electronegativity "0" is the table's marker for "no value".
    let en_disp = if rec.electronegativity == "0" {
        "N/A".to_string()
    } else {
        format!("{} (Pauling)", rec.electronegativity)
    };
    let state_disp = match rec.state {
        "S" => "Solid",
        "L" => "Liquid",
        "G" => "Gas",
        _ => "Unknown",
    };
    // Masses stored without a decimal point get the isotope label.
    let mass_disp = if rec.mass.contains('.') {
        format!("{} u", rec.mass)
    } else {
        format!("{} u (most stable isotope)", rec.mass)
    };

    format!(
        "{sym} {name} (Z = {z})\n\
         {sep}\n\
         Atomic Mass: {mass_disp}\n\
         Category: {category}\n\
         Period / Group: {period} / {group_disp}\n\
         Electronegativity: {en_disp}\n\
         State at STP: {state_disp}",
        sep = "\u{2500}".repeat(42),
    )
}

/// Looks up a chemical element by atomic number, symbol, or name.
///
/// Matching is case-insensitive and runs in two passes over [`ELEMENTS_DATA`]:
///
/// 1. exact match on atomic number, symbol, or full name;
/// 2. if nothing matched exactly, the first element whose name starts with the query.
///
/// Exact matches go first so that a symbol such as `S` or `Ca` resolves to
/// Sulfur / Calcium rather than to an earlier element whose name merely begins
/// with those letters (Sodium, Carbon).
///
/// # Errors
/// Returns `Err` with a usage hint when the query is blank or matches no element.
pub fn lookup_element(query: &str) -> Result<String, String> {
    let q = query.trim();
    if q.is_empty() {
        return Err(
            "No element specified. Try a symbol (H, Au), name (Gold), or atomic number (79)."
                .into(),
        );
    }
    let q_lower = q.to_ascii_lowercase();
    let q_num: Option<u32> = q.parse().ok();

    let records = || ELEMENTS_DATA.lines().filter_map(parse_element_line);

    let exact = records().find(|rec| {
        q_num == Some(rec.z)
            || rec.symbol.eq_ignore_ascii_case(q)
            || rec.name.eq_ignore_ascii_case(q)
    });
    let found = exact
        .or_else(|| records().find(|rec| rec.name.to_ascii_lowercase().starts_with(&q_lower)));

    match found {
        Some(rec) => Ok(describe_element(&rec)),
        None => Err(format!(
            "Element '{}' not found.\nTry: symbol (H, Au, Fe), name (Gold, Iron), or atomic number (79, 26).",
            q
        )),
    }
}
2081
2082pub async fn hash_input(input: &str, algo: &str) -> Result<String, String> {
2085 let safe_input = input.replace('\\', "\\\\").replace('"', "\\\"");
2086 let safe_algo = algo.trim().to_ascii_lowercase().replace('"', "");
2087
2088 let script = format!(
2089 r####"import hashlib, os, sys
2090
2091_target = "{safe_input}"
2092_algo = "{safe_algo}"
2093
2094_is_file = os.path.isfile(_target)
2095if _is_file:
2096 with open(_target, 'rb') as _fh:
2097 _data = _fh.read()
2098 _sz = len(_data)
2099 if _sz >= 1_048_576: _szlbl = "%.2f MB" % (_sz / 1_048_576)
2100 elif _sz >= 1024: _szlbl = "%.1f KB" % (_sz / 1024)
2101 else: _szlbl = str(_sz) + " bytes"
2102 _label = "File: " + _target + " (" + _szlbl + ")"
2103else:
2104 _data = _target.encode('utf-8')
2105 _label = 'Text: "' + _target + '"'
2106
2107_algos = ['md5', 'sha1', 'sha256', 'sha512'] if _algo in ('all', '') else [_algo]
2108print(_label)
2109print()
2110for _a in _algos:
2111 try:
2112 _h = hashlib.new(_a)
2113 _h.update(_data)
2114 print(_a.upper().ljust(10) + _h.hexdigest())
2115 except ValueError as _e:
2116 print(_a + ": " + str(_e), file=sys.stderr); sys.exit(1)
2117"####,
2118 safe_input = safe_input,
2119 safe_algo = safe_algo,
2120 );
2121
2122 let sandbox_args = serde_json::json!({
2123 "language": "python",
2124 "code": script,
2125 "timeout_seconds": 30
2126 });
2127 crate::tools::code_sandbox::execute(&sandbox_args).await
2128}
2129
2130pub async fn encode_decode(text: &str, codec: &str, is_decode: bool) -> Result<String, String> {
2133 let text_hex: String = text.bytes().map(|b| format!("{:02x}", b)).collect();
2134 let safe_codec = codec.trim().to_ascii_lowercase().replace('"', "");
2135 let mode = if is_decode { "decode" } else { "encode" };
2136
2137 let script = format!(
2138 r####"import base64 as _b64, binascii as _ba, sys
2139import urllib.parse as _up
2140
2141_text = bytes.fromhex("{text_hex}").decode('utf-8', errors='replace')
2142_codec = "{safe_codec}"
2143_mode = "{mode}"
2144_CODECS = "base64 hex url rot13 html binary"
2145
2146try:
2147 if _mode == "encode":
2148 if _codec in ("base64", "b64", ""):
2149 print(_b64.b64encode(_text.encode('utf-8')).decode())
2150 elif _codec in ("hex", "hexadecimal"):
2151 print(_ba.hexlify(_text.encode('utf-8')).decode())
2152 elif _codec in ("url", "urlencode", "percent"):
2153 print(_up.quote(_text, safe=''))
2154 elif _codec == "rot13":
2155 import codecs as _cd; print(_cd.encode(_text, 'rot_13'))
2156 elif _codec in ("html", "htmlentities"):
2157 import html as _ht; print(_ht.escape(_text))
2158 elif _codec in ("binary", "bin"):
2159 print(' '.join(bin(b)[2:].zfill(8) for b in _text.encode('utf-8')))
2160 else:
2161 print("Unknown codec: " + _codec + ". Supported: " + _CODECS, file=sys.stderr); sys.exit(1)
2162 else:
2163 if _codec in ("base64", "b64", ""):
2164 print(_b64.b64decode(_text.strip() + "==").decode('utf-8', errors='replace'))
2165 elif _codec in ("hex", "hexadecimal"):
2166 print(_ba.unhexlify(_text.replace(' ', '')).decode('utf-8', errors='replace'))
2167 elif _codec in ("url", "urlencode", "percent"):
2168 print(_up.unquote(_text))
2169 elif _codec == "rot13":
2170 import codecs as _cd; print(_cd.decode(_text, 'rot_13'))
2171 elif _codec in ("html", "htmlentities"):
2172 import html as _ht; print(_ht.unescape(_text))
2173 elif _codec in ("binary", "bin"):
2174 _bytes = bytes(int(b, 2) for b in _text.split() if b)
2175 print(_bytes.decode('utf-8', errors='replace'))
2176 else:
2177 print("Unknown codec: " + _codec + ". Supported: " + _CODECS, file=sys.stderr); sys.exit(1)
2178except Exception as _e:
2179 print("Error: " + str(_e), file=sys.stderr); sys.exit(1)
2180"####,
2181 text_hex = text_hex,
2182 safe_codec = safe_codec,
2183 mode = mode,
2184 );
2185
2186 let sandbox_args = serde_json::json!({
2187 "language": "python",
2188 "code": script,
2189 "timeout_seconds": 10
2190 });
2191 crate::tools::code_sandbox::execute(&sandbox_args).await
2192}