hematite/tools/
scientific.rs

1use serde_json::Value;
2
3pub async fn scientific_compute(args: &Value) -> Result<String, String> {
4    let mode = args["mode"].as_str().ok_or(
5        "Missing 'mode' (symbolic, units, complexity, ledger, dataset, regression, hypothesis, matrix)",
6    )?;
7
8    match mode {
9        "symbolic" => solve_symbolic(args).await,
10        "units" => verify_units(args).await,
11        "complexity" => audit_complexity(args).await,
12        "ledger" => manage_ledger(args).await,
13        "dataset" => calculate_on_dataset(args).await,
14        "regression" => run_regression(args).await,
15        "hypothesis" => run_hypothesis(args).await,
16        "matrix" => run_matrix(args).await,
17        _ => Err(format!("Unknown scientific mode: {}", mode)),
18    }
19}
20
21async fn solve_symbolic(args: &Value) -> Result<String, String> {
22    let expr = args["expr"]
23        .as_str()
24        .ok_or("Missing 'expr' for symbolic mode")?;
25    let target = args["target"].as_str().unwrap_or("solve"); // solve, simplify, integrate, diff
26    let latex = args["latex"].as_bool().unwrap_or(false);
27
28    let python_script = format!(
29        "import sympy\n\
30         from sympy import symbols, solve, simplify, integrate, diff, Eq, latex\n\
31         # Attempt to find symbols automatically\n\
32         import re\n\
33         raw_expr = r\"{}\"\n\
34         # Extract likely symbols (single letters or words starting with letter)\n\
35         sym_names = set(re.findall(r'\\b[a-zA-Z][a-zA-Z0-9]*\\b', raw_expr))\n\
36         # Remove common functions\n\
37         sym_names -= {{'sin', 'cos', 'tan', 'exp', 'log', 'sqrt', 'pi', 'E', 'oo', 'solve', 'simplify', 'integrate', 'diff'}}\n\
38         sym_dict = {{name: symbols(name) for name in sym_names}}\n\
39         \n\
40         try:\n\
41             if \"=\" in raw_expr and \"{}\" == \"solve\":\n\
42                 lhs, rhs = raw_expr.split(\"=\")\n\
43                 result = solve(Eq(eval(lhs, {{'__builtins__': None}}, sym_dict), eval(rhs, {{'__builtins__': None}}, sym_dict)))\n\
44             else:\n\
45                 expr_obj = eval(raw_expr, {{'__builtins__': None}}, sym_dict)\n\
46                 if \"{}\" == \"simplify\": result = simplify(expr_obj)\n\
47                 elif \"{}\" == \"integrate\": result = integrate(expr_obj)\n\
48                 elif \"{}\" == \"diff\": result = diff(expr_obj)\n\
49                 else: result = solve(expr_obj)\n\
50             \n\
51             print(f\"RESULT: {{result}}\")\n\
52             if {}:\n\
53                 print(f\"LATEX: {{latex(result)}}\")\n\
54         except Exception as e:\n\
55             print(f\"ERROR: {{e}}\")\n",
56        expr, target, target, target, target, latex
57    );
58
59    execute_in_sandbox(&python_script).await
60}
61
62async fn verify_units(args: &Value) -> Result<String, String> {
63    let calculation = args["calculation"]
64        .as_str()
65        .ok_or("Missing 'calculation' for units mode")?;
66
67    let python_script = format!(
68        "try:\n\
69         # Simple Unit System (SI focus)\n\
70         class UnitValue:\n\
71             def __init__(self, val, dims):\n\
72                 self.val = val\n\
73                 self.dims = dims # {{'m': 1, 's': -1, etc}}\n\
74             def __add__(self, other):\n\
75                 if self.dims != other.dims: raise ValueError(f\"Dimension mismatch: {{self.dims}} vs {{other.dims}}\")\n\
76                 return UnitValue(self.val + other.val, self.dims)\n\
77             def __mul__(self, other):\n\
78                 new_dims = self.dims.copy()\n\
79                 for k, v in other.dims.items(): new_dims[k] = new_dims.get(k, 0) + v\n\
80                 return UnitValue(self.val * other.val, new_dims)\n\
81             def __truediv__(self, other):\n\
82                 new_dims = self.dims.copy()\n\
83                 for k, v in other.dims.items(): new_dims[k] = new_dims.get(k, 0) - v\n\
84                 return UnitValue(self.val / other.val, new_dims)\n\
85             def __repr__(self): return f\"{{self.val}} ({{self.dims}})\"\n\
86         \n\
87         # Helper to parse strings like '10m'\n\
88         def u(s):\n\
89             m = __import__('re').match(r'([\\d\\.]+)([a-zA-Z]+)', s)\n\
90             val = float(m.group(1))\n\
91             unit = m.group(2)\n\
92             return UnitValue(val, {{unit: 1}})\n\
93         \n\
94         # Executing the calculation with unit objects\n\
95         # User input is expected to use u('10m') etc.\n\
96         raw_calc = r\"{}\"\n\
97         # Basic auto-wrap for units in the expression if they look like 10m\n\
98         wrapped = __import__('re').sub(r'(\\d+)([a-z]+)', r\"u('\\1\\2')\", raw_calc)\n\
99         result = eval(wrapped, {{'u': u}})\n\
100         print(f\"RESULT: {{result}}\")\n\
101         except Exception as e:\n\
102         print(f\"ERROR: {{e}}\")\n",
103        calculation
104    );
105
106    execute_in_sandbox(&python_script).await
107}
108
109async fn audit_complexity(args: &Value) -> Result<String, String> {
110    let snippet = args["snippet"]
111        .as_str()
112        .ok_or("Missing 'snippet' for complexity mode")?;
113
114    let python_script = format!(
115        "import time\n\
116         import math\n\
117         def run_target(n):\n\
118             {}\n\
119         \n\
120         samples = [10, 50, 100, 200, 500]\n\
121         times = []\n\
122         for n in samples:\n\
123             start = time.perf_counter()\n\
124             run_target(n)\n\
125             times.append(time.perf_counter() - start)\n\
126         \n\
127         # Simplified regression to guess Big-O\n\
128         # Compare growth rates: t/n, t/n^2, t/log(n)\n\
129         ratios_n = [t/n for t, n in zip(times, samples) if n > 0]\n\
130         ratios_n2 = [t/(n**2) for t, n in zip(times, samples) if n > 0]\n\
131         \n\
132         def variance(data):\n\
133             if not data: return 1.0\n\
134             avg = sum(data)/len(data)\n\
135             return sum((x-avg)**2 for x in data)/len(data)\n\
136         \n\
137         v_n = variance(ratios_n)\n\
138         v_n2 = variance(ratios_n2)\n\
139         \n\
140         if v_n < v_n2: complexity = \"O(N)\"\n\
141         elif v_n2 < v_n: complexity = \"O(N^2)\"\n\
142         else: complexity = \"O(Unknown)\"\n\
143         \n\
144         print(f\"RESULT: Empirically detected {{complexity}}\")\n\
145         print(f\"STATS: n={{samples}}, times={{[f'{{t:.6f}}s' for t in times]}}\")\n",
146        snippet.replace("\n", "\n    ")
147    );
148
149    execute_in_sandbox(&python_script).await
150}
151
152/// Headless dataset profiler — loads CSV / TSV / JSON / SQLite and produces a
153/// real computed statistical profile without requiring the model or a LIMIT clause.
154///
155/// The file is read directly inside the Python sandbox (no Rust-side JSON
156/// embedding), so even large files stay within the sandbox process limits.
157pub async fn analyze_dataset(path_str: &str) -> Result<String, String> {
158    if path_str.trim().is_empty() {
159        return Err("Missing file path for --analyze.".into());
160    }
161
162    // Escape backslashes (Windows paths) and double-quotes so the path can be
163    // safely embedded inside a Python double-quoted string literal.
164    let safe_path = path_str.replace('\\', "\\\\").replace('"', "\\\"");
165
166    let script = format!(
167        r####"import os, sys, csv as _csv, sqlite3 as _sql3
168from collections import Counter
169
170_path = "{safe_path}"
171_ext  = os.path.splitext(_path)[1].lower().lstrip('.')
172_data = []
173_col_order = None
174
175if _ext in ('csv', 'tsv'):
176    _delim = '\t' if _ext == 'tsv' else ','
177    try:
178        with open(_path, encoding='utf-8-sig', errors='replace', newline='') as _fh:
179            _rdr = _csv.DictReader(_fh, delimiter=_delim)
180            _col_order = list(_rdr.fieldnames) if _rdr.fieldnames else []
181            for _i, _row in enumerate(_rdr):
182                if _i >= 5000: break
183                _data.append(dict(_row))
184    except Exception as _e:
185        print("ERROR loading file: " + str(_e))
186        sys.exit(1)
187elif _ext == 'json':
188    try:
189        with open(_path, encoding='utf-8') as _fh:
190            _raw = json.load(_fh)
191        if isinstance(_raw, list):
192            _data = _raw[:5000]
193        elif isinstance(_raw, dict):
194            for _v in _raw.values():
195                if isinstance(_v, list):
196                    _data = _v[:5000]
197                    break
198    except Exception as _e:
199        print("ERROR loading file: " + str(_e))
200        sys.exit(1)
201elif _ext in ('db', 'sqlite', 'sqlite3'):
202    try:
203        with _sql3.connect(_path) as _con:
204            _cur = _con.cursor()
205            _cur.execute("SELECT name FROM sqlite_master WHERE type='table' LIMIT 1")
206            _tbl = _cur.fetchone()
207            if _tbl:
208                _cur.execute("SELECT * FROM [%s] LIMIT 5000" % _tbl[0])
209                _col_order = [_d[0] for _d in _cur.description]
210                _data = [dict(zip(_col_order, _r)) for _r in _cur.fetchall()]
211    except Exception as _e:
212        print("ERROR loading file: " + str(_e))
213        sys.exit(1)
214else:
215    print("ERROR: unsupported format '." + _ext + "'. Supported: csv, tsv, json, db/sqlite/sqlite3.")
216    sys.exit(1)
217
218if not _data:
219    print("No data found in: " + _path)
220    sys.exit(0)
221
222columns   = _col_order if _col_order else list(_data[0].keys())
223row_count = len(_data)
224data      = _data
225
226def _try_num(v):
227    if v is None: return None
228    try: return float(str(v).replace(',', '').replace('$', '').replace('%', '').strip())
229    except (ValueError, TypeError): return None
230
231def _ncol(c):
232    return [f for r in data for f in (_try_num(r.get(c)),) if f is not None]
233
234def _quart(vals, q):
235    s = sorted(vals)
236    n = len(s)
237    if n == 0: return float('nan')
238    if n == 1: return s[0]
239    idx = q * (n - 1)
240    lo, hi = int(idx), min(int(idx) + 1, n - 1)
241    return s[lo] + (idx - lo) * (s[hi] - s[lo])
242
243num_cols = []
244txt_cols = []
245for c in columns:
246    _nonempty = [r.get(c) for r in data
247                 if r.get(c) is not None and str(r.get(c, '')).strip() != '']
248    if not _nonempty:
249        txt_cols.append(c)
250        continue
251    _s    = _nonempty[:min(200, len(_nonempty))]
252    _hits = sum(1 for v in _s if _try_num(v) is not None)
253    (num_cols if _hits >= len(_s) * 0.8 else txt_cols).append(c)
254
255_miss = [(c, sum(1 for r in data
256                 if r.get(c) is None or str(r.get(c, '')).strip() == ''))
257         for c in columns]
258_miss = [(c, n) for c, n in _miss if n > 0]
259
260_sample_note = " (5000-row sample)" if row_count == 5000 else ""
261_fname = os.path.basename(_path)
262_H2 = "##"
263_H3 = "###"
264_out = []
265_out.append(_H2 + " Dataset Profile: " + _fname)
266_out.append("")
267_out.append("**File:** " + _path)
268_out.append("**Shape:** " + str(row_count) + " rows" + _sample_note + " x " + str(len(columns)) + " columns")
269_out.append("**Numeric (%d):** %s" % (len(num_cols), ", ".join(num_cols) if num_cols else "none"))
270_out.append("**Text/Mixed (%d):** %s" % (len(txt_cols), ", ".join(txt_cols) if txt_cols else "none"))
271_out.append("")
272
273if _miss:
274    _total_miss = sum(n for _, n in _miss)
275    _out.append("**Missing values:** " + str(_total_miss) + " cell(s) across " + str(len(_miss)) + " column(s)")
276    for c, n in _miss:
277        _pct = round(n * 100.0 / row_count, 1)
278        _out.append("  - " + c + ": " + str(n) + " missing (" + str(_pct) + "%)")
279    _out.append("")
280
281if num_cols:
282    _out.append(_H3 + " Numeric Column Statistics")
283    _out.append("")
284    _hdr = "%-22s  %6s  %10s  %10s  %10s  %10s  %10s  %10s  %10s  %8s" % (
285        "Column", "N", "Min", "P25", "Median", "P75", "Max", "Mean", "Std Dev", "Outliers")
286    _out.append(_hdr)
287    _out.append("-" * len(_hdr))
288    for c in num_cols:
289        _vals = _ncol(c)
290        if not _vals:
291            _out.append("%-22s  (no numeric values)" % c[:22])
292            continue
293        _mn, _mx = min(_vals), max(_vals)
294        _mean = sum(_vals) / len(_vals)
295        _med  = statistics.median(_vals)
296        _std  = statistics.stdev(_vals) if len(_vals) >= 2 else 0.0
297        _q1   = _quart(_vals, 0.25)
298        _q3   = _quart(_vals, 0.75)
299        _iqr  = _q3 - _q1
300        _otl  = sum(1 for v in _vals if v < _q1 - 1.5 * _iqr or v > _q3 + 1.5 * _iqr)
301        _out.append("%-22s  %6d  %10.4g  %10.4g  %10.4g  %10.4g  %10.4g  %10.4g  %10.4g  %8d" % (
302            c[:22], len(_vals), _mn, _q1, _med, _q3, _mx, _mean, _std, _otl))
303    _out.append("")
304
305if txt_cols:
306    _out.append(_H3 + " Text Column Statistics")
307    _out.append("")
308    for c in txt_cols:
309        _vals = [str(r.get(c, '') or '').strip() for r in data
310                 if r.get(c) is not None and str(r.get(c, '')).strip() != '']
311        if not _vals:
312            _out.append("**" + c + "**: (all missing)")
313            _out.append("")
314            continue
315        _uniq = len(set(_vals))
316        _card = round(_uniq * 100.0 / len(_vals), 1)
317        _out.append("**" + c + "**: " + str(len(_vals)) + " non-null, " +
318                    str(_uniq) + " unique (" + str(_card) + "% cardinality)")
319        for _v, _n in Counter(_vals).most_common(5):
320            _short = (_v[:42] + "...") if len(_v) > 42 else _v
321            _vpct  = round(_n * 100.0 / len(_vals), 1)
322            _out.append("  - `" + _short + "`: " + str(_n) + " (" + str(_vpct) + "%)")
323        _out.append("")
324
325if HAS_NUMPY and len(num_cols) >= 2:
326    try:
327        import pandas as pd
328        _df = pd.DataFrame(data)[num_cols]
329        for _c in _df.columns:
330            _df[_c] = pd.to_numeric(_df[_c], errors='coerce')
331        _corr = _df.corr()
332        _out.append(_H3 + " Correlation Matrix")
333        _out.append("")
334        _heads = [c[:10] for c in num_cols]
335        _out.append("            " + "".join("  %10s" % h for h in _heads))
336        for _i, c in enumerate(num_cols):
337            _rs = "%12s" % _heads[_i]
338            for _j in range(len(num_cols)):
339                _rs += "  %10.3f" % _corr.iloc[_i, _j]
340            _out.append(_rs)
341        _out.append("")
342    except Exception:
343        pass
344
345_out.append(_H3 + " Sample Rows (first 5)")
346_out.append("")
347_out.append(" | ".join(columns))
348_out.append(" | ".join("---" for _ in columns))
349for _row in data[:5]:
350    _out.append(" | ".join(str(_row.get(c, '') or '')[:20] for c in columns))
351
352print("\n".join(_out))
353"####,
354        safe_path = safe_path,
355    );
356
357    let sandbox_args = serde_json::json!({
358        "language": "python",
359        "code": script,
360        "timeout_seconds": 30
361    });
362    crate::tools::code_sandbox::execute(&sandbox_args).await
363}
364
365async fn execute_in_sandbox(script: &str) -> Result<String, String> {
366    let sandbox_args = serde_json::json!({
367        "language": "python",
368        "code": script
369    });
370
371    crate::tools::code_sandbox::execute(&sandbox_args).await
372}
373
374async fn manage_ledger(args: &Value) -> Result<String, String> {
375    let action = args["action"]
376        .as_str()
377        .ok_or("Missing 'action' (read, append)")?;
378    let ledger_path = std::path::Path::new(".hematite/docs/scientific_ledger.md");
379
380    if let Some(parent) = ledger_path.parent() {
381        std::fs::create_dir_all(parent).map_err(|e| e.to_string())?;
382    }
383
384    match action {
385        "read" => {
386            if !ledger_path.exists() {
387                return Ok("Scientific Ledger is currently empty.".to_string());
388            }
389            std::fs::read_to_string(ledger_path).map_err(|e| e.to_string())
390        }
391        "append" => {
392            let content = args["content"]
393                .as_str()
394                .ok_or("Missing 'content' to append")?;
395            let timestamp = chrono::Local::now().format("%Y-%m-%d %H:%M:%S").to_string();
396            let entry = format!(
397                "\n### [{}] Scientific Derivation\n{}\n---\n",
398                timestamp, content
399            );
400
401            use std::io::Write;
402            let mut file = std::fs::OpenOptions::new()
403                .create(true)
404                .append(true)
405                .open(ledger_path)
406                .map_err(|e| e.to_string())?;
407
408            file.write_all(entry.as_bytes())
409                .map_err(|e| e.to_string())?;
410            Ok("Derivation successfully persisted to Scientific Ledger (RAG-indexed).".to_string())
411        }
412        _ => Err(format!("Unknown ledger action: {}", action)),
413    }
414}
415
416async fn calculate_on_dataset(args: &Value) -> Result<String, String> {
417    let path_str = args["path"].as_str().ok_or("Missing 'path' to dataset")?;
418    let sql = args["sql"]
419        .as_str()
420        .unwrap_or("SELECT * FROM data LIMIT 10000");
421    let python_op = args["python_op"]
422        .as_str()
423        .unwrap_or("print(f'{row_count} rows loaded. Columns: {columns}')");
424
425    let path = std::path::PathBuf::from(path_str);
426    let data = crate::tools::data_query::query_to_json_helper(&path, sql).await?;
427    let data_json = serde_json::to_string(&data).map_err(|e| e.to_string())?;
428
429    // Column-aware data analysis environment:
430    // - col(name)        → all values for a named column (including None)
431    // - ncol(name)       → numeric-only values for a column (skips blanks/non-numeric)
432    // - top(n, by)       → top N rows sorted descending by column name
433    // - group_sum(g, v)  → {group_key: sum_of_value_col}
434    // - group_count(g)   → {group_key: count}
435    // - df / HAS_PANDAS  → pandas DataFrame if pandas is installed
436    let python_script = format!(
437        r#"import json, math, statistics, datetime, decimal, re
438from collections import Counter, defaultdict
439
440data = {data_json}
441columns = list(data[0].keys()) if data else []
442row_count = len(data)
443
444def col(name):
445    """All values for a named column."""
446    return [row.get(name) for row in data]
447
448def ncol(name):
449    """Numeric-only values for a named column (skips None/blank/non-numeric)."""
450    out = []
451    for row in data:
452        v = row.get(name)
453        if v is not None and v != '':
454            try:
455                out.append(float(v))
456            except (ValueError, TypeError):
457                pass
458    return out
459
460def top(n=10, by=None):
461    """Top N rows sorted descending by column name."""
462    key = by or (columns[0] if columns else None)
463    def _key(r):
464        try: return float(r.get(key, 0) or 0)
465        except: return 0.0
466    return sorted(data, key=_key, reverse=True)[:n]
467
468def group_sum(group_col, value_col):
469    """Sum value_col grouped by group_col. Returns dict sorted by value desc."""
470    acc = defaultdict(float)
471    for row in data:
472        k = row.get(group_col, 'unknown') or 'unknown'
473        try: acc[k] += float(row.get(value_col, 0) or 0)
474        except (ValueError, TypeError): pass
475    return dict(sorted(acc.items(), key=lambda x: x[1], reverse=True))
476
477def group_count(group_col):
478    """Count rows per unique value in group_col."""
479    return dict(Counter(str(row.get(group_col, '')) for row in data).most_common())
480
481def group_mean(group_col, value_col):
482    """Mean of value_col grouped by group_col."""
483    acc = defaultdict(list)
484    for row in data:
485        k = row.get(group_col, 'unknown') or 'unknown'
486        try: acc[k].append(float(row.get(value_col, 0) or 0))
487        except (ValueError, TypeError): pass
488    return {{k: statistics.mean(v) for k, v in acc.items() if v}}
489
490def missing(name):
491    """Count of missing/None/blank values in a column."""
492    return sum(1 for row in data if row.get(name) is None or row.get(name) == '')
493
494try:
495    import pandas as pd
496    import numpy as np
497    df = pd.DataFrame(data)
498    for c in df.columns:
499        try: df[c] = pd.to_numeric(df[c])
500        except (ValueError, TypeError): pass
501    HAS_PANDAS = True
502except ImportError:
503    HAS_PANDAS = False
504
505print(f"Loaded: {{row_count}} rows x {{len(columns)}} columns")
506print(f"Columns: {{columns}}")
507print(f"Pandas: {{HAS_PANDAS}}")
508print()
509
510{python_op}
511"#,
512        data_json = data_json,
513        python_op = python_op
514    );
515
516    execute_in_sandbox(&python_script).await
517}
518
519async fn run_regression(args: &Value) -> Result<String, String> {
520    let path_str = args["path"]
521        .as_str()
522        .ok_or("Missing 'path' for regression mode")?;
523    let y_col = args["y"]
524        .as_str()
525        .ok_or("Missing 'y' (target column) for regression mode")?;
526
527    let x_cols: Vec<String> = match &args["x"] {
528        Value::String(s) => vec![s.clone()],
529        Value::Array(arr) => arr
530            .iter()
531            .filter_map(|v| v.as_str().map(|s| s.to_string()))
532            .collect(),
533        _ => return Err("Missing 'x' (predictor column(s)) for regression mode".into()),
534    };
535    if x_cols.is_empty() {
536        return Err("'x' must specify at least one predictor column".into());
537    }
538
539    let reg_type = args["type"].as_str().unwrap_or("linear");
540    let degree = args["degree"].as_u64().unwrap_or(2).min(10) as usize;
541
542    let safe_path = path_str.replace('\\', "\\\\").replace('"', "\\\"");
543    let safe_y = y_col.replace('"', "\\\"");
544    let x_json = serde_json::to_string(&x_cols).unwrap_or_else(|_| "[]".to_string());
545
546    let script = format!(
547        r####"import os, sys, csv as _csv, sqlite3 as _sql3, math
548
549_path   = "{safe_path}"
550_xcols  = {x_json}
551_ycol   = "{safe_y}"
552_rtype  = "{reg_type}"
553_degree = {degree}
554_ext    = os.path.splitext(_path)[1].lower().lstrip('.')
555_data   = []
556
557if _ext in ('csv', 'tsv'):
558    _delim = '\t' if _ext == 'tsv' else ','
559    try:
560        with open(_path, encoding='utf-8-sig', errors='replace', newline='') as _fh:
561            _rdr = _csv.DictReader(_fh, delimiter=_delim)
562            for _i, _row in enumerate(_rdr):
563                if _i >= 5000: break
564                _data.append(dict(_row))
565    except Exception as _e:
566        print("ERROR loading file: " + str(_e))
567        sys.exit(1)
568elif _ext == 'json':
569    try:
570        with open(_path, encoding='utf-8') as _fh:
571            _raw = json.load(_fh)
572        if isinstance(_raw, list):
573            _data = _raw[:5000]
574        elif isinstance(_raw, dict):
575            for _v in _raw.values():
576                if isinstance(_v, list):
577                    _data = _v[:5000]
578                    break
579    except Exception as _e:
580        print("ERROR loading file: " + str(_e))
581        sys.exit(1)
582elif _ext in ('db', 'sqlite', 'sqlite3'):
583    try:
584        with _sql3.connect(_path) as _con:
585            _cur = _con.cursor()
586            _cur.execute("SELECT name FROM sqlite_master WHERE type='table' LIMIT 1")
587            _tbl = _cur.fetchone()
588            if _tbl:
589                _cur.execute("SELECT * FROM [%s] LIMIT 5000" % _tbl[0])
590                _col_order = [_d[0] for _d in _cur.description]
591                _data = [dict(zip(_col_order, _r)) for _r in _cur.fetchall()]
592    except Exception as _e:
593        print("ERROR loading file: " + str(_e))
594        sys.exit(1)
595else:
596    print("ERROR: unsupported format '." + _ext + "'. Supported: csv, tsv, json, db/sqlite/sqlite3.")
597    sys.exit(1)
598
599if not _data:
600    print("No data found in: " + _path)
601    sys.exit(0)
602
603def _tryf(v):
604    if v is None: return None
605    try: return float(str(v).replace(',', '').replace('$', '').replace('%', '').strip())
606    except: return None
607
608_yx = []
609for _row in _data:
610    _yv  = _tryf(_row.get(_ycol))
611    if _yv is None: continue
612    _xvs = [_tryf(_row.get(_xc)) for _xc in _xcols]
613    if any(v is None for v in _xvs): continue
614    _yx.append((_yv, _xvs))
615
616_n = len(_yx)
617if _n < 3:
618    print("ERROR: insufficient numeric data (need >=3 valid rows, got %d)" % _n)
619    sys.exit(1)
620
621_ys   = [p[0] for p in _yx]
622_xmat = [p[1] for p in _yx]
623_ym   = sum(_ys) / _n
624
625_out = []
626_out.append("## Regression Results")
627_out.append("")
628_out.append("**File:** " + os.path.basename(_path))
629_out.append("**Y (target):** " + _ycol)
630_out.append("**X (predictors):** " + ", ".join(_xcols))
631_out.append("**N (valid rows):** %d" % _n)
632_out.append("")
633
634if len(_xcols) == 1 and _rtype == "linear":
635    _xv   = [r[0] for r in _xmat]
636    _xm   = sum(_xv) / _n
637    _ssxy = sum((_x - _xm) * (_y - _ym) for _x, _y in zip(_xv, _ys))
638    _ssx  = sum((_x - _xm)**2 for _x in _xv)
639    _ssy  = sum((_y - _ym)**2 for _y in _ys)
640    if _ssx == 0:
641        print("ERROR: predictor has zero variance.")
642        sys.exit(1)
643    _slope = _ssxy / _ssx
644    _inter = _ym - _slope * _xm
645    _preds = [_slope * _x + _inter for _x in _xv]
646    _res   = [_y - _p for _y, _p in zip(_ys, _preds)]
647    _sse   = sum(r**2 for r in _res)
648    _r2    = 1.0 - _sse / _ssy if _ssy > 0 else 0.0
649    _rmse  = math.sqrt(_sse / _n)
650    _pr    = _ssxy / math.sqrt(_ssx * _ssy) if _ssx > 0 and _ssy > 0 else 0.0
651    _rm    = sum(_res) / _n
652    _rstd  = math.sqrt(sum((r - _rm)**2 for r in _res) / _n)
653    _out.append("**Type:** Simple Linear Regression (pure-Python OLS)")
654    _out.append("**Equation:**  y = %+.6g x %+.6g" % (_slope, _inter))
655    _out.append("**R-squared:** %.4f" % _r2)
656    _out.append("**RMSE:** %.4g" % _rmse)
657    _out.append("**Pearson r:** %.4f" % _pr)
658    _out.append("**Residuals:**  min=%.4g  max=%.4g  mean=%.4g  std=%.4g" % (
659        min(_res), max(_res), _rm, _rstd))
660elif HAS_NUMPY:
661    import numpy as _np
662    if _rtype == "polynomial" and len(_xcols) == 1:
663        _xv     = _np.array([r[0] for r in _xmat])
664        _ya     = _np.array(_ys)
665        _coeffs = _np.polyfit(_xv, _ya, _degree)
666        _preds  = _np.polyval(_coeffs, _xv)
667        _res    = _ya - _preds
668        _sse    = float(_np.sum(_res**2))
669        _sst    = float(_np.sum((_ya - _ym)**2))
670        _r2     = 1.0 - _sse / _sst if _sst > 0 else 0.0
671        _rmse   = float(_np.sqrt(_np.mean(_res**2)))
672        _out.append("**Type:** Polynomial Regression  degree=%d  (numpy polyfit)" % _degree)
673        _out.append("**Coefficients (highest power first):** " + ", ".join("%.6g" % c for c in _coeffs))
674        _out.append("**R-squared:** %.4f" % _r2)
675        _out.append("**RMSE:** %.4g" % _rmse)
676        _out.append("**Residuals:**  min=%.4g  max=%.4g  mean=%.4g  std=%.4g" % (
677            float(_np.min(_res)), float(_np.max(_res)),
678            float(_np.mean(_res)), float(_np.std(_res))))
679    else:
680        _Xm     = _np.column_stack([_np.ones(_n)] + [[r[i] for r in _xmat] for i in range(len(_xcols))])
681        _ya     = _np.array(_ys)
682        _coeffs, _, _, _ = _np.linalg.lstsq(_Xm, _ya, rcond=None)
683        _preds  = _Xm @ _coeffs
684        _res    = _ya - _preds
685        _sse    = float(_np.sum(_res**2))
686        _sst    = float(_np.sum((_ya - _ym)**2))
687        _r2     = 1.0 - _sse / _sst if _sst > 0 else 0.0
688        _rmse   = float(_np.sqrt(_np.mean(_res**2)))
689        _rm     = float(_np.mean(_res))
690        _rstd   = float(_np.std(_res))
691        _out.append("**Type:** Multiple Linear Regression (numpy lstsq OLS)")
692        _out.append("**Intercept:** %.6g" % _coeffs[0])
693        for _i, _xc in enumerate(_xcols):
694            _out.append("**%s coeff:** %.6g" % (_xc, _coeffs[_i + 1]))
695        _out.append("**R-squared:** %.4f" % _r2)
696        _out.append("**RMSE:** %.4g" % _rmse)
697        _out.append("**Residuals:**  min=%.4g  max=%.4g  mean=%.4g  std=%.4g" % (
698            float(_np.min(_res)), float(_np.max(_res)), _rm, _rstd))
699else:
700    _out.append("**Type:** Multiple/Polynomial Regression requires numpy.")
701    _out.append("Use a single predictor with type=linear for pure-Python OLS, or install numpy.")
702
703print("\n".join(_out))
704"####,
705        safe_path = safe_path,
706        x_json = x_json,
707        safe_y = safe_y,
708        reg_type = reg_type,
709        degree = degree,
710    );
711
712    let sandbox_args = serde_json::json!({
713        "language": "python",
714        "code": script,
715        "timeout_seconds": 30
716    });
717    crate::tools::code_sandbox::execute(&sandbox_args).await
718}
719
720/// Zero-model expression evaluator for `hematite --compute`.
721/// Evaluates arithmetic, trig, statistics, and common physical constants
722/// entirely inside the Python sandbox — no network, no model required.
723pub async fn compute_expr(expr: &str) -> Result<String, String> {
724    if expr.trim().is_empty() {
725        return Err("No expression provided.".into());
726    }
727    let safe_expr = expr.replace('\\', "\\\\").replace('"', "\\\"");
728
729    let script = format!(
730        r####"from math import *
731import statistics as _stat, re as _re, sys
732
733# ── Physical & mathematical constants ────────────────────────────────
734c_light  = 299_792_458.0          # m/s  — speed of light (exact)
735h_planck = 6.62607015e-34         # J·s  — Planck constant (exact)
736hbar     = h_planck / (2 * pi)    # J·s  — reduced Planck constant
737G_grav   = 6.67430e-11            # m³/(kg·s²) — gravitational constant
738k_B      = 1.380649e-23           # J/K  — Boltzmann constant (exact)
739N_A      = 6.02214076e23          # /mol — Avogadro's number (exact)
740R_gas    = 8.314462618            # J/(mol·K) — molar gas constant
741g_std    = 9.80665                # m/s² — standard gravity (exact)
742e_q      = 1.602176634e-19        # C    — elementary charge (exact)
743m_e      = 9.1093837015e-31       # kg   — electron mass
744m_p      = 1.67262192369e-27      # kg   — proton mass
745sigma_SB = 5.670374419e-8         # W/(m²·K⁴) — Stefan-Boltzmann
746eps_0    = 8.8541878128e-12       # F/m  — vacuum permittivity
747mu_0     = 1.25663706212e-6       # H/m  — vacuum permeability
748alpha_fs = 7.2973525693e-3        # — fine-structure constant
749atm      = 101_325.0              # Pa   — standard atmosphere
750
751# ── Statistics helpers ────────────────────────────────────────────────
752mean     = _stat.mean
753median   = _stat.median
754stdev    = _stat.stdev
755variance = _stat.variance
756try:    mode = _stat.mode
757except Exception: pass
758
759# ── Financial functions ───────────────────────────────────────────────
760def pmt(rate, nper, pv, fv=0, when=0):
761    """Periodic loan payment. pmt(0.05/12, 360, 300000)"""
762    if rate == 0: return -(pv + fv) / nper
763    pvif = (1 + rate) ** nper
764    r = rate / (pvif - 1) * -(pv * pvif + fv)
765    return r / (1 + rate) if when == 1 else r
766
767def fv(rate, nper, pmt_v, pv=0, when=0):
768    """Future value. fv(0.06/12, 120, -500)"""
769    if rate == 0: return -pv - pmt_v * nper
770    pvif = (1 + rate) ** nper
771    return -(pv * pvif + pmt_v * (1 + rate * when) * (pvif - 1) / rate)
772
773def pv(rate, nper, pmt_v, fv=0, when=0):
774    """Present value. pv(0.05/12, 360, -1500)"""
775    if rate == 0: return -fv - pmt_v * nper
776    pvif = (1 + rate) ** nper
777    return -(fv + pmt_v * (1 + rate * when) * (pvif - 1) / rate) / pvif
778
779def npv(rate, cashflows):
780    """Net present value. npv(0.1, [-1000, 200, 300, 400, 500])"""
781    return sum(cf / (1 + rate) ** t for t, cf in enumerate(cashflows))
782
783def irr(cashflows, guess=0.1):
784    """Internal rate of return (Newton-Raphson). irr([-1000, 300, 400, 500])"""
785    r = guess
786    for _ in range(200):
787        f  = sum(cf / (1 + r) ** t for t, cf in enumerate(cashflows))
788        df = sum(-t * cf / (1 + r) ** (t + 1) for t, cf in enumerate(cashflows))
789        if df == 0: break
790        r2 = r - f / df
791        if abs(r2 - r) < 1e-10: return r2
792        r = r2
793    return r
794
795def compound(principal, rate, n=1, t=1):
796    """Compound interest. compound(1000, 0.05, 12, 10)"""
797    return principal * (1 + rate / n) ** (n * t)
798
799def cagr(start, end, years):
800    """Compound annual growth rate. cagr(1000, 2000, 5) -> 0.1487"""
801    return (end / start) ** (1.0 / years) - 1
802
803def roi(gain, cost):
804    """Return on investment %. roi(1500, 1000) -> 50.0"""
805    return (gain - cost) / cost * 100.0
806
807def breakeven(fixed, price, var_cost):
808    """Break-even units. breakeven(10000, 25, 15) -> 1000"""
809    return fixed / (price - var_cost)
810
811def _fmt(v):
812    if isinstance(v, bool):    return str(v)
813    if isinstance(v, int):     return str(v)
814    if isinstance(v, float):
815        if isnan(v):           return "nan"
816        if isinf(v):           return "inf" if v > 0 else "-inf"
817        if v == int(v) and abs(v) < 1e15:
818            return str(int(v))
819        return "%.10g" % v
820    if isinstance(v, complex): return str(v)
821    if isinstance(v, (list, tuple)):
822        return "[" + ", ".join(_fmt(x) for x in v) + "]"
823    return str(v)
824
825_raw   = "{safe_expr}"
826_clean = _raw.strip()
827if _clean.endswith('='): _clean = _clean[:-1].strip()
828_clean = _clean.replace('^', '**').replace('×', '*').replace('÷', '/')
829
830# "X% of Y" — e.g. "15% of 89.99"
831_pm = _re.match(r'^([\d.]+)\s*(?:%%|percent)\s+of\s+([\d,. ]+)$', _clean, _re.I)
832if _pm:
833    print(_fmt(float(_pm.group(1)) / 100.0 *
834               float(_pm.group(2).replace(',','').replace(' ',''))))
835    sys.exit(0)
836
837try:
838    _r = eval(_clean)
839    print(_fmt(_r))
840except SyntaxError as _se:
841    print("Syntax error: " + str(_se))
842    sys.exit(1)
843except Exception as _e:
844    print("Error: " + str(_e))
845    sys.exit(1)
846"####,
847        safe_expr = safe_expr,
848    );
849
850    let sandbox_args = serde_json::json!({
851        "language": "python",
852        "code": script,
853        "timeout_seconds": 15
854    });
855    crate::tools::code_sandbox::execute(&sandbox_args).await
856}
857
858async fn run_hypothesis(args: &Value) -> Result<String, String> {
859    let test_type = args["test"].as_str().unwrap_or("ttest_ind");
860    let alpha = args["alpha"].as_f64().unwrap_or(0.05);
861    let mu = args["mu"].as_f64().unwrap_or(0.0);
862
863    let a_json = match &args["a"] {
864        Value::Array(arr) => serde_json::to_string(arr).unwrap_or_else(|_| "None".to_string()),
865        _ => "None".to_string(),
866    };
867    let b_json = match &args["b"] {
868        Value::Array(arr) => serde_json::to_string(arr).unwrap_or_else(|_| "None".to_string()),
869        _ => "None".to_string(),
870    };
871    let safe_path = args["path"]
872        .as_str()
873        .unwrap_or("")
874        .replace('\\', "\\\\")
875        .replace('"', "\\\"");
876    let col_a = args["column_a"]
877        .as_str()
878        .unwrap_or("a")
879        .replace('"', "\\\"");
880    let col_b = args["column_b"].as_str().unwrap_or("").replace('"', "\\\"");
881
882    let script = format!(
883        r####"import math, sys, os
884
885_test  = "{test_type}"
886_alpha = {alpha}
887_mu    = {mu}
888_a     = {a_json}
889_b     = {b_json}
890_path  = "{safe_path}"
891_col_a = "{col_a}"
892_col_b = "{col_b}"
893
894if _a is None and _path:
895    import csv as _csv, sqlite3 as _sql3
896    _ext  = os.path.splitext(_path)[1].lower().lstrip('.')
897    _rows = []
898    if _ext in ('csv', 'tsv'):
899        _delim = '\t' if _ext == 'tsv' else ','
900        with open(_path, encoding='utf-8-sig', errors='replace', newline='') as _fh:
901            for _r in _csv.DictReader(_fh, delimiter=_delim):
902                _rows.append(_r)
903    elif _ext in ('db', 'sqlite', 'sqlite3'):
904        with _sql3.connect(_path) as _con:
905            _cur = _con.cursor()
906            _cur.execute("SELECT name FROM sqlite_master WHERE type='table' LIMIT 1")
907            _t = _cur.fetchone()
908            if _t:
909                _cur.execute("SELECT * FROM [%s]" % _t[0])
910                _cs = [_d[0] for _d in _cur.description]
911                _rows = [dict(zip(_cs, _r)) for _r in _cur.fetchall()]
912    def _tryf(v):
913        try: return float(str(v or '').replace(',','').strip())
914        except: return None
915    _a = [_tryf(_r.get(_col_a)) for _r in _rows]
916    _a = [v for v in _a if v is not None]
917    if _col_b:
918        _b = [_tryf(_r.get(_col_b)) for _r in _rows]
919        _b = [v for v in _b if v is not None]
920
921if not _a:
922    print("ERROR: no numeric data found for group A")
923    sys.exit(1)
924
925_na = len(_a)
926_nb = len(_b) if _b else 0
927
928try:
929    from scipy import stats as _sc
930    _HAS_SCI = True
931except ImportError:
932    _HAS_SCI = False
933
934def _betainc(a, b, x):
935    if x <= 0: return 0.0
936    if x >= 1: return 1.0
937    if x > (a + 1.0) / (a + b + 2.0):
938        return 1.0 - _betainc(b, a, 1.0 - x)
939    TINY = 1e-30; EPS = 3e-7
940    lbeta = math.lgamma(a) + math.lgamma(b) - math.lgamma(a + b)
941    front = math.exp(a*math.log(x) + b*math.log(1.0-x) - lbeta) / a
942    f = 1.0; C = 1.0
943    D = 1.0 - (a+b)*x/(a+1.0)
944    if abs(D) < TINY: D = TINY
945    D = 1.0/D; f = D
946    for m in range(1, 201):
947        n1 = m*(b-m)*x/((a+2*m-1)*(a+2*m))
948        D = 1.0+n1*D; C = 1.0+n1/C
949        if abs(D) < TINY: D = TINY
950        if abs(C) < TINY: C = TINY
951        D = 1.0/D; f *= D*C
952        n2 = -(a+m)*(a+b+m)*x/((a+2*m)*(a+2*m+1))
953        D = 1.0+n2*D; C = 1.0+n2/C
954        if abs(D) < TINY: D = TINY
955        if abs(C) < TINY: C = TINY
956        D = 1.0/D; delta = D*C; f *= delta
957        if abs(delta-1.0) < EPS: break
958    return front * f
959
960def _t2p(t, df):
961    return _betainc(df/2.0, 0.5, df/(df + t*t))
962
963def _gammaincc(a, x):
964    if x <= 0: return 1.0
965    if x < a + 1:
966        _ap = a; _s = 1.0/a; _d = 1.0/a
967        for _ in range(200):
968            _ap += 1; _d *= x/_ap; _s += _d
969            if abs(_d) < abs(_s)*3e-7: break
970        return 1.0 - _s*math.exp(-x + a*math.log(x) - math.lgamma(a))
971    _b2 = x+1-a; _c = 1e30; _d = 1.0/_b2; _h = _d
972    for i in range(1, 201):
973        _an = -i*(i-a); _b2 += 2
974        _d = _an*_d + _b2
975        if abs(_d) < 1e-30: _d = 1e-30
976        _c = _b2 + _an/_c
977        if abs(_c) < 1e-30: _c = 1e-30
978        _d = 1.0/_d; _del = _d*_c; _h *= _del
979        if abs(_del-1.0) < 3e-7: break
980    return math.exp(-x + a*math.log(x) - math.lgamma(a)) * _h
981
982_stat_v = None; _p_val = None; _extra = []; _test_name = ""; _n_info = ""
983
984if _test == "ttest_1samp":
985    _test_name = "One-Sample t-Test"
986    _ma = sum(_a)/_na
987    _sd = math.sqrt(sum((x-_ma)**2 for x in _a)/(_na-1)) if _na>1 else 0.0
988    _se = _sd/math.sqrt(_na)
989    _stat_v = (_ma - _mu)/_se if _se > 0 else 0.0
990    _df = _na - 1
991    _n_info = "n=%d  H0: mean=%.6g" % (_na, _mu)
992    if _HAS_SCI:
993        _res = _sc.ttest_1samp(_a, _mu)
994        _stat_v, _p_val = float(_res.statistic), float(_res.pvalue)
995    else:
996        _p_val = _t2p(abs(_stat_v), _df)
997    _extra = ["Sample mean: %.6g" % _ma, "Sample std dev: %.6g" % _sd, "df: %d" % _df]
998
999elif _test == "ttest_ind":
1000    _test_name = "Independent-Samples t-Test (Welch)"
1001    if not _b:
1002        print("ERROR: ttest_ind requires two groups — provide 'a' and 'b'"); sys.exit(1)
1003    _ma = sum(_a)/_na; _mb = sum(_b)/_nb
1004    _va = sum((x-_ma)**2 for x in _a)/(_na-1) if _na>1 else 0.0
1005    _vb = sum((x-_mb)**2 for x in _b)/(_nb-1) if _nb>1 else 0.0
1006    _se = math.sqrt(_va/_na + _vb/_nb)
1007    _stat_v = (_ma - _mb)/_se if _se > 0 else 0.0
1008    _df_n = (_va/_na + _vb/_nb)**2
1009    _df_d = (_va/_na)**2/(_na-1) + (_vb/_nb)**2/(_nb-1) if _na>1 and _nb>1 else 1
1010    _df = _df_n/_df_d if _df_d > 0 else 1.0
1011    _n_info = "n_a=%d  n_b=%d" % (_na, _nb)
1012    if _HAS_SCI:
1013        _res = _sc.ttest_ind(_a, _b, equal_var=False)
1014        _stat_v, _p_val = float(_res.statistic), float(_res.pvalue)
1015    else:
1016        _p_val = _t2p(abs(_stat_v), _df)
1017    _extra = ["Mean A: %.6g" % _ma, "Mean B: %.6g" % _mb,
1018              "Std Dev A: %.6g" % math.sqrt(_va),
1019              "Std Dev B: %.6g" % math.sqrt(_vb),
1020              "df (Welch): %.1f" % _df]
1021
1022elif _test == "ttest_rel":
1023    _test_name = "Paired t-Test"
1024    if not _b:
1025        print("ERROR: ttest_rel requires two paired groups — provide 'a' and 'b'"); sys.exit(1)
1026    _np2 = min(_na, _nb)
1027    _diffs = [_a[i]-_b[i] for i in range(_np2)]
1028    _md = sum(_diffs)/_np2
1029    _sd = math.sqrt(sum((d-_md)**2 for d in _diffs)/(_np2-1)) if _np2>1 else 0.0
1030    _se = _sd/math.sqrt(_np2) if _np2>0 else 0.0
1031    _stat_v = _md/_se if _se > 0 else 0.0
1032    _df = _np2 - 1
1033    _n_info = "n_pairs=%d" % _np2
1034    if _HAS_SCI:
1035        _res = _sc.ttest_rel(_a[:_np2], _b[:_np2])
1036        _stat_v, _p_val = float(_res.statistic), float(_res.pvalue)
1037    else:
1038        _p_val = _t2p(abs(_stat_v), _df)
1039    _extra = ["Mean difference: %.6g" % _md,
1040              "Std dev of diffs: %.6g" % _sd, "df: %d" % _df]
1041
1042elif _test == "mannwhitney":
1043    _test_name = "Mann-Whitney U Test (non-parametric)"
1044    if not _b:
1045        print("ERROR: mannwhitney requires two groups — provide 'a' and 'b'"); sys.exit(1)
1046    _n_info = "n_a=%d  n_b=%d" % (_na, _nb)
1047    if _HAS_SCI:
1048        _res = _sc.mannwhitneyu(_a, _b, alternative='two-sided')
1049        _stat_v, _p_val = float(_res.statistic), float(_res.pvalue)
1050    else:
1051        _U = sum(1 if x>y else 0.5 if x==y else 0 for x in _a for y in _b)
1052        _stat_v = _U
1053        _mu_U = _na*_nb/2.0
1054        _sg_U = math.sqrt(_na*_nb*(_na+_nb+1)/12.0)
1055        _z = (_U - _mu_U)/_sg_U if _sg_U > 0 else 0.0
1056        _p_val = math.erfc(abs(_z)/math.sqrt(2))
1057        _extra.append("(Normal approximation — install scipy for exact result)")
1058
1059elif _test == "chi2":
1060    _test_name = "Chi-Squared Goodness-of-Fit"
1061    _n_info = "k=%d bins" % _na
1062    _expected = list(_b) if _b else [sum(_a)/_na]*_na
1063    if len(_expected) != _na:
1064        print("ERROR: 'a' (observed) and 'b' (expected) must have equal length"); sys.exit(1)
1065    if _HAS_SCI:
1066        _res = _sc.chisquare(_a, f_exp=_expected)
1067        _stat_v, _p_val = float(_res.statistic), float(_res.pvalue)
1068    else:
1069        _stat_v = sum((o-e)**2/e for o, e in zip(_a, _expected) if e > 0)
1070        _df2 = _na - 1
1071        _p_val = _gammaincc(_df2/2.0, _stat_v/2.0)
1072        _extra.append("df=%d" % _df2)
1073else:
1074    print("ERROR: unknown test '%s'. Supported: ttest_1samp, ttest_ind, ttest_rel, mannwhitney, chi2" % _test)
1075    sys.exit(1)
1076
1077_H2 = "##"
1078_out = []
1079_out.append(_H2 + " Hypothesis Test Results")
1080_out.append("")
1081_out.append("**Test:** " + _test_name)
1082_out.append("**Alpha:** %.3g" % _alpha)
1083_out.append("**Samples:** " + _n_info)
1084for _ex in _extra:
1085    _out.append("  - " + _ex)
1086_out.append("")
1087if _stat_v is not None:
1088    _out.append("**Test Statistic:** %.6g" % _stat_v)
1089if _p_val is not None:
1090    _out.append("**p-value:** %.6g" % _p_val)
1091    _out.append("")
1092    if _p_val < _alpha:
1093        _out.append("**Result: REJECT H0**  (p=%.5f < alpha=%.3g)" % (_p_val, _alpha))
1094        _out.append("Statistically significant — unlikely under the null hypothesis.")
1095    else:
1096        _out.append("**Result: FAIL TO REJECT H0**  (p=%.5f >= alpha=%.3g)" % (_p_val, _alpha))
1097        _out.append("Insufficient evidence to reject the null hypothesis.")
1098_out.append("")
1099_out.append("*Engine: %s*" % ("scipy.stats" if _HAS_SCI else "pure-Python (Lentz CF)"))
1100print("\n".join(_out))
1101"####,
1102        test_type = test_type,
1103        alpha = alpha,
1104        mu = mu,
1105        a_json = a_json,
1106        b_json = b_json,
1107        safe_path = safe_path,
1108        col_a = col_a,
1109        col_b = col_b,
1110    );
1111
1112    let sandbox_args = serde_json::json!({
1113        "language": "python",
1114        "code": script,
1115        "timeout_seconds": 30
1116    });
1117    crate::tools::code_sandbox::execute(&sandbox_args).await
1118}
1119
1120// ─── Matrix operations ────────────────────────────────────────────────────────
1121
1122async fn run_matrix(args: &Value) -> Result<String, String> {
1123    let operation = args["operation"].as_str().unwrap_or("det");
1124
1125    let a_json = match &args["a"] {
1126        Value::Array(arr) => serde_json::to_string(arr).unwrap_or_else(|_| "None".to_string()),
1127        _ => return Err("Missing 'a' (matrix as nested array) for matrix mode".into()),
1128    };
1129    let b_json = match &args["b"] {
1130        Value::Array(arr) => serde_json::to_string(arr).unwrap_or_else(|_| "None".to_string()),
1131        _ => "None".to_string(),
1132    };
1133
1134    let script = format!(
1135        r####"import sys, math
1136
1137_op = "{operation}"
1138_a  = {a_json}
1139_b  = {b_json}
1140
1141try:
1142    import numpy as _np
1143    _HAS_NP = True
1144except ImportError:
1145    _HAS_NP = False
1146
1147_A = _np.array(_a, dtype=float) if _HAS_NP else _a
1148_B = _np.array(_b, dtype=float) if (_HAS_NP and _b is not None) else _b
1149
1150_H2 = "##"
1151_out = []
1152
1153def _fmt_row(row):
1154    return "  " + "  ".join("%12.6g" % float(x) for x in row)
1155
1156def _pp(M):
1157    if _HAS_NP:
1158        if M.ndim == 1:
1159            _out.append("  [" + ", ".join("%.6g" % x for x in M) + "]")
1160        else:
1161            for row in M: _out.append(_fmt_row(row))
1162    else:
1163        if isinstance(M[0], list):
1164            for row in M: _out.append(_fmt_row(row))
1165        else:
1166            _out.append("  [" + ", ".join("%.6g" % x for x in M) + "]")
1167
1168def _det_py(m):
1169    n = len(m); m = [list(r) for r in m]; sign = 1
1170    for i in range(n):
1171        p = max(range(i, n), key=lambda r: abs(m[r][i]))
1172        if abs(m[p][i]) < 1e-12: return 0.0
1173        if p != i: m[i], m[p] = m[p], m[i]; sign *= -1
1174        for j in range(i+1, n):
1175            f = m[j][i] / m[i][i]
1176            for k in range(i, n): m[j][k] -= f * m[i][k]
1177    d = sign
1178    for i in range(n): d *= m[i][i]
1179    return d
1180
1181def _matmul_py(A, B):
1182    n, m = len(A), len(A[0])
1183    if isinstance(B[0], list):
1184        p = len(B[0])
1185        return [[sum(A[i][k]*B[k][j] for k in range(m)) for j in range(p)] for i in range(n)]
1186    return [sum(A[i][k]*B[k] for k in range(m)) for i in range(n)]
1187
1188if _op == "det":
1189    _out.append(_H2 + " Determinant")
1190    _out.append("")
1191    _d = float(_np.linalg.det(_A)) if _HAS_NP else _det_py(_a)
1192    _out.append("det(A) = %.10g" % _d)
1193    if _HAS_NP:
1194        _out.append("Shape: %dx%d" % (_A.shape[0], _A.shape[1]))
1195
1196elif _op == "invert":
1197    if not _HAS_NP:
1198        print("ERROR: invert requires numpy (pip install numpy)"); sys.exit(1)
1199    _out.append(_H2 + " Matrix Inverse")
1200    _out.append("")
1201    try:
1202        _R = _np.linalg.inv(_A)
1203        _pp(_R)
1204        _out.append("")
1205        _out.append("Condition number: %.4g" % _np.linalg.cond(_A))
1206    except _np.linalg.LinAlgError as _e:
1207        print("ERROR: " + str(_e)); sys.exit(1)
1208
1209elif _op == "eigenvalues":
1210    if not _HAS_NP:
1211        print("ERROR: eigenvalues requires numpy (pip install numpy)"); sys.exit(1)
1212    _out.append(_H2 + " Eigenvalues & Eigenvectors")
1213    _out.append("")
1214    _evals, _evecs = _np.linalg.eig(_A)
1215    for i, (ev, vec) in enumerate(zip(_evals, _evecs.T)):
1216        if abs(ev.imag) < 1e-10:
1217            _out.append("lambda_%d = %.8g" % (i+1, ev.real))
1218        else:
1219            _out.append("lambda_%d = %.6g + %.6gi" % (i+1, ev.real, ev.imag))
1220        _out.append("  eigenvector: [" + ", ".join("%.4f" % x.real for x in vec) + "]")
1221
1222elif _op == "solve":
1223    if _b is None:
1224        print("ERROR: solve requires 'b' (right-hand side vector or matrix)"); sys.exit(1)
1225    if not _HAS_NP:
1226        print("ERROR: solve requires numpy (pip install numpy)"); sys.exit(1)
1227    _out.append(_H2 + " Solution to Ax = b")
1228    _out.append("")
1229    try:
1230        _x = _np.linalg.solve(_A, _B.flatten() if _B.ndim > 1 else _B)
1231        _out.append("x = [" + ", ".join("%.8g" % v for v in _x) + "]")
1232        _out.append("")
1233        _out.append("Residual ||Ax-b||: %.2e" % float(_np.linalg.norm(_A @ _x - _B.flatten())))
1234    except _np.linalg.LinAlgError as _e:
1235        print("ERROR: " + str(_e)); sys.exit(1)
1236
1237elif _op == "transpose":
1238    _out.append(_H2 + " Transpose")
1239    _out.append("")
1240    if _HAS_NP:
1241        _pp(_A.T)
1242    else:
1243        _pp([[_a[j][i] for j in range(len(_a))] for i in range(len(_a[0]))])
1244
1245elif _op == "multiply":
1246    if _b is None:
1247        print("ERROR: multiply requires both 'a' and 'b'"); sys.exit(1)
1248    _out.append(_H2 + " Matrix Product (A @ B)")
1249    _out.append("")
1250    if _HAS_NP:
1251        _pp(_A @ _B)
1252    else:
1253        _pp(_matmul_py(_a, _b))
1254
1255elif _op == "rank":
1256    if not _HAS_NP:
1257        print("ERROR: rank requires numpy (pip install numpy)"); sys.exit(1)
1258    _out.append(_H2 + " Matrix Rank")
1259    _out.append("")
1260    _out.append("rank(A) = %d" % _np.linalg.matrix_rank(_A))
1261    _out.append("Shape:   %dx%d" % (_A.shape[0], _A.shape[1]))
1262
1263elif _op == "svd":
1264    if not _HAS_NP:
1265        print("ERROR: SVD requires numpy (pip install numpy)"); sys.exit(1)
1266    _out.append(_H2 + " Singular Value Decomposition")
1267    _out.append("")
1268    _U, _S, _Vt = _np.linalg.svd(_A)
1269    _out.append("Singular values: [" + ", ".join("%.6g" % s for s in _S) + "]")
1270    _out.append("Rank (numerical): %d" % _np.linalg.matrix_rank(_A))
1271    _out.append("")
1272    _out.append("U (%dx%d):" % (_U.shape[0], _U.shape[1]))
1273    _pp(_U)
1274    _out.append("Vt (%dx%d):" % (_Vt.shape[0], _Vt.shape[1]))
1275    _pp(_Vt)
1276
1277else:
1278    print("ERROR: unknown operation '%s'. Supported: det, invert, eigenvalues, solve, transpose, multiply, rank, svd" % _op)
1279    sys.exit(1)
1280
1281print("\n".join(_out))
1282"####,
1283        operation = operation,
1284        a_json = a_json,
1285        b_json = b_json,
1286    );
1287
1288    let sandbox_args = serde_json::json!({
1289        "language": "python",
1290        "code": script,
1291        "timeout_seconds": 20
1292    });
1293    crate::tools::code_sandbox::execute(&sandbox_args).await
1294}
1295
1296// ─── Unit conversion ─────────────────────────────────────────────────────────
1297
1298// The unit table is a `const` so Python dict braces never touch format!().
/// Python prelude defining the unit registry and the `_convert(val, from_u, to_u)` helper.
///
/// `_U` maps a lower-cased unit key to `(factor, category)`; `factor` is the multiplier to the
/// category's base unit, except for temperatures where it is a `('temp', scale)` marker.
/// When the same key is registered under two categories (currently only `kn`: knots and
/// kilonewtons) the later registration stays in `_U` and the earlier one is kept in `_U_ALT`,
/// so `_convert` can still resolve it when the other unit's category requires it.
const UNIT_TABLE_PY: &str = r####"
_U = {}
# Registrations shadowed by a later _r() call for the same key in a different category.
_U_ALT = {}
def _r(names, factor, cat):
    for n in names:
        _prev = _U.get(n)
        if _prev is not None and _prev[1] != cat:
            _U_ALT.setdefault(n, []).append(_prev)
        _U[n] = (factor, cat)

# Length (SI base: metre)
_r(['m','meter','meters','metre','metres'], 1.0, 'length')
_r(['km','kilometer','kilometers','kilometre','kilometres'], 1e3, 'length')
_r(['cm','centimeter','centimeters'], 1e-2, 'length')
_r(['mm','millimeter','millimeters'], 1e-3, 'length')
_r(['um','micrometer','micron','microns'], 1e-6, 'length')
_r(['nm','nanometer','nanometers'], 1e-9, 'length')
_r(['pm','picometer'], 1e-12, 'length')
_r(['in','inch','inches'], 0.0254, 'length')
_r(['ft','foot','feet'], 0.3048, 'length')
_r(['yd','yard','yards'], 0.9144, 'length')
_r(['mi','mile','miles'], 1609.344, 'length')
_r(['nmi','nautical_mile','nautical_miles'], 1852.0, 'length')
_r(['ly','lightyear','light_year','lightyears'], 9.4607304725808e15, 'length')
_r(['au','astronomical_unit'], 1.495978707e11, 'length')
_r(['pc','parsec','parsecs'], 3.085677581e16, 'length')
_r(['ang','angstrom'], 1e-10, 'length')
_r(['fathom','fathoms'], 1.8288, 'length')
# Mass (SI base: kilogram)
_r(['kg','kilogram','kilograms'], 1.0, 'mass')
_r(['g','gram','grams'], 1e-3, 'mass')
_r(['mg','milligram','milligrams'], 1e-6, 'mass')
_r(['ug','microgram','micrograms'], 1e-9, 'mass')
_r(['t','tonne','metric_ton','metric_tons'], 1e3, 'mass')
_r(['lb','lbs','pound','pounds'], 0.45359237, 'mass')
_r(['oz','ounce','ounces'], 0.028349523125, 'mass')
_r(['ton','short_ton'], 907.18474, 'mass')
_r(['long_ton'], 1016.0469088, 'mass')
_r(['stone','stones'], 6.35029318, 'mass')
_r(['slug','slugs'], 14.593903, 'mass')
_r(['carat','carats','ct'], 2e-4, 'mass')
# Time (SI base: second)
_r(['s','sec','second','seconds'], 1.0, 'time')
_r(['ms','millisecond','milliseconds'], 1e-3, 'time')
_r(['us','microsecond','microseconds'], 1e-6, 'time')
_r(['ns','nanosecond','nanoseconds'], 1e-9, 'time')
_r(['min','minute','minutes'], 60.0, 'time')
_r(['h','hr','hour','hours'], 3600.0, 'time')
_r(['d','day','days'], 86400.0, 'time')
_r(['wk','week','weeks'], 604800.0, 'time')
_r(['month','months'], 2629746.0, 'time')
_r(['yr','year','years'], 31556952.0, 'time')
_r(['decade','decades'], 315569520.0, 'time')
_r(['century','centuries'], 3155695200.0, 'time')
# Speed (SI base: m/s)
_r(['m/s','mps','meters_per_second'], 1.0, 'speed')
_r(['km/h','kph','kmh','kilometers_per_hour'], 1.0/3.6, 'speed')
_r(['mph','miles_per_hour'], 0.44704, 'speed')
_r(['knot','knots','kn'], 0.514444, 'speed')
_r(['ft/s','fps','feet_per_second'], 0.3048, 'speed')
_r(['mach'], 340.29, 'speed')
_r(['c_speed','speed_of_light'], 299792458.0, 'speed')
# Energy (SI base: joule)
_r(['j','joule','joules'], 1.0, 'energy')
_r(['kj','kilojoule','kilojoules'], 1e3, 'energy')
_r(['mj','megajoule','megajoules'], 1e6, 'energy')
_r(['gj','gigajoule','gigajoules'], 1e9, 'energy')
_r(['cal','calorie','calories'], 4.184, 'energy')
_r(['kcal','kilocalorie','kilocalories','cal_food'], 4184.0, 'energy')
_r(['kwh','kw*h','kilowatt_hour','kilowatt_hours'], 3.6e6, 'energy')
_r(['mwh','megawatt_hour'], 3.6e9, 'energy')
_r(['ev','electronvolt','electronvolts'], 1.602176634e-19, 'energy')
_r(['btu','british_thermal_unit'], 1055.06, 'energy')
_r(['erg','ergs'], 1e-7, 'energy')
_r(['therm'], 1.05506e8, 'energy')
# Power (SI base: watt)
_r(['w','watt','watts'], 1.0, 'power')
_r(['kw','kilowatt','kilowatts'], 1e3, 'power')
_r(['mw','megawatt','megawatts'], 1e6, 'power')
_r(['gw','gigawatt','gigawatts'], 1e9, 'power')
_r(['hp','horsepower'], 745.69987, 'power')
_r(['ps','metric_horsepower'], 735.49875, 'power')
_r(['btu/h','btu_per_hour'], 0.293071, 'power')
# Pressure (SI base: pascal)
_r(['pa','pascal','pascals'], 1.0, 'pressure')
_r(['kpa','kilopascal','kilopascals'], 1e3, 'pressure')
_r(['mpa','megapascal','megapascals'], 1e6, 'pressure')
_r(['gpa','gigapascal','gigapascals'], 1e9, 'pressure')
_r(['atm','atmosphere','atmospheres'], 101325.0, 'pressure')
_r(['bar','bars'], 1e5, 'pressure')
_r(['mbar','millibar','millibars'], 100.0, 'pressure')
_r(['psi','pounds_per_square_inch'], 6894.757, 'pressure')
_r(['mmhg','torr'], 133.322, 'pressure')
_r(['inhg','inches_of_mercury'], 3386.39, 'pressure')
_r(['atm_tech','at','technical_atmosphere'], 98066.5, 'pressure')
# Temperature — special (handled separately, marker category)
_r(['c','celsius','degc','deg_c'], ('temp', 'C'), 'temperature')
_r(['f','fahrenheit','degf','deg_f'], ('temp', 'F'), 'temperature')
_r(['k','kelvin','degk','deg_k'], ('temp', 'K'), 'temperature')
_r(['r','rankine','degr','deg_r'], ('temp', 'R'), 'temperature')
# Volume (SI base: litre)
_r(['l','liter','liters','litre','litres'], 1.0, 'volume')
_r(['ml','milliliter','milliliters'], 1e-3, 'volume')
_r(['cl','centiliter','centiliters'], 1e-2, 'volume')
_r(['dl','deciliter','deciliters'], 0.1, 'volume')
_r(['ul','microliter','microliters'], 1e-6, 'volume')
_r(['m3','cubic_meter','cubic_meters'], 1e3, 'volume')
_r(['cm3','cc','cubic_centimeter'], 1e-3, 'volume')
_r(['mm3','cubic_millimeter'], 1e-6, 'volume')
_r(['gal','gallon','gallons','us_gal'], 3.785411784, 'volume')
_r(['qt','quart','quarts'], 0.946352946, 'volume')
_r(['pt','pint','pints'], 0.473176473, 'volume')
_r(['cup','cups'], 0.2365882365, 'volume')
_r(['fl_oz','fluid_ounce','fluid_ounces'], 0.0295735296, 'volume')
_r(['tsp','teaspoon','teaspoons'], 0.00492892, 'volume')
_r(['tbsp','tablespoon','tablespoons'], 0.01478676, 'volume')
_r(['imp_gal','imperial_gallon','imperial_gallons'], 4.54609, 'volume')
_r(['barrel','bbl'], 158.9873, 'volume')
# Area (SI base: square metre)
_r(['m2','sq_m','square_meter','square_meters'], 1.0, 'area')
_r(['km2','square_kilometer','square_kilometers'], 1e6, 'area')
_r(['cm2','square_centimeter'], 1e-4, 'area')
_r(['mm2','square_millimeter'], 1e-6, 'area')
_r(['ft2','sq_ft','square_foot','square_feet'], 0.09290304, 'area')
_r(['in2','sq_in','square_inch','square_inches'], 6.4516e-4, 'area')
_r(['yd2','sq_yd','square_yard','square_yards'], 0.83612736, 'area')
_r(['mi2','square_mile','square_miles'], 2589988.11, 'area')
_r(['acre','acres'], 4046.8564224, 'area')
_r(['ha','hectare','hectares'], 1e4, 'area')
# Digital storage (SI base: byte)
_r(['bit','bits'], 0.125, 'digital')
_r(['b','byte','bytes'], 1.0, 'digital')
_r(['kb','kilobyte','kilobytes'], 1e3, 'digital')
_r(['mb','megabyte','megabytes'], 1e6, 'digital')
_r(['gb','gigabyte','gigabytes'], 1e9, 'digital')
_r(['tb','terabyte','terabytes'], 1e12, 'digital')
_r(['pb','petabyte','petabytes'], 1e15, 'digital')
_r(['kib','kibibyte','kibibytes'], 1024.0, 'digital')
_r(['mib','mebibyte','mebibytes'], 1048576.0, 'digital')
_r(['gib','gibibyte','gibibytes'], 1073741824.0, 'digital')
_r(['tib','tebibyte','tebibytes'], 1099511627776.0, 'digital')
# Force (SI base: newton)
_r(['n','newton','newtons'], 1.0, 'force')
_r(['kn','kilonewton','kilonewtons'], 1e3, 'force')
_r(['mn_force','meganewton'], 1e6, 'force')
_r(['lbf','pound_force','pounds_force'], 4.44822, 'force')
_r(['kgf','kilogram_force'], 9.80665, 'force')
_r(['dyn','dyne','dynes'], 1e-5, 'force')
# Frequency (SI base: Hz)
_r(['hz','hertz'], 1.0, 'frequency')
_r(['khz','kilohertz'], 1e3, 'frequency')
_r(['mhz','megahertz'], 1e6, 'frequency')
_r(['ghz','gigahertz'], 1e9, 'frequency')
_r(['thz','terahertz'], 1e12, 'frequency')
_r(['rpm','rev_per_min','revolutions_per_minute'], 1.0/60, 'frequency')
# Angle (SI base: radian)
_r(['rad','radian','radians'], 1.0, 'angle')
_r(['deg','degree','degrees'], 3.14159265358979/180, 'angle')
_r(['grad','gradian','gradians'], 3.14159265358979/200, 'angle')
_r(['arcmin','arcminute','arcminutes'], 3.14159265358979/10800, 'angle')
_r(['arcsec','arcsecond','arcseconds'], 3.14159265358979/648000, 'angle')
_r(['rev','revolution','revolutions','turn','turns'], 2*3.14159265358979, 'angle')

def _to_celsius(v, scale):
    if scale=='C': return v
    if scale=='F': return (v-32)*5/9
    if scale=='K': return v-273.15
    if scale=='R': return (v-491.67)*5/9
    return None

def _from_celsius(c, scale):
    if scale=='C': return c
    if scale=='F': return c*9/5+32
    if scale=='K': return c+273.15
    if scale=='R': return (c+273.15)*9/5
    return None

def _unit_candidates(key):
    # Every registered meaning of key; the entry in _U (last registered) comes first.
    _hit = _U.get(key)
    if _hit is None: return []
    return [_hit] + _U_ALT.get(key, [])

def _convert(val, from_u, to_u):
    # Returns (result, None) on success or (None, error_message) on failure.
    _fk = from_u.lower().strip().replace(' ','_')
    _tk = to_u.lower().strip().replace(' ','_')
    _fc = _unit_candidates(_fk)
    _tc = _unit_candidates(_tk)
    if not _fc: return None, "Unknown unit: " + from_u
    if not _tc: return None, "Unknown unit: " + to_u
    _fi, _ti = _fc[0], _tc[0]
    if _fi[1] != _ti[1]:
        # The primary meanings disagree; fall back to a shadowed meaning that shares a
        # category (e.g. 'kn' as knots when the other unit is a speed).
        _pairs = [(_f, _t) for _f in _fc for _t in _tc if _f[1] == _t[1]]
        if _pairs: _fi, _ti = _pairs[0]
    if _fi[1] == 'temperature' or _ti[1] == 'temperature':
        if _fi[1] != 'temperature' or _ti[1] != 'temperature':
            return None, "Cannot mix temperature and non-temperature units"
        _c = _to_celsius(val, _fi[0][1])
        return _from_celsius(_c, _ti[0][1]), None
    if _fi[1] != _ti[1]:
        return None, "Dimension mismatch: %s (%s) vs %s (%s)" % (from_u, _fi[1], to_u, _ti[1])
    return val * _fi[0] / _ti[0], None
"####;
1487
1488pub async fn convert_units(expr: &str) -> Result<String, String> {
1489    if expr.trim().is_empty() {
1490        return Err("No expression provided.".into());
1491    }
1492    let safe_expr = expr.replace('\\', "\\\\").replace('"', "\\\"");
1493
1494    let script = format!(
1495        r####"{unit_table}
1496import re as _re, sys, math
1497
1498_raw  = "{safe_expr}"
1499_expr = _raw.strip()
1500
1501# ── Number base conversion (prefix check) ────────────────────────────
1502_bm = _re.match(
1503    r'^(0x[0-9a-fA-F]+|0b[01]+|0o[0-7]+|\d+)\s+to\s+(hex(?:adecimal)?|dec(?:imal)?|bin(?:ary)?|oct(?:al)?)\s*$',
1504    _expr, _re.I)
1505if _bm:
1506    _bv, _bt = _bm.group(1), _bm.group(2).lower()
1507    try:
1508        _n = int(_bv, 0)
1509        if   _bt.startswith('hex'): _out = hex(_n)
1510        elif _bt.startswith('bin'): _out = bin(_n)
1511        elif _bt.startswith('oct'): _out = oct(_n)
1512        else:                        _out = str(_n)
1513        print("%s  =  %s" % (_bv, _out))
1514    except ValueError as _e:
1515        print("Error: " + str(_e)); sys.exit(1)
1516    sys.exit(0)
1517
1518_m = _re.match(
1519    r'^([\d.,eE+\-]+)\s+(.+?)\s+(?:to|->|=|in)\s+(.+)$', _expr, _re.I)
1520if not _m:
1521    print("Format: VALUE UNIT to UNIT")
1522    print("Examples:  100 mph to km/h  |  72 F to C  |  1 lightyear to km  |  5 kg to lbs")
1523    sys.exit(1)
1524
1525_val   = float(_m.group(1).replace(',',''))
1526_from  = _m.group(2).strip()
1527_to    = _m.group(3).strip()
1528
1529_result, _err = _convert(_val, _from, _to)
1530if _err:
1531    print("Error: " + _err)
1532    sys.exit(1)
1533
1534def _fmtv(v):
1535    if v == 0: return "0"
1536    if abs(v) >= 1e12 or (abs(v) < 1e-4 and abs(v) > 0):
1537        return "%.6e" % v
1538    if v == int(v) and abs(v) < 1e15: return str(int(v))
1539    return "%.10g" % v
1540
1541print("%s %s  =  %s %s" % (_fmtv(_val), _from, _fmtv(_result), _to))
1542"####,
1543        unit_table = UNIT_TABLE_PY,
1544        safe_expr = safe_expr,
1545    );
1546
1547    let sandbox_args = serde_json::json!({
1548        "language": "python",
1549        "code": script,
1550        "timeout_seconds": 15
1551    });
1552    crate::tools::code_sandbox::execute(&sandbox_args).await
1553}
1554
1555// ─── Data visualization ───────────────────────────────────────────────────────
1556
1557pub async fn plot_dataset(
1558    path_str: &str,
1559    plot_type: &str,
1560    x_col: &str,
1561    y_col: &str,
1562    out_path: &str,
1563) -> Result<String, String> {
1564    let safe_path = path_str.replace('\\', "\\\\").replace('"', "\\\"");
1565    let safe_out = out_path.replace('\\', "\\\\").replace('"', "\\\"");
1566    let safe_x = x_col.replace('"', "\\\"");
1567    let safe_y = y_col.replace('"', "\\\"");
1568
1569    let script = format!(
1570        r####"import os, sys, csv as _csv, sqlite3 as _sql3
1571
1572os.environ['MPLBACKEND']   = 'Agg'
1573os.environ['MPLCONFIGDIR'] = os.environ.get('TEMP', os.environ.get('TMP', '/tmp')) + '/hematite_mpl'
1574
1575_path      = "{safe_path}"
1576_out_path  = "{safe_out}"
1577_plot_type = "{plot_type}"
1578_x_col     = "{safe_x}"
1579_y_col     = "{safe_y}"
1580_ext       = os.path.splitext(_path)[1].lower().lstrip('.')
1581_data      = []
1582
1583if _ext in ('csv', 'tsv'):
1584    _delim = '\t' if _ext == 'tsv' else ','
1585    with open(_path, encoding='utf-8-sig', errors='replace', newline='') as _fh:
1586        _rdr = _csv.DictReader(_fh, delimiter=_delim)
1587        for _i, _r in enumerate(_rdr):
1588            if _i >= 10000: break
1589            _data.append(_r)
1590elif _ext == 'json':
1591    with open(_path, encoding='utf-8') as _fh:
1592        _raw2 = json.load(_fh)
1593    _data = _raw2[:10000] if isinstance(_raw2, list) else list(_raw2.values())[0][:10000] if isinstance(_raw2, dict) else []
1594elif _ext in ('db','sqlite','sqlite3'):
1595    with _sql3.connect(_path) as _con:
1596        _cur = _con.cursor()
1597        _cur.execute("SELECT name FROM sqlite_master WHERE type='table' LIMIT 1")
1598        _t = _cur.fetchone()
1599        if _t:
1600            _cur.execute("SELECT * FROM [%s] LIMIT 10000" % _t[0])
1601            _cs = [_d[0] for _d in _cur.description]
1602            _data = [dict(zip(_cs, _r)) for _r in _cur.fetchall()]
1603else:
1604    print("ERROR: unsupported format"); sys.exit(1)
1605
1606if not _data:
1607    print("No data found."); sys.exit(1)
1608
1609_cols = list(_data[0].keys())
1610
1611def _tryf(v):
1612    try: return float(str(v or '').replace(',','').strip())
1613    except: return None
1614
1615_num_cols = []
1616for _c in _cols:
1617    _s = [_tryf(_r.get(_c)) for _r in _data[:200]]
1618    if sum(1 for x in _s if x is not None) >= len(_s)*0.8: _num_cols.append(_c)
1619
1620_x_col2 = _x_col or (_num_cols[0] if _num_cols else _cols[0])
1621_y_col2 = _y_col or (_num_cols[1] if len(_num_cols) > 1 else None)
1622
1623_x_vals = [_tryf(_r.get(_x_col2)) for _r in _data]
1624_x_vals = [v for v in _x_vals if v is not None]
1625_y_vals = []
1626if _y_col2:
1627    _y_vals = [_tryf(_r.get(_y_col2)) for _r in _data]
1628    _y_vals = [v for v in _y_vals if v is not None]
1629
1630_title = os.path.basename(_path)
1631if _y_col2:
1632    _sub = "%s vs %s" % (_x_col2, _y_col2)
1633else:
1634    _sub = _x_col2
1635
1636# ── Attempt matplotlib ────────────────────────────────────────────────
1637_used_mpl = False
1638_svg_str   = ""
1639try:
1640    import matplotlib
1641    matplotlib.use('Agg')
1642    import matplotlib.pyplot as _plt
1643    _fig, _ax = _plt.subplots(figsize=(8, 5))
1644    _fig.patch.set_facecolor('#0d0d1a')
1645    _ax.set_facecolor('#16213e')
1646    for _sp in _ax.spines.values(): _sp.set_color('#444')
1647    _ax.tick_params(colors='#999', labelsize=9)
1648    _ax.xaxis.label.set_color('#bbb')
1649    _ax.yaxis.label.set_color('#bbb')
1650    _ax.title.set_color('#7fc3ff')
1651    _C = '#4a9eff'
1652    if _plot_type == 'histogram':
1653        _ax.hist(_x_vals, bins=min(40, max(10, int(len(_x_vals)**0.5)+1)),
1654                 color=_C, alpha=0.85, edgecolor='#0d0d1a')
1655        _ax.set_xlabel(_x_col2); _ax.set_ylabel('Count')
1656        _ax.set_title('Histogram — ' + _x_col2)
1657    elif _plot_type in ('scatter',''):
1658        _nx = min(len(_x_vals), len(_y_vals))
1659        _ax.scatter(_x_vals[:_nx], _y_vals[:_nx], color=_C, alpha=0.6, s=15)
1660        _ax.set_xlabel(_x_col2); _ax.set_ylabel(_y_col2 or '')
1661        _ax.set_title('Scatter — ' + _sub)
1662    elif _plot_type == 'line':
1663        _pairs = sorted(zip(_x_vals, _y_vals))
1664        _ax.plot([p[0] for p in _pairs], [p[1] for p in _pairs], color=_C, lw=1.5)
1665        _ax.set_xlabel(_x_col2); _ax.set_ylabel(_y_col2 or '')
1666        _ax.set_title('Line — ' + _sub)
1667    elif _plot_type == 'bar':
1668        from collections import Counter as _Ctr
1669        _raw_x = [str(_r.get(_x_col2, '') or '').strip() for _r in _data if _r.get(_x_col2)]
1670        _ct = _Ctr(_raw_x)
1671        _lbls = [k for k, _ in _ct.most_common(20)]
1672        _vals2 = [_ct[k] for k in _lbls]
1673        _ax.bar(range(len(_lbls)), _vals2, color=_C, alpha=0.85)
1674        _ax.set_xticks(list(range(len(_lbls))))
1675        _ax.set_xticklabels(_lbls, rotation=40, ha='right', fontsize=8)
1676        _ax.set_title('Bar — ' + _x_col2)
1677    from io import StringIO as _SIO
1678    _buf = _SIO()
1679    _fig.tight_layout(pad=1.2)
1680    _fig.savefig(_buf, format='svg', bbox_inches='tight', facecolor=_fig.get_facecolor())
1681    _plt.close(_fig)
1682    _sv = _buf.getvalue()
1683    _svg_str = _sv[_sv.find('<svg'):]
1684    _used_mpl = True
1685except Exception:
1686    pass
1687
1688# ── Pure-Python SVG fallback ──────────────────────────────────────────
1689if not _used_mpl:
1690    def _hist_svg(vals, lbl, W=640, H=380):
1691        if not vals: return ""
1692        mn, mx = min(vals), max(vals)
1693        if mn == mx: mn -= 0.5; mx += 0.5
1694        nb = min(30, max(8, int(len(vals)**0.5)+1))
1695        bw2 = (mx-mn)/nb
1696        bins = [0]*nb
1697        for v in vals:
1698            i = min(int((v-mn)/bw2), nb-1)
1699            bins[i] += 1
1700        mc = max(bins) or 1
1701        P=50; PW=W-2*P; PH=H-2*P
1702        rects = ''.join(
1703            '<rect x="%.1f" y="%.1f" width="%.1f" height="%.1f" fill="#4a9eff" opacity=".82"/>' %
1704            (P+i*PW/nb, P+PH-bins[i]/mc*PH, max(PW/nb-1,1), bins[i]/mc*PH)
1705            for i in range(nb))
1706        xt = ''.join('<text x="%.1f" y="%d" text-anchor="middle" font-size="10" fill="#888">%.3g</text>' %
1707                     (P+k*PW/4, H-8, mn+(mx-mn)*k/4) for k in range(5))
1708        yt = ''.join('<text x="%d" y="%.1f" text-anchor="end" font-size="10" fill="#888">%d</text>' %
1709                     (P-4, P+PH-k*PH/4+4, int(mc*k/4)) for k in range(5))
1710        axs = '<line x1="%d" y1="%d" x2="%d" y2="%d" stroke="#444"/><line x1="%d" y1="%d" x2="%d" y2="%d" stroke="#444"/>'%(P,P,P,P+PH,P,P+PH,P+PW,P+PH)
1711        ttl = '<text x="%d" y="22" text-anchor="middle" font-size="13" fill="#7fc3ff" font-weight="bold">Histogram — %s</text>'%(W//2,lbl[:50])
1712        return '<svg xmlns="http://www.w3.org/2000/svg" width="%d" height="%d" style="background:#16213e">%s%s%s%s%s</svg>'%(W,H,ttl,axs,rects,xt,yt)
1713
1714    def _scatter_svg(xs, ys, xl, yl, W=640, H=400):
1715        if not xs or not ys: return ""
1716        xmn,xmx=min(xs),max(xs); ymn,ymx=min(ys),max(ys)
1717        if xmn==xmx: xmn-=1;xmx+=1
1718        if ymn==ymx: ymn-=1;ymx+=1
1719        P=60; PW=W-2*P; PH=H-2*P
1720        def xp(v): return P+(v-xmn)/(xmx-xmn)*PW
1721        def yp(v): return P+PH-(v-ymn)/(ymx-ymn)*PH
1722        dots=''.join('<circle cx="%.1f" cy="%.1f" r="3" fill="#4a9eff" opacity=".65"/>'%(xp(x),yp(y)) for x,y in zip(xs[:3000],ys[:3000]))
1723        axs='<line x1="%d" y1="%d" x2="%d" y2="%d" stroke="#444"/><line x1="%d" y1="%d" x2="%d" y2="%d" stroke="#444"/>'%(P,P,P,P+PH,P,P+PH,P+PW,P+PH)
1724        xt=''.join('<text x="%.1f" y="%d" text-anchor="middle" font-size="10" fill="#888">%.3g</text>'%(P+k*PW/4,P+PH+16,xmn+(xmx-xmn)*k/4) for k in range(5))
1725        yt=''.join('<text x="%d" y="%.1f" text-anchor="end" font-size="10" fill="#888">%.3g</text>'%(P-4,P+PH-k*PH/4+4,ymn+(ymx-ymn)*k/4) for k in range(5))
1726        xl2='<text x="%d" y="%d" text-anchor="middle" font-size="11" fill="#bbb">%s</text>'%(W//2,H-2,xl[:40])
1727        ttl='<text x="%d" y="20" text-anchor="middle" font-size="13" fill="#7fc3ff" font-weight="bold">Scatter — %s vs %s</text>'%(W//2,xl[:25],yl[:25])
1728        return '<svg xmlns="http://www.w3.org/2000/svg" width="%d" height="%d" style="background:#16213e">%s%s%s%s%s%s</svg>'%(W,H,ttl,axs,dots,xt,yt,xl2)
1729
1730    if _plot_type in ('scatter','line') and _x_vals and _y_vals:
1731        _nx = min(len(_x_vals), len(_y_vals))
1732        _svg_str = _scatter_svg(_x_vals[:_nx], _y_vals[:_nx], _x_col2, _y_col2 or '')
1733    else:
1734        _svg_str = _hist_svg(_x_vals, _x_col2)
1735
1736# ── Write HTML ────────────────────────────────────────────────────────
1737_engine = "matplotlib" if _used_mpl else "pure-Python SVG"
1738_html = (
1739    "<!DOCTYPE html><html><head><meta charset='utf-8'><title>" + _title + "</title>"
1740    "<style>body{{background:#0d0d1a;color:#e0e0e0;font-family:monospace;padding:24px;margin:0}}"
1741    "h2{{color:#7fc3ff;margin-bottom:4px}}p{{color:#666;font-size:.85em;margin:0 0 20px}}"
1742    ".chart{{display:block;margin:0 auto;max-width:700px}}</style></head><body>"
1743    "<h2>" + _title + " &mdash; " + _sub + "</h2>"
1744    "<p>Generated by Hematite &middot; engine: " + _engine + " &middot; n=" + str(len(_x_vals)) + " rows</p>"
1745    "<div class='chart'>" + _svg_str + "</div>"
1746    "</body></html>"
1747)
1748os.makedirs(os.path.dirname(_out_path), exist_ok=True)
1749with open(_out_path, 'w', encoding='utf-8') as _f:
1750    _f.write(_html)
1751print(_out_path)
1752"####,
1753        safe_path = safe_path,
1754        safe_out = safe_out,
1755        plot_type = plot_type,
1756        safe_x = safe_x,
1757        safe_y = safe_y,
1758    );
1759
1760    let sandbox_args = serde_json::json!({
1761        "language": "python",
1762        "code": script,
1763        "timeout_seconds": 30
1764    });
1765    crate::tools::code_sandbox::execute(&sandbox_args).await
1766}
1767
1768// ─── SQL-on-local-files ────────────────────────────────────────────────────────
1769
1770pub async fn query_data(file_path: &str, sql: &str) -> Result<String, String> {
1771    if file_path.trim().is_empty() {
1772        return Err("No data file specified.".into());
1773    }
1774    if sql.trim().is_empty() {
1775        return Err("No SQL query specified.".into());
1776    }
1777    let safe_path = file_path.replace('\\', "\\\\").replace('"', "\\\"");
1778    // Hex-encode the SQL to eliminate all escaping concerns.
1779    let sql_hex: String = sql.bytes().map(|b| format!("{:02x}", b)).collect();
1780
1781    let script = format!(
1782        r####"import sqlite3 as _sq, csv as _csv, json as _js, sys, os
1783
1784_path = "{safe_path}"
1785_sql  = bytes.fromhex("{sql_hex}").decode()
1786_ext  = os.path.splitext(_path)[1].lower().lstrip('.')
1787_con  = _sq.connect(':memory:')
1788
1789def _load_csv(path, delim):
1790    with open(path, encoding='utf-8-sig', errors='replace', newline='') as _fh:
1791        _rdr = _csv.DictReader(_fh, delimiter=delim)
1792        _rows = list(_rdr)
1793    if not _rows:
1794        print("No data in file."); sys.exit(1)
1795    _cols = list(_rows[0].keys())
1796    _con.execute('CREATE TABLE data (' + ', '.join('"' + c + '"' for c in _cols) + ')')
1797    _con.executemany(
1798        'INSERT INTO data VALUES (' + ','.join(['?'] * len(_cols)) + ')',
1799        [tuple(_r.get(c, '') for c in _cols) for _r in _rows])
1800
1801def _load_json(path):
1802    with open(path, encoding='utf-8') as _fh:
1803        _d = _js.load(_fh)
1804    _rows = _d if isinstance(_d, list) else next(iter(_d.values()), []) if isinstance(_d, dict) else []
1805    if not _rows:
1806        print("No rows found in JSON."); sys.exit(1)
1807    _cols = list(_rows[0].keys()) if isinstance(_rows[0], dict) else [str(i) for i in range(len(_rows[0]))]
1808    _con.execute('CREATE TABLE data (' + ', '.join('"' + c + '"' for c in _cols) + ')')
1809    _con.executemany(
1810        'INSERT INTO data VALUES (' + ','.join(['?'] * len(_cols)) + ')',
1811        [tuple(str(_r.get(c, '') if isinstance(_r, dict) else _r[i]) for i, c in enumerate(_cols)) for _r in _rows])
1812
1813try:
1814    if _ext == 'csv':                      _load_csv(_path, ',')
1815    elif _ext == 'tsv':                    _load_csv(_path, '\t')
1816    elif _ext == 'json':                   _load_json(_path)
1817    elif _ext in ('db','sqlite','sqlite3'):
1818        _src = _sq.connect(_path); _src.backup(_con); _src.close()
1819    else:
1820        print("Unsupported format: " + _ext + ". Use csv, tsv, json, or sqlite.")
1821        sys.exit(1)
1822except Exception as _e:
1823    print("Load error: " + str(_e), file=sys.stderr); sys.exit(1)
1824
1825try:
1826    _cur = _con.execute(_sql)
1827except Exception as _e:
1828    print("Query error: " + str(_e), file=sys.stderr); sys.exit(1)
1829
1830_hdrs  = [_d[0] for _d in _cur.description] if _cur.description else []
1831_rows2 = _cur.fetchall()
1832_con.close()
1833
1834if not _rows2:
1835    print("(no rows returned)")
1836    sys.exit(0)
1837
1838_rs = [[str(c) if c is not None else 'NULL' for c in _r] for _r in _rows2[:2000]]
1839_ws = [max(len(_h), max((len(_r[_i]) for _r in _rs), default=0))
1840       for _i, _h in enumerate(_hdrs)]
1841_sep = '+-' + '-+-'.join('-' * _w for _w in _ws) + '-+'
1842_hr  = '| ' + ' | '.join(_h.ljust(_ws[_i]) for _i, _h in enumerate(_hdrs)) + ' |'
1843print(_sep)
1844print(_hr)
1845print(_sep)
1846for _r in _rs:
1847    print('| ' + ' | '.join(_r[_i].ljust(_ws[_i]) for _i in range(len(_hdrs))) + ' |')
1848print(_sep)
1849_total = len(_rows2)
1850_label = str(_total) + (' rows' if _total != 1 else ' row')
1851if _total > 2000: _label += ' (showing first 2000)'
1852print('(' + _label + ')')
1853"####,
1854        safe_path = safe_path,
1855        sql_hex = sql_hex,
1856    );
1857
1858    let sandbox_args = serde_json::json!({
1859        "language": "python",
1860        "code": script,
1861        "timeout_seconds": 30
1862    });
1863    crate::tools::code_sandbox::execute(&sandbox_args).await
1864}
1865
1866// ─── Periodic table ───────────────────────────────────────────────────────────
/// Periodic-table data, one element per line, fields separated by `|`:
///
/// `Z|symbol|name|mass|category|period|group|electronegativity|state`
///
/// * category: nonmetal / halogen / noble / alkali / alkaline / transition /
///   post-trans / metalloid / lanthanide / actinide
/// * group: `0` marks the lanthanide and actinide rows
/// * electronegativity: `0` = not applicable / unknown on the Pauling scale
/// * state: `S` = solid, `L` = liquid, `G` = gas
const ELEMENTS_DATA: &str = r#"1|H|Hydrogen|1.008|nonmetal|1|1|2.20|G
2|He|Helium|4.003|noble|1|18|0|G
3|Li|Lithium|6.941|alkali|2|1|0.98|S
4|Be|Beryllium|9.012|alkaline|2|2|1.57|S
5|B|Boron|10.811|metalloid|2|13|2.04|S
6|C|Carbon|12.011|nonmetal|2|14|2.55|S
7|N|Nitrogen|14.007|nonmetal|2|15|3.04|G
8|O|Oxygen|15.999|nonmetal|2|16|3.44|G
9|F|Fluorine|18.998|halogen|2|17|3.98|G
10|Ne|Neon|20.180|noble|2|18|0|G
11|Na|Sodium|22.990|alkali|3|1|0.93|S
12|Mg|Magnesium|24.305|alkaline|3|2|1.31|S
13|Al|Aluminium|26.982|post-trans|3|13|1.61|S
14|Si|Silicon|28.085|metalloid|3|14|1.90|S
15|P|Phosphorus|30.974|nonmetal|3|15|2.19|S
16|S|Sulfur|32.06|nonmetal|3|16|2.58|S
17|Cl|Chlorine|35.45|halogen|3|17|3.16|G
18|Ar|Argon|39.948|noble|3|18|0|G
19|K|Potassium|39.098|alkali|4|1|0.82|S
20|Ca|Calcium|40.078|alkaline|4|2|1.00|S
21|Sc|Scandium|44.956|transition|4|3|1.36|S
22|Ti|Titanium|47.867|transition|4|4|1.54|S
23|V|Vanadium|50.942|transition|4|5|1.63|S
24|Cr|Chromium|51.996|transition|4|6|1.66|S
25|Mn|Manganese|54.938|transition|4|7|1.55|S
26|Fe|Iron|55.845|transition|4|8|1.83|S
27|Co|Cobalt|58.933|transition|4|9|1.88|S
28|Ni|Nickel|58.693|transition|4|10|1.91|S
29|Cu|Copper|63.546|transition|4|11|1.90|S
30|Zn|Zinc|65.38|transition|4|12|1.65|S
31|Ga|Gallium|69.723|post-trans|4|13|1.81|S
32|Ge|Germanium|72.630|metalloid|4|14|2.01|S
33|As|Arsenic|74.922|metalloid|4|15|2.18|S
34|Se|Selenium|78.971|nonmetal|4|16|2.55|S
35|Br|Bromine|79.904|halogen|4|17|2.96|L
36|Kr|Krypton|83.798|noble|4|18|3.00|G
37|Rb|Rubidium|85.468|alkali|5|1|0.82|S
38|Sr|Strontium|87.62|alkaline|5|2|0.95|S
39|Y|Yttrium|88.906|transition|5|3|1.22|S
40|Zr|Zirconium|91.224|transition|5|4|1.33|S
41|Nb|Niobium|92.906|transition|5|5|1.60|S
42|Mo|Molybdenum|95.96|transition|5|6|2.16|S
43|Tc|Technetium|98|transition|5|7|1.90|S
44|Ru|Ruthenium|101.07|transition|5|8|2.20|S
45|Rh|Rhodium|102.906|transition|5|9|2.28|S
46|Pd|Palladium|106.42|transition|5|10|2.20|S
47|Ag|Silver|107.868|transition|5|11|1.93|S
48|Cd|Cadmium|112.414|transition|5|12|1.69|S
49|In|Indium|114.818|post-trans|5|13|1.78|S
50|Sn|Tin|118.710|post-trans|5|14|1.96|S
51|Sb|Antimony|121.760|metalloid|5|15|2.05|S
52|Te|Tellurium|127.60|metalloid|5|16|2.10|S
53|I|Iodine|126.904|halogen|5|17|2.66|S
54|Xe|Xenon|131.293|noble|5|18|2.60|G
55|Cs|Caesium|132.905|alkali|6|1|0.79|S
56|Ba|Barium|137.327|alkaline|6|2|0.89|S
57|La|Lanthanum|138.905|lanthanide|6|0|1.10|S
58|Ce|Cerium|140.116|lanthanide|6|0|1.12|S
59|Pr|Praseodymium|140.908|lanthanide|6|0|1.13|S
60|Nd|Neodymium|144.242|lanthanide|6|0|1.14|S
61|Pm|Promethium|145|lanthanide|6|0|0|S
62|Sm|Samarium|150.36|lanthanide|6|0|1.17|S
63|Eu|Europium|151.964|lanthanide|6|0|0|S
64|Gd|Gadolinium|157.25|lanthanide|6|0|1.20|S
65|Tb|Terbium|158.925|lanthanide|6|0|0|S
66|Dy|Dysprosium|162.500|lanthanide|6|0|1.22|S
67|Ho|Holmium|164.930|lanthanide|6|0|1.23|S
68|Er|Erbium|167.259|lanthanide|6|0|1.24|S
69|Tm|Thulium|168.934|lanthanide|6|0|1.25|S
70|Yb|Ytterbium|173.054|lanthanide|6|0|0|S
71|Lu|Lutetium|174.967|lanthanide|6|0|1.27|S
72|Hf|Hafnium|178.49|transition|6|4|1.30|S
73|Ta|Tantalum|180.948|transition|6|5|1.50|S
74|W|Tungsten|183.84|transition|6|6|2.36|S
75|Re|Rhenium|186.207|transition|6|7|1.90|S
76|Os|Osmium|190.23|transition|6|8|2.20|S
77|Ir|Iridium|192.217|transition|6|9|2.20|S
78|Pt|Platinum|195.084|transition|6|10|2.28|S
79|Au|Gold|196.967|transition|6|11|2.54|S
80|Hg|Mercury|200.592|transition|6|12|2.00|L
81|Tl|Thallium|204.38|post-trans|6|13|1.62|S
82|Pb|Lead|207.2|post-trans|6|14|2.33|S
83|Bi|Bismuth|208.980|post-trans|6|15|2.02|S
84|Po|Polonium|209|metalloid|6|16|2.00|S
85|At|Astatine|210|halogen|6|17|2.20|S
86|Rn|Radon|222|noble|6|18|0|G
87|Fr|Francium|223|alkali|7|1|0.70|S
88|Ra|Radium|226|alkaline|7|2|0.90|S
89|Ac|Actinium|227|actinide|7|0|1.10|S
90|Th|Thorium|232.038|actinide|7|0|1.30|S
91|Pa|Protactinium|231.036|actinide|7|0|1.50|S
92|U|Uranium|238.029|actinide|7|0|1.38|S
93|Np|Neptunium|237|actinide|7|0|1.36|S
94|Pu|Plutonium|244|actinide|7|0|1.28|S
95|Am|Americium|243|actinide|7|0|1.30|S
96|Cm|Curium|247|actinide|7|0|1.30|S
97|Bk|Berkelium|247|actinide|7|0|1.30|S
98|Cf|Californium|251|actinide|7|0|1.30|S
99|Es|Einsteinium|252|actinide|7|0|1.30|S
100|Fm|Fermium|257|actinide|7|0|1.30|S
101|Md|Mendelevium|258|actinide|7|0|1.30|S
102|No|Nobelium|259|actinide|7|0|1.30|S
103|Lr|Lawrencium|266|actinide|7|0|0|S
104|Rf|Rutherfordium|267|transition|7|4|0|S
105|Db|Dubnium|268|transition|7|5|0|S
106|Sg|Seaborgium|271|transition|7|6|0|S
107|Bh|Bohrium|272|transition|7|7|0|S
108|Hs|Hassium|270|transition|7|8|0|S
109|Mt|Meitnerium|276|transition|7|9|0|S
110|Ds|Darmstadtium|281|transition|7|10|0|S
111|Rg|Roentgenium|280|transition|7|11|0|S
112|Cn|Copernicium|285|transition|7|12|0|S
113|Nh|Nihonium|284|post-trans|7|13|0|S
114|Fl|Flerovium|289|post-trans|7|14|0|S
115|Mc|Moscovium|288|post-trans|7|15|0|S
116|Lv|Livermorium|293|post-trans|7|16|0|S
117|Ts|Tennessine|294|halogen|7|17|0|S
118|Og|Oganesson|294|noble|7|18|0|G"#;

/// Looks up one element in [`ELEMENTS_DATA`] and formats a short fact sheet.
///
/// `query` may be an atomic number (`79`), a symbol (`Au`), a full name
/// (`Gold`) or a name prefix (`gol`); matching is ASCII case-insensitive.
///
/// Matches are tried in priority order — atomic number, exact symbol, exact
/// name, then name prefix — so a symbol such as `Ca` resolves to Calcium even
/// though "Carbon" starts with the same letters and appears earlier in the
/// table.
///
/// # Errors
/// Returns `Err` with a usage hint when `query` is blank or nothing matches.
pub fn lookup_element(query: &str) -> Result<String, String> {
    let q = query.trim();
    if q.is_empty() {
        return Err(
            "No element specified. Try a symbol (H, Au), name (Gold), or atomic number (79)."
                .into(),
        );
    }
    let q_lower = q.to_ascii_lowercase();
    let q_num: Option<u32> = q.parse().ok();

    // Parse the table once; malformed lines (fewer than 9 fields) are ignored.
    let rows: Vec<Vec<&str>> = ELEMENTS_DATA
        .lines()
        .map(|line| line.splitn(9, '|').collect::<Vec<&str>>())
        .filter(|fields| fields.len() == 9)
        .collect();

    // Most specific criterion first; each later pass only runs when the
    // earlier ones found nothing.
    let found = rows
        .iter()
        .find(|r| q_num.is_some() && r[0].parse::<u32>().ok() == q_num)
        .or_else(|| rows.iter().find(|r| r[1].eq_ignore_ascii_case(q)))
        .or_else(|| rows.iter().find(|r| r[2].eq_ignore_ascii_case(q)))
        .or_else(|| {
            rows.iter()
                .find(|r| r[2].to_ascii_lowercase().starts_with(&q_lower))
        });

    let row = match found {
        Some(r) => r,
        None => {
            return Err(format!(
                "Element '{}' not found.\nTry: symbol (H, Au, Fe), name (Gold, Iron), or atomic number (79, 26).",
                q
            ))
        }
    };

    let z: u32 = row[0].parse().unwrap_or(0);
    let sym = row[1];
    let name = row[2];
    let mass_raw = row[3];
    let cat_raw = row[4];
    let period = row[5];
    let group = row[6];
    let en_raw = row[7];
    let state_raw = row[8];

    let category = match cat_raw {
        "alkali" => "Alkali Metal",
        "alkaline" => "Alkaline Earth Metal",
        "transition" => "Transition Metal",
        "post-trans" => "Post-Transition Metal",
        "metalloid" => "Metalloid",
        "nonmetal" => "Nonmetal",
        "halogen" => "Halogen",
        "noble" => "Noble Gas",
        "lanthanide" => "Lanthanide",
        "actinide" => "Actinide",
        other => other,
    };
    // Group "0" is the table's marker for the f-block series.
    let group_disp = if group == "0" {
        match cat_raw {
            "lanthanide" => "La series",
            "actinide" => "Ac series",
            _ => "\u{2014}",
        }
    } else {
        group
    };
    let en_disp = if en_raw == "0" {
        "N/A".to_string()
    } else {
        format!("{} (Pauling)", en_raw)
    };
    let state_disp = match state_raw {
        "S" => "Solid",
        "L" => "Liquid",
        "G" => "Gas",
        _ => "Unknown",
    };
    // Masses stored without a decimal point are labelled as the mass number
    // of the most stable isotope.
    let mass_disp = if mass_raw.contains('.') {
        format!("{} u", mass_raw)
    } else {
        format!("{} u  (most stable isotope)", mass_raw)
    };

    Ok(format!(
        "{sym}  {name}  (Z = {z})\n\
         {sep}\n\
         Atomic Mass:         {mass_disp}\n\
         Category:            {category}\n\
         Period / Group:      {period} / {group_disp}\n\
         Electronegativity:   {en_disp}\n\
         State at STP:        {state_disp}",
        sep = "\u{2500}".repeat(42),
    ))
}
2081
2082// ─── File / text hash ─────────────────────────────────────────────────────────
2083
2084pub async fn hash_input(input: &str, algo: &str) -> Result<String, String> {
2085    let safe_input = input.replace('\\', "\\\\").replace('"', "\\\"");
2086    let safe_algo = algo.trim().to_ascii_lowercase().replace('"', "");
2087
2088    let script = format!(
2089        r####"import hashlib, os, sys
2090
2091_target = "{safe_input}"
2092_algo   = "{safe_algo}"
2093
2094_is_file = os.path.isfile(_target)
2095if _is_file:
2096    with open(_target, 'rb') as _fh:
2097        _data = _fh.read()
2098    _sz = len(_data)
2099    if _sz >= 1_048_576:   _szlbl = "%.2f MB" % (_sz / 1_048_576)
2100    elif _sz >= 1024:      _szlbl = "%.1f KB" % (_sz / 1024)
2101    else:                  _szlbl = str(_sz) + " bytes"
2102    _label = "File: " + _target + "  (" + _szlbl + ")"
2103else:
2104    _data  = _target.encode('utf-8')
2105    _label = 'Text: "' + _target + '"'
2106
2107_algos = ['md5', 'sha1', 'sha256', 'sha512'] if _algo in ('all', '') else [_algo]
2108print(_label)
2109print()
2110for _a in _algos:
2111    try:
2112        _h = hashlib.new(_a)
2113        _h.update(_data)
2114        print(_a.upper().ljust(10) + _h.hexdigest())
2115    except ValueError as _e:
2116        print(_a + ": " + str(_e), file=sys.stderr); sys.exit(1)
2117"####,
2118        safe_input = safe_input,
2119        safe_algo = safe_algo,
2120    );
2121
2122    let sandbox_args = serde_json::json!({
2123        "language": "python",
2124        "code": script,
2125        "timeout_seconds": 30
2126    });
2127    crate::tools::code_sandbox::execute(&sandbox_args).await
2128}
2129
2130// ─── Encoding utilities ───────────────────────────────────────────────────────
2131
2132pub async fn encode_decode(text: &str, codec: &str, is_decode: bool) -> Result<String, String> {
2133    let text_hex: String = text.bytes().map(|b| format!("{:02x}", b)).collect();
2134    let safe_codec = codec.trim().to_ascii_lowercase().replace('"', "");
2135    let mode = if is_decode { "decode" } else { "encode" };
2136
2137    let script = format!(
2138        r####"import base64 as _b64, binascii as _ba, sys
2139import urllib.parse as _up
2140
2141_text  = bytes.fromhex("{text_hex}").decode('utf-8', errors='replace')
2142_codec = "{safe_codec}"
2143_mode  = "{mode}"
2144_CODECS = "base64  hex  url  rot13  html  binary"
2145
2146try:
2147    if _mode == "encode":
2148        if _codec in ("base64", "b64", ""):
2149            print(_b64.b64encode(_text.encode('utf-8')).decode())
2150        elif _codec in ("hex", "hexadecimal"):
2151            print(_ba.hexlify(_text.encode('utf-8')).decode())
2152        elif _codec in ("url", "urlencode", "percent"):
2153            print(_up.quote(_text, safe=''))
2154        elif _codec == "rot13":
2155            import codecs as _cd; print(_cd.encode(_text, 'rot_13'))
2156        elif _codec in ("html", "htmlentities"):
2157            import html as _ht; print(_ht.escape(_text))
2158        elif _codec in ("binary", "bin"):
2159            print(' '.join(bin(b)[2:].zfill(8) for b in _text.encode('utf-8')))
2160        else:
2161            print("Unknown codec: " + _codec + ".  Supported: " + _CODECS, file=sys.stderr); sys.exit(1)
2162    else:
2163        if _codec in ("base64", "b64", ""):
2164            print(_b64.b64decode(_text.strip() + "==").decode('utf-8', errors='replace'))
2165        elif _codec in ("hex", "hexadecimal"):
2166            print(_ba.unhexlify(_text.replace(' ', '')).decode('utf-8', errors='replace'))
2167        elif _codec in ("url", "urlencode", "percent"):
2168            print(_up.unquote(_text))
2169        elif _codec == "rot13":
2170            import codecs as _cd; print(_cd.decode(_text, 'rot_13'))
2171        elif _codec in ("html", "htmlentities"):
2172            import html as _ht; print(_ht.unescape(_text))
2173        elif _codec in ("binary", "bin"):
2174            _bytes = bytes(int(b, 2) for b in _text.split() if b)
2175            print(_bytes.decode('utf-8', errors='replace'))
2176        else:
2177            print("Unknown codec: " + _codec + ".  Supported: " + _CODECS, file=sys.stderr); sys.exit(1)
2178except Exception as _e:
2179    print("Error: " + str(_e), file=sys.stderr); sys.exit(1)
2180"####,
2181        text_hex = text_hex,
2182        safe_codec = safe_codec,
2183        mode = mode,
2184    );
2185
2186    let sandbox_args = serde_json::json!({
2187        "language": "python",
2188        "code": script,
2189        "timeout_seconds": 10
2190    });
2191    crate::tools::code_sandbox::execute(&sandbox_args).await
2192}
hematite/tools/scientific.rs

hematite/tools/
scientific.rs