Skip to main content

hematite/tools/
scientific_ext.rs

1// ─── Formula library, random generation, and data diff ───────────────────────
2// Compiled into scientific.rs via `mod scientific_ext` in tools/mod.rs — kept
3// in a separate file purely to avoid making scientific.rs even larger.
4
5// ── Formula library ───────────────────────────────────────────────────────────
6// Tuple layout: (name, formula, category, variables)
7
8const FORMULAS: &[(&str, &str, &str, &str)] = &[
9    // Mechanics
10    ("Newton's Second Law",        "F = m \u{00D7} a",                         "mechanics",      "F=force(N)  m=mass(kg)  a=acceleration(m/s\u{00B2})"),
11    ("Kinematic: velocity",        "v = u + a\u{00D7}t",                        "mechanics",      "v=final velocity  u=initial velocity  a=acceleration  t=time"),
12    ("Kinematic: displacement",    "s = u\u{00D7}t + \u{00BD}\u{00D7}a\u{00D7}t\u{00B2}",  "mechanics", "s=displacement  u=initial velocity  a=acceleration  t=time"),
13    ("Kinematic: v\u{00B2} relation", "v\u{00B2} = u\u{00B2} + 2\u{00D7}a\u{00D7}s",   "mechanics", "v=final velocity  u=initial velocity  a=acceleration  s=displacement"),
14    ("Kinetic Energy",             "KE = \u{00BD} \u{00D7} m \u{00D7} v\u{00B2}",  "mechanics",  "KE=kinetic energy(J)  m=mass(kg)  v=velocity(m/s)"),
15    ("Gravitational Potential Energy", "PE = m \u{00D7} g \u{00D7} h",         "mechanics",      "PE=potential energy(J)  m=mass(kg)  g=9.80665 m/s\u{00B2}  h=height(m)"),
16    ("Momentum",                   "p = m \u{00D7} v",                          "mechanics",      "p=momentum(kg\u{00B7}m/s)  m=mass(kg)  v=velocity(m/s)"),
17    ("Impulse-Momentum Theorem",   "J = F \u{00D7} \u{0394}t = \u{0394}p",     "mechanics",      "J=impulse(N\u{00B7}s)  F=force(N)  \u{0394}t=time interval  \u{0394}p=change in momentum"),
18    ("Work",                       "W = F \u{00D7} d \u{00D7} cos(\u{03B8})",   "mechanics",      "W=work(J)  F=force(N)  d=distance(m)  \u{03B8}=angle between F and d"),
19    ("Power",                      "P = W / t = F \u{00D7} v",                  "mechanics",      "P=power(W)  W=work(J)  t=time(s)  F=force(N)  v=velocity(m/s)"),
20    ("Torque",                     "\u{03C4} = r \u{00D7} F \u{00D7} sin(\u{03B8})", "mechanics", "\u{03C4}=torque(N\u{00B7}m)  r=moment arm(m)  F=force(N)  \u{03B8}=angle"),
21    ("Universal Gravitation",      "F = G \u{00D7} m\u{2081} \u{00D7} m\u{2082} / r\u{00B2}", "mechanics", "G=6.674\u{00D7}10\u{207B}\u{00B9}\u{00B9} N\u{00B7}m\u{00B2}/kg\u{00B2}  m\u{2081},m\u{2082}=masses(kg)  r=distance(m)"),
22    ("Centripetal Acceleration",   "a_c = v\u{00B2} / r = \u{03C9}\u{00B2} \u{00D7} r", "mechanics", "a_c=centripetal acceleration(m/s\u{00B2})  v=speed(m/s)  r=radius(m)  \u{03C9}=angular velocity(rad/s)"),
23
24    // Waves & Optics
25    ("Wave Speed",                 "v = f \u{00D7} \u{03BB}",                   "waves",          "v=wave speed(m/s)  f=frequency(Hz)  \u{03BB}=wavelength(m)"),
26    ("Photon Energy",              "E = h \u{00D7} f = h\u{00D7}c / \u{03BB}",  "waves",          "h=6.626\u{00D7}10\u{207B}\u{00B3}\u{2074} J\u{00B7}s  f=frequency(Hz)  c=3\u{00D7}10\u{2078} m/s  \u{03BB}=wavelength(m)"),
27    ("Doppler Effect",             "f\u{2019} = f \u{00D7} (v + v_o) / (v \u{2212} v_s)", "waves", "f\u{2019}=observed freq  f=source freq  v=wave speed  v_o=observer speed  v_s=source speed"),
28    ("Snell's Law",                "n\u{2081} \u{00D7} sin(\u{03B8}\u{2081}) = n\u{2082} \u{00D7} sin(\u{03B8}\u{2082})", "waves", "n=refractive index  \u{03B8}=angle to normal"),
29
30    // Thermodynamics
31    ("Ideal Gas Law",              "P \u{00D7} V = n \u{00D7} R \u{00D7} T",   "thermodynamics", "P=pressure(Pa)  V=volume(m\u{00B3})  n=moles  R=8.314 J/(mol\u{00B7}K)  T=temperature(K)"),
32    ("Heat Transfer",              "Q = m \u{00D7} c \u{00D7} \u{0394}T",       "thermodynamics", "Q=heat(J)  m=mass(kg)  c=specific heat capacity  \u{0394}T=temperature change(K)"),
33    ("Carnot Efficiency",          "\u{03B7} = 1 \u{2212} T_c / T_h",           "thermodynamics", "\u{03B7}=max efficiency(0\u{2013}1)  T_c=cold reservoir(K)  T_h=hot reservoir(K)"),
34    ("Stefan-Boltzmann Law",       "P = \u{03C3} \u{00D7} A \u{00D7} T\u{2074}", "thermodynamics", "\u{03C3}=5.67\u{00D7}10\u{207B}\u{2078} W/(m\u{00B2}\u{00B7}K\u{2074})  A=area(m\u{00B2})  T=temperature(K)"),
35
36    // Electricity & Magnetism
37    ("Ohm's Law",                  "V = I \u{00D7} R",                          "electricity",    "V=voltage(V)  I=current(A)  R=resistance(\u{03A9})"),
38    ("Electrical Power",           "P = I \u{00D7} V = I\u{00B2} \u{00D7} R = V\u{00B2} / R", "electricity", "P=power(W)  I=current(A)  V=voltage(V)  R=resistance(\u{03A9})"),
39    ("Coulomb's Law",              "F = k \u{00D7} q\u{2081} \u{00D7} q\u{2082} / r\u{00B2}", "electricity", "k=8.99\u{00D7}10\u{2079} N\u{00B7}m\u{00B2}/C\u{00B2}  q=charges(C)  r=distance(m)"),
40    ("Capacitance",                "C = Q / V",                                 "electricity",    "C=capacitance(F)  Q=charge(C)  V=voltage(V)"),
41    ("Energy in Capacitor",        "E = \u{00BD} \u{00D7} C \u{00D7} V\u{00B2}", "electricity",  "E=energy(J)  C=capacitance(F)  V=voltage(V)"),
42    ("LC Resonant Frequency",      "f = 1 / (2\u{03C0} \u{00D7} \u{221A}(L\u{00D7}C))", "electricity", "f=frequency(Hz)  L=inductance(H)  C=capacitance(F)"),
43    ("Faraday's Law",              "EMF = \u{2212}N \u{00D7} \u{0394}\u{03A6} / \u{0394}t", "electricity", "N=turns  \u{0394}\u{03A6}=flux change(Wb)  \u{0394}t=time(s)"),
44    ("Magnetic Force on Current",  "F = I \u{00D7} L \u{00D7} B \u{00D7} sin(\u{03B8})", "electricity", "I=current(A)  L=length(m)  B=magnetic field(T)  \u{03B8}=angle"),
45
46    // Special Relativity
47    ("Mass-Energy Equivalence",    "E = m \u{00D7} c\u{00B2}",                 "relativity",     "E=energy(J)  m=mass(kg)  c=2.998\u{00D7}10\u{2078} m/s"),
48    ("Lorentz Factor",             "\u{03B3} = 1 / \u{221A}(1 \u{2212} v\u{00B2}/c\u{00B2})", "relativity", "\u{03B3}=Lorentz factor  v=velocity  c=speed of light"),
49    ("Time Dilation",              "t = t\u{2080} \u{00D7} \u{03B3}",          "relativity",     "t=dilated time  t\u{2080}=proper time  \u{03B3}=Lorentz factor"),
50    ("Length Contraction",         "L = L\u{2080} / \u{03B3}",                 "relativity",     "L=contracted length  L\u{2080}=proper length  \u{03B3}=Lorentz factor"),
51
52    // Chemistry
53    ("pH Definition",              "pH = \u{2212}log\u{2081}\u{2080}[H\u{207A}]", "chemistry",  "[H\u{207A}]=hydrogen ion concentration(mol/L)"),
54    ("Henderson-Hasselbalch",      "pH = pKa + log([A\u{207B}]/[HA])",         "chemistry",      "pKa=acid dissociation constant  [A\u{207B}]=conjugate base  [HA]=acid concentration"),
55    ("Arrhenius Equation",         "k = A \u{00D7} e^(\u{2212}Ea/(R\u{00D7}T))", "chemistry",  "A=pre-exponential factor  Ea=activation energy(J/mol)  R=8.314  T=temperature(K)"),
56    ("Nernst Equation",            "E = E\u{00B0} \u{2212} (RT)/(nF) \u{00D7} ln(Q)", "chemistry", "E\u{00B0}=standard potential  n=electrons  F=96485 C/mol  Q=reaction quotient"),
57    ("Beer-Lambert Law",           "A = \u{03B5} \u{00D7} c \u{00D7} l",       "chemistry",      "A=absorbance  \u{03B5}=molar absorptivity(L/(mol\u{00B7}cm))  c=concentration(mol/L)  l=path length(cm)"),
58
59    // Mathematics
60    ("Quadratic Formula",          "x = (\u{2212}b \u{00B1} \u{221A}(b\u{00B2}\u{2212}4ac)) / (2a)", "mathematics", "coefficients of ax\u{00B2}+bx+c=0"),
61    ("Pythagorean Theorem",        "a\u{00B2} + b\u{00B2} = c\u{00B2}",        "mathematics",    "a,b=legs of right triangle  c=hypotenuse"),
62    ("Circle Area",                "A = \u{03C0} \u{00D7} r\u{00B2}",          "mathematics",    "A=area  r=radius"),
63    ("Circle Circumference",       "C = 2 \u{00D7} \u{03C0} \u{00D7} r",      "mathematics",    "C=circumference  r=radius"),
64    ("Sphere Volume",              "V = (4/3) \u{00D7} \u{03C0} \u{00D7} r\u{00B3}", "mathematics", "V=volume  r=radius"),
65    ("Sphere Surface Area",        "A = 4 \u{00D7} \u{03C0} \u{00D7} r\u{00B2}", "mathematics", "A=surface area  r=radius"),
66    ("Cylinder Volume",            "V = \u{03C0} \u{00D7} r\u{00B2} \u{00D7} h", "mathematics", "V=volume  r=radius  h=height"),
67    ("Euler's Formula",            "e^(i\u{03B8}) = cos(\u{03B8}) + i\u{00D7}sin(\u{03B8})", "mathematics", "\u{03B8}=angle(radians)  i=imaginary unit"),
68    ("Bayes' Theorem",             "P(A|B) = P(B|A) \u{00D7} P(A) / P(B)",    "mathematics",    "P(A|B)=probability of A given B"),
69    ("Normal Distribution PDF",    "f(x) = (1/(\u{03C3}\u{221A}(2\u{03C0}))) \u{00D7} e^(\u{2212}(x\u{2212}\u{03BC})\u{00B2}/(2\u{03C3}\u{00B2}))", "mathematics", "\u{03BC}=mean  \u{03C3}=standard deviation"),
70
71    // Finance
72    ("Compound Interest",          "A = P \u{00D7} (1 + r/n)^(n\u{00D7}t)",   "finance",        "A=final amount  P=principal  r=annual rate(decimal)  n=compounds/year  t=years"),
73    ("Simple Interest",            "I = P \u{00D7} r \u{00D7} t",              "finance",        "I=interest  P=principal  r=annual rate(decimal)  t=years"),
74    ("Present Value",              "PV = FV / (1 + r)^n",                       "finance",        "PV=present value  FV=future value  r=rate per period  n=periods"),
75];
76
77pub fn search_formulas(query: &str) -> String {
78    let q = query.trim();
79
80    if q.is_empty() || q.eq_ignore_ascii_case("list") || q.eq_ignore_ascii_case("all") {
81        let mut cats: std::collections::BTreeMap<&str, Vec<&&str>> = Default::default();
82        for (name, _, cat, _) in FORMULAS {
83            cats.entry(cat).or_default().push(name);
84        }
85        let mut out = format!("Formula library — {} entries\n\n", FORMULAS.len());
86        for (cat, names) in &cats {
87            out.push_str(&format!(
88                "{}  ({} formulas)\n",
89                cat.to_uppercase(),
90                names.len()
91            ));
92            for n in names {
93                out.push_str(&format!("  {n}\n"));
94            }
95            out.push('\n');
96        }
97        out.push_str("Search: hematite --formula \"kinetic energy\"  or  hematite --formula ohms");
98        return out;
99    }
100
101    let q_lower = q.to_ascii_lowercase();
102    let hits: Vec<_> = FORMULAS
103        .iter()
104        .filter(|(name, formula, cat, vars)| {
105            name.to_ascii_lowercase().contains(&q_lower)
106                || cat.to_ascii_lowercase().contains(&q_lower)
107                || formula.to_ascii_lowercase().contains(&q_lower)
108                || vars.to_ascii_lowercase().contains(&q_lower)
109        })
110        .collect();
111
112    if hits.is_empty() {
113        return format!(
114            "No formulas found for '{q}'.\nRun  hematite --formula list  to browse all {} entries.",
115            FORMULAS.len()
116        );
117    }
118
119    let sep = "\u{2500}".repeat(50);
120    let mut out = String::new();
121    for (name, formula, cat, vars) in &hits {
122        out.push_str(&format!(
123            "{name}\n{sep}\n  Formula:    {formula}\n  Variables:  {vars}\n  Category:   {cat}\n\n"
124        ));
125    }
126    out.trim_end().to_string()
127}
128
129// ── Cryptographically secure random generation ────────────────────────────────
130
131pub async fn generate_random(
132    kind: &str,
133    length: usize,
134    count: usize,
135    extra: &str,
136) -> Result<String, String> {
137    let safe_kind = kind.trim().to_ascii_lowercase().replace('"', "");
138    let safe_extra: String = extra.bytes().map(|b| format!("{:02x}", b)).collect();
139    let eff_count = count.clamp(1, 1000);
140    let eff_length = length.clamp(1, 4096);
141
142    let script = format!(
143        r####"import secrets, uuid as _uuid, sys, string, re as _re
144
145_kind  = "{safe_kind}"
146_len   = {eff_length}
147_count = {eff_count}
148_extra = bytes.fromhex("{safe_extra}").decode('utf-8', errors='replace').strip()
149
150_TYPES = "uuid  password  token  hex  urlsafe  pin  bytes  int  dice"
151
152for _i in range(_count):
153    try:
154        if _kind == "uuid":
155            print(str(_uuid.uuid4()))
156        elif _kind in ("password", "pwd", "pass"):
157            _cs = _extra if _extra else string.ascii_letters + string.digits + "!@#$%^&*-_=+"
158            print(''.join(secrets.choice(_cs) for _ in range(_len)))
159        elif _kind in ("token", "hex"):
160            nb = max(1, _len // 2)
161            print(secrets.token_hex(nb))
162        elif _kind in ("urlsafe", "url"):
163            print(secrets.token_urlsafe(_len))
164        elif _kind == "pin":
165            print(''.join(secrets.choice(string.digits) for _ in range(_len)))
166        elif _kind == "bytes":
167            print(secrets.token_bytes(_len).hex())
168        elif _kind in ("int", "integer", "number"):
169            _parts = _extra.split()
170            _lo = int(_parts[0]) if len(_parts) > 0 else 1
171            _hi = int(_parts[1]) if len(_parts) > 1 else 100
172            if _lo > _hi: _lo, _hi = _hi, _lo
173            print(secrets.randbelow(_hi - _lo + 1) + _lo)
174        elif _kind == "dice":
175            _notation = _extra or "1d6"
176            _dm = _re.match(r'^(\d*)d(\d+)([+-]\d+)?$', _notation, _re.I)
177            if not _dm:
178                print("Dice format: NdN or NdN+M (e.g. 2d6  d20  3d8+2)", file=sys.stderr)
179                sys.exit(1)
180            _nd  = int(_dm.group(1) or 1)
181            _ds  = int(_dm.group(2))
182            _mod = int(_dm.group(3) or 0)
183            _rolls = [secrets.randbelow(_ds) + 1 for _ in range(_nd)]
184            _total = sum(_rolls) + _mod
185            if _nd > 1 or _mod:
186                _breakdown = " + ".join(str(r) for r in _rolls)
187                _suffix = ("  +" + str(_mod)) if _mod > 0 else (("  " + str(_mod)) if _mod < 0 else "")
188                print(str(_total) + "  [" + _breakdown + _suffix + "]")
189            else:
190                print(str(_total))
191        else:
192            print("Unknown type: " + _kind + ".  Supported: " + _TYPES, file=sys.stderr)
193            sys.exit(1)
194    except Exception as _e:
195        print("Error: " + str(_e), file=sys.stderr); sys.exit(1)
196"####,
197        safe_kind = safe_kind,
198        safe_extra = safe_extra,
199        eff_length = eff_length,
200        eff_count = eff_count,
201    );
202
203    let sandbox_args = serde_json::json!({
204        "language": "python",
205        "code": script,
206        "timeout_seconds": 10
207    });
208    crate::tools::code_sandbox::execute(&sandbox_args).await
209}
210
211// ── Row-level data diff ───────────────────────────────────────────────────────
212
213pub async fn diff_data(path_a: &str, path_b: &str, key_col: &str) -> Result<String, String> {
214    let hex_a: String = path_a.bytes().map(|b| format!("{:02x}", b)).collect();
215    let hex_b: String = path_b.bytes().map(|b| format!("{:02x}", b)).collect();
216    let hex_key: String = key_col.bytes().map(|b| format!("{:02x}", b)).collect();
217
218    let script = format!(
219        r####"import csv as _csv, json as _js, sqlite3 as _sq, os, sys
220
221_pa  = bytes.fromhex("{hex_a}").decode()
222_pb  = bytes.fromhex("{hex_b}").decode()
223_key = bytes.fromhex("{hex_key}").decode().strip()
224
225def _load(path):
226    ext = os.path.splitext(path)[1].lower().lstrip('.')
227    if ext in ('csv', 'tsv'):
228        with open(path, encoding='utf-8-sig', errors='replace', newline='') as _fh:
229            _r = _csv.DictReader(_fh, delimiter='\t' if ext == 'tsv' else ',')
230            return list(_r)
231    elif ext == 'json':
232        with open(path, encoding='utf-8') as _fh:
233            _d = _js.load(_fh)
234        return _d if isinstance(_d, list) else next(iter(_d.values()), [])
235    elif ext in ('db', 'sqlite', 'sqlite3'):
236        _con = _sq.connect(path)
237        _cur = _con.cursor()
238        _cur.execute("SELECT name FROM sqlite_master WHERE type='table' LIMIT 1")
239        _t = _cur.fetchone()
240        if not _t: return []
241        _cur.execute("SELECT * FROM [%s]" % _t[0])
242        _cols2 = [_d[0] for _d in _cur.description]
243        _rows2 = [dict(zip(_cols2, _r)) for _r in _cur.fetchall()]
244        _con.close()
245        return _rows2
246    else:
247        print("Unsupported format: " + ext, file=sys.stderr); sys.exit(1)
248
249_ra = _load(_pa)
250_rb = _load(_pb)
251
252if not _ra and not _rb:
253    print("Both files are empty."); sys.exit(0)
254
255_cols = list((_ra[0] if _ra else _rb[0]).keys())
256_kc   = _key if _key else _cols[0]
257
258def _idx(rows):
259    d = {{}}
260    for r in rows:
261        k = str(r.get(_kc, ''))
262        d.setdefault(k, []).append(r)
263    return d
264
265_da = _idx(_ra)
266_db = _idx(_rb)
267_ka = set(_da); _kb = set(_db)
268
269_added    = sorted(_kb - _ka)
270_removed  = sorted(_ka - _kb)
271_common   = sorted(_ka & _kb)
272
273_modified = []
274for _k in _common:
275    _r1 = _da[_k][0]; _r2 = _db[_k][0]
276    _diffs = {{c: (str(_r1.get(c,'')), str(_r2.get(c,'')))
277              for c in _cols if str(_r1.get(c,'')) != str(_r2.get(c,'')) and c != _kc}}
278    if _diffs:
279        _modified.append((_k, _diffs))
280
281_sep = "─" * 52
282print(_sep)
283print("Data diff:  A = %s" % os.path.basename(_pa))
284print("            B = %s" % os.path.basename(_pb))
285print("Key column: %s  |  A: %d rows  B: %d rows" % (_kc, len(_ra), len(_rb)))
286print(_sep)
287print()
288
289if not _added and not _removed and not _modified:
290    print("✓ No differences found.  Files are identical on key column '%s'." % _kc)
291    sys.exit(0)
292
293def _preview(row, n=4):
294    items = list(row.items())[:n]
295    return "  ".join(("%s=%s" % (k, str(v)[:20])) for k, v in items if k != _kc)
296
297if _added:
298    print("+ Added (%d rows in B not in A):" % len(_added))
299    for _k in _added[:25]:
300        print("  + %s  %s" % (_k, _preview(_db[_k][0])))
301    if len(_added) > 25: print("  ... and %d more" % (len(_added)-25))
302    print()
303
304if _removed:
305    print("- Removed (%d rows in A not in B):" % len(_removed))
306    for _k in _removed[:25]:
307        print("  - %s  %s" % (_k, _preview(_da[_k][0])))
308    if len(_removed) > 25: print("  ... and %d more" % (len(_removed)-25))
309    print()
310
311if _modified:
312    print("~ Modified (%d rows with changed values):" % len(_modified))
313    for _k, _diffs in _modified[:25]:
314        print("  ~ %s" % _k)
315        for _c, (_va, _vb) in _diffs.items():
316            print("      %-20s %s  →  %s" % (_c + ":", _va[:40], _vb[:40]))
317    if len(_modified) > 25: print("  ... and %d more" % (len(_modified)-25))
318    print()
319
320print("Summary:  +%d added  -%d removed  ~%d modified" % (len(_added), len(_removed), len(_modified)))
321"####,
322        hex_a = hex_a,
323        hex_b = hex_b,
324        hex_key = hex_key,
325    );
326
327    let sandbox_args = serde_json::json!({
328        "language": "python",
329        "code": script,
330        "timeout_seconds": 30
331    });
332    crate::tools::code_sandbox::execute(&sandbox_args).await
333}
334
335// ── Column statistics ─────────────────────────────────────────────────────────
336
337pub async fn column_stats(file_path: &str, column: &str) -> Result<String, String> {
338    let hex_path: String = file_path.bytes().map(|b| format!("{:02x}", b)).collect();
339    let hex_col: String = column.bytes().map(|b| format!("{:02x}", b)).collect();
340
341    let script = format!(
342        r####"import csv as _csv, json as _js, sqlite3 as _sq, os, sys, math
343
344_path = bytes.fromhex("{hex_path}").decode()
345_col  = bytes.fromhex("{hex_col}").decode().strip()
346
347def _load(path):
348    ext = os.path.splitext(path)[1].lower().lstrip('.')
349    if ext in ('csv','tsv'):
350        with open(path, encoding='utf-8-sig', errors='replace', newline='') as _fh:
351            _r = _csv.DictReader(_fh, delimiter='\t' if ext=='tsv' else ',')
352            return list(_r), None
353    elif ext == 'json':
354        with open(path, encoding='utf-8') as _fh: _d = _js.load(_fh)
355        rows = _d if isinstance(_d, list) else next(iter(_d.values()), [])
356        return rows, None
357    elif ext in ('db','sqlite','sqlite3'):
358        _con = _sq.connect(path)
359        _cur = _con.cursor()
360        _cur.execute("SELECT name FROM sqlite_master WHERE type='table' LIMIT 1")
361        _t = _cur.fetchone()
362        if not _t: return [], None
363        _cur.execute("SELECT * FROM [%s]" % _t[0])
364        _cols2 = [_d[0] for _d in _cur.description]
365        _rows2 = [dict(zip(_cols2, _r)) for _r in _cur.fetchall()]
366        _con.close()
367        return _rows2, None
368    else:
369        return None, "Unsupported format: " + ext
370
371def _try_float(v):
372    try: return float(str(v).replace(',','').strip())
373    except: return None
374
375def _stats_for(nums, label):
376    n = len(nums)
377    if n == 0:
378        print("  %s: no numeric values found" % label); return
379    nums_s = sorted(nums)
380    mean   = sum(nums_s) / n
381    def _pct(p):
382        idx = p * (n - 1) / 100
383        lo = int(idx); hi = min(lo+1, n-1)
384        return nums_s[lo] + (idx - lo) * (nums_s[hi] - nums_s[lo])
385    median = _pct(50)
386    q1     = _pct(25)
387    q3     = _pct(75)
388    iqr    = q3 - q1
389    var    = sum((x - mean)**2 for x in nums_s) / n
390    std    = math.sqrt(var)
391    # Population skewness
392    if std > 0:
393        skew = (sum((x-mean)**3 for x in nums_s)/n) / (std**3)
394    else:
395        skew = 0.0
396    # Mode (most common value, rounded to 4 sig figs)
397    from collections import Counter
398    rounded = [round(x, 4) for x in nums_s]
399    mode_val, mode_cnt = Counter(rounded).most_common(1)[0]
400
401    W = 52
402    print("=" * W)
403    print(" Statistics: %s  (n=%d)" % (label, n))
404    print("-" * W)
405    print("  Min       %g" % nums_s[0])
406    print("  Max       %g" % nums_s[-1])
407    print("  Range     %g" % (nums_s[-1] - nums_s[0]))
408    print("  Mean      %g" % mean)
409    print("  Median    %g" % median)
410    print("  Mode      %g  (count: %d)" % (mode_val, mode_cnt))
411    print("  Std Dev   %g" % std)
412    print("  Variance  %g" % var)
413    print("  Q1        %g" % q1)
414    print("  Q3        %g" % q3)
415    print("  IQR       %g" % iqr)
416    print("  Skewness  %.4f" % skew)
417    # ASCII histogram (10 bins)
418    bins = 10
419    lo = nums_s[0]; hi_v = nums_s[-1]
420    if lo == hi_v: hi_v = lo + 1
421    step = (hi_v - lo) / bins
422    counts = [0]*bins
423    for x in nums_s:
424        idx = min(int((x-lo)/step), bins-1)
425        counts[idx] += 1
426    max_c = max(counts) if max(counts) > 0 else 1
427    bar_w = 30
428    print("-" * W)
429    print("  Histogram (%d bins):" % bins)
430    for i in range(bins):
431        lo_b = lo + i*step; hi_b = lo_b + step
432        bar = "#" * int(counts[i] / max_c * bar_w)
433        print("  [%8.3g,%8.3g)  %-30s %d" % (lo_b, hi_b, bar, counts[i]))
434    print("=" * W)
435
436rows, err = _load(_path)
437if err:
438    print("Error:", err, file=sys.stderr); sys.exit(1)
439if not rows:
440    print("No rows found in %s" % _path); sys.exit(0)
441
442all_cols = list(rows[0].keys())
443if _col:
444    if _col not in all_cols:
445        print("Column '%s' not found. Available: %s" % (_col, ', '.join(all_cols)))
446        sys.exit(1)
447    nums = [v for v in (_try_float(r.get(_col,'')) for r in rows) if v is not None]
448    _stats_for(nums, _col)
449else:
450    numeric_cols = [c for c in all_cols if sum(1 for r in rows if _try_float(r.get(c,'')) is not None) > len(rows)*0.5]
451    if not numeric_cols:
452        print("No numeric columns detected. Columns: %s" % ', '.join(all_cols))
453        sys.exit(0)
454    print("Numeric columns: %s" % ', '.join(numeric_cols))
455    print("Pass --column NAME to focus on one, or showing all:\n")
456    for c in numeric_cols:
457        nums = [v for v in (_try_float(r.get(c,'')) for r in rows) if v is not None]
458        _stats_for(nums, c)
459        print()
460"####,
461        hex_path = hex_path,
462        hex_col = hex_col,
463    );
464
465    let sandbox_args = serde_json::json!({
466        "language": "python",
467        "code": script,
468        "timeout_seconds": 30
469    });
470    crate::tools::code_sandbox::execute(&sandbox_args).await
471}
472
473// ── Matrix operations ─────────────────────────────────────────────────────────
474// Supports: det, inv, solve, multiply, transpose, eigenvalues
475// Matrix input format: "[[1,2],[3,4]]" (JSON array of rows)
476
477pub async fn matrix_op(op: &str, matrix_a: &str, matrix_b: &str) -> Result<String, String> {
478    let hex_op: String = op.bytes().map(|b| format!("{:02x}", b)).collect();
479    let hex_a: String = matrix_a.bytes().map(|b| format!("{:02x}", b)).collect();
480    let hex_b: String = matrix_b.bytes().map(|b| format!("{:02x}", b)).collect();
481
482    let script = format!(
483        r####"import json as _js, math, sys
484
485_op = bytes.fromhex("{hex_op}").decode().strip().lower()
486_sa = bytes.fromhex("{hex_a}").decode().strip()
487_sb = bytes.fromhex("{hex_b}").decode().strip()
488
489def _parse(s):
490    if not s: return None
491    try:
492        d = _js.loads(s)
493        if isinstance(d, list):
494            if isinstance(d[0], list): return [list(map(float,r)) for r in d]
495            return [[float(x)] for x in d]  # column vector
496        return None
497    except Exception as e:
498        print("Parse error:", e, file=sys.stderr); sys.exit(1)
499
500A = _parse(_sa)
501B = _parse(_sb)
502
503def _rows(M): return len(M)
504def _cols(M): return len(M[0]) if M else 0
505def _shape(M): return "(%d×%d)" % (_rows(M), _cols(M))
506
507def _mul(A, B):
508    r,k,c = _rows(A), _cols(A), _cols(B)
509    if k != _rows(B): raise ValueError("Shape mismatch: A%s B%s" % (_shape(A), _shape(B)))
510    return [[sum(A[i][p]*B[p][j] for p in range(k)) for j in range(c)] for i in range(r)]
511
512def _T(M):
513    return [[M[i][j] for i in range(_rows(M))] for j in range(_cols(M))]
514
515def _LU(M):
516    n = _rows(M)
517    if n != _cols(M): raise ValueError("Square matrix required")
518    U = [row[:] for row in M]
519    L = [[1.0 if i==j else 0.0 for j in range(n)] for i in range(n)]
520    P = list(range(n)); sign = 1
521    for col in range(n):
522        pivot = max(range(col, n), key=lambda r: abs(U[r][col]))
523        if abs(U[pivot][col]) < 1e-14: raise ValueError("Matrix is singular")
524        if pivot != col:
525            U[col], U[pivot] = U[pivot], U[col]
526            P[col], P[pivot] = P[pivot], P[col]
527            if col > 0:
528                for k in range(col): L[col][k], L[pivot][k] = L[pivot][k], L[col][k]
529            sign *= -1
530        for row in range(col+1, n):
531            f = U[row][col] / U[col][col]
532            L[row][col] = f
533            for k in range(col, n): U[row][k] -= f * U[col][k]
534    return L, U, P, sign
535
536def _det(M):
537    L, U, P, sign = _LU(M)
538    d = sign
539    for i in range(_rows(M)): d *= U[i][i]
540    return d
541
542def _inv(M):
543    n = _rows(M)
544    L, U, P, _ = _LU(M)
545    inv = [[0.0]*n for _ in range(n)]
546    for col in range(n):
547        e = [1.0 if P[i]==col else 0.0 for i in range(n)]
548        y = [0.0]*n
549        for i in range(n):
550            y[i] = e[i] - sum(L[i][k]*y[k] for k in range(i))
551        x = [0.0]*n
552        for i in range(n-1, -1, -1):
553            x[i] = (y[i] - sum(U[i][k]*x[k] for k in range(i+1,n))) / U[i][i]
554        for i in range(n): inv[i][col] = x[i]
555    return inv
556
557def _solve(M, b):
558    n = _rows(M)
559    bv = [row[0] for row in b]
560    L, U, P, _ = _LU(M)
561    pb = [bv[P[i]] for i in range(n)]
562    y = [0.0]*n
563    for i in range(n):
564        y[i] = pb[i] - sum(L[i][k]*y[k] for k in range(i))
565    x = [0.0]*n
566    for i in range(n-1,-1,-1):
567        x[i] = (y[i] - sum(U[i][k]*x[k] for k in range(i+1,n))) / U[i][i]
568    return x
569
570def _fmt_num(v):
571    if abs(v) < 1e-10: return "0"
572    return ("%.6g" % v)
573
574def _print_matrix(M, label=""):
575    if label: print(label)
576    for row in M:
577        print("  [ " + "  ".join("%10s" % _fmt_num(v) for v in row) + " ]")
578
579def _qr_eigenvalues(M, iters=200):
580    # QR iteration (Gram-Schmidt) — real eigenvalues only for symmetric matrices
581    n = _rows(M)
582    Ak = [row[:] for row in M]
583    for _ in range(iters):
584        # Gram-Schmidt QR
585        Q = [[0.0]*n for _ in range(n)]
586        R = [[0.0]*n for _ in range(n)]
587        for j in range(n):
588            v = [Ak[i][j] for i in range(n)]
589            for k in range(j):
590                R[k][j] = sum(Q[i][k]*v[i] for i in range(n))
591                for i in range(n): v[i] -= R[k][j]*Q[i][k]
592            norm = math.sqrt(sum(x*x for x in v))
593            if norm < 1e-14: norm = 1.0
594            R[j][j] = norm
595            for i in range(n): Q[i][j] = v[i]/norm
596        Ak = _mul(R, Q)
597    return sorted([Ak[i][i] for i in range(n)], reverse=True)
598
599if not A:
600    print("Error: no matrix provided. Use JSON format: [[1,2],[3,4]]")
601    sys.exit(1)
602
603try:
604    if _op in ('det', 'determinant'):
605        d = _det(A)
606        print("Determinant of %s matrix:" % _shape(A))
607        print("  det(A) = %s" % _fmt_num(d))
608    elif _op in ('inv', 'inverse', 'invert'):
609        Ai = _inv(A)
610        _print_matrix(A, "A %s:" % _shape(A))
611        print()
612        _print_matrix(Ai, "A⁻¹:")
613    elif _op in ('T', 'transpose'):
614        At = _T(A)
615        _print_matrix(A, "A %s:" % _shape(A))
616        print()
617        _print_matrix(At, "Aᵀ %s:" % _shape(At))
618    elif _op in ('mul', 'multiply', 'matmul'):
619        if not B:
620            print("Error: --matrix-b required for multiply"); sys.exit(1)
621        C = _mul(A, B)
622        _print_matrix(A, "A %s:" % _shape(A))
623        print()
624        _print_matrix(B, "B %s:" % _shape(B))
625        print()
626        _print_matrix(C, "A × B = %s:" % _shape(C))
627    elif _op in ('solve',):
628        if not B:
629            print("Error: --matrix-b required for solve (the b vector/matrix)"); sys.exit(1)
630        x = _solve(A, B)
631        _print_matrix(A, "A %s (coefficient matrix):" % _shape(A))
632        print()
633        _print_matrix(B, "b %s (right-hand side):" % _shape(B))
634        print()
635        print("Solution x (Ax = b):")
636        print("  [ " + "  ".join("%10s" % _fmt_num(v) for v in x) + " ]")
637    elif _op in ('eig', 'eigen', 'eigenvalues'):
638        eigs = _qr_eigenvalues(A)
639        print("Eigenvalues of %s matrix (real, via QR iteration):" % _shape(A))
640        for i, e in enumerate(eigs):
641            print("  λ%d = %s" % (i+1, _fmt_num(e)))
642        print()
643        print("Note: QR iteration gives real eigenvalues for symmetric matrices.")
644        print("Complex eigenvalues are shown only as real parts for non-symmetric matrices.")
645    elif _op in ('rank',):
646        # Gaussian elimination row rank
647        M2 = [row[:] for row in A]
648        r = 0
649        for col in range(_cols(M2)):
650            pivot = next((i for i in range(r, _rows(M2)) if abs(M2[i][col]) > 1e-10), None)
651            if pivot is None: continue
652            M2[r], M2[pivot] = M2[pivot], M2[r]
653            for i in range(_rows(M2)):
654                if i != r and abs(M2[i][col]) > 1e-10:
655                    f = M2[i][col] / M2[r][col]
656                    for k in range(_cols(M2)): M2[i][k] -= f * M2[r][k]
657            r += 1
658        print("Rank of %s matrix: %d" % (_shape(A), r))
659    elif _op in ('trace',):
660        if _rows(A) != _cols(A): print("Warning: trace of non-square matrix (using diagonal)")
661        t = sum(A[i][i] for i in range(min(_rows(A),_cols(A))))
662        print("Trace of %s matrix: %s" % (_shape(A), _fmt_num(t)))
663    else:
664        print("Unknown operation: '%s'" % _op)
665        print("Available: det  inv  transpose  multiply  solve  eigenvalues  rank  trace")
666        sys.exit(1)
667except ValueError as e:
668    print("Error:", e, file=sys.stderr); sys.exit(1)
669"####,
670        hex_op = hex_op,
671        hex_a = hex_a,
672        hex_b = hex_b,
673    );
674
675    let sandbox_args = serde_json::json!({
676        "language": "python",
677        "code": script,
678        "timeout_seconds": 30
679    });
680    crate::tools::code_sandbox::execute(&sandbox_args).await
681}
682
683// ── Equation solver ───────────────────────────────────────────────────────────
684
685pub async fn solve_equation(equation: &str, var: &str, x0: f64, x1: f64) -> Result<String, String> {
686    let hex_eq: String = equation.bytes().map(|b| format!("{:02x}", b)).collect();
687    let hex_var: String = var.bytes().map(|b| format!("{:02x}", b)).collect();
688
689    let script = format!(
690        r####"import math, sys
691from math import (sin,cos,tan,asin,acos,atan,atan2,sinh,cosh,tanh,
692                  sqrt,log,log2,log10,exp,floor,ceil,pi,e,inf,nan)
693
694_eq  = bytes.fromhex("{hex_eq}").decode().strip()
695_var = bytes.fromhex("{hex_var}").decode().strip() or "x"
696_x0  = {x0}
697_x1  = {x1}
698
699if '=' in _eq:
700    parts = _eq.split('=', 1)
701    _expr = "(%s) - (%s)" % (parts[0].strip(), parts[1].strip())
702else:
703    _expr = _eq
704_expr = _expr.replace('^', '**')
705
706_safe = dict(sin=sin,cos=cos,tan=tan,asin=asin,acos=acos,atan=atan,atan2=atan2,
707             sinh=sinh,cosh=cosh,tanh=tanh,sqrt=sqrt,log=log,log2=log2,log10=log10,
708             exp=exp,floor=floor,ceil=ceil,pi=pi,e=e,inf=inf,nan=nan,abs=abs)
709
710def _f(xv):
711    ns = dict(_safe); ns[_var] = xv
712    return eval(_expr, {{"__builtins__": {{}}}}, ns)
713
714def _bisect(lo, hi, tol=1e-12, iters=200):
715    try: flo = _f(lo); fhi = _f(hi)
716    except Exception as err: return None, str(err)
717    if flo * fhi > 0: return None, "no sign change"
718    for _ in range(iters):
719        mid = (lo+hi)/2
720        if (hi-lo) < tol: return mid, None
721        try: fm = _f(mid)
722        except: return None, "eval error"
723        if flo*fm <= 0: hi=mid; fhi=fm
724        else: lo=mid; flo=fm
725    return (lo+hi)/2, None
726
727def _newton(x, tol=1e-12, iters=100):
728    for _ in range(iters):
729        try: fx = _f(x)
730        except: break
731        if abs(fx) < tol: return x, None
732        h = max(abs(x)*1e-7, 1e-9)
733        try: fpx = (_f(x+h) - _f(x-h))/(2*h)
734        except: break
735        if abs(fpx) < 1e-30: break
736        x2 = x - fx/fpx
737        if abs(x2-x) < tol: return x2, None
738        x = x2
739    return None, "did not converge"
740
741print("Equation: %s = 0" % _expr)
742print("Variable: %s  |  Search: [%g, %g]" % (_var, _x0, _x1))
743print()
744
745candidates = []
746n_scan = 50; step = (_x1-_x0)/n_scan
747for i in range(n_scan):
748    lo = _x0+i*step; hi = lo+step
749    try:
750        if _f(lo)*_f(hi) <= 0:
751            root, _ = _bisect(lo, hi)
752            if root is not None:
753                nr, _ = _newton(root)
754                if nr is not None: root = nr
755                if not any(abs(root-c) < 1e-8 for c in candidates):
756                    candidates.append(root)
757    except: pass
758
759if not candidates:
760    nr, _ = _newton((_x0+_x1)/2)
761    if nr is not None: candidates.append(nr)
762
763if not candidates:
764    print("No roots found in [%g, %g]." % (_x0, _x1))
765    print("Try --solve-range to widen the search interval.")
766    sys.exit(0)
767
768print("Root(s) found:")
769for r in sorted(candidates):
770    try: chk = abs(_f(r))
771    except: chk = float('nan')
772    flag = "" if chk < 1e-8 else "  [residual: %.2e]" % chk
773    print("  %s = %.10g%s" % (_var, r, flag))
774"####,
775        hex_eq = hex_eq,
776        hex_var = hex_var,
777        x0 = x0,
778        x1 = x1,
779    );
780
781    let sandbox_args = serde_json::json!({
782        "language": "python",
783        "code": script,
784        "timeout_seconds": 20
785    });
786    crate::tools::code_sandbox::execute(&sandbox_args).await
787}
788
789// ── Curve fitting ─────────────────────────────────────────────────────────────
790
791pub async fn curve_fit(
792    file_path: &str,
793    x_col: &str,
794    y_col: &str,
795    model: &str,
796) -> Result<String, String> {
797    let hex_path: String = file_path.bytes().map(|b| format!("{:02x}", b)).collect();
798    let hex_xcol: String = x_col.bytes().map(|b| format!("{:02x}", b)).collect();
799    let hex_ycol: String = y_col.bytes().map(|b| format!("{:02x}", b)).collect();
800    let hex_model: String = model.bytes().map(|b| format!("{:02x}", b)).collect();
801
802    let script = format!(
803        r####"import csv as _csv, json as _js, sqlite3 as _sq, os, sys, math
804
805_path  = bytes.fromhex("{hex_path}").decode().strip()
806_xcol  = bytes.fromhex("{hex_xcol}").decode().strip()
807_ycol  = bytes.fromhex("{hex_ycol}").decode().strip()
808_model = bytes.fromhex("{hex_model}").decode().strip().lower() or "auto"
809
810def _load(path):
811    ext = os.path.splitext(path)[1].lower().lstrip('.')
812    if ext in ('csv','tsv'):
813        with open(path, encoding='utf-8-sig', errors='replace', newline='') as fh:
814            r = _csv.DictReader(fh, delimiter='\t' if ext=='tsv' else ',')
815            return list(r)
816    elif ext == 'json':
817        with open(path, encoding='utf-8') as fh: d = _js.load(fh)
818        return d if isinstance(d, list) else next(iter(d.values()), [])
819    elif ext in ('db','sqlite','sqlite3'):
820        con = _sq.connect(path)
821        cur = con.cursor()
822        cur.execute("SELECT name FROM sqlite_master WHERE type='table' LIMIT 1")
823        t = cur.fetchone()
824        if not t: return []
825        cur.execute("SELECT * FROM [%s]" % t[0])
826        cols2 = [d[0] for d in cur.description]
827        rows2 = [dict(zip(cols2, r)) for r in cur.fetchall()]
828        con.close()
829        return rows2
830    print("Unsupported: "+ext, file=sys.stderr); sys.exit(1)
831
832def _tf(v):
833    try: return float(str(v).replace(',','').strip())
834    except: return None
835
836rows = _load(_path)
837if not rows:
838    print("No rows found."); sys.exit(0)
839
840all_cols = list(rows[0].keys())
841numeric  = [c for c in all_cols if sum(1 for r in rows if _tf(r.get(c,'')) is not None) > len(rows)*0.5]
842
843if not _xcol: _xcol = numeric[0] if numeric else all_cols[0]
844if not _ycol: _ycol = numeric[1] if len(numeric)>1 else all_cols[1]
845
846pairs = [(_tf(r.get(_xcol,'')), _tf(r.get(_ycol,''))) for r in rows]
847pairs = [(x,y) for x,y in pairs if x is not None and y is not None]
848if len(pairs) < 3:
849    print("Need at least 3 numeric rows. Found: %d" % len(pairs)); sys.exit(0)
850
851xs = [p[0] for p in pairs]; ys = [p[1] for p in pairs]; n = len(xs)
852
853def _dot(a,b): return sum(x*y for x,y in zip(a,b))
854def _mean(v):  return sum(v)/len(v)
855def _r2(pred):
856    ybar = _mean(ys)
857    ss_res = sum((y-p)**2 for y,p in zip(ys,pred))
858    ss_tot = sum((y-ybar)**2 for y in ys)
859    return 1.0 - ss_res/ss_tot if ss_tot else 0.0
860
861def _vfit(deg):
862    m = deg+1
863    X = [[xi**j for j in range(m)] for xi in xs]
864    Xt = [[X[i][j] for i in range(n)] for j in range(m)]
865    XtX = [[sum(Xt[r][k]*X[k][c] for k in range(n)) for c in range(m)] for r in range(m)]
866    Xty = [sum(Xt[r][k]*ys[k] for k in range(n)) for r in range(m)]
867    A = [row[:]+[Xty[i]] for i,row in enumerate(XtX)]
868    for col in range(m):
869        pv = max(range(col,m), key=lambda r: abs(A[r][col]))
870        A[col],A[pv] = A[pv],A[col]
871        if abs(A[col][col]) < 1e-30: raise ValueError("Singular")
872        f = A[col][col]; A[col] = [v/f for v in A[col]]
873        for r in range(m):
874            if r!=col:
875                mu = A[r][col]; A[r] = [A[r][k]-mu*A[col][k] for k in range(m+1)]
876    c = [A[i][m] for i in range(m)]
877    return c, _r2([sum(c[j]*xi**j for j in range(m)) for xi in xs])
878
879def _fp(c):
880    t = []
881    for j in range(len(c)-1,-1,-1):
882        v = c[j]
883        if abs(v)<1e-14: continue
884        if j==0: t.append("%.6g"%v)
885        elif j==1: t.append("%.6g*x"%v)
886        else: t.append("%.6g*x^%d"%(v,j))
887    return " + ".join(t) or "0"
888
889res = {{}}
890def _try(nm,fn):
891    try: eq,r2=fn(); res[nm]=(r2,eq)
892    except Exception as err: res[nm]=(None,str(err))
893
894def _lin():
895    c,r2=_vfit(1); return "y = %.6g + %.6g*x"%(c[0],c[1]),r2
896def _q2():
897    c,r2=_vfit(2); return "y = "+_fp(c),r2
898def _q3():
899    c,r2=_vfit(3); return "y = "+_fp(c),r2
900def _ef():
901    if any(y<=0 for y in ys): raise ValueError("y must be >0")
902    lny=[math.log(y) for y in ys]; xm=_mean(xs); lym=_mean(lny)
903    b=(_dot(xs,lny)-n*xm*lym)/(_dot(xs,xs)-n*xm**2); a=math.exp(lym-b*xm)
904    return "y = %.6g*e^(%.6g*x)"%(a,b),_r2([a*math.exp(b*xi) for xi in xs])
905def _pf():
906    if any(x<=0 for x in xs) or any(y<=0 for y in ys): raise ValueError("x,y must be >0")
907    lx=[math.log(x) for x in xs]; ly=[math.log(y) for y in ys]
908    lxm=_mean(lx); lym=_mean(ly)
909    b=(_dot(lx,ly)-n*lxm*lym)/(_dot(lx,lx)-n*lxm**2); a=math.exp(lym-b*lxm)
910    return "y = %.6g*x^%.6g"%(a,b),_r2([a*(xi**b) for xi in xs])
911def _lf():
912    if any(x<=0 for x in xs): raise ValueError("x must be >0")
913    lx=[math.log(x) for x in xs]; lxm=_mean(lx); ym=_mean(ys)
914    b=(_dot(lx,ys)-n*lxm*ym)/(_dot(lx,lx)-n*lxm**2); a=ym-b*lxm
915    return "y = %.6g + %.6g*ln(x)"%(a,b),_r2([a+b*lxi for lxi in lx])
916
917mm = dict(linear=_lin,lin=_lin,poly2=_q2,quadratic=_q2,quad=_q2,poly3=_q3,cubic=_q3,
918          exp=_ef,exponential=_ef,power=_pf,pow=_pf,log=_lf,logarithmic=_lf)
919
920if _model in ('auto','all'):
921    for nm,fn in [('linear',_lin),('poly2',_q2),('poly3',_q3),('exp',_ef),('power',_pf),('log',_lf)]:
922        _try(nm,fn)
923elif _model in mm:
924    _try(_model, mm[_model])
925else:
926    print("Unknown model '%s'. Available: linear  poly2  poly3  exp  power  log  auto" % _model); sys.exit(1)
927
928W=60
929print("="*W)
930print(" Curve Fit:  %s -> %s  (n=%d)" % (_xcol,_ycol,n))
931print("-"*W)
932valid=[(nm,(r2,eq)) for nm,(r2,eq) in res.items() if r2 is not None]
933invalid=[(nm,(r2,eq)) for nm,(r2,eq) in res.items() if r2 is None]
934valid.sort(key=lambda t:-t[1][0])
935best=valid[0][0] if valid else None
936for nm,(r2,eq) in valid:
937    star=" *" if nm==best else ""
938    print("  %-12s R2=%.4f%s"%(nm,r2,star))
939    print("    %s"%eq)
940if invalid:
941    print(); print("  Skipped (domain):")
942    for nm,(_,err) in invalid:
943        print("    %-12s %s"%(nm+":",err))
944print("="*W)
945"####,
946        hex_path = hex_path,
947        hex_xcol = hex_xcol,
948        hex_ycol = hex_ycol,
949        hex_model = hex_model,
950    );
951
952    let sandbox_args = serde_json::json!({
953        "language": "python",
954        "code": script,
955        "timeout_seconds": 30
956    });
957    crate::tools::code_sandbox::execute(&sandbox_args).await
958}
959
960// ── Numerical integration ─────────────────────────────────────────────────────
961
962pub async fn integrate(
963    expr: &str,
964    var: &str,
965    lo: f64,
966    hi: f64,
967    n: usize,
968) -> Result<String, String> {
969    let hex_expr: String = expr.bytes().map(|b| format!("{:02x}", b)).collect();
970    let hex_var: String = var.bytes().map(|b| format!("{:02x}", b)).collect();
971
972    let script = format!(
973        r####"import math, sys
974from math import (sin,cos,tan,asin,acos,atan,atan2,sinh,cosh,tanh,
975                  sqrt,log,log2,log10,exp,floor,ceil,pi,e,inf,nan)
976
977_expr = bytes.fromhex("{hex_expr}").decode().strip().replace('^','**')
978_var  = bytes.fromhex("{hex_var}").decode().strip() or "x"
979_lo   = {lo}
980_hi   = {hi}
981_n    = {n}
982if _n < 2: _n = 1000
983
984_safe = dict(sin=sin,cos=cos,tan=tan,asin=asin,acos=acos,atan=atan,atan2=atan2,
985             sinh=sinh,cosh=cosh,tanh=tanh,sqrt=sqrt,log=log,log2=log2,log10=log10,
986             exp=exp,floor=floor,ceil=ceil,pi=pi,e=e,inf=inf,nan=nan,abs=abs)
987
988def _f(v):
989    ns = dict(_safe); ns[_var] = v
990    return eval(_expr, {{"__builtins__":{{}}}}, ns)
991
992# Adaptive Simpson's rule (recursive)
993def _simp(a, b, fa, fm, fb, tol, depth):
994    m1 = (a+b)/4; m2 = 3*(a+b)/4
995    fm1 = _f(m1); fm2 = _f(m2)
996    s1 = (b-a)/12*(fa + 4*fm1 + 2*fm + 4*fm2 + fb)
997    s0 = (b-a)/6*(fa + 4*fm + fb)
998    err = abs(s1 - s0)/15
999    if depth >= 12 or err < tol:
1000        return s1 + (s1-s0)/15
1001    else:
1002        mid = (a+b)/2
1003        return (_simp(a,mid,fa,fm1,fm,tol/2,depth+1) +
1004                _simp(mid,b,fm,fm2,fb,tol/2,depth+1))
1005
1006# Also compute via Simpson's 1/3 rule with _n intervals for comparison
1007h = (_hi - _lo) / _n
1008vals = [_f(_lo + i*h) for i in range(_n+1)]
1009simp_basic = h/3 * sum(
1010    (vals[i] + 4*vals[i+1] + vals[i+2]) if (i+2 <= _n) else 0
1011    for i in range(0, _n, 2)
1012)
1013
1014try:
1015    fa = _f(_lo); fm = _f((_lo+_hi)/2); fb = _f(_hi)
1016    result = _simp(_lo, _hi, fa, fm, fb, 1e-10, 0)
1017    method = "Adaptive Simpson"
1018except RecursionError:
1019    result = simp_basic
1020    method = "Simpson 1/3 (%d intervals)" % _n
1021
1022print("Integral of  %s  d%s" % (_expr, _var))
1023print("From %g  to  %g" % (_lo, _hi))
1024print()
1025print("Result:  %.10g  (via %s)" % (result, method))
1026print()
1027# Relative error estimate vs basic Simpson
1028if method.startswith("Adaptive"):
1029    err_est = abs(result - simp_basic)
1030    print("Est. error: %.2e  (vs basic Simpson/%d)" % (err_est, _n))
1031"####,
1032        hex_expr = hex_expr,
1033        hex_var = hex_var,
1034        lo = lo,
1035        hi = hi,
1036        n = n,
1037    );
1038
1039    let sandbox_args = serde_json::json!({
1040        "language": "python",
1041        "code": script,
1042        "timeout_seconds": 20
1043    });
1044    crate::tools::code_sandbox::execute(&sandbox_args).await
1045}
1046
1047// ── Numerical derivative ──────────────────────────────────────────────────────
1048
1049pub async fn differentiate(expr: &str, var: &str, at: f64, order: u8) -> Result<String, String> {
1050    let hex_expr: String = expr.bytes().map(|b| format!("{:02x}", b)).collect();
1051    let hex_var: String = var.bytes().map(|b| format!("{:02x}", b)).collect();
1052
1053    let script = format!(
1054        r####"import math, sys
1055from math import (sin,cos,tan,asin,acos,atan,atan2,sinh,cosh,tanh,
1056                  sqrt,log,log2,log10,exp,floor,ceil,pi,e,inf,nan)
1057
1058_expr  = bytes.fromhex("{hex_expr}").decode().strip().replace('^','**')
1059_var   = bytes.fromhex("{hex_var}").decode().strip() or "x"
1060_at    = {at}
1061_order = {order}
1062
1063_safe = dict(sin=sin,cos=cos,tan=tan,asin=asin,acos=acos,atan=atan,atan2=atan2,
1064             sinh=sinh,cosh=cosh,tanh=tanh,sqrt=sqrt,log=log,log2=log2,log10=log10,
1065             exp=exp,floor=floor,ceil=ceil,pi=pi,e=e,inf=inf,nan=nan,abs=abs)
1066
1067def _f(v):
1068    ns = dict(_safe); ns[_var] = v
1069    return eval(_expr, {{"__builtins__":{{}}}}, ns)
1070
1071def _deriv(f, x, h=None, order=1):
1072    if h is None: h = max(abs(x)*1e-5, 1e-7)
1073    if order == 1:
1074        # 5-point stencil: (-f(x+2h)+8f(x+h)-8f(x-h)+f(x-2h)) / 12h
1075        return (-f(x+2*h) + 8*f(x+h) - 8*f(x-h) + f(x-2*h)) / (12*h)
1076    elif order == 2:
1077        return (f(x+h) - 2*f(x) + f(x-h)) / (h*h)
1078    elif order == 3:
1079        return (-f(x+2*h) + 2*f(x+h) - 2*f(x-h) + f(x-2*h)) / (2*h**3)
1080    elif order == 4:
1081        return (f(x+2*h) - 4*f(x+h) + 6*f(x) - 4*f(x-h) + f(x-2*h)) / (h**4)
1082    else:
1083        # Higher orders: repeated difference quotient
1084        d = [f(x+i*h) for i in range(-order, order+1)]
1085        return sum((-1)**(order-i)*math.comb(order,i)*d[i] for i in range(order+1)) / (h**order)
1086
1087ordinal = ["","1st","2nd","3rd","4th","5th","6th","7th","8th"]
1088label = ordinal[_order] if _order < len(ordinal) else "%dth"%_order
1089
1090print("f(%s) = %s" % (_var, _expr))
1091print("%s derivative  at  %s = %g" % (label, _var, _at))
1092print()
1093
1094try:
1095    fval = _f(_at)
1096    dval = _deriv(_f, _at, order=_order)
1097    print("f(%g)        = %.10g" % (_at, fval))
1098    print("f'(%g) [%s] = %.10g" % (_at, label, dval))
1099    if _order == 1:
1100        slope = dval
1101        print()
1102        print("Tangent line at %s=%g:  y = %.6g + %.6g*(%s - %g)" % (_var,_at,fval,slope,_var,_at))
1103except Exception as ex:
1104    print("Error:", ex, file=sys.stderr); sys.exit(1)
1105"####,
1106        hex_expr = hex_expr,
1107        hex_var = hex_var,
1108        at = at,
1109        order = order,
1110    );
1111
1112    let sandbox_args = serde_json::json!({
1113        "language": "python",
1114        "code": script,
1115        "timeout_seconds": 15
1116    });
1117    crate::tools::code_sandbox::execute(&sandbox_args).await
1118}
1119
1120// ── Data profile / summarize ──────────────────────────────────────────────────
1121// Model-free natural language summary of a data file: column types, ranges,
1122// missing values, outliers, duplicate rows.
1123
1124pub async fn data_profile(file_path: &str) -> Result<String, String> {
1125    let hex_path: String = file_path.bytes().map(|b| format!("{:02x}", b)).collect();
1126
1127    let script = format!(
1128        r####"import csv as _csv, json as _js, sqlite3 as _sq, os, sys, math
1129
1130_path = bytes.fromhex("{hex_path}").decode().strip()
1131
1132def _load(path):
1133    ext = os.path.splitext(path)[1].lower().lstrip('.')
1134    if ext in ('csv','tsv'):
1135        with open(path, encoding='utf-8-sig', errors='replace', newline='') as fh:
1136            r = _csv.DictReader(fh, delimiter='\t' if ext=='tsv' else ',')
1137            return list(r), None
1138    elif ext == 'json':
1139        with open(path, encoding='utf-8') as fh: d = _js.load(fh)
1140        rows = d if isinstance(d, list) else next(iter(d.values()), [])
1141        return rows, None
1142    elif ext in ('db','sqlite','sqlite3'):
1143        con = _sq.connect(path)
1144        cur = con.cursor()
1145        cur.execute("SELECT name FROM sqlite_master WHERE type='table' LIMIT 1")
1146        t = cur.fetchone()
1147        if not t: return [], None
1148        cur.execute("SELECT * FROM [%s]" % t[0])
1149        cols2 = [d[0] for d in cur.description]
1150        rows2 = [dict(zip(cols2, r)) for r in cur.fetchall()]
1151        con.close()
1152        return rows2, None
1153    return None, "Unsupported format: "+ext
1154
1155rows, err = _load(_path)
1156if err: print("Error:", err, file=sys.stderr); sys.exit(1)
1157if not rows: print("No rows found."); sys.exit(0)
1158
1159n = len(rows)
1160cols = list(rows[0].keys())
1161W = 64
1162
1163print("=" * W)
1164print(" Data Profile:  %s" % os.path.basename(_path))
1165print(" Rows: %d  |  Columns: %d" % (n, len(cols)))
1166print("=" * W)
1167
1168def _tf(v):
1169    try: return float(str(v).replace(',','').strip())
1170    except: return None
1171
1172dup_check = set()
1173dups = 0
1174for r in rows:
1175    k = tuple(str(r.get(c,'')) for c in cols)
1176    if k in dup_check: dups += 1
1177    else: dup_check.add(k)
1178
1179if dups:
1180    print(" Duplicate rows: %d (%.1f%%)" % (dups, 100*dups/n))
1181else:
1182    print(" No duplicate rows.")
1183print()
1184
1185for col in cols:
1186    vals = [r.get(col,'') for r in rows]
1187    missing = sum(1 for v in vals if str(v).strip() in ('','None','null','NULL','NA','N/A','NaN'))
1188    nums = [_tf(v) for v in vals if _tf(v) is not None]
1189    pct_miss = 100*missing/n if n else 0
1190
1191    print("-" * W)
1192    print("  %s" % col)
1193    if pct_miss > 0:
1194        flag = "  [!]" if pct_miss > 20 else ""
1195        print("    Missing:  %d / %d  (%.1f%%)%s" % (missing, n, pct_miss, flag))
1196
1197    if len(nums) > n * 0.5:
1198        # Numeric column
1199        nums_s = sorted(nums)
1200        mn = _mean = sum(nums_s)/len(nums_s)
1201        med_idx = len(nums_s)//2
1202        med = nums_s[med_idx] if len(nums_s)%2 else (nums_s[med_idx-1]+nums_s[med_idx])/2
1203        std = math.sqrt(sum((x-mn)**2 for x in nums_s)/len(nums_s)) if len(nums_s)>1 else 0
1204        q1_i = len(nums_s)//4; q3_i = 3*len(nums_s)//4
1205        q1 = nums_s[q1_i]; q3 = nums_s[q3_i]; iqr = q3 - q1
1206        lo_fence = q1 - 1.5*iqr; hi_fence = q3 + 1.5*iqr
1207        outliers = sum(1 for x in nums_s if x < lo_fence or x > hi_fence)
1208        print("    Type:     numeric  (%d values)" % len(nums_s))
1209        print("    Range:    %g  to  %g" % (nums_s[0], nums_s[-1]))
1210        print("    Mean:     %g  |  Median: %g  |  Std: %g" % (mn, med, std))
1211        if outliers:
1212            print("    Outliers: %d (IQR fence: [%g, %g])" % (outliers, lo_fence, hi_fence))
1213    else:
1214        # Categorical column
1215        from collections import Counter
1216        vc = Counter(str(v).strip() for v in vals if str(v).strip() not in ('','None','null','NULL'))
1217        top = vc.most_common(5)
1218        unique = len(vc)
1219        print("    Type:     categorical  (%d unique values)" % unique)
1220        if unique <= 20:
1221            print("    Values:   " + "  ".join("%s(%d)"%(k,c) for k,c in top))
1222        else:
1223            print("    Top 5:    " + "  ".join("%s(%d)"%(k,c) for k,c in top))
1224
1225print("=" * W)
1226"####,
1227        hex_path = hex_path,
1228    );
1229
1230    let sandbox_args = serde_json::json!({
1231        "language": "python",
1232        "code": script,
1233        "timeout_seconds": 30
1234    });
1235    crate::tools::code_sandbox::execute(&sandbox_args).await
1236}