1const FORMULAS: &[(&str, &str, &str, &str)] = &[
9 ("Newton's Second Law", "F = m \u{00D7} a", "mechanics", "F=force(N) m=mass(kg) a=acceleration(m/s\u{00B2})"),
11 ("Kinematic: velocity", "v = u + a\u{00D7}t", "mechanics", "v=final velocity u=initial velocity a=acceleration t=time"),
12 ("Kinematic: displacement", "s = u\u{00D7}t + \u{00BD}\u{00D7}a\u{00D7}t\u{00B2}", "mechanics", "s=displacement u=initial velocity a=acceleration t=time"),
13 ("Kinematic: v\u{00B2} relation", "v\u{00B2} = u\u{00B2} + 2\u{00D7}a\u{00D7}s", "mechanics", "v=final velocity u=initial velocity a=acceleration s=displacement"),
14 ("Kinetic Energy", "KE = \u{00BD} \u{00D7} m \u{00D7} v\u{00B2}", "mechanics", "KE=kinetic energy(J) m=mass(kg) v=velocity(m/s)"),
15 ("Gravitational Potential Energy", "PE = m \u{00D7} g \u{00D7} h", "mechanics", "PE=potential energy(J) m=mass(kg) g=9.80665 m/s\u{00B2} h=height(m)"),
16 ("Momentum", "p = m \u{00D7} v", "mechanics", "p=momentum(kg\u{00B7}m/s) m=mass(kg) v=velocity(m/s)"),
17 ("Impulse-Momentum Theorem", "J = F \u{00D7} \u{0394}t = \u{0394}p", "mechanics", "J=impulse(N\u{00B7}s) F=force(N) \u{0394}t=time interval \u{0394}p=change in momentum"),
18 ("Work", "W = F \u{00D7} d \u{00D7} cos(\u{03B8})", "mechanics", "W=work(J) F=force(N) d=distance(m) \u{03B8}=angle between F and d"),
19 ("Power", "P = W / t = F \u{00D7} v", "mechanics", "P=power(W) W=work(J) t=time(s) F=force(N) v=velocity(m/s)"),
20 ("Torque", "\u{03C4} = r \u{00D7} F \u{00D7} sin(\u{03B8})", "mechanics", "\u{03C4}=torque(N\u{00B7}m) r=moment arm(m) F=force(N) \u{03B8}=angle"),
21 ("Universal Gravitation", "F = G \u{00D7} m\u{2081} \u{00D7} m\u{2082} / r\u{00B2}", "mechanics", "G=6.674\u{00D7}10\u{207B}\u{00B9}\u{00B9} N\u{00B7}m\u{00B2}/kg\u{00B2} m\u{2081},m\u{2082}=masses(kg) r=distance(m)"),
22 ("Centripetal Acceleration", "a_c = v\u{00B2} / r = \u{03C9}\u{00B2} \u{00D7} r", "mechanics", "a_c=centripetal acceleration(m/s\u{00B2}) v=speed(m/s) r=radius(m) \u{03C9}=angular velocity(rad/s)"),
23
24 ("Wave Speed", "v = f \u{00D7} \u{03BB}", "waves", "v=wave speed(m/s) f=frequency(Hz) \u{03BB}=wavelength(m)"),
26 ("Photon Energy", "E = h \u{00D7} f = h\u{00D7}c / \u{03BB}", "waves", "h=6.626\u{00D7}10\u{207B}\u{00B3}\u{2074} J\u{00B7}s f=frequency(Hz) c=3\u{00D7}10\u{2078} m/s \u{03BB}=wavelength(m)"),
27 ("Doppler Effect", "f\u{2019} = f \u{00D7} (v + v_o) / (v \u{2212} v_s)", "waves", "f\u{2019}=observed freq f=source freq v=wave speed v_o=observer speed v_s=source speed"),
28 ("Snell's Law", "n\u{2081} \u{00D7} sin(\u{03B8}\u{2081}) = n\u{2082} \u{00D7} sin(\u{03B8}\u{2082})", "waves", "n=refractive index \u{03B8}=angle to normal"),
29
30 ("Ideal Gas Law", "P \u{00D7} V = n \u{00D7} R \u{00D7} T", "thermodynamics", "P=pressure(Pa) V=volume(m\u{00B3}) n=moles R=8.314 J/(mol\u{00B7}K) T=temperature(K)"),
32 ("Heat Transfer", "Q = m \u{00D7} c \u{00D7} \u{0394}T", "thermodynamics", "Q=heat(J) m=mass(kg) c=specific heat capacity \u{0394}T=temperature change(K)"),
33 ("Carnot Efficiency", "\u{03B7} = 1 \u{2212} T_c / T_h", "thermodynamics", "\u{03B7}=max efficiency(0\u{2013}1) T_c=cold reservoir(K) T_h=hot reservoir(K)"),
34 ("Stefan-Boltzmann Law", "P = \u{03C3} \u{00D7} A \u{00D7} T\u{2074}", "thermodynamics", "\u{03C3}=5.67\u{00D7}10\u{207B}\u{2078} W/(m\u{00B2}\u{00B7}K\u{2074}) A=area(m\u{00B2}) T=temperature(K)"),
35
36 ("Ohm's Law", "V = I \u{00D7} R", "electricity", "V=voltage(V) I=current(A) R=resistance(\u{03A9})"),
38 ("Electrical Power", "P = I \u{00D7} V = I\u{00B2} \u{00D7} R = V\u{00B2} / R", "electricity", "P=power(W) I=current(A) V=voltage(V) R=resistance(\u{03A9})"),
39 ("Coulomb's Law", "F = k \u{00D7} q\u{2081} \u{00D7} q\u{2082} / r\u{00B2}", "electricity", "k=8.99\u{00D7}10\u{2079} N\u{00B7}m\u{00B2}/C\u{00B2} q=charges(C) r=distance(m)"),
40 ("Capacitance", "C = Q / V", "electricity", "C=capacitance(F) Q=charge(C) V=voltage(V)"),
41 ("Energy in Capacitor", "E = \u{00BD} \u{00D7} C \u{00D7} V\u{00B2}", "electricity", "E=energy(J) C=capacitance(F) V=voltage(V)"),
42 ("LC Resonant Frequency", "f = 1 / (2\u{03C0} \u{00D7} \u{221A}(L\u{00D7}C))", "electricity", "f=frequency(Hz) L=inductance(H) C=capacitance(F)"),
43 ("Faraday's Law", "EMF = \u{2212}N \u{00D7} \u{0394}\u{03A6} / \u{0394}t", "electricity", "N=turns \u{0394}\u{03A6}=flux change(Wb) \u{0394}t=time(s)"),
44 ("Magnetic Force on Current", "F = I \u{00D7} L \u{00D7} B \u{00D7} sin(\u{03B8})", "electricity", "I=current(A) L=length(m) B=magnetic field(T) \u{03B8}=angle"),
45
46 ("Mass-Energy Equivalence", "E = m \u{00D7} c\u{00B2}", "relativity", "E=energy(J) m=mass(kg) c=2.998\u{00D7}10\u{2078} m/s"),
48 ("Lorentz Factor", "\u{03B3} = 1 / \u{221A}(1 \u{2212} v\u{00B2}/c\u{00B2})", "relativity", "\u{03B3}=Lorentz factor v=velocity c=speed of light"),
49 ("Time Dilation", "t = t\u{2080} \u{00D7} \u{03B3}", "relativity", "t=dilated time t\u{2080}=proper time \u{03B3}=Lorentz factor"),
50 ("Length Contraction", "L = L\u{2080} / \u{03B3}", "relativity", "L=contracted length L\u{2080}=proper length \u{03B3}=Lorentz factor"),
51
52 ("pH Definition", "pH = \u{2212}log\u{2081}\u{2080}[H\u{207A}]", "chemistry", "[H\u{207A}]=hydrogen ion concentration(mol/L)"),
54 ("Henderson-Hasselbalch", "pH = pKa + log([A\u{207B}]/[HA])", "chemistry", "pKa=acid dissociation constant [A\u{207B}]=conjugate base [HA]=acid concentration"),
55 ("Arrhenius Equation", "k = A \u{00D7} e^(\u{2212}Ea/(R\u{00D7}T))", "chemistry", "A=pre-exponential factor Ea=activation energy(J/mol) R=8.314 T=temperature(K)"),
56 ("Nernst Equation", "E = E\u{00B0} \u{2212} (RT)/(nF) \u{00D7} ln(Q)", "chemistry", "E\u{00B0}=standard potential n=electrons F=96485 C/mol Q=reaction quotient"),
57 ("Beer-Lambert Law", "A = \u{03B5} \u{00D7} c \u{00D7} l", "chemistry", "A=absorbance \u{03B5}=molar absorptivity(L/(mol\u{00B7}cm)) c=concentration(mol/L) l=path length(cm)"),
58
59 ("Quadratic Formula", "x = (\u{2212}b \u{00B1} \u{221A}(b\u{00B2}\u{2212}4ac)) / (2a)", "mathematics", "coefficients of ax\u{00B2}+bx+c=0"),
61 ("Pythagorean Theorem", "a\u{00B2} + b\u{00B2} = c\u{00B2}", "mathematics", "a,b=legs of right triangle c=hypotenuse"),
62 ("Circle Area", "A = \u{03C0} \u{00D7} r\u{00B2}", "mathematics", "A=area r=radius"),
63 ("Circle Circumference", "C = 2 \u{00D7} \u{03C0} \u{00D7} r", "mathematics", "C=circumference r=radius"),
64 ("Sphere Volume", "V = (4/3) \u{00D7} \u{03C0} \u{00D7} r\u{00B3}", "mathematics", "V=volume r=radius"),
65 ("Sphere Surface Area", "A = 4 \u{00D7} \u{03C0} \u{00D7} r\u{00B2}", "mathematics", "A=surface area r=radius"),
66 ("Cylinder Volume", "V = \u{03C0} \u{00D7} r\u{00B2} \u{00D7} h", "mathematics", "V=volume r=radius h=height"),
67 ("Euler's Formula", "e^(i\u{03B8}) = cos(\u{03B8}) + i\u{00D7}sin(\u{03B8})", "mathematics", "\u{03B8}=angle(radians) i=imaginary unit"),
68 ("Bayes' Theorem", "P(A|B) = P(B|A) \u{00D7} P(A) / P(B)", "mathematics", "P(A|B)=probability of A given B"),
69 ("Normal Distribution PDF", "f(x) = (1/(\u{03C3}\u{221A}(2\u{03C0}))) \u{00D7} e^(\u{2212}(x\u{2212}\u{03BC})\u{00B2}/(2\u{03C3}\u{00B2}))", "mathematics", "\u{03BC}=mean \u{03C3}=standard deviation"),
70
71 ("Compound Interest", "A = P \u{00D7} (1 + r/n)^(n\u{00D7}t)", "finance", "A=final amount P=principal r=annual rate(decimal) n=compounds/year t=years"),
73 ("Simple Interest", "I = P \u{00D7} r \u{00D7} t", "finance", "I=interest P=principal r=annual rate(decimal) t=years"),
74 ("Present Value", "PV = FV / (1 + r)^n", "finance", "PV=present value FV=future value r=rate per period n=periods"),
75];
76
77pub fn search_formulas(query: &str) -> String {
78 let q = query.trim();
79
80 if q.is_empty() || q.eq_ignore_ascii_case("list") || q.eq_ignore_ascii_case("all") {
81 let mut cats: std::collections::BTreeMap<&str, Vec<&&str>> = Default::default();
82 for (name, _, cat, _) in FORMULAS {
83 cats.entry(cat).or_default().push(name);
84 }
85 let mut out = format!("Formula library — {} entries\n\n", FORMULAS.len());
86 for (cat, names) in &cats {
87 out.push_str(&format!(
88 "{} ({} formulas)\n",
89 cat.to_uppercase(),
90 names.len()
91 ));
92 for n in names {
93 out.push_str(&format!(" {n}\n"));
94 }
95 out.push('\n');
96 }
97 out.push_str("Search: hematite --formula \"kinetic energy\" or hematite --formula ohms");
98 return out;
99 }
100
101 let q_lower = q.to_ascii_lowercase();
102 let hits: Vec<_> = FORMULAS
103 .iter()
104 .filter(|(name, formula, cat, vars)| {
105 name.to_ascii_lowercase().contains(&q_lower)
106 || cat.to_ascii_lowercase().contains(&q_lower)
107 || formula.to_ascii_lowercase().contains(&q_lower)
108 || vars.to_ascii_lowercase().contains(&q_lower)
109 })
110 .collect();
111
112 if hits.is_empty() {
113 return format!(
114 "No formulas found for '{q}'.\nRun hematite --formula list to browse all {} entries.",
115 FORMULAS.len()
116 );
117 }
118
119 let sep = "\u{2500}".repeat(50);
120 let mut out = String::new();
121 for (name, formula, cat, vars) in &hits {
122 out.push_str(&format!(
123 "{name}\n{sep}\n Formula: {formula}\n Variables: {vars}\n Category: {cat}\n\n"
124 ));
125 }
126 out.trim_end().to_string()
127}
128
129pub async fn generate_random(
132 kind: &str,
133 length: usize,
134 count: usize,
135 extra: &str,
136) -> Result<String, String> {
137 let safe_kind = kind.trim().to_ascii_lowercase().replace('"', "");
138 let safe_extra: String = extra.bytes().map(|b| format!("{:02x}", b)).collect();
139 let eff_count = count.clamp(1, 1000);
140 let eff_length = length.clamp(1, 4096);
141
142 let script = format!(
143 r####"import secrets, uuid as _uuid, sys, string, re as _re
144
145_kind = "{safe_kind}"
146_len = {eff_length}
147_count = {eff_count}
148_extra = bytes.fromhex("{safe_extra}").decode('utf-8', errors='replace').strip()
149
150_TYPES = "uuid password token hex urlsafe pin bytes int dice"
151
152for _i in range(_count):
153 try:
154 if _kind == "uuid":
155 print(str(_uuid.uuid4()))
156 elif _kind in ("password", "pwd", "pass"):
157 _cs = _extra if _extra else string.ascii_letters + string.digits + "!@#$%^&*-_=+"
158 print(''.join(secrets.choice(_cs) for _ in range(_len)))
159 elif _kind in ("token", "hex"):
160 nb = max(1, _len // 2)
161 print(secrets.token_hex(nb))
162 elif _kind in ("urlsafe", "url"):
163 print(secrets.token_urlsafe(_len))
164 elif _kind == "pin":
165 print(''.join(secrets.choice(string.digits) for _ in range(_len)))
166 elif _kind == "bytes":
167 print(secrets.token_bytes(_len).hex())
168 elif _kind in ("int", "integer", "number"):
169 _parts = _extra.split()
170 _lo = int(_parts[0]) if len(_parts) > 0 else 1
171 _hi = int(_parts[1]) if len(_parts) > 1 else 100
172 if _lo > _hi: _lo, _hi = _hi, _lo
173 print(secrets.randbelow(_hi - _lo + 1) + _lo)
174 elif _kind == "dice":
175 _notation = _extra or "1d6"
176 _dm = _re.match(r'^(\d*)d(\d+)([+-]\d+)?$', _notation, _re.I)
177 if not _dm:
178 print("Dice format: NdN or NdN+M (e.g. 2d6 d20 3d8+2)", file=sys.stderr)
179 sys.exit(1)
180 _nd = int(_dm.group(1) or 1)
181 _ds = int(_dm.group(2))
182 _mod = int(_dm.group(3) or 0)
183 _rolls = [secrets.randbelow(_ds) + 1 for _ in range(_nd)]
184 _total = sum(_rolls) + _mod
185 if _nd > 1 or _mod:
186 _breakdown = " + ".join(str(r) for r in _rolls)
187 _suffix = (" +" + str(_mod)) if _mod > 0 else ((" " + str(_mod)) if _mod < 0 else "")
188 print(str(_total) + " [" + _breakdown + _suffix + "]")
189 else:
190 print(str(_total))
191 else:
192 print("Unknown type: " + _kind + ". Supported: " + _TYPES, file=sys.stderr)
193 sys.exit(1)
194 except Exception as _e:
195 print("Error: " + str(_e), file=sys.stderr); sys.exit(1)
196"####,
197 safe_kind = safe_kind,
198 safe_extra = safe_extra,
199 eff_length = eff_length,
200 eff_count = eff_count,
201 );
202
203 let sandbox_args = serde_json::json!({
204 "language": "python",
205 "code": script,
206 "timeout_seconds": 10
207 });
208 crate::tools::code_sandbox::execute(&sandbox_args).await
209}
210
211pub async fn diff_data(path_a: &str, path_b: &str, key_col: &str) -> Result<String, String> {
214 let hex_a: String = path_a.bytes().map(|b| format!("{:02x}", b)).collect();
215 let hex_b: String = path_b.bytes().map(|b| format!("{:02x}", b)).collect();
216 let hex_key: String = key_col.bytes().map(|b| format!("{:02x}", b)).collect();
217
218 let script = format!(
219 r####"import csv as _csv, json as _js, sqlite3 as _sq, os, sys
220
221_pa = bytes.fromhex("{hex_a}").decode()
222_pb = bytes.fromhex("{hex_b}").decode()
223_key = bytes.fromhex("{hex_key}").decode().strip()
224
225def _load(path):
226 ext = os.path.splitext(path)[1].lower().lstrip('.')
227 if ext in ('csv', 'tsv'):
228 with open(path, encoding='utf-8-sig', errors='replace', newline='') as _fh:
229 _r = _csv.DictReader(_fh, delimiter='\t' if ext == 'tsv' else ',')
230 return list(_r)
231 elif ext == 'json':
232 with open(path, encoding='utf-8') as _fh:
233 _d = _js.load(_fh)
234 return _d if isinstance(_d, list) else next(iter(_d.values()), [])
235 elif ext in ('db', 'sqlite', 'sqlite3'):
236 _con = _sq.connect(path)
237 _cur = _con.cursor()
238 _cur.execute("SELECT name FROM sqlite_master WHERE type='table' LIMIT 1")
239 _t = _cur.fetchone()
240 if not _t: return []
241 _cur.execute("SELECT * FROM [%s]" % _t[0])
242 _cols2 = [_d[0] for _d in _cur.description]
243 _rows2 = [dict(zip(_cols2, _r)) for _r in _cur.fetchall()]
244 _con.close()
245 return _rows2
246 else:
247 print("Unsupported format: " + ext, file=sys.stderr); sys.exit(1)
248
249_ra = _load(_pa)
250_rb = _load(_pb)
251
252if not _ra and not _rb:
253 print("Both files are empty."); sys.exit(0)
254
255_cols = list((_ra[0] if _ra else _rb[0]).keys())
256_kc = _key if _key else _cols[0]
257
258def _idx(rows):
259 d = {{}}
260 for r in rows:
261 k = str(r.get(_kc, ''))
262 d.setdefault(k, []).append(r)
263 return d
264
265_da = _idx(_ra)
266_db = _idx(_rb)
267_ka = set(_da); _kb = set(_db)
268
269_added = sorted(_kb - _ka)
270_removed = sorted(_ka - _kb)
271_common = sorted(_ka & _kb)
272
273_modified = []
274for _k in _common:
275 _r1 = _da[_k][0]; _r2 = _db[_k][0]
276 _diffs = {{c: (str(_r1.get(c,'')), str(_r2.get(c,'')))
277 for c in _cols if str(_r1.get(c,'')) != str(_r2.get(c,'')) and c != _kc}}
278 if _diffs:
279 _modified.append((_k, _diffs))
280
281_sep = "─" * 52
282print(_sep)
283print("Data diff: A = %s" % os.path.basename(_pa))
284print(" B = %s" % os.path.basename(_pb))
285print("Key column: %s | A: %d rows B: %d rows" % (_kc, len(_ra), len(_rb)))
286print(_sep)
287print()
288
289if not _added and not _removed and not _modified:
290 print("✓ No differences found. Files are identical on key column '%s'." % _kc)
291 sys.exit(0)
292
293def _preview(row, n=4):
294 items = list(row.items())[:n]
295 return " ".join(("%s=%s" % (k, str(v)[:20])) for k, v in items if k != _kc)
296
297if _added:
298 print("+ Added (%d rows in B not in A):" % len(_added))
299 for _k in _added[:25]:
300 print(" + %s %s" % (_k, _preview(_db[_k][0])))
301 if len(_added) > 25: print(" ... and %d more" % (len(_added)-25))
302 print()
303
304if _removed:
305 print("- Removed (%d rows in A not in B):" % len(_removed))
306 for _k in _removed[:25]:
307 print(" - %s %s" % (_k, _preview(_da[_k][0])))
308 if len(_removed) > 25: print(" ... and %d more" % (len(_removed)-25))
309 print()
310
311if _modified:
312 print("~ Modified (%d rows with changed values):" % len(_modified))
313 for _k, _diffs in _modified[:25]:
314 print(" ~ %s" % _k)
315 for _c, (_va, _vb) in _diffs.items():
316 print(" %-20s %s → %s" % (_c + ":", _va[:40], _vb[:40]))
317 if len(_modified) > 25: print(" ... and %d more" % (len(_modified)-25))
318 print()
319
320print("Summary: +%d added -%d removed ~%d modified" % (len(_added), len(_removed), len(_modified)))
321"####,
322 hex_a = hex_a,
323 hex_b = hex_b,
324 hex_key = hex_key,
325 );
326
327 let sandbox_args = serde_json::json!({
328 "language": "python",
329 "code": script,
330 "timeout_seconds": 30
331 });
332 crate::tools::code_sandbox::execute(&sandbox_args).await
333}
334
335pub async fn column_stats(file_path: &str, column: &str) -> Result<String, String> {
338 let hex_path: String = file_path.bytes().map(|b| format!("{:02x}", b)).collect();
339 let hex_col: String = column.bytes().map(|b| format!("{:02x}", b)).collect();
340
341 let script = format!(
342 r####"import csv as _csv, json as _js, sqlite3 as _sq, os, sys, math
343
344_path = bytes.fromhex("{hex_path}").decode()
345_col = bytes.fromhex("{hex_col}").decode().strip()
346
347def _load(path):
348 ext = os.path.splitext(path)[1].lower().lstrip('.')
349 if ext in ('csv','tsv'):
350 with open(path, encoding='utf-8-sig', errors='replace', newline='') as _fh:
351 _r = _csv.DictReader(_fh, delimiter='\t' if ext=='tsv' else ',')
352 return list(_r), None
353 elif ext == 'json':
354 with open(path, encoding='utf-8') as _fh: _d = _js.load(_fh)
355 rows = _d if isinstance(_d, list) else next(iter(_d.values()), [])
356 return rows, None
357 elif ext in ('db','sqlite','sqlite3'):
358 _con = _sq.connect(path)
359 _cur = _con.cursor()
360 _cur.execute("SELECT name FROM sqlite_master WHERE type='table' LIMIT 1")
361 _t = _cur.fetchone()
362 if not _t: return [], None
363 _cur.execute("SELECT * FROM [%s]" % _t[0])
364 _cols2 = [_d[0] for _d in _cur.description]
365 _rows2 = [dict(zip(_cols2, _r)) for _r in _cur.fetchall()]
366 _con.close()
367 return _rows2, None
368 else:
369 return None, "Unsupported format: " + ext
370
371def _try_float(v):
372 try: return float(str(v).replace(',','').strip())
373 except: return None
374
375def _stats_for(nums, label):
376 n = len(nums)
377 if n == 0:
378 print(" %s: no numeric values found" % label); return
379 nums_s = sorted(nums)
380 mean = sum(nums_s) / n
381 def _pct(p):
382 idx = p * (n - 1) / 100
383 lo = int(idx); hi = min(lo+1, n-1)
384 return nums_s[lo] + (idx - lo) * (nums_s[hi] - nums_s[lo])
385 median = _pct(50)
386 q1 = _pct(25)
387 q3 = _pct(75)
388 iqr = q3 - q1
389 var = sum((x - mean)**2 for x in nums_s) / n
390 std = math.sqrt(var)
391 # Population skewness
392 if std > 0:
393 skew = (sum((x-mean)**3 for x in nums_s)/n) / (std**3)
394 else:
395 skew = 0.0
396 # Mode (most common value, rounded to 4 sig figs)
397 from collections import Counter
398 rounded = [round(x, 4) for x in nums_s]
399 mode_val, mode_cnt = Counter(rounded).most_common(1)[0]
400
401 W = 52
402 print("=" * W)
403 print(" Statistics: %s (n=%d)" % (label, n))
404 print("-" * W)
405 print(" Min %g" % nums_s[0])
406 print(" Max %g" % nums_s[-1])
407 print(" Range %g" % (nums_s[-1] - nums_s[0]))
408 print(" Mean %g" % mean)
409 print(" Median %g" % median)
410 print(" Mode %g (count: %d)" % (mode_val, mode_cnt))
411 print(" Std Dev %g" % std)
412 print(" Variance %g" % var)
413 print(" Q1 %g" % q1)
414 print(" Q3 %g" % q3)
415 print(" IQR %g" % iqr)
416 print(" Skewness %.4f" % skew)
417 # ASCII histogram (10 bins)
418 bins = 10
419 lo = nums_s[0]; hi_v = nums_s[-1]
420 if lo == hi_v: hi_v = lo + 1
421 step = (hi_v - lo) / bins
422 counts = [0]*bins
423 for x in nums_s:
424 idx = min(int((x-lo)/step), bins-1)
425 counts[idx] += 1
426 max_c = max(counts) if max(counts) > 0 else 1
427 bar_w = 30
428 print("-" * W)
429 print(" Histogram (%d bins):" % bins)
430 for i in range(bins):
431 lo_b = lo + i*step; hi_b = lo_b + step
432 bar = "#" * int(counts[i] / max_c * bar_w)
433 print(" [%8.3g,%8.3g) %-30s %d" % (lo_b, hi_b, bar, counts[i]))
434 print("=" * W)
435
436rows, err = _load(_path)
437if err:
438 print("Error:", err, file=sys.stderr); sys.exit(1)
439if not rows:
440 print("No rows found in %s" % _path); sys.exit(0)
441
442all_cols = list(rows[0].keys())
443if _col:
444 if _col not in all_cols:
445 print("Column '%s' not found. Available: %s" % (_col, ', '.join(all_cols)))
446 sys.exit(1)
447 nums = [v for v in (_try_float(r.get(_col,'')) for r in rows) if v is not None]
448 _stats_for(nums, _col)
449else:
450 numeric_cols = [c for c in all_cols if sum(1 for r in rows if _try_float(r.get(c,'')) is not None) > len(rows)*0.5]
451 if not numeric_cols:
452 print("No numeric columns detected. Columns: %s" % ', '.join(all_cols))
453 sys.exit(0)
454 print("Numeric columns: %s" % ', '.join(numeric_cols))
455 print("Pass --column NAME to focus on one, or showing all:\n")
456 for c in numeric_cols:
457 nums = [v for v in (_try_float(r.get(c,'')) for r in rows) if v is not None]
458 _stats_for(nums, c)
459 print()
460"####,
461 hex_path = hex_path,
462 hex_col = hex_col,
463 );
464
465 let sandbox_args = serde_json::json!({
466 "language": "python",
467 "code": script,
468 "timeout_seconds": 30
469 });
470 crate::tools::code_sandbox::execute(&sandbox_args).await
471}
472
473pub async fn matrix_op(op: &str, matrix_a: &str, matrix_b: &str) -> Result<String, String> {
478 let hex_op: String = op.bytes().map(|b| format!("{:02x}", b)).collect();
479 let hex_a: String = matrix_a.bytes().map(|b| format!("{:02x}", b)).collect();
480 let hex_b: String = matrix_b.bytes().map(|b| format!("{:02x}", b)).collect();
481
482 let script = format!(
483 r####"import json as _js, math, sys
484
485_op = bytes.fromhex("{hex_op}").decode().strip().lower()
486_sa = bytes.fromhex("{hex_a}").decode().strip()
487_sb = bytes.fromhex("{hex_b}").decode().strip()
488
489def _parse(s):
490 if not s: return None
491 try:
492 d = _js.loads(s)
493 if isinstance(d, list):
494 if isinstance(d[0], list): return [list(map(float,r)) for r in d]
495 return [[float(x)] for x in d] # column vector
496 return None
497 except Exception as e:
498 print("Parse error:", e, file=sys.stderr); sys.exit(1)
499
500A = _parse(_sa)
501B = _parse(_sb)
502
503def _rows(M): return len(M)
504def _cols(M): return len(M[0]) if M else 0
505def _shape(M): return "(%d×%d)" % (_rows(M), _cols(M))
506
507def _mul(A, B):
508 r,k,c = _rows(A), _cols(A), _cols(B)
509 if k != _rows(B): raise ValueError("Shape mismatch: A%s B%s" % (_shape(A), _shape(B)))
510 return [[sum(A[i][p]*B[p][j] for p in range(k)) for j in range(c)] for i in range(r)]
511
512def _T(M):
513 return [[M[i][j] for i in range(_rows(M))] for j in range(_cols(M))]
514
515def _LU(M):
516 n = _rows(M)
517 if n != _cols(M): raise ValueError("Square matrix required")
518 U = [row[:] for row in M]
519 L = [[1.0 if i==j else 0.0 for j in range(n)] for i in range(n)]
520 P = list(range(n)); sign = 1
521 for col in range(n):
522 pivot = max(range(col, n), key=lambda r: abs(U[r][col]))
523 if abs(U[pivot][col]) < 1e-14: raise ValueError("Matrix is singular")
524 if pivot != col:
525 U[col], U[pivot] = U[pivot], U[col]
526 P[col], P[pivot] = P[pivot], P[col]
527 if col > 0:
528 for k in range(col): L[col][k], L[pivot][k] = L[pivot][k], L[col][k]
529 sign *= -1
530 for row in range(col+1, n):
531 f = U[row][col] / U[col][col]
532 L[row][col] = f
533 for k in range(col, n): U[row][k] -= f * U[col][k]
534 return L, U, P, sign
535
536def _det(M):
537 L, U, P, sign = _LU(M)
538 d = sign
539 for i in range(_rows(M)): d *= U[i][i]
540 return d
541
542def _inv(M):
543 n = _rows(M)
544 L, U, P, _ = _LU(M)
545 inv = [[0.0]*n for _ in range(n)]
546 for col in range(n):
547 e = [1.0 if P[i]==col else 0.0 for i in range(n)]
548 y = [0.0]*n
549 for i in range(n):
550 y[i] = e[i] - sum(L[i][k]*y[k] for k in range(i))
551 x = [0.0]*n
552 for i in range(n-1, -1, -1):
553 x[i] = (y[i] - sum(U[i][k]*x[k] for k in range(i+1,n))) / U[i][i]
554 for i in range(n): inv[i][col] = x[i]
555 return inv
556
557def _solve(M, b):
558 n = _rows(M)
559 bv = [row[0] for row in b]
560 L, U, P, _ = _LU(M)
561 pb = [bv[P[i]] for i in range(n)]
562 y = [0.0]*n
563 for i in range(n):
564 y[i] = pb[i] - sum(L[i][k]*y[k] for k in range(i))
565 x = [0.0]*n
566 for i in range(n-1,-1,-1):
567 x[i] = (y[i] - sum(U[i][k]*x[k] for k in range(i+1,n))) / U[i][i]
568 return x
569
570def _fmt_num(v):
571 if abs(v) < 1e-10: return "0"
572 return ("%.6g" % v)
573
574def _print_matrix(M, label=""):
575 if label: print(label)
576 for row in M:
577 print(" [ " + " ".join("%10s" % _fmt_num(v) for v in row) + " ]")
578
579def _qr_eigenvalues(M, iters=200):
580 # QR iteration (Gram-Schmidt) — real eigenvalues only for symmetric matrices
581 n = _rows(M)
582 Ak = [row[:] for row in M]
583 for _ in range(iters):
584 # Gram-Schmidt QR
585 Q = [[0.0]*n for _ in range(n)]
586 R = [[0.0]*n for _ in range(n)]
587 for j in range(n):
588 v = [Ak[i][j] for i in range(n)]
589 for k in range(j):
590 R[k][j] = sum(Q[i][k]*v[i] for i in range(n))
591 for i in range(n): v[i] -= R[k][j]*Q[i][k]
592 norm = math.sqrt(sum(x*x for x in v))
593 if norm < 1e-14: norm = 1.0
594 R[j][j] = norm
595 for i in range(n): Q[i][j] = v[i]/norm
596 Ak = _mul(R, Q)
597 return sorted([Ak[i][i] for i in range(n)], reverse=True)
598
599if not A:
600 print("Error: no matrix provided. Use JSON format: [[1,2],[3,4]]")
601 sys.exit(1)
602
603try:
604 if _op in ('det', 'determinant'):
605 d = _det(A)
606 print("Determinant of %s matrix:" % _shape(A))
607 print(" det(A) = %s" % _fmt_num(d))
608 elif _op in ('inv', 'inverse', 'invert'):
609 Ai = _inv(A)
610 _print_matrix(A, "A %s:" % _shape(A))
611 print()
612 _print_matrix(Ai, "A⁻¹:")
613 elif _op in ('T', 'transpose'):
614 At = _T(A)
615 _print_matrix(A, "A %s:" % _shape(A))
616 print()
617 _print_matrix(At, "Aᵀ %s:" % _shape(At))
618 elif _op in ('mul', 'multiply', 'matmul'):
619 if not B:
620 print("Error: --matrix-b required for multiply"); sys.exit(1)
621 C = _mul(A, B)
622 _print_matrix(A, "A %s:" % _shape(A))
623 print()
624 _print_matrix(B, "B %s:" % _shape(B))
625 print()
626 _print_matrix(C, "A × B = %s:" % _shape(C))
627 elif _op in ('solve',):
628 if not B:
629 print("Error: --matrix-b required for solve (the b vector/matrix)"); sys.exit(1)
630 x = _solve(A, B)
631 _print_matrix(A, "A %s (coefficient matrix):" % _shape(A))
632 print()
633 _print_matrix(B, "b %s (right-hand side):" % _shape(B))
634 print()
635 print("Solution x (Ax = b):")
636 print(" [ " + " ".join("%10s" % _fmt_num(v) for v in x) + " ]")
637 elif _op in ('eig', 'eigen', 'eigenvalues'):
638 eigs = _qr_eigenvalues(A)
639 print("Eigenvalues of %s matrix (real, via QR iteration):" % _shape(A))
640 for i, e in enumerate(eigs):
641 print(" λ%d = %s" % (i+1, _fmt_num(e)))
642 print()
643 print("Note: QR iteration gives real eigenvalues for symmetric matrices.")
644 print("Complex eigenvalues are shown only as real parts for non-symmetric matrices.")
645 elif _op in ('rank',):
646 # Gaussian elimination row rank
647 M2 = [row[:] for row in A]
648 r = 0
649 for col in range(_cols(M2)):
650 pivot = next((i for i in range(r, _rows(M2)) if abs(M2[i][col]) > 1e-10), None)
651 if pivot is None: continue
652 M2[r], M2[pivot] = M2[pivot], M2[r]
653 for i in range(_rows(M2)):
654 if i != r and abs(M2[i][col]) > 1e-10:
655 f = M2[i][col] / M2[r][col]
656 for k in range(_cols(M2)): M2[i][k] -= f * M2[r][k]
657 r += 1
658 print("Rank of %s matrix: %d" % (_shape(A), r))
659 elif _op in ('trace',):
660 if _rows(A) != _cols(A): print("Warning: trace of non-square matrix (using diagonal)")
661 t = sum(A[i][i] for i in range(min(_rows(A),_cols(A))))
662 print("Trace of %s matrix: %s" % (_shape(A), _fmt_num(t)))
663 else:
664 print("Unknown operation: '%s'" % _op)
665 print("Available: det inv transpose multiply solve eigenvalues rank trace")
666 sys.exit(1)
667except ValueError as e:
668 print("Error:", e, file=sys.stderr); sys.exit(1)
669"####,
670 hex_op = hex_op,
671 hex_a = hex_a,
672 hex_b = hex_b,
673 );
674
675 let sandbox_args = serde_json::json!({
676 "language": "python",
677 "code": script,
678 "timeout_seconds": 30
679 });
680 crate::tools::code_sandbox::execute(&sandbox_args).await
681}
682
683pub async fn solve_equation(equation: &str, var: &str, x0: f64, x1: f64) -> Result<String, String> {
686 let hex_eq: String = equation.bytes().map(|b| format!("{:02x}", b)).collect();
687 let hex_var: String = var.bytes().map(|b| format!("{:02x}", b)).collect();
688
689 let script = format!(
690 r####"import math, sys
691from math import (sin,cos,tan,asin,acos,atan,atan2,sinh,cosh,tanh,
692 sqrt,log,log2,log10,exp,floor,ceil,pi,e,inf,nan)
693
694_eq = bytes.fromhex("{hex_eq}").decode().strip()
695_var = bytes.fromhex("{hex_var}").decode().strip() or "x"
696_x0 = {x0}
697_x1 = {x1}
698
699if '=' in _eq:
700 parts = _eq.split('=', 1)
701 _expr = "(%s) - (%s)" % (parts[0].strip(), parts[1].strip())
702else:
703 _expr = _eq
704_expr = _expr.replace('^', '**')
705
706_safe = dict(sin=sin,cos=cos,tan=tan,asin=asin,acos=acos,atan=atan,atan2=atan2,
707 sinh=sinh,cosh=cosh,tanh=tanh,sqrt=sqrt,log=log,log2=log2,log10=log10,
708 exp=exp,floor=floor,ceil=ceil,pi=pi,e=e,inf=inf,nan=nan,abs=abs)
709
710def _f(xv):
711 ns = dict(_safe); ns[_var] = xv
712 return eval(_expr, {{"__builtins__": {{}}}}, ns)
713
714def _bisect(lo, hi, tol=1e-12, iters=200):
715 try: flo = _f(lo); fhi = _f(hi)
716 except Exception as err: return None, str(err)
717 if flo * fhi > 0: return None, "no sign change"
718 for _ in range(iters):
719 mid = (lo+hi)/2
720 if (hi-lo) < tol: return mid, None
721 try: fm = _f(mid)
722 except: return None, "eval error"
723 if flo*fm <= 0: hi=mid; fhi=fm
724 else: lo=mid; flo=fm
725 return (lo+hi)/2, None
726
727def _newton(x, tol=1e-12, iters=100):
728 for _ in range(iters):
729 try: fx = _f(x)
730 except: break
731 if abs(fx) < tol: return x, None
732 h = max(abs(x)*1e-7, 1e-9)
733 try: fpx = (_f(x+h) - _f(x-h))/(2*h)
734 except: break
735 if abs(fpx) < 1e-30: break
736 x2 = x - fx/fpx
737 if abs(x2-x) < tol: return x2, None
738 x = x2
739 return None, "did not converge"
740
741print("Equation: %s = 0" % _expr)
742print("Variable: %s | Search: [%g, %g]" % (_var, _x0, _x1))
743print()
744
745candidates = []
746n_scan = 50; step = (_x1-_x0)/n_scan
747for i in range(n_scan):
748 lo = _x0+i*step; hi = lo+step
749 try:
750 if _f(lo)*_f(hi) <= 0:
751 root, _ = _bisect(lo, hi)
752 if root is not None:
753 nr, _ = _newton(root)
754 if nr is not None: root = nr
755 if not any(abs(root-c) < 1e-8 for c in candidates):
756 candidates.append(root)
757 except: pass
758
759if not candidates:
760 nr, _ = _newton((_x0+_x1)/2)
761 if nr is not None: candidates.append(nr)
762
763if not candidates:
764 print("No roots found in [%g, %g]." % (_x0, _x1))
765 print("Try --solve-range to widen the search interval.")
766 sys.exit(0)
767
768print("Root(s) found:")
769for r in sorted(candidates):
770 try: chk = abs(_f(r))
771 except: chk = float('nan')
772 flag = "" if chk < 1e-8 else " [residual: %.2e]" % chk
773 print(" %s = %.10g%s" % (_var, r, flag))
774"####,
775 hex_eq = hex_eq,
776 hex_var = hex_var,
777 x0 = x0,
778 x1 = x1,
779 );
780
781 let sandbox_args = serde_json::json!({
782 "language": "python",
783 "code": script,
784 "timeout_seconds": 20
785 });
786 crate::tools::code_sandbox::execute(&sandbox_args).await
787}
788
789pub async fn curve_fit(
792 file_path: &str,
793 x_col: &str,
794 y_col: &str,
795 model: &str,
796) -> Result<String, String> {
797 let hex_path: String = file_path.bytes().map(|b| format!("{:02x}", b)).collect();
798 let hex_xcol: String = x_col.bytes().map(|b| format!("{:02x}", b)).collect();
799 let hex_ycol: String = y_col.bytes().map(|b| format!("{:02x}", b)).collect();
800 let hex_model: String = model.bytes().map(|b| format!("{:02x}", b)).collect();
801
802 let script = format!(
803 r####"import csv as _csv, json as _js, sqlite3 as _sq, os, sys, math
804
805_path = bytes.fromhex("{hex_path}").decode().strip()
806_xcol = bytes.fromhex("{hex_xcol}").decode().strip()
807_ycol = bytes.fromhex("{hex_ycol}").decode().strip()
808_model = bytes.fromhex("{hex_model}").decode().strip().lower() or "auto"
809
810def _load(path):
811 ext = os.path.splitext(path)[1].lower().lstrip('.')
812 if ext in ('csv','tsv'):
813 with open(path, encoding='utf-8-sig', errors='replace', newline='') as fh:
814 r = _csv.DictReader(fh, delimiter='\t' if ext=='tsv' else ',')
815 return list(r)
816 elif ext == 'json':
817 with open(path, encoding='utf-8') as fh: d = _js.load(fh)
818 return d if isinstance(d, list) else next(iter(d.values()), [])
819 elif ext in ('db','sqlite','sqlite3'):
820 con = _sq.connect(path)
821 cur = con.cursor()
822 cur.execute("SELECT name FROM sqlite_master WHERE type='table' LIMIT 1")
823 t = cur.fetchone()
824 if not t: return []
825 cur.execute("SELECT * FROM [%s]" % t[0])
826 cols2 = [d[0] for d in cur.description]
827 rows2 = [dict(zip(cols2, r)) for r in cur.fetchall()]
828 con.close()
829 return rows2
830 print("Unsupported: "+ext, file=sys.stderr); sys.exit(1)
831
832def _tf(v):
833 try: return float(str(v).replace(',','').strip())
834 except: return None
835
836rows = _load(_path)
837if not rows:
838 print("No rows found."); sys.exit(0)
839
840all_cols = list(rows[0].keys())
841numeric = [c for c in all_cols if sum(1 for r in rows if _tf(r.get(c,'')) is not None) > len(rows)*0.5]
842
843if not _xcol: _xcol = numeric[0] if numeric else all_cols[0]
844if not _ycol: _ycol = numeric[1] if len(numeric)>1 else all_cols[1]
845
846pairs = [(_tf(r.get(_xcol,'')), _tf(r.get(_ycol,''))) for r in rows]
847pairs = [(x,y) for x,y in pairs if x is not None and y is not None]
848if len(pairs) < 3:
849 print("Need at least 3 numeric rows. Found: %d" % len(pairs)); sys.exit(0)
850
851xs = [p[0] for p in pairs]; ys = [p[1] for p in pairs]; n = len(xs)
852
853def _dot(a,b): return sum(x*y for x,y in zip(a,b))
854def _mean(v): return sum(v)/len(v)
855def _r2(pred):
856 ybar = _mean(ys)
857 ss_res = sum((y-p)**2 for y,p in zip(ys,pred))
858 ss_tot = sum((y-ybar)**2 for y in ys)
859 return 1.0 - ss_res/ss_tot if ss_tot else 0.0
860
861def _vfit(deg):
862 m = deg+1
863 X = [[xi**j for j in range(m)] for xi in xs]
864 Xt = [[X[i][j] for i in range(n)] for j in range(m)]
865 XtX = [[sum(Xt[r][k]*X[k][c] for k in range(n)) for c in range(m)] for r in range(m)]
866 Xty = [sum(Xt[r][k]*ys[k] for k in range(n)) for r in range(m)]
867 A = [row[:]+[Xty[i]] for i,row in enumerate(XtX)]
868 for col in range(m):
869 pv = max(range(col,m), key=lambda r: abs(A[r][col]))
870 A[col],A[pv] = A[pv],A[col]
871 if abs(A[col][col]) < 1e-30: raise ValueError("Singular")
872 f = A[col][col]; A[col] = [v/f for v in A[col]]
873 for r in range(m):
874 if r!=col:
875 mu = A[r][col]; A[r] = [A[r][k]-mu*A[col][k] for k in range(m+1)]
876 c = [A[i][m] for i in range(m)]
877 return c, _r2([sum(c[j]*xi**j for j in range(m)) for xi in xs])
878
879def _fp(c):
880 t = []
881 for j in range(len(c)-1,-1,-1):
882 v = c[j]
883 if abs(v)<1e-14: continue
884 if j==0: t.append("%.6g"%v)
885 elif j==1: t.append("%.6g*x"%v)
886 else: t.append("%.6g*x^%d"%(v,j))
887 return " + ".join(t) or "0"
888
889res = {{}}
890def _try(nm,fn):
891 try: eq,r2=fn(); res[nm]=(r2,eq)
892 except Exception as err: res[nm]=(None,str(err))
893
894def _lin():
895 c,r2=_vfit(1); return "y = %.6g + %.6g*x"%(c[0],c[1]),r2
896def _q2():
897 c,r2=_vfit(2); return "y = "+_fp(c),r2
898def _q3():
899 c,r2=_vfit(3); return "y = "+_fp(c),r2
900def _ef():
901 if any(y<=0 for y in ys): raise ValueError("y must be >0")
902 lny=[math.log(y) for y in ys]; xm=_mean(xs); lym=_mean(lny)
903 b=(_dot(xs,lny)-n*xm*lym)/(_dot(xs,xs)-n*xm**2); a=math.exp(lym-b*xm)
904 return "y = %.6g*e^(%.6g*x)"%(a,b),_r2([a*math.exp(b*xi) for xi in xs])
905def _pf():
906 if any(x<=0 for x in xs) or any(y<=0 for y in ys): raise ValueError("x,y must be >0")
907 lx=[math.log(x) for x in xs]; ly=[math.log(y) for y in ys]
908 lxm=_mean(lx); lym=_mean(ly)
909 b=(_dot(lx,ly)-n*lxm*lym)/(_dot(lx,lx)-n*lxm**2); a=math.exp(lym-b*lxm)
910 return "y = %.6g*x^%.6g"%(a,b),_r2([a*(xi**b) for xi in xs])
911def _lf():
912 if any(x<=0 for x in xs): raise ValueError("x must be >0")
913 lx=[math.log(x) for x in xs]; lxm=_mean(lx); ym=_mean(ys)
914 b=(_dot(lx,ys)-n*lxm*ym)/(_dot(lx,lx)-n*lxm**2); a=ym-b*lxm
915 return "y = %.6g + %.6g*ln(x)"%(a,b),_r2([a+b*lxi for lxi in lx])
916
917mm = dict(linear=_lin,lin=_lin,poly2=_q2,quadratic=_q2,quad=_q2,poly3=_q3,cubic=_q3,
918 exp=_ef,exponential=_ef,power=_pf,pow=_pf,log=_lf,logarithmic=_lf)
919
920if _model in ('auto','all'):
921 for nm,fn in [('linear',_lin),('poly2',_q2),('poly3',_q3),('exp',_ef),('power',_pf),('log',_lf)]:
922 _try(nm,fn)
923elif _model in mm:
924 _try(_model, mm[_model])
925else:
926 print("Unknown model '%s'. Available: linear poly2 poly3 exp power log auto" % _model); sys.exit(1)
927
928W=60
929print("="*W)
930print(" Curve Fit: %s -> %s (n=%d)" % (_xcol,_ycol,n))
931print("-"*W)
932valid=[(nm,(r2,eq)) for nm,(r2,eq) in res.items() if r2 is not None]
933invalid=[(nm,(r2,eq)) for nm,(r2,eq) in res.items() if r2 is None]
934valid.sort(key=lambda t:-t[1][0])
935best=valid[0][0] if valid else None
936for nm,(r2,eq) in valid:
937 star=" *" if nm==best else ""
938 print(" %-12s R2=%.4f%s"%(nm,r2,star))
939 print(" %s"%eq)
940if invalid:
941 print(); print(" Skipped (domain):")
942 for nm,(_,err) in invalid:
943 print(" %-12s %s"%(nm+":",err))
944print("="*W)
945"####,
946 hex_path = hex_path,
947 hex_xcol = hex_xcol,
948 hex_ycol = hex_ycol,
949 hex_model = hex_model,
950 );
951
952 let sandbox_args = serde_json::json!({
953 "language": "python",
954 "code": script,
955 "timeout_seconds": 30
956 });
957 crate::tools::code_sandbox::execute(&sandbox_args).await
958}
959
960pub async fn integrate(
963 expr: &str,
964 var: &str,
965 lo: f64,
966 hi: f64,
967 n: usize,
968) -> Result<String, String> {
969 let hex_expr: String = expr.bytes().map(|b| format!("{:02x}", b)).collect();
970 let hex_var: String = var.bytes().map(|b| format!("{:02x}", b)).collect();
971
972 let script = format!(
973 r####"import math, sys
974from math import (sin,cos,tan,asin,acos,atan,atan2,sinh,cosh,tanh,
975 sqrt,log,log2,log10,exp,floor,ceil,pi,e,inf,nan)
976
977_expr = bytes.fromhex("{hex_expr}").decode().strip().replace('^','**')
978_var = bytes.fromhex("{hex_var}").decode().strip() or "x"
979_lo = {lo}
980_hi = {hi}
981_n = {n}
982if _n < 2: _n = 1000
983
984_safe = dict(sin=sin,cos=cos,tan=tan,asin=asin,acos=acos,atan=atan,atan2=atan2,
985 sinh=sinh,cosh=cosh,tanh=tanh,sqrt=sqrt,log=log,log2=log2,log10=log10,
986 exp=exp,floor=floor,ceil=ceil,pi=pi,e=e,inf=inf,nan=nan,abs=abs)
987
988def _f(v):
989 ns = dict(_safe); ns[_var] = v
990 return eval(_expr, {{"__builtins__":{{}}}}, ns)
991
992# Adaptive Simpson's rule (recursive)
993def _simp(a, b, fa, fm, fb, tol, depth):
994 m1 = (a+b)/4; m2 = 3*(a+b)/4
995 fm1 = _f(m1); fm2 = _f(m2)
996 s1 = (b-a)/12*(fa + 4*fm1 + 2*fm + 4*fm2 + fb)
997 s0 = (b-a)/6*(fa + 4*fm + fb)
998 err = abs(s1 - s0)/15
999 if depth >= 12 or err < tol:
1000 return s1 + (s1-s0)/15
1001 else:
1002 mid = (a+b)/2
1003 return (_simp(a,mid,fa,fm1,fm,tol/2,depth+1) +
1004 _simp(mid,b,fm,fm2,fb,tol/2,depth+1))
1005
1006# Also compute via Simpson's 1/3 rule with _n intervals for comparison
1007h = (_hi - _lo) / _n
1008vals = [_f(_lo + i*h) for i in range(_n+1)]
1009simp_basic = h/3 * sum(
1010 (vals[i] + 4*vals[i+1] + vals[i+2]) if (i+2 <= _n) else 0
1011 for i in range(0, _n, 2)
1012)
1013
1014try:
1015 fa = _f(_lo); fm = _f((_lo+_hi)/2); fb = _f(_hi)
1016 result = _simp(_lo, _hi, fa, fm, fb, 1e-10, 0)
1017 method = "Adaptive Simpson"
1018except RecursionError:
1019 result = simp_basic
1020 method = "Simpson 1/3 (%d intervals)" % _n
1021
1022print("Integral of %s d%s" % (_expr, _var))
1023print("From %g to %g" % (_lo, _hi))
1024print()
1025print("Result: %.10g (via %s)" % (result, method))
1026print()
1027# Relative error estimate vs basic Simpson
1028if method.startswith("Adaptive"):
1029 err_est = abs(result - simp_basic)
1030 print("Est. error: %.2e (vs basic Simpson/%d)" % (err_est, _n))
1031"####,
1032 hex_expr = hex_expr,
1033 hex_var = hex_var,
1034 lo = lo,
1035 hi = hi,
1036 n = n,
1037 );
1038
1039 let sandbox_args = serde_json::json!({
1040 "language": "python",
1041 "code": script,
1042 "timeout_seconds": 20
1043 });
1044 crate::tools::code_sandbox::execute(&sandbox_args).await
1045}
1046
1047pub async fn differentiate(expr: &str, var: &str, at: f64, order: u8) -> Result<String, String> {
1050 let hex_expr: String = expr.bytes().map(|b| format!("{:02x}", b)).collect();
1051 let hex_var: String = var.bytes().map(|b| format!("{:02x}", b)).collect();
1052
1053 let script = format!(
1054 r####"import math, sys
1055from math import (sin,cos,tan,asin,acos,atan,atan2,sinh,cosh,tanh,
1056 sqrt,log,log2,log10,exp,floor,ceil,pi,e,inf,nan)
1057
1058_expr = bytes.fromhex("{hex_expr}").decode().strip().replace('^','**')
1059_var = bytes.fromhex("{hex_var}").decode().strip() or "x"
1060_at = {at}
1061_order = {order}
1062
1063_safe = dict(sin=sin,cos=cos,tan=tan,asin=asin,acos=acos,atan=atan,atan2=atan2,
1064 sinh=sinh,cosh=cosh,tanh=tanh,sqrt=sqrt,log=log,log2=log2,log10=log10,
1065 exp=exp,floor=floor,ceil=ceil,pi=pi,e=e,inf=inf,nan=nan,abs=abs)
1066
1067def _f(v):
1068 ns = dict(_safe); ns[_var] = v
1069 return eval(_expr, {{"__builtins__":{{}}}}, ns)
1070
1071def _deriv(f, x, h=None, order=1):
1072 if h is None: h = max(abs(x)*1e-5, 1e-7)
1073 if order == 1:
1074 # 5-point stencil: (-f(x+2h)+8f(x+h)-8f(x-h)+f(x-2h)) / 12h
1075 return (-f(x+2*h) + 8*f(x+h) - 8*f(x-h) + f(x-2*h)) / (12*h)
1076 elif order == 2:
1077 return (f(x+h) - 2*f(x) + f(x-h)) / (h*h)
1078 elif order == 3:
1079 return (-f(x+2*h) + 2*f(x+h) - 2*f(x-h) + f(x-2*h)) / (2*h**3)
1080 elif order == 4:
1081 return (f(x+2*h) - 4*f(x+h) + 6*f(x) - 4*f(x-h) + f(x-2*h)) / (h**4)
1082 else:
1083 # Higher orders: repeated difference quotient
1084 d = [f(x+i*h) for i in range(-order, order+1)]
1085 return sum((-1)**(order-i)*math.comb(order,i)*d[i] for i in range(order+1)) / (h**order)
1086
1087ordinal = ["","1st","2nd","3rd","4th","5th","6th","7th","8th"]
1088label = ordinal[_order] if _order < len(ordinal) else "%dth"%_order
1089
1090print("f(%s) = %s" % (_var, _expr))
1091print("%s derivative at %s = %g" % (label, _var, _at))
1092print()
1093
1094try:
1095 fval = _f(_at)
1096 dval = _deriv(_f, _at, order=_order)
1097 print("f(%g) = %.10g" % (_at, fval))
1098 print("f'(%g) [%s] = %.10g" % (_at, label, dval))
1099 if _order == 1:
1100 slope = dval
1101 print()
1102 print("Tangent line at %s=%g: y = %.6g + %.6g*(%s - %g)" % (_var,_at,fval,slope,_var,_at))
1103except Exception as ex:
1104 print("Error:", ex, file=sys.stderr); sys.exit(1)
1105"####,
1106 hex_expr = hex_expr,
1107 hex_var = hex_var,
1108 at = at,
1109 order = order,
1110 );
1111
1112 let sandbox_args = serde_json::json!({
1113 "language": "python",
1114 "code": script,
1115 "timeout_seconds": 15
1116 });
1117 crate::tools::code_sandbox::execute(&sandbox_args).await
1118}
1119
1120pub async fn data_profile(file_path: &str) -> Result<String, String> {
1125 let hex_path: String = file_path.bytes().map(|b| format!("{:02x}", b)).collect();
1126
1127 let script = format!(
1128 r####"import csv as _csv, json as _js, sqlite3 as _sq, os, sys, math
1129
1130_path = bytes.fromhex("{hex_path}").decode().strip()
1131
1132def _load(path):
1133 ext = os.path.splitext(path)[1].lower().lstrip('.')
1134 if ext in ('csv','tsv'):
1135 with open(path, encoding='utf-8-sig', errors='replace', newline='') as fh:
1136 r = _csv.DictReader(fh, delimiter='\t' if ext=='tsv' else ',')
1137 return list(r), None
1138 elif ext == 'json':
1139 with open(path, encoding='utf-8') as fh: d = _js.load(fh)
1140 rows = d if isinstance(d, list) else next(iter(d.values()), [])
1141 return rows, None
1142 elif ext in ('db','sqlite','sqlite3'):
1143 con = _sq.connect(path)
1144 cur = con.cursor()
1145 cur.execute("SELECT name FROM sqlite_master WHERE type='table' LIMIT 1")
1146 t = cur.fetchone()
1147 if not t: return [], None
1148 cur.execute("SELECT * FROM [%s]" % t[0])
1149 cols2 = [d[0] for d in cur.description]
1150 rows2 = [dict(zip(cols2, r)) for r in cur.fetchall()]
1151 con.close()
1152 return rows2, None
1153 return None, "Unsupported format: "+ext
1154
1155rows, err = _load(_path)
1156if err: print("Error:", err, file=sys.stderr); sys.exit(1)
1157if not rows: print("No rows found."); sys.exit(0)
1158
1159n = len(rows)
1160cols = list(rows[0].keys())
1161W = 64
1162
1163print("=" * W)
1164print(" Data Profile: %s" % os.path.basename(_path))
1165print(" Rows: %d | Columns: %d" % (n, len(cols)))
1166print("=" * W)
1167
1168def _tf(v):
1169 try: return float(str(v).replace(',','').strip())
1170 except: return None
1171
1172dup_check = set()
1173dups = 0
1174for r in rows:
1175 k = tuple(str(r.get(c,'')) for c in cols)
1176 if k in dup_check: dups += 1
1177 else: dup_check.add(k)
1178
1179if dups:
1180 print(" Duplicate rows: %d (%.1f%%)" % (dups, 100*dups/n))
1181else:
1182 print(" No duplicate rows.")
1183print()
1184
1185for col in cols:
1186 vals = [r.get(col,'') for r in rows]
1187 missing = sum(1 for v in vals if str(v).strip() in ('','None','null','NULL','NA','N/A','NaN'))
1188 nums = [_tf(v) for v in vals if _tf(v) is not None]
1189 pct_miss = 100*missing/n if n else 0
1190
1191 print("-" * W)
1192 print(" %s" % col)
1193 if pct_miss > 0:
1194 flag = " [!]" if pct_miss > 20 else ""
1195 print(" Missing: %d / %d (%.1f%%)%s" % (missing, n, pct_miss, flag))
1196
1197 if len(nums) > n * 0.5:
1198 # Numeric column
1199 nums_s = sorted(nums)
1200 mn = _mean = sum(nums_s)/len(nums_s)
1201 med_idx = len(nums_s)//2
1202 med = nums_s[med_idx] if len(nums_s)%2 else (nums_s[med_idx-1]+nums_s[med_idx])/2
1203 std = math.sqrt(sum((x-mn)**2 for x in nums_s)/len(nums_s)) if len(nums_s)>1 else 0
1204 q1_i = len(nums_s)//4; q3_i = 3*len(nums_s)//4
1205 q1 = nums_s[q1_i]; q3 = nums_s[q3_i]; iqr = q3 - q1
1206 lo_fence = q1 - 1.5*iqr; hi_fence = q3 + 1.5*iqr
1207 outliers = sum(1 for x in nums_s if x < lo_fence or x > hi_fence)
1208 print(" Type: numeric (%d values)" % len(nums_s))
1209 print(" Range: %g to %g" % (nums_s[0], nums_s[-1]))
1210 print(" Mean: %g | Median: %g | Std: %g" % (mn, med, std))
1211 if outliers:
1212 print(" Outliers: %d (IQR fence: [%g, %g])" % (outliers, lo_fence, hi_fence))
1213 else:
1214 # Categorical column
1215 from collections import Counter
1216 vc = Counter(str(v).strip() for v in vals if str(v).strip() not in ('','None','null','NULL'))
1217 top = vc.most_common(5)
1218 unique = len(vc)
1219 print(" Type: categorical (%d unique values)" % unique)
1220 if unique <= 20:
1221 print(" Values: " + " ".join("%s(%d)"%(k,c) for k,c in top))
1222 else:
1223 print(" Top 5: " + " ".join("%s(%d)"%(k,c) for k,c in top))
1224
1225print("=" * W)
1226"####,
1227 hex_path = hex_path,
1228 );
1229
1230 let sandbox_args = serde_json::json!({
1231 "language": "python",
1232 "code": script,
1233 "timeout_seconds": 30
1234 });
1235 crate::tools::code_sandbox::execute(&sandbox_args).await
1236}