1use std::sync::Arc;
16use std::collections::HashMap;
17
18use super::vm::{Program, Opcode, BuiltinMethod};
19use super::ast::KindType;
20
21#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
24pub enum VType {
25 Bottom,
27 Null,
28 Bool,
29 Int,
30 Float,
31 Num,
33 Str,
34 Arr,
35 Obj,
36 Unknown,
38}
39
40impl VType {
41 pub fn join(self, other: VType) -> VType {
43 if self == other { return self; }
44 match (self, other) {
45 (VType::Bottom, x) | (x, VType::Bottom) => x,
46 (VType::Int, VType::Float) | (VType::Float, VType::Int)
47 | (VType::Int, VType::Num) | (VType::Num, VType::Int)
48 | (VType::Float, VType::Num) | (VType::Num, VType::Float)
49 => VType::Num,
50 _ => VType::Unknown,
51 }
52 }
53
54 pub fn is_array_like(self) -> bool { matches!(self, VType::Arr) }
55 pub fn is_object_like(self) -> bool { matches!(self, VType::Obj) }
56 pub fn is_numeric(self) -> bool { matches!(self, VType::Int | VType::Float | VType::Num) }
57 pub fn is_string(self) -> bool { matches!(self, VType::Str) }
58}
59
60#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
63pub enum Nullness {
64 AlwaysNull,
66 NonNull,
68 MaybeNull,
70}
71
72impl Nullness {
73 pub fn join(self, other: Nullness) -> Nullness {
74 if self == other { return self; }
75 Nullness::MaybeNull
76 }
77}
78
79#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
82pub enum Cardinality {
83 Zero,
85 One,
87 ZeroOrOne,
89 Many,
91 NotArray,
93 Unknown,
95}
96
97impl Cardinality {
98 pub fn join(self, other: Cardinality) -> Cardinality {
99 if self == other { return self; }
100 match (self, other) {
101 (Cardinality::Zero, Cardinality::One)
102 | (Cardinality::One, Cardinality::Zero) => Cardinality::ZeroOrOne,
103 _ => Cardinality::Unknown,
104 }
105 }
106}
107
108#[derive(Debug, Clone, Copy, PartialEq, Eq)]
111pub struct AbstractVal {
112 pub ty: VType,
113 pub null: Nullness,
114 pub card: Cardinality,
115}
116
117impl AbstractVal {
118 pub const UNKNOWN: Self = Self {
119 ty: VType::Unknown, null: Nullness::MaybeNull, card: Cardinality::Unknown,
120 };
121 pub const NULL: Self = Self {
122 ty: VType::Null, null: Nullness::AlwaysNull, card: Cardinality::NotArray,
123 };
124 pub fn scalar(ty: VType) -> Self {
125 Self { ty, null: Nullness::NonNull, card: Cardinality::NotArray }
126 }
127 pub fn array() -> Self {
128 Self { ty: VType::Arr, null: Nullness::NonNull, card: Cardinality::Many }
129 }
130 pub fn object() -> Self {
131 Self { ty: VType::Obj, null: Nullness::NonNull, card: Cardinality::NotArray }
132 }
133 pub fn join(self, other: AbstractVal) -> AbstractVal {
134 AbstractVal {
135 ty: self.ty.join(other.ty),
136 null: self.null.join(other.null),
137 card: self.card.join(other.card),
138 }
139 }
140}
141
142pub fn infer_result_type(program: &Program) -> AbstractVal {
147 let mut stack: Vec<AbstractVal> = Vec::with_capacity(16);
148 let mut env: HashMap<Arc<str>, AbstractVal> = HashMap::new();
149 for op in program.ops.iter() { apply_op_env(op, &mut stack, &mut env); }
150 stack.pop().unwrap_or(AbstractVal::UNKNOWN)
151}
152
153pub fn infer_result_type_with_env(program: &Program)
156 -> (AbstractVal, HashMap<Arc<str>, AbstractVal>)
157{
158 let mut stack: Vec<AbstractVal> = Vec::with_capacity(16);
159 let mut env: HashMap<Arc<str>, AbstractVal> = HashMap::new();
160 for op in program.ops.iter() { apply_op_env(op, &mut stack, &mut env); }
161 (stack.pop().unwrap_or(AbstractVal::UNKNOWN), env)
162}
163
164fn apply_op_env(op: &Opcode, stack: &mut Vec<AbstractVal>,
165 env: &mut HashMap<Arc<str>, AbstractVal>) {
166 match op {
168 Opcode::LoadIdent(name) => {
169 let av = env.get(name).copied().unwrap_or(AbstractVal::UNKNOWN);
170 stack.push(av);
171 }
172 Opcode::BindVar(name) => {
173 if let Some(top) = stack.last().copied() { env.insert(name.clone(), top); }
175 }
176 Opcode::StoreVar(name) => {
177 let v = stack.pop().unwrap_or(AbstractVal::UNKNOWN);
178 env.insert(name.clone(), v);
179 }
180 Opcode::LetExpr { name, body } => {
181 let init = stack.pop().unwrap_or(AbstractVal::UNKNOWN);
182 let saved = env.get(name).copied();
183 env.insert(name.clone(), init);
184 let mut sub_stack: Vec<AbstractVal> = Vec::with_capacity(8);
185 for op2 in body.ops.iter() { apply_op_env(op2, &mut sub_stack, env); }
186 let res = sub_stack.pop().unwrap_or(AbstractVal::UNKNOWN);
187 match saved {
189 Some(v) => { env.insert(name.clone(), v); }
190 None => { env.remove(name); }
191 }
192 stack.push(res);
193 }
194 _ => apply_op(op, stack),
195 }
196}
197
198fn apply_op(op: &Opcode, stack: &mut Vec<AbstractVal>) {
199 macro_rules! pop2 { () => {{ let r = stack.pop().unwrap_or(AbstractVal::UNKNOWN); let l = stack.pop().unwrap_or(AbstractVal::UNKNOWN); (l, r) }} }
200 macro_rules! pop1 { () => { stack.pop().unwrap_or(AbstractVal::UNKNOWN) } }
201 match op {
202 Opcode::PushNull => stack.push(AbstractVal::NULL),
203 Opcode::PushBool(_) => stack.push(AbstractVal::scalar(VType::Bool)),
204 Opcode::PushInt(_) => stack.push(AbstractVal::scalar(VType::Int)),
205 Opcode::PushFloat(_) => stack.push(AbstractVal::scalar(VType::Float)),
206 Opcode::PushStr(_) => stack.push(AbstractVal::scalar(VType::Str)),
207 Opcode::PushRoot | Opcode::PushCurrent | Opcode::LoadIdent(_)
208 => stack.push(AbstractVal::UNKNOWN),
209 Opcode::GetField(_) | Opcode::OptField(_) => {
210 pop1!();
211 stack.push(AbstractVal::UNKNOWN);
212 }
213 Opcode::GetIndex(_) | Opcode::DynIndex(_) => {
214 pop1!();
215 stack.push(AbstractVal::UNKNOWN);
216 }
217 Opcode::GetSlice(_, _) => {
218 pop1!();
219 stack.push(AbstractVal::array());
220 }
221 Opcode::Descendant(_) | Opcode::DescendAll => {
222 pop1!();
223 stack.push(AbstractVal::array());
224 }
225 Opcode::InlineFilter(_) => {
226 pop1!();
227 stack.push(AbstractVal::array());
228 }
229 Opcode::Quantifier(kind) => {
230 pop1!();
231 use super::ast::QuantifierKind;
232 let card = match kind {
233 QuantifierKind::First => Cardinality::ZeroOrOne,
234 QuantifierKind::One => Cardinality::One,
235 };
236 stack.push(AbstractVal { ty: VType::Unknown, null: Nullness::MaybeNull, card });
237 }
238 Opcode::RootChain(_) => stack.push(AbstractVal::UNKNOWN),
239 Opcode::FilterCount(_) => {
240 pop1!();
241 stack.push(AbstractVal::scalar(VType::Int));
242 }
243 Opcode::FindFirst(_) | Opcode::FindOne(_) => {
244 pop1!();
245 stack.push(AbstractVal::UNKNOWN);
246 }
247 Opcode::FilterMap { .. } | Opcode::FilterFilter { .. } | Opcode::MapMap { .. } => {
248 pop1!();
249 stack.push(AbstractVal::array());
250 }
251 Opcode::MapSum(_) => {
252 pop1!();
253 stack.push(AbstractVal::scalar(VType::Num));
254 }
255 Opcode::MapAvg(_) => {
256 pop1!();
257 stack.push(AbstractVal::scalar(VType::Float));
258 }
259 Opcode::TopN { .. } | Opcode::MapFlatten(_) | Opcode::FilterTakeWhile { .. }
260 | Opcode::FilterDropWhile { .. } | Opcode::MapUnique(_)
261 | Opcode::EquiJoin { .. } => {
262 pop1!();
263 stack.push(AbstractVal::array());
264 }
265 Opcode::Add | Opcode::Sub | Opcode::Mul | Opcode::Mod => {
266 let (l, r) = pop2!();
267 stack.push(AbstractVal::scalar(l.ty.join(r.ty)));
268 }
269 Opcode::Div => {
270 pop2!();
271 stack.push(AbstractVal::scalar(VType::Float));
272 }
273 Opcode::Eq | Opcode::Neq | Opcode::Lt | Opcode::Lte
274 | Opcode::Gt | Opcode::Gte | Opcode::Fuzzy => {
275 pop2!();
276 stack.push(AbstractVal::scalar(VType::Bool));
277 }
278 Opcode::Not => { pop1!(); stack.push(AbstractVal::scalar(VType::Bool)); }
279 Opcode::Neg => { let v = pop1!(); stack.push(AbstractVal::scalar(v.ty)); }
280 Opcode::AndOp(_) | Opcode::OrOp(_) => {
281 pop1!();
282 stack.push(AbstractVal::scalar(VType::Bool));
283 }
284 Opcode::CoalesceOp(_) => { pop1!(); stack.push(AbstractVal::UNKNOWN); }
285 Opcode::CallMethod(call) | Opcode::CallOptMethod(call) => {
286 pop1!();
287 stack.push(method_result_type(call.method));
288 }
289 Opcode::MakeObj(_) => stack.push(AbstractVal::object()),
290 Opcode::MakeArr(_) => stack.push(AbstractVal::array()),
291 Opcode::FString(_) => stack.push(AbstractVal::scalar(VType::Str)),
292 Opcode::KindCheck { .. } => { pop1!(); stack.push(AbstractVal::scalar(VType::Bool)); }
293 Opcode::SetCurrent => {} Opcode::BindVar(_) => {} Opcode::StoreVar(_) => { pop1!(); }
296 Opcode::BindObjDestructure(_) | Opcode::BindArrDestructure(_) => {} Opcode::LetExpr { .. } => {
298 pop1!();
299 stack.push(AbstractVal::UNKNOWN);
300 }
301 Opcode::ListComp(_) | Opcode::SetComp(_) => stack.push(AbstractVal::array()),
302 Opcode::DictComp(_) => stack.push(AbstractVal::object()),
303 Opcode::GetPointer(_) => stack.push(AbstractVal::UNKNOWN),
304 Opcode::PatchEval(_) => stack.push(AbstractVal::UNKNOWN),
305 Opcode::CastOp(ty) => {
306 pop1!();
307 use super::ast::CastType;
308 let av = match ty {
309 CastType::Int => AbstractVal::scalar(VType::Int),
310 CastType::Float => AbstractVal::scalar(VType::Float),
311 CastType::Number => AbstractVal::scalar(VType::Num),
312 CastType::Str => AbstractVal::scalar(VType::Str),
313 CastType::Bool => AbstractVal::scalar(VType::Bool),
314 CastType::Array => AbstractVal::array(),
315 CastType::Object => AbstractVal::object(),
316 CastType::Null => AbstractVal::NULL,
317 };
318 stack.push(av);
319 }
320 }
321}
322
323pub fn method_result_type(m: BuiltinMethod) -> AbstractVal {
325 use BuiltinMethod::*;
326 match m {
327 Len | Count | Sum | IndexOf | LastIndexOf => AbstractVal::scalar(VType::Int),
329 Any | All | Has | Missing | Includes | StartsWith | EndsWith => AbstractVal::scalar(VType::Bool),
331 Upper | Lower | Capitalize | TitleCase | Trim | TrimLeft | TrimRight
333 | ToString | ToJson | ToBase64 | FromBase64 | UrlEncode | UrlDecode
334 | HtmlEscape | HtmlUnescape | Repeat | PadLeft | PadRight
335 | Replace | ReplaceAll | StripPrefix | StripSuffix | Indent | Dedent
336 | Join | ToCsv | ToTsv | Type
337 => AbstractVal::scalar(VType::Str),
338 Avg => AbstractVal::scalar(VType::Float),
340 Min | Max | ToNumber => AbstractVal::scalar(VType::Num),
342 ToBool => AbstractVal::scalar(VType::Bool),
344 Keys | Values | Entries | ToPairs | Reverse | Unique | Flatten | Compact
346 | Chars | Lines | Words | Split | Sort | Filter | Map | FlatMap
347 | Enumerate | Pairwise | Window | Chunk | TakeWhile | DropWhile
348 | Accumulate | Zip | ZipLongest | Diff | Intersect | Union
349 | Append | Prepend | Remove | Matches | Scan | Slice
350 => AbstractVal::array(),
351 FromPairs | Invert | Pick | Omit | Merge | DeepMerge | Defaults | Rename
353 | TransformKeys | TransformValues | FilterKeys | FilterValues | Pivot
354 | GroupBy | CountBy | IndexBy | Partition | FlattenKeys | UnflattenKeys
355 | SetPath | DelPath | DelPaths | Set | Update
356 => AbstractVal::object(),
357 First | Last | Nth | GetPath => AbstractVal::UNKNOWN,
359 HasPath => AbstractVal::scalar(VType::Bool),
360 FromJson | Or => AbstractVal::UNKNOWN,
361 EquiJoin => AbstractVal::array(),
362 Unknown => AbstractVal::UNKNOWN,
363 }
364}
365
366pub fn count_ident_uses(program: &Program, name: &str) -> usize {
370 let mut n = 0;
371 count_ident_uses_in_ops(&program.ops, name, &mut n);
372 n
373}
374
375fn count_ident_uses_in_ops(ops: &[Opcode], name: &str, acc: &mut usize) {
376 for op in ops {
377 match op {
378 Opcode::LoadIdent(s) if s.as_ref() == name => *acc += 1,
379 Opcode::AndOp(p) | Opcode::OrOp(p) | Opcode::CoalesceOp(p)
380 | Opcode::InlineFilter(p) | Opcode::FilterCount(p)
381 | Opcode::FindFirst(p) | Opcode::FindOne(p)
382 | Opcode::DynIndex(p)
383 | Opcode::MapSum(p) | Opcode::MapAvg(p)
384 | Opcode::MapFlatten(p) => count_ident_uses_in_ops(&p.ops, name, acc),
385 Opcode::FilterTakeWhile { pred, stop } => {
386 count_ident_uses_in_ops(&pred.ops, name, acc);
387 count_ident_uses_in_ops(&stop.ops, name, acc);
388 }
389 Opcode::FilterMap { pred, map } => {
390 count_ident_uses_in_ops(&pred.ops, name, acc);
391 count_ident_uses_in_ops(&map.ops, name, acc);
392 }
393 Opcode::FilterFilter { p1, p2 } => {
394 count_ident_uses_in_ops(&p1.ops, name, acc);
395 count_ident_uses_in_ops(&p2.ops, name, acc);
396 }
397 Opcode::MapMap { f1, f2 } => {
398 count_ident_uses_in_ops(&f1.ops, name, acc);
399 count_ident_uses_in_ops(&f2.ops, name, acc);
400 }
401 Opcode::CallMethod(c) | Opcode::CallOptMethod(c) => {
402 for p in c.sub_progs.iter() { count_ident_uses_in_ops(&p.ops, name, acc); }
403 }
404 Opcode::LetExpr { body, .. } => count_ident_uses_in_ops(&body.ops, name, acc),
405 Opcode::ListComp(spec) | Opcode::SetComp(spec) => {
406 count_ident_uses_in_ops(&spec.expr.ops, name, acc);
407 count_ident_uses_in_ops(&spec.iter.ops, name, acc);
408 if let Some(c) = &spec.cond { count_ident_uses_in_ops(&c.ops, name, acc); }
409 }
410 Opcode::DictComp(spec) => {
411 count_ident_uses_in_ops(&spec.key.ops, name, acc);
412 count_ident_uses_in_ops(&spec.val.ops, name, acc);
413 count_ident_uses_in_ops(&spec.iter.ops, name, acc);
414 if let Some(c) = &spec.cond { count_ident_uses_in_ops(&c.ops, name, acc); }
415 }
416 Opcode::MakeObj(entries) => {
417 use super::vm::CompiledObjEntry;
418 for e in entries.iter() {
419 match e {
420 CompiledObjEntry::Short(_) => {}
421 CompiledObjEntry::Kv { prog, cond, .. } => {
422 count_ident_uses_in_ops(&prog.ops, name, acc);
423 if let Some(c) = cond { count_ident_uses_in_ops(&c.ops, name, acc); }
424 }
425 CompiledObjEntry::Dynamic { key, val } => {
426 count_ident_uses_in_ops(&key.ops, name, acc);
427 count_ident_uses_in_ops(&val.ops, name, acc);
428 }
429 CompiledObjEntry::Spread(p) => count_ident_uses_in_ops(&p.ops, name, acc),
430 CompiledObjEntry::SpreadDeep(p) => count_ident_uses_in_ops(&p.ops, name, acc),
431 }
432 }
433 }
434 Opcode::MakeArr(progs) => {
435 for p in progs.iter() { count_ident_uses_in_ops(&p.ops, name, acc); }
436 }
437 Opcode::FString(parts) => {
438 use super::vm::CompiledFSPart;
439 for p in parts.iter() {
440 if let CompiledFSPart::Interp { prog, .. } = p {
441 count_ident_uses_in_ops(&prog.ops, name, acc);
442 }
443 }
444 }
445 _ => {}
446 }
447 }
448}
449
450pub fn collect_accessed_fields(program: &Program) -> Vec<Arc<str>> {
456 let mut set = Vec::new();
457 collect_fields_in_ops(&program.ops, &mut set);
458 set
459}
460
461fn collect_fields_in_ops(ops: &[Opcode], acc: &mut Vec<Arc<str>>) {
462 for op in ops {
463 match op {
464 Opcode::GetField(k) | Opcode::OptField(k) | Opcode::Descendant(k) => {
465 if !acc.iter().any(|a: &Arc<str>| a == k) { acc.push(k.clone()); }
466 }
467 Opcode::RootChain(chain) => {
468 for k in chain.iter() {
469 if !acc.iter().any(|a: &Arc<str>| a == k) { acc.push(k.clone()); }
470 }
471 }
472 Opcode::AndOp(p) | Opcode::OrOp(p) | Opcode::CoalesceOp(p)
473 | Opcode::InlineFilter(p) | Opcode::FilterCount(p)
474 | Opcode::FindFirst(p) | Opcode::FindOne(p)
475 | Opcode::DynIndex(p)
476 | Opcode::MapSum(p) | Opcode::MapAvg(p)
477 | Opcode::MapFlatten(p) => collect_fields_in_ops(&p.ops, acc),
478 Opcode::FilterTakeWhile { pred, stop } => {
479 collect_fields_in_ops(&pred.ops, acc);
480 collect_fields_in_ops(&stop.ops, acc);
481 }
482 Opcode::FilterMap { pred, map } => {
483 collect_fields_in_ops(&pred.ops, acc);
484 collect_fields_in_ops(&map.ops, acc);
485 }
486 Opcode::FilterFilter { p1, p2 } => {
487 collect_fields_in_ops(&p1.ops, acc);
488 collect_fields_in_ops(&p2.ops, acc);
489 }
490 Opcode::MapMap { f1, f2 } => {
491 collect_fields_in_ops(&f1.ops, acc);
492 collect_fields_in_ops(&f2.ops, acc);
493 }
494 Opcode::CallMethod(c) | Opcode::CallOptMethod(c) => {
495 for p in c.sub_progs.iter() { collect_fields_in_ops(&p.ops, acc); }
496 }
497 Opcode::LetExpr { body, .. } => collect_fields_in_ops(&body.ops, acc),
498 _ => {}
499 }
500 }
501}
502
503pub fn program_signature(program: &Program) -> u64 {
509 use std::collections::hash_map::DefaultHasher;
510 use std::hash::Hasher;
511 let mut h = DefaultHasher::new();
512 hash_ops(&program.ops, &mut h);
513 h.finish()
514}
515
516fn hash_ops(ops: &[Opcode], h: &mut impl std::hash::Hasher) {
517 use std::hash::Hash;
518 for op in ops {
519 std::mem::discriminant(op).hash(h);
521 match op {
522 Opcode::PushInt(n) => n.hash(h),
523 Opcode::PushStr(s) => s.as_bytes().hash(h),
524 Opcode::PushBool(b) => b.hash(h),
525 Opcode::GetField(k) | Opcode::OptField(k) | Opcode::Descendant(k)
526 | Opcode::LoadIdent(k) | Opcode::GetPointer(k)
527 => k.as_bytes().hash(h),
528 Opcode::GetIndex(i) => i.hash(h),
529 Opcode::CallMethod(c) | Opcode::CallOptMethod(c) => {
530 (c.method as u8).hash(h);
531 for p in c.sub_progs.iter() { hash_ops(&p.ops, h); }
532 }
533 Opcode::AndOp(p) | Opcode::OrOp(p) | Opcode::CoalesceOp(p)
534 | Opcode::InlineFilter(p) | Opcode::FilterCount(p)
535 | Opcode::FindFirst(p) | Opcode::FindOne(p)
536 | Opcode::DynIndex(p)
537 | Opcode::MapSum(p) | Opcode::MapAvg(p)
538 | Opcode::MapFlatten(p) => hash_ops(&p.ops, h),
539 Opcode::FilterTakeWhile { pred, stop } => {
540 hash_ops(&pred.ops, h);
541 hash_ops(&stop.ops, h);
542 }
543 Opcode::RootChain(chain) => {
544 for k in chain.iter() { k.as_bytes().hash(h); }
545 }
546 _ => {}
547 }
548 }
549}
550
551pub fn find_common_subexprs(program: &Program) -> HashMap<u64, usize> {
557 let mut map: HashMap<u64, usize> = HashMap::new();
558 walk_subprograms(&program.ops, &mut map);
559 map.retain(|_, &mut n| n >= 2);
560 map
561}
562
563fn walk_subprograms(ops: &[Opcode], map: &mut HashMap<u64, usize>) {
564 for op in ops {
565 let sub_progs: Vec<&Arc<Program>> = match op {
566 Opcode::AndOp(p) | Opcode::OrOp(p) | Opcode::CoalesceOp(p)
567 | Opcode::InlineFilter(p) | Opcode::FilterCount(p)
568 | Opcode::FindFirst(p) | Opcode::FindOne(p)
569 | Opcode::DynIndex(p)
570 | Opcode::MapSum(p) | Opcode::MapAvg(p)
571 | Opcode::MapFlatten(p) => vec![p],
572 Opcode::FilterTakeWhile { pred, stop } => vec![pred, stop],
573 Opcode::FilterMap { pred, map: m } => vec![pred, m],
574 Opcode::FilterFilter { p1, p2 } => vec![p1, p2],
575 Opcode::MapMap { f1, f2 } => vec![f1, f2],
576 Opcode::CallMethod(c) | Opcode::CallOptMethod(c) =>
577 c.sub_progs.iter().collect(),
578 Opcode::LetExpr { body, .. } => vec![body],
579 Opcode::MakeArr(progs) => progs.iter().collect(),
580 Opcode::MakeObj(entries) => {
581 use super::vm::CompiledObjEntry;
582 let mut v = Vec::new();
583 for e in entries.iter() {
584 match e {
585 CompiledObjEntry::Short(_) => {}
586 CompiledObjEntry::Kv { prog, cond, .. } => {
587 v.push(prog);
588 if let Some(c) = cond { v.push(c); }
589 }
590 CompiledObjEntry::Dynamic { key, val } => { v.push(key); v.push(val); }
591 CompiledObjEntry::Spread(p) => v.push(p),
592 CompiledObjEntry::SpreadDeep(p) => v.push(p),
593 }
594 }
595 v
596 }
597 _ => continue,
598 };
599 for p in sub_progs {
600 let sig = program_signature(p);
601 *map.entry(sig).or_insert(0) += 1;
602 walk_subprograms(&p.ops, map);
603 }
604 }
605}
606
607pub fn expr_uses_ident(expr: &super::ast::Expr, name: &str) -> bool {
614 use super::ast::{Expr, Step, PipeStep, BindTarget, FStringPart, ArrayElem, ObjField, Arg};
615 match expr {
616 Expr::Ident(n) => n == name,
617 Expr::Null | Expr::Bool(_) | Expr::Int(_) | Expr::Float(_)
618 | Expr::Str(_) | Expr::Root | Expr::Current => false,
619 Expr::FString(parts) => parts.iter().any(|p| match p {
620 FStringPart::Lit(_) => false,
621 FStringPart::Interp { expr, .. } => expr_uses_ident(expr, name),
622 }),
623 Expr::Chain(base, steps) => {
624 if expr_uses_ident(base, name) { return true; }
625 steps.iter().any(|s| match s {
626 Step::DynIndex(e) | Step::InlineFilter(e) => expr_uses_ident(e, name),
627 Step::Method(_, args) | Step::OptMethod(_, args) =>
628 args.iter().any(|a| match a {
629 Arg::Pos(e) | Arg::Named(_, e) => expr_uses_ident(e, name),
630 }),
631 _ => false,
632 })
633 }
634 Expr::BinOp(l, _, r) => expr_uses_ident(l, name) || expr_uses_ident(r, name),
635 Expr::UnaryNeg(e) | Expr::Not(e) => expr_uses_ident(e, name),
636 Expr::Kind { expr, .. } => expr_uses_ident(expr, name),
637 Expr::Coalesce(l, r) => expr_uses_ident(l, name) || expr_uses_ident(r, name),
638 Expr::Object(fields) => fields.iter().any(|f| match f {
639 ObjField::Kv { val, cond, .. } =>
640 expr_uses_ident(val, name)
641 || cond.as_ref().map_or(false, |c| expr_uses_ident(c, name)),
642 ObjField::Short(n) => n == name,
643 ObjField::Dynamic { key, val } =>
644 expr_uses_ident(key, name) || expr_uses_ident(val, name),
645 ObjField::Spread(e) => expr_uses_ident(e, name),
646 ObjField::SpreadDeep(e) => expr_uses_ident(e, name),
647 }),
648 Expr::Array(elems) => elems.iter().any(|e| match e {
649 ArrayElem::Expr(e) | ArrayElem::Spread(e) => expr_uses_ident(e, name),
650 }),
651 Expr::Pipeline { base, steps } => {
652 if expr_uses_ident(base, name) { return true; }
653 steps.iter().any(|s| match s {
654 PipeStep::Forward(e) => expr_uses_ident(e, name),
655 PipeStep::Bind(bt) => match bt {
656 BindTarget::Name(n) => n == name,
657 BindTarget::Obj { fields, rest } =>
658 fields.iter().any(|f| f == name)
659 || rest.as_ref().map_or(false, |r| r == name),
660 BindTarget::Arr(ns) => ns.iter().any(|n| n == name),
661 },
662 })
663 }
664 Expr::ListComp { expr, vars, iter, cond }
665 | Expr::SetComp { expr, vars, iter, cond }
666 | Expr::GenComp { expr, vars, iter, cond } => {
667 if expr_uses_ident(iter, name) { return true; }
668 if vars.iter().any(|v| v == name) { return false; } expr_uses_ident(expr, name)
670 || cond.as_ref().map_or(false, |c| expr_uses_ident(c, name))
671 }
672 Expr::DictComp { key, val, vars, iter, cond } => {
673 if expr_uses_ident(iter, name) { return true; }
674 if vars.iter().any(|v| v == name) { return false; }
675 expr_uses_ident(key, name) || expr_uses_ident(val, name)
676 || cond.as_ref().map_or(false, |c| expr_uses_ident(c, name))
677 }
678 Expr::Lambda { params, body } => {
679 if params.iter().any(|p| p == name) { return false; }
680 expr_uses_ident(body, name)
681 }
682 Expr::Let { name: n, init, body } => {
683 if expr_uses_ident(init, name) { return true; }
684 if n == name { return false; } expr_uses_ident(body, name)
686 }
687 Expr::GlobalCall { args, .. } => args.iter().any(|a| match a {
688 Arg::Pos(e) | Arg::Named(_, e) => expr_uses_ident(e, name),
689 }),
690 Expr::Cast { expr, .. } => expr_uses_ident(expr, name),
691 Expr::Patch { root, ops } => {
692 use super::ast::PathStep;
693 if expr_uses_ident(root, name) { return true; }
694 ops.iter().any(|op| {
695 op.path.iter().any(|s| match s {
696 PathStep::WildcardFilter(e) => expr_uses_ident(e, name),
697 _ => false,
698 })
699 || expr_uses_ident(&op.val, name)
700 || op.cond.as_ref().map_or(false, |c| expr_uses_ident(c, name))
701 })
702 }
703 Expr::DeleteMark => false,
704 }
705}
706
707pub fn expr_is_pure(expr: &super::ast::Expr) -> bool {
710 use super::ast::{Expr, Step, Arg};
711 match expr {
712 Expr::Null | Expr::Bool(_) | Expr::Int(_) | Expr::Float(_)
713 | Expr::Str(_) | Expr::Root | Expr::Current | Expr::Ident(_) => true,
714 Expr::FString(_) => true,
715 Expr::Chain(base, steps) => {
716 if !expr_is_pure(base) { return false; }
717 steps.iter().all(|s| match s {
718 Step::DynIndex(e) | Step::InlineFilter(e) => expr_is_pure(e),
719 Step::Method(_, args) | Step::OptMethod(_, args) =>
720 args.iter().all(|a| match a {
721 Arg::Pos(e) | Arg::Named(_, e) => expr_is_pure(e),
722 }),
723 _ => true,
724 })
725 }
726 Expr::BinOp(l, _, r) | Expr::Coalesce(l, r) =>
727 expr_is_pure(l) && expr_is_pure(r),
728 Expr::UnaryNeg(e) | Expr::Not(e) | Expr::Kind { expr: e, .. } =>
729 expr_is_pure(e),
730 _ => true,
732 }
733}
734
735pub fn dedup_subprograms(program: &Program) -> Arc<Program> {
744 let mut cache: HashMap<u64, Arc<Program>> = HashMap::new();
745 dedup_rec(program, &mut cache)
746}
747
748fn dedup_rec(program: &Program, cache: &mut HashMap<u64, Arc<Program>>) -> Arc<Program> {
749 let sig = program_signature(program);
750 if let Some(a) = cache.get(&sig) { return Arc::clone(a); }
751 let new_ops: Vec<Opcode> = program.ops.iter().map(|op| rewrite_op(op, cache)).collect();
752 let out = Arc::new(Program {
753 ops: new_ops.into(),
754 source: program.source.clone(),
755 id: program.id,
756 is_structural: program.is_structural,
757 });
758 cache.insert(sig, Arc::clone(&out));
759 out
760}
761
762fn rewrite_op(op: &Opcode, cache: &mut HashMap<u64, Arc<Program>>) -> Opcode {
763 use super::vm::{CompiledObjEntry, CompiledFSPart, CompSpec, DictCompSpec};
764 match op {
765 Opcode::AndOp(p) => Opcode::AndOp(dedup_rec(p, cache)),
766 Opcode::OrOp(p) => Opcode::OrOp(dedup_rec(p, cache)),
767 Opcode::CoalesceOp(p) => Opcode::CoalesceOp(dedup_rec(p, cache)),
768 Opcode::InlineFilter(p) => Opcode::InlineFilter(dedup_rec(p, cache)),
769 Opcode::FilterCount(p) => Opcode::FilterCount(dedup_rec(p, cache)),
770 Opcode::MapSum(p) => Opcode::MapSum(dedup_rec(p, cache)),
771 Opcode::MapAvg(p) => Opcode::MapAvg(dedup_rec(p, cache)),
772 Opcode::MapFlatten(p) => Opcode::MapFlatten(dedup_rec(p, cache)),
773 Opcode::FilterTakeWhile { pred, stop } => Opcode::FilterTakeWhile {
774 pred: dedup_rec(pred, cache),
775 stop: dedup_rec(stop, cache),
776 },
777 Opcode::FindFirst(p) => Opcode::FindFirst(dedup_rec(p, cache)),
778 Opcode::FindOne(p) => Opcode::FindOne(dedup_rec(p, cache)),
779 Opcode::DynIndex(p) => Opcode::DynIndex(dedup_rec(p, cache)),
780 Opcode::FilterMap { pred, map } => Opcode::FilterMap {
781 pred: dedup_rec(pred, cache),
782 map: dedup_rec(map, cache),
783 },
784 Opcode::FilterFilter { p1, p2 } => Opcode::FilterFilter {
785 p1: dedup_rec(p1, cache),
786 p2: dedup_rec(p2, cache),
787 },
788 Opcode::MapMap { f1, f2 } => Opcode::MapMap {
789 f1: dedup_rec(f1, cache),
790 f2: dedup_rec(f2, cache),
791 },
792 Opcode::LetExpr { name, body } => Opcode::LetExpr {
793 name: name.clone(),
794 body: dedup_rec(body, cache),
795 },
796 Opcode::CallMethod(c) => Opcode::CallMethod(rewrite_call(c, cache)),
797 Opcode::CallOptMethod(c) => Opcode::CallOptMethod(rewrite_call(c, cache)),
798 Opcode::MakeArr(progs) => {
799 let new_progs: Vec<Arc<Program>> = progs.iter().map(|p| dedup_rec(p, cache)).collect();
800 Opcode::MakeArr(new_progs.into())
801 }
802 Opcode::MakeObj(entries) => {
803 let new_entries: Vec<CompiledObjEntry> = entries.iter().map(|e| match e {
804 CompiledObjEntry::Short(s) => CompiledObjEntry::Short(s.clone()),
805 CompiledObjEntry::Kv { key, prog, optional, cond } => CompiledObjEntry::Kv {
806 key: key.clone(),
807 prog: dedup_rec(prog, cache),
808 optional: *optional,
809 cond: cond.as_ref().map(|c| dedup_rec(c, cache)),
810 },
811 CompiledObjEntry::Dynamic { key, val } => CompiledObjEntry::Dynamic {
812 key: dedup_rec(key, cache),
813 val: dedup_rec(val, cache),
814 },
815 CompiledObjEntry::Spread(p) => CompiledObjEntry::Spread(dedup_rec(p, cache)),
816 CompiledObjEntry::SpreadDeep(p) => CompiledObjEntry::SpreadDeep(dedup_rec(p, cache)),
817 }).collect();
818 Opcode::MakeObj(new_entries.into())
819 }
820 Opcode::FString(parts) => {
821 let new_parts: Vec<CompiledFSPart> = parts.iter().map(|p| match p {
822 CompiledFSPart::Lit(s) => CompiledFSPart::Lit(s.clone()),
823 CompiledFSPart::Interp { prog, fmt } => CompiledFSPart::Interp {
824 prog: dedup_rec(prog, cache),
825 fmt: fmt.clone(),
826 },
827 }).collect();
828 Opcode::FString(new_parts.into())
829 }
830 Opcode::ListComp(spec) => {
831 let new_spec = CompSpec {
832 expr: dedup_rec(&spec.expr, cache),
833 iter: dedup_rec(&spec.iter, cache),
834 cond: spec.cond.as_ref().map(|c| dedup_rec(c, cache)),
835 vars: spec.vars.clone(),
836 };
837 Opcode::ListComp(Arc::new(new_spec))
838 }
839 Opcode::SetComp(spec) => {
840 let new_spec = CompSpec {
841 expr: dedup_rec(&spec.expr, cache),
842 iter: dedup_rec(&spec.iter, cache),
843 cond: spec.cond.as_ref().map(|c| dedup_rec(c, cache)),
844 vars: spec.vars.clone(),
845 };
846 Opcode::SetComp(Arc::new(new_spec))
847 }
848 Opcode::DictComp(spec) => {
849 let new_spec = DictCompSpec {
850 key: dedup_rec(&spec.key, cache),
851 val: dedup_rec(&spec.val, cache),
852 iter: dedup_rec(&spec.iter, cache),
853 cond: spec.cond.as_ref().map(|c| dedup_rec(c, cache)),
854 vars: spec.vars.clone(),
855 };
856 Opcode::DictComp(Arc::new(new_spec))
857 }
858 _ => op.clone(),
859 }
860}
861
862fn rewrite_call(c: &Arc<super::vm::CompiledCall>, cache: &mut HashMap<u64, Arc<Program>>) -> Arc<super::vm::CompiledCall> {
863 use super::vm::CompiledCall;
864 let new_subs: Vec<Arc<Program>> = c.sub_progs.iter().map(|p| dedup_rec(p, cache)).collect();
865 Arc::new(CompiledCall {
866 method: c.method,
867 name: c.name.clone(),
868 sub_progs: new_subs.into(),
869 orig_args: c.orig_args.clone(),
870 })
871}
872
873pub fn opcode_cost(op: &Opcode) -> u32 {
878 match op {
879 Opcode::PushNull | Opcode::PushBool(_) | Opcode::PushInt(_)
880 | Opcode::PushFloat(_) | Opcode::PushStr(_)
881 | Opcode::PushRoot | Opcode::PushCurrent | Opcode::LoadIdent(_) => 1,
882 Opcode::GetField(_) | Opcode::OptField(_) | Opcode::GetIndex(_)
883 | Opcode::RootChain(_) | Opcode::GetPointer(_) => 2,
884 Opcode::GetSlice(..) | Opcode::Descendant(_) => 5,
885 Opcode::DescendAll => 20,
886 Opcode::Not | Opcode::Neg | Opcode::SetCurrent
887 | Opcode::BindVar(_) | Opcode::StoreVar(_)
888 | Opcode::BindObjDestructure(_) | Opcode::BindArrDestructure(_) => 1,
889 Opcode::Add | Opcode::Sub | Opcode::Mul | Opcode::Div | Opcode::Mod
890 | Opcode::Eq | Opcode::Neq | Opcode::Lt | Opcode::Lte
891 | Opcode::Gt | Opcode::Gte | Opcode::Fuzzy => 2,
892 Opcode::KindCheck { .. } => 2,
893 Opcode::AndOp(p) | Opcode::OrOp(p) | Opcode::CoalesceOp(p) => 2 + program_cost(p),
894 Opcode::InlineFilter(p) | Opcode::FilterCount(p)
895 | Opcode::FindFirst(p) | Opcode::FindOne(p)
896 | Opcode::MapSum(p) | Opcode::MapAvg(p)
897 | Opcode::MapFlatten(p)
898 | Opcode::DynIndex(p) => 10 + program_cost(p),
899 Opcode::FilterTakeWhile { pred, stop } => 10 + program_cost(pred) + program_cost(stop),
900 Opcode::FilterDropWhile { pred, drop } => 10 + program_cost(pred) + program_cost(drop),
901 Opcode::MapUnique(p) => 15 + program_cost(p),
902 Opcode::EquiJoin { rhs, .. } => 25 + program_cost(rhs),
903 Opcode::FilterMap { pred, map } => 10 + program_cost(pred) + program_cost(map),
904 Opcode::FilterFilter { p1, p2 } => 10 + program_cost(p1) + program_cost(p2),
905 Opcode::MapMap { f1, f2 } => 10 + program_cost(f1) + program_cost(f2),
906 Opcode::TopN { n, .. } => 15 + *n as u32,
907 Opcode::CallMethod(c) | Opcode::CallOptMethod(c) => {
908 let base = match c.method {
909 BuiltinMethod::Filter | BuiltinMethod::Map | BuiltinMethod::FlatMap => 10,
910 BuiltinMethod::Sort => 30,
911 BuiltinMethod::GroupBy | BuiltinMethod::IndexBy => 25,
912 BuiltinMethod::Len | BuiltinMethod::Count => 2,
913 _ => 8,
914 };
915 base + c.sub_progs.iter().map(|p| program_cost(p)).sum::<u32>()
916 }
917 Opcode::MakeObj(entries) => {
918 use super::vm::CompiledObjEntry;
919 entries.iter().map(|e| match e {
920 CompiledObjEntry::Short(_) => 2,
921 CompiledObjEntry::Kv { prog, cond, .. } =>
922 2 + program_cost(prog) + cond.as_ref().map_or(0, |c| program_cost(c)),
923 CompiledObjEntry::Dynamic { key, val } => 3 + program_cost(key) + program_cost(val),
924 CompiledObjEntry::Spread(p) => 5 + program_cost(p),
925 CompiledObjEntry::SpreadDeep(p) => 8 + program_cost(p),
926 }).sum()
927 }
928 Opcode::MakeArr(progs) => progs.iter().map(|p| 1 + program_cost(p)).sum(),
929 Opcode::FString(parts) => {
930 use super::vm::CompiledFSPart;
931 parts.iter().map(|p| match p {
932 CompiledFSPart::Lit(_) => 1,
933 CompiledFSPart::Interp { prog, .. } => 3 + program_cost(prog),
934 }).sum()
935 }
936 Opcode::ListComp(s) | Opcode::SetComp(s) =>
937 15 + program_cost(&s.expr) + program_cost(&s.iter)
938 + s.cond.as_ref().map_or(0, |c| program_cost(c)),
939 Opcode::DictComp(s) =>
940 15 + program_cost(&s.key) + program_cost(&s.val) + program_cost(&s.iter)
941 + s.cond.as_ref().map_or(0, |c| program_cost(c)),
942 Opcode::LetExpr { body, .. } => 2 + program_cost(body),
943 Opcode::Quantifier(_) => 2,
944 Opcode::CastOp(_) => 2,
945 Opcode::PatchEval(_) => 50,
946 }
947}
948
949pub fn program_cost(program: &Program) -> u32 {
951 program.ops.iter().map(opcode_cost).sum()
952}
953
954#[derive(Debug, Clone, Copy, PartialEq, Eq)]
958pub enum Monotonicity {
959 Unknown,
961 Asc,
963 Desc,
965 NotArray,
967}
968
969impl Monotonicity {
970 pub fn after(self, op: &Opcode) -> Monotonicity {
971 match op {
972 Opcode::CallMethod(c) if c.sub_progs.is_empty() => match c.method {
973 BuiltinMethod::Sort => Monotonicity::Asc,
974 BuiltinMethod::Reverse => match self {
975 Monotonicity::Asc => Monotonicity::Desc,
976 Monotonicity::Desc => Monotonicity::Asc,
977 x => x,
978 },
979 BuiltinMethod::Filter => self, BuiltinMethod::Map => Monotonicity::Unknown, _ => Monotonicity::Unknown,
982 }
983 Opcode::TopN { asc, .. } => if *asc { Monotonicity::Asc } else { Monotonicity::Desc },
984 Opcode::MakeArr(_) | Opcode::ListComp(_) => Monotonicity::Unknown,
985 _ => self,
986 }
987 }
988}
989
990pub fn infer_monotonicity(program: &Program) -> Monotonicity {
992 let mut m = Monotonicity::Unknown;
993 for op in program.ops.iter() { m = m.after(op); }
994 m
995}
996
997pub fn escapes_doc(program: &Program) -> bool {
1006 for op in program.ops.iter() {
1007 match op {
1008 Opcode::PushRoot | Opcode::PushCurrent | Opcode::RootChain(_)
1009 | Opcode::GetField(_) | Opcode::GetIndex(_) | Opcode::GetSlice(..)
1010 | Opcode::Descendant(_) | Opcode::DescendAll
1011 | Opcode::GetPointer(_) | Opcode::OptField(_) => return true,
1012 Opcode::CallMethod(c) | Opcode::CallOptMethod(c)
1013 if c.sub_progs.iter().any(|p| escapes_doc(p)) => return true,
1014 _ => {}
1015 }
1016 }
1017 false
1018}
1019
1020pub fn selectivity_score(expr: &super::ast::Expr) -> u32 {
1026 use super::ast::{Expr, BinOp};
1027 match expr {
1028 Expr::Bool(true) => 1000, Expr::Bool(false) => 0, Expr::BinOp(_, BinOp::Eq, _) => 1,
1031 Expr::BinOp(_, BinOp::Neq, _) => 5,
1032 Expr::BinOp(_, BinOp::Lt, _) | Expr::BinOp(_, BinOp::Gt, _)
1033 | Expr::BinOp(_, BinOp::Lte, _) | Expr::BinOp(_, BinOp::Gte, _) => 3,
1034 Expr::BinOp(_, BinOp::Fuzzy, _) => 2,
1035 Expr::BinOp(l, BinOp::And, r) =>
1036 selectivity_score(l).min(selectivity_score(r)),
1037 Expr::BinOp(l, BinOp::Or, r) =>
1038 selectivity_score(l) + selectivity_score(r),
1039 Expr::Not(e) => 10u32.saturating_sub(selectivity_score(e)),
1040 Expr::Kind { .. } => 2,
1041 _ => 5,
1042 }
1043}
1044
1045pub fn fold_kind_check(val_ty: VType, target: KindType, negate: bool) -> Option<bool> {
1050 let matches = match (val_ty, target) {
1051 (VType::Null, KindType::Null) => true,
1052 (VType::Bool, KindType::Bool) => true,
1053 (VType::Int | VType::Float | VType::Num, KindType::Number) => true,
1054 (VType::Str, KindType::Str) => true,
1055 (VType::Arr, KindType::Array) => true,
1056 (VType::Obj, KindType::Object) => true,
1057 (VType::Unknown, _) => return None,
1058 _ => false,
1059 };
1060 Some(if negate { !matches } else { matches })
1061}