1use std::sync::Arc;
16use std::collections::HashMap;
17
18use super::vm::{Program, Opcode, BuiltinMethod};
19use super::ast::KindType;
20
21#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
24pub enum VType {
25 Bottom,
27 Null,
28 Bool,
29 Int,
30 Float,
31 Num,
33 Str,
34 Arr,
35 Obj,
36 Unknown,
38}
39
40impl VType {
41 pub fn join(self, other: VType) -> VType {
43 if self == other { return self; }
44 match (self, other) {
45 (VType::Bottom, x) | (x, VType::Bottom) => x,
46 (VType::Int, VType::Float) | (VType::Float, VType::Int)
47 | (VType::Int, VType::Num) | (VType::Num, VType::Int)
48 | (VType::Float, VType::Num) | (VType::Num, VType::Float)
49 => VType::Num,
50 _ => VType::Unknown,
51 }
52 }
53
54 pub fn is_array_like(self) -> bool { matches!(self, VType::Arr) }
55 pub fn is_object_like(self) -> bool { matches!(self, VType::Obj) }
56 pub fn is_numeric(self) -> bool { matches!(self, VType::Int | VType::Float | VType::Num) }
57 pub fn is_string(self) -> bool { matches!(self, VType::Str) }
58}
59
60#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
63pub enum Nullness {
64 AlwaysNull,
66 NonNull,
68 MaybeNull,
70}
71
72impl Nullness {
73 pub fn join(self, other: Nullness) -> Nullness {
74 if self == other { return self; }
75 Nullness::MaybeNull
76 }
77}
78
79#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
82pub enum Cardinality {
83 Zero,
85 One,
87 ZeroOrOne,
89 Many,
91 NotArray,
93 Unknown,
95}
96
97impl Cardinality {
98 pub fn join(self, other: Cardinality) -> Cardinality {
99 if self == other { return self; }
100 match (self, other) {
101 (Cardinality::Zero, Cardinality::One)
102 | (Cardinality::One, Cardinality::Zero) => Cardinality::ZeroOrOne,
103 _ => Cardinality::Unknown,
104 }
105 }
106}
107
108#[derive(Debug, Clone, Copy, PartialEq, Eq)]
111pub struct AbstractVal {
112 pub ty: VType,
113 pub null: Nullness,
114 pub card: Cardinality,
115}
116
117impl AbstractVal {
118 pub const UNKNOWN: Self = Self {
119 ty: VType::Unknown, null: Nullness::MaybeNull, card: Cardinality::Unknown,
120 };
121 pub const NULL: Self = Self {
122 ty: VType::Null, null: Nullness::AlwaysNull, card: Cardinality::NotArray,
123 };
124 pub fn scalar(ty: VType) -> Self {
125 Self { ty, null: Nullness::NonNull, card: Cardinality::NotArray }
126 }
127 pub fn array() -> Self {
128 Self { ty: VType::Arr, null: Nullness::NonNull, card: Cardinality::Many }
129 }
130 pub fn object() -> Self {
131 Self { ty: VType::Obj, null: Nullness::NonNull, card: Cardinality::NotArray }
132 }
133 pub fn join(self, other: AbstractVal) -> AbstractVal {
134 AbstractVal {
135 ty: self.ty.join(other.ty),
136 null: self.null.join(other.null),
137 card: self.card.join(other.card),
138 }
139 }
140}
141
142pub fn infer_result_type(program: &Program) -> AbstractVal {
147 let mut stack: Vec<AbstractVal> = Vec::with_capacity(16);
148 let mut env: HashMap<Arc<str>, AbstractVal> = HashMap::new();
149 for op in program.ops.iter() { apply_op_env(op, &mut stack, &mut env); }
150 stack.pop().unwrap_or(AbstractVal::UNKNOWN)
151}
152
153pub fn infer_result_type_with_env(program: &Program)
156 -> (AbstractVal, HashMap<Arc<str>, AbstractVal>)
157{
158 let mut stack: Vec<AbstractVal> = Vec::with_capacity(16);
159 let mut env: HashMap<Arc<str>, AbstractVal> = HashMap::new();
160 for op in program.ops.iter() { apply_op_env(op, &mut stack, &mut env); }
161 (stack.pop().unwrap_or(AbstractVal::UNKNOWN), env)
162}
163
164fn apply_op_env(op: &Opcode, stack: &mut Vec<AbstractVal>,
165 env: &mut HashMap<Arc<str>, AbstractVal>) {
166 match op {
168 Opcode::LoadIdent(name) => {
169 let av = env.get(name).copied().unwrap_or(AbstractVal::UNKNOWN);
170 stack.push(av);
171 }
172 Opcode::BindVar(name) => {
173 if let Some(top) = stack.last().copied() { env.insert(name.clone(), top); }
175 }
176 Opcode::StoreVar(name) => {
177 let v = stack.pop().unwrap_or(AbstractVal::UNKNOWN);
178 env.insert(name.clone(), v);
179 }
180 Opcode::LetExpr { name, body } => {
181 let init = stack.pop().unwrap_or(AbstractVal::UNKNOWN);
182 let saved = env.get(name).copied();
183 env.insert(name.clone(), init);
184 let mut sub_stack: Vec<AbstractVal> = Vec::with_capacity(8);
185 for op2 in body.ops.iter() { apply_op_env(op2, &mut sub_stack, env); }
186 let res = sub_stack.pop().unwrap_or(AbstractVal::UNKNOWN);
187 match saved {
189 Some(v) => { env.insert(name.clone(), v); }
190 None => { env.remove(name); }
191 }
192 stack.push(res);
193 }
194 _ => apply_op(op, stack),
195 }
196}
197
198fn apply_op(op: &Opcode, stack: &mut Vec<AbstractVal>) {
199 macro_rules! pop2 { () => {{ let r = stack.pop().unwrap_or(AbstractVal::UNKNOWN); let l = stack.pop().unwrap_or(AbstractVal::UNKNOWN); (l, r) }} }
200 macro_rules! pop1 { () => { stack.pop().unwrap_or(AbstractVal::UNKNOWN) } }
201 match op {
202 Opcode::PushNull => stack.push(AbstractVal::NULL),
203 Opcode::PushBool(_) => stack.push(AbstractVal::scalar(VType::Bool)),
204 Opcode::PushInt(_) => stack.push(AbstractVal::scalar(VType::Int)),
205 Opcode::PushFloat(_) => stack.push(AbstractVal::scalar(VType::Float)),
206 Opcode::PushStr(_) => stack.push(AbstractVal::scalar(VType::Str)),
207 Opcode::PushRoot | Opcode::PushCurrent | Opcode::LoadIdent(_)
208 => stack.push(AbstractVal::UNKNOWN),
209 Opcode::GetField(_) | Opcode::OptField(_) | Opcode::FieldChain(_) => {
210 pop1!();
211 stack.push(AbstractVal::UNKNOWN);
212 }
213 Opcode::GetIndex(_) | Opcode::DynIndex(_) => {
214 pop1!();
215 stack.push(AbstractVal::UNKNOWN);
216 }
217 Opcode::GetSlice(_, _) => {
218 pop1!();
219 stack.push(AbstractVal::array());
220 }
221 Opcode::Descendant(_) | Opcode::DescendAll => {
222 pop1!();
223 stack.push(AbstractVal::array());
224 }
225 Opcode::InlineFilter(_) => {
226 pop1!();
227 stack.push(AbstractVal::array());
228 }
229 Opcode::Quantifier(kind) => {
230 pop1!();
231 use super::ast::QuantifierKind;
232 let card = match kind {
233 QuantifierKind::First => Cardinality::ZeroOrOne,
234 QuantifierKind::One => Cardinality::One,
235 };
236 stack.push(AbstractVal { ty: VType::Unknown, null: Nullness::MaybeNull, card });
237 }
238 Opcode::RootChain(_) => stack.push(AbstractVal::UNKNOWN),
239 Opcode::FilterCount(_) => {
240 pop1!();
241 stack.push(AbstractVal::scalar(VType::Int));
242 }
243 Opcode::FindFirst(_) | Opcode::FindOne(_) => {
244 pop1!();
245 stack.push(AbstractVal::UNKNOWN);
246 }
247 Opcode::FilterMap { .. } | Opcode::FilterFilter { .. }
248 | Opcode::MapMap { .. } | Opcode::MapFilter { .. } => {
249 pop1!();
250 stack.push(AbstractVal::array());
251 }
252 Opcode::MapSum(_) | Opcode::FilterMapSum { .. }
253 | Opcode::MapMin(_) | Opcode::MapMax(_)
254 | Opcode::FilterMapMin { .. } | Opcode::FilterMapMax { .. }
255 | Opcode::MapFieldSum(_) | Opcode::MapFieldMin(_) | Opcode::MapFieldMax(_) => {
256 pop1!();
257 stack.push(AbstractVal::scalar(VType::Num));
258 }
259 Opcode::MapAvg(_) | Opcode::FilterMapAvg { .. } | Opcode::MapFieldAvg(_) => {
260 pop1!();
261 stack.push(AbstractVal::scalar(VType::Float));
262 }
263 Opcode::MapToJsonJoin { .. } => {
264 pop1!();
265 stack.push(AbstractVal::scalar(VType::Str));
266 }
267 Opcode::StrTrimUpper | Opcode::StrTrimLower
268 | Opcode::StrUpperTrim | Opcode::StrLowerTrim
269 | Opcode::StrSplitReverseJoin { .. } => {
270 pop1!();
271 stack.push(AbstractVal::scalar(VType::Str));
272 }
273 Opcode::MapReplaceLit { .. }
274 | Opcode::MapUpperReplaceLit { .. }
275 | Opcode::MapLowerReplaceLit { .. }
276 | Opcode::MapStrConcat { .. } => {
277 pop1!();
278 stack.push(AbstractVal::array());
279 }
280 Opcode::MapSplitCount { .. }
281 | Opcode::MapSplitFirst { .. }
282 | Opcode::MapSplitNth { .. } => {
283 pop1!();
284 stack.push(AbstractVal::array());
285 }
286 Opcode::MapSplitCountSum { .. } => {
287 pop1!();
288 stack.push(AbstractVal::scalar(VType::Int));
289 }
290 Opcode::MapSplitLenSum { .. }
291 | Opcode::MapProject { .. }
292 | Opcode::MapStrSlice { .. }
293 | Opcode::MapFString(_) => {
294 pop1!();
295 stack.push(AbstractVal::array());
296 }
297 Opcode::FilterFieldEqLitCount(_, _)
298 | Opcode::FilterFieldCmpLitCount(_, _, _)
299 | Opcode::FilterFieldCmpFieldCount(_, _, _)
300 | Opcode::FilterFieldsAllEqLitCount(_)
301 | Opcode::FilterFieldsAllCmpLitCount(_)
302 | Opcode::UniqueCount => {
303 pop1!();
304 stack.push(AbstractVal::scalar(VType::Int));
305 }
306 Opcode::TopN { .. } | Opcode::MapFlatten(_) | Opcode::FilterTakeWhile { .. }
307 | Opcode::FilterDropWhile { .. } | Opcode::MapUnique(_)
308 | Opcode::EquiJoin { .. }
309 | Opcode::MapField(_) | Opcode::MapFieldChain(_) | Opcode::MapFieldUnique(_)
310 | Opcode::MapFieldChainUnique(_)
311 | Opcode::FlatMapChain(_)
312 | Opcode::FilterFieldEqLit(_, _) | Opcode::FilterFieldCmpLit(_, _, _)
313 | Opcode::FilterCurrentCmpLit(_, _)
314 | Opcode::FilterStrVecStartsWith(_)
315 | Opcode::FilterStrVecEndsWith(_)
316 | Opcode::FilterStrVecContains(_)
317 | Opcode::MapStrVecUpper
318 | Opcode::MapStrVecLower
319 | Opcode::MapStrVecTrim
320 | Opcode::MapNumVecArith { .. }
321 | Opcode::MapNumVecNeg
322 | Opcode::FilterFieldCmpField(_, _, _)
323 | Opcode::FilterFieldEqLitMapField(_, _, _)
324 | Opcode::FilterFieldCmpLitMapField(_, _, _, _)
325 | Opcode::GroupByField(_)
326 | Opcode::CountByField(_)
327 | Opcode::UniqueByField(_) => {
328 pop1!();
329 stack.push(AbstractVal::array());
330 }
331 Opcode::MapFirst(_) | Opcode::MapLast(_) | Opcode::FilterMapFirst { .. }
332 | Opcode::FilterLast { .. }
333 | Opcode::ArgExtreme { .. } => {
334 pop1!();
335 stack.push(AbstractVal::UNKNOWN);
336 }
337 Opcode::Add | Opcode::Sub | Opcode::Mul | Opcode::Mod => {
338 let (l, r) = pop2!();
339 stack.push(AbstractVal::scalar(l.ty.join(r.ty)));
340 }
341 Opcode::Div => {
342 pop2!();
343 stack.push(AbstractVal::scalar(VType::Float));
344 }
345 Opcode::Eq | Opcode::Neq | Opcode::Lt | Opcode::Lte
346 | Opcode::Gt | Opcode::Gte | Opcode::Fuzzy => {
347 pop2!();
348 stack.push(AbstractVal::scalar(VType::Bool));
349 }
350 Opcode::Not => { pop1!(); stack.push(AbstractVal::scalar(VType::Bool)); }
351 Opcode::Neg => { let v = pop1!(); stack.push(AbstractVal::scalar(v.ty)); }
352 Opcode::AndOp(_) | Opcode::OrOp(_) => {
353 pop1!();
354 stack.push(AbstractVal::scalar(VType::Bool));
355 }
356 Opcode::CoalesceOp(_) => { pop1!(); stack.push(AbstractVal::UNKNOWN); }
357 Opcode::IfElse { .. } => { pop1!(); stack.push(AbstractVal::UNKNOWN); }
358 Opcode::CallMethod(call) | Opcode::CallOptMethod(call) => {
359 pop1!();
360 stack.push(method_result_type(call.method));
361 }
362 Opcode::MakeObj(_) => stack.push(AbstractVal::object()),
363 Opcode::MakeArr(_) => stack.push(AbstractVal::array()),
364 Opcode::FString(_) => stack.push(AbstractVal::scalar(VType::Str)),
365 Opcode::KindCheck { .. } => { pop1!(); stack.push(AbstractVal::scalar(VType::Bool)); }
366 Opcode::SetCurrent => {} Opcode::BindVar(_) => {} Opcode::StoreVar(_) => { pop1!(); }
369 Opcode::BindObjDestructure(_) | Opcode::BindArrDestructure(_) => {} Opcode::LetExpr { .. } => {
371 pop1!();
372 stack.push(AbstractVal::UNKNOWN);
373 }
374 Opcode::ListComp(_) | Opcode::SetComp(_) => stack.push(AbstractVal::array()),
375 Opcode::DictComp(_) => stack.push(AbstractVal::object()),
376 Opcode::GetPointer(_) => stack.push(AbstractVal::UNKNOWN),
377 Opcode::PatchEval(_) => stack.push(AbstractVal::UNKNOWN),
378 Opcode::CastOp(ty) => {
379 pop1!();
380 use super::ast::CastType;
381 let av = match ty {
382 CastType::Int => AbstractVal::scalar(VType::Int),
383 CastType::Float => AbstractVal::scalar(VType::Float),
384 CastType::Number => AbstractVal::scalar(VType::Num),
385 CastType::Str => AbstractVal::scalar(VType::Str),
386 CastType::Bool => AbstractVal::scalar(VType::Bool),
387 CastType::Array => AbstractVal::array(),
388 CastType::Object => AbstractVal::object(),
389 CastType::Null => AbstractVal::NULL,
390 };
391 stack.push(av);
392 }
393 }
394}
395
396pub fn method_result_type(m: BuiltinMethod) -> AbstractVal {
398 use BuiltinMethod::*;
399 match m {
400 Len | Count | Sum | IndexOf | LastIndexOf => AbstractVal::scalar(VType::Int),
402 Any | All | Has | Missing | Includes | StartsWith | EndsWith => AbstractVal::scalar(VType::Bool),
404 Upper | Lower | Capitalize | TitleCase | Trim | TrimLeft | TrimRight
406 | ToString | ToJson | ToBase64 | FromBase64 | UrlEncode | UrlDecode
407 | HtmlEscape | HtmlUnescape | Repeat | PadLeft | PadRight
408 | Replace | ReplaceAll | StripPrefix | StripSuffix | Indent | Dedent
409 | Join | ToCsv | ToTsv | Type
410 => AbstractVal::scalar(VType::Str),
411 Avg => AbstractVal::scalar(VType::Float),
413 Min | Max | ToNumber => AbstractVal::scalar(VType::Num),
415 ToBool => AbstractVal::scalar(VType::Bool),
417 Keys | Values | Entries | ToPairs | Reverse | Unique | Flatten | Compact
419 | Chars | Lines | Words | Split | Sort | Filter | Map | FlatMap
420 | Enumerate | Pairwise | Window | Chunk | TakeWhile | DropWhile
421 | Accumulate | Zip | ZipLongest | Diff | Intersect | Union
422 | Append | Prepend | Remove | Matches | Scan | Slice
423 => AbstractVal::array(),
424 FromPairs | Invert | Pick | Omit | Merge | DeepMerge | Defaults | Rename
426 | TransformKeys | TransformValues | FilterKeys | FilterValues | Pivot
427 | GroupBy | CountBy | IndexBy | Partition | FlattenKeys | UnflattenKeys
428 | SetPath | DelPath | DelPaths | Set | Update
429 => AbstractVal::object(),
430 First | Last | Nth | GetPath => AbstractVal::UNKNOWN,
432 HasPath => AbstractVal::scalar(VType::Bool),
433 FromJson | Or => AbstractVal::UNKNOWN,
434 EquiJoin => AbstractVal::array(),
435 Unknown => AbstractVal::UNKNOWN,
436 }
437}
438
439pub fn count_ident_uses(program: &Program, name: &str) -> usize {
443 let mut n = 0;
444 count_ident_uses_in_ops(&program.ops, name, &mut n);
445 n
446}
447
448fn count_ident_uses_in_ops(ops: &[Opcode], name: &str, acc: &mut usize) {
449 for op in ops {
450 match op {
451 Opcode::LoadIdent(s) if s.as_ref() == name => *acc += 1,
452 Opcode::AndOp(p) | Opcode::OrOp(p) | Opcode::CoalesceOp(p)
453 | Opcode::InlineFilter(p) | Opcode::FilterCount(p)
454 | Opcode::FindFirst(p) | Opcode::FindOne(p)
455 | Opcode::DynIndex(p)
456 | Opcode::MapSum(p) | Opcode::MapAvg(p)
457 | Opcode::MapMin(p) | Opcode::MapMax(p)
458 | Opcode::MapFlatten(p)
459 | Opcode::MapFirst(p) | Opcode::MapLast(p)
460 | Opcode::FilterLast { pred: p }
461 => count_ident_uses_in_ops(&p.ops, name, acc),
462 Opcode::FilterTakeWhile { pred, stop } => {
463 count_ident_uses_in_ops(&pred.ops, name, acc);
464 count_ident_uses_in_ops(&stop.ops, name, acc);
465 }
466 Opcode::IfElse { then_, else_ } => {
467 count_ident_uses_in_ops(&then_.ops, name, acc);
468 count_ident_uses_in_ops(&else_.ops, name, acc);
469 }
470 Opcode::FilterMap { pred, map }
471 | Opcode::FilterMapSum { pred, map }
472 | Opcode::FilterMapAvg { pred, map }
473 | Opcode::FilterMapFirst { pred, map }
474 | Opcode::FilterMapMin { pred, map }
475 | Opcode::FilterMapMax { pred, map } => {
476 count_ident_uses_in_ops(&pred.ops, name, acc);
477 count_ident_uses_in_ops(&map.ops, name, acc);
478 }
479 Opcode::MapFilter { map, pred } => {
480 count_ident_uses_in_ops(&map.ops, name, acc);
481 count_ident_uses_in_ops(&pred.ops, name, acc);
482 }
483 Opcode::FilterFilter { p1, p2 } => {
484 count_ident_uses_in_ops(&p1.ops, name, acc);
485 count_ident_uses_in_ops(&p2.ops, name, acc);
486 }
487 Opcode::MapMap { f1, f2 } => {
488 count_ident_uses_in_ops(&f1.ops, name, acc);
489 count_ident_uses_in_ops(&f2.ops, name, acc);
490 }
491 Opcode::CallMethod(c) | Opcode::CallOptMethod(c) => {
492 for p in c.sub_progs.iter() { count_ident_uses_in_ops(&p.ops, name, acc); }
493 }
494 Opcode::LetExpr { body, .. } => count_ident_uses_in_ops(&body.ops, name, acc),
495 Opcode::ListComp(spec) | Opcode::SetComp(spec) => {
496 count_ident_uses_in_ops(&spec.expr.ops, name, acc);
497 count_ident_uses_in_ops(&spec.iter.ops, name, acc);
498 if let Some(c) = &spec.cond { count_ident_uses_in_ops(&c.ops, name, acc); }
499 }
500 Opcode::DictComp(spec) => {
501 count_ident_uses_in_ops(&spec.key.ops, name, acc);
502 count_ident_uses_in_ops(&spec.val.ops, name, acc);
503 count_ident_uses_in_ops(&spec.iter.ops, name, acc);
504 if let Some(c) = &spec.cond { count_ident_uses_in_ops(&c.ops, name, acc); }
505 }
506 Opcode::MakeObj(entries) => {
507 use super::vm::CompiledObjEntry;
508 for e in entries.iter() {
509 match e {
510 CompiledObjEntry::Short { .. } => {}
511 CompiledObjEntry::Kv { prog, cond, .. } => {
512 count_ident_uses_in_ops(&prog.ops, name, acc);
513 if let Some(c) = cond { count_ident_uses_in_ops(&c.ops, name, acc); }
514 }
515 CompiledObjEntry::KvPath { .. } => {}
516 CompiledObjEntry::Dynamic { key, val } => {
517 count_ident_uses_in_ops(&key.ops, name, acc);
518 count_ident_uses_in_ops(&val.ops, name, acc);
519 }
520 CompiledObjEntry::Spread(p) => count_ident_uses_in_ops(&p.ops, name, acc),
521 CompiledObjEntry::SpreadDeep(p) => count_ident_uses_in_ops(&p.ops, name, acc),
522 }
523 }
524 }
525 Opcode::MakeArr(progs) => {
526 for p in progs.iter() { count_ident_uses_in_ops(&p.ops, name, acc); }
527 }
528 Opcode::FString(parts) => {
529 use super::vm::CompiledFSPart;
530 for p in parts.iter() {
531 if let CompiledFSPart::Interp { prog, .. } = p {
532 count_ident_uses_in_ops(&prog.ops, name, acc);
533 }
534 }
535 }
536 _ => {}
537 }
538 }
539}
540
541pub fn collect_accessed_fields(program: &Program) -> Vec<Arc<str>> {
547 let mut set = Vec::new();
548 collect_fields_in_ops(&program.ops, &mut set);
549 set
550}
551
552fn collect_fields_in_ops(ops: &[Opcode], acc: &mut Vec<Arc<str>>) {
553 for op in ops {
554 match op {
555 Opcode::GetField(k) | Opcode::OptField(k) | Opcode::Descendant(k)
556 | Opcode::MapFieldSum(k) | Opcode::MapFieldAvg(k)
557 | Opcode::MapFieldMin(k) | Opcode::MapFieldMax(k)
558 | Opcode::MapField(k) | Opcode::MapFieldUnique(k)
559 | Opcode::GroupByField(k)
560 | Opcode::CountByField(k)
561 | Opcode::UniqueByField(k)
562 | Opcode::FilterFieldEqLit(k, _) | Opcode::FilterFieldCmpLit(k, _, _)
563 | Opcode::FilterFieldEqLitCount(k, _) | Opcode::FilterFieldCmpLitCount(k, _, _)
564 => {
565 if !acc.iter().any(|a: &Arc<str>| a == k) { acc.push(k.clone()); }
566 }
567 Opcode::FilterFieldCmpField(k1, _, k2)
568 | Opcode::FilterFieldCmpFieldCount(k1, _, k2) => {
569 if !acc.iter().any(|a: &Arc<str>| a == k1) { acc.push(k1.clone()); }
570 if !acc.iter().any(|a: &Arc<str>| a == k2) { acc.push(k2.clone()); }
571 }
572 Opcode::FlatMapChain(ks) => {
573 for k in ks.iter() {
574 if !acc.iter().any(|a: &Arc<str>| a == k) { acc.push(k.clone()); }
575 }
576 }
577 Opcode::RootChain(chain) => {
578 for k in chain.iter() {
579 if !acc.iter().any(|a: &Arc<str>| a == k) { acc.push(k.clone()); }
580 }
581 }
582 Opcode::AndOp(p) | Opcode::OrOp(p) | Opcode::CoalesceOp(p)
583 | Opcode::InlineFilter(p) | Opcode::FilterCount(p)
584 | Opcode::FindFirst(p) | Opcode::FindOne(p)
585 | Opcode::DynIndex(p)
586 | Opcode::MapSum(p) | Opcode::MapAvg(p)
587 | Opcode::MapMin(p) | Opcode::MapMax(p)
588 | Opcode::MapFlatten(p)
589 | Opcode::MapFirst(p) | Opcode::MapLast(p)
590 | Opcode::FilterLast { pred: p }
591 => collect_fields_in_ops(&p.ops, acc),
592 Opcode::FilterTakeWhile { pred, stop } => {
593 collect_fields_in_ops(&pred.ops, acc);
594 collect_fields_in_ops(&stop.ops, acc);
595 }
596 Opcode::IfElse { then_, else_ } => {
597 collect_fields_in_ops(&then_.ops, acc);
598 collect_fields_in_ops(&else_.ops, acc);
599 }
600 Opcode::FilterMap { pred, map }
601 | Opcode::FilterMapSum { pred, map }
602 | Opcode::FilterMapAvg { pred, map }
603 | Opcode::FilterMapFirst { pred, map }
604 | Opcode::FilterMapMin { pred, map }
605 | Opcode::FilterMapMax { pred, map } => {
606 collect_fields_in_ops(&pred.ops, acc);
607 collect_fields_in_ops(&map.ops, acc);
608 }
609 Opcode::MapFilter { map, pred } => {
610 collect_fields_in_ops(&map.ops, acc);
611 collect_fields_in_ops(&pred.ops, acc);
612 }
613 Opcode::FilterFilter { p1, p2 } => {
614 collect_fields_in_ops(&p1.ops, acc);
615 collect_fields_in_ops(&p2.ops, acc);
616 }
617 Opcode::MapMap { f1, f2 } => {
618 collect_fields_in_ops(&f1.ops, acc);
619 collect_fields_in_ops(&f2.ops, acc);
620 }
621 Opcode::CallMethod(c) | Opcode::CallOptMethod(c) => {
622 for p in c.sub_progs.iter() { collect_fields_in_ops(&p.ops, acc); }
623 }
624 Opcode::LetExpr { body, .. } => collect_fields_in_ops(&body.ops, acc),
625 _ => {}
626 }
627 }
628}
629
630pub fn program_signature(program: &Program) -> u64 {
636 use std::collections::hash_map::DefaultHasher;
637 use std::hash::Hasher;
638 let mut h = DefaultHasher::new();
639 hash_ops(&program.ops, &mut h);
640 h.finish()
641}
642
643fn hash_ops(ops: &[Opcode], h: &mut impl std::hash::Hasher) {
644 use std::hash::Hash;
645 for op in ops {
646 std::mem::discriminant(op).hash(h);
648 match op {
649 Opcode::PushInt(n) => n.hash(h),
650 Opcode::PushStr(s) => s.as_bytes().hash(h),
651 Opcode::PushBool(b) => b.hash(h),
652 Opcode::GetField(k) | Opcode::OptField(k) | Opcode::Descendant(k)
653 | Opcode::LoadIdent(k) | Opcode::GetPointer(k)
654 => k.as_bytes().hash(h),
655 Opcode::GetIndex(i) => i.hash(h),
656 Opcode::CallMethod(c) | Opcode::CallOptMethod(c) => {
657 (c.method as u8).hash(h);
658 for p in c.sub_progs.iter() { hash_ops(&p.ops, h); }
659 }
660 Opcode::AndOp(p) | Opcode::OrOp(p) | Opcode::CoalesceOp(p)
661 | Opcode::InlineFilter(p) | Opcode::FilterCount(p)
662 | Opcode::FindFirst(p) | Opcode::FindOne(p)
663 | Opcode::DynIndex(p)
664 | Opcode::MapSum(p) | Opcode::MapAvg(p)
665 | Opcode::MapMin(p) | Opcode::MapMax(p)
666 | Opcode::MapFlatten(p)
667 | Opcode::MapFirst(p) | Opcode::MapLast(p)
668 | Opcode::FilterLast { pred: p }
669 => hash_ops(&p.ops, h),
670 Opcode::FilterTakeWhile { pred, stop } => {
671 hash_ops(&pred.ops, h);
672 hash_ops(&stop.ops, h);
673 }
674 Opcode::IfElse { then_, else_ } => {
675 hash_ops(&then_.ops, h);
676 hash_ops(&else_.ops, h);
677 }
678 Opcode::RootChain(chain) => {
679 for k in chain.iter() { k.as_bytes().hash(h); }
680 }
681 _ => {}
682 }
683 }
684}
685
686pub fn find_common_subexprs(program: &Program) -> HashMap<u64, usize> {
692 let mut map: HashMap<u64, usize> = HashMap::new();
693 walk_subprograms(&program.ops, &mut map);
694 map.retain(|_, &mut n| n >= 2);
695 map
696}
697
698fn walk_subprograms(ops: &[Opcode], map: &mut HashMap<u64, usize>) {
699 for op in ops {
700 let sub_progs: Vec<&Arc<Program>> = match op {
701 Opcode::AndOp(p) | Opcode::OrOp(p) | Opcode::CoalesceOp(p)
702 | Opcode::InlineFilter(p) | Opcode::FilterCount(p)
703 | Opcode::FindFirst(p) | Opcode::FindOne(p)
704 | Opcode::DynIndex(p)
705 | Opcode::MapSum(p) | Opcode::MapAvg(p)
706 | Opcode::MapMin(p) | Opcode::MapMax(p)
707 | Opcode::MapFlatten(p)
708 | Opcode::MapFirst(p) | Opcode::MapLast(p)
709 | Opcode::FilterLast { pred: p }
710 => vec![p],
711 Opcode::FilterTakeWhile { pred, stop } => vec![pred, stop],
712 Opcode::IfElse { then_, else_ } => vec![then_, else_],
713 Opcode::FilterMap { pred, map: m }
714 | Opcode::FilterMapSum { pred, map: m }
715 | Opcode::FilterMapAvg { pred, map: m }
716 | Opcode::FilterMapFirst { pred, map: m }
717 | Opcode::FilterMapMin { pred, map: m }
718 | Opcode::FilterMapMax { pred, map: m } => vec![pred, m],
719 Opcode::MapFilter { map: m, pred } => vec![m, pred],
720 Opcode::FilterFilter { p1, p2 } => vec![p1, p2],
721 Opcode::MapMap { f1, f2 } => vec![f1, f2],
722 Opcode::CallMethod(c) | Opcode::CallOptMethod(c) =>
723 c.sub_progs.iter().collect(),
724 Opcode::LetExpr { body, .. } => vec![body],
725 Opcode::MakeArr(progs) => progs.iter().collect(),
726 Opcode::MakeObj(entries) => {
727 use super::vm::CompiledObjEntry;
728 let mut v = Vec::new();
729 for e in entries.iter() {
730 match e {
731 CompiledObjEntry::Short { .. } => {}
732 CompiledObjEntry::Kv { prog, cond, .. } => {
733 v.push(prog);
734 if let Some(c) = cond { v.push(c); }
735 }
736 CompiledObjEntry::KvPath { .. } => {}
737 CompiledObjEntry::Dynamic { key, val } => { v.push(key); v.push(val); }
738 CompiledObjEntry::Spread(p) => v.push(p),
739 CompiledObjEntry::SpreadDeep(p) => v.push(p),
740 }
741 }
742 v
743 }
744 _ => continue,
745 };
746 for p in sub_progs {
747 let sig = program_signature(p);
748 *map.entry(sig).or_insert(0) += 1;
749 walk_subprograms(&p.ops, map);
750 }
751 }
752}
753
754pub fn expr_uses_ident(expr: &super::ast::Expr, name: &str) -> bool {
761 use super::ast::{Expr, Step, PipeStep, BindTarget, FStringPart, ArrayElem, ObjField, Arg};
762 match expr {
763 Expr::Ident(n) => n == name,
764 Expr::Null | Expr::Bool(_) | Expr::Int(_) | Expr::Float(_)
765 | Expr::Str(_) | Expr::Root | Expr::Current => false,
766 Expr::FString(parts) => parts.iter().any(|p| match p {
767 FStringPart::Lit(_) => false,
768 FStringPart::Interp { expr, .. } => expr_uses_ident(expr, name),
769 }),
770 Expr::Chain(base, steps) => {
771 if expr_uses_ident(base, name) { return true; }
772 steps.iter().any(|s| match s {
773 Step::DynIndex(e) | Step::InlineFilter(e) => expr_uses_ident(e, name),
774 Step::Method(_, args) | Step::OptMethod(_, args) =>
775 args.iter().any(|a| match a {
776 Arg::Pos(e) | Arg::Named(_, e) => expr_uses_ident(e, name),
777 }),
778 _ => false,
779 })
780 }
781 Expr::BinOp(l, _, r) => expr_uses_ident(l, name) || expr_uses_ident(r, name),
782 Expr::UnaryNeg(e) | Expr::Not(e) => expr_uses_ident(e, name),
783 Expr::Kind { expr, .. } => expr_uses_ident(expr, name),
784 Expr::Coalesce(l, r) => expr_uses_ident(l, name) || expr_uses_ident(r, name),
785 Expr::Object(fields) => fields.iter().any(|f| match f {
786 ObjField::Kv { val, cond, .. } =>
787 expr_uses_ident(val, name)
788 || cond.as_ref().map_or(false, |c| expr_uses_ident(c, name)),
789 ObjField::Short(n) => n == name,
790 ObjField::Dynamic { key, val } =>
791 expr_uses_ident(key, name) || expr_uses_ident(val, name),
792 ObjField::Spread(e) => expr_uses_ident(e, name),
793 ObjField::SpreadDeep(e) => expr_uses_ident(e, name),
794 }),
795 Expr::Array(elems) => elems.iter().any(|e| match e {
796 ArrayElem::Expr(e) | ArrayElem::Spread(e) => expr_uses_ident(e, name),
797 }),
798 Expr::Pipeline { base, steps } => {
799 if expr_uses_ident(base, name) { return true; }
800 steps.iter().any(|s| match s {
801 PipeStep::Forward(e) => expr_uses_ident(e, name),
802 PipeStep::Bind(bt) => match bt {
803 BindTarget::Name(n) => n == name,
804 BindTarget::Obj { fields, rest } =>
805 fields.iter().any(|f| f == name)
806 || rest.as_ref().map_or(false, |r| r == name),
807 BindTarget::Arr(ns) => ns.iter().any(|n| n == name),
808 },
809 })
810 }
811 Expr::ListComp { expr, vars, iter, cond }
812 | Expr::SetComp { expr, vars, iter, cond }
813 | Expr::GenComp { expr, vars, iter, cond } => {
814 if expr_uses_ident(iter, name) { return true; }
815 if vars.iter().any(|v| v == name) { return false; } expr_uses_ident(expr, name)
817 || cond.as_ref().map_or(false, |c| expr_uses_ident(c, name))
818 }
819 Expr::DictComp { key, val, vars, iter, cond } => {
820 if expr_uses_ident(iter, name) { return true; }
821 if vars.iter().any(|v| v == name) { return false; }
822 expr_uses_ident(key, name) || expr_uses_ident(val, name)
823 || cond.as_ref().map_or(false, |c| expr_uses_ident(c, name))
824 }
825 Expr::Lambda { params, body } => {
826 if params.iter().any(|p| p == name) { return false; }
827 expr_uses_ident(body, name)
828 }
829 Expr::Let { name: n, init, body } => {
830 if expr_uses_ident(init, name) { return true; }
831 if n == name { return false; } expr_uses_ident(body, name)
833 }
834 Expr::IfElse { cond, then_, else_ } => {
835 expr_uses_ident(cond, name)
836 || expr_uses_ident(then_, name)
837 || expr_uses_ident(else_, name)
838 }
839 Expr::GlobalCall { args, .. } => args.iter().any(|a| match a {
840 Arg::Pos(e) | Arg::Named(_, e) => expr_uses_ident(e, name),
841 }),
842 Expr::Cast { expr, .. } => expr_uses_ident(expr, name),
843 Expr::Patch { root, ops } => {
844 use super::ast::PathStep;
845 if expr_uses_ident(root, name) { return true; }
846 ops.iter().any(|op| {
847 op.path.iter().any(|s| match s {
848 PathStep::WildcardFilter(e) => expr_uses_ident(e, name),
849 _ => false,
850 })
851 || expr_uses_ident(&op.val, name)
852 || op.cond.as_ref().map_or(false, |c| expr_uses_ident(c, name))
853 })
854 }
855 Expr::DeleteMark => false,
856 }
857}
858
859pub fn expr_is_pure(expr: &super::ast::Expr) -> bool {
862 use super::ast::{Expr, Step, Arg};
863 match expr {
864 Expr::Null | Expr::Bool(_) | Expr::Int(_) | Expr::Float(_)
865 | Expr::Str(_) | Expr::Root | Expr::Current | Expr::Ident(_) => true,
866 Expr::FString(_) => true,
867 Expr::Chain(base, steps) => {
868 if !expr_is_pure(base) { return false; }
869 steps.iter().all(|s| match s {
870 Step::DynIndex(e) | Step::InlineFilter(e) => expr_is_pure(e),
871 Step::Method(_, args) | Step::OptMethod(_, args) =>
872 args.iter().all(|a| match a {
873 Arg::Pos(e) | Arg::Named(_, e) => expr_is_pure(e),
874 }),
875 _ => true,
876 })
877 }
878 Expr::BinOp(l, _, r) | Expr::Coalesce(l, r) =>
879 expr_is_pure(l) && expr_is_pure(r),
880 Expr::UnaryNeg(e) | Expr::Not(e) | Expr::Kind { expr: e, .. } =>
881 expr_is_pure(e),
882 _ => true,
884 }
885}
886
887pub fn dedup_subprograms(program: &Program) -> Arc<Program> {
896 let mut cache: HashMap<u64, Arc<Program>> = HashMap::new();
897 dedup_rec(program, &mut cache)
898}
899
900fn dedup_rec(program: &Program, cache: &mut HashMap<u64, Arc<Program>>) -> Arc<Program> {
901 let sig = program_signature(program);
902 if let Some(a) = cache.get(&sig) { return Arc::clone(a); }
903 let new_ops: Vec<Opcode> = program.ops.iter().map(|op| rewrite_op(op, cache)).collect();
904 let ics = crate::vm::fresh_ics(new_ops.len());
905 let out = Arc::new(Program {
906 ops: new_ops.into(),
907 source: program.source.clone(),
908 id: program.id,
909 is_structural: program.is_structural,
910 ics,
911 });
912 cache.insert(sig, Arc::clone(&out));
913 out
914}
915
916fn rewrite_op(op: &Opcode, cache: &mut HashMap<u64, Arc<Program>>) -> Opcode {
917 use super::vm::{CompiledObjEntry, CompiledFSPart, CompSpec, DictCompSpec};
918 match op {
919 Opcode::AndOp(p) => Opcode::AndOp(dedup_rec(p, cache)),
920 Opcode::OrOp(p) => Opcode::OrOp(dedup_rec(p, cache)),
921 Opcode::CoalesceOp(p) => Opcode::CoalesceOp(dedup_rec(p, cache)),
922 Opcode::IfElse { then_, else_ } => Opcode::IfElse {
923 then_: dedup_rec(then_, cache),
924 else_: dedup_rec(else_, cache),
925 },
926 Opcode::InlineFilter(p) => Opcode::InlineFilter(dedup_rec(p, cache)),
927 Opcode::FilterCount(p) => Opcode::FilterCount(dedup_rec(p, cache)),
928 Opcode::MapSum(p) => Opcode::MapSum(dedup_rec(p, cache)),
929 Opcode::MapAvg(p) => Opcode::MapAvg(dedup_rec(p, cache)),
930 Opcode::MapMin(p) => Opcode::MapMin(dedup_rec(p, cache)),
931 Opcode::MapMax(p) => Opcode::MapMax(dedup_rec(p, cache)),
932 Opcode::MapFlatten(p) => Opcode::MapFlatten(dedup_rec(p, cache)),
933 Opcode::MapFirst(p) => Opcode::MapFirst(dedup_rec(p, cache)),
934 Opcode::MapLast(p) => Opcode::MapLast(dedup_rec(p, cache)),
935 Opcode::FilterLast { pred } => Opcode::FilterLast { pred: dedup_rec(pred, cache) },
936 Opcode::FilterTakeWhile { pred, stop } => Opcode::FilterTakeWhile {
937 pred: dedup_rec(pred, cache),
938 stop: dedup_rec(stop, cache),
939 },
940 Opcode::FindFirst(p) => Opcode::FindFirst(dedup_rec(p, cache)),
941 Opcode::FindOne(p) => Opcode::FindOne(dedup_rec(p, cache)),
942 Opcode::DynIndex(p) => Opcode::DynIndex(dedup_rec(p, cache)),
943 Opcode::FilterMap { pred, map } => Opcode::FilterMap {
944 pred: dedup_rec(pred, cache),
945 map: dedup_rec(map, cache),
946 },
947 Opcode::FilterMapSum { pred, map } => Opcode::FilterMapSum {
948 pred: dedup_rec(pred, cache),
949 map: dedup_rec(map, cache),
950 },
951 Opcode::FilterMapAvg { pred, map } => Opcode::FilterMapAvg {
952 pred: dedup_rec(pred, cache),
953 map: dedup_rec(map, cache),
954 },
955 Opcode::FilterMapFirst { pred, map } => Opcode::FilterMapFirst {
956 pred: dedup_rec(pred, cache),
957 map: dedup_rec(map, cache),
958 },
959 Opcode::FilterMapMin { pred, map } => Opcode::FilterMapMin {
960 pred: dedup_rec(pred, cache),
961 map: dedup_rec(map, cache),
962 },
963 Opcode::FilterMapMax { pred, map } => Opcode::FilterMapMax {
964 pred: dedup_rec(pred, cache),
965 map: dedup_rec(map, cache),
966 },
967 Opcode::MapFilter { map, pred } => Opcode::MapFilter {
968 map: dedup_rec(map, cache),
969 pred: dedup_rec(pred, cache),
970 },
971 Opcode::FilterFilter { p1, p2 } => Opcode::FilterFilter {
972 p1: dedup_rec(p1, cache),
973 p2: dedup_rec(p2, cache),
974 },
975 Opcode::MapMap { f1, f2 } => Opcode::MapMap {
976 f1: dedup_rec(f1, cache),
977 f2: dedup_rec(f2, cache),
978 },
979 Opcode::LetExpr { name, body } => Opcode::LetExpr {
980 name: name.clone(),
981 body: dedup_rec(body, cache),
982 },
983 Opcode::CallMethod(c) => Opcode::CallMethod(rewrite_call(c, cache)),
984 Opcode::CallOptMethod(c) => Opcode::CallOptMethod(rewrite_call(c, cache)),
985 Opcode::MakeArr(progs) => {
986 let new_progs: Vec<Arc<Program>> = progs.iter().map(|p| dedup_rec(p, cache)).collect();
987 Opcode::MakeArr(new_progs.into())
988 }
989 Opcode::MakeObj(entries) => {
990 let new_entries: Vec<CompiledObjEntry> = entries.iter().map(|e| match e {
991 CompiledObjEntry::Short { name, ic } =>
992 CompiledObjEntry::Short { name: name.clone(), ic: ic.clone() },
993 CompiledObjEntry::Kv { key, prog, optional, cond } => CompiledObjEntry::Kv {
994 key: key.clone(),
995 prog: dedup_rec(prog, cache),
996 optional: *optional,
997 cond: cond.as_ref().map(|c| dedup_rec(c, cache)),
998 },
999 CompiledObjEntry::KvPath { key, steps, optional, ics } => CompiledObjEntry::KvPath {
1000 key: key.clone(),
1001 steps: steps.clone(),
1002 optional: *optional,
1003 ics: ics.clone(),
1004 },
1005 CompiledObjEntry::Dynamic { key, val } => CompiledObjEntry::Dynamic {
1006 key: dedup_rec(key, cache),
1007 val: dedup_rec(val, cache),
1008 },
1009 CompiledObjEntry::Spread(p) => CompiledObjEntry::Spread(dedup_rec(p, cache)),
1010 CompiledObjEntry::SpreadDeep(p) => CompiledObjEntry::SpreadDeep(dedup_rec(p, cache)),
1011 }).collect();
1012 Opcode::MakeObj(new_entries.into())
1013 }
1014 Opcode::FString(parts) => {
1015 let new_parts: Vec<CompiledFSPart> = parts.iter().map(|p| match p {
1016 CompiledFSPart::Lit(s) => CompiledFSPart::Lit(s.clone()),
1017 CompiledFSPart::Interp { prog, fmt } => CompiledFSPart::Interp {
1018 prog: dedup_rec(prog, cache),
1019 fmt: fmt.clone(),
1020 },
1021 }).collect();
1022 Opcode::FString(new_parts.into())
1023 }
1024 Opcode::ListComp(spec) => {
1025 let new_spec = CompSpec {
1026 expr: dedup_rec(&spec.expr, cache),
1027 iter: dedup_rec(&spec.iter, cache),
1028 cond: spec.cond.as_ref().map(|c| dedup_rec(c, cache)),
1029 vars: spec.vars.clone(),
1030 };
1031 Opcode::ListComp(Arc::new(new_spec))
1032 }
1033 Opcode::SetComp(spec) => {
1034 let new_spec = CompSpec {
1035 expr: dedup_rec(&spec.expr, cache),
1036 iter: dedup_rec(&spec.iter, cache),
1037 cond: spec.cond.as_ref().map(|c| dedup_rec(c, cache)),
1038 vars: spec.vars.clone(),
1039 };
1040 Opcode::SetComp(Arc::new(new_spec))
1041 }
1042 Opcode::DictComp(spec) => {
1043 let new_spec = DictCompSpec {
1044 key: dedup_rec(&spec.key, cache),
1045 val: dedup_rec(&spec.val, cache),
1046 iter: dedup_rec(&spec.iter, cache),
1047 cond: spec.cond.as_ref().map(|c| dedup_rec(c, cache)),
1048 vars: spec.vars.clone(),
1049 };
1050 Opcode::DictComp(Arc::new(new_spec))
1051 }
1052 _ => op.clone(),
1053 }
1054}
1055
1056fn rewrite_call(c: &Arc<super::vm::CompiledCall>, cache: &mut HashMap<u64, Arc<Program>>) -> Arc<super::vm::CompiledCall> {
1057 use super::vm::CompiledCall;
1058 let new_subs: Vec<Arc<Program>> = c.sub_progs.iter().map(|p| dedup_rec(p, cache)).collect();
1059 Arc::new(CompiledCall {
1060 method: c.method,
1061 name: c.name.clone(),
1062 sub_progs: new_subs.into(),
1063 orig_args: c.orig_args.clone(),
1064 })
1065}
1066
1067pub fn opcode_cost(op: &Opcode) -> u32 {
1072 match op {
1073 Opcode::PushNull | Opcode::PushBool(_) | Opcode::PushInt(_)
1074 | Opcode::PushFloat(_) | Opcode::PushStr(_)
1075 | Opcode::PushRoot | Opcode::PushCurrent | Opcode::LoadIdent(_) => 1,
1076 Opcode::GetField(_) | Opcode::OptField(_) | Opcode::GetIndex(_)
1077 | Opcode::RootChain(_) | Opcode::FieldChain(_)
1078 | Opcode::GetPointer(_) => 2,
1079 Opcode::GetSlice(..) | Opcode::Descendant(_) => 5,
1080 Opcode::DescendAll => 20,
1081 Opcode::Not | Opcode::Neg | Opcode::SetCurrent
1082 | Opcode::BindVar(_) | Opcode::StoreVar(_)
1083 | Opcode::BindObjDestructure(_) | Opcode::BindArrDestructure(_) => 1,
1084 Opcode::Add | Opcode::Sub | Opcode::Mul | Opcode::Div | Opcode::Mod
1085 | Opcode::Eq | Opcode::Neq | Opcode::Lt | Opcode::Lte
1086 | Opcode::Gt | Opcode::Gte | Opcode::Fuzzy => 2,
1087 Opcode::KindCheck { .. } => 2,
1088 Opcode::AndOp(p) | Opcode::OrOp(p) | Opcode::CoalesceOp(p) => 2 + program_cost(p),
1089 Opcode::IfElse { then_, else_ } => 2 + program_cost(then_) + program_cost(else_),
1090 Opcode::InlineFilter(p) | Opcode::FilterCount(p)
1091 | Opcode::FindFirst(p) | Opcode::FindOne(p)
1092 | Opcode::MapSum(p) | Opcode::MapAvg(p)
1093 | Opcode::MapMin(p) | Opcode::MapMax(p)
1094 | Opcode::MapFlatten(p)
1095 | Opcode::MapFirst(p) | Opcode::MapLast(p)
1096 | Opcode::FilterLast { pred: p }
1097 | Opcode::DynIndex(p) => 10 + program_cost(p),
1098 Opcode::FilterTakeWhile { pred, stop } => 10 + program_cost(pred) + program_cost(stop),
1099 Opcode::FilterDropWhile { pred, drop } => 10 + program_cost(pred) + program_cost(drop),
1100 Opcode::MapUnique(p) => 15 + program_cost(p),
1101 Opcode::EquiJoin { rhs, .. } => 25 + program_cost(rhs),
1102 Opcode::FilterMap { pred, map }
1103 | Opcode::FilterMapSum { pred, map }
1104 | Opcode::FilterMapAvg { pred, map }
1105 | Opcode::FilterMapFirst { pred, map }
1106 | Opcode::FilterMapMin { pred, map }
1107 | Opcode::FilterMapMax { pred, map } => 10 + program_cost(pred) + program_cost(map),
1108 Opcode::MapFilter { map, pred } => 10 + program_cost(map) + program_cost(pred),
1109 Opcode::FilterFilter { p1, p2 } => 10 + program_cost(p1) + program_cost(p2),
1110 Opcode::MapMap { f1, f2 } => 10 + program_cost(f1) + program_cost(f2),
1111 Opcode::TopN { n, .. } => 15 + *n as u32,
1112 Opcode::CallMethod(c) | Opcode::CallOptMethod(c) => {
1113 let base = match c.method {
1114 BuiltinMethod::Filter | BuiltinMethod::Map | BuiltinMethod::FlatMap => 10,
1115 BuiltinMethod::Sort => 30,
1116 BuiltinMethod::GroupBy | BuiltinMethod::IndexBy => 25,
1117 BuiltinMethod::Len | BuiltinMethod::Count => 2,
1118 _ => 8,
1119 };
1120 base + c.sub_progs.iter().map(|p| program_cost(p)).sum::<u32>()
1121 }
1122 Opcode::MakeObj(entries) => {
1123 use super::vm::CompiledObjEntry;
1124 entries.iter().map(|e| match e {
1125 CompiledObjEntry::Short { .. } => 2,
1126 CompiledObjEntry::Kv { prog, cond, .. } =>
1127 2 + program_cost(prog) + cond.as_ref().map_or(0, |c| program_cost(c)),
1128 CompiledObjEntry::KvPath { steps, .. } => 2 + steps.len() as u32,
1129 CompiledObjEntry::Dynamic { key, val } => 3 + program_cost(key) + program_cost(val),
1130 CompiledObjEntry::Spread(p) => 5 + program_cost(p),
1131 CompiledObjEntry::SpreadDeep(p) => 8 + program_cost(p),
1132 }).sum()
1133 }
1134 Opcode::MakeArr(progs) => progs.iter().map(|p| 1 + program_cost(p)).sum(),
1135 Opcode::FString(parts) => {
1136 use super::vm::CompiledFSPart;
1137 parts.iter().map(|p| match p {
1138 CompiledFSPart::Lit(_) => 1,
1139 CompiledFSPart::Interp { prog, .. } => 3 + program_cost(prog),
1140 }).sum()
1141 }
1142 Opcode::ListComp(s) | Opcode::SetComp(s) =>
1143 15 + program_cost(&s.expr) + program_cost(&s.iter)
1144 + s.cond.as_ref().map_or(0, |c| program_cost(c)),
1145 Opcode::DictComp(s) =>
1146 15 + program_cost(&s.key) + program_cost(&s.val) + program_cost(&s.iter)
1147 + s.cond.as_ref().map_or(0, |c| program_cost(c)),
1148 Opcode::LetExpr { body, .. } => 2 + program_cost(body),
1149 Opcode::Quantifier(_) => 2,
1150 Opcode::CastOp(_) => 2,
1151 Opcode::PatchEval(_) => 50,
1152 Opcode::MapFieldSum(_) | Opcode::MapFieldAvg(_)
1153 | Opcode::MapFieldMin(_) | Opcode::MapFieldMax(_)
1154 | Opcode::MapField(_) => 5,
1155 Opcode::MapFieldChain(ks) => 5 + ks.len() as u32 * 2,
1156 Opcode::MapFieldChainUnique(ks) => 8 + ks.len() as u32 * 2,
1157 Opcode::MapFieldUnique(_) => 8,
1158 Opcode::FlatMapChain(ks) => 5 + ks.len() as u32 * 3,
1159 Opcode::FilterFieldEqLit(_, _) | Opcode::FilterFieldCmpLit(_, _, _)
1160 | Opcode::FilterCurrentCmpLit(_, _)
1161 | Opcode::FilterStrVecStartsWith(_)
1162 | Opcode::FilterStrVecEndsWith(_)
1163 | Opcode::FilterStrVecContains(_)
1164 | Opcode::MapStrVecUpper
1165 | Opcode::MapStrVecLower
1166 | Opcode::MapStrVecTrim
1167 | Opcode::MapNumVecArith { .. }
1168 | Opcode::MapNumVecNeg
1169 | Opcode::FilterFieldCmpField(_, _, _) => 5,
1170 Opcode::FilterFieldEqLitMapField(_, _, _)
1171 | Opcode::FilterFieldCmpLitMapField(_, _, _, _) => 6,
1172 Opcode::FilterFieldEqLitCount(_, _) | Opcode::FilterFieldCmpLitCount(_, _, _)
1173 | Opcode::FilterFieldCmpFieldCount(_, _, _) => 4,
1174 Opcode::FilterFieldsAllEqLitCount(pairs) => 4 + pairs.len() as u32 * 2,
1175 Opcode::FilterFieldsAllCmpLitCount(triples) => 4 + triples.len() as u32 * 2,
1176 Opcode::GroupByField(_) => 15,
1177 Opcode::CountByField(_) => 12,
1178 Opcode::UniqueByField(_) => 14,
1179 Opcode::UniqueCount => 6,
1180 Opcode::ArgExtreme { .. } => 12,
1181 Opcode::MapToJsonJoin { .. } => 8,
1182 Opcode::StrTrimUpper | Opcode::StrTrimLower
1183 | Opcode::StrUpperTrim | Opcode::StrLowerTrim => 2,
1184 Opcode::StrSplitReverseJoin { .. } => 4,
1185 Opcode::MapReplaceLit { .. } => 10,
1186 Opcode::MapUpperReplaceLit { .. } | Opcode::MapLowerReplaceLit { .. } => 11,
1187 Opcode::MapStrConcat { .. } => 6,
1188 Opcode::MapSplitLenSum { .. } => 4,
1189 Opcode::MapProject { .. } => 7,
1190 Opcode::MapStrSlice { .. } => 4,
1191 Opcode::MapFString(_) => 8,
1192 Opcode::MapSplitCount { .. } => 3,
1193 Opcode::MapSplitFirst { .. } => 3,
1194 Opcode::MapSplitNth { .. } => 3,
1195 Opcode::MapSplitCountSum { .. } => 3,
1196 }
1197}
1198
1199pub fn program_cost(program: &Program) -> u32 {
1201 program.ops.iter().map(opcode_cost).sum()
1202}
1203
1204#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1208pub enum Monotonicity {
1209 Unknown,
1211 Asc,
1213 Desc,
1215 NotArray,
1217}
1218
1219impl Monotonicity {
1220 pub fn after(self, op: &Opcode) -> Monotonicity {
1221 match op {
1222 Opcode::CallMethod(c) if c.sub_progs.is_empty() => match c.method {
1223 BuiltinMethod::Sort => Monotonicity::Asc,
1224 BuiltinMethod::Reverse => match self {
1225 Monotonicity::Asc => Monotonicity::Desc,
1226 Monotonicity::Desc => Monotonicity::Asc,
1227 x => x,
1228 },
1229 BuiltinMethod::Filter => self, BuiltinMethod::Map => Monotonicity::Unknown, _ => Monotonicity::Unknown,
1232 }
1233 Opcode::TopN { asc, .. } => if *asc { Monotonicity::Asc } else { Monotonicity::Desc },
1234 Opcode::MakeArr(_) | Opcode::ListComp(_) => Monotonicity::Unknown,
1235 _ => self,
1236 }
1237 }
1238}
1239
1240pub fn infer_monotonicity(program: &Program) -> Monotonicity {
1242 let mut m = Monotonicity::Unknown;
1243 for op in program.ops.iter() { m = m.after(op); }
1244 m
1245}
1246
1247pub fn escapes_doc(program: &Program) -> bool {
1256 for op in program.ops.iter() {
1257 match op {
1258 Opcode::PushRoot | Opcode::PushCurrent | Opcode::RootChain(_)
1259 | Opcode::GetField(_) | Opcode::GetIndex(_) | Opcode::GetSlice(..)
1260 | Opcode::Descendant(_) | Opcode::DescendAll
1261 | Opcode::GetPointer(_) | Opcode::OptField(_) => return true,
1262 Opcode::CallMethod(c) | Opcode::CallOptMethod(c)
1263 if c.sub_progs.iter().any(|p| escapes_doc(p)) => return true,
1264 _ => {}
1265 }
1266 }
1267 false
1268}
1269
1270pub fn selectivity_score(expr: &super::ast::Expr) -> u32 {
1276 use super::ast::{Expr, BinOp};
1277 match expr {
1278 Expr::Bool(true) => 1000, Expr::Bool(false) => 0, Expr::BinOp(_, BinOp::Eq, _) => 1,
1281 Expr::BinOp(_, BinOp::Neq, _) => 5,
1282 Expr::BinOp(_, BinOp::Lt, _) | Expr::BinOp(_, BinOp::Gt, _)
1283 | Expr::BinOp(_, BinOp::Lte, _) | Expr::BinOp(_, BinOp::Gte, _) => 3,
1284 Expr::BinOp(_, BinOp::Fuzzy, _) => 2,
1285 Expr::BinOp(l, BinOp::And, r) =>
1286 selectivity_score(l).min(selectivity_score(r)),
1287 Expr::BinOp(l, BinOp::Or, r) =>
1288 selectivity_score(l) + selectivity_score(r),
1289 Expr::Not(e) => 10u32.saturating_sub(selectivity_score(e)),
1290 Expr::Kind { .. } => 2,
1291 _ => 5,
1292 }
1293}
1294
1295pub fn fold_kind_check(val_ty: VType, target: KindType, negate: bool) -> Option<bool> {
1300 let matches = match (val_ty, target) {
1301 (VType::Null, KindType::Null) => true,
1302 (VType::Bool, KindType::Bool) => true,
1303 (VType::Int | VType::Float | VType::Num, KindType::Number) => true,
1304 (VType::Str, KindType::Str) => true,
1305 (VType::Arr, KindType::Array) => true,
1306 (VType::Obj, KindType::Object) => true,
1307 (VType::Unknown, _) => return None,
1308 _ => false,
1309 };
1310 Some(if negate { !matches } else { matches })
1311}