binaryen-sys 0.13.0

Bindings to the binaryen library
Documentation
/*
 * Copyright 2023 WebAssembly Community Group participants
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "stringify-walker.h"

namespace wasm {

size_t StringifyHasher::operator()(Expression* curr) const {
  if (Properties::isControlFlowStructure(curr)) {
    if (auto* iff = curr->dynCast<If>()) {
      size_t digest = wasm::hash(iff->_id);
      rehash(digest, ExpressionAnalyzer::hash(iff->ifTrue));
      if (iff->ifFalse) {
        rehash(digest, ExpressionAnalyzer::hash(iff->ifFalse));
      }
      return digest;
    }

    return ExpressionAnalyzer::hash(curr);
  }

  return ExpressionAnalyzer::shallowHash(curr);
}

bool StringifyEquator::operator()(Expression* lhs, Expression* rhs) const {
  if (Properties::isControlFlowStructure(lhs) &&
      Properties::isControlFlowStructure(rhs)) {
    auto* iffl = lhs->dynCast<If>();
    auto* iffr = rhs->dynCast<If>();

    if (iffl && iffr) {
      return ExpressionAnalyzer::equal(iffl->ifTrue, iffr->ifTrue) &&
             ExpressionAnalyzer::equal(iffl->ifFalse, iffr->ifFalse);
    }

    return ExpressionAnalyzer::equal(lhs, rhs);
  }

  return ExpressionAnalyzer::shallowEqual(lhs, rhs);
}

void HashStringifyWalker::addUniqueSymbol(SeparatorReason reason) {
  // Use a negative value to distinguish symbols for separators from symbols
  // for Expressions
  assert((uint32_t)nextSeparatorVal >= nextVal);
  if (auto funcStart = reason.getFuncStart()) {
    idxToFuncName.insert({hashString.size(), funcStart->func->name});
  }
  hashString.push_back((uint32_t)nextSeparatorVal);
  nextSeparatorVal--;
  exprs.push_back(nullptr);
}

void HashStringifyWalker::visitExpression(Expression* curr) {
  auto [it, inserted] = exprToCounter.insert({curr, nextVal});
  hashString.push_back(it->second);
  exprs.push_back(curr);
  if (inserted) {
    nextVal++;
  }
}

std::pair<uint32_t, Name>
HashStringifyWalker::makeRelative(uint32_t idx) const {
  // The upper_bound function returns an iterator to the first value in the set
  // that is true for idx < value. We subtract one from this returned value to
  // tell us which function actually contains the the idx.
  auto [funcIdx, func] = *--idxToFuncName.upper_bound(idx);
  return {idx - funcIdx, func};
}

std::vector<SuffixTree::RepeatedSubstring>
StringifyProcessor::repeatSubstrings(std::vector<uint32_t>& hashString) {
  SuffixTree st(hashString);
  std::vector<SuffixTree::RepeatedSubstring> substrings(st.begin(), st.end());
  for (auto& substring : substrings) {
    // Sort by increasing start index to ensure determinism.
    std::sort(substring.StartIndices.begin(), substring.StartIndices.end());
  }
  // Substrings are sorted so that the longest substring that repeats the most
  // times is ordered first. This is done so that we can assume the most
  // worthwhile substrings to outline come first.
  std::sort(
    substrings.begin(),
    substrings.end(),
    [](SuffixTree::RepeatedSubstring a, SuffixTree::RepeatedSubstring b) {
      size_t aWeight = a.Length * a.StartIndices.size();
      size_t bWeight = b.Length * b.StartIndices.size();
      if (aWeight == bWeight) {
        return a.StartIndices[0] < b.StartIndices[0];
      }
      return aWeight > bWeight;
    });
  return substrings;
}

// Deduplicate substrings by iterating through the list of substrings, keeping
// only those whose list of end indices is disjoint from the set of end indices
// for all substrings kept so far. Substrings that are contained within other
// substrings will always share an end index with those other substrings. Note
// that this deduplication may be over-aggressive, since it will remove
// substrings that are contained within any previous substring, even if they
// have many other occurrences that are not inside other substrings. Part of the
// reason dedupe can be so aggressive is an assumption 1) that the input
// substrings have been sorted so that the longest substrings with the most
// repeats come first and 2) these are more worthwhile to keep than subsequent
// substrings of substrings, even if they appear more times.
std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::dedupe(
  const std::vector<SuffixTree::RepeatedSubstring>& substrings) {
  std::unordered_set<uint32_t> seen;
  std::vector<SuffixTree::RepeatedSubstring> result;
  for (auto substring : substrings) {
    std::vector<uint32_t> idxToInsert;
    bool seenEndIdx = false;
    for (auto startIdx : substring.StartIndices) {
      // We are using the end index to ensure that each repeated substring
      // reported by the SuffixTree is unique. This is because LLVM's SuffixTree
      // reports back repeat sequences that are substrings of longer repeat
      // sequences with the same endIdx, and we generally prefer to outline
      // longer repeat sequences.
      uint32_t endIdx = substring.Length + startIdx;
      if (seen.count(endIdx)) {
        seenEndIdx = true;
        break;
      }
      idxToInsert.push_back(endIdx);
    }
    if (!seenEndIdx) {
      seen.insert(idxToInsert.begin(), idxToInsert.end());
      result.push_back(substring);
    }
  }

  return result;
}

std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::filter(
  const std::vector<SuffixTree::RepeatedSubstring>& substrings,
  const std::vector<Expression*>& exprs,
  std::function<bool(const Expression*)> condition) {

  struct FilterStringifyWalker : public StringifyWalker<FilterStringifyWalker> {
    bool hasFilterValue = false;
    std::function<bool(const Expression*)> condition;

    FilterStringifyWalker(std::function<bool(const Expression*)> condition)
      : condition(condition){};

    void walk(Expression* curr) {
      hasFilterValue = false;
      Super::walk(curr);
    }

    void addUniqueSymbol(SeparatorReason reason) {}

    void visitExpression(Expression* curr) {
      if (condition(curr)) {
        hasFilterValue = true;
      }
    }
  };

  FilterStringifyWalker walker(condition);

  std::vector<SuffixTree::RepeatedSubstring> result;
  for (auto substring : substrings) {
    bool hasFilterValue = false;
    for (auto idx = substring.StartIndices[0],
              endIdx = substring.StartIndices[0] + substring.Length;
         idx < endIdx;
         idx++) {
      Expression* curr = exprs[idx];
      if (Properties::isControlFlowStructure(curr)) {
        walker.walk(curr);
        if (walker.hasFilterValue) {
          hasFilterValue = true;
          break;
        }
      }
      if (condition(curr)) {
        hasFilterValue = true;
        break;
      }
    }
    if (!hasFilterValue) {
      result.push_back(substring);
    }
  }

  return result;
}

std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::filterLocalSets(
  const std::vector<SuffixTree::RepeatedSubstring>& substrings,
  const std::vector<Expression*>& exprs) {
  return StringifyProcessor::filter(
    substrings, exprs, [](const Expression* curr) {
      return curr->is<LocalSet>();
    });
}

std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::filterLocalGets(
  const std::vector<SuffixTree::RepeatedSubstring>& substrings,
  const std::vector<Expression*>& exprs) {
  return StringifyProcessor::filter(
    substrings, exprs, [](const Expression* curr) {
      return curr->is<LocalGet>();
    });
}

std::vector<SuffixTree::RepeatedSubstring> StringifyProcessor::filterBranches(
  const std::vector<SuffixTree::RepeatedSubstring>& substrings,
  const std::vector<Expression*>& exprs) {
  return StringifyProcessor::filter(
    substrings, exprs, [](const Expression* curr) {
      return Properties::isBranch(curr) || curr->is<Return>();
    });
}

} // namespace wasm