v8 147.3.0 - Docs.rs

# Copyright 2025 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

from __future__ import annotations

import collections
import contextlib
import dataclasses
import enum
import functools
import itertools
import pathlib
import re

# Almost every sysroot just uses #include <>, but fuchsia uses #include ""
# sometimes.
_INCLUDES = re.compile(r'#\s*(?:include|import)(_next)?\s*["<]([^>"]+)[>"]')


class CompileStatus(enum.Enum):
  NotCompiled = 1
  Success = 2
  Failure = 3


class IncludeDir(enum.Enum):
  # Ordered by include order for clang
  LibCxx = 1
  Builtin = 2
  SysrootModule = 3
  Sysroot = 4
  Framework = 5

  def __lt__(self, other):
    return self.value < other.value


@dataclasses.dataclass
class Header:
  include_dir: IncludeDir
  # The string to use in your #include statement to get this file.
  rel: str
  # The absolute path to the file
  # This should be filled by the time the compiler finishes.
  abs: pathlib.Path = None
  # Prev and next come from include_next / include_prev
  prev: None | Header = None
  next: None | Header = None
  root_module: None | str = None
  textual: bool = False
  umbrella: bool = False
  compile_status: CompileStatus = CompileStatus.NotCompiled

  deps: list[Header] = dataclasses.field(default_factory=list)
  direct_deps: set[Header] = dataclasses.field(default_factory=set)
  # Here, None means no exports, and the empty list means 'export *'
  # We default to exporting all to preserve the behaviour of includes.
  exports: None | list[str] = dataclasses.field(default_factory=list)

  # Kwargs that will end up on the BUILD.gn targets.
  kwargs: dict[str, list[str]] = dataclasses.field(
      default_factory=lambda: collections.defaultdict(list))

  def __hash__(self):
    return hash((self.include_dir, self.rel))

  def __eq__(self, other):
    if isinstance(other, Header):
      return (self.include_dir, self.rel) == (other.include_dir, other.rel)
    else:
      # This allows you to write (Sysroot, 'foo.h') in set[Header]
      return (self.include_dir, self.rel) == other

  def __lt__(self, other):
    return (self.include_dir, self.rel) < (other.include_dir, other.rel)

  @property
  def pretty_name(self):
    return f'{self.include_dir.name}/{self.rel}'

  def __repr__(self):
    return self.pretty_name

  @property
  def submodule_name(self):
    return self.rel.replace('.', '_').replace('/', '_').replace('-', '_')

  @functools.cached_property
  def content(self) -> str:
    return self.abs.read_text(errors='ignore')

  def calculate_direct_deps(self, includes: dict[str, Header],
                            sysroot: pathlib.Path) -> set[Header]:

    direct = set()
    found_includes = _INCLUDES.findall(self.content)

    def find_include(is_next, include) -> bool:
      header = None
      first = includes.get(include, None)
      if first is not None:
        # When modules are enabled, #include_next<foo.h> from any file other
        # than foo.h is treated the same as #include <foo.h>.
        if not is_next or (is_next and self.rel != include):
          header = first
        elif self.next is not None:
          header = self.next

        # It might have been conditionally included.
        if header is not None and header in self.deps:
          direct.add(header)
          return True
      return False

    for is_next, include in found_includes:
      if not find_include(is_next, include):
        # This is required, because, for example, libcxx's threading includes
        # pthread.h, but the include scanner sees pthread/pthread.h (the
        # symlink target).
        with contextlib.suppress(OSError, FileNotFoundError):
          find_include(is_next, str((sysroot / include).readlink()))

    return direct

  @functools.cache
  def _required_deps(self) -> tuple[set[Header], set[Header]]:
    nontextual = set()
    textual = set()
    todo = [self]
    while todo:
      hdr = todo.pop()
      for dep in hdr.direct_deps:
        if dep.textual and dep not in textual:
          todo.append(dep)
          textual.add(dep)
        elif not dep.textual:
          nontextual.add(dep)
    return nontextual, textual

  @property
  def required_deps(self) -> set[Header]:
    """The header files required to be built before we can build."""
    return self._required_deps()[0]

  @property
  def required_textual_deps(self) -> set[Header]:
    """The textual header files we directly include.

    This includes textual headers included via other textual headers"""
    return self._required_deps()[1]

  def find_loop(self) -> list[Header] | None:
    """Finds a loop of #includes, if it exists."""
    chain = [self]
    has_chain = True
    while has_chain:
      has_chain = False
      if self in chain[-1].direct_deps:
        return chain + [self]
      for dep in chain[-1].direct_deps:
        if dep not in chain and self in dep.deps:
          chain.append(dep)
          has_chain = True
          break
    # It shouldn't be possible to have a node that has you as a transitive
    # dep without having a dep that has you as a transitive dep.
    assert len(chain) == 1


def calculate_rdeps(headers: list[Header]) -> dict[Header, list[Header]]:
  """Calculates a reverse dependency graph"""
  rdeps = collections.defaultdict(list)
  for header in headers:
    for dep in header.deps:
      rdeps[dep].append(header)
  return rdeps


def all_headers(graph: dict[str, Header]):
  """Iterates through all headers in a graph."""
  for header in graph.values():
    while header is not None:
      yield header
      header = header.next


@dataclasses.dataclass
class Target:
  """Represents a single clang module / gn target."""
  include_dir: IncludeDir
  name: str
  headers: list[Header] = dataclasses.field(default_factory=list)

  def __lt__(self, other):
    return self.name < other.name

  def __eq__(self, other):
    return self.name == other.name

  def __hash__(self):
    return hash(self.name)

  @property
  def kwargs(self) -> dict[str, set[str]]:
    """The kwargs associated with a build target.

    eg. if you have kwargs = {"defines": ["FOO"]}, then it outputs:

    target_type(target.name) {
      defines = ["FOO"]
    }
    """
    kwargs = collections.defaultdict(set)
    for header in self.headers:
      for single in header.group:
        for dep in {single} | single.required_textual_deps:
          for k, v in dep.kwargs.items():
            kwargs[k].update(v)
    return kwargs

  @property
  def header_deps(self) -> set[Header]:
    direct_deps = set()
    for hdr in self.headers:
      direct_deps.update(hdr.required_deps)
    return direct_deps

  @property
  def public_deps(self) -> list[str]:
    return sorted(
        set([
            hdr.root_module for hdr in self.header_deps
            if hdr.root_module is not None and hdr.root_module != self.name
        ]))


def run_build(graph: dict[str, Header]) -> list[Target]:
  """Calculates the correct way to run a build."""
  unbuilt_modules: dict[str, list[Header]] = collections.defaultdict(list)
  unbuilt_headers: set[Header] = set()

  for header in all_headers(graph):
    if not header.textual:
      if header.root_module is None:
        unbuilt_headers.add(header)
      else:
        unbuilt_modules[header.root_module].append(header)
    header.rdeps = set()
    # A group is a set of headers that must be compiled together, because they
    # form a dependency loop.
    header.group = [header]
    header.mod_deps = set()
    header.unbuilt_deps = set(
        dep for dep in header.required_deps
        # You don't need to wait for a dependency within the same module.
        if (header.root_module is None or header.root_module != dep.root_module)
        and dep != header)

  # Perform a union find to find all dependency loops.
  # Since we can easily tell if a given edge represents a dependency loop, we
  # simply union together all pairs of nodes on loop edges.
  # We perform a simple form of union find where we don't bother with rank or
  # size, and only do path compression (always on the lexicographically first
  # header). It's slower but still plenty fast and simplifies things.
  parents = {}

  def find(header):
    if header in parents:
      # Optimization: path compression
      parents[header] = find(parents[header])
      return parents[header]
    else:
      return header

  for header in sorted(unbuilt_headers):
    for dep in header.required_deps:
      if dep > header and header in dep.deps:
        assert header.include_dir == IncludeDir.Sysroot and dep.include_dir == IncludeDir.Sysroot, (
            header, dep)
        # Perform the 'union' operation.
        x, y = sorted([find(header), find(dep)])
        if x != y:
          parents[y] = x

  loops = collections.defaultdict(list)
  for header in unbuilt_headers:
    loops[find(header)].append(header)

  for headers in loops.values():
    # Not a loop
    if len(headers) == 1:
      continue
    headers.sort()
    headers[0].group = headers
    headers[0].unbuilt_deps = set.union(
        *[header.unbuilt_deps for header in headers]) - set(headers)
    for header in headers[1:]:
      unbuilt_headers.remove(header)

  for header in all_headers(graph):
    for dep in header.unbuilt_deps:
      dep.rdeps.add(header)

  build_gn = []

  for i in itertools.count():
    # Try and build any buildable modules from modulemaps.
    while True:
      n_remaining = len(unbuilt_modules)
      for mod, headers in list(unbuilt_modules.items()):
        if not any(header.unbuilt_deps for header in headers):
          build_gn.append(
              Target(
                  include_dir=headers[0].include_dir,
                  name=mod,
                  headers=sorted(headers),
              ))
          del unbuilt_modules[mod]
          for header in headers:
            for rdep in header.rdeps:
              rdep.mod_deps.add(header.root_module)
              rdep.unbuilt_deps.remove(header)

      if n_remaining == len(unbuilt_modules):
        break

    # Try and build any builtable sysroot modules
    sysroot_mod = f'sys_stage{i + 1}'
    build_gn.append(Target(
        include_dir=IncludeDir.Sysroot,
        name=sysroot_mod,
    ))
    while True:
      n_remaining = len(unbuilt_headers)
      for header in list(unbuilt_headers):
        if not header.unbuilt_deps:
          build_gn[-1].headers.append(header)
          unbuilt_headers.remove(header)
          for header in header.group:
            header.root_module = sysroot_mod
            for rdep in header.rdeps:
              rdep.mod_deps.add(header.root_module)
              rdep.unbuilt_deps.remove(header)

      if n_remaining == len(unbuilt_headers):
        break

    build_gn[-1].headers.sort()
    if not build_gn[-1].headers:
      break

  build_gn.pop()

  if not unbuilt_modules and not unbuilt_headers:
    # Success. Everything is built
    return build_gn
  else:
    print(
        "Dependency loop in sysroot. You probably want to make one of them textual."
    )
    print("The following headers are in a dependency loop:")
    seen = set()
    for header in unbuilt_headers:
      if header not in seen:
        chain = header.find_loop()
        if chain is not None:
          print(' -> '.join([header.pretty_name for header in chain]))
          seen.update(chain)

    # If you get to this point, you probably want a debugger to help understand what the problem is.
    breakpoint()
    exit(1)