mumu 0.10.0 - Docs.rs

{
  "id": "fn-gpu-reduce_sum",
  "dataComponent": "gpu",
  "heading": {
    "title": "reduce_sum",
    "badges": ["GPU"]
  },
  "synopsis": "Sums all elements of a 2D float32 tensor on the GPU, returning a single numeric Value. If GPU is not implemented, logs fallback CPU.",
  "codeBlocks": [
    "extend(\"gpu\")\\n\\nX = gpu:to_tensor([[1,2,3],[4,5,6]])\\nval = gpu:reduce_sum(X)\\nslog(val)\\n# => 21.0 (1+2+3+4+5+6)\\n\\n# If bridging doesn't have a real GPU kernel, logs fallback CPU path.\\n\\n# Use case => summing entire matrix for debugging, cost calculation, or normalizing data.\\n"
  ],
  "notes": [
    "gpu:reduce_sum => expects exactly one 2D float32 tensor. Returns a single float result (like 21.0).",
    "If bridging is incomplete, logs fallback CPU. For large shapes, a real GPU kernel can speed up summation significantly, if available.",
    "No partial usage is typically provided for reduce_sum; it requires exactly 1 argument."
  ]
}