megenginelite-sys 1.8.2

/**
 * \file src/core/test/graph/defrag.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

#include "megbrain/comp_node_env.h"
#include "megbrain/opr/basic_arith_wrapper.h"
#include "megbrain/opr/io.h"
#include "megbrain/opr/tensor_manip.h"
#include "megbrain/opr/utility.h"

#include "megbrain/test/helper.h"

#include <cstdint>
#include <cstdlib>
#include <string>

using namespace mgb;

#if MGB_CUDA && MGB_ENABLE_EXCEPTION
// defrag only works when exception is enabled

namespace {
void run_graph(size_t mem_reserved, bool enable_defrag) {
    CompNode::try_coalesce_all_free_memory();
    CompNode::finalize();
    auto cn = CompNode::load("gpux");
    cn.sync();  // wait for async init to finish
    size_t size = mem_reserved / (12.1 * 4);

    HostTensorND host_x{cn, dtype::Int32()};
    auto px = host_x.resize({size}).ptr<dt_int32>();
    RNGxorshf rng{next_rand_seed()};
    dt_int32 expect = 0;
    for (size_t i = 0; i < size; ++i) {
        expect += (px[i] = rng());
    }
    expect *= 7;

    auto dev_x = std::make_shared<DeviceTensorND>();
    dev_x->copy_from(host_x);

    auto graph = ComputingGraph::make();
    graph->options().enable_var_mem_defragment = enable_defrag;
    graph->options().force_dynamic_alloc = true;
    graph->options().graph_opt_level = 0;
    graph->options().var_sanity_check_first_run = false;

    auto x0 = opr::SharedDeviceTensor::make(*graph, dev_x).rename("x0"),
         // x1 has rdonly fwd chain
            x1 = opr::Concat::make({x0, x0}, 0)
                         .add_axis(0)
                         .reshape({size * 2})
                         .rename("x1"),
         x2 = opr::Concat::make({x1, x0}, 0).rename("x2"),
         x3 = opr::Concat::make({x2, x0}, 0).rename("x3"),
         x4 = opr::Concat::make({x3, x0}, 0).rename("x4"),
         y0 = opr::reduce_sum(x1, x1.make_scalar(1)).rename("y0"),
         y1 = opr::reduce_sum(x4, x4.make_scalar(1)).rename("y1"),
         y = opr::add(y0, y1, {cn});

    set_priority(y0, 100);  // y0 executes after defrag

    HostTensorND host_y;
    auto func = graph->compile({make_callback_copy(y, host_y)});
    func->execute();
    ASSERT_EQ(expect, host_y.ptr<dt_int32>()[0]);

#if 0
    auto show = [](SymbolVar var) {
        auto size = var.node()->shape().total_nr_elems() * 4;
        const void* begin = var.node()->prev_dev_ptr(),
              *end = static_cast<const dt_byte*>(begin) + size;
        return ssprintf("[%p,%p]%.2fMiB", begin, end, size / 1024.0 / 1024);
    };
    printf("x0=%s\nx1=%s\nx2=%s\nx3=%s\nx4=%s\n",
            show(x0).c_str(),
            show(x1).c_str(),
            show(x2).c_str(),
            show(x3).c_str(),
            show(x4).c_str()
            );
#endif
}
}  // anonymous namespace

/*!
 * Checks that the graph built by run_graph() fails with MemAllocError when
 * var memory defragmentation is disabled and runs successfully when it is
 * enabled, using an explicitly reserved amount of device memory.
 */
TEST(TestGraph, Defragment) {
    REQUIRE_GPU(1);
    CompNode::load("gpux").activate();

    // reserve 92% of the device memory that is currently free
    size_t reserve;
    {
        size_t free, tot;
        MGB_CUDA_CHECK(cudaMemGetInfo(&free, &tot));
        reserve = free * 0.92;
    }
    auto reserve_setting = ssprintf("b:%zu", reserve);

    auto do_run = [reserve]() {
        ASSERT_THROW(run_graph(reserve, false), MemAllocError);
        run_graph(reserve, true);
    };

    // reserve memory explicitly to avoid uncontrollable factors
    constexpr const char* KEY = "MGB_CUDA_RESERVE_MEMORY";

    // Copy the previous value before touching the environment: POSIX allows
    // the storage returned by getenv() to be overwritten or invalidated by a
    // later setenv()/unsetenv(), so the raw pointer must not be kept around.
    const char* old_raw = getenv(KEY);
    const bool had_old_value = old_raw != nullptr;
    std::string old_value;
    if (had_old_value) {
        old_value = old_raw;
    }

    // put the environment variable back and finalize the comp nodes
    auto restore = [&]() {
        if (had_old_value) {
            setenv(KEY, old_value.c_str(), 1);
        } else {
            unsetenv(KEY);
        }
        CompNode::try_coalesce_all_free_memory();
        CompNode::finalize();
    };

    setenv(KEY, reserve_setting.c_str(), 1);
    // the restore step is meant to run whether or not do_run() throws
    MGB_TRY { do_run(); }
    MGB_FINALLY(restore(););
}
#endif  // MGB_CUDA && MGB_ENABLE_EXCEPTION

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}