1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
/**
* \file src/core/include/megbrain/tensor.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#include "megbrain/common.h"
#include "megbrain/comp_node.h"
#include "megbrain/dtype.h"
#include "megbrain/utils/metahelper.h"
#include "megdnn/basic_types.h"
#include <limits>
#include <memory>
namespace mgb {
//! make the megdnn tensor description types (and their array variants)
//! available under namespace mgb
using ::megdnn::TensorFormat;
using ::megdnn::TensorLayout;
using ::megdnn::TensorShape;
using ::megdnn::TensorFormatArray;
using ::megdnn::TensorLayoutArray;
using ::megdnn::TensorShapeArray;
/*!
 * \brief describes where a subtensor lives inside a larger tensor
 *
 * A spec is the pair (layout of the subtensor, start offset); the offset is
 * counted in logical elements of that layout rather than in bytes.
 */
class SubTensorSpec {
    //! layout of the subtensor
    TensorLayout m_layout;
    //! start offset of the subtensor, in number of logical elements
    ptrdiff_t m_offset_elem = 0;

    //! private constructor that stores the given layout and element offset
    SubTensorSpec(const TensorLayout& layout, ptrdiff_t offset_elem)
            : m_layout{layout}, m_offset_elem{offset_elem} {}

public:
    //! default spec: default-constructed layout, zero offset
    SubTensorSpec() = default;

    /*!
     * \brief create a SubTensorSpec from a layout and an offset
     * \param layout layout of the subtensor
     * \param offset_elem offset in number of elements
     */
    MGE_WIN_DECLSPEC_FUC static SubTensorSpec make_from_offset_elem(
            const TensorLayout& layout, ptrdiff_t offset_elem);

    //! create a SubTensorSpec from a layout, with the offset set to zero
    static SubTensorSpec make_from_layout(const TensorLayout& layout) {
        return make_from_offset_elem(layout, 0);
    }

    //! layout of the subtensor
    const TensorLayout& layout() const { return m_layout; }

    //! offset counted in logical elements of the layout
    ptrdiff_t offset_elem() const { return m_offset_elem; }

    /*!
     * \brief offset converted to bytes
     *
     * For low-bit dtypes the offset has to fall on a byte boundary, i.e.
     * offset_elem * low_bit must be a multiple of 8; this is asserted.
     */
    ptrdiff_t offset_byte() const {
        mgb_assert(
                !m_layout.dtype.is_low_bit() ||
                !(m_offset_elem * m_layout.dtype.low_bit() % 8));
        const auto& elem_type = m_layout.dtype;
        return elem_type.size(m_offset_elem);
    }

    /*!
     * \brief merge with another SubTensorSpec: the offset is accumulated and
     *      the layout is replaced by that of \p rhs
     */
    MGE_WIN_DECLSPEC_FUC void merge_with(const SubTensorSpec& rhs);
};
/*!
 * \brief a slice along one axis, indexed as in Python (negative indices are
 *      supported)
 *
 * A scalar index idx can be expressed as a Slice as well, using
 * m_begin = idx, m_end = idx + 1 and m_step = 1. The m_is_scalar_idx flag
 * records whether the Slice was produced from such a scalar index.
 */
class Slice {
    //! optional begin of the slice
    Maybe<ptrdiff_t> m_begin;
    //! optional end of the slice
    Maybe<ptrdiff_t> m_end;
    //! optional step of the slice
    Maybe<ptrdiff_t> m_step;
    //! whether this Slice comes from a scalar index
    bool m_is_scalar_idx;

public:
    /*!
     * \brief construct a slice; every bound defaults to None
     * \param begin optional begin index
     * \param end optional end index
     * \param step optional step
     * \param is_scalar_idx whether the slice represents a scalar index
     */
    Slice(Maybe<ptrdiff_t> begin = None, Maybe<ptrdiff_t> end = None,
          Maybe<ptrdiff_t> step = None, bool is_scalar_idx = false)
            : m_begin{begin},
              m_end{end},
              m_step{step},
              m_is_scalar_idx{is_scalar_idx} {}

    /*!
     * \brief apply this slice to a tensor layout and obtain the matching
     *      subtensor spec
     * \param layout layout to be sliced
     * \param axis axis on which the slice is applied; -1 can be used for a
     *      flattened layout
     */
    MGE_WIN_DECLSPEC_FUC SubTensorSpec apply(TensorLayout layout, int axis) const;
};
//! forward declaration of the storage template; the two trait classes are
//! only declared (not defined) in this header
template <class Trait>
class TensorStorage;
class DeviceTensorStorageTrait;
class HostTensorStorageTrait;
//! storage types obtained by instantiating TensorStorage with the host and
//! device traits respectively
using HostTensorStorage = TensorStorage<HostTensorStorageTrait>;
using DeviceTensorStorage = TensorStorage<DeviceTensorStorageTrait>;
/*!
 * \brief manager of the raw memory behind a tensor
 *
 * No dtype information is stored here; all sizes and offsets are in bytes.
 *
 * ensure_size() is lazy: memory allocation only happens when ptr() or sub()
 * is called.
 */
template <class Trait>
class TensorStorage {
public:
    //! reference-counted handle of the underlying byte buffer
    using RawStorage = std::shared_ptr<dt_byte>;

    TensorStorage() = default;
    TensorStorage(CompNode comp_node) : m_comp_node(comp_node) {}

    TensorStorage(TensorStorage&&) noexcept = default;
    TensorStorage& operator=(TensorStorage&&) noexcept = default;

    //! copy construction is implemented via copy assignment
    TensorStorage(const TensorStorage& rhs) { this->operator=(rhs); }
    MGE_WIN_DECLSPEC_FUC TensorStorage& operator=(const TensorStorage& rhs);

    /*!
     * \brief whether the given tensor span is valid in this storage
     *
     * The comp node must be valid, the low end of the span shifted by the
     * current offset must not be negative, and the high end must not exceed
     * size().
     */
    bool valid_span(const TensorLayout::Span& span) const {
        if (!m_comp_node.valid()) {
            return false;
        }
        const bool low_in_range =
                static_cast<ptrdiff_t>(m_offset) + span.low_byte >= 0;
        return low_in_range && span.high_byte <= size();
    }

    /*!
     * \brief ensure that the storage could hold at least \p sz bytes
     *
     * Note
     * 1. This method is lazy; the size would only be changed when memory
     *    must be accessed.
     * 2. This method only grows the storage; it never releases memory.
     */
    MGE_WIN_DECLSPEC_FUC TensorStorage& ensure_size(size_t sz);

    /*!
     * \brief get a sub storage that shares the memory; the returned storage
     *      is not allowed to realloc
     * \param offset offset given in bytes
     */
    MGE_WIN_DECLSPEC_FUC TensorStorage sub(ptrdiff_t offset) const;

    //! apply the pending lazy resize and get the data pointer
    dt_byte* ptr() const {
        auto& self = const_cast<TensorStorage&>(*this);
        return self.apply_lazy_and_get_ptr();
    }

    //! usable size in bytes until the end of the allocated block
    size_t size() const { return m_size; }

    //! offset on the allocated block, in bytes
    size_t offset() const { return m_offset; }

    //! get the underlying comp node; an error is raised if it is invalid
    CompNode comp_node() const {
        check_comp_node_valid();
        return m_comp_node;
    }

    //! get the underlying comp node without checking its validity
    CompNode comp_node_allow_invalid() const { return m_comp_node; }

    //! whether the underlying comp node is valid
    bool comp_node_valid() const { return m_comp_node.valid(); }

    /*!
     * \brief whether this storage has no valid element (either due to
     *      reaching the end of the mem chunk or no mem allocated)
     */
    bool empty() const { return m_size == 0; }

    /*!
     * \brief chain-style computing node setter
     *
     * Note that if \p allow_mem_node_change is true and the memory node is
     * changed, the underlying data would be released and this storage would
     * become empty.
     */
    MGE_WIN_DECLSPEC_FUC TensorStorage& comp_node(
            CompNode node, bool allow_mem_node_change = false);

    /*!
     * \brief copy from another TensorStorage, possibly of another storage
     *      type
     *
     * This storage must have been initialized.
     *
     * \param src storage to copy from
     * \param size number of bytes to be copied; must not exceed the size of
     *      this or of \p src
     */
    template <class RTrait>
    MGE_WIN_DECLSPEC_FUC void copy_from(
            const TensorStorage<RTrait>& src, size_t size) const;

    //! reset the tensor storage to the given memory area
    MGE_WIN_DECLSPEC_FUC void reset(CompNode node, size_t size, RawStorage data);

    /*!
     * \brief reset the tensor storage to the given memory area, with an
     *      explicit \p offset
     *
     * NOTE(review): how this differs from reset() beyond the extra offset
     * argument is not visible in this header; see the implementation.
     */
    MGE_WIN_DECLSPEC_FUC void only_reset_raw_storage(
            CompNode node, size_t size, RawStorage data, size_t offset);

    /*!
     * \brief make a TensorStorage that shares memory with another
     *      TensorStorage of a different storage type
     *
     * This method can be used to convert between HostTensorStorage and
     * DeviceTensorStorage; \p src must be on a CPU memory node.
     */
    template <
            class RTrait,
            typename = std::enable_if_t<!std::is_same<Trait, RTrait>::value>>
    MGE_WIN_DECLSPEC_FUC static TensorStorage make_proxy(
            const TensorStorage<RTrait>& src);

    /*!
     * \brief make a DeviceTensorStorage on default_cpu that shares memory
     *      with this
     *
     * Only available when this is a HostTensorStorage. Alignment is not
     * checked. The pending lazy resize is applied first; the ref_ptr
     * argument of the private constructor is left at its default.
     */
    template <
            bool x = true,
            typename = std::enable_if_t<
                    x && std::is_same<Trait, HostTensorStorageTrait>::value>>
    DeviceTensorStorage proxy_to_default_cpu() const {
        ptr();
        return {true, CompNode::default_cpu(), m_size, m_capacity, m_offset, m_data};
    }

    /*!
     * \brief shortcut for raw_storage().use_count() that won't trigger a
     *      lazy alloc
     *
     * While the logical size exceeds the capacity, 1 is returned without
     * touching raw_storage().
     */
    size_t use_count() const {
        const bool pending_lazy_alloc = m_size > m_capacity;
        if (!pending_lazy_alloc) {
            return raw_storage().use_count();
        }
        return 1;
    }

    //! whether the current capacity is 0 (so we are waiting for lazy init)
    bool has_no_real_storage() const { return m_capacity == 0; }

    //! get the underlying raw reference-counted storage
    const RawStorage& raw_storage() const {
        // the lazy resize has to be applied before m_data is handed out
        ptr();
        return m_data;
    }

    //! apply the lazy resize, then get the shared ref pointer of this storage
    std::shared_ptr<void*> get_ref_ptr() const {
        ptr();
        return m_ref_ptr;
    }

private:
    //! storages of other traits need access to the private members
    template <class T>
    friend class TensorStorage;

    bool m_allow_realloc = true;
    CompNode m_comp_node;

    //! current logical size; may exceed m_capacity, in which case memory
    //! would be allocated when ptr() is called
    size_t m_size = 0;

    //! usable size until the end of the allocated data block, excluding the
    //! offset
    size_t m_capacity = 0;

    //! offset on m_data
    size_t m_offset = 0;

    RawStorage m_data;

    //! shared slot holding a void*; starts out holding nullptr
    std::shared_ptr<void*> m_ref_ptr =
            std::make_shared<void*>(static_cast<void*>(nullptr));

    //! used internally for returning a predefined TensorStorage
    TensorStorage(
            bool allow_realloc, CompNode comp_node, size_t size, size_t capacity,
            size_t offset, const RawStorage& data,
            std::shared_ptr<void*> ref_ptr =
                    std::make_shared<void*>(static_cast<void*>(nullptr)))
            : m_allow_realloc(allow_realloc),
              m_comp_node(comp_node),
              m_size(size),
              m_capacity(capacity),
              m_offset(offset),
              m_data(data),
              m_ref_ptr(ref_ptr) {}

    //! call on_invalid_comp_node() if the comp node is not valid
    void check_comp_node_valid() const {
        if (mgb_unlikely(!m_comp_node.valid())) {
            on_invalid_comp_node();
        }
    }

    //! implementation of ptr(); defined out of line
    MGE_WIN_DECLSPEC_FUC dt_byte* apply_lazy_and_get_ptr();

    //! error path of check_comp_node_valid(); never returns
    [[noreturn]] MGE_WIN_DECLSPEC_FUC static void on_invalid_comp_node();
};
//! forward declaration of the tensor template, parameterized by storage type
template <class TensorStorage>
class TensorND;
//! tensor types built on the host and device storage types respectively
using HostTensorND = TensorND<HostTensorStorage>;
using DeviceTensorND = TensorND<DeviceTensorStorage>;
/*!
* \brief n-dimensional tensor
*
* Note that TensorND is built on TensorStorage, which has some lazy behavior.
*/
template <class TensorStorage>
class TensorND {
TensorStorage m_storage;
TensorLayout m_layout;
public:
using ChainReturnType = TensorND<TensorStorage>;
MGE_WIN_DECLSPEC_FUC TensorND();
MGE_WIN_DECLSPEC_FUC explicit TensorND(CompNode node);
MGE_WIN_DECLSPEC_FUC explicit TensorND(DType dtype);
MGE_WIN_DECLSPEC_FUC TensorND(CompNode node, DType dtype);
//! allocate contiguous tensor
MGE_WIN_DECLSPEC_FUC TensorND(
CompNode node, const TensorShape& shape, DType dtype = dtype::Float32{});
MGE_WIN_DECLSPEC_FUC TensorND(
CompNode node, const TensorShape& shape, DType dtype, TensorFormat format);
//! allocate contiguous tensor from given comp node and layout; layout
//! is required to be contiguous, and its dtype and format would be used
MGE_WIN_DECLSPEC_FUC TensorND(CompNode node, const TensorLayout& layout);
/* ================= shape and basic functionality ================= */
//! get subtensor according to given slices
MGE_WIN_DECLSPEC_FUC ChainReturnType
operator[](std::initializer_list<Slice> slice) const;
//! get subtensor according to spec
MGE_WIN_DECLSPEC_FUC ChainReturnType sub(const SubTensorSpec& spec) const;
//! whether underlying storage is empty
bool empty() const { return m_storage.empty(); }
//! whether tensor shape is valid (i.e. ndim != 0)
bool shape_valid() const { return m_layout.ndim; }
const TensorShape& shape() const { return m_layout; }
const TensorLayout& layout() const { return m_layout; }
//! shape at given dimension, with boundary check
size_t shape(size_t dim) const {
mgb_assert(dim < m_layout.ndim);
return m_layout.shape[dim];
}
//! get ptr at given index
template <typename T, typename Iter>
T* ptr(Iter idx_begin, Iter idx_end) {
auto ptr = this->template ptr<T>();
size_t nidx = 0;
while (idx_begin != idx_end) {
mgb_assert(nidx < m_layout.ndim);
size_t idx = *idx_begin;
mgb_assert(idx < m_layout.shape[nidx]);
ptr += m_layout.stride[nidx] * idx;
++idx_begin;
++nidx;
}
return ptr;
}
template <typename T>
T* ptr(std::initializer_list<size_t> idx) {
return ptr<T>(idx.begin(), idx.end());
}
template <typename T>
const T* ptr(std::initializer_list<size_t> dim) const {
return const_cast<TensorND&>(*this).ptr<T>(dim);
}
//! get ptr of buffer start; *T* must match dtype
template <typename T>
T* ptr() const {
m_layout.dtype.assert_is_ctype<T>();
return m_storage.ptr()->template as<T>();
}
dt_byte* raw_ptr() const { return m_storage.ptr(); }
/*!
* \brief change the shape without retaining old data, and initialize as
* contiguous stride
*
* dtype and format would not be changed
*/
MGE_WIN_DECLSPEC_FUC ChainReturnType& resize(const TensorShape& shape);
/*!
* \brief totally reset the tensor to given storage and layout
*/
MGE_WIN_DECLSPEC_FUC ChainReturnType& reset(
TensorStorage storage, const TensorLayout& layout);
MGE_WIN_DECLSPEC_FUC ChainReturnType& only_reset_raw_storage(TensorStorage storage);
/* ================= getter and setters ================= */
/*!
* \brief change comp node; see TensorStorage::comp_node()
*/
MGE_WIN_DECLSPEC_FUC ChainReturnType& comp_node(
CompNode comp_node, bool allow_mem_node_change = false);
CompNode comp_node() const { return m_storage.comp_node(); }
const TensorStorage& storage() const { return m_storage; }
/*!
* \brief change the storage and invalidate all data, resulting in an
* empty tensor
*/
MGE_WIN_DECLSPEC_FUC ChainReturnType& storage(const TensorStorage& storage);
//! get data type
DType dtype() const { return m_layout.dtype; }
//! get tensor format
TensorFormat format() const { return m_layout.format; }
/*!
* \brief change underlying dtype
*
* layout would be cleared (reset to ndim=0) if dtype actually changes
*/
MGE_WIN_DECLSPEC_FUC ChainReturnType& dtype(DType dtype);
/*!
* \brief change underlying tensor format
*
* layout would be cleared (reset to ndim=0) if format actually changes
*/
MGE_WIN_DECLSPEC_FUC ChainReturnType& format(TensorFormat format);
/*!
* \brief copy from another tensor and initialize contiguous layout
*
* Note:
* 1. If the computing node is empty, it would be copied from src
* 2. To copy from device to host, if the two tensors reside on
* different computing nodes, the caller is responsible to perform
* sync before copying; a better way is to set empty computing node
* to host tensor.
* 3. For cross-device copy: copy would be synced on comp node of this,
* and the caller is responsible to sync this comp node with src comp
* node.
* 4. If dtype is valid, it would be checked to match the dtype of src.
* 5. Format would be reset to default and layout would be initialized
* to be contiguous.
*/
template <class RStorage>
MGE_WIN_DECLSPEC_FUC ChainReturnType& copy_from(const TensorND<RStorage>& src);
/*!
* \brief copy from another tensor of the same shape, retaining current
* layout
*
* If storage type of src and this are different and src is not
* contiguous, a temporary storage would be allocated to first make src
* contiguous.
*/
template <class RStorage>
MGE_WIN_DECLSPEC_FUC const ChainReturnType& copy_from_fixlayout(
const TensorND<RStorage>& src) const;
//! non-const version of copy_from_fixlayout
template <class RStorage>
ChainReturnType& copy_from_fixlayout(const TensorND<RStorage>& src) {
return const_cast<ChainReturnType&>(
static_cast<const ChainReturnType*>(this)->copy_from_fixlayout(src));
}
//! convert to megdnn::TensorND
megdnn::TensorND as_megdnn() const {
megdnn::RefPtr ref_ptr(m_storage.get_ref_ptr(), m_storage.offset(), false);
return {m_layout, ref_ptr};
}
/* ================= misc ================= */
/*!
* \brief block host thread to synchronize with the CompNode
*/
const ChainReturnType& sync() const {
comp_node().sync();
return static_cast<const ChainReturnType&>(*this);
}
ChainReturnType& sync() {
return const_cast<ChainReturnType&>(
static_cast<const ChainReturnType*>(this)->sync());
}
//! similar to TensorStorage<>::make_proxy
template <
class RStorage, typename = typename std::enable_if<!std::is_same<
TensorStorage, RStorage>::value>::type>
static ChainReturnType make_proxy(const TensorND<RStorage>& src) {
ChainReturnType ret;
ret.reset(TensorStorage::make_proxy(src.storage()), src.layout());
return ret;
}
//! similar to HostTensorStorage::proxy_to_default_cpu
template <
bool x = true,
typename = std::enable_if_t<
x && std::is_same<TensorStorage, HostTensorStorage>::value>>
DeviceTensorND proxy_to_default_cpu() const {
DeviceTensorND ret;
ret.reset(storage().proxy_to_default_cpu(), layout());
return ret;
}
template <
bool x = true,
typename = std::enable_if_t<
x && std::is_same<TensorStorage, HostTensorStorage>::value>>
HostTensorND proxy_to_comp_node(CompNode cn) const {
HostTensorStorage host_storage;
host_storage.reset(cn, m_storage.size(), m_storage.raw_storage());
HostTensorND ret;
ret.reset(host_storage, m_layout);
return ret;
}
};
/*!
* \brief call memset on the data of a device tensor
*
* Declaration only; the implementation is not part of this header.
*
* \param tensor device tensor whose data is to be set
* \param val value to memset with
*/
MGE_WIN_DECLSPEC_FUC void dev_tensor_memset(const DeviceTensorND& tensor, int val);
/*!
 * \brief fill the content of a device tensor with zeros
 *
 * Shortcut for dev_tensor_memset() with a value of 0.
 *
 * \param tensor device tensor to be zero-filled
 */
static inline void fill_zero_dev_tensor(const DeviceTensorND& tensor) {
    constexpr int zero_val = 0;
    dev_tensor_memset(tensor, zero_val);
}
} // namespace mgb
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}