Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion src/relay/backend/graph_executor_codegen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,12 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslator<std::vector<
if (num_unknown_devices == 0) {
node->attrs_["device_index"] = device_types;
}
// storage scope
std::vector<std::string> storage_scope;
for (const auto& virtual_device : storage_info->virtual_devices) {
storage_scope.push_back(std::string(virtual_device->memory_scope));
}
node->attrs_["storage_scope"] = std::move(storage_scope);
auto node_id = nodes_.size();
nodes_.push_back(node);
// Tuple return value, flatten as tuple
Expand Down Expand Up @@ -432,7 +438,6 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslator<std::vector<
return AddNode(node, call);
}
} else if (!call_node->attrs.defined()) { // Call is an extern function
std::cout << "call_node: \n" << PrettyPrint(call) << std::endl;
const auto* func = call_node->op.as<GlobalVarNode>();
ICHECK(func) << "Expected the operator to be a global var, but got "
<< call_node->op->GetTypeKey(); // getting a relay fn here, not sure why.
Expand Down Expand Up @@ -529,12 +534,15 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslator<std::vector<
size_t num_entry = 0;
ShapeVector shapes;
std::vector<size_t> storage_ids;
std::vector<std::string> storage_scopes;
std::vector<size_t> device_types;
std::vector<std::string> dltypes;
std::vector<size_t> node_row_ptr{0};
for (auto node : nodes_) {
const auto& shape_vec = dmlc::get<ShapeVector>(node->attrs_["shape"]);
const auto& storage_id = dmlc::get<std::vector<int64_t>>(node->attrs_["storage_id"]);
const auto& storage_scope =
dmlc::get<std::vector<std::string>>(node->attrs_["storage_scope"]);
const auto& dtype_vec = dmlc::get<std::vector<std::string>>(node->attrs_["dtype"]);

ICHECK_EQ(node->num_outputs_, shape_vec.size());
Expand All @@ -543,12 +551,25 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslator<std::vector<
shapes.insert(shapes.end(), shape_vec.begin(), shape_vec.end());
dltypes.insert(dltypes.end(), dtype_vec.begin(), dtype_vec.end());
storage_ids.insert(storage_ids.end(), storage_id.begin(), storage_id.end());
storage_scopes.insert(storage_scopes.end(), storage_scope.begin(), storage_scope.end());
if (node->attrs_.count("device_index")) {
const auto& dev_types = dmlc::get<std::vector<int64_t>>(node->attrs_["device_index"]);
device_types.insert(device_types.end(), dev_types.begin(), dev_types.end());
}
node_row_ptr.push_back(num_entry);
}

// Check whether any entry uses a non-global memory scope (anything other than
// an empty string or "global"). If none does, drop the collected scopes so that
// no "storage_scope" attribute is written to the JSON at all.
bool global_only_scope = true;
for (const auto& ss : storage_scopes) {
if (!(ss.empty() || ss == "global")) {
global_only_scope = false;
}
}
if (global_only_scope) {
storage_scopes.clear();
}
writer->BeginObject();
writer->WriteObjectKeyValue("nodes", nodes_);
writer->WriteObjectKeyValue("arg_nodes", arg_nodes);
Expand All @@ -562,6 +583,10 @@ class GraphExecutorCodegen : public backend::MemoizedExprTranslator<std::vector<
attrs["device_index"].emplace_back(std::string("list_int"));
attrs["device_index"].emplace_back(device_types);
}
if (storage_scopes.size()) {
attrs["storage_scope"].emplace_back(std::string("list_str"));
attrs["storage_scope"].emplace_back(storage_scopes);
}
attrs["dltype"].emplace_back(std::string("list_str"));
attrs["dltype"].emplace_back(dltypes);
writer->WriteObjectKeyValue("attrs", attrs);
Expand Down
58 changes: 44 additions & 14 deletions src/runtime/graph_executor/graph_executor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
#include <vector>

#include "../file_utils.h"
#include "../texture.h"

namespace tvm {
namespace runtime {
Expand All @@ -51,6 +52,7 @@ inline size_t GetDataAlignment(const DLTensor& arr) {
if (align < kAllocAlignment) return kAllocAlignment;
return align;
}
constexpr auto Is2DStorage = IsTextureStorage;
} // namespace details

/*!
Expand Down Expand Up @@ -361,24 +363,16 @@ void GraphExecutor::SetupStorage() {
// Find the maximum space size.
for (size_t i = 0; i < attrs_.shape.size(); ++i) {
int storage_id = attrs_.storage_id[i];
std::string storage_scope = attrs_.storage_scope.empty() ? "" : attrs_.storage_scope[i];
// Use the fallback device if no device index is available.
int device_type = static_cast<int>(devices_[0].device_type);
if (!attrs_.device_index.empty()) {
device_type = attrs_.device_index[i];
}
size_t size = 1;
for (int64_t sz : attrs_.shape[i]) {
size *= static_cast<size_t>(sz);
}
ICHECK_GE(storage_id, 0) << "Do not support runtime shape op";
DLDataType t = vtype[i];
size_t bits = t.bits * t.lanes;
ICHECK(bits % 8U == 0U || bits == 1U || bits == 4U);
size_t bytes = ((bits + 7U) / 8U) * size;

uint32_t sid = static_cast<uint32_t>(storage_id);
if (sid >= pool_entry.size()) {
pool_entry.resize(sid + 1, {0, -1});
pool_entry.resize(sid + 1, {-1, {0}, {}});
} else {
ICHECK(pool_entry[sid].device_type == -1 || pool_entry[sid].device_type == device_type)
<< "The same pool entry cannot be assigned to multiple devices";
Expand All @@ -395,8 +389,38 @@ void GraphExecutor::SetupStorage() {
pool_entry[sid].linked_param = lookup_rv;
}
pool_entry[sid].param_data_entry = i;
pool_entry[sid].size = std::max(pool_entry[sid].size, bytes);
pool_entry[sid].device_type = device_type;
pool_entry[sid].scope = storage_scope;

DLDataType t = vtype[i];
if (!details::Is2DStorage(storage_scope)) {
size_t size = 1;
for (int64_t sz : attrs_.shape[i]) {
size *= static_cast<size_t>(sz);
}
size_t bits = t.bits * t.lanes;
ICHECK(bits % 8U == 0U || bits == 1U || bits == 4U);
int64_t bytes = ((bits + 7U) / 8U) * size;
pool_entry[sid].shape[0] = std::max(pool_entry[sid].shape[0], bytes);
pool_entry[sid].dtype = DLDataType{kDLFloat, 32, 1};
} else {
if (pool_entry[sid].shape.size() == 1) {
pool_entry[sid].shape.resize(3, 0);
}
size_t axis = runtime::DefaultTextureLayoutSeparator(attrs_.shape[i].size(), storage_scope);
auto shape = ApplyTexture2DFlattening<int64_t>(attrs_.shape[i], attrs_.shape[i].size(), axis);
pool_entry[sid].shape[0] = std::max(pool_entry[sid].shape[0], shape.height);
pool_entry[sid].shape[1] = std::max(pool_entry[sid].shape[1], shape.width);
CHECK(pool_entry[sid].shape[2] == 0 || pool_entry[sid].shape[2] == shape.channel)
<< pool_entry[sid].shape[2] << " != " << shape.channel
<< ", texture channel length must be consistent within a storage pool";
pool_entry[sid].shape[2] = shape.channel;
CHECK(pool_entry[sid].dtype.bits == 0 || TypeEqual(pool_entry[sid].dtype, t))
<< DLDataType2String(pool_entry[sid].dtype) << " != " << DLDataType2String(t)
<< ", pool entry for 2d texure allocations must be of the same type;"
<< " downstream error from memory planner likely";
pool_entry[sid].dtype = t;
}
}

// Allocate the space.
Expand All @@ -410,9 +434,15 @@ void GraphExecutor::SetupStorage() {
if (pit.linked_param.defined()) {
storage_pool_.push_back(pit.linked_param);
} else {
std::vector<int64_t> shape;
shape.push_back(static_cast<int64_t>(pit.size + 3) / 4);
storage_pool_.push_back(NDArray::Empty(shape, DLDataType{kDLFloat, 32, 1}, dev));
std::vector<int64_t> shape = pit.shape;
if (shape.size() == 1) {
shape[0] = (shape[0] + 3) / 4;
}
Optional<String> mem_scope;
if (!pit.scope.empty()) {
mem_scope = String(pit.scope);
}
storage_pool_.push_back(NDArray::Empty(shape, pit.dtype, dev, mem_scope));
}
}

Expand Down
14 changes: 13 additions & 1 deletion src/runtime/graph_executor/graph_executor.h
Original file line number Diff line number Diff line change
Expand Up @@ -204,10 +204,12 @@ class TVM_DLL GraphExecutor : public ModuleNode {
protected:
// Memory pool entry.
// Describes one slot of the storage pool built in SetupStorage(): every graph
// entry that shares a storage id contributes to the same PoolEntry, and one
// NDArray is later allocated (or a linked parameter reused) per entry.
struct PoolEntry {
// Largest byte size requested by any tensor mapped to this entry.
// NOTE(review): this listing appears to interleave pre- and post-change lines;
// `size` looks like the older field superseded by `shape` — confirm which
// members exist in the actual header.
size_t size;
// Device type the entry is allocated on; -1 means "not assigned yet".
// All tensors sharing the entry must agree on it (checked in SetupStorage()).
int device_type;
// Allocation extent. For non-texture scopes it has a single element holding
// the maximum byte size; for texture scopes it is resized to three elements
// holding the flattened height, width and channel values.
std::vector<int64_t> shape;
// Element type used for the allocation: float32 for non-texture storage, and
// the tensor's own dtype for texture storage (which must be consistent across
// all tensors sharing the entry).
DLDataType dtype;
// Loop index of a graph entry that was mapped onto this pool slot in
// SetupStorage() — presumably the last one seen; verify against the full loop.
int param_data_entry;
// Pre-linked parameter; when defined it is pushed into the storage pool as-is
// instead of allocating fresh memory.
NDArray linked_param;
// Storage scope string for this entry; when non-empty it is forwarded as the
// memory scope to NDArray::Empty.
std::string scope;
// NOTE(review): the commented-out constructor below names a `pre_linked_param`
// member that this struct does not declare; it looks stale.
// PoolEntry(int s, int dev_type, void* pre_linked_param) :
// size(s), device_type(dev_type), pre_linked_param(std::move(pre_linked_param)) {}
};
Expand Down Expand Up @@ -303,6 +305,7 @@ class TVM_DLL GraphExecutor : public ModuleNode {
std::vector<int> storage_id;
std::vector<int> device_index;
std::vector<std::string> dltype;
std::vector<std::string> storage_scope;
std::vector<std::vector<int64_t>> shape;
// The graph attribute fields.
void Load(dmlc::JSONReader* reader) {
Expand All @@ -328,6 +331,15 @@ class TVM_DLL GraphExecutor : public ModuleNode {
reader->Read(&storage_id);
ICHECK(!reader->NextArrayItem());
bitmask |= 2;
} else if (key == "storage_scope") {
reader->BeginArray();
ICHECK(reader->NextArrayItem());
reader->Read(&type);
ICHECK_EQ(type, "list_str");
ICHECK(reader->NextArrayItem());
reader->Read(&storage_scope);
ICHECK(!reader->NextArrayItem());
bitmask |= 1;
} else if (key == "shape") {
reader->BeginArray();
ICHECK(reader->NextArrayItem());
Expand Down
4 changes: 2 additions & 2 deletions src/target/source/codegen_opencl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ std::string CodeGenOpenCL::Finish() {
"#pragma OPENCL EXTENSION cl_amd_fp16 : enable\n"
"#else\n"
"#error \"Half precision floating point not supported"
"by OpenCL implementation on your device.\" \n"
" by OpenCL implementation on your device.\" \n"
"#endif\n\n";
}

Expand All @@ -109,7 +109,7 @@ std::string CodeGenOpenCL::Finish() {
"#pragma OPENCL EXTENSION cl_amd_fp64 : enable\n"
"#else\n"
"#error \"Double precision floating point not supported"
"by OpenCL implementation on your device.\" \n"
" by OpenCL implementation on your device.\" \n"
"#endif\n\n";
}

Expand Down