From b3f876f8da4720c3135c53186391cf3f9208bd96 Mon Sep 17 00:00:00 2001 From: Ivy Date: Wed, 12 Jan 2022 16:04:10 +0800 Subject: [PATCH 01/15] enable dnnl optimal layout for supported ops --- python/tvm/relay/op/contrib/dnnl.py | 248 ++++++++++++- .../backend/contrib/dnnl/query_layout.cc | 343 ++++++++++++++++++ src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 178 +++++++-- src/tir/ir/data_layout.cc | 1 - tests/python/contrib/test_dnnl.py | 79 ++-- 5 files changed, 784 insertions(+), 65 deletions(-) create mode 100755 src/relay/backend/contrib/dnnl/query_layout.cc diff --git a/python/tvm/relay/op/contrib/dnnl.py b/python/tvm/relay/op/contrib/dnnl.py index 6d4fe0d81260..1dce6038b13b 100644 --- a/python/tvm/relay/op/contrib/dnnl.py +++ b/python/tvm/relay/op/contrib/dnnl.py @@ -35,9 +35,11 @@ import logging import tvm.ir +from tvm import relay from tvm.relay import transform from tvm.relay.build_module import bind_params_by_name +from ... import _ffi_api from ...dataflow_pattern import wildcard, is_op from .register import register_pattern_table @@ -207,7 +209,201 @@ def pattern_table(): return dnnl_patterns -def partition_for_dnnl(mod, params=None): +def get_optimal_layout_for_conv(input_size, weight_shape, out_shape, paddings, strides, dilates, G): + """Get the optimal layout of dnnl, given shape of conv2d. + + Parameters + ---------- + input_size, weight_shape, out_shape, paddings, strides, dilates, G : Int, String + Input argument. + + Returns + ------- + layouts : string + The result. + """ + return _ffi_api.get_optimal_layout_for_conv( + input_size, + weight_shape, + out_shape, + paddings, + strides, + dilates, + G, + ) + + +def get_optimal_layout_for_deconv( + input_size, weight_shape, out_shape, paddings, output_paddings, strides, dilates, G +): + """Get the optimal layout of dnnl, given shape of tranpose conv2d. + + Parameters + ---------- + input_size, weight_shape, out_shape, paddings, output_paddings, strides, dilates, G + : Int, String + Input argument. 
+ + Returns + ------- + layouts : string + The result. + """ + return _ffi_api.get_optimal_layout_for_deconv( + input_size, + weight_shape, + out_shape, + paddings, + output_paddings, + strides, + dilates, + G, + ) + + +def get_shape(tensor): + """Get tensor's shape.""" + if isinstance(tensor, relay.expr.Var): + return tensor.type_annotation.concrete_shape + elif isinstance(tensor, relay.expr.Constant): + return tensor.data.shape + elif isinstance(tensor, tvm.ir.tensor_type.TensorType): + return tensor.concrete_shape + elif isinstance(tensor, tvm.ir.container.Array): + return tensor[-1].shape + elif isinstance(tensor, relay.expr.Call): + return tensor.checked_type.shape + else: + raise TypeError("Unsupport data type: %s" % type(tensor)) + + +def trans_data(input_data, is_weight=False, conv_type=1): + if conv_type == 1: + data_dic = {"a": "N", "b": "C", "c": "W"} + weight_dic = {"a": "O", "b": "I", "c": "W", "d": "G"} + elif conv_type == 2: + data_dic = {"a": "N", "b": "C", "c": "H", "d": "W"} + weight_dic = {"a": "O", "b": "I", "c": "H", "d": "W"} + if "e" in input_data: + weight_dic = {"a": "G", "b": "O", "c": "I", "d": "H", "e": "W"} + elif conv_type == 3: + data_dic = {"a": "N", "b": "C", "c": "D", "d": "H", "e": "W"} + weight_dic = {"a": "O", "b": "I", "c": "D", "d": "H", "e": "W", "f": "G"} + + dic = weight_dic if is_weight else data_dic + res = "" + + for i in input_data: + if i.isupper(): + i = i.lower() + res += dic[i] + dic[i] = dic[i].lower() + elif i.islower(): + res += dic[i] + elif i.isdigit(): + res += i + else: + raise ValueError("Unsupport layout format: %s" % input_data) + return res + + +def legalize_group_conv(attrs, inputs, types): + """Legalize group conv's calculation. 
+ Alter weight layout from OIHW to GOIHW""" + G = attrs.groups + if G == 1: + return + data, weight = inputs + OC, IC, H, W = get_shape(weight) + new_attrs = dict(attrs) + weight = relay.reshape(weight, (G, OC // G, IC, H, W)) + new_attrs["kernel_layout"] = "GOIHW" + return relay.nn.conv2d(data, weight, **new_attrs) + + +def legalize_group_deconv(attrs, inputs, types): + """Legalize group deconv's calculation. + Alter weight layout from IOHW to GIOHW""" + G = attrs.groups + if G == 1: + return + data, weight = inputs + IC, OC, H, W = get_shape(weight) + new_attrs = dict(attrs) + new_attrs["kernel_layout"] = "GIOHW" + weight = relay.reshape(weight, (G, IC // G, OC, H, W)) + return relay.nn.conv2d_transpose(data, weight, **new_attrs) + + +def alter_conv(attrs, inputs, tinfos, out_type): + """The convolution's layout auto-query func for dnnl.""" + + data, weight = inputs + G = str(attrs.groups) + weight_shape = ",".join([str(x) for x in get_shape(weight)]) + out_shape = ",".join([str(x) for x in get_shape(out_type)]) + paddings = ",".join([str(x) for x in attrs.get_int_tuple("padding")]) + strides = ",".join([str(x) for x in attrs.get_int_tuple("strides")]) + dilates = ",".join([str(x) for x in attrs.get_int_tuple("dilation")]) + new_attrs = dict(attrs) + conv_type = len(get_shape(out_type)) - 2 + + res = get_optimal_layout_for_conv( + len(get_shape(out_type)), weight_shape, out_shape, paddings, strides, dilates, G + ) + src_df, weight_df, dst_df = res.split(",") + new_attrs["data_layout"] = trans_data(src_df, is_weight=False, conv_type=conv_type) + new_attrs["kernel_layout"] = trans_data(weight_df, is_weight=True, conv_type=conv_type) + new_attrs["out_layout"] = trans_data(dst_df, is_weight=False, conv_type=conv_type) + if new_attrs["kernel_layout"] == "HWOIG16g": + new_attrs["kernel_layout"] = "HWIOG16g" + + if conv_type == 1: + return relay.nn.conv1d(data, weight, **new_attrs) + elif conv_type == 2: + return relay.nn.conv2d(data, weight, **new_attrs) + elif 
conv_type == 3: + return relay.nn.conv3d(data, weight, **new_attrs) + + +def alter_deconv(attrs, inputs, tinfos, out_type): + """The transpose convolution's layout auto-query func for dnnl.""" + + data, weight = inputs + weight_shape = ",".join([str(x) for x in get_shape(weight)]) + out_shape = ",".join([str(x) for x in get_shape(out_type)]) + paddings = ",".join([str(x) for x in attrs.get_int_tuple("padding")]) + output_paddings = ",".join([str(x) for x in attrs.get_int_tuple("output_padding")]) + strides = ",".join([str(x) for x in attrs.get_int_tuple("strides")]) + dilates = ",".join([str(x) for x in attrs.get_int_tuple("dilation")]) + G = str(attrs.groups) + new_attrs = dict(attrs) + conv_type = len(get_shape(out_type)) - 2 + + res = get_optimal_layout_for_deconv( + len(get_shape(out_type)), + weight_shape, + out_shape, + paddings, + output_paddings, + strides, + dilates, + G, + ) + src_df, weight_df, dst_df = res.split(",") + new_attrs["data_layout"] = trans_data(src_df, is_weight=False, conv_type=conv_type) + new_attrs["kernel_layout"] = trans_data(weight_df, is_weight=True, conv_type=conv_type) + new_attrs["out_layout"] = trans_data(dst_df, is_weight=False, conv_type=conv_type) + + if conv_type == 1: + return relay.nn.conv1d_transpose(data, weight, **new_attrs) + elif conv_type == 2: + return relay.nn.conv2d_transpose(data, weight, **new_attrs) + elif conv_type == 3: + return relay.nn.conv3d_transpose(data, weight, **new_attrs) + + +def partition_for_dnnl(mod, params=None, alter_layout=True): """Partition the graph greedily offloading supported operators to DNNL. 
Parameters @@ -224,16 +420,46 @@ def partition_for_dnnl(mod, params=None): if params: mod["main"] = bind_params_by_name(mod["main"], params) - seq = tvm.transform.Sequential( + from tvm.relay.testing.temp_op_attr import TempOpAttr + + with TempOpAttr("nn.conv2d", "FTVMLegalize", legalize_group_conv): + with TempOpAttr("nn.conv2d_transpose", "FTVMLegalize", legalize_group_deconv): + seq = tvm.transform.Sequential( + [ + transform.CanonicalizeOps(), + transform.InferType(), + transform.SimplifyInference(), + transform.FoldConstant(), + transform.FoldScaleAxis(), + # fold consecutive add ops to simplify pattern `conv2d-bias_add-bn-relu` + transform.SimplifyExpr(), + transform.FoldConstant(), + # alter group conv /deconv layout to `GOIHW` / `GIOHW` + transform.Legalize(), + transform.FoldConstant(), + ] + ) + with tvm.transform.PassContext(opt_level=3): + mod = seq(mod) + if alter_layout: + from tvm.relay.testing.temp_op_attr import TempOpAttr + + with TempOpAttr("nn.conv1d", "FTVMAlterOpLayout", alter_conv): + with TempOpAttr("nn.conv2d", "FTVMAlterOpLayout", alter_conv): + with TempOpAttr("nn.conv3d", "FTVMAlterOpLayout", alter_conv): + with TempOpAttr("nn.conv2d_transpose", "FTVMAlterOpLayout", alter_deconv): + with TempOpAttr("nn.conv3d_transpose", "FTVMAlterOpLayout", alter_deconv): + alter_layout_seq = tvm.transform.Sequential( + [ + transform.AlterOpLayout(), + transform.FoldConstant(), + ] + ) + with tvm.transform.PassContext(opt_level=3): + mod = alter_layout_seq(mod) + + byoc_seq = tvm.transform.Sequential( [ - transform.CanonicalizeOps(), - transform.InferType(), - transform.SimplifyInference(), - transform.FoldConstant(), - transform.FoldScaleAxis(), - # fold consecutive add ops to simplify pattern `conv2d-bias_add-bn-relu` - transform.SimplifyExpr(), - transform.FoldConstant(), transform.MergeComposite(pattern_table()), transform.AnnotateTarget("dnnl"), transform.MergeCompilerRegions(), @@ -241,5 +467,5 @@ def partition_for_dnnl(mod, params=None): ] ) with 
tvm.transform.PassContext(opt_level=3): - mod = seq(mod) + mod = byoc_seq(mod) return mod diff --git a/src/relay/backend/contrib/dnnl/query_layout.cc b/src/relay/backend/contrib/dnnl/query_layout.cc new file mode 100755 index 000000000000..fac6cf1111da --- /dev/null +++ b/src/relay/backend/contrib/dnnl/query_layout.cc @@ -0,0 +1,343 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/relay/backend/contrib/dnnl/query_layout.cc + * \brief layout auto-query func. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "../../utils.h" +#include "dnnl.hpp" + +using dim_t = dnnl_dim_t; +using dims_t = dnnl_dims_t; + +namespace tvm { +namespace relay { +namespace contrib { + +template +inline void array_set(T* arr, const U& val, size_t size) { + for (size_t i = 0; i < size; ++i) arr[i] = static_cast(val); +} + +template +inline void array_copy(T* dst, const T* src, size_t size) { + for (size_t i = 0; i < size; ++i) dst[i] = src[i]; +} + +void compute_blocks(dims_t blocks, const dnnl::memory::desc* md) { + using format_kind_t = dnnl_format_kind_t; + const format_kind_t blocked = dnnl_blocked; + if (!(md->data.format_kind == blocked)) { + array_set(blocks, 0, md->data.ndims); + return; + } + + array_set(blocks, 1, md->data.ndims); + + const auto& bd = md->data.format_desc.blocking; + for (int iblk = 0; iblk < bd.inner_nblks; ++iblk) + blocks[bd.inner_idxs[iblk]] *= bd.inner_blks[iblk]; +} + +inline bool has_runtime_strides(const dnnl::memory::desc* md) { + using format_kind_t = dnnl_format_kind_t; + const format_kind_t blocked = dnnl_blocked; + if (!(md->data.format_kind == blocked)) return false; + for (int d = 0; d < md->data.ndims; ++d) + if (md->data.format_desc.blocking.strides[d] == DNNL_RUNTIME_DIM_VAL) return true; + return false; +} + +template +inline void swap(T& t1, T& t2) { + T tmp(t1); + t1 = t2; + t2 = tmp; +} + +template +inline void simultaneous_sort(T* vals, T* vals_2nd_level, U* keys, size_t size, F comparator) { + if (size == 0) return; + + for (size_t i = 0; i < size - 1; ++i) { + bool swapped = false; + + for (size_t j = 0; j < size - i - 1; j++) { + auto res = comparator(vals[j], vals[j + 1]); + if (res == 0) res = comparator(vals_2nd_level[j], vals_2nd_level[j + 1]); + + if (res > 0) { + swap(vals[j], vals[j + 1]); + swap(vals_2nd_level[j], vals_2nd_level[j + 1]); + swap(keys[j], keys[j + 1]); + swapped = true; + } + } + + if (swapped == 
false) break; + } +} + +std::string md2fmt_tag_str(const dnnl::memory::desc* md) { + const auto& blk = md->data.format_desc.blocking; + + dims_t blocks = {0}; + compute_blocks(blocks, md); + + char dim_chars[DNNL_MAX_NDIMS + 1]; + + dims_t ou_blocks = {0}; + array_copy(ou_blocks, md->data.padded_dims, md->data.ndims); + + bool plain = true; + for (int d = 0; d < md->data.ndims; ++d) { + dim_chars[d] = (blocks[d] == 1 ? 'a' : 'A') + static_cast(d); + if (blocks[d] != 1) plain = false; + ou_blocks[d] /= blocks[d]; + } + + // Can't report meaningful tag for runtime dimensions. + if (has_runtime_strides(md)) return "*"; + + dims_t strides; + array_copy(strides, blk.strides, md->data.ndims); + + simultaneous_sort(strides, ou_blocks, dim_chars, md->data.ndims, + [](dim_t a, dim_t b) { return b - a; }); + + dim_chars[md->data.ndims] = '\0'; + + std::string s(dim_chars); + + if (!plain) { + for (int iblk = 0; iblk < blk.inner_nblks; ++iblk) { + char c = ('a' + static_cast(blk.inner_idxs[iblk])); + s += (std::to_string(blk.inner_blks[iblk]) + c); + } + } + return s; +} + +dnnl::memory::dims str2num(std::string str_shape, int input_size) { + std::string str_reg = "(\\d*)"; + for (int i = 0; i < input_size - 1; i++) { + str_reg.append(",(\\d*)"); + } + std::regex rex(str_reg); + std::smatch m; + dnnl::memory::dims out_dims; + if (std::regex_search(str_shape, m, rex)) { + std::transform(m.begin() + 1, m.end(), std::back_inserter(out_dims), + [](const std::string& str) { return std::stoi(str); }); + } else { + LOG(FATAL) << "Unsupported shape for querying optimal dnnl layout: " << str_shape; + } + return out_dims; +} + +std::string get_optimal_layout_for_conv(int input_size, std::string weight_shape, + std::string out_shape, std::string paddings, + std::string strides, std::string dilates, std::string G) { + dnnl::engine eng(dnnl::engine::kind::cpu, 0); + dnnl::stream s(eng); + using tag = dnnl::memory::format_tag; + using dt = dnnl::memory::data_type; + + dnnl::memory::dim 
groups = std::stoi(G); + dnnl::memory::dims weight_dims_ = str2num(weight_shape, input_size); + dnnl::memory::dims weight_dims = weight_dims_; + if (groups > 1) { + if (weight_dims_.size() == 5) { + weight_dims = {weight_dims_[0] * weight_dims_[1], weight_dims_[2], weight_dims_[3], + weight_dims_[4]}; + } else { + weight_dims[1] = weight_dims[1] * groups; + } + } + dnnl::memory::dims out_dims = str2num(out_shape, input_size); + dnnl::memory::dims padding_dims = str2num(paddings, 2 * (input_size - 2)); + dnnl::memory::dims padding_dims_l(padding_dims.begin(), + padding_dims.begin() + padding_dims.size() / 2); + dnnl::memory::dims padding_dims_r(padding_dims.end() - padding_dims.size() / 2, + padding_dims.end()); + dnnl::memory::dims strides_dims = str2num(strides, input_size - 2); + dnnl::memory::dims dilates_dims = str2num(dilates, input_size - 2); + + dnnl::memory::dims input_dims = out_dims; + input_dims[1] = weight_dims[1]; + for (int i = 2; i < input_size; i++) { + dnnl::memory::dim K = weight_dims[i]; + dnnl::memory::dim S = strides_dims[i - 2]; + dnnl::memory::dim D = dilates_dims[i - 2] - 1; + dnnl::memory::dim PL = padding_dims_l[i - 2]; + dnnl::memory::dim PR = padding_dims_r[i - 2]; + dnnl::memory::dim DK = 1 + (K - 1) * (D + 1); + dilates_dims[i - 2] = D; + input_dims[i] = out_dims[i] * S - PL - PR + DK - 1; + } + + dnnl::memory::dims conv_src_tz = input_dims; + dnnl::memory::dims conv_weights_tz = weight_dims; + if (groups > 1) { + conv_weights_tz = {groups, out_dims[1] / groups, input_dims[1] / groups}; + conv_weights_tz.insert(conv_weights_tz.end(), weight_dims.begin() + 2, weight_dims.end()); + } + dnnl::memory::dims conv_bias_tz = {out_dims[1]}; + dnnl::memory::dims conv_dst_tz = out_dims; + dnnl::memory::dims conv_strides = strides_dims; + dnnl::memory::dims conv_dilates = dilates_dims; + dnnl::memory::dims conv_padding_l = padding_dims_l; + dnnl::memory::dims conv_padding_r = padding_dims_r; + + auto conv_src_md = dnnl::memory::desc({conv_src_tz}, 
dt::f32, tag::any); + auto conv_weights_md = dnnl::memory::desc({conv_weights_tz}, dt::f32, tag::any); + auto conv_dst_md = dnnl::memory::desc({conv_dst_tz}, dt::f32, tag::any); + + auto conv_desc = dnnl::convolution_forward::desc( + dnnl::prop_kind::forward_inference, dnnl::algorithm::convolution_direct, conv_src_md, + conv_weights_md, conv_dst_md, conv_strides, conv_dilates, conv_padding_l, conv_padding_r); + + auto conv_prim_desc = dnnl::convolution_forward::primitive_desc(conv_desc, eng); + + auto src_format = conv_prim_desc.src_desc(); + auto weights_format = conv_prim_desc.weights_desc(); + auto dst_format = conv_prim_desc.dst_desc(); + std::string src_df, weight_df, dst_df; + + src_df = md2fmt_tag_str(&src_format); + weight_df = md2fmt_tag_str(&weights_format); + dst_df = md2fmt_tag_str(&dst_format); + std::string res = src_df + "," + weight_df + "," + dst_df; + return res; +} + +std::string get_optimal_layout_for_deconv(int input_size, std::string weight_shape, + std::string out_shape, std::string paddings, + std::string output_paddings, std::string strides, + std::string dilates, std::string G) { + dnnl::engine eng(dnnl::engine::kind::cpu, 0); + dnnl::stream s(eng); + using tag = dnnl::memory::format_tag; + using dt = dnnl::memory::data_type; + + dnnl::memory::dim groups = std::stoi(G); + dnnl::memory::dims weight_dims_ = str2num(weight_shape, input_size); + dnnl::memory::dims weight_dims = weight_dims_; + if (groups > 1) { + if (weight_dims_.size() == 5) { + weight_dims = {weight_dims_[0] * weight_dims_[1], weight_dims_[2], weight_dims_[3], + weight_dims_[4]}; + } else { + weight_dims[1] = weight_dims[1] * groups; + } + } + dnnl::memory::dims out_dims = str2num(out_shape, input_size); + dnnl::memory::dims padding_dims = str2num(paddings, 2 * (input_size - 2)); + dnnl::memory::dims padding_dims_l(padding_dims.begin(), + padding_dims.begin() + padding_dims.size() / 2); + dnnl::memory::dims padding_dims_r(padding_dims.end() - padding_dims.size() / 2, + 
padding_dims.end()); + dnnl::memory::dims output_padding_dims = str2num(output_paddings, input_size - 2); + dnnl::memory::dims strides_dims = str2num(strides, input_size - 2); + dnnl::memory::dims dilates_dims = str2num(dilates, input_size - 2); + + dnnl::memory::dims input_dims = out_dims; + if (out_dims[1] == weight_dims[0]) { + input_dims[1] = weight_dims[1]; + } else { + input_dims[1] = weight_dims[0]; + std::swap(weight_dims[0], weight_dims[1]); + } + for (int i = 2; i < input_size; i++) { + dnnl::memory::dim K = weight_dims[i]; + dnnl::memory::dim S = strides_dims[i - 2]; + dnnl::memory::dim D = dilates_dims[i - 2] - 1; + dnnl::memory::dim PL = padding_dims_l[i - 2]; + dnnl::memory::dim PR = padding_dims_r[i - 2]; + dnnl::memory::dim OP = output_padding_dims[i - 2]; + dnnl::memory::dim DK = 1 + (K - 1) * (D + 1); + dilates_dims[i - 2] = D; + input_dims[i] = (out_dims[i] - DK + PL + PR - OP) / S + 1; + } + + dnnl::memory::dims deconv_src_tz = input_dims; + dnnl::memory::dims deconv_weights_tz = weight_dims; + if (groups > 1) { + deconv_weights_tz = {groups, out_dims[1] / groups, input_dims[1] / groups}; + deconv_weights_tz.insert(deconv_weights_tz.end(), weight_dims.begin() + 2, weight_dims.end()); + } + dnnl::memory::dims deconv_bias_tz = {out_dims[1]}; + dnnl::memory::dims deconv_dst_tz = out_dims; + dnnl::memory::dims deconv_strides = strides_dims; + dnnl::memory::dims deconv_dilates = dilates_dims; + dnnl::memory::dims deconv_padding_l = padding_dims_l; + dnnl::memory::dims deconv_padding_r = padding_dims_r; + + auto deconv_src_md = dnnl::memory::desc({deconv_src_tz}, dt::f32, tag::any); + auto deconv_weights_md = dnnl::memory::desc({deconv_weights_tz}, dt::f32, tag::any); + auto deconv_dst_md = dnnl::memory::desc({deconv_dst_tz}, dt::f32, tag::any); + + auto deconv_desc = dnnl::deconvolution_forward::desc( + dnnl::prop_kind::forward_inference, dnnl::algorithm::deconvolution_direct, deconv_src_md, + deconv_weights_md, deconv_dst_md, deconv_strides, 
deconv_dilates, deconv_padding_l, + deconv_padding_r); + + auto deconv_prim_desc = dnnl::deconvolution_forward::primitive_desc(deconv_desc, eng); + + auto src_format = deconv_prim_desc.src_desc(); + auto weights_format = deconv_prim_desc.weights_desc(); + auto dst_format = deconv_prim_desc.dst_desc(); + std::string src_df, weight_df, dst_df; + + src_df = md2fmt_tag_str(&src_format); + weight_df = md2fmt_tag_str(&weights_format); + dst_df = md2fmt_tag_str(&dst_format); + std::string res = src_df + "," + weight_df + "," + dst_df; + return res; +} + +TVM_REGISTER_GLOBAL("relay.ir.get_optimal_layout_for_conv") + .set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = get_optimal_layout_for_conv(args[0], args[1], args[2], args[3], args[4], args[5], + args[6]); + }); + +TVM_REGISTER_GLOBAL("relay.ir.get_optimal_layout_for_deconv") + .set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = get_optimal_layout_for_deconv(args[0], args[1], args[2], args[3], args[4], args[5], + args[6], args[7]); + }); + +} // namespace contrib +} // namespace relay +} // namespace tvm diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index 6d5e5543cd40..4fc1fa4d61a6 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -91,12 +91,6 @@ class DNNLJSONRuntime : public JSONRuntimeBase { private: // Build up the engine based on the input graph. 
- std::map layout_dict{ - {"NCW", tag::ncw}, {"OIW", tag::oiw}, {"GOIW", tag::goiw}, {"NCHW", tag::nchw}, - {"OIHW", tag::oihw}, {"GOIHW", tag::goihw}, {"NCDHW", tag::ncdhw}, {"OIDHW", tag::oidhw}, - {"GOIDHW", tag::goidhw}, {"IOHW", tag::iohw}, {"GIOHW", tag::giohw}, {"IODHW", tag::iodhw}, - {"GIODHW", tag::giodhw}, - }; std::map elt_name2algo{ {"abs", dnnl::algorithm::eltwise_abs}, @@ -112,6 +106,47 @@ class DNNLJSONRuntime : public JSONRuntimeBase { {"clip", dnnl::algorithm::eltwise_clip}, }; + std::map layout_dict{ + {"NCW", tag::ncw}, + {"NWC", tag::nwc}, + {"OIW", tag::oiw}, + {"GOIW", tag::goiw}, + {"NCHW", tag::nchw}, + {"NHWC", tag::nhwc}, + {"OIHW", tag::oihw}, + {"GOIHW", tag::goihw}, + {"NCDHW", tag::ncdhw}, + {"NDHWC", tag::ndhwc}, + {"OIDHW", tag::oidhw}, + {"GOIDHW", tag::goidhw}, + {"IOHW", tag::iohw}, + {"GIOHW", tag::giohw}, + {"IODHW", tag::iodhw}, + {"GIODHW", tag::giodhw}, + + // Blocking layout. + {"NCW16c", tag::nCw16c}, + {"OIW16i16o", tag::OIw16i16o}, + {"OWI16o", tag::Owi16o}, + {"NCHW8c", tag::nChw8c}, + {"NCHW16c", tag::nChw16c}, + {"OIHW8i8o", tag::OIhw8i8o}, + {"OIHW16i16o", tag::OIhw16i16o}, + {"IOHW16i16o", tag::IOhw16i16o}, + {"OHWI8o", tag::Ohwi8o}, + {"OHWI16o", tag::Ohwi16o}, + {"OHWI32o", tag::Ohwi32o}, + {"OHWI48o", tag::Ohwi48o}, + {"OHWI64o", tag::Ohwi64o}, + {"HWIOG16g", tag::hwioG16g}, + {"GOIHW8g", tag::Goihw8g}, + {"GOIHW16g", tag::Goihw16g}, + {"NCDHW16c", tag::nCdhw16c}, + {"OIDHW16i16o", tag::OIdhw16i16o}, + {"IODHW16i16o", tag::IOdhw16i16o}, + {"ODHWI16o", tag::Odhwi16o}, + }; + bool ParsingOpName(const std::string op_name, dnnl::primitive_attr attr) { // Define RegExp. std::regex bias_add_pat(".*_bias.*"); @@ -136,6 +171,69 @@ class DNNLJSONRuntime : public JSONRuntimeBase { return std::regex_match(op_name, bias_add_pat) ? 
true : false; } + dnnl::memory::dims TransDims2Plain(dnnl::memory::dims input_dims, std::string layout) { + std::regex dl_plain_reg("NC(D*)(H*)W"); + std::regex kl_plain_reg("(OI|IO)(D*)(H*)W"); + std::regex dl_nwc_reg("N(D*)(H*)WC"); + std::regex dl_nCwxc_reg("NC(D*)(H*)W(\\d*)c"); + std::regex kl_goiw_reg("GOI(D*)(H*)W"); + std::regex kl_giow_reg("GIO(D*)(H*)W"); + std::regex kl_Goihwxg_reg("GOIHW(\\d*)g"); + std::regex kl_hwioGxg_reg("HWIOG(\\d*)g"); + std::regex kl_OIwxixo_reg("OI(D*)(H*)W(\\d*)i(\\d*)o"); + std::regex kl_IOwxixo_reg("IO(D*)(H*)W(\\d*)i(\\d*)o"); + std::regex kl_Owixo_reg("O(D*)(H*)WI(\\d*)o"); + dnnl::memory::dims out_dims; + + if (std::regex_match(layout, dl_nCwxc_reg)) { + dnnl::memory::dim C = input_dims[1] * input_dims[input_dims.size() - 1]; + out_dims = {input_dims[0], C}; + out_dims.insert(out_dims.end(), input_dims.begin() + 2, input_dims.end() - 1); + } else if (std::regex_match(layout, dl_nwc_reg)) { + dnnl::memory::dim N = input_dims[0], C = input_dims[input_dims.size() - 1]; + out_dims = {N, C}; + out_dims.insert(out_dims.end(), input_dims.begin() + 1, input_dims.end() - 1); + } else if (std::regex_match(layout, kl_goiw_reg)) { + dnnl::memory::dim O = input_dims[0] * input_dims[1], I = input_dims[0] * input_dims[1]; + out_dims = {O, I}; + out_dims.insert(out_dims.end(), input_dims.begin() + 3, input_dims.end()); + } else if (std::regex_match(layout, kl_giow_reg)) { + dnnl::memory::dim O = input_dims[0] * input_dims[2], I = input_dims[0] * input_dims[2]; + out_dims = {O, I}; + out_dims.insert(out_dims.end(), input_dims.begin() + 3, input_dims.end()); + } else if (std::regex_match(layout, kl_Goihwxg_reg)) { + dnnl::memory::dim G = input_dims[0] * input_dims[input_dims.size() - 1]; + dnnl::memory::dim O = G * input_dims[1], I = G * input_dims[2]; + out_dims = {O, I}; + out_dims.insert(out_dims.end(), input_dims.begin() + 3, input_dims.end() - 1); + } else if (std::regex_match(layout, kl_hwioGxg_reg)) { + dnnl::memory::dim G = 
input_dims[input_dims.size() - 2] * input_dims[input_dims.size() - 1]; + dnnl::memory::dim O = G * input_dims[input_dims.size() - 3], + I = G * input_dims[input_dims.size() - 4]; + out_dims = {O, I}; + out_dims.insert(out_dims.end(), input_dims.begin(), input_dims.end() - 4); + } else if (std::regex_match(layout, kl_OIwxixo_reg)) { + dnnl::memory::dim O = input_dims[0] * input_dims[input_dims.size() - 1], + I = input_dims[1] * input_dims[input_dims.size() - 2]; + out_dims = {O, I}; + out_dims.insert(out_dims.end(), input_dims.begin() + 2, input_dims.end() - 2); + } else if (std::regex_match(layout, kl_IOwxixo_reg)) { + dnnl::memory::dim O = input_dims[1] * input_dims[input_dims.size() - 1], + I = input_dims[0] * input_dims[input_dims.size() - 2]; + out_dims = {O, I}; + out_dims.insert(out_dims.end(), input_dims.begin() + 2, input_dims.end() - 2); + } else if (std::regex_match(layout, kl_Owixo_reg)) { + dnnl::memory::dim O = input_dims[0] * input_dims[input_dims.size() - 1]; + out_dims = {O, input_dims[input_dims.size() - 2]}; + out_dims.insert(out_dims.end(), input_dims.begin() + 1, input_dims.end() - 2); + } else if (std::regex_match(layout, dl_plain_reg) || std::regex_match(layout, kl_plain_reg)) { + out_dims = input_dims; + } else { + LOG(FATAL) << "Unsupported layout for TransDims2Plain: " << layout; + } + return out_dims; + } + dnnl::memory::dims TransformStr2Dims(std::vector strs, std::string str_name) { dnnl::memory::dims out_dims; if (str_name == "dilates") { @@ -253,19 +351,31 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } // Memory shapes. 
- dnnl::memory::dims src_dims = input_shape; // {N, IC, ID, IH, IW} - dnnl::memory::dims weights_dims = weight_shape; // {OC, IC, KD, KH, KW} - if (groups > 1) { - weights_dims = {groups, channels / groups, input_shape[1] / groups}; - weights_dims.insert(weights_dims.end(), weight_shape.begin() + 2, weight_shape.end()); - kernel_layout.insert(0, "G"); - } + dnnl::memory::dims src_dims = TransDims2Plain(input_shape, data_layout); + dnnl::memory::dims weights_dims_ = TransDims2Plain(weight_shape, kernel_layout); dnnl::memory::dims bias_dims = {channels}; - dnnl::memory::dims dst_dims = out_shape; // {N, OC, OD, OH, OW} dnnl::memory::dims strides_dims = TransformStr2Dims(str_strides, "strides"); dnnl::memory::dims dilates_dims = TransformStr2Dims(str_dilates, "dilates"); dnnl::memory::dims padding_dims_l = TransformStr2Dims(str_padding_l, "padding"); dnnl::memory::dims padding_dims_r = TransformStr2Dims(str_padding_r, "padding"); + dnnl::memory::dims dst_dims = src_dims; + dst_dims[1] = channels; + weights_dims_[0] = channels; + for (int i = 2; i < src_dims.size(); i++) { + dnnl::memory::dim K = weights_dims_[i]; + dnnl::memory::dim S = strides_dims[i - 2]; + dnnl::memory::dim D = dilates_dims[i - 2]; + dnnl::memory::dim PL = padding_dims_l[i - 2]; + dnnl::memory::dim PR = padding_dims_r[i - 2]; + dnnl::memory::dim DK = 1 + (K - 1) * (D + 1); + dst_dims[i] = (src_dims[i] - DK + PL + PR) / S + 1; + } + + dnnl::memory::dims weights_dims = weights_dims_; + if (groups > 1) { + weights_dims = {groups, channels / groups, src_dims[1] / groups}; + weights_dims.insert(weights_dims.end(), weights_dims_.begin() + 2, weights_dims_.end()); + } // Memory descriptions. 
auto conv_src_md = dnnl::memory::desc(src_dims, dt::f32, layout_dict[data_layout]); @@ -343,6 +453,8 @@ class DNNLJSONRuntime : public JSONRuntimeBase { str_padding.begin() + str_padding.size() / 2); std::vector str_padding_r(str_padding.end() - str_padding.size() / 2, str_padding.end()); + std::vector str_out_padding = + node.GetAttr>("output_padding"); dnnl::memory::dim groups = std::stoi(node.GetAttr>("groups")[0]); std::string data_layout = node.GetAttr>("data_layout")[0]; std::string kernel_layout = node.GetAttr>("kernel_layout")[0]; @@ -354,27 +466,35 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } // Memory shapes. - dnnl::memory::dims src_dims = input_shape; // {N, IC, ID, IH, IW} - dnnl::memory::dims weights_dims = weight_shape; // {OC, IC, KD, KH, KW} - - // Check weight shape, transform to `OIHW` - if (weights_dims[0] == src_dims[1] && weights_dims[1] == channels) { - std::swap(weights_dims[0], weights_dims[1]); - } - if (kernel_layout == "OIDHW") { - kernel_layout = "IODHW"; - } - if (groups > 1) { - weights_dims = {groups, channels / groups, input_shape[1] / groups}; - weights_dims.insert(weights_dims.end(), weight_shape.begin() + 2, weight_shape.end()); - kernel_layout.insert(0, "G"); + dnnl::memory::dims src_dims = TransDims2Plain(input_shape, data_layout); + dnnl::memory::dims weights_dims_ = TransDims2Plain(weight_shape, kernel_layout); + if (weights_dims_[0] == src_dims[1] && weights_dims_[1] == channels) { + std::swap(weights_dims_[0], weights_dims_[1]); } dnnl::memory::dims bias_dims = {channels}; - dnnl::memory::dims dst_dims = out_shape; // {N, OC, OD, OH, OW} dnnl::memory::dims strides_dims = TransformStr2Dims(str_strides, "strides"); dnnl::memory::dims dilates_dims = TransformStr2Dims(str_dilates, "dilates"); dnnl::memory::dims padding_dims_l = TransformStr2Dims(str_padding_l, "padding"); dnnl::memory::dims padding_dims_r = TransformStr2Dims(str_padding_r, "padding"); + dnnl::memory::dims out_padding = 
TransformStr2Dims(str_out_padding, "padding"); + dnnl::memory::dims dst_dims = src_dims; + dst_dims[1] = channels; + for (int i = 2; i < src_dims.size(); i++) { + dnnl::memory::dim K = weights_dims_[i]; + dnnl::memory::dim S = strides_dims[i - 2]; + dnnl::memory::dim D = dilates_dims[i - 2]; + dnnl::memory::dim PL = padding_dims_l[i - 2]; + dnnl::memory::dim PR = padding_dims_r[i - 2]; + dnnl::memory::dim OP = out_padding[i - 2]; + dnnl::memory::dim DK = 1 + (K - 1) * (D + 1); + dst_dims[i] = S * (src_dims[i] - 1) + DK - PL - PR + OP; + } + + dnnl::memory::dims weights_dims = weights_dims_; + if (groups > 1) { + weights_dims = {groups, channels / groups, src_dims[1] / groups}; + weights_dims.insert(weights_dims.end(), weights_dims_.begin() + 2, weights_dims_.end()); + } // Memory descriptions. auto deconv_src_md = dnnl::memory::desc(src_dims, dt::f32, layout_dict[data_layout]); diff --git a/src/tir/ir/data_layout.cc b/src/tir/ir/data_layout.cc index 070cd7077d18..5e3ba83ce000 100644 --- a/src/tir/ir/data_layout.cc +++ b/src/tir/ir/data_layout.cc @@ -412,7 +412,6 @@ BijectiveLayout::BijectiveLayout(Layout src_layout, Layout dst_layout) { n->src_layout = std::move(src_layout); n->dst_layout = std::move(dst_layout); - // To be consistent with previous behavior, a nullptr layout is created // when argument is invalid. if (GetStoreRule(&n->index_forward_rule, &n->shape_forward_rule, n->src_layout, n->dst_layout)) { diff --git a/tests/python/contrib/test_dnnl.py b/tests/python/contrib/test_dnnl.py index 4d1972d6a3b0..70a68f6092f5 100755 --- a/tests/python/contrib/test_dnnl.py +++ b/tests/python/contrib/test_dnnl.py @@ -16,6 +16,7 @@ # under the License. 
import pytest import itertools + import tvm import tvm.relay.testing from tvm import relay @@ -61,13 +62,17 @@ def check_dnnl_used(mod): dev = tvm.cpu() result_dict = dict() for mode in ["graph", "vm"]: - for use_dnnl in [False, True]: + for use_dnnl, alter_layout in [(False, False), (True, False), (True, True)]: result_key = mode + ("_dnnl" if use_dnnl else "") if use_dnnl: - mod = dnnl.partition_for_dnnl(mod, params) - check_dnnl_used(mod) + processed_mod = dnnl.partition_for_dnnl(mod, params, alter_layout) + check_dnnl_used(processed_mod) + else: + processed_mod = mod with tvm.transform.PassContext(opt_level=3): - func = relay.create_executor(mode, mod=mod, device=dev, target=target).evaluate() + func = relay.create_executor( + mode, mod=processed_mod, device=dev, target=target + ).evaluate() if run_module: if isinstance(input, dict): result_dict[result_key] = func(**input, **params) @@ -80,13 +85,11 @@ def check_dnnl_used(mod): def run_and_verify_func(config, run_module, target="llvm", dtype="float32"): """Test a Relay func by compiling, running, and comparing TVM and DNNL outputs. - Parameters ---------- config : Tuple[relay.Function, Dict[str, NDArray], List[str]] A tuple containing 1) The function to test, 2) A dictionary of var names to input shapes and 3) A list of which vars should be considered params. - run_module: bool If True, the built module will be run after being compiled. 
""" @@ -97,12 +100,12 @@ def run_and_verify_func(config, run_module, target="llvm", dtype="float32"): for k, v in input_shapes.items() if k not in is_param } - run_and_verify(f, input_dict, params, target, run_module) + run_and_verify(f, input_dict, params, target=target, run_module=run_module) def get_conv1d( x_shape=((1, 3, 224)), - k_shape=(10, 3, 3), + k_shape=(16, 3, 3), groups=1, padding=(1, 1), strides=(1), @@ -222,7 +225,7 @@ def get_conv2d_transpose( out = relay.nn.conv2d_transpose( x, kernel, - channels=k_shape[1], + channels=k_shape[1] * groups, kernel_size=k_shape[2:4], groups=groups, padding=padding, @@ -251,7 +254,7 @@ def get_conv2d_weights_const( dtype="float32", ): x = relay.var("x", shape=(x_shape), dtype=dtype) - kernel = relay.const(np.ones(k_shape).astype(dtype)) + kernel = relay.const(np.random.randint(0, 1, k_shape).astype(dtype)) out = relay.nn.conv2d( x, kernel, @@ -270,7 +273,7 @@ def get_conv2d_weights_const( def get_conv2d_bias( x_shape=(1, 32, 8, 8), k_shape=(16, 32, 3, 3), activation=None, dtype="float32" ): - conv, dic, param_lst = get_conv2d(x_shape=x_shape, k_shape=k_shape, dtype=dtype) + conv, dic, param_lst = get_conv2d_weights_const(x_shape=x_shape, k_shape=k_shape, dtype=dtype) bias = relay.var("bias", shape=(k_shape[0],), dtype=dtype) out = relay.nn.bias_add(conv, bias) dic["bias"] = (k_shape[0],) @@ -336,7 +339,7 @@ def get_conv3d( dtype="float32", ): x = relay.var("x", shape=(x_shape), dtype=dtype) - kernel = relay.var("kernel", shape=(k_shape), dtype=dtype) + kernel = relay.const(np.random.randint(0, 1, k_shape).astype(dtype)) out = relay.nn.conv3d( x, kernel, @@ -373,7 +376,7 @@ def get_conv3d_transpose( kernel_layout="OIDHW", ): x = relay.var("x", shape=(x_shape), dtype=dtype) - kernel = relay.var("kernel", shape=(k_shape), dtype=dtype) + kernel = relay.const(np.random.randint(0, 1, k_shape).astype(dtype)) out = relay.nn.conv3d_transpose( x, kernel, @@ -542,15 +545,22 @@ def get_graph(x_shape, axis): def 
test_conv1d(run_module, dtype="float32"): - conv1d, dic, param_lst = get_conv1d(channels=10, dtype=dtype) + conv1d, dic, param_lst = get_conv1d(channels=16, dtype=dtype) conv1d = tvm.IRModule.from_expr(conv1d) config = conv1d, dic, param_lst run_and_verify_func(config, run_module=run_module, dtype=dtype) + x_shape = (1, 32, 224) + k_shape = (16, 32, 3) + conv1d_bias, dic, param_lst = get_conv1d(x_shape, k_shape, dtype=dtype) + conv1d_bias = tvm.IRModule.from_expr(conv1d_bias) + config = conv1d_bias, dic, param_lst + run_and_verify_func(config, run_module=run_module, dtype=dtype) + def test_conv1d_pattern(run_module, dtype="float32"): x_shape = (1, 3, 224) - k_shape = (10, 3, 3) + k_shape = (16, 3, 3) activation_lst = [None, "relu", "tanh", "sigmoid"] for a in activation_lst: conv1d, dic, param_lst = get_conv1d(x_shape, k_shape, activation=a, dtype=dtype) @@ -566,7 +576,7 @@ def test_conv1d_pattern(run_module, dtype="float32"): def test_conv2d(run_module, dtype="float32"): x_shape = (1, 32, 8, 8) - for k_shape, groups in [((16, 32, 3, 3), 1), ((32, 1, 3, 3), 32)]: + for k_shape, groups in [((16, 32, 3, 3), 1), ((32, 1, 3, 3), 32), ((32, 2, 3, 3), 16)]: for padding in [(0, 0), (1, 1)]: for strides in [(1, 1), (2, 2)]: for dilation in [(1, 1), (2, 2)]: @@ -592,6 +602,13 @@ def test_conv2d_weights_const(run_module, dtype="float32"): config = conv2d, dic, param_lst run_and_verify_func(config, run_module=run_module, dtype=dtype) + x_shape = (1, 3, 8, 8) + k_shape = (16, 3, 3, 3) + conv2d, dic, param_lst = get_conv2d_weights_const(x_shape, k_shape, dtype=dtype) + conv2d = tvm.IRModule.from_expr(conv2d) + config = conv2d, dic, param_lst + run_and_verify_func(config, run_module=run_module, dtype=dtype) + def test_conv2d_pattern(run_module, dtype="float32"): x_shape = (1, 32, 8, 8) @@ -615,14 +632,21 @@ def test_conv2d_pattern(run_module, dtype="float32"): def test_conv2d_transpose(run_module, dtype="float32"): - for padding in [(0, 0), (1, 1)]: - for strides in [(1, 1), (2, 
2)]: - conv2d_transpose, dic, param_lst = get_conv2d_transpose( - padding=padding, strides=strides, dtype=dtype - ) - conv2d_transpose = tvm.IRModule.from_expr(conv2d_transpose) - config = conv2d_transpose, dic, param_lst - run_and_verify_func(config, run_module=run_module, dtype=dtype) + x_shape = (1, 32, 8, 8) + for k_shape, groups in [((32, 16, 3, 3), 1), ((32, 1, 3, 3), 32), ((32, 4, 3, 3), 16)]: + for padding in [(0, 0), (1, 1)]: + for strides in [(1, 1), (2, 2)]: + conv2d_transpose, dic, param_lst = get_conv2d_transpose( + x_shape=x_shape, + k_shape=k_shape, + groups=groups, + padding=padding, + strides=strides, + dtype=dtype, + ) + conv2d_transpose = tvm.IRModule.from_expr(conv2d_transpose) + config = conv2d_transpose, dic, param_lst + run_and_verify_func(config, run_module=run_module, dtype=dtype) def test_conv2d_transpose_pattern(run_module, dtype="float32"): @@ -650,6 +674,13 @@ def test_conv3d(run_module, dtype="float32"): config = conv3d, dic, param_lst run_and_verify_func(config, run_module=run_module, dtype=dtype) + conv3d, dic, param_lst = get_conv3d( + x_shape=(1, 3, 8, 8, 8), k_shape=(16, 3, 3, 3, 3), dtype=dtype + ) + conv3d = tvm.IRModule.from_expr(conv3d) + config = conv3d, dic, param_lst + run_and_verify_func(config, run_module=run_module, dtype=dtype) + def test_conv3d_pattern(run_module, dtype="float32"): activation_lst = [None, "relu", "tanh", "sigmoid"] From b278c6c7d2bde9a1e4596faa61700ca29f317335 Mon Sep 17 00:00:00 2001 From: Ivy Date: Wed, 26 Jan 2022 16:20:16 +0800 Subject: [PATCH 02/15] verified cv models with onednnv1.7 --- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 116 +++++++++--------- tests/python/contrib/test_dnnl.py | 26 +++- 2 files changed, 78 insertions(+), 64 deletions(-) diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index 4fc1fa4d61a6..adbcf411b405 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc
@@ -107,6 +107,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { }; std::map layout_dict{ + {"", tag::any}, {"NCW", tag::ncw}, {"NWC", tag::nwc}, {"OIW", tag::oiw}, @@ -128,11 +129,15 @@ class DNNLJSONRuntime : public JSONRuntimeBase { {"NCW16c", tag::nCw16c}, {"OIW16i16o", tag::OIw16i16o}, {"OWI16o", tag::Owi16o}, + {"NCHW4c", tag::nChw4c}, {"NCHW8c", tag::nChw8c}, {"NCHW16c", tag::nChw16c}, {"OIHW8i8o", tag::OIhw8i8o}, {"OIHW16i16o", tag::OIhw16i16o}, {"IOHW16i16o", tag::IOhw16i16o}, + {"GOIHW4i4o", tag::gOIhw4i4o}, + {"GOIHW8i8o", tag::gOIhw8i8o}, + {"GOIHW16i16o", tag::gOIhw16i16o}, {"OHWI8o", tag::Ohwi8o}, {"OHWI16o", tag::Ohwi16o}, {"OHWI32o", tag::Ohwi32o}, @@ -172,64 +177,41 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } dnnl::memory::dims TransDims2Plain(dnnl::memory::dims input_dims, std::string layout) { - std::regex dl_plain_reg("NC(D*)(H*)W"); - std::regex kl_plain_reg("(OI|IO)(D*)(H*)W"); - std::regex dl_nwc_reg("N(D*)(H*)WC"); - std::regex dl_nCwxc_reg("NC(D*)(H*)W(\\d*)c"); - std::regex kl_goiw_reg("GOI(D*)(H*)W"); - std::regex kl_giow_reg("GIO(D*)(H*)W"); - std::regex kl_Goihwxg_reg("GOIHW(\\d*)g"); - std::regex kl_hwioGxg_reg("HWIOG(\\d*)g"); - std::regex kl_OIwxixo_reg("OI(D*)(H*)W(\\d*)i(\\d*)o"); - std::regex kl_IOwxixo_reg("IO(D*)(H*)W(\\d*)i(\\d*)o"); - std::regex kl_Owixo_reg("O(D*)(H*)WI(\\d*)o"); + std::vector axis = { + 'N', 'C', 'O', 'I', 'D', 'H', 'W', + }; dnnl::memory::dims out_dims; - - if (std::regex_match(layout, dl_nCwxc_reg)) { - dnnl::memory::dim C = input_dims[1] * input_dims[input_dims.size() - 1]; - out_dims = {input_dims[0], C}; - out_dims.insert(out_dims.end(), input_dims.begin() + 2, input_dims.end() - 1); - } else if (std::regex_match(layout, dl_nwc_reg)) { - dnnl::memory::dim N = input_dims[0], C = input_dims[input_dims.size() - 1]; - out_dims = {N, C}; - out_dims.insert(out_dims.end(), input_dims.begin() + 1, input_dims.end() - 1); - } else if (std::regex_match(layout, kl_goiw_reg)) { - dnnl::memory::dim O = 
input_dims[0] * input_dims[1], I = input_dims[0] * input_dims[1]; - out_dims = {O, I}; - out_dims.insert(out_dims.end(), input_dims.begin() + 3, input_dims.end()); - } else if (std::regex_match(layout, kl_giow_reg)) { - dnnl::memory::dim O = input_dims[0] * input_dims[2], I = input_dims[0] * input_dims[2]; - out_dims = {O, I}; - out_dims.insert(out_dims.end(), input_dims.begin() + 3, input_dims.end()); - } else if (std::regex_match(layout, kl_Goihwxg_reg)) { - dnnl::memory::dim G = input_dims[0] * input_dims[input_dims.size() - 1]; - dnnl::memory::dim O = G * input_dims[1], I = G * input_dims[2]; - out_dims = {O, I}; - out_dims.insert(out_dims.end(), input_dims.begin() + 3, input_dims.end() - 1); - } else if (std::regex_match(layout, kl_hwioGxg_reg)) { - dnnl::memory::dim G = input_dims[input_dims.size() - 2] * input_dims[input_dims.size() - 1]; - dnnl::memory::dim O = G * input_dims[input_dims.size() - 3], - I = G * input_dims[input_dims.size() - 4]; - out_dims = {O, I}; - out_dims.insert(out_dims.end(), input_dims.begin(), input_dims.end() - 4); - } else if (std::regex_match(layout, kl_OIwxixo_reg)) { - dnnl::memory::dim O = input_dims[0] * input_dims[input_dims.size() - 1], - I = input_dims[1] * input_dims[input_dims.size() - 2]; - out_dims = {O, I}; - out_dims.insert(out_dims.end(), input_dims.begin() + 2, input_dims.end() - 2); - } else if (std::regex_match(layout, kl_IOwxixo_reg)) { - dnnl::memory::dim O = input_dims[1] * input_dims[input_dims.size() - 1], - I = input_dims[0] * input_dims[input_dims.size() - 2]; - out_dims = {O, I}; - out_dims.insert(out_dims.end(), input_dims.begin() + 2, input_dims.end() - 2); - } else if (std::regex_match(layout, kl_Owixo_reg)) { - dnnl::memory::dim O = input_dims[0] * input_dims[input_dims.size() - 1]; - out_dims = {O, input_dims[input_dims.size() - 2]}; - out_dims.insert(out_dims.end(), input_dims.begin() + 1, input_dims.end() - 2); - } else if (std::regex_match(layout, dl_plain_reg) || std::regex_match(layout, 
kl_plain_reg)) { - out_dims = input_dims; - } else { - LOG(FATAL) << "Unsupported layout for TransDims2Plain: " << layout; + std::string::iterator t = layout.begin(); + // Remove numbers in layout string to match the size of input_dims + while (t != layout.end()) { + if (*t >= '0' && *t <= '9') { + layout.erase(t); + } else { + t++; + } + } + // Push the correct shapes of each axis into the output_dims + for (auto a : axis) { + dnnl::memory::dim shape = 1; + if (layout.find(a) != std::string::npos) { + shape *= input_dims[layout.find(a)]; + char lower_a = std::tolower(a); + if (layout.find(lower_a) != std::string::npos) { + shape *= input_dims[layout.find(lower_a)]; + } + out_dims.push_back(shape); + } + } + // Multiply O and I with G, respectively + if (layout.find("G") != std::string::npos) { + dnnl::memory::dim G = 1; + if (layout.find("g") != std::string::npos) { + G = input_dims[layout.find("g")] * input_dims[layout.find("G")]; + } else { + G = input_dims[layout.find("G")]; + } + out_dims[0] *= G; + out_dims[1] *= G; } return out_dims; } @@ -343,6 +325,14 @@ class DNNLJSONRuntime : public JSONRuntimeBase { dnnl::memory::dim groups = std::stoi(node.GetAttr>("groups")[0]); std::string data_layout = node.GetAttr>("data_layout")[0]; std::string kernel_layout = node.GetAttr>("kernel_layout")[0]; + std::string out_layout = ""; + if (node.HasAttr("out_layout")) { + if (node.GetAttr>("out_layout")[0] != "") { + out_layout = node.GetAttr>("out_layout")[0]; + } else { + out_layout = data_layout; + } + } // Check layout. 
if (layout_dict.find(data_layout) == layout_dict.end() || @@ -381,7 +371,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { auto conv_src_md = dnnl::memory::desc(src_dims, dt::f32, layout_dict[data_layout]); auto conv_weights_md = dnnl::memory::desc(weights_dims, dt::f32, layout_dict[kernel_layout]); auto conv_bias_md = dnnl::memory::desc(bias_dims, dt::f32, tag::any); - auto conv_dst_md = dnnl::memory::desc(dst_dims, dt::f32, layout_dict[data_layout]); + auto conv_dst_md = dnnl::memory::desc(dst_dims, dt::f32, layout_dict[out_layout]); // Covn2d description. auto conv_desc = @@ -468,8 +458,12 @@ class DNNLJSONRuntime : public JSONRuntimeBase { // Memory shapes. dnnl::memory::dims src_dims = TransDims2Plain(input_shape, data_layout); dnnl::memory::dims weights_dims_ = TransDims2Plain(weight_shape, kernel_layout); + // legalize shape IOHW with layout OIHW if (weights_dims_[0] == src_dims[1] && weights_dims_[1] == channels) { std::swap(weights_dims_[0], weights_dims_[1]); + if (kernel_layout.find("OI") == 0) { + kernel_layout.replace(kernel_layout.find("OI"), 2, "IO"); + } } dnnl::memory::dims bias_dims = {channels}; dnnl::memory::dims strides_dims = TransformStr2Dims(str_strides, "strides"); @@ -500,7 +494,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { auto deconv_src_md = dnnl::memory::desc(src_dims, dt::f32, layout_dict[data_layout]); auto deconv_weights_md = dnnl::memory::desc(weights_dims, dt::f32, layout_dict[kernel_layout]); auto deconv_bias_md = dnnl::memory::desc(bias_dims, dt::f32, tag::any); - auto deconv_dst_md = dnnl::memory::desc(dst_dims, dt::f32, layout_dict[data_layout]); + auto deconv_dst_md = dnnl::memory::desc(dst_dims, dt::f32, tag::any); // Transposed covn2d description. 
auto deconv_desc = @@ -682,8 +676,8 @@ class DNNLJSONRuntime : public JSONRuntimeBase { : dnnl::algorithm::pooling_avg_exclude_padding; } - dnnl::memory::dims src_dims = input_shape; - dnnl::memory::dims dst_dims = out_shape; + dnnl::memory::dims src_dims = TransDims2Plain(input_shape, layout); + dnnl::memory::dims dst_dims = TransDims2Plain(out_shape, layout); dnnl::memory::dims kernel_dims = TransformStr2Dims(str_kernel, "kernel"); dnnl::memory::dims strides_dims = TransformStr2Dims(str_strides, "strides"); dnnl::memory::dims dilates_dims = TransformStr2Dims(str_dilates, "dilates"); diff --git a/tests/python/contrib/test_dnnl.py b/tests/python/contrib/test_dnnl.py index 70a68f6092f5..69890467ec17 100755 --- a/tests/python/contrib/test_dnnl.py +++ b/tests/python/contrib/test_dnnl.py @@ -14,15 +14,16 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +from gluoncv.model_zoo import get_model import pytest import itertools +import numpy as np import tvm -import tvm.relay.testing from tvm import relay from tvm.relay.op.contrib import dnnl import tvm.testing -import numpy as np + has_dnnl_codegen = pytest.mark.skipif( not tvm.get_global_func("relay.ext.dnnl", True), reason="DNNL codegen not available" @@ -63,7 +64,8 @@ def check_dnnl_used(mod): result_dict = dict() for mode in ["graph", "vm"]: for use_dnnl, alter_layout in [(False, False), (True, False), (True, True)]: - result_key = mode + ("_dnnl" if use_dnnl else "") + result_key = mode + ("_dnnl" if use_dnnl else "") + ("_layout" if alter_layout else "") + print(result_key) if use_dnnl: processed_mod = dnnl.partition_for_dnnl(mod, params, alter_layout) check_dnnl_used(processed_mod) @@ -864,6 +866,24 @@ def get_graph( run_and_verify_func(get_graph(relay.nn.max_pool3d, strides=(1, 1, 1)), run_module=run_module) +def run_and_verify_model( + model, run_module, input_shape=(1, 3, 224, 224), target="llvm", dtype="float32" +): + 
i_data = np.random.uniform(-1, 1, input_shape).astype(dtype) + block = get_model(model, pretrained=True) + mod, params = relay.frontend.from_mxnet(block, shape={"data": input_shape}, dtype=dtype) + run_and_verify(mod, i_data, params, target=target, run_module=run_module) + + +@pytest.mark.skip(reason="takes a long time for this test ") +def test_model(run_module, dtype="float32"): + run_and_verify_model("ResNet50_v1b", run_module, dtype=dtype) + run_and_verify_model("VGG11_bn", run_module, dtype=dtype) + run_and_verify_model("InceptionV3", run_module, input_shape=(1, 3, 300, 300), dtype=dtype) + run_and_verify_model("MobileNet1.0", run_module, dtype=dtype) + run_and_verify_model("ResNext50_32x4d", run_module, dtype=dtype) + + if __name__ == "__main__": import sys From b2b27ff44658aa73219458904afa51e79ab763ae Mon Sep 17 00:00:00 2001 From: Ivy Date: Tue, 22 Feb 2022 13:32:55 +0800 Subject: [PATCH 03/15] rebase to the latest main branch --- src/relay/op/nn/convolution.cc | 50 ++++++++++++++++++++++++------- tests/python/contrib/test_dnnl.py | 1 - 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/src/relay/op/nn/convolution.cc b/src/relay/op/nn/convolution.cc index 447bbd4926b4..d2b6983fef0c 100644 --- a/src/relay/op/nn/convolution.cc +++ b/src/relay/op/nn/convolution.cc @@ -185,13 +185,19 @@ bool Conv2DRel(const Array& types, int num_inputs, const Attrs& attrs, const auto* weight = types[1].as(); if (data == nullptr) return false; static const Layout kNCHW("NCHW"); - static const Layout kOIHW("OIHW"); + Layout kOIHW("OIHW"); const auto* param = attrs.as(); ICHECK(param != nullptr); const Layout in_layout(param->data_layout); const Layout kernel_layout(param->kernel_layout); + bool is_group = false; + if (param->groups > 1 && kernel_layout.name().find("G") != std::string::npos) { + kOIHW = Layout("GOIHW"); + is_group = true; + } + const auto trans_in_layout = tir::BijectiveLayout(in_layout, kNCHW); if (!trans_in_layout.defined()) { 
reporter->GetDiagCtx().Emit( @@ -204,9 +210,10 @@ bool Conv2DRel(const Array& types, int num_inputs, const Attrs& attrs, const auto trans_kernel_layout = tir::BijectiveLayout(kernel_layout, kOIHW); if (!trans_kernel_layout.defined()) { reporter->GetDiagCtx().Emit( - Diagnostic::Error(reporter->GetSpan()) - << "conv2d only support kernel layouts that are convertible from OIHW." - << " The provided layout is: " << kernel_layout); + Diagnostic::Error(reporter->GetSpan()) + << "conv2d only support kernel layouts that are convertible from " + << kOIHW << "." + << " The provided layout is: " << kernel_layout); return false; } @@ -244,7 +251,12 @@ bool Conv2DRel(const Array& types, int num_inputs, const Attrs& attrs, ICHECK_EQ(param->dilation.size(), 2); Array wshape; - if (is_depthwise) { + if (is_group) { + // infer weight's shape for group convolution + wshape = {{param->groups, indexdiv(param->channels, param->groups), + indexdiv(dshape_nchw[1], param->groups), param->kernel_size[0], + param->kernel_size[1]}}; + } else if (is_depthwise) { // infer weight's shape for depthwise convolution wshape = {{dshape_nchw[1], indexdiv(param->channels, dshape_nchw[1]), param->kernel_size[0], param->kernel_size[1]}}; @@ -734,13 +746,19 @@ bool Conv2DTransposeRel(const Array& types, int num_inputs, const Attrs& a if (data == nullptr) return false; static const Layout kNCHW("NCHW"); - static const Layout kIOHW("IOHW"); + Layout kIOHW("IOHW"); const Conv2DTransposeAttrs* param = attrs.as(); ICHECK(param != nullptr); const Layout in_layout(param->data_layout); const Layout kernel_layout(param->kernel_layout); + bool is_group = false; + if (param->groups > 1 && kernel_layout.name().find("G") != std::string::npos) { + kIOHW = Layout("GIOHW"); + is_group = true; + } + const auto trans_in_layout = tir::BijectiveLayout(in_layout, kNCHW); ICHECK(trans_in_layout.defined()) << "Conv2DTransposed only support input layouts that are convertible from NCHW." 
@@ -748,9 +766,10 @@ bool Conv2DTransposeRel(const Array& types, int num_inputs, const Attrs& a const auto trans_kernel_layout = tir::BijectiveLayout(kernel_layout, kIOHW); ICHECK(trans_kernel_layout.defined()) - << "Conv2DTransposed only support kernel layouts that are convertible from IOHW." - << " But got " << kernel_layout; - + << "Conv2DTransposed only support kernel layouts that are convertible from " + << kIOHW << "." + << " But got " << kernel_layout << " " << kIOHW; + Layout out_layout(param->out_layout == "" ? param->data_layout : param->out_layout); const auto trans_out_layout = tir::BijectiveLayout(out_layout, kNCHW); ICHECK(trans_out_layout.defined()) @@ -766,8 +785,17 @@ bool Conv2DTransposeRel(const Array& types, int num_inputs, const Attrs& a ICHECK_EQ(param->kernel_size.size(), 2); ICHECK_EQ(param->dilation.size(), 2); - Array wshape({dshape_nchw[1], indexdiv(param->channels, param->groups), - param->kernel_size[0], param->kernel_size[1]}); + Array wshape; + if (is_group) { + // infer weight's shape for group convolution + wshape = {{param->groups, indexdiv(dshape_nchw[1], param->groups), + indexdiv(param->channels, param->groups), param->kernel_size[0], + param->kernel_size[1]}}; + } else { + // infer weight's shape for depthwise convolution + wshape = {{dshape_nchw[1], indexdiv(param->channels, param->groups), param->kernel_size[0], + param->kernel_size[1]}}; + } wshape = trans_kernel_layout.BackwardShape(wshape); dilated_ksize_y = 1 + (param->kernel_size[0] - 1) * param->dilation[0]; diff --git a/tests/python/contrib/test_dnnl.py b/tests/python/contrib/test_dnnl.py index 69890467ec17..b7e8f1159e80 100755 --- a/tests/python/contrib/test_dnnl.py +++ b/tests/python/contrib/test_dnnl.py @@ -881,7 +881,6 @@ def test_model(run_module, dtype="float32"): run_and_verify_model("VGG11_bn", run_module, dtype=dtype) run_and_verify_model("InceptionV3", run_module, input_shape=(1, 3, 300, 300), dtype=dtype) run_and_verify_model("MobileNet1.0", run_module, 
dtype=dtype) - run_and_verify_model("ResNext50_32x4d", run_module, dtype=dtype) if __name__ == "__main__": From ce272d59fee08ae531e85d6829eb165befb44fb9 Mon Sep 17 00:00:00 2001 From: Ivy Date: Tue, 22 Feb 2022 23:30:55 +0800 Subject: [PATCH 04/15] fix format related comments --- python/tvm/relay/op/contrib/dnnl.py | 40 ++++--- src/relay/backend/contrib/dnnl/codegen.cc | 42 ++++--- .../backend/contrib/dnnl/query_layout.cc | 105 +++++++++--------- src/relay/op/nn/convolution.cc | 14 +-- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 9 +- tests/python/contrib/test_dnnl.py | 1 - 6 files changed, 117 insertions(+), 94 deletions(-) diff --git a/python/tvm/relay/op/contrib/dnnl.py b/python/tvm/relay/op/contrib/dnnl.py index 1dce6038b13b..a852844a0a0e 100644 --- a/python/tvm/relay/op/contrib/dnnl.py +++ b/python/tvm/relay/op/contrib/dnnl.py @@ -96,12 +96,12 @@ def _func_wrapper(expr): def make_conv_pattern(conv_name, with_bias=True, with_eltwise=None): - """Create patterns related to conv and deconv. + """Create patterns related to conv and conv_transpose. Parameters ---------- with_bias : bool - Whether attach `bias_add` to `conv / deconv`. + Whether attach `bias_add` to `conv / conv_transpose`. with_eltwise : str The attached elementwise post-op name. Returns @@ -149,12 +149,12 @@ def make_dense_pattern(with_bias=True, with_eltwise=None): return dense_out -def make_dnnl_pattern(op, with_bias, with_eltwise): +def make_dnnl_pattern(op_name, with_bias, with_eltwise): """Create dnnl patterns. Parameters ---------- - op : str + op_name : str The first call node's op name. with_bias : bool Whether attach `bias_add` to `nn.dense`. @@ -165,18 +165,20 @@ def make_dnnl_pattern(op, with_bias, with_eltwise): pattern : Tuple(pattern_name, CallPattern) Created pattern name, along with its CallPattern. 
""" - pat_name = op.replace("nn", "dnnl") + pat_name = op_name.replace("nn", "dnnl") + if "_transpose" in op_name: + pat_name = "dnnl.deconv" + op_name.split("_")[0][-2::] pat_name += "_bias" if with_bias else "" pat_name += ("_" + with_eltwise.split(".")[-1]) if with_eltwise else "" - if "conv" in op: - dnnl_pattern = (pat_name, make_conv_pattern(op, with_bias, with_eltwise)) - elif op == "nn.dense": + if "conv" in op_name: + dnnl_pattern = (pat_name, make_conv_pattern(op_name, with_bias, with_eltwise)) + elif op_name == "nn.dense": dnnl_pattern = (pat_name, make_dense_pattern(with_bias, with_eltwise)) else: logger.warning( "Currently, only conv1d, conv2d, conv2d_transpose, conv3d_transpose and " "dense op are supported, but got %s.", - op, + op_name, ) dnnl_pattern = () return dnnl_pattern @@ -277,7 +279,9 @@ def get_shape(tensor): raise TypeError("Unsupport data type: %s" % type(tensor)) -def trans_data(input_data, is_weight=False, conv_type=1): +def validate_layout_for_tvm(input_data, is_weight=False, conv_type=1): + """Transfer layout, denoted with `a, b, c, d, e`, + into valid layout (NCHW / OIHW) of TVM.""" if conv_type == 1: data_dic = {"a": "N", "b": "C", "c": "W"} weight_dic = {"a": "O", "b": "I", "c": "W", "d": "G"} @@ -352,9 +356,11 @@ def alter_conv(attrs, inputs, tinfos, out_type): len(get_shape(out_type)), weight_shape, out_shape, paddings, strides, dilates, G ) src_df, weight_df, dst_df = res.split(",") - new_attrs["data_layout"] = trans_data(src_df, is_weight=False, conv_type=conv_type) - new_attrs["kernel_layout"] = trans_data(weight_df, is_weight=True, conv_type=conv_type) - new_attrs["out_layout"] = trans_data(dst_df, is_weight=False, conv_type=conv_type) + new_attrs["data_layout"] = validate_layout_for_tvm(src_df, is_weight=False, conv_type=conv_type) + new_attrs["kernel_layout"] = validate_layout_for_tvm( + weight_df, is_weight=True, conv_type=conv_type + ) + new_attrs["out_layout"] = validate_layout_for_tvm(dst_df, is_weight=False, 
conv_type=conv_type) if new_attrs["kernel_layout"] == "HWOIG16g": new_attrs["kernel_layout"] = "HWIOG16g" @@ -391,9 +397,11 @@ def alter_deconv(attrs, inputs, tinfos, out_type): G, ) src_df, weight_df, dst_df = res.split(",") - new_attrs["data_layout"] = trans_data(src_df, is_weight=False, conv_type=conv_type) - new_attrs["kernel_layout"] = trans_data(weight_df, is_weight=True, conv_type=conv_type) - new_attrs["out_layout"] = trans_data(dst_df, is_weight=False, conv_type=conv_type) + new_attrs["data_layout"] = validate_layout_for_tvm(src_df, is_weight=False, conv_type=conv_type) + new_attrs["kernel_layout"] = validate_layout_for_tvm( + weight_df, is_weight=True, conv_type=conv_type + ) + new_attrs["out_layout"] = validate_layout_for_tvm(dst_df, is_weight=False, conv_type=conv_type) if conv_type == 1: return relay.nn.conv1d_transpose(data, weight, **new_attrs) diff --git a/src/relay/backend/contrib/dnnl/codegen.cc b/src/relay/backend/contrib/dnnl/codegen.cc index 7971b9cf67d2..41480ed33b0a 100644 --- a/src/relay/backend/contrib/dnnl/codegen.cc +++ b/src/relay/backend/contrib/dnnl/codegen.cc @@ -445,14 +445,30 @@ class DNNLJSONSerializer : public backend::contrib::JSONSerializer { {"relu", "nn.relu"}, {"tanh", "tanh"}, {"sigmoid", "sigmoid"}, + {"nn.deconv2d", "nn.conv2d_transpose"}, + {"nn.deconv3d", "nn.conv3d_transpose"}, }; - std::vector ParsingOpList(std::string op, std::string pattern_name) { - std::vector op_list = {"nn." 
+ op}; - for (auto& t : op_map) { - if (pattern_name.find(t.first) != std::string::npos) { - op_list.push_back(t.second); + std::vector ParsingOpList(const std::string& pattern_name, + std::string interval = "_") { + ICHECK_NE(pattern_name, ""); + std::vector op_list; + size_t pos = 0, start = 0; + while ((pos = pattern_name.find(interval, start)) != std::string::npos) { + std::string op_name = pattern_name.substr(start, pos - start); + if (op_name.find("dnnl") != std::string::npos) { + op_name.replace(op_name.find("dnnl"), 4, "nn"); + if (op_name.find("deconv") != std::string::npos) { + op_name = op_map[op_name]; + } + } else { + op_name = op_map[op_name]; } + if (pos > start) op_list.push_back(op_name); + start = pos + interval.size(); + } + if (pattern_name.size() > start) { + op_list.push_back(op_map[pattern_name.substr(start)]); } return op_list; } @@ -471,28 +487,28 @@ class DNNLJSONSerializer : public backend::contrib::JSONSerializer { ICHECK(comp.defined()) << "DNNL JSON runtime only supports composite functions."; name = comp.value(); - if (name.find("dnnl.conv2d_transpose") != std::string::npos) { - std::vector op_list = ParsingOpList("conv2d_transpose", name); + if (name.find("dnnl.deconv2d") != std::string::npos) { + std::vector op_list = ParsingOpList(name); call = GetRootCall(fn->body.as(), op_list.size() - 1, op_list); ICHECK(call->op.as()) << "Not op node"; - } else if (name.find("dnnl.conv3d_transpose") != std::string::npos) { - std::vector op_list = ParsingOpList("conv3d_transpose", name); + } else if (name.find("dnnl.deconv3d") != std::string::npos) { + std::vector op_list = ParsingOpList(name); call = GetRootCall(fn->body.as(), op_list.size() - 1, op_list); ICHECK(call->op.as()) << "Not op node"; } else if (name.find("dnnl.conv1d") != std::string::npos) { - std::vector op_list = ParsingOpList("conv1d", name); + std::vector op_list = ParsingOpList(name); call = GetRootCall(fn->body.as(), op_list.size() - 1, op_list); ICHECK(call->op.as()) << "Not 
op node"; } else if (name.find("dnnl.conv2d") != std::string::npos) { - std::vector op_list = ParsingOpList("conv2d", name); + std::vector op_list = ParsingOpList(name); call = GetRootCall(fn->body.as(), op_list.size() - 1, op_list); ICHECK(call->op.as()) << "Not op node"; } else if (name.find("dnnl.conv3d") != std::string::npos) { - std::vector op_list = ParsingOpList("conv3d", name); + std::vector op_list = ParsingOpList(name); call = GetRootCall(fn->body.as(), op_list.size() - 1, op_list); ICHECK(call->op.as()) << "Not op node"; } else if (name.find("dnnl.dense") != std::string::npos) { - std::vector op_list = ParsingOpList("dense", name); + std::vector op_list = ParsingOpList(name); call = GetRootCall(fn->body.as(), op_list.size() - 1, op_list); ICHECK(call->op.as()) << "Not op node"; } else { diff --git a/src/relay/backend/contrib/dnnl/query_layout.cc b/src/relay/backend/contrib/dnnl/query_layout.cc index fac6cf1111da..53d8a22c6eae 100755 --- a/src/relay/backend/contrib/dnnl/query_layout.cc +++ b/src/relay/backend/contrib/dnnl/query_layout.cc @@ -54,30 +54,6 @@ inline void array_copy(T* dst, const T* src, size_t size) { for (size_t i = 0; i < size; ++i) dst[i] = src[i]; } -void compute_blocks(dims_t blocks, const dnnl::memory::desc* md) { - using format_kind_t = dnnl_format_kind_t; - const format_kind_t blocked = dnnl_blocked; - if (!(md->data.format_kind == blocked)) { - array_set(blocks, 0, md->data.ndims); - return; - } - - array_set(blocks, 1, md->data.ndims); - - const auto& bd = md->data.format_desc.blocking; - for (int iblk = 0; iblk < bd.inner_nblks; ++iblk) - blocks[bd.inner_idxs[iblk]] *= bd.inner_blks[iblk]; -} - -inline bool has_runtime_strides(const dnnl::memory::desc* md) { - using format_kind_t = dnnl_format_kind_t; - const format_kind_t blocked = dnnl_blocked; - if (!(md->data.format_kind == blocked)) return false; - for (int d = 0; d < md->data.ndims; ++d) - if (md->data.format_desc.blocking.strides[d] == DNNL_RUNTIME_DIM_VAL) return true; - 
return false; -} - template inline void swap(T& t1, T& t2) { T tmp(t1); @@ -108,6 +84,28 @@ inline void simultaneous_sort(T* vals, T* vals_2nd_level, U* keys, size_t size, } } +void compute_blocks(dims_t blocks, const dnnl::memory::desc* md) { + using format_kind_t = dnnl_format_kind_t; + const format_kind_t blocked = dnnl_blocked; + if (!(md->data.format_kind == blocked)) { + array_set(blocks, 0, md->data.ndims); + return; + } + array_set(blocks, 1, md->data.ndims); + const auto& bd = md->data.format_desc.blocking; + for (int iblk = 0; iblk < bd.inner_nblks; ++iblk) + blocks[bd.inner_idxs[iblk]] *= bd.inner_blks[iblk]; +} + +inline bool has_runtime_strides(const dnnl::memory::desc* md) { + using format_kind_t = dnnl_format_kind_t; + const format_kind_t blocked = dnnl_blocked; + if (!(md->data.format_kind == blocked)) return false; + for (int d = 0; d < md->data.ndims; ++d) + if (md->data.format_desc.blocking.strides[d] == DNNL_RUNTIME_DIM_VAL) return true; + return false; +} + std::string md2fmt_tag_str(const dnnl::memory::desc* md) { const auto& blk = md->data.format_desc.blocking; @@ -148,7 +146,7 @@ std::string md2fmt_tag_str(const dnnl::memory::desc* md) { return s; } -dnnl::memory::dims str2num(std::string str_shape, int input_size) { +dnnl::memory::dims str2dims(std::string str_shape, int input_size) { std::string str_reg = "(\\d*)"; for (int i = 0; i < input_size - 1; i++) { str_reg.append(",(\\d*)"); @@ -174,7 +172,7 @@ std::string get_optimal_layout_for_conv(int input_size, std::string weight_shape using dt = dnnl::memory::data_type; dnnl::memory::dim groups = std::stoi(G); - dnnl::memory::dims weight_dims_ = str2num(weight_shape, input_size); + dnnl::memory::dims weight_dims_ = str2dims(weight_shape, input_size); dnnl::memory::dims weight_dims = weight_dims_; if (groups > 1) { if (weight_dims_.size() == 5) { @@ -184,14 +182,14 @@ std::string get_optimal_layout_for_conv(int input_size, std::string weight_shape weight_dims[1] = weight_dims[1] * groups; } } 
- dnnl::memory::dims out_dims = str2num(out_shape, input_size); - dnnl::memory::dims padding_dims = str2num(paddings, 2 * (input_size - 2)); + dnnl::memory::dims out_dims = str2dims(out_shape, input_size); + dnnl::memory::dims padding_dims = str2dims(paddings, 2 * (input_size - 2)); dnnl::memory::dims padding_dims_l(padding_dims.begin(), padding_dims.begin() + padding_dims.size() / 2); dnnl::memory::dims padding_dims_r(padding_dims.end() - padding_dims.size() / 2, padding_dims.end()); - dnnl::memory::dims strides_dims = str2num(strides, input_size - 2); - dnnl::memory::dims dilates_dims = str2num(dilates, input_size - 2); + dnnl::memory::dims strides_dims = str2dims(strides, input_size - 2); + dnnl::memory::dims dilates_dims = str2dims(dilates, input_size - 2); dnnl::memory::dims input_dims = out_dims; input_dims[1] = weight_dims[1]; @@ -206,22 +204,21 @@ std::string get_optimal_layout_for_conv(int input_size, std::string weight_shape input_dims[i] = out_dims[i] * S - PL - PR + DK - 1; } - dnnl::memory::dims conv_src_tz = input_dims; - dnnl::memory::dims conv_weights_tz = weight_dims; + dnnl::memory::dims conv_src_dims = input_dims; + dnnl::memory::dims conv_weights_dims = weight_dims; if (groups > 1) { - conv_weights_tz = {groups, out_dims[1] / groups, input_dims[1] / groups}; - conv_weights_tz.insert(conv_weights_tz.end(), weight_dims.begin() + 2, weight_dims.end()); + conv_weights_dims = {groups, out_dims[1] / groups, input_dims[1] / groups}; + conv_weights_dims.insert(conv_weights_dims.end(), weight_dims.begin() + 2, weight_dims.end()); } - dnnl::memory::dims conv_bias_tz = {out_dims[1]}; - dnnl::memory::dims conv_dst_tz = out_dims; + dnnl::memory::dims conv_dst_dims = out_dims; dnnl::memory::dims conv_strides = strides_dims; dnnl::memory::dims conv_dilates = dilates_dims; dnnl::memory::dims conv_padding_l = padding_dims_l; dnnl::memory::dims conv_padding_r = padding_dims_r; - auto conv_src_md = dnnl::memory::desc({conv_src_tz}, dt::f32, tag::any); - auto 
conv_weights_md = dnnl::memory::desc({conv_weights_tz}, dt::f32, tag::any); - auto conv_dst_md = dnnl::memory::desc({conv_dst_tz}, dt::f32, tag::any); + auto conv_src_md = dnnl::memory::desc({conv_src_dims}, dt::f32, tag::any); + auto conv_weights_md = dnnl::memory::desc({conv_weights_dims}, dt::f32, tag::any); + auto conv_dst_md = dnnl::memory::desc({conv_dst_dims}, dt::f32, tag::any); auto conv_desc = dnnl::convolution_forward::desc( dnnl::prop_kind::forward_inference, dnnl::algorithm::convolution_direct, conv_src_md, @@ -251,7 +248,7 @@ std::string get_optimal_layout_for_deconv(int input_size, std::string weight_sha using dt = dnnl::memory::data_type; dnnl::memory::dim groups = std::stoi(G); - dnnl::memory::dims weight_dims_ = str2num(weight_shape, input_size); + dnnl::memory::dims weight_dims_ = str2dims(weight_shape, input_size); dnnl::memory::dims weight_dims = weight_dims_; if (groups > 1) { if (weight_dims_.size() == 5) { @@ -261,15 +258,15 @@ std::string get_optimal_layout_for_deconv(int input_size, std::string weight_sha weight_dims[1] = weight_dims[1] * groups; } } - dnnl::memory::dims out_dims = str2num(out_shape, input_size); - dnnl::memory::dims padding_dims = str2num(paddings, 2 * (input_size - 2)); + dnnl::memory::dims out_dims = str2dims(out_shape, input_size); + dnnl::memory::dims padding_dims = str2dims(paddings, 2 * (input_size - 2)); dnnl::memory::dims padding_dims_l(padding_dims.begin(), padding_dims.begin() + padding_dims.size() / 2); dnnl::memory::dims padding_dims_r(padding_dims.end() - padding_dims.size() / 2, padding_dims.end()); - dnnl::memory::dims output_padding_dims = str2num(output_paddings, input_size - 2); - dnnl::memory::dims strides_dims = str2num(strides, input_size - 2); - dnnl::memory::dims dilates_dims = str2num(dilates, input_size - 2); + dnnl::memory::dims output_padding_dims = str2dims(output_paddings, input_size - 2); + dnnl::memory::dims strides_dims = str2dims(strides, input_size - 2); + dnnl::memory::dims dilates_dims 
= str2dims(dilates, input_size - 2); dnnl::memory::dims input_dims = out_dims; if (out_dims[1] == weight_dims[0]) { @@ -290,22 +287,22 @@ std::string get_optimal_layout_for_deconv(int input_size, std::string weight_sha input_dims[i] = (out_dims[i] - DK + PL + PR - OP) / S + 1; } - dnnl::memory::dims deconv_src_tz = input_dims; - dnnl::memory::dims deconv_weights_tz = weight_dims; + dnnl::memory::dims deconv_src_dims = input_dims; + dnnl::memory::dims deconv_weights_dims = weight_dims; if (groups > 1) { - deconv_weights_tz = {groups, out_dims[1] / groups, input_dims[1] / groups}; - deconv_weights_tz.insert(deconv_weights_tz.end(), weight_dims.begin() + 2, weight_dims.end()); + deconv_weights_dims = {groups, out_dims[1] / groups, input_dims[1] / groups}; + deconv_weights_dims.insert(deconv_weights_dims.end(), weight_dims.begin() + 2, + weight_dims.end()); } - dnnl::memory::dims deconv_bias_tz = {out_dims[1]}; - dnnl::memory::dims deconv_dst_tz = out_dims; + dnnl::memory::dims deconv_dst_dims = out_dims; dnnl::memory::dims deconv_strides = strides_dims; dnnl::memory::dims deconv_dilates = dilates_dims; dnnl::memory::dims deconv_padding_l = padding_dims_l; dnnl::memory::dims deconv_padding_r = padding_dims_r; - auto deconv_src_md = dnnl::memory::desc({deconv_src_tz}, dt::f32, tag::any); - auto deconv_weights_md = dnnl::memory::desc({deconv_weights_tz}, dt::f32, tag::any); - auto deconv_dst_md = dnnl::memory::desc({deconv_dst_tz}, dt::f32, tag::any); + auto deconv_src_md = dnnl::memory::desc({deconv_src_dims}, dt::f32, tag::any); + auto deconv_weights_md = dnnl::memory::desc({deconv_weights_dims}, dt::f32, tag::any); + auto deconv_dst_md = dnnl::memory::desc({deconv_dst_dims}, dt::f32, tag::any); auto deconv_desc = dnnl::deconvolution_forward::desc( dnnl::prop_kind::forward_inference, dnnl::algorithm::deconvolution_direct, deconv_src_md, diff --git a/src/relay/op/nn/convolution.cc b/src/relay/op/nn/convolution.cc index d2b6983fef0c..1d883cfb5b14 100644 --- 
a/src/relay/op/nn/convolution.cc +++ b/src/relay/op/nn/convolution.cc @@ -209,11 +209,10 @@ bool Conv2DRel(const Array& types, int num_inputs, const Attrs& attrs, const auto trans_kernel_layout = tir::BijectiveLayout(kernel_layout, kOIHW); if (!trans_kernel_layout.defined()) { - reporter->GetDiagCtx().Emit( - Diagnostic::Error(reporter->GetSpan()) - << "conv2d only support kernel layouts that are convertible from " - << kOIHW << "." - << " The provided layout is: " << kernel_layout); + reporter->GetDiagCtx().Emit(Diagnostic::Error(reporter->GetSpan()) + << "conv2d only support kernel layouts that are convertible from " + << kOIHW << "." + << " The provided layout is: " << kernel_layout); return false; } @@ -766,10 +765,9 @@ bool Conv2DTransposeRel(const Array& types, int num_inputs, const Attrs& a const auto trans_kernel_layout = tir::BijectiveLayout(kernel_layout, kIOHW); ICHECK(trans_kernel_layout.defined()) - << "Conv2DTransposed only support kernel layouts that are convertible from " - << kIOHW << "." + << "Conv2DTransposed only support kernel layouts that are convertible from " << kIOHW << "." << " But got " << kernel_layout << " " << kIOHW; - + Layout out_layout(param->out_layout == "" ? 
param->data_layout : param->out_layout); const auto trans_out_layout = tir::BijectiveLayout(out_layout, kNCHW); ICHECK(trans_out_layout.defined()) diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index adbcf411b405..9f08870175ca 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -233,7 +233,8 @@ class DNNLJSONRuntime : public JSONRuntimeBase { stream_ = dnnl::stream(engine_); std::regex conv_pat(".*conv[1-3]d.*"); - std::regex conv_tranpose_pat(".*conv[1-3]d_transpose.*"); + std::regex deconv_pat(".*deconv[1-3]d.*"); + std::regex conv_transpose_pat(".*conv[1-3]d_transpose.*"); std::regex dense_pat(".*dense.*"); std::regex max_pool_pat(".*max_pool[1-3]d"); std::regex avg_pool_pat(".*avg_pool[1-3]d"); @@ -244,7 +245,8 @@ class DNNLJSONRuntime : public JSONRuntimeBase { if (node.GetOpType() == "kernel") { ICHECK_EQ(node.GetOpType(), "kernel"); auto op_name = node.GetOpName(); - if (std::regex_match(op_name, conv_tranpose_pat)) { + if (std::regex_match(op_name, deconv_pat) || + std::regex_match(op_name, conv_transpose_pat)) { Deconvolution(nid); } else if (std::regex_match(op_name, conv_pat)) { Convolution(nid); @@ -365,6 +367,9 @@ class DNNLJSONRuntime : public JSONRuntimeBase { if (groups > 1) { weights_dims = {groups, channels / groups, src_dims[1] / groups}; weights_dims.insert(weights_dims.end(), weights_dims_.begin() + 2, weights_dims_.end()); + if (kernel_layout == "OIHW") { + kernel_layout.insert(0, "G"); + } } // Memory descriptions. 
diff --git a/tests/python/contrib/test_dnnl.py b/tests/python/contrib/test_dnnl.py index b7e8f1159e80..3476ba853ccb 100755 --- a/tests/python/contrib/test_dnnl.py +++ b/tests/python/contrib/test_dnnl.py @@ -65,7 +65,6 @@ def check_dnnl_used(mod): for mode in ["graph", "vm"]: for use_dnnl, alter_layout in [(False, False), (True, False), (True, True)]: result_key = mode + ("_dnnl" if use_dnnl else "") + ("_layout" if alter_layout else "") - print(result_key) if use_dnnl: processed_mod = dnnl.partition_for_dnnl(mod, params, alter_layout) check_dnnl_used(processed_mod) From c5e4d1073510bad03f374ae0499cfcccdf924dae Mon Sep 17 00:00:00 2001 From: Ivy Date: Wed, 23 Feb 2022 10:02:31 +0800 Subject: [PATCH 05/15] remove unnecessary layout transformation --- python/tvm/relay/op/contrib/dnnl.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/tvm/relay/op/contrib/dnnl.py b/python/tvm/relay/op/contrib/dnnl.py index a852844a0a0e..15e88845c232 100644 --- a/python/tvm/relay/op/contrib/dnnl.py +++ b/python/tvm/relay/op/contrib/dnnl.py @@ -361,8 +361,6 @@ def alter_conv(attrs, inputs, tinfos, out_type): weight_df, is_weight=True, conv_type=conv_type ) new_attrs["out_layout"] = validate_layout_for_tvm(dst_df, is_weight=False, conv_type=conv_type) - if new_attrs["kernel_layout"] == "HWOIG16g": - new_attrs["kernel_layout"] = "HWIOG16g" if conv_type == 1: return relay.nn.conv1d(data, weight, **new_attrs) From 2dd3d9d6cd71223fe81ea7f506f0cf907ac33eef Mon Sep 17 00:00:00 2001 From: Ivy Date: Wed, 23 Feb 2022 10:29:43 +0800 Subject: [PATCH 06/15] change deconv into conv_transpose --- python/tvm/relay/op/contrib/dnnl.py | 28 +++++++++++-------- .../backend/contrib/dnnl/query_layout.cc | 14 +++++----- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/python/tvm/relay/op/contrib/dnnl.py b/python/tvm/relay/op/contrib/dnnl.py index 15e88845c232..768da3211c53 100644 --- a/python/tvm/relay/op/contrib/dnnl.py +++ b/python/tvm/relay/op/contrib/dnnl.py @@ -235,10 +235,10 @@ 
def get_optimal_layout_for_conv(input_size, weight_shape, out_shape, paddings, s ) -def get_optimal_layout_for_deconv( +def get_optimal_layout_for_conv_transpose( input_size, weight_shape, out_shape, paddings, output_paddings, strides, dilates, G ): - """Get the optimal layout of dnnl, given shape of tranpose conv2d. + """Get the optimal layout of dnnl, given shape of tranposed conv2d. Parameters ---------- @@ -251,7 +251,7 @@ def get_optimal_layout_for_deconv( layouts : string The result. """ - return _ffi_api.get_optimal_layout_for_deconv( + return _ffi_api.get_optimal_layout_for_conv_transpose( input_size, weight_shape, out_shape, @@ -325,8 +325,8 @@ def legalize_group_conv(attrs, inputs, types): return relay.nn.conv2d(data, weight, **new_attrs) -def legalize_group_deconv(attrs, inputs, types): - """Legalize group deconv's calculation. +def legalize_group_conv_transpose(attrs, inputs, types): + """Legalize group conv_transpose's calculation. Alter weight layout from IOHW to GIOHW""" G = attrs.groups if G == 1: @@ -370,8 +370,8 @@ def alter_conv(attrs, inputs, tinfos, out_type): return relay.nn.conv3d(data, weight, **new_attrs) -def alter_deconv(attrs, inputs, tinfos, out_type): - """The transpose convolution's layout auto-query func for dnnl.""" +def alter_conv_transpose(attrs, inputs, tinfos, out_type): + """The transposed convolution's layout auto-query func for dnnl.""" data, weight = inputs weight_shape = ",".join([str(x) for x in get_shape(weight)]) @@ -384,7 +384,7 @@ def alter_deconv(attrs, inputs, tinfos, out_type): new_attrs = dict(attrs) conv_type = len(get_shape(out_type)) - 2 - res = get_optimal_layout_for_deconv( + res = get_optimal_layout_for_conv_transpose( len(get_shape(out_type)), weight_shape, out_shape, @@ -429,7 +429,7 @@ def partition_for_dnnl(mod, params=None, alter_layout=True): from tvm.relay.testing.temp_op_attr import TempOpAttr with TempOpAttr("nn.conv2d", "FTVMLegalize", legalize_group_conv): - with TempOpAttr("nn.conv2d_transpose", 
"FTVMLegalize", legalize_group_deconv): + with TempOpAttr("nn.conv2d_transpose", "FTVMLegalize", legalize_group_conv_transpose): seq = tvm.transform.Sequential( [ transform.CanonicalizeOps(), @@ -440,7 +440,7 @@ def partition_for_dnnl(mod, params=None, alter_layout=True): # fold consecutive add ops to simplify pattern `conv2d-bias_add-bn-relu` transform.SimplifyExpr(), transform.FoldConstant(), - # alter group conv /deconv layout to `GOIHW` / `GIOHW` + # alter group conv /conv_transpose layout to `GOIHW` / `GIOHW` transform.Legalize(), transform.FoldConstant(), ] @@ -453,8 +453,12 @@ def partition_for_dnnl(mod, params=None, alter_layout=True): with TempOpAttr("nn.conv1d", "FTVMAlterOpLayout", alter_conv): with TempOpAttr("nn.conv2d", "FTVMAlterOpLayout", alter_conv): with TempOpAttr("nn.conv3d", "FTVMAlterOpLayout", alter_conv): - with TempOpAttr("nn.conv2d_transpose", "FTVMAlterOpLayout", alter_deconv): - with TempOpAttr("nn.conv3d_transpose", "FTVMAlterOpLayout", alter_deconv): + with TempOpAttr( + "nn.conv2d_transpose", "FTVMAlterOpLayout", alter_conv_transpose + ): + with TempOpAttr( + "nn.conv3d_transpose", "FTVMAlterOpLayout", alter_conv_transpose + ): alter_layout_seq = tvm.transform.Sequential( [ transform.AlterOpLayout(), diff --git a/src/relay/backend/contrib/dnnl/query_layout.cc b/src/relay/backend/contrib/dnnl/query_layout.cc index 53d8a22c6eae..047c72a4453b 100755 --- a/src/relay/backend/contrib/dnnl/query_layout.cc +++ b/src/relay/backend/contrib/dnnl/query_layout.cc @@ -238,10 +238,10 @@ std::string get_optimal_layout_for_conv(int input_size, std::string weight_shape return res; } -std::string get_optimal_layout_for_deconv(int input_size, std::string weight_shape, - std::string out_shape, std::string paddings, - std::string output_paddings, std::string strides, - std::string dilates, std::string G) { +std::string get_optimal_layout_for_conv_transpose(int input_size, std::string weight_shape, + std::string out_shape, std::string paddings, + 
std::string output_paddings, std::string strides, + std::string dilates, std::string G) { dnnl::engine eng(dnnl::engine::kind::cpu, 0); dnnl::stream s(eng); using tag = dnnl::memory::format_tag; @@ -329,10 +329,10 @@ TVM_REGISTER_GLOBAL("relay.ir.get_optimal_layout_for_conv") args[6]); }); -TVM_REGISTER_GLOBAL("relay.ir.get_optimal_layout_for_deconv") +TVM_REGISTER_GLOBAL("relay.ir.get_optimal_layout_for_conv_transpose") .set_body([](TVMArgs args, TVMRetValue* rv) { - *rv = get_optimal_layout_for_deconv(args[0], args[1], args[2], args[3], args[4], args[5], - args[6], args[7]); + *rv = get_optimal_layout_for_conv_transpose(args[0], args[1], args[2], args[3], args[4], + args[5], args[6], args[7]); }); } // namespace contrib From f63e6bdcd5d5af7f265f94d40e148097c16c7375 Mon Sep 17 00:00:00 2001 From: Ivy Date: Fri, 25 Feb 2022 09:56:59 +0800 Subject: [PATCH 07/15] rename some variables and functions --- python/tvm/relay/op/contrib/dnnl.py | 92 +++++++++---------- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 66 ++++++------- 2 files changed, 70 insertions(+), 88 deletions(-) diff --git a/python/tvm/relay/op/contrib/dnnl.py b/python/tvm/relay/op/contrib/dnnl.py index 768da3211c53..926618efac37 100644 --- a/python/tvm/relay/op/contrib/dnnl.py +++ b/python/tvm/relay/op/contrib/dnnl.py @@ -211,13 +211,13 @@ def pattern_table(): return dnnl_patterns -def get_optimal_layout_for_conv(input_size, weight_shape, out_shape, paddings, strides, dilates, G): +def get_optimal_layout_for_conv(input_size, weight_shape, out_shape, paddings, strides, dilates, groups): """Get the optimal layout of dnnl, given shape of conv2d. Parameters ---------- - input_size, weight_shape, out_shape, paddings, strides, dilates, G : Int, String - Input argument. + input_size, weight_shape, out_shape, paddings, strides, dilates, groups : Int, String + Input argument. 
Returns ------- @@ -231,18 +231,18 @@ def get_optimal_layout_for_conv(input_size, weight_shape, out_shape, paddings, s paddings, strides, dilates, - G, + groups, ) def get_optimal_layout_for_conv_transpose( - input_size, weight_shape, out_shape, paddings, output_paddings, strides, dilates, G + input_size, weight_shape, out_shape, paddings, output_paddings, strides, dilates, groups ): """Get the optimal layout of dnnl, given shape of tranposed conv2d. Parameters ---------- - input_size, weight_shape, out_shape, paddings, output_paddings, strides, dilates, G + input_size, weight_shape, out_shape, paddings, output_paddings, strides, dilates, groups : Int, String Input argument. @@ -259,7 +259,7 @@ def get_optimal_layout_for_conv_transpose( output_paddings, strides, dilates, - G, + groups, ) @@ -279,18 +279,18 @@ def get_shape(tensor): raise TypeError("Unsupport data type: %s" % type(tensor)) -def validate_layout_for_tvm(input_data, is_weight=False, conv_type=1): +def tag2layout(input_data, is_weight=False, conv_type="Conv1D"): """Transfer layout, denoted with `a, b, c, d, e`, into valid layout (NCHW / OIHW) of TVM.""" - if conv_type == 1: + if conv_type == "Conv1D": data_dic = {"a": "N", "b": "C", "c": "W"} weight_dic = {"a": "O", "b": "I", "c": "W", "d": "G"} - elif conv_type == 2: + elif conv_type == "Conv2D": data_dic = {"a": "N", "b": "C", "c": "H", "d": "W"} weight_dic = {"a": "O", "b": "I", "c": "H", "d": "W"} if "e" in input_data: weight_dic = {"a": "G", "b": "O", "c": "I", "d": "H", "e": "W"} - elif conv_type == 3: + elif conv_type == "Conv3D": data_dic = {"a": "N", "b": "C", "c": "D", "d": "H", "e": "W"} weight_dic = {"a": "O", "b": "I", "c": "D", "d": "H", "e": "W", "f": "G"} @@ -312,61 +312,51 @@ def validate_layout_for_tvm(input_data, is_weight=False, conv_type=1): def legalize_group_conv(attrs, inputs, types): - """Legalize group conv's calculation. 
- Alter weight layout from OIHW to GOIHW""" - G = attrs.groups - if G == 1: + """Legalize group conv / conv_transpose calculation. + Alter weight layout from OIHW to GOIHW / IOHW to GIOHW""" + groups = attrs.groups + if groups == 1: return data, weight = inputs OC, IC, H, W = get_shape(weight) new_attrs = dict(attrs) - weight = relay.reshape(weight, (G, OC // G, IC, H, W)) - new_attrs["kernel_layout"] = "GOIHW" - return relay.nn.conv2d(data, weight, **new_attrs) - - -def legalize_group_conv_transpose(attrs, inputs, types): - """Legalize group conv_transpose's calculation. - Alter weight layout from IOHW to GIOHW""" - G = attrs.groups - if G == 1: - return - data, weight = inputs - IC, OC, H, W = get_shape(weight) - new_attrs = dict(attrs) - new_attrs["kernel_layout"] = "GIOHW" - weight = relay.reshape(weight, (G, IC // G, OC, H, W)) - return relay.nn.conv2d_transpose(data, weight, **new_attrs) + weight = relay.reshape(weight, (groups, OC // groups, IC, H, W)) + if "Transpose" not in type(attrs).__name__: + new_attrs["kernel_layout"] = "GOIHW" + return relay.nn.conv2d(data, weight, **new_attrs) + else: + new_attrs["kernel_layout"] = "GIOHW" + return relay.nn.conv2d_transpose(data, weight, **new_attrs) def alter_conv(attrs, inputs, tinfos, out_type): """The convolution's layout auto-query func for dnnl.""" data, weight = inputs - G = str(attrs.groups) + groups = str(attrs.groups) weight_shape = ",".join([str(x) for x in get_shape(weight)]) out_shape = ",".join([str(x) for x in get_shape(out_type)]) paddings = ",".join([str(x) for x in attrs.get_int_tuple("padding")]) strides = ",".join([str(x) for x in attrs.get_int_tuple("strides")]) dilates = ",".join([str(x) for x in attrs.get_int_tuple("dilation")]) new_attrs = dict(attrs) - conv_type = len(get_shape(out_type)) - 2 + conv_type = type(attrs).__name__.split("Attrs")[0] res = get_optimal_layout_for_conv( - len(get_shape(out_type)), weight_shape, out_shape, paddings, strides, dilates, G + len(get_shape(out_type)), 
weight_shape, out_shape, paddings, strides, dilates, groups ) src_df, weight_df, dst_df = res.split(",") - new_attrs["data_layout"] = validate_layout_for_tvm(src_df, is_weight=False, conv_type=conv_type) - new_attrs["kernel_layout"] = validate_layout_for_tvm( + new_attrs["data_layout"] = tag2layout(src_df, is_weight=False, conv_type=conv_type) + new_attrs["kernel_layout"] = tag2layout( weight_df, is_weight=True, conv_type=conv_type ) - new_attrs["out_layout"] = validate_layout_for_tvm(dst_df, is_weight=False, conv_type=conv_type) + new_attrs["out_layout"] = tag2layout(dst_df, is_weight=False, conv_type=conv_type) - if conv_type == 1: + if conv_type == "Conv1D": return relay.nn.conv1d(data, weight, **new_attrs) - elif conv_type == 2: + elif conv_type == "Conv2D": return relay.nn.conv2d(data, weight, **new_attrs) - elif conv_type == 3: + elif conv_type == "Conv3D": return relay.nn.conv3d(data, weight, **new_attrs) @@ -380,9 +370,9 @@ def alter_conv_transpose(attrs, inputs, tinfos, out_type): output_paddings = ",".join([str(x) for x in attrs.get_int_tuple("output_padding")]) strides = ",".join([str(x) for x in attrs.get_int_tuple("strides")]) dilates = ",".join([str(x) for x in attrs.get_int_tuple("dilation")]) - G = str(attrs.groups) + groups = str(attrs.groups) new_attrs = dict(attrs) - conv_type = len(get_shape(out_type)) - 2 + conv_type = type(attrs).__name__.split("Attrs")[0] res = get_optimal_layout_for_conv_transpose( len(get_shape(out_type)), @@ -392,20 +382,20 @@ def alter_conv_transpose(attrs, inputs, tinfos, out_type): output_paddings, strides, dilates, - G, + groups, ) src_df, weight_df, dst_df = res.split(",") - new_attrs["data_layout"] = validate_layout_for_tvm(src_df, is_weight=False, conv_type=conv_type) - new_attrs["kernel_layout"] = validate_layout_for_tvm( + new_attrs["data_layout"] = tag2layout(src_df, is_weight=False, conv_type=conv_type) + new_attrs["kernel_layout"] = tag2layout( weight_df, is_weight=True, conv_type=conv_type ) - 
new_attrs["out_layout"] = validate_layout_for_tvm(dst_df, is_weight=False, conv_type=conv_type) + new_attrs["out_layout"] = tag2layout(dst_df, is_weight=False, conv_type=conv_type) - if conv_type == 1: + if conv_type == "Conv1DTranspose": return relay.nn.conv1d_transpose(data, weight, **new_attrs) - elif conv_type == 2: + elif conv_type == "Conv2DTranspose": return relay.nn.conv2d_transpose(data, weight, **new_attrs) - elif conv_type == 3: + elif conv_type == "Conv3DTranspose": return relay.nn.conv3d_transpose(data, weight, **new_attrs) @@ -429,7 +419,7 @@ def partition_for_dnnl(mod, params=None, alter_layout=True): from tvm.relay.testing.temp_op_attr import TempOpAttr with TempOpAttr("nn.conv2d", "FTVMLegalize", legalize_group_conv): - with TempOpAttr("nn.conv2d_transpose", "FTVMLegalize", legalize_group_conv_transpose): + with TempOpAttr("nn.conv2d_transpose", "FTVMLegalize", legalize_group_conv): seq = tvm.transform.Sequential( [ transform.CanonicalizeOps(), diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index 9f08870175ca..84f42703afb4 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -327,14 +327,6 @@ class DNNLJSONRuntime : public JSONRuntimeBase { dnnl::memory::dim groups = std::stoi(node.GetAttr>("groups")[0]); std::string data_layout = node.GetAttr>("data_layout")[0]; std::string kernel_layout = node.GetAttr>("kernel_layout")[0]; - std::string out_layout = ""; - if (node.HasAttr("out_layout")) { - if (node.GetAttr>("out_layout")[0] != "") { - out_layout = node.GetAttr>("out_layout")[0]; - } else { - out_layout = data_layout; - } - } // Check layout. 
if (layout_dict.find(data_layout) == layout_dict.end() || @@ -376,7 +368,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { auto conv_src_md = dnnl::memory::desc(src_dims, dt::f32, layout_dict[data_layout]); auto conv_weights_md = dnnl::memory::desc(weights_dims, dt::f32, layout_dict[kernel_layout]); auto conv_bias_md = dnnl::memory::desc(bias_dims, dt::f32, tag::any); - auto conv_dst_md = dnnl::memory::desc(dst_dims, dt::f32, layout_dict[out_layout]); + auto conv_dst_md = dnnl::memory::desc(dst_dims, dt::f32, tag::any); // Covn2d description. auto conv_desc = @@ -390,37 +382,37 @@ class DNNLJSONRuntime : public JSONRuntimeBase { dilates_dims, padding_dims_l, padding_dims_r); // Enable elementwise post-ops. - auto conv2d_prim_desc = dnnl::convolution_forward::primitive_desc(conv_desc, attr, engine_); + auto conv_prim_desc = dnnl::convolution_forward::primitive_desc(conv_desc, attr, engine_); // Push to the network. - auto conv = dnnl::convolution_forward(conv2d_prim_desc); + auto conv = dnnl::convolution_forward(conv_prim_desc); net_.push_back(conv); // Data memory. - auto conv2d_src_memory = BindDNNLMemory(data_entry, conv_src_md); + auto conv_src_memory = BindDNNLMemory(data_entry, conv_src_md); // Weight memory. - auto conv2d_weights_memory = BindDNNLMemory(weight_entry, conv_weights_md); + auto conv_weights_memory = BindDNNLMemory(weight_entry, conv_weights_md); // Output memory. - auto conv2d_dst_memory = BindDNNLMemory(out_entry, conv2d_prim_desc.dst_desc()); + auto conv_dst_memory = BindDNNLMemory(out_entry, conv_prim_desc.dst_desc()); // Bias memory. - auto conv2d_bias_memory = dnnl::memory({bias_dims, dt::f32, tag::x}, engine_); + auto conv_bias_memory = dnnl::memory({bias_dims, dt::f32, tag::x}, engine_); if (has_bias) { auto bias_entry = node.GetInputs()[2]; - BindDNNLMemory(bias_entry, conv2d_bias_memory); + BindDNNLMemory(bias_entry, conv_bias_memory); // Bind memory buffers. 
- net_args_.push_back({{DNNL_ARG_SRC, conv2d_src_memory}, - {DNNL_ARG_WEIGHTS, conv2d_weights_memory}, - {DNNL_ARG_BIAS, conv2d_bias_memory}, - {DNNL_ARG_DST, conv2d_dst_memory}}); + net_args_.push_back({{DNNL_ARG_SRC, conv_src_memory}, + {DNNL_ARG_WEIGHTS, conv_weights_memory}, + {DNNL_ARG_BIAS, conv_bias_memory}, + {DNNL_ARG_DST, conv_dst_memory}}); } else { // Bind memory buffers. - net_args_.push_back({{DNNL_ARG_SRC, conv2d_src_memory}, - {DNNL_ARG_WEIGHTS, conv2d_weights_memory}, - {DNNL_ARG_DST, conv2d_dst_memory}}); + net_args_.push_back({{DNNL_ARG_SRC, conv_src_memory}, + {DNNL_ARG_WEIGHTS, conv_weights_memory}, + {DNNL_ARG_DST, conv_dst_memory}}); } } @@ -513,38 +505,38 @@ class DNNLJSONRuntime : public JSONRuntimeBase { padding_dims_l, padding_dims_r); // Enable elementwise post-ops. - auto deconv2d_prim_desc = + auto deconv_prim_desc = dnnl::deconvolution_forward::primitive_desc(deconv_desc, attr, engine_); // Push to the network. - auto deconv = dnnl::deconvolution_forward(deconv2d_prim_desc); + auto deconv = dnnl::deconvolution_forward(deconv_prim_desc); net_.push_back(deconv); // Data memory. - auto deconv2d_src_memory = BindDNNLMemory(data_entry, deconv_src_md); + auto deconv_src_memory = BindDNNLMemory(data_entry, deconv_src_md); // Weight memory. - auto deconv2d_weights_memory = BindDNNLMemory(weight_entry, deconv_weights_md); + auto deconv_weights_memory = BindDNNLMemory(weight_entry, deconv_weights_md); // Output memory. - auto deconv2d_dst_memory = BindDNNLMemory(out_entry, deconv2d_prim_desc.dst_desc()); + auto deconv_dst_memory = BindDNNLMemory(out_entry, deconv_prim_desc.dst_desc()); // Bias memory. - auto deconv2d_bias_memory = dnnl::memory({bias_dims, dt::f32, tag::x}, engine_); + auto deconv_bias_memory = dnnl::memory({bias_dims, dt::f32, tag::x}, engine_); if (has_bias) { auto bias_entry = node.GetInputs()[2]; - BindDNNLMemory(bias_entry, deconv2d_bias_memory); + BindDNNLMemory(bias_entry, deconv_bias_memory); // Bind memory buffers. 
- net_args_.push_back({{DNNL_ARG_SRC, deconv2d_src_memory}, - {DNNL_ARG_WEIGHTS, deconv2d_weights_memory}, - {DNNL_ARG_BIAS, deconv2d_bias_memory}, - {DNNL_ARG_DST, deconv2d_dst_memory}}); + net_args_.push_back({{DNNL_ARG_SRC, deconv_src_memory}, + {DNNL_ARG_WEIGHTS, deconv_weights_memory}, + {DNNL_ARG_BIAS, deconv_bias_memory}, + {DNNL_ARG_DST, deconv_dst_memory}}); } else { // Bind memory buffers. - net_args_.push_back({{DNNL_ARG_SRC, deconv2d_src_memory}, - {DNNL_ARG_WEIGHTS, deconv2d_weights_memory}, - {DNNL_ARG_DST, deconv2d_dst_memory}}); + net_args_.push_back({{DNNL_ARG_SRC, deconv_src_memory}, + {DNNL_ARG_WEIGHTS, deconv_weights_memory}, + {DNNL_ARG_DST, deconv_dst_memory}}); } } From 3a06dcfac6dd2324e6ff78506db9338ca9591c99 Mon Sep 17 00:00:00 2001 From: Ivy Date: Fri, 25 Feb 2022 13:25:51 +0800 Subject: [PATCH 08/15] simplify query_layout --- python/tvm/relay/op/contrib/dnnl.py | 21 +++-- .../backend/contrib/dnnl/query_layout.cc | 77 +++++++++++-------- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 32 ++++---- 3 files changed, 68 insertions(+), 62 deletions(-) diff --git a/python/tvm/relay/op/contrib/dnnl.py b/python/tvm/relay/op/contrib/dnnl.py index 926618efac37..5142eb91046d 100644 --- a/python/tvm/relay/op/contrib/dnnl.py +++ b/python/tvm/relay/op/contrib/dnnl.py @@ -211,13 +211,13 @@ def pattern_table(): return dnnl_patterns -def get_optimal_layout_for_conv(input_size, weight_shape, out_shape, paddings, strides, dilates, groups): +def get_optimal_layout_for_conv(weight_shape, out_shape, paddings, strides, dilates, groups): """Get the optimal layout of dnnl, given shape of conv2d. Parameters ---------- - input_size, weight_shape, out_shape, paddings, strides, dilates, groups : Int, String - Input argument. + weight_shape, out_shape, paddings, strides, dilates, groups : Int, String + Input argument. Returns ------- @@ -225,7 +225,6 @@ def get_optimal_layout_for_conv(input_size, weight_shape, out_shape, paddings, s The result. 
""" return _ffi_api.get_optimal_layout_for_conv( - input_size, weight_shape, out_shape, paddings, @@ -236,13 +235,13 @@ def get_optimal_layout_for_conv(input_size, weight_shape, out_shape, paddings, s def get_optimal_layout_for_conv_transpose( - input_size, weight_shape, out_shape, paddings, output_paddings, strides, dilates, groups + weight_shape, out_shape, paddings, output_paddings, strides, dilates, groups ): """Get the optimal layout of dnnl, given shape of tranposed conv2d. Parameters ---------- - input_size, weight_shape, out_shape, paddings, output_paddings, strides, dilates, groups + weight_shape, out_shape, paddings, output_paddings, strides, dilates, groups : Int, String Input argument. @@ -252,7 +251,6 @@ def get_optimal_layout_for_conv_transpose( The result. """ return _ffi_api.get_optimal_layout_for_conv_transpose( - input_size, weight_shape, out_shape, paddings, @@ -282,15 +280,15 @@ def get_shape(tensor): def tag2layout(input_data, is_weight=False, conv_type="Conv1D"): """Transfer layout, denoted with `a, b, c, d, e`, into valid layout (NCHW / OIHW) of TVM.""" - if conv_type == "Conv1D": + if "Conv1D" in conv_type: data_dic = {"a": "N", "b": "C", "c": "W"} weight_dic = {"a": "O", "b": "I", "c": "W", "d": "G"} - elif conv_type == "Conv2D": + elif "Conv2D" in conv_type: data_dic = {"a": "N", "b": "C", "c": "H", "d": "W"} weight_dic = {"a": "O", "b": "I", "c": "H", "d": "W"} if "e" in input_data: weight_dic = {"a": "G", "b": "O", "c": "I", "d": "H", "e": "W"} - elif conv_type == "Conv3D": + elif "Conv3D" in conv_type: data_dic = {"a": "N", "b": "C", "c": "D", "d": "H", "e": "W"} weight_dic = {"a": "O", "b": "I", "c": "D", "d": "H", "e": "W", "f": "G"} @@ -343,7 +341,7 @@ def alter_conv(attrs, inputs, tinfos, out_type): conv_type = type(attrs).__name__.split("Attrs")[0] res = get_optimal_layout_for_conv( - len(get_shape(out_type)), weight_shape, out_shape, paddings, strides, dilates, groups + weight_shape, out_shape, paddings, strides, dilates, groups ) 
src_df, weight_df, dst_df = res.split(",") new_attrs["data_layout"] = tag2layout(src_df, is_weight=False, conv_type=conv_type) @@ -375,7 +373,6 @@ def alter_conv_transpose(attrs, inputs, tinfos, out_type): conv_type = type(attrs).__name__.split("Attrs")[0] res = get_optimal_layout_for_conv_transpose( - len(get_shape(out_type)), weight_shape, out_shape, paddings, diff --git a/src/relay/backend/contrib/dnnl/query_layout.cc b/src/relay/backend/contrib/dnnl/query_layout.cc index 047c72a4453b..2e1ccac9601f 100755 --- a/src/relay/backend/contrib/dnnl/query_layout.cc +++ b/src/relay/backend/contrib/dnnl/query_layout.cc @@ -146,24 +146,33 @@ std::string md2fmt_tag_str(const dnnl::memory::desc* md) { return s; } -dnnl::memory::dims str2dims(std::string str_shape, int input_size) { - std::string str_reg = "(\\d*)"; - for (int i = 0; i < input_size - 1; i++) { - str_reg.append(",(\\d*)"); +dnnl::memory::dims str2dims(const std::string& str_shape, + bool dilates = false, + std::string interval = ",") { + // Split strings + std::vector str_dims; + size_t pos = 0, start = 0; + while ((pos = str_shape.find(interval, start)) != std::string::npos) { + std::string str_dim = str_shape.substr(start, pos - start); + if (pos > start) str_dims.push_back(str_dim); + start = pos + interval.size(); } - std::regex rex(str_reg); - std::smatch m; + if (str_shape.size() > start) { + str_dims.push_back(str_shape.substr(start)); + } + // transfer string to dims dnnl::memory::dims out_dims; - if (std::regex_search(str_shape, m, rex)) { - std::transform(m.begin() + 1, m.end(), std::back_inserter(out_dims), - [](const std::string& str) { return std::stoi(str); }); + if (dilates) { + std::transform(str_dims.begin(), str_dims.end(), std::back_inserter(out_dims), + [](const std::string& str) { return std::stoi(str) - 1; }); } else { - LOG(FATAL) << "Unsupported shape for querying optimal dnnl layout: " << str_shape; + std::transform(str_dims.begin(), str_dims.end(), std::back_inserter(out_dims), + 
[](const std::string& str) { return std::stoi(str); }); } return out_dims; } -std::string get_optimal_layout_for_conv(int input_size, std::string weight_shape, +std::string get_optimal_layout_for_conv(std::string weight_shape, std::string out_shape, std::string paddings, std::string strides, std::string dilates, std::string G) { dnnl::engine eng(dnnl::engine::kind::cpu, 0); @@ -172,35 +181,36 @@ std::string get_optimal_layout_for_conv(int input_size, std::string weight_shape using dt = dnnl::memory::data_type; dnnl::memory::dim groups = std::stoi(G); - dnnl::memory::dims weight_dims_ = str2dims(weight_shape, input_size); + dnnl::memory::dims weight_dims_ = str2dims(weight_shape); dnnl::memory::dims weight_dims = weight_dims_; + if (groups > 1) { if (weight_dims_.size() == 5) { - weight_dims = {weight_dims_[0] * weight_dims_[1], weight_dims_[2], weight_dims_[3], + weight_dims = {groups * weight_dims_[1], groups * weight_dims_[2], weight_dims_[3], weight_dims_[4]}; } else { weight_dims[1] = weight_dims[1] * groups; } } - dnnl::memory::dims out_dims = str2dims(out_shape, input_size); - dnnl::memory::dims padding_dims = str2dims(paddings, 2 * (input_size - 2)); + + dnnl::memory::dims out_dims = str2dims(out_shape); + dnnl::memory::dims padding_dims = str2dims(paddings); dnnl::memory::dims padding_dims_l(padding_dims.begin(), padding_dims.begin() + padding_dims.size() / 2); dnnl::memory::dims padding_dims_r(padding_dims.end() - padding_dims.size() / 2, padding_dims.end()); - dnnl::memory::dims strides_dims = str2dims(strides, input_size - 2); - dnnl::memory::dims dilates_dims = str2dims(dilates, input_size - 2); + dnnl::memory::dims strides_dims = str2dims(strides); + dnnl::memory::dims dilates_dims = str2dims(dilates, true); dnnl::memory::dims input_dims = out_dims; input_dims[1] = weight_dims[1]; - for (int i = 2; i < input_size; i++) { + for (int i = 2; i < out_dims.size(); i++) { dnnl::memory::dim K = weight_dims[i]; dnnl::memory::dim S = strides_dims[i - 2]; - 
dnnl::memory::dim D = dilates_dims[i - 2] - 1; + dnnl::memory::dim D = dilates_dims[i - 2]; dnnl::memory::dim PL = padding_dims_l[i - 2]; dnnl::memory::dim PR = padding_dims_r[i - 2]; dnnl::memory::dim DK = 1 + (K - 1) * (D + 1); - dilates_dims[i - 2] = D; input_dims[i] = out_dims[i] * S - PL - PR + DK - 1; } @@ -210,6 +220,7 @@ std::string get_optimal_layout_for_conv(int input_size, std::string weight_shape conv_weights_dims = {groups, out_dims[1] / groups, input_dims[1] / groups}; conv_weights_dims.insert(conv_weights_dims.end(), weight_dims.begin() + 2, weight_dims.end()); } + dnnl::memory::dims conv_dst_dims = out_dims; dnnl::memory::dims conv_strides = strides_dims; dnnl::memory::dims conv_dilates = dilates_dims; @@ -238,7 +249,7 @@ std::string get_optimal_layout_for_conv(int input_size, std::string weight_shape return res; } -std::string get_optimal_layout_for_conv_transpose(int input_size, std::string weight_shape, +std::string get_optimal_layout_for_conv_transpose(std::string weight_shape, std::string out_shape, std::string paddings, std::string output_paddings, std::string strides, std::string dilates, std::string G) { @@ -248,25 +259,25 @@ std::string get_optimal_layout_for_conv_transpose(int input_size, std::string we using dt = dnnl::memory::data_type; dnnl::memory::dim groups = std::stoi(G); - dnnl::memory::dims weight_dims_ = str2dims(weight_shape, input_size); + dnnl::memory::dims weight_dims_ = str2dims(weight_shape); dnnl::memory::dims weight_dims = weight_dims_; if (groups > 1) { if (weight_dims_.size() == 5) { - weight_dims = {weight_dims_[0] * weight_dims_[1], weight_dims_[2], weight_dims_[3], + weight_dims = {groups * weight_dims_[1], groups * weight_dims_[2], weight_dims_[3], weight_dims_[4]}; } else { weight_dims[1] = weight_dims[1] * groups; } } - dnnl::memory::dims out_dims = str2dims(out_shape, input_size); - dnnl::memory::dims padding_dims = str2dims(paddings, 2 * (input_size - 2)); + dnnl::memory::dims out_dims = str2dims(out_shape); + 
dnnl::memory::dims padding_dims = str2dims(paddings); dnnl::memory::dims padding_dims_l(padding_dims.begin(), padding_dims.begin() + padding_dims.size() / 2); dnnl::memory::dims padding_dims_r(padding_dims.end() - padding_dims.size() / 2, padding_dims.end()); - dnnl::memory::dims output_padding_dims = str2dims(output_paddings, input_size - 2); - dnnl::memory::dims strides_dims = str2dims(strides, input_size - 2); - dnnl::memory::dims dilates_dims = str2dims(dilates, input_size - 2); + dnnl::memory::dims output_padding_dims = str2dims(output_paddings); + dnnl::memory::dims strides_dims = str2dims(strides); + dnnl::memory::dims dilates_dims = str2dims(dilates, true); dnnl::memory::dims input_dims = out_dims; if (out_dims[1] == weight_dims[0]) { @@ -275,15 +286,14 @@ std::string get_optimal_layout_for_conv_transpose(int input_size, std::string we input_dims[1] = weight_dims[0]; std::swap(weight_dims[0], weight_dims[1]); } - for (int i = 2; i < input_size; i++) { + for (int i = 2; i < out_dims.size(); i++) { dnnl::memory::dim K = weight_dims[i]; dnnl::memory::dim S = strides_dims[i - 2]; - dnnl::memory::dim D = dilates_dims[i - 2] - 1; + dnnl::memory::dim D = dilates_dims[i - 2]; dnnl::memory::dim PL = padding_dims_l[i - 2]; dnnl::memory::dim PR = padding_dims_r[i - 2]; dnnl::memory::dim OP = output_padding_dims[i - 2]; dnnl::memory::dim DK = 1 + (K - 1) * (D + 1); - dilates_dims[i - 2] = D; input_dims[i] = (out_dims[i] - DK + PL + PR - OP) / S + 1; } @@ -325,14 +335,13 @@ std::string get_optimal_layout_for_conv_transpose(int input_size, std::string we TVM_REGISTER_GLOBAL("relay.ir.get_optimal_layout_for_conv") .set_body([](TVMArgs args, TVMRetValue* rv) { - *rv = get_optimal_layout_for_conv(args[0], args[1], args[2], args[3], args[4], args[5], - args[6]); + *rv = get_optimal_layout_for_conv(args[0], args[1], args[2], args[3], args[4], args[5]); }); TVM_REGISTER_GLOBAL("relay.ir.get_optimal_layout_for_conv_transpose") .set_body([](TVMArgs args, TVMRetValue* rv) { *rv = 
get_optimal_layout_for_conv_transpose(args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7]); + args[5], args[6]); }); } // namespace contrib diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index 84f42703afb4..26144c4f8f8f 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -216,9 +216,9 @@ class DNNLJSONRuntime : public JSONRuntimeBase { return out_dims; } - dnnl::memory::dims TransformStr2Dims(std::vector strs, std::string str_name) { + dnnl::memory::dims TransformStr2Dims(std::vector strs, bool dilates = false) { dnnl::memory::dims out_dims; - if (str_name == "dilates") { + if (dilates) { std::transform(strs.begin(), strs.end(), std::back_inserter(out_dims), [](const std::string& str) { return std::stoi(str) - 1; }); } else { @@ -338,10 +338,10 @@ class DNNLJSONRuntime : public JSONRuntimeBase { dnnl::memory::dims src_dims = TransDims2Plain(input_shape, data_layout); dnnl::memory::dims weights_dims_ = TransDims2Plain(weight_shape, kernel_layout); dnnl::memory::dims bias_dims = {channels}; - dnnl::memory::dims strides_dims = TransformStr2Dims(str_strides, "strides"); - dnnl::memory::dims dilates_dims = TransformStr2Dims(str_dilates, "dilates"); - dnnl::memory::dims padding_dims_l = TransformStr2Dims(str_padding_l, "padding"); - dnnl::memory::dims padding_dims_r = TransformStr2Dims(str_padding_r, "padding"); + dnnl::memory::dims strides_dims = TransformStr2Dims(str_strides); + dnnl::memory::dims dilates_dims = TransformStr2Dims(str_dilates, true); + dnnl::memory::dims padding_dims_l = TransformStr2Dims(str_padding_l); + dnnl::memory::dims padding_dims_r = TransformStr2Dims(str_padding_r); dnnl::memory::dims dst_dims = src_dims; dst_dims[1] = channels; weights_dims_[0] = channels; @@ -463,11 +463,11 @@ class DNNLJSONRuntime : public JSONRuntimeBase { } } dnnl::memory::dims bias_dims = {channels}; - dnnl::memory::dims 
strides_dims = TransformStr2Dims(str_strides, "strides"); - dnnl::memory::dims dilates_dims = TransformStr2Dims(str_dilates, "dilates"); - dnnl::memory::dims padding_dims_l = TransformStr2Dims(str_padding_l, "padding"); - dnnl::memory::dims padding_dims_r = TransformStr2Dims(str_padding_r, "padding"); - dnnl::memory::dims out_padding = TransformStr2Dims(str_out_padding, "padding"); + dnnl::memory::dims strides_dims = TransformStr2Dims(str_strides); + dnnl::memory::dims dilates_dims = TransformStr2Dims(str_dilates, true); + dnnl::memory::dims padding_dims_l = TransformStr2Dims(str_padding_l); + dnnl::memory::dims padding_dims_r = TransformStr2Dims(str_padding_r); + dnnl::memory::dims out_padding = TransformStr2Dims(str_out_padding); dnnl::memory::dims dst_dims = src_dims; dst_dims[1] = channels; for (int i = 2; i < src_dims.size(); i++) { @@ -675,11 +675,11 @@ class DNNLJSONRuntime : public JSONRuntimeBase { dnnl::memory::dims src_dims = TransDims2Plain(input_shape, layout); dnnl::memory::dims dst_dims = TransDims2Plain(out_shape, layout); - dnnl::memory::dims kernel_dims = TransformStr2Dims(str_kernel, "kernel"); - dnnl::memory::dims strides_dims = TransformStr2Dims(str_strides, "strides"); - dnnl::memory::dims dilates_dims = TransformStr2Dims(str_dilates, "dilates"); - dnnl::memory::dims padding_dims_l = TransformStr2Dims(str_padding_l, "padding"); - dnnl::memory::dims padding_dims_r = TransformStr2Dims(str_padding_r, "padding"); + dnnl::memory::dims kernel_dims = TransformStr2Dims(str_kernel); + dnnl::memory::dims strides_dims = TransformStr2Dims(str_strides); + dnnl::memory::dims dilates_dims = TransformStr2Dims(str_dilates, true); + dnnl::memory::dims padding_dims_l = TransformStr2Dims(str_padding_l); + dnnl::memory::dims padding_dims_r = TransformStr2Dims(str_padding_r); // Memory descriptions. 
auto pool_src_md = dnnl::memory::desc(src_dims, dt::f32, layout_dict[layout]); From e0c78cc0126c08596accbc342713e198a26aaa43 Mon Sep 17 00:00:00 2001 From: Ivy Date: Fri, 25 Feb 2022 14:43:49 +0800 Subject: [PATCH 09/15] add checkes for query_layout --- python/tvm/relay/op/contrib/dnnl.py | 20 +++++--- .../backend/contrib/dnnl/query_layout.cc | 47 +++++++++++++++---- 2 files changed, 52 insertions(+), 15 deletions(-) diff --git a/python/tvm/relay/op/contrib/dnnl.py b/python/tvm/relay/op/contrib/dnnl.py index 5142eb91046d..06dd3c031064 100644 --- a/python/tvm/relay/op/contrib/dnnl.py +++ b/python/tvm/relay/op/contrib/dnnl.py @@ -211,13 +211,14 @@ def pattern_table(): return dnnl_patterns -def get_optimal_layout_for_conv(weight_shape, out_shape, paddings, strides, dilates, groups): +def get_optimal_layout_for_conv(data_layout, kernel_layout, weight_shape, + out_shape, paddings, strides, dilates, groups): """Get the optimal layout of dnnl, given shape of conv2d. Parameters ---------- - weight_shape, out_shape, paddings, strides, dilates, groups : Int, String - Input argument. + data_layout, kernel_layout,weight_shape, out_shape, paddings, strides, dilates, groups :String + Input argument. Returns ------- @@ -225,6 +226,8 @@ def get_optimal_layout_for_conv(weight_shape, out_shape, paddings, strides, dila The result. """ return _ffi_api.get_optimal_layout_for_conv( + data_layout, + kernel_layout, weight_shape, out_shape, paddings, @@ -235,13 +238,13 @@ def get_optimal_layout_for_conv(weight_shape, out_shape, paddings, strides, dila def get_optimal_layout_for_conv_transpose( - weight_shape, out_shape, paddings, output_paddings, strides, dilates, groups + data_layout, kernel_layout, weight_shape, out_shape, paddings, output_paddings, strides, dilates, groups ): """Get the optimal layout of dnnl, given shape of tranposed conv2d. 
Parameters ---------- - weight_shape, out_shape, paddings, output_paddings, strides, dilates, groups + data_layout, kernel_layout, weight_shape, out_shape, paddings, output_paddings, strides, dilates, groups : Int, String Input argument. @@ -251,6 +254,8 @@ def get_optimal_layout_for_conv_transpose( The result. """ return _ffi_api.get_optimal_layout_for_conv_transpose( + data_layout, + kernel_layout, weight_shape, out_shape, paddings, @@ -341,7 +346,8 @@ def alter_conv(attrs, inputs, tinfos, out_type): conv_type = type(attrs).__name__.split("Attrs")[0] res = get_optimal_layout_for_conv( - weight_shape, out_shape, paddings, strides, dilates, groups + attrs["data_layout"], attrs["kernel_layout"], weight_shape, out_shape, paddings, + strides, dilates, groups, ) src_df, weight_df, dst_df = res.split(",") new_attrs["data_layout"] = tag2layout(src_df, is_weight=False, conv_type=conv_type) @@ -373,6 +379,8 @@ def alter_conv_transpose(attrs, inputs, tinfos, out_type): conv_type = type(attrs).__name__.split("Attrs")[0] res = get_optimal_layout_for_conv_transpose( + attrs["data_layout"], + attrs["kernel_layout"], weight_shape, out_shape, paddings, diff --git a/src/relay/backend/contrib/dnnl/query_layout.cc b/src/relay/backend/contrib/dnnl/query_layout.cc index 2e1ccac9601f..b1a392bfca1f 100755 --- a/src/relay/backend/contrib/dnnl/query_layout.cc +++ b/src/relay/backend/contrib/dnnl/query_layout.cc @@ -172,9 +172,32 @@ dnnl::memory::dims str2dims(const std::string& str_shape, return out_dims; } -std::string get_optimal_layout_for_conv(std::string weight_shape, - std::string out_shape, std::string paddings, - std::string strides, std::string dilates, std::string G) { +void check_shapes(const std::vector shapes) { + std::regex valid_pat("(\\d*)(,(\\d*))*"); + bool checked = std::regex_match(shapes[0], valid_pat); + for (size_t i = 1; i < shapes.size()-1; i++) { + checked &= std::regex_match(shapes[i], valid_pat); + } + checked &= std::regex_match(shapes[shapes.size()-1], 
std::regex("\\d*")); + if (!checked) { + LOG(FATAL) << "Invalid input args for query dnnl optimal layout."; + } +} + +void check_layout(bool var, bool ref) { + if (var != ref) { + LOG(FATAL) << "Invalid input layout for query dnnl optimal layout."; + } +} + +std::string get_optimal_layout_for_conv(std::string data_layout, std::string kernel_layout, + std::string weight_shape, std::string out_shape, + std::string paddings, std::string strides, + std::string dilates, std::string G) { + check_layout(std::regex_match(data_layout, std::regex("NC(D?)(H?)W")), true); + check_layout(std::regex_match(kernel_layout, std::regex("(G?)OI(D?)(H?)W")), true); + check_shapes({weight_shape, out_shape, paddings, strides, dilates, G}); + dnnl::engine eng(dnnl::engine::kind::cpu, 0); dnnl::stream s(eng); using tag = dnnl::memory::format_tag; @@ -249,10 +272,15 @@ std::string get_optimal_layout_for_conv(std::string weight_shape, return res; } -std::string get_optimal_layout_for_conv_transpose(std::string weight_shape, - std::string out_shape, std::string paddings, - std::string output_paddings, std::string strides, - std::string dilates, std::string G) { +std::string get_optimal_layout_for_conv_transpose(std::string data_layout, std::string kernel_layout, + std::string weight_shape, std::string out_shape, + std::string paddings, std::string output_paddings, + std::string strides, std::string dilates, + std::string G) { + check_layout(std::regex_match(data_layout, std::regex("NC(D?)(H?)W")), true); + check_layout(std::regex_match(kernel_layout, std::regex("(G?)((IO)|(OI))(D?)(H?)W")), true); + check_shapes({weight_shape, out_shape, paddings, output_paddings, strides, dilates, G}); + dnnl::engine eng(dnnl::engine::kind::cpu, 0); dnnl::stream s(eng); using tag = dnnl::memory::format_tag; @@ -335,13 +363,14 @@ std::string get_optimal_layout_for_conv_transpose(std::string weight_shape, TVM_REGISTER_GLOBAL("relay.ir.get_optimal_layout_for_conv") .set_body([](TVMArgs args, TVMRetValue* rv) { 
- *rv = get_optimal_layout_for_conv(args[0], args[1], args[2], args[3], args[4], args[5]); + *rv = get_optimal_layout_for_conv(args[0], args[1], args[2], args[3], args[4], args[5], + args[6], args[7]); }); TVM_REGISTER_GLOBAL("relay.ir.get_optimal_layout_for_conv_transpose") .set_body([](TVMArgs args, TVMRetValue* rv) { *rv = get_optimal_layout_for_conv_transpose(args[0], args[1], args[2], args[3], args[4], - args[5], args[6]); + args[5], args[6], args[7], args[8]); }); } // namespace contrib From a9314dfac35989a3735e12e419de91989ce17db7 Mon Sep 17 00:00:00 2001 From: Ivy Date: Wed, 2 Mar 2022 09:31:49 +0800 Subject: [PATCH 10/15] fix lint --- python/tvm/relay/op/contrib/dnnl.py | 82 ++++++++++--------- .../backend/contrib/dnnl/query_layout.cc | 20 ++--- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 8 +- 3 files changed, 58 insertions(+), 52 deletions(-) diff --git a/python/tvm/relay/op/contrib/dnnl.py b/python/tvm/relay/op/contrib/dnnl.py index 06dd3c031064..c51de23c19aa 100644 --- a/python/tvm/relay/op/contrib/dnnl.py +++ b/python/tvm/relay/op/contrib/dnnl.py @@ -38,6 +38,7 @@ from tvm import relay from tvm.relay import transform from tvm.relay.build_module import bind_params_by_name +from tvm.relay.testing.temp_op_attr import TempOpAttr from ... import _ffi_api from ...dataflow_pattern import wildcard, is_op @@ -211,14 +212,16 @@ def pattern_table(): return dnnl_patterns -def get_optimal_layout_for_conv(data_layout, kernel_layout, weight_shape, - out_shape, paddings, strides, dilates, groups): +def get_optimal_layout_for_conv( + data_layout, kernel_layout, weight_shape, out_shape, paddings, strides, dilates, groups +): """Get the optimal layout of dnnl, given shape of conv2d. Parameters ---------- - data_layout, kernel_layout,weight_shape, out_shape, paddings, strides, dilates, groups :String - Input argument. + data_layout, kernel_layout,weight_shape, out_shape, paddings, strides, dilates, groups + : String + Input argument. 
Returns ------- @@ -238,13 +241,22 @@ def get_optimal_layout_for_conv(data_layout, kernel_layout, weight_shape, def get_optimal_layout_for_conv_transpose( - data_layout, kernel_layout, weight_shape, out_shape, paddings, output_paddings, strides, dilates, groups + data_layout, + kernel_layout, + weight_shape, + out_shape, + paddings, + output_paddings, + strides, + dilates, + groups, ): """Get the optimal layout of dnnl, given shape of tranposed conv2d. Parameters ---------- - data_layout, kernel_layout, weight_shape, out_shape, paddings, output_paddings, strides, dilates, groups + data_layout, kernel_layout, weight_shape, out_shape, paddings, output_paddings, strides, + dilates, groups : Int, String Input argument. @@ -255,7 +267,7 @@ def get_optimal_layout_for_conv_transpose( """ return _ffi_api.get_optimal_layout_for_conv_transpose( data_layout, - kernel_layout, + kernel_layout, weight_shape, out_shape, paddings, @@ -270,16 +282,15 @@ def get_shape(tensor): """Get tensor's shape.""" if isinstance(tensor, relay.expr.Var): return tensor.type_annotation.concrete_shape - elif isinstance(tensor, relay.expr.Constant): + if isinstance(tensor, relay.expr.Constant): return tensor.data.shape - elif isinstance(tensor, tvm.ir.tensor_type.TensorType): + if isinstance(tensor, tvm.ir.tensor_type.TensorType): return tensor.concrete_shape - elif isinstance(tensor, tvm.ir.container.Array): + if isinstance(tensor, tvm.ir.container.Array): return tensor[-1].shape - elif isinstance(tensor, relay.expr.Call): + if isinstance(tensor, relay.expr.Call): return tensor.checked_type.shape - else: - raise TypeError("Unsupport data type: %s" % type(tensor)) + raise TypeError("Unsupport data type: %s" % type(tensor)) def tag2layout(input_data, is_weight=False, conv_type="Conv1D"): @@ -318,18 +329,19 @@ def legalize_group_conv(attrs, inputs, types): """Legalize group conv / conv_transpose calculation. 
Alter weight layout from OIHW to GOIHW / IOHW to GIOHW""" groups = attrs.groups - if groups == 1: - return data, weight = inputs + if groups == 1: + if "Transpose" not in type(attrs).__name__: + return relay.nn.conv2d(data, weight, **attrs) + return relay.nn.conv2d_transpose(data, weight, **attrs) OC, IC, H, W = get_shape(weight) new_attrs = dict(attrs) weight = relay.reshape(weight, (groups, OC // groups, IC, H, W)) if "Transpose" not in type(attrs).__name__: new_attrs["kernel_layout"] = "GOIHW" return relay.nn.conv2d(data, weight, **new_attrs) - else: - new_attrs["kernel_layout"] = "GIOHW" - return relay.nn.conv2d_transpose(data, weight, **new_attrs) + new_attrs["kernel_layout"] = "GIOHW" + return relay.nn.conv2d_transpose(data, weight, **new_attrs) def alter_conv(attrs, inputs, tinfos, out_type): @@ -346,22 +358,25 @@ def alter_conv(attrs, inputs, tinfos, out_type): conv_type = type(attrs).__name__.split("Attrs")[0] res = get_optimal_layout_for_conv( - attrs["data_layout"], attrs["kernel_layout"], weight_shape, out_shape, paddings, - strides, dilates, groups, + attrs["data_layout"], + attrs["kernel_layout"], + weight_shape, + out_shape, + paddings, + strides, + dilates, + groups, ) src_df, weight_df, dst_df = res.split(",") new_attrs["data_layout"] = tag2layout(src_df, is_weight=False, conv_type=conv_type) - new_attrs["kernel_layout"] = tag2layout( - weight_df, is_weight=True, conv_type=conv_type - ) + new_attrs["kernel_layout"] = tag2layout(weight_df, is_weight=True, conv_type=conv_type) new_attrs["out_layout"] = tag2layout(dst_df, is_weight=False, conv_type=conv_type) if conv_type == "Conv1D": return relay.nn.conv1d(data, weight, **new_attrs) - elif conv_type == "Conv2D": + if conv_type == "Conv2D": return relay.nn.conv2d(data, weight, **new_attrs) - elif conv_type == "Conv3D": - return relay.nn.conv3d(data, weight, **new_attrs) + return relay.nn.conv3d(data, weight, **new_attrs) def alter_conv_transpose(attrs, inputs, tinfos, out_type): @@ -380,7 +395,7 @@ 
def alter_conv_transpose(attrs, inputs, tinfos, out_type): res = get_optimal_layout_for_conv_transpose( attrs["data_layout"], - attrs["kernel_layout"], + attrs["kernel_layout"], weight_shape, out_shape, paddings, @@ -391,17 +406,14 @@ def alter_conv_transpose(attrs, inputs, tinfos, out_type): ) src_df, weight_df, dst_df = res.split(",") new_attrs["data_layout"] = tag2layout(src_df, is_weight=False, conv_type=conv_type) - new_attrs["kernel_layout"] = tag2layout( - weight_df, is_weight=True, conv_type=conv_type - ) + new_attrs["kernel_layout"] = tag2layout(weight_df, is_weight=True, conv_type=conv_type) new_attrs["out_layout"] = tag2layout(dst_df, is_weight=False, conv_type=conv_type) if conv_type == "Conv1DTranspose": return relay.nn.conv1d_transpose(data, weight, **new_attrs) - elif conv_type == "Conv2DTranspose": + if conv_type == "Conv2DTranspose": return relay.nn.conv2d_transpose(data, weight, **new_attrs) - elif conv_type == "Conv3DTranspose": - return relay.nn.conv3d_transpose(data, weight, **new_attrs) + return relay.nn.conv3d_transpose(data, weight, **new_attrs) def partition_for_dnnl(mod, params=None, alter_layout=True): @@ -418,10 +430,8 @@ def partition_for_dnnl(mod, params=None, alter_layout=True): mod : Module Annotated and partitioned module. 
""" - if params: mod["main"] = bind_params_by_name(mod["main"], params) - from tvm.relay.testing.temp_op_attr import TempOpAttr with TempOpAttr("nn.conv2d", "FTVMLegalize", legalize_group_conv): with TempOpAttr("nn.conv2d_transpose", "FTVMLegalize", legalize_group_conv): @@ -443,8 +453,6 @@ def partition_for_dnnl(mod, params=None, alter_layout=True): with tvm.transform.PassContext(opt_level=3): mod = seq(mod) if alter_layout: - from tvm.relay.testing.temp_op_attr import TempOpAttr - with TempOpAttr("nn.conv1d", "FTVMAlterOpLayout", alter_conv): with TempOpAttr("nn.conv2d", "FTVMAlterOpLayout", alter_conv): with TempOpAttr("nn.conv3d", "FTVMAlterOpLayout", alter_conv): diff --git a/src/relay/backend/contrib/dnnl/query_layout.cc b/src/relay/backend/contrib/dnnl/query_layout.cc index b1a392bfca1f..7fb1d824c702 100755 --- a/src/relay/backend/contrib/dnnl/query_layout.cc +++ b/src/relay/backend/contrib/dnnl/query_layout.cc @@ -146,8 +146,7 @@ std::string md2fmt_tag_str(const dnnl::memory::desc* md) { return s; } -dnnl::memory::dims str2dims(const std::string& str_shape, - bool dilates = false, +dnnl::memory::dims str2dims(const std::string& str_shape, bool dilates = false, std::string interval = ",") { // Split strings std::vector str_dims; @@ -164,10 +163,10 @@ dnnl::memory::dims str2dims(const std::string& str_shape, dnnl::memory::dims out_dims; if (dilates) { std::transform(str_dims.begin(), str_dims.end(), std::back_inserter(out_dims), - [](const std::string& str) { return std::stoi(str) - 1; }); + [](const std::string& str) { return std::stoi(str) - 1; }); } else { std::transform(str_dims.begin(), str_dims.end(), std::back_inserter(out_dims), - [](const std::string& str) { return std::stoi(str); }); + [](const std::string& str) { return std::stoi(str); }); } return out_dims; } @@ -175,10 +174,10 @@ dnnl::memory::dims str2dims(const std::string& str_shape, void check_shapes(const std::vector shapes) { std::regex valid_pat("(\\d*)(,(\\d*))*"); bool checked = 
std::regex_match(shapes[0], valid_pat); - for (size_t i = 1; i < shapes.size()-1; i++) { + for (size_t i = 1; i < shapes.size() - 1; i++) { checked &= std::regex_match(shapes[i], valid_pat); } - checked &= std::regex_match(shapes[shapes.size()-1], std::regex("\\d*")); + checked &= std::regex_match(shapes[shapes.size() - 1], std::regex("\\d*")); if (!checked) { LOG(FATAL) << "Invalid input args for query dnnl optimal layout."; } @@ -193,7 +192,7 @@ void check_layout(bool var, bool ref) { std::string get_optimal_layout_for_conv(std::string data_layout, std::string kernel_layout, std::string weight_shape, std::string out_shape, std::string paddings, std::string strides, - std::string dilates, std::string G) { + std::string dilates, std::string G) { check_layout(std::regex_match(data_layout, std::regex("NC(D?)(H?)W")), true); check_layout(std::regex_match(kernel_layout, std::regex("(G?)OI(D?)(H?)W")), true); check_shapes({weight_shape, out_shape, paddings, strides, dilates, G}); @@ -227,7 +226,7 @@ std::string get_optimal_layout_for_conv(std::string data_layout, std::string ker dnnl::memory::dims input_dims = out_dims; input_dims[1] = weight_dims[1]; - for (int i = 2; i < out_dims.size(); i++) { + for (size_t i = 2; i < out_dims.size(); i++) { dnnl::memory::dim K = weight_dims[i]; dnnl::memory::dim S = strides_dims[i - 2]; dnnl::memory::dim D = dilates_dims[i - 2]; @@ -272,7 +271,8 @@ std::string get_optimal_layout_for_conv(std::string data_layout, std::string ker return res; } -std::string get_optimal_layout_for_conv_transpose(std::string data_layout, std::string kernel_layout, +std::string get_optimal_layout_for_conv_transpose(std::string data_layout, + std::string kernel_layout, std::string weight_shape, std::string out_shape, std::string paddings, std::string output_paddings, std::string strides, std::string dilates, @@ -314,7 +314,7 @@ std::string get_optimal_layout_for_conv_transpose(std::string data_layout, std:: input_dims[1] = weight_dims[0]; 
std::swap(weight_dims[0], weight_dims[1]); } - for (int i = 2; i < out_dims.size(); i++) { + for (size_t i = 2; i < out_dims.size(); i++) { dnnl::memory::dim K = weight_dims[i]; dnnl::memory::dim S = strides_dims[i - 2]; dnnl::memory::dim D = dilates_dims[i - 2]; diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index 26144c4f8f8f..a38603666dd5 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -143,7 +143,6 @@ class DNNLJSONRuntime : public JSONRuntimeBase { {"OHWI32o", tag::Ohwi32o}, {"OHWI48o", tag::Ohwi48o}, {"OHWI64o", tag::Ohwi64o}, - {"HWIOG16g", tag::hwioG16g}, {"GOIHW8g", tag::Goihw8g}, {"GOIHW16g", tag::Goihw16g}, {"NCDHW16c", tag::nCdhw16c}, @@ -345,7 +344,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { dnnl::memory::dims dst_dims = src_dims; dst_dims[1] = channels; weights_dims_[0] = channels; - for (int i = 2; i < src_dims.size(); i++) { + for (size_t i = 2; i < src_dims.size(); i++) { dnnl::memory::dim K = weights_dims_[i]; dnnl::memory::dim S = strides_dims[i - 2]; dnnl::memory::dim D = dilates_dims[i - 2]; @@ -470,7 +469,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { dnnl::memory::dims out_padding = TransformStr2Dims(str_out_padding); dnnl::memory::dims dst_dims = src_dims; dst_dims[1] = channels; - for (int i = 2; i < src_dims.size(); i++) { + for (size_t i = 2; i < src_dims.size(); i++) { dnnl::memory::dim K = weights_dims_[i]; dnnl::memory::dim S = strides_dims[i - 2]; dnnl::memory::dim D = dilates_dims[i - 2]; @@ -505,8 +504,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { padding_dims_l, padding_dims_r); // Enable elementwise post-ops. - auto deconv_prim_desc = - dnnl::deconvolution_forward::primitive_desc(deconv_desc, attr, engine_); + auto deconv_prim_desc = dnnl::deconvolution_forward::primitive_desc(deconv_desc, attr, engine_); // Push to the network. 
auto deconv = dnnl::deconvolution_forward(deconv_prim_desc); From 838a2ea96bee7294f51b20360f7438271d84cf75 Mon Sep 17 00:00:00 2001 From: Ivy Date: Wed, 2 Mar 2022 16:15:42 +0800 Subject: [PATCH 11/15] move partition_for_dnnl from dnnl.py to test_dnnl.py --- python/tvm/relay/op/contrib/dnnl.py | 71 --------------------------- tests/python/contrib/test_dnnl.py | 75 ++++++++++++++++++++++++++++- 2 files changed, 73 insertions(+), 73 deletions(-) diff --git a/python/tvm/relay/op/contrib/dnnl.py b/python/tvm/relay/op/contrib/dnnl.py index c51de23c19aa..2bcb2b0ef7f8 100644 --- a/python/tvm/relay/op/contrib/dnnl.py +++ b/python/tvm/relay/op/contrib/dnnl.py @@ -36,9 +36,6 @@ import tvm.ir from tvm import relay -from tvm.relay import transform -from tvm.relay.build_module import bind_params_by_name -from tvm.relay.testing.temp_op_attr import TempOpAttr from ... import _ffi_api from ...dataflow_pattern import wildcard, is_op @@ -414,71 +411,3 @@ def alter_conv_transpose(attrs, inputs, tinfos, out_type): if conv_type == "Conv2DTranspose": return relay.nn.conv2d_transpose(data, weight, **new_attrs) return relay.nn.conv3d_transpose(data, weight, **new_attrs) - - -def partition_for_dnnl(mod, params=None, alter_layout=True): - """Partition the graph greedily offloading supported operators to DNNL. - - Parameters - ---------- - mod : Module - The module to run passes on. - params : Optional[Dict[str, NDArray]] - Constant input parameters. - Returns - ------- - mod : Module - Annotated and partitioned module. 
- """ - if params: - mod["main"] = bind_params_by_name(mod["main"], params) - - with TempOpAttr("nn.conv2d", "FTVMLegalize", legalize_group_conv): - with TempOpAttr("nn.conv2d_transpose", "FTVMLegalize", legalize_group_conv): - seq = tvm.transform.Sequential( - [ - transform.CanonicalizeOps(), - transform.InferType(), - transform.SimplifyInference(), - transform.FoldConstant(), - transform.FoldScaleAxis(), - # fold consecutive add ops to simplify pattern `conv2d-bias_add-bn-relu` - transform.SimplifyExpr(), - transform.FoldConstant(), - # alter group conv /conv_transpose layout to `GOIHW` / `GIOHW` - transform.Legalize(), - transform.FoldConstant(), - ] - ) - with tvm.transform.PassContext(opt_level=3): - mod = seq(mod) - if alter_layout: - with TempOpAttr("nn.conv1d", "FTVMAlterOpLayout", alter_conv): - with TempOpAttr("nn.conv2d", "FTVMAlterOpLayout", alter_conv): - with TempOpAttr("nn.conv3d", "FTVMAlterOpLayout", alter_conv): - with TempOpAttr( - "nn.conv2d_transpose", "FTVMAlterOpLayout", alter_conv_transpose - ): - with TempOpAttr( - "nn.conv3d_transpose", "FTVMAlterOpLayout", alter_conv_transpose - ): - alter_layout_seq = tvm.transform.Sequential( - [ - transform.AlterOpLayout(), - transform.FoldConstant(), - ] - ) - with tvm.transform.PassContext(opt_level=3): - mod = alter_layout_seq(mod) - - byoc_seq = tvm.transform.Sequential( - [ - transform.MergeComposite(pattern_table()), - transform.AnnotateTarget("dnnl"), - transform.MergeCompilerRegions(), - transform.PartitionGraph(), - ] - ) - with tvm.transform.PassContext(opt_level=3): - mod = byoc_seq(mod) - return mod diff --git a/tests/python/contrib/test_dnnl.py b/tests/python/contrib/test_dnnl.py index 3476ba853ccb..e8046884697e 100755 --- a/tests/python/contrib/test_dnnl.py +++ b/tests/python/contrib/test_dnnl.py @@ -21,6 +21,9 @@ import tvm from tvm import relay +from tvm.relay import transform +from tvm.relay.build_module import bind_params_by_name +from tvm.relay.testing.temp_op_attr import TempOpAttr 
from tvm.relay.op.contrib import dnnl import tvm.testing @@ -36,6 +39,74 @@ ) +def partition_for_dnnl(mod, params=None, alter_layout=True): + """Partition the graph greedily offloading supported operators to DNNL. + + Parameters + ---------- + mod : Module + The module to run passes on. + params : Optional[Dict[str, NDArray]] + Constant input parameters. + Returns + ------- + mod : Module + Annotated and partitioned module. + """ + if params: + mod["main"] = bind_params_by_name(mod["main"], params) + + with TempOpAttr("nn.conv2d", "FTVMLegalize", dnnl.legalize_group_conv): + with TempOpAttr("nn.conv2d_transpose", "FTVMLegalize", dnnl.legalize_group_conv): + seq = tvm.transform.Sequential( + [ + transform.CanonicalizeOps(), + transform.InferType(), + transform.SimplifyInference(), + transform.FoldConstant(), + transform.FoldScaleAxis(), + # fold consecutive add ops to simplify pattern `conv2d-bias_add-bn-relu` + transform.SimplifyExpr(), + transform.FoldConstant(), + # alter group conv /conv_transpose layout to `GOIHW` / `GIOHW` + transform.Legalize(), + transform.FoldConstant(), + ] + ) + with tvm.transform.PassContext(opt_level=3): + mod = seq(mod) + if alter_layout: + with TempOpAttr("nn.conv1d", "FTVMAlterOpLayout", dnnl.alter_conv): + with TempOpAttr("nn.conv2d", "FTVMAlterOpLayout", dnnl.alter_conv): + with TempOpAttr("nn.conv3d", "FTVMAlterOpLayout", dnnl.alter_conv): + with TempOpAttr( + "nn.conv2d_transpose", "FTVMAlterOpLayout", dnnl.alter_conv_transpose + ): + with TempOpAttr( + "nn.conv3d_transpose", "FTVMAlterOpLayout", dnnl.alter_conv_transpose + ): + alter_layout_seq = tvm.transform.Sequential( + [ + transform.AlterOpLayout(), + transform.FoldConstant(), + ] + ) + with tvm.transform.PassContext(opt_level=3): + mod = alter_layout_seq(mod) + + byoc_seq = tvm.transform.Sequential( + [ + transform.MergeComposite(dnnl.pattern_table()), + transform.AnnotateTarget("dnnl"), + transform.MergeCompilerRegions(), + transform.PartitionGraph(), + ] + ) + with 
tvm.transform.PassContext(opt_level=3): + mod = byoc_seq(mod) + return mod + + def vmobj_to_list(o): if isinstance(o, tvm.nd.NDArray): return [o.numpy()] @@ -66,7 +137,7 @@ def check_dnnl_used(mod): for use_dnnl, alter_layout in [(False, False), (True, False), (True, True)]: result_key = mode + ("_dnnl" if use_dnnl else "") + ("_layout" if alter_layout else "") if use_dnnl: - processed_mod = dnnl.partition_for_dnnl(mod, params, alter_layout) + processed_mod = partition_for_dnnl(mod, params, alter_layout) check_dnnl_used(processed_mod) else: processed_mod = mod @@ -470,7 +541,7 @@ def test_dnnl_not_compatible(run_module, target="llvm", dtype="float32"): f = relay.Function([x], out) mod = tvm.IRModule() mod["main"] = f - mod = dnnl.partition_for_dnnl(mod) + mod = partition_for_dnnl(mod) for mode in ["graph", "vm"]: with tvm.transform.PassContext(opt_level=3): func = relay.create_executor(mode, mod=mod, device=tvm.cpu(0), target=target).evaluate() From 4e50729329d1ade47cce86e9fc0cf55edff15214 Mon Sep 17 00:00:00 2001 From: Ivy Date: Thu, 3 Mar 2022 09:56:04 +0800 Subject: [PATCH 12/15] remove unnecessary model test --- tests/python/contrib/test_dnnl.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/tests/python/contrib/test_dnnl.py b/tests/python/contrib/test_dnnl.py index e8046884697e..fb48e05c4d80 100755 --- a/tests/python/contrib/test_dnnl.py +++ b/tests/python/contrib/test_dnnl.py @@ -14,7 +14,6 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
-from gluoncv.model_zoo import get_model import pytest import itertools import numpy as np @@ -936,23 +935,6 @@ def get_graph( run_and_verify_func(get_graph(relay.nn.max_pool3d, strides=(1, 1, 1)), run_module=run_module) -def run_and_verify_model( - model, run_module, input_shape=(1, 3, 224, 224), target="llvm", dtype="float32" -): - i_data = np.random.uniform(-1, 1, input_shape).astype(dtype) - block = get_model(model, pretrained=True) - mod, params = relay.frontend.from_mxnet(block, shape={"data": input_shape}, dtype=dtype) - run_and_verify(mod, i_data, params, target=target, run_module=run_module) - - -@pytest.mark.skip(reason="takes a long time for this test ") -def test_model(run_module, dtype="float32"): - run_and_verify_model("ResNet50_v1b", run_module, dtype=dtype) - run_and_verify_model("VGG11_bn", run_module, dtype=dtype) - run_and_verify_model("InceptionV3", run_module, input_shape=(1, 3, 300, 300), dtype=dtype) - run_and_verify_model("MobileNet1.0", run_module, dtype=dtype) - - if __name__ == "__main__": import sys From 329dcab79692201079fff2c1d8d94d399dfb99d9 Mon Sep 17 00:00:00 2001 From: Ivy Date: Thu, 3 Mar 2022 14:26:08 +0800 Subject: [PATCH 13/15] add more dnnl layout --- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index a38603666dd5..2bd486cb8671 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -126,13 +126,16 @@ class DNNLJSONRuntime : public JSONRuntimeBase { {"GIODHW", tag::giodhw}, // Blocking layout. 
+ {"NCW8c", tag::nCw8c}, {"NCW16c", tag::nCw16c}, {"OIW16i16o", tag::OIw16i16o}, + {"OWI8o", tag::Owi8o}, {"OWI16o", tag::Owi16o}, {"NCHW4c", tag::nChw4c}, {"NCHW8c", tag::nChw8c}, {"NCHW16c", tag::nChw16c}, {"OIHW8i8o", tag::OIhw8i8o}, + {"IOHW8i8o", tag::any}, {"OIHW16i16o", tag::OIhw16i16o}, {"IOHW16i16o", tag::IOhw16i16o}, {"GOIHW4i4o", tag::gOIhw4i4o}, @@ -145,9 +148,12 @@ class DNNLJSONRuntime : public JSONRuntimeBase { {"OHWI64o", tag::Ohwi64o}, {"GOIHW8g", tag::Goihw8g}, {"GOIHW16g", tag::Goihw16g}, + {"NCDHW8c", tag::nCdhw8c}, {"NCDHW16c", tag::nCdhw16c}, {"OIDHW16i16o", tag::OIdhw16i16o}, {"IODHW16i16o", tag::IOdhw16i16o}, + {"OIDHW8i8o", tag::OIdhw8i8o}, + {"IODHW8i8o", tag::any}, {"ODHWI16o", tag::Odhwi16o}, }; @@ -391,7 +397,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { auto conv_src_memory = BindDNNLMemory(data_entry, conv_src_md); // Weight memory. - auto conv_weights_memory = BindDNNLMemory(weight_entry, conv_weights_md); + auto conv_weights_memory = BindDNNLMemory(weight_entry, conv_prim_desc.weights_desc()); // Output memory. auto conv_dst_memory = BindDNNLMemory(out_entry, conv_prim_desc.dst_desc()); @@ -448,7 +454,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { // Check layout. if (layout_dict.find(data_layout) == layout_dict.end() || layout_dict.find(kernel_layout) == layout_dict.end()) { - LOG(FATAL) << "Unsupported layout: " << data_layout << " " << kernel_layout; + LOG(FATAL) << "Unsupported layout for deconv: " << data_layout << " " << kernel_layout; } // Memory shapes. @@ -514,7 +520,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { auto deconv_src_memory = BindDNNLMemory(data_entry, deconv_src_md); // Weight memory. - auto deconv_weights_memory = BindDNNLMemory(weight_entry, deconv_weights_md); + auto deconv_weights_memory = BindDNNLMemory(weight_entry, deconv_prim_desc.weights_desc()); // Output memory. 
auto deconv_dst_memory = BindDNNLMemory(out_entry, deconv_prim_desc.dst_desc()); From 0357162f92cd2ad70afc91ccf8acefa94f3282de Mon Sep 17 00:00:00 2001 From: Ivy Date: Fri, 4 Mar 2022 15:14:02 +0800 Subject: [PATCH 14/15] rename flag in convolution.cc --- src/relay/op/nn/convolution.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/relay/op/nn/convolution.cc b/src/relay/op/nn/convolution.cc index 1d883cfb5b14..ec447d916ec3 100644 --- a/src/relay/op/nn/convolution.cc +++ b/src/relay/op/nn/convolution.cc @@ -192,10 +192,10 @@ bool Conv2DRel(const Array& types, int num_inputs, const Attrs& attrs, const Layout in_layout(param->data_layout); const Layout kernel_layout(param->kernel_layout); - bool is_group = false; + bool is_dnnl_group_conv = false; if (param->groups > 1 && kernel_layout.name().find("G") != std::string::npos) { kOIHW = Layout("GOIHW"); - is_group = true; + is_dnnl_group_conv = true; } const auto trans_in_layout = tir::BijectiveLayout(in_layout, kNCHW); @@ -250,7 +250,7 @@ bool Conv2DRel(const Array& types, int num_inputs, const Attrs& attrs, ICHECK_EQ(param->dilation.size(), 2); Array wshape; - if (is_group) { + if (is_dnnl_group_conv) { // infer weight's shape for group convolution wshape = {{param->groups, indexdiv(param->channels, param->groups), indexdiv(dshape_nchw[1], param->groups), param->kernel_size[0], @@ -752,10 +752,10 @@ bool Conv2DTransposeRel(const Array& types, int num_inputs, const Attrs& a const Layout in_layout(param->data_layout); const Layout kernel_layout(param->kernel_layout); - bool is_group = false; + bool is_dnnl_group_conv = false; if (param->groups > 1 && kernel_layout.name().find("G") != std::string::npos) { kIOHW = Layout("GIOHW"); - is_group = true; + is_dnnl_group_conv = true; } const auto trans_in_layout = tir::BijectiveLayout(in_layout, kNCHW); @@ -784,7 +784,7 @@ bool Conv2DTransposeRel(const Array& types, int num_inputs, const Attrs& a ICHECK_EQ(param->dilation.size(), 2); Array 
wshape; - if (is_group) { + if (is_dnnl_group_conv) { // infer weight's shape for group convolution wshape = {{param->groups, indexdiv(dshape_nchw[1], param->groups), indexdiv(param->channels, param->groups), param->kernel_size[0], From 0b39995e5cd1a20f1d8d5f216642939f90e1d998 Mon Sep 17 00:00:00 2001 From: Ivy Date: Mon, 7 Mar 2022 10:06:10 +0800 Subject: [PATCH 15/15] enhance dnnl layout --- src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc index 2bd486cb8671..706780614279 100644 --- a/src/runtime/contrib/dnnl/dnnl_json_runtime.cc +++ b/src/runtime/contrib/dnnl/dnnl_json_runtime.cc @@ -128,6 +128,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { // Blocking layout. {"NCW8c", tag::nCw8c}, {"NCW16c", tag::nCw16c}, + {"OIW8i8o", tag::OIw8i8o}, {"OIW16i16o", tag::OIw16i16o}, {"OWI8o", tag::Owi8o}, {"OWI16o", tag::Owi16o}, @@ -154,6 +155,7 @@ class DNNLJSONRuntime : public JSONRuntimeBase { {"IODHW16i16o", tag::IOdhw16i16o}, {"OIDHW8i8o", tag::OIdhw8i8o}, {"IODHW8i8o", tag::any}, + {"ODHWI8o", tag::Odhwi8o}, {"ODHWI16o", tag::Odhwi16o}, }; @@ -334,9 +336,14 @@ class DNNLJSONRuntime : public JSONRuntimeBase { std::string kernel_layout = node.GetAttr<std::vector<std::string>>("kernel_layout")[0]; // Check layout. - if (layout_dict.find(data_layout) == layout_dict.end() || - layout_dict.find(kernel_layout) == layout_dict.end()) { - LOG(FATAL) << "Unsupported layout for conv: " << data_layout << " " << kernel_layout; + if (layout_dict.find(data_layout) == layout_dict.end()) { + LOG(FATAL) << "Unsupported data layout for conv: " << data_layout; + } + + if (layout_dict.find(kernel_layout) == layout_dict.end()) { + layout_dict.insert({kernel_layout, tag::any}); + LOG(WARNING) << "Unregistered kernel layout for conv: " << kernel_layout + << ", transfer to tag::any"; } // Memory shapes. 
@@ -452,9 +459,14 @@ class DNNLJSONRuntime : public JSONRuntimeBase { std::string kernel_layout = node.GetAttr<std::vector<std::string>>("kernel_layout")[0]; // Check layout. - if (layout_dict.find(data_layout) == layout_dict.end() || - layout_dict.find(kernel_layout) == layout_dict.end()) { - LOG(FATAL) << "Unsupported layout for deconv: " << data_layout << " " << kernel_layout; + if (layout_dict.find(data_layout) == layout_dict.end()) { + LOG(FATAL) << "Unsupported data layout for deconv: " << data_layout; + } + + if (layout_dict.find(kernel_layout) == layout_dict.end()) { + layout_dict.insert({kernel_layout, tag::any}); + LOG(WARNING) << "Unregistered kernel layout for deconv: " << kernel_layout + << ", transfer to tag::any"; } // Memory shapes.