diff --git a/python/tvm/relay/op/contrib/clml.py b/python/tvm/relay/op/contrib/clml.py index d253544d45d9..c3d4eb84700d 100644 --- a/python/tvm/relay/op/contrib/clml.py +++ b/python/tvm/relay/op/contrib/clml.py @@ -137,11 +137,12 @@ def conv_pattern(): pattern = pattern.optional(lambda x: is_op("nn.bias_add")(x, is_constant())) pattern = pattern.optional(lambda x: is_op("add")(x, is_constant())) pattern = pattern.optional( - lambda x: is_op("nn.batch_norm")( - x, is_constant(), is_constant(), is_constant(), is_constant() + lambda x: is_tuple_get_item( + is_op("nn.batch_norm")( + x, is_constant(), is_constant(), is_constant(), is_constant() + ) ) ) - pattern = pattern.optional(is_tuple_get_item) pattern = pattern.optional(is_op("nn.relu")) pattern = pattern.optional(is_op("clip")) return pattern @@ -176,12 +177,13 @@ def dense_pattern(): def pad_pattern(): """Create a pad pattern.""" - pattern = is_op("nn.pad")(wildcard(), wildcard()) + pattern = is_op("nn.pad")(wildcard(), is_constant()) return pattern def check_conv(extract): """Check conv pattern is supported by CLML.""" call = extract + clip_found = False if isinstance(call, tvm.relay.expr.TupleGetItem): call = call.tuple_value elif call.op.name == "nn.relu": @@ -189,6 +191,7 @@ def check_conv(extract): if isinstance(call, tvm.relay.expr.TupleGetItem): call = call.tuple_value elif call.op.name == "clip": + clip_found = True if call.attrs["a_min"] != 0.0 or call.attrs["a_max"] != 6.0: return False call = call.args[0] @@ -200,6 +203,14 @@ def check_conv(extract): attrs, args = call.attrs, call.args if attrs.data_layout != "NCHW": return False + if ( + (not clip_found) + and (attrs.kernel_size[0] == 3) + and (attrs.dilation[0] != 1) + and (attrs.groups != 1) + and (attrs.channels == attrs.groups) + ): + return False data_typ = args[0].checked_type kernel_typ = args[1].checked_type is_depthwise = is_depthwise_conv2d( @@ -213,12 +224,44 @@ def check_conv(extract): return False return True + def check_binary_op(extract): + call = extract + if len(call.args[1].checked_type.shape) > 0: + return True + return False + + def check_pad_op(extract): + call = extract + if len(call.attrs["pad_width"]) != 4: + return False + return True + + def check_softmax_op(extract): + call = extract + if len(call.args[0].checked_type.shape) > 2: + return False + return True + + def check_default_op(extract): + return True + return [ ("clml.conv2d", conv_pattern(), check_conv), - ("clml.dense", dense_pattern()), - ("clml.pad", pad_pattern()), - ("clml.concat", concat_pattern()), - ("clml.batch_norm", batch_norm_pattern()), + ("clml.dense", dense_pattern(), check_default_op), + ("clml.pad", pad_pattern(), check_pad_op), + ("clml.concat", concat_pattern(), check_default_op), + ("clml.batch_norm", batch_norm_pattern(), check_default_op), + ("clml.add", is_op("add")(wildcard(), wildcard()), check_binary_op), + ("clml.subtract", is_op("subtract")(wildcard(), wildcard()), check_binary_op), + ("clml.multiply", is_op("multiply")(wildcard(), wildcard()), check_binary_op), + ("clml.softmax", is_op("nn.softmax")(wildcard()), check_softmax_op), + ("clml.reshape", is_op("reshape")(wildcard()), check_default_op), + ("clml.avg_pool2d", is_op("nn.avg_pool2d")(wildcard()), check_default_op), + ("clml.max_pool2d", is_op("nn.max_pool2d")(wildcard()), check_default_op), + ("clml.global_avg_pool2d", is_op("nn.global_avg_pool2d")(wildcard()), check_default_op), + ("clml.global_max_pool2d", is_op("nn.global_max_pool2d")(wildcard()), check_default_op), + ("clml.relu", is_op("nn.relu")(wildcard()), check_default_op), + ("clml.clip", is_op("clip")(wildcard()), check_default_op), ] @@ -230,17 +273,6 @@ def _func_wrapper(expr): return _func_wrapper -_register_external_op_helper("clip") -_register_external_op_helper("nn.relu") -_register_external_op_helper("nn.global_avg_pool2d") -_register_external_op_helper("nn.global_max_pool2d") -_register_external_op_helper("nn.avg_pool2d") -_register_external_op_helper("nn.max_pool2d") -_register_external_op_helper("nn.softmax") -_register_external_op_helper("reshape") -_register_external_op_helper("add") -_register_external_op_helper("subtract") -_register_external_op_helper("multiply") _register_external_op_helper("minimum") _register_external_op_helper("maximum") diff --git a/src/relay/backend/contrib/clml/codegen.cc b/src/relay/backend/contrib/clml/codegen.cc index b89f05e17857..9ecec0c4531f 100644 --- a/src/relay/backend/contrib/clml/codegen.cc +++ b/src/relay/backend/contrib/clml/codegen.cc @@ -94,7 +94,7 @@ class CLMLJSONSerializer : public backend::contrib::JSONSerializer { } else if (name == "clml.concat") { json_node = CreateConcatJSONNode(cn); } else { - LOG(FATAL) << "Unrecognized CLML pattern: " << name; + json_node = CreateGenericJSONNode(cn); } return AddNode(json_node, GetRef(cn)); } @@ -164,7 +164,7 @@ class CLMLJSONSerializer : public backend::contrib::JSONSerializer { nodes.bn = current_call; current_call = current_call->args[0].as(); } - if (backend::IsOp(current_call, "add")) { + if (backend::IsOp(current_call, "add") || backend::IsOp(current_call, "nn.bias_add")) { nodes.bias = current_call; current_call = current_call->args[0].as(); } @@ -387,6 +387,28 @@ class CLMLJSONSerializer : public backend::contrib::JSONSerializer { return json_node; } + + std::shared_ptr CreateGenericJSONNode(const CallNode* cn) { + const auto* fn = cn->op.as(); + ICHECK(fn); + const auto* node = fn->body.as(); + + const auto* node_op = node->op.as(); + ICHECK(node_op); + const std::string name = node_op->name; + + std::vector inputs; + unsigned int i = 0; + for (i = 0; i < cn->args.size(); i++) { + inputs.push_back(VisitExpr(cn->args[i])[0]); + } + for (unsigned int j = i; j < node->args.size(); j++) { + inputs.push_back(VisitExpr(node->args[j])[0]); + } + auto json_node = std::make_shared(name, "kernel", inputs, 1); + SetCallNodeAttribute(json_node, node); + return json_node; + } }; /*! diff --git a/src/relay/backend/contrib/clml/target.cc b/src/relay/backend/contrib/clml/target.cc new file mode 100644 index 000000000000..c7f22c1315c8 --- /dev/null +++ b/src/relay/backend/contrib/clml/target.cc @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/relay/backend/contrib/clml/target.cc + * \brief Registers the "clml" external codegen TargetKind. + */ + +#include + +namespace tvm { +namespace relay { +namespace contrib { + +/*! + * \brief This external codegen target can use the CLML library linked into the TVM runtime. + * - Patterns and custom compiler: python/tvm/relay/op/contrib/clml.py + * - Runtime: src/runtime/contrib/clml/clml_runtime.cc + */ +TVM_REGISTER_TARGET_KIND("clml", kDLOpenCL) + .set_attr(tvm::attr::kIsExternalCodegen, Bool(true)); + +} // namespace contrib +} // namespace relay +} // namespace tvm diff --git a/src/runtime/contrib/clml/clml_runtime.cc b/src/runtime/contrib/clml/clml_runtime.cc index cdc3b9a7b51c..7492e521b7f5 100644 --- a/src/runtime/contrib/clml/clml_runtime.cc +++ b/src/runtime/contrib/clml/clml_runtime.cc @@ -1144,28 +1144,38 @@ class CLMLRuntime : public JSONRuntimeBase { CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype); auto wt_dims = get_tensor_dims(nodes_[node.GetInputs()[1].id_]); bool has_bias = node.GetInputs().size() == 3 ? true : false; - auto weight = MakeCLMLTensorFromJSONEntry(node.GetInputs()[1], {1, 1, wt_dims.n, wt_dims.c}, + auto weight = MakeCLMLTensorFromJSONEntry(node.GetInputs()[1], {wt_dims.n, wt_dims.c, 1, 1}, CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype); + auto bias = std::make_shared(); if (has_bias) { auto bias_dims = get_tensor_dims(nodes_[node.GetInputs()[2].id_]); bias = MakeCLMLTensorFromJSONEntry(node.GetInputs()[2], {1, bias_dims.c, 1, 1}, CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype); - } - - cl_ml_op_fully_connected_desc_qcom fc_desc = {1, CL_FC_WEIGHT_TRANSFORM_TRANSPOSE_QCOM, - cl_arithmetic_mode}; - auto output = MakeCLMLTensorFromJSONNode(node, CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype); - - if (has_bias) { - result = h_ClmlIntf->clCreateMLOpFullyConnectedQCOM( - workspace->context, 0, &fc_desc, input->tensor, weight->tensor, bias->tensor, - output->tensor, &op, tuning_cache); } else { - result = h_ClmlIntf->clCreateMLOpFullyConnectedQCOM(workspace->context, 0, &fc_desc, - input->tensor, weight->tensor, NULL, - output->tensor, &op, tuning_cache); + cl_ml_tensor_desc_qcom desc = {}; + desc.num_dimensions = CL_TENSOR_UNUSED_QCOM; + result = + h_ClmlIntf->clCreateMLTensorQCOM(workspace->context, NULL, &desc, &layer_.unusedTensor); + ICHECK(layer_.unusedTensor && result == CL_SUCCESS) << "clCreateMLTensorQCOM:" << result; + bias->tensor = layer_.unusedTensor; } + // Output + auto output = MakeCLMLTensorFromJSONNode(node, CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype, nullptr, + {1, wt_dims.n, 1, 1}); + cl_ml_op_convolution_desc_qcom conv_desc = {CL_CONVOLUTION_MODE_CONVOLUTION_QCOM, + 1, + 4, + {0, 0}, + {0, 0}, + {1, 1}, + {1, 1}, + 0, + cl_arithmetic_mode}; + + result = h_ClmlIntf->clCreateMLOpConvolutionForwardQCOM( + workspace->context, 0, &conv_desc, input->tensor, weight->tensor, bias->tensor, + output->tensor, &op, NULL); ICHECK(op && result == CL_SUCCESS) << "Fully Connected Error:" << result; layer->function.push_back(op);