Skip to content
70 changes: 51 additions & 19 deletions python/tvm/relay/op/contrib/clml.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,11 +137,12 @@ def conv_pattern():
pattern = pattern.optional(lambda x: is_op("nn.bias_add")(x, is_constant()))
pattern = pattern.optional(lambda x: is_op("add")(x, is_constant()))
pattern = pattern.optional(
lambda x: is_op("nn.batch_norm")(
x, is_constant(), is_constant(), is_constant(), is_constant()
lambda x: is_tuple_get_item(
is_op("nn.batch_norm")(
x, is_constant(), is_constant(), is_constant(), is_constant()
)
)
)
pattern = pattern.optional(is_tuple_get_item)
pattern = pattern.optional(is_op("nn.relu"))
pattern = pattern.optional(is_op("clip"))
return pattern
Expand Down Expand Up @@ -176,19 +177,21 @@ def dense_pattern():

def pad_pattern():
    """Create a pad pattern.

    Returns
    -------
    pattern
        A pattern matching an ``nn.pad`` call whose first argument is a
        wildcard and whose second argument is a constant.
    """
    # Only the constant-second-argument form is built; a looser
    # wildcard/wildcard pattern would be overwritten immediately anyway.
    return is_op("nn.pad")(wildcard(), is_constant())

def check_conv(extract):
"""Check conv pattern is supported by CLML."""
call = extract
clip_found = False
if isinstance(call, tvm.relay.expr.TupleGetItem):
call = call.tuple_value
elif call.op.name == "nn.relu":
call = call.args[0]
if isinstance(call, tvm.relay.expr.TupleGetItem):
call = call.tuple_value
elif call.op.name == "clip":
clip_found = True
if call.attrs["a_min"] != 0.0 or call.attrs["a_max"] != 6.0:
return False
call = call.args[0]
Expand All @@ -200,6 +203,14 @@ def check_conv(extract):
attrs, args = call.attrs, call.args
if attrs.data_layout != "NCHW":
return False
if (
(not clip_found)
and (attrs.kernel_size[0] == 3)
and (attrs.dilation[0] != 1)
and (attrs.groups != 1)
and (attrs.channels == attrs.groups)
):
return False
data_typ = args[0].checked_type
kernel_typ = args[1].checked_type
is_depthwise = is_depthwise_conv2d(
Expand All @@ -213,12 +224,44 @@ def check_conv(extract):
return False
return True

def check_binary_op(extract):
    """Check whether the second operand of a matched binary op is non-scalar.

    Parameters
    ----------
    extract
        The matched call. Its second argument must expose
        ``checked_type.shape``.

    Returns
    -------
    bool
        True when the shape of the second argument has at least one
        dimension, False when that shape is empty.
    """
    rhs_shape = extract.args[1].checked_type.shape
    return len(rhs_shape) > 0

def check_pad_op(extract):
    """Check whether a matched pad call has exactly four ``pad_width`` entries.

    Parameters
    ----------
    extract
        The matched call. Its ``attrs`` must support lookup by the key
        ``"pad_width"``.

    Returns
    -------
    bool
        True when ``pad_width`` has length 4, False otherwise.
    """
    pad_width = extract.attrs["pad_width"]
    return len(pad_width) == 4

def check_softmax_op(extract):
    """Check whether a matched softmax call has an input of rank two or less.

    Parameters
    ----------
    extract
        The matched call. Its first argument must expose
        ``checked_type.shape``.

    Returns
    -------
    bool
        False when the first argument's shape has more than two dimensions,
        True otherwise.
    """
    input_rank = len(extract.args[0].checked_type.shape)
    return input_rank <= 2

def check_default_op(extract):
    """Accept every match unconditionally.

    Parameters
    ----------
    extract
        The matched expression; it is not inspected.

    Returns
    -------
    bool
        Always True.
    """
    return True

return [
("clml.conv2d", conv_pattern(), check_conv),
("clml.dense", dense_pattern()),
("clml.pad", pad_pattern()),
("clml.concat", concat_pattern()),
("clml.batch_norm", batch_norm_pattern()),
("clml.dense", dense_pattern(), check_default_op),
("clml.pad", pad_pattern(), check_pad_op),
("clml.concat", concat_pattern(), check_default_op),
("clml.batch_norm", batch_norm_pattern(), check_default_op),
("clml.add", is_op("add")(wildcard(), wildcard()), check_binary_op),
("clml.subtract", is_op("subtract")(wildcard(), wildcard()), check_binary_op),
("clml.multiply", is_op("multiply")(wildcard(), wildcard()), check_binary_op),
("clml.softmax", is_op("nn.softmax")(wildcard()), check_softmax_op),
("clml.reshape", is_op("reshape")(wildcard()), check_default_op),
("clml.avg_pool2d", is_op("nn.avg_pool2d")(wildcard()), check_default_op),
("clml.max_pool2d", is_op("nn.max_pool2d")(wildcard()), check_default_op),
("clml.global_avg_pool2d", is_op("nn.global_avg_pool2d")(wildcard()), check_default_op),
("clml.global_max_pool2d", is_op("nn.global_max_pool2d")(wildcard()), check_default_op),
("clml.relu", is_op("nn.relu")(wildcard()), check_default_op),
("clml.clip", is_op("clip")(wildcard()), check_default_op),
]


Expand All @@ -230,17 +273,6 @@ def _func_wrapper(expr):
return _func_wrapper


# Invoke _register_external_op_helper once per operator name, in listed order.
for _clml_op_name in (
    "clip",
    "nn.relu",
    "nn.global_avg_pool2d",
    "nn.global_max_pool2d",
    "nn.avg_pool2d",
    "nn.max_pool2d",
    "nn.softmax",
    "reshape",
    "add",
    "subtract",
    "multiply",
    "minimum",
    "maximum",
):
    _register_external_op_helper(_clml_op_name)

Expand Down
26 changes: 24 additions & 2 deletions src/relay/backend/contrib/clml/codegen.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ class CLMLJSONSerializer : public backend::contrib::JSONSerializer {
} else if (name == "clml.concat") {
json_node = CreateConcatJSONNode(cn);
} else {
LOG(FATAL) << "Unrecognized CLML pattern: " << name;
json_node = CreateGenericJSONNode(cn);
}
return AddNode(json_node, GetRef<Expr>(cn));
}
Expand Down Expand Up @@ -164,7 +164,7 @@ class CLMLJSONSerializer : public backend::contrib::JSONSerializer {
nodes.bn = current_call;
current_call = current_call->args[0].as<CallNode>();
}
if (backend::IsOp(current_call, "add")) {
if (backend::IsOp(current_call, "add") || backend::IsOp(current_call, "nn.bias_add")) {
nodes.bias = current_call;
current_call = current_call->args[0].as<CallNode>();
}
Expand Down Expand Up @@ -387,6 +387,28 @@ class CLMLJSONSerializer : public backend::contrib::JSONSerializer {

return json_node;
}

/*!
 * \brief Create a JSON graph node for a composite function without a dedicated handler.
 *
 * The node is named after the operator of the call that forms the composite function's
 * body. Its inputs are the visited arguments of the outer call, followed by those
 * arguments of the inner call whose index is at or beyond the outer call's argument count.
 *
 * \param cn The call node whose op is the composite function.
 * \return The newly created JSON graph node, with the inner call's attributes attached.
 */
std::shared_ptr<JSONGraphNode> CreateGenericJSONNode(const CallNode* cn) {
  const auto* fn = cn->op.as<FunctionNode>();
  ICHECK(fn);
  const auto* node = fn->body.as<CallNode>();
  // Guard the cast like the sibling casts: without it, a body that is not a plain
  // call would be dereferenced as a null pointer on the next line.
  ICHECK(node) << "Expected the body of a generic CLML composite function to be a call";

  const auto* node_op = node->op.as<OpNode>();
  ICHECK(node_op);
  const std::string name = node_op->name;

  std::vector<JSONGraphNodeEntry> inputs;
  const size_t num_outer_args = cn->args.size();
  // Inputs supplied to the composite function itself.
  for (size_t i = 0; i < num_outer_args; ++i) {
    inputs.push_back(VisitExpr(cn->args[i])[0]);
  }
  // Any further arguments that exist only on the inner call.
  for (size_t j = num_outer_args; j < node->args.size(); ++j) {
    inputs.push_back(VisitExpr(node->args[j])[0]);
  }
  auto json_node = std::make_shared<JSONGraphNode>(name, "kernel", inputs, 1);
  SetCallNodeAttribute(json_node, node);
  return json_node;
}
};

/*!
Expand Down
41 changes: 41 additions & 0 deletions src/relay/backend/contrib/clml/target.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* \file src/relay/backend/contrib/clml/target.cc
* \brief Registers the "clml" external codegen TargetKind.
*/

#include <tvm/target/target.h>

namespace tvm {
namespace relay {
namespace contrib {

/*!
 * \brief Register the "clml" target kind as an external codegen target.
 *
 * This external codegen target can use the CLML library linked into the TVM runtime.
 * - Patterns and custom compiler: python/tvm/relay/op/contrib/clml.py
 * - Runtime: src/runtime/contrib/clml/clml_runtime.cc
 *
 * NOTE(review): kDLOpenCL is presumably the device type associated with this kind;
 * confirm against the TVM_REGISTER_TARGET_KIND macro.
 */
TVM_REGISTER_TARGET_KIND("clml", kDLOpenCL)
// Set the kIsExternalCodegen attribute to true for this target kind.
.set_attr<Bool>(tvm::attr::kIsExternalCodegen, Bool(true));

} // namespace contrib
} // namespace relay
} // namespace tvm
38 changes: 24 additions & 14 deletions src/runtime/contrib/clml/clml_runtime.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1144,28 +1144,38 @@ class CLMLRuntime : public JSONRuntimeBase {
CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype);
auto wt_dims = get_tensor_dims(nodes_[node.GetInputs()[1].id_]);
bool has_bias = node.GetInputs().size() == 3 ? true : false;
auto weight = MakeCLMLTensorFromJSONEntry(node.GetInputs()[1], {1, 1, wt_dims.n, wt_dims.c},
auto weight = MakeCLMLTensorFromJSONEntry(node.GetInputs()[1], {wt_dims.n, wt_dims.c, 1, 1},
CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype);

auto bias = std::make_shared<cl_ml_tensor_memory_desc_qcom>();
if (has_bias) {
auto bias_dims = get_tensor_dims(nodes_[node.GetInputs()[2].id_]);
bias = MakeCLMLTensorFromJSONEntry(node.GetInputs()[2], {1, bias_dims.c, 1, 1},
CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype);
}

cl_ml_op_fully_connected_desc_qcom fc_desc = {1, CL_FC_WEIGHT_TRANSFORM_TRANSPOSE_QCOM,
cl_arithmetic_mode};
auto output = MakeCLMLTensorFromJSONNode(node, CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype);

if (has_bias) {
result = h_ClmlIntf->clCreateMLOpFullyConnectedQCOM(
workspace->context, 0, &fc_desc, input->tensor, weight->tensor, bias->tensor,
output->tensor, &op, tuning_cache);
} else {
result = h_ClmlIntf->clCreateMLOpFullyConnectedQCOM(workspace->context, 0, &fc_desc,
input->tensor, weight->tensor, NULL,
output->tensor, &op, tuning_cache);
cl_ml_tensor_desc_qcom desc = {};
desc.num_dimensions = CL_TENSOR_UNUSED_QCOM;
result =
h_ClmlIntf->clCreateMLTensorQCOM(workspace->context, NULL, &desc, &layer_.unusedTensor);
ICHECK(layer_.unusedTensor && result == CL_SUCCESS) << "clCreateMLTensorQCOM:" << result;
bias->tensor = layer_.unusedTensor;
}
// Output
auto output = MakeCLMLTensorFromJSONNode(node, CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype, nullptr,
{1, wt_dims.n, 1, 1});
cl_ml_op_convolution_desc_qcom conv_desc = {CL_CONVOLUTION_MODE_CONVOLUTION_QCOM,
1,
4,
{0, 0},
{0, 0},
{1, 1},
{1, 1},
0,
cl_arithmetic_mode};

result = h_ClmlIntf->clCreateMLOpConvolutionForwardQCOM(
workspace->context, 0, &conv_desc, input->tensor, weight->tensor, bias->tensor,
output->tensor, &op, NULL);
ICHECK(op && result == CL_SUCCESS) << "Fully Connected Error:" << result;

layer->function.push_back(op);
Expand Down