diff --git a/python/tvm/relay/op/contrib/clml.py b/python/tvm/relay/op/contrib/clml.py
index d253544d45d9..c3d4eb84700d 100644
--- a/python/tvm/relay/op/contrib/clml.py
+++ b/python/tvm/relay/op/contrib/clml.py
@@ -137,11 +137,12 @@ def conv_pattern():
         pattern = pattern.optional(lambda x: is_op("nn.bias_add")(x, is_constant()))
         pattern = pattern.optional(lambda x: is_op("add")(x, is_constant()))
         pattern = pattern.optional(
-            lambda x: is_op("nn.batch_norm")(
-                x, is_constant(), is_constant(), is_constant(), is_constant()
+            lambda x: is_tuple_get_item(
+                is_op("nn.batch_norm")(
+                    x, is_constant(), is_constant(), is_constant(), is_constant()
+                )
             )
         )
-        pattern = pattern.optional(is_tuple_get_item)
         pattern = pattern.optional(is_op("nn.relu"))
         pattern = pattern.optional(is_op("clip"))
         return pattern
@@ -176,12 +177,13 @@ def dense_pattern():
 
     def pad_pattern():
         """Create a pad pattern."""
-        pattern = is_op("nn.pad")(wildcard(), wildcard())
+        pattern = is_op("nn.pad")(wildcard(), is_constant())  # second arg must be a constant
         return pattern
 
     def check_conv(extract):
         """Check conv pattern is supported by CLML."""
         call = extract
+        clip_found = False
         if isinstance(call, tvm.relay.expr.TupleGetItem):
             call = call.tuple_value
         elif call.op.name == "nn.relu":
@@ -189,6 +191,7 @@ def check_conv(extract):
             if isinstance(call, tvm.relay.expr.TupleGetItem):
                 call = call.tuple_value
         elif call.op.name == "clip":
+            clip_found = True
             if call.attrs["a_min"] != 0.0 or call.attrs["a_max"] != 6.0:
                 return False
             call = call.args[0]
@@ -200,6 +203,14 @@ def check_conv(extract):
         attrs, args = call.attrs, call.args
         if attrs.data_layout != "NCHW":
             return False
+        if (
+            (not clip_found)
+            and (attrs.kernel_size[0] == 3)
+            and (attrs.dilation[0] != 1)
+            and (attrs.groups != 1)
+            and (attrs.channels == attrs.groups)
+        ):
+            return False
         data_typ = args[0].checked_type
         kernel_typ = args[1].checked_type
         is_depthwise = is_depthwise_conv2d(
@@ -213,12 +224,44 @@ def check_conv(extract):
             return False
         return True
 
+    def check_binary_op(extract):
+        """Check that the second operand of a binary op is not rank-0."""
+        rhs_type = extract.args[1].checked_type
+        # An empty shape (no dimensions) on the second argument is rejected.
+        return len(rhs_type.shape) > 0
+
+    def check_pad_op(extract):
+        """Check that the pad op's pad_width attribute has exactly 4 entries."""
+        pad_width = extract.attrs["pad_width"]
+        # Any other number of pad_width entries is rejected.
+        return len(pad_width) == 4
+
+    def check_softmax_op(extract):
+        """Check that the softmax input has at most 2 dimensions."""
+        input_type = extract.args[0].checked_type
+        # Inputs with more than two dimensions are rejected.
+        return len(input_type.shape) <= 2
+
+    def check_default_op(extract):
+        return True  # accepts every match; `extract` is not inspected
+
     return [
         ("clml.conv2d", conv_pattern(), check_conv),
-        ("clml.dense", dense_pattern()),
-        ("clml.pad", pad_pattern()),
-        ("clml.concat", concat_pattern()),
-        ("clml.batch_norm", batch_norm_pattern()),
+        ("clml.dense", dense_pattern(), check_default_op),
+        ("clml.pad", pad_pattern(), check_pad_op),
+        ("clml.concat", concat_pattern(), check_default_op),
+        ("clml.batch_norm", batch_norm_pattern(), check_default_op),
+        ("clml.add", is_op("add")(wildcard(), wildcard()), check_binary_op),
+        ("clml.subtract", is_op("subtract")(wildcard(), wildcard()), check_binary_op),
+        ("clml.multiply", is_op("multiply")(wildcard(), wildcard()), check_binary_op),
+        ("clml.softmax", is_op("nn.softmax")(wildcard()), check_softmax_op),
+        ("clml.reshape", is_op("reshape")(wildcard()), check_default_op),
+        ("clml.avg_pool2d", is_op("nn.avg_pool2d")(wildcard()), check_default_op),
+        ("clml.max_pool2d", is_op("nn.max_pool2d")(wildcard()), check_default_op),
+        ("clml.global_avg_pool2d", is_op("nn.global_avg_pool2d")(wildcard()), check_default_op),
+        ("clml.global_max_pool2d", is_op("nn.global_max_pool2d")(wildcard()), check_default_op),
+        ("clml.relu", is_op("nn.relu")(wildcard()), check_default_op),
+        ("clml.clip", is_op("clip")(wildcard()), check_default_op),
     ]
 
 
@@ -230,17 +273,6 @@ def _func_wrapper(expr):
     return _func_wrapper
 
 
-_register_external_op_helper("clip")
-_register_external_op_helper("nn.relu")
-_register_external_op_helper("nn.global_avg_pool2d")
-_register_external_op_helper("nn.global_max_pool2d")
-_register_external_op_helper("nn.avg_pool2d")
-_register_external_op_helper("nn.max_pool2d")
-_register_external_op_helper("nn.softmax")
-_register_external_op_helper("reshape")
-_register_external_op_helper("add")
-_register_external_op_helper("subtract")
-_register_external_op_helper("multiply")
 _register_external_op_helper("minimum")
 _register_external_op_helper("maximum")
 
diff --git a/src/relay/backend/contrib/clml/codegen.cc b/src/relay/backend/contrib/clml/codegen.cc
index b89f05e17857..9ecec0c4531f 100644
--- a/src/relay/backend/contrib/clml/codegen.cc
+++ b/src/relay/backend/contrib/clml/codegen.cc
@@ -94,7 +94,7 @@ class CLMLJSONSerializer : public backend::contrib::JSONSerializer {
     } else if (name == "clml.concat") {
       json_node = CreateConcatJSONNode(cn);
     } else {
-      LOG(FATAL) << "Unrecognized CLML  pattern: " << name;
+      json_node = CreateGenericJSONNode(cn);
     }
     return AddNode(json_node, GetRef<Expr>(cn));
   }
@@ -164,7 +164,7 @@ class CLMLJSONSerializer : public backend::contrib::JSONSerializer {
       nodes.bn = current_call;
       current_call = current_call->args[0].as<CallNode>();
     }
-    if (backend::IsOp(current_call, "add")) {
+    if (backend::IsOp(current_call, "add") || backend::IsOp(current_call, "nn.bias_add")) {
       nodes.bias = current_call;
       current_call = current_call->args[0].as<CallNode>();
     }
@@ -387,6 +387,28 @@ class CLMLJSONSerializer : public backend::contrib::JSONSerializer {
 
     return json_node;
   }
+
+  /*! \brief Create a "kernel" JSON node for a function call whose body is a single op call. */
+  std::shared_ptr<JSONGraphNode> CreateGenericJSONNode(const CallNode* cn) {
+    const auto* fn = cn->op.as<FunctionNode>();
+    ICHECK(fn);
+    const auto* node = fn->body.as<CallNode>();
+    ICHECK(node) << "Expected the function body to be a call node";
+    const auto* node_op = node->op.as<OpNode>();
+    ICHECK(node_op);
+    const std::string name = node_op->name;
+    // Inputs: all outer call arguments, then inner-call arguments beyond that count.
+    std::vector<JSONGraphNodeEntry> inputs;
+    for (const auto& arg : cn->args) {
+      inputs.push_back(VisitExpr(arg)[0]);
+    }
+    for (size_t j = cn->args.size(); j < node->args.size(); j++) {
+      inputs.push_back(VisitExpr(node->args[j])[0]);
+    }
+    auto json_node = std::make_shared<JSONGraphNode>(name, "kernel", inputs, 1);
+    SetCallNodeAttribute(json_node, node);
+    return json_node;
+  }
 };
 
 /*!
diff --git a/src/relay/backend/contrib/clml/target.cc b/src/relay/backend/contrib/clml/target.cc
new file mode 100644
index 000000000000..c7f22c1315c8
--- /dev/null
+++ b/src/relay/backend/contrib/clml/target.cc
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/relay/backend/contrib/clml/target.cc
+ * \brief Registers the "clml" external codegen TargetKind.
+ */
+
+#include <tvm/target/target.h>
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+
+/*!
+ * \brief This external codegen target can use the CLML library linked into the TVM runtime.
+ *  - Patterns and custom compiler: python/tvm/relay/op/contrib/clml.py
+ *  - Runtime: src/runtime/contrib/clml/clml_runtime.cc
+ */
+TVM_REGISTER_TARGET_KIND("clml", kDLOpenCL)
+    .set_attr<Bool>(tvm::attr::kIsExternalCodegen, Bool(true));  // mark as external codegen
+
+}  // namespace contrib
+}  // namespace relay
+}  // namespace tvm
diff --git a/src/runtime/contrib/clml/clml_runtime.cc b/src/runtime/contrib/clml/clml_runtime.cc
index cdc3b9a7b51c..7492e521b7f5 100644
--- a/src/runtime/contrib/clml/clml_runtime.cc
+++ b/src/runtime/contrib/clml/clml_runtime.cc
@@ -1144,28 +1144,38 @@ class CLMLRuntime : public JSONRuntimeBase {
                                              CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype);
     auto wt_dims = get_tensor_dims(nodes_[node.GetInputs()[1].id_]);
     bool has_bias = node.GetInputs().size() == 3 ? true : false;
-    auto weight = MakeCLMLTensorFromJSONEntry(node.GetInputs()[1], {1, 1, wt_dims.n, wt_dims.c},
+    auto weight = MakeCLMLTensorFromJSONEntry(node.GetInputs()[1], {wt_dims.n, wt_dims.c, 1, 1},
                                               CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype);
+
     auto bias = std::make_shared<cl_ml_tensor_memory_desc_qcom>();
     if (has_bias) {
       auto bias_dims = get_tensor_dims(nodes_[node.GetInputs()[2].id_]);
       bias = MakeCLMLTensorFromJSONEntry(node.GetInputs()[2], {1, bias_dims.c, 1, 1},
                                          CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype);
-    }
-
-    cl_ml_op_fully_connected_desc_qcom fc_desc = {1, CL_FC_WEIGHT_TRANSFORM_TRANSPOSE_QCOM,
-                                                  cl_arithmetic_mode};
-    auto output = MakeCLMLTensorFromJSONNode(node, CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype);
-
-    if (has_bias) {
-      result = h_ClmlIntf->clCreateMLOpFullyConnectedQCOM(
-          workspace->context, 0, &fc_desc, input->tensor, weight->tensor, bias->tensor,
-          output->tensor, &op, tuning_cache);
     } else {
-      result = h_ClmlIntf->clCreateMLOpFullyConnectedQCOM(workspace->context, 0, &fc_desc,
-                                                          input->tensor, weight->tensor, NULL,
-                                                          output->tensor, &op, tuning_cache);
+      cl_ml_tensor_desc_qcom desc = {};
+      desc.num_dimensions = CL_TENSOR_UNUSED_QCOM;
+      result =
+          h_ClmlIntf->clCreateMLTensorQCOM(workspace->context, NULL, &desc, &layer_.unusedTensor);
+      ICHECK(layer_.unusedTensor && result == CL_SUCCESS) << "clCreateMLTensorQCOM:" << result;
+      bias->tensor = layer_.unusedTensor;
     }
+    // Output
+    auto output = MakeCLMLTensorFromJSONNode(node, CL_TENSOR_LAYOUT_OPTIMAL_QCOM, cl_dtype, nullptr,
+                                             {1, wt_dims.n, 1, 1});
+    cl_ml_op_convolution_desc_qcom conv_desc = {CL_CONVOLUTION_MODE_CONVOLUTION_QCOM,
+                                                1,
+                                                4,
+                                                {0, 0},
+                                                {0, 0},
+                                                {1, 1},
+                                                {1, 1},
+                                                0,
+                                                cl_arithmetic_mode};
+
+    result = h_ClmlIntf->clCreateMLOpConvolutionForwardQCOM(
+        workspace->context, 0, &conv_desc, input->tensor, weight->tensor, bias->tensor,
+        output->tensor, &op, NULL);
     ICHECK(op && result == CL_SUCCESS) << "Fully Connected Error:" << result;
 
     layer->function.push_back(op);