From 37df763d4abd5d3a2e1d74a237322443b6fdf297 Mon Sep 17 00:00:00 2001 From: Luke Hutton Date: Wed, 12 Oct 2022 14:06:53 +0000 Subject: [PATCH 1/4] [ETHOSN] Inline non-compute-intensive partitions Adds a pass that analyzes functions partitioned for the NPU and inlines those that are deemed "non-compute-intensive" back to the main function so that they can be considered for other backends. The current heuristic for deciding a non-compute-intensive function is to collectively check that all of the operations in the function have no multiply accumulate operations. This heuristic is not optimal; optimization is left for future exploration. This pass is inspired by the "IsComputeIntensiveGraph" pass in the TensorRT integration. Change-Id: I20c197702f5252f102cfc1e4b4635ab836aa7835 --- python/tvm/relay/op/contrib/ethosn.py | 55 ++++-- .../backend/contrib/ethosn/codegen_ethosn.h | 17 ++ .../contrib/ethosn/inline_partitions.cc | 120 +++++++++++++ .../contrib/test_ethosn/infrastructure.py | 28 ++- .../contrib/test_ethosn/test_addition.py | 2 +- .../contrib/test_ethosn/test_concatenate.py | 2 +- .../test_ethosn/test_inline_partitions.py | 167 ++++++++++++++++++ .../contrib/test_ethosn/test_leaky_relu.py | 2 +- .../contrib/test_ethosn/test_multiply.py | 4 +- tests/python/contrib/test_ethosn/test_relu.py | 2 +- .../contrib/test_ethosn/test_requantize.py | 4 +- .../contrib/test_ethosn/test_reshape.py | 6 +- .../python/contrib/test_ethosn/test_split.py | 4 +- tests/python/contrib/test_ethosn/test_tanh.py | 2 +- .../contrib/test_ethosn/test_topologies.py | 34 +++- 15 files changed, 418 insertions(+), 31 deletions(-) create mode 100644 src/relay/backend/contrib/ethosn/inline_partitions.cc create mode 100644 tests/python/contrib/test_ethosn/test_inline_partitions.py diff --git a/python/tvm/relay/op/contrib/ethosn.py b/python/tvm/relay/op/contrib/ethosn.py index 80cc1ca3b202..5952d21e7a37 100644 --- a/python/tvm/relay/op/contrib/ethosn.py +++ b/python/tvm/relay/op/contrib/ethosn.py @@ 
-64,14 +64,42 @@ def ConvertEquivalents() -> tvm.ir.IRModule: # pylint: disable=invalid-name """Converts operations into a numerically equivalent form that can be understood by the NPU codegen. - Return - ------ + Returns + ------- Pass The module pass. """ return _ethosn.ConvertEquivalents() +def InlineNonComputeIntensivePartitions() -> tvm.ir.IRModule: # pylint: disable=invalid-name + """This pass checks whether functions partitioned for the NPU are considered + non-compute intensive. If they are not, they will be unpartitioned and passed onto + other backends to consider. + + A partitioned function is currently considered non-compute intensive if it contains + no multiply accumulate operations. + + Returns + ------- + Pass + The module pass. + """ + return _ethosn.InlineNonComputeIntensivePartitions() + + +def inline_non_compute_intensive_partitions() -> bool: + """ + Determine whether to inline none-compute-intensive partitions. + + Returns + ------- + True if inlining should happen, False if not. + """ + compiler_attrs = tvm.get_global_func("relay.ext.ethos-n.get_compiler_attrs")() + return compiler_attrs.inline_non_compute_intensive_partitions + + def partition_for_ethosn(mod, params=None, **opts): """Partition the graph greedily offloading supported operators to Arm Ethos-N NPU. 
@@ -112,17 +140,18 @@ def partition_for_ethosn(mod, params=None, **opts): if params: mod["main"] = bind_params_by_name(mod["main"], params) - seq = tvm.transform.Sequential( - [ - transform.InferType(), - transform.MergeComposite(pattern_table()), - transform.AnnotateTarget("ethos-n"), - transform.MergeCompilerRegions(), - transform.PartitionGraph(), - ConvertEquivalents(), - ] - ) - return seq(mod) + passes = [ + transform.InferType(), + transform.MergeComposite(pattern_table()), + transform.AnnotateTarget("ethos-n"), + transform.MergeCompilerRegions(), + transform.PartitionGraph(), + ConvertEquivalents(), + ] + if inline_non_compute_intensive_partitions(): + passes.append(InlineNonComputeIntensivePartitions()) + + return tvm.transform.Sequential(passes)(mod) @register_pattern_table("ethos-n") diff --git a/src/relay/backend/contrib/ethosn/codegen_ethosn.h b/src/relay/backend/contrib/ethosn/codegen_ethosn.h index ab853599aa2d..c640db47b6dd 100644 --- a/src/relay/backend/contrib/ethosn/codegen_ethosn.h +++ b/src/relay/backend/contrib/ethosn/codegen_ethosn.h @@ -251,6 +251,7 @@ struct EthosnCompilerConfigNode : public tvm::AttrsNodeGetConfig("relay.ext.ethos-n.options"); + if (!cfg.defined()) { + cfg = AttrsWithDefaultValues(); + } + return cfg; +} +TVM_REGISTER_GLOBAL("relay.ext.ethos-n.get_compiler_attrs").set_body_typed(GetCompilerAttrs); + /*! \brief The compiler for Ethos-N functions */ class EthosnCompiler { public: diff --git a/src/relay/backend/contrib/ethosn/inline_partitions.cc b/src/relay/backend/contrib/ethosn/inline_partitions.cc new file mode 100644 index 000000000000..8dc7ebb1116f --- /dev/null +++ b/src/relay/backend/contrib/ethosn/inline_partitions.cc @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/relay/backend/contrib/ethosn/inline_partitions.cc + * \brief A pass to inline NPU partitions that are not considered compute + * intensive. + */ + +#include +#include + +#include "../../../transforms/compiler_function_utils.h" + +namespace tvm { +namespace relay { +namespace contrib { +namespace ethosn { + +class IsComputeIntensivePartition : MixedModeVisitor { + public: + /*! + * \brief Check if the partitioned function is compute + * intensive. If it has not multiply-accumulate operations + * it is not considered compute intensive. + * + * \param expr The partitioned function to check. + */ + bool CheckSubgraph(const Expr& expr) { + is_compute_intensive = false; + VisitExpr(expr); + return is_compute_intensive; + } + + /*! + * \brief Visit the call nodes of a partitioned function + * and check if operators or composite functions make the + * partitioned function compute intensive. + * + * \param op The call node to check. 
+ */ + void VisitExpr_(const CallNode* op) override { + Call call = GetRef(op); + std::string op_name = ""; + if (const auto* op = call->op.as()) { + op_name = op->name; + } else if (const auto* func = call->op.as()) { + op_name = func->GetAttr(attr::kComposite, "").value(); + } + + if (op_name != "") { + if (compute_intensive_operators.find(op_name) != compute_intensive_operators.end()) { + is_compute_intensive = true; + } + } + } + + private: + /*! \brief Whether or not the partitioned function is consdiered compute intensive. */ + bool is_compute_intensive; + /*! \brief A set of operators considered compute intensive. */ + const std::unordered_set compute_intensive_operators{ + "ethos-n.qnn_add", "ethos-n.qnn_conv2d", "ethos-n.qnn_conv2d_transpose", + "ethos-n.qnn_avg_pool2d", "ethos-n.qnn_sigmoid", "ethos-n.qnn_fc", + "ethos-n.qnn_mean", "ethos-n.qnn_resize", "nn.max_pool2d", + "nn.depth_to_space"}; +}; + +/*! + * \brief This pass checks whether functions partitioned for the NPU are considered + * non-compute intensive. If they are not, they will be unpartitioned and passed onto + * other backends to consider. + * + * A partitioned function is currently considered non-compute intensive if it contains + * no multiply accumulate operations. Note that this is not an optimal heuristic, + * however, it will not degrade performance. 
+ */ +tvm::transform::Pass InlineNonComputeIntensivePartitions() { + runtime::TypedPackedFunc pass_func = + [=](IRModule mod, tvm::transform::PassContext ctx) { + auto analyzer = IsComputeIntensivePartition(); + Array gvs_to_inline; + for (auto gv : mod->GetGlobalVars()) { + Function func = Downcast(mod->Lookup(gv)); + auto compiler_name = func->GetAttr(attr::kCompiler); + if (compiler_name.defined() && compiler_name == "ethos-n") { + if (!analyzer.CheckSubgraph(func->body)) { + gvs_to_inline.push_back(gv); + } + } + } + return relay::transform::InlineCompilerFunctionsBoundTo(gvs_to_inline)(mod); + }; + return tvm::transform::CreateModulePass( + pass_func, 0, "relay.backend.contrib.ethos-n.InlineNonComputeIntensivePartitions", {}); +} + +TVM_REGISTER_GLOBAL("relay.backend.contrib.ethos-n.InlineNonComputeIntensivePartitions") + .set_body_typed(InlineNonComputeIntensivePartitions); + +} // namespace ethosn +} // namespace contrib +} // namespace relay +} // namespace tvm diff --git a/tests/python/contrib/test_ethosn/infrastructure.py b/tests/python/contrib/test_ethosn/infrastructure.py index 6b019686968e..85ebd98efcff 100644 --- a/tests/python/contrib/test_ethosn/infrastructure.py +++ b/tests/python/contrib/test_ethosn/infrastructure.py @@ -143,7 +143,7 @@ def visit_call(self, call): return c.count -def build(mod, params, npu=True, expected_host_ops=0, npu_partitions=1): +def build(mod, params, npu=True, expected_host_ops=0, npu_partitions=1, optimize_partitions=True): """Build a network with or without Ethos-N offloading. Parameters @@ -158,10 +158,18 @@ def build(mod, params, npu=True, expected_host_ops=0, npu_partitions=1): The number of ops expected to remain on the host. npu_partitions : int, optional The number of Ethos-N partitions expected. + optimize_partitions : bool, optional + Disable the pass that optimizes NPU partitions post partitioning. 
""" relay.backend.te_compiler.get().clear() with tvm.transform.PassContext( - opt_level=3, config={"relay.ext.ethos-n.options": {"variant": get_ethosn_variant()}} + opt_level=3, + config={ + "relay.ext.ethos-n.options": { + "variant": get_ethosn_variant(), + "inline_non_compute_intensive_partitions": optimize_partitions, + } + }, ): with tvm.target.Target("llvm"): if npu: @@ -228,8 +236,20 @@ def run(lib, inputs, outputs, npu=True): return out -def build_and_run(mod, inputs, outputs, params, npu=True, expected_host_ops=0, npu_partitions=1): - lib = build(mod, params, npu, expected_host_ops, npu_partitions) +def build_and_run( + mod, + inputs, + outputs, + params, + npu=True, + expected_host_ops=0, + npu_partitions=1, + optimize_partitions=True, +): + """ + Convenient wrapper for building and running a module on the NPU. + """ + lib = build(mod, params, npu, expected_host_ops, npu_partitions, optimize_partitions) return run(lib, inputs, outputs, npu) diff --git a/tests/python/contrib/test_ethosn/test_addition.py b/tests/python/contrib/test_ethosn/test_addition.py index 11d8b8d1cd56..76bda5e0b7d4 100644 --- a/tests/python/contrib/test_ethosn/test_addition.py +++ b/tests/python/contrib/test_ethosn/test_addition.py @@ -227,7 +227,7 @@ def test_addition_to_reinterpret_quantize(lhs_shape, lhs_is_constant, rhs_shape, outputs = [] for npu in [False, True]: mod = tei.make_module(model, {}) - outputs.append(tei.build_and_run(mod, inputs, 1, {}, npu=npu)) + outputs.append(tei.build_and_run(mod, inputs, 1, {}, npu=npu, optimize_partitions=False)) tei.verify(outputs, dtype, 1) diff --git a/tests/python/contrib/test_ethosn/test_concatenate.py b/tests/python/contrib/test_ethosn/test_concatenate.py index 0389b3c5b103..f8521b595060 100644 --- a/tests/python/contrib/test_ethosn/test_concatenate.py +++ b/tests/python/contrib/test_ethosn/test_concatenate.py @@ -76,7 +76,7 @@ def test_concatenate(dtype, shapes, axis): for npu in [False, True]: model = _get_model(shapes, dtype, axis) 
mod = tei.make_module(model, {}) - outputs.append(tei.build_and_run(mod, inputs, 1, {}, npu=npu)) + outputs.append(tei.build_and_run(mod, inputs, 1, {}, npu=npu, optimize_partitions=False)) tei.verify(outputs, dtype, 0) diff --git a/tests/python/contrib/test_ethosn/test_inline_partitions.py b/tests/python/contrib/test_ethosn/test_inline_partitions.py new file mode 100644 index 000000000000..79c35fc5bcb2 --- /dev/null +++ b/tests/python/contrib/test_ethosn/test_inline_partitions.py @@ -0,0 +1,167 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Tests for the 'InlineNonComputeIntensivePartitions' pass. +""" + +import tvm +from tvm import relay +from tvm.testing import requires_ethosn +from tvm.relay.op.contrib.ethosn import InlineNonComputeIntensivePartitions + +from . import infrastructure as tei + + +def _assert_structural_equal(a, b): + """Check structural equality of two Relay expressions.""" + reason = ( + "Actual and expected relay functions are not equal. " + "InlineNonComputeIntensiveSubgraphs is not correctly " + "transforming the input graph." 
+ ) + assert tvm.ir.structural_equal(a, b, map_free_vars=True), reason + + +@requires_ethosn +def test_single_reshape(): + """Check that a single reshape is inlined correctly.""" + + def get_reshape(): + x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") + return relay.reshape(x, newshape=(2, 2, 4)) + + def before(): + reshape = get_reshape() + return tei.make_ethosn_partition(reshape) + + def expected(): + reshape = get_reshape() + mod = tvm.IRModule.from_expr(reshape) + return relay.transform.InferType()(mod) + + mod = before() + mod = InlineNonComputeIntensivePartitions()(mod) + expected_mod = expected() + _assert_structural_equal(mod, expected_mod) + + +@requires_ethosn +def test_multiple_non_compute_intensive_ops(): + """ + Check that a partitioned function is correctly inlined + when it contains multiple non-compute intensive operations. + """ + + def get_graph(): + x = relay.var("x", shape=(2, 2, 4), dtype="int8") + x = relay.reshape(x, newshape=(1, 2, 2, 4)) + x = relay.clip(x, 0.0, 1.0) + x = relay.reshape(x, newshape=(2, 2, 4)) + return relay.clip(x, 0.0, 1.0) + + def before(): + func = get_graph() + return tei.make_ethosn_partition(func) + + def expected(): + func = get_graph() + mod = tvm.IRModule.from_expr(func) + return relay.transform.InferType()(mod) + + mod = before() + mod = InlineNonComputeIntensivePartitions()(mod) + expected_mod = expected() + _assert_structural_equal(mod, expected_mod) + + +@requires_ethosn +def test_compute_intensive_ops(): + """ + Check that a partitioned function that is considered + compute intensive is not inlined. 
+ """ + + def before(): + x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") + x = relay.nn.max_pool2d(x, layout="NHWC") + x = relay.reshape(x, newshape=(2, 2, 4)) + return tei.make_ethosn_partition(x) + + mod = before() + transformed_mod = InlineNonComputeIntensivePartitions()(mod) + for global_var in mod.get_global_vars(): + _assert_structural_equal(mod[global_var], transformed_mod[global_var]) + + +@requires_ethosn +def test_multiple_partitioned_functions(): + """ + Tests the pass on a number of partitioned functions. + """ + + def before(): + composite_func_name = "ethos-n_0" + inp = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") + + # partitioned func 1 (non compute intensive) + x = relay.reshape(inp, newshape=(1, 2, 2, 4)) + partitioned_func_1 = tei.make_ethosn_partition(x)[composite_func_name] + gv_1 = relay.GlobalVar("ethos-n_0") + + # partitioned func 2 (compute intensive) + x = relay.nn.max_pool2d(inp, layout="NHWC") + partitioned_func_2 = tei.make_ethosn_partition(x)[composite_func_name] + gv_2 = relay.GlobalVar("ethos-n_1") + + # partitioned func 3 (non compute intensive) + x = relay.clip(inp, 0.0, 1.0) + partitioned_func_3 = tei.make_ethosn_partition(x)[composite_func_name] + gv_3 = relay.GlobalVar("ethos-n_2") + + mod = tvm.IRModule({}) + mod[gv_1] = partitioned_func_1 + mod[gv_2] = partitioned_func_2 + mod[gv_3] = partitioned_func_3 + main_expr = relay.Call(gv_1, [inp]) + main_expr = relay.Call(gv_2, [main_expr]) + main_expr = relay.Call(gv_3, [main_expr]) + mod["main"] = relay.Function([inp], main_expr) + return relay.transform.InferType()(mod) + + def expected(): + composite_func_name = "ethos-n_0" + inp = relay.var("x", shape=(1, 2, 2, 4), dtype="int8") + + # partitioned func 2 (compute intensive) + x = relay.nn.max_pool2d(inp, layout="NHWC") + partitioned_func_2 = tei.make_ethosn_partition(x)[composite_func_name] + gv_2 = relay.GlobalVar("ethos-n_1") + + mod = tvm.IRModule({}) + mod[gv_2] = partitioned_func_2 + main_expr = relay.reshape(inp, 
newshape=(1, 2, 2, 4)) + main_expr = relay.Call(gv_2, [main_expr]) + main_expr = relay.clip(main_expr, 0.0, 1.0) + mod["main"] = relay.Function([inp], main_expr) + return relay.transform.InferType()(mod) + + mod = before() + mod = InlineNonComputeIntensivePartitions()(mod) + expected_mod = expected() + for global_var in mod.get_global_vars(): + _assert_structural_equal(mod[global_var.name_hint], expected_mod[global_var.name_hint]) diff --git a/tests/python/contrib/test_ethosn/test_leaky_relu.py b/tests/python/contrib/test_ethosn/test_leaky_relu.py index 3c3bbc709679..7c1969ec44ba 100644 --- a/tests/python/contrib/test_ethosn/test_leaky_relu.py +++ b/tests/python/contrib/test_ethosn/test_leaky_relu.py @@ -65,7 +65,7 @@ def test_leaky_relu(dtype, shape, alpha): for npu in [False, True]: model = _get_model(shape, input_zp, input_sc, output_zp, output_sc, dtype, alpha) mod = tei.make_module(model, []) - outputs.append(tei.build_and_run(mod, inputs, 1, {}, npu=npu)) + outputs.append(tei.build_and_run(mod, inputs, 1, {}, npu=npu, optimize_partitions=False)) tei.verify(outputs, dtype, 1) diff --git a/tests/python/contrib/test_ethosn/test_multiply.py b/tests/python/contrib/test_ethosn/test_multiply.py index 41c06092447a..a7b97e39cb13 100644 --- a/tests/python/contrib/test_ethosn/test_multiply.py +++ b/tests/python/contrib/test_ethosn/test_multiply.py @@ -151,7 +151,9 @@ def test_multiply_to_reinterpret_quantize(shape, constant_shape, reverse_inputs) outputs = [] for npu in [False, True]: mod = tei.make_module(model, params) - outputs.append(tei.build_and_run(mod, inputs, 1, params, npu=npu)) + outputs.append( + tei.build_and_run(mod, inputs, 1, params, npu=npu, optimize_partitions=False) + ) tei.verify(outputs, dtype, 1) diff --git a/tests/python/contrib/test_ethosn/test_relu.py b/tests/python/contrib/test_ethosn/test_relu.py index db1894931dd9..8ecea0d23ce4 100644 --- a/tests/python/contrib/test_ethosn/test_relu.py +++ b/tests/python/contrib/test_ethosn/test_relu.py @@ 
-60,7 +60,7 @@ def test_relu(dtype, shape, a_min, a_max): for npu in [False, True]: model = _get_model(inputs["a"].shape, dtype, a_min, a_max) mod = tei.make_module(model, {}) - outputs.append(tei.build_and_run(mod, inputs, 1, {}, npu=npu)) + outputs.append(tei.build_and_run(mod, inputs, 1, {}, npu=npu, optimize_partitions=False)) tei.verify(outputs, dtype, 1) diff --git a/tests/python/contrib/test_ethosn/test_requantize.py b/tests/python/contrib/test_ethosn/test_requantize.py index 3187c22f3391..618b00c6e4ee 100644 --- a/tests/python/contrib/test_ethosn/test_requantize.py +++ b/tests/python/contrib/test_ethosn/test_requantize.py @@ -64,7 +64,7 @@ def test_requantize(in_dtype, out_dtype, shape): out_dtype=out_dtype, ) mod = tei.make_module(model, []) - x = tei.build_and_run(mod, inputs, 1, {}, npu=npu) + x = tei.build_and_run(mod, inputs, 1, {}, npu=npu, optimize_partitions=False) outputs.append(x) tei.verify(outputs, out_dtype, 1) @@ -128,7 +128,7 @@ def get_model(): for npu in [False, True]: model = get_model() mod = tei.make_module(model, {}) - x = tei.build_and_run(mod, inputs, 1, {}, npu=npu) + x = tei.build_and_run(mod, inputs, 1, {}, npu=npu, optimize_partitions=False) outputs.append(x) tei.verify(outputs, out_dtype, 1) diff --git a/tests/python/contrib/test_ethosn/test_reshape.py b/tests/python/contrib/test_ethosn/test_reshape.py index 2d6eae9b2522..d60ad50b97bc 100644 --- a/tests/python/contrib/test_ethosn/test_reshape.py +++ b/tests/python/contrib/test_ethosn/test_reshape.py @@ -71,7 +71,9 @@ def test_reshape(dtype, input_shape, output_shape): for npu in [False, True]: model, params = _get_model(input_shape, output_shape, dtype) mod = tei.make_module(model, params) - outputs.append(tei.build_and_run(mod, inputs, 1, params, npu=npu)) + outputs.append( + tei.build_and_run(mod, inputs, 1, params, npu=npu, optimize_partitions=False) + ) tei.verify(outputs, dtype, 1) @@ -91,4 +93,4 @@ def test_reshape_failure(input_shape, output_shape): model, params = 
_get_model(input_shape, output_shape, "int8") mod = tei.make_module(model, params) - tei.build(mod, params, expected_host_ops=1, npu_partitions=0) + tei.build(mod, params, expected_host_ops=1, npu_partitions=0, optimize_partitions=False) diff --git a/tests/python/contrib/test_ethosn/test_split.py b/tests/python/contrib/test_ethosn/test_split.py index 57335feadbba..56e51e2de159 100644 --- a/tests/python/contrib/test_ethosn/test_split.py +++ b/tests/python/contrib/test_ethosn/test_split.py @@ -56,7 +56,9 @@ def test_split(dtype, shape, splits, axis): model = _get_model(shape, dtype, splits, axis) mod = tei.make_module(model, {}) output_count = splits if isinstance(splits, int) else len(splits) + 1 - outputs.append(tei.build_and_run(mod, inputs, output_count, {}, npu=npu)) + outputs.append( + tei.build_and_run(mod, inputs, output_count, {}, npu=npu, optimize_partitions=False) + ) tei.verify(outputs, dtype, 0) diff --git a/tests/python/contrib/test_ethosn/test_tanh.py b/tests/python/contrib/test_ethosn/test_tanh.py index 68170601c5f8..c2fc5188e5f1 100644 --- a/tests/python/contrib/test_ethosn/test_tanh.py +++ b/tests/python/contrib/test_ethosn/test_tanh.py @@ -59,7 +59,7 @@ def test_tanh(dtype, shape): for npu in [False, True]: model = _get_model(shape, zp_min + 120, 0.0250629, zp_min + 128, 0.0078125, dtype) mod = tei.make_module(model, []) - outputs.append(tei.build_and_run(mod, inputs, 1, {}, npu=npu)) + outputs.append(tei.build_and_run(mod, inputs, 1, {}, npu=npu, optimize_partitions=False)) tei.verify(outputs, dtype, 1) diff --git a/tests/python/contrib/test_ethosn/test_topologies.py b/tests/python/contrib/test_ethosn/test_topologies.py index 6425eb0faba3..a16a453d0f85 100644 --- a/tests/python/contrib/test_ethosn/test_topologies.py +++ b/tests/python/contrib/test_ethosn/test_topologies.py @@ -183,7 +183,7 @@ def get_model(input_shape, dtype, var_names): for npu in [False, True]: model = get_model(inputs["a"].shape, dtype, iter(inputs)) mod = 
tei.make_module(model, []) - outputs.append(tei.build_and_run(mod, inputs, 8, {}, npu=npu)) + outputs.append(tei.build_and_run(mod, inputs, 8, {}, npu=npu, optimize_partitions=False)) tei.verify(outputs, dtype, 1) @@ -291,6 +291,7 @@ def get_model(shape, dtype, splits, axis): npu=npu, expected_host_ops=expected_host_ops, npu_partitions=npu_partitions, + optimize_partitions=False, ) else: outputs.append( @@ -302,6 +303,7 @@ def get_model(shape, dtype, splits, axis): npu=npu, expected_host_ops=expected_host_ops, npu_partitions=npu_partitions, + optimize_partitions=False, ) ) @@ -332,7 +334,7 @@ def get_model(dtype): for npu in [False, True]: model = get_model(dtype) mod = tei.make_module(model, {}) - outputs.append(tei.build_and_run(mod, inputs, 4, {}, npu=npu)) + outputs.append(tei.build_and_run(mod, inputs, 4, {}, npu=npu, optimize_partitions=False)) tei.verify(outputs, dtype, 0) @@ -381,7 +383,33 @@ def get_model(shapes, dtype, axis): mod = tei.make_module(model, {}) else: mod = tei.make_ethosn_partition(model) - lib = tei.build(mod, {}, npu=False) + lib = tei.build(mod, {}, npu=False, optimize_partitions=False) outputs.append(tei.run(lib, inputs, 1, npu=npu)) tei.verify(outputs, dtype, 0) + + +@requires_ethosn +def test_inline_non_compute_intensive_operations(): + """Tests the case when a subgraph is unpartitioned.""" + np.random.seed(0) + dtype = "int8" + shape = (1, 2, 2, 4) + + inp = relay.var("x", shape=shape, dtype=dtype) + reshape = relay.reshape(inp, newshape=(1, 1, 4, 4)) + + inputs = { + "x": tvm.nd.array( + np.random.randint(np.iinfo(dtype).min, np.iinfo(dtype).max + 1, size=shape, dtype=dtype) + ), + } + outputs = [] + + for npu in [False, True]: + mod = tei.make_module(reshape, {}) + outputs.append( + tei.build_and_run(mod, inputs, 1, {}, npu=npu, expected_host_ops=1, npu_partitions=0) + ) + + tei.verify(outputs, dtype, 0) From 3defac896abc41d4367251ee5ee23134f08709cc Mon Sep 17 00:00:00 2001 From: Luke Hutton Date: Thu, 20 Oct 2022 10:55:48 +0000 
Subject: [PATCH 2/4] Address comments * 'inline_non_compute_intensive_partitions' -> 'is_inline_non_compute_intensive_partitions_enabled'. * remove non-MAC operations. * fix network test. Change-Id: Ie1015b27f37e47544bed6f0aff819ee4649de579 --- python/tvm/relay/op/contrib/ethosn.py | 4 ++-- .../backend/contrib/ethosn/inline_partitions.cc | 12 ++++++------ tests/python/contrib/test_ethosn/test_networks.py | 5 ++--- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/python/tvm/relay/op/contrib/ethosn.py b/python/tvm/relay/op/contrib/ethosn.py index 5952d21e7a37..9afab68ccd8f 100644 --- a/python/tvm/relay/op/contrib/ethosn.py +++ b/python/tvm/relay/op/contrib/ethosn.py @@ -88,7 +88,7 @@ def InlineNonComputeIntensivePartitions() -> tvm.ir.IRModule: # pylint: disable return _ethosn.InlineNonComputeIntensivePartitions() -def inline_non_compute_intensive_partitions() -> bool: +def is_inline_non_compute_intensive_partitions_enabled() -> bool: """ Determine whether to inline none-compute-intensive partitions. @@ -148,7 +148,7 @@ def partition_for_ethosn(mod, params=None, **opts): transform.PartitionGraph(), ConvertEquivalents(), ] - if inline_non_compute_intensive_partitions(): + if is_inline_non_compute_intensive_partitions_enabled(): passes.append(InlineNonComputeIntensivePartitions()) return tvm.transform.Sequential(passes)(mod) diff --git a/src/relay/backend/contrib/ethosn/inline_partitions.cc b/src/relay/backend/contrib/ethosn/inline_partitions.cc index 8dc7ebb1116f..739515503c0f 100644 --- a/src/relay/backend/contrib/ethosn/inline_partitions.cc +++ b/src/relay/backend/contrib/ethosn/inline_partitions.cc @@ -76,10 +76,11 @@ class IsComputeIntensivePartition : MixedModeVisitor { bool is_compute_intensive; /*! \brief A set of operators considered compute intensive. 
*/ const std::unordered_set compute_intensive_operators{ - "ethos-n.qnn_add", "ethos-n.qnn_conv2d", "ethos-n.qnn_conv2d_transpose", - "ethos-n.qnn_avg_pool2d", "ethos-n.qnn_sigmoid", "ethos-n.qnn_fc", - "ethos-n.qnn_mean", "ethos-n.qnn_resize", "nn.max_pool2d", - "nn.depth_to_space"}; + "ethos-n.qnn_conv2d", "ethos-n.qnn_conv2d_transpose", + "ethos-n.qnn_avg_pool2d", "ethos-n.qnn_sigmoid", + "ethos-n.qnn_fc", "ethos-n.qnn_mean", + "ethos-n.qnn_resize", "nn.max_pool2d", + }; }; /*! @@ -88,8 +89,7 @@ class IsComputeIntensivePartition : MixedModeVisitor { * other backends to consider. * * A partitioned function is currently considered non-compute intensive if it contains - * no multiply accumulate operations. Note that this is not an optimal heuristic, - * however, it will not degrade performance. + * no multiply accumulate operations. Note that this is not an optimal heuristic. */ tvm::transform::Pass InlineNonComputeIntensivePartitions() { runtime::TypedPackedFunc pass_func = diff --git a/tests/python/contrib/test_ethosn/test_networks.py b/tests/python/contrib/test_ethosn/test_networks.py index 5bd133ba20bb..68402cd5e8a9 100644 --- a/tests/python/contrib/test_ethosn/test_networks.py +++ b/tests/python/contrib/test_ethosn/test_networks.py @@ -146,7 +146,6 @@ def test_resnet_50_int8(): # on hardware that isn't available in CI. 
_compile_hash = { "f16dc9caa8e696bc5da8a5c6a644eb72", - "6e5fcbab831607b9da1039aff4e56871", "41acecca37b2735bd580f6ec38d8c2e0", } _test_image_network( @@ -156,8 +155,8 @@ def test_resnet_50_int8(): input_dict={"input": (1, 224, 224, 3)}, compile_hash=_compile_hash, output_count=1, - host_ops=9, - npu_partitions=3, + host_ops=10, + npu_partitions=2, ) From ea1c7404f5c0c3863e04cc80c5dd98cf899812ce Mon Sep 17 00:00:00 2001 From: Luke Hutton Date: Thu, 20 Oct 2022 13:31:06 +0000 Subject: [PATCH 3/4] Fix failing unit tests due to optimization Change-Id: I0ee0af071dc77c91e0ef0f6753506cb40d1d1859 --- .../contrib/test_ethosn/test_addition.py | 2 +- .../test_ethosn/test_depth_to_space.py | 2 +- .../contrib/test_ethosn/test_topologies.py | 27 ++++++++----------- 3 files changed, 13 insertions(+), 18 deletions(-) diff --git a/tests/python/contrib/test_ethosn/test_addition.py b/tests/python/contrib/test_ethosn/test_addition.py index 76bda5e0b7d4..53afd01b8449 100644 --- a/tests/python/contrib/test_ethosn/test_addition.py +++ b/tests/python/contrib/test_ethosn/test_addition.py @@ -111,7 +111,7 @@ def test_addition(dtype, shape): model = _get_model(shape, shape, lhs_zp, lhs_sc, rhs_zp, rhs_sc, out_zp, out_sc, dtype) for npu in [False, True]: mod = tei.make_module(model, []) - outputs.append(tei.build_and_run(mod, inputs, 1, {}, npu=npu)) + outputs.append(tei.build_and_run(mod, inputs, 1, {}, npu=npu, optimize_partitions=False)) tei.verify(outputs, dtype, 1) diff --git a/tests/python/contrib/test_ethosn/test_depth_to_space.py b/tests/python/contrib/test_ethosn/test_depth_to_space.py index 732932d8f324..814693b664ca 100644 --- a/tests/python/contrib/test_ethosn/test_depth_to_space.py +++ b/tests/python/contrib/test_ethosn/test_depth_to_space.py @@ -53,7 +53,7 @@ def test_depth_to_space(dtype, shape): for npu in [False, True]: model = _get_model(shape, 2, dtype, "NHWC") mod = tei.make_module(model, {}) - outputs.append(tei.build_and_run(mod, inputs, 1, {}, npu=npu)) + 
outputs.append(tei.build_and_run(mod, inputs, 1, {}, npu=npu, optimize_partitions=False)) tei.verify(outputs, dtype, 1) diff --git a/tests/python/contrib/test_ethosn/test_topologies.py b/tests/python/contrib/test_ethosn/test_topologies.py index a16a453d0f85..4a4fc1e4d126 100644 --- a/tests/python/contrib/test_ethosn/test_topologies.py +++ b/tests/python/contrib/test_ethosn/test_topologies.py @@ -81,23 +81,18 @@ def get_model(input_shape, dtype, var_names): expected_host_ops = 0 npu_partitions = 1 - # Mock inference is only supported when the whole graph is offloaded to the NPU - if ethosn_available() == Available.SW_ONLY: - tei.build( - mod, {}, npu=npu, expected_host_ops=expected_host_ops, npu_partitions=npu_partitions - ) - else: - outputs.append( - tei.build_and_run( - mod, - inputs, - 1, - {}, - npu=npu, - expected_host_ops=expected_host_ops, - npu_partitions=npu_partitions, - ) + outputs.append( + tei.build_and_run( + mod, + inputs, + 1, + {}, + npu=npu, + expected_host_ops=expected_host_ops, + npu_partitions=npu_partitions, + optimize_partitions=False, ) + ) if outputs: tei.verify(outputs, dtype, 2) From bd0bf6c10ece5a3950b2d8772291db93b428f77d Mon Sep 17 00:00:00 2001 From: Luke Hutton Date: Fri, 21 Oct 2022 08:17:36 +0000 Subject: [PATCH 4/4] Add future exploration suggestions Change-Id: Ie918d7f1059f032282f1f5eeffda38f4febcd59c --- src/relay/backend/contrib/ethosn/inline_partitions.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/relay/backend/contrib/ethosn/inline_partitions.cc b/src/relay/backend/contrib/ethosn/inline_partitions.cc index 739515503c0f..f8cc3fc00d10 100644 --- a/src/relay/backend/contrib/ethosn/inline_partitions.cc +++ b/src/relay/backend/contrib/ethosn/inline_partitions.cc @@ -90,6 +90,12 @@ class IsComputeIntensivePartition : MixedModeVisitor { * * A partitioned function is currently considered non-compute intensive if it contains * no multiply accumulate operations. Note that this is not an optimal heuristic. 
+ * + * Some suggestions for future exploration: + * - Making a better choice about large non-compute-intensive subgraphs + * as currently these are inlined. + * - Allowing the user to input ops that are considered compute-intensive. + * - Inline "small" compute intensive operations. */ tvm::transform::Pass InlineNonComputeIntensivePartitions() { runtime::TypedPackedFunc pass_func =