From 0a523854cd55734ad6a942e9c78833794bd59f86 Mon Sep 17 00:00:00 2001 From: Chris Sidebottom Date: Fri, 22 Oct 2021 14:20:50 +0100 Subject: [PATCH 1/2] [AOT][DeviceAPI] Connecting devices structure to relevant operators This patch adds support for passing the device context via the unpacked API in AOT, generating an additional struct if necessary: ```c /*! * \brief Device context pointers for TVM module "default" */ struct tvmgen_default_devices { void* npu; }; ``` Which is then added as an argument to the entry function: ```c /*! * \brief entrypoint function for TVM module "default" * \param inputs Input tensors for the module * \param outputs Output tensors for the module * \param devices Device context pointers for the module */ int32_t tvmgen_default_run( struct tvmgen_default_inputs* inputs, struct tvmgen_default_outputs* outputs, struct tvmgen_default_devices* devices ); ``` I've temporarily added the collection of external code generators to the TE compiler pending proper annotation of the eventual functions. 
Co-authored-by: Grant Watson --- apps/microtvm/ethosu/Makefile | 4 +- .../ethosu/include/tvm_ethosu_runtime.h | 30 +++++ apps/microtvm/ethosu/src/demo.c | 7 +- apps/microtvm/ethosu/src/tvm_ethosu_runtime.c | 34 ++++++ python/tvm/driver/tvmc/composite_target.py | 2 +- python/tvm/micro/model_library_format.py | 7 +- .../relay/backend/contrib/ethosu/_ffi_api.py | 2 +- .../relay/backend/contrib/ethosu/codegen.py | 6 +- .../relay/backend/contrib/ethosu/legalize.py | 2 +- .../tvm/relay/backend/contrib/ethosu/util.py | 2 +- .../relay/backend/contrib/ethosu/vela_api.py | 2 +- python/tvm/relay/backend/executor_factory.py | 11 +- python/tvm/relay/build_module.py | 16 ++- python/tvm/relay/op/contrib/ethosu.py | 30 ++--- src/relay/backend/aot_executor_codegen.cc | 73 ++++++++++-- src/relay/backend/build_module.cc | 6 + .../backend/contrib/ethosu/compiler_attrs.cc | 6 +- .../backend/contrib/ethosu/preprocess.cc | 4 +- .../backend/contrib/ethosu/source_module.cc | 31 ++--- src/relay/backend/graph_executor_codegen.cc | 3 + src/relay/backend/te_compiler.cc | 15 ++- src/relay/backend/te_compiler.h | 7 ++ src/relay/transforms/partition_graph.cc | 1 + .../ethosu/bare_metal/tvm_ethosu_runtime.c | 34 ++++++ .../ethosu/bare_metal/tvm_ethosu_runtime.h | 30 +++++ src/runtime/meta_data.h | 6 +- src/target/source/interface_c.cc | 39 ++++-- src/target/source/source_module.cc | 28 ++++- src/target/target_kind.cc | 2 + tests/cpp/target/source/interface_c_test.cc | 112 ++++++++++++++++-- tests/micro/zephyr/test_zephyr_aot.py | 4 +- tests/micro/zephyr/test_zephyr_armv7m.py | 2 +- tests/python/contrib/test_ethosu/infra.py | 7 +- .../contrib/test_ethosu/test_attr_passing.py | 6 +- .../contrib/test_ethosu/test_codegen.py | 18 +-- .../contrib/test_ethosu/test_legalize.py | 44 +++---- .../contrib/test_ethosu/test_preprocess.py | 14 +-- tests/python/relay/aot/aot_test_utils.py | 61 +++++++--- tests/python/relay/aot/corstone300.mk | 8 +- 39 files changed, 564 insertions(+), 152 deletions(-) create mode 
100644 apps/microtvm/ethosu/include/tvm_ethosu_runtime.h create mode 100644 apps/microtvm/ethosu/src/tvm_ethosu_runtime.c create mode 100644 src/runtime/contrib/ethosu/bare_metal/tvm_ethosu_runtime.c create mode 100644 src/runtime/contrib/ethosu/bare_metal/tvm_ethosu_runtime.h diff --git a/apps/microtvm/ethosu/Makefile b/apps/microtvm/ethosu/Makefile index 370799972de6..d624571840f5 100644 --- a/apps/microtvm/ethosu/Makefile +++ b/apps/microtvm/ethosu/Makefile @@ -35,7 +35,7 @@ RANLIB = arm-none-eabi-ranlib PKG_CFLAGS = ${PKG_COMPILE_OPTS} \ -I${STANDALONE_CRT_PATH}/include \ -I${STANDALONE_CRT_PATH}/src/runtime/crt/include \ - -Iinclude \ + -I${PWD}/include \ -I${CORSTONE_300_PATH} \ -I${ETHOSU_PATH}/core_driver/include \ -I${CMSIS_PATH}/Device/ARM/${ARM_CPU}/Include/ \ @@ -95,7 +95,7 @@ ${BUILD_DIR}/ethosu_core_driver/libethosu_core_driver.a: $(QUIET)cd $(abspath $(BUILD_DIR)/ethosu_core_driver) && $(MAKE) # Build demo application -$(BUILD_DIR)/demo: src/demo.c $(BUILD_DIR)/stack_allocator.o $(BUILD_DIR)/crt_backend_api.o ${BUILD_DIR}/libcodegen.a ${BUILD_DIR}/libcmsis_startup.a ${BUILD_DIR}/ethosu_core_driver/libethosu_core_driver.a ${BUILD_DIR}/libuart.a +$(BUILD_DIR)/demo: src/demo.c src/tvm_ethosu_runtime.c $(BUILD_DIR)/stack_allocator.o $(BUILD_DIR)/crt_backend_api.o ${BUILD_DIR}/libcodegen.a ${BUILD_DIR}/libcmsis_startup.a ${BUILD_DIR}/ethosu_core_driver/libethosu_core_driver.a ${BUILD_DIR}/libuart.a $(QUIET)mkdir -p $(@D) $(QUIET)$(CC) $(PKG_CFLAGS) -o $@ $^ $(PKG_LDFLAGS) diff --git a/apps/microtvm/ethosu/include/tvm_ethosu_runtime.h b/apps/microtvm/ethosu/include/tvm_ethosu_runtime.h new file mode 100644 index 000000000000..06188baaa9ee --- /dev/null +++ b/apps/microtvm/ethosu/include/tvm_ethosu_runtime.h @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef TVM_RUNTIME_CONTRIB_ETHOSU_ETHOSU_RUNTIME_H_ +#define TVM_RUNTIME_CONTRIB_ETHOSU_ETHOSU_RUNTIME_H_ + +#include +#include +#include + +int32_t TVMEthosULaunch(struct ethosu_driver* resource_handle, void* cms_data, size_t cms_data_size, + uint64_t* base_addrs, size_t* base_addrs_size, int num_tensors); + +#endif // TVM_RUNTIME_CONTRIB_ETHOSU_ETHOSU_RUNTIME_H_ diff --git a/apps/microtvm/ethosu/src/demo.c b/apps/microtvm/ethosu/src/demo.c index 5ad4353b0a12..1ee13db4e755 100644 --- a/apps/microtvm/ethosu/src/demo.c +++ b/apps/microtvm/ethosu/src/demo.c @@ -45,7 +45,12 @@ int main(int argc, char** argv) { struct tvmgen_default_inputs inputs = { .input = input, }; - tvmgen_default_run(&inputs, &outputs); + struct ethosu_driver* driver = ethosu_reserve_driver(); + struct tvmgen_default_devices devices = { + .ethos_u = driver, + }; + tvmgen_default_run(&inputs, &outputs, &devices); + ethosu_release_driver(driver); // Calculate index of max value uint8_t max_value = 0; diff --git a/apps/microtvm/ethosu/src/tvm_ethosu_runtime.c b/apps/microtvm/ethosu/src/tvm_ethosu_runtime.c new file mode 100644 index 000000000000..6b7399b67406 --- /dev/null +++ b/apps/microtvm/ethosu/src/tvm_ethosu_runtime.c @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "tvm_ethosu_runtime.h" + +#include + +int32_t TVMEthosULaunch(struct ethosu_driver* driver, void* cms_data, size_t cms_data_size, + uint64_t* base_addrs, size_t* base_addrs_size, int num_tensors) { + int32_t result = + ethosu_invoke(driver, cms_data, cms_data_size, base_addrs, base_addrs_size, num_tensors); + + // Map errors in invoke to TVM errors + if (result != 0) { + return -1; + } + return 0; +} diff --git a/python/tvm/driver/tvmc/composite_target.py b/python/tvm/driver/tvmc/composite_target.py index 0c04d2b7248f..848af1e4ee4e 100644 --- a/python/tvm/driver/tvmc/composite_target.py +++ b/python/tvm/driver/tvmc/composite_target.py @@ -65,7 +65,7 @@ "pass_pipeline": partition_for_ethosn78, }, "ethos-u": { - "config_key": "relay.ext.ethosu.options", + "config_key": "relay.ext.ethos-u.options", "pass_pipeline": partition_for_ethosu, }, "bnns": { diff --git a/python/tvm/micro/model_library_format.py b/python/tvm/micro/model_library_format.py index f031acec0d76..038cd0d04ff0 100644 --- a/python/tvm/micro/model_library_format.py +++ b/python/tvm/micro/model_library_format.py @@ -44,13 +44,13 @@ class UnsupportedInModelLibraryFormatError(Exception): """Raised when export_model_library_format does not support the given Module tree.""" -def 
generate_c_interface_header(module_name, inputs, outputs, include_path): +def generate_c_interface_header(module_name, inputs, outputs, devices, include_path): """Generate C Interface header to be included in MLF""" mangled_name = to_c_variable_style(prefix_generated_name(module_name)) metadata_header = os.path.join(include_path, f"{mangled_name}.h") interface_c_create = tvm._ffi.get_global_func("runtime.InterfaceCCreate") - interface_c_module = interface_c_create(module_name, inputs, outputs) + interface_c_module = interface_c_create(module_name, inputs, outputs, devices) with open(metadata_header, "w") as header_file: header_file.write(interface_c_module.get_source()) @@ -318,7 +318,8 @@ def _export_graph_model_library_format( include_path = codegen_dir / "host" / "include" include_path.mkdir() inputs, outputs = _get_inputs_and_outputs_from_module(mod) - generate_c_interface_header(mod.libmod_name, inputs, outputs, include_path) + devices = mod.get_devices() + generate_c_interface_header(mod.libmod_name, inputs, outputs, devices, include_path) parameters_dir = tempdir / "parameters" parameters_dir.mkdir() diff --git a/python/tvm/relay/backend/contrib/ethosu/_ffi_api.py b/python/tvm/relay/backend/contrib/ethosu/_ffi_api.py index ccf1039a6994..22eb9820a2bb 100644 --- a/python/tvm/relay/backend/contrib/ethosu/_ffi_api.py +++ b/python/tvm/relay/backend/contrib/ethosu/_ffi_api.py @@ -17,4 +17,4 @@ """FFI APIs for relay transformation passes.""" import tvm._ffi # type: ignore -tvm._ffi._init_api("relay.ext.ethosu", __name__) +tvm._ffi._init_api("relay.ext.ethos-u", __name__) diff --git a/python/tvm/relay/backend/contrib/ethosu/codegen.py b/python/tvm/relay/backend/contrib/ethosu/codegen.py index 827146f77c80..5fe51b4cbda0 100644 --- a/python/tvm/relay/backend/contrib/ethosu/codegen.py +++ b/python/tvm/relay/backend/contrib/ethosu/codegen.py @@ -24,7 +24,7 @@ from tvm.relay.backend.contrib.ethosu import util -@tvm._ffi.register_func("relay.ext.ethosu") 
+@tvm._ffi.register_func("relay.ext.ethos-u") def ethosu_compiler(external_function): """The entry-point to a compile a external relay function of NPU compatible operators to generated command stream. @@ -38,11 +38,11 @@ def ethosu_compiler(external_function): input_size = util.calculate_size_bytes(external_function.params[0]) output_size = util.calculate_size_bytes(external_function.body) cmms, encoded_constants, scratch_size = _compile(external_function) - ethosu_runtime = tvm._ffi.get_global_func("runtime.module.ethosu.create") + ethosu_runtime = tvm._ffi.get_global_func("runtime.module.ethos-u.create") return ethosu_runtime(func_name, cmms, encoded_constants, scratch_size, input_size, output_size) -@tvm._ffi.register_func("relay.ext.ethosu.constant_updater") +@tvm._ffi.register_func("relay.ext.ethos-u.constant_updater") def constant_updater(expr, symbol): # pylint: disable=unused-argument """ The constant updater process happen after lowering in the core compiler. diff --git a/python/tvm/relay/backend/contrib/ethosu/legalize.py b/python/tvm/relay/backend/contrib/ethosu/legalize.py index d0d04cebaefe..7a6335151003 100644 --- a/python/tvm/relay/backend/contrib/ethosu/legalize.py +++ b/python/tvm/relay/backend/contrib/ethosu/legalize.py @@ -128,7 +128,7 @@ class Conv2DRewriter(DFPatternCallback): def __init__(self): super().__init__(require_type=True) - self.pattern = (wildcard().has_attr({"Composite": "ethosu.qnn_conv2d"}))(wildcard()) + self.pattern = (wildcard().has_attr({"Composite": "ethos-u.qnn_conv2d"}))(wildcard()) def callback( self, pre: tvm.relay.Expr, post: tvm.relay.Expr, node_map: tvm.ir.container.Map diff --git a/python/tvm/relay/backend/contrib/ethosu/util.py b/python/tvm/relay/backend/contrib/ethosu/util.py index 8afb6eb9b9ee..370821aefa7e 100644 --- a/python/tvm/relay/backend/contrib/ethosu/util.py +++ b/python/tvm/relay/backend/contrib/ethosu/util.py @@ -154,7 +154,7 @@ def round_up(a: int, b: int) -> int: def get_accelerator_config(): """Get 
the variant of the accelerator to compile for""" - compiler_attrs = tvm.get_global_func("relay.ext.ethosu.get_compiler_attrs")() + compiler_attrs = tvm.get_global_func("relay.ext.ethos-u.get_compiler_attrs")() return compiler_attrs.accelerator_config diff --git a/python/tvm/relay/backend/contrib/ethosu/vela_api.py b/python/tvm/relay/backend/contrib/ethosu/vela_api.py index 69095e43416e..345d459c1a57 100644 --- a/python/tvm/relay/backend/contrib/ethosu/vela_api.py +++ b/python/tvm/relay/backend/contrib/ethosu/vela_api.py @@ -381,7 +381,7 @@ def get_accelerator_config() -> vapi.NpuAccelerator: "ethos-u55-64": vapi.NpuAccelerator.Ethos_U55_64, "ethos-u55-32": vapi.NpuAccelerator.Ethos_U55_32, } - compiler_attrs = tvm.get_global_func("relay.ext.ethosu.get_compiler_attrs")() + compiler_attrs = tvm.get_global_func("relay.ext.ethos-u.get_compiler_attrs")() accel_config_str = compiler_attrs.accelerator_config assert accel_config_str in npu_accel_str_map.keys(), f"{accel_config_str} is not supported" return npu_accel_str_map[accel_config_str] diff --git a/python/tvm/relay/backend/executor_factory.py b/python/tvm/relay/backend/executor_factory.py index 7b147b440f40..db33c1b7844a 100644 --- a/python/tvm/relay/backend/executor_factory.py +++ b/python/tvm/relay/backend/executor_factory.py @@ -85,9 +85,11 @@ class AOTExecutorFactoryModule(ExecutorFactoryModule): The parameters of module function_metadata : Map of String to FunctionInfo This holds a map function names to their information + devices : List[str] + List of devices used in the module """ - def __init__(self, ir_mod, target, libmod, libmod_name, params, function_metadata): + def __init__(self, ir_mod, target, libmod, libmod_name, params, function_metadata, devices): self.ir_mod = ir_mod self.target = target self.lib = libmod @@ -95,6 +97,10 @@ def __init__(self, ir_mod, target, libmod, libmod_name, params, function_metadat self.params = params self.iter_cnt = 0 self.function_metadata = function_metadata + self.devices 
= devices + + def get_devices(self): + return self.devices def get_params(self): return self.params @@ -152,6 +158,9 @@ def __init__( def export_library(self, file_name, fcompile=None, addons=None, **kwargs): return self.module.export_library(file_name, fcompile, addons, **kwargs) + def get_devices(self): + return [] + def get_params(self): return self.params diff --git a/python/tvm/relay/build_module.py b/python/tvm/relay/build_module.py index f1686d2a03bb..9de2c0e91550 100644 --- a/python/tvm/relay/build_module.py +++ b/python/tvm/relay/build_module.py @@ -101,6 +101,7 @@ def __init__(self): self._set_params_func = self.mod["set_params"] self._get_params_func = self.mod["get_params"] self._get_function_metadata = self.mod["get_function_metadata"] + self._get_devices = self.mod["get_devices"] def build( self, mod, target=None, target_host=None, params=None, executor="graph", mod_name=None @@ -231,6 +232,10 @@ def get_function_metadata(self): each PrimFunc""" return self._get_function_metadata() + def get_devices(self): + """Returns a list of devices configured in this module""" + return self._get_devices() + def get_params(self): """Return the updated weights.""" params = self._get_params_func() @@ -358,14 +363,21 @@ def build(ir_mod, target=None, target_host=None, params=None, mod_name="default" mod=ir_mod, target=target, params=params, executor=executor, mod_name=mod_name ) func_metadata = bld_mod.get_function_metadata() + devices = bld_mod.get_devices() if executor == "aot": executor_factory = _executor_factory.AOTExecutorFactoryModule( - ir_mod, target, runtime_mod, mod_name, params, func_metadata + ir_mod, target, runtime_mod, mod_name, params, func_metadata, devices ) elif executor == "graph": executor_factory = _executor_factory.GraphExecutorFactoryModule( - ir_mod, target, executor_config, runtime_mod, mod_name, params, func_metadata + ir_mod, + target, + executor_config, + runtime_mod, + mod_name, + params, + func_metadata, ) else: assert False, "Executor 
" + executor + " not supported" diff --git a/python/tvm/relay/op/contrib/ethosu.py b/python/tvm/relay/op/contrib/ethosu.py index 25538cae9dbc..a255f93189c9 100644 --- a/python/tvm/relay/op/contrib/ethosu.py +++ b/python/tvm/relay/op/contrib/ethosu.py @@ -186,7 +186,7 @@ class QnnConv2DParams: and extract quantization information of all the associated tensors. """ - composite_name = "ethosu.qnn_conv2d" + composite_name = "ethos-u.qnn_conv2d" # The NPU only supports padding upto the numbers as follows padding_bounds = [31, 31, 32, 32] activation_map = {"clip": "CLIP"} @@ -275,7 +275,7 @@ class QnnDepthwiseConv2DParams(QnnConv2DParams): and extract the parameter information. """ - composite_name = "ethosu.depthwise_conv2d" + composite_name = "ethos-u.depthwise_conv2d" # The hardware only supports padding upto the numbers as follows padding_bounds = [31, 31, 32, 32] @@ -343,11 +343,11 @@ def qnn_depthwise_conv2d_pattern() -> tvm.relay.dataflow_pattern.DFPattern: class MaxPool2DParams: """ - This class will parse a call to a ethosu.maxpool2d composite function + This class will parse a call to a ethos-u.maxpool2d composite function and extract the parameter information. """ - composite_name = "ethosu.maxpool2d" + composite_name = "ethos-u.maxpool2d" # The hardware only supports padding upto the numbers as follows padding_bounds = [127, 127, 128, 128] @@ -399,11 +399,11 @@ def qnn_maxpool2d_pattern() -> tvm.relay.dataflow_pattern.DFPattern: class AvgPool2DParams: """ - This class will parse a call to a ethosu.avgpool2d composite function + This class will parse a call to a ethos-u.avgpool2d composite function and extract the parameter information. """ - composite_name = "ethosu.avgpool2d" + composite_name = "ethos-u.avgpool2d" # The hardware only supports padding upto the numbers as follows padding_bounds = [127, 127, 128, 128] @@ -547,7 +547,7 @@ class AddParams(BinaryElementwiseParams): and extract the parameter information. 
""" - composite_name = "ethosu.add" + composite_name = "ethos-u.add" def __init__(self, func_body: Call): BinaryElementwiseParams.__init__(self, func_body, "ADD", True) @@ -589,7 +589,7 @@ class SubParams(BinaryElementwiseParams): and extract the parameter information. """ - composite_name = "ethosu.sub" + composite_name = "ethos-u.sub" def __init__(self, func_body: Call): BinaryElementwiseParams.__init__(self, func_body, "SUB", True) @@ -631,7 +631,7 @@ class MulParams(BinaryElementwiseParams): and extract the parameter information. """ - composite_name = "ethosu.mul" + composite_name = "ethos-u.mul" def __init__(self, func_body: Call): BinaryElementwiseParams.__init__(self, func_body, "MUL", True) @@ -673,7 +673,7 @@ class MinParams(BinaryElementwiseParams): and extract the parameter information. """ - composite_name = "ethosu.min" + composite_name = "ethos-u.min" def __init__(self, func_body: Call): BinaryElementwiseParams.__init__(self, func_body, "MIN", False) @@ -708,7 +708,7 @@ class MaxParams(BinaryElementwiseParams): and extract the parameter information. """ - composite_name = "ethosu.max" + composite_name = "ethos-u.max" def __init__(self, func_body: Call): BinaryElementwiseParams.__init__(self, func_body, "MAX", False) @@ -743,7 +743,7 @@ class ShlParams(BinaryElementwiseParams): and extract the parameter information. 
""" - composite_name = "ethosu.shl" + composite_name = "ethos-u.shl" def __init__(self, func_body: Call): BinaryElementwiseParams.__init__(self, func_body, "SHL", False) @@ -768,7 +768,7 @@ def shl_pattern() -> tvm.relay.dataflow_pattern.DFPattern: return pattern -@register_pattern_table("ethosu") +@register_pattern_table("ethos-u") def pattern_table() -> List[Tuple[str, tvm.relay.dataflow_pattern.DFPattern, Callable]]: return [ ( @@ -848,10 +848,10 @@ def partition_for_ethosu( if params: mod["main"] = bind_params_by_name(mod["main"], params) - pattern = relay.op.contrib.get_pattern_table("ethosu") + pattern = relay.op.contrib.get_pattern_table("ethos-u") mod = relay.transform.InferType()(mod) mod = relay.transform.MergeComposite(pattern)(mod) - mod = relay.transform.AnnotateTarget("ethosu")(mod) + mod = relay.transform.AnnotateTarget("ethos-u")(mod) mod = relay.transform.MergeCompilerRegions()(mod) mod = relay.transform.InferType()(mod) mod = relay.transform.PartitionGraph()(mod) diff --git a/src/relay/backend/aot_executor_codegen.cc b/src/relay/backend/aot_executor_codegen.cc index 58bcccf90879..1c26b3e0af76 100644 --- a/src/relay/backend/aot_executor_codegen.cc +++ b/src/relay/backend/aot_executor_codegen.cc @@ -18,8 +18,8 @@ */ /*! 
- * \file relay/backend/graph_codegen.cc - * \brief Graph runtime codegen + * \file src/relay/backend/aot_executor_codegen.cc + * \brief AOT executor codegen */ #include @@ -43,6 +43,7 @@ #include "../op/annotation/annotation.h" #include "../op/call/call.h" #include "../transforms/device_aware_visitors.h" +#include "./name_transforms.h" #include "./te_compiler.h" #include "./utils.h" @@ -315,7 +316,6 @@ class AOTExecutorCodegen : public MixedModeVisitor { */ void CreateFuncCall(CallLoweredProps call_lowered_props, Call call) { std::string func_name = call_lowered_props.lowered_func->name_hint; - tvm::Array args{tvm::tir::StringImm(func_name)}; std::vector create_func_call_stmts; @@ -345,15 +345,21 @@ class AOTExecutorCodegen : public MixedModeVisitor { calling_pattern = tvm::tir::builtin::call_extern(); } - create_func_call_stmts.push_back( - tir::Evaluate(tvm::tir::Call(DataType::Int(32), calling_pattern, args))); + GlobalVar global_var = call_lowered_props.lowered_func; + bool has_c_device_api_context = device_contexts_.count(global_var) != 0; + if (has_c_device_api_context) { + args.push_back(device_contexts_[global_var]); + } + + tir::Evaluate func_call(tvm::tir::Call(DataType::Int(32), calling_pattern, args)); + create_func_call_stmts.push_back(func_call); tir::Stmt body = tir::SeqStmt(create_func_call_stmts); stmts_.push_back(body); } /*! - * brief Copy a variable to the output. This function is mainly used in edge cases + * \brief Copy a variable to the output. This function is mainly used in edge cases * when we want to return an input or a parameter. * TODO(giuseros): we should try to avoid unnecessary copy to the output, e.g., in a * copy-on-write fashion. 
@@ -386,6 +392,39 @@ class AOTExecutorCodegen : public MixedModeVisitor { stmts_.push_back(tir::LetStmt(tmp1, tostore, copy)); } + /*! + * \brief Collects device context variables for passing to operators + */ + void CollectDeviceVariables(const Map& device_contexts) { + Map target_contexts; + TargetKindAttrMap target_attr_map = tvm::TargetKind::GetAttrMap("c_device_api"); + + for (const auto& it : device_contexts) { + const GlobalVar& global_var = it.first; + const std::string device_context_name = it.second; + + Optional target_kind = tvm::TargetKind::Get(device_context_name); + if (!target_kind || !target_attr_map.count(target_kind.value())) { + continue; // not a C Device API target + } + if (target_attr_map[target_kind.value()]) { + std::string context_name = SanitizeName(device_context_name); + tir::Var device_context_var("device_context_" + context_name, DataType::Handle()); + + auto pair = target_contexts.find(target_kind.value()); + if (pair != target_contexts.end()) { + device_context_var = (*pair).second; + } else { + main_signature_.push_back(device_context_var); + devices_.push_back(context_name); + target_contexts.Set(target_kind.value(), device_context_var); + } + + device_contexts_.Set(global_var, device_context_var); + } + } + } + /*! * Utility function to string together different arguments */ @@ -557,6 +596,10 @@ class AOTExecutorCodegen : public MixedModeVisitor { runtime::Module* mod_; /*! \brief list of input expressions (i.e., variable passed by the user) */ std::vector input_vars_; + /*! \brief list of device contexts used */ + std::vector devices_; + /*! \brief map of GlobalVars to C Device API contexts */ + Map device_contexts_; /*! \brief input and output variables belonging to the main function signature */ Array main_signature_; /*! 
\brief target device */ @@ -670,6 +713,7 @@ class AOTExecutorCodegen : public MixedModeVisitor { main_signature_.push_back(tir::Var("output", DataType::Handle())); } + CollectDeviceVariables(lowered_mod->GetAttr>("device_contexts").value()); VisitExpr(lowered_main_func->body); // Create the runner function. Please note that the function is not legal yet @@ -733,11 +777,18 @@ class AOTExecutorCodegen : public MixedModeVisitor { std::vector input_var_names(input_vars_.size()); std::transform(input_vars_.begin(), input_vars_.end(), input_var_names.begin(), [](Var input_var) -> String { return input_var->name_hint(); }); - ret.metadata = - runtime::Metadata(input_var_names, return_sid_.size(), runtime::kTvmExecutorAot, mod_name); + + ret.metadata = runtime::Metadata(input_var_names, devices_, return_sid_.size(), + runtime::kTvmExecutorAot, mod_name); return ret; } -}; + + /*! + * \brief Get list of devices found + * \return List of devices + */ + Array ListDevices() { return devices_; } +}; class AOTExecutorCodegenModule : public runtime::ModuleNode { public: @@ -780,6 +831,10 @@ class AOTExecutorCodegenModule : public runtime::ModuleNode { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->output_.function_metadata; }); + } else if (name == "get_devices") { + return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + *rv = this->codegen_->ListDevices(); + }); } else if (name == "get_metadata") { return PackedFunc( [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = output_.metadata; }); diff --git a/src/relay/backend/build_module.cc b/src/relay/backend/build_module.cc index 4dd12ad1d106..ead1bf757a05 100644 --- a/src/relay/backend/build_module.cc +++ b/src/relay/backend/build_module.cc @@ -95,6 +95,8 @@ struct ExecutorCodegen { return CallFunc>("get_irmodule", nullptr); } + Array ListDevices() { return CallFunc>("get_devices"); } + runtime::Metadata GetMetadata() { return 
CallFunc("get_metadata"); } virtual ~ExecutorCodegen() {} @@ -192,6 +194,10 @@ class RelayBuildModule : public runtime::ModuleNode { this->SetParam(kv.first, kv.second->data); } }); + } else if (name == "get_devices") { + return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { + *rv = this->executor_codegen_->ListDevices(); + }); } else if (name == "get_irmodule") { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->executor_codegen_->GetIRModule(); diff --git a/src/relay/backend/contrib/ethosu/compiler_attrs.cc b/src/relay/backend/contrib/ethosu/compiler_attrs.cc index 6a87d11d5d6a..5795db29b490 100644 --- a/src/relay/backend/contrib/ethosu/compiler_attrs.cc +++ b/src/relay/backend/contrib/ethosu/compiler_attrs.cc @@ -55,17 +55,17 @@ class EthosUCompilerConfig : public Attrs { }; TVM_REGISTER_NODE_TYPE(EthosUCompilerConfigNode); -TVM_REGISTER_PASS_CONFIG_OPTION("relay.ext.ethosu.options", EthosUCompilerConfig); +TVM_REGISTER_PASS_CONFIG_OPTION("relay.ext.ethos-u.options", EthosUCompilerConfig); auto GetCompilerAttrs() { auto ctx = transform::PassContext::Current(); - auto cfg = ctx->GetConfig("relay.ext.ethosu.options"); + auto cfg = ctx->GetConfig("relay.ext.ethos-u.options"); if (!cfg.defined()) { cfg = AttrsWithDefaultValues(); } return cfg; } -TVM_REGISTER_GLOBAL("relay.ext.ethosu.get_compiler_attrs").set_body_typed(GetCompilerAttrs); +TVM_REGISTER_GLOBAL("relay.ext.ethos-u.get_compiler_attrs").set_body_typed(GetCompilerAttrs); } // namespace ethosu } // namespace contrib diff --git a/src/relay/backend/contrib/ethosu/preprocess.cc b/src/relay/backend/contrib/ethosu/preprocess.cc index ac52844091b4..571a56ad97c0 100644 --- a/src/relay/backend/contrib/ethosu/preprocess.cc +++ b/src/relay/backend/contrib/ethosu/preprocess.cc @@ -198,7 +198,7 @@ class ExternalFuncIOHandler : public ExprRewriter { return post; } if (auto compiler = func->GetAttr(attr::kCompiler)) { - if (compiler == "ethosu") { + if (compiler == 
"ethos-u") { auto ext_input = std::move(post_call->args[0]); auto arg_dtype = Downcast(post_call->args[0]->checked_type())->dtype; if (post_call->args.size() > 1) { @@ -261,7 +261,7 @@ Pass PreprocessExternalFuncIO() { return Sequential({preprocess_pass, InferType()}); } -TVM_REGISTER_GLOBAL("relay.ext.ethosu.PreprocessExternalFuncIO") +TVM_REGISTER_GLOBAL("relay.ext.ethos-u.PreprocessExternalFuncIO") .set_body_typed(transform::PreprocessExternalFuncIO); } // namespace transform diff --git a/src/relay/backend/contrib/ethosu/source_module.cc b/src/relay/backend/contrib/ethosu/source_module.cc index e3f48bc27617..18a6951bdd9a 100644 --- a/src/relay/backend/contrib/ethosu/source_module.cc +++ b/src/relay/backend/contrib/ethosu/source_module.cc @@ -183,7 +183,7 @@ class EthosUModuleNode : public ModuleNode { */ void PrintRuntimeFunctionHeader(std::stringstream& ss, std::string func_name) { ss << "TVM_DLL int32_t "; - ss << func_name << "(void* input, void* output) {\n"; + ss << func_name << "(void* input, void* output, void* resource_handle) {\n"; } /*! 
@@ -219,7 +219,7 @@ class EthosUModuleNode : public ModuleNode { ss << "#include \n"; ss << "#include \n"; ss << "#include \n"; - ss << "#include \n"; + ss << "#include \n"; ss << "\n"; size_t weights_size = (weights_bias_hex.size() / 2); ss << "static const size_t weights_size = " << std::to_string(weights_size) << ";\n"; @@ -243,7 +243,7 @@ class EthosUModuleNode : public ModuleNode { PrintExternCPrefix(ss); ss << "static int32_t " << func_no_dashes + "_(int8_t* in0, " - << "size_t in0_size, int8_t* out0, size_t out0_size) {\n"; + << "size_t in0_size, int8_t* out0, size_t out0_size, void* resource_handle) {\n"; ss << " int num_tensors = 5;\n"; ss << " void* cms_data = (void*)(cms_data_data);\n"; ss << " int64_t device_type = kDLCPU;\n"; @@ -263,30 +263,25 @@ class EthosUModuleNode : public ModuleNode { ss << SetBaseAddress(3, "in0"); ss << SetBaseAddress(4, "out0"); ss << "\n"; - ss << " struct ethosu_driver *drv = ethosu_reserve_driver();\n"; - ss << " int32_t result = ethosu_invoke(drv, cms_data, cms_data_size, base_addrs, " - "base_addrs_size, " - "num_tensors);\n"; - ss << " ethosu_release_driver(drv);\n"; + ss << " int32_t result = TVMEthosULaunch(resource_handle, cms_data, cms_data_size, " + "base_addrs, base_addrs_size, num_tensors);\n"; if (scratch_size > 0) { ss << " TVMBackendFreeWorkspace(device_type, device_id, scratch);\n"; } - ss << " if (result != 0) {\n"; - ss << " return -1;\n"; - ss << " } else {\n"; - ss << " return 0;\n"; - ss << " }\n"; + ss << " return result;\n"; ss << "}\n"; ss << "\n"; PrintExternCPostfix(ss); ss << "\n"; PrintExternCPrefix(ss); ss << "// Wrapper function is provided to allow for easier debugging\n"; - ss << "inline static int32_t " + func_no_dashes + "_wrapper_(void* input, void* output) {\n"; + ss << "inline static int32_t " + func_no_dashes + + "_wrapper_(void* input, void* output, void* resource_handle) {\n"; ss << " size_t input_data_size = " << input_size << ";\n"; ss << " size_t output_data_size = " << output_size 
<< ";\n"; ss << " return " + func_no_dashes + - "_((int8_t*)input, input_data_size, (int8_t*)output, output_data_size);\n"; + "_((int8_t*)input, input_data_size, (int8_t*)output, output_data_size, " + + "resource_handle);\n"; ss << "}\n"; PrintExternCPostfix(ss); ss << "\n"; @@ -294,7 +289,7 @@ class EthosUModuleNode : public ModuleNode { PrintRuntimeFunctionHeader(ss, func_name); EnterScope(); PrintIndents(ss); - ss << "return " << func_no_dashes << "_wrapper_(input, output);\n"; + ss << "return " << func_no_dashes << "_wrapper_(input, output, resource_handle);\n"; ExitScope(); ss << "}\n"; PrintExternCPostfix(ss); @@ -317,14 +312,14 @@ inline EthosUModuleNode* EthosUModule::operator->() { return static_cast(get_mutable()); } -TVM_REGISTER_GLOBAL("runtime.module.ethosu.create") +TVM_REGISTER_GLOBAL("runtime.module.ethos-u.create") .set_body_typed([](String func_name, String cmms_hex, String weights_bias_hex, Integer scratch_size, Integer input_size, Integer output_size) { return EthosUModuleNode::Create(func_name, cmms_hex, weights_bias_hex, scratch_size, input_size, output_size); }); -TVM_REGISTER_GLOBAL("runtime.module.ethosu.getcs").set_body_typed([](EthosUModule mod) { +TVM_REGISTER_GLOBAL("runtime.module.ethos-u.getcs").set_body_typed([](EthosUModule mod) { return mod->GetCS(); }); diff --git a/src/relay/backend/graph_executor_codegen.cc b/src/relay/backend/graph_executor_codegen.cc index ac3c835ed648..a40ca232a0bd 100644 --- a/src/relay/backend/graph_executor_codegen.cc +++ b/src/relay/backend/graph_executor_codegen.cc @@ -681,6 +681,9 @@ class GraphExecutorCodegenModule : public runtime::ModuleNode { return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->output_.external_mods; }); + } else if (name == "get_devices") { + return PackedFunc( + [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = Array(); }); } else if (name == "get_metadata") { return PackedFunc( [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = 
this->output_.metadata; }); diff --git a/src/relay/backend/te_compiler.cc b/src/relay/backend/te_compiler.cc index 915fc22b2052..9c6da1717e2a 100644 --- a/src/relay/backend/te_compiler.cc +++ b/src/relay/backend/te_compiler.cc @@ -177,6 +177,8 @@ class TECompilerImpl : public TECompilerNode { return ret; } + Map GetDeviceContexts() { return device_contexts_; } + void Clear() final { cache_.clear(); } // List all items in the cache. @@ -226,6 +228,9 @@ class TECompilerImpl : public TECompilerNode { ir_module->Add(global_var, key->source_func); value->cached_func = CachedFunc(target, global_var, {}, {}, te::Schedule{nullptr}, tir::PrimFunc{nullptr}, {}, ir_module); + // Collect these here as it's removed in LowerExternalFunctions() + std::string codegen_name = key->source_func->GetAttr(attr::kCompiler).value(); + device_contexts_.Set(global_var, codegen_name); return value; } @@ -312,6 +317,8 @@ class TECompilerImpl : public TECompilerNode { std::unordered_map shape_func_cache_; /*! \brief the cache key of the function that is being lowered currently*/ CCacheKey cur_ccache_key_; + /*! 
\brief Map of GlobalVar to C Device API context names */ + Map device_contexts_; }; TECompiler::TECompiler() { @@ -930,8 +937,12 @@ IRModule LowerTE(const IRModule& module, TargetMap targets, const String& module // Copy the lowered functions into the return module updated_module->Update(compiler->GetLoweredFunctions()); - // Annotate the module with the external modules and function info - updated_module = WithAttr(updated_module, "external_mods", compiler->LowerExternalFunctions()); + // Annotate the module with C Device API context mapping, the external modules and function info + // this is until we have Target's annotated for the C Device API + // TODO(Mousius) - Remove "device_contexts" as soon as we have the graph annotated properly with + // Target's + updated_module = WithAttrs(updated_module, {{"external_mods", compiler->LowerExternalFunctions()}, + {"device_contexts", compiler->GetDeviceContexts()}}); return updated_module; } diff --git a/src/relay/backend/te_compiler.h b/src/relay/backend/te_compiler.h index d0401e9605f7..75cbc6d7dfd6 100644 --- a/src/relay/backend/te_compiler.h +++ b/src/relay/backend/te_compiler.h @@ -109,6 +109,13 @@ class TECompilerNode : public Object { */ virtual tvm::Array LowerExternalFunctions() = 0; + /*! + * \brief Get C Device API context mapping + * \return Map of GlobalVar to associated C Device API context name (either Target or kCompiler + * annotated) + */ + virtual Map GetDeviceContexts() = 0; + virtual std::unordered_map GetOpWeights() = 0; /*! \brief clear the cache. */ diff --git a/src/relay/transforms/partition_graph.cc b/src/relay/transforms/partition_graph.cc index 6e52cbfbe55a..99799fdeb5f0 100644 --- a/src/relay/transforms/partition_graph.cc +++ b/src/relay/transforms/partition_graph.cc @@ -49,6 +49,7 @@ namespace tvm { namespace relay { + namespace partitioning { /*! 
\brief This struct maintains the required metadata for a region to generate a corresponding diff --git a/src/runtime/contrib/ethosu/bare_metal/tvm_ethosu_runtime.c b/src/runtime/contrib/ethosu/bare_metal/tvm_ethosu_runtime.c new file mode 100644 index 000000000000..6b7399b67406 --- /dev/null +++ b/src/runtime/contrib/ethosu/bare_metal/tvm_ethosu_runtime.c @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#include "tvm_ethosu_runtime.h" + +#include + +int32_t TVMEthosULaunch(struct ethosu_driver* driver, void* cms_data, size_t cms_data_size, + uint64_t* base_addrs, size_t* base_addrs_size, int num_tensors) { + int32_t result = + ethosu_invoke(driver, cms_data, cms_data_size, base_addrs, base_addrs_size, num_tensors); + + // Map errors in invoke to TVM errors + if (result != 0) { + return -1; + } + return 0; +} diff --git a/src/runtime/contrib/ethosu/bare_metal/tvm_ethosu_runtime.h b/src/runtime/contrib/ethosu/bare_metal/tvm_ethosu_runtime.h new file mode 100644 index 000000000000..d62afc4c69ef --- /dev/null +++ b/src/runtime/contrib/ethosu/bare_metal/tvm_ethosu_runtime.h @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#ifndef TVM_RUNTIME_CONTRIB_ETHOSU_BARE_METAL_TVM_ETHOSU_RUNTIME_H_ +#define TVM_RUNTIME_CONTRIB_ETHOSU_BARE_METAL_TVM_ETHOSU_RUNTIME_H_ + +#include +#include +#include + +int32_t TVMEthosULaunch(struct ethosu_driver* driver, void* cms_data, size_t cms_data_size, + uint64_t* base_addrs, size_t* base_addrs_size, int num_tensors); + +#endif // TVM_RUNTIME_CONTRIB_ETHOSU_BARE_METAL_TVM_ETHOSU_RUNTIME_H_ diff --git a/src/runtime/meta_data.h b/src/runtime/meta_data.h index 66d9a44099da..fd612b08ab0e 100644 --- a/src/runtime/meta_data.h +++ b/src/runtime/meta_data.h @@ -58,6 +58,8 @@ class MetadataNode : public Object { Array inputs; /*! \brief number of outputs of the main function */ int num_outputs = 1; + /*! \brief device contexts information for the main function */ + Array devices; /*! \brief the executor to be used to run the model */ String executor = kTvmExecutorGraph; @@ -73,9 +75,11 @@ class MetadataNode : public Object { */ class Metadata : public ObjectRef { public: - TVM_DLL Metadata(Array inputs, int num_outputs, String executor, String mod_name) { + TVM_DLL Metadata(Array inputs, Array devices, int num_outputs, String executor, + String mod_name) { auto n = make_object(); n->inputs = inputs; + n->devices = devices; n->num_outputs = num_outputs; n->executor = executor; n->mod_name = mod_name; diff --git a/src/target/source/interface_c.cc b/src/target/source/interface_c.cc index 4089ccc7523c..fd111642b168 100644 --- a/src/target/source/interface_c.cc +++ b/src/target/source/interface_c.cc @@ -40,8 +40,9 @@ using namespace tvm::relay::backend; class InterfaceCNode : public runtime::ModuleNode { public: - InterfaceCNode(std::string module_name, Array inputs, Array outputs) - : module_name_(module_name), inputs_(inputs), outputs_(outputs) {} + InterfaceCNode(std::string module_name, Array inputs, Array outputs, + Array devices) + : module_name_(module_name), inputs_(inputs), outputs_(outputs), devices_(devices) {} const char* type_key() const { return 
"h"; } std::string GetSource(const std::string& format) final { @@ -52,6 +53,12 @@ class InterfaceCNode : public runtime::ModuleNode { EmitStruct(code, "inputs", inputs_); EmitBrief(code, "Output tensor pointers"); EmitStruct(code, "outputs", outputs_); + + if (!devices_.empty()) { + EmitBrief(code, "Device context pointers"); + EmitStruct(code, "devices", devices_); + } + EmitRunFunction(code); EmitLowerHeaderGuard(code); @@ -108,26 +115,40 @@ class InterfaceCNode : public runtime::ModuleNode { std::string run_function = ToCVariableStyle(PrefixGeneratedName({module_name_, "run"})); std::string inputs_struct = ToCVariableStyle(PrefixGeneratedName({module_name_, "inputs"})); std::string outputs_struct = ToCVariableStyle(PrefixGeneratedName({module_name_, "outputs"})); + std::string devices_struct = ToCVariableStyle(PrefixGeneratedName({module_name_, "devices"})); code_stream << "/*!\n" << " * \\brief entrypoint function for TVM module \"" << module_name_ << "\"\n" << " * \\param inputs Input tensors for the module \n" - << " * \\param outputs Output tensors for the module \n" - << " */\n" + << " * \\param outputs Output tensors for the module \n"; + + if (!devices_.empty()) { + code_stream << " * \\param devices Device context pointers for the module \n"; + } + + code_stream << " */\n" << "int32_t " << run_function << "(\n" - << " struct " << inputs_struct << "* inputs,\n" - << " struct " << outputs_struct << "* outputs\n" - << ");\n"; + << " struct " << inputs_struct << "* inputs,\n"; + + if (!devices_.empty()) { + code_stream << " struct " << outputs_struct << "* outputs,\n"; + code_stream << " struct " << devices_struct << "* devices\n"; + } else { + code_stream << " struct " << outputs_struct << "* outputs\n"; + } + + code_stream << ");\n"; } std::string module_name_; Array inputs_; Array outputs_; + Array devices_; }; runtime::Module InterfaceCCreate(std::string module_name, Array inputs, - Array outputs) { - auto n = make_object(module_name, inputs, outputs); 
+ Array outputs, Array devices) { + auto n = make_object(module_name, inputs, outputs, devices); return runtime::Module(n); } diff --git a/src/target/source/source_module.cc b/src/target/source/source_module.cc index 9b93b0726f3a..21f82c3a99f1 100644 --- a/src/target/source/source_module.cc +++ b/src/target/source/source_module.cc @@ -240,7 +240,8 @@ class CSourceCrtMetadataModuleNode : public runtime::ModuleNode { const std::string& mod_name) { code_ << "#include <" << mod_name << ".h>\n"; code_ << "TVM_DLL int32_t " << run_func << "("; - unsigned int total_args = (metadata_->inputs.size() + metadata_->num_outputs); + unsigned int total_args = + (metadata_->inputs.size() + metadata_->devices.size() + metadata_->num_outputs); for (unsigned int i = 0; i < total_args; ++i) { code_ << "void* arg" << i; if (i + 1 != total_args) { @@ -249,10 +250,16 @@ class CSourceCrtMetadataModuleNode : public runtime::ModuleNode { } code_ << ");\n"; code_ << "int32_t " << entrypoint_name << "("; - code_ << "struct " << runtime::get_name_mangled(mod_name, "inputs") << "* inputs," - << "struct " << runtime::get_name_mangled(mod_name, "outputs") << "* outputs" - << ") {"; - code_ << "return " << run_func << "("; + code_ << "struct " << runtime::get_name_mangled(mod_name, "inputs") << "* inputs,"; + if (!metadata_->devices.empty()) { + code_ << "struct " << runtime::get_name_mangled(mod_name, "outputs") << "* outputs,"; + code_ << "struct " << runtime::get_name_mangled(mod_name, "devices") << "* devices"; + } else { + code_ << "struct " << runtime::get_name_mangled(mod_name, "outputs") << "* outputs"; + } + + code_ << ") {" + << "return " << run_func << "("; for (const auto& input : metadata_->inputs) { std::string sanitised_input = input; std::replace_if(sanitised_input.begin(), sanitised_input.end(), isNotAlnum, '_'); @@ -268,6 +275,17 @@ class CSourceCrtMetadataModuleNode : public runtime::ModuleNode { } } } + + if (!metadata_->devices.empty()) { + code_ << ","; + for (const String& 
device : metadata_->devices) { + code_ << "devices->" << device; + if (device != metadata_->devices.back()) { + code_ << ","; + } + } + } + code_ << ");\n"; code_ << "}\n"; } diff --git a/src/target/target_kind.cc b/src/target/target_kind.cc index 4403af26d1a8..b07b7f055310 100644 --- a/src/target/target_kind.cc +++ b/src/target/target_kind.cc @@ -373,6 +373,8 @@ TVM_REGISTER_TARGET_KIND("hybrid", kDLCPU) // line break TVM_REGISTER_TARGET_KIND("composite", kDLCPU).add_attr_option>("devices"); +TVM_REGISTER_TARGET_KIND("ethos-u", kDLCPU).set_attr("c_device_api", Bool(true)); + /********** Registry **********/ TVM_REGISTER_GLOBAL("target.ListTargetKinds").set_body_typed(TargetKindRegEntry::ListTargetKinds); diff --git a/tests/cpp/target/source/interface_c_test.cc b/tests/cpp/target/source/interface_c_test.cc index c53af43e9f69..7bfea17f4548 100644 --- a/tests/cpp/target/source/interface_c_test.cc +++ b/tests/cpp/target/source/interface_c_test.cc @@ -29,7 +29,7 @@ namespace tvm { namespace codegen { runtime::Module InterfaceCCreate(std::string module_name, Array inputs, - Array outputs); + Array outputs, Array devices); namespace { @@ -49,7 +49,7 @@ TEST(InterfaceAPI, ContainsHeaderGuards) { << "#endif\n\n" << "#endif // TVMGEN_ULTIMATE_CAT_SPOTTER_H_\n"; - runtime::Module test_module = InterfaceCCreate("ultimate_cat_spotter", {"input"}, {"output"}); + runtime::Module test_module = InterfaceCCreate("ultimate_cat_spotter", {"input"}, {"output"}, {}); std::string header_source = test_module->GetSource(); ASSERT_THAT(header_source, HasSubstr(upper_header_guard.str())); @@ -69,7 +69,29 @@ TEST(InterfaceAPI, ContainsRunFunction) { << " struct tvmgen_ultimate_cat_spotter_outputs* outputs\n" << ");\n"; - runtime::Module test_module = InterfaceCCreate("ultimate_cat_spotter", {"input"}, {"output"}); + runtime::Module test_module = InterfaceCCreate("ultimate_cat_spotter", {"input"}, {"output"}, {}); + std::string header_source = test_module->GetSource(); + + 
ASSERT_THAT(header_source, HasSubstr(run_function.str())); +} + +TEST(InterfaceAPI, ContainsRunFunctionWithDevices) { + std::stringstream run_function; + + run_function << "/*!\n" + << " * \\brief entrypoint function for TVM module \"ultimate_cat_spotter\"\n" + << " * \\param inputs Input tensors for the module \n" + << " * \\param outputs Output tensors for the module \n" + << " * \\param devices Device context pointers for the module \n" + << " */\n" + << "int32_t tvmgen_ultimate_cat_spotter_run(\n" + << " struct tvmgen_ultimate_cat_spotter_inputs* inputs,\n" + << " struct tvmgen_ultimate_cat_spotter_outputs* outputs,\n" + << " struct tvmgen_ultimate_cat_spotter_devices* devices\n" + << ");\n"; + + runtime::Module test_module = + InterfaceCCreate("ultimate_cat_spotter", {"input"}, {"output"}, {"device"}); std::string header_source = test_module->GetSource(); ASSERT_THAT(header_source, HasSubstr(run_function.str())); @@ -85,7 +107,7 @@ TEST(InterfaceAPI, ContainsInputStructSingle) { << " void* input;\n" << "};\n\n"; - runtime::Module test_module = InterfaceCCreate("ultimate_cat_spotter", {"input"}, {"output"}); + runtime::Module test_module = InterfaceCCreate("ultimate_cat_spotter", {"input"}, {"output"}, {}); std::string header_source = test_module->GetSource(); ASSERT_THAT(header_source, HasSubstr(input_struct.str())); @@ -100,7 +122,7 @@ TEST(InterfaceAPI, ContainsInputStructMany) { << "};\n\n"; runtime::Module test_module = - InterfaceCCreate("ultimate_cat_spotter", {"input1", "input2"}, {"output"}); + InterfaceCCreate("ultimate_cat_spotter", {"input1", "input2"}, {"output"}, {}); std::string header_source = test_module->GetSource(); ASSERT_THAT(header_source, HasSubstr(input_struct.str())); @@ -115,7 +137,7 @@ TEST(InterfaceAPI, ContainsInputStructSanitised) { << "};\n\n"; runtime::Module test_module = - InterfaceCCreate("ultimate_cat_spotter", {"input+1", "input+2"}, {"output"}); + InterfaceCCreate("ultimate_cat_spotter", {"input+1", "input+2"}, {"output"}, 
{}); std::string header_source = test_module->GetSource(); ASSERT_THAT(header_source, HasSubstr(input_struct.str())); @@ -123,7 +145,7 @@ TEST(InterfaceAPI, ContainsInputStructSanitised) { TEST(InterfaceAPI, ContainsInputStructClash) { runtime::Module test_module = - InterfaceCCreate("ultimate_cat_spotter", {"input+", "input-"}, {"output"}); + InterfaceCCreate("ultimate_cat_spotter", {"input+", "input-"}, {"output"}, {}); ASSERT_THROW(test_module->GetSource(), InternalError); } @@ -137,7 +159,7 @@ TEST(InterfaceAPI, ContainsOutputStructSingle) { << " void* output;\n" << "};\n\n"; - runtime::Module test_module = InterfaceCCreate("ultimate_cat_spotter", {"input"}, {"output"}); + runtime::Module test_module = InterfaceCCreate("ultimate_cat_spotter", {"input"}, {"output"}, {}); std::string header_source = test_module->GetSource(); ASSERT_THAT(header_source, HasSubstr(output_struct.str())); @@ -152,7 +174,7 @@ TEST(InterfaceAPI, ContainsOutputStructMany) { << "};\n\n"; runtime::Module test_module = - InterfaceCCreate("ultimate_cat_spotter", {"input"}, {"output1", "output2"}); + InterfaceCCreate("ultimate_cat_spotter", {"input"}, {"output1", "output2"}, {}); std::string header_source = test_module->GetSource(); ASSERT_THAT(header_source, HasSubstr(output_struct.str())); @@ -167,7 +189,7 @@ TEST(InterfaceAPI, ContainsOutputStructSanitised) { << "};\n\n"; runtime::Module test_module = - InterfaceCCreate("ultimate_cat_spotter", {"input"}, {"output+1", "output-2"}); + InterfaceCCreate("ultimate_cat_spotter", {"input"}, {"output+1", "output-2"}, {}); std::string header_source = test_module->GetSource(); ASSERT_THAT(header_source, HasSubstr(output_struct.str())); @@ -175,7 +197,75 @@ TEST(InterfaceAPI, ContainsOutputStructSanitised) { TEST(InterfaceAPI, ContainsOutputStructClash) { runtime::Module test_module = - InterfaceCCreate("ultimate_cat_spotter", {"input"}, {"output+", "output-"}); + InterfaceCCreate("ultimate_cat_spotter", {"input"}, {"output+", "output-"}, {}); + 
ASSERT_THROW(test_module->GetSource(), InternalError); +} + +TEST(InterfaceAPI, NoDeviceAPIStructIfNoDevices) { + std::stringstream device_struct; + + device_struct << "/*!\n" + << " * \\brief Device context pointers for TVM module \"ultimate_cat_spotter\" \n" + << " */\n" + << "struct tvmgen_ultimate_cat_spotter_devices {\n" + << "};\n\n"; + + runtime::Module test_module = InterfaceCCreate("ultimate_cat_spotter", {"input"}, {"output"}, {}); + std::string header_source = test_module->GetSource(); + + ASSERT_THAT(header_source, Not(HasSubstr(device_struct.str()))); +} + +TEST(InterfaceAPI, ContainsDeviceStructSingle) { + std::stringstream device_struct; + + device_struct << "/*!\n" + << " * \\brief Device context pointers for TVM module \"ultimate_cat_spotter\" \n" + << " */\n" + << "struct tvmgen_ultimate_cat_spotter_devices {\n" + << " void* device;\n" + << "};\n\n"; + + runtime::Module test_module = + InterfaceCCreate("ultimate_cat_spotter", {"input"}, {"output"}, {"device"}); + std::string header_source = test_module->GetSource(); + + ASSERT_THAT(header_source, HasSubstr(device_struct.str())); +} + +TEST(InterfaceAPI, ContainsDeviceStructMany) { + std::stringstream device_struct; + + device_struct << "struct tvmgen_ultimate_cat_spotter_devices {\n" + << " void* device1;\n" + << " void* device2;\n" + << "};\n\n"; + + runtime::Module test_module = + InterfaceCCreate("ultimate_cat_spotter", {"input"}, {"output"}, {"device1", "device2"}); + std::string header_source = test_module->GetSource(); + + ASSERT_THAT(header_source, HasSubstr(device_struct.str())); +} + +TEST(InterfaceAPI, ContainsDeviceStructSanitised) { + std::stringstream device_struct; + + device_struct << "struct tvmgen_ultimate_cat_spotter_devices {\n" + << " void* device_1;\n" + << " void* device_2;\n" + << "};\n\n"; + + runtime::Module test_module = + InterfaceCCreate("ultimate_cat_spotter", {"input"}, {"output"}, {"device+1", "device+2"}); + std::string header_source = test_module->GetSource(); + + 
ASSERT_THAT(header_source, HasSubstr(device_struct.str())); +} + +TEST(InterfaceAPI, ContainsDeviceStructClash) { + runtime::Module test_module = + InterfaceCCreate("ultimate_cat_spotter", {"input"}, {"output"}, {"device+", "device-"}); ASSERT_THROW(test_module->GetSource(), InternalError); } diff --git a/tests/micro/zephyr/test_zephyr_aot.py b/tests/micro/zephyr/test_zephyr_aot.py index 7cd32f4e1879..4324570e1930 100644 --- a/tests/micro/zephyr/test_zephyr_aot.py +++ b/tests/micro/zephyr/test_zephyr_aot.py @@ -89,7 +89,7 @@ def test_tflite(temp_dir, board, west_cmd, tvm_debug): model_files_path = os.path.join(tar_temp_dir, "include") os.mkdir(model_files_path) header_path = generate_c_interface_header( - lowered.libmod_name, ["input_1"], ["output"], model_files_path + lowered.libmod_name, ["input_1"], ["output"], [], model_files_path ) tf.add(header_path, arcname=os.path.relpath(header_path, tar_temp_dir)) @@ -150,7 +150,7 @@ def test_qemu_make_fail(temp_dir, board, west_cmd, tvm_debug): model_files_path = os.path.join(tar_temp_dir, "include") os.mkdir(model_files_path) header_path = generate_c_interface_header( - lowered.libmod_name, ["input_1"], ["output"], model_files_path + lowered.libmod_name, ["input_1"], ["output"], [], model_files_path ) tf.add(header_path, arcname=os.path.relpath(header_path, tar_temp_dir)) test_utils.create_header_file( diff --git a/tests/micro/zephyr/test_zephyr_armv7m.py b/tests/micro/zephyr/test_zephyr_armv7m.py index 2366bad203be..9364b54c153f 100644 --- a/tests/micro/zephyr/test_zephyr_armv7m.py +++ b/tests/micro/zephyr/test_zephyr_armv7m.py @@ -112,7 +112,7 @@ def _generate_project(temp_dir, board, west_cmd, lowered, build_config, sample, test_utils.loadCMSIS(model_files_path) tf.add(model_files_path, arcname=os.path.relpath(model_files_path, tar_temp_dir)) header_path = generate_c_interface_header( - lowered.libmod_name, ["input_1"], ["output"], model_files_path + lowered.libmod_name, ["input_1"], ["output"], [], model_files_path 
) tf.add(header_path, arcname=os.path.relpath(header_path, tar_temp_dir)) diff --git a/tests/python/contrib/test_ethosu/infra.py b/tests/python/contrib/test_ethosu/infra.py index 17d3fad9cb30..d37d91538287 100644 --- a/tests/python/contrib/test_ethosu/infra.py +++ b/tests/python/contrib/test_ethosu/infra.py @@ -198,11 +198,16 @@ def _create_test_runner(accel): prologue=""" uart_init(); EthosuInit(); + + struct ethosu_driver* ethos_u = ethosu_reserve_driver(); + """, + epilogue=""" + ethosu_release_driver(ethos_u); """, includes=["uart.h", "ethosu_55.h", "ethosu_mod.h", "hard_fault.h"], parameters={"ETHOSU_TEST_ROOT": test_root, "NPU_VARIANT": ethosu_macs}, pass_config={ - "relay.ext.ethosu.options": { + "relay.ext.ethos-u.options": { "accelerator_config": accel, } }, diff --git a/tests/python/contrib/test_ethosu/test_attr_passing.py b/tests/python/contrib/test_ethosu/test_attr_passing.py index 6b99a5c1e540..5aab39a7ae97 100644 --- a/tests/python/contrib/test_ethosu/test_attr_passing.py +++ b/tests/python/contrib/test_ethosu/test_attr_passing.py @@ -26,9 +26,9 @@ def test_compiler_attr(): config = { "accelerator_config": "ethos-u55-32", } - with tvm.transform.PassContext(opt_level=3, config={"relay.ext.ethosu.options": config}): + with tvm.transform.PassContext(opt_level=3, config={"relay.ext.ethos-u.options": config}): with tvm.target.Target("c -device=micro_dev"): - compiler_attrs = tvm.get_global_func("relay.ext.ethosu.get_compiler_attrs")() + compiler_attrs = tvm.get_global_func("relay.ext.ethos-u.get_compiler_attrs")() accel_config_str = compiler_attrs.accelerator_config assert accel_config_str == config["accelerator_config"] @@ -39,7 +39,7 @@ def test_compiler_attr_default(): } with tvm.transform.PassContext(opt_level=3): with tvm.target.Target("c -device=micro_dev"): - compiler_attrs = tvm.get_global_func("relay.ext.ethosu.get_compiler_attrs")() + compiler_attrs = tvm.get_global_func("relay.ext.ethos-u.get_compiler_attrs")() accel_config_str = 
compiler_attrs.accelerator_config assert accel_config_str == default_config["accelerator_config"] diff --git a/tests/python/contrib/test_ethosu/test_codegen.py b/tests/python/contrib/test_ethosu/test_codegen.py index a5686c81beb8..e29bfa2d79c2 100644 --- a/tests/python/contrib/test_ethosu/test_codegen.py +++ b/tests/python/contrib/test_ethosu/test_codegen.py @@ -159,7 +159,7 @@ def create_graph_activation(input_tensor_name, input_tensor_shape, input_tensor_ ethosu_module = imported_modules[0] # Verify generated C source - get_cs = tvm._ffi.get_global_func("runtime.module.ethosu.getcs") + get_cs = tvm._ffi.get_global_func("runtime.module.ethos-u.getcs") cmms = get_cs(ethosu_module) cmms = bytes.fromhex(cmms) infra.print_payload(cmms) @@ -246,7 +246,7 @@ def representative_dataset(): ethosu_module = imported_modules[0] # Verify generated C source - get_cs = tvm._ffi.get_global_func("runtime.module.ethosu.getcs") + get_cs = tvm._ffi.get_global_func("runtime.module.ethos-u.getcs") cmms = get_cs(ethosu_module) cmms = bytes.fromhex(cmms) @@ -333,7 +333,7 @@ def representative_dataset(): ethosu_module = imported_modules[0] # Verify generated C source - get_cs = tvm._ffi.get_global_func("runtime.module.ethosu.getcs") + get_cs = tvm._ffi.get_global_func("runtime.module.ethos-u.getcs") cmms = get_cs(ethosu_module) cmms = bytes.fromhex(cmms) @@ -427,7 +427,7 @@ def representative_dataset(): ethosu_module = imported_modules[0] # Verify generated C source - get_cs = tvm._ffi.get_global_func("runtime.module.ethosu.getcs") + get_cs = tvm._ffi.get_global_func("runtime.module.ethos-u.getcs") cmms = get_cs(ethosu_module) cmms = bytes.fromhex(cmms) @@ -484,7 +484,7 @@ def create_model(): ethosu_module = imported_modules[0] # Verify generated C source - get_cs = tvm._ffi.get_global_func("runtime.module.ethosu.getcs") + get_cs = tvm._ffi.get_global_func("runtime.module.ethos-u.getcs") cmms = get_cs(ethosu_module) cmms = bytes.fromhex(cmms) @@ -519,12 +519,12 @@ def create_model(): ifm, 
ifm2, ifm_shape[3], ifm2_shape[3], "SHR", ofm_dtype, reversed_operands ) - glb_ethosu = relay.GlobalVar("tvmgen_default_ethosu_main_0") + glb_ethosu = relay.GlobalVar("tvmgen_default_ethos_u_main_0") func = ( relay.Function([ifms], shr_op) .with_attr("Inline", 1) - .with_attr("Compiler", "ethosu") - .with_attr("global_symbol", "tvmgen_default_ethosu_main_0") + .with_attr("Compiler", "ethos-u") + .with_attr("global_symbol", "tvmgen_default_ethos_u_main_0") .with_attr("Primitive", 1) ) mod = tvm.IRModule() @@ -583,7 +583,7 @@ def rounding_right_shift(lhs, rhs): ethosu_module = imported_modules[0] # Verify generated C source - get_cs = tvm._ffi.get_global_func("runtime.module.ethosu.getcs") + get_cs = tvm._ffi.get_global_func("runtime.module.ethos-u.getcs") cmms = get_cs(ethosu_module) cmms = bytes.fromhex(cmms) diff --git a/tests/python/contrib/test_ethosu/test_legalize.py b/tests/python/contrib/test_ethosu/test_legalize.py index 2a84a23930e4..166a965ae117 100644 --- a/tests/python/contrib/test_ethosu/test_legalize.py +++ b/tests/python/contrib/test_ethosu/test_legalize.py @@ -40,7 +40,7 @@ def partition_ethosu_by_table(mod, pattern_table): wouldn't attempt to offload an operator without full stack support.""" mod = relay.transform.InferType()(mod) mod = relay.transform.MergeComposite(pattern_table)(mod) - mod = relay.transform.AnnotateTarget("ethosu")(mod) + mod = relay.transform.AnnotateTarget("ethos-u")(mod) mod = relay.transform.MergeCompilerRegions()(mod) mod = relay.transform.InferType()(mod) mod = relay.transform.PartitionGraph()(mod) @@ -59,7 +59,7 @@ def create_graph(axis): def expected_mod_axis1(): expected_ir_string = """ #[version = "0.0.5"] - def @tvmgen_default_ethosu_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tensor[(1, 5, 50, 3), float32],\ + def @tvmgen_default_ethos_u_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tensor[(1, 5, 50, 3), float32],\ Tensor[(1, 15, 50, 3), float32],\ Tensor[(1, 25, 50, 3), float32],\ Tensor[(1, 5, 50, 3), 
float32]) { @@ -80,7 +80,7 @@ def @tvmgen_default_ethosu_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tenso def expected_mod_axis2(): expected_ir_string = """ #[version = "0.0.5"] - def @tvmgen_default_ethosu_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tensor[(1, 50, 5, 3), float32],\ + def @tvmgen_default_ethos_u_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tensor[(1, 50, 5, 3), float32],\ Tensor[(1, 50, 15, 3), float32],\ Tensor[(1, 50, 25, 3), float32],\ Tensor[(1, 50, 5, 3), float32]) { @@ -99,13 +99,13 @@ def @tvmgen_default_ethosu_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tenso return tvm.parser.fromtext(expected_ir_string) mod_axis1 = tvm.IRModule() - mod_axis1["tvmgen_default_ethosu_main_0"] = create_graph(1) + mod_axis1["tvmgen_default_ethos_u_main_0"] = create_graph(1) mod_axis1 = legalize.LegalizeSplit()(mod_axis1) expected_axis1 = expected_mod_axis1() tvm.ir.assert_structural_equal(mod_axis1, expected_axis1) mod_axis2 = tvm.IRModule() - mod_axis2["tvmgen_default_ethosu_main_0"] = create_graph(2) + mod_axis2["tvmgen_default_ethos_u_main_0"] = create_graph(2) mod_axis2 = legalize.LegalizeSplit()(mod_axis2) expected_axis2 = expected_mod_axis2() tvm.ir.assert_structural_equal(mod_axis2, expected_axis2) @@ -127,7 +127,7 @@ def create_graph(axis, sections): def expected_mod_axis1(): expected_ir_string = """ #[version = "0.0.5"] - def @tvmgen_default_ethosu_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tensor[(1, 10, 50, 3), float32],\ + def @tvmgen_default_ethos_u_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tensor[(1, 10, 50, 3), float32],\ Tensor[(1, 10, 50, 3), float32],\ Tensor[(1, 10, 50, 3), float32],\ Tensor[(1, 10, 50, 3), float32],\ @@ -162,7 +162,7 @@ def @tvmgen_default_ethosu_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tenso def expected_mod_axis2(): expected_ir_string = """ #[version = "0.0.5"] - def @tvmgen_default_ethosu_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tensor[(1, 50, 10, 3), float32],\ + def 
@tvmgen_default_ethos_u_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tensor[(1, 50, 10, 3), float32],\ Tensor[(1, 50, 10, 3), float32],\ Tensor[(1, 50, 10, 3), float32],\ Tensor[(1, 50, 10, 3), float32],\ @@ -195,13 +195,13 @@ def @tvmgen_default_ethosu_main_0(%x: Tensor[(1, 50, 50, 3), float32]) -> (Tenso return tvm.parser.fromtext(expected_ir_string) mod_axis1 = tvm.IRModule() - mod_axis1["tvmgen_default_ethosu_main_0"] = create_graph(1, 5) + mod_axis1["tvmgen_default_ethos_u_main_0"] = create_graph(1, 5) mod_axis1 = legalize.LegalizeSplit()(mod_axis1) expected_axis1 = expected_mod_axis1() tvm.ir.assert_structural_equal(mod_axis1, expected_axis1) mod_axis2 = tvm.IRModule() - mod_axis2["tvmgen_default_ethosu_main_0"] = create_graph(2, 5) + mod_axis2["tvmgen_default_ethos_u_main_0"] = create_graph(2, 5) mod_axis2 = legalize.LegalizeSplit()(mod_axis2) expected_axis2 = expected_mod_axis2() tvm.ir.assert_structural_equal(mod_axis2, expected_axis2) @@ -314,7 +314,7 @@ def verify_linear(ext_func, conv2d_params): mod, conv_params = test_case[0](*test_case[1]) mod = ethosu.partition_for_ethosu(mod) mod = legalize.LegalizeConv2D()(mod) - verify_linear(mod["tvmgen_default_ethosu_main_0"], conv_params) + verify_linear(mod["tvmgen_default_ethos_u_main_0"], conv_params) def test_ethosu_conv2d_legalize_errors(): @@ -457,10 +457,10 @@ def verify(ext_func): mod["main"] = bind_params_by_name(mod["main"], params) mod = partition_ethosu_by_table(mod, depthwise_pattern_table) - mod["tvmgen_default_ethosu_main_0"] = dataflow_pattern.rewrite( - legalize.DepthwiseConv2DRewriter(), mod["tvmgen_default_ethosu_main_0"] + mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( + legalize.DepthwiseConv2DRewriter(), mod["tvmgen_default_ethos_u_main_0"] ) - verify(mod["tvmgen_default_ethosu_main_0"]) + verify(mod["tvmgen_default_ethos_u_main_0"]) @pytest.mark.parametrize("pooling_type", ["MAX", "AVG"]) @@ -552,10 +552,10 @@ def verify(ext_func): ) mod = 
partition_ethosu_by_table(mod, pattern_table) - mod["tvmgen_default_ethosu_main_0"] = dataflow_pattern.rewrite( - rewriter, mod["tvmgen_default_ethosu_main_0"] + mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( + rewriter, mod["tvmgen_default_ethos_u_main_0"] ) - verify(mod["tvmgen_default_ethosu_main_0"]) + verify(mod["tvmgen_default_ethos_u_main_0"]) @pytest.mark.parametrize("operator_type", ["ADD", "SUB", "MUL", "MIN", "MAX"]) @@ -687,10 +687,10 @@ def verify(ext_func): ) mod = partition_ethosu_by_table(mod, pattern_table) - mod["tvmgen_default_ethosu_main_0"] = dataflow_pattern.rewrite( - rewriter, mod["tvmgen_default_ethosu_main_0"] + mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( + rewriter, mod["tvmgen_default_ethos_u_main_0"] ) - verify(mod["tvmgen_default_ethosu_main_0"]) + verify(mod["tvmgen_default_ethos_u_main_0"]) @pytest.mark.parametrize( @@ -740,10 +740,10 @@ def verify(ext_func): mod = create_graph() mod = partition_ethosu_by_table(mod, pattern_table) - mod["tvmgen_default_ethosu_main_0"] = dataflow_pattern.rewrite( - rewriter, mod["tvmgen_default_ethosu_main_0"] + mod["tvmgen_default_ethos_u_main_0"] = dataflow_pattern.rewrite( + rewriter, mod["tvmgen_default_ethos_u_main_0"] ) - verify(mod["tvmgen_default_ethosu_main_0"]) + verify(mod["tvmgen_default_ethos_u_main_0"]) if __name__ == "__main__": diff --git a/tests/python/contrib/test_ethosu/test_preprocess.py b/tests/python/contrib/test_ethosu/test_preprocess.py index f2c7b0afafd8..41831f270d12 100644 --- a/tests/python/contrib/test_ethosu/test_preprocess.py +++ b/tests/python/contrib/test_ethosu/test_preprocess.py @@ -57,7 +57,7 @@ def create_external_func1(mod_, compiler_name, symbol_name): mod = tvm.IRModule() x = relay.var("x", shape=(10, 10)) - glb_symbol_f1, mod = create_external_func1(mod, "ethosu", "ethosu_0") + glb_symbol_f1, mod = create_external_func1(mod, "ethos-u", "ethosu_0") r = relay.Call(glb_symbol_f1, [x]) main = relay.Function([x], r) mod["main"] 
= main @@ -93,7 +93,7 @@ def create_external_func1(mod_, compiler_name, symbol_name): x = relay.var("x", shape=(10, 10)) w0 = relay.var("w0", shape=(10, 10)) - glb_symbol_f1, mod = create_external_func1(mod, "ethosu", "ethosu_0") + glb_symbol_f1, mod = create_external_func1(mod, "ethos-u", "ethosu_0") r = relay.Call(glb_symbol_f1, [x, w0]) main = relay.Function([x, w0], r) mod["main"] = main @@ -130,7 +130,7 @@ def create_external_func1(mod_, compiler_name, symbol_name): # concat ifms = relay.concatenate((x_reshaped, w0_reshaped), 0) - glb_symbol_f1, mod = create_external_func1(mod, "ethosu", "ethosu_0") + glb_symbol_f1, mod = create_external_func1(mod, "ethos-u", "ethosu_0") r = relay.Call(glb_symbol_f1, [ifms]) main = relay.Function([x, w0], r) mod["main"] = main @@ -165,7 +165,7 @@ def create_external_func1(mod_, compiler_name, symbol_name): mod = tvm.IRModule() x = relay.var("x", shape=(10, 10)) - glb_symbol_f1, mod = create_external_func1(mod, "ethosu", "ethosu_0") + glb_symbol_f1, mod = create_external_func1(mod, "ethos-u", "ethosu_0") pq_tuple = relay.Call(glb_symbol_f1, [x]) p0 = relay.TupleGetItem(pq_tuple, 0) q0 = relay.TupleGetItem(pq_tuple, 1) @@ -196,7 +196,7 @@ def create_external_func1(mod_, compiler_name, symbol_name): mod = tvm.IRModule() x = relay.var("x", shape=(10, 10)) - glb_symbol_f1, mod = create_external_func1(mod, "ethosu", "ethosu_0") + glb_symbol_f1, mod = create_external_func1(mod, "ethos-u", "ethosu_0") ofms = relay.Call(glb_symbol_f1, [x]) # splits @@ -254,7 +254,7 @@ def create_external_func1(mod_, compiler_name, symbol_name): w1 = relay.var("w1", shape=(10, 10)) w2 = relay.var("w2", shape=(10, 10)) - glb_symbol_f1, mod = create_external_func1(mod, "ethosu", "ethosu_0") + glb_symbol_f1, mod = create_external_func1(mod, "ethos-u", "ethosu_0") pq_tuple = relay.Call(glb_symbol_f1, [x, w0, w1, w2]) p0 = relay.TupleGetItem(pq_tuple, 0) @@ -313,7 +313,7 @@ def create_external_func1(mod_, compiler_name, symbol_name): ifms = 
relay.concatenate((x_reshaped, w0_reshaped, w1_reshaped, w2_reshaped), 0) # call - glb_func, mod = create_external_func1(mod, "ethosu", "ethosu_0") + glb_func, mod = create_external_func1(mod, "ethos-u", "ethosu_0") ofms = relay.Call(glb_func, [ifms]) # splits diff --git a/tests/python/relay/aot/aot_test_utils.py b/tests/python/relay/aot/aot_test_utils.py index 278a95f60b6c..484148db4f8a 100644 --- a/tests/python/relay/aot/aot_test_utils.py +++ b/tests/python/relay/aot/aot_test_utils.py @@ -116,6 +116,8 @@ class AOTTestRunner(NamedTuple): Premade Makefile to use from the AOT test folder prologue: str Code to prepend to the main function + epilogue: str + Code to append to the main function includes: List[str] Additional includes required to run the AOT test runner parameters: Dict[str, str] @@ -126,6 +128,7 @@ class AOTTestRunner(NamedTuple): makefile: str = "default" prologue: str = "" + epilogue: str = "" includes: List[str] = [] parameters: Dict[str, str] = {} pass_config: Dict[str, Any] = {} @@ -320,6 +323,16 @@ def emit_main_data(main_file, input_map, output_list, mod_name): main_file.write(f'#include "{mangle_name(mod_name,"output_data")}{i}.h"\n') +def emit_main_device_structs(main_file, devices, mod_name): + if devices: + main_file.write( + f"struct {mangle_name(mod_name, 'devices')} {mangle_name(mod_name, 'devices')} = {{" + ) + for device in devices: + main_file.write(f"\t.{device} = {device},\n") + main_file.write("};\n") + + def emit_main_data_structs(main_file, input_map, output_list, mod_name): main_file.write( f"struct {mangle_name(mod_name, 'inputs')} {mangle_name(mod_name, 'inputs')} = {{" @@ -359,10 +372,20 @@ def emit_main_data_setup(main_file, input_map, output_list, mod_name): main_file.write("};\n") -def emit_main_c_interface_call(main_file, mod_name): - main_file.write( - f'{mangle_name(mod_name,"run")}(&{mangle_name(mod_name,"inputs")}, &{mangle_name(mod_name,"outputs")});\n' - ) +def emit_main_c_interface_call(main_file, devices, mod_name): 
+ if devices: + main_file.write( + f'{mangle_name(mod_name,"run")}(' + f'&{mangle_name(mod_name,"inputs")}, ' + f'&{mangle_name(mod_name,"outputs")}, ' + f'&{mangle_name(mod_name,"devices")});\n' + ) + else: + main_file.write( + f'{mangle_name(mod_name,"run")}(' + f'&{mangle_name(mod_name,"inputs")}, ' + f'&{mangle_name(mod_name,"outputs")});\n' + ) def emit_main_fake_packed_values(main_file): @@ -446,7 +469,8 @@ def emit_main_init_memory_manager(main_file): main_file.write("\n") -def emit_main_epilogue(main_file): +def emit_main_epilogue(main_file, custom_epilogue): + main_file.write(custom_epilogue) main_file.write(f'printf("{AOT_SUCCESS_TOKEN}\\n");') main_file.write("return 0;") main_file.write("}\n") @@ -469,10 +493,11 @@ def emit_main_micro_include(main_file, mod_name): def create_main( test_name, - models, + compiled_models, output_path, custom_includes, custom_prologue, + custom_epilogue, data_linkage, interface_api, workspace_bytes, @@ -484,27 +509,34 @@ def create_main( emit_main_common_includes(main_file, custom_includes) if interface_api == "c": - for model in models: + for compiled_model in compiled_models: + model = compiled_model.model emit_main_micro_include(main_file, model.name) - for model in models: + for compiled_model in compiled_models: + model = compiled_model.model emit_main_data(main_file, model.inputs, model.outputs, model.name) emit_main_prologue(main_file, custom_prologue, workspace_bytes, data_linkage) emit_main_init_memory_manager(main_file) if interface_api == "c": - for model in models: + for compiled_model in compiled_models: + model = compiled_model.model + devices = compiled_model.executor_factory.get_devices() + emit_main_device_structs(main_file, devices, model.name) emit_main_data_structs(main_file, model.inputs, model.outputs, model.name) - emit_main_c_interface_call(main_file, model.name) + emit_main_c_interface_call(main_file, devices, model.name) else: emit_main_fake_packed_values(main_file) - for model in models: + for 
compiled_model in compiled_models: + model = compiled_model.model emit_main_data_setup(main_file, model.inputs, model.outputs, model.name) emit_main_packed_call(main_file, model.inputs, model.outputs, model.name) - for model in models: + for compiled_model in compiled_models: + model = compiled_model.model emit_main_compare(main_file, model.outputs, model.output_tolerance, model.name) - emit_main_epilogue(main_file) + emit_main_epilogue(main_file, custom_epilogue) def create_header_file(tensor_name, npy_data, output_path, data_linkage): @@ -647,10 +679,11 @@ def run_and_check( create_main( "test.c", - [compiled_model.model for compiled_model in models], + models, build_path, runner.includes, runner.prologue, + runner.epilogue, data_linkage, interface_api, workspace_bytes, diff --git a/tests/python/relay/aot/corstone300.mk b/tests/python/relay/aot/corstone300.mk index 553ed84277c6..bf4e3882f9d4 100644 --- a/tests/python/relay/aot/corstone300.mk +++ b/tests/python/relay/aot/corstone300.mk @@ -40,6 +40,7 @@ CC_OPTS = CC=$(CC) AR=$(AR) RANLIB=$(RANLIB) PKG_CFLAGS = ${PKG_COMPILE_OPTS} \ ${CFLAGS} \ -I$(build_dir)/../include \ + -I${TVM_ROOT}/src/runtime/contrib/ethosu/bare_metal \ -I$(CODEGEN_ROOT)/host/include \ -I${PLATFORM_PATH} \ -I${DRIVER_PATH}/include \ @@ -70,6 +71,7 @@ CMSIS_NN_LIBS = $(wildcard ${CMSIS_PATH}/CMSIS/NN/build/Source/*/*.a) ifdef ETHOSU_TEST_ROOT ETHOSU_DRIVER_LIBS = $(wildcard ${DRIVER_PATH}/build/*.a) +ETHOSU_RUNTIME=$(build_dir)/tvm_ethosu_runtime.o ETHOSU_INCLUDE=-I$(ETHOSU_TEST_ROOT) endif @@ -83,6 +85,10 @@ $(build_dir)/crt_backend_api.o: $(TVM_ROOT)/src/runtime/crt/common/crt_backend_a $(QUIET)mkdir -p $(@D) $(QUIET)$(CC) -c $(PKG_CFLAGS) -o $@ $^ +$(build_dir)/tvm_ethosu_runtime.o: $(TVM_ROOT)/src/runtime/contrib/ethosu/bare_metal/tvm_ethosu_runtime.c + $(QUIET)mkdir -p $(@D) + $(QUIET)$(CC) -c $(PKG_CFLAGS) -o $@ $^ + $(build_dir)/libcodegen.a: $(CODEGEN_SRCS) $(QUIET)cd $(abspath $(CODEGEN_ROOT)/host/src) && $(CC) -c $(PKG_CFLAGS) 
$(CODEGEN_SRCS) $(QUIET)$(AR) -cr $(abspath $(build_dir)/libcodegen.a) $(CODEGEN_OBJS) @@ -100,7 +106,7 @@ ${build_dir}/libuart.a: $(UART_SRCS) $(QUIET)$(AR) -cr $(abspath $(build_dir)/libuart.a) $(abspath $(build_dir))/libuart/*.o $(QUIET)$(RANLIB) $(abspath $(build_dir)/libuart.a) -$(build_dir)/aot_test_runner: $(build_dir)/test.c $(build_dir)/crt_backend_api.o $(build_dir)/stack_allocator.o ${build_dir}/libcmsis_startup.a ${build_dir}/libuart.a $(build_dir)/libcodegen.a $(CMSIS_NN_LIBS) $(ETHOSU_DRIVER_LIBS) +$(build_dir)/aot_test_runner: $(build_dir)/test.c $(build_dir)/crt_backend_api.o $(build_dir)/stack_allocator.o ${build_dir}/libcmsis_startup.a ${build_dir}/libuart.a $(build_dir)/libcodegen.a $(CMSIS_NN_LIBS) $(ETHOSU_DRIVER_LIBS) $(ETHOSU_RUNTIME) $(QUIET)mkdir -p $(@D) $(QUIET)$(CC) $(PKG_CFLAGS) $(ETHOSU_INCLUDE) -o $@ -Wl,--whole-archive $^ -Wl,--no-whole-archive $(PKG_LDFLAGS) From 4cea8a466d260d6f4998a6d788e0f524bc379372 Mon Sep 17 00:00:00 2001 From: Chris Sidebottom Date: Fri, 12 Nov 2021 09:44:56 +0000 Subject: [PATCH 2/2] Correct "use_device_api" attribute name on Target --- src/relay/backend/aot_executor_codegen.cc | 2 +- src/target/target_kind.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/relay/backend/aot_executor_codegen.cc b/src/relay/backend/aot_executor_codegen.cc index 1c26b3e0af76..07351a630f65 100644 --- a/src/relay/backend/aot_executor_codegen.cc +++ b/src/relay/backend/aot_executor_codegen.cc @@ -397,7 +397,7 @@ class AOTExecutorCodegen : public MixedModeVisitor { */ void CollectDeviceVariables(const Map& device_contexts) { Map target_contexts; - TargetKindAttrMap target_attr_map = tvm::TargetKind::GetAttrMap("c_device_api"); + TargetKindAttrMap target_attr_map = tvm::TargetKind::GetAttrMap("use_device_api"); for (const auto& it : device_contexts) { const GlobalVar& global_var = it.first; diff --git a/src/target/target_kind.cc b/src/target/target_kind.cc index b07b7f055310..9f7bc56b8539 100644 --- 
a/src/target/target_kind.cc +++ b/src/target/target_kind.cc @@ -373,7 +373,7 @@ TVM_REGISTER_TARGET_KIND("hybrid", kDLCPU) // line break TVM_REGISTER_TARGET_KIND("composite", kDLCPU).add_attr_option>("devices"); -TVM_REGISTER_TARGET_KIND("ethos-u", kDLCPU).set_attr("c_device_api", Bool(true)); +TVM_REGISTER_TARGET_KIND("ethos-u", kDLCPU).set_attr("use_device_api", Bool(true)); /********** Registry **********/