From 43ccb85b61c1764382d5958e81fc5c811f11f7b5 Mon Sep 17 00:00:00 2001
From: Mehrdad Hessar <mhessar@octoml.ai>
Date: Thu, 28 Apr 2022 16:29:26 -0700
Subject: [PATCH 01/13] Added conv2d relay test for each schedule

Enable relay tests in qemu
---
 python/tvm/autotvm/__init__.py                |   1 +
 python/tvm/autotvm/task/__init__.py           |   1 +
 python/tvm/autotvm/task/dispatcher.py         |  44 +++++
 .../tvm/micro/testing}/aot_test_utils.py      |  84 +++++----
 .../micro/{testing.py => testing/utils.py}    |   0
 tests/micro/zephyr/test_utils.py              |   2 +-
 tests/micro/zephyr/test_zephyr.py             |   2 +-
 tests/micro/zephyr/test_zephyr_aot.py         |   1 -
 .../contrib/test_cmsisnn/test_binary_ops.py   |   2 +-
 .../contrib/test_cmsisnn/test_conv2d.py       |   2 +-
 .../test_cmsisnn/test_fully_connected.py      |   2 +-
 .../test_cmsisnn/test_invalid_graphs.py       |   2 +-
 .../contrib/test_cmsisnn/test_networks.py     |   2 +-
 .../contrib/test_cmsisnn/test_pooling.py      |   2 +-
 .../contrib/test_cmsisnn/test_softmax.py      |   2 +-
 tests/python/contrib/test_ethosu/infra.py     |   2 +-
 .../contrib/test_ethosu/test_codegen.py       |   2 +-
 .../contrib/test_ethosu/test_networks.py      |   2 +-
 .../integration/test_arm_mprofile_dsp.py      |   2 +-
 tests/python/relay/aot/test_c_device_api.py   |   2 +-
 tests/python/relay/aot/test_cpp_aot.py        |   7 +-
 tests/python/relay/aot/test_crt_aot.py        |   2 +-
 tests/python/relay/aot/test_crt_aot_usmp.py   |   2 +-
 .../strategy/arm_cpu/test_conv2d_nchw.py      | 130 ++++++++++++++
 .../strategy/arm_cpu/test_conv2d_nhwc.py      | 163 ++++++++++++++++++
 .../strategy/arm_cpu/test_depthwise_conv2d.py | 157 +++++++++++++++++
 .../strategy/arm_cpu/test_group_conv2d.py     | 155 +++++++++++++++++
 tests/python/relay/utils/external_codegen.py  |   2 +-
 tests/python/unittest/test_crt.py             |   2 +-
 tests/scripts/task_python_microtvm.sh         |   2 +
 30 files changed, 731 insertions(+), 50 deletions(-)
 rename {tests/python/relay/aot => python/tvm/micro/testing}/aot_test_utils.py (92%)
 rename python/tvm/micro/{testing.py => testing/utils.py} (100%)
 create mode 100644 tests/python/relay/strategy/arm_cpu/test_conv2d_nchw.py
 create mode 100644 tests/python/relay/strategy/arm_cpu/test_conv2d_nhwc.py
 create mode 100644 tests/python/relay/strategy/arm_cpu/test_depthwise_conv2d.py
 create mode 100644 tests/python/relay/strategy/arm_cpu/test_group_conv2d.py

diff --git a/python/tvm/autotvm/__init__.py b/python/tvm/autotvm/__init__.py
index a3c59252b01a..5a7d00960ecd 100644
--- a/python/tvm/autotvm/__init__.py
+++ b/python/tvm/autotvm/__init__.py
@@ -60,5 +60,6 @@
     FallbackContext,
     ApplyHistoryBest as apply_history_best,
     ApplyGraphBest as apply_graph_best,
+    ApplyFixedConfig as apply_fixed_config,
 )
 from .env import GLOBAL_SCOPE
diff --git a/python/tvm/autotvm/task/__init__.py b/python/tvm/autotvm/task/__init__.py
index 6eea62264d7d..3949d324c4df 100644
--- a/python/tvm/autotvm/task/__init__.py
+++ b/python/tvm/autotvm/task/__init__.py
@@ -36,6 +36,7 @@
 from .dispatcher import (
     DispatchContext,
     ApplyConfig,
+    ApplyFixedConfig,
     ApplyHistoryBest,
     FallbackContext,
     clear_fallback_cache,
diff --git a/python/tvm/autotvm/task/dispatcher.py b/python/tvm/autotvm/task/dispatcher.py
index ffff50b9dc0b..a5b0367acb4c 100644
--- a/python/tvm/autotvm/task/dispatcher.py
+++ b/python/tvm/autotvm/task/dispatcher.py
@@ -178,6 +178,50 @@ def update(self, target, workload, cfg):
         self._config = cfg
 
 
+class ApplyFixedConfig(DispatchContext):
+    """Apply a config of a deterministic schedule.
+
+    Parameters
+    ----------
+    tasks : list[tvm.autotvm.task.task.Task]
+        List of autoTVM tasks.
+    schedule_name : str
+        Name of schedule to use.
+    """
+
+    def __init__(self, tasks, schedule_name: str):
+        super(ApplyFixedConfig, self).__init__()
+        self._schedule_name = schedule_name
+        self._tasks = tasks
+        self.workload = None
+
+    def _query_inside(self, target, workload):
+        """Return config 0 of the task named workload[0], costed to favor schedule_name."""
+        self.workload = workload
+
+        # Create a config from the matching task; None means no task matched (RuntimeError below).
+        config = None
+        for task in self._tasks:
+            if task.name == workload[0]:
+                config = task.config_space.get(0)
+                break
+        if config is None:
+            raise RuntimeError(
+                "workload: %s does not exist in %s" % (str(workload), str(self._tasks))
+            )
+        # Add low cost to the target schedule and high cost to others.
+        if workload[0] == self._schedule_name:
+            config.cost = 0.000001
+        else:
+            config.cost = 100000
+        return config
+
+    def update(self, target, workload, cfg):
+        """Record the latest workload and config (overrides DispatchContext.update)."""
+        self.workload = workload
+        self._config = cfg
+
+
 class ApplyHistoryBest(DispatchContext):
     """
     Apply the history best config
diff --git a/tests/python/relay/aot/aot_test_utils.py b/python/tvm/micro/testing/aot_test_utils.py
similarity index 92%
rename from tests/python/relay/aot/aot_test_utils.py
rename to python/tvm/micro/testing/aot_test_utils.py
index 2c4262a3d2be..b56649a9b0d9 100644
--- a/tests/python/relay/aot/aot_test_utils.py
+++ b/python/tvm/micro/testing/aot_test_utils.py
@@ -18,11 +18,9 @@
 import sys
 import datetime
 import itertools
-import json
 import logging
 import os
 import pathlib
-import platform
 import re
 import shutil
 import subprocess
@@ -38,12 +36,13 @@
 import tvm
 from tvm import relay
 from tvm import te
+from tvm import autotvm
 from tvm.contrib import utils, graph_executor
 from tvm.relay.backend import te_compiler, Executor, Runtime
 from tvm.relay.backend.te_compiler import TECompiler
 from tvm.relay.backend.utils import mangle_module_name
 from tvm.micro import export_model_library_format
-from tvm.micro.testing import mlf_extract_workspace_size_bytes
+from tvm.micro.testing.utils import mlf_extract_workspace_size_bytes
 
 _LOG = logging.getLogger(__name__)
 
@@ -683,6 +682,7 @@ def compile_models(
     use_runtime_executor: bool = True,
     target: tvm.target.Target = tvm.target.Target("c"),
     workspace_memory_pools=None,
+    schedule_name: str = None,
 ) -> List[AOTCompiledTestModel]:
     """
     This method generates runtime.Modules for the tests
@@ -708,31 +708,52 @@ def compile_models(
 
     compiled_mods = list()
     for model in models:
-        with tvm.transform.PassContext(opt_level=3, config=config):
-            # TODO(Mousius) - Remove once executor/runtime are fully removed from Target
-            if use_runtime_executor:
-                executor_factory = tvm.relay.build(
-                    model.module,
-                    target,
-                    executor=executor,
-                    runtime=runtime,
-                    workspace_memory_pools=workspace_memory_pools,
-                    params=model.params,
-                    mod_name=model.name,
-                )
-                compiled_mods.append(
-                    AOTCompiledTestModel(model=model, executor_factory=executor_factory)
-                )
-            else:
-                executor_factory = tvm.relay.build(
-                    model.module,
-                    tvm.target.Target(target, host=target),
-                    params=model.params,
-                    mod_name=model.name,
-                )
-                compiled_mods.append(
-                    AOTCompiledTestModel(model=model, executor_factory=executor_factory)
-                )
+        if schedule_name:
+            # Testing with deterministic schedule
+            task_list = autotvm.task.extract_from_program(
+                model.module, target=target, params=model.params
+            )
+            with tvm.autotvm.apply_fixed_config(task_list, schedule_name):
+                with tvm.transform.PassContext(opt_level=3, config=config):
+                    if use_runtime_executor:
+                        executor_factory = tvm.relay.build(
+                            model.module,
+                            target,
+                            executor=executor,
+                            runtime=runtime,
+                            workspace_memory_pools=workspace_memory_pools,
+                            params=model.params,
+                            mod_name=model.name,
+                        )
+                        compiled_mods.append(
+                            AOTCompiledTestModel(model=model, executor_factory=executor_factory)
+                        )
+        else:
+            with tvm.transform.PassContext(opt_level=3, config=config):
+                # TODO(Mousius) - Remove once executor/runtime are fully removed from Target
+                if use_runtime_executor:
+                    executor_factory = tvm.relay.build(
+                        model.module,
+                        target,
+                        executor=executor,
+                        runtime=runtime,
+                        workspace_memory_pools=workspace_memory_pools,
+                        params=model.params,
+                        mod_name=model.name,
+                    )
+                    compiled_mods.append(
+                        AOTCompiledTestModel(model=model, executor_factory=executor_factory)
+                    )
+                else:
+                    executor_factory = tvm.relay.build(
+                        model.module,
+                        tvm.target.Target(target, host=target),
+                        params=model.params,
+                        mod_name=model.name,
+                    )
+                    compiled_mods.append(
+                        AOTCompiledTestModel(model=model, executor_factory=executor_factory)
+                    )
     return compiled_mods
 
 
@@ -830,8 +851,9 @@ def run_and_check_body(base_path):
 
         # Verify that compiles fine
         file_dir = os.path.dirname(os.path.abspath(__file__))
+        makefile_dir = os.path.join(file_dir, "../../../../tests/python/relay/aot")
         codegen_path = os.path.join(base_path, "codegen")
-        makefile = os.path.join(file_dir, f"{runner.makefile}.mk")
+        makefile = os.path.join(makefile_dir, f"{runner.makefile}.mk")
         fvp_dir = "/opt/arm/FVP_Corstone_SSE-300/models/Linux64_GCC-6.4/"
         # TODO(@grant-arm): Remove once ci_cpu docker image has been updated to FVP_Corstone_SSE
         if not os.path.isdir(fvp_dir):
@@ -843,7 +865,7 @@ def run_and_check_body(base_path):
             f"make -f {makefile} build_dir={build_path}"
             + f" CFLAGS='{cflags}'"
             + f" TVM_ROOT={file_dir}/../../../.."
-            + f" AOT_TEST_ROOT={file_dir}"
+            + f" AOT_TEST_ROOT={makefile_dir}"
             + f" CODEGEN_ROOT={codegen_path}"
             + f" STANDALONE_CRT_DIR={tvm.micro.get_standalone_crt_dir()}"
             + f" FVP_DIR={fvp_dir}"
@@ -895,6 +917,7 @@ def compile_and_run(
     target_opts: Dict = None,
     test_dir: str = None,
     verbose: bool = False,
+    schedule_name: str = None,
 ):
     """This is a wrapper API to compile and run models as test for AoT
 
@@ -919,6 +942,7 @@ def compile_and_run(
         pass_config=runner.pass_config,
         use_runtime_executor=use_runtime_executor,
         target=tvm.target.Target(target),
+        schedule_name=schedule_name,
     )
 
     run_and_check(
diff --git a/python/tvm/micro/testing.py b/python/tvm/micro/testing/utils.py
similarity index 100%
rename from python/tvm/micro/testing.py
rename to python/tvm/micro/testing/utils.py
diff --git a/tests/micro/zephyr/test_utils.py b/tests/micro/zephyr/test_utils.py
index e0aad7c3c6d5..4fd3e39fd1c0 100644
--- a/tests/micro/zephyr/test_utils.py
+++ b/tests/micro/zephyr/test_utils.py
@@ -32,7 +32,7 @@
 import tvm.micro
 from tvm.micro import export_model_library_format
 from tvm.micro.model_library_format import generate_c_interface_header
-from tvm.micro.testing import (
+from tvm.micro.testing.utils import (
     mlf_extract_workspace_size_bytes,
     aot_transport_init_wait,
     aot_transport_find_message,
diff --git a/tests/micro/zephyr/test_zephyr.py b/tests/micro/zephyr/test_zephyr.py
index 1582d7e4a5fe..49e5e2757b20 100644
--- a/tests/micro/zephyr/test_zephyr.py
+++ b/tests/micro/zephyr/test_zephyr.py
@@ -30,7 +30,7 @@
 from tvm.relay.backend import Executor, Runtime
 from tvm.relay.testing import byoc
 from tvm.contrib import utils
-from tvm.micro.testing import check_tune_log
+from tvm.micro.testing.utils import check_tune_log
 
 import test_utils
 
diff --git a/tests/micro/zephyr/test_zephyr_aot.py b/tests/micro/zephyr/test_zephyr_aot.py
index 87c7dc92fbda..6b355f28de4b 100644
--- a/tests/micro/zephyr/test_zephyr_aot.py
+++ b/tests/micro/zephyr/test_zephyr_aot.py
@@ -33,7 +33,6 @@
 from tvm.relay.backend import Executor, Runtime
 
 from tvm.contrib.download import download_testdata
-from tvm.micro.testing import aot_transport_init_wait, aot_transport_find_message
 
 import test_utils
 
diff --git a/tests/python/contrib/test_cmsisnn/test_binary_ops.py b/tests/python/contrib/test_cmsisnn/test_binary_ops.py
index 028ab406243f..a9957e779867 100644
--- a/tests/python/contrib/test_cmsisnn/test_binary_ops.py
+++ b/tests/python/contrib/test_cmsisnn/test_binary_ops.py
@@ -36,7 +36,7 @@
     assert_partitioned_function,
     assert_no_external_function,
 )
-from tests.python.relay.aot.aot_test_utils import (
+from tvm.micro.testing.aot_test_utils import (
     AOTTestModel,
     AOT_CORSTONE300_RUNNER,
     AOT_USMP_CORSTONE300_RUNNER,
diff --git a/tests/python/contrib/test_cmsisnn/test_conv2d.py b/tests/python/contrib/test_cmsisnn/test_conv2d.py
index 47245f60e15e..51afb943c370 100644
--- a/tests/python/contrib/test_cmsisnn/test_conv2d.py
+++ b/tests/python/contrib/test_cmsisnn/test_conv2d.py
@@ -24,7 +24,7 @@
 from tvm.relay.op.contrib import cmsisnn
 
 
-from tests.python.relay.aot.aot_test_utils import (
+from tvm.micro.testing.aot_test_utils import (
     AOTTestModel,
     AOT_CORSTONE300_RUNNER,
     AOT_USMP_CORSTONE300_RUNNER,
diff --git a/tests/python/contrib/test_cmsisnn/test_fully_connected.py b/tests/python/contrib/test_cmsisnn/test_fully_connected.py
index ec2e9bbdcca7..d23f3ed4c425 100644
--- a/tests/python/contrib/test_cmsisnn/test_fully_connected.py
+++ b/tests/python/contrib/test_cmsisnn/test_fully_connected.py
@@ -24,7 +24,7 @@
 from tvm.relay.op.contrib import cmsisnn
 
 
-from tests.python.relay.aot.aot_test_utils import (
+from tvm.micro.testing.aot_test_utils import (
     AOTTestModel,
     AOT_CORSTONE300_RUNNER,
     AOT_USMP_CORSTONE300_RUNNER,
diff --git a/tests/python/contrib/test_cmsisnn/test_invalid_graphs.py b/tests/python/contrib/test_cmsisnn/test_invalid_graphs.py
index 7808fbf7752f..286502e0bd5b 100644
--- a/tests/python/contrib/test_cmsisnn/test_invalid_graphs.py
+++ b/tests/python/contrib/test_cmsisnn/test_invalid_graphs.py
@@ -23,7 +23,7 @@
 from tvm import relay
 
 
-from tests.python.relay.aot.aot_test_utils import (
+from tvm.micro.testing.aot_test_utils import (
     AOTTestModel,
     AOT_USMP_CORSTONE300_RUNNER,
     generate_ref_data,
diff --git a/tests/python/contrib/test_cmsisnn/test_networks.py b/tests/python/contrib/test_cmsisnn/test_networks.py
index a6e77515859e..efef27af0c1e 100644
--- a/tests/python/contrib/test_cmsisnn/test_networks.py
+++ b/tests/python/contrib/test_cmsisnn/test_networks.py
@@ -28,7 +28,7 @@
 from tvm.relay.op.contrib import cmsisnn
 
 from utils import skip_if_no_reference_system, get_range_for_dtype_str
-from tests.python.relay.aot.aot_test_utils import (
+from tvm.micro.testing.aot_test_utils import (
     AOTTestModel,
     AOT_CORSTONE300_RUNNER,
     AOT_USMP_CORSTONE300_RUNNER,
diff --git a/tests/python/contrib/test_cmsisnn/test_pooling.py b/tests/python/contrib/test_cmsisnn/test_pooling.py
index cca1288ac2a0..6099f6292f32 100644
--- a/tests/python/contrib/test_cmsisnn/test_pooling.py
+++ b/tests/python/contrib/test_cmsisnn/test_pooling.py
@@ -24,7 +24,7 @@
 from tvm.relay.op.contrib import cmsisnn
 
 
-from tests.python.relay.aot.aot_test_utils import (
+from tvm.micro.testing.aot_test_utils import (
     AOTTestModel,
     AOT_CORSTONE300_RUNNER,
     AOT_USMP_CORSTONE300_RUNNER,
diff --git a/tests/python/contrib/test_cmsisnn/test_softmax.py b/tests/python/contrib/test_cmsisnn/test_softmax.py
index 6eac76d841b4..36c104628fe5 100644
--- a/tests/python/contrib/test_cmsisnn/test_softmax.py
+++ b/tests/python/contrib/test_cmsisnn/test_softmax.py
@@ -34,7 +34,7 @@
     assert_partitioned_function,
     assert_no_external_function,
 )
-from tests.python.relay.aot.aot_test_utils import (
+from tvm.micro.testing.aot_test_utils import (
     AOTTestModel,
     AOT_CORSTONE300_RUNNER,
     AOT_USMP_CORSTONE300_RUNNER,
diff --git a/tests/python/contrib/test_ethosu/infra.py b/tests/python/contrib/test_ethosu/infra.py
index 0c42b024f274..1d32def5f638 100644
--- a/tests/python/contrib/test_ethosu/infra.py
+++ b/tests/python/contrib/test_ethosu/infra.py
@@ -47,7 +47,7 @@
 import tvm.relay.testing.tf as tf_testing
 
 from tvm.relay.op.contrib.ethosu import partition_for_ethosu
-from tests.python.relay.aot.aot_test_utils import (
+from tvm.micro.testing.aot_test_utils import (
     AOTCompiledTestModel,
     AOTDataLinkage,
     AOTTestModel,
diff --git a/tests/python/contrib/test_ethosu/test_codegen.py b/tests/python/contrib/test_ethosu/test_codegen.py
index 4268392f1b78..0444794fdf6b 100644
--- a/tests/python/contrib/test_ethosu/test_codegen.py
+++ b/tests/python/contrib/test_ethosu/test_codegen.py
@@ -29,7 +29,7 @@
 from tvm.relay.backend.contrib.ethosu import util
 
 from tvm.relay.op.contrib.ethosu import partition_for_ethosu
-from tests.python.relay.aot.aot_test_utils import generate_ref_data
+from tvm.micro.testing.aot_test_utils import generate_ref_data
 
 from . import infra
 
diff --git a/tests/python/contrib/test_ethosu/test_networks.py b/tests/python/contrib/test_ethosu/test_networks.py
index e04cabe79d2f..1fb757ec989e 100644
--- a/tests/python/contrib/test_ethosu/test_networks.py
+++ b/tests/python/contrib/test_ethosu/test_networks.py
@@ -24,7 +24,7 @@
 from tvm.relay.op.contrib.ethosu import partition_for_ethosu
 from tvm.micro import model_library_format as mlf
 
-from tests.python.relay.aot.aot_test_utils import convert_to_relay
+from tvm.micro.testing.aot_test_utils import convert_to_relay
 
 from . import infra
 
diff --git a/tests/python/integration/test_arm_mprofile_dsp.py b/tests/python/integration/test_arm_mprofile_dsp.py
index 484c19fa222c..6a18fc3b6695 100644
--- a/tests/python/integration/test_arm_mprofile_dsp.py
+++ b/tests/python/integration/test_arm_mprofile_dsp.py
@@ -20,7 +20,7 @@
 import tvm
 import tvm.testing
 from tvm import relay
-from tests.python.relay.aot.aot_test_utils import (
+from tvm.micro.testing.aot_test_utils import (
     AOTTestModel,
     AOT_CORSTONE300_RUNNER,
     generate_ref_data,
diff --git a/tests/python/relay/aot/test_c_device_api.py b/tests/python/relay/aot/test_c_device_api.py
index f9fa0c6eadbb..771b80127c62 100644
--- a/tests/python/relay/aot/test_c_device_api.py
+++ b/tests/python/relay/aot/test_c_device_api.py
@@ -24,7 +24,7 @@
 
 from tvm import relay
 from tvm.ir.module import IRModule
-from aot_test_utils import (
+from tvm.micro.testing.aot_test_utils import (
     AOT_DEFAULT_RUNNER,
     AOTTestModel,
     generate_ref_data,
diff --git a/tests/python/relay/aot/test_cpp_aot.py b/tests/python/relay/aot/test_cpp_aot.py
index cdcc61c33ac7..545788c4b1a3 100644
--- a/tests/python/relay/aot/test_cpp_aot.py
+++ b/tests/python/relay/aot/test_cpp_aot.py
@@ -27,7 +27,12 @@
 from tvm import IRModule
 from tvm import relay
 from tvm.relay import backend, testing
-from aot_test_utils import AOT_DEFAULT_RUNNER, AOTTestModel, generate_ref_data, compile_and_run
+from tvm.micro.testing.aot_test_utils import (
+    AOT_DEFAULT_RUNNER,
+    AOTTestModel,
+    generate_ref_data,
+    compile_and_run,
+)
 
 
 def test_error_c_interface():
diff --git a/tests/python/relay/aot/test_crt_aot.py b/tests/python/relay/aot/test_crt_aot.py
index 2991cc01fc92..3bd9f2a3989a 100644
--- a/tests/python/relay/aot/test_crt_aot.py
+++ b/tests/python/relay/aot/test_crt_aot.py
@@ -37,7 +37,7 @@
 from tvm.micro import model_library_format as mlf
 from tvm.micro import export_model_library_format
 from tvm.ir.instrument import pass_instrument
-from aot_test_utils import (
+from tvm.micro.testing.aot_test_utils import (
     AOTTestModel,
     AOT_DEFAULT_RUNNER,
     generate_ref_data,
diff --git a/tests/python/relay/aot/test_crt_aot_usmp.py b/tests/python/relay/aot/test_crt_aot_usmp.py
index 650cb4526f09..abf898db3aad 100644
--- a/tests/python/relay/aot/test_crt_aot_usmp.py
+++ b/tests/python/relay/aot/test_crt_aot_usmp.py
@@ -32,7 +32,7 @@
 from tvm.relay.backend import Executor, Runtime
 from tvm import WorkspaceMemoryPools, PoolInfo
 from tvm.micro import model_library_format as mlf
-from aot_test_utils import (
+from tvm.micro.testing.aot_test_utils import (
     AOTTestModel,
     AOTTestRunner,
     generate_ref_data,
diff --git a/tests/python/relay/strategy/arm_cpu/test_conv2d_nchw.py b/tests/python/relay/strategy/arm_cpu/test_conv2d_nchw.py
new file mode 100644
index 000000000000..a45a1eecc12a
--- /dev/null
+++ b/tests/python/relay/strategy/arm_cpu/test_conv2d_nchw.py
@@ -0,0 +1,130 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import sys
+import numpy as np
+import pytest
+import tvm
+import tvm.testing
+from tvm import relay
+from tvm.micro.testing.aot_test_utils import (
+    AOTTestModel,
+    AOT_CORSTONE300_RUNNER,
+    generate_ref_data,
+    compile_and_run,
+)
+
+
+class BasicConv2dTests:
+    @tvm.testing.requires_corstone300
+    def test_conv2d(
+        self,
+        data_shape,
+        kernel_size,
+        kernel_layout,
+        num_filter,
+        strides,
+        padding,
+        dilation,
+        dtype,
+        schedule_name,
+    ):
+        """Test a subgraph with a single conv2d_nchw operator."""
+        ishape = data_shape
+        wshape = (num_filter, data_shape[1], *kernel_size)
+        weight_data = np.random.randint(low=-10, high=10, size=wshape, dtype=dtype)
+
+        input0 = relay.var("input", relay.TensorType(ishape, dtype))
+        weight0 = relay.const(weight_data)
+        out0 = relay.op.nn.conv2d(
+            input0,
+            weight0,
+            kernel_size=kernel_size,
+            strides=strides,
+            padding=padding,
+            dilation=(dilation, dilation),
+            data_layout="NCHW",
+            kernel_layout="OIHW",
+            out_dtype="int32",
+            out_layout="NCHW",
+        )
+        ref_mod = tvm.IRModule.from_expr(relay.Function([input0], out0))
+
+        input1 = relay.var("input", relay.TensorType(ishape, dtype))
+        weight1 = relay.const(weight_data)
+
+        out1 = relay.op.nn.conv2d(
+            input1,
+            weight1,
+            kernel_size=kernel_size,
+            strides=strides,
+            padding=padding,
+            dilation=(dilation, dilation),
+            data_layout="NCHW",
+            kernel_layout=kernel_layout,
+            out_dtype="int32",
+            out_layout="NCHW",
+        )
+        mod = tvm.IRModule.from_expr(relay.Function([input1], out1))
+
+        inputs = {"input": np.random.randint(low=-128, high=127, size=ishape, dtype=dtype)}
+        output_list = generate_ref_data(ref_mod, inputs)
+
+        compile_and_run(
+            AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
+            runner=AOT_CORSTONE300_RUNNER,
+            interface_api="c",
+            use_unpacked_api=True,
+            target_opts={
+                "-keys": "arm_cpu",
+                "-mcpu": "cortex-m7",
+            },
+            schedule_name=schedule_name,
+        )
+
+
+# TODO(mehrdadh): Add hardware that supports this schedule
+# class TestConv2d_OIHW_int8_large_kernel(BasicConv2dTests):
+#     """This test is for conv2d_nchw_int8.arm_cpu schedule."""
+
+#     data_shape, kernel_size, num_filter, strides, padding, dilation = tvm.testing.parameters(
+#         ((1, 64, 32, 32), (3, 3), 12, 1, 0, 1),
+#         ((1, 128, 32, 32), (3, 3), 12, 1, 0, 1),
+#         ((1, 64, 32, 32), (5, 5), 16, 1, 0, 1),
+#     )
+
+#     dtype = tvm.testing.parameter("int8")
+#     kernel_layout = tvm.testing.parameter("OIHW")
+#     schedule_name = tvm.testing.parameter("conv2d_nchw_int8.arm_cpu")
+
+
+class TestConv2d_OIHW_small_kernel(BasicConv2dTests):
+    """This test is for conv2d_nchw_spatial_pack.arm_cpu schedule."""
+
+    data_shape, kernel_size, num_filter, strides, padding, dilation, dtype = tvm.testing.parameters(
+        ((1, 16, 32, 32), (3, 3), 12, 1, 0, 1, "int8"),
+        ((1, 16, 32, 32), (3, 3), 12, 1, 0, 1, "int16"),
+        ((1, 32, 16, 16), (3, 3), 12, 1, 0, 1, "int16"),
+    )
+    kernel_layout = tvm.testing.parameter("OIHW")
+    schedule_name = tvm.testing.parameter("conv2d_nchw_spatial_pack.arm_cpu")
+
+
+# TODO(mehrdadh): Add test for `OIHW\d*o` layout format for conv2d_nchw_spatial_pack.arm_cpu schedule.
+
+
+if __name__ == "__main__":
+    sys.exit(pytest.main([__file__] + sys.argv[1:]))
diff --git a/tests/python/relay/strategy/arm_cpu/test_conv2d_nhwc.py b/tests/python/relay/strategy/arm_cpu/test_conv2d_nhwc.py
new file mode 100644
index 000000000000..5e312f3d7057
--- /dev/null
+++ b/tests/python/relay/strategy/arm_cpu/test_conv2d_nhwc.py
@@ -0,0 +1,163 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import sys
+import numpy as np
+import pytest
+import tvm
+import tvm.testing
+from tvm import relay
+from tvm.micro.testing.aot_test_utils import (
+    AOTTestModel,
+    AOT_CORSTONE300_RUNNER,
+    generate_ref_data,
+    compile_and_run,
+)
+
+
+class BasicConv2dTests:
+    @tvm.testing.requires_corstone300
+    def test_conv2d(
+        self,
+        data_shape,
+        kernel_size,
+        kernel_layout,
+        num_filter,
+        strides,
+        padding,
+        dilation,
+        dtype,
+        schedule_name,
+    ):
+        """Test a subgraph with a single conv2d operator."""
+        ishape = data_shape
+        wshape = (*kernel_size, data_shape[-1], num_filter)
+
+        weight_data = np.random.randint(low=-10, high=10, size=wshape, dtype=dtype)
+
+        input0 = relay.var("input", relay.TensorType(ishape, dtype))
+        weight0 = relay.const(weight_data)
+        out0 = relay.op.nn.conv2d(
+            input0,
+            weight0,
+            kernel_size=kernel_size,
+            strides=strides,
+            padding=padding,
+            dilation=(dilation, dilation),
+            data_layout="NHWC",
+            kernel_layout="HWIO",
+            out_dtype="int32",
+            out_layout="NHWC",
+        )
+        ref_mod = tvm.IRModule.from_expr(relay.Function([input0], out0))
+
+        input1 = relay.var("input", relay.TensorType(ishape, dtype))
+
+        if kernel_layout == "HWOI":
+            weight1 = relay.const(np.moveaxis(weight_data, 2, -1))
+        elif kernel_layout == "HWIO":
+            weight1 = relay.const(weight_data)
+
+        out1 = relay.op.nn.conv2d(
+            input1,
+            weight1,
+            kernel_size=kernel_size,
+            strides=strides,
+            padding=padding,
+            dilation=(dilation, dilation),
+            data_layout="NHWC",
+            kernel_layout=kernel_layout,
+            out_dtype="int32",
+            out_layout="NHWC",
+        )
+        mod = tvm.IRModule.from_expr(relay.Function([input1], out1))
+
+        inputs = {"input": np.random.randint(low=-128, high=127, size=ishape, dtype=dtype)}
+        output_list = generate_ref_data(ref_mod, inputs)
+
+        compile_and_run(
+            AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
+            runner=AOT_CORSTONE300_RUNNER,
+            interface_api="c",
+            use_unpacked_api=True,
+            target_opts={
+                "-keys": "arm_cpu",
+                "-mcpu": "cortex-m7",
+            },
+            schedule_name=schedule_name,
+        )
+
+
+class TestConv2d_DSP_HWOI(BasicConv2dTests):
+    """This test is for conv2d_nhwc_dsp.arm_cpu schedule."""
+
+    data_shape, kernel_size, num_filter, strides, padding, dilation = tvm.testing.parameters(
+        # TODO(mehrdadh): Fails due to https://github.com/apache/tvm/issues/11216
+        # ((1, 32, 32, 1), (3, 3), 12, 1, 0, 1),
+        # ((1, 32, 10, 3), (3, 3), 16, 1, 0, 1),
+        # ((1, 49, 10, 1), (10, 4), 64, (2, 1), (4, 1, 5, 1), 1),
+        ((1, 32, 32, 16), (3, 3), 16, 1, (0, 2, 2, 0), 1),
+        ((1, 32, 32, 16), (3, 3), 16, 1, 0, 1),
+        ((1, 32, 32, 16), (3, 3), 16, 1, 0, 1),
+        ((1, 32, 32, 16), (3, 3), 16, 1, (0, 2, 2, 0), 2),
+        ((1, 32, 32, 16), (3, 3), 16, 1, (1, 1, 2, 2), 2),
+        # from Keyword Spotting model from MLPerfTiny models
+        # TODO(mehrdadh): Fails due to https://github.com/apache/tvm/issues/11216
+        # ((1, 49, 10, 1), (10, 4), 64, (2, 2), (4, 1, 5, 1), 1),
+        # from Visual Wake Word model from MLPerfTiny models
+        # TODO(mehrdadh): fails due to https://github.com/apache/tvm/issues/11216
+        # ((1, 96, 96, 3), (3, 3), 8, (2, 2), (0, 0, 1, 1), 1),
+        # from Image Classification model from MLPerfTiny models
+        ((1, 16, 16, 32), (1, 1), 64, (2, 2), 0, 1),
+        ((4, 16, 16, 8), (5, 5), 8, 2, (0, 4, 4, 0), 1),
+        ((4, 16, 16, 8), (5, 5), 16, 2, (0, 4, 4, 0), 1),
+        ((4, 16, 16, 8), (5, 5), 8, 2, 0, 1),
+        ((4, 16, 16, 8), (5, 5), 16, 2, 0, 1),
+        ((1, 16, 16, 8), (3, 3), 16, 2, (0, 0, 1, 1), 1),
+        ((1, 16, 16, 8), (3, 3), 16, 2, (1, 1, 2, 2), 1),
+        ((1, 16, 16, 8), (5, 5), 16, 2, (3, 3, 2, 2), 1),
+        ((1, 16, 16, 8), (3, 3), 16, 2, (0, 1, 2, 3), 1),
+    )
+    dtype = tvm.testing.parameter("int8", "int16")
+    kernel_layout = tvm.testing.parameter("HWOI")
+    schedule_name = tvm.testing.parameter("conv2d_nhwc_dsp.arm_cpu")
+
+
+class TestConv2d_HWIO(BasicConv2dTests):
+    """This test is for conv2d_nhwc_spatial_pack.arm_cpu schedule."""
+
+    data_shape, kernel_size, num_filter, strides, padding, dilation = tvm.testing.parameters(
+        ((1, 32, 32, 1), (3, 3), 12, 1, 0, 1),
+        ((1, 32, 10, 3), (3, 3), 16, 1, 0, 1),
+        ((1, 49, 10, 1), (10, 4), 64, (2, 1), (4, 1, 5, 1), 1),
+        ((1, 32, 32, 16), (3, 3), 16, 1, (0, 2, 2, 0), 1),
+        ((1, 32, 32, 16), (3, 3), 16, 1, 0, 1),
+        ((1, 32, 32, 16), (3, 3), 16, 1, 0, 1),
+        ((1, 32, 32, 16), (3, 3), 16, 1, (0, 2, 2, 0), 2),
+        ((1, 32, 32, 16), (3, 3), 16, 1, (1, 1, 2, 2), 2),
+    )
+    dtype = tvm.testing.parameter("int8", "int16")
+    kernel_layout = tvm.testing.parameter("HWIO")
+    schedule_name = tvm.testing.parameter("conv2d_nhwc_spatial_pack.arm_cpu")
+
+
+# TODO(mehrdadh): Add test for conv2d_NHWC_quantized_native.arm_cpu
+
+# TODO(mehrdadh): Add test for conv2d_NHWC_quantized_interleaved.arm_cpu
+
+
+if __name__ == "__main__":
+    sys.exit(pytest.main([__file__] + sys.argv[1:]))
diff --git a/tests/python/relay/strategy/arm_cpu/test_depthwise_conv2d.py b/tests/python/relay/strategy/arm_cpu/test_depthwise_conv2d.py
new file mode 100644
index 000000000000..bcf82c124860
--- /dev/null
+++ b/tests/python/relay/strategy/arm_cpu/test_depthwise_conv2d.py
@@ -0,0 +1,157 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import sys
+import numpy as np
+import pytest
+import tvm
+import tvm.testing
+from tvm import relay
+from tvm.micro.testing.aot_test_utils import (
+    AOTTestModel,
+    AOT_CORSTONE300_RUNNER,
+    generate_ref_data,
+    compile_and_run,
+)
+
+
+class BasicDepthwiseConv2dTests:
+    @tvm.testing.requires_corstone300
+    def test_conv2d(
+        self,
+        data_shape,
+        data_layout,
+        kernel_size,
+        kernel_layout,
+        num_filter,
+        strides,
+        padding,
+        dilation,
+        dtype,
+        schedule_name,
+    ):
+        """Test a subgraph with a single depthwise conv2d operator."""
+        ishape = data_shape
+        groups = num_filter  # depthwise: one group per output channel
+
+        assert groups > 1, "groups should be more than 1 to create a depthwise conv2d."
+
+        if data_layout == "NCHW" and kernel_layout == "OIHW":
+            assert (
+                num_filter == data_shape[1]
+            ), f"Output channels({num_filter}) should be equal to input channels({data_shape[1]})."
+            wshape = (num_filter, data_shape[1] // groups, *kernel_size)
+        elif data_layout == "NHWC" and kernel_layout == "HWOI":
+            assert (
+                num_filter == data_shape[3]
+            ), f"Output channels({num_filter}) should be equal to input channels({data_shape[3]})."
+            wshape = (*kernel_size, num_filter, data_shape[3] // groups)
+        else:
+            raise ValueError(
+                f"Incorrect data layout({data_layout}) and kernel layout({kernel_layout})."
+            )
+
+        weight_data = np.random.randint(low=-10, high=10, size=wshape, dtype=dtype)
+
+        input0 = relay.var("input", relay.TensorType(ishape, dtype))
+        weight0 = relay.const(weight_data)
+        out0 = relay.op.nn.conv2d(
+            input0,
+            weight0,
+            kernel_size=kernel_size,
+            strides=strides,
+            padding=padding,
+            groups=groups,
+            dilation=(dilation, dilation),
+            data_layout=data_layout,
+            kernel_layout=kernel_layout,
+            out_dtype="int32",
+            out_layout=data_layout,
+        )
+        ref_mod = tvm.IRModule.from_expr(relay.Function([input0], out0))  # reference graph
+
+        input1 = relay.var("input", relay.TensorType(ishape, dtype))
+        weight1 = relay.const(weight_data)  # same weights as the reference graph
+        out1 = relay.op.nn.conv2d(
+            input1,
+            weight1,
+            kernel_size=kernel_size,
+            strides=strides,
+            padding=padding,
+            groups=groups,
+            dilation=(dilation, dilation),
+            data_layout=data_layout,
+            kernel_layout=kernel_layout,
+            out_dtype="int32",
+            out_layout=data_layout,
+        )
+        mod = tvm.IRModule.from_expr(relay.Function([input1], out1))  # graph under test
+
+        inputs = {"input": np.random.randint(low=-128, high=127, size=ishape, dtype=dtype)}
+        output_list = generate_ref_data(ref_mod, inputs)
+
+        compile_and_run(
+            AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
+            runner=AOT_CORSTONE300_RUNNER,
+            interface_api="c",
+            use_unpacked_api=True,
+            target_opts={
+                "-keys": "arm_cpu",
+                "-mcpu": "cortex-m7",
+            },
+            schedule_name=schedule_name,
+        )
+
+
+class TestDepthwiseConv2d_NCHW_OIHW(BasicDepthwiseConv2dTests):
+    """Exercise the depthwise_conv2d_nchw.arm_cpu schedule (NCHW data, OIHW kernel)."""
+
+    data_shape, kernel_size, num_filter, strides, padding, dilation = tvm.testing.parameters(
+        ((1, 16, 32, 32), (3, 3), 16, 1, 0, 1),
+        ((1, 32, 10, 3), (3, 3), 32, 1, 0, 1),
+        ((1, 32, 32, 16), (3, 3), 32, 1, (0, 2, 2, 0), 1),
+        ((1, 32, 32, 16), (3, 3), 32, 1, 0, 1),
+        ((1, 32, 32, 16), (3, 3), 32, 1, 0, 1),  # NOTE(review): identical to the case above
+        ((1, 32, 32, 16), (3, 3), 32, 1, (0, 2, 2, 0), 2),
+        ((1, 16, 32, 16), (3, 3), 16, 1, (1, 1, 2, 2), 2),
+    )
+    data_layout = tvm.testing.parameter("NCHW")
+    dtype = tvm.testing.parameter("int8", "int16")
+    kernel_layout = tvm.testing.parameter("OIHW")
+    schedule_name = tvm.testing.parameter("depthwise_conv2d_nchw.arm_cpu")
+
+
+class TestDepthwiseConv2d_NHWC_HWOI(BasicDepthwiseConv2dTests):
+    """Exercise the depthwise_conv2d_nhwc.generic schedule (NHWC data, HWOI kernel)."""
+
+    data_shape, kernel_size, num_filter, strides, padding, dilation = tvm.testing.parameters(
+        ((1, 32, 32, 16), (3, 3), 16, 1, 0, 1),
+        ((1, 32, 10, 16), (3, 3), 16, 1, 0, 1),
+        ((1, 49, 10, 64), (10, 4), 64, (2, 1), (4, 1, 5, 1), 1),  # non-square kernel and strides
+        ((1, 32, 32, 16), (3, 3), 16, 1, (0, 2, 2, 0), 1),
+        ((1, 32, 32, 16), (3, 3), 16, 1, 0, 1),
+        ((1, 32, 32, 16), (3, 3), 16, 1, 0, 1),  # NOTE(review): identical to the case above
+        ((1, 32, 32, 16), (3, 3), 16, 1, (0, 2, 2, 0), 2),
+        ((1, 32, 32, 16), (3, 3), 16, 1, (1, 1, 2, 2), 2),
+    )
+    data_layout = tvm.testing.parameter("NHWC")
+    dtype = tvm.testing.parameter("int8", "int16")
+    kernel_layout = tvm.testing.parameter("HWOI")
+    schedule_name = tvm.testing.parameter("depthwise_conv2d_nhwc.generic")
+
+
+if __name__ == "__main__":  # script entry point: run this file's tests through pytest
+    sys.exit(pytest.main([__file__] + sys.argv[1:]))  # extra CLI args are passed to pytest
diff --git a/tests/python/relay/strategy/arm_cpu/test_group_conv2d.py b/tests/python/relay/strategy/arm_cpu/test_group_conv2d.py
new file mode 100644
index 000000000000..ef3d695e0f0c
--- /dev/null
+++ b/tests/python/relay/strategy/arm_cpu/test_group_conv2d.py
@@ -0,0 +1,155 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+import sys
+import numpy as np
+import pytest
+import tvm
+import tvm.testing
+from tvm import relay
+from tvm.micro.testing.aot_test_utils import (
+    AOTTestModel,
+    AOT_CORSTONE300_RUNNER,
+    generate_ref_data,
+    compile_and_run,
+)
+
+
+class BasicGroupConv2dTests:
+    @tvm.testing.requires_corstone300
+    def test_conv2d(
+        self,
+        data_shape,
+        data_layout,
+        kernel_size,
+        kernel_layout,
+        num_filter,
+        strides,
+        padding,
+        dilation,
+        groups,
+        dtype,
+        schedule_name,
+    ):
+        """Test a subgraph with a single group conv2d operator."""
+        ishape = data_shape
+
+        assert groups > 1, "groups should be more than 1 to create a group conv2d."
+
+        if data_layout == "NCHW" and kernel_layout == "OIHW":
+            assert data_shape[1] % groups == 0, "Input channels should be divisible by groups."
+            wshape = (num_filter, data_shape[1] // groups, *kernel_size)
+        elif data_layout == "NHWC" and kernel_layout == "HWIO":
+            assert data_shape[3] % groups == 0, "Input channels should be divisible by groups."
+            wshape = (*kernel_size, data_shape[3] // groups, num_filter)
+        else:
+            raise ValueError(
+                f"Incorrect data layout({data_layout}) and kernel layout({kernel_layout})."
+            )
+
+        weight_data = np.random.randint(low=-10, high=10, size=wshape, dtype=dtype)
+
+        input0 = relay.var("input", relay.TensorType(ishape, dtype))
+        weight0 = relay.const(weight_data)
+        out0 = relay.op.nn.conv2d(
+            input0,
+            weight0,
+            kernel_size=kernel_size,
+            strides=strides,
+            padding=padding,
+            groups=groups,
+            dilation=(dilation, dilation),
+            data_layout=data_layout,
+            kernel_layout=kernel_layout,
+            out_dtype="int32",
+            out_layout=data_layout,
+        )
+        ref_mod = tvm.IRModule.from_expr(relay.Function([input0], out0))  # reference graph
+
+        input1 = relay.var("input", relay.TensorType(ishape, dtype))
+        weight1 = relay.const(weight_data)  # same weights as the reference graph
+        out1 = relay.op.nn.conv2d(
+            input1,
+            weight1,
+            kernel_size=kernel_size,
+            strides=strides,
+            padding=padding,
+            groups=groups,
+            dilation=(dilation, dilation),
+            data_layout=data_layout,
+            kernel_layout=kernel_layout,
+            out_dtype="int32",
+            out_layout=data_layout,
+        )
+        mod = tvm.IRModule.from_expr(relay.Function([input1], out1))  # graph under test
+
+        inputs = {"input": np.random.randint(low=-128, high=127, size=ishape, dtype=dtype)}
+        output_list = generate_ref_data(ref_mod, inputs)
+
+        compile_and_run(
+            AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
+            runner=AOT_CORSTONE300_RUNNER,
+            interface_api="c",
+            use_unpacked_api=True,
+            target_opts={
+                "-keys": "arm_cpu",
+                "-mcpu": "cortex-m7",
+            },
+            schedule_name=schedule_name,
+        )
+
+
+class TestGroupConv2d_NCHW_OIHW(BasicGroupConv2dTests):
+    """Exercise the group_conv2d_nchw.arm_cpu schedule (NCHW data, OIHW kernel)."""
+
+    data_shape, kernel_size, num_filter, strides, padding, dilation = tvm.testing.parameters(
+        ((1, 16, 32, 32), (3, 3), 12, 1, 0, 1),
+        ((1, 16, 32, 10), (3, 3), 16, 1, 0, 1),
+        ((1, 16, 32, 32), (3, 3), 16, 1, (0, 2, 2, 0), 1),
+        ((1, 16, 32, 32), (3, 3), 16, 1, 0, 1),
+        ((1, 16, 32, 32), (3, 3), 16, 1, 0, 1),  # NOTE(review): identical to the case above
+        ((1, 16, 32, 32), (3, 3), 16, 1, (0, 2, 2, 0), 2),
+        ((1, 16, 32, 32), (3, 3), 32, 1, (1, 1, 2, 2), 2),
+    )
+    groups = tvm.testing.parameter(2, 4)  # both values must divide the channel dim (see base test)
+    data_layout = tvm.testing.parameter("NCHW")
+    dtype = tvm.testing.parameter("int8", "int16")
+    kernel_layout = tvm.testing.parameter("OIHW")
+    schedule_name = tvm.testing.parameter("group_conv2d_nchw.arm_cpu")
+
+
+class TestGroupConv2d_NHWC_HWIO(BasicGroupConv2dTests):
+    """Exercise the group_conv2d_nhwc.generic schedule (NHWC data, HWIO kernel)."""
+
+    data_shape, kernel_size, num_filter, strides, padding, dilation = tvm.testing.parameters(
+        ((1, 32, 32, 16), (3, 3), 12, 1, 0, 1),
+        ((1, 32, 10, 16), (3, 3), 16, 1, 0, 1),
+        ((1, 49, 10, 16), (10, 4), 64, (2, 1), (4, 1, 5, 1), 1),  # non-square kernel and strides
+        ((1, 32, 32, 16), (3, 3), 16, 1, (0, 2, 2, 0), 1),
+        ((1, 32, 32, 16), (3, 3), 16, 1, 0, 1),
+        ((1, 32, 32, 16), (3, 3), 16, 1, 0, 1),  # NOTE(review): identical to the case above
+        ((1, 32, 32, 16), (3, 3), 16, 1, (0, 2, 2, 0), 2),
+        ((1, 32, 32, 16), (3, 3), 16, 1, (1, 1, 2, 2), 2),
+    )
+    groups = tvm.testing.parameter(2, 4)  # both values must divide the channel dim (see base test)
+    data_layout = tvm.testing.parameter("NHWC")
+    dtype = tvm.testing.parameter("int8", "int16")
+    kernel_layout = tvm.testing.parameter("HWIO")
+    schedule_name = tvm.testing.parameter("group_conv2d_nhwc.generic")
+
+
+if __name__ == "__main__":  # script entry point: run this file's tests through pytest
+    sys.exit(pytest.main([__file__] + sys.argv[1:]))  # extra CLI args are passed to pytest
diff --git a/tests/python/relay/utils/external_codegen.py b/tests/python/relay/utils/external_codegen.py
index 4dbc8f274264..54132c63d96b 100644
--- a/tests/python/relay/utils/external_codegen.py
+++ b/tests/python/relay/utils/external_codegen.py
@@ -104,7 +104,7 @@ def check_aot_executor_result(
     mod, map_inputs, out_shape, result, tol=1e-5, target="llvm", device=tvm.cpu()
 ):
     # Late import to avoid breaking test with USE_MICRO=OFF.
-    from aot.aot_test_utils import AOTTestModel, AOT_DEFAULT_RUNNER, compile_and_run
+    from tvm.micro.testing.aot_test_utils import AOTTestModel, AOT_DEFAULT_RUNNER, compile_and_run
 
     interface_api = "packed"
     use_unpacked_api = False
diff --git a/tests/python/unittest/test_crt.py b/tests/python/unittest/test_crt.py
index 3a93dbc89b1f..d5611906fc5d 100644
--- a/tests/python/unittest/test_crt.py
+++ b/tests/python/unittest/test_crt.py
@@ -292,7 +292,7 @@ def test_platform_timer():
 def test_autotune():
     """Verify that autotune works with micro."""
     import tvm.relay as relay
-    from tvm.micro.testing import check_tune_log
+    from tvm.micro.testing.utils import check_tune_log
 
     runtime = Runtime("crt", {"system-lib": True})
 
diff --git a/tests/scripts/task_python_microtvm.sh b/tests/scripts/task_python_microtvm.sh
index d13ee91a0ba8..7301c6f833ab 100755
--- a/tests/scripts/task_python_microtvm.sh
+++ b/tests/scripts/task_python_microtvm.sh
@@ -51,3 +51,5 @@ export TVM_MICRO_USE_HW=1
 export TVM_MICRO_BOARD=qemu_x86
 python3 gallery/how_to/work_with_microtvm/micro_tflite.py
 python3 gallery/how_to/work_with_microtvm/micro_autotune.py
+
+run_pytest ctypes python-relay-strategy-arm_cpu tests/python/relay/strategy/arm_cpu --enable-corstone300-tests

From cd00f7bfa8ea469ea8a67864911d699a580e852f Mon Sep 17 00:00:00 2001
From: Mehrdad Hessar <mhessar@octoml.ai>
Date: Mon, 16 May 2022 14:05:12 -0700
Subject: [PATCH 02/13] split aot test utils

---
 python/tvm/micro/testing/aot_test_utils.py | 902 +-------------------
 python/tvm/testing/aot.py                  | 926 +++++++++++++++++++++
 2 files changed, 928 insertions(+), 900 deletions(-)
 create mode 100644 python/tvm/testing/aot.py

diff --git a/python/tvm/micro/testing/aot_test_utils.py b/python/tvm/micro/testing/aot_test_utils.py
index b56649a9b0d9..82ac1ac68e9d 100644
--- a/python/tvm/micro/testing/aot_test_utils.py
+++ b/python/tvm/micro/testing/aot_test_utils.py
@@ -15,127 +15,19 @@
 # specific language governing permissions and limitations
 # under the License.
 
-import sys
-import datetime
-import itertools
 import logging
-import os
-import pathlib
-import re
+import itertools
 import shutil
-import subprocess
-import tarfile
-import tempfile
-from typing import Any, NamedTuple, Union, Optional, List, Dict
 
 import pytest
-import numpy as np
 
 pytest.importorskip("tvm.micro")
 
 import tvm
-from tvm import relay
-from tvm import te
-from tvm import autotvm
-from tvm.contrib import utils, graph_executor
-from tvm.relay.backend import te_compiler, Executor, Runtime
-from tvm.relay.backend.te_compiler import TECompiler
-from tvm.relay.backend.utils import mangle_module_name
-from tvm.micro import export_model_library_format
-from tvm.micro.testing.utils import mlf_extract_workspace_size_bytes
+from tvm.testing.aot import AOTTestRunner
 
 _LOG = logging.getLogger(__name__)
 
-AOT_SUCCESS_TOKEN = "AOT_TEST_SUCCESS"
-AOT_FAILURE_TOKEN = "AOT_TEST_FAILURE"
-
-
-class AOTTestModel(NamedTuple):
-    """Class to describe a model under test
-
-    Parameters
-    ----------
-    module: tvm.IRModule
-        IRModule to generate AOT executor for
-    inputs: Dict[str, np.array]
-        Dict of input names to value arrays
-    outputs: List[np.array]
-        Dict of output names to value arrays
-    output_tolerance: Optional[Union[int, float]]
-        Allowed tolerance of the output
-    name: str
-        Name to use for this model
-    params: Optional[Dict[str, np.array]]
-        Dict of parameter names to value arrays
-    extra_memory_in_bytes: int
-        Extra memory to allocate after planned memory
-    """
-
-    module: tvm.IRModule
-    inputs: Dict[str, np.array]
-    outputs: Dict[str, np.array]
-    output_tolerance: Optional[Union[int, float]] = None
-    name: str = "default"
-    params: Optional[Dict[str, np.array]] = None
-    extra_memory_in_bytes: int = 0
-
-
-class AOTCompiledTestModel(NamedTuple):
-    """A compiled AOTTestModel with associated module
-
-    Parameters
-    ----------
-    model: AOTTestModel
-        Input model to be compiled
-    module: tvm.runtime.Module
-        The compiled Module for the associated AOTTestModel
-    """
-
-    model: AOTTestModel
-    executor_factory: tvm.relay.backend.executor_factory.AOTExecutorFactoryModule
-
-
-class AOTDataLinkage(NamedTuple):
-    """A compiled AOTTestModel with associated module
-
-    Parameters
-    ----------
-    section: str
-        Named section to place data into
-    alignment: int
-        Section alignment
-    """
-
-    section: str
-    alignment: int
-
-
-class AOTTestRunner(NamedTuple):
-    """Class to describe a test runner for AOT code
-
-    Parameters
-    ----------
-    makefile: str
-        Premade Makefile to use from the AOT test folder
-    prologue: str
-        Code to prepend to the main function
-    epilogue: str
-        Code to append to the main function
-    includes: List[str]
-        Additional includes required to run the AOT test runner
-    parameters: Dict[str, str]
-        Additional parameters to pass to the make command
-    pass_config: Dict[str, Any]
-        Additional pass configuration when building the model
-    """
-
-    makefile: str = "default"
-    prologue: str = ""
-    epilogue: str = ""
-    includes: List[str] = []
-    parameters: Dict[str, str] = {}
-    pass_config: Dict[str, Any] = {}
-
 
 AOT_DEFAULT_RUNNER = AOTTestRunner()
 
@@ -168,42 +60,6 @@ class AOTTestRunner(NamedTuple):
     },
 )
 
-NP_TYPE_TO_C = {
-    "int8": "int8_t",
-    "uint8": "uint8_t",
-    "int16": "int16_t",
-    "uint16": "uint16_t",
-    "int32": "int32_t",
-    "uint32": "uint32_t",
-    "float32": "float",
-}
-
-
-def mangle_name(mod_name, name):
-    mod_name = mangle_module_name(mod_name)
-    return mod_name + "_" + name
-
-
-def convert_to_relay(
-    tflite_model_buf,
-):
-    """Convert a tflite model buffer in a Relay module"""
-    # TFLite.Model.Model has changed to TFLite.Model from 1.14 to 2.1
-    try:
-        import tflite.Model
-
-        tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0)
-    except AttributeError:
-        import tflite
-
-        tflite_model = tflite.Model.GetRootAsModel(tflite_model_buf, 0)
-    except ImportError:
-        raise ImportError("The tflite package must be installed")
-
-    mod, params = relay.frontend.from_tflite(tflite_model)
-    mod["main"] = relay.build_module.bind_params_by_name(mod["main"], params)
-    return mod, params
-
 
 def parametrize_aot_options(test):
     """Parametrize over valid option combinations"""
@@ -247,757 +103,3 @@ def parametrize_aot_options(test):
     )(test)
 
     return tvm.testing.skip_if_32bit(reason="Reference system unavailable in i386 container")(fn)
-
-
-def subprocess_check_log_output(cmd, cwd, logfile):
-    """
-    This method runs a process and logs the output to both a log file and stdout
-    """
-    _LOG.info("Execute (%s): %s", cwd, cmd)
-    cmd_base = cmd[0] if isinstance(cmd, (list, tuple)) else cmd.split(" ", 1)[0]
-    proc = subprocess.Popen(
-        cmd,
-        cwd=cwd,
-        shell=True,
-        bufsize=0,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.STDOUT,
-        encoding="utf-8",
-    )
-    stdout = ""
-    with open(logfile, "a") as f:
-        msg = (
-            "\n"
-            + "-" * 80
-            + f"{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}: Execute ({cwd}): {cmd}\n"
-            + "-" * 80
-        )
-        f.write(msg)
-        stdout += msg + "\n"
-        while True:
-            data = proc.stdout.readline()
-            stdout += data
-            _LOG.debug("%s: %s", cmd_base, data.rstrip("\n"))
-            f.write(data)
-
-            # process is done if there is no data and the result is valid
-            if not data:  # EOF
-                break
-
-    proc.wait()
-    if proc.returncode != 0:
-        raise RuntimeError(f"Subprocess failed: {cmd}\nstdout:\n{stdout}")
-
-
-# TODO: Move to linker script with list of symbols rather than coding into source
-def emit_data_linkage(output_file, data_linkage):
-    if data_linkage is not None:
-        output_file.write(
-            f'__attribute__((section("{data_linkage.section}"), aligned({data_linkage.alignment}))) '
-        )
-
-
-def emit_main_prologue(
-    main_file,
-    custom_prologue,
-    workspace_bytes,
-    data_linkage,
-    compiled_models,
-    interface_api,
-    use_stack_allocator=True,
-):
-    if use_stack_allocator:
-        workspace_define = f"#define WORKSPACE_SIZE ({workspace_bytes}"
-        if interface_api == "c":
-            for compiled_model in compiled_models:
-                model = compiled_model.model
-                workspace_define += f" + TVMGEN_{model.name.upper()}_WORKSPACE_SIZE"
-        # Add TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES because of memory alignment.
-        workspace_define += " + TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES)\n"
-        main_file.write(workspace_define)
-        emit_data_linkage(main_file, data_linkage)
-        main_file.write("static uint8_t g_aot_memory[WORKSPACE_SIZE];\n")
-        main_file.write("tvm_workspace_t app_workspace;\n")
-        main_file.write(
-            """
-            
-tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) {
-    return StackMemoryManager_Allocate(&app_workspace, num_bytes, out_ptr);
-}
-
-tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) {
-    return StackMemoryManager_Free(&app_workspace,ptr);
-}
-        """
-        )
-    else:
-        # An implementation is not needed for these if the stack allocator is not used
-        main_file.write(
-            """
-            
-tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) {
-    return kTvmErrorFunctionCallNotImplemented;
-}
-
-tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) {
-    return kTvmErrorFunctionCallNotImplemented;
-}
-
-            """
-        )
-    main_file.write(
-        """
-    
-void TVMPlatformAbort(tvm_crt_error_t code) { exit(-1); }
-
-void TVMLogf(const char* msg, ...) {
-  va_list args;
-  va_start(args, msg);
-  vfprintf(stdout, msg, args);
-  va_end(args);
-}
-    
-TVM_DLL int TVMFuncRegisterGlobal(const char* name, TVMFunctionHandle f, int override) {}
-int main(){\n
-    """
-    )
-    main_file.write(custom_prologue)
-
-
-def emit_main_data(main_file, input_map, output_map, mod_name):
-    for key in input_map:
-        sanitized_tensor_name = re.sub(r"\W", "_", key)
-        main_file.write(
-            f'#include "{mangle_name(mod_name,"input_data")}_{sanitized_tensor_name}.h"\n'
-        )
-
-    for key in output_map:
-        sanitized_tensor_name = re.sub(r"\W", "_", key)
-        main_file.write(
-            f'#include "{mangle_name(mod_name,"expected_output_data")}_{sanitized_tensor_name}.h"\n'
-            f'#include "{mangle_name(mod_name,"output_data")}_{sanitized_tensor_name}.h"\n'
-        )
-
-
-def emit_main_device_structs(main_file, devices, mod_name):
-    if devices:
-        main_file.write(
-            f"struct {mangle_name(mod_name, 'devices')} {mangle_name(mod_name, 'devices')} = {{"
-        )
-        for device in devices:
-            main_file.write(f"\t.{device} = {device},\n")
-        main_file.write("};\n")
-
-
-def emit_main_workspace_pool_structs(main_file, workspace_pool_names, mod_name):
-    if workspace_pool_names and len(workspace_pool_names) > 0:
-        main_file.write(
-            f"struct {mangle_name(mod_name, 'workspace_pools')} {mangle_name(mod_name, 'workspace_pools')} = {{"
-        )
-        for workspace_pool_name in workspace_pool_names:
-            main_file.write(f"\t.{workspace_pool_name} = {workspace_pool_name},\n")
-        main_file.write("};\n")
-
-
-def emit_main_data_structs(main_file, input_map, output_map, mod_name):
-    main_file.write(
-        f"struct {mangle_name(mod_name, 'inputs')} {mangle_name(mod_name, 'inputs')} = {{"
-    )
-    for key in input_map:
-        sanitized_tensor_name = re.sub(r"\W", "_", key)
-        main_file.write(
-            f"\t.{sanitized_tensor_name} = {mangle_name(mod_name, 'input_data')}_{sanitized_tensor_name},\n"
-        )
-    main_file.write("};\n")
-
-    main_file.write(
-        f"struct {mangle_name(mod_name, 'outputs')} {mangle_name(mod_name, 'outputs')} = {{"
-    )
-    for key in output_map:
-        sanitized_tensor_name = re.sub(r"\W", "_", key)
-        main_file.write(
-            f"\t.{sanitized_tensor_name} = {mangle_name(mod_name, 'output_data')}_{sanitized_tensor_name},\n"
-        )
-    main_file.write("};\n")
-
-
-def emit_main_data_setup(main_file, input_map, output_map, mod_name):
-    num_outputs = len(output_map)
-    num_inputs = len(input_map)
-
-    main_file.write(f'void* {mangle_name(mod_name,"inputs")}[{num_inputs}] = {{ ')
-    for key in input_map:
-        sanitized_tensor_name = re.sub(r"\W", "_", key)
-        main_file.write(f'{mangle_name(mod_name,"input_data")}_{sanitized_tensor_name}, ')
-    main_file.write("};\n")
-
-    main_file.write(f'void* {mangle_name(mod_name,"outputs")}[{num_outputs}]  = {{ ')
-    for key in output_map:
-        sanitized_tensor_name = re.sub(r"\W", "_", key)
-        main_file.write(f'{mangle_name(mod_name, "output_data")}_{sanitized_tensor_name}, ')
-    main_file.write("};\n")
-
-
-def emit_main_c_interface_call(
-    main_file, devices, workspace_pool_names, mod_name, use_workspace_io
-):
-    sub_strings = list()
-    sub_strings.append(f'{mangle_name(mod_name,"run")}(')
-    if not use_workspace_io:
-        sub_strings.append(f'&{mangle_name(mod_name,"inputs")}, ')
-        sub_strings.append(f'&{mangle_name(mod_name,"outputs")}, ')
-    if workspace_pool_names:
-        sub_strings.append(f'&{mangle_name(mod_name,"workspace_pools")}, ')
-    if devices:
-        sub_strings.append(f'&{mangle_name(mod_name,"devices")}, ')
-    # Removing the last two characters that is a comma and a space
-    sub_strings[-1] = sub_strings[-1][:-2]
-    # Adding brackets and newline instead
-    sub_strings[-1] = sub_strings[-1] + ");\n"
-
-    main_file_string = ""
-    for sub_string in sub_strings:
-        main_file_string += sub_string
-
-    main_file.write(main_file_string)
-
-
-def emit_main_fake_packed_values(main_file):
-    main_file.write(
-        """
-    static DLDevice fake_device = {kDLCPU, 0};
-    static int64_t fake_dims = 0;
-    static int64_t fake_shape = {0};
-    """
-    )
-
-
-def emit_main_packed_call(main_file, input_map, output_list, mod_name):
-    tensors_name = mangle_name(mod_name, "tensors")
-    values_name = mangle_name(mod_name, "values")
-    typeids_name = mangle_name(mod_name, "typeids")
-
-    def fake_tensor(source, source_index, packed_index):
-        main_file.write(
-            f"""
-        {tensors_name}[{packed_index}].device = fake_device;
-        {tensors_name}[{packed_index}].data = {source}[{source_index}];
-        {tensors_name}[{packed_index}].shape = &fake_shape;
-        {tensors_name}[{packed_index}].ndim = fake_dims;
-        {tensors_name}[{packed_index}].byte_offset = 0;
-        {tensors_name}[{packed_index}].strides = NULL;
-        {values_name}[{packed_index}].v_handle = &{tensors_name}[{packed_index}];
-        """
-        )
-
-    num_outputs = len(output_list)
-    num_inputs = len(input_map)
-    num_tensors = num_inputs + num_outputs
-    main_file.write(
-        f"""
-    DLTensor {tensors_name}[{num_tensors}];
-    TVMValue {values_name}[{num_tensors}];
-    int32_t {typeids_name}[{num_tensors}];
-    """
-    )
-
-    for i in range(0, num_inputs):
-        fake_tensor(mangle_name(mod_name, "inputs"), i, i)
-    for i in range(0, num_outputs):
-        fake_tensor(mangle_name(mod_name, "outputs"), i, i + num_inputs)
-
-    main_file.write(
-        f'{mangle_name(mod_name, "run")}({values_name}, {typeids_name}, 0, NULL, 0, NULL);\n'
-    )
-    main_file.write("\n")
-
-
-def emit_main_compare(main_file, outputs, output_tolerance, mod_name, use_interface_c=False):
-    for key in outputs:
-        sanitized_tensor_name = re.sub(r"\W", "_", key)
-        expected_data_name = mangle_name(mod_name, f"expected_output_data_{sanitized_tensor_name}")
-        is_float_dtype = outputs[key].dtype == "float32"
-
-        comparison_function = "abs"
-        tolerance = output_tolerance or 0
-        if is_float_dtype:
-            comparison_function = "fabs"
-            tolerance = output_tolerance or 0.001
-
-        data_length_var_name = (
-            mangle_name(mod_name, f"output_data_{sanitized_tensor_name}") + "_len"
-        )
-        if use_interface_c:
-            c_type = NP_TYPE_TO_C[str(outputs[key].dtype)]
-            actual_data_name = f"(({c_type}*)" + mangle_name(
-                mod_name, f"outputs.{sanitized_tensor_name})"
-            )
-        else:
-            actual_data_name = mangle_name(mod_name, f"output_data_{sanitized_tensor_name}")
-        main_file.write(
-            f"""
-            for (int i = 0; i<{data_length_var_name}; i++) {{
-                if ({comparison_function}({actual_data_name}[i]-{expected_data_name}[i]) > {tolerance}) {{
-                    printf("{AOT_FAILURE_TOKEN}\\n");
-                    return -1;
-                }}
-            }}
-            """
-        )
-
-
-def emit_main_init_memory_manager(main_file):
-    main_file.write("StackMemoryManager_Init(&app_workspace, g_aot_memory, WORKSPACE_SIZE);")
-    main_file.write("\n")
-
-
-def emit_main_epilogue(main_file, custom_epilogue):
-    main_file.write(custom_epilogue)
-    main_file.write(f'printf("{AOT_SUCCESS_TOKEN}\\n");')
-    main_file.write("return 0;")
-    main_file.write("}\n")
-
-
-def emit_main_common_includes(main_file, custom_includes):
-    main_file.write("#include <stdio.h>\n")
-    main_file.write("#include <stdarg.h>\n")
-    main_file.write("#include <stdlib.h>\n")
-    main_file.write("#include <math.h>\n")
-    main_file.write('#include "tvm/runtime/c_runtime_api.h"\n')
-    main_file.write('#include "tvm/runtime/crt/stack_allocator.h"\n')
-    for include in custom_includes:
-        main_file.write(f'#include "{include}"\n')
-
-
-def emit_main_micro_include(main_file, mod_name):
-    main_file.write(f"#include <{mangle_module_name(mod_name)}.h>\n")
-
-
-def create_main(
-    test_name,
-    compiled_models,
-    output_path,
-    custom_includes,
-    custom_prologue,
-    custom_epilogue,
-    data_linkage,
-    interface_api,
-    workspace_bytes,
-    use_stack_allocator=True,
-    use_workspace_io=False,
-):
-    file_path = pathlib.Path(f"{output_path}/" + test_name).resolve()
-    # create header file
-    raw_path = file_path.with_suffix(".c").resolve()
-    with open(raw_path, "w") as main_file:
-        emit_main_common_includes(main_file, custom_includes)
-
-        if interface_api == "c":
-            for compiled_model in compiled_models:
-                model = compiled_model.model
-                emit_main_micro_include(main_file, model.name)
-        for compiled_model in compiled_models:
-            model = compiled_model.model
-            emit_main_data(main_file, model.inputs, model.outputs, model.name)
-
-        emit_main_prologue(
-            main_file,
-            custom_prologue,
-            workspace_bytes,
-            data_linkage,
-            compiled_models,
-            interface_api,
-            use_stack_allocator,
-        )
-        if use_stack_allocator:
-            emit_main_init_memory_manager(main_file)
-
-        if interface_api == "c":
-            for compiled_model in compiled_models:
-                model = compiled_model.model
-                executor_codegen_metadata = (
-                    compiled_model.executor_factory.executor_codegen_metadata
-                )
-                devices = compiled_model.executor_factory.get_devices()
-                workspace_pool_names = None
-                if executor_codegen_metadata.pool_inputs:
-                    workspace_pool_names = [
-                        allocated_pool.pool_info.pool_name
-                        for allocated_pool in dict(executor_codegen_metadata.pool_inputs).values()
-                        if not allocated_pool.pool_info.is_internal
-                    ]
-                emit_main_device_structs(main_file, devices, model.name)
-                if not use_workspace_io:
-                    emit_main_workspace_pool_structs(main_file, workspace_pool_names, model.name)
-                    emit_main_data_structs(main_file, model.inputs, model.outputs, model.name)
-                emit_main_c_interface_call(
-                    main_file, devices, workspace_pool_names, model.name, use_workspace_io
-                )
-        else:
-            emit_main_fake_packed_values(main_file)
-            for compiled_model in compiled_models:
-                model = compiled_model.model
-                emit_main_data_setup(main_file, model.inputs, model.outputs, model.name)
-                emit_main_packed_call(main_file, model.inputs, model.outputs, model.name)
-
-        for compiled_model in compiled_models:
-            model = compiled_model.model
-            emit_main_compare(
-                main_file, model.outputs, model.output_tolerance, model.name, interface_api == "c"
-            )
-        emit_main_epilogue(main_file, custom_epilogue)
-
-
-def create_header_file(tensor_name, npy_data, output_path, data_linkage):
-    """
-    This method generates a header file containing the data contained in the numpy array provided.
-    It is used to capture the tensor data (for both inputs and expected outputs) to be bundled into the standalone application.
-    """
-    file_path = pathlib.Path(f"{output_path}/" + tensor_name).resolve()
-    # create header file
-    raw_path = file_path.with_suffix(".h").resolve()
-    with open(raw_path, "w") as header_file:
-        header_file.write("#include <stddef.h>\n")
-        header_file.write("#include <stdint.h>\n")
-        header_file.write("#include <dlpack/dlpack.h>\n")
-        header_file.write(f"const size_t {tensor_name}_len = {npy_data.size};\n")
-
-        emit_data_linkage(header_file, data_linkage)
-
-        header_file.write(f"{NP_TYPE_TO_C[str(npy_data.dtype)]} {tensor_name}[] =")
-
-        header_file.write("{")
-        for i in np.ndindex(npy_data.shape):
-            header_file.write(f"{npy_data[i]}, ")
-        header_file.write("};\n\n")
-
-
-def compile_models(
-    models: Union[List[AOTTestModel], AOTTestModel],
-    interface_api: str,
-    use_unpacked_api: bool,
-    workspace_byte_alignment: int = 8,
-    enable_op_fusion: bool = True,
-    pass_config: Dict[str, Any] = None,
-    use_runtime_executor: bool = True,
-    target: tvm.target.Target = tvm.target.Target("c"),
-    workspace_memory_pools=None,
-    schedule_name: str = None,
-) -> List[AOTCompiledTestModel]:
-    """
-    This method generates runtime.Modules for the tests
-    """
-    if not isinstance(models, list):
-        models = [models]
-
-    runtime = Runtime("crt")
-    executor = Executor(
-        "aot",
-        {
-            "workspace-byte-alignment": workspace_byte_alignment,
-            "interface-api": interface_api,
-            "unpacked-api": use_unpacked_api,
-        },
-    )
-
-    config = {"tir.disable_vectorize": True}
-    if pass_config:
-        config = {**config, **pass_config}
-    if not enable_op_fusion:
-        config["relay.FuseOps.max_depth"] = 1
-
-    compiled_mods = list()
-    for model in models:
-        if schedule_name:
-            # Testing with deterministic schedule
-            task_list = autotvm.task.extract_from_program(
-                model.module, target=target, params=model.params
-            )
-            with tvm.autotvm.apply_fixed_config(task_list, schedule_name):
-                with tvm.transform.PassContext(opt_level=3, config=config):
-                    if use_runtime_executor:
-                        executor_factory = tvm.relay.build(
-                            model.module,
-                            target,
-                            executor=executor,
-                            runtime=runtime,
-                            workspace_memory_pools=workspace_memory_pools,
-                            params=model.params,
-                            mod_name=model.name,
-                        )
-                        compiled_mods.append(
-                            AOTCompiledTestModel(model=model, executor_factory=executor_factory)
-                        )
-        else:
-            with tvm.transform.PassContext(opt_level=3, config=config):
-                # TODO(Mousius) - Remove once executor/runtime are fully removed from Target
-                if use_runtime_executor:
-                    executor_factory = tvm.relay.build(
-                        model.module,
-                        target,
-                        executor=executor,
-                        runtime=runtime,
-                        workspace_memory_pools=workspace_memory_pools,
-                        params=model.params,
-                        mod_name=model.name,
-                    )
-                    compiled_mods.append(
-                        AOTCompiledTestModel(model=model, executor_factory=executor_factory)
-                    )
-                else:
-                    executor_factory = tvm.relay.build(
-                        model.module,
-                        tvm.target.Target(target, host=target),
-                        params=model.params,
-                        mod_name=model.name,
-                    )
-                    compiled_mods.append(
-                        AOTCompiledTestModel(model=model, executor_factory=executor_factory)
-                    )
-    return compiled_mods
-
-
-def run_and_check(
-    models: List[AOTCompiledTestModel],
-    runner: AOTTestRunner,
-    interface_api: str,
-    debug_calculated_workspaces=False,
-    workspace_byte_alignment=8,
-    data_linkage: AOTDataLinkage = None,
-    test_dir: str = None,
-    verbose: bool = False,
-    use_workspace_io: bool = False,
-):
-    """
-    This method uses the original test data and compiled runtime.Modules
-    to run in the test runner to verify the results.
-    """
-
-    def run_and_check_body(base_path):
-        cflags = f"-DTVM_RUNTIME_ALLOC_ALIGNMENT_BYTES={workspace_byte_alignment} "
-        # The calculated workspaces will not account for stack allocator tags used for debugging
-        if debug_calculated_workspaces:
-            cflags += "-DTVM_CRT_STACK_ALLOCATOR_ENABLE_LIFO_CHECK "
-
-        base_path = os.path.abspath(base_path)
-        build_path = os.path.join(base_path, "build")
-        os.makedirs(build_path, exist_ok=True)
-
-        include_path = os.path.join(base_path, "include")
-        os.mkdir(include_path)
-        crt_root = tvm.micro.get_standalone_crt_dir()
-        shutil.copy2(
-            os.path.join(crt_root, "template", "crt_config-template.h"),
-            os.path.join(include_path, "crt_config.h"),
-        )
-
-        workspace_bytes = 0
-        for compiled_model in models:
-            model = compiled_model.model
-            tar_file = os.path.join(base_path, f"{model.name}.tar")
-            export_model_library_format(compiled_model.executor_factory, tar_file)
-            t = tarfile.open(tar_file)
-            t.extractall(base_path)
-
-            # Interface C APIs does not need compiler generated
-            # workspace to generate the test application, because
-            # workspace size is codegen'd as a macro to
-            # tvmgen_<model_name>.h.
-            if interface_api != "c":
-                workspace_bytes += mlf_extract_workspace_size_bytes(tar_file)
-
-            workspace_bytes += model.extra_memory_in_bytes
-            for key in model.inputs:
-                sanitized_tensor_name = re.sub(r"\W", "_", key)
-                create_header_file(
-                    f'{mangle_name(model.name, "input_data")}_{sanitized_tensor_name}',
-                    model.inputs[key],
-                    include_path,
-                    data_linkage,
-                )
-
-            for key in model.outputs:
-                sanitized_tensor_name = re.sub(r"\W", "_", key)
-                create_header_file(
-                    f'{mangle_name(model.name, "output_data")}_{sanitized_tensor_name}',
-                    np.zeros(model.outputs[key].shape, model.outputs[key].dtype),
-                    include_path,
-                    data_linkage,
-                )
-                create_header_file(
-                    f'{mangle_name(model.name, "expected_output_data")}_{sanitized_tensor_name}',
-                    model.outputs[key],
-                    include_path,
-                    data_linkage,
-                )
-
-        use_usmp = runner.pass_config.get("tir.usmp.enable", False)
-        # We only need the stack allocator if USMP is not used
-        use_stack_allocator = not use_usmp
-
-        create_main(
-            "test.c",
-            models,
-            build_path,
-            runner.includes,
-            runner.prologue,
-            runner.epilogue,
-            data_linkage,
-            interface_api,
-            workspace_bytes,
-            use_stack_allocator,
-            use_workspace_io,
-        )
-
-        # Verify that compiles fine
-        file_dir = os.path.dirname(os.path.abspath(__file__))
-        makefile_dir = os.path.join(file_dir, "../../../../tests/python/relay/aot")
-        codegen_path = os.path.join(base_path, "codegen")
-        makefile = os.path.join(makefile_dir, f"{runner.makefile}.mk")
-        fvp_dir = "/opt/arm/FVP_Corstone_SSE-300/models/Linux64_GCC-6.4/"
-        # TODO(@grant-arm): Remove once ci_cpu docker image has been updated to FVP_Corstone_SSE
-        if not os.path.isdir(fvp_dir):
-            fvp_dir = "/opt/arm/FVP_Corstone_SSE-300_Ethos-U55/models/Linux64_GCC-6.4/"
-        custom_params = " ".join(
-            [f" {param}='{value}'" for param, value in runner.parameters.items()]
-        )
-        make_command = (
-            f"make -f {makefile} build_dir={build_path}"
-            + f" CFLAGS='{cflags}'"
-            + f" TVM_ROOT={file_dir}/../../../.."
-            + f" AOT_TEST_ROOT={makefile_dir}"
-            + f" CODEGEN_ROOT={codegen_path}"
-            + f" STANDALONE_CRT_DIR={tvm.micro.get_standalone_crt_dir()}"
-            + f" FVP_DIR={fvp_dir}"
-            + custom_params
-        )
-
-        compile_log_path = os.path.join(build_path, "test_compile.log")
-        compile_command = f"{make_command} aot_test_runner"
-        if verbose:
-            print("Compile command:\n", compile_command)
-        subprocess_check_log_output(compile_command, ".", compile_log_path)
-
-        # Verify that runs fine
-        run_log_path = os.path.join(build_path, "test_run.log")
-        run_command = f"{make_command} run"
-        if verbose:
-            print("Run command:\n", run_command)
-
-        # TODO(lhutton1) This is a quick and dirty work around to help temporarily reduce
-        # the flakyness of the tests. Will remove once #10300 and #10314 are resolved.
-        try:
-            subprocess_check_log_output(run_command, build_path, run_log_path)
-        except RuntimeError as err:
-            print("Failed to run the module, having a second attempt...", file=sys.stderr)
-            print(err, file=sys.stderr)
-            subprocess_check_log_output(run_command, build_path, run_log_path)
-
-        with open(run_log_path) as run_log:
-            assert AOT_SUCCESS_TOKEN in run_log.read()
-
-    if test_dir is None:
-        with tempfile.TemporaryDirectory() as tmpdir:
-            run_and_check_body(os.path.join(tmpdir, "test"))
-    else:
-        run_and_check_body(test_dir)
-
-
-def compile_and_run(
-    models: Union[List[AOTTestModel], AOTTestModel],
-    runner: AOTTestRunner,
-    interface_api: str,
-    use_unpacked_api: bool,
-    debug_calculated_workspaces: bool = False,
-    workspace_byte_alignment: int = 8,
-    enable_op_fusion: bool = True,
-    data_linkage: AOTDataLinkage = None,
-    use_runtime_executor: bool = True,
-    target: str = "c",
-    target_opts: Dict = None,
-    test_dir: str = None,
-    verbose: bool = False,
-    schedule_name: str = None,
-):
-    """This is a wrapper API to compile and run models as test for AoT
-
-    Parameters
-    ----------
-    test_dir : str
-        This path will contain build, codegen, include directories
-    verbose: bool
-        Prints commands to build and run AOT test runner
-    """
-
-    if target_opts:
-        for key, val in target_opts.items():
-            target += f" {key}={val}"
-
-    compiled_test_mods = compile_models(
-        models=models,
-        interface_api=interface_api,
-        use_unpacked_api=use_unpacked_api,
-        workspace_byte_alignment=workspace_byte_alignment,
-        enable_op_fusion=enable_op_fusion,
-        pass_config=runner.pass_config,
-        use_runtime_executor=use_runtime_executor,
-        target=tvm.target.Target(target),
-        schedule_name=schedule_name,
-    )
-
-    run_and_check(
-        models=compiled_test_mods,
-        runner=runner,
-        interface_api=interface_api,
-        debug_calculated_workspaces=debug_calculated_workspaces,
-        workspace_byte_alignment=workspace_byte_alignment,
-        data_linkage=data_linkage,
-        test_dir=test_dir,
-        verbose=verbose,
-    )
-
-
-def generate_ref_data(mod, input_data, params=None, target="llvm"):
-    """Generate reference data through executing the relay module"""
-    with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}):
-        lib = relay.build(mod, target=target, params=params)
-
-    lib_name = "mod.so"
-    temp = utils.tempdir()
-    lib_path = temp.relpath(lib_name)
-    lib.export_library(lib_path)
-    lib = tvm.runtime.load_module(lib_path)
-    grt_mod = graph_executor.GraphModule(lib["default"](tvm.cpu()))
-    grt_mod.set_input(**input_data)
-    grt_mod.run()
-    output_count = grt_mod.get_num_outputs()
-    out = [grt_mod.get_output(i).numpy() for i in range(output_count)]
-    if isinstance(mod, tvm.relay.Function):
-        main = mod
-    else:
-        main = mod["main"]
-    if main.attrs is None or main.attrs["output_tensor_names"] is None:
-        output_tensor_names = ["output" if i == 0 else f"output{i+1}" for i in range(output_count)]
-    else:
-        output_tensor_names = main.attrs["output_tensor_names"]
-
-    return dict(zip(output_tensor_names, out))
-
-
-def create_relay_module_and_inputs_from_tflite_file(tflite_model_file):
-    """A helper function to create a Relay IRModule with inputs
-    and params from a tflite file"""
-    with open(tflite_model_file, "rb") as f:
-        tflite_model_buf = f.read()
-    mod, params = convert_to_relay(tflite_model_buf)
-
-    inputs = dict()
-    for param in mod["main"].params:
-        name = str(param.name_hint)
-        data_shape = [int(i) for i in param.type_annotation.shape]
-        dtype = str(param.type_annotation.dtype)
-        in_min, in_max = (np.iinfo(dtype).min, np.iinfo(dtype).max)
-        data = np.random.randint(in_min, high=in_max, size=data_shape, dtype=dtype)
-        inputs[name] = data
-
-    return mod, inputs, params
diff --git a/python/tvm/testing/aot.py b/python/tvm/testing/aot.py
new file mode 100644
index 000000000000..94bc6e048d7e
--- /dev/null
+++ b/python/tvm/testing/aot.py
@@ -0,0 +1,926 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import sys
+import datetime
+import itertools
+import logging
+import os
+import pathlib
+import re
+import shutil
+import subprocess
+import tarfile
+import tempfile
+from typing import Any, NamedTuple, Union, Optional, List, Dict
+
+import pytest
+import numpy as np
+
+pytest.importorskip("tvm.micro")
+
+import tvm
+from tvm import relay
+from tvm import te
+from tvm import autotvm
+from tvm.contrib import utils, graph_executor
+from tvm.relay.backend import te_compiler, Executor, Runtime
+from tvm.relay.backend.te_compiler import TECompiler
+from tvm.relay.backend.utils import mangle_module_name
+from tvm.micro import export_model_library_format
+from tvm.micro.testing.utils import mlf_extract_workspace_size_bytes
+
+_LOG = logging.getLogger(__name__)  # used by _subprocess_check_log_output below
+NP_TYPE_TO_C = {
+    "int8": "int8_t",
+    "uint8": "uint8_t",
+    "int16": "int16_t",
+    "uint16": "uint16_t",
+    "int32": "int32_t",
+    "uint32": "uint32_t",
+    "float32": "float",
+}
+
+AOT_SUCCESS_TOKEN = "AOT_TEST_SUCCESS"
+AOT_FAILURE_TOKEN = "AOT_TEST_FAILURE"
+
+
+class AOTTestModel(NamedTuple):
+    """Class to describe a model under test
+
+    Parameters
+    ----------
+    module: tvm.IRModule
+        IRModule to generate AOT executor for
+    inputs: Dict[str, np.array]
+        Dict of input names to value arrays
+    outputs: Dict[str, np.array]
+        Dict of output names to value arrays
+    output_tolerance: Optional[Union[int, float]]
+        Allowed tolerance of the output
+    name: str
+        Name to use for this model
+    params: Optional[Dict[str, np.array]]
+        Dict of parameter names to value arrays
+    extra_memory_in_bytes: int
+        Extra memory to allocate after planned memory
+    """
+
+    module: tvm.IRModule
+    inputs: Dict[str, np.array]
+    outputs: Dict[str, np.array]
+    output_tolerance: Optional[Union[int, float]] = None
+    name: str = "default"
+    params: Optional[Dict[str, np.array]] = None
+    extra_memory_in_bytes: int = 0
+
+
+class AOTCompiledTestModel(NamedTuple):
+    """A compiled AOTTestModel with its associated executor factory
+
+    Parameters
+    ----------
+    model: AOTTestModel
+        Input model to be compiled
+    executor_factory: tvm.relay.backend.executor_factory.AOTExecutorFactoryModule
+        The executor factory produced by building the associated AOTTestModel
+    """
+
+    model: AOTTestModel
+    executor_factory: tvm.relay.backend.executor_factory.AOTExecutorFactoryModule
+
+
+class AOTDataLinkage(NamedTuple):
+    """Linker section and alignment attributes applied to emitted test data
+
+    Parameters
+    ----------
+    section: str
+        Named section to place data into
+    alignment: int
+        Section alignment
+    """
+
+    section: str
+    alignment: int
+
+
+class AOTTestRunner(NamedTuple):
+    """Class to describe a test runner for AOT code (shared defaults: do not mutate them)
+
+    Parameters
+    ----------
+    makefile: str
+        Name (without the ".mk" suffix) of a premade Makefile in the AOT test folder
+    prologue: str
+        Code to prepend to the main function
+    epilogue: str
+        Code to append to the main function
+    includes: List[str]
+        Additional includes required to run the AOT test runner
+    parameters: Dict[str, str]
+        Additional parameters to pass to the make command
+    pass_config: Dict[str, Any]
+        Additional pass configuration when building the model
+    """
+
+    makefile: str = "default"
+    prologue: str = ""
+    epilogue: str = ""
+    includes: List[str] = []
+    parameters: Dict[str, str] = {}
+    pass_config: Dict[str, Any] = {}
+
+
+def _subprocess_check_log_output(cmd, cwd, logfile):
+    """Run a shell command, appending its combined stdout/stderr to ``logfile``.
+
+    Every output line is also sent to the module logger at debug level. Raises
+    RuntimeError, carrying the captured output, when the exit status is non-zero.
+    """
+    _LOG.info("Execute (%s): %s", cwd, cmd)
+    cmd_base = cmd[0] if isinstance(cmd, (list, tuple)) else cmd.split(" ", 1)[0]
+
+    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    rule = "-" * 80
+    # Each rule gets its own line so that neither the banner text nor the first
+    # line of process output ends up glued to a row of dashes.
+    header = f"\n{rule}\n{timestamp}: Execute ({cwd}): {cmd}\n{rule}\n"
+    captured = [header]
+
+    # The log file is opened before the process is spawned so a bad log path cannot
+    # leave an orphaned child; Popen's context manager closes the pipe and waits.
+    with open(logfile, "a") as log_file, subprocess.Popen(
+        cmd,
+        cwd=cwd,
+        shell=True,
+        bufsize=0,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        encoding="utf-8",
+    ) as proc:
+        log_file.write(header)
+        # Iterating the pipe yields one line at a time until EOF.
+        for line in proc.stdout:
+            captured.append(line)
+            _LOG.debug("%s: %s", cmd_base, line.rstrip("\n"))
+            log_file.write(line)
+
+    # returncode is populated once the context manager has waited for the child.
+    if proc.returncode != 0:
+        output = "".join(captured)
+        raise RuntimeError(f"Subprocess failed: {cmd}\nstdout:\n{output}")
+
+
+def _mangle_name(mod_name, name):
+    """Return ``name`` prefixed with the mangled module name and an underscore."""
+    return mangle_module_name(mod_name) + "_" + name
+
+
+# TODO: Move to linker script with list of symbols rather than coding into source
+def _emit_data_linkage(output_file, data_linkage):
+    """Write a section/alignment ``__attribute__`` prefix when linkage is requested."""
+    if data_linkage is not None:
+        attrs = f'section("{data_linkage.section}"), aligned({data_linkage.alignment})'
+        output_file.write(f"__attribute__(({attrs})) ")
+
+
+def _emit_main_prologue(
+    main_file,
+    custom_prologue,
+    workspace_bytes,
+    data_linkage,
+    compiled_models,
+    interface_api,
+    use_stack_allocator=True,
+):
+    """Write the workspace definition, the C platform hooks and the opening of ``main``."""
+    if use_stack_allocator:
+        workspace_define = f"#define WORKSPACE_SIZE ({workspace_bytes}"
+        if interface_api == "c":
+            for compiled in compiled_models:
+                workspace_define += f" + TVMGEN_{compiled.model.name.upper()}_WORKSPACE_SIZE"
+        # Add TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES because of memory alignment.
+        main_file.write(workspace_define + " + TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES)\n")
+        _emit_data_linkage(main_file, data_linkage)
+        main_file.write("static uint8_t g_aot_memory[WORKSPACE_SIZE];\n")
+        main_file.write("tvm_workspace_t app_workspace;\n")
+        main_file.write(
+            """
+            
+tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) {
+    return StackMemoryManager_Allocate(&app_workspace, num_bytes, out_ptr);
+}
+
+tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) {
+    return StackMemoryManager_Free(&app_workspace,ptr);
+}
+        """
+        )
+    else:
+        # An implementation is not needed for these if the stack allocator is not used
+        main_file.write(
+            """
+            
+tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) {
+    return kTvmErrorFunctionCallNotImplemented;
+}
+
+tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) {
+    return kTvmErrorFunctionCallNotImplemented;
+}
+
+            """
+        )
+    # The TVMFuncRegisterGlobal stub returns 0 so the non-void function has a defined result.
+    main_file.write(
+        """
+    
+void TVMPlatformAbort(tvm_crt_error_t code) { exit(-1); }
+
+void TVMLogf(const char* msg, ...) {
+  va_list args;
+  va_start(args, msg);
+  vfprintf(stdout, msg, args);
+  va_end(args);
+}
+    
+TVM_DLL int TVMFuncRegisterGlobal(const char* name, TVMFunctionHandle f, int override) { return 0; }
+int main(){\n
+    """
+    )
+    main_file.write(custom_prologue)
+
+
+def _emit_main_data(main_file, input_map, output_map, mod_name):
+    """Write ``#include`` lines for every input, expected-output and output data header."""
+    input_prefix = _mangle_name(mod_name, "input_data")
+    for tensor_name in input_map:
+        suffix = re.sub(r"\W", "_", tensor_name)
+        main_file.write(f'#include "{input_prefix}_{suffix}.h"\n')
+
+    expected_prefix = _mangle_name(mod_name, "expected_output_data")
+    output_prefix = _mangle_name(mod_name, "output_data")
+    for tensor_name in output_map:
+        suffix = re.sub(r"\W", "_", tensor_name)
+        main_file.write(f'#include "{expected_prefix}_{suffix}.h"\n')
+        main_file.write(f'#include "{output_prefix}_{suffix}.h"\n')
+
+
+def _emit_main_device_structs(main_file, devices, mod_name):
+    """Write the ``devices`` struct initialiser; writes nothing when no devices are given."""
+    if not devices:
+        return
+    struct_name = _mangle_name(mod_name, "devices")
+    main_file.write(f"struct {struct_name} {struct_name} = {{")
+    main_file.writelines(f"\t.{device} = {device},\n" for device in devices)
+    main_file.write("};\n")
+
+
+def _emit_main_workspace_pool_structs(main_file, workspace_pool_names, mod_name):
+    """Write the ``workspace_pools`` struct initialiser; writes nothing without pool names."""
+    if not workspace_pool_names:
+        return
+    struct_name = _mangle_name(mod_name, "workspace_pools")
+    main_file.write(f"struct {struct_name} {struct_name} = {{")
+    main_file.writelines(f"\t.{pool} = {pool},\n" for pool in workspace_pool_names)
+    main_file.write("};\n")
+
+
+def _emit_main_data_structs(main_file, input_map, output_map, mod_name):
+    """Write the ``inputs`` and ``outputs`` struct initialisers.
+
+    Each field is named after the sanitised tensor name and is set to the matching
+    ``input_data``/``output_data`` array.
+    """
+    # (struct suffix, data-array suffix, tensor names), emitted in this order.
+    struct_specs = (
+        ("inputs", "input_data", input_map),
+        ("outputs", "output_data", output_map),
+    )
+
+    for struct_suffix, data_suffix, tensor_map in struct_specs:
+        struct_name = _mangle_name(mod_name, struct_suffix)
+        data_prefix = _mangle_name(mod_name, data_suffix)
+        main_file.write(f"struct {struct_name} {struct_name} = {{")
+        for tensor_name in tensor_map:
+            field = re.sub(r"\W", "_", tensor_name)
+            main_file.write(f"\t.{field} = {data_prefix}_{field},\n")
+        main_file.write("};\n")
+
+
+def _emit_main_data_setup(main_file, input_map, output_map, mod_name):
+    """Write ``void*`` arrays listing the input and the output data buffers."""
+    # (array suffix, data-array suffix, tensor names, text between size and initialiser)
+    array_specs = (
+        ("inputs", "input_data", input_map, "] = { "),
+        ("outputs", "output_data", output_map, "]  = { "),
+    )
+    for array_suffix, data_suffix, tensor_map, opener in array_specs:
+        array_name = _mangle_name(mod_name, array_suffix)
+        data_prefix = _mangle_name(mod_name, data_suffix)
+        main_file.write(f"void* {array_name}[{len(tensor_map)}{opener}")
+        for tensor_name in tensor_map:
+            field = re.sub(r"\W", "_", tensor_name)
+            main_file.write(f"{data_prefix}_{field}, ")
+        main_file.write("};\n")
+
+
+def _emit_main_c_interface_call(
+    main_file, devices, workspace_pool_names, mod_name, use_workspace_io
+):
+    """Write the call to the generated ``run`` function of the module.
+
+    The inputs and outputs structs are passed unless ``use_workspace_io`` is set;
+    the workspace-pools and devices structs are passed only when non-empty.
+    """
+    struct_suffixes = []
+    if not use_workspace_io:
+        struct_suffixes += ["inputs", "outputs"]
+    if workspace_pool_names:
+        struct_suffixes.append("workspace_pools")
+    if devices:
+        struct_suffixes.append("devices")
+
+    # Joining the arguments, rather than trimming a trailing ", " from the last
+    # piece, keeps the function name intact when there are no arguments at all.
+    arguments = ", ".join(f"&{_mangle_name(mod_name, suffix)}" for suffix in struct_suffixes)
+
+    run_name = _mangle_name(mod_name, "run")
+    main_file.write(f"{run_name}({arguments});\n")
+
+
+def _emit_main_fake_packed_values(main_file):
+    """Write placeholder device, rank and shape statics used by the fake DLTensors."""
+    fake_values = """
+    static DLDevice fake_device = {kDLCPU, 0};
+    static int64_t fake_dims = 0;
+    static int64_t fake_shape = {0};
+    """
+    main_file.write(fake_values)
+
+
+def _emit_main_packed_call(main_file, input_map, output_list, mod_name):
+    """Write DLTensor/TVMValue setup for every input and output, then the packed ``run`` call."""
+    tensors_name = _mangle_name(mod_name, "tensors")
+    values_name = _mangle_name(mod_name, "values")
+    typeids_name = _mangle_name(mod_name, "typeids")
+    input_count, output_count = len(input_map), len(output_list)
+    tensor_count = input_count + output_count
+
+    def fake_tensor(source, source_index, packed_index):
+        slot = f"{tensors_name}[{packed_index}]"
+        main_file.write(
+            f"""
+        {slot}.device = fake_device;
+        {slot}.data = {source}[{source_index}];
+        {slot}.shape = &fake_shape;
+        {slot}.ndim = fake_dims;
+        {slot}.byte_offset = 0;
+        {slot}.strides = NULL;
+        {values_name}[{packed_index}].v_handle = &{slot};
+        """
+        )
+
+    main_file.write(
+        f"""
+    DLTensor {tensors_name}[{tensor_count}];
+    TVMValue {values_name}[{tensor_count}];
+    int32_t {typeids_name}[{tensor_count}];
+    """
+    )
+
+    inputs_name = _mangle_name(mod_name, "inputs")
+    for index in range(input_count):
+        fake_tensor(inputs_name, index, index)
+    outputs_name = _mangle_name(mod_name, "outputs")
+    for index in range(output_count):
+        fake_tensor(outputs_name, index, input_count + index)
+    run_name = _mangle_name(mod_name, "run")
+    main_file.write(f"{run_name}({values_name}, {typeids_name}, 0, NULL, 0, NULL);\n\n")
+
+
+def _emit_main_compare(main_file, outputs, output_tolerance, mod_name, use_interface_c=False):
+    """Write one C loop per output that fails the test when any element is out of tolerance.
+
+    A falsy ``output_tolerance`` (``None`` or ``0``) selects the default: 0.001 for
+    float32 outputs and 0 for every other output dtype.
+    """
+    for tensor_name, expected in outputs.items():
+        suffix = re.sub(r"\W", "_", tensor_name)
+        expected_name = _mangle_name(mod_name, f"expected_output_data_{suffix}")
+        length_name = _mangle_name(mod_name, f"output_data_{suffix}") + "_len"
+
+        if expected.dtype == "float32":
+            compare_fn, tolerance = "fabs", output_tolerance or 0.001
+        else:
+            compare_fn, tolerance = "abs", output_tolerance or 0
+
+        if use_interface_c:
+            # Read through the outputs struct field, cast to the element type.
+            c_type = NP_TYPE_TO_C[str(expected.dtype)]
+            actual_name = f"(({c_type}*)" + _mangle_name(mod_name, f"outputs.{suffix})")
+        else:
+            actual_name = _mangle_name(mod_name, f"output_data_{suffix}")
+        main_file.write(
+            f"""
+            for (int i = 0; i<{length_name}; i++) {{
+                if ({compare_fn}({actual_name}[i]-{expected_name}[i]) > {tolerance}) {{
+                    printf("{AOT_FAILURE_TOKEN}\\n");
+                    return -1;
+                }}
+            }}
+            """
+        )
+
+
+def _emit_main_init_memory_manager(main_file):
+    """Write the C call that initialises the stack memory manager over ``g_aot_memory``."""
+    main_file.write("StackMemoryManager_Init(&app_workspace, g_aot_memory, WORKSPACE_SIZE);\n")
+
+
+def _emit_main_epilogue(main_file, custom_epilogue):
+    """Write the custom epilogue, then print the success token and close ``main``."""
+    main_file.write(custom_epilogue)
+    success_print = f'printf("{AOT_SUCCESS_TOKEN}\\n");'
+    main_file.write(success_print + "return 0;" + "}\n")
+
+
+def _emit_main_common_includes(main_file, custom_includes):
+    """Write the fixed C/TVM includes followed by a quoted include for each custom header."""
+    # System headers use angle brackets; TVM and custom headers use quotes.
+    for system_header in ("stdio.h", "stdarg.h", "stdlib.h", "math.h"):
+        main_file.write(f"#include <{system_header}>\n")
+
+    tvm_headers = ("tvm/runtime/c_runtime_api.h", "tvm/runtime/crt/stack_allocator.h")
+    for quoted_header in (*tvm_headers, *custom_includes):
+        main_file.write(f'#include "{quoted_header}"\n')
+
+
+def _emit_main_micro_include(main_file, mod_name):
+    main_file.write(f"#include <{mangle_module_name(mod_name)}.h>\n")
+
+
+def _create_main(
+    test_name,
+    compiled_models,
+    output_path,
+    custom_includes,
+    custom_prologue,
+    custom_epilogue,
+    data_linkage,
+    interface_api,
+    workspace_bytes,
+    use_stack_allocator=True,
+    use_workspace_io=False,
+):
+    file_path = pathlib.Path(f"{output_path}/" + test_name).resolve()
+    # create the C source file for the test application
+    raw_path = file_path.with_suffix(".c").resolve()
+    with open(raw_path, "w") as main_file:
+        _emit_main_common_includes(main_file, custom_includes)
+
+        if interface_api == "c":
+            for compiled_model in compiled_models:
+                model = compiled_model.model
+                _emit_main_micro_include(main_file, model.name)
+        for compiled_model in compiled_models:
+            model = compiled_model.model
+            _emit_main_data(main_file, model.inputs, model.outputs, model.name)
+
+        _emit_main_prologue(
+            main_file,
+            custom_prologue,
+            workspace_bytes,
+            data_linkage,
+            compiled_models,
+            interface_api,
+            use_stack_allocator,
+        )
+        if use_stack_allocator:
+            _emit_main_init_memory_manager(main_file)
+
+        if interface_api == "c":
+            for compiled_model in compiled_models:
+                model = compiled_model.model
+                executor_codegen_metadata = (
+                    compiled_model.executor_factory.executor_codegen_metadata
+                )
+                devices = compiled_model.executor_factory.get_devices()
+                workspace_pool_names = None
+                if executor_codegen_metadata.pool_inputs:
+                    workspace_pool_names = [
+                        allocated_pool.pool_info.pool_name
+                        for allocated_pool in dict(executor_codegen_metadata.pool_inputs).values()
+                        if not allocated_pool.pool_info.is_internal
+                    ]
+                _emit_main_device_structs(main_file, devices, model.name)
+                if not use_workspace_io:
+                    _emit_main_workspace_pool_structs(main_file, workspace_pool_names, model.name)
+                    _emit_main_data_structs(main_file, model.inputs, model.outputs, model.name)
+                _emit_main_c_interface_call(
+                    main_file, devices, workspace_pool_names, model.name, use_workspace_io
+                )
+        else:
+            _emit_main_fake_packed_values(main_file)
+            for compiled_model in compiled_models:
+                model = compiled_model.model
+                _emit_main_data_setup(main_file, model.inputs, model.outputs, model.name)
+                _emit_main_packed_call(main_file, model.inputs, model.outputs, model.name)
+
+        for compiled_model in compiled_models:
+            model = compiled_model.model
+            _emit_main_compare(
+                main_file, model.outputs, model.output_tolerance, model.name, interface_api == "c"
+            )
+        _emit_main_epilogue(main_file, custom_epilogue)
+
+
+def _create_header_file(tensor_name, npy_data, output_path, data_linkage):
+    """
+    This method generates a header file containing the data contained in the numpy array provided.
+    It is used to capture the tensor data (for both inputs and expected outputs) to be bundled into the standalone application.
+    """
+    file_path = pathlib.Path(f"{output_path}/" + tensor_name).resolve()
+    # create header file
+    raw_path = file_path.with_suffix(".h").resolve()
+    with open(raw_path, "w") as header_file:
+        header_file.write("#include <stddef.h>\n")
+        header_file.write("#include <stdint.h>\n")
+        header_file.write("#include <dlpack/dlpack.h>\n")
+        header_file.write(f"const size_t {tensor_name}_len = {npy_data.size};\n")
+
+        _emit_data_linkage(header_file, data_linkage)
+
+        header_file.write(f"{NP_TYPE_TO_C[str(npy_data.dtype)]} {tensor_name}[] =")
+
+        header_file.write("{")
+        for i in np.ndindex(npy_data.shape):
+            header_file.write(f"{npy_data[i]}, ")
+        header_file.write("};\n\n")
+
+
+def compile_models(
+    models: Union[List[AOTTestModel], AOTTestModel],
+    interface_api: str,
+    use_unpacked_api: bool,
+    workspace_byte_alignment: int = 8,
+    enable_op_fusion: bool = True,
+    pass_config: Dict[str, Any] = None,
+    use_runtime_executor: bool = True,
+    target: tvm.target.Target = tvm.target.Target("c"),
+    workspace_memory_pools=None,
+    schedule_name: str = None,
+) -> List[AOTCompiledTestModel]:
+    """
+    This method generates runtime.Modules for the tests
+    """
+    if not isinstance(models, list):
+        models = [models]
+
+    runtime = Runtime("crt")
+    executor = Executor(
+        "aot",
+        {
+            "workspace-byte-alignment": workspace_byte_alignment,
+            "interface-api": interface_api,
+            "unpacked-api": use_unpacked_api,
+        },
+    )
+
+    config = {"tir.disable_vectorize": True}
+    if pass_config:
+        config = {**config, **pass_config}
+    if not enable_op_fusion:
+        config["relay.FuseOps.max_depth"] = 1
+
+    compiled_mods = list()
+    for model in models:
+        if schedule_name:
+            # Testing with deterministic schedule
+            task_list = autotvm.task.extract_from_program(
+                model.module, target=target, params=model.params
+            )
+            with tvm.autotvm.apply_fixed_config(task_list, schedule_name):
+                with tvm.transform.PassContext(opt_level=3, config=config):
+                    if use_runtime_executor:
+                        executor_factory = tvm.relay.build(
+                            model.module,
+                            target,
+                            executor=executor,
+                            runtime=runtime,
+                            workspace_memory_pools=workspace_memory_pools,
+                            params=model.params,
+                            mod_name=model.name,
+                        )
+                        compiled_mods.append(
+                            AOTCompiledTestModel(model=model, executor_factory=executor_factory)
+                        )
+        else:
+            with tvm.transform.PassContext(opt_level=3, config=config):
+                # TODO(Mousius) - Remove once executor/runtime are fully removed from Target
+                if use_runtime_executor:
+                    executor_factory = tvm.relay.build(
+                        model.module,
+                        target,
+                        executor=executor,
+                        runtime=runtime,
+                        workspace_memory_pools=workspace_memory_pools,
+                        params=model.params,
+                        mod_name=model.name,
+                    )
+                    compiled_mods.append(
+                        AOTCompiledTestModel(model=model, executor_factory=executor_factory)
+                    )
+                else:
+                    executor_factory = tvm.relay.build(
+                        model.module,
+                        tvm.target.Target(target, host=target),
+                        params=model.params,
+                        mod_name=model.name,
+                    )
+                    compiled_mods.append(
+                        AOTCompiledTestModel(model=model, executor_factory=executor_factory)
+                    )
+    return compiled_mods
+
+
+def run_and_check(
+    models: List[AOTCompiledTestModel],
+    runner: AOTTestRunner,
+    interface_api: str,
+    debug_calculated_workspaces=False,
+    workspace_byte_alignment=8,
+    data_linkage: AOTDataLinkage = None,
+    test_dir: str = None,
+    verbose: bool = False,
+    use_workspace_io: bool = False,
+):
+    """
+    This method uses the original test data and compiled runtime.Modules
+    to run in the test runner to verify the results.
+    """
+
+    def run_and_check_body(base_path):
+        cflags = f"-DTVM_RUNTIME_ALLOC_ALIGNMENT_BYTES={workspace_byte_alignment} "
+        # The calculated workspaces will not account for stack allocator tags used for debugging
+        if debug_calculated_workspaces:
+            cflags += "-DTVM_CRT_STACK_ALLOCATOR_ENABLE_LIFO_CHECK "
+
+        base_path = os.path.abspath(base_path)
+        build_path = os.path.join(base_path, "build")
+        os.makedirs(build_path, exist_ok=True)
+
+        include_path = os.path.join(base_path, "include")
+        os.mkdir(include_path)
+        crt_root = tvm.micro.get_standalone_crt_dir()
+        shutil.copy2(
+            os.path.join(crt_root, "template", "crt_config-template.h"),
+            os.path.join(include_path, "crt_config.h"),
+        )
+
+        workspace_bytes = 0
+        for compiled_model in models:
+            model = compiled_model.model
+            tar_file = os.path.join(base_path, f"{model.name}.tar")
+            export_model_library_format(compiled_model.executor_factory, tar_file)
+            t = tarfile.open(tar_file)
+            t.extractall(base_path)
+
+            # The C interface API does not need the compiler-generated
+            # workspace size to generate the test application, because
+            # the workspace size is codegen'd as a macro into
+            # tvmgen_<model_name>.h.
+            if interface_api != "c":
+                workspace_bytes += mlf_extract_workspace_size_bytes(tar_file)
+
+            workspace_bytes += model.extra_memory_in_bytes
+            for key in model.inputs:
+                sanitized_tensor_name = re.sub(r"\W", "_", key)
+                _create_header_file(
+                    f'{_mangle_name(model.name, "input_data")}_{sanitized_tensor_name}',
+                    model.inputs[key],
+                    include_path,
+                    data_linkage,
+                )
+
+            for key in model.outputs:
+                sanitized_tensor_name = re.sub(r"\W", "_", key)
+                _create_header_file(
+                    f'{_mangle_name(model.name, "output_data")}_{sanitized_tensor_name}',
+                    np.zeros(model.outputs[key].shape, model.outputs[key].dtype),
+                    include_path,
+                    data_linkage,
+                )
+                _create_header_file(
+                    f'{_mangle_name(model.name, "expected_output_data")}_{sanitized_tensor_name}',
+                    model.outputs[key],
+                    include_path,
+                    data_linkage,
+                )
+
+        use_usmp = runner.pass_config.get("tir.usmp.enable", False)
+        # We only need the stack allocator if USMP is not used
+        use_stack_allocator = not use_usmp
+
+        _create_main(
+            "test.c",
+            models,
+            build_path,
+            runner.includes,
+            runner.prologue,
+            runner.epilogue,
+            data_linkage,
+            interface_api,
+            workspace_bytes,
+            use_stack_allocator,
+            use_workspace_io,
+        )
+
+        # Verify that it compiles fine
+        file_dir = os.path.dirname(os.path.abspath(__file__))
+        makefile_dir = os.path.join(file_dir, "../../../../tests/python/relay/aot")
+        codegen_path = os.path.join(base_path, "codegen")
+        makefile = os.path.join(makefile_dir, f"{runner.makefile}.mk")
+        fvp_dir = "/opt/arm/FVP_Corstone_SSE-300/models/Linux64_GCC-6.4/"
+        # TODO(@grant-arm): Remove once ci_cpu docker image has been updated to FVP_Corstone_SSE
+        if not os.path.isdir(fvp_dir):
+            fvp_dir = "/opt/arm/FVP_Corstone_SSE-300_Ethos-U55/models/Linux64_GCC-6.4/"
+        custom_params = " ".join(
+            [f" {param}='{value}'" for param, value in runner.parameters.items()]
+        )
+        make_command = (
+            f"make -f {makefile} build_dir={build_path}"
+            + f" CFLAGS='{cflags}'"
+            + f" TVM_ROOT={file_dir}/../../../.."
+            + f" AOT_TEST_ROOT={makefile_dir}"
+            + f" CODEGEN_ROOT={codegen_path}"
+            + f" STANDALONE_CRT_DIR={tvm.micro.get_standalone_crt_dir()}"
+            + f" FVP_DIR={fvp_dir}"
+            + custom_params
+        )
+
+        compile_log_path = os.path.join(build_path, "test_compile.log")
+        compile_command = f"{make_command} aot_test_runner"
+        if verbose:
+            print("Compile command:\n", compile_command)
+        _subprocess_check_log_output(compile_command, ".", compile_log_path)
+
+        # Verify that it runs fine
+        run_log_path = os.path.join(build_path, "test_run.log")
+        run_command = f"{make_command} run"
+        if verbose:
+            print("Run command:\n", run_command)
+
+        # TODO(lhutton1) This is a quick and dirty workaround to help temporarily reduce
+        # the flakiness of the tests. Will remove once #10300 and #10314 are resolved.
+        try:
+            _subprocess_check_log_output(run_command, build_path, run_log_path)
+        except RuntimeError as err:
+            print("Failed to run the module, having a second attempt...", file=sys.stderr)
+            print(err, file=sys.stderr)
+            _subprocess_check_log_output(run_command, build_path, run_log_path)
+
+        with open(run_log_path) as run_log:
+            assert AOT_SUCCESS_TOKEN in run_log.read()
+
+    if test_dir is None:
+        with tempfile.TemporaryDirectory() as tmpdir:
+            run_and_check_body(os.path.join(tmpdir, "test"))
+    else:
+        run_and_check_body(test_dir)
+
+
+def compile_and_run(
+    models: Union[List[AOTTestModel], AOTTestModel],
+    runner: AOTTestRunner,
+    interface_api: str,
+    use_unpacked_api: bool,
+    debug_calculated_workspaces: bool = False,
+    workspace_byte_alignment: int = 8,
+    enable_op_fusion: bool = True,
+    data_linkage: AOTDataLinkage = None,
+    use_runtime_executor: bool = True,
+    target: str = "c",
+    target_opts: Dict = None,
+    test_dir: str = None,
+    verbose: bool = False,
+    schedule_name: str = None,
+):
+    """This is a wrapper API to compile and run models as test for AoT
+
+    Parameters
+    ----------
+    test_dir : str
+        This path will contain build, codegen, include directories
+    verbose: bool
+        Prints commands to build and run AOT test runner
+    """
+
+    if target_opts:
+        for key, val in target_opts.items():
+            target += f" {key}={val}"
+
+    compiled_test_mods = compile_models(
+        models=models,
+        interface_api=interface_api,
+        use_unpacked_api=use_unpacked_api,
+        workspace_byte_alignment=workspace_byte_alignment,
+        enable_op_fusion=enable_op_fusion,
+        pass_config=runner.pass_config,
+        use_runtime_executor=use_runtime_executor,
+        target=tvm.target.Target(target),
+        schedule_name=schedule_name,
+    )
+
+    run_and_check(
+        models=compiled_test_mods,
+        runner=runner,
+        interface_api=interface_api,
+        debug_calculated_workspaces=debug_calculated_workspaces,
+        workspace_byte_alignment=workspace_byte_alignment,
+        data_linkage=data_linkage,
+        test_dir=test_dir,
+        verbose=verbose,
+    )
+
+
+def generate_ref_data(mod, input_data, params=None, target="llvm"):
+    """Generate reference data through executing the relay module"""
+    with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}):
+        lib = relay.build(mod, target=target, params=params)
+
+    lib_name = "mod.so"
+    temp = utils.tempdir()
+    lib_path = temp.relpath(lib_name)
+    lib.export_library(lib_path)
+    lib = tvm.runtime.load_module(lib_path)
+    grt_mod = graph_executor.GraphModule(lib["default"](tvm.cpu()))
+    grt_mod.set_input(**input_data)
+    grt_mod.run()
+    output_count = grt_mod.get_num_outputs()
+    out = [grt_mod.get_output(i).numpy() for i in range(output_count)]
+    if isinstance(mod, tvm.relay.Function):
+        main = mod
+    else:
+        main = mod["main"]
+    if main.attrs is None or main.attrs["output_tensor_names"] is None:
+        output_tensor_names = ["output" if i == 0 else f"output{i+1}" for i in range(output_count)]
+    else:
+        output_tensor_names = main.attrs["output_tensor_names"]
+
+    return dict(zip(output_tensor_names, out))
+
+
+def _convert_to_relay(
+    tflite_model_buf,
+):
+    """Convert a tflite model buffer in a Relay module"""
+    # TFLite.Model.Model has changed to TFLite.Model from 1.14 to 2.1
+    try:
+        import tflite.Model
+
+        tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0)
+    except AttributeError:
+        import tflite
+
+        tflite_model = tflite.Model.GetRootAsModel(tflite_model_buf, 0)
+    except ImportError:
+        raise ImportError("The tflite package must be installed")
+
+    mod, params = relay.frontend.from_tflite(tflite_model)
+    mod["main"] = relay.build_module.bind_params_by_name(mod["main"], params)
+    return mod, params
+
+
+def create_relay_module_and_inputs_from_tflite_file(tflite_model_file):
+    """A helper function to create a Relay IRModule with inputs
+    and params from a tflite file"""
+    with open(tflite_model_file, "rb") as f:
+        tflite_model_buf = f.read()
+    mod, params = _convert_to_relay(tflite_model_buf)
+
+    inputs = dict()
+    for param in mod["main"].params:
+        name = str(param.name_hint)
+        data_shape = [int(i) for i in param.type_annotation.shape]
+        dtype = str(param.type_annotation.dtype)
+        in_min, in_max = (np.iinfo(dtype).min, np.iinfo(dtype).max)
+        data = np.random.randint(in_min, high=in_max, size=data_shape, dtype=dtype)
+        inputs[name] = data
+
+    return mod, inputs, params

From 531bd2c3bd30d6cc77f114205a8ae3cde84c5137 Mon Sep 17 00:00:00 2001
From: Mehrdad Hessar <mhessar@octoml.ai>
Date: Mon, 16 May 2022 14:29:14 -0700
Subject: [PATCH 03/13] fix split imports

---
 python/tvm/testing/aot.py                     | 42 +++++++++----------
 .../contrib/test_cmsisnn/test_binary_ops.py   |  4 +-
 .../contrib/test_cmsisnn/test_conv2d.py       | 10 +----
 .../test_cmsisnn/test_fully_connected.py      |  7 +---
 .../test_cmsisnn/test_invalid_graphs.py       |  5 +--
 .../contrib/test_cmsisnn/test_networks.py     |  4 +-
 .../contrib/test_cmsisnn/test_pooling.py      | 11 +----
 .../contrib/test_cmsisnn/test_softmax.py      |  9 +---
 tests/python/contrib/test_ethosu/infra.py     |  2 +-
 .../contrib/test_ethosu/test_codegen.py       |  2 +-
 .../integration/test_arm_mprofile_dsp.py      |  8 +---
 tests/python/relay/aot/test_c_device_api.py   |  4 +-
 tests/python/relay/aot/test_cpp_aot.py        |  4 +-
 tests/python/relay/aot/test_crt_aot.py        |  2 +-
 tests/python/relay/aot/test_crt_aot_usmp.py   |  3 +-
 .../strategy/arm_cpu/test_conv2d_nchw.py      |  4 +-
 .../strategy/arm_cpu/test_conv2d_nhwc.py      |  8 +---
 .../strategy/arm_cpu/test_depthwise_conv2d.py |  8 +---
 .../strategy/arm_cpu/test_group_conv2d.py     |  8 +---
 tests/python/relay/utils/external_codegen.py  |  3 +-
 20 files changed, 48 insertions(+), 100 deletions(-)

diff --git a/python/tvm/testing/aot.py b/python/tvm/testing/aot.py
index 94bc6e048d7e..ed2a7339ab5b 100644
--- a/python/tvm/testing/aot.py
+++ b/python/tvm/testing/aot.py
@@ -574,6 +574,27 @@ def _create_header_file(tensor_name, npy_data, output_path, data_linkage):
         header_file.write("};\n\n")
 
 
+def _convert_to_relay(
+    tflite_model_buf,
+):
+    """Convert a tflite model buffer in a Relay module"""
+    # TFLite.Model.Model has changed to TFLite.Model from 1.14 to 2.1
+    try:
+        import tflite.Model
+
+        tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0)
+    except AttributeError:
+        import tflite
+
+        tflite_model = tflite.Model.GetRootAsModel(tflite_model_buf, 0)
+    except ImportError:
+        raise ImportError("The tflite package must be installed")
+
+    mod, params = relay.frontend.from_tflite(tflite_model)
+    mod["main"] = relay.build_module.bind_params_by_name(mod["main"], params)
+    return mod, params
+
+
 def compile_models(
     models: Union[List[AOTTestModel], AOTTestModel],
     interface_api: str,
@@ -886,27 +907,6 @@ def generate_ref_data(mod, input_data, params=None, target="llvm"):
     return dict(zip(output_tensor_names, out))
 
 
-def _convert_to_relay(
-    tflite_model_buf,
-):
-    """Convert a tflite model buffer in a Relay module"""
-    # TFLite.Model.Model has changed to TFLite.Model from 1.14 to 2.1
-    try:
-        import tflite.Model
-
-        tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0)
-    except AttributeError:
-        import tflite
-
-        tflite_model = tflite.Model.GetRootAsModel(tflite_model_buf, 0)
-    except ImportError:
-        raise ImportError("The tflite package must be installed")
-
-    mod, params = relay.frontend.from_tflite(tflite_model)
-    mod["main"] = relay.build_module.bind_params_by_name(mod["main"], params)
-    return mod, params
-
-
 def create_relay_module_and_inputs_from_tflite_file(tflite_model_file):
     """A helper function to create a Relay IRModule with inputs
     and params from a tflite file"""
diff --git a/tests/python/contrib/test_cmsisnn/test_binary_ops.py b/tests/python/contrib/test_cmsisnn/test_binary_ops.py
index a9957e779867..7846bba1e089 100644
--- a/tests/python/contrib/test_cmsisnn/test_binary_ops.py
+++ b/tests/python/contrib/test_cmsisnn/test_binary_ops.py
@@ -36,12 +36,10 @@
     assert_partitioned_function,
     assert_no_external_function,
 )
+from tvm.testing.aot import generate_ref_data, AOTTestModel, compile_and_run
 from tvm.micro.testing.aot_test_utils import (
-    AOTTestModel,
     AOT_CORSTONE300_RUNNER,
     AOT_USMP_CORSTONE300_RUNNER,
-    generate_ref_data,
-    compile_and_run,
 )
 
 
diff --git a/tests/python/contrib/test_cmsisnn/test_conv2d.py b/tests/python/contrib/test_cmsisnn/test_conv2d.py
index 51afb943c370..1cdf98510148 100644
--- a/tests/python/contrib/test_cmsisnn/test_conv2d.py
+++ b/tests/python/contrib/test_cmsisnn/test_conv2d.py
@@ -23,15 +23,9 @@
 from tvm import relay
 from tvm.relay.op.contrib import cmsisnn
 
+from tvm.testing.aot import generate_ref_data, AOTTestModel, compile_and_run
 
-from tvm.micro.testing.aot_test_utils import (
-    AOTTestModel,
-    AOT_CORSTONE300_RUNNER,
-    AOT_USMP_CORSTONE300_RUNNER,
-    AOT_DEFAULT_RUNNER,
-    generate_ref_data,
-    compile_and_run,
-)
+from tvm.micro.testing.aot_test_utils import AOT_USMP_CORSTONE300_RUNNER
 from utils import (
     skip_if_no_reference_system,
     make_module,
diff --git a/tests/python/contrib/test_cmsisnn/test_fully_connected.py b/tests/python/contrib/test_cmsisnn/test_fully_connected.py
index d23f3ed4c425..111d3b2edac1 100644
--- a/tests/python/contrib/test_cmsisnn/test_fully_connected.py
+++ b/tests/python/contrib/test_cmsisnn/test_fully_connected.py
@@ -23,14 +23,9 @@
 from tvm import relay
 from tvm.relay.op.contrib import cmsisnn
 
-
+from tvm.testing.aot import generate_ref_data, AOTTestModel, compile_and_run
 from tvm.micro.testing.aot_test_utils import (
-    AOTTestModel,
-    AOT_CORSTONE300_RUNNER,
     AOT_USMP_CORSTONE300_RUNNER,
-    AOT_DEFAULT_RUNNER,
-    generate_ref_data,
-    compile_and_run,
 )
 from utils import (
     skip_if_no_reference_system,
diff --git a/tests/python/contrib/test_cmsisnn/test_invalid_graphs.py b/tests/python/contrib/test_cmsisnn/test_invalid_graphs.py
index 286502e0bd5b..d0a8547d32ac 100644
--- a/tests/python/contrib/test_cmsisnn/test_invalid_graphs.py
+++ b/tests/python/contrib/test_cmsisnn/test_invalid_graphs.py
@@ -22,12 +22,9 @@
 import tvm
 from tvm import relay
 
-
+from tvm.testing.aot import AOTTestModel, compile_and_run, generate_ref_data
 from tvm.micro.testing.aot_test_utils import (
-    AOTTestModel,
     AOT_USMP_CORSTONE300_RUNNER,
-    generate_ref_data,
-    compile_and_run,
 )
 from utils import (
     skip_if_no_reference_system,
diff --git a/tests/python/contrib/test_cmsisnn/test_networks.py b/tests/python/contrib/test_cmsisnn/test_networks.py
index efef27af0c1e..10edd01a867e 100644
--- a/tests/python/contrib/test_cmsisnn/test_networks.py
+++ b/tests/python/contrib/test_cmsisnn/test_networks.py
@@ -28,12 +28,10 @@
 from tvm.relay.op.contrib import cmsisnn
 
 from utils import skip_if_no_reference_system, get_range_for_dtype_str
+from tvm.testing.aot import AOTTestModel, compile_and_run, generate_ref_data
 from tvm.micro.testing.aot_test_utils import (
-    AOTTestModel,
     AOT_CORSTONE300_RUNNER,
     AOT_USMP_CORSTONE300_RUNNER,
-    generate_ref_data,
-    compile_and_run,
 )
 
 
diff --git a/tests/python/contrib/test_cmsisnn/test_pooling.py b/tests/python/contrib/test_cmsisnn/test_pooling.py
index 6099f6292f32..a2650bb8d028 100644
--- a/tests/python/contrib/test_cmsisnn/test_pooling.py
+++ b/tests/python/contrib/test_cmsisnn/test_pooling.py
@@ -23,15 +23,8 @@
 from tvm import relay
 from tvm.relay.op.contrib import cmsisnn
 
-
-from tvm.micro.testing.aot_test_utils import (
-    AOTTestModel,
-    AOT_CORSTONE300_RUNNER,
-    AOT_USMP_CORSTONE300_RUNNER,
-    AOT_DEFAULT_RUNNER,
-    generate_ref_data,
-    compile_and_run,
-)
+from tvm.testing.aot import AOTTestModel, compile_and_run, generate_ref_data
+from tvm.micro.testing.aot_test_utils import AOT_USMP_CORSTONE300_RUNNER
 from utils import (
     skip_if_no_reference_system,
     make_module,
diff --git a/tests/python/contrib/test_cmsisnn/test_softmax.py b/tests/python/contrib/test_cmsisnn/test_softmax.py
index 36c104628fe5..5a44a7865e66 100644
--- a/tests/python/contrib/test_cmsisnn/test_softmax.py
+++ b/tests/python/contrib/test_cmsisnn/test_softmax.py
@@ -34,13 +34,8 @@
     assert_partitioned_function,
     assert_no_external_function,
 )
-from tvm.micro.testing.aot_test_utils import (
-    AOTTestModel,
-    AOT_CORSTONE300_RUNNER,
-    AOT_USMP_CORSTONE300_RUNNER,
-    generate_ref_data,
-    compile_and_run,
-)
+from tvm.testing.aot import AOTTestModel, compile_and_run, generate_ref_data
+from tvm.micro.testing.aot_test_utils import AOT_USMP_CORSTONE300_RUNNER
 
 
 def make_model(
diff --git a/tests/python/contrib/test_ethosu/infra.py b/tests/python/contrib/test_ethosu/infra.py
index 1d32def5f638..20bd12945f8f 100644
--- a/tests/python/contrib/test_ethosu/infra.py
+++ b/tests/python/contrib/test_ethosu/infra.py
@@ -47,7 +47,7 @@
 import tvm.relay.testing.tf as tf_testing
 
 from tvm.relay.op.contrib.ethosu import partition_for_ethosu
-from tvm.micro.testing.aot_test_utils import (
+from tvm.testing.aot import (
     AOTCompiledTestModel,
     AOTDataLinkage,
     AOTTestModel,
diff --git a/tests/python/contrib/test_ethosu/test_codegen.py b/tests/python/contrib/test_ethosu/test_codegen.py
index 0444794fdf6b..7ea813762796 100644
--- a/tests/python/contrib/test_ethosu/test_codegen.py
+++ b/tests/python/contrib/test_ethosu/test_codegen.py
@@ -29,7 +29,7 @@
 from tvm.relay.backend.contrib.ethosu import util
 
 from tvm.relay.op.contrib.ethosu import partition_for_ethosu
-from tvm.micro.testing.aot_test_utils import generate_ref_data
+from tvm.testing.aot import generate_ref_data
 
 from . import infra
 
diff --git a/tests/python/integration/test_arm_mprofile_dsp.py b/tests/python/integration/test_arm_mprofile_dsp.py
index 6a18fc3b6695..7628755af4ac 100644
--- a/tests/python/integration/test_arm_mprofile_dsp.py
+++ b/tests/python/integration/test_arm_mprofile_dsp.py
@@ -20,12 +20,8 @@
 import tvm
 import tvm.testing
 from tvm import relay
-from tvm.micro.testing.aot_test_utils import (
-    AOTTestModel,
-    AOT_CORSTONE300_RUNNER,
-    generate_ref_data,
-    compile_and_run,
-)
+from tvm.testing.aot import AOTTestModel, compile_and_run, generate_ref_data
+from tvm.micro.testing.aot_test_utils import AOT_CORSTONE300_RUNNER
 
 
 @tvm.testing.requires_corstone300
diff --git a/tests/python/relay/aot/test_c_device_api.py b/tests/python/relay/aot/test_c_device_api.py
index 771b80127c62..c53bbc50d991 100644
--- a/tests/python/relay/aot/test_c_device_api.py
+++ b/tests/python/relay/aot/test_c_device_api.py
@@ -24,11 +24,9 @@
 
 from tvm import relay
 from tvm.ir.module import IRModule
+from tvm.testing.aot import AOTTestModel, compile_and_run, generate_ref_data
 from tvm.micro.testing.aot_test_utils import (
     AOT_DEFAULT_RUNNER,
-    AOTTestModel,
-    generate_ref_data,
-    compile_models,
 )
 
 
diff --git a/tests/python/relay/aot/test_cpp_aot.py b/tests/python/relay/aot/test_cpp_aot.py
index 545788c4b1a3..16a24266df46 100644
--- a/tests/python/relay/aot/test_cpp_aot.py
+++ b/tests/python/relay/aot/test_cpp_aot.py
@@ -27,11 +27,9 @@
 from tvm import IRModule
 from tvm import relay
 from tvm.relay import backend, testing
+from tvm.testing.aot import generate_ref_data
 from tvm.micro.testing.aot_test_utils import (
     AOT_DEFAULT_RUNNER,
-    AOTTestModel,
-    generate_ref_data,
-    compile_and_run,
 )
 
 
diff --git a/tests/python/relay/aot/test_crt_aot.py b/tests/python/relay/aot/test_crt_aot.py
index 3bd9f2a3989a..1225f1facaf7 100644
--- a/tests/python/relay/aot/test_crt_aot.py
+++ b/tests/python/relay/aot/test_crt_aot.py
@@ -37,6 +37,7 @@
 from tvm.micro import model_library_format as mlf
 from tvm.micro import export_model_library_format
 from tvm.ir.instrument import pass_instrument
+from tvm.testing.aot import create_relay_module_and_inputs_from_tflite_file
 from tvm.micro.testing.aot_test_utils import (
     AOTTestModel,
     AOT_DEFAULT_RUNNER,
@@ -45,7 +46,6 @@
     compile_and_run,
     compile_models,
     parametrize_aot_options,
-    create_relay_module_and_inputs_from_tflite_file,
 )
 
 
diff --git a/tests/python/relay/aot/test_crt_aot_usmp.py b/tests/python/relay/aot/test_crt_aot_usmp.py
index abf898db3aad..eaff1d9bd17b 100644
--- a/tests/python/relay/aot/test_crt_aot_usmp.py
+++ b/tests/python/relay/aot/test_crt_aot_usmp.py
@@ -32,11 +32,10 @@
 from tvm.relay.backend import Executor, Runtime
 from tvm import WorkspaceMemoryPools, PoolInfo
 from tvm.micro import model_library_format as mlf
-from tvm.micro.testing.aot_test_utils import (
+from tvm.testing.aot import (
     AOTTestModel,
     AOTTestRunner,
     generate_ref_data,
-    convert_to_relay,
     compile_and_run,
     compile_models,
     parametrize_aot_options,
diff --git a/tests/python/relay/strategy/arm_cpu/test_conv2d_nchw.py b/tests/python/relay/strategy/arm_cpu/test_conv2d_nchw.py
index a45a1eecc12a..c110cd864f7f 100644
--- a/tests/python/relay/strategy/arm_cpu/test_conv2d_nchw.py
+++ b/tests/python/relay/strategy/arm_cpu/test_conv2d_nchw.py
@@ -20,11 +20,9 @@
 import tvm
 import tvm.testing
 from tvm import relay
+from tvm.testing.aot import AOTTestModel, compile_and_run, generate_ref_data
 from tvm.micro.testing.aot_test_utils import (
-    AOTTestModel,
     AOT_CORSTONE300_RUNNER,
-    generate_ref_data,
-    compile_and_run,
 )
 
 
diff --git a/tests/python/relay/strategy/arm_cpu/test_conv2d_nhwc.py b/tests/python/relay/strategy/arm_cpu/test_conv2d_nhwc.py
index 5e312f3d7057..4a363f02822a 100644
--- a/tests/python/relay/strategy/arm_cpu/test_conv2d_nhwc.py
+++ b/tests/python/relay/strategy/arm_cpu/test_conv2d_nhwc.py
@@ -20,12 +20,8 @@
 import tvm
 import tvm.testing
 from tvm import relay
-from tvm.micro.testing.aot_test_utils import (
-    AOTTestModel,
-    AOT_CORSTONE300_RUNNER,
-    generate_ref_data,
-    compile_and_run,
-)
+from tvm.testing.aot import AOTTestModel, compile_and_run, generate_ref_data
+from tvm.micro.testing.aot_test_utils import AOT_CORSTONE300_RUNNER
 
 
 class BasicConv2dTests:
diff --git a/tests/python/relay/strategy/arm_cpu/test_depthwise_conv2d.py b/tests/python/relay/strategy/arm_cpu/test_depthwise_conv2d.py
index bcf82c124860..89f1fb1843b4 100644
--- a/tests/python/relay/strategy/arm_cpu/test_depthwise_conv2d.py
+++ b/tests/python/relay/strategy/arm_cpu/test_depthwise_conv2d.py
@@ -20,12 +20,8 @@
 import tvm
 import tvm.testing
 from tvm import relay
-from tvm.micro.testing.aot_test_utils import (
-    AOTTestModel,
-    AOT_CORSTONE300_RUNNER,
-    generate_ref_data,
-    compile_and_run,
-)
+from tvm.testing.aot import AOTTestModel, compile_and_run, generate_ref_data
+from tvm.micro.testing.aot_test_utils import AOT_CORSTONE300_RUNNER
 
 
 class BasicDepthwiseConv2dTests:
diff --git a/tests/python/relay/strategy/arm_cpu/test_group_conv2d.py b/tests/python/relay/strategy/arm_cpu/test_group_conv2d.py
index ef3d695e0f0c..d3f504d04e35 100644
--- a/tests/python/relay/strategy/arm_cpu/test_group_conv2d.py
+++ b/tests/python/relay/strategy/arm_cpu/test_group_conv2d.py
@@ -20,12 +20,8 @@
 import tvm
 import tvm.testing
 from tvm import relay
-from tvm.micro.testing.aot_test_utils import (
-    AOTTestModel,
-    AOT_CORSTONE300_RUNNER,
-    generate_ref_data,
-    compile_and_run,
-)
+from tvm.testing.aot import AOTTestModel, compile_and_run, generate_ref_data
+from tvm.micro.testing.aot_test_utils import AOT_CORSTONE300_RUNNER
 
 
 class BasicGroupConv2dTests:
diff --git a/tests/python/relay/utils/external_codegen.py b/tests/python/relay/utils/external_codegen.py
index 54132c63d96b..6d3d917ff5a2 100644
--- a/tests/python/relay/utils/external_codegen.py
+++ b/tests/python/relay/utils/external_codegen.py
@@ -104,7 +104,8 @@ def check_aot_executor_result(
     mod, map_inputs, out_shape, result, tol=1e-5, target="llvm", device=tvm.cpu()
 ):
     # Late import to avoid breaking test with USE_MICRO=OFF.
-    from tvm.micro.testing.aot_test_utils import AOTTestModel, AOT_DEFAULT_RUNNER, compile_and_run
+    from tvm.testing.aot import AOTTestModel, compile_and_run
+    from tvm.micro.testing.aot_test_utils import AOT_DEFAULT_RUNNER
 
     interface_api = "packed"
     use_unpacked_api = False

From d2e2cecca109805bdfefa5b5981d0d1f26191344 Mon Sep 17 00:00:00 2001
From: Mehrdad Hessar <mhessar@octoml.ai>
Date: Mon, 16 May 2022 14:34:06 -0700
Subject: [PATCH 04/13] add else case

---
 python/tvm/testing/aot.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/python/tvm/testing/aot.py b/python/tvm/testing/aot.py
index ed2a7339ab5b..cf1bf65142cc 100644
--- a/python/tvm/testing/aot.py
+++ b/python/tvm/testing/aot.py
@@ -651,6 +651,16 @@ def compile_models(
                         compiled_mods.append(
                             AOTCompiledTestModel(model=model, executor_factory=executor_factory)
                         )
+                    else:
+                        executor_factory = tvm.relay.build(
+                            model.module,
+                            tvm.target.Target(target, host=target),
+                            params=model.params,
+                            mod_name=model.name,
+                        )
+                        compiled_mods.append(
+                            AOTCompiledTestModel(model=model, executor_factory=executor_factory)
+                        )
         else:
             with tvm.transform.PassContext(opt_level=3, config=config):
                 # TODO(Mousius) - Remove once executor/runtime are fully removed from Target

From 76c44fd7b97dc66119359fa632ed1d6f92e3f34e Mon Sep 17 00:00:00 2001
From: Mehrdad Hessar <mhessar@octoml.ai>
Date: Mon, 16 May 2022 14:34:49 -0700
Subject: [PATCH 05/13] address comments

---
 python/tvm/autotvm/task/dispatcher.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/python/tvm/autotvm/task/dispatcher.py b/python/tvm/autotvm/task/dispatcher.py
index a5b0367acb4c..1158edd102b3 100644
--- a/python/tvm/autotvm/task/dispatcher.py
+++ b/python/tvm/autotvm/task/dispatcher.py
@@ -180,6 +180,8 @@ def update(self, target, workload, cfg):
 
 class ApplyFixedConfig(DispatchContext):
     """Apply a config of a deterministic schedule.
+    This is used for building a single Relay operator with a deterministic schedule
+    for testing schedules at the Relay level.
 
     Parameters
     ----------
@@ -199,7 +201,7 @@ def _query_inside(self, target, workload):
         """Override query"""
         self.workload = workload
 
-        # Creat a config from correct task
+        # Create a config from correct task
         for task in self._tasks:
             if task.name == workload[0]:
                 config = task.config_space.get(0)
@@ -211,7 +213,7 @@ def _query_inside(self, target, workload):
             )
         # Add low cost to the target schedule and high cost to others.
         if workload[0] == self._schedule_name:
-            config.cost = 0.000001
+            config.cost = 1e-6
         else:
             config.cost = 100000
         return config

From 6b25e1992242d44d2947919b31c5bd316c278d30 Mon Sep 17 00:00:00 2001
From: Mehrdad Hessar <mhessar@octoml.ai>
Date: Mon, 16 May 2022 14:49:40 -0700
Subject: [PATCH 06/13] remove comments

---
 .../relay/strategy/arm_cpu/test_conv2d_nchw.py | 18 ------------------
 .../relay/strategy/arm_cpu/test_conv2d_nhwc.py |  5 -----
 2 files changed, 23 deletions(-)

diff --git a/tests/python/relay/strategy/arm_cpu/test_conv2d_nchw.py b/tests/python/relay/strategy/arm_cpu/test_conv2d_nchw.py
index c110cd864f7f..e88210a59e77 100644
--- a/tests/python/relay/strategy/arm_cpu/test_conv2d_nchw.py
+++ b/tests/python/relay/strategy/arm_cpu/test_conv2d_nchw.py
@@ -94,21 +94,6 @@ def test_conv2d(
         )
 
 
-# TODO(mehrdadh): Add hardware that supports this schedule
-# class TestConv2d_OIHW_int8_large_kernel(BasicConv2dTests):
-#     """This test is for conv2d_nchw_int8.arm_cpu schedule."""
-
-#     data_shape, kernel_size, num_filter, strides, padding, dilation = tvm.testing.parameters(
-#         ((1, 64, 32, 32), (3, 3), 12, 1, 0, 1),
-#         ((1, 128, 32, 32), (3, 3), 12, 1, 0, 1),
-#         ((1, 64, 32, 32), (5, 5), 16, 1, 0, 1),
-#     )
-
-#     dtype = tvm.testing.parameter("int8")
-#     kernel_layout = tvm.testing.parameter("OIHW")
-#     schedule_name = tvm.testing.parameter("conv2d_nchw_int8.arm_cpu")
-
-
 class TestConv2d_OIHW_small_kernel(BasicConv2dTests):
     """This test is for conv2d_nchw_spatial_pack.arm_cpu schedule."""
 
@@ -121,8 +106,5 @@ class TestConv2d_OIHW_small_kernel(BasicConv2dTests):
     schedule_name = tvm.testing.parameter("conv2d_nchw_spatial_pack.arm_cpu")
 
 
-# TODO(mehrdadh): Add test for `OIHW\d*o` layout format for conv2d_nchw_spatial_pack.arm_cpu schedule.
-
-
 if __name__ == "__main__":
     sys.exit(pytest.main([__file__] + sys.argv[1:]))
diff --git a/tests/python/relay/strategy/arm_cpu/test_conv2d_nhwc.py b/tests/python/relay/strategy/arm_cpu/test_conv2d_nhwc.py
index 4a363f02822a..f56645d43672 100644
--- a/tests/python/relay/strategy/arm_cpu/test_conv2d_nhwc.py
+++ b/tests/python/relay/strategy/arm_cpu/test_conv2d_nhwc.py
@@ -150,10 +150,5 @@ class TestConv2d_HWIO(BasicConv2dTests):
     schedule_name = tvm.testing.parameter("conv2d_nhwc_spatial_pack.arm_cpu")
 
 
-# TODO(mehrdadh): Add test for conv2d_NHWC_quantized_native.arm_cpu
-
-# TODO(mehrdadh): Add test for conv2d_NHWC_quantized_interleaved.arm_cpu
-
-
 if __name__ == "__main__":
     sys.exit(pytest.main([__file__] + sys.argv[1:]))

From a740fc24152a2183e32e360b07e55a1468baf2e0 Mon Sep 17 00:00:00 2001
From: Mehrdad Hessar <mhessar@octoml.ai>
Date: Mon, 16 May 2022 15:18:15 -0700
Subject: [PATCH 07/13] lint

---
 python/tvm/testing/aot.py | 80 +++++++++++++++++----------------------
 1 file changed, 35 insertions(+), 45 deletions(-)

diff --git a/python/tvm/testing/aot.py b/python/tvm/testing/aot.py
index cf1bf65142cc..505584bf66cd 100644
--- a/python/tvm/testing/aot.py
+++ b/python/tvm/testing/aot.py
@@ -14,11 +14,9 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-
+"""Common functions for AOT test cases"""
 import sys
 import datetime
-import itertools
-import logging
 import os
 import pathlib
 import re
@@ -27,19 +25,13 @@
 import tarfile
 import tempfile
 from typing import Any, NamedTuple, Union, Optional, List, Dict
-
-import pytest
 import numpy as np
 
-pytest.importorskip("tvm.micro")
-
 import tvm
 from tvm import relay
-from tvm import te
 from tvm import autotvm
 from tvm.contrib import utils, graph_executor
-from tvm.relay.backend import te_compiler, Executor, Runtime
-from tvm.relay.backend.te_compiler import TECompiler
+from tvm.relay.backend import Executor, Runtime
 from tvm.relay.backend.utils import mangle_module_name
 from tvm.micro import export_model_library_format
 from tvm.micro.testing.utils import mlf_extract_workspace_size_bytes
@@ -195,7 +187,8 @@ def _mangle_name(mod_name, name):
 def _emit_data_linkage(output_file, data_linkage):
     if data_linkage is not None:
         output_file.write(
-            f'__attribute__((section("{data_linkage.section}"), aligned({data_linkage.alignment}))) '
+            f'__attribute__((section("{data_linkage.section}"), '
+            f"aligned({data_linkage.alignment}))) "
         )
 
 
@@ -221,12 +214,10 @@ def _emit_main_prologue(
         main_file.write("static uint8_t g_aot_memory[WORKSPACE_SIZE];\n")
         main_file.write("tvm_workspace_t app_workspace;\n")
         main_file.write(
-            """
-            
+            """\n
 tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) {
     return StackMemoryManager_Allocate(&app_workspace, num_bytes, out_ptr);
-}
-
+}\n
 tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) {
     return StackMemoryManager_Free(&app_workspace,ptr);
 }
@@ -235,33 +226,27 @@ def _emit_main_prologue(
     else:
         # An implementation is not needed for these if the stack allocator is not used
         main_file.write(
-            """
-            
+            """\n
 tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) {
     return kTvmErrorFunctionCallNotImplemented;
-}
-
+}\n
 tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) {
     return kTvmErrorFunctionCallNotImplemented;
-}
-
+}\n
             """
         )
     main_file.write(
-        """
-    
-void TVMPlatformAbort(tvm_crt_error_t code) { exit(-1); }
-
+        """\n
+void TVMPlatformAbort(tvm_crt_error_t code) { exit(-1); }\n
 void TVMLogf(const char* msg, ...) {
   va_list args;
   va_start(args, msg);
   vfprintf(stdout, msg, args);
   va_end(args);
-}
-    
+}\n
 TVM_DLL int TVMFuncRegisterGlobal(const char* name, TVMFunctionHandle f, int override) {}
 int main(){\n
-    """
+"""
     )
     main_file.write(custom_prologue)
 
@@ -276,8 +261,10 @@ def _emit_main_data(main_file, input_map, output_map, mod_name):
     for key in output_map:
         sanitized_tensor_name = re.sub(r"\W", "_", key)
         main_file.write(
-            f'#include "{_mangle_name(mod_name,"expected_output_data")}_{sanitized_tensor_name}.h"\n'
-            f'#include "{_mangle_name(mod_name,"output_data")}_{sanitized_tensor_name}.h"\n'
+            f'#include "{_mangle_name(mod_name,"expected_output_data")}_'
+            f'{sanitized_tensor_name}.h"\n'
+            f'#include "{_mangle_name(mod_name,"output_data")}_'
+            f'{sanitized_tensor_name}.h"\n'
         )
 
 
@@ -294,7 +281,8 @@ def _emit_main_device_structs(main_file, devices, mod_name):
 def _emit_main_workspace_pool_structs(main_file, workspace_pool_names, mod_name):
     if workspace_pool_names and len(workspace_pool_names) > 0:
         main_file.write(
-            f"struct {_mangle_name(mod_name, 'workspace_pools')} {_mangle_name(mod_name, 'workspace_pools')} = {{"
+            f"struct {_mangle_name(mod_name, 'workspace_pools')} "
+            f"{_mangle_name(mod_name, 'workspace_pools')} = {{"
         )
         for workspace_pool_name in workspace_pool_names:
             main_file.write(f"\t.{workspace_pool_name} = {workspace_pool_name},\n")
@@ -308,7 +296,8 @@ def _emit_main_data_structs(main_file, input_map, output_map, mod_name):
     for key in input_map:
         sanitized_tensor_name = re.sub(r"\W", "_", key)
         main_file.write(
-            f"\t.{sanitized_tensor_name} = {_mangle_name(mod_name, 'input_data')}_{sanitized_tensor_name},\n"
+            f"\t.{sanitized_tensor_name} = {_mangle_name(mod_name, 'input_data')}_"
+            f"{sanitized_tensor_name},\n"
         )
     main_file.write("};\n")
 
@@ -318,7 +307,8 @@ def _emit_main_data_structs(main_file, input_map, output_map, mod_name):
     for key in output_map:
         sanitized_tensor_name = re.sub(r"\W", "_", key)
         main_file.write(
-            f"\t.{sanitized_tensor_name} = {_mangle_name(mod_name, 'output_data')}_{sanitized_tensor_name},\n"
+            f"\t.{sanitized_tensor_name} = {_mangle_name(mod_name, 'output_data')}_"
+            f"{sanitized_tensor_name},\n"
         )
     main_file.write("};\n")
 
@@ -359,7 +349,7 @@ def _emit_main_c_interface_call(
 
     main_file_string = ""
     for sub_string in sub_strings:
-        main_file_string += sub_string
+        main_file_string.join(sub_string)
 
     main_file.write(main_file_string)
 
@@ -437,14 +427,13 @@ def _emit_main_compare(main_file, outputs, output_tolerance, mod_name, use_inter
         else:
             actual_data_name = _mangle_name(mod_name, f"output_data_{sanitized_tensor_name}")
         main_file.write(
-            f"""
-            for (int i = 0; i<{data_length_var_name}; i++) {{
-                if ({comparison_function}({actual_data_name}[i]-{expected_data_name}[i]) > {tolerance}) {{
-                    printf("{AOT_FAILURE_TOKEN}\\n");
-                    return -1;
-                }}
-            }}
-            """
+            f"for (int i = 0; i<{data_length_var_name}; i++) {{\n"
+            f"\tif ({comparison_function}({actual_data_name}[i]-"
+            f"{expected_data_name}[i]) > {tolerance}) {{\n"
+            f'\t\tprintf("{AOT_FAILURE_TOKEN}\\n");\n'
+            f"\t\treturn -1;\n"
+            f"\t}}\n"
+            f"}}"
         )
 
 
@@ -553,7 +542,8 @@ def _create_main(
 def _create_header_file(tensor_name, npy_data, output_path, data_linkage):
     """
     This method generates a header file containing the data contained in the numpy array provided.
-    It is used to capture the tensor data (for both inputs and expected outputs) to be bundled into the standalone application.
+    It is used to capture the tensor data (for both inputs and expected outputs)
+    to be bundled into the standalone application.
     """
     file_path = pathlib.Path(f"{output_path}/" + tensor_name).resolve()
     # create header file
@@ -580,11 +570,11 @@ def _convert_to_relay(
     """Convert a tflite model buffer in a Relay module"""
     # TFLite.Model.Model has changed to TFLite.Model from 1.14 to 2.1
     try:
-        import tflite.Model
+        import tflite.Model  # pylint: disable=import-outside-toplevel
 
         tflite_model = tflite.Model.Model.GetRootAsModel(tflite_model_buf, 0)
     except AttributeError:
-        import tflite
+        import tflite  # pylint: disable=import-outside-toplevel
 
         tflite_model = tflite.Model.GetRootAsModel(tflite_model_buf, 0)
     except ImportError:

From 17fac28818a0e3850b7a5d26e901c9001f85b143 Mon Sep 17 00:00:00 2001
From: Mehrdad Hessar <mhessar@octoml.ai>
Date: Mon, 16 May 2022 17:15:01 -0700
Subject: [PATCH 08/13] fix import errors

---
 python/tvm/testing/aot.py                   | 2 ++
 tests/python/relay/aot/test_crt_aot.py      | 5 ++---
 tests/python/relay/aot/test_crt_aot_usmp.py | 2 +-
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/python/tvm/testing/aot.py b/python/tvm/testing/aot.py
index 505584bf66cd..bbe4f4caa082 100644
--- a/python/tvm/testing/aot.py
+++ b/python/tvm/testing/aot.py
@@ -24,6 +24,7 @@
 import subprocess
 import tarfile
 import tempfile
+import logging
 from typing import Any, NamedTuple, Union, Optional, List, Dict
 import numpy as np
 
@@ -36,6 +37,7 @@
 from tvm.micro import export_model_library_format
 from tvm.micro.testing.utils import mlf_extract_workspace_size_bytes
 
+_LOG = logging.getLogger(__name__)
 
 NP_TYPE_TO_C = {
     "int8": "int8_t",
diff --git a/tests/python/relay/aot/test_crt_aot.py b/tests/python/relay/aot/test_crt_aot.py
index 1225f1facaf7..d13f8d6e35fb 100644
--- a/tests/python/relay/aot/test_crt_aot.py
+++ b/tests/python/relay/aot/test_crt_aot.py
@@ -37,16 +37,15 @@
 from tvm.micro import model_library_format as mlf
 from tvm.micro import export_model_library_format
 from tvm.ir.instrument import pass_instrument
-from tvm.testing.aot import create_relay_module_and_inputs_from_tflite_file
 from tvm.micro.testing.aot_test_utils import (
     AOTTestModel,
-    AOT_DEFAULT_RUNNER,
     generate_ref_data,
-    convert_to_relay,
     compile_and_run,
     compile_models,
     parametrize_aot_options,
+    create_relay_module_and_inputs_from_tflite_file,
 )
+from tvm.micro.testing.aot_test_utils import AOT_DEFAULT_RUNNER
 
 
 def test_error_c_interface_with_packed_api():
diff --git a/tests/python/relay/aot/test_crt_aot_usmp.py b/tests/python/relay/aot/test_crt_aot_usmp.py
index eaff1d9bd17b..60b46d96b555 100644
--- a/tests/python/relay/aot/test_crt_aot_usmp.py
+++ b/tests/python/relay/aot/test_crt_aot_usmp.py
@@ -32,13 +32,13 @@
 from tvm.relay.backend import Executor, Runtime
 from tvm import WorkspaceMemoryPools, PoolInfo
 from tvm.micro import model_library_format as mlf
+from tvm.micro.testing.aot_test_utils import parametrize_aot_options
 from tvm.testing.aot import (
     AOTTestModel,
     AOTTestRunner,
     generate_ref_data,
     compile_and_run,
     compile_models,
-    parametrize_aot_options,
     run_and_check,
     create_relay_module_and_inputs_from_tflite_file,
 )

From 3a42844859c5d3ecefc5890a63df238c067fb5d4 Mon Sep 17 00:00:00 2001
From: Mehrdad Hessar <mhessar@octoml.ai>
Date: Tue, 17 May 2022 10:39:21 -0700
Subject: [PATCH 09/13] fix error

---
 python/tvm/testing/aot.py | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/python/tvm/testing/aot.py b/python/tvm/testing/aot.py
index bbe4f4caa082..a5b7e9f125fa 100644
--- a/python/tvm/testing/aot.py
+++ b/python/tvm/testing/aot.py
@@ -219,7 +219,7 @@ def _emit_main_prologue(
             """\n
 tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) {
     return StackMemoryManager_Allocate(&app_workspace, num_bytes, out_ptr);
-}\n
+}
 tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) {
     return StackMemoryManager_Free(&app_workspace,ptr);
 }
@@ -231,15 +231,15 @@ def _emit_main_prologue(
             """\n
 tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) {
     return kTvmErrorFunctionCallNotImplemented;
-}\n
+}
 tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) {
     return kTvmErrorFunctionCallNotImplemented;
-}\n
+}
             """
         )
     main_file.write(
         """\n
-void TVMPlatformAbort(tvm_crt_error_t code) { exit(-1); }\n
+void TVMPlatformAbort(tvm_crt_error_t code) { exit(-1); }
 void TVMLogf(const char* msg, ...) {
   va_list args;
   va_start(args, msg);
@@ -248,7 +248,7 @@ def _emit_main_prologue(
 }\n
 TVM_DLL int TVMFuncRegisterGlobal(const char* name, TVMFunctionHandle f, int override) {}
 int main(){\n
-"""
+    """
     )
     main_file.write(custom_prologue)
 
@@ -298,8 +298,8 @@ def _emit_main_data_structs(main_file, input_map, output_map, mod_name):
     for key in input_map:
         sanitized_tensor_name = re.sub(r"\W", "_", key)
         main_file.write(
-            f"\t.{sanitized_tensor_name} = {_mangle_name(mod_name, 'input_data')}_"
-            f"{sanitized_tensor_name},\n"
+            f"\t.{sanitized_tensor_name} = "
+            f"{_mangle_name(mod_name, 'input_data')}_{sanitized_tensor_name},\n"
         )
     main_file.write("};\n")
 
@@ -318,13 +318,11 @@ def _emit_main_data_structs(main_file, input_map, output_map, mod_name):
 def _emit_main_data_setup(main_file, input_map, output_map, mod_name):
     num_outputs = len(output_map)
     num_inputs = len(input_map)
-
     main_file.write(f'void* {_mangle_name(mod_name,"inputs")}[{num_inputs}] = {{ ')
     for key in input_map:
         sanitized_tensor_name = re.sub(r"\W", "_", key)
         main_file.write(f'{_mangle_name(mod_name,"input_data")}_{sanitized_tensor_name}, ')
     main_file.write("};\n")
-
     main_file.write(f'void* {_mangle_name(mod_name,"outputs")}[{num_outputs}]  = {{ ')
     for key in output_map:
         sanitized_tensor_name = re.sub(r"\W", "_", key)
@@ -349,10 +347,7 @@ def _emit_main_c_interface_call(
     # Adding brackets and newline instead
     sub_strings[-1] = sub_strings[-1] + ");\n"
 
-    main_file_string = ""
-    for sub_string in sub_strings:
-        main_file_string.join(sub_string)
-
+    main_file_string = "".join(sub_strings)
     main_file.write(main_file_string)
 
 
@@ -776,7 +771,7 @@ def run_and_check_body(base_path):
 
         # Verify that compiles fine
         file_dir = os.path.dirname(os.path.abspath(__file__))
-        makefile_dir = os.path.join(file_dir, "../../../../tests/python/relay/aot")
+        makefile_dir = os.path.join(file_dir, "../../../tests/python/relay/aot")
         codegen_path = os.path.join(base_path, "codegen")
         makefile = os.path.join(makefile_dir, f"{runner.makefile}.mk")
         fvp_dir = "/opt/arm/FVP_Corstone_SSE-300/models/Linux64_GCC-6.4/"
@@ -789,7 +784,7 @@ def run_and_check_body(base_path):
         make_command = (
             f"make -f {makefile} build_dir={build_path}"
             + f" CFLAGS='{cflags}'"
-            + f" TVM_ROOT={file_dir}/../../../.."
+            + f" TVM_ROOT={file_dir}/../../.."
             + f" AOT_TEST_ROOT={makefile_dir}"
             + f" CODEGEN_ROOT={codegen_path}"
             + f" STANDALONE_CRT_DIR={tvm.micro.get_standalone_crt_dir()}"

From 002682feea2432682048fa21d298f41b3e70c589 Mon Sep 17 00:00:00 2001
From: Mehrdad Hessar <mhessar@octoml.ai>
Date: Tue, 17 May 2022 14:00:24 -0700
Subject: [PATCH 10/13] fix imports

---
 tests/python/relay/aot/test_c_device_api.py | 6 ++----
 tests/python/relay/aot/test_cpp_aot.py      | 4 +---
 tests/python/relay/aot/test_crt_aot.py      | 5 ++---
 3 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/tests/python/relay/aot/test_c_device_api.py b/tests/python/relay/aot/test_c_device_api.py
index c53bbc50d991..5b94a9841367 100644
--- a/tests/python/relay/aot/test_c_device_api.py
+++ b/tests/python/relay/aot/test_c_device_api.py
@@ -24,10 +24,8 @@
 
 from tvm import relay
 from tvm.ir.module import IRModule
-from tvm.testing.aot import AOTTestModel, compile_and_run, generate_ref_data
-from tvm.micro.testing.aot_test_utils import (
-    AOT_DEFAULT_RUNNER,
-)
+from tvm.testing.aot import AOTTestModel, generate_ref_data
+from tvm.micro.testing.aot_test_utils import AOT_DEFAULT_RUNNER
 
 
 @pytest.fixture
diff --git a/tests/python/relay/aot/test_cpp_aot.py b/tests/python/relay/aot/test_cpp_aot.py
index 16a24266df46..4a12678a79d9 100644
--- a/tests/python/relay/aot/test_cpp_aot.py
+++ b/tests/python/relay/aot/test_cpp_aot.py
@@ -28,9 +28,7 @@
 from tvm import relay
 from tvm.relay import backend, testing
 from tvm.testing.aot import generate_ref_data
-from tvm.micro.testing.aot_test_utils import (
-    AOT_DEFAULT_RUNNER,
-)
+from tvm.micro.testing.aot_test_utils import AOT_DEFAULT_RUNNER
 
 
 def test_error_c_interface():
diff --git a/tests/python/relay/aot/test_crt_aot.py b/tests/python/relay/aot/test_crt_aot.py
index d13f8d6e35fb..d1d80d434b6a 100644
--- a/tests/python/relay/aot/test_crt_aot.py
+++ b/tests/python/relay/aot/test_crt_aot.py
@@ -37,15 +37,14 @@
 from tvm.micro import model_library_format as mlf
 from tvm.micro import export_model_library_format
 from tvm.ir.instrument import pass_instrument
-from tvm.micro.testing.aot_test_utils import (
+from tvm.testing.aot import (
     AOTTestModel,
     generate_ref_data,
     compile_and_run,
     compile_models,
-    parametrize_aot_options,
     create_relay_module_and_inputs_from_tflite_file,
 )
-from tvm.micro.testing.aot_test_utils import AOT_DEFAULT_RUNNER
+from tvm.micro.testing.aot_test_utils import AOT_DEFAULT_RUNNER, parametrize_aot_options
 
 
 def test_error_c_interface_with_packed_api():

From 0c879e3ba93f1204f78cdd341180c3779ba8fa9f Mon Sep 17 00:00:00 2001
From: Mehrdad Hessar <mhessar@octoml.ai>
Date: Tue, 17 May 2022 16:11:15 -0700
Subject: [PATCH 11/13] fix import

---
 tests/python/relay/aot/test_c_device_api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python/relay/aot/test_c_device_api.py b/tests/python/relay/aot/test_c_device_api.py
index 5b94a9841367..3c7db62890f5 100644
--- a/tests/python/relay/aot/test_c_device_api.py
+++ b/tests/python/relay/aot/test_c_device_api.py
@@ -24,7 +24,7 @@
 
 from tvm import relay
 from tvm.ir.module import IRModule
-from tvm.testing.aot import AOTTestModel, generate_ref_data
+from tvm.testing.aot import AOTTestModel, generate_ref_data, compile_models
 from tvm.micro.testing.aot_test_utils import AOT_DEFAULT_RUNNER
 
 

From bd6efebfde65af36f77275b8cf4aead471a7dddc Mon Sep 17 00:00:00 2001
From: Mehrdad Hessar <mhessar@octoml.ai>
Date: Wed, 18 May 2022 16:04:47 -0700
Subject: [PATCH 12/13] Address comments

---
 python/tvm/autotvm/task/dispatcher.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/python/tvm/autotvm/task/dispatcher.py b/python/tvm/autotvm/task/dispatcher.py
index 1158edd102b3..a750bcd3a36e 100644
--- a/python/tvm/autotvm/task/dispatcher.py
+++ b/python/tvm/autotvm/task/dispatcher.py
@@ -31,6 +31,8 @@
 from __future__ import absolute_import as _abs
 
 import logging
+import typing
+from typing import Union
 
 import numpy as np
 
@@ -187,13 +189,18 @@ class ApplyFixedConfig(DispatchContext):
     ----------
     tasks : list[tvm.autotvm.task.task.Task]
         List of autoTVM tasks.
-    schedule_name : str
-        Name of schedule to use.
+    schedule_names : Union[str, List[str]]
+        Name of the schedule, or list of schedule names, to use.
     """
 
-    def __init__(self, tasks, schedule_name: str):
+    def __init__(self, tasks, schedule_names: Union[str, typing.List[str]]):
         super(ApplyFixedConfig, self).__init__()
-        self._schedule_name = schedule_name
+        if isinstance(schedule_names, str):
+            self._schedule_names = [schedule_names]  # list(str) would split into chars
+        elif isinstance(schedule_names, list):
+            self._schedule_names = schedule_names
+        else:
+            raise RuntimeError(f"Incorrect type: {type(schedule_names)}")
         self._tasks = tasks
         self.workload = None
 
@@ -212,7 +219,7 @@ def _query_inside(self, target, workload):
                 "workload: %s does not exist in %s" % (str(workload), str(self._tasks))
             )
         # Add low cost to the target schedule and high cost to others.
-        if workload[0] == self._schedule_name:
+        if workload[0] in self._schedule_names:
             config.cost = 1e-6
         else:
             config.cost = 100000

From 70dadcd825fc79b25beea5f3f2b501698b3fdf9f Mon Sep 17 00:00:00 2001
From: Mehrdad Hessar <mhessar@octoml.ai>
Date: Thu, 19 May 2022 13:47:24 -0700
Subject: [PATCH 13/13] fix import

---
 python/tvm/testing/aot.py                          | 4 ++--
 tests/python/contrib/test_cmsisnn/test_networks.py | 4 ++--
 tests/python/contrib/test_ethosu/test_networks.py  | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/python/tvm/testing/aot.py b/python/tvm/testing/aot.py
index a5b7e9f125fa..f8f170366ac5 100644
--- a/python/tvm/testing/aot.py
+++ b/python/tvm/testing/aot.py
@@ -561,7 +561,7 @@ def _create_header_file(tensor_name, npy_data, output_path, data_linkage):
         header_file.write("};\n\n")
 
 
-def _convert_to_relay(
+def convert_to_relay(
     tflite_model_buf,
 ):
     """Convert a tflite model buffer in a Relay module"""
@@ -909,7 +909,7 @@ def create_relay_module_and_inputs_from_tflite_file(tflite_model_file):
     and params from a tflite file"""
     with open(tflite_model_file, "rb") as f:
         tflite_model_buf = f.read()
-    mod, params = _convert_to_relay(tflite_model_buf)
+    mod, params = convert_to_relay(tflite_model_buf)
 
     inputs = dict()
     for param in mod["main"].params:
diff --git a/tests/python/contrib/test_cmsisnn/test_networks.py b/tests/python/contrib/test_cmsisnn/test_networks.py
index 10edd01a867e..fefce9e86c2d 100644
--- a/tests/python/contrib/test_cmsisnn/test_networks.py
+++ b/tests/python/contrib/test_cmsisnn/test_networks.py
@@ -35,7 +35,7 @@
 )
 
 
-def convert_to_relay(
+def _convert_to_relay(
     tflite_model_buf,
     input_data,
     input_node,
@@ -93,7 +93,7 @@ def test_cnn_small(test_runner):
     rng = np.random.default_rng(12345)
     input_data = rng.integers(in_min, high=in_max, size=input_shape, dtype=dtype)
 
-    orig_mod, params = convert_to_relay(tflite_model_buf, input_data, "input")
+    orig_mod, params = _convert_to_relay(tflite_model_buf, input_data, "input")
     cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)
 
     # validate CMSIS-NN output against CPU output
diff --git a/tests/python/contrib/test_ethosu/test_networks.py b/tests/python/contrib/test_ethosu/test_networks.py
index 3adc75cc0ee9..b91168b7bbe6 100644
--- a/tests/python/contrib/test_ethosu/test_networks.py
+++ b/tests/python/contrib/test_ethosu/test_networks.py
@@ -24,7 +24,7 @@
 from tvm.relay.op.contrib.ethosu import partition_for_ethosu
 from tvm.micro import model_library_format as mlf
 
-from tvm.micro.testing.aot_test_utils import convert_to_relay
+from tvm.testing.aot import convert_to_relay
 
 from . import infra