apache · spectrometerHBH · Apr 25, 2026 · Apr 25, 2026 · Apr 25, 2026
diff --git a/docs/arch/index.rst b/docs/arch/index.rst
@@ -151,19 +151,19 @@ The main goal of TVM's runtime is to provide a minimal API for loading and execu
     # Example runtime execution program in python, with type annotated
     mod: tvm.runtime.Module = tvm.runtime.load_module("compiled_artifact.so")
     arr: tvm.runtime.Tensor = tvm.runtime.tensor([1, 2, 3], device=tvm.cuda(0))
-    fun: tvm.runtime.PackedFunc = mod["addone"]
+    fun: tvm_ffi.Function = mod["addone"]
     fun(arr)
     print(arr.numpy())
 
 
-:py:class:`tvm.runtime.Module` encapsulates the result of compilation. A runtime.Module contains a GetFunction method to obtain PackedFuncs by name.
+:py:class:`tvm.runtime.Module` encapsulates the result of compilation. A runtime.Module contains a GetFunction method to obtain :py:class:`tvm_ffi.Function` instances by name.
 
-:py:class:`tvm.runtime.PackedFunc` is a type-erased function interface for both the generated functions. A runtime.PackedFunc can take arguments and return values with the
-following types: POD types(int, float), string, runtime.PackedFunc, runtime.Module, runtime.Tensor, and other sub-classes of runtime.Object.
+:py:class:`tvm_ffi.Function` is a type-erased function interface for the generated functions. A tvm_ffi.Function can take arguments and return values with the
+following types: POD types (int, float), string, tvm_ffi.Function, runtime.Module, runtime.Tensor, and other sub-classes of runtime.Object.
 
-:py:class:`tvm.runtime.Module` and :py:class:`tvm.runtime.PackedFunc` are powerful mechanisms to modularize the runtime. For example, to get the above `addone` function on CUDA, we can use LLVM to generate the host-side code to compute the launching parameters(e.g. size of the thread groups) and then call into another PackedFunc from a CUDAModule that is backed by the CUDA driver API. The same mechanism can be used for OpenCL kernels.
+:py:class:`tvm.runtime.Module` and :py:class:`tvm_ffi.Function` are powerful mechanisms to modularize the runtime. For example, to get the above `addone` function on CUDA, we can use LLVM to generate the host-side code to compute the launching parameters (e.g. size of the thread groups) and then call into another tvm_ffi.Function from a CUDAModule that is backed by the CUDA driver API. The same mechanism can be used for OpenCL kernels.
 
-The above example only deals with a simple `addone` function. The code snippet below gives an example of an end-to-end model execution using the Relax Virtual Machine, which is built on the same runtime.Module and runtime.PackedFunc interface:
+The above example only deals with a simple `addone` function. The code snippet below gives an example of an end-to-end model execution using the Relax Virtual Machine, which is built on the same runtime.Module and tvm_ffi.Function interface:
 
 .. code-block:: python
 
@@ -434,4 +434,3 @@ and then integrate it into the IRModule.
 
 While possible to construct operators directly via TensorIR or tensor expressions (TE) for each use case, it is tedious to do so.
 `topi` (Tensor operator inventory) provides a set of pre-defined operators defined by numpy and found in common deep learning workloads.
-
diff --git a/docs/how_to/tutorials/cross_compilation_and_rpc.py b/docs/how_to/tutorials/cross_compilation_and_rpc.py
@@ -97,6 +97,7 @@
 # Here we will declare a simple kernel on the local machine:
 
 import numpy as np
+import tvm_ffi
 
 import tvm
 from tvm import rpc, te
@@ -481,7 +482,7 @@ def forward(self, data: torch.Tensor) -> torch.Tensor:
     output = vm.get_outputs("main")
 
     # Extract result (handle both tuple and single tensor outputs)
-    if isinstance(output, tvm.ir.Array) and len(output) > 0:
+    if isinstance(output, tvm_ffi.Array) and len(output) > 0:
         result = output[0]
     else:
         result = output

diff --git a/docs/how_to/tutorials/export_and_load_executable.py b/docs/how_to/tutorials/export_and_load_executable.py
@@ -62,6 +62,8 @@
 # model is exported to a :py:class:`torch.export.ExportedProgram` and then
 # translated into a Relax ``IRModule``.
 
+import tvm_ffi
+
 import tvm
 from tvm import relax
 from tvm.relax.frontend.torch import from_exported_program
@@ -174,7 +176,7 @@ def forward(self, data: torch.Tensor) -> torch.Tensor:  # type: ignore[override]
     # TVM returns Array objects for tuple outputs, access via indexing.
     # For models imported from PyTorch, outputs are typically tuples (even for single outputs).
     # For ONNX models, outputs may be a single Tensor directly.
-    if isinstance(tvm_output, tvm.ir.Array) and len(tvm_output) > 0:
+    if isinstance(tvm_output, tvm_ffi.Array) and len(tvm_output) > 0:
         result_tensor = tvm_output[0]
     else:
         result_tensor = tvm_output
@@ -263,7 +265,7 @@ def forward(self, data: torch.Tensor) -> torch.Tensor:  # type: ignore[override]
 #
 #    # Step 6: Extract result (output may be tuple or single Tensor)
 #    # PyTorch models typically return tuples, ONNX models may return a single Tensor
-#    if isinstance(output, tvm.ir.Array) and len(output) > 0:
+#    if isinstance(output, tvm_ffi.Array) and len(output) > 0:
 #        result_tensor = output[0]
 #    else:
 #        result_tensor = output

diff --git a/docs/how_to/tutorials/optimize_llm.py b/docs/how_to/tutorials/optimize_llm.py
@@ -61,13 +61,14 @@
 from pathlib import Path
 from pprint import pprint
 
+from tvm_ffi import Shape
+
 import tvm
 from tvm import relax, te, tirx
 from tvm.relax import register_pipeline
 from tvm.relax.frontend import nn
 from tvm.relax.frontend.nn import Tensor, op
 from tvm.relax.frontend.nn.llm.kv_cache import PagedKVCache, TIRPagedKVCache
-from tvm.runtime import ShapeTuple
 from tvm.s_tir import dlight
 
 ######################################################################
@@ -534,10 +535,10 @@ def _pipeline(mod: tvm.ir.IRModule, _ctx: tvm.transform.PassContext) -> tvm.ir.I
 
 if not IS_IN_CI:
     kv_cache = vm["create_tir_paged_kv_cache"](
-        ShapeTuple([1]),  # max_batch_size=1
-        ShapeTuple([2048]),  # max_total_seq_len=2048
-        ShapeTuple([2048]),  # prefill_chunk_size=2048
-        ShapeTuple([16]),  # page_size=16
+        Shape([1]),  # max_batch_size=1
+        Shape([2048]),  # max_total_seq_len=2048
+        Shape([2048]),  # prefill_chunk_size=2048
+        Shape([16]),  # page_size=16
     )
 
 
@@ -553,7 +554,7 @@ def _pipeline(mod: tvm.ir.IRModule, _ctx: tvm.transform.PassContext) -> tvm.ir.I
 def embed(tokens, params):
     _embed = vm["embed"](tokens, params)
     # Reshape hidden from [seq_len, hidden_size] to [1, seq_len, hidden_size]
-    _embed = nd_view_func(_embed, ShapeTuple([1, _embed.shape[0], _embed.shape[1]]))
+    _embed = nd_view_func(_embed, Shape([1, _embed.shape[0], _embed.shape[1]]))
     return _embed
 
 
@@ -575,7 +576,7 @@ def embed(tokens, params):
     seq_id = 0
     add_sequence_func(kv_cache, seq_id)
     hidden_states = embed(tokens, params)
-    begin_forward_func(kv_cache, ShapeTuple([seq_id]), ShapeTuple([input_len]))
+    begin_forward_func(kv_cache, Shape([seq_id]), Shape([input_len]))
     logits, kv_cache = vm["prefill"](hidden_states, kv_cache, params)
     end_forward_func(kv_cache)
 
@@ -611,7 +612,7 @@ def sample_token(logits):
     while last_token != tokenizer.eos_token_id:
         tokens = tvm.runtime.tensor(np.array([last_token]).astype("int32"), device=dev)
         hidden_states = embed(tokens, params)
-        begin_forward_func(kv_cache, ShapeTuple([seq_id]), ShapeTuple([1]))
+        begin_forward_func(kv_cache, Shape([seq_id]), Shape([1]))
         logits, kv_cache = vm["decode"](hidden_states, kv_cache, params)
 
         end_forward_func(kv_cache)

diff --git a/python/tvm/__init__.py b/python/tvm/__init__.py
@@ -42,7 +42,6 @@
 from .ir import IRModule
 from .ir import transform
 from .ir import instrument
-from .ir import container
 from . import ir
 
 # tvm.tirx

diff --git a/python/tvm/contrib/cutlass/gen_tensor_op.py b/python/tvm/contrib/cutlass/gen_tensor_op.py
@@ -473,7 +473,7 @@ def instantiate_template(func_name, annotations, func_args):
     func_name: str
         A string to identify the type of the kernel (dense/matmul, batched_matmul, or conv2d).
 
-    annotations: container.Map
+    annotations: tvm_ffi.Map
         Key and value pairs annotated during kernel selection.
 
     func_args: list

diff --git a/python/tvm/exec/disco_worker.py b/python/tvm/exec/disco_worker.py
@@ -22,10 +22,11 @@
 import sys
 from collections.abc import Callable
 
-from tvm_ffi import get_global_func, register_global_func
+from tvm_ffi import Shape, get_global_func, register_global_func
+from tvm_ffi.core import String
 
 import tvm
-from tvm.runtime import ShapeTuple, String, Tensor, tensor
+from tvm.runtime import Tensor, tensor
 
 
 @register_global_func("tests.disco.add_one", override=True)
@@ -55,9 +56,9 @@ def _str_obj_func(x: str):
 
 
 @register_global_func("tests.disco.shape_tuple", override=True)
-def _shape_tuple_func(x: ShapeTuple):
-    assert isinstance(x, ShapeTuple)
-    return ShapeTuple(list(x) + [4, 5])
+def _shape_tuple_func(x: Shape):
+    assert isinstance(x, Shape)
+    return Shape(list(x) + [4, 5])
 
 
 @register_global_func("tests.disco.test_callback", override=True)

diff --git a/python/tvm/ir/__init__.py b/python/tvm/ir/__init__.py
@@ -32,7 +32,6 @@
     structural_equal,
     structural_hash,
 )
-from .container import Array, Map
 from .expr import BaseExpr, GlobalVar, PrimExpr, Range, RelaxExpr
 from .function import BaseFunc, CallingConv
 from .global_info import GlobalInfo, DummyGlobalInfo, VDevice

diff --git a/python/tvm/ir/container.py b/python/tvm/ir/container.py
diff --git a/python/tvm/ir/supply.py b/python/tvm/ir/supply.py
@@ -18,7 +18,6 @@
 
 import tvm_ffi
 
-import tvm
 from tvm import IRModule, Object
 
 from . import _ffi_api
@@ -100,7 +99,7 @@ def __init__(self, value=None):
             self.__init_handle_by_constructor__(_ffi_api.GlobalVarSupply_NameSupply, name_supply)
         elif isinstance(value, NameSupply):
             self.__init_handle_by_constructor__(_ffi_api.GlobalVarSupply_NameSupply, value)
-        elif isinstance(value, list | tvm.container.Array):
+        elif isinstance(value, list | tvm_ffi.Array):
             self.__init_handle_by_constructor__(_ffi_api.GlobalVarSupply_IRModules, value)
         elif isinstance(value, IRModule):
             self.__init_handle_by_constructor__(_ffi_api.GlobalVarSupply_IRModule, value)

diff --git a/python/tvm/relax/base_py_module.py b/python/tvm/relax/base_py_module.py
@@ -22,11 +22,12 @@
 from typing import Any, Optional, Union
 
 import numpy as np
+from tvm_ffi import Function
 
 import tvm
 from tvm import relax, tirx
 from tvm.ir import IRModule
-from tvm.runtime import Device, PackedFunc, Tensor
+from tvm.runtime import Device, Tensor
 from tvm.target import Target
 
 try:
@@ -100,8 +101,8 @@ def _getattr_python_function(name: str) -> Any:
 
         self.__getattr__ = _getattr_python_function
 
-        self.compiled_tir_funcs: dict[str, PackedFunc] = {}
-        self.extern_funcs: dict[str, PackedFunc] = {}
+        self.compiled_tir_funcs: dict[str, Function] = {}
+        self.extern_funcs: dict[str, Function] = {}
         self.tir_func_names: list[str] = []
         self.relax_func_names: list[str] = []
         self.relax_vm: relax.VirtualMachine | None = None
@@ -450,7 +451,7 @@ def _convert_single_tvm_to_pytorch(self, tvm_tensor: Any) -> "torch.Tensor":
             numpy_array = tvm_tensor.numpy()
             return torch.from_numpy(numpy_array)
 
-    def get_function(self, name: str) -> PackedFunc | None:
+    def get_function(self, name: str) -> Function | None:
         """Get a compiled function by name."""
         if name in self.compiled_tir_funcs:
             return self.compiled_tir_funcs[name]

diff --git a/python/tvm/relax/block_builder.py b/python/tvm/relax/block_builder.py
@@ -297,7 +297,7 @@ def _normalize_python_tuple(self, expr: Expr | Sequence[Expr]):
         called with python `list` or `tuple` objects.  These objects
         should be converted to `relax.Tuple` prior to calling an FFI
         function, as they would otherwise be converted to
-        `tvm.runtime.Array`.  In addition, any nested tuple objects
+        `tvm_ffi.Array`.  In addition, any nested tuple objects
         should be converted.
         """
         if isinstance(expr, list | tuple):

diff --git a/python/tvm/relax/distributed/global_info.py b/python/tvm/relax/distributed/global_info.py
@@ -18,10 +18,10 @@
 """Global Info Data structures for distributed tensor."""
 
 import tvm_ffi
+from tvm_ffi import Shape
 
 from tvm.ir import Range
 from tvm.ir.global_info import GlobalInfo
-from tvm.runtime import ShapeTuple
 
 from . import _ffi_api as ffi
 
@@ -33,27 +33,27 @@ class DeviceMesh(GlobalInfo):
 
     Parameters
     ----------
-    shape: Union[ShapeTuple, List[int], Tuple[int]]
+    shape: Union[Shape, List[int], Tuple[int]]
         Logical shape of device mesh
     device_ids: Union[List[int], Range]
         Represents the device id in the mesh
     """
 
-    def __init__(self, shape: ShapeTuple | list[int] | tuple[int], device_ids: list[int] | Range):
-        if isinstance(shape, list | tuple):
-            shape = ShapeTuple(shape)
+    def __init__(self, shape: Shape | list[int] | tuple[int], device_ids: list[int] | Range):
+        if not isinstance(shape, Shape):
+            shape = Shape(shape)
         device_range = None
         if isinstance(device_ids, Range):
             device_range = device_ids
             device_ids = []
         self.__init_handle_by_constructor__(ffi.DeviceMesh, shape, device_ids, device_range)  # type: ignore
 
 
-def device_mesh(shape: ShapeTuple, device_ids: list[int] | Range) -> DeviceMesh:
+def device_mesh(shape: Shape, device_ids: list[int] | Range) -> DeviceMesh:
     """Create a device mesh expression.
     Parameters
     ----------
-    shape : ShapeTuple
+    shape : Shape
         The shape of the device mesh.
     device_ids: Union[List[int], Range]
         Represents the device id in the mesh

diff --git a/python/tvm/relax/dpl/pattern.py b/python/tvm/relax/dpl/pattern.py
@@ -23,9 +23,9 @@
 from typing import Union
 
 import tvm_ffi
+from tvm_ffi import Array
 
 import tvm
-from tvm.ir.container import Array
 from tvm.ir.expr import PrimExpr
 from tvm.ir.op import Op
 
@@ -848,15 +848,15 @@ def is_shape(shape: list[tvm.ir.PrimExpr]) -> "PrimArrPattern":
     Raises
     ------
     ValueError
-        If the argument shape is not a list/tuple/tvm.ir.Array
+        If the argument shape is not a list/tuple/tvm_ffi.Array
 
     Note
     ----
     The difference between p.has_shape(s) and is_shape(s) is that: has_shape
     puts assumptions on the shape of the tensor matched by pattern p. While
     is_shape directly matches the shape (an array of PrimExpr).
     """
-    if not isinstance(shape, list | tuple | tvm.ir.Array):
+    if not isinstance(shape, list | tuple | Array):
         raise ValueError("is_shape takes a list or tuple as input.")
     return PrimArrPattern(shape)
 

diff --git a/python/tvm/relax/exec_builder.py b/python/tvm/relax/exec_builder.py
@@ -21,9 +21,9 @@
 from enum import IntEnum
 
 import tvm_ffi
+from tvm_ffi import Shape
 
 import tvm
-from tvm.runtime.container import ShapeTuple
 
 from . import _ffi_api
 from .vm_build import VMExecutable
@@ -121,10 +121,10 @@ def emit_call(
         if args is not None:
             for arg in args:
                 if isinstance(arg, tuple):
-                    shape_tuple = ShapeTuple(arg)
+                    shape_tuple = Shape(arg)
                     new_arg = self.convert_constant(shape_tuple)
                     args_.append(new_arg)
-                elif isinstance(arg, tvm.runtime.Tensor | tvm.DataType | ShapeTuple):
+                elif isinstance(arg, tvm.runtime.Tensor | tvm.DataType | Shape):
                     new_arg = self.convert_constant(arg)
                     args_.append(new_arg)
                 else: