From 2abce99330cd0edd8c2567da315e25be28c1b81d Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Thu, 26 May 2022 08:48:29 +0900 Subject: [PATCH 1/4] Allow dynamic shmem of size > 48K --- src/runtime/cuda/cuda_module.cc | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/runtime/cuda/cuda_module.cc b/src/runtime/cuda/cuda_module.cc index 7d6879a62aba..2471b3110b1a 100644 --- a/src/runtime/cuda/cuda_module.cc +++ b/src/runtime/cuda/cuda_module.cc @@ -164,11 +164,18 @@ class CUDAWrappedFunc { void operator()(TVMArgs args, TVMRetValue* rv, void** void_args) const { int device_id; CUDA_CALL(cudaGetDevice(&device_id)); + ThreadWorkLoad wl = launch_param_config_.Extract(args); + if (fcache_[device_id] == nullptr) { fcache_[device_id] = m_->GetFunc(device_id, func_name_); + if (wl.dyn_shmem_size >= (48 << 10)) { + // Assumption: dyn_shmem_size doesn't change across different invocations of + // fcache_[device_id] + cuFuncSetAttribute(fcache_[device_id], CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, + wl.dyn_shmem_size); + } } CUstream strm = static_cast<CUstream>(CUDAThreadEntry::ThreadLocal()->stream); - ThreadWorkLoad wl = launch_param_config_.Extract(args); CUresult result = cuLaunchKernel(fcache_[device_id], wl.grid_dim(0), wl.grid_dim(1), wl.grid_dim(2), wl.block_dim(0), wl.block_dim(1), wl.block_dim(2), wl.dyn_shmem_size, strm, void_args, nullptr); From 5b52cc91d55610b3271b903da22078a53e02bee6 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Fri, 27 May 2022 06:40:23 +0900 Subject: [PATCH 2/4] add error msg --- src/runtime/cuda/cuda_module.cc | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/runtime/cuda/cuda_module.cc b/src/runtime/cuda/cuda_module.cc index 2471b3110b1a..739875fe850f 100644 --- a/src/runtime/cuda/cuda_module.cc +++ b/src/runtime/cuda/cuda_module.cc @@ -171,8 +171,12 @@ class CUDAWrappedFunc { if (wl.dyn_shmem_size >= (48 << 10)) { // Assumption: dyn_shmem_size doesn't change across different
invocations of // fcache_[device_id] - cuFuncSetAttribute(fcache_[device_id], CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, - wl.dyn_shmem_size); + CUresult result = cuFuncSetAttribute( + fcache_[device_id], CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, wl.dyn_shmem_size); + if (result != CUDA_SUCCESS) { + LOG(FATAL) << "Failed to set the allowed dynamic shared memory size to " + << wl.dyn_shmem_size; + } } } CUstream strm = static_cast<CUstream>(CUDAThreadEntry::ThreadLocal()->stream); From 10f01818598d5c03c896ae70fa5d7f2167b82606 Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Fri, 27 May 2022 19:48:37 +0900 Subject: [PATCH 3/4] skip cascader test --- tests/python/contrib/test_ethosu/cascader/test_scheduler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/python/contrib/test_ethosu/cascader/test_scheduler.py b/tests/python/contrib/test_ethosu/cascader/test_scheduler.py index c97cfeb7a991..b3610315441e 100644 --- a/tests/python/contrib/test_ethosu/cascader/test_scheduler.py +++ b/tests/python/contrib/test_ethosu/cascader/test_scheduler.py @@ -48,6 +48,7 @@ def test_cascade(SRAM, FLASH, TwoConv2DWithSliceTE, TwoConv2DTE, MobileNetv1Star cs.cascade(sch, te_graph, const_dict, options, SRAM, FLASH, [SRAM], device_config) +@pytest.mark.skip(reason="See https://github.com/apache/tvm/issues/11483") def test_compute_cycles_annotation(SRAM, FLASH, TwoConv2DTE): device_config = cs.EthosuDeviceConfig("ethos-u55-256") options = infra.make_options( From 4aa41f86e106b0d73467dfb633146729bb888d1f Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Fri, 27 May 2022 19:48:22 +0900 Subject: [PATCH 4/4] suppres logging --- python/tvm/contrib/cc.py | 3 --- python/tvm/contrib/nvcc.py | 2 -- 2 files changed, 5 deletions(-) diff --git a/python/tvm/contrib/cc.py b/python/tvm/contrib/cc.py index ec40ef3189d1..867cbd601256 100644 --- a/python/tvm/contrib/cc.py +++ b/python/tvm/contrib/cc.py @@ -19,7 +19,6 @@ import sys import os import subprocess -import logging from .._ffi.base
import py_str @@ -239,7 +238,6 @@ def _linux_compile(output, objects, options, compile_cmd, compile_shared=False): cmd += objects if options: cmd += options - logging.info("invoking '%s'", cmd) proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) (out, _) = proc.communicate() if proc.returncode != 0: @@ -266,7 +264,6 @@ def _windows_compile(output, objects, options): cmd += options try: - logging.info("invoking '%s'", cmd) proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) (out, _) = proc.communicate() except FileNotFoundError: diff --git a/python/tvm/contrib/nvcc.py b/python/tvm/contrib/nvcc.py index 33a32c9c0047..5a104be9966d 100644 --- a/python/tvm/contrib/nvcc.py +++ b/python/tvm/contrib/nvcc.py @@ -21,7 +21,6 @@ import subprocess import os import warnings -import logging import tvm._ffi from tvm.target import Target @@ -103,7 +102,6 @@ def compile_cuda(code, target_format="ptx", arch=None, options=None, path_target # if cxx_compiler_path != "": # cmd += ["-ccbin", cxx_compiler_path] - logging.info("invoking '%s'", cmd) proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) (out, _) = proc.communicate()