diff --git a/include/tvm/runtime/debug.h b/include/tvm/runtime/debug.h new file mode 100644 index 000000000000..29d812b74dd8 --- /dev/null +++ b/include/tvm/runtime/debug.h @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file tvm/runtime/debug.h + * \brief Helpers for debugging at runtime. + */ +#ifndef TVM_RUNTIME_DEBUG_H_ +#define TVM_RUNTIME_DEBUG_H_ + +#include +#include + +#include +#include + +namespace tvm { +namespace runtime { + +/*! + * \brief Helpers to describe runtime objects in human-friendly form. For \p nd_arrays we show their + * shapes and dtypes, but also their contents if 'small' and on the \p host_device (mostly so that + * we can see dynamic shapes as they are computed). For \p adts we show the ADT fields. For + * \p objects we dispatch to one of the above as appropriate. 
+ */ +void AppendNDArray(std::ostream& os, const NDArray& nd_array, const DLDevice& host_device, + bool show_contents = true); +void AppendADT(std::ostream& os, const ADT& adt, const DLDevice& host_device, + bool show_contents = true); +void AppendRuntimeObject(std::ostream& os, const ObjectRef& object, const DLDevice& host_device, + bool show_contents = true); +std::string RuntimeObject2String(const ObjectRef& object, const DLDevice& host_device, + bool show_contents = true); + +} // namespace runtime +} // namespace tvm + +#endif // TVM_RUNTIME_DEBUG_H_ diff --git a/python/tvm/relay/op/tensor.py b/python/tvm/relay/op/tensor.py index 20b883ba2616..963bb3d55693 100644 --- a/python/tvm/relay/op/tensor.py +++ b/python/tvm/relay/op/tensor.py @@ -1178,8 +1178,18 @@ def copy(data): @script -def _copy_shape_func(data_shape): - return data_shape +def _copy_shape_func_tensor(data_shape): + ndim = data_shape.shape[0] + out = output_tensor((ndim,), "int64") + for i in const_range(ndim): + out[i] = data_shape[i] + return out + + +@script +def _copy_shape_func_scalar(data_shape): + out = output_tensor((), "int64") + return out @reg.register_shape_func("copy", False) @@ -1187,7 +1197,10 @@ def copy_shape_func(attrs, inputs, _): """ Shape function for copy op. 
""" - return [_copy_shape_func(inputs[0])] + input = inputs[0] + if len(input.shape) == 0: + return [_copy_shape_func_scalar(input)] + return [_copy_shape_func_tensor(input)] def device_copy(data, src_device, dst_device): diff --git a/src/relay/backend/te_compiler.cc b/src/relay/backend/te_compiler.cc index 901661dd87a3..3ff6076473f1 100644 --- a/src/relay/backend/te_compiler.cc +++ b/src/relay/backend/te_compiler.cc @@ -350,7 +350,7 @@ class TECompilerImpl : public TECompilerNode { // implement lowered shape func CCacheValue LowerShapeFuncInternal(const CCacheKey& key) { - VLOG(1) << "lowering dynamic shape function:" << std::endl + VLOG(1) << "lowering dynamic shape function for:" << std::endl << PrettyPrint(key->source_func) << std::endl << "for target:" << std::endl << key->target->ToDebugString(); diff --git a/src/relay/backend/te_compiler_cache.cc b/src/relay/backend/te_compiler_cache.cc index f028c3da02ab..32164f3fdf20 100644 --- a/src/relay/backend/te_compiler_cache.cc +++ b/src/relay/backend/te_compiler_cache.cc @@ -145,7 +145,7 @@ class ScheduleBuilder : public backend::MemoizedExprTranslator candidate_name = truncated_name.str(); } - // TODO(mbs): This should be the definititive global by which the PrimFunc is known and + // TODO(mbs): This should be the definitive global by which the PrimFunc is known and // no other GlobalVar ctors should appear inside the lowering machinery. 
auto prim_fn_var = GlobalVar(renamer(candidate_name)); prim_fn_var->checked_type_ = relay_func->checked_type(); @@ -371,6 +371,7 @@ class MakeShapeFunc : public backend::MemoizedExprTranslator> CachedFunc Create(const Function& prim_func, const Target& target, std::function renamer) { + VLOG_CONTEXT << "MakeShapeFunc"; TShapeDataDependent shape_func_param_states; for (auto param : prim_func->params) { @@ -399,11 +400,12 @@ class MakeShapeFunc : public backend::MemoizedExprTranslator> // Setup the name; readable_name_stream_ << "shape_func"; - // Create the `te::Tensor`s which represent the output. - auto outputs = VisitExpr(prim_func->body); + // Create the tensor expressions representing the output shapes. + Array outputs = VisitExpr(prim_func->body); // Generate a name. auto candidate_name = readable_name_stream_.str(); + constexpr static size_t kMaxFuncNameLength = 80; // WARNING: Please make sure to also update TVM_CRT_MAX_STRLEN_FUNCTION_NAME // whenever the value of kMaxFuncNameLength changes @@ -463,7 +465,7 @@ class MakeShapeFunc : public backend::MemoizedExprTranslator> for (auto t : outputs) { out_ops.push_back(t->op); } - auto schedule = te::create_schedule(out_ops); + te::Schedule schedule = te::create_schedule(out_ops); tvm::te::AutoInlineInjective(schedule); for (const auto& scalar : scalars_) { auto scalar_op = scalar->op; @@ -589,12 +591,15 @@ class MakeShapeFunc : public backend::MemoizedExprTranslator> } Array VisitExpr_(const CallNode* call_node) final { + VLOG(1) << "considering call:" << std::endl << PrettyPrint(GetRef(call_node)); if (auto* func = call_node->op.as()) { + VLOG(1) << "user function"; for (size_t i = 0; i < func->params.size(); ++i) { param_arg_map_[func->params[i]] = call_node->args[i]; } return VisitExpr(func->body); } + static auto fshape_func = Op::GetAttrMap("FShapeFunc"); static auto tshape_data_dependent = Op::GetAttrMap("TShapeDataDependent"); ICHECK(call_node->op.as()) << "Primitive function only allows call into 
primitive ops"; @@ -635,20 +640,16 @@ class MakeShapeFunc : public backend::MemoizedExprTranslator> // Get output ndims auto ret_type = call_node->checked_type(); Array out_ndims; - if (const auto* ttype = ret_type.as()) { + for (const auto& ttype : FlattenTupleType(ret_type)) { out_ndims.push_back(IntImm(DataType::Int(32), ttype->shape.size())); - } else { - auto rtype = ret_type.as(); - // TODO(@icemelon): Allow recursive tuple - ICHECK(rtype); - for (size_t i = 0; i < rtype->fields.size(); ++i) { - auto ttype = rtype->fields[i].as(); - ICHECK(ttype); - out_ndims.push_back(IntImm(DataType::Int(32), ttype->shape.size())); - } } + // Call shape function - auto outputs = fshape_func[op](call_node->attrs, inputs, out_ndims); + Array outputs = fshape_func[op](call_node->attrs, inputs, out_ndims); + VLOG(1) << "shape function for '" << op->name << "' with inputs:" << std::endl + << inputs << std::endl + << "yielded outputs:" << std::endl + << outputs; readable_name_stream_ << "_" << op->name; return outputs; } diff --git a/src/runtime/debug.cc b/src/runtime/debug.cc new file mode 100644 index 000000000000..e5d9f0ead09e --- /dev/null +++ b/src/runtime/debug.cc @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/runtime/debug.cc + * \brief Helpers for debugging at runtime. + */ + +#include + +namespace tvm { +namespace runtime { + +template +void AppendMembers(std::ostream& os, const NDArray& nd_array, int64_t dim0) { + os << "=["; + for (int64_t i = 0; i < dim0; ++i) { + if (i > 0) { + os << ","; + } + os << reinterpret_cast(nd_array->data)[i]; + } + os << "]"; +} + +void AppendNDArray(std::ostream& os, const NDArray& nd_array, const DLDevice& host_device, + bool show_contents) { + os << "NDArray["; + os << "("; + for (int dim = 0; dim < nd_array->ndim; ++dim) { + if (dim > 0) { + os << ","; + } + os << nd_array->shape[dim]; + } + std::string basic_type = DLDataType2String(nd_array->dtype); + os << ")," << basic_type; + os << ",(" << nd_array->device.device_type; + os << "," << nd_array->device.device_id; + os << ")]"; + if (show_contents && nd_array->device.device_type == host_device.device_type && + nd_array->device.device_id == host_device.device_id) { + int64_t dim0; + if (nd_array->ndim == 0) { + dim0 = 1; + } else if (nd_array->ndim == 1) { + dim0 = nd_array->shape[0]; + if (dim0 > 10) { + // Too large. + dim0 = 0; + } + } else { + // Not rank-1. 
+ dim0 = 0; + } + if (dim0 > 0) { + if (basic_type == "bool") { + AppendMembers(os, nd_array, dim0); + } else if (basic_type == "int8") { + AppendMembers(os, nd_array, dim0); + } else if (basic_type == "int16") { + AppendMembers(os, nd_array, dim0); + } else if (basic_type == "int32") { + AppendMembers(os, nd_array, dim0); + } else if (basic_type == "int64") { + AppendMembers(os, nd_array, dim0); + } else if (basic_type == "uint8") { + AppendMembers(os, nd_array, dim0); + } else if (basic_type == "uint16") { + AppendMembers(os, nd_array, dim0); + } else if (basic_type == "uint32") { + AppendMembers(os, nd_array, dim0); + } else if (basic_type == "uint64") { + AppendMembers(os, nd_array, dim0); + } else if (basic_type == "float32") { + AppendMembers(os, nd_array, dim0); + } else if (basic_type == "float64") { + AppendMembers(os, nd_array, dim0); + } + } + } +} + +void AppendADT(std::ostream& os, const ADT& adt, const DLDevice& host_device, bool show_contents) { + os << "ADT(" << adt->tag; + for (size_t i = 0; i < adt->size; ++i) { + os << ","; + AppendRuntimeObject(os, adt[i], host_device, show_contents); + } + os << ")"; +} + +void AppendRuntimeObject(std::ostream& os, const ObjectRef& object, const DLDevice& host_device, + bool show_contents) { + if (const auto* adt_obj = object.as()) { + AppendADT(os, GetRef(adt_obj), host_device, show_contents); + } else if (const auto* nd_array_cont = object.as()) { + AppendNDArray(os, GetRef(nd_array_cont), host_device, show_contents); + } else { + os << "?"; + } +} + +std::string RuntimeObject2String(const ObjectRef& object, const DLDevice& host_device, + bool show_contents) { + std::ostringstream os; + AppendRuntimeObject(os, object, host_device, show_contents); + return os.str(); +} + +} // namespace runtime +} // namespace tvm diff --git a/src/runtime/vm/executable.cc b/src/runtime/vm/executable.cc index 76c385ae9918..e2fe867630b0 100644 --- a/src/runtime/vm/executable.cc +++ b/src/runtime/vm/executable.cc @@ -24,6 +24,7 
@@ #include #include +#include #include #include #include @@ -171,27 +172,13 @@ std::string Executable::GetBytecode() const { return oss.str(); } -namespace { -String ShapeString(const ShapeTuple& shape_tuple, DLDataType dtype) { - std::stringstream sizes; - sizes << DLDataType2String(dtype) << "["; - for (size_t i = 0; i < shape_tuple.size(); i++) { - if (i != 0) { - sizes << ", "; - } - sizes << shape_tuple.data()[i]; - } - sizes << "]"; - return String(sizes.str()); -} -} // namespace - std::string Executable::GetConstants() const { std::ostringstream oss; for (size_t i = 0; i < constants.size(); ++i) { const auto& constant = constants[i]; auto ndarray = Downcast(constant); - oss << "VM Const[" << i << "]: has shape " << ShapeString(ndarray.Shape(), ndarray->dtype) + oss << "VM Const[" << i + << "]: " << RuntimeObject2String(ndarray, virtual_devices[host_device_index]) << " on device index " << const_device_indexes[i] << std::endl; } return oss.str(); diff --git a/src/runtime/vm/vm.cc b/src/runtime/vm/vm.cc index acbbec0d2991..7a83c9acb906 100644 --- a/src/runtime/vm/vm.cc +++ b/src/runtime/vm/vm.cc @@ -24,6 +24,8 @@ #include #include +#include +#include #include #include #include @@ -292,13 +294,14 @@ Index VirtualMachine::PopFrame() { } void VirtualMachine::InvokeGlobal(const VMFunction& func, const std::vector& args) { - VLOG(2) << "Invoking global " << func.name << " " << args.size(); + VLOG(2) << "Invoking global " << func.name << " with " << args.size() << " args"; PushFrame(func.params.size(), this->pc_ + 1, func); for (size_t i = 0; i < args.size(); ++i) { WriteRegister(i, args[i]); + VLOG(2) << "arg " << i << " = " + << RuntimeObject2String(args[i], GetDevice(exec_->host_device_index)); } - VLOG(2) << "func.params= " << func.params.size(); code_ = func.instructions.data(); pc_ = 0; @@ -527,20 +530,35 @@ void VirtualMachine::RunLoop() { goto main_loop; } case Opcode::InvokePacked: { - VLOG(2) << "InvokedPacked " << instr.packed_index << " arity=" << 
instr.arity; ICHECK_LE(instr.packed_index, packed_funcs_.size()); const auto& func = packed_funcs_[instr.packed_index]; const auto& arity = instr.arity; std::vector args; for (Index i = 0; i < arity; ++i) { - VLOG(2) << "arg" << i << " $" << instr.packed_args[i]; auto arg = ReadRegister(instr.packed_args[i]); args.push_back(arg); +#if TVM_LOG_DEBUG + if (i < arity) { + const bool is_input = i < arity - instr.output_size; + VLOG(2) << (is_input ? "input" : "placeholder") << " arg " << i << " = " + << RuntimeObject2String(arg, GetDevice(exec_->host_device_index), + /*show_contents=*/is_input); + } +#endif } // We no longer need to write the registers back, we write directly // through the registers mutably. InvokePacked(instr.packed_index, func, arity, instr.output_size, args); + +#if TVM_LOG_DEBUG + for (Index i = arity - instr.output_size; i < arity; ++i) { + auto arg = ReadRegister(instr.packed_args[i]); + VLOG(2) << "output arg " << i << " = " + << RuntimeObject2String(arg, GetDevice(exec_->host_device_index)); + } +#endif + pc_++; goto main_loop; } @@ -606,19 +624,10 @@ void VirtualMachine::RunLoop() { auto storage_obj = ReadRegister(instr.alloc_tensor.storage); auto offset = LoadScalarInt(instr.alloc_tensor.offset); auto storage = Downcast(storage_obj); -#if TVM_LOG_DEBUG - std::ostringstream os; - os << "AllocTensor: "; - os << "offset=" << offset; - os << ", shape=["; - for (auto i : shape) { - os << i << ","; - } - os << "]"; - os << ", dtype=" << DLDataType2String(instr.alloc_tensor.dtype); - VLOG(2) << os.str(); -#endif auto obj = storage->AllocNDArray(offset, shape, instr.alloc_tensor.dtype); + VLOG(2) << "allocated " + << RuntimeObject2String(obj, GetDevice(exec_->host_device_index), + /*show_contents=*/false); WriteRegister(instr.dst, obj); OpStopHook(); @@ -635,6 +644,9 @@ void VirtualMachine::RunLoop() { auto storage = Downcast(storage_obj); auto offset = LoadScalarInt(instr.alloc_tensor.offset); auto obj = storage->AllocNDArray(offset, shape, 
instr.alloc_tensor_reg.dtype); + VLOG(2) << "allocated " + << RuntimeObject2String(obj, GetDevice(exec_->host_device_index), + /*show_contents=*/false); WriteRegister(instr.dst, obj); OpStopHook(); @@ -668,7 +680,7 @@ void VirtualMachine::RunLoop() { auto storage_obj = SimpleObjAllocator().make_object(); Allocator* allocator = GetAllocator(instr.alloc_storage.device_index); ICHECK(allocator) << "Did you forget to init the VirtualMachine with devices?"; - VLOG(2) << "AllocStorage: allocation_size=" << size << ", alignment=" << alignment + VLOG(2) << "allocating with allocation_size=" << size << ", alignment=" << alignment << ", dtype_hint=" << DLDataType2String(instr.alloc_storage.dtype_hint) << ", device_index=" << instr.alloc_storage.device_index; @@ -688,6 +700,8 @@ void VirtualMachine::RunLoop() { for (int i = 0; i < ndim; ++i) { reinterpret_cast(out_tensor->data)[i] = input_array->shape[i]; } + VLOG(2) << "shape = " + << RuntimeObject2String(out_tensor, GetDevice(exec_->host_device_index)); WriteRegister(instr.dst, out_tensor); pc_++; goto main_loop; @@ -722,18 +736,10 @@ void VirtualMachine::RunLoop() { int64_t ndim = shape_tensor->shape[0]; std::vector shape(dims, dims + ndim); // Reshape the input tensor -#if TVM_LOG_DEBUG - std::ostringstream os; - os << "ReshapeTensor: "; - os << "shape=["; - for (auto i : shape) { - os << i << ","; - } - os << "]"; - os << ", dtype=" << DLDataType2String(tensor_arr->dtype); - VLOG(2) << os.str(); -#endif auto out_tensor = tensor_arr.CreateView(shape, tensor_arr->dtype); + VLOG(2) << "reshaped " + << RuntimeObject2String(tensor_obj, GetDevice(exec_->host_device_index)) << " to " + << RuntimeObject2String(out_tensor, GetDevice(exec_->host_device_index)); WriteRegister(instr.dst, out_tensor); OpStopHook(); pc_++; diff --git a/src/target/compilation_config.cc b/src/target/compilation_config.cc index 0401eebe51ef..a56e0ad0777c 100644 --- a/src/target/compilation_config.cc +++ b/src/target/compilation_config.cc @@ -62,31 +62,31 
@@ void CompilationConfigNode::EstablishDefaultVirtualDevices(const transform::Pass if (host_target.defined()) { CHECK(!host_target->host.defined()) << "Host targets are not expected to have hosts"; host_device_type = static_cast(host_target->kind->device_type); - DLOG(INFO) << "Using the given host target " << host_target->ToDebugString() - << " of device type " << host_device_type << " for the host target"; + VLOG(1) << "Using the given host target " << host_target->ToDebugString() << " of device type " + << host_device_type << " for the host target"; for (const auto& primitive_target : primitive_targets) { if (primitive_target->host.defined() && !StructuralEqual()(primitive_target->host, host_target)) { - DLOG(WARNING) << "The primitive target " << primitive_target->ToDebugString() - << " already has a host which disagrees with the desired host target. It " - << "will be ignored."; + VLOG(1) << "The primitive target " << primitive_target->ToDebugString() + << " already has a host which disagrees with the desired host target. It " + << "will be ignored."; } } } else if (primitive_targets.size() == 1 && primitive_targets.front()->host.defined()) { host_target = primitive_targets.front()->GetHost().value(); CHECK(!host_target->host.defined()) << "Host targets are not expected to have hosts"; host_device_type = static_cast(host_target->kind->device_type); - DLOG(INFO) << "Using the host of the unique primitive target, namely " - << host_target->ToDebugString() << " of device type " << host_device_type - << " for the host target"; + VLOG(1) << "Using the host of the unique primitive target, namely " + << host_target->ToDebugString() << " of device type " << host_device_type + << " for the host target"; } else if (primitive_targets.size() == 1 && primitive_targets.front()->kind->device_type == kDLCPU) { // In the homogenous case without an explicit host target just use the given target so long as // it's a CPU. 
host_device_type = kDLCPU; host_target = primitive_targets.front(); - DLOG(INFO) << "Using the unique primitive target " << host_target->ToDebugString() - << " of device type " << host_device_type << " for the host target"; + VLOG(1) << "Using the unique primitive target " << host_target->ToDebugString() + << " of device type " << host_device_type << " for the host target"; } else { // Fallback. host_device_type = kDLCPU; @@ -94,15 +94,15 @@ void CompilationConfigNode::EstablishDefaultVirtualDevices(const transform::Pass // in the hetrogeneous case since its options may not be appropriate for host code // (eg shape functions). Instead, create a fresh default Target. host_target = MakeDefaultTarget(host_device_type); - DLOG(WARNING) << "Using the default target " << host_target->ToDebugString() - << " of device type " << host_device_type << " for the host target"; + VLOG(1) << "Using the default target " << host_target->ToDebugString() << " of device type " + << host_device_type << " for the host target"; } ICHECK(host_target.defined()); ICHECK(!host_target->host.defined()); if (host_device_type != kDLCPU) { // I think we're on thin ice here until we've audited the code base for assumed kDLCPU. 
- LOG(WARNING) << "The host target is not a CPU."; + VLOG(1) << "The host target is not a CPU."; } // @@ -132,22 +132,22 @@ void CompilationConfigNode::EstablishDefaultVirtualDevices(const transform::Pass CHECK_GT(v, 0) << "The 'relay.fallback_device_type' pass attribute is set to an invalid device type " << v; default_primitive_device_type = static_cast(v); - DLOG(INFO) << "Using the 'relay.fallback_device_type' pass attribute " - << default_primitive_device_type - << " as the default device type for all primitive operations"; + VLOG(1) << "Using the 'relay.fallback_device_type' pass attribute " + << default_primitive_device_type + << " as the default device type for all primitive operations"; } else if (primitive_targets.size() == 1) { // In the homogeneous case there's no free choice. default_primitive_device_type = static_cast(primitive_targets.front()->kind->device_type); - DLOG(INFO) << "Using the device type " << default_primitive_device_type - << " of the unique primitive target as the default device type for all primitive " - << "operations"; + VLOG(1) << "Using the device type " << default_primitive_device_type + << " of the unique primitive target as the default device type for all primitive " + << "operations"; } else { // Fallback. Note that we'll require a primitive Target of kDLCPU device_type to be given // and won't manufacture one out of thin air. default_primitive_device_type = kDLCPU; - DLOG(WARNING) << "Using " << default_primitive_device_type - << " as the default device type for all primitive operations"; + VLOG(1) << "Using " << default_primitive_device_type + << " as the default device type for all primitive operations"; } // @@ -227,11 +227,11 @@ CompilationConfig::CompilationConfig(const transform::PassContext& pass_ctx, node->legacy_target_map.size() == 1 ? 
(*node->legacy_target_map.begin()).second : Target(); for (const auto& target : node->primitive_targets) { - DLOG(INFO) << "Target " << target->ToDebugString() << " of device type " - << target->kind->device_type << " is available for primitives"; + VLOG(1) << "Target " << target->ToDebugString() << " of device type " + << target->kind->device_type << " is available for primitives"; } - DLOG(INFO) << "Using default primitive virtual device " << node->default_primitive_virtual_device; - DLOG(INFO) << "Using host virtual device " << node->host_virtual_device; + VLOG(1) << "Using default primitive virtual device " << node->default_primitive_virtual_device; + VLOG(1) << "Using host virtual device " << node->host_virtual_device; data_ = std::move(node); } diff --git a/tests/python/relay/dyn/test_dynamic_op_level3.py b/tests/python/relay/dyn/test_dynamic_op_level3.py index 7669d02cd536..0456401e8ad2 100644 --- a/tests/python/relay/dyn/test_dynamic_op_level3.py +++ b/tests/python/relay/dyn/test_dynamic_op_level3.py @@ -21,7 +21,6 @@ import tvm import tvm.testing from tvm import relay, te -from tvm.relay import create_executor, transform from tvm.relay.testing import check_grad, run_infer_type @@ -44,6 +43,15 @@ def verify_func(func, data, ref_res, target_device=tvm.testing.enabled_targets() relay.backend.te_compiler.get().clear() +def check_on_vm(target, dev, args, expected_result, mod): + """ + Check that evaluating `mod` applied to `args` produces + `expected_result` on Relay VM. 
+ """ + rts_result = relay.create_executor("vm", device=dev, target=target, mod=mod).evaluate()(*args) + tvm.testing.assert_allclose(expected_result, rts_result.numpy()) + + @tvm.testing.uses_gpu def test_dyn_reshape(): def verify_reshape(shape, newshape, oshape): @@ -410,5 +418,59 @@ def verify_sparse_fill_empty_rows( ) +def test_dyn_copy(): + target = tvm.target.Target("llvm") + dev = tvm.cpu() + mod = tvm.parser.fromtext( + """ + #[version = "0.0.5"] + def @main(%x: Tensor[(?, 3), int64]) -> Tensor[(?, 3), int64] { + copy(%x) + } + """ + ) + x_data = np.random.randint(-100, 100, size=(15, 3)).astype("int64") + expected = x_data + check_on_vm(target, dev, [x_data], expected, mod) + + +def test_dyn_copy_scalar(): + target = tvm.target.Target("llvm") + dev = tvm.cpu() + mod = tvm.parser.fromtext( + """ + #[version = "0.0.5"] + def @main(%x: int32, %y: Tensor[(?), int32]) -> Tensor[(?), int32] { + %0 = copy(%x); + %1 = expand_dims(%0, axis=0); + %2 = (%y, %1); + concatenate(%2) + } + """ + ) + x_data = 3 + y_data = np.random.randint(-100, 100, size=(7,)).astype("int32") + expected = np.concatenate((y_data, np.expand_dims(x_data, axis=0))) + check_on_vm(target, dev, [x_data, y_data], expected, mod) + + +def test_dyn_cast(): + target = tvm.target.Target("llvm") + dev = tvm.cpu() + mod = tvm.parser.fromtext( + """ + #[version = "0.0.5"] + def @main(%x: Tensor[(?, 3), int64]) -> Tensor[(?, 3), int32] { + cast(%x, dtype="int32") + } + """ + ) + x_data = np.random.randint(-100, 100, size=(15, 3)).astype("int64") + expected = x_data.astype("int32") + check_on_vm(target, dev, [x_data], expected, mod) + + if __name__ == "__main__": - pytest.main([__file__]) + import sys + + sys.exit(pytest.main([__file__] + sys.argv[1:])) diff --git a/tests/python/relay/test_vm.py b/tests/python/relay/test_vm.py index 1c60702982cc..7f0f8041b1a2 100644 --- a/tests/python/relay/test_vm.py +++ b/tests/python/relay/test_vm.py @@ -36,7 +36,7 @@ from tvm.relay.backend.vm import VMCompiler -def 
check_result(target, dev, args, expected_result, mod=None): +def 
check_result(target, dev, args, expected_result, mod): """ Check that evaluating `expr` applied to the arguments produces `result` on Relay VM. @@ -111,7 +111,7 @@ def test_id(target, dev): x_data = np.random.rand(10, 10).astype("float64") mod = tvm.IRModule() mod["main"] = f - check_result(target, dev, [x_data], x_data, mod=mod) + check_result(target, dev, [x_data], x_data, mod) def test_op(target, dev): @@ -120,7 +120,7 @@ def test_op(target, dev): x_data = np.random.rand(10, 10).astype("float32") mod = tvm.IRModule() mod["main"] = f - check_result(target, dev, [x_data], 2 * x_data, mod=mod) + check_result(target, dev, [x_data], 2 * x_data, mod) def any(x): @@ -140,10 +140,10 @@ def test_cond(target, dev): mod = tvm.IRModule() mod["main"] = f # same - check_result(target, dev, [x_data, x_data], True, mod=mod) + check_result(target, dev, [x_data, x_data], True, mod) # diff - check_result(target, dev, [x_data, y_data], False, mod=mod) + check_result(target, dev, [x_data, y_data], False, mod) @tvm.testing.known_failing_targets("vulkan") @@ -157,10 +157,10 @@ def test_simple_if(target, dev): mod = tvm.IRModule() mod["main"] = f # same - check_result(target, dev, [x_data, x_data], x_data, mod=mod) + check_result(target, dev, [x_data, x_data], x_data, mod) # diff - check_result(target, dev, [x_data, y_data], y_data, mod=mod) + check_result(target, dev, [x_data, y_data], y_data, mod) @tvm.testing.parametrize_targets("llvm") @@ -204,7 +204,7 @@ def test_unused_function(target, dev): x_data = np.random.rand(2, 2).astype("float32") y_data = x_data * 2 - check_result(target, dev, [x_data], y_data, mod=mod) + check_result(target, dev, [x_data], y_data, mod) def test_simple_call(target, dev): @@ -218,7 +218,7 @@ def test_simple_call(target, dev): i_data = np.array(0, dtype="int32") iarg = relay.var("iarg", shape=[], dtype="int32") mod["main"] = relay.Function([iarg], sum_up(iarg)) - check_result(target, dev, [i_data], i_data, mod=mod) + check_result(target, dev, [i_data], 
i_data, mod) def test_count_loop(target, dev): @@ -239,7 +239,7 @@ def test_count_loop(target, dev): mod["main"] = relay.Function([iarg], sum_up(iarg)) result = veval(mod, i_data, device=dev, target=target) tvm.testing.assert_allclose(result.numpy(), i_data) - check_result(target, dev, [i_data], i_data, mod=mod) + check_result(target, dev, [i_data], i_data, mod) def test_sum_loop(target, dev): @@ -263,7 +263,7 @@ def test_sum_loop(target, dev): iarg = relay.var("i", shape=[], dtype="int32") aarg = relay.var("accum", shape=[], dtype="int32") mod["main"] = relay.Function([iarg, aarg], sum_up(iarg, aarg)) - check_result(target, dev, [i_data, accum_data], sum(range(1, loop_bound + 1)), mod=mod) + check_result(target, dev, [i_data, accum_data], sum(range(1, loop_bound + 1)), mod) def test_tuple_fst(target, dev): @@ -274,7 +274,7 @@ def test_tuple_fst(target, dev): j_data = np.random.rand(10).astype("float32") mod = tvm.IRModule() mod["main"] = f - check_result(target, dev, [(i_data, j_data)], i_data, mod=mod) + check_result(target, dev, [(i_data, j_data)], i_data, mod) def test_tuple_second(target, dev): @@ -285,7 +285,7 @@ def test_tuple_second(target, dev): j_data = np.random.rand(10).astype("float32") mod = tvm.IRModule() mod["main"] = f - check_result(target, dev, [(i_data, j_data)], j_data, mod=mod) + check_result(target, dev, [(i_data, j_data)], j_data, mod) def test_list_constructor(target, dev): @@ -325,7 +325,7 @@ def test_let_tensor(target, dev): x_data = np.random.rand(*shape).astype("float32") mod = tvm.IRModule() mod["main"] = f - check_result(target, dev, [x_data], x_data + 42.0, mod=mod) + check_result(target, dev, [x_data], x_data + 42.0, mod) def test_let_scalar(target, dev): @@ -342,7 +342,7 @@ def test_let_scalar(target, dev): x_data = np.array(np.random.rand()).astype("float32") mod = tvm.IRModule() mod["main"] = f - check_result(target, dev, [x_data], x_data + 42.0, mod=mod) + check_result(target, dev, [x_data], x_data + 42.0, mod) def 
test_compose(target, dev): @@ -616,7 +616,7 @@ def test_add_op_scalar(target, dev): ] for (x_data, y_data) in x_y_data: mod["main"] = func - check_result(target, dev, [x_data, y_data], x_data + y_data, mod=mod) + check_result(target, dev, [x_data, y_data], x_data + y_data, mod) def test_add_op_scalar_int(target, dev): @@ -637,7 +637,7 @@ def test_add_op_scalar_int(target, dev): ] for (x_data, y_data) in x_y_data: mod["main"] = func - check_result(target, dev, [x_data, y_data], x_data + y_data, mod=mod) + check_result(target, dev, [x_data, y_data], x_data + y_data, mod) def test_add_op_tensor(target, dev): @@ -654,7 +654,7 @@ def test_add_op_tensor(target, dev): x_data = np.random.rand(10, 5).astype("float32") y_data = np.random.rand(10, 5).astype("float32") mod["main"] = func - check_result(target, dev, [x_data, y_data], x_data + y_data, mod=mod) + check_result(target, dev, [x_data, y_data], x_data + y_data, mod) def test_add_op_broadcast(target, dev): @@ -671,7 +671,7 @@ def test_add_op_broadcast(target, dev): x_data = np.random.rand(10, 5).astype("float32") y_data = np.random.rand(1, 5).astype("float32") mod["main"] = func - check_result(target, dev, [x_data, y_data], x_data + y_data, mod=mod) + check_result(target, dev, [x_data, y_data], x_data + y_data, mod) def test_vm_optimize_dynamic(): @@ -717,7 +717,7 @@ def body_with_free_var(i, acc): ret = relay.TupleGetItem(tup, 1) mod = tvm.IRModule() mod["main"] = relay.Function(relay.analysis.free_vars(ret), ret) - check_result(target, dev, args, expected, mod=mod) + check_result(target, dev, args, expected, mod) def test_vm_reshape_tensor(target, dev): @@ -1040,8 +1040,8 @@ def @main(%a: Tensor[(5, 7), float32], # - The offset of the tensor within the storage (second arg) to alloc_tensor # Both should be on the CPU assert "VirtualDevice[0]: device type 1" in exe.virtual_devices - assert "Const[0]: has shape int64[] on device index 0" in exe.constants - assert "Const[1]: has shape int64[] on device index 0" in 
exe.constants + assert "VM Const[0]: NDArray[(),int64,(1,0)]=[140] on device index 0" in exe.constants + assert "VM Const[1]: NDArray[(),int64,(1,0)]=[0] on device index 0" in exe.constants @tvm.testing.requires_cuda @@ -1073,7 +1073,7 @@ def @main(%x: Tensor[(2, 8), float32], # The newshape annotation should have been turned into a constant on the CPU. assert "VirtualDevice[0]: device type 1" in exe.virtual_devices - assert "Const[0]: has shape int64[3] on device index 0" in exe.constants + assert "VM Const[0]: NDArray[(3),int64,(1,0)]=[2,4,2] on device index 0" in exe.constants @tvm.testing.requires_cuda