From 0b389e1c997e61aac2249436a3be88f2a68991fd Mon Sep 17 00:00:00 2001 From: Luke Hutton Date: Fri, 23 Sep 2022 13:11:20 +0000 Subject: [PATCH 1/2] [ETHOSN] Support conversion of add/mul to requantize where possible Add/mul operations that correspond to identity operations can be converted to a simple reinterpret quantize operation. This conversion takes place in the convert equivalents pass similar to the depthwise counter-part. In addition, an issue was noticed that would cause unsupported operations to raise an error rather than not being offloaded. This has been fixed by allowing the conversion to return Null when the conversion is not supported. Change-Id: I7747d499820fa3a9cf0ca72a6d3ff1060b000e9c --- python/tvm/relay/op/contrib/ethosn.py | 82 +++-- src/relay/backend/contrib/ethosn/codegen.cc | 39 +++ .../backend/contrib/ethosn/codegen_ethosn.h | 1 + .../contrib/ethosn/convert_equivalent.cc | 324 +++++++++++++----- .../backend/contrib/ethosn/ethosn_api.cc | 36 ++ src/relay/backend/contrib/ethosn/ethosn_api.h | 16 + .../contrib/test_ethosn/test_addition.py | 70 +++- .../test_ethosn/test_convert_equivalents.py | 318 ++++++++++++++++- .../contrib/test_ethosn/test_multiply.py | 102 ++++-- 9 files changed, 825 insertions(+), 163 deletions(-) diff --git a/python/tvm/relay/op/contrib/ethosn.py b/python/tvm/relay/op/contrib/ethosn.py index 6a318c602fd2..80cc1ca3b202 100644 --- a/python/tvm/relay/op/contrib/ethosn.py +++ b/python/tvm/relay/op/contrib/ethosn.py @@ -215,7 +215,7 @@ def qnn_mul_pattern(): input_is_right = gen_mul_inputs(is_constant(), wildcard()) return input_is_left | input_is_right - def qnn_add_pattern(): + def qnn_add_pattern(has_constant_input=False): add_op = is_op("qnn.add") gen_add_inputs = lambda x, y: add_op( x, @@ -227,11 +227,13 @@ def qnn_add_pattern(): is_constant(), is_constant(), ) - two_inputs = gen_add_inputs(wildcard(), wildcard()) - input_is_left = gen_add_inputs(wildcard(), is_constant()) - input_is_right = 
gen_add_inputs(is_constant(), wildcard()) - return input_is_left | input_is_right | two_inputs + if has_constant_input: + input_is_left = gen_add_inputs(wildcard(), is_constant()) + input_is_right = gen_add_inputs(is_constant(), wildcard()) + return input_is_left | input_is_right + else: + return gen_add_inputs(wildcard(), wildcard()) def qnn_conv2d_transpose_pattern(): pattern = is_op("qnn.conv2d_transpose")( @@ -299,16 +301,24 @@ def check_leaky_relu(extract): return _ethosn.leaky_relu(extract) - def check_mul(extract): - """Check if Mul is supported.""" + def check_mul_to_reinterpret_quantize(extract): + """Check if Mul is supported by converting to reinterpret quantize""" if not ethosn_available(): return False - # Do not support scalar constants for now - check_scalar = lambda i: isinstance(i, tvm.relay.Constant) and len(i.data.shape) == 0 - if check_scalar(extract.args[0]) or check_scalar(extract.args[1]): + + converted_extract = _ethosn.ConvertQnnMultiplyToReinterpretQuantize(extract) + if converted_extract: + return _ethosn.reinterpret_quantize(converted_extract) + return False + + def check_mul_to_depthwise(extract): + """Check if Mul is supported by converting to a depthwise operation.""" + if not ethosn_available(): return False - extract = _ethosn.ConvertQnnMultiply(extract) - return _ethosn.conv2d(extract) + converted_extract = _ethosn.ConvertQnnMultiplyToDepthwise(extract) + if converted_extract: + return _ethosn.conv2d(converted_extract) + return False def check_requantize(extract): """Check if requantize is supported.""" @@ -328,19 +338,40 @@ def check_add(extract): """Check if an addition is supported by Ethos-N.""" if not ethosn_available(): return False - # Do not support scalar constants for now - check_scalar = lambda i: isinstance(i, tvm.relay.Constant) and len(i.data.shape) == 0 - if check_scalar(extract.args[0]) or check_scalar(extract.args[1]): - return False - inputs = extract.args[0:2] - if any([isinstance(i, tvm.relay.Constant) for i in 
inputs]): - extract = _ethosn.ConvertQnnAdd(extract) - return _ethosn.conv2d(extract) return _ethosn.addition(extract) + def check_add_to_reinterpret_quantize(extract): + """Check if addition can be converted to a reinterpret quantize operation.""" + if not ethosn_available(): + return False + converted_extract = _ethosn.ConvertQnnAddToReinterpretQuantize(extract) + if converted_extract: + return _ethosn.reinterpret_quantize(converted_extract) + return False + + def check_add_to_depthwise(extract): + """Check if addition can be converted to a depthwise operation.""" + if not ethosn_available(): + return False + converted_extract = _ethosn.ConvertQnnAddToDepthwise(extract) + if converted_extract: + return _ethosn.conv2d(converted_extract) + return False + return [ - ("ethos-n.qnn_mul", qnn_mul_pattern(), check_mul), + ( + "ethos-n.qnn_mul_to_reinterpret_quantize", + qnn_mul_pattern(), + check_mul_to_reinterpret_quantize, + ), + ("ethos-n.qnn_mul_to_depthwise", qnn_mul_pattern(), check_mul_to_depthwise), + ( + "ethos-n.qnn_add_to_reinterpret_quantize", + qnn_add_pattern(True), + check_add_to_reinterpret_quantize, + ), + ("ethos-n.qnn_add_to_depthwise", qnn_add_pattern(True), check_add_to_depthwise), ("ethos-n.qnn_add", qnn_add_pattern(), check_add), ("ethos-n.qnn_conv2d", qnn_conv_pattern(), check_conv2d), ("ethos-n.qnn_conv2d_transpose", qnn_conv2d_transpose_pattern(), check_conv2d_transpose), @@ -355,15 +386,6 @@ def check_add(extract): ] -def _is_ethosn_composite(node): - if isinstance(node, tvm.relay.expr.Call) and isinstance(node.op, tvm.relay.Function): - if "Composite" in node.op.attrs: - comp_name = node.op.attrs["Composite"] - return comp_name.split(".")[0] == "ethos-n" - - return False - - @tvm.ir.register_op_attr("nn.max_pool2d", "target.ethos-n") def max_pool2d(expr): """Check if a max pool2d is supported by Ethos-N.""" diff --git a/src/relay/backend/contrib/ethosn/codegen.cc b/src/relay/backend/contrib/ethosn/codegen.cc index c7109b754d2b..46420775ae5b 
100644 --- a/src/relay/backend/contrib/ethosn/codegen.cc +++ b/src/relay/backend/contrib/ethosn/codegen.cc @@ -152,6 +152,10 @@ void InferTensorsVisitor::InferCall(const CallNode* cn) { RequantizeParams params; err += EthosnAPI::Requantize(cn->op.as()->body, ¶ms); tensor_table_[cn->args[0]] = {params.input_info}; + } else if (IsEthosnFunc(call, "ethos-n.qnn_reinterpret_quantize")) { + ReinterpretQuantizationParams params; + err += EthosnAPI::ReinterpretQuantize(cn->op.as()->body, ¶ms); + tensor_table_[cn->args[0]] = {params.input_info}; } else if (IsEthosnFunc(call, "ethos-n.qnn_resize")) { ResizeParams params; err += EthosnAPI::Resize(cn->op.as()->body, ¶ms); @@ -333,6 +337,9 @@ sl::TensorsAndId ConstructNetworkVisitor::HandleCall(const CallNode* cn) { } else if (IsEthosnFunc(call, "ethos-n.qnn_requantize")) { if ((err = MakeRequantizeLayer(call, &tensor))) ReportFatalError(call, err); return MakeOps(tensor); + } else if (IsEthosnFunc(call, "ethos-n.qnn_reinterpret_quantize")) { + if ((err = MakeReinterpretQuantizeLayer(call, &tensor))) ReportFatalError(call, err); + return MakeOps(tensor); } else if (IsEthosnFunc(call, "ethos-n.qnn_resize")) { if ((err = MakeResizeLayer(call, &tensor))) ReportFatalError(call, err); return MakeOps(tensor); @@ -654,6 +661,24 @@ EthosnError ConstructNetworkVisitor::MakeRequantizeLayer(const Call& call, return EthosnError(); } +EthosnError ConstructNetworkVisitor::MakeReinterpretQuantizeLayer( + const Call& call, sl::TensorAndId* out) { + ReinterpretQuantizationParams params; + params.input_info = GetTensorInfo(tensor_table_, call); + if (auto err = EthosnAPI::ReinterpretQuantize(call->op.as()->body, ¶ms)) { + return err; + } + + auto input = operand_table_[call->args[0]][0]; + + try { + *out = AddReinterpretQuantization(network_, *input, params.reinterpret_quantize_info); + } catch (const sl::NotSupportedException& e) { + return EthosnError(e.what()); + } + return EthosnError(); +} + EthosnError 
ConstructNetworkVisitor::MakeResizeLayer(const Call& call, sl::TensorAndId* out) { ResizeParams params; @@ -1022,6 +1047,20 @@ TVM_REGISTER_GLOBAL("relay.ethos-n.support.requantize") err += EthosnError(reason); }); +TVM_REGISTER_GLOBAL("relay.ethos-n.support.reinterpret_quantize") + .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { + Call call = args[0]; + ReinterpretQuantizationParams params; + auto err = EthosnAPI::ReinterpretQuantize(call, ¶ms); + err += EthosnCompiler::SupportedSetup(); + char reason[kReasonMaxLength]; + reason[0] = '\0'; + *rv = !err && EthosnCompiler::GetSupported()->IsReinterpretQuantizationSupported( + params.reinterpret_quantize_info, params.input_info, ¶ms.output_info, + reason, sizeof(reason)); + err += EthosnError(reason); + }); + TVM_REGISTER_GLOBAL("relay.ethos-n.support.resize") .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { Call call = args[0]; diff --git a/src/relay/backend/contrib/ethosn/codegen_ethosn.h b/src/relay/backend/contrib/ethosn/codegen_ethosn.h index a653b0b8dc97..ab853599aa2d 100644 --- a/src/relay/backend/contrib/ethosn/codegen_ethosn.h +++ b/src/relay/backend/contrib/ethosn/codegen_ethosn.h @@ -213,6 +213,7 @@ class ConstructNetworkVisitor : public MixedModeVisitor, private ErrorReportingP EthosnError MakeReluLayer(const Call& call, sl::TensorAndId* out); EthosnError MakeLeakyReLULayer(const Call& call, sl::TensorAndId* out); EthosnError MakeRequantizeLayer(const Call& call, sl::TensorAndId* out); + EthosnError MakeReinterpretQuantizeLayer(const Call& call, sl::TensorAndId* out); EthosnError MakeResizeLayer(const Call& call, sl::TensorAndId* out); /*! \brief A look-up table from Expr to layers. 
*/ diff --git a/src/relay/backend/contrib/ethosn/convert_equivalent.cc b/src/relay/backend/contrib/ethosn/convert_equivalent.cc index 91c924b1b04f..7f4e1a3c5045 100644 --- a/src/relay/backend/contrib/ethosn/convert_equivalent.cc +++ b/src/relay/backend/contrib/ethosn/convert_equivalent.cc @@ -39,37 +39,63 @@ namespace relay { namespace contrib { namespace ethosn { +/*! + * \brief Helper class to extract inputs and quantization information from binary + * elementwise operations ready to convert. + */ +class BinaryElementwiseParams { + public: + static BinaryElementwiseParams ExtractBinaryElementwiseParams(const Call& call) { + auto params = BinaryElementwiseParams(); + params.input1 = call->args[0]; + params.input2 = call->args[1]; + params.input1_scale = call->args[2]; + params.input1_zero_point = call->args[3]; + params.input2_scale = call->args[4]; + params.input2_zero_point = call->args[5]; + // Reverse the inputs if the constant is first input + if (call->args[0]->IsInstance()) { + params.input1 = call->args[1]; + params.input2 = call->args[0]; + params.input1_scale = call->args[4]; + params.input1_zero_point = call->args[5]; + params.input2_scale = call->args[2]; + params.input2_zero_point = call->args[3]; + } + params.output_scale = call->args[6]; + params.output_zero_point = call->args[7]; + return params; + } + + Expr input1; + Expr input2; + Expr input1_scale; + Expr input1_zero_point; + Expr input2_scale; + Expr input2_zero_point; + Expr output_scale; + Expr output_zero_point; +}; + /*! * \brief Converts qnn.mul to mathematically equivalent * qnn.conv2d depthwise operation. + * + * \param expr The expression to attempt to convert. + * + * \return Null if conversion is not supported else the converted expression. 
*/ -Expr ConvertQnnMultiply(const Expr& expr) { +Optional ConvertQnnMultiplyToDepthwise(const Expr& expr) { Call call = Downcast(expr); + const auto params = BinaryElementwiseParams::ExtractBinaryElementwiseParams(call); - Expr input1 = call->args[0]; - Expr input2 = call->args[1]; - Expr input1_scale = call->args[2]; - Expr input1_zero_point = call->args[3]; - Expr input2_scale = call->args[4]; - Expr input2_zero_point = call->args[5]; - // Reverse the inputs if the constant is first input - if (call->args[0]->IsInstance()) { - input1 = call->args[1]; - input2 = call->args[0]; - input1_scale = call->args[4]; - input1_zero_point = call->args[5]; - input2_scale = call->args[2]; - input2_zero_point = call->args[3]; + Constant input_constant = Downcast(params.input2); + TensorType input_constant_tt = Downcast(input_constant->checked_type()); + TensorType input_tt = Downcast(call->checked_type()); + int channels = Downcast(input_tt->shape.back())->value; + if (channels != Downcast(input_constant_tt->Size())->value) { + return NullOpt; } - Expr output_scale = call->args[6]; - Expr output_zero_point = call->args[7]; - - const auto* input_constant = input2.as(); - ICHECK(input_constant) << "Expected ConstantNode but got " << input2->GetTypeKey(); - Type input_constant_type = input_constant->checked_type(); - const auto* input_constant_tt = input_constant_type.as(); - ICHECK(input_constant) << "Expected TensorTypeNode but got " << input_constant_type->GetTypeKey(); - int channels = input_constant_tt->shape.back().as()->value; runtime::NDArray input_data = input_constant->data; runtime::NDArray kernel_data_hwoi = @@ -77,62 +103,53 @@ Expr ConvertQnnMultiply(const Expr& expr) { kernel_data_hwoi.CopyFrom(input_data); Constant kernel = Constant(kernel_data_hwoi, input_constant->span); - Type output_type = expr->checked_type(); - auto output_tt = output_type.as(); - ICHECK(output_tt) << "Expected TensorTypeNode but got " << output_type->GetTypeKey(); + TensorType output_tt = 
Downcast(expr->checked_type()); DataType output_dtype = output_tt->dtype; - Expr conv2d = qnn::MakeQnnConv2D( - input1, kernel, input1_zero_point, input2_zero_point, input1_scale, input2_scale, {1, 1}, - {0, 0, 0, 0}, {1, 1}, channels, channels, {1, 1}, "NHWC", "HWOI", "NHWC", DataType::Int(32)); + Expr conv2d = + qnn::MakeQnnConv2D(params.input1, kernel, params.input1_zero_point, params.input2_zero_point, + params.input1_scale, params.input2_scale, {1, 1}, {0, 0, 0, 0}, {1, 1}, + channels, channels, {1, 1}, "NHWC", "HWOI", "NHWC", DataType::Int(32)); Constant bias_data = MakeConstantZeros(DataType::Int(32), {channels}); Expr bias_add = MakeBiasAdd(conv2d, bias_data, 3); - Expr requantize = qnn::MakeRequantize(bias_add, input1_scale, input1_zero_point, output_scale, - output_zero_point, -1, "None", "None", output_dtype); + Expr requantize = qnn::MakeRequantize(bias_add, params.input1_scale, params.input1_zero_point, + params.output_scale, params.output_zero_point, -1, "None", + "None", output_dtype); - return InferType(requantize); + try { + requantize = InferType(requantize); + return requantize; + } catch (tvm::Error& e) { + // Conversion produced an invalid op. + return NullOpt; + } } -TVM_REGISTER_GLOBAL("relay.backend.contrib.ethos-n.ConvertQnnMultiply") - .set_body_typed(ConvertQnnMultiply); - /*! * \brief Converts qnn.add to a mathematically equivalent * qnn.conv2d depthwise operation. + * + * \param expr The expression to attempt to convert. + * + * \return Null if conversion is not supported else the converted expression. 
*/ -Expr ConvertQnnAdd(const Expr& expr) { +Optional ConvertQnnAddToDepthwise(const Expr& expr) { Call call = Downcast(expr); + const auto params = BinaryElementwiseParams::ExtractBinaryElementwiseParams(call); - Expr input1 = call->args[0]; - Expr input2 = call->args[1]; - Expr input1_scale = call->args[2]; - Expr input1_zero_point = call->args[3]; - Expr input2_scale = call->args[4]; - Expr input2_zero_point = call->args[5]; - // Reverse the inputs if the constant is first input - if (call->args[0]->IsInstance()) { - input1 = call->args[1]; - input2 = call->args[0]; - input1_scale = call->args[4]; - input1_zero_point = call->args[5]; - input2_scale = call->args[2]; - input2_zero_point = call->args[3]; + Constant input_constant = Downcast(params.input2); + TensorType input_constant_tt = Downcast(input_constant->checked_type()); + TensorType input_tt = Downcast(call->checked_type()); + int channels = Downcast(input_tt->shape.back())->value; + if (channels != Downcast(input_constant_tt->Size())->value) { + return NullOpt; } - Expr output_scale = call->args[6]; - Expr output_zero_point = call->args[7]; - - const auto* input_constant = input2.as(); - ICHECK(input_constant) << "Expected ConstantNode but got " << input2->GetTypeKey(); - Type input_constant_type = input_constant->checked_type(); - const auto* input_constant_tt = input_constant_type.as(); - ICHECK(input_constant) << "Expected TensorTypeNode but got " << input_constant_type->GetTypeKey(); - int channels = input_constant_tt->shape.back().as()->value; // Create the identity kernel. The kernel data is constructed such that it produces an identity // operation in the quantized space. Therefore, the input is not scaled in any way which allows // us to later use the bias to perform the addition. 
- float input_scale_value = GetScalarFromConstant(input1_scale); - float output_scale_value = GetScalarFromConstant(output_scale); + float input_scale_value = GetScalarFromConstant(params.input1_scale); + float output_scale_value = GetScalarFromConstant(params.output_scale); float identity_kernel_scale_ub = std::min(output_scale_value / input_scale_value, 1.f); float identity_kernel_scale_lb = (1.f / 255.f); float identity_kernel_scale_target = (identity_kernel_scale_ub + identity_kernel_scale_lb) / 2.f; @@ -153,25 +170,131 @@ Expr ConvertQnnAdd(const Expr& expr) { MakeConstantScalar(DataType::Float(32), input_scale_value * identity_kernel_scale_value); Constant bias_zero_point = MakeConstantScalar(DataType::Int(32), 0); Expr requantize_bias = - qnn::MakeRequantize(input2, input2_scale, input2_zero_point, bias_scale, bias_zero_point, -1, - "None", "None", DataType::Int(32)); + qnn::MakeRequantize(params.input2, params.input2_scale, params.input2_zero_point, bias_scale, + bias_zero_point, -1, "None", "None", DataType::Int(32)); Expr reshape_bias = MakeReshape(requantize_bias, {channels}); - Constant bias = Downcast(FoldConstantExpr(reshape_bias)); + + try { + reshape_bias = FoldConstantExpr(reshape_bias); + } catch (tvm::Error& e) { + // Conversion produced an invalid op. 
+ return NullOpt; + } + Constant bias = Downcast(reshape_bias); // Make depthwise conv2d operation - Expr conv2d = - qnn::MakeQnnConv2D(input1, identity_kernel, input1_zero_point, identity_kernel_zero_point, - input1_scale, identity_kernel_scale, {1, 1}, {0, 0, 0, 0}, {1, 1}, - channels, channels, {1, 1}, "NHWC", "HWOI", "NHWC", DataType::Int(32)); + Expr conv2d = qnn::MakeQnnConv2D(params.input1, identity_kernel, params.input1_zero_point, + identity_kernel_zero_point, params.input1_scale, + identity_kernel_scale, {1, 1}, {0, 0, 0, 0}, {1, 1}, channels, + channels, {1, 1}, "NHWC", "HWOI", "NHWC", DataType::Int(32)); Expr bias_add = MakeBiasAdd(conv2d, bias, 3); - Expr requantize = - qnn::MakeRequantize(bias_add, input1_scale, input1_zero_point, output_scale, - output_zero_point, -1, "None", "None", input_constant_tt->dtype); + Expr requantize = qnn::MakeRequantize(bias_add, params.input1_scale, params.input1_zero_point, + params.output_scale, params.output_zero_point, -1, "None", + "None", input_constant_tt->dtype); - return InferType(requantize); + try { + return InferType(requantize); + } catch (tvm::Error& e) { + // Conversion produced an invalid op. + return NullOpt; + } } -TVM_REGISTER_GLOBAL("relay.backend.contrib.ethos-n.ConvertQnnAdd").set_body_typed(ConvertQnnAdd); +/*! + * \brief Converts qnn.mul to a mathematically equivalent qnn.requantize operation. + * When converting to support library API, a reinterpret quantize operation will be created. + * + * \param expr The expression to attempt to convert. + * + * \return Null if conversion is not supported else the converted expression. 
+ */ +Optional ConvertQnnMultiplyToReinterpretQuantize(const Expr& expr) { + Call call = Downcast(expr); + const auto params = BinaryElementwiseParams::ExtractBinaryElementwiseParams(call); + + Constant input_constant = Downcast(params.input2); + TensorType input_constant_tt = Downcast(input_constant->checked_type()); + if (Downcast(input_constant_tt->Size())->value != 1) { + return NullOpt; + } + + float input_scale_value = GetScalarFromConstant(params.input1_scale); + float constant_scale_value = GetScalarFromConstant(params.input2_scale); + int constant_zero_point_value = GetScalarFromConstant(params.input2_zero_point); + float new_output_scale_value = input_scale_value * constant_scale_value * + (ToScalar(input_constant->data) - constant_zero_point_value); + Constant new_output_scale = MakeConstantScalar(DataType::Float(32), new_output_scale_value); + + if (std::abs(new_output_scale_value - GetScalarFromConstant(params.output_scale)) > + 0.004f) { + // Multiply does not represent an identity operation so don't convert. + return NullOpt; + } + + DataType output_data_type = Downcast(call->checked_type())->dtype; + + // A requantize operation is used to represent the identity reinterpret quantize op in + // the support library at this stage. That is, requantize is used here as a means for + // passing the quantization information to the API conversion layer. + Expr requantize = qnn::MakeRequantize( + params.input1, params.input1_scale, params.input1_zero_point, params.output_scale, + params.output_zero_point, -1, "None", "None", output_data_type); + + try { + return InferType(requantize); + } catch (tvm::Error& e) { + // Conversion produced an invalid op. + return NullOpt; + } +} + +/*! + * \brief Converts qnn.add to a mathematically equivalent qnn.requantize operation. + * When converting to support library API, a reinterpret quantize operation will be created. + * + * \param expr The expression to attempt to convert. 
+ * + * \return Null if conversion is not supported else the converted expression. + */ +Optional ConvertQnnAddToReinterpretQuantize(const Expr& expr) { + Call call = Downcast(expr); + const auto params = BinaryElementwiseParams::ExtractBinaryElementwiseParams(call); + + Constant input_constant = Downcast(params.input2); + TensorType input_constant_tt = Downcast(input_constant->checked_type()); + if (Downcast(input_constant_tt->Size())->value != 1) { + return NullOpt; + } + + float input_scale = GetScalarFromConstant(params.input1_scale); + int input_zero_point = GetScalarFromConstant(params.input1_zero_point); + float scalar_scale = GetScalarFromConstant(params.input2_scale); + int scalar_zero_point = GetScalarFromConstant(params.input2_zero_point); + int output_zero_point_value = GetScalarFromConstant(params.output_zero_point); + float scalar_value = (ToScalar(input_constant->data) - scalar_zero_point) * scalar_scale; + + float new_output_zero_point_value = input_zero_point - (scalar_value / input_scale); + if (new_output_zero_point_value - output_zero_point_value > 1.0f) { + // Add does not represent an identity operation so don't convert + return NullOpt; + } + + DataType output_data_type = Downcast(call->checked_type())->dtype; + + // A requantize operation is used to represent the identity reinterpret quantize op in + // the support library at this stage. That is, requantize is used here as a means for + // passing the quantization information to the API conversion layer. + Expr requantize = qnn::MakeRequantize( + params.input1, params.input1_scale, params.input1_zero_point, params.output_scale, + params.output_zero_point, -1, "None", "None", output_data_type); + + try { + return InferType(requantize); + } catch (tvm::Error& e) { + // Conversion produced an invalid op. 
+ return NullOpt; + } +} class ConvertEquivalentsMutator : public MixedModeMutator { public: @@ -184,29 +307,34 @@ class ConvertEquivalentsMutator : public MixedModeMutator { Function func = Downcast(call->op); Function new_func = Function(func); auto composite_name = func->GetAttr(attr::kComposite); - if (composite_name == "ethos-n.qnn_mul") { - Expr new_func_body = ConvertQnnMultiply(func->body); - new_func = WithFields(func, func->params, new_func_body); - new_func = WithAttr(std::move(new_func), attr::kComposite, String("ethos-n.qnn_conv2d")); - } else if (composite_name == "ethos-n.qnn_add" && CheckCanConvertAdd(func->body)) { - Expr new_func_body = ConvertQnnAdd(func->body); - new_func = WithFields(func, func->params, new_func_body); - new_func = WithAttr(std::move(new_func), attr::kComposite, String("ethos-n.qnn_conv2d")); + + Optional optional_new_func_body; + String new_composite_name = ""; + if (composite_name == "ethos-n.qnn_mul_to_reinterpret_quantize") { + optional_new_func_body = ConvertQnnMultiplyToReinterpretQuantize(func->body); + new_composite_name = "ethos-n.qnn_reinterpret_quantize"; + } else if (composite_name == "ethos-n.qnn_mul_to_depthwise") { + optional_new_func_body = ConvertQnnMultiplyToDepthwise(func->body); + new_composite_name = "ethos-n.qnn_conv2d"; + } else if (composite_name == "ethos-n.qnn_add_to_reinterpret_quantize") { + optional_new_func_body = ConvertQnnAddToReinterpretQuantize(func->body); + new_composite_name = "ethos-n.qnn_reinterpret_quantize"; + } else if (composite_name == "ethos-n.qnn_add_to_depthwise") { + optional_new_func_body = ConvertQnnAddToDepthwise(func->body); + new_composite_name = "ethos-n.qnn_conv2d"; + } + + if (new_composite_name != "") { + ICHECK(optional_new_func_body) + << "Operation " << composite_name + << " was marked as having a valid conversion, but it could not be converted."; + new_func = WithFields(func, func->params, optional_new_func_body.value()); + new_func = WithAttr(std::move(new_func), 
attr::kComposite, new_composite_name); } Call new_call = WithFields(call, new_func); return Downcast(new_call); } - - private: - /*! - * \brief Check whether add can be converted to depthwise, or whether - * it should be offloaded as a normal add operation. - */ - bool CheckCanConvertAdd(const Expr& expr) { - Call call = Downcast(expr); - return call->args[0]->IsInstance() || call->args[1]->IsInstance(); - } }; tvm::transform::Pass ConvertEquivalents() { @@ -229,6 +357,18 @@ tvm::transform::Pass ConvertEquivalents() { pass_func, 0, "relay.backend.contrib.ethos-n.ConvertEquivalents", {"InferType"}); } +TVM_REGISTER_GLOBAL("relay.backend.contrib.ethos-n.ConvertQnnMultiplyToDepthwise") + .set_body_typed(ConvertQnnMultiplyToDepthwise); + +TVM_REGISTER_GLOBAL("relay.backend.contrib.ethos-n.ConvertQnnAddToDepthwise") + .set_body_typed(ConvertQnnAddToDepthwise); + +TVM_REGISTER_GLOBAL("relay.backend.contrib.ethos-n.ConvertQnnMultiplyToReinterpretQuantize") + .set_body_typed(ConvertQnnMultiplyToReinterpretQuantize); + +TVM_REGISTER_GLOBAL("relay.backend.contrib.ethos-n.ConvertQnnAddToReinterpretQuantize") + .set_body_typed(ConvertQnnAddToReinterpretQuantize); + TVM_REGISTER_GLOBAL("relay.backend.contrib.ethos-n.ConvertEquivalents") .set_body_typed(ConvertEquivalents); diff --git a/src/relay/backend/contrib/ethosn/ethosn_api.cc b/src/relay/backend/contrib/ethosn/ethosn_api.cc index ce57cc23419a..dbcdecd8f382 100644 --- a/src/relay/backend/contrib/ethosn/ethosn_api.cc +++ b/src/relay/backend/contrib/ethosn/ethosn_api.cc @@ -809,6 +809,42 @@ EthosnError EthosnAPI::Requantize(const Expr& expr, RequantizeParams* params) { return err; } +EthosnError EthosnAPI::ReinterpretQuantize(const Expr& expr, + ReinterpretQuantizationParams* params) { + Call call = Downcast(expr); + const auto* input_ttype = call->args[0]->checked_type().as(); + sl::TensorShape input_tensor_shape = {1, 1, 1, 1}; + sl::DataType input_data_type; + EthosnError err = Tvm2Npu(input_ttype->shape, 
&input_tensor_shape); + err += Tvm2Npu(input_ttype->dtype, &input_data_type); + + const auto* output_ttype = call->checked_type().as(); + sl::TensorShape output_tensor_shape = {1, 1, 1, 1}; + sl::DataType output_data_type; + err += Tvm2Npu(output_ttype->shape, &output_tensor_shape); + err += Tvm2Npu(output_ttype->dtype, &output_data_type); + + float input_sc, output_sc; + int input_zp, output_zp; + err += AsConstant(call->args[1], &input_sc); + err += AsConstant(call->args[2], &input_zp); + err += AsConstant(call->args[3], &output_sc); + err += AsConstant(call->args[4], &output_zp); + + sl::QuantizationInfo input_q_info; + err += Tvm2Npu(input_zp, input_sc, &input_q_info); + params->input_info = + sl::TensorInfo(input_tensor_shape, input_data_type, sl::DataFormat::NHWC, input_q_info); + + sl::QuantizationInfo reinterpret_quantize_q_info; + err += Tvm2Npu(output_zp, output_sc, &reinterpret_quantize_q_info); + params->reinterpret_quantize_info = sl::ReinterpretQuantizationInfo(reinterpret_quantize_q_info); + + params->output_info = sl::TensorInfo(output_tensor_shape, output_data_type, sl::DataFormat::NHWC, + reinterpret_quantize_q_info); + return err; +} + EthosnError EthosnAPI::Resize(const Expr& expr, ResizeParams* params) { Call requantize = Downcast(expr); Call resize = Downcast(requantize->args[0]); diff --git a/src/relay/backend/contrib/ethosn/ethosn_api.h b/src/relay/backend/contrib/ethosn/ethosn_api.h index 167106c3d06d..3d704f2757c6 100644 --- a/src/relay/backend/contrib/ethosn/ethosn_api.h +++ b/src/relay/backend/contrib/ethosn/ethosn_api.h @@ -157,6 +157,12 @@ struct RequantizeParams { sl::TensorInfo output_info; }; +struct ReinterpretQuantizationParams { + sl::ReinterpretQuantizationInfo reinterpret_quantize_info; + sl::TensorInfo input_info; + sl::TensorInfo output_info; +}; + struct ResizeParams { sl::ResizeInfo resize_info; sl::TensorInfo input_info; @@ -261,6 +267,16 @@ class EthosnAPI { static EthosnError Relu(const Expr& expr, ReluParams* params); 
/*! \brief Extract the Support Library requantize params from a Relay qnn.requantize call */ static EthosnError Requantize(const Expr& expr, RequantizeParams* params); + + /*! + * \brief Extract the Support Library reinterpret quantization params from a Relay qnn.requantize + * call. + * + * \note This is used for the conversion from add and mul to a reinterpret quantization operator. + * This is effectively an identity operation, and not the same as 'requantize'. + */ + static EthosnError ReinterpretQuantize(const Expr& expr, ReinterpretQuantizationParams* params); + /*! \brief Extract the Support Library resize params from a Relay resize call */ static EthosnError Resize(const Expr& expr, ResizeParams* params); diff --git a/tests/python/contrib/test_ethosn/test_addition.py b/tests/python/contrib/test_ethosn/test_addition.py index 72981182e17f..11d8b8d1cd56 100644 --- a/tests/python/contrib/test_ethosn/test_addition.py +++ b/tests/python/contrib/test_ethosn/test_addition.py @@ -37,6 +37,7 @@ def _get_model( dtype, lhs_is_constant=False, rhs_is_constant=False, + constant_data=None, ): """Return a model and any parameters it may have""" @@ -45,13 +46,14 @@ def _get_model( data_max = iinfo.max if lhs_is_constant: - a_data = np.random.randint(data_min, data_max + 1, size=lhs_shape, dtype=dtype) + a_data = np.array(constant_data, dtype=dtype).reshape(lhs_shape) a = relay.const(a_data, dtype=dtype) else: a = relay.var("a", shape=lhs_shape, dtype=dtype) if rhs_is_constant: - b_data = np.random.randint(data_min, data_max + 1, size=rhs_shape, dtype=dtype) + b_data = np.array(constant_data, dtype=dtype).reshape(rhs_shape) + np.random.randint(data_min, data_max + 1, size=rhs_shape, dtype=dtype) b = relay.const(b_data, dtype=dtype) else: b = relay.var("b", shape=rhs_shape, dtype=dtype) @@ -117,13 +119,15 @@ def test_addition(dtype, shape): @requires_ethosn @pytest.mark.parametrize("dtype", ["uint8", "int8"]) @pytest.mark.parametrize( - "lhs_shape,rhs_shape", + 
"lhs_shape,lhs_is_constant,rhs_shape,rhs_is_constant", [ - ((1, 4, 4, 8), (1, 1, 1, 8)), - ((1, 16, 12, 4), (4,)), + ((1, 4, 4, 8), False, (1, 1, 1, 8), True), + ((4,), True, (1, 16, 12, 4), False), + ((1, 1, 1, 8), True, (1, 4, 4, 8), False), + ((1, 16, 12, 4), False, (4,), True), ], ) -def test_addition_to_depthwise_rhs_constant(dtype, lhs_shape, rhs_shape): +def test_addition_to_depthwise(dtype, lhs_shape, lhs_is_constant, rhs_shape, rhs_is_constant): """Compare addition to depthwise with TVM.""" np.random.seed(0) @@ -132,6 +136,9 @@ def test_addition_to_depthwise_rhs_constant(dtype, lhs_shape, rhs_shape): data_max = iinfo.max lhs_zp, lhs_sc, rhs_zp, rhs_sc, out_zp, out_sc = _get_addition_qnn_params(dtype) + constant_shape = lhs_shape if lhs_is_constant else rhs_shape + constant_data = np.random.randint(data_min, data_max + 1, size=constant_shape, dtype=dtype) + model = _get_model( lhs_shape, rhs_shape, @@ -142,11 +149,16 @@ def test_addition_to_depthwise_rhs_constant(dtype, lhs_shape, rhs_shape): out_zp, out_sc, dtype, - lhs_is_constant=False, - rhs_is_constant=True, + lhs_is_constant=lhs_is_constant, + rhs_is_constant=rhs_is_constant, + constant_data=constant_data, ) + input_shape = rhs_shape if lhs_is_constant else lhs_shape + input_name = "b" if lhs_is_constant else "a" inputs = { - "a": tvm.nd.array(np.random.randint(data_min, data_max + 1, size=lhs_shape, dtype=dtype)) + input_name: tvm.nd.array( + np.random.randint(data_min, data_max + 1, size=input_shape, dtype=dtype) + ) } outputs = [] for npu in [False, True]: @@ -156,21 +168,40 @@ def test_addition_to_depthwise_rhs_constant(dtype, lhs_shape, rhs_shape): @requires_ethosn -@pytest.mark.parametrize("dtype", ["uint8", "int8"]) @pytest.mark.parametrize( - "lhs_shape,rhs_shape", + "lhs_shape,lhs_is_constant,rhs_shape,rhs_is_constant", [ - ((1, 8), (1, 20, 15, 8)), + ((1, 2, 8, 4), False, None, True), + ((1, 5, 6, 7), False, (1, 1, 1, 1), True), + (None, True, (1, 2, 8, 4), False), + ((1, 1, 1, 1), True, (1, 
5, 6, 7), False), ], ) -def test_addition_to_depthwise_lhs_constant(dtype, lhs_shape, rhs_shape): +def test_addition_to_reinterpret_quantize(lhs_shape, lhs_is_constant, rhs_shape, rhs_is_constant): """Compare addition to depthwise with TVM.""" np.random.seed(0) + dtype = "uint8" iinfo = np.iinfo(dtype) data_min = iinfo.min data_max = iinfo.max - lhs_zp, lhs_sc, rhs_zp, rhs_sc, out_zp, out_sc = _get_addition_qnn_params(dtype) + + # Add can only be offloaded as a reinterpret quantize operation if + # it is an identity operation. We must choose the quantization and + # constant data carefully to make sure that this is the case. + if lhs_is_constant: + rhs_zp = 128 + rhs_sc = 0.0078125 + lhs_zp = 0 + lhs_sc = 0.003921568859368563 + else: + lhs_zp = 128 + lhs_sc = 0.0078125 + rhs_zp = 0 + rhs_sc = 0.003921568859368563 + out_zp = 0 + out_sc = 0.007814894430339336 + constant_data = 255 model = _get_model( lhs_shape, @@ -182,11 +213,16 @@ def test_addition_to_depthwise_lhs_constant(dtype, lhs_shape, rhs_shape): out_zp, out_sc, dtype, - lhs_is_constant=True, - rhs_is_constant=False, + lhs_is_constant=lhs_is_constant, + rhs_is_constant=rhs_is_constant, + constant_data=constant_data, ) + input_shape = rhs_shape if lhs_is_constant else lhs_shape + input_name = "b" if lhs_is_constant else "a" inputs = { - "b": tvm.nd.array(np.random.randint(data_min, data_max + 1, size=rhs_shape, dtype=dtype)) + input_name: tvm.nd.array( + np.random.randint(data_min, data_max + 1, size=input_shape, dtype=dtype) + ) } outputs = [] for npu in [False, True]: diff --git a/tests/python/contrib/test_ethosn/test_convert_equivalents.py b/tests/python/contrib/test_ethosn/test_convert_equivalents.py index c8d1b5729d83..77777293729c 100644 --- a/tests/python/contrib/test_ethosn/test_convert_equivalents.py +++ b/tests/python/contrib/test_ethosn/test_convert_equivalents.py @@ -74,7 +74,7 @@ def before(): relay.const(output_sc, "float32"), relay.const(output_zp, "int32"), ) - composite = 
tei.make_ethosn_composite(expr, "ethos-n.qnn_mul") + composite = tei.make_ethosn_composite(expr, "ethos-n.qnn_mul_to_depthwise") return tei.make_ethosn_partition(composite) def expected(): @@ -117,6 +117,165 @@ def expected(): _assert_structural_equal(mod["ethos-n_0"], expected_mod["ethos-n_0"]) +@requires_ethosn +@pytest.mark.parametrize( + "dtype,shape,constant_shape", + [("int8", (1, 4, 4), (4,)), ("int16", (1, 16, 12, 4), (1, 1, 1, 4))], +) +def test_unsupported_multiply_to_depthwise(dtype, shape, constant_shape): + """Check that unsupported variants of multiply to depthwise are not converted.""" + np.random.seed(0) + + iinfo = np.iinfo(dtype) + data_min = iinfo.min + data_max = iinfo.max + input_zp = np.random.randint(data_min, data_max) + input_sc = np.random.random() * 2 + input2_zp = np.random.randint(data_min, data_max) + input2_sc = np.random.random() * 2 + output_zp, output_sc = tei.get_conv2d_qnn_params( + dtype, input_zp, input_sc, input2_zp, input2_sc, 1, 1, shape[-1] + ) + x = relay.var("x", shape=shape, dtype=dtype) + y_data = np.random.randint(data_min, data_max + 1, size=constant_shape, dtype=dtype) + + def before(): + y = relay.const(y_data, dtype=dtype) + expr = relay.qnn.op.mul( + x, + y, + relay.const(input_sc, "float32"), + relay.const(input_zp, "int32"), + relay.const(input2_sc, "float32"), + relay.const(input2_zp, "int32"), + relay.const(output_sc, "float32"), + relay.const(output_zp, "int32"), + ) + composite = tei.make_ethosn_composite(expr, "ethos-n.qnn_mul_to_depthwise") + return tei.make_ethosn_partition(composite) + + mod = before() + + error_regex = ( + r'Operation "ethos-n.qnn_mul_to_depthwise" was marked ' + r"as having a valid conversion, but it could not be converted." 
+ ) + + with pytest.raises(tvm.TVMError, match=error_regex): + mod = ConvertEquivalents()(mod) + + +@requires_ethosn +@pytest.mark.parametrize( + "shape,constant_shape", + [((1, 4, 4, 8), (1, 1, 1, 1)), ((1, 16, 12, 4), None)], +) +@pytest.mark.parametrize("reverse_inputs", [True, False]) +def test_multiply_to_reinterpret_quantize(shape, constant_shape, reverse_inputs): + """Check that multiply is correctly converted to a reinterpret quantize operation.""" + np.random.seed(0) + + dtype = "uint8" + + # Multiply can only be offloaded as a reinterpret quantize operation if + # it is an identity operation. We must choose the quantization and constant + # data carefully to make sure that this is the case. + input_zp = 0 + input_sc = 0.007814894430339336 + input2_zp = 0 + input2_sc = 0.5 + output_zp = 0 + output_sc = 0.9963990449905396 + constant_data = 255 + + x = relay.var("x", shape=shape, dtype=dtype) + y_data = np.array(constant_data, dtype=dtype).reshape(constant_shape) + + def before(): + y = relay.const(y_data, dtype=dtype) + expr = relay.qnn.op.mul( + y if reverse_inputs else x, + x if reverse_inputs else y, + relay.const(input2_sc if reverse_inputs else input_sc, "float32"), + relay.const(input2_zp if reverse_inputs else input_zp, "int32"), + relay.const(input_sc if reverse_inputs else input2_sc, "float32"), + relay.const(input_zp if reverse_inputs else input2_zp, "int32"), + relay.const(output_sc, "float32"), + relay.const(output_zp, "int32"), + ) + composite = tei.make_ethosn_composite(expr, "ethos-n.qnn_mul_to_reinterpret_quantize") + return tei.make_ethosn_partition(composite) + + def expected(): + expr = relay.qnn.op.requantize( + x, + relay.const(input_sc, "float32"), + relay.const(input_zp if reverse_inputs else input_zp, "int32"), + relay.const(output_sc, "float32"), + relay.const(output_zp, "int32"), + out_dtype=dtype, + ) + composite = tei.make_ethosn_composite(expr, "ethos-n.qnn_reinterpret_quantize") + return tei.make_ethosn_partition(composite) + 
mod = before() + mod = ConvertEquivalents()(mod) + expected_mod = expected() + _assert_structural_equal(mod["ethos-n_0"], expected_mod["ethos-n_0"]) + + +@requires_ethosn +@pytest.mark.parametrize( + "dtype,shape,constant_shape", + [("int16", (1, 16, 12, 4), None)], +) +def test_unsupported_multiply_to_reinterpret_quantize(dtype, shape, constant_shape): + """ + Check that unsupported variants of multiply conversion to reinterpret + quantize are not converted. + """ + np.random.seed(0) + + # Multiply can only be offloaded as a reinterpret quantize operation if + # it is an identity operation. We must choose the quantization and constant + # data carefully to make sure that this is the case. + input_zp = 0 + input_sc = 0.007814894430339336 + input2_zp = 0 + input2_sc = 0.5 + output_zp = 0 + output_sc = 0.9963990449905396 + constant_data = 255 + + x = relay.var("x", shape=shape, dtype=dtype) + y_data = np.array(constant_data, dtype=dtype).reshape(constant_shape) + + def before(): + y = relay.const(y_data, dtype=dtype) + expr = relay.qnn.op.mul( + x, + y, + relay.const(input_sc, "float32"), + relay.const(input_zp, "int32"), + relay.const(input2_sc, "float32"), + relay.const(input2_zp, "int32"), + relay.const(output_sc, "float32"), + relay.const(output_zp, "int32"), + ) + composite = tei.make_ethosn_composite(expr, "ethos-n.qnn_mul_to_reinterpret_quantize") + return tei.make_ethosn_partition(composite) + + mod = before() + + error_regex = ( + r'Operation "ethos-n.qnn_mul_to_reinterpret_quantize" was marked ' + r"as having a valid conversion, but it could not be converted." 
+ ) + + with pytest.raises(tvm.TVMError, match=error_regex): + mod = ConvertEquivalents()(mod) + + @requires_ethosn @pytest.mark.parametrize("reverse_inputs", [True, False]) def test_add_to_depthwise(reverse_inputs): @@ -148,7 +307,7 @@ def before(): output_scale=relay.const(out_sc, "float32"), output_zero_point=relay.const(out_zp, "int32"), ) - composite = tei.make_ethosn_composite(expr, "ethos-n.qnn_add") + composite = tei.make_ethosn_composite(expr, "ethos-n.qnn_add_to_depthwise") return tei.make_ethosn_partition(composite) class ConversionChecker(ExprVisitor): @@ -176,3 +335,158 @@ def visit_call(self, call): mod = before() mod = ConvertEquivalents()(mod) mod = ConversionChecker().visit(mod["ethos-n_0"].body.op) + + +@requires_ethosn +@pytest.mark.parametrize( + "dtype,lhs_shape,rhs_shape", [("uint8", (1, 4, 4), (1, 1, 4)), ("int16", (1, 4, 4, 4), (4,))] +) +def test_unsupported_add_to_depthwise(dtype, lhs_shape, rhs_shape): + """Check that unsupported variants of add are not converted.""" + np.random.seed(0) + + iinfo = np.iinfo(dtype) + data_min = iinfo.min + data_max = iinfo.max + lhs_zp, lhs_sc, rhs_zp, rhs_sc, out_zp, out_sc = _get_addition_qnn_params(dtype) + + x = relay.var("x", shape=lhs_shape, dtype=dtype) + y_data = np.random.randint(data_min, data_max + 1, size=rhs_shape, dtype=dtype) + + def before(): + y = relay.const(y_data) + expr = relay.qnn.op.add( + lhs=x, + rhs=y, + lhs_scale=relay.const(lhs_sc, "float32"), + lhs_zero_point=relay.const(lhs_zp, "int32"), + rhs_scale=relay.const(rhs_sc, "float32"), + rhs_zero_point=relay.const(rhs_zp, "int32"), + output_scale=relay.const(out_sc, "float32"), + output_zero_point=relay.const(out_zp, "int32"), + ) + composite = tei.make_ethosn_composite(expr, "ethos-n.qnn_add_to_depthwise") + return tei.make_ethosn_partition(composite) + + mod = before() + + error_regex = ( + r'Operation "ethos-n.qnn_add_to_depthwise" was marked ' + r"as having a valid conversion, but it could not be converted." 
+ ) + + with pytest.raises(tvm.TVMError, match=error_regex): + mod = ConvertEquivalents()(mod) + + +@requires_ethosn +@pytest.mark.parametrize( + "shape,constant_shape", + [ + ((1, 4, 4, 8), (1, 1, 1, 1)), + ((1, 16, 12, 4), None), + ], +) +@pytest.mark.parametrize("reverse_inputs", [True, False]) +def test_add_to_reinterpret_quantize(shape, constant_shape, reverse_inputs): + """Check that add is correctly converted to a reinterpret quantize operation.""" + np.random.seed(0) + + dtype = "uint8" + + # Add can only be offloaded as a reinterpret quantize operation if + # it is an identity operation. We must choose the quantization and constant + # data carefully to make sure that this is the case. + input_zp = 128 + input_sc = 0.0078125 + input2_zp = 0 + input2_sc = 0.003921568859368563 + output_zp = 0 + output_sc = 0.007814894430339336 + constant_data = 255 + + x = relay.var("x", shape=shape, dtype=dtype) + y_data = np.array(constant_data, dtype=dtype).reshape(constant_shape) + + def before(): + y = relay.const(y_data, dtype=dtype) + expr = relay.qnn.op.add( + y if reverse_inputs else x, + x if reverse_inputs else y, + relay.const(input2_sc if reverse_inputs else input_sc, "float32"), + relay.const(input2_zp if reverse_inputs else input_zp, "int32"), + relay.const(input_sc if reverse_inputs else input2_sc, "float32"), + relay.const(input_zp if reverse_inputs else input2_zp, "int32"), + relay.const(output_sc, "float32"), + relay.const(output_zp, "int32"), + ) + composite = tei.make_ethosn_composite(expr, "ethos-n.qnn_add_to_reinterpret_quantize") + return tei.make_ethosn_partition(composite) + + def expected(): + expr = relay.qnn.op.requantize( + x, + relay.const(input_sc, "float32"), + relay.const(input_zp if reverse_inputs else input_zp, "int32"), + relay.const(output_sc, "float32"), + relay.const(output_zp, "int32"), + out_dtype=dtype, + ) + composite = tei.make_ethosn_composite(expr, "ethos-n.qnn_reinterpret_quantize") + return tei.make_ethosn_partition(composite) + 
+ + mod = before() + mod = ConvertEquivalents()(mod) + expected_mod = expected() + _assert_structural_equal(mod["ethos-n_0"], expected_mod["ethos-n_0"]) + + +@requires_ethosn +@pytest.mark.parametrize( + "dtype,shape,constant_shape", + [ + ("int16", (1, 16, 12, 4), None), + ], +) +def test_unsupported_add_to_reinterpret_quantize(dtype, shape, constant_shape): + """Check that unsupported variants of add to reinterpret quantize are not converted.""" + np.random.seed(0) + + # Add can only be offloaded as a reinterpret quantize operation if + # it is an identity operation. We must choose the quantization and constant + # data carefully to make sure that this is the case. + input_zp = 128 + input_sc = 0.0078125 + input2_zp = 0 + input2_sc = 0.003921568859368563 + output_zp = 0 + output_sc = 0.007814894430339336 + constant_data = 255 + + x = relay.var("x", shape=shape, dtype=dtype) + y_data = np.array(constant_data, dtype=dtype).reshape(constant_shape) + + def before(): + y = relay.const(y_data, dtype=dtype) + expr = relay.qnn.op.add( + x, + y, + relay.const(input_sc, "float32"), + relay.const(input_zp, "int32"), + relay.const(input2_sc, "float32"), + relay.const(input2_zp, "int32"), + relay.const(output_sc, "float32"), + relay.const(output_zp, "int32"), + ) + composite = tei.make_ethosn_composite(expr, "ethos-n.qnn_add_to_reinterpret_quantize") + return tei.make_ethosn_partition(composite) + + mod = before() + + error_regex = ( + r'Operation "ethos-n.qnn_add_to_reinterpret_quantize" was marked ' + r"as having a valid conversion, but it could not be converted." 
+ ) + + with pytest.raises(tvm.TVMError, match=error_regex): + mod = ConvertEquivalents()(mod) diff --git a/tests/python/contrib/test_ethosn/test_multiply.py b/tests/python/contrib/test_ethosn/test_multiply.py index cb95a97db529..41c06092447a 100644 --- a/tests/python/contrib/test_ethosn/test_multiply.py +++ b/tests/python/contrib/test_ethosn/test_multiply.py @@ -38,13 +38,17 @@ def _get_model( output_sc, dtype, reverse_inputs=False, + constant_data=None, ): iinfo = np.iinfo(dtype) data_min = iinfo.min data_max = iinfo.max x = relay.var("x", shape=shape, dtype=dtype) - y_data = np.random.randint(data_min, data_max + 1, size=constant_shape, dtype=dtype) + if constant_data: + y_data = np.array(constant_data, dtype=dtype).reshape(constant_shape) + else: + y_data = np.random.randint(data_min, data_max + 1, size=constant_shape, dtype=dtype) y = relay.const(y_data, dtype=dtype) out = relay.qnn.op.mul( @@ -64,11 +68,12 @@ def _get_model( @requires_ethosn @pytest.mark.parametrize("dtype", ["uint8", "int8"]) @pytest.mark.parametrize( - "shape,constant_shape", [((1, 4, 4, 8), (1, 1, 1, 8)), ((1, 16, 12, 4), (4,))] + "shape,constant_shape", + [((1, 4, 4, 8), (1, 1, 1, 8)), ((1, 16, 12, 4), (4,))], ) @pytest.mark.parametrize("reverse_inputs", [False, True]) -def test_multiply(dtype, shape, constant_shape, reverse_inputs): - """Compare Multiply output with TVM.""" +def test_multiply_to_depthwise(dtype, shape, constant_shape, reverse_inputs): + """Compare Multiply -> Depthwise conversion output with TVM.""" np.random.seed(0) @@ -104,6 +109,53 @@ def test_multiply(dtype, shape, constant_shape, reverse_inputs): tei.verify(outputs, dtype, 1) +@requires_ethosn +@pytest.mark.parametrize( + "shape,constant_shape", [((1, 4, 5, 8), (1, 1, 1, 1)), ((1, 3, 7, 10), None)] +) +@pytest.mark.parametrize("reverse_inputs", [False, True]) +def test_multiply_to_reinterpret_quantize(shape, constant_shape, reverse_inputs): + """Compare Multiply -> Reinterpret Quantize conversion output with TVM.""" 
+ np.random.seed(0) + + dtype = "uint8" + iinfo = np.iinfo(dtype) + data_min = iinfo.min + data_max = iinfo.max + + # Multiply can only be offloaded as a reinterpret quantize operation if + # it is an identity operation. We must choose the quantization and constant + # data carefully to make sure that this is the case. + input_zp = 0 + input_sc = 0.007814894430339336 + input2_zp = 0 + input2_sc = 0.5 + output_zp = 0 + output_sc = 0.9963990449905396 + constant_data = 255 + + model, params = _get_model( + shape, + constant_shape, + input_zp, + input_sc, + input2_zp, + input2_sc, + output_zp, + output_sc, + dtype, + reverse_inputs, + constant_data, + ) + inputs = {"x": tvm.nd.array(np.random.randint(data_min, data_max + 1, size=shape, dtype=dtype))} + outputs = [] + for npu in [False, True]: + mod = tei.make_module(model, params) + outputs.append(tei.build_and_run(mod, inputs, 1, params, npu=npu)) + + tei.verify(outputs, dtype, 1) + + @requires_ethosn def test_multiply_multiple_inputs_unsupported(): """Check multiply operator with two inputs is not offloaded.""" @@ -151,14 +203,19 @@ def test_multiply_multiple_inputs_unsupported(): @requires_ethosn -def test_multiply_unsupported_datatype(): - """Check multiply operator with unsupported datatype is not offloaded.""" +@pytest.mark.parametrize( + "dtype,shape,constant_shape", + [ + ("int16", (1, 4, 5, 6), (1, 1, 1, 6)), + ("int8", (1, 1, 3), (1, 1, 1, 3)), + ("int8", (1, 2, 4, 8), (1, 2, 4, 8)), + ], +) +def test_multiply_unsupported(dtype, shape, constant_shape): + """Check multiply operator with unsupported attributes is not offloaded.""" np.random.seed(0) - shape = (1, 4, 5, 6) - dtype = "int16" - iinfo = np.iinfo(dtype) data_min = iinfo.min data_max = iinfo.max @@ -167,20 +224,21 @@ def test_multiply_unsupported_datatype(): input2_zp = np.random.randint(data_min, data_max) input2_sc = np.random.random() * 2 output_zp, output_sc = tei.get_conv2d_qnn_params( - dtype, input_zp, input_sc, input2_zp, input2_sc, 1, 1, 
shape[3] + dtype, input_zp, input_sc, input2_zp, input2_sc, 1, 1, shape[-1] ) - x = relay.var("x", shape=shape, dtype=dtype) - y = relay.var("y", shape=shape, dtype=dtype) - model = relay.qnn.op.mul( - x, - y, - relay.const(input_sc, "float32"), - relay.const(input_zp, "int32"), - relay.const(input2_sc, "float32"), - relay.const(input2_zp, "int32"), - relay.const(output_sc, "float32"), - relay.const(output_zp, "int32"), + model, params = _get_model( + shape, + constant_shape, + input_zp, + input_sc, + input2_zp, + input2_sc, + output_zp, + output_sc, + dtype, + reverse_inputs=False, + constant_data=False, ) expected_host_ops = 1 @@ -189,7 +247,7 @@ def test_multiply_unsupported_datatype(): mod = tei.make_module(model, {}) tei.build( mod, - {}, + params, npu=npu, expected_host_ops=expected_host_ops, npu_partitions=npu_partitions, From e3d235ecf2ae92bffcf702eaf3781c0e6e7ab476 Mon Sep 17 00:00:00 2001 From: Luke Hutton Date: Tue, 27 Sep 2022 14:30:33 +0000 Subject: [PATCH 2/2] update resnet hash Change-Id: Ia3377b275639e71273fbfe5cfdb2fb5214335987 --- tests/python/contrib/test_ethosn/test_networks.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/python/contrib/test_ethosn/test_networks.py b/tests/python/contrib/test_ethosn/test_networks.py index 2e6b52927769..54ca44805171 100644 --- a/tests/python/contrib/test_ethosn/test_networks.py +++ b/tests/python/contrib/test_ethosn/test_networks.py @@ -144,7 +144,11 @@ def test_resnet_50_int8(): # codegen, which could come about from either a change in Support Library # version or a change in the Ethos-N codegen. To update this requires running # on hardware that isn't available in CI. 
- _compile_hash = {"12d65aec33594c88b6d0d31dcd5144e6", "6a64d69ccb36dfb6b30dd2abdba4b005"} + _compile_hash = { + "6b130a99397715156d5fb833809a92d2", + "6e5fcbab831607b9da1039aff4e56871", + "41acecca37b2735bd580f6ec38d8c2e0", + } _test_image_network( model_url="https://raw.githubusercontent.com/dmlc/web-data/main/tensorflow/" "models/Quantized/resnet_50_quantized.tflite", @@ -152,8 +156,8 @@ def test_resnet_50_int8(): input_dict={"input": (1, 224, 224, 3)}, compile_hash=_compile_hash, output_count=1, - host_ops=10, - npu_partitions=2, + host_ops=9, + npu_partitions=3, )