diff --git a/python/tvm/topi/adreno/conv2d_alter_op.py b/python/tvm/topi/adreno/conv2d_alter_op.py index 6cf749a62b27..cf72cc2a846e 100644 --- a/python/tvm/topi/adreno/conv2d_alter_op.py +++ b/python/tvm/topi/adreno/conv2d_alter_op.py @@ -143,8 +143,11 @@ def _alter_conv2d_layout(attrs, inputs, tinfos, out_type): CO, _, KH, KW = get_const_tuple(kernel_tensor.shape) # pre-compute weight transformation in winograd + # alpha, alpha, CO, CI weight = relay.nn.contrib_conv2d_winograd_weight_transform(inputs[1], tile_size=tile_size) weight = relay.transpose(weight, axes=[2, 3, 0, 1]) # HWOI -> OIHW + # (oc, ic, h, w) -> (h, w, ic, oc) + new_attrs["kernel_layout"] = "HWIO" new_attrs["tile_size"] = tile_size new_attrs["channels"] = CO diff --git a/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py b/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py index 504a2b4e3ed3..4e813b1ffe84 100644 --- a/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py +++ b/tests/python/relay/opencl_texture/test_conv2d_nchw_texture.py @@ -1029,3 +1029,36 @@ def test_conv2d_different_lowering_same_op(target, dtype): ] build_run_compare(mod, params1, {"data": input_shape}, dtype, target, static_memory_scope) + + +@tvm.testing.requires_opencl +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_winograd_non_rect(target, dtype): + input_shape = (1, 771, 36, 64) + A = relay.var("data", shape=input_shape, dtype=dtype) + filter_shape = (128, 771, 3, 3) + B = relay.var("weight", shape=filter_shape, dtype=dtype) + D = relay.nn.conv2d( + A, B, padding=[1, 1, 1, 1], channels=128, kernel_size=[3, 3], out_dtype=dtype + ) + + mod = relay.Function([A, B], D) + np.random.seed(1) + initializer = relay.testing.init.Xavier() + filter_data = np.zeros(filter_shape).astype(dtype) + initializer("weight", filter_data) + params1 = { + "weight": tvm.nd.array(filter_data), + } + + temp = utils.tempdir() + stat_file = temp.relpath("stat.log") + with open(stat_file, "w") as f: + f.write( + f'{{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno -max_num_threads=256 -texture_spatial_limit=16384 -thread_warp_size=1", "conv2d_nchw_winograd.image2d", [["TENSOR", [1, 771, 36, 64], "{dtype}"], ["TENSOR", [128, 771, 3, 3], "{dtype}"], [1, 1], [1, 1, 1, 1], [1, 1], "{dtype}"], {{}}], "config": {{"index": 5399, "code_hash": null, "entity": [["auto_unroll_max_step", "ot", 16], ["tile_y", "sp", [-1, 1, 32]], ["tile_x", "sp", [-1, 4, 8]], ["tile_rc", "sp", [-1, 193]]]}}, "result": [[0.0037244], 0, 7.06374192237854, 1653898629.7427933], "version": 0.2, "tvm_version": "0.8.dev0"}}\n' + ) + graph = build_run_compare( + mod, params1, {"data": input_shape}, dtype, target, stat_file=stat_file + ) + matches = re.findall("winograd", graph) + assert len(matches) > 0 diff --git a/tests/python/relay/opencl_texture/test_conv2d_nhwc_texture.py b/tests/python/relay/opencl_texture/test_conv2d_nhwc_texture.py index 37c22137f035..0b89e3dc9c7f 100644 --- a/tests/python/relay/opencl_texture/test_conv2d_nhwc_texture.py +++ b/tests/python/relay/opencl_texture/test_conv2d_nhwc_texture.py @@ -581,3 +581,104 @@ def test_conv2d_vgg16_winograd_4d(target, dtype): ) matches = re.findall("winograd", graph) assert len(matches) > 0 + + +@tvm.testing.requires_opencl +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_winograd_conv(target, dtype): + input_shape = (1, 3, 3, 4) + A = relay.var("data", shape=input_shape, dtype=dtype) + filter_shape3 = (3, 3, 4, 8) + bias_shape3 = (1, 1, 1, 8) + B3 = relay.var("weight3", shape=filter_shape3, dtype=dtype) + D = relay.nn.conv2d( + A, + B3, + data_layout="NHWC", + kernel_layout="HWIO", + padding=[1, 1, 1, 1], + channels=8, + kernel_size=[3, 3], + out_dtype=dtype, + ) + + filter_shape4 = (3, 3, 8, 8) + bias_shape4 = (1, 1, 1, 8) + B4 = relay.var("weight4", shape=filter_shape4, dtype=dtype) + D = relay.nn.conv2d( + D, + B4, + data_layout="NHWC", + kernel_layout="HWIO", + padding=[1, 1, 1, 1], + channels=8, + kernel_size=[3, 3], + out_dtype=dtype, + ) + mod = relay.Function([A, B3, B4], D) + np.random.seed(1) + initializer = relay.testing.init.Xavier() + filter_data3 = np.zeros(filter_shape3).astype(dtype) + bias_data3 = np.zeros(bias_shape3).astype(dtype) + filter_data4 = np.zeros(filter_shape4).astype(dtype) + bias_data4 = np.zeros(bias_shape4).astype(dtype) + initializer("weight", filter_data3) + initializer("bias", bias_data3) + initializer("weight", filter_data4) + initializer("bias", bias_data4) + params1 = { + "weight3": tvm.nd.array(filter_data3), + "weight4": tvm.nd.array(filter_data4), + } + + temp = utils.tempdir() + stat_file = temp.relpath("stat.log") + with open(stat_file, "w") as f: + f.write( + f'{{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno -max_num_threads=256", "conv2d_nhwc_winograd.image2d", [["TENSOR", [1, 3, 3, 4], "{dtype}"], ["TENSOR", [3, 3, 4, 8], "{dtype}"], [1, 1], [1, 1, 1, 1], [1, 1], "{dtype}"], {{}}], "config": {{"index": 1591, "code_hash": null, "entity": [["auto_unroll_max_step", "ot", 4], ["tile_y", "sp", [-1, 1, 32]], ["tile_x", "sp", [-1, 4, 2]], ["tile_rc", "sp", [-1, 8]]]}}, "result": [[0.0037244], 0, 7.06374192237854, 1653898629.7427933], "version": 0.2, "tvm_version": "0.8.dev0"}}\n' + ) + graph = build_run_compare( + mod, params1, {"data": input_shape}, dtype, target, stat_file=stat_file + ) + matches = re.findall("winograd", graph) + assert len(matches) > 0 + + +@tvm.testing.requires_opencl +@tvm.testing.parametrize_targets("opencl -device=adreno") +def test_conv2d_winograd_non_rect(target, dtype): + input_shape = (1, 36, 64, 771) + A = relay.var("data", shape=input_shape, dtype=dtype) + filter_shape = (3, 3, 771, 128) + B = relay.var("weight", shape=filter_shape, dtype=dtype) + D = relay.nn.conv2d( + A, + B, + data_layout="NHWC", + kernel_layout="HWIO", + padding=[1, 1, 1, 1], + channels=128, + kernel_size=[3, 3], + out_dtype=dtype, + ) + + mod = relay.Function([A, B], D) + np.random.seed(1) + initializer = relay.testing.init.Xavier() + filter_data = np.zeros(filter_shape).astype(dtype) + initializer("weight", filter_data) + params1 = { + "weight": tvm.nd.array(filter_data), + } + + temp = utils.tempdir() + stat_file = temp.relpath("stat.log") + with open(stat_file, "w") as f: + f.write( + f'{{"input": ["opencl -keys=adreno,opencl,gpu -device=adreno -max_num_threads=256 -texture_spatial_limit=16384 -thread_warp_size=1", "conv2d_nhwc_winograd.image2d", [["TENSOR", [1, 36, 64, 771], "{dtype}"], ["TENSOR", [3, 3, 771, 128], "{dtype}"], [1, 1], [1, 1, 1, 1], [1, 1], "{dtype}"], {{}}], "config": {{"index": 5399, "code_hash": null, "entity": [["auto_unroll_max_step", "ot", 16], ["tile_y", "sp", [-1, 1, 32]], ["tile_x", "sp", [-1, 4, 8]], ["tile_rc", "sp", [-1, 193]]]}}, "result": [[0.0037244], 0, 7.06374192237854, 1653898629.7427933], "version": 0.2, "tvm_version": "0.8.dev0"}}\n' + ) + graph = build_run_compare( + mod, params1, {"data": input_shape}, dtype, target, stat_file=stat_file + ) + matches = re.findall("winograd", graph) + assert len(matches) > 0