diff --git a/python/tvm/autotvm/task/topi_integration.py b/python/tvm/autotvm/task/topi_integration.py index 32d8674640ed..307d44810c79 100644 --- a/python/tvm/autotvm/task/topi_integration.py +++ b/python/tvm/autotvm/task/topi_integration.py @@ -233,7 +233,10 @@ def wrapper(outs, *args, **kwargs): """wrapper function for topi schedule""" workload = get_workload(outs, task_name) if workload is None: - raise RuntimeError("Cannot find workload in attribute of this schedule") + raise RuntimeError( + f"Cannot find TOPI workload {task_name}. " + "Is it registered with `register_topi_compute`?" + ) tgt = Target.current() cfg = DispatchContext.current.query(tgt, workload) return topi_schedule(cfg, outs, *args, **kwargs) @@ -253,7 +256,7 @@ def traverse(tensors): for t in tensors: op = t.op wkl = traverse(op.input_tensors) - if wkl: + if wkl is not None: return wkl if "workload" in op.attrs: diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 4ba9c07e4f80..b8bbcf89df60 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -526,23 +526,6 @@ def _impl_v1(cls, inputs, attr, params): raise tvm.error.OpAttributeInvalid(msg.format(attr["auto_pad"])) attr.pop("auto_pad") - # Check if the requested convolution is a group conv1d, if so convert it to conv2d. - # TODO(jwfromm) Remove once proper group_conv1d is supported. - group_conv1d = False - if dimension_picker("conv")(attr) == "conv1d" and attr.get("group") != 1: - group_conv1d = True - # Expand input from NCW to NCHW - data = _op.expand_dims(data, axis=2) - # Expand kernel from OIW to OIHW - kernel = _op.expand_dims(kernel, axis=2) - # Add new value to kernel_shape, strices, dilation, pads, if needed - attr["kernel_shape"] = [1] + list(attr["kernel_shape"]) - if "strides" in attr: - attr["strides"] = [1] + list(attr["strides"]) - if "dilations" in attr: - attr["dilations"] = [1] + list(attr["dilations"]) - if "pads" in attr: - attr["pads"] = [0, attr["pads"][0], 0, attr["pads"][1]] attr["channels"] = kernel_shapes[0][0] out = AttrCvt( op_name=dimension_picker("conv"), @@ -555,10 +538,6 @@ def _impl_v1(cls, inputs, attr, params): custom_check=dimension_constraint(), )([data, kernel], attr, params) - # If this was a group_conv1d, squish output back to NCW. - if group_conv1d: - out = _op.squeeze(out, axis=[2]) - use_bias = len(inputs) == 3 if use_bias: out = _op.nn.bias_add(out, inputs[2]) diff --git a/python/tvm/relay/op/strategy/cuda.py b/python/tvm/relay/op/strategy/cuda.py index 607b2a59045a..54c78d4d8cd9 100644 --- a/python/tvm/relay/op/strategy/cuda.py +++ b/python/tvm/relay/op/strategy/cuda.py @@ -689,20 +689,36 @@ def conv1d_strategy_cuda(attrs, inputs, out_type, target): if dilation[0] < 1: raise ValueError("dilation should be a positive value") strategy = _op.OpStrategy() - if layout == "NCW": - strategy.add_implementation( - wrap_compute_conv1d(topi.cuda.conv1d_ncw), - wrap_topi_schedule(topi.cuda.schedule_conv1d_ncw), - name="conv1d_ncw.cuda", - ) - elif layout == "NWC": - strategy.add_implementation( - wrap_compute_conv1d(topi.cuda.conv1d_nwc), - wrap_topi_schedule(topi.cuda.schedule_conv1d_nwc), - name="conv1d_nwc.cuda", - ) + if attrs.groups == 1: + if layout == "NCW": + strategy.add_implementation( + wrap_compute_conv1d(topi.cuda.conv1d_ncw), + wrap_topi_schedule(topi.cuda.schedule_conv1d_ncw), + name="conv1d_ncw.cuda", + ) + elif layout == "NWC": + strategy.add_implementation( + wrap_compute_conv1d(topi.cuda.conv1d_nwc), + wrap_topi_schedule(topi.cuda.schedule_conv1d_nwc), + name="conv1d_nwc.cuda", + ) + else: + raise ValueError("Unsupported conv1d layout {}".format(layout)) else: - raise ValueError("Unsupported conv1d layout {}".format(layout)) + if layout == "NCW": + strategy.add_implementation( + wrap_compute_group_conv1d(topi.cuda.group_conv1d_ncw), + wrap_topi_schedule(topi.cuda.schedule_group_conv1d_ncw), + name="group_conv1d_ncw.cuda", + ) + elif layout == "NWC": + strategy.add_implementation( + wrap_compute_group_conv1d(topi.cuda.group_conv1d_nwc), + wrap_topi_schedule(topi.cuda.schedule_group_conv1d_nwc), + name="group_conv1d_nwc.cuda", + ) + else: + raise ValueError("Unsupported conv1d layout {}".format(layout)) return strategy diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py index 461e755f5212..d7f0dda92c6d 100644 --- a/python/tvm/relay/op/strategy/generic.py +++ b/python/tvm/relay/op/strategy/generic.py @@ -637,6 +637,49 @@ def conv1d_strategy(attrs, inputs, out_type, target): return strategy +def wrap_compute_group_conv1d(topi_compute): + """wrap conv1d topi compute""" + + def _compute_group_conv1d(attrs, inputs, out_type): + """Compute definition of conv1d""" + strides = get_const_tuple(attrs.strides) + padding = get_const_tuple(attrs.padding) + dilation = get_const_tuple(attrs.dilation) + out_dtype = attrs.out_dtype + out_dtype = inputs[0].dtype if out_dtype in ("same", "") else out_dtype + return [ + topi_compute(inputs[0], inputs[1], strides, padding, dilation, attrs.groups, out_dtype) + ] + + return _compute_group_conv1d + + +@override_native_generic_func("group_conv1d_strategy") +def group_conv1d_strategy(attrs, inputs, out_type, target): + """group_conv1d generic strategy""" + logger.warning("group_conv1d is not optimized for this platform.") + layout = attrs.data_layout + dilation = get_const_tuple(attrs.dilation) + if dilation[0] < 1: + raise ValueError("dilation should be a positive value") + strategy = _op.OpStrategy() + if layout == "NCW": + strategy.add_implementation( + wrap_compute_conv1d(topi.nn.group_conv1d_ncw), + wrap_topi_schedule(topi.generic.schedule_group_conv1d_ncw), + name="group_conv1d_ncw.generic", + ) + elif layout == "NWC": + strategy.add_implementation( + wrap_compute_conv1d(topi.nn.group_conv1d_nwc), + wrap_topi_schedule(topi.generic.schedule_group_conv1d_nwc), + name="group_conv1d_nwc.generic", + ) + else: + raise ValueError("Unsupported conv1d layout {}".format(layout)) + return strategy + + # conv1d_transpose def wrap_compute_conv1d_transpose(topi_compute): """wrap conv1d_transpose topi compute""" diff --git a/python/tvm/relay/op/strategy/x86.py b/python/tvm/relay/op/strategy/x86.py index a421b120fab4..20b674966131 100644 --- a/python/tvm/relay/op/strategy/x86.py +++ b/python/tvm/relay/op/strategy/x86.py @@ -360,24 +360,41 @@ def conv3d_strategy_cpu(attrs, inputs, out_type, target): def conv1d_strategy_cpu(attrs, inputs, out_type, target): """conv1d x86 strategy""" layout = attrs.data_layout + groups = attrs.groups dilation = get_const_tuple(attrs.dilation) if dilation[0] < 1: raise ValueError("dilation should be a positive value") strategy = _op.OpStrategy() - if layout == "NCW": - strategy.add_implementation( - wrap_compute_conv1d(topi.nn.conv1d_ncw), - wrap_topi_schedule(topi.x86.schedule_conv1d_ncw), - name="conv1d_ncw.x86", - ) - elif layout == "NWC": - strategy.add_implementation( - wrap_compute_conv1d(topi.nn.conv1d_nwc), - wrap_topi_schedule(topi.x86.schedule_conv1d_nwc), - name="conv1d_nwc.x86", - ) + if groups == 1: + if layout == "NCW": + strategy.add_implementation( + wrap_compute_conv1d(topi.nn.conv1d_ncw), + wrap_topi_schedule(topi.x86.schedule_conv1d_ncw), + name="conv1d_ncw.x86", + ) + elif layout == "NWC": + strategy.add_implementation( + wrap_compute_conv1d(topi.nn.conv1d_nwc), + wrap_topi_schedule(topi.x86.schedule_conv1d_nwc), + name="conv1d_nwc.x86", + ) + else: + raise ValueError("Unsupported conv1d layout {}".format(layout)) else: - raise ValueError("Unsupported conv1d layout {}".format(layout)) + if layout == "NCW": + strategy.add_implementation( + wrap_compute_group_conv1d(topi.nn.group_conv1d_ncw), + wrap_topi_schedule(topi.x86.schedule_group_conv1d_ncw), + name="group_conv1d_ncw.x86", + ) + elif layout == "NWC": + strategy.add_implementation( + wrap_compute_group_conv1d(topi.nn.group_conv1d_nwc), + wrap_topi_schedule(topi.x86.schedule_group_conv1d_nwc), + name="group_conv1d_nwc.x86", + ) + else: + raise ValueError("Unsupported conv1d layout {}".format(layout)) return strategy diff --git a/python/tvm/topi/cuda/conv1d.py b/python/tvm/topi/cuda/conv1d.py index e50913d88df2..b2fc4ca02dc9 100644 --- a/python/tvm/topi/cuda/conv1d.py +++ b/python/tvm/topi/cuda/conv1d.py @@ -29,8 +29,7 @@ def conv1d_ncw(cfg, data, kernel, strides, padding, dilation, out_dtype="float32 return nn.conv1d_ncw(data, kernel, strides, padding, dilation, out_dtype) -@autotvm.register_topi_schedule("conv1d_ncw.cuda") -def schedule_conv1d_ncw(cfg, outs): +def _schedule_conv1d_ncw(cfg, outs): """TOPI schedule callback of conv1d ncw for cuda gpu Parameters @@ -51,7 +50,7 @@ def schedule_conv1d_ncw(cfg, outs): s = te.create_schedule([x.op for x in outs]) def _callback(op): - if op.tag == "conv1d_ncw": + if op.tag == "conv1d_ncw" or op.tag == "group_conv1d_ncw": pad_data = op.input_tensors[0] kernel = op.input_tensors[1] conv = op.output(0) @@ -140,13 +139,27 @@ def _callback(op): return s +@autotvm.register_topi_schedule("conv1d_ncw.cuda") +def schedule_conv1d_ncw(cfg, outs): + return _schedule_conv1d_ncw(cfg, outs) + + +@autotvm.register_topi_compute("group_conv1d_ncw.cuda") +def group_conv1d_ncw(cfg, data, kernel, strides, padding, dilation, groups, out_dtype="float32"): + return nn.group_conv1d_ncw(data, kernel, strides, padding, dilation, groups, out_dtype) + + +@autotvm.register_topi_schedule("group_conv1d_ncw.cuda") +def schedule_group_conv1d_ncw(cfg, outs): + return _schedule_conv1d_ncw(cfg, outs) + + @autotvm.register_topi_compute("conv1d_nwc.cuda") def conv1d_nwc(cfg, data, kernel, strides, padding, dilation, out_dtype="float32"): return nn.conv1d_nwc(data, kernel, strides, padding, dilation, out_dtype) -@autotvm.register_topi_schedule("conv1d_nwc.cuda") -def schedule_conv1d_nwc(cfg, outs): +def _schedule_conv1d_nwc(cfg, outs): """TOPI schedule callback of conv1d nwc for cuda gpu Parameters @@ -167,7 +180,7 @@ def schedule_conv1d_nwc(cfg, outs): s = te.create_schedule([x.op for x in outs]) def _callback(op): - if op.tag == "conv1d_nwc": + if op.tag == "conv1d_nwc" or op.tag == "group_conv1d_nwc": pad_data = op.input_tensors[0] kernel = op.input_tensors[1] conv = op.output(0) @@ -254,3 +267,18 @@ def _callback(op): traverse_inline(s, outs[0].op, _callback) return s + + +@autotvm.register_topi_schedule("conv1d_nwc.cuda") +def schedule_conv1d_nwc(cfg, outs): + return _schedule_conv1d_nwc(cfg, outs) + + +@autotvm.register_topi_compute("group_conv1d_nwc.cuda") +def group_conv1d_nwc(cfg, data, kernel, strides, padding, dilation, groups, out_dtype="float32"): + return nn.group_conv1d_nwc(data, kernel, strides, padding, dilation, groups, out_dtype) + + +@autotvm.register_topi_schedule("group_conv1d_nwc.cuda") +def schedule_group_conv1d_nwc(cfg, outs): + return _schedule_conv1d_nwc(cfg, outs) diff --git a/python/tvm/topi/generic/nn.py b/python/tvm/topi/generic/nn.py index ba63c539133e..4226c6caf23c 100644 --- a/python/tvm/topi/generic/nn.py +++ b/python/tvm/topi/generic/nn.py @@ -54,6 +54,40 @@ def schedule_conv1d_nwc(outs): return _default_schedule(outs, False) +def schedule_group_conv1d_ncw(outs): + """Schedule for group_conv1d_ncw + + Parameters + ---------- + outs: Array of Tensor + The computation graph description of group_conv1d_ncw + in the format of an array of tensors. + + Returns + ------- + sch: Schedule + The computation schedule for the op. + """ + return _default_schedule(outs, False) + + +def schedule_group_conv1d_nwc(outs): + """Schedule for group_conv1d_nwc + + Parameters + ---------- + outs: Array of Tensor + The computation graph description of group_conv1d_nwc + in the format of an array of tensors. + + Returns + ------- + sch: Schedule + The computation schedule for the op. + """ + return _default_schedule(outs, False) + + def schedule_conv2d_hwcn(outs): """Schedule for conv2d_hwcn diff --git a/python/tvm/topi/nn/conv1d.py b/python/tvm/topi/nn/conv1d.py index 8fdf3f8918ee..0a1efa35655f 100644 --- a/python/tvm/topi/nn/conv1d.py +++ b/python/tvm/topi/nn/conv1d.py @@ -16,10 +16,7 @@ # under the License. # pylint: disable=invalid-name, unused-variable, unused-argument """1D convolution operators.""" -from tvm import te -from .pad import pad -from ..utils import simplify -from .utils import get_pad_tuple1d +from .conv2d import conv def conv1d(data, kernel, strides=1, padding="VALID", dilation=1, layout="NCW", out_dtype=None): @@ -50,30 +47,31 @@ def conv1d(data, kernel, strides=1, padding="VALID", dilation=1, layout="NCW", o out_dtype : str The output data type. If None then output is same type as input. """ - if out_dtype is None: - out_dtype = data.dtype - if isinstance(strides, (tuple, list)): - strides = strides[0] - if isinstance(dilation, (tuple, list)): - dilation = dilation[0] + return conv(data, kernel, strides, padding, dilation, 1, layout, out_dtype) - if layout == "NCW": - return conv1d_ncw(data, kernel, strides, padding, dilation, out_dtype) - if layout == "NWC": - return conv1d_nwc(data, kernel, strides, padding, dilation, out_dtype) - raise ValueError("This layout is not yet supported: {}".format(layout)) + +def conv1d_nwc(data, kernel, strides=1, padding="VALID", dilation=1, out_dtype=None): + """1D convolution in NWC layout. See :py:func:`conv` for details on parameters""" + return conv(data, kernel, strides, padding, dilation, 1, "NWC", out_dtype=out_dtype) def conv1d_ncw(data, kernel, strides=1, padding="VALID", dilation=1, out_dtype=None): - """1D convolution forward operator for NCW layout. + """1D convolution in NCW layout. See :py:func:`conv` for details on parameters""" + return conv(data, kernel, strides, padding, dilation, 1, "NCW", out_dtype=out_dtype) + + +def group_conv1d_nwc( + data, kernel, strides=1, padding="VALID", dilation=1, groups=1, out_dtype=None +): + """1D convolution forward operator for NWC layout. Parameters ---------- data : tvm.te.Tensor - 3-D with shape [batch, in_channel, in_width] + 3-D with shape [batch, in_width, in_channel] kernel : tvm.te.Tensor - 3-D with shape [num_filter, in_channel, filter_size] + 3-D with shape [filter_size, in_channel, num_filter] strides : int or tuple The spatial stride along width @@ -85,55 +83,27 @@ def conv1d_ncw(data, kernel, strides=1, padding="VALID", dilation=1, out_dtype=N dilation : int or tuple Dilation rate if convolution should be dilated. + groups : int + Number of groups + out_dtype : str The output data type. If None then output is same type as input. """ - if out_dtype is None: - out_dtype = data.dtype - if isinstance(strides, (tuple, list)): - strides = strides[0] - if isinstance(dilation, (tuple, list)): - dilation = dilation[0] - - batch, in_channels, data_width = data.shape - out_channels, _, kernel_size = kernel.shape - - # Compute the output shape - dilated_kernel_size = (kernel_size - 1) * dilation + 1 - pad_left, pad_right = get_pad_tuple1d(padding, (dilated_kernel_size,)) - out_channels = simplify(out_channels) - out_width = simplify((data_width - dilated_kernel_size + pad_left + pad_right) // strides + 1) - - # Apply padding - pad_before = [0, 0, pad_left] - pad_after = [0, 0, pad_right] - temp = pad(data, pad_before, pad_after, name="pad_temp") - - # Compute graph - rc = te.reduce_axis((0, in_channels), name="rc") - rw = te.reduce_axis((0, kernel_size), name="rw") - - return te.compute( - (batch, out_channels, out_width), - lambda b, c, w: te.sum( - temp[b, rc, w * strides + rw * dilation].astype(out_dtype) - * kernel[c, rc, rw].astype(out_dtype), - axis=[rc, rw], - ), - tag="conv1d_ncw", - ) + return conv(data, kernel, strides, padding, dilation, groups, "NWC", out_dtype=out_dtype) -def conv1d_nwc(data, kernel, strides=1, padding="VALID", dilation=1, out_dtype=None): - """1D convolution forward operator for NWC layout. +def group_conv1d_ncw( + data, kernel, strides=1, padding="VALID", dilation=1, groups=1, out_dtype=None +): + """1D convolution forward operator for NCW layout. Parameters ---------- data : tvm.te.Tensor - 3-D with shape [batch, in_width, in_channel] + 3-D with shape [batch, in_channel, in_width] kernel : tvm.te.Tensor - 3-D with shape [filter_size, in_channel, num_filter] + 3-D with shape [num_filter, in_channel, filter_size] strides : int or tuple The spatial stride along width @@ -145,40 +115,10 @@ def conv1d_nwc(data, kernel, strides=1, padding="VALID", dilation=1, out_dtype=N dilation : int or tuple Dilation rate if convolution should be dilated. + groups : int + Number of groups + out_dtype : str The output data type. If None then output is same type as input. """ - if out_dtype is None: - out_dtype = data.dtype - if isinstance(strides, (tuple, list)): - strides = strides[0] - if isinstance(dilation, (tuple, list)): - dilation = dilation[0] - - batch, data_width, in_channels = data.shape - kernel_size, _, out_channels = kernel.shape - - # Compute the output shape - dilated_kernel_size = (kernel_size - 1) * dilation + 1 - pad_left, pad_right = get_pad_tuple1d(padding, (dilated_kernel_size,)) - out_channels = simplify(out_channels) - out_width = simplify((data_width - dilated_kernel_size + pad_left + pad_right) // strides + 1) - - # Apply padding - pad_before = [0, pad_left, 0] - pad_after = [0, pad_right, 0] - temp = pad(data, pad_before, pad_after, name="pad_temp") - - # Compute graph - rc = te.reduce_axis((0, in_channels), name="rc") - rw = te.reduce_axis((0, kernel_size), name="rw") - - return te.compute( - (batch, out_width, out_channels), - lambda b, w, c: te.sum( - temp[b, w * strides + rw * dilation, rc].astype(out_dtype) - * kernel[rw, rc, c].astype(out_dtype), - axis=[rc, rw], - ), - tag="conv1d_nwc", - ) + return conv(data, kernel, strides, padding, dilation, groups, "NCW", out_dtype=out_dtype) diff --git a/python/tvm/topi/nn/conv2d.py b/python/tvm/topi/nn/conv2d.py index 7cb4b09b8805..97317fa0f9cd 100644 --- a/python/tvm/topi/nn/conv2d.py +++ b/python/tvm/topi/nn/conv2d.py @@ -20,13 +20,16 @@ from __future__ import absolute_import as _abs from collections import namedtuple +import re +from typing import Union, Sequence +import numpy as np import tvm from tvm import auto_scheduler, te from ..utils import get_const_int, get_const_tuple, simplify, tag from .pad import pad -from .utils import get_pad_tuple +from .utils import get_pad_tuple, get_pad_tuple_generic from .winograd_util import winograd_transform_matrices # workload description of conv2d @@ -86,13 +89,7 @@ def conv2d(input, filter, strides, padding, dilation, layout="NCHW", out_dtype=N """ # search platform specific declaration first # default declaration - if layout == "NCHW": - return conv2d_nchw(input, filter, strides, padding, dilation, out_dtype) - if layout == "HWCN": - return conv2d_hwcn(input, filter, strides, padding, dilation, out_dtype) - if layout == "NHWC": - return conv2d_nhwc(input, filter, strides, padding, dilation, out_dtype) - raise ValueError("not support this layout {} yet".format(layout)) + return conv(input, filter, strides, padding, dilation, 1, layout, out_dtype) @tvm.target.generic_func @@ -242,49 +239,7 @@ def conv2d_nchw(Input, Filter, stride, padding, dilation, out_dtype=None): Output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ - if out_dtype is None: - out_dtype = Input.dtype - assert isinstance(stride, int) or len(stride) == 2 - assert isinstance(dilation, int) or len(dilation) == 2 - if isinstance(stride, int): - stride_h = stride_w = stride - else: - stride_h, stride_w = stride - - if isinstance(dilation, int): - dilation_h = dilation_w = dilation - else: - dilation_h, dilation_w = dilation - - batch, in_channel, in_height, in_width = Input.shape - num_filter, channel, kernel_h, kernel_w = Filter.shape - # compute the output shape - dilated_kernel_h = (kernel_h - 1) * dilation_h + 1 - dilated_kernel_w = (kernel_w - 1) * dilation_w + 1 - pad_top, pad_left, pad_down, pad_right = get_pad_tuple( - padding, (dilated_kernel_h, dilated_kernel_w) - ) - out_channel = num_filter - out_height = simplify((in_height - dilated_kernel_h + pad_top + pad_down) // stride_h + 1) - out_width = simplify((in_width - dilated_kernel_w + pad_left + pad_right) // stride_w + 1) - # compute graph - pad_before = [0, 0, pad_top, pad_left] - pad_after = [0, 0, pad_down, pad_right] - temp = pad(Input, pad_before, pad_after, name="pad_temp") - rc = te.reduce_axis((0, in_channel), name="rc") - ry = te.reduce_axis((0, kernel_h), name="ry") - rx = te.reduce_axis((0, kernel_w), name="rx") - return te.compute( - (batch, out_channel, out_height, out_width), - lambda nn, ff, yy, xx: te.sum( - temp[nn, rc, yy * stride_h + ry * dilation_h, xx * stride_w + rx * dilation_w].astype( - out_dtype - ) - * Filter[ff, rc, ry, rx].astype(out_dtype), - axis=[rc, ry, rx], - ), - tag="conv2d_nchw", - ) + return conv(Input, Filter, stride, padding, dilation, 1, "NCHW", out_dtype=out_dtype) def conv2d_hwcn(Input, Filter, stride, padding, dilation, out_dtype=None): @@ -314,51 +269,7 @@ def conv2d_hwcn(Input, Filter, stride, padding, dilation, out_dtype=None): output : tvm.te.Tensor 4-D with shape [out_height, out_width, out_channel, batch] """ - if out_dtype is None: - out_dtype = Input.dtype - assert isinstance(stride, int) or len(stride) == 2 - assert isinstance(dilation, int) or len(dilation) == 2 - - if isinstance(stride, int): - stride_h = stride_w = stride - else: - stride_h, stride_w = stride - - if isinstance(dilation, int): - dilation_h = dilation_w = dilation - else: - dilation_h, dilation_w = dilation - - in_height, in_width, in_channel, batch = Input.shape - kernel_h, kernel_w, channel, num_filter = Filter.shape - # compute the output shape - dilated_kernel_h = (kernel_h - 1) * dilation_h + 1 - dilated_kernel_w = (kernel_w - 1) * dilation_w + 1 - pad_top, pad_left, pad_down, pad_right = get_pad_tuple( - padding, (dilated_kernel_h, dilated_kernel_w) - ) - out_channel = num_filter - out_height = simplify((in_height - dilated_kernel_h + pad_top + pad_down) // stride_h + 1) - out_width = simplify((in_width - dilated_kernel_w + pad_left + pad_right) // stride_w + 1) - pad_before = [pad_top, pad_left, 0, 0] - pad_after = [pad_down, pad_right, 0, 0] - PaddedInput = pad(Input, pad_before, pad_after, name="PaddedInput") - rc = te.reduce_axis((0, in_channel), name="rc") - ry = te.reduce_axis((0, kernel_h), name="ry") - rx = te.reduce_axis((0, kernel_w), name="rx") - Output = te.compute( - (out_height, out_width, out_channel, batch), - lambda yy, xx, ff, nn: te.sum( - PaddedInput[ - yy * stride_h + ry * dilation_h, xx * stride_w + rx * dilation_w, rc, nn - ].astype(out_dtype) - * Filter[ry, rx, rc, ff].astype(out_dtype), - axis=[ry, rx, rc], - ), - name="Conv2dOutput", - tag="conv2d_hwcn", - ) - return Output + return conv(Input, Filter, stride, padding, dilation, 1, "HWCN", out_dtype=out_dtype) def conv2d_nhwc( @@ -835,55 +746,148 @@ def group_conv2d_nchw(Input, Filter, stride, padding, dilation, groups, out_dtyp Output : tvm.te.Tensor 4-D with shape [batch, out_channel, out_height, out_width] """ + return conv(Input, Filter, stride, padding, dilation, groups, "NCHW", out_dtype=out_dtype) + + +def conv( + inp: te.Tensor, + filt: te.Tensor, + stride: Union[int, Sequence[int]], + padding: Union[int, Sequence[int]], + dilation: Union[int, Sequence[int]], + groups: int, + order: str, + out_dtype: Union[str, None] = None, +): + """Convolution operator in NCHW or NHWC layout. + + Supports 1D, 2D, 3D, ... and grouping. + + Parameters + ---------- + inp : tvm.te.Tensor + N-D with shape [batch, in_channel, in_height, in_width, ...] ordered by `order` + + filt : tvm.te.Tensor + N-D with shape [num_filter, in_channel // groups, filter_height, filter_width, ...] + for NCHW or [filter_height, filter_width, ..., in_channel // groups, num_filter] for NHWC + + stride : int or a list/tuple of dim ints + (where dim=2 for NCHW, dim=1 for NCH, etc.) + Stride size, or [stride_height, stride_width, ...] + + padding : int or a list/tuple of dim or 2*dim ints + (where dim=2 for NCHW, dim=1 for NCH, etc.) + padding size, or + [pad_height, pad_width, ...] for dim ints, or + [pad_top, pad_left, pad_bottom, pad_right] for 2*dim ints + + dilation : int or a list/tuple of two ints + dilation size, or [dilation_height, dilation_width] + + groups : int + number of groups + + order : str + Ordering of dimensions. N indicates batch dimension, C indicates + channels, any other character indicates HW (or H or HWD for 1D and 3D). + + out_dtype : str + Elements are converted to this type before elementwise multiplication + and summation. + + Returns + ------- + Output : tvm.te.Tensor + N-D with shape [batch, out_channel, out_height, out_width, ...] ordered by `order`. + """ + dim = len(inp.shape) - 2 if out_dtype is None: - out_dtype = Input.dtype - assert isinstance(stride, int) or len(stride) == 2 - assert isinstance(dilation, int) or len(dilation) == 2 + out_dtype = inp.dtype + assert isinstance(stride, int) or len(stride) == dim + assert isinstance(dilation, int) or len(dilation) == dim if isinstance(stride, int): - stride_h = stride_w = stride + strides = [stride for _ in range(dim)] else: - stride_h, stride_w = stride + strides = stride if isinstance(dilation, int): - dilation_h = dilation_w = dilation + dilations = [dilation for _ in range(dim)] else: - dilation_h, dilation_w = dilation + dilations = list(dilation) + + # transform from order to NCHW + permutation_to = [order.find("N"), order.find("C")] + [ + x.span()[0] for x in re.finditer("[^NC]", order) + ] + # transform from NCHW to order + permutation_from = np.argsort(permutation_to) + # transform from CHW to order + permutation_from_reductions = permutation_from[1:].copy() + permutation_from_reductions[permutation_from_reductions > permutation_from[0]] -= 1 + + # kernel permutation, if C appears before HW then num_filter is first, otherwise it is last + # tkonolige: I don't really understand kernel ordering for NHWC, it seems + # like num_filters should match the N dimension + if order.find("C") < re.search("[^NC]", order).span()[0]: + permutation_to_kernel = [0, 1] + list(range(2, dim + 2)) + else: + permutation_to_kernel = [dim + 1, dim] + list(range(dim)) + permutation_from_kernel = np.argsort(permutation_to_kernel) - batch, in_channel, in_height, in_width = get_const_tuple(Input.shape) - num_filter, _, kernel_h, kernel_w = get_const_tuple(Filter.shape) + batch, in_channel, *dimensions = np.array(get_const_tuple(inp.shape))[permutation_to].tolist() + num_filter, _, *kernel_dimensions = np.array(get_const_tuple(filt.shape))[ + permutation_to_kernel + ].tolist() assert in_channel % groups == 0, "input channels must divide group size" assert num_filter % groups == 0, "output channels must divide group size" - pad_top, pad_left, pad_down, pad_right = get_pad_tuple(padding, (kernel_h, kernel_w)) + dilated_kernel_dimensions = [(k - 1) * dil + 1 for k, dil in zip(kernel_dimensions, dilations)] + pad_begin, pad_end = get_pad_tuple_generic(padding, dilated_kernel_dimensions) # compute the output shape out_channel = num_filter - out_height = simplify( - (in_height - (kernel_h - 1) * dilation_h - 1 + pad_top + pad_down) // stride_h + 1 - ) - out_width = simplify( - (in_width - (kernel_w - 1) * dilation_w - 1 + pad_left + pad_right) // stride_w + 1 - ) + out_dimensions = [ + simplify(d - (k - 1) * dil - 1 + pb + pe) // stride + 1 + for d, k, dil, pb, pe, stride in zip( + dimensions, kernel_dimensions, dilations, pad_begin, pad_end, strides + ) + ] # compute graph - pad_before = [0, 0, pad_top, pad_left] - pad_after = [0, 0, pad_down, pad_right] - temp = pad(Input, pad_before, pad_after, name="pad_temp") + pad_before = list(np.array([0, 0] + pad_begin)[permutation_from]) + pad_after = list(np.array([0, 0] + pad_end)[permutation_from]) + temp = pad(inp, pad_before, pad_after, name="pad_temp") rc = te.reduce_axis((0, in_channel // groups), name="rc") - ry = te.reduce_axis((0, kernel_h), name="ry") - rx = te.reduce_axis((0, kernel_w), name="rx") + rs = [te.reduce_axis((0, k), name=f"r{i}") for i, k in enumerate(kernel_dimensions)] + + def compute(*args): + nn, ff, *dim_indices = list(np.array(args)[permutation_to]) + return te.sum( + temp.__getitem__( + tuple( + np.array( + [nn, ff // (num_filter // groups) * (in_channel // groups) + rc] + + [ + di * stride + r * dil + for di, stride, r, dil in zip(dim_indices, strides, rs, dilations) + ] + )[permutation_from] + ) + ).astype(out_dtype) + * filt.__getitem__(tuple(np.array([ff, rc] + rs)[permutation_from_kernel])).astype( + out_dtype + ), + # Schedules depend on reduction axes being in the same order as the + # layout, so we reorder here. + axis=np.array([rc, *rs])[permutation_from_reductions].tolist(), + ) + return te.compute( - (batch, out_channel, out_height, out_width), - lambda nn, ff, yy, xx: te.sum( - temp[ - nn, - ff // (num_filter // groups) * (in_channel // groups) + rc, - yy * stride_h + ry * dilation_h, - xx * stride_w + rx * dilation_w, - ].astype(out_dtype) - * Filter[ff, rc, ry, rx].astype(out_dtype), - axis=[rc, ry, rx], - ), - tag="group_conv2d_nchw", + list(np.array([batch, out_channel] + out_dimensions)[permutation_from]), + compute, + # tag is expected to be lowercase + tag=f"{'group_' if groups > 1 else ''}conv{dim}d_{order.lower()}", + name=f"{'group_' if groups > 1 else ''}conv{dim}d_{order.lower()}", ) @@ -893,7 +897,7 @@ def group_conv2d_nhwc(Input, Filter, stride, padding, dilation, groups, out_dtyp Parameters ---------- Input : tvm.te.Tensor - 4-D with shape [batch, in_height, in_width, in_channel] + 4-D with shape [batch, in_height, in_width, in_channel, ...] Filter : tvm.te.Tensor 4-D with shape [filter_height, filter_width, in_channel // groups, num_filter] @@ -920,56 +924,7 @@ def group_conv2d_nhwc(Input, Filter, stride, padding, dilation, groups, out_dtyp Output : tvm.te.Tensor 4-D with shape [batch, out_height, out_width, out_channel] """ - if out_dtype is None: - out_dtype = Input.dtype - assert isinstance(stride, int) or len(stride) == 2 - assert isinstance(dilation, int) or len(dilation) == 2 - if isinstance(stride, int): - stride_h = stride_w = stride - else: - stride_h, stride_w = stride - - if isinstance(dilation, int): - dilation_h = dilation_w = dilation - else: - dilation_h, dilation_w = dilation - - batch, in_height, in_width, in_channel = get_const_tuple(Input.shape) - kernel_h, kernel_w, _, num_filter = get_const_tuple(Filter.shape) - - assert in_channel % groups == 0, "input channels must divide group size" - assert num_filter % groups == 0, "output channels must divide group size" - - pad_top, pad_left, pad_down, pad_right = get_pad_tuple(padding, (kernel_h, kernel_w)) - # compute the output shape - out_channel = num_filter - out_height = simplify( - (in_height - (kernel_h - 1) * dilation_h - 1 + pad_top + pad_down) // stride_h + 1 - ) - out_width = simplify( - (in_width - (kernel_w - 1) * dilation_w - 1 + pad_left + pad_right) // stride_w + 1 - ) - # compute graph - pad_before = [0, pad_top, pad_left, 0] - pad_after = [0, pad_down, pad_right, 0] - temp = pad(Input, pad_before, pad_after, name="pad_temp") - ry = te.reduce_axis((0, kernel_h), name="ry") - rx = te.reduce_axis((0, kernel_w), name="rx") - rc = te.reduce_axis((0, in_channel // groups), name="rc") - return te.compute( - (batch, out_height, out_width, out_channel), - lambda nn, yy, xx, ff: te.sum( - temp[ - nn, - yy * stride_h + ry * dilation_h, - xx * stride_w + rx * dilation_w, - ff // (num_filter // groups) * (in_channel // groups) + rc, - ].astype(out_dtype) - * Filter[ry, rx, rc, ff].astype(out_dtype), - axis=[ry, rx, rc], - ), - tag="group_conv2d_nhwc", - ) + return conv(Input, Filter, stride, padding, dilation, groups, "NHWC", out_dtype=out_dtype) def unpack_NCHWc_to_nchw(packed_out, out_dtype): diff --git a/python/tvm/topi/nn/utils.py b/python/tvm/topi/nn/utils.py index 369b62c638ca..01e1c1ab5444 100644 --- a/python/tvm/topi/nn/utils.py +++ b/python/tvm/topi/nn/utils.py @@ -164,6 +164,53 @@ def get_pad_tuple(padding, kernel): return pad_top, pad_left, pad_h - pad_top, pad_w - pad_left +def get_pad_tuple_generic(padding, kernel): + """Common code to get the pad option + + Parameters + ---------- + padding : int or str + Padding size, or ['VALID', 'SAME'] + + kernel : tuple of int + Conv kernel size + + Returns + ------- + pad_top : int + Padding size on top + + pad_down : int + Padding size on down. + + pad_left : int + Padding size on left + + pad_right : int + Padding size on right. + """ + # compute the padding size + if isinstance(padding, (tuple, list)): + if len(padding) == len(kernel): + pad_dimensions = [p * 2 for p in padding] + elif len(padding) == len(kernel) * 2: + return [padding[i] for i in range(len(kernel))], [ + padding[len(kernel) + i] for i in range(len(kernel)) + ] + else: + raise ValueError("Size of padding can only be len(kernel) or len(kernel) * 2") + elif isinstance(padding, int): + pad_dimensions = [padding * 2 for _ in range(len(kernel))] + elif padding == "VALID": + pad_dimensions = [0 for _ in range(len(kernel))] + elif padding == "SAME": + pad_dimensions = [k - 1 for k in kernel] + else: + raise ValueError("Unknown padding option %s" % padding) + pad_begin = [(p + 1) // 2 for p in pad_dimensions] + return [pad_begin, [pd - pb for pb, pd in zip(pad_begin, pad_dimensions)]] + + def get_pad_tuple3d(padding, kernel): """Common code to get the pad option diff --git a/python/tvm/topi/testing/__init__.py b/python/tvm/topi/testing/__init__.py index 8f78805fff3b..345886c2be91 100644 --- a/python/tvm/topi/testing/__init__.py +++ b/python/tvm/topi/testing/__init__.py @@ -21,7 +21,7 @@ """ from __future__ import absolute_import as _abs -from .conv1d_ncw_python import conv1d_ncw_python +from .conv1d_ncw_python import conv1d_ncw_python, group_conv1d_ncw_python from .conv2d_hwcn_python import conv2d_hwcn_python from .conv2d_nchw_python import conv2d_nchw_python from .conv2d_nhwc_python import conv2d_nhwc_python diff --git a/python/tvm/topi/testing/conv1d_ncw_python.py b/python/tvm/topi/testing/conv1d_ncw_python.py index 190e1c664610..9104cacd173e 100644 --- a/python/tvm/topi/testing/conv1d_ncw_python.py +++ b/python/tvm/topi/testing/conv1d_ncw_python.py @@ -43,6 +43,17 @@ def dilate_np(x, dilation): return x +def group_conv1d_ncw_python(a_np, w_np, stride, padding, dilation, groups): + "Grouped version of `conv1d_ncw_python`, see that for documentation" + a_slices = np.array_split(a_np, groups, axis=1) + w_slices = np.array_split(w_np, groups, axis=0) + b_slices = [ + conv1d_ncw_python(a_slice, w_slice, stride, padding, dilation) + for a_slice, w_slice in zip(a_slices, w_slices) + ] + return np.concatenate(b_slices, axis=1) + + def conv1d_ncw_python(a_np, w_np, stride, padding, dilation): """1D convolution operator in NCW layout @@ -64,6 +75,9 @@ def conv1d_ncw_python(a_np, w_np, stride, padding, dilation): dilation : int Dilation rate of the kernel + groups : int + Number of groups in the convolution + Returns ------- b_np : numpy.ndarray diff --git a/python/tvm/topi/x86/conv1d.py b/python/tvm/topi/x86/conv1d.py index 121c1c21c784..76fc40cab3a4 100644 --- a/python/tvm/topi/x86/conv1d.py +++ b/python/tvm/topi/x86/conv1d.py @@ -126,3 +126,11 @@ def traverse(op): traverse(output_op) return s + + +def schedule_group_conv1d_ncw(outs): + return schedule_conv1d_ncw(outs) + + +def schedule_group_conv1d_nwc(outs): + return schedule_conv1d_nwc(outs) diff --git a/src/relay/op/nn/convolution.h b/src/relay/op/nn/convolution.h index e5e64fa5be65..954daeaa86cf 100644 --- a/src/relay/op/nn/convolution.h +++ b/src/relay/op/nn/convolution.h @@ -76,7 +76,7 @@ bool Conv1DRel(const Array& types, int num_inputs, const Attrs& attrs, if (param->kernel_size.defined() && param->channels.defined()) { Array wshape; - wshape = {{param->channels, dshape_ncw[1], param->kernel_size[0]}}; + wshape = {{param->channels, indexdiv(dshape_ncw[1], param->groups), param->kernel_size[0]}}; wshape = trans_kernel_layout.BackwardShape(wshape); channels = param->channels; diff --git a/tests/python/topi/python/test_topi_conv1d.py b/tests/python/topi/python/test_topi_conv1d.py index f5284ca36fee..db8d7238feba 100644 --- a/tests/python/topi/python/test_topi_conv1d.py +++ b/tests/python/topi/python/test_topi_conv1d.py @@ -38,6 +38,19 @@ "gpu": (topi.cuda.conv1d_nwc, topi.cuda.schedule_conv1d_nwc), } +_group_conv1d_implementations = { + "NCW": { + "generic": (topi.nn.group_conv1d_ncw, topi.generic.schedule_group_conv1d_ncw), + "cpu": (topi.nn.group_conv1d_ncw, topi.x86.schedule_group_conv1d_ncw), + "gpu": (topi.cuda.group_conv1d_ncw, topi.cuda.schedule_group_conv1d_ncw), + }, + "NWC": { + "generic": (topi.nn.group_conv1d_nwc, topi.generic.schedule_group_conv1d_nwc), + "cpu": (topi.nn.group_conv1d_nwc, topi.x86.schedule_group_conv1d_nwc), + "gpu": (topi.cuda.group_conv1d_nwc, topi.cuda.schedule_group_conv1d_nwc), + }, +} + def verify_conv1d( batch, @@ -122,5 +135,65 @@ def test_conv1d(): verify_conv1d(1, 5, 27, 18, 3, 1, 1, "VALID", layout) +layout = tvm.testing.parameter("NCW", "NWC") +padding = tvm.testing.parameter("SAME", "VALID") +dtype = tvm.testing.parameter("float32") + +# batch, in_channels, in_width, filters, kernel_size, stride, dilation, groups +shape = tvm.testing.parameter( + [1, 4, 8, 4, 3, 1, 1, 4], + [1, 4, 8, 4, 3, 1, 1, 4], + [1, 16, 32, 16, 3, 1, 1, 4], + [1, 16, 32, 16, 3, 2, 1, 4], + [1, 16, 32, 16, 3, 1, 2, 4], + [8, 16, 32, 16, 3, 1, 1, 4], + [1, 16, 32, 16, 3, 1, 1, 4], + [1, 16, 32, 16, 2, 1, 1, 4], + [1, 16, 32, 16, 1, 1, 1, 4], + [1, 21, 12, 21, 3, 1, 1, 3], + [1, 20, 27, 20, 3, 1, 1, 5], +) + + +def test_group_conv1d(shape, layout, padding, target, dev, dtype): + batch, in_channels, in_width, filters, kernel_size, stride, dilation, groups = shape + if layout == "NCW": + in_shape = [batch, in_channels, in_width] + kernel_shape = [filters, in_channels // groups, kernel_size] + else: + in_shape = [batch, in_width, in_channels] + kernel_shape = [kernel_size, in_channels // groups, filters] + + # reference data + a_np = np.random.uniform(size=in_shape).astype(dtype) + w_np = np.random.uniform(size=kernel_shape).astype(dtype) + if layout == "NWC": + np_in = np.transpose(a_np, [0, 2, 1]) + np_w = np.transpose(w_np, [2, 1, 0]) + else: + np_in = a_np + np_w = w_np + b_np = tvm.topi.testing.group_conv1d_ncw_python(np_in, np_w, stride, padding, dilation, groups) + if layout == "NWC": + b_np = np.transpose(b_np, [0, 2, 1]) + + A = te.placeholder(in_shape, name="A", dtype=dtype) + W = te.placeholder(kernel_shape, name="W", dtype=dtype) + + fcompute, fschedule = tvm.topi.testing.dispatch(target, _group_conv1d_implementations[layout]) + with tvm.target.Target(target): + B = fcompute(A, W, stride, padding, dilation, groups, "float32") + s = fschedule([B]) + + a = tvm.nd.array(a_np, dev) + w = tvm.nd.array(w_np, dev) + b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=dtype), dev) + print(tvm.lower(s, [A, W, B], target)) + + func = tvm.build(s, [A, W, B], target) + func(a, w, b) + tvm.testing.assert_allclose(b.numpy(), b_np, rtol=1e-5) + + if __name__ == "__main__": test_conv1d()