Prepare for CUDA 12.8: add it to the CI build matrix and update the default CUDA arch lists

FindDefinition · FindDefinition · commit 1c278444beae · 2025-05-22T10:39:57.000Z
diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml
@@ -14,7 +14,7 @@ jobs:
     strategy:
       matrix:
         python-version: ['3.9', '3.10', '3.11', '3.12', '3.13'] 
-        cuda-version: ['11.4', '11.8', '12.1', '12.4', '12.6', '']
+        cuda-version: ['11.4', '11.8', '12.1', '12.4', '12.6', '12.8', '']
     steps:
       - uses: actions/checkout@master
       - name: Install CUDA
@@ -91,7 +91,7 @@ jobs:
     strategy:
       matrix:
         python-version: ['3.12'] # this version is only used for upload.
-        cuda-version: ['114', '118', '121', '124', '126', '']
+        cuda-version: ['114', '118', '121', '124', '126', '128', '']
 
     steps:
       - uses: actions/checkout@master
@@ -112,7 +112,7 @@ jobs:
           PYTHON_VERSION: ${{ matrix.python-version }}
           DOCKER_IMAGE: scrin/manylinux2014-cuda:cu${{ matrix.cuda-version }}-devel-1.0.0
           PLAT: ${{ matrix.cuda-version > '123' && 'manylinux_2_28_x86_64' || 'manylinux2014_x86_64' }}
-        if: (github.event_name == 'push' && (startsWith(github.ref, 'refs/tags')) && (env.CUDA_VERSION != '') ) || env.CUDA_VERSION == '126'
+        if: (github.event_name == 'push' && (startsWith(github.ref, 'refs/tags')) && (env.CUDA_VERSION != '') ) || env.CUDA_VERSION == '128'
         run: |
           # clone nvidia cuda cccl to third_party/
           if [ $CUDA_VERSION -lt "120" ]; then
diff --git a/cumm/common.py b/cumm/common.py
@@ -132,22 +132,22 @@ def _get_cuda_arch_flags(is_gemm: bool = False) -> Tuple[List[str], List[Tuple[i
                     _arch_list = "3.7;5.0;5.2;6.0;6.1;7.0;7.5+PTX"
                 elif (major, minor) < (11, 8):
                     _arch_list = "5.2;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
-                elif (major, minor) < (12, 0):
+                elif (major, minor) < (12, 8):
                     _arch_list = "6.0;7.0;7.5;8.0;8.6;8.9;9.0+PTX"
                 else:
                     # remove sm < 70 prebuilt gemm kernels in CUDA 12.
                     # these gemm kernels will be compiled via nvrtc.
-                    _arch_list = "6.0;7.0;7.5;8.0;8.6;8.9;9.0+PTX"
+                    _arch_list = "7.5;8.0;8.6;8.9;9.0;10.0;12.0+PTX"
             else:
                 # flag for non-gemm kernels, they are usually simple and small.
                 if (major, minor) < (11, 0):
                     _arch_list = "3.5;3.7;5.0;5.2;6.0;6.1;7.0;7.5+PTX"
                 elif (major, minor) < (11, 8):
                     _arch_list = "3.5;3.7;5.0;5.2;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
-                elif (major, minor) < (12, 0):
+                elif (major, minor) < (12, 8):
                     _arch_list = "5.0;5.2;6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0+PTX"
                 else:
-                    _arch_list = "5.0;5.2;6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0+PTX"
+                    _arch_list = "7.5;8.0;8.6;8.9;9.0;10.0;12.0+PTX"
     _all_arch = "5.2;6.0;6.1;7.0;7.5;8.0;8.6+PTX"
     for named_arch, archval in named_arches.items():
         _all_arch = _all_arch.replace(named_arch, archval)