diff --git a/.github/workflows/test-python-package.yml b/.github/workflows/test-python-package.yml index e2dac9687..dec26fdba 100644 --- a/.github/workflows/test-python-package.yml +++ b/.github/workflows/test-python-package.yml @@ -28,7 +28,7 @@ jobs: - name: Setup Nox uses: fjwillemsen/setup-nox2@v3.0.0 - name: Setup Poetry - uses: Gr1N/setup-poetry@v8 + uses: Gr1N/setup-poetry@v9 - run: poetry self add poetry-plugin-export - name: Run tests with Nox run: | diff --git a/doc/source/observers.rst b/doc/source/observers.rst index 174e6a01a..df4013734 100644 --- a/doc/source/observers.rst +++ b/doc/source/observers.rst @@ -112,3 +112,11 @@ More information about PMT can be found here: https://git.astron.nl/RD/pmt/ +NCUObserver +~~~~~~~~~~~ + +The NCUObserver can be used to automatically extract performance counters during tuning using NVIDIA's Nsight Compute profiler. +The NCUObserver relies on an intermediate library, which can be found here: https://github.com/nlesc-recruit/nvmetrics + +.. autoclass:: kernel_tuner.observers.ncu.NCUObserver + diff --git a/examples/cuda/vector_add_observers_ncu.py b/examples/cuda/vector_add_observers_ncu.py new file mode 100644 index 000000000..589420a3f --- /dev/null +++ b/examples/cuda/vector_add_observers_ncu.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python +"""This is the minimal example from the README""" +import json + +import numpy +from kernel_tuner import tune_kernel +from kernel_tuner.observers.ncu import NCUObserver + +def tune(): + + kernel_string = """ + __global__ void vector_add(float *c, float *a, float *b, int n) { + int i = blockIdx.x * block_size_x + threadIdx.x; + if (i