-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathgpu_fft_py.pyx
More file actions
105 lines (91 loc) · 2.95 KB
/
gpu_fft_py.pyx
File metadata and controls
105 lines (91 loc) · 2.95 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import numpy as np
from cpython cimport bool
# C declarations taken from "mailbox.h". Cython only trusts these signatures;
# they must match the real header exactly.
cdef extern from "mailbox.h":
    # Opens the mailbox device with the given file name. GpuFft treats a
    # negative return value as failure and any other value as the handle to
    # pass to gpu_fft_prepare() and mbox_close().
    int mbox_open_by_name(const char* s)
    # Called by GpuFft with the device name right after a failed open.
    # NOTE(review): presumably reports the error to the user -- confirm
    # against the header; the int result is ignored by this module.
    int mbox_error(const char* s)
    # Releases a handle previously returned by mbox_open_by_name().
    void mbox_close(int mb)
    # Default device file name, used as the default `device_name` of GpuFft.
    const char* DEVICE_FILE_NAME
# C declarations taken from "gpu_fft.h". Only the members and functions this
# module touches need to be listed, but each one must match the header.
cdef extern from "gpu_fft.h":
    # One complex sample: real and imaginary part as C floats.
    cdef struct GPU_FFT_COMPLEX:
        float re, im
    # State of one prepared batch of transforms.
    cdef struct GPU_FFT:
        # Base of the input samples; job j starts at in_ + j * step.
        # NOTE(review): confirm the header really names this member `in_`.
        # If the C member is called `in`, it has to be declared here as
        # `GPU_FFT_COMPLEX* in_ "in"` to map the Cython name to the C name.
        GPU_FFT_COMPLEX* in_
        # Base of the output samples; job j starts at out + j * step.
        GPU_FFT_COMPLEX* out
        # `step` is the distance, in GPU_FFT_COMPLEX elements, between the
        # starts of consecutive jobs. `mb` is not read by this module.
        int mb, step
        # Not read by this module.
        # NOTE(review): verify these members exist at this level of the struct.
        unsigned timeout, noflush, handle, size, vc_msg
    # Allocates and sets up a batch of transforms, storing the result in *fft.
    # GpuFft interprets negative results as errors: -1 V3D could not be
    # enabled, -2 unsupported log2_N, -3 out of memory, anything else unknown.
    int gpu_fft_prepare(
        int mb, # mailbox file_desc
        int log2_N, # log2(FFT_length) = 8...17
        int direction, # GPU_FFT_FWD: fft(); GPU_FFT_REV: ifft()
        int jobs, # number of transforms in batch
        GPU_FFT **fft)
    # Runs the prepared batch: reads from `in_`, writes to `out`.
    # NOTE(review): the unsigned result is ignored by GpuFft.execute --
    # confirm whether a non-zero value signals a failure.
    unsigned gpu_fft_execute(GPU_FFT *info)
    # Frees everything gpu_fft_prepare() allocated for `info`.
    void gpu_fft_release(GPU_FFT *info)
# Direction constants of gpu_fft_prepare(), taken from the header itself so
# the wrapper never hard-codes their numeric values.
cdef extern from "gpu_fft.h":
    enum:
        GPU_FFT_FWD
        GPU_FFT_REV


cdef class GpuFft:
    """Batch of complex FFTs executed through the ``gpu_fft`` C library.

    A mailbox device is opened and ``jobs`` transforms of length
    ``2 ** log_size`` are prepared on construction; both are released when
    the object is deallocated.

    Parameters
    ----------
    log_size : int
        log2 of the transform length, between 8 and 17 inclusive.
    is_forward : bool, optional
        True selects ``GPU_FFT_FWD`` (fft), False ``GPU_FFT_REV`` (ifft).
    jobs : int, optional
        Number of transforms in one batch.
    buffer : optional
        Object that receives the results of ``execute``. It is resized in
        place to ``[jobs, 2 ** log_size]`` and must support ``buffer[j][i]``
        assignment. When omitted, a complex NumPy array is allocated.
    device_name : bytes or str, optional
        Mailbox device file name; defaults to ``DEVICE_FILE_NAME``.

    Raises
    ------
    AssertionError
        If ``log_size`` is outside 8..17.
    Exception
        If the device cannot be opened or the GPU setup fails.

    Attributes
    ----------
    buffer
        Result storage; row ``j`` holds the output of job ``j`` after
        ``execute``.
    """
    cdef GPU_FFT* fft
    cdef int mb, jobs, log_size, size
    # Plain C flags: zero-initialised before __cinit__ runs and never touched
    # by the garbage collector, so __dealloc__ can rely on them even when
    # construction failed part-way through.
    cdef bint prepared, mb_opened
    # Declared explicitly: a cdef class has no instance __dict__, so an
    # undeclared attribute cannot be assigned.
    cdef public object buffer

    def __cinit__(self, int log_size, is_forward=True, int jobs=10, buffer=None,
                  device_name=DEVICE_FILE_NAME):
        cdef int direction, result
        cdef const char* c_device_name

        self.mb = -1
        self.mb_opened = False
        self.prepared = False

        # Raised explicitly (rather than with `assert`) so the check survives
        # `python -O`; AssertionError is kept for backward compatibility.
        if not (8 <= log_size <= 17):
            raise AssertionError('log_size must be between 8 and 17')

        self.jobs = jobs
        self.log_size = log_size
        # Shift instead of `2 ** log_size`: stays a C int under every Cython
        # power-operator setting.
        self.size = 1 << log_size
        direction = GPU_FFT_FWD if is_forward else GPU_FFT_REV

        # The C API needs a byte string; accept text as well.
        if not isinstance(device_name, bytes):
            device_name = device_name.encode('utf-8')
        c_device_name = device_name

        print('about to open device')
        self.mb = mbox_open_by_name(c_device_name)
        if self.mb < 0:
            mbox_error(c_device_name)
            print('Couldn\'t open device')
            raise Exception("Couldn't open device.")
        self.mb_opened = True

        result = gpu_fft_prepare(self.mb, log_size, direction, jobs, &self.fft)
        if result < 0:
            if result == -1:
                err = 'Unable to enable V3D. Please check your firmware is up to date.'
            elif result == -2:
                err = 'log_size=%d not supported. Try between 8 and 17.' % log_size
            elif result == -3:
                err = 'Out of memory. Try a smaller batch or increase GPU memory.'
            else:
                err = 'Unknown error %d' % result
            raise Exception(err)
        self.prepared = True

        # `is None` rather than truthiness: the truth value of a
        # multi-element ndarray is ambiguous and raises ValueError.
        if buffer is None:
            self.buffer = np.zeros((self.jobs, self.size), dtype=complex)
        else:
            buffer.resize([self.jobs, self.size])
            self.buffer = buffer

    def __dealloc__(self):
        """Release the GPU batch and close the mailbox, if they were acquired."""
        if self.prepared:
            gpu_fft_release(self.fft)
        # Guarded by a flag instead of `mb >= 0`: the zero-initialised `mb`
        # of a half-constructed object would otherwise close descriptor 0.
        if self.mb_opened:
            mbox_close(self.mb)

    def execute(self, job_data):
        """Run one batch and store the results in ``self.buffer``.

        Parameters
        ----------
        job_data : iterable
            Exactly ``jobs`` rows, each an iterable of exactly
            ``2 ** log_size`` samples. A sample is either a ``(re, im)``
            pair or a number exposing ``.real`` and ``.imag``.

        Raises
        ------
        AssertionError
            If the number of rows or the length of any row is wrong.
        """
        cdef float re, im
        cdef int i, j, rows = 0, cols
        cdef GPU_FFT_COMPLEX* base

        # Explicit raises instead of `assert`: these checks guard raw writes
        # into GPU memory and must not disappear under `python -O`.
        for data in job_data:
            if rows >= self.jobs:
                raise AssertionError(
                    'too many jobs: expected %d' % self.jobs)
            base = self.fft.in_ + rows * self.fft.step
            cols = 0  # counted per row so a short row is always detected
            for d in data:
                if cols >= self.size:
                    raise AssertionError(
                        'job %d has too many samples: expected %d'
                        % (rows, self.size))
                try:
                    re, im = d
                except TypeError:
                    # Not a pair: treat it as a (complex) number.
                    re, im = d.real, d.imag
                base[cols].re = re
                base[cols].im = im
                cols += 1
            if cols != self.size:
                raise AssertionError(
                    'job %d has %d samples: expected %d'
                    % (rows, cols, self.size))
            rows += 1
        if rows != self.jobs:
            raise AssertionError(
                'got %d jobs: expected %d' % (rows, self.jobs))

        # NOTE(review): the status returned by gpu_fft_execute() is ignored,
        # as before -- confirm whether non-zero should raise.
        gpu_fft_execute(self.fft)

        for j in range(self.jobs):
            base = self.fft.out + j * self.fft.step
            row = self.buffer[j]
            for i in range(self.size):
                row[i] = base[i].re + base[i].im * 1j