This file is indexed.

/usr/share/psychtoolbox-3/PsychDemos/GPGPUDemos/GPUFFTDemo1.m is in psychtoolbox-3-common 3.0.12.20160126.dfsg1-1ubuntu1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
function GPUFFTDemo1(fwidth)
% GPUFFTDemo1([fwidth=11]) - Demonstrate use of GPGPU computing for 2D-FFT.
%
% This demo makes use of the FOSS GPUmat toolbox to perform a GPU
% accelerated 2D FFT + filtering in frequency space + 2D inverse FFT on our
% favourite bunnies. GPUmat allows to use NVidia's CUDA gpu computing
% framework on supported NVidia gpu's (GeForce-8000 series and later, aka
% Direct3D-10 or OpenGL-3 capable).
%
% It shows how a Psychtoolbox floating point texture (with our bunnies
% inside) can be efficiently passed to GPUmat as a matrix of GPUsingle data
% type, which is stored and processed on the GPU. Then it uses GPUmat's fft
% routines for forward/inverse fft's and matrix manipulation. Then it
% returns the final image to Psychtoolbox as a new floating point texture
% for display. The same steps are carried out with Matlab/Octave's regular
% fft routines on the cpu for reference.
%
% Requires the freely downloadable NVidia CUDA-5.0 SDK/Runtime and the free
% and open-source GPUmat toolbox as well as a compute capable NVidia
% graphics card.
%
% Parameters:
%
% 'fwidth' = Width of low-pass filter kernel in frequency space units.
%

% History:
% 5.02.2013  mk  Written.

% Default filter width is 11 units:
if nargin < 1 || isempty(fwidth)
    fwidth = 11;
end

% Running under PTB-3 hopefully?
AssertOpenGL;

% Skip timing tests and calibrations for this demo:
oldskip = Screen('Preference','SkipSyncTests', 2);

% Open onscreen window with black background on (external) screen,
% enable GPGPU computing support:
screenid = max(Screen('Screens'));

PsychImaging('PrepareConfiguration');
% We explicitely request the GPUmat based api:
PsychImaging('AddTask', 'General', 'UseGPGPUCompute', 'GPUmat');
w = PsychImaging('OpenWindow', screenid, 0);

try
    Screen('TextSize', w, 28);
    DrawFormattedText(w, 'Please be patient throughout the demo.\nSome benchmark steps are time intense...', 'center', 'center', 255);
    Screen('Flip', w);
    
    % Read our beloved bunny image from filesystem:
    bunnyimg = imread([PsychtoolboxRoot 'PsychDemos/konijntjes1024x768.jpg']);
    
    % Add a fourth neutral alpha channel, so we can create a 4-channel
    % RGBA texture. Why? Some GPU compute api's, e.g., NVidia's CUDA,
    % don't like RGB textures, so we need to do this to avoid errors:
    bunnyimg(:,:,4) = 255;
    
    % Turn it into a double matrix for conversion into a floating point
    % texture, and normalize/convert its color range from 0-255 to 0-1:
    dbunny = double(bunnyimg)/255;
    
    % Maketexture a texture out of it in float precision with upright
    % texture orientation, so further processing in teamwork between
    % GPUmat and Psychtoolbox is simplified. Note: This conversion
    % would turn a luminance texture into a troublesome RGB texture,
    % which would only work on Linux and maybe Windows, but not on OSX.
    % We avoid this by never passing in a luminance texture if it is
    % intended to be used with GPUmat aka NVidia's CUDA framework.
    bunnytex = Screen('MakeTexture', w, dbunny, [], [], 2, 1);
    
    %% Do the FFT -> manipulate -> IFFT cycle once in Matlab/Octave as a
    % reference:
    
    % Extract 2nd layer, the green color channel as an approximation of
    % luminance:
    dbunny = dbunny(:,:,2);
    
    % Show it pre-transform:
    close all;
    imshow(dbunny);
    title('Pre-FFT Bunny.');
    
    % Perform 2D-FFT in Matlab/Octave on CPU:
    f = fft2(dbunny);

    % Shift DC component to center of spectrum "image":
    fs = fftshift(f);

    % Define a low-pass filter in frequency space, ie., an amplitude mask
    % to point-wise multiply with the FFT spectrum of FFT transformed
    % image:
    % fwidth controls frequency - lower values = lower cutoff frequency:
    hh = size(fs, 1)/2;
    hw = size(fs, 2)/2;
    [x,y] = meshgrid(-hw:hw,-hh:hh);
    mask = exp(-((x/fwidth).^2)-((y/fwidth).^2));
    
    % Quick and dirty trick: Cut off a bit from mask, so size of mask and
    % frequency spectrum matches -- Don't do this at home (or for real research scripts)!
    mask = mask(2:end, 2:end);
    
    figure;
    imshow(mask);
    title('Gaussian low pass filter mask for FFT space filtering:');

    tcpu = GetSecs;
    
    for trials = 1:10
        % Low pass filter by point-wise multiply of fs with filter 'mask' in
        % frequency space:
        fs = fs .* mask;
        
        % Invert shift of filtered fs:
        f = ifftshift(fs);
        
        % Perform inverse 2D-FFT in Matlab/Octave on CPU:
        b = ifft2(f);
    end
    
    tcpu = (GetSecs - tcpu) / trials;
    fprintf('Measured per trial runtime of CPU FFT: %f msecs [n=%i trials].\n', tcpu * 1000, trials);
    
    % Extract real component for display:
    r = real(b);
    
    figure;
    imshow(r);
    title('Post-FFT->Filter->IFFT Bunny.');
    
    %% Perform FFT->Process->IFFT on GPU via GPUmat / CUDA:
    
    % First Show input image:
    Screen('DrawTexture', w, bunnytex);
    Screen('TextSize', w, 28);
    DrawFormattedText(w, 'Original pre-FFT Bunny:\nPress key to continue.', 'center', 40, [255 255 0]);
    Screen('Flip', w);
    KbStrokeWait(-1);
    
    % Create GPUsingle matrix with input image from RGBA float bunnytex:
    A = GPUTypeFromToGL(0, bunnytex, [], [], 0);
    
    % Extract 2nd layer, the green channel from it for further use:
    % Note: The 1st dimension of a GPUsingle matrix created from a
    % Psychtoolbox texture always contains the "color channel":
    A = A(2, :, :);
    
    % Squeeze it to remove the singleton 1st dimension of A, because
    % fft2 and ifft2 can only work on 2D input matrices, not 3D
    % matrices, not even ones with a singleton dimension, ie., a 2D
    % matrix in disguise:
    A = squeeze(A);
    
    % Perform forward 2D FFT on GPU:
    F = fft2(A);
    
    % Process it on GPU:
    
    % First shift the spectrums origin (DC component aka 0 Hz frequency) to
    % the center of the FFT image:
    FS = fftshift(F);
    
    % Generate the amplitude spectrum, scaled to 1/100th via abs(FS)/100.0,
    % then converte the image into a texture 'fftmag' and display it:
    fftmag = GPUTypeFromToGL(1, abs(FS)/100.0, [], [], 0);
    Screen('DrawTexture', w, fftmag);
    DrawFormattedText(w, 'Amplitude spectrum post-FFT Bunny:\nPress key to continue.', 'center', 40, [0 255 0]);
    Screen('Flip', w);
    KbStrokeWait(-1);
    
    % Turn our filter 'mask' from cpu version above into a matrix on GPU:
    % We must also transpose the mask, because 'mask' was generated to
    % match the shape of the fs matrix on the cpu in Matlab/Octave. As
    % Matlab and Octave use column-major storage, whereas Psychtoolbox uses
    % row-major storage, all the GPU variables we got from Psychtoolbox
    % textures are transposed with respect to the Matlab version. A simple
    % transpose fixes this for our mask to match GPU format:
    FM = transpose(GPUsingle(mask));
    
    % Measure execution time on GPU. The cudaThreadSynchronize() command
    % makes sure we are actually measuring GPU timing with the GetSecs():
    cudaThreadSynchronize;
    tgpu = GetSecs;

    for trials = 1:10
        % Filter the amplitude spectrum by point-wise multiply with filter FM:
        FS = FM .* FS;
        
        % Shift back DC component to original position to prepare inverse FFT:
        F = ifftshift(FS);
        
        % Process inverse 2D FFT on GPU:
        B = ifft2(F);
    end
    
    cudaThreadSynchronize;
    tgpu = (GetSecs - tgpu) / trials;
    fprintf('Measured per trial runtime of GPU FFT: %f msecs [n=%i trials].\n', tgpu * 1000, trials);
    fprintf('Speedup GPU vs. CPU: %f\n', tcpu / tgpu);
    
    % Extract real component for display:
    R = real(B);
    
    % Convert R back into a floating point luminance texture 'tex' for
    % processing/display by Screen. Here it is fine to pass a
    % single-layer matrix for getting a luminance texture, because we
    % don't use Screen('MakeTexture') or similar and don't perform the
    % normalization step which would convert into a troublesome RGB
    % texture.    
    tex = GPUTypeFromToGL(1, R, [], [], 0);
    
    % Show it:
    Screen('DrawTexture', w, tex);
    DrawFormattedText(w, 'GPU-Processed FFT->Filter->IFFT Bunny:\nPress key to continue.', 'center', 40, [0 255 0]);
    Screen('Flip', w);
    KbStrokeWait(-1);
    
    % Reset interop cache before closing the windows and textures:
    GPUTypeFromToGL(4);
    
    % Close window, which will also release all textures:
    sca;
    
    % Restore sync test setting:
    Screen('Preference','SkipSyncTests', oldskip);
    
catch %#ok<CTCH>
    % Error handling.
    
    % Reset interop cache before closing the windows and textures:
    GPUTypeFromToGL(4);
    
    % Close window.
    sca;
    
    % Restore sync test setting:
    Screen('Preference','SkipSyncTests', oldskip);
    
    psychrethrow(psychlasterror);
end

% Done.
end

% Documentation of another, not yet tested, more complex and probably
% higher performance, way of doing GPU accelerated FFT with GPUmat:
%
%         fftType = cufftType;
%         fftDir  = cufftTransformDirections;
%
%         % FFT plan
%         plan = 0;
%         [status, plan] = cufftPlan2d(plan, size(d_A, 1), size(d_A, 2), fftType.CUFFT_R2C);
%         cufftCheckStatus(status, 'Error in cufftPlan2D');
%
%         % Run GPU FFT
%         [status] = cufftExecR2C(plan, getPtr(d_A), getPtr(d_B), fftDir.CUFFT_FORWARD);
%         cufftCheckStatus(status, 'Error in cufftExecR2C');
%
%         % Run GPU IFFT
%         [status] = cufftExecC2R(plan, getPtr(d_B), getPtr(d_A), fftDir.CUFFT_INVERSE);
%         cufftCheckStatus(status, 'Error in cufftExecC2C');
%
%         % results should be scaled by 1/N if compared to CPU
%         % h_B = 1/N*single(d_A);
%
%         [status] = cufftDestroy(plan);
%         cufftCheckStatus(status, 'Error in cuffDestroyPlan');