Openholo  v1.0
Open Source Digital Holographic Library
ophSig_GPU.cpp
Go to the documentation of this file.
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install, copy or use the software.
7 //
8 //
9 // License Agreement
10 // For Open Source Digital Holographic Library
11 //
12 // Openholo library is free software;
13 // you can redistribute it and/or modify it under the terms of the BSD 2-Clause license.
14 //
15 // Copyright (C) 2017-2024, Korea Electronics Technology Institute. All rights reserved.
16 // E-mail : contact.openholo@gmail.com
17 // Web : http://www.openholo.org
18 //
19 // Redistribution and use in source and binary forms, with or without modification,
20 // are permitted provided that the following conditions are met:
21 //
22 // 1. Redistribution's of source code must retain the above copyright notice,
23 // this list of conditions and the following disclaimer.
24 //
25 // 2. Redistribution's in binary form must reproduce the above copyright notice,
26 // this list of conditions and the following disclaimer in the documentation
27 // and/or other materials provided with the distribution.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the copyright holder or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 // This software contains opensource software released under GNU Generic Public License,
41 // NVDIA Software License Agreement, or CUDA supplement to Software License Agreement.
42 // Check whether software you use contains licensed software.
43 //
44 //M*/
45 
46 #include "ophSig.h"
47 #include "ophSig_GPU.h"
48 
49 
50 
51 static void HandleError(cudaError_t err,
52  const char* file,
53  int line) {
54  if (err != cudaSuccess) {
55  printf("%s in %s at line %d\n", cudaGetErrorString(err),
56  file, line);
57  exit(EXIT_FAILURE);
58  }
59 }
60 #define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))
61 
62 
63 
64 void ophSig::cvtOffaxis_GPU(Real angleX, Real angleY) {
65  int nx = context_.pixel_number[_X];
66  int ny = context_.pixel_number[_Y];
68  Complex<Real> *host_data;
69  Complex<Real> *temp_data = new Complex<Real>[nx*ny];
70 
71  cField2Buffer(*ComplexH, &host_data,nx,ny);
72 
73  cudaStreamCreate(&streamLF);
74 
75  Complex<Real> *src_data;
76  Real *device_angle = new Real[2];
77  Real *temp_angle = new Real[2];
78  temp_angle[0] = angleX;
79  temp_angle[1] = angleY;
80 
81  //
82  Real *dst_data;
83  Complex<Real> *F;
84  ophSigConfig *device_config = nullptr;
85  //Malloc
86 
87  cudaMalloc(&src_data, sizeof(Complex<Real>)*nx*ny);
88  cudaMalloc(&dst_data, sizeof(Real)*nx*ny);
89  cudaMalloc(&F, sizeof(Complex<Real>)*ny*nx);
90  cudaMalloc(&device_config, sizeof(ophSigConfig));
91  cudaMalloc(&device_angle, sizeof(Real) * 2);
92 
93  //memcpy
94  cudaMemcpy(src_data, host_data, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice);
95  cudaMemcpy(dst_data, 0, sizeof(Real)*nx*ny, cudaMemcpyHostToDevice);
96  cudaMemcpy(F, 0, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice);
97  cudaMemcpy(device_config, &_cfgSig, sizeof(ophSigConfig), cudaMemcpyHostToDevice);
98  cudaMemcpy(device_angle, temp_angle, sizeof(Real)*2, cudaMemcpyHostToDevice);
99 
100  //start
101 
102  cudaCvtOFF(src_data, dst_data, device_config, nx, ny,wl, F, device_angle);
103  //end
104  cudaMemcpy(temp_data, src_data, nx*ny * sizeof(Complex<Real>), cudaMemcpyDeviceToHost);
105  ivec2 size(nx, ny);
106  Buffer2Field(temp_data, *ComplexH, size);
107 
108  cudaFree(src_data);
109  cudaFree(dst_data);
110  cudaFree(F);
111  cudaFree(device_config);
112  cudaFree(device_angle);
113 
114  delete[] temp_data;
115  delete[] host_data;
116 }
117 
118 bool ophSig::sigConvertHPO_GPU(Real depth, Real_t redRate) {
119 
120 
121  int nx = context_.pixel_number[_X];
122  int ny = context_.pixel_number[_Y];
123  Complex<Real> *host_data,*temp_data,*F;
124  cufftDoubleComplex *fft_temp_data,*out_data;
125  cufftHandle fftplan;
126  ophSigConfig *device_config = nullptr;
127  if (cufftPlan2d(&fftplan, nx, ny, CUFFT_Z2Z) != CUFFT_SUCCESS)
128  {
129  LOG("FAIL in creating cufft plan");
130  return false;
131  };
132  if (!streamLF)
133  cudaStreamCreate(&streamLF);
134 
135  cField2Buffer(*ComplexH, &host_data, nx, ny);
136 
137  cudaMalloc(&temp_data, sizeof(Complex<Real>)*nx*ny);
138  cudaMalloc(&fft_temp_data, sizeof(cufftDoubleComplex)*nx*ny);
139  cudaMalloc(&out_data, sizeof(cufftDoubleComplex)*nx*ny);
140  cudaMalloc(&F, sizeof(Complex<Real>)*ny*nx);
141  cudaMalloc(&device_config, sizeof(ophSigConfig));
142 
143  cudaMemcpy(temp_data, host_data, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice);
144  cudaMemcpy(fft_temp_data, 0, sizeof(cufftDoubleComplex)*nx*ny, cudaMemcpyHostToDevice);
145  cudaMemcpy(out_data, 0, sizeof(cufftDoubleComplex)*nx*ny, cudaMemcpyHostToDevice);
146  cudaMemcpy(F, 0, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice);
147  cudaMemcpy(device_config, &_cfgSig, sizeof(ophSigConfig), cudaMemcpyHostToDevice);
148 
149  cudaCvtFieldToCuFFT(temp_data, fft_temp_data, nx, ny);
150  cudaCuFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_FORWARD);
151 
152  // 데이터 계산 Real wl = *context_.wave_length; Real NA = _cfgSig.NA; Real_t NA_g = NA * redRate; Real Rephase = -(1 / (4 * M_PI)*pow((wl / NA_g), 2)); Real Imphase = ((1 / (4 * M_PI))*depth*wl); cudaCvtHPO(streamLF,out_data,fft_temp_data,device_config,F,nx, ny,Rephase,Imphase); cudaCuIFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_INVERSE); cudaCvtCuFFTToField(fft_temp_data, temp_data, nx, ny); cudaMemcpy(host_data, temp_data, nx*ny * sizeof(Complex<Real>), cudaMemcpyDeviceToHost); ivec2 size(nx, ny); Buffer2Field(host_data, *ComplexH, size); // cudaFree(F); cudaFree(device_config); cudaFree(temp_data); cudaFree(out_data); cudaFree(fft_temp_data); cufftDestroy(fftplan); delete[] host_data; return true; } bool ophSig::sigConvertCAC_GPU(double red, double green, double blue) { int nx = context_.pixel_number[_X]; int ny = context_.pixel_number[_Y]; Complex<Real> *host_data, *temp_data, *F; cufftDoubleComplex *fft_temp_data, *out_data; cufftHandle fftplan; ophSigConfig *device_config = nullptr; Real radius = _radius; if (cufftPlan2d(&fftplan, nx, ny, CUFFT_Z2Z) != CUFFT_SUCCESS) { LOG("FAIL in creating cufft plan"); return false; }; ColorField2Buffer(ComplexH[0], &host_data, nx, ny); cudaMalloc(&temp_data, sizeof(Complex<Real>)*nx*ny); cudaMalloc(&fft_temp_data, sizeof(cufftDoubleComplex)*nx*ny); cudaMalloc(&out_data, sizeof(cufftDoubleComplex)*nx*ny); cudaMalloc(&F, sizeof(Complex<Real>)*ny*nx); cudaMalloc(&device_config, sizeof(ophSigConfig)); cudaMemcpy(F, 0, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice); cudaMemcpy(device_config, &_cfgSig, sizeof(ophSigConfig), cudaMemcpyHostToDevice); cudaMemcpy(fft_temp_data, 0, sizeof(cufftDoubleComplex)*nx*ny, cudaMemcpyHostToDevice); cudaMemcpy(out_data, 0, sizeof(cufftDoubleComplex)*nx*ny, cudaMemcpyHostToDevice); //blue cudaMemcpy(temp_data, host_data, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice); cudaCvtFieldToCuFFT(temp_data, out_data, nx, ny); cudaCuFFT(&fftplan, out_data, fft_temp_data, nx, ny, CUFFT_FORWARD); double sigmaf = ((_foc[2] - _foc[0]) * blue) / (4 * M_PI); cudaCvtCAC(fft_temp_data, out_data,F,device_config,nx, ny,sigmaf,radius); cudaCuIFFT(&fftplan, out_data, fft_temp_data, nx, ny, CUFFT_INVERSE); cudaCvtCuFFTToField(out_data, temp_data, nx, ny); cudaMemcpy(host_data, temp_data, nx*ny * sizeof(Complex<Real>), cudaMemcpyDeviceToHost); ivec2 size(nx, ny); Buffer2Field(host_data, ComplexH[0], size); // green ColorField2Buffer(ComplexH[1], &host_data, nx, ny); cudaMemcpy(temp_data, host_data, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice); cudaCvtFieldToCuFFT(temp_data, out_data, nx, ny); cudaCuFFT(&fftplan, out_data, fft_temp_data, nx, ny, CUFFT_FORWARD); sigmaf = ((_foc[2] - _foc[1]) * green) / (4 * M_PI); cudaCvtCAC(fft_temp_data, out_data, F, device_config, nx, ny, sigmaf, radius); cudaCuIFFT(&fftplan, out_data, fft_temp_data, nx, ny, CUFFT_INVERSE); cudaCvtCuFFTToField(out_data, temp_data, nx, ny); cudaMemcpy(host_data, temp_data, nx*ny * sizeof(Complex<Real>), cudaMemcpyDeviceToHost); Buffer2Field(host_data, ComplexH[1], size); //free cudaFree(F); cudaFree(device_config); cudaFree(temp_data); cudaFree(out_data); cudaFree(fft_temp_data); cufftDestroy(fftplan); delete[] host_data; return true; } bool ophSig::propagationHolo_GPU(float depth) { int nx = context_.pixel_number[_X]; int ny = context_.pixel_number[_Y]; Complex<Real> *host_data, *temp_data, *F; cufftDoubleComplex *fft_temp_data, *out_data; cufftHandle fftplan; ophSigConfig *device_config = nullptr; if (cufftPlan2d(&fftplan, nx, ny, CUFFT_Z2Z) != CUFFT_SUCCESS) { LOG("FAIL in creating cufft plan"); return false; }; cField2Buffer(*ComplexH, &host_data, nx, ny); cudaMalloc(&temp_data, sizeof(Complex<Real>)*nx*ny); cudaMalloc(&fft_temp_data, sizeof(cufftDoubleComplex)*nx*ny); cudaMalloc(&out_data, sizeof(cufftDoubleComplex)*nx*ny); cudaMalloc(&F, sizeof(Complex<Real>)*ny*nx); cudaMalloc(&device_config, sizeof(ophSigConfig)); cudaMemcpy(temp_data, host_data, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice); cudaMemcpy(fft_temp_data, 0, sizeof(cufftDoubleComplex)*nx*ny, cudaMemcpyHostToDevice); cudaMemcpy(out_data, 0, sizeof(cufftDoubleComplex)*nx*ny, cudaMemcpyHostToDevice); cudaMemcpy(F, 0, sizeof(cufftDoubleComplex)*nx*ny, cudaMemcpyHostToDevice); cudaMemcpy(device_config, &_cfgSig, sizeof(ophSigConfig), cudaMemcpyHostToDevice); cudaCvtFieldToCuFFT(temp_data, fft_temp_data, nx, ny); cudaCuFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_FORWARD); Real wl = *context_.wave_length; Real_t sigmaf = (depth*wl) / (4 * M_PI); cudaPropagation(out_data, fft_temp_data, F, device_config, nx, ny, sigmaf); cudaCuIFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_INVERSE); cudaCvtCuFFTToField(fft_temp_data, temp_data, nx, ny); cudaMemcpy(host_data, temp_data, nx*ny * sizeof(Complex<Real>), cudaMemcpyDeviceToHost); ivec2 size(nx, ny); Buffer2Field(host_data, *ComplexH, size); // cudaFree(F); cudaFree(device_config); cudaFree(temp_data); cudaFree(out_data); cudaFree(fft_temp_data); cufftDestroy(fftplan); delete[] host_data; return true; } double ophSig::sigGetParamSF_GPU(float zMax, float zMin, int sampN, float th) { int nx = context_.pixel_number[_X]; int ny = context_.pixel_number[_Y]; Complex<Real> *host_data, *temp_data, *FH; cufftDoubleComplex *fft_temp_data, *out_data, *Ftemp_data; cufftHandle fftplan; cufftResult a; Real wl = *context_.wave_length; Real depth; Real *f; ophSigConfig *device_config = nullptr; cudaMalloc(&f, sizeof(Real)*nx*ny); cudaMalloc(&FH, sizeof(Complex<Real>)*ny*nx); cudaMalloc(&device_config, sizeof(ophSigConfig)); cudaMalloc(&temp_data, sizeof(Complex<Real>)*nx*ny); cudaMalloc(&fft_temp_data, sizeof(cufftDoubleComplex)*nx*ny); cudaMalloc(&Ftemp_data, sizeof(cufftDoubleComplex)*nx*ny); cudaMalloc(&out_data, sizeof(cufftDoubleComplex)*nx*ny); if (cufftPlan2d(&fftplan, nx, ny, CUFFT_Z2Z) != CUFFT_SUCCESS) { LOG("FAIL in creating cufft plan"); return false; }; cField2Buffer(*ComplexH, &host_data, nx, ny); cudaMemcpy(temp_data, host_data, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice); cudaMemcpy(fft_temp_data, 0, sizeof(cufftDoubleComplex)*nx*ny, cudaMemcpyHostToDevice); cudaMemcpy(out_data, 0, sizeof(cufftDoubleComplex)*nx*ny, cudaMemcpyHostToDevice); cudaCvtFieldToCuFFT(temp_data, fft_temp_data, nx, ny); cudaCuFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_FORWARD); cudaMemcpy(FH, 0, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice); cudaMemcpy(f, 0, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice); cudaMemcpy(device_config, &_cfgSig, sizeof(ophSigConfig), cudaMemcpyHostToDevice); depth = cudaGetParamSF(&fftplan, out_data, Ftemp_data, fft_temp_data, f, FH, device_config, nx, ny, zMax, zMin, sampN, th, wl); /*cudaCvtCuFFTToField(out_data, temp_data, nx, ny); cudaMemcpy(host_data, temp_data, nx*ny * sizeof(Complex<Real>), cudaMemcpyDeviceToHost); ivec2 size(nx, ny);*/ //Buffer2Field(host_data, *ComplexH, size); // cudaFree(FH); cudaFree(device_config); cudaFree(temp_data); cudaFree(Ftemp_data); cudaFree(out_data); cudaFree(fft_temp_data); cudaFree(f); cufftDestroy(fftplan); delete[] host_data; return depth; } double ophSig::sigGetParamAT_GPU() { Real index; int nx = context_.pixel_number[_X]; int ny = context_.pixel_number[_Y]; int tid = 0; ivec2 size(nx, ny); Real_t NA_g = (Real_t)0.025; Real wl = *context_.wave_length; Real max = 0; Complex<Real> *host_data, *Flr, *Fli, *G, *temp_data; cufftDoubleComplex *fft_data, *out_data; OphComplexField Fo_temp(nx, ny); OphComplexField Fon, yn, Ab_yn; OphRealField Ab_yn_half; ophSigConfig *device_config = nullptr; cufftHandle fftplan; vector<Real> t, tn; cufftResult a; //a = cufftPlan2d(&fftplan, nx, ny, CUFFT_Z2Z); //cout << a << endl; if (cufftPlan2d(&fftplan, nx, ny, CUFFT_Z2Z) != CUFFT_SUCCESS) { LOG("FAIL in creating cufft plan"); return 0; }; cField2Buffer(*ComplexH, &host_data, nx, ny); cudaMalloc(&temp_data, sizeof(Complex<Real>)*nx*ny); cudaMalloc(&Flr, sizeof(Complex<Real>)*nx*ny); cudaMalloc(&Fli, sizeof(Complex<Real>)*nx*ny); cudaMalloc(&G, sizeof(Complex<Real>)*nx*ny); cudaMalloc(&device_config, sizeof(ophSigConfig)); cudaMalloc(&fft_data, sizeof(cufftDoubleComplex)*nx*ny); cudaMalloc(&out_data, sizeof(cufftDoubleComplex)*nx*ny); cudaMemcpy(temp_data, host_data, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice); cudaMemcpy(Flr, 0, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice); cudaMemcpy(Fli, 0, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice); cudaMemcpy(G, 0, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice); cudaMemcpy(device_config, &_cfgSig, sizeof(ophSigConfig), cudaMemcpyHostToDevice); cudaMemcpy(fft_data, 0, sizeof(cufftDoubleComplex)*nx*ny, cudaMemcpyHostToDevice); cudaMemcpy(out_data, 0, sizeof(cufftDoubleComplex)*nx*ny, cudaMemcpyHostToDevice); cudaGetParamAT1(temp_data, Flr, Fli, G, device_config, nx, ny, NA_g, wl); cudaCvtFieldToCuFFT(Flr, fft_data, nx, ny); cudaCuFFT(&fftplan, fft_data, out_data, nx, ny, CUFFT_FORWARD); cudaCvtCuFFTToField(out_data, Flr, nx, ny); cudaCvtFieldToCuFFT(Fli, fft_data, nx, ny); cudaCuFFT(&fftplan, fft_data, out_data, nx, ny, CUFFT_FORWARD); cudaCvtCuFFTToField(out_data, Fli, nx, ny); cudaGetParamAT2(Flr, Fli, G, temp_data, nx, ny); cudaMemcpy(host_data, temp_data, nx*ny * sizeof(Complex<Real>), cudaMemcpyDeviceToHost); Buffer2Field(host_data, Fo_temp, size); cudaFree(temp_data); cudaFree(Flr); cudaFree(Fli); cudaFree(device_config); cudaFree(G); cudaFree(out_data); cudaFree(fft_data); cufftDestroy(fftplan); delete[] host_data; t = linspace(0., 1., nx / 2 + 1); tn.resize(t.size()); Fon.resize(1, t.size()); for (int i = 0; i < tn.size(); i++) { tn.at(i) = pow(t.at(i), 0.5); Fon(0, i)._Val[_RE] = Fo_temp(nx / 2 - 1, nx / 2 - 1 + i)._Val[_RE]; Fon(0, i)._Val[_IM] = 0; } yn.resize(1, tn.size()); linInterp(t, Fon, tn, yn); fft1(yn, yn); Ab_yn.resize(yn.size[_X], yn.size[_Y]); absMat(yn, Ab_yn); Ab_yn_half.resize(1, nx / 4 + 1); for (int i = 0; i < nx / 4 + 1; i++) { Ab_yn_half(0, i) = Ab_yn(0, nx / 4 + i - 1)._Val[_RE]; if (i == 0) max = Ab_yn_half(0, 0); else { if (Ab_yn_half(0, i) > max) { max = Ab_yn_half(0, i); index = i; } } } index = -(((index + 1) - 120) / 10) / 140 + 0.1; return index; }
153  Real wl = *context_.wave_length;
154  Real NA = _cfgSig.NA;
155  Real_t NA_g = NA * redRate;
156  Real Rephase = -(1 / (4 * M_PI)*pow((wl / NA_g), 2));
157  Real Imphase = ((1 / (4 * M_PI))*depth*wl);
158 
159  cudaCvtHPO(streamLF,out_data,fft_temp_data,device_config,F,nx, ny,Rephase,Imphase);
160 
161  cudaCuIFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_INVERSE);
162 
163 
164  cudaCvtCuFFTToField(fft_temp_data, temp_data, nx, ny);
165  cudaMemcpy(host_data, temp_data, nx*ny * sizeof(Complex<Real>), cudaMemcpyDeviceToHost);
166  ivec2 size(nx, ny);
167  Buffer2Field(host_data, *ComplexH, size);
168  //
169  cudaFree(F);
170  cudaFree(device_config);
171  cudaFree(temp_data);
172  cudaFree(out_data);
173  cudaFree(fft_temp_data);
174  cufftDestroy(fftplan);
175 
176  delete[] host_data;
177 
178 
179  return true;
180 }
181 
182 
183 
184 
185 bool ophSig::sigConvertCAC_GPU(double red, double green, double blue) {
186  int nx = context_.pixel_number[_X];
187  int ny = context_.pixel_number[_Y];
188 
189  Complex<Real> *host_data, *temp_data, *F;
190  cufftDoubleComplex *fft_temp_data, *out_data;
191  cufftHandle fftplan;
192  ophSigConfig *device_config = nullptr;
193  Real radius = _radius;
194 
195  if (cufftPlan2d(&fftplan, nx, ny, CUFFT_Z2Z) != CUFFT_SUCCESS)
196  {
197  LOG("FAIL in creating cufft plan");
198  return false;
199  };
200 
201  ColorField2Buffer(ComplexH[0], &host_data, nx, ny);
202  cudaMalloc(&temp_data, sizeof(Complex<Real>)*nx*ny);
203  cudaMalloc(&fft_temp_data, sizeof(cufftDoubleComplex)*nx*ny);
204  cudaMalloc(&out_data, sizeof(cufftDoubleComplex)*nx*ny);
205  cudaMalloc(&F, sizeof(Complex<Real>)*ny*nx);
206  cudaMalloc(&device_config, sizeof(ophSigConfig));
207 
208  cudaMemcpy(F, 0, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice);
209  cudaMemcpy(device_config, &_cfgSig, sizeof(ophSigConfig), cudaMemcpyHostToDevice);
210  cudaMemcpy(fft_temp_data, 0, sizeof(cufftDoubleComplex)*nx*ny, cudaMemcpyHostToDevice);
211  cudaMemcpy(out_data, 0, sizeof(cufftDoubleComplex)*nx*ny, cudaMemcpyHostToDevice);
212 
213  //blue
214 
215  cudaMemcpy(temp_data, host_data, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice);
216  cudaCvtFieldToCuFFT(temp_data, out_data, nx, ny);
217  cudaCuFFT(&fftplan, out_data, fft_temp_data, nx, ny, CUFFT_FORWARD);
218 
219  double sigmaf = ((_foc[2] - _foc[0]) * blue) / (4 * M_PI);
220  cudaCvtCAC(fft_temp_data, out_data,F,device_config,nx, ny,sigmaf,radius);
221 
222  cudaCuIFFT(&fftplan, out_data, fft_temp_data, nx, ny, CUFFT_INVERSE);
223 
224  cudaCvtCuFFTToField(out_data, temp_data, nx, ny);
225  cudaMemcpy(host_data, temp_data, nx*ny * sizeof(Complex<Real>), cudaMemcpyDeviceToHost);
226  ivec2 size(nx, ny);
227  Buffer2Field(host_data, ComplexH[0], size);
228 
229  // green
230  ColorField2Buffer(ComplexH[1], &host_data, nx, ny);
231  cudaMemcpy(temp_data, host_data, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice);
232  cudaCvtFieldToCuFFT(temp_data, out_data, nx, ny);
233  cudaCuFFT(&fftplan, out_data, fft_temp_data, nx, ny, CUFFT_FORWARD);
234 
235  sigmaf = ((_foc[2] - _foc[1]) * green) / (4 * M_PI);
236  cudaCvtCAC(fft_temp_data, out_data, F, device_config, nx, ny, sigmaf, radius);
237 
238  cudaCuIFFT(&fftplan, out_data, fft_temp_data, nx, ny, CUFFT_INVERSE);
239 
240  cudaCvtCuFFTToField(out_data, temp_data, nx, ny);
241  cudaMemcpy(host_data, temp_data, nx*ny * sizeof(Complex<Real>), cudaMemcpyDeviceToHost);
242  Buffer2Field(host_data, ComplexH[1], size);
243 
244  //free
245  cudaFree(F);
246  cudaFree(device_config);
247  cudaFree(temp_data);
248  cudaFree(out_data);
249  cudaFree(fft_temp_data);
250  cufftDestroy(fftplan);
251 
252  delete[] host_data;
253 
254  return true;
255 }
256 
257 bool ophSig::propagationHolo_GPU(float depth) {
258  int nx = context_.pixel_number[_X];
259  int ny = context_.pixel_number[_Y];
260  Complex<Real> *host_data, *temp_data, *F;
261  cufftDoubleComplex *fft_temp_data, *out_data;
262  cufftHandle fftplan;
263 
264  ophSigConfig *device_config = nullptr;
265 
266  if (cufftPlan2d(&fftplan, nx, ny, CUFFT_Z2Z) != CUFFT_SUCCESS)
267  {
268  LOG("FAIL in creating cufft plan");
269  return false;
270  };
271 
272  cField2Buffer(*ComplexH, &host_data, nx, ny);
273 
274  cudaMalloc(&temp_data, sizeof(Complex<Real>)*nx*ny);
275  cudaMalloc(&fft_temp_data, sizeof(cufftDoubleComplex)*nx*ny);
276  cudaMalloc(&out_data, sizeof(cufftDoubleComplex)*nx*ny);
277  cudaMalloc(&F, sizeof(Complex<Real>)*ny*nx);
278  cudaMalloc(&device_config, sizeof(ophSigConfig));
279 
280  cudaMemcpy(temp_data, host_data, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice);
281  cudaMemcpy(fft_temp_data, 0, sizeof(cufftDoubleComplex)*nx*ny, cudaMemcpyHostToDevice);
282  cudaMemcpy(out_data, 0, sizeof(cufftDoubleComplex)*nx*ny, cudaMemcpyHostToDevice);
283  cudaMemcpy(F, 0, sizeof(cufftDoubleComplex)*nx*ny, cudaMemcpyHostToDevice);
284  cudaMemcpy(device_config, &_cfgSig, sizeof(ophSigConfig), cudaMemcpyHostToDevice);
285 
286  cudaCvtFieldToCuFFT(temp_data, fft_temp_data, nx, ny);
287  cudaCuFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_FORWARD);
288 
289  Real wl = *context_.wave_length;
290  Real_t sigmaf = (depth*wl) / (4 * M_PI);
291 
292  cudaPropagation(out_data, fft_temp_data, F, device_config, nx, ny, sigmaf);
293 
294  cudaCuIFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_INVERSE);
295 
296 
297  cudaCvtCuFFTToField(fft_temp_data, temp_data, nx, ny);
298  cudaMemcpy(host_data, temp_data, nx*ny * sizeof(Complex<Real>), cudaMemcpyDeviceToHost);
299  ivec2 size(nx, ny);
300  Buffer2Field(host_data, *ComplexH, size);
301  //
302  cudaFree(F);
303  cudaFree(device_config);
304  cudaFree(temp_data);
305  cudaFree(out_data);
306  cudaFree(fft_temp_data);
307  cufftDestroy(fftplan);
308 
309  delete[] host_data;
310 
311  return true;
312 }
313 
314 double ophSig::sigGetParamSF_GPU(float zMax, float zMin, int sampN, float th) {
315  int nx = context_.pixel_number[_X];
316  int ny = context_.pixel_number[_Y];
317 
318  Complex<Real> *host_data, *temp_data, *FH;
319  cufftDoubleComplex *fft_temp_data, *out_data, *Ftemp_data;
320  cufftHandle fftplan;
321  cufftResult a;
322  Real wl = *context_.wave_length;
323  Real depth;
324  Real *f;
325  ophSigConfig *device_config = nullptr;
326 
327  cudaMalloc(&f, sizeof(Real)*nx*ny);
328  cudaMalloc(&FH, sizeof(Complex<Real>)*ny*nx);
329  cudaMalloc(&device_config, sizeof(ophSigConfig));
330  cudaMalloc(&temp_data, sizeof(Complex<Real>)*nx*ny);
331  cudaMalloc(&fft_temp_data, sizeof(cufftDoubleComplex)*nx*ny);
332  cudaMalloc(&Ftemp_data, sizeof(cufftDoubleComplex)*nx*ny);
333  cudaMalloc(&out_data, sizeof(cufftDoubleComplex)*nx*ny);
334 
335  if (cufftPlan2d(&fftplan, nx, ny, CUFFT_Z2Z) != CUFFT_SUCCESS)
336  {
337  LOG("FAIL in creating cufft plan");
338  return false;
339  };
340 
341  cField2Buffer(*ComplexH, &host_data, nx, ny);
342 
343  cudaMemcpy(temp_data, host_data, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice);
344  cudaMemcpy(fft_temp_data, 0, sizeof(cufftDoubleComplex)*nx*ny, cudaMemcpyHostToDevice);
345  cudaMemcpy(out_data, 0, sizeof(cufftDoubleComplex)*nx*ny, cudaMemcpyHostToDevice);
346 
347 
348  cudaCvtFieldToCuFFT(temp_data, fft_temp_data, nx, ny);
349  cudaCuFFT(&fftplan, fft_temp_data, out_data, nx, ny, CUFFT_FORWARD);
350 
351  cudaMemcpy(FH, 0, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice);
352  cudaMemcpy(f, 0, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice);
353  cudaMemcpy(device_config, &_cfgSig, sizeof(ophSigConfig), cudaMemcpyHostToDevice);
354 
355  depth = cudaGetParamSF(&fftplan, out_data, Ftemp_data, fft_temp_data, f, FH, device_config, nx, ny, zMax, zMin, sampN, th, wl);
356 
357 
358  /*cudaCvtCuFFTToField(out_data, temp_data, nx, ny);
359  cudaMemcpy(host_data, temp_data, nx*ny * sizeof(Complex<Real>), cudaMemcpyDeviceToHost);
360  ivec2 size(nx, ny);*/
361  //Buffer2Field(host_data, *ComplexH, size);
362  //
363  cudaFree(FH);
364  cudaFree(device_config);
365  cudaFree(temp_data);
366  cudaFree(Ftemp_data);
367  cudaFree(out_data);
368  cudaFree(fft_temp_data);
369  cudaFree(f);
370  cufftDestroy(fftplan);
371 
372  delete[] host_data;
373 
374  return depth;
375 }
376 
378 
379  Real index;
380  int nx = context_.pixel_number[_X];
381  int ny = context_.pixel_number[_Y];
382  int tid = 0;
383  ivec2 size(nx, ny);
384  Real_t NA_g = (Real_t)0.025;
385  Real wl = *context_.wave_length;
386  Real max = 0;
387  Complex<Real> *host_data, *Flr, *Fli, *G, *temp_data;
388  cufftDoubleComplex *fft_data, *out_data;
389 
390  OphComplexField Fo_temp(nx, ny);
391  OphComplexField Fon, yn, Ab_yn;
392  OphRealField Ab_yn_half;
393  ophSigConfig *device_config = nullptr;
394  cufftHandle fftplan;
395  vector<Real> t, tn;
396 
397  cufftResult a;
398  //a = cufftPlan2d(&fftplan, nx, ny, CUFFT_Z2Z);
399  //cout << a << endl;
400  if (cufftPlan2d(&fftplan, nx, ny, CUFFT_Z2Z) != CUFFT_SUCCESS)
401  {
402  LOG("FAIL in creating cufft plan");
403  return 0;
404  };
405 
406  cField2Buffer(*ComplexH, &host_data, nx, ny);
407 
408  cudaMalloc(&temp_data, sizeof(Complex<Real>)*nx*ny);
409  cudaMalloc(&Flr, sizeof(Complex<Real>)*nx*ny);
410  cudaMalloc(&Fli, sizeof(Complex<Real>)*nx*ny);
411  cudaMalloc(&G, sizeof(Complex<Real>)*nx*ny);
412  cudaMalloc(&device_config, sizeof(ophSigConfig));
413 
414  cudaMalloc(&fft_data, sizeof(cufftDoubleComplex)*nx*ny);
415  cudaMalloc(&out_data, sizeof(cufftDoubleComplex)*nx*ny);
416 
417 
418  cudaMemcpy(temp_data, host_data, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice);
419  cudaMemcpy(Flr, 0, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice);
420  cudaMemcpy(Fli, 0, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice);
421  cudaMemcpy(G, 0, sizeof(Complex<Real>)*nx*ny, cudaMemcpyHostToDevice);
422  cudaMemcpy(device_config, &_cfgSig, sizeof(ophSigConfig), cudaMemcpyHostToDevice);
423 
424  cudaMemcpy(fft_data, 0, sizeof(cufftDoubleComplex)*nx*ny, cudaMemcpyHostToDevice);
425  cudaMemcpy(out_data, 0, sizeof(cufftDoubleComplex)*nx*ny, cudaMemcpyHostToDevice);
426 
427  cudaGetParamAT1(temp_data, Flr, Fli, G, device_config, nx, ny, NA_g, wl);
428 
429  cudaCvtFieldToCuFFT(Flr, fft_data, nx, ny);
430  cudaCuFFT(&fftplan, fft_data, out_data, nx, ny, CUFFT_FORWARD);
431  cudaCvtCuFFTToField(out_data, Flr, nx, ny);
432 
433  cudaCvtFieldToCuFFT(Fli, fft_data, nx, ny);
434  cudaCuFFT(&fftplan, fft_data, out_data, nx, ny, CUFFT_FORWARD);
435  cudaCvtCuFFTToField(out_data, Fli, nx, ny);
436 
437 
438  cudaGetParamAT2(Flr, Fli, G, temp_data, nx, ny);
439 
440  cudaMemcpy(host_data, temp_data, nx*ny * sizeof(Complex<Real>), cudaMemcpyDeviceToHost);
441  Buffer2Field(host_data, Fo_temp, size);
442 
443  cudaFree(temp_data);
444  cudaFree(Flr);
445  cudaFree(Fli);
446  cudaFree(device_config);
447  cudaFree(G);
448  cudaFree(out_data);
449  cudaFree(fft_data);
450  cufftDestroy(fftplan);
451 
452  delete[] host_data;
453 
454  t = linspace(0., 1., nx / 2 + 1);
455  tn.resize(t.size());
456  Fon.resize(1, t.size());
457 
458  for (int i = 0; i < tn.size(); i++)
459  {
460  tn.at(i) = pow(t.at(i), 0.5);
461  Fon(0, i)._Val[_RE] = Fo_temp(nx / 2 - 1, nx / 2 - 1 + i)._Val[_RE];
462  Fon(0, i)._Val[_IM] = 0;
463  }
464 
465  yn.resize(1, tn.size());
466  linInterp(t, Fon, tn, yn);
467  fft1(yn, yn);
468  Ab_yn.resize(yn.size[_X], yn.size[_Y]);
469  absMat(yn, Ab_yn);
470  Ab_yn_half.resize(1, nx / 4 + 1);
471 
472  for (int i = 0; i < nx / 4 + 1; i++)
473  {
474  Ab_yn_half(0, i) = Ab_yn(0, nx / 4 + i - 1)._Val[_RE];
475  if (i == 0) max = Ab_yn_half(0, 0);
476  else
477  {
478  if (Ab_yn_half(0, i) > max)
479  {
480  max = Ab_yn_half(0, i);
481  index = i;
482  }
483  }
484  }
485 
486  index = -(((index + 1) - 120) / 10) / 140 + 0.1;
487 
488  return index;
489 
490 
491 }
492 
void cudaCvtHPO(CUstream_st *stream, cufftDoubleComplex *src_data, cufftDoubleComplex *dst_data, ophSigConfig *device_config, Complex< Real > *F, int nx, int ny, Real Rephase, Real Imphase)
Real_t * _foc
Definition: ophSig.h:124
oph::matrix< Real > OphRealField
Definition: mat.h:419
void cField2Buffer(matrix< Complex< Real >> &src, Complex< Real > **dst, int nx, int ny)
Definition: ophSig.cpp:58
void cudaGetParamAT2(Complex< Real > *Flr, Complex< Real > *Fli, Complex< Real > *G, Complex< Real > *temp_data, int nx, int ny)
Real_t _radius
Definition: ophSig.h:123
Real * wave_length
Definition: Openholo.h:69
void ColorField2Buffer(matrix< Complex< Real >> &src, Complex< Real > **dst, int nx, int ny)
Definition: ophSig.cpp:73
double sigGetParamAT_GPU()
Definition: ophSig_GPU.cpp:377
float Real
Definition: typedef.h:55
void fft1(matrix< Complex< T >> &src, matrix< Complex< T >> &dst, int sign=OPH_FORWARD, uint flag=OPH_ESTIMATE)
Definition: ophSig.cpp:115
void cudaCvtFieldToCuFFT(Complex< Real > *src_data, cufftDoubleComplex *dst_data, int nx, int ny)
#define _Y
Definition: define.h:84
#define _IM
Definition: complex.h:57
void cudaPropagation(cufftDoubleComplex *src_data, cufftDoubleComplex *dst_data, Complex< Real > *FH, ophSigConfig *device_config, int nx, int ny, Real sigmaf)
void cudaCuIFFT(cufftHandle *plan, cufftDoubleComplex *src_data, cufftDoubleComplex *dst_data, int nx, int ny, int direction)
ophSigConfig _cfgSig
Definition: ophSig.h:108
cudaStream_t streamLF
#define _X
Definition: define.h:80
void cudaGetParamAT1(Complex< Real > *src_data, Complex< Real > *Flr, Complex< Real > *Fli, Complex< Real > *G, ophSigConfig *device_config, int nx, int ny, Real_t NA_g, Real wl)
double cudaGetParamSF(cufftHandle *fftplan, cufftDoubleComplex *src_data, cufftDoubleComplex *temp_data, cufftDoubleComplex *dst_data, Real *f, Complex< Real > *FH, ophSigConfig *device_config, int nx, int ny, float zMax, float zMin, int sampN, float th, Real wl)
void cudaCvtCAC(cufftDoubleComplex *src_data, cufftDoubleComplex *dst_data, Complex< Real > *FFZP, ophSigConfig *device_config, int nx, int ny, Real sigmaf, Real radius)
Real_t NA
Definition: ophSig.h:67
OphComplexField * ComplexH
Definition: ophSig.h:109
oph::ivec2 pixel_number
Definition: Openholo.h:63
bool sigConvertHPO_GPU(Real depth, Real_t redRate)
Definition: ophSig_GPU.cpp:118
#define _RE
Definition: complex.h:54
oph::matrix< Complex< Real > > OphComplexField
Definition: mat.h:421
bool sigConvertCAC_GPU(double red, double green, double blue)
Definition: ophSig_GPU.cpp:185
double Real_t
Definition: typedef.h:56
void cudaCvtCuFFTToField(cufftDoubleComplex *src_data, Complex< Real > *dst_data, int nx, int ny)
void absMat(matrix< Complex< T >> &src, matrix< T > &dst)
Function for Linear interpolation 1D.
Definition: ophSig.h:181
double sigGetParamSF_GPU(float zMax, float zMin, int sampN, float th)
Definition: ophSig_GPU.cpp:314
bool propagationHolo_GPU(float depth)
Definition: ophSig_GPU.cpp:257
void linInterp(vector< T > &X, matrix< Complex< T >> &src, vector< T > &Xq, matrix< Complex< T >> &dst)
Definition: ophSig.cpp:92
void cudaCuFFT(cufftHandle *plan, cufftDoubleComplex *src_data, cufftDoubleComplex *dst_data, int nx, int ny, int direction)
void Buffer2Field(const T *src, matrix< T > &dst, const ivec2 buffer_size)
Definition: function.h:344
OphConfig context_
Definition: Openholo.h:297
void cvtOffaxis_GPU(Real angleX, Real angleY)
Definition: ophSig_GPU.cpp:64
vector< T > linspace(T first, T last, int len)
Generate linearly spaced vector.
Definition: ophSig.h:160
#define M_PI
Definition: define.h:52
void cudaCvtOFF(Complex< Real > *src_data, Real *dst_data, ophSigConfig *device_config, int nx, int ny, Real wl, Complex< Real > *F, Real *angle)