HologramDepthmap Library
HologramGenerator_GPU.cpp
Go to the documentation of this file.
1 
3 #include "graphics/sys.h"
4 #include <cuda_runtime.h>
5 #include <cufft.h>
6 
7 static void HandleError(cudaError_t err,
8  const char *file,
9  int line) {
10  if (err != cudaSuccess) {
11  printf("%s in %s at line %d\n", cudaGetErrorString(err),
12  file, line);
13  exit(EXIT_FAILURE);
14  }
15 }
16 #define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))
17 
18 
19 #define HANDLE_NULL( a ) {if (a == NULL) { \
20  printf( "Host memory failed in %s at line %d\n", \
21  __FILE__, __LINE__ ); \
22  exit( EXIT_FAILURE );}}
23 
24 cufftDoubleComplex *u_o_gpu_;
25 cufftDoubleComplex *u_complex_gpu_;
26 cufftDoubleComplex *k_temp_d_;
27 
28 cudaStream_t stream_;
29 cudaEvent_t start, stop;
30 
31 extern "C"
32 {
49  void cudaFFT(CUstream_st* stream, int nx, int ny, cufftDoubleComplex* in_filed, cufftDoubleComplex* output_field, int direction, bool bNormailized = false);
50 
65  void cudaCropFringe(CUstream_st* stream, int nx, int ny, cufftDoubleComplex* in_field, cufftDoubleComplex* out_field, int cropx1, int cropx2, int cropy1, int cropy2);
66 
86  void cudaDepthHoloKernel(CUstream_st* stream, int pnx, int pny, cufftDoubleComplex* u_o_gpu_, unsigned char* img_src_gpu_, unsigned char* dimg_src_gpu_, double* depth_index_gpu_,
87  int dtr, double rand_phase_val_a, double rand_phase_val_b, double carrier_phase_delay_a, double carrier_phase_delay_b, int flag_change_depth_quan, unsigned int default_depth_quan);
88 
106  void cudaPropagation_AngularSpKernel(CUstream_st* stream_, int pnx, int pny, cufftDoubleComplex* input_d, cufftDoubleComplex* u_complex,
107  double ppx, double ppy, double ssx, double ssy, double lambda, double params_k, double propagation_dist);
108 
126  void cudaGetFringe(CUstream_st* stream, int pnx, int pny, cufftDoubleComplex* in_field, cufftDoubleComplex* out_field, int sig_locationx, int sig_locationy,
127  double ssx, double ssy, double ppx, double ppy, double PI);
128 
145  void cudaChangeDepthQuanKernel(CUstream_st* stream_, int pnx, int pny, double* depth_index_gpu, unsigned char* dimg_src_gpu,
146  int dtr, double d1, double d2, double params_num_of_depth, double params_far_depthmap, double params_near_depthmap);
147 
150 }
151 
158 {
159  const int nx = params_.pn[0];
160  const int ny = params_.pn[1];
161  const int N = nx * ny;
162 
163  if (!stream_)
164  cudaStreamCreate(&stream_);
165 
166  if (img_src_gpu_) cudaFree(img_src_gpu_);
167  HANDLE_ERROR(cudaMalloc((void**)&img_src_gpu_, sizeof(uchar1)*N));
168 
169  if (dimg_src_gpu_) cudaFree(dimg_src_gpu_);
170  HANDLE_ERROR(cudaMalloc((void**)&dimg_src_gpu_, sizeof(uchar1)*N));
171 
172  if (depth_index_gpu_) cudaFree(depth_index_gpu_);
174  HANDLE_ERROR(cudaMalloc((void**)&depth_index_gpu_, sizeof(double)*N));
175 
176  if (u_o_gpu_) cudaFree(u_o_gpu_);
177  if (u_complex_gpu_) cudaFree(u_complex_gpu_);
178 
179  HANDLE_ERROR(cudaMalloc((void**)&u_o_gpu_, sizeof(cufftDoubleComplex)*N));
180  HANDLE_ERROR(cudaMalloc((void**)&u_complex_gpu_, sizeof(cufftDoubleComplex)*N));
181 
182  if (k_temp_d_) cudaFree(k_temp_d_);
183  HANDLE_ERROR(cudaMalloc((void**)&k_temp_d_, sizeof(cufftDoubleComplex)*N));
184 
185 }
186 
194 bool HologramGenerator::prepare_inputdata_GPU(uchar* imgptr, uchar* dimgptr)
195 {
196  const int nx = params_.pn[0];
197  const int ny = params_.pn[1];
198  const int N = nx * ny;
199 
200  HANDLE_ERROR(cudaMemcpyAsync(img_src_gpu_, imgptr, sizeof(uchar1)*N, cudaMemcpyHostToDevice), stream_);
201  HANDLE_ERROR(cudaMemcpyAsync(dimg_src_gpu_, dimgptr, sizeof(uchar1)*N, cudaMemcpyHostToDevice), stream_);
202 
203  return true;
204 }
205 
212 {
213  int pnx = params_.pn[0];
214  int pny = params_.pn[1];
215 
216  double temp_depth, d1, d2;
217  int tdepth;
218 
219  HANDLE_ERROR(cudaMemsetAsync(depth_index_gpu_, 0, sizeof(double)*pnx*pny, stream_));
220 
221  for (int dtr = 0; dtr < params_.num_of_depth; dtr++)
222  {
223  temp_depth = dlevel_[dtr];
224  d1 = temp_depth - dstep_ / 2.0;
225  d2 = temp_depth + dstep_ / 2.0;
226 
229  }
230 
231 }
232 
247 {
248  cudaEvent_t start, stop;
249 
250  cudaEventCreate(&start);
251  cudaEventCreate(&stop);
252 
253  if (!stream_)
254  cudaStreamCreate(&stream_);
255 
256  cudaEventRecord(start, stream_);
257 
258  int pnx = params_.pn[0];
259  int pny = params_.pn[1];
260  int N = pnx*pny;
261 
262  HANDLE_ERROR(cudaMemsetAsync(u_complex_gpu_, 0, sizeof(cufftDoubleComplex)*N, stream_));
263  int depth_sz = params_.render_depth.size();
264 
265  for (int p = 0; p < depth_sz; ++p)
266  {
267  Complex rand_phase_val;
268  get_rand_phase_value(rand_phase_val);
269 
270  int dtr = params_.render_depth[p];
271  double temp_depth = dlevel_transform_[dtr - 1];
272  Complex carrier_phase_delay(0, params_.k* temp_depth);
273  exponent_complex(&carrier_phase_delay);
274 
275  HANDLE_ERROR(cudaMemsetAsync(u_o_gpu_, 0, sizeof(cufftDoubleComplex)*N, stream_));
276 
278  dtr, rand_phase_val.a, rand_phase_val.b, carrier_phase_delay.a, carrier_phase_delay.b, FLAG_CHANGE_DEPTH_QUANTIZATION, DEFAULT_DEPTH_QUANTIZATION);
279 
280  if (Propagation_Method_ == 0)
281  {
282  HANDLE_ERROR(cudaMemsetAsync(k_temp_d_, 0, sizeof(cufftDoubleComplex)*N, stream_));
283  cudaFFT(stream_, pnx, pny, u_o_gpu_, k_temp_d_, -1);
284 
286  }
287 
288  LOG("Frame#: %d, Depth: %d of %d, z = %f mm\n", frame, dtr, params_.num_of_depth, -temp_depth * 1000);
289 
290  }
291 
292  cudaEventRecord(stop, stream_);
293  cudaEventSynchronize(stop);
294 
295  float elapsedTime = 0.0f;
296  cudaEventElapsedTime(&elapsedTime, start, stop);
297  LOG("GPU Time= %f ms. \n", elapsedTime);
298 
299  //writeIntensity_gray8_real_bmp("final_fr", pnx, pny, U_complex_);
300 
301 
302 }
303 
311 void HologramGenerator::Propagation_AngularSpectrum_GPU(cufftDoubleComplex* input_u, double propagation_dist)
312 {
313  int pnx = params_.pn[0];
314  int pny = params_.pn[1];
315  int N = pnx* pny;
316  double ppx = params_.pp[0];
317  double ppy = params_.pp[1];
318  double ssx = params_.ss[0];
319  double ssy = params_.ss[1];
320  double lambda = params_.lambda;
321 
323  ppx, ppy, ssx, ssy, lambda, params_.k, propagation_dist);
324 
325 }
326 
338 void HologramGenerator::encoding_GPU(int cropx1, int cropx2, int cropy1, int cropy2, ivec2 sig_location)
339 {
340  int pnx = params_.pn[0];
341  int pny = params_.pn[1];
342  double ppx = params_.pp[0];
343  double ppy = params_.pp[1];
344  double ssx = params_.ss[0];
345  double ssy = params_.ss[1];
346 
347  HANDLE_ERROR(cudaMemsetAsync(k_temp_d_, 0, sizeof(cufftDoubleComplex)*pnx*pny, stream_));
348  cudaCropFringe(stream_, pnx, pny, u_complex_gpu_, k_temp_d_, cropx1, cropx2, cropy1, cropy2);
349 
350  HANDLE_ERROR(cudaMemsetAsync(u_complex_gpu_, 0, sizeof(cufftDoubleComplex)*pnx*pny, stream_));
351  cudaFFT(stream_, pnx, pny, k_temp_d_, u_complex_gpu_, 1, true);
352 
353  HANDLE_ERROR(cudaMemsetAsync(k_temp_d_, 0, sizeof(cufftDoubleComplex)*pnx*pny, stream_));
354  cudaGetFringe(stream_, pnx, pny, u_complex_gpu_, k_temp_d_, sig_location[0], sig_location[1], ssx, ssy, ppx, ppy, PI);
355 
356  cufftDoubleComplex* sample_fd = (cufftDoubleComplex*)malloc(sizeof(cufftDoubleComplex)*pnx*pny);
357  memset(sample_fd, 0.0, sizeof(cufftDoubleComplex)*pnx*pny);
358 
359  HANDLE_ERROR(cudaMemcpyAsync(sample_fd, k_temp_d_, sizeof(cufftDoubleComplex)*pnx*pny, cudaMemcpyDeviceToHost), stream_);
360  memset(u255_fringe_, 0.0, sizeof(double)*pnx*pny);
361 
362  for (int i = 0; i < pnx*pny; ++i)
363  {
364  u255_fringe_[i] = sample_fd[i].x;
365  }
366 
367  free(sample_fd);
368 }
369 
370 /*
371 void HologramGenerator::writeImage_fromGPU(QString imgname, int pnx, int pny, cufftDoubleComplex* gpu_data)
372 {
373  cufftDoubleComplex* cpu_data = (cufftDoubleComplex*)malloc(sizeof(cufftDoubleComplex)*pnx*pny);
374  memset(cpu_data, 0.0, sizeof(cufftDoubleComplex)*pnx*pny);
375 
376  double* data = (double*)malloc(sizeof(double)*pnx*pny);
377 
378  HANDLE_ERROR(cudaMemcpyAsync(cpu_data, gpu_data, sizeof(cufftDoubleComplex)*pnx*pny, cudaMemcpyDeviceToHost), stream_);
379  for (int i = 0; i < pnx*pny; ++i)
380  {
381  data[i] = cpu_data[i].x;
382  }
383 
384  writeIntensity_gray8_bmp(imgname.toStdString().c_str(),pnx, pny, data );
385 
386  free(cpu_data);
387  free(data);
388 
389 }*/
void Propagation_AngularSpectrum_GPU(cufftDoubleComplex *input_u, double propagation_dist)
Angular spectrum propagation method for GPU implementation.
double dstep_
the physical increment of each depth map layer.
cufftDoubleComplex * k_temp_d_
void cudaGetFringe(CUstream_st *stream, int pnx, int pny, cufftDoubleComplex *in_field, cufftDoubleComplex *out_field, int sig_locationx, int sig_locationy, double ssx, double ssy, double ppx, double ppy, double PI)
Encode the CGH according to a signal location parameter on the GPU.
static void HandleError(cudaError_t err, const char *file, int line)
uint num_of_depth
the number of depth level.
unsigned char * dimg_src_gpu_
GPU variable - depth map data, values are from 0 to 255.
double a
Definition: complex.h:145
uint DEFAULT_DEPTH_QUANTIZATION
default value of the depth quantization - 256
double k
2 * PI / lambda
void cudaPropagation_AngularSpKernel(CUstream_st *stream_, int pnx, int pny, cufftDoubleComplex *input_d, cufftDoubleComplex *u_complex, double ppx, double ppy, double ssx, double ssy, double lambda, double params_k, double propagation_dist)
Angular spectrum propagation method for GPU implementation.
void cudaFFT(CUstream_st *stream, int nx, int ny, cufftDoubleComplex *in_filed, cufftDoubleComplex *output_field, int direction, bool bNormailized=false)
Convert data from the spatial domain to the frequency domain using 2D FFT on GPU. ...
cudaStream_t stream_
bool FLAG_CHANGE_DEPTH_QUANTIZATION
if true, change the depth quantization from the default value.
double far_depthmap
FAR_OF_DEPTH_MAP at config file.
void exponent_complex(Complex *val)
Calculate the exponential of the complex number.
void cudaCropFringe(CUstream_st *stream, int nx, int ny, cufftDoubleComplex *in_field, cufftDoubleComplex *out_field, int cropx1, int cropx2, int cropy1, int cropy2)
Crop input data according to x, y coordinates on GPU.
unsigned char * img_src_gpu_
GPU variable - image source data, values are from 0 to 255.
std::vector< double > dlevel_
the physical value of all depth map layer.
int Propagation_Method_
propagation method - currently AngularSpectrum
cufftDoubleComplex * u_o_gpu_
bool prepare_inputdata_GPU(uchar *img, uchar *dimg)
Copy input image & depth map data into a GPU.
double lambda
WAVELENGTH at config file.
structure for 2-dimensional integer vector and its arithmetic.
Definition: ivec.h:14
ivec2 pn
SLM_PIXEL_NUMBER_X & SLM_PIXEL_NUMBER_Y.
void Calc_Holo_GPU(int frame)
Main method for generating a hologram on the GPU.
cudaEvent_t start
std::vector< int > render_depth
Used when only few specific depth levels are rendered, usually for test purpose.
void cudaChangeDepthQuanKernel(CUstream_st *stream_, int pnx, int pny, double *depth_index_gpu, unsigned char *dimg_src_gpu, int dtr, double d1, double d2, double params_num_of_depth, double params_far_depthmap, double params_near_depthmap)
Quantize depth map on the GPU, only when the number of depth quantization is not the default value (i...
cudaEvent_t stop
#define HANDLE_ERROR(err)
cufftDoubleComplex * u_complex_gpu_
double b
Definition: complex.h:145
void init_GPU()
Initialize variables for the GPU implementation.
class for the complex number and its arithmetic.
Definition: complex.h:22
void cudaDepthHoloKernel(CUstream_st *stream, int pnx, int pny, cufftDoubleComplex *u_o_gpu_, unsigned char *img_src_gpu_, unsigned char *dimg_src_gpu_, double *depth_index_gpu_, int dtr, double rand_phase_val_a, double rand_phase_val_b, double carrier_phase_delay_a, double carrier_phase_delay_b, int flag_change_depth_quan, unsigned int default_depth_quan)
Find each depth plane of the input image and apply carrier phase delay to it on GPU.
vec2 pp
SLM_PIXEL_PITCH_X & SLM_PIXEL_PITCH_Y.
double * u255_fringe_
the final hologram, used for writing the result image.
std::vector< double > dlevel_transform_
transfomed dlevel_ variable
double near_depthmap
NEAR_OF_DEPTH_MAP at config file.
void encoding_GPU(int cropx1, int cropx2, int cropy1, int cropy2, ivec2 sig_location)
Encode the CGH according to a signal location parameter on GPU.
void change_depth_quan_GPU()
Quantize depth map on the GPU, when the number of depth quantization is not the default value (i...
void get_rand_phase_value(Complex &rand_phase_val)
Assign random phase value if RANDOM_PHASE == 1.
HologramParams params_
structure variable for hologram parameters
const double PI
Definition: complex.h:16
double * depth_index_gpu_
GPU variable - quantized depth map data.