Openholo  v1.0
Open Source Digital Holographic Library
ophLightField_GPU.cpp
Go to the documentation of this file.
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install, copy or use the software.
7 //
8 //
9 // License Agreement
10 // For Open Source Digital Holographic Library
11 //
12 // Openholo library is free software;
13 // you can redistribute it and/or modify it under the terms of the BSD 2-Clause license.
14 //
15 // Copyright (C) 2017-2024, Korea Electronics Technology Institute. All rights reserved.
16 // E-mail : contact.openholo@gmail.com
17 // Web : http://www.openholo.org
18 //
19 // Redistribution and use in source and binary forms, with or without modification,
20 // are permitted provided that the following conditions are met:
21 //
22 // 1. Redistribution's of source code must retain the above copyright notice,
23 // this list of conditions and the following disclaimer.
24 //
25 // 2. Redistribution's in binary form must reproduce the above copyright notice,
26 // this list of conditions and the following disclaimer in the documentation
27 // and/or other materials provided with the distribution.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the copyright holder or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 // This software contains opensource software released under GNU Generic Public License,
41 // NVDIA Software License Agreement, or CUDA supplement to Software License Agreement.
42 // Check whether software you use contains licensed software.
43 //
44 //M*/
45 
46 #include "ophLightField_GPU.h"
47 
48 #include "sys.h"
49 
50 extern "C"
51 void cudaFFT(CUstream_st* stream, int nx, int ny, cufftDoubleComplex* in_filed, cufftDoubleComplex* output_field, int direction, bool bNormalized);
52 
54 {
55 #ifdef CHECK_PROC_TIME
56  auto begin = CUR_TIME;
57 #endif
58  const int nX = num_image[_X];
59  const int nY = num_image[_Y];
60  const int nXY = nX * nY;
61  const int rX = resolution_image[_X];
62  const int rY = resolution_image[_Y];
63  const int rXY = rX * rY;
64 
65  if (!streamLF)
66  cudaStreamCreate(&streamLF);
67 
68  if (LF_gpu) cudaFree(LF_gpu);
69  if (LFData_gpu) {
70  for (int i = 0; i < nXY; i++)
71  cudaFree(LFData_gpu[i]);
72  free(LFData_gpu);
73  }
74 
75  HANDLE_ERROR(cudaMalloc(&LF_gpu, sizeof(uchar1*) * nXY));
76  LFData_gpu = (uchar**)malloc(sizeof(uchar*) * nXY);
77 
78  for (int i = 0; i < nXY; i++) {
79  HANDLE_ERROR(cudaMalloc(&LFData_gpu[i], sizeof(uchar1) * rXY));
80  HANDLE_ERROR(cudaMemset(LFData_gpu[i], 0, sizeof(uchar1) * rXY));
81  HANDLE_ERROR(cudaMemcpyAsync(LFData_gpu[i], LF[i], sizeof(uchar) * rXY, cudaMemcpyHostToDevice), streamLF);
82  }
83 
84  HANDLE_ERROR(cudaMemcpy(LF_gpu, LFData_gpu, sizeof(uchar*) * nXY, cudaMemcpyHostToDevice));
85 
87  cudaFree(RSplane_complex_field_gpu);
88  HANDLE_ERROR(cudaMalloc((void**)&RSplane_complex_field_gpu, sizeof(cufftDoubleComplex) * nXY * rXY));
89 #ifdef CHECK_PROC_TIME
90  auto end = CUR_TIME;
91  LOG("\n%s : %lf(s)\n\n", __FUNCTION__, ((chrono::duration<Real>)(end - begin)).count());
92 #endif
93 }
94 
96 {
97 #ifdef CHECK_PROC_TIME
98  auto begin = CUR_TIME;
99 #endif
100  const int nX = num_image[_X];
101  const int nY = num_image[_Y];
102  const int nXY = nX * nY;
103  const int rX = resolution_image[_X];
104  const int rY = resolution_image[_Y];
105  const int rXY = rX * rY;
106 
107  cufftDoubleComplex *complexLF_gpu;
108  cufftDoubleComplex *FFTLF_temp_gpu;
109 
110  HANDLE_ERROR(cudaMalloc((void**)&complexLF_gpu, sizeof(cufftDoubleComplex) * nXY * rXY));
111  HANDLE_ERROR(cudaMalloc((void**)&FFTLF_temp_gpu, sizeof(cufftDoubleComplex) * nXY * rXY));
112  HANDLE_ERROR(cudaMemsetAsync(complexLF_gpu, 0, sizeof(cufftDoubleComplex) * nXY * rXY, streamLF));
113  HANDLE_ERROR(cudaMemsetAsync(FFTLF_temp_gpu, 0, sizeof(cufftDoubleComplex) * nXY * rXY, streamLF));
114  HANDLE_ERROR(cudaMemsetAsync(RSplane_complex_field_gpu, 0, sizeof(cufftDoubleComplex) * nXY * rXY, streamLF));
115 
116  cudaConvertLF2ComplexField_Kernel(streamLF, nX, nY, rX, rY, LF_gpu, complexLF_gpu);
117  cufftHandle fftplan;
118  if (cufftPlan2d(&fftplan, nY, nX, CUFFT_Z2Z) != CUFFT_SUCCESS)
119  {
120  LOG("FAIL in creating cufft plan");
121  return;
122  };
123  cufftDoubleComplex* in, *out;
124 #if 0
125  for (int k = 0; k < nXY; k++)
126  {
127  int offset = rX * rY * k;
128  in = complexLF_gpu + offset;
129  out = FFTLF_temp_gpu + offset;
130  cudaFFT_LF(&fftplan, streamLF, rX, rY, in, out, -1);
131  }
132 #else
133  for (int k = 0; k < rXY; k++)
134  {
135  int offset = nX * nY * k;
136  in = complexLF_gpu + offset;
137  out = FFTLF_temp_gpu + offset;
138  cudaFFT_LF(&fftplan, streamLF, nX, nY, in, out, -1);
139  }
140 #endif
141  cufftDestroy(fftplan);
142 
143  procMultiplyPhase(streamLF, nX, nY, rX, rY, FFTLF_temp_gpu, RSplane_complex_field_gpu, CUDART_PI);
144  cudaFree(complexLF_gpu);
145  cudaFree(FFTLF_temp_gpu);
146 #ifdef CHECK_PROC_TIME
147  auto end = CUR_TIME;
148  LOG("\n%s : %lf(s)\n\n", __FUNCTION__, ((std::chrono::duration<Real>)(end - begin)).count());
149 #endif
150 }
151 
153 {
154 #ifdef CHECK_PROC_TIME
155  auto begin = CUR_TIME;
156 #endif
157  const int pnX = context_.pixel_number[_X];
158  const int pnY = context_.pixel_number[_Y];
159  const int pnXY = pnX * pnY;
160  const uint nChannel = context_.waveNum;
161  const Real ppX = context_.pixel_pitch[_X];
162  const Real ppY = context_.pixel_pitch[_Y];
163 
164  cufftDoubleComplex *in2x;
165  cufftDoubleComplex *temp;
166 
167  HANDLE_ERROR(cudaMalloc((void**)&in2x, sizeof(cufftDoubleComplex) * pnXY * 4));
168  HANDLE_ERROR(cudaMalloc((void**)&temp, sizeof(cufftDoubleComplex) * pnXY * 4));
169  HANDLE_ERROR(cudaMemsetAsync(in2x, 0, sizeof(cufftDoubleComplex) * pnXY * 4, streamLF));
170  HANDLE_ERROR(cudaMemsetAsync(temp, 0, sizeof(cufftDoubleComplex) * pnXY * 4, streamLF));
171 
173 
174  cudaFFT(streamLF, pnX * 2, pnY * 2, in2x, temp, -1, false);
175 
176 
177  for (uint ch = 0; ch < nChannel; ch++) {
178  Real wavelength = context_.wave_length[ch];
179 
180  procMultiplyProp(streamLF, pnX * 2, pnY * 2, temp, CUDART_PI, distanceRS2Holo, wavelength, ppX, ppY);
181 
182  HANDLE_ERROR(cudaMemsetAsync(in2x, 0, sizeof(cufftDoubleComplex) * pnXY * 4, streamLF));
183  cudaFFT(streamLF, pnX * 2, pnY * 2, temp, in2x, 1, false);
184 
185  HANDLE_ERROR(cudaMemsetAsync(RSplane_complex_field_gpu, 0, sizeof(cufftDoubleComplex) * pnXY, streamLF));
187 
188  cufftDoubleComplex* output = (cufftDoubleComplex*)malloc(sizeof(cufftDoubleComplex) * pnXY);
189  memset(output, 0.0, sizeof(cufftDoubleComplex) * pnXY);
190  HANDLE_ERROR(cudaMemcpyAsync(output, RSplane_complex_field_gpu, sizeof(cufftDoubleComplex) * pnXY, cudaMemcpyDeviceToHost), streamLF);
191  for (int i = 0; i < pnXY; ++i)
192  {
193  complex_H[ch][i][_RE] = output[i].x;
194  complex_H[ch][i][_IM] = output[i].y;
195  }
196  free(output);
197  }
198  cudaFree(in2x);
199  cudaFree(temp);
200 #ifdef CHECK_PROC_TIME
201  auto end = CUR_TIME;
202  LOG("\n%s : %lf(s)\n\n", __FUNCTION__, ((std::chrono::duration<Real>)(end - begin)).count());
203 #endif
204 }
#define HANDLE_ERROR(err)
Real * wave_length
Definition: Openholo.h:69
unsigned char uchar
Definition: typedef.h:64
uchar1 ** LF_gpu
float Real
Definition: typedef.h:55
#define CUR_TIME
Definition: function.h:58
void cudaFFT(CUstream_st *stream, int nx, int ny, cufftDoubleComplex *in_filed, cufftDoubleComplex *output_field, int direction, bool bNormalized)
void convertLF2ComplexField_GPU()
#define _Y
Definition: define.h:84
#define _IM
Definition: complex.h:57
uchar ** LFData_gpu
cudaStream_t streamLF
void prepareInputdataGPU()
#define _X
Definition: define.h:80
void procMoveToin2x(CUstream_st *streamLF, int Nx, int Ny, cufftDoubleComplex *in, cufftDoubleComplex *out)
void cudaFFT_LF(cufftHandle *plan, CUstream_st *stream, int nx, int ny, cufftDoubleComplex *in_field, cufftDoubleComplex *output_field, int direction)
void cudaConvertLF2ComplexField_Kernel(CUstream_st *stream, int nx, int ny, int rx, int ry, uchar1 **LF, cufftDoubleComplex *output)
cufftDoubleComplex * RSplane_complex_field_gpu
oph::ivec2 pixel_number
Definition: Openholo.h:63
#define _RE
Definition: complex.h:54
void fresnelPropagation_GPU()
void procMultiplyProp(CUstream_st *stream, int Nx, int Ny, cufftDoubleComplex *inout, double PI, double dist, double wavelength, double ppx, double ppy)
uint waveNum
Definition: Openholo.h:68
void procCopyToOut(CUstream_st *stream, int Nx, int Ny, cufftDoubleComplex *in, cufftDoubleComplex *out)
OphConfig context_
Definition: Openholo.h:297
Complex< Real > ** complex_H
Definition: Openholo.h:298
unsigned int uint
Definition: typedef.h:62
void procMultiplyPhase(CUstream_st *stream, int nx, int ny, int rx, int ry, cufftDoubleComplex *input, cufftDoubleComplex *output, double PI)
oph::vec2 pixel_pitch
Definition: Openholo.h:64