Openholo  v1.0
Open Source Digital Holographic Library
ophWRP_GPU.cpp
Go to the documentation of this file.
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install, copy or use the software.
7 //
8 //
9 // License Agreement
10 // For Open Source Digital Holographic Library
11 //
12 // Openholo library is free software;
13 // you can redistribute it and/or modify it under the terms of the BSD 2-Clause license.
14 //
15 // Copyright (C) 2017-2024, Korea Electronics Technology Institute. All rights reserved.
16 // E-mail : contact.openholo@gmail.com
17 // Web : http://www.openholo.org
18 //
19 // Redistribution and use in source and binary forms, with or without modification,
20 // are permitted provided that the following conditions are met:
21 //
22 // 1. Redistribution's of source code must retain the above copyright notice,
23 // this list of conditions and the following disclaimer.
24 //
25 // 2. Redistribution's in binary form must reproduce the above copyright notice,
26 // this list of conditions and the following disclaimer in the documentation
27 // and/or other materials provided with the distribution.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the copyright holder or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 // This software contains open-source software released under the GNU General Public License,
41 // the NVIDIA Software License Agreement, or the CUDA supplement to the Software License Agreement.
42 //M*/
43 #include "ophWRP.h"
44 #include "ophWRP_GPU.h"
45 #include "sys.h"
46 
48 {
// Body of calculateWRPGPU. The signature (listing line 47) is not part of this
// extract; the cross-reference at the end of the listing gives it as
// `double calculateWRPGPU(void)`.
//
// What the visible code does:
//   1. Optionally records a start time (only when CHECK_PROC_TIME is defined).
//   2. Releases any previous p_wrp_ buffer and allocates a fresh one holding
//      pixel_number[_X] * pixel_number[_Y] complex samples, then zero-fills it.
//   3. Optionally logs the elapsed time.
//   4. Returns 0.
//
// NOTE(review): listing line 56 is missing from this extract, so whatever
// statement sits between the buffer reset and the timing epilogue (presumably
// the call that performs the GPU work, e.g. prepareInputdataGPU) cannot be
// seen here -- confirm against the real file.
49 #ifdef CHECK_PROC_TIME
50  auto begin = CUR_TIME;
51 #endif
// The null check is redundant (delete[] on a null pointer is a no-op) but harmless.
52  if (p_wrp_) delete[] p_wrp_;
53  p_wrp_ = new oph::Complex<Real>[context_.pixel_number[_X] * context_.pixel_number[_Y]];
// memset takes an int fill byte; the literal 0.0 is implicitly converted to 0.
// NOTE(review): zero-filling via memset assumes oph::Complex<Real> is trivially
// copyable and that all-zero bytes represent 0+0i -- TODO confirm.
54  memset(p_wrp_, 0.0, sizeof(oph::Complex<Real>) * context_.pixel_number[_X] * context_.pixel_number[_Y]);
55 
57 #ifdef CHECK_PROC_TIME
58  auto end = CUR_TIME;
59  LOG("\n%s : %lf(s)\n\n", __FUNCTION__, ((std::chrono::duration<Real>)(end - begin)).count());
60 #endif
// NOTE(review): the elapsed time is only logged; the value returned is always 0
// even though the function is declared to return double.
61  return 0;
62 
63 }
64 
66 {
// Body of prepareInputdataGPU. The signature (listing line 65) is not part of
// this extract; the cross-reference at the end of the listing gives it as
// `void prepareInputdataGPU()`.
//
// Overview of the visible code:
//   * Queries the current CUDA device and (optionally) prints its properties.
//   * Derives 1-D launch sizes from n_points and from the pixel count.
//   * Allocates host and device working buffers.
//   * For every wavelength channel: uploads the point-cloud data and a
//     WRPGpuConst configuration, runs cudaGenindexx, cudaGetObjDst,
//     cudaGetAmpDst and cudaGenWRP, copies the result back, writes it into
//     p_wrp_ and calls fresnelPropagation into complex_H[ch].
//   * Frees part of the device memory.
//
// NOTE(review): listing lines 90 and 164-167 are missing from this extract
// (the declaration of the local `n_points` used below, and four lines of
// WRPGpuConst constructor arguments). Comments here describe only what is
// visible.
67  // GPU information
68  int devID;
69  HANDLE_ERROR(cudaGetDevice(&devID));
70  cudaDeviceProp devProp;
71  HANDLE_ERROR(cudaGetDeviceProperties(&devProp, devID));
72 
// Diagnostic dump of the device properties; compiled in only when
// __DEBUG_LOG_GPU_SPEC_ is defined.
73 #ifdef __DEBUG_LOG_GPU_SPEC_
74  cout << "GPU Spec : " << devProp.name << endl;
75  cout << " - Global Memory : " << devProp.totalGlobalMem << endl;
76  cout << " - Const Memory : " << devProp.totalConstMem << endl;
77  cout << " - Shared Memory / SM : " << devProp.sharedMemPerMultiprocessor << endl;
78  cout << " - Shared Memory / Block : " << devProp.sharedMemPerBlock << endl;
79  cout << " - SM Counter : " << devProp.multiProcessorCount << endl;
80  cout << " - Maximum Threads / SM : " << devProp.maxThreadsPerMultiProcessor << endl;
81  cout << " - Maximum Threads / Block : " << devProp.maxThreadsPerBlock << endl;
82  cout << " - Maximum Threads of each Dimension of a Block, X : " << devProp.maxThreadsDim[0] << ", Y : " << devProp.maxThreadsDim[1] << ", Z : " << devProp.maxThreadsDim[2] << endl;
83  cout << " - Maximum Blocks of each Dimension of a Grid, X : " << devProp.maxGridSize[0] << ", Y : " << devProp.maxGridSize[1] << ", Z : " << devProp.maxGridSize[2] << endl;
84  cout << " - Device supports allocating Managed Memory on this system : " << devProp.managedMemory << endl;
85  cout << endl;
86 #endif
87 
// NOTE(review): bSupportDouble is never read in the visible body.
88  bool bSupportDouble = false;
89 
91 
// Fixed block size; it is not checked against devProp.maxThreadsPerBlock.
92  const int blockSize = 512; //n_threads
93 
94 
// Ceil-division so that gridSize * blockSize >= n_points.
// NOTE(review): gridSize is ulonglong, but the launch wrappers listed in the
// cross-reference (cudaGenindexx, cudaGetObjDst, cudaGetAmpDst, cudaGenWRP)
// take `const int &nBlocks` / `const int &n_pts_per_stream`, so the values are
// narrowed to int at the call sites.
95  const ulonglong gridSize = (n_points + blockSize - 1) / blockSize; //n_blocks
96 
97 
98  cout << ">>> All " << blockSize * gridSize << " threads in CUDA" << endl;
99  cout << ">>> " << blockSize << " threads/block, " << gridSize << " blocks/grid" << endl;
100 
101  //threads number
102 
103  //Host Memory Location
104  const int n_colors = obj_.n_colors;
// The scaled vertex buffer is uploaded instead of obj_.vertex.
105  Real* host_pc_data = scaledVertex;//obj_.vertex;
106  Real* host_amp_data = obj_.color;
107  const uint pnX = context_.pixel_number[_X];
108  const uint pnY = context_.pixel_number[_Y];
// Product is evaluated in 32-bit unsigned arithmetic.
109  const uint pnXY = pnX * pnY;
// NOTE(review): ppY is not used in the visible body.
110  const Real ppX = context_.pixel_pitch[_X];
111  const Real ppY = context_.pixel_pitch[_Y];
112  const Real distance = wrp_config_.propagation_distance;
113  const uint nChannel = context_.waveNum;
114 
// Host copy of the per-point index data (3 Real values per point).
// NOTE(review): pc_index is filled inside the loop (listing line 177) but is
// neither read afterwards nor released with delete[] in the visible body.
115  Real* pc_index = new Real[obj_.n_points * 3];
// memset takes an int fill byte; 0.0 is implicitly converted to 0.
116  memset(pc_index, 0.0, sizeof(Real) * obj_.n_points * 3);
117 
// Offset between the configured WRP location and zmax_.
118  float wz = wrp_config_.wrp_location - zmax_;
119 
120  //Device(GPU) Memory Location
121  Real* device_pc_data;
122  HANDLE_ERROR(cudaMalloc((void**)&device_pc_data, n_points * 3 * sizeof(Real)));
123  Real* device_amp_data;
124  HANDLE_ERROR(cudaMalloc((void**)&device_amp_data, n_points * n_colors * sizeof(Real)));
125  Real* device_pc_xindex;
126  HANDLE_ERROR(cudaMalloc((void**)&device_pc_xindex, n_points * 3 * sizeof(Real)));
127 
128  WRPGpuConst* device_config = nullptr;
129  HANDLE_ERROR(cudaMalloc((void**)&device_config, sizeof(WRPGpuConst)));
130 
131  //cuda obj dst
// Size in bytes of one Real value per hologram pixel.
132  const ulonglong bufferSize = pnXY * sizeof(Real);
133 
// NOTE(review): host_obj_dst and host_amp_dst receive device data inside the
// loop but are not read afterwards and are not released in the visible body.
134  Real *host_obj_dst = new Real[pnXY];
135  memset(host_obj_dst, 0., bufferSize);
136 
137  Real *host_amp_dst = new Real[pnXY];
138  memset(host_amp_dst, 0., bufferSize);
139 
140  Real *device_obj_dst;
141  HANDLE_ERROR(cudaMalloc((void**)&device_obj_dst, bufferSize));
142 
143  Real *device_amp_dst;
144  HANDLE_ERROR(cudaMalloc((void**)&device_amp_dst, bufferSize));
145 
146 
// Second launch size: ceil-division over the pixel count, used for cudaGenWRP.
147  const ulonglong gridSize2 = (pnXY + blockSize - 1) / blockSize; //n_blocks
148 
// Output buffer of 2 * pnXY Real values: the first half is passed to
// cudaGenWRP as the real plane and the second half (device_dst + pnXY) as the
// imaginary plane.
149  Real* device_dst;
150  HANDLE_ERROR(cudaMalloc((void**)&device_dst, bufferSize * 2));
151 
152  Real* host_dst = new Real[pnXY * 2];
153 
// One pass per wavelength channel.
154  for (uint ch = 0; ch < nChannel; ch++) {
155 
156  Real lambda = context_.wave_length[ch];
// Wave number for this channel; note this also overwrites the member context_.k.
157  Real k = context_.k = (2 * M_PI / lambda);
// NOTE(review): wm is computed but never used in the visible body.
158  float wm = round(fabs(wz * tan(lambda / (2 * ppX)) / ppX));
159 
160  HANDLE_ERROR(cudaMemset(device_pc_xindex, 0., n_points * 3 * sizeof(Real)));
161 
// Per-channel kernel configuration, built on the host and copied to the device.
// (Listing lines 164-167 of the argument list are missing from this extract.)
162  WRPGpuConst* host_config = new WRPGpuConst(
163  obj_.n_points, n_colors, 1,
168  k, lambda
169  );
170 
171  HANDLE_ERROR(cudaMemcpy(device_config, host_config, sizeof(WRPGpuConst), cudaMemcpyHostToDevice));
172 
// NOTE(review): host_pc_data and host_amp_data do not change between
// iterations in the visible code, yet they are re-uploaded for every channel.
173  HANDLE_ERROR(cudaMemcpy(device_pc_data, host_pc_data, n_points * 3 * sizeof(Real), cudaMemcpyHostToDevice));
174  HANDLE_ERROR(cudaMemcpy(device_amp_data, host_amp_data, n_points * n_colors * sizeof(Real), cudaMemcpyHostToDevice));
// NOTE(review): none of the cudaGen*/cudaGet* wrapper calls below is followed
// by an explicit error check in this body; whether the wrappers check
// internally cannot be seen from here.
175  cudaGenindexx(gridSize, blockSize, n_points, device_pc_data, device_pc_xindex, (WRPGpuConst*)device_config);
176 
177  HANDLE_ERROR(cudaMemcpy(pc_index, device_pc_xindex, sizeof(Real) * 3 * n_points, cudaMemcpyDeviceToHost));
178  HANDLE_ERROR(cudaMemset(device_obj_dst, 0., bufferSize));
179  HANDLE_ERROR(cudaMemset(device_amp_dst, 0., bufferSize));
180 
181  cudaGetObjDst(gridSize, blockSize, n_points, device_pc_xindex, device_obj_dst, (WRPGpuConst*)device_config);
182  cudaGetAmpDst(gridSize, blockSize, n_points, device_pc_xindex, device_amp_data, device_amp_dst, (WRPGpuConst*)device_config);
183 
184  HANDLE_ERROR(cudaMemcpy(host_obj_dst, device_obj_dst, bufferSize, cudaMemcpyDeviceToHost));
185  HANDLE_ERROR(cudaMemcpy(host_amp_dst, device_amp_dst, bufferSize, cudaMemcpyDeviceToHost));
186 
187  HANDLE_ERROR(cudaMemset(device_dst, 0., bufferSize * 2));
188  memset(host_dst, 0., bufferSize * 2);
189 
190  // cuda WRP
191  cudaGenWRP(gridSize2, blockSize, n_points, device_obj_dst, device_amp_dst, device_dst, device_dst + pnXY, (WRPGpuConst*)device_config);
192 
193  HANDLE_ERROR(cudaMemcpy(host_dst, device_dst, bufferSize * 2, cudaMemcpyDeviceToHost));
194 
// Merge the planar result (real plane followed by imaginary plane) into the
// interleaved complex buffer p_wrp_.
// NOTE(review): a pixel is written only when its real part is non-zero, and
// p_wrp_ is not cleared inside this loop, so skipped pixels keep whatever they
// held before (including values from a previous channel) -- confirm intended.
195  for (ulonglong n = 0; n < pnXY; ++n) {
196  if (host_dst[n] != 0)
197  {
198  p_wrp_[n][_RE] = host_dst[n];
199  p_wrp_[n][_IM] = host_dst[n + pnXY];
200  }
201  }
202 
203  fresnelPropagation(p_wrp_, complex_H[ch], distance, ch);
204 
205  delete host_config;
206  }
207 
// NOTE(review): only device_pc_data, device_amp_data, device_pc_xindex and
// device_config are released below. No cudaFree for device_obj_dst,
// device_amp_dst or device_dst, and no delete[] for pc_index, host_obj_dst,
// host_amp_dst or host_dst, appears in the visible body -- these look like
// leaks on every call; confirm and release them.
208  HANDLE_ERROR(cudaFree(device_pc_data));
209 
210  //*(complex_H) = p_wrp_;
211 
212  //free memory
213  HANDLE_ERROR(cudaFree(device_amp_data));
214  HANDLE_ERROR(cudaFree(device_pc_xindex));
215  HANDLE_ERROR(cudaFree(device_config));
216 
217 }
218 
void cudaGetAmpDst(const int &nBlocks, const int &nThreads, const int &n_pts_per_stream, Real *cuda_pc_index, Real *cuda_pc_amp, Real *cuda_amp_dst, const WRPGpuConst *cuda_config)
#define HANDLE_ERROR(err)
Real * color
Color data of point clouds.
Definition: ophGen.h:435
Real k
Definition: Openholo.h:66
Real * wave_length
Definition: Openholo.h:69
Real * scaledVertex
Definition: ophWRP.h:233
ulonglong n_points
Number of points.
Definition: ophGen.h:429
void prepareInputdataGPU()
Definition: ophWRP_GPU.cpp:65
OphPointCloudData obj_
Input Pointcloud Data.
Definition: ophWRP.h:232
float Real
Definition: typedef.h:55
#define CUR_TIME
Definition: function.h:58
void cudaGenindexx(const int &nBlocks, const int &nThreads, const int &n_pts_per_stream, Real *cuda_pc_data, Real *cuda_pc_indexx, const WRPGpuConst *cuda_config)
#define _Y
Definition: define.h:84
double calculateWRPGPU(void)
Definition: ophWRP_GPU.cpp:47
#define _IM
Definition: complex.h:57
unsigned long long ulonglong
Definition: typedef.h:67
#define _X
Definition: define.h:80
Real wrp_location
Location distance of WRP.
Definition: ophGen.h:519
oph::ivec2 pixel_number
Definition: Openholo.h:63
OphWRPConfig wrp_config_
structure variable for WRP hologram configuration
Definition: ophWRP.h:234
#define _RE
Definition: complex.h:54
struct KernelConst WRPGpuConst
void fresnelPropagation(OphConfig context, Complex< Real > *in, Complex< Real > *out, Real distance)
Fresnel propagation.
Definition: ophGen.cpp:768
void cudaGetObjDst(const int &nBlocks, const int &nThreads, const int &n_pts_per_stream, Real *cuda_pc_index, Real *cuda_pc_obj_dst, const WRPGpuConst *cuda_config)
uint waveNum
Definition: Openholo.h:68
int n_colors
Number of color channel.
Definition: ophGen.h:431
void cudaGenWRP(const int &nBlocks, const int &nThreads, const int &n_pts_per_stream, Real *cuda_pc_data, Real *cuda_amp_data, Real *cuda_dst_re, Real *cuda_dst_im, const WRPGpuConst *cuda_config)
Complex< Real > * p_wrp_
wrp buffer - complex type
Definition: ophWRP.h:230
int n_points
numbers of points
Definition: ophWRP.h:228
Real propagation_distance
Distance of Hologram plane.
Definition: ophGen.h:521
OphConfig context_
Definition: Openholo.h:297
Complex< Real > ** complex_H
Definition: Openholo.h:298
unsigned int uint
Definition: typedef.h:62
#define M_PI
Definition: define.h:52
oph::vec2 pixel_pitch
Definition: Openholo.h:64