Openholo  v1.0
Open Source Digital Holographic Library
ophPointCloud_GPU.cpp
Go to the documentation of this file.
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install, copy or use the software.
7 //
8 //
9 // License Agreement
10 // For Open Source Digital Holographic Library
11 //
12 // Openholo library is free software;
13 // you can redistribute it and/or modify it under the terms of the BSD 2-Clause license.
14 //
15 // Copyright (C) 2017-2024, Korea Electronics Technology Institute. All rights reserved.
16 // E-mail : contact.openholo@gmail.com
17 // Web : http://www.openholo.org
18 //
19 // Redistribution and use in source and binary forms, with or without modification,
20 // are permitted provided that the following conditions are met:
21 //
22 // 1. Redistribution's of source code must retain the above copyright notice,
23 // this list of conditions and the following disclaimer.
24 //
25 // 2. Redistribution's in binary form must reproduce the above copyright notice,
26 // this list of conditions and the following disclaimer in the documentation
27 // and/or other materials provided with the distribution.
28 //
29 // This software is provided by the copyright holders and contributors "as is" and
30 // any express or implied warranties, including, but not limited to, the implied
31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
32 // In no event shall the copyright holder or contributors be liable for any direct,
33 // indirect, incidental, special, exemplary, or consequential damages
34 // (including, but not limited to, procurement of substitute goods or services;
35 // loss of use, data, or profits; or business interruption) however caused
36 // and on any theory of liability, whether in contract, strict liability,
37 // or tort (including negligence or otherwise) arising in any way out of
38 // the use of this software, even if advised of the possibility of such damage.
39 //
40 // This software contains opensource software released under GNU Generic Public License,
41 // NVDIA Software License Agreement, or CUDA supplement to Software License Agreement.
42 // Check whether software you use contains licensed software.
43 //
44 //M*/
45 
46 #include "ophPointCloud.h"
47 #include "ophPointCloud_GPU.h"
48 
49 #include <sys.h> //for LOG() macro
50 
51 //#define USE_ASYNC
52 Real ophPointCloud::genCghPointCloudGPU(uint diff_flag)
53 {
54  auto begin = CUR_TIME;
55  int devID;
56  HANDLE_ERROR(cudaGetDevice(&devID));
57  cudaDeviceProp devProp;
58  HANDLE_ERROR(cudaGetDeviceProperties(&devProp, devID));
59 
60 #ifdef __DEBUG_LOG_GPU_SPEC_
61  cout << "GPU Spec : " << devProp.name << endl;
62  cout << " - Global Memory : " << devProp.totalGlobalMem << endl;
63  cout << " - Const Memory : " << devProp.totalConstMem << endl;
64  cout << " - MP(Multiprocessor) Count : " << devProp.multiProcessorCount << endl;
65  cout << " - Maximum Threads per MP : " << devProp.maxThreadsPerMultiProcessor << endl;
66  cout << " - Shared Memory per MP : " << devProp.sharedMemPerMultiprocessor << endl;
67  cout << " - Block per MP : " << devProp.maxThreadsPerMultiProcessor/devProp.maxThreadsPerBlock << endl;
68 
69  cout << " - Shared Memory per Block : " << devProp.sharedMemPerBlock << endl;
70  cout << " - Maximum Threads per Block : " << devProp.maxThreadsPerBlock << endl;
71  printf(" - Maximum Threads of each Dimension of a Block (X: %d / Y: %d / Z: %d)\n",
72  devProp.maxThreadsDim[_X], devProp.maxThreadsDim[_Y], devProp.maxThreadsDim[_Z]);
73  printf(" - Maximum Blocks of each Dimension of a Grid, (X: %d / Y: %d / Z: %d)\n",
74  devProp.maxGridSize[_X], devProp.maxGridSize[_Y], devProp.maxGridSize[_Z]);
75  cout << " - Device supports allocating Managed Memory on this system : " << devProp.managedMemory << endl;
76  cout << endl;
77 #endif
78 
79  bool bSupportDouble = false;
80 
82  const int blockSize = 512; //n_threads // blockSize < devProp.maxThreadsPerBlock
83  const ulonglong gridSize = (pnXY + blockSize - 1) / blockSize; //n_blocks
84 
85  cout << ">>> All " << blockSize * gridSize << " threads in CUDA" << endl;
86  cout << ">>> " << blockSize << " threads/block, " << gridSize << " blocks/grid" << endl;
87 
88  //const int n_streams = OPH_CUDA_N_STREAM;
89  int n_streams;
90  if (pc_config_.n_streams == 0)
91  n_streams = pc_data_.n_points / 300 + 1;
92  else if (pc_config_.n_streams < 0)
93  {
94  LOG("Invalid value : NumOfStream");
95  return;
96  }
97  else
98  n_streams = pc_config_.n_streams;
99 
100  LOG(">>> Number Of Stream : %d\n", n_streams);
101 
102  //threads number
103  const ulonglong bufferSize = pnXY * sizeof(Real);
104 
105  //Host Memory Location
106  const int n_colors = pc_data_.n_colors;
107  Real* host_pc_data = nullptr;
108  Real* host_amp_data = pc_data_.color;
109  Real* host_dst = nullptr;
110 
111  // Keep original buffer
112  if (is_ViewingWindow) {
113  host_pc_data = new Real[n_points * 3];
114  transVW(n_points * 3, host_pc_data, pc_data_.vertex);
115  }
116  else {
117  host_pc_data = pc_data_.vertex;
118  }
119 
120  if ((diff_flag == PC_DIFF_RS) || (diff_flag == PC_DIFF_FRESNEL)) {
121  host_dst = new Real[pnXY * 2];
122  memset(host_dst, 0., bufferSize * 2);
123  }
124 
125  uint nChannel = context_.waveNum;
126 
127  for (uint ch = 0; ch < nChannel; ch++)
128  {
129  memset(host_dst, 0., bufferSize * 2);
130  context_.k = (2 * M_PI) / context_.wave_length[ch];
131 
132  GpuConst* host_config = new GpuConst(
133  n_points, n_colors, pc_config_.n_streams,
134  pc_config_.scale, pc_config_.offset_depth,
137  context_.ss,
138  context_.k
139  );
140 
141  //Device(GPU) Memory Location
142  Real* device_pc_data;
143  HANDLE_ERROR(cudaMalloc((void**)&device_pc_data, n_points * 3 * sizeof(Real)));
144 
145  Real* device_amp_data;
146  HANDLE_ERROR(cudaMalloc((void**)&device_amp_data, n_points * n_colors * sizeof(Real)));
147 
148  Real* device_dst = nullptr;
149  if ((diff_flag == PC_DIFF_RS) || (diff_flag == PC_DIFF_FRESNEL)) {
150  HANDLE_ERROR(cudaMalloc((void**)&device_dst, bufferSize * 2));
151  HANDLE_ERROR(cudaMemset(device_dst, 0., bufferSize * 2));
152  }
153 
154  GpuConst* device_config = nullptr;
155  switch (diff_flag) {
156  case PC_DIFF_RS/*_NOT_ENCODED*/: {
157  host_config = new GpuConstNERS(*host_config, context_.wave_length[ch]);
158  HANDLE_ERROR(cudaMalloc((void**)&device_config, sizeof(GpuConstNERS)));
159  HANDLE_ERROR(cudaMemcpy(device_config, host_config, sizeof(GpuConstNERS), cudaMemcpyHostToDevice));
160  break;
161  }
162  case PC_DIFF_FRESNEL/*_NOT_ENCODED*/: {
163  host_config = new GpuConstNEFR(*host_config, context_.wave_length[ch]);
164  HANDLE_ERROR(cudaMalloc((void**)&device_config, sizeof(GpuConstNEFR)));
165  HANDLE_ERROR(cudaMemcpy(device_config, host_config, sizeof(GpuConstNEFR), cudaMemcpyHostToDevice));
166  break;
167  }
168  }
169 
170  int stream_points = n_points / n_streams;
171  int remainder = n_points % n_streams;
172 
173  int offset = 0;
174  for (int i = 0; i < n_streams; ++i) {
175  offset = i * stream_points;
176  if (i == n_streams - 1) { // 마지막 스트림 연산 시,
177  stream_points += remainder;
178  }
179  HANDLE_ERROR(cudaMemcpy(device_pc_data + 3 * offset, host_pc_data + 3 * offset, stream_points * 3 * sizeof(Real), cudaMemcpyHostToDevice));
180  HANDLE_ERROR(cudaMemcpy(device_amp_data + n_colors * offset, host_amp_data + n_colors * offset, stream_points * sizeof(Real), cudaMemcpyHostToDevice));
181 
182  switch (diff_flag) {
183  case PC_DIFF_RS/*_NOT_ENCODED*/: {
184 
185  cudaGenCghPointCloud_NotEncodedRS(gridSize, blockSize, stream_points, device_pc_data + 3 * offset, device_amp_data + n_colors * offset, device_dst, device_dst + pnXY, (GpuConstNERS*)device_config);
186  HANDLE_ERROR(cudaMemcpy(host_dst, device_dst, bufferSize * 2, cudaMemcpyDeviceToHost));
187  HANDLE_ERROR(cudaMemset(device_dst, 0., bufferSize * 2));
188 
189  for (ulonglong n = 0; n < pnXY; ++n) {
190  complex_H[ch][n][_RE] += host_dst[n];
191  complex_H[ch][n][_IM] += host_dst[n + pnXY];
192  }
193  break;
194  }
195  case PC_DIFF_FRESNEL/*_NOT_ENCODED*/: {
196  cudaGenCghPointCloud_NotEncodedFrsn(gridSize, blockSize, stream_points, device_pc_data + 3 * offset, device_amp_data + n_colors * offset, device_dst, device_dst + pnXY, (GpuConstNEFR*)device_config);
197  HANDLE_ERROR(cudaMemcpy(host_dst, device_dst, bufferSize * 2, cudaMemcpyDeviceToHost));
198  HANDLE_ERROR(cudaMemset(device_dst, 0., bufferSize * 2));
199 
200  for (ulonglong n = 0; n < pnXY; ++n) {
201  complex_H[ch][n][_RE] += host_dst[n];
202  complex_H[ch][n][_IM] += host_dst[n + pnXY];
203  }
204  break;
205  } // case
206  } // switch
207 
208 
209  n_percent = (int)((Real)(ch*n_streams + i + 1) * 100 / ((Real)n_streams * nChannel));
210  LOG("GPU(%d/%d) > %.16f / %.16f\n", i+1, n_streams,
211  complex_H[ch][0][_RE], complex_H[ch][0][_IM]);
212 
213  } // for
214 
215  //free memory
216  HANDLE_ERROR(cudaFree(device_pc_data));
217  HANDLE_ERROR(cudaFree(device_amp_data));
218  HANDLE_ERROR(cudaFree(device_dst));
219  HANDLE_ERROR(cudaFree(device_config));
220 
221  delete host_config;
222  }
223 
224  delete[] host_dst;
225  if (is_ViewingWindow) {
226  delete[] host_pc_data;
227  }
228 
229  auto end = CUR_TIME;
230  Real elapsed_time = ((chrono::duration<Real>)(end - begin)).count();
231  LOG("\n%s : %lf(s) \n\n",
232  __FUNCTION__,
233  elapsed_time);
234 
235  return elapsed_time;
236 }
Real * vertex
Geometry of point clouds.
Definition: ophGen.h:433
Real offset_depth
Offset value of point cloud.
Definition: ophGen.h:404
#define HANDLE_ERROR(err)
Real * color
Color data of point clouds.
Definition: ophGen.h:435
Real k
Definition: Openholo.h:66
Real * wave_length
Definition: Openholo.h:69
void cudaGenCghPointCloud_NotEncodedFrsn(const int &nBlocks, const int &nThreads, const int &n_pts_per_stream, Real *cuda_pc_data, Real *cuda_amp_data, Real *cuda_dst_real, Real *cuda_dst_imag, const GpuConstNEFR *cuda_config)
ulonglong n_points
Number of points.
Definition: ophGen.h:429
float Real
Definition: typedef.h:55
void cudaGenCghPointCloud_NotEncodedRS(const int &nBlocks, const int &nThreads, const int &n_pts_per_stream, Real *cuda_pc_data, Real *cuda_amp_data, Real *cuda_dst_real, Real *cuda_dst_imag, const GpuConstNERS *cuda_config)
#define CUR_TIME
Definition: function.h:58
Openholo Point Cloud based CGH generation with CUDA GPGPU.
vec2 ss
Definition: Openholo.h:67
#define _Y
Definition: define.h:84
#define _IM
Definition: complex.h:57
KernelConst_NotEncodedFrsn GpuConstNEFR
vec3 scale
Scaling factor of coordinate of point cloud.
Definition: ophGen.h:402
unsigned long long ulonglong
Definition: typedef.h:67
#define _X
Definition: define.h:80
int n_streams
stream count for CUDA
Definition: ophGen.h:400
oph::ivec2 pixel_number
Definition: Openholo.h:63
KernelConst_NotEncodedRS GpuConstNERS
#define _RE
Definition: complex.h:54
uint waveNum
Definition: Openholo.h:68
int n_colors
Number of color channel.
Definition: ophGen.h:431
OphConfig context_
Definition: Openholo.h:297
Complex< Real > ** complex_H
Definition: Openholo.h:298
#define _Z
Definition: define.h:88
unsigned int uint
Definition: typedef.h:62
#define M_PI
Definition: define.h:52
oph::vec2 pixel_pitch
Definition: Openholo.h:64