15 #include "reconstruct3d.h" 16 #include "visiontransfer/alignedallocator.h" 26 #include <immintrin.h> 28 #include <emmintrin.h> 39 class Reconstruct3D::Pimpl {
43 float* createPointMap(
const unsigned short* dispMap,
int width,
int height,
44 int rowStride,
const float* q,
unsigned short minDisparity,
int subpixelFactor,
45 unsigned short maxDisparity);
47 float* createPointMap(
const ImageSet& imageSet,
unsigned short minDisparity);
49 float* createZMap(
const ImageSet& imageSet,
unsigned short minDisparity,
unsigned short maxDisparity);
51 void projectSinglePoint(
int imageX,
int imageY,
unsigned short disparity,
const float* q,
52 float& pointX,
float& pointY,
float& pointZ,
int subpixelFactor);
54 void writePlyFile(
const char* file,
const unsigned short* dispMap,
56 int dispRowStride,
int imageRowStride,
const float* q,
57 double maxZ,
bool binary,
int subpixelFactor,
unsigned short maxDisparity);
59 void writePlyFile(
const char* file,
const ImageSet& imageSet,
60 double maxZ,
bool binary);
63 std::vector<float, AlignedAllocator<float> > pointMap;
65 float* createPointMapFallback(
const unsigned short* dispMap,
int width,
int height,
66 int rowStride,
const float* q,
unsigned short minDisparity,
int subpixelFactor,
67 unsigned short maxDisparity);
69 float* createPointMapSSE2(
const unsigned short* dispMap,
int width,
int height,
70 int rowStride,
const float* q,
unsigned short minDisparity,
int subpixelFactor,
71 unsigned short maxDisparity);
73 float* createPointMapAVX2(
const unsigned short* dispMap,
int width,
int height,
74 int rowStride,
const float* q,
unsigned short minDisparity,
int subpixelFactor,
75 unsigned short maxDisparity);
80 Reconstruct3D::Reconstruct3D()
84 Reconstruct3D::~Reconstruct3D() {
89 int rowStride,
const float* q,
unsigned short minDisparity,
int subpixelFactor,
90 unsigned short maxDisparity) {
91 return pimpl->createPointMap(dispMap, width, height, rowStride, q, minDisparity,
92 subpixelFactor, maxDisparity);
96 return pimpl->createPointMap(imageSet, minDisparity);
100 unsigned short maxDisparity) {
101 return pimpl->createZMap(imageSet, minDisparity, maxDisparity);
105 const float* q,
float& pointX,
float& pointY,
float& pointZ,
int subpixelFactor) {
106 pimpl->projectSinglePoint(imageX, imageY, disparity, q, pointX, pointY, pointZ,
112 int imageRowStride,
const float* q,
double maxZ,
bool binary,
int subpixelFactor,
113 unsigned short maxDisparity) {
114 pimpl->writePlyFile(file, dispMap, image, width, height, format, dispRowStride,
115 imageRowStride, q, maxZ, binary, subpixelFactor, maxDisparity);
119 double maxZ,
bool binary) {
120 pimpl->writePlyFile(file, imageSet, maxZ, binary);
125 Reconstruct3D::Pimpl::Pimpl() {
128 float* Reconstruct3D::Pimpl::createPointMap(
const unsigned short* dispMap,
int width,
129 int height,
int rowStride,
const float* q,
unsigned short minDisparity,
130 int subpixelFactor,
unsigned short maxDisparity) {
133 if(pointMap.size() <
static_cast<unsigned int>(4*width*height)) {
134 pointMap.resize(4*width*height);
138 if(width % 16 == 0 && (uintptr_t)dispMap % 32 == 0) {
139 return createPointMapAVX2(dispMap, width, height, rowStride, q,
140 minDisparity, subpixelFactor, maxDisparity);
144 if(width % 8 == 0 && (uintptr_t)dispMap % 16 == 0) {
145 return createPointMapSSE2(dispMap, width, height, rowStride, q,
146 minDisparity, subpixelFactor, maxDisparity);
149 return createPointMapFallback(dispMap, width, height, rowStride, q,
150 minDisparity, subpixelFactor, maxDisparity);
153 float* Reconstruct3D::Pimpl::createPointMap(
const ImageSet& imageSet,
unsigned short minDisparity) {
155 throw std::runtime_error(
"ImageSet does not contain a disparity map!");
159 throw std::runtime_error(
"Disparity map must have 12-bit pixel format!");
167 float* Reconstruct3D::Pimpl::createPointMapFallback(
const unsigned short* dispMap,
int width,
168 int height,
int rowStride,
const float* q,
unsigned short minDisparity,
169 int subpixelFactor,
unsigned short maxDisparity) {
171 float* outputPtr = &pointMap[0];
172 int stride = rowStride / 2;
174 for(
int y = 0; y < height; y++) {
175 double qx = q[1]*y + q[3];
176 double qy = q[5]*y + q[7];
177 double qz = q[9]*y + q[11];
178 double qw = q[13]*y + q[15];
180 const unsigned short* dispRow = &dispMap[y*stride];
181 for(
int x = 0; x < width; x++) {
182 unsigned short intDisp = std::max(minDisparity, dispRow[x]);
183 if(intDisp >= maxDisparity) {
184 intDisp = minDisparity;
187 double d = intDisp / double(subpixelFactor);
188 double w = qw + q[14]*d;
190 *outputPtr =
static_cast<float>((qx + q[2]*d)/w);
193 *outputPtr =
static_cast<float>((qy + q[6]*d)/w);
196 *outputPtr =
static_cast<float>((qz + q[10]*d)/w);
208 float* Reconstruct3D::Pimpl::createZMap(
const ImageSet& imageSet,
unsigned short minDisparity,
209 unsigned short maxDisparity) {
211 if(pointMap.size() <
static_cast<unsigned int>(imageSet.
getWidth()*imageSet.
getHeight())) {
215 float* outputPtr = &pointMap[0];
216 int stride = imageSet.
getRowStride(ImageSet::IMAGE_DISPARITY) / 2;
217 const unsigned short* dispMap =
reinterpret_cast<const unsigned short*
>(imageSet.
getPixelData(ImageSet::IMAGE_DISPARITY));
221 for(
int y = 0; y < imageSet.
getHeight(); y++) {
222 double qz = q[9]*y + q[11];
223 double qw = q[13]*y + q[15];
225 const unsigned short* dispRow = &dispMap[y*stride];
226 for(
int x = 0; x < imageSet.
getWidth(); x++) {
227 unsigned short intDisp = std::max(minDisparity, dispRow[x]);
228 if(intDisp >= maxDisparity) {
229 intDisp = minDisparity;
232 double d = intDisp / double(subpixelFactor);
233 double w = qw + q[14]*d;
235 *outputPtr =
static_cast<float>((qz + q[10]*d)/w);
244 void Reconstruct3D::Pimpl::projectSinglePoint(
int imageX,
int imageY,
unsigned short disparity,
245 const float* q,
float& pointX,
float& pointY,
float& pointZ,
int subpixelFactor) {
247 double d = disparity / double(subpixelFactor);
248 double w = q[15] + q[14]*d;
249 pointX =
static_cast<float>((imageX*q[0] + q[3])/w);
250 pointY =
static_cast<float>((imageY*q[5] + q[7])/w);
251 pointZ =
static_cast<float>(q[11]/w);
255 float* Reconstruct3D::Pimpl::createPointMapAVX2(
const unsigned short* dispMap,
int width,
256 int height,
int rowStride,
const float* q,
unsigned short minDisparity,
257 int subpixelFactor,
unsigned short maxDisparity) {
260 const __m256 qCol0 = _mm256_setr_ps(q[0], q[4], q[8], q[12], q[0], q[4], q[8], q[12]);
261 const __m256 qCol1 = _mm256_setr_ps(q[1], q[5], q[9], q[13], q[1], q[5], q[9], q[13]);
262 const __m256 qCol2 = _mm256_setr_ps(q[2], q[6], q[10], q[14], q[2], q[6], q[10], q[14]);
263 const __m256 qCol3 = _mm256_setr_ps(q[3], q[7], q[11], q[15], q[3], q[7], q[11], q[15]);
266 const __m256i minDispVector = _mm256_set1_epi16(minDisparity);
267 const __m256i maxDispVector = _mm256_set1_epi16(maxDisparity);
268 const __m256 scaleVector = _mm256_set1_ps(1.0/
double(subpixelFactor));
269 const __m256i zeroVector = _mm256_set1_epi16(0);
271 float* outputPtr = &pointMap[0];
273 for(
int y = 0; y < height; y++) {
274 const unsigned char* rowStart = &
reinterpret_cast<const unsigned char*
>(dispMap)[y*rowStride];
275 const unsigned char* rowEnd = &
reinterpret_cast<const unsigned char*
>(dispMap)[y*rowStride + 2*width];
278 for(
const unsigned char* ptr = rowStart; ptr != rowEnd; ptr += 32) {
279 __m256i disparities = _mm256_load_si256(reinterpret_cast<const __m256i*>(ptr));
282 __m256i validMask = _mm256_cmpgt_epi16(maxDispVector, disparities);
283 disparities = _mm256_and_si256(validMask, disparities);
286 disparities = _mm256_max_epi16(disparities, minDispVector);
289 __m256i disparitiesMixup = _mm256_permute4x64_epi64(disparities, 0xd8);
292 __m256 floatDisp = _mm256_cvtepi32_ps(_mm256_unpacklo_epi16(disparitiesMixup, zeroVector));
293 __m256 dispScaled = _mm256_mul_ps(floatDisp, scaleVector);
297 __declspec(align(32))
float dispArray[16];
299 float dispArray[16]__attribute__((aligned(32)));
301 _mm256_store_ps(&dispArray[0], dispScaled);
304 floatDisp = _mm256_cvtepi32_ps(_mm256_unpackhi_epi16(disparitiesMixup, zeroVector));
305 dispScaled = _mm256_mul_ps(floatDisp, scaleVector);
306 _mm256_store_ps(&dispArray[8], dispScaled);
309 for(
int i=0; i<16; i+=2) {
311 __m256 vec = _mm256_setr_ps(x, y, dispArray[i], 1.0,
312 x+1, y, dispArray[i+1], 1.0);
315 __m256 u1 = _mm256_shuffle_ps(vec,vec, _MM_SHUFFLE(0,0,0,0));
316 __m256 u2 = _mm256_shuffle_ps(vec,vec, _MM_SHUFFLE(1,1,1,1));
317 __m256 u3 = _mm256_shuffle_ps(vec,vec, _MM_SHUFFLE(2,2,2,2));
318 __m256 u4 = _mm256_shuffle_ps(vec,vec, _MM_SHUFFLE(3,3,3,3));
320 __m256 prod1 = _mm256_mul_ps(u1, qCol0);
321 __m256 prod2 = _mm256_mul_ps(u2, qCol1);
322 __m256 prod3 = _mm256_mul_ps(u3, qCol2);
323 __m256 prod4 = _mm256_mul_ps(u4, qCol3);
325 __m256 multResult = _mm256_add_ps(_mm256_add_ps(prod1, prod2), _mm256_add_ps(prod3, prod4));
328 __m256 point = _mm256_div_ps(multResult,
329 _mm256_shuffle_ps(multResult,multResult, _MM_SHUFFLE(3,3,3,3)));
332 _mm256_store_ps(outputPtr, point);
345 float* Reconstruct3D::Pimpl::createPointMapSSE2(
const unsigned short* dispMap,
int width,
346 int height,
int rowStride,
const float* q,
unsigned short minDisparity,
347 int subpixelFactor,
unsigned short maxDisparity) {
350 const __m128 qCol0 = _mm_setr_ps(q[0], q[4], q[8], q[12]);
351 const __m128 qCol1 = _mm_setr_ps(q[1], q[5], q[9], q[13]);
352 const __m128 qCol2 = _mm_setr_ps(q[2], q[6], q[10], q[14]);
353 const __m128 qCol3 = _mm_setr_ps(q[3], q[7], q[11], q[15]);
356 const __m128i minDispVector = _mm_set1_epi16(minDisparity);
357 const __m128i maxDispVector = _mm_set1_epi16(maxDisparity);
358 const __m128 scaleVector = _mm_set1_ps(1.0/
double(subpixelFactor));
359 const __m128i zeroVector = _mm_set1_epi16(0);
361 float* outputPtr = &pointMap[0];
363 for(
int y = 0; y < height; y++) {
364 const unsigned char* rowStart = &
reinterpret_cast<const unsigned char*
>(dispMap)[y*rowStride];
365 const unsigned char* rowEnd = &
reinterpret_cast<const unsigned char*
>(dispMap)[y*rowStride + 2*width];
368 for(
const unsigned char* ptr = rowStart; ptr != rowEnd; ptr += 16) {
369 __m128i disparities = _mm_load_si128(reinterpret_cast<const __m128i*>(ptr));
372 __m128i validMask = _mm_cmplt_epi16(disparities, maxDispVector);
373 disparities = _mm_and_si128(validMask, disparities);
376 disparities = _mm_max_epi16(disparities, minDispVector);
379 __m128 floatDisp = _mm_cvtepi32_ps(_mm_unpacklo_epi16(disparities, zeroVector));
380 __m128 dispScaled = _mm_mul_ps(floatDisp, scaleVector);
384 __declspec(align(16))
float dispArray[8];
386 float dispArray[8]__attribute__((aligned(16)));
388 _mm_store_ps(&dispArray[0], dispScaled);
391 floatDisp = _mm_cvtepi32_ps(_mm_unpackhi_epi16(disparities, zeroVector));
392 dispScaled = _mm_mul_ps(floatDisp, scaleVector);
393 _mm_store_ps(&dispArray[4], dispScaled);
396 for(
int i=0; i<8; i++) {
398 __m128 vec = _mm_setr_ps(static_cast<float>(x), static_cast<float>(y), dispArray[i], 1.0);
401 __m128 u1 = _mm_shuffle_ps(vec,vec, _MM_SHUFFLE(0,0,0,0));
402 __m128 u2 = _mm_shuffle_ps(vec,vec, _MM_SHUFFLE(1,1,1,1));
403 __m128 u3 = _mm_shuffle_ps(vec,vec, _MM_SHUFFLE(2,2,2,2));
404 __m128 u4 = _mm_shuffle_ps(vec,vec, _MM_SHUFFLE(3,3,3,3));
406 __m128 prod1 = _mm_mul_ps(u1, qCol0);
407 __m128 prod2 = _mm_mul_ps(u2, qCol1);
408 __m128 prod3 = _mm_mul_ps(u3, qCol2);
409 __m128 prod4 = _mm_mul_ps(u4, qCol3);
411 __m128 multResult = _mm_add_ps(_mm_add_ps(prod1, prod2), _mm_add_ps(prod3, prod4));
414 __m128 point = _mm_div_ps(multResult,
415 _mm_shuffle_ps(multResult,multResult, _MM_SHUFFLE(3,3,3,3)));
418 _mm_store_ps(outputPtr, point);
430 void Reconstruct3D::Pimpl::writePlyFile(
const char* file,
const unsigned short* dispMap,
432 int imageRowStride,
const float* q,
double maxZ,
bool binary,
int subpixelFactor,
433 unsigned short maxDisparity) {
435 float* pointMap =
createPointMap(dispMap, width, height, dispRowStride,
436 q, 0, subpixelFactor, maxDisparity);
441 for(
int i=0; i<width*height; i++) {
442 if(pointMap[4*i+2] <= maxZ) {
447 pointsCount = width*height;
451 fstream strm(file, binary ? (ios::out | ios::binary) : ios::out);
452 strm <<
"ply" << endl;
455 strm <<
"format binary_little_endian 1.0" << endl;
457 strm <<
"format ascii 1.0" << endl;
460 strm <<
"element vertex " << pointsCount << endl
461 <<
"property float x" << endl
462 <<
"property float y" << endl
463 <<
"property float z" << endl;
464 if (image !=
nullptr) {
466 strm <<
"property uchar red" << endl
467 <<
"property uchar green" << endl
468 <<
"property uchar blue" << endl;
470 strm <<
"end_header" << endl;
473 for(
int i=0; i<width*height; i++) {
477 if(maxZ < 0 || pointMap[4*i+2] <= maxZ) {
480 strm.write(reinterpret_cast<char*>(&pointMap[4*i]),
sizeof(
float)*3);
481 if (image ==
nullptr) {
484 const unsigned char* col = &image[y*imageRowStride + 3*x];
485 strm.write(reinterpret_cast<const char*>(col), 3*
sizeof(*col));
487 const unsigned char* col = &image[y*imageRowStride + x];
488 unsigned char writeData[3] = {*col, *col, *col};
489 strm.write(reinterpret_cast<const char*>(writeData),
sizeof(writeData));
491 const unsigned short* col =
reinterpret_cast<const unsigned short*
>(&image[y*imageRowStride + 2*x]);
492 unsigned char writeData[3] = {
493 (
unsigned char)(*col >> 4),
494 (
unsigned char)(*col >> 4),
495 (
unsigned char)(*col >> 4)
497 strm.write(reinterpret_cast<const char*>(writeData),
sizeof(writeData));
501 if(std::isfinite(pointMap[4*i + 2])) {
502 strm << pointMap[4*i]
503 <<
" " << pointMap[4*i + 1]
504 <<
" " << pointMap[4*i + 2];
506 strm <<
"NaN NaN NaN";
509 if (image ==
nullptr) {
513 const unsigned char* col = &image[y*imageRowStride + 3*x];
514 strm <<
" " <<
static_cast<int>(col[0])
515 <<
" " << static_cast<int>(col[1])
516 <<
" " <<
static_cast<int>(col[2]) << endl;
518 const unsigned char* col = &image[y*imageRowStride + x];
519 strm <<
" " <<
static_cast<int>(*col)
520 <<
" " <<
static_cast<int>(*col)
521 <<
" " <<
static_cast<int>(*col) << endl;
523 const unsigned short* col =
reinterpret_cast<const unsigned short*
>(&image[y*imageRowStride + 2*x]);
524 strm <<
" " <<
static_cast<int>(*col >> 4)
525 <<
" " << static_cast<int>(*col >> 4)
526 <<
" " <<
static_cast<int>(*col >> 4) << endl;
533 void Reconstruct3D::Pimpl::writePlyFile(
const char* file,
const ImageSet& imageSet,
534 double maxZ,
bool binary) {
535 int indexDisp = imageSet.
getIndexOf(ImageSet::IMAGE_DISPARITY);
536 int indexImg = imageSet.
getIndexOf(ImageSet::IMAGE_LEFT);
537 if(indexDisp == -1) {
538 throw std::runtime_error(
"No disparity channel present, cannot create point map!");
541 throw std::runtime_error(
"Disparity map must have 12-bit pixel format!");
546 (indexImg == -1) ? nullptr : imageSet.
getPixelData(indexImg),
int getRowStride(int imageNumber) const
Returns the row stride for the pixel data of one image.
const float * getQMatrix() const
Returns a pointer to the disparity-to-depth mapping matrix q.
int getIndexOf(ImageType what, bool throwIfNotFound=false) const
Returns the index of a specific image type.
int getHeight() const
Returns the height of each image.
float * createPointMap(const ImageSet &imageSet, unsigned short minDisparity=1)
Reconstructs the 3D location of each pixel in the given disparity map.
float * createZMap(const ImageSet &imageSet, unsigned short minDisparity=1, unsigned short maxDisparity=0xFFF)
Converts the disparit in an image set to a depth map.
int getSubpixelFactor() const
Gets the subpixel factor for this image set.
ImageFormat
Image formats that can be transferred.
unsigned char * getPixelData(int imageNumber) const
Returns the pixel data for the given image.
void projectSinglePoint(int imageX, int imageY, unsigned short disparity, const float *q, float &pointX, float &pointY, float &pointZ, int subpixelFactor=16)
Reconstructs the 3D location of one individual point.
bool hasImageType(ImageType what) const
Returns whether a left camera image is included in the enabled data.
A set of one to three images, but usually two (the left camera image and the disparity map)...
ImageFormat getPixelFormat(int imageNumber) const
Returns the pixel format for the given image.
int getWidth() const
Returns the width of each image.
void writePlyFile(const char *file, const ImageSet &imageSet, double maxZ=std::numeric_limits< double >::max(), bool binary=false)
Projects the given disparity map to 3D points and exports the result to a PLY file.