12 #if CVD_HAVE_XMMINTRIN
13 #include <tmmintrin.h>
20 : mimTemplate(
ImageRef(nPatchSize,nPatchSize))
40 double dOneOverCameraZ = 1.0 / v3Cam[2];
45 mm2WarpInverse.T()[0] = m2CamDerivs * (v3MotionRight.slice<0,2>() - v3Cam.slice<0,2>() * v3MotionRight[2] * dOneOverCameraZ) * dOneOverCameraZ;
46 mm2WarpInverse.T()[1] = m2CamDerivs * (v3MotionDown.slice<0,2>() - v3Cam.slice<0,2>() * v3MotionDown[2] * dOneOverCameraZ) * dOneOverCameraZ;
61 if(dDet > 3 || dDet < 0.25)
93 bool bNeedToRefreshTemplate =
false;
95 bNeedToRefreshTemplate =
true;
97 for(
int i=0; !bNeedToRefreshTemplate && i<2; i++)
100 const double dRefreshLimit = 0.07;
101 if(v2Diff * v2Diff > dRefreshLimit * dRefreshLimit)
102 bNeedToRefreshTemplate =
true;
106 if(bNeedToRefreshTemplate)
186 irPos = irPos / nLevelScale;
187 nRange = (nRange + nLevelScale - 1) / nLevelScale;
190 int nTop = irPos.
y - nRange;
191 int nBottomPlusOne = irPos.
y + nRange + 1;
192 int nLeft = irPos.
x - nRange;
193 int nRight = irPos.
x + nRange;
203 if(nBottomPlusOne <= 0)
210 vector<ImageRef>::iterator i;
211 vector<ImageRef>::iterator i_end;
215 if(nBottomPlusOne >= L.
im.
size().
y)
225 if( i->x < nLeft || i->x > nRight)
227 if((irPos - *i).mag_squared() > nRange * nRange)
268 m3H += v3Grad.as_col() * v3Grad.as_row();
288 const double dConvLimit = 0.03;
289 bool bConverged =
false;
291 for(nIts = 0; nIts < nMaxIts && !bConverged; nIts++)
294 if(dUpdateSquared < 0)
296 if(dUpdateSquared < dConvLimit*dConvLimit)
327 double dX = v2Base[0]-floor(v2Base[0]);
328 double dY = v2Base[1]-floor(v2Base[1]);
329 float fMixTL = (1.0 - dX) * (1.0 - dY);
330 float fMixTR = (dX) * (1.0 - dY);
331 float fMixBL = (1.0 - dX) * (dY);
332 float fMixBR = (dX) * (dY);
342 fMixTL * pTopLeftPixel[0] + fMixTR * pTopLeftPixel[1] +
343 fMixBL * pTopLeftPixel[nRowOffset] + fMixBR * pTopLeftPixel[nRowOffset + 1];
357 double dPixelUpdateSquared = v3Update.slice<0,2>() * v3Update.slice<0,2>();
358 return dPixelUpdateSquared;
380 #if CVD_HAVE_XMMINTRIN
/// Horizontally adds the eight 16-bit lanes of an SSE register.
///
/// Every lane is read back as an unsigned 16-bit value and the total is
/// accumulated in an int, so the addition itself cannot wrap
/// (the largest possible result is 8 * 65535).
///
/// @param target  Register whose eight 16-bit lanes are summed; it is only read.
/// @return        Sum of the eight lanes.
inline int SumXMM_16(__m128i &target)
{
  unsigned short int sums_store[8];
  // Unaligned store: the local array carries no 16-byte alignment guarantee.
  _mm_storeu_si128(reinterpret_cast<__m128i*>(sums_store), target);

  int nSum = 0;
  for(int nLane = 0; nLane < 8; nLane++)
    nSum += sums_store[nLane];
  return nSum;
}
/// Horizontally adds the four 32-bit lanes of an SSE register.
///
/// The lanes are read back as unsigned 32-bit values and added with unsigned
/// arithmetic; the total is then converted to int for the caller.
///
/// @param target  Register whose four 32-bit lanes are summed; it is only read.
/// @return        Sum of the four lanes, converted to int.
inline int SumXMM_32(__m128i &target)
{
  unsigned int sums_store[4];
  // Unaligned store: the local array carries no 16-byte alignment guarantee.
  _mm_storeu_si128(reinterpret_cast<__m128i*>(sums_store), target);

  unsigned int nSum = 0;
  for(int nLane = 0; nLane < 4; nLane++)
    nSum += sums_store[nLane];
  return static_cast<int>(nSum);
}
407 byte *templatepointer;
413 #if CVD_HAVE_XMMINTRIN
416 long unsigned int imagepointerincrement;
418 __m128i xImageAsEightBytes;
419 __m128i xImageAsWords;
420 __m128i xTemplateAsEightBytes;
421 __m128i xTemplateAsWords;
424 __m128i xImageSqSums;
429 xImageSums = _mm_setzero_si128();
430 xImageSqSums = _mm_setzero_si128();
431 xCrossSums = _mm_setzero_si128();
432 xZero = _mm_setzero_si128();
434 imagepointer = &im[irImgBase +
ImageRef(0,0)];
436 imagepointerincrement = &im[irImgBase +
ImageRef(0,1)] - imagepointer;
438 xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
439 imagepointer += imagepointerincrement;
440 xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
441 xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
442 xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
443 xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
444 xTemplateAsEightBytes=_mm_load_si128((__m128i*) templatepointer);
445 templatepointer += 16;
446 xTemplateAsWords = _mm_unpacklo_epi8(xTemplateAsEightBytes,xZero);
447 xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
448 xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
449 xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
450 imagepointer += imagepointerincrement;
451 xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
452 xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
453 xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
454 xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
455 xTemplateAsWords = _mm_unpackhi_epi8(xTemplateAsEightBytes,xZero);
456 xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
457 xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
459 xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
460 imagepointer += imagepointerincrement;
461 xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
462 xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
463 xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
464 xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
465 xTemplateAsEightBytes=_mm_load_si128((__m128i*) templatepointer);
466 templatepointer += 16;
467 xTemplateAsWords = _mm_unpacklo_epi8(xTemplateAsEightBytes,xZero);
468 xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
469 xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
470 xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
471 imagepointer += imagepointerincrement;
472 xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
473 xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
474 xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
475 xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
476 xTemplateAsWords = _mm_unpackhi_epi8(xTemplateAsEightBytes,xZero);
477 xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
478 xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
480 xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
481 imagepointer += imagepointerincrement;
482 xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
483 xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
484 xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
485 xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
486 xTemplateAsEightBytes=_mm_load_si128((__m128i*) templatepointer);
487 templatepointer += 16;
488 xTemplateAsWords = _mm_unpacklo_epi8(xTemplateAsEightBytes,xZero);
489 xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
490 xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
491 xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
492 imagepointer += imagepointerincrement;
493 xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
494 xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
495 xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
496 xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
497 xTemplateAsWords = _mm_unpackhi_epi8(xTemplateAsEightBytes,xZero);
498 xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
499 xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
501 xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
502 imagepointer += imagepointerincrement;
503 xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
504 xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
505 xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
506 xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
507 xTemplateAsEightBytes=_mm_load_si128((__m128i*) templatepointer);
508 templatepointer += 16;
509 xTemplateAsWords = _mm_unpacklo_epi8(xTemplateAsEightBytes,xZero);
510 xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
511 xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
512 xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
513 xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
514 xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
515 xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
516 xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
517 xTemplateAsWords = _mm_unpackhi_epi8(xTemplateAsEightBytes,xZero);
518 xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
519 xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
521 nImageSum = SumXMM_16(xImageSums);
522 nCrossSum = SumXMM_32(xCrossSums);
523 nImageSumSq = SumXMM_32(xImageSqSums);
530 imagepointer = &im[irImgBase +
ImageRef(0,nRow)];
534 int n = imagepointer[nCol];
537 nCrossSum += n * templatepointer[nCol];
546 return ((2*SA*SB - SA*SA - SB*SB)/N + nImageSumSq +
mnTemplateSumSq - 2*nCrossSum);
Vector< 3 > v3PixelRight_W
Matrix< 2 > mm2LastWarpMatrix
int LevelScale(int nLevel)
double LevelNPos(double dRootPos, int nLevel)
double IterateSubPix(KeyFrame &kf)
bool FindPatchCoarse(CVD::ImageRef ir, KeyFrame &kf, unsigned int nRange)
SO3< Precision > & get_rotation()
Returns the rotation part of the transformation as a SO3.
static Operator< Internal::Identity< Internal::One > > Identity
CVD::ImageRef mirPredictedPos
void resize(const ImageRef &size)
const int nMaxSSDPerPixel
void MakeTemplateCoarseNoWarp(MapPoint &p)
TooN::Vector< 2 > vec(const ImageRef &ir)
CVD::Image< CVD::byte > im
ImageRef ir_rounded(const TooN::Vector< 2 > &v)
void MakeSubPixTemplate()
CVD::Image< std::pair< float, float > > mimJacs
Vector<(Size==Dynamic?Dynamic:Size+1), Precision > unproject(const Vector< Size, Precision, Base > &v)
int CalcSearchLevelAndWarpMatrix(MapPoint &p, SE3<> se3CFromW, Matrix< 2 > &m2CamDerivs)
void copy(const BasicImage< S > &in, BasicImage< T > &out, ImageRef size=ImageRef(-1,-1), ImageRef begin=ImageRef(), ImageRef dst=ImageRef())
PatchFinder(int nPatchSize=8)
Matrix< Size, Size, Precision > get_inverse()
bool IterateSubPixToConvergence(KeyFrame &kf, int nMaxIts)
bool next(const ImageRef &max)
Vector< 3 > v3PixelDown_W
std::vector< CVD::ImageRef > vCorners
Matrix< 2 > M2Inverse(const Matrix< 2 > &m)
int transform(const BasicImage< S > &in, BasicImage< T > &out, const TooN::Matrix< 2 > &M, const TooN::Vector< 2 > &inOrig, const TooN::Vector< 2 > &outOrig, const T defaultValue=T())
CVD::Image< CVD::byte > mimTemplate
ImageRef size() const
What is the size of this image?
bool in_image_with_border(const ImageRef &ir, int border) const
Matrix< 2 > mm2WarpInverse
ImageRef ir(const TooN::Vector< 2 > &v)
std::vector< int > vCornerRowLUT
int ZMSSDAtPoint(CVD::BasicImage< CVD::byte > &im, const CVD::ImageRef &ir)
KeyFrame * pPatchSourceKF
MapPoint * mpLastTemplateMapPoint
void MakeTemplateCoarse(MapPoint &p, SE3<> se3CFromW, Matrix< 2 > &m2CamDerivs)
double LevelZeroPos(double dLevelPos, int nLevel)
void MakeTemplateCoarseCont(MapPoint &p)
static Operator< Internal::Zero > Zeros