12 #if CVD_HAVE_XMMINTRIN 
   13 #include <tmmintrin.h> 
   20   : mimTemplate(
ImageRef(nPatchSize,nPatchSize))
 
   40   double dOneOverCameraZ = 1.0 / v3Cam[2];
 
   45   mm2WarpInverse.T()[0] = m2CamDerivs * (v3MotionRight.slice<0,2>() - v3Cam.slice<0,2>() * v3MotionRight[2] * dOneOverCameraZ) * dOneOverCameraZ;
 
   46   mm2WarpInverse.T()[1] = m2CamDerivs * (v3MotionDown.slice<0,2>() - v3Cam.slice<0,2>() * v3MotionDown[2] * dOneOverCameraZ) * dOneOverCameraZ;
 
   61   if(dDet > 3 || dDet < 0.25)
 
   93   bool bNeedToRefreshTemplate = 
false;
 
   95     bNeedToRefreshTemplate = 
true;
 
   97   for(
int i=0; !bNeedToRefreshTemplate && i<2; i++)
 
  100       const double dRefreshLimit = 0.07;  
 
  101       if(v2Diff * v2Diff > dRefreshLimit * dRefreshLimit)
 
  102     bNeedToRefreshTemplate = 
true;
 
  106   if(bNeedToRefreshTemplate)
 
  186   irPos = irPos / nLevelScale;
 
  187   nRange = (nRange + nLevelScale - 1) / nLevelScale;
 
  190   int nTop = irPos.
y - nRange;
 
  191   int nBottomPlusOne = irPos.
y + nRange + 1;
 
  192   int nLeft = irPos.
x - nRange;
 
  193   int nRight = irPos.
x + nRange;
 
  203   if(nBottomPlusOne <= 0)
 
  210   vector<ImageRef>::iterator i;
 
  211   vector<ImageRef>::iterator i_end;
 
  215   if(nBottomPlusOne >= L.
im.
size().
y)
 
  225       if( i->x < nLeft || i->x > nRight)
 
  227       if((irPos - *i).mag_squared() > nRange * nRange)
 
  268     m3H += v3Grad.as_col() * v3Grad.as_row(); 
 
  288   const double dConvLimit = 0.03;
 
  289   bool bConverged = 
false;
 
  291   for(nIts = 0; nIts < nMaxIts && !bConverged; nIts++)
 
  294       if(dUpdateSquared < 0) 
 
  296       if(dUpdateSquared < dConvLimit*dConvLimit)
 
  327   double dX = v2Base[0]-floor(v2Base[0]); 
 
  328   double dY = v2Base[1]-floor(v2Base[1]);
 
  329   float fMixTL = (1.0 - dX) * (1.0 - dY);
 
  330   float fMixTR = (dX)       * (1.0 - dY);
 
  331   float fMixBL = (1.0 - dX) * (dY);
 
  332   float fMixBR = (dX)       * (dY);
 
  342         fMixTL * pTopLeftPixel[0]          + fMixTR * pTopLeftPixel[1] + 
 
  343         fMixBL * pTopLeftPixel[nRowOffset] + fMixBR * pTopLeftPixel[nRowOffset + 1];
 
  357   double dPixelUpdateSquared = v3Update.slice<0,2>() * v3Update.slice<0,2>();
 
  358   return dPixelUpdateSquared;
 
  380 #if CVD_HAVE_XMMINTRIN 
  382 inline int SumXMM_16(__m128i &target)
 
  384   unsigned short int sums_store[8];    
 
  385   _mm_storeu_si128((__m128i*)sums_store, target);
 
  386   return sums_store[0] + sums_store[1] + sums_store[2] + sums_store[3] +
 
  387     sums_store[4] + sums_store[5] + sums_store[6] + sums_store[7];
 
  390 inline int SumXMM_32(__m128i &target)
 
  392   unsigned int sums_store[4];    
 
  393   _mm_storeu_si128((__m128i*)sums_store, target);
 
  394   return sums_store[0] + sums_store[1] + sums_store[2] + sums_store[3];
 
  407   byte *templatepointer;
 
  413 #if CVD_HAVE_XMMINTRIN 
  416       long unsigned int imagepointerincrement;
 
  418       __m128i xImageAsEightBytes;
 
  419       __m128i xImageAsWords;
 
  420       __m128i xTemplateAsEightBytes;
 
  421       __m128i xTemplateAsWords;
 
  424       __m128i xImageSqSums; 
 
  429       xImageSums = _mm_setzero_si128();
 
  430       xImageSqSums = _mm_setzero_si128();
 
  431       xCrossSums = _mm_setzero_si128();
 
  432       xZero = _mm_setzero_si128();
 
  434       imagepointer = &im[irImgBase + 
ImageRef(0,0)];
 
  436       imagepointerincrement = &im[irImgBase + 
ImageRef(0,1)] - imagepointer;
 
  438       xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
 
  439       imagepointer += imagepointerincrement;
 
  440       xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
 
  441       xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
 
  442       xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
 
  443       xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
 
  444       xTemplateAsEightBytes=_mm_load_si128((__m128i*) templatepointer);
 
  445       templatepointer += 16;
 
  446       xTemplateAsWords = _mm_unpacklo_epi8(xTemplateAsEightBytes,xZero);
 
  447       xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
 
  448       xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
 
  449       xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
 
  450       imagepointer += imagepointerincrement;
 
  451       xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
 
  452       xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
 
  453       xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
 
  454       xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
 
  455       xTemplateAsWords = _mm_unpackhi_epi8(xTemplateAsEightBytes,xZero);
 
  456       xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
 
  457       xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
 
  459       xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
 
  460       imagepointer += imagepointerincrement;
 
  461       xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
 
  462       xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
 
  463       xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
 
  464       xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
 
  465       xTemplateAsEightBytes=_mm_load_si128((__m128i*) templatepointer);
 
  466       templatepointer += 16;
 
  467       xTemplateAsWords = _mm_unpacklo_epi8(xTemplateAsEightBytes,xZero);
 
  468       xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
 
  469       xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
 
  470       xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
 
  471       imagepointer += imagepointerincrement;
 
  472       xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
 
  473       xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
 
  474       xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
 
  475       xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
 
  476       xTemplateAsWords = _mm_unpackhi_epi8(xTemplateAsEightBytes,xZero);
 
  477       xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
 
  478       xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
 
  480       xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
 
  481       imagepointer += imagepointerincrement;
 
  482       xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
 
  483       xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
 
  484       xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
 
  485       xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
 
  486       xTemplateAsEightBytes=_mm_load_si128((__m128i*) templatepointer);
 
  487       templatepointer += 16;
 
  488       xTemplateAsWords = _mm_unpacklo_epi8(xTemplateAsEightBytes,xZero);
 
  489       xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
 
  490       xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
 
  491       xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
 
  492       imagepointer += imagepointerincrement;
 
  493       xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
 
  494       xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
 
  495       xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
 
  496       xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
 
  497       xTemplateAsWords = _mm_unpackhi_epi8(xTemplateAsEightBytes,xZero);
 
  498       xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
 
  499       xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
 
  501       xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
 
  502       imagepointer += imagepointerincrement;
 
  503       xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
 
  504       xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
 
  505       xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
 
  506       xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
 
  507       xTemplateAsEightBytes=_mm_load_si128((__m128i*) templatepointer);
 
  508       templatepointer += 16;
 
  509       xTemplateAsWords = _mm_unpacklo_epi8(xTemplateAsEightBytes,xZero);
 
  510       xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
 
  511       xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
 
  512       xImageAsEightBytes=_mm_loadl_epi64((__m128i*) imagepointer);
 
  513       xImageAsWords = _mm_unpacklo_epi8(xImageAsEightBytes,xZero);
 
  514       xImageSums = _mm_adds_epu16(xImageAsWords,xImageSums);
 
  515       xProduct = _mm_madd_epi16(xImageAsWords, xImageAsWords);
 
  516       xImageSqSums = _mm_add_epi32(xProduct, xImageSqSums);
 
  517       xTemplateAsWords = _mm_unpackhi_epi8(xTemplateAsEightBytes,xZero);
 
  518       xProduct = _mm_madd_epi16(xImageAsWords, xTemplateAsWords);
 
  519       xCrossSums = _mm_add_epi32(xProduct, xCrossSums);
 
  521       nImageSum = SumXMM_16(xImageSums);
 
  522       nCrossSum = SumXMM_32(xCrossSums);
 
  523       nImageSumSq = SumXMM_32(xImageSqSums);
 
  530       imagepointer = &im[irImgBase + 
ImageRef(0,nRow)];
 
  534           int n = imagepointer[nCol];
 
  537           nCrossSum += n * templatepointer[nCol];
 
  546   return ((2*SA*SB - SA*SA - SB*SB)/N + nImageSumSq + 
mnTemplateSumSq - 2*nCrossSum);
 
Vector< 3 > v3PixelRight_W
Matrix< 2 > mm2LastWarpMatrix
int LevelScale(int nLevel)
double LevelNPos(double dRootPos, int nLevel)
double IterateSubPix(KeyFrame &kf)
bool FindPatchCoarse(CVD::ImageRef ir, KeyFrame &kf, unsigned int nRange)
SO3< Precision > & get_rotation()
Returns the rotation part of the transformation as a SO3. 
static Operator< Internal::Identity< Internal::One > > Identity
CVD::ImageRef mirPredictedPos
void resize(const ImageRef &size)
const int nMaxSSDPerPixel
void MakeTemplateCoarseNoWarp(MapPoint &p)
TooN::Vector< 2 > vec(const ImageRef &ir)
CVD::Image< CVD::byte > im
ImageRef ir_rounded(const TooN::Vector< 2 > &v)
void MakeSubPixTemplate()
CVD::Image< std::pair< float, float > > mimJacs
Vector<(Size==Dynamic?Dynamic:Size+1), Precision > unproject(const Vector< Size, Precision, Base > &v)
int CalcSearchLevelAndWarpMatrix(MapPoint &p, SE3<> se3CFromW, Matrix< 2 > &m2CamDerivs)
void copy(const BasicImage< S > &in, BasicImage< T > &out, ImageRef size=ImageRef(-1,-1), ImageRef begin=ImageRef(), ImageRef dst=ImageRef())
PatchFinder(int nPatchSize=8)
Matrix< Size, Size, Precision > get_inverse()
bool IterateSubPixToConvergence(KeyFrame &kf, int nMaxIts)
bool next(const ImageRef &max)
Vector< 3 > v3PixelDown_W
std::vector< CVD::ImageRef > vCorners
Matrix< 2 > M2Inverse(const Matrix< 2 > &m)
int transform(const BasicImage< S > &in, BasicImage< T > &out, const TooN::Matrix< 2 > &M, const TooN::Vector< 2 > &inOrig, const TooN::Vector< 2 > &outOrig, const T defaultValue=T())
CVD::Image< CVD::byte > mimTemplate
ImageRef size() const 
What is the size of this image? 
bool in_image_with_border(const ImageRef &ir, int border) const 
Matrix< 2 > mm2WarpInverse
ImageRef ir(const TooN::Vector< 2 > &v)
std::vector< int > vCornerRowLUT
int ZMSSDAtPoint(CVD::BasicImage< CVD::byte > &im, const CVD::ImageRef &ir)
KeyFrame * pPatchSourceKF
MapPoint * mpLastTemplateMapPoint
void MakeTemplateCoarse(MapPoint &p, SE3<> se3CFromW, Matrix< 2 > &m2CamDerivs)
double LevelZeroPos(double dLevelPos, int nLevel)
void MakeTemplateCoarseCont(MapPoint &p)
static Operator< Internal::Zero > Zeros