SIFT 特征变换未检测到正确/不完整的尺度空间极值

Question

这是出于学习目的（优先级是正确性第一，我知道实现中明显的低效和不那么明显，但首先我想获得正确性方面，然后专注于重构和优化它，无论如何以下代码用于教育目的，易于理解优先于生产质量），我正在研究用于检测关键点的尺度不变特征变换，并且我发现初始的尺度空间极值集被错误地检测到，输出与预期有很大不同。我想知道我是否犯了逻辑或实施错误？导致结果不理想。这个实现仍然不完整，我正在关键点定位中计算初始尺度空间极值的点进行测试，并且我预计点的数量会更大，并且输出中点的相对分布也非常糟糕。我还记录了一些调试信息以更好地理解问题。

有关代码和算法的关键细节。

  Implmn Details:
      1) Converting Mat -> vector<vector<float>> and only dealing with image as
  a matrix floats, since static_casting back and fourth from u_char costs in
  data loss.

      2) A pyramid is a vector<Octaves> an Octave is a vector<Scales> a scale is
  an image level rep/ vector<vector<float>>

      So the
            img/scale = vector<vector<float>>
            octave = vector<vector<vector<float>>> or vector<scale>
            pyramid = vector<vector<vector<vector<float>>>> or vector<octave>

      3) Keypoints in a scale are vector<pair<int,int>>, a vector of
  co-ordinates.

  Params : (as set in default constructor)
      k = 1.414 (Scale sigma multiplier) : as suggested in sift paper

      num of octaves = 4

      num of scales per octave = 4

      gaussian kernel size = 3 : This could be a potential source of bugs, as
  this has to be ideally computed based on given sigma.

  Algorithm (Partially completed yet):
  Following the Std algorithm as per my level of understanding so far.
      1) Compute Scale space pyramid
          Get a scale space pyramid for each octave (4 octaves), compute 4
  scales ( number of scales per octave = 4). Start with inital img, and compute
  Gaussian blurred image with initalSigma, and for the successive scales in this
  octave, compute Gaussian blur of previously blurred image with a sigma of =
  previousSigma * k, this achieves successive blurring in an octave. So we have
  4 scales in an octave. Now downsample the orignal image ( for octave = 1,
  octaves = (0 1 2 3)) get the image of half the dimensions of source image, and
  perform the above steps, and for each successive octave downsample the image
  of previous octave. Finally return the scaleSpacePyramid.

      2) Compute Difference of Gaussian Pyramid.
          Get a pyramid of Differences of Gaussians previously computed, For
  each octave, traverse the scale from (1 , 2 , 3), and compute the diff of
  scale(i) - scale(i-1). So we will be left with 3 scales in DoGPyramid. To
  compute diff of scale(i) - scale(i-1) for i = 1,2,3. Take element wise diff of
  image in scale i and image in scale i-1. Finally return this Pyramid.

      3) Compute Scale Space Extremas.
          Using The DoGPyramid For each octave (4 octaves yet),
  there are 3 scales per octave in DogPyramid (0,1,2). Now to
  generalize, say there are s scales per octave in DoGPyramid
  (0,1,2 ... s-1, s), for scales [1,s-1], for a scale s for
  every pixel in [1,rows-1][1,cols-1] range, there are 8 pixels
  around it in the current scale, and 9 pixel in the scale s+1,
  and 9 in the scale s-1. Compute the maximum in the rest of
  the 26 pixels (8 in scale s, 9 in scale s+1, 9 in scale s-1),
  I name scale s as middle, scale s+1 as upper and scale s-1 as lower. And if
  this current point value is higher or equal than the maximum, lower or equal
  than the minimum, then mark it as a potential keypoint and insert it in a list
  of points, and return it.

代码：

#include "assert.h"
#include <cmath>
#include <iomanip>
#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/opencv.hpp>
#include <vector>
#define DEBUG 1
#define IMGLVLDIAGNOSIS 0
#define KEYPOINTPLT 0
using namespace std;
using namespace cv;

// Use subPatchSize x subPatchSize image in the orignal image to obtain
// debugging info.
const unsigned subPatchSize = 6;


static void displayImage(Mat &img, unsigned int time = 0,
                         string title = "frame") {
  imshow(title, img);
  waitKey(time);
}

static float square(float x) { return x * x; }

static float computeGaussianFunc(float x, float y, float mu, float sigma) {
  float val =
      exp(-0.5 * ((square((x - mu) / sigma)) + square((y - mu) / sigma))) /
      (2 * M_PI * sigma * sigma);
  return val;
}

bool isImgPoint(int rows, int cols, int x, int y) {
  return (x >= 0 and x < rows and y >= 0 and y < cols);
}

vector<vector<float>>
computeGaussianConvolution(pair<vector<vector<float>>, float> &KernalInfo,
                           int size, vector<vector<float>> &img) {
  int rows = img.size();
  int cols = img[0].size();
  vector<vector<float>> res(rows, vector<float>(cols, 0));
  vector<vector<float>> Kernal = KernalInfo.first;
  float normF = KernalInfo.second;
  int hk = size / 2;
  for (int i = 0; i < rows; i++) {
    for (int j = 0; j < cols; j++) {
      float val = 0.0;
      for (int x = -hk; x <= hk; x++) {
        for (int y = -hk; y <= hk; y++) {
          if (isImgPoint(rows, cols, i + x, j + y)) {
            val += Kernal[x + hk][y + hk] * (img[i + x][j + y]);
          }
        }
      }
      res[i][j] = (val / normF);
    }
  }
  return res;
}

pair<vector<vector<float>>, float> computeKernalInfo(int size, float s) {
  vector<vector<float>> Kernal(size, vector<float>(size, 0.0));
  float sum = 0.0;
  int center = size / 2;
  for (int i = 0; i < size; i++) {
    for (int j = 0; j < size; j++) {
      Kernal[i][j] = computeGaussianFunc(i, j, center, s);
      sum += Kernal[i][j];
    }
  }
  return {Kernal, sum};
}

vector<vector<float>> computeGussianBlur(vector<vector<float>> &img, int Ksize,
                                         float s) {
  if (Ksize <= 0 or s == 0) {
    return img;
  }
  vector<vector<float>> res;
  pair<vector<vector<float>>, float> KernalInfo;
  KernalInfo = computeKernalInfo(Ksize, s);
  vector<vector<float>> gaussianFilterd =
      computeGaussianConvolution(KernalInfo, Ksize, img);
  return gaussianFilterd;
}

vector<vector<float>> getImageFltMatrix(Mat &img) {
  int rows = img.rows;
  int cols = img.cols;
  vector<vector<float>> grayScalImg(rows, vector<float>(cols, 0));
  for (int i = 0; i < rows; i++) {
    for (int j = 0; j < cols; j++) {
      if (img.channels() == 1)
        grayScalImg[i][j] = static_cast<int>(img.at<uchar>(i, j));
      if (img.channels() == 3) {
        float val = (0.0722 * (img.at<Vec3b>(i, j)[0])) +
                    (0.7152 * (img.at<Vec3b>(i, j)[1])) +
                    (0.2126 * (img.at<Vec3b>(i, j)[2]));
        grayScalImg[i][j] = val;
      }
    }
  }
  return grayScalImg;
}

void plotImageMatrix(vector<vector<float>> &img) {
  int rows = img.size();
  int cols = img[0].size();
  Mat image(rows, cols, CV_8U);
  for (int i = 0; i < rows; i++) {
    for (int j = 0; j < cols; j++) {
      image.at<u_char>(i, j) = static_cast<u_char>((int)(img[i][j]));
    }
  }
  displayImage(image);
}

void plotKeyPointsInImg(Mat &img, vector<pair<int, int>> &points) {
  for (auto point : points) {
    circle(img, Point(point.second, point.first), 3, Scalar(0, 255, 0), 1);
  }
}

void logSubPatchPrint(const vector<vector<float>> &img) {
  if (img.size() > 9 and img[0].size() > 9) {
    cout << "START : ====================\n\n";
    for (int i = 1; i <= subPatchSize; i++) {
      for (int j = 1; j <= subPatchSize; j++) {
        cout << img[i][j] << " ";
      }
      cout << "\n";
    }
    cout << "\nEND : ====================\n\n";
  }
}


namespace MvSIFT {

class SIFT {
  float k;
  int numOctaves;
  int scalesPerOct;
  float initalSigma;
  int gKernelSize;

  bool boundsCheck(int x, int y, int r, int c) {
    return (x >= 0 and x + 1 < r and y >= 0 and y + 1 < c);
  }

  vector<vector<float>> downsampleImg(vector<vector<float>> &currentImg) {
    int rows = currentImg.size();
    int cols = currentImg[0].size();
    vector<vector<float>> output(rows / 2, vector<float>(cols / 2, 0));
    for (int i = 0; i < rows; i += 2) {
      for (int j = 0; j < cols; j += 2) {
        if (not boundsCheck(i, j, rows, cols))
          continue;
        float val = 0;
        for (int v = 0; v <= 1; v++) {
          for (int w = 0; w <= 1; w++) {
            val += currentImg[i + v][j + w];
          }
        }
        output[i / 2][j / 2] = val / 4;
      }
    }
    return output;
  }

  vector<vector<vector<vector<float>>>>
  computeScaleSpacePyramid(vector<vector<float>> &img) {
    vector<vector<vector<vector<float>>>> scaleSpacePyramid;
    vector<vector<float>> currentImg = img;
    auto propTonxtOct = img;
    for (int oct = 1; oct <= numOctaves; oct++) {
      vector<vector<vector<float>>> octImages;
      float sigma = initalSigma;
      for (int scl = 1; scl <= scalesPerOct; scl++) {
        vector<vector<float>> blurredImg =
            computeGussianBlur(currentImg, gKernelSize, sigma);
        octImages.push_back(blurredImg);
        currentImg = blurredImg;
        sigma *= k;
      }
      scaleSpacePyramid.push_back(octImages);
      currentImg = downsampleImg(propTonxtOct);
      propTonxtOct = currentImg;
    }
    return scaleSpacePyramid;
  }

  vector<vector<float>> getInterScaleImgDiff(vector<vector<float>> &img1,
                                             vector<vector<float>> &img2) {
    int rows = img1.size();
    int cols = img1[0].size();
    assert(rows == img2.size() and cols == img2[0].size() &&
           "dimentions for image diff do not match");
    vector<vector<float>> diff(rows, vector<float>(cols, 0));
    for (int i = 0; i < rows; i++) {
      for (int j = 0; j < cols; j++) {
        diff[i][j] = img1[i][j] - img2[i][j];
      }
    }
    return diff;
  }

  vector<vector<vector<vector<float>>>> computeDiffOfGaussianPyramid(
      vector<vector<vector<vector<float>>>> &ScaleSpacePyramid) {
    vector<vector<vector<vector<float>>>> diffOfGaussianPyrmd;
    assert(ScaleSpacePyramid.size() == numOctaves &&
           "SSP lenght does not match number of octaves");
    // For each octave, compute dog.
    for (int oct = 0; oct < numOctaves; oct++) {
      vector<vector<vector<float>>> diffOfGaussian;
      for (int scl = 1; scl < scalesPerOct; scl++) {
        // get the diff of scl-1 and scl.
        vector<vector<float>> diff = getInterScaleImgDiff(
            ScaleSpacePyramid[oct][scl], ScaleSpacePyramid[oct][scl - 1]);
        diffOfGaussian.push_back(diff);
      }
      diffOfGaussianPyrmd.push_back(diffOfGaussian);
    }
    return diffOfGaussianPyrmd;
  }

  // Get potential extrema for this scale.
  vector<pair<int, int>>
  searchInCurrentScale(int scale, vector<vector<vector<float>>> &Octave) {
    int OSize = Octave.size();
    assert(scale > 0 and scale < OSize - 1 &&
           "Required a Sandwich scale, Not a Sandwiched scale.");
    auto &lower = Octave[scale - 1];
    auto &middle = Octave[scale];
    auto &upper = Octave[scale + 1];
    vector<pair<int, int>> potentialKeypoints;
    int rows = middle.size();
    int cols = middle[0].size();
    for (int i = 1; i < rows - 1; i++) {
      for (int j = 1; j < cols - 1; j++) {
        float currPtVal = middle[i][j];
        float maxVal = FLT_MIN;
        float minVal = FLT_MAX;
        // first search current scale.
        for (int x = -1; x <= 1; x++) {
          for (int y = -1; y <= 1; y++) {
            // Skip the currentPtVal
            if (x == 0 and y == 0)
              continue;
            maxVal = max(maxVal, middle[i + x][j + y]);
            minVal = min(minVal, middle[i + x][j + y]);
          }
        }
        // Now search upper scale.
        for (int x = -1; x <= 1; x++) {
          for (int y = -1; y <= 1; y++) {
            maxVal = max(maxVal, upper[i + x][j + y]);
            minVal = min(minVal, upper[i + x][j + y]);
          }
        }
        // search lower scale.
        for (int x = -1; x <= 1; x++) {
          for (int y = -1; y <= 1; y++) {
            maxVal = max(maxVal, lower[i + x][j + y]);
            minVal = min(minVal, lower[i + x][j + y]);
          }
        }
        // current pixel is a candidate if it is greter than maximum or lesser
        // than minimum found.
        if (currPtVal >= maxVal or currPtVal <= minVal) {
          potentialKeypoints.push_back(make_pair(i, j));
          // cout << "### Detected KeyP : " << i << " " << j << "\n";
        }
      }
    }
    return potentialKeypoints;
  }

  // vector<vector<vector<pair<float, float>>>> computeGradientPyramid(
  //     const vector<vector<vector<pair<int, int>>>> &scaleSpaceExtremas,
  //     const vector<vector<vector<vector<float>>>> &scaleSpacePyramid) {
  //   vector<vector<vector<pair<float, float>>>> gradPyramid;
  // }

  vector<vector<vector<pair<int, int>>>>
  findScaleSpaceExtrma(vector<vector<vector<vector<float>>>> &DoGPyramid) {
    assert(DoGPyramid.size() == numOctaves &&
           "DOG pyramid lenght does not match number of octaves");
    // AcrossOctaves, InAnOctave(Scales), Within a Scale
    vector<vector<vector<pair<int, int>>>> potentialKeyPoints;
    for (int oct = 0; oct < numOctaves; oct++) {
      // number of scales in this octave.
      int scs = DoGPyramid[oct].size();
      auto Octave = DoGPyramid[oct];
      vector<vector<pair<int, int>>> keyPointsInOctave;
      // Search this octaves to get points of interest, that is extrema points
      // wrt to 26, 3d points in scale above and below current scale.
      for (int sc = 1; sc < scs - 1; sc++) {
        // In this scale, search in scale above and below the extrema.
        auto keyPtsInScale = searchInCurrentScale(sc, Octave);
        keyPointsInOctave.push_back(keyPtsInScale);
      }
      potentialKeyPoints.push_back(keyPointsInOctave);
    }
    return potentialKeyPoints;
  }

public:
  SIFT() {
    // Scale factor Chosen to be /2 = 1.414
    k = 1.414;
    numOctaves = 4;
    scalesPerOct = 4;
    initalSigma = 1;
    gKernelSize = 3;
  }

  SIFT(float userK, unsigned userNumOct, unsigned userSclsPerOct,
       float userInitalSigma, unsigned userGaussianKernalSize) {
    k = userK;
    numOctaves = userNumOct;
    scalesPerOct = userSclsPerOct;
    initalSigma = userInitalSigma;
    gKernelSize = userGaussianKernalSize;
  }
/*
Implmn Details: 
    Converting Mat -> vector<vector<float>> and only dealing with image as a matrix floats, since static_casting back and fourth from u_char costs in data loss.

    A pyramid is a vector<Octaves> an Octave is a vector<Scales> a scale is an image level rep/ vector<vector<float>>

    So an img/scale = vector<vector<float>> 
          octave = vector<vector<vector<float>>> or vector<scale>
          pyramid = vector<vector<vector<vector<float>>>> or vector<octave>

    Keypoints in a scale are vector<pair<int,int>>, a vector of co-ordinates.

Params : (as set in default constructor)
    k = 1.414 (Scale sigma multiplier) : as suggested in sift paper

    num of octaves = 4 

    num of scales per octave = 4

    gaussian kernel size = 3 : This could be a potential source of bugs, as this has to be ideally computed based on given sigma. 

Algorithm (Partially completed yet): 
Following the Std algorithm as per my level of understanding so far. 
    1) Compute Scale space pyramid
        Get a scale space pyramid for each octave (4 octaves), compute 4 scales ( number of scales per octave = 4). Start with inital img, and compute Gaussian blurred image with initalSigma, and for the successive scales in this octave, compute Gaussian blur of previously blurred image with a sigma of = previousSigma * k, this achieves successive blurring in an octave. So we have 4 scales in an octave. Now downsample the orignal image ( for octave = 1, octaves = (0 1 2 3))
        get the image of half the dimensions of source image, and perform the above steps, and for each successive octave downsample the image of previous octave. Finally return the scaleSpacePyramid.

    2) Compute Difference of Gaussian Pyramid. 
        Get a pyramid of Differences of Gaussians previously computed, For each octave, traverse the scale from (1 , 2 , 3), and compute the diff of scale(i) - scale(i-1). So we will be left with 3 scales in DoGPyramid. To compute diff of scale(i) - scale(i-1) for i = 1,2,3. Take element wise diff of image in scale i and image in scale i-1. Finally return this Pyramid. 

    3) Compute Scale Space Extremas. 
        Using The DoGPyramid For each octave (4 octaves yet), there are 3 scales per octave in DogPyramid (0,1,2). Now to generalize, say there are s scales per octave in DoGPyramid (0,1,2 ... s-1, s), for scales [1,s-1], for a scale s for every pixel in [1,rows-1][1,cols-1] range, there are 8 pixels around it in the current scale, and 9 pixel in the scale s+1, and 9 in the scale s-1. 
        Compute the maximum in the rest of the 26 pixels (8 in scale s, 9 in scale s+1, 9 in scale s-1), I name scale s as middle, scale s+1 as upper and scale s-1 as lower. And if this current point value is higher or equal than the maximum, lower or equal than the minimum, then mark it as a potential keypoint and insert it in a list of points, and return it. 
     
*/
  void computeSIFT(Mat &image, Mat &ClrCpy) {
    // get image data in float
    vector<vector<float>> img = getImageFltMatrix(image);
    vector<vector<vector<vector<float>>>> scaleSpacePyramid =
        computeScaleSpacePyramid(img);
#if DEBUG
    for (int i = 0; i < scaleSpacePyramid.size(); i++) {
      cout << "Octave = " << i << "\n";
      for (int j = 0; j < scaleSpacePyramid[i].size(); j++) {
        cout << " Scale = " << j;
        cout << " Img sizes = " << scaleSpacePyramid[i][j].size() << " x "
             << scaleSpacePyramid[i][j][0].size() << "\n";
        // plotImageMatrix(scaleSpacePyramid[i][j]);
        // logSubPatchPrint(scaleSpacePyramid[i][j]);
      }
    }
    cout << "\n";
#endif
    vector<vector<vector<vector<float>>>> DoGPyramid =
        computeDiffOfGaussianPyramid(scaleSpacePyramid);
#if DEBUG
    for (int oct = 0; oct < DoGPyramid.size(); oct++) {
      auto Octave = DoGPyramid[oct];
      cout << ">>> Diff Of Gaussian Octave = " << oct;
      cout << " Num Scales = " << Octave.size() << "\n ";
      for (int scl = 0; scl < Octave.size(); scl++) {
        auto dogImg = Octave[scl];
        // plotImageMatrix(dogImg);
        logSubPatchPrint(dogImg);
#if IMGLVLDIAGNOSIS
        for (int i = 0; i < dogImg.size(); i++) {
          for (int j = 0; j < dogImg[0].size(); j++) {
            cout << dogImg[i][j] << " ";
          }
          cout << "\n";
        }
        cout << "\n\n";
#endif
      }
    }
#endif

    vector<vector<vector<pair<int, int>>>> scaleSpaceExtremas =
        findScaleSpaceExtrma(DoGPyramid);
    // this concludes inital scale space extrema detection step. Generates basic
    // keypoints which can be later pruned.
#if DEBUG
    unsigned count = 0;
    for (int oct = 0; oct < scaleSpaceExtremas.size(); oct++) {
      cout << ">>> Scale Space Extrema Octave = " << oct << "\n";
      auto Octave = scaleSpaceExtremas[oct];
      cout << " Num Scales = " << Octave.size();
      for (int sc = 0; sc < Octave.size(); sc++) {
        auto keypts = Octave[sc];
        count += keypts.size();
        cout << " Keypts in scale" << sc << " Keypts = " << keypts.size();
      }
      cout << "\n";
    }
#endif

#if !KEYPOINTPLT
    for (auto &Octave : scaleSpaceExtremas) {
      for (auto &scale : Octave) {
        plotKeyPointsInImg(ClrCpy, scale);
      }
    }
#endif

  } // computeSIFT()
};

} // namespace MvSIFT

int main() {
  string imPath =
      "/home/panirpal/workspace/Projects/ComputerVision/data/images/astro1.jpg";
  Mat img = imread(imPath, IMREAD_GRAYSCALE);
  Mat cimg = imread(imPath);
  if (!img.empty()) {
    displayImage(img);
    MvSIFT::SIFT().computeSIFT(img, cimg);
    displayImage(cimg);
  } else
    cerr << "image not found! exiting...";
}

构建并运行t的命令（确保opencv lib版本/路径是您在重现问题时使用的版本）：

g++ -g sift.cpp -I/usr/local/include/opencv4 -Wl,-rpath,/usr/local/lib /usr/local/lib/libopencv_highgui.so.4.8.0 /usr/local/lib/libopencv_ml.so.4.8.0 /usr/local/lib/libopencv_objdetect.so.4.8.0 /usr/local/lib/libopencv_photo.so.4.8.0 /usr/local/lib/libopencv_stitching.so.4.8.0 /usr/local/lib/libopencv_video.so.4.8.0 /usr/local/lib/libopencv_videoio.so.4.8.0 /usr/local/lib/libopencv_imgcodecs.so.4.8.0 /usr/local/lib/libopencv_calib3d.so.4.8.0 /usr/local/lib/libopencv_dnn.so.4.8.0 /usr/local/lib/libopencv_features2d.so.4.8.0 /usr/local/lib/libopencv_flann.so.4.8.0 /usr/local/lib/libopencv_imgproc.so.4.8.0 /usr/local/lib/libopencv_core.so.4.8.0 -lm -o sift

./sift

输入图像：

预期输出（检测到的关键点的密度和分布，而不是颜色和内容）：

此代码的输出：

期望关键点密度和关键点计数（大致相同，因为期望是由opencv中现有的SIFT实现生成的，脚本如下，opencv版本是高度优化的并且非常准确，但即使我们有80％的准确度将是一个很好的起点。）

# Important NOTE: Use opencv >=4.4 
import cv2

# Loading the image
img = cv2.imread('/home/panirpal/workspace/Projects/ComputerVision/data/images/astro1.jpg')

# Converting image to grayscale
gray= cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)

# Applying SIFT detector
sift = cv2.SIFT_create()
kp = sift.detect(gray, None)

# Marking the keypoint on the image using circles
img=cv2.drawKeypoints(gray ,
                    kp ,
                    img)

cv2.imwrite('image-with-keypoints.jpg', img)

Answer 1

经过多天的辛苦工作，我终于能够让它工作了，问题在于内核大小计算，它必须基于 Sigma 来计算，而不是传递像 3 这样的静态幻数。以下是更新的代码以供将来参考.

#include "assert.h"
#include <cmath>
#include <iomanip>
#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/opencv.hpp>
#include <vector>
#define DEBUG 0
#define IMGLVLDIAGNOSIS 0
#define KEYPOINTPLT 0
using namespace std;
using namespace cv;

// Use subPatchSize x subPatchSize image in the orignal image to obtain
// debugging info.
const unsigned subPatchSize = 6;

static void displayImage(Mat &img, unsigned int time = 0,
                         string title = "frame") {
  imshow(title, img);
  waitKey(time);
}

static float square(float x) { return x * x; }

static float computeGaussianFunc(float x, float y, float mu, float sigma) {
  float val =
      exp(-0.5 * ((square((x - mu) / sigma)) + square((y - mu) / sigma))) /
      (2 * M_PI * sigma * sigma);
  return val;
}

bool isImgPoint(int rows, int cols, int x, int y) {
  return (x >= 0 and x < rows and y >= 0 and y < cols);
}

vector<vector<float>>
computeGaussianConvolution(pair<vector<vector<float>>, float> &KernalInfo,
                           int size, vector<vector<float>> &img) {
  int rows = img.size();
  int cols = img[0].size();
  vector<vector<float>> res(rows, vector<float>(cols, 0));
  vector<vector<float>> Kernal = KernalInfo.first;
  float normF = KernalInfo.second;
  int hk = size / 2;
  for (int i = 0; i < rows; i++) {
    for (int j = 0; j < cols; j++) {
      float val = 0.0;
      for (int x = -hk; x <= hk; x++) {
        for (int y = -hk; y <= hk; y++) {
          if (isImgPoint(rows, cols, i + x, j + y)) {
            val += Kernal[x + hk][y + hk] * (img[i + x][j + y]);
          }
        }
      }
      res[i][j] = (val / normF);
    }
  }
  return res;
}

pair<vector<vector<float>>, float> computeKernalInfo(int size, float s) {
  vector<vector<float>> Kernal(size, vector<float>(size, 0.0));
  float sum = 0.0;
  int center = size / 2;
  for (int i = 0; i < size; i++) {
    for (int j = 0; j < size; j++) {
      Kernal[i][j] = computeGaussianFunc(i, j, center, s);
      sum += Kernal[i][j];
    }
  }
  return {Kernal, sum};
}

vector<vector<float>> computeGussianBlur(vector<vector<float>> &img, float s) {
  int Ksize = 2 * ceil(3 * s) + 1;
  vector<vector<float>> res;
  pair<vector<vector<float>>, float> KernalInfo;
  KernalInfo = computeKernalInfo(Ksize, s);
  vector<vector<float>> gaussianFilterd =
      computeGaussianConvolution(KernalInfo, Ksize, img);
  return gaussianFilterd;
}

vector<vector<float>> getImageFltMatrix(Mat &img) {
  int rows = img.rows;
  int cols = img.cols;
  vector<vector<float>> grayScalImg(rows, vector<float>(cols, 0));
  for (int i = 0; i < rows; i++) {
    for (int j = 0; j < cols; j++) {
      if (img.channels() == 1)
        grayScalImg[i][j] = static_cast<int>(img.at<uchar>(i, j));
      if (img.channels() == 3) {
        float val = (0.0722 * (img.at<Vec3b>(i, j)[0])) +
                    (0.7152 * (img.at<Vec3b>(i, j)[1])) +
                    (0.2126 * (img.at<Vec3b>(i, j)[2]));
        grayScalImg[i][j] = val;
      }
    }
  }
  return grayScalImg;
}

void plotImageMatrix(vector<vector<float>> &img) {
  int rows = img.size();
  int cols = img[0].size();
  Mat image(rows, cols, CV_8U);
  for (int i = 0; i < rows; i++) {
    for (int j = 0; j < cols; j++) {
      image.at<u_char>(i, j) = static_cast<u_char>((int)(img[i][j]));
    }
  }
  displayImage(image);
}

void plotKeyPointsInImg(Mat &img, vector<pair<int, int>> &points) {
  for (auto point : points) {
    circle(img, Point(point.second, point.first), 3, Scalar(0, 255, 0), 1);
  }
}

void logSubPatchPrint(const vector<vector<float>> &img) {
  if (img.size() > 9 and img[0].size() > 9) {
    cout << "START : ====================\n\n";
    for (int i = 1; i <= subPatchSize; i++) {
      for (int j = 1; j <= subPatchSize; j++) {
        cout << img[i][j] << " ";
      }
      cout << "\n";
    }
    cout << "\nEND : ====================\n\n";
  }
}

namespace MvSIFT {

class SIFT {
  float k;
  int numOctaves;
  int scalesPerOct;
  float initalSigma;
  int gKernelSize;

  bool boundsCheck(int x, int y, int r, int c) {
    return (x >= 0 and x + 1 < r and y >= 0 and y + 1 < c);
  }

  vector<vector<float>> downsampleImg(vector<vector<float>> &currentImg) {
    int rows = currentImg.size();
    int cols = currentImg[0].size();
    vector<vector<float>> output(rows / 2, vector<float>(cols / 2, 0));
    for (int i = 0; i < rows; i += 2) {
      for (int j = 0; j < cols; j += 2) {
        if (not boundsCheck(i, j, rows, cols))
          continue;
        float val = 0;
        for (int v = 0; v <= 1; v++) {
          for (int w = 0; w <= 1; w++) {
            val += currentImg[i + v][j + w];
          }
        }
        output[i / 2][j / 2] = val / 4;
      }
    }
    return output;
  }

  vector<vector<vector<vector<float>>>>
  computeScaleSpacePyramid(vector<vector<float>> &img) {
    vector<vector<vector<vector<float>>>> scaleSpacePyramid;
    vector<vector<float>> currentImg = img;
    auto propTonxtOct = img;
    for (int oct = 1; oct <= numOctaves; oct++) {
      vector<vector<vector<float>>> octImages;
      float sigma = initalSigma;
      for (int scl = 1; scl <= scalesPerOct; scl++) {
        vector<vector<float>> blurredImg =
            computeGussianBlur(currentImg, sigma);
        octImages.push_back(blurredImg);
        currentImg = blurredImg;
        sigma *= k;
      }
      scaleSpacePyramid.push_back(octImages);
      currentImg = downsampleImg(propTonxtOct);
      propTonxtOct = currentImg;
    }
    return scaleSpacePyramid;
  }

  vector<vector<float>> getInterScaleImgDiff(vector<vector<float>> &img1,
                                             vector<vector<float>> &img2) {
    int rows = img1.size();
    int cols = img1[0].size();
    assert(rows == img2.size() and cols == img2[0].size() &&
           "dimentions for image diff do not match");
    vector<vector<float>> diff(rows, vector<float>(cols, 0));
    for (int i = 0; i < rows; i++) {
      for (int j = 0; j < cols; j++) {
        diff[i][j] = img1[i][j] - img2[i][j];
      }
    }
    return diff;
  }

  vector<vector<vector<vector<float>>>> computeDiffOfGaussianPyramid(
      vector<vector<vector<vector<float>>>> &ScaleSpacePyramid) {
    vector<vector<vector<vector<float>>>> diffOfGaussianPyrmd;
    assert(ScaleSpacePyramid.size() == numOctaves &&
           "SSP lenght does not match number of octaves");
    // For each octave, compute dog.
    for (int oct = 0; oct < numOctaves; oct++) {
      vector<vector<vector<float>>> diffOfGaussian;
      for (int scl = 1; scl < scalesPerOct; scl++) {
        // get the diff of scl-1 and scl.
        vector<vector<float>> diff = getInterScaleImgDiff(
            ScaleSpacePyramid[oct][scl], ScaleSpacePyramid[oct][scl - 1]);
        diffOfGaussian.push_back(diff);
      }
      diffOfGaussianPyrmd.push_back(diffOfGaussian);
    }
    return diffOfGaussianPyrmd;
  }

  // Get potential extrema for this scale.
  vector<pair<int, int>>
  searchInCurrentScale(int scale, vector<vector<vector<float>>> &Octave) {
    int OSize = Octave.size();
    assert(scale > 0 and scale < OSize - 1 &&
           "Required a Sandwich scale, Not a Sandwiched scale.");
    auto &lower = Octave[scale - 1];
    auto &middle = Octave[scale];
    auto &upper = Octave[scale + 1];
    vector<pair<int, int>> potentialKeypoints;
    int rows = middle.size();
    int cols = middle[0].size();
    for (int i = 1; i < rows - 1; i++) {
      for (int j = 1; j < cols - 1; j++) {
        float currPtVal = middle[i][j];
        float maxVal = FLT_MIN;
        float minVal = FLT_MAX;
        // first search current scale.
        for (int x = -1; x <= 1; x++) {
          for (int y = -1; y <= 1; y++) {
            // Skip the currentPtVal
            if (x == 0 and y == 0)
              continue;
            maxVal = max(maxVal, middle[i + x][j + y]);
            minVal = min(minVal, middle[i + x][j + y]);
          }
        }
        // Now search upper scale.
        for (int x = -1; x <= 1; x++) {
          for (int y = -1; y <= 1; y++) {
            maxVal = max(maxVal, upper[i + x][j + y]);
            minVal = min(minVal, upper[i + x][j + y]);
          }
        }
        // search lower scale.
        for (int x = -1; x <= 1; x++) {
          for (int y = -1; y <= 1; y++) {
            maxVal = max(maxVal, lower[i + x][j + y]);
            minVal = min(minVal, lower[i + x][j + y]);
          }
        }
        // current pixel is a candidate if it is greter than maximum or lesser
        // than minimum found.
        if (currPtVal > maxVal or currPtVal < minVal) {
          potentialKeypoints.push_back(make_pair(i, j));
          // cout << "### Detected KeyP : " << i << " " << j << "\n";
        }
      }
    }
    return potentialKeypoints;
  }



  vector<vector<vector<pair<int, int>>>>
  findScaleSpaceExtrma(vector<vector<vector<vector<float>>>> &DoGPyramid) {
    assert(DoGPyramid.size() == numOctaves &&
           "DOG pyramid lenght does not match number of octaves");
    // AcrossOctaves, InAnOctave(Scales), Within a Scale
    vector<vector<vector<pair<int, int>>>> potentialKeyPoints;
    for (int oct = 0; oct < numOctaves; oct++) {
      // number of scales in this octave.
      int scs = DoGPyramid[oct].size();
      auto Octave = DoGPyramid[oct];
      vector<vector<pair<int, int>>> keyPointsInOctave;
      // Search this octaves to get points of interest, that is extrema points
      // wrt to 26, 3d points in scale above and below current scale.
      for (int sc = 1; sc < scs - 1; sc++) {
        // In this scale, search in scale above and below the extrema.
        auto keyPtsInScale = searchInCurrentScale(sc, Octave);
        keyPointsInOctave.push_back(keyPtsInScale);
      }
      potentialKeyPoints.push_back(keyPointsInOctave);
    }
    return potentialKeyPoints;
  }



public:
  SIFT() {
    // Scale factor Chosen to be /2 = 1.414
    k = 1.414;
    numOctaves = 4;
    scalesPerOct = 5;
    initalSigma = 1;
    gKernelSize = 3;
  }

  SIFT(float userK, unsigned userNumOct, unsigned userSclsPerOct,
       float userInitalSigma, unsigned userGaussianKernalSize) {
    k = userK;
    numOctaves = userNumOct;
    scalesPerOct = userSclsPerOct;
    initalSigma = userInitalSigma;
    gKernelSize = userGaussianKernalSize;
  }
  /*
  Implmn Details:
      1) Converting Mat -> vector<vector<float>> and only dealing with image as
  a matrix floats, since static_casting back and fourth from u_char costs in
  data loss.

      2) A pyramid is a vector<Octaves> an Octave is a vector<Scales> a scale is
  an image level rep/ vector<vector<float>>

      So the
            img/scale = vector<vector<float>>
            octave = vector<vector<vector<float>>> or vector<scale>
            pyramid = vector<vector<vector<vector<float>>>> or vector<octave>

      3) Keypoints in a scale are vector<pair<int,int>>, a vector of
  co-ordinates.

  Params : (as set in default constructor)
      k = 1.414 (Scale sigma multiplier) : as suggested in sift paper

      num of octaves = 4

      num of scales per octave = 4

      gaussian kernel size = 3 : This could be a potential source of bugs, as
  this has to be ideally computed based on given sigma.

  Algorithm (Partially completed yet):

  https://www.analyticsvidhya.com/blog/2019/10/detailed-guide-powerful-sift-technique-image-matching-python/

  Following the Std algorithm as per my level of understanding so far.
      1) Compute Scale space pyramid
          Get a scale space pyramid for each octave (4 octaves), compute 4
  scales ( number of scales per octave = 4). Start with inital img, and compute
  Gaussian blurred image with initalSigma, and for the successive scales in this
  octave, compute Gaussian blur of previously blurred image with a sigma of =
  previousSigma * k, this achieves successive blurring in an octave. So we have
  4 scales in an octave. Now downsample the orignal image ( for octave = 1,
  octaves = (0 1 2 3)) get the image of half the dimensions of source image, and
  perform the above steps, and for each successive octave downsample the image
  of previous octave. Finally return the scaleSpacePyramid.

      2) Compute Difference of Gaussian Pyramid.
          Get a pyramid of Differences of Gaussians previously computed, For
  each octave, traverse the scale from (1 , 2 , 3), and compute the diff of
  scale(i) - scale(i-1). So we will be left with 3 scales in DoGPyramid. To
  compute diff of scale(i) - scale(i-1) for i = 1,2,3. Take element wise diff of
  image in scale i and image in scale i-1. Finally return this Pyramid.

      3) Compute Scale Space Extremas.
          Using The DoGPyramid For each octave (4 octaves yet),
  there are 3 scales per octave in DogPyramid (0,1,2). Now to
  generalize, say there are s scales per octave in DoGPyramid
  (0,1,2 ... s-1, s), for scales [1,s-1], for a scale s for
  every pixel in [1,rows-1][1,cols-1] range, there are 8 pixels
  around it in the current scale, and 9 pixel in the scale s+1,
  and 9 in the scale s-1. Compute the maximum in the rest of
  the 26 pixels (8 in scale s, 9 in scale s+1, 9 in scale s-1),
  I name scale s as middle, scale s+1 as upper and scale s-1 as lower. And if
  this current point value is higher or equal than the maximum, lower or equal
  than the minimum, then mark it as a potential keypoint and insert it in a list
  of points, and return it.

  */
  void computeSIFT(Mat &image, Mat &ClrCpy) {
    // get image data in float
    vector<vector<float>> img = getImageFltMatrix(image);
    vector<vector<vector<vector<float>>>> scaleSpacePyramid =
        computeScaleSpacePyramid(img);
#if DEBUG
    for (int i = 0; i < scaleSpacePyramid.size(); i++) {
      cout << "Octave = " << i << "\n";
      for (int j = 0; j < scaleSpacePyramid[i].size(); j++) {
        cout << " Scale = " << j;
        cout << " Img sizes = " << scaleSpacePyramid[i][j].size() << " x "
             << scaleSpacePyramid[i][j][0].size() << "\n";
        // plotImageMatrix(scaleSpacePyramid[i][j]);
        // logSubPatchPrint(scaleSpacePyramid[i][j]);
      }
    }
    cout << "\n";
#endif
    vector<vector<vector<vector<float>>>> DoGPyramid =
        computeDiffOfGaussianPyramid(scaleSpacePyramid);
#if DEBUG
    for (int oct = 0; oct < DoGPyramid.size(); oct++) {
      auto Octave = DoGPyramid[oct];
      cout << ">>> Diff Of Gaussian Octave = " << oct;
      cout << " Num Scales = " << Octave.size() << "\n ";
      for (int scl = 0; scl < Octave.size(); scl++) {
        auto dogImg = Octave[scl];
        plotImageMatrix(dogImg);
        logSubPatchPrint(dogImg);
#if IMGLVLDIAGNOSIS
        for (int i = 0; i < dogImg.size(); i++) {
          for (int j = 0; j < dogImg[0].size(); j++) {
            cout << dogImg[i][j] << " ";
          }
          cout << "\n";
        }
        cout << "\n\n";
#endif
      }
    }
#endif

    vector<vector<vector<pair<int, int>>>> scaleSpaceExtremas =
        findScaleSpaceExtrma(DoGPyramid);
    // this concludes inital scale space extrema detection step. Generates basic
    // keypoints which can be later pruned.
#if DEBUG
    unsigned count = 0;
    for (int oct = 0; oct < scaleSpaceExtremas.size(); oct++) {
      cout << ">>> Scale Space Extrema Octave = " << oct << "\n";
      auto Octave = scaleSpaceExtremas[oct];
      cout << " Num Scales = " << Octave.size();
      for (int sc = 0; sc < Octave.size(); sc++) {
        auto keypts = Octave[sc];
        count += keypts.size();
        cout << " Keypts in scale" << sc << " Keypts = " << keypts.size();
      }
      cout << "\n";
    }
#endif

#if !KEYPOINTPLT
    for (auto &Octave : scaleSpaceExtremas) {
      for (auto &scale : Octave) {
        plotKeyPointsInImg(ClrCpy, scale);
      }
    }
#endif
    
  } // computeSIFT()
};

} // namespace MvSIFT

int main() {
  string imPath =
      "/home/panirpal/workspace/Projects/ComputerVision/data/images/astro1.jpg";
  Mat img = imread(imPath, IMREAD_GRAYSCALE);
  Mat cimg = imread(imPath);
  if (!img.empty()) {
    displayImage(img);
    MvSIFT::SIFT().computeSIFT(img, cimg);
    displayImage(cimg);
  } else
    cerr << "image not found! exiting...";
}

SIFT 特征变换未检测到正确/不完整的尺度空间极值

问题描述投票：0回答：1

1个回答

最新问题

SIFT 特征变换未检测到正确/不完整的尺度空间极值

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1