早期的RCNN目标检测算法中使用selective search算法来产生region proposals.
该算法的大致流程如下:
1、利用图分割算法将图像分成不同的区域,并为这些区域产生相应的包围框
2、计算包围框之间的相似度,相似度可以分为四个部分(颜色相似度、纹理相似度和大小,相交约束),合并相近的包围框
3、排除过大、过小、长宽比过大、过小的包围框即可
参考代码:https://github.com/tttanikawa/selective-search-cpp
#pragma once
#include <algorithm>
#include <array>
#include <cassert>
#include <chrono>
#include <cmath>
#include <iostream>
#include <iterator>
#include <map>
#include <memory>
#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/ml.hpp>
#include <opencv2/objdetect.hpp>
#include <random>
#include <unordered_map>
#include <vector>
namespace std
{
template <>
class hash<std::pair<int, int>>
{
public:
std::size_t operator()(const std::pair<int, int> &x) const
{
return hash<int>()(x.first) ^ hash<int>()(x.second);
}
};
} // namespace std
namespace ss
{
inline double square(double a)
{
return a * a;
}
inline double diff(const cv::Mat &img, int x1, int y1, int x2, int y2)
{
return sqrt(square(img.at<cv::Vec3f>(y1, x1)[0] - img.at<cv::Vec3f>(y2, x2)[0]) +
square(img.at<cv::Vec3f>(y1, x1)[1] - img.at<cv::Vec3f>(y2, x2)[1]) +
square(img.at<cv::Vec3f>(y1, x1)[2] - img.at<cv::Vec3f>(y2, x2)[2]));
}
struct UniverseElement
{
int rank;
int p;
int size;
UniverseElement() : rank(0), size(1), p(0) {}
UniverseElement(int rank, int size, int p) : rank(rank), size(size), p(p) {}
bool operator==(const UniverseElement &other) const
{
return rank == other.rank && p == other.p && size == other.size;
}
};
class Universe
{
private:
std::vector<UniverseElement> elements;
int num;
public:
Universe(int num) : num(num)
{
elements.reserve(num);
for (int i = 0; i < num; i++)
{
elements.emplace_back(0, 1, i);
}
}
~Universe() {}
int findFast(int x)
{
return elements[x].p;
}
int find(int x)
{
int y = x;
while (y != elements[y].p)
{
y = elements[y].p;
}
elements[x].p = y;
return y;
}
void join(int x, int y)
{
if (elements[x].rank > elements[y].rank)
{
elements[y].p = x;
elements[x].size += elements[y].size;
}
else
{
elements[x].p = y;
elements[y].size += elements[x].size;
if (elements[x].rank == elements[y].rank)
{
elements[y].rank++;
}
}
num--;
}
int size(int x) const { return elements[x].size; }
int numSets() const { return num; }
};
struct edge
{
int a;
int b;
double w;
};
bool operator<(const edge &a, const edge &b)
{
return a.w < b.w;
}
inline double calThreshold(int size, double scale)
{
return scale / size;
}
std::shared_ptr<Universe> segmentGraph(int numVertices, int numEdges, std::vector<edge> &edges, double scale)
{
std::sort(edges.begin(), edges.end());
auto universe = std::make_shared<Universe>(numVertices);
std::vector<double> threshold(numVertices, scale);
for (auto &pedge : edges)
{
int a = universe->find(pedge.a);
int b = universe->find(pedge.b);
if (a != b)
{
if ((pedge.w <= threshold[a]) && (pedge.w <= threshold[b]))
{
universe->join(a, b);
a = universe->find(a);
threshold[a] = pedge.w + calThreshold(universe->size(a), scale);
}
}
}
return universe;
}
// image segmentation using "Efficient Graph-Based Image Segmentation" // 类似并查集
std::shared_ptr<Universe> segmentation(const cv::Mat &img, double scale, double sigma, int minSize)
{
const int width = img.cols;
const int height = img.rows;
cv::Mat imgF;
img.convertTo(imgF, CV_32FC3);
cv::Mat blurred;
cv::GaussianBlur(imgF, blurred, cv::Size(5, 5), sigma);
std::vector<edge> edges(width * height * 4);
int num = 0;
for (int y = 0; y < height; y++)
{
for (int x = 0; x < width; x++)
{
if (x < width - 1)
{
edges[num].a = y * width + x;
edges[num].b = y * width + (x + 1);
edges[num].w = diff(blurred, x, y, x + 1, y);
num++;
}
if (y < height - 1)
{
edges[num].a = y * width + x;
edges[num].b = (y + 1) * width + x;
edges[num].w = diff(blurred, x, y, x, y + 1);
num++;
}
if ((x < width - 1) && (y < height - 1))
{
edges[num].a = y * width + x;
edges[num].b = (y + 1) * width + (x + 1);
edges[num].w = diff(blurred, x, y, x + 1, y + 1);
num++;
}
if ((x < width - 1) && (y > 0))
{
edges[num].a = y * width + x;
edges[num].b = (y - 1) * width + (x + 1);
edges[num].w = diff(blurred, x, y, x + 1, y - 1);
num++;
}
}
}
edges.erase(edges.begin() + num, edges.end());
auto universe = segmentGraph(width * height, num, edges, scale);
for (int i = 0; i < num; i++)
{
int a = universe->find(edges[i].a);
int b = universe->find(edges[i].b);
if ((a != b) && ((universe->size(a) < minSize) || (universe->size(b) < minSize)))
{
universe->join(a, b);
}
}
return universe;
}
void visualize(const cv::Mat &img, std::shared_ptr<Universe> universe)
{
const int height = img.rows;
const int width = img.cols;
std::vector<cv::Vec3b> colors;
cv::Mat segmentated(height, width, CV_8UC3);
std::random_device rnd;
std::mt19937 mt(rnd());
std::uniform_int_distribution<> rand256(0, 255);
for (int i = 0; i < height * width; i++)
{
cv::Vec3b color(rand256(mt), rand256(mt), rand256(mt));
colors.push_back(color);
}
for (int y = 0; y < height; y++)
{
for (int x = 0; x < width; x++)
{
segmentated.at<cv::Vec3b>(y, x) = colors[universe->findFast(y * width + x)];
}
}
cv::imshow("Initial Segmentation Result", segmentated);
cv::waitKey(1);
}
struct Region
{
int size;
cv::Rect rect;
std::vector<int> labels;
std::vector<float> colourHist;
std::vector<float> textureHist;
std::vector<cv::Vec2i> points;
Region() {}
Region(const cv::Rect &rect, int label) : rect(rect)
{
labels.push_back(label);
}
Region(
const cv::Rect &rect, int size, const std::vector<float> &&colourHist, const std::vector<float> &&textureHist, const std::vector<int> &&labels)
: rect(rect), size(size), colourHist(std::move(colourHist)), textureHist(std::move(textureHist)), labels(std::move(labels)) {}
Region &operator=(const Region ®ion) = default;
Region &operator=(Region &®ion) noexcept
{
if (this != ®ion)
{
this->size = region.size;
this->rect = region.rect;
this->labels = std::move(region.labels);
this->colourHist = std::move(region.colourHist);
this->textureHist = std::move(region.textureHist);
}
return *this;
}
Region(Region &®ion) noexcept
{
*this = std::move(region);
}
};
std::shared_ptr<Universe> generateSegments(const cv::Mat &img, double scale, double sigma, int minSize)
{
auto universe = segmentation(img, scale, sigma, minSize);
//visualize(img, universe);
return universe;
}
double calcSimOfColour(const Region &r1, const Region &r2)
{
assert(r1.colourHist.size() == r2.colourHist.size());
float sum = 0.0;
for (auto i1 = r1.colourHist.cbegin(), i2 = r2.colourHist.cbegin(); i1 != r1.colourHist.cend(); i1++, i2++)
{
sum += std::min(*i1, *i2);
}
return sum;
}
double calcSimOfTexture(const Region &r1, const Region &r2)
{
assert(r1.colourHist.size() == r2.colourHist.size());
double sum = 0.0;
// cbegin, cend返回const迭代器
for (auto i1 = r1.textureHist.cbegin(), i2 = r2.textureHist.cbegin(); i1 != r1.textureHist.cend(); i1++, i2++)
{
sum += std::min(*i1, *i2);
}
return sum;
}
inline double calcSimOfSize(const Region &r1, const Region &r2, int imSize)
{
return (1.0 - (double)(r1.size + r2.size) / imSize);// 降低两个较大的矩形合并的概率
}
inline double calcSimOfRect(const Region &r1, const Region &r2, int imSize)
{
return (1.0 - (double)((r1.rect | r2.rect).area() - r1.size - r2.size) / imSize); //相交面积越大,相似度越高
}
inline double calcSimilarity(const Region &r1, const Region &r2, int imSize)
{
return (calcSimOfColour(r1, r2) + calcSimOfTexture(r1, r2) + calcSimOfSize(r1, r2, imSize) + calcSimOfRect(r1, r2, imSize)); // 相似度包括颜色、纹理、size、rect
}
void calcColourHist(const cv::Mat &img, std::shared_ptr<Universe> universe, int label, Region ®ion)
{
std::array<std::vector<unsigned char>, 3> hsv;
for (auto &e : hsv)
{
e.reserve(region.points.size());
}
for (cv::Vec2i point : region.points)
{
for (int channel = 0; channel < 3; channel++)
{
hsv[channel].push_back(img.at<cv::Vec3b>(point[0], point[1])[channel]);
}
}
int channels[] = {0};
const int bins = 25;
int histSize[] = {bins};
float range[] = {0, 256};
const float *ranges[] = {range};
for (int channel = 0; channel < 3; channel++)
{
cv::Mat hist;
cv::Mat input(hsv[channel]);
cv::calcHist(&input, 1, channels, cv::Mat(), hist, 1, histSize, ranges, true, false);
cv::normalize(hist, hist, 1.0, 0.0, cv::NORM_L1);
std::vector<float> histogram;
hist.copyTo(histogram);
if (region.colourHist.empty())
{
region.colourHist = std::move(histogram);
}
else
{
std::copy(histogram.begin(), histogram.end(), std::back_inserter(region.colourHist));
}
}
}
cv::Mat calcTextureGradient(const cv::Mat &img)
{
cv::Mat sobelX, sobelY;
cv::Sobel(img, sobelX, CV_32F, 1, 0);
cv::Sobel(img, sobelY, CV_32F, 0, 1);
cv::Mat magnitude, angle;
cv::cartToPolar(sobelX, sobelY, magnitude, angle, true);
return angle;
}
// 统计一阶sobel算子的方向角直方图
void calcTextureHist(const cv::Mat &img, const cv::Mat &gradient, std::shared_ptr<Universe> universe, int label, Region ®ion)
{
const int orientations = 8;
std::array<std::array<std::vector<unsigned char>, orientations>, 3> intensity;
for (auto &e : intensity)
{
for (auto &ee : e)
{
ee.reserve(region.points.size());
}
}
for (cv::Vec2i point : region.points)
{
for (int channel = 0; channel < 3; channel++)
{
int angle = (int)(gradient.at<cv::Vec3f>(point[0], point[1])[channel] / 22.5) % orientations;
intensity[channel][angle].push_back(img.at<cv::Vec3b>(point[0], point[1])[channel]);
}
}
int channels[] = {0};
const int bins = 10;
int histSize[] = {bins};
float range[] = {0, 256};
const float *ranges[] = {range};
for (int channel = 0; channel < 3; channel++)
{
for (int angle = 0; angle < orientations; angle++)
{
cv::Mat hist;
cv::Mat input(intensity[channel][angle]);
cv::calcHist(&input, 1, channels, cv::Mat(), hist, 1, histSize, ranges, true, false);
cv::normalize(hist, hist, 1.0, 0.0, cv::NORM_L1);
std::vector<float> histogram;
hist.copyTo(histogram);
if (region.textureHist.empty())
{
region.textureHist = std::move(histogram);
}
else
{
std::copy(histogram.begin(), histogram.end(), std::back_inserter(region.textureHist));
}
}
}
}
std::map<int, Region> extractRegions(const cv::Mat &img, std::shared_ptr<Universe> universe)
{
std::map<int, Region> R;
for (int y = 0; y < img.rows; y++)
{
for (int x = 0; x < img.cols; x++)
{
int label = universe->findFast(y * img.cols + x);
if (R.find(label) == R.end())
{
R[label] = Region(cv::Rect(100000, 100000, 0, 0), label);
}
Region ®ion = R[label];
if (region.rect.x > x)
{
region.rect.x = x;
}
if (region.rect.y > y)
{
region.rect.y = y;
}
if (region.rect.br().x < x)
{
region.rect.width = x - region.rect.x + 1;
}
if (region.rect.br().y < y)
{
region.rect.height = y - region.rect.y + 1;
}
region.points.push_back(cv::Vec2i(y, x));
}
}
cv::Mat gradient = calcTextureGradient(img);
cv::Mat hsv;
cv::cvtColor(img, hsv, cv::COLOR_BGR2HSV);
for (auto &labelRegion : R)
{
labelRegion.second.size = labelRegion.second.points.size();
calcColourHist(hsv, universe, labelRegion.first, labelRegion.second);
calcTextureHist(img, gradient, universe, labelRegion.first, labelRegion.second);
}
return R;
}
inline bool isIntersecting(const Region &a, const Region &b)
{
return ((a.rect & b.rect).area() != 0);
}
using LabelRegion = std::pair<int, Region>;
using Neighbour = std::pair<int, int>;
std::vector<Neighbour> extractNeighbours(const std::map<int, Region> &R)
{
std::vector<Neighbour> neighbours;
neighbours.reserve(R.size() * (R.size() - 1) / 2);
for (auto a = R.cbegin(); a != R.cend(); a++)
{
auto tmp = a;
tmp++;
for (auto b = tmp; b != R.cend(); b++)
{
if (isIntersecting(a->second, b->second))
{
neighbours.push_back(std::make_pair(std::min(a->first, b->first), std::max(a->first, b->first)));
}
}
}
return neighbours;
}
std::vector<float> merge(const std::vector<float> &a, const std::vector<float> &b, int asize, int bsize)
{
std::vector<float> newVector;
newVector.reserve(a.size());
for (auto ai = a.begin(), bi = b.begin(); ai != a.end(); ai++, bi++)
{
newVector.push_back(((*ai) * asize + (*bi) * bsize) / (asize + bsize));
}
return newVector;
};
Region mergeRegions(const Region &r1, const Region &r2)
{
assert(r1.colourHist.size() == r2.colourHist.size());
assert(r1.textureHist.size() == r2.textureHist.size());
int newSize = r1.size + r2.size;
std::vector<int> newLabels(r1.labels);
std::copy(r2.labels.begin(), r2.labels.end(), std::back_inserter(newLabels));
return Region(r1.rect | r2.rect,
newSize,
std::move(merge(r1.colourHist, r2.colourHist, r1.size, r2.size)),
std::move(merge(r1.textureHist, r2.textureHist, r1.size, r2.size)),
std::move(newLabels));
}
std::vector<cv::Rect> selectiveSearch(const cv::Mat &img, double scale = 1.0, double sigma = 0.8, int minSize = 50, int smallest = 1000, int largest = 270000, double distorted = 5.0)
{
assert(img.channels() == 3);
auto universe = generateSegments(img, scale, sigma, minSize);
int imgSize = img.total();
auto R = extractRegions(img, universe); // 先通过采样获得regions
auto neighbours = extractNeighbours(R); // 为region提取neighbours, 保存为vector<pair<int, int>>
std::unordered_map<std::pair<int, int>, double> S;
for (auto &n : neighbours)
{
S[n] = calcSimilarity(R[n.first], R[n.second], imgSize); // 为每个pair 计算相似度
}
using NeighbourSim = std::pair<std::pair<int, int>, double>;
while (!S.empty())
{
auto cmp = [](const NeighbourSim &a, const NeighbourSim &b)
{ return a.second < b.second; };
auto m = std::max_element(S.begin(), S.end(), cmp); //找到最大相似度的
//if(m->second<10) break; // 这里为啥不把相似度太低的直接排除
int i = m->first.first;
int j = m->first.second;
auto ij = std::make_pair(i, j);
int t = R.rbegin()->first + 1;// 最大序号加1
R[t] = mergeRegions(R[i], R[j]);// 保存region i + region j
std::vector<std::pair<int, int>> keyToDelete;
for (auto &s : S)
{
auto key = s.first;
if ((i == key.first) || (i == key.second) || (j == key.first) || (j == key.second))
{
keyToDelete.push_back(key); // 所有i,j相关的区域都加进去
}
}
for (auto &key : keyToDelete)
{
S.erase(key);
if (key == ij)
{
continue;
}
int n = (key.first == i || key.first == j) ? key.second : key.first;// n 除i,j外的其他
S[std::make_pair(n, t)] = calcSimilarity(R[n], R[t], imgSize); // 重新计算相似度, S的大小每次减1,知道为0?
}
}
std::vector<cv::Rect> proposals;
proposals.reserve(R.size());
for (auto &r : R)
{
// exclude same rectangle (with different segments)
if (std::find(proposals.begin(), proposals.end(), r.second.rect) != proposals.end())
{
continue;
}
// exclude regions that is smaller/larger than assigned size
if (r.second.size < smallest || r.second.size > largest) //
{
continue;
}
double w = r.second.rect.width;
double h = r.second.rect.height;
// exclude distorted rects
if ((w / h > distorted) || (h / w > distorted))
{
continue;
}
proposals.push_back(r.second.rect);
}
return proposals;
}
} // namespace ss
test:
#include "selective_search.hpp"
#include <opencv2/opencv.hpp>
int main(int argc, char** argv) {
std::string fileName = "./deer.jpg";
cv::Mat img = cv::imread(fileName, cv::IMREAD_COLOR);
// selective search
auto proposals = ss::selectiveSearch(img, 500, 0.8, 50, 20000, 100000, 2.5);
// do something...
for (auto&& rect : proposals) {
cv::rectangle(img, rect, cv::Scalar(0, 255, 0), 3, 8);
}
cv::imwrite("./result.jpg", img);
cv::imshow("result", img);
cv::waitKey(0);
return 0;
}
makefile:
CC = g++
CFLAGS = -g -Wall
SRCS = test.cpp
PROG = test
all: $(PROG)
clean:
$(RM) $(PROG)
OPENCV = `pkg-config opencv --cflags --libs`
LIBS = $(OPENCV)
$(PROG):$(SRCS)
$(CC) $(CFLAGS) -o $(PROG) $(SRCS) $(LIBS)
图分割结果:
最后产生的包围框:
图分割和其他部分的耗时对比:
1158397: 815254