TopK 问题在面试中经常出现。下面根据参考博客中的思路,用 C++ 实现四种解法(全排序、局部冒泡、小顶堆、快速选择),并比较它们的耗时:
参考:
【1】拜托,面试别再问我TopK了!!!
【2】Leetcode 215. 数组中的第K个最大元素
代码实现:
#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <functional>
#include <iostream>
#include <queue>
#include <random>
#include <string>
#include <utility>
#include <vector>
/// The definitions below use unqualified standard-library names (vector, cout, sort, ...).
using namespace std;
/// Common signature of the top-k routines in this file: the data to process
/// (passed by non-const reference) and k.
using FuncPtr = std::function<void(vector<int>&, int)>;
/// Fills vec with n pseudo-random integers produced by rand().
/// The generator is re-seeded from the current time on every call.
///
/// @param vec  output container; resized to n and overwritten
/// @param n    number of values to generate
void GenerateData(vector<int>& vec, int n) {
    vec.resize(n);
    srand(static_cast<unsigned int>(time(nullptr)));
    for (int& slot : vec) {
        slot = rand();
    }
}
///
/// Full sort: sorts the whole input in descending order, then prints the
/// first k values, each followed by a space, and ends the line.
/// Time complexity: O(n*logn)
///
/// @param vec  input values; left fully sorted in descending order
/// @param k    number of largest values to print; must not exceed vec.size()
///
void AllSort(vector<int>& vec, int k) {
    assert(vec.size() >= k);
    // Descending order places the k largest values at the front.
    sort(vec.begin(), vec.end(), [](int lhs, int rhs) { return lhs > rhs; });
    int emitted = 0;
    while (emitted < k) {
        cout << vec[emitted] << " ";
        ++emitted;
    }
    cout << endl;
}
///
/// Partial sort: runs only k bubble-sort passes, so just the k largest values
/// are moved (in ascending order) to the end of the vector. They are then
/// printed from the back, i.e. largest first, each followed by a space.
/// Time complexity: O(n*k)
///
/// @param vec  input values; its last k slots end up holding the k largest values
/// @param k    number of largest values to print; must not exceed vec.size()
///
void BubblePartSort(vector<int>& vec, int k) {
    assert(vec.size() >= k);
    const int total = vec.size();
    for (int pass = 0; pass < k; ++pass) {
        // Everything past this index was already settled by earlier passes.
        const int lastUnsettled = total - 1 - pass;
        for (int pos = 0; pos < lastUnsettled; ++pos) {
            if (vec[pos + 1] < vec[pos]) {
                swap(vec[pos], vec[pos + 1]);
            }
        }
    }
    for (int offset = 1; offset <= k; ++offset) {
        cout << vec[total - offset] << " ";
    }
    cout << endl;
}
///
/// Heap selection: keeps the k largest values seen so far in a min-heap of
/// size k, so the top k are found without sorting them or the rest of the input.
/// Time complexity: O(n*logk)
///
/// Prints the k largest values in ascending order (the order in which they
/// leave the min-heap), each followed by a space, then ends the line.
///
/// @param vec  input values; not modified
/// @param k    number of largest values to print; requires 0 <= k <= vec.size()
///
void HeapPartSort(vector<int>& vec, int k) {
    assert(k >= 0 && static_cast<std::size_t>(k) <= vec.size());
    if (k <= 0) {
        // With k == 0 the heap would be empty and the scan below would call
        // top() on it, which is undefined; "top 0" is simply an empty line.
        cout << endl;
        return;
    }
    int size = vec.size();
    // Seed the min-heap with the first k values; its top is the smallest kept value.
    priority_queue<int, vector<int>, greater<int>> pque(vec.begin(), vec.begin() + k);
    for (int i = k; i < size; ++i) {
        // A value larger than the smallest kept one displaces it.
        if (pque.top() < vec[i]) {
            pque.pop();
            pque.push(vec[i]);
        }
    }
    while (!pque.empty()) {
        cout << pque.top() << " ";
        pque.pop();
    }
    cout << endl;
}
///
/// One quicksort partition pass over vec[left..right], using vec[left] as the
/// pivot value. Afterwards every element left of the returned index is <= the
/// pivot value and every element right of it is >= the pivot value.
/// Used by QuickSelect, whose expected running time is O(n).
/// Reference: Leetcode 215. Kth Largest Element in an Array
///
/// @param vec    values to rearrange in place
/// @param left   first index of the range (inclusive)
/// @param right  last index of the range (inclusive)
/// @return final index of the pivot value
///
int Partition(vector<int>& vec, int left, int right) {
    const int pivotValue = vec[left];
    int lo = left;
    int hi = right;
    while (lo < hi) {
        // Scan from the right for the first value smaller than the pivot ...
        for (; lo < hi && vec[hi] >= pivotValue; --hi) {
        }
        // ... and drop it into the free slot on the left side.
        vec[lo] = vec[hi];
        // Scan from the left for the first value larger than the pivot ...
        for (; lo < hi && vec[lo] <= pivotValue; ++lo) {
        }
        // ... and drop it into the slot just freed on the right side.
        vec[hi] = vec[lo];
    }
    // lo == hi is the one remaining free slot: the pivot's final position.
    vec[lo] = pivotValue;
    return lo;
}
/// Quickselect: rearranges vec[left..right] until the element that belongs at
/// position `index` in ascending order sits there, and returns that element.
/// On return, elements of the range before `index` are <= vec[index] and
/// elements after it are >= vec[index].
///
/// Written as a loop rather than recursion: each step discards only one side
/// of the range, and the recursive form can reach a call depth proportional
/// to the range length on unfavourable input.
///
/// @param vec    values to rearrange in place
/// @param left   first index of the search range (inclusive)
/// @param right  last index of the search range (inclusive)
/// @param index  target position; requires left <= index <= right
/// @return the value that ends up at vec[index]
int QuickSelect(vector<int>& vec, int left, int right, int index) {
    assert(0 <= left && left <= index && index <= right &&
           static_cast<std::size_t>(right) < vec.size());
    while (true) {
        // Partition always pivots on vec[left]; moving the middle element there
        // keeps already-sorted input from shrinking the range by one per step.
        std::swap(vec[left], vec[left + (right - left) / 2]);
        const int pivot = Partition(vec, left, right);
        if (pivot == index) {
            return vec[pivot];
        }
        // Keep only the side that still contains the target position.
        if (pivot < index) {
            left = pivot + 1;
        } else {
            right = pivot - 1;
        }
    }
}
/// Returns the k-th largest value in vec (k == 1 is the maximum).
/// Side effect: vec is rearranged so that this value sits at index
/// vec.size() - k and every element after it is >= it.
///
/// @param vec  values to search; rearranged in place
/// @param k    rank counted from the largest; requires 1 <= k <= vec.size()
/// @return the k-th largest value
int FindKthLargest(vector<int>& vec, int k) {
    // Same precondition style as the other top-k routines in this file; an
    // empty vector or an out-of-range k would index outside the vector.
    assert(k >= 1 && static_cast<std::size_t>(k) <= vec.size());
    const int size = static_cast<int>(vec.size());
    return QuickSelect(vec, 0, size - 1, size - k);
}
/// Quickselect-based top-k: after selecting the k-th largest value it sits at
/// index vec.size() - k and everything to its right is >= it, so the last k
/// slots hold the k largest values (not sorted among themselves).
/// Prints those slots from the back, each followed by a space, then ends the line.
///
/// @param vec  input values; rearranged in place
/// @param k    number of largest values to print; requires 0 <= k <= vec.size()
void PartitionSort(vector<int>& vec, int k) {
    assert(k >= 0 && static_cast<std::size_t>(k) <= vec.size());
    const int size = static_cast<int>(vec.size());
    if (k > 0) {
        // Called only for its rearranging side effect; the selected value itself
        // is not needed. Skipped for k == 0, which has no valid target index.
        FindKthLargest(vec, k);
    }
    for (int i = size - 1; i >= size - k; --i) {
        cout << vec[i] << " ";
    }
    cout << endl;
}
/// Runs one top-k routine on a private copy of the input and prints the
/// method name together with the elapsed time in seconds.
///
/// @param vec          input data; never modified, the routine receives a copy
/// @param k            forwarded to the routine
/// @param func         routine to time
/// @param method_name  label printed next to the measured time
void OnExecuteFunc(const vector<int>& vec, int k, FuncPtr func, const std::string& method_name) {
    // The routine is free to reorder its argument, so hand it a scratch copy.
    vector<int> scratch = vec;

    // Only the call itself is timed; the copy above is excluded.
    const auto t0 = std::chrono::steady_clock::now();
    func(scratch, k);
    const auto t1 = std::chrono::steady_clock::now();

    const std::chrono::duration<double, std::micro> micros = t1 - t0;
    const double seconds = static_cast<double>(micros.count()) / 1'000'000;
    printf("Method: %s\tTime: %lfs\n", method_name.c_str(), seconds);
}
int main() {
vector<int> vec;
int n = 1'000'000;
int k = 10;
GenerateData(vec, n);
auto fn = std::bind(OnExecuteFunc, std::cref(vec), k, std::placeholders::_1, std::placeholders::_2);
fn(AllSort, "直接排序");
fn(BubblePartSort, "局部冒泡");
fn(HeapPartSort, "局部堆排");
fn(PartitionSort, "快排");
return 0;
}
测试结果: