ostringstream 性能测试
分析与建议
性能分析
- ostringstream 格式转换本身的性能并不差:复用同一个对象时(use_oss_op),从测试结果看,它与 snprintf 的差距不在数量级上
- 每次创建 ostringstream 对象都会触发一次全局 locale 对象的引用计数递增(参考1,参考2)。多线程场景下这会带来 false sharing 式的性能损耗(更准确地说,是各线程争用同一个引用计数所在的 cache line),而且影响非常明显
- 不要重复地创建与销毁 ostringstream 对象,应当复用同一个对象(每次使用前调用 str("") 和 clear() 重置),以避免 false sharing
- ostringstream 会在堆上分配内存,但系统会优化 malloc/free,所以不必太过关注这个问题
测试结果
下面的代码在我的机器(4 cores,g++ -O2)上运行的结果如下,每一项的格式为"函数名/耗时",耗时单位为毫秒。
false sharing 的影响可以从这一点看出来:线程数增加后,use_oss 函数的耗时并没有下降
Thread num from 1 to 9
1: use_printf/585, use_printf_op/582, use_printf_malloc/682, use_printf_malloc_op/582, use_oss_op/658, use_oss/3115,
2: use_printf/294, use_printf_op/291, use_printf_malloc/341, use_printf_malloc_op/292, use_oss_op/329, use_oss/3752,
3: use_printf/195, use_printf_op/194, use_printf_malloc/228, use_printf_malloc_op/194, use_oss_op/219, use_oss/3616,
4: use_printf/147, use_printf_op/147, use_printf_malloc/172, use_printf_malloc_op/147, use_oss_op/166, use_oss/3636,
5: use_printf/119, use_printf_op/118, use_printf_malloc/138, use_printf_malloc_op/118, use_oss_op/135, use_oss/3721,
6: use_printf/105, use_printf_op/100, use_printf_malloc/119, use_printf_malloc_op/102, use_oss_op/113, use_oss/3791,
7: use_printf/118, use_printf_op/134, use_printf_malloc/134, use_printf_malloc_op/115, use_oss_op/126, use_oss/3778,
8: use_printf/112, use_printf_op/116, use_printf_malloc/122, use_printf_malloc_op/114, use_oss_op/137, use_oss/3791,
9: use_printf/104, use_printf_op/102, use_printf_malloc/123, use_printf_malloc_op/101, use_oss_op/124, use_oss/3792,
测试代码
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <chrono>
#include <future>
#include <iostream>
#include <map>
#include <numeric>
#include <sstream>
#include <string>
#include <thread>
#include <vector>
// Benchmark case: formats "hi999" `cnt` times, constructing and destroying a
// brand-new std::ostringstream on every iteration.
// Returns the summed length of all formatted strings.
int use_oss(int cnt)
{
    int total_len = 0;
    while (cnt != 0) {
        --cnt;
        std::ostringstream stream;
        // The stream is freshly constructed, so these two resets have nothing
        // to undo; they are kept so the per-iteration work stays unchanged.
        stream.str("");
        stream.clear();
        stream << "hi" << 999;
        const auto produced = stream.str().size();
        total_len += produced;
    }
    return total_len;
}
// Benchmark case: formats "hi999" `cnt` times while reusing one
// std::ostringstream that is constructed once, outside the loop.
// Returns the summed length of all formatted strings.
int use_oss_op(int cnt)
{
    std::ostringstream reused_stream;
    int total_len = 0;
    while (cnt != 0) {
        --cnt;
        // Drop the previous iteration's text and reset the state flags before reuse.
        reused_stream.str("");
        reused_stream.clear();
        reused_stream << "hi" << 999 << std::flush;
        const auto produced = reused_stream.str().size();
        total_len += produced;
    }
    return total_len;
}
// Benchmark case: formats "hi999" `cnt` times with snprintf into a 64-byte
// stack buffer that is declared inside the loop body.
// Returns the summed strlen of all formatted strings.
int use_printf(int cnt)
{
    int total_len = 0;
    for (; cnt != 0; --cnt) {
        char scratch[64];
        snprintf(scratch, sizeof(scratch), "hi%d", 999);
        total_len += strlen(scratch);
    }
    return total_len;
}
// Benchmark case: formats "hi999" `cnt` times with snprintf into a single
// 64-byte stack buffer declared once, outside the loop.
// Returns the summed strlen of all formatted strings.
int use_printf_op(int cnt)
{
    char scratch[64];
    int total_len = 0;
    for (; cnt != 0; --cnt) {
        snprintf(scratch, sizeof(scratch), "hi%d", 999);
        total_len += strlen(scratch);
    }
    return total_len;
}
// Benchmark case: formats "hi999" `cnt` times with snprintf into a 64-byte
// heap buffer that is malloc'ed and freed on every iteration.
// Returns the summed strlen of all formatted strings.
// Aborts the process if an allocation fails.
int use_printf_malloc(int cnt)
{
    // Size of the heap buffer. It has to be passed to snprintf explicitly:
    // sizeof on the char* would yield the pointer size, not the allocation size.
    const size_t buf_size = 64;
    int cnt_tmp = 0;
    while (cnt--) {
        char* buf = static_cast<char*>(malloc(buf_size));
        if (buf == nullptr) {
            abort();  // writing through a null pointer would be undefined behavior
        }
        snprintf(buf, buf_size, "hi%d", 999);
        cnt_tmp += strlen(buf);
        free(buf);
    }
    return cnt_tmp;
}
// Benchmark case: formats "hi999" `cnt` times with snprintf into one 64-byte
// heap buffer that is malloc'ed once before the loop and freed after it.
// Returns the summed strlen of all formatted strings.
// Aborts the process if the allocation fails.
int use_printf_malloc_op(int cnt)
{
    // Size of the heap buffer. It has to be passed to snprintf explicitly:
    // sizeof on the char* would yield the pointer size, not the allocation size.
    const size_t buf_size = 64;
    int cnt_tmp = 0;
    char* buf = static_cast<char*>(malloc(buf_size));
    if (buf == nullptr) {
        abort();  // writing through a null pointer would be undefined behavior
    }
    while (cnt--) {
        snprintf(buf, buf_size, "hi%d", 999);
        cnt_tmp += strlen(buf);
    }
    free(buf);
    return cnt_tmp;
}
// Highest thread count exercised by the benchmark.
constexpr int MAX_THREAD_NUM = 9;
// Total iteration budget per measurement: 30 * 9! (9! = 362880).
// Being a multiple of 9! it divides evenly by every thread count from 1 to 9.
constexpr int TOTAL_USE_CNT = 30 * (1 * 2 * 3 * 4 * 5 * 6 * 7 * 8 * 9);
int main()
{
std::map<decltype(use_oss)*, std::string> funs;
funs[use_oss] = "use_oss";
funs[use_oss_op] = "use_oss_op";
funs[use_printf] = "use_printf";
funs[use_printf_op] = "use_printf_op";
funs[use_printf_malloc] = "use_printf_malloc";
funs[use_printf_malloc_op] = "use_printf_malloc_op";
std::cout << "Thread num from 1 to " << MAX_THREAD_NUM << std::endl << std::endl;
for (int t = 1; t <= MAX_THREAD_NUM; t++) {
int USE_CNT_PER_THREAD = TOTAL_USE_CNT / t;
std::cout << t <<": ";
size_t acc = 0;
for (auto p : funs) {
auto f = p.first;
std::vector<std::future<int>> future_vec;
future_vec.reserve(11);
auto ms_begin = std::chrono::steady_clock::now();
for (int i = 0; i < t; i++) {
future_vec.push_back(std::async(std::launch::async,
[USE_CNT_PER_THREAD,f]() -> int { return f(USE_CNT_PER_THREAD); }));
}
for (auto& f : future_vec) {
f.wait();
acc += f.get();
}
auto ms_end = std::chrono::steady_clock::now();
auto mc_s = std::chrono::duration_cast<std::chrono::milliseconds>(ms_end - ms_begin);
std::cout << funs[f] << "/" << mc_s.count() << ", ";
}
std::cout << std::endl;
}
}