ostringstream 性能测试

ostringstream 性能测试

分析与建议

性能分析

  1. ostringstream 格式转换的性能并不差,至少从测试上看与 snprintf 差距不是数量级上的

  2. 每次创建一个 ostringstream 对象都会触发一次全局 locale 对象的引用计数递增(参考 1、参考 2)。多线程场景下,各线程会争用这个共享计数器所在的缓存行(本文称之为 false sharing;严格来说这是对同一个变量的争用,即 true sharing),性能损耗非常明显

  3. 不要重复地创建与销毁 ostringstream 对象,应复用同一个对象(每次使用前调用 str("") 和 clear() 重置),以避免 false sharing

  4. ostringstream 会在堆上分配内存,但内存分配器对 malloc/free 有优化(测试中 use_printf_malloc 只比 use_printf 慢 15% 左右),所以不必太过关注这个问题

测试结果

下面的代码在我的机器(4 cores,g++ -O2)上运行的结果如下,每项的格式为"函数名/耗时",耗时单位为毫秒。线程数不超过核数时,其他函数的耗时近似随线程数成比例下降,而 use_oss 的耗时不降反升,由此可以看出 false sharing 的影响

Thread num from 1 to 9

1: use_printf/585, use_printf_op/582, use_printf_malloc/682, use_printf_malloc_op/582, use_oss_op/658, use_oss/3115,
2: use_printf/294, use_printf_op/291, use_printf_malloc/341, use_printf_malloc_op/292, use_oss_op/329, use_oss/3752,
3: use_printf/195, use_printf_op/194, use_printf_malloc/228, use_printf_malloc_op/194, use_oss_op/219, use_oss/3616,
4: use_printf/147, use_printf_op/147, use_printf_malloc/172, use_printf_malloc_op/147, use_oss_op/166, use_oss/3636,
5: use_printf/119, use_printf_op/118, use_printf_malloc/138, use_printf_malloc_op/118, use_oss_op/135, use_oss/3721,
6: use_printf/105, use_printf_op/100, use_printf_malloc/119, use_printf_malloc_op/102, use_oss_op/113, use_oss/3791,
7: use_printf/118, use_printf_op/134, use_printf_malloc/134, use_printf_malloc_op/115, use_oss_op/126, use_oss/3778,
8: use_printf/112, use_printf_op/116, use_printf_malloc/122, use_printf_malloc_op/114, use_oss_op/137, use_oss/3791,
9: use_printf/104, use_printf_op/102, use_printf_malloc/123, use_printf_malloc_op/101, use_oss_op/124, use_oss/3792,

测试代码

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <chrono>
#include <future>
#include <iostream>
#include <map>
#include <numeric>
#include <sstream>
#include <string>
#include <thread>
#include <vector>

/**
 * Benchmark case: formats "hi" followed by 999 through a std::ostringstream
 * that is constructed and destroyed on every iteration.
 *
 * @param cnt number of iterations; must be non-negative because the loop
 *            counts down until it reaches zero
 * @return sum of the lengths of the formatted strings over all iterations
 */
int use_oss(int cnt)
{
    int total_len = 0;
    for (; cnt != 0; --cnt) {
        std::ostringstream stream;
        // Resetting a freshly constructed stream changes nothing observable;
        // presumably kept so the per-iteration work mirrors use_oss_op.
        stream.str("");
        stream.clear();
        stream << "hi" << 999;
        const auto formatted_len = stream.str().size();
        total_len += formatted_len;
    }

    return total_len;
}

/**
 * Benchmark case: formats "hi" followed by 999 through a single
 * std::ostringstream that is created once and reset before each iteration.
 *
 * @param cnt number of iterations; must be non-negative because the loop
 *            counts down until it reaches zero
 * @return sum of the lengths of the formatted strings over all iterations
 */
int use_oss_op(int cnt)
{
    std::ostringstream stream;
    int total_len = 0;
    for (; cnt != 0; --cnt) {
        // Drop the previous iteration's text and clear the state flags
        // before the stream is reused.
        stream.str("");
        stream.clear();
        stream << "hi" << 999 << std::flush;
        const auto formatted_len = stream.str().size();
        total_len += formatted_len;
    }

    return total_len;
}

/**
 * Benchmark case: formats "hi999" with snprintf into a 64-byte stack buffer
 * that is declared inside the loop body.
 *
 * @param cnt number of iterations; must be non-negative because the loop
 *            counts down until it reaches zero
 * @return sum of the lengths of the formatted strings over all iterations
 */
int use_printf(int cnt)
{
    int total_len = 0;
    for (; cnt != 0; --cnt) {
        char text[64];
        snprintf(text, sizeof text, "hi%d", 999);
        const auto text_len = strlen(text);
        total_len += text_len;
    }

    return total_len;
}

/**
 * Benchmark case: formats "hi999" with snprintf into one 64-byte stack buffer
 * that is declared once, outside the loop, and overwritten on every iteration.
 *
 * @param cnt number of iterations; must be non-negative because the loop
 *            counts down until it reaches zero
 * @return sum of the lengths of the formatted strings over all iterations
 */
int use_printf_op(int cnt)
{
    char text[64];
    int total_len = 0;
    for (; cnt != 0; --cnt) {
        snprintf(text, sizeof text, "hi%d", 999);
        const auto text_len = strlen(text);
        total_len += text_len;
    }

    return total_len;
}

/**
 * Benchmark case: formats "hi999" with snprintf into a 64-byte heap buffer
 * that is allocated with malloc and released with free on every iteration.
 *
 * The process is aborted if an allocation fails.
 *
 * @param cnt number of iterations; must be non-negative because the loop
 *            counts down until it reaches zero
 * @return sum of the lengths of the formatted strings over all iterations
 */
int use_printf_malloc(int cnt)
{
    // Single source of truth for the allocation size and the snprintf limit.
    // sizeof(buf) on the pointer would yield the pointer size, not 64.
    const size_t buf_size = 64;

    int cnt_tmp = 0;
    while (cnt--) {
        char* buf = static_cast<char*>(malloc(buf_size));
        if (buf == nullptr) {
            abort();  // out of memory: writing through a null pointer would be undefined
        }
        snprintf(buf, buf_size, "hi%d", 999);
        cnt_tmp += strlen(buf);
        free(buf);
    }

    return cnt_tmp;
}

/**
 * Benchmark case: formats "hi999" with snprintf into one 64-byte heap buffer
 * that is allocated once before the loop and freed once after it.
 *
 * The process is aborted if the allocation fails.
 *
 * @param cnt number of iterations; must be non-negative because the loop
 *            counts down until it reaches zero
 * @return sum of the lengths of the formatted strings over all iterations
 */
int use_printf_malloc_op(int cnt)
{
    // Single source of truth for the allocation size and the snprintf limit.
    // sizeof(buf) on the pointer would yield the pointer size, not 64.
    const size_t buf_size = 64;

    char* const buf = static_cast<char*>(malloc(buf_size));
    if (buf == nullptr) {
        abort();  // out of memory: writing through a null pointer would be undefined
    }

    int cnt_tmp = 0;
    while (cnt--) {
        snprintf(buf, buf_size, "hi%d", 999);
        cnt_tmp += strlen(buf);
    }

    free(buf);
    return cnt_tmp;
}

// Highest thread count exercised by the benchmark.
constexpr int MAX_THREAD_NUM = 9;
// 9! = 362880 is divisible by every thread count from 1 to 9, so the total
// work (30 * 9! iterations) splits into equal per-thread shares with no remainder.
constexpr int TOTAL_USE_CNT  = 30 * (9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1);

int main()
{
    std::map<decltype(use_oss)*, std::string> funs;
    
    funs[use_oss]    = "use_oss";
    funs[use_oss_op] = "use_oss_op";

    funs[use_printf]           = "use_printf";
    funs[use_printf_op]        = "use_printf_op";
    funs[use_printf_malloc]    = "use_printf_malloc";
    funs[use_printf_malloc_op] = "use_printf_malloc_op";

    std::cout << "Thread num from 1 to " << MAX_THREAD_NUM << std::endl << std::endl;
    for (int t = 1; t <= MAX_THREAD_NUM; t++) {
        int USE_CNT_PER_THREAD = TOTAL_USE_CNT / t;

        std::cout << t <<": ";
        size_t acc = 0;
        for (auto p : funs) {

            auto f = p.first;
            std::vector<std::future<int>> future_vec;
            future_vec.reserve(11);
            auto ms_begin = std::chrono::steady_clock::now();
            for (int i = 0; i < t; i++) {
                future_vec.push_back(std::async(std::launch::async,
                    [USE_CNT_PER_THREAD,f]() -> int { return f(USE_CNT_PER_THREAD); }));
            }

            for (auto& f : future_vec) {
                f.wait();
                acc += f.get();
            }
            auto ms_end = std::chrono::steady_clock::now();
            auto mc_s = std::chrono::duration_cast<std::chrono::milliseconds>(ms_end - ms_begin);
            std::cout << funs[f] << "/" << mc_s.count() << ", ";
        }
        std::cout << std::endl;
    }
}
上一篇:阿里云弹性高性能计算产品商业化正式发布


下一篇:类EF框架Chloe.ORM升级:只为更完美