利用用户级线程提高多线程应用的性能

该博文转载自:http://www.cnblogs.com/sniperHW/archive/2012/04/02/2429634.html

随着处理器往多核的发展,多线程被越来越多的应用到软件的开发中。但是如果没有正确的使用多线程,反而可能会导致软件性能的下降。

多线程程序中一个影响程序性能的因素就是同步。对于windows系统来说,最快的同步方案就是critical_section,critical_section基本上可以被认为是一个用户态的同步机制(特别是设定了spincount之后,只有在自旋超过spincount次仍然不能获得锁时,才会切入核心态并把当前线程阻塞)。但即使是这样,如果临界区中的代码处理时间比较长,仍然会导致处理器时间浪费在自旋上。如果我们可以让线程在无法获得锁的时候就切换线程(当然是在用户态切换;切换核心态线程的代价很大,除了进入核心态的开销,还有因线程切换导致缓存失效带来的代价),那么就可以把浪费在自旋上的cpu时间用来做有用的工作了。

 

下面介绍一种利用用户态线程的多线程解决方案,首先,创建跟cpu数量一致的线程,每个线程上将会运行一个用户级线程调度器。

所有的业务处理都交给用户级线程处理,每当用户级线程无法获得锁时,就将自己阻塞并回到调度器中,由调度器选择另一个用户级线程来运行。当一个用户级线程释放锁的时候,会唤醒一个阻塞在这个锁上的用户级线程。当然,因为用户级线程是没有时间片控制的,如果在里面处理耗时的代码,将会导致在同一调度器上运行的其它用户级线程无法运行。

 

下面是代码:

首先是一个lockfree队列,队列是线程安全的,并且无需任何锁(注意:push和pop都在队列头进行,所以它实际上是一个后进先出的栈)

lockfree_queue.h

利用用户级线程提高多线程应用的性能
利用用户级线程提高多线程应用的性能
#ifndef _LOCKFREE_QUEUE_H
#define _LOCKFREE_QUEUE_H
// Intrusive singly linked node. LockFreeQueue<T> and the lock wait lists chain
// these nodes together; the node itself never allocates or frees anything.
// NOTE(review): identifiers starting with an underscore are reserved at global
// scope in C++; the name is kept because the rest of the code refers to it.
template <typename T>
struct _node
{
T val; // payload carried by the node
_node<T> *_next; // next node in the chain; not initialized by the struct itself
};
// Intrusive lock-free container driven by a single CAS-updated head pointer.
//
// Despite its name, nodes are inserted AND removed at the head, so the order is
// last-in-first-out. The container never allocates or frees nodes; the caller
// owns every node it pushes.
//
// NOTE(review): pop() is still exposed to the ABA problem. If another thread
// pops the head node, pops its successor and pushes the first node back between
// the read of lhead->_next and the CAS, the CAS succeeds with a stale successor.
// Callers that recycle nodes need an external guarantee or a version tag.
template <typename T>
class LockFreeQueue
{
public:
    LockFreeQueue():_head(0){}

    // Inserts newnode at the head. Retries until the CAS on _head succeeds.
    void push(_node<T> *newnode)
    {
        for(;;)
        {
            _node<T> *lhead = _head; // snapshot of the current head
            newnode->_next = lhead;
            // Publish the node only if nobody changed the head in the meantime.
            if(InterlockedCompareExchangePointer((volatile PVOID *)&_head,newnode,lhead) == lhead)
                return;
        }
    }

    // Removes and returns the head node, or NULL when the container is empty.
    // The returned node has its _next pointer cleared.
    _node<T>* pop()
    {
        for(;;)
        {
            _node<T> *lhead = _head; // snapshot of the current head
            if(!lhead)
                return NULL;
            // Work exclusively on the snapshot. Re-reading _head here (as the
            // previous code did) could observe NULL or a different node after
            // a concurrent pop, causing a null dereference or returning a node
            // that is not the one the CAS removed.
            _node<T> *next = lhead->_next;
            if(InterlockedCompareExchangePointer((volatile PVOID *)&_head,next,lhead) == lhead)
            {
                lhead->_next = NULL;
                return lhead;
            }
        }
    }
private:
    _node<T> *_head; // most recently pushed node, or NULL when empty
};
#endif
利用用户级线程提高多线程应用的性能
利用用户级线程提高多线程应用的性能

用户级线程

uthread.h

利用用户级线程提高多线程应用的性能
利用用户级线程提高多线程应用的性能
#ifndef _UTHREAD_H
#define _UTHREAD_H
#include <Windows.h>
#include "lockfree_queue.h"
//#include "luaWrapper.h"
// Scheduling states stored in uthread::m_status. Values are spelled out so the
// numbering is visible at a glance; they match the implicit numbering.
enum
{
    NONE       = 0, // state a freshly constructed uthread starts in
    WAIT4EVENT = 1, // waiting for an event to arrive
    DEAD       = 2, // the fiber has terminated
    ACTIVED    = 3, // runnable
    UNACTIVED  = 4, // must not be added to the scheduling queue
    YIELD      = 5, // gave up the processor voluntarily
    SLEEP      = 6, // sleeping until a timeout expires
};
// Kinds of blocking condition, stored in BlockStruct::bs_type.
enum
{
    BS_MOV       = 0,
    BS_ATK       = 1,
    BS_OTHER     = 2, // default used by the BlockStruct constructor
    BS_WAIT4LOCK = 3,
    BS_END       = 4,
};
//阻塞结构
// Abstract description of a condition a fiber is blocked on.
class BlockStruct
{
public:
    BlockStruct(uChar type=BS_OTHER):bs_type(type){}
    // Virtual destructor: the class is a polymorphic base, so destroying a
    // derived object through a BlockStruct* must run the derived destructor.
    virtual ~BlockStruct(){}
    // Returns true when the blocked fiber should return from its block.
    virtual bool WakeUp() = 0;
    uChar bs_type; // one of the BS_* constants
};
typedef int uthread_t;
class uthread;
// Interface for the work a fiber executes: fiber_routine calls main_routine()
// on the runnable assigned to the fiber.
class runnable
{
public:
    // Virtual destructor so implementations can be destroyed safely through a
    // runnable*.
    virtual ~runnable(){}
    // Body of the job; runs on the fiber until it returns.
    virtual void main_routine() = 0;
};
class uthread;
struct st_timeout
{
st_timeout(uthread *ut):ut(ut),_timeout(0),index(0){}
bool operator < (st_timeout &r)
{
return _timeout < r._timeout;
}
uLong _timeout;
uthread *ut;
int index;//在超时队列中的下标
private:
st_timeout & operator = (const st_timeout &other);
st_timeout(const st_timeout &other);
};
class Scheduler;
class uthread;
// Payload of an "unlock event": tells a scheduler which fiber may retry which lock.
// Kept as a plain aggregate; instances live inside _node<ulstruct> objects that
// uthread obtains from _aligned_malloc without running any constructor.
struct ulstruct
{
void *lock_addr; // address of the lock that was released (a umutex, cast by the scheduler)
uthread *ut; // fiber that was waiting on that lock
};
//纤程
class uthread
{
public:
uthread(Scheduler *sc):m_runnable(0),m_bs(0),uthread_id(-1),m_status(NONE),_st_timeout(this),p_uthreadContext(0),m_next(0),wakeuptick(0),m_scheduler(sc)
{
m_unlockevent = (_node<ulstruct>*)_aligned_malloc(sizeof(*m_unlockevent),4);
m_locknode = (_node<uthread*>*)_aligned_malloc(sizeof(*m_locknode),4);
m_locknode->val = this;
m_unlockevent->val.ut = this;
}
static void WINAPI fiber_routine(LPVOID pvParam);

Scheduler *GetScheduler()
{
return m_scheduler;
}
//有事件到达,尝试唤醒block的纤程
void Signal();

void SetStatus(unsigned char st)
{
m_status = st;
}
unsigned char GetStatus()
{
return m_status;
}
PVOID GetUContext()
{
return p_uthreadContext;
}
void SetUContext(PVOID uct)
{
p_uthreadContext = uct;
}
void SetBs(BlockStruct *bs)
{
m_bs = bs;
}
BlockStruct *GetBs()
{
return m_bs;
}
void SetRunnable(runnable *ra)
{
m_runnable = ra;
}
runnable *GetRunnable()
{
return m_runnable;
}

st_timeout &GetTimeoutSt()
{
return _st_timeout;
}

uthread_t GetUid()
{
return uthread_id;
}
void SetUid(uthread_t uid)
{
uthread_id = uid;
}
uthread *Next()
{
return m_next;
}
void SetNext(uthread *ut)
{
m_next = ut;
}
uLong wakeuptick;

_node<ulstruct>* GetUnlockEvent()
{
return m_unlockevent;
}
_node<uthread*>* GetLockNode()
{
return m_locknode;
}

private:
unsigned char m_status;

uthread_t uthread_id;

PVOID p_uthreadContext;
BlockStruct *m_bs;
runnable *m_runnable;
uthread *m_next;
st_timeout _st_timeout;
_node<ulstruct>* m_unlockevent;
_node<uthread*>* m_locknode;
Scheduler *m_scheduler;
};
#endif
利用用户级线程提高多线程应用的性能
利用用户级线程提高多线程应用的性能

uthread.cpp

利用用户级线程提高多线程应用的性能
利用用户级线程提高多线程应用的性能
#include "stdafx.h"
#include "uthread.h"
#include "fiberApi.h"
#include <assert.h>
#include <iostream>
#include "ulock.h"
// Entry point of every pooled fiber. pvParam is the uthread the fiber was
// created for. The fiber is recycled rather than destroyed: after each job it
// releases itself back to the pool and yields, and it runs the next assigned
// runnable when it is resumed.
void WINAPI uthread::fiber_routine(LPVOID pvParam)
{
    uthread *self = static_cast<uthread*>(pvParam);
    for(;;)
    {
        assert(self->m_runnable);
        std::cout << "Ai Start,threadid :" << self->uthread_id << std::endl;
        self->m_runnable->main_routine();
        std::cout << "Ai Stop" << std::endl;
        self->m_runnable = 0;

        // Hand the fiber back to the pool, then give control to the scheduler.
        ReleaseCurrentUthread();
        _Yield(UNACTIVED);
    }

    // A fiber function must never simply return, because that would terminate
    // the thread running it; control is handed back to the scheduler instead,
    // which is responsible for deleting the fiber.
    _Yield(DEAD);
}
//等待的事件到达了,将纤程重新插入到可运行队列中
// The awaited event has arrived: if the blocking condition agrees, put the
// fiber back into the runnable queue.
void uthread::Signal()
{
    // Nothing to wake when the fiber is not blocked on anything. Without this
    // guard a direct call on an unblocked fiber dereferences a null m_bs.
    if(!m_bs)
        return;

    if(m_bs->WakeUp())
    {
        // Condition satisfied: mark the fiber runnable and queue it.
        Add2Active(this);
        m_bs = 0;
        wakeuptick = 0;
    }
}
利用用户级线程提高多线程应用的性能
利用用户级线程提高多线程应用的性能

然后是用户态的锁

uLock.h

利用用户级线程提高多线程应用的性能
利用用户级线程提高多线程应用的性能
#ifndef _ULOCK_H
#define _ULOCK_H
#pragma pack(push)
#pragma pack(4)
#include "fiberApi.h"
#include "lockfree_queue.h"
class Scheduler;
// User-level lock shared between fibers (first version shown in the article).
// A fiber that fails to take the lock pushes its per-fiber node onto
// m_blockthread and yields to its scheduler. UnLock() pops one parked fiber and
// posts an unlock event to that fiber's scheduler, which retries the lock on the
// fiber's behalf through _Lock().
// NOTE(review): a waiter that pushes its node after UnLock() has already popped
// from an empty list is never notified. The article itself reports lost unlock
// notifications with this version and replaces it further down.
struct umutex
{
friend class Scheduler;
public:
umutex():flag(0){}
// Takes the lock; when it is already held, parks the current fiber and yields.
void Lock()
{
if(InterlockedCompareExchange(&flag,1,0) == 1)
{

uthread *currentUThread = GetCurrentUThread();
_node<uthread*> *tmp = currentUThread->GetLockNode();
m_blockthread.push(tmp);
// Locking failed: block the current fiber.
Wait4Lock();
}
}
// Releases the lock and notifies one parked fiber, if any.
void UnLock()
{
if(InterlockedCompareExchange(&flag,0,1) == 0)
{
// The lock was not held.
return;
}
// Unlocked: wake a fiber that is blocked on this lock.
_node<uthread*> *tmp = m_blockthread.pop();
if(tmp)
{
NotifyUnLock(this,tmp->val);
}
}
private:
// Called by Scheduler on behalf of fiber ut: returns true when the lock was
// taken, otherwise parks ut's node again and returns false.
bool _Lock(uthread *ut)
{
bool ret = InterlockedCompareExchange(&flag,1,0) == 0;
if(!ret)
{
//uthread *currentUThread = GetCurrentUThread();
_node<uthread*> *tmp = ut->GetLockNode();
m_blockthread.push(tmp);
}
return ret;
}
private:
volatile long flag;// 1 while the lock is held, 0 otherwise
LockFreeQueue<uthread*> m_blockthread;// fibers blocked on this lock
};
#pragma pack(pop)
#endif
利用用户级线程提高多线程应用的性能
利用用户级线程提高多线程应用的性能

调度器

scheduler.h

利用用户级线程提高多线程应用的性能
利用用户级线程提高多线程应用的性能
#ifndef _SCHEDULER_H
#define _SCHEDULER_H
#include <Windows.h>
#include "uthread.h"
#include <map>
#include <list>
#include <time.h>
#include "minHeap.h"
#include "lockfree_queue.h"
#define MAX_FIBER 32
class Scheduler
{
friend class uthread;
friend void _Yield(uChar);
friend void ReleaseUthread(int);
friend void ReleaseCurrentUthread();
friend void SetCurrentUthreadState(uChar);
friend void Add2Active(uthread*);
friend uthread *GetCurrentUThread();
friend uthread_t GetCurrentUThreadId();
public:
Scheduler():m_active_head(0),m_active_tail(0),m_count(0),m_curuid(-1),pending_index(0)
{}
//初始化纤程库
void Init();
void Destroy();
//将一个纤程加入到调度列表中以运行runnable
uthread_t FiberStartRun(runnable *param);
//选择一个纤程以进行调度
void Schedule();
void SwitchTo(uthread_t uid)
{
SwitchToFiber(m_uthreads[uid]->GetUContext());
}
void SwitchToBlock(uthread_t uid)
{
if(m_uthreads[uid]->GetBs())
SwitchTo(uid);
}
void _Yield(uChar status = YIELD)
{
m_uthreads[m_curuid]->SetStatus(status);
SwitchToFiber(m_pUthreadContext);
}
//将一个纤程添加到可运行队列中
void Add2Active(uthread *ut);
//阻塞纤程,直到wc得到满足
void Block(BlockStruct *bs,uLong ms);
uthread_t GetFreeUthread()
{
if(!m_uthreadpool.empty())
{
uthread_t ret = m_uthreadpool.front();
m_uthreadpool.pop_front();
return ret;
}
return -1;
}
void ReleaseUthread(uthread_t uid)
{
if(uid < MAX_FIBER)
{
m_uthreads[uid]->SetStatus(UNACTIVED);
m_uthreadpool.push_back(uid);
}
}
//尝试唤醒uid
void TryWakeup(uthread_t uid)
{
if(m_uthreads[uid]->GetBs())
m_uthreads[uid]->Signal();
}
//强制唤醒纤程
void ForceWakeup(uthread_t uid)
{
if(m_uthreads[uid]->GetStatus() != ACTIVED)
{
//printf("强制唤醒/n");
Add2Active(m_uthreads[uid]);
}
}
//强制唤醒阻塞在type条件上的纤程
void ForceWakeup(uthread_t uid,uChar type)
{
if(m_uthreads[uid]->GetStatus() != ACTIVED &&
m_uthreads[uid]->GetBs()->bs_type == type)
{
//printf("强制唤醒/n");
Add2Active(m_uthreads[uid]);
}
}
//清空activelist,和pendingadd
void ClearActiveList();
void ClearTimeOut()
{
m_timeoutlist.Clear();
}
void Sleep(uLong ms);
void NotifyUnlock(_node<ulstruct> *nn)
{
m_unlockevent.push(nn);
}
void Wait4Lock();
private:
uthread *m_active_head;
uthread *m_active_tail;
//也可以不使用m_pendingAdd,根据测试结果决定
uthread_t m_pendingAdd[MAX_FIBER];
unsigned int pending_index;
minheap<MAX_FIBER> m_timeoutlist;
PVOID m_pUthreadContext;//调度器所在纤程的上下文

uthread *m_uthreads[MAX_FIBER];
LockFreeQueue<ulstruct> m_unlockevent;
int m_count;
int m_curuid; //当前正在运行的纤程的uid,==-1表示在scheduler中运行
std::list<uthread_t> m_uthreadpool;//fiber池
//std::map<void*,std::list<uthread*> > m_wait4lock;
//std::list<uthread*> m_wait4lock;
static const int reservesize = 65536;
static const int commitsize = 8192;
};
#endif
利用用户级线程提高多线程应用的性能
利用用户级线程提高多线程应用的性能

scheduler.cpp

利用用户级线程提高多线程应用的性能
利用用户级线程提高多线程应用的性能
#include "stdafx.h"
//#include "Scheduler.h"
#include <assert.h>
#include <iostream>
#include "fiberApi.h"
#include "uLock.h"
//extern umutex *g_lock;
// Picks a free fiber from the pool, assigns param as its job and queues it for
// execution. Returns the fiber id, or -1 when no fiber is free.
uthread_t Scheduler::FiberStartRun(runnable *param)
{
    const uthread_t uid = GetFreeUthread();
    if(uid == -1)
        return uid;

    uthread *fiber = m_uthreads[uid];
    fiber->SetRunnable(param);
    Add2Active(fiber);
    return uid;
}
// One scheduling pass:
//   1. drain the unlock-event queue and retry the lock for each notified fiber,
//   2. append all pending fibers to the active list,
//   3. run every fiber in the active list once, unlinking those that stopped
//      being runnable,
//   4. wake fibers whose timeout has expired.
void Scheduler::Schedule()
{
{
// See whether any fiber can now obtain the lock it was waiting for.
_node<ulstruct> *tmp = NULL;
while(tmp = m_unlockevent.pop())
{
umutex *um = (umutex*)tmp->val.lock_addr;
uthread *ut = tmp->val.ut;
if(um->_Lock(ut))
{
// Lock acquired on behalf of the fiber: make it runnable again.
Add2Active(ut);
}
// (An earlier, disabled implementation looked the lock up in a
// m_wait4lock map here and re-queued the unlock event when the waiter had
// not been registered yet, because another thread's unlock could call
// NotifyUnLock before Wait4Lock had completed.)
}
}
// Move every fiber that is waiting to be added into the active list.
{
for(unsigned int i = 0; i < pending_index; ++i)
{
uthread *ut = m_uthreads[m_pendingAdd[i]];
ut->SetNext(0);
if(m_active_tail)
{
m_active_tail->SetNext(ut);
m_active_tail = ut;
}
else
{
m_active_head = m_active_tail = ut;
}
}
pending_index = 0;
}
uthread *cur = m_active_head;
uthread *pre = NULL; // last fiber that stayed in the list; NULL while cur is the head
while(cur)
{
// Run the fiber until it switches back; m_curuid is -1 whenever the
// scheduler itself is executing.
m_curuid = cur->GetUid();
SwitchToFiber(cur->GetUContext());
m_curuid = -1;
unsigned char status = cur->GetStatus();
// A fiber in any of these states has to leave the runnable list.
if(status == DEAD || status == SLEEP || status == WAIT4EVENT || status == UNACTIVED || status == YIELD)
{
// Removing the first element.
if(cur == m_active_head)
{
// It is also the last element.
if(cur == m_active_tail)
m_active_head = m_active_tail = NULL;
else
m_active_head = cur->Next();
}
else if(cur == m_active_tail)
{
pre->SetNext(NULL);
m_active_tail = pre;
}
else
pre->SetNext(cur->Next());
uthread *tmp = cur;
cur = cur->Next();
tmp->SetNext(0);
// A fiber that merely yielded goes back into the pending buffer so it
// runs again on the next pass.
if(status == YIELD)
Add2Active(tmp);

}
else
{
pre = cur;
cur = cur->Next();
}
}
// Check for fibers whose timeout has expired.
{
uLong now = GetTickCount();
while(m_timeoutlist.Min() !=0 && m_timeoutlist.Min() <= now)
{
st_timeout *timeout = m_timeoutlist.PopMin();
// Only fibers that are still waiting or sleeping are woken.
if(timeout->ut->GetStatus() == WAIT4EVENT || timeout->ut->GetStatus() == SLEEP)
{
timeout->ut->wakeuptick = timeout->_timeout;
Add2Active(timeout->ut);
}
}
}
}
void Scheduler::Destroy()
{
for(int i = 0; i < MAX_FIBER; ++i)
{
if(m_uthreads[i])
{
DeleteFiber(m_uthreads[i]->GetUContext());
delete m_uthreads[i];
}
}
ConvertFiberToThread();
}
// Blocks the running fiber on bs and returns control to the scheduler.
// When ms > 0 a wake-up deadline of GetTickCount() + ms is registered as well.
// The fiber's BlockStruct is cleared once it is resumed.
// NOTE(review): the deadline stays in the timeout heap if the fiber is woken by
// the event first — confirm whether minheap needs an explicit removal.
void Scheduler::Block(BlockStruct *bs,uLong ms)
{
    if(ms > 0)
    {
        st_timeout &deadline = m_uthreads[m_curuid]->GetTimeoutSt();
        deadline._timeout = GetTickCount() + ms;
        // A non-zero index means the record is already in the heap.
        if(deadline.index)
            m_timeoutlist.Change(&deadline);
        else
            m_timeoutlist.Insert(&deadline);
    }

    uthread *self = m_uthreads[m_curuid];
    self->SetBs(bs);
    self->SetStatus(WAIT4EVENT);
    SwitchToFiber(m_pUthreadContext);

    // Resumed: m_curuid is read again on purpose, exactly as before.
    m_uthreads[m_curuid]->SetBs(0);
}
void Scheduler::Init()
{
m_pUthreadContext = ConvertThreadToFiber(NULL);

//创建fiber池
for(int i = 0 ; i < MAX_FIBER; ++i)
{
uthread *nthread = new uthread(this);
PVOID uthreadcontext = CreateFiberEx(commitsize,reservesize,0,uthread::fiber_routine,nthread);
assert(uthreadcontext);
nthread->SetUContext(uthreadcontext);
m_uthreads[i] = nthread;
nthread->SetUid(i);
m_uthreadpool.push_back(i);
}
}
//将一个纤程添加到可运行队列中
// Marks ut as runnable and records its id in the pending-add buffer; Schedule()
// moves it into the active list on its next pass.
void Scheduler::Add2Active(uthread *ut)
{
    // A fiber that is already active must not be queued twice.
    if(ut->GetStatus() == ACTIVED)
        return;

    ut->SetStatus(ACTIVED);
    m_pendingAdd[pending_index] = ut->GetUid();
    ++pending_index;
}
void Scheduler::ClearActiveList()
{
pending_index = 0;
uthread *cur = m_active_head;
while(cur)
{
uthread *next = cur->Next();
cur->SetNext(0);
cur = next;
}
m_active_head = m_active_tail = NULL;
}
// Puts the running fiber to sleep for ms milliseconds and returns to the
// scheduler. With ms == 0 no deadline is registered and the status is left
// unchanged; the fiber simply hands control back.
void Scheduler::Sleep(uLong ms)
{
    if(ms > 0)
    {
        uthread *self = m_uthreads[m_curuid];
        st_timeout &deadline = self->GetTimeoutSt();
        deadline._timeout = GetTickCount() + ms;
        // A non-zero index means the record is already in the heap.
        if(deadline.index)
            m_timeoutlist.Change(&deadline);
        else
            m_timeoutlist.Insert(&deadline);
        self->SetStatus(SLEEP);
    }
    SwitchToFiber(m_pUthreadContext);
}
//纤程在等待lock_addr锁,需要将纤程移出可运行队列,并记等待信息
// The running fiber is waiting for a lock: mark it WAIT4EVENT so Schedule()
// removes it from the runnable list, then switch back to the scheduler.
// The wait itself is recorded by the lock (the caller parks the fiber's node
// there); an earlier map-based bookkeeping kept here was removed.
void Scheduler::Wait4Lock()
{
    m_uthreads[m_curuid]->SetStatus(WAIT4EVENT);
    // Return to the scheduler.
    SwitchToFiber(m_pUthreadContext);
}
利用用户级线程提高多线程应用的性能
利用用户级线程提高多线程应用的性能

然后是一些API

fiberApi.h

利用用户级线程提高多线程应用的性能
利用用户级线程提高多线程应用的性能
#ifndef _FIBERAPI_H
#define _FIBERAPI_H
#include "Scheduler.h"
#include <map>
// Fiber scheduler associated with each OS thread. The free functions below look
// the scheduler up with GetCurrentThreadId() % 512.
// NOTE(review): the table has 1019 slots but lookups only use the first 512, and
// two threads whose ids collide modulo 512 would share a slot — confirm sizing.
//extern std::map<DWORD,Scheduler*> g_tlssc;
extern Scheduler* g_tlssc[1019];
void _Yield(uChar); // set the current fiber's status and return to its scheduler
void ReleaseUthread(int); // return the given fiber to the current scheduler's pool
void ReleaseCurrentUthread();// release the current fiber
void SetCurrentUthreadState(uChar);// set the status of the current fiber
void Add2Active(uthread*); // queue a fiber as runnable on the current thread's scheduler
uthread *GetCurrentUThread(); // fiber currently running on this thread
uthread_t GetCurrentUThreadId(); // id of that fiber (-1 while the scheduler runs)
void Wait4Lock(); // block the current fiber until a lock is handed to it
void NotifyUnLock(void*,uthread*); // post an unlock event for the given lock to the fiber's scheduler
#endif
#endif
利用用户级线程提高多线程应用的性能
利用用户级线程提高多线程应用的性能

fiberApi.cpp

利用用户级线程提高多线程应用的性能
利用用户级线程提高多线程应用的性能
#include "stdafx.h"
#include "fiberApi.h"
//std::map<DWORD,Scheduler*> g_tlssc;
Scheduler* g_tlssc[1019];
void _Yield(uChar state)
{
DWORD currenttrheadid = GetCurrentThreadId();
g_tlssc[currenttrheadid%512]->_Yield(state);
}
void ReleaseUthread(int uthreadid)
{
DWORD currenttrheadid = GetCurrentThreadId();
g_tlssc[currenttrheadid%512]->ReleaseUthread(uthreadid);
}
void ReleaseCurrentUthread()
{
DWORD currenttrheadid = GetCurrentThreadId();
Scheduler *sc = g_tlssc[currenttrheadid%512];
sc->ReleaseUthread(sc->m_curuid);
}
void SetCurrentUthreadState(uChar state)
{
DWORD currenttrheadid = GetCurrentThreadId();
Scheduler *sc = g_tlssc[currenttrheadid%512];
sc->m_uthreads[sc->m_curuid]->SetStatus(state);
}
void Add2Active(uthread *ut)
{
DWORD currenttrheadid = GetCurrentThreadId();
g_tlssc[currenttrheadid%512]->Add2Active(ut);
}
uthread *GetCurrentUThread()
{
DWORD currenttrheadid = GetCurrentThreadId();
Scheduler *sc = g_tlssc[currenttrheadid%512];
return sc->m_uthreads[sc->m_curuid];
}
uthread_t GetCurrentUThreadId()
{
DWORD currenttrheadid = GetCurrentThreadId();
Scheduler *sc = g_tlssc[currenttrheadid%512];
return sc->m_curuid;
}
void Wait4Lock()
{
DWORD currenttrheadid = GetCurrentThreadId();
g_tlssc[currenttrheadid%512]->Wait4Lock();
}
void NotifyUnLock(void *lock_addr,uthread *ut)
{
_node<ulstruct> *nn = ut->GetUnlockEvent();
nn->val.lock_addr = lock_addr;
ut->GetScheduler()->NotifyUnlock(nn);
//g_tlssc[threadid]->NotifyUnlock(lock_addr);
//std::map<DWORD,Scheduler>::iterator it = g_tlssc.begin();
//std::map<DWORD,Scheduler>::iterator end = g_tlssc.end();
//for(; it != end; ++it)
// it->second.NotifyUnlock(lock_addr);
}
利用用户级线程提高多线程应用的性能
利用用户级线程提高多线程应用的性能

经过进一步测试,在ulock的lock和unlock中使用的无锁队列m_blockthread可能因为多线程操作导致解锁通告丢失。

因此,m_blockthread需要改为普通队列,并且在操作前暂时用自旋锁锁定(暂时使用,希望可以找到更好的方法)。

大致修改如下:

利用用户级线程提高多线程应用的性能
利用用户级线程提高多线程应用的性能
// Takes the lock; when it is held, parks the current fiber on the wait list
// (guarded by the spinlock) and yields until a scheduler re-acquires the lock
// on the fiber's behalf.
void Lock()
{
    if(InterlockedCompareExchange(&flag,1,0) == 1)
    {
        uthread *currentUThread = GetCurrentUThread();
        // GetLockNode() returns a pointer; the previous declaration was
        // missing the '*' and did not compile.
        _node<uthread*> *tmp = currentUThread->GetLockNode();
        while(InterlockedCompareExchange(&spinlock,1,0) == 1) {}
        // Retry while holding the spinlock. If the owner released the lock
        // and already inspected the (still empty) wait list, nobody would
        // ever notify this fiber, so it must take the lock itself here.
        if(InterlockedCompareExchange(&flag,1,0) == 0)
        {
            InterlockedCompareExchange(&spinlock,0,1);
            return;
        }
        push(tmp);
        InterlockedCompareExchange(&spinlock,0,1);
        // Locking failed: block the current fiber.
        Wait4Lock();
    }
}
void UnLock()
{
if(InterlockedCompareExchange(&flag,0,1) == 0)
return;
while(InterlockedCompareExchange(&spinlock,1,0) == 1);
_node<uthread*> *tmp = pop();
InterlockedCompareExchange(&spinlock,0,1);
if(tmp)
NotifyUnLock(this,tmp->val);
}
利用用户级线程提高多线程应用的性能
利用用户级线程提高多线程应用的性能

其次,还有一个问题需要解决,就是各纤程获得锁的次数不平均。具体例子如下:在双核机器上启动两个线程,每个线程上各运行一个纤程,对testlist进行写入时会发现,大部分的写入是由其中一个纤程完成的,而另外一个纤程则很少能获得写入的机会。

 

下面是修改后的uLock.h,解决了纤程获得锁不平均的问题,只要创建的调度线程不超过cpu的数量,基本保证了各纤程有均等的机会获得锁。

利用用户级线程提高多线程应用的性能
利用用户级线程提高多线程应用的性能
#ifndef _ULOCK_H
#define _ULOCK_H
#pragma pack(push)
#pragma pack(4)
#include "fiberApi.h"
//#include "lockfree_queue.h"
class Scheduler;
//纤程间使用的用户级锁
struct umutex
{
friend class Scheduler;
public:
umutex():flag(0),spinlock(0),m_head(0),m_tail(0)
{
}
void Lock()
{
if(InterlockedCompareExchange(&flag,1,0) == 1)
{

uthread *currentUThread = GetCurrentUThread();
_node<uthread*> *tmp = currentUThread->GetLockNode();
while(InterlockedCompareExchange(&spinlock,1,0) == 1);
//再次尝试加锁
if(InterlockedCompareExchange(&flag,1,0) == 0)
{
InterlockedCompareExchange(&spinlock,0,1);
return;
}
push(tmp);
InterlockedCompareExchange(&spinlock,0,1);

//加锁失败,阻塞当前纤程
Wait4Lock();
}
}
void UnLock()
{
if(InterlockedCompareExchange(&flag,0,1) == 0)
{
//没有lock
return;
}
//已经解锁,挑选一个纤程,并将它唤醒
while(InterlockedCompareExchange(&spinlock,1,0) == 1);
_node<uthread*> *tmp = pop();
InterlockedCompareExchange(&spinlock,0,1);

if(tmp)
{
NotifyUnLock(this,tmp->val);
}
}
private:
bool _Lock(uthread *ut)
{
bool ret = InterlockedCompareExchange(&flag,1,0) == 0;
if(!ret)
{
uthread *currentUThread = GetCurrentUThread();
_node<uthread*> *tmp = ut->GetLockNode();
while(InterlockedCompareExchange(&spinlock,1,0) == 1);
//再次尝试加锁
if(InterlockedCompareExchange(&flag,1,0) == 0)
{
InterlockedCompareExchange(&spinlock,0,1);
return true;
}

push(tmp);
InterlockedCompareExchange(&spinlock,0,1);

}
return ret;
}
void push(_node<uthread*> *blockut)
{
blockut->_next = NULL;
if(NULL == m_tail)
{
m_head = m_tail = blockut;
}
else
{
m_tail->_next = blockut;
m_tail = blockut;
}
}
_node<uthread*> *pop()
{
if(NULL == m_head)
return NULL;
else
{
_node<uthread*> *ret = m_head;
m_head = m_head->_next;
if(m_head == NULL)
m_tail = m_head;
return ret;
}
}
private:
volatile long flag;//如果被持有则置1,否则置0
volatile long spinlock;//自旋锁,保护m_blockthread;
//队列,记录阻塞在这个锁上的纤程
_node<uthread*> *m_head;
_node<uthread*> *m_tail;
};
#pragma pack(pop)
#endif
利用用户级线程提高多线程应用的性能
利用用户级线程提高多线程应用的性能

测试代码:

利用用户级线程提高多线程应用的性能
利用用户级线程提高多线程应用的性能
// fiberFramework.cpp : 定义控制台应用程序的入口点。
//
#include "stdafx.h"
#include "CThread.h"
#include "fiberApi.h"
#include "uLock.h"
#include "CLock.h"
// Number of slots each benchmark fills.
#define TESTSIZE 1000000
// One result array per benchmark; each slot records which worker wrote it.
int g_testlist[TESTSIZE];
int g_testlistcs[TESTSIZE];
int g_testmutex[TESTSIZE];
// The three locks being compared: fiber lock, and two project lock types.
umutex *g_lock;
zMutex *g_lockmutex;
zLightMutex *g_lockcs;
/*std::list<int> g_testlist2;
std::list<int> g_testlistcs2;
std::list<int> g_testmutex2;
umutex *g_lock2;
zMutex *g_lockmutex2;
zLightMutex *g_lockcs2;
*/
// Set by a fiber worker once TESTSIZE slots were written; ends the schedule loops.
static volatile bool finish = false;
// Next slot to fill; shared by all workers and only touched under the lock.
static volatile long count = 0;
zThreadGroup g_threadgroup;
// Tick counts taken when count is 0 and when count reaches TESTSIZE.
DWORD starttime = 0;
DWORD endtime = 0;
// Fiber job for the umutex benchmark: repeatedly takes g_lock, writes its id
// into the next slot of g_testlist, releases the lock and yields. The worker
// that finds all TESTSIZE slots filled records the end time, sets finish and
// returns.
class uworker : public runnable
{
public:
    void main_routine()
    {
        for(;;)
        {
            g_lock->Lock();
            if(count==0)
                starttime = GetTickCount();
            if(count == TESTSIZE)
            {
                endtime = GetTickCount();
                finish = true;
                g_lock->UnLock();
                return;
            }
            g_testlist[count] = GetCurrentThreadId()+GetCurrentUThreadId();
            ++count;
            g_lock->UnLock();
            _Yield(YIELD);
            // Small amount of work outside the critical section.
            volatile int c = 0;
            for(volatile int cc = 0; cc < 100; ++cc)
                c++;
        }
    }
};
class CWorkerThread : public zThread,private Noncopyable
{
public:
CWorkerThread(const std::string &name = std::string("zThread"),const bool joinable = true)
:zThread(name,joinable){}
~CWorkerThread(){}
void run()
{
Scheduler *sc = new Scheduler;
sc->Init();
//g_tlssc.insert(std::make_pair(GetCurrentThreadId(),sc));
if(g_tlssc[GetCurrentThreadId()%TLSSIZE] != NULL)
{
printf("error here/n");
getchar();
exit(0);
}
g_tlssc[GetCurrentThreadId()%TLSSIZE] = sc;
{
uworker uw1;
uworker uw2;
uworker uw3;
uworker uw4;
sc->FiberStartRun(&uw1);
sc->FiberStartRun(&uw2);
sc->FiberStartRun(&uw3);
sc->FiberStartRun(&uw4);
}
/*{
uworker uw1;
uworker uw2;
uworker uw3;
uworker uw4;
sc->FiberStartRun(&uw1);
sc->FiberStartRun(&uw2);
sc->FiberStartRun(&uw3);
sc->FiberStartRun(&uw4);
}
*/

while(!finish)
{
sc->Schedule();
}

}
};
// OS thread for the zLightMutex benchmark: fills g_testlistcs under g_lockcs
// until TESTSIZE slots were written.
class CWorkerThreadCs : public zThread,private Noncopyable
{
public:
    CWorkerThreadCs(const std::string &name = std::string("zThread"),const bool joinable = true)
        :zThread(name,joinable){}
    ~CWorkerThreadCs(){}
    void run()
    {
        for(;;)
        {
            g_lockcs->Lock();
            if(count == 0)
                starttime = GetTickCount();
            if(count == TESTSIZE)
            {
                endtime = GetTickCount();
                g_lockcs->UnLock();
                return;
            }
            g_testlistcs[count] = GetCurrentThreadId();
            ++count;
            g_lockcs->UnLock();
            // Small amount of work outside the critical section.
            volatile int c = 0;
            for(volatile int cc = 0; cc < 100; ++cc)
                c++;
        }
    }
};
// OS thread for the zMutex benchmark: fills g_testmutex under g_lockmutex until
// TESTSIZE slots were written.
class CWorkerThreadMutex : public zThread,private Noncopyable
{
public:
    CWorkerThreadMutex(const std::string &name = std::string("zThread"),const bool joinable = true)
        :zThread(name,joinable){}
    ~CWorkerThreadMutex(){}
    void run()
    {
        for(;;)
        {
            g_lockmutex->Lock();
            if(count == 0)
                starttime = GetTickCount();
            if(count == TESTSIZE)
            {
                endtime = GetTickCount();
                g_lockmutex->UnLock();
                return;
            }
            g_testmutex[count] = GetCurrentThreadId();
            ++count;
            g_lockmutex->UnLock();
            // Small amount of work outside the critical section.
            volatile int c = 0;
            for(volatile int cc = 0; cc < 100; ++cc)
                c++;
        }
    }
};
struct TestCallback : public zThreadGroup::Callback
{
void exec(zThread *e)
{
e->start();
}
~TestCallback(){}
};
// Runs the fiber/umutex benchmark with n OS threads and prints the elapsed
// ticks plus how many slots each worker id wrote.
// NOTE(review): g_threadgroup is shared by all three benchmarks — confirm that
// joinAll() removes finished threads, otherwise later runs restart them.
void testfiber(int n)
{
    void *buf = _aligned_malloc(sizeof(*g_lock),4);
    g_lock = new (buf)umutex;
    for(int i = 0; i < n; ++i)
    {
        g_threadgroup.add(new CWorkerThread);
    }
    TestCallback CallBack;
    g_threadgroup.execAll(CallBack);
    g_threadgroup.joinAll();

    // The literals previously ended in "/n" (a mangled "\n"), and the
    // specifiers did not match long / DWORD / size_t arguments.
    printf("test fiber\n");
    printf("count %ld\n",(long)count);
    printf("time %lu\n",(unsigned long)(endtime - starttime));

    // Histogram: worker id -> number of slots it wrote.
    std::map<int,int> stat;
    for(int i = 0; i < TESTSIZE; ++i)
    {
        ++stat[g_testlist[i]];
    }
    printf("stat size = %lu\n",(unsigned long)stat.size());
    for(std::map<int,int>::iterator it = stat.begin(); it != stat.end(); ++it)
    {
        printf("id=%d,count=%d\n",it->first,it->second);
    }
}
// Runs the zLightMutex benchmark with n OS threads and prints the elapsed
// ticks plus how many slots each thread id wrote.
void testcs(int n)
{
    g_lockcs = new zLightMutex;
    for(int i = 0; i < n; ++i)
    {
        g_threadgroup.add(new CWorkerThreadCs);
    }
    TestCallback CallBack;
    g_threadgroup.execAll(CallBack);
    g_threadgroup.joinAll();

    // The literals previously ended in "/n" (a mangled "\n"), and the
    // specifiers did not match long / DWORD / size_t arguments.
    printf("test cs\n");
    printf("count %ld\n",(long)count);
    printf("time %lu\n",(unsigned long)(endtime - starttime));

    // Histogram: thread id -> number of slots it wrote.
    std::map<int,int> stat;
    for(int i = 0; i < TESTSIZE; ++i)
    {
        ++stat[g_testlistcs[i]];
    }
    printf("stat size = %lu\n",(unsigned long)stat.size());
    for(std::map<int,int>::iterator it = stat.begin(); it != stat.end(); ++it)
    {
        printf("id=%d,count=%d\n",it->first,it->second);
    }
}
// Runs the zMutex benchmark with n OS threads and prints the elapsed ticks plus
// how many slots each thread id wrote.
void testmutex(int n)
{
    g_lockmutex = new zMutex;
    for(int i = 0; i < n; ++i)
    {
        g_threadgroup.add(new CWorkerThreadMutex);
    }
    TestCallback CallBack;
    g_threadgroup.execAll(CallBack);
    g_threadgroup.joinAll();

    // The literals previously ended in "/n" (a mangled "\n"), and the
    // specifiers did not match long / DWORD / size_t arguments.
    printf("test mutex\n");
    printf("count %ld\n",(long)count);
    printf("time %lu\n",(unsigned long)(endtime - starttime));

    // Histogram: thread id -> number of slots it wrote.
    std::map<int,int> stat;
    for(int i = 0; i < TESTSIZE; ++i)
    {
        ++stat[g_testmutex[i]];
    }
    printf("stat size = %lu\n",(unsigned long)stat.size());
    for(std::map<int,int>::iterator it = stat.begin(); it != stat.end(); ++it)
    {
        printf("id=%d,count=%d\n",it->first,it->second);
    }
}
int _tmain(int argc, _TCHAR* argv[])
{
int n = _ttol(argv[1]);
memset(g_tlssc,0,sizeof(g_tlssc));
count = 0;
testfiber(n/4);
count = 0;
testcs(n);
count = 0;
testmutex(n);

/*LockFreeQueue<int> q;
for(int i = 0; i < 5; ++i)
{
_node<int> *pNode = new _node<int>;
pNode->val = i;
q.push(pNode);
}
//q.print();
for(int i = 0; i < 5; ++i)
{
_node<int> *pNode = q.pop();
printf("%d/n",pNode->val);
}
_node<int> *pNode = q.pop();
*/
getchar();
return 0;
}
利用用户级线程提高多线程应用的性能
利用用户级线程提高多线程应用的性能

利用用户级线程提高多线程应用的性能

上一篇:《小猪CMS(PigCms)多用户微信营销服务平台系统V6.1完美破解至尊版带微用户管理CRM+微信支付》


下一篇:从阿里云数据库入选Gartner谈数据库的演化