#include <osg/Notify>
#include <osgViewer/Viewer>
#include <osgCompute/Memory>
#include <osgCompute/Module>
#include <osgCuda/Memory>
#include <memory.h>
#include <cstring>
#include <iostream>

#pragma comment(lib, "osgViewerd.lib")
#pragma comment(lib, "osgComputed.lib")
#pragma comment(lib, "osgCudad.lib")
#pragma comment(lib, "osgd.lib")

// Implemented in the CUDA translation unit: launches the element-wise add
// kernel with the given grid/block size on the three buffers.
extern "C" void MyCudaTest(unsigned int numBlocks, unsigned int numThreads, void * bytes1, void * bytes2, void * bytes3);

/**
 * Compute module that adds two integer buffers element-wise into a third
 * by forwarding them to MyCudaTest().
 *
 * Usage: set all three buffers, call init(), then launch().
 */
class MyModule : public osgCompute::Module
{
public:
    MyModule()
        : osgCompute::Module()
    {
        clearLocal();
    }

    META_Object(osgCompute, MyModule)

    /**
     * Derives the launch configuration from the first buffer.
     *
     * @return false if a buffer is missing or too small, otherwise the
     *         result of osgCompute::Module::init().
     */
    virtual bool init()
    {
        if (!_buffer1.valid() || !_buffer2.valid() || !_buffer3.valid())
        {
            osg::notify(osg::FATAL)
                << "MyModule::init(): all three buffers must be set before init()." << std::endl;
            return false;
        }

        _numThreads = 1;
        // Integer division: with more than one thread per block, a remainder
        // of trailing elements would not be covered by the launch.
        _numBlocks = _buffer1->getDimension(0) / _numThreads;

        // One element is processed per launched thread, so the other two
        // buffers must be at least as long as the launched range.
        const unsigned int elementCount = _numBlocks * _numThreads;
        if (_buffer2->getDimension(0) < elementCount || _buffer3->getDimension(0) < elementCount)
        {
            osg::notify(osg::FATAL)
                << "MyModule::init(): buffer2/buffer3 are smaller than buffer1." << std::endl;
            return false;
        }

        return osgCompute::Module::init();
    }

    /** Releases the buffers and resets the launch configuration. */
    virtual void clear()
    {
        clearLocal();
        osgCompute::Module::clear();
    }

    /**
     * Maps the three buffers and runs the add kernel on them.
     * Does nothing (besides a warning) when the module is not initialized
     * or a buffer cannot be mapped.
     */
    virtual void launch()
    {
        if (_numBlocks == 0 || _numThreads == 0 ||
            !_buffer1.valid() || !_buffer2.valid() || !_buffer3.valid())
        {
            osg::notify(osg::WARN)
                << "MyModule::launch(): module is not initialized; nothing launched." << std::endl;
            return;
        }

        // map() is called with its default arguments, exactly as before.
        // NOTE(review): presumably this yields device-side pointers - confirm
        // against the osgCompute::Memory::map() defaults.
        void * input1 = _buffer1->map();
        void * input2 = _buffer2->map();
        void * output = _buffer3->map();
        if (input1 == NULL || input2 == NULL || output == NULL)
        {
            osg::notify(osg::WARN)
                << "MyModule::launch(): failed to map a buffer; nothing launched." << std::endl;
            return;
        }

        MyCudaTest(_numBlocks, _numThreads, input1, input2, output);
    }

    inline void setBuffer1(osgCompute::Memory * buffer) { _buffer1 = buffer; }
    inline void setBuffer2(osgCompute::Memory * buffer) { _buffer2 = buffer; }
    inline void setBuffer3(osgCompute::Memory * buffer) { _buffer3 = buffer; }

protected:
    virtual ~MyModule()
    {
        clearLocal();
    }

    /** Resets every member owned by this class to its empty state. */
    virtual void clearLocal()
    {
        // Zero the launch configuration so launch() can detect that init()
        // has not run (these members were previously left uninitialized).
        _numThreads = 0;
        _numBlocks = 0;
        _buffer1 = NULL;
        _buffer2 = NULL;
        _buffer3 = NULL;
    }

protected:
    unsigned int _numThreads;   // threads per block used by launch()
    unsigned int _numBlocks;    // number of blocks used by launch()
    osg::ref_ptr<osgCompute::Memory> _buffer1;  // first input
    osg::ref_ptr<osgCompute::Memory> _buffer2;  // second input
    osg::ref_ptr<osgCompute::Memory> _buffer3;  // output

private:
    // Copying is not supported; the copy only gets an empty configuration.
    MyModule(const MyModule &, const osg::CopyOp &)
        : _numThreads(0), _numBlocks(0)
    {
    }

    inline MyModule & operator = (const MyModule &) { return *this; }
};

/** Creates and initializes a 1D osgCuda buffer of numElements ints. */
static osg::ref_ptr<osgCuda::Memory> createIntBuffer(unsigned int numElements)
{
    osg::ref_ptr<osgCuda::Memory> buffer = new osgCuda::Memory;
    buffer->setElementSize(sizeof(int));
    buffer->setDimension(0, numElements);
    buffer->init();
    return buffer;
}

/**
 * Copies byteCount bytes from values into the buffer through a
 * MAP_HOST_TARGET mapping.
 *
 * @return false if the buffer could not be mapped.
 */
static bool uploadToBuffer(osgCuda::Memory & buffer, const unsigned int * values, std::size_t byteCount)
{
    void * hostPtr = buffer.map(osgCompute::MAP_HOST_TARGET);
    if (hostPtr == NULL)
    {
        return false;
    }
    std::memcpy(hostPtr, values, byteCount);
    return true;
}

/**
 * Adds (1, 2, 3) and (4, 5, 6) through MyModule and prints the three sums.
 *
 * @return 0 on success, 1 if the module or a buffer mapping failed.
 */
int main()
{
    unsigned int a[3] = {1, 2, 3};
    unsigned int b[3] = {4, 5, 6};
    unsigned int c[3] = {0, 0, 0};
    const unsigned int num = sizeof(a) / sizeof(a[0]);

    osg::ref_ptr<osgCuda::Memory> buffer1 = createIntBuffer(num);
    osg::ref_ptr<osgCuda::Memory> buffer2 = createIntBuffer(num);
    osg::ref_ptr<osgCuda::Memory> buffer3 = createIntBuffer(num);

    osg::ref_ptr<MyModule> module = new MyModule;
    module->setBuffer1(buffer1.get());
    module->setBuffer2(buffer2.get());
    module->setBuffer3(buffer3.get());
    if (!module->init())
    {
        std::cerr << "MyModule initialization failed." << std::endl;
        return 1;
    }

    if (!uploadToBuffer(*buffer1, a, sizeof(a)) ||
        !uploadToBuffer(*buffer2, b, sizeof(b)) ||
        !uploadToBuffer(*buffer3, c, sizeof(c)))
    {
        std::cerr << "Failed to map a buffer for host writing." << std::endl;
        return 1;
    }

    module->launch();

    // Map the result for host reading.
    const unsigned int * result = (const unsigned int *)(buffer3->map(osgCompute::MAP_HOST_SOURCE));
    if (result == NULL)
    {
        std::cerr << "Failed to map the result buffer for host reading." << std::endl;
        return 1;
    }

    std::cout << "并行计算:(1, 2, 3)于(4, 5, 6)的和:"<< std::endl;
    for (unsigned int i = 0; i < buffer3->getDimension(0); ++i)
    {
        std::cout << result[i] << std::endl;
    }

    return 0;
}
#include <cuda_runtime.h>

#include <cstdio>

/**
 * Element-wise integer addition: bytes3[i] = bytes1[i] + bytes2[i].
 *
 * Launch layout: 1D grid of 1D blocks. Each thread handles exactly one
 * element, at index blockIdx.x * blockDim.x + threadIdx.x. No element count
 * is passed, so there is no bounds guard: all three arrays must hold at
 * least gridDim.x * blockDim.x ints. Uses no shared memory.
 */
__global__ void addKernel(int * bytes1, int * bytes2, int * bytes3)
{
    // Flat global index. Using blockIdx.x alone made every thread of a block
    // write the same element whenever blockDim.x > 1.
    const size_t idx = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
    bytes3[idx] = bytes2[idx] + bytes1[idx];
}

/**
 * Launches addKernel on numBlocks x numThreads threads.
 *
 * @param numBlocks  number of blocks in the 1D grid
 * @param numThreads number of threads per block
 * @param bytes1     first input; dereferenced on the device as int*
 * @param bytes2     second input; dereferenced on the device as int*
 * @param bytes3     output; dereferenced on the device as int*
 *
 * Each array must hold at least numBlocks * numThreads ints. The launch is
 * not followed by a synchronization here; launch errors are reported on
 * stderr.
 */
extern "C" void MyCudaTest(unsigned int numBlocks, unsigned int numThreads, void * bytes1, void * bytes2, void * bytes3)
{
    // A zero-sized grid or block is an invalid launch configuration; there
    // is nothing to compute in that case.
    if (numBlocks == 0 || numThreads == 0)
    {
        return;
    }

    if (bytes1 == NULL || bytes2 == NULL || bytes3 == NULL)
    {
        fprintf(stderr, "MyCudaTest: null buffer pointer; kernel not launched\n");
        return;
    }

    addKernel<<<numBlocks, numThreads>>>((int *)bytes1, (int *)bytes2, (int *)bytes3);

    // Kernel launches return no status; configuration errors only surface
    // through cudaGetLastError().
    const cudaError_t launchStatus = cudaGetLastError();
    if (launchStatus != cudaSuccess)
    {
        fprintf(stderr, "MyCudaTest: kernel launch failed: %s\n", cudaGetErrorString(launchStatus));
    }
}