一步一步写一个简单通用的makefile(三)

上一篇《一步一步写一个简单通用的makefile(二)》里的makefile已经可以对通用的代码进行编译,这一篇我将对上一次的makefile做进一步的优化。

优化后的makefile:

#Hellomake
#Magnum, 2014-10-20

# Remove a half-written target when its recipe fails, so a broken file is
# never mistaken for an up-to-date one.
.DELETE_ON_ERROR:

# Compiler and compile options
CC=gcc
CFLAGS=-Wall
# Libraries to link: for libm.a write -lm (drop the "lib" prefix and the ".a" suffix)
LIBS=-lm
# Header search path; searched before the system include paths
IncludeDir = -I./include/
# Library search path; empty by default, add -L<dir> entries when needed
LinkDir =
# NOTE: OBJ_DIR is not referenced by any rule below; objects go to PROJECT_OBJ_DIR.
OBJ_DIR = ./obj
# Comments are kept on their own lines: whitespace in front of a trailing '#'
# becomes part of the variable's value and would corrupt $(BIN_DIR)/$(EXE_NAME).
BIN_DIR = ./bin
# PROJECT_TOP_DIR may be the absolute working directory or simply "."
PROJECT_TOP_DIR := $(CURDIR)
PROJECT_BIN_DIR=$(PROJECT_TOP_DIR)/bin
PROJECT_SRC_DIR=$(PROJECT_TOP_DIR)/src
PROJECT_LIB_DIR=$(PROJECT_TOP_DIR)/lib
PROJECT_OBJ_DIR=$(PROJECT_TOP_DIR)/objs
MKDIR := mkdir -p
# Name and location of the executable to build
EXE_NAME=hellomake
TARGET=$(BIN_DIR)/$(EXE_NAME)
# Extension of the source files
FILE_TYPE=c
# Every source file in PROJECT_SRC_DIR, then the matching object file names
src=$(wildcard $(PROJECT_SRC_DIR)/*.$(FILE_TYPE))
dir= $(notdir $(src))
PROJECT_OBJ= $(patsubst %.$(FILE_TYPE),%.o,$(dir) )
PROJECT_ALL_OBJS= $(addprefix $(PROJECT_OBJ_DIR)/, $(PROJECT_OBJ))

# These names are commands, not files; without this a file called "all",
# "chdir" or "clean" would silently disable the target.
.PHONY: all chdir clean

# Default goal: create the output directories, build the executable, then
# print the computed object lists for debugging.
all: chdir $(TARGET)
	@echo "magnum $(PROJECT_OBJ)"
	@echo "magnum $(PROJECT_OBJ_DIR)"
	@echo "magnum $(PROJECT_ALL_OBJS)"

# Link step. The bin directory is an order-only prerequisite: it must exist,
# but its timestamp never forces a relink.
$(TARGET): $(PROJECT_ALL_OBJS) | $(PROJECT_BIN_DIR)
	$(CC) -o $@ $^ $(LinkDir) $(LIBS)

# Kept for compatibility: "make chdir" still creates both output directories.
chdir: | $(PROJECT_OBJ_DIR) $(PROJECT_BIN_DIR)

# One rule per output directory; "mkdir -p" succeeds if it already exists.
$(PROJECT_OBJ_DIR) $(PROJECT_BIN_DIR):
	@$(MKDIR) $@

# Remove everything this makefile generates.
clean:
	$(RM) -r $(PROJECT_BIN_DIR) $(PROJECT_OBJ_DIR)

# Compile one source file into one object file. The object directory is
# order-only so that compilation is safe under "make -j".
$(PROJECT_OBJ_DIR)/%.o: $(PROJECT_SRC_DIR)/%.$(FILE_TYPE) | $(PROJECT_OBJ_DIR)
	$(CC) $(CFLAGS) -o $@ -c $< $(IncludeDir)

这个优化后的makefile对于一般的小测试程序都有一定的通用性。如果要用它来编译新的程序,只需要修改下面几处:
1. 编译器(CC):C 用 gcc,C++ 用 g++

2. 源文件的路径 PROJECT_SRC_DIR

3. 文件类型c 还是cpp

4. 还有这三个:

# 需要链接库的库名,比如libm.a,就是-lm,需要去掉前面的lib和后面的.a
LIBS=-lm
# 设置默认搜索头文件的路径,优先是这个,然后是系统路径
IncludeDir = -I./include/
# 需要链接的库的路径
LinkDir = #-L

下面我就用这个模板去编译一个opencl的小程序。

文件树如下:

.
├── convolve.cl
├── convolve_cl.cpp
├── makefile
└── makefile~

很简单,只有3个文件:convolve.cl、convolve_cl.cpp、makefile(makefile~ 是编辑器留下的备份文件,可以忽略)。

convolve_cl.cpp源码:

// newTutorial1.cpp : Defines the entry point for the console application.
//
// Host program for a 3x3 box filter. It runs the "filter" kernel from
// convolve.cl on a WIDTH x HEIGHT 8-bit image filled with random values, then
// recomputes the filter on the CPU and reports every pixel that differs.
//
// NOTE(review): this listing was recovered from a copy in which line breaks
// were merged and bare integer literals were dropped. Literals that are fixed
// by the OpenCL API or by the surrounding code have been restored; the few
// that could not be determined are marked NOTE(review) below - confirm them
// against the original file.
//#include "stdafx.h"
#include <CL/cl.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <iostream>
#include <fstream>
#include <string.h>
#include <string>
#include <pthread.h>
using namespace std;
#pragma comment (lib,"OpenCL.lib")
#define WIDTH 1920
#define HEIGHT 1080
#define FRAMES 1
// Nine neighbours are summed per pixel.
// NOTE(review): the divisor literal was lost; 9 is assumed from the nine
// taps. It must match the divisor used in convolve.cl.
#define FILTER_TAPS 9
// NOTE(review): the constant added after the division was lost; 0 is assumed.
// It must match the constant used in convolve.cl.
#define FILTER_BIAS 0
typedef unsigned char uint8_t;

// Returns the current CLOCK_REALTIME time in milliseconds.
static double now_ms(void) {
    struct timespec res;
    clock_gettime(CLOCK_REALTIME, &res);
    return 1000.0 * res.tv_sec + (double) res.tv_nsec / 1e6;
}

// CPU reference implementation of the filter.
//   src : input image, w * h bytes, row-major
//   dst : receives the CPU result for every non-border pixel
//   ocl : result produced by the OpenCL kernel, compared against dst
//   w,h : image size; must not exceed WIDTH x HEIGHT
// Prints one line per differing pixel. Returns 0 on success and non-zero when
// the size is rejected (main treats any non-zero value as failure).
static int filter0(uint8_t *src, uint8_t *dst, uint8_t *ocl, int w, int h) {
    printf("filter 0 ,w=%d, h=%d\n",w, h);
    if (w > WIDTH || h > HEIGHT)
        return 1; // NOTE(review): literal lost; a positive non-zero value is assumed

    // Border pixels have no complete 3x3 neighbourhood and are skipped, which
    // is also what the kernel does.
    for (int i = 1; i < h - 1; ++i) {
        for (int j = 1; j < w - 1; ++j) {
            int index = j + i * w;
            uint8_t lu = src[index - 1 - w];
            uint8_t lm = src[index - 1];
            uint8_t ld = src[index - 1 + w];
            uint8_t mu = src[index - w];
            uint8_t mm = src[index];
            uint8_t md = src[index + w];
            uint8_t ru = src[index + 1 - w];
            uint8_t rm = src[index + 1];
            uint8_t rd = src[index + 1 + w];
            int sum = lu + lm + ld + mu + mm + md + ru + rm + rd;
            // NOTE(review): the cast binds tighter than the division, so the
            // sum is truncated to 8 bits before it is divided. The kernel uses
            // the same expression; change both together or the comparison
            // below will report differences.
            dst[index] = (uint8_t)sum / FILTER_TAPS + FILTER_BIAS;
            if (ocl[index] != dst[index])
                printf("index[%d] differ \n", index);
        }
    }
    return 0;
}

// Reads the whole text file into a string.
// Returns 0 on success and 1 when the file cannot be opened or read.
int convertToString(const char *filename, std::string& s)
{
    std::fstream f(filename, (std::fstream::in | std::fstream::binary));
    if (!f.is_open())
    {
        printf("Error: Failed to open file %s\n", filename);
        return 1;
    }

    f.seekg(0, std::fstream::end);
    std::streamoff length = f.tellg();
    f.seekg(0, std::fstream::beg);
    if (length < 0)
    {
        // tellg() reports -1 when the stream is in a failed state.
        printf("Error: Failed to open file %s\n", filename);
        return 1;
    }

    s.resize((size_t)length);
    if (length > 0)
        f.read(&s[0], length);
    f.close();
    return 0;
}

// Prints a diagnostic and returns false when an OpenCL call did not succeed.
static bool clOk(cl_int err, const char *what)
{
    if (err == CL_SUCCESS)
        return true;
    printf("Error: %s failed: %d\n", what, err);
    return false;
}

// Runs the filter on the GPU, times the kernel, and checks the result against
// the CPU implementation. Returns 0 on success and -1 on a setup failure.
// Early error returns leave resource cleanup to process exit.
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    int i, ret;
    double start, end;
    const size_t imageBytes = (size_t)WIDTH * HEIGHT * sizeof(uint8_t);

    uint8_t *inputBuf = (uint8_t *)malloc(imageBytes);
    uint8_t *dstBuf1 = (uint8_t *)malloc(imageBytes);
    if (!inputBuf || !dstBuf1)
    {
        printf("Error: out of memory\n");
        free(inputBuf);
        free(dstBuf1);
        return -1;
    }
    memset(dstBuf1, 0, imageBytes);

    srand( (unsigned)time( NULL ) );
    for (i = 0; i < WIDTH * HEIGHT; i++) {
        // NOTE(review): the modulus literal was lost; 256 (the full 8-bit
        // range) is assumed.
        inputBuf[i] = rand() % 256;
    }

    // OpenCL error codes are negative, so the status must be signed.
    cl_int status;

    // Get the platform
    cl_platform_id platform;
    status = clGetPlatformIDs( 1, &platform, NULL );
    if (!clOk(status, "clGetPlatformIDs"))
        return -1;

    // Get the GPU device
    cl_device_id device;
    status = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
    if (!clOk(status, "clGetDeviceIDs"))
        return -1;

    cl_uint maxComputeUnits;
    status = clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS,
                             sizeof(cl_uint), &maxComputeUnits, NULL);
    if (!clOk(status, "clGetDeviceInfo"))
        return -1;
    printf("maxComputeUnits = %d\n" ,maxComputeUnits);

    // Create the context
    cl_context context = clCreateContext( NULL, 1, &device, NULL, NULL, &status);
    if (!clOk(status, "clCreateContext"))
        return -1;

    // Create the command queue; profiling is enabled so the kernel can be timed
    cl_command_queue queue = clCreateCommandQueue( context, device,
                                                   CL_QUEUE_PROFILING_ENABLE, &status );
    if (!clOk(status, "clCreateCommandQueue"))
        return -1;

    // Create the OpenCL memory objects: the input is copied from inputBuf,
    // the output is written by the kernel
    cl_mem clinbuf = clCreateBuffer(context,
                                    CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                                    WIDTH*HEIGHT*sizeof(cl_uchar), inputBuf,
                                    &status );
    if (!clOk(status, "clCreateBuffer(input)"))
        return -1;
    cl_mem cloutbuf = clCreateBuffer( context,
                                      CL_MEM_WRITE_ONLY,
                                      WIDTH*HEIGHT * sizeof(cl_uchar),
                                      NULL, &status );
    if (!clOk(status, "clCreateBuffer(output)"))
        return -1;

    // Load the kernel source; the path is relative to the working directory
    const char * filename = "convolve.cl";
    std::string sourceStr;
    if (convertToString(filename, sourceStr) != 0)
        return -1;
    const char * source = sourceStr.c_str();
    size_t sourceSize[] = { strlen(source) };

    // Create the program object
    cl_program program = clCreateProgramWithSource( context, 1, &source,
                                                    sourceSize, &status);
    if (!clOk(status, "clCreateProgramWithSource"))
        return -1;

    // Build the program object; on failure print the compiler log
    status = clBuildProgram( program, 1, &device, NULL, NULL, NULL );
    if(status != 0)
    {
        printf("clBuild failed:%d\n", status);
        static char tbuf[0x10000];
        tbuf[0] = '\0'; // stays printable even if the log query fails
        clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0x10000, tbuf, NULL);
        printf("\n%s\n", tbuf);
        return -1;
    }

    cl_int dimx = WIDTH;
    cl_int dimy = HEIGHT;
    cl_event ev;
    cl_kernel kernel;
    cl_ulong startTime, endTime;
    cl_ulong kernelExecTimeNs;

#if 1
    // Create the kernel object
    kernel = clCreateKernel( program, "filter", &status );
    if (!clOk(status, "clCreateKernel"))
        return -1;

    // Set the kernel arguments in the order declared by filter():
    // in, Width, Height, out
    clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&clinbuf);
    clSetKernelArg(kernel, 1, sizeof(cl_int), (void *)&dimx);
    clSetKernelArg(kernel, 2, sizeof(cl_int), (void *)&dimy);
    clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&cloutbuf);

    // One work-item per pixel; the local work-group size is left to the runtime
    size_t globalws[2] = {WIDTH,HEIGHT};

    // Run the kernel
    status = clEnqueueNDRangeKernel( queue, kernel,
                                     2, NULL, globalws, NULL,
                                     0, NULL, &ev);
    if (!clOk(status, "clEnqueueNDRangeKernel"))
        return -1;
    clFinish( queue );

    // Compute the kernel execution time from the profiling counters, which
    // are in nanoseconds
    clGetEventProfilingInfo(ev, CL_PROFILING_COMMAND_START,
                            sizeof(cl_ulong), &startTime, NULL);
    clGetEventProfilingInfo(ev, CL_PROFILING_COMMAND_END,
                            sizeof(cl_ulong), &endTime, NULL);
    kernelExecTimeNs = endTime-startTime;
    printf("kernal exec time :%8.6f ms\n ", kernelExecTimeNs*1e-6 );

    // Map the result back into host memory (blocking)
    cl_uchar *ptr;
    ptr = (cl_uchar *) clEnqueueMapBuffer( queue,
                                           cloutbuf,
                                           CL_TRUE,
                                           CL_MAP_READ,
                                           0,
                                           WIDTH*HEIGHT * sizeof(cl_uchar),
                                           0, NULL, NULL, &status );
    if (!clOk(status, "clEnqueueMapBuffer"))
        return -1;

    // Verify the result by comparing it with the CPU computation
    start = now_ms();
    for(i = 0; i< FRAMES; i++) {
        ret = filter0(inputBuf, dstBuf1, ptr,WIDTH, HEIGHT);
        if(ret)
            printf("filter Fail \n");
    }
    end = now_ms();
    printf("filter 0 %f \n", (end - start)/FRAMES);

    // Release what this section acquired
    clEnqueueUnmapMemObject(queue, cloutbuf, ptr, 0, NULL, NULL);
    clFinish( queue );
    clReleaseEvent(ev);
    clReleaseKernel(kernel);
#endif

    free(inputBuf);
    free(dstBuf1);

    // Release the OpenCL resource objects
    clReleaseMemObject(clinbuf);
    clReleaseMemObject(cloutbuf);
    clReleaseProgram(program);
    clReleaseCommandQueue(queue);
    clReleaseContext(context);
    return 0;
}

convolve.cl 源码:

#pragma OPENCL EXTENSION cl_amd_printf : enable

// 3x3 box filter over an 8-bit image stored row-major.
//   in     : input image, Width * Height bytes
//   Width  : image width in pixels  (global dimension 0)
//   Height : image height in pixels (global dimension 1)
//   out    : output image; border pixels are never written
//
// NOTE(review): the integer literals in this listing were lost in the copy it
// was recovered from. The dimension indices, the border offsets and the
// +/-1 neighbour offsets follow from the code itself. The divisor (9) and the
// added constant (0) are assumptions and must match the host-side reference
// filter0() in convolve_cl.cpp.
__kernel void filter( __global uchar* in, int Width, int Height, __global uchar* out)
{
    // Position along WIDTH
    int x = get_global_id(0);
    // Position along HEIGHT
    int y = get_global_id(1);

    // Border pixels have no complete 3x3 neighbourhood. The range test also
    // rejects work-items outside the image if the global size is ever padded.
    if (x <= 0 || y <= 0 || x >= Width - 1 || y >= Height - 1)
        return;

    int index = x + y * Width;
    uchar lu = in[index - 1 - Width];
    uchar lm = in[index - 1];
    uchar ld = in[index - 1 + Width];
    uchar mu = in[index - Width];
    uchar mm = in[index];
    uchar md = in[index + Width];
    uchar ru = in[index + 1 - Width];
    uchar rm = in[index + 1];
    uchar rd = in[index + 1 + Width];
    int sum = lu + lm + ld + mu + mm + md + ru + rm + rd;
    // NOTE(review): the cast binds tighter than the division, so the sum is
    // truncated to 8 bits before it is divided. The host reference uses the
    // same expression; change both together.
    out[index] = (uchar)sum / 9 + 0;
}

下面是修改上面给出的模板makefile文件,来编译这个程序:
1. 因为是cpp,所以CC=g++, FILE_TYPE=cpp
2. 可执行文件的名字:EXE_NAME=convolve_cl

3. 链接的库:

LIBS= -lOpenCL -lfreeimage -lrt
IncludeDir = -I/opt/AMDAPP/include
LinkDir = -L/opt/AMDAPP/lib/x86_64
修改后的makefile如下:

#Hellomake
#Magnum, 2014-10-20

# Remove a half-written target when its recipe fails, so a broken file is
# never mistaken for an up-to-date one.
.DELETE_ON_ERROR:

# Compiler and compile options (g++ because the sources are C++)
CC=g++
CFLAGS=-Wall
# Libraries to link: for libm.a write -lm (drop the "lib" prefix and the ".a" suffix)
LIBS= -lOpenCL -lfreeimage -lrt
# Header search path; searched before the system include paths
IncludeDir = -I/opt/AMDAPP/include
# Library search path
LinkDir = -L/opt/AMDAPP/lib/x86_64
# NOTE: OBJ_DIR is not referenced by any rule below; objects go to PROJECT_OBJ_DIR.
OBJ_DIR = ./obj
# Comments are kept on their own lines: whitespace in front of a trailing '#'
# becomes part of the variable's value and would corrupt $(BIN_DIR)/$(EXE_NAME).
BIN_DIR = ./bin
# PROJECT_TOP_DIR may be the absolute working directory or simply "."
PROJECT_TOP_DIR=.
PROJECT_BIN_DIR=$(PROJECT_TOP_DIR)/bin
# The sources sit directly in the top directory. No trailing slash here: the
# rules below add their own "/", and a doubled slash only obscures the paths.
PROJECT_SRC_DIR=$(PROJECT_TOP_DIR)
PROJECT_LIB_DIR=$(PROJECT_TOP_DIR)/lib
PROJECT_OBJ_DIR=$(PROJECT_TOP_DIR)/objs
MKDIR := mkdir -p
# Name and location of the executable to build
EXE_NAME=convolve_cl
TARGET=$(BIN_DIR)/$(EXE_NAME)
# Extension of the source files
FILE_TYPE=cpp
# Every source file in PROJECT_SRC_DIR, then the matching object file names
src=$(wildcard $(PROJECT_SRC_DIR)/*.$(FILE_TYPE))
dir= $(notdir $(src))
PROJECT_OBJ= $(patsubst %.$(FILE_TYPE),%.o,$(dir) )
PROJECT_ALL_OBJS= $(addprefix $(PROJECT_OBJ_DIR)/, $(PROJECT_OBJ))

# These names are commands, not files; without this a file called "all",
# "chdir" or "clean" would silently disable the target.
.PHONY: all chdir clean

# Default goal: create the output directories, build the executable, then
# print the computed object lists for debugging.
all: chdir $(TARGET)
	@echo "magnum $(PROJECT_OBJ)"
	@echo "magnum $(PROJECT_OBJ_DIR)"
	@echo "magnum $(PROJECT_ALL_OBJS)"

# Link step. The bin directory is an order-only prerequisite: it must exist,
# but its timestamp never forces a relink.
$(TARGET): $(PROJECT_ALL_OBJS) | $(PROJECT_BIN_DIR)
	$(CC) -o $@ $^ $(LinkDir) $(LIBS)

# Kept for compatibility: "make chdir" still creates both output directories.
chdir: | $(PROJECT_OBJ_DIR) $(PROJECT_BIN_DIR)

# One rule per output directory; "mkdir -p" succeeds if it already exists.
$(PROJECT_OBJ_DIR) $(PROJECT_BIN_DIR):
	@$(MKDIR) $@

# Remove everything this makefile generates.
clean:
	$(RM) -r $(PROJECT_BIN_DIR) $(PROJECT_OBJ_DIR)

# Compile one source file into one object file. The object directory is
# order-only so that compilation is safe under "make -j".
$(PROJECT_OBJ_DIR)/%.o: $(PROJECT_SRC_DIR)/%.$(FILE_TYPE) | $(PROJECT_OBJ_DIR)
	$(CC) $(CFLAGS) -o $@ -c $< $(IncludeDir)

可以看到,相比之前的makefile,只需要修改几个变量即可。
我的这个是ubuntu 环境 AMD显卡的opencl程序,虽然你们的环境有些不同,但是对于你们需要修改的编译程序也是同样适用的

上一篇:Spring整合JAX-WS


下一篇:Windows环境下PHP安装pthreads多线程扩展