zynq hls定点数计算

在神经网络的fpga加速中,定点运算不仅比浮点运算更节省资源,还具有更快的速度,而且因为定点运算造成的神经网络的精度损失亦可忽略不计。本节介绍如何使用HLS进行定点运算以及如何与zynq cpu交互。

<ap_fixed.h>

HLS提供了头文件ap_fixed.h,极大地方便了定点数的使用,其详细用法此处从略。

HLS代码示例

#include<cstring>
#include<ap_fixed.h>
typedef ap_fixed<16,8, AP_RND, AP_SAT> data_t;
/*
 * Element-wise fixed-point multiply: dest[i] = src1[i] * src2[i].
 *
 * src1, src2 : input arrays read over separate AXI master bundles.
 * dest       : output array written over its own AXI master bundle.
 * return     : product of the first elements, src1[0] * src2[0].
 *
 * Exactly XFER_LEN elements are read from each input and written to dest,
 * so each of the three arrays must hold at least XFER_LEN elements.
 */
data_t fixed_test(volatile data_t *src1,volatile data_t *src2,volatile data_t *dest){
#pragma HLS INTERFACE m_axi depth=100 port=src1 offset=slave bundle=MASTER_BUS1
#pragma HLS INTERFACE m_axi depth=100 port=src2 offset=slave bundle=MASTER_BUS2
#pragma HLS INTERFACE m_axi depth=100 port=dest offset=slave bundle=MASTER_BUS3
#pragma HLS INTERFACE s_axilite port=return bundle=CRTL_BUS
	enum {
		BUF_DEPTH = 100, /* local buffer capacity, same value as the m_axi depth */
		XFER_LEN = 10    /* number of elements actually moved in and out */
	};
	data_t buff1[BUF_DEPTH];
	data_t buff2[BUF_DEPTH];
	data_t buff3[BUF_DEPTH];

	/* Stage the inputs in local buffers. */
	memcpy(buff1,(const data_t*)src1,XFER_LEN*sizeof(data_t));
	memcpy(buff2,(const data_t*)src2,XFER_LEN*sizeof(data_t));

	/*
	 * Only the first XFER_LEN entries of buff1/buff2 were loaded, so only
	 * those are multiplied. Iterating over the whole buffer would read
	 * uninitialized elements.
	 */
	int i;
	for(i=0;i<XFER_LEN;i++){
		buff3[i]=buff1[i]*buff2[i];
	}

	/* Write the products back to external memory. */
	memcpy((data_t*)dest,(const data_t*)buff3,XFER_LEN*sizeof(data_t));
	return buff1[0]*buff2[0];
}

上述代码中,data_t数据类型为16位有符号定点数,其中整数部分(含符号位)占8位,小数部分占8位;AP_RND和AP_SAT分别指定了量化(舍入)模式和溢出(饱和)模式。程序实现了读取两个16位定点数数组,逐元素求积后写入另一个数组的简单功能。

PS端代码

PS端没有16位定点数类型,因此我们使用short类型来表示16位定点数。设该定点数为a,其小数部分占8位,则short b=(short)(a*256)(256即2的8次方;注意在C语言中"^"是按位异或运算符而不是乘方,不能直接写成2^8)的二进制表示即为该定点数的二进制表示。PL计算结束后将结果存放在dest数组中,则float c=dest[i]/256.0的值就是最终结果。


/******************************************************************************
*
* Copyright (C) 2009 - 2014 Xilinx, Inc.  All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* Use of the Software is limited solely to applications:
* (a) running on a Xilinx device, or
* (b) that interact with a Xilinx device through a bus or interconnect.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* XILINX  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
* OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Except as contained in this notice, the name of the Xilinx shall not be used
* in advertising or otherwise to promote the sale, use or other dealings in
* this Software without prior written authorization from Xilinx.
*
******************************************************************************/

/*
 * helloworld.c: simple test application
 *
 * This application configures UART 16550 to baud rate 9600.
 * PS7 UART (Zynq) is not initialized by this application, since
 * bootrom/bsp configures it to baud rate 115200
 *
 * ------------------------------------------------
 * | UART TYPE   BAUD RATE                        |
 * ------------------------------------------------
 *   uartns550   9600
 *   uartlite    Configurable only in HW design
 *   ps7_uart    115200 (configured by bootrom/bsp)
 */

#include <stdio.h>
#include <stdlib.h>
#include "platform.h"
#include "xil_printf.h"
#include "xfixed_test_hw.h"
#include "xfixed_test.h"
#include "xil_cache.h"
/*
 * Test driver for the fixed_test accelerator.
 *
 * Fills two input buffers with random values stored as 16-bit integers
 * scaled by 256 (8 fractional bits), hands the three buffer addresses to
 * the accelerator, waits for it to finish, and prints the first few inputs
 * and results converted back to floating point.
 *
 * Returns 0 on success, XST_FAILURE if buffer allocation or the device
 * configuration lookup fails, and -1 if driver initialization fails.
 */
int main()
{
    enum {
        NUM_SAMPLES = 10, /* elements allocated per buffer */
        NUM_PRINTED = 5   /* elements echoed to the console */
    };
    int rc = 0;
    int i;
    long status;
    short *src1 = NULL;
    short *src2 = NULL;
    short *dest = NULL;
    XFixed_test HlsXFixed_test;
    XFixed_test_Config *ExamplePtr;

    /*
     * NOTE(review): presumably the data cache is disabled so the CPU and
     * the accelerator observe the same memory contents - confirm.
     */
    Xil_DCacheDisable();

    src1 = malloc(NUM_SAMPLES * sizeof *src1);
    src2 = malloc(NUM_SAMPLES * sizeof *src2);
    dest = malloc(NUM_SAMPLES * sizeof *dest);
    if (!src1 || !src2 || !dest) {
        printf("ERROR: Could not allocate sample buffers.\n\r");
        rc = XST_FAILURE;
        goto out;
    }

    /* Random values in [-5.0, 4.9], scaled by 256 and truncated to short. */
    for (i = 0; i < NUM_SAMPLES; i++) {
        src1[i] = (short)((rand() % 100 - 50) / 10.0 * 256);
        src2[i] = (short)((rand() % 100 - 50) / 10.0 * 256);
    }

    /* Echo the leading inputs as real numbers (undo the 256 scaling). */
    for (i = 0; i < NUM_PRINTED; i++) {
        printf("%f,", (float)src1[i] / 256);
    }
    printf("\n");
    for (i = 0; i < NUM_PRINTED; i++) {
        printf("%f,", (float)src2[i] / 256);
    }
    printf("\n");

    printf("Look Up the device configuration.\n");
    ExamplePtr = XFixed_test_LookupConfig(XPAR_FIXED_TEST_0_DEVICE_ID);
    if (!ExamplePtr) {
        printf("ERROR: Lookup of accelerator configuration failed.\n\r");
        rc = XST_FAILURE;
        goto out;
    }

    printf("Initialize the Device\n");
    status = XFixed_test_CfgInitialize(&HlsXFixed_test, ExamplePtr);
    if (status != XST_SUCCESS) {
        printf("ERROR: Could not initialize accelerator.\n\r");
        rc = -1;
        goto out;
    }

    /* Pass the buffer addresses to the accelerator and start it. */
    XFixed_test_Set_src1_V(&HlsXFixed_test, (u32)src1);
    XFixed_test_Set_src2_V(&HlsXFixed_test, (u32)src2);
    XFixed_test_Set_dest_V(&HlsXFixed_test, (u32)dest);
    XFixed_test_Start(&HlsXFixed_test);

    /* Busy-wait until the accelerator reports completion. */
    while (XFixed_test_IsDone(&HlsXFixed_test) == 0) {
        /* spin */
    }

    xil_printf("***********************************\n");
    for (i = 0; i < NUM_PRINTED; i++) {
        printf("%f,", (float)dest[i] / 256);
        fflush(stdout);
    }

out:
    /* free(NULL) is a no-op, so this is safe on every path. */
    free(dest);
    free(src2);
    free(src1);
    return rc;
}


上一篇:[Exception] java.lang.IllegalArgumentException: Prefix string too short


下一篇:JAVA八大基本数据类型