海思3559 SVP NNIE demo解读
图片处理——yolo3为例
1. 先看函数void SAMPLE_SVP_NNIE_Yolov3(void)
– 部分1
/* Input picture; going by the path this is a 416x416 planar image file - TODO confirm layout. */
HI_CHAR *pcSrcFile = "./data/nnie_image/rgb_planar/dog_bike_car_416x416.bgr";
/* Model file (.wk) that is handed to the model loader. */
HI_CHAR *pcModelName = "./data/nnie_model/detection/inst_yolov3_cycle.wk";
/* Number of pictures; copied into stNnieCfg.u32MaxInputNum by the configuration step. */
HI_U32 u32PicNum = 1;
/* Threshold used when printing results; reassigned to 0.8f by the configuration step. */
HI_FLOAT f32PrintResultThresh = 0.0f;
/* Return code shared by the calls that follow. */
HI_S32 s32Ret = HI_SUCCESS;
/* NNIE configuration, zero-initialised. */
SAMPLE_SVP_NNIE_CFG_S stNnieCfg = {0};
/* Segment/node index of the input blob to fill, zero-initialised. */
SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S stInputDataIdx = {0};
/* Segment/node index of the segment to process, zero-initialised. */
SAMPLE_SVP_NNIE_PROCESS_SEG_INDEX_S stProcSegIdx = {0};
结构体SAMPLE_SVP_NNIE_CFG_S:
typedef struct hiSAMPLE_SVP_NNIE_CFG_S
{HI_CHAR *pszPic; //图片路径HI_U32 u32MaxInputNum; //每个batch最大输入图片数HI_U32 u32MaxRoiNum; //最大ROI数HI_U64 au64StepVirAddr[SAMPLE_SVP_NNIE_EACH_SEG_STEP_ADDR_NUM * SVP_NNIE_MAX_NET_SEG_NUM]; //virtual addr of LSTM's or RNN's step bufferSVP_NNIE_ID_E aenNnieCoreId[SVP_NNIE_MAX_NET_SEG_NUM]; //选用段对应的核
} SAMPLE_SVP_NNIE_CFG_S;
主要涉及NNIE的设置
枚举SVP_NNIE_ID_E:
typedef enum hiSVP_NNIE_ID_E {SVP_NNIE_ID_0 = 0x0,SVP_NNIE_ID_1 = 0x1,SVP_NNIE_ID_BUTT
} SVP_NNIE_ID_E;
主要定义了NNIE的核的枚举
结构体SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S:
/* Pair of indices that selects one node of one network segment. */
typedef struct hiSAMPLE_SVP_NNIE_DATA_INDEX_S
{
    HI_U32 u32SegIdx;  /* index of the segment */
    HI_U32 u32NodeIdx; /* index of the node inside that segment */
} SAMPLE_SVP_NNIE_DATA_INDEX_S;
/* Same layout, used when the pair selects an input node. */
typedef SAMPLE_SVP_NNIE_DATA_INDEX_S SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S;
SegIdx是指段(但是Yolo不需要分段,因此仅有一段)
NodeIdx是指节点索引(即该段内的第几个节点),而不是节点数量
结构体SAMPLE_SVP_NNIE_PROCESS_SEG_INDEX_S:
/* Same segment/node index pair, used when it selects the segment to process. */
typedef SAMPLE_SVP_NNIE_DATA_INDEX_S SAMPLE_SVP_NNIE_PROCESS_SEG_INDEX_S;
同上
– 部分2
/*Set configuration parameter*/
f32PrintResultThresh = 0.8f;                /* threshold used when printing results */
stNnieCfg.pszPic= pcSrcFile;                /* picture to read as network input */
stNnieCfg.u32MaxInputNum = u32PicNum; /* max input image num in each batch */
stNnieCfg.u32MaxRoiNum = 0;                 /* no ROIs are configured for this run */
stNnieCfg.aenNnieCoreId[0] = SVP_NNIE_ID_0; /* set NNIE core for segment 0 */
主要设置了SAMPLE_SVP_NNIE_CFG_S结构体的相关内容
– 部分3
/* Make sure the SVP/MPP system initialisation has been performed before using NNIE. */
SAMPLE_COMM_SVP_CheckSysInit();
主要进行初始化
HI_VOID SAMPLE_COMM_SVP_CheckSysInit(HI_VOID)
{//s_bSampleSvpInit是一个bool类型变量,指示了是否进行了系统初始化if (HI_FALSE == s_bSampleSvpInit){if (SAMPLE_COMM_SVP_SysInit()){SAMPLE_SVP_TRACE(SAMPLE_SVP_ERR_LEVEL_ERROR, "Svp mpi init failed!\n");exit(-1);}s_bSampleSvpInit = HI_TRUE;}SAMPLE_SVP_TRACE(SAMPLE_SVP_ERR_LEVEL_DEBUG, "Svp mpi init ok!\n");
}
static HI_S32 SAMPLE_COMM_SVP_SysInit(HI_VOID)
{HI_S32 s32Ret = HI_FAILURE;VB_CONFIG_S struVbConf;HI_MPI_SYS_Exit();HI_MPI_VB_Exit();//将struVbConf所有内容赋值0memset(&struVbConf, 0, sizeof(VB_CONFIG_S));struVbConf.u32MaxPoolCnt = 2;struVbConf.astCommPool[1].u64BlkSize = 768 * 576 * 2;struVbConf.astCommPool[1].u32BlkCnt = 1;//设置MPP 视频缓存池属性//HI_S32 HI_MPI_VB_SetConfig(const VB_CONFIG_S *pstVbConfig);//pstVbConfig 视频缓存池属性指针。静态属性。//输入s32Ret = HI_MPI_VB_SetConfig((const VB_CONFIG_S *)&struVbConf);SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error(%#x):HI_MPI_VB_SetConf failed!\n", s32Ret);//初始化MPP 视频缓存池。s32Ret = HI_MPI_VB_Init();SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error(%#x):HI_MPI_VB_Init failed!\n", s32Ret);//初始化MPP 系统。包括音频输入输出、视频输入输出、视频编解码、视频叠加区域、视频处理、图形处理等模块都会被初始化。s32Ret = HI_MPI_SYS_Init();SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error(%#x):HI_MPI_SYS_Init failed!\n", s32Ret);return s32Ret;
}
结构体VB_CONFIG_S:
/* Video buffer (VB) configuration: a pool count plus per-pool settings. */
typedef struct hiVB_CONFIG_S {
    HI_U32 u32MaxPoolCnt;                            /* maximum number of pools */
    VB_POOL_CONFIG_S astCommPool[VB_MAX_COMM_POOLS]; /* settings of each common pool */
} VB_CONFIG_S;
结构体VB_POOL_CONFIG_S:
/* Settings of a single video buffer pool. */
typedef struct hiVB_POOL_CONFIG_S {
    HI_U64 u64BlkSize;                   /* size of one block */
    HI_U32 u32BlkCnt;                    /* number of blocks in the pool */
    VB_REMAP_MODE_E enRemapMode;         /* remap mode; semantics not shown here - TODO confirm */
    HI_CHAR acMmzName[MAX_MMZ_NAME_LEN]; /* presumably the name of the MMZ the pool lives in - TODO confirm */
} VB_POOL_CONFIG_S;
– 部分4
/* Holder for the YOLOv3 model, zero-initialised. */
static SAMPLE_SVP_NNIE_MODEL_S s_stYolov3Model = {0};
/* Load the model file named by pcModelName into s_stYolov3Model. */
s32Ret = SAMPLE_COMM_SVP_NNIE_LoadModel(pcModelName,&s_stYolov3Model);
向NNIE核载入模型
结构体SAMPLE_SVP_NNIE_MODEL_S:
/* A parsed NNIE model together with the memory that holds the raw model file. */
typedef struct hiSAMPLE_SVP_NNIE_MODEL_S
{
    SVP_NNIE_MODEL_S stModel;  /* parsed network model */
    SVP_MEM_INFO_S stModelBuf; /* stores the model file */
} SAMPLE_SVP_NNIE_MODEL_S;
结构体SVP_NNIE_MODEL_S
typedef struct hiSVP_NNIE_MODEL_S {SVP_NNIE_RUN_MODE_E enRunMode;//枚举类型,网络模型运行模式HI_U32 u32TmpBufSize; /* temp buffer size */HI_U32 u32NetSegNum;SVP_NNIE_SEG_S astSeg[SVP_NNIE_MAX_NET_SEG_NUM];SVP_NNIE_ROIPOOL_INFO_S astRoiInfo[SVP_NNIE_MAX_ROI_LAYER_NUM]; /* ROIPooling info */SVP_MEM_INFO_S stBase;
} SVP_NNIE_MODEL_S;
主要存了模型用于NNIE核的一些属性
结构体SVP_MEM_INFO_S
/* Mem information: one memory region described by both of its addresses and its size. */
typedef struct hiSVP_MEM_INFO_S {
    HI_U64 u64PhyAddr; /* RW;The physical address of the memory */
    HI_U64 u64VirAddr; /* RW;The virtual address of the memory */
    HI_U32 u32Size;    /* RW;The size of memory */
} SVP_MEM_INFO_S;
主要存了内存分配的信息
/*
 * Read a model file into MMZ memory and parse it into an NNIE model.
 *
 * pszModelFile: path of the model file to open (binary mode).
 * pstNnieModel: output; stModelBuf receives the allocated buffer that holds
 *               the file contents, stModel receives the parsed model.
 *
 * Returns HI_SUCCESS on success. Returns HI_INVALID_VALUE when the file
 * cannot be opened and HI_FAILURE for every later failure; on those later
 * failures the file is closed and, once allocated, the model buffer is
 * released and its recorded size reset to 0.
 */
HI_S32 SAMPLE_COMM_SVP_NNIE_LoadModel(HI_CHAR *pszModelFile, SAMPLE_SVP_NNIE_MODEL_S *pstNnieModel)
{
    HI_S32 s32Ret = HI_INVALID_VALUE;
    HI_U64 u64PhyAddr = 0;
    HI_U8 *pu8VirAddr = NULL;
    HI_SL slFileSize = 0;

    /*Get model file size*/
    FILE *fp = fopen(pszModelFile, "rb");
    SAMPLE_SVP_CHECK_EXPR_RET(NULL == fp, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error, open model file failed!\n");
    s32Ret = fseek(fp, 0L, SEEK_END);
    SAMPLE_SVP_CHECK_EXPR_GOTO(-1 == s32Ret, FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error, fseek failed!\n");
    slFileSize = ftell(fp);
    SAMPLE_SVP_CHECK_EXPR_GOTO(slFileSize <= 0, FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error, ftell failed!\n");
    s32Ret = fseek(fp, 0L, SEEK_SET);
    SAMPLE_SVP_CHECK_EXPR_GOTO(-1 == s32Ret, FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR, "Error, fseek failed!\n");

    /*malloc model file mem*/
    /*
     * The allocation yields both a virtual address (address inside the
     * process) and a physical address (linear memory address).
     */
    s32Ret = SAMPLE_COMM_SVP_MallocMem("SAMPLE_NNIE_MODEL", NULL, (HI_U64 *)&u64PhyAddr, (void **)&pu8VirAddr, slFileSize);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, FAIL_0, SAMPLE_SVP_ERR_LEVEL_ERROR,"Error(%#x),Malloc memory failed!\n", s32Ret);

    pstNnieModel->stModelBuf.u32Size = (HI_U32)slFileSize;
    pstNnieModel->stModelBuf.u64PhyAddr = u64PhyAddr;
    pstNnieModel->stModelBuf.u64VirAddr = (HI_U64)pu8VirAddr;

    /* Read the whole file as a single item; anything but 1 is a short read. */
    s32Ret = fread(pu8VirAddr, slFileSize, 1, fp);
    SAMPLE_SVP_CHECK_EXPR_GOTO(1 != s32Ret, FAIL_1, SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,read model file failed!\n");

    /*load model*/
    /*
     * Parse the network model out of the buffer the user loaded beforehand.
     *   pstModelBuf: buffer storing the model; the user must allocate it in
     *                advance and load the wk file produced by the NNIE
     *                compiler into it. Must not be NULL. Input.
     *   pstModel:    network model structure. Output.
     */
    s32Ret = HI_MPI_SVP_NNIE_LoadModel(&pstNnieModel->stModelBuf, &pstNnieModel->stModel);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, FAIL_1, SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,HI_MPI_SVP_NNIE_LoadModel failed!\n");

    fclose(fp);
    return HI_SUCCESS;

FAIL_1:
    SAMPLE_SVP_MMZ_FREE(pstNnieModel->stModelBuf.u64PhyAddr, pstNnieModel->stModelBuf.u64VirAddr);
    pstNnieModel->stModelBuf.u32Size = 0;
FAIL_0:
    if (NULL != fp)
    {
        fclose(fp);
    }
    return HI_FAILURE;
}
/*
 * Allocate a block of MMZ memory (thin wrapper around HI_MPI_SYS_MmzAlloc).
 *
 * MMZ is the memory HiSilicon uses for storing media data; see
 * https://www.cnblogs.com/wlzy/p/9733110.html
 *
 * pszMmb:      string pointer to the Mmb name. Input.
 * pszZone:     string pointer to the MMZ zone name. Input.
 * pu64PhyAddr: pointer receiving the physical address. Output.
 * ppvVirAddr:  pointer to the pointer receiving the virtual address. Output.
 * u32Size:     size of the memory block. Input.
 *
 * Returns whatever HI_MPI_SYS_MmzAlloc returns.
 */
HI_S32 SAMPLE_COMM_SVP_MallocMem(HI_CHAR *pszMmb, HI_CHAR *pszZone, HI_U64 *pu64PhyAddr, HI_VOID **ppvVirAddr, HI_U32 u32Size)
{
    HI_S32 s32Ret = HI_SUCCESS;

    s32Ret = HI_MPI_SYS_MmzAlloc(pu64PhyAddr, ppvVirAddr, pszMmb, pszZone, u32Size);

    return s32Ret;
}
– 部分5
/* NNIE run-time parameters for YOLOv3, zero-initialised. */
static SAMPLE_SVP_NNIE_PARAM_S s_stYolov3NnieParam = {0};
/* Point the parameters at the parsed model held in s_stYolov3Model. */
s_stYolov3NnieParam.pstModel = &s_stYolov3Model.stModel;
/* Initialise the hardware and software parameters; s_stYolov3SoftwareParam is declared outside this excerpt. */
s32Ret = SAMPLE_SVP_NNIE_Yolov3_ParamInit(&stNnieCfg,&s_stYolov3NnieParam,&s_stYolov3SoftwareParam);
该部分主要进行模型的设置
结构体SAMPLE_SVP_NNIE_PARAM_S:
typedef struct hiSAMPLE_SVP_NNIE_PARAM_S
{SVP_NNIE_MODEL_S *pstModel;HI_U32 u32TmpBufSize;HI_U32 au32TaskBufSize[SVP_NNIE_MAX_NET_SEG_NUM];SVP_MEM_INFO_S stTaskBuf;SVP_MEM_INFO_S stTmpBuf;SVP_MEM_INFO_S stStepBuf; //store Lstm step infoSAMPLE_SVP_NNIE_SEG_DATA_S astSegData[SVP_NNIE_MAX_NET_SEG_NUM]; //each seg's input and output blobSVP_NNIE_FORWARD_CTRL_S astForwardCtrl[SVP_NNIE_MAX_NET_SEG_NUM];SVP_NNIE_FORWARD_WITHBBOX_CTRL_S astForwardWithBboxCtrl[SVP_NNIE_MAX_NET_SEG_NUM];
} SAMPLE_SVP_NNIE_PARAM_S;
结构体SAMPLE_SVP_NNIE_SEG_DATA_S:
/*each seg input and output memory*/
typedef struct hiSAMPLE_SVP_NNIE_SEG_DATA_S
{
    SVP_SRC_BLOB_S astSrc[SVP_NNIE_MAX_INPUT_NUM];  /* input blobs of the segment */
    SVP_DST_BLOB_S astDst[SVP_NNIE_MAX_OUTPUT_NUM]; /* output blobs of the segment */
} SAMPLE_SVP_NNIE_SEG_DATA_S;
结构体SVP_BLOB_S:
/* Source and destination blobs share the SVP_BLOB_S layout. */
typedef SVP_BLOB_S SVP_SRC_BLOB_S;
typedef SVP_BLOB_S SVP_DST_BLOB_S;
/****************************** Blob struct ******************************
In Caffe, the blob contains shape info in the following order:
Image\FeatureMap:            N C H W
FC(normal vector):           N C
RNN\LSTM(Recurrent) vector:  T N D

The relationship of the following blob struct with the Caffe blob is as follows:
Image\FeatureMap:            Num Chn Height Width
FC(VEC_S32):                 Num Width
RNN\LSTM(SEQ_S32) vector:    Step Num Dim
The stride, whose unit of measurement is the byte, is always aligned along the
width or dim direction.
**************************************************************************/
typedef struct hiSVP_BLOB_S {
    SVP_BLOB_TYPE_E enType; /* Blob type */
    HI_U32 u32Stride;       /* Stride, a line bytes num */
    HI_U64 u64VirAddr;      /* virtual addr */
    HI_U64 u64PhyAddr;      /* physical addr */
    HI_U32 u32Num;          /* N: frame num or sequence num, correspond to caffe blob's n */
    union {
        struct {
            HI_U32 u32Width;  /* W: frame width, correspond to caffe blob's w */
            HI_U32 u32Height; /* H: frame height, correspond to caffe blob's h */
            HI_U32 u32Chn;    /* C: frame channel, correspond to caffe blob's c */
        } stWhc;
        struct {
            HI_U32 u32Dim;         /* D: vector dimension */
            HI_U64 u64VirAddrStep; /* T: virtual address of time steps array in each sequence */
        } stSeq;
    } unShape;
} SVP_BLOB_S;
/*
 * Initialise the hardware (NNIE) parameters and then the YOLOv3 software
 * parameters.
 *
 * pstCfg:           NNIE configuration.
 * pstNnieParam:     NNIE parameters to fill.
 * pstSoftWareParam: YOLOv3 software parameters to fill.
 *
 * Returns HI_SUCCESS when both steps succeed. When either step fails,
 * SAMPLE_SVP_NNIE_Yolov3_Deinit is called; the function then returns the
 * deinit error code if deinit itself failed, otherwise HI_FAILURE.
 */
static HI_S32 SAMPLE_SVP_NNIE_Yolov3_ParamInit(SAMPLE_SVP_NNIE_CFG_S* pstCfg,SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S* pstSoftWareParam)
{
    HI_S32 s32Ret = HI_SUCCESS;

    /*init hardware para*/
    s32Ret = SAMPLE_COMM_SVP_NNIE_ParamInit(pstCfg,pstNnieParam);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret,INIT_FAIL_0,SAMPLE_SVP_ERR_LEVEL_ERROR,"Error(%#x),SAMPLE_COMM_SVP_NNIE_ParamInit failed!\n",s32Ret);

    /*init software para*/
    s32Ret = SAMPLE_SVP_NNIE_Yolov3_SoftwareInit(pstCfg,pstNnieParam,pstSoftWareParam);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret,INIT_FAIL_0,SAMPLE_SVP_ERR_LEVEL_ERROR,"Error(%#x),SAMPLE_SVP_NNIE_Yolov3_SoftwareInit failed!\n",s32Ret);

    return s32Ret;

INIT_FAIL_0:
    /* Release whatever was set up before reporting the failure. */
    s32Ret = SAMPLE_SVP_NNIE_Yolov3_Deinit(pstNnieParam,pstSoftWareParam,NULL);
    SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret,s32Ret,SAMPLE_SVP_ERR_LEVEL_ERROR,"Error(%#x),SAMPLE_SVP_NNIE_Yolov3_Deinit failed!\n",s32Ret);
    return HI_FAILURE;
}
函数SAMPLE_COMM_SVP_NNIE_ParamInit
/*
 * Validate the arguments and run the NNIE parameter initialisation.
 *
 * pstNnieCfg:   NNIE configuration; must not be NULL.
 * pstNnieParam: NNIE parameters; must not be NULL and its pstModel must
 *               already be set.
 *
 * Returns HI_SUCCESS on success, HI_ERR_SVP_NNIE_ILLEGAL_PARAM for a NULL
 * argument or NULL pstModel. When SAMPLE_SVP_NNIE_ParamInit fails the
 * parameters are deinitialised; the function then returns the deinit error
 * code if deinit itself failed, otherwise HI_FAILURE.
 */
HI_S32 SAMPLE_COMM_SVP_NNIE_ParamInit(SAMPLE_SVP_NNIE_CFG_S *pstNnieCfg,SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam)
{
    HI_S32 s32Ret = HI_SUCCESS;

    /*check*/
    SAMPLE_SVP_CHECK_EXPR_RET((NULL == pstNnieCfg || NULL == pstNnieParam), HI_ERR_SVP_NNIE_ILLEGAL_PARAM,SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,pstNnieCfg and pstNnieParam can't be NULL!\n");
    SAMPLE_SVP_CHECK_EXPR_RET((NULL == pstNnieParam->pstModel), HI_ERR_SVP_NNIE_ILLEGAL_PARAM,SAMPLE_SVP_ERR_LEVEL_ERROR, "Error,pstNnieParam->pstModel can't be NULL!\n");

    /*NNIE parameter initialization */
    s32Ret = SAMPLE_SVP_NNIE_ParamInit(pstNnieCfg, pstNnieParam);
    SAMPLE_SVP_CHECK_EXPR_GOTO(HI_SUCCESS != s32Ret, FAIL, SAMPLE_SVP_ERR_LEVEL_ERROR,"Error, SAMPLE_SVP_NNIE_ParamInit failed!\n");

    return s32Ret;

FAIL:
    /* Undo any partial initialisation before reporting the failure. */
    s32Ret = SAMPLE_COMM_SVP_NNIE_ParamDeinit(pstNnieParam);
    SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,"Error, SAMPLE_COMM_SVP_NNIE_ParamDeinit failed!\n");
    return HI_FAILURE;
}
/*****************************************************************************
 * Prototype    : SAMPLE_SVP_NNIE_ParamInit
 * Description  : Fill the NNIE forward parameters and lay out one cached
 *                MMZ allocation for all of them.
 *
 *                The single allocation is carved up in this order:
 *                  [task bufs of all segs][tmp buf][src blobs of seg 0]
 *                  [dst blobs of seg 0][dst blobs of seg 1]...
 *
 * Input        : SAMPLE_SVP_NNIE_CFG_S   *pstNnieCfg    NNIE configure parameter
 *                SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam  NNIE parameters
 *                                                       (pstModel must be set)
 * Output       : pstNnieParam is filled in place.
 * Return Value : HI_S32,HI_SUCCESS:Success,Other:failure
 *****************************************************************************/
static HI_S32 SAMPLE_SVP_NNIE_ParamInit(SAMPLE_SVP_NNIE_CFG_S *pstNnieCfg, SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam)
{
    HI_U32 i = 0, j = 0;
    HI_U32 u32TotalSize = 0;
    HI_U32 u32TotalTaskBufSize = 0;
    HI_U32 u32TmpBufSize = 0;
    HI_S32 s32Ret = HI_SUCCESS;
    HI_U32 u32Offset = 0;
    HI_U64 u64PhyAddr = 0;
    HI_U8 *pu8VirAddr = NULL;
    SAMPLE_SVP_NNIE_BLOB_SIZE_S astBlobSize[SVP_NNIE_MAX_NET_SEG_NUM] = {0};

    /*fill forward info*/
    s32Ret = SAMPLE_SVP_NNIE_FillForwardInfo(pstNnieCfg, pstNnieParam);
    SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,SAMPLE_SVP_NNIE_FillForwardCtrl failed!\n");

    /*Get taskInfo and Blob mem size*/
    s32Ret = SAMPLE_SVP_NNIE_GetTaskAndBlobBufSize(pstNnieCfg, pstNnieParam, &u32TotalTaskBufSize,&u32TmpBufSize, astBlobSize, &u32TotalSize);
    SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,SAMPLE_SVP_NNIE_GetTaskAndBlobBufSize failed!\n");

    /*Malloc mem*/
    s32Ret = SAMPLE_COMM_SVP_MallocCached("SAMPLE_NNIE_TASK", NULL, (HI_U64 *)&u64PhyAddr, (void **)&pu8VirAddr, u32TotalSize);
    SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,Malloc memory failed!\n");

    /* Zero the freshly allocated memory and push the zeros out of the cache. */
    memset(pu8VirAddr, 0, u32TotalSize);
    SAMPLE_COMM_SVP_FlushCache(u64PhyAddr, (void *)pu8VirAddr, u32TotalSize);

    /*fill taskinfo mem addr*/
    pstNnieParam->stTaskBuf.u32Size = u32TotalTaskBufSize;
    pstNnieParam->stTaskBuf.u64PhyAddr = u64PhyAddr;
    pstNnieParam->stTaskBuf.u64VirAddr = (HI_U64)pu8VirAddr;

    /*fill Tmp mem addr*/
    pstNnieParam->stTmpBuf.u32Size = u32TmpBufSize;
    pstNnieParam->stTmpBuf.u64PhyAddr = u64PhyAddr + u32TotalTaskBufSize;
    pstNnieParam->stTmpBuf.u64VirAddr = (HI_U64)pu8VirAddr + u32TotalTaskBufSize;

    /*fill forward ctrl addr*/
    for (i = 0; i < pstNnieParam->pstModel->u32NetSegNum; i++)
    {
        if (SVP_NNIE_NET_TYPE_ROI == pstNnieParam->pstModel->astSeg[i].enNetType)
        {
            pstNnieParam->astForwardWithBboxCtrl[i].stTmpBuf = pstNnieParam->stTmpBuf;
            pstNnieParam->astForwardWithBboxCtrl[i].stTskBuf.u64PhyAddr = pstNnieParam->stTaskBuf.u64PhyAddr + u32Offset;
            pstNnieParam->astForwardWithBboxCtrl[i].stTskBuf.u64VirAddr = pstNnieParam->stTaskBuf.u64VirAddr + u32Offset;
            pstNnieParam->astForwardWithBboxCtrl[i].stTskBuf.u32Size = pstNnieParam->au32TaskBufSize[i];
        }
        else if (SVP_NNIE_NET_TYPE_CNN == pstNnieParam->pstModel->astSeg[i].enNetType ||
                 SVP_NNIE_NET_TYPE_RECURRENT == pstNnieParam->pstModel->astSeg[i].enNetType)
        {
            pstNnieParam->astForwardCtrl[i].stTmpBuf = pstNnieParam->stTmpBuf;
            pstNnieParam->astForwardCtrl[i].stTskBuf.u64PhyAddr = pstNnieParam->stTaskBuf.u64PhyAddr + u32Offset;
            pstNnieParam->astForwardCtrl[i].stTskBuf.u64VirAddr = pstNnieParam->stTaskBuf.u64VirAddr + u32Offset;
            pstNnieParam->astForwardCtrl[i].stTskBuf.u32Size = pstNnieParam->au32TaskBufSize[i];
        }
        /* Each segment's task buffer follows the previous segment's. */
        u32Offset += pstNnieParam->au32TaskBufSize[i];
    }

    /*fill each blob's mem addr*/
    u64PhyAddr = u64PhyAddr + u32TotalTaskBufSize + u32TmpBufSize;
    pu8VirAddr = pu8VirAddr + u32TotalTaskBufSize + u32TmpBufSize;
    for (i = 0; i < pstNnieParam->pstModel->u32NetSegNum; i++)
    {
        /*first seg has src blobs, other seg's src blobs from the output blobs of
        those segs before it or from software output results*/
        if (0 == i)
        {
            for (j = 0; j < pstNnieParam->pstModel->astSeg[i].u16SrcNum; j++)
            {
                if (j != 0)
                {
                    u64PhyAddr += astBlobSize[i].au32SrcSize[j - 1];
                    pu8VirAddr += astBlobSize[i].au32SrcSize[j - 1];
                }
                pstNnieParam->astSegData[i].astSrc[j].u64PhyAddr = u64PhyAddr;
                pstNnieParam->astSegData[i].astSrc[j].u64VirAddr = (HI_U64)pu8VirAddr;
            }
            /* Skip past the last src blob. NOTE(review): indexes [j - 1], so this assumes u16SrcNum >= 1. */
            u64PhyAddr += astBlobSize[i].au32SrcSize[j - 1];
            pu8VirAddr += astBlobSize[i].au32SrcSize[j - 1];
        }

        /*fill the mem addrs of each seg's output blobs*/
        for (j = 0; j < pstNnieParam->pstModel->astSeg[i].u16DstNum; j++)
        {
            if (j != 0)
            {
                u64PhyAddr += astBlobSize[i].au32DstSize[j - 1];
                pu8VirAddr += astBlobSize[i].au32DstSize[j - 1];
            }
            pstNnieParam->astSegData[i].astDst[j].u64PhyAddr = u64PhyAddr;
            pstNnieParam->astSegData[i].astDst[j].u64VirAddr = (HI_U64)pu8VirAddr;
        }
        /* Skip past the last dst blob. NOTE(review): indexes [j - 1], so this assumes u16DstNum >= 1. */
        u64PhyAddr += astBlobSize[i].au32DstSize[j - 1];
        pu8VirAddr += astBlobSize[i].au32DstSize[j - 1];
    }

    return s32Ret;
}
函数SAMPLE_SVP_NNIE_FillForwardInfo
/*
 * Fill the forward control block and the input/output blob descriptions of
 * every segment from the model held in pstNnieParam->pstModel.
 *
 * pstNnieCfg:   supplies the core id per segment, the max input number,
 *               the max ROI number and the step-buffer addresses.
 * pstNnieParam: pstModel is read; astForwardCtrl / astForwardWithBboxCtrl
 *               and astSegData are written.
 *
 * The stTmpBuf / stTskBuf fields are copied from whatever
 * pstNnieParam->stTmpBuf, stTaskBuf and au32TaskBufSize hold at call time;
 * SAMPLE_SVP_NNIE_ParamInit assigns them again after it has allocated the
 * memory.
 *
 * Always returns HI_SUCCESS.
 */
static HI_S32 SAMPLE_SVP_NNIE_FillForwardInfo(SAMPLE_SVP_NNIE_CFG_S *pstNnieCfg, SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam)
{
    HI_U32 i = 0, j = 0;
    HI_U32 u32Offset = 0;
    HI_U32 u32Num = 0;

    for (i = 0; i < pstNnieParam->pstModel->u32NetSegNum; i++)
    {
        /*fill forwardCtrl info*/
        /* The network type of the segment decides which control block is used. */
        if (SVP_NNIE_NET_TYPE_ROI == pstNnieParam->pstModel->astSeg[i].enNetType)
        {
            pstNnieParam->astForwardWithBboxCtrl[i].enNnieId = pstNnieCfg->aenNnieCoreId[i];
            pstNnieParam->astForwardWithBboxCtrl[i].u32SrcNum = pstNnieParam->pstModel->astSeg[i].u16SrcNum;
            pstNnieParam->astForwardWithBboxCtrl[i].u32DstNum = pstNnieParam->pstModel->astSeg[i].u16DstNum;
            pstNnieParam->astForwardWithBboxCtrl[i].u32ProposalNum = 1;
            pstNnieParam->astForwardWithBboxCtrl[i].u32NetSegId = i;
            pstNnieParam->astForwardWithBboxCtrl[i].stTmpBuf = pstNnieParam->stTmpBuf;
            pstNnieParam->astForwardWithBboxCtrl[i].stTskBuf.u64PhyAddr = pstNnieParam->stTaskBuf.u64PhyAddr + u32Offset;
            pstNnieParam->astForwardWithBboxCtrl[i].stTskBuf.u64VirAddr = pstNnieParam->stTaskBuf.u64VirAddr + u32Offset;
            pstNnieParam->astForwardWithBboxCtrl[i].stTskBuf.u32Size = pstNnieParam->au32TaskBufSize[i];
        }
        else if (SVP_NNIE_NET_TYPE_CNN == pstNnieParam->pstModel->astSeg[i].enNetType ||
                 SVP_NNIE_NET_TYPE_RECURRENT == pstNnieParam->pstModel->astSeg[i].enNetType)
        {
            pstNnieParam->astForwardCtrl[i].enNnieId = pstNnieCfg->aenNnieCoreId[i];
            pstNnieParam->astForwardCtrl[i].u32SrcNum = pstNnieParam->pstModel->astSeg[i].u16SrcNum;
            pstNnieParam->astForwardCtrl[i].u32DstNum = pstNnieParam->pstModel->astSeg[i].u16DstNum;
            pstNnieParam->astForwardCtrl[i].u32NetSegId = i;
            pstNnieParam->astForwardCtrl[i].stTmpBuf = pstNnieParam->stTmpBuf;
            pstNnieParam->astForwardCtrl[i].stTskBuf.u64PhyAddr = pstNnieParam->stTaskBuf.u64PhyAddr + u32Offset;
            pstNnieParam->astForwardCtrl[i].stTskBuf.u64VirAddr = pstNnieParam->stTaskBuf.u64VirAddr + u32Offset;
            pstNnieParam->astForwardCtrl[i].stTskBuf.u32Size = pstNnieParam->au32TaskBufSize[i];
        }
        u32Offset += pstNnieParam->au32TaskBufSize[i];

        /*fill src blob info*/
        for (j = 0; j < pstNnieParam->pstModel->astSeg[i].u16SrcNum; j++)
        {
            /*Recurrent blob*/
            if (SVP_BLOB_TYPE_SEQ_S32 == pstNnieParam->pstModel->astSeg[i].astSrcNode[j].enType)
            {
                pstNnieParam->astSegData[i].astSrc[j].enType = pstNnieParam->pstModel->astSeg[i].astSrcNode[j].enType;
                pstNnieParam->astSegData[i].astSrc[j].unShape.stSeq.u32Dim = pstNnieParam->pstModel->astSeg[i].astSrcNode[j].unShape.u32Dim;
                pstNnieParam->astSegData[i].astSrc[j].u32Num = pstNnieCfg->u32MaxInputNum;
                /* Input step buffer: first slot of this segment's step addresses. */
                pstNnieParam->astSegData[i].astSrc[j].unShape.stSeq.u64VirAddrStep = pstNnieCfg->au64StepVirAddr[i * SAMPLE_SVP_NNIE_EACH_SEG_STEP_ADDR_NUM];
            }
            else
            {
                pstNnieParam->astSegData[i].astSrc[j].enType = pstNnieParam->pstModel->astSeg[i].astSrcNode[j].enType;
                pstNnieParam->astSegData[i].astSrc[j].unShape.stWhc.u32Chn = pstNnieParam->pstModel->astSeg[i].astSrcNode[j].unShape.stWhc.u32Chn;
                pstNnieParam->astSegData[i].astSrc[j].unShape.stWhc.u32Height = pstNnieParam->pstModel->astSeg[i].astSrcNode[j].unShape.stWhc.u32Height;
                pstNnieParam->astSegData[i].astSrc[j].unShape.stWhc.u32Width = pstNnieParam->pstModel->astSeg[i].astSrcNode[j].unShape.stWhc.u32Width;
                pstNnieParam->astSegData[i].astSrc[j].u32Num = pstNnieCfg->u32MaxInputNum;
            }
        }

        /*fill dst blob info*/
        if (SVP_NNIE_NET_TYPE_ROI == pstNnieParam->pstModel->astSeg[i].enNetType)
        {
            u32Num = pstNnieCfg->u32MaxRoiNum * pstNnieCfg->u32MaxInputNum;
        }
        else
        {
            u32Num = pstNnieCfg->u32MaxInputNum;
        }

        for (j = 0; j < pstNnieParam->pstModel->astSeg[i].u16DstNum; j++)
        {
            if (SVP_BLOB_TYPE_SEQ_S32 == pstNnieParam->pstModel->astSeg[i].astDstNode[j].enType)
            {
                pstNnieParam->astSegData[i].astDst[j].enType = pstNnieParam->pstModel->astSeg[i].astDstNode[j].enType;
                pstNnieParam->astSegData[i].astDst[j].unShape.stSeq.u32Dim = pstNnieParam->pstModel->astSeg[i].astDstNode[j].unShape.u32Dim;
                pstNnieParam->astSegData[i].astDst[j].u32Num = u32Num;
                /* Output step buffer: second slot of this segment's step addresses. */
                pstNnieParam->astSegData[i].astDst[j].unShape.stSeq.u64VirAddrStep = pstNnieCfg->au64StepVirAddr[i * SAMPLE_SVP_NNIE_EACH_SEG_STEP_ADDR_NUM + 1];
            }
            else
            {
                pstNnieParam->astSegData[i].astDst[j].enType = pstNnieParam->pstModel->astSeg[i].astDstNode[j].enType;
                pstNnieParam->astSegData[i].astDst[j].unShape.stWhc.u32Chn = pstNnieParam->pstModel->astSeg[i].astDstNode[j].unShape.stWhc.u32Chn;
                pstNnieParam->astSegData[i].astDst[j].unShape.stWhc.u32Height = pstNnieParam->pstModel->astSeg[i].astDstNode[j].unShape.stWhc.u32Height;
                pstNnieParam->astSegData[i].astDst[j].unShape.stWhc.u32Width = pstNnieParam->pstModel->astSeg[i].astDstNode[j].unShape.stWhc.u32Width;
                pstNnieParam->astSegData[i].astDst[j].u32Num = u32Num;
            }
        }
    }

    return HI_SUCCESS;
}
根据 pstNnieParam->pstModel填充pstNnieParam->astSegData段的输入和输出blob,固定的逻辑
函数SAMPLE_SVP_NNIE_GetTaskAndBlobBufSize
/*****************************************************************************
 * Prototype    : SAMPLE_SVP_NNIE_GetTaskAndBlobBufSize
 * Description  : Get taskinfo and blob memory size
 * Input        : SAMPLE_SVP_NNIE_CFG_S   *pstNnieCfg      NNIE configure parameter
 *                                                         (max input num, max ROI num)
 *                SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam    NNIE parameter; its
 *                                                         au32TaskBufSize and blob
 *                                                         strides are written
 * Output       : HI_U32 *pu32TotalTaskBufSize   Sum of all segs' task buf sizes
 *                HI_U32 *pu32TmpBufSize         Tmp buffer size taken from the model
 *                SAMPLE_SVP_NNIE_BLOB_SIZE_S astBlobSize[]
 *                                               each seg input and output blob mem size
 *                HI_U32 *pu32TotalSize          Total mem size. The sizes are ADDED
 *                                               to the incoming value, so the caller
 *                                               must initialise it.
 * Return Value : HI_S32,HI_SUCCESS:Success,Other:failure
 *****************************************************************************/
static HI_S32 SAMPLE_SVP_NNIE_GetTaskAndBlobBufSize(SAMPLE_SVP_NNIE_CFG_S *pstNnieCfg,
    SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, HI_U32 *pu32TotalTaskBufSize, HI_U32 *pu32TmpBufSize,
    SAMPLE_SVP_NNIE_BLOB_SIZE_S astBlobSize[], HI_U32 *pu32TotalSize)
{
    HI_S32 s32Ret = HI_SUCCESS;
    HI_U32 i = 0, j = 0;
    HI_U32 u32TotalStep = 0;

    /*Get each seg's task buf size*/
    s32Ret = HI_MPI_SVP_NNIE_GetTskBufSize(pstNnieCfg->u32MaxInputNum, pstNnieCfg->u32MaxRoiNum,
        pstNnieParam->pstModel, pstNnieParam->au32TaskBufSize, pstNnieParam->pstModel->u32NetSegNum);
    SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret, s32Ret, SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,HI_MPI_SVP_NNIE_GetTaskSize failed!\n");

    /*Get total task buf size*/
    /* Accumulate the task buffer size of every segment. */
    *pu32TotalTaskBufSize = 0;
    for (i = 0; i < pstNnieParam->pstModel->u32NetSegNum; i++)
    {
        *pu32TotalTaskBufSize += pstNnieParam->au32TaskBufSize[i];
    }

    /*Get tmp buf size*/
    *pu32TmpBufSize = pstNnieParam->pstModel->u32TmpBufSize;
    *pu32TotalSize += *pu32TotalTaskBufSize + *pu32TmpBufSize;

    /*calculate Blob mem size*/
    for (i = 0; i < pstNnieParam->pstModel->u32NetSegNum; i++)
    {
        if (SVP_NNIE_NET_TYPE_RECURRENT == pstNnieParam->pstModel->astSeg[i].enNetType)
        {
            /* Sum the step counts stored in the step array of the first src blob. */
            for (j = 0; j < pstNnieParam->astSegData[i].astSrc[0].u32Num; j++)
            {
                u32TotalStep += *((HI_S32 *)pstNnieParam->astSegData[i].astSrc[0].unShape.stSeq.u64VirAddrStep + j);
            }
        }

        /*the first seg's Src Blob mem size, other seg's src blobs from the output blobs of
        those segs before it or from software output results*/
        if (i == 0)
        {
            SAMPLE_SVP_NNIE_GetBlobMemSize(&(pstNnieParam->pstModel->astSeg[i].astSrcNode[0]),
                pstNnieParam->pstModel->astSeg[i].u16SrcNum, u32TotalStep, &(pstNnieParam->astSegData[i].astSrc[0]),
                SAMPLE_SVP_NNIE_ALIGN_16, pu32TotalSize, &(astBlobSize[i].au32SrcSize[0]));
        }

        /*Get each seg's Dst Blob mem size*/
        SAMPLE_SVP_NNIE_GetBlobMemSize(&(pstNnieParam->pstModel->astSeg[i].astDstNode[0]),
            pstNnieParam->pstModel->astSeg[i].u16DstNum, u32TotalStep, &(pstNnieParam->astSegData[i].astDst[0]),
            SAMPLE_SVP_NNIE_ALIGN_16, pu32TotalSize, &(astBlobSize[i].au32DstSize[0]));
    }

    return s32Ret;
}
/*****************************************************************************
 * Prototype    : SAMPLE_SVP_NNIE_GetBlobMemSize
 * Description  : Compute the stride and memory size of each blob of a node
 *                list and add the sizes to a running total.
 * Input        : SVP_NNIE_NODE_S astNnieNode[]  NNIE nodes (type and shape)
 *                HI_U32 u32NodeNum              Number of nodes / blobs
 *                HI_U32 u32TotalStep            Total step count, used for
 *                                               SEQ_S32 blobs only
 *                SVP_BLOB_S astBlob[]           Blobs; u32Num is read,
 *                                               u32Stride is written
 *                HI_U32 u32Align                Stride align type
 *                                               (SAMPLE_SVP_NNIE_ALIGN_16
 *                                               selects 16, anything else 32)
 * Output       : HI_U32 *pu32TotalSize          Every blob size is added to it
 *                HI_U32 au32BlobSize[]          Size of each blob
 * Return Value : VOID
 *****************************************************************************/
static void SAMPLE_SVP_NNIE_GetBlobMemSize(SVP_NNIE_NODE_S astNnieNode[], HI_U32 u32NodeNum,
    HI_U32 u32TotalStep, SVP_BLOB_S astBlob[], HI_U32 u32Align, HI_U32 *pu32TotalSize, HI_U32 au32BlobSize[])
{
    HI_U32 u32Idx;

    for (u32Idx = 0; u32Idx < u32NodeNum; ++u32Idx)
    {
        const SVP_NNIE_NODE_S *pstNode = &astNnieNode[u32Idx];
        const int bIsSeq = (SVP_BLOB_TYPE_SEQ_S32 == pstNode->enType);
        HI_U32 u32ElemSize;
        HI_U32 u32RowStride;

        /* 32-bit blob types use 4-byte elements, every other type 1 byte. */
        switch (pstNode->enType)
        {
            case SVP_BLOB_TYPE_S32:
            case SVP_BLOB_TYPE_VEC_S32:
            case SVP_BLOB_TYPE_SEQ_S32:
                u32ElemSize = sizeof(HI_U32);
                break;
            default:
                u32ElemSize = sizeof(HI_U8);
                break;
        }

        if (bIsSeq)
        {
            /* Sequence blob: one aligned row of u32Dim elements per step. */
            u32RowStride = (SAMPLE_SVP_NNIE_ALIGN_16 == u32Align)
                ? SAMPLE_SVP_NNIE_ALIGN16(pstNode->unShape.u32Dim * u32ElemSize)
                : SAMPLE_SVP_NNIE_ALIGN32(pstNode->unShape.u32Dim * u32ElemSize);
            au32BlobSize[u32Idx] = u32TotalStep * u32RowStride;
        }
        else
        {
            /* W/H/C blob: aligned row of u32Width elements, times rows, channels and frames. */
            u32RowStride = (SAMPLE_SVP_NNIE_ALIGN_16 == u32Align)
                ? SAMPLE_SVP_NNIE_ALIGN16(pstNode->unShape.stWhc.u32Width * u32ElemSize)
                : SAMPLE_SVP_NNIE_ALIGN32(pstNode->unShape.stWhc.u32Width * u32ElemSize);
            au32BlobSize[u32Idx] = astBlob[u32Idx].u32Num * u32RowStride * pstNode->unShape.stWhc.u32Height *
                pstNode->unShape.stWhc.u32Chn;
        }

        *pu32TotalSize += au32BlobSize[u32Idx];
        astBlob[u32Idx].u32Stride = u32RowStride;
    }
}
结构体SVP_NNIE_NODE_S:
/* Description of one input or output node of a segment: blob type, shape, id and name. */
typedef struct hiSVP_NNIE_NODE_S {
    SVP_BLOB_TYPE_E enType; /* blob type of the node */
    union {
        struct {
            HI_U32 u32Width;
            HI_U32 u32Height;
            HI_U32 u32Chn;
        } stWhc;       /* width/height/channel shape */
        HI_U32 u32Dim; /* dimension; the sample code reads this for SEQ_S32 nodes */
    } unShape;
    HI_U32 u32NodeId;
    HI_CHAR szName[SVP_NNIE_NODE_NAME_LEN]; /* Report layer bottom name or data layer bottom name */
} SVP_NNIE_NODE_S;
函数SAMPLE_COMM_SVP_MallocCached
/*
 * Allocate cacheable MMZ memory in user space (thin wrapper around
 * HI_MPI_SYS_MmzAlloc_Cached).
 *
 *   HI_S32 HI_MPI_SYS_MmzAlloc_Cached(HI_U64 *pu64PhyAddr, HI_VOID **ppVirAddr,
 *       const HI_CHAR *pstrMmb, const HI_CHAR *pstrZone, HI_U32 u32Len);
 *
 * pszMmb:      string pointer to the Mmb name. Input.
 * pszZone:     string pointer to the MMZ zone name. Input.
 * pu64PhyAddr: pointer receiving the physical address. Output.
 * ppvVirAddr:  pointer to the pointer receiving the virtual address. Output.
 * u32Size:     size of the memory block. Input.
 *
 * Difference from HI_MPI_SYS_MmzAlloc: memory allocated through this
 * interface supports caching. For frequently used memory it is best to
 * allocate through this interface, which improves CPU read/write efficiency
 * and system performance; for example IVE operators read and write large
 * amounts of data frequently. When the CPU accesses such memory the data is
 * placed in the cache, while hardware devices (such as IVE) can only access
 * physical memory and cannot see the cache contents. For memory that both
 * the CPU and hardware operate on, HI_MPI_SYS_MmzFlushCache must be called
 * to keep the data synchronised.
 *
 * Returns whatever HI_MPI_SYS_MmzAlloc_Cached returns.
 */
HI_S32 SAMPLE_COMM_SVP_MallocCached(HI_CHAR *pszMmb, HI_CHAR *pszZone, HI_U64 *pu64PhyAddr, HI_VOID **ppvVirAddr, HI_U32 u32Size)
{
    HI_S32 s32Ret = HI_SUCCESS;

    s32Ret = HI_MPI_SYS_MmzAlloc_Cached(pu64PhyAddr, ppvVirAddr, pszMmb, pszZone, u32Size);

    return s32Ret;
}
函数SAMPLE_COMM_SVP_FlushCache
/*
 * Flush cached memory (thin wrapper around HI_MPI_SYS_MmzFlushCache).
 *
 * Flushes the cache contents to memory and invalidates the cache contents.
 *
 *   HI_S32 HI_MPI_SYS_MmzFlushCache(HI_U64 u64PhyAddr, HI_VOID *pVirAddr,
 *       HI_U32 u32Size);
 *
 * u64PhyAddr: start physical address of the data to operate on. Input.
 * pvVirAddr:  start virtual address of the data to operate on; must not be
 *             NULL. Input.
 * u32Size:    size of the data to operate on. Input.
 *
 * Returns whatever HI_MPI_SYS_MmzFlushCache returns.
 */
HI_S32 SAMPLE_COMM_SVP_FlushCache(HI_U64 u64PhyAddr, HI_VOID *pvVirAddr, HI_U32 u32Size)
{
    HI_S32 s32Ret = HI_SUCCESS;

    s32Ret = HI_MPI_SYS_MmzFlushCache(u64PhyAddr, pvVirAddr, u32Size);

    return s32Ret;
}
函数SAMPLE_SVP_NNIE_Yolov3_SoftwareInit
/*
 * Initialise the YOLOv3 software (post-processing) parameters and allocate
 * the assist buffer that holds the temporary and result data.
 *
 * pstCfg:           NNIE configuration (not read by this function).
 * pstNnieParam:     NNIE parameters; the shape of the first input blob and
 *                   the model's output blob count are read.
 * pstSoftWareParam: output; receives the fixed YOLOv3 settings and the
 *                   addresses/shapes of the result blobs.
 *
 * The single cached allocation is laid out as
 *   [get-result tmp buf][stDstRoi][stDstScore][stClassRoiNum].
 *
 * Returns HI_SUCCESS on success, HI_FAILURE when segment 0 does not have
 * SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM output blobs, or the allocator's
 * error code when the allocation fails.
 */
static HI_S32 SAMPLE_SVP_NNIE_Yolov3_SoftwareInit(SAMPLE_SVP_NNIE_CFG_S* pstCfg,
    SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S* pstSoftWareParam)
{
    /* Bias values, one row of six per output scale. */
    static const HI_FLOAT s_af32Bias[3][6] = {
        {116, 90, 156, 198, 373, 326},
        {30, 61, 62, 45, 59, 119},
        {10, 13, 16, 30, 33, 23}
    };
    HI_S32 s32Ret = HI_SUCCESS;
    HI_U32 u32ClassNum = 0;
    HI_U32 u32TotalSize = 0;
    HI_U32 u32DstRoiSize = 0;
    HI_U32 u32DstScoreSize = 0;
    HI_U32 u32ClassRoiNumSize = 0;
    HI_U32 u32TmpBufTotalSize = 0;
    HI_U64 u64PhyAddr = 0;
    HI_U8* pu8VirAddr = NULL;
    HI_U32 i = 0, j = 0;

    pstSoftWareParam->u32OriImHeight = pstNnieParam->astSegData[0].astSrc[0].unShape.stWhc.u32Height;
    pstSoftWareParam->u32OriImWidth = pstNnieParam->astSegData[0].astSrc[0].unShape.stWhc.u32Width;
    pstSoftWareParam->u32BboxNumEachGrid = 3;
    pstSoftWareParam->u32ClassNum = 80;
    pstSoftWareParam->au32GridNumHeight[0] = 13;
    pstSoftWareParam->au32GridNumHeight[1] = 26;
    pstSoftWareParam->au32GridNumHeight[2] = 52;
    pstSoftWareParam->au32GridNumWidth[0] = 13;
    pstSoftWareParam->au32GridNumWidth[1] = 26;
    pstSoftWareParam->au32GridNumWidth[2] = 52;
    pstSoftWareParam->u32NmsThresh = (HI_U32)(0.3f*SAMPLE_SVP_NNIE_QUANT_BASE);
    pstSoftWareParam->u32ConfThresh = (HI_U32)(0.5f*SAMPLE_SVP_NNIE_QUANT_BASE);
    pstSoftWareParam->u32MaxRoiNum = 10;
    for (i = 0; i < 3; i++)
    {
        for (j = 0; j < 6; j++)
        {
            pstSoftWareParam->af32Bias[i][j] = s_af32Bias[i][j];
        }
    }

    /*Malloc assist buffer memory*/
    /* One slot more than the configured class count is reserved in every result blob. */
    u32ClassNum = pstSoftWareParam->u32ClassNum+1;

    SAMPLE_SVP_CHECK_EXPR_RET(SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM != pstNnieParam->pstModel->astSeg[0].u16DstNum,
        HI_FAILURE,SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,pstNnieParam->pstModel->astSeg[0].u16DstNum(%d) should be %d!\n",
        pstNnieParam->pstModel->astSeg[0].u16DstNum,SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM);

    u32TmpBufTotalSize = SAMPLE_SVP_NNIE_Yolov3_GetResultTmpBuf(pstNnieParam,pstSoftWareParam);
    u32DstRoiSize = SAMPLE_SVP_NNIE_ALIGN16(u32ClassNum*pstSoftWareParam->u32MaxRoiNum*sizeof(HI_U32)*SAMPLE_SVP_NNIE_COORDI_NUM);
    u32DstScoreSize = SAMPLE_SVP_NNIE_ALIGN16(u32ClassNum*pstSoftWareParam->u32MaxRoiNum*sizeof(HI_U32));
    u32ClassRoiNumSize = SAMPLE_SVP_NNIE_ALIGN16(u32ClassNum*sizeof(HI_U32));
    u32TotalSize = u32TotalSize+u32DstRoiSize+u32DstScoreSize+u32ClassRoiNumSize+u32TmpBufTotalSize;

    s32Ret = SAMPLE_COMM_SVP_MallocCached("SAMPLE_YOLOV3_INIT",NULL,(HI_U64*)&u64PhyAddr,(void**)&pu8VirAddr,u32TotalSize);
    SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret,s32Ret,SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,Malloc memory failed!\n");
    memset(pu8VirAddr,0, u32TotalSize);
    SAMPLE_COMM_SVP_FlushCache(u64PhyAddr,(void*)pu8VirAddr,u32TotalSize);

    /*set each tmp buffer addr*/
    pstSoftWareParam->stGetResultTmpBuf.u64PhyAddr = u64PhyAddr;
    pstSoftWareParam->stGetResultTmpBuf.u64VirAddr = (HI_U64)(pu8VirAddr);

    /*set result blob*/
    pstSoftWareParam->stDstRoi.enType = SVP_BLOB_TYPE_S32;
    pstSoftWareParam->stDstRoi.u64PhyAddr = u64PhyAddr+u32TmpBufTotalSize;
    pstSoftWareParam->stDstRoi.u64VirAddr = (HI_U64)(pu8VirAddr+u32TmpBufTotalSize);
    pstSoftWareParam->stDstRoi.u32Stride = SAMPLE_SVP_NNIE_ALIGN16(u32ClassNum*pstSoftWareParam->u32MaxRoiNum*sizeof(HI_U32)*SAMPLE_SVP_NNIE_COORDI_NUM);
    pstSoftWareParam->stDstRoi.u32Num = 1;
    pstSoftWareParam->stDstRoi.unShape.stWhc.u32Chn = 1;
    pstSoftWareParam->stDstRoi.unShape.stWhc.u32Height = 1;
    pstSoftWareParam->stDstRoi.unShape.stWhc.u32Width = u32ClassNum*pstSoftWareParam->u32MaxRoiNum*SAMPLE_SVP_NNIE_COORDI_NUM;

    pstSoftWareParam->stDstScore.enType = SVP_BLOB_TYPE_S32;
    pstSoftWareParam->stDstScore.u64PhyAddr = u64PhyAddr+u32TmpBufTotalSize+u32DstRoiSize;
    pstSoftWareParam->stDstScore.u64VirAddr = (HI_U64)(pu8VirAddr+u32TmpBufTotalSize+u32DstRoiSize);
    pstSoftWareParam->stDstScore.u32Stride = SAMPLE_SVP_NNIE_ALIGN16(u32ClassNum*pstSoftWareParam->u32MaxRoiNum*sizeof(HI_U32));
    pstSoftWareParam->stDstScore.u32Num = 1;
    pstSoftWareParam->stDstScore.unShape.stWhc.u32Chn = 1;
    pstSoftWareParam->stDstScore.unShape.stWhc.u32Height = 1;
    pstSoftWareParam->stDstScore.unShape.stWhc.u32Width = u32ClassNum*pstSoftWareParam->u32MaxRoiNum;

    pstSoftWareParam->stClassRoiNum.enType = SVP_BLOB_TYPE_S32;
    pstSoftWareParam->stClassRoiNum.u64PhyAddr = u64PhyAddr+u32TmpBufTotalSize+u32DstRoiSize+u32DstScoreSize;
    pstSoftWareParam->stClassRoiNum.u64VirAddr = (HI_U64)(pu8VirAddr+u32TmpBufTotalSize+u32DstRoiSize+u32DstScoreSize);
    pstSoftWareParam->stClassRoiNum.u32Stride = SAMPLE_SVP_NNIE_ALIGN16(u32ClassNum*sizeof(HI_U32));
    pstSoftWareParam->stClassRoiNum.u32Num = 1;
    pstSoftWareParam->stClassRoiNum.unShape.stWhc.u32Chn = 1;
    pstSoftWareParam->stClassRoiNum.unShape.stWhc.u32Height = 1;
    pstSoftWareParam->stClassRoiNum.unShape.stWhc.u32Width = u32ClassNum;

    return s32Ret;
}
主要涉及yolo3的一些个性化设置
结构体SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S
/*Yolov3 software parameter*/
typedef struct hiSAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S
{
    HI_U32 u32OriImHeight;             /* height of the network input picture */
    HI_U32 u32OriImWidth;              /* width of the network input picture */
    HI_U32 u32BboxNumEachGrid;         /* number of bounding boxes per grid cell */
    HI_U32 u32ClassNum;                /* number of classes */
    HI_U32 au32GridNumHeight[3];       /* grid rows of each of the three outputs */
    HI_U32 au32GridNumWidth[3];        /* grid columns of each of the three outputs */
    HI_U32 u32NmsThresh;               /* NMS threshold, scaled by SAMPLE_SVP_NNIE_QUANT_BASE */
    HI_U32 u32ConfThresh;              /* confidence threshold, scaled by SAMPLE_SVP_NNIE_QUANT_BASE */
    HI_U32 u32MaxRoiNum;               /* max number of ROIs */
    HI_FLOAT af32Bias[3][6];           /* six bias values per output; presumably anchor sizes - TODO confirm */
    SVP_MEM_INFO_S stGetResultTmpBuf;  /* temporary buffer used while getting results */
    SVP_DST_BLOB_S stClassRoiNum;      /* result blob: one value per class */
    SVP_DST_BLOB_S stDstRoi;           /* result blob: ROI coordinates */
    SVP_DST_BLOB_S stDstScore;         /* result blob: ROI scores */
} SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S;
– 部分6
/*Fill src data*/
/* Select node 0 of segment 0 as the input blob to fill. */
stInputDataIdx.u32SegIdx = 0;
stInputDataIdx.u32NodeIdx = 0;
/* Read the picture named in stNnieCfg into the selected input blob. */
s32Ret = SAMPLE_SVP_NNIE_FillSrcData(&stNnieCfg,&s_stYolov3NnieParam,&stInputDataIdx);
static HI_S32 SAMPLE_SVP_NNIE_FillSrcData(SAMPLE_SVP_NNIE_CFG_S* pstNnieCfg,SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam, SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S* pstInputDataIdx)
{FILE* fp = NULL;HI_U32 i =0, j = 0, n = 0;HI_U32 u32Height = 0, u32Width = 0, u32Chn = 0, u32Stride = 0, u32Dim = 0;HI_U32 u32VarSize = 0;HI_S32 s32Ret = HI_SUCCESS;HI_U8*pu8PicAddr = NULL;HI_U32*pu32StepAddr = NULL;HI_U32 u32SegIdx = pstInputDataIdx->u32SegIdx;HI_U32 u32NodeIdx = pstInputDataIdx->u32NodeIdx;HI_U32 u32TotalStepNum = 0;/*open file*/if (NULL != pstNnieCfg->pszPic){fp = fopen(pstNnieCfg->pszPic,"rb");SAMPLE_SVP_CHECK_EXPR_RET(NULL == fp,HI_INVALID_VALUE,SAMPLE_SVP_ERR_LEVEL_ERROR,"Error, open file failed!\n");}/*get data size*/if(SVP_BLOB_TYPE_U8 <= pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].enType &&SVP_BLOB_TYPE_YVU422SP >= pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].enType){u32VarSize = sizeof(HI_U8);}else{u32VarSize = sizeof(HI_U32);}/*fill src data*/if(SVP_BLOB_TYPE_SEQ_S32 == pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].enType){u32Dim = pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].unShape.stSeq.u32Dim;u32Stride = pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Stride;pu32StepAddr = (HI_U32*)(pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].unShape.stSeq.u64VirAddrStep);pu8PicAddr = (HI_U8*)(pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u64VirAddr);for(n = 0; n < pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Num; n++){for(i = 0;i < *(pu32StepAddr+n); i++){s32Ret = fread(pu8PicAddr,u32Dim*u32VarSize,1,fp);SAMPLE_SVP_CHECK_EXPR_GOTO(1 != s32Ret,FAIL,SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,Read image file failed!\n");pu8PicAddr += u32Stride;}u32TotalStepNum += *(pu32StepAddr+n);}SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u64PhyAddr,(HI_VOID *) pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u64VirAddr,u32TotalStepNum*u32Stride);}else{u32Height = pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].unShape.stWhc.u32Height;u32Width = pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].unShape.stWhc.u32Width;u32Chn = 
pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].unShape.stWhc.u32Chn;u32Stride = pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Stride;pu8PicAddr = (HI_U8*)(pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u64VirAddr);if(SVP_BLOB_TYPE_YVU420SP== pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].enType){for(n = 0; n < pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Num; n++){for(i = 0; i < u32Chn*u32Height/2; i++){s32Ret = fread(pu8PicAddr,u32Width*u32VarSize,1,fp);SAMPLE_SVP_CHECK_EXPR_GOTO(1 != s32Ret,FAIL,SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,Read image file failed!\n");pu8PicAddr += u32Stride;}}}else if(SVP_BLOB_TYPE_YVU422SP== pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].enType){for(n = 0; n < pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Num; n++){for(i = 0; i < u32Height*2; i++){s32Ret = fread(pu8PicAddr,u32Width*u32VarSize,1,fp);SAMPLE_SVP_CHECK_EXPR_GOTO(1 != s32Ret,FAIL,SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,Read image file failed!\n");pu8PicAddr += u32Stride;}}}else{for(n = 0; n < pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Num; n++){for(i = 0;i < u32Chn; i++){for(j = 0; j < u32Height; j++){s32Ret = fread(pu8PicAddr,u32Width*u32VarSize,1,fp);SAMPLE_SVP_CHECK_EXPR_GOTO(1 != s32Ret,FAIL,SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,Read image file failed!\n");pu8PicAddr += u32Stride;}}}}SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u64PhyAddr,(HI_VOID *) pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u64VirAddr,pstNnieParam->astSegData[u32SegIdx].astSrc[u32NodeIdx].u32Num*u32Chn*u32Height*u32Stride);}fclose(fp);return HI_SUCCESS;
FAIL:fclose(fp);return HI_FAILURE;
}
主要完成了读取图片内容,根据pstInputDataIdx放入pstNnieParam里面。
– 部分7
/* Run segment 0. HI_TRUE is the bInstant argument, which makes
   SAMPLE_SVP_NNIE_Forward poll HI_MPI_SVP_NNIE_Query until it stops reporting a timeout. */
stProcSegIdx.u32SegIdx = 0;
s32Ret = SAMPLE_SVP_NNIE_Forward(&s_stYolov3NnieParam,&stInputDataIdx,&stProcSegIdx,HI_TRUE);
tatic HI_S32 SAMPLE_SVP_NNIE_Forward(SAMPLE_SVP_NNIE_PARAM_S *pstNnieParam,SAMPLE_SVP_NNIE_INPUT_DATA_INDEX_S* pstInputDataIdx,SAMPLE_SVP_NNIE_PROCESS_SEG_INDEX_S* pstProcSegIdx,HI_BOOL bInstant)
{HI_S32 s32Ret = HI_SUCCESS;HI_U32 i = 0, j = 0;HI_BOOL bFinish = HI_FALSE;SVP_NNIE_HANDLE hSvpNnieHandle = 0;HI_U32 u32TotalStepNum = 0;SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].stTskBuf.u64PhyAddr,(HI_VOID *) pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].stTskBuf.u64VirAddr,pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].stTskBuf.u32Size);/*set input blob according to node name*/if(pstInputDataIdx->u32SegIdx != pstProcSegIdx->u32SegIdx){for(i = 0; i < pstNnieParam->pstModel->astSeg[pstProcSegIdx->u32SegIdx].u16SrcNum; i++){for(j = 0; j < pstNnieParam->pstModel->astSeg[pstInputDataIdx->u32SegIdx].u16DstNum; j++){if(0 == strncmp(pstNnieParam->pstModel->astSeg[pstInputDataIdx->u32SegIdx].astDstNode[j].szName,pstNnieParam->pstModel->astSeg[pstProcSegIdx->u32SegIdx].astSrcNode[i].szName,SVP_NNIE_NODE_NAME_LEN)){pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astSrc[i] =pstNnieParam->astSegData[pstInputDataIdx->u32SegIdx].astDst[j];break;}}SAMPLE_SVP_CHECK_EXPR_RET((j == pstNnieParam->pstModel->astSeg[pstInputDataIdx->u32SegIdx].u16DstNum),HI_FAILURE,SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,can't find %d-th seg's %d-th src blob!\n",pstProcSegIdx->u32SegIdx,i);}}/*NNIE_Forward*///多节点输入输出的CNN类型网络预测//HI_S32 HI_MPI_SVP_NNIE_Forward(SVP_NNIE_HANDLE *phSvpNnieHandle, const SVP_SRC_BLOB_S astSrc[],const SVP_NNIE_MODEL_S *pstModel, const SVP_DST_BLOB_S astDst[],const SVP_NNIE_FORWARD_CTRL_S *pstForwardCtrl,HI_BOOL bInstant);//phSvpNnieHandle handle指针。输出//astSrc[] 多个节点输入,节点的顺序跟网络描述中的顺序要求一致,支持多帧同时输入。输入//pstModel 网络模型结构体。输入//astDst[] 网络段的多个节点输出,包含用户标记需要上报输出的中间层结果,以及网络段的最终结果。输出//pstForwardCtrl 控制结构体。输入//bInstant 及时返回结果标志。输入
s32Ret = HI_MPI_SVP_NNIE_Forward(&hSvpNnieHandle,pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astSrc,pstNnieParam->pstModel, pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst,&pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx], bInstant);SAMPLE_SVP_CHECK_EXPR_RET(HI_SUCCESS != s32Ret,s32Ret,SAMPLE_SVP_ERR_LEVEL_ERROR,"Error,HI_MPI_SVP_NNIE_Forward failed!\n");if(bInstant){/*Wait NNIE finish*///查询任务是否完成。//HI_S32 HI_MPI_SVP_NNIE_Query(SVP_NNIE_ID_E enNnieId,SVP_NNIE_HANDLE svpNnieHandle,HI_BOOL *pbFinish,HI_BOOL bBlock);//enNnieId 任务所运行的NNIE 核指示标志输入//svpNnieHandle handle。输入//pbFinish 是否完成标志。输出//bBlock 是否阻塞查询。输入while(HI_ERR_SVP_NNIE_QUERY_TIMEOUT == (s32Ret = HI_MPI_SVP_NNIE_Query(pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].enNnieId,hSvpNnieHandle, &bFinish, HI_TRUE))){usleep(100);SAMPLE_SVP_TRACE(SAMPLE_SVP_ERR_LEVEL_INFO,"HI_MPI_SVP_NNIE_Query Query timeout!\n");}}bFinish = HI_FALSE;for(i = 0; i < pstNnieParam->astForwardCtrl[pstProcSegIdx->u32SegIdx].u32DstNum; i++){if(SVP_BLOB_TYPE_SEQ_S32 == pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].enType){for(j = 0; j < pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Num; j++){u32TotalStepNum += *((HI_U32*)(pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].unShape.stSeq.u64VirAddrStep)+j);}SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64PhyAddr,(HI_VOID *) pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64VirAddr,u32TotalStepNum*pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Stride);}else{SAMPLE_COMM_SVP_FlushCache(pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64PhyAddr,(HI_VOID *) 
pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u64VirAddr,pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Num*pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].unShape.stWhc.u32Chn*pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].unShape.stWhc.u32Height*pstNnieParam->astSegData[pstProcSegIdx->u32SegIdx].astDst[i].u32Stride);}}return s32Ret;
}
主要进行模型推理(前向计算):调用HI_MPI_SVP_NNIE_Forward执行pstProcSegIdx指定的段,并把推理结果写入pstNnieParam中该段的输出blob(astDst)。
– 部分8
/* Software post-processing: turn the segment-0 output blobs into per-class ROIs and scores. */
s32Ret = SAMPLE_SVP_NNIE_Yolov3_GetResult(&s_stYolov3NnieParam,&s_stYolov3SoftwareParam);
*****************************************************************************
* Prototype : SAMPLE_SVP_NNIE_Yolov3_GetResult
* Description : this function is used to Get Yolov3 result
* Input : SAMPLE_SVP_NNIE_PARAM_S* pstNnieParam [IN] the pointer to YOLOV3 NNIE parameter
* SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S* pstSoftwareParam [IN] the pointer to YOLOV3 software parameter
*
*
*
*
* Output :
* Return Value : HI_SUCCESS: Success;Error codes: Failure.
* Spec :
* Calls :
* Called By :
* History:
*
* 1. Date : 2017-11-10
* Author :
* Modification : Create
*
*****************************************************************************/
HI_S32 SAMPLE_SVP_NNIE_Yolov3_GetResult(SAMPLE_SVP_NNIE_PARAM_S*pstNnieParam,SAMPLE_SVP_NNIE_YOLOV3_SOFTWARE_PARAM_S* pstSoftwareParam)
{HI_U32 i = 0;HI_S32 *aps32InputBlob[SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM] = {0};HI_U32 au32Stride[SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM] = {0};for(i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++){aps32InputBlob[i] = (HI_S32*)pstNnieParam->astSegData[0].astDst[i].u64VirAddr;au32Stride[i] = pstNnieParam->astSegData[0].astDst[i].u32Stride;}return SVP_NNIE_Yolov3_GetResult(aps32InputBlob,pstSoftwareParam->au32GridNumWidth,pstSoftwareParam->au32GridNumHeight,au32Stride,pstSoftwareParam->u32BboxNumEachGrid,pstSoftwareParam->u32ClassNum,pstSoftwareParam->u32OriImWidth,pstSoftwareParam->u32OriImWidth,pstSoftwareParam->u32MaxRoiNum,pstSoftwareParam->u32NmsThresh,pstSoftwareParam->u32ConfThresh,pstSoftwareParam->af32Bias,(HI_S32*)pstSoftwareParam->stGetResultTmpBuf.u64VirAddr,(HI_S32*)pstSoftwareParam->stDstScore.u64VirAddr,(HI_S32*)pstSoftwareParam->stDstRoi.u64VirAddr,(HI_S32*)pstSoftwareParam->stClassRoiNum.u64VirAddr);
}
主要完成aps32InputBlob和au32Stride的填充后调用SVP_NNIE_Yolov3_GetResult
/*****************************************************************************
* Prototype : SVP_NNIE_Yolov3_GetResult
* Description : Yolov3 GetResult function
* Input : HI_S32 **pps32InputData [IN] pointer to the input data
* HI_U32 au32GridNumWidth[] [IN] Grid num in width direction
* HI_U32 au32GridNumHeight[] [IN] Grid num in height direction
* HI_U32 au32Stride[] [IN] stride of input data
* HI_U32 u32EachGridBbox [IN] Bbox num of each gird
* HI_U32 u32ClassNum [IN] class num
* HI_U32 u32SrcWidth [IN] input image width
* HI_U32 u32SrcHeight [IN] input image height
* HI_U32 u32MaxRoiNum [IN] Max output roi num
* HI_U32 u32NmsThresh [IN] NMS thresh
* HI_U32 u32ConfThresh [IN] conf thresh
* HI_U32 af32Bias[][] [IN] bias
* HI_U32* pu32TmpBuf [IN] assist buffer
* HI_S32 *ps32DstScores [OUT] dst score
* HI_S32 *ps32DstRoi [OUT] dst roi
* HI_S32 *ps32ClassRoiNum [OUT] class roi num
*
* Output :
* Return Value : HI_FLOAT: max score value.
* Spec :
* Calls :
* Called By :
* History:
*
* 1. Date : 2017-11-14
* Author :
* Modification : Create
*
*****************************************************************************/
static HI_S32 SVP_NNIE_Yolov3_GetResult(HI_S32 **pps32InputData,HI_U32 au32GridNumWidth[],HI_U32 au32GridNumHeight[],HI_U32 au32Stride[],HI_U32 u32EachGridBbox,HI_U32 u32ClassNum,HI_U32 u32SrcWidth,HI_U32 u32SrcHeight,HI_U32 u32MaxRoiNum,HI_U32 u32NmsThresh,HI_U32 u32ConfThresh,HI_FLOAT af32Bias[SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM][SAMPLE_SVP_NNIE_YOLOV3_EACH_GRID_BIAS_NUM],HI_S32* ps32TmpBuf,HI_S32 *ps32DstScore, HI_S32 *ps32DstRoi, HI_S32 *ps32ClassRoiNum)
{HI_S32 *ps32InputBlob = NULL;HI_FLOAT *pf32Permute = NULL;SAMPLE_SVP_NNIE_YOLOV3_BBOX_S *pstBbox = NULL;HI_S32 *ps32AssistBuf = NULL;HI_U32 u32TotalBboxNum = 0;HI_U32 u32ChnOffset = 0;HI_U32 u32HeightOffset = 0;HI_U32 u32BboxNum = 0;HI_U32 u32GridXIdx;HI_U32 u32GridYIdx;HI_U32 u32Offset;HI_FLOAT f32StartX;HI_FLOAT f32StartY;HI_FLOAT f32Width;HI_FLOAT f32Height;HI_FLOAT f32ObjScore;HI_U32 u32MaxValueIndex = 0;HI_FLOAT f32MaxScore;HI_S32 s32ClassScore;HI_U32 u32ClassRoiNum;HI_U32 i = 0, j = 0, k = 0, c = 0, h = 0, w = 0;HI_U32 u32BlobSize = 0;HI_U32 u32MaxBlobSize = 0;//获得最大blob sizefor(i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++){u32BlobSize = au32GridNumWidth[i]*au32GridNumHeight[i]*sizeof(HI_U32)*SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM*u32EachGridBbox;if(u32MaxBlobSize < u32BlobSize){u32MaxBlobSize = u32BlobSize;}}//获得所有bbox数量for(i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++){u32TotalBboxNum += au32GridNumWidth[i]*au32GridNumHeight[i]*u32EachGridBbox;}//get each tmpbuf addrpf32Permute = (HI_FLOAT*)ps32TmpBuf;pstBbox = (SAMPLE_SVP_NNIE_YOLOV3_BBOX_S*)(pf32Permute+u32MaxBlobSize/sizeof(HI_S32));ps32AssistBuf = (HI_S32*)(pstBbox+u32TotalBboxNum);for(i = 0; i < SAMPLE_SVP_NNIE_YOLOV3_REPORT_BLOB_NUM; i++){//permuteu32Offset = 0;ps32InputBlob = pps32InputData[i];u32ChnOffset = au32GridNumHeight[i]*au32Stride[i]/sizeof(HI_S32);u32HeightOffset = au32Stride[i]/sizeof(HI_S32);for (h = 0; h < au32GridNumHeight[i]; h++){for (w = 0; w < au32GridNumWidth[i]; w++){for (c = 0; c < SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM*u32EachGridBbox; c++){pf32Permute[u32Offset++] = (HI_FLOAT)(ps32InputBlob[c*u32ChnOffset+h*u32HeightOffset+w]) / SAMPLE_SVP_NNIE_QUANT_BASE;}}}//decode bbox and calculate scorefor(j = 0; j < au32GridNumWidth[i]*au32GridNumHeight[i]; j++){u32GridXIdx = j % au32GridNumWidth[i];u32GridYIdx = j / au32GridNumWidth[i];for (k = 0; k < u32EachGridBbox; k++){u32MaxValueIndex = 0;u32Offset = (j * u32EachGridBbox + k) * 
SAMPLE_SVP_NNIE_YOLOV3_EACH_BBOX_INFER_RESULT_NUM;//decode bboxf32StartX = ((HI_FLOAT)u32GridXIdx + SAMPLE_SVP_NNIE_SIGMOID(pf32Permute[u32Offset + 0])) / au32GridNumWidth[i];f32StartY = ((HI_FLOAT)u32GridYIdx + SAMPLE_SVP_NNIE_SIGMOID(pf32Permute[u32Offset + 1])) / au32GridNumHeight[i];f32Width = (HI_FLOAT)(exp(pf32Permute[u32Offset + 2]) * af32Bias[i][2*k]) / u32SrcWidth;f32Height = (HI_FLOAT)(exp(pf32Permute[u32Offset + 3]) * af32Bias[i][2*k + 1]) / u32SrcHeight;//calculate score(void)SVP_NNIE_Sigmoid(&pf32Permute[u32Offset + 4], (u32ClassNum+1));f32ObjScore = pf32Permute[u32Offset + 4];f32MaxScore = SVP_NNIE_GetMaxVal(&pf32Permute[u32Offset + 5], u32ClassNum, &u32MaxValueIndex);s32ClassScore = (HI_S32)(f32MaxScore * f32ObjScore*SAMPLE_SVP_NNIE_QUANT_BASE);//filter low score roiif (s32ClassScore > u32ConfThresh){pstBbox[u32BboxNum].f32Xmin= (HI_FLOAT)(f32StartX - f32Width * 0.5f);pstBbox[u32BboxNum].f32Ymin= (HI_FLOAT)(f32StartY - f32Height * 0.5f);pstBbox[u32BboxNum].f32Xmax= (HI_FLOAT)(f32StartX + f32Width * 0.5f);pstBbox[u32BboxNum].f32Ymax= (HI_FLOAT)(f32StartY + f32Height * 0.5f);pstBbox[u32BboxNum].s32ClsScore = s32ClassScore;pstBbox[u32BboxNum].u32Mask= 0;pstBbox[u32BboxNum].u32ClassIdx = (HI_S32)(u32MaxValueIndex+1);u32BboxNum++;}}}}//quick sort(void)SVP_NNIE_Yolo_NonRecursiveArgQuickSort((HI_S32*)pstBbox, 0, u32BboxNum - 1,sizeof(SAMPLE_SVP_NNIE_YOLOV3_BBOX_S)/sizeof(HI_U32),4,(SAMPLE_SVP_NNIE_STACK_S*)ps32AssistBuf);//Yolov3 and Yolov2 have the same Nms operation(void)SVP_NNIE_Yolov2_NonMaxSuppression(pstBbox, u32BboxNum, u32NmsThresh, sizeof(SAMPLE_SVP_NNIE_YOLOV3_BBOX_S)/sizeof(HI_U32));//Get resultfor (i = 1; i < u32ClassNum; i++){u32ClassRoiNum = 0;for(j = 0; j < u32BboxNum; j++){if ((0 == pstBbox[j].u32Mask) && (i == pstBbox[j].u32ClassIdx) && (u32ClassRoiNum < u32MaxRoiNum)){*(ps32DstRoi++) = SAMPLE_SVP_NNIE_MAX((HI_S32)(pstBbox[j].f32Xmin*u32SrcWidth), 0);*(ps32DstRoi++) = SAMPLE_SVP_NNIE_MAX((HI_S32)(pstBbox[j].f32Ymin*u32SrcHeight), 
0);*(ps32DstRoi++) = SAMPLE_SVP_NNIE_MIN((HI_S32)(pstBbox[j].f32Xmax*u32SrcWidth), u32SrcWidth);*(ps32DstRoi++) = SAMPLE_SVP_NNIE_MIN((HI_S32)(pstBbox[j].f32Ymax*u32SrcHeight), u32SrcHeight);*(ps32DstScore++) = pstBbox[j].s32ClsScore;u32ClassRoiNum++;}}*(ps32ClassRoiNum+i) = u32ClassRoiNum;}return HI_SUCCESS;
}
结构体SAMPLE_SVP_NNIE_YOLOV3_BBOX_S:
/*
 * Candidate bounding box shared by the YOLOv2 and YOLOv3 post-processing.
 * The YOLOv3 decoder sorts an array of these as 32-bit words and uses word
 * index 4 as the key, so the field order must not change.
 */
typedef struct hiSAMPLE_SVP_NNIE_YOLOV2_BBOX
{
    HI_FLOAT f32Xmin;       /* left edge, as a fraction of the image width */
    HI_FLOAT f32Xmax;       /* right edge, as a fraction of the image width */
    HI_FLOAT f32Ymin;       /* top edge, as a fraction of the image height */
    HI_FLOAT f32Ymax;       /* bottom edge, as a fraction of the image height */
    HI_S32 s32ClsScore;     /* score scaled by SAMPLE_SVP_NNIE_QUANT_BASE (word index 4: the sort key) */
    HI_U32 u32ClassIdx;     /* 1-based class index */
    HI_U32 u32Mask;         /* set to 0 by the decoder; only boxes whose mask is still 0 after NMS are emitted */
}SAMPLE_SVP_NNIE_YOLOV2_BBOX_S;

/* YOLOv3 reuses the YOLOv2 box layout; the alias follows the struct it names. */
typedef SAMPLE_SVP_NNIE_YOLOV2_BBOX_S SAMPLE_SVP_NNIE_YOLOV3_BBOX_S;
– 部分9
/* Print the boxes whose score reaches f32PrintResultThresh; the return value is deliberately discarded. */
(void)SAMPLE_SVP_NNIE_Detection_PrintResult(&s_stYolov3SoftwareParam.stDstScore,&s_stYolov3SoftwareParam.stDstRoi, &s_stYolov3SoftwareParam.stClassRoiNum,f32PrintResultThresh);
主要完成打印结果
/*
 * Print the detection results class by class.
 *
 * pstDstScore          [IN] blob holding one fixed-point score per ROI
 * pstDstRoi            [IN] blob holding SAMPLE_SVP_NNIE_COORDI_NUM coordinates per ROI
 * pstClassRoiNum       [IN] blob holding the ROI count of every class; its
 *                           width is taken as the class count
 * f32PrintResultThresh [IN] minimum score (after dividing by
 *                           SAMPLE_SVP_NNIE_QUANT_BASE) for a box to be printed
 *
 * Scores and ROIs are stored back to back, class after class, so the running
 * ROI total locates the first entry of each class. Class 0 is only counted,
 * never printed. Always returns HI_SUCCESS.
 */
static HI_S32 SAMPLE_SVP_NNIE_Detection_PrintResult(SVP_BLOB_S *pstDstScore,
    SVP_BLOB_S *pstDstRoi, SVP_BLOB_S *pstClassRoiNum, HI_FLOAT f32PrintResultThresh)
{
    HI_U32 i = 0, j = 0;
    HI_U32 u32RoiNumBias = 0;
    HI_U32 u32ScoreBias = 0;
    HI_U32 u32BboxBias = 0;
    HI_FLOAT f32Score = 0.0f;
    HI_S32* ps32Score = (HI_S32*)pstDstScore->u64VirAddr;
    HI_S32* ps32Roi = (HI_S32*)pstDstRoi->u64VirAddr;
    HI_S32* ps32ClassRoiNum = (HI_S32*)pstClassRoiNum->u64VirAddr;
    HI_U32 u32ClassNum = pstClassRoiNum->unShape.stWhc.u32Width;
    HI_S32 s32XMin = 0,s32YMin= 0,s32XMax = 0,s32YMax = 0;

    u32RoiNumBias += ps32ClassRoiNum[0];
    for (i = 1; i < u32ClassNum; i++)
    {
        u32ScoreBias = u32RoiNumBias;
        u32BboxBias = u32RoiNumBias * SAMPLE_SVP_NNIE_COORDI_NUM;

        /*if the confidence score greater than result threshold, the result will be printed*/
        /* The ROI count is tested first so that the score slot is only read
           when this class really owns it. */
        if(ps32ClassRoiNum[i]!=0 &&
            (HI_FLOAT)ps32Score[u32ScoreBias] / SAMPLE_SVP_NNIE_QUANT_BASE >= f32PrintResultThresh)
        {
            SAMPLE_SVP_TRACE_INFO("==== The %dth class box info====\n", i);
        }

        for (j = 0; j < (HI_U32)ps32ClassRoiNum[i]; j++)
        {
            f32Score = (HI_FLOAT)ps32Score[u32ScoreBias + j] / SAMPLE_SVP_NNIE_QUANT_BASE;
            /* Stops at the first box below the threshold; this presumably relies on
               the scores of a class being stored in descending order - confirm. */
            if (f32Score < f32PrintResultThresh)
            {
                break;
            }
            s32XMin = ps32Roi[u32BboxBias + j*SAMPLE_SVP_NNIE_COORDI_NUM];
            s32YMin = ps32Roi[u32BboxBias + j*SAMPLE_SVP_NNIE_COORDI_NUM + 1];
            s32XMax = ps32Roi[u32BboxBias + j*SAMPLE_SVP_NNIE_COORDI_NUM + 2];
            s32YMax = ps32Roi[u32BboxBias + j*SAMPLE_SVP_NNIE_COORDI_NUM + 3];
            SAMPLE_SVP_TRACE_INFO("%d %d %d %d %f\n", s32XMin, s32YMin, s32XMax, s32YMax, f32Score);
        }
        u32RoiNumBias += ps32ClassRoiNum[i];
    }
    return HI_SUCCESS;
}