x265探索与研究(九):compressFrame()函数

x265探索与研究(九):compressFrame()函数

 

        compressFrame()函数是一个功能繁杂且分析难度较大的函数,主要包括时间戳的初始化工作、access unit的设计、加权预测技术、运动参考帧的估计、当前SliceQP值确定、熵编码相关信息配置、并行计算与否及其空间的申请、SEI相关配置、线程控制、CTU分析、Multi-pass Encoding、滤波与去噪处理等等,其中最重要的就是调用了encodeSlice()函数。

 

        compressFrame()函数中调用的主要函数如下图所示:

 

 x265探索与研究(九):compressFrame()函数

 

下面给出compressFrame()函数的代码分析:

 

/*=============================================================*/
/*
 ====== Analysed by: RuiDong Fang 
 ====== Csdn Blog:	 http://blog.csdn.net/frd2009041510 
 ====== Date:		 2016.04.18
 ====== Function:	 compressFrame()函数,编码一帧。
 */
/*=============================================================*/
/* Encode the frame currently held in m_frame.
 *
 * In order, this function: resets the per-frame timers and statistics,
 * emits the optional access unit delimiter / stream headers, sets up weighted
 * prediction and the motion references, obtains the frame QP from rate
 * control, resets the entropy coders and output substreams, emits the
 * buffering-period and picture-timing SEI messages, compresses every CTU row
 * (wavefront or row-serial), accumulates the row statistics into the frame
 * statistics, writes the slice header and substreams, emits the optional
 * decoded picture hash SEI, reports the access unit size to rate control,
 * drops the reference counts held on the reference frames and finally
 * updates the noise-reduction state.
 *
 * Takes no parameters and returns nothing; all inputs and outputs are
 * FrameEncoder members (m_frame, m_param, m_top, m_nalList, ...). */
void FrameEncoder::compressFrame()
{
    //////////////////////////////////////////////////////////// 1. Initialise per-frame variables
	ProfileScopeEvent(frameThread);	// profiling scope event for the frame thread

    m_startCompressTime = x265_mdate();	// timestamp when frame encoder is given a frame
    m_totalActiveWorkerCount = 0;	// sum of m_activeWorkerCount sampled at end of each CTU
    m_activeWorkerCountSamples = 0;	// count of times m_activeWorkerCount was sampled (think vbv restarts)
    m_totalWorkerElapsedTime = 0;	// total elapsed time spent by worker threads processing CTUs
    m_totalNoWorkerTime = 0;	// total elapsed time without any active worker threads
    m_countRowBlocks = 0;	// count of workers forced to abandon a row because of top dependency
    m_allRowsAvailableTime = 0;	// timestamp when all reference dependencies are resolved
    m_stallStartTime = 0;	// timestamp when worker count becomes 0

    m_completionCount = 0;
    m_bAllRowsStop = false;	// reset for every frame; per the original annotation it is set to true elsewhere when CTU rows must be re-encoded
    m_vbvResetTriggerRow = -1;	// reset for every frame; per the original annotation this is the CTU row that requested a re-encode (-1 = none)

    // Clear the distortion / SSIM accumulators and the frame statistics
    m_SSDY = m_SSDU = m_SSDV = 0;
    m_ssim = 0;
    m_ssimCnt = 0;
    memset(&(m_frame->m_encData->m_frameStats), 0, sizeof(m_frame->m_encData->m_frameStats));	// zero the statistics of the current frame

    //////////////////////////////////////////////////////////// 2. Access unit delimiter and stream headers
    /* Emit access unit delimiter unless this is the first frame and the user is
     * not repeating headers (since AUD is supposed to be the first NAL in the access
     * unit) */
    // i.e. the AUD is written when it is enabled and either m_poc is non-zero
    // or bRepeatHeaders is set.
    Slice* slice = m_frame->m_encData->m_slice;	// slice of the current frame
    if (m_param->bEnableAccessUnitDelimiters && (m_frame->m_poc || m_param->bRepeatHeaders))
    {
        m_bs.resetBits();
        m_entropyCoder.setBitstream(&m_bs);
        m_entropyCoder.codeAUD(*slice);
        m_bs.writeByteAlignment();
        m_nalList.serialize(NAL_UNIT_ACCESS_UNIT_DELIMITER, m_bs);
    }
    // Repeat the stream headers on keyframes when requested
    if (m_frame->m_lowres.bKeyframe && m_param->bRepeatHeaders)
        m_top->getStreamHeaders(m_nalList, m_entropyCoder, m_bs);	// get stream headers

    //////////////////////////////////////////////////////////// 3. Weighted prediction
    // Weighted Prediction parameters estimation.
    bool bUseWeightP = slice->m_sliceType == P_SLICE && slice->m_pps->bUseWeightPred;	// weighted prediction enabled for this P slice
    bool bUseWeightB = slice->m_sliceType == B_SLICE && slice->m_pps->bUseWeightedBiPred;	// weighted bi-prediction enabled for this B slice
    
    // Estimate the weighted prediction parameters
	if (bUseWeightP || bUseWeightB)	// weighted prediction is in use
    {
#if DETAILED_CU_STATS
        m_cuStats.countWeightAnalyze++;
        ScopedElapsedTime time(m_cuStats.weightAnalyzeTime);
#endif
        WeightAnalysis wa(*this);	// job object used to run the weight analysis on another thread
        if (m_pool && wa.tryBondPeers(*this, 1))	// a pool exists and one peer could be bonded
            /* use an idle worker for weight analysis */
            wa.waitForExit();	// block until the bonded worker has finished
        else
            weightAnalyse(*slice, *m_frame, *m_param);	// no worker available: run the analysis in this thread
    }
    else
        slice->disableWeights();	// weighted prediction is not in use

    //////////////////////////////////////////////////////////// 4. Motion references
    // Generate motion references
    int numPredDir = slice->isInterP() ? 1 : slice->isInterB() ? 2 : 0;	// number of reference lists: 1 for inter-P, 2 for inter-B, otherwise 0
    for (int l = 0; l < numPredDir; l++)	// for every reference list
    {
        for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)	// for every reference picture in this list
        {
            WeightParam *w = NULL;
            if ((bUseWeightP || bUseWeightB) && slice->m_weightPredTable[l][ref][0].bPresentFlag)	// weights are in use and present for this reference
                w = slice->m_weightPredTable[l][ref];	// weight parameters for this reference
            m_mref[l][ref].init(slice->m_refPicList[l][ref]->m_reconPic, w, *m_param);	// initialise the motion reference from the reconstructed reference picture (w stays NULL when unweighted)
        }
    }

    //////////////////////////////////////////////////////////// 5. Frame QP from rate control
    /* Get the QP for this frame from rate control. This call may block until
     * frames ahead of it in encode order have called rateControlEnd() */
    int qp = m_top->m_rateControl->rateControlStart(m_frame, &m_rce, m_top);	// start rate control for this frame
    m_rce.newQp = qp;	// remember the QP chosen by rate control

    /* Clip slice QP to the [-QP_BD_OFFSET, QP_MAX_SPEC] range before encoding */
    slice->m_sliceQp = x265_clip3(-QP_BD_OFFSET, QP_MAX_SPEC, qp);	// note: the unclipped qp is what is passed to m_frameFilter.start() below

    //////////////////////////////////////////////////////////// 6. Entropy coder preparation
    m_initSliceContext.resetEntropy(*slice);	// reset the initial entropy context for this slice

    m_frameFilter.start(m_frame, m_initSliceContext, qp);	// start the frame filter for this frame

    /* ensure all rows are blocked prior to initializing row CTU counters */
    WaveFront::clearEnabledRowMask();

    /* reset entropy coders */
    m_entropyCoder.load(m_initSliceContext);
    for (uint32_t i = 0; i < m_numRows; i++)
        m_rows[i].init(m_initSliceContext);
	
    //////////////////////////////////////////////////////////// 7. Output substreams
    uint32_t numSubstreams = m_param->bEnableWavefront ? slice->m_sps->numCuInHeight : 1;	// one substream per CTU row with wavefront, otherwise a single one
    // First call: allocate the substreams and the size array; later calls
    // only reset the existing substreams.
    // NOTE(review): neither allocation result is checked here.
	if (!m_outStreams)	
    {
        m_outStreams = new Bitstream[numSubstreams];	// one Bitstream per substream
        m_substreamSizes = X265_MALLOC(uint32_t, numSubstreams);	// one size entry per substream
        if (!m_param->bEnableSAO)	// without SAO the row coders write directly into the substreams
            for (uint32_t i = 0; i < numSubstreams; i++)
                m_rows[i].rowGoOnCoder.setBitstream(&m_outStreams[i]);
    }
    else
        for (uint32_t i = 0; i < numSubstreams; i++)
            m_outStreams[i].resetBits();

    //////////////////////////////////////////////////////////// 8. SEI (Supplemental Enhancement Information)
    // m_lastBPSEI as it was before this frame possibly overwrites it below;
    // 0 when this frame's encodeOrder is 0.
    int prevBPSEI = m_rce.encodeOrder ? m_top->m_lastBPSEI : 0;

    // Buffering period SEI: keyframes only, and only when HRD SEI is requested
    if (m_frame->m_lowres.bKeyframe)
    {
        if (m_param->bEmitHRDSEI)
        {
            SEIBufferingPeriod* bpSei = &m_top->m_rateControl->m_bufPeriodSEI;

            // since the temporal layer HRD is not ready, we assumed it is fixed
            bpSei->m_auCpbRemovalDelayDelta = 1;
            bpSei->m_cpbDelayOffset = 0;
            bpSei->m_dpbDelayOffset = 0;

            // hrdFullness() calculates the initial CPB removal delay and offset
            m_top->m_rateControl->hrdFullness(bpSei);

            m_bs.resetBits();
            bpSei->write(m_bs, *slice->m_sps);
            m_bs.writeByteAlignment();

            m_nalList.serialize(NAL_UNIT_PREFIX_SEI, m_bs);

            m_top->m_lastBPSEI = m_rce.encodeOrder;	// this frame now carries the most recent buffering period SEI
        }
    }

    // Picture timing SEI: written when HRD SEI is requested or the source is interlaced
    if (m_param->bEmitHRDSEI || !!m_param->interlaceMode)
    {
        SEIPictureTiming *sei = m_rce.picTimingSEI;
        const VUI *vui = &slice->m_sps->vuiParameters;
        const HRDInfo *hrd = &vui->hrdParameters;
        int poc = slice->m_poc;

        if (vui->frameFieldInfoPresentFlag)
        {
            // Field parity alternates with POC; interlaceMode selects which parity the even POCs get
            if (m_param->interlaceMode == 2)
                sei->m_picStruct = (poc & 1) ? 1 /* top */ : 2 /* bottom */;
            else if (m_param->interlaceMode == 1)
                sei->m_picStruct = (poc & 1) ? 2 /* bottom */ : 1 /* top */;
            else
                sei->m_picStruct = 0;
            sei->m_sourceScanType = 0;
            sei->m_duplicateFlag = false;
        }

        if (vui->hrdParametersPresentFlag)
        {
            // The m_aucpbremoval delay specifies how many clock ticks the
            // access unit associated with the picture timing SEI message has to
            // wait after removal of the access unit with the most recent
            // buffering period SEI message
            // The value is kept within [1, 1 << cpbRemovalDelayLength].
            sei->m_auCpbRemovalDelay = X265_MIN(X265_MAX(1, m_rce.encodeOrder - prevBPSEI), (1 << hrd->cpbRemovalDelayLength));
            sei->m_picDpbOutputDelay = slice->m_sps->numReorderPics + poc - m_rce.encodeOrder;
        }

        m_bs.resetBits();
        sei->write(m_bs, *slice->m_sps);
        m_bs.writeByteAlignment();
        m_nalList.serialize(NAL_UNIT_PREFIX_SEI, m_bs);
    }

    //////////////////////////////////////////////////////////// 9. Thread ordering with rate control
    /* CQP and CRF (without capped VBV) doesn't use mid-frame statistics to 
     * tune RateControl parameters for other frames.
     * Hence, for these modes, update m_startEndOrder and unlock RC for previous threads waiting in
     * RateControlEnd here, after the slicecontexts are initialized. For the rest - ABR
     * and VBV, unlock only after rateControlUpdateStats of this frame is called */
    if (m_param->rc.rateControlMode != X265_RC_ABR && !m_top->m_rateControl->m_isVbv)
    {
        m_top->m_rateControl->m_startEndOrder.incr();	// advance the ordering counter

        if (m_rce.encodeOrder < m_param->frameNumThreads - 1)	// one extra increment for each of the first frameNumThreads - 1 frames
            m_top->m_rateControl->m_startEndOrder.incr(); // faked rateControlEnd calls for negative frames
    }

    //============================================================ most of the computation happens below this point
    //////////////////////////////////////////////////////////// 10. CTU analysis (two paths: wavefront enabled / disabled)
    /* Analyze CTU rows, most of the hard work is done here.  Frame is
     * compressed in a wave-front pattern if WPP is enabled. Row based loop
     * filters runs behind the CTU compression and reconstruction */

    m_rows[0].active = true;	// mark the first CTU row as active
    if (m_param->bEnableWavefront)	// wavefront path
    {
        for (uint32_t row = 0; row < m_numRows; row++)	// for every CTU row
        {
            // block until all reference frames have reconstructed the rows we need
            for (int l = 0; l < numPredDir; l++)	// for every reference list
            {
                for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)	// for every reference picture in this list
                {
                    Frame *refpic = slice->m_refPicList[l][ref];	// reference frame

                    // Wait until the reference has either reconstructed all of its rows
                    // or at least row + m_refLagRows of them
                    uint32_t reconRowCount = refpic->m_reconRowCount.get();
                    while ((reconRowCount != m_numRows) && (reconRowCount < row + m_refLagRows))
                        reconRowCount = refpic->m_reconRowCount.waitForChange(reconRowCount);

                    if ((bUseWeightP || bUseWeightB) && m_mref[l][ref].isWeighted)
                        m_mref[l][ref].applyWeight(row + m_refLagRows, m_numRows);
                }
            }

            // The reference rows this row depends on are ready
            enableRowEncoder(row); /* clear external dependency for this row */
            if (!row)	// first row only
            {
                m_row0WaitTime = x265_mdate();	// timestamp taken when row 0 has been enabled
                
                // Row 0 has no row above it to wait for
				enqueueRowEncoder(0); /* clear internal dependency, start wavefront */
            }
            tryWakeOne();	// try to wake one worker now that another row is enabled
        }

        m_allRowsAvailableTime = x265_mdate();	// all rows have had their reference dependencies resolved
        tryWakeOne(); /* ensure one thread is active or help-wanted flag is set prior to blocking */
        static const int block_ms = 250;	// wait timeout passed to timedWait()
        // Keep trying to wake a worker for as long as timedWait() returns true
        // (presumably that means the wait timed out before m_completionEvent was
        // triggered — confirm against the Event implementation)
		while (m_completionEvent.timedWait(block_ms))
            tryWakeOne();
    }
    else	// wavefront disabled: rows are compressed and filtered serially in this thread
    {
        // m_filterRowDelay extra iterations let the filter, which runs
        // m_filterRowDelay rows behind compression, finish the last rows
        for (uint32_t i = 0; i < m_numRows + m_filterRowDelay; i++)
        {
            // compress
            if (i < m_numRows)
            {
                // block until all reference frames have reconstructed the rows we need
                for (int l = 0; l < numPredDir; l++)
                {
                    int list = l;	// alias of l; both names index the same list below
                    for (int ref = 0; ref < slice->m_numRefIdx[list]; ref++)
                    {
                        Frame *refpic = slice->m_refPicList[list][ref];	// reference frame

                        // Same wait condition as in the wavefront path
                        uint32_t reconRowCount = refpic->m_reconRowCount.get();
                        while ((reconRowCount != m_numRows) && (reconRowCount < i + m_refLagRows))
                            reconRowCount = refpic->m_reconRowCount.waitForChange(reconRowCount);

                        if ((bUseWeightP || bUseWeightB) && m_mref[l][ref].isWeighted)
                            m_mref[list][ref].applyWeight(i + m_refLagRows, m_numRows);
                    }
                }

                // NOTE(review): with a single row (m_numRows == 1) only the first
                // branch runs, so m_allRowsAvailableTime keeps its initial 0.
                if (!i)
                    m_row0WaitTime = x265_mdate();
                else if (i == m_numRows - 1)
                    m_allRowsAvailableTime = x265_mdate();
                processRowEncoder(i, m_tld[m_localTldIdx]);	// compress row i using this thread's local data
            }

            // filter
            if (i >= m_filterRowDelay)
                m_frameFilter.processRow(i - m_filterRowDelay);	// Row based loop filters
        }
    }

    //////////////////////////////////////////////////////////// 11. Statistics (multi-pass and per-frame)
    if (m_param->rc.bStatWrite)	// Enable writing the stats in a multi-pass encode to the stat output file
    {
        int totalI = 0, totalP = 0, totalSkip = 0;

        // accumulate intra,inter,skip cu count per frame for 2 pass
        for (uint32_t i = 0; i < m_numRows; i++)
        {
            m_frame->m_encData->m_frameStats.mvBits    += m_rows[i].rowStats.mvBits;
            m_frame->m_encData->m_frameStats.coeffBits += m_rows[i].rowStats.coeffBits;
            m_frame->m_encData->m_frameStats.miscBits  += m_rows[i].rowStats.miscBits;
            totalI                                     += m_rows[i].rowStats.intra8x8Cnt;
            totalP                                     += m_rows[i].rowStats.inter8x8Cnt;
            totalSkip                                  += m_rows[i].rowStats.skip8x8Cnt;
        }
        int totalCuCount = totalI + totalP + totalSkip;	// total of the three 8x8 counters
        // NOTE(review): totalCuCount is not checked for zero before dividing
        m_frame->m_encData->m_frameStats.percent8x8Intra = (double)totalI / totalCuCount;
        m_frame->m_encData->m_frameStats.percent8x8Inter = (double)totalP / totalCuCount;
        m_frame->m_encData->m_frameStats.percent8x8Skip  = (double)totalSkip / totalCuCount;
    }
    // Sum the per-row statistics into the frame statistics (done regardless of bStatWrite)
    for (uint32_t i = 0; i < m_numRows; i++)
    {
        m_frame->m_encData->m_frameStats.cntIntraNxN      += m_rows[i].rowStats.cntIntraNxN;
        m_frame->m_encData->m_frameStats.totalCu          += m_rows[i].rowStats.totalCu;
        m_frame->m_encData->m_frameStats.totalCtu         += m_rows[i].rowStats.totalCtu;
        m_frame->m_encData->m_frameStats.lumaDistortion   += m_rows[i].rowStats.lumaDistortion;
        m_frame->m_encData->m_frameStats.chromaDistortion += m_rows[i].rowStats.chromaDistortion;
        m_frame->m_encData->m_frameStats.psyEnergy        += m_rows[i].rowStats.psyEnergy;
        m_frame->m_encData->m_frameStats.lumaLevel        += m_rows[i].rowStats.lumaLevel;

        // The frame's maximum luma level is the maximum over all rows
        if (m_rows[i].rowStats.maxLumaLevel > m_frame->m_encData->m_frameStats.maxLumaLevel)
            m_frame->m_encData->m_frameStats.maxLumaLevel = m_rows[i].rowStats.maxLumaLevel;
        // Per-depth skip / merge counts and inter / intra mode distributions
        for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
        {
            m_frame->m_encData->m_frameStats.cntSkipCu[depth] += m_rows[i].rowStats.cntSkipCu[depth];
            m_frame->m_encData->m_frameStats.cntMergeCu[depth] += m_rows[i].rowStats.cntMergeCu[depth];
            for (int m = 0; m < INTER_MODES; m++)
                m_frame->m_encData->m_frameStats.cuInterDistribution[depth][m] += m_rows[i].rowStats.cuInterDistribution[depth][m];
            for (int n = 0; n < INTRA_MODES; n++)
                m_frame->m_encData->m_frameStats.cuIntraDistribution[depth][n] += m_rows[i].rowStats.cuIntraDistribution[depth][n];
        }
    }
    // Averages are per CTU, percentages are relative to totalCu.
    // NOTE(review): totalCtu and totalCu are not checked for zero before dividing.
    m_frame->m_encData->m_frameStats.avgLumaDistortion   = (double)(m_frame->m_encData->m_frameStats.lumaDistortion) / m_frame->m_encData->m_frameStats.totalCtu;
    m_frame->m_encData->m_frameStats.avgChromaDistortion = (double)(m_frame->m_encData->m_frameStats.chromaDistortion) / m_frame->m_encData->m_frameStats.totalCtu;
    m_frame->m_encData->m_frameStats.avgPsyEnergy        = (double)(m_frame->m_encData->m_frameStats.psyEnergy) / m_frame->m_encData->m_frameStats.totalCtu;
    m_frame->m_encData->m_frameStats.avgLumaLevel        = m_frame->m_encData->m_frameStats.lumaLevel / m_frame->m_encData->m_frameStats.totalCtu;
    m_frame->m_encData->m_frameStats.percentIntraNxN     = (double)(m_frame->m_encData->m_frameStats.cntIntraNxN * 100) / m_frame->m_encData->m_frameStats.totalCu;
    for (uint32_t depth = 0; depth <= g_maxCUDepth; depth++)
    {
        m_frame->m_encData->m_frameStats.percentSkipCu[depth]  = (double)(m_frame->m_encData->m_frameStats.cntSkipCu[depth] * 100) / m_frame->m_encData->m_frameStats.totalCu;
        m_frame->m_encData->m_frameStats.percentMergeCu[depth] = (double)(m_frame->m_encData->m_frameStats.cntMergeCu[depth] * 100) / m_frame->m_encData->m_frameStats.totalCu;
        for (int n = 0; n < INTRA_MODES; n++)
            m_frame->m_encData->m_frameStats.percentIntraDistribution[depth][n] = (double)(m_frame->m_encData->m_frameStats.cuIntraDistribution[depth][n] * 100) / m_frame->m_encData->m_frameStats.totalCu;
        // Inter distribution is folded from four counters into three percentages:
        // [0] <- counter 0, [1] <- counters 1 + 2 combined, [2] <- counter 3
        uint64_t cuInterRectCnt = 0; // sum of Nx2N, 2NxN counts
        cuInterRectCnt += m_frame->m_encData->m_frameStats.cuInterDistribution[depth][1] + m_frame->m_encData->m_frameStats.cuInterDistribution[depth][2];
        m_frame->m_encData->m_frameStats.percentInterDistribution[depth][0] = (double)(m_frame->m_encData->m_frameStats.cuInterDistribution[depth][0] * 100) / m_frame->m_encData->m_frameStats.totalCu;
        m_frame->m_encData->m_frameStats.percentInterDistribution[depth][1] = (double)(cuInterRectCnt * 100) / m_frame->m_encData->m_frameStats.totalCu;
        m_frame->m_encData->m_frameStats.percentInterDistribution[depth][2] = (double)(m_frame->m_encData->m_frameStats.cuInterDistribution[depth][3] * 100) / m_frame->m_encData->m_frameStats.totalCu;
    }

    // Write the slice header into m_bs, starting from the initial slice context
    m_bs.resetBits();
    m_entropyCoder.load(m_initSliceContext);
    m_entropyCoder.setBitstream(&m_bs);
    m_entropyCoder.codeSliceHeader(*slice, *m_frame->m_encData);

    //////////////////////////////////////////////////////////// 12. Slice data, slice NAL and picture hash SEI
    // finish encode of each CTU row, only required when SAO is enabled
    if (m_param->bEnableSAO)
        encodeSlice();

    // serialize each row, record final lengths in slice header
    uint32_t maxStreamSize = m_nalList.serializeSubstreams(m_substreamSizes, numSubstreams, m_outStreams);

    // complete the slice header by writing WPP row-starts
    m_entropyCoder.setBitstream(&m_bs);
    if (slice->m_pps->bEntropyCodingSyncEnabled)
        m_entropyCoder.codeSliceHeaderWPPEntryPoints(*slice, m_substreamSizes, maxStreamSize);
    m_bs.writeByteAlignment();

    m_nalList.serialize(slice->m_nalUnitType, m_bs);

    // Optional decoded picture hash SEI: 1 = MD5, 2 = CRC, 3 = checksum.
    // The digest is finalised for three entries per method (indices 0..2).
    if (m_param->decodedPictureHashSEI)
    {
        if (m_param->decodedPictureHashSEI == 1)
        {
            m_seiReconPictureDigest.m_method = SEIDecodedPictureHash::MD5;
            for (int i = 0; i < 3; i++)
                MD5Final(&m_state[i], m_seiReconPictureDigest.m_digest[i]);
        }
        else if (m_param->decodedPictureHashSEI == 2)
        {
            m_seiReconPictureDigest.m_method = SEIDecodedPictureHash::CRC;
            for (int i = 0; i < 3; i++)
                crcFinish(m_crc[i], m_seiReconPictureDigest.m_digest[i]);
        }
        else if (m_param->decodedPictureHashSEI == 3)
        {
            m_seiReconPictureDigest.m_method = SEIDecodedPictureHash::CHECKSUM;
            for (int i = 0; i < 3; i++)
                checksumFinish(m_checksum[i], m_seiReconPictureDigest.m_digest[i]);
        }

        m_bs.resetBits();
        m_seiReconPictureDigest.write(m_bs, *slice->m_sps);
        m_bs.writeByteAlignment();

        m_nalList.serialize(NAL_UNIT_SUFFIX_SEI, m_bs);
    }

    // Size of the access unit as reported to rate control: all NAL bytes
    // except SEI NALs and the start code prefixes.
    uint64_t bytes = 0;
    for (uint32_t i = 0; i < m_nalList.m_numNal; i++)
    {
        int type = m_nalList.m_nal[i].type;

        // exclude SEI
        if (type != NAL_UNIT_PREFIX_SEI && type != NAL_UNIT_SUFFIX_SEI)
        {
            bytes += m_nalList.m_nal[i].sizeBytes;
            // and exclude start code prefix
            // (4 bytes for the first NAL and for SPS/PPS, 3 bytes otherwise)
            bytes -= (!i || type == NAL_UNIT_SPS || type == NAL_UNIT_PPS) ? 4 : 3;
        }
    }
    m_accessUnitBits = bytes << 3;	// bytes -> bits

    m_endCompressTime = x265_mdate();	//timestamp after all CTUs are compressed
//	printf("Compression time: %d ms\n",m_endCompressTime-m_startCompressTime);	// disabled debug print added by Fred

    /* rateControlEnd may also block for earlier frames to call rateControlUpdateStats */
    if (m_top->m_rateControl->rateControlEnd(m_frame, m_accessUnitBits, &m_rce) < 0)
        m_top->m_aborted = true;	// a negative return is treated as an error: flag the encoder as aborted

    /* Decrement referenced frame reference counts, allow them to be recycled */
    for (int l = 0; l < numPredDir; l++)
    {
        for (int ref = 0; ref < slice->m_numRefIdx[l]; ref++)	// for every reference picture in this list
        {
            Frame *refpic = slice->m_refPicList[l][ref];	// reference frame
            ATOMIC_DEC(&refpic->m_countRefEncoders);	// one fewer encoder is referencing it
        }
    }

    //////////////////////////////////////////////////////////// 13. NR updates (noise reduction)
    int numTLD;	// number of thread-local data (TLD) entries in m_tld to visit
    if (m_pool)
        numTLD = m_param->bEnableWavefront ? m_pool->m_numWorkers : m_pool->m_numWorkers + m_pool->m_numProviders;	// workers only with wavefront, workers plus providers without
    else
        numTLD = 1;	// no thread pool: a single TLD entry

    if (m_nr)	// noise reduction state is present
    {
        /* Accumulate NR statistics from all worker threads */
		for (int i = 0; i < numTLD; i++)	// for every TLD entry
        {
            // Noise reduction state of TLD entry i for this frame encoder (selected by m_jpId)
			NoiseReduction* nr = &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];	
            for (int cat = 0; cat < MAX_NUM_TR_CATEGORIES; cat++)	// for every transform category
            {
                for (int coeff = 0; coeff < MAX_NUM_TR_COEFFS; coeff++)
                    m_nr->residualSum[cat][coeff] += nr->residualSum[cat][coeff];	// accumulate the residual sum of this coefficient position
            
                m_nr->count[cat] += nr->count[cat];	// accumulate the count of this category
            }
        }

        noiseReductionUpdate();	// DCT-domain noise reduction / adaptive deadzone from libavcodec

        /* Copy updated NR coefficients back to all worker threads */
        for (int i = 0; i < numTLD; i++)	// for every TLD entry
        {
            // Noise reduction state of TLD entry i for this frame encoder (selected by m_jpId)
			NoiseReduction* nr = &m_tld[i].analysis.m_quant.m_frameNr[m_jpId];
            // Hand the updated denoise offsets to the TLD entry and clear its accumulators
			memcpy(nr->offsetDenoise, m_nr->offsetDenoise, sizeof(uint16_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);
            memset(nr->count, 0, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES);	// reset to 0
            memset(nr->residualSum, 0, sizeof(uint32_t) * MAX_NUM_TR_CATEGORIES * MAX_NUM_TR_COEFFS);	// reset to 0
        }
    }

#if DETAILED_CU_STATS	// detailed CU statistics build only
    /* Accumulate CU statistics from each worker thread, we could report
     * per-frame stats here, but currently we do not. */
    for (int i = 0; i < numTLD; i++)
        m_cuStats.accumulate(m_tld[i].analysis.m_stats[m_jpId]);
#endif

    m_endFrameTime = x265_mdate();	// timestamp after RCEnd, NR updates, etc

}

上一篇:x265探索与研究(八):x265中的并行处理机制函数关系分析


下一篇:x265探索与研究(六):main()函数