在上一篇介绍了编码器的VCL编码操作,分析了函数x264_slice_write()。函数x264_slice_write()里有四个关键模块,分别是宏块分析模块、宏块编码模块、熵编码模块和滤波模块,再加上NAL打包输出部分,是我们这里要讲的内容。
1.编码模块
宏块分析模块:调用函数x264_macroblock_analyse()。分为两部分:帧内宏块和帧间宏块。帧内宏块用于分析帧内的预测模式,而帧间宏块进行运动估计,分析帧间的预测模式。
x264_macroblock_analyse():
void x264_macroblock_analyse( x264_t *h ) { x264_mb_analysis_t analysis; int i_cost = COST_MAX; //通过码率控制方法,获取本宏块QP h->mb.i_qp = x264_ratecontrol_mb_qp( h ); /* If the QP of this MB is within 1 of the previous MB, code the same QP as the previous MB, * to lower the bit cost of the qp_delta. Don't do this if QPRD is enabled. */ if( h->param.rc.i_aq_mode && h->param.analyse.i_subpel_refine < 10 ) h->mb.i_qp = abs(h->mb.i_qp - h->mb.i_last_qp) == 1 ? h->mb.i_last_qp : h->mb.i_qp; if( h->param.analyse.b_mb_info ) h->fdec->effective_qp[h->mb.i_mb_xy] = h->mb.i_qp; /* Store the real analysis QP. */ //初始化 x264_mb_analyse_init( h, &analysis, h->mb.i_qp ); //I帧:只使用帧内预测,分别计算亮度16x16(4种)和4x4(9种)所有模式的代价值,选出代价最小的模式 //P帧:计算帧内模式和帧间模式( P Slice允许有Intra宏块和P宏块;同理B帧也支持Intra宏块)。 //对P帧的每一种分割进行帧间预测,得到最佳的运动矢量及最佳匹配块。 //帧间预测过程:选出最佳矢量——>找到最佳的整像素点——>找到最佳的二分之一像素点——>找到最佳的1/4像素点 //然后取代价最小的为最佳MV和分割方式 //最后从帧内模式和帧间模式中选择代价比较小的方式(有可能没有找到很好的匹配块,这时候就直接使用帧内预测而不是帧间预测)。 if( h->sh.i_type == SLICE_TYPE_I ) { //I slice //通过一系列帧内预测模式(16x16的4种,4x4的9种)代价的计算得出代价最小的最优模式 intra_analysis: if( analysis.i_mbrd ) x264_mb_init_fenc_cache( h, analysis.i_mbrd >= 2 ); //帧内预测分析 //从16×16的SAD,4个8×8的SAD和,16个4×4SAD中选出最优方式 x264_mb_analyse_intra( h, &analysis, COST_MAX ); if( analysis.i_mbrd ) x264_intra_rd( h, &analysis, COST_MAX ); //分析结果都存储在analysis结构体中 //开销 i_cost = analysis.i_satd_i16x16; h->mb.i_type = I_16x16; //如果I4x4或者I8x8开销更小的话就拷贝 //copy if little COPY2_IF_LT( i_cost, analysis.i_satd_i4x4, h->mb.i_type, I_4x4 ); COPY2_IF_LT( i_cost, analysis.i_satd_i8x8, h->mb.i_type, I_8x8 ); //画面极其特殊的时候,才有可能用到PCM if( analysis.i_satd_pcm < i_cost ) h->mb.i_type = I_PCM; else if( analysis.i_mbrd >= 2 ) x264_intra_rd_refine( h, &analysis ); } else if( h->sh.i_type == SLICE_TYPE_P ) { //P slice int b_skip = 0; h->mc.prefetch_ref( h->mb.pic.p_fref[0][0][h->mb.i_mb_x&3], h->mb.pic.i_stride[0], 0 ); analysis.b_try_skip = 0; if( analysis.b_force_intra ) { if( !h->param.analyse.b_psy ) { x264_mb_analyse_init_qp( h, &analysis, X264_MAX( h->mb.i_qp - h->mb.ip_offset, h->param.rc.i_qp_min ) ); goto intra_analysis; } } else { if( h->fdec->mb_info && (h->fdec->mb_info[h->mb.i_mb_xy]&X264_MBINFO_CONSTANT) ) { if( !SLICE_MBAFF && (h->fdec->i_frame - h->fref[0][0]->i_frame) == 1 && !h->sh.b_weighted_pred && h->fref[0][0]->effective_qp[h->mb.i_mb_xy] <= h->mb.i_qp ) { h->mb.i_partition = D_16x16; if( !M32(h->mb.cache.pskip_mv) ) { b_skip = 1; h->mb.i_type = P_SKIP; } else { h->mb.i_type = P_L0; analysis.l0.me16x16.i_ref = 0; M32( analysis.l0.me16x16.mv ) = 0; } goto skip_analysis; } else if( h->param.analyse.b_mb_info_update ) h->fdec->mb_info[h->mb.i_mb_xy] &= ~X264_MBINFO_CONSTANT; } int skip_invalid = h->i_thread_frames > 1 && h->mb.cache.pskip_mv[1] > h->mb.mv_max_spel[1]; /* If the current macroblock is off the frame, just skip it. */ if( HAVE_INTERLACED && !MB_INTERLACED && h->mb.i_mb_y * 16 >= h->param.i_height && !skip_invalid ) b_skip = 1; /* Fast P_SKIP detection */ else if( h->param.analyse.b_fast_pskip ) { if( skip_invalid ) // FIXME don't need to check this if the reference frame is done {} else if( h->param.analyse.i_subpel_refine >= 3 ) analysis.b_try_skip = 1; else if( h->mb.i_mb_type_left[0] == P_SKIP || h->mb.i_mb_type_top == P_SKIP || h->mb.i_mb_type_topleft == P_SKIP || h->mb.i_mb_type_topright == P_SKIP ) b_skip = x264_macroblock_probe_pskip( h );//检查是否是Skip类型 } } h->mc.prefetch_ref( h->mb.pic.p_fref[0][0][h->mb.i_mb_x&3], h->mb.pic.i_stride[0], 1 ); if( b_skip ) { h->mb.i_type = P_SKIP; h->mb.i_partition = D_16x16; assert( h->mb.cache.pskip_mv[1] <= h->mb.mv_max_spel[1] || h->i_thread_frames == 1 ); skip_analysis: for( int i = 0; i < h->mb.pic.i_fref[0]; i++ ) M32( h->mb.mvr[0][i][h->mb.i_mb_xy] ) = 0; } else { const unsigned int flags = h->param.analyse.inter; int i_type; int i_partition; int i_satd_inter, i_satd_intra; x264_mb_analyse_load_costs( h, &analysis ); /* * 16x16 帧间预测宏块分析-P * * +--------+--------+ * | | * | | * | | * + + + * | | * | | * | | * +--------+--------+ * */ x264_mb_analyse_inter_p16x16( h, &analysis ); if( h->mb.i_type == P_SKIP ) { for( int i = 1; i < h->mb.pic.i_fref[0]; i++ ) M32( h->mb.mvr[0][i][h->mb.i_mb_xy] ) = 0; return; } if( flags & X264_ANALYSE_PSUB16x16 ) { if( h->param.analyse.b_mixed_references ) x264_mb_analyse_inter_p8x8_mixed_ref( h, &analysis ); else{ /* * 8x8帧间预测宏块分析-P * +--------+ * | | * | | * | | * +--------+ */ x264_mb_analyse_inter_p8x8( h, &analysis ); } } /* Select best inter mode */ i_type = P_L0; i_partition = D_16x16; i_cost = analysis.l0.me16x16.cost; //如果8x8的代价值小于16x16 //则进行8x8子块分割的处理 //处理的数据源自于l0 if( ( flags & X264_ANALYSE_PSUB16x16 ) && (!analysis.b_early_terminate || analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost) ) { i_type = P_8x8; i_partition = D_8x8; i_cost = analysis.l0.i_cost8x8; /* Do sub 8x8 */ if( flags & X264_ANALYSE_PSUB8x8 ) { for( int i = 0; i < 4; i++ ) { //8x8块的子块的分析 /* * 4x4 * +----+----+ * | | | * +----+----+ * | | | * +----+----+ * */ x264_mb_analyse_inter_p4x4( h, &analysis, i ); int i_thresh8x4 = analysis.l0.me4x4[i][1].cost_mv + analysis.l0.me4x4[i][2].cost_mv; //如果4x4小于8x8 //则再分析8x4,4x8的代价 if( !analysis.b_early_terminate || analysis.l0.i_cost4x4[i] < analysis.l0.me8x8[i].cost + i_thresh8x4 ) { int i_cost8x8 = analysis.l0.i_cost4x4[i]; h->mb.i_sub_partition[i] = D_L0_4x4; /* * 8x4 * +----+----+ * | | * +----+----+ * | | * +----+----+ * */ //如果8x4小于8x8 x264_mb_analyse_inter_p8x4( h, &analysis, i ); COPY2_IF_LT( i_cost8x8, analysis.l0.i_cost8x4[i], h->mb.i_sub_partition[i], D_L0_8x4 ); /* * 4x8 * +----+----+ * | | | * + + + * | | | * +----+----+ * */ //如果4x8小于8x8 x264_mb_analyse_inter_p4x8( h, &analysis, i ); COPY2_IF_LT( i_cost8x8, analysis.l0.i_cost4x8[i], h->mb.i_sub_partition[i], D_L0_4x8 ); i_cost += i_cost8x8 - analysis.l0.me8x8[i].cost; } x264_mb_cache_mv_p8x8( h, &analysis, i ); } analysis.l0.i_cost8x8 = i_cost; } } /* Now do 16x8/8x16 */ int i_thresh16x8 = analysis.l0.me8x8[1].cost_mv + analysis.l0.me8x8[2].cost_mv; //前提要求8x8的代价值小于16x16 if( ( flags & X264_ANALYSE_PSUB16x16 ) && (!analysis.b_early_terminate || analysis.l0.i_cost8x8 < analysis.l0.me16x16.cost + i_thresh16x8) ) { int i_avg_mv_ref_cost = (analysis.l0.me8x8[2].cost_mv + analysis.l0.me8x8[2].i_ref_cost + analysis.l0.me8x8[3].cost_mv + analysis.l0.me8x8[3].i_ref_cost + 1) >> 1; analysis.i_cost_est16x8[1] = analysis.i_satd8x8[0][2] + analysis.i_satd8x8[0][3] + i_avg_mv_ref_cost; /* * 16x8 宏块划分 * * +--------+--------+ * | | | * | | | * | | | * +--------+--------+ * */ x264_mb_analyse_inter_p16x8( h, &analysis, i_cost ); COPY3_IF_LT( i_cost, analysis.l0.i_cost16x8, i_type, P_L0, i_partition, D_16x8 ); i_avg_mv_ref_cost = (analysis.l0.me8x8[1].cost_mv + analysis.l0.me8x8[1].i_ref_cost + analysis.l0.me8x8[3].cost_mv + analysis.l0.me8x8[3].i_ref_cost + 1) >> 1; analysis.i_cost_est8x16[1] = analysis.i_satd8x8[0][1] + analysis.i_satd8x8[0][3] + i_avg_mv_ref_cost; /* * 8x16 宏块划分 * * +--------+ * | | * | | * | | * +--------+ * | | * | | * | | * +--------+ * */ x264_mb_analyse_inter_p8x16( h, &analysis, i_cost ); COPY3_IF_LT( i_cost, analysis.l0.i_cost8x16, i_type, P_L0, i_partition, D_8x16 ); } h->mb.i_partition = i_partition; /* refine qpel */ //亚像素精度搜索 //FIXME mb_type costs? if( analysis.i_mbrd || !h->mb.i_subpel_refine ) { /* refine later */ } else if( i_partition == D_16x16 ) { x264_me_refine_qpel( h, &analysis.l0.me16x16 ); i_cost = analysis.l0.me16x16.cost; } else if( i_partition == D_16x8 ) { x264_me_refine_qpel( h, &analysis.l0.me16x8[0] ); x264_me_refine_qpel( h, &analysis.l0.me16x8[1] ); i_cost = analysis.l0.me16x8[0].cost + analysis.l0.me16x8[1].cost; } else if( i_partition == D_8x16 ) { x264_me_refine_qpel( h, &analysis.l0.me8x16[0] ); x264_me_refine_qpel( h, &analysis.l0.me8x16[1] ); i_cost = analysis.l0.me8x16[0].cost + analysis.l0.me8x16[1].cost; } else if( i_partition == D_8x8 ) { i_cost = 0; for( int i8x8 = 0; i8x8 < 4; i8x8++ ) { switch( h->mb.i_sub_partition[i8x8] ) { case D_L0_8x8: x264_me_refine_qpel( h, &analysis.l0.me8x8[i8x8] ); i_cost += analysis.l0.me8x8[i8x8].cost; break; case D_L0_8x4: x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][0] ); x264_me_refine_qpel( h, &analysis.l0.me8x4[i8x8][1] ); i_cost += analysis.l0.me8x4[i8x8][0].cost + analysis.l0.me8x4[i8x8][1].cost; break; case D_L0_4x8: x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][0] ); x264_me_refine_qpel( h, &analysis.l0.me4x8[i8x8][1] ); i_cost += analysis.l0.me4x8[i8x8][0].cost + analysis.l0.me4x8[i8x8][1].cost; break; case D_L0_4x4: x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][0] ); x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][1] ); x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][2] ); x264_me_refine_qpel( h, &analysis.l0.me4x4[i8x8][3] ); i_cost += analysis.l0.me4x4[i8x8][0].cost + analysis.l0.me4x4[i8x8][1].cost + analysis.l0.me4x4[i8x8][2].cost + analysis.l0.me4x4[i8x8][3].cost; break; default: x264_log( h, X264_LOG_ERROR, "internal error (!8x8 && !4x4)\n" ); break; } } } if( h->mb.b_chroma_me ) { if( CHROMA444 ) { x264_mb_analyse_intra( h, &analysis, i_cost ); x264_mb_analyse_intra_chroma( h, &analysis ); } else { x264_mb_analyse_intra_chroma( h, &analysis ); x264_mb_analyse_intra( h, &analysis, i_cost - analysis.i_satd_chroma ); } analysis.i_satd_i16x16 += analysis.i_satd_chroma; analysis.i_satd_i8x8 += analysis.i_satd_chroma; analysis.i_satd_i4x4 += analysis.i_satd_chroma; } else x264_mb_analyse_intra( h, &analysis, i_cost );//P Slice中也允许有Intra宏块,所以也要进行分析 i_satd_inter = i_cost; i_satd_intra = X264_MIN3( analysis.i_satd_i16x16, analysis.i_satd_i8x8, analysis.i_satd_i4x4 ); if( analysis.i_mbrd ) { x264_mb_analyse_p_rd( h, &analysis, X264_MIN(i_satd_inter, i_satd_intra) ); i_type = P_L0; i_partition = D_16x16; i_cost = analysis.l0.i_rd16x16; COPY2_IF_LT( i_cost, analysis.l0.i_cost16x8, i_partition, D_16x8 ); COPY2_IF_LT( i_cost, analysis.l0.i_cost8x16, i_partition, D_8x16 ); COPY3_IF_LT( i_cost, analysis.l0.i_cost8x8, i_partition, D_8x8, i_type, P_8x8 ); h->mb.i_type = i_type; h->mb.i_partition = i_partition; if( i_cost < COST_MAX ) x264_mb_analyse_transform_rd( h, &analysis, &i_satd_inter, &i_cost ); x264_intra_rd( h, &analysis, i_satd_inter * 5/4 + 1 ); } //获取最小的代价 COPY2_IF_LT( i_cost, analysis.i_satd_i16x16, i_type, I_16x16 ); COPY2_IF_LT( i_cost, analysis.i_satd_i8x8, i_type, I_8x8 ); COPY2_IF_LT( i_cost, analysis.i_satd_i4x4, i_type, I_4x4 ); COPY2_IF_LT( i_cost, analysis.i_satd_pcm, i_type, I_PCM ); h->mb.i_type = i_type; if( analysis.b_force_intra && !IS_INTRA(i_type) ) { /* Intra masking: copy fdec to fenc and re-encode the block as intra in order to make it appear as if * it was an inter block. */ x264_analyse_update_cache( h, &analysis ); x264_macroblock_encode( h ); for( int p = 0; p < (CHROMA444 ? 3 : 1); p++ ) h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fenc[p], FENC_STRIDE, h->mb.pic.p_fdec[p], FDEC_STRIDE, 16 ); if( !CHROMA444 ) { int height = 16 >> CHROMA_V_SHIFT; h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fenc[1], FENC_STRIDE, h->mb.pic.p_fdec[1], FDEC_STRIDE, height ); h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fenc[2], FENC_STRIDE, h->mb.pic.p_fdec[2], FDEC_STRIDE, height ); } x264_mb_analyse_init_qp( h, &analysis, X264_MAX( h->mb.i_qp - h->mb.ip_offset, h->param.rc.i_qp_min ) ); goto intra_analysis; } if( analysis.i_mbrd >= 2 && h->mb.i_type != I_PCM ) { if( IS_INTRA( h->mb.i_type ) ) { x264_intra_rd_refine( h, &analysis ); } else if( i_partition == D_16x16 ) { x264_macroblock_cache_ref( h, 0, 0, 4, 4, 0, analysis.l0.me16x16.i_ref ); analysis.l0.me16x16.cost = i_cost; x264_me_refine_qpel_rd( h, &analysis.l0.me16x16, analysis.i_lambda2, 0, 0 ); } else if( i_partition == D_16x8 ) { h->mb.i_sub_partition[0] = h->mb.i_sub_partition[1] = h->mb.i_sub_partition[2] = h->mb.i_sub_partition[3] = D_L0_8x8; x264_macroblock_cache_ref( h, 0, 0, 4, 2, 0, analysis.l0.me16x8[0].i_ref ); x264_macroblock_cache_ref( h, 0, 2, 4, 2, 0, analysis.l0.me16x8[1].i_ref ); x264_me_refine_qpel_rd( h, &analysis.l0.me16x8[0], analysis.i_lambda2, 0, 0 ); x264_me_refine_qpel_rd( h, &analysis.l0.me16x8[1], analysis.i_lambda2, 8, 0 ); } else if( i_partition == D_8x16 ) { h->mb.i_sub_partition[0] = h->mb.i_sub_partition[1] = h->mb.i_sub_partition[2] = h->mb.i_sub_partition[3] = D_L0_8x8; x264_macroblock_cache_ref( h, 0, 0, 2, 4, 0, analysis.l0.me8x16[0].i_ref ); x264_macroblock_cache_ref( h, 2, 0, 2, 4, 0, analysis.l0.me8x16[1].i_ref ); x264_me_refine_qpel_rd( h, &analysis.l0.me8x16[0], analysis.i_lambda2, 0, 0 ); x264_me_refine_qpel_rd( h, &analysis.l0.me8x16[1], analysis.i_lambda2, 4, 0 ); } else if( i_partition == D_8x8 ) { x264_analyse_update_cache( h, &analysis ); for( int i8x8 = 0; i8x8 < 4; i8x8++ ) { if( h->mb.i_sub_partition[i8x8] == D_L0_8x8 ) { x264_me_refine_qpel_rd( h, &analysis.l0.me8x8[i8x8], analysis.i_lambda2, i8x8*4, 0 ); } else if( h->mb.i_sub_partition[i8x8] == D_L0_8x4 ) { x264_me_refine_qpel_rd( h, &analysis.l0.me8x4[i8x8][0], analysis.i_lambda2, i8x8*4+0, 0 ); x264_me_refine_qpel_rd( h, &analysis.l0.me8x4[i8x8][1], analysis.i_lambda2, i8x8*4+2, 0 ); } else if( h->mb.i_sub_partition[i8x8] == D_L0_4x8 ) { x264_me_refine_qpel_rd( h, &analysis.l0.me4x8[i8x8][0], analysis.i_lambda2, i8x8*4+0, 0 ); x264_me_refine_qpel_rd( h, &analysis.l0.me4x8[i8x8][1], analysis.i_lambda2, i8x8*4+1, 0 ); } else if( h->mb.i_sub_partition[i8x8] == D_L0_4x4 ) { x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][0], analysis.i_lambda2, i8x8*4+0, 0 ); x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][1], analysis.i_lambda2, i8x8*4+1, 0 ); x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][2], analysis.i_lambda2, i8x8*4+2, 0 ); x264_me_refine_qpel_rd( h, &analysis.l0.me4x4[i8x8][3], analysis.i_lambda2, i8x8*4+3, 0 ); } } } } } } else if( h->sh.i_type == SLICE_TYPE_B )//B Slice的时候 { int i_bskip_cost = COST_MAX; int b_skip = 0; if( analysis.i_mbrd ) x264_mb_init_fenc_cache( h, analysis.i_mbrd >= 2 ); h->mb.i_type = B_SKIP; if( h->mb.b_direct_auto_write ) { /* direct=auto heuristic: prefer whichever mode allows more Skip macroblocks */ for( int i = 0; i < 2; i++ ) { int b_changed = 1; h->sh.b_direct_spatial_mv_pred ^= 1; analysis.b_direct_available = x264_mb_predict_mv_direct16x16( h, i && analysis.b_direct_available ? &b_changed : NULL ); if( analysis.b_direct_available ) { if( b_changed ) { x264_mb_mc( h ); b_skip = x264_macroblock_probe_bskip( h ); } h->stat.frame.i_direct_score[ h->sh.b_direct_spatial_mv_pred ] += b_skip; } else b_skip = 0; } } else analysis.b_direct_available = x264_mb_predict_mv_direct16x16( h, NULL ); analysis.b_try_skip = 0; if( analysis.b_direct_available ) { if( !h->mb.b_direct_auto_write ) x264_mb_mc( h ); /* If the current macroblock is off the frame, just skip it. */ if( HAVE_INTERLACED && !MB_INTERLACED && h->mb.i_mb_y * 16 >= h->param.i_height ) b_skip = 1; else if( analysis.i_mbrd ) { i_bskip_cost = ssd_mb( h ); /* 6 = minimum cavlc cost of a non-skipped MB */ b_skip = h->mb.b_skip_mc = i_bskip_cost <= ((6 * analysis.i_lambda2 + 128) >> 8); } else if( !h->mb.b_direct_auto_write ) { /* Conditioning the probe on neighboring block types * doesn't seem to help speed or quality. */ analysis.b_try_skip = x264_macroblock_probe_bskip( h ); if( h->param.analyse.i_subpel_refine < 3 ) b_skip = analysis.b_try_skip; } /* Set up MVs for future predictors */ if( b_skip ) { for( int i = 0; i < h->mb.pic.i_fref[0]; i++ ) M32( h->mb.mvr[0][i][h->mb.i_mb_xy] ) = 0; for( int i = 0; i < h->mb.pic.i_fref[1]; i++ ) M32( h->mb.mvr[1][i][h->mb.i_mb_xy] ) = 0; } } if( !b_skip ) { const unsigned int flags = h->param.analyse.inter; int i_type; int i_partition; int i_satd_inter; h->mb.b_skip_mc = 0; h->mb.i_type = B_DIRECT; x264_mb_analyse_load_costs( h, &analysis ); /* select best inter mode */ /* direct must be first */ if( analysis.b_direct_available ) x264_mb_analyse_inter_direct( h, &analysis ); /* * 16x16 帧间预测宏块分析-B * * +--------+--------+ * | | * | | * | | * + + + * | | * | | * | | * +--------+--------+ * */ x264_mb_analyse_inter_b16x16( h, &analysis ); if( h->mb.i_type == B_SKIP ) { for( int i = 1; i < h->mb.pic.i_fref[0]; i++ ) M32( h->mb.mvr[0][i][h->mb.i_mb_xy] ) = 0; for( int i = 1; i < h->mb.pic.i_fref[1]; i++ ) M32( h->mb.mvr[1][i][h->mb.i_mb_xy] ) = 0; return; } i_type = B_L0_L0; i_partition = D_16x16; i_cost = analysis.l0.me16x16.cost; COPY2_IF_LT( i_cost, analysis.l1.me16x16.cost, i_type, B_L1_L1 ); COPY2_IF_LT( i_cost, analysis.i_cost16x16bi, i_type, B_BI_BI ); COPY2_IF_LT( i_cost, analysis.i_cost16x16direct, i_type, B_DIRECT ); if( analysis.i_mbrd && analysis.b_early_terminate && analysis.i_cost16x16direct <= i_cost * 33/32 ) { x264_mb_analyse_b_rd( h, &analysis, i_cost ); if( i_bskip_cost < analysis.i_rd16x16direct && i_bskip_cost < analysis.i_rd16x16bi && i_bskip_cost < analysis.l0.i_rd16x16 && i_bskip_cost < analysis.l1.i_rd16x16 ) { h->mb.i_type = B_SKIP; x264_analyse_update_cache( h, &analysis ); return; } } if( flags & X264_ANALYSE_BSUB16x16 ) { /* * 8x8 帧间预测宏块分析-B * +--------+ * | | * | | * | | * +--------+ * */ if( h->param.analyse.b_mixed_references ) x264_mb_analyse_inter_b8x8_mixed_ref( h, &analysis ); else x264_mb_analyse_inter_b8x8( h, &analysis ); COPY3_IF_LT( i_cost, analysis.i_cost8x8bi, i_type, B_8x8, i_partition, D_8x8 ); /* Try to estimate the cost of b16x8/b8x16 based on the satd scores of the b8x8 modes */ int i_cost_est16x8bi_total = 0, i_cost_est8x16bi_total = 0; int i_mb_type, i_partition16x8[2], i_partition8x16[2]; for( int i = 0; i < 2; i++ ) { int avg_l0_mv_ref_cost, avg_l1_mv_ref_cost; int i_l0_satd, i_l1_satd, i_bi_satd, i_best_cost; // 16x8 i_best_cost = COST_MAX; i_l0_satd = analysis.i_satd8x8[0][i*2] + analysis.i_satd8x8[0][i*2+1]; i_l1_satd = analysis.i_satd8x8[1][i*2] + analysis.i_satd8x8[1][i*2+1]; i_bi_satd = analysis.i_satd8x8[2][i*2] + analysis.i_satd8x8[2][i*2+1]; avg_l0_mv_ref_cost = ( analysis.l0.me8x8[i*2].cost_mv + analysis.l0.me8x8[i*2].i_ref_cost + analysis.l0.me8x8[i*2+1].cost_mv + analysis.l0.me8x8[i*2+1].i_ref_cost + 1 ) >> 1; avg_l1_mv_ref_cost = ( analysis.l1.me8x8[i*2].cost_mv + analysis.l1.me8x8[i*2].i_ref_cost + analysis.l1.me8x8[i*2+1].cost_mv + analysis.l1.me8x8[i*2+1].i_ref_cost + 1 ) >> 1; COPY2_IF_LT( i_best_cost, i_l0_satd + avg_l0_mv_ref_cost, i_partition16x8[i], D_L0_8x8 ); COPY2_IF_LT( i_best_cost, i_l1_satd + avg_l1_mv_ref_cost, i_partition16x8[i], D_L1_8x8 ); COPY2_IF_LT( i_best_cost, i_bi_satd + avg_l0_mv_ref_cost + avg_l1_mv_ref_cost, i_partition16x8[i], D_BI_8x8 ); analysis.i_cost_est16x8[i] = i_best_cost; // 8x16 i_best_cost = COST_MAX; i_l0_satd = analysis.i_satd8x8[0][i] + analysis.i_satd8x8[0][i+2]; i_l1_satd = analysis.i_satd8x8[1][i] + analysis.i_satd8x8[1][i+2]; i_bi_satd = analysis.i_satd8x8[2][i] + analysis.i_satd8x8[2][i+2]; avg_l0_mv_ref_cost = ( analysis.l0.me8x8[i].cost_mv + analysis.l0.me8x8[i].i_ref_cost + analysis.l0.me8x8[i+2].cost_mv + analysis.l0.me8x8[i+2].i_ref_cost + 1 ) >> 1; avg_l1_mv_ref_cost = ( analysis.l1.me8x8[i].cost_mv + analysis.l1.me8x8[i].i_ref_cost + analysis.l1.me8x8[i+2].cost_mv + analysis.l1.me8x8[i+2].i_ref_cost + 1 ) >> 1; COPY2_IF_LT( i_best_cost, i_l0_satd + avg_l0_mv_ref_cost, i_partition8x16[i], D_L0_8x8 ); COPY2_IF_LT( i_best_cost, i_l1_satd + avg_l1_mv_ref_cost, i_partition8x16[i], D_L1_8x8 ); COPY2_IF_LT( i_best_cost, i_bi_satd + avg_l0_mv_ref_cost + avg_l1_mv_ref_cost, i_partition8x16[i], D_BI_8x8 ); analysis.i_cost_est8x16[i] = i_best_cost; } i_mb_type = B_L0_L0 + (i_partition16x8[0]>>2) * 3 + (i_partition16x8[1]>>2); analysis.i_cost_est16x8[1] += analysis.i_lambda * i_mb_b16x8_cost_table[i_mb_type]; i_cost_est16x8bi_total = analysis.i_cost_est16x8[0] + analysis.i_cost_est16x8[1]; i_mb_type = B_L0_L0 + (i_partition8x16[0]>>2) * 3 + (i_partition8x16[1]>>2); analysis.i_cost_est8x16[1] += analysis.i_lambda * i_mb_b16x8_cost_table[i_mb_type]; i_cost_est8x16bi_total = analysis.i_cost_est8x16[0] + analysis.i_cost_est8x16[1]; /* We can gain a little speed by checking the mode with the lowest estimated cost first */ int try_16x8_first = i_cost_est16x8bi_total < i_cost_est8x16bi_total; if( try_16x8_first && (!analysis.b_early_terminate || i_cost_est16x8bi_total < i_cost) ) { x264_mb_analyse_inter_b16x8( h, &analysis, i_cost ); COPY3_IF_LT( i_cost, analysis.i_cost16x8bi, i_type, analysis.i_mb_type16x8, i_partition, D_16x8 ); } if( !analysis.b_early_terminate || i_cost_est8x16bi_total < i_cost ) { x264_mb_analyse_inter_b8x16( h, &analysis, i_cost ); COPY3_IF_LT( i_cost, analysis.i_cost8x16bi, i_type, analysis.i_mb_type8x16, i_partition, D_8x16 ); } if( !try_16x8_first && (!analysis.b_early_terminate || i_cost_est16x8bi_total < i_cost) ) { x264_mb_analyse_inter_b16x8( h, &analysis, i_cost ); COPY3_IF_LT( i_cost, analysis.i_cost16x8bi, i_type, analysis.i_mb_type16x8, i_partition, D_16x8 ); } } if( analysis.i_mbrd || !h->mb.i_subpel_refine ) { /* refine later */ } /* refine qpel */ else if( i_partition == D_16x16 ) { analysis.l0.me16x16.cost -= analysis.i_lambda * i_mb_b_cost_table[B_L0_L0]; analysis.l1.me16x16.cost -= analysis.i_lambda * i_mb_b_cost_table[B_L1_L1]; if( i_type == B_L0_L0 ) { x264_me_refine_qpel( h, &analysis.l0.me16x16 ); i_cost = analysis.l0.me16x16.cost + analysis.i_lambda * i_mb_b_cost_table[B_L0_L0]; } else if( i_type == B_L1_L1 ) { x264_me_refine_qpel( h, &analysis.l1.me16x16 ); i_cost = analysis.l1.me16x16.cost + analysis.i_lambda * i_mb_b_cost_table[B_L1_L1]; } else if( i_type == B_BI_BI ) { x264_me_refine_qpel( h, &analysis.l0.bi16x16 ); x264_me_refine_qpel( h, &analysis.l1.bi16x16 ); } } else if( i_partition == D_16x8 ) { for( int i = 0; i < 2; i++ ) { if( analysis.i_mb_partition16x8[i] != D_L1_8x8 ) x264_me_refine_qpel( h, &analysis.l0.me16x8[i] ); if( analysis.i_mb_partition16x8[i] != D_L0_8x8 ) x264_me_refine_qpel( h, &analysis.l1.me16x8[i] ); } } else if( i_partition == D_8x16 ) { for( int i = 0; i < 2; i++ ) { if( analysis.i_mb_partition8x16[i] != D_L1_8x8 ) x264_me_refine_qpel( h, &analysis.l0.me8x16[i] ); if( analysis.i_mb_partition8x16[i] != D_L0_8x8 ) x264_me_refine_qpel( h, &analysis.l1.me8x16[i] ); } } else if( i_partition == D_8x8 ) { for( int i = 0; i < 4; i++ ) { x264_me_t *m; int i_part_cost_old; int i_type_cost; int i_part_type = h->mb.i_sub_partition[i]; int b_bidir = (i_part_type == D_BI_8x8); if( i_part_type == D_DIRECT_8x8 ) continue; if( x264_mb_partition_listX_table[0][i_part_type] ) { m = &analysis.l0.me8x8[i]; i_part_cost_old = m->cost; i_type_cost = analysis.i_lambda * i_sub_mb_b_cost_table[D_L0_8x8]; m->cost -= i_type_cost; x264_me_refine_qpel( h, m ); if( !b_bidir ) analysis.i_cost8x8bi += m->cost + i_type_cost - i_part_cost_old; } if( x264_mb_partition_listX_table[1][i_part_type] ) { m = &analysis.l1.me8x8[i]; i_part_cost_old = m->cost; i_type_cost = analysis.i_lambda * i_sub_mb_b_cost_table[D_L1_8x8]; m->cost -= i_type_cost; x264_me_refine_qpel( h, m ); if( !b_bidir ) analysis.i_cost8x8bi += m->cost + i_type_cost - i_part_cost_old; } /* TODO: update mvp? */ } } i_satd_inter = i_cost; if( analysis.i_mbrd ) { x264_mb_analyse_b_rd( h, &analysis, i_satd_inter ); i_type = B_SKIP; i_cost = i_bskip_cost; i_partition = D_16x16; COPY2_IF_LT( i_cost, analysis.l0.i_rd16x16, i_type, B_L0_L0 ); COPY2_IF_LT( i_cost, analysis.l1.i_rd16x16, i_type, B_L1_L1 ); COPY2_IF_LT( i_cost, analysis.i_rd16x16bi, i_type, B_BI_BI ); COPY2_IF_LT( i_cost, analysis.i_rd16x16direct, i_type, B_DIRECT ); COPY3_IF_LT( i_cost, analysis.i_rd16x8bi, i_type, analysis.i_mb_type16x8, i_partition, D_16x8 ); COPY3_IF_LT( i_cost, analysis.i_rd8x16bi, i_type, analysis.i_mb_type8x16, i_partition, D_8x16 ); COPY3_IF_LT( i_cost, analysis.i_rd8x8bi, i_type, B_8x8, i_partition, D_8x8 ); h->mb.i_type = i_type; h->mb.i_partition = i_partition; } if( h->mb.b_chroma_me ) { if( CHROMA444 ) { x264_mb_analyse_intra( h, &analysis, i_satd_inter ); x264_mb_analyse_intra_chroma( h, &analysis ); } else { x264_mb_analyse_intra_chroma( h, &analysis ); x264_mb_analyse_intra( h, &analysis, i_satd_inter - analysis.i_satd_chroma ); } analysis.i_satd_i16x16 += analysis.i_satd_chroma; analysis.i_satd_i8x8 += analysis.i_satd_chroma; analysis.i_satd_i4x4 += analysis.i_satd_chroma; } else x264_mb_analyse_intra( h, &analysis, i_satd_inter ); if( analysis.i_mbrd ) { x264_mb_analyse_transform_rd( h, &analysis, &i_satd_inter, &i_cost ); x264_intra_rd( h, &analysis, i_satd_inter * 17/16 + 1 ); } COPY2_IF_LT( i_cost, analysis.i_satd_i16x16, i_type, I_16x16 ); COPY2_IF_LT( i_cost, analysis.i_satd_i8x8, i_type, I_8x8 ); COPY2_IF_LT( i_cost, analysis.i_satd_i4x4, i_type, I_4x4 ); COPY2_IF_LT( i_cost, analysis.i_satd_pcm, i_type, I_PCM ); h->mb.i_type = i_type; h->mb.i_partition = i_partition; if( analysis.i_mbrd >= 2 && IS_INTRA( i_type ) && i_type != I_PCM ) x264_intra_rd_refine( h, &analysis ); if( h->mb.i_subpel_refine >= 5 ) x264_refine_bidir( h, &analysis ); if( analysis.i_mbrd >= 2 && i_type > B_DIRECT && i_type < B_SKIP ) { int i_biweight; x264_analyse_update_cache( h, &analysis ); if( i_partition == D_16x16 ) { if( i_type == B_L0_L0 ) { analysis.l0.me16x16.cost = i_cost; x264_me_refine_qpel_rd( h, &analysis.l0.me16x16, analysis.i_lambda2, 0, 0 ); } else if( i_type == B_L1_L1 ) { analysis.l1.me16x16.cost = i_cost; x264_me_refine_qpel_rd( h, &analysis.l1.me16x16, analysis.i_lambda2, 0, 1 ); } else if( i_type == B_BI_BI ) { i_biweight = h->mb.bipred_weight[analysis.l0.bi16x16.i_ref][analysis.l1.bi16x16.i_ref]; x264_me_refine_bidir_rd( h, &analysis.l0.bi16x16, &analysis.l1.bi16x16, i_biweight, 0, analysis.i_lambda2 ); } } else if( i_partition == D_16x8 ) { for( int i = 0; i < 2; i++ ) { h->mb.i_sub_partition[i*2] = h->mb.i_sub_partition[i*2+1] = analysis.i_mb_partition16x8[i]; if( analysis.i_mb_partition16x8[i] == D_L0_8x8 ) x264_me_refine_qpel_rd( h, &analysis.l0.me16x8[i], analysis.i_lambda2, i*8, 0 ); else if( analysis.i_mb_partition16x8[i] == D_L1_8x8 ) x264_me_refine_qpel_rd( h, &analysis.l1.me16x8[i], analysis.i_lambda2, i*8, 1 ); else if( analysis.i_mb_partition16x8[i] == D_BI_8x8 ) { i_biweight = h->mb.bipred_weight[analysis.l0.me16x8[i].i_ref][analysis.l1.me16x8[i].i_ref]; x264_me_refine_bidir_rd( h, &analysis.l0.me16x8[i], &analysis.l1.me16x8[i], i_biweight, i*2, analysis.i_lambda2 ); } } } else if( i_partition == D_8x16 ) { for( int i = 0; i < 2; i++ ) { h->mb.i_sub_partition[i] = h->mb.i_sub_partition[i+2] = analysis.i_mb_partition8x16[i]; if( analysis.i_mb_partition8x16[i] == D_L0_8x8 ) x264_me_refine_qpel_rd( h, &analysis.l0.me8x16[i], analysis.i_lambda2, i*4, 0 ); else if( analysis.i_mb_partition8x16[i] == D_L1_8x8 ) x264_me_refine_qpel_rd( h, &analysis.l1.me8x16[i], analysis.i_lambda2, i*4, 1 ); else if( analysis.i_mb_partition8x16[i] == D_BI_8x8 ) { i_biweight = h->mb.bipred_weight[analysis.l0.me8x16[i].i_ref][analysis.l1.me8x16[i].i_ref]; x264_me_refine_bidir_rd( h, &analysis.l0.me8x16[i], &analysis.l1.me8x16[i], i_biweight, i, analysis.i_lambda2 ); } } } else if( i_partition == D_8x8 ) { for( int i = 0; i < 4; i++ ) { if( h->mb.i_sub_partition[i] == D_L0_8x8 ) x264_me_refine_qpel_rd( h, &analysis.l0.me8x8[i], analysis.i_lambda2, i*4, 0 ); else if( h->mb.i_sub_partition[i] == D_L1_8x8 ) x264_me_refine_qpel_rd( h, &analysis.l1.me8x8[i], analysis.i_lambda2, i*4, 1 ); else if( h->mb.i_sub_partition[i] == D_BI_8x8 ) { i_biweight = h->mb.bipred_weight[analysis.l0.me8x8[i].i_ref][analysis.l1.me8x8[i].i_ref]; x264_me_refine_bidir_rd( h, &analysis.l0.me8x8[i], &analysis.l1.me8x8[i], i_biweight, i, analysis.i_lambda2 ); } } } } } } x264_analyse_update_cache( h, &analysis ); /* In rare cases we can end up qpel-RDing our way back to a larger partition size * without realizing it. Check for this and account for it if necessary. */ if( analysis.i_mbrd >= 2 ) { /* Don't bother with bipred or 8x8-and-below, the odds are incredibly low. */ static const uint8_t check_mv_lists[X264_MBTYPE_MAX] = {[P_L0]=1, [B_L0_L0]=1, [B_L1_L1]=2}; int list = check_mv_lists[h->mb.i_type] - 1; if( list >= 0 && h->mb.i_partition != D_16x16 && M32( &h->mb.cache.mv[list][x264_scan8[0]] ) == M32( &h->mb.cache.mv[list][x264_scan8[12]] ) && h->mb.cache.ref[list][x264_scan8[0]] == h->mb.cache.ref[list][x264_scan8[12]] ) h->mb.i_partition = D_16x16; } if( !analysis.i_mbrd ) x264_mb_analyse_transform( h ); if( analysis.i_mbrd == 3 && !IS_SKIP(h->mb.i_type) ) x264_mb_analyse_qp_rd( h, &analysis ); h->mb.b_trellis = h->param.analyse.i_trellis; h->mb.b_noise_reduction = h->mb.b_noise_reduction || (!!h->param.analyse.i_noise_reduction && !IS_INTRA( h->mb.i_type )); if( !IS_SKIP(h->mb.i_type) && h->mb.i_psy_trellis && h->param.analyse.i_trellis == 1 ) x264_psy_trellis_init( h, 0 ); if( h->mb.b_trellis == 1 || h->mb.b_noise_reduction ) h->mb.i_skip_intra = 0; }
大致流程:
(1)如果当前是I Slice,调用x264_mb_analyse_intra()进行Intra宏块的帧内预测模式分析;
(2)如果是P Slice:
(a)调用x264_macroblock_probe_pskip()分析是否为Skip宏块,如果是下面步骤不再进行分析;
(b)调用x264_mb_analyse_inter_p16x16()分析P16x16帧间预测的代价;
(c)调用x264_mb_analyse_inter_p8x8()分析P8x8帧间预测的代价;
(d)如果P8x8代价值小于P16x16,则依次对4个8x8的子宏块分割进行判断:
(i)调用x264_mb_analyse_inter_p4x4()分析P4x4帧间预测的代价;
(ii)如果P4x4的代价值小于P8x8,则调用x264_mb_analyse_inter_p8x4和x264_mb_analyse_inter_p4x8分析P8x4和P4x8帧间预测的代价;
(e)如果P8x8代价值小于P16x16,调用x264_mb_analyse_inter_p16x8和x264_mb_analyse_inter_p8x16分析P16x8和P8x16帧间预测的代价;
(f)此外调用x264_mb_analyse_intra(),检查当前宏块作为Intra宏块编码的代价是否小于作为P宏块编码的代价;
(3)如果当前是B Slice,则进行和P Slice同样的处理。
宏块编码模块:调用函数x264_macroblock_encode_internal()。
x264_macroblock_encode_internal():
static ALWAYS_INLINE void x264_macroblock_encode_internal( x264_t *h, int plane_count, int chroma ) { int i_qp = h->mb.i_qp; int b_decimate = h->mb.b_dct_decimate; int b_force_no_skip = 0; int nz; h->mb.i_cbp_luma = 0; for( int p = 0; p < plane_count; p++ ) h->mb.cache.non_zero_count[x264_scan8[LUMA_DC+p]] = 0; //PCM,不常见 if( h->mb.i_type == I_PCM ) { /* if PCM is chosen, we need to store reconstructed frame data */ for( int p = 0; p < plane_count; p++ ) h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[p], FDEC_STRIDE, h->mb.pic.p_fenc[p], FENC_STRIDE, 16 ); if( chroma ) { int height = 16 >> CHROMA_V_SHIFT; h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fenc[1], FENC_STRIDE, height ); h->mc.copy[PIXEL_8x8] ( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fenc[2], FENC_STRIDE, height ); } return; } if( !h->mb.b_allow_skip ) { b_force_no_skip = 1; if( IS_SKIP(h->mb.i_type) ) { if( h->mb.i_type == P_SKIP ) h->mb.i_type = P_L0; else if( h->mb.i_type == B_SKIP ) h->mb.i_type = B_DIRECT; } } //根据不同的宏块类型,进行编码 if( h->mb.i_type == P_SKIP ) { /* don't do pskip motion compensation if it was already done in macroblock_analyse */ if( !h->mb.b_skip_mc ) { int mvx = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][0], h->mb.mv_min[0], h->mb.mv_max[0] ); int mvy = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][1], h->mb.mv_min[1], h->mb.mv_max[1] ); for( int p = 0; p < plane_count; p++ ) h->mc.mc_luma( h->mb.pic.p_fdec[p], FDEC_STRIDE, &h->mb.pic.p_fref[0][0][p*4], h->mb.pic.i_stride[p], mvx, mvy, 16, 16, &h->sh.weight[0][p] ); if( chroma ) { int v_shift = CHROMA_V_SHIFT; int height = 16 >> v_shift; /* Special case for mv0, which is (of course) very common in P-skip mode. */ if( mvx | mvy ) h->mc.mc_chroma( h->mb.pic.p_fdec[1], h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1], mvx, 2*mvy>>v_shift, 8, height ); else h->mc.load_deinterleave_chroma_fdec( h->mb.pic.p_fdec[1], h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1], height ); if( h->sh.weight[0][1].weightfn ) h->sh.weight[0][1].weightfn[8>>2]( h->mb.pic.p_fdec[1], FDEC_STRIDE, h->mb.pic.p_fdec[1], FDEC_STRIDE, &h->sh.weight[0][1], height ); if( h->sh.weight[0][2].weightfn ) h->sh.weight[0][2].weightfn[8>>2]( h->mb.pic.p_fdec[2], FDEC_STRIDE, h->mb.pic.p_fdec[2], FDEC_STRIDE, &h->sh.weight[0][2], height ); } } //编码skip类型宏块 x264_macroblock_encode_skip( h ); return; } if( h->mb.i_type == B_SKIP ) { /* don't do bskip motion compensation if it was already done in macroblock_analyse */ if( !h->mb.b_skip_mc ) x264_mb_mc( h ); x264_macroblock_encode_skip( h ); return; } if( h->mb.i_type == I_16x16 ) { h->mb.b_transform_8x8 = 0; //Intra16x16宏块编码-需要Hadamard变换 //分别编码Y,U,V /* * 16x16 宏块 * * +--------+--------+ * | | * | | * | | * + + + * | | * | | * | | * +--------+--------+ * */ for( int p = 0; p < plane_count; p++, i_qp = h->mb.i_chroma_qp ) x264_mb_encode_i16x16( h, p, i_qp ); } else if( h->mb.i_type == I_8x8 ) { h->mb.b_transform_8x8 = 1; /* If we already encoded 3 of the 4 i8x8 blocks, we don't have to do them again. */ if( h->mb.i_skip_intra ) { h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i8x8_fdec_buf, 16, 16 ); M32( &h->mb.cache.non_zero_count[x264_scan8[ 0]] ) = h->mb.pic.i8x8_nnz_buf[0]; M32( &h->mb.cache.non_zero_count[x264_scan8[ 2]] ) = h->mb.pic.i8x8_nnz_buf[1]; M32( &h->mb.cache.non_zero_count[x264_scan8[ 8]] ) = h->mb.pic.i8x8_nnz_buf[2]; M32( &h->mb.cache.non_zero_count[x264_scan8[10]] ) = h->mb.pic.i8x8_nnz_buf[3]; h->mb.i_cbp_luma = h->mb.pic.i8x8_cbp; /* In RD mode, restore the now-overwritten DCT data. */ if( h->mb.i_skip_intra == 2 ) h->mc.memcpy_aligned( h->dct.luma8x8, h->mb.pic.i8x8_dct_buf, sizeof(h->mb.pic.i8x8_dct_buf) ); } for( int p = 0; p < plane_count; p++, i_qp = h->mb.i_chroma_qp ) { for( int i = (p == 0 && h->mb.i_skip_intra) ? 3 : 0 ; i < 4; i++ ) { int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]]; x264_mb_encode_i8x8( h, p, i, i_qp, i_mode, NULL, 1 ); } } } //Intra4x4类型 else if( h->mb.i_type == I_4x4 ) { /* * 帧内预测:16x16 宏块被划分为16个4x4子块 * * +----+----+----+----+ * | | | | | * +----+----+----+----+ * | | | | | * +----+----+----+----+ * | | | | | * +----+----+----+----+ * | | | | | * +----+----+----+----+ * */ h->mb.b_transform_8x8 = 0; /* If we already encoded 15 of the 16 i4x4 blocks, we don't have to do them again. */ if( h->mb.i_skip_intra ) { h->mc.copy[PIXEL_16x16]( h->mb.pic.p_fdec[0], FDEC_STRIDE, h->mb.pic.i4x4_fdec_buf, 16, 16 ); M32( &h->mb.cache.non_zero_count[x264_scan8[ 0]] ) = h->mb.pic.i4x4_nnz_buf[0]; M32( &h->mb.cache.non_zero_count[x264_scan8[ 2]] ) = h->mb.pic.i4x4_nnz_buf[1]; M32( &h->mb.cache.non_zero_count[x264_scan8[ 8]] ) = h->mb.pic.i4x4_nnz_buf[2]; M32( &h->mb.cache.non_zero_count[x264_scan8[10]] ) = h->mb.pic.i4x4_nnz_buf[3]; h->mb.i_cbp_luma = h->mb.pic.i4x4_cbp; /* In RD mode, restore the now-overwritten DCT data. */ if( h->mb.i_skip_intra == 2 ) h->mc.memcpy_aligned( h->dct.luma4x4, h->mb.pic.i4x4_dct_buf, sizeof(h->mb.pic.i4x4_dct_buf) ); } //分别编码Y,U,V for( int p = 0; p < plane_count; p++, i_qp = h->mb.i_chroma_qp ) { //循环16次,编码16个Intra4x4宏块 for( int i = (p == 0 && h->mb.i_skip_intra) ? 15 : 0 ; i < 16; i++ ) { pixel *p_dst = &h->mb.pic.p_fdec[p][block_idx_xy_fdec[i]]; int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]]; if( (h->mb.i_neighbour4[i] & (MB_TOPRIGHT|MB_TOP)) == MB_TOP ) /* emulate missing topright samples */ MPIXEL_X4( &p_dst[4-FDEC_STRIDE] ) = PIXEL_SPLAT_X4( p_dst[3-FDEC_STRIDE] ); //Intra4x4宏块编码 /* * +----+ * | | * +----+ */ x264_mb_encode_i4x4( h, p, i, i_qp, i_mode, 1 ); } } } //包含帧间预测 else /* Inter MB */ { int i_decimate_mb = 0; /* Don't repeat motion compensation if it was already done in non-RD transform analysis */ if( !h->mb.b_skip_mc ) x264_mb_mc( h ); if( h->mb.b_lossless )//lossless情况没研究过 { if( h->mb.b_transform_8x8 ) for( int p = 0; p < plane_count; p++ ) for( int i8x8 = 0; i8x8 < 4; i8x8++ ) { int x = i8x8&1; int y = i8x8>>1; nz = h->zigzagf.sub_8x8( h->dct.luma8x8[p*4+i8x8], h->mb.pic.p_fenc[p] + 8*x + 8*y*FENC_STRIDE, h->mb.pic.p_fdec[p] + 8*x + 8*y*FDEC_STRIDE ); STORE_8x8_NNZ( p, i8x8, nz ); h->mb.i_cbp_luma |= nz << i8x8; } else for( int p = 0; p < plane_count; p++ ) for( int i4x4 = 0; i4x4 < 16; i4x4++ ) { nz = h->zigzagf.sub_4x4( h->dct.luma4x4[p*16+i4x4], h->mb.pic.p_fenc[p]+block_idx_xy_fenc[i4x4], h->mb.pic.p_fdec[p]+block_idx_xy_fdec[i4x4] ); h->mb.cache.non_zero_count[x264_scan8[p*16+i4x4]] = nz; h->mb.i_cbp_luma |= nz << (i4x4>>2); } } else if( h->mb.b_transform_8x8 )//DCT8x8情况暂时没研究过 { ALIGNED_ARRAY_N( dctcoef, dct8x8,[4],[64] ); b_decimate &= !h->mb.b_trellis || !h->param.b_cabac; // 8x8 trellis is inherently optimal decimation for CABAC for( int p = 0; p < plane_count; p++, i_qp = h->mb.i_chroma_qp ) { CLEAR_16x16_NNZ( p ); h->dctf.sub16x16_dct8( dct8x8, h->mb.pic.p_fenc[p], h->mb.pic.p_fdec[p] ); h->nr_count[1+!!p*2] += h->mb.b_noise_reduction * 4; int plane_cbp = 0; for( int idx = 0; idx < 4; idx++ ) { nz = x264_quant_8x8( h, dct8x8[idx], i_qp, ctx_cat_plane[DCT_LUMA_8x8][p], 0, p, idx ); if( nz ) { h->zigzagf.scan_8x8( h->dct.luma8x8[p*4+idx], dct8x8[idx] ); if( b_decimate ) { int i_decimate_8x8 = h->quantf.decimate_score64( h->dct.luma8x8[p*4+idx] ); i_decimate_mb += i_decimate_8x8; if( i_decimate_8x8 >= 4 ) plane_cbp |= 1<<idx; } else plane_cbp |= 1<<idx; } } if( i_decimate_mb >= 6 || !b_decimate ) { h->mb.i_cbp_luma |= plane_cbp; FOREACH_BIT( idx, 0, plane_cbp ) { h->quantf.dequant_8x8( dct8x8[idx], h->dequant8_mf[p?CQM_8PC:CQM_8PY], i_qp ); h->dctf.add8x8_idct8( &h->mb.pic.p_fdec[p][8*(idx&1) + 8*(idx>>1)*FDEC_STRIDE], dct8x8[idx] ); STORE_8x8_NNZ( p, idx, 1 ); } } } } else//最普通的情况 { /* * 帧间预测:16x16 宏块被划分为8x8 * 每个8x8再次被划分为4x4 * * ++====+====++====+====++ * || | || | || * ++====+====++====+====++ * || | || | || * ++====+====++====+====++ * || | || | || * ++====+====++====+====++ * || | || | || * ++====+====+=====+====++ * */ ALIGNED_ARRAY_N( dctcoef, dct4x4,[16],[16] ); for( int p = 0; p < plane_count; p++, i_qp = h->mb.i_chroma_qp ) { CLEAR_16x16_NNZ( p ); //16x16DCT(实际上分解为16个4x4DCT) //求编码帧p_fenc和重建帧p_fdec之间的残差,然后进行DCT变换 h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[p], h->mb.pic.p_fdec[p] ); if( h->mb.b_noise_reduction ) { h->nr_count[0+!!p*2] += 16; for( int idx = 0; idx < 16; idx++ ) h->quantf.denoise_dct( dct4x4[idx], h->nr_residual_sum[0+!!p*2], h->nr_offset[0+!!p*2], 16 ); } int plane_cbp = 0; //16x16的块分成4个8x8的块 for( int i8x8 = 0; i8x8 < 4; i8x8++ ) { int i_decimate_8x8 = b_decimate ? 0 : 6; int nnz8x8 = 0; if( h->mb.b_trellis ) { for( int i4x4 = 0; i4x4 < 4; i4x4++ ) { int idx = i8x8*4+i4x4; if( x264_quant_4x4_trellis( h, dct4x4[idx], CQM_4PY, i_qp, ctx_cat_plane[DCT_LUMA_4x4][p], 0, !!p, p*16+idx ) ) { h->zigzagf.scan_4x4( h->dct.luma4x4[p*16+idx], dct4x4[idx] ); h->quantf.dequant_4x4( dct4x4[idx], h->dequant4_mf[p?CQM_4PC:CQM_4PY], i_qp ); if( i_decimate_8x8 < 6 ) i_decimate_8x8 += h->quantf.decimate_score16( h->dct.luma4x4[p*16+idx] ); h->mb.cache.non_zero_count[x264_scan8[p*16+idx]] = 1; nnz8x8 = 1; } } } else { //8x8的块分成4个4x4的块,每个4x4的块再分别进行量化 nnz8x8 = nz = h->quantf.quant_4x4x4( &dct4x4[i8x8*4], h->quant4_mf[CQM_4PY][i_qp], h->quant4_bias[CQM_4PY][i_qp] ); if( nz ) { FOREACH_BIT( idx, i8x8*4, nz ) { //这几步用于建立重建帧 h->zigzagf.scan_4x4( h->dct.luma4x4[p*16+idx], dct4x4[idx] ); //反量化 h->quantf.dequant_4x4( dct4x4[idx], h->dequant4_mf[p?CQM_4PC:CQM_4PY], i_qp ); if( i_decimate_8x8 < 6 ) i_decimate_8x8 += h->quantf.decimate_score16( h->dct.luma4x4[p*16+idx] ); h->mb.cache.non_zero_count[x264_scan8[p*16+idx]] = 1; } } } if( nnz8x8 ) { i_decimate_mb += i_decimate_8x8; if( i_decimate_8x8 < 4 ) STORE_8x8_NNZ( p, i8x8, 0 ); else plane_cbp |= 1<<i8x8; } } if( i_decimate_mb < 6 ) { plane_cbp = 0; CLEAR_16x16_NNZ( p ); } else { h->mb.i_cbp_luma |= plane_cbp; FOREACH_BIT( i8x8, 0, plane_cbp ) { //用于建立重建帧 //残差进行DCT反变换之后,叠加到预测数据上 h->dctf.add8x8_idct( &h->mb.pic.p_fdec[p][(i8x8&1)*8 + (i8x8>>1)*8*FDEC_STRIDE], &dct4x4[i8x8*4] ); } } } } } /* encode chroma */ if( chroma ) { if( IS_INTRA( h->mb.i_type ) ) { int i_mode = h->mb.i_chroma_pred_mode; if( h->mb.b_lossless ) x264_predict_lossless_chroma( h, i_mode ); else { h->predict_chroma[i_mode]( h->mb.pic.p_fdec[1] ); h->predict_chroma[i_mode]( h->mb.pic.p_fdec[2] ); } } /* encode the 8x8 blocks */ x264_mb_encode_chroma( h, !IS_INTRA( h->mb.i_type ), h->mb.i_chroma_qp ); } else h->mb.i_cbp_chroma = 0; /* store cbp */ int cbp = h->mb.i_cbp_chroma << 4 | h->mb.i_cbp_luma; if( h->param.b_cabac ) cbp |= h->mb.cache.non_zero_count[x264_scan8[LUMA_DC ]] << 8 | h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+0]] << 9 | h->mb.cache.non_zero_count[x264_scan8[CHROMA_DC+1]] << 10; h->mb.cbp[h->mb.i_mb_xy] = cbp; /* Check for P_SKIP * XXX: in the me perhaps we should take x264_mb_predict_mv_pskip into account * (if multiple mv give same result)*/ if( !b_force_no_skip ) { if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 && !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) && M32( h->mb.cache.mv[0][x264_scan8[0]] ) == M32( h->mb.cache.pskip_mv ) && h->mb.cache.ref[0][x264_scan8[0]] == 0 ) { h->mb.i_type = P_SKIP; } /* Check for B_SKIP */ if( h->mb.i_type == B_DIRECT && !(h->mb.i_cbp_luma | h->mb.i_cbp_chroma) ) { h->mb.i_type = B_SKIP; } } }
(1)如果是Skip类型,调用x264_macroblock_encode_skip()编码宏块;
(2)如果Intra16x16类型,调用x264_mb_encode_i16x16()编码宏块;
(3)如果Intra4x4类型,循环16次调用x264_mb_encode_i4x4()编码宏块;
(4)如果Inter类型,则不再调用子函数,二是直接进行编码;
(5)如果对色度编码,调用x264_mb_encode_chroma()。
滤波模块:调用函数x264_fdec_filter_row()。
x264_fdec_filter_row():
static void x264_fdec_filter_row( x264_t *h, int mb_y, int pass ) { /* mb_y is the mb to be encoded next, not the mb to be filtered here */ int b_hpel = h->fdec->b_kept_as_ref; int b_deblock = h->sh.i_disable_deblocking_filter_idc != 1; int b_end = mb_y == h->i_threadslice_end; int b_measure_quality = 1; int min_y = mb_y - (1 << SLICE_MBAFF); int b_start = min_y == h->i_threadslice_start; /* Even in interlaced mode, deblocking never modifies more than 4 pixels * above each MB, as bS=4 doesn't happen for the top of interlaced mbpairs. */ int minpix_y = min_y*16 - 4 * !b_start; int maxpix_y = mb_y*16 - 4 * !b_end; b_deblock &= b_hpel || h->param.b_full_recon || h->param.psz_dump_yuv; if( h->param.b_sliced_threads ) { switch( pass ) { /* During encode: only do deblock if asked for */ default: case 0: b_deblock &= h->param.b_full_recon; b_hpel = 0; break; /* During post-encode pass: do deblock if not done yet, do hpel for all * rows except those between slices. */ case 1: b_deblock &= !h->param.b_full_recon; b_hpel &= !(b_start && min_y > 0); b_measure_quality = 0; break; /* Final pass: do the rows between slices in sequence. */ case 2: b_deblock = 0; b_measure_quality = 0; break; } } if( mb_y & SLICE_MBAFF ) return; if( min_y < h->i_threadslice_start ) return; //去块效应滤波 if( b_deblock ) for( int y = min_y; y < mb_y; y += (1 << SLICE_MBAFF) ) x264_frame_deblock_row( h, y );//处理一行 /* FIXME: Prediction requires different borders for interlaced/progressive mc, * but the actual image data is equivalent. For now, maintain this * consistency by copying deblocked pixels between planes. */ if( PARAM_INTERLACED && (!h->param.b_sliced_threads || pass == 1) ) for( int p = 0; p < h->fdec->i_plane; p++ ) for( int i = minpix_y>>(CHROMA_V_SHIFT && p); i < maxpix_y>>(CHROMA_V_SHIFT && p); i++ ) memcpy( h->fdec->plane_fld[p] + i*h->fdec->i_stride[p], h->fdec->plane[p] + i*h->fdec->i_stride[p], h->mb.i_mb_width*16*sizeof(pixel) ); if( h->fdec->b_kept_as_ref && (!h->param.b_sliced_threads || pass == 1) ) x264_frame_expand_border( h, h->fdec, min_y ); //半像素内插 if( b_hpel ) { int end = mb_y == h->mb.i_mb_height; /* Can't do hpel until the previous slice is done encoding. */ if( h->param.analyse.i_subpel_refine ) { //半像素内插 x264_frame_filter( h, h->fdec, min_y, end ); x264_frame_expand_border_filtered( h, h->fdec, min_y, end ); } } if( SLICE_MBAFF && pass == 0 ) for( int i = 0; i < 3; i++ ) { XCHG( pixel *, h->intra_border_backup[0][i], h->intra_border_backup[3][i] ); XCHG( pixel *, h->intra_border_backup[1][i], h->intra_border_backup[4][i] ); } if( h->i_thread_frames > 1 && h->fdec->b_kept_as_ref ) x264_frame_cond_broadcast( h->fdec, mb_y*16 + (b_end ? 10000 : -(X264_THREAD_HEIGHT << SLICE_MBAFF)) ); //计算编码的质量 if( b_measure_quality ) { maxpix_y = X264_MIN( maxpix_y, h->param.i_height ); //如果需要打印输出PSNR if( h->param.analyse.b_psnr ) { //实际上是计算SSD //输出的时候调用x264_psnr()换算SSD为PSNR /** * 计算PSNR的过程 * * MSE = SSD*1/(w*h) * PSNR= 10*log10(MAX^2/MSE) * * 其中MAX指的是图像的灰度级,对于8bit来说就是2^8-1=255 */ for( int p = 0; p < (CHROMA444 ? 3 : 1); p++ ) h->stat.frame.i_ssd[p] += x264_pixel_ssd_wxh( &h->pixf, h->fdec->plane[p] + minpix_y * h->fdec->i_stride[p], h->fdec->i_stride[p],//重建帧 h->fenc->plane[p] + minpix_y * h->fenc->i_stride[p], h->fenc->i_stride[p],//编码帧 h->param.i_width, maxpix_y-minpix_y ); if( !CHROMA444 ) { uint64_t ssd_u, ssd_v; int v_shift = CHROMA_V_SHIFT; x264_pixel_ssd_nv12( &h->pixf, h->fdec->plane[1] + (minpix_y>>v_shift) * h->fdec->i_stride[1], h->fdec->i_stride[1], h->fenc->plane[1] + (minpix_y>>v_shift) * h->fenc->i_stride[1], h->fenc->i_stride[1], h->param.i_width>>1, (maxpix_y-minpix_y)>>v_shift, &ssd_u, &ssd_v ); h->stat.frame.i_ssd[1] += ssd_u; h->stat.frame.i_ssd[2] += ssd_v; } } //如果需要打印输出SSIM if( h->param.analyse.b_ssim ) { int ssim_cnt; x264_emms(); /* offset by 2 pixels to avoid alignment of ssim blocks with dct blocks, * and overlap by 4 */ minpix_y += b_start ? 2 : -6; //计算SSIM h->stat.frame.f_ssim += x264_pixel_ssim_wxh( &h->pixf, h->fdec->plane[0] + 2+minpix_y*h->fdec->i_stride[0], h->fdec->i_stride[0],//重建帧 h->fenc->plane[0] + 2+minpix_y*h->fenc->i_stride[0], h->fenc->i_stride[0],//编码帧 h->param.i_width-2, maxpix_y-minpix_y, h->scratch_buffer, &ssim_cnt ); h->stat.frame.i_ssim_cnt += ssim_cnt; } } }
函数x264_fdec_filter_row()完成了三步工作:
环路滤波;半像素内插;视频质量SSIM和PSNR计算。
熵编码模块:熵编码模块包含两个函数:如果输出设置为CABAC编码,调用函数x264_macroblock_write_cabac();如果输出设置为CAVLC编码,调用函数x264_macroblock_write_cavlc()。
x264_macroblock_write_cavlc():
void x264_macroblock_write_cavlc( x264_t *h ) { bs_t *s = &h->out.bs; const int i_mb_type = h->mb.i_type; int plane_count = CHROMA444 ? 3 : 1; int chroma = !CHROMA444; #if RDO_SKIP_BS s->i_bits_encoded = 0; #else const int i_mb_pos_start = bs_pos( s ); int i_mb_pos_tex; #endif if( SLICE_MBAFF && (!(h->mb.i_mb_y & 1) || IS_SKIP(h->mb.type[h->mb.i_mb_xy - h->mb.i_mb_stride])) ) { bs_write1( s, MB_INTERLACED ); #if !RDO_SKIP_BS h->mb.field_decoding_flag = MB_INTERLACED; #endif } #if !RDO_SKIP_BS if( i_mb_type == I_PCM ) { static const uint8_t i_offsets[3] = {5,23,0}; uint8_t *p_start = s->p_start; bs_write_ue( s, i_offsets[h->sh.i_type] + 25 ); i_mb_pos_tex = bs_pos( s ); h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start; bs_align_0( s ); for( int p = 0; p < plane_count; p++ ) for( int i = 0; i < 256; i++ ) bs_write( s, BIT_DEPTH, h->mb.pic.p_fenc[p][i] ); if( chroma ) for( int ch = 1; ch < 3; ch++ ) for( int i = 0; i < 16>>CHROMA_V_SHIFT; i++ ) for( int j = 0; j < 8; j++ ) bs_write( s, BIT_DEPTH, h->mb.pic.p_fenc[ch][i*FENC_STRIDE+j] ); bs_init( s, s->p, s->p_end - s->p ); s->p_start = p_start; h->stat.frame.i_tex_bits += bs_pos(s) - i_mb_pos_tex; return; } #endif if( h->sh.i_type == SLICE_TYPE_P ) x264_cavlc_mb_header_p( h, i_mb_type, chroma );//写入P宏块MB Header数据-CAVLC else if( h->sh.i_type == SLICE_TYPE_B ) x264_cavlc_mb_header_b( h, i_mb_type, chroma );//写入B宏块MB Header数据-CAVLC else //if( h->sh.i_type == SLICE_TYPE_I ) x264_cavlc_mb_header_i( h, i_mb_type, 0, chroma );//写入I宏块MB Header数据-CAVLC #if !RDO_SKIP_BS i_mb_pos_tex = bs_pos( s ); h->stat.frame.i_mv_bits += i_mb_pos_tex - i_mb_pos_start; #endif /* Coded block pattern */ if( i_mb_type != I_16x16 ) bs_write_ue( s, cbp_to_golomb[chroma][IS_INTRA(i_mb_type)][(h->mb.i_cbp_chroma << 4)|h->mb.i_cbp_luma] ); /* transform size 8x8 flag */ if( x264_mb_transform_8x8_allowed( h ) && h->mb.i_cbp_luma ) bs_write1( s, h->mb.b_transform_8x8 ); if( i_mb_type == I_16x16 ) { x264_cavlc_qp_delta( h ); /* DC Luma */ for( int p = 0; p < plane_count; p++ ) { x264_cavlc_block_residual( h, DCT_LUMA_DC, LUMA_DC+p, h->dct.luma16x16_dc[p] ); /* AC Luma */ if( h->mb.i_cbp_luma ) for( int i = p*16; i < p*16+16; i++ ) x264_cavlc_block_residual( h, DCT_LUMA_AC, i, h->dct.luma4x4[i]+1 ); } } else if( h->mb.i_cbp_luma | h->mb.i_cbp_chroma ) { x264_cavlc_qp_delta( h ); //残差数据 x264_cavlc_macroblock_luma_residual( h, plane_count ); } if( h->mb.i_cbp_chroma ) { /* Chroma DC residual present */ x264_cavlc_block_residual( h, DCT_CHROMA_DC, CHROMA_DC+0, h->dct.chroma_dc[0] ); x264_cavlc_block_residual( h, DCT_CHROMA_DC, CHROMA_DC+1, h->dct.chroma_dc[1] ); if( h->mb.i_cbp_chroma == 2 ) /* Chroma AC residual present */ { int step = 8 << CHROMA_V_SHIFT; for( int i = 16; i < 3*16; i += step ) for( int j = i; j < i+4; j++ ) x264_cavlc_block_residual( h, DCT_CHROMA_AC, j, h->dct.luma4x4[j]+1 ); } } #if !RDO_SKIP_BS h->stat.frame.i_tex_bits += bs_pos(s) - i_mb_pos_tex; #endif }
2.NAL打包:
前面所说的压缩编码过程已经把所有的宏块循环完毕,实现了VCL编码。进行NAL打包是为了增强码流的健壮性,适应网络传输。VCL编码加上NAL头信息就组成完整的NAL单元,输出文件。
这部分的代码位于函数x264_encoder_encode()中,调用了函数x264_encoder_frame_end()。
x264_encoder_frame_end():在编码结束后做一些后续处理,比如说加上起始码,封装MALU。
//结束的时候做一些处理,记录一些统计信息 //pp_nal:输出的NALU //pic_out:输出的重建帧 static int x264_encoder_frame_end( x264_t *h, x264_t *thread_current, x264_nal_t **pp_nal, int *pi_nal, x264_picture_t *pic_out ) { char psz_message[80]; if( !h->param.b_sliced_threads && h->b_thread_active ) { h->b_thread_active = 0; if( (intptr_t)x264_threadpool_wait( h->threadpool, h ) ) return -1; } if( !h->out.i_nal ) { pic_out->i_type = X264_TYPE_AUTO; return 0; } x264_emms(); /* generate buffering period sei and insert it into place */ if( h->i_thread_frames > 1 && h->fenc->b_keyframe && h->sps->vui.b_nal_hrd_parameters_present ) { x264_hrd_fullness( h ); x264_nal_start( h, NAL_SEI, NAL_PRIORITY_DISPOSABLE ); x264_sei_buffering_period_write( h, &h->out.bs ); if( x264_nal_end( h ) ) return -1; /* buffering period sei must follow AUD, SPS and PPS and precede all other SEIs */ int idx = 0; while( h->out.nal[idx].i_type == NAL_AUD || h->out.nal[idx].i_type == NAL_SPS || h->out.nal[idx].i_type == NAL_PPS ) idx++; x264_nal_t nal_tmp = h->out.nal[h->out.i_nal-1]; memmove( &h->out.nal[idx+1], &h->out.nal[idx], (h->out.i_nal-idx-1)*sizeof(x264_nal_t) ); h->out.nal[idx] = nal_tmp; } //封装一帧数据对应的NALU. //例如给NALU添加起始码0x00000001 int frame_size = x264_encoder_encapsulate_nals( h, 0 ); if( frame_size < 0 ) return -1; /* Set output picture properties */ //pic_out为x264_picture_t类型结构体。是libx264对外的结构体 //fenc,fdec是x264_frame_t类型结构体。是libx264的内部结构体 pic_out->i_type = h->fenc->i_type; pic_out->b_keyframe = h->fenc->b_keyframe; pic_out->i_pic_struct = h->fenc->i_pic_struct; pic_out->i_pts = h->fdec->i_pts; pic_out->i_dts = h->fdec->i_dts; if( pic_out->i_pts < pic_out->i_dts ) x264_log( h, X264_LOG_WARNING, "invalid DTS: PTS is less than DTS\n" ); pic_out->opaque = h->fenc->opaque; pic_out->img.i_csp = h->fdec->i_csp; #if HIGH_BIT_DEPTH pic_out->img.i_csp |= X264_CSP_HIGH_DEPTH; #endif pic_out->img.i_plane = h->fdec->i_plane; //图像数据 for( int i = 0; i < pic_out->img.i_plane; i++ ) { pic_out->img.i_stride[i] = h->fdec->i_stride[i] * sizeof(pixel); pic_out->img.plane[i] = (uint8_t*)h->fdec->plane[i]; } //回收用过的编码帧fenc x264_frame_push_unused( thread_current, h->fenc ); /* ---------------------- Update encoder state ------------------------- */ /* update rc */ int filler = 0; if( x264_ratecontrol_end( h, frame_size * 8, &filler ) < 0 ) return -1; pic_out->hrd_timing = h->fenc->hrd_timing; pic_out->prop.f_crf_avg = h->fdec->f_crf_avg; /* Filler in AVC-Intra mode is written as zero bytes to the last slice * We don't know the size of the last slice until encapsulation so we add filler to the encapsulated NAL */ if( h->param.i_avcintra_class ) { x264_t *h0 = h->thread[0]; int ret = x264_check_encapsulated_buffer( h, h0, h->out.i_nal, frame_size, frame_size + filler ); if( ret < 0 ) return -1; memset( h->out.nal[0].p_payload + frame_size, 0, filler ); h->out.nal[h->out.i_nal-1].i_payload += filler; h->out.nal[h->out.i_nal-1].i_padding = filler; frame_size += filler; } else { while( filler > 0 ) { int f, overhead; overhead = (FILLER_OVERHEAD - h->param.b_annexb); if( h->param.i_slice_max_size && filler > h->param.i_slice_max_size ) { int next_size = filler - h->param.i_slice_max_size; int overflow = X264_MAX( overhead - next_size, 0 ); f = h->param.i_slice_max_size - overhead - overflow; } else f = X264_MAX( 0, filler - overhead ); if( x264_bitstream_check_buffer_filler( h, f ) ) return -1; x264_nal_start( h, NAL_FILLER, NAL_PRIORITY_DISPOSABLE ); x264_filler_write( h, &h->out.bs, f ); if( x264_nal_end( h ) ) return -1; int total_size = x264_encoder_encapsulate_nals( h, h->out.i_nal-1 ); if( total_size < 0 ) return -1; frame_size += total_size; filler -= total_size; } } /* End bitstream, set output */ *pi_nal = h->out.i_nal; *pp_nal = h->out.nal; h->out.i_nal = 0; x264_noise_reduction_update( h ); /* ---------------------- Compute/Print statistics --------------------- */ x264_thread_sync_stat( h, h->thread[0] ); /* Slice stat */ //stat中存储了统计信息 //帧数+1 (根据类型) h->stat.i_frame_count[h->sh.i_type]++; //帧大小 h->stat.i_frame_size[h->sh.i_type] += frame_size; h->stat.f_frame_qp[h->sh.i_type] += h->fdec->f_qp_avg_aq; //统计MB个数,把不同类型的累加起来 for( int i = 0; i < X264_MBTYPE_MAX; i++ ) h->stat.i_mb_count[h->sh.i_type][i] += h->stat.frame.i_mb_count[i]; for( int i = 0; i < X264_PARTTYPE_MAX; i++ ) h->stat.i_mb_partition[h->sh.i_type][i] += h->stat.frame.i_mb_partition[i]; for( int i = 0; i < 2; i++ ) h->stat.i_mb_count_8x8dct[i] += h->stat.frame.i_mb_count_8x8dct[i]; for( int i = 0; i < 6; i++ ) h->stat.i_mb_cbp[i] += h->stat.frame.i_mb_cbp[i]; for( int i = 0; i < 4; i++ ) for( int j = 0; j < 13; j++ ) h->stat.i_mb_pred_mode[i][j] += h->stat.frame.i_mb_pred_mode[i][j]; if( h->sh.i_type != SLICE_TYPE_I ) for( int i_list = 0; i_list < 2; i_list++ ) for( int i = 0; i < X264_REF_MAX*2; i++ ) h->stat.i_mb_count_ref[h->sh.i_type][i_list][i] += h->stat.frame.i_mb_count_ref[i_list][i]; for( int i = 0; i < 3; i++ ) h->stat.i_mb_field[i] += h->stat.frame.i_mb_field[i]; if( h->sh.i_type == SLICE_TYPE_P && h->param.analyse.i_weighted_pred >= X264_WEIGHTP_SIMPLE ) { h->stat.i_wpred[0] += !!h->sh.weight[0][0].weightfn; h->stat.i_wpred[1] += !!h->sh.weight[0][1].weightfn || !!h->sh.weight[0][2].weightfn; } if( h->sh.i_type == SLICE_TYPE_B ) { h->stat.i_direct_frames[ h->sh.b_direct_spatial_mv_pred ] ++; if( h->mb.b_direct_auto_write ) { //FIXME somewhat arbitrary time constants if( h->stat.i_direct_score[0] + h->stat.i_direct_score[1] > h->mb.i_mb_count ) for( int i = 0; i < 2; i++ ) h->stat.i_direct_score[i] = h->stat.i_direct_score[i] * 9/10; for( int i = 0; i < 2; i++ ) h->stat.i_direct_score[i] += h->stat.frame.i_direct_score[i]; } } else h->stat.i_consecutive_bframes[h->fenc->i_bframes]++; psz_message[0] = '\0'; double dur = h->fenc->f_duration; h->stat.f_frame_duration[h->sh.i_type] += dur; //需要计算PSNR if( h->param.analyse.b_psnr ) { //SSD(Sum of Squared Difference)即差值的平方和 int64_t ssd[3] = { h->stat.frame.i_ssd[0], h->stat.frame.i_ssd[1], h->stat.frame.i_ssd[2], }; int luma_size = h->param.i_width * h->param.i_height; int chroma_size = CHROMA_SIZE( luma_size ); //SSD是已经在“滤波”环节计算过的 //SSD简单换算成PSNR,调用x264_psnr() pic_out->prop.f_psnr[0] = x264_psnr( ssd[0], luma_size ); pic_out->prop.f_psnr[1] = x264_psnr( ssd[1], chroma_size ); pic_out->prop.f_psnr[2] = x264_psnr( ssd[2], chroma_size ); //平均值 pic_out->prop.f_psnr_avg = x264_psnr( ssd[0] + ssd[1] + ssd[2], luma_size + chroma_size*2 ); //mean系列的需要累加 h->stat.f_ssd_global[h->sh.i_type] += dur * (ssd[0] + ssd[1] + ssd[2]); h->stat.f_psnr_average[h->sh.i_type] += dur * pic_out->prop.f_psnr_avg; h->stat.f_psnr_mean_y[h->sh.i_type] += dur * pic_out->prop.f_psnr[0]; h->stat.f_psnr_mean_u[h->sh.i_type] += dur * pic_out->prop.f_psnr[1]; h->stat.f_psnr_mean_v[h->sh.i_type] += dur * pic_out->prop.f_psnr[2]; snprintf( psz_message, 80, " PSNR Y:%5.2f U:%5.2f V:%5.2f", pic_out->prop.f_psnr[0], pic_out->prop.f_psnr[1], pic_out->prop.f_psnr[2] ); } //需要计算SSIM if( h->param.analyse.b_ssim ) { //SSIM是已经在“滤波”环节计算过的 pic_out->prop.f_ssim = h->stat.frame.f_ssim / h->stat.frame.i_ssim_cnt; //mean系列的需要累加 h->stat.f_ssim_mean_y[h->sh.i_type] += pic_out->prop.f_ssim * dur; snprintf( psz_message + strlen(psz_message), 80 - strlen(psz_message), " SSIM Y:%.5f", pic_out->prop.f_ssim ); } psz_message[79] = '\0'; //Debug时候输出 x264_log( h, X264_LOG_DEBUG, "frame=%4d QP=%.2f NAL=%d Slice:%c Poc:%-3d I:%-4d P:%-4d SKIP:%-4d size=%d bytes%s\n", h->i_frame, h->fdec->f_qp_avg_aq, h->i_nal_ref_idc, h->sh.i_type == SLICE_TYPE_I ? 'I' : (h->sh.i_type == SLICE_TYPE_P ? 'P' : 'B' ), h->fdec->i_poc, h->stat.frame.i_mb_count_i, h->stat.frame.i_mb_count_p, h->stat.frame.i_mb_count_skip, frame_size, psz_message ); // keep stats all in one place x264_thread_sync_stat( h->thread[0], h ); // for the use of the next frame x264_thread_sync_stat( thread_current, h ); #ifdef DEBUG_MB_TYPE { static const char mb_chars[] = { 'i', 'i', 'I', 'C', 'P', '8', 'S', 'D', '<', 'X', 'B', 'X', '>', 'B', 'B', 'B', 'B', '8', 'S' }; for( int mb_xy = 0; mb_xy < h->mb.i_mb_width * h->mb.i_mb_height; mb_xy++ ) { if( h->mb.type[mb_xy] < X264_MBTYPE_MAX && h->mb.type[mb_xy] >= 0 ) fprintf( stderr, "%c ", mb_chars[ h->mb.type[mb_xy] ] ); else fprintf( stderr, "? " ); if( (mb_xy+1) % h->mb.i_mb_width == 0 ) fprintf( stderr, "\n" ); } } #endif /* Remove duplicates, must be done near the end as breaks h->fref0 array * by freeing some of its pointers. */ for( int i = 0; i < h->i_ref[0]; i++ ) if( h->fref[0][i] && h->fref[0][i]->b_duplicate ) { x264_frame_push_blank_unused( h, h->fref[0][i] ); h->fref[0][i] = 0; } if( h->param.psz_dump_yuv ) x264_frame_dump( h ); x264_emms(); return frame_size; }
x264_encoder_frame_end()中封装NALU调用了函数x264_encoder_encapsulate_nals()。
x264_encoder_encapsulate_nals():
//封装一帧数据对应的NALU. //例如给NALU添加起始码0x00000001 static int x264_encoder_encapsulate_nals( x264_t *h, int start ) { x264_t *h0 = h->thread[0]; int nal_size = 0, previous_nal_size = 0; if( h->param.nalu_process ) { for( int i = start; i < h->out.i_nal; i++ ) nal_size += h->out.nal[i].i_payload; return nal_size; } for( int i = 0; i < start; i++ ) previous_nal_size += h->out.nal[i].i_payload; for( int i = start; i < h->out.i_nal; i++ ) nal_size += h->out.nal[i].i_payload; /* Worst-case NAL unit escaping: reallocate the buffer if it's too small. */ int necessary_size = previous_nal_size + nal_size * 3/2 + h->out.i_nal * 4 + 4 + 64; for( int i = start; i < h->out.i_nal; i++ ) necessary_size += h->out.nal[i].i_padding; if( x264_check_encapsulated_buffer( h, h0, start, previous_nal_size, necessary_size ) ) return -1; uint8_t *nal_buffer = h0->nal_buffer + previous_nal_size; //一个一个NALU处理 for( int i = start; i < h->out.i_nal; i++ ) { int old_payload_len = h->out.nal[i].i_payload; h->out.nal[i].b_long_startcode = !i || h->out.nal[i].i_type == NAL_SPS || h->out.nal[i].i_type == NAL_PPS || h->param.i_avcintra_class; //添加起始码 x264_nal_encode( h, nal_buffer, &h->out.nal[i] ); nal_buffer += h->out.nal[i].i_payload; if( h->param.i_avcintra_class ) { h->out.nal[i].i_padding -= h->out.nal[i].i_payload - (old_payload_len + NALU_OVERHEAD); if( h->out.nal[i].i_padding > 0 ) { memset( nal_buffer, 0, h->out.nal[i].i_padding ); nal_buffer += h->out.nal[i].i_padding; h->out.nal[i].i_payload += h->out.nal[i].i_padding; } h->out.nal[i].i_padding = X264_MAX( h->out.nal[i].i_padding, 0 ); } } x264_emms(); return nal_buffer - (h0->nal_buffer + previous_nal_size); }
其内部又调用了另一个函数x264_nal_encode()逐个给一帧数据中的各个NALU添加起始码以及NALU Header。
x264_nal_encode():
//添加起始码 void x264_nal_encode( x264_t *h, uint8_t *dst, x264_nal_t *nal ) { uint8_t *src = nal->p_payload; uint8_t *end = nal->p_payload + nal->i_payload; uint8_t *orig_dst = dst; //起始码 ============================================ //annexb格式,起始码为0x00000001 if( h->param.b_annexb ) { if( nal->b_long_startcode ) *dst++ = 0x00; *dst++ = 0x00; *dst++ = 0x00; *dst++ = 0x01; } else /* save room for size later */ dst += 4;//mp4格式 //NALU Header ======================================= /* nal header */ *dst++ = ( 0x00 << 7 ) | ( nal->i_ref_idc << 5 ) | nal->i_type; dst = h->bsf.nal_escape( dst, src, end ); int size = (dst - orig_dst) - 4; /* Write the size header for mp4/etc */ //重新回到起始码的位置,写入mp4格式的起始码(size大小,不含起始码) if( !h->param.b_annexb ) { /* Size doesn't include the size of the header we're writing now. */ orig_dst[0] = size>>24; orig_dst[1] = size>>16; orig_dst[2] = size>> 8; orig_dst[3] = size>> 0; } //NALU负载大小,包含起始码 nal->i_payload = size+4; nal->p_payload = orig_dst; x264_emms(); }
添加过程:
(1)annexb模式:在每个NALU前面添加0x00000001;
(2)mp4模式:先计算NALU的长度(不包含前四个字节),再将长度信息写入NALU前面的四个字节;
添加过程分两种是因为H264码流格式有两种:
(1)annexb模式:在这个模式下,每个NALU包含起始码0x00000001,SPS、PPS存储在码流中,最常见的H264裸流就是这种;
(2)mp4模式:这种模式下,每个NALU不包含起始码,原本存储起始码前4个字节存储的是NALU的长度,SPS、PPS单独放在容器的其他位置上,这种H264一般存储在容器中,比如说mp4中。