流媒体-H264协议-编码-x264学习-相关概念x264编译及文件解析(一)
流媒体-H264协议-编码-x264学习-主要结构体(二)
文章目录
写在前面:所有结构体属性意义内容可在:[ITU-T H.264建议书]查询
libx264相关结构体
x264_param_t 结构体
结构体x264_param_t定义在x264.h中,用来初始化编码器
/* Encoder configuration. Per the accompanying article this structure is declared in
 * x264.h and is used to initialise the encoder. */
typedef struct x264_param_t
{
/* CPU flags */
uint32_t cpu;
int i_threads; /* number of threads used to encode multiple frames in parallel */
int i_lookahead_threads; /* number of threads used to compute slicetype_slice_cost */
int b_sliced_threads; /* whether slice-based multithreading is enabled */
int b_deterministic; /* whether to allow non-deterministic optimizations when threaded */
int b_cpu_independent; /* force canonical behavior rather than cpu-dependent optimal algorithms */
int i_sync_lookahead; /* threaded lookahead buffer: number of frames buffered for the threaded lookahead */
/* Video Properties */
int i_width; /* picture width */
int i_height; /* picture height */
int i_csp; /* CSP (colorspace type) of the encoded bitstream */
int i_bitdepth; /* bit depth of each sample; 8 bits gives the range 0-255 */
int i_level_idc; /* level to signal; set together with the profile */
int i_frame_total; /* total number of frames to encode, default 0 */
/* NAL HRD (hypothetical reference decoder; see the standard for details)
* Uses Buffering and Picture Timing SEIs to signal HRD
* The HRD in H.264 was not designed with VFR in mind.
* It is therefore not recommended to use NAL HRD with VFR.
* Furthermore, reconfiguring the VBV (via x264_encoder_reconfig)
* will currently generate invalid HRD. */
int i_nal_hrd;
/* vui (Video Usability Information): standardised video usability options */
struct
{
/* they will be reduced to be 0 < x <= 65535 and prime */
int i_sar_height; /* height term of the sample aspect ratio */
int i_sar_width; /* width term of the sample aspect ratio */
int i_overscan; /* 0=undef, 1=no overscan, 2=overscan */
/* see h264 annex E for the values of the following */
int i_vidformat; /* video format value written to the SPS for the decoder */
int b_fullrange; /* sample value range: off (default) = limited range, on = full range 0-255 */
int i_colorprim; /* chromaticity coordinates of the source primaries */
int i_transfer; /* opto-electronic transfer characteristics */
int i_colmatrix; /* matrix coefficients used to derive luma and chroma from RGB */
int i_chroma_loc; /* both top & bottom: chroma sample location */
} vui;
/* Bitstream parameters (the article refers to Annex B of the Recommendation for details) */
int i_frame_reference; /* Maximum number of reference frames */
int i_dpb_size; /* Force a DPB size larger than that implied by B-frames and reference frames.
* Useful in combination with interactive error resilience. */
int i_keyint_max; /* Force an IDR keyframe at this interval (maximum IDR interval, i.e. GOP size) */
int i_keyint_min; /* Scenecuts closer together than this are coded as I, not IDR (minimum IDR interval). */
int i_scenecut_threshold; /* how aggressively to insert extra I frames: scene-cut detection threshold.
* Per the article, a scene cut is detected when the similarity between scenes falls below
* this threshold. If the distance to the previous IDR frame is then smaller than the
* minimum IDR interval an I frame is inserted, otherwise an IDR frame is inserted. */
int b_intra_refresh; /* whether to use periodic intra refresh instead of IDR frames */
int i_bframe; /* how many b-frame between 2 references pictures */
int i_bframe_adaptive; /* B-frame placement strategy */
int i_bframe_bias; /* biases the B-frame decision, range -100..+100; higher values favour B-frames, default 0 */
int i_bframe_pyramid; /* keep some B-frames as references: 0=off, 1=strict hierarchical, 2=normal; see x264_b_pyramid_names */
int b_open_gop; /* whether open GOP is enabled */
int b_bluray_compat; /* NOTE(review): undocumented here; presumably a Blu-ray compatibility switch - confirm */
int i_avcintra_class; /* NOTE(review): undocumented here; presumably the AVC-Intra class - confirm */
int i_avcintra_flavor; /* NOTE(review): undocumented here; presumably the AVC-Intra flavor - confirm */
int b_deblocking_filter;
int i_deblocking_filter_alphac0; /* [-6, 6] -6 light filter, 6 strong */
int i_deblocking_filter_beta; /* [-6, 6] idem */
/* Entropy coding */
int b_cabac;
int i_cabac_init_idc;
int b_interlaced; /* interlaced coding */
int b_constrained_intra;
/* Quantization */
int i_cqm_preset;
char *psz_cqm_file; /* filename (in UTF-8) of CQM file, JM format: custom quantization matrices (the article notes the mode is initialised to flat) */
uint8_t cqm_4iy[16]; /* used only if i_cqm_preset == X264_CQM_CUSTOM */
uint8_t cqm_4py[16];
uint8_t cqm_4ic[16];
uint8_t cqm_4pc[16];
uint8_t cqm_8iy[64];
uint8_t cqm_8py[64];
uint8_t cqm_8ic[64];
uint8_t cqm_8pc[64];
/* Log */
void (*pf_log)( void *, int i_level, const char *psz, va_list );
void *p_log_private;
int i_log_level;
int b_full_recon; /* fully reconstruct frames, even when not necessary for encoding. Implied by psz_dump_yuv */
char *psz_dump_yuv; /* filename (in UTF-8) for reconstructed frames */
/* Encoder analyser parameters */
struct
{
unsigned int intra; /* intra partitions: intra prediction modes, see the "Analyse flags" macros */
unsigned int inter; /* inter partitions: inter prediction modes, see the "Analyse flags" macros */
int b_transform_8x8; /* 8x8 transform switch; the article describes it as enabling 8x8 luma intra prediction */
int i_weighted_pred; /* weighted prediction for P-frames */
int b_weighted_bipred; /* implicit weighting for B-frames */
int i_direct_mv_pred; /* spatial vs temporal mv prediction; names are listed in x264_direct_pred_names */
int i_chroma_qp_offset; /* chroma quantizer offset */
int i_me_method; /* motion estimation algorithm to use (X264_ME_*); the article counts 5 choices */
int i_me_range; /* integer pixel motion estimation search range (from predicted mv) */
int i_mv_range; /* maximum length of a mv (in pixels). -1 = auto, based on level */
int i_mv_range_thread; /* minimum mv buffer between threads. -1 = auto, based on number of threads. */
int i_subpel_refine; /* subpixel motion estimation quality */
int b_chroma_me; /* chroma ME for subpel and mode decision in P-frames */
int b_mixed_references; /* allow each mb partition to have its own reference number */
int i_trellis; /* trellis quantization: improves compression of DCT-based coding */
int b_fast_pskip; /* early SKIP detection on P-frames */
int b_dct_decimate; /* transform coefficient thresholding on P-frames */
int i_noise_reduction; /* adaptive pseudo-deadzone */
//Psychovisual optimization strength for RDO: applies the psy (psychovisual model) algorithm in RDO
float f_psy_rd; /* Psy RD strength */
float f_psy_trellis; /* Psy trellis strength */
int b_psy; /* Toggle all psy optimizations */
int b_mb_info; /* Use input mb_info data in x264_picture_t */
int b_mb_info_update; /* Update the values in mb_info according to the results of encoding. */
/* the deadzone size that will be used in luma quantization */
int i_luma_deadzone[2]; /* {inter, intra} */
int b_psnr; /* compute and print PSNR stats */
int b_ssim; /* compute and print SSIM stats */
} analyse;
/* Rate control parameters */
struct
{
int i_rc_method; /* X264_RC_*: rate control method; X264_RC_CQP = 0, X264_RC_CRF = 1, X264_RC_ABR = 2 */
int i_qp_constant; /* 0=lossless */
int i_qp_min; /* default 0: smallest quantizer x264 may use; a smaller quantizer gives better output quality */
int i_qp_max; /* default 51: largest quantizer x264 may use; 51 is the largest value the H.264 spec offers */
int i_qp_step; /* default 4: maximum quantizer change between two frames */
int i_bitrate; /* target average bitrate, in kbps */
float f_rf_constant; /* 1pass VBR, nominal QP; effective in X264_RC_CRF mode: the quality CRF should aim for */
float f_rf_constant_max; /* maximum CRF as caused by VBV; effective in X264_RC_CRF mode: limit on the worst quality allowed */
float f_rate_tolerance; /* in ABR mode, factor by which the instantaneous bitrate may deviate; range 0.1-100.0, default 1.0 */
//Video Buffering Verifier
int i_vbv_max_bitrate; /* in ABR mode, instantaneous peak bitrate in kbps; setting it equal to i_bitrate gives CBR */
int i_vbv_buffer_size; /* size of the rate control buffer, in kbit */
float f_vbv_buffer_init; /* <=1: fraction of buffer_size. >1: kbit */
float f_ip_factor; /* average QP difference of I-frames relative to P-frames */
float f_pb_factor; /* average QP difference of B-frames relative to P-frames */
/* VBV filler: force CBR VBV and use filler bytes to ensure hard-CBR.
* Implied by NAL-HRD CBR. */
int b_filler;
int i_aq_mode; /* psy adaptive QP. (X264_AQ_*) */
float f_aq_strength; /* adaptive quantization strength: how strongly blocking in flat areas and blurring in textured areas are reduced */
int b_mb_tree; /* Macroblock-tree ratecontrol. */
int i_lookahead;
/* 2pass: multi-pass rate control */
int b_stat_write; /* Enable stat writing in psz_stat_out */
char *psz_stat_out; /* output filename (in UTF-8) of the 2pass stats file */
int b_stat_read; /* Read stat from psz_stat_in and use it */
char *psz_stat_in; /* input filename (in UTF-8) of the 2pass stats file */
/* 2pass params (same as ffmpeg ones) */
float f_qcompress; /* quantizer curve compression: 0.0 => cbr, 1.0 => constant qp */
float f_qblur; /* temporally blur quants, reducing QP fluctuation */
float f_complexity_blur; /* temporally blur complexity, reducing QP fluctuation */
x264_zone_t *zones; /* ratecontrol overrides */
int i_zones; /* number of zone_t's */
char *psz_zones; /* alternate method of specifying zones */
} rc;
/* Cropping Rectangle parameters: added to those implicitly defined by
non-mod16 video resolutions. */
struct
{
int i_left;
int i_top;
int i_right;
int i_bottom;
} crop_rect;
/* frame packing arrangement flag */
int i_frame_packing;
/* alternative transfer SEI */
int i_alternative_transfer;
/* Muxing parameters */
int b_aud; /* generate access unit delimiters */
int b_repeat_headers; /* put SPS/PPS before each keyframe */
int b_annexb; /* if set, place start codes (4 bytes) before NAL units,
* otherwise place size (4 bytes) before NAL units. */
int i_sps_id; /* SPS and PPS id number */
int b_vfr_input; /* VFR input. If 1, use timebase and timestamps for ratecontrol purposes.
* If 0, use fps only. */
int b_pulldown; /* use explicitly set timebase for CFR */
uint32_t i_fps_num;
uint32_t i_fps_den;
uint32_t i_timebase_num; /* Timebase numerator */
uint32_t i_timebase_den; /* Timebase denominator */
int b_tff;
/* Pulldown:
* The correct pic_struct must be passed with each input frame.
* The input timebase should be the timebase corresponding to the output framerate. This should be constant.
* e.g. for 3:2 pulldown timebase should be 1001/30000
* The PTS passed with each frame must be the PTS of the frame after pulldown is applied.
* Frame doubling and tripling require b_vfr_input set to zero (see H.264 Table D-1)
*
* Pulldown changes are not clearly defined in H.264. Therefore, it is the calling app's responsibility to manage this.
*/
int b_pic_struct;
/* Fake Interlaced.
*
* Used only when b_interlaced=0. Setting this flag makes it possible to flag the stream as PAFF interlaced yet
* encode all frames progressively. It is useful for encoding 25p and 30p Blu-Ray streams.
*/
int b_fake_interlaced;
/* Don't optimize header parameters based on video content, e.g. ensure that splitting an input video, compressing
* each part, and stitching them back together will result in identical SPS/PPS. This is necessary for stitching
* with container formats that don't allow multiple SPS/PPS. */
int b_stitchable;
int b_opencl; /* use OpenCL when available */
int i_opencl_device; /* specify count of GPU devices to skip, for CLI users */
void *opencl_device_id; /* pass explicit cl_device_id as void*, for API users */
char *psz_clbin_file; /* filename (in UTF-8) of the compiled OpenCL kernel cache file */
/* Slicing parameters */
int i_slice_max_size; /* Max size per slice in bytes; includes estimated NAL overhead. */
int i_slice_max_mbs; /* Max number of MBs per slice; overrides i_slice_count. */
int i_slice_min_mbs; /* Min number of MBs per slice */
int i_slice_count; /* Number of slices per frame: forces rectangular slices. */
int i_slice_count_max; /* Absolute cap on slices per frame; stops applying slice-max-size
* and slice-max-mbs if this is reached. */
/* Optional callback for freeing this x264_param_t when it is done being used.
* Only used when the x264_param_t sits in memory for an indefinite period of time,
* i.e. when an x264_param_t is passed to x264_t in an x264_picture_t or in zones.
* Not used when x264_encoder_reconfig is called directly. */
void (*param_free)( void* );
void (*nalu_process)( x264_t *h, x264_nal_t *nal, void *opaque );
/* For internal use only */
void *opaque;
} x264_param_t;
cli_opt_t 结构体
结构体定义在x264.c中,主要记录一些与编码关系较小的设置信息,其变量可通过读取main()的参数获得
/* Command-line options that are only loosely related to encoding itself; per the
 * accompanying article the values are filled in from the arguments of main(). */
typedef struct {
int b_progress; // boolean (0 or 1): whether to display encoding progress
int i_seek; // integer: index of the frame at which encoding starts
/* hnd_t (hnd = handle) is a void pointer, defined in x264cli.h according to the article.
A void * is a generic pointer: it can point to an object of any type but cannot be
dereferenced directly, so it has to be cast before use. It is used here to keep the
declarations simple and uniform. */
hnd_t hin; // handle of the input YUV file
hnd_t hout; // handle of the file produced by the encoder
/* Text file whose lines have the form "framenum frametype QP"; it forces the frame
type and QP (quantization parameter) of some or all frames. */
FILE *qpfile;
FILE *tcfile_out;
double timebase_convert_multiplier;
int i_pulldown;
} cli_opt_t;
x264_level_t 结构体
结构体定义在x264.h
// Within a given profile, a level generally corresponds to the processing power and
// memory capacity of the decoder. Each level fixes a different set of limits (sample
// rate, picture size, coded bitrate, ...) and therefore a different codec capability.
typedef struct x264_level_t
{
int level_idc;
int mbps; // max macroblock processing rate (macroblocks/sec), as documented in x264.h; not megabits per second
int frame_size; // max frame size (macroblocks, per x264.h)
int dpb; // max decoded picture buffer (macroblocks, per x264.h)
int bitrate; // max bitrate (kbit/sec, per x264.h)
int cpb; /* max vbv buffer (kbit) */
int mv_range; // max vertical mv component range (pixels, per x264.h)
int mvs_per_2mb; /* max mvs per 2 consecutive mbs. */
int slice_rate; /* ?? */
int mincr; /* min compression ratio */
int bipred8x8; /* limit bipred to >=8x8 */
int direct8x8; /* limit b_direct to >=8x8 */
int frame_only; /* forbid interlacing */
} x264_level_t;
x264_image_t 结构体:存放一帧图像实际像素数据
该结构体定义在x264.h中
/* Holds the actual pixel data of one picture. */
typedef struct x264_image_t
{
int i_csp; /* colorspace; typically X264_CSP_I420 */
int i_plane; /* number of image planes */
int i_stride[4]; /* stride of each plane, i.e. the number of bytes per row */
uint8_t *plane[4]; /* start address of the data of each plane */
} x264_image_t;
x264_image_properties_t 结构体
结构体定义在x264.h
/* Per-picture properties exchanged with the encoder: optional per-macroblock inputs
 * (quantizer offsets, macroblock flags) and quality statistics written back as outputs. */
typedef struct x264_image_properties_t
{
    /* All arrays of data here are ordered as follows:
     * each array contains one offset per macroblock, in raster scan order. In interlaced
     * mode, top-field MBs and bottom-field MBs are interleaved at the row level.
     * Macroblocks are 16x16 blocks of pixels (with respect to the luma plane). For the
     * purposes of calculating the number of macroblocks, width and height are rounded up to
     * the nearest 16. If in interlaced mode, height is rounded up to the nearest 32 instead. */

    /* In: an array of quantizer offsets to be applied to this image during encoding.
     *     These are added on top of the decisions made by x264.
     *     Offsets can be fractional; they are added before QPs are rounded to integer.
     *     Adaptive quantization must be enabled to use this feature. Behavior if quant
     *     offsets differ between encoding passes is undefined. */
    float *quant_offsets;
    /* In: optional callback to free quant_offsets when used.
     *     Useful if one wants to use a different quant_offset array for each frame. */
    void (*quant_offsets_free)( void* );

    /* In: optional array of flags for each macroblock.
     *     Allows specifying additional information for the encoder such as which macroblocks
     *     remain unchanged. Usable flags are listed below.
     *     x264_param_t.analyse.b_mb_info must be set to use this, since x264 needs to track
     *     extra data internally to make full use of this information.
     *
     * Out: if b_mb_info_update is set, x264 will update this array as a result of encoding.
     *
     *      For "MBINFO_CONSTANT", it will remove this flag on any macroblock whose decoded
     *      pixels have changed. This can be useful for e.g. noting which areas of the
     *      frame need to actually be blitted. Note: this intentionally ignores the effects
     *      of deblocking for the current frame, which should be fine unless one needs exact
     *      pixel-perfect accuracy.
     *
     *      Results for MBINFO_CONSTANT are currently only set for P-frames, and are not
     *      guaranteed to enumerate all blocks which haven't changed. (There may be false
     *      negatives, but no false positives.) */
    uint8_t *mb_info;
    /* In: optional callback to free mb_info when used. */
    void (*mb_info_free)( void* );

    /* The macroblock is constant and remains unchanged from the previous frame. */
    #define X264_MBINFO_CONSTANT (1U<<0)
    /* More flags may be added in the future. */

    /* Out: SSIM of the frame luma (if x264_param_t.analyse.b_ssim is set) */
    double f_ssim;
    /* Out: Average PSNR of the frame (if x264_param_t.analyse.b_psnr is set) */
    double f_psnr_avg;
    /* Out: PSNR of Y, U, and V (if x264_param_t.analyse.b_psnr is set) */
    double f_psnr[3];

    /* Out: Average effective CRF of the encoded frame */
    double f_crf_avg;
} x264_image_properties_t;
x264_picture_t 结构体:描述一视频帧的特征
结构体定义在x264.h
/* Describes one video frame exchanged with the encoder. */
typedef struct
{
int i_type; // frame type, e.g. X264_TYPE_KEYFRAME, X264_TYPE_P,
// X264_TYPE_AUTO. When initialised to auto the encoder chooses the type itself.
int i_qpplus1; // this value minus 1 is the quantization parameter of the frame
int i_pic_struct; // picture structure: frame or field, progressive or interlaced;
// takes values of the enum pic_struct_e, defined in x264.h
int b_keyframe; // Out: whether this is a keyframe
int64_t i_pts; // presentation timestamp of the frame
int64_t i_dts; // Out: decode timestamp. May be negative when the pts of a frame is very close to 0.
/* Encoder parameters; NULL means keep using the settings of the previous frame. Some
parameters (e.g. aspect ratio) can, due to limitations of H.264 itself, only change
once per GOP. To make such changes take effect immediately an IDR frame has to be
forced.
*/
x264_param_t *param;
x264_image_t img; // the actual image data of the frame
x264_image_properties_t prop;
x264_hrd_t hrd_timing; // Out: HRD timing information, only valid if i_nal_hrd is set
void *opaque; // private user data, copied from the input frame to the output frame
} x264_picture_t ;
/* Slice type */
#define X264_TYPE_AUTO 0x0000 /* Let x264 choose the right type */
#define X264_TYPE_IDR 0x0001
#define X264_TYPE_I 0x0002
#define X264_TYPE_P 0x0003
#define X264_TYPE_BREF 0x0004 /* Non-disposable B-frame */
#define X264_TYPE_B 0x0005
#define X264_TYPE_KEYFRAME 0x0006 /* IDR or I depending on b_open_gop option */
/* True when x is X264_TYPE_I, X264_TYPE_IDR or X264_TYPE_KEYFRAME. The argument is evaluated more than once. */
#define IS_X264_TYPE_I(x) ((x)==X264_TYPE_I || (x)==X264_TYPE_IDR || (x)==X264_TYPE_KEYFRAME)
/* True when x is X264_TYPE_B or X264_TYPE_BREF. The argument is evaluated more than once. */
#define IS_X264_TYPE_B(x) ((x)==X264_TYPE_B || (x)==X264_TYPE_BREF)
/****************************************************************************
 * x264_picture_init:
 *  reset an x264_picture_t to its default state. Needs to be done if the
 *  calling application allocates its own x264_picture_t instead of using
 *  x264_picture_alloc.
 ****************************************************************************/
REALIGN_STACK void x264_picture_init( x264_picture_t *pic )
{
    /* Clear every field first; only the three "auto" selectors below get explicit values. */
    memset( pic, 0, sizeof( *pic ) );

    pic->i_pic_struct = PIC_STRUCT_AUTO;
    pic->i_qpplus1    = X264_QP_AUTO;
    pic->i_type       = X264_TYPE_AUTO;
}
/****************************************************************************
 * x264_picture_alloc:
 *  initialise pic and allocate one contiguous buffer holding all of its planes
 *  for the given colorspace and dimensions. The picture must later be released
 *  with x264_picture_clean.
 *  returns 0 on success, or -1 on malloc failure or invalid colorspace.
 ****************************************************************************/
REALIGN_STACK int x264_picture_alloc( x264_picture_t *pic, int i_csp, int i_width, int i_height )
{
    /* Plane layout of one colorspace: plane count, then the width and height scale of
     * each plane as 8.8 fixed-point factors (256 == 1.0). */
    typedef struct
    {
        int planes;
        int width_fix8[3];
        int height_fix8[3];
    } x264_csp_tab_t;

    enum { FIX8_HALF = 256/2, FIX8_X1 = 256*1, FIX8_X2 = 256*2, FIX8_X3 = 256*3, FIX8_X4 = 256*4 };

    static const x264_csp_tab_t csp_tab[] =
    {
        [X264_CSP_I400] = { 1, { FIX8_X1 },                       { FIX8_X1 } },
        [X264_CSP_I420] = { 3, { FIX8_X1, FIX8_HALF, FIX8_HALF }, { FIX8_X1, FIX8_HALF, FIX8_HALF } },
        [X264_CSP_YV12] = { 3, { FIX8_X1, FIX8_HALF, FIX8_HALF }, { FIX8_X1, FIX8_HALF, FIX8_HALF } },
        [X264_CSP_NV12] = { 2, { FIX8_X1, FIX8_X1 },              { FIX8_X1, FIX8_HALF } },
        [X264_CSP_NV21] = { 2, { FIX8_X1, FIX8_X1 },              { FIX8_X1, FIX8_HALF } },
        [X264_CSP_I422] = { 3, { FIX8_X1, FIX8_HALF, FIX8_HALF }, { FIX8_X1, FIX8_X1, FIX8_X1 } },
        [X264_CSP_YV16] = { 3, { FIX8_X1, FIX8_HALF, FIX8_HALF }, { FIX8_X1, FIX8_X1, FIX8_X1 } },
        [X264_CSP_NV16] = { 2, { FIX8_X1, FIX8_X1 },              { FIX8_X1, FIX8_X1 } },
        [X264_CSP_YUYV] = { 1, { FIX8_X2 },                       { FIX8_X1 } },
        [X264_CSP_UYVY] = { 1, { FIX8_X2 },                       { FIX8_X1 } },
        [X264_CSP_I444] = { 3, { FIX8_X1, FIX8_X1, FIX8_X1 },     { FIX8_X1, FIX8_X1, FIX8_X1 } },
        [X264_CSP_YV24] = { 3, { FIX8_X1, FIX8_X1, FIX8_X1 },     { FIX8_X1, FIX8_X1, FIX8_X1 } },
        [X264_CSP_BGR]  = { 1, { FIX8_X3 },                       { FIX8_X1 } },
        [X264_CSP_BGRA] = { 1, { FIX8_X4 },                       { FIX8_X1 } },
        [X264_CSP_RGB]  = { 1, { FIX8_X3 },                       { FIX8_X1 } },
    };

    /* Strip the flag bits and reject colorspaces this function does not lay out. */
    const int base_csp = i_csp & X264_CSP_MASK;
    if( base_csp <= X264_CSP_NONE )
        return -1;
    if( base_csp >= X264_CSP_MAX )
        return -1;
    if( base_csp == X264_CSP_V210 )
        return -1;

    const x264_csp_tab_t *layout = &csp_tab[base_csp];

    x264_picture_init( pic );
    pic->img.i_csp = i_csp;
    pic->img.i_plane = layout->planes;

    /* The stride is doubled when the high-depth flag is present. */
    int depth_factor = 1;
    if( i_csp & X264_CSP_HIGH_DEPTH )
        depth_factor = 2;

    /* First pass: compute each plane's stride and its byte offset inside the buffer. */
    int64_t plane_offset[3] = {0};
    int64_t total_bytes = 0;
    int p = 0;
    while( p < pic->img.i_plane )
    {
        int64_t scaled_width  = ((int64_t)i_width  * layout->width_fix8[p])  >> 8;
        int64_t scaled_height = ((int64_t)i_height * layout->height_fix8[p]) >> 8;
        int stride = scaled_width * depth_factor;

        plane_offset[p] = total_bytes;
        pic->img.i_stride[p] = stride;
        total_bytes += scaled_height * stride;
        p++;
    }

    /* Second pass: a single allocation backs every plane. */
    pic->img.plane[0] = x264_malloc( total_bytes );
    if( !pic->img.plane[0] )
        return -1;
    for( p = 1; p < pic->img.i_plane; p++ )
        pic->img.plane[p] = pic->img.plane[0] + plane_offset[p];

    return 0;
}
/****************************************************************************
 * x264_picture_clean:
 *  free the buffer referenced by plane[0] and zero the whole picture.
 ****************************************************************************/
REALIGN_STACK void x264_picture_clean( x264_picture_t *pic )
{
    /* Only plane[0] is freed; x264_picture_alloc makes the other planes point into
     * that same allocation. */
    uint8_t *pixels = pic->img.plane[0];
    x264_free( pixels );

    /* just to be safe: leave no stale pointers behind */
    memset( pic, 0, sizeof( *pic ) );
}
x264_frame_t 结构体:图像帧编解码的相关参数
/* Internal per-frame state used while encoding/decoding a picture: timing, pixel
 * planes, motion data, lookahead costs, rate-control and threading bookkeeping. */
typedef struct x264_frame
{
/* */
uint8_t *base; /* Base pointer for all malloced data in this frame. */
int i_poc; // poc: picture order count (display order number of the frame)
int i_delta_poc[2];
int i_type; // frame type
int i_forced_type; // forced frame type
int i_qpplus1; // qp+1
int64_t i_pts; // pts: presentation timestamp
int64_t i_dts; // dts: decode timestamp
int64_t i_reordered_pts;
int64_t i_duration; /* in SPS time_scale units (i.e 2 * timebase units) used for vfr */
float f_duration; /* in seconds */
int64_t i_cpb_duration;
int64_t i_cpb_delay; /* in SPS time_scale units (i.e 2 * timebase units) */
int64_t i_dpb_output_delay;
x264_param_t *param;
int i_frame; /* Presentation frame number */
int i_coded; /* Coded frame number */
int64_t i_field_cnt; /* Presentation field count */
int i_frame_num; /* 7.4.3 frame_num */
int b_kept_as_ref;
int i_pic_struct;
int b_keyframe; // whether this is a keyframe
uint8_t b_fdec;
uint8_t b_last_minigop_bframe; /* this frame is the last b in a sequence of bframes */
uint8_t i_bframes; /* number of bframes following this nonb in coded order */
float f_qp_avg_rc; /* QPs as decided by ratecontrol */
float f_qp_avg_aq; /* QPs as decided by AQ in addition to ratecontrol */
float f_crf_avg; /* Average effective CRF for this frame */
int i_poc_l0ref0; /* poc of first refframe in L0, used to check if direct temporal is possible */
/* YUV buffer */
int i_csp; /* Internal csp (YUV colorspace) */
int i_plane; // plane count or index of the YUV data - NOTE(review): the article says "index"; confirm
int i_stride[3]; // stride, one per plane
int i_width[3];
int i_lines[3];
int i_stride_lowres;
int i_width_lowres;
int i_lines_lowres;
pixel *plane[3]; // YUV data
pixel *plane_fld[3];
pixel *filtered[3][4]; /* plane[0], H, V, HV */
pixel *filtered_fld[3][4];
pixel *lowres[4]; /* half-size copy of input frame: Orig, H, V, HV */
uint16_t *integral;
/* for unrestricted mv we allocate more data than needed
* allocated data are stored in buffer */
pixel *buffer[4];
pixel *buffer_fld[4];
pixel *buffer_lowres[4];
x264_weight_t weight[X264_REF_MAX][3]; /* [ref_index][plane] */
pixel *weighted[X264_REF_MAX]; /* plane[0] weighted of the reference frames */
int b_duplicate; // whether this frame is a duplicate
struct x264_frame *orig;
/* motion data */
int8_t *mb_type; // macroblock types
uint8_t *mb_partition; // macroblock partitions
int16_t (*mv[2])[2]; // motion vectors, two sets (the article guesses the second is for B-frames)
int16_t (*mv16x16)[2]; // motion vectors of 16x16 macroblocks
int16_t (*lowres_mvs[2][X264_BFRAME_MAX+1])[2];
uint8_t *field;
uint8_t *effective_qp;
/* Stored as (lists_used << LOWRES_COST_SHIFT) + (cost).
* Doesn't need special addressing for intra cost because
* lists_used is guaranteed to be zero in that case. */
uint16_t (*lowres_costs[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2]);
#define LOWRES_COST_MASK ((1<<14)-1)
#define LOWRES_COST_SHIFT 14
int *lowres_mv_costs[2][X264_BFRAME_MAX+1];
int8_t *ref[2];
int i_ref[2];
int ref_poc[2][X264_REF_MAX];
int16_t inv_ref_poc[2]; // inverse values of ref0 poc to avoid divisions in temporal MV prediction
/* for adaptive B-frame decision.
* contains the SATD cost of the lowres frame encoded in various modes
* FIXME: how big an array do we need? */
int i_cost_est[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2];
int i_cost_est_aq[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2];
int i_satd; // the i_cost_est of the selected frametype
int i_intra_mbs[X264_BFRAME_MAX+2];
int *i_row_satds[X264_BFRAME_MAX+2][X264_BFRAME_MAX+2];
int *i_row_satd;
int *i_row_bits;
float *f_row_qp;
float *f_row_qscale;
float *f_qp_offset;
float *f_qp_offset_aq;
int b_intra_calculated; // whether the intra analysis has been computed
uint16_t *i_intra_cost; // intra coding cost - NOTE(review): inferred from the name; confirm
uint16_t *i_propagate_cost;
uint16_t *i_inv_qscale_factor;
int b_scenecut; /* Set to zero if the frame cannot possibly be part of a real scenecut. */
float f_weighted_cost_delta[X264_BFRAME_MAX+2];
uint32_t i_pixel_sum[3];
uint64_t i_pixel_ssd[3];
/* hrd */
x264_hrd_t hrd_timing;
/* vbv */
uint8_t i_planned_type[X264_LOOKAHEAD_MAX+1];
int i_planned_satd[X264_LOOKAHEAD_MAX+1];
double f_planned_cpb_duration[X264_LOOKAHEAD_MAX+1];
int64_t i_coded_fields_lookahead;
int64_t i_cpb_delay_lookahead;
/* threading */
int i_lines_completed; /* in pixels */
int i_lines_weighted; /* FIXME: this only supports weighting of one reference frame */
int i_reference_count; /* number of threads using this frame (not necessarily the number of pointers) */
x264_pthread_mutex_t mutex;
x264_pthread_cond_t cv;
int i_slice_count; /* Atomically written to/read from with slice threads */
/* periodic intra refresh */
float f_pir_position;
int i_pir_start_col;
int i_pir_end_col;
int i_frames_since_pir;
/* interactive encoder control */
int b_corrupt; // belongs to interactive encoder control
/* user sei */
x264_sei_t extra_sei; // SEI information
/* user data */
void *opaque; // user data
/* user frame properties */
uint8_t *mb_info;
void (*mb_info_free)( void* );
#if HAVE_OPENCL
x264_frame_opencl_t opencl; // OpenCL-related state
#endif
} x264_frame_t;
x264_nal_t 结构体
结构体定义在x264.h。x264_nal_t中的数据在下一次调用x264_encoder_encode之后就无效了,因此必须在再次调用x264_encoder_encode 或 x264_encoder_headers 之前使用或拷贝其中的数据。
/* NAL unit types; the value is stored in x264_nal_t.i_type. */
enum nal_unit_type_e // NAL unit type
{
NAL_UNKNOWN = 0,
NAL_SLICE = 1,
NAL_SLICE_DPA = 2,
NAL_SLICE_DPB = 3,
NAL_SLICE_DPC = 4,
NAL_SLICE_IDR = 5, /* ref_idc != 0 */
NAL_SEI = 6, /* ref_idc == 0 */
NAL_SPS = 7,
NAL_PPS = 8,
NAL_AUD = 9,
NAL_FILLER = 12,
/* ref_idc == 0 for 6,9,10,11,12 */
};
/* NAL unit priorities; the value is stored in x264_nal_t.i_ref_idc. */
enum nal_priority_e // NAL priority
{
NAL_PRIORITY_DISPOSABLE = 0,
NAL_PRIORITY_LOW = 1,
NAL_PRIORITY_HIGH = 2,
NAL_PRIORITY_HIGHEST = 3,
};
/* The data within the payload is already NAL-encapsulated; the ref_idc and type
* are merely in the struct for easy access by the calling application.
* All data returned in an x264_nal_t, including the data in p_payload, is no longer
* valid after the next call to x264_encoder_encode. Thus it must be used or copied
* before calling x264_encoder_encode or x264_encoder_headers again. */
typedef struct x264_nal_t
{
int i_ref_idc; /* nal_priority_e: priority of the NAL unit */
int i_type; /* nal_unit_type_e: type of the NAL unit */
int b_long_startcode; /* whether the long start code 0x00 00 00 01 is used; the two forms are 0x00 00 00 01 and 0x00 00 01 */
int i_first_mb; /* If this NAL is a slice, the position of the first MB in the slice. */
int i_last_mb; /* If this NAL is a slice, the position of the last MB in the slice. */
/* Size of payload (including any padding) in bytes. */
int i_payload;
/* The encoded data, already encapsulated as a NAL unit.
* If param->b_annexb is set, Annex-B bytestream with startcode.
* Otherwise, startcode is replaced with a 4-byte size.
* This size is the size used in mp4/similar muxing; it is equal to i_payload-4 */
uint8_t *p_payload;
/* Size of padding in bytes. */
int i_padding;
} x264_nal_t;
主要参考文献
- [ITU-T H.264建议书]
- x264-master\x264-master\x264.h头文件
- X264参数汇总