首先我们看ffmpeg中examples中的一个案例:
/*
* Copyright (c) 2001 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/**
* @file
* video decoding with libavcodec API example
*
* @example decode_video.c
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <libavcodec/avcodec.h>
#define INBUF_SIZE 4096
static void pgm_save(unsigned char *buf, int wrap, int xsize, int ysize,
char *filename)
{
FILE *f;
int i;
f = fopen(filename,"wb");
fprintf(f, "P5\n%d %d\n%d\n", xsize, ysize, 255);
for (i = 0; i < ysize; i++)
fwrite(buf + i * wrap, 1, xsize, f);
fclose(f);
}
static void decode(AVCodecContext *dec_ctx, AVFrame *frame, AVPacket *pkt,
const char *filename)
{
char buf[1024];
int ret;
ret = avcodec_send_packet(dec_ctx, pkt);
if (ret < 0) {
fprintf(stderr, "Error sending a packet for decoding\n");
exit(1);
}
while (ret >= 0) {
ret = avcodec_receive_frame(dec_ctx, frame);
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
return;
else if (ret < 0) {
fprintf(stderr, "Error during decoding\n");
exit(1);
}
printf("saving frame %3d\n", dec_ctx->frame_number);
fflush(stdout);
/* the picture is allocated by the decoder. no need to
free it */
snprintf(buf, sizeof(buf), "%s-%d", filename, dec_ctx->frame_number);
pgm_save(frame->data[0], frame->linesize[0],
frame->width, frame->height, buf);
}
}
//我们直接从main函数看起,这个案例是从本地读取一个h264/265文件,(当然也可以是其它类型文件)
//但是一定要保证这个文件没有被封装过
//输入必须是只包含视频编码数据“裸流”(例如H.264、HEVC码流文件),而不能是包含封装格式的媒体数据(例如AVI、MKV、MP4)
int main(int argc, char **argv)
{
const char *filename, *outfilename;
const AVCodec *codec;
AVCodecParserContext *parser;
AVCodecContext *c= NULL;
FILE *f;
AVFrame *frame;
uint8_t inbuf[INBUF_SIZE + AV_INPUT_BUFFER_PADDING_SIZE];
uint8_t *data;
size_t data_size;
int ret;
AVPacket *pkt;
if (argc <= 2) {
fprintf(stderr, "Usage: %s <input file> <output file>\n"
"And check your input file is encoded by mpeg1video please.\n", argv[0]);
exit(0);
}
filename = argv[1];
outfilename = argv[2];
pkt = av_packet_alloc();
if (!pkt)
exit(1);
/* set end of buffer to 0 (this ensures that no overreading happens for damaged MPEG streams) */
memset(inbuf + INBUF_SIZE, 0, AV_INPUT_BUFFER_PADDING_SIZE);
/* find the MPEG-1 video decoder */
//这里是寻找解码器,一般来说,ffmpeg都包含一个contex上下文,然后其中包含一个具体的业务指针
//比如这里AVCodecContext和
codec = avcodec_find_decoder(AV_CODEC_ID_H264);
if (!codec) {
fprintf(stderr, "Codec not found\n");
exit(1);
}
//初始化解析器,也就是根据codec->id来确定解析器的具体类型,解析器具体类型可以看文章最后。
parser = av_parser_init(codec->id);
if (!parser) {
fprintf(stderr, "parser not found\n");
exit(1);
}
c = avcodec_alloc_context3(codec);
if (!c) {
fprintf(stderr, "Could not allocate video codec context\n");
exit(1);
}
/* For some codecs, such as msmpeg4 and mpeg4, width and height
MUST be initialized there because this information is not
available in the bitstream. */
/* open it */
//打开解码器
//如果是cpu解码,可以设置解码线程数量
//分配AVCodecContext
/**
* thread count
* is used to decide how many independent tasks should be passed to execute()
* - encoding: Set by user.
* - decoding: Set by user.
*/
c->thread_count=10;
if (avcodec_open2(c, codec, NULL) < 0) {
fprintf(stderr, "Could not open codec\n");
exit(1);
}
f = fopen(filename, "rb");
if (!f) {
fprintf(stderr, "Could not open %s\n", filename);
exit(1);
}
frame = av_frame_alloc();
if (!frame) {
fprintf(stderr, "Could not allocate video frame\n");
exit(1);
}
while (!feof(f)) {
/* read raw data from the input file */
data_size = fread(inbuf, 1, INBUF_SIZE, f);
if (!data_size)
break;
/* use the parser to split the data into frames */
data = inbuf;
//注意这里,这里是个小循环,也就是说,data中有可能包含多个nal单元,
//每次呢只会找出一个nal,循环,直到所有nal找出来为止
while (data_size > 0) {
//本章重点其实在这里,这个函数是做上面的呢,
ret = av_parser_parse2(parser, c, &pkt->data, &pkt->size,
data, data_size, AV_NOPTS_VALUE, AV_NOPTS_VALUE, 0);
if (ret < 0) {
fprintf(stderr, "Error while parsing\n");
exit(1);
}
//这里向前移位
data += ret;
data_size -= ret;
if (pkt->size)
decode(c, frame, pkt, outfilename);
}
}
/* flush the decoder */
decode(c, frame, NULL, outfilename);
fclose(f);
av_parser_close(parser);
avcodec_free_context(&c);
av_frame_free(&frame);
av_packet_free(&pkt);
return 0;
}
这里看一下av_parse_parse2
用于解析输入的数据流并把它们分成一帧一帧的压缩编码数据。比较形象的说法就是把长长的一段连续的数据“切割”成一段段的数据。核心函数是av_parser_parse2():
av_parser_parse2():解析数据获得一个Packet, 从输入的数据流中分离出一帧一帧的压缩编码数据。
int av_parser_parse2(AVCodecParserContext *s, AVCodecContext *avctx,
uint8_t **poutbuf, int *poutbuf_size,
const uint8_t *buf, int buf_size,
int64_t pts, int64_t dts, int64_t pos)
{
int index, i;
uint8_t dummy_buf[AV_INPUT_BUFFER_PADDING_SIZE];
av_assert1(avctx->codec_id != AV_CODEC_ID_NONE);
/* Parsers only work for the specified codec ids. */
av_assert1(avctx->codec_id == s->parser->codec_ids[0] ||
avctx->codec_id == s->parser->codec_ids[1] ||
avctx->codec_id == s->parser->codec_ids[2] ||
avctx->codec_id == s->parser->codec_ids[3] ||
avctx->codec_id == s->parser->codec_ids[4]);
if (!(s->flags & PARSER_FLAG_FETCHED_OFFSET)) {
s->next_frame_offset =
s->cur_offset = pos;
s->flags |= PARSER_FLAG_FETCHED_OFFSET;
}
if (buf_size == 0) {
/* padding is always necessary even if EOF, so we add it here */
memset(dummy_buf, 0, sizeof(dummy_buf));
buf = dummy_buf;
} else if (s->cur_offset + buf_size != s->cur_frame_end[s->cur_frame_start_index]) { /* skip remainder packets */
/* add a new packet descriptor */
i = (s->cur_frame_start_index + 1) & (AV_PARSER_PTS_NB - 1);
s->cur_frame_start_index = i;
s->cur_frame_offset[i] = s->cur_offset;
s->cur_frame_end[i] = s->cur_offset + buf_size;
s->cur_frame_pts[i] = pts;
s->cur_frame_dts[i] = dts;
s->cur_frame_pos[i] = pos;
}
if (s->fetch_timestamp) {
s->fetch_timestamp = 0;
s->last_pts = s->pts;
s->last_dts = s->dts;
s->last_pos = s->pos;
ff_fetch_timestamp(s, 0, 0, 0);
}
/* WARNING: the returned index can be negative */
//这里就是具体解析器的解析函数了,比如我假设我们打开的文件是test.h264裸流文件
/*
AVCodecParser ff_h264_parser = {
.codec_ids = { AV_CODEC_ID_H264 },
.priv_data_size = sizeof(H264ParseContext),
.parser_init = init,
.parser_parse = h264_parse,
.parser_close = h264_close,
.split = h264_split,
};
*/
index = s->parser->parser_parse(s, avctx, (const uint8_t **) poutbuf,
poutbuf_size, buf, buf_size);
av_assert0(index > -0x20000000); // The API does not allow returning AVERROR codes
#define FILL(name) if(s->name > 0 && avctx->name <= 0) avctx->name = s->name
if (avctx->codec_type == AVMEDIA_TYPE_VIDEO) {
FILL(field_order);
}
/* update the file pointer */
if (*poutbuf_size) {
/* fill the data for the current frame */
s->frame_offset = s->next_frame_offset;
/* offset of the next frame */
s->next_frame_offset = s->cur_offset + index;
s->fetch_timestamp = 1;
}
if (index < 0)
index = 0;
s->cur_offset += index;
return index;
}
/这个函数的功能是从一大片数据中,分割出一个个nal单元
一个图像序列的组成:SPS+PPS+SEI+一个I帧+若干个P帧。SPS、PPS、SEI、一个I帧、一个P帧都可以称为一个NALU
什么是NALU?
H264码流可以分为两层,VCL层和NAL层,NAL的全称是Network abstraction layer,叫网络抽象层,它保存了H264相关的参数信息和图像信息,NAL层由多个单元NALU组成,NALU由了NALU头(00 00 00 01或者00 00 01)、sps(序列参数集)、pps(图像参数集合)、slice、sei、IDR帧、I帧(在图像运动变化较少时,I帧后面是7个P帧,如果图像运动变化大时,一个序列就短了,I帧后面可能是3个或者4个P帧)、P帧、B帧等数据。
sps、pps、I帧、P帧在NALU中的关系和nalu type判断
一个完整的NALU单元结构图如下:
static int h264_split(AVCodecContext *avctx,
const uint8_t *buf, int buf_size)
{
uint32_t state = -1;
int has_sps = 0;
int has_pps = 0;
const uint8_t *ptr = buf, *end = buf + buf_size;
int nalu_type;
while (ptr < end) {
//这里寻找起始头
ptr = avpriv_find_start_code(ptr, end, &state);
if ((state & 0xFFFFFF00) != 0x100)
break;
nalu_type = state & 0x1F;
if (nalu_type == H264_NAL_SPS) {
has_sps = 1;
} else if (nalu_type == H264_NAL_PPS)
has_pps = 1;
/* else if (nalu_type == 0x01 ||
* nalu_type == 0x02 ||
* nalu_type == 0x05) {
* }
*/
else if ((nalu_type != H264_NAL_SEI || has_pps) &&
nalu_type != H264_NAL_AUD && nalu_type != H264_NAL_SPS_EXT &&
nalu_type != 0x0f) {
//
if (has_sps) {
while (ptr - 4 > buf && ptr[-5] == 0)
ptr--;
return ptr - 4 - buf;
}
}
}
return 0;
}
int h264_parse(AVCodecParserContext *s,
AVCodecContext *avctx,
const uint8_t **poutbuf, int *poutbuf_size,
const uint8_t *buf, int buf_size)
{
H264ParseContext *p = s->priv_data;
ParseContext *pc = &p->pc;
int next;
if (!p->got_first) {
p->got_first = 1;
if (avctx->extradata_size) {
ff_h264_decode_extradata(avctx->extradata, avctx->extradata_size,
&p->ps, &p->is_avc, &p->nal_length_size,
avctx->err_recognition, avctx);
}
}
if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
next = buf_size;
} else {
next = h264_find_frame_end(p, buf, buf_size, avctx);
if (ff_combine_frame(pc, next, &buf, &buf_size) < 0) {
*poutbuf = NULL;
*poutbuf_size = 0;
return buf_size;
}
if (next < 0 && next != END_NOT_FOUND) {
av_assert1(pc->last_index + next >= 0);
h264_find_frame_end(p, &pc->buffer[pc->last_index + next], -next, avctx); // update state
}
}
parse_nal_units(s, avctx, buf, buf_size);
if (avctx->framerate.num)
avctx->time_base = av_inv_q(av_mul_q(avctx->framerate, (AVRational){avctx->ticks_per_frame, 1}));
if (p->sei.picture_timing.cpb_removal_delay >= 0) {
s->dts_sync_point = p->sei.buffering_period.present;
s->dts_ref_dts_delta = p->sei.picture_timing.cpb_removal_delay;
s->pts_dts_delta = p->sei.picture_timing.dpb_output_delay;
} else {
s->dts_sync_point = INT_MIN;
s->dts_ref_dts_delta = INT_MIN;
s->pts_dts_delta = INT_MIN;
}
if (s->flags & PARSER_FLAG_ONCE) {
s->flags &= PARSER_FLAG_COMPLETE_FRAMES;
}
if (s->dts_sync_point >= 0) {
int64_t den = avctx->time_base.den * (int64_t)avctx->pkt_timebase.num;
if (den > 0) {
int64_t num = avctx->time_base.num * (int64_t)avctx->pkt_timebase.den;
if (s->dts != AV_NOPTS_VALUE) {
// got DTS from the stream, update reference timestamp
p->reference_dts = s->dts - av_rescale(s->dts_ref_dts_delta, num, den);
} else if (p->reference_dts != AV_NOPTS_VALUE) {
// compute DTS based on reference timestamp
s->dts = p->reference_dts + av_rescale(s->dts_ref_dts_delta, num, den);
}
if (p->reference_dts != AV_NOPTS_VALUE && s->pts == AV_NOPTS_VALUE)
s->pts = s->dts + av_rescale(s->pts_dts_delta, num, den);
if (s->dts_sync_point > 0)
p->reference_dts = s->dts; // new reference
}
}
*poutbuf = buf;
*poutbuf_size = buf_size;
return next;
}
static int h264_find_frame_end(H264ParseContext *p, const uint8_t *buf,
int buf_size, void *logctx)
{
int i, j;
uint32_t state;
ParseContext *pc = &p->pc;
int next_avc = p->is_avc ? 0 : buf_size;
// mb_addr= pc->mb_addr - 1;
state = pc->state;
if (state > 13)
state = 7;
if (p->is_avc && !p->nal_length_size)
av_log(logctx, AV_LOG_ERROR, "AVC-parser: nal length size invalid\n");
for (i = 0; i < buf_size; i++) {
if (i >= next_avc) {
int nalsize = 0;
i = next_avc;
for (j = 0; j < p->nal_length_size; j++)
nalsize = (nalsize << 8) | buf[i++];
if (nalsize <= 0 || nalsize > buf_size - i) {
av_log(logctx, AV_LOG_ERROR, "AVC-parser: nal size %d remaining %d\n", nalsize, buf_size - i);
return buf_size;
}
next_avc = i + nalsize;
state = 5;
}
if (state == 7) {
i += p->h264dsp.startcode_find_candidate(buf + i, next_avc - i);
if (i < next_avc)
state = 2;
} else if (state <= 2) {
if (buf[i] == 1)
state ^= 5; // 2->7, 1->4, 0->5
else if (buf[i])
state = 7;
else
state >>= 1; // 2->1, 1->0, 0->0
} else if (state <= 5) {
int nalu_type = buf[i] & 0x1F;
if (nalu_type == H264_NAL_SEI || nalu_type == H264_NAL_SPS ||
nalu_type == H264_NAL_PPS || nalu_type == H264_NAL_AUD) {
if (pc->frame_start_found) {
i++;
goto found;
}
} else if (nalu_type == H264_NAL_SLICE || nalu_type == H264_NAL_DPA ||
nalu_type == H264_NAL_IDR_SLICE) {
state += 8;
continue;
}
state = 7;
} else {
unsigned int mb, last_mb = p->parse_last_mb;
GetBitContext gb;
p->parse_history[p->parse_history_count++] = buf[i];
init_get_bits(&gb, p->parse_history, 8*p->parse_history_count);
mb= get_ue_golomb_long(&gb);
if (get_bits_left(&gb) > 0 || p->parse_history_count > 5) {
p->parse_last_mb = mb;
if (pc->frame_start_found) {
if (mb <= last_mb) {
i -= p->parse_history_count - 1;
p->parse_history_count = 0;
goto found;
}
} else
pc->frame_start_found = 1;
p->parse_history_count = 0;
state = 7;
}
}
}
pc->state = state;
if (p->is_avc)
return next_avc;
return END_NOT_FOUND;
found:
pc->state = 7;
pc->frame_start_found = 0;
if (p->is_avc)
return next_avc;
return i - (state & 5);
}
这里包含了解析器具体类型
static const AVCodecParser * const parser_list[] = {
&ff_aac_parser,
&ff_aac_latm_parser,
&ff_ac3_parser,
&ff_adx_parser,
&ff_av1_parser,
&ff_avs2_parser,
&ff_bmp_parser,
&ff_cavsvideo_parser,
&ff_cook_parser,
&ff_dca_parser,
&ff_dirac_parser,
&ff_dnxhd_parser,
&ff_dpx_parser,
&ff_dvaudio_parser,
&ff_dvbsub_parser,
&ff_dvdsub_parser,
&ff_dvd_nav_parser,
&ff_flac_parser,
&ff_g723_1_parser,
&ff_g729_parser,
&ff_gif_parser,
&ff_gsm_parser,
&ff_h261_parser,
&ff_h263_parser,
&ff_h264_parser,
&ff_hevc_parser,
&ff_mjpeg_parser,
&ff_mlp_parser,
&ff_mpeg4video_parser,
&ff_mpegaudio_parser,
&ff_mpegvideo_parser,
&ff_opus_parser,
&ff_png_parser,
&ff_pnm_parser,
&ff_rv30_parser,
&ff_rv40_parser,
&ff_sbc_parser,
&ff_sipr_parser,
&ff_tak_parser,
&ff_vc1_parser,
&ff_vorbis_parser,
&ff_vp3_parser,
&ff_vp8_parser,
&ff_vp9_parser,
&ff_xma_parser,
NULL };