MP4文件是一个多媒体容器格式,它可以包含多种类型的音视频数据,包括H.264视频。MP4文件使用了一种称为“盒子”(box)或“原子”(atom)的层次结构来组织数据。每个盒子都有特定的功能和用途,用于存储文件元数据、音视频数据以及其他信息。
MP4文件由多个盒子(box)组成,每个盒子都有一个标头(header)和内容(payload)。盒子的层次结构允许MP4文件灵活地存储和组织数据。常见的盒子包括:
H.264视频流的描述信息和索引信息存储在trak盒子中,具体位于其下的mdia(媒体)、minf(媒体信息)、stbl(样本表)子盒子中;实际的码流数据则存放在mdat盒子中。以下是详细的存储方式:
ftyp
盒子包含文件类型和兼容性信息,指示文件格式和版本。
moov
盒子包含全局元数据,包括以下关键子盒子:
mdat
盒子包含实际的媒体数据,包括H.264视频数据。这点与 Annex-B 格式不同,视频数据通常不包含NAL单元起始码,而是使用长度字段。
stsd
(样本描述盒子,Sample Description Box)中存储了有关H.264流的详细信息,包括SPS和PPS数据:
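起始码 vs 长度字段:
在MP4(AVCC)格式中,每个NAL单元之前是一个长度字段,其字节数由 AVCDecoderConfigurationRecord 中的
lengthSizeMinusOne
决定,通常为 4 字节。而在 Annex-B 格式中,每个NAL单元之前是起始码 0x00000001
或 0x000001
,用于标识NAL单元的边界。
SPS 和 PPS 数据存储:
在MP4中,SPS 和 PPS 不随每个关键帧重复出现,而是存储在
AVCDecoderConfigurationRecord
中,并且在解码器初始化时解析。
用途:下面的示例代码从MP4文件中读取H.264视频包,把长度字段替换为起始码,并在关键帧之前插入SPS/PPS,从而输出 Annex-B 格式的码流。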
#include #include #include #include #ifndef AV_WB32 # define AV_WB32(p, val) do { \ uint32_t d = (val); \ ((uint8_t*)(p))[3] = (d); \ ((uint8_t*)(p))[2] = (d)>>8; \ ((uint8_t*)(p))[1] = (d)>>16; \ ((uint8_t*)(p))[0] = (d)>>24; \ } while(0) #endif //读取内存中以大端字节序(big-endian)存储的16位无符号整数 #ifndef AV_RB16 # define AV_RB16(x) \ ((((const uint8_t*)(x))[0] << 8) | \ ((const uint8_t*)(x))[1]) #endif static int alloc_and_copy(AVPacket *out, const uint8_t *sps_pps, uint32_t sps_pps_size, const uint8_t *in, uint32_t in_size) { uint32_t offset = out->size; uint8_t nal_header_size = offset ? 3 : 4; int err; err = av_grow_packet(out, sps_pps_size + in_size + nal_header_size); if (err < 0) return err; if (sps_pps) memcpy(out->data + offset, sps_pps, sps_pps_size); memcpy(out->data + sps_pps_size + nal_header_size + offset, in, in_size); if (!offset) { AV_WB32(out->data + sps_pps_size, 1); } else { (out->data + offset + sps_pps_size)[0] = (out->data + offset + sps_pps_size)[1] = 0; (out->data + offset + sps_pps_size)[2] = 1; } return 0; } //将 H.264 编码器的 extradata (额外数据),从 MP4/AVCC 格式转换为 Annex-B 格式,并将其存储在 AVPacket 结构中。 int h264_extradata_to_annexb(const uint8_t *codec_extradata, const int codec_extradata_size, AVPacket *out_extradata, int padding) { uint16_t unit_size; uint64_t total_size = 0; uint8_t *out = NULL, unit_nb, sps_done = 0, sps_seen = 0, pps_seen = 0, sps_offset = 0, pps_offset = 0; const uint8_t *extradata = codec_extradata + 4; // 跳过AVCC 格式中的前四个字节,这些信息在解析NAL单元的时候并不需要 static const uint8_t nalu_header[4] = { 0, 0, 0, 1 }; //填充起始码 int length_size = (*extradata++ & 0x3) + 1; // retrieve length coded size, 用于指示表示编码数据长度所需字节数 sps_offset = pps_offset = -1; /* retrieve sps and pps unit(s) */ unit_nb = *extradata++ & 0x1f; /* number of sps unit(s) */ if (!unit_nb) { goto pps; }else { sps_offset = 0; sps_seen = 1; } while (unit_nb--) { int err; unit_size = AV_RB16(extradata); total_size += unit_size + 4; if (total_size > INT_MAX - padding) { av_log(NULL, AV_LOG_ERROR, "Too big 
extradata size, corrupted stream or invalid MP4/AVCC bitstream\n"); av_free(out); return AVERROR(EINVAL); } if (extradata + 2 + unit_size > codec_extradata + codec_extradata_size) { av_log(NULL, AV_LOG_ERROR, "Packet header is not contained in global extradata, " "corrupted stream or invalid MP4/AVCC bitstream\n"); av_free(out); return AVERROR(EINVAL); } if ((err = av_reallocp(&out, total_size + padding)) < 0) return err; memcpy(out + total_size - unit_size - 4, nalu_header, 4); memcpy(out + total_size - unit_size, extradata + 2, unit_size); extradata += 2 + unit_size; pps: if (!unit_nb && !sps_done++) { unit_nb = *extradata++; /* number of pps unit(s) */ if (unit_nb) { pps_offset = total_size; pps_seen = 1; } } } if (out) memset(out + total_size, 0, padding); if (!sps_seen) av_log(NULL, AV_LOG_WARNING, "Warning: SPS NALU missing or invalid. " "The resulting stream may not play.\n"); if (!pps_seen) av_log(NULL, AV_LOG_WARNING, "Warning: PPS NALU missing or invalid. " "The resulting stream may not play.\n"); out_extradata->data = out; out_extradata->size = total_size; return length_size; } //将MP4中的AVCC格式转为annexb格式 int h264_mp4toannexb(AVFormatContext *fmt_ctx, AVPacket *in, FILE *dst_fd) { AVPacket *out = NULL; AVPacket spspps_pkt; int len; uint8_t unit_type; int32_t nal_size; uint32_t cumul_size = 0; const uint8_t *buf; const uint8_t *buf_end; int buf_size; int ret = 0, i; out = av_packet_alloc(); // buf = in->data; buf_size = in->size; buf_end = in->data + in->size; do { ret= AVERROR(EINVAL); if (buf + 4 /*s->length_size*/ > buf_end) goto fail; for (nal_size = 0, i = 0; i<4/*s->length_size*/; i++) nal_size = (nal_size << 8) | buf[i]; buf += 4; /*s->length_size;*/ unit_type = *buf & 0x1f; //确定单元类型 if (nal_size > buf_end - buf || nal_size < 0) goto fail; /* if (unit_type == 7) s->idr_sps_seen = s->new_idr = 1; else if (unit_type == 8) { s->idr_pps_seen = s->new_idr = 1; */ /* if SPS has not been seen yet, prepend the AVCC one to PPS */ /* if (!s->idr_sps_seen) { if 
(s->sps_offset == -1) av_log(ctx, AV_LOG_WARNING, "SPS not present in the stream, nor in AVCC, stream may be unreadable\n"); else { if ((ret = alloc_and_copy(out, ctx->par_out->extradata + s->sps_offset, s->pps_offset != -1 ? s->pps_offset : ctx->par_out->extradata_size - s->sps_offset, buf, nal_size)) < 0) goto fail; s->idr_sps_seen = 1; goto next_nal; } } } */ /* if this is a new IDR picture following an IDR picture, reset the idr flag. * Just check first_mb_in_slice to be 0 as this is the simplest solution. * This could be checking idr_pic_id instead, but would complexify the parsing. */ /* if (!s->new_idr && unit_type == 5 && (buf[1] & 0x80)) s->new_idr = 1; */ /* prepend only to the first type 5 NAL unit of an IDR picture, if no sps/pps are already present */ if (/*s->new_idr && */unit_type == 5 /*&& !s->idr_sps_seen && !s->idr_pps_seen*/) { //说明是个关键帧,需要将MP4中的SPS/PPS 填充到NAL单元之前 h264_extradata_to_annexb( fmt_ctx->streams[in->stream_index]->codec->extradata, fmt_ctx->streams[in->stream_index]->codec->extradata_size, &spspps_pkt, AV_INPUT_BUFFER_PADDING_SIZE); if ((ret=alloc_and_copy(out, spspps_pkt.data, spspps_pkt.size, buf, nal_size)) < 0) goto fail; /*s->new_idr = 0;*/ /* if only SPS has been seen, also insert PPS */ } /*else if (s->new_idr && unit_type == 5 && s->idr_sps_seen && !s->idr_pps_seen) { if (s->pps_offset == -1) { av_log(ctx, AV_LOG_WARNING, "PPS not present in the stream, nor in AVCC, stream may be unreadable\n"); if ((ret = alloc_and_copy(out, NULL, 0, buf, nal_size)) < 0) goto fail; } else if ((ret = alloc_and_copy(out, ctx->par_out->extradata + s->pps_offset, ctx->par_out->extradata_size - s->pps_offset, buf, nal_size)) < 0) goto fail; }*/ else { if ((ret=alloc_and_copy(out, NULL, 0, buf, nal_size)) < 0) goto fail; /* if (!s->new_idr && unit_type == 1) { s->new_idr = 1; s->idr_sps_seen = 0; s->idr_pps_seen = 0; } */ } len = fwrite( out->data, 1, out->size, dst_fd); if(len != out->size){ av_log(NULL, AV_LOG_DEBUG, "warning, length of writed 
data isn't equal pkt.size(%d, %d)\n", len, out->size); } fflush(dst_fd); next_nal: buf += nal_size; cumul_size += nal_size + 4;//s->length_size; } while (cumul_size < buf_size); /* ret = av_packet_copy_props(out, in); if (ret < 0) goto fail; */ fail: av_packet_free(&out); return ret; } int main(int argc, char *argv[]) { int err_code; char errors[1024]; char *src_filename = NULL; char *dst_filename = NULL; FILE *dst_fd = NULL; int video_stream_index = -1; //AVFormatContext *ofmt_ctx = NULL; //AVOutputFormat *output_fmt = NULL; //AVStream *out_stream = NULL; AVFormatContext *fmt_ctx = NULL; AVPacket pkt; //AVFrame *frame = NULL; av_log_set_level(AV_LOG_DEBUG); if(argc < 3){ av_log(NULL, AV_LOG_DEBUG, "the count of parameters should be more than three!\n"); return -1; } src_filename = argv[1]; dst_filename = argv[2]; if(src_filename == NULL || dst_filename == NULL){ av_log(NULL, AV_LOG_ERROR, "src or dts file is null, plz check them!\n"); return -1; } /*register all formats and codec*/ av_register_all(); dst_fd = fopen(dst_filename, "wb"); if (!dst_fd) { av_log(NULL, AV_LOG_DEBUG, "Could not open destination file %s\n", dst_filename); return -1; } /*open input media file, and allocate format context*/ if((err_code = avformat_open_input(&fmt_ctx, src_filename, NULL, NULL)) < 0){ av_strerror(err_code, errors, 1024); av_log(NULL, AV_LOG_DEBUG, "Could not open source file: %s, %d(%s)\n", src_filename, err_code, errors); return -1; } /*dump input information*/ av_dump_format(fmt_ctx, 0, src_filename, 0); /*initialize packet*/ av_init_packet(&pkt); pkt.data = NULL; pkt.size = 0; /*find best video stream*/ video_stream_index = av_find_best_stream(fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0); if(video_stream_index < 0){ av_log(NULL, AV_LOG_DEBUG, "Could not find %s stream in input file %s\n", av_get_media_type_string(AVMEDIA_TYPE_VIDEO), src_filename); return AVERROR(EINVAL); } /* if (avformat_write_header(ofmt_ctx, NULL) < 0) { av_log(NULL, AV_LOG_DEBUG, "Error occurred 
when opening output file"); exit(1); } */ /*read frames from media file*/ while(av_read_frame(fmt_ctx, &pkt) >=0 ){ if(pkt.stream_index == video_stream_index){ /* pkt.stream_index = 0; av_write_frame(ofmt_ctx, &pkt); av_free_packet(&pkt); */ h264_mp4toannexb(fmt_ctx, &pkt, dst_fd); } //release pkt->data av_packet_unref(&pkt); } //av_write_trailer(ofmt_ctx); /*close input media file*/ avformat_close_input(&fmt_ctx); if(dst_fd) { fclose(dst_fd); } //avio_close(ofmt_ctx->pb); return 0; }