FFmpeg 视频解码进阶：H264_CUVID 硬解码器简单示例-平芜编程栈

🎬 FFmpeg 视频解码进阶：H264_CUVID 硬解码器简单示例
📅 更新时间：2026 年1月2日
🏷️ 标签：FFmpeg | H264_CUVID | NVIDIA | 硬件解码 | CUDA | GPU

文章目录

📖 前言
🔄 与软解码的主要区别
💻 完整代码
🎯 硬解码关键点详解
- 1️⃣ 新增头文件
- 2️⃣ 新增变量
- 3️⃣ 创建 CUDA 硬件设备上下文
- 4️⃣ 使用 `avcodec_find_decoder_by_name` 查找解码器
- 5️⃣ 绑定硬件设备上下文
- 6️⃣ GPU → CPU 数据传输（核心！）
- 7️⃣ NV12 格式处理
- 8️⃣ 资源释放
📊 软解码 vs 硬解码代码对比
🎥 验证解码结果
📋 总结
- 硬解码相比软解码的额外步骤
- 使用硬解码的前提条件

📖 前言

在上一篇文章中，我们介绍了如何使用 FFmpeg 调用 H264 软解码器进行视频解码。本文将介绍如何使用 h264_cuvid 硬件解码器，利用 NVIDIA GPU 进行硬件加速解码（注意：本文示例仅解码视频流，不处理音频流）。

软解码 vs 硬解码：

特性	h264 (软解码)	h264_cuvid (硬解码)
执行设备	CPU	NVIDIA GPU
CPU 占用	高	低
适用场景	通用	多路解码、高分辨率
依赖	无	NVIDIA GPU + CUDA

🔄 与软解码的主要区别

硬解码的流程与软解码基本相同，但有以下 4 个关键区别：

┌─────────────────────────────────────────────────────────────┐
│                      软解码 vs 硬解码                       │
├─────────────────────────────────────────────────────────────┤
│                                                             │
│  1️⃣ 解码器查找方式不同                                      │
│     软解: avcodec_find_decoder(codec_id)                    │
│     硬解: avcodec_find_decoder_by_name("h264_cuvid")        │
│                                                             │
│  2️⃣ 需要创建硬件设备上下文                                  │
│     av_hwdevice_ctx_create(&hw_device_ctx, CUDA, ...)       │
│                                                             │
│  3️⃣ 解码后数据在 GPU 显存，需要传输到 CPU                   │
│     av_hwframe_transfer_data(sw_frame, frame, 0)            │
│                                                             │
│  4️⃣ 输出像素格式不同                                        │
│     软解: YUV420P (平面存储)                                │
│     硬解: NV12 (UV交错存储)                                 │
│                                                             │
└─────────────────────────────────────────────────────────────┘

💻 完整代码

#include<iostream>extern"C"{#include<libavcodec/avcodec.h>#include<libavformat/avformat.h>#include<libavutil/avutil.h>#include<libavutil/hwcontext.h>}//打印错误原因voidlog_error(interror,std::string tmp){charerrbuf[256];av_strerror(error,errbuf,sizeof(errbuf));std::cout<<tmp<<","<<errbuf<<std::endl;}//将NV12格式的帧写入YUV420P文件voidwrite_nv12_to_yuv420p(AVFrame*frame,FILE*file_yuv){//写入Y分量for(inti=0;i<frame->height;i++){fwrite(frame->data[0]+i*frame->linesize[0],1,frame->width,file_yuv);}//NV12的UV是交错存储的，需要分离成U和Vintuv_height=frame->height/2;intuv_width=frame->width/2;//分配临时缓冲区存储U和Vuint8_t*u_plane=newuint8_t[uv_height*uv_width];uint8_t*v_plane=newuint8_t[uv_height*uv_width];//从NV12的UV交错数据中分离U和Vfor(inti=0;i<uv_height;i++){uint8_t*uv_row=frame->data[1]+i*frame->linesize[1];for(intj=0;j<uv_width;j++){u_plane[i*uv_width+j]=uv_row[j*2];// Uv_plane[i*uv_width+j]=uv_row[j*2+1];// V}}//写入U分量fwrite(u_plane,1,uv_height*uv_width,file_yuv);//写入V分量fwrite(v_plane,1,uv_height*uv_width,file_yuv);delete[]u_plane;delete[]v_plane;}intmain(){// 配置AVFormatContext*avformat_context=nullptr;AVCodecContext*avcodec_context=nullptr;AVStream*video_stream=nullptr;AVPacket*packet=nullptr;AVFrame*frame=nullptr;AVFrame*sw_frame=nullptr;//用于存储从GPU传输到CPU的帧constAVCodec*decode=nullptr;AVBufferRef*hw_device_ctx=nullptr;// 
硬件设备上下文constchar*file_url="D:/桌面/视频录制/500001652967108-1-192.mp4";intresult=0;intvideo_index=0;FILE*file_yuv=fopen("output_h264_cuvid.yuv","wb");if(!file_yuv){std::cout<<"无法创建输出文件"<<std::endl;return-1;}std::cout<<"成功创建输出文件"<<std::endl;//创建CUDA硬件设备上下文result=av_hwdevice_ctx_create(&hw_device_ctx,AV_HWDEVICE_TYPE_CUDA,nullptr,nullptr,0);if(result<0){log_error(result,"无法创建CUDA硬件设备上下文");fclose(file_yuv);return-1;}std::cout<<"成功创建CUDA硬件设备上下文"<<std::endl;//打开视频result=avformat_open_input(&avformat_context,file_url,nullptr,nullptr);if(result<0){log_error(result,"视频文件打开失败");av_buffer_unref(&hw_device_ctx);fclose(file_yuv);return-1;}std::cout<<"成功打开mp4视频文件"<<std::endl;//寻找视频流result=av_find_best_stream(avformat_context,AVMEDIA_TYPE_VIDEO,-1,-1,nullptr,0);if(result<0){log_error(result,"未找到视频流索引");avformat_close_input(&avformat_context);av_buffer_unref(&hw_device_ctx);fclose(file_yuv);return-1;}video_index=result;std::cout<<"视频流索引:"<<video_index<<std::endl;video_stream=avformat_context->streams[video_index];//使用h264_cuvid硬件解码器decode=avcodec_find_decoder_by_name("h264_cuvid");if(!decode){std::cout<<"未找到h264_cuvid解码器"<<std::endl;avformat_close_input(&avformat_context);av_buffer_unref(&hw_device_ctx);fclose(file_yuv);return-1;}std::cout<<"解码器:"<<decode->name<<std::endl;//分配解码器上下文avcodec_context=avcodec_alloc_context3(decode);avcodec_parameters_to_context(avcodec_context,video_stream->codecpar);avcodec_context->pkt_timebase=video_stream->time_base;avcodec_context->hw_device_ctx=av_buffer_ref(hw_device_ctx);//打开解码器result=avcodec_open2(avcodec_context,decode,nullptr);if(result<0){log_error(result,"解码器信息配置失败");avcodec_free_context(&avcodec_context);avformat_close_input(&avformat_context);av_buffer_unref(&hw_device_ctx);fclose(file_yuv);return-1;}std::cout<<"解码器信息配置成功"<<std::endl;//分配packet和framepacket=av_packet_alloc();frame=av_frame_alloc();sw_frame=av_frame_alloc();intframe_count=0;//解码循环while(av_read_frame(avformat_context,packet)>=0){if(packet->stream_index==video_index){result=avcod
ec_send_packet(avcodec_context,packet);if(result<0){log_error(result,"发送packet给解码器失败");av_packet_unref(packet);continue;}while(avcodec_receive_frame(avcodec_context,frame)==0){//GPU -> CPU 数据传输，输出格式为NV12result=av_hwframe_transfer_data(sw_frame,frame,0);if(result<0){log_error(result,"GPU到CPU数据传输失败");av_frame_unref(frame);break;}//写入YUV文件（NV12转YUV420P）write_nv12_to_yuv420p(sw_frame,file_yuv);frame_count++;if(frame_count%100==0){std::cout<<"已解码 "<<frame_count<<" 帧"<<std::endl;}av_frame_unref(sw_frame);av_frame_unref(frame);}}av_packet_unref(packet);}//刷新解码器缓冲区avcodec_send_packet(avcodec_context,nullptr);while(avcodec_receive_frame(avcodec_context,frame)==0){result=av_hwframe_transfer_data(sw_frame,frame,0);if(result<0){av_frame_unref(frame);break;}write_nv12_to_yuv420p(sw_frame,file_yuv);frame_count++;av_frame_unref(sw_frame);av_frame_unref(frame);}std::cout<<"解码完成，共解码 "<<frame_count<<" 帧"<<std::endl;//资源回收fclose(file_yuv);av_packet_free(&packet);av_frame_free(&frame);av_frame_free(&sw_frame);avcodec_free_context(&avcodec_context);avformat_close_input(&avformat_context);av_buffer_unref(&hw_device_ctx);return0;}

🎯 硬解码关键点详解

1️⃣ 新增头文件

#include<libavutil/hwcontext.h>// 硬件上下文相关API

2️⃣ 新增变量

AVFrame*sw_frame=nullptr;// 用于存储从GPU传输到CPU的帧AVBufferRef*hw_device_ctx=nullptr;// 硬件设备上下文

变量	作用
`sw_frame`	存储从 GPU 传输到 CPU 的帧数据
`hw_device_ctx`	CUDA 硬件设备上下文，管理 GPU 资源

3️⃣ 创建 CUDA 硬件设备上下文

// Create the CUDA hardware device context; a negative result means failure.
result = av_hwdevice_ctx_create(&hw_device_ctx,
                                AV_HWDEVICE_TYPE_CUDA,
                                nullptr,  // device
                                nullptr,  // opts
                                0);       // flags

函数原型：

intav_hwdevice_ctx_create(AVBufferRef**device_ctx,enumAVHWDeviceTypetype,constchar*device,AVDictionary*opts,intflags);

参数	说明
`device_ctx`	输出参数，创建的硬件设备上下文
`type`	硬件类型，这里是`AV_HWDEVICE_TYPE_CUDA`
`device`	设备名称，nullptr 表示使用默认 GPU
`opts`	额外选项，通常为 nullptr
`flags`	标志位，通常为 0

4️⃣ 使用`avcodec_find_decoder_by_name`查找解码器

// 软解码：根据 codec_id 自动查找decode=avcodec_find_decoder(video_stream->codecpar->codec_id);// 硬解码：必须指定解码器名称decode=avcodec_find_decoder_by_name("h264_cuvid");

为什么不能用avcodec_find_decoder？

因为avcodec_find_decoder(AV_CODEC_ID_H264)会返回默认的软解码器h264，而不是硬解码器h264_cuvid。

5️⃣ 绑定硬件设备上下文

avcodec_context->pkt_timebase = video_stream->time_base;        // Set the packet time base
avcodec_context->hw_device_ctx = av_buffer_ref(hw_device_ctx);  // Bind the hardware context

注意：

pkt_timebase必须设置，否则会出现Invalid pkt_timebase警告
hw_device_ctx必须在avcodec_open2之前绑定

6️⃣ GPU → CPU 数据传输（核心！）

// frame 在 GPU 显存中result=av_hwframe_transfer_data(sw_frame,frame,0);// sw_frame 在 CPU 内存中，可以进行后续处理

数据流向：

   GPU显存                              CPU内存
┌──────────┐                        ┌──────────┐
│  frame   │   ──传输(拷贝)──→      │ sw_frame │
│ (解码后) │   av_hwframe_          │  (副本)  │
│   CUDA   │   transfer_data()      │   NV12   │
└──────────┘                        └──────────┘

为什么需要传输？

硬解码后的数据存储在 GPU 显存中，CPU 无法直接访问。如果要写入文件或进行 CPU 处理，必须先传输到 CPU 内存。

7️⃣ NV12 格式处理

硬解码输出的像素格式是NV12，与软解码的YUV420P不同：

YUV420P (软解码输出):          NV12 (硬解码输出):
┌──────────┐                   ┌──────────┐
│    Y     │ data[0]           │    Y     │ data[0]
├──────────┤                   ├──────────┤
│    U     │ data[1]           │   UVUV   │ data[1] (UV交错)
├──────────┤                   └──────────┘
│    V     │ data[2]
└──────────┘

NV12 转 YUV420P 的关键代码：

// 从 NV12 的 UV 交错数据中分离 U 和 Vfor(inti=0;i<uv_height;i++){uint8_t*uv_row=frame->data[1]+i*frame->linesize[1];for(intj=0;j<uv_width;j++){u_plane[i*uv_width+j]=uv_row[j*2];// U 在偶数位置v_plane[i*uv_width+j]=uv_row[j*2+1];// V 在奇数位置}}

8️⃣ 资源释放

// 新增：释放硬件设备上下文av_buffer_unref(&hw_device_ctx);// 新增：释放 sw_frameav_frame_free(&sw_frame);

📊 软解码 vs 硬解码代码对比

步骤	软解码	硬解码
头文件	无额外	`+ hwcontext.h`
变量	frame	`+ sw_frame, hw_device_ctx`
硬件初始化	无	`av_hwdevice_ctx_create()`
查找解码器	`avcodec_find_decoder()`	`avcodec_find_decoder_by_name()`
绑定上下文	无	`avcodec_context->hw_device_ctx = ...`
数据传输	无	`av_hwframe_transfer_data()`
像素格式	YUV420P	NV12
资源释放	无额外	`+ av_buffer_unref()`

🎥 验证解码结果

解码完成后，会生成output_h264_cuvid.yuv文件。使用 ffplay 播放：

# Raw YUV has no header: -video_size must be set to the decoded video's actual resolution.
ffplay -f rawvideo -video_size 1280x720 -pixel_format yuv420p output_h264_cuvid.yuv

📋 总结

硬解码相比软解码的额外步骤

1. av_hwdevice_ctx_create()                     ← 创建 CUDA 设备上下文
2. avcodec_find_decoder_by_name("h264_cuvid")   ← 指定硬解码器
3. avcodec_context->hw_device_ctx = ...         ← 绑定硬件上下文
4. av_hwframe_transfer_data()                   ← GPU → CPU 数据传输
5. NV12 → YUV420P 格式转换                      ← 处理不同的像素格式
6. av_buffer_unref()                            ← 释放硬件上下文

使用硬解码的前提条件

✅ NVIDIA GPU（支持 NVDEC）
✅ 正确安装的 NVIDIA 驱动
✅ FFmpeg 编译时启用了--enable-cuda --enable-cuvid

如果您觉得这篇文章对您有帮助，不妨点赞 + 收藏 + 关注，更多 FFmpeg 系列教程将持续更新 🔥！

FFmpeg 视频解码进阶：H264_CUVID 硬解码器简单示例

文章目录

📖 前言

🔄 与软解码的主要区别

💻 完整代码

🎯 硬解码关键点详解

1️⃣ 新增头文件

2️⃣ 新增变量

3️⃣ 创建 CUDA 硬件设备上下文

4️⃣ 使用`avcodec_find_decoder_by_name`查找解码器

5️⃣ 绑定硬件设备上下文

6️⃣ GPU → CPU 数据传输（核心！）

7️⃣ NV12 格式处理

8️⃣ 资源释放

📊 软解码 vs 硬解码代码对比

🎥 验证解码结果

📋 总结

硬解码相比软解码的额外步骤

使用硬解码的前提条件

2026开年12条重磅消息！机器人与AI正悄悄改变你的生活

《利用混合整数规划优化航空旅行网络简介》

从零实现有源蜂鸣器和无源区分功能测试

motion_scale控制在1.0-1.1，避免Sonic动作僵硬或夸张

STM32CubeMX下载安装与驱动配置：手把手教学（含示例）

Sonic与Dify结合使用？构建企业知识库问答数字人助手

文章目录

📖 前言

🔄 与软解码的主要区别

💻 完整代码

🎯 硬解码关键点详解

1️⃣ 新增头文件

2️⃣ 新增变量

3️⃣ 创建 CUDA 硬件设备上下文

4️⃣ 使用avcodec_find_decoder_by_name查找解码器

5️⃣ 绑定硬件设备上下文

6️⃣ GPU → CPU 数据传输（核心！）

7️⃣ NV12 格式处理

8️⃣ 资源释放

📊 软解码 vs 硬解码代码对比

🎥 验证解码结果

📋 总结

硬解码相比软解码的额外步骤

使用硬解码的前提条件

2026开年12条重磅消息！机器人与AI正悄悄改变你的生活

《利用混合整数规划优化航空旅行网络简介》

从零实现有源蜂鸣器和无源区分功能测试

motion_scale控制在1.0-1.1，避免Sonic动作僵硬或夸张

STM32CubeMX下载安装与驱动配置：手把手教学（含示例）

Sonic与Dify结合使用？构建企业知识库问答数字人助手

4️⃣ 使用`avcodec_find_decoder_by_name`查找解码器