1. 关于IPB帧
From: 什么是I帧,P帧,B帧 - 2011.06.30
视频压缩中,每帧代表一幅静止的图像。而在实际压缩时,会采取各种算法减少数据的容量,其中IPB就是最常见的。
简单地说,I 帧是关键帧,属于帧内压缩,与 AVI 的压缩方式相同。P 帧是前向预测帧(向前搜索),B 帧是双向预测帧(双向搜索)。它们都是基于 I 帧来压缩数据。
[1] - I帧表示关键帧,可以理解为这一帧画面的完整保留;解码时只需要本帧数据就可以完成(因为包含完整画面)
[2] - P帧表示的是这一帧跟之前的一个关键帧(或P帧)的差别,解码时需要用之前缓存的画面叠加上本帧定义的差别,生成最终画面。(也就是差别帧,P帧没有完整画面数据,只有与前一帧的画面差别的数据)
[3] - B帧是双向差别帧,也就是B帧记录的是本帧与前后帧的差别(具体比较复杂,有4种情况),换言之,要解码B帧,不仅要取得之前的缓存画面,还要解码之后的画面,通过前后画面与本帧数据的叠加取得最终的画面。B帧压缩率高,但是解码时 CPU 开销较大。
2. 视频帧提取
视频帧提取这里包括两种(基于 Python):
[1] - 视频关键帧(I 帧)提取
如:
import os
import subprocess

# Extract only the intra-coded (I) frames of `video_path` as JPEG images.
# The command is passed as an argument list (no shell), so paths containing
# spaces or shell metacharacters are handled correctly.
command = ['ffmpeg', '-i', video_path,
           # Raw string: the backslash must reach ffmpeg so that the comma is
           # not parsed as a filter separator.
           '-vf', r'select=eq(pict_type\,I)',  # keep I frames only
           '-vsync', 'vfr', '-qscale:v', '2',
           '-f', 'image2',
           '%005d.jpg']
# check=False: a non-zero ffmpeg exit status is ignored, as with os.system.
subprocess.run(command, check=False)
[2] - 视频均匀帧提取,如隔 1s 提取一帧
#
import subprocess

# Extract frames of `video_path` at a fixed rate as JPEG images.
# The command is passed as an argument list (no shell), so paths containing
# spaces or shell metacharacters are handled correctly.
command = ['ffmpeg', '-i', video_path,
           # Output frame rate: number of images extracted per second of
           # video; 1 means one frame per second.
           '-r', str(frame_per_sec),
           '-q:v', '2', '-f', 'image2',
           '%008d.jpg']
# check=False: a non-zero ffmpeg exit status is ignored, as with os.system.
subprocess.run(command, check=False)
3. 完整实现
采用 multiprocessing 多进程,
参考:https://github.com/dasiki/CCF-BDCI-VideoCopyDetection/blob/master/frame_extract.ipynb
#!/usr/bin/python3
#!--*-- coding: utf-8 --*--
import codecs
import glob
import os
import shutil
import subprocess
import sys
import time
from multiprocessing import Pool
def extract_keyframe(video_path, frame_path):
    """Extract the key (I) frames of a video and log their timestamps.

    Frames are written to ``<frame_path>/<video_id>/<video_id>_NNNNN.jpg`` and
    the timestamps to ``<frame_path>/<video_id>/<video_id>.log``, one
    ``pkt_pts_time=<seconds>`` line per key frame.

    Args:
        video_path: Path of the input video. ``video_id`` is the base name up
            to its first dot.
        frame_path: Root output directory; created if missing.

    Raises:
        OSError: If the output directory cannot be created or the
            ``ffmpeg`` / ``ffprobe`` executables cannot be started.
    """
    video_id = os.path.basename(video_path).split('.')[0]
    out_dir = os.path.join(frame_path, video_id)
    # makedirs also creates a missing frame_path and tolerates reruns.
    os.makedirs(out_dir, exist_ok=True)

    # Argument list instead of a shell string: paths with spaces or shell
    # metacharacters are passed through untouched.
    ffmpeg_cmd = ['ffmpeg', '-i', video_path,
                  # Raw string: the backslash must reach ffmpeg so the comma
                  # is not parsed as a filter separator.
                  '-vf', r'select=eq(pict_type\,I)',
                  '-vsync', 'vfr', '-qscale:v', '2',
                  '-f', 'image2',
                  os.path.join(out_dir, '{}_%005d.jpg'.format(video_id))]
    # check=False: a failing ffmpeg run is ignored, as with os.system.
    subprocess.run(ffmpeg_cmd, check=False)

    # Collect the timestamps of the key frames.
    # NOTE(review): newer FFmpeg releases appear to expose this field as
    # `pts_time` instead of `pkt_pts_time` - confirm against the installed
    # ffprobe version.
    ffprobe_cmd = ['ffprobe', '-i', video_path,
                   '-v', 'quiet', '-select_streams', 'v',
                   '-show_entries', 'frame=pkt_pts_time,pict_type']
    probe = subprocess.run(ffprobe_cmd, stdout=subprocess.PIPE,
                           universal_newlines=True, check=False)

    # Equivalent of `grep -B 1 pict_type=I | grep pkt_pts_time`: keep the
    # line directly before each I-frame marker when it holds the timestamp.
    key_times = []
    previous = ''
    for line in probe.stdout.splitlines():
        if 'pict_type=I' in line and 'pkt_pts_time' in previous:
            key_times.append(previous)
        previous = line

    log_path = os.path.join(out_dir, '{}.log'.format(video_id))
    with open(log_path, 'w') as log_file:
        log_file.writelines(entry + '\n' for entry in key_times)
def rename_keyframe(video_path, frame_path, mode='key', frame_per_sec=1):
    """Rename extracted frames so each file name carries its timestamp.

    Every ``*.jpg`` in ``<frame_path>/<video_id>/`` is renamed from
    ``<prefix>_<counter>.jpg`` to ``<prefix>_<timestamp>.jpg``; the files stay
    in the same directory.

    Args:
        video_path: Path of the source video. ``video_id`` is the base name up
            to its first dot.
        frame_path: Root directory that holds the ``<video_id>`` frame folder.
        mode: ``'key'`` reads the timestamps from ``<video_id>.log`` (one
            ``name=value`` line per frame); any other value assumes uniformly
            sampled frames starting at 0 seconds.
        frame_per_sec: Sampling rate used in uniform mode.

    If the log holds fewer timestamps than there are frames (or vice versa),
    only the leading pairs are renamed.
    """
    video_id = os.path.basename(video_path).split('.')[0]
    frame_dir = os.path.join(frame_path, video_id)
    # The counters are zero-padded, so lexicographic order is frame order.
    id_files = sorted(glob.glob(os.path.join(frame_dir, '*.jpg')))

    if mode == 'key':
        log_path = os.path.join(frame_dir, '{}.log'.format(video_id))
        with open(log_path) as log_file:
            # Skip blank or separator lines that carry no `name=value` pair.
            stamps = [line.strip().split('=')[1].zfill(15)
                      for line in log_file if '=' in line]
    else:
        # index / rate avoids the drift of a running floating-point sum.
        stamps = ['{:0>15.4f}'.format(index / frame_per_sec)
                  for index in range(len(id_files))]

    for id_file, stamp in zip(id_files, stamps):
        directory, file_name = os.path.split(id_file)
        # Drop the trailing counter by splitting at the last underscore
        # instead of slicing a fixed number of characters, so the prefix is
        # correct for any counter width.
        head, sep, _counter = os.path.splitext(file_name)[0].rpartition('_')
        shutil.move(id_file,
                    os.path.join(directory, head + sep + stamp + '.jpg'))
def extract_uniformframe(video_path, frame_path, frame_per_sec=1):
    """Extract frames from a video at a fixed rate.

    Frames are written to
    ``<frame_path>/<video_id>/<video_id>_NNNNNNNN.jpg``.

    Args:
        video_path: Path of the input video. ``video_id`` is the base name up
            to its first dot.
        frame_path: Root output directory; created if missing.
        frame_per_sec: Number of frames to extract per second of video.

    Raises:
        OSError: If the output directory cannot be created or the ``ffmpeg``
            executable cannot be started.
    """
    video_id = os.path.basename(video_path).split('.')[0]
    # Create exactly the directory ffmpeg writes into (joined path, not plain
    # string concatenation of frame_path and video_id).
    out_dir = os.path.join(frame_path, video_id)
    os.makedirs(out_dir, exist_ok=True)

    # Argument list instead of a shell string: paths with spaces or shell
    # metacharacters are passed through untouched.
    command = ['ffmpeg', '-i', video_path,
               '-r', str(frame_per_sec),
               '-q:v', '2', '-f', 'image2',
               os.path.join(out_dir, '{}_%08d.jpg'.format(video_id))]
    # check=False: a failing ffmpeg run is ignored, as with os.system.
    subprocess.run(command, check=False)
def extract(video_path, frame_path, mode='key', num_worker=5, frame_per_sec_q=1):
    """Extract frames from one video, then rename them by timestamp.

    Args:
        video_path: Path of the input video.
        frame_path: Root output directory for the frames.
        mode: ``'key'`` extracts key (I) frames; ``'uniform'`` extracts frames
            at a fixed rate.
        num_worker: Size of the process pool. Only one task is submitted per
            call.
        frame_per_sec_q: Frames per second used in ``'uniform'`` mode.

    Raises:
        ValueError: If ``mode`` is neither ``'key'`` nor ``'uniform'``.
        Exception: Any error raised inside the extraction worker is re-raised
            here instead of being silently dropped.
    """
    if mode == 'key':
        worker = extract_keyframe
        worker_args = (video_path, frame_path)
        rename_kwargs = {}
    elif mode == 'uniform':
        worker = extract_uniformframe
        worker_args = (video_path, frame_path, frame_per_sec_q)
        rename_kwargs = {'mode': 'uniform', 'frame_per_sec': frame_per_sec_q}
    else:
        # Fail loudly: a mistyped mode used to do nothing at all.
        raise ValueError(
            "unknown mode {!r}; expected 'key' or 'uniform'".format(mode))

    with Pool(processes=num_worker) as pool:
        # apply() blocks until the task finishes and re-raises worker errors,
        # so renaming never runs on a failed extraction.
        pool.apply(worker, worker_args)

    rename_keyframe(video_path, frame_path, **rename_kwargs)
if __name__ == '__main__':
    # Demo run: sample frames uniformly from test.mp4 into ./outputs.
    video_path, frame_path = 'test.mp4', './outputs'
    extract(video_path=video_path, frame_path=frame_path, mode='uniform')