Dlib 官网 - Dlib C++ Library

Dlib - Github

Dlib 是一个十分优秀好用的机器学习库,其源码均由 C++ 实现,并提供了 Python 接口,可广泛适用于很多场景.

这里主要记录 Dlib 中关于人脸检测和人脸关键点等技术的 python 应用.

pip 安装:

sudo apt-get install cmake
sudo pip install dlib

Github 源码安装.

1. 人脸检测 Face Detector

人脸检测,是检测出图片中包含的正面人脸.

1.1. 基于 HOG 特征和线性分类器的人脸检测

下面是采用经典的 HOG(Histogram of Oriented Gradients) 特征 结合线性分类器、图像金字塔(image pyramid) 及滑窗检测机制(sliding window detection scheme)实现的人脸检测器.

#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
import dlib

# Frontal face detector (the HOG + linear classifier approach described above),
# created once at module level and used by the detection functions below.
detector = dlib.get_frontal_face_detector()

def face_detect(imgfile):
    """Detect frontal faces in an image file and display them.

    Prints the number of detections and the bounding box of each one, draws
    the boxes over the image in a dlib window, and then blocks until the
    user presses enter.

    Args:
        imgfile: Path of the image to load with ``dlib.load_rgb_image``.
    """
    viewer = dlib.image_window()

    print("Processing file: {}".format(imgfile))
    image = dlib.load_rgb_image(imgfile)

    # Run the module-level face detector on the image.
    # NOTE(review): the second argument (1) is presumably the number of
    # upsampling passes, as in dlib's own examples - confirm in the dlib docs.
    faces = detector(image, 1)

    # The length of the result is the number of faces found.
    print("Number of faces detected: {}".format(len(faces)))

    # Report the coordinates of every detected face:
    #   left   - distance from the face's left edge to the image's left border
    #   right  - distance from the face's right edge to the image's left border
    #   top    - distance from the face's top edge to the image's top border
    #   bottom - distance from the face's bottom edge to the image's top border
    box_line = "Detection {}: Left: {} Top: {} Right: {} Bottom: {}"
    for index, box in enumerate(faces):
        print(box_line.format(
            index, box.left(), box.top(), box.right(), box.bottom()))

    viewer.clear_overlay()
    viewer.set_image(image)
    viewer.add_overlay(faces)
    dlib.hit_enter_to_continue()


def face_detect_with_scores(imgfile):
    """Detect faces and print each detection with its score and face type.

    Uses ``detector.run`` instead of a plain call to obtain richer results:
    a score per detection (the larger the score, the more confident the
    detection) and a face-type index. The boxes are then drawn over the
    image and the function blocks until the user presses enter.

    Args:
        imgfile: Path of the image to load with ``dlib.load_rgb_image``.
    """
    viewer = dlib.image_window()
    image = dlib.load_rgb_image(imgfile)

    # The third argument (-1) adjusts the detection threshold:
    # a negative value returns more detections, a positive value fewer.
    boxes, confidences, face_types = detector.run(image, 1, -1)

    # The three results are parallel sequences: entry k of each one
    # describes the same detection.
    report = "Detection {}, score: {}, face_type:{}"
    for box, confidence, face_type in zip(boxes, confidences, face_types):
        print(report.format(box, confidence, face_type))

    viewer.clear_overlay()
    viewer.set_image(image)
    viewer.add_overlay(boxes)
    dlib.hit_enter_to_continue()


if __name__ == '__main__':
    # Demo entry point: run both detection variants on the same sample image.
    # The path is relative, so "test.jpg" must exist in the working directory.
    imgfile = "test.jpg"
    face_detect(imgfile)
    face_detect_with_scores(imgfile)

1.2. 基于 CNN 的人脸检测

采用预训练的 CNN 模型进行图片中的人脸检测.

基于 CNN 模型比基于 HOG 特征模型的人脸检测准确度更高. 但是需要更多的计算资源,即在 GPU 上运行才可有较好的运行速率.

预训练模型:

http://dlib.net/files/mmod_human_face_detector.dat.bz2

#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
import dlib


# Load the pre-trained CNN face detection model.
# The model file is looked up by a relative path; it is the decompressed form
# of the mmod_human_face_detector.dat.bz2 download referenced above.
cnn_face_model = "mmod_human_face_detector.dat"
cnn_face_detector = dlib.cnn_face_detection_model_v1(cnn_face_model)

def cnn_face_detect(imgfile):
    """Detect faces with the CNN detector and display them.

    Prints the number of detections plus the bounding box and confidence of
    each one, draws the boxes over the image in a dlib window, and then
    blocks until the user presses enter.

    Args:
        imgfile: Path of the image to load with ``dlib.load_rgb_image``.
    """
    viewer = dlib.image_window()

    print("Processing file: {}".format(imgfile))
    image = dlib.load_rgb_image(imgfile)

    # Run the CNN face detector.
    # It returns an mmod_rectangles object, i.e. a list of mmod_rectangle
    # objects. Each mmod_rectangle carries two members:
    #     [1] rect       - a dlib.rectangle
    #     [2] confidence - the confidence score
    hits = cnn_face_detector(image, 1)

    # Batch form, for detecting on several images at once:
    #     cnn_face_detector([image list], upsample_num, batch_size = 128)

    # The length of the result is the number of faces found.
    print("Number of faces detected: {}".format(len(hits)))

    hit_line = ("Detection {}: Left: {} Top: {} Right: {} Bottom: {} "
                "Confidence: {}")
    for index, hit in enumerate(hits):
        print(hit_line.format(
            index,
            hit.rect.left(), hit.rect.top(),
            hit.rect.right(), hit.rect.bottom(),
            hit.confidence))

    # The overlay is given plain rectangles, so collect the .rect of each
    # detection into a dlib.rectangles container.
    plain_boxes = dlib.rectangles()
    plain_boxes.extend([hit.rect for hit in hits])

    viewer.clear_overlay()
    viewer.set_image(image)
    viewer.add_overlay(plain_boxes)
    dlib.hit_enter_to_continue()


if __name__ == '__main__':
    # Demo entry point: run CNN face detection on a sample image.
    # The path is relative, so "test.jpg" must exist in the working directory.
    imgfile = "test.jpg"
    cnn_face_detect(imgfile)

2. 人脸关键点检测 Face Landmark Detection

人脸关键点检测,首先需要检测出图片中的人脸,并估计人脸的关键点姿态(pose).

人脸关键点共有 68 个,分别是人脸各部位的点,如嘴角(corners of the mouth),眼角(corners of the eyes)等.

image

From:https://blog.csdn.net/kgzhang/article/details/75309395

2.1. HOG 人脸框及人脸关键点检测

人脸关键点检测预训练模型:

http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2

iBUG 300-W 人脸关键点数据集:

https://ibug.doc.ic.ac.uk/resources/facial-point-annotations/

#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
import os
import dlib


# Load the frontal face detector.
detector = dlib.get_frontal_face_detector()
# Load the facial landmark prediction model (relative path; the decompressed
# form of the shape_predictor_68_face_landmarks.dat.bz2 download above).
predictor_path = "shape_predictor_68_face_landmarks.dat"
predictor = dlib.shape_predictor(predictor_path)

def face_landmark_detect(imgfile):
    """Detect faces, predict their landmarks and display both.

    For every face found by the module-level ``detector`` the module-level
    ``predictor`` is run inside the face box. The box, the first two landmark
    points and the number of faces are printed; landmarks and boxes are drawn
    over the image, and the function blocks until the user presses enter.

    Args:
        imgfile: Path of the image to load with ``dlib.load_rgb_image``.
    """
    viewer = dlib.image_window()

    print("Processing file: {}".format(imgfile))
    image = dlib.load_rgb_image(imgfile)

    viewer.clear_overlay()
    viewer.set_image(image)

    # Find the bounding box of every face.
    faces = detector(image, 1)

    # The length of the result is the number of faces found.
    print("Number of faces detected: {}".format(len(faces)))

    box_line = "Detection {}: Left: {} Top: {} Right: {} Bottom: {}"
    parts_line = "Part 0: {}, Part 1: {} ..."
    for index, box in enumerate(faces):
        print(box_line.format(
            index, box.left(), box.top(), box.right(), box.bottom()))

        # Predict the landmarks of the face inside this box.
        landmarks = predictor(image, box)
        first_point = landmarks.part(0)
        second_point = landmarks.part(1)
        print(parts_line.format(first_point, second_point))

        # Draw the landmarks.
        viewer.add_overlay(landmarks)

    viewer.add_overlay(faces)
    dlib.hit_enter_to_continue()


if __name__ == '__main__':
    # Demo entry point: run landmark detection on a sample image.
    # The path is relative, so "test.jpg" must exist in the working directory.
    imgfile = "test.jpg"
    face_landmark_detect(imgfile)

2.2. CNN 人脸框及人脸关键点检测

#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
import os
import dlib


# Load the pre-trained CNN face detection model (relative path).
cnn_face_model = "mmod_human_face_detector.dat"
cnn_face_detector = dlib.cnn_face_detection_model_v1(cnn_face_model)

# Load the facial landmark prediction model (relative path).
predictor_path = "shape_predictor_68_face_landmarks.dat"
predictor = dlib.shape_predictor(predictor_path)


def cnn_face_landmark_detect(imgfile):
    """Detect faces with the CNN detector, predict landmarks and display both.

    For every detection returned by the module-level ``cnn_face_detector``
    the module-level ``predictor`` is run inside the detection's rectangle.
    The box, its confidence and the first two landmark points are printed;
    landmarks and boxes are drawn over the image, and the function blocks
    until the user presses enter.

    Args:
        imgfile: Path of the image to load with ``dlib.load_rgb_image``.
    """
    viewer = dlib.image_window()

    print("Processing file: {}".format(imgfile))
    image = dlib.load_rgb_image(imgfile)

    viewer.clear_overlay()
    viewer.set_image(image)

    # Find the bounding box of every face.
    hits = cnn_face_detector(image, 1)

    # The length of the result is the number of faces found.
    print("Number of faces detected: {}".format(len(hits)))

    hit_line = ("Detection {}: Left: {} Top: {} Right: {} Bottom: {} "
                "Confidence: {}")
    parts_line = "Part 0: {}, Part 1: {} ..."
    for index, hit in enumerate(hits):
        print(hit_line.format(
            index,
            hit.rect.left(), hit.rect.top(),
            hit.rect.right(), hit.rect.bottom(),
            hit.confidence))

        # Predict the landmarks of the face inside this detection's box.
        landmarks = predictor(image, hit.rect)
        first_point = landmarks.part(0)
        second_point = landmarks.part(1)
        print(parts_line.format(first_point, second_point))

        # Draw the landmarks.
        viewer.add_overlay(landmarks)

    # The overlay is given plain rectangles, so collect the .rect of each
    # detection into a dlib.rectangles container.
    plain_boxes = dlib.rectangles()
    plain_boxes.extend([hit.rect for hit in hits])

    viewer.add_overlay(plain_boxes)
    dlib.hit_enter_to_continue()


if __name__ == '__main__':
    # Demo entry point: run CNN detection plus landmark prediction on a
    # sample image. The path is relative, so "test.jpg" must exist in the
    # working directory.
    imgfile = "test.jpg"
    cnn_face_landmark_detect(imgfile)

2.3. 人脸关键点检测模型训练

这里主要是介绍对于论文 One Millisecond Face Alignment with an Ensemble of Regression Trees(CVPR 2014) 采用 dlib 的实现.

基于小规模数据集的人脸关键点模型训练. 假设数据集路径为:examples/faces_folder

http://dlib.net/train_shape_predictor.py.html

#!/usr/bin/python
import os
import sys
import glob

import dlib

def train_face_landmark_model(faces_folder, model_path="predictor.dat"):
    """Train a dlib face landmark predictor, evaluate it and demo it.

    The function trains a shape predictor from the training XML in
    ``faces_folder``, saves it to ``model_path``, prints its accuracy on the
    training and testing XML files, and finally shows the predictions for
    every ``*.jpg`` in ``faces_folder``, pausing after each image until the
    user presses enter.

    Args:
        faces_folder: Directory containing
            ``training_with_face_landmarks.xml``,
            ``testing_with_face_landmarks.xml`` and the ``*.jpg`` images.
        model_path: File the trained predictor is written to and then read
            back from. Defaults to ``"predictor.dat"``.
    """
    # NOTE(review): these hyper-parameter values are taken as-is from dlib's
    # train_shape_predictor.py example, which targets a very small dataset;
    # revisit them before training on a larger one.
    options = dlib.shape_predictor_training_options()
    options.oversampling_amount = 300
    options.nu = 0.05
    options.tree_depth = 2
    options.be_verbose = True

    # dlib.train_shape_predictor() does the actual training and saves the
    # final predictor to model_path. Its input is an XML file that lists the
    # training images and the positions of the faces in them.
    training_xml_path = os.path.join(faces_folder,
                                     "training_with_face_landmarks.xml")
    dlib.train_shape_predictor(training_xml_path, model_path, options)

    # dlib.test_shape_predictor() evaluates the saved model: it measures the
    # average distance between the landmarks output by the shape predictor
    # and the ground-truth landmarks.
    print("\nTraining accuracy: {}".format(
        dlib.test_shape_predictor(training_xml_path, model_path)))

    # The testing set is used only to assess the trained model, never for
    # training itself. The model is trained on a small dataset here; a larger
    # landmark dataset would be expected to give better results.
    testing_xml_path = os.path.join(faces_folder,
                                    "testing_with_face_landmarks.xml")
    print("Testing accuracy: {}".format(
        dlib.test_shape_predictor(testing_xml_path, model_path)))

    # Using the model: load the trained predictor back from disk. A face
    # detector is also needed to locate the faces first.
    predictor = dlib.shape_predictor(model_path)
    detector = dlib.get_frontal_face_detector()

    # Run face detection and landmark prediction, and display the results.
    print("Showing detections and predictions on the images in the faces folder...")
    win = dlib.image_window()
    for f in glob.glob(os.path.join(faces_folder, "*.jpg")):
        print("Processing file: {}".format(f))
        img = dlib.load_rgb_image(f)

        win.clear_overlay()
        win.set_image(img)

        # Find the bounding box of every face.
        dets = detector(img, 1)
        print("Number of faces detected: {}".format(len(dets)))
        for k, d in enumerate(dets):
            print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
                k, d.left(), d.top(), d.right(), d.bottom()))

            # Predict the landmarks of the face inside box d.
            shape = predictor(img, d)
            print("Part 0: {}, Part 1: {} ...".format(shape.part(0),
                                                      shape.part(1)))
            # Draw the landmarks.
            win.add_overlay(shape)

        # Draw the boxes and pause for THIS image. These two calls must stay
        # inside the loop: the next iteration clears the overlay, and `dets`
        # does not exist at all when the folder contains no *.jpg files.
        win.add_overlay(dets)
        dlib.hit_enter_to_continue()
Last modification:April 24th, 2019 at 09:09 pm