在 StyleGAN2 中,可以基于 Dlib 库实现人脸关键点检测和人脸对齐。
1. 人脸关键点检测(保存为 landmarks_detector.py)
import dlib
# Pretrained model archive: http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
# Decompress it first; predictor_model_path must point to the extracted .dat file.
class LandmarksDetector:
    """Find faces in an image with dlib and predict landmark points for each."""

    def __init__(self, predictor_model_path):
        """
        :param predictor_model_path: path to shape_predictor_68_face_landmarks.dat file
        """
        # cnn_face_detection_model_v1 also can be used
        self.detector = dlib.get_frontal_face_detector()
        self.shape_predictor = dlib.shape_predictor(predictor_model_path)

    def get_landmarks(self, image):
        """
        Lazily yield the landmarks of every face detected in an image file.

        :param image: path of the image file; it is loaded with dlib.load_rgb_image
        :return: generator producing, for each detection, a list of (x, y)
                 tuples taken from the shape predictor's parts
        """
        img = dlib.load_rgb_image(image)
        # Second argument is dlib's upsample count: upsample the image once
        # before detection.
        dets = self.detector(img, 1)
        for detection in dets:
            # Only the prediction is guarded. The yield stays outside the try
            # so that GeneratorExit (raised at the yield when the consumer
            # stops early or the generator is closed) is never swallowed.
            try:
                shape = self.shape_predictor(img, detection)
                face_landmarks = [(item.x, item.y) for item in shape.parts()]
            except Exception as exc:
                # Best effort: report the failing detection and move on.
                print("Exception in get_landmarks()!", exc)
                continue
            yield face_landmarks
2. 人脸对齐(保存为 face_alignment.py)
import numpy as np
import scipy.ndimage
import os
import PIL.Image
def image_align(src_file,
                dst_file,
                face_landmarks,
                output_size=1024,
                transform_size=4096,
                enable_padding=True,
                x_scale=1,
                y_scale=1,
                em_scale=0.1,
                alpha=False):
    """
    Cut an oriented square around one face and save it as a PNG.

    The crop rectangle is derived from the eye and mouth landmarks, the image
    is optionally shrunk, cropped and reflect-padded, and the rectangle is
    then warped to a square image.

    :param src_file: path of the source image
    :param dst_file: path the aligned PNG is written to
    :param face_landmarks: sequence of (x, y) points; indices 36-47 (eyes) and
                           48-59 (outer mouth) are read
    :param output_size: side length of the saved image when it is smaller than
                        transform_size
    :param transform_size: side length of the intermediate warped image
    :param enable_padding: reflect-pad and blur the borders when the crop
                           rectangle reaches outside the image
    :param x_scale: multiplier applied to the rectangle's horizontal half-axis
    :param y_scale: multiplier applied to the rectangle's vertical half-axis
    :param em_scale: fraction of the eye-to-mouth vector by which the rectangle
                     centre is moved away from the eye midpoint
    :param alpha: when padding is applied, append the padding mask as a fourth
                  channel and produce an RGBA image; has no effect otherwise
    :return: None. If src_file does not exist a message is printed and nothing
             is written.
    """
    # Align function from FFHQ dataset pre-processing step
    # https://github.com/NVlabs/ffhq-dataset/blob/master/download_ffhq.py
    lm = np.array(face_landmarks)
    # Landmark index layout:
    #   0-16 chin, 17-21 left eyebrow, 22-26 right eyebrow, 27-30 nose,
    #   31-35 nostrils, 36-41 left eye, 42-47 right eye,
    #   48-59 outer mouth, 60-67 inner mouth.
    # Only the eyes and the outer mouth are needed here.
    lm_eye_left = lm[36:42]
    lm_eye_right = lm[42:48]
    lm_mouth_outer = lm[48:60]

    # Calculate auxiliary vectors.
    eye_left = np.mean(lm_eye_left, axis=0)
    eye_right = np.mean(lm_eye_right, axis=0)
    eye_avg = (eye_left + eye_right) * 0.5
    eye_to_eye = eye_right - eye_left
    mouth_left = lm_mouth_outer[0]
    mouth_right = lm_mouth_outer[6]
    mouth_avg = (mouth_left + mouth_right) * 0.5
    eye_to_mouth = mouth_avg - eye_avg

    # Choose oriented crop rectangle.
    x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1]
    x /= np.hypot(*x)
    x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8)
    x *= x_scale
    y = np.flipud(x) * [-y_scale, y_scale]
    c = eye_avg + eye_to_mouth * em_scale
    quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
    qsize = np.hypot(*x) * 2

    # Load in-the-wild image.
    if not os.path.isfile(src_file):
        print('\nCannot find source image. Please run "--wilds" before "--align".')
        return
    # convert() returns an independent, fully loaded image, so the file handle
    # can be released as soon as the conversion is done.
    with PIL.Image.open(src_file) as src_img:
        img = src_img.convert('RGBA').convert('RGB')

    # Shrink.
    shrink = int(np.floor(qsize / output_size * 0.5))
    if shrink > 1:
        rsize = (int(np.rint(float(img.size[0]) / shrink)), int(np.rint(float(img.size[1]) / shrink)))
        # LANCZOS is the filter ANTIALIAS aliased; ANTIALIAS was removed in Pillow 10.
        img = img.resize(rsize, PIL.Image.LANCZOS)
        quad /= shrink
        qsize /= shrink

    # Crop.
    border = max(int(np.rint(qsize * 0.1)), 3)
    crop = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))),
            int(np.ceil(max(quad[:, 0]))), int(np.ceil(max(quad[:, 1]))))
    crop = (max(crop[0] - border, 0), max(crop[1] - border, 0),
            min(crop[2] + border, img.size[0]), min(crop[3] + border, img.size[1]))
    if crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]:
        img = img.crop(crop)
        # Keep the rectangle in the cropped image's coordinate system.
        quad -= crop[0:2]

    # Pad.
    pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))),
           int(np.ceil(max(quad[:, 0]))), int(np.ceil(max(quad[:, 1]))))
    pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0),
           max(pad[2] - img.size[0] + border, 0), max(pad[3] - img.size[1] + border, 0))
    if enable_padding and max(pad) > border - 4:
        pad = np.maximum(pad, int(np.rint(qsize * 0.3)))
        img = np.pad(np.float32(img), ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect')
        h, w, _ = img.shape
        yy, xx, _ = np.ogrid[:h, :w, :1]
        # mask is 1 at the outer edge of the padded image and falls off
        # linearly over the padding width on each side.
        mask = np.maximum(
            1.0 - np.minimum(np.float32(xx) / pad[0], np.float32(w - 1 - xx) / pad[2]),
            1.0 - np.minimum(np.float32(yy) / pad[1], np.float32(h - 1 - yy) / pad[3]))
        blur = qsize * 0.02
        # Blend towards a blurred copy, then towards the median colour,
        # weighted by the mask.
        img += (scipy.ndimage.gaussian_filter(img, [blur, blur, 0]) - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0)
        img += (np.median(img, axis=(0, 1)) - img) * np.clip(mask, 0.0, 1.0)
        img = np.uint8(np.clip(np.rint(img), 0, 255))
        if alpha:
            mask = 1 - np.clip(3.0 * mask, 0.0, 1.0)
            mask = np.uint8(np.clip(np.rint(mask * 255), 0, 255))
            img = np.concatenate((img, mask), axis=2)
            img = PIL.Image.fromarray(img, 'RGBA')
        else:
            img = PIL.Image.fromarray(img, 'RGB')
        # Keep the rectangle in the padded image's coordinate system.
        quad += pad[:2]

    # Transform.
    img = img.transform((transform_size, transform_size), PIL.Image.QUAD, (quad + 0.5).flatten(), PIL.Image.BILINEAR)
    if output_size < transform_size:
        img = img.resize((output_size, output_size), PIL.Image.LANCZOS)

    # Save aligned image.
    img.save(dst_file, 'PNG')
3. 人脸检测及对齐(保存为 align_images.py)
# from https://github.com/rolux
import os
import sys
import bz2
from face_alignment import image_align
from landmarks_detector import LandmarksDetector
def unpack_bz2(src_path):
    """
    Decompress a bzip2 file and write the result next to it.

    :param src_path: path of the compressed file; its last four characters
                     (the ".bz2" extension) are dropped to form the output path
    :return: path of the decompressed file
    """
    # Close the archive handle deterministically instead of leaving it to the
    # garbage collector. All data is read before the output file is opened, so
    # a corrupt archive does not leave a partial output file behind.
    with bz2.BZ2File(src_path) as archive:
        data = archive.read()
    dst_path = src_path[:-4]
    with open(dst_path, 'wb') as fp:
        fp.write(data)
    return dst_path
if __name__ == "__main__":
    # Extracts and aligns all faces from images using DLib and
    # a function from original FFHQ dataset preparation step
    # python align_images.py /raw_images /aligned_images

    # Fail with a usage message instead of an IndexError traceback when the
    # two directory arguments are missing.
    if len(sys.argv) < 3:
        sys.exit('Usage: python align_images.py <raw_images_dir> <aligned_images_dir>')
    RAW_IMAGES_DIR = sys.argv[1]
    ALIGNED_IMAGES_DIR = sys.argv[2]

    # The archive is expected in the current working directory; download from
    # 'http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2'
    landmarks_model_path = unpack_bz2('./shape_predictor_68_face_landmarks.dat.bz2')
    landmarks_detector = LandmarksDetector(landmarks_model_path)

    # Creating the output directory does not depend on the loop, so do it once.
    os.makedirs(ALIGNED_IMAGES_DIR, exist_ok=True)

    for img_name in os.listdir(RAW_IMAGES_DIR):
        # Skip entries whose name starts with '.' or '_'.
        if img_name.startswith(('.', '_')):
            continue
        raw_img_path = os.path.join(RAW_IMAGES_DIR, img_name)
        # Sub-directories cannot be loaded as images.
        if not os.path.isfile(raw_img_path):
            continue
        # One output file per detected face, numbered from 01.
        for i, face_landmarks in enumerate(landmarks_detector.get_landmarks(raw_img_path), start=1):
            face_img_name = '%s_%02d.png' % (os.path.splitext(img_name)[0], i)
            aligned_face_path = os.path.join(ALIGNED_IMAGES_DIR, face_img_name)
            image_align(raw_img_path, aligned_face_path, face_landmarks)
使用:
python align_images.py ./unaligned_images ./aligned_images