Face Recognition

Principle #

insightface==0.7.3

The technical essence of face recognition is to map a face image to a vector that can be compared in a high-dimensional space, and then to decide with a geometric metric (usually cosine similarity) whether two faces belong to the same identity.
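A minimal sketch of that idea with the insightface version pinned above: extract one embedding per image through FaceAnalysis and compare the two vectors by cosine similarity. The sample image paths, the choice of the first detected face, and the 0.4 threshold are illustrative assumptions, not values dictated by the library.

import cv2
import numpy as np
import insightface

app = insightface.app.FaceAnalysis()
app.prepare(ctx_id=0, det_size=(640, 640))

def face_embedding(path):
    # Return the embedding of the first detected face, or None if the image or face is missing.
    img = cv2.imread(path, cv2.IMREAD_COLOR)
    faces = app.get(img) if img is not None else []
    return faces[0].embedding if faces else None

a = face_embedding("person_a.jpg")  # hypothetical sample images
b = face_embedding("person_b.jpg")
if a is not None and b is not None:
    sim = float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
    print(f"cosine similarity: {sim:.3f}")  # e.g. treat >= 0.4 as the same identity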


InsightFace Invocation Code #

import insightface
from .base import BaseModel
from typing import Union
import numpy as np
import cv2


class InsightFaceModel(BaseModel):
    def __init__(self, model_path, classes):
        super().__init__(model_path, classes)
        # FaceAnalysis loads the detection, age/gender and recognition models found under model_path.
        model = insightface.app.FaceAnalysis(root=model_path)
        model.prepare(ctx_id=0, det_size=(640, 640))  # ctx_id=0 selects GPU 0 (CPU if unavailable)
        self.model = model

    def _preprocess(self, data: Union[str, bytes, np.ndarray]):
        # Accept a decoded BGR image, raw encoded bytes, or a file path; always return an ndarray.
        if isinstance(data, np.ndarray):
            return data
        if isinstance(data, bytes):
            np_arr = np.frombuffer(data, np.uint8)
            img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
            return img
        if isinstance(data, str):
            img = cv2.imread(data, cv2.IMREAD_COLOR)
            return img
        raise TypeError("Unsupported input data type")

    def inference(self, data):
        img_data = self._preprocess(data)
        detections = self.model.get(img_data)
        res = self._postprocess(detections)
        return res

    def _postprocess(self, detections):
        res = []
        for detection in detections:
            # Each detection is an insightface Face object: dict-style keys plus the `sex` property.
            item = {
                "class": self.classes[0]['name'],
                "age": detection['age'],
                "bbox": detection['bbox'].tolist(),
                "sex": detection.sex,
                "confidence": float(detection['det_score']),
                "embedding": detection['embedding'].tolist(),
            }
            res.append(item)
        return res
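
A hypothetical usage of this wrapper; the model_path value is a placeholder, and the single-entry classes list is only inferred from how _postprocess reads classes[0]['name']:

# Placeholder paths; the InsightFace model files are expected under model_path.
model = InsightFaceModel(
    model_path=r"C:\models\face-detection",
    classes=[{"name": "face"}],
)
results = model.inference(r"C:\images\sample.jpg")  # a file path, raw bytes, or an ndarray all work
for r in results:
    print(r["class"], r["sex"], r["age"], f"{r['confidence']:.2f}", r["bbox"])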

Test Function #

import json
import os
import cv2
import numpy as np
import app.cv as cv_mod
from app.services.frame import _read_video_interval_frames
import sys

def _draw(frame, dets):
    for d in dets:
        if d.get("class") == "head":
            continue
        x1, y1, x2, y2 = [int(v) for v in d["bbox"]]
        color = (255, 255, 0)
        text_color = (0, 0, 255)
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        label = f"match {d.get('match_score', d.get('confidence', 0)):.2f}"
        cv2.putText(frame, label, (x1, max(0, y1 - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, text_color, 2)
    return frame

def _cosine(a, b):
    a = np.asarray(a, dtype=np.float32)
    b = np.asarray(b, dtype=np.float32)
    if a.size == 0 or b.size == 0:
        return 0.0
    na = np.linalg.norm(a)
    nb = np.linalg.norm(b)
    if na == 0 or nb == 0:
        return 0.0
    return float(np.dot(a, b) / (na * nb))

def main():
    video_path = r"E:\镜像测试\公交车1.mp4"
    model_root = r"C:\Users\xxx\AppData\Roaming\FirmamentAIEngine\models\face-detection"
    model_name = "face-detection"
    track = False
    start_ms = 0
    end_ms = sys.maxsize
    display_delay_ms = 1  # per-frame cv2.waitKey delay in milliseconds
    if not os.path.exists(video_path):
        print(f"视频文件不存在: {video_path}")
        return

    cv_mod.init_model(model_root, model_name)
    from app.cv import CV_MODEL
    from app.cv.model_pool import ModelPool
    has_model_pool = isinstance(CV_MODEL, ModelPool)

    if has_model_pool:
        try:
            model_instance = cv_mod.CV_MODEL.get_model()
        except TimeoutError:
            print("The system resources are insufficient, please try again later.")
            return
    else:
        # No pool in use; CV_MODEL is already a ready model instance.
        model_instance = CV_MODEL


    target_path = r"E:\镜像测试\face_target2.png"
    target = model_instance.inference(target_path)
    # Keep the embedding of the highest-confidence face found in the target image.
    target_vec = None
    best_conf = -1.0
    for d in target:
        emb = d.get('embedding')
        conf = float(d.get('confidence', 0.0))
        if isinstance(emb, list) and len(emb) > 0 and conf > best_conf:
            target_vec = np.asarray(emb, dtype=np.float32)
            best_conf = conf
    if target_vec is None:
        print("目标图像未获取到有效人脸特征向量")
        return

    thresh = 0.4  # cosine-similarity match threshold (empirical)
    for frame, idx, pts in _read_video_interval_frames(video_path, 0, start_ms, end_ms):
        det = model_instance.inference(frame)
        matches = []
        for d in det:
            emb = d.get('embedding')
            if isinstance(emb, list) and len(emb) > 0:
                sim = _cosine(target_vec, emb)
                if sim >= thresh:
                    matches.append({
                        "class": d.get("class", "face"),
                        "bbox": d["bbox"],
                        "confidence": d.get("confidence", 0.0),
                        "match_score": sim
                    })
        out = {"frame_index": idx, "timestamp_ms": pts, "detections": matches}
        print(json.dumps(out, ensure_ascii=False))
        # vis = _draw(frame.copy(), det)
        vis = _draw(frame.copy(), matches)
        cv2.imshow("YOLO-Track", vis)
        # imshow needs a waitKey call to refresh the window; press 'q' to stop early.
        if cv2.waitKey(display_delay_ms) & 0xFF == ord('q'):
            break
    cv2.destroyAllWindows()
    if has_model_pool:
        cv_mod.CV_MODEL.return_model(model_instance)

if __name__ == "__main__":
    main()