Principle #
insightface==0.7.3
At its core, face recognition converts a face image into a vector that can be compared in a high-dimensional embedding space, and decides whether two faces belong to the same identity with a geometric similarity metric (typically cosine similarity).
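As a minimal sketch of that decision rule: normalize the two embedding vectors and take their dot product. The 512-dimensional size and the 0.4 threshold below are illustrative assumptions, not values mandated by InsightFace, and the random vectors only stand in for real embeddings.

import numpy as np

def cosine_similarity(a, b):
    # Normalize both vectors, then take the dot product.
    a = np.asarray(a, dtype=np.float32)
    b = np.asarray(b, dtype=np.float32)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

# Random vectors stand in for real 512-d face embeddings (illustrative only).
emb_a = np.random.rand(512).astype(np.float32)
emb_b = np.random.rand(512).astype(np.float32)
is_same_person = cosine_similarity(emb_a, emb_b) >= 0.4  # hypothetical threshold
print(is_same_person)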
InsightFace wrapper code #
import insightface
from .base import BaseModel
from typing import Union
import numpy as np
import cv2


class InsightFaceModel(BaseModel):
    def __init__(self, model_path, classes):
        super().__init__(model_path, classes)
        # FaceAnalysis bundles the detection, alignment and recognition
        # models found under the given root directory.
        model = insightface.app.FaceAnalysis(root=model_path)
        model.prepare(ctx_id=0, det_size=(640, 640))
        self.model = model

    def _preprocess(self, data: Union[str, bytes, np.ndarray]):
        # Accept a decoded image, raw encoded bytes, or a file path.
        if isinstance(data, np.ndarray):
            return data
        if isinstance(data, bytes):
            np_arr = np.frombuffer(data, np.uint8)
            return cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
        if isinstance(data, str):
            return cv2.imread(data, cv2.IMREAD_COLOR)
        raise TypeError("Unsupported data type")

    def inference(self, data):
        img_data = self._preprocess(data)
        detections = self.model.get(img_data)
        return self._postprocess(detections)

    def _postprocess(self, detections):
        # Flatten each Face object into a JSON-serializable dict;
        # `sex` is a derived 'M'/'F' property on the Face object.
        res = []
        for detection in detections:
            res.append({
                "class": self.classes[0]["name"],
                "age": detection.age,
                "bbox": detection.bbox.tolist(),
                "sex": detection.sex,
                "confidence": float(detection.det_score),
                "embedding": detection.embedding.tolist(),
            })
        return res
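A minimal usage sketch of the wrapper above. The model directory, image path and classes list are hypothetical placeholders and must match your local setup; the constructor signature is the one defined by the wrapper.

# All paths and class labels below are hypothetical placeholders.
model = InsightFaceModel(
    model_path=r"C:\path\to\models\face-detection",
    classes=[{"name": "face"}],
)
faces = model.inference(r"C:\path\to\face.jpg")
for f in faces:
    print(f["class"], f["confidence"], len(f["embedding"]))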
Test function #
import json
import os
import sys

import cv2
import numpy as np

import app.cv as cv_mod
from app.services.frame import _read_video_interval_frames


def _draw(frame, dets):
    # Draw a box and match score for every detection except "head".
    for d in dets:
        if d.get("class") == "head":
            continue
        x1, y1, x2, y2 = [int(v) for v in d["bbox"]]
        color = (255, 255, 0)
        text_color = (0, 0, 255)
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        label = f"match {d.get('match_score', d.get('confidence', 0)):.2f}"
        cv2.putText(frame, label, (x1, max(0, y1 - 5)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, text_color, 2)
    return frame


def _cosine(a, b):
    # Cosine similarity with guards against empty or zero-norm vectors.
    a = np.asarray(a, dtype=np.float32)
    b = np.asarray(b, dtype=np.float32)
    if a.size == 0 or b.size == 0:
        return 0.0
    na = np.linalg.norm(a)
    nb = np.linalg.norm(b)
    if na == 0 or nb == 0:
        return 0.0
    return float(np.dot(a, b) / (na * nb))


def main():
    video_path = r"E:\镜像测试\公交车1.mp4"
    model_root = r"C:\Users\xxx\AppData\Roaming\FirmamentAIEngine\models\face-detection"
    model_name = "face-detection"
    track = False
    start_ms = 0
    end_ms = sys.maxsize
    display_delay_ms = 1

    if not os.path.exists(video_path):
        print(f"Video file does not exist: {video_path}")
        return

    cv_mod.init_model(model_root, model_name)

    # Import after init_model so CV_MODEL reflects the initialized state.
    from app.cv import CV_MODEL
    from app.cv.model_pool import ModelPool

    has_model_pool = isinstance(CV_MODEL, ModelPool)
    if has_model_pool:
        # Borrow an instance from the pool; this may time out under load.
        try:
            model_instance = cv_mod.CV_MODEL.get_model()
        except TimeoutError:
            print("The system resources are insufficient, please try again later.")
            return
    else:
        model_instance = CV_MODEL

    # Extract the target embedding: keep the highest-confidence face.
    target_path = r"E:\镜像测试\face_target2.png"
    target = model_instance.inference(target_path)
    target_vec = None
    best_conf = -1.0
    for d in target:
        emb = d.get("embedding")
        conf = float(d.get("confidence", 0.0))
        if isinstance(emb, list) and len(emb) > 0 and conf > best_conf:
            target_vec = np.asarray(emb, dtype=np.float32)
            best_conf = conf
    if target_vec is None:
        print("No valid face embedding could be extracted from the target image")
        return

    thresh = 0.4  # cosine-similarity threshold for declaring a match
    for frame, idx, pts in _read_video_interval_frames(video_path, 0, start_ms, end_ms):
        det = model_instance.inference(frame)
        matches = []
        for d in det:
            emb = d.get("embedding")
            if isinstance(emb, list) and len(emb) > 0:
                sim = _cosine(target_vec, emb)
                if sim >= thresh:
                    matches.append({
                        "class": d.get("class", "face"),
                        "bbox": d["bbox"],
                        "confidence": d.get("confidence", 0.0),
                        "match_score": sim,
                    })
        out = {"frame_index": idx, "timestamp_ms": pts, "detections": matches}
        print(json.dumps(out, ensure_ascii=False))

        # vis = _draw(frame.copy(), det)
        vis = _draw(frame.copy(), matches)
        cv2.imshow("YOLO-Track", vis)
        if cv2.waitKey(display_delay_ms) & 0xFF == ord("q"):
            break

    cv2.destroyAllWindows()

    # Return the borrowed instance to the pool, if one is in use.
    if isinstance(cv_mod.CV_MODEL, ModelPool):
        cv_mod.CV_MODEL.return_model(model_instance)


if __name__ == "__main__":
    main()