把YOLOv11和Python Qt做个用户界面程序-电子发烧友网

早些时间，小编想把PID控制器优化部分通过PyQt来实现用户界面化，不过看着窗口一堆参数，有点发怵：这玩意谁用啊？

参考《PID控制器参数自动优化示例和比较》

后来就搁置了。在通过Python以及YOLO来检测图中或者视频中的各种物体以及人物的时候，就会考虑：全部用代码来输出的？怎么通过一个简单直观的窗口界面来测试相应的功能？我觉得要再试一下，哪怕是最简单的方式呈现。这便是这篇文章的目的所在。我们通过YOLO现成的各种检测模块，实现：

图中物体的识别检测

检出物体的区域分割

人体姿态的检测

还有其他的检测功能（例如分类），感觉功能有重叠，就选了上面的三个功能简单地集成到一个Windows的软件界面中。以下图片均由AI生成，但是人体姿态检测的结果可能看不清楚输出。

按照我们之前的惯例，先提供结果，后提供代码。

[1]物体检测

[2]物体分割

[3]姿态检测

我们可以自己尝试运行一下。

# This program is part of a project developed by Amphenol Sensors.
# Copyright (C) 2024, Leo Lu
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.


import sys
from PyQt5.QtWidgets import (QApplication, QLabel, QPushButton, QVBoxLayout, QWidget, 
                             QFileDialog, QComboBox, QHBoxLayout)
from PyQt5.QtGui import QPixmap, QImage
from PyQt5.QtCore import Qt
import cv2
import numpy as np
import random


from ultralytics import YOLO
from ultralytics.models.yolo.pose.predict import PosePredictor


class YOLOInterface(QWidget):
    """Fixed-size window that runs a selectable YOLO task on one image.

    The left label shows the image picked by the user; the right label shows
    the same image after the selected task (object detection, instance
    segmentation or human pose) has drawn its results onto it.
    """

    def __init__(self):
        super().__init__()

        # Path of the image picked in load_image(); None until one is chosen.
        self.current_image = None
        # Pose predictor, built on the first pose request and reused afterwards
        # so the pose weights are not reloaded on every click.
        self._pose_predictor = None

        self.initUI()
        self.model_detec = YOLO('./yolo_models/yolo11s.pt')
        self.model_seg = YOLO('./yolo_models/yolo11s-seg.pt')
        # Only the path is kept here; the pose model is loaded by PosePredictor.
        self.model_bd = './yolo_models/yolo11s-pose.pt'
        # Pairs of keypoint indices joined by a line in plot_keypoints().
        # NOTE(review): presumably the 17-keypoint COCO layout of the pose
        # weights — confirm if a different pose model is used.
        self.connections = ((2, 4), (1, 3), (10, 8), (8, 6), (6, 5), (5, 7), (7, 9),
                            (6, 12), (12, 14), (14, 16), (5, 11), (11, 13), (13, 15))

    def initUI(self):
        """Build the widgets: two image labels on top, a control row below."""
        self.setWindowTitle('YOLO 图像检测')
        self.setGeometry(100, 100, 1200, 600)
        self.setFixedSize(1200, 600)

        main_layout = QVBoxLayout()

        # Image row: input on the left, annotated result on the right.
        image_layout = QHBoxLayout()

        self.image_label = QLabel(self)
        self.image_label.setFixedSize(600, 400)
        self.image_label.setAlignment(Qt.AlignCenter)
        image_layout.addWidget(self.image_label)

        self.result_label = QLabel(self)
        self.result_label.setFixedSize(600, 400)
        self.result_label.setAlignment(Qt.AlignCenter)
        image_layout.addWidget(self.result_label)

        main_layout.addLayout(image_layout)

        # Control row: pick image, pick task, run, quit.
        control_layout = QHBoxLayout()

        self.detect_button = QPushButton('选择图片', self)
        self.detect_button.clicked.connect(self.load_image)
        control_layout.addWidget(self.detect_button)

        # run_yolo() dispatches on these exact strings.
        self.yolo_combo = QComboBox(self)
        self.yolo_combo.addItems(['物体检测', '物体分割', '人体姿态识别'])
        control_layout.addWidget(self.yolo_combo)

        self.run_button = QPushButton('开始检测', self)
        self.run_button.clicked.connect(self.run_yolo)
        control_layout.addWidget(self.run_button)

        self.quit_button = QPushButton('退出', self)
        self.quit_button.clicked.connect(self.close_application)
        control_layout.addWidget(self.quit_button)

        main_layout.addLayout(control_layout)
        self.setLayout(main_layout)

    def load_image(self):
        """Ask the user for an image file and preview it in the left label.

        The chosen path is remembered in ``self.current_image``; cancelling
        the dialog leaves the previous selection untouched.
        """
        options = QFileDialog.Options()
        file_name, _ = QFileDialog.getOpenFileName(
            self, "选择图片文件", "", "Images (*.png *.jpg *.bmp)", options=options)
        if not file_name:
            return

        self.current_image = file_name
        preview = QPixmap(file_name).scaled(self.image_label.size(), Qt.KeepAspectRatio)
        self.image_label.setPixmap(preview)

    def plot_keypoints(self, image, keypoints, line_color=(60, 179, 113), point_color=(255, 0, 0),
                       offset=(0, 0), show_idx=False):
        """Draw skeleton lines and keypoint dots onto *image* in place.

        Args:
            image: Array that OpenCV draws on; it is modified and returned.
            keypoints: Object exposing ``.xy`` (one sequence of points per
                detected instance), or None to draw nothing.
            line_color: OpenCV colour tuple for the connecting lines.
            point_color: OpenCV colour tuple for the dots and index labels.
            offset: ``(dx, dy)`` added to every coordinate before drawing.
            show_idx: When True, write each keypoint's index next to its dot.

        Returns:
            The same *image* object that was passed in.
        """
        if keypoints is None:
            return image

        dx, dy = offset[0], offset[1]

        for person in keypoints.xy:
            count = len(person)
            if count == 0:
                continue

            # Skeleton: join the index pairs listed in self.connections.
            for start_index, end_index in self.connections:
                if start_index >= count or end_index >= count:
                    # Model produced fewer keypoints than the table expects.
                    continue
                start_point, end_point = person[start_index], person[end_index]
                # A coordinate of 0 is treated as "keypoint not available".
                if all(start_point[:2] > 0) and all(end_point[:2] > 0):
                    cv2.line(image,
                             (int(start_point[0] + dx), int(start_point[1] + dy)),
                             (int(end_point[0] + dx), int(end_point[1] + dy)),
                             line_color, 2)

            # Dots (and optional index labels) for every usable keypoint.
            for index, (x, y) in enumerate(person[:, :2]):
                if x > 0 or y > 0:
                    center = (int(x + dx), int(y + dy))
                    cv2.circle(image, center, 5, point_color, -1)
                    if show_idx:
                        cv2.putText(image, str(index), center,
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, point_color, 1, cv2.LINE_AA)

        return image

    @staticmethod
    def _read_image(path):
        """Return the image at *path* as an OpenCV array, or None if unreadable."""
        img = cv2.imread(path)
        if img is not None:
            return img

        # cv2.imread returns None instead of raising when it cannot read a
        # file; one known cause is a non-ASCII path on Windows. Reading the
        # bytes ourselves and decoding from memory sidesteps the path issue.
        try:
            raw = np.fromfile(path, dtype=np.uint8)
        except OSError:
            return None
        if raw.size == 0:
            return None
        return cv2.imdecode(raw, cv2.IMREAD_COLOR)

    def _draw_detections(self, img):
        """Run the detection model and draw labelled boxes onto *img*."""
        green = (0, 255, 0)
        for result in self.model_detec(img):
            for box in result.boxes:
                x1, y1, x2, y2 = (int(v) for v in box.xyxy[0])
                label = "{} {:.2g}".format(self.model_detec.names[int(box.cls[0])],
                                           box.conf.item())
                cv2.rectangle(img, (x1, y1), (x2, y2), green, 2)

                # Put the label above the box, or near its bottom edge when
                # the box touches the top of the image.
                txt_y_pos = y1 - 10
                if txt_y_pos <= 10:
                    txt_y_pos = y2 - 10
                cv2.putText(img, label, (x1, txt_y_pos), cv2.FONT_HERSHEY_SIMPLEX, 0.9, green, 2)
        return img

    def _draw_segmentation(self, img):
        """Run the segmentation model and blend random-coloured masks into *img*."""
        overlay = img.copy()

        for result in self.model_seg(img):
            # masks is None when the model segmented nothing in this image.
            if result.masks is None:
                continue
            for contour in result.masks.xy:
                if len(contour) == 0:
                    continue
                color = [random.randint(0, 255) for _ in range(3)]
                cv2.drawContours(overlay, [contour.astype(np.int32)], -1, color,
                                 thickness=cv2.FILLED)

        # dst = overlay * alpha + img * (1 - alpha), written back into img.
        alpha = 0.4
        cv2.addWeighted(overlay, alpha, img, 1 - alpha, 0, img)
        return img

    def _draw_pose(self, img):
        """Run the pose predictor and draw the first result's keypoints onto *img*."""
        if self._pose_predictor is None:
            self._pose_predictor = PosePredictor(overrides={
                "task": "pose",
                "mode": "predict",
                "model": self.model_bd,
                "verbose": False,
                "classes": [0],
                "iou": 0.5,
                "conf": 0.3,
            })

        pose_result = self._pose_predictor(img)[0]
        return self.plot_keypoints(img, pose_result.keypoints)

    @staticmethod
    def _to_pixmap(img):
        """Convert a 3-channel OpenCV image to a QPixmap."""
        height, width, _ = img.shape
        # rgbSwapped() swaps the channel order and returns a new QImage.
        q_image = QImage(img.data, width, height, 3 * width, QImage.Format_RGB888).rgbSwapped()
        return QPixmap.fromImage(q_image)

    def run_yolo(self):
        """Run the task selected in the combo box on the current image.

        Does nothing when no image has been chosen. When the chosen file
        cannot be read, a short message is shown in the result label instead
        of raising.
        """
        if not self.current_image:
            return

        img = self._read_image(self.current_image)
        if img is None:
            self.result_label.setText('无法读取图片')
            return

        task = self.yolo_combo.currentText()
        if task == '物体检测':
            img = self._draw_detections(img)
        elif task == '物体分割':
            img = self._draw_segmentation(img)
        elif task == '人体姿态识别':
            img = self._draw_pose(img)

        self.display_results(self._to_pixmap(img))

    def display_results(self, pixmap):
        """Show *pixmap* in the right-hand label, scaled to fit with aspect ratio kept."""
        scaled_pixmap = pixmap.scaled(self.result_label.size(), Qt.KeepAspectRatio)
        self.result_label.setPixmap(scaled_pixmap)

    def close_application(self):
        """Close the window (slot for the quit button)."""
        self.close()


if __name__ == '__main__':
    # Create the Qt application, show the main window, and hand the event
    # loop's return code back to the interpreter as the process exit status.
    qt_app = QApplication(sys.argv)
    window = YOLOInterface()
    window.show()
    exit_code = qt_app.exec_()
    sys.exit(exit_code)