opencv+yolov8实现监控画面报警功能

项目背景

最近停在门前的车被人开走了，虽然有监控，但是看监控太麻烦了，于是想着框选一个区域用yolov8直接检测闯入到这个区域的所有目标，这样1ms一帧，很快就可以跑完一天的视频

用到的技术

C++
OpenCV
Yolov8 + OnnxRuntime

yolov8介绍

YOLOv8支持Pose和Segment,在使用TensorRT可以跑到1-2ms一帧
YOLOv8提供了一个全新的SOTA模型，包括P5 640和P6 1280分辨率的目标检测网络和基于YOLACT的实例分割模型。
YOLOv8和YOLOv5一样，基于缩放系数也提供了N/S/M/L/X尺度的不同大小模型，用于满足不同场景需求。
YOLOv8骨干网络和Neck部分可能参考了YOLOv7 ELAN设计思想，将YOLOv5的C3结构换成了梯度流更丰富的C2f结构，并对不同尺度模型调整了不同的通道数。
YOLOv8 Head部分相比YOLOv5改动较大，换成了目前主流的解耦头结构，将分类和检测头分离，同时也从Anchor-Based换成了Anchor-Free。
YOLOv8 Loss计算方面采用了TaskAlignedAssigner正样本分配策略，并引入了Distribution Focal Loss。
YOLOv8训练的数据增强部分引入了YOLOX中的最后10 epoch关闭Mosiac增强的操作，可以有效地提升精度。

实现步骤

首先打开视频第一帧，框选区域，我们直接使用opencv实现这个功能
加载模型检测画面中的所有对象
计算IOU，如果有重合就保存这一帧具体信息
跟踪闯入画面的目标，否则会重复保存信息

opencv打开视频并框选区域">使用opencv打开视频，并框选区域

#include <opencv2/opencv.hpp>
#include "inference.h"

using namespace cv;

// 定义一个全局变量，用于存放鼠标框选的矩形区域
Rect g_rect;
// 定义一个全局变量，用于标记鼠标是否按下
bool g_bDrawingBox = false;

// 定义一个回调函数，用于处理鼠标事件
void on_MouseHandle(int event, int x, int y, int flags, void* param)
{
    // 将param转换为Mat类型的指针
    Mat& image = *(Mat*) param;
    // 根据不同的鼠标事件进行处理
    switch (event)
    {
        // 鼠标左键按下事件
        case EVENT_LBUTTONDOWN:
        {
            // 标记鼠标已按下
            g_bDrawingBox = true;
            // 记录矩形框的起始点
            g_rect.x = x;
            g_rect.y = y;
            break;
        }
        // 鼠标移动事件
        case EVENT_MOUSEMOVE:
        {
            // 如果鼠标已按下，更新矩形框的宽度和高度
            if (g_bDrawingBox)
            {
                g_rect.width = x - g_rect.x;
                g_rect.height = y - g_rect.y;
            }
            break;
        }
        // 鼠标左键松开事件
        case EVENT_LBUTTONUP:
        {
            // 标记鼠标已松开
            g_bDrawingBox = false;
            // 如果矩形框的宽度和高度为正，绘制矩形框到图像上
            if (g_rect.width > 0 && g_rect.height > 0)
            {
                rectangle(image, g_rect, Scalar(0, 255, 0));
            }
            break;
        }
    }
}

int main(int argc, char* argv[])
{
    // 读取视频文件
    cv::VideoCapture vc;
    vc.open(argv[1]);
    
    if(vc.isOpened()){
        cv::Mat frame;
        vc >> frame;
        if(!frame.empty()){
            // 创建一个副本图像，用于显示框选过程
            Mat temp;
            frame.copyTo(temp);
            // 创建一个窗口，显示图像
            namedWindow("image");
            // 设置鼠标回调函数，传入副本图像作为参数
            setMouseCallback("image", on_MouseHandle, (void*)&temp);
            while (1)
            {
                // 如果鼠标正在框选，绘制一个虚线矩形框到副本图像上，并显示框的大小和坐标
                if (g_bDrawingBox)
                {
                    temp.copyTo(frame);
                    rectangle(frame, g_rect, Scalar(0, 255, 0), 1, LINE_AA);
                    char text[32];
                    sprintf(text, "w=%d, h=%d", g_rect.width, g_rect.height);
                    putText(frame, text, Point(g_rect.x + 5, g_rect.y - 5), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
                }
                // 显示副本图像
                imshow("image", frame);
                // 等待按键，如果按下ESC键，退出循环
                if (waitKey(10) == 27)
                {
                    break;
                }
            }

            while(!frame.empty()){
                cv::imshow("image", frame);
                cv::waitKey(1);

                vc >> frame;
            }
        }
    }
    
    return 0;
}

使用YoloV8检测目标

inference.h

#pragma once

#define    RET_OK nullptr

#ifdef _WIN32
#include <Windows.h>
#include <direct.h>
#include <io.h>
#endif

#include <string>
#include <vector>
#include <cstdio>
#include <opencv2/opencv.hpp>
#include "onnxruntime_cxx_api.h"

#ifdef USE_CUDA
#include <cuda_fp16.h>
#endif


enum MODEL_TYPE {
    //FLOAT32 MODEL
    YOLO_ORIGIN_V5 = 0,
    YOLO_ORIGIN_V8 = 1,//only support v8 detector currently
    YOLO_POSE_V8 = 2,
    YOLO_CLS_V8 = 3,
    YOLO_ORIGIN_V8_HALF = 4,
    YOLO_POSE_V8_HALF = 5,
    YOLO_CLS_V8_HALF = 6
};


typedef struct _DCSP_INIT_PARAM {
    std::string ModelPath;
    MODEL_TYPE ModelType = YOLO_ORIGIN_V8;
    std::vector<int> imgSize = {640, 640};
    float RectConfidenceThreshold = 0.6;
    float iouThreshold = 0.5;
    bool CudaEnable = false;
    int LogSeverityLevel = 3;
    int IntraOpNumThreads = 1;
} DCSP_INIT_PARAM;


typedef struct _DCSP_RESULT {
    int classId;
    float confidence;
    cv::Rect box;
} DCSP_RESULT;


class DCSP_CORE {
public:
    DCSP_CORE();

    ~DCSP_CORE();

public:
    char *CreateSession(DCSP_INIT_PARAM &iParams);

    char *RunSession(cv::Mat &iImg, std::vector<DCSP_RESULT> &oResult);

    char *WarmUpSession();

    template<typename N>
    char *TensorProcess(clock_t &starttime_1, cv::Mat &iImg, N &blob, std::vector<int64_t> &inputNodeDims,
                        std::vector<DCSP_RESULT> &oResult);

    std::vector<std::string> classes{};

private:
    Ort::Env env;
    Ort::Session *session;
    bool cudaEnable;
    Ort::RunOptions options;
    std::vector<const char *> inputNodeNames;
    std::vector<const char *> outputNodeNames;

    MODEL_TYPE modelType;
    std::vector<int> imgSize;
    float rectConfidenceThreshold;
    float iouThreshold;
};

inference.cpp

#include "inference.h"
#include <regex>

#define benchmark

DCSP_CORE::DCSP_CORE() {

}


DCSP_CORE::~DCSP_CORE() {
    delete session;
}

#ifdef USE_CUDA
namespace Ort
{
    template<>
    struct TypeToTensorType<half> { static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; };
}
#endif


template<typename T>
char *BlobFromImage(cv::Mat &iImg, T &iBlob) {
    int channels = iImg.channels();
    int imgHeight = iImg.rows;
    int imgWidth = iImg.cols;

    for (int c = 0; c < channels; c++) {
        for (int h = 0; h < imgHeight; h++) {
            for (int w = 0; w < imgWidth; w++) {
                iBlob[c * imgWidth * imgHeight + h * imgWidth + w] = typename std::remove_pointer<T>::type(
                        (iImg.at<cv::Vec3b>(h, w)[c]) / 255.0f);
            }
        }
    }
    return RET_OK;
}


char *PostProcess(cv::Mat &iImg, std::vector<int> iImgSize, cv::Mat &oImg) {
    cv::Mat img = iImg.clone();
    cv::resize(iImg, oImg, cv::Size(iImgSize.at(0), iImgSize.at(1)));
    if (img.channels() == 1) {
        cv::cvtColor(oImg, oImg, cv::COLOR_GRAY2BGR);
    }
    cv::cvtColor(oImg, oImg, cv::COLOR_BGR2RGB);
    return RET_OK;
}


char *DCSP_CORE::CreateSession(DCSP_INIT_PARAM &iParams) {
    char *Ret = RET_OK;
    std::regex pattern("[\u4e00-\u9fa5]");
    bool result = std::regex_search(iParams.ModelPath, pattern);
    if (result) {
        Ret = "[DCSP_ONNX]:Model path error.Change your model path without chinese characters.";
        std::cout << Ret << std::endl;
        return Ret;
    }
    try {
        rectConfidenceThreshold = iParams.RectConfidenceThreshold;
        iouThreshold = iParams.iouThreshold;
        imgSize = iParams.imgSize;
        modelType = iParams.ModelType;
        env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "Yolo");
        Ort::SessionOptions sessionOption;
        if (iParams.CudaEnable) {
            cudaEnable = iParams.CudaEnable;
            OrtCUDAProviderOptions cudaOption;
            cudaOption.device_id = 0;
            sessionOption.AppendExecutionProvider_CUDA(cudaOption);
        }
        sessionOption.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);
        sessionOption.SetIntraOpNumThreads(iParams.IntraOpNumThreads);
        sessionOption.SetLogSeverityLevel(iParams.LogSeverityLevel);

#ifdef _WIN32
        int ModelPathSize = MultiByteToWideChar(CP_UTF8, 0, iParams.ModelPath.c_str(), static_cast<int>(iParams.ModelPath.length()), nullptr, 0);
        wchar_t* wide_cstr = new wchar_t[ModelPathSize + 1];
        MultiByteToWideChar(CP_UTF8, 0, iParams.ModelPath.c_str(), static_cast<int>(iParams.ModelPath.length()), wide_cstr, ModelPathSize);
        wide_cstr[ModelPathSize] = L'\0';
        const wchar_t* modelPath = wide_cstr;
#else
        const char *modelPath = iParams.ModelPath.c_str();
#endif // _WIN32

        session = new Ort::Session(env, modelPath, sessionOption);
        Ort::AllocatorWithDefaultOptions allocator;
        size_t inputNodesNum = session->GetInputCount();
        for (size_t i = 0; i < inputNodesNum; i++) {
            Ort::AllocatedStringPtr input_node_name = session->GetInputNameAllocated(i, allocator);
            char *temp_buf = new char[50];
            strcpy(temp_buf, input_node_name.get());
            inputNodeNames.push_back(temp_buf);
        }
        size_t OutputNodesNum = session->GetOutputCount();
        for (size_t i = 0; i < OutputNodesNum; i++) {
            Ort::AllocatedStringPtr output_node_name = session->GetOutputNameAllocated(i, allocator);
            char *temp_buf = new char[10];
            strcpy(temp_buf, output_node_name.get());
            outputNodeNames.push_back(temp_buf);
        }
        options = Ort::RunOptions{nullptr};
        WarmUpSession();
        return RET_OK;
    }
    catch (const std::exception &e) {
        const char *str1 = "[DCSP_ONNX]:";
        const char *str2 = e.what();
        std::string result = std::string(str1) + std::string(str2);
        char *merged = new char[result.length() + 1];
        std::strcpy(merged, result.c_str());
        std::cout << merged << std::endl;
        delete[] merged;
        return "[DCSP_ONNX]:Create session failed.";
    }

}


char *DCSP_CORE::RunSession(cv::Mat &iImg, std::vector<DCSP_RESULT> &oResult) {
#ifdef benchmark
    clock_t starttime_1 = clock();
#endif // benchmark

    char *Ret = RET_OK;
    cv::Mat processedImg;
    PostProcess(iImg, imgSize, processedImg);
    if (modelType < 4) {
        float *blob = new float[processedImg.total() * 3];
        BlobFromImage(processedImg, blob);
        std::vector<int64_t> inputNodeDims = {1, 3, imgSize.at(0), imgSize.at(1)};
        TensorProcess(starttime_1, iImg, blob, inputNodeDims, oResult);
    } else {
#ifdef USE_CUDA
        half* blob = new half[processedImg.total() * 3];
        BlobFromImage(processedImg, blob);
        std::vector<int64_t> inputNodeDims = { 1,3,imgSize.at(0),imgSize.at(1) };
        TensorProcess(starttime_1, iImg, blob, inputNodeDims, oResult);
#endif
    }

    return Ret;
}


template<typename N>
char *DCSP_CORE::TensorProcess(clock_t &starttime_1, cv::Mat &iImg, N &blob, std::vector<int64_t> &inputNodeDims,
                               std::vector<DCSP_RESULT> &oResult) {
    Ort::Value inputTensor = Ort::Value::CreateTensor<typename std::remove_pointer<N>::type>(
            Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1),
            inputNodeDims.data(), inputNodeDims.size());
#ifdef benchmark
    clock_t starttime_2 = clock();
#endif // benchmark
    auto outputTensor = session->Run(options, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(),
                                     outputNodeNames.size());
#ifdef benchmark
    clock_t starttime_3 = clock();
#endif // benchmark

    Ort::TypeInfo typeInfo = outputTensor.front().GetTypeInfo();
    auto tensor_info = typeInfo.GetTensorTypeAndShapeInfo();
    std::vector<int64_t> outputNodeDims = tensor_info.GetShape();
    auto output = outputTensor.front().GetTensorMutableData<typename std::remove_pointer<N>::type>();
    delete blob;
    switch (modelType) {
        case 1://V8_ORIGIN_FP32
        case 4://V8_ORIGIN_FP16
        {
            int strideNum = outputNodeDims[2];
            int signalResultNum = outputNodeDims[1];
            std::vector<int> class_ids;
            std::vector<float> confidences;
            std::vector<cv::Rect> boxes;

            cv::Mat rawData;
            if (modelType == 1) {
                // FP32
                rawData = cv::Mat(signalResultNum, strideNum, CV_32F, output);
            } else {
                // FP16
                rawData = cv::Mat(signalResultNum, strideNum, CV_16F, output);
                rawData.convertTo(rawData, CV_32F);
            }
            rawData = rawData.t();
            float *data = (float *) rawData.data;

            float x_factor = iImg.cols / 640.;
            float y_factor = iImg.rows / 640.;
            for (int i = 0; i < strideNum; ++i) {
                float *classesScores = data + 4;
                cv::Mat scores(1, this->classes.size(), CV_32FC1, classesScores);
                cv::Point class_id;
                double maxClassScore;
                cv::minMaxLoc(scores, 0, &maxClassScore, 0, &class_id);
                if (maxClassScore > rectConfidenceThreshold) {
                    confidences.push_back(maxClassScore);
                    class_ids.push_back(class_id.x);

                    float x = data[0];
                    float y = data[1];
                    float w = data[2];
                    float h = data[3];

                    int left = int((x - 0.5 * w) * x_factor);
                    int top = int((y - 0.5 * h) * y_factor);

                    int width = int(w * x_factor);
                    int height = int(h * y_factor);

                    boxes.emplace_back(left, top, width, height);
                }
                data += signalResultNum;
            }

            std::vector<int> nmsResult;
            cv::dnn::NMSBoxes(boxes, confidences, rectConfidenceThreshold, iouThreshold, nmsResult);

            for (int i = 0; i < nmsResult.size(); ++i) {
                int idx = nmsResult[i];
                DCSP_RESULT result;
                result.classId = class_ids[idx];
                result.confidence = confidences[idx];
                result.box = boxes[idx];
                oResult.push_back(result);
            }


#ifdef benchmark
            clock_t starttime_4 = clock();
            double pre_process_time = (double) (starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000;
            double process_time = (double) (starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000;
            double post_process_time = (double) (starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000;
            if (cudaEnable) {
                std::cout << "[DCSP_ONNX(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time
                          << "ms inference, " << post_process_time << "ms post-process." << std::endl;
            } else {
                std::cout << "[DCSP_ONNX(CPU)]: " << pre_process_time << "ms pre-process, " << process_time
                          << "ms inference, " << post_process_time << "ms post-process." << std::endl;
            }
#endif // benchmark

            break;
        }
    }
    return RET_OK;
}


char *DCSP_CORE::WarmUpSession() {
    clock_t starttime_1 = clock();
    cv::Mat iImg = cv::Mat(cv::Size(imgSize.at(0), imgSize.at(1)), CV_8UC3);
    cv::Mat processedImg;
    PostProcess(iImg, imgSize, processedImg);
    if (modelType < 4) {
        float *blob = new float[iImg.total() * 3];
        BlobFromImage(processedImg, blob);
        std::vector<int64_t> YOLO_input_node_dims = {1, 3, imgSize.at(0), imgSize.at(1)};
        Ort::Value input_tensor = Ort::Value::CreateTensor<float>(
                Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1),
                YOLO_input_node_dims.data(), YOLO_input_node_dims.size());
        auto output_tensors = session->Run(options, inputNodeNames.data(), &input_tensor, 1, outputNodeNames.data(),
                                           outputNodeNames.size());
        delete[] blob;
        clock_t starttime_4 = clock();
        double post_process_time = (double) (starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000;
        if (cudaEnable) {
            std::cout << "[DCSP_ONNX(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. " << std::endl;
        }
    } else {
#ifdef USE_CUDA
        half* blob = new half[iImg.total() * 3];
        BlobFromImage(processedImg, blob);
        std::vector<int64_t> YOLO_input_node_dims = { 1,3,imgSize.at(0),imgSize.at(1) };
        Ort::Value input_tensor = Ort::Value::CreateTensor<half>(Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), YOLO_input_node_dims.data(), YOLO_input_node_dims.size());
        auto output_tensors = session->Run(options, inputNodeNames.data(), &input_tensor, 1, outputNodeNames.data(), outputNodeNames.size());
        delete[] blob;
        clock_t starttime_4 = clock();
        double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000;
        if (cudaEnable)
        {
            std::cout << "[DCSP_ONNX(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. " << std::endl;
        }
#endif
    }
    return RET_OK;
}

main.cpp

int read_coco_yaml(DCSP_CORE *&p) {
    // Open the YAML file
    std::ifstream file("coco.yaml");
    if (!file.is_open()) {
        std::cerr << "Failed to open file" << std::endl;
        return 1;
    }

    // Read the file line by line
    std::string line;
    std::vector<std::string> lines;
    while (std::getline(file, line)) {
        lines.push_back(line);
    }

    // Find the start and end of the names section
    std::size_t start = 0;
    std::size_t end = 0;
    for (std::size_t i = 0; i < lines.size(); i++) {
        if (lines[i].find("names:") != std::string::npos) {
            start = i + 1;
        } else if (start > 0 && lines[i].find(':') == std::string::npos) {
            end = i;
            break;
        }
    }

    // Extract the names
    std::vector<std::string> names;
    for (std::size_t i = start; i < end; i++) {
        std::stringstream ss(lines[i]);
        std::string name;
        std::getline(ss, name, ':'); // Extract the number before the delimiter
        std::getline(ss, name); // Extract the string after the delimiter
        names.push_back(name);
    }

    p->classes = names;
    return 0;
}

int main(int argc, char* argv[])
{
    DCSP_CORE *yoloDetector = new DCSP_CORE;
    //std::string model_path = "yolov8n.onnx";
    std::string model_path = argv[1];
    read_coco_yaml(yoloDetector);
    #ifdef USE_CUDA
    // GPU FP32 inference
    DCSP_INIT_PARAM params{ model_path, YOLO_ORIGIN_V8, {640, 640},  0.1, 0.5, true };
    // GPU FP16 inference
    // DCSP_INIT_PARAM params{ model_path, YOLO_ORIGIN_V8_HALF, {640, 640},  0.1, 0.5, true };
    #else
    // CPU inference
    DCSP_INIT_PARAM params{model_path, YOLO_ORIGIN_V8, {640, 640}, 0.1, 0.5, false};
    #endif
    yoloDetector->CreateSession(params);
    
    cv::VideoCapture vc;
    vc.open(argv[2]);
    
    if(vc.isOpened()){
    cv::Mat frame;
    vc >> frame;
            while(!frame.empty()){
                std::vector<DCSP_RESULT> res;
                yoloDetector->RunSession(frame, res);

                for (int i = 0; i < res.size(); ++i)
                {
                    DCSP_RESULT detection = res[i];

                    cv::Rect box = detection.box;
                    cv::RNG rng(cv::getTickCount());
                    cv::Scalar color(rng.uniform(0, 256), rng.uniform(0, 256), rng.uniform(0, 256));;

                    // Detection box
                    cv::rectangle(frame, box, color, 2);

                    // Detection box text
                    std::string classString = yoloDetector->classes[detection.classId] + ' ' + std::to_string(detection.confidence).substr(0, 4);
                    cv::Size textSize = cv::getTextSize(classString, cv::FONT_HERSHEY_DUPLEX, 1, 2, 0);
                    cv::Rect textBox(box.x, box.y - 40, textSize.width + 10, textSize.height + 20);

                    cv::rectangle(frame, textBox, color, cv::FILLED);
                    cv::putText(frame, classString, cv::Point(box.x + 5, box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 2, 0);
                }
                cv::rectangle(frame, g_rect, Scalar(0, 255, 0), 3, cv::LINE_AA);

                cv::imshow("image", frame);
                cv::waitKey(1);

                vc >> frame;
          }
        }
}

opencv的框选区域和yolov8检测目标框融合">opencv的框选区域和yolov8检测目标框融合

#include <opencv2/opencv.hpp>
#include <fstream>
#include "inference.h"

using namespace cv;

// 定义一个全局变量，用于存放鼠标框选的矩形区域
Rect g_rect;
// 定义一个全局变量，用于标记鼠标是否按下
bool g_bDrawingBox = false;

// 定义一个回调函数，用于处理鼠标事件
void on_MouseHandle(int event, int x, int y, int flags, void* param)
{
    // 将param转换为Mat类型的指针
    Mat& image = *(Mat*) param;
    // 根据不同的鼠标事件进行处理
    switch (event)
    {
        // 鼠标左键按下事件
        case EVENT_LBUTTONDOWN:
        {
            // 标记鼠标已按下
            g_bDrawingBox = true;
            // 记录矩形框的起始点
            g_rect.x = x;
            g_rect.y = y;
            break;
        }
        // 鼠标移动事件
        case EVENT_MOUSEMOVE:
        {
            // 如果鼠标已按下，更新矩形框的宽度和高度
            if (g_bDrawingBox)
            {
                g_rect.width = x - g_rect.x;
                g_rect.height = y - g_rect.y;
            }
            break;
        }
        // 鼠标左键松开事件
        case EVENT_LBUTTONUP:
        {
            // 标记鼠标已松开
            g_bDrawingBox = false;
            // 如果矩形框的宽度和高度为正，绘制矩形框到图像上
            if (g_rect.width > 0 && g_rect.height > 0)
            {
                rectangle(image, g_rect, Scalar(0, 255, 0));
            }
            break;
        }
    }
}

int read_coco_yaml(DCSP_CORE *&p) {
    // Open the YAML file
    std::ifstream file("coco.yaml");
    if (!file.is_open()) {
        std::cerr << "Failed to open file" << std::endl;
        return 1;
    }

    // Read the file line by line
    std::string line;
    std::vector<std::string> lines;
    while (std::getline(file, line)) {
        lines.push_back(line);
    }

    // Find the start and end of the names section
    std::size_t start = 0;
    std::size_t end = 0;
    for (std::size_t i = 0; i < lines.size(); i++) {
        if (lines[i].find("names:") != std::string::npos) {
            start = i + 1;
        } else if (start > 0 && lines[i].find(':') == std::string::npos) {
            end = i;
            break;
        }
    }

    // Extract the names
    std::vector<std::string> names;
    for (std::size_t i = start; i < end; i++) {
        std::stringstream ss(lines[i]);
        std::string name;
        std::getline(ss, name, ':'); // Extract the number before the delimiter
        std::getline(ss, name); // Extract the string after the delimiter
        names.push_back(name);
    }

    p->classes = names;
    return 0;
}

int main(int argc, char* argv[])
{
    // 读取原始图像
    // Mat src = imread(argv[1]);

    DCSP_CORE *yoloDetector = new DCSP_CORE;
    //std::string model_path = "yolov8n.onnx";
    std::string model_path = argv[1];
    read_coco_yaml(yoloDetector);
#ifdef USE_CUDA
    // GPU FP32 inference
    DCSP_INIT_PARAM params{ model_path, YOLO_ORIGIN_V8, {640, 640},  0.1, 0.5, true };
    // GPU FP16 inference
    // DCSP_INIT_PARAM params{ model_path, YOLO_ORIGIN_V8_HALF, {640, 640},  0.1, 0.5, true };
#else
    // CPU inference
    DCSP_INIT_PARAM params{model_path, YOLO_ORIGIN_V8, {640, 640}, 0.1, 0.5, false};
#endif
    yoloDetector->CreateSession(params);

    cv::VideoCapture vc;
    vc.open(argv[2]);
    
    if(vc.isOpened()){
        cv::Mat frame;
        vc >> frame;
        if(!frame.empty()){
            // 创建一个副本图像，用于显示框选过程
            Mat temp;
            frame.copyTo(temp);
            // 创建一个窗口，显示图像
            namedWindow("image");
            // 设置鼠标回调函数，传入副本图像作为参数
            setMouseCallback("image", on_MouseHandle, (void*)&temp);
            while (1)
            {
                // 如果鼠标正在框选，绘制一个虚线矩形框到副本图像上，并显示框的大小和坐标
                if (g_bDrawingBox)
                {
                    temp.copyTo(frame);
                    rectangle(frame, g_rect, Scalar(0, 255, 0), 1, LINE_AA);
                    char text[32];
                    sprintf(text, "w=%d, h=%d", g_rect.width, g_rect.height);
                    putText(frame, text, Point(g_rect.x + 5, g_rect.y - 5), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
                }
                // 显示副本图像
                imshow("image", frame);
                // 等待按键，如果按下ESC键，退出循环
                if (waitKey(10) == 27)
                {
                    break;
                }
            }

            while(!frame.empty()){
                std::vector<DCSP_RESULT> res;
                yoloDetector->RunSession(frame, res);

                for (int i = 0; i < res.size(); ++i)
                {
                    DCSP_RESULT detection = res[i];

                    cv::Rect box = detection.box;
                    cv::RNG rng(cv::getTickCount());
                    cv::Scalar color(rng.uniform(0, 256), rng.uniform(0, 256), rng.uniform(0, 256));;

                    // Detection box
                    cv::rectangle(frame, box, color, 2);

                    // Detection box text
                    std::string classString = yoloDetector->classes[detection.classId] + ' ' + std::to_string(detection.confidence).substr(0, 4);
                    cv::Size textSize = cv::getTextSize(classString, cv::FONT_HERSHEY_DUPLEX, 1, 2, 0);
                    cv::Rect textBox(box.x, box.y - 40, textSize.width + 10, textSize.height + 20);

                    cv::rectangle(frame, textBox, color, cv::FILLED);
                    cv::putText(frame, classString, cv::Point(box.x + 5, box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 2, 0);
                }
                cv::rectangle(frame, g_rect, Scalar(0, 255, 0), 3, cv::LINE_AA);

                cv::imshow("image", frame);
                cv::waitKey(1);

                vc >> frame;
            }
        }
    }
    
    return 0;
}

计算预警区域和目标框重合度

double calIou(const cv::Rect& rc1, const cv::Rect& rc2)
{
    cv::Rect intersection = rc1 & rc2;
    
    if (!intersection.empty()) {
        double intersectionArea = intersection.width * intersection.height;
        double rect1Area = rc1.width * rc1.height;
        double rect2Area = rc2.width * rc2.height;
        
        // 计算IOU
        double iou = intersectionArea / (rect1Area + rect2Area - intersectionArea);
        return iou;
    } else {
        // 没有重叠，IOU为0
        return 0.0;
    }
}

跟踪实现

不断的去循环激活的目标，来过滤掉重复的代码，这块以后实现

完整代码

#include <opencv2/opencv.hpp>
#include <fstream>
#include "inference.h"

using namespace cv;

// 定义一个全局变量，用于存放鼠标框选的矩形区域
Rect g_rect;
// 定义一个全局变量，用于标记鼠标是否按下
bool g_bDrawingBox = false;

// 定义一个回调函数，用于处理鼠标事件
void on_MouseHandle(int event, int x, int y, int flags, void* param)
{
    // 将param转换为Mat类型的指针
    Mat& image = *(Mat*) param;
    // 根据不同的鼠标事件进行处理
    switch (event)
    {
        // 鼠标左键按下事件
        case EVENT_LBUTTONDOWN:
        {
            // 标记鼠标已按下
            g_bDrawingBox = true;
            // 记录矩形框的起始点
            g_rect.x = x;
            g_rect.y = y;
            break;
        }
        // 鼠标移动事件
        case EVENT_MOUSEMOVE:
        {
            // 如果鼠标已按下，更新矩形框的宽度和高度
            if (g_bDrawingBox)
            {
                g_rect.width = x - g_rect.x;
                g_rect.height = y - g_rect.y;
            }
            break;
        }
        // 鼠标左键松开事件
        case EVENT_LBUTTONUP:
        {
            // 标记鼠标已松开
            g_bDrawingBox = false;
            // 如果矩形框的宽度和高度为正，绘制矩形框到图像上
            if (g_rect.width > 0 && g_rect.height > 0)
            {
                rectangle(image, g_rect, Scalar(0, 255, 0));
            }
            break;
        }
    }
}

int read_coco_yaml(DCSP_CORE *&p) {
    // Open the YAML file
    std::ifstream file("coco.yaml");
    if (!file.is_open()) {
        std::cerr << "Failed to open file" << std::endl;
        return 1;
    }

    // Read the file line by line
    std::string line;
    std::vector<std::string> lines;
    while (std::getline(file, line)) {
        lines.push_back(line);
    }

    // Find the start and end of the names section
    std::size_t start = 0;
    std::size_t end = 0;
    for (std::size_t i = 0; i < lines.size(); i++) {
        if (lines[i].find("names:") != std::string::npos) {
            start = i + 1;
        } else if (start > 0 && lines[i].find(':') == std::string::npos) {
            end = i;
            break;
        }
    }

    // Extract the names
    std::vector<std::string> names;
    for (std::size_t i = start; i < end; i++) {
        std::stringstream ss(lines[i]);
        std::string name;
        std::getline(ss, name, ':'); // Extract the number before the delimiter
        std::getline(ss, name); // Extract the string after the delimiter
        names.push_back(name);
    }

    p->classes = names;
    return 0;
}

double calIou(const cv::Rect& rc1, const cv::Rect& rc2)
{
    cv::Rect intersection = rc1 & rc2;
    
    if (!intersection.empty()) {
        double intersectionArea = intersection.width * intersection.height;
        double rect1Area = rc1.width * rc1.height;
        double rect2Area = rc2.width * rc2.height;
        
        // 计算IOU
        double iou = intersectionArea / (rect1Area + rect2Area - intersectionArea);
        return iou;
    } else {
        // 没有重叠，IOU为0
        return 0.0;
    }
}

int main(int argc, char* argv[])
{
    // 读取原始图像
    // Mat src = imread(argv[1]);

    DCSP_CORE *yoloDetector = new DCSP_CORE;
    //std::string model_path = "yolov8n.onnx";
    std::string model_path = argv[1];
    read_coco_yaml(yoloDetector);
#ifdef USE_CUDA
    // GPU FP32 inference
    DCSP_INIT_PARAM params{ model_path, YOLO_ORIGIN_V8, {640, 640},  0.1, 0.5, true };
    // GPU FP16 inference
    // DCSP_INIT_PARAM params{ model_path, YOLO_ORIGIN_V8_HALF, {640, 640},  0.1, 0.5, true };
#else
    // CPU inference
    DCSP_INIT_PARAM params{model_path, YOLO_ORIGIN_V8, {640, 640}, 0.1, 0.5, false};
#endif
    yoloDetector->CreateSession(params);

    cv::VideoCapture vc;
    vc.open(argv[2]);
    
    if(vc.isOpened()){
        cv::Mat frame;
        vc >> frame;
        if(!frame.empty()){
            // 创建一个副本图像，用于显示框选过程
            Mat temp;
            frame.copyTo(temp);
            // 创建一个窗口，显示图像
            namedWindow("image");
            // 设置鼠标回调函数，传入副本图像作为参数
            setMouseCallback("image", on_MouseHandle, (void*)&temp);
            while (1)
            {
                // 如果鼠标正在框选，绘制一个虚线矩形框到副本图像上，并显示框的大小和坐标
                if (g_bDrawingBox)
                {
                    temp.copyTo(frame);
                    rectangle(frame, g_rect, Scalar(0, 255, 0), 1, LINE_AA);
                    char text[32];
                    sprintf(text, "w=%d, h=%d", g_rect.width, g_rect.height);
                    putText(frame, text, Point(g_rect.x + 5, g_rect.y - 5), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
                }
                // 显示副本图像
                imshow("image", frame);
                // 等待按键，如果按下ESC键，退出循环
                if (waitKey(10) == 27)
                {
                    break;
                }
            }

            while(!frame.empty()){
                std::vector<DCSP_RESULT> res;
                yoloDetector->RunSession(frame, res);

                for (int i = 0; i < res.size(); ++i)
                {
                    DCSP_RESULT detection = res[i];

                    cv::Rect box = detection.box;
                    cv::RNG rng(cv::getTickCount());
                    cv::Scalar color(rng.uniform(0, 256), rng.uniform(0, 256), rng.uniform(0, 256));;

                    // Detection box
                    cv::rectangle(frame, box, color, 2);

                    // Detection box text
                    std::string classString = yoloDetector->classes[detection.classId] + ' ' + std::to_string(detection.confidence).substr(0, 4);
                    cv::Size textSize = cv::getTextSize(classString, cv::FONT_HERSHEY_DUPLEX, 1, 2, 0);
                    cv::Rect textBox(box.x, box.y - 40, textSize.width + 10, textSize.height + 20);

                    cv::rectangle(frame, textBox, color, cv::FILLED);
                    cv::putText(frame, classString, cv::Point(box.x + 5, box.y - 10), cv::FONT_HERSHEY_DUPLEX, 1, cv::Scalar(0, 0, 0), 2, 0);

                    double iou = calIou(g_rect, box);
                    if(iou > 0)
                        std::cout << "iou:" << iou << std::endl;
                }
                cv::rectangle(frame, g_rect, Scalar(0, 255, 0), 3, cv::LINE_AA);

                cv::imshow("image", frame);
                cv::waitKey(1);

                vc >> frame;
            }
        }
    }
    
    return 0;
}