#include"yolov5.hpp"

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <exception>
#include <fstream>
#include <stdexcept>
#include <string>
#include <vector>

// Wraps a CUDA runtime call: evaluates it once, logs a diagnostic on failure
// and yields `true` on success / `false` on failure.
#define checkRuntime(op) __check_cuda_runtime_yolov5((op), #op, __FILE__, __LINE__)

/**
 * Reports a failed CUDA runtime call.
 *
 * @param code  Status returned by the CUDA call.
 * @param op    Stringified expression that produced `code`.
 * @param file  Source file of the call site.
 * @param line  Source line of the call site.
 * @return `true` when `code == cudaSuccess`, otherwise `false` after printing
 *         the error name and message.
 */
bool __check_cuda_runtime_yolov5(cudaError_t code, const char* op, const char* file, int line)
{
    if (code != cudaSuccess)
    {
        const char* err_name = cudaGetErrorName(code);
        const char* err_message = cudaGetErrorString(code);
        printf("runtime error %s:%d  %s failed. \n  code = %s, message = %s\n", file, line, op, err_name, err_message);
        return false;
    }
    return true;
}

/** Maps a TensorRT log severity to a short printable tag. */
inline const char* severity_string(nvinfer1::ILogger::Severity t)
{
    switch (t)
    {
    case nvinfer1::ILogger::Severity::kINTERNAL_ERROR: return "internal_error";
    case nvinfer1::ILogger::Severity::kERROR:          return "error";
    case nvinfer1::ILogger::Severity::kWARNING:        return "warning";
    case nvinfer1::ILogger::Severity::kINFO:           return "info";
    case nvinfer1::ILogger::Severity::kVERBOSE:        return "verbose";
    default:                                           return "unknow";
    }
}

/** TensorRT logger that prints every message except info-level ones. */
class Logger : public ILogger
{
public:
    void log(Severity severity, const char* msg) noexcept override
    {
        // suppress info-level messages
        if (severity != Severity::kINFO)
            printf("%s: %s zzj\n", severity_string(severity), msg);
    }
};

// Defined elsewhere (device code). Warps `src` (interleaved 8-bit pixels,
// `src_line_size` bytes per row) into the float buffer `dst` on `stream`.
void warp_affine_bilinear(uint8_t* src, int src_line_size, int src_width, int src_height,
                          float* dst, int dst_width, int dst_height,
                          uint8_t fill_value, AffineMatrix matrix, cudaStream_t stream);

/**
 * Waits for pending stream work, then releases every resource owned by the
 * detector.
 *
 * NOTE(review): relies on the members being null-initialised when
 * initConfig() was never called or failed part-way — confirm in yolov5.hpp.
 */
HAT::~HAT()
{
    // Synchronise first so that nothing is still using the buffers below.
    if (stream != nullptr)
    {
        checkRuntime(cudaStreamSynchronize(stream));
        checkRuntime(cudaStreamDestroy(stream));
        stream = nullptr;
    }

    // TensorRT objects are destroyed in reverse order of creation.
    // (The original tests were inverted: they dereferenced null pointers and
    // leaked valid ones.)
    if (context != nullptr)
    {
        context->destroy();
        context = nullptr;
    }
    if (engine != nullptr)
    {
        engine->destroy();
        engine = nullptr;
    }
    if (runtime != nullptr)
    {
        runtime->destroy();
        runtime = nullptr;
    }

    // delete[] on a null pointer is a no-op, so no guard is needed.
    delete[] output_data_host;
    output_data_host = nullptr;
    delete[] input_data_host;
    input_data_host = nullptr;

    if (input_data_device != nullptr)
    {
        checkRuntime(cudaFree(input_data_device));
        input_data_device = nullptr;
    }
    // The original never released the output device buffer.
    if (output_data_device != nullptr)
    {
        checkRuntime(cudaFree(output_data_device));
        output_data_device = nullptr;
    }
}

/**
 * Loads a serialized TensorRT engine and allocates the inference buffers.
 *
 * @param enginefile     Path to the serialized engine file.
 * @param confThreshold  Confidence threshold stored for detect().
 * @param nmsThreshold   NMS threshold stored for detect().
 * @return `true` on success; `false` if the file cannot be read, a TensorRT
 *         object cannot be created, the binding shapes are not usable, or a
 *         CUDA allocation fails.
 *
 * NOTE(review): calling this twice on one object leaks the first set of
 * resources (same as the original).
 */
bool HAT::initConfig(const char* enginefile, double confThreshold, double nmsThreshold)
{
    if (enginefile == nullptr)
    {
        return false;
    }

    // Read the whole engine file into memory.
    std::ifstream file(enginefile, std::ios::binary);
    if (!file.good())
    {
        return false;
    }
    file.seekg(0, file.end);
    const std::streamoff size = file.tellg();
    if (size <= 0)
    {
        return false;
    }
    file.seekg(0, file.beg);
    std::vector<char> trtModelStream(static_cast<size_t>(size));
    file.read(trtModelStream.data(), size);
    if (!file)
    {
        return false;
    }
    file.close();

    // Initialise the TensorRT objects. The logger is handed to the runtime by
    // reference, so it has to outlive it: a function-local `Logger` (as in the
    // original) would dangle as soon as this function returns.
    static Logger logger;
    this->runtime = createInferRuntime(logger);
    if (this->runtime == nullptr)
    {
        return false;
    }
    this->engine = runtime->deserializeCudaEngine(trtModelStream.data(), trtModelStream.size());
    if (this->engine == nullptr)
    {
        return false;
    }
    this->context = engine->createExecutionContext();
    if (this->context == nullptr)
    {
        return false;
    }

    // Binding 0 is read as batch x channel x height x width.
    auto input_dims = engine->getBindingDimensions(0);
    if (input_dims.nbDims != 4)
    {
        return false;
    }
    input_batch = input_dims.d[0];
    input_channel = input_dims.d[1];
    input_height = input_dims.d[2];
    input_width = input_dims.d[3];
    input_numel = input_batch * input_channel * input_height * input_width;
    //printf("input size=%d*%d*%d*%d\n", input_batch, input_channel, input_height,input_width);

    // Binding 1 is read as batch x boxes x (5 + classes).
    auto output_dims = engine->getBindingDimensions(1);
    if (output_dims.nbDims != 3)
    {
        return false;
    }
    output_numbox = output_dims.d[1];
    output_numprob = output_dims.d[2];
    num_classes = output_numprob - 5;
    output_numel = input_batch * output_numbox * output_numprob;
    //printf("output size=%d*%d*%d\n", input_batch, output_numbox, output_numprob);

    // Reject shapes that would give empty or negative buffer sizes
    // (a dimension stored as -1, for instance).
    if (input_numel <= 0 || output_numel <= 0 || num_classes <= 0)
    {
        return false;
    }

    // Element counts, not byte counts: the original allocated
    // sizeof(float) times too many floats.
    input_data_host = new float[input_numel]();
    output_data_host = new float[output_numel]();

    if (!checkRuntime(cudaMalloc(&input_data_device, static_cast<size_t>(input_numel) * sizeof(float))))
    {
        return false;
    }
    if (!checkRuntime(cudaMalloc(&output_data_device, static_cast<size_t>(output_numel) * sizeof(float))))
    {
        return false;
    }
    if (!checkRuntime(cudaStreamCreate(&stream)))
    {
        return false;
    }

    confThreshold_ = confThreshold;
    nmsThreshold_ = nmsThreshold;
    return true;
}

/**
 * Runs the detector on one image and appends the surviving boxes to `boxes`.
 *
 * @param image  8-bit, 3-channel image (CV_8UC3). Other types are rejected
 *               because the pre-processing call is given a row stride of
 *               `width * 3` bytes.
 * @param boxes  Output; detections in original-image pixel coordinates are
 *               appended. Left untouched when nothing is detected or when a
 *               CUDA/TensorRT call fails (a diagnostic is printed).
 * @throws std::runtime_error  Any std::exception raised inside is rethrown
 *                             with a "yolov5-" prefix.
 *
 * NOTE(review): the element type of `boxes` was lost in SOURCE; it is
 * restored as `Box` because that is what gets pushed into it — confirm
 * against the declaration in yolov5.hpp.
 */
void HAT::detect(cv::Mat& image, std::vector<Box>& boxes)
{
    try
    {
        if (context == nullptr || image.empty())
        {
            return;
        }
        if (image.type() != CV_8UC3)
        {
            printf("yolov5: detect expects a CV_8UC3 image\n");
            return;
        }

        // The upload below copies rows back-to-back, so make sure the pixel
        // data really is contiguous (ROIs / padded Mats are not).
        cv::Mat continuous = image.isContinuous() ? image : image.clone();

        const int width = continuous.cols;
        const int height = continuous.rows;
        const int channels = continuous.channels();
        const size_t src_size = static_cast<size_t>(width) * height * channels;

        uint8_t* psrc_device = nullptr;
        if (!checkRuntime(cudaMalloc(&psrc_device, src_size)))
        {
            return;
        }

        bool ok = checkRuntime(cudaMemcpyAsync(psrc_device, continuous.data, src_size, cudaMemcpyHostToDevice, stream));

        AffineMatrix affine;
        affine.compute(width, height, input_width, input_height);

        if (ok)
        {
            warp_affine_bilinear(psrc_device, width * channels, width, height,
                                 input_data_device, input_width, input_height,
                                 114, affine, stream);

            float* bindings[] = { input_data_device, output_data_device };
            ok = context->enqueueV2((void**)bindings, stream, nullptr);
            if (!ok)
            {
                printf("yolov5: enqueueV2 failed\n");
            }
        }
        if (ok)
        {
            ok = checkRuntime(cudaMemcpyAsync(output_data_host, output_data_device,
                                              sizeof(float) * output_numel,
                                              cudaMemcpyDeviceToHost, stream));
        }

        // The copy above is asynchronous: wait for the stream before the host
        // reads output_data_host and before the source buffer is released.
        const bool synced = checkRuntime(cudaStreamSynchronize(stream));
        checkRuntime(cudaFree(psrc_device));
        if (!ok || !synced)
        {
            return;
        }

        std::vector<cv::Rect> bboxes;
        std::vector<int> classIds;
        std::vector<float> scores;

        // Each row: cx, cy, w, h, objectness, then one score per class.
        for (int i = 0; i < output_numbox; ++i)
        {
            float* ptr = output_data_host + i * output_numprob;
            float objness = ptr[4];
            if (objness < confThreshold_)
            {
                continue;
            }

            float* pclass = ptr + 5;
            int label = static_cast<int>(std::max_element(pclass, pclass + num_classes) - pclass);
            float prob = pclass[label] * objness;
            if (prob < confThreshold_)
                continue;

            float cx = ptr[0];
            float cy = ptr[1];
            float w1 = ptr[2];
            float h1 = ptr[3];
            float x1 = cx - w1 / 2;
            float y1 = cy - h1 / 2;

            cv::Rect box;
            box.x = x1;
            box.y = y1;
            box.width = w1;
            box.height = h1;

            bboxes.emplace_back(box);
            classIds.emplace_back(label);
            scores.emplace_back(prob);
        }

        if (bboxes.empty())
        {
            return;
        }

        std::vector<int> indexes;
        cv::dnn::NMSBoxes(bboxes, scores, confThreshold_, nmsThreshold_, indexes);

        // At least one candidate box exists: map the survivors back.
        for (size_t i = 0; i < indexes.size(); i++)
        {
            int idx = indexes[i];
            auto& ibox = bboxes[idx];
            float x = ibox.x;
            float y = ibox.y;
            float w = ibox.width;
            float h = ibox.height;

            // Top-left corner, width and height in original-image coordinates.
            // NOTE(review): d2i[0] is used as the scale for both axes —
            // presumably AffineMatrix::compute produces a uniform scale; confirm.
            int image_base_x = static_cast<int>(affine.d2i[0] * x + affine.d2i[2]);
            int image_base_y = static_cast<int>(affine.d2i[0] * y + affine.d2i[5]);
            int image_base_width = static_cast<int>(affine.d2i[0] * w);
            int image_base_height = static_cast<int>(affine.d2i[0] * h);

            if ((image_base_width <= 0) || (image_base_height <= 0))
                continue;

            struct Box box;
            box.x = image_base_x;
            box.y = image_base_y;
            box.width = image_base_width;
            box.height = image_base_height;
            box.score = scores[idx];
            box.class_id = classIds[idx];
            boxes.push_back(box);
        }
    }
    catch (const std::exception& ex)
    {
        std::string errorMessage = "yolov5-";
        errorMessage += ex.what();
        throw std::runtime_error(errorMessage);
    }
}