#include"clasi.h"
|
|
// Evaluates the CUDA runtime call `op` and passes its result, the stringified
// expression and the call site (__FILE__, __LINE__) to __check_cuda_runtime3.
// The whole macro expression yields that function's bool result.
#define checkRuntime3(op) __check_cuda_runtime3((op), #op, __FILE__, __LINE__)
|
|
// Reports the outcome of a CUDA runtime call.
//   code : status returned by the call
//   op   : source text of the call (for the message)
//   file : source file of the call site
//   line : source line of the call site
// Returns true when `code` is cudaSuccess; otherwise prints the error name and
// description with printf and returns false.
bool __check_cuda_runtime3(cudaError_t code, const char* op, const char* file, int line) {
    // Fast path: nothing to report.
    if (code == cudaSuccess) {
        return true;
    }

    printf("runtime error %s:%d %s failed. \n code = %s, message = %s\n",
           file, line, op, cudaGetErrorName(code), cudaGetErrorString(code));
    return false;
}
|
|
|
|
// Maps a TensorRT logger severity to a short lowercase label.
// Any value not listed below maps to "unknow".
inline const char* severity_string3(nvinfer1::ILogger::Severity t) {
    using Level = nvinfer1::ILogger::Severity;

    const char* label = "unknow";  // fallback for unlisted values
    switch (t) {
    case Level::kINTERNAL_ERROR:
        label = "internal_error";
        break;
    case Level::kERROR:
        label = "error";
        break;
    case Level::kWARNING:
        label = "warning";
        break;
    case Level::kINFO:
        label = "info";
        break;
    case Level::kVERBOSE:
        label = "verbose";
        break;
    default:
        break;
    }
    return label;
}
|
|
|
class Logger3 : public ILogger
|
{
|
void log(Severity severity, const char* msg) noexcept
|
{
|
// suppress info-level messages
|
if (severity != Severity::kINFO)
|
printf("%s: %s\n", severity_string3(severity), msg);
|
}
|
};
|
|
|
|
bool clasification::initConfig(const char* modelpath) {
|
std::ifstream file(modelpath, std::ios::binary);
|
char* trtModelStream = NULL;
|
int size = 0;
|
if (file.good()) {
|
file.seekg(0, file.end);
|
size = file.tellg();
|
file.seekg(0, file.beg);
|
trtModelStream = new char[size];
|
assert(trtModelStream);
|
file.read(trtModelStream, size);
|
file.close();
|
}
|
else {
|
return false;
|
}
|
|
// ��ʼ����������
|
Logger3 logger;
|
this->runtime = createInferRuntime(logger);
|
assert(this->runtime != nullptr);
|
this->engine = runtime->deserializeCudaEngine(trtModelStream, size);
|
assert(this->engine != nullptr);
|
this->context = engine->createExecutionContext();
|
assert(this->context != nullptr);
|
delete[] trtModelStream;
|
|
auto input_dims = engine->getBindingDimensions(0);
|
input_batch = input_dims.d[0];
|
input_channel = input_dims.d[1];
|
input_height = input_dims.d[2];
|
input_width = input_dims.d[3];
|
input_numel = input_batch * input_channel * input_height * input_width;
|
|
image_area = input_width * input_height;
|
|
printf("input size=%d*%d*%d*%d\n", input_batch, input_channel, input_height, input_width);
|
auto output_dims = engine->getBindingDimensions(1);
|
output_classs = output_dims.d[0];
|
num_classes = output_dims.d[1];
|
printf("output size=%d*%d\n", output_classs, num_classes);
|
|
output_numel = output_classs * num_classes;
|
|
checkRuntime3(cudaMallocHost(&input_data_host, input_numel * sizeof(float)));
|
checkRuntime3(cudaMalloc(&input_data_device, input_numel * sizeof(float)));
|
|
checkRuntime3(cudaMallocHost(&output_data_host, sizeof(float) * output_numel));
|
checkRuntime3(cudaMalloc(&output_data_device, sizeof(float) * output_numel));
|
|
|
checkRuntime3(cudaStreamCreate(&stream));
|
|
|
return true;
|
|
|
}
|
|
|
// Classifies one image and returns the index of the highest-scoring class.
//
// `image` must be a non-empty 8-bit, 3-channel cv::Mat. It is resized to
// input_width x input_height, normalised, written as three planes into the pinned
// input buffer, run through the execution context, and the arg-max over the first
// `num_classes` output values is returned.
//
// Returns -1 (after printing a message) when the image or the model shape is
// unusable, or when a CUDA call or the TensorRT enqueue fails.
//
// Only the first image_area * 3 floats of the input buffer are written, while
// input_numel floats are copied to the device; with an input batch larger than 1
// the remaining samples are not filled here.
int clasification::detect(cv::Mat& image) {
    // The pixel loop below steps 3 bytes per pixel, so anything other than
    // CV_8UC3 would read the wrong bytes or run past the end of the buffer.
    if (image.empty() || image.type() != CV_8UC3) {
        printf("detect: expected a non-empty 8-bit 3-channel image\n");
        return -1;
    }
    if (context == nullptr || input_channel != 3 || num_classes <= 0) {
        printf("detect: model is not initialised for 3-channel classification\n");
        return -1;
    }

    cv::Mat blob;
    cv::resize(image, blob, cv::Size(input_width, input_height));

    // Interleaved 3-byte pixels -> three planes. Byte 2 goes to plane 0, byte 1 to
    // plane 1 and byte 0 to plane 2, each with its own mean/std.
    // NOTE(review): assuming the caller passes OpenCV's default BGR order this
    // produces R,G,B planes; the constants look like the ImageNet statistics — confirm.
    const unsigned char* pixel = blob.data;
    float* plane0 = input_data_host;
    float* plane1 = input_data_host + image_area;
    float* plane2 = input_data_host + image_area * 2;
    for (int i = 0; i < image_area; ++i, pixel += 3) {
        // Double-precision constants are kept so the values match the original
        // preprocessing bit for bit.
        plane2[i] = static_cast<float>((pixel[0] / 255.0f - 0.406) / 0.225);
        plane1[i] = static_cast<float>((pixel[1] / 255.0f - 0.456) / 0.224);
        plane0[i] = static_cast<float>((pixel[2] / 255.0f - 0.485) / 0.229);
    }

    // Queue upload -> inference -> download on the stream, stopping at the first
    // failure.
    bool ok = checkRuntime3(cudaMemcpyAsync(input_data_device, input_data_host, input_numel * sizeof(float), cudaMemcpyHostToDevice, stream));
    if (ok) {
        void* bindings[] = { input_data_device, output_data_device };
        ok = context->enqueueV2(bindings, stream, nullptr);
        if (!ok) {
            printf("detect: enqueueV2 failed\n");
        }
    }
    if (ok) {
        ok = checkRuntime3(cudaMemcpyAsync(output_data_host, output_data_device, sizeof(float) * output_numel, cudaMemcpyDeviceToHost, stream));
    }
    // Always wait for the stream, even after a failure, so that no queued copy is
    // still touching the host buffers when this function returns.
    ok = checkRuntime3(cudaStreamSynchronize(stream)) && ok;
    if (!ok) {
        return -1;
    }

    const float* prob = output_data_host;
    return static_cast<int>(std::max_element(prob, prob + num_classes) - prob);
}
|
|
|
// Releases everything initConfig() created: waits for the stream, destroys the
// TensorRT objects (context, then engine, then runtime), frees the device and
// pinned host buffers, and destroys the stream.
//
// Each resource is released only when its pointer is non-null and is reset to
// nullptr afterwards.
// NOTE(review): this relies on the members being null-initialised in the class
// declaration (not visible here) for objects on which initConfig() was never
// called or failed early — confirm in the header.
clasification::~clasification() {
    // Wait for any queued work before releasing the buffers it uses.
    if (stream != nullptr) {
        checkRuntime3(cudaStreamSynchronize(stream));
    }

    if (context != nullptr) {
        context->destroy();
        context = nullptr;
    }
    if (engine != nullptr) {
        engine->destroy();
        engine = nullptr;
    }
    if (runtime != nullptr) {
        runtime->destroy();
        runtime = nullptr;
    }

    // Host buffers come from cudaMallocHost, so they must be released with
    // cudaFreeHost rather than delete[].
    if (output_data_host != nullptr) {
        checkRuntime3(cudaFreeHost(output_data_host));
        output_data_host = nullptr;
    }
    if (output_data_device != nullptr) {
        checkRuntime3(cudaFree(output_data_device));
        output_data_device = nullptr;
    }
    if (input_data_device != nullptr) {
        checkRuntime3(cudaFree(input_data_device));
        input_data_device = nullptr;
    }
    if (input_data_host != nullptr) {
        checkRuntime3(cudaFreeHost(input_data_host));
        input_data_host = nullptr;
    }

    if (stream != nullptr) {
        checkRuntime3(cudaStreamDestroy(stream));
        stream = nullptr;
    }
}
|