// ai/AIRecognitionLinux.git

#include"clasi.h" 
 
#define checkRuntime3(op)  __check_cuda_runtime3((op), #op, __FILE__, __LINE__) 
 
bool __check_cuda_runtime3(cudaError_t code, const char* op, const char* file, int line) { 
    if (code != cudaSuccess) { 
        const char* err_name = cudaGetErrorName(code); 
        const char* err_message = cudaGetErrorString(code); 
        printf("runtime error %s:%d  %s failed. \n  code = %s, message = %s\n", file, line, op, err_name, err_message); 
        return false; 
    } 
    return true; 
} 
 
 
 
inline const char* severity_string3(nvinfer1::ILogger::Severity t) { 
    switch (t) { 
    case nvinfer1::ILogger::Severity::kINTERNAL_ERROR: return "internal_error"; 
    case nvinfer1::ILogger::Severity::kERROR:   return "error"; 
    case nvinfer1::ILogger::Severity::kWARNING: return "warning"; 
    case nvinfer1::ILogger::Severity::kINFO:    return "info"; 
    case nvinfer1::ILogger::Severity::kVERBOSE: return "verbose"; 
    default: return "unknow"; 
    } 
} 
 
 
class Logger3 : public ILogger 
{ 
    void log(Severity severity, const char* msg)  noexcept 
    { 
        // suppress info-level messages 
        if (severity != Severity::kINFO) 
            printf("%s: %s\n", severity_string3(severity), msg); 
    } 
}; 
 
 
 
bool clasification::initConfig(const char* modelpath) { 
    std::ifstream file(modelpath, std::ios::binary); 
    char* trtModelStream = NULL; 
    int size = 0; 
    if (file.good()) { 
        file.seekg(0, file.end); 
        size = file.tellg(); 
        file.seekg(0, file.beg); 
        trtModelStream = new char[size]; 
        assert(trtModelStream); 
        file.read(trtModelStream, size); 
        file.close(); 
    } 
    else { 
        return false; 
    } 
 
    // ��ʼ���������� 
    Logger3 logger; 
    this->runtime = createInferRuntime(logger); 
    assert(this->runtime != nullptr); 
    this->engine = runtime->deserializeCudaEngine(trtModelStream, size); 
    assert(this->engine != nullptr); 
    this->context = engine->createExecutionContext(); 
    assert(this->context != nullptr); 
    delete[] trtModelStream; 
 
    auto input_dims = engine->getBindingDimensions(0); 
    input_batch = input_dims.d[0]; 
    input_channel = input_dims.d[1]; 
    input_height = input_dims.d[2]; 
    input_width = input_dims.d[3]; 
    input_numel = input_batch * input_channel * input_height * input_width; 
 
    image_area = input_width * input_height; 
 
    printf("input size=%d*%d*%d*%d\n", input_batch, input_channel, input_height, input_width); 
    auto output_dims = engine->getBindingDimensions(1); 
    output_classs = output_dims.d[0]; 
    num_classes = output_dims.d[1]; 
    printf("output size=%d*%d\n", output_classs, num_classes); 
 
    output_numel = output_classs * num_classes; 
 
    checkRuntime3(cudaMallocHost(&input_data_host, input_numel * sizeof(float))); 
    checkRuntime3(cudaMalloc(&input_data_device, input_numel * sizeof(float))); 
 
    checkRuntime3(cudaMallocHost(&output_data_host, sizeof(float) * output_numel)); 
    checkRuntime3(cudaMalloc(&output_data_device, sizeof(float) * output_numel)); 
 
 
    checkRuntime3(cudaStreamCreate(&stream)); 
 
 
    return true; 
 
 
} 
 
 
int clasification::detect(cv::Mat& image) { 
    cv::Mat blob; 
    cv::resize(image,blob, cv::Size(input_width, input_height)); 
    unsigned char* pimage = blob.data; 
    float* host_b = input_data_host + image_area * 0; 
    float* host_g = input_data_host + image_area * 1; 
    float* host_r = input_data_host + image_area * 2; 
    for (int i = 0; i < image_area; ++i, pimage += 3) { 
        *host_r++ = (pimage[0] / 255.0f - 0.406) / 0.225; 
        *host_g++ = (pimage[1] / 255.0f - 0.456) / 0.224; 
        *host_b++ = (pimage[2] / 255.0f - 0.485) / 0.229; 
    } 
 
    checkRuntime3(cudaMemcpyAsync(input_data_device, input_data_host, input_numel * sizeof(float), cudaMemcpyHostToDevice, stream)); 
    float* bindings[] = { input_data_device, output_data_device }; 
    bool success = context->enqueueV2((void**)bindings, stream, nullptr); 
    checkRuntime3(cudaMemcpyAsync(output_data_host, output_data_device, sizeof(float) * output_numel, cudaMemcpyDeviceToHost, stream)); 
    checkRuntime3(cudaStreamSynchronize(stream)); 
 
    float* prob = output_data_host; 
    int predict_label = std::max_element(prob, prob + num_classes) - prob; 
    //float confidence = prob[predict_label];  
    //printf("predict: confidence = %f, label = %d\n", confidence, predict_label); 
 
    return predict_label; 
 
 
} 
 
 
clasification::~clasification() { 
 
    // ͬ���������ͷ���Դ 
 
    checkRuntime3(cudaStreamSynchronize(stream)); 
    //if (stream != nullptr) 
    //{ 
        checkRuntime3(cudaStreamDestroy(stream)); 
    //} 
 
    if (!context) { 
        context->destroy(); 
    } 
    if (!engine) { 
        engine->destroy(); 
    } 
    if (!runtime) { 
        runtime->destroy(); 
    } 
 
    if (!output_data_host) { 
        delete[] output_data_host; 
 
    } 
 
    if (!output_data_device) { 
        checkRuntime3(cudaFree(output_data_device)); 
    } 
 
    if (!input_data_device) { 
        checkRuntime3(cudaFree(input_data_device)); 
    } 
 
 
    if (!input_data_host) { 
        delete[] input_data_host; 
    } 
 
}