#include "DMJC.h"

#define checkRuntime(op)  __check_cuda_runtime((op), #op, __FILE__, __LINE__)

// Reports the outcome of a CUDA runtime call.
//   code : status returned by the call
//   op   : source text of the call (stringified by checkRuntime)
//   file : source file of the call site
//   line : source line of the call site
// Returns true when `code` is cudaSuccess. Otherwise prints the call site, the
// failing expression, and the CUDA error name/message, then returns false.
bool __check_cuda_runtime(cudaError_t code,const char* op,const char* file,int line) {
	if (code == cudaSuccess) {
		return true;
	}

	printf("runtime error %s:%d  %s failed. \n  code = %s, message = %s\n",
		file, line, op, cudaGetErrorName(code), cudaGetErrorString(code));
	return false;
}

// Maps a TensorRT logger severity to the lowercase label used in log output.
// Any severity that is not one of the five handled values yields "unknow".
inline const char* severity_string(nvinfer1::ILogger::Severity t) {
	using Level = nvinfer1::ILogger::Severity;

	if (t == Level::kINTERNAL_ERROR) {
		return "internal_error";
	}
	if (t == Level::kERROR) {
		return "error";
	}
	if (t == Level::kWARNING) {
		return "warning";
	}
	if (t == Level::kINFO) {
		return "info";
	}
	if (t == Level::kVERBOSE) {
		return "verbose";
	}
	return "unknow";
}

// Forward declaration only; the definition is not in this part of the file.
// How this file calls it:
//   src            - device buffer that the source image bytes were copied into
//   src_line_size  - passed as `width * 3`
//   src_width/src_height - dimensions of the source image
//   dst            - the device-side network input buffer
//   dst_width/dst_height - the network input dimensions
//   fill_value     - passed as 114
//   matrix         - AffineMatrix computed from the source and network input sizes
//   stream         - the CUDA stream shared with the copies and inference
// NOTE(review): presumably performs the resize/letterbox preprocessing into `dst`
// asynchronously on `stream` - confirm against the definition.
void warp_affine_bilinear(uint8_t* src, int src_line_size, int src_width, int src_height,
	float* dst, int dst_width, int dst_height, uint8_t fill_value, AffineMatrix matrix, cudaStream_t stream);

// ILogger implementation that prints every message except info-level ones.
class Logger : public ILogger
{
	// Prints "<severity label>: <message> zzj2"; kINFO messages are dropped.
	void log(Severity severity, const char* msg)  noexcept
	{
		const bool is_info = (severity == Severity::kINFO);
		if (is_info) {
			return;
		}
		printf("%s: %s zzj2\n", severity_string(severity), msg);
	}
};

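// NOTE(review): this destructor is disabled. If it is re-enabled, the null checks
// below are inverted: `if (!ptr)` runs the cleanup only when the pointer is null.
//DMJC::~DMJC()
//{
//	// Synchronize the stream, then release its resources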
//	checkRuntime(cudaStreamSynchronize(stream));
//	checkRuntime(cudaStreamDestroy(stream));
//
//	if (!output_data_host) {
//		delete[] output_data_host;
//	}
//
//	if (!input_data_device) {
//		checkRuntime(cudaFree(input_data_device));
//	}
//
//	if (!input_data_host) {
//		delete[] input_data_host;
//	}
//
//	if (!context) {
//		context->destroy();
//	}
//	if (!engine) {
//		engine->destroy();
//	}
//	if (!runtime) {
//		runtime->destroy();
//	}
//}

bool DMJC::initConfig(const char* enginefile,int* beltregion)
{
	const std::string trtfile = enginefile;
	std::ifstream file(trtfile, std::ios::binary);
	char* trtModelStream = NULL;
	int size = 0;
	if (file.good()) {
		file.seekg(0, file.end);
		size = file.tellg();
		file.seekg(0, file.beg);
		trtModelStream = new char[size];
		assert(trtModelStream);
		file.read(trtModelStream, size);
		file.close();
	}
	else {
		return false;
	}

	// ��ʼ��
	Logger logger;
	this->runtime = createInferRuntime(logger);
	assert(this->runtime != nullptr);
	this->engine = runtime->deserializeCudaEngine(trtModelStream, size);
	assert(this->engine != nullptr);
	this->context = engine->createExecutionContext();
	assert(this->context != nullptr);
	delete[] trtModelStream;

	auto input_dims = engine->getBindingDimensions(0);
	input_batch = input_dims.d[0];
	input_channel = input_dims.d[1];
	input_height = input_dims.d[2];
	input_width = input_dims.d[3];
	input_numel = input_batch * input_channel * input_height * input_width;
	//printf("input size=%d*%d*%d*%d\n", input_batch, input_channel, input_height,input_width);

	auto output_dims = engine->getBindingDimensions(1);
	num_classes = output_dims.d[1];
	output_height = output_dims.d[2];
	output_width = output_dims.d[2];

	output_numel = input_batch * num_classes * output_height * output_width;
	//printf("output size=%d*%d*%d*%d\n",input_batch, num_classes, output_height, output_width);

	input_data_host = new float[sizeof(float) * input_numel] { 0 };
	output_data_host = new float[sizeof(float) * output_numel] { 0 };

	checkRuntime(cudaMalloc(&input_data_device, input_numel * sizeof(float)));
	checkRuntime(cudaMalloc(&output_data_device, output_numel * sizeof(float)));

	checkRuntime(cudaStreamCreate(&stream));

	for (int i = 0; i < 4; i++) {
		pts_.push_back(cv::Point(beltregion[i * 2], beltregion[i * 2 + 1]));
	}

	this->beltarea = (pts_[1].x - pts_[0].x + pts_[2].x - pts_[3].x) * (pts_[2].y - pts_[1].y) / 2.;

	if (this->beltarea <= 0) {
		return false;
	}


	return true;
}

// Runs inference on `image` and returns the fraction of the belt area covered
// by pixels whose class-1 score exceeds their class-0 score.
//
//   image : 8-bit, 3-channel image (CV_8UC3)
//
// Returns:
//   0   when no pixel is classified as class 1
//   -1  when the image is empty or not CV_8UC3, the output buffer is too small
//       for two score planes, or any CUDA/TensorRT call fails
//   otherwise count / (scale^2 * beltarea), where scale is affine.i2d[0]
//
// Any std::exception raised inside is rethrown as std::runtime_error with the
// message prefixed by "DMJC-".
float DMJC::detect(cv::Mat& image) 
{
	try
	{
		// The preprocessing call is given a row stride of width * 3, so only
		// 8-bit 3-channel input is valid.
		if (image.empty() || image.type() != CV_8UC3) {
			return -1;
		}

		// Two score planes of input_width * input_height are read from the output.
		const size_t plane = static_cast<size_t>(input_width) * static_cast<size_t>(input_height);
		if (plane == 0 || output_numel < 1 || plane * 2 > static_cast<size_t>(output_numel)) {
			return -1;
		}

		// A Mat that views a larger image has gaps between rows; copy it so the
		// pixel data is one contiguous block.
		const cv::Mat src = image.isContinuous() ? image : image.clone();

		const int width = src.cols;
		const int height = src.rows;
		const size_t src_size = static_cast<size_t>(width) * static_cast<size_t>(height) * 3;

		AffineMatrix affine;
		affine.compute(width, height, input_width, input_height);

		uint8_t* psrc_device = nullptr;
		bool ok = checkRuntime(cudaMalloc(&psrc_device, src_size));
		ok = ok && checkRuntime(cudaMemcpyAsync(psrc_device, src.data, src_size, cudaMemcpyHostToDevice, stream));

		if (ok) {
			warp_affine_bilinear(psrc_device, width * 3, width, height, input_data_device, input_width, input_height, 114, affine, stream);
			ok = checkRuntime(cudaGetLastError());
		}

		if (ok) {
			float* bindings[] = { input_data_device,output_data_device };
			ok = context->enqueueV2((void**)bindings, stream, nullptr);
		}

		ok = ok && checkRuntime(cudaMemcpyAsync(output_data_host, output_data_device, sizeof(float) * output_numel, cudaMemcpyDeviceToHost, stream));

		// Wait for all queued work before freeing the source buffer or reading
		// the results on the host.
		const bool synced = checkRuntime(cudaStreamSynchronize(stream));
		if (psrc_device != nullptr) {
			checkRuntime(cudaFree(psrc_device));
		}
		if (!ok || !synced) {
			return -1;
		}

		// Count pixels where the class-1 score beats the class-0 score.
		const float* class0 = output_data_host;
		const float* class1 = output_data_host + plane;
		int hits = 0;
		for (size_t i = 0; i < plane; i++) {
			if (class0[i] < class1[i]) {
				hits++;
			}
		}

		if (hits == 0) {
			return 0;
		}

		const float counts = static_cast<float>(hits);
		const float scale = affine.i2d[0];

		// The pixel count is in network-input pixels; scale^2 * beltarea
		// expresses the belt area in the same units.
		return counts / (scale * scale * this->beltarea);
	}
	catch (const std::exception& ex)
	{
		std::string errorMessage = "DMJC-";
		errorMessage += ex.what();
		throw std::runtime_error(errorMessage);
	}
}

// Same measurement as detect(), and additionally renders a visualization into
// `image_pic`: the per-pixel classification is colored, warped back to the
// source image size, the belt polygon is outlined, and the result is blended
// with `image` (70% image, 30% overlay).
//
//   image     : 8-bit, 3-channel image (CV_8UC3)
//   image_pic : receives the blended visualization; left untouched on failure
//
// Returns:
//   0   when no pixel is classified as class 1
//   -1  when the image is empty or not CV_8UC3, the output buffer is too small
//       for two score planes, or any CUDA/TensorRT call fails
//   otherwise count / (scale^2 * beltarea), where scale is affine.i2d[0]
float DMJC::detect_pic(cv::Mat& image, cv::Mat& image_pic) {
	// The preprocessing call is given a row stride of width * 3, and the final
	// blend needs the same type as the CV_8UC3 overlay.
	if (image.empty() || image.type() != CV_8UC3) {
		return -1;
	}

	// Two score planes of input_width * input_height are read from the output.
	const size_t plane = static_cast<size_t>(input_width) * static_cast<size_t>(input_height);
	if (plane == 0 || output_numel < 1 || plane * 2 > static_cast<size_t>(output_numel)) {
		return -1;
	}

	// A Mat that views a larger image has gaps between rows; copy it so the
	// pixel data is one contiguous block.
	const cv::Mat src = image.isContinuous() ? image : image.clone();

	const int width = src.cols;
	const int height = src.rows;
	const size_t src_size = static_cast<size_t>(width) * static_cast<size_t>(height) * 3;

	AffineMatrix affine;
	affine.compute(width, height, input_width, input_height);

	uint8_t* psrc_device = nullptr;
	bool ok = checkRuntime(cudaMalloc(&psrc_device, src_size));
	ok = ok && checkRuntime(cudaMemcpyAsync(psrc_device, src.data, src_size, cudaMemcpyHostToDevice, stream));

	if (ok) {
		warp_affine_bilinear(psrc_device, width * 3, width, height, input_data_device, input_width, input_height, 114, affine, stream);
		ok = checkRuntime(cudaGetLastError());
	}

	if (ok) {
		float* bindings[] = { input_data_device,output_data_device };
		ok = context->enqueueV2((void**)bindings, stream, nullptr);
	}

	ok = ok && checkRuntime(cudaMemcpyAsync(output_data_host, output_data_device, sizeof(float) * output_numel, cudaMemcpyDeviceToHost, stream));

	// Wait for all queued work before freeing the source buffer or reading the
	// results on the host.
	const bool synced = checkRuntime(cudaStreamSynchronize(stream));
	if (psrc_device != nullptr) {
		checkRuntime(cudaFree(psrc_device));
	}
	if (!ok || !synced) {
		return -1;
	}

	// Build a color mask in network-input coordinates while counting class-1 pixels.
	const float* class0 = output_data_host;
	const float* class1 = output_data_host + plane;
	int hits = 0;
	cv::Mat mask(input_height, input_width, CV_8UC3);
	for (int r = 0; r < input_height; r++) {
		cv::Vec3b* line = mask.ptr<cv::Vec3b>(r);
		const size_t row_offset = static_cast<size_t>(r) * static_cast<size_t>(input_width);
		for (int c = 0; c < input_width; c++) {
			const size_t idx = row_offset + static_cast<size_t>(c);
			if (class0[idx] < class1[idx])
			{
				// Class-1 pixel.
				hits++;
				line[c] = cv::Vec3b(0, 70, 70);
				continue;
			}

			// Map the pixel back to source-image coordinates with the same 2x3
			// matrix that cv::warpAffine receives below, then test it against
			// the belt polygon.
			point p;
			p.x = affine.d2i[0] * c + affine.d2i[1] * r + affine.d2i[2];
			p.y = affine.d2i[3] * c + affine.d2i[4] * r + affine.d2i[5];
			if (panduandm(pts_, p))
			{
				// Inside the belt polygon.
				line[c] = cv::Vec3b(0, 215, 50);
			}
			else
			{
				// Outside the belt polygon.
				line[c] = cv::Vec3b(32, 12, 215);
			}
		}
	}

	// Warp the mask from network-input coordinates to the source image size.
	cv::Mat m2x3_d2i(2, 3, CV_32F, affine.d2i);
	cv::Mat mask_out(height, width, CV_8UC3);
	cv::warpAffine(mask, mask_out, m2x3_d2i, mask_out.size(), cv::INTER_LINEAR, 0, cv::Scalar::all(114));
	// Outline the belt polygon and blend the overlay with the source image.
	cv::polylines(mask_out, pts_, true, cv::Scalar(215, 0, 0), 2);
	cv::addWeighted(image, 0.7, mask_out, 0.3, 0, image_pic);

	if (hits == 0) {
		return 0;
	}

	const float counts = static_cast<float>(hits);

	// The pixel count is in network-input pixels; scale^2 * beltarea expresses
	// the belt area in the same units.
	return counts / (affine.i2d[0] * affine.i2d[0] * this->beltarea);

}

// Point-in-polygon test using horizontal ray casting against the quadrilateral
// formed by the first four entries of `pts` (edges 0-1, 1-2, 2-3, 3-0).
//
//   pts : polygon vertices; at least four are required
//   p   : point to test
//
// Returns true when a ray from `p` towards +x crosses an odd number of edges,
// i.e. `p` is inside. Returns false otherwise, or when `pts` has fewer than
// four vertices.
bool DMJC::panduandm(std::vector<cv::Point>& pts, point p) {
	const size_t kVertexCount = 4;
	if (pts.size() < kVertexCount) {
		return false;
	}

	int num = 0;
	for (size_t i = 0; i < kVertexCount; i++) {
		const cv::Point& a = pts[i];
		const cv::Point& b = pts[(i + 1) % kVertexCount];

		// A horizontal edge never crosses a horizontal ray.
		if (a.y == b.y) {
			continue;
		}

		// Each edge covers the half-open range [y_min, y_max), so a vertex
		// shared by two edges is attributed to exactly one of them.
		const float y_min = static_cast<float>(std::min(a.y, b.y));
		const float y_max = static_cast<float>(std::max(a.y, b.y));
		if (p.y < y_min || p.y >= y_max) {
			continue;
		}

		// x coordinate where the edge meets the ray's height. The crossing only
		// counts when it is to the right of the point; this also applies when
		// the point is level with the edge's lower vertex.
		const float x = (p.y - a.y) * static_cast<float>(a.x - b.x) / static_cast<float>(a.y - b.y) + a.x;
		if (x > p.x) {
			num++;
		}
	}

	// Odd number of crossings means inside.
	return (num & 1) != 0;

}