/*
 * Generic methods for GPU computing.
 */
#include "gpu.h"
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

#define MAX_SOURCE_SIZE 10000000


/*
 * Error callback registered with clCreateContext (see initialize_gpu).
 *
 * Writes the message received in errinfo to stderr. The other three
 * arguments (private_info, cb, user_data) are accepted only to match the
 * callback signature and are not used.
 */
void pfn_notify(const char *errinfo, const void *private_info,
		size_t cb, void *user_data) {
	/* Only the message matters here; mark the rest as intentionally unused. */
	(void) user_data;
	(void) cb;
	(void) private_info;

	fprintf(stderr, "OpenCL Error (via pfn_notify): %s\n", errinfo);
}

/*
 * Put coordinates (position or velocity) in a cl_double3.
 *
 * cl_double3 is a peculiar structure: it works like a cl_double4, which means
 * it is a 4-vector. However, only the first 3 coordinates will be used in the
 * OpenCL code.
 */
cl_double3 *redim_coords(int dim, int nb_planet, double coords[nb_planet][dim]) {
	if(dim > MAXDIM) {
		fprintf(stderr, "Dimension not valid (should be less than %d).\n", MAXDIM);
		abort();
	}
	int planet, k;
	cl_double3 *new_coords;

	posix_memalign((void **)&new_coords, sizeof(cl_double3), nb_planet*sizeof(cl_double3));
	memset(new_coords, 0, nb_planet*sizeof(cl_double3));
	for(planet = 0; planet < nb_planet; planet++) {
		for(k = 0; k < dim; k++)
			new_coords[planet].s[k] = coords[planet][k];
	}
	return new_coords;
}

/*
 * Initialize GPU.
 *
 * Only the default device is set up: the first platform returned by
 * clGetPlatformIDs and one CL_DEVICE_TYPE_DEFAULT device of that platform.
 * A context (with pfn_notify as error callback) and a command queue are
 * created for that device, and the device's maximum work-group size is
 * stored in host->max_wg_size.
 *
 * Returns a heap-allocated cl_host. Fields that are not filled in here
 * (such as program) are zero-initialized. The function aborts when memory
 * cannot be allocated, when no platform or device is found, or when the
 * context or the command queue cannot be created.
 *
 * NOTE(review): CL_CHECK is defined outside this file; presumably it
 * reports and aborts on a non-success status -- confirm in gpu.h.
 */
cl_host* initialize_gpu(void) {
	cl_int ret;
	/* calloc rather than malloc so that unset fields start out as NULL/0. */
	cl_host *host = calloc(1, sizeof *host);

	if(!host) {
		fprintf(stderr, "Can't allocate the host structure.\n");
		abort();
	}

	host->platforms = malloc(sizeof(cl_platform_id));
	if(!host->platforms) {
		fprintf(stderr, "Can't allocate the platform list.\n");
		abort();
	}
	CL_CHECK(clGetPlatformIDs(1, host->platforms, &host->nb_platform));

	if(host->nb_platform == 0) {
		fprintf(stderr, "No CL platform found.\n");
		abort();
	}

	host->devices = malloc(sizeof(cl_device_id));
	if(!host->devices) {
		fprintf(stderr, "Can't allocate the device list.\n");
		abort();
	}
	CL_CHECK(clGetDeviceIDs(host->platforms[0], CL_DEVICE_TYPE_DEFAULT, 1, host->devices,
				&host->nb_device));

	if (host->nb_device == 0) {
		fprintf(stderr, "No CL device found.\n");
		abort();
	}

	CL_CHECK(clGetDeviceInfo(host->devices[0], CL_DEVICE_MAX_WORK_GROUP_SIZE,
				sizeof(size_t), &host->max_wg_size, NULL));

	host->context = clCreateContext(
			NULL, 1, host->devices, &pfn_notify, NULL, &ret);
	if (ret != CL_SUCCESS) {
		fprintf(stderr, "clCreateContext returned %d.\n", ret);
		abort();
	}

	host->command_queue = clCreateCommandQueue(
			host->context, host->devices[0], 0, &ret);
	if (ret != CL_SUCCESS) {
		fprintf(stderr, "clCreateCommandQueue returned %d.\n", ret);
		abort();
	}
	return host;
}

/*
 * Build the OpenCL program from file filename.
 *
 * host:     supplies the context and the device list used for the build;
 *           on success the new program is also stored in host->program
 * filename: path of the OpenCL source file; its content must be smaller
 *           than MAX_SOURCE_SIZE bytes
 * options:  build options passed unchanged to clBuildProgram
 *
 * Returns the built program. The function aborts when the file cannot be
 * opened or read, when it is too big, when memory runs out, or when the
 * program cannot be created or built. On a build failure the build log of
 * the first device is printed to stderr when it can be retrieved.
 */
cl_program buildProgramFromFile(cl_host *host, const char *filename,
		const char *options) {
	FILE *fp;
	size_t source_size;
	char *source_str;
	cl_int ret;

	cl_program program;

	fp = fopen(filename, "r");

	if(!fp) {
		fprintf(stderr, "Can't read file %s.\n", filename);
		abort();
	}
	source_str = malloc(MAX_SOURCE_SIZE);
	if(!source_str) {
		fprintf(stderr, "Can't allocate the source buffer for file %s.\n", filename);
		abort();
	}
	source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
	/* A short read is normal at end of file; only a stream error is fatal. */
	if(ferror(fp)) {
		fprintf(stderr, "Can't read file %s.\n", filename);
		abort();
	}
	fclose(fp);
	/* A completely filled buffer means the file may have been truncated. */
	if(source_size >= MAX_SOURCE_SIZE) {
		fprintf(stderr, "File %s is too big.\n", filename);
		abort();
	}

	/* The length is passed explicitly, so the buffer needs no terminator. */
	program = clCreateProgramWithSource(host->context,
			1, (const char **) &source_str, &source_size, &ret);
	free(source_str);
	if(ret != CL_SUCCESS) {
		fprintf(stderr, "clCreateProgramWithSource returned %d.\n", ret);
		abort();
	}
	host->program = program;

	ret = clBuildProgram(program, 1, host->devices, options, NULL, NULL);

	if(ret != CL_SUCCESS) {
		char *log = NULL;
		size_t log_size = 0;

		/* First ask for the size of the log, then fetch the log itself. */
		if(clGetProgramBuildInfo(program, host->devices[0],
				CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size) == CL_SUCCESS)
			log = malloc(log_size+1);
		if(log && clGetProgramBuildInfo(program, host->devices[0],
				CL_PROGRAM_BUILD_LOG, log_size, log, NULL) == CL_SUCCESS) {
			/* Terminate explicitly instead of trusting the returned data. */
			log[log_size] = '\0';
			fprintf(stderr, "Build failed :\n%s\n", log);
		} else {
			fprintf(stderr, "Build failed (clBuildProgram returned %d, "
					"no build log available).\n", ret);
		}
		free(log);
		abort();
	}

	/* NOTE(review): clUnloadCompiler is deprecated as of OpenCL 1.2. */
	CL_CHECK(clUnloadCompiler());
	return program;
}

/*
 * Release the resources attached to host.
 *
 * Releases host->program (only when it is non-NULL), the command queue and
 * the context, then frees the platform and device arrays allocated by
 * initialize_gpu and resets those pointers to NULL. The cl_host structure
 * itself is not freed; that is left to the caller.
 *
 * A NULL host is accepted and ignored. Returns 0.
 */
int clReleaseHost(cl_host *host) {
	if(!host)
		return 0;

	if(host->program) {
		CL_CHECK(clReleaseProgram(host->program));
		host->program = NULL;
	}
	CL_CHECK(clReleaseCommandQueue(host->command_queue));
	CL_CHECK(clReleaseContext(host->context));

	/* Reset the pointers so that a later free() on them stays harmless. */
	free(host->devices);
	host->devices = NULL;
	free(host->platforms);
	host->platforms = NULL;
	return 0;
}

/*
 * Choose the workgroup size for nb_thread work-items.
 *
 * The result is nb_thread itself when it is below the device's maximum
 * workgroup size (host->max_wg_size), and that maximum otherwise.
 */
size_t optimal_workgroup_size(cl_host *host, uint nb_thread) {
	if(nb_thread < host->max_wg_size)
		return nb_thread;

	return host->max_wg_size;
}

/*
 * Compute the optimal global work size, in accordance with the optimal
 * workgroup size computed.
 *
 * When nb_thread is below host->max_wg_size the global size is nb_thread
 * itself. Otherwise nb_thread is rounded up to the next multiple of
 * host->max_wg_size, so that the global size is divisible by the workgroup
 * size.
 */
size_t optimal_globalwork_size(cl_host *host, uint nb_thread) {
	size_t wg_size = host->max_wg_size;
	size_t nb_group;

	/* The wg_size == 0 test only guards the division below. */
	if(wg_size == 0 || nb_thread < wg_size)
		return nb_thread;

	/*
	 * Integer ceiling division: no round trip through double and no
	 * narrowing to int, so large thread counts are handled correctly.
	 */
	nb_group = nb_thread / wg_size;
	if(nb_thread % wg_size != 0)
		nb_group++;
	return nb_group * wg_size;
}

