// vim: set filetype=c:

/*
 * This is the GPU-side of GPU computation of Euler and Velocity-Verlet
 * methods.
 *
 * Each kernel (a special function that can be called by the host) has a bunch
 * of parameters flagged as __global. These parameters can be transferred to or
 * from the host. Generally, we use this set of parameters:
 * - planets_position_input
 * - planets_position_output
 * - planets_velocity_input
 * - planets_velocity_output
 * - planets_accel_input (only for Verlet)
 * - planets_accel_output (only for Verlet)
 * - planets_mass
 * planets_mass should be constant through the computations.
 * Parameters suffixed by _input are the parameters at the current step. When
 * a kernel is called, it puts the values of the next iteration on the
 * _output-suffixed parameters.
 * The host must then copy the _output-suffixed values to the _input-suffixed
 * ones before the next iteration.
 * Verlet requires to store acceleration because we need to take the mean
 * between the current-step and the next-step acceleration. However, we do not
 * compute acceleration more with Verlet than with Euler.
 *
 * There is one kernel for Euler called 'step_euler'. It computes the values
 * of the position and velocity for the next step.
 *
 * There are three kernels for Velocity-Verlet:
 * - init_vverlet does the initialization step
 * - step1_vverlet and step2_vverlet, when called one after the other perform
 * the computation of one iteration of Velocity-Verlet. We have to do so
 * because we cannot synchronize work-items outside workgroups.
 */
// Enable double-precision support through whichever extension macro is
// defined: the Khronos extension is tried first, the AMD one is the fallback.
// If neither macro is defined, no pragma is issued.
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#else
#ifdef cl_amd_fp64
#pragma OPENCL EXTENSION cl_amd_fp64 : enable
#endif
#endif

// Fallback values for the simulation parameters. Each one is only defined
// here when it has not already been defined before this point
// (presumably through the program build options -- confirm on the host side).

// Number of planets: loop bound of the acceleration sum and upper limit on
// the work-item index accepted by every kernel.
#ifndef NB_PLANET
#define NB_PLANET 1
#endif // NB_PLANET

// Time step that multiplies velocities and accelerations in the integrators.
#ifndef STEPSIZE
#define STEPSIZE 1
#endif // STEPSIZE

// Gravitational constant used in the acceleration sum
// (presumably in SI units -- confirm against the host data).
#ifndef G
#define G 6.67398E-11
#endif // G

// Softening term added to the cubed distance in the acceleration denominator.
#ifndef EPSILON
#define EPSILON 0
#endif // EPSILON

/* Return x multiplied by itself. */
static inline double square(double x) {
	const double squared = x * x;
	return squared;
}

/*
 * Gravitational acceleration felt by one planet (used by Euler).
 *
 * For every index i in [0, NB_PLANET) other than `planet`, with
 *   d = planets_position[i] - planets_position[planet]
 * the term
 *   G * planets_mass[i] * d / (|d|^3 + EPSILON)
 * is added to a private accumulator, which is returned at the end.
 *
 * planet:           index of the planet the acceleration is computed for
 * planets_position: positions of all planets (read only here)
 * planets_mass:     masses of all planets
 */
static inline double3 compute_accel(
		uint planet,
		__global double3 *planets_position,
		__global const double *planets_mass
		) {
	double3 total = 0;
	for(uint other = 0; other < NB_PLANET; other++) {
		if(other != planet) {
			// vector from the computed planet to the other one
			const double3 delta = planets_position[other]
				- planets_position[planet];
			// cubed distance plus the softening term
			const double softened_cube = pow(length(delta), 3.0) + EPSILON;

			total += G * planets_mass[other] * delta * 1.0 / softened_cube;
		}
	}
	return total;
}

/*
 * Compute the acceleration of one planet and store it into the __global
 * array *accel (used by Verlet).
 *
 * planet:           index of the planet; accel[planet] is the only entry
 *                   written
 * planets_position: positions of all planets (read only here)
 * planets_mass:     masses of all planets
 * accel:            destination array for the acceleration
 *
 * The sum itself is delegated to compute_accel, which accumulates in a
 * private variable. The result is then stored once, instead of doing a
 * read-modify-write on the __global entry at every loop iteration. The terms
 * and their summation order are the same as before.
 */
static inline void compute_global_accel(
		uint planet,
		__global double3 *planets_position,
		__global const double *planets_mass,
		__global double3 *accel
		) {
	accel[planet] = compute_accel(planet, planets_position, planets_mass);
}

/*
 * Advance one planet by one explicit Euler step.
 *
 * Each work-item handles the planet whose index is its global id along
 * dimension 0; work-items whose id is not below NB_PLANET do nothing.
 * Current-step values are read from the _input-suffixed arrays and the
 * next-step values are written to the _output-suffixed arrays:
 *   velocity_output = velocity_input + accel * STEPSIZE
 *   position_output = position_input + velocity_input * STEPSIZE
 * where accel is evaluated from the _input positions.
 */
__kernel void step_euler(
		__global double3 *planets_position_input,
		__global double3 *planets_velocity_input,
		__global double3 *planets_position_output,
		__global double3 *planets_velocity_output,
		__global const double *planets_mass
		) {
	const uint planet = get_global_id(0);

	// Surplus work-items (global size rounded above NB_PLANET) have no planet.
	if(planet >= NB_PLANET)
		return;

	const double3 accel = compute_accel(
			planet,
			planets_position_input,
			planets_mass);

	planets_velocity_output[planet] =
		planets_velocity_input[planet] + accel*STEPSIZE;

	// The position uses the current-step velocity, not the one just written.
	planets_position_output[planet] =
		planets_position_input[planet] +
		planets_velocity_input[planet]*STEPSIZE;
}

/*
 * First part of a Velocity-Verlet step.
 *
 * Each work-item handles the planet whose index is its global id along
 * dimension 0; work-items whose id is not below NB_PLANET do nothing.
 *
 * The next-step position depends only on current-step values:
 *   position_output = position_input
 *                   + velocity_input * STEPSIZE
 *                   + 0.5 * accel_input * STEPSIZE^2
 * The 0.5 factor on the acceleration term is part of the Velocity-Verlet
 * scheme.
 *
 * The acceleration for the next step is then evaluated from
 * planets_position_output and stored in planets_accel_output.
 *
 * NOTE(review): the next-step acceleration of a planet depends on the
 * next-step positions of ALL planets. compute_global_accel reads every
 * entry of planets_position_output, while the other entries are being
 * written by other work-items of this same launch without any
 * synchronization, so some of those reads may observe positions from the
 * previous step.
 *
 * planets_velocity_output and planets_mass aside, every parameter is used;
 * planets_velocity_output is not touched by this kernel.
 */
__kernel void step1_vverlet(
		__global double3 *planets_position_input,
		__global double3 *planets_velocity_input,
		__global double3 *planets_accel_input,
		__global double3 *planets_position_output,
		__global double3 *planets_velocity_output,
		__global double3 *planets_accel_output,
		__global const double *planets_mass
		) {
	uint planet = get_global_id(0);
	if(planet < NB_PLANET) {
		planets_position_output[planet] =
			planets_position_input[planet] +
			planets_velocity_input[planet]*STEPSIZE +
			0.5*planets_accel_input[planet]*square(STEPSIZE);

		compute_global_accel(
				planet,
				planets_position_output,
				planets_mass,
				planets_accel_output);
	}
}

/*
 * Second part of a Velocity-Verlet step.
 *
 * Each work-item handles the planet whose index is its global id along
 * dimension 0; work-items whose id is not below NB_PLANET do nothing.
 *
 * The next-step velocity depends on the current-step and the next-step
 * acceleration:
 *   velocity_output = velocity_input
 *                   + 0.5 * (accel_input + accel_output) * STEPSIZE
 *
 * The next-step acceleration of a planet needs the next-step position of
 * every planet. Work-items of different workgroups cannot be synchronized
 * inside a kernel, so step1_vverlet cannot guarantee that all of
 * planets_position_output is written when it evaluates the acceleration.
 * This kernel is launched after step1_vverlet, so the acceleration is
 * evaluated (again) here from planets_position_output, stored in
 * planets_accel_output[planet], and then used for the velocity. Each
 * work-item only writes its own planets_accel_output entry, so this
 * introduces no cross-work-item dependency.
 *
 * This adds a second acceleration pass per step; the pass in step1_vverlet
 * becomes redundant and could be dropped.
 *
 * planets_position_input is not used by this kernel.
 */
__kernel void step2_vverlet(
		__global double3 *planets_position_input,
		__global double3 *planets_velocity_input,
		__global double3 *planets_accel_input,
		__global double3 *planets_position_output,
		__global double3 *planets_velocity_output,
		__global double3 *planets_accel_output,
		__global const double *planets_mass
		) {
	uint planet = get_global_id(0);
	if(planet < NB_PLANET) {
		// Next-step acceleration from the now complete next-step positions.
		const double3 next_accel = compute_accel(
				planet,
				planets_position_output,
				planets_mass);
		planets_accel_output[planet] = next_accel;

		planets_velocity_output[planet] =
			planets_velocity_input[planet] +
			0.5 * (planets_accel_input[planet] + next_accel)*STEPSIZE;
	}
}

/*
 * Initialize the Velocity-Verlet method.
 *
 * Only the acceleration is computed, to start the algorithm: each work-item
 * whose global id along dimension 0 is below NB_PLANET evaluates the
 * acceleration of its planet from planets_position_output and stores it in
 * planets_accel_output. The other parameters are not used.
 *
 * NOTE(review): the positions are read from the _output array, not the
 * _input one -- presumably the host places the initial positions there
 * before calling this kernel; confirm against the host code.
 */
__kernel void init_vverlet(
		__global double3 *planets_position_input,
		__global double3 *planets_velocity_input,
		__global double3 *planets_accel_input,
		__global double3 *planets_position_output,
		__global double3 *planets_velocity_output,
		__global double3 *planets_accel_output,
		__global const double *planets_mass
		) {
	const uint planet = get_global_id(0);

	// Surplus work-items have no planet to handle.
	if(planet >= NB_PLANET)
		return;

	compute_global_accel(
			planet,
			planets_position_output,
			planets_mass,
			planets_accel_output);
}
