#ifndef GPUPPUR_GPUPPURAY_DEFAULT_IMPLEMENT
#define GPUPPUR_GPUPPURAY_DEFAULT_IMPLEMENT

/**
 *	@file	
 *	@brief	Default implementation of GPUPPURay.
 *	@author	Tomohiro Matsumoto
 *
 */

#include <gpuppur/utility/begin_suppress_warnings_from_others_code.hpp>
#include <boost/operators.hpp>
#include <boost/ref.hpp>
#include <boost/bind.hpp>
#include <boost/function.hpp>
#include <stack>
#include <gpuppur/utility/end_suppress_warnings.hpp>

#include <gpuppur/3dmath/num_convert.hpp>
#include <gpuppur/texbuf/gl_tex_buf.hpp>
#include <gpuppur/texbuf/dx_tex_buf.hpp>
#include <gpuppur/shader/glsl.hpp>
#include <gpuppur/shader/hlsl.hpp>
#include <gpuppur/utility/lens.hpp>
#include <gpuppur/mesh/rectangle_opengl.hpp>
#include <gpuppur/mesh/rectangle_directx.hpp>
#include "gpuppuray.hpp"
#include "gpu_buf.hpp"
#include "ppu/nxphysics.hpp"
#include "cpu/cpu_raytracer.hpp"
#include "texture.hpp"

//#define BATCHED_QUERIES 0

namespace gpuppur
{

template
<
	class					LightingParamBuf=gpu_buf<>,
	class					ShaderClass=GLSL,
	class					Rectangle=rectangle_opengl,
	template<class> class	TextureImpl=gl_texbuf
>
class gpuppuray_default_implement
{
public:

	typedef
		gpuppuray_default_implement
		<
			LightingParamBuf,
			ShaderClass,
			Rectangle,
			TextureImpl
		>
		this_type;

template<class Base>
class c : public Base
{
public:
	typedef Base base;
	typedef c<Base> this_type;
	const static bool is_virtual = boost::is_polymorphic<Base>::value;

private:

	typedef physics<gpuppur::material>			raytracer;
//	typedef cpu_raytracer<gpuppur::material>	raytracer;

public:

	typedef LightingParamBuf	lp_buf;
	typedef
		typename boost::mpl::if_c
		<
			is_virtual,
			gpuppur::instance3d_generic,
			typename raytracer::instance3d_static
		>::type
		instance3d;

	/// Mesh handle type handed out by create_mesh*(): gpuppur::mesh_generic
	/// when Base is polymorphic (is_virtual), raytracer::mesh_static otherwise.
	/// `typename` is spelled out to match the instance3d typedef.
	typedef
		typename boost::mpl::if_c
		<
			is_virtual,
			gpuppur::mesh_generic,
			typename raytracer::mesh_static
		>::type
		mesh;

private:

	/// Groups the texture implementation types used for one texture Group.
	template<class Group>
	class inner_texture
	{
	public:
		/// texture_implement instantiated for this Group's texbuf and for this class.
		typedef texture_implement<texbuf<Group, TextureImpl>, this_type> impl;

		/// Shorthand for impl::texture_tmp.
		typedef typename impl::texture_tmp tmp;
	};

public:

	/// Metafunction: texture<Group>::type is the texture handle type for Group.
	/// It is gpuppur::texture_generic<Group> when Base is polymorphic
	/// (is_virtual), and inner_texture<Group>::impl::texture_static otherwise.
	template<class Group>
	struct texture
	{
		typedef typename boost::mpl::if_c<
			is_virtual,
			gpuppur::texture_generic<Group>,
			typename this_type::template inner_texture<Group>::impl::texture_static
		>::type type;
	};

private:

	/**
	 *	Saved loop state for the stack-driven _adaptive_subsampling():
	 *	the current row/column (i, j), their exclusive upper bounds
	 *	(max_i, max_j), the sampling step and the buffer position.
	 *
	 *	Members are public on purpose: the type itself is private to the
	 *	enclosing class, and _adaptive_subsampling() constructs instances and
	 *	reads/writes the fields directly, which a private section would forbid.
	 */
	struct arg
	{
		int i, j, max_i, max_j;
		int step;
		typename LightingParamBuf::buf_iterator itr;

		arg
		(
			int i, int j,
			int max_i, int max_j,
			int step, typename LightingParamBuf::buf_iterator itr
		):
			i(i), j(j), max_i(max_i), max_j(max_j), step(step), itr(itr)
		{}
	};

	/**
	 *	@brief	Cast the ray for one screen position and store the result.
	 *
	 *	@param	j				Horizontal position; divided by half the buffer width.
	 *	@param	i				Vertical position; divided by half the buffer height.
	 *	@param	camera_side_dir	Camera side axis, passed through to the lens.
	 *	@param	iterator		Buffer position that receives the result.
	 *
	 *	Without BATCHED_QUERIES the ray is cast immediately and the hit normal
	 *	and diffuse colour are written through @a iterator; a miss writes a
	 *	black material. With BATCHED_QUERIES the ray is only queued, tagged
	 *	with the offset of @a iterator from the start of rendering_buf.
	 */
	void sampling
	(
		const int								j,
		const int								i,
		const vector3&							camera_side_dir,
		const typename lp_buf::buf_iterator&	iterator
	)
	{
		float screen_x = static_cast<float>(j)/(this->width/2);
		float screen_y = static_cast<float>(i)/(this->height/2);

		// Ray direction for this screen position, as supplied by the lens.
		vector3 dir
		=
		this->lens.get_ray_dir
		(
			this->camera_front_dir,
			camera_side_dir,
			this->camera_up_dir,
			screen_x,
			screen_y
		);

		// Turn the near/far distances along the view axis into distances
		// along this ray.
		float along_front = dir.innerProduct(this->camera_front_dir);
		float near_t = this->lens.get_near()/along_front;
		float far_t = this->lens.get_far()/along_front;

		// Starting the ray near_t along its direction makes it behave
		// as if there were a near clipping plane.
		ray r
		(
			this->camera_pos
			+
			this->lens.get_ray_local_pos
			(
				this->camera_front_dir,
				camera_side_dir,
				this->camera_up_dir,
				screen_x,
				screen_y
			)
			+
			dir*near_t,
			dir
		);

	#ifndef BATCHED_QUERIES 	//no batch query
		vector3 pos, normal;
		raytracer::instance3d_tmp ins(this->ppu.cast_ray(r, far_t, pos, normal));

		// Material used when the ray hits nothing.
		gpuppur::material back_ground(vector3(0.0f, 0.0f, 0.0f));
		const gpuppur::material* mtrl;
		if(!ins)
		{
			mtrl = &back_ground;
		}
		else
		{
			mtrl = &ins.get_user_data();
		}

		iterator.write(normal, mtrl->diffuse);
	#else	//batched queries
		this->ppu.cast_ray_batch
		(
			r, far_t, static_cast<int>(iterator-this->rendering_buf.get_iterator())
		);
	#endif
	}

	#ifdef BATCHED_QUERIES
	void casted_ray_report
	(
		const physics::instance3d_tmp ins,
		const vector3& pos,
		const vector3& normal,
		const int id
	)
	{
		const gpuppur::material* mtrl
		=
		reinterpret_cast<const gpuppur::material*>
		(
			ins.get_user_data()
		);

		(this->rendering_buf.get_iterator()+id).write(normal, mtrl->diffuse);
	}
	#endif

	//loop
	/**
	 *	@brief	Loop-based variant of the adaptive subsampling of one cell,
	 *			using an explicit stack instead of recursion.
	 *
	 *	@param	width			Row stride used for iterator arithmetic.
	 *	@param	i				Row coordinate of the cell's first sample.
	 *	@param	j				Column coordinate of the cell's first sample.
	 *	@param	step			Edge length of the cell in samples.
	 *	@param	begin_itr		Buffer position of the cell's first sample.
	 *	@param	camera_side_dir	Camera side axis, forwarded to sampling().
	 *
	 *	For each cell the four corner entries are compared with
	 *	rendering_buf.is_similar(). If they are similar, entries inside the
	 *	cell are written from the first corner. Otherwise three more rays are
	 *	cast (cell centre, middle of the right edge, middle of the bottom
	 *	edge) and, unless the step is already 2, the cell is revisited as a
	 *	grid with half the step.
	 *
	 *	NOTE(review): no caller of this function is visible in this file.
	 */
	void _adaptive_subsampling
	(
		const int								width,
		const int								i,
		const int								j,
		const int								step,
		const typename lp_buf::buf_iterator&	begin_itr,
		const vector3&							camera_side_dir
	)
	{
		std::stack<arg>	stk;						// Suspended outer loop states, resumed after a subdivision finishes.
		arg crnt(i, j, i+step, j+step, step, begin_itr);	// State of the loop currently running: one step x step cell.

		do
		{
			// Re-entry point: both after descending into a finer grid and
			// after resuming a popped state, the loops continue from crnt.
			recur:
			;
			for(; crnt.i<crnt.max_i; crnt.i+=crnt.step)
			{
				for(; crnt.j<crnt.max_j; crnt.j+=crnt.step)
				{
					// Compare the four corners of the current cell.
					if
					(
						this->rendering_buf.is_similar
						(
							crnt.itr,
							crnt.itr+crnt.step,
							crnt.itr+width*crnt.step,
							crnt.itr+width*crnt.step+crnt.step
						)
					)
					{
						// Corners agree: copy the first corner into the cell,
						// starting one row down and one column right of it.
						// NOTE(review): what write_while() does is not visible here.
						typename LightingParamBuf::buf_iterator base_itr(crnt.itr);
						typename LightingParamBuf::buf_iterator fill_itr(crnt.itr+width+1);
						for(int k=1; k<crnt.step; ++k)
						{
							for(int l=1; l<crnt.step+1; ++l)
							{
								fill_itr.write(base_itr);
							fill_itr.write_while();
								fill_itr+=1;
							}

							// Move to the start of the next row of the fill.
							fill_itr+=(width-crnt.step);
						}
						// Last row: one entry fewer than the rows above.
						for(int l=1; l<crnt.step; ++l)
						{
							fill_itr.write(base_itr);
						fill_itr.write_while();
							fill_itr+=1;
						}
					}else
					{
						// Corners differ: sample the cell centre, ...
						this->sampling
						(
							crnt.j+crnt.step/2, crnt.i+crnt.step/2,
							camera_side_dir,
							crnt.itr+(width*crnt.step/2)+crnt.step/2
						);
						// ... the middle of the right edge, ...
						this->sampling
						(
							crnt.j+crnt.step, crnt.i+crnt.step/2,
							camera_side_dir,
							crnt.itr+(width*crnt.step/2)+crnt.step
						);
						// ... and the middle of the bottom edge.
						this->sampling
						(
							crnt.j+crnt.step/2, crnt.i+crnt.step,
							camera_side_dir,
							crnt.itr+(width*crnt.step)+crnt.step/2
						);

						if(crnt.step != 2)
						{
							// Remember where this loop continues (the next
							// cell in the row) ...
							stk.push
							(
								arg
								(
									crnt.i, crnt.j+crnt.step,
									crnt.max_i, crnt.max_j,
									crnt.step, crnt.itr+crnt.step
								)
							);

							// ... then walk the same cell again with half
							// the step.
							crnt =
							arg(
								crnt.i, crnt.j,
								crnt.i+crnt.step, crnt.j+crnt.step,
								crnt.step/2, crnt.itr
							);
							goto recur;
						}
					}
					crnt.itr+=crnt.step;
				}
				// Go down one row of cells and back two cells to the left.
				crnt.itr+=(width*crnt.step)-crnt.step*2;
				crnt.j=crnt.max_j-crnt.step*2;
			}

			// Current grid finished: resume a suspended loop, or stop when
			// none is left.
			if(stk.size() == 0)
			{
				break;
			}else
			{
				crnt = stk.top();
				stk.pop();
			}
		}while(1);
	}

	struct inner
	{
		static void inner_func
		(
			const int								width,
				  int								i,
				  int								j,
			const int								max_i,
			const int								max_j,
			const int								step,
			const typename lp_buf::buf_iterator&	begin_itr,
			const vector3&							camera_side_dir,
			this_type&								that
		)
		{
			typename lp_buf::buf_iterator itr(begin_itr);

			for(; i<max_i; i+=step)
			{
				for(; j<max_j; j+=step)
				{
					if
					(
						that.rendering_buf.is_similar
						(
							itr,
							itr+step,
							itr+width*step,
							itr+width*step+step
						)
					)
					{
						typename lp_buf::buf_iterator base_itr(itr);
						typename lp_buf::buf_iterator fill_itr(itr+width+1);
						for(int k=1; k<step; ++k)
						{
							for(int l=1; l<step+1; ++l)
							{
								fill_itr.write(base_itr);
						//	fill_itr.write_while();
								fill_itr+=1;
							}

							fill_itr+=(width-step);
						}
						for(int l=1; l<step; ++l)
						{
							fill_itr.write(base_itr);
					//	fill_itr.write_while();
							fill_itr+=1;
						}
					}else
					{
						that.sampling
						(
							j+step/2, i+step/2,
							camera_side_dir,
							itr+(width*step/2)+step/2
						);
						that.sampling
						(
							j+step, i+step/2,
							camera_side_dir,
							itr+(width*step/2)+step
						);
						that.sampling
						(
							j+step/2, i+step,
							camera_side_dir,
							itr+(width*step)+step/2
						);

						if(step != 2)
						{
							inner::inner_func
							(
								width,
								i, j,
								i+step, j+step,
								step/2, itr,
								camera_side_dir,
								that
							);
						}
					}
					itr+=step;
				}
				itr+=(width*step)-step*2;
				j=max_j-step*2;
			}
		}
	};	//end of struct inner

	// recursive
	/**
	 *	@brief	Recursively refine the single step x step cell that starts at
	 *			(i, j) / @a begin_itr.
	 *
	 *	Thin entry point: forwards to inner::inner_func() with the bounds
	 *	i+step and j+step and this object as the target.
	 */
	void adaptive_subsampling
	(
		const int								width,
		const int								i,
		const int								j,
		const int								step,
		const typename lp_buf::buf_iterator&	begin_itr,
		const vector3&							camera_side_dir
	)
	{
		const int row_end = i+step;
		const int column_end = j+step;

		inner::inner_func
		(
			width, i, j, row_end, column_end, step,
			begin_itr, camera_side_dir, *this
		);
	}

protected:

	/// Starts with a sampling step of 1; the other members are left to
	/// their own default construction.
	c() : base_sampling_step(1) {}

	/// Intentionally empty.
	~c() {}

	void uninitialize()
	{
		this->rendering_buf.uninitialize();
		this->ppu.uninitialize();
		this->programmable_shader.uninitialize();
		this->rectangle.uninitialize();

		base::uninitialize();
	}

	bool initialize
	(
		std::size_t							width,
		std::size_t							height,
		typename base::context_param_type	context
	)
	{
		if(!base::initialize(width, height, context))
		{
			return this->failed();
		}

		if(!this->rendering_buf.initialize(width, height, context))
		{
			return this->failed();
		}

		if
		(
			!this->programmable_shader.initialize
			(
				std::wstring(L"../data/shader_ray/vert_shader"),
				std::wstring(L"../data/shader_ray/frag_shader"),
				context
			)
		)
		{
			std::cerr << "Failed to initialize programmable_shader" << std::endl;
			return this->failed();
		}

		this->programmable_shader.bind_shader();
		this->view_mat_handle
		=
		this->programmable_shader.get_uniform_handle("view_mat");
		if(!this->view_mat_handle)
		{
			std::cerr << "Not uniform value \"view_mat\" in shader :p" << std::endl;
		}

		this->normal_mat_handle
		=
		this->programmable_shader.get_uniform_handle("normal_mat");
		if(!this->normal_mat_handle)
		{
			std::cerr << "Not uniform value \"normal_mat\" in shader :p" << std::endl;
		}

		this->rendering_buf.bind_to_shader(this->programmable_shader);

		if(!this->ppu.initialize())
		{
			std::cerr << "Failed to initialize physics." << std::endl;
			return this->failed();
		}

		#ifdef BATCHED_QUERIES
		this->ppu.set_casted_ray_func
		(
			boost::bind
			(
				&this_type::casted_ray_report,
				boost::ref(*this),
				_1, _2, _3, _4
			)
		);
		#endif

		if(!this->rectangle.initialize(context))
		{
			std::cerr << "Failed to initialize rectangle." << std::endl;
			return this->failed();
		}

//		this->rectangle.set_texture_size(static_cast<float>(width), static_cast<float>(height));
		this->rectangle.set_state(context);

		return true;
	}

	/**
	 *	@brief	Render one frame.
	 *
	 *	In order:
	 *	 - lets the ray tracer advance (ppu.process()),
	 *	 - casts one ray at every base_sampling_step-th position of the
	 *	   buffer, in both directions,
	 *	 - when base_sampling_step > 1, refines the cells between those
	 *	   samples with adaptive_subsampling(),
	 *	 - sets the view and normal matrix uniforms and draws the rectangle.
	 *
	 *	Preconditions (asserted): base_sampling_step is a positive power of
	 *	two that divides both the width and the height.
	 *	NOTE(review): with a step of 1 each row advances the iterator by
	 *	2*(width/2) entries, so an odd width would presumably drift by one
	 *	entry per row - confirm whether odd sizes are meant to be supported.
	 */
	void process()
	{
		this->ppu.process();

		vector3 camera_side_dir
		=
		this->camera_front_dir.outerProduct(this->camera_up_dir);
		camera_side_dir.normalize();

		this->rectangle.set_state(this->context);

		this->rendering_buf.lock_for_write();
		typename lp_buf::buf_iterator iterator = this->rendering_buf.get_iterator();

		const int width = static_cast<int>(this->width);
		const int height = static_cast<int>(this->height);

		const int half_w = width/2;
		const int half_h = height/2;

		// Positivity is tested first: `%` with a zero step would be a
		// division by zero inside the assertion itself. The int copies of
		// width/height keep the remainders free of signed/unsigned mixing.
		assert(	base_sampling_step > 0
				&&
				(base_sampling_step & (base_sampling_step-1)) == 0
				&&
				width % base_sampling_step==0
				&&
				height % base_sampling_step==0
				&&
				"This adaptive sampling implement currently works "
				"only if "
				"this->width%base_sampling_step==0 "
				"&& this->height%base_sampling_step==0 "
				"&& base_sampling_step is power of 2 "
				"&& base_sampling_step > 0.");

		// Base pass: one ray every `step` entries, every `step` rows.
		const int step = base_sampling_step;
		for(int i=-half_h; ; i+=step)
		{
			for(int j=-half_w; j<half_w; j+=step)
			{
				this->sampling(j, i, camera_side_dir, iterator);
				iterator+=step;
			}

			// Leave before the skip below so the iterator is not advanced
			// past the last sampled row.
			if(i==half_h-step)
			{
				break;
			}
			// Skip the rows between this sampled row and the next one.
			iterator += width*(step-1);
		}

		// Adaptive subsampling: refine every cell spanned by four
		// neighbouring base samples.
		iterator = this->rendering_buf.get_iterator();

		if(this->base_sampling_step>1)
		{
			for(int i=-half_h; i<half_h-step; i+=step)
			{
				for(int j=-half_w; j<half_w-step; j+=step)
				{
					this->adaptive_subsampling
					(
						width,
						i, j, step,
						iterator,
						camera_side_dir
					);
					iterator += step;
				}
				// The inner loop stops one cell short of the row end; add
				// that cell plus the skipped rows to reach the next row of
				// cells.
				iterator += (width*(step-1))+step;
			}
		}

	#ifdef BATCHED_QUERIES
		this->ppu.cast_ray_execute();
	#endif

		this->rendering_buf.unlock();

		// Hand the filled buffer and the camera matrices to the shader.
		this->rendering_buf.activate(this->context);
		matrix4x4 view_matrix =
		get_view_matrix<float, false>
		(
			this->camera_pos,
			this->camera_front_dir,
			this->camera_up_dir
		);
		this->programmable_shader.set_uniform
		(
			this->view_mat_handle,
			view_matrix
		);

		this->programmable_shader.set_uniform
		(
			this->normal_mat_handle,
			get_normal_matrix<float, false>
			(
				view_matrix
			)
		);

		this->rectangle.draw(this->context);
	}

public:

	/// Loads a mesh from a vertex list and a triangle index list through the
	/// ray tracer and wraps the result in a mesh handle.
	mesh create_mesh
	(
		const std::vector<vector3>& vertices,
		const std::vector<unsigned short>& triangles
	)
	{
		return mesh(ppu.load_mesh(vertices, triangles));
	}

	/// Loads a mesh via the ray tracer's load_mesh_from_wavefront() and
	/// wraps the result in a mesh handle.
	mesh create_mesh_from_wavefront(const std::string& filename)
	{
		return mesh(ppu.load_mesh_from_wavefront(filename));
	}

	/// Loads a mesh via the ray tracer's load_mesh_from_cooked() and wraps
	/// the result in a mesh handle.
	mesh create_mesh_from_cooked(const std::string& filename)
	{
		return mesh(ppu.load_mesh_from_cooked(filename));
	}

	/// Places an instance of the mesh @a handle at @a position with material
	/// @a mtrl (via the ray tracer's create_mesh()) and returns its handle.
	instance3d create_instance
	(
		mesh handle,
		const vector3& position,
		const gpuppur::material& mtrl
	)
	{
		return instance3d(ppu.create_mesh(handle, position, mtrl));
	}

	/// Creates a sphere of the given @a radius at @a position with material
	/// @a mtrl through the ray tracer and returns its handle.
	instance3d create_sphere
	(
		float radius,
		const vector3& position,
		const gpuppur::material& mtrl
	)
	{
		return instance3d(ppu.create_sphere(radius, position, mtrl));
	}

	/**
	 *	@brief	Create a texture buffer and bind it to the shader as "env_tex".
	 *
	 *	@tparam	Group	Texture group selecting the texbuf specialization.
	 *	@param	width	Texture width passed to texbuf::initialize().
	 *	@param	height	Texture height passed to texbuf::initialize().
	 *	@return	A handle wrapping the new buffer, or a default-constructed
	 *			handle when the buffer could not be initialized.
	 *
	 *	texture<Group>::type and inner_texture<Group>::tmp depend on Group, so
	 *	they are prefixed with `typename` where they are used as types in
	 *	expressions; without it a two-phase-lookup compiler parses them as
	 *	non-types.
	 */
	template<class Group>
	typename texture<Group>::type create_texture
	(
		std::size_t width,
		std::size_t height
	)
	{
		// Owns the buffer until it is handed over to the returned handle.
		std::auto_ptr<texbuf<Group, TextureImpl> > tmp
		(
			new texbuf<Group, TextureImpl>()
		);

		// NOTE(review): the meaning of the literal 2 is not visible here.
		if(!tmp->initialize(width, height, 2, this->context))
		{
			return typename texture<Group>::type();
		}

		tmp->activate(this->context);
		this->programmable_shader.set_uniform_texture("env_tex", *tmp);

		// release(): from here on the buffer is no longer deleted by tmp.
		return typename texture<Group>::type
		(
			typename inner_texture<Group>::tmp(*tmp.release())
		);
	}

public:

	/// Direct access to the underlying ray tracer object.
	raytracer& get_ppu()
	{
		return ppu;
	}

	void set_sampling_step(const int step)
	{
		this->base_sampling_step = step;
	}
/*
	gpuppur::lens<>& get_lens()
	{
		return this->lens;
	}
*/
	void set_viewport
	(
		int x,
		int y,
		int width,
		int height
	)
	{
		this->rectangle.set_viewport
		(
			x, y, width, height, this->context
		);
	}

	void set_projection
	(
		float left,
		float right,
		float bottom,
		float top,
		float neear,
		float faar,
		bool  is_perspective=true
	)
	{
		this->lens = gpuppur::lens<>
		(
			left,
			right,
			bottom,
			top,
			neear,
			faar,
			is_perspective
		);
	}

protected:

	lp_buf									rendering_buf;
	ShaderClass								programmable_shader;
	typename ShaderClass::uniform_handle	view_mat_handle;
	typename ShaderClass::uniform_handle	normal_mat_handle;
	Rectangle								rectangle;
	raytracer								ppu;
	gpuppur::lens<>							lens;
	int										base_sampling_step;
};
};


/**
 *	OpenGL binding of gpuppuray_default_implement: GLSL shaders,
 *	rectangle_opengl and gl_texbuf textures. No graphics context object is
 *	needed, so the context type is boost::mpl::void_.
 */
class gpuppuray_default_implement_with_opengl
{
public:
	// Second gpu_buf argument: gl_texbuf is the active choice;
	// gl_pbo_sync_buf is the alternative that was left switched off.
	typedef gpuppuray_default_implement<
		gpu_buf<gl_texbuf, gl_texbuf>,
		GLSL,
		rectangle_opengl,
		gl_texbuf
	> def_impl;

	template<class Base>
	class c : public def_impl::c<Base>
	{
	public:
		typedef def_impl::c<Base>	base;
		typedef boost::mpl::void_	context_type;

		using base::initialize;

		/// Overload taking an untyped context pointer; the pointer is
		/// ignored and an empty context is passed on.
		bool initialize(std::size_t width, std::size_t height, void*)
		{
			return base::initialize(width, height, context_type());
		}
	};
};


#ifndef D3D_SDK_VERSION

/**
 *	Placeholder used when D3D_SDK_VERSION is not defined, so that names
 *	referring to the DirectX implementation still resolve.
 *	The nested template is public, like in the real implementation: it is
 *	named from outside this class.
 */
class gpuppuray_default_implement_with_directx
{
public:
	template<class Base>
	struct c
	{
		typedef void* context_type;
	};
};

#else

/**
 *	DirectX 9 binding of gpuppuray_default_implement: HLSL shaders,
 *	rectangle_directx and dx_texbuf textures. The context is the
 *	IDirect3DDevice9 pointer.
 */
class gpuppuray_default_implement_with_directx
{
public:
	typedef gpuppuray_default_implement
	<
		gpu_buf<dx_texbuf, dx_texbuf>, HLSL, rectangle_directx, dx_texbuf
	> def_impl;
template<class Base>
class c : public def_impl::c<Base>
{
public:

	typedef def_impl::c<Base>	base;
	typedef IDirect3DDevice9*	context_type;

	using base::initialize;

	/// Overload taking an untyped context pointer, which must point to an
	/// IDirect3DDevice9. static_cast is the conversion defined for turning
	/// a void* back into an object pointer.
	bool initialize(std::size_t width, std::size_t height, void* context)
	{
		return base::initialize
		(
			width, height, static_cast<IDirect3DDevice9*>(context)
		);
	}
};
};

#endif

///GPUPPUR implementation with GPUPPURay and OpenGL. Runtime polymorphic. Instance of this class must have vtable.
typedef gpuppur::GPUPPUR
		<
			gpuppur::gpuppuray
			<
				gpuppuray_default_implement_with_opengl::c
			>::c
		>::c<gpuppur::tail<gpuppur::gpuppuray_virtual> >
		gl_raytracer;
typedef gl_raytracer gl_raytracer_virtual;

///GPUPPUR implementation with GPUPPURay and OpenGL. Compile time polymorphic. Instance of this class must not have vtable.
typedef gpuppur::GPUPPUR
		<
			gpuppur::gpuppuray
			<
				gpuppuray_default_implement_with_opengl::c
			>::c
		>::c<gpuppur::tail<boost::mpl::void_> >
		gl_raytracer_static;

///GPUPPUR implementation with GPUPPURay and DirectX9. Runtime polymorphic. Instance of this class must have vtable.
typedef gpuppur::GPUPPUR
		<
			gpuppur::gpuppuray
			<
				gpuppuray_default_implement_with_directx::c
			>::c
		>::c
		<
			gpuppur::tail
			<
				gpuppur::gpuppuray_virtual
			//	boost::mpl::void_
			>
		>
		dx_raytracer;
typedef dx_raytracer dx_raytracer_virtual;

///GPUPPUR implementation with GPUPPURay and DirectX9. Compile time polymorphic. Instance of this class must not have vtable.
typedef gpuppur::GPUPPUR
		<
			gpuppur::gpuppuray
			<
				gpuppuray_default_implement_with_directx::c
			>::c
		>::c
		<
			gpuppur::tail
			<
				boost::mpl::void_
			>
		>
		dx_raytracer_static;

}	// end of namespace gpuppur

#endif
