/*************************************************************************************************/
/*!
   	@file		pp_cpp_texture_weight4.h
	@author 	Fanzo
 	@date 		2008/4/20
*/
/*************************************************************************************************/
#pragma		once

///////////////////////////////////////////////////////////////////////////////////////////////////
//include files


#pragma pack( push , 8 )		//set align

namespace icubic
{
using namespace icubic;

//=================================================================================================
// weight4
//=================================================================================================
//=================================================================================================
/*!
	Combines two frgb calculation pixels with a pair of table weights and writes the result
	to *c:  c1 is scaled by weighttbl[ d ] , c2 by weighttbl[ 256 - d ] , then c2 is
	accumulated onto c1 (via MulPixel_frgb / AddPixel_frgb).

	@param	c			receives the combined pixel
	@param	c1			first pixel (taken by value, so the caller's copy is untouched)
	@param	c2			second pixel (taken by value)
	@param	d			table index for c1; c2 uses the mirrored index 256 - d
	@param	weighttbl	weight table; read at [ d ] and [ 256 - d ] , i.e. up to index 256
*/
cb_inline
void weight4_frgb
		(
		pp_pixel_calc*	c , 
		pp_pixel_calc	c1 , 
		pp_pixel_calc	c2 , 
		uint8			d , 
		const float*	weighttbl
		)
{
	// look up both weights first
	const float	weight_c1	= weighttbl[ d ];
	const float	weight_c2	= weighttbl[ 256 - d ];

	// scale each input, then sum into c1
	MulPixel_frgb( &c1 , weight_c1 );
	MulPixel_frgb( &c2 , weight_c2 );
	AddPixel_frgb( &c1 , c2 );

	*c	= c1;
}
//=================================================================================================
/*!
	Combines two frgba calculation pixels with a pair of table weights and writes the result
	to *c:  c1 is scaled by weighttbl[ d ] , c2 by weighttbl[ 256 - d ] , then c2 is
	accumulated onto c1 (via MulPixel_frgba / AddPixel_frgba).

	@param	c			receives the combined pixel
	@param	c1			first pixel (taken by value, so the caller's copy is untouched)
	@param	c2			second pixel (taken by value)
	@param	d			table index for c1; c2 uses the mirrored index 256 - d
	@param	weighttbl	weight table; read at [ d ] and [ 256 - d ] , i.e. up to index 256
*/
cb_inline
void weight4_frgba
		(
		pp_pixel_calc*	c , 
		pp_pixel_calc	c1 , 
		pp_pixel_calc	c2 , 
		uint8			d , 
		const float*	weighttbl
		)
{
	// look up both weights first
	const float	weight_c1	= weighttbl[ d ];
	const float	weight_c2	= weighttbl[ 256 - d ];

	// scale each input, then sum into c1
	MulPixel_frgba( &c1 , weight_c1 );
	MulPixel_frgba( &c2 , weight_c2 );
	AddPixel_frgba( &c1 , c2 );

	*c	= c1;
}
//=================================================================================================
/*!
	Combines two fa calculation pixels with a pair of table weights and writes the result
	to *c:  c1 is scaled by weighttbl[ d ] , c2 by weighttbl[ 256 - d ] , then c2 is
	accumulated onto c1 (via MulPixel_fa / AddPixel_fa).

	@param	c			receives the combined pixel
	@param	c1			first pixel (taken by value, so the caller's copy is untouched)
	@param	c2			second pixel (taken by value)
	@param	d			table index for c1; c2 uses the mirrored index 256 - d
	@param	weighttbl	weight table; read at [ d ] and [ 256 - d ] , i.e. up to index 256
*/
cb_inline
void weight4_fa
		(
		pp_pixel_calc*	c , 
		pp_pixel_calc	c1 , 
		pp_pixel_calc	c2 , 
		uint8			d , 
		const float*	weighttbl
		)
{
	// look up both weights first
	const float	weight_c1	= weighttbl[ d ];
	const float	weight_c2	= weighttbl[ 256 - d ];

	// scale each input, then sum into c1
	MulPixel_fa( &c1 , weight_c1 );
	MulPixel_fa( &c2 , weight_c2 );
	AddPixel_fa( &c1 , c2 );

	*c	= c1;
}

//=================================================================================================
// texture weight4
//=================================================================================================

//=================================================================================================
/*!
	Fills a span of `len` rgb pixels at `dest` from an rgb source, blending the four source
	pixels selected by WrapNear2_repeat() for every destination pixel.

	(ssu,ssv) and (ttu,ttv) are converted to 16.16 fixed point and stepped by
	( end - start ) / len per pixel, starting half a step in.

	@param	dest			first destination pixel (rgb), advanced by AddPixelSize_rgb()
	@param	len				pixel count; nothing is written when it is <= 0
	@param	ssu , ssv		coordinate at the start of the span
	@param	ttu , ttv		coordinate at the end of the span
	@param	src				base address of the source pixels (rgb)
	@param	src_pitchbyte	byte offset between consecutive source rows
	@param	src_w , src_h	source extent, forwarded to WrapNear2_repeat()
	@param	b_color			forwarded to rgb_to_frgb() for every loaded pixel
	@param	weighttbl		blend table forwarded to weight4_frgb() (read at [ d ] and [ 256 - d ])
*/
cb_inline
void pp_cpp_texture_weight4f_m_repeat_rgb_rgb
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl
		)
{
	// An empty span writes nothing; returning here also keeps the step below from dividing by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel increment; the first sample sits half an increment past the start
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// source columns/rows to sample plus the horizontal/vertical blend indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the four source pixels: c[0],c[1] from row y[0] , c[2],c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_rgb( &c[ 0 ] , row0 + x[0] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 1 ] , row0 + x[1] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 2 ] , row1 + x[0] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 3 ] , row1 + x[1] * PixelSize_rgb() );

		// convert to the calculation format
		rgb_to_frgb( &c[ 0 ] , b_color , c[ 0 ] );
		rgb_to_frgb( &c[ 1 ] , b_color , c[ 1 ] );
		rgb_to_frgb( &c[ 2 ] , b_color , c[ 2 ] );
		rgb_to_frgb( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x within each row, then along y between the two rows
		pp_pixel_calc	cc , c1 , c2;
		weight4_frgb( &c1 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_frgb( &c2 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_frgb( &cc , c1 , c2 , dy , weighttbl );

		// convert back and store
		frgb_to_rgb( &cc , cc );
		StoreAddr_rgb( dest , cc );

		// next destination pixel and coordinate
		dest = AddPixelSize_rgb( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
/*!
	Fills a span of `len` rgb pixels at `dest` from an rgba source, blending the four source
	pixels selected by WrapNear2_repeat() for every destination pixel.

	(ssu,ssv) and (ttu,ttv) are converted to 16.16 fixed point and stepped by
	( end - start ) / len per pixel, starting half a step in.

	@param	dest			first destination pixel (rgb), advanced by AddPixelSize_rgb()
	@param	len				pixel count; nothing is written when it is <= 0
	@param	ssu , ssv		coordinate at the start of the span
	@param	ttu , ttv		coordinate at the end of the span
	@param	src				base address of the source pixels (rgba)
	@param	src_pitchbyte	byte offset between consecutive source rows
	@param	src_w , src_h	source extent, forwarded to WrapNear2_repeat()
	@param	b_color			forwarded to rgba_to_frgba() for every loaded pixel
	@param	weighttbl		blend table forwarded to weight4_frgba() (read at [ d ] and [ 256 - d ])
*/
cb_inline
void pp_cpp_texture_weight4f_m_repeat_rgb_rgba
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl
		)
{
	// An empty span writes nothing; returning here also keeps the step below from dividing by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel increment; the first sample sits half an increment past the start
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// source columns/rows to sample plus the horizontal/vertical blend indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the four source pixels: c[0],c[1] from row y[0] , c[2],c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_rgba( &c[ 0 ] , row0 + x[0] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 1 ] , row0 + x[1] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 2 ] , row1 + x[0] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 3 ] , row1 + x[1] * PixelSize_rgba() );

		// convert to the calculation format
		rgba_to_frgba( &c[ 0 ] , b_color , c[ 0 ] );
		rgba_to_frgba( &c[ 1 ] , b_color , c[ 1 ] );
		rgba_to_frgba( &c[ 2 ] , b_color , c[ 2 ] );
		rgba_to_frgba( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x within each row, then along y between the two rows
		pp_pixel_calc	cc , c1 , c2;
		weight4_frgba( &c1 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_frgba( &c2 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_frgba( &cc , c1 , c2 , dy , weighttbl );

		// convert back and store
		frgba_to_rgb( &cc , cc );
		StoreAddr_rgb( dest , cc );

		// next destination pixel and coordinate
		dest = AddPixelSize_rgb( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
/*!
	Fills a span of `len` rgb pixels at `dest` from an a (alpha) source, blending the four
	source pixels selected by WrapNear2_repeat() for every destination pixel.

	(ssu,ssv) and (ttu,ttv) are converted to 16.16 fixed point and stepped by
	( end - start ) / len per pixel, starting half a step in.

	@param	dest			first destination pixel (rgb), advanced by AddPixelSize_rgb()
	@param	len				pixel count; nothing is written when it is <= 0
	@param	ssu , ssv		coordinate at the start of the span
	@param	ttu , ttv		coordinate at the end of the span
	@param	src				base address of the source pixels (a)
	@param	src_pitchbyte	byte offset between consecutive source rows
	@param	src_w , src_h	source extent, forwarded to WrapNear2_repeat()
	@param	b_color			forwarded to a_to_frgb() for every loaded pixel
	@param	weighttbl		blend table forwarded to weight4_frgb() (read at [ d ] and [ 256 - d ])
*/
cb_inline
void pp_cpp_texture_weight4f_m_repeat_rgb_a
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl
		)
{
	// An empty span writes nothing; returning here also keeps the step below from dividing by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel increment; the first sample sits half an increment past the start
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// source columns/rows to sample plus the horizontal/vertical blend indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the four source pixels: c[0],c[1] from row y[0] , c[2],c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_a( &c[ 0 ] , row0 + x[0] * PixelSize_a() );
		LoadAddr_a( &c[ 1 ] , row0 + x[1] * PixelSize_a() );
		LoadAddr_a( &c[ 2 ] , row1 + x[0] * PixelSize_a() );
		LoadAddr_a( &c[ 3 ] , row1 + x[1] * PixelSize_a() );

		// convert to the calculation format
		a_to_frgb( &c[ 0 ] , b_color , c[ 0 ] );
		a_to_frgb( &c[ 1 ] , b_color , c[ 1 ] );
		a_to_frgb( &c[ 2 ] , b_color , c[ 2 ] );
		a_to_frgb( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x within each row, then along y between the two rows
		pp_pixel_calc	cc , c1 , c2;
		weight4_frgb( &c1 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_frgb( &c2 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_frgb( &cc , c1 , c2 , dy , weighttbl );

		// convert back and store
		frgb_to_rgb( &cc , cc );
		StoreAddr_rgb( dest , cc );

		// next destination pixel and coordinate
		dest = AddPixelSize_rgb( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
/*!
	Fills a span of `len` rgba pixels at `dest` from an rgb source, blending the four source
	pixels selected by WrapNear2_repeat() for every destination pixel.

	(ssu,ssv) and (ttu,ttv) are converted to 16.16 fixed point and stepped by
	( end - start ) / len per pixel, starting half a step in.

	@param	dest			first destination pixel (rgba), advanced by AddPixelSize_rgba()
	@param	len				pixel count; nothing is written when it is <= 0
	@param	ssu , ssv		coordinate at the start of the span
	@param	ttu , ttv		coordinate at the end of the span
	@param	src				base address of the source pixels (rgb)
	@param	src_pitchbyte	byte offset between consecutive source rows
	@param	src_w , src_h	source extent, forwarded to WrapNear2_repeat()
	@param	b_color			forwarded to rgb_to_frgba() for every loaded pixel
	@param	weighttbl		blend table forwarded to weight4_frgba() (read at [ d ] and [ 256 - d ])
*/
cb_inline
void pp_cpp_texture_weight4f_m_repeat_rgba_rgb
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl
		)
{
	// An empty span writes nothing; returning here also keeps the step below from dividing by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel increment; the first sample sits half an increment past the start
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// source columns/rows to sample plus the horizontal/vertical blend indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the four source pixels: c[0],c[1] from row y[0] , c[2],c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_rgb( &c[ 0 ] , row0 + x[0] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 1 ] , row0 + x[1] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 2 ] , row1 + x[0] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 3 ] , row1 + x[1] * PixelSize_rgb() );

		// convert to the calculation format
		rgb_to_frgba( &c[ 0 ] , b_color , c[ 0 ] );
		rgb_to_frgba( &c[ 1 ] , b_color , c[ 1 ] );
		rgb_to_frgba( &c[ 2 ] , b_color , c[ 2 ] );
		rgb_to_frgba( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x within each row, then along y between the two rows
		pp_pixel_calc	cc , c1 , c2;
		weight4_frgba( &c1 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_frgba( &c2 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_frgba( &cc , c1 , c2 , dy , weighttbl );

		// convert back and store
		frgba_to_rgba( &cc , cc );
		StoreAddr_rgba( dest , cc );

		// next destination pixel and coordinate
		dest = AddPixelSize_rgba( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
/*!
	Fills a span of `len` rgba pixels at `dest` from an rgba source, blending the four source
	pixels selected by WrapNear2_repeat() for every destination pixel.

	(ssu,ssv) and (ttu,ttv) are converted to 16.16 fixed point and stepped by
	( end - start ) / len per pixel, starting half a step in.

	@param	dest			first destination pixel (rgba), advanced by AddPixelSize_rgba()
	@param	len				pixel count; nothing is written when it is <= 0
	@param	ssu , ssv		coordinate at the start of the span
	@param	ttu , ttv		coordinate at the end of the span
	@param	src				base address of the source pixels (rgba)
	@param	src_pitchbyte	byte offset between consecutive source rows
	@param	src_w , src_h	source extent, forwarded to WrapNear2_repeat()
	@param	b_color			forwarded to rgba_to_frgba() for every loaded pixel
	@param	weighttbl		blend table forwarded to weight4_frgba() (read at [ d ] and [ 256 - d ])
*/
cb_inline
void pp_cpp_texture_weight4f_m_repeat_rgba_rgba
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl
		)
{
	// An empty span writes nothing; returning here also keeps the step below from dividing by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel increment; the first sample sits half an increment past the start
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// source columns/rows to sample plus the horizontal/vertical blend indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the four source pixels: c[0],c[1] from row y[0] , c[2],c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_rgba( &c[ 0 ] , row0 + x[0] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 1 ] , row0 + x[1] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 2 ] , row1 + x[0] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 3 ] , row1 + x[1] * PixelSize_rgba() );

		// convert to the calculation format
		rgba_to_frgba( &c[ 0 ] , b_color , c[ 0 ] );
		rgba_to_frgba( &c[ 1 ] , b_color , c[ 1 ] );
		rgba_to_frgba( &c[ 2 ] , b_color , c[ 2 ] );
		rgba_to_frgba( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x within each row, then along y between the two rows
		pp_pixel_calc	cc , c1 , c2;
		weight4_frgba( &c1 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_frgba( &c2 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_frgba( &cc , c1 , c2 , dy , weighttbl );

		// convert back and store
		frgba_to_rgba( &cc , cc );
		StoreAddr_rgba( dest , cc );

		// next destination pixel and coordinate
		dest = AddPixelSize_rgba( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
/*!
	Fills a span of `len` rgba pixels at `dest` from an a (alpha) source, blending the four
	source pixels selected by WrapNear2_repeat() for every destination pixel.

	(ssu,ssv) and (ttu,ttv) are converted to 16.16 fixed point and stepped by
	( end - start ) / len per pixel, starting half a step in.

	@param	dest			first destination pixel (rgba), advanced by AddPixelSize_rgba()
	@param	len				pixel count; nothing is written when it is <= 0
	@param	ssu , ssv		coordinate at the start of the span
	@param	ttu , ttv		coordinate at the end of the span
	@param	src				base address of the source pixels (a)
	@param	src_pitchbyte	byte offset between consecutive source rows
	@param	src_w , src_h	source extent, forwarded to WrapNear2_repeat()
	@param	b_color			forwarded to a_to_frgba() for every loaded pixel
	@param	weighttbl		blend table forwarded to weight4_frgba() (read at [ d ] and [ 256 - d ])
*/
cb_inline
void pp_cpp_texture_weight4f_m_repeat_rgba_a
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl
		)
{
	// An empty span writes nothing; returning here also keeps the step below from dividing by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel increment; the first sample sits half an increment past the start
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// source columns/rows to sample plus the horizontal/vertical blend indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the four source pixels: c[0],c[1] from row y[0] , c[2],c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_a( &c[ 0 ] , row0 + x[0] * PixelSize_a() );
		LoadAddr_a( &c[ 1 ] , row0 + x[1] * PixelSize_a() );
		LoadAddr_a( &c[ 2 ] , row1 + x[0] * PixelSize_a() );
		LoadAddr_a( &c[ 3 ] , row1 + x[1] * PixelSize_a() );

		// convert to the calculation format
		a_to_frgba( &c[ 0 ] , b_color , c[ 0 ] );
		a_to_frgba( &c[ 1 ] , b_color , c[ 1 ] );
		a_to_frgba( &c[ 2 ] , b_color , c[ 2 ] );
		a_to_frgba( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x within each row, then along y between the two rows
		pp_pixel_calc	cc , c1 , c2;
		weight4_frgba( &c1 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_frgba( &c2 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_frgba( &cc , c1 , c2 , dy , weighttbl );

		// convert back and store
		frgba_to_rgba( &cc , cc );
		StoreAddr_rgba( dest , cc );

		// next destination pixel and coordinate
		dest = AddPixelSize_rgba( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
/*!
	Fills a span of `len` a (alpha) pixels at `dest` from an rgb source, blending the four
	source pixels selected by WrapNear2_repeat() for every destination pixel.

	(ssu,ssv) and (ttu,ttv) are converted to 16.16 fixed point and stepped by
	( end - start ) / len per pixel, starting half a step in.

	@param	dest			first destination pixel (a), advanced by AddPixelSize_a()
	@param	len				pixel count; nothing is written when it is <= 0
	@param	ssu , ssv		coordinate at the start of the span
	@param	ttu , ttv		coordinate at the end of the span
	@param	src				base address of the source pixels (rgb)
	@param	src_pitchbyte	byte offset between consecutive source rows
	@param	src_w , src_h	source extent, forwarded to WrapNear2_repeat()
	@param	b_color			forwarded to rgb_to_fa() for every loaded pixel
	@param	weighttbl		blend table forwarded to weight4_fa() (read at [ d ] and [ 256 - d ])
*/
cb_inline
void pp_cpp_texture_weight4f_m_repeat_a_rgb
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl
		)
{
	// An empty span writes nothing; returning here also keeps the step below from dividing by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel increment; the first sample sits half an increment past the start
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// source columns/rows to sample plus the horizontal/vertical blend indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the four source pixels: c[0],c[1] from row y[0] , c[2],c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_rgb( &c[ 0 ] , row0 + x[0] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 1 ] , row0 + x[1] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 2 ] , row1 + x[0] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 3 ] , row1 + x[1] * PixelSize_rgb() );

		// convert to the calculation format
		rgb_to_fa( &c[ 0 ] , b_color , c[ 0 ] );
		rgb_to_fa( &c[ 1 ] , b_color , c[ 1 ] );
		rgb_to_fa( &c[ 2 ] , b_color , c[ 2 ] );
		rgb_to_fa( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x within each row, then along y between the two rows
		pp_pixel_calc	cc , c1 , c2;
		weight4_fa( &c1 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_fa( &c2 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_fa( &cc , c1 , c2 , dy , weighttbl );

		// convert back and store
		fa_to_a( &cc , cc );
		StoreAddr_a( dest , cc );

		// next destination pixel and coordinate
		dest = AddPixelSize_a( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
/*!
	Fills a span of `len` a (alpha) pixels at `dest` from an rgba source, blending the four
	source pixels selected by WrapNear2_repeat() for every destination pixel.

	(ssu,ssv) and (ttu,ttv) are converted to 16.16 fixed point and stepped by
	( end - start ) / len per pixel, starting half a step in.

	@param	dest			first destination pixel (a), advanced by AddPixelSize_a()
	@param	len				pixel count; nothing is written when it is <= 0
	@param	ssu , ssv		coordinate at the start of the span
	@param	ttu , ttv		coordinate at the end of the span
	@param	src				base address of the source pixels (rgba)
	@param	src_pitchbyte	byte offset between consecutive source rows
	@param	src_w , src_h	source extent, forwarded to WrapNear2_repeat()
	@param	b_color			forwarded to rgba_to_fa() for every loaded pixel
	@param	weighttbl		blend table forwarded to weight4_fa() (read at [ d ] and [ 256 - d ])
*/
cb_inline
void pp_cpp_texture_weight4f_m_repeat_a_rgba
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl
		)
{
	// An empty span writes nothing; returning here also keeps the step below from dividing by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel increment; the first sample sits half an increment past the start
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// source columns/rows to sample plus the horizontal/vertical blend indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the four source pixels: c[0],c[1] from row y[0] , c[2],c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_rgba( &c[ 0 ] , row0 + x[0] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 1 ] , row0 + x[1] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 2 ] , row1 + x[0] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 3 ] , row1 + x[1] * PixelSize_rgba() );

		// convert to the calculation format
		rgba_to_fa( &c[ 0 ] , b_color , c[ 0 ] );
		rgba_to_fa( &c[ 1 ] , b_color , c[ 1 ] );
		rgba_to_fa( &c[ 2 ] , b_color , c[ 2 ] );
		rgba_to_fa( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x within each row, then along y between the two rows
		pp_pixel_calc	cc , c1 , c2;
		weight4_fa( &c1 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_fa( &c2 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_fa( &cc , c1 , c2 , dy , weighttbl );

		// convert back and store
		fa_to_a( &cc , cc );
		StoreAddr_a( dest , cc );

		// next destination pixel and coordinate
		dest = AddPixelSize_a( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
/*!
	Fills a span of `len` a (alpha) pixels at `dest` from an a (alpha) source, blending the
	four source pixels selected by WrapNear2_repeat() for every destination pixel.

	(ssu,ssv) and (ttu,ttv) are converted to 16.16 fixed point and stepped by
	( end - start ) / len per pixel, starting half a step in.

	@param	dest			first destination pixel (a), advanced by AddPixelSize_a()
	@param	len				pixel count; nothing is written when it is <= 0
	@param	ssu , ssv		coordinate at the start of the span
	@param	ttu , ttv		coordinate at the end of the span
	@param	src				base address of the source pixels (a)
	@param	src_pitchbyte	byte offset between consecutive source rows
	@param	src_w , src_h	source extent, forwarded to WrapNear2_repeat()
	@param	b_color			forwarded to a_to_fa() for every loaded pixel
	@param	weighttbl		blend table forwarded to weight4_fa() (read at [ d ] and [ 256 - d ])
*/
cb_inline
void pp_cpp_texture_weight4f_m_repeat_a_a
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl
		)
{
	// An empty span writes nothing; returning here also keeps the step below from dividing by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel increment; the first sample sits half an increment past the start
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// source columns/rows to sample plus the horizontal/vertical blend indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the four source pixels: c[0],c[1] from row y[0] , c[2],c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_a( &c[ 0 ] , row0 + x[0] * PixelSize_a() );
		LoadAddr_a( &c[ 1 ] , row0 + x[1] * PixelSize_a() );
		LoadAddr_a( &c[ 2 ] , row1 + x[0] * PixelSize_a() );
		LoadAddr_a( &c[ 3 ] , row1 + x[1] * PixelSize_a() );

		// convert to the calculation format
		a_to_fa( &c[ 0 ] , b_color , c[ 0 ] );
		a_to_fa( &c[ 1 ] , b_color , c[ 1 ] );
		a_to_fa( &c[ 2 ] , b_color , c[ 2 ] );
		a_to_fa( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x within each row, then along y between the two rows
		pp_pixel_calc	cc , c1 , c2;
		weight4_fa( &c1 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_fa( &c2 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_fa( &cc , c1 , c2 , dy , weighttbl );

		// convert back and store
		fa_to_a( &cc , cc );
		StoreAddr_a( dest , cc );

		// next destination pixel and coordinate
		dest = AddPixelSize_a( dest );
		su += du;
		sv += dv;
	}
}

//=================================================================================================
/*!
	Fills a span of `len` rgb pixels at `dest` from an rgb source, blending the four source
	pixels selected by WrapNear2_clamp() for every destination pixel.

	(ssu,ssv) and (ttu,ttv) are converted to 16.16 fixed point and stepped by
	( end - start ) / len per pixel, starting half a step in.

	@param	dest			first destination pixel (rgb), advanced by AddPixelSize_rgb()
	@param	len				pixel count; nothing is written when it is <= 0
	@param	ssu , ssv		coordinate at the start of the span
	@param	ttu , ttv		coordinate at the end of the span
	@param	src				base address of the source pixels (rgb)
	@param	src_pitchbyte	byte offset between consecutive source rows
	@param	src_w , src_h	source extent, forwarded to WrapNear2_clamp()
	@param	b_color			forwarded to rgb_to_frgb() for every loaded pixel
	@param	weighttbl		blend table forwarded to weight4_frgb() (read at [ d ] and [ 256 - d ])
*/
cb_inline
void pp_cpp_texture_weight4f_m_clamp_rgb_rgb
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl
		)
{
	// An empty span writes nothing; returning here also keeps the step below from dividing by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel increment; the first sample sits half an increment past the start
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// source columns/rows to sample plus the horizontal/vertical blend indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the four source pixels: c[0],c[1] from row y[0] , c[2],c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_rgb( &c[ 0 ] , row0 + x[0] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 1 ] , row0 + x[1] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 2 ] , row1 + x[0] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 3 ] , row1 + x[1] * PixelSize_rgb() );

		// convert to the calculation format
		rgb_to_frgb( &c[ 0 ] , b_color , c[ 0 ] );
		rgb_to_frgb( &c[ 1 ] , b_color , c[ 1 ] );
		rgb_to_frgb( &c[ 2 ] , b_color , c[ 2 ] );
		rgb_to_frgb( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x within each row, then along y between the two rows
		pp_pixel_calc	cc , c1 , c2;
		weight4_frgb( &c1 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_frgb( &c2 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_frgb( &cc , c1 , c2 , dy , weighttbl );

		// convert back and store
		frgb_to_rgb( &cc , cc );
		StoreAddr_rgb( dest , cc );

		// next destination pixel and coordinate
		dest = AddPixelSize_rgb( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
/*!
	Fills a span of `len` rgb pixels at `dest` from an rgba source, blending the four source
	pixels selected by WrapNear2_clamp() for every destination pixel.

	(ssu,ssv) and (ttu,ttv) are converted to 16.16 fixed point and stepped by
	( end - start ) / len per pixel, starting half a step in.

	@param	dest			first destination pixel (rgb), advanced by AddPixelSize_rgb()
	@param	len				pixel count; nothing is written when it is <= 0
	@param	ssu , ssv		coordinate at the start of the span
	@param	ttu , ttv		coordinate at the end of the span
	@param	src				base address of the source pixels (rgba)
	@param	src_pitchbyte	byte offset between consecutive source rows
	@param	src_w , src_h	source extent, forwarded to WrapNear2_clamp()
	@param	b_color			forwarded to rgba_to_frgba() for every loaded pixel
	@param	weighttbl		blend table forwarded to weight4_frgba() (read at [ d ] and [ 256 - d ])
*/
cb_inline
void pp_cpp_texture_weight4f_m_clamp_rgb_rgba
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl
		)
{
	// An empty span writes nothing; returning here also keeps the step below from dividing by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel increment; the first sample sits half an increment past the start
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// source columns/rows to sample plus the horizontal/vertical blend indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the four source pixels: c[0],c[1] from row y[0] , c[2],c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_rgba( &c[ 0 ] , row0 + x[0] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 1 ] , row0 + x[1] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 2 ] , row1 + x[0] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 3 ] , row1 + x[1] * PixelSize_rgba() );

		// convert to the calculation format
		rgba_to_frgba( &c[ 0 ] , b_color , c[ 0 ] );
		rgba_to_frgba( &c[ 1 ] , b_color , c[ 1 ] );
		rgba_to_frgba( &c[ 2 ] , b_color , c[ 2 ] );
		rgba_to_frgba( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x within each row, then along y between the two rows
		pp_pixel_calc	cc , c1 , c2;
		weight4_frgba( &c1 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_frgba( &c2 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_frgba( &cc , c1 , c2 , dy , weighttbl );

		// convert back and store
		frgba_to_rgb( &cc , cc );
		StoreAddr_rgb( dest , cc );

		// next destination pixel and coordinate
		dest = AddPixelSize_rgb( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
/*!
	Fills a span of `len` rgb pixels at `dest` from an a (alpha) source, blending the four
	source pixels selected by WrapNear2_clamp() for every destination pixel.

	(ssu,ssv) and (ttu,ttv) are converted to 16.16 fixed point and stepped by
	( end - start ) / len per pixel, starting half a step in.

	@param	dest			first destination pixel (rgb), advanced by AddPixelSize_rgb()
	@param	len				pixel count; nothing is written when it is <= 0
	@param	ssu , ssv		coordinate at the start of the span
	@param	ttu , ttv		coordinate at the end of the span
	@param	src				base address of the source pixels (a)
	@param	src_pitchbyte	byte offset between consecutive source rows
	@param	src_w , src_h	source extent, forwarded to WrapNear2_clamp()
	@param	b_color			forwarded to a_to_frgb() for every loaded pixel
	@param	weighttbl		blend table forwarded to weight4_frgb() (read at [ d ] and [ 256 - d ])
*/
cb_inline
void pp_cpp_texture_weight4f_m_clamp_rgb_a
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl
		)
{
	// An empty span writes nothing; returning here also keeps the step below from dividing by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel increment; the first sample sits half an increment past the start
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// source columns/rows to sample plus the horizontal/vertical blend indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the four source pixels: c[0],c[1] from row y[0] , c[2],c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_a( &c[ 0 ] , row0 + x[0] * PixelSize_a() );
		LoadAddr_a( &c[ 1 ] , row0 + x[1] * PixelSize_a() );
		LoadAddr_a( &c[ 2 ] , row1 + x[0] * PixelSize_a() );
		LoadAddr_a( &c[ 3 ] , row1 + x[1] * PixelSize_a() );

		// convert to the calculation format
		a_to_frgb( &c[ 0 ] , b_color , c[ 0 ] );
		a_to_frgb( &c[ 1 ] , b_color , c[ 1 ] );
		a_to_frgb( &c[ 2 ] , b_color , c[ 2 ] );
		a_to_frgb( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x within each row, then along y between the two rows
		pp_pixel_calc	cc , c1 , c2;
		weight4_frgb( &c1 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_frgb( &c2 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_frgb( &cc , c1 , c2 , dy , weighttbl );

		// convert back and store
		frgb_to_rgb( &cc , cc );
		StoreAddr_rgb( dest , cc );

		// next destination pixel and coordinate
		dest = AddPixelSize_rgb( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
/*!
	Fills a span of `len` rgba pixels at `dest` from an rgb source, blending the four source
	pixels selected by WrapNear2_clamp() for every destination pixel.

	(ssu,ssv) and (ttu,ttv) are converted to 16.16 fixed point and stepped by
	( end - start ) / len per pixel, starting half a step in.

	@param	dest			first destination pixel (rgba), advanced by AddPixelSize_rgba()
	@param	len				pixel count; nothing is written when it is <= 0
	@param	ssu , ssv		coordinate at the start of the span
	@param	ttu , ttv		coordinate at the end of the span
	@param	src				base address of the source pixels (rgb)
	@param	src_pitchbyte	byte offset between consecutive source rows
	@param	src_w , src_h	source extent, forwarded to WrapNear2_clamp()
	@param	b_color			forwarded to rgb_to_frgba() for every loaded pixel
	@param	weighttbl		blend table forwarded to weight4_frgba() (read at [ d ] and [ 256 - d ])
*/
cb_inline
void pp_cpp_texture_weight4f_m_clamp_rgba_rgb
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl
		)
{
	// An empty span writes nothing; returning here also keeps the step below from dividing by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel increment; the first sample sits half an increment past the start
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// source columns/rows to sample plus the horizontal/vertical blend indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the four source pixels: c[0],c[1] from row y[0] , c[2],c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_rgb( &c[ 0 ] , row0 + x[0] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 1 ] , row0 + x[1] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 2 ] , row1 + x[0] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 3 ] , row1 + x[1] * PixelSize_rgb() );

		// convert to the calculation format
		rgb_to_frgba( &c[ 0 ] , b_color , c[ 0 ] );
		rgb_to_frgba( &c[ 1 ] , b_color , c[ 1 ] );
		rgb_to_frgba( &c[ 2 ] , b_color , c[ 2 ] );
		rgb_to_frgba( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x within each row, then along y between the two rows
		pp_pixel_calc	cc , c1 , c2;
		weight4_frgba( &c1 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_frgba( &c2 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_frgba( &cc , c1 , c2 , dy , weighttbl );

		// convert back and store
		frgba_to_rgba( &cc , cc );
		StoreAddr_rgba( dest , cc );

		// next destination pixel and coordinate
		dest = AddPixelSize_rgba( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
/*!
	Fills a span of `len` rgba pixels at `dest` from an rgba source, blending the four source
	pixels selected by WrapNear2_clamp() for every destination pixel.

	(ssu,ssv) and (ttu,ttv) are converted to 16.16 fixed point and stepped by
	( end - start ) / len per pixel, starting half a step in.

	@param	dest			first destination pixel (rgba), advanced by AddPixelSize_rgba()
	@param	len				pixel count; nothing is written when it is <= 0
	@param	ssu , ssv		coordinate at the start of the span
	@param	ttu , ttv		coordinate at the end of the span
	@param	src				base address of the source pixels (rgba)
	@param	src_pitchbyte	byte offset between consecutive source rows
	@param	src_w , src_h	source extent, forwarded to WrapNear2_clamp()
	@param	b_color			forwarded to rgba_to_frgba() for every loaded pixel
	@param	weighttbl		blend table forwarded to weight4_frgba() (read at [ d ] and [ 256 - d ])
*/
cb_inline
void pp_cpp_texture_weight4f_m_clamp_rgba_rgba
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl
		)
{
	// An empty span writes nothing; returning here also keeps the step below from dividing by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel increment; the first sample sits half an increment past the start
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// source columns/rows to sample plus the horizontal/vertical blend indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the four source pixels: c[0],c[1] from row y[0] , c[2],c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_rgba( &c[ 0 ] , row0 + x[0] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 1 ] , row0 + x[1] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 2 ] , row1 + x[0] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 3 ] , row1 + x[1] * PixelSize_rgba() );

		// convert to the calculation format
		rgba_to_frgba( &c[ 0 ] , b_color , c[ 0 ] );
		rgba_to_frgba( &c[ 1 ] , b_color , c[ 1 ] );
		rgba_to_frgba( &c[ 2 ] , b_color , c[ 2 ] );
		rgba_to_frgba( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x within each row, then along y between the two rows
		pp_pixel_calc	cc , c1 , c2;
		weight4_frgba( &c1 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_frgba( &c2 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_frgba( &cc , c1 , c2 , dy , weighttbl );

		// convert back and store
		frgba_to_rgba( &cc , cc );
		StoreAddr_rgba( dest , cc );

		// next destination pixel and coordinate
		dest = AddPixelSize_rgba( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
/*!
	Fills a span of `len` rgba pixels at `dest` from an a (alpha) source, blending the four
	source pixels selected by WrapNear2_clamp() for every destination pixel.

	(ssu,ssv) and (ttu,ttv) are converted to 16.16 fixed point and stepped by
	( end - start ) / len per pixel, starting half a step in.

	@param	dest			first destination pixel (rgba), advanced by AddPixelSize_rgba()
	@param	len				pixel count; nothing is written when it is <= 0
	@param	ssu , ssv		coordinate at the start of the span
	@param	ttu , ttv		coordinate at the end of the span
	@param	src				base address of the source pixels (a)
	@param	src_pitchbyte	byte offset between consecutive source rows
	@param	src_w , src_h	source extent, forwarded to WrapNear2_clamp()
	@param	b_color			forwarded to a_to_frgba() for every loaded pixel
	@param	weighttbl		blend table forwarded to weight4_frgba() (read at [ d ] and [ 256 - d ])
*/
cb_inline
void pp_cpp_texture_weight4f_m_clamp_rgba_a
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl
		)
{
	// An empty span writes nothing; returning here also keeps the step below from dividing by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel increment; the first sample sits half an increment past the start
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// source columns/rows to sample plus the horizontal/vertical blend indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the four source pixels: c[0],c[1] from row y[0] , c[2],c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_a( &c[ 0 ] , row0 + x[0] * PixelSize_a() );
		LoadAddr_a( &c[ 1 ] , row0 + x[1] * PixelSize_a() );
		LoadAddr_a( &c[ 2 ] , row1 + x[0] * PixelSize_a() );
		LoadAddr_a( &c[ 3 ] , row1 + x[1] * PixelSize_a() );

		// convert to the calculation format
		a_to_frgba( &c[ 0 ] , b_color , c[ 0 ] );
		a_to_frgba( &c[ 1 ] , b_color , c[ 1 ] );
		a_to_frgba( &c[ 2 ] , b_color , c[ 2 ] );
		a_to_frgba( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x within each row, then along y between the two rows
		pp_pixel_calc	cc , c1 , c2;
		weight4_frgba( &c1 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_frgba( &c2 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_frgba( &cc , c1 , c2 , dy , weighttbl );

		// convert back and store
		frgba_to_rgba( &cc , cc );
		StoreAddr_rgba( dest , cc );

		// next destination pixel and coordinate
		dest = AddPixelSize_rgba( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
/*!
	Fills a span of `len` a (alpha) pixels at `dest` from an rgb source, blending the four
	source pixels selected by WrapNear2_clamp() for every destination pixel.

	(ssu,ssv) and (ttu,ttv) are converted to 16.16 fixed point and stepped by
	( end - start ) / len per pixel, starting half a step in.

	@param	dest			first destination pixel (a), advanced by AddPixelSize_a()
	@param	len				pixel count; nothing is written when it is <= 0
	@param	ssu , ssv		coordinate at the start of the span
	@param	ttu , ttv		coordinate at the end of the span
	@param	src				base address of the source pixels (rgb)
	@param	src_pitchbyte	byte offset between consecutive source rows
	@param	src_w , src_h	source extent, forwarded to WrapNear2_clamp()
	@param	b_color			forwarded to rgb_to_fa() for every loaded pixel
	@param	weighttbl		blend table forwarded to weight4_fa() (read at [ d ] and [ 256 - d ])
*/
cb_inline
void pp_cpp_texture_weight4f_m_clamp_a_rgb
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl
		)
{
	// An empty span writes nothing; returning here also keeps the step below from dividing by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel increment; the first sample sits half an increment past the start
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// source columns/rows to sample plus the horizontal/vertical blend indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the four source pixels: c[0],c[1] from row y[0] , c[2],c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_rgb( &c[ 0 ] , row0 + x[0] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 1 ] , row0 + x[1] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 2 ] , row1 + x[0] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 3 ] , row1 + x[1] * PixelSize_rgb() );

		// convert to the calculation format
		rgb_to_fa( &c[ 0 ] , b_color , c[ 0 ] );
		rgb_to_fa( &c[ 1 ] , b_color , c[ 1 ] );
		rgb_to_fa( &c[ 2 ] , b_color , c[ 2 ] );
		rgb_to_fa( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x within each row, then along y between the two rows
		pp_pixel_calc	cc , c1 , c2;
		weight4_fa( &c1 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_fa( &c2 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_fa( &cc , c1 , c2 , dy , weighttbl );

		// convert back and store
		fa_to_a( &cc , cc );
		StoreAddr_a( dest , cc );

		// next destination pixel and coordinate
		dest = AddPixelSize_a( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
/*!
	Fills a span of `len` a (alpha) pixels at `dest` from an rgba source, blending the four
	source pixels selected by WrapNear2_clamp() for every destination pixel.

	(ssu,ssv) and (ttu,ttv) are converted to 16.16 fixed point and stepped by
	( end - start ) / len per pixel, starting half a step in.

	@param	dest			first destination pixel (a), advanced by AddPixelSize_a()
	@param	len				pixel count; nothing is written when it is <= 0
	@param	ssu , ssv		coordinate at the start of the span
	@param	ttu , ttv		coordinate at the end of the span
	@param	src				base address of the source pixels (rgba)
	@param	src_pitchbyte	byte offset between consecutive source rows
	@param	src_w , src_h	source extent, forwarded to WrapNear2_clamp()
	@param	b_color			forwarded to rgba_to_fa() for every loaded pixel
	@param	weighttbl		blend table forwarded to weight4_fa() (read at [ d ] and [ 256 - d ])
*/
cb_inline
void pp_cpp_texture_weight4f_m_clamp_a_rgba
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl
		)
{
	// An empty span writes nothing; returning here also keeps the step below from dividing by zero.
	if( len <= 0 )
		return;

	// span end points in 16.16 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );

	// per-pixel increment; the first sample sits half an increment past the start
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8	*psrc = ( const uint8* )src;

	for( int off = 0 ; off < len ; off++ )
	{
		// source columns/rows to sample plus the horizontal/vertical blend indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the four source pixels: c[0],c[1] from row y[0] , c[2],c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_rgba( &c[ 0 ] , row0 + x[0] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 1 ] , row0 + x[1] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 2 ] , row1 + x[0] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 3 ] , row1 + x[1] * PixelSize_rgba() );

		// convert to the calculation format
		rgba_to_fa( &c[ 0 ] , b_color , c[ 0 ] );
		rgba_to_fa( &c[ 1 ] , b_color , c[ 1 ] );
		rgba_to_fa( &c[ 2 ] , b_color , c[ 2 ] );
		rgba_to_fa( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x within each row, then along y between the two rows
		pp_pixel_calc	cc , c1 , c2;
		weight4_fa( &c1 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_fa( &c2 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_fa( &cc , c1 , c2 , dy , weighttbl );

		// convert back and store
		fa_to_a( &cc , cc );
		StoreAddr_a( dest , cc );

		// next destination pixel and coordinate
		dest = AddPixelSize_a( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
// Fills a span of `len` destination pixels (written with StoreAddr_a) from a source
// read with LoadAddr_a, using WrapNear2_clamp addressing and a 2x2 blend
// weighted through `weighttbl`.
//
//	dest			first pixel to write; stepped with AddPixelSize_a after every store
//	len				pixel count; returns without writing anything when len <= 0
//	ssu , ssv		texture coordinate at the start of the span
//	ttu , ttv		texture coordinate at the end of the span
//	src				base address of the source image
//	src_pitchbyte	byte offset from one source row to the next
//	src_w , src_h	source dimensions, passed to WrapNear2_clamp
//	b_color			passed through to a_to_fa
//	weighttbl		weights read by weight4_fa at indices d and 256 - d
cb_inline
void pp_cpp_texture_weight4f_m_clamp_a_a
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const float*	weighttbl
		)
{
	// An empty span stores nothing; returning here also keeps len == 0 away from
	// the divisions below.
	if( len <= 0 )
		return;

	// coordinates scaled by 0x10000 (16.16 fixed point)
	int32		su	= ( int32 )( ssu * 0x10000 );
	int32		sv	= ( int32 )( ssv * 0x10000 );
	const int32	tu	= ( int32 )( ttu * 0x10000 );
	const int32	tv	= ( int32 )( ttv * 0x10000 );

	// step per destination pixel; the first sample sits half a step into the span
	const int32	du	= ( tu - su ) / len;
	const int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8*	psrc = ( const uint8* )src;

	for( int32 off = 0 ; off < len ; off++ )
	{
		// WrapNear2_clamp fills in the two columns , the two rows and the weight indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the 2x2 block : c[0] c[1] from row y[0] , c[2] c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_a( &c[ 0 ] , row0 + x[0] * PixelSize_a() );
		LoadAddr_a( &c[ 1 ] , row0 + x[1] * PixelSize_a() );
		LoadAddr_a( &c[ 2 ] , row1 + x[0] * PixelSize_a() );
		LoadAddr_a( &c[ 3 ] , row1 + x[1] * PixelSize_a() );

		// convert to the calculation format
		a_to_fa( &c[ 0 ] , b_color , c[ 0 ] );
		a_to_fa( &c[ 1 ] , b_color , c[ 1 ] );
		a_to_fa( &c[ 2 ] , b_color , c[ 2 ] );
		a_to_fa( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x inside each row , then along y between the two rows
		pp_pixel_calc	r0 , r1 , cc;
		weight4_fa( &r0 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_fa( &r1 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_fa( &cc , r0 , r1 , dy , weighttbl );

		// convert back and store
		fa_to_a( &cc , cc );
		StoreAddr_a( dest , cc );

		dest = AddPixelSize_a( dest );
		su += du;
		sv += dv;
	}
}

//=================================================================================================
// texture weight4 alpha
//=================================================================================================

//=================================================================================================
// Fills a span of `len` destination pixels (written with StoreAddr_rgb) from a source
// read with LoadAddr_rgb, using WrapNear2_repeat addressing and a 2x2 blend
// weighted through `weighttbl`; the blended pixel goes through MulAlpha_frgb
// with `alpha` before it is stored.
//
//	dest			first pixel to write; stepped with AddPixelSize_rgb after every store
//	len				pixel count; returns without writing anything when len <= 0
//	ssu , ssv		texture coordinate at the start of the span
//	ttu , ttv		texture coordinate at the end of the span
//	src				base address of the source image
//	src_pitchbyte	byte offset from one source row to the next
//	src_w , src_h	source dimensions, passed to WrapNear2_repeat
//	b_color			passed through to rgb_to_frgb
//	alpha			passed to MulAlpha_frgb
//	weighttbl		weights read by weight4_frgb at indices d and 256 - d
cb_inline
void pp_cpp_texture_weight4f_m_alpha_repeat_rgb_rgb
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl
		)
{
	// An empty span stores nothing; returning here also keeps len == 0 away from
	// the divisions below.
	if( len <= 0 )
		return;

	// coordinates scaled by 0x10000 (16.16 fixed point)
	int32		su	= ( int32 )( ssu * 0x10000 );
	int32		sv	= ( int32 )( ssv * 0x10000 );
	const int32	tu	= ( int32 )( ttu * 0x10000 );
	const int32	tv	= ( int32 )( ttv * 0x10000 );

	// step per destination pixel; the first sample sits half a step into the span
	const int32	du	= ( tu - su ) / len;
	const int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8*	psrc = ( const uint8* )src;

	for( int32 off = 0 ; off < len ; off++ )
	{
		// WrapNear2_repeat fills in the two columns , the two rows and the weight indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the 2x2 block : c[0] c[1] from row y[0] , c[2] c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_rgb( &c[ 0 ] , row0 + x[0] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 1 ] , row0 + x[1] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 2 ] , row1 + x[0] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 3 ] , row1 + x[1] * PixelSize_rgb() );

		// convert to the calculation format
		rgb_to_frgb( &c[ 0 ] , b_color , c[ 0 ] );
		rgb_to_frgb( &c[ 1 ] , b_color , c[ 1 ] );
		rgb_to_frgb( &c[ 2 ] , b_color , c[ 2 ] );
		rgb_to_frgb( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x inside each row , then along y between the two rows
		pp_pixel_calc	r0 , r1 , cc;
		weight4_frgb( &r0 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_frgb( &r1 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_frgb( &cc , r0 , r1 , dy , weighttbl );
		MulAlpha_frgb( &cc , alpha );

		// convert back and store
		frgb_to_rgb( &cc , cc );
		StoreAddr_rgb( dest , cc );

		dest = AddPixelSize_rgb( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
// Fills a span of `len` destination pixels (written with StoreAddr_rgb) from a source
// read with LoadAddr_rgba, using WrapNear2_repeat addressing and a 2x2 blend
// weighted through `weighttbl`; the blended pixel goes through MulAlpha_frgba
// with `alpha` before it is stored.
//
//	dest			first pixel to write; stepped with AddPixelSize_rgb after every store
//	len				pixel count; returns without writing anything when len <= 0
//	ssu , ssv		texture coordinate at the start of the span
//	ttu , ttv		texture coordinate at the end of the span
//	src				base address of the source image
//	src_pitchbyte	byte offset from one source row to the next
//	src_w , src_h	source dimensions, passed to WrapNear2_repeat
//	b_color			passed through to rgba_to_frgba
//	alpha			passed to MulAlpha_frgba
//	weighttbl		weights read by weight4_frgba at indices d and 256 - d
cb_inline
void pp_cpp_texture_weight4f_m_alpha_repeat_rgb_rgba
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl
		)
{
	// An empty span stores nothing; returning here also keeps len == 0 away from
	// the divisions below.
	if( len <= 0 )
		return;

	// coordinates scaled by 0x10000 (16.16 fixed point)
	int32		su	= ( int32 )( ssu * 0x10000 );
	int32		sv	= ( int32 )( ssv * 0x10000 );
	const int32	tu	= ( int32 )( ttu * 0x10000 );
	const int32	tv	= ( int32 )( ttv * 0x10000 );

	// step per destination pixel; the first sample sits half a step into the span
	const int32	du	= ( tu - su ) / len;
	const int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8*	psrc = ( const uint8* )src;

	for( int32 off = 0 ; off < len ; off++ )
	{
		// WrapNear2_repeat fills in the two columns , the two rows and the weight indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the 2x2 block : c[0] c[1] from row y[0] , c[2] c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_rgba( &c[ 0 ] , row0 + x[0] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 1 ] , row0 + x[1] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 2 ] , row1 + x[0] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 3 ] , row1 + x[1] * PixelSize_rgba() );

		// convert to the calculation format
		rgba_to_frgba( &c[ 0 ] , b_color , c[ 0 ] );
		rgba_to_frgba( &c[ 1 ] , b_color , c[ 1 ] );
		rgba_to_frgba( &c[ 2 ] , b_color , c[ 2 ] );
		rgba_to_frgba( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x inside each row , then along y between the two rows
		pp_pixel_calc	r0 , r1 , cc;
		weight4_frgba( &r0 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_frgba( &r1 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_frgba( &cc , r0 , r1 , dy , weighttbl );
		MulAlpha_frgba( &cc , alpha );

		// convert back and store
		frgba_to_rgb( &cc , cc );
		StoreAddr_rgb( dest , cc );

		dest = AddPixelSize_rgb( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
// Fills a span of `len` destination pixels (written with StoreAddr_rgb) from a source
// read with LoadAddr_a, using WrapNear2_repeat addressing and a 2x2 blend
// weighted through `weighttbl`; the blended pixel goes through MulAlpha_frgb
// with `alpha` before it is stored.
//
//	dest			first pixel to write; stepped with AddPixelSize_rgb after every store
//	len				pixel count; returns without writing anything when len <= 0
//	ssu , ssv		texture coordinate at the start of the span
//	ttu , ttv		texture coordinate at the end of the span
//	src				base address of the source image
//	src_pitchbyte	byte offset from one source row to the next
//	src_w , src_h	source dimensions, passed to WrapNear2_repeat
//	b_color			passed through to a_to_frgb
//	alpha			passed to MulAlpha_frgb
//	weighttbl		weights read by weight4_frgb at indices d and 256 - d
cb_inline
void pp_cpp_texture_weight4f_m_alpha_repeat_rgb_a
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl
		)
{
	// An empty span stores nothing; returning here also keeps len == 0 away from
	// the divisions below.
	if( len <= 0 )
		return;

	// coordinates scaled by 0x10000 (16.16 fixed point)
	int32		su	= ( int32 )( ssu * 0x10000 );
	int32		sv	= ( int32 )( ssv * 0x10000 );
	const int32	tu	= ( int32 )( ttu * 0x10000 );
	const int32	tv	= ( int32 )( ttv * 0x10000 );

	// step per destination pixel; the first sample sits half a step into the span
	const int32	du	= ( tu - su ) / len;
	const int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8*	psrc = ( const uint8* )src;

	for( int32 off = 0 ; off < len ; off++ )
	{
		// WrapNear2_repeat fills in the two columns , the two rows and the weight indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the 2x2 block : c[0] c[1] from row y[0] , c[2] c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_a( &c[ 0 ] , row0 + x[0] * PixelSize_a() );
		LoadAddr_a( &c[ 1 ] , row0 + x[1] * PixelSize_a() );
		LoadAddr_a( &c[ 2 ] , row1 + x[0] * PixelSize_a() );
		LoadAddr_a( &c[ 3 ] , row1 + x[1] * PixelSize_a() );

		// convert to the calculation format
		a_to_frgb( &c[ 0 ] , b_color , c[ 0 ] );
		a_to_frgb( &c[ 1 ] , b_color , c[ 1 ] );
		a_to_frgb( &c[ 2 ] , b_color , c[ 2 ] );
		a_to_frgb( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x inside each row , then along y between the two rows
		pp_pixel_calc	r0 , r1 , cc;
		weight4_frgb( &r0 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_frgb( &r1 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_frgb( &cc , r0 , r1 , dy , weighttbl );
		MulAlpha_frgb( &cc , alpha );

		// convert back and store
		frgb_to_rgb( &cc , cc );
		StoreAddr_rgb( dest , cc );

		dest = AddPixelSize_rgb( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
// Fills a span of `len` destination pixels (written with StoreAddr_rgba) from a source
// read with LoadAddr_rgb, using WrapNear2_repeat addressing and a 2x2 blend
// weighted through `weighttbl`; the blended pixel goes through MulAlpha_frgba
// with `alpha` before it is stored.
//
//	dest			first pixel to write; stepped with AddPixelSize_rgba after every store
//	len				pixel count; returns without writing anything when len <= 0
//	ssu , ssv		texture coordinate at the start of the span
//	ttu , ttv		texture coordinate at the end of the span
//	src				base address of the source image
//	src_pitchbyte	byte offset from one source row to the next
//	src_w , src_h	source dimensions, passed to WrapNear2_repeat
//	b_color			passed through to rgb_to_frgba
//	alpha			passed to MulAlpha_frgba
//	weighttbl		weights read by weight4_frgba at indices d and 256 - d
cb_inline
void pp_cpp_texture_weight4f_m_alpha_repeat_rgba_rgb
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl
		)
{
	// An empty span stores nothing; returning here also keeps len == 0 away from
	// the divisions below.
	if( len <= 0 )
		return;

	// coordinates scaled by 0x10000 (16.16 fixed point)
	int32		su	= ( int32 )( ssu * 0x10000 );
	int32		sv	= ( int32 )( ssv * 0x10000 );
	const int32	tu	= ( int32 )( ttu * 0x10000 );
	const int32	tv	= ( int32 )( ttv * 0x10000 );

	// step per destination pixel; the first sample sits half a step into the span
	const int32	du	= ( tu - su ) / len;
	const int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8*	psrc = ( const uint8* )src;

	for( int32 off = 0 ; off < len ; off++ )
	{
		// WrapNear2_repeat fills in the two columns , the two rows and the weight indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the 2x2 block : c[0] c[1] from row y[0] , c[2] c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_rgb( &c[ 0 ] , row0 + x[0] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 1 ] , row0 + x[1] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 2 ] , row1 + x[0] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 3 ] , row1 + x[1] * PixelSize_rgb() );

		// convert to the calculation format
		rgb_to_frgba( &c[ 0 ] , b_color , c[ 0 ] );
		rgb_to_frgba( &c[ 1 ] , b_color , c[ 1 ] );
		rgb_to_frgba( &c[ 2 ] , b_color , c[ 2 ] );
		rgb_to_frgba( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x inside each row , then along y between the two rows
		pp_pixel_calc	r0 , r1 , cc;
		weight4_frgba( &r0 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_frgba( &r1 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_frgba( &cc , r0 , r1 , dy , weighttbl );
		MulAlpha_frgba( &cc , alpha );

		// convert back and store
		frgba_to_rgba( &cc , cc );
		StoreAddr_rgba( dest , cc );

		dest = AddPixelSize_rgba( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
// Fills a span of `len` destination pixels (written with StoreAddr_rgba) from a source
// read with LoadAddr_rgba, using WrapNear2_repeat addressing and a 2x2 blend
// weighted through `weighttbl`; the blended pixel goes through MulAlpha_frgba
// with `alpha` before it is stored.
//
//	dest			first pixel to write; stepped with AddPixelSize_rgba after every store
//	len				pixel count; returns without writing anything when len <= 0
//	ssu , ssv		texture coordinate at the start of the span
//	ttu , ttv		texture coordinate at the end of the span
//	src				base address of the source image
//	src_pitchbyte	byte offset from one source row to the next
//	src_w , src_h	source dimensions, passed to WrapNear2_repeat
//	b_color			passed through to rgba_to_frgba
//	alpha			passed to MulAlpha_frgba
//	weighttbl		weights read by weight4_frgba at indices d and 256 - d
cb_inline
void pp_cpp_texture_weight4f_m_alpha_repeat_rgba_rgba
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl
		)
{
	// An empty span stores nothing; returning here also keeps len == 0 away from
	// the divisions below.
	if( len <= 0 )
		return;

	// coordinates scaled by 0x10000 (16.16 fixed point)
	int32		su	= ( int32 )( ssu * 0x10000 );
	int32		sv	= ( int32 )( ssv * 0x10000 );
	const int32	tu	= ( int32 )( ttu * 0x10000 );
	const int32	tv	= ( int32 )( ttv * 0x10000 );

	// step per destination pixel; the first sample sits half a step into the span
	const int32	du	= ( tu - su ) / len;
	const int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8*	psrc = ( const uint8* )src;

	for( int32 off = 0 ; off < len ; off++ )
	{
		// WrapNear2_repeat fills in the two columns , the two rows and the weight indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the 2x2 block : c[0] c[1] from row y[0] , c[2] c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_rgba( &c[ 0 ] , row0 + x[0] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 1 ] , row0 + x[1] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 2 ] , row1 + x[0] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 3 ] , row1 + x[1] * PixelSize_rgba() );

		// convert to the calculation format
		rgba_to_frgba( &c[ 0 ] , b_color , c[ 0 ] );
		rgba_to_frgba( &c[ 1 ] , b_color , c[ 1 ] );
		rgba_to_frgba( &c[ 2 ] , b_color , c[ 2 ] );
		rgba_to_frgba( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x inside each row , then along y between the two rows
		pp_pixel_calc	r0 , r1 , cc;
		weight4_frgba( &r0 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_frgba( &r1 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_frgba( &cc , r0 , r1 , dy , weighttbl );
		MulAlpha_frgba( &cc , alpha );

		// convert back and store
		frgba_to_rgba( &cc , cc );
		StoreAddr_rgba( dest , cc );

		dest = AddPixelSize_rgba( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
// Fills a span of `len` destination pixels (written with StoreAddr_rgba) from a source
// read with LoadAddr_a, using WrapNear2_repeat addressing and a 2x2 blend
// weighted through `weighttbl`; the blended pixel goes through MulAlpha_frgba
// with `alpha` before it is stored.
//
//	dest			first pixel to write; stepped with AddPixelSize_rgba after every store
//	len				pixel count; returns without writing anything when len <= 0
//	ssu , ssv		texture coordinate at the start of the span
//	ttu , ttv		texture coordinate at the end of the span
//	src				base address of the source image
//	src_pitchbyte	byte offset from one source row to the next
//	src_w , src_h	source dimensions, passed to WrapNear2_repeat
//	b_color			passed through to a_to_frgba
//	alpha			passed to MulAlpha_frgba
//	weighttbl		weights read by weight4_frgba at indices d and 256 - d
cb_inline
void pp_cpp_texture_weight4f_m_alpha_repeat_rgba_a
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl
		)
{
	// An empty span stores nothing; returning here also keeps len == 0 away from
	// the divisions below.
	if( len <= 0 )
		return;

	// coordinates scaled by 0x10000 (16.16 fixed point)
	int32		su	= ( int32 )( ssu * 0x10000 );
	int32		sv	= ( int32 )( ssv * 0x10000 );
	const int32	tu	= ( int32 )( ttu * 0x10000 );
	const int32	tv	= ( int32 )( ttv * 0x10000 );

	// step per destination pixel; the first sample sits half a step into the span
	const int32	du	= ( tu - su ) / len;
	const int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8*	psrc = ( const uint8* )src;

	for( int32 off = 0 ; off < len ; off++ )
	{
		// WrapNear2_repeat fills in the two columns , the two rows and the weight indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the 2x2 block : c[0] c[1] from row y[0] , c[2] c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_a( &c[ 0 ] , row0 + x[0] * PixelSize_a() );
		LoadAddr_a( &c[ 1 ] , row0 + x[1] * PixelSize_a() );
		LoadAddr_a( &c[ 2 ] , row1 + x[0] * PixelSize_a() );
		LoadAddr_a( &c[ 3 ] , row1 + x[1] * PixelSize_a() );

		// convert to the calculation format
		a_to_frgba( &c[ 0 ] , b_color , c[ 0 ] );
		a_to_frgba( &c[ 1 ] , b_color , c[ 1 ] );
		a_to_frgba( &c[ 2 ] , b_color , c[ 2 ] );
		a_to_frgba( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x inside each row , then along y between the two rows
		pp_pixel_calc	r0 , r1 , cc;
		weight4_frgba( &r0 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_frgba( &r1 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_frgba( &cc , r0 , r1 , dy , weighttbl );
		MulAlpha_frgba( &cc , alpha );

		// convert back and store
		frgba_to_rgba( &cc , cc );
		StoreAddr_rgba( dest , cc );

		dest = AddPixelSize_rgba( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
// Fills a span of `len` destination pixels (written with StoreAddr_a) from a source
// read with LoadAddr_rgb, using WrapNear2_repeat addressing and a 2x2 blend
// weighted through `weighttbl`; the blended pixel goes through MulAlpha_fa
// with `alpha` before it is stored.
//
//	dest			first pixel to write; stepped with AddPixelSize_a after every store
//	len				pixel count; returns without writing anything when len <= 0
//	ssu , ssv		texture coordinate at the start of the span
//	ttu , ttv		texture coordinate at the end of the span
//	src				base address of the source image
//	src_pitchbyte	byte offset from one source row to the next
//	src_w , src_h	source dimensions, passed to WrapNear2_repeat
//	b_color			passed through to rgb_to_fa
//	alpha			passed to MulAlpha_fa
//	weighttbl		weights read by weight4_fa at indices d and 256 - d
cb_inline
void pp_cpp_texture_weight4f_m_alpha_repeat_a_rgb
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl
		)
{
	// An empty span stores nothing; returning here also keeps len == 0 away from
	// the divisions below.
	if( len <= 0 )
		return;

	// coordinates scaled by 0x10000 (16.16 fixed point)
	int32		su	= ( int32 )( ssu * 0x10000 );
	int32		sv	= ( int32 )( ssv * 0x10000 );
	const int32	tu	= ( int32 )( ttu * 0x10000 );
	const int32	tv	= ( int32 )( ttv * 0x10000 );

	// step per destination pixel; the first sample sits half a step into the span
	const int32	du	= ( tu - su ) / len;
	const int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8*	psrc = ( const uint8* )src;

	for( int32 off = 0 ; off < len ; off++ )
	{
		// WrapNear2_repeat fills in the two columns , the two rows and the weight indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the 2x2 block : c[0] c[1] from row y[0] , c[2] c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_rgb( &c[ 0 ] , row0 + x[0] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 1 ] , row0 + x[1] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 2 ] , row1 + x[0] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 3 ] , row1 + x[1] * PixelSize_rgb() );

		// convert to the calculation format
		rgb_to_fa( &c[ 0 ] , b_color , c[ 0 ] );
		rgb_to_fa( &c[ 1 ] , b_color , c[ 1 ] );
		rgb_to_fa( &c[ 2 ] , b_color , c[ 2 ] );
		rgb_to_fa( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x inside each row , then along y between the two rows
		pp_pixel_calc	r0 , r1 , cc;
		weight4_fa( &r0 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_fa( &r1 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_fa( &cc , r0 , r1 , dy , weighttbl );
		MulAlpha_fa( &cc , alpha );

		// convert back and store
		fa_to_a( &cc , cc );
		StoreAddr_a( dest , cc );

		dest = AddPixelSize_a( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
// Fills a span of `len` destination pixels (written with StoreAddr_a) from a source
// read with LoadAddr_rgba, using WrapNear2_repeat addressing and a 2x2 blend
// weighted through `weighttbl`; the blended pixel goes through MulAlpha_fa
// with `alpha` before it is stored.
//
//	dest			first pixel to write; stepped with AddPixelSize_a after every store
//	len				pixel count; returns without writing anything when len <= 0
//	ssu , ssv		texture coordinate at the start of the span
//	ttu , ttv		texture coordinate at the end of the span
//	src				base address of the source image
//	src_pitchbyte	byte offset from one source row to the next
//	src_w , src_h	source dimensions, passed to WrapNear2_repeat
//	b_color			passed through to rgba_to_fa
//	alpha			passed to MulAlpha_fa
//	weighttbl		weights read by weight4_fa at indices d and 256 - d
cb_inline
void pp_cpp_texture_weight4f_m_alpha_repeat_a_rgba
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl
		)
{
	// An empty span stores nothing; returning here also keeps len == 0 away from
	// the divisions below.
	if( len <= 0 )
		return;

	// coordinates scaled by 0x10000 (16.16 fixed point)
	int32		su	= ( int32 )( ssu * 0x10000 );
	int32		sv	= ( int32 )( ssv * 0x10000 );
	const int32	tu	= ( int32 )( ttu * 0x10000 );
	const int32	tv	= ( int32 )( ttv * 0x10000 );

	// step per destination pixel; the first sample sits half a step into the span
	const int32	du	= ( tu - su ) / len;
	const int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8*	psrc = ( const uint8* )src;

	for( int32 off = 0 ; off < len ; off++ )
	{
		// WrapNear2_repeat fills in the two columns , the two rows and the weight indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the 2x2 block : c[0] c[1] from row y[0] , c[2] c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_rgba( &c[ 0 ] , row0 + x[0] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 1 ] , row0 + x[1] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 2 ] , row1 + x[0] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 3 ] , row1 + x[1] * PixelSize_rgba() );

		// convert to the calculation format
		rgba_to_fa( &c[ 0 ] , b_color , c[ 0 ] );
		rgba_to_fa( &c[ 1 ] , b_color , c[ 1 ] );
		rgba_to_fa( &c[ 2 ] , b_color , c[ 2 ] );
		rgba_to_fa( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x inside each row , then along y between the two rows
		pp_pixel_calc	r0 , r1 , cc;
		weight4_fa( &r0 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_fa( &r1 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_fa( &cc , r0 , r1 , dy , weighttbl );
		MulAlpha_fa( &cc , alpha );

		// convert back and store
		fa_to_a( &cc , cc );
		StoreAddr_a( dest , cc );

		dest = AddPixelSize_a( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
// Fills a span of `len` destination pixels (written with StoreAddr_a) from a source
// read with LoadAddr_a, using WrapNear2_repeat addressing and a 2x2 blend
// weighted through `weighttbl`; the blended pixel goes through MulAlpha_fa
// with `alpha` before it is stored.
//
//	dest			first pixel to write; stepped with AddPixelSize_a after every store
//	len				pixel count; returns without writing anything when len <= 0
//	ssu , ssv		texture coordinate at the start of the span
//	ttu , ttv		texture coordinate at the end of the span
//	src				base address of the source image
//	src_pitchbyte	byte offset from one source row to the next
//	src_w , src_h	source dimensions, passed to WrapNear2_repeat
//	b_color			passed through to a_to_fa
//	alpha			passed to MulAlpha_fa
//	weighttbl		weights read by weight4_fa at indices d and 256 - d
cb_inline
void pp_cpp_texture_weight4f_m_alpha_repeat_a_a
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl
		)
{
	// An empty span stores nothing; returning here also keeps len == 0 away from
	// the divisions below.
	if( len <= 0 )
		return;

	// coordinates scaled by 0x10000 (16.16 fixed point)
	int32		su	= ( int32 )( ssu * 0x10000 );
	int32		sv	= ( int32 )( ssv * 0x10000 );
	const int32	tu	= ( int32 )( ttu * 0x10000 );
	const int32	tv	= ( int32 )( ttv * 0x10000 );

	// step per destination pixel; the first sample sits half a step into the span
	const int32	du	= ( tu - su ) / len;
	const int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8*	psrc = ( const uint8* )src;

	for( int32 off = 0 ; off < len ; off++ )
	{
		// WrapNear2_repeat fills in the two columns , the two rows and the weight indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_repeat( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the 2x2 block : c[0] c[1] from row y[0] , c[2] c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_a( &c[ 0 ] , row0 + x[0] * PixelSize_a() );
		LoadAddr_a( &c[ 1 ] , row0 + x[1] * PixelSize_a() );
		LoadAddr_a( &c[ 2 ] , row1 + x[0] * PixelSize_a() );
		LoadAddr_a( &c[ 3 ] , row1 + x[1] * PixelSize_a() );

		// convert to the calculation format
		a_to_fa( &c[ 0 ] , b_color , c[ 0 ] );
		a_to_fa( &c[ 1 ] , b_color , c[ 1 ] );
		a_to_fa( &c[ 2 ] , b_color , c[ 2 ] );
		a_to_fa( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x inside each row , then along y between the two rows
		pp_pixel_calc	r0 , r1 , cc;
		weight4_fa( &r0 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_fa( &r1 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_fa( &cc , r0 , r1 , dy , weighttbl );
		MulAlpha_fa( &cc , alpha );

		// convert back and store
		fa_to_a( &cc , cc );
		StoreAddr_a( dest , cc );

		dest = AddPixelSize_a( dest );
		su += du;
		sv += dv;
	}
}

//=================================================================================================
// Fills a span of `len` destination pixels (written with StoreAddr_rgb) from a source
// read with LoadAddr_rgb, using WrapNear2_clamp addressing and a 2x2 blend
// weighted through `weighttbl`; the blended pixel goes through MulAlpha_frgb
// with `alpha` before it is stored.
//
//	dest			first pixel to write; stepped with AddPixelSize_rgb after every store
//	len				pixel count; returns without writing anything when len <= 0
//	ssu , ssv		texture coordinate at the start of the span
//	ttu , ttv		texture coordinate at the end of the span
//	src				base address of the source image
//	src_pitchbyte	byte offset from one source row to the next
//	src_w , src_h	source dimensions, passed to WrapNear2_clamp
//	b_color			passed through to rgb_to_frgb
//	alpha			passed to MulAlpha_frgb
//	weighttbl		weights read by weight4_frgb at indices d and 256 - d
cb_inline
void pp_cpp_texture_weight4f_m_alpha_clamp_rgb_rgb
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl
		)
{
	// An empty span stores nothing; returning here also keeps len == 0 away from
	// the divisions below.
	if( len <= 0 )
		return;

	// coordinates scaled by 0x10000 (16.16 fixed point)
	int32		su	= ( int32 )( ssu * 0x10000 );
	int32		sv	= ( int32 )( ssv * 0x10000 );
	const int32	tu	= ( int32 )( ttu * 0x10000 );
	const int32	tv	= ( int32 )( ttv * 0x10000 );

	// step per destination pixel; the first sample sits half a step into the span
	const int32	du	= ( tu - su ) / len;
	const int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8*	psrc = ( const uint8* )src;

	for( int32 off = 0 ; off < len ; off++ )
	{
		// WrapNear2_clamp fills in the two columns , the two rows and the weight indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the 2x2 block : c[0] c[1] from row y[0] , c[2] c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_rgb( &c[ 0 ] , row0 + x[0] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 1 ] , row0 + x[1] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 2 ] , row1 + x[0] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 3 ] , row1 + x[1] * PixelSize_rgb() );

		// convert to the calculation format
		rgb_to_frgb( &c[ 0 ] , b_color , c[ 0 ] );
		rgb_to_frgb( &c[ 1 ] , b_color , c[ 1 ] );
		rgb_to_frgb( &c[ 2 ] , b_color , c[ 2 ] );
		rgb_to_frgb( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x inside each row , then along y between the two rows
		pp_pixel_calc	r0 , r1 , cc;
		weight4_frgb( &r0 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_frgb( &r1 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_frgb( &cc , r0 , r1 , dy , weighttbl );
		MulAlpha_frgb( &cc , alpha );

		// convert back and store
		frgb_to_rgb( &cc , cc );
		StoreAddr_rgb( dest , cc );

		dest = AddPixelSize_rgb( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
// Fills a span of `len` destination pixels (written with StoreAddr_rgb) from a source
// read with LoadAddr_rgba, using WrapNear2_clamp addressing and a 2x2 blend
// weighted through `weighttbl`; the blended pixel goes through MulAlpha_frgba
// with `alpha` before it is stored.
//
//	dest			first pixel to write; stepped with AddPixelSize_rgb after every store
//	len				pixel count; returns without writing anything when len <= 0
//	ssu , ssv		texture coordinate at the start of the span
//	ttu , ttv		texture coordinate at the end of the span
//	src				base address of the source image
//	src_pitchbyte	byte offset from one source row to the next
//	src_w , src_h	source dimensions, passed to WrapNear2_clamp
//	b_color			passed through to rgba_to_frgba
//	alpha			passed to MulAlpha_frgba
//	weighttbl		weights read by weight4_frgba at indices d and 256 - d
cb_inline
void pp_cpp_texture_weight4f_m_alpha_clamp_rgb_rgba
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl
		)
{
	// An empty span stores nothing; returning here also keeps len == 0 away from
	// the divisions below.
	if( len <= 0 )
		return;

	// coordinates scaled by 0x10000 (16.16 fixed point)
	int32		su	= ( int32 )( ssu * 0x10000 );
	int32		sv	= ( int32 )( ssv * 0x10000 );
	const int32	tu	= ( int32 )( ttu * 0x10000 );
	const int32	tv	= ( int32 )( ttv * 0x10000 );

	// step per destination pixel; the first sample sits half a step into the span
	const int32	du	= ( tu - su ) / len;
	const int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8*	psrc = ( const uint8* )src;

	for( int32 off = 0 ; off < len ; off++ )
	{
		// WrapNear2_clamp fills in the two columns , the two rows and the weight indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the 2x2 block : c[0] c[1] from row y[0] , c[2] c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_rgba( &c[ 0 ] , row0 + x[0] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 1 ] , row0 + x[1] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 2 ] , row1 + x[0] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 3 ] , row1 + x[1] * PixelSize_rgba() );

		// convert to the calculation format
		rgba_to_frgba( &c[ 0 ] , b_color , c[ 0 ] );
		rgba_to_frgba( &c[ 1 ] , b_color , c[ 1 ] );
		rgba_to_frgba( &c[ 2 ] , b_color , c[ 2 ] );
		rgba_to_frgba( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x inside each row , then along y between the two rows
		pp_pixel_calc	r0 , r1 , cc;
		weight4_frgba( &r0 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_frgba( &r1 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_frgba( &cc , r0 , r1 , dy , weighttbl );
		MulAlpha_frgba( &cc , alpha );

		// convert back and store
		frgba_to_rgb( &cc , cc );
		StoreAddr_rgb( dest , cc );

		dest = AddPixelSize_rgb( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
// Fills a span of `len` destination pixels (written with StoreAddr_rgb) from a source
// read with LoadAddr_a, using WrapNear2_clamp addressing and a 2x2 blend
// weighted through `weighttbl`; the blended pixel goes through MulAlpha_frgb
// with `alpha` before it is stored.
//
//	dest			first pixel to write; stepped with AddPixelSize_rgb after every store
//	len				pixel count; returns without writing anything when len <= 0
//	ssu , ssv		texture coordinate at the start of the span
//	ttu , ttv		texture coordinate at the end of the span
//	src				base address of the source image
//	src_pitchbyte	byte offset from one source row to the next
//	src_w , src_h	source dimensions, passed to WrapNear2_clamp
//	b_color			passed through to a_to_frgb
//	alpha			passed to MulAlpha_frgb
//	weighttbl		weights read by weight4_frgb at indices d and 256 - d
cb_inline
void pp_cpp_texture_weight4f_m_alpha_clamp_rgb_a
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl
		)
{
	// An empty span stores nothing; returning here also keeps len == 0 away from
	// the divisions below.
	if( len <= 0 )
		return;

	// coordinates scaled by 0x10000 (16.16 fixed point)
	int32		su	= ( int32 )( ssu * 0x10000 );
	int32		sv	= ( int32 )( ssv * 0x10000 );
	const int32	tu	= ( int32 )( ttu * 0x10000 );
	const int32	tv	= ( int32 )( ttv * 0x10000 );

	// step per destination pixel; the first sample sits half a step into the span
	const int32	du	= ( tu - su ) / len;
	const int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8*	psrc = ( const uint8* )src;

	for( int32 off = 0 ; off < len ; off++ )
	{
		// WrapNear2_clamp fills in the two columns , the two rows and the weight indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the 2x2 block : c[0] c[1] from row y[0] , c[2] c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_a( &c[ 0 ] , row0 + x[0] * PixelSize_a() );
		LoadAddr_a( &c[ 1 ] , row0 + x[1] * PixelSize_a() );
		LoadAddr_a( &c[ 2 ] , row1 + x[0] * PixelSize_a() );
		LoadAddr_a( &c[ 3 ] , row1 + x[1] * PixelSize_a() );

		// convert to the calculation format
		a_to_frgb( &c[ 0 ] , b_color , c[ 0 ] );
		a_to_frgb( &c[ 1 ] , b_color , c[ 1 ] );
		a_to_frgb( &c[ 2 ] , b_color , c[ 2 ] );
		a_to_frgb( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x inside each row , then along y between the two rows
		pp_pixel_calc	r0 , r1 , cc;
		weight4_frgb( &r0 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_frgb( &r1 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_frgb( &cc , r0 , r1 , dy , weighttbl );
		MulAlpha_frgb( &cc , alpha );

		// convert back and store
		frgb_to_rgb( &cc , cc );
		StoreAddr_rgb( dest , cc );

		dest = AddPixelSize_rgb( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
// Fills a span of `len` destination pixels (written with StoreAddr_rgba) from a source
// read with LoadAddr_rgb, using WrapNear2_clamp addressing and a 2x2 blend
// weighted through `weighttbl`; the blended pixel goes through MulAlpha_frgba
// with `alpha` before it is stored.
//
//	dest			first pixel to write; stepped with AddPixelSize_rgba after every store
//	len				pixel count; returns without writing anything when len <= 0
//	ssu , ssv		texture coordinate at the start of the span
//	ttu , ttv		texture coordinate at the end of the span
//	src				base address of the source image
//	src_pitchbyte	byte offset from one source row to the next
//	src_w , src_h	source dimensions, passed to WrapNear2_clamp
//	b_color			passed through to rgb_to_frgba
//	alpha			passed to MulAlpha_frgba
//	weighttbl		weights read by weight4_frgba at indices d and 256 - d
cb_inline
void pp_cpp_texture_weight4f_m_alpha_clamp_rgba_rgb
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl
		)
{
	// An empty span stores nothing; returning here also keeps len == 0 away from
	// the divisions below.
	if( len <= 0 )
		return;

	// coordinates scaled by 0x10000 (16.16 fixed point)
	int32		su	= ( int32 )( ssu * 0x10000 );
	int32		sv	= ( int32 )( ssv * 0x10000 );
	const int32	tu	= ( int32 )( ttu * 0x10000 );
	const int32	tv	= ( int32 )( ttv * 0x10000 );

	// step per destination pixel; the first sample sits half a step into the span
	const int32	du	= ( tu - su ) / len;
	const int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8*	psrc = ( const uint8* )src;

	for( int32 off = 0 ; off < len ; off++ )
	{
		// WrapNear2_clamp fills in the two columns , the two rows and the weight indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the 2x2 block : c[0] c[1] from row y[0] , c[2] c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_rgb( &c[ 0 ] , row0 + x[0] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 1 ] , row0 + x[1] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 2 ] , row1 + x[0] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 3 ] , row1 + x[1] * PixelSize_rgb() );

		// convert to the calculation format
		rgb_to_frgba( &c[ 0 ] , b_color , c[ 0 ] );
		rgb_to_frgba( &c[ 1 ] , b_color , c[ 1 ] );
		rgb_to_frgba( &c[ 2 ] , b_color , c[ 2 ] );
		rgb_to_frgba( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x inside each row , then along y between the two rows
		pp_pixel_calc	r0 , r1 , cc;
		weight4_frgba( &r0 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_frgba( &r1 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_frgba( &cc , r0 , r1 , dy , weighttbl );
		MulAlpha_frgba( &cc , alpha );

		// convert back and store
		frgba_to_rgba( &cc , cc );
		StoreAddr_rgba( dest , cc );

		dest = AddPixelSize_rgba( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
// Fills a span of `len` destination pixels (written with StoreAddr_rgba) from a source
// read with LoadAddr_rgba, using WrapNear2_clamp addressing and a 2x2 blend
// weighted through `weighttbl`; the blended pixel goes through MulAlpha_frgba
// with `alpha` before it is stored.
//
//	dest			first pixel to write; stepped with AddPixelSize_rgba after every store
//	len				pixel count; returns without writing anything when len <= 0
//	ssu , ssv		texture coordinate at the start of the span
//	ttu , ttv		texture coordinate at the end of the span
//	src				base address of the source image
//	src_pitchbyte	byte offset from one source row to the next
//	src_w , src_h	source dimensions, passed to WrapNear2_clamp
//	b_color			passed through to rgba_to_frgba
//	alpha			passed to MulAlpha_frgba
//	weighttbl		weights read by weight4_frgba at indices d and 256 - d
cb_inline
void pp_cpp_texture_weight4f_m_alpha_clamp_rgba_rgba
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl
		)
{
	// An empty span stores nothing; returning here also keeps len == 0 away from
	// the divisions below.
	if( len <= 0 )
		return;

	// coordinates scaled by 0x10000 (16.16 fixed point)
	int32		su	= ( int32 )( ssu * 0x10000 );
	int32		sv	= ( int32 )( ssv * 0x10000 );
	const int32	tu	= ( int32 )( ttu * 0x10000 );
	const int32	tv	= ( int32 )( ttv * 0x10000 );

	// step per destination pixel; the first sample sits half a step into the span
	const int32	du	= ( tu - su ) / len;
	const int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8*	psrc = ( const uint8* )src;

	for( int32 off = 0 ; off < len ; off++ )
	{
		// WrapNear2_clamp fills in the two columns , the two rows and the weight indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the 2x2 block : c[0] c[1] from row y[0] , c[2] c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_rgba( &c[ 0 ] , row0 + x[0] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 1 ] , row0 + x[1] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 2 ] , row1 + x[0] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 3 ] , row1 + x[1] * PixelSize_rgba() );

		// convert to the calculation format
		rgba_to_frgba( &c[ 0 ] , b_color , c[ 0 ] );
		rgba_to_frgba( &c[ 1 ] , b_color , c[ 1 ] );
		rgba_to_frgba( &c[ 2 ] , b_color , c[ 2 ] );
		rgba_to_frgba( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x inside each row , then along y between the two rows
		pp_pixel_calc	r0 , r1 , cc;
		weight4_frgba( &r0 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_frgba( &r1 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_frgba( &cc , r0 , r1 , dy , weighttbl );
		MulAlpha_frgba( &cc , alpha );

		// convert back and store
		frgba_to_rgba( &cc , cc );
		StoreAddr_rgba( dest , cc );

		dest = AddPixelSize_rgba( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
// Fills a span of `len` destination pixels (written with StoreAddr_rgba) from a source
// read with LoadAddr_a, using WrapNear2_clamp addressing and a 2x2 blend
// weighted through `weighttbl`; the blended pixel goes through MulAlpha_frgba
// with `alpha` before it is stored.
//
//	dest			first pixel to write; stepped with AddPixelSize_rgba after every store
//	len				pixel count; returns without writing anything when len <= 0
//	ssu , ssv		texture coordinate at the start of the span
//	ttu , ttv		texture coordinate at the end of the span
//	src				base address of the source image
//	src_pitchbyte	byte offset from one source row to the next
//	src_w , src_h	source dimensions, passed to WrapNear2_clamp
//	b_color			passed through to a_to_frgba
//	alpha			passed to MulAlpha_frgba
//	weighttbl		weights read by weight4_frgba at indices d and 256 - d
cb_inline
void pp_cpp_texture_weight4f_m_alpha_clamp_rgba_a
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl
		)
{
	// An empty span stores nothing; returning here also keeps len == 0 away from
	// the divisions below.
	if( len <= 0 )
		return;

	// coordinates scaled by 0x10000 (16.16 fixed point)
	int32		su	= ( int32 )( ssu * 0x10000 );
	int32		sv	= ( int32 )( ssv * 0x10000 );
	const int32	tu	= ( int32 )( ttu * 0x10000 );
	const int32	tv	= ( int32 )( ttv * 0x10000 );

	// step per destination pixel; the first sample sits half a step into the span
	const int32	du	= ( tu - su ) / len;
	const int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8*	psrc = ( const uint8* )src;

	for( int32 off = 0 ; off < len ; off++ )
	{
		// WrapNear2_clamp fills in the two columns , the two rows and the weight indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the 2x2 block : c[0] c[1] from row y[0] , c[2] c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_a( &c[ 0 ] , row0 + x[0] * PixelSize_a() );
		LoadAddr_a( &c[ 1 ] , row0 + x[1] * PixelSize_a() );
		LoadAddr_a( &c[ 2 ] , row1 + x[0] * PixelSize_a() );
		LoadAddr_a( &c[ 3 ] , row1 + x[1] * PixelSize_a() );

		// convert to the calculation format
		a_to_frgba( &c[ 0 ] , b_color , c[ 0 ] );
		a_to_frgba( &c[ 1 ] , b_color , c[ 1 ] );
		a_to_frgba( &c[ 2 ] , b_color , c[ 2 ] );
		a_to_frgba( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x inside each row , then along y between the two rows
		pp_pixel_calc	r0 , r1 , cc;
		weight4_frgba( &r0 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_frgba( &r1 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_frgba( &cc , r0 , r1 , dy , weighttbl );
		MulAlpha_frgba( &cc , alpha );

		// convert back and store
		frgba_to_rgba( &cc , cc );
		StoreAddr_rgba( dest , cc );

		dest = AddPixelSize_rgba( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
// Fills a span of `len` destination pixels (written with StoreAddr_a) from a source
// read with LoadAddr_rgb, using WrapNear2_clamp addressing and a 2x2 blend
// weighted through `weighttbl`; the blended pixel goes through MulAlpha_fa
// with `alpha` before it is stored.
//
//	dest			first pixel to write; stepped with AddPixelSize_a after every store
//	len				pixel count; returns without writing anything when len <= 0
//	ssu , ssv		texture coordinate at the start of the span
//	ttu , ttv		texture coordinate at the end of the span
//	src				base address of the source image
//	src_pitchbyte	byte offset from one source row to the next
//	src_w , src_h	source dimensions, passed to WrapNear2_clamp
//	b_color			passed through to rgb_to_fa
//	alpha			passed to MulAlpha_fa
//	weighttbl		weights read by weight4_fa at indices d and 256 - d
cb_inline
void pp_cpp_texture_weight4f_m_alpha_clamp_a_rgb
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl
		)
{
	// An empty span stores nothing; returning here also keeps len == 0 away from
	// the divisions below.
	if( len <= 0 )
		return;

	// coordinates scaled by 0x10000 (16.16 fixed point)
	int32		su	= ( int32 )( ssu * 0x10000 );
	int32		sv	= ( int32 )( ssv * 0x10000 );
	const int32	tu	= ( int32 )( ttu * 0x10000 );
	const int32	tv	= ( int32 )( ttv * 0x10000 );

	// step per destination pixel; the first sample sits half a step into the span
	const int32	du	= ( tu - su ) / len;
	const int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8*	psrc = ( const uint8* )src;

	for( int32 off = 0 ; off < len ; off++ )
	{
		// WrapNear2_clamp fills in the two columns , the two rows and the weight indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the 2x2 block : c[0] c[1] from row y[0] , c[2] c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_rgb( &c[ 0 ] , row0 + x[0] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 1 ] , row0 + x[1] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 2 ] , row1 + x[0] * PixelSize_rgb() );
		LoadAddr_rgb( &c[ 3 ] , row1 + x[1] * PixelSize_rgb() );

		// convert to the calculation format
		rgb_to_fa( &c[ 0 ] , b_color , c[ 0 ] );
		rgb_to_fa( &c[ 1 ] , b_color , c[ 1 ] );
		rgb_to_fa( &c[ 2 ] , b_color , c[ 2 ] );
		rgb_to_fa( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x inside each row , then along y between the two rows
		pp_pixel_calc	r0 , r1 , cc;
		weight4_fa( &r0 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_fa( &r1 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_fa( &cc , r0 , r1 , dy , weighttbl );
		MulAlpha_fa( &cc , alpha );

		// convert back and store
		fa_to_a( &cc , cc );
		StoreAddr_a( dest , cc );

		dest = AddPixelSize_a( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
// Fills a span of `len` destination pixels (written with StoreAddr_a) from a source
// read with LoadAddr_rgba, using WrapNear2_clamp addressing and a 2x2 blend
// weighted through `weighttbl`; the blended pixel goes through MulAlpha_fa
// with `alpha` before it is stored.
//
//	dest			first pixel to write; stepped with AddPixelSize_a after every store
//	len				pixel count; returns without writing anything when len <= 0
//	ssu , ssv		texture coordinate at the start of the span
//	ttu , ttv		texture coordinate at the end of the span
//	src				base address of the source image
//	src_pitchbyte	byte offset from one source row to the next
//	src_w , src_h	source dimensions, passed to WrapNear2_clamp
//	b_color			passed through to rgba_to_fa
//	alpha			passed to MulAlpha_fa
//	weighttbl		weights read by weight4_fa at indices d and 256 - d
cb_inline
void pp_cpp_texture_weight4f_m_alpha_clamp_a_rgba
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl
		)
{
	// An empty span stores nothing; returning here also keeps len == 0 away from
	// the divisions below.
	if( len <= 0 )
		return;

	// coordinates scaled by 0x10000 (16.16 fixed point)
	int32		su	= ( int32 )( ssu * 0x10000 );
	int32		sv	= ( int32 )( ssv * 0x10000 );
	const int32	tu	= ( int32 )( ttu * 0x10000 );
	const int32	tv	= ( int32 )( ttv * 0x10000 );

	// step per destination pixel; the first sample sits half a step into the span
	const int32	du	= ( tu - su ) / len;
	const int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8*	psrc = ( const uint8* )src;

	for( int32 off = 0 ; off < len ; off++ )
	{
		// WrapNear2_clamp fills in the two columns , the two rows and the weight indices
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );

		// load the 2x2 block : c[0] c[1] from row y[0] , c[2] c[3] from row y[1]
		const uint8*	row0 = psrc + y[0] * src_pitchbyte;
		const uint8*	row1 = psrc + y[1] * src_pitchbyte;
		LoadAddr_rgba( &c[ 0 ] , row0 + x[0] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 1 ] , row0 + x[1] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 2 ] , row1 + x[0] * PixelSize_rgba() );
		LoadAddr_rgba( &c[ 3 ] , row1 + x[1] * PixelSize_rgba() );

		// convert to the calculation format
		rgba_to_fa( &c[ 0 ] , b_color , c[ 0 ] );
		rgba_to_fa( &c[ 1 ] , b_color , c[ 1 ] );
		rgba_to_fa( &c[ 2 ] , b_color , c[ 2 ] );
		rgba_to_fa( &c[ 3 ] , b_color , c[ 3 ] );

		// blend along x inside each row , then along y between the two rows
		pp_pixel_calc	r0 , r1 , cc;
		weight4_fa( &r0 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_fa( &r1 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_fa( &cc , r0 , r1 , dy , weighttbl );
		MulAlpha_fa( &cc , alpha );

		// convert back and store
		fa_to_a( &cc , cc );
		StoreAddr_a( dest , cc );

		dest = AddPixelSize_a( dest );
		su += du;
		sv += dv;
	}
}
//=================================================================================================
//!	texture weight4f span : clamp wrap , a source -> a destination , with alpha
//!
//!	For each of the len destination pixels the four source pixels selected by
//!	WrapNear2_clamp() are loaded , converted with a_to_fa() , blended with
//!	weight4_fa() (first along x with dx , then along y with dy) , multiplied by
//!	alpha through MulAlpha_fa() and stored with StoreAddr_a().
//!
//!	@param	dest			[out] first destination pixel ; advanced by AddPixelSize_a() per pixel
//!	@param	len				[in] number of destination pixels ; nothing is done when len <= 0
//!	@param	ssu , ssv		[in] texture coordinate at the start of the span
//!	@param	ttu , ttv		[in] texture coordinate at the end of the span
//!	@param	src				[in] base address of the source image
//!	@param	src_pitchbyte	[in] byte offset between two source rows
//!	@param	src_w , src_h	[in] source size handed to WrapNear2_clamp()
//!	@param	b_color			[in] handed to a_to_fa()
//!	@param	alpha			[in] handed to MulAlpha_fa()
//!	@param	weighttbl		[in] weight table ; weight4_fa() reads weighttbl[ d ] and weighttbl[ 256 - d ]
//!	@retval			---
//-------------------------------------------------------------------------------------------------
cb_inline
void pp_cpp_texture_weight4f_m_alpha_clamp_a_a
		(
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		const float*	weighttbl
		)
{
	// empty span : nothing to write , and the step computation below must not divide by zero
	if( len <= 0 )
		return;

	// texture coordinates scaled by 0x10000 into int32 fixed point
	int32	su	= ( int32 )( ssu * 0x10000 );
	int32	sv	= ( int32 )( ssv * 0x10000 );
	int32	tu	= ( int32 )( ttu * 0x10000 );
	int32	tv	= ( int32 )( ttv * 0x10000 );
	
	// per-pixel step ; the start position is moved in by half a step
	int32	du	= ( tu - su ) / len;
	int32	dv	= ( tv - sv ) / len;
	su	+= ( du >> 1 );
	sv	+= ( dv >> 1 );

	pp_pixel_calc	c[ 4 ];
	const uint8	*psrc = ( const uint8* )src;

	int			off;
	for( off = 0; off < len ; off++ )
	{
		// uv : two column indices , two row indices and the blend factors dx / dy
		int32	x[2] , y[2];
		uint8	dx , dy;
		WrapNear2_clamp( x , y , &dx , &dy , su , sv , src_w , src_h );
		
		// load pixels : c[0]=(x0,y0) c[1]=(x1,y0) c[2]=(x0,y1) c[3]=(x1,y1)
		LoadAddr_a( &c[ 0 ] , psrc + y[0] * src_pitchbyte + x[0] * PixelSize_a() );
		LoadAddr_a( &c[ 1 ] , psrc + y[0] * src_pitchbyte + x[1] * PixelSize_a() );
		LoadAddr_a( &c[ 2 ] , psrc + y[1] * src_pitchbyte + x[0] * PixelSize_a() );
		LoadAddr_a( &c[ 3 ] , psrc + y[1] * src_pitchbyte + x[1] * PixelSize_a() );
		
		// convert ppformat
		a_to_fa( &c[ 0 ] , b_color , c[ 0 ] );
		a_to_fa( &c[ 1 ] , b_color , c[ 1 ] );
		a_to_fa( &c[ 2 ] , b_color , c[ 2 ] );
		a_to_fa( &c[ 3 ] , b_color , c[ 3 ] );
		
		// weight : blend each row with dx , then the two row results with dy
		pp_pixel_calc	cc , c1 , c2;
		weight4_fa( &c1 , c[ 0 ] , c[ 1 ] , dx , weighttbl );
		weight4_fa( &c2 , c[ 2 ] , c[ 3 ] , dx , weighttbl );
		weight4_fa( &cc , c1 , c2 , dy , weighttbl );
		MulAlpha_fa( &cc , alpha );

		// store		
		fa_to_a( &cc , cc );
		StoreAddr_a( dest , cc );
		
		// next destination pixel and next texture position
		dest = AddPixelSize_a( dest );
		su += du;
		sv += dv;
	}	
}

//=================================================================================================
//!	texture weight4f : forwards one span to the routine matching wrap / destformat / srcformat
//!
//!	Two lookup tables are indexed as [ wrap ][ destformat ][ srcformat ]. The table without the
//!	pp_alpha argument is used when alpha.a == 256 , the table with it otherwise.
//!	Table layout as written below : wrap 0 = repeat , 1 = clamp ; format 0 = rgb , 1 = rgba , 2 = a.
//!	NOTE(review): assumes the pp_wraptype / pp_format enumerator values follow that order - confirm
//!	against their declarations.
//!	NOTE(review): the tables are declared [2][4][4] but only 3 x 3 entries per wrap mode are listed ;
//!	the remaining slots are null pointers , so a format index of 3 must not reach this function.
//!
//!	@param	destformat		[in] destination pixel format (second table index)
//!	@param	dest			[out] destination span , forwarded unchanged
//!	@param	len				[in] span length , forwarded unchanged
//!	@param	ssu , ssv		[in] start texture coordinate , forwarded unchanged
//!	@param	ttu , ttv		[in] end texture coordinate , forwarded unchanged
//!	@param	srcformat		[in] source pixel format (third table index)
//!	@param	src				[in] source image , forwarded unchanged
//!	@param	src_pitchbyte	[in] source row pitch in bytes , forwarded unchanged
//!	@param	src_w , src_h	[in] source size , forwarded unchanged
//!	@param	b_color			[in] forwarded unchanged
//!	@param	alpha			[in] selects the table ; forwarded only to the alpha variants
//!	@param	wrap			[in] wrap mode (first table index)
//!	@param	weighttbl		[in] weight table , forwarded unchanged
//!	@retval			---
//-------------------------------------------------------------------------------------------------
cb_inline
void pp_cpp_texture_weight4f_m
		(
		pp_format		destformat , 
		void*			dest , 
		int32			len , 
		float			ssu , 
		float			ssv , 
		float			ttu , 
		float			ttv , 
		pp_format		srcformat , 
		const void*		src , 
		int32			src_pitchbyte , 
		int16			src_w , 
		int16			src_h , 
		const pp_color&	b_color , 
		const pp_alpha&	alpha , 
		pp_wraptype		wrap , 
		const float*	weighttbl
		)
{
	// span routine without alpha argument
	typedef void (*span_fn)( void* , int32 , float , float , float , float , const void* , int32 , int16 , int16 , const pp_color& , const float* );
	// span routine with alpha argument
	typedef void (*span_alpha_fn)( void* , int32 , float , float , float , float , const void* , int32 , int16 , int16 , const pp_color& , const pp_alpha& , const float* );

	// [ wrap ][ dest ][ src ] , no alpha
	static
	span_fn	plain_tbl[2][4][4] = 
	{
		// repeat
		{
			{	// dest rgb
			pp_cpp_texture_weight4f_m_repeat_rgb_rgb , 
			pp_cpp_texture_weight4f_m_repeat_rgb_rgba , 
			pp_cpp_texture_weight4f_m_repeat_rgb_a , 
			} , 
			{	// dest rgba
			pp_cpp_texture_weight4f_m_repeat_rgba_rgb , 
			pp_cpp_texture_weight4f_m_repeat_rgba_rgba , 
			pp_cpp_texture_weight4f_m_repeat_rgba_a , 
			} , 
			{	// dest a
			pp_cpp_texture_weight4f_m_repeat_a_rgb , 
			pp_cpp_texture_weight4f_m_repeat_a_rgba , 
			pp_cpp_texture_weight4f_m_repeat_a_a , 
			} , 
		} , 
		// clamp
		{
			{	// dest rgb
			pp_cpp_texture_weight4f_m_clamp_rgb_rgb , 
			pp_cpp_texture_weight4f_m_clamp_rgb_rgba , 
			pp_cpp_texture_weight4f_m_clamp_rgb_a , 
			} , 
			{	// dest rgba
			pp_cpp_texture_weight4f_m_clamp_rgba_rgb , 
			pp_cpp_texture_weight4f_m_clamp_rgba_rgba , 
			pp_cpp_texture_weight4f_m_clamp_rgba_a , 
			} , 
			{	// dest a
			pp_cpp_texture_weight4f_m_clamp_a_rgb , 
			pp_cpp_texture_weight4f_m_clamp_a_rgba , 
			pp_cpp_texture_weight4f_m_clamp_a_a , 
			} , 
		} , 
	};

	// [ wrap ][ dest ][ src ] , with alpha
	static
	span_alpha_fn	alpha_tbl[2][4][4] = 
	{
		// repeat
		{
			{	// dest rgb
			pp_cpp_texture_weight4f_m_alpha_repeat_rgb_rgb , 
			pp_cpp_texture_weight4f_m_alpha_repeat_rgb_rgba , 
			pp_cpp_texture_weight4f_m_alpha_repeat_rgb_a , 
			} , 
			{	// dest rgba
			pp_cpp_texture_weight4f_m_alpha_repeat_rgba_rgb , 
			pp_cpp_texture_weight4f_m_alpha_repeat_rgba_rgba , 
			pp_cpp_texture_weight4f_m_alpha_repeat_rgba_a , 
			} , 
			{	// dest a
			pp_cpp_texture_weight4f_m_alpha_repeat_a_rgb , 
			pp_cpp_texture_weight4f_m_alpha_repeat_a_rgba , 
			pp_cpp_texture_weight4f_m_alpha_repeat_a_a , 
			} , 
		} , 
		// clamp
		{
			{	// dest rgb
			pp_cpp_texture_weight4f_m_alpha_clamp_rgb_rgb , 
			pp_cpp_texture_weight4f_m_alpha_clamp_rgb_rgba , 
			pp_cpp_texture_weight4f_m_alpha_clamp_rgb_a , 
			} , 
			{	// dest rgba
			pp_cpp_texture_weight4f_m_alpha_clamp_rgba_rgb , 
			pp_cpp_texture_weight4f_m_alpha_clamp_rgba_rgba , 
			pp_cpp_texture_weight4f_m_alpha_clamp_rgba_a , 
			} , 
			{	// dest a
			pp_cpp_texture_weight4f_m_alpha_clamp_a_rgb , 
			pp_cpp_texture_weight4f_m_alpha_clamp_a_rgba , 
			pp_cpp_texture_weight4f_m_alpha_clamp_a_a , 
			} , 
		} , 
	};

	// alpha.a == 256 takes the routines that have no alpha argument
	if( alpha.a == 256 )
	{
		span_fn	draw = plain_tbl[ wrap ][ destformat ][ srcformat ];
		draw( dest , len , ssu , ssv , ttu , ttv , src , src_pitchbyte , src_w , src_h , b_color , weighttbl );
		return;
	}

	span_alpha_fn	draw_alpha = alpha_tbl[ wrap ][ destformat ][ srcformat ];
	draw_alpha( dest , len , ssu , ssv , ttu , ttv , src , src_pitchbyte , src_w , src_h , b_color , alpha , weighttbl );
}

};	//namespace

//using namespace icubic;		

#pragma pack( pop )			//release align

