/*
 * MGL -- MobileGear Graphic Library -
 * Copyright (C) 1998, 1999
 *      Koji Suzuki (suz@at.sakura.ne.jp)
 *      Yukihiko Sano (yukihiko@yk.rim.or.jp)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY KOJI SUZUKI AND YUKIHIKO SANO ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE TERRENCE R. LAMBERT BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */
#define MGL_PREFIX
#include "config.h"
#include "mgl2.h"
#include "draw_engine.h"
#include <stdlib.h>

#ifdef USE_LOCAL_MEMSET
#define memset	mgl_memset
#endif
#ifdef USE_LOCAL_MEMMOVE
#define memmove	mgl_memmove
#endif

/*
 * Fill patterns for the four 2-bit pixel values: entry N holds the value N
 * repeated in every 2-bit field of the word.  Drawing code ANDs an entry
 * with a per-pixel mask to obtain the bits for one pixel, or uses the low
 * byte to fill four pixels at once.  The table is read-only.
 */
static const int lp_mask_data[4] = {0x00000000, 0x55555555, 0xaaaaaaaa, 0xffffffff};

/*
 * Shorthands for the pen state stored in the screen.  Each expands to a
 * member of s->_pen_color, so a `struct screen *s` must be in scope at the
 * point of use.
 *   lp_color    - s->_pen_color.local_color (set from CONV_TO_COL4 in
 *                 dec4rr_set_color)
 *   lp_mask     - the storage of s->_pen_color.opt accessed as an int; holds
 *                 the lp_mask_data[] pattern of the current colour
 *   lp_dodither - s->_pen_color.flags; non-zero when the pen is dithered
 */
#define lp_color	(s->_pen_color.local_color)
#define lp_mask		(*(int *)(&(s->_pen_color.opt)))
#define lp_dodither	(s->_pen_color.flags)

/* Fallback definition in case no included header provided NULL. */
#ifndef NULL
#define NULL ((void *)0)
#endif

/* Draw-engine instance for this pixel format; populated by dec4rr_init(). */
static struct draw_engine dec4rr;

/*
  0 %       20%       40%       60%    80%     100%
  .....    #....    #..#.    #.##.    ####.    #####
  .....    ..#..    ..#.#    #.#.#    #.###    #####

  Dither lookup used by the put_pixel / put_pixstream paths.
  Indexed as dither_map[y % 2][(x + (y>>2) + (y>>4)) % 5][col & 0xf]:
  the pattern repeats every 5 pixels horizontally and every 2 rows, and the
  last index is the low 4 bits of the dithered colour value.
  Each entry is an index (0..3) into lp_mask_data[].
*/
static const int dither_map[2][5][16] = {
{  {3,2,2,2,2   , 2,1,1,1,1,  1,0,0,0,0,  0},
   {3,3,3,3,2   , 2,2,2,2,1,  1,1,1,1,0,  0},
   {3,3,3,2,2   , 2,2,2,1,1,  1,1,1,0,0,  0},
   {3,3,2,2,2   , 2,2,1,1,1,  1,1,0,0,0,  0},
   {3,3,3,3,3   , 2,2,2,2,2,  1,1,1,1,1,  0},
}, {
   {3,3,3,2,2   , 2,2,2,1,1,  1,1,1,0,0,  0},
   {3,3,3,3,3   , 2,2,2,2,2,  1,1,1,1,1,  0},
   {3,2,2,2,2   , 2,1,1,1,1,  1,0,0,0,0,  0},
   {3,3,3,3,2   , 2,2,2,2,1,  1,1,1,1,0,  0},
   {3,3,2,2,2   , 2,2,1,1,1,  1,1,0,0,0,  0},
}
};

/*
 * Dither lookup used by the pen-based drawing paths (draw_pixel, lines,
 * clear_screen).  Indexed the same way as dither_map:
 * dither_mask_map[y % 2][(x + (y>>2) + (y>>4)) % 5][pen_color & 0xf].
 * Each entry is a whole-byte pattern (0x00, 0x55, 0xaa or 0xff), i.e. one
 * 2-bit pixel value repeated in all four pixel positions; callers AND it
 * with the mask of the pixel position they are writing.
 */
static const int dither_mask_map[2][5][16] = {
{
{0x00,0x55,0x55,0x55,0x55,0x55,0xaa,0xaa
,0xaa,0xaa,0xaa,0xff,0xff,0xff,0xff,0xff}
,{0x00,0x00,0x00,0x00,0x55,0x55,0x55,0x55
,0x55,0xaa,0xaa,0xaa,0xaa,0xaa,0xff,0xff}
,{0x00,0x00,0x00,0x55,0x55,0x55,0x55,0x55
,0xaa,0xaa,0xaa,0xaa,0xaa,0xff,0xff,0xff}
,{0x00,0x00,0x55,0x55,0x55,0x55,0x55,0xaa
,0xaa,0xaa,0xaa,0xaa,0xff,0xff,0xff,0xff}
,{0x00,0x00,0x00,0x00,0x00,0x55,0x55,0x55
,0x55,0x55,0xaa,0xaa,0xaa,0xaa,0xaa,0xff}
},{
{0x00,0x00,0x00,0x55,0x55,0x55,0x55,0x55
,0xaa,0xaa,0xaa,0xaa,0xaa,0xff,0xff,0xff}
,{0x00,0x00,0x00,0x00,0x00,0x55,0x55,0x55
,0x55,0x55,0xaa,0xaa,0xaa,0xaa,0xaa,0xff}
,{0x00,0x55,0x55,0x55,0x55,0x55,0xaa,0xaa
,0xaa,0xaa,0xaa,0xff,0xff,0xff,0xff,0xff}
,{0x00,0x00,0x00,0x00,0x55,0x55,0x55,0x55
,0x55,0xaa,0xaa,0xaa,0xaa,0xaa,0xff,0xff}
,{0x00,0x00,0x55,0x55,0x55,0x55,0x55,0xaa
,0xaa,0xaa,0xaa,0xaa,0xff,0xff,0xff,0xff}
}
};

/*
 * Create a 4-colour memory screen of xs x ys pixels.
 *
 * xs, ys  - size in pixels; one row occupies (xs+3)/4 bytes (four 2-bit
 *           pixels per byte, rounded up, no further alignment).
 * bitmap  - caller-supplied pixel storage, or NULL to have a zero-filled
 *           buffer allocated here (the screen is then flagged
 *           ST_ALLOCED_BITMAP).
 * op      - passed through to de_create_memscreen().
 *
 * Returns the new screen, or NULL if either the screen or the bitmap could
 * not be created.
 *
 * The private bitmap is allocated before the screen so that a failed
 * allocation never leaves a half-built screen behind; if the screen itself
 * cannot be created the freshly allocated bitmap is released again.
 */
static struct screen *dec4rr_create_memscreen(int xs,int ys,char *bitmap,int op) {
	struct screen *ret;
	char *own_bitmap = NULL;
	int wb;
	int type = STK_GENERIC_4COLOR | ST_ALLOCED;

	wb = (xs+3)/4;
	//wb = (wb + 3) & ~3;	/* align to int */

	if (!bitmap) {
		own_bitmap = (char *)malloc(ys * wb);
		if (!own_bitmap) {
			perror("malloc");
			return (struct screen *)NULL;
		}
		memset(own_bitmap,0,ys * wb);
	}

	/* the generic layer sees exactly the bitmap argument the caller gave */
	ret = (struct screen *)de_create_memscreen(xs,ys,bitmap,op);
	if (!ret) {
		free(own_bitmap);	/* free(NULL) is a no-op */
		return NULL;
	}

	if (own_bitmap) {
		bitmap = own_bitmap;
		type |= ST_ALLOCED_BITMAP;
	}

	ret->type |= type;
	ret->de = &dec4rr;
	ret->wbytes = wb;
	ret->bitmap = bitmap;
	return ret;
}

/*
 * Create a sub-screen of `org` through de_create_subscreen().
 *
 * When CSS_AS_MEMSCREEN is set in `opt`, the result is turned into a
 * stand-alone view: the ST_SUBSCREEN flag is cleared, the bitmap pointer is
 * aimed at the byte inside org->bitmap that holds the sub-screen origin, and
 * both offsets are reset to zero.  The x offset is converted with an integer
 * division by 4, so any remainder (off_x not a multiple of 4) is dropped.
 *
 * Returns the sub-screen, or NULL if de_create_subscreen() failed.
 */
static struct screen *dec4rr_create_subscreen(struct screen *org, int x, int y,
                                int xs, int ys,int opt) {
	struct screen *sub = de_create_subscreen(org,x,y,xs,ys,opt);

	if (sub == NULL)
		return NULL;
	if ((opt & CSS_AS_MEMSCREEN) == 0)
		return sub;

	sub->type &= ~ST_SUBSCREEN;
	sub->bitmap = org->bitmap + (sub->off_y * sub->wbytes)
		+ (sub->off_x /4);
	sub->off_x = 0;
	sub->off_y = 0;
	return sub;
}

/*
 * Release a screen created by this engine.  No format-specific cleanup is
 * done here; the call is forwarded unchanged to de_free_screen().
 */
static void dec4rr_free_screen(struct screen *s) {
	de_free_screen(s);
}


/*
 * Read one pixel.
 *
 * x, y - position relative to the screen; positions outside
 *        [0,width) x [0,height) yield 0.
 * op   - if BLT_MASKING is set and the converted colour equals
 *        (op & BLT_MASKCOL), COLOR_TRANSPARENT is returned instead.
 *
 * For a sub-screen, s->bitmap refers to the parent screen; the parent's
 * bitmap is read and s->wbytes is refreshed from the parent.
 * Pixel layout: four pixels per byte, pixel (x & 3) == 0 in the top two bits.
 */
static int dec4rr_get_pixel(struct screen *s,int x, int y,int op) {
	unsigned char *cell;
	int shift;
	int col,ret;

	if (x < 0 || y < 0 || x >= s->width || y >= s->height)
		return 0;

	x += s->off_x;
	y += s->off_y;
	if (s->type & ST_SUBSCREEN) {
		struct screen *parent = (struct screen *)(s->bitmap);

		cell = (unsigned char *)parent->bitmap;
		s->wbytes = parent->wbytes;
	} else {
		cell = (unsigned char *)s->bitmap;
	}
	cell += y * s->wbytes + (x>>2);

	shift = 6 - ((x & 3)*2);
	col = (*cell >> shift) & 3;
	ret = CONV_FROM_COL4(col);
	if ((op & BLT_MASKING) && (ret == (op & BLT_MASKCOL)))
		ret = COLOR_TRANSPARENT;
	return ret;
}

/*
 * Write one pixel with an explicit colour (the pen is not used).
 *
 * Nothing is drawn when `col` carries COLOR_TRANSPARENT or is negative, or
 * when CLIP_POINT rejects the position.  Colours flagged COLOR_DITHER are
 * resolved per position through dither_map; all others go through
 * CONV_TO_COL4.  For a sub-screen the parent's bitmap is written and
 * s->wbytes is refreshed from the parent.
 */
static void dec4rr_put_pixel(struct screen *s,int x, int y, int col) {
	char *cell;
	int sel;	/* bit mask of the 2-bit field for this x */
	int idx;	/* index into lp_mask_data */

	if ((col & COLOR_TRANSPARENT) || col < 0)
		return;

	CLIP_POINT(s,x,y);

	x += s->off_x;
	y += s->off_y;
	if (s->type & ST_SUBSCREEN) {
		struct screen *parent = (struct screen *)(s->bitmap);

		cell = parent->bitmap;
		s->wbytes = parent->wbytes;
	} else {
		cell = (char *)s->bitmap;
	}
	cell += y * s->wbytes + (x>>2);

	sel = 0xc0 >> ((x & 3) * 2);
	if (col & COLOR_DITHER) {
		int px = (x + (y>>2) + (y>>4))%5;
		int py = y % 2;

		idx = dither_map[py][px][col & 0xf];
	} else {
		idx = CONV_TO_COL4(col);
	}
	*cell = (*cell & ~sel)|(lp_mask_data[idx] & sel);
}

/*
 * Read `length` pixels starting at (x, y) into buf[], walking in `dir`:
 *   DIR_NORTH - x increases within the row
 *   DIR_SOUTH - x decreases within the row
 *   DIR_WEST  - same x, one row back (pointer - wbytes) per pixel
 *   DIR_EAST  - same x, one row forward (pointer + wbytes) per pixel
 * Any other `dir` reads nothing.
 *
 * Each stored 2-bit value is converted with CONV_FROM_COL4.  When
 * BLT_MASKING is set in `op`, values equal to (op & BLT_MASKCOL) are
 * reported as COLOR_TRANSPARENT.  No clipping is performed here.
 * `self` is not used.
 */
static void dec4rr_get_pixstream(struct screen *s,int x, int y,int *buf
		,int length,int dir,int op,struct draw_engine *self) {
	unsigned char *src;
	int n;
	int shift;
	int raw,val;

	x += s->off_x;
	y += s->off_y;
	if (s->type & ST_SUBSCREEN) {
		struct screen *parent = (struct screen *)(s->bitmap);

		src = (unsigned char *)parent->bitmap;
		s->wbytes = parent->wbytes;
	} else {
		src = (unsigned char *)s->bitmap;
	}
	src += y * s->wbytes + (x>>2);

	switch(dir) {
	case DIR_NORTH:
		for (n = 0; n < length; n++, x++) {
			shift = (x & 3)*2;
			raw = (*src >> (6 - shift)) & 3;
			val = CONV_FROM_COL4(raw);
			if ((op & BLT_MASKING) && val == (op & BLT_MASKCOL))
				val = COLOR_TRANSPARENT;
			*buf++ = val;
			/* last pixel of this byte: continue in the next one */
			if (shift == 6) src++;
		}
		break;
	case DIR_SOUTH:
		for (n = 0; n < length; n++, x--) {
			shift = (x & 3)*2;
			raw = (*src >> (6 - shift)) & 3;
			val = CONV_FROM_COL4(raw);
			if ((op & BLT_MASKING) && val == (op & BLT_MASKCOL))
				val = COLOR_TRANSPARENT;
			*buf++ = val;
			/* first pixel of this byte: continue in the previous one */
			if (shift == 0) src--;
		}
		break;
	case DIR_WEST:
	case DIR_EAST:
		/* x is fixed, so the bit position is the same for every row */
		shift = 6 - (x & 3)*2;
		for (n = 0; n < length; n++) {
			raw = (*src >> shift) & 3;
			val = CONV_FROM_COL4(raw);
			if ((op & BLT_MASKING) && val == (op & BLT_MASKCOL))
				val = COLOR_TRANSPARENT;
			*buf++ = val;
			if (dir == DIR_WEST)
				src -= s->wbytes;
			else
				src += s->wbytes;
		}
		break;
	}
}

/*
 * Write `length` colours from buf[] starting at (x, y), walking in `dir`:
 *   DIR_NORTH - x increases within the row
 *   DIR_SOUTH - x decreases within the row
 *   DIR_WEST  - same x, y decreases (pointer - wbytes per pixel)
 *   DIR_EAST  - same x, y increases (pointer + wbytes per pixel)
 * Any other `dir` writes nothing.
 *
 * Entries carrying COLOR_TRANSPARENT are skipped (the position still
 * advances).  Entries carrying COLOR_DITHER are resolved through dither_map
 * for the position they land on; all others go through CONV_TO_COL4.
 * No clipping is performed here.  `self` is not used.
 *
 * Differences from the previous implementation:
 *  - the dither decision is made per entry instead of once from buf[0], so
 *    a stream that starts with a transparent entry or mixes dithered and
 *    plain colours is rendered correctly, and buf is not read when
 *    length is 0;
 *  - the vertical directions now track y, so the dither cell changes from
 *    row to row exactly as in dec4rr_put_pixel (previously every row used
 *    the cell of the starting row).
 */
static void dec4rr_put_pixstream(struct screen *s,int x, int y,int *buf
		,int length,int dir,struct draw_engine *self) {
	char *p;
	int col,mask;
	int d,i;
	int dx,dy;

	x += s->off_x;
	y += s->off_y;
	if (s->type & ST_SUBSCREEN) {
		p = ((struct screen *)(s->bitmap))->bitmap;
		s->wbytes =
			((struct screen *)(s->bitmap))->wbytes;
	} else {
		p = (char *)s->bitmap;
	}
	p += y * s->wbytes + (x>>2);

	switch(dir) {
	case DIR_NORTH:
	    /* dither column is stepped incrementally along the row */
	    dy = y % 2;
	    dx = (x + (y>>2) + (y>>4))%5;
	    for (i=0; i<length; i++,x++) {
		col = *buf++;
		d = (x & 3)*2;
		if (!(col & COLOR_TRANSPARENT)) {
		    mask = 0xc0 >> d;
		    if (col & COLOR_DITHER) {
			col = dither_map[dy][dx][col & 0xf];
		    } else {
			col = CONV_TO_COL4(col);
		    }
		    *p = (*p & ~mask)|(lp_mask_data[col] & mask);
		}
		if (d == 6) p++;
		if (++dx >= 5) dx = 0;
	    }
	    break;
	case DIR_WEST:
	    d = (x & 3)*2;
	    mask = 0xc0 >> d;
	    for (i=0; i<length; i++,y--) {
		col = *buf++;
		if (!(col & COLOR_TRANSPARENT)) {
		    if (col & COLOR_DITHER) {
			dx = (x + (y>>2) + (y>>4))%5;
			dy = y % 2;
			col = dither_map[dy][dx][col & 0xf];
		    } else {
			col = CONV_TO_COL4(col);
		    }
		    *p = (*p & ~mask)|(lp_mask_data[col] & mask);
		}
		p -= s->wbytes;
	    }
	    break;
	case DIR_SOUTH:
	    dy = y % 2;
	    dx = (x + (y>>2) + (y>>4))%5;
	    for (i=0; i<length; i++,x--) {
		col = *buf++;
		d = (x & 3)*2;
		if (!(col & COLOR_TRANSPARENT)) {
		    mask = 0xc0 >> d;
		    if (col & COLOR_DITHER) {
			col = dither_map[dy][dx][col & 0xf];
		    } else {
			col = CONV_TO_COL4(col);
		    }
		    *p = (*p & ~mask)|(lp_mask_data[col] & mask);
		}
		if (d == 0) p--;
		if (--dx < 0) dx = 4;
	    }
	    break;
	case DIR_EAST:
	    d = (x & 3)*2;
	    mask = 0xc0 >> d;
	    for (i=0; i<length; i++,y++) {
		col = *buf++;
		if (!(col & COLOR_TRANSPARENT)) {
		    if (col & COLOR_DITHER) {
			dx = (x + (y>>2) + (y>>4))%5;
			dy = y % 2;
			col = dither_map[dy][dx][col & 0xf];
		    } else {
			col = CONV_TO_COL4(col);
		    }
		    *p = (*p & ~mask)|(lp_mask_data[col] & mask);
		}
		p += s->wbytes;
	    }
	    break;
	}
	return;
}

/*
 * Select the pen colour for subsequent draw_* calls on `s`.
 *
 * Stores the 2-bit colour (CONV_TO_COL4) in lp_color and its fill pattern
 * in lp_mask.  lp_dodither ends up 1 only when `col` carries COLOR_DITHER
 * and, with bits 0xff0 ignored, differs from the dither-flagged form of the
 * converted colour (CONV_FROM_COL4(lp_color) | COLOR_DITHER); otherwise 0.
 */
static void dec4rr_set_color(struct screen *s,int col) {
	int dither = (col & COLOR_DITHER) ? 1 : 0;

	lp_color = CONV_TO_COL4(col);
	lp_mask = lp_mask_data[lp_color];

	/* a "dithered" colour that is exactly a solid one needs no dithering */
	if ((CONV_FROM_COL4(lp_color) | COLOR_DITHER) == (col & ~0xff0))
		dither = 0;
	lp_dodither = dither;
}

static void dec4rr_draw_pixel(struct screen *s,int x, int y) {
	char *p;
	int mask;

//printf("dec4rr_draw_pixel (%d,%d) %d\n",x,y,lp_color);
	CLIP_POINT(s,x,y);

	x += s->off_x;
	y += s->off_y;
	if (s->type & ST_SUBSCREEN) {
		p = ((struct screen *)(s->bitmap))->bitmap;
		s->wbytes = 
			((struct screen *)(s->bitmap))->wbytes;
	} else {
		p = (char *)s->bitmap;
	}
	p += y * s->wbytes + (x>>2);

	mask = 0xc0 >> ((x & 3) * 2);
	if (pen_color == COLOR_REVERSE) {
		*p = (*p ^ mask);
	} else if (lp_dodither) {
		int dx = (x + (y>>2) + (y>>4))%5;
		int dy = y % 2;
		int m = dither_mask_map[dy][dx][pen_color & 0xf];
		*p = (*p & ~mask) | (m & mask);
	} else {
		*p = (*p & ~mask) | (lp_mask & mask);
	}
	return;
}

/* Absolute value.  The argument is evaluated twice: avoid side effects. */
#define ABS(a) (((a)<0) ? -(a) : (a))

/*
 * Draw a vertical line at column x1 from row y1 to row y2 (inclusive) with
 * the current pen: inverted for COLOR_REVERSE, dithered per row through
 * dither_mask_map when lp_dodither is set, solid (lp_mask) otherwise.
 * x2 is only offset, never used for addressing.  `self` is not used.
 *
 * The row count is taken AFTER CLIP_VLINE has run, so that any adjustment
 * the macro makes to y1/y2 is honoured by the loops below.  (Previously the
 * count was computed from the unclipped coordinates.)
 */
static void dec4rr_draw_line_vertical(struct screen *s,int x1, int y1, int x2, int y2
		,struct draw_engine *self) {
	char *p;
	int mask;
	int i;
	int n;

	CLIP_VLINE(s,x1,y1,x2,y2);
	n = y2 - y1 + 1;

	x1 += s->off_x;
	y1 += s->off_y;
	x2 += s->off_x;
	y2 += s->off_y;
	if (s->type & ST_SUBSCREEN) {
		p = ((struct screen *)(s->bitmap))->bitmap;
		s->wbytes =
			((struct screen *)(s->bitmap))->wbytes;
	} else {
		p = (char *)s->bitmap;
	}
	p += y1 * s->wbytes + (x1>>2);

	/* same 2-bit field in every row, since x is constant */
	mask = 0xc0 >> ((x1 & 3) * 2);
	if (pen_color == COLOR_REVERSE) {
		for (i=0; i<n; i++,p+=s->wbytes)
			*p = (*p ^ mask);
	} else if (lp_dodither) {
		for (i=0; i<n; i++,p+=s->wbytes,y1++) {
		    int dx = (x1 + (y1>>2) + (y1>>4))%5;
		    int dy = y1 % 2;
		    int m = dither_mask_map[dy][dx][pen_color & 0xf];
		    *p = (*p & ~mask) | (m & mask);
		}
	} else {
		for (i=0; i<n; i++,p+=s->wbytes)
			*p = (*p & ~mask) | (lp_mask & mask);
	}
}

/*
 * Draw a horizontal line on row y1 from x1 to x2 (inclusive) with the
 * current pen.  `self` is not used.
 *
 * The span is split into three parts:
 *   1. leading pixels up to the next byte boundary (x multiple of 4),
 *      drawn one by one with draw_pixel;
 *   2. whole bytes (four pixels each), written directly: XOR 0xff for
 *      COLOR_REVERSE, a composed dither byte when lp_dodither is set,
 *      or the pen's fill byte otherwise;
 *   3. trailing pixels, again drawn with draw_pixel.
 * s->need_clipping is lowered around the draw_pixel calls and restored
 * before returning.
 *
 * Fixes over the previous implementation:
 *  - the leading loop stops at x2, so a short span that ends before the
 *    next byte boundary no longer paints pixels beyond x2;
 *  - a non-positive byte count is never handed to memset / the byte loops;
 *  - the dither byte is composed in pixel order (first pixel in bits 0xc0,
 *    last in bits 0x03), matching the layout used by dec4rr_draw_pixel, so
 *    the byte-filled middle and the pixel-drawn ends share one pattern.
 */
static void dec4rr_draw_line_horizontal(struct screen *s,int x1, int y1, int x2, int y2
		,struct draw_engine *self) {
	char *p;
	int wb;
	int i;
	int off_x,off_y;

	CLIP_HLINE(s,x1,y1,x2,y2);
	s->need_clipping--;

	off_x = s->off_x;
	off_y = s->off_y;

	x1 += off_x;
	x2 += off_x;
	y1 += off_y;
	y2 += off_y;

	/* 1. leading partial byte */
	while ((x1 % 4) != 0 && x1 <= x2) {
	    draw_pixel(s,x1-off_x,y1-off_y);
	    x1++;
	}

	/* 2. whole bytes */
	wb = (x2 + 1 - x1)/4;
	if (wb < 0) wb = 0;
	if (wb) {
	    if (s->type & ST_SUBSCREEN) {
		p = ((struct screen *)(s->bitmap))->bitmap;
		s->wbytes =
			((struct screen *)(s->bitmap))->wbytes;
	    } else {
		p = (char *)s->bitmap;
	    }
	    p += y1 * s->wbytes + (x1>>2);

	    if (pen_color == COLOR_REVERSE) {
#if (OPTIMIZE_FOR == OPTTYPE_SLOW)
		for (i=0; i< wb; i++) {
		    *p++ ^= 0xff;
		}
#else
		mgl_memxor(p, 0xff, wb);
#endif
	    } else if (lp_dodither) {
		int dx = (x1 + (y1>>2) + (y1>>4))%5;
		int dy = y1 % 2;
		int m;
		for (i=0; i< wb; i++) {
			/* pixel 0 of the byte lives in the top two bits */
			m = dither_mask_map[dy][dx][pen_color & 0xf] & 0xc0;
			dx = (dx + 1)%5;
			m |= dither_mask_map[dy][dx][pen_color & 0xf] & 0x30;
			dx = (dx + 1)%5;
			m |= dither_mask_map[dy][dx][pen_color & 0xf] & 0xc;
			dx = (dx + 1)%5;
			m |= dither_mask_map[dy][dx][pen_color & 0xf] & 0x3;
			dx = (dx + 1)%5;
			*p++ = m;
		}
	    } else {
		memset(p, lp_mask & 0xff, wb);
	    }
	}

	/* 3. trailing partial byte */
	x1 += wb * 4;
	while (x1 <= x2) {
	    draw_pixel(s,x1-off_x, y1-off_y);
	    x1++;
	}
	s->need_clipping++;
	return;
}


/*
 * Fill the whole screen (0..width-1, 0..height-1) with the current pen:
 * inverted for COLOR_REVERSE, dithered when lp_dodither is set, solid
 * otherwise.
 *
 * Columns that do not start or end on a byte boundary are painted with
 * draw_line_vertical; the byte-aligned middle is written directly, row by
 * row.  s->need_clipping is lowered for the duration of the call and
 * restored before returning.
 *
 * Fixes over the previous implementation:
 *  - the leading column loop stops at the last column, so a screen narrower
 *    than the distance to the next byte boundary is no longer over-painted
 *    to its right;
 *  - a non-positive byte count is never handed to memset / the byte loops;
 *  - the dither byte is composed in pixel order (first pixel in bits 0xc0,
 *    last in bits 0x03), matching the layout used by dec4rr_draw_pixel and
 *    dec4rr_draw_line_vertical, so edges and middle share one pattern.
 */
static void dec4rr_clear_screen(struct screen *s,struct draw_engine *self) {
	int x1,y1,x2,y2;
	int off_x,off_y;
	char *p,*pp;
	int wb;
	int i;
	int y;

	x1 = 0;
	x2 = s->width-1;
	y1 = 0;
	y2 = s->height-1;
	s->need_clipping--;

	off_x = s->off_x;
	off_y = s->off_y;
	x1 += off_x;
	x2 += off_x;
	y1 += off_y;
	y2 += off_y;

	/* leading columns up to the next byte boundary */
	while (x1 % 4 != 0 && x1 <= x2) {
	    draw_line_vertical(s,x1-off_x,y1-off_y,x1-off_x,y2-off_y,self);
	    x1++;
	}

	/* byte-aligned middle, four pixels per byte */
	wb = (x2 + 1 - x1)/4;
	if (wb < 0) wb = 0;
	if (wb) {
	    if (s->type & ST_SUBSCREEN) {
		p = ((struct screen *)(s->bitmap))->bitmap;
		s->wbytes =
			((struct screen *)(s->bitmap))->wbytes;
	    } else {
		p = (char *)s->bitmap;
	    }
	    p += y1 * s->wbytes + (x1>>2);
	    for (y=y1; y<=y2; y++) {
		if (pen_color == COLOR_REVERSE) {
#if (OPTIMIZE_FOR == OPTTYPE_SLOW)
		    pp = p;
		    for (i=0; i< wb; i++) {
			*pp++ ^= 0xff;
		    }
#else
		    mgl_memxor(p, 0xff, wb);
#endif
		} else if (lp_dodither) {
		    int dx = (x1 + (y>>2) + (y>>4))%5;
		    int dy = y % 2;
		    int m;
		    pp = p;
		    for (i=0; i< wb; i++) {
			/* pixel 0 of the byte lives in the top two bits */
			m = dither_mask_map[dy][dx][pen_color & 0xf] & 0xc0;
			dx = (dx + 1)%5;
			m |= dither_mask_map[dy][dx][pen_color & 0xf] & 0x30;
			dx = (dx + 1)%5;
			m |= dither_mask_map[dy][dx][pen_color & 0xf] & 0xc;
			dx = (dx + 1)%5;
			m |= dither_mask_map[dy][dx][pen_color & 0xf] & 0x3;
			dx = (dx + 1)%5;
			*pp++ = m;
		    }
		} else {
		    memset(p, lp_mask & 0xff, wb);
		}
		p += s->wbytes;
	    }
	}

	/* trailing columns after the last whole byte */
	x1 += wb * 4;
	while (x1 <= x2) {
	    draw_line_vertical(s,x1-off_x,y1-off_y,x1-off_x,y2-off_y,self);
	    x1++;
	}
	s->need_clipping++;
}


/*
 * Copy an xsize x ysize rectangle from (sx, sy) in `src` to (dx, dy) in
 * `dst`.  `self` is not used.
 *
 * Fast path: when source and destination pixels occupy the same position
 * inside their bytes, each row is copied as
 *   - `d` leading pixels merged into the byte before the aligned run,
 *   - `len` whole bytes via memmove,
 *   - `e` trailing pixels merged into the byte after the run.
 * Everything else is delegated to bitblt_generic() with the original,
 * unmodified arguments.  `op` is only used by the generic path.
 *
 * Fixes over the previous implementation:
 *  - byte alignment is also checked on the absolute positions (including
 *    each screen's off_x); two screens whose offsets differ modulo 4 used to
 *    take the fast path and produce a horizontally shifted copy;
 *  - rectangles narrower than the leading partial byte (xsize < d,
 *    including negative xsize) go to the generic path instead of computing
 *    a negative byte count for memmove and copying more pixels than asked.
 */
static void dec4rr_bitblt_copy(struct screen *dst, int dx, int dy
	, struct screen *src, int sx, int sy, int xsize, int ysize, int op
	, struct draw_engine *self) {
	char *dp,*sp;
	int i,len,d,e;
	int mask_d,mask_e;
	int abs_dx,abs_sx;

	if ((dx - sx) & 0x3) goto gen;

	abs_dx = dx + dst->off_x;
	abs_sx = sx + src->off_x;
	if ((abs_dx - abs_sx) & 0x3) goto gen;

	/* pixels needed to reach the next destination byte boundary (0..3) */
	d = (4 - (abs_dx & 3)) & 3;
	if (xsize < d) goto gen;

	dx = abs_dx;
	dy += dst->off_y;
	sx = abs_sx;
	sy += src->off_y;
	if (dst->type & ST_SUBSCREEN) {
		dp = ((struct screen *)(dst->bitmap))->bitmap;
		dst->wbytes =
			((struct screen *)(dst->bitmap))->wbytes;
	} else {
		dp = (char *)dst->bitmap;
	}
	if (src->type & ST_SUBSCREEN) {
		sp = ((struct screen *)(src->bitmap))->bitmap;
		src->wbytes =
			((struct screen *)(src->bitmap))->wbytes;
	} else {
		sp = (char *)src->bitmap;
	}
	dx += d;
	sx += d;
	xsize -= d;
	e = (xsize & 3);
	xsize -= e;
	dp += dy * dst->wbytes + (dx>>2);
	sp += sy * src->wbytes + (sx>>2);
	len = xsize>>2;

	/* leading pixels are the LAST d pixels of the previous byte: low bits */
	mask_d = mask_e = 0;
	for (i=0; i<d; i++) {
		mask_d |= 3 << (i*2);
	}
	/* trailing pixels are the FIRST e pixels of the next byte: high bits */
	for (i=0; i<e; i++) {
		mask_e |= 0xc0 >> (i*2);
	}
	for (i=0; i< ysize; i++) {
		if (d) {
		    *(dp-1) = (*(dp-1) & ~mask_d) | (*(sp-1) & mask_d);
		}
		memmove(dp,sp,len);
		if (e) {
		    *(dp+len) = (*(dp+len) & ~mask_e) | (*(sp+len) & mask_e);
		}
		dp += dst->wbytes;
		sp += src->wbytes;
	}
	return;
gen:
	bitblt_generic(dst,dx,dy,src,sx,sy,xsize,ysize,op);
}

/*
 * Move an xsize x ysize rectangle inside one bitmap from (sx, sy) to
 * (dx, dy), processing rows from the first to the last.  Both source and
 * destination addresses are computed from dst's bitmap and dst->wbytes;
 * `src` only contributes its offsets.  `self` is not used.
 *
 * Fast path: when source and destination pixels occupy the same position
 * inside their bytes, each row is moved as `d` leading pixels, `len` whole
 * bytes (memmove) and `e` trailing pixels.  Everything else is delegated to
 * bitblt_generic() with the original, unmodified arguments.
 *
 * Fixes over the previous implementation:
 *  - byte alignment is also checked on the absolute positions (including
 *    off_x of both screens);
 *  - rectangles narrower than the leading partial byte (xsize < d,
 *    including negative xsize) go to the generic path instead of computing
 *    a negative byte count for memmove and moving more pixels than asked.
 */
static void dec4rr_bitblt_scroll_forward(struct screen *dst, int dx, int dy
	, struct screen *src, int sx, int sy, int xsize, int ysize, int op
	, struct draw_engine *self) {
	char *p,*dp,*sp;
	int i,len,d,e;
	int mask_d,mask_e;
	int abs_dx,abs_sx;

	if ((dx - sx) & 0x3) goto gen;

	abs_dx = dx + dst->off_x;
	abs_sx = sx + src->off_x;
	if ((abs_dx - abs_sx) & 0x3) goto gen;

	/* pixels needed to reach the next destination byte boundary (0..3) */
	d = (4 - (abs_dx & 3)) & 3;
	if (xsize < d) goto gen;

	dx = abs_dx;
	dy += dst->off_y;
	sx = abs_sx;
	sy += src->off_y;
	if (dst->type & ST_SUBSCREEN) {
		p = ((struct screen *)(dst->bitmap))->bitmap;
		dst->wbytes =
			((struct screen *)(dst->bitmap))->wbytes;
	} else {
		p = (char *)dst->bitmap;
	}
	dx += d;
	sx += d;
	xsize -= d;
	e = (xsize & 3);
	xsize -= e;
	dp = p + dy * dst->wbytes + (dx>>2);
	sp = p + sy * dst->wbytes + (sx>>2);
	len = xsize>>2;

	/* leading pixels are the LAST d pixels of the previous byte: low bits */
	mask_d = mask_e = 0;
	for (i=0; i<d; i++) {
		mask_d |= 3 << (i*2);
	}
	/* trailing pixels are the FIRST e pixels of the next byte: high bits */
	for (i=0; i<e; i++) {
		mask_e |= 0xc0 >> (i*2);
	}
	for (i=0; i< ysize; i++) {
		if (d) {
		    *(dp-1) = (*(dp-1) & ~mask_d) | (*(sp-1) & mask_d);
		}
		memmove(dp,sp,len);
		if (e) {
		    *(dp+len) = (*(dp+len) & ~mask_e) | (*(sp+len) & mask_e);
		}
		dp += dst->wbytes;
		sp += dst->wbytes;
	}
	return;
gen:
	bitblt_generic(dst,dx,dy,src,sx,sy,xsize,ysize,op);
}

/*
 * Move an xsize x ysize rectangle inside one bitmap from (sx, sy) to
 * (dx, dy), processing rows from the last to the first and, within a row,
 * the trailing part before the leading part.  Both source and destination
 * addresses are computed from dst's bitmap and dst->wbytes; `src` only
 * contributes its offsets.  `self` is not used.
 *
 * Fast path: when source and destination pixels occupy the same position
 * inside their bytes, each row is moved as `e` trailing pixels, `len` whole
 * bytes (memmove) and `d` leading pixels.  Everything else is delegated to
 * bitblt_generic() with the original, unmodified arguments.
 *
 * Fixes over the previous implementation:
 *  - byte alignment is also checked on the absolute positions (including
 *    off_x of both screens);
 *  - rectangles narrower than the leading partial byte (xsize < d,
 *    including negative xsize) go to the generic path instead of computing
 *    a negative byte count for memmove and moving more pixels than asked.
 */
static void dec4rr_bitblt_scroll_backward(struct screen *dst, int dx, int dy
	, struct screen *src, int sx, int sy, int xsize, int ysize, int op
	, struct draw_engine *self) {
	char *p,*dp,*sp;
	int i,len,d,e;
	int mask_d,mask_e;
	int abs_dx,abs_sx;

	if ((dx - sx) & 0x3) goto gen;

	abs_dx = dx + dst->off_x;
	abs_sx = sx + src->off_x;
	if ((abs_dx - abs_sx) & 0x3) goto gen;

	/* pixels needed to reach the next destination byte boundary (0..3) */
	d = (4 - (abs_dx & 3)) & 3;
	if (xsize < d) goto gen;

	dx = abs_dx;
	dy += dst->off_y;
	sx = abs_sx;
	sy += src->off_y;
	if (dst->type & ST_SUBSCREEN) {
		p = ((struct screen *)(dst->bitmap))->bitmap;
		dst->wbytes =
			((struct screen *)(dst->bitmap))->wbytes;
	} else {
		p = (char *)dst->bitmap;
	}
	dx += d;
	sx += d;
	xsize -= d;
	e = (xsize & 3);
	xsize -= e;
	/* start at the bottom row and walk upwards */
	dy += ysize - 1;
	sy += ysize - 1;
	dp = p + dy * dst->wbytes + (dx>>2);
	sp = p + sy * dst->wbytes + (sx>>2);
	len = xsize>>2;

	/* leading pixels are the LAST d pixels of the previous byte: low bits */
	mask_d = mask_e = 0;
	for (i=0; i<d; i++) {
		mask_d |= 3 << (i*2);
	}
	/* trailing pixels are the FIRST e pixels of the next byte: high bits */
	for (i=0; i<e; i++) {
		mask_e |= 0xc0 >> (i*2);
	}
	for (i=0; i< ysize; i++) {
		if (e) {
		    *(dp+len) = (*(dp+len) & ~mask_e) | (*(sp+len) & mask_e);
		}
		memmove(dp,sp,len);
		if (d) {
		    *(dp-1) = (*(dp-1) & ~mask_d) | (*(sp-1) & mask_d);
		}
		dp -= dst->wbytes;
		sp -= dst->wbytes;
	}
	return;
gen:
	bitblt_generic(dst,dx,dy,src,sx,sy,xsize,ysize,op);
}

/*
 * Register this draw engine.
 *
 * Fills the hooks of the file-local `dec4rr` engine that this file
 * implements, passes the engine to setup_draw_engine(), and installs
 * dec4rr_create_memscreen as the memory-screen factory for screen kind
 * `type` in _create_memscreen[].
 *
 * Hooks not assigned here (_draw_line, _put_pixstream_rect, _bitblt,
 * _bitblt_reserved_mask, _bitblt_reserved_masktile, _bitblt_gen) are left
 * untouched; presumably setup_draw_engine() supplies generic versions --
 * confirm against draw_engine.
 */
void dec4rr_init(int type) {
	/* screen lifetime */
	dec4rr._create_subscreen = dec4rr_create_subscreen;
	dec4rr._free_screen = dec4rr_free_screen;

	/* explicit-colour pixel access */
	dec4rr._get_pixel = dec4rr_get_pixel;
	dec4rr._put_pixel = dec4rr_put_pixel;
	dec4rr._get_pixstream = dec4rr_get_pixstream;
	dec4rr._put_pixstream = dec4rr_put_pixstream;

	/* pen-based drawing */
	dec4rr._set_color = dec4rr_set_color;
	dec4rr._draw_pixel = dec4rr_draw_pixel;
	dec4rr._draw_line_vertical = dec4rr_draw_line_vertical;
	dec4rr._draw_line_horizontal = dec4rr_draw_line_horizontal;
	dec4rr._clear_screen = dec4rr_clear_screen;

	/* block transfers */
	dec4rr._bitblt_copy = dec4rr_bitblt_copy;
	dec4rr._bitblt_scroll_forward = dec4rr_bitblt_scroll_forward;
	dec4rr._bitblt_scroll_backward = dec4rr_bitblt_scroll_backward;

	setup_draw_engine(&dec4rr,0);
	_create_memscreen[type] = dec4rr_create_memscreen;
}
