/* ----------------------------------------------------------------------
 * Frequencies of patterns in sequence data
 * Copyright (C) 2000 January Weiner III
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
 * USA.
 ---------------------------------------------------------------------- */

#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#include "genpak.h"
#include "gp_getopt.h"

#define VERSION "template file"
#define PROGNAME "gp_pattern"

char *progname ;

/* Run-time settings gathered from the command line */
typedef struct {
	int plen ;	/* length of the patterns to count (option -l) */
	FILE *in ;	/* stream the sequences are read from */
	FILE *out ;	/* stream the results are written to */
} opt_s ;

/* Table of doubles with an arbitrary number of dimensions */
struct gp_table_s {
	int ndim ;			/* number of dimensions */
	int *dims ;			/* size of each of the dimensions */
	double *data ;			/* the elements themselves */
	struct gp_table_s *next ;	/* can be useful for linked lists */
	struct gp_table_s *prev ;
} ;

/* shorthand for the table structure */
typedef struct gp_table_s gp_table ;

/* table handling routines */

/* create a table with ndim dimensions of the sizes given in dims */
gp_table *gp_allocatet(int ndim, int *dims) ;

/* position of the element with coordinates crd inside t->data */
long gp_elpos(gp_table *t, int *crd) ;

/* read the element with coordinates crd */
double gp_getel(gp_table *t, int *crd) ;

/* store el as the element with coordinates crd */
int gp_putel(gp_table *t, int *crd, double el) ;

/* set every element of the table to zero */
int gp_zerot(gp_table *t) ;
/*
 *
 */


int main(int argc, char *argv[])
{
	extern int optind ;
	extern char *optarg ;
	int width = 70 ; /* width with which the sequence gets formatted */
	opt_s options ;
	sekw *inseq, *outseq ;
	double **ftable ; /* here we store the frequencies */
	int *dims ;
	int ndim = 3 ;
	int crd[3] = {2,2,2} ;

	gp_table *tablica ;

	int i,j,k,c;
	char message[100] ;
	int errflg = 0 ;

	options.plen = 2 ;

	progname = argv[0] ;

	while ((c = gp_getopt(argc, argv, "l:Hqdvh")) != EOF)
		switch(c) {
		case 'l':
			if(sscanf(optarg,"%i",&options.plen)!= 1)
				gp_error("Could not read argument for option -l") ;
			if(debug) gp_warn("Pattern length %i",options.plen) ;
			break ;
		case 'H':
			html = TRUE ;
			break ;
		case 'q':
			quiet = TRUE ;
			break ;
		case 'v':
			fprintf(stderr,"%s version %s\n",progname,VERSION) ;
			exit(0) ;
			break ;
		case 'd':
			debug = TRUE ;
			gp_warn("Running in debug mode") ;
			break ;
		case 'h':
			Help() ;
			break ;
		default:
			errflg++ ;
			break;
		}


	if(errflg) gp_error("Type '%s -h' for help",progname) ;

/* open the file pointer to read the sequences 
 * from: standard input or a file provided? */
	if(optind >= argc) options.in = stdin ;
	else options.in = gp_file_open(argv[optind],"r") ;

/* opening the file pointer to write the output: 
 * standard output or file provided? */
	optind++ ;

	if(optind >= argc) options.out = stdout ;
	else options.out = gp_file_open(argv[optind],"wb") ;


	dims = calloc(options.plen,sizeof(*dims)) ;
	for(i = 0;i<options.plen;i++) dims[i] = 4 ;

	tablica = gp_allocatet(options.plen,dims) ;
	PrintHead(options) ;

	while((inseq = gp_seq_read_fragment(options.in,0,0,0)) != NULL) {

		gp_zerot(tablica) ;
		GetFreq(tablica,options,inseq) ;
		PrintFreq(tablica,options,inseq) ;

	}
	
	if(html) gp_warn_print_all(options.out) ;
	fclose(options.out) ;
	fclose(options.in) ;
	return(0);
}

/*
 * Count the patterns of length o.plen found in the sequence s.
 *
 * Every window of o.plen consecutive characters of s->sequ is turned into
 * a coordinate vector (A = 0, C = 1, G = 2, T or U = 3, case-insensitive)
 * and the matching element of t is increased by one.  Windows containing
 * any other character are skipped, because they do not correspond to an
 * element of the table.
 *
 * The table is not cleared here; the counts are added to what t holds.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if the working memory could not
 * be allocated.
 */
int GetFreq(gp_table* t, opt_s o, sekw* s) {

	int i,j ;
	int *crd, c[128] ;
	int ch, known ;
	double p ;

	/* mark everything as "not a nucleotide" first, so that characters
	 * outside of ACGTU are never used as table coordinates */
	for(i = 0;i<128;i++) c[i] = -1 ;

	c['A'] = 0;
	c['C'] = 1;
	c['G'] = 2;
	c['T'] = 3;
	c['U'] = 3;

	/* NOTE(review): gp_error presumably terminates the program; the
	 * return keeps us from using the NULL pointer if it does not */
	crd = calloc(o.plen,sizeof(*crd)) ;
	if(crd == NULL && o.plen > 0) {
		gp_error("GetFreq: could not allocate memory") ;
		return EXIT_FAILURE ;
	}

	for(i = 0;i<s->leng-o.plen+1;i++) {

		known = 1 ;
		for(j = 0;j<o.plen;j++) {
			/* toupper needs a value representable as
			 * unsigned char */
			ch = toupper((unsigned char) s->sequ[i+j]) ;
			if(ch >= 128 || c[ch] < 0) {
				known = 0 ;
				break ;
			}
			crd[j] = c[ch] ;
		}
		if(!known) continue ;

		p = gp_getel(t,crd) ;
		p++ ;
		gp_putel(t,crd,p) ;
	}

	free(crd) ;

	return EXIT_SUCCESS ;

}

/*
 * Write all patterns of length o.plen to o.out, each followed by a tab.
 *
 * The function calls itself once for each of the four nucleotides at
 * position cur; when all o.plen positions of crd are filled in, the
 * pattern is printed.  The last position changes fastest.
 *
 * o   - options; o.plen and o.out are used
 * crd - working array with room for o.plen coordinates
 * cur - position to fill in at this level; callers start with 0
 *
 * Returns EXIT_SUCCESS.
 */
int PrintNuc(opt_s o, int *crd, int cur) {

	int i,j ;
	static const char c[4] = "ACGT" ; /* coordinate -> nucleotide letter */

	if(debug) gp_warn("PrintNuc level %i",cur) ;

	if(cur<o.plen) {
		/* try every nucleotide at this position and go one deeper */
		for(i = 0;i<4;i++) {
			crd[cur] = i ;
			PrintNuc(o,crd,cur+1) ;
		}
	} else {
		/* all positions are set: print the pattern */
		for(j = 0;j<o.plen;j++)
			fprintf(o.out,"%c",c[crd[j]]) ;
		fprintf(o.out,"\t") ;
	}

	return EXIT_SUCCESS ;

}

/*
 * Write the header line to o.out: "#Name", a tab, then all patterns of
 * length o.plen (as printed by PrintNuc) and a newline.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if the working memory could not
 * be allocated.
 */
int PrintHead(opt_s o) {

	int *crd ;

	/* calloc already sets all the coordinates to zero.
	 * NOTE(review): gp_error presumably terminates the program; the
	 * return keeps us from using the NULL pointer if it does not */
	crd = calloc(o.plen,sizeof(*crd)) ;
	if(crd == NULL && o.plen > 0) {
		gp_error("PrintHead: could not allocate memory") ;
		return EXIT_FAILURE ;
	}

	fprintf(o.out,"#Name\t") ;
	PrintNuc(o,crd,0) ;

	fprintf(o.out,"\n") ;

	free(crd) ;

	return EXIT_SUCCESS ;

}

/*
 * Write one line of results to o.out: the name of the sequence s, then
 * every element of the table t in storage order with two decimals, all
 * separated by tabs, and a newline at the end.
 *
 * Returns EXIT_SUCCESS.
 */
int PrintFreq(gp_table* t, opt_s o, sekw* s) {

	long total = 1, left ;
	int d ;
	const double *cell ;

	/* the number of elements is the product of all dimensions */
	for(d = 0;d<t->ndim;d++) total *= t->dims[d] ;

	fprintf(o.out,"%s\t",s->name) ;

	cell = t->data ;
	for(left = total;left>0;left--)
		fprintf(o.out,"%.2f\t",*cell++) ;

	fprintf(o.out,"\n") ;

	return EXIT_SUCCESS ;

}


/*
 * Create a table with ndim dimensions.
 *
 * ndim - number of dimensions
 * dims - size of each dimension (ndim entries); the values are copied,
 *        so the caller keeps ownership of the array
 *
 * All elements start out as zero.  The table, its dims array and its data
 * array are allocated with calloc.
 *
 * Errors (a dimension that is not positive, a table too large to be
 * addressed, lack of memory) are reported through gp_error.
 * NOTE(review): gp_error presumably terminates the program; should it
 * return, everything allocated so far is released and NULL is returned.
 */
gp_table * gp_allocatet(int ndim, int*dims) {


	gp_table * res ;
	long i,nel ;

	nel = 1;

	for(i = 0;i<ndim;i++) {
		/* refuse sizes whose product does not fit in a long -
		 * the multiplication below would overflow otherwise */
		if(dims[i] <= 0 || nel > LONG_MAX / dims[i]) {
			gp_error("gp_allocatet: invalid or too large table dimensions") ;
			return NULL ;
		}
		nel *= dims[i] ;
	}
	/* nel is a long, so it needs the l length modifier */
	if(debug) gp_warn("gp_allocatet:nel %ld, ndim %i\n",nel,ndim) ;

	if((res = calloc(1,sizeof(*res))) == NULL) {
		gp_error("gp_allocatet: (1) could not allocate memory") ;
		return NULL ;
	}
	res->ndim = ndim ;
	if((res->dims = calloc(ndim,sizeof(*res->dims))) == NULL) {
		gp_error("gp_allocatet: (2) could not allocate memory") ;
		free(res) ;
		return NULL ;
	}
	if((res->data = calloc(nel,sizeof(*res->data))) == NULL) {
		gp_error("gp_allocatet: (3) could not allocate memory") ;
		free(res->dims) ;
		free(res) ;
		return NULL ;
	}

	for(i = 0;i<ndim;i++) res->dims[i] = dims[i] ;

	return(res) ;
	
}

/*
 * Find the position inside t->data of the element with coordinates crd.
 *
 * t   - the table
 * crd - one coordinate for each of the t->ndim dimensions
 *
 * The last coordinate has a step of one element; each earlier coordinate
 * has a step equal to the product of the sizes of all later dimensions.
 * The coordinates are not checked against the dimensions.
 *
 * In debug mode the coordinates and the position are written to stderr.
 */
long gp_elpos(gp_table *t, int* crd) {

	long i,p = 0,sum = 1 ;
	

	/* go from the last dimension to the first; sum holds the step of
	 * the current dimension */
	for(i = t->ndim-1;i>= 0;i--) {
		p += sum*crd[i] ;
		sum *= t->dims[i] ;
	}

	if(debug) {
		fprintf(stderr,"gp_elpos: Coordinates: ") ;
		for(i = 0;i<t->ndim;i++) fprintf(stderr,"%i ",crd[i]) ;
		/* p is a long, so it needs the l length modifier */
		fprintf(stderr," position %ld\n",p) ;
	}

	return p ;

}

/*
 * Return the element of table t that has the coordinates crd
 * (one coordinate per dimension).
 */
double gp_getel(gp_table*t, int* crd) {

	/* gp_elpos turns the coordinates into a position in t->data */
	return t->data[gp_elpos(t,crd)] ;

}

/*
 * Store el as the element of table t that has the coordinates crd
 * (one coordinate per dimension).
 *
 * Returns EXIT_SUCCESS.
 */
int gp_putel(gp_table *t, int* crd, double el) {

	/* gp_elpos turns the coordinates into a position in t->data */
	long where = gp_elpos(t,crd) ;

	*(t->data + where) = el ;

	return EXIT_SUCCESS ;

}


/*
 * Set every element of table t to zero.
 *
 * Returns EXIT_SUCCESS.
 */
int gp_zerot(gp_table *t) {

	long count = 1, left ;
	double *cell ;
	int d ;

	/* the number of elements is the product of all dimensions */
	for(d = 0;d<t->ndim;d++) count *= t->dims[d] ;

	cell = t->data ;
	for(left = count;left>0;left--) *cell++ = 0.0 ;

	return EXIT_SUCCESS ;

}



/* Standard message */

/*
 * Print the usage information to standard output and leave the program
 * with exit status 0.  The list covers all options accepted by main.
 */
void Help()
{
printf("\n");
printf("%s, v. %s- count frequencies of patterns in DNA/RNA sequences",PROGNAME,VERSION);
printf("\n");
printf("  Usage:\n");
printf("     %s [options] [ input file ] [ output file ]\n",progname);
printf("\n");
printf("  Options:\n");
printf("     -l value : length of the pattern is (value)\n");
printf("     -H       : run in HTML mode\n");
printf("     -d       : debug mode, print diagnostic messages\n");
printf("     -v       : print version information & exit\n");
printf("     -h       : print this help screen & exit\n");
printf("     -q       : quiet, suppress error messages\n\n");
exit(0);
}


			
