/*
 * Copyright 2013 Yuichiro Moriguchi
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package net.morilib.sh.builtin;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.nio.charset.Charset;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import net.morilib.sh.ShEnvironment;
import net.morilib.sh.ShFileSystem;
import net.morilib.sh.ShProcess;

/**
 * Shell builtin implementing a subset of <code>uniq</code>: adjacent
 * input lines whose compared portions are equal are collapsed.
 *
 * <p>Recognized options: <code>-u</code>/<code>--unique</code>,
 * <code>-d</code>/<code>--repeated</code>, <code>-c</code>/<code>--count</code>,
 * <code>-f N</code>/<code>--skip-fields=N</code>/<code>-N</code>,
 * <code>-s N</code>/<code>--skip-chars=N</code>/<code>+N</code> and
 * <code>-w N</code>/<code>--check-chars=N</code>.  The first operand
 * names the input and the second the output; <code>-</code> selects
 * the stream handed to {@link #main}.</p>
 */
public class ShUniq implements ShProcess {

	/**
	 * Options gathered from the command line.  A negative number means
	 * the option was given without an attached value and the value is
	 * expected in the next command line word.
	 */
	private static class S1 {
		/** Number of leading fields excluded from the comparison. */
		int skipFields = 0;
		/** Number of characters excluded after the skipped fields. */
		int skipChars = 0;
		/** Maximum number of characters compared. */
		int endc = Integer.MAX_VALUE;
		/** Combination of UNIQUE, REPEATED and COUNT. */
		int flags = UNIQUE | REPEATED;
	}

	/** Obsolete form <code>-N</code>: skip N fields. */
	private static final Pattern FNUM = Pattern.compile("-[0-9]+");
	/** Obsolete form <code>+N</code>: skip N characters. */
	private static final Pattern SNUM = Pattern.compile("\\+[0-9]+");
	/** Print lines that occur exactly once in a run. */
	private static final int UNIQUE = 1;
	/** Print one line for each run of two or more equal lines. */
	private static final int REPEATED = 2;
	/** Bits selecting which runs are printed. */
	private static final int R_MASK = 3;
	/** Prefix each printed line with the length of its run. */
	private static final int COUNT = 4;

	/**
	 * Parses the number attached to a short option.
	 *
	 * @param s the command line word
	 * @param i index in s at which the number starts
	 * @return the parsed value; Integer.MAX_VALUE when the text is
	 *         negative or not parseable as an int; -1 when there is no
	 *         text at index i, i.e. the number must come from the next
	 *         word
	 */
	private int getskip(String s, int i) {
		if(i >= s.length()) {
			return -1;
		}
		try {
			int k = Integer.parseInt(s.substring(i));

			return k < 0 ? Integer.MAX_VALUE : k;
		} catch(NumberFormatException e) {
			return Integer.MAX_VALUE;
		}
	}

	/**
	 * Parses the number of a long option.
	 *
	 * @param s the command line word
	 * @param i index in s at which the number starts
	 * @return the parsed value, or -1 when the number is missing or
	 *         not parseable as an int (a parsed negative value is
	 *         returned unchanged and is likewise rejected by the caller)
	 */
	private int getskip2(String s, int i) {
		if(i >= s.length()) {
			return -1;
		}
		try {
			return Integer.parseInt(s.substring(i));
		} catch(NumberFormatException e) {
			return -1;
		}
	}

	/**
	 * Interprets a cluster of short options such as <code>-cu</code>.
	 * The options f, s and w consume the rest of the word as their
	 * number, or leave the value negative so that the caller takes the
	 * number from the next word.
	 *
	 * @param s the command line word, starting with '-'
	 * @param f the options to update
	 * @return -1 on success, otherwise the unrecognized option character
	 */
	private int analyzeopt(String s, S1 f) {
		for(int i = 1; i < s.length(); i++) {
			switch(s.charAt(i)) {
			case 'u':
				f.flags = (f.flags & ~R_MASK) | UNIQUE;
				break;
			case 'd':
				f.flags = (f.flags & ~R_MASK) | REPEATED;
				break;
			case 'c':
				f.flags |= COUNT;
				break;
			case 'f':
				f.skipFields = getskip(s, i + 1);
				return -1;
			case 's':
				f.skipChars = getskip(s, i + 1);
				return -1;
			case 'w':
				f.endc = getskip(s, i + 1);
				return -1;
			default:
				return s.charAt(i);
			}
		}
		return -1;
	}

	/** Tells whether c separates fields (space or tab). */
	private static boolean isBlank(char c) {
		return c == ' ' || c == '\t';
	}

	/**
	 * Extracts the portion of a line that takes part in the comparison.
	 * A field is a possibly empty run of blanks followed by a run of
	 * non-blanks, so leading blanks belong to the first field.  After
	 * the requested fields, the requested number of characters is
	 * skipped, and at most endc characters of the remainder are kept.
	 *
	 * @param f the options in effect
	 * @param s the input line
	 * @return the compared portion; empty when the line is shorter
	 *         than what has to be skipped
	 */
	private String exts(S1 f, String s) {
		int len = s.length();
		int pos = 0;

		for(int n = 0; n < f.skipFields && pos < len; n++) {
			while(pos < len && isBlank(s.charAt(pos)))  pos++;
			while(pos < len && !isBlank(s.charAt(pos)))  pos++;
		}
		pos += Math.min(f.skipChars, len - pos);

		// widen to long: endc is Integer.MAX_VALUE when unlimited
		return (long)pos + (long)f.endc < len ?
				s.substring(pos, pos + f.endc) : s.substring(pos);
	}

	/**
	 * Prints the first line of a finished run if the selected mode
	 * asks for it, preceded by the run length when COUNT is set.
	 *
	 * @param fl   the options in effect
	 * @param out  destination
	 * @param line first line of the run
	 * @param c    number of lines in the run
	 */
	private void emit(S1 fl, PrintStream out, String line, int c) {
		boolean once = (fl.flags & UNIQUE) != 0 && c == 1;
		boolean many = (fl.flags & REPEATED) != 0 && c > 1;

		if(once || many) {
			if((fl.flags & COUNT) != 0) {
				out.format("%7d ", c);
			}
			out.println(line);
		}
	}

	/**
	 * Reads lines from ins and writes the filtered result to out.
	 * Nothing is written when the input contains no line.
	 *
	 * @param ins  source of the lines
	 * @param out  destination
	 * @param fl   the options in effect
	 * @param cset charset used to decode ins
	 * @throws IOException if reading fails
	 */
	private void uniq(InputStream ins, PrintStream out,
			S1 fl, Charset cset) throws IOException {
		BufferedReader rd = new BufferedReader(
				new InputStreamReader(ins, cset));
		String line, key;
		String first = null, firstKey = null;
		int c = 0;

		while((line = rd.readLine()) != null) {
			key = exts(fl, line);
			if(first != null && key.equals(firstKey)) {
				c++;
				continue;
			}
			if(first != null) {
				emit(fl, out, first, c);
			}
			first = line;  firstKey = key;  c = 1;
		}

		// flush the last run; first stays null on empty input
		if(first != null) {
			emit(fl, out, first, c);
		}
	}

	/**
	 * Runs the command.
	 *
	 * @param env  environment; supplies the charset used for reading
	 * @param fs   file system used to open the operands
	 * @param in   standard input
	 * @param out  standard output
	 * @param err  destination of diagnostics
	 * @param args command line; args[0] is not examined
	 * @return 0 on success, 2 on a usage error or when the input file
	 *         cannot be opened
	 * @throws IOException if reading or closing a stream fails
	 */
	public int main(ShEnvironment env, ShFileSystem fs, InputStream in,
			PrintStream out, PrintStream err,
			String... args) throws IOException {
		InputStream ins = null;
		PrintStream ous = null;
		S1 f = new S1();
		int k = 1, z;

		for(; k < args.length; k++) {
			String a = args[k];

			if(f.skipFields < 0) {
				// number belonging to a preceding bare -f
				f.skipFields = getskip(a, 0);
			} else if(f.skipChars < 0) {
				// number belonging to a preceding bare -s
				f.skipChars = getskip(a, 0);
			} else if(f.endc < 0) {
				// number belonging to a preceding bare -w
				f.endc = getskip(a, 0);
			} else if(FNUM.matcher(a).matches()) {
				f.skipFields = getskip(a, 1);
			} else if(SNUM.matcher(a).matches()) {
				f.skipChars = getskip(a, 1);
			} else if(a.startsWith("--skip-fields=")) {
				if((z = getskip2(a, 14)) < 0) {
					err.println("uniq: invalid number");
					return 2;
				}
				f.skipFields = z;
			} else if(a.startsWith("--skip-chars=")) {
				if((z = getskip2(a, 13)) < 0) {
					err.println("uniq: invalid number");
					return 2;
				}
				f.skipChars = z;
			} else if(a.startsWith("--check-chars=")) {
				if((z = getskip2(a, 14)) < 0) {
					err.println("uniq: invalid number");
					return 2;
				}
				f.endc = z;
			} else if(a.equals("--unique")) {
				f.flags = (f.flags & ~R_MASK) | UNIQUE;
			} else if(a.equals("--repeated")) {
				f.flags = (f.flags & ~R_MASK) | REPEATED;
			} else if(a.equals("--count")) {
				f.flags |= COUNT;
			} else if(a.equals("--")) {
				k++;
				break;
			} else if(a.equals("-") || !a.startsWith("-")) {
				// first operand
				break;
			} else if((z = analyzeopt(a, f)) >= 0) {
				err.println("uniq: invalid option: " + (char)z);
				return 2;
			}
		}

		if(f.skipFields < 0 || f.skipChars < 0 || f.endc < 0) {
			err.println("uniq: missing number");
			return 2;
		}

		if(k >= args.length) {
			uniq(in, out, f, env.getCharset());
			return 0;
		}

		try {
			if(args[k].equals("-")) {
				ins = in;
			} else {
				ins = fs.getFile(args[k]).getInputStream();
				if(ins == null) {
					err.print("uniq: ");
					err.print(args[k]);
					err.println(": file not found");
					return 2;
				}
			}

			// a second operand, if present and not "-", is the output
			if(args.length - k > 1 && !args[k + 1].equals("-")) {
				ous = fs.getFile(args[k + 1]).getPrintStream(false);
			} else {
				ous = out;
			}
			uniq(ins, ous, f, env.getCharset());
		} finally {
			// close only what was opened here, and close the output
			// even when closing the input fails
			try {
				if(ins != in && ins != null)  ins.close();
			} finally {
				if(ous != out && ous != null)  ous.close();
			}
		}
		return 0;
	}

}
