using System;
using System.Collections;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

using SystemNeo;
using SystemNeo.Collections;
using SystemNeo.Collections.Generic;

namespace SystemNeo.Text
{
	/// <summary>
	/// A parser for HTML text.
	/// </summary>
	public class HtmlParser : Parser
	{
		#region private static fields
		// Named character references understood by the parser, keyed by the text
		// that appears between '&' and ';'. CharacterReferenceElement rejects any
		// name that is missing from this table.
		private static readonly IDictionary<string, string> sequenceDic
				= new Dictionary<string, string>() {
					{"amp",  "&"},
					{"gt",   ">"},
					{"lt",   "<"},
					{"nbsp", " "},
					{"quot", "\""},
					{"copy", "\x00a9"}
				};
		// Matches the body of a numeric character reference (the text between '&'
		// and ';'). Group "x" holds the hexadecimal marker ('x' or 'X') and is empty
		// for a decimal reference; group "n" holds the digits. Hexadecimal digits
		// are accepted only after the marker, so a body such as "#1F" does not match
		// and is never handed to a decimal number parser.
		private static readonly Regex reCharRef = new Regex(
				"^#(?:(?<x>[xX])(?<n>[0-9A-Fa-f]+)|(?<n>[0-9]+))$");
		#endregion

		// public properties //

		/// <summary>
		/// Gets or sets the option flags for this parser. The reader created by
		/// this parser checks <see cref="HtmlOptions.AllowUnescapedAmpersand"/>
		/// when it meets an ampersand that does not form a complete reference.
		/// </summary>
		public HtmlOptions Options { get; set; }

		// public methods //

		/// <summary>
		/// Parses <paramref name="value"/> and flattens the result to plain text.
		/// </summary>
		/// <remarks>
		/// Plain-text elements contribute their source text, character references
		/// contribute their decoded value, and every tag named "br" contributes
		/// <see cref="Environment.NewLine"/>. All other elements are dropped.
		/// </remarks>
		/// <param name="value">The HTML text to convert.</param>
		/// <returns>The concatenated plain text.</returns>
		public string ToPlainText(string value)
		{
			var buffer = new StringBuilder();
			foreach (TextElement item in this.Parse(value)) {
				if (item is PlainTextElement) {
					buffer.Append(item.SourceText);
					continue;
				}

				var reference = item as CharacterReferenceElement;
				if (reference != null) {
					buffer.Append(reference.Value);
					continue;
				}

				// Only line-break tags leave a trace in the output.
				var tag = item as TagElement;
				if (tag != null && tag.Name == "br") {
					buffer.Append(Environment.NewLine);
				}
			}
			return buffer.ToString();
		}

		// protected methods //

		/// <summary>
		/// Creates the string reader for <paramref name="value"/> together with
		/// the element-reading delegates bound to that reader.
		/// </summary>
		/// <param name="value">The HTML text to be read.</param>
		/// <param name="sr">Receives the newly created reader.</param>
		/// <param name="readers">
		/// Receives the reader's element functions in the order: comment, tag,
		/// plain text, character reference.
		/// </param>
		protected override void GetReader(string value,
				out StringReaderNeo sr, out Func<TextElement>[] readers)
		{
			var reader = new HtmlStringReader(this, value);
			sr = reader;
			// NOTE(review): the order is kept exactly as written; presumably the
			// base parser tries these in sequence, in which case the comment
			// reader must precede the tag reader because both start at '<'.
			readers = new Func<TextElement>[] {
				reader.ReadComment,
				reader.ReadTag,
				reader.ReadPlainText,
				reader.ReadCharacterReference
			};
		}

		// types //

		/// <summary>
		/// Identifies the kind of a tag found in HTML text.
		/// </summary>
		public enum TagType
		{
			// A start tag; the tag reader keeps this value when it meets no '/'.
			Begin,
			// An end tag; assigned when '/' directly follows the opening '<'.
			End,
			// A self-closing tag; assigned when '/' is met after the tag name,
			// after whitespace, or after an attribute.
			BeginEnd
		}

		/// <summary>
		/// A <see cref="TextElement"/> that represents a single HTML tag together
		/// with the attributes written inside it.
		/// </summary>
		public class TagElement : TextElement
		{
			#region private fields
			private readonly ICollection<TagAttributeElement> attributeElements;
			#endregion

			// public properties //

			/// <summary>
			/// Gets a dictionary view over the attributes of this tag. Attribute
			/// names are compared without regard to case.
			/// </summary>
			public AttributeDictionary Attributes { get; private set; }

			/// <summary>
			/// Gets the tag name converted to lower case.
			/// </summary>
			public string Name { get; private set; }

			/// <summary>
			/// Gets the kind of this tag (start, end, or self-closing).
			/// </summary>
			public TagType Type { get; private set; }

			// constructors //

			/// <summary>
			/// Initializes a new tag element.
			/// </summary>
			/// <param name="value">The source text of the whole tag.</param>
			/// <param name="name">The tag name as written; it is stored in lower case.</param>
			/// <param name="type">The kind of the tag.</param>
			/// <param name="attributeElements">The attributes that were read from the tag.</param>
			public TagElement(string value, string name,
					TagType type, ICollection<TagAttributeElement> attributeElements) : base(value)
			{
				this.Type = type;
				// Tag names are compared against ASCII literals such as "br", so
				// the case fold must not depend on the current culture (under
				// tr-TR, ToLower() turns 'I' into a dotless 'i').
				this.Name = name.ToLowerInvariant();
				this.attributeElements = attributeElements;
				this.Attributes = new AttributeDictionary(this);
			}

			// types //

			/// <summary>
			/// A dictionary view over the attributes of a <see cref="TagElement"/>,
			/// keyed by attribute name and compared case-insensitively.
			/// </summary>
			public class AttributeDictionary : AbstractDictionary<string, string>
			{
				#region private fields
				private readonly TagElement tag;
				#endregion

				// public properties //

				/// <summary>
				/// Gets the number of attribute elements held by the owning tag.
				/// </summary>
				public override int Count
				{
					get {
						return this.tag.attributeElements.Count;
					}
				}

				// constructors //

				/// <summary>
				/// Initializes a view over the attributes of <paramref name="tag"/>.
				/// </summary>
				/// <param name="tag">The tag whose attributes are exposed.</param>
				// NOTE(review): the meaning of the 'true' passed to the base
				// constructor is defined by AbstractDictionary and is not
				// visible in this file.
				internal AttributeDictionary(TagElement tag) : base(true)
				{
					this.tag = tag;
				}

				// protected methods //

				/// <summary>
				/// Determines whether two attribute names denote the same key.
				/// </summary>
				/// <param name="key1">The first name.</param>
				/// <param name="key2">The second name.</param>
				/// <returns>
				/// <c>true</c> when the names are equal ignoring case; otherwise <c>false</c>.
				/// </returns>
				protected override bool EqualKeys(string key1, string key2)
				{
					// Attribute names are identifiers, not natural-language text:
					// compare them ordinally so the result does not change with
					// the culture of the current thread.
					return string.Equals(key1, key2, StringComparison.OrdinalIgnoreCase);
				}

				/// <summary>
				/// Returns the value of the first attribute whose name matches
				/// <paramref name="key"/>.
				/// </summary>
				/// <param name="key">The attribute name to look up.</param>
				/// <returns>
				/// The attribute value; <c>null</c> for an attribute that was
				/// written without a value.
				/// </returns>
				/// <exception cref="KeyNotFoundException">No attribute matches the name.</exception>
				protected override string Get(string key)
				{
					foreach (var attr in this.tag.attributeElements) {
						if (this.EqualKeys(attr.Name, key)) {
							return attr.Value;
						}
					}
					throw new KeyNotFoundException(
							string.Format(" '{0}' ܂B", key));
				}

				/// <summary>
				/// Enumerates the attributes as name/value pairs in source order.
				/// </summary>
				/// <returns>A lazily evaluated sequence of name/value pairs.</returns>
				protected override IEnumerable<KeyValuePair<string, string>> GetEnumeratorInternal()
				{
					foreach (var elem in this.tag.attributeElements) {
						yield return new KeyValuePair<string, string>(elem.Name, elem.Value);
					}
				}

				/// <summary>
				/// Returns a new list holding the attribute names in source order.
				/// </summary>
				protected override ICollection<string> GetKeys()
				{
					return this.tag.attributeElements
							.Select<TagAttributeElement, string>(TagAttributeElement.GetName)
							.ToList();
				}

				/// <summary>
				/// Returns a new list holding the attribute values in source order.
				/// </summary>
				/// <returns>The attribute values.</returns>
				protected override ICollection<string> GetValues()
				{
					return this.tag.attributeElements
							.Select<TagAttributeElement, string>(TagAttributeElement.GetValue)
							.ToList();
				}
			}
		}

		/// <summary>
		/// A <see cref="TextElement"/> that represents one attribute written
		/// inside a tag.
		/// </summary>
		public class TagAttributeElement : TextElement
		{
			// internal constructors //

			/// <summary>
			/// Initializes an attribute that has a name but no value; its
			/// <see cref="Value"/> stays <c>null</c>.
			/// </summary>
			/// <param name="value">The source text of the attribute.</param>
			/// <param name="name">The attribute name.</param>
			internal TagAttributeElement(string value, string name) : base(value)
			{
				Name = name;
				Value = null;
			}

			/// <summary>
			/// Initializes an attribute with a name and a value.
			/// </summary>
			/// <param name="value">The source text of the attribute.</param>
			/// <param name="name">The attribute name.</param>
			/// <param name="attributeValue">The attribute value.</param>
			internal TagAttributeElement(
					string value, string name, string attributeValue) : base(value)
			{
				Name = name;
				Value = attributeValue;
			}

			// internal properties //

			/// <summary>
			/// Gets the attribute name as it was passed to the constructor.
			/// </summary>
			internal string Name { get; private set; }

			/// <summary>
			/// Gets the attribute value, or <c>null</c> when none was supplied.
			/// </summary>
			internal string Value { get; private set; }

			// internal static methods //

			/// <summary>
			/// Selector that projects an attribute element to its name.
			/// </summary>
			/// <param name="elem">The attribute element.</param>
			/// <returns>The name of <paramref name="elem"/>.</returns>
			internal static string GetName(TagAttributeElement elem)
			{
				string attributeName = elem.Name;
				return attributeName;
			}

			/// <summary>
			/// Selector that projects an attribute element to its value.
			/// </summary>
			/// <param name="elem">The attribute element.</param>
			/// <returns>The value of <paramref name="elem"/>.</returns>
			internal static string GetValue(TagAttributeElement elem)
			{
				string attributeValue = elem.Value;
				return attributeValue;
			}
		}

		/// <summary>
		/// A TextElement that represents a comment.
		/// </summary>
		public class CommentElement : TextElement
		{
			/// <summary>
			/// Initializes a comment element from its source text.
			/// </summary>
			/// <param name="value">The source text passed on to the base element.</param>
			internal CommentElement(string value) : base(value) {}
		}

		/// <summary>
		/// A TextElement that represents a run of plain text.
		/// </summary>
		public class PlainTextElement : TextElement
		{
			/// <summary>
			/// Initializes a plain-text element from its source text.
			/// </summary>
			/// <param name="value">The source text passed on to the base element.</param>
			internal PlainTextElement(string value) : base(value) {}
		}

		/// <summary>
		/// A TextElement that represents a character reference.
		/// </summary>
		public class CharacterReferenceElement : TextElement
		{
			#region private fields
			private readonly string sequence;
			#endregion

			// public properties //

			/// <summary>
			/// Gets the text that the reference stands for.
			/// </summary>
			public string Value { get; private set; }

			// internal constructors //

			/// <summary>
			/// Initializes the element from the full reference text and decodes it.
			/// </summary>
			/// <param name="value">
			/// The reference including its first and last character (the leading
			/// '&amp;' and the trailing ';').
			/// </param>
			/// <exception cref="ParseException">
			/// The reference is neither a known name nor a numeric reference that
			/// denotes a code point in the range U+0000 to U+10FFFF.
			/// </exception>
			internal CharacterReferenceElement(string value) : base(value)
			{
				// Drop the first and the last character to get the reference body.
				this.sequence = value.Substring(1, value.Length - 2);
				Match m = reCharRef.Match(this.sequence);
				if (m.Success) {
					// Numeric character reference.
					bool isHex = m.Groups["x"].Value.Length != 0;
					this.Value = DecodeNumeric(m.Groups["n"].Value, isHex, this.sequence);
				} else {
					// Named character reference.
					string text;
					if (! sequenceDic.TryGetValue(this.sequence, out text)) {
						throw CreateUnsupported(this.sequence);
					}
					this.Value = text;
				}
			}

			// private static methods //

			/// <summary>
			/// Converts the digits of a numeric reference to the text they denote.
			/// </summary>
			/// <param name="digits">The digit string without the '#' or 'x' prefix.</param>
			/// <param name="isHex">Whether the digits are hexadecimal.</param>
			/// <param name="sequence">The reference body, used in the error message.</param>
			/// <returns>One UTF-16 code unit, or a surrogate pair above U+FFFF.</returns>
			private static string DecodeNumeric(string digits, bool isHex, string sequence)
			{
				NumberStyles styles = isHex ? NumberStyles.AllowHexSpecifier : NumberStyles.None;
				int codePoint;
				// TryParse fails on overflow and on hex digits in a decimal
				// reference; the sign check rejects eight-digit hex values that
				// wrap to a negative int.
				if (! int.TryParse(digits, styles, CultureInfo.InvariantCulture, out codePoint)
						|| codePoint < 0 || codePoint > 0x10FFFF) {
					throw CreateUnsupported(sequence);
				}
				if (codePoint <= char.MaxValue) {
					// Same result as before for every value that fits in 16 bits.
					return ((char)codePoint).ToString();
				}
				// Supplementary-plane code points need a surrogate pair.
				return char.ConvertFromUtf32(codePoint);
			}

			/// <summary>
			/// Builds the exception reported for a reference that cannot be decoded.
			/// </summary>
			/// <param name="sequence">The reference body.</param>
			/// <returns>The exception to throw.</returns>
			private static ParseException CreateUnsupported(string sequence)
			{
				string msg = string.Format(
						"Q '&{0};' ̓T|[gĂ܂B", sequence);
				return new ParseException(msg);
			}
		}

		/// <summary>
		/// 
		/// </summary>
		private class HtmlStringReader : StringReaderNeo
		{
			#region private fields
			// The parser that created this reader; its Options are consulted when
			// an ampersand does not form a complete character reference.
			private readonly HtmlParser parser;
			#endregion

			// internal constructors //

			/// <summary>
			/// Initializes a reader over <paramref name="value"/> that is bound to
			/// the given parser.
			/// </summary>
			/// <param name="parser">The parser whose options this reader consults.</param>
			/// <param name="value">The text passed on to the base reader.</param>
			internal HtmlStringReader(HtmlParser parser, string value) : base(value)
			{
				this.parser = parser;
			}

			// internal methods //

			/// <summary>
			/// Reads one tag starting at the current position.
			/// </summary>
			/// <returns>
			/// A <see cref="TagElement"/> for the tag, or <c>null</c> (without
			/// consuming anything) when the next character is not '&lt;'.
			/// </returns>
			/// <exception cref="ParseException">
			/// The tag name is missing, an end tag contains a second '/', or the
			/// text after the name or after an attribute is neither whitespace,
			/// '&gt;' nor '/&gt;'.
			/// </exception>
			internal TextElement ReadTag()
			{
				if (this.Peek() != '<') {
					return null;
				}
				var source = new StringBuilder();
				source.Append((char)this.Read());
				TagType kind = TagType.Begin;
				if (this.Peek() == CharUtil.Slash) {
					kind = TagType.End;
					source.Append((char)this.Read());
				}
				string name = this.Read(IsTagNameChar);
				if (name.Length == 0) {
					throw new ParseException("^O܂B");
				}
				source.Append(name);
				var attributes = new List<TagAttributeElement>();
				while (true) {
					// Appending an empty run is a no-op, so the text can be
					// recorded before deciding which case applies.
					string gap = this.Read(char.IsWhiteSpace);
					source.Append(gap);

					// An optional '/' marks a self-closing tag.
					if (this.Peek() == CharUtil.Slash) {
						if (kind == TagType.End) {
							throw new ParseException("^O '/' Q܂B");
						}
						kind = TagType.BeginEnd;
						source.Append((char)this.Read());
					}
					if (this.Peek() == '>') {
						source.Append((char)this.Read());
						return new TagElement(source.ToString(), name, kind, attributes);
					}

					// Without separating whitespace nothing but the tag end
					// may follow the name or the previous attribute.
					if (gap.Length == 0) {
						throw new ParseException("^Oё̌ɂ͋󔒕E'>'E'/>' ̂ꂩKvłB|" + source);
					}

					TagAttributeElement attribute = this.ReadTagAttribute();
					if (attribute == null) {
						throw new ParseException("󔒂̌ɂ͑E'>'E'/>' ̂ꂩKvłB");
					}
					attributes.Add(attribute);
					source.Append(attribute.SourceText);
				}
			}

			/// <summary>
			/// Reads one comment starting at the current position.
			/// </summary>
			/// <returns>
			/// A <see cref="CommentElement"/> whose text spans the opening marker,
			/// the comment body and the closing marker, or <c>null</c> when the
			/// input does not start with the opening marker.
			/// </returns>
			/// <exception cref="ParseException">The closing marker was not found.</exception>
			internal TextElement ReadComment()
			{
				const string opener = "<!--";
				const string closer = "-->";

				if (! this.StartsWith(opener)) {
					return null;
				}
				string head = this.Read(opener.Length);
				string body = this.FindRead(closer);
				if (body == null) {
					throw new ParseException("RgĂ܂B");
				}
				string tail = this.Read(closer.Length);
				return new CommentElement(head + body + tail);
			}

			/// <summary>
			/// Reads the run of plain characters at the current position.
			/// </summary>
			/// <returns>
			/// A <see cref="PlainTextElement"/> for the run, or <c>null</c> when
			/// the run is empty.
			/// </returns>
			internal TextElement ReadPlainText()
			{
				string run = this.Read(IsPlainChar);
				if (run.Length == 0) {
					return null;
				}
				return new PlainTextElement(run);
			}

			/// <summary>
			/// Reads a character reference, or a lone ampersand, at the current
			/// position.
			/// </summary>
			/// <returns>
			/// <c>null</c> when no reference text was obtained; a
			/// <see cref="PlainTextElement"/> when the text is a single character
			/// (a standalone '&amp;'); otherwise a
			/// <see cref="CharacterReferenceElement"/>.
			/// </returns>
			internal TextElement ReadCharacterReference()
			{
				string reference = this.GetCharacterReference();
				if (reference == null) {
					return null;
				}
				if (reference.Length == 1) {
					// A standalone '&'.
					return new PlainTextElement(reference);
				}
				return new CharacterReferenceElement(reference);
			}

			// private static methods //

			/// <summary>
			/// Tests whether a character may appear in the body of a character
			/// reference.
			/// </summary>
			/// <param name="c">The character to test.</param>
			/// <returns>
			/// <c>true</c> for '#', an ASCII letter or an ASCII digit; otherwise <c>false</c>.
			/// </returns>
			private static bool IsCharacterReferenceChar(char c)
			{
				if (CharUtil.IsAsciiLetter(c)) {
					return true;
				}
				if (CharUtil.IsAsciiDigit(c)) {
					return true;
				}
				return c == '#';
			}

			/// <summary>
			/// Tests whether a character belongs to a run of plain text.
			/// </summary>
			/// <param name="c">The character to test.</param>
			/// <returns>
			/// <c>false</c> for '&lt;' and for the ampersand; <c>true</c> for
			/// every other character.
			/// </returns>
			private static bool IsPlainChar(char c)
			{
				return !(c == '<' || c == CharUtil.Ampersand);
			}

			/// <summary>
			/// Tests whether a character may appear in a tag name or an attribute
			/// name.
			/// </summary>
			/// <param name="c">The character to test.</param>
			/// <returns>
			/// <c>true</c> for a letter, a digit or the hyphen; otherwise <c>false</c>.
			/// </returns>
			private static bool IsTagNameChar(char c)
			{
				if (char.IsLetterOrDigit(c)) {
					return true;
				}
				return c == CharUtil.Hyphen;
			}

			/// <summary>
			/// Tests whether a character may appear in an unquoted attribute value.
			/// </summary>
			/// <param name="c">The character to test.</param>
			/// <returns>
			/// <c>false</c> for '&gt;' and for whitespace; <c>true</c> for every
			/// other character.
			/// </returns>
			private static bool IsUnquotedValueChar(char c)
			{
				return !(c == '>' || char.IsWhiteSpace(c));
			}

			// private methods //

			/// <summary>
			/// Reads a quoted attribute value starting at the current position.
			/// </summary>
			/// <returns>
			/// The opening quote character followed by the text returned by the
			/// base reader's search for the matching quote.
			/// </returns>
			/// <exception cref="ParseException">
			/// The next character is not a single or double quote, or the search
			/// for the matching quote produced no text.
			/// </exception>
			private string GetQuotedValue()
			{
				int quot = this.Read();
				if (quot != CharUtil.DoubleQuote && quot != CharUtil.SingleQuote) {
					throw new ParseException("NH[g܂B");
				}
				// NOTE(review): FindRead(char, bool) is declared outside this file.
				// The caller strips the first and last character of the result, so
				// the returned text presumably ends with the closing quote — confirm.
				string s = this.FindRead((char)quot, true);
				// The string overload of FindRead reports "not found" with null
				// (see ReadComment), so treat null like an empty result instead
				// of letting a NullReferenceException escape.
				if (string.IsNullOrEmpty(s)) {
					throw new ParseException("NH[g񂪕Ă܂B");
				}
				return ((char)quot).ToString() + s;
			}

			/// <summary>
			/// Reads the text of a character reference at the current position.
			/// </summary>
			/// <returns>
			/// <c>null</c> when the next character is not an ampersand; the whole
			/// reference from the ampersand through the semicolon when it is
			/// complete; otherwise just the ampersand, provided
			/// <see cref="HtmlOptions.AllowUnescapedAmpersand"/> is set.
			/// </returns>
			/// <exception cref="ParseException">
			/// The reference is incomplete and
			/// <see cref="HtmlOptions.AllowUnescapedAmpersand"/> is not set.
			/// </exception>
			private string GetCharacterReference()
			{
				if (this.Peek() != CharUtil.Ampersand) {
					return null;
				}
				string amp = this.Read(1);                  // consume the '&'
				string body = this.Read(IsCharacterReferenceChar);

				bool complete = body.Length != 0 && this.Peek() == CharUtil.Semicolon;
				if (complete) {
					return amp + body + this.Read(1);       // read through the closing ';'
				}

				// A standalone '&'.
				bool lenient = (this.parser.Options & HtmlOptions.AllowUnescapedAmpersand) != 0;
				if (! lenient) {
					// The message ends with the numeric value returned by Peek().
					throw new ParseException("QƂsSłB" + this.Peek());
				}
				// Seek back by the length of the body so only the '&' stays consumed.
				this.Seek(- body.Length);
				return amp;
			}

			/// <summary>
			/// Reads one attribute (a name, optionally followed by '=' and a value)
			/// at the current position.
			/// </summary>
			/// <returns>
			/// The attribute element, or <c>null</c> when no attribute name starts
			/// at the current position. An attribute written without '=' has a
			/// <c>null</c> value.
			/// </returns>
			private TagAttributeElement ReadTagAttribute()
			{
				string name = this.Read(IsTagNameChar);
				if (name.Length == 0) {
					return null;
				}
				if (this.Peek() != CharUtil.Equal) {
					// Name only; the source text is the name itself.
					return new TagAttributeElement(name, name);
				}
				string prefix = name + (char)this.Read();   // name plus the '='

				int next = this.Peek();
				if (next == CharUtil.DoubleQuote || next == CharUtil.SingleQuote) {
					string quoted = this.GetQuotedValue();
					// Strip the first and last character (the surrounding quotes).
					string inner = quoted.Substring(1, quoted.Length - 2);
					return new TagAttributeElement(prefix + quoted, name, inner);
				}

				string bare = this.Read(IsUnquotedValueChar);
				return new TagAttributeElement(prefix + bare, name, bare);
			}
		}

		/// <summary>
		/// Option flags that adjust how the parser treats its input.
		/// </summary>
		[Flags]
		public enum HtmlOptions
		{
			/// <summary>
			/// Allows a standalone ampersand (&amp;) that is not written as a
			/// character reference. Without this flag the reader throws a
			/// ParseException for such an ampersand.
			/// </summary>
			AllowUnescapedAmpersand = 1
		}
	}
}
