package misc;

import static org.junit.Assert.fail;

import java.io.FileInputStream;
import java.io.InputStreamReader;

import org.cyberneko.html.parsers.DOMParser;
import org.junit.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

/**
 * Exploratory test that parses an HTML file with the NekoHTML
 * {@link DOMParser} and prints the resulting DOM tree to standard output.
 *
 * <p>The test has no assertions on the parsed content; it fails only when
 * reading, parsing or traversing the document throws.
 */
public class NekoHtmlTest1 {

	/** Location of the HTML fixture, given relative to the working directory. */
	String path = "src/test/resources/selenium_test.html";

	/**
	 * Parses the file at {@link #path} as UTF-8 and dumps its element tree,
	 * starting from the document element, via {@link #searchProc(Element, int)}.
	 *
	 * @throws AssertionError if the file cannot be read, parsed or traversed;
	 *                        the original exception is attached as the cause
	 */
	@Test
	public void test() {
		// Both streams are declared as resources so the file handle is released
		// even when constructing the reader or parsing the document throws.
		try (FileInputStream in = new FileInputStream(path);
				InputStreamReader reader = new InputStreamReader(in, "UTF-8")) {
			DOMParser parser = new DOMParser();
			parser.parse(new InputSource(reader));
			Document doc = parser.getDocument();
			Element root = doc.getDocumentElement();
			this.searchProc(root, 0);
		} catch (Exception e) {
			// Fail the test while keeping the underlying exception as the cause,
			// so the test report shows what actually went wrong.
			throw new AssertionError("Failed to parse or traverse " + path, e);
		}
	}

	/**
	 * Recursively prints the subtree rooted at {@code root}.
	 *
	 * <p>The element's tag name is printed first, prefixed by one {@code -}
	 * per nesting level. Child elements are then visited depth-first with the
	 * indent increased by one, and each child text node is printed with its
	 * trimmed value, its node name and its raw value. Child nodes of any
	 * other type are skipped.
	 *
	 * @param root   element whose tag name and children are printed
	 * @param indent nesting depth of {@code root}; controls the number of
	 *               {@code -} characters printed before the tag name
	 */
	public void searchProc(Element root, int indent) {
		System.out.println("*** " + this.createIndent(indent) + root.getTagName() + " ***");
		NodeList items = root.getChildNodes();
		for (int i = 0; i < items.getLength(); i++) {
			Node item = items.item(i);
			if (item.getNodeType() == Node.ELEMENT_NODE) {
				this.searchProc((Element) item, indent + 1);
			} else if (item.getNodeType() == Node.TEXT_NODE) {
				System.out.println(">> [" + item.getNodeValue().trim() + "] " + item.getNodeName() + "/" + item.getNodeValue());
			}
		}
	}

	/**
	 * Builds the indentation prefix for a given nesting depth.
	 *
	 * @param indent number of {@code -} characters to produce; values of zero
	 *               or less yield an empty string
	 * @return a string consisting of {@code indent} dashes
	 */
	private String createIndent(int indent) {
		StringBuilder sb = new StringBuilder();
		for (int i = 0; i < indent; i++) {
			sb.append("-");
		}
		return sb.toString();
	}

}
