/* xfce4-newsreader-plugin
 * A lightweight RSS/Atom news feed reader on xfce4-panel
 * 
 * Copyright 2005 mueki <>
 *
 * xmlparse.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdio.h>
#include <unistd.h>
#include <glib.h>
#include <ne_dates.h>

#include "xmlparse.h"
#include "datetimeparse.h"

#define NEWSREADER_URI "http://sourceforge.jp/xfce4-newsreader-plugin/"
/* xml namespace */
#define NS_DC "http://purl.org/dc/elements/1.1/"
#define NS_DCTESRMS "http://purl.org/dc/terms/"
#define NS_ATOM "http://purl.org/atom/ns#"

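/*
 * To compile this file as a standalone test program using gcc you can type
 * gcc -D_XMLPARSER_MAIN_ `xml2-config --cflags --libs` -o xmlparse xmlparse.c
 * (the compiler/linker flags for the other headers included above, e.g.
 * glib.h and ne_dates.h, have to be added as well).
 */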

#ifdef DEBUG
#define DEBUG_PRINT(...) g_message(__VA_ARGS__)
#else
#define DEBUG_PRINT(...)
#endif

/**
 * print_item:
 * @doc: the document that owns @node.
 * @node: the first node of the sibling list to scan.
 *
 * Debug helper. Walks @node and its following siblings and reports the
 * text of every "title" and "link" element through DEBUG_PRINT, which
 * expands to nothing unless DEBUG is defined.
 */

static void
print_item(xmlDocPtr doc, xmlNode * node)
{
	xmlNode *cur = NULL;
	xmlChar *text = NULL;
  
	for(cur = node; cur != NULL ; cur = cur->next){
		if (cur->type != XML_ELEMENT_NODE)
			continue;
		if(xmlStrcmp(cur->name, "title") == 0){
			/* xmlNodeListGetString() hands back a copy that we must release */
			text = xmlNodeListGetString(doc, cur->children, 1);
			DEBUG_PRINT("title: %s\n", text ? (const char*)text : "");
			if(text)
				xmlFree(text);
		}
		else if(xmlStrcmp(cur->name, "link") == 0){
			text = xmlNodeListGetString(doc, cur->children, 1);
			DEBUG_PRINT("link: %s\n", text ? (const char*)text : "");
			if(text)
				xmlFree(text);
		}
	}
}

feed_t* create_new_feed()
{
	feed_t* pFeed = NULL;
	pFeed = g_new0(feed_t, 1);
	pFeed->tmModified = -1;

	return pFeed;
}

item_t* create_new_item()
{
	item_t* pItem = NULL;
	pItem = g_new0(item_t, 1);
	pItem->tmModified = -1;

	return pItem;
}

/*
 * free_item:
 * @pitem: item to destroy; NULL is accepted and ignored.
 *
 * Releases the title and url strings of @pitem (when set) and then the
 * item itself.
 */
static void
free_item(item_t* pitem)
{ 
	if(!pitem)
		return;

	if(pitem->title)
		g_string_free(pitem->title, TRUE);

	if(pitem->url)
		g_string_free(pitem->url, TRUE);

	/* items are allocated with g_new0(), so they are released with g_free() */
	g_free(pitem);
}

/*
 * free_feed:
 * @pfeed: feed to destroy; NULL is accepted and ignored.
 *
 * Destroys every item stored in @pfeed->items, the list itself, the
 * title and url strings (when set) and finally the feed structure.
 */
void
free_feed(feed_t* pfeed)
{ 
	GList* plist = NULL;

	if(!pfeed)
		return;

	for(plist = pfeed->items; plist; plist = plist->next)
		free_item(plist->data);

	g_list_free(pfeed->items);

	if(pfeed->title)
		g_string_free(pfeed->title, TRUE);

	if(pfeed->url)
		g_string_free(pfeed->url, TRUE);

	/* feeds are allocated with g_new0(), so they are released with g_free() */
	g_free(pfeed);
}

/*
 * get_atom_entry:
 * @pFeed: feed that receives the parsed entry.
 * @doc: document that owns @node.
 * @node: first node of the sibling list to scan (the caller passes the
 *        children of an "entry" element).
 *
 * Builds an item from the "title", "link" (only when rel is "alternate"
 * and href is present) and "modified" elements found in the sibling list.
 * When an element occurs more than once the last occurrence wins. The
 * item is appended to @pFeed->items if it ends up with both a title and
 * a url; otherwise a warning is logged and the item is discarded.
 *
 * Returns: always TRUE.
 */
static gboolean
get_atom_entry(feed_t* pFeed, xmlDocPtr doc, xmlNode* node)
{
	xmlNode* cur = NULL;
	xmlChar* pchRel = NULL;
	xmlChar* pchHref = NULL;
	xmlChar* pchText = NULL;
	item_t* pItem = create_new_item();

	for(cur = node; cur != NULL; cur = cur->next){
		if(cur->type != XML_ELEMENT_NODE)
			continue;
		if(xmlStrcmp(cur->name, "title") == 0){
			/* the returned string is a copy owned by us; g_string_new()
			 * copies it again (NULL yields an empty string) */
			pchText = xmlNodeListGetString(doc, cur->children, 1);
			if(pItem->title)
				g_string_free(pItem->title, TRUE);
			pItem->title = g_string_new((const gchar*)pchText);
			if(pchText)
				xmlFree(pchText);
			pchText = NULL;
		}
		else if(xmlStrcmp(cur->name, "link") == 0){
			if((pchRel = xmlGetProp(cur, (const xmlChar*) "rel")) &&
			   (pchHref = xmlGetProp(cur, (const xmlChar*) "href")) &&
			   xmlStrcmp(pchRel, "alternate") == 0){
				if(pItem->url)
					g_string_free(pItem->url, TRUE);
				pItem->url = g_string_new((const gchar*)pchHref);
			}
			if(pchRel)
				xmlFree(pchRel);
			if(pchHref)
				xmlFree(pchHref);
			pchRel = NULL;
			pchHref = NULL;
		}
		else if(xmlStrcmp(cur->name, "modified") == 0){
			pchText = xmlNodeListGetString(doc, cur->children, 1);
			/* an empty element leaves tmModified untouched instead of
			 * handing a NULL pointer to datetime_parse() */
			if(pchText){
				pItem->tmModified = datetime_parse((const char*)pchText);
				xmlFree(pchText);
				pchText = NULL;
			}
		}
	}

	if(!pItem->title || !pItem->url){
		g_warning("invalid item");
		free_item(pItem);
	}
	else{
		pFeed->items = g_list_append(pFeed->items, pItem);
	}

	return TRUE;
}

/*
 * get_atom_feed:
 * @doc: document that owns @node.
 * @node: the element whose children describe the feed (get_feed() passes
 *        the Atom "feed" root element).
 *
 * Creates a feed and fills it from the direct children of @node: "title"
 * sets the feed title, a "link" with rel "alternate" and an href sets the
 * feed url, and every "entry" is handed to get_atom_entry(). When "title"
 * or a matching "link" occurs more than once the last occurrence wins.
 *
 * Returns: the new feed, or NULL if get_atom_entry() reports failure.
 */
static feed_t*
get_atom_feed(xmlDocPtr doc, xmlNode* node)
{
	feed_t* pFeed = NULL;
	xmlNode* cur;
	xmlChar* pchRel = NULL;
	xmlChar* pchHref = NULL;
	xmlChar* pchText = NULL;

	pFeed = create_new_feed();

	for(cur = node->children; cur != NULL; cur = cur->next){
		if(cur->type != XML_ELEMENT_NODE)
			continue;
		if(xmlStrcmp(cur->name, "title") == 0){
			/* the returned string is a copy owned by us; g_string_new()
			 * copies it again (NULL yields an empty string) */
			pchText = xmlNodeListGetString(doc, cur->children, 1);
			if(pFeed->title)
				g_string_free(pFeed->title, TRUE);
			pFeed->title = g_string_new((const gchar*)pchText);
			if(pchText)
				xmlFree(pchText);
			pchText = NULL;
		}
		else if(xmlStrcmp(cur->name, "link") == 0){
			if((pchRel = xmlGetProp(cur, (const xmlChar*) "rel")) &&
			   (pchHref = xmlGetProp(cur, (const xmlChar*) "href")) &&
			   xmlStrcmp(pchRel, "alternate") == 0){
				if(pFeed->url)
					g_string_free(pFeed->url, TRUE);
				pFeed->url = g_string_new((const gchar*)pchHref);
			}
			if(pchRel)
				xmlFree(pchRel);
			if(pchHref)
				xmlFree(pchHref);
			pchRel = NULL;
			pchHref = NULL;
		}
		else if(xmlStrcmp(cur->name, "entry") == 0){
			if(FALSE == get_atom_entry(pFeed, doc, cur->children)){
				free_feed(pFeed);
				return NULL;
			}
		}
	}
	
	return pFeed;
}

/*
 * get_channel:
 * @pfeed: feed whose title and url are filled in.
 * @doc: document that owns @node.
 * @node: first node of the sibling list to scan (callers pass the
 *        children of a "channel" element).
 *
 * Resets the title and url of @pfeed to empty strings and then replaces
 * them with the text of the "title" and "link" elements found in the
 * sibling list. Any title/url already stored in @pfeed is released first.
 *
 * Returns: always TRUE.
 */
static gboolean
get_channel(feed_t* pfeed, xmlDocPtr doc, xmlNode* node)
{  
	xmlNode *cur = NULL;
	xmlChar *text = NULL;

	/* drop values left by an earlier call so they are not leaked */
	if(pfeed->title)
		g_string_free(pfeed->title, TRUE);
	if(pfeed->url)
		g_string_free(pfeed->url, TRUE);

	pfeed->title = g_string_new("");
	pfeed->url = g_string_new("");

	for (cur = node; cur != NULL; cur = cur->next) {
		if (cur->type != XML_ELEMENT_NODE)
			continue;
		if(xmlStrcmp(cur->name, "title") == 0){
			/* the returned string is a copy owned by us; g_string_new()
			 * copies it again (NULL yields an empty string) */
			text = xmlNodeListGetString(doc, cur->children, 1);
			g_string_free(pfeed->title, TRUE);
			pfeed->title = g_string_new((const gchar*)text);
			if(text)
				xmlFree(text);
		}
		else if(xmlStrcmp(cur->name, "link") == 0){
			text = xmlNodeListGetString(doc, cur->children, 1);
			g_string_free(pfeed->url, TRUE);
			pfeed->url = g_string_new((const gchar*)text);
			if(text)
				xmlFree(text);
		}
	}

	return TRUE;
  
}

/*
 * get_rss10_item:
 * @pfeed: feed that receives the parsed item.
 * @doc: document that owns @node.
 * @node: first node of the sibling list to scan (get_feed() passes the
 *        children of an RDF "item" element).
 *
 * Builds an item from the "title" and "link" elements and from a "date"
 * element in the Dublin Core namespace (NS_DC). When an element occurs
 * more than once the last occurrence wins. The item is appended to
 * @pfeed->items if it ends up with both a title and a url; otherwise a
 * warning is logged and the item is discarded.
 *
 * Returns: always TRUE.
 */
static gboolean
get_rss10_item(feed_t* pfeed, xmlDocPtr doc, xmlNode* node)
{
	xmlNode *cur = NULL;
	xmlChar *text = NULL;
	item_t* pitem = create_new_item();

	for (cur = node; cur != NULL; cur = cur->next) {
		if (cur->type != XML_ELEMENT_NODE)
			continue;
		if(xmlStrcmp(cur->name, "title") == 0){
			/* the returned string is a copy owned by us; g_string_new()
			 * copies it again (NULL yields an empty string) */
			text = xmlNodeListGetString(doc, cur->children, 1);
			if(pitem->title)
				g_string_free(pitem->title, TRUE);
			pitem->title = g_string_new((const gchar*)text);
			if(text)
				xmlFree(text);
		}
		else if(xmlStrcmp(cur->name, "link") == 0){
			text = xmlNodeListGetString(doc, cur->children, 1);
			if(pitem->url)
				g_string_free(pitem->url, TRUE);
			pitem->url = g_string_new((const gchar*)text);
			if(text)
				xmlFree(text);
		}
		else if(xmlStrcmp(cur->name, "date") == 0 &&
				cur->ns &&
				xmlStrcmp(cur->ns->href, NS_DC) == 0){
			text = xmlNodeListGetString(doc, cur->children, 1);
			/* an empty element leaves tmModified untouched instead of
			 * handing a NULL pointer to datetime_parse() */
			if(text){
				pitem->tmModified = datetime_parse((const char*)text);
				xmlFree(text);
			}
		}
	}

	if(!pitem->title || !pitem->url){
		g_warning("invalid item");
		free_item(pitem);
	}
	else{
		pfeed->items = g_list_append(pfeed->items, pitem);
	}
	
	return TRUE;
}

/*
 * get_rss20_item:
 * @pFeed: feed that receives the parsed item.
 * @doc: document that owns @node.
 * @node: first node of the sibling list to scan (get_rss20_feed() passes
 *        the children of an "item" element).
 *
 * Collects the text of the "title" and "link" elements and the parsed
 * "pubDate" into a new item. The item is appended to @pFeed->items when
 * it has both a title and a url; otherwise a warning is logged and the
 * item is discarded.
 *
 * Returns: always TRUE.
 */
static gboolean
get_rss20_item(feed_t* pFeed, xmlDocPtr doc, xmlNode* node)
{
	item_t* entry = create_new_item();
	xmlNode *child;
	xmlChar* text;
	time_t stamp;

	for (child = node; child; child = child->next) {
		if (child->type != XML_ELEMENT_NODE)
			continue;

		if (!xmlStrcmp(child->name, "title")) {
			text = xmlNodeListGetString(doc, child->children, 1);
			if (text) {
				entry->title = g_string_new(text);
				xmlFree(text);
			}
			continue;
		}

		if (!xmlStrcmp(child->name, "link")) {
			text = xmlNodeListGetString(doc, child->children, 1);
			if (text) {
				entry->url = g_string_new(text);
				xmlFree(text);
			}
			continue;
		}

		if (!xmlStrcmp(child->name, "pubDate")) {
			/* FIX ME */
			/* RFC 822? */
			text = xmlNodeListGetString(doc, child->children, 1);
			if (!text)
				continue;
			stamp = ne_httpdate_parse(text);
			if (stamp > (time_t)-1) {
				/* The parsed time is broken down as UTC by gmtime() and
				 * then re-encoded by mktime(), which reads it as local
				 * time.
				 * NOTE(review): this shifts the value by the local UTC
				 * offset; presumably it matches the convention used by
				 * datetime_parse() - confirm. */
				entry->tmModified = mktime(gmtime(&stamp));
			}
			xmlFree(text);
			continue;
		}

		/* FIX ME: "description" elements are not handled yet */
	}

	if (entry->title && entry->url) {
		pFeed->items = g_list_append(pFeed->items, entry);
	}
	else {
		g_warning("invalid item");
		free_item(entry);
	}

	return TRUE;
}

/*
 * get_rss20_feed:
 * @doc: document that owns @node.
 * @node: the element whose children are scanned for "channel" elements
 *        (get_feed() passes the "rss" root element).
 *
 * Creates a feed, lets get_channel() fill in title and url from each
 * "channel" element and hands every "item" child of the channel to
 * get_rss20_item().
 *
 * Returns: the new feed, or NULL (after freeing the feed) if one of the
 * helpers reports failure.
 */
static feed_t*
get_rss20_feed(xmlDocPtr doc, xmlNode* node)
{
	feed_t* feed = create_new_feed();
	xmlNodePtr channel;
	xmlNodePtr entry;

	for (channel = node->children; channel; channel = channel->next) {
		if (channel->type != XML_ELEMENT_NODE)
			continue;
		if (xmlStrcmp(channel->name, "channel") != 0)
			continue;

		if (!get_channel(feed, doc, channel->children))
			goto fail;

		for (entry = channel->children; entry; entry = entry->next) {
			if (entry->type != XML_ELEMENT_NODE)
				continue;
			if (xmlStrcmp(entry->name, "item") != 0)
				continue;
			if (!get_rss20_item(feed, doc, entry->children))
				goto fail;
		}
	}

	return feed;

fail:
	free_feed(feed);
	return NULL;
}


/*
 * get_feed:
 * @doc: document that owns @node.
 * @node: element to inspect (the test driver in this file passes the
 *        document root element).
 *
 * Chooses a parser from the name of @node:
 *   "RDF"  - "channel" children go to get_channel(), "item" children to
 *            get_rss10_item();
 *   "feed" - in the NS_ATOM namespace, handled by get_atom_feed();
 *   "rss"  - handled by get_rss20_feed().
 *
 * Returns: the parsed feed, or NULL when @node is none of the above or a
 * helper reports failure.
 */
feed_t*
get_feed(xmlDocPtr doc, xmlNode * node)
{
	feed_t* feed;
	xmlNode *child;
	gboolean ok;

	if(xmlStrcmp(node->name, "RDF") != 0){
		if(xmlStrcmp(node->name, "feed") == 0 &&
		   node->ns &&
		   xmlStrcmp(node->ns->href, NS_ATOM) == 0){
			DEBUG_PRINT("get atom feed");
			return get_atom_feed(doc, node);
		}
		if(xmlStrcmp(node->name, "rss") == 0){
			DEBUG_PRINT("get rss2.0 feed");
			return get_rss20_feed(doc, node);
		}
		return NULL;
	}

	/* RDF document: channel and items are siblings below the root */
	feed = create_new_feed();

	for(child = node->children; child != NULL; child = child->next){
		if(child->type != XML_ELEMENT_NODE)
			continue;

		if(xmlStrcmp(child->name, "channel") == 0)
			ok = get_channel(feed, doc, child->children);
		else if(xmlStrcmp(child->name, "item") == 0)
			ok = get_rss10_item(feed, doc, child->children);
		else
			continue;

		if(ok == FALSE){
			free_feed(feed);
			return NULL;
		}
	}

	return feed;
}

/*
 * print_feed:
 * @pfeed: feed to dump; may be NULL.
 *
 * Writes the feed title, the feed link and the title of every item to
 * stdout. Missing strings are printed as empty text, because the parsers
 * in this file can return a feed whose title or url was never set.
 */
void
print_feed(feed_t* pfeed)
{
	item_t* pitem;
	GList* plist;
  
	printf("print_feed\n");

	if(!pfeed)
		return;
  
	printf("title: %s\n", pfeed->title ? pfeed->title->str : "");
	printf("link: %s\n", pfeed->url ? pfeed->url->str : "");

	for(plist = pfeed->items; plist; plist = plist->next){
		pitem = (item_t*)plist->data;
		if(!pitem)
			continue;
		printf(" title: %s\n", pitem->title ? pitem->title->str : "");
	}
}

/*
 * getMetadata:
 * @node: element whose "metadata" children are inspected (getBookmark()
 *        passes an "info" element).
 * @poBookmark: outline that receives the settings.
 *
 * For every "metadata" child whose "owner" attribute equals
 * NEWSREADER_URI, reads the "isRead" attribute (sets bAvailable to TRUE
 * when it is "true") and the "ValidHours" attribute (stored in
 * iValidHours, 0 when the attribute is absent). Metadata owned by others
 * is skipped.
 */
static void
getMetadata(xmlNodePtr node, outline_t* poBookmark)
{
	xmlNodePtr cur;
	xmlChar* value;
	xmlChar* szProp;

	for(cur = node->children; cur; cur = cur->next){
		if(xmlStrcmp(cur->name, "metadata") != 0)
			continue;
		value = xmlGetProp(cur, (const xmlChar*) "owner");
		if(!value)
			continue;
		if(xmlStrcmp(value, NEWSREADER_URI) == 0){
			DEBUG_PRINT("metadata owner valid");
			szProp = xmlGetProp(cur, (const xmlChar*) "isRead");
			if(szProp){
				if(xmlStrcmp(szProp, "true") == 0){
					poBookmark->bAvailable = TRUE;
				}
				xmlFree(szProp);
			}

			szProp = xmlGetProp(cur, (const xmlChar*) "ValidHours");
			if(szProp){
				poBookmark->iValidHours = strtol((const char*)szProp, NULL, 0);
				xmlFree(szProp);
			}
			else{
				poBookmark->iValidHours = 0;
			}
		}
		/* the attribute copy is ours whether or not the owner matched */
		xmlFree(value);
	}
}

/*
 * getBookmark:
 * @node: a "bookmark" element.
 * @poList: list the new outline is appended to.
 *
 * Creates an outline from @node: the "href" attribute becomes strURL
 * (empty when absent), the content of a "title" child becomes strTitle
 * (empty when there is no such child, last one wins when repeated) and
 * every "info" child is passed to getMetadata().
 *
 * Returns: @poList with the new outline appended.
 */
static GList*
getBookmark(xmlNodePtr node, GList* poList)
{
	xmlNodePtr cur;
	xmlChar* value;
	outline_t* poBookmark;

	poBookmark = g_new0(outline_t, 1);
	poBookmark->bAvailable = FALSE;

	value = xmlGetProp(node, (const xmlChar*) "href");
	if(value){
		poBookmark->strURL = g_string_new((const gchar*)value);
		DEBUG_PRINT("bookmark url: %s", value);
		xmlFree(value);
	}
	else{
		poBookmark->strURL = g_string_new("");
	}

	for(cur = node->children; cur; cur = cur->next){
		if(xmlStrcmp(cur->name, "title") == 0){
			/* xmlNodeGetContent() returns a copy that we must release;
			 * g_string_new() treats NULL as an empty string */
			value = xmlNodeGetContent(cur);
			if(poBookmark->strTitle)
				g_string_free(poBookmark->strTitle, TRUE);
			poBookmark->strTitle = g_string_new((const gchar*)value);
			if(value)
				xmlFree(value);
			DEBUG_PRINT("bookmark title: %s", poBookmark->strTitle->str);
		}
		else if(xmlStrcmp(cur->name, "info") == 0){
			DEBUG_PRINT("get info node");
			getMetadata(cur, poBookmark);
		}
	}

	/* the store functions in this file dereference strTitle, so never
	 * leave it NULL */
	if(!poBookmark->strTitle)
		poBookmark->strTitle = g_string_new("");

	poList = g_list_append(poList, poBookmark);

	return poList;
}

/*
 * expandFolder:
 * @node: a "folder" element.
 * @poList: list that collects the outlines.
 *
 * Appends an outline for every "bookmark" child of @node and descends
 * recursively into every nested "folder" child. Other children are
 * ignored.
 *
 * Returns: the (possibly extended) list.
 */
static GList*
expandFolder(xmlNodePtr node, GList* poList)
{
	xmlNodePtr cur;
	for(cur = node->children; cur; cur = cur->next){
		/* xmlStrcmp() returns 0 on a match, same test as getFeedList_XBEL() */
		if(xmlStrcmp(cur->name, "bookmark") == 0)
			poList = getBookmark(cur, poList);
		else if(xmlStrcmp(cur->name, "folder") == 0)
			poList = expandFolder(cur, poList);
	}	

	return poList;
}

/*
 * getFeedList_XBEL:
 * @doc: parsed XBEL document.
 * @poList: list that collects the outlines.
 *
 * Reads the top-level "bookmark" and "folder" elements below an "xbel"
 * root and appends one outline per bookmark to @poList. A warning is
 * logged when the document has no root element or the root is not "xbel".
 *
 * Note that the root element is unlinked from @doc and freed before
 * returning whenever a root element exists.
 *
 * Returns: the (possibly extended) list.
 */
GList*
getFeedList_XBEL(xmlDocPtr doc, GList* poList)
{
	xmlNode *root = xmlDocGetRootElement(doc);
	xmlNode *child;

	if(root == NULL){
		g_warning("failed getting root element");
		return poList;
	}

	if(xmlStrcmp(root->name, (const xmlChar *)"xbel") == 0){
		for(child = root->children; child != NULL; child = child->next){
			if(xmlStrcmp(child->name, "bookmark") == 0){
				DEBUG_PRINT("get bookmark");
				poList = getBookmark(child, poList);
			}
			else if(xmlStrcmp(child->name, "folder") == 0){
				poList = expandFolder(child, poList);
			}
		}
	}
	else{
		g_warning("Not XBEL");
	}

	/* the root is released on both the success and the "Not XBEL" path */
	xmlUnlinkNode(root);
	xmlFreeNode(root);

	return poList;
}

/*
 * storeFeedList_XBEL:
 * @poBookmarkList: list of outline_t pointers; NULL entries are skipped.
 * @filename: path of the file to write.
 *
 * Builds an XBEL document with one "bookmark" element per outline (href
 * attribute, optional "title" child and an "info"/"metadata" child that
 * carries the owner, isRead and ValidHours attributes) and saves it as
 * UTF-8 to @filename. The existing file is removed first; when the list
 * yields no bookmark nothing is written, so the file stays removed.
 *
 * Returns: TRUE on success, FALSE when the document could not be created
 * or saved.
 */
gboolean
storeFeedList_XBEL(GList* poBookmarkList, gchar* filename)
{
	xmlDoc *doc = NULL;
	xmlNodePtr node,bookmark,info, metadata;
	GList* poList;
	int iListNo = 0;
	gboolean bResult = TRUE;
	/* large enough for any int printed with %d; the former 4-byte limit
	 * cut values of 1000 and above down to three digits */
	gchar value[32];

	doc = xmlNewDoc((const xmlChar*)"1.0");
	if(!doc){
		g_warning("failed creating XBEL document");
		return FALSE;
	}
	xmlCreateIntSubset(doc, (const xmlChar*)"xbel", (const xmlChar*)"+//IDN python.org//DTD XML Bookmark Exchange Language 1.0//EN//XML", (const xmlChar*)"http://www.python.org/topics/xml/dtds/xbel-1.0.dtd");
	node = xmlNewNode(NULL, (const xmlChar*)"xbel");
	if(!node){
		g_warning("failed creating XBEL document");
		xmlFreeDoc(doc);
		return FALSE;
	}
	xmlSetProp(node, (const xmlChar*)"version", (const xmlChar*)"1.0");

	xmlDocSetRootElement(doc, node);

	for(poList = poBookmarkList; poList; poList = poList->next){
		outline_t* poOutline = (outline_t*)poList->data;
		if(!poOutline){
			continue;
		}

		bookmark = xmlNewChild(node, NULL, (const xmlChar*)"bookmark", NULL);
		
		if(poOutline->strURL && poOutline->strURL->str){
			xmlSetProp(bookmark, (const xmlChar*)"href", (const xmlChar*)poOutline->strURL->str);
		}
		else{
			xmlSetProp(bookmark, (const xmlChar*)"href", (const xmlChar*)"");
		}

		/* strTitle can be missing for bookmarks that had no title element */
		if(poOutline->strTitle && poOutline->strTitle->str){
			xmlNewChild(bookmark, NULL, (const xmlChar*)"title", (const xmlChar*)poOutline->strTitle->str);
		}

		info = xmlNewChild(bookmark, NULL, (const xmlChar*)"info", NULL);
		metadata = xmlNewChild(info, NULL, (const xmlChar*)"metadata", NULL);
		xmlSetProp(metadata, (const xmlChar*)"owner", (const xmlChar*)NEWSREADER_URI);

		if(poOutline->bAvailable){
			xmlSetProp(metadata, (const xmlChar*)"isRead", (const xmlChar*)"true");
		}

		g_snprintf (value, sizeof value, "%d", poOutline->iValidHours);
		xmlSetProp(metadata, (const xmlChar*)"ValidHours", (const xmlChar*)value);
		
		iListNo++;
	}
	
	unlink(filename);

	if(iListNo){
		/* xmlSaveFormatFileEnc() returns -1 when the file cannot be written */
		if(xmlSaveFormatFileEnc(filename, doc, "utf-8", 1) < 0){
			g_warning("failed writing %s", filename);
			bResult = FALSE;
		}
	}

	/* frees the root element and the internal subset together with the document */
	xmlFreeDoc(doc);

	return bResult;

}

/*
 * storeOutlineList:
 * @poOutlineList: list of outline_t pointers; NULL entries are skipped.
 * @filename: path of the file to write.
 *
 * Builds an OPML document ("opml" root with an empty "head" and a "body")
 * holding one "outline" element per list entry with the attributes text,
 * xmlURL, isRead (only when bAvailable is set) and ValidHours (only when
 * greater than zero), and saves it as UTF-8 to @filename. The existing
 * file is removed first; when the list yields no outline nothing is
 * written, so the file stays removed.
 *
 * Returns: TRUE on success, FALSE when the document could not be created
 * or saved.
 */
gboolean
storeOutlineList(GList* poOutlineList, gchar* filename)
{
	xmlDoc *doc = NULL;
	xmlNodePtr node,body,outline;
	GList* poList;
	int iListNo = 0;
	gboolean bResult = TRUE;
	/* large enough for any int printed with %d; the former 4-byte limit
	 * cut values of 1000 and above down to three digits */
	gchar value[32];

	doc = xmlNewDoc((const xmlChar*)"1.0");
	if(!doc){
		g_warning("failed creating OPML document");
		return FALSE;
	}
	node = xmlNewNode(NULL, (const xmlChar*)"opml");
	if(!node){
		g_warning("failed creating OPML document");
		xmlFreeDoc(doc);
		return FALSE;
	}
	xmlSetProp(node, (const xmlChar*)"version", (const xmlChar*)"1.0");

	xmlDocSetRootElement(doc, node);

	xmlNewChild(node, NULL, (const xmlChar*)"head", NULL);
	body = xmlNewChild(node, NULL, (const xmlChar*)"body", NULL);
	for(poList = poOutlineList; poList; poList = poList->next){
		outline_t* poOutline = (outline_t*)poList->data;
		if(!poOutline){
			continue;
		}

		outline = xmlNewChild(body, NULL, (const xmlChar*)"outline", NULL);
		
		if(poOutline->strTitle && poOutline->strTitle->str){
			xmlSetProp(outline, (const xmlChar*)"text", (const xmlChar*)poOutline->strTitle->str);
		}
		else{
			xmlSetProp(outline, (const xmlChar*)"text", (const xmlChar*)"");
		}
		
		if(poOutline->strURL && poOutline->strURL->str){
			xmlSetProp(outline, (const xmlChar*)"xmlURL", (const xmlChar*)poOutline->strURL->str);
		}
		else{
			xmlSetProp(outline, (const xmlChar*)"xmlURL", (const xmlChar*)"");
		}
		
		if(poOutline->bAvailable){
			xmlSetProp(outline, (const xmlChar*)"isRead", (const xmlChar*)"true");
		}

		if(poOutline->iValidHours > 0){
			g_snprintf (value, sizeof value, "%d", poOutline->iValidHours);
			xmlSetProp(outline, (const xmlChar*)"ValidHours", (const xmlChar*)value);
		}
		
		iListNo++;
	}
	
	unlink(filename);

	if(iListNo){
		/* xmlSaveFormatFileEnc() returns -1 when the file cannot be written */
		if(xmlSaveFormatFileEnc(filename, doc, "utf-8", 1) < 0){
			g_warning("failed writing %s", filename);
			bResult = FALSE;
		}
	}

	/* frees the root element together with the document */
	xmlFreeDoc(doc);

	return bResult;

}

/*
 * getOutlineList:
 * @doc: parsed OPML document.
 * @poList: list that collects the outlines.
 *
 * For every "outline" element directly below a "body" child of the
 * "opml" root, creates an outline_t from the attributes text (strTitle),
 * xmlURL (strURL), isRead (bAvailable is TRUE only for "true") and
 * ValidHours (iValidHours, 0 when absent) and appends it to @poList.
 * A warning is logged when the document has no root element or the root
 * is not "opml".
 *
 * Note that the root element is unlinked from @doc and freed before
 * returning whenever a root element exists.
 *
 * Returns: the (possibly extended) list.
 */
GList*
getOutlineList(xmlDocPtr doc, GList* poList)
{
	xmlNode *root = xmlDocGetRootElement(doc);
	xmlNode *section;
	xmlNode *entry;
	xmlChar* attr;
	outline_t* poOutline;

	if(root == NULL){
		g_warning("failed getting root element");
		return poList;
	}

	if(xmlStrcmp(root->name, (const xmlChar *)"opml") != 0){
		g_warning("Not OPML");
		goto release_root;
	}

	for(section = root->children; section != NULL; section = section->next){
		if(xmlStrcmp(section->name, "body") != 0)
			continue;

		for(entry = section->children; entry != NULL; entry = entry->next){
			if(xmlStrcmp(entry->name, "outline") != 0)
				continue;

			poOutline = g_new0(outline_t, 1);

			/* title: attribute text, or an empty string when missing */
			attr = xmlGetProp(entry, (const xmlChar*) "text");
			poOutline->strTitle = g_string_new(attr != NULL ? (const gchar*)attr : "");
			xmlFree(attr);

			/* feed address: attribute xmlURL, or an empty string */
			attr = xmlGetProp(entry, (const xmlChar*) "xmlURL");
			poOutline->strURL = g_string_new(attr != NULL ? (const gchar*)attr : "");
			xmlFree(attr);

			/* flag is set only for the exact value "true" */
			attr = xmlGetProp(entry, (const xmlChar*) "isRead");
			poOutline->bAvailable = xmlStrEqual(attr, (const xmlChar*)"true") ? TRUE : FALSE;
			xmlFree(attr);

			/* base 0 lets strtol() pick the radix from the prefix */
			attr = xmlGetProp(entry, (const xmlChar*) "ValidHours");
			if(attr != NULL)
				poOutline->iValidHours = strtol((const char*)attr, NULL, 0);
			else
				poOutline->iValidHours = 0;
			xmlFree(attr);

			poList = g_list_append(poList, poOutline);
		}
	}

release_root:
	xmlUnlinkNode(root);
	xmlFreeNode(root);
	return poList;
}

#ifdef _XMLPARSER_MAIN_
/**
 * Standalone test driver: parses the XML file named by the single
 * command line argument, builds a feed from its root element with
 * get_feed() and prints it with print_feed().
 *
 * Returns 0 on success and 1 on a usage or parse error.
 */
int
main(int argc, char **argv)
{
    xmlDoc *doc = NULL;
    xmlNode *root_element = NULL;
    feed_t* pfeed;
    int status = 0;

    if (argc != 2)
        return(1);

    /*
     * this initialize the library and check potential ABI mismatches
     * between the version it was compiled for and the actual shared
     * library used.
     */
    LIBXML_TEST_VERSION

    /*parse the file and get the DOM */
    doc = xmlReadFile(argv[1], NULL, 0);

    if (doc == NULL) {
        printf("error: could not parse file %s\n", argv[1]);
        /* nothing to walk: stop here instead of using a NULL document */
        xmlCleanupParser();
        return 1;
    }

    /*Get the root element node */
    root_element = xmlDocGetRootElement(doc);

    if (root_element == NULL) {
        /* get_feed() dereferences its node argument */
        printf("error: no root element in %s\n", argv[1]);
        status = 1;
    }
    else if ((pfeed = get_feed(doc, root_element))) {
        print_feed(pfeed);
        free_feed(pfeed);
    }

    /*free the document */
    xmlFreeDoc(doc);

    /*
     *Free the global variables that may
     *have been allocated by the parser.
     */
    xmlCleanupParser();

    return status;
}
#endif

/*
 * get_node:
 * @node: first node of the sibling list to search; may be NULL.
 * @tag: element name to look for.
 *
 * Returns: the first node in the sibling list starting at @node whose
 * name equals @tag, or NULL when there is none.
 */
static xmlNodePtr
get_node(xmlNodePtr node, const xmlChar* tag)
{
	xmlNodePtr candidate = node;

	/* advance until the list ends or the name matches */
	while(candidate != NULL && xmlStrcmp(candidate->name, tag) != 0)
		candidate = candidate->next;

	return candidate;
}

/*
 * get_favicon_path:
 * @doc: parsed HTML document; may be NULL.
 *
 * Looks below html/head for "link" elements whose rel attribute equals
 * "shortcut icon" (compared case-insensitively) and takes the href of
 * the last such element.
 *
 * Returns: a newly allocated copy of that href, to be released with
 * g_free(), or NULL when the document, the html/head elements or a
 * matching link with an href cannot be found.
 */
gchar*
get_favicon_path(xmlDocPtr doc)
{
	htmlNodePtr node;
	xmlChar* value;
	xmlChar* href = NULL;
	gchar* retStr;

	if(!doc){
		g_warning("doc is null");
		return NULL;
	}

	if(!(node = doc->children)){
		g_warning("node is null\n");
		return NULL;
	}
	
	if(!(node = get_node(node, (const xmlChar*)"html"))){
		g_warning("not found html tag\n");
		return NULL;
	}

	if(!(node = get_node(node->children, (const xmlChar*)"head"))){
		g_warning("not found head tag\n");
		return NULL;
	}

	for(node = node->children; node; node = node->next){
		if(xmlStrcmp(node->name, "link") == 0){
			if(!(value = xmlGetProp(node, (const xmlChar*)"rel")))
				continue;
			if(xmlStrcasecmp(value, (const xmlChar*)"shortcut icon") == 0){
				/* a later match replaces an earlier one; release the
				 * earlier copy so it is not leaked */
				if(href)
					xmlFree(href);
				href = xmlGetProp(node, (const xmlChar*) "href");
			}
			xmlFree(value);
			value = NULL;
		}
	}

	if(!href)
		return NULL;

	/* hand out a GLib-owned copy so callers can use g_free() */
	retStr = g_strdup((const gchar*)href);
	xmlFree(href);

	return retStr;

}

#ifdef _HTML_MAIN_
/*
 * Standalone test driver: parses "index.html" from the current directory
 * and logs the favicon path found by get_favicon_path().
 *
 * Returns 0 on success and 1 when the file cannot be parsed.
 */
int
main(int argc, char* argv[])
{
	xmlDocPtr doc;
	gchar* uri;

	if(!(doc = htmlReadFile("index.html", NULL, HTML_PARSE_NOERROR))){
		printf("parse error\n");
		return 1;
	}
	
	g_message("readfile");

	uri = get_favicon_path(doc);

	g_message("get favicon");

	/* the path comes from the document, so it must never be used as the
	 * format string itself */
	if(uri)
		g_message("%s", uri);

	xmlFreeDoc(doc);
	g_free(uri);

	return 0;
}

#endif
