<?php
// $Id: rssc_refresh_handler.php,v 1.1.1.1 2006/01/03 05:06:03 ohwada Exp $

// 2006-01-20 K.OHWADA
// small change

//=========================================================
// Rss Center Module
// 2006-01-01 K.OHWADA
//=========================================================

// === class begin ===
if( !class_exists('rssc_refresh_handler') ) 
{

//=========================================================
// class rssc_refresh_handler
//=========================================================
class rssc_refresh_handler extends rssc_error
{
// Refreshes stored links.  For one link id it can
//   1) discover the XML (RDF / RSS / ATOM) url of the link's html page,
//   2) fetch and parse the XML through rssc_xml_utility,
//   3) hand the parsed channel and items to the link and feed handlers.
// Items whose link matches a black list entry are skipped, unless the
// link also matches a white list entry.
// Methods report failure by returning false; messages are recorded
// with _set_errors() (inherited, presumably from rssc_error).

// handler
	var $_link_handler;
	var $_feed_handler;
	var $_black_handler;
	var $_white_handler;
	var $_config_data_handler;

// class instance
	var $_xml_utility;
	var $_strings;	// assigned in the constructor; not used by the methods below

// set parameter
	var $_encoding_local = _CHARSET;	// passed to xml_utility->parse_by_url()
	var $_xml_mode  = 0;	// set by _discover_rssurl(); 0 means "no url to store"

// result
	var $_parsed_data = array();	// set by _parse_xml_by_url()
	var $_html_data;	// not used by the methods below
	var $_rdf_url;	// set by _discover_rssurl()
	var $_rss_url;	// set by _discover_rssurl()
	var $_atom_url;	// set by _discover_rssurl()
	var $_xml_data;	// set by _parse_xml_by_url()
	var $_xml_encoding;	// set by _parse_xml_by_url()

// black & white table
// loaded by _update_feeds(), read by _check_black() / _check_white()
	var $_black_objs;
	var $_white_objs;

// config table
	var $_conf_data;	// not used by the methods below

// basic config
// selection passed to discoverXmlUrl() by refreshXmlUrl()
	var $_sel_rss_atom = RSSC_C_SEL_ATOM;

// debug
	var $_flag_link_update    = 1;	// update
	var $_flag_feed_update    = 1;	// update
	var $_flag_force_discover = 0;	// 1: discover even if link mode is not auto
	var $_flag_force_refresh  = 0;	// 1: refresh even if the link is not expired

// when _flag_debug_parse is set, refreshArchive() uses the three
// _debug_* values instead of the values stored in the link
	var $_flag_debug_parse = 0;
	var $_debug_xml_url    = '';
	var $_debug_encoding   = '';
	var $_debug_xml_mode   = '';


//---------------------------------------------------------
// constructor
//
// $dirname : passed to rssc_get_handler() for every handler
//---------------------------------------------------------
function rssc_refresh_handler( $dirname )
{
	$this->rssc_error();

// handler
	$this->_link_handler   =& rssc_get_handler('link',  $dirname);
	$this->_feed_handler   =& rssc_get_handler('feed',  $dirname);
	$this->_black_handler  =& rssc_get_handler('black', $dirname);
	$this->_white_handler  =& rssc_get_handler('white', $dirname);
	$this->_config_data_handler  =& rssc_get_handler('config_data', $dirname);

// class instance
	$this->_xml_utility  =& rssc_xml_utility::getInstance();
	$this->_strings      =& rssc_strings::getInstance();

// apply the stored configuration to the link handler and xml utility
	$this->_init_param();
}

//=========================================================
// public
//=========================================================
//---------------------------------------------------------
// refresh headline links
//
// Calls refresh() for every link id returned by the link handler's
// get_headline_lids($limit, $start).  All ids are tried, even after
// a failure.
//
// return : true when every refresh() succeeded.
//          false otherwise; $this->_errors is then replaced by the
//          collected list: a "lid: N" marker for each failed id,
//          followed by the content of $this->_errors at that moment.
//---------------------------------------------------------
function refreshHeadline($limit=0, $start=0)
{
	$this->_set_log_func_name('refreshHeadline');

	$flag_error = false;
	$error_arr  = array();

	$lids = $this->_link_handler->get_headline_lids($limit, $start);

// refresh
	foreach ($lids as $lid) 
	{
		if ( !$this->refresh($lid) )
		{
			$flag_error = true;
			array_push( $error_arr, "lid: $lid" );
			$error_arr = array_merge( $error_arr, $this->_errors );
		}
	}

	if ($flag_error)
	{
		$this->_errors = $error_arr;
		return false;
	}

	return true;
}

//---------------------------------------------------------
// refresh one link
//
// Clears the feed counter, then runs refreshXmlUrl() and
// refreshArchive() when the force-refresh flag is set or the link
// reports refreshExpired().
//
// return : false when the link cannot be fetched or a step fails;
//          true otherwise (including "not expired, nothing done").
//---------------------------------------------------------
function refresh($lid)
{
	$this->_set_log_func_name('refresh');

	$link = $this->_link_handler->get($lid);
	if ( !$link )
	{
		return false;
	}

	$this->_clear_count();

	if ( $this->_flag_force_refresh || $link->refreshExpired() )
	{
		if ( !$this->refreshXmlUrl($lid) )
		{
			return false;
		}

		if ( !$this->refreshArchive($lid) )
		{
			return false;
		}
	}

	return true;
}

//---------------------------------------------------------
// discover and store the XML url of one link
//
// Runs only when the force-discover flag is set or the link mode
// is RSSC_C_MODE_AUTO; otherwise returns true without any action.
// The discovered urls are written to the link only when the
// discovered xml mode is non-zero.
//
// return : false when the link cannot be fetched, discovery fails
//          or the link update fails; true otherwise.
//---------------------------------------------------------
function refreshXmlUrl($lid)
{
	$this->_set_log_func_name('refreshXmlUrl');

	$link = $this->_link_handler->get($lid);

// this method is public: guard against an unknown lid, same as refresh()
	if ( !$link )
	{
		return false;
	}

	$link_mode = $link->getVar('mode');
	$link_url  = $link->getVar('url');

// RSS auto discovery
	if ( $this->_flag_force_discover || ( $link_mode == RSSC_C_MODE_AUTO ) )
	{
		if ( !$this->discoverXmlUrl($link_url, $this->_sel_rss_atom) )
		{
			$this->_set_errors( 'rssc: cannot discover xmlurl' );
			return false;
		}

		if ( $this->_xml_mode )
		{
			if ( !$this->updateXmlUrl($lid, $this->_xml_mode, $this->_rdf_url, $this->_rss_url, $this->_atom_url) )
			{
				$this->_set_errors( 'rssc: cannot update xmlurl' );
				return false;
			}
		}
	}

	return true;
}

//---------------------------------------------------------
// fetch, parse and store the XML of one link
//
// Steps:
//   - read mode / encoding / xml url from the link
//     (replaced by the debug values when debug parse is set)
//   - return true without action unless the mode is RDF, RSS or ATOM
//   - parse the XML
//   - store the detected encoding when the encoding in use was empty
//   - update the link and the feeds, each controlled by its flag
//
// return : false when the link cannot be fetched or a step fails;
//          true otherwise.
//---------------------------------------------------------
function refreshArchive($lid)
{
	$this->_set_log_func_name('refreshArchive');

// the link is read again, to get the new values if auto discovery ran
	$link = $this->_link_handler->get($lid);

// this method is public: guard against an unknown lid, same as refresh()
	if ( !$link )
	{
		return false;
	}

	$link_mode     = $link->getVar('mode');
	$link_encoding = $link->getVar('encoding');
	$xml_url       = $link->get_rssurl_by_mode();

	if ( $this->_flag_debug_parse )
	{
		$link_mode     = $this->_debug_xml_mode;
		$link_encoding = $this->_debug_encoding;
		$xml_url       = $this->_debug_xml_url;
	}

// proceed only for the modes handled here (RDF / RSS / ATOM)
	if ( ($link_mode != RSSC_C_MODE_RDF) && ($link_mode != RSSC_C_MODE_RSS) && ($link_mode != RSSC_C_MODE_ATOM) )
	{
		return true;	// not execute
	}

// get and parse XML
	if ( !$this->parseXmlByUrl($xml_url, $link_encoding, $link_mode) )
	{
		return false;
	}

// no encoding given: store the one set by the parse step
	if ( empty($link_encoding) )
	{
		if ( !$this->updateLinkEncoding($lid, $this->_xml_encoding) )
		{
			$this->_set_errors( 'rssc: cannot update link encoding' );
			return false;
		}
	}

// update archive
// one timestamp for the whole pass, so link and feeds get the same value
	$updated = time();

	if ( $this->_flag_link_update )
	{
		if ( !$this->updateLink($lid, $this->_parsed_data, $this->_xml_data, $updated ) )
		{
			$this->_set_errors( 'rssc: cannot update link' );
			return false;
		}
	}

	if ( $this->_flag_feed_update )
	{
		if ( !$this->updateFeeds($lid, $this->_parsed_data, $this->_xml_data, $this->_xml_encoding, $updated ) )
		{
			$this->_set_errors( 'rssc: cannot update feeds' );
			return false;
		}
	}

	return true;
}

//---------------------------------------------------------
// public wrappers
// each one forwards its arguments unchanged to the private
// method of the same purpose and returns its result
//---------------------------------------------------------
function discoverXmlUrl($html_url, $sel_rss_atom='')
{
	return $this->_discover_rssurl($html_url, $sel_rss_atom);
}

function updateXmlUrl($lid, $rss_mode, $rdf_url, $rss_url, $atom_url)
{
	return $this->_update_link_xmlurl($lid, $rss_mode, $rdf_url, $rss_url, $atom_url);
}

function parseXmlByUrl($xml_url, $xml_encoding='', $xml_mode=0)
{
	return $this->_parse_xml_by_url($xml_url, $xml_encoding, $xml_mode);
}

function updateLinkEncoding($lid, $xml_encoding)
{
	return $this->_update_link_encoding($lid, $xml_encoding);
}

function updateLink($lid, $formated_data, $xml_data, $updated='')
{
	return $this->_update_link($lid, $formated_data, $xml_data, $updated);
}

function updateFeeds($lid, $formated_data, $xml_data, $xml_encoding, $updated='')
{
	return $this->_update_feeds($lid, $formated_data, $xml_data, $xml_encoding, $updated);
}

//---------------------------------------------------------
// get result
//---------------------------------------------------------
// returns (by reference) the data stored by the last successful parse
function &getData()
{
	return $this->_parsed_data;
}

// returns the feed handler's get_count_refresh_feed() value
function get_count_feed()
{
	return $this->_feed_handler->get_count_refresh_feed();
}

//---------------------------------------------------------
// set and get property
// for xml_utility
//---------------------------------------------------------
function setPriorityRssAtom($value)
{
	$this->_xml_utility->set_priority($value);
}

function setRssParser($value)
{
	$this->_xml_utility->set_rss_parser($value);
}

function setAtomParser($value)
{
	$this->_xml_utility->set_atom_parser($value);
}

//---------------------------------------------------------
// set debug parameter
// flag values are normalized with intval()
//---------------------------------------------------------
// 0: refreshArchive() does not call updateLink()
function set_link_update($value)
{
	$this->_flag_link_update = intval($value);
}

function set_link_xml_save($value)
{
	$this->_link_handler->set_xml_save($value);
}

// 0: refreshArchive() does not call updateFeeds()
function set_feed_update($value)
{
	$this->_flag_feed_update = intval($value);
}

// non-zero: refreshXmlUrl() discovers regardless of the link mode
function set_force_discover($value)
{
	$this->_flag_force_discover = intval($value);
}

// non-zero: refresh() does not consult the link's refreshExpired()
function set_force_refresh($value)
{
	$this->_flag_force_refresh = intval($value);
}

function set_force_overwrite($value)
{
	$this->_feed_handler->set_force_overwrite($value);
}

// non-zero $flag: refreshArchive() uses $url, $encoding and $mode
// instead of the values stored in the link
function set_debug_parse($flag, $url='', $encoding='', $mode='')
{
	$this->_flag_debug_parse = intval($flag);
	$this->_debug_xml_url    = $url;
	$this->_debug_encoding   = $encoding;
	$this->_debug_xml_mode   = $mode;
}

//=========================================================
// override
// the value is stored in this object and also passed on to the
// link handler, the feed handler and the xml utility
//=========================================================
function set_debug_print_log($value)
{
	$value = intval($value);
	$this->_flag_debug_print_log = $value;
	$this->_link_handler->set_debug_print_log($value);
	$this->_feed_handler->set_debug_print_log($value);
	$this->_xml_utility->set_debug_print_log($value);
}

function set_debug_print_error($value)
{
	$value = intval($value);
	$this->_flag_debug_print_error = $value;
	$this->_link_handler->set_debug_print_error($value);
	$this->_feed_handler->set_debug_print_error($value);
	$this->_xml_utility->set_debug_print_error($value);
}

//=========================================================
// private
//=========================================================
//---------------------------------------------------------
// initial
//---------------------------------------------------------
// resets the counter kept by the feed handler
function _clear_count()
{
	$this->_set_log_func_name('_clear_count');

	$this->_feed_handler->clear_count();
}

//---------------------------------------------------------
// config
//
// Reads the config data and applies basic_xml_save,
// basic_rss_atom and basic_parser_rss.
// basic_parser_atom is not applied: the call below was already
// commented out (reason not recorded in this file).
//---------------------------------------------------------
function _init_param()
{
	$conf_data =& $this->_config_data_handler->get_config_data();

	$this->set_link_xml_save(  $conf_data['basic_xml_save'] );

// xml utility
	$this->setPriorityRssAtom( $conf_data['basic_rss_atom'] );
	$this->setRssParser(  $conf_data['basic_parser_rss'] );
//	$this->setAtomParser( $conf_data['basic_parser_atom'] );

}

//---------------------------------------------------------
// discover RSS URL
//
// On success copies xml mode, rdf url, rss url and atom url from
// the xml utility into this object.
//
// return : false (with an error message set) when discover() fails;
//          true otherwise.
//---------------------------------------------------------
function _discover_rssurl($html_url, $sel='')
{
	$this->_set_log_func_name('_discover_rssurl');

	if ( !$this->_xml_utility->discover($html_url, $sel) )
	{
		$this->_set_errors( "rssc: cannot discover xml link" );
		return false;
	}

	$this->_xml_mode = $this->_xml_utility->get_xml_mode();
	$this->_rdf_url  = $this->_xml_utility->get_rdf_url();
	$this->_rss_url  = $this->_xml_utility->get_rss_url();
	$this->_atom_url = $this->_xml_utility->get_atom_url();

	return true;
}

//---------------------------------------------------------
// update XmlUrl
// forwards to the link handler's update_xml_url()
//---------------------------------------------------------
function _update_link_xmlurl($lid, $rss_mode, $rdf_url, $rss_url, $atom_url)
{
	return $this->_link_handler->update_xml_url($lid, $rss_mode, $rdf_url, $rss_url, $atom_url);
}

//---------------------------------------------------------
// parse XML
//
// On failure: copies the xml utility's error code (when non-zero)
// and its errors into this object and returns false.
// On success: stores the raw xml and the parsed data, and sets
// $this->_xml_encoding to $xml_encoding when that is non-empty,
// otherwise to the encoding reported by the xml utility.
//---------------------------------------------------------
function _parse_xml_by_url($xml_url, $xml_encoding='', $xml_mode=0)
{
	$this->_set_log_func_name('_parse_xml_by_url');

	if ( !$this->_xml_utility->parse_by_url($xml_url, $xml_encoding, $xml_mode, $this->_encoding_local) )
	{
		$code = $this->_xml_utility->getErrorCode();
		if ($code)
		{
			$this->_set_error_code($code);
		}

		$this->_set_errors( 'rssc: cannot parse xml' );
		$this->_set_errors( $this->_xml_utility->getErrors() );
		return false;
	}

	$this->_xml_data    = $this->_xml_utility->get_xml();
	$this->_parsed_data = $this->_xml_utility->get_data();

	if ( $xml_encoding )
	{
		$this->_xml_encoding = $xml_encoding;
	}
	else
	{
		$this->_xml_encoding = $this->_xml_utility->get_xml_encoding();
	}

	return true;
}

//---------------------------------------------------------
// update link encoding
// forwards to the link handler's update_encoding()
//---------------------------------------------------------
function _update_link_encoding($lid, $encoding)
{
	return $this->_link_handler->update_encoding($lid, $encoding);
}

//---------------------------------------------------------
// update archive
//---------------------------------------------------------
// Passes $formated_data['channel'] (an empty array when the key is
// missing), the raw xml and the timestamp to the link handler's
// update_xml().
// return : false (with the link handler's errors copied) on failure.
function _update_link($lid, $formated_data, $xml_data, $updated='' )
{
	$channel = array();

	if ( isset($formated_data['channel']) )
	{
		$channel = $formated_data['channel'];
	}

	if ( !$this->_link_handler->update_xml($lid, $channel, $xml_data, $updated) )
	{
		$this->_set_errors( $this->_link_handler->getErrors() );
		return false;
	}

	return true;
}

// Loads the black and white lists, then calls _update_feed() for
// every entry of $formated_data['items'].  All items are tried,
// even after a failure.
// $xml_data, $xml_encoding and $updated are accepted but not used here.
// return : false when the link cannot be fetched or any item failed;
//          true otherwise (including "no items").
function _update_feeds($lid, $formated_data, $xml_data, $xml_encoding, $updated='' )
{
	$link = $this->_link_handler->get($lid);
	if ( !$link )
	{
		return false;
	}

	$uid  = $link->getVar('uid');
	$mid  = $link->getVar('mid');
	$p1   = $link->getVar('p1');
	$p2   = $link->getVar('p2');
	$p3   = $link->getVar('p3');

	$this->_black_objs = $this->_black_handler->getObjects();
	$this->_white_objs = $this->_white_handler->getObjects();

// a feed may have no items: treat a missing or non-array 'items'
// entry as an empty list, like _update_link() does for 'channel'
	$items = array();

	if ( isset($formated_data['items']) && is_array($formated_data['items']) )
	{
		$items = $formated_data['items'];
	}

	$flag_err = false;

// refresh each feed item
	foreach( $items as $item )
	{
		if ( !$this->_update_feed($lid, $uid, $mid, $p1, $p2, $p3, $item) )
		{
			$flag_err = true;
		}
	}

	if ( $flag_err )
	{
		return false;
	}

	return true;
}

// Passes one item to the feed handler's refresh(), unless the
// item's link is black listed.
// return : true when stored or skipped; false (with error messages
//          set) when the feed handler fails.
function _update_feed($lid, $uid, $mid, $p1, $p2, $p3, $item)
{
// an item without a link is checked as an empty url
	$item_link = isset($item['link']) ? $item['link'] : '';

	if ( $this->_check_black( $item_link ) )
	{
		return true;	// no action
	}

	if ( !$this->_feed_handler->refresh($lid, $uid, $mid, $p1, $p2, $p3, $item) )
	{
		$this->_set_errors( 'rssc: cannot update feed table' );
		$this->_set_errors( $this->_feed_handler->getErrors() );
		return false;
	}

	return true;
}

// Returns true when $url must be rejected: it matches a black list
// entry and no white list entry.
// Each entry's 'url' value is used as a case-insensitive regular
// expression; '|' is the pattern delimiter, so '|' inside the value
// is escaped first.
function _check_black($url)
{
// empty() also covers a list which is not loaded yet (null)
	if ( empty($this->_black_objs) )
	{	return false;	}

	foreach ($this->_black_objs as $black)
	{
		$black_url = $black->getVar('url');
		$black_url = str_replace("|", '\|', $black_url);

// match black list
		if ( preg_match("|$black_url|i", $url) )
		{
// a white list match overrides this entry; keep checking the others
			if ( !$this->_check_white($url) )
			{
				$this->_set_log( "match black list: $url" );
				return true;
			}
		}
	}

	return false;
}

// Returns true when $url matches a white list entry.
// Entries are handled like black list entries: the 'url' value is a
// case-insensitive regular expression with '|' as delimiter.
function _check_white($url)
{
// empty() also covers a list which is not loaded yet (null)
	if ( empty($this->_white_objs) )
	{	return false;	}

	foreach ($this->_white_objs as $white)
	{
		$white_url = $white->getVar('url');

// escape the url string (not the $white object itself)
		$white_url = str_replace("|", '\|', $white_url);

// match white list
		if ( preg_match("|$white_url|i", $url) )
		{
			$this->_set_log( "match white list: $url" );
			return true;
		}
	}

	return false;
}

// --- class end ---
}

// === class end ===
}

?>