#!/bin/perl

# $Id: merge,v 1.1 2007/06/11 14:11:13 jeremy_carroll Exp $
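# Merge a content file into a template file and print the result.
# Content is the text of the <div id="..."> elements named header, footer,
# trail and content, plus the <title> element and the <meta> tags.
# NB: sections are selected by id=, not by class=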

# Defaults; the command line may override $template and $extract.
$prog     = $0 ;
$usage    = "$prog [--extract] [--template FILE] FILE" ;
$template = "template.html" ;
$target   = undef ;
$extract  = 0 ;

# An empty command line, or a lone help flag, prints the usage line and
# counts as success.
if ( @ARGV == 0
     || ( @ARGV == 1 && ( $ARGV[0] eq '-h' || $ARGV[0] eq '--help' ) ) )
{
    print STDERR "$usage\n" ;
    exit 0 ;
}

# --extract is only recognised as the very first argument.
if ( @ARGV > 0 && $ARGV[0] eq '--extract' )
{
    shift @ARGV ;
    $extract = 1 ;
}

# What is left must be either "FILE" or "--template FILE FILE".
if ( @ARGV == 1 )
{
    ( $target ) = @ARGV ;
}
elsif ( @ARGV == 3 && $ARGV[0] eq '--template' )
{
    ( undef, $template, $target ) = @ARGV ;
}
else
{
    print STDERR "$usage\n" ;
    exit 1 ;
}

# Find directory of target (for relative links)

# A target that ends in '/' names a directory, not a file: give up.
if ( $target =~ m!/$! )
{
    print STDERR "'$target': Filename end in a /\n" ;
    exit 2 ;
}

# $dir is the part of the target before its last '/', @path the components
# of that directory.  Both stay unset when the target contains no '/'.
( $dir ) = ( $target =~ m!(.*)/! ) ;
@path = split('/', $dir) if ( defined $dir ) ;

## Will be used so that we can process files that are not in the current directory
## $unpath='' ;
## for $p (@path)
## {
##     $unpath="$unpath".'../' ;
## }
## print "Unpath: '$unpath'\n" ;
## print "Dir = '$dir'\n" ;
## print $#path , "\n" ;
## exit ;

# Read whole files.
# $/ is cleared for the rest of the script on purpose: every later
# <HANDLE> read must return the complete file in one string.
undef $/ ;

# Three-argument open: the name is taken literally, so a target that starts
# or ends with blanks, or contains mode characters such as '>' or '|',
# cannot change how the file is opened.  On failure report which file could
# not be read and why.
open(my $targetFh, '<', $target)
    || die "$prog: cannot open '$target' for reading: $!\n" ;
binmode $targetFh ;
$_ = <$targetFh> ;
close($targetFh) ;

# Remove comment
## s/\<\!--.*?--\>//g ; 

## ---- Extract the content

# Ids of the <div id="..."> sections looked for in the target, and the text
# found for each of them (sections that are absent get no entry).
@sections = ("header", "footer", "trail", "content") ;
%sections = () ;

# $s stays a package variable (not "my") so that its value remains visible
# inside the subs called from this loop.
for $s (@sections)
{
    my $found = &doSectionNamed($_, $s) ;
    $sections{$s} = $found if ( $found ne '' ) ;
}


# And the HTML title: the whole element, tags included.
# /s lets '.' cross newlines.  The file was slurped into a single string, so
# a <title> element wrapped over several lines would otherwise be missed.
$title = '' ;
if ( m!(<title>.*?</title>)!s )
{
    $title = $1 ;
}

# And meta data: every <meta ...> tag together with the whitespace around it.
@meta = m!\s*<meta[^>]*>\s*!sg ;

if ( $extract )
{
    # --extract: dump everything taken from the target and stop; the
    # template is never read in this mode.
    foreach my $tag (@meta)
    {
        print "Meta: $tag\n" ;
    }

    # Debug:
    my $rule = "-----------------------------------------------------------\n" ;
    foreach my $name (@sections)
    {
        my $text = $sections{$name} ;
        print $rule, "Section:: $name\n" ;
        print $text ;
        # Make sure the closing rule starts on a line of its own.
        print "\n" unless ( $text =~ /\n$/s ) ;
        print $rule ;
    }

    print "Title:\n$title\n", "\n", "Meta data:\n", @meta, "\n" ;
    exit ;
}


## ---- Substitute

# Slurp the template into $_ ; from here on $_ holds the output document.
# Three-argument open so that the template name is always treated as a plain
# file name, and a failure reports which file could not be read and why.
open(my $templateFh, '<', $template)
    || die "$prog: cannot open template '$template' for reading: $!\n" ;
binmode $templateFh ;
# local $/ makes this read a whole-file slurp regardless of the current
# record separator.
$_ = do { local $/ ; <$templateFh> } ;
close($templateFh) ;

# Now substitute into template
#
# Each section found in the target replaces the <div id="..."> element with
# the same id in the template ($_).  The extent of the template's element is
# found with doSectionNamed, which counts nested <div>s; a plain
# "up to the first </div>" match would stop inside a section that itself
# contains divs and leave its tail behind in the output.

# $s stays a package variable (not "my") so that its value remains visible
# inside the subs called from this loop.
for $s (@sections)
{
    my $replacement = $sections{$s} ;
    if ( !defined $replacement || $replacement eq '' )
    {
	# print STDERR "No section '$s' - skipped\n" ;
	next ;
    }

    my $placeholder = &doSectionNamed($_, $s) ;
    # Template has no element with this id: nothing to replace.
    next if ( $placeholder eq '' ) ;

    # When the closing </div> is missing, the text returned runs on to the
    # end of the template; replacing that would throw away the rest of the
    # page, so leave such a section alone.
    if ( $placeholder !~ m!</div>\z! )
    {
	print STDERR "Section '$s' is not closed in '$template' - skipped\n" ;
	next ;
    }

    # The placeholder starts at the first opening tag for this id, so the
    # first occurrence of its text is the element itself.
    my $start = index($_, $placeholder) ;
    substr($_, $start, length($placeholder)) = $replacement ;
}

# Title
# /s lets the template's <title> element span several lines.
if ( $title ne '' )
{
    s!<title>.*?</title>!$title!s ;
}

# Meta data
# The target's <meta> tags take the place of the template's: they go where
# the template's first <meta> tag (and the whitespace around it) was, and
# every other <meta> tag of the template is dropped.  A template without
# any <meta> tag is left as it is.

$meta = join("",@meta) ;
$meta =~ s/ *$// ;

if ( m/\s*<meta[^>]*>\s*/ )
{
    # Split at the recorded match offsets instead of planting a textual
    # marker and searching for it again: a marker word could also occur
    # elsewhere in the template and be replaced by mistake.
    my $before = substr($_, 0, $-[0]) ;
    my $after  = substr($_, $+[0]) ;

    # Remove all others (none can start before the first match).
    $after =~ s/\s*<meta[^>]*>//g ;

    # Insert the new stuff
    $_ = $before . $meta . $after ;
}

# ---- Write out

# The merged document goes to standard output, byte for byte.
binmode STDOUT ;
# Fix up ... UNIX newlines: delete every carriage return.
tr/\r//d ;
print $_ ;


# -------------------------------------------

# doSectionNamed(TEXT, NAME)
#
# Returns the first <div id="NAME"> element found in TEXT: the opening tag,
# everything inside it (nested divs included) and its closing </div>.
# Returns '' when TEXT has no such opening tag.  When the closing </div> is
# missing, the result runs from the opening tag to the end of TEXT.
sub doSectionNamed
{
    my ( $all, $sectName ) = @_ ;

    # Find start (then count nestings).
    # The id comes from the $sectName argument, not from whatever loop
    # variable the caller happens to use; \Q...\E makes it match literally.
    if ( $all =~ m!<div\s*id="\Q$sectName\E"\s*>! )
    {
	# $-[0] / $+[0] are the start and end offsets of the match; using
	# them keeps the opening tag and the remainder in lexical variables.
	my $openTag = substr($all, $-[0], $+[0] - $-[0]) ;
	my $rest    = substr($all, $+[0]) ;
	return $openTag . doSection($rest) ;
    }
    return '' ;
}

# doSection(TEXT)
#
# TEXT is what follows an opening <div ...> tag.  Returns TEXT up to and
# including the </div> that closes that tag, counting nested divs on the
# way.  If that </div> never comes, the whole of TEXT is returned.
sub doSection
{
    my $all = defined $_[0] ? $_[0] : '' ;

    # Number of divs currently open; the caller has already consumed one
    # opening tag.  Kept lexical so no package variable is touched.
    my $depth = 1 ;

    # Walk over every opening or closing div tag in order.
    # Bit crude: a '>' inside an attribute value ends the tag match early,
    # but the text left over is not a tag, so the count is presumably
    # still right.
    while ( $all =~ m!(<div[^>]*>|</div>)!g )
    {
	if ( substr($1, 0, 2) eq '</' )
	{
	    $depth-- ;
	}
	else
	{
	    $depth++ ;
	}

	# pos() is the offset just past the tag that was matched last.
	return substr($all, 0, pos($all)) if ( $depth == 0 ) ;
    }

    # Early end.  Didn't find closing </div>
    return $all ;
}
