#!/usr/pkg/bin/perl -w

=comment

  YamCha -- Yet Another Multipurpose CHunk Annotator
 
  $Id: mksvmdata.in,v 1.4 2004/03/26 13:33:03 taku-ku Exp $;

  Copyright (C) 2000-2004 Taku Kudo <taku-ku@is.aist-nara.ac.jp>
  This is free software with ABSOLUTELY NO WARRANTY.
  
  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2.1 of the License, or (at your option) any later version.
  
  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.
  
  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
=cut

# $Id: mksvmdata.in,v 1.4 2004/03/26 13:33:03 taku-ku Exp $;
use Getopt::Std;
# Command-line handling.
#
# The output prefix may be supplied either as "-i prefix" or as the first
# positional argument.  getopts() must be given a REFERENCE to the option
# hash: passing %arg itself flattens the (empty) hash to an empty list, so
# Getopt::Std falls back to setting $opt_i, $arg{i} stays undefined and
# "-i prefix" always ended in the usage message.
my %arg;
getopts("i:", \%arg);

die "Usage: $0 -i output_prefix_name\n" if (! $arg{i} && ! @ARGV);

# -i takes precedence over the positional argument.
my $prefix = $arg{i} || $ARGV[0];

# %dicHash maps a token string to its id; it is filled from the .param file.
my %dicHash = ();
my %outHash = ();

# All three file names are derived from the same prefix.
my $paramFileName = $prefix . ".param";
my $dataFileName  = $prefix . ".data";
my $outFileName   = $prefix . ".svmdata";

# Load the dictionary from the .param file into %dicHash.
#
# The file is read as two sections separated by a blank line: a header
# section that is ignored, followed by lines of the form "<id> <string>".
# Reading stops at the next blank line or at end of file.
#
# Three-argument open with an explicit '<' mode is used so that characters
# such as '>' or '|' in the user-supplied prefix cannot change how the file
# is opened.
open(my $paramFh, '<', $paramFileName) || die "$! $paramFileName\n";

# skip headers
while (<$paramFh>) {
    last if (/^\s*$/);
}

# contents
while (<$paramFh>) {
    chomp;
    last if (/^\s*$/);
    my ($id, $str) = split();
    # A line with fewer than two fields leaves $str undefined; storing it
    # would create a bogus entry under the empty key.
    unless (defined $str) {
        warn "skipping malformed line $. in $paramFileName\n";
        next;
    }
    $dicHash{$str} = $id;
}
close($paramFh);

# parse 
# Convert the .data file into the .svmdata file.
#
# Input: a header section terminated by a blank line (ignored), then one
# record per non-blank line: a class label followed by whitespace-separated
# tokens.  Every token must be present in %dicHash.
# Output: one line per record, "<class> <id>:1 <id>:1 ...", with the ids
# de-duplicated and sorted in ascending numeric order.
#
# Three-argument open keeps the user-supplied prefix from altering the open
# mode, and lexical handles avoid clobbering package-global barewords.
open(my $dataFh, '<', $dataFileName) || die "$! $dataFileName\n";
open(my $svmFh,  '>', $outFileName)  || die "$! $outFileName\n";

# skip headers
while (<$dataFh>) {
    last if (/^\s*$/);
}

# contents
while (<$dataFh>) {
    chomp;
    next if (/^\s*$/);

    # split ' ' discards leading whitespace, so an indented record does not
    # end up with an empty class label.
    my @fields = split(' ', $_);
    my $class  = shift(@fields);

    # Collect the ids in a hash so that repeated tokens collapse to one entry.
    my %seen;
    for my $key (@fields) {
        if (defined $dicHash{$key}) {
            $seen{$dicHash{$key}} = 1;
        } else {
            die "[$key] is not defined in $paramFileName\n";
        }
    }

    my $features = join(' ', map { "$_:$seen{$_}" } sort { $a <=> $b } keys %seen);
    print {$svmFh} "$class $features\n" or die "$! $outFileName\n";
}
close($dataFh);
# Buffered write errors only surface at close, so check it.
close($svmFh) || die "$! $outFileName\n";
