#!/usr/local/bin/perl

use strict;
use warnings;

# Builds per-dataset training and test feature files from the GIF images in
# $i_dir.
#
# Image names must look like <f|r><set letter><3 digits>.gif, e.g. "ra012.gif":
#   - first character:  'r' is stored as class 1, anything else ('f') as 0
#   - second character: dataset letter, mapped to an index ('a' => 0, ...)
#   - three digits:     image number within that dataset and class
#
# For each image a preprocessed "<image>.vx" file is produced (unless it is
# already present), then ./feature is run on every image and its output is
# appended to $o_dir/train<set> or $o_dir/test<set>.

# Configuration
my $train = .66;        # fraction of the highest image number used as the train/test boundary
my $i_dir = "images";   # directory holding the .gif inputs and their .vx files
my $o_dir = "vectors";  # directory receiving the train<set>/test<set> files

# Global variables
my @datasets;		# set->fake(0)/real(1)->image(#)->image filename

# Redirect errors
# Three-argument open with an explicit write mode: a bare "/dev/null" would
# reopen STDERR read-only, so writes to it would fail rather than be dropped.
open(STDERR, '>', '/dev/null')
  or fail("cannot redirect STDERR to /dev/null: $!");

# Remove junk
# Same effect as `rm -f`: missing files and failed removals are ignored.
unlink(glob("${o_dir}/train*"), glob("${o_dir}/test*"));

# Obtain gif filenames & determine data sets / highest numbers
my @vars;   # NOTE(review): not referenced anywhere in the visible script
my @gif_files = glob("${i_dir}/*.gif");

for my $gif_path (@gif_files) {
  # remove directory (last path component, so nested $i_dir values also work)
  my $name = (split m{/}, $gif_path)[-1];

  # check if valid filename; the pattern is anchored so that the captures
  # below are guaranteed to be the class, set letter and number
  my ($kind, $letter, $digits) = $name =~ /\A([fr])([a-z])(\d{3})\.gif\z/;
  if (!defined $kind) {
    fail("weird filename $name");
  }

  # extract dataset
  my $set = ord($letter) - ord('a');
  if (!$datasets[$set]) {
    # one list of filenames per class: index 0 = fake, index 1 = real
    $datasets[$set] = [ [], [] ];

    # make sure both output files exist (and are empty) for this dataset
    for my $kind_of_file ('train', 'test') {
      my $out_path = "${o_dir}/${kind_of_file}${set}";
      open(my $out, '>', $out_path)
        or fail("cannot create $out_path: $!");
      close($out)
        or fail("cannot close $out_path: $!");
    }
  }

  # extract file number ("007" => 7; string-to-number conversion is decimal)
  my $number = $digits + 0;
  my $actual = ($kind eq 'r') ? 1 : 0;
  $datasets[$set][$actual][$number] = $name;

  # preprocess, unless this image's .vx file is already there
  my $tmp = "${i_dir}/${name}";
  if (!-e "${tmp}.vx") {
    # NOTE(review): backticks run the command through /bin/sh, where `source`
    # is not a POSIX builtin -- confirm this works with the system shell.
    my $cmd = join('; ',
      "source init.setup",
      "vformat if=${tmp} of=${tmp}.vx -gif",
      "vfix -byte if=${tmp}.vx | vmedian | v3thin | v3prune n=20"
        . " | vpix hi=1 lo=0 -scale | ./bb of=${tmp}.op.vx",
      "rm -f ${tmp}.vx",
      "mv ${tmp}.op.vx ${tmp}.vx",
    );
    # qx rather than system(): the tools' standard output is captured and
    # thrown away instead of being echoed.
    qx{$cmd};
  }
}

# Split every class of every dataset into a training and a test part.
# With $max the highest image number seen and $amt = int($max * $train),
# numbers 1 .. $amt-1 go to train<set> and numbers $amt .. $max to test<set>.
for my $set (0 .. $#datasets) {
  my $classes = $datasets[$set];
  next unless $classes;   # no image used this dataset letter

  for my $actual (0, 1) {
    my $files = $classes->[$actual];
    my $max = $#{$files};
    my $amt = int($max * $train);

    append_features("${o_dir}/train${set}", $actual, @{$files}[1 .. $amt - 1]);
    append_features("${o_dir}/test${set}",  $actual, @{$files}[$amt .. $max]);
  }
}

# Runs ./feature on each given image and appends its output to $out_path.
#   $out_path - file to append to (created if missing)
#   $actual   - class label passed to ./feature as t=<label>
#   @files    - image filenames relative to $i_dir; undefined entries
#               (unused image numbers) are skipped
sub append_features {
  my ($out_path, $actual, @files) = @_;

  open(my $out, '>>', $out_path)
    or fail("cannot append to $out_path: $!");
  for my $file (@files) {
    next unless defined $file;
    print {$out} `./feature if=${i_dir}/${file}.vx of=/dev/stdout t=${actual}`;
  }
  close($out)
    or fail("cannot close $out_path: $!");
  return;
}

# Reports a fatal problem and exits with status 1.  The message goes to
# STDOUT because STDERR is redirected to /dev/null by this script.
sub fail {
  my ($message) = @_;
  print "ERROR: $message\n";
  exit 1;
}
