#!/usr/bin/perl -w

#========================================================================
# Author: Lizhi.Xu
# Email:  xulzh@hotmail.com
#========================================================================

use strict;
use warnings;

use FindBin qw($Bin $Script);
#use lib "$FindBin::Bin/lib";

use Getopt::Long;
# Script version (shown in the help text) and the table of parsed options.
my $VERSION = '0.1.0';
my %opts;

# Accept "-output <file>" and "-help"; a rejected command line ends in usage().
unless (GetOptions(\%opts, 'output=s', 'help')) {
	usage(2, $!);
}

# Validate what was parsed: -help wins, otherwise at least one input is needed.
if (exists $opts{help}) {
	usage(2, "Show the help message\n");
}
if (!@ARGV) {
	usage(1, "Input was not accepted, use '-help' to get more message\n");
}

# Script-wide state.  %hash keeps the non-FEATURES header fields of every
# record (locus name => keyword => value); $locus names the record being read.
# $length and $date are declared here but not used by the code below.
my (%hash, $locus, $length, $date);

# Send the table to the -output file when one was requested.  Three-argument
# open stops characters in the file name from being read as an open mode.
if (exists $opts{output}) {
	open(STDOUT, '>', $opts{output}) or die "failed to write $opts{output}: $!\n";
}
print "#locus_tag\tseq_id\tstart\tstop\tstrand\tlength\ttype\tdescription\tclassify\tgene_name\tproduct\n";

foreach my $gb_file (@ARGV) {
	open(my $fh, '<', $gb_file) or die "failed to open $gb_file: $!\n";
	# A lexical line variable is used because the continuation readers called
	# below may overwrite $_ while they look ahead in the file.
	while (defined(my $line = <$fh>)) {
		chomp $line;
		next if ($line =~ /^\s*$/);
		if ($line =~ /LOCUS       (\S+)/) {
			$locus = $1;
		}
		next unless ($line =~ /\s*(\S+)\s+(\S.+)$/);
		my ($key, $value) = ($1, $2);
		$value .= read_gbf($fh);
		if ($key =~ /FEATURES/) {
			_read_feature_table($fh, $locus);
		} else {
			$hash{$locus}{$key} = $value;
		}
	}
	close $fh;
}

# Read one FEATURES table from $fh and print one row per feature.
#   $fh     - handle positioned just after the FEATURES header line
#   $seq_id - value for the seq_id column (the current LOCUS name)
# The table ends at the first line that starts in column one (ORIGIN, CONTIG,
# "//", ...) or at end of file.  The terminating line is pushed back with seek
# so the caller reads it as an ordinary header line.
sub _read_feature_table {
	my ($fh, $seq_id) = @_;
	my %feat;	# feature being assembled; empty until the first feature key
	while (1) {
		my $pos = tell($fh);
		my $line = <$fh>;
		last unless defined $line;
		chomp $line;
		next if ($line =~ /^\s*$/);
		if ($line =~ /^\S/) {
			# Rewind by recorded offset rather than by an assumed line length.
			seek($fh, $pos, 0);
			last;
		} elsif ($line =~ /^ {5}(\S+)\s+(\S.*)/) {
			# A new feature key: emit the finished feature, then start afresh.
			my ($type, $location) = ($1, $2);
			_print_feature($seq_id, \%feat);
			$location .= read_feature($fh);
			%feat = (
				type       => $type,
				locus_tag  => '-',
				protein_id => '-',
				name       => '-',
				product    => '-',
				note       => '-',
				_parse_location($location),
			);
		} elsif ($line =~ /^ {21}\/([^\s=]+)=(\S.*)$/) {
			my ($qualifier, $info) = ($1, $2);
			$info .= read_feature($fh);
			$info =~ s/^"//;
			$info =~ s/"$//;
			# Exact names are compared so that /gene_synonym or /old_locus_tag
			# cannot overwrite the real /gene or /locus_tag value.
			if ($qualifier eq 'gene' or $qualifier =~ /type$/) {
				$feat{name} = $info;
			}
			# /note is collected but is not part of the printed row.
			if ($qualifier =~ /^(?:product|note|locus_tag|protein_id)$/) {
				$feat{$qualifier} = $info;
			}
		}
	}
	# The last feature has no following feature key to trigger its output.
	_print_feature($seq_id, \%feat);
	return;
}

# Turn a location string into the start/stop/strand/length fields of a feature.
# Returns a key/value list.  start and stop are the lowest and highest
# coordinates found (any '<' or '>' partial marker is kept in the text), strand
# is '-' when the location contains complement( and '+' otherwise, and length
# is stop - start + 1 computed on the bare numbers.  For join()/order() this is
# the overall span, so a feature wrapping the origin of a circular sequence is
# reported as the whole span between its lowest and highest base.
sub _parse_location {
	my ($location) = @_;
	my $strand = ($location =~ /complement\(/) ? '-' : '+';
	# Remote segments look like "J00194.1:100..202"; drop the accession part so
	# its digits are not mistaken for coordinates.
	(my $local = $location) =~ s/[\w.]+://g;
	my ($low, $high);
	foreach my $token ($local =~ /([<>]?\d+)/g) {
		(my $base = $token) =~ tr/<>//d;
		$low  = { base => $base, text => $token } if (!$low  or $base < $low->{base});
		$high = { base => $base, text => $token } if (!$high or $base > $high->{base});
	}
	unless ($low) {
		return (start => '-', stop => '-', strand => $strand, length => '-');
	}
	return (
		start  => $low->{text},
		stop   => $high->{text},
		strand => $strand,
		length => $high->{base} - $low->{base} + 1,
	);
}

# Print the table row for one assembled feature.
#   $seq_id - value for the seq_id column
#   $feat   - hash reference built by _read_feature_table
# Nothing is printed for an empty feature, a feature without numeric
# coordinates, or a feature of type 'gene'.  The description column carries
# the protein_id and the classify column is left empty.
sub _print_feature {
	my ($seq_id, $feat) = @_;
	return unless (defined $feat->{start} and $feat->{start} =~ /\d/);
	return if ($feat->{type} eq 'gene');
	print join("\t",
		$feat->{locus_tag}, $seq_id,
		@{$feat}{qw(start stop strand length type protein_id)},
		'', $feat->{name}, $feat->{product}), "\n";
	return;
}

# Collect the continuation lines of a GenBank header field.
#   $fh - read handle positioned just after the field's first line
# Returns the text of every following line that starts with twelve spaces and
# then a non-blank character, concatenated with no separator ('' when there is
# no such line).  The first line that does not fit is pushed back by seeking to
# its start, so the caller reads it next.
sub read_gbf {
	my $fh = shift;
	my $pos = tell($fh);
	my $text = '';
	# A lexical is used for the line so the caller's $_ is left untouched.
	while (defined(my $line = <$fh>)) {
		chomp $line;
		if ($line =~ /^ {12}(\S.*)$/) {
			$text .= $1;
			$pos = tell($fh);
		} else {
			seek($fh, $pos, 0);
			last;
		}
	}
	return $text;
}

# Collect the continuation lines of a feature location or qualifier value.
#   $fh - read handle positioned just after the first line of the value
# Blank lines are skipped.  Lines indented by 21 spaces that do not begin a new
# qualifier are appended, with no separator, to the returned string.  Reading
# stops at a new qualifier (21 spaces then '/'), a new feature key (5 spaces
# then text) or a line starting in column one; that line is pushed back by
# seeking to the last accepted position.  Any other layout is fatal.
sub read_feature {
	my $fh = shift;
	my $pos = tell($fh);
	my $text = '';
	# A lexical is used for the line so the caller's $_ is left untouched.
	while (defined(my $line = <$fh>)) {
		chomp $line;
		next if ($line =~ /^\s*$/);
		if ($line =~ /^ {21}\// or $line =~ /^ {5}\S+/ or $line =~ /^\S+/) {
			seek($fh, $pos, 0);
			last;
		} elsif ($line =~ /^ {21}(\S.*)$/) {
			$text .= $1;
			$pos = tell($fh);
		} else {
			die "found undefault line '$line'\n";
		}
	}
	return $text;
}


# Print the caller's message(s) followed by the built-in help text, then exit.
#   first argument  - a flag; it is taken off the argument list but not used
#   other arguments - printed ahead of the help text, joined by the list separator
# The bare exit means the process always ends with status 0.
sub usage {
    my ($flag, @notice) = @_;
    my $help = qq(@notice
    PROGRAM
        $Script  -  $VERSION
    USAGE
        perl $0 [options] <input.gbf> ...
    ARGUMENTS
        <input.gbf>    input genbank format file[s]
    OPTIONS
        -output <STR>    output file to instead of [STDOUT]
    DESCRIPTION
        This script using for genbank to feature table.
    \n);
    print $help;
    exit;
}
