#!/usr/bin/perl
# Description: This script runs the NetOGlyc 3.1d.ws0 Web Service. It reads a FASTA file from STDIN and produces predictions in a simple table.
# Author: Edita Bartaseviciute
# Email: edita@cbs.dtu.dk
# Version: 3.1 ws0
# Date: 2009-07-14
# usage: perl netoglyc.pl [-sp] < example.fsa


use strict;
# include standard XML::Compile helper functions (used to initiate WSDL proxys)
require "xml-compile.pl"; # downloadable from the same site as this script

#taking option from a command line
my $signalp = $ARGV[0];

# create proxy to NetOGlyc Web Service
my $netoglyc = WSDL2proxy ( 'http://www.cbs.dtu.dk/ws/NetOGlyc/NetOGlyc_3_1d_ws0.wsdl' );

# append schema definitions
$netoglyc = appendSchemas ( $netoglyc , 
	"http://www.cbs.dtu.dk/ws/common/ws_common_1_0b.xsd" ,
	"http://www.cbs.dtu.dk/ws/NetOGlyc/ws_netoglyc_3_1_ws0.xsd"
);
# create hash of operations from proxy
my %ops = addOperations ( $netoglyc ) ;

# Get sequence in fasta format from STDIN
my @fasta;
my $entry = -1;

while (<STDIN>) {
	if (/^>(.*)/) {
		my ($id , $comment) = split (" ",$1);
		$entry++;
		$fasta[$entry]->{id} = $id;
		$fasta[$entry]->{comment} = $comment if defined $comment;
	} elsif (/^([A-Za-z]+)/) {
		$fasta[$entry]->{seq} .= $1;
	}
}
# Create sequence for request
my @sequence;
for ( my $i = 0 ; $i < scalar ( @fasta ) ; $i ++ ) {
	push @sequence , { id => $fasta[$i]->{id} , comment => $fasta[$i]->{comment} , seq => $fasta[$i]->{seq} };
}

# Do the request
my $response;
if ($signalp) {	
	$response = $ops{runService}->(
	 parameters => {
	  parameters => {
	   signalp => 'required',
	   sequencedata => {sequence => [@sequence]} } });
}
else {
	$response = $ops{runService}->(
	 parameters => {
	  parameters => {
	   sequencedata => {sequence => [@sequence]} } });
}

# uncomment the two following lines to inspect the structure of $response
#use Data::Dumper;
#print Dumper($response);   

#get job id which can be used to get the results later
my $jobid;

if ( ! defined ( $response->{parameters}->{queueentry}) ) {
	die "error obtaining jobid\n";
} else {
	$jobid = $response->{parameters}->{queueentry}->{jobid};
	print STDERR "# waiting for job $jobid";
	my $status = "UNKNOWN";;
	# poll the queue
	while ( $status =~ /ACTIVE|RUNNING|QUEUED|WAITING|PENDING|UNKNOWN/ ) {
		my $response = $ops{pollQueue}->( job => { job  => { jobid => $jobid }   }) ;
		$status = $response->{queueentry}->{queueentry}->{status};
		print STDERR  ".";
	}
	die "\nunexpected job status '$status'\n" unless $status eq "FINISHED";
	print STDERR "\n# job has finished\n";
}
# when the job is done, fetch the result
$response = $ops{fetchResult}->(job => { jobid => $jobid });

# uncomment the two following lines to inspect the structure of $response
#use Data::Dumper;
#print Dumper($response); 

#printing the results (suitable for one sequence)
foreach my $ann (@{$response->{parameters}->{anndata}->{ann}}) {
	my $sequence = $sequence[0]->{seq};
	my $length = length ($sequence);

	print "Name:  $ann->{sequence}->{id}\t\tLength:  $length\n\n";
	print "Name\t\t\tS/T   Pos  G-score I-score Y/N  Comment\n";
	print "------------------------------------------------------------------------\n";
   	
	foreach my $annrecord (@{$ann->{annrecords}->{annrecord}}) {
		my $G_score = sprintf ("%.3f", $annrecord->{score}[0]->{value});
		my $I_score = sprintf ("%.3f", $annrecord->{score}[1]->{value});
		my $pos = sprintf ("%4s", $annrecord->{pos});
		my $S_T = substr ($sequence, $pos-1, 1);
		my ($Y_N, $comment);
		if (exists $annrecord->{comment}) {
			$Y_N = substr ($annrecord->{comment}, 0, 1);
			if ($annrecord->{comment} =~ m/signal/) {
				$comment = substr ($annrecord->{comment}, 2);
			}
			else {
				$comment = "-";
			}
		}
		else {
			$Y_N = ".";
			$comment = "-";
		}
		print "$ann->{sequence}->{id}\t\t $S_T   $pos   $G_score   $I_score   $Y_N   $comment\n";		
	}
	print "-------------------------------------------------------------------------\n";	
}
