#!/usr/bin/perl
# Description: This script runs the NetMHCcons 1.0.ws0 Web Service. It reads a FASTA file from STDIN and produces predictions.
# Author: Edita Karosiene
# Email: edita@cbs.dtu.dk
# Version: 1.0 ws0
# Date: 2011-05-30
# usage: perl netmhccons.pl [-method method_name] [-length peptide_length(s)] [-a allele_name] [-s] < example.fsa

# include standard XML::Compile helper functions (used to initiate WSDL proxys)
use Data::Dumper;
use Getopt::Long;
use strict;
require "xml-compile.pl";


my ($length, $allele, $sort);
# associating options values with variables

&GetOptions (
'length:s' => \$length,
'a:s' => \$allele,
's' => \$sort,
);


# create proxy to NetMHCcons Web Service
my $netmhccons = WSDL2proxy ( 'http://www.cbs.dtu.dk/ws/NetMHCcons/NetMHCcons_1_0_ws0.wsdl' );

# append schema definitions
$netmhccons = appendSchemas ( $netmhccons , 
	"http://www.cbs.dtu.dk/ws/common/ws_common_1_0b.xsd" ,
	"http://www.cbs.dtu.dk/ws/NetMHCcons/ws_netmhccons_1_0_ws0.xsd"
);

my %ops = addOperations ( $netmhccons ) ;

# Get sequence in fasta format from STDIN
my @fasta;
my $entry = -1;

while (<STDIN>) {
	if (/^>(.*)/) {
		my ($id , $comment) = split (" ",$1);
		$entry++;
		$fasta[$entry]->{id} = $id;
		$fasta[$entry]->{comment} = $comment if defined $comment;
	} elsif (/^([A-Za-z]+)/) {
		$fasta[$entry]->{seq} .= $1;
	}
}

# Create sequence for request
my @sequence;
for ( my $i = 0 ; $i < scalar ( @fasta ) ; $i ++ ) {
	push @sequence , { id => $fasta[$i]->{id} , comment => $fasta[$i]->{comment} , seq => $fasta[$i]->{seq} };
}

# Do the request
my $response;
if ((defined $length) and (defined $allele) and (defined $sort)){
	$response = $ops{runService}->(
	 parameters => {
	  parameters => {
	   allele => $allele ,
	   length => $length ,
	   sort=> $sort,
	    sequencedata => {sequence => [@sequence]} } });
}
elsif ((defined $length) and (defined $allele) and (!defined $sort)){
	$response = $ops{runService}->(
	 parameters => {
	  parameters => {
	   allele => $allele ,
	   length => $length ,
	    sequencedata => {sequence => [@sequence]} } });
}
elsif ((defined $length) and (!defined $allele) and (defined $sort)){
	$response = $ops{runService}->(
	 parameters => {
	  parameters => {	 
	   length => $length ,
	   sort=> $sort,
	    sequencedata => {sequence => [@sequence]} } });
}
elsif ((!defined $length) and (defined $allele) and (defined $sort)){
	$response = $ops{runService}->(
	 parameters => {
	  parameters => {
	   allele => $allele ,
	   sort=> $sort,
	    sequencedata => {sequence => [@sequence]} } });
}
elsif ((defined $length) and (!defined $allele) and (!defined $sort)){
	$response = $ops{runService}->(
	 parameters => {
	  parameters => {
	   length => $length ,
	    sequencedata => {sequence => [@sequence]} } });
}
elsif ((!defined $length) and (defined $allele) and (!defined $sort)){
	$response = $ops{runService}->(
	 parameters => {
	  parameters => {
	   allele => $allele ,
	    sequencedata => {sequence => [@sequence]} } });
}
elsif ((!defined $length) and (!defined $allele) and (defined $sort)){
	$response = $ops{runService}->(
	 parameters => {
	  parameters => {
	   sort=> $sort,
	    sequencedata => {sequence => [@sequence]} } });
}
elsif ((!defined $length) and (!defined $allele) and (!defined $sort)){
	$response = $ops{runService}->(
	 parameters => {
	  parameters => {
	    sequencedata => {sequence => [@sequence]} } });
}

# uncomment the two following lines to inspect the structure of $response
#use Data::Dumper;
#print Dumper($response); 


#get job id which can be used to get the results later
my $jobid;
if ( ! defined ( $response->{parameters}->{queueentry}) ) {
	die "error obtaining jobid\n";
} else {
	$jobid = $response->{parameters}->{queueentry}->{jobid};
	print STDERR "# waiting for job $jobid";
	my $status = "UNKNOWN";;
	# poll the queue
	while ( $status =~ /ACTIVE|RUNNING|QUEUED|WAITING|PENDING|UNKNOWN/ ) {
		my $response = $ops{pollQueue}->( job => { job  => { jobid => $jobid }   }) ;
		$status = $response->{queueentry}->{queueentry}->{status};
		print STDERR  ".";
	}
	die "\nunexpected job status '$status'\n" unless $status eq "FINISHED";
	print STDERR "\n# job has finished\n";
}

# when the job is done, fetch the result
$response = $ops{fetchResult}->(job => { jobid => $jobid });

# uncomment the two following lines to inspect the structure of $response
#use Data::Dumper;
#print Dumper($response); 

#printing the results (suitable for one sequence)
print "\n\n# Method: NetMHCcons\n\n"; #introduce variable here if option 'method' is implemented

print "# Input is in FASTA format\n\n";

if (defined $length) {
	print "# Peptide length $length\n\n";
}
else {
	print "# Peptide length 9\n\n";
}

print "# Threshold for Strong binding peptides (IC50)	50.000 nM\n\n"; # introduce variable here if option 'affS' is implemented
print "# Threshold for Weak binding peptides (IC50)	500.000 nM\n\n"; #introduce variable here if option 'affW' is implemented

print "# Threshold for Strong binding peptides (\%Rank)	0.5%\n\n"; #introduce variable here if option 'rankS' is implemented
print "# Threshold for Weak binding peptides (\%Rank)	2%\n\n"; # introduce variable here if option rankW is implemented

if(defined $allele) {
	print "# Allele: $allele\n\n";
}
else{
	print "# Allele: HLA-A02:01\n\n";
}

print "---------------------------------------------------------------------------------------\n",
"   pos       Allele         peptide         Identity 1-log50k(aff) Affinity(nM)  %Rank  BindingLevel\n",
"---------------------------------------------------------------------------------------\n";
my $strong_binders = 0;
my $weak_binders = 0;
foreach my $ann (@{$response->{parameters}->{anndata}->{ann}}) {
	my $sequence = $sequence[0]->{seq};
	my $id = sprintf("%16s", $sequence[0]->{id});
	foreach my $annrecord (@{$ann->{annrecords}->{annrecord}}) {
			my $pos = sprintf ("%6s", $annrecord->{range}->{begin}->numify);
			my $allele = sprintf ("%12s", $annrecord->{feature});
			my $end = sprintf ("%6s", $annrecord->{range}->{end}->numify);
			my $no_extract = $end - $pos;
			my $pep = substr($sequence, $pos, $no_extract);
			$pep = sprintf("%15s", $pep);
			my $log_score = sprintf ("%13.3f", $annrecord->{score}[0]->{value});
			my $affinity = sprintf ("%12.2f", $annrecord->{score}[1]->{value});
			my $rank = sprintf ("%6.2f", $annrecord->{score}[2]->{value});
			
			my $comment;
			if (exists ($annrecord->{comment})){
				$comment = "<=".$annrecord->{comment};
			
			}
			if ($comment =~ m/SB/) {
				$strong_binders++;
			}
			elsif ($comment =~ m/WB/) {
				$weak_binders++;
			}
							
			print "$pos $allele $pep $id $log_score $affinity $rank $comment\n";
			
	}
print "----------------------------------------------------------------------------------------\n",
"Number of strong binders: $strong_binders Number of weak binders: $weak_binders\n",
"----------------------------------------------------------------------------------------\n";
	
	
	
}
