#!/usr/bin/perl # Description: This is a template example script. It reads an alignment as fasta and produces formatted output from the web service # Author: Edita Bartaseviciute # Email: edita@cbs.dtu.dk # Version: 1.1 ws0 # Date: 2010-04-07 # usage: perl maxalign.pl alignment.fsa # include standard XML::Compile helper functions (used to initiate WSDL proxys) use strict; require "xml-compile.pl"; # create proxy to maxalign my $maxalign = WSDLclient ( 'http://www.cbs.dtu.dk/ws/MaxAlign/MaxAlign_1_1_ws1.wsdl' ); #my $maxalign = WSDL2proxy ( 'http://www.cbs.dtu.dk/ws/MaxAlign/MaxAlign_1_1_ws1.wsdl' ); #use Data::Dumper; #print Dumper($maxalign); =pod # append schema definitions $maxalign = appendSchemas ( $maxalign , "http://www.cbs.dtu.dk/ws/common/ws_common_1_0b.xsd" , "http://www.cbs.dtu.dk/ws/MaxAlign/ws_maxalign_1_1_ws1.xsd", "http://bioxsd.org/BioXSD-0.4.xsd" ); =cut # Get alignment in fasta format from STDIN my @fasta; my $entry = -1; while () { chomp; if (/^>(.*)/) { my ($id , $comment) = split (" ",$1); $entry++; $fasta[$entry]->{id} = $id; $fasta[$entry]->{comment} = $comment if defined $comment; } else { $fasta[$entry]->{seq} .= $_; } } # Create request my @sequence; for ( my $i = 0 ; $i < scalar ( @fasta ) ; $i ++ ) { my @array_gaps; my @seq1; my $dashes = 0; my $letters = 0; my $sequence; my @gaps = (); my @lengths = (); my $string = $fasta[$i]->{seq}; #going through the sequence and saving starting positions and length of the gaps into arrays for (my $i = 0; $i < length($string); $i++) { my $symbol = substr ($string, $i, 1); if ($symbol eq "-") { $dashes++; if ($i == (length($string) - 1)) { push (@gaps, $letters); push (@lengths, $dashes); } } else { $sequence .= $symbol; if ($dashes != 0 and $letters != 0) { push (@gaps, $letters); push (@lengths, $dashes); $dashes = 0; } elsif ($dashes != 0 and $letters == 0) { my $start = 1 - $dashes; push (@gaps, $start); push (@lengths, $dashes); $dashes = 0; } $letters++; } } $sequence[$i] = {sequenceRecord=> {sequence => $sequence, formalReference => {accession => $fasta[$i]->{id} }}}; for (my $m = 0; $m <= $#gaps; $m++) { $sequence[$i] -> {gap} -> {'start'} = $gaps[$m];#, 'len' => [$lengths[$m]] }; $sequence[$i] -> {gap} -> {'len'} = $lengths[$m] } #, {gap => {'start' => [21], 'len' => [2] }}; } use Data::Dumper; print Dumper(@sequence); # Do the request my $response = $maxalign->{maxalign}->( parameters => { parameters => { originalAlignment => {alignedSequence => [@sequence]} } }); use Data::Dumper; print Dumper($response); =pod # Print the response print "# Original number of sequences: $response->{parameters}->{output}->{originalsequencenumber}\n"; print "# Original total number of columns: $response->{parameters}->{output}->{originalcolumnnumber}\n"; print "# Original ungapped columns: $response->{parameters}->{output}->{originalungapcolumnnumber}\n"; print "# Original alignment area: $response->{parameters}->{output}->{originalalignmentarea}\n\n"; if (exists $response->{parameters}->{output}->{resultsequencenumber} and $response->{parameters}->{output}->{resultungapcolumnnumber} and $response->{parameters}->{output}->{resultalignmentarea} ) { print "# Number of sequences in result: $response->{parameters}->{output}->{resultsequencenumber}\n"; print "# Ungapped columns in result: $response->{parameters}->{output}->{resultungapcolumnnumber}\n"; print "# Resulting alignment area: $response->{parameters}->{output}->{resultalignmentarea}\n\n"; print "#New alignment in fasta format:\n\n"; foreach my $seqref (@{$response->{parameters}->{output}->{resultalignment}->{sequence}}) { print ">$seqref->{id}"; print exists $seqref->{comment} ? " $seqref->{comment}\n" : "\n"; for (my $i = 0; $i < length $seqref->{seq}; $i += 60) { print substr($seqref->{seq}, $i, 60), "\n"; } } } else { print "This alignment can't be improved by removing sequences\n"; }