#!/usr/bin/perl ######################################################################## # # Leucine Zipper Prediction 1.1 # ----------------------------- # # [ see Computational Approaches to Identify Leucine Zippers # Erich Bornberg-Bauer, Eric Rivals, Martin Vingron # Nucleic Acids Res., 26(11):2740-2746;(1998) ] # # version 1.1, 4.12.2001 - Hannes Luz # # report problems to # ######################################################################## # # NOTE: LZpred.pl needs to be accompanied by ncoils-binary version 2.2 # (ftp://ftp.ebi.ac.uk/pub/software/unix/coils-2.2/ncoils.tar.gz) # ######################################################################## $0 =~ s!^(.*/)!!; $| = 1; use IPC::Open2; ################################################################## ################################################################## # Constants $USAGE = "Usage: $0 [-help] <-i inputfile with fasta-sequence> [-v] [-info] [-o]"; $TRUE = 1; $FALSE = 0; $CHAR_PER_ROW = 60; $MAX_NB_CCF = 2; $MAX_CC_FRAMES = "two"; $NB_CCF = 0.5; ########################### # fetch input parameter ########################### $infilename = ""; $verboseflag = 0; $outflag = 0; $infoflag = 0; my $sequence, $seqname, $seqlength; &parseargs(@ARGV); &fetchfasta($infilename); $seqlength = length($sequence); #################################################################### # run coils: # assume that there is a executable binary 'ncoils' in a directory # covered by the PATH environment variable. # ncoils is supposed to be compiled from coils-2.2 # (ftp://ftp.ebi.ac.uk/pub/software/unix/coils-2.2/ncoils.tar.gz) # # data structure for predicted coiled coiled segments: %cc # # fields: "start" "length" "nb_ccf" "in_lz_id" #################################################################### $there_is_no_lz = $FALSE; $more_than_three_cc = $FALSE; $already_got_a_reason = $FALSE; $reason = ""; $infolines = ""; $nr_of_cc = 0; %cc = (); $coilscmd = "ncoils -f -win 28"; print STDOUT "trying to run ncoils ...\ncommand line options: $coilscmd\n"; $coilresults = ""; $pid = open2(\*READER, \*WRITER, $coilscmd); print WRITER ">$seqname\n$sequence"; close WRITER; $descriptionline = ; if ($descriptionline !~ /^>/) { $message = "running ncoils failed!"; $coilsdir = $ENV{'COILSDIR'}; $message .= "\n(Environment variable COILSDIR is not set.)" if (! defined $coilsdir); $message .= "\nncoils is available at:"; $message .= "\nftp://ftp.ebi.ac.uk/pub/software/unix/coils-2.2/ncoils.tar.gz"; &error($message); } while () { $line = $_; chomp $line; $line =~ s/ //g; $line =~ s/\t//g; $coilresults .= $line; } close READER; error("conflict in sequence length and length of ncoils' fasta output") if (length($coilresults) != $seqlength); while ($coilresults =~ /(x+)/g) { $coiledcoil = $1; $cc{$nr_of_cc}{"start"} = pos($coilresults) - length($coiledcoil) + 1; $cc{$nr_of_cc}{"length"} = length($coiledcoil); # using ncoils-2.2 the field nb_ccf # in the summary line has changed its meaning, # therefore nb_ccf is set to the minimum P to # define a coil segment $cc{$nr_of_cc}{"nb_ccf"} = $NB_CCF; for ($i=0; $i<5; $i++) { $cc{$nr_of_cc}{"in_lz_id"} = ""; } $nr_of_cc++; } $infolines .= ">$infilename\tCCF"; $infolines .= "\t0" if ($nr_of_cc == 0); for ($i=0; exists $cc{$i}; $i++) { $infolines .= "\t"; $infolines .= $cc{$i}{"start"}; $infolines .= "\t"; $infolines .= $cc{$i}{"start"} + $cc{$i}{"length"} - 1; } $infolines .= "\n"; if ($nr_of_cc == 0) { $already_got_a_reason = $TRUE; $there_is_no_lz = $TRUE; $reason = "(no coiled coil segments detected by ncoils)"; } if ($nr_of_cc > $MAX_NB_CCF) { $already_got_a_reason = $TRUE; $there_is_no_lz = $TRUE; $reason = "(there were more than $MAX_CC_FRAMES coiled coil segments detected)"; $more_than_three_cc = $TRUE; } #################################################################### # determine Leucine Repeats # # data structure for Leucine Repeats containing one mutation: %lrm # # data structure for Leucine Repeats: %lr # # fields: "start" "length" "num" "in_lz_id" # #################################################################### %lr = (); %lrm = (); $lrcounter = 0; $lrmcounter = 0; $seq = $sequence; $seq .= "XXXXXXX"; # to make regular expression containing 6 dots working $mflag = 1; for ($offset=0; ($offset 0) { if ($lmcounter >= 5) { &setlrm($lrmcounter,($start_lmrep+$start),($stop_lmrep-$start_lmrep+1), $lmcounter, $mutpos); $lrmcounter++; } $mutcounter = 0; if ($leucounter == 0) { $mutpos = 0; $lmcounter = 0; $start_lmrep = $stop_lmrep = 0; } else { $mutpos = $leucounter; $lmcounter = $leucounter; $start_lmrep = $start_leurep; $stop_lmrep = $stop_leurep+7; } } elsif ($lmcounter == 0) { $stop_lmrep = (7*$leu); $start_lmrep = $stop_lmrep; } $stop_lmrep = (7*$leu); if ($leucounter >= 4) { $lrstart = $start + $start_leurep; $lrlength = $stop_leurep - $start_leurep + 1; &setlr($lrcounter, $start_leurep+$start, $lrlength, $leucounter); $lrcounter++; } $mutcounter++; $lmcounter++; $leucounter = 0; $mutpos = $lmcounter; $start_leurep = $stop_leurep = 0; } elsif ($residue eq "L") { $stop_leurep = (7*$leu); $start_leurep = $stop_leurep if ($leucounter == 0); $stop_lmrep = (7*$leu); $start_lmrep = $stop_lmrep if ($lmcounter == 0); $leucounter++; $lmcounter++; } } if ($leucounter != $lmcounter && $lmcounter >= 5) { &setlrm($lrmcounter,($start_lmrep+$start),($stop_lmrep-$start_lmrep+1), $lmcounter, $mutpos); $lrmcounter++; } if ($leucounter >= 4) { &setlr($lrcounter,($start_leurep+$start),($stop_leurep-$start_leurep+1), $leucounter); $lrcounter++; } } } %lr4info = (); # key = num, value = "start1\tstop1\tstart2\tstop2" for ($i=0; exists $lr{$i}; $i++) { $num = $lr{$i}{"num"}; $lr4info{$num} .= "\t" if (exists $lr4info{$num}); $lr4info{$num} .= $lr{$i}{"start"}; $lr4info{$num} .= "\t"; $lr4info{$num} .= $lr{$i}{"start"} + $lr{$i}{"length"} - 1; } foreach $num (sort numerically keys %lr4info) { $infolines .= ">$infilename\tLR$num\t"; $infolines .= $lr4info{$num}; $infolines .= "\n"; } %lr4info = (); # key = "$num_$mut_pos", value = "start1\tstop1\tstart2\tstop2" for ($i=0; exists $lrm{$i}; $i++) { $num = $lrm{$i}{"num"}; $num .= "_"; $num .= $lrm{$i}{"mut_pos"}; $lr4info{$num} .= "\t" if (exists $lr4info{$num}); $lr4info{$num} .= $lrm{$i}{"start"}; $lr4info{$num} .= "\t"; $lr4info{$num} .= $lrm{$i}{"start"} + $lrm{$i}{"length"} - 1; } foreach $num (keys %lr4info) { $infolines .= ">$infilename\tLr$num\t"; $infolines .= $lr4info{$num}; $infolines .= "\n"; } ######################################################################################## # # detect overlap between Leucine repeats and Coiled Coils # # a) more than 3 CC or the ccf the lr overlaps with takes more than 90 residues => no lz # # b) overlap of at least 21 residues => lz # ######################################################################################### $lzcounter = 0; $nr_of_lz = 0; %lz = (); $all_ccs_too_long = $TRUE; for ($ccf=0; exists $cc{$ccf}; $ccf++) { if ($cc{$ccf}{"length"} <= 90) { $all_ccs_too_long = $FALSE; my $o; for ($i=0; exists $lr{$i}; $i++) { if (($o = &overlap($cc{$ccf}{"start"}, $cc{$ccf}{"length"}, $lr{$i}{"start"}, $lr{$i}{"length"})) >= 21) { $cc{$ccf}{"in_lz_id"} .= "$lzcounter|"; $lr{$i}{"in_lz_id"} .= "$lzcounter|"; if ($cc{$ccf}{"start"} >= $lr{$i}{"start"}) { $lz{$lzcounter}{"start"} = $cc{$ccf}{"start"}; } else { $lz{$lzcounter}{"start"} = $lr{$i}{"start"}; } $lz{$lzcounter}{"length"} = $o; $lz{$lzcounter}{"start_of_cc"} = $cc{$ccf}{"start"}; $lz{$lzcounter}{"length_of_cc"} = $cc{$ccf}{"length"}; $lz{$lzcounter}{"nb_ccf"} = $NB_CCF; $lzcounter++; } } for ($i=0; exists $lrm{$i}; $i++) { if (($o = &overlap($cc{$ccf}{"start"}, $cc{$ccf}{"length"}, $lrm{$i}{"start"}, $lrm{$i}{"length"})) >= 21) { $cc{$ccf}{"in_lz_id"} .= "$lzcounter|"; $lrm{$i}{"in_lz_id"} .= "$lzcounter|"; if ($cc{$ccf}{"start"} >= $lrm{$i}{"start"}) { $lz{$lzcounter}{"start"} = $cc{$ccf}{"start"}; } else { $lz{$lzcounter}{"start"} = $lrm{$i}{"start"}; } $lz{$lzcounter}{"length"} = $o; $lz{$lzcounter}{"start_of_cc"} = $cc{$ccf}{"start"}; $lz{$lzcounter}{"length_of_cc"} = $cc{$ccf}{"length"}; $lz{$lzcounter}{"nb_ccf"} = $NB_CCF; $lzcounter++; } } } if ($all_ccs_too_long && (!$already_got_a_reason)) { $there_is_no_lz = $TRUE; $reason = "(the Coiled Coils contain more than 90 residues)" if ($nr_of_cc >= 1); $already_got_a_reason = $TRUE; } } $nr_of_lz = $lzcounter; ######################################## ## collect some information for output ######################################## if ($nr_of_lz == 0 && (!$already_got_a_reason)) { $there_is_no_lz = $TRUE; if ((!exists $lr{0}) && (!exists $lrm{0})) { $reason = "there were no Leucine Repeats detected."; $already_got_a_reason = $TRUE; } else { $reason = "the condition of a minimum overlap of 21 residues\nbetween the coiled coil and the Leucine Repeat\nisn't fulfilled."; $already_got_a_reason = $TRUE; } } $infolines .= ">$infilename\tOCL"; $infolines .= "\t0" if ($lzcounter == 0); for ($k=0; exists $lz{$k}; $k++) { $infolines .= "\t"; $infolines .= $lz{$k}{"start"}; $infolines .= "\t"; $infolines .= ($lz{$k}{"start"}+$lz{$k}{"length"}-1); } $infolines .= "\n"; $infolines .= ">$infilename\tLZ"; if (!$there_is_no_lz) { for ($k=0; exists $lz{$k}; $k++) { $infolines .= "\t"; $infolines .= $lz{$k}{"start"}; $infolines .= "\t"; $infolines .= ($lz{$k}{"start"}+$lz{$k}{"length"}-1); } } else { $infolines .= "\t0"; } $infolines .= "\n"; ######################################## ## ## OUTPUT ## ######################################## if ($there_is_no_lz) { print "The amino acid sequence in $infilename is predicted NOT to contain a Leucine Zipper domain\n"; print "$reason\n" if ($already_got_a_reason); } else { print "The amino acid sequence in $infilename is predicted to contain a Leucine Zipper domain\n"; $stop = $lz{"0"}{"start"} + $lz{"0"}{"length"} - 1; print "between positions " . $lz{"0"}{"start"} . " and $stop."; } print "\n\n"; if ($verboseflag) { print "verbose:\n\n"; print "description line:\n$descriptionline\n\n"; print "1) number of potential LEUCINE ZIPPERS: $nr_of_lz\n"; if ($there_is_no_lz && $nr_of_lz > 0) { print "\nA prediction of Leucine Zippers was rejected.\n"; print "$reason.\n"; print "However the overlaps between Coiled Coil Frames and\n"; print "Leucine repeats were detected as follows:\n\n\n"; } $lzcounter = 0; while ($lzcounter < $nr_of_lz) { if (! $there_is_no_lz) { print "\n"; $finished = $FALSE; for ($ccf=0; ($ccf<$MAX_NB_CCF) && (exists $cc{$ccf}) && (!$finished); $ccf++) { foreach $in_lz_id (split /\|/, $cc{$ccf}{"in_lz_id"}) { last if ($finished); if ($in_lz_id == $lzcounter) { $start_of_cc = $cc{$ccf}{"start"}; $length_of_cc = $cc{$ccf}{"length"}; $finished = $TRUE; } } } $lr_is_in_lz = $FALSE; $finished = $FALSE; for ($lrcounter=0; (exists $lr{$lrcounter}) && (!$finished); $lrcounter++) { foreach $in_lz_id (split /\|/, $lr{$lrcounter}{"in_lz_id"}) { last if ($finished); if ($in_lz_id == $lzcounter) { $start_of_lr = $lr{$lrcounter}{"start"}; $nr_of_l = $lr{$lrcounter}{"num"}; $lr_is_in_lz = $TRUE; $finished = $TRUE; } } } $finished = $FALSE; $mutpos = -1; if (! $lr_is_in_lz) { for ($lrcounter=0; (exists $lrm{$lrcounter}) && (!$finished); $lrcounter++) { foreach $in_lz_id (split /\|/, $lrm{$lrcounter}{"in_lz_id"}) { last if ($finished); if ($in_lz_id == $lzcounter) { $start_of_lr = $lrm{$lrcounter}{"start"}; $nr_of_l = $lrm{$lrcounter}{"num"}; $mutpos = $lrm{$lrcounter}{"mut_pos"}; $finished = $TRUE; } } } &lz_annotation($sequence, $start_of_cc, $length_of_cc, $start_of_lr, $nr_of_l, $lz{$lzcounter}{"start"}, $lz{$lzcounter}{"length"}, $mutpos); } else { &lz_annotation($sequence, $start_of_cc, $length_of_cc, $start_of_lr, $nr_of_l, $lz{$lzcounter}{"start"}, $lz{$lzcounter}{"length"}); } $lzcounter++; } # now there is an overlap between cc and lr, but no lz else { $finished = $FALSE; for ($ccf=0; ($ccf<$nr_of_cc) && (exists $cc{$ccf}) && (!$finished); $ccf++) { foreach $in_lz_id (split /\|/, $cc{$ccf}{"in_lz_id"}) { last if ($finished); if ($in_lz_id == $lzcounter) { $start_of_cc = $cc{$ccf}{"start"}; $length_of_cc = $cc{$ccf}{"length"}; $finished = $TRUE; } } } $lr_is_in_lz = $FALSE; $finished = $FALSE; for ($lrcounter=0; exists $lr{$lrcounter} && (!$finished); $lrcounter++) { foreach $in_lz_id (split /\|/, $lr{$lrcounter}{"in_lz_id"}) { last if ($finished); if ($in_lz_id == $lzcounter) { $start_of_lr = $lr{$lrcounter}{"start"}; $nr_of_l = $lr{$lrcounter}{"num"}; $lr_is_in_lz = $TRUE; $finished = $TRUE; } } } $finished = $FALSE; if (! $lr_is_in_lz) { for ($lrcounter=0; exists $lrm{$lrcounter}; $lrcounter++) { foreach $in_lz_id (split /\|/, $lrm{$lrcounter}{"in_lz_id"}) { last if ($finished); if ($in_lz_id == $lzcounter) { $start_of_lr = $lrm{$lrcounter}{"start"}; $mutpos = $lrm{$lrcounter}{"mut_pos"}; $nr_of_l = $lrm{$lrcounter}{"num"}; $finished = $TRUE; } } } &ocl_annotation($sequence, $start_of_cc, $length_of_cc, $start_of_lr, $nr_of_l, $lz{$lzcounter}{"start"}, $lz{$lzcounter}{"length"}, $mutpos); } else { &ocl_annotation($sequence, $start_of_cc, $length_of_cc, $start_of_lr, $nr_of_l, $lz{$lzcounter}{"start"}, $lz{$lzcounter}{"length"}); } $lzcounter++; } } print "\n\n\n"; print "2) COILED COILS which do NOT correspond to a Leucine Zipper\n\n"; $no_cc_verbosed = $TRUE; if (! $more_than_three_cc) { for ($ccf=0; $ccf<3 && exists $cc{$ccf}; $ccf++) { if ($cc{$ccf}{"in_lz_id"} eq "") { &cc_annotation($sequence, $cc{$ccf}{"start"}, $cc{$ccf}{"length"}); $no_cc_verbosed = $FALSE; } } } if ($no_cc_verbosed) { if ($more_than_three_cc) { print "ncoils-2.2 predicted more than $MAX_CC_FRAMES Coiled Coils.\n"; print "Looking for Leucine Zippers therefore seems senseless.\n"; if ($infoflag) { print "(look at infolines for more details)\n"; } elsif ($outflag) { print "(look at outfile $infilename.dat for more details)\n"; } else { print "(Set info-flag (-info) or outflag (-o) for more details)\n"; } } else { print "none\n"; } } print "\n\n\n"; print "3) LEUCINE REPEATS which do NOT correspond to a Leucine Zipper\n\n"; $no_lr_verbosed = $TRUE; for ($lrcounter=0; ($lrcounter<(7*($seqlength/22)+1)) && exists $lr{$lrcounter}; $lrcounter++) { if ($lr{$lrcounter}{"in_lz_id"} eq "") { &lr_annotation($sequence, $lr{$lrcounter}{"start"}, $lr{$lrcounter}{"num"}); $no_lr_verbosed = $FALSE; } } for ($lrcounter=0; ($lrcounter<(7*($seqlength/22)+1)) && exists $lrm{$lrcounter}; $lrcounter++) { if ($lrm{$lrcounter}{"in_lz_id"} eq "") { &lr_annotation($sequence, $lrm{$lrcounter}{"start"}, $lrm{$lrcounter}{"num"}, $lrm{$lrcounter}{"mut_pos"}); $no_lr_verbosed = $FALSE; } } print "none\n" if ($no_lr_verbosed); } if ($infoflag) { print "Info mode:\n\n"; print $infolines; } if ($outflag) { $outfilename = "$infilename.dat"; open (FPO, ">$outfilename") || &error("Cannot open $outfilename for writing!\n"); print FPO $infolines; close FPO; } exit(0); ################################################################## ################################################################## sub fetchfasta { my ($filename) = @_; my $descriptionline; open (FPI, "<$filename") || &error("cannot open $filename!"); $descriptionline = ; chomp $descriptionline; if ($descriptionline =~ /^>([^\s\t]+)/) { $seqname = $1; } else { error("sequence in file $filename not in fasta format"); } while () { $line = $_; chomp $line; $line =~ s/ //g; $line =~ s/\t//g; $line = uc($line); last if ($line =~ /^>/); if ($line =~ /([^ARNDCQEGHILKMFPSTWYVBZX])/) { close FPI; &error("Invalid character \'$1\' in sequence file $filename!"); } else { $sequence .= $line; } } close FPI; return ($seqname, $sequence); } ################################################################## ################################################################## sub parseargs { my @args = @_; if (!defined $args[0]) { print STDERR "$USAGE\n"; exit(1); } for ($i=0; defined $args[$i]; ) { $argument = $args[$i]; if ($argument =~ /^\-/) { if ($argument eq "-i") { if (!defined $args[$i+1]) { error("no name for inputfile with fasta-sequence given after '-i'!"); } elsif (! -e $args[$i+1]) { error("file $args[$i+1] does not exist!"); } else { $infilename = $args[$i+1]; $i += 2; next; } } elsif ($argument eq "-v") { $verboseflag = 1; $i++; next; } elsif ($argument eq "-info") { $infoflag = 1; $i++; next; } elsif ($argument eq "-o") { $outflag = 1; $i++; next; } elsif ($argument eq "-help") { &help(); exit(1); } else { print STDERR "$USAGE\n"; exit(1); } } else { print STDERR "$USAGE\n"; exit(1); } } } ################################################################## ################################################################## sub error { my ($message) = @_; print STDERR "$message\n"; print STDERR "$USAGE\n"; print STDERR "report problems to luz\@molgen.mpg.de\n"; exit(1); } ################################################################## ################################################################## sub setlr { my ($index, $lrstart, $lrlength, $num) = @_; my %newpositions = (); for ($i=$lrstart; $i<($lrstart+$lrlength); $i+=7) { $newpositions{$i} = 1; } my $flag1 = 1; for ($i=0; exists $lr{$i}; $i++) { my $flag2 = 0; my %oldpositions = (); for ($s=$lr{$i}{"start"}; $s<($lr{$i}{"start"}+$lr{$i}{"length"}); $s+=7) { $oldpositions{$s} = 1; } foreach $newposition (keys %newpositions) { if (! exists $oldpositions{$newposition}) { $flag2 = 1; last; } } $flag1 = 0 if (! $flag2); } if ($flag1) { $lr{$index}{"start"} = $lrstart; $lr{$index}{"length"} = $lrlength; $lr{$index}{"num"} = $num; $lr{$index}{"in_lz_id"} = ""; } } ################################################################## ################################################################## sub setlrm { my ($index, $lrstart, $lrlength, $num, $mutpos) = @_; my %newpositions = (); for ($i=$lrstart; $i<($lrstart+$lrlength); $i+=7) { $newpositions{$i} = 1; } my $flag1 = 1; for ($i=0; exists $lrm{$i}; $i++) { my $flag2 = 0; my %oldpositions = (); for ($s=$lrm{$i}{"start"}; $s<($lrm{$i}{"start"}+$lrm{$i}{"length"}); $s+=7) { $oldpositions{$s} = 1; } foreach $newposition (keys %newpositions) { if (! exists $oldpositions{$newposition}) { $flag2 = 1; last; } } $flag1 = 0 if (! $flag2); } if ($flag1) { $lrm{$index}{"start"} = $lrstart; $lrm{$index}{"length"} = $lrlength; $lrm{$index}{"num"} = $num; $lrm{$index}{"mut_pos"} = $mutpos; $lrm{$index}{"in_lz_id"} = ""; } } ################################################################## ################################################################## sub help { print STDOUT "$USAGE\n\n"; print STDOUT "optional flags:\n"; print STDOUT "[-help]\tprints this help on stdout only.\n"; print STDOUT "[-v]\tprints annotations\n\tof all detected Coiled Coils, Leucine Repeats and Leucine Zippers\n\ton stdout\n"; print STDOUT "[-info] prints some InfoLines (Coiled Coil, Leucine Repeats,\n\tLeucine Zipper, Summary Line) on stdout\n"; print STDOUT "[-o]\twrites a file named inputfile.dat containing the InfoLines\n"; print STDOUT "\n\n\n"; print STDOUT "The follwing lines give an example for an annotation:\n"; print STDOUT "1) number of potential LEUCINE ZIPPERS: 1 Annotations:\n"; print STDOUT " |\n"; print STDOUT "1---------11--------21--------31--------41--------51-------- |\n"; print STDOUT "MVVVAAAPNPADGTPKVLLLSGQPASAAGAPAARLPLMVPAQRGASPEAASGGLPQARKR |\n"; print STDOUT " |\n"; print STDOUT " |\n"; print STDOUT " |\n"; print STDOUT " |\n"; print STDOUT "61--------71--------81--------91--------101-------111------- |position \n"; print STDOUT "QRLTHLSPEEKALRRKLKNRVAAQTARDRKKARMSELEQQVVDLEEENQKLLLENQLLRE |sequence\n"; print STDOUT " CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC |coiled coil \n"; print STDOUT " L------L------L------L-- |L-Repeats\n"; print STDOUT " LZLZLZLZLZLZLZLZLZLZLZLZ |LeucineZipper\n"; print STDOUT " |\n"; print STDOUT "121-------131-------141-------151-------161-------171------- |\n"; print STDOUT "KTHGLVVENQELRQRLGMDALVAEEEAEAKGNEVRPVAGSAESAALRLRAPLQQVQAQLS |\n"; print STDOUT "CCCCCCCCCCCC |\n"; print STDOUT "----L------L |\n"; print STDOUT "LZLZLZLZLZLZ |\n"; print STDOUT " |\n"; print STDOUT "181-------191-------201-------211-------221-------231------- |\n"; print STDOUT "PLQNISPWILAVLTLQIQSLISCWAFWTTWTQSCSSNALPQSLPAWRSSQRSTQKDPVPY |\n"; print STDOUT " |\n"; print STDOUT " |\n"; print STDOUT " |\n"; print STDOUT " |\n"; print STDOUT "241-------251------- |\n"; print STDOUT "QPPFLCQWGRHQPSWKPLMN |\n"; print STDOUT "\n"; print STDOUT "report problems to \n"; } sub overlap { my ($cc_start, $cc_len, $lr_start, $lr_len) = @_; my $overlap; if ($lr_start<$cc_start) { return 0 if (($lr_start+$lr_len-1)<$cc_start); return (($lr_start+$lr_len) - $cc_start); } else { return 0 if (($cc_start+$cc_len-1) < $lr_start); return $lr_len if (($cc_start+$cc_len) >= ($lr_start+$lr_len)); return (($cc_start+$cc_len)-$lr_start); } } sub numerically { $a <=> $b } sub lz_annotation { my ($sequence, $cc_start, $cc_length, $lr_start, $nr_of_l, $lz_start, $lz_length, $mutpos) = @_; my $len = 0; my $counter = 0; my $position = 0; $len = length $sequence; $leucine_pattern = " " x ($lr_start-1); if (! defined $mutpos) { $leucine_pattern .= "L------" x ($nr_of_l-1); $leucine_pattern .= "L"; } else { $leucine_pattern .= "L------" x ($mutpos-1); $leucine_pattern .= substr($sequence, ($lr_start+(7*($mutpos-1)))-1, 1); $leucine_pattern .= "------L" x ($nr_of_l - $mutpos); } $leucine_pattern .= " " x ($len-($lr_start+(7*($nr_of_l-1)+1))); $lz_pattern = " " x ($lz_start-1); for ($j=1; $j<$lz_length;) { $lz_pattern .= "LZ"; $j+=2; $lz_pattern .= "L" if ($j == $lz_length); } $lz_pattern .= " " x ($len-($lz_start+$lz_length-1)); while ($counter < $len) { $rest = $len - $counter; $row_length = $CHAR_PER_ROW; $row_length = $rest if ($rest < $CHAR_PER_ROW); $position_merker = $position; while (($position-$position_merker) < $row_length) { $position++; print $position; $stellen = length($position); $position += $stellen; for ($j=$stellen; $j<9 && (($position-$position_merker) < $row_length); $j++) { print "-"; $position++; } print "-" if (($position-$position_merker) <= $row_length || $j == 9); } print "\n"; $position = $position_merker; # put out sequence ... print substr($sequence, $counter, $row_length) . "\n"; # ... coiled coil for ($j=0; $j<$row_length; $j++) { $position++; if ($position>=$cc_start && $position<=($cc_start+$cc_length-1)) { print "C"; } else { print " "; } } print "\n"; # ... leucine repeats print substr($leucine_pattern, $counter, $row_length) . "\n"; # ... leucine zipper print substr($lz_pattern, $counter, $row_length) . "\n\n"; $counter += $CHAR_PER_ROW; } } # annotation for Coiled Coil without Leucine Zipper sub cc_annotation { my ($sequence, $cc_start, $cc_length) = @_; my $len = 0; my $counter = 0; my $position = 0; $len = length($sequence); while ($counter < $len) { $rest = $len - $counter; $row_length = $CHAR_PER_ROW; $row_length = $rest if ($rest < $CHAR_PER_ROW); $position_merker = $position; while (($position-$position_merker) < $row_length) { $position++; print $position; $stellen = length($position); $position += $stellen; for ($j=$stellen; $j<9 && (($position-$position_merker) < $row_length); $j++) { print "-"; $position++; } print "-" if (($position-$position_merker) <= $row_length || $j == 9); } print "\n"; $position = $position_merker; # put out sequence ... print substr($sequence, $counter, $row_length) . "\n"; # ... coiled coil for ($j=0; $j<$row_length; $j++) { $position++; if ($position>=$cc_start && $position<=($cc_start+$cc_length-1)) { print "C"; } else { print " "; } } print "\n\n"; $counter += $CHAR_PER_ROW; } } # annotation for Leucine Repeat without Leucine Zipper sub lr_annotation { my ($sequence, $lr_start, $nr_of_l, $mutpos) = @_; my $len = 0; my $counter = 0; my $position = 0; $len = length $sequence; $leucine_pattern = " " x ($lr_start-1); if (! defined $mutpos) { $leucine_pattern .= "L------" x ($nr_of_l-1); $leucine_pattern .= "L"; } else { $leucine_pattern .= "L------" x ($mutpos-1); $leucine_pattern .= substr($sequence, ($lr_start+(7*($mutpos-1)))-1, 1); $leucine_pattern .= "------L" x ($nr_of_l - $mutpos); } $leucine_pattern .= " " x ($len-($lr_start+(7*($nr_of_l-1)+1))); while ($counter < $len) { $rest = $len - $counter; $row_length = $CHAR_PER_ROW; $row_length = $rest if ($rest < $CHAR_PER_ROW); $position_merker = $position; while (($position-$position_merker) < $row_length) { $position++; print $position; $stellen = length($position); $position += $stellen; for ($j=$stellen; $j<9 && (($position-$position_merker) < $row_length); $j++) { print "-"; $position++; } print "-" if (($position-$position_merker) <= $row_length || $j == 9); } print "\n"; # put out sequence ... print substr($sequence, $counter, $row_length) . "\n"; # ... leucine repeats print substr($leucine_pattern, $counter, $row_length) . "\n\n"; $counter += $CHAR_PER_ROW; } } # annotation for ocl without lz sub ocl_annotation { my ($sequence, $cc_start, $cc_length, $lr_start, $nr_of_l, $lz_start, $lz_length, $mutpos) = @_; my $len = 0; my $counter = 0; my $position = 0; $len = length $sequence; $leucine_pattern = " " x ($lr_start-1); if (! defined $mutpos) { $leucine_pattern .= "L------" x ($nr_of_l-1); $leucine_pattern .= "L"; } else { $leucine_pattern .= "L------" x ($mutpos-1); $leucine_pattern .= substr($sequence, ($lr_start+(7*($mutpos-1)))-1, 1); $leucine_pattern .= "------L" x ($nr_of_l - $mutpos); } $leucine_pattern .= " " x ($len-($lr_start+(7*($nr_of_l-1)+1))); $ocl_pattern = " " x ($lz_start-1); $ocl_pattern .= "O" x ($lz_length); $ocl_pattern .= " " x ($len-($lz_start+$lz_length-1)); while ($counter < $len) { $rest = $len - $counter; $row_length = $CHAR_PER_ROW; $row_length = $rest if ($rest < $CHAR_PER_ROW); $position_merker = $position; while (($position-$position_merker) < $row_length) { $position++; print $position; $stellen = length($position); $position += $stellen; for ($j=$stellen; $j<9 && (($position-$position_merker) < $row_length); $j++) { print "-"; $position++; } print "-" if (($position-$position_merker) <= $row_length || $j == 9); } print "\n"; $position = $position_merker; # put out sequence ... print substr($sequence, $counter, $row_length) . "\n"; # ... coiled coil for ($j=0; $j<$row_length; $j++) { $position++; if ($position>=$cc_start && $position<=($cc_start+$cc_length-1)) { print "C"; } else { print " "; } } print "\n"; # ... leucine repeats print substr($leucine_pattern, $counter, $row_length) . "\n"; # ... overlap between ccf and Leucine Repeat print substr($ocl_pattern, $counter, $row_length) . "\n\n"; $counter += $CHAR_PER_ROW; } }