"Fossies" - the Fresh Open Source Software Archive

Member "augustus-3.3.3/scripts/filterSpliceHints.pl" (13 Sep 2019, 2085 Bytes) of package /linux/misc/augustus-3.3.3.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) Perl source code syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "filterSpliceHints.pl", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 3.3.2_vs_3.3.3.

    1 #!/usr/bin/env perl
    2 
    3 # filter augustus splice hints file (e.g. from RNA-seq data)
    4 # for splice hints that contain a particular splicing motif,
    5 # usually GT-AG
    6 #
    7 # Usage: filterSpliceHints.pl genome.fa hints.gff > filtered.hints.gff
    8 
    9 # Katharina Hoff, 9.6.2011
   10 
   11 my $usage = "filterSpliceHints.pl genome.fa hints.gff splice-pattern> out.gff\n\nThe most typical splice pattern should be GTAG! Sometimes, GCAG is also used.\n";
   12 
   13 if (@ARGV != 3) {
   14     print $usage;
   15     exit;
   16 }
   17 
   18 my $genome = $ARGV[0];
   19 my $hints = $ARGV[1];
   20 my $splice = $ARGV[2];
   21 
   22 open (FASTA, $genome) or die ("\n\ncould not open file $genome!\n");
   23 LINE: while ($line = <FASTA>){
   24     next LINE if $line =~ m/^#/; #discard comments
   25     if ($line =~ /^>/){
   26        chomp($line);
   27            #$line =~ s/[\x0A\x0D]+//g; #removing those ugly whitelines
   28            #$line =~ s/(\n)(\r)//g; #remove them alllll!
   29            #$line =~ m/(^>\w+)/i; #matches a word starting with > (Fasta)
   30            
   31        $hash_key = substr($line, 1, length($line)-1)
   32     }else{
   33     $line =~ s/[\x0A\x0D]+//g; 
   34     $line =~ s/(\s+)(\n)(\r)//g;
   35     $line = uc($line);
   36 #    print "Hash key: $hash_key\n";
   37 #    print "Content: $line\n";
   38     $fasta_hash{$hash_key}.=$line; 
   39 }
   40 }
   41 close(FASTA) or die "Could not close file $genome!\n";
   42 
   43 open (HINTS, $hints) or die ("Could not open file $hints!\n");
   44 LINE: while($line = <HINTS>){
   45     @gff = split(/\t/, $line);
   46 #    print "Scaffold: ".$gff[0]."\n";
   47 #    print "Length: ".length($fasta_hash{$gff[0]})."\n";
   48     $siteA = substr($fasta_hash{$gff[0]}, ($gff[3]-1), 2);   
   49     $siteB = substr($fasta_hash{$gff[0]}, ($gff[4]-2), 2);
   50     $given = $siteA.$siteB;
   51     #print "Splice site: $given\n";
   52     if($given =~ m/$splice/){
   53     print $gff[0]."\t".$gff[1]."\t".$gff[2]."\t".$gff[3]."\t".$gff[4]."\t".$gff[5]."\t+\t".$gff[7]."\t".$gff[8];
   54     }else{
   55     $given = reverse $given;
   56     $given =~ tr/ACGTacgt/TGCAtgca/;
   57     if($given =~ m/$splice/){
   58         print $gff[0]."\t".$gff[1]."\t".$gff[2]."\t".$gff[3]."\t".$gff[4]."\t".$gff[5]."\t-\t".$gff[7]."\t".$gff[8];
   59     }
   60     }
   61 }
   62 close(HINTS) or die "Could not close file $hints!\n";
   63 
   64