#!/usr/ucb/perl -w
##################################################################
# CodonUsage.pl
# PERL Script by Sven Rahmann, (c) 2002
#
# Usage:
#   CodonUsage.pl [options] FASTA-File(s)
#
# Options: -c       Check all input sequences are ORFs
#          -s name  Specify score file name
#
# Read FASTA input file(s)
# Count in-frame codon frequencies (without start ATG)
# Count out-of-frame triplets
# Compute triplet scores for coding/noncoding preference
#
# NOTE(review): the copy of this script that was reviewed had lost
# the text between the usage message and the first-header loop
# (usage text, process(), option parsing).  Those parts were rebuilt
# from the description above and are marked "NOTE(review)" below;
# confirm them against the author's original before relying on them.
##################################################################

use strict;
use warnings;
use Getopt::Std;

#################################################################
# Script-wide state, filled in by process() and reported by main().

my $prg = $0;          # program name used as prefix in messages
$prg =~ s{.*/}{};      # NOTE(review): basename of $0 is an assumption

my $check     = 0;     # -c: only accept sequences that look like ORFs
my $minlength = -1;    # shortest accepted sequence; -1 = none seen yet
my $totalseq  = 0;     # number of accepted sequences

my %ccount;            # in-frame ("coding") codon counts
my %cfreq;             # in-frame relative frequencies
my $ctotal = 0;        # total number of in-frame codons counted
my %ncount;            # out-of-frame ("noncoding") triplet counts
my %nfreq;             # out-of-frame relative frequencies
my $ntotal = 0;        # total number of out-of-frame triplets counted
my %score;             # log2(cfreq/nfreq) per triplet

#################################################################

# Print an error message, prefixed with the program name, to STDERR.
# Does not terminate the program.
sub info {
    my $text = shift;
    print STDERR "$prg: Error: $text\n";
    return;
}

# Print the usage summary to STDERR and terminate.
# NOTE(review): message text and exit status are reconstructed from
# the header comment; the original wording was lost.
sub usage {
    print STDERR join("\n",
        "Usage: $prg [options] FASTA-File(s)",
        "",
        "Options: -c       Check all input sequences are ORFs",
        "         -s name  Specify score file name",
        "",
    );
    exit 1;
}

# Return true if $seq starts with ATG, ends with a stop codon
# (TAA, TAG or TGA) and has a length that is a multiple of three.
# NOTE(review): the original ORF criteria are unknown; this is the
# minimal conventional definition.
sub is_orf {
    my ($seq) = @_;
    return 0 if length($seq) % 3 != 0;
    return $seq =~ /\AATG(?:...)*(?:TAA|TAG|TGA)\z/s ? 1 : 0;
}

# Count the triplets of one sequence without touching global state.
# Returns two hash references: (in-frame counts, out-of-frame counts).
# In-frame codons are taken at offsets 3, 6, 9, ... so the leading
# start codon is not counted; out-of-frame triplets are taken at
# every offset that is not a multiple of three.
# NOTE(review): whether the original counted the stop codon, or the
# out-of-frame triplets overlapping the start codon, is unknown.
sub count_triplets {
    my ($seq) = @_;
    my (%in_frame, %out_of_frame);
    my $last_start = length($seq) - 3;
    for my $pos (1 .. $last_start) {
        my $triplet = substr($seq, $pos, 3);
        if ($pos % 3 == 0) {
            $in_frame{$triplet}++;
        }
        else {
            $out_of_frame{$triplet}++;
        }
    }
    return (\%in_frame, \%out_of_frame);
}

# Compute (in-frame frequency, out-of-frame frequency, score in bits)
# for one triplet.  A triplet never seen in frame gets frequency 0
# and the floor score -10, as in the original report.
# Precondition: $n_count and $n_total are positive (main() only calls
# this for triplets present in %ncount).
sub triplet_stats {
    my ($c_count, $c_total, $n_count, $n_total) = @_;
    my $n_freq = $n_count / $n_total;
    return (0, $n_freq, -10) unless $c_count && $c_total;
    my $c_freq = $c_count / $c_total;
    return ($c_freq, $n_freq, log($c_freq / $n_freq) / log(2));
}

# Account for one FASTA record: optionally verify it is an ORF, then
# add its triplet counts to the global tables and update the
# sequence statistics.
# NOTE(review): reconstructed.  Upper-casing, whitespace removal and
# skipping (rather than aborting on) non-ORF input are assumptions.
sub process {
    my ($header, $seq) = @_;
    $seq = uc $seq;
    $seq =~ s/\s+//g;

    if ($check && !is_orf($seq)) {
        info("sequence is not an ORF, skipped: $header");
        return;
    }

    my $len = length $seq;
    $totalseq++;
    $minlength = $len if $minlength < 0 || $len < $minlength;

    my ($in_frame, $out_of_frame) = count_triplets($seq);
    for my $triplet (keys %$in_frame) {
        $ccount{$triplet} += $in_frame->{$triplet};
        $ctotal           += $in_frame->{$triplet};
    }
    for my $triplet (keys %$out_of_frame) {
        $ncount{$triplet} += $out_of_frame->{$triplet};
        $ntotal           += $out_of_frame->{$triplet};
    }
    return;
}

# Parse options, read all FASTA input, print the codon usage table
# to STDOUT and, if -s was given, write the score file.
sub main {
    # NOTE(review): option handling reconstructed from the header.
    my %opts;
    getopts('cs:', \%opts) or usage();
    $check = 1 if $opts{c};
    my $scorefile = $opts{s};

    # Skip everything before the first FASTA header.  Stop at end of
    # input so that header-less input cannot loop forever.
    my $header;
    while (defined(my $line = <>)) {
        if ($line =~ /^>/) {
            $header = $line;
            last;
        }
    }

    if (defined $header) {
        chomp $header;
        my $seq = "";
        while (defined(my $line = <>)) {
            chomp $line;
            if ($line =~ /^>/) {
                # A new record starts: account for the finished one.
                process($header, $seq);
                $header = $line;
                $seq    = "";
            }
            else {
                $seq .= $line;
            }
        }
        process($header, $seq);
    }
    else {
        info("no FASTA header line ('>') found in input");
    }

    # Write Codon Usage Statistics
    print "Cod: InFram freq || OOFram freq || ScoreBits\n";
    for my $codon (sort keys %ncount) {
        $ccount{$codon} = 0 unless exists $ccount{$codon};
        ($cfreq{$codon}, $nfreq{$codon}, $score{$codon}) =
            triplet_stats($ccount{$codon}, $ctotal,
                          $ncount{$codon}, $ntotal);
        # The codon is passed as data, never as part of the format,
        # so stray '%' characters in the input cannot corrupt output.
        printf("%s: %6d %.4f || %6d %.4f || %9.5f\n",
               $codon,
               $ccount{$codon}, $cfreq{$codon},
               $ncount{$codon}, $nfreq{$codon}, $score{$codon});
    }
    printf("---: %6d ------ || %6d ------ || ---------\n",
           $ctotal, $ntotal);

    # Write additional scorefile if requested
    if (defined $scorefile && length $scorefile) {
        open(my $sf, '>', $scorefile)
            or die "$prg: cannot write score file '$scorefile': $!\n";
        for my $codon (sort keys %ncount) {
            printf {$sf} "%s %12.8f\n", $codon, $score{$codon};
        }
        close($sf)
            or die "$prg: error closing score file '$scorefile': $!\n";
    }

    # Show statistics
    print STDERR "Done. Processed $totalseq input sequences. \nMinlength=$minlength\n";
    return;
}

# Run only when executed as a script, so the helpers above can be
# loaded (e.g. by a test) without reading any input.
main() unless caller;

1;
#################################################################