-
Notifications
You must be signed in to change notification settings - Fork 19
Expand file tree
/
Copy pathfastaClean.pl
More file actions
executable file
·62 lines (58 loc) · 1.71 KB
/
fastaClean.pl
File metadata and controls
executable file
·62 lines (58 loc) · 1.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/perl
# Script: fastaClean.pl
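# Description: Prefixes each FASTA record ID with an organism abbreviation and strips '*' from the sequence, writing the result to <input>.clean (Not for general use; highly specific)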
# Author: Steven Ahrendt
# email: [email protected]
# Date: 01.16.2014
##################################
use warnings;
use strict;
use Getopt::Long;
use Bio::Seq;
use Bio::SeqIO;
#####-----Global Variables-----#####
# Command-line state. These are file-scoped lexicals read by the main
# section of the script.
my $input;            # -i / --input   : path of the FASTA file to clean (required)
my $org;              # -o / --org     : organism abbreviation joined onto each ID (required)
my %seqs;             # declared but not used in the visible part of the script
my ($help,$verb);     # -h / --help, -v / --verbose (verbose is parsed but never consulted here)
my $replace;          # -r / --replace : force overwrite of the input with the cleaned file

# Defined before option parsing so a parse failure can report it.
my $usage = "Usage: fastaClean.pl -i input -o orgAbbr\n";

# GetOptions returns false when it meets an unknown or malformed option
# (it has already printed a warning by then). Stop here instead of
# continuing with a partially parsed command line.
GetOptions(
    'i|input=s' => \$input,
    'o|org=s'   => \$org,
    'h|help'    => \$help,
    'v|verbose' => \$verb,
    'r|replace' => \$replace,
) or die $usage;

die $usage if $help;
# Both -i and -o are mandatory; an empty string or "0" is rejected as well
# because the checks are plain truth tests.
die "No input.\n$usage" if (!$input);
die "No organism.\n$usage" if (!$org);
#####-----Main-----#####
# Stream every record of $input through a small rewrite and write the
# result to "$input.clean":
#   * the new ID is "<org>|<original display_id>";
#   * every '*' character is removed from the residues.
# Only the ID and the sequence are passed to the new Bio::Seq object, so
# any other attribute of the input record (e.g. its description) is
# presumably not carried over -- confirm if that matters for your data.
# With -r/--replace the cleaned file is then renamed over the input.
my $clean_file = "$input.clean";

my $seq_in = Bio::SeqIO->new(-file => $input,
                             -format => "fasta");
my $seq_out = Bio::SeqIO->new(-file => ">$clean_file",
                              -format => "fasta");

while (my $seq_obj = $seq_in->next_seq) {
    # Earlier revisions derived the ID by splitting display_id on '|', '.'
    # or ':'; the current behaviour keeps the whole display_id.
    my $new_id = join("|", $org, $seq_obj->display_id);

    # Remove ALL '*' characters, not just the first one. Guarded so a
    # record without residues does not trigger an "uninitialized" warning.
    my $new_seq = $seq_obj->seq;
    $new_seq =~ tr/*//d if defined $new_seq;

    my $newSeq_obj = Bio::Seq->new(-display_id => $new_id,
                                   -seq => $new_seq);
    $seq_out->write_seq($newSeq_obj);
}

# Close both streams before touching the files so that all buffered output
# is on disk and no handle is still attached to the input when it is replaced.
$seq_in->close;
$seq_out->close;

if ($replace) {
    # Builtin rename instead of shelling out to `mv`: no shell parsing of
    # the user-supplied file name, and a failure is reported instead of
    # being silently ignored. Both paths are in the same directory.
    rename($clean_file, $input)
        or die "Could not replace $input with $clean_file: $!\n";
}

warn "Done.\n";
exit(0);
#####-----Subroutines-----#####