#!/usr/bin/perl
#
# Map sequence files in the current directory to NCBI taxonomy IDs.
#
# Every directory entry matched by the glob '*' (except *.pl / *.txt files
# and their '~' backups) is treated as an underscore-separated organism
# name.  The longest leading run of words that Bio::DB::Taxonomy resolves to
# a taxon ID is taken as the match.
#
# Output, one tab-separated line per entry on STDOUT:
#   <taxid>     <matched name>  <cleaned name>  <file name>   first use of the matched name
#   <taxid>***  <matched name>  <cleaned name>  <file name>   matched name already reported
#   UDEF        <cleaned name>                                no prefix resolved

use strict;
use warnings;

use Bio::DB::Taxonomy;

# Use NCBI Entrez over HTTP.
my $db = Bio::DB::Taxonomy->new(-source => 'entrez');

# Matched names that have already been printed once; repeats are flagged
# with '***' after the taxon ID.
my %seen_name;

FILE:
for my $file (glob '*') {

    # Skip helper scripts, text files and their editor backups (foo.pl~).
    next FILE if $file =~ /\.(?:pl|txt)~*$/;

    # Drop the ".fasta" extension.  The dot is escaped so that only a literal
    # ".fasta" is removed, not any character followed by "fasta".
    (my $stem = $file) =~ s/\.fasta//;

    # Cleaned name for display: cut everything from the first ':' and from
    # the "strain" / "complete" / "genome" markers (with leading underscores).
    my $name = $stem;
    $name =~ s/:.*$//;
    $name =~ s/_*strain.*$//;
    $name =~ s/_*complete.*$//;
    $name =~ s/_*genome.*$//;

    # Candidate names are built from the extension-free stem: with the
    # extension left on, the longest candidate ("Genus species.fasta") could
    # never match and only cost a wasted lookup per file.
    my @words = split /_/, $stem;

    # Try the longest prefix first and shorten one word at a time until the
    # taxonomy database returns an ID.
    my ($taxon_id, $matched);
    for my $last (reverse 0 .. $#words) {
        $matched  = join ' ', @words[0 .. $last];
        $taxon_id = $db->get_taxonid($matched);
        last if $taxon_id;
    }

    if (!$taxon_id) {
        print "UDEF\t$name\n";
        next FILE;
    }

    if ($seen_name{$matched}++) {
        print "${taxon_id}***\t$matched\t$name\t$file\n";
    }
    else {
        print "$taxon_id\t$matched\t$name\t$file\n";
    }
}