1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
| #!/usr/bin/python
import sys, datetime from collections import defaultdict start_time = datetime.datetime.now()
#hash of genome file genome_hash = defaultdict(list)
genome_file = sys.argv[1] seq_pos = sys.argv[2]
with open(genome_file) as fd_genome: for line in fd_genome: if line.startswith('>'): chrom = line.strip()[1:] genome_hash[chrom] = [] else: seq_eachline = line.strip() genome_hash[chrom].append(seq_eachline)
with open(seq_pos) as fd: for line in fd: chrom = line.split()[0] start_pos = int(line.split()[1]) - 1 end_pos = int(line.split()[2]) if chrom in genome_hash.keys(): ID = '>' + chrom + '_' + line.split()[1] + '_' + line.split()[2] chrom_seq = ''.join(genome_hash[chrom]) seq = chrom_seq[start_pos:end_pos] print ID + '\n' + seq else: continue
end_time = datetime.datetime.now() #print '###############' #print (end_time - start_time)
|