我需要帮助。
我有12个文件,我试图比较“key_file.txt”中定义的区域的存在与否,并生成一个显示此的列表。我写了以下代码但是我收到以下错误。
File "filter_bedtools_all_samples_new.py", line 119, in <module> start = elems[1]
IndexError:列表索引超出范围
这是代码
import sys
#read each file from the argument list
A1_file = sys.argv[1]
A2_file = sys.argv[2]
A3_file = sys.argv[3]
B1_file = sys.argv[4]
B2_file = sys.argv[5]
B3_file = sys.argv[6]
C1_file = sys.argv[7]
C2_file = sys.argv[8]
C3_file = sys.argv[9]
D1_file = sys.argv[10]
D2_file = sys.argv[11]
D3_file = sys.argv[12]
key_file = sys.argv[13]
offset1 = int(sys.argv[14])
offset2 = int(sys.argv[15])
out_file = sys.argv[16]
#open the output file
outHandle = open(out_file,'w')
#create a class to hold objects
class Island:
def __init__(self, chr, start, end):
self.chr = chr
self.start = start
self.end = end
#start reading files into lists
with open(A1_file) as A1:
list1 = A1.readlines()
with open(A2_file) as A2:
list2 = A2.readlines()
with open(A3_file) as A3:
list3 = A3.readlines()
with open(B1_file) as B1:
list4 = B1.readlines()
with open(B2_file) as B2:
list5 = B2.readlines()
with open(B3_file) as B3:
list6 = B3.readlines()
with open(C1_file) as C1:
list7 = C1.readlines()
with open(C2_file) as C2:
list8 = C2.readlines()
with open(C3_file) as C3:
list9 = C3.readlines()
with open(D1_file) as D1:
list10 = D1.readlines()
with open(D2_file) as D2:
list11 = D2.readlines()
with open(D3_file) as D3:
list12 = D3.readlines()
#create a list containing the filenames
file_list = ["list1","list2","list3","list4","list5","list6","list7","list8","list9","list10","list11","list12"]
#print(len(list1))
key_dict = {}
out_dict = {}
key_list = []
counter = 0
#open key file and read one line at a time
with open(key_file) as kf:
for eachline in kf:
#initialize a dictionary of lists to 0
temp_list = "list_" + str(counter)
temp_list = [0] * 12
out_dict[counter] = temp_list
els = eachline.split("\t")
k_chr = els[0]
k_start = els[1]
k_end = els[2]
#create a dictionary of objects Island
temp_obj = Island(k_chr,k_start,k_end)
key_dict[counter] = temp_obj
key_list.append(eachline) #decided to try this out
counter += 1
#for k,v in key_dict.iteritems():
for v in key_list:
key_elems = v.split("\t")
key_chr = key_elems[0]
key_start = key_elems[1]
key_end = key_elems[2].strip(' \t\r\n')
for file_name in file_list:
# for i in range(1,13)
# file_name = "list" + str(i)
for eachline in file_name:
elems = eachline.split("\t")
chr = elems[0]
start = elems[1]
end = elems[2]
island = elems[3]
count = elems[4]
start_diff = abs(int(key_start) - int(start))
end_diff = abs(int(key_end) - int(end))
if (chr == key_chr):
if(((key_start == start) or (0 <= start_diff <= offset1)) and ((key_end == end) or (0 <= end_diff <= offset2))):
temp_list = out_dict[k]
temp_list[i] = count
out_dict[k] = temp_list
else:
continue
else:
continue
for key,value in out_dict.iteritems():
outHandle.write(str(value))
print("Processing completed!")
以下是文件A1
Chromosome01 3187178 3187214 island-16 177976 . 3187178 3187214 iR bC bZ bS Chromosome01 5042128 5042182 island-32 943 . 5042128 5042182 iR bC bZ bS
AA
Chromosome01 1102995 1103064 island-4 1558 . 1102995 1103064 iR bC bZ bS Chromosome01 3187178 3187227 island-9 81851 . 3187178 3187227 iR bC bZ bS
我
Chromosome01 4144298 4144467 island-39 354 . 4144298 4144467 iR bC bZ bS Chromosome01 4144671 4145103 island-41 344 . 4144671 4145103 iR bC bZ bS
乙1
Chromosome01 5042128 5042238 island-15 1250 . 5042128 5042238 iR bC bZ bS Chromosome01 5042315 5042535 island-16 3256 . 5042315 5042535 iR bC bZ bS
KB
Chromosome01 1102966 1103182 island-2 3910 . 1102966 1103182 iR bC bZ bS Chromosome01 5042128 5042238 island-19 3488 . 5042128 5042238 iR bC bZ bS
喷
Chromosome01 1102966 1103065 island-3 2462 . 1102966 1103065 iR bC bZ bS Chromosome01 5042128 5042237 island-20 2592 . 5042128 5042237 iR bC bZ bS
C1
Chromosome01 1102973 1103182 island-4 3950 . 1102973 1103182 iR bC bZ bS Chromosome01 5042128 5042237 island-22 4965 . 5042128 5042237 iR bC bZ bS
C2
Chromosome01 1102966 1103182 island-5 3697 . 1102966 1103182 iR bC bZ bS Chromosome01 5042128 5042238 island-29 2730 . 5042128 5042238 iR bC bZ bS
4号
Chromosome01 1102974 1103065 island-6 1673 . 1102974 1103065 iR bC bZ bS Chromosome01 5042128 5042238 island-28 1857 . 5042128 5042238 iR bC bZ bS
D1
Chromosome01 1102957 1103182 island-5 7654 . 1102957 1103182 iR bC bZ bS Chromosome01 3187180 3187215 island-21 223953 . 3187180 3187215 iR bC bZ bS
D2
Chromosome01 1102973 1103182 island-5 4847 . 1102973 1103182 iR bC bZ bS Chromosome01 5042128 5042237 island-24 2300 . 5042128 5042237 iR bC bZ bS
地方
Chromosome01 1102971 1103182 island-6 7091 . 1102971 1103182 iR bC bZ bS Chromosome01 5042128 5042238 island-30 2509 . 5042128 5042238 iR bC bZ bS
key_list文件是:
Chromosome01 1102966 1103065 Chromosome01 1102966 1103182 Chromosome01 1102995 1103064 Chromosome01 3187178 3187214 Chromosome01 3187178 3187227 Chromosome01 4144298 4144467 Chromosome01 4144671 4145103 Chromosome01 5042128 5042182 Chromosome01 5042128 5042238 Chromosome01 5042315 5042535 Chromosome01 5042495 5042532 Chromosome01 5042663 5043093 Chromosome01 5042726 5043093 Chromosome01 5043238 5043392 Chromosome01 5043292 5043394 Chromosome01 5043520 5043752 Chromosome01 5043523 5043664 Chromosome01 5043547 5043617 Chromosome01 5043549 5043752 Chromosome01 5043902 5043961 Chromosome01 5044239 5044547 Chromosome01 5044462 5044505 Chromosome01 5044679 5044870 Chromosome01 5044679 5045096 Chromosome01 5044719 5044870 Chromosome01 5044946 5045096 Chromosome01 5044946 5045115 Chromosome01 5044946 5045168 Chromosome01 5044993 5045096 Chromosome01 5292510 5292635 Chromosome01 5292577 5292635 Chromosome01 6698849 6698976 Chromosome01 13128763 13128846 Chromosome01 13509086 13509169 Chromosome01 13509086 13509182 Chromosome01 18273293 18273468
谢谢您的帮助
正如评论中所说,你的代码无法正常工作,因为"list1"
不是list1
。
打开文件的方式过于复杂
A1_file = sys.argv[1]
with open(A1_file) as A1:
list1 = A1.readlines()
file_list = ["list1","list2","list3","list4","list5","list6","list7","list8","list9","list10","list11","list12"]
for file_name in file_list:
for eachline in file_name:
do_stuff()
这个,你的12个文件的时间。
for i in range(1,13):
with open(sys.argv[i]) as f:
lines = f.readlines()
for line in lines:
do_stuff()
这里没有必要创建一个不能按预期工作的临时file_name