读取多个文件,比较值并创建一个是否存在参数的列表

问题描述 投票:0回答:1

我需要帮助。

我有12个文件,想检查“key_file.txt”中定义的每个区域在这些文件中是否存在,并生成一个表示存在与否的列表。我写了下面的代码,但运行时出现了如下错误。

File "filter_bedtools_all_samples_new.py", line 119, in <module>
    start = elems[1]

IndexError: list index out of range(列表索引超出范围)

这是代码

 import sys

 # Command-line layout (16 positional arguments):
 #   argv[1]..argv[12]  the twelve sample files (A1-A3, B1-B3, C1-C3, D1-D3)
 #   argv[13]           the key file
 #   argv[14], argv[15] integer tolerances, converted with int()
 #   argv[16]           the output file path
 # A missing argument raises IndexError on the corresponding line.
 A1_file = sys.argv[1]
 A2_file = sys.argv[2]
 A3_file = sys.argv[3]

 B1_file = sys.argv[4]
 B2_file = sys.argv[5]
 B3_file = sys.argv[6]

 C1_file = sys.argv[7]
 C2_file = sys.argv[8]
 C3_file = sys.argv[9]

 D1_file = sys.argv[10]
 D2_file = sys.argv[11]
 D3_file = sys.argv[12]

 key_file = sys.argv[13]

 # offset1 bounds the start-coordinate difference and offset2 the
 # end-coordinate difference in the comparison further down the script.
 offset1 = int(sys.argv[14])
 offset2 = int(sys.argv[15])

 out_file = sys.argv[16]

 # Open the output file for writing (truncates any existing file).
 # NOTE(review): no matching close() is visible in this script.
 outHandle = open(out_file,'w')

 # Plain record for one region: a chromosome name plus start and end
 # coordinates. The values are stored exactly as passed in; no type
 # conversion or validation is performed.
 # NOTE: the parameter name `chr` shadows the builtin chr() inside __init__.
 class Island:
    def __init__(self, chr, start, end):
            self.chr = chr
            self.start = start
            self.end = end

 # Read every sample file completely into memory, one list of lines per
 # file (list1 .. list12). readlines() keeps the trailing newline on each
 # line.
 with open(A1_file) as A1:
        list1 = A1.readlines()

 with open(A2_file) as A2:
        list2 = A2.readlines()

 with open(A3_file) as A3:
        list3 = A3.readlines()

 with open(B1_file) as B1:
        list4 = B1.readlines()

 with open(B2_file) as B2:
        list5 = B2.readlines()

 with open(B3_file) as B3:
        list6 = B3.readlines()

 with open(C1_file) as C1:
        list7 = C1.readlines()

 with open(C2_file) as C2:
        list8 = C2.readlines()

 with open(C3_file) as C3:
        list9 = C3.readlines()

 with open(D1_file) as D1:
        list10 = D1.readlines()

 with open(D2_file) as D2:
        list11 = D2.readlines()

 with open(D3_file) as D3:
        list12 = D3.readlines()

 # NOTE(review): the entries below are string literals ("list1", ...), not
 # the list objects list1..list12 created above. Iterating over one of these
 # entries therefore yields its characters, not the lines of a file.

 file_list = ["list1","list2","list3","list4","list5","list6","list7","list8","list9","list10","list11","list12"]

 #print(len(list1))

 # key_dict: counter -> Island built from the first three columns of a key line
 # out_dict: counter -> list of 12 zeros (one slot per sample file)
 # key_list: the raw key-file lines, in file order
 key_dict = {}
 out_dict = {}
 key_list = []
 counter = 0

 # Walk the key file line by line; `counter` is the 0-based line index.
 # NOTE(review): the loop body below is indented less than the `for` line
 # as shown here, so this text does not parse until the indentation is fixed.
 with open(key_file) as kf:
     for eachline in kf:
    # Result row for this key line: twelve zeros.
    # The string assigned first is overwritten immediately by the list.
    temp_list = "list_" + str(counter)
    temp_list = [0] * 12
    out_dict[counter] = temp_list

    # Only the first three tab-separated fields are used; values stay strings.
    els = eachline.split("\t")
    k_chr = els[0]
    k_start = els[1]
    k_end = els[2]

    # Keep both an Island object and the raw line for this key entry.
    temp_obj = Island(k_chr,k_start,k_end)
    key_dict[counter] = temp_obj
    key_list.append(eachline) # raw line kept as an alternative to key_dict

    counter += 1

 # Compare each key region against the lines of each sample file and record
 # the sample's `count` field when chromosome, start and end match within the
 # offset1/offset2 tolerances.
 # NOTE(review): the indentation of this section is inconsistent as shown, so
 # the intended nesting of the two loops cannot be determined from this text.
 #for k,v in key_dict.iteritems():
 for v in key_list:
     # Split the raw key line; key_end is stripped of surrounding whitespace.
     key_elems = v.split("\t")
     key_chr = key_elems[0]
     key_start = key_elems[1]
     key_end = key_elems[2].strip(' \t\r\n')

# NOTE(review): file_list contains strings such as "list1", so `file_name`
# is a string here and the inner loop iterates over its characters.
for file_name in file_list:
 #  for i in range(1,13)
 #          file_name = "list" + str(i)
            for eachline in file_name:

                    # With a single character in `eachline`, split("\t")
                    # returns a one-element list, so elems[1] raises the
                    # reported "IndexError: list index out of range".
                    elems = eachline.split("\t")
                    chr = elems[0]
                    start = elems[1]
                    end = elems[2]
        island = elems[3]
        count = elems[4]

        # Absolute coordinate differences between the key and the sample line.
        start_diff = abs(int(key_start) - int(start))
                end_diff = abs(int(key_end) - int(end))

        if (chr == key_chr):
                            # Match when start is equal or within offset1 AND
                            # end is equal or within offset2. The equality
                            # tests compare the raw strings.
                            if(((key_start == start) or (0 <= start_diff <= offset1)) and ((key_end == end) or (0 <= end_diff <= offset2))):
                                    # NOTE(review): `k` and `i` are only bound
                                    # in the commented-out loop headers above,
                                    # so they are undefined in the code as shown.
                                    temp_list = out_dict[k]
                                    temp_list[i] = count
                                    out_dict[k] = temp_list
                            else:
                                    continue
                    else:
                            continue


 # Write the string form of every result row to the output file.
 # No separator or newline is written between rows.
 # NOTE: dict.iteritems() exists only in Python 2; Python 3 uses items().
 for key,value in out_dict.iteritems():
     outHandle.write(str(value))

 print("Processing completed!")

以下是文件A1

Chromosome01    3187178 3187214 island-16   177976  .   3187178 3187214 iR  bC  bZ  bS
Chromosome01    5042128 5042182 island-32   943 .   5042128 5042182 iR  bC  bZ  bS

A2

Chromosome01    1102995 1103064 island-4    1558    .   1102995 1103064 iR  bC  bZ  bS
Chromosome01    3187178 3187227 island-9    81851   .   3187178 3187227 iR  bC  bZ  bS

Chromosome01    4144298 4144467 island-39   354 .   4144298 4144467 iR  bC  bZ  bS
Chromosome01    4144671 4145103 island-41   344 .   4144671 4145103 iR  bC  bZ  bS

B1

Chromosome01    5042128 5042238 island-15   1250    .   5042128 5042238 iR  bC  bZ  bS
Chromosome01    5042315 5042535 island-16   3256    .   5042315 5042535 iR  bC  bZ  bS

B2

Chromosome01    1102966 1103182 island-2    3910    .   1102966 1103182 iR  bC  bZ  bS
Chromosome01    5042128 5042238 island-19   3488    .   5042128 5042238 iR  bC  bZ  bS

Chromosome01    1102966 1103065 island-3    2462    .   1102966 1103065 iR  bC  bZ  bS
Chromosome01    5042128 5042237 island-20   2592    .   5042128 5042237 iR  bC  bZ  bS

C1

Chromosome01    1102973 1103182 island-4    3950    .   1102973 1103182 iR  bC  bZ  bS
Chromosome01    5042128 5042237 island-22   4965    .   5042128 5042237 iR  bC  bZ  bS

C2

Chromosome01    1102966 1103182 island-5    3697    .   1102966 1103182 iR  bC  bZ  bS
Chromosome01    5042128 5042238 island-29   2730    .   5042128 5042238 iR  bC  bZ  bS

C3

Chromosome01    1102974 1103065 island-6    1673    .   1102974 1103065 iR  bC  bZ  bS
Chromosome01    5042128 5042238 island-28   1857    .   5042128 5042238 iR  bC  bZ  bS

D1

Chromosome01    1102957 1103182 island-5    7654    .   1102957 1103182 iR  bC  bZ  bS
Chromosome01    3187180 3187215 island-21   223953  .   3187180 3187215 iR  bC  bZ  bS

D2

Chromosome01    1102973 1103182 island-5    4847    .   1102973 1103182 iR  bC  bZ  bS
Chromosome01    5042128 5042237 island-24   2300    .   5042128 5042237 iR  bC  bZ  bS

D3

Chromosome01    1102971 1103182 island-6    7091    .   1102971 1103182 iR  bC  bZ  bS
Chromosome01    5042128 5042238 island-30   2509    .   5042128 5042238 iR  bC  bZ  bS

key_list文件是:

Chromosome01    1102966 1103065 Chromosome01    1102966 1103182
Chromosome01    1102995 1103064 Chromosome01    3187178 3187214
Chromosome01    3187178 3187227 Chromosome01    4144298 4144467
Chromosome01    4144671 4145103 Chromosome01    5042128 5042182
Chromosome01    5042128 5042238 Chromosome01    5042315 5042535
Chromosome01    5042495 5042532 Chromosome01    5042663 5043093
Chromosome01    5042726 5043093 Chromosome01    5043238 5043392
Chromosome01    5043292 5043394 Chromosome01    5043520 5043752
Chromosome01    5043523 5043664 Chromosome01    5043547 5043617
Chromosome01    5043549 5043752 Chromosome01    5043902 5043961
Chromosome01    5044239 5044547 Chromosome01    5044462 5044505
Chromosome01    5044679 5044870 Chromosome01    5044679 5045096
Chromosome01    5044719 5044870 Chromosome01    5044946 5045096
Chromosome01    5044946 5045115 Chromosome01    5044946 5045168
Chromosome01    5044993 5045096 Chromosome01    5292510 5292635
Chromosome01    5292577 5292635 Chromosome01    6698849 6698976
Chromosome01    13128763    13128846 Chromosome01   13509086    13509169
Chromosome01    13509086    13509182 Chromosome01   18273293    18273468

谢谢您的帮助

python list
1个回答
0
投票

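正如评论中所说,你的代码无法正常工作,因为字符串 "list1" 并不是变量 list1。遍历字符串 "list1" 得到的是单个字符("l"、"i"、"s"……),对单个字符调用 split("\t") 只会返回含一个元素的列表,所以 elems[1] 会抛出 IndexError。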

打开文件的方式过于复杂

# Pattern used in the question, shown for one file: read the file into a
# named list, then try to reach that list through a list of name strings.
A1_file = sys.argv[1]
with open(A1_file) as A1:
    list1 = A1.readlines()

# These are string literals, not the list objects read above.
file_list = ["list1","list2","list3","list4","list5","list6","list7","list8","list9","list10","list11","list12"]

for file_name in file_list:
    # file_name is a string, so this iterates over its characters.
    for eachline in file_name:
        do_stuff()

上面这一段,你为12个文件各重复了一遍。可以简化成下面这样:

# range(1, 13) covers sys.argv[1] through sys.argv[12], i.e. the twelve
# input files; each file is read and then processed line by line.
for i in range(1,13):
    with open(sys.argv[i]) as f:
        lines = f.readlines()
    for line in lines:
        # presumably a placeholder for the per-line comparison logic
        do_stuff()

这样就没有必要再创建一个临时的 file_name 变量了——它本来也无法按预期工作。

© www.soinside.com 2019 - 2024. All rights reserved.