从 NCBI 下载基因或基因组

问题描述 投票:0回答:1

获取基因ID

symbols = ['gyrB', 'atpD', 'lepA']


def get_gene_ids(symbols, taxon):
    """Return a ``{symbol: gene_id}`` mapping for ``symbols`` within ``taxon``.

    Args:
        symbols: Gene symbols to look up (e.g. ``['gyrB', 'atpD']``).
        taxon: Taxon passed straight to
            ``GeneApi.gene_metadata_by_tax_and_symbol``.

    Returns:
        dict mapping each symbol reported by the service to its gene ID as an
        ``int``. Symbols for which the response carries no gene record are
        omitted, so the result may be empty.
    """
    # Local instance gets a lowercase name so it does not read like a class.
    api = gene_api.GeneApi()
    gene_metadata = api.gene_metadata_by_tax_and_symbol(symbols, taxon)

    gene_id_dict = {}
    # NOTE(review): presumably the response can lack ``genes`` altogether, or
    # contain matches without a ``gene`` record (e.g. only warnings) for taxa
    # where a symbol is not annotated - confirm against the client's response
    # model. Dereferencing such entries unconditionally raised AttributeError
    # and discarded the symbols that did resolve, so they are skipped instead.
    for gene_data in getattr(gene_metadata, "genes", None) or []:
        gene = getattr(gene_data, "gene", None)
        if gene is None:
            continue
        gene_id_dict[gene.symbol] = int(gene.gene_id)
    return gene_id_dict

我编写了一个函数,用来获取细菌类群的若干 gene_id。但对于我查询的某些细菌物种,该函数无法获取这些 gene_id,尽管通过 NCBI 的网页搜索工具可以在大多数细菌类群中找到它们。

例如:taxon = Chryseobacterium indologenes 时成功完成;taxon = Pseudomonas putida(恶臭假单胞菌)时则抛出 AttributeError。

sequence bioinformatics taxonomy biopython ncbi
1个回答
0
投票

试试这个方法

from Bio import Entrez

# NCBI asks every E-utilities client to identify itself with a contact email.
Entrez.email = "your.name@example.com"  # Replace with your actual email

# Gene symbols to look up and the organism to restrict the search to.
symbols = ['gyrB', 'atpD', 'lepA']
taxon = "taxon_id"  # Replace with the actual numeric NCBI taxonomy ID

# Search one symbol at a time. A single combined search returns an ID list
# that is neither ordered nor sized like ``symbols``, so pairing the two
# positionally would attach IDs to the wrong symbols.
gene_id_dict = {}
for symbol in symbols:
    # Entrez field-tag syntax: gene symbol in [Gene Name], taxonomy ID as
    # ``txid<ID>`` in [Organism].
    query = f"{symbol}[Gene Name] AND txid{taxon}[Organism]"
    handle = Entrez.esearch(db="gene", term=query)
    try:
        record = Entrez.read(handle)
    finally:
        # Release the HTTP connection even if parsing fails.
        handle.close()

    gene_ids = record["IdList"]
    if gene_ids:
        # Keep the first hit; symbols with no hit are left out of the result.
        gene_id_dict[symbol] = gene_ids[0]

# Show the symbol -> gene ID mapping.
print(gene_id_dict)

from ete3 import NCBITaxa
from Bio import Entrez

def get_gene_ids(symbols, taxon):
    """Look up one NCBI Gene ID per symbol for the named taxon.

    The taxon name is translated to a taxonomy ID with ``NCBITaxa`` and each
    symbol is then searched in the Entrez ``gene`` database.

    Args:
        symbols: Iterable of gene symbols (e.g. ``['gyrB', 'atpD']``).
        taxon: Scientific name of the organism, e.g. ``'Pseudomonas putida'``.

    Returns:
        dict mapping each symbol that produced a hit to the first gene ID
        returned by the search. Symbols without a hit are reported on stdout
        and omitted. An empty dict is returned when the taxon name cannot be
        translated.
    """
    ncbi = NCBITaxa()
    name_to_taxids = ncbi.get_name_translator([taxon])

    if not name_to_taxids:
        print(f"Taxon '{taxon}' not found.")
        return {}

    # The translator maps each name to a list of taxonomy IDs; use the first.
    taxon_id = name_to_taxids[taxon][0]

    gene_id_dict = {}

    for symbol in symbols:
        # A numeric taxonomy ID must carry the ``txid`` prefix to be matched
        # by the [Organism] field, and the gene symbol is searched through
        # the [Gene Name] field.
        term = f"{symbol}[Gene Name] AND txid{taxon_id}[Organism]"
        handle = Entrez.esearch(db="gene", term=term, retmax=1)
        try:
            record = Entrez.read(handle)
        finally:
            # Release the HTTP connection even if parsing fails.
            handle.close()

        # Only the result lookup is guarded: a missing or empty ID list means
        # "no such gene for this taxon", not a failure of the whole call.
        try:
            gene_id_dict[symbol] = record["IdList"][0]
        except (IndexError, KeyError):
            print(f"Gene ID not found for symbol '{symbol}' in taxon '{taxon}'.")

    return gene_id_dict

# Demonstration: resolve the same three symbols in two different organisms
# and print the resulting symbol -> gene ID mapping for each.
symbols = ['GyrB', 'AtpD', 'LepA']
taxon_chryseobacterium, taxon_pseudomonas = (
    'Chryseobacterium indologenes',
    'Pseudomonas putida',
)

# First organism.
gene_ids_chryseobacterium = get_gene_ids(
    symbols,
    taxon_chryseobacterium,
)
print(f"Gene IDs for {taxon_chryseobacterium}: {gene_ids_chryseobacterium}")

# Second organism.
gene_ids_pseudomonas = get_gene_ids(
    symbols,
    taxon_pseudomonas,
)
print(f"Gene IDs for {taxon_pseudomonas}: {gene_ids_pseudomonas}")
© www.soinside.com 2019 - 2024. All rights reserved.