获取基因ID
symbols = ['gyrB', 'atpD', 'lepA']


def get_gene_ids(symbols, taxon):
    """Map gene symbols to their gene IDs for a single taxon.

    Calls ``GeneApi.gene_metadata_by_tax_and_symbol(symbols, taxon)`` and
    collects ``{symbol: gene_id}`` from the returned matches.

    Args:
        symbols: Gene symbols to look up, e.g. ``['gyrB', 'atpD']``.
        taxon: Taxon identifier, forwarded unchanged to the API call.

    Returns:
        dict mapping each returned gene symbol to its gene ID as ``int``.
        Matches without a usable gene record are skipped, so the dict may
        hold fewer entries than ``symbols`` (or be empty).
    """
    api = gene_api.GeneApi()
    gene_metadata = api.gene_metadata_by_tax_and_symbol(symbols, taxon)

    gene_id_dict = {}
    # The response can come back without ``genes``, and a match can come back
    # without a ``gene`` record; dereferencing those unconditionally raised
    # AttributeError for some taxa.
    # NOTE(review): presumably such matches only carry warnings/errors for a
    # symbol that was not found -- confirm against the API response model.
    for gene_data in getattr(gene_metadata, "genes", None) or []:
        gene = getattr(gene_data, "gene", None)
        if gene is None or gene.gene_id is None:
            continue
        gene_id_dict[gene.symbol] = int(gene.gene_id)
    return gene_id_dict
我编写了一个函数来获取若干细菌类群的基因 ID(gene_ids)。对于我查询的某些细菌物种,该函数无法返回这些基因的 ID;然而使用 NCBI 数据库的网页搜索工具,在大多数细菌类群中都可以找到这些基因。
例如:分类单元 = Chryseobacterium indologenes,成功完成;分类单元 = 恶臭假单胞菌(Pseudomonas putida),----> AttributeError。
试试这个方法
from Bio import Entrez
# Set your email address (NCBI asks E-utilities clients to identify themselves)
Entrez.email = "[email protected]"  # Replace with your actual email

# Define symbols and taxon
symbols = ['gyrB', 'atpD', 'lepA']
taxon = "taxon_id"  # Replace with the actual numeric NCBI taxonomy ID

# Query one symbol at a time. A single combined search returns IDs in an
# order unrelated to ``symbols``, so pairing them positionally (zip) could
# attach an ID to the wrong symbol.
gene_id_dict = {}
for symbol in symbols:
    # Construct the E-utility query using Entrez field tags: the symbol is
    # restricted to the Gene Name field and the taxonomy ID is written as
    # ``txid<ID>[Organism]``.
    query = f"{symbol}[Gene Name] AND txid{taxon}[Organism]"
    handle = Entrez.esearch(db="gene", term=query, retmax=1)
    try:
        # Retrieve the gene IDs
        record = Entrez.read(handle)
    finally:
        # Always release the connection, even if parsing fails.
        handle.close()
    gene_ids = record["IdList"]
    # Symbols with no hit are simply left out of the result.
    if gene_ids:
        gene_id_dict[symbol] = gene_ids[0]

# Process the results
print(gene_id_dict)
或
from ete3 import NCBITaxa
from Bio import Entrez
def get_gene_ids(symbols, taxon):
    """Look up one Entrez Gene ID per symbol for a taxon given by name.

    The taxon name is translated to a taxonomy ID with
    ``NCBITaxa.get_name_translator``; each symbol is then searched in the
    Entrez ``gene`` database, keeping the first hit.

    ``Entrez.email`` is not set here; callers should set it beforehand.

    Args:
        symbols: Iterable of gene symbols, e.g. ``['gyrB', 'atpD']``.
        taxon: Scientific name of the taxon, e.g. ``'Pseudomonas putida'``.

    Returns:
        dict mapping each symbol that produced a hit to its gene ID (as
        returned in the search result's ``IdList``). Returns ``{}`` when the
        taxon name cannot be translated. Symbols without a hit are reported
        on stdout and omitted.
    """
    ncbi = NCBITaxa()
    name_to_taxids = ncbi.get_name_translator([taxon])
    # Use .get() so a translator result that lacks this exact key is treated
    # as "not found" instead of raising KeyError.
    taxids = name_to_taxids.get(taxon) if name_to_taxids else None
    if not taxids:
        print(f"Taxon '{taxon}' not found.")
        return {}
    taxon_id = taxids[0]

    gene_id_dict = {}
    for symbol in symbols:
        # A numeric taxonomy ID must be written as ``txid<ID>[Organism]``;
        # a bare number in the Organism field does not select the taxon.
        # ``[Gene Name]`` is the Gene-database field that holds symbols.
        term = f"{symbol}[Gene Name] AND txid{taxon_id}[Organism]"
        handle = Entrez.esearch(db="gene", term=term, retmax=1)
        try:
            record = Entrez.read(handle)
        finally:
            # Always release the connection, even if parsing fails.
            handle.close()
        try:
            gene_id_dict[symbol] = record["IdList"][0]
        except (IndexError, KeyError):
            print(f"Gene ID not found for symbol '{symbol}' in taxon '{taxon}'.")
    return gene_id_dict
# Example usage: run the same symbol list against two taxa and print each
# resulting {symbol: gene_id} mapping.
symbols = ['GyrB', 'AtpD', 'LepA']

# First taxon
taxon_chryseobacterium = 'Chryseobacterium indologenes'
gene_ids_chryseobacterium = get_gene_ids(symbols, taxon_chryseobacterium)
print(f"Gene IDs for {taxon_chryseobacterium}: {gene_ids_chryseobacterium}")

# Second taxon
taxon_pseudomonas = 'Pseudomonas putida'
gene_ids_pseudomonas = get_gene_ids(symbols, taxon_pseudomonas)
print(f"Gene IDs for {taxon_pseudomonas}: {gene_ids_pseudomonas}")