最近 UniProt 发布了新版本，并对旧的接口做了一系列更新。下面是参照最新的 API 说明重写的批量获取数据的代码：
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
|
import urllib
from io import StringIO
import pandas as pd
import requests
from tqdm import tqdm
def _fetch_uniprot_batch(ids, fields, poll_interval=3, timeout=60):
    """Run one UniProt ID-mapping job and return its result as a DataFrame.

    The job-based workflow has three steps: submit the identifiers, poll
    the job status until it is finished, then download the result as TSV.

    Args:
        ids: Sequence of accession strings for this batch.
        fields: Return-field names requested for the TSV output.
        poll_interval: Seconds to wait between two status polls.
        timeout: Per-request timeout in seconds passed to ``requests``.

    Returns:
        A ``pandas.DataFrame`` parsed from the tab-separated response.

    Raises:
        requests.HTTPError: If any request returns a 4xx/5xx status.
        RuntimeError: If the job reports a state other than NEW, RUNNING
            or FINISHED.
    """
    # Imported locally so this block does not depend on a change to the
    # file-level import list.
    import time

    base = 'https://rest.uniprot.org/idmapping'

    # NOTE(review): parameter names ('ids', 'from', 'to') and the database
    # identifiers follow the UniProt ID-mapping documentation -- confirm
    # against the live service before relying on them.
    submitted = requests.post(
        base + '/run',
        data={
            'from': 'UniProtKB_AC-ID',
            'to': 'UniProtKB',
            'ids': ','.join(ids),
        },
        timeout=timeout,
    )
    submitted.raise_for_status()
    job_id = submitted.json()['jobId']

    while True:
        # Redirects are not followed: a redirect from the status endpoint
        # is itself treated as the "job finished" signal, which avoids
        # downloading the JSON result just to discard it.
        status = requests.get(
            base + '/status/' + job_id,
            allow_redirects=False,
            timeout=timeout,
        )
        if status.is_redirect:
            break
        status.raise_for_status()
        state = status.json().get('jobStatus')
        # A payload without 'jobStatus' is assumed to be the result
        # document itself, i.e. the job is done.
        if state is None or state == 'FINISHED':
            break
        if state not in ('NEW', 'RUNNING'):
            raise RuntimeError(
                'UniProt ID mapping job %s ended with status %r'
                % (job_id, state))
        time.sleep(poll_interval)

    results = requests.get(
        base + '/uniprotkb/results/stream/' + job_id,
        params={'format': 'tsv', 'fields': ','.join(fields)},
        timeout=timeout,
    )
    results.raise_for_status()
    return pd.read_csv(StringIO(results.text), sep='\t')


def getfromUniprot(accessions, go=True, entry_name=False, path=None):
    """Fetch UniProtKB annotations for many accessions, batch by batch.

    The accessions are split into batches; each batch is resolved through
    the UniProt REST ID-mapping service and the per-batch tables are
    concatenated into one DataFrame.

    Args:
        accessions: Sequence of UniProt accession / entry-name strings.
        go: If true, also request the three Gene Ontology fields
            (biological process, molecular function, cellular component).
        entry_name: If true, also request the entry-name field.
        path: Optional file path; when given, the combined table is
            written there as CSV without the index column.

    Returns:
        A ``pandas.DataFrame`` with one row per mapped entry. An empty
        DataFrame is returned when ``accessions`` is empty.

    Raises:
        requests.HTTPError: If a request to UniProt fails.
        RuntimeError: If an ID-mapping job ends in an error state.
    """
    batch_size = 1888

    # Return-field names of the current REST API; the legacy names
    # ('id', 'protein names', 'comment(FUNCTION)', 'go(...)') belonged to
    # the retired uploadlists service.
    # https://www.uniprot.org/help/return_fields
    fields = ['accession', 'protein_name', 'gene_names', 'cc_function']
    if go:
        fields.extend(['go_p', 'go_f', 'go_c'])
    if entry_name:
        fields.append('id')

    if len(accessions) == 0:
        # pd.concat([]) raises ValueError, so short-circuit empty input.
        result = pd.DataFrame()
    else:
        frames = []
        for start in tqdm(range(0, len(accessions), batch_size)):
            batch = accessions[start:start + batch_size]
            frames.append(_fetch_uniprot_batch(batch, fields))
        # ignore_index gives the combined table one continuous index
        # instead of repeating 0..k for every batch.
        result = pd.concat(frames, ignore_index=True)

    if path:
        result.to_csv(path, index=False)
    return result
|