利用UniProt数据接口批量下载信息

最近UniProt发布了新版本,对旧接口也做了一系列更新。这里参照最新的API说明,重写了批量获取数据的代码。

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import time
import urllib
from io import StringIO

import pandas as pd
import requests
from tqdm import tqdm


_UNIPROT_API = 'https://rest.uniprot.org'


def _run_idmapping_batch(ids, fields, timeout=60, poll_seconds=3):
    """Map one batch of identifiers to UniProtKB and return the result as TSV text.

    The ID-mapping service is asynchronous: a job is submitted, its status is
    polled until it is no longer queued/running, and the result table is then
    streamed in one request.

    Args:
        ids: Sequence of accession / entry-name strings for this batch.
        fields: UniProtKB return-field names to include as columns.
        timeout: Per-request timeout in seconds.
        poll_seconds: Delay between status polls in seconds.

    Returns:
        The tab-separated result table as a string.

    Raises:
        requests.HTTPError: If any request returns a non-2xx status.
        RuntimeError: If the job ends in a state other than finished.
    """
    submit = requests.post(
        _UNIPROT_API + '/idmapping/run',
        data={'ids': ','.join(ids),
              'from': 'UniProtKB_AC-ID',
              'to': 'UniProtKB'},
        timeout=timeout,
    )
    # Fail loudly instead of parsing an error body as if it were data.
    submit.raise_for_status()
    job_id = submit.json()['jobId']

    status_url = _UNIPROT_API + '/idmapping/status/' + job_id
    while True:
        status = requests.get(status_url, timeout=timeout)
        status.raise_for_status()
        # Once the job is done the status URL redirects to the results, whose
        # JSON has no 'jobStatus' key; a missing key therefore means finished.
        state = status.json().get('jobStatus')
        if state in ('NEW', 'RUNNING'):
            time.sleep(poll_seconds)
            continue
        if state not in (None, 'FINISHED'):
            raise RuntimeError(
                'UniProt ID mapping job {} ended with status {!r}'.format(job_id, state))
        break

    results = requests.get(
        _UNIPROT_API + '/idmapping/uniprotkb/results/stream/' + job_id,
        params={'format': 'tsv', 'fields': ','.join(fields)},
        timeout=timeout,
    )
    results.raise_for_status()
    return results.text


def getfromUniprot(accessions, go=True, entry_name=False, path=None):
    """Download UniProtKB annotations for a list of accessions in batches.

    Identifiers are sent to the UniProt REST ID-mapping service in batches of
    1888 and the per-batch TSV tables are concatenated into one DataFrame.

    Args:
        accessions: Iterable of UniProtKB accessions or entry names.
        go: If true, also request the three Gene Ontology columns
            (biological process, molecular function, cellular component).
        entry_name: If true, also request the entry name column.
        path: Optional CSV file path; when given, the result is also written
            there without the index.

    Returns:
        A pandas DataFrame with one row per mapped entry. Column headers are
        the ones sent back by the service. An empty DataFrame is returned
        when ``accessions`` is empty.

    Raises:
        requests.HTTPError: If the service answers with a non-2xx status.
        RuntimeError: If a mapping job does not finish successfully.
    """
    batch_size = 1888
    # Field names for the current API: https://www.uniprot.org/help/return_fields
    # (the legacy names such as 'comment(FUNCTION)' or 'go(biological process)'
    # are not accepted by rest.uniprot.org).
    fields = ['accession', 'protein_name', 'gene_names', 'cc_function']
    if go:
        fields.extend(['go_p', 'go_f', 'go_c'])
    if entry_name:
        fields.append('id')

    ids = list(accessions)
    if not ids:
        # pd.concat([]) raises ValueError, so short-circuit empty input.
        result = pd.DataFrame()
    else:
        frames = []
        for start in tqdm(range(0, len(ids), batch_size)):
            tsv = _run_idmapping_batch(ids[start:start + batch_size], fields)
            frames.append(pd.read_csv(StringIO(tsv), sep='\t'))
        result = pd.concat(frames)

    if path:
        result.to_csv(path, index=False)
    return result