# 代理使用 8089 端口,适配 Clash
# 使用方法:在同目录下创建 paper.txt,每行填写一个 PMID
# 运行后生成 nbib 格式的文件 citation.nbib


# %%
###预处理,先把文章全部复制到一个paper.txt文本中
###先获取所有PMID: 后面的数字
import re
import pandas as pd
import numpy as np

# Route both plain-HTTP and HTTPS requests through the local proxy on port
# 8089. requests chooses the proxy entry by URL scheme, so an "http"-only
# mapping is never applied to https:// URLs.
proxies = {
    "http": "http://127.0.0.1:8089",
    "https": "http://127.0.0.1:8089",
}

# paper.txt has no header row; column 0 holds one PMID per line.
pm = pd.read_csv('paper.txt', header=None)
# Convert the single column to a plain Python list of PMIDs.
pm = pm[0].tolist()


# %%
import requests
from concurrent.futures import ThreadPoolExecutor

def fetch_pmid(pmid, max_retries=300, timeout=30, retry_delay=1.0):
    """Download the MEDLINE-format citation for a single PubMed ID.

    Args:
        pmid: PubMed identifier substituted into the request URL.
        max_retries: Maximum number of attempts before giving up.
        timeout: Seconds to wait for the server on each attempt; without a
            timeout a stalled connection would block the worker forever.
        retry_delay: Seconds to pause between failed attempts so retries do
            not hit the server back-to-back.

    Returns:
        The response body followed by a newline when the server answers with
        HTTP 200, or an empty string if every attempt failed.
    """
    import time  # function-scope import keeps this block self-contained

    # The URL does not change between attempts, so build it once.
    url = 'https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pubmed/?format=medline&id={}'.format(pmid)

    for attempt in range(max_retries):
        print(url)
        print("-------requests start--------------")
        try:
            res = requests.get(url, proxies=proxies, timeout=timeout)
        except requests.RequestException as exc:
            # Connection errors and timeouts count as a failed attempt
            # instead of aborting the whole download.
            print(f"Request error ({exc}), retrying... (attempt {attempt + 1}/{max_retries})")
        else:
            print("-------requests end--------------")
            print(res)
            try:
                if res.status_code == 200:
                    return res.text + "\n"
            finally:
                # Release the connection on success and on failure alike.
                res.close()
            print(f"Request failed, retrying... (attempt {attempt + 1}/{max_retries})")

        # No point sleeping after the final attempt.
        if attempt + 1 < max_retries:
            time.sleep(retry_delay)

    print(f"Failed to fetch PMID {pmid} after {max_retries} attempts.")
    return ""

# Number of worker threads for the parallel download (currently 32); adjust
# as needed.
num_threads = 32

# Fetch every PMID concurrently. Executor.map returns the results in the same
# order as the entries of `pm`.
with ThreadPoolExecutor(max_workers=num_threads) as executor:
    results = [*executor.map(fetch_pmid, pm)]

# Collect the fetched records into their own list for post-processing.
cita = list(results)

# 替换处理

# %%
# Strip carriage returns from each record and append a newline after it.
cita = [y.replace("\r", "") + "\n" for y in cita]

# Use a context manager so the file is closed even if writelines() raises.
with open("citation.nbib", "w", encoding='utf-8') as f:
    f.writelines(cita)