# EndNote/Zotero 批量添加 PubMed 文献脚本
# 代理使用 8089 端口,适配 clash
# 使用方法:在同目录下创建 paper.txt,每行填写一个 PMID
# 运行后生成一个名为 citation.nbib 的文件(nbib 格式)
# %%
###预处理,先把文章全部复制到一个paper.txt文本中
###先获取所有PMID: 后面的数字
import re
import pandas as pd
import numpy as np
# Route both plain-HTTP and HTTPS traffic through the local proxy on port 8089.
# requests picks the proxy entry by URL scheme, and the PubMed endpoint used
# below is https://, so an "http"-only mapping would leave it unproxied.
proxies = {
    "http": "http://127.0.0.1:8089",
    "https": "http://127.0.0.1:8089",
}
# paper.txt is read without a header row, so its values land in column 0.
pm = pd.read_csv('paper.txt', header=None)
# Convert that single column into a plain Python list of PMIDs.
pm = pm[0].tolist()
# %%
import requests
from concurrent.futures import ThreadPoolExecutor
def fetch_pmid(pmid):
    """Download the MEDLINE-format citation record for one PubMed ID.

    The request goes through the module-level ``proxies`` mapping. An attempt
    that raises a transport error or returns a non-200 status counts as a
    failure and is retried, up to ``max_retries`` attempts in total.

    Args:
        pmid: PubMed identifier; interpolated into the request URL.

    Returns:
        The response body followed by a newline, or an empty string when
        every attempt failed.
    """
    import time  # local import so the block does not rely on a new file-level import

    max_retries = 300  # maximum number of attempts
    timeout = 30  # seconds; without a timeout a stalled connection blocks the worker forever
    max_delay = 5  # seconds; upper bound for the pause between attempts
    # The URL does not change between attempts, so build it once.
    url = 'https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pubmed/?format=medline&id={}'.format(pmid)
    for attempt in range(max_retries):
        print(url)
        print("-------requests start--------------")
        try:
            res = requests.get(url, proxies=proxies, timeout=timeout)
        except requests.RequestException as exc:
            # Proxy down, timeout, connection reset, ...: treat as a failed
            # attempt instead of letting it abort the whole thread pool.
            print(f"Request error: {exc}")
        else:
            print("-------requests end--------------")
            print(res)
            status = res.status_code
            text = res.text
            res.close()
            if status == 200:
                return text + "\n"
        print(f"Request failed, retrying... (attempt {attempt + 1}/{max_retries})")
        if attempt + 1 < max_retries:
            # Pause a little longer after each failure so retries do not
            # hammer the server back-to-back.
            time.sleep(min(0.5 * (attempt + 1), max_delay))
    print(f"Failed to fetch PMID {pmid} after {max_retries} attempts.")
    return ""
cita = []
# Number of worker threads used to download records concurrently (32 here);
# adjust as needed.
num_threads = 32
with ThreadPoolExecutor(max_workers=num_threads) as executor:
    # executor.map yields results in the same order as the PMIDs in pm.
    results = list(executor.map(fetch_pmid, pm))
    cita.extend(results)
# Post-process the downloaded text and write it out.
# %%
# Strip carriage returns and end every record with one extra newline.
cita = [y.replace("\r", "") + "\n" for y in cita]
# The with-block closes the file even if the write raises.
with open("citation.nbib", "w", encoding='utf-8') as f:
    f.writelines(cita)