Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 63 additions & 0 deletions Pubmed_searchkeywords.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
from Bio import Entrez
import types
import json
import pdb
from os import path
from wordcloud import WordCloud

# Contact address handed to Bio.Entrez; set once at import time, before any
# Entrez call below is made.
Entrez.email = 'huanghesong3231@yahoo.com'
# Cleaned article titles; filled by get_json() and iterated by the main script.
list_json=[]
# Keyword strings accumulated across every getkeywords() call.
list_txt=[]

def get_json():
    """Load article titles from ``title.json`` into the global ``list_json``.

    The JSON document is iterated item by item; every item has the BibTeX
    wrapper text ``title={`` and ``},`` removed and its surrounding
    whitespace stripped before it is appended to ``list_json``.
    """
    with open('title.json') as source:
        raw_titles = json.load(source)  # list

    list_json.extend(
        entry.replace("title={", "").replace("},", "").strip()
        for entry in raw_titles
    )

def search(query):
    """Return the PubMed ID of the most relevant article matching *query*.

    Args:
        query: Search term sent to PubMed (the main script passes an
            article title).

    Returns:
        The entries of the result's ``IdList`` concatenated into one string.
        ``retmax`` is ``'1'``, so at most one ID is requested; the result is
        an empty string when the list is empty or absent.
    """
    handle = Entrez.esearch(db='pubmed',
                            sort='relevance',
                            retmax='1',
                            retmode='xml',
                            term=query)
    try:
        results = Entrez.read(handle)
    finally:
        # Close the handle returned by esearch even if parsing fails.
        handle.close()

    # Local names deliberately avoid shadowing the ``list``/``str`` builtins.
    # ``or []`` keeps ``join`` from raising if 'IdList' is missing.
    id_list = results.get('IdList') or []
    return ''.join(id_list)

def _extract_keywords(xml_text):
    """Return the text of every ``<Keyword MajorTopicYN=...>`` element in *xml_text*."""
    keywords = []
    # Splitting on the opening-tag prefix works whatever the attribute value
    # is ("Y" or "N") and however many keywords share a line.
    for chunk in xml_text.split("<Keyword MajorTopicYN")[1:]:
        _, _, after_tag = chunk.partition(">")
        keyword = after_tag.partition("</Keyword>")[0].strip()
        if keyword:
            keywords.append(keyword)
    return keywords


def getkeywords(id_number):
    """Fetch one PubMed record and store its keywords.

    The keywords found in the record are appended to the global ``list_txt``
    and ``constitution.txt`` is then rewritten with the full accumulated
    list, one keyword per line.

    Args:
        id_number: PubMed ID passed to ``Entrez.efetch``. An empty value is
            ignored because there is nothing to fetch.
    """
    if not id_number:
        return

    handle = Entrez.efetch(db="pubmed", id=id_number, rettype="abstract", retmode="xml")
    try:
        payload = handle.read()
    finally:
        handle.close()

    # NOTE(review): depending on the Biopython version the XML handle may
    # yield bytes rather than str -- decode so the string handling works.
    if isinstance(payload, bytes):
        payload = payload.decode("utf-8")

    list_txt.extend(_extract_keywords(payload))

    # Open the output only after the fetch succeeded, so a network failure
    # does not leave a truncated file behind; ``with`` guarantees the close.
    with open('constitution.txt', 'w') as output:
        # An explicit separator keeps adjacent keywords from running together.
        output.write("\n".join(list_txt))



# Script entry point: look up every title on PubMed, collect the keywords and
# render them as two word clouds (default settings and max_font_size=40).
if __name__ == "__main__":
    import matplotlib.pyplot as plt

    get_json()
    for title in list_json:
        pubmed_id = search(title)
        # search() returns '' when PubMed has no match; skip the fetch then.
        if pubmed_id:
            getkeywords(pubmed_id)

    # getkeywords() writes 'constitution.txt' relative to the working
    # directory, so read it from the same place; ``with`` closes the file.
    with open('constitution.txt') as keyword_file:
        text = keyword_file.read()

    wordcloud = WordCloud().generate(text)
    plt.imshow(wordcloud)
    plt.axis("off")

    wordcloud = WordCloud(max_font_size=40).generate(text)
    plt.figure()
    plt.imshow(wordcloud)
    plt.axis("off")
    plt.show()
15 changes: 15 additions & 0 deletions Usage Instructions
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
(1) To get all the articles that cite a given article:
1. Find the article's "cites" ID on Google Scholar, e.g. in the URL "https://scholar.google.com/scholar?oi=bibs&hl=en&cites=6675397154864859782&as_sdt=5
https://docs.google.com/document/d/1foKQvjIFm8T62-flZ7IWC23g4t52Ad_o4kaLPirUtpI/" the cites ID is "6675397154864859782".
2. Run the following command in your terminal: python scholar.py -c 100 -T 6675397154864859782 --citation "bt"
100 is the number of articles you want to get from Google Scholar.
--citation "bt" requests the BibTeX entry of every article.
All the BibTeX information will then be downloaded into a JSON file.

(2) To determine the type of each citation (journal, conference proceedings, or something else) and calculate statistics for each journal title:
Run the program journal_name.py

(3) To look up the keywords of every article, run Pubmed_searchkeywords.py. It fetches the keywords from PubMed.



1 change: 1 addition & 0 deletions bibtex.json

Large diffs are not rendered by default.

51 changes: 51 additions & 0 deletions journal_name.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import matplotlib.pyplot as plt
plt.rcdefaults()
import numpy as np
import matplotlib.pyplot as plt
import json

def get_json():
    """Tally journal and conference citations from ``bibtex1.json`` and plot them.

    The file is loaded as JSON and iterated; every item is split into lines.
    Lines containing ``journal={`` or ``booktitle={`` are counted per
    distinct line, and any other line containing ``title`` is collected.
    The two totals and the collected title lines are printed, then a
    horizontal bar chart with one bar per distinct journal/conference line
    is shown.
    """
    jour = 0
    conf = 0
    titles = []  # renamed from ``list`` to avoid shadowing the builtin
    counts = {}  # raw BibTeX line -> number of occurrences

    with open('bibtex1.json') as json_file:
        data = json.load(json_file)

    for entry in data:
        for line in entry.split('\n'):
            if "journal={" in line:
                jour += 1
                counts[line] = counts.get(line, 0) + 1
            elif "booktitle={" in line:
                conf += 1
                counts[line] = counts.get(line, 0) + 1
            elif "title" in line:
                titles.append(line)

    print("the number of journal is:", jour)
    print("the number of conference is:", conf)
    print(titles)

    number = []
    name = []
    for key, count in counts.items():
        number.append(count)
        # Strip the BibTeX wrapper for both kinds of entry; previously only
        # ``journal={`` was removed, so conference bars were labelled with
        # the raw ``booktitle={`` prefix.
        label = key.replace("journal={", "").replace("booktitle={", "")
        label = label.replace("},", "")
        name.append(label.strip())

    plt.rcdefaults()
    fig, ax = plt.subplots()

    y_pos = np.arange(len(name))
    ax.barh(y_pos, number,
            color='blue')
    ax.set_yticks(y_pos)
    ax.set_yticklabels(name)
    ax.invert_yaxis()  # labels read top-to-bottom
    ax.set_xlabel('Number')
    ax.set_title('How many times a journal has show up')

    plt.show()


# Run the tally and show the chart only when this file is executed directly.
if __name__ == "__main__":
    get_json()
Loading