ckreibich · HesongHuang · Mar 8, 2017 · Mar 9, 2017 · Mar 9, 2017 · Mar 9, 2017
diff --git a/Pubmed_searchkeywords.py b/Pubmed_searchkeywords.py
@@ -0,0 +1,63 @@
+from Bio import Entrez
+import types
+import json
+import pdb
+from os import path
+from wordcloud import WordCloud
+
+Entrez.email = 'huanghesong3231@yahoo.com'
+list_json=[]
+list_txt=[]
+
+def get_json():
+    with open('title.json') as json_file:
+        data = json.load(json_file)   #list
+        for m in data:
+            m = m.replace("title={","")
+            m = m.replace("},","")
+            m = m.strip()
+            list_json.append(m)
+
+def search(query):
+    handle = Entrez.esearch(db='pubmed',
+                            sort='relevance',
+                            retmax='1',
+                            retmode='xml',
+                            term=query)
+    results= Entrez.read(handle)
+    list=results.get('IdList') 
+    str = ''.join(list)
+    return str
+
+def getkeywords(id_number):
+    file = open('constitution.txt', 'w')
+    handle = Entrez.efetch(db="pubmed", id=id_number,rettype="abstract", retmode="xml")
+    a=handle.read()
+    b=a.split("\n")
+    for c in b:
+        if "Keyword MajorTopicYN" in c:  #string
+           c = c.replace('<Keyword MajorTopicYN="N">',"")
+           c = c.replace("</Keyword>","")
+           list_txt.append(c)
+    str_change = ''.join(list_txt)
+    file.write(str_change)
+    file.close()
+
+
+
+if __name__=="__main__":
+   get_json()
+   for i in list_json:
+       str = search(i)
+       getkeywords(str)
+   d = path.dirname(__file__)
+   text = open(path.join(d, 'constitution.txt')).read()
+   wordcloud = WordCloud().generate(text)
+   import matplotlib.pyplot as plt
+   plt.imshow(wordcloud)
+   plt.axis("off")
+   wordcloud = WordCloud(max_font_size=40).generate(text)
+   plt.figure()
+   plt.imshow(wordcloud)
+   plt.axis("off")
+   plt.show()
diff --git a/Usage Instructions b/Usage Instructions
@@ -0,0 +1,15 @@
+(1) To get all the articles that cite a given article:
+1. Get the "cites" number from Google Scholar, e.g. "https://scholar.google.com/scholar?oi=bibs&hl=en&cites=6675397154864859782&as_sdt=5
+https://docs.google.com/document/d/1foKQvjIFm8T62-flZ7IWC23g4t52Ad_o4kaLPirUtpI/", where the cited number is "6675397154864859782"
+2. Run the following command in your terminal: python scholar.py -c 100 -T 6675397154864859782 --citation "bt"
+    100 is the number of articles you want to get from Google Scholar
+    --citation "bt" requests the BibTeX entry of every article
+All the BibTeX information will then be downloaded into a JSON file
+
+(2) To determine the type of each citation (journal, conference proceedings, or something else) and to count how often each journal title appears:
+Run the program journal_name.py
+
+(3) To look up the keywords of every article title, run Pubmed_searchkeywords.py. It fetches the keywords from PubMed.
+
+
+
diff --git a/bibtex.json b/bibtex.json
diff --git a/journal_name.py b/journal_name.py
@@ -0,0 +1,51 @@
+import matplotlib.pyplot as plt
+plt.rcdefaults()
+import numpy as np
+import matplotlib.pyplot as plt
+import json
+
+def get_json():
+	jour=0
+	conf=0
+	list=[]
+	diction={}
+	number=[]
+	name=[]
+	with open('bibtex1.json') as json_file:
+		data = json.load(json_file)
+		for a in data:
+			b = a.split('\n')
+			for m in b: 
+				if "journal={" in m:
+					jour=jour+1
+					diction[m]= diction.get(m,0)+1
+				elif "booktitle={" in m:
+					conf= conf+1
+					diction[m]= diction.get(m,0)+1
+				elif "title" in m:
+					list.append(m) 
+		print ("the number of journal is:",jour)
+		print ("the number of conference is:",conf)
+		print (list)
+		for key in diction:
+			number.append(diction[key])
+			key = key.replace("journal={","")
+			key = key.replace("},","")
+			name.append(key)
+	plt.rcdefaults()
+	fig, ax = plt.subplots()
+
+	y_pos = np.arange(len(name))
+	ax.barh(y_pos, number,
+			color='blue')
+	ax.set_yticks(y_pos)
+	ax.set_yticklabels(name)
+	ax.invert_yaxis()  # labels read top-to-bottom
+	ax.set_xlabel('Number')
+	ax.set_title('How many times a journal has show up')
+
+	plt.show()
+
+
+if __name__=="__main__":
+	get_json()