From 024df5b80755c571f2f72c4e88e941f4813a8d40 Mon Sep 17 00:00:00 2001 From: Amir Zeinali Date: Mon, 5 Sep 2022 02:29:47 +0430
Subject: [PATCH 01/21] added offset & query; changed the way URLs are built.

The new -q flag performs a normal search, just like typing into the Google Scholar search box. URL construction was reworked because the old scheme sent useless arguments in the URL, and Google was banning me because of them. The new -o flag adds an offset: the number of leading articles to skip.

--- scholar.py | 125 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 76 insertions(+), 49 deletions(-)

diff --git a/scholar.py b/scholar.py index 13ccd43..0b8aee5 100755 --- a/scholar.py +++ b/scholar.py
@@ -165,6 +165,7 @@ import os import re import sys +from typing import OrderedDict import warnings try:
@@ -745,24 +746,32 @@ class SearchScholarQuery(ScholarQuery): This version represents the search query parameters the user can configure on the Scholar website, in the advanced search options. """ - SCHOLAR_QUERY_URL = ScholarConf.SCHOLAR_SITE + '/scholar?' \ + 'as_q=%(words)s' \ + '&as_epq=%(phrase)s' \ + '&as_oq=%(words_some)s' \ + '&as_eq=%(words_none)s' \ + '&as_occt=%(scope)s' \ + '&as_sauthors=%(authors)s' \ + '&as_publication=%(pub)s' \ + '&as_ylo=%(ylo)s' \ + '&as_yhi=%(yhi)s' \ + '&as_vis=%(citations)s' \ + '&btnG=&hl=en' \ + '%(num)s' \ + '&as_sdt=%(patents)s%%2C5' + BASE_URL = ScholarConf.SCHOLAR_SITE + '/scholar?' + + URL_ARGS = OrderedDict({ + 'offset': 'start', + 'query': 'q', + 'words': 'as_q', + 'phrase': 'as_epq', + 'word_some': 'as_oq', + 'words_none': 'as_eq', + 'scope': 'as_occt', + 'authors': 'as_sauthors', + 'pub': 'as_publication', + 'ylo': 'as_ylo', + 'yhi': 'as_yhi', + 'citations': 'as_vis', + 'btnG': 'btnG', + 'lang': 'hl', + 'num_results': 'num', + 'patents': 'as_sdt' + }) def __init__(self): ScholarQuery.__init__(self) self._add_attribute_type('num_results', 'Results', 0) + self.offset = None + self.query = None self.words = None # The default search behavior self.words_some = None # At least one of those words self.words_none = None # None of these words
@@ -771,9 +780,51 @@ def __init__(self): self.author = None self.pub = None self.timeframe = [None, None] + self.btnG = '' + self.lang = 'en' self.include_patents = True self.include_citations = True + @property + def url_query(self): + args = { + 'offset': self.offset, + 'query': self.query, + 'words': self.words, + 'phrase': self.phrase, + 'word_some': self._parenthesize_phrases(self.words_some) if self.words_some else None, + 'words_none': self._parenthesize_phrases(self.words_none) if self.words_none else None, + 'scope': self.scope_title, + 'authors': self.author, + 'pub': self.pub, + 'ylo': self.timeframe[0], + 'yhi': self.timeframe[1], + 'citations': '0' if self.include_citations else '1', + 'btnG': self.btnG, + 'lang': self.lang, + 'num_results': self.num_results, + 'patents': '%s%%2C5' % ('0' if self.include_patents else '1') + } + + query = '' + + for key, val in args.items(): + if val != None: + query += '%s=%s&' % (self.URL_ARGS[key], quote(encode(val))) + + # deleting last '&' + query = query[: -1] + + return query + + def set_offset(self, offset): + """Sets the offset: the first (offset) articles of the search results are skipped.""" + self.offset = offset + + def set_query(self, query): + """Sets the plain search query, as typed into the Scholar search box.""" + self.query = query def set_words(self, words): """Sets words that *all* must be found in the result.""" self.words = words
@@ -826,43 +877,11 @@ def get_url(self): if self.words is None and self.words_some is None \ and self.words_none is None and self.phrase is None \ and self.author is None and self.pub is None \ - and self.timeframe[0] is None and self.timeframe[1] is None: + and self.timeframe[0] is None and self.timeframe[1] is None \ + and self.query is None: raise QueryArgumentError('search query needs more parameters') - # If we have some-words or none-words lists, we need to - # process them so GS understands them. For simple - # space-separeted word lists, there's nothing to do. For lists - # of phrases we have to ensure quotations around the phrases, - # separating them by whitespace. - words_some = None - words_none = None - - if self.words_some: - words_some = self._parenthesize_phrases(self.words_some) - if self.words_none: - words_none = self._parenthesize_phrases(self.words_none) - - urlargs = {'words': self.words or '', - 'words_some': words_some or '', - 'words_none': words_none or '', - 'phrase': self.phrase or '', - 'scope': 'title' if self.scope_title else 'any', - 'authors': self.author or '', - 'pub': self.pub or '', - 'ylo': self.timeframe[0] or '', - 'yhi': self.timeframe[1] or '', - 'patents': '0' if self.include_patents else '1', - 'citations': '0' if self.include_citations else '1'} - - for key, val in urlargs.items(): - urlargs[key] = quote(encode(val)) - - # The following URL arguments must not be quoted, or the - # server will not recognize them: - urlargs['num'] = ('&num=%d' % self.num_results - if self.num_results is not None else '') - - return self.SCHOLAR_QUERY_URL % urlargs + return self.BASE_URL + self.url_query class ScholarSettings(object):
@@ -1165,6 +1184,10 @@ def main(): parser = optparse.OptionParser(usage=usage, formatter=fmt) group = optparse.OptionGroup(parser, 'Query arguments', 'These options define search query arguments and parameters.') + group.add_option('-q', '--query', metavar='QUERY', default=None, + help='Normal search query.') + group.add_option('-o', '--offset', metavar='OFFSET', default=None, + help='Skip the first OFFSET articles in the search results.') group.add_option('-a', '--author', metavar='AUTHORS', default=None, help='Author name(s)') group.add_option('-A', '--all', metavar='WORDS', default=None, dest='allw',
@@ -1265,6 +1288,10 @@ def main(): query = ClusterScholarQuery(cluster=options.cluster_id) else: query = SearchScholarQuery() + if options.offset: + query.set_offset(options.offset) + if options.query: + query.set_query(options.query) if options.author: query.set_author(options.author) if options.allw:

From c255020a4ca93b37f5edcd8a410e354d6794ac6a Mon Sep 17 00:00:00 2001 From: Amir Zeinali Date: Mon, 5 Sep 2022 02:47:59 +0430
Subject: [PATCH 02/21] fixed scope_title bug and another bug in url_query.
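
The scope bug: url_query serialized the raw boolean, so Scholar received as_occt=True instead of as_occt=title. The other bug: the loop iterated the plain args dict instead of URL_ARGS, so parameters were not guaranteed to come out in the order URL_ARGS defines, and lookups went in the wrong direction. A minimal standalone sketch of the corrected behavior (hypothetical values; the real code lives in url_query in the diff below):

    from collections import OrderedDict
    from urllib.parse import quote

    URL_ARGS = OrderedDict([('query', 'q'), ('ylo', 'as_ylo')])
    args = {'ylo': 2010, 'query': 'portfolio optimization'}

    # iterate URL_ARGS, not args, so the parameter order is stable
    parts = ['%s=%s' % (name, quote(str(args[key])))
             for key, name in URL_ARGS.items() if args.get(key) is not None]
    print('&'.join(parts))  # q=portfolio%20optimization&as_ylo=2010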
--- scholar.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/scholar.py b/scholar.py index 0b8aee5..c64d3bc 100755 --- a/scholar.py +++ b/scholar.py
@@ -794,7 +794,7 @@ def url_query(self): 'phrase': self.phrase, 'word_some': self._parenthesize_phrases(self.words_some) if self.words_some else None, 'words_none': self._parenthesize_phrases(self.words_none) if self.words_none else None, - 'scope': self.scope_title, + 'scope': 'title' if self.scope_title else 'any', 'authors': self.author, 'pub': self.pub, 'ylo': self.timeframe[0], 'yhi': self.timeframe[1],
@@ -808,9 +808,9 @@ def url_query(self): query = '' - for key, val in args.items(): - if val != None: - query += '%s=%s&' % (self.URL_ARGS[key], quote(encode(val))) + for key, val in self.URL_ARGS.items(): + if args[key] != None: + query += '%s=%s&' % (val, quote(encode(args[key]))) # deleting last '&' query = query[: -1]
@@ -881,6 +881,7 @@ def get_url(self): and self.query is None: raise QueryArgumentError('search query needs more parameters') + print(self.BASE_URL + self.url_query) return self.BASE_URL + self.url_query

From 83897a4226fc1d70b1b0267b97300004029f3ead Mon Sep 17 00:00:00 2001 From: Amir Zeinali Date: Mon, 5 Sep 2022 02:58:57 +0430
Subject: [PATCH 03/21] deleted leftover print() in code.

--- scholar.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/scholar.py b/scholar.py index c64d3bc..3b352fb 100755 --- a/scholar.py +++ b/scholar.py
@@ -880,8 +880,7 @@ def get_url(self): and self.timeframe[0] is None and self.timeframe[1] is None \ and self.query is None: raise QueryArgumentError('search query needs more parameters') - - print(self.BASE_URL + self.url_query) + return self.BASE_URL + self.url_query

From 35e90bbc440e09c9ebc4ead9ec0e20356f48355e Mon Sep 17 00:00:00 2001 From: Amir Zeinali Date: Mon, 5 Sep 2022 14:18:00 +0430
Subject: [PATCH 04/21] added some comments

--- scholar.py | 2 ++ 1 file changed, 2 insertions(+)

diff --git a/scholar.py b/scholar.py index 3b352fb..fbe8c09 100755 --- a/scholar.py +++ b/scholar.py
@@ -787,6 +787,8 @@ def __init__(self): @property def url_query(self): + """Builds the query string that is appended to BASE_URL for the request.""" + args = { 'offset': self.offset, 'query': self.query,

From dbe884e7517717bec5353ddaf77bdb4720728590 Mon Sep 17 00:00:00 2001 From: Amir Zeinali Date: Mon, 5 Sep 2022 14:51:33 +0430
Subject: [PATCH 05/21] added __len__, __iadd__ to ScholarQuerier, and added max-results

--- scholar.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/scholar.py b/scholar.py index fbe8c09..b47f405 100755 --- a/scholar.py +++ b/scholar.py
@@ -1127,6 +1127,12 @@ def _get_http_response(self, url, log_msg=None, err_msg=None): ScholarUtils.log('info', err_msg + ': %s' % err) return None + def __len__(self): + return len(self.articles) + + def __iadd__(self, other): + self.articles += other.articles + return self def txt(querier, with_globals): if with_globals:
@@ -1214,8 +1220,12 @@ def main(): help='Do not include citations in results') group.add_option('-C', '--cluster-id', metavar='CLUSTER_ID', default=None, help='Do not search, just use articles in given cluster ID') - group.add_option('-c', '--count', type='int', default=None, - help='Maximum number of results') + group.add_option('-m', '--max-results', type='int', default=None, + help='Maximum number of results to fetch; if it exceeds the number of available results, all results are returned') + group.add_option('--all-results', action='store_true', default=False, + help='Fetch all available results') + # group.add_option('-c', '--count', type='int', default=None, # help='Maximum number of results per page') parser.add_option_group(group) group = optparse.OptionGroup(parser, 'Output format',
@@ -1315,12 +1325,20 @@ def main(): if options.no_citations: query.set_include_citations(False) - if options.count is not None: - options.count = min(options.count, ScholarConf.MAX_PAGE_RESULTS) - query.set_num_page_results(options.count) + if options.max_results is not None: + # if user wants less than MAX_PAGE_RESULTS articles + # set per-page results to max_results + if options.max_results < ScholarConf.MAX_PAGE_RESULTS: + query.set_num_page_results(options.max_results) + # else: + + # options.count = min(options.count, ScholarConf.MAX_PAGE_RESULTS) querier.send_query(query) + # check + + if options.csv: csv(querier) elif options.csv_header:

From fb8b5af6627d9f8c917ecff63b0bf5d162db2119 Mon Sep 17 00:00:00 2001 From: Amir Zeinali Date: Mon, 5 Sep 2022 17:00:00 +0430
Subject: [PATCH 06/21] added support for getting more than 10 results.

--- scholar.py | 47 ++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 7 deletions(-)

diff --git a/scholar.py b/scholar.py index b47f405..9494c2f 100755 --- a/scholar.py +++ b/scholar.py
@@ -165,6 +165,7 @@ import os import re import sys +from time import sleep from typing import OrderedDict import warnings
@@ -1031,12 +1032,14 @@ def apply_settings(self, settings): ScholarUtils.log('info', 'settings applied') return True - def send_query(self, query): + def send_query(self, query, clear=True): """ This method initiates a search query (a ScholarQuery instance) with subsequent parsing of the response. """ - self.clear_articles() + if clear: + self.clear_articles() + self.query = query html = self._get_http_response(url=query.get_url(),
@@ -1222,10 +1225,13 @@ def main(): help='Do not search, just use articles in given cluster ID') group.add_option('-m', '--max-results', type='int', default=None, help='Maximum number of results to fetch; if it exceeds the number of available results, all results are returned') + group.add_option('-D', '--delay', type='float', default=2.0, + help='delay between requests, to avoid getting banned by Google for a DOS-like burst of traffic! default is 2 sec')
group.add_option('--all-results', action='store_true', default=False, help='Fetch all available results') # group.add_option('-c', '--count', type='int', default=None, # help='Maximum number of results per page') parser.add_option_group(group) group = optparse.OptionGroup(parser, 'Output format',
@@ -1296,6 +1302,7 @@ def main(): querier.apply_settings(settings) + if options.cluster_id: query = ClusterScholarQuery(cluster=options.cluster_id) else: query = SearchScholarQuery()
@@ -1327,18 +1334,44 @@ def main(): if options.max_results is not None: # if user wants less than MAX_PAGE_RESULTS articles if options.max_results < ScholarConf.MAX_PAGE_RESULTS: + # set per-page results to max_results query.set_num_page_results(options.max_results) querier.send_query(query) + # offset is number of first articles to skip + offset = options.offset if options.offset else 0 + + # all articles available after the offset + all_results_num = query['num_results'] - offset + + # set results number to get + if options.all_results: + results_num_to_get = all_results_num + elif options.max_results: + results_num_to_get = min(options.max_results, all_results_num) + else: + results_num_to_get = len(querier) + remaining_to_get = results_num_to_get - len(querier) + + # if we didn't get enough articles, get the remaining articles + while remaining_to_get > 0: + sleep(options.delay) + # set offset + query.offset = offset + len(querier) + + # if remaining articles to get is less than max results per page + if remaining_to_get < ScholarConf.MAX_PAGE_RESULTS: + # then just get remaining results + query.set_num_page_results(remaining_to_get) + + querier.send_query(query, clear=False) + + remaining_to_get = results_num_to_get - len(querier) + print(len(querier)) if options.csv: csv(querier) elif options.csv_header:

From 66a66595f44dbc5554fcad2e772811bd1adc5b69 Mon Sep 17 00:00:00 2001 From: Amir Zeinali Date: Mon, 5 Sep 2022 17:51:34 +0430
Subject: [PATCH 07/21] deleted testing print()s, wrote better help for the query option

--- scholar.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/scholar.py b/scholar.py index 9494c2f..2af6e51 100755 --- a/scholar.py +++ b/scholar.py
@@ -1196,8 +1196,8 @@ def main(): group = optparse.OptionGroup(parser, 'Query arguments', 'These options define search query arguments and parameters.') group.add_option('-q', '--query', metavar='QUERY', default=None, help='Normal search query. If your query includes double quotes ("), replace them with (\\") and wrap the whole query in single quotes (\'). Example: \'portfolio optimization in \\"stock markets\\"\'') group.add_option('-o', '--offset', type='int', metavar='OFFSET', default=None, help='Skip the first OFFSET articles in the search results.') group.add_option('-a', '--author', metavar='AUTHORS', default=None, help='Author name(s)')
@@ -1231,7 +1231,7 @@ def main(): help='Fetch all available results') # group.add_option('-c', '--count', type='int', default=None, # help='Maximum number of results per page') - + parser.add_option_group(group) group = optparse.OptionGroup(parser, 'Output format',
@@ -1356,9 +1356,12 @@ def main(): remaining_to_get = results_num_to_get - len(querier) print(sys.argv) # if we didn't get enough articles, get the remaining articles while remaining_to_get > 0: print(f'{len(querier)}/{remaining_to_get}') sleep(options.delay) # set offset query.offset = offset + len(querier) # if remaining articles to get is less than max results per page if remaining_to_get < ScholarConf.MAX_PAGE_RESULTS: # then just get remaining results query.set_num_page_results(remaining_to_get) querier.send_query(query, clear=False) remaining_to_get = results_num_to_get - len(querier) if options.csv: csv(querier) elif options.csv_header:

From 981204723f3cdb42011e9c07ec5ac4b228359acc Mon Sep 17 00:00:00 2001 From: Amir Zeinali Date: Mon, 5 Sep 2022 18:12:37 +0430
Subject: [PATCH 08/21] fixed getting num_results on pages other than the first

--- scholar.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/scholar.py b/scholar.py index 2af6e51..bbddf62 100755 --- a/scholar.py +++ b/scholar.py
@@ -417,7 +417,8 @@ def _parse_globals(self): # raw text is a list because the body contains etc if raw_text is not None and len(raw_text) > 0: try: - num_results = raw_text[0].split()[1] + # the first token after 'about ' is the total number of results found + num_results = raw_text[0].lower().split('about ')[1].split()[0] # num_results may now contain commas to separate # thousands, strip: num_results = num_results.replace(',', '')
@@ -1196,7 +1197,7 @@ def main(): group = optparse.OptionGroup(parser, 'Query arguments', 'These options define search query arguments and parameters.') group.add_option('-q', '--query', metavar='QUERY', default=None, help='Normal search query. If your query includes double quotes (") or single quotes (\'), replace them with (\\") and (\\\') and wrap the whole query in single quotes (\'). Example: \'portfolio\\\'s optimization in \\"stock markets\\"\'') group.add_option('-o', '--offset', type='int', metavar='OFFSET', default=None, help='Skip the first OFFSET articles in the search results.')
@@ -1356,15 +1357,13 @@ def main(): remaining_to_get = results_num_to_get - len(querier) # if we didn't get enough articles, get the remaining articles while remaining_to_get > 0: sleep(options.delay) # set offset query.offset = offset + len(querier) # if remaining articles to get is less than max results per page if remaining_to_get < ScholarConf.MAX_PAGE_RESULTS: # then just get remaining results query.set_num_page_results(remaining_to_get) querier.send_query(query, clear=False) remaining_to_get = results_num_to_get - len(querier) if options.csv: csv(querier) elif options.csv_header:

From 3e8a2a3ac5458132c85779ef04cc4031a5a959ff Mon Sep 17 00:00:00 2001 From: Amir Zeinali Date: Mon, 5 Sep 2022 18:42:30 +0430
Subject: [PATCH 09/21] break out of the loop if we got banned by Google

--- scholar.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/scholar.py b/scholar.py index bbddf62..d365bba 100755 --- a/scholar.py +++ b/scholar.py
@@ -1363,7 +1363,7 @@ def main(): # set offset query.offset = offset + len(querier) # if remaining articles to get is less than max results per page if remaining_to_get < ScholarConf.MAX_PAGE_RESULTS: # then just get remaining results query.set_num_page_results(remaining_to_get) querier.send_query(query, clear=False) + # if there's a problem getting articles, break out of the loop + # it can mean that there are no more articles to get. + # or that we got banned by Google! + if results_num_to_get - len(querier) == remaining_to_get: + print("WARNING: there was probably a problem getting all of the requested articles.") + print(f"got {len(querier)} articles out of {results_num_to_get} articles.") + print("this may mean we got banned by Google.") + print("or maybe some articles were unavailable.") + break + remaining_to_get = results_num_to_get - len(querier) + print(f'remaining: {remaining_to_get}') if options.csv: csv(querier)

From 2cf81d19fb2064a8d018b82c84428b46080c0d55 Mon Sep 17 00:00:00 2001 From: Amir Zeinali Date: Mon, 5 Sep 2022 18:43:42 +0430
Subject: [PATCH 10/21] commented out the print of remaining articles to get

--- scholar.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scholar.py b/scholar.py index d365bba..9fa107d 100755 --- a/scholar.py +++ b/scholar.py
@@ -1382,7 +1382,7 @@ def main(): break remaining_to_get = results_num_to_get - len(querier) - print(f'remaining: {remaining_to_get}') + # print(f'remaining: {remaining_to_get}') if options.csv: csv(querier)

From e522d0d4a13a137d7d1aa5c48633ccb20868cfad Mon Sep 17 00:00:00 2001 From: Amir Zeinali Date: Tue, 6 Sep 2022 13:51:07 +0430
Subject: [PATCH 11/21] added .gitignore

--- .gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2211df6 --- /dev/null +++ b/.gitignore
@@ -0,0 +1 @@ +*.txt

From 897a55245ae2005bbd5909e57e1cce74d661f2e8 Mon Sep 17 00:00:00 2001 From: Amir Zeinali Date: Wed, 7 Sep 2022 13:39:27 +0430
Subject: [PATCH 12/21] fixed citation problem by changing citation data from bytes to str.
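
In Python 3 the HTTP response payload is bytes, while the rest of the pipeline (and the --citation export) expects str, which broke citation handling. A minimal sketch of the guard this patch adds in get_citation_data (illustrative payload value):

    data = b'@article{perold1984large, ...}'  # hypothetical response payload
    if type(data) == bytes:                   # decode only when needed
        data = data.decode('utf-8')           # export formats (BibTeX etc.) are text
    assert isinstance(data, str)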
--- scholar.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/scholar.py b/scholar.py index 9fa107d..99b820e 100755 --- a/scholar.py +++ b/scholar.py
@@ -485,7 +485,7 @@ def _parse_links(self, span): self._strip_url_arg('num', self._path2url(tag.get('href'))) if tag.getText().startswith('Import'): - self.article['url_citation'] = self._path2url(tag.get('href')) + self.article['url_citation'] = tag.get('href') @staticmethod
@@ -1005,7 +1005,8 @@ def apply_settings(self, settings): # to Google. soup = SoupKitchen.make_soup(html) - tag = soup.find(name='form', attrs={'id': 'gs_settings_form'}) + tag = soup.find(name='form', attrs={'id': 'gs_bdy_frm'}) + if tag is None: ScholarUtils.log('info', 'parsing settings failed: no form') return False
@@ -1026,7 +1027,7 @@ def apply_settings(self, settings): html = self._get_http_response(url=self.SET_SETTINGS_URL % urlargs, log_msg='dump of settings result HTML', - err_msg='applying setttings failed') + err_msg='applying settings failed') if html is None: return False
@@ -1069,6 +1070,10 @@ def get_citation_data(self, article): if data is None: return False + # data is + if type(data) == bytes: + data = data.decode('utf-8') + article.set_citation_data(data) return True

From 34927e76b205f813c858cae27f161848e8150939 Mon Sep 17 00:00:00 2001 From: Amir Zeinali Date: Wed, 7 Sep 2022 19:38:56 +0430
Subject: [PATCH 13/21] fixed apply_settings method and citation bug

--- scholar.py | 92 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 73 insertions(+), 19 deletions(-)

diff --git a/scholar.py b/scholar.py index 99b820e..95f900f 100755 --- a/scholar.py +++ b/scholar.py
@@ -936,16 +936,25 @@ class ScholarQuerier(object): GET_SETTINGS_URL = ScholarConf.SCHOLAR_SITE + '/scholar_settings?' \ + 'sciifh=1&hl=en&as_sdt=0,5' - SET_SETTINGS_URL = ScholarConf.SCHOLAR_SITE + '/scholar_setprefs?' \ + 'q=' \ + '&scisig=%(scisig)s' \ + '&inststart=0' \ + '&as_sdt=1,5' \ + '&as_sdtp=' \ + '&num=%(num)s' \ + '&scis=%(scis)s' \ + '%(scisf)s' \ + '&hl=en&lang=all&instq=&inst=569367360547434339&save=' + # example set-settings url: + # https://scholar.google.com/scholar_setprefs?inststart=0&scisig=AAGBfm0AAAAAYxisq4fTruxOSf9qjln8EPloukoQ1EtW&xsrf=&num=10&scis=yes&scisf=4&hl=de&lang=all&instq=&boi_access=1&has_boo_access=1&has_casa_opt_in=1&save= + BASE_SETTINGS_URL = ScholarConf.SCHOLAR_SITE + '/scholar_setprefs?' + + SETTING_ARGS = OrderedDict({ + 'inststart': 'inststart', + 'scisig': 'scisig', + 'xsrf': 'xsrf', + 'num_results': 'num', + 'scis': 'scis', + 'scisf': 'scisf', + 'lang': 'hl', + 'art_lang': 'lang', + 'instq': 'instq', + 'boi_access': 'boi_access', + 'has_boo_access': 'has_boo_access', + 'has_casa_opt_in': 'has_casa_opt_in', + 'save': 'save' + }) # Older URLs: # ScholarConf.SCHOLAR_SITE + '/scholar?q=%s&hl=en&btnG=Search&as_sdt=2001&as_sdtp=on
@@ -967,6 +976,21 @@ def __init__(self): self.query = None self.cjar = MozillaCookieJar() + self.inststart = '0' + self.scisig = '' + self.xsrf = '' + self.num = None + self.scis = None + self.scisf = None + self.lang = 'en' + self.art_lang = 'all' + self.instq = '' + self.boi_access = 1 + self.has_boo_access = 1 + self.has_casa_opt_in = 1 + self.save = '' + # If we have a cookie file, load it: if ScholarConf.COOKIE_JAR_FILE and \ os.path.exists(ScholarConf.COOKIE_JAR_FILE):
@@ -980,6 +1004,37 @@ def __init__(self): self.opener = build_opener(HTTPCookieProcessor(self.cjar)) self.settings = None # Last settings object, if any + + @property + def setting_query(self): + """Builds the query string that is appended to BASE_SETTINGS_URL for the request.""" + + args = { + 'inststart': self.inststart, + 'scisig': self.scisig, + 'xsrf': self.xsrf, + 'num_results': self.num, + 'scis': self.scis, + 'scisf': self.scisf, + 'lang': self.lang, + 'art_lang': self.art_lang, + 'instq': self.instq, + 'boi_access': self.boi_access, + 'has_boo_access': self.has_boo_access, + 'has_casa_opt_in': self.has_casa_opt_in, + 'save': self.save + } + + query = '' + + for key, val in self.SETTING_ARGS.items(): + if args[key] != None: + query += '%s=%s&' % (val, quote(encode(args[key]))) + + # deleting last '&' + query = query[: -1] + + return query def apply_settings(self, settings): """
@@ -1016,18 +1071,17 @@ def apply_settings(self, settings): ScholarUtils.log('info', 'parsing settings failed: scisig') return False - urlargs = {'scisig': tag['value'], - 'num': settings.per_page_results, - 'scis': 'no', - 'scisf': ''} + self.scisig = tag['value'] + self.num = settings.per_page_results + self.scis = 'no' if settings.citform != 0: - urlargs['scis'] = 'yes' - urlargs['scisf'] = '&scisf=%d' % settings.citform + self.scis = 'yes' + self.scisf = '%d' % settings.citform html = self._get_http_response(url=self.BASE_SETTINGS_URL + self.setting_query, log_msg='dump of settings result HTML', - err_msg='applying settings failed') + err_msg='applying settings failed') if html is None: return False
@@ -1070,7 +1124,7 @@ def get_citation_data(self, article): if data is None: return False - # data is + # change to str if it's bytes if type(data) == bytes: data = data.decode('utf-8') article.set_citation_data(data) return True

From c9110fbaaf0e6bcb658da7b08fd849f22936a5e4 Mon Sep 17 00:00:00 2001 From: Amir Zeinali Date: Thu, 8 Sep 2022 02:20:01 +0430
Subject: [PATCH 14/21] added bibTex parser

--- scholar.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+)

diff --git a/scholar.py b/scholar.py index 95f900f..067c5db 100755 --- a/scholar.py +++ b/scholar.py
@@ -304,6 +304,7 @@ def __init__(self): # The citation data in one of the standard export formats, # e.g. BibTeX.
self.citation_data = None + self.citation_format = None def __getitem__(self, key): if key in self.attrs:
@@ -487,6 +488,42 @@ def _parse_links(self, span): if tag.getText().startswith('Import'): self.article['url_citation'] = tag.get('href') + def _parse_bib(self, bib_text): + """Parses the BibTeX citation data and extracts its fields.""" + + # check if citation data exists + if self.article.citation_data is None: + return False + + # bibTex sample: + # @article{perold1984large, + # title={Large-scale portfolio optimization}, + # author={Perold, Andre F}, + # journal={Management science}, + # volume={30}, + # number={10}, + # pages={1143--1160}, + # year={1984}, + # publisher={INFORMS} + # } + + # regexes to get any information + bib_regs = { + 'type': r'@(.*){', + 'title': r'title=\{(.*)\}', + 'journal': r'journal=\{(.*)\}', + 'volume': r'volume=\{(.*)\}', + 'number': r'number=\{(.*)\}', + 'pages': r'pages=\{(.*)\}', + 'publisher': r'publisher=\{(.*)\}' + } + + info = {} + + for key, reg in bib_regs.items(): + info[key] = re.search(reg, bib_text, re.IGNORECASE) + + return info @staticmethod def _tag_has_class(tag, klass): """

From 99d307c5d675c4ed4efcb33578ee3e810f4f1ea3 Mon Sep 17 00:00:00 2001 From: Amir Zeinali Date: Sat, 10 Sep 2022 15:47:10 +0430
Subject: [PATCH 15/21] updated .gitignore file

--- .gitignore | 2 ++ 1 file changed, 2 insertions(+)

diff --git a/.gitignore b/.gitignore index 2211df6..6540f05 100644 --- a/.gitignore +++ b/.gitignore
@@ -1 +1,3 @@ *.txt +*.html +*.out \ No newline at end of file

From 37d9cc1eab47d9a137357c17390ce2b966cd81ea Mon Sep 17 00:00:00 2001 From: Amir Zeinali Date: Sat, 10 Sep 2022 15:49:07 +0430
Subject: [PATCH 16/21] updated the way of applying delay between requests

--- scholar.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/scholar.py b/scholar.py index 067c5db..8909cbd 100755 --- a/scholar.py +++ b/scholar.py
@@ -163,6 +163,7 @@ import optparse import os +from random import randrange import re import sys from time import sleep
@@ -304,7 +305,6 @@ def __init__(self): # The citation data in one of the standard export formats, # e.g. BibTeX. self.citation_data = None - self.citation_format = None def __getitem__(self, key): if key in self.attrs:
@@ -1012,6 +1012,7 @@ def __init__(self): self.articles = [] self.query = None self.cjar = MozillaCookieJar() + self.delay_range = None self.inststart = '0' self.scisig = ''
@@ -1223,11 +1223,22 @@ def _get_http_response(self, url, log_msg=None, err_msg=None): ScholarUtils.log('debug', 'data:\n' + html.decode('utf-8')) # For Python 3 ScholarUtils.log('debug', '<<<<' + '-'*68) + # delay so we don't send too many requests and get banned + if self.delay_range is not None: + sleep(self.delay) + return html except Exception as err: ScholarUtils.log('info', err_msg + ': %s' % err) return None + def set_delay(self, min_delay, max_delay): + self.delay_range = (min_delay, max_delay) + + @property + def delay(self): + return randrange(self.delay_range) + def __len__(self): return len(self.articles)
@@ -1335,7 +1335,7 @@ def main(): group.add_option('-m', '--max-results', type='int', default=None, help='Maximum number of results to fetch; if it exceeds the number of available results, all results are returned') group.add_option('-D', '--delay', type='float', default=2.0, - help='delay between requests, to avoid getting banned by Google for a DOS-like burst of traffic! default is 2 sec') + help='maximum delay per request (the actual delay is drawn from 0 to DELAY seconds), to avoid getting banned by Google for a DOS-like burst of traffic! default is 2 sec') group.add_option('--all-results', action='store_true', default=False, help='Fetch all available results') # group.add_option('-c', '--count', type='int', default=None, # help='Maximum number of results per page')
@@ -1410,6 +1410,7 @@ def main(): return 1 querier.apply_settings(settings) + querier.set_delay(0, options.delay)
@@ -1468,7 +1468,6 @@ def main(): # if we didn't get enough articles, get the remaining articles while remaining_to_get > 0: - sleep(options.delay) # set offset query.offset = offset + len(querier)
@@ -1500,7 +1500,7 @@ def main(): citation_export(querier) else: txt(querier, with_globals=options.txt_globals) - + if options.cookie_file: querier.save_cookies()

From 0f6639357cae7b283c396bf2c31842f42809bdf0 Mon Sep 17 00:00:00 2001 From: Amir Zeinali Date: Sat, 10 Sep 2022 16:06:38 +0430
Subject: [PATCH 17/21] fixed bug in delay method

--- scholar.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scholar.py b/scholar.py index 8909cbd..5743fb7 100755 --- a/scholar.py +++ b/scholar.py
@@ -163,7 +163,7 @@ import optparse import os -from random import randrange +from random import randrange, uniform import re import sys
@@ -1237,7 +1237,7 @@ def set_delay(self, min_delay, max_delay): @property def delay(self): - return randrange(self.delay_range) + return uniform(*self.delay_range)

From a4195993c537ab64f72487b3cff7ff422419808b Mon Sep 17 00:00:00 2001 From: Amir Zeinali Date: Sat, 10 Sep 2022 16:27:51 +0430
Subject: [PATCH 18/21] added no-delay option.

--- scholar.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/scholar.py b/scholar.py index 5743fb7..09130e5 100755 --- a/scholar.py +++ b/scholar.py
@@ -1336,6 +1336,8 @@ def main(): help='maximum delay per request (the actual delay is drawn from 0 to DELAY seconds), to avoid getting banned by Google for a DOS-like burst of traffic! default is 2 sec') + group.add_option('--no-delay', action='store_true', default=False, + help='set delay to zero') group.add_option('--all-results', action='store_true', default=False, help='Fetch all available results') # group.add_option('-c', '--count', type='int', default=None, # help='Maximum number of results per page')
@@ -1410,7 +1412,10 @@ def main(): return 1 querier.apply_settings(settings) - querier.set_delay(0, options.delay) + + # add delay if the user wants it. + if not options.no_delay and options.delay != 0: + querier.set_delay(0, options.delay)

From b2d1e6f280196b60f2a8261959142cbd67baedb9 Mon Sep 17 00:00:00 2001 From: Amir Zeinali Date: Sat, 10 Sep 2022 17:11:06 +0430
Subject: [PATCH 19/21] don't apply delay to the first request.
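
Rationale: the delay exists only to space out consecutive requests, so sleeping before the very first one wastes time. A minimal standalone sketch of the flag-based approach used below (Fetcher is a stand-in name, not the real class):

    from random import uniform
    from time import sleep

    class Fetcher:
        def __init__(self, delay_range=None):
            self.delay_range = delay_range
            self.is_first_request = True      # no delay before the first request

        def fetch(self, url):
            if self.delay_range is not None and not self.is_first_request:
                sleep(uniform(*self.delay_range))
            self.is_first_request = False     # every later request gets delayed
            # ... perform the actual HTTP request here ...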
--- scholar.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/scholar.py b/scholar.py index 09130e5..292ac3c 100755 --- a/scholar.py +++ b/scholar.py
@@ -300,6 +300,12 @@ def __init__(self): 'url_versions': [None, 'Versions list', 8], 'url_citation': [None, 'Citation link', 9], 'excerpt': [None, 'Excerpt', 10], + 'type': [None, 'Paper type', 11], + 'journal': [None, 'Journal', 12], + 'publisher': [None, 'Publisher', 13], + 'pages': [None, 'Pages', 14], + 'volume': [None, 'Volume', 15], + 'issue': [None, 'Issue', 16], } # The citation data in one of the standard export formats,
@@ -507,7 +513,7 @@ def _parse_bib(self, bib_text): # publisher={INFORMS} # } - # regexes to get any information + # regexes to extract the information bib_regs = { 'type': r'@(.*){', 'title': r'title=\{(.*)\}', 'journal': r'journal=\{(.*)\}', 'volume': r'volume=\{(.*)\}', 'number': r'number=\{(.*)\}', 'pages': r'pages=\{(.*)\}', 'publisher': r'publisher=\{(.*)\}' } - info = {} for key, reg in bib_regs.items(): - info[key] = re.search(reg, bib_text, re.IGNORECASE) + self.article[key] = re.search(reg, bib_text, re.IGNORECASE) return info
@@ -1013,6 +1018,7 @@ def __init__(self): self.articles = [] self.query = None self.cjar = MozillaCookieJar() self.delay_range = None + self.is_first_request = True # don't apply delay to the first request.
@@ -1204,6 +1210,10 @@ def _get_http_response(self, url, log_msg=None, err_msg=None): """ Helper method, sends HTTP request and returns response payload. """ + # delay so we don't send too many requests and get banned + if self.delay_range is not None and not self.is_first_request: + sleep(self.delay) + if log_msg is None: log_msg = 'HTTP response data follow' if err_msg is None:
@@ -1223,9 +1233,7 @@ def _get_http_response(self, url, log_msg=None, err_msg=None): ScholarUtils.log('debug', 'data:\n' + html.decode('utf-8')) # For Python 3 ScholarUtils.log('debug', '<<<<' + '-'*68) - # delay so we don't send too many requests and get banned - if self.delay_range is not None: - sleep(self.delay) + self.is_first_request = False # apply delay to the next request! return html except Exception as err: ScholarUtils.log('info', err_msg + ': %s' % err) return None

From 9824b6bba066dc2ca9660d21bf5665eb3536edbb Mon Sep 17 00:00:00 2001 From: Amir Zeinali Date: Sun, 11 Sep 2022 01:36:55 +0430
Subject: [PATCH 20/21] added full-info option. fixed bibTex parser's bug.
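
The parser pulls individual fields out of the exported BibTeX record with one regex per field, taking the first match and defaulting missing fields to None (the fix over the earlier re.search version, which stored raw match objects). A minimal standalone sketch (hypothetical record; non-greedy groups are used here because the sample sits on one line, whereas the greedy versions in the diff assume one field per line):

    import re

    bib = '@article{x, title={Large-scale portfolio optimization}, pages={1143--1160}}'
    bib_regs = {'type': r'@(.*?)\{', 'title': r'title=\{(.*?)\}',
                'pages': r'pages=\{(.*?)\}', 'journal': r'journal=\{(.*?)\}'}

    info = {}
    for key, reg in bib_regs.items():
        val = re.findall(reg, bib, re.IGNORECASE)
        info[key] = val[0] if len(val) > 0 else None
    print(info['title'])    # Large-scale portfolio optimization
    print(info['journal'])  # None (field absent from the record)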
--- scholar.py | 91 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 53 insertions(+), 38 deletions(-)

diff --git a/scholar.py b/scholar.py index 292ac3c..6d55430 100755 --- a/scholar.py +++ b/scholar.py
@@ -364,6 +364,42 @@ def as_citation(self): """ return self.citation_data or '' + def parse_bib(self): + """Parses the BibTeX citation data and extracts its fields.""" + + # check if citation data exists + if self.citation_data is None: + return False + + # bibTex sample: + # @article{perold1984large, + # title={Large-scale portfolio optimization}, + # author={Perold, Andre F}, + # journal={Management science}, + # volume={30}, + # number={10}, + # pages={1143--1160}, + # year={1984}, + # publisher={INFORMS} + # } + + # regexes to extract the information + bib_regs = { + 'type': r'@(.*){', + 'title': r'title=\{(.*)\}', + 'journal': r'journal=\{(.*)\}', + 'volume': r'volume=\{(.*)\}', + 'issue': r'number=\{(.*)\}', + 'pages': r'pages=\{(.*)\}', + 'publisher': r'publisher=\{(.*)\}' + } + + for key, reg in bib_regs.items(): + val = re.findall(reg, self.citation_data, re.IGNORECASE) + self[key] = val[0] if len(val) > 0 else None + + return True class ScholarArticleParser(object): """
@@ -494,42 +530,6 @@ def _parse_links(self, span): if tag.getText().startswith('Import'): self.article['url_citation'] = tag.get('href') - def _parse_bib(self, bib_text): - """Parses the BibTeX citation data and extracts its fields.""" (the whole _parse_bib method is removed here; it moves to the article class as parse_bib above) @staticmethod def _tag_has_class(tag, klass):
@@ -1170,7 +1170,7 @@ def get_citation_data(self, article): if data is None: return False # change to str if it's bytes if type(data) == bytes: - data = data.decode('utf-8') + data = data.decode('utf-8').replace('\\', '') # strip some useless '\' characters article.set_citation_data(data) return True
@@ -1233,11 +1233,16 @@ def _get_http_response(self, url, log_msg=None, err_msg=None): ScholarUtils.log('debug', 'data:\n' + html.decode('utf-8')) # For Python 3 ScholarUtils.log('debug', '<<<<' + '-'*68) + # check for Google's robot check! + if "Please show you're not a robot" in html.decode('utf-8'): + ScholarUtils.log('info', err_msg + ': google recognized you as a robot!') + return None self.is_first_request = False # apply delay to the next request! return html except Exception as err: ScholarUtils.log('info', err_msg + ': %s' % err) + print(err) return None
@@ -1254,6 +1259,9 @@ def __iadd__(self, other): self.articles += other.articles return self + def __getitem__(self, num): + return self.articles[num] def txt(querier, with_globals):
@@ -1365,6 +1373,8 @@ def main(): help='Like --csv, but print header with column names') group.add_option('--citation', metavar='FORMAT', default=None, help='Print article details in standard citation format. Argument Must be one of "bt" (BibTeX), "en" (EndNote), "rm" (RefMan), or "rw" (RefWorks).') + group.add_option('--full-info', action='store_true', default=False, + help='Get full information for each article: retrieves extra fields like journal, publisher, pages, ... from the BibTeX data. (This increases run-time, since the BibTeX data must be fetched.)') parser.add_option_group(group) group = optparse.OptionGroup(parser, 'Miscellaneous')
@@ -1417,7 +1427,7 @@ def main(): querier = ScholarQuerier() settings = ScholarSettings() - if options.citation == 'bt': + if options.citation == 'bt' or options.full_info: settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX) elif options.citation == 'en': settings.set_citation_format(ScholarSettings.CITFORM_ENDNOTE)
@@ -1505,6 +1515,11 @@ def main(): remaining_to_get = results_num_to_get - len(querier) # print(f'remaining: {remaining_to_get}') + # include bibTeX information in the results if the user wants it + if options.full_info: + for article in querier: + article.parse_bib() + if options.csv: csv(querier) elif options.csv_header:

From 60f9e4c90605563f5f998d7c07a8459a5c8ac7fd Mon Sep 17 00:00:00 2001 From: Amir Zeinali Date: Sun, 11 Sep 2022 02:37:15 +0430
Subject: [PATCH 21/21] changed delay input option. deleted some useless lines. changed robot-check log level from info to error. and fixed some minor bugs

--- scholar.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/scholar.py b/scholar.py index 6d55430..a15ae27 100755 --- a/scholar.py +++ b/scholar.py
@@ -163,7 +163,7 @@ import optparse import os -from random import randrange, uniform +from random import uniform import re import sys from time import sleep
@@ -1235,7 +1235,7 @@ def _get_http_response(self, url, log_msg=None, err_msg=None): # check for Google's robot check! if "Please show you're not a robot" in html.decode('utf-8'): - ScholarUtils.log('info', err_msg + ': google recognized you as a robot!') + ScholarUtils.log('error', err_msg + ': google recognized you as a robot!') return None self.is_first_request = False # apply delay to the next request!
@@ -1350,8 +1350,8 @@ def main(): help='Do not search, just use articles in given cluster ID') group.add_option('-m', '--max-results', type='int', default=None, help='Maximum number of results to fetch; if it exceeds the number of available results, all results are returned') - group.add_option('-D', '--delay', type='float', default=2.0, - help='maximum delay per request (the actual delay is drawn from 0 to DELAY seconds), to avoid getting banned by Google for a DOS-like burst of traffic! default is 2 sec') + group.add_option('-D', '--delay', type='string', default=(1.0, 2.0), + help='delay range between requests, passed as min,max (each request is delayed by min to max seconds), to avoid getting banned by Google for a DOS-like burst of traffic! default is 1,2 sec') group.add_option('--no-delay', action='store_true', default=False, help='set delay to zero') group.add_option('--all-results', action='store_true', default=False, help='Fetch all available results')
@@ -1432,9 +1432,9 @@ def main(): querier.apply_settings(settings) # add delay if the user wants it. - if not options.no_delay and options.delay != 0: - querier.set_delay(0, options.delay) + if not options.no_delay and options.delay != (0, 0): + options.delay = tuple(float(n) for n in options.delay.split(',')) if type(options.delay) == str else options.delay + querier.set_delay(*options.delay) if options.cluster_id: query = ClusterScholarQuery(cluster=options.cluster_id)
@@ -1502,14 +1502,8 @@ def main(): querier.send_query(query, clear=False) - # if there's a problem getting articles, break out of the loop # it can mean that there are no more articles to get. - # or that we got banned by Google! if results_num_to_get - len(querier) == remaining_to_get: - print("WARNING: there was probably a problem getting all of the requested articles.") - print(f"got {len(querier)} articles out of {results_num_to_get} articles.") - print("this may mean we got banned by Google.") - print("or maybe some articles were unavailable.") break remaining_to_get = results_num_to_get - len(querier)
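
With the whole series applied, the -D option now takes a min,max pair parsed into a float tuple, e.g. '1,2' becomes (1.0, 2.0). A typical invocation (hypothetical query; options as documented in the help strings above) would look like:

    python scholar.py -q 'portfolio optimization in \"stock markets\"' \
        -o 20 -m 50 -D 1,3 --full-info --csv

i.e. skip the first 20 hits, fetch up to 50 results with a random 1-3 second delay between page requests, enrich each article with BibTeX fields, and emit CSV.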