"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "googler" between
googler-4.0.tar.gz and googler-4.1.tar.gz

About: googler is a command line tool to search Google (Web & News) from the terminal (requires Python).

googler  (googler-4.0):googler  (googler-4.1)
#!/usr/bin/env python3 #!/usr/bin/env python3
# #
# Copyright © 2008 Henri Hakkinen # Copyright © 2008 Henri Hakkinen
# Copyright © 2015-2019 Arun Prakash Jana <engineerarun@gmail.com> # Copyright © 2015-2020 Arun Prakash Jana <engineerarun@gmail.com>
# #
# This program is free software: you can redistribute it and/or modify # This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by # it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or # the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version. # (at your option) any later version.
# #
# This program is distributed in the hope that it will be useful, # This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of # but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details. # GNU General Public License for more details.
skipping to change at line 90 skipping to change at line 90
sys.exit(1) sys.exit(1)
try: try:
signal.signal(signal.SIGINT, sigint_handler) signal.signal(signal.SIGINT, sigint_handler)
except ValueError: except ValueError:
# signal only works in main thread # signal only works in main thread
pass pass
# Constants # Constants
_VERSION_ = '4.0' _VERSION_ = '4.1'
COLORMAP = {k: '\x1b[%sm' % v for k, v in { COLORMAP = {k: '\x1b[%sm' % v for k, v in {
'a': '30', 'b': '31', 'c': '32', 'd': '33', 'a': '30', 'b': '31', 'c': '32', 'd': '33',
'e': '34', 'f': '35', 'g': '36', 'h': '37', 'e': '34', 'f': '35', 'g': '36', 'h': '37',
'i': '90', 'j': '91', 'k': '92', 'l': '93', 'i': '90', 'j': '91', 'k': '92', 'l': '93',
'm': '94', 'n': '95', 'o': '96', 'p': '97', 'm': '94', 'n': '95', 'o': '96', 'p': '97',
'A': '30;1', 'B': '31;1', 'C': '32;1', 'D': '33;1', 'A': '30;1', 'B': '31;1', 'C': '32;1', 'D': '33;1',
'E': '34;1', 'F': '35;1', 'G': '36;1', 'H': '37;1', 'E': '34;1', 'F': '35;1', 'G': '36;1', 'H': '37;1',
'I': '90;1', 'J': '91;1', 'K': '92;1', 'L': '93;1', 'I': '90;1', 'J': '91;1', 'K': '92;1', 'L': '93;1',
'M': '94;1', 'N': '95;1', 'O': '96;1', 'P': '97;1', 'M': '94;1', 'N': '95;1', 'O': '96;1', 'P': '97;1',
skipping to change at line 1560 skipping to change at line 1560
See "Other Parameters" of `update`. See "Other Parameters" of `update`.
Attributes Attributes
---------- ----------
hostname : str hostname : str
Read-write property. Read-write property.
keywords : str or list of strs keywords : str or list of strs
Read-write property. Read-write property.
news : bool news : bool
Read-only property. Read-only property.
videos : bool
Read-only property.
url : str url : str
Read-only property. Read-only property.
Methods Methods
------- -------
full() full()
relative() relative()
update(opts=None, **kwargs) update(opts=None, **kwargs)
set_queries(**kwargs) set_queries(**kwargs)
unset_queries(*args) unset_queries(*args)
skipping to change at line 1629 skipping to change at line 1631
@keywords.setter @keywords.setter
def keywords(self, keywords): def keywords(self, keywords):
self._keywords = keywords self._keywords = keywords
@property @property
def news(self): def news(self):
"""Whether the URL is for Google News.""" """Whether the URL is for Google News."""
return 'tbm' in self._query_dict and self._query_dict['tbm'] == 'nws' return 'tbm' in self._query_dict and self._query_dict['tbm'] == 'nws'
@property
def videos(self):
"""Whether the URL is for Google Videos."""
return 'tbm' in self._query_dict and self._query_dict['tbm'] == 'vid'
def full(self): def full(self):
"""Return the full URL. """Return the full URL.
Returns Returns
------- -------
str str
""" """
url = (self.scheme + ':') if self.scheme else '' url = (self.scheme + ':') if self.scheme else ''
url += '//' + self.netloc + self.relative() url += '//' + self.netloc + self.relative()
skipping to change at line 1678 skipping to change at line 1685
opts : dict or argparse.Namespace, optional opts : dict or argparse.Namespace, optional
Carries options that affect the Google Search/News URL. The Carries options that affect the Google Search/News URL. The
list of currently recognized option keys with expected value list of currently recognized option keys with expected value
types: types:
duration: str (GooglerArgumentParser.is_duration) duration: str (GooglerArgumentParser.is_duration)
exact: bool exact: bool
keywords: str or list of strs keywords: str or list of strs
lang: str lang: str
news: bool news: bool
videos: bool
num: int num: int
site: str site: str
start: int start: int
tld: str tld: str
unfilter: bool unfilter: bool
Other Parameters Other Parameters
---------------- ----------------
kwargs kwargs
The `kwargs` dict extends `opts`, that is, options can be The `kwargs` dict extends `opts`, that is, options can be
skipping to change at line 1700 skipping to change at line 1708
""" """
if opts is None: if opts is None:
opts = {} opts = {}
if hasattr(opts, '__dict__'): if hasattr(opts, '__dict__'):
opts = opts.__dict__ opts = opts.__dict__
opts.update(kwargs) opts.update(kwargs)
qd = self._query_dict qd = self._query_dict
if 'duration' in opts and opts['duration']: if opts.get('duration'):
qd['tbs'] = 'qdr:%s' % opts['duration'] qd['tbs'] = 'qdr:%s' % opts['duration']
if 'exact' in opts: if 'exact' in opts:
if opts['exact']: if opts['exact']:
qd['nfpr'] = 1 qd['nfpr'] = 1
else: else:
qd.pop('nfpr', None) qd.pop('nfpr', None)
if opts.get('from') or opts.get('to'):
cd_min = opts.get('from') or ''
cd_max = opts.get('to') or ''
qd['tbs'] = 'cdr:1,cd_min:%s,cd_max:%s' % (cd_min, cd_max)
if 'keywords' in opts: if 'keywords' in opts:
self._keywords = opts['keywords'] self._keywords = opts['keywords']
if 'lang' in opts and opts['lang']: if 'lang' in opts and opts['lang']:
qd['hl'] = opts['lang'] qd['hl'] = opts['lang']
if 'news' in opts: if 'news' in opts and opts['news']:
if opts['news']: qd['tbm'] = 'nws'
qd['tbm'] = 'nws' elif 'videos' in opts and opts['videos']:
else: qd['tbm'] = 'vid'
qd.pop('tbm', None) else:
qd.pop('tbm', None)
if 'num' in opts: if 'num' in opts:
self._num = opts['num'] self._num = opts['num']
if 'sites' in opts: if 'sites' in opts:
self._sites = opts['sites'] self._sites = opts['sites']
if 'start' in opts: if 'start' in opts:
self._start = opts['start'] self._start = opts['start']
if 'tld' in opts: if 'tld' in opts:
self._tld = opts['tld'] self._tld = opts['tld']
if 'unfilter' in opts and opts['unfilter']: if 'unfilter' in opts and opts['unfilter']:
qd['filter'] = 0 qd['filter'] = 0
skipping to change at line 2143 skipping to change at line 2156
self.cookie = complete_cookie[:complete_cookie.find(';')] self.cookie = complete_cookie[:complete_cookie.find(';')]
logger.debug('Cookie: %s' % self.cookie) logger.debug('Cookie: %s' % self.cookie)
def close(self): def close(self):
"""Close the connection (if one is active).""" """Close the connection (if one is active)."""
if self._conn: if self._conn:
self._conn.close() self._conn.close()
class GoogleParser(object): class GoogleParser(object):
def __init__(self, html, *, news=False): def __init__(self, html, *, news=False, videos=False):
self.news = news self.news = news
self.videos = videos
self.autocorrected = False self.autocorrected = False
self.showing_results_for = None self.showing_results_for = None
self.filtered = False self.filtered = False
self.results = [] self.results = []
self.parse(html) self.parse(html)
def parse(self, html): def parse(self, html):
tree = parse_html(html) tree = parse_html(html)
if debugger: if debugger:
skipping to change at line 2193 skipping to change at line 2207
abstract = '' abstract = ''
for childnode in div_g.select('.st').children: for childnode in div_g.select('.st').children:
if 'f' in childnode.classes: if 'f' in childnode.classes:
# .f is handled as metadata instead. # .f is handled as metadata instead.
continue continue
if childnode.tag == 'b' and childnode.text != '...': if childnode.tag == 'b' and childnode.text != '...':
matched_keywords.append({'phrase': childnode.text, 'offset': len(abstract)}) matched_keywords.append({'phrase': childnode.text, 'offset': len(abstract)})
abstract = abstract + childnode.text.replace('\n', '') abstract = abstract + childnode.text.replace('\n', '')
try: try:
metadata = div_g.select('.f').text metadata = div_g.select('.f').text
metadata = metadata.replace('\u200e', '').replace(' - ', ', ').strip() metadata = metadata.replace('\u200e', '').replace(' - ', ', ').strip().rstrip(',')
except AttributeError: except AttributeError:
metadata = None metadata = None
except (AttributeError, ValueError): except (AttributeError, ValueError):
continue continue
sitelinks = [] sitelinks = []
for td in div_g.select_all('td'): for td in div_g.select_all('td'):
try: try:
a = td.select('a') a = td.select('a')
sl_title = a.text sl_title = a.text
sl_url = self.unwrap_link(a.attr('href')) sl_url = self.unwrap_link(a.attr('href'))
sl_abstract = td.select('div.s.st').text sl_abstract = td.select('div.s.st').text
sitelinks.append(Sitelink(sl_title, sl_url, sl_abstract)) sitelinks.append(Sitelink(sl_title, sl_url, sl_abstract))
except (AttributeError, ValueError): except (AttributeError, ValueError):
continue continue
index += 1 index += 1
self.results.append(Result(index, title, url, abstract, self.results.append(Result(index, title, url, abstract,
metadata=metadata, sitelinks=sitelinks, matches=matched_keywords)) metadata=metadata, sitelinks=sitelinks, matches=matched_keywords))
if not self.results:
for card in tree.select_all('g-card'):
a = card.select('a[href]')
if not a:
continue
url = self.unwrap_link(a.attr('href'))
text_nodes = []
for node in a.descendants():
if isinstance(node, TextNode) and node.strip():
text_nodes.append(node.text)
if len(text_nodes) != 4:
continue
publisher, title, abstract, publishing_time = text_nodes
metadata = '%s, %s' % (publisher, publishing_time)
index += 1
self.results.append(Result(index, title, url, abstract, metadata=metadata))
# Showing results for ... # Showing results for ...
# Search instead for ... # Search instead for ...
spell_orig = tree.select("span.spell_orig") spell_orig = tree.select("span.spell_orig")
if spell_orig: if spell_orig:
showing_results_for_link = next( showing_results_for_link = next(
filter(lambda el: el.tag == "a", spell_orig.previous_siblings()), None filter(lambda el: el.tag == "a", spell_orig.previous_siblings()), None
) )
if showing_results_for_link: if showing_results_for_link:
self.autocorrected = True self.autocorrected = True
self.showing_results_for = showing_results_for_link.text self.showing_results_for = showing_results_for_link.text
skipping to change at line 2538 skipping to change at line 2569
page = self._conn.fetch_page(self._google_url.relative()) page = self._conn.fetch_page(self._google_url.relative())
if logger.isEnabledFor(logging.DEBUG): if logger.isEnabledFor(logging.DEBUG):
import tempfile import tempfile
fd, tmpfile = tempfile.mkstemp(prefix='googler-response-', suffix='.html') fd, tmpfile = tempfile.mkstemp(prefix='googler-response-', suffix='.html')
os.close(fd) os.close(fd)
with open(tmpfile, 'w', encoding='utf-8') as fp: with open(tmpfile, 'w', encoding='utf-8') as fp:
fp.write(page) fp.write(page)
logger.debug("Response body written to '%s'.", tmpfile) logger.debug("Response body written to '%s'.", tmpfile)
parser = GoogleParser(page, news=self._google_url.news) parser = GoogleParser(page, news=self._google_url.news, videos=self._google_url.videos)
self.results = parser.results self.results = parser.results
self._autocorrected = parser.autocorrected self._autocorrected = parser.autocorrected
self._showing_results_for = parser.showing_results_for self._showing_results_for = parser.showing_results_for
self._results_filtered = parser.filtered self._results_filtered = parser.filtered
self._urltable = {} self._urltable = {}
for r in self.results: for r in self.results:
self._urltable.update(r.urltable()) self._urltable.update(r.urltable())
def warn_no_results(self): def warn_no_results(self):
skipping to change at line 2904 skipping to change at line 2935
* other inputs issue a new search with original options * other inputs issue a new search with original options
""")) """))
# Print information on googler # Print information on googler
@staticmethod @staticmethod
def print_general_info(file=None): def print_general_info(file=None):
file = sys.stderr if file is None else file file = sys.stderr if file is None else file
file.write(textwrap.dedent(""" file.write(textwrap.dedent("""
Version %s Version %s
Copyright © 2008 Henri Hakkinen Copyright © 2008 Henri Hakkinen
Copyright © 2015-2019 Arun Prakash Jana <engineerarun@gmail.com> Copyright © 2015-2020 Arun Prakash Jana <engineerarun@gmail.com>
Zhiming Wang <zmwangx@gmail.com> Zhiming Wang <zmwangx@gmail.com>
License: GPLv3 License: GPLv3
Webpage: https://github.com/jarun/googler Webpage: https://github.com/jarun/googler
""" % _VERSION_)) """ % _VERSION_))
# Augment print_help to print more than synopsis and options # Augment print_help to print more than synopsis and options
def print_help(self, file=None): def print_help(self, file=None):
super().print_help(file) super().print_help(file)
self.print_omniprompt_help(file) self.print_omniprompt_help(file)
self.print_general_info(file) self.print_general_info(file)
skipping to change at line 2959 skipping to change at line 2990
non-negative integer. non-negative integer.
""" """
try: try:
if arg[0] not in ('h', 'd', 'w', 'm', 'y') or int(arg[1:]) < 0: if arg[0] not in ('h', 'd', 'w', 'm', 'y') or int(arg[1:]) < 0:
raise ValueError raise ValueError
except (TypeError, IndexError, ValueError): except (TypeError, IndexError, ValueError):
raise argparse.ArgumentTypeError('%s is not a valid duration' % arg) raise argparse.ArgumentTypeError('%s is not a valid duration' % arg)
return arg return arg
@staticmethod @staticmethod
def is_date(arg):
"""Check if a string is a valid date/month/year accepted by Google."""
if re.match(r'^(\d+/){0,2}\d+$', arg):
return arg
else:
raise argparse.ArgumentTypeError('%s is not a valid date/month/year; '
'use the American date format with slashes')
@staticmethod
def is_colorstr(arg): def is_colorstr(arg):
"""Check if a string is a valid color string.""" """Check if a string is a valid color string."""
try: try:
assert len(arg) == 6 assert len(arg) == 6
for c in arg: for c in arg:
assert c in COLORMAP assert c in COLORMAP
except AssertionError: except AssertionError:
raise argparse.ArgumentTypeError('%s is not a valid color string' % arg) raise argparse.ArgumentTypeError('%s is not a valid color string' % arg)
return arg return arg
skipping to change at line 3224 skipping to change at line 3264
colorstr_env = os.getenv('GOOGLER_COLORS') colorstr_env = os.getenv('GOOGLER_COLORS')
argparser = GooglerArgumentParser(description='Google from the command-line.') argparser = GooglerArgumentParser(description='Google from the command-line.')
addarg = argparser.add_argument addarg = argparser.add_argument
addarg('-s', '--start', type=argparser.nonnegative_int, default=0, addarg('-s', '--start', type=argparser.nonnegative_int, default=0,
metavar='N', help='start at the Nth result') metavar='N', help='start at the Nth result')
addarg('-n', '--count', dest='num', type=argparser.positive_int, addarg('-n', '--count', dest='num', type=argparser.positive_int,
default=10, metavar='N', help='show N results (default 10)') default=10, metavar='N', help='show N results (default 10)')
addarg('-N', '--news', action='store_true', addarg('-N', '--news', action='store_true',
help='show results from news section') help='show results from news section')
addarg('-V', '--videos', action='store_true',
help='show results from videos section')
addarg('-c', '--tld', metavar='TLD', addarg('-c', '--tld', metavar='TLD',
help="""country-specific search with top-level domain .TLD, e.g., 'in' help="""country-specific search with top-level domain .TLD, e.g., 'in'
for India""") for India""")
addarg('-l', '--lang', metavar='LANG', help='display in language LANG') addarg('-l', '--lang', metavar='LANG', help='display in language LANG')
addarg('-x', '--exact', action='store_true', addarg('-x', '--exact', action='store_true',
help='disable automatic spelling correction') help='disable automatic spelling correction')
addarg('--colorize', nargs='?', choices=['auto', 'always', 'never'], addarg('--colorize', nargs='?', choices=['auto', 'always', 'never'],
const='always', default='auto', const='always', default='auto',
help="""whether to colorize output; defaults to 'auto', which enables help="""whether to colorize output; defaults to 'auto', which enables
color when stdout is a tty device; using --colorize without an argument color when stdout is a tty device; using --colorize without an argument
skipping to change at line 3245 skipping to change at line 3287
addarg('-C', '--nocolor', action='store_true', addarg('-C', '--nocolor', action='store_true',
help='equivalent to --colorize=never') help='equivalent to --colorize=never')
addarg('--colors', dest='colorstr', type=argparser.is_colorstr, addarg('--colors', dest='colorstr', type=argparser.is_colorstr,
default=colorstr_env if colorstr_env else 'GKlgxy', metavar='COLORS', default=colorstr_env if colorstr_env else 'GKlgxy', metavar='COLORS',
help='set output colors (see man page for details)') help='set output colors (see man page for details)')
addarg('-j', '--first', '--lucky', dest='lucky', action='store_true', addarg('-j', '--first', '--lucky', dest='lucky', action='store_true',
help='open the first result in web browser and exit') help='open the first result in web browser and exit')
addarg('-t', '--time', dest='duration', type=argparser.is_duration, addarg('-t', '--time', dest='duration', type=argparser.is_duration,
metavar='dN', help='time limit search ' metavar='dN', help='time limit search '
'[h5 (5 hrs), d5 (5 days), w5 (5 weeks), m5 (5 months), y5 (5 years)]') '[h5 (5 hrs), d5 (5 days), w5 (5 weeks), m5 (5 months), y5 (5 years)]')
addarg('--from', type=argparser.is_date,
help="""starting date/month/year of date range; must use American date
format with slashes, e.g., 2/24/2020, 2/2020, 2020; can be used in
conjuction with --to, and overrides -t, --time""")
addarg('--to', type=argparser.is_date,
help='ending date/month/year of date range; see --from')
addarg('-w', '--site', dest='sites', action='append', metavar='SITE', addarg('-w', '--site', dest='sites', action='append', metavar='SITE',
help='search a site using Google') help='search a site using Google')
addarg('--unfilter', action='store_true', help='do not omit similar results') addarg('--unfilter', action='store_true', help='do not omit similar results')
addarg('-p', '--proxy', default=https_proxy_from_environment(), addarg('-p', '--proxy', default=https_proxy_from_environment(),
help="""tunnel traffic through an HTTP proxy; help="""tunnel traffic through an HTTP proxy;
PROXY is of the form [http://][user:password@]proxyhost[:port]""") PROXY is of the form [http://][user:password@]proxyhost[:port]""")
addarg('--noua', action='store_true', help='legacy option (no effect)') addarg('--noua', action='store_true', help='legacy option (no effect)')
addarg('--notweak', action='store_true', addarg('--notweak', action='store_true',
help='disable TCP optimizations and forced TLS 1.2') help='disable TCP optimizations and forced TLS 1.2')
addarg('--json', action='store_true', addarg('--json', action='store_true',
 End of changes. 17 change blocks. 
12 lines changed or deleted 64 lines changed or added

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)