#
# urlutils.py - Simplified urllib handling
#
# Written by Chris Lawrence <lawrencc@debian.org>
# (C) 1999-2008 Chris Lawrence
# Copyright (C) 2008-2017 Sandro Tosi <morph@debian.org>
#
# This program is freely distributable per the following license:
#
# Permission to use, copy, modify, and distribute this software and its
# documentation for any purpose and without fee is hereby granted,
# provided that the above copyright notice appears in all copies and that
# both that copyright notice and this permission notice appear in
# supporting documentation.
#
# I DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL I
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
# SOFTWARE.

import http.client
import urllib.request, urllib.parse, urllib.error
import getpass
import re
import socket
import shlex
import os
import sys
import webbrowser

import requests

from .exceptions import (
    NoNetwork,
)
from .__init__ import VERSION_NUMBER

UA_STR = 'reportbug/' + VERSION_NUMBER + ' (Debian)'


def decode(page):
    "gunzip or deflate a compressed page"
    # print page.info().headers
    encoding = page.info().get("Content-Encoding")
    if encoding in ('gzip', 'x-gzip', 'deflate'):
        # the compressed body is bytes, so BytesIO is needed (not StringIO)
        from io import BytesIO
        # cannot seek in socket descriptors, so must get content now
        content = page.read()
        if encoding == 'deflate':
            import zlib
            fp = BytesIO(zlib.decompress(content))
        else:
            import gzip
            fp = gzip.GzipFile('', 'rb', 9, BytesIO(content))
        # copy the headers, dropping the Content-Encoding header
        # (message keys carry no trailing colon)
        headers = http.client.HTTPMessage()
        ceheader = re.compile(r"(?i)content-encoding")
        for h in list(page.info().keys()):
            if not ceheader.match(h):
                headers[h] = page.info()[h]
        newpage = urllib.response.addinfourl(fp, headers, page.geturl())
        # Propagate code, msg through
        if hasattr(page, 'code'):
            newpage.code = page.code
        if hasattr(page, 'msg'):
            newpage.msg = page.msg
        return newpage
    return page


class HttpWithGzipHandler(urllib.request.HTTPHandler):
    "support gzip encoding"
    def http_open(self, req):
        return decode(urllib.request.HTTPHandler.http_open(self, req))


# http.client exposes HTTPSConnection only when Python was built with SSL support
if hasattr(http.client, 'HTTPSConnection'):
    class HttpsWithGzipHandler(urllib.request.HTTPSHandler):
        "support gzip encoding"
        def https_open(self, req):
            return decode(urllib.request.HTTPSHandler.https_open(self, req))


class handlepasswd(urllib.request.HTTPPasswordMgrWithDefaultRealm):
    def find_user_password(self, realm, authurl):
        user, password = urllib.request.HTTPPasswordMgrWithDefaultRealm.find_user_password(self, realm, authurl)
        if user is not None:
            return user, password

        user = input('Enter username for %s at %s: ' % (realm, authurl))
        password = getpass.getpass(
            "Enter password for %s in %s at %s: " % (user, realm, authurl))
        self.add_password(realm, authurl, user, password)
        return user, password


_opener = None


def urlopen(url, proxies=None, timeout=60, data=None):
    global _opener

    if not proxies:
        proxies = urllib.request.getproxies()

    headers = {'User-Agent': UA_STR,
               'Accept-Encoding': 'gzip;q=1.0, deflate;q=0.9, identity;q=0.5'}

    # requests.get() takes headers as a keyword argument (passing the dict
    # positionally would send it as query parameters); pass proxies and
    # timeout through as well so open_url()'s overrides take effect
    return requests.get(url, headers=headers, proxies=proxies, timeout=timeout).text

    # req = urllib.request.Request(url, data, headers)
    #
    # proxy_support = urllib.request.ProxyHandler(proxies)
    # if _opener is None:
    #     pwd_manager = handlepasswd()
    #     handlers = [proxy_support,
    #                 urllib.request.UnknownHandler, HttpWithGzipHandler,
    #                 urllib.request.HTTPBasicAuthHandler(pwd_manager),
    #                 urllib.request.ProxyBasicAuthHandler(pwd_manager),
    #                 urllib.request.HTTPDigestAuthHandler(pwd_manager),
    #                 urllib.request.ProxyDigestAuthHandler(pwd_manager),
    #                 urllib.request.HTTPDefaultErrorHandler, urllib.request.HTTPRedirectHandler,
    #                 ]
    #     if hasattr(http.client, 'HTTPSConnection'):
    #         handlers.append(HttpsWithGzipHandler)
    #     _opener = urllib.request.build_opener(*handlers)
    #     # print _opener.handlers
    #     urllib.request.install_opener(_opener)
    #
    # return _opener.open(req, timeout=timeout)


# Global useful URL opener; returns None if the page is absent, otherwise
# like urlopen
def open_url(url, http_proxy=None, timeout=60):
    # Set timeout to 60 secs (1 min), cfr bug #516449
    # in #572316 we set a user-configurable timeout
    socket.setdefaulttimeout(timeout)

    proxies = urllib.request.getproxies()
    if http_proxy:
        proxies['http'] = http_proxy

    try:
        page = urlopen(url, proxies, timeout)
    except urllib.error.HTTPError as x:
        if x.code in (404, 500, 503):
            return None
        else:
            raise
    except (socket.gaierror, socket.error, urllib.error.URLError) as x:
        raise NoNetwork
    except IOError as data:
        # exceptions are not indexable in Python 3; inspect .args instead
        if data.args and data.args[0] == 'http error' and data.args[1] == 404:
            return None
        else:
            raise NoNetwork
    except TypeError:
        print("http_proxy environment variable must be formatted as a valid URI", file=sys.stderr)
        raise NoNetwork
    except http.client.HTTPException as exc:
        exc_name = exc.__class__.__name__
        message = "Failed to open %(url)r (%(exc_name)s: %(exc)s)" % vars()
        raise NoNetwork(message)

    return page


def launch_browser(url):
    # prefer xdg-open, which respects the user's configured browser
    if not os.system('command -v xdg-open >/dev/null 2>&1'):
        cmd = 'xdg-open ' + shlex.quote(url)
        os.system(cmd)
        return

    # otherwise fall back to Python's webbrowser module
    if webbrowser:
        webbrowser.open(url)
        return
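

# A minimal usage sketch, assuming this module sits in the reportbug package
# so the relative imports above resolve (e.g. `python -m reportbug.urlutils`).
# open_url() returns the decoded body as a string, or None when the page is
# absent; launch_browser() hands the URL to xdg-open or the webbrowser module.
# The Debian BTS URLs below are only illustrative.
if __name__ == '__main__':
    body = open_url('https://bugs.debian.org/cgi-bin/pkgreport.cgi?pkg=reportbug')
    if body is None:
        print('page not found', file=sys.stderr)
    else:
        print(body[:200])
    launch_browser('https://bugs.debian.org/reportbug')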