summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorintrigeri <intrigeri@boum.org>2015-11-29 18:07:04 +0000
committerintrigeri <intrigeri@boum.org>2015-11-29 18:19:26 +0000
commit74c5925aaca14aaf41dcebcd9d3ddf7132251c93 (patch)
treea675507bf87dd2a17e166a7a0c1f6f868c5b71bb
parentdaa83dbb79cc00391a0e3d469d55c3d0ccef7b8b (diff)
Remove embedded copy of urlgrabber, and all usage thereof.
These files have different copyright and licensing info than the rest of Tails Installer. That has not been documented in debian/copyright, so the package was rightfully rejected by Debian ftpmasters. Moreover, these files seem to be some old (possibly modified) version of the code that's shipped in the python-urlgrabber Debian package, so they would be embedded code copies, that are frowned upon in Debian. To end with, they were used only on Windows. Will-Fix: #10692
-rw-r--r--MANIFEST.in6
-rw-r--r--po/POTFILES.in5
-rwxr-xr-xsetup.py2
-rwxr-xr-xtails_installer/gui.py8
-rw-r--r--tails_installer/urlgrabber/__init__.py53
-rw-r--r--tails_installer/urlgrabber/byterange.py465
-rw-r--r--tails_installer/urlgrabber/grabber.py1483
-rw-r--r--tails_installer/urlgrabber/keepalive.py603
-rw-r--r--tails_installer/urlgrabber/mirror.py459
-rw-r--r--tails_installer/urlgrabber/progress.py609
10 files changed, 3 insertions, 3690 deletions
diff --git a/MANIFEST.in b/MANIFEST.in
index f72c91e..0704629 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -10,12 +10,6 @@ include tails_installer/dialog.py
include tails_installer/gui.py
include tails_installer/launcher.py
include tails_installer/releases.py
-include tails_installer/urlgrabber/__init__.py
-include tails_installer/urlgrabber/byterange.py
-include tails_installer/urlgrabber/grabber.py
-include tails_installer/urlgrabber/keepalive.py
-include tails_installer/urlgrabber/mirror.py
-include tails_installer/urlgrabber/progress.py
include data/tails-installer.ui
include data/tails-installer-launcher.ui
include data/tails-installer.desktop
diff --git a/po/POTFILES.in b/po/POTFILES.in
index 2e78753..65534f7 100644
--- a/po/POTFILES.in
+++ b/po/POTFILES.in
@@ -4,11 +4,6 @@ tails_installer/creator.py
tails_installer/gui.py
tails_installer/launcher.py
tails_installer/source.py
-tails_installer/urlgrabber/byterange.py
-tails_installer/urlgrabber/grabber.py
-tails_installer/urlgrabber/keepalive.py
-tails_installer/urlgrabber/mirror.py
-tails_installer/urlgrabber/progress.py
tails_installer/utils.py
data/tails-installer.ui.h
diff --git a/setup.py b/setup.py
index a8eb1fb..1f4a3f8 100755
--- a/setup.py
+++ b/setup.py
@@ -45,7 +45,7 @@ if sys.platform == 'win32':
setup(
name = 'tails-installer',
version = VERSION,
- packages = ['tails_installer', 'tails_installer/urlgrabber'],
+ packages = ['tails_installer'],
scripts = ['tails-installer'],
license = 'GNU General Public License (GPL)',
url = 'https://tails.boum.org/tails-installer',
diff --git a/tails_installer/gui.py b/tails_installer/gui.py
index 3183016..8c9c445 100755
--- a/tails_installer/gui.py
+++ b/tails_installer/gui.py
@@ -45,12 +45,8 @@ from tails_installer.source import RunningLiveSystemSource
from tails_installer.releases import releases, get_fedora_releases
from tails_installer.utils import _to_unicode, _format_bytes_in_gb, _get_datadir
from tails_installer.utils import is_running_from_tails
-if sys.platform == 'win32':
- from tails_installer.urlgrabber.grabber import URLGrabber, URLGrabError
- from tails_installer.urlgrabber.progress import BaseMeter
-else:
- from urlgrabber.grabber import URLGrabber, URLGrabError
- from urlgrabber.progress import BaseMeter
+from urlgrabber.grabber import URLGrabber, URLGrabError
+from urlgrabber.progress import BaseMeter
MAX_FAT16 = 2047
MAX_FAT32 = 3999
diff --git a/tails_installer/urlgrabber/__init__.py b/tails_installer/urlgrabber/__init__.py
deleted file mode 100644
index 5e6f8d3..0000000
--- a/tails_installer/urlgrabber/__init__.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Library General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-
-# Copyright 2002-2006 Michael D. Stenner, Ryan Tomayko
-
-# $Id: __init__.py,v 1.19 2006/09/21 23:17:36 mstenner Exp $
-
-"""A high-level cross-protocol url-grabber.
-
-Using urlgrabber, data can be fetched in three basic ways:
-
- urlgrab(url) copy the file to the local filesystem
- urlopen(url) open the remote file and return a file object
- (like urllib2.urlopen)
- urlread(url) return the contents of the file as a string
-
-When using these functions (or methods), urlgrabber supports the
-following features:
-
- * identical behavior for http://, ftp://, and file:// urls
- * http keepalive - faster downloads of many files by using
- only a single connection
- * byte ranges - fetch only a portion of the file
- * reget - for a urlgrab, resume a partial download
- * progress meters - the ability to report download progress
- automatically, even when using urlopen!
- * throttling - restrict bandwidth usage
- * retries - automatically retry a download if it fails. The
- number of retries and failure types are configurable.
- * authenticated server access for http and ftp
- * proxy support - support for authenticated http and ftp proxies
- * mirror groups - treat a list of mirrors as a single source,
- automatically switching mirrors if there is a failure.
-"""
-
-__version__ = '3.0.0'
-__date__ = '2006/09/21'
-__author__ = 'Michael D. Stenner <mstenner@linux.duke.edu>, ' \
- 'Ryan Tomayko <rtomayko@naeblis.cx>'
-__url__ = 'http://linux.duke.edu/projects/urlgrabber/'
-
-from grabber import urlgrab, urlopen, urlread
diff --git a/tails_installer/urlgrabber/byterange.py b/tails_installer/urlgrabber/byterange.py
deleted file mode 100644
index e037562..0000000
--- a/tails_installer/urlgrabber/byterange.py
+++ /dev/null
@@ -1,465 +0,0 @@
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the
-# Free Software Foundation, Inc.,
-# 59 Temple Place, Suite 330,
-# Boston, MA 02111-1307 USA
-
-# This file is part of urlgrabber, a high-level cross-protocol url-grabber
-# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
-
-# $Id: byterange.py,v 1.12 2006/07/20 20:15:58 mstenner Exp $
-
-import os
-import stat
-import urllib
-import urllib2
-import rfc822
-
-DEBUG = None
-
-try:
- from cStringIO import StringIO
-except ImportError, msg:
- from StringIO import StringIO
-
-class RangeError(IOError):
- """Error raised when an unsatisfiable range is requested."""
- pass
-
-class HTTPRangeHandler(urllib2.BaseHandler):
- """Handler that enables HTTP Range headers.
-
- This was extremely simple. The Range header is a HTTP feature to
- begin with so all this class does is tell urllib2 that the
- "206 Partial Content" reponse from the HTTP server is what we
- expected.
-
- Example:
- import urllib2
- import byterange
-
- range_handler = range.HTTPRangeHandler()
- opener = urllib2.build_opener(range_handler)
-
- # install it
- urllib2.install_opener(opener)
-
- # create Request and set Range header
- req = urllib2.Request('http://www.python.org/')
- req.header['Range'] = 'bytes=30-50'
- f = urllib2.urlopen(req)
- """
-
- def http_error_206(self, req, fp, code, msg, hdrs):
- # 206 Partial Content Response
- r = urllib.addinfourl(fp, hdrs, req.get_full_url())
- r.code = code
- r.msg = msg
- return r
-
- def http_error_416(self, req, fp, code, msg, hdrs):
- # HTTP's Range Not Satisfiable error
- raise RangeError('Requested Range Not Satisfiable')
-
-class HTTPSRangeHandler(HTTPRangeHandler):
- """ Range Header support for HTTPS. """
-
- def https_error_206(self, req, fp, code, msg, hdrs):
- return self.http_error_206(req, fp, code, msg, hdrs)
-
- def https_error_416(self, req, fp, code, msg, hdrs):
- self.https_error_416(req, fp, code, msg, hdrs)
-
-class RangeableFileObject:
- """File object wrapper to enable raw range handling.
- This was implemented primarilary for handling range
- specifications for file:// urls. This object effectively makes
- a file object look like it consists only of a range of bytes in
- the stream.
-
- Examples:
- # expose 10 bytes, starting at byte position 20, from
- # /etc/aliases.
- >>> fo = RangeableFileObject(file('/etc/passwd', 'r'), (20,30))
- # seek seeks within the range (to position 23 in this case)
- >>> fo.seek(3)
- # tell tells where your at _within the range_ (position 3 in
- # this case)
- >>> fo.tell()
- # read EOFs if an attempt is made to read past the last
- # byte in the range. the following will return only 7 bytes.
- >>> fo.read(30)
- """
-
- def __init__(self, fo, rangetup):
- """Create a RangeableFileObject.
- fo -- a file like object. only the read() method need be
- supported but supporting an optimized seek() is
- preferable.
- rangetup -- a (firstbyte,lastbyte) tuple specifying the range
- to work over.
- The file object provided is assumed to be at byte offset 0.
- """
- self.fo = fo
- (self.firstbyte, self.lastbyte) = range_tuple_normalize(rangetup)
- self.realpos = 0
- self._do_seek(self.firstbyte)
-
- def __getattr__(self, name):
- """This effectively allows us to wrap at the instance level.
- Any attribute not found in _this_ object will be searched for
- in self.fo. This includes methods."""
- if hasattr(self.fo, name):
- return getattr(self.fo, name)
- raise AttributeError, name
-
- def tell(self):
- """Return the position within the range.
- This is different from fo.seek in that position 0 is the
- first byte position of the range tuple. For example, if
- this object was created with a range tuple of (500,899),
- tell() will return 0 when at byte position 500 of the file.
- """
- return (self.realpos - self.firstbyte)
-
- def seek(self,offset,whence=0):
- """Seek within the byte range.
- Positioning is identical to that described under tell().
- """
- assert whence in (0, 1, 2)
- if whence == 0: # absolute seek
- realoffset = self.firstbyte + offset
- elif whence == 1: # relative seek
- realoffset = self.realpos + offset
- elif whence == 2: # absolute from end of file
- # XXX: are we raising the right Error here?
- raise IOError('seek from end of file not supported.')
-
- # do not allow seek past lastbyte in range
- if self.lastbyte and (realoffset >= self.lastbyte):
- realoffset = self.lastbyte
-
- self._do_seek(realoffset - self.realpos)
-
- def read(self, size=-1):
- """Read within the range.
- This method will limit the size read based on the range.
- """
- size = self._calc_read_size(size)
- rslt = self.fo.read(size)
- self.realpos += len(rslt)
- return rslt
-
- def readline(self, size=-1):
- """Read lines within the range.
- This method will limit the size read based on the range.
- """
- size = self._calc_read_size(size)
- rslt = self.fo.readline(size)
- self.realpos += len(rslt)
- return rslt
-
- def _calc_read_size(self, size):
- """Handles calculating the amount of data to read based on
- the range.
- """
- if self.lastbyte:
- if size > -1:
- if ((self.realpos + size) >= self.lastbyte):
- size = (self.lastbyte - self.realpos)
- else:
- size = (self.lastbyte - self.realpos)
- return size
-
- def _do_seek(self,offset):
- """Seek based on whether wrapped object supports seek().
- offset is relative to the current position (self.realpos).
- """
- assert offset >= 0
- if not hasattr(self.fo, 'seek'):
- self._poor_mans_seek(offset)
- else:
- self.fo.seek(self.realpos + offset)
- self.realpos+= offset
-
- def _poor_mans_seek(self,offset):
- """Seek by calling the wrapped file objects read() method.
- This is used for file like objects that do not have native
- seek support. The wrapped objects read() method is called
- to manually seek to the desired position.
- offset -- read this number of bytes from the wrapped
- file object.
- raise RangeError if we encounter EOF before reaching the
- specified offset.
- """
- pos = 0
- bufsize = 1024
- while pos < offset:
- if (pos + bufsize) > offset:
- bufsize = offset - pos
- buf = self.fo.read(bufsize)
- if len(buf) != bufsize:
- raise RangeError('Requested Range Not Satisfiable')
- pos+= bufsize
-
-class FileRangeHandler(urllib2.FileHandler):
- """FileHandler subclass that adds Range support.
- This class handles Range headers exactly like an HTTP
- server would.
- """
- def open_local_file(self, req):
- import mimetypes
- import mimetools
- host = req.get_host()
- file = req.get_selector()
- localfile = urllib.url2pathname(file)
- stats = os.stat(localfile)
- size = stats[stat.ST_SIZE]
- modified = rfc822.formatdate(stats[stat.ST_MTIME])
- mtype = mimetypes.guess_type(file)[0]
- if host:
- host, port = urllib.splitport(host)
- if port or socket.gethostbyname(host) not in self.get_names():
- raise urllib2.URLError('file not on local host')
- fo = open(localfile,'rb')
- brange = req.headers.get('Range',None)
- brange = range_header_to_tuple(brange)
- assert brange != ()
- if brange:
- (fb,lb) = brange
- if lb == '': lb = size
- if fb < 0 or fb > size or lb > size:
- raise RangeError('Requested Range Not Satisfiable')
- size = (lb - fb)
- fo = RangeableFileObject(fo, (fb,lb))
- headers = mimetools.Message(StringIO(
- 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
- (mtype or 'text/plain', size, modified)))
- return urllib.addinfourl(fo, headers, 'file:'+file)
-
-
-# FTP Range Support
-# Unfortunately, a large amount of base FTP code had to be copied
-# from urllib and urllib2 in order to insert the FTP REST command.
-# Code modifications for range support have been commented as
-# follows:
-# -- range support modifications start/end here
-
-from urllib import splitport, splituser, splitpasswd, splitattr, \
- unquote, addclosehook, addinfourl
-import ftplib
-import socket
-import sys
-import ftplib
-import mimetypes
-import mimetools
-
-class FTPRangeHandler(urllib2.FTPHandler):
- def ftp_open(self, req):
- host = req.get_host()
- if not host:
- raise IOError, ('ftp error', 'no host given')
- host, port = splitport(host)
- if port is None:
- port = ftplib.FTP_PORT
- else:
- port = int(port)
-
- # username/password handling
- user, host = splituser(host)
- if user:
- user, passwd = splitpasswd(user)
- else:
- passwd = None
- host = unquote(host)
- user = unquote(user or '')
- passwd = unquote(passwd or '')
-
- try:
- host = socket.gethostbyname(host)
- except socket.error, msg:
- raise urllib2.URLError(msg)
- path, attrs = splitattr(req.get_selector())
- dirs = path.split('/')
- dirs = map(unquote, dirs)
- dirs, file = dirs[:-1], dirs[-1]
- if dirs and not dirs[0]:
- dirs = dirs[1:]
- try:
- fw = self.connect_ftp(user, passwd, host, port, dirs)
- type = file and 'I' or 'D'
- for attr in attrs:
- attr, value = splitattr(attr)
- if attr.lower() == 'type' and \
- value in ('a', 'A', 'i', 'I', 'd', 'D'):
- type = value.upper()
-
- # -- range support modifications start here
- rest = None
- range_tup = range_header_to_tuple(req.headers.get('Range',None))
- assert range_tup != ()
- if range_tup:
- (fb,lb) = range_tup
- if fb > 0: rest = fb
- # -- range support modifications end here
-
- fp, retrlen = fw.retrfile(file, type, rest)
-
- # -- range support modifications start here
- if range_tup:
- (fb,lb) = range_tup
- if lb == '':
- if retrlen is None or retrlen == 0:
- raise RangeError('Requested Range Not Satisfiable due to unobtainable file length.')
- lb = retrlen
- retrlen = lb - fb
- if retrlen < 0:
- # beginning of range is larger than file
- raise RangeError('Requested Range Not Satisfiable')
- else:
- retrlen = lb - fb
- fp = RangeableFileObject(fp, (0,retrlen))
- # -- range support modifications end here
-
- headers = ""
- mtype = mimetypes.guess_type(req.get_full_url())[0]
- if mtype:
- headers += "Content-Type: %s\n" % mtype
- if retrlen is not None and retrlen >= 0:
- headers += "Content-Length: %d\n" % retrlen
- sf = StringIO(headers)
- headers = mimetools.Message(sf)
- return addinfourl(fp, headers, req.get_full_url())
- except ftplib.all_errors, msg:
- raise IOError, ('ftp error', msg), sys.exc_info()[2]
-
- def connect_ftp(self, user, passwd, host, port, dirs):
- fw = ftpwrapper(user, passwd, host, port, dirs)
- return fw
-
-class ftpwrapper(urllib.ftpwrapper):
- # range support note:
- # this ftpwrapper code is copied directly from
- # urllib. The only enhancement is to add the rest
- # argument and pass it on to ftp.ntransfercmd
- def retrfile(self, file, type, rest=None):
- self.endtransfer()
- if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
- else: cmd = 'TYPE ' + type; isdir = 0
- try:
- self.ftp.voidcmd(cmd)
- except ftplib.all_errors:
- self.init()
- self.ftp.voidcmd(cmd)
- conn = None
- if file and not isdir:
- # Use nlst to see if the file exists at all
- try:
- self.ftp.nlst(file)
- except ftplib.error_perm, reason:
- raise IOError, ('ftp error', reason), sys.exc_info()[2]
- # Restore the transfer mode!
- self.ftp.voidcmd(cmd)
- # Try to retrieve as a file
- try:
- cmd = 'RETR ' + file
- conn = self.ftp.ntransfercmd(cmd, rest)
- except ftplib.error_perm, reason:
- if str(reason)[:3] == '501':
- # workaround for REST not supported error
- fp, retrlen = self.retrfile(file, type)
- fp = RangeableFileObject(fp, (rest,''))
- return (fp, retrlen)
- elif str(reason)[:3] != '550':
- raise IOError, ('ftp error', reason), sys.exc_info()[2]
- if not conn:
- # Set transfer mode to ASCII!
- self.ftp.voidcmd('TYPE A')
- # Try a directory listing
- if file: cmd = 'LIST ' + file
- else: cmd = 'LIST'
- conn = self.ftp.ntransfercmd(cmd)
- self.busy = 1
- # Pass back both a suitably decorated object and a retrieval length
- return (addclosehook(conn[0].makefile('rb'),
- self.endtransfer), conn[1])
-
-
-####################################################################
-# Range Tuple Functions
-# XXX: These range tuple functions might go better in a class.
-
-_rangere = None
-def range_header_to_tuple(range_header):
- """Get a (firstbyte,lastbyte) tuple from a Range header value.
-
- Range headers have the form "bytes=<firstbyte>-<lastbyte>". This
- function pulls the firstbyte and lastbyte values and returns
- a (firstbyte,lastbyte) tuple. If lastbyte is not specified in
- the header value, it is returned as an empty string in the
- tuple.
-
- Return None if range_header is None
- Return () if range_header does not conform to the range spec
- pattern.
-
- """
- global _rangere
- if range_header is None: return None
- if _rangere is None:
- import re
- _rangere = re.compile(r'^bytes=(\d{1,})-(\d*)')
- match = _rangere.match(range_header)
- if match:
- tup = range_tuple_normalize(match.group(1,2))
- if tup and tup[1]:
- tup = (tup[0],tup[1]+1)
- return tup
- return ()
-
-def range_tuple_to_header(range_tup):
- """Convert a range tuple to a Range header value.
- Return a string of the form "bytes=<firstbyte>-<lastbyte>" or None
- if no range is needed.
- """
- if range_tup is None: return None
- range_tup = range_tuple_normalize(range_tup)
- if range_tup:
- if range_tup[1]:
- range_tup = (range_tup[0],range_tup[1] - 1)
- return 'bytes=%s-%s' % range_tup
-
-def range_tuple_normalize(range_tup):
- """Normalize a (first_byte,last_byte) range tuple.
- Return a tuple whose first element is guaranteed to be an int
- and whose second element will be '' (meaning: the last byte) or
- an int. Finally, return None if the normalized tuple == (0,'')
- as that is equivelant to retrieving the entire file.
- """
- if range_tup is None: return None
- # handle first byte
- fb = range_tup[0]
- if fb in (None,''): fb = 0
- else: fb = int(fb)
- # handle last byte
- try: lb = range_tup[1]
- except IndexError: lb = ''
- else:
- if lb is None: lb = ''
- elif lb != '': lb = int(lb)
- # check if range is over the entire file
- if (fb,lb) == (0,''): return None
- # check that the range is valid
- if lb < fb: raise RangeError('Invalid byte range: %s-%s' % (fb,lb))
- return (fb,lb)
-
diff --git a/tails_installer/urlgrabber/grabber.py b/tails_installer/urlgrabber/grabber.py
deleted file mode 100644
index 8d396a6..0000000
--- a/tails_installer/urlgrabber/grabber.py
+++ /dev/null
@@ -1,1483 +0,0 @@
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the
-# Free Software Foundation, Inc.,
-# 59 Temple Place, Suite 330,
-# Boston, MA 02111-1307 USA
-
-# This file is part of urlgrabber, a high-level cross-protocol url-grabber
-# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
-
-"""A high-level cross-protocol url-grabber.
-
-GENERAL ARGUMENTS (kwargs)
-
- Where possible, the module-level default is indicated, and legal
- values are provided.
-
- copy_local = 0 [0|1]
-
- ignored except for file:// urls, in which case it specifies
- whether urlgrab should still make a copy of the file, or simply
- point to the existing copy. The module level default for this
- option is 0.
-
- close_connection = 0 [0|1]
-
- tells URLGrabber to close the connection after a file has been
- transfered. This is ignored unless the download happens with the
- http keepalive handler (keepalive=1). Otherwise, the connection
- is left open for further use. The module level default for this
- option is 0 (keepalive connections will not be closed).
-
- keepalive = 1 [0|1]
-
- specifies whether keepalive should be used for HTTP/1.1 servers
- that support it. The module level default for this option is 1
- (keepalive is enabled).
-
- progress_obj = None
-
- a class instance that supports the following methods:
- po.start(filename, url, basename, length, text)
- # length will be None if unknown
- po.update(read) # read == bytes read so far
- po.end()
-
- text = None
-
- specifies an alternativ text item in the beginning of the progress
- bar line. If not given, the basename of the file is used.
-
- throttle = 1.0
-
- a number - if it's an int, it's the bytes/second throttle limit.
- If it's a float, it is first multiplied by bandwidth. If throttle
- == 0, throttling is disabled. If None, the module-level default
- (which can be set on default_grabber.throttle) is used. See
- BANDWIDTH THROTTLING for more information.
-
- timeout = None
-
- a positive float expressing the number of seconds to wait for socket
- operations. If the value is None or 0.0, socket operations will block
- forever. Setting this option causes urlgrabber to call the settimeout
- method on the Socket object used for the request. See the Python
- documentation on settimeout for more information.
- http://www.python.org/doc/current/lib/socket-objects.html
-
- bandwidth = 0
-
- the nominal max bandwidth in bytes/second. If throttle is a float
- and bandwidth == 0, throttling is disabled. If None, the
- module-level default (which can be set on
- default_grabber.bandwidth) is used. See BANDWIDTH THROTTLING for
- more information.
-
- range = None
-
- a tuple of the form (first_byte, last_byte) describing a byte
- range to retrieve. Either or both of the values may set to
- None. If first_byte is None, byte offset 0 is assumed. If
- last_byte is None, the last byte available is assumed. Note that
- the range specification is python-like in that (0,10) will yeild
- the first 10 bytes of the file.
-
- If set to None, no range will be used.
-
- reget = None [None|'simple'|'check_timestamp']
-
- whether to attempt to reget a partially-downloaded file. Reget
- only applies to .urlgrab and (obviously) only if there is a
- partially downloaded file. Reget has two modes:
-
- 'simple' -- the local file will always be trusted. If there
- are 100 bytes in the local file, then the download will always
- begin 100 bytes into the requested file.
-
- 'check_timestamp' -- the timestamp of the server file will be
- compared to the timestamp of the local file. ONLY if the
- local file is newer than or the same age as the server file
- will reget be used. If the server file is newer, or the
- timestamp is not returned, the entire file will be fetched.
-
- NOTE: urlgrabber can do very little to verify that the partial
- file on disk is identical to the beginning of the remote file.
- You may want to either employ a custom "checkfunc" or simply avoid
- using reget in situations where corruption is a concern.
-
- user_agent = 'urlgrabber/VERSION'
-
- a string, usually of the form 'AGENT/VERSION' that is provided to
- HTTP servers in the User-agent header. The module level default
- for this option is "urlgrabber/VERSION".
-
- http_headers = None
-
- a tuple of 2-tuples, each containing a header and value. These
- will be used for http and https requests only. For example, you
- can do
- http_headers = (('Pragma', 'no-cache'),)
-
- ftp_headers = None
-
- this is just like http_headers, but will be used for ftp requests.
-
- proxies = None
-
- a dictionary that maps protocol schemes to proxy hosts. For
- example, to use a proxy server on host "foo" port 3128 for http
- and https URLs:
- proxies={ 'http' : 'http://foo:3128', 'https' : 'http://foo:3128' }
- note that proxy authentication information may be provided using
- normal URL constructs:
- proxies={ 'http' : 'http://user:host@foo:3128' }
- Lastly, if proxies is None, the default environment settings will
- be used.
-
- prefix = None
-
- a url prefix that will be prepended to all requested urls. For
- example:
- g = URLGrabber(prefix='http://foo.com/mirror/')
- g.urlgrab('some/file.txt')
- ## this will fetch 'http://foo.com/mirror/some/file.txt'
- This option exists primarily to allow identical behavior to
- MirrorGroup (and derived) instances. Note: a '/' will be inserted
- if necessary, so you cannot specify a prefix that ends with a
- partial file or directory name.
-
- opener = None
-
- Overrides the default urllib2.OpenerDirector provided to urllib2
- when making requests. This option exists so that the urllib2
- handler chain may be customized. Note that the range, reget,
- proxy, and keepalive features require that custom handlers be
- provided to urllib2 in order to function properly. If an opener
- option is provided, no attempt is made by urlgrabber to ensure
- chain integrity. You are responsible for ensuring that any
- extension handlers are present if said features are required.
-
- data = None
-
- Only relevant for the HTTP family (and ignored for other
- protocols), this allows HTTP POSTs. When the data kwarg is
- present (and not None), an HTTP request will automatically become
- a POST rather than GET. This is done by direct passthrough to
- urllib2. If you use this, you may also want to set the
- 'Content-length' and 'Content-type' headers with the http_headers
- option. Note that python 2.2 handles the case of these
- badly and if you do not use the proper case (shown here), your
- values will be overridden with the defaults.
-
-
-RETRY RELATED ARGUMENTS
-
- retry = None
-
- the number of times to retry the grab before bailing. If this is
- zero, it will retry forever. This was intentional... really, it
- was :). If this value is not supplied or is supplied but is None
- retrying does not occur.
-
- retrycodes = [-1,2,4,5,6,7]
-
- a sequence of errorcodes (values of e.errno) for which it should
- retry. See the doc on URLGrabError for more details on this. You
- might consider modifying a copy of the default codes rather than
- building yours from scratch so that if the list is extended in the
- future (or one code is split into two) you can still enjoy the
- benefits of the default list. You can do that with something like
- this:
-
- retrycodes = urlgrabber.grabber.URLGrabberOptions().retrycodes
- if 12 not in retrycodes:
- retrycodes.append(12)
-
- checkfunc = None
-
- a function to do additional checks. This defaults to None, which
- means no additional checking. The function should simply return
- on a successful check. It should raise URLGrabError on an
- unsuccessful check. Raising of any other exception will be
- considered immediate failure and no retries will occur.
-
- If it raises URLGrabError, the error code will determine the retry
- behavior. Negative error numbers are reserved for use by these
- passed in functions, so you can use many negative numbers for
- different types of failure. By default, -1 results in a retry,
- but this can be customized with retrycodes.
-
- If you simply pass in a function, it will be given exactly one
- argument: a CallbackObject instance with the .url attribute
- defined and either .filename (for urlgrab) or .data (for urlread).
- For urlgrab, .filename is the name of the local file. For
- urlread, .data is the actual string data. If you need other
- arguments passed to the callback (program state of some sort), you
- can do so like this:
-
- checkfunc=(function, ('arg1', 2), {'kwarg': 3})
-
- if the downloaded file has filename /tmp/stuff, then this will
- result in this call (for urlgrab):
-
- function(obj, 'arg1', 2, kwarg=3)
- # obj.filename = '/tmp/stuff'
- # obj.url = 'http://foo.com/stuff'
-
- NOTE: both the "args" tuple and "kwargs" dict must be present if
- you use this syntax, but either (or both) can be empty.
-
- failure_callback = None
-
- The callback that gets called during retries when an attempt to
- fetch a file fails. The syntax for specifying the callback is
- identical to checkfunc, except for the attributes defined in the
- CallbackObject instance. The attributes for failure_callback are:
-
- exception = the raised exception
- url = the url we're trying to fetch
- tries = the number of tries so far (including this one)
- retry = the value of the retry option
-
- The callback is present primarily to inform the calling program of
- the failure, but if it raises an exception (including the one it's
- passed) that exception will NOT be caught and will therefore cause
- future retries to be aborted.
-
- The callback is called for EVERY failure, including the last one.
- On the last try, the callback can raise an alternate exception,
- but it cannot (without severe trickiness) prevent the exception
- from being raised.
-
- interrupt_callback = None
-
- This callback is called if KeyboardInterrupt is received at any
- point in the transfer. Basically, this callback can have three
- impacts on the fetch process based on the way it exits:
-
- 1) raise no exception: the current fetch will be aborted, but
- any further retries will still take place
-
- 2) raise a URLGrabError: if you're using a MirrorGroup, then
- this will prompt a failover to the next mirror according to
- the behavior of the MirrorGroup subclass. It is recommended
- that you raise URLGrabError with code 15, 'user abort'. If
- you are NOT using a MirrorGroup subclass, then this is the
- same as (3).
-
- 3) raise some other exception (such as KeyboardInterrupt), which
- will not be caught at either the grabber or mirror levels.
- That is, it will be raised up all the way to the caller.
-
- This callback is very similar to failure_callback. They are
- passed the same arguments, so you could use the same function for
- both.
-
- urlparser = URLParser()
-
- The URLParser class handles pre-processing of URLs, including
- auth-handling for user/pass encoded in http urls, file handing
- (that is, filenames not sent as a URL), and URL quoting. If you
- want to override any of this behavior, you can pass in a
- replacement instance. See also the 'quote' option.
-
- quote = None
-
- Whether or not to quote the path portion of a url.
- quote = 1 -> quote the URLs (they're not quoted yet)
- quote = 0 -> do not quote them (they're already quoted)
- quote = None -> guess what to do
-
- This option only affects proper urls like 'file:///etc/passwd'; it
- does not affect 'raw' filenames like '/etc/passwd'. The latter
- will always be quoted as they are converted to URLs. Also, only
- the path part of a url is quoted. If you need more fine-grained
- control, you should probably subclass URLParser and pass it in via
- the 'urlparser' option.
-
-BANDWIDTH THROTTLING
-
- urlgrabber supports throttling via two values: throttle and
- bandwidth Between the two, you can either specify and absolute
- throttle threshold or specify a theshold as a fraction of maximum
- available bandwidth.
-
- throttle is a number - if it's an int, it's the bytes/second
- throttle limit. If it's a float, it is first multiplied by
- bandwidth. If throttle == 0, throttling is disabled. If None, the
- module-level default (which can be set with set_throttle) is used.
-
- bandwidth is the nominal max bandwidth in bytes/second. If throttle
- is a float and bandwidth == 0, throttling is disabled. If None, the
- module-level default (which can be set with set_bandwidth) is used.
-
- THROTTLING EXAMPLES:
-
- Lets say you have a 100 Mbps connection. This is (about) 10^8 bits
- per second, or 12,500,000 Bytes per second. You have a number of
- throttling options:
-
- *) set_bandwidth(12500000); set_throttle(0.5) # throttle is a float
-
- This will limit urlgrab to use half of your available bandwidth.
-
- *) set_throttle(6250000) # throttle is an int
-
- This will also limit urlgrab to use half of your available
- bandwidth, regardless of what bandwidth is set to.
-
- *) set_throttle(6250000); set_throttle(1.0) # float
-
- Use half your bandwidth
-
- *) set_throttle(6250000); set_throttle(2.0) # float
-
- Use up to 12,500,000 Bytes per second (your nominal max bandwidth)
-
- *) set_throttle(6250000); set_throttle(0) # throttle = 0
-
- Disable throttling - this is more efficient than a very large
- throttle setting.
-
- *) set_throttle(0); set_throttle(1.0) # throttle is float, bandwidth = 0
-
- Disable throttling - this is the default when the module is loaded.
-
- SUGGESTED AUTHOR IMPLEMENTATION (THROTTLING)
-
- While this is flexible, it's not extremely obvious to the user. I
- suggest you implement a float throttle as a percent to make the
- distinction between absolute and relative throttling very explicit.
-
- Also, you may want to convert the units to something more convenient
- than bytes/second, such as kbps or kB/s, etc.
-
-"""
-
-# $Id: grabber.py,v 1.47 2006/07/20 20:15:58 mstenner Exp $
-
-import os
-import os.path
-import sys
-import urlparse
-import rfc822
-import time
-import types
-import string
-import urllib
-import urllib2
-from stat import * # S_* and ST_*
-
-########################################################################
-# MODULE INITIALIZATION
-########################################################################
-try:
- exec('from ' + (__name__.split('.'))[0] + ' import __version__')
-except:
- __version__ = '???'
-
-auth_handler = urllib2.HTTPBasicAuthHandler( \
- urllib2.HTTPPasswordMgrWithDefaultRealm())
-
-try:
- from i18n import _
-except ImportError, msg:
- def _(st): return st
-
-try:
- from httplib import HTTPException
-except ImportError, msg:
- HTTPException = None
-
-try:
- # This is a convenient way to make keepalive optional.
- # Just rename the module so it can't be imported.
- import keepalive
- from keepalive import HTTPHandler, HTTPSHandler
-except ImportError, msg:
- keepalive_handlers = ()
-else:
- keepalive_handlers = (HTTPHandler(), HTTPSHandler())
-
-try:
- # add in range support conditionally too
- import byterange
- from byterange import HTTPRangeHandler, HTTPSRangeHandler, \
- FileRangeHandler, FTPRangeHandler, range_tuple_normalize, \
- range_tuple_to_header, RangeError
-except ImportError, msg:
- range_handlers = ()
- RangeError = None
- have_range = 0
-else:
- range_handlers = (HTTPRangeHandler(), HTTPSRangeHandler(),
- FileRangeHandler(), FTPRangeHandler())
- have_range = 1
-
-
-# check whether socket timeout support is available (Python >= 2.3)
-import socket
-try:
- TimeoutError = socket.timeout
- have_socket_timeout = True
-except AttributeError:
- TimeoutError = None
- have_socket_timeout = False
-
-########################################################################
-# functions for debugging output. These functions are here because they
-# are also part of the module initialization.
-DEBUG = None
-def set_logger(DBOBJ):
- """Set the DEBUG object. This is called by _init_default_logger when
- the environment variable URLGRABBER_DEBUG is set, but can also be
- called by a calling program. Basically, if the calling program uses
- the logging module and would like to incorporate urlgrabber logging,
- then it can do so this way. It's probably not necessary as most
- internal logging is only for debugging purposes.
-
- The passed-in object should be a logging.Logger instance. It will
- be pushed into the keepalive and byterange modules if they're
- being used. The mirror module pulls this object in on import, so
- you will need to manually push into it. In fact, you may find it
- tidier to simply push your logging object (or objects) into each
- of these modules independently.
- """
-
- global DEBUG
- DEBUG = DBOBJ
- if keepalive_handlers and keepalive.DEBUG is None:
- keepalive.DEBUG = DBOBJ
- if have_range and byterange.DEBUG is None:
- byterange.DEBUG = DBOBJ
-
-def _init_default_logger():
- '''Examines the environment variable URLGRABBER_DEBUG and creates
- a logging object (logging.logger) based on the contents. It takes
- the form
-
- URLGRABBER_DEBUG=level,filename
-
- where "level" can be either an integer or a log level from the
- logging module (DEBUG, INFO, etc). If the integer is zero or
- less, logging will be disabled. Filename is the filename where
- logs will be sent. If it is "-", then stdout will be used. If
- the filename is empty or missing, stderr will be used. If the
- variable cannot be processed or the logging module cannot be
- imported (python < 2.3) then logging will be disabled. Here are
- some examples:
-
- URLGRABBER_DEBUG=1,debug.txt # log everything to debug.txt
- URLGRABBER_DEBUG=WARNING,- # log warning and higher to stdout
- URLGRABBER_DEBUG=INFO # log info and higher to stderr
-
- This funtion is called during module initialization. It is not
- intended to be called from outside. The only reason it is a
- function at all is to keep the module-level namespace tidy and to
- collect the code into a nice block.'''
-
- try:
- dbinfo = os.environ['URLGRABBER_DEBUG'].split(',')
- import logging
- level = logging._levelNames.get(dbinfo[0], int(dbinfo[0]))
- if level < 1: raise ValueError()
-
- formatter = logging.Formatter('%(asctime)s %(message)s')
- if len(dbinfo) > 1: filename = dbinfo[1]
- else: filename = ''
- if filename == '': handler = logging.StreamHandler(sys.stderr)
- elif filename == '-': handler = logging.StreamHandler(sys.stdout)
- else: handler = logging.FileHandler(filename)
- handler.setFormatter(formatter)
- DBOBJ = logging.getLogger('urlgrabber')
- DBOBJ.addHandler(handler)
- DBOBJ.setLevel(level)
- except (KeyError, ImportError, ValueError):
- DBOBJ = None
- set_logger(DBOBJ)
-
-_init_default_logger()
-########################################################################
-# END MODULE INITIALIZATION
-########################################################################
-
-
-
-class URLGrabError(IOError):
- """
- URLGrabError error codes:
-
- URLGrabber error codes (0 -- 255)
- 0 - everything looks good (you should never see this)
- 1 - malformed url
- 2 - local file doesn't exist
- 3 - request for non-file local file (dir, etc)
- 4 - IOError on fetch
- 5 - OSError on fetch
- 6 - no content length header when we expected one
- 7 - HTTPException
- 8 - Exceeded read limit (for urlread)
- 9 - Requested byte range not satisfiable.
- 10 - Byte range requested, but range support unavailable
- 11 - Illegal reget mode
- 12 - Socket timeout
- 13 - malformed proxy url
- 14 - HTTPError (includes .code and .exception attributes)
- 15 - user abort
-
- MirrorGroup error codes (256 -- 511)
- 256 - No more mirrors left to try
-
- Custom (non-builtin) classes derived from MirrorGroup (512 -- 767)
- [ this range reserved for application-specific error codes ]
-
- Retry codes (< 0)
- -1 - retry the download, unknown reason
-
- Note: to test which group a code is in, you can simply do integer
- division by 256: e.errno / 256
-
- Negative codes are reserved for use by functions passed in to
- retrygrab with checkfunc. The value -1 is built in as a generic
- retry code and is already included in the retrycodes list.
- Therefore, you can create a custom check function that simply
- returns -1 and the fetch will be re-tried. For more customized
- retries, you can use other negative number and include them in
- retry-codes. This is nice for outputting useful messages about
- what failed.
-
- You can use these error codes like so:
- try: urlgrab(url)
- except URLGrabError, e:
- if e.errno == 3: ...
- # or
- print e.strerror
- # or simply
- print e #### print '[Errno %i] %s' % (e.errno, e.strerror)
- """
- pass
-
-class CallbackObject:
- """Container for returned callback data.
-
- This is currently a dummy class into which urlgrabber can stuff
- information for passing to callbacks. This way, the prototype for
- all callbacks is the same, regardless of the data that will be
- passed back. Any function that accepts a callback function as an
- argument SHOULD document what it will define in this object.
-
- It is possible that this class will have some greater
- functionality in the future.
- """
- def __init__(self, **kwargs):
- self.__dict__.update(kwargs)
-
-def close_all():
- """close any open keepalive connections"""
- for handler in keepalive_handlers:
- handler.close_all()
-
-def urlgrab(url, filename=None, **kwargs):
- """grab the file at <url> and make a local copy at <filename>
- If filename is none, the basename of the url is used.
- urlgrab returns the filename of the local file, which may be different
- from the passed-in filename if the copy_local kwarg == 0.
-
- See module documentation for a description of possible kwargs.
- """
- return default_grabber.urlgrab(url, filename, **kwargs)
-
-def urlopen(url, **kwargs):
- """open the url and return a file object
- If a progress object or throttle specifications exist, then
- a special file object will be returned that supports them.
- The file object can be treated like any other file object.
-
- See module documentation for a description of possible kwargs.
- """
- return default_grabber.urlopen(url, **kwargs)
-
-def urlread(url, limit=None, **kwargs):
- """read the url into a string, up to 'limit' bytes
- If the limit is exceeded, an exception will be thrown. Note that urlread
- is NOT intended to be used as a way of saying "I want the first N bytes"
- but rather 'read the whole file into memory, but don't use too much'
-
- See module documentation for a description of possible kwargs.
- """
- return default_grabber.urlread(url, limit, **kwargs)
-
-
-class URLParser:
- """Process the URLs before passing them to urllib2.
-
- This class does several things:
-
- * add any prefix
- * translate a "raw" file to a proper file: url
- * handle any http or https auth that's encoded within the url
- * quote the url
-
- Only the "parse" method is called directly, and it calls sub-methods.
-
- An instance of this class is held in the options object, which
- means that it's easy to change the behavior by sub-classing and
- passing the replacement in. It need only have a method like:
-
- url, parts = urlparser.parse(url, opts)
- """
-
- def parse(self, url, opts):
- """parse the url and return the (modified) url and its parts
-
- Note: a raw file WILL be quoted when it's converted to a URL.
- However, other urls (ones which come with a proper scheme) may
- or may not be quoted according to opts.quote
-
- opts.quote = 1 --> quote it
- opts.quote = 0 --> do not quote it
- opts.quote = None --> guess
- """
- quote = opts.quote
-
- if opts.prefix:
- url = self.add_prefix(url, opts.prefix)
-
- parts = urlparse.urlparse(url)
- (scheme, host, path, parm, query, frag) = parts
-
- if not scheme or (len(scheme) == 1 and scheme in string.letters):
- # if a scheme isn't specified, we guess that it's "file:"
- if url[0] not in '/\\': url = os.path.abspath(url)
- url = 'file:' + urllib.pathname2url(url)
- parts = urlparse.urlparse(url)
- quote = 0 # pathname2url quotes, so we won't do it again
-
- if scheme in ['http', 'https']:
- parts = self.process_http(parts)
-
- if quote is None:
- quote = self.guess_should_quote(parts)
- if quote:
- parts = self.quote(parts)
-
- url = urlparse.urlunparse(parts)
- return url, parts
-
- def add_prefix(self, url, prefix):
- if prefix[-1] == '/' or url[0] == '/':
- url = prefix + url
- else:
- url = prefix + '/' + url
- return url
-
- def process_http(self, parts):
- (scheme, host, path, parm, query, frag) = parts
-
- if '@' in host and auth_handler:
- try:
- user_pass, host = host.split('@', 1)
- if ':' in user_pass:
- user, password = user_pass.split(':', 1)
- except ValueError, e:
- raise URLGrabError(1, _('Bad URL: %s') % url)
- if DEBUG: DEBUG.info('adding HTTP auth: %s, %s', user, password)
- auth_handler.add_password(None, host, user, password)
-
- return (scheme, host, path, parm, query, frag)
-
- def quote(self, parts):
- """quote the URL
-
- This method quotes ONLY the path part. If you need to quote
- other parts, you should override this and pass in your derived
- class. The other alternative is to quote other parts before
- passing into urlgrabber.
- """
- (scheme, host, path, parm, query, frag) = parts
- path = urllib.quote(path)
- return (scheme, host, path, parm, query, frag)
-
- hexvals = '0123456789ABCDEF'
- def guess_should_quote(self, parts):
- """
- Guess whether we should quote a path. This amounts to
- guessing whether it's already quoted.
-
- find ' ' -> 1
- find '%' -> 1
- find '%XX' -> 0
- else -> 1
- """
- (scheme, host, path, parm, query, frag) = parts
- if ' ' in path:
- return 1
- ind = string.find(path, '%')
- if ind > -1:
- while ind > -1:
- if len(path) < ind+3:
- return 1
- code = path[ind+1:ind+3].upper()
- if code[0] not in self.hexvals or \
- code[1] not in self.hexvals:
- return 1
- ind = string.find(path, '%', ind+1)
- return 0
- return 1
-
-class URLGrabberOptions:
- """Class to ease kwargs handling."""
-
- def __init__(self, delegate=None, **kwargs):
- """Initialize URLGrabberOptions object.
- Set default values for all options and then update options specified
- in kwargs.
- """
- self.delegate = delegate
- if delegate is None:
- self._set_defaults()
- self._set_attributes(**kwargs)
-
- def __getattr__(self, name):
- if self.delegate and hasattr(self.delegate, name):
- return getattr(self.delegate, name)
- raise AttributeError, name
-
- def raw_throttle(self):
- """Calculate raw throttle value from throttle and bandwidth
- values.
- """
- if self.throttle <= 0:
- return 0
- elif type(self.throttle) == type(0):
- return float(self.throttle)
- else: # throttle is a float
- return self.bandwidth * self.throttle
-
- def derive(self, **kwargs):
- """Create a derived URLGrabberOptions instance.
- This method creates a new instance and overrides the
- options specified in kwargs.
- """
- return URLGrabberOptions(delegate=self, **kwargs)
-
- def _set_attributes(self, **kwargs):
- """Update object attributes with those provided in kwargs."""
- self.__dict__.update(kwargs)
- if have_range and kwargs.has_key('range'):
- # normalize the supplied range value
- self.range = range_tuple_normalize(self.range)
- if not self.reget in [None, 'simple', 'check_timestamp']:
- raise URLGrabError(11, _('Illegal reget mode: %s') \
- % (self.reget, ))
-
- def _set_defaults(self):
- """Set all options to their default values.
- When adding new options, make sure a default is
- provided here.
- """
- self.progress_obj = None
- self.throttle = 1.0
- self.bandwidth = 0
- self.retry = None
- self.retrycodes = [-1,2,4,5,6,7]
- self.checkfunc = None
- self.copy_local = 0
- self.close_connection = 0
- self.range = None
- self.user_agent = 'urlgrabber/%s' % __version__
- self.keepalive = 1
- self.proxies = None
- self.reget = None
- self.failure_callback = None
- self.interrupt_callback = None
- self.prefix = None
- self.opener = None
- self.cache_openers = True
- self.timeout = None
- self.text = None
- self.http_headers = None
- self.ftp_headers = None
- self.data = None
- self.urlparser = URLParser()
- self.quote = None
-
-class URLGrabber:
- """Provides easy opening of URLs with a variety of options.
-
- All options are specified as kwargs. Options may be specified when
- the class is created and may be overridden on a per request basis.
-
- New objects inherit default values from default_grabber.
- """
-
- def __init__(self, **kwargs):
- self.opts = URLGrabberOptions(**kwargs)
-
- def _retry(self, opts, func, *args):
- tries = 0
- while 1:
- # there are only two ways out of this loop. The second has
- # several "sub-ways"
- # 1) via the return in the "try" block
- # 2) by some exception being raised
- # a) an excepton is raised that we don't "except"
- # b) a callback raises ANY exception
- # c) we're not retry-ing or have run out of retries
- # d) the URLGrabError code is not in retrycodes
- # beware of infinite loops :)
- tries = tries + 1
- exception = None
- retrycode = None
- callback = None
- if DEBUG: DEBUG.info('attempt %i/%s: %s',
- tries, opts.retry, args[0])
- try:
- r = apply(func, (opts,) + args, {})
- if DEBUG: DEBUG.info('success')
- return r
- except URLGrabError, e:
- exception = e
- callback = opts.failure_callback
- retrycode = e.errno
- except KeyboardInterrupt, e:
- exception = e
- callback = opts.interrupt_callback
-
- if DEBUG: DEBUG.info('exception: %s', exception)
- if callback:
- if DEBUG: DEBUG.info('calling callback: %s', callback)
- cb_func, cb_args, cb_kwargs = self._make_callback(callback)
- obj = CallbackObject(exception=exception, url=args[0],
- tries=tries, retry=opts.retry)
- cb_func(obj, *cb_args, **cb_kwargs)
-
- if (opts.retry is None) or (tries == opts.retry):
- if DEBUG: DEBUG.info('retries exceeded, re-raising')
- raise
-
- if (retrycode is not None) and (retrycode not in opts.retrycodes):
- if DEBUG: DEBUG.info('retrycode (%i) not in list %s, re-raising',
- retrycode, opts.retrycodes)
- raise
-
- def urlopen(self, url, **kwargs):
- """open the url and return a file object
- If a progress object or throttle value specified when this
- object was created, then a special file object will be
- returned that supports them. The file object can be treated
- like any other file object.
- """
- opts = self.opts.derive(**kwargs)
- (url,parts) = opts.urlparser.parse(url, opts)
- def retryfunc(opts, url):
- return URLGrabberFileObject(url, filename=None, opts=opts)
- return self._retry(opts, retryfunc, url)
-
- def urlgrab(self, url, filename=None, **kwargs):
- """grab the file at <url> and make a local copy at <filename>
- If filename is none, the basename of the url is used.
- urlgrab returns the filename of the local file, which may be
- different from the passed-in filename if copy_local == 0.
- """
- opts = self.opts.derive(**kwargs)
- (url,parts) = opts.urlparser.parse(url, opts)
- (scheme, host, path, parm, query, frag) = parts
- if filename is None:
- filename = os.path.basename( urllib.unquote(path) )
- if scheme == 'file' and not opts.copy_local:
- # just return the name of the local file - don't make a
- # copy currently
- path = urllib.url2pathname(path)
- if host:
- path = os.path.normpath('//' + host + path)
- if not os.path.exists(path):
- raise URLGrabError(2,
- _('Local file does not exist: %s') % (path, ))
- elif not os.path.isfile(path):
- raise URLGrabError(3,
- _('Not a normal file: %s') % (path, ))
- elif not opts.range:
- return path
-
- def retryfunc(opts, url, filename):
- fo = URLGrabberFileObject(url, filename, opts)
- try:
- fo._do_grab()
- if not opts.checkfunc is None:
- cb_func, cb_args, cb_kwargs = \
- self._make_callback(opts.checkfunc)
- obj = CallbackObject()
- obj.filename = filename
- obj.url = url
- apply(cb_func, (obj, )+cb_args, cb_kwargs)
- finally:
- fo.close()
- return filename
-
- return self._retry(opts, retryfunc, url, filename)
-
- def urlread(self, url, limit=None, **kwargs):
- """read the url into a string, up to 'limit' bytes
- If the limit is exceeded, an exception will be thrown. Note
- that urlread is NOT intended to be used as a way of saying
- "I want the first N bytes" but rather 'read the whole file
- into memory, but don't use too much'
- """
- opts = self.opts.derive(**kwargs)
- (url,parts) = opts.urlparser.parse(url, opts)
- if limit is not None:
- limit = limit + 1
-
- def retryfunc(opts, url, limit):
- fo = URLGrabberFileObject(url, filename=None, opts=opts)
- s = ''
- try:
- # this is an unfortunate thing. Some file-like objects
- # have a default "limit" of None, while the built-in (real)
- # file objects have -1. They each break the other, so for
- # now, we just force the default if necessary.
- if limit is None: s = fo.read()
- else: s = fo.read(limit)
-
- if not opts.checkfunc is None:
- cb_func, cb_args, cb_kwargs = \
- self._make_callback(opts.checkfunc)
- obj = CallbackObject()
- obj.data = s
- obj.url = url
- apply(cb_func, (obj, )+cb_args, cb_kwargs)
- finally:
- fo.close()
- return s
-
- s = self._retry(opts, retryfunc, url, limit)
- if limit and len(s) > limit:
- raise URLGrabError(8,
- _('Exceeded limit (%i): %s') % (limit, url))
- return s
-
- def _make_callback(self, callback_obj):
- if callable(callback_obj):
- return callback_obj, (), {}
- else:
- return callback_obj
-
-# create the default URLGrabber used by urlXXX functions.
-# NOTE: actual defaults are set in URLGrabberOptions
-default_grabber = URLGrabber()
-
-class URLGrabberFileObject:
- """This is a file-object wrapper that supports progress objects
- and throttling.
-
- This exists to solve the following problem: lets say you want to
- drop-in replace a normal open with urlopen. You want to use a
- progress meter and/or throttling, but how do you do that without
- rewriting your code? Answer: urlopen will return a wrapped file
- object that does the progress meter and-or throttling internally.
- """
-
- def __init__(self, url, filename, opts):
- self.url = url
- self.filename = filename
- self.opts = opts
- self.fo = None
- self._rbuf = ''
- self._rbufsize = 1024*8
- self._ttime = time.time()
- self._tsize = 0
- self._amount_read = 0
- self._opener = None
- self._do_open()
-
- def __getattr__(self, name):
- """This effectively allows us to wrap at the instance level.
- Any attribute not found in _this_ object will be searched for
- in self.fo. This includes methods."""
- if hasattr(self.fo, name):
- return getattr(self.fo, name)
- raise AttributeError, name
-
- def _get_opener(self):
- """Build a urllib2 OpenerDirector based on request options."""
- if self.opts.opener:
- return self.opts.opener
- elif self._opener is None:
- handlers = []
- need_keepalive_handler = (keepalive_handlers and self.opts.keepalive)
- need_range_handler = (range_handlers and \
- (self.opts.range or self.opts.reget))
- # if you specify a ProxyHandler when creating the opener
- # it _must_ come before all other handlers in the list or urllib2
- # chokes.
- if self.opts.proxies:
- handlers.append( CachedProxyHandler(self.opts.proxies) )
-
- # -------------------------------------------------------
- # OK, these next few lines are a serious kludge to get
- # around what I think is a bug in python 2.2's
- # urllib2. The basic idea is that default handlers
- # get applied first. If you override one (like a
- # proxy handler), then the default gets pulled, but
- # the replacement goes on the end. In the case of
- # proxies, this means the normal handler picks it up
- # first and the proxy isn't used. Now, this probably
- # only happened with ftp or non-keepalive http, so not
- # many folks saw it. The simple approach to fixing it
- # is just to make sure you override the other
- # conflicting defaults as well. I would LOVE to see
- # these go way or be dealt with more elegantly. The
- # problem isn't there after 2.2. -MDS 2005/02/24
- if not need_keepalive_handler:
- handlers.append( urllib2.HTTPHandler() )
- if not need_range_handler:
- handlers.append( urllib2.FTPHandler() )
- # -------------------------------------------------------
-
- if need_keepalive_handler:
- handlers.extend( keepalive_handlers )
- if need_range_handler:
- handlers.extend( range_handlers )
- handlers.append( auth_handler )
- if self.opts.cache_openers:
- self._opener = CachedOpenerDirector(*handlers)
- else:
- self._opener = urllib2.build_opener(*handlers)
- # OK, I don't like to do this, but otherwise, we end up with
- # TWO user-agent headers.
- self._opener.addheaders = []
- return self._opener
-
- def _do_open(self):
- opener = self._get_opener()
-
- req = urllib2.Request(self.url, self.opts.data) # build request object
- self._add_headers(req) # add misc headers that we need
- self._build_range(req) # take care of reget and byterange stuff
-
- fo, hdr = self._make_request(req, opener)
- if self.reget_time and self.opts.reget == 'check_timestamp':
- # do this if we have a local file with known timestamp AND
- # we're in check_timestamp reget mode.
- fetch_again = 0
- try:
- modified_tuple = hdr.getdate_tz('last-modified')
- modified_stamp = rfc822.mktime_tz(modified_tuple)
- if modified_stamp > self.reget_time: fetch_again = 1
- except (TypeError,):
- fetch_again = 1
-
- if fetch_again:
- # the server version is newer than the (incomplete) local
- # version, so we should abandon the version we're getting
- # and fetch the whole thing again.
- fo.close()
- self.opts.reget = None
- del req.headers['Range']
- self._build_range(req)
- fo, hdr = self._make_request(req, opener)
-
- (scheme, host, path, parm, query, frag) = urlparse.urlparse(self.url)
- path = urllib.unquote(path)
- if not (self.opts.progress_obj or self.opts.raw_throttle() \
- or self.opts.timeout):
- # if we're not using the progress_obj, throttling, or timeout
- # we can get a performance boost by going directly to
- # the underlying fileobject for reads.
- self.read = fo.read
- if hasattr(fo, 'readline'):
- self.readline = fo.readline
- elif self.opts.progress_obj:
- try:
- length = int(hdr['Content-Length'])
- length = length + self._amount_read # Account for regets
- except (KeyError, ValueError, TypeError):
- length = None
-
- self.opts.progress_obj.start(str(self.filename),
- urllib.unquote(self.url),
- os.path.basename(path),
- length, text=self.opts.text)
- self.opts.progress_obj.update(0)
- (self.fo, self.hdr) = (fo, hdr)
-
- def _add_headers(self, req):
- if self.opts.user_agent:
- req.add_header('User-agent', self.opts.user_agent)
- try: req_type = req.get_type()
- except ValueError: req_type = None
- if self.opts.http_headers and req_type in ('http', 'https'):
- for h, v in self.opts.http_headers:
- req.add_header(h, v)
- if self.opts.ftp_headers and req_type == 'ftp':
- for h, v in self.opts.ftp_headers:
- req.add_header(h, v)
-
- def _build_range(self, req):
- self.reget_time = None
- self.append = 0
- reget_length = 0
- rt = None
- if have_range and self.opts.reget and type(self.filename) in types.StringTypes:
- # we have reget turned on and we're dumping to a file
- try:
- s = os.stat(self.filename)
- except OSError:
- pass
- else:
- self.reget_time = s[ST_MTIME]
- reget_length = s[ST_SIZE]
-
- # Set initial length when regetting
- self._amount_read = reget_length
-
- rt = reget_length, ''
- self.append = 1
-
- if self.opts.range:
- if not have_range:
- raise URLGrabError(10, _('Byte range requested but range '\
- 'support unavailable'))
- rt = self.opts.range
- if rt[0]: rt = (rt[0] + reget_length, rt[1])
-
- if rt:
- header = range_tuple_to_header(rt)
- if header: req.add_header('Range', header)
-
- def _make_request(self, req, opener):
- try:
- if have_socket_timeout and self.opts.timeout:
- old_to = socket.getdefaulttimeout()
- socket.setdefaulttimeout(self.opts.timeout)
- try:
- fo = opener.open(req)
- finally:
- socket.setdefaulttimeout(old_to)
- else:
- fo = opener.open(req)
- hdr = fo.info()
- except ValueError, e:
- raise URLGrabError(1, _('Bad URL: %s') % (e, ))
- except RangeError, e:
- raise URLGrabError(9, str(e))
- except urllib2.HTTPError, e:
- new_e = URLGrabError(14, str(e))
- new_e.code = e.code
- new_e.exception = e
- raise new_e
- except IOError, e:
- if hasattr(e, 'reason') and have_socket_timeout and \
- isinstance(e.reason, TimeoutError):
- raise URLGrabError(12, _('Timeout: %s') % (e, ))
- else:
- raise URLGrabError(4, _('IOError: %s') % (e, ))
- except OSError, e:
- raise URLGrabError(5, _('OSError: %s') % (e, ))
- except HTTPException, e:
- raise URLGrabError(7, _('HTTP Exception (%s): %s') % \
- (e.__class__.__name__, e))
- else:
- return (fo, hdr)
-
- def _do_grab(self):
- """dump the file to self.filename."""
- if self.append: new_fo = open(self.filename, 'ab')
- else: new_fo = open(self.filename, 'wb')
- try:
- # if we have a known range, only try to read that much.
- (low, high) = self.opts.range
- amount = high - low
- except TypeError, ValueError:
- amount = None
- bs = 1024*8
- size = 0
-
- try:
- if amount is not None: bs = min(bs, amount - size)
- block = self.read(bs)
- size = size + len(block)
- while block:
- new_fo.write(block)
- if amount is not None: bs = min(bs, amount - size)
- block = self.read(bs)
- size = size + len(block)
- finally:
- new_fo.close()
-
- try:
- modified_tuple = self.hdr.getdate_tz('last-modified')
- modified_stamp = rfc822.mktime_tz(modified_tuple)
- os.utime(self.filename, (modified_stamp, modified_stamp))
- except (TypeError,), e: pass
-
- return size
-
- def _fill_buffer(self, amt=None):
- """fill the buffer to contain at least 'amt' bytes by reading
- from the underlying file object. If amt is None, then it will
- read until it gets nothing more. It updates the progress meter
- and throttles after every self._rbufsize bytes."""
- # the _rbuf test is only in this first 'if' for speed. It's not
- # logically necessary
- if self._rbuf and not amt is None:
- L = len(self._rbuf)
- if amt > L:
- amt = amt - L
- else:
- return
-
- # if we've made it here, then we don't have enough in the buffer
- # and we need to read more.
-
- buf = [self._rbuf]
- bufsize = len(self._rbuf)
- while amt is None or amt:
- # first, delay if necessary for throttling reasons
- if self.opts.raw_throttle():
- diff = self._tsize/self.opts.raw_throttle() - \
- (time.time() - self._ttime)
- if diff > 0: time.sleep(diff)
- self._ttime = time.time()
-
- # now read some data, up to self._rbufsize
- if amt is None: readamount = self._rbufsize
- else: readamount = min(amt, self._rbufsize)
- try:
- new = self.fo.read(readamount)
- except socket.error, e:
- raise URLGrabError(4, _('Socket Error: %s') % (e, ))
- except TimeoutError, e:
- raise URLGrabError(12, _('Timeout: %s') % (e, ))
- except IOError, e:
- raise URLGrabError(4, _('IOError: %s') %(e,))
- newsize = len(new)
- if not newsize: break # no more to read
-
- if amt: amt = amt - newsize
- buf.append(new)
- bufsize = bufsize + newsize
- self._tsize = newsize
- self._amount_read = self._amount_read + newsize
- if self.opts.progress_obj:
- self.opts.progress_obj.update(self._amount_read)
-
- self._rbuf = string.join(buf, '')
- return
-
- def read(self, amt=None):
- self._fill_buffer(amt)
- if amt is None:
- s, self._rbuf = self._rbuf, ''
- else:
- s, self._rbuf = self._rbuf[:amt], self._rbuf[amt:]
- return s
-
- def readline(self, limit=-1):
- i = string.find(self._rbuf, '\n')
- while i < 0 and not (0 < limit <= len(self._rbuf)):
- L = len(self._rbuf)
- self._fill_buffer(L + self._rbufsize)
- if not len(self._rbuf) > L: break
- i = string.find(self._rbuf, '\n', L)
-
- if i < 0: i = len(self._rbuf)
- else: i = i+1
- if 0 <= limit < len(self._rbuf): i = limit
-
- s, self._rbuf = self._rbuf[:i], self._rbuf[i:]
- return s
-
- def close(self):
- if self.opts.progress_obj:
- self.opts.progress_obj.end(self._amount_read)
- self.fo.close()
- if self.opts.close_connection:
- try: self.fo.close_connection()
- except: pass
-
-_handler_cache = []
-def CachedOpenerDirector(*handlers):
- for (cached_handlers, opener) in _handler_cache:
- if cached_handlers == handlers:
- for handler in opener.handlers:
- handler.add_parent(opener)
- return opener
- opener = urllib2.build_opener(*handlers)
- _handler_cache.append( (handlers, opener) )
- return opener
-
-_proxy_cache = []
-def CachedProxyHandler(proxies):
- for (pdict, handler) in _proxy_cache:
- if pdict == proxies:
- if DEBUG: DEBUG.debug('re-using proxy settings: %s', proxies)
- break
- else:
- for k, v in proxies.items():
- utype, url = urllib.splittype(v)
- host, other = urllib.splithost(url)
- if (utype is None) or (host is None):
- raise URLGrabError(13, _('Bad proxy URL: %s') % v)
-
- if DEBUG: DEBUG.info('creating new proxy handler: %s', proxies)
- handler = urllib2.ProxyHandler(proxies)
- _proxy_cache.append( (proxies, handler) )
- return handler
-
-#####################################################################
-# DEPRECATED FUNCTIONS
-def set_throttle(new_throttle):
- """Deprecated. Use: default_grabber.throttle = new_throttle"""
- default_grabber.throttle = new_throttle
-
-def set_bandwidth(new_bandwidth):
- """Deprecated. Use: default_grabber.bandwidth = new_bandwidth"""
- default_grabber.bandwidth = new_bandwidth
-
-def set_progress_obj(new_progress_obj):
- """Deprecated. Use: default_grabber.progress_obj = new_progress_obj"""
- default_grabber.progress_obj = new_progress_obj
-
-def set_user_agent(new_user_agent):
- """Deprecated. Use: default_grabber.user_agent = new_user_agent"""
- default_grabber.user_agent = new_user_agent
-
-def retrygrab(url, filename=None, copy_local=0, close_connection=0,
- progress_obj=None, throttle=None, bandwidth=None,
- numtries=3, retrycodes=[-1,2,4,5,6,7], checkfunc=None):
- """Deprecated. Use: urlgrab() with the retry arg instead"""
- kwargs = {'copy_local' : copy_local,
- 'close_connection' : close_connection,
- 'progress_obj' : progress_obj,
- 'throttle' : throttle,
- 'bandwidth' : bandwidth,
- 'retry' : numtries,
- 'retrycodes' : retrycodes,
- 'checkfunc' : checkfunc
- }
- return urlgrab(url, filename, **kwargs)
-
-
-#####################################################################
-# TESTING
-def _main_test():
- import sys
- try: url, filename = sys.argv[1:3]
- except ValueError:
- print 'usage:', sys.argv[0], \
- '<url> <filename> [copy_local=0|1] [close_connection=0|1]'
- sys.exit()
-
- kwargs = {}
- for a in sys.argv[3:]:
- k, v = string.split(a, '=', 1)
- kwargs[k] = int(v)
-
- set_throttle(1.0)
- set_bandwidth(32 * 1024)
- print "throttle: %s, throttle bandwidth: %s B/s" % (default_grabber.throttle,
- default_grabber.bandwidth)
-
- try: from progress import text_progress_meter
- except ImportError, e: pass
- else: kwargs['progress_obj'] = text_progress_meter()
-
- try: name = apply(urlgrab, (url, filename), kwargs)
- except URLGrabError, e: print e
- else: print 'LOCAL FILE:', name
-
-
-def _retry_test():
- import sys
- try: url, filename = sys.argv[1:3]
- except ValueError:
- print 'usage:', sys.argv[0], \
- '<url> <filename> [copy_local=0|1] [close_connection=0|1]'
- sys.exit()
-
- kwargs = {}
- for a in sys.argv[3:]:
- k, v = string.split(a, '=', 1)
- kwargs[k] = int(v)
-
- try: from progress import text_progress_meter
- except ImportError, e: pass
- else: kwargs['progress_obj'] = text_progress_meter()
-
- def cfunc(filename, hello, there='foo'):
- print hello, there
- import random
- rnum = random.random()
- if rnum < .5:
- print 'forcing retry'
- raise URLGrabError(-1, 'forcing retry')
- if rnum < .75:
- print 'forcing failure'
- raise URLGrabError(-2, 'forcing immediate failure')
- print 'success'
- return
-
- close_all()
- kwargs['checkfunc'] = (cfunc, ('hello',), {'there':'there'})
- try: name = apply(retrygrab, (url, filename), kwargs)
- except URLGrabError, e: print e
- else: print 'LOCAL FILE:', name
-
-def _file_object_test(filename=None):
- import random, cStringIO, sys
- if filename is None:
- filename = __file__
- print 'using file "%s" for comparisons' % filename
- fo = open(filename)
- s_input = fo.read()
- fo.close()
-
- for testfunc in [_test_file_object_smallread,
- _test_file_object_readall,
- _test_file_object_readline,
- _test_file_object_readlines]:
- fo_input = cStringIO.StringIO(s_input)
- fo_output = cStringIO.StringIO()
- wrapper = URLGrabberFileObject(fo_input, None, 0)
- print 'testing %-30s ' % testfunc.__name__,
- testfunc(wrapper, fo_output)
- s_output = fo_output.getvalue()
- if s_output == s_input: print 'passed'
- else: print 'FAILED'
-
-def _test_file_object_smallread(wrapper, fo_output):
- while 1:
- s = wrapper.read(23)
- fo_output.write(s)
- if not s: return
-
-def _test_file_object_readall(wrapper, fo_output):
- s = wrapper.read()
- fo_output.write(s)
-
-def _test_file_object_readline(wrapper, fo_output):
- while 1:
- s = wrapper.readline()
- fo_output.write(s)
- if not s: return
-
-def _test_file_object_readlines(wrapper, fo_output):
- li = wrapper.readlines()
- fo_output.write(string.join(li, ''))
-
-if __name__ == '__main__':
- _main_test()
- _retry_test()
- _file_object_test('test')
diff --git a/tails_installer/urlgrabber/keepalive.py b/tails_installer/urlgrabber/keepalive.py
deleted file mode 100644
index df796bd..0000000
--- a/tails_installer/urlgrabber/keepalive.py
+++ /dev/null
@@ -1,603 +0,0 @@
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the
-# Free Software Foundation, Inc.,
-# 59 Temple Place, Suite 330,
-# Boston, MA 02111-1307 USA
-
-# This file is part of urlgrabber, a high-level cross-protocol url-grabber
-# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
-
-"""An HTTP handler for urllib2 that supports HTTP 1.1 and keepalive.
-
->>> import urllib2
->>> from keepalive import HTTPHandler
->>> keepalive_handler = HTTPHandler()
->>> opener = urllib2.build_opener(keepalive_handler)
->>> urllib2.install_opener(opener)
->>>
->>> fo = urllib2.urlopen('http://www.python.org')
-
-If a connection to a given host is requested, and all of the existing
-connections are still in use, another connection will be opened. If
-the handler tries to use an existing connection but it fails in some
-way, it will be closed and removed from the pool.
-
-To remove the handler, simply re-run build_opener with no arguments, and
-install that opener.
-
-You can explicitly close connections by using the close_connection()
-method of the returned file-like object (described below) or you can
-use the handler methods:
-
- close_connection(host)
- close_all()
- open_connections()
-
-NOTE: using the close_connection and close_all methods of the handler
-should be done with care when using multiple threads.
- * there is nothing that prevents another thread from creating new
- connections immediately after connections are closed
- * no checks are done to prevent in-use connections from being closed
-
->>> keepalive_handler.close_all()
-
-EXTRA ATTRIBUTES AND METHODS
-
- Upon a status of 200, the object returned has a few additional
- attributes and methods, which should not be used if you want to
- remain consistent with the normal urllib2-returned objects:
-
- close_connection() - close the connection to the host
- readlines() - you know, readlines()
- status - the return status (ie 404)
- reason - english translation of status (ie 'File not found')
-
- If you want the best of both worlds, use this inside an
- AttributeError-catching try:
-
- >>> try: status = fo.status
- >>> except AttributeError: status = None
-
- Unfortunately, these are ONLY there if status == 200, so it's not
- easy to distinguish between non-200 responses. The reason is that
- urllib2 tries to do clever things with error codes 301, 302, 401,
- and 407, and it wraps the object upon return.
-
- For python versions earlier than 2.4, you can avoid this fancy error
- handling by setting the module-level global HANDLE_ERRORS to zero.
- You see, prior to 2.4, it's the HTTP Handler's job to determine what
- to handle specially, and what to just pass up. HANDLE_ERRORS == 0
- means "pass everything up". In python 2.4, however, this job no
- longer belongs to the HTTP Handler and is now done by a NEW handler,
- HTTPErrorProcessor. Here's the bottom line:
-
- python version < 2.4
- HANDLE_ERRORS == 1 (default) pass up 200, treat the rest as
- errors
- HANDLE_ERRORS == 0 pass everything up, error processing is
- left to the calling code
- python version >= 2.4
- HANDLE_ERRORS == 1 pass up 200, treat the rest as errors
- HANDLE_ERRORS == 0 (default) pass everything up, let the
- other handlers (specifically,
- HTTPErrorProcessor) decide what to do
-
- In practice, setting the variable either way makes little difference
- in python 2.4, so for the most consistent behavior across versions,
- you probably just want to use the defaults, which will give you
- exceptions on errors.
-
-"""
-
-# $Id: keepalive.py,v 1.15 2006/07/20 20:15:58 mstenner Exp $
-
-import urllib2
-import httplib
-import socket
-import thread
-
-DEBUG = None
-
-import sys
-if sys.version_info < (2, 4): HANDLE_ERRORS = 1
-else: HANDLE_ERRORS = 0
-
-class ConnectionManager:
- """
- The connection manager must be able to:
- * keep track of all existing
- """
- def __init__(self):
- self._lock = thread.allocate_lock()
- self._hostmap = {} # map hosts to a list of connections
- self._connmap = {} # map connections to host
- self._readymap = {} # map connection to ready state
-
- def add(self, host, connection, ready):
- self._lock.acquire()
- try:
- if not self._hostmap.has_key(host): self._hostmap[host] = []
- self._hostmap[host].append(connection)
- self._connmap[connection] = host
- self._readymap[connection] = ready
- finally:
- self._lock.release()
-
- def remove(self, connection):
- self._lock.acquire()
- try:
- try:
- host = self._connmap[connection]
- except KeyError:
- pass
- else:
- del self._connmap[connection]
- del self._readymap[connection]
- self._hostmap[host].remove(connection)
- if not self._hostmap[host]: del self._hostmap[host]
- finally:
- self._lock.release()
-
- def set_ready(self, connection, ready):
- try: self._readymap[connection] = ready
- except KeyError: pass
-
- def get_ready_conn(self, host):
- conn = None
- self._lock.acquire()
- try:
- if self._hostmap.has_key(host):
- for c in self._hostmap[host]:
- if self._readymap[c]:
- self._readymap[c] = 0
- conn = c
- break
- finally:
- self._lock.release()
- return conn
-
- def get_all(self, host=None):
- if host:
- return list(self._hostmap.get(host, []))
- else:
- return dict(self._hostmap)
-
-class KeepAliveHandler:
- def __init__(self):
- self._cm = ConnectionManager()
-
- #### Connection Management
- def open_connections(self):
- """return a list of connected hosts and the number of connections
- to each. [('foo.com:80', 2), ('bar.org', 1)]"""
- return [(host, len(li)) for (host, li) in self._cm.get_all().items()]
-
- def close_connection(self, host):
- """close connection(s) to <host>
- host is the host:port spec, as in 'www.cnn.com:8080' as passed in.
- no error occurs if there is no connection to that host."""
- for h in self._cm.get_all(host):
- self._cm.remove(h)
- h.close()
-
- def close_all(self):
- """close all open connections"""
- for host, conns in self._cm.get_all().items():
- for h in conns:
- self._cm.remove(h)
- h.close()
-
- def _request_closed(self, request, host, connection):
- """tells us that this request is now closed and the the
- connection is ready for another request"""
- self._cm.set_ready(connection, 1)
-
- def _remove_connection(self, host, connection, close=0):
- if close: connection.close()
- self._cm.remove(connection)
-
- #### Transaction Execution
- def do_open(self, http_class, req):
- host = req.get_host()
- if not host:
- raise urllib2.URLError('no host given')
-
- try:
- h = self._cm.get_ready_conn(host)
- while h:
- r = self._reuse_connection(h, req, host)
-
- # if this response is non-None, then it worked and we're
- # done. Break out, skipping the else block.
- if r: break
-
- # connection is bad - possibly closed by server
- # discard it and ask for the next free connection
- h.close()
- self._cm.remove(h)
- h = self._cm.get_ready_conn(host)
- else:
- # no (working) free connections were found. Create a new one.
- h = http_class(host)
- if DEBUG: DEBUG.info("creating new connection to %s (%d)",
- host, id(h))
- self._cm.add(host, h, 0)
- self._start_transaction(h, req)
- r = h.getresponse()
- except (socket.error, httplib.HTTPException), err:
- raise urllib2.URLError(err)
-
- # if not a persistent connection, don't try to reuse it
- if r.will_close: self._cm.remove(h)
-
- if DEBUG: DEBUG.info("STATUS: %s, %s", r.status, r.reason)
- r._handler = self
- r._host = host
- r._url = req.get_full_url()
- r._connection = h
- r.code = r.status
- r.headers = r.msg
- r.msg = r.reason
-
- if r.status == 200 or not HANDLE_ERRORS:
- return r
- else:
- return self.parent.error('http', req, r,
- r.status, r.msg, r.headers)
-
- def _reuse_connection(self, h, req, host):
- """start the transaction with a re-used connection
- return a response object (r) upon success or None on failure.
- This DOES not close or remove bad connections in cases where
- it returns. However, if an unexpected exception occurs, it
- will close and remove the connection before re-raising.
- """
- try:
- self._start_transaction(h, req)
- r = h.getresponse()
- # note: just because we got something back doesn't mean it
- # worked. We'll check the version below, too.
- except (socket.error, httplib.HTTPException):
- r = None
- except:
- # adding this block just in case we've missed
- # something we will still raise the exception, but
- # lets try and close the connection and remove it
- # first. We previously got into a nasty loop
- # where an exception was uncaught, and so the
- # connection stayed open. On the next try, the
- # same exception was raised, etc. The tradeoff is
- # that it's now possible this call will raise
- # a DIFFERENT exception
- if DEBUG: DEBUG.error("unexpected exception - closing " + \
- "connection to %s (%d)", host, id(h))
- self._cm.remove(h)
- h.close()
- raise
-
- if r is None or r.version == 9:
- # httplib falls back to assuming HTTP 0.9 if it gets a
- # bad header back. This is most likely to happen if
- # the socket has been closed by the server since we
- # last used the connection.
- if DEBUG: DEBUG.info("failed to re-use connection to %s (%d)",
- host, id(h))
- r = None
- else:
- if DEBUG: DEBUG.info("re-using connection to %s (%d)", host, id(h))
-
- return r
-
- def _start_transaction(self, h, req):
- try:
- if req.has_data():
- data = req.get_data()
- h.putrequest('POST', req.get_selector())
- if not req.headers.has_key('Content-type'):
- h.putheader('Content-type',
- 'application/x-www-form-urlencoded')
- if not req.headers.has_key('Content-length'):
- h.putheader('Content-length', '%d' % len(data))
- else:
- h.putrequest('GET', req.get_selector())
- except (socket.error, httplib.HTTPException), err:
- raise urllib2.URLError(err)
-
- for args in self.parent.addheaders:
- h.putheader(*args)
- for k, v in req.headers.items():
- h.putheader(k, v)
- h.endheaders()
- if req.has_data():
- h.send(data)
-
-class HTTPHandler(KeepAliveHandler, urllib2.HTTPHandler):
- def __init__(self):
- KeepAliveHandler.__init__(self)
-
- def http_open(self, req):
- return self.do_open(HTTPConnection, req)
-
-class HTTPSHandler(KeepAliveHandler, urllib2.HTTPSHandler):
- def __init__(self):
- KeepAliveHandler.__init__(self)
-
- def https_open(self, req):
- return self.do_open(HTTPSConnection, req)
-
-class HTTPResponse(httplib.HTTPResponse):
- # we need to subclass HTTPResponse in order to
- # 1) add readline() and readlines() methods
- # 2) add close_connection() methods
- # 3) add info() and geturl() methods
-
- # in order to add readline(), read must be modified to deal with a
- # buffer. example: readline must read a buffer and then spit back
- # one line at a time. The only real alternative is to read one
- # BYTE at a time (ick). Once something has been read, it can't be
- # put back (ok, maybe it can, but that's even uglier than this),
- # so if you THEN do a normal read, you must first take stuff from
- # the buffer.
-
- # the read method wraps the original to accomodate buffering,
- # although read() never adds to the buffer.
- # Both readline and readlines have been stolen with almost no
- # modification from socket.py
-
-
- def __init__(self, sock, debuglevel=0, strict=0, method=None):
- if method: # the httplib in python 2.3 uses the method arg
- httplib.HTTPResponse.__init__(self, sock, debuglevel, method)
- else: # 2.2 doesn't
- httplib.HTTPResponse.__init__(self, sock, debuglevel)
- self.fileno = sock.fileno
- self.code = None
- self._rbuf = ''
- self._rbufsize = 8096
- self._handler = None # inserted by the handler later
- self._host = None # (same)
- self._url = None # (same)
- self._connection = None # (same)
-
- _raw_read = httplib.HTTPResponse.read
-
- def close(self):
- if self.fp:
- self.fp.close()
- self.fp = None
- if self._handler:
- self._handler._request_closed(self, self._host,
- self._connection)
-
- def close_connection(self):
- self._handler._remove_connection(self._host, self._connection, close=1)
- self.close()
-
- def info(self):
- return self.headers
-
- def geturl(self):
- return self._url
-
- def read(self, amt=None):
- # the _rbuf test is only in this first if for speed. It's not
- # logically necessary
- if self._rbuf and not amt is None:
- L = len(self._rbuf)
- if amt > L:
- amt -= L
- else:
- s = self._rbuf[:amt]
- self._rbuf = self._rbuf[amt:]
- return s
-
- s = self._rbuf + self._raw_read(amt)
- self._rbuf = ''
- return s
-
- def readline(self, limit=-1):
- data = ""
- i = self._rbuf.find('\n')
- while i < 0 and not (0 < limit <= len(self._rbuf)):
- new = self._raw_read(self._rbufsize)
- if not new: break
- i = new.find('\n')
- if i >= 0: i = i + len(self._rbuf)
- self._rbuf = self._rbuf + new
- if i < 0: i = len(self._rbuf)
- else: i = i+1
- if 0 <= limit < len(self._rbuf): i = limit
- data, self._rbuf = self._rbuf[:i], self._rbuf[i:]
- return data
-
- def readlines(self, sizehint = 0):
- total = 0
- list = []
- while 1:
- line = self.readline()
- if not line: break
- list.append(line)
- total += len(line)
- if sizehint and total >= sizehint:
- break
- return list
-
-
-class HTTPConnection(httplib.HTTPConnection):
- # use the modified response class
- response_class = HTTPResponse
-
-class HTTPSConnection(httplib.HTTPSConnection):
- response_class = HTTPResponse
-
-#########################################################################
-##### TEST FUNCTIONS
-#########################################################################
-
-def error_handler(url):
- global HANDLE_ERRORS
- orig = HANDLE_ERRORS
- keepalive_handler = HTTPHandler()
- opener = urllib2.build_opener(keepalive_handler)
- urllib2.install_opener(opener)
- pos = {0: 'off', 1: 'on'}
- for i in (0, 1):
- print " fancy error handling %s (HANDLE_ERRORS = %i)" % (pos[i], i)
- HANDLE_ERRORS = i
- try:
- fo = urllib2.urlopen(url)
- foo = fo.read()
- fo.close()
- try: status, reason = fo.status, fo.reason
- except AttributeError: status, reason = None, None
- except IOError, e:
- print " EXCEPTION: %s" % e
- raise
- else:
- print " status = %s, reason = %s" % (status, reason)
- HANDLE_ERRORS = orig
- hosts = keepalive_handler.open_connections()
- print "open connections:", hosts
- keepalive_handler.close_all()
-
-def continuity(url):
- import md5
- format = '%25s: %s'
-
- # first fetch the file with the normal http handler
- opener = urllib2.build_opener()
- urllib2.install_opener(opener)
- fo = urllib2.urlopen(url)
- foo = fo.read()
- fo.close()
- m = md5.new(foo)
- print format % ('normal urllib', m.hexdigest())
-
- # now install the keepalive handler and try again
- opener = urllib2.build_opener(HTTPHandler())
- urllib2.install_opener(opener)
-
- fo = urllib2.urlopen(url)
- foo = fo.read()
- fo.close()
- m = md5.new(foo)
- print format % ('keepalive read', m.hexdigest())
-
- fo = urllib2.urlopen(url)
- foo = ''
- while 1:
- f = fo.readline()
- if f: foo = foo + f
- else: break
- fo.close()
- m = md5.new(foo)
- print format % ('keepalive readline', m.hexdigest())
-
-def comp(N, url):
- print ' making %i connections to:\n %s' % (N, url)
-
- sys.stdout.write(' first using the normal urllib handlers')
- # first use normal opener
- opener = urllib2.build_opener()
- urllib2.install_opener(opener)
- t1 = fetch(N, url)
- print ' TIME: %.3f s' % t1
-
- sys.stdout.write(' now using the keepalive handler ')
- # now install the keepalive handler and try again
- opener = urllib2.build_opener(HTTPHandler())
- urllib2.install_opener(opener)
- t2 = fetch(N, url)
- print ' TIME: %.3f s' % t2
- print ' improvement factor: %.2f' % (t1/t2, )
-
-def fetch(N, url, delay=0):
- import time
- lens = []
- starttime = time.time()
- for i in range(N):
- if delay and i > 0: time.sleep(delay)
- fo = urllib2.urlopen(url)
- foo = fo.read()
- fo.close()
- lens.append(len(foo))
- diff = time.time() - starttime
-
- j = 0
- for i in lens[1:]:
- j = j + 1
- if not i == lens[0]:
- print "WARNING: inconsistent length on read %i: %i" % (j, i)
-
- return diff
-
-def test_timeout(url):
- global DEBUG
- dbbackup = DEBUG
- class FakeLogger:
- def debug(self, msg, *args): print msg % args
- info = warning = error = debug
- DEBUG = FakeLogger()
- print " fetching the file to establish a connection"
- fo = urllib2.urlopen(url)
- data1 = fo.read()
- fo.close()
-
- i = 20
- print " waiting %i seconds for the server to close the connection" % i
- while i > 0:
- sys.stdout.write('\r %2i' % i)
- sys.stdout.flush()
- time.sleep(1)
- i -= 1
- sys.stderr.write('\r')
-
- print " fetching the file a second time"
- fo = urllib2.urlopen(url)
- data2 = fo.read()
- fo.close()
-
- if data1 == data2:
- print ' data are identical'
- else:
- print ' ERROR: DATA DIFFER'
-
- DEBUG = dbbackup
-
-
-def test(url, N=10):
- print "checking error hander (do this on a non-200)"
- try: error_handler(url)
- except IOError, e:
- print "exiting - exception will prevent further tests"
- sys.exit()
- print
- print "performing continuity test (making sure stuff isn't corrupted)"
- continuity(url)
- print
- print "performing speed comparison"
- comp(N, url)
- print
- print "performing dropped-connection check"
- test_timeout(url)
-
-if __name__ == '__main__':
- import time
- import sys
- try:
- N = int(sys.argv[1])
- url = sys.argv[2]
- except:
- print "%s <integer> <url>" % sys.argv[0]
- else:
- test(url, N)
diff --git a/tails_installer/urlgrabber/mirror.py b/tails_installer/urlgrabber/mirror.py
deleted file mode 100644
index 3a39986..0000000
--- a/tails_installer/urlgrabber/mirror.py
+++ /dev/null
@@ -1,459 +0,0 @@
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the
-# Free Software Foundation, Inc.,
-# 59 Temple Place, Suite 330,
-# Boston, MA 02111-1307 USA
-
-# This file is part of urlgrabber, a high-level cross-protocol url-grabber
-# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
-
-"""Module for downloading files from a pool of mirrors
-
-DESCRIPTION
-
- This module provides support for downloading files from a pool of
- mirrors with configurable failover policies. To a large extent, the
- failover policy is chosen by using different classes derived from
- the main class, MirrorGroup.
-
- Instances of MirrorGroup (and cousins) act very much like URLGrabber
- instances in that they have urlread, urlgrab, and urlopen methods.
- They can therefore, be used in very similar ways.
-
- from urlgrabber.grabber import URLGrabber
- from urlgrabber.mirror import MirrorGroup
- gr = URLGrabber()
- mg = MirrorGroup(gr, ['http://foo.com/some/directory/',
- 'http://bar.org/maybe/somewhere/else/',
- 'ftp://baz.net/some/other/place/entirely/']
- mg.urlgrab('relative/path.zip')
-
- The assumption is that all mirrors are identical AFTER the base urls
- specified, so that any mirror can be used to fetch any file.
-
-FAILOVER
-
- The failover mechanism is designed to be customized by subclassing
- from MirrorGroup to change the details of the behavior. In general,
- the classes maintain a master mirror list and a "current mirror"
- index. When a download is initiated, a copy of this list and index
- is created for that download only. The specific failover policy
- depends on the class used, and so is documented in the class
- documentation. Note that ANY behavior of the class can be
- overridden, so any failover policy at all is possible (although
- you may need to change the interface in extreme cases).
-
-CUSTOMIZATION
-
- Most customization of a MirrorGroup object is done at instantiation
- time (or via subclassing). There are four major types of
- customization:
-
- 1) Pass in a custom urlgrabber - The passed in urlgrabber will be
- used (by default... see #2) for the grabs, so options to it
- apply for the url-fetching
-
- 2) Custom mirror list - Mirror lists can simply be a list of
- stings mirrors (as shown in the example above) but each can
- also be a dict, allowing for more options. For example, the
- first mirror in the list above could also have been:
-
- {'mirror': 'http://foo.com/some/directory/',
- 'grabber': <a custom grabber to be used for this mirror>,
- 'kwargs': { <a dict of arguments passed to the grabber> }}
-
- All mirrors are converted to this format internally. If
- 'grabber' is omitted, the default grabber will be used. If
- kwargs are omitted, then (duh) they will not be used.
-
- 3) Pass keyword arguments when instantiating the mirror group.
- See, for example, the failure_callback argument.
-
- 4) Finally, any kwargs passed in for the specific file (to the
- urlgrab method, for example) will be folded in. The options
- passed into the grabber's urlXXX methods will override any
- options specified in a custom mirror dict.
-
-"""
-
-# $Id: mirror.py,v 1.14 2006/02/22 18:26:46 mstenner Exp $
-
-import random
-import thread # needed for locking to make this threadsafe
-import types
-
-from grabber import URLGrabError, CallbackObject, DEBUG
-
-try:
- from i18n import _
-except ImportError, msg:
- def _(st): return st
-
-class GrabRequest:
- """This is a dummy class used to hold information about the specific
- request. For example, a single file. By maintaining this information
- separately, we can accomplish two things:
-
- 1) make it a little easier to be threadsafe
- 2) have request-specific parameters
- """
- pass
-
-class MirrorGroup:
- """Base Mirror class
-
- Instances of this class are built with a grabber object and a list
- of mirrors. Then all calls to urlXXX should be passed relative urls.
- The requested file will be searched for on the first mirror. If the
- grabber raises an exception (possibly after some retries) then that
- mirror will be removed from the list, and the next will be attempted.
- If all mirrors are exhausted, then an exception will be raised.
-
- MirrorGroup has the following failover policy:
-
- * downloads begin with the first mirror
-
- * by default (see default_action below) a failure (after retries)
- causes it to increment the local AND master indices. Also,
- the current mirror is removed from the local list (but NOT the
- master list - the mirror can potentially be used for other
- files)
-
- * if the local list is ever exhausted, a URLGrabError will be
- raised (errno=256, no more mirrors)
-
- OPTIONS
-
- In addition to the required arguments "grabber" and "mirrors",
- MirrorGroup also takes the following optional arguments:
-
- default_action
-
- A dict that describes the actions to be taken upon failure
- (after retries). default_action can contain any of the
- following keys (shown here with their default values):
-
- default_action = {'increment': 1,
- 'increment_master': 1,
- 'remove': 1,
- 'remove_master': 0,
- 'fail': 0}
-
- In this context, 'increment' means "use the next mirror" and
- 'remove' means "never use this mirror again". The two
- 'master' values refer to the instance-level mirror list (used
- for all files), whereas the non-master values refer to the
- current download only.
-
- The 'fail' option will cause immediate failure by re-raising
- the exception and no further attempts to get the current
- download.
-
- This dict can be set at instantiation time,
- mg = MirrorGroup(grabber, mirrors, default_action={'fail':1})
- at method-execution time (only applies to current fetch),
- filename = mg.urlgrab(url, default_action={'increment': 0})
- or by returning an action dict from the failure_callback
- return {'fail':0}
- in increasing precedence.
-
- If all three of these were done, the net result would be:
- {'increment': 0, # set in method
- 'increment_master': 1, # class default
- 'remove': 1, # class default
- 'remove_master': 0, # class default
- 'fail': 0} # set at instantiation, reset
- # from callback
-
- failure_callback
-
- this is a callback that will be called when a mirror "fails",
- meaning the grabber raises some URLGrabError. If this is a
- tuple, it is interpreted to be of the form (cb, args, kwargs)
- where cb is the actual callable object (function, method,
- etc). Otherwise, it is assumed to be the callable object
- itself. The callback will be passed a grabber.CallbackObject
- instance along with args and kwargs (if present). The following
- attributes are defined withing the instance:
-
- obj.exception = < exception that was raised >
- obj.mirror = < the mirror that was tried >
- obj.relative_url = < url relative to the mirror >
- obj.url = < full url that failed >
- # .url is just the combination of .mirror
- # and .relative_url
-
- The failure callback can return an action dict, as described
- above.
-
- Like default_action, the failure_callback can be set at
- instantiation time or when the urlXXX method is called. In
- the latter case, it applies only for that fetch.
-
- The callback can re-raise the exception quite easily. For
- example, this is a perfectly adequate callback function:
-
- def callback(obj): raise obj.exception
-
- WARNING: do not save the exception object (or the
- CallbackObject instance). As they contain stack frame
- references, they can lead to circular references.
-
- Notes:
- * The behavior can be customized by deriving and overriding the
- 'CONFIGURATION METHODS'
- * The 'grabber' instance is kept as a reference, not copied.
- Therefore, the grabber instance can be modified externally
- and changes will take effect immediately.
- """
-
- # notes on thread-safety:
-
- # A GrabRequest should never be shared by multiple threads because
- # it's never saved inside the MG object and never returned outside it.
- # therefore, it should be safe to access/modify grabrequest data
- # without a lock. However, accessing the mirrors and _next attributes
- # of the MG itself must be done when locked to prevent (for example)
- # removal of the wrong mirror.
-
- ##############################################################
- # CONFIGURATION METHODS - intended to be overridden to
- # customize behavior
- def __init__(self, grabber, mirrors, **kwargs):
- """Initialize the MirrorGroup object.
-
- REQUIRED ARGUMENTS
-
- grabber - URLGrabber instance
- mirrors - a list of mirrors
-
- OPTIONAL ARGUMENTS
-
- failure_callback - callback to be used when a mirror fails
- default_action - dict of failure actions
-
- See the module-level and class level documentation for more
- details.
- """
-
- # OVERRIDE IDEAS:
- # shuffle the list to randomize order
- self.grabber = grabber
- self.mirrors = self._parse_mirrors(mirrors)
- self._next = 0
- self._lock = thread.allocate_lock()
- self.default_action = None
- self._process_kwargs(kwargs)
-
- # if these values are found in **kwargs passed to one of the urlXXX
- # methods, they will be stripped before getting passed on to the
- # grabber
- options = ['default_action', 'failure_callback']
-
- def _process_kwargs(self, kwargs):
- self.failure_callback = kwargs.get('failure_callback')
- self.default_action = kwargs.get('default_action')
-
- def _parse_mirrors(self, mirrors):
- parsed_mirrors = []
- for m in mirrors:
- if type(m) in types.StringTypes: m = {'mirror': m}
- parsed_mirrors.append(m)
- return parsed_mirrors
-
- def _load_gr(self, gr):
- # OVERRIDE IDEAS:
- # shuffle gr list
- self._lock.acquire()
- gr.mirrors = list(self.mirrors)
- gr._next = self._next
- self._lock.release()
-
- def _get_mirror(self, gr):
- # OVERRIDE IDEAS:
- # return a random mirror so that multiple mirrors get used
- # even without failures.
- if not gr.mirrors:
- raise URLGrabError(256, _('No more mirrors to try.'))
- return gr.mirrors[gr._next]
-
- def _failure(self, gr, cb_obj):
- # OVERRIDE IDEAS:
- # inspect the error - remove=1 for 404, remove=2 for connection
- # refused, etc. (this can also be done via
- # the callback)
- cb = gr.kw.get('failure_callback') or self.failure_callback
- if cb:
- if type(cb) == type( () ):
- cb, args, kwargs = cb
- else:
- args, kwargs = (), {}
- action = cb(cb_obj, *args, **kwargs) or {}
- else:
- action = {}
- # XXXX - decide - there are two ways to do this
- # the first is action-overriding as a whole - use the entire action
- # or fall back on module level defaults
- #action = action or gr.kw.get('default_action') or self.default_action
- # the other is to fall through for each element in the action dict
- a = dict(self.default_action or {})
- a.update(gr.kw.get('default_action', {}))
- a.update(action)
- action = a
- self.increment_mirror(gr, action)
- if action and action.get('fail', 0): raise
-
- def increment_mirror(self, gr, action={}):
- """Tell the mirror object increment the mirror index
-
- This increments the mirror index, which amounts to telling the
- mirror object to use a different mirror (for this and future
- downloads).
-
- This is a SEMI-public method. It will be called internally,
- and you may never need to call it. However, it is provided
- (and is made public) so that the calling program can increment
- the mirror choice for methods like urlopen. For example, with
- urlopen, there's no good way for the mirror group to know that
- an error occurs mid-download (it's already returned and given
- you the file object).
-
- remove --- can have several values
- 0 do not remove the mirror from the list
- 1 remove the mirror for this download only
- 2 remove the mirror permanently
-
- beware of remove=0 as it can lead to infinite loops
- """
- badmirror = gr.mirrors[gr._next]
-
- self._lock.acquire()
- try:
- ind = self.mirrors.index(badmirror)
- except ValueError:
- pass
- else:
- if action.get('remove_master', 0):
- del self.mirrors[ind]
- elif self._next == ind and action.get('increment_master', 1):
- self._next += 1
- if self._next >= len(self.mirrors): self._next = 0
- self._lock.release()
-
- if action.get('remove', 1):
- del gr.mirrors[gr._next]
- elif action.get('increment', 1):
- gr._next += 1
- if gr._next >= len(gr.mirrors): gr._next = 0
-
- if DEBUG:
- grm = [m['mirror'] for m in gr.mirrors]
- DEBUG.info('GR mirrors: [%s] %i', ' '.join(grm), gr._next)
- selfm = [m['mirror'] for m in self.mirrors]
- DEBUG.info('MAIN mirrors: [%s] %i', ' '.join(selfm), self._next)
-
- #####################################################################
- # NON-CONFIGURATION METHODS
- # these methods are designed to be largely workhorse methods that
- # are not intended to be overridden. That doesn't mean you can't;
- # if you want to, feel free, but most things can be done by
- # by overriding the configuration methods :)
-
- def _join_url(self, base_url, rel_url):
- if base_url.endswith('/') or rel_url.startswith('/'):
- return base_url + rel_url
- else:
- return base_url + '/' + rel_url
-
- def _mirror_try(self, func, url, kw):
- gr = GrabRequest()
- gr.func = func
- gr.url = url
- gr.kw = dict(kw)
- self._load_gr(gr)
-
- for k in self.options:
- try: del kw[k]
- except KeyError: pass
-
- while 1:
- mirrorchoice = self._get_mirror(gr)
- fullurl = self._join_url(mirrorchoice['mirror'], gr.url)
- kwargs = dict(mirrorchoice.get('kwargs', {}))
- kwargs.update(kw)
- grabber = mirrorchoice.get('grabber') or self.grabber
- func_ref = getattr(grabber, func)
- if DEBUG: DEBUG.info('MIRROR: trying %s -> %s', url, fullurl)
- try:
- return func_ref( *(fullurl,), **kwargs )
- except URLGrabError, e:
- if DEBUG: DEBUG.info('MIRROR: failed')
- obj = CallbackObject()
- obj.exception = e
- obj.mirror = mirrorchoice['mirror']
- obj.relative_url = gr.url
- obj.url = fullurl
- self._failure(gr, obj)
-
- def urlgrab(self, url, filename=None, **kwargs):
- kw = dict(kwargs)
- kw['filename'] = filename
- func = 'urlgrab'
- return self._mirror_try(func, url, kw)
-
- def urlopen(self, url, **kwargs):
- kw = dict(kwargs)
- func = 'urlopen'
- return self._mirror_try(func, url, kw)
-
- def urlread(self, url, limit=None, **kwargs):
- kw = dict(kwargs)
- kw['limit'] = limit
- func = 'urlread'
- return self._mirror_try(func, url, kw)
-
-
-class MGRandomStart(MirrorGroup):
- """A mirror group that starts at a random mirror in the list.
-
- This behavior of this class is identical to MirrorGroup, except that
- it starts at a random location in the mirror list.
- """
-
- def __init__(self, grabber, mirrors, **kwargs):
- """Initialize the object
-
- The arguments for intialization are the same as for MirrorGroup
- """
- MirrorGroup.__init__(self, grabber, mirrors, **kwargs)
- self._next = random.randrange(len(mirrors))
-
-class MGRandomOrder(MirrorGroup):
- """A mirror group that uses mirrors in a random order.
-
- This behavior of this class is identical to MirrorGroup, except that
- it uses the mirrors in a random order. Note that the order is set at
- initialization time and fixed thereafter. That is, it does not pick a
- random mirror after each failure.
- """
-
- def __init__(self, grabber, mirrors, **kwargs):
- """Initialize the object
-
- The arguments for intialization are the same as for MirrorGroup
- """
- MirrorGroup.__init__(self, grabber, mirrors, **kwargs)
- random.shuffle(self.mirrors)
-
-if __name__ == '__main__':
- pass
diff --git a/tails_installer/urlgrabber/progress.py b/tails_installer/urlgrabber/progress.py
deleted file mode 100644
index ed8130c..0000000
--- a/tails_installer/urlgrabber/progress.py
+++ /dev/null
@@ -1,609 +0,0 @@
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the
-# Free Software Foundation, Inc.,
-# 59 Temple Place, Suite 330,
-# Boston, MA 02111-1307 USA
-
-# This file is part of urlgrabber, a high-level cross-protocol url-grabber
-# Copyright 2002-2004 Michael D. Stenner, Ryan Tomayko
-
-# $Id: progress.py,v 1.7 2005/08/19 21:59:07 mstenner Exp $
-
-import sys
-import time
-import math
-import thread
-import types
-
-class BaseMeter:
- def __init__(self):
- self.update_period = 0.3 # seconds
-
- self.filename = None
- self.url = None
- self.basename = None
- self.text = None
- self.size = None
- self.start_time = None
- self.last_amount_read = 0
- self.last_update_time = None
- self.re = RateEstimator()
-
- def start(self, filename=None, url=None, basename=None,
- size=None, now=None, text=None):
- self.filename = filename
- self.url = url
- self.basename = basename
- self.text = text
-
- #size = None ######### TESTING
- self.size = size
- if not size is None: self.fsize = format_number(size) + 'B'
-
- if now is None: now = time.time()
- self.start_time = now
- self.re.start(size, now)
- self.last_amount_read = 0
- self.last_update_time = now
- self._do_start(now)
-
- def _do_start(self, now=None):
- pass
-
- def update(self, amount_read, now=None):
- # for a real gui, you probably want to override and put a call
- # to your mainloop iteration function here
- if now is None: now = time.time()
- if (now >= self.last_update_time + self.update_period) or \
- not self.last_update_time:
- self.re.update(amount_read, now)
- self.last_amount_read = amount_read
- self.last_update_time = now
- self._do_update(amount_read, now)
-
- def _do_update(self, amount_read, now=None):
- pass
-
- def end(self, amount_read, now=None):
- if now is None: now = time.time()
- self.re.update(amount_read, now)
- self.last_amount_read = amount_read
- self.last_update_time = now
- self._do_end(amount_read, now)
-
- def _do_end(self, amount_read, now=None):
- pass
-
-# This is kind of a hack, but progress is gotten from grabber which doesn't
-# know about the total size to download. So we do this so we can get the data
-# out of band here. This will be "fixed" one way or anther soon.
-_text_meter_total_size = 0
-_text_meter_sofar_size = 0
-def text_meter_total_size(size, downloaded=0):
- global _text_meter_total_size
- global _text_meter_sofar_size
- _text_meter_total_size = size
- _text_meter_sofar_size = downloaded
-
-class TextMeter(BaseMeter):
- def __init__(self, fo=sys.stderr):
- BaseMeter.__init__(self)
- self.fo = fo
-
- def _do_update(self, amount_read, now=None):
- etime = self.re.elapsed_time()
- fetime = format_time(etime)
- fread = format_number(amount_read)
- #self.size = None
- if self.text is not None:
- text = self.text
- else:
- text = self.basename
-
- sofar_size = None
- if _text_meter_total_size:
- sofar_size = _text_meter_sofar_size + amount_read
- sofar_pc = (sofar_size * 100) / _text_meter_total_size
-
- if self.size is None:
- out = '\r%-60.60s %5sB %s ' % \
- (text, fread, fetime)
- else:
- rtime = self.re.remaining_time()
- frtime = format_time(rtime)
- frac = self.re.fraction_read()
-
- bar = '='*int(10 * frac)
- ave_dl = format_number(self.re.average_rate())
- if sofar_size is None:
- out = '\r%-25.25s %3i%% |%-14.14s| %5sB/s | %5sB %9s ETA ' % \
- (text, frac*100, bar, ave_dl, fread, frtime)
- else:
- fmt ='\r%-22.22s %3i%% |%4i%% |%-10.10s| %5sB/s | %5sB %9s ETA '
- out = fmt % (text, sofar_pc,frac*100,bar,ave_dl, fread,frtime)
-
- self.fo.write(out)
- self.fo.flush()
-
- def _do_end(self, amount_read, now=None):
- global _text_meter_total_size
- global _text_meter_sofar_size
-
- total_time = format_time(self.re.elapsed_time())
- total_size = format_number(amount_read)
- if self.text is not None:
- text = self.text
- else:
- text = self.basename
- if self.size is None:
- out = '\r%-60.60s %5sB %s ' % \
- (text, total_size, total_time)
- else:
- out = '\r%-56.56s | %5sB %9s ' % \
- (text, total_size, total_time)
-
- self.fo.write(out + '\n')
- self.fo.flush()
- if _text_meter_total_size:
- _text_meter_sofar_size += amount_read
- if _text_meter_total_size <= _text_meter_sofar_size:
- _text_meter_total_size = 0
- _text_meter_sofar_size = 0
-
-text_progress_meter = TextMeter
-
-class MultiFileHelper(BaseMeter):
- def __init__(self, master):
- BaseMeter.__init__(self)
- self.master = master
-
- def _do_start(self, now):
- self.master.start_meter(self, now)
-
- def _do_update(self, amount_read, now):
- # elapsed time since last update
- self.master.update_meter(self, now)
-
- def _do_end(self, amount_read, now):
- self.ftotal_time = format_time(now - self.start_time)
- self.ftotal_size = format_number(self.last_amount_read)
- self.master.end_meter(self, now)
-
- def failure(self, message, now=None):
- self.master.failure_meter(self, message, now)
-
- def message(self, message):
- self.master.message_meter(self, message)
-
-class MultiFileMeter:
- helperclass = MultiFileHelper
- def __init__(self):
- self.meters = []
- self.in_progress_meters = []
- self._lock = thread.allocate_lock()
- self.update_period = 0.3 # seconds
-
- self.numfiles = None
- self.finished_files = 0
- self.failed_files = 0
- self.open_files = 0
- self.total_size = None
- self.failed_size = 0
- self.start_time = None
- self.finished_file_size = 0
- self.last_update_time = None
- self.re = RateEstimator()
-
- def start(self, numfiles=None, total_size=None, now=None):
- if now is None: now = time.time()
- self.numfiles = numfiles
- self.finished_files = 0
- self.failed_files = 0
- self.open_files = 0
- self.total_size = total_size
- self.failed_size = 0
- self.start_time = now
- self.finished_file_size = 0
- self.last_update_time = now
- self.re.start(total_size, now)
- self._do_start(now)
-
- def _do_start(self, now):
- pass
-
- def end(self, now=None):
- if now is None: now = time.time()
- self._do_end(now)
-
- def _do_end(self, now):
- pass
-
- def lock(self): self._lock.acquire()
- def unlock(self): self._lock.release()
-
- ###########################################################
- # child meter creation and destruction
- def newMeter(self):
- newmeter = self.helperclass(self)
- self.meters.append(newmeter)
- return newmeter
-
- def removeMeter(self, meter):
- self.meters.remove(meter)
-
- ###########################################################
- # child functions - these should only be called by helpers
- def start_meter(self, meter, now):
- if not meter in self.meters:
- raise ValueError('attempt to use orphaned meter')
- self._lock.acquire()
- try:
- if not meter in self.in_progress_meters:
- self.in_progress_meters.append(meter)
- self.open_files += 1
- finally:
- self._lock.release()
- self._do_start_meter(meter, now)
-
- def _do_start_meter(self, meter, now):
- pass
-
- def update_meter(self, meter, now):
- if not meter in self.meters:
- raise ValueError('attempt to use orphaned meter')
- if (now >= self.last_update_time + self.update_period) or \
- not self.last_update_time:
- self.re.update(self._amount_read(), now)
- self.last_update_time = now
- self._do_update_meter(meter, now)
-
- def _do_update_meter(self, meter, now):
- pass
-
- def end_meter(self, meter, now):
- if not meter in self.meters:
- raise ValueError('attempt to use orphaned meter')
- self._lock.acquire()
- try:
- try: self.in_progress_meters.remove(meter)
- except ValueError: pass
- self.open_files -= 1
- self.finished_files += 1
- self.finished_file_size += meter.last_amount_read
- finally:
- self._lock.release()
- self._do_end_meter(meter, now)
-
- def _do_end_meter(self, meter, now):
- pass
-
- def failure_meter(self, meter, message, now):
- if not meter in self.meters:
- raise ValueError('attempt to use orphaned meter')
- self._lock.acquire()
- try:
- try: self.in_progress_meters.remove(meter)
- except ValueError: pass
- self.open_files -= 1
- self.failed_files += 1
- if meter.size and self.failed_size is not None:
- self.failed_size += meter.size
- else:
- self.failed_size = None
- finally:
- self._lock.release()
- self._do_failure_meter(meter, message, now)
-
- def _do_failure_meter(self, meter, message, now):
- pass
-
- def message_meter(self, meter, message):
- pass
-
- ########################################################
- # internal functions
- def _amount_read(self):
- tot = self.finished_file_size
- for m in self.in_progress_meters:
- tot += m.last_amount_read
- return tot
-
-
-class TextMultiFileMeter(MultiFileMeter):
- def __init__(self, fo=sys.stderr):
- self.fo = fo
- MultiFileMeter.__init__(self)
-
- # files: ###/### ###% data: ######/###### ###% time: ##:##:##/##:##:##
- def _do_update_meter(self, meter, now):
- self._lock.acquire()
- try:
- format = "files: %3i/%-3i %3i%% data: %6.6s/%-6.6s %3i%% " \
- "time: %8.8s/%8.8s"
- df = self.finished_files
- tf = self.numfiles or 1
- pf = 100 * float(df)/tf + 0.49
- dd = self.re.last_amount_read
- td = self.total_size
- pd = 100 * (self.re.fraction_read() or 0) + 0.49
- dt = self.re.elapsed_time()
- rt = self.re.remaining_time()
- if rt is None: tt = None
- else: tt = dt + rt
-
- fdd = format_number(dd) + 'B'
- ftd = format_number(td) + 'B'
- fdt = format_time(dt, 1)
- ftt = format_time(tt, 1)
-
- out = '%-79.79s' % (format % (df, tf, pf, fdd, ftd, pd, fdt, ftt))
- self.fo.write('\r' + out)
- self.fo.flush()
- finally:
- self._lock.release()
-
- def _do_end_meter(self, meter, now):
- self._lock.acquire()
- try:
- format = "%-30.30s %6.6s %8.8s %9.9s"
- fn = meter.basename
- size = meter.last_amount_read
- fsize = format_number(size) + 'B'
- et = meter.re.elapsed_time()
- fet = format_time(et, 1)
- frate = format_number(size / et) + 'B/s'
-
- out = '%-79.79s' % (format % (fn, fsize, fet, frate))
- self.fo.write('\r' + out + '\n')
- finally:
- self._lock.release()
- self._do_update_meter(meter, now)
-
- def _do_failure_meter(self, meter, message, now):
- self._lock.acquire()
- try:
- format = "%-30.30s %6.6s %s"
- fn = meter.basename
- if type(message) in types.StringTypes:
- message = message.splitlines()
- if not message: message = ['']
- out = '%-79s' % (format % (fn, 'FAILED', message[0] or ''))
- self.fo.write('\r' + out + '\n')
- for m in message[1:]: self.fo.write(' ' + m + '\n')
- self._lock.release()
- finally:
- self._do_update_meter(meter, now)
-
- def message_meter(self, meter, message):
- self._lock.acquire()
- try:
- pass
- finally:
- self._lock.release()
-
- def _do_end(self, now):
- self._do_update_meter(None, now)
- self._lock.acquire()
- try:
- self.fo.write('\n')
- self.fo.flush()
- finally:
- self._lock.release()
-
-######################################################################
-# support classes and functions
-
-class RateEstimator:
- def __init__(self, timescale=5.0):
- self.timescale = timescale
-
- def start(self, total=None, now=None):
- if now is None: now = time.time()
- self.total = total
- self.start_time = now
- self.last_update_time = now
- self.last_amount_read = 0
- self.ave_rate = None
-
- def update(self, amount_read, now=None):
- if now is None: now = time.time()
- if amount_read == 0:
- # if we just started this file, all bets are off
- self.last_update_time = now
- self.last_amount_read = 0
- self.ave_rate = None
- return
-
- #print 'times', now, self.last_update_time
- time_diff = now - self.last_update_time
- read_diff = amount_read - self.last_amount_read
- # First update, on reget is the file size
- if self.last_amount_read:
- self.last_update_time = now
- self.ave_rate = self._temporal_rolling_ave(\
- time_diff, read_diff, self.ave_rate, self.timescale)
- self.last_amount_read = amount_read
- #print 'results', time_diff, read_diff, self.ave_rate
-
- #####################################################################
- # result methods
- def average_rate(self):
- "get the average transfer rate (in bytes/second)"
- return self.ave_rate
-
- def elapsed_time(self):
- "the time between the start of the transfer and the most recent update"
- return self.last_update_time - self.start_time
-
- def remaining_time(self):
- "estimated time remaining"
- if not self.ave_rate or not self.total: return None
- return (self.total - self.last_amount_read) / self.ave_rate
-
- def fraction_read(self):
- """the fraction of the data that has been read
- (can be None for unknown transfer size)"""
- if self.total is None: return None
- elif self.total == 0: return 1.0
- else: return float(self.last_amount_read)/self.total
-
- #########################################################################
- # support methods
- def _temporal_rolling_ave(self, time_diff, read_diff, last_ave, timescale):
- """a temporal rolling average performs smooth averaging even when
- updates come at irregular intervals. This is performed by scaling
- the "epsilon" according to the time since the last update.
- Specifically, epsilon = time_diff / timescale
-
- As a general rule, the average will take on a completely new value
- after 'timescale' seconds."""
- epsilon = time_diff / timescale
- if epsilon > 1: epsilon = 1.0
- return self._rolling_ave(time_diff, read_diff, last_ave, epsilon)
-
- def _rolling_ave(self, time_diff, read_diff, last_ave, epsilon):
- """perform a "rolling average" iteration
- a rolling average "folds" new data into an existing average with
- some weight, epsilon. epsilon must be between 0.0 and 1.0 (inclusive)
- a value of 0.0 means only the old value (initial value) counts,
- and a value of 1.0 means only the newest value is considered."""
-
- try:
- recent_rate = read_diff / time_diff
- except ZeroDivisionError:
- recent_rate = None
- if last_ave is None: return recent_rate
- elif recent_rate is None: return last_ave
-
- # at this point, both last_ave and recent_rate are numbers
- return epsilon * recent_rate + (1 - epsilon) * last_ave
-
- def _round_remaining_time(self, rt, start_time=15.0):
- """round the remaining time, depending on its size
- If rt is between n*start_time and (n+1)*start_time round downward
- to the nearest multiple of n (for any counting number n).
- If rt < start_time, round down to the nearest 1.
- For example (for start_time = 15.0):
- 2.7 -> 2.0
- 25.2 -> 25.0
- 26.4 -> 26.0
- 35.3 -> 34.0
- 63.6 -> 60.0
- """
-
- if rt < 0: return 0.0
- shift = int(math.log(rt/start_time)/math.log(2))
- rt = int(rt)
- if shift <= 0: return rt
- return float(int(rt) >> shift << shift)
-
-
-def format_time(seconds, use_hours=0):
- if seconds is None or seconds < 0:
- if use_hours: return '--:--:--'
- else: return '--:--'
- else:
- seconds = int(seconds)
- minutes = seconds / 60
- seconds = seconds % 60
- if use_hours:
- hours = minutes / 60
- minutes = minutes % 60
- return '%02i:%02i:%02i' % (hours, minutes, seconds)
- else:
- return '%02i:%02i' % (minutes, seconds)
-
def format_number(number, SI=0, space=' '):
    """Turn numbers into human-readable metric-like numbers.

    number -- the value to format
    SI     -- if true, scale by powers of 1000; otherwise binary 1024
    space  -- separator placed between the digits and the unit symbol

    Returns a string such as '1.0 k' for format_number(1024).
    """
    symbols = ['',   # (none)
               'k',  # kilo
               'M',  # mega
               'G',  # giga
               'T',  # tera
               'P',  # peta
               'E',  # exa
               'Z',  # zetta
               'Y']  # yotta

    step = 1000.0 if SI else 1024.0

    thresh = 999
    depth = 0
    max_depth = len(symbols) - 1

    # we want numbers between 0 and thresh, but don't exceed the length
    # of our list. In that event, the formatting will be screwed up,
    # but it'll still show the right number.
    while number > thresh and depth < max_depth:
        depth = depth + 1
        number = number / step

    # The original Python 2 code compared type() against both int and
    # long (`1L`, a Python 3 syntax error); isinstance on int covers both.
    if isinstance(number, int):
        # it's an int, which means it didn't get divided,
        # which means it's already short enough
        format = '%i%s%s'
    elif number < 9.95:
        # must use 9.95 for proper sizing. For example, 9.99 will be
        # rounded to 10.0 with the .1f format string (which is too long)
        format = '%.1f%s%s'
    else:
        format = '%.0f%s%s'

    return (format % (float(number or 0), space, symbols[depth]))
-
def _tst(fn, cur, tot, beg, size, *args):
    """Demo driver: feed a TextMeter with simulated download progress.

    Each (inc, delay) pair in *args advances the byte counter by `inc`
    every `delay` seconds until the next fraction of `size` is reached.
    """
    meter = TextMeter()
    meter.start(fn, "http://www.example.com/path/to/fn/" + fn, fn, size,
                text="(%d/%d): %s" % (cur, tot, fn))
    count = beg
    for stage, (inc, delay) in enumerate(args, 1):
        # Target for this stage: the stage-th fraction of the total size.
        while count < ((size * stage) / len(args)):
            count += inc
            meter.update(count)
            time.sleep(delay)
    meter.end(size)
-
if __name__ == "__main__":
    # Manual smoke test: simulate ten "downloads" of varying sizes and
    # rates and render them with the module's text progress meter.
    # Each _tst call is
    #   _tst(filename, index, total, starting_bytes, size, (inc, delay), ...)
    # Expected output looks like:
    # (1/2): subversion-1.4.4-7.x86_64.rpm 2.4 MB / 85 kB/s 00:28
    # (2/2): mercurial-0.9.5-6.fc8.x86_64.rpm 924 kB / 106 kB/s 00:08
    if True:
        # Register the combined size of all simulated files up front so
        # the meter can display aggregate (total) progress.
        text_meter_total_size(1000 + 10000 + 10000 + 1000000 + 1000000 +
                              1000000 + 10000 + 10000 + 10000 + 1000000)
        _tst("sm-1.0.0-1.fc8.i386.rpm", 1, 10, 0, 1000,
             (10, 0.2), (10, 0.1), (100, 0.25))
        _tst("s-1.0.1-1.fc8.i386.rpm", 2, 10, 0, 10000,
             (10, 0.2), (100, 0.1), (100, 0.1), (100, 0.25))
        _tst("m-1.0.1-2.fc8.i386.rpm", 3, 10, 5000, 10000,
             (10, 0.2), (100, 0.1), (100, 0.1), (100, 0.25))
        _tst("large-file-name-Foo-11.8.7-4.5.6.1.fc8.x86_64.rpm", 4, 10, 0, 1000000,
             (1000, 0.2), (1000, 0.1), (10000, 0.1))
        _tst("large-file-name-Foo2-11.8.7-4.5.6.2.fc8.x86_64.rpm", 5, 10,
             500001, 1000000, (1000, 0.2), (1000, 0.1), (10000, 0.1))
        _tst("large-file-name-Foo3-11.8.7-4.5.6.3.fc8.x86_64.rpm", 6, 10,
             750002, 1000000, (1000, 0.2), (1000, 0.1), (10000, 0.1))
        _tst("large-file-name-Foo4-10.8.7-4.5.6.1.fc8.x86_64.rpm", 7, 10, 0, 10000,
             (100, 0.1))
        _tst("large-file-name-Foo5-10.8.7-4.5.6.2.fc8.x86_64.rpm", 8, 10,
             5001, 10000, (100, 0.1))
        _tst("large-file-name-Foo6-10.8.7-4.5.6.3.fc8.x86_64.rpm", 9, 10,
             7502, 10000, (100, 0.1))
        # Last file alternates a slow stage with many fast 10 kB bursts.
        _tst("large-file-name-Foox-9.8.7-4.5.6.1.fc8.x86_64.rpm", 10, 10,
             0, 1000000, (10, 0.5),
             (10000, 0.1), (10000, 0.1), (10000, 0.1), (10000, 0.1),
             (10000, 0.1), (10000, 0.1), (10000, 0.1), (10000, 0.1),
             (10000, 0.1), (10000, 0.1), (10000, 0.1), (10000, 0.1),
             (10000, 0.1), (10000, 0.1), (10000, 0.1), (10000, 0.1),
             (10000, 0.1), (10000, 0.25))