HTTP Proxy Support
Harry Fuecks
hfuecks at gmail.com
Mon Oct 16 19:36:11 EST 2006
I realise that urllib(2) is able to pick up proxies from environment
variables, the win32 registry etc. but, personally, I like to be able to set
this up explicitly for a single application (I'm using Venus as a personal
aggregator and have to cope with different networks).
Anyway - the attached diffs add explicit proxy support, controllable
via config.ini e.g.
[Planet]
http_proxy = http://user:pwd@proxy.myisp.com:8080
I think I found all the places where HTTP requests are being made and
updated them.
Note that I haven't tested the change to config.downloadReadingList()
but changes to spider.py seem to work OK - just passing the list of
handlers to feedparser.
-------------- next part --------------
=== modified file 'planet/config.py'
--- planet/config.py 2006-10-13 15:01:14 +0000
+++ planet/config.py 2006-10-16 08:42:06 +0000
@@ -97,6 +97,8 @@
define_planet('owner_email', '')
define_planet('output_theme', '')
define_planet('output_dir', 'output')
+ define_planet('http_proxy', '')
+
=
define_planet_list('template_files')
define_planet_list('bill_of_materials')
@@ -230,7 +232,8 @@
if options.has_key("last-modified"):
request.add_header('If-Modified-Since',
options['last-modified'])
- response = urllib2.urlopen(request)
+ opener = apply(urllib2.build_opener, tuple(url_handlers()))
+ response = opener.open(request)
if response.headers.has_key('etag'):
cached_config.set(list, 'etag', response.headers['etag'])
if response.headers.has_key('last-modified'):
@@ -348,3 +351,11 @@
def write(file=sys.stdout):
""" write out an updated template """
print parser.write(file)
+
+def url_handlers():
+ """ additional handlers for urllib2 """
+ import config, urllib2
+ handlers = []
+ if config.http_proxy() != '':
+ handlers.append(urllib2.ProxyHandler({"http" : config.http_proxy()} ))
+ return handlers
-------------- next part --------------
=== modified file 'planet/spider.py'
--- planet/spider.py 2006-10-09 16:23:45 +0000
+++ planet/spider.py 2006-10-16 09:10:07 +0000
@@ -114,7 +114,7 @@
# read cached feed info
sources = config.cache_sources_directory()
feed_source = filename(sources, feed)
- feed_info = feedparser.parse(feed_source)
+ feed_info = feedparser.parse(feed_source, handlers = config.url_handlers())
if feed_info.feed.get('planet_http_status',None) == '410': return
=
# read feed itself
@@ -125,7 +125,8 @@
except:
pass
data = feedparser.parse(feed_info.feed.get('planet_http_location',feed),
- etag=feed_info.feed.get('planet_http_etag',None), modified=modified)
+ etag=feed_info.feed.get('planet_http_etag',None), modified=modified,
+ handlers = config.url_handlers())
=
# capture http status
if not data.has_key("status"):
More information about the devel
mailing list