HTTP Proxy Support

Harry Fuecks hfuecks at gmail.com
Mon Oct 16 19:36:11 EST 2006


I realise that urllib(2) is able to pick up proxies from environment
variables, the win32 registry etc., but personally I like to be able to set
this up explicitly for a single application (I'm using venus as a personal
aggregator and have to cope with different networks).

Anyway - the attached diffs add explicit proxy support, controllable
via config.ini, e.g.

[Planet]
http_proxy = http://user:pwd@proxy.myisp.com:8080
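
For reference, that value just gets handed to urllib2 as a proxy
handler - roughly like this (a minimal sketch; the target feed URL is
only an example):

    import urllib2

    # build an opener that routes plain HTTP requests through the configured proxy
    proxy = urllib2.ProxyHandler({'http': 'http://user:pwd@proxy.myisp.com:8080'})
    opener = urllib2.build_opener(proxy)
    response = opener.open('http://example.com/feed.xml')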

I think I found all the places where HTTP requests are being made and
updated them.

Note that I haven't tested the change to config.downloadReadingList(),
but the changes to spider.py seem to work OK - it's just a matter of
passing the list of handlers through to feedparser.
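
In other words, the call in spider.py ends up looking something like
this (simplified; the feed URI here is just a placeholder):

    import feedparser
    import config

    feed_uri = 'http://example.com/feed.xml'  # placeholder

    # feedparser hands any urllib2 handlers through to build_opener,
    # so the configured proxy is used for the actual feed fetch
    data = feedparser.parse(feed_uri, handlers=config.url_handlers())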
-------------- next part --------------
=== modified file 'planet/config.py'
--- planet/config.py	2006-10-13 15:01:14 +0000
+++ planet/config.py	2006-10-16 08:42:06 +0000
@@ -97,6 +97,8 @@
     define_planet('owner_email', '')
     define_planet('output_theme', '')
     define_planet('output_dir', 'output')
+    define_planet('http_proxy','')
+
 
     define_planet_list('template_files')
     define_planet_list('bill_of_materials')
@@ -230,7 +232,8 @@
         if options.has_key("last-modified"):
             request.add_header('If-Modified-Since',
                 options['last-modified'])
-        response = urllib2.urlopen(request)
+        opener = apply(urllib2.build_opener, tuple(url_handlers()))
+        response = opener.open(request)
         if response.headers.has_key('etag'):
             cached_config.set(list, 'etag', response.headers['etag'])
         if response.headers.has_key('last-modified'):
@@ -348,3 +351,11 @@
 def write(file=sys.stdout):
     """ write out an updated template """
     print parser.write(file)
+
+def url_handlers():
+    """ additional handlers for urllib2 """
+    import config, urllib2
+    handlers = []
+    if config.http_proxy() != '':
+         handlers.append(urllib2.ProxyHandler({"http" : config.http_proxy()} ))
+    return handlers

-------------- next part --------------
=== modified file 'planet/spider.py'
--- planet/spider.py	2006-10-09 16:23:45 +0000
+++ planet/spider.py	2006-10-16 09:10:07 +0000
@@ -114,7 +114,7 @@
     # read cached feed info
     sources = config.cache_sources_directory()
     feed_source = filename(sources, feed)
-    feed_info = feedparser.parse(feed_source)
+    feed_info = feedparser.parse(feed_source, handlers = config.url_handlers())
     if feed_info.feed.get('planet_http_status',None) == '410': return
 
     # read feed itself
@@ -125,7 +125,8 @@
     except:
         pass
     data = feedparser.parse(feed_info.feed.get('planet_http_location',feed),
-        etag=feed_info.feed.get('planet_http_etag',None), modified=modified)
+        etag=feed_info.feed.get('planet_http_etag',None), modified=modified,
+            handlers = config.url_handlers())
 
     # capture http status
     if not data.has_key("status"):


