# Posted: March 10, 2009 | Author: t39uhw | Filed under: code, grails, programming, python
import urllib2
import threading
from sgmllib import SGMLParser
class URLLister(SGMLParser):
def reset(self):
SGMLParser.reset(self)
self.urls = []
def start_a(self, attrs):
if attrs[0][0] == 'href': self.urls.append(attrs[0][1])
class GrailsPluginsLister(URLLister):
    """Find the plugin ``.zip`` archives published on plugins.grails.org.

    Usage: feed the HTML of the plugin index page to an instance, then call
    ``getUrls()`` (one thread per plugin) or ``getUrls1()`` (sequential).
    Afterwards ``fileDetails`` holds the archive URLs that were found and
    ``failed`` holds the ``trunk/`` URLs whose download raised ``IOError``.
    """

    def reset(self):
        """Reset parser state and clear any previously collected results."""
        URLLister.reset(self)
        # reset() is invoked implicitly when the parser is constructed, so
        # initialising the result lists here guarantees they exist even if
        # getFileDetail() is called before getUrls()/getUrls1().
        self.fileDetails = []
        self.failed = []

    def _trunkUrls(self):
        """Return the ``trunk/`` listing URL for each plugin link collected."""
        # The first and last links of the index page are skipped --
        # presumably navigation/footer links rather than plugins; verify
        # against the live page layout.
        return ['http://plugins.grails.org/' + name + 'trunk/'
                for name in self.urls[1:-1]]

    def getFileDetail(self, url):
        """Fetch one ``trunk/`` listing and record the ``.zip`` links on it.

        On success every link on the page that ends in ``.zip`` is added to
        ``self.fileDetails`` as ``url + link``. If the download raises
        ``IOError`` (which includes ``urllib2.URLError``), ``url`` is added
        to ``self.failed`` instead.
        """
        print('starting get %s' % url)
        try:
            stream = urllib2.urlopen(url)
            try:
                data = stream.read()
            finally:
                # Always release the connection, even if read() fails.
                stream.close()
        except IOError:
            self.failed.append(url)
            return
        parser = URLLister()
        parser.feed(data)
        # Flush anything the parser is still buffering so that trailing
        # markup is processed as well.
        parser.close()
        self.fileDetails.extend(
            [url + link for link in parser.urls if link.endswith('.zip')])
        print('%s acquired' % url)

    def getUrls(self):
        """Fetch all plugin listings concurrently, one thread per plugin.

        Results from any previous run are discarded. Blocks until every
        worker thread has finished.
        """
        self.fileDetails = []
        self.failed = []
        print('wait.....')
        # NOTE(review): workers share fileDetails/failed without a lock;
        # this relies on list.append/extend being atomic in CPython.
        handlers = [threading.Thread(target=self.getFileDetail, args=(url,))
                    for url in self._trunkUrls()]
        print('loading...')
        for handler in handlers:
            handler.start()
        for handler in handlers:
            handler.join()
        print('done.....')

    def getUrls1(self):
        """Fetch all plugin listings one after another (no threads).

        Results from any previous run are discarded.
        """
        self.fileDetails = []
        self.failed = []
        for url in self._trunkUrls():
            self.getFileDetail(url)
if __name__ == '__main__':
    # Script entry point: download the plugin index page, collect the plugin
    # links from it, fetch every plugin's trunk listing in parallel and
    # print the archive URLs found plus the listings that failed to load.
    print('hello')
    stream = urllib2.urlopen('http://plugins.grails.org/')
    try:
        print('stream lister')
        data = stream.read()
    finally:
        # Release the HTTP connection even if the read fails.
        stream.close()
    parser = GrailsPluginsLister()
    parser.feed(data)
    # Flush any markup still buffered in the parser before using its links.
    parser.close()
    parser.getUrls()
    print(parser.fileDetails)
    print('failed: %s' % (parser.failed,))