diff options
Diffstat (limited to 'src/fetch/scripts/fr.allocine.py')
-rw-r--r-- | src/fetch/scripts/fr.allocine.py | 60 |
1 files changed, 30 insertions, 30 deletions
diff --git a/src/fetch/scripts/fr.allocine.py b/src/fetch/scripts/fr.allocine.py index 97a2247..83e41d0 100644 --- a/src/fetch/scripts/fr.allocine.py +++ b/src/fetch/scripts/fr.allocine.py @@ -15,20 +15,20 @@ # *************************************************************************** # Version 0.4: 2007-08-27 -# * Fixed parsing errors: some fields in allocine's HTML pages have changed recently. Multiple actors and genres +# * Fixed parsing errors: some fields in allocine's HTML pages have changed recently. Multiple actors and genres # could not be retrieved. Fixed bad http request error due to some changes in HTML code. # # Version 0.3: # * Fixed parsing: some fields in allocine's HTML pages have changed. Movie's image could not be fetched anymore. Fixed. -# +# # Version 0.2: # * Fixed parsing: allocine's HTML pages have changed. Movie's image could not be fetched anymore. -# +# # Version 0.1: # * Initial release. import sys, os, re, md5, random -import urllib, urllib2, time, base64 +import urllib.request, urllib.parse, urllib.error, time, base64 import xml.dom.minidom XML_HEADER = """<?xml version="1.0" encoding="UTF-8"?>""" @@ -48,16 +48,16 @@ class BasicTellicoDOM: self.__root = self.__doc.createElement('tellico') self.__root.setAttribute('xmlns', 'http://periapsis.org/tellico/') self.__root.setAttribute('syntaxVersion', '9') - + self.__collection = self.__doc.createElement('collection') self.__collection.setAttribute('title', 'My Movies') self.__collection.setAttribute('type', '3') - + self.__fields = self.__doc.createElement('fields') # Add all default (standard) fields self.__dfltField = self.__doc.createElement('field') self.__dfltField.setAttribute('name', '_default') - + # Add a custom 'Collection' field self.__customField = self.__doc.createElement('field') self.__customField.setAttribute('name', 'titre-original') @@ -67,7 +67,7 @@ class BasicTellicoDOM: self.__customField.setAttribute('format', '1') self.__customField.setAttribute('type', '1') 
self.__customField.setAttribute('i18n', 'yes') - + self.__fields.appendChild(self.__dfltField) self.__fields.appendChild(self.__customField) self.__collection.appendChild(self.__fields) @@ -90,23 +90,23 @@ class BasicTellicoDOM: entryNode.setAttribute('id', str(self.__currentId)) titleNode = self.__doc.createElement('title') - titleNode.appendChild(self.__doc.createTextNode(unicode(d['title'], 'latin-1').encode('utf-8'))) + titleNode.appendChild(self.__doc.createTextNode(str(d['title'], 'latin-1').encode('utf-8'))) otitleNode = self.__doc.createElement('titre-original') - otitleNode.appendChild(self.__doc.createTextNode(unicode(d['otitle'], 'latin-1').encode('utf-8'))) + otitleNode.appendChild(self.__doc.createTextNode(str(d['otitle'], 'latin-1').encode('utf-8'))) yearNode = self.__doc.createElement('year') - yearNode.appendChild(self.__doc.createTextNode(unicode(d['year'], 'latin-1').encode('utf-8'))) + yearNode.appendChild(self.__doc.createTextNode(str(d['year'], 'latin-1').encode('utf-8'))) genresNode = self.__doc.createElement('genres') for g in d['genres']: genreNode = self.__doc.createElement('genre') - genreNode.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8'))) + genreNode.appendChild(self.__doc.createTextNode(str(g, 'latin-1').encode('utf-8'))) genresNode.appendChild(genreNode) natsNode = self.__doc.createElement('nationalitys') natNode = self.__doc.createElement('nat') - natNode.appendChild(self.__doc.createTextNode(unicode(d['nat'], 'latin-1').encode('utf-8'))) + natNode.appendChild(self.__doc.createTextNode(str(d['nat'], 'latin-1').encode('utf-8'))) natsNode.appendChild(natNode) castsNode = self.__doc.createElement('casts') @@ -114,7 +114,7 @@ class BasicTellicoDOM: castNode = self.__doc.createElement('cast') col1Node = self.__doc.createElement('column') col2Node = self.__doc.createElement('column') - col1Node.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8'))) + 
col1Node.appendChild(self.__doc.createTextNode(str(g, 'latin-1').encode('utf-8'))) castNode.appendChild(col1Node) castNode.appendChild(col2Node) castsNode.appendChild(castNode) @@ -122,17 +122,17 @@ class BasicTellicoDOM: dirsNode = self.__doc.createElement('directors') for g in d['dirs']: dirNode = self.__doc.createElement('director') - dirNode.appendChild(self.__doc.createTextNode(unicode(g, 'latin-1').encode('utf-8'))) + dirNode.appendChild(self.__doc.createTextNode(str(g, 'latin-1').encode('utf-8'))) dirsNode.appendChild(dirNode) timeNode = self.__doc.createElement('running-time') - timeNode.appendChild(self.__doc.createTextNode(unicode(d['time'], 'latin-1').encode('utf-8'))) + timeNode.appendChild(self.__doc.createTextNode(str(d['time'], 'latin-1').encode('utf-8'))) - allocineNode = self.__doc.createElement(unicode('allociné-link', 'latin-1').encode('utf-8')) - allocineNode.appendChild(self.__doc.createTextNode(unicode(d['allocine'], 'latin-1').encode('utf-8'))) + allocineNode = self.__doc.createElement(str('allociné-link', 'latin-1').encode('utf-8')) + allocineNode.appendChild(self.__doc.createTextNode(str(d['allocine'], 'latin-1').encode('utf-8'))) plotNode = self.__doc.createElement('plot') - plotNode.appendChild(self.__doc.createTextNode(unicode(d['plot'], 'latin-1').encode('utf-8'))) + plotNode.appendChild(self.__doc.createTextNode(str(d['plot'], 'latin-1').encode('utf-8'))) if d['image']: imageNode = self.__doc.createElement('image') @@ -140,12 +140,12 @@ class BasicTellicoDOM: imageNode.setAttribute('id', d['image'][0]) imageNode.setAttribute('width', '120') imageNode.setAttribute('height', '160') - imageNode.appendChild(self.__doc.createTextNode(unicode(d['image'][1], 'latin-1').encode('utf-8'))) + imageNode.appendChild(self.__doc.createTextNode(str(d['image'][1], 'latin-1').encode('utf-8'))) coverNode = self.__doc.createElement('cover') coverNode.appendChild(self.__doc.createTextNode(d['image'][0])) - for name in ( 'titleNode', 'otitleNode', 
'yearNode', 'genresNode', 'natsNode', + for name in ( 'titleNode', 'otitleNode', 'yearNode', 'genresNode', 'natsNode', 'castsNode', 'dirsNode', 'timeNode', 'allocineNode', 'plotNode' ): entryNode.appendChild(eval(name)) @@ -154,7 +154,7 @@ class BasicTellicoDOM: self.__images.appendChild(imageNode) self.__collection.appendChild(entryNode) - + self.__currentId += 1 def printXML(self): @@ -162,8 +162,8 @@ class BasicTellicoDOM: Outputs XML content to stdout """ self.__collection.appendChild(self.__images) - print XML_HEADER; print DOCTYPE - print self.__root.toxml() + print(XML_HEADER); print(DOCTYPE) + print(self.__root.toxml()) class AlloCineParser: @@ -185,7 +185,7 @@ class AlloCineParser: 'otitle' : 'Titre original *: *<i>(?P<otitle>.+?)</i>', 'plot' : """(?s)<td valign="top" style="padding:10 0 0 0"><div align="justify"><h4> *(?P<plot>.+?) *</h4>""", 'image' : """<td valign="top" width="120".*?<img src="(?P<image>.+?)" border"""} - + self.__domTree = BasicTellicoDOM() @@ -203,7 +203,7 @@ class AlloCineParser: Fetch HTML data from url """ - u = urllib2.urlopen(url) + u = urllib.request.urlopen(url) self.__data = u.read() u.close() @@ -224,7 +224,7 @@ class AlloCineParser: matches = data = {} - for name, regexp in self.__regExps.iteritems(): + for name, regexp in self.__regExps.items(): if name == 'image': matches[name] = re.findall(self.__regExps[name], self.__data, re.S | re.I) else: @@ -272,7 +272,7 @@ class AlloCineParser: elif name == 'image': # Save image to a temporary folder md5 = genMD5() - imObj = urllib2.urlopen(matches[name][0].strip()) + imObj = urllib.request.urlopen(matches[name][0].strip()) img = imObj.read() imObj.close() imgPath = "/tmp/%s.jpeg" % md5 @@ -303,7 +303,7 @@ class AlloCineParser: if not len(title): return self.__title = title - self.__getHTMLContent(self.__searchURL % urllib.quote(self.__title)) + self.__getHTMLContent(self.__searchURL % urllib.parse.quote(self.__title)) # Get all links links = self.__fetchMovieLinks() @@ -321,7 
+321,7 @@ class AlloCineParser: def showUsage(): - print "Usage: %s movietitle" % sys.argv[0] + print("Usage: %s movietitle" % sys.argv[0]) sys.exit(1) def main(): |