mirror of
https://github.com/ivuorinen/imdbref.git
synced 2026-01-26 11:23:59 +00:00
144 lines
3.6 KiB
Python
Executable File
144 lines
3.6 KiB
Python
Executable File
import codecs
import collections
import errno
import io
import optparse
import os
import re
import sys
|
|
|
|
# Index (position in the generated names list) of one entry that is always
# kept out of the graph.
# NOTE(review): the value is positional, so which title it denotes depends on
# the exact input file that is processed -- confirm it is still wanted.
_EXCLUDED_NAME_ID = 6025

# A quoted title line containing any of these is ignored:
# "{" marks TV episodes, "(TV)" marks TV entries.
_SKIPPED_TITLE_MARKERS = ("{", "(TV)")

# A "referenced in" line containing any of these is ignored:
# TV episodes, (TV), (V), (VG) entries and one explicitly excluded show.
_SKIPPED_REFERENCE_MARKERS = ("{", "(TV)", "(V)", "(VG)", "lgyi-show")

# Fixed preamble of the generated graph description.
_GDL_HEADER = '''
graph: {
title : "IMDB references"
layoutalgorithm : tree
scaling : 0.5
colorentry 42 : 152 222 255
node.shape : ellipse
node.color : 42
node.height : 32
edge.color : blue
edge.arrowsize : 6
node.textcolor : black
splines : yes

'''


def _collect_references(rows):
    """Collect titles and "referenced in" edges from an iterable of text rows.

    Returns a tuple ``(names, edges, degree)``:

    * ``names``  -- list of cleaned titles; a title's index is its node id
    * ``edges``  -- list of ``(title_id, referrer_id)`` pairs, in input order,
      without self-loops, duplicates or reversed duplicates
    * ``degree`` -- Counter mapping a node id to the number of kept edges
      that touch it
    """
    names = []
    name_ids = {}            # cleaned name -> index in ``names`` (O(1) lookup)
    edges = []
    seen_pairs = set()       # unordered id pairs that already produced an edge
    degree = collections.Counter()
    title = None             # most recent accepted title, quotes removed

    def intern_name(name):
        # Ids are handed out in order of first appearance.
        if name not in name_ids:
            name_ids[name] = len(names)
            names.append(name)
        return name_ids[name]

    for row in rows:
        if row.startswith('"'):
            # NOTE(review): a skipped title line leaves the previous title
            # current, so the reference lines that follow are attached to
            # that earlier title (unchanged from the original behaviour) --
            # confirm this is intended. Header lines that do not start with
            # a quote never change the current title either.
            if not any(marker in row for marker in _SKIPPED_TITLE_MARKERS):
                title = row.replace('"', "").strip()
            continue

        if "referenced in" not in row:
            continue
        if title is None:
            # Reference seen before any accepted title: nothing to attach to.
            continue
        if any(marker in row for marker in _SKIPPED_REFERENCE_MARKERS):
            continue

        # Strip the 'referenced in' wrapper: the keyword, the first opening
        # parenthesis, one of a doubled closing parenthesis, and the quotes.
        refer = row.replace("referenced in", "")
        refer = refer.replace("(", "", 1)
        refer = refer.replace("))", ")")
        refer = refer.replace('"', "").strip()

        # Both names get an id even if the edge is dropped below.
        id_title = intern_name(title)
        id_refer = intern_name(refer)

        if id_title == id_refer:
            continue                      # no self-loops
        if _EXCLUDED_NAME_ID in (id_title, id_refer):
            continue
        pair = (min(id_title, id_refer), max(id_title, id_refer))
        if pair in seen_pairs:
            continue                      # duplicate, in either direction
        seen_pairs.add(pair)

        edges.append((id_title, id_refer))
        degree[id_title] += 1
        degree[id_refer] += 1

    return names, edges, degree


def _render_gdl(names, edges, degree):
    """Build the graph text from the data returned by _collect_references."""
    out = [_GDL_HEADER]

    # Only nodes touched by more than two edges are emitted.
    used = set()
    for idx, name in enumerate(names):
        if degree[idx] > 2:
            out.append(' node: {title: "' + str(idx) + '" label: "' + name + '" }')
            used.add(idx)

    out.append("")

    # An edge is emitted only when both of its end points were emitted.
    for source, target in edges:
        if source in used and target in used:
            out.append(
                '\tedge: { sourcename: "%d" targetname: "%d" }' % (source, target)
            )

    out.append("}")
    return "\n".join(out) + "\n"


def _emit(text):
    """Write text to standard output (as UTF-8 bytes on Python 2)."""
    if sys.version_info[0] < 3:
        text = text.encode("utf-8")
    sys.stdout.write(text)


def imdbref(argv):
    """
    create a .gdl graph description from imdb's movie-links.list

    The input file is read as ISO-8859-1 and the resulting graph is written
    to standard output.

    argv -- full argument vector including the program name; supports
            ``-f/--file`` (default ``movie-links.list``).

    Exits through ``SystemExit`` (via optparse) when the options are invalid
    or the input file cannot be read.
    """
    parser = optparse.OptionParser(
        usage='Usage: %prog [options]',
        description="create a .gdl-file from imdb's movie-links.list",
        version="%prog 0.1"
    )
    parser.add_option(
        "-f", "--file", default="movie-links.list",
        help="file to process [default: %default]"
    )

    options, _unused_args = parser.parse_args(argv[1:])

    if options.file is None:
        parser.print_help()
        sys.exit(-1)

    try:
        # Decode the file directly instead of shelling out to iconv and
        # going through a temporary "<file>.utf8" copy.
        with io.open(options.file, encoding="iso-8859-1") as source:
            names, edges, degree = _collect_references(source)
    except (IOError, OSError) as err:
        parser.error("cannot read %s: %s" % (options.file, err))

    _emit(_render_gdl(names, edges, degree))
|
|
|
|
|
|
|
|
|
|
def sort2(seq):  # Dave Kirby
    """Return the items of seq as a list with repeats dropped.

    The first occurrence of each item is kept and the original order is
    preserved. Items must be hashable.
    """
    already = set()
    unique = []
    for item in seq:
        if item in already:
            continue
        already.add(item)
        unique.append(item)
    return unique
|
|
|
|
|
|
|
|
# Run the converter only when this file is executed as a script;
# importing the module must not trigger any processing.
if __name__ == "__main__":
    imdbref(sys.argv)