mirror of
https://github.com/ivuorinen/imdbref.git
synced 2026-01-26 03:14:09 +00:00
First commit of old project
This commit is contained in:
131
imdbref.py
Executable file
131
imdbref.py
Executable file
@@ -0,0 +1,131 @@
|
||||
import sys, optparse, errno, re, codecs, os
|
||||
|
||||
def imdbref(argv):
    """Create a Graphviz .dot graph from IMDb's movie-links.list.

    The input file is decoded as ISO-8859-1 and scanned line by line.  A line
    starting with a double quote sets the current title; every following
    "referenced in" line adds an undirected edge between that title and the
    referencing title.  The resulting graph is written to standard output.

    Args:
        argv: Full argument vector including the program name at index 0
            (normally ``sys.argv``).  Supports ``-f/--file`` to choose the
            input file.

    Raises:
        SystemExit: If no input file name is available (exit status -1) or
            the input file cannot be read (optparse error exit).
    """
    # Local import: the module-level import line does not bring ``io`` in.
    import io

    parser = optparse.OptionParser(
        usage='Usage: %prog [options]',
        description="create a .dot-file from imdb's movie-links.list",
        version="%prog 0.1"
    )
    parser.add_option(
        "-f", "--file", default="movie-links.list",
        help="file to process [default: %default]")

    (options, args) = parser.parse_args(argv[1:])

    if not options.file:
        parser.print_help()
        sys.exit(-1)

    # Decode in-process instead of shelling out to ``iconv``: no shell
    # injection through the file name, no temporary ".utf8" file to clean up,
    # and the input handle is always closed.
    try:
        with io.open(options.file, encoding="iso-8859-1") as source:
            names, edges, degree = _collect_links(source)
    except (IOError, OSError) as exc:
        parser.error("cannot read %s: %s" % (options.file, exc))

    for text in _render_dot(names, edges, degree):
        if not isinstance(text, str):
            # Python 2: ``text`` is unicode; emit UTF-8 bytes so printing
            # non-ASCII titles to a pipe does not fail.
            text = text.encode("utf-8")
        print(text)


# Substrings that cause a quoted title line to be ignored.
_SKIPPED_TITLE_MARKERS = ("{", "(TV)")

# Substrings that cause a "referenced in" line to be ignored.  The last entry
# is a specific title fragment the original script excluded by hand.
_SKIPPED_REFER_MARKERS = ("{", "(TV)", "(V)", "(VG)", "lgyi-show (1991)")

# Node id that is never connected.
# NOTE(review): this is a position in the name list, so it presumably targets
# one title of one particular input file -- confirm it is still wanted.
_EXCLUDED_NODE_ID = 6025

# A node gets a label only when it has more connections than this.
_MIN_CONNECTIONS = 10


def _collect_links(source):
    """Parse link-list lines into ``(names, edges, degree)``.

    ``names`` lists every title in first-seen order (the list index is the
    node id), ``edges`` is a list of unique ``(id_title, id_refer)`` pairs and
    ``degree`` maps a node id to the number of edges touching it.
    """
    names = []
    index_of = {}   # name -> node id; replaces repeated list scans
    edges = []
    seen = set()    # unordered id pairs already emitted
    degree = {}
    title = None    # current title, or None while inside a skipped entry

    for line in source:
        if line[:1] == '"':
            if any(marker in line for marker in _SKIPPED_TITLE_MARKERS):
                # Forget the previous title so the references of a skipped
                # entry are not attributed to the entry before it.
                title = None
            else:
                title = line.replace('"', "").strip()
            continue

        if title is None or "referenced in" not in line:
            continue
        if any(marker in line for marker in _SKIPPED_REFER_MARKERS):
            continue

        # '  (referenced in "Foo" (2000))' -> 'Foo (2000)'
        refer = line.replace("referenced in", "")
        refer = refer.replace("(", "", 1).replace("))", ")")
        refer = refer.replace('"', "").strip()

        ids = []
        for name in (title, refer):
            if name not in index_of:
                index_of[name] = len(names)
                names.append(name)
            ids.append(index_of[name])
        id_title, id_refer = ids

        if id_title == id_refer:
            continue    # no self-loops
        if _EXCLUDED_NODE_ID in (id_title, id_refer):
            continue
        pair = frozenset((id_title, id_refer))
        if pair in seen:
            continue    # no duplicates, in either direction
        seen.add(pair)

        edges.append((id_title, id_refer))
        for node_id in (id_title, id_refer):
            degree[node_id] = degree.get(node_id, 0) + 1

    return names, edges, degree


def _render_dot(names, edges, degree):
    """Return the output lines of the graph built by ``_collect_links``."""
    out = [
        "graph imdb {",
        ' node [shape=plaintext fontsize="7" fontname="Arial"]',
    ]

    used = []
    for node_id, name in enumerate(names):
        count = degree.get(node_id, 0)
        if count > _MIN_CONNECTIONS:
            out.append(
                " " + str(node_id) + ' [label="' + name
                + '" priority="' + str(count) + '"];')
            used.append(node_id)

    out.append("")
    out.append("// used: " + str(used))
    out.append("")

    labelled = set(used)
    for id_title, id_refer in edges:
        if id_title in labelled or id_refer in labelled:
            # "--" is the edge operator of an undirected DOT graph.
            out.append("\t" + str(id_title) + " -- " + str(id_refer) + ";")
        else:
            # Neither endpoint is a labelled node: keep the edge as a comment.
            out.append(
                "// " + str([str(id_title), str(id_refer)])
                + " not found in used")

    out.append("}")
    return out
|
||||
|
||||
|
||||
|
||||
|
||||
def sort2(seq):  # Dave Kirby
    """Return the items of *seq* as a list with repeats removed.

    The first occurrence of each item is kept and the original order is
    preserved.  Items are tracked in a set, so they must be hashable.
    """
    already_seen = set()
    unique = []
    for item in seq:
        if item in already_seen:
            continue
        already_seen.add(item)
        unique.append(item)
    return unique
|
||||
|
||||
|
||||
|
||||
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    imdbref(sys.argv)
|
||||
Reference in New Issue
Block a user