First commit of old project

2026-03-17 07:02:10 +00:00 · 2011-04-01 09:12:00 +03:00
commit 7356ced2fb
4 changed files with 288 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,4 @@
+_done
+_x
+movie-links.list
+movie-links.list.utf8
--- a/imdbref-aisee.py
+++ b/imdbref-aisee.py
@@ -0,0 +1,144 @@
+import sys, optparse, errno, re, codecs, os
+
+def imdbref(argv):
+	"""
+	create a .gdl-file from imdb's movie-links.list
+	"""
+
+	parser = optparse.OptionParser(
+		usage='Usage: %prog [options]',
+		description="create a .gdl-file from imdb's movie-links.list",
+		version="%prog 0.1"
+	)
+	parser.add_option(	"-f", "--file", default="movie-links.list", help="file to process [default: %default]")
+
+	(options, args) = parser.parse_args(argv[1:])
+
+	if options.file == None:
+		parser.print_help()
+		sys.exit(-1)
+
+	else:
+		names = []
+		lines = []
+		connections = []
+
+		# generate {movie list file}.utf8
+		os.system("iconv -f iso-8859-1 -t UTF-8 "+options.file+" > "+options.file+".utf8")
+
+		f = codecs.open( options.file +'.utf8', encoding='utf-8' )
+		for line in f:
+			line = line.encode('utf-8', 'replace')
+			if line[0] == "\"":
+				if re.search("{", line): # Skip TV-episodes
+					pass
+				elif re.search("\(TV\)", line): # Skip (TV)
+					pass
+				else:
+					#title = repr(line)
+					title = line
+			elif re.search("referenced in", line):
+				if re.search("{", line): # Skip TV-episodes
+					pass
+				elif re.search("\(TV\)", line): # Skip (TV)
+					pass
+				elif re.search("\(V\)", line): # Skip (V)
+					pass
+				elif re.search("\(VG\)", line): # Skip (VG)
+					pass
+				elif re.search("lgyi-show", line):
+					pass
+				else:
+					title = title.replace("\"", "")
+					title = title.replace("\"", "")
+					refer = line.replace("referenced in", "")
+					refer = refer.replace("(", "", 1)
+					refer = refer.replace("))", ")")
+					refer = refer.replace("\"", "")
+
+					# Titles and referers into the names-list
+					title = title.strip()
+					refer = refer.strip()
+
+					if title not in names:
+						names.append(title)
+					if refer not in names:
+						names.append(refer)
+
+					names = sort2(names) # remove duplicates, the hard way
+
+					# Let's get the title and refer index number from the names-list
+					id_title = names.index(title)
+					id_refer = names.index(refer)
+
+					# We use the already defined names list to make the file smaller
+					normal_line = '\tedge: { sourcename: "'+str(id_title)+'" targetname: "'+str(id_refer)+'" }'
+					revers_line = '\tedge: { sourcename: "'+str(id_refer)+'" targetname: "'+str(id_title)+'" }'
+					if revers_line or normal_line not in lines: # no dublicates
+						if id_title is not id_refer and normal_line is not revers_line:
+							if id_title != 6025 and id_refer != 6025:
+								# We get occurances
+								connections.append( id_title )
+								connections.append( id_refer )
+								# Add the line itself
+								lines.append(normal_line)
+
+			else:
+				pass
+		else:
+			pass
+
+
+		os.remove( options.file +'.utf8' ) # We get rid of the temp file
+
+		used = []
+
+		if True:
+			print '''
+graph: {
+	title			: "IMDB references"
+	layoutalgorithm	: tree
+	scaling        	: 0.5
+	colorentry 42  	: 152 222 255
+	node.shape     	: ellipse
+	node.color     	: 42
+	node.height    	: 32
+	edge.color     	: blue
+	edge.arrowsize 	: 6
+	node.textcolor 	: black
+	splines        	: yes
+
+'''
+
+			# loop the names
+			for i, name in enumerate(names):
+				c = connections.count(i)
+				if c > 2:
+					print '    node: {title: "' + str(i)+'" label: "'+str(name)+'" }'
+					used.append( i )
+
+			print
+			# loop the connections
+			for i, line in enumerate(lines):
+				s = re.findall("([0-9]+)", line)
+				#if any(x in s for x in used): # wtf?
+				#	print "//", s, 'not found in used'
+				if s[0] in used:
+					if s[1] in used:
+						print line
+				#else:
+				#	print line
+
+			print "}"
+
+
+
+
+def sort2(seq): # Dave Kirby
+    seen = set()
+    return [x for x in seq if x not in seen and not seen.add(x)]
+
+
+
+if __name__ == '__main__': # only when run as a script, not when imported
+	imdbref(sys.argv) # the whole argv is handed over; imdbref() drops argv[0] itself
--- a/imdbref.py
+++ b/imdbref.py
@@ -0,0 +1,131 @@
+import sys, optparse, errno, re, codecs, os
+
+def imdbref(argv):
+	"""
+	create a .dot-file from imdb's movie-links.list
+	"""
+
+	parser = optparse.OptionParser(
+		usage='Usage: %prog [options]',
+		description="create a .dot-file from imdb's movie-links.list",
+		version="%prog 0.1"
+	)
+	parser.add_option(	"-f", "--file", default="movie-links.list", help="file to process [default: %default]")
+
+	(options, args) = parser.parse_args(argv[1:])
+
+	if options.file == None:
+		parser.print_help()
+		sys.exit(-1)
+
+	else:
+		names = []
+		lines = []
+		connections = []
+
+		# generate {movie list file}.utf8
+		os.system("iconv -f iso-8859-1 -t UTF-8 "+options.file+" > "+options.file+".utf8")
+
+		f = codecs.open( options.file +'.utf8', encoding='utf-8' )
+		for line in f:
+			line = line.encode('utf-8', 'replace')
+			if line[0] == "\"":
+				if re.search("{", line): # Skip TV-episodes
+					pass
+				elif re.search("\(TV\)", line): # Skip (TV)
+					pass
+				else:
+					#title = repr(line)
+					title = line
+			elif re.search("referenced in", line):
+				if re.search("{", line): # Skip TV-episodes
+					pass
+				elif re.search("\(TV\)", line): # Skip (TV)
+					pass
+				elif re.search("\(V\)", line): # Skip (V)
+					pass
+				elif re.search("\(VG\)", line): # Skip (VG)
+					pass
+				elif re.search("lgyi\-show \(1991\)", line): # Skip (VG)
+					pass
+				else:
+					title = title.replace("\"", "")
+					title = title.replace("\"", "")
+					refer = line.replace("referenced in", "")
+					refer = refer.replace("(", "", 1)
+					refer = refer.replace("))", ")")
+					refer = refer.replace("\"", "")
+
+					# Titles and referers into the names-list
+					title = title.strip()
+					refer = refer.strip()
+
+					if title not in names:
+						names.append(title)
+					if refer not in names:
+						names.append(refer)
+
+					names = sort2(names) # remove duplicates, the hard way
+
+					# Let's get the title and refer index number from the names-list
+					id_title = names.index(title)
+					id_refer = names.index(refer)
+
+
+					# We use the already defined names list to make the file smaller
+					normal_line = "\t"+str(id_title)+" <- "+str(id_refer)+";"
+					revers_line = "\t"+str(id_refer)+" <- "+str(id_title)+";"
+					if revers_line or normal_line not in lines: # no dublicates
+						if id_title is not id_refer and normal_line is not revers_line:
+							if id_title != 6025 and id_refer != 6025:
+								# We get occurances
+								connections.append( id_title )
+								connections.append( id_refer )
+								# Add the line itself
+								lines.append(normal_line)
+			else:
+				pass
+		else:
+			pass
+
+
+		os.remove( options.file +'.utf8' ) # We get rid of the temp file
+
+
+		used = []
+
+		if True:
+			print "graph imdb {" # start the project
+			print '    node [shape=plaintext fontsize="7" fontname="Arial"]'
+
+			# loop the names
+			for i, name in enumerate(names):
+				c = connections.count(i)
+				if c > 10:
+					print "    " + str(i) + ' [label="' + str(name) + '" priority="' + str(c) + '"];'
+					used.append( i )
+
+			print
+			print "// used: " + str(used)
+			print
+			# loop the connections
+			for i, line in enumerate(lines):
+				s = re.findall("([0-9]+)", line)
+				if any(x in s for x in used): # wtf?
+					print "//", s, 'not found in used'
+				else:
+					print line
+
+			print "}"
+
+
+
+
+def sort2(seq): # Dave Kirby
+    seen = set()
+    return [x for x in seq if x not in seen and not seen.add(x)]
+
+
+
+if __name__ == '__main__': # only when run as a script, not when imported
+	imdbref(sys.argv) # the whole argv is handed over; imdbref() drops argv[0] itself
--- a/run.sh
+++ b/run.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+
+FILENAME=$(date +%Y-%m-%d);
+# process and create .dot
+python imdbref.py > $FILENAME.dot
+# remove old
+rm $FILENAME.png
+# do the image from the .dot
+twopi $FILENAME.dot -Tpng -v -o $FILENAME.png