Skip to content

Commit c6eed54

Browse files
author
doc-node121
committed
Merge branch 'master' of github.com:lukaselmer/ethz-web-scale-data-mining-project-runs
2 parents 961f7bd + 379abbf commit c6eed54

23 files changed

+6295
-0
lines changed
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#! /usr/bin/ruby
2+
3+
# A single term of a topic together with its weight.
#
# Built from a "word:probability" fragment such as
# "car:0.0065967561177527915".
class Term
  attr_accessor :word, :probability

  # @param s [String] a "word:probability" fragment
  #
  # The fragment is split on its LAST colon, so a word that itself contains
  # ':' keeps its full text and its real probability. A fragment without any
  # colon is treated as a bare word with probability 0.0; an empty fragment
  # yields an empty word.
  def initialize(s)
    word, separator, probability = s.rpartition(':')
    if separator.empty?
      # rpartition leaves the whole input in the last slot when nothing matched.
      @word = s
      @probability = 0.0
    else
      @word = word
      # to_f ignores trailing whitespace/newlines and returns 0.0 for non-numbers.
      @probability = probability.to_f
    end
  end
end
11+
12+
# One topic of a topic-model run: an id plus its terms.
#
# The same Term objects are exposed in two orders: by descending probability
# and alphabetically by word.
class Topic
  attr_accessor :id, :terms_by_probability, :terms_by_name

  # @param str [String] one line of a terms-per-topic dump, for example
  #   "9\t{car:0.0065,citi:0.0054,music:0.0049}"
  #   (topic id, a tab, then comma-separated word:probability pairs in braces).
  #   A trailing line terminator is ignored. A line without a term list yields
  #   a topic with no terms instead of raising.
  def initialize(str)
    @id, words = str.chomp.split("\t")
    terms = words.to_s.delete('{}').split(',').map { |s| Term.new(s) }

    # Order explicitly instead of trusting the input order; the index
    # tie-breaker keeps the input order for terms with equal probability.
    @terms_by_probability = terms.each_with_index
                                 .sort_by { |term, index| [-term.probability, index] }
                                 .map(&:first)
    @terms_by_name = @terms_by_probability.sort_by { |t| t.word }
  end

  # @return [String] a header line with the alphabetical word list and the id,
  #   one " * word : probability" line per term in probability order, and a
  #   blank line at the end.
  def to_s
    names = @terms_by_name.map(&:word).join(', ')
    lines = ["Topic #{names}, id: #{@id}"]
    @terms_by_probability.each do |t|
      lines << " * #{t.word} : #{t.probability}"
    end
    lines << "\n"
    lines.join("\n")
  end
end
33+
34+
# Command-line driver: reads a terms-per-topic dump (one topic per line),
# prints every topic to the shell, then writes html/index.html by replacing
# the ___input___ placeholder of html/layout.erb with a JavaScript array that
# holds one list of {text, size} entries per topic.
if ARGV.empty?
  puts "Usage: display_results <textfile>"
  exit(1)
end

topics = []

# File.foreach treats the argument strictly as a file path (Kernel#open would
# run an argument starting with "|" as a command) and closes the file when the
# iteration is done.
File.foreach(ARGV[0]) do |line|
  next if line.strip.empty? # tolerate blank lines, e.g. at the end of the dump

  topics << Topic.new(line)
end

# Order topics by their alphabetically-first word; a topic without terms
# sorts first instead of raising.
topics.sort_by! do |topic|
  first_term = topic.terms_by_name.first
  first_term ? first_term.word.to_s : ''
end

# Shell output
topics.each { |t| puts t.to_s }

#topics = [topics[0], topics[1], topics[2]]

# HTML output
input = topics.map do |topic|
  # Scale every term relative to the most probable term of its topic. Taking
  # the maximum does not rely on the term order; a missing or zero maximum
  # falls back to 1.0 so the division never produces NaN or Infinity.
  highest_probability = topic.terms_by_probability.map { |term| term.probability }.max
  scale = highest_probability && highest_probability > 0 ? highest_probability : 1.0

  topic_object = topic.terms_by_name.map do |term|
    # Escape backslashes and single quotes so the word remains a valid
    # single-quoted JavaScript string literal.
    text = term.word.to_s.gsub(/[\\']/) { |char| "\\#{char}" }
    # Sizes range from 10 up to 200 (for the most probable term).
    "{text: '#{text}', size: #{10 + (term.probability / scale) * 190}}"
  end.join(',')

  "[#{topic_object}]"
end.join(",\n")

require 'erb'
placeholder = '___input___'
template = File.read('html/layout.erb')

# Fail loudly before touching the output: with a missing placeholder nothing
# would be substituted and the page would silently carry no data.
abort "html/layout.erb does not contain the #{placeholder} placeholder" unless template.include?(placeholder)

outfile = 'html/index.html'
# Remove any previous output first so the file is created fresh.
File.delete outfile if File.exist? outfile
# Block form of gsub: the payload is inserted literally, so backslashes in it
# are not interpreted as backreferences.
File.write(outfile, template.gsub(placeholder) { "[#{input}]" })
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#! /usr/bin/ruby
2+
3+
# Prints, for every result dump listed below, the two shell commands that
# render it with display_result.rb and then move the generated
# html/index.html to an .html file named after the dump.
result_files = %w(
  terms-topic_p2500_run_shard00_50t_10i.txt
  terms-topic_p20000_run_shard00_50t_10i.txt
  terms-topic_p500_run_shard00_50t_10i.txt
  terms-topic_p500_run_shard00_50t_15i.txt
  terms-topic_p500_run_shard00_50t_20i.txt
  terms-topic_p500_run_shard00_50t_5i.txt
)

result_files.each do |txt_name|
  html_name = txt_name.gsub(".txt", ".html")
  puts "./display_result.rb #{txt_name}"
  puts "mv html/index.html html/#{html_name}"
end
5+

0 commit comments

Comments
 (0)