require 'pp'

# Converts the output of arrival_rate_sweep.rb into matlab-friendly formats;
# each output file contains one simdata-mapping pair.
# For load graphic output.
#
# Usage: ruby <this script> <infile> <outdir>
#
# Input layout, as recognised by the parser below:
#   * a "<x>.simdata <y>.mapping" line starts a new data set and opens
#     <outdir>/<x>_<y>.parts for writing;
#   * a line matching /.*.part/ is ignored (data keeps accumulating);
#   * an empty line ends the data set: the accumulated blocks are written
#     and the output file is closed;
#   * any other line is a tab-separated data row: query rate, one value per
#     $column_order entry, then machine load values summed in groups of three.
#
# Output layout: a "rows<TAB>cols<TAB>blocks" header, then for every block its
# name on one line followed by a matrix whose first row holds the query rates
# and whose remaining rows hold one value per accumulated data row.

# Names of the columns that directly follow the query rate in a data row.
$column_order = ["Query Time", "99% Query Time"]

# Block that is left out of the output when the simdata name contains "exh".
CSI_COLUMN = "CSI Load"

# Returns an empty accumulator: column name => query rate => array of values.
def new_data_columns
  Hash.new { |h, k| h[k] = Hash.new { |i, j| i[j] = [] } }
end

# Parses one tab-separated data row and appends its values to data_columns.
#
# Rows that do not carry a value for every $column_order entry are ignored;
# such truncated rows show up when a run errored out or was aborted.
def accumulate_row(data_columns, line)
  arr = line.split("\t")
  return if arr.size <= $column_order.length

  query_rate = arr[0].to_f

  # everything before the machine loads
  $column_order.each_with_index do |name, i|
    data_columns[name][query_rate] << arr[i + 1].to_f
  end

  # machine load values: every three consecutive fields are added up into
  # one "m_<n> load" value, n counting from 1
  machine_loads = arr[($column_order.length + 1)..-1]
  machine_loads.each_slice(3).with_index(1) do |trip, i|
    load_sum = trip.reduce(0) { |total, x| total + x.to_f }
    data_columns["m_#{i} load"][query_rate] << load_sum
  end
end

# Writes the header line and every block of data_columns to out.
#
# data_columns must not be empty. When skip_csi is truthy the CSI_COLUMN block
# is left out; the block count in the header is the number of blocks that are
# really written, so it stays correct whether or not that block exists.
def write_blocks(out, data_columns, skip_csi)
  blocks = data_columns.reject { |name, _| skip_csi && name == CSI_COLUMN }

  # longest series over all columns, + 1 for the query-rate header row;
  # using the overall maximum keeps every column the same height so that
  # the transpose below cannot fail
  rows = data_columns.values.flat_map(&:values).map(&:size).max + 1
  cols = data_columns.first[1].size # number of query rates

  out.puts "#{rows}\t#{cols}\t#{blocks.size}"

  blocks.each do |column_name, data_hash|
    # one output column per query rate, ordered by query rate
    columns = data_hash.sort_by { |query_rate, _| query_rate }.map do |query_rate, data|
      column = [query_rate] + data
      missing = rows - 1 - data.size
      if missing > 0
        # short series are padded with their own average
        avg = data.reduce(0) { |total, x| total + x.to_f } / data.size
        column.concat(Array.new(missing, avg))
      end
      column
    end

    # convert the column matrix into rows and output
    out.puts column_name
    columns.transpose.each { |row| out.puts row.join("\t") }
  end
end

# Reads infile and writes one "<simdata>_<mapping>.parts" file per data set
# into outdir.
#
# A data set is written when its terminating empty line is read, when the
# next simdata/mapping header arrives while it is still open, or at the end
# of the input, so a missing trailing empty line does not lose the last set.
def convert_sweep_output(infile, outdir)
  data_columns = new_data_columns
  skip_csi = false
  curr_out = nil

  # Finishes the current data set: writes what was accumulated (if anything),
  # closes the output file and starts over with an empty accumulator.
  finish_set = lambda do
    if curr_out
      write_blocks(curr_out, data_columns, skip_csi) unless data_columns.empty?
      curr_out.close
      curr_out = nil
    elsif !data_columns.empty?
      warn "#{infile}: discarding data rows that belong to no simdata/mapping header"
    end
    data_columns = new_data_columns
  end

  begin
    File.open(infile) do |f|
      f.each_line do |raw|
        line = raw.strip
        if line =~ /.*.simdata .*.mapping/
          # start of a new data set; finish one that was left open first
          finish_set.call if curr_out
          simdata, mapping = line.split
          # strip extensions
          simdata = simdata.sub(/\..*/, '')
          mapping = mapping.sub(/\..*/, '')
          filename = File.join(outdir, simdata + "_" + mapping + ".parts")
          curr_out = File.open(filename, "w+")
          skip_csi = simdata =~ /exh/
        elsif line =~ /.*.part/
          # do nothing; continue accumulating data columns
        elsif line.empty?
          finish_set.call
        else
          accumulate_row(data_columns, line)
        end
      end
    end
    # input that does not end with an empty line
    finish_set.call
  ensure
    # do not leak the output handle when parsing or writing raised
    curr_out.close if curr_out && !curr_out.closed?
  end
end

if ARGV.length >= 2
  convert_sweep_output(ARGV[0], ARGV[1])
else
  warn "usage: ruby #{File.basename($PROGRAM_NAME)} <infile> <outdir>"
end