# frozen_string_literal: true

# Splits a ".simdata" query log into fixed-size ".part" files plus one
# ".testonly.simdata" file.
#
# Usage: ruby <this script> SIMDATA_FILE OUTDIR TESTONLYDIR
#
# Output layout:
#   OUTDIR/<basename of SIMDATA_FILE>/<first>-<first + slice>.part
#   TESTONLYDIR/<basename without ".simdata">.testonly.simdata
#
# The first slice of queries in the input is the "training" block. A copy of
# that block whose query headers carry the id -1 (the "warm-up") is written at
# the top of every part file and at the top of the testonly file.

require 'fileutils'

# Number of queries in the training block and in every part file. Kept as a
# global because the original script exposed it this way.
$slice_size = 1000

# Encoding the raw input lines are tagged with before they are split.
SIMDATA_ENCODING = 'iso-8859-1'

USAGE = "usage: ruby #{File.basename(__FILE__)} SIMDATA_FILE OUTDIR TESTONLYDIR"

# TODO: modify this file so that it duplicates the training data in each part
# and sets the query id to -1.
# Also modify testonly_maker.rb so that it doesn't have a bunch of training
# data in the middle again.
# NOTE(review): the warm-up handling below looks like it already covers the
# first item -- confirm and drop the TODO if so.
#
# TODO: test it and change testonly_maker

# Tags +line+ as ISO-8859-1 (in place) and splits it on whitespace.
#
# @param line [String] one raw line read from the simdata file
# @return [Array<String>] the whitespace-separated fields
def simdata_fields(line)
  line.force_encoding(SIMDATA_ENCODING).split
end

# A line with more than two fields is treated as a query header; anything
# shorter is passed through as data belonging to the current query.
#
# @param fields [Array<String>]
# @return [Boolean]
def query_header?(fields)
  fields.length > 2
end

# Rewrites a query header so that its id (the first field) becomes -1.
#
# @param fields [Array<String>] fields of a query header line
# @return [String] the rewritten header, without a trailing newline
def warm_up_header(fields)
  "-1 #{fields[1..-1].join(' ')}"
end

# @param map_dir [String] directory holding the part files
# @param first_qnum [Integer] id of the first query stored in the part
# @param slice_size [Integer] number of queries per part
# @return [String] path of the part file
def part_path(map_dir, first_qnum, slice_size)
  File.join(map_dir, "#{first_qnum}-#{first_qnum + slice_size}.part")
end

# Reads the training block: every line up to, but not including, the header of
# query number +slice_size+ (counting from zero).
#
# @param f [IO] input positioned at the start of the simdata file
# @param slice_size [Integer] number of queries in the training block
# @return [Array, nil] [first query id, warm-up lines, training lines, next line],
#   where "next line" is the header that ended the block or nil at end of
#   input; nil when the input is empty
def read_training_block(f, slice_size)
  line = f.gets
  return nil if line.nil?

  # The first line is always taken as the first query header.
  fields = simdata_fields(line)
  init_qnum = fields[0].to_i
  warm_up = [warm_up_header(fields)]
  training_lines = [line]
  qcount = 0

  while (line = f.gets)
    fields = simdata_fields(line)
    if query_header?(fields)
      qcount += 1
      break if qcount >= slice_size

      warm_up << warm_up_header(fields)
    else
      warm_up << line
    end
    training_lines << line
  end

  [init_qnum, warm_up, training_lines, line]
end

# Copies every remaining line to the testonly file and to the current part
# file, starting a new part (prefixed with the warm-up block) each time the
# query id has advanced by a multiple of +slice_size+ from the first one.
#
# @param f [IO] input positioned just after +line+
# @param line [String, nil] first non-training line (a query header), or nil
#   when the input ended inside the training block
# @param map_dir [String] directory receiving the part files
# @param testf [IO] open testonly file
# @param warm_up [Array<String>] warm-up lines written at the top of each part
# @param slice_size [Integer] number of queries per part
# @return [void]
def write_test_parts(f, line, map_dir, testf, warm_up, slice_size)
  first_qnum = nil
  curr_out = nil

  while line
    fields = simdata_fields(line)
    if query_header?(fields)
      curr_q = fields[0].to_i
      first_qnum ||= curr_q
      if ((curr_q - first_qnum) % slice_size).zero?
        curr_out.close if curr_out
        curr_out = File.open(part_path(map_dir, curr_q, slice_size), 'w')
        curr_out.puts warm_up
      end
    end
    # The first line handed in is always a header, so a part is open here.
    curr_out.puts line
    testf.puts line
    line = f.gets
  end
ensure
  # Close the last part even when reading or writing raised.
  curr_out.close if curr_out
end

# Entry point: writes the part files and the testonly file for one input.
#
# @param simdata_file [String] path of the input file
# @param outdir [String] directory under which the part directory is created
# @param testonlydir [String] existing directory receiving the testonly file
# @param slice_size [Integer] queries per part (defaults to $slice_size)
# @raise [ArgumentError] if slice_size is not positive or the input is empty
# @return [void]
def split_simdata(simdata_file, outdir, testonlydir, slice_size = $slice_size)
  raise ArgumentError, "slice_size must be positive, got #{slice_size}" unless slice_size > 0

  map_dir = File.join(outdir, File.basename(simdata_file))
  FileUtils.mkdir_p(map_dir)
  testonly_name = "#{File.basename(simdata_file, '.simdata')}.testonly.simdata"

  File.open(simdata_file) do |f|
    init_qnum, warm_up, training_lines, line = read_training_block(f, slice_size)
    raise ArgumentError, "simdata file is empty: #{simdata_file}" if init_qnum.nil?

    # Part file for the training block itself; the block form closes it.
    File.open(part_path(map_dir, init_qnum, slice_size), 'w') do |out|
      out.puts warm_up
      out.puts training_lines
    end

    # Everything after the training block goes to the testonly file and to
    # the remaining part files.
    File.open(File.join(testonlydir, testonly_name), 'w') do |testf|
      testf.puts warm_up
      write_test_parts(f, line, map_dir, testf, warm_up, slice_size)
    end
  end
end

# Command-line dispatch. Extra arguments beyond the first three are ignored,
# as before. With no arguments at all only the usage line is printed, so the
# file can also be loaded just for its methods.
if ARGV.length >= 3
  split_simdata(ARGV[0], ARGV[1], ARGV[2])
elsif ARGV.empty?
  warn USAGE
else
  abort USAGE
end