#!/usr/bin/env ruby
# frozen_string_literal: true

# Only use stdlib, no gems BS, no rails BS
require 'json'
require 'date'

# Finds the capture files that surround a requested time window and prints the
# result as a JSON document on stdout. Every diagnostic goes to stderr so that
# stdout only ever carries the JSON document.
#
# Usage:
#   ruby find_suitable_pcaps.rb pcaps_dir start_timestamp end_timestamp search_threshold [dynamic]
#
# pcaps_dir        - directory holding the capture files
# start_timestamp  - start of the window (integer, converted with to_i)
# end_timestamp    - end of the window (integer, converted with to_i)
# search_threshold - integer added to / subtracted from the matched timestamps
#                    when deciding whether to include a neighbouring file
# dynamic          - when 1, merged_files lists every file whose timestamp lies
#                    between the matched start and end timestamps

# Given sorted array with values in timestamps and timestamp, returns next available timestamp.
# Returns nil when the timestamp is not in the array or is its last element.
def find_next(arr, timestamp)
  index = arr.index(timestamp)
  index && arr[index + 1]
end

# Given sorted array with values in timestamps and timestamp, returns previous available timestamp.
# Returns nil when the timestamp is not in the array or is its first element.
def find_prev(arr, timestamp)
  index = arr.index(timestamp)
  return nil if index.nil? || index.zero?

  arr[index - 1]
end

# Stores the de-duplicated union of start_files and end_files in
# result_h[:merged_files] and its size in result_h[:merged_files_count].
# Returns that size.
def merge_files(result_h)
  result_h[:merged_files] = (result_h[:start_files] + result_h[:end_files]).uniq
  result_h[:merged_files_count] = result_h[:merged_files].count
end

dir_name = ARGV[0]
unless dir_name
  # Without this guard File.expand_path(nil) raises a TypeError.
  warn "Usage: #{$PROGRAM_NAME} pcaps_dir start_timestamp end_timestamp search_threshold [dynamic]"
  exit 1
end

start_timestamp = ARGV[1].to_i
end_timestamp = ARGV[2].to_i
search_threshold = ARGV[3].to_i
return_all_files = ARGV[4].to_i
start_inaccurate = false
end_inaccurate = false

result_h = {
  files_found: 0,
  start_files_count: 0,
  start_files: [],
  end_files_count: 0,
  end_files: [],
  merged_files_count: 0,
  merged_files: []
}

# Resolve the directory once. Re-using a relative dir_name after Dir.chdir
# would resolve it a second time, against the new working directory.
dir_path = File.expand_path(dir_name)
unless File.directory?(dir_path)
  warn "Directory #{dir_name} does not exits. Exiting."
  exit 1
end

# File.mtime below is called with bare file names, so the working directory
# has to be the capture directory.
Dir.chdir(dir_path)

temp_h = {}
Dir.foreach(dir_path) do |file|
  next if %w[. ..].include?(file)

  # Compressed file will have timestamp in filename
  # Uncompressed file will not have timestamp in filename, so parse accordingly
  # NOTE(review): any name containing a run of 10+ digits takes the first
  # branch, and files sharing a timestamp overwrite each other in temp_h.
  timestamp = file =~ /(\d{10,})(?:\.pcap\.zst)?/ ? Regexp.last_match(1).to_i : File.mtime(file).to_i
  temp_h[timestamp] = file
end

exit 0 if temp_h.empty?

# hash contains timestamp as key and filename as value
# keys sorted by timestamp, i.e
# 123456789 => capture00,
# 223456788 => capture01
files_h = temp_h.sort_by { |timestamp, _file| timestamp }.to_h

# Array of sorted files timestamps
timestamps_arr = files_h.keys

# Latest timestamp strictly below each requested bound (nil when there is none).
start_prev_ts = timestamps_arr.reverse_each.find { |ts| ts < start_timestamp }
end_prev_ts = timestamps_arr.reverse_each.find { |ts| ts < end_timestamp }

# Simplified captures files example, let say we are searching pcaps with
# start_timestamp 1010 and end_timestamp 1022
# capture00 - 1000
# capture01 - 1008 <----- start_prev_ts (last file before start_timestamp)
# capture02 - 1012 <----- start_ts      (file we matched)
# capture03 - 1016 <----- start_next_ts (file next of the one we matched)
# capture04 - 1020 <----- end_prev_ts   (last file before end_timestamp)
# capture05 - 1024 <----- end_ts        (file we matched)
# capture06 - 1028

if start_prev_ts.nil?
  # Every file is at or after start_timestamp: fall back to the earliest one.
  # The warning is emitted before start_timestamp is overwritten so that it
  # reports the requested value.
  warn "No file found with modification timestamp earlier than start_timestamp #{start_timestamp} (#{Time.at(start_timestamp).to_datetime})"
  warn "Using earliest available file #{files_h[timestamps_arr.first]}. This can be inacurate."
  result_h[:start_files] << files_h[timestamps_arr.first]
  start_timestamp = timestamps_arr.first
  start_inaccurate = true
else
  start_ts = find_next(timestamps_arr, start_prev_ts)
  if start_ts.nil?
    # No file follows the predecessor, i.e. the latest file is older than start_timestamp.
    warn "Start file(s) not found, start_timestamp #{start_timestamp} is later than latest available file's timestamp #{timestamps_arr.last}"
  else
    start_timestamp = start_ts
    result_h[:start_files] << files_h[start_ts]
    start_next_ts = find_next(timestamps_arr, start_ts)
    # NOTE(review): start_timestamp already equals start_ts here, so this test
    # reduces to search_threshold > 0. Comparing against the requested start
    # may have been intended - confirm before changing; behavior is preserved.
    if start_next_ts && start_timestamp + search_threshold > start_ts
      result_h[:start_files] << files_h[start_next_ts]
    end
  end
end

if end_prev_ts.nil?
  # Every file is at or after end_timestamp.
  if timestamps_arr.last < start_timestamp
    # Only reachable when start_timestamp is later than every file as well.
    # Reported on stderr: stdout is reserved for the JSON document.
    warn "End file(s) not found, no file is older than end_timestamp #{end_timestamp} and latest available file's timestamp #{timestamps_arr.last} is older than start_timestamp #{start_timestamp}"
  else
    result_h[:end_files] << files_h[timestamps_arr.last]
    end_timestamp = timestamps_arr.last
    end_inaccurate = true
    warn "Using #{files_h[timestamps_arr.last]} as end file. This can be inacurate."
  end
else
  end_ts = find_next(timestamps_arr, end_prev_ts)
  if end_ts.nil?
    # No file follows the predecessor, i.e. the latest file is older than end_timestamp.
    warn "End file(s) not found, end_timestamp #{end_timestamp} is later than latest available file's timestamp #{timestamps_arr.last}"
  else
    end_timestamp = end_ts
    result_h[:end_files] << files_h[end_ts]
    # NOTE(review): end_timestamp already equals end_ts here, so this compares
    # the gap between the two files with search_threshold. Comparing against
    # the requested end may have been intended - confirm; behavior is preserved.
    result_h[:end_files].unshift(files_h[end_prev_ts]) if end_timestamp - search_threshold < end_prev_ts
  end
end

result_h[:start_files_count] = result_h[:start_files].size
result_h[:end_files_count] = result_h[:end_files].size
result_h[:files_found] = result_h[:start_files_count] + result_h[:end_files_count]
merge_files(result_h)

if start_inaccurate || end_inaccurate
  warn "Start or End files are inacurate, will not search for all files"
  return_all_files = 0
end

if return_all_files == 1
  # Replace the merged list with every file inside the matched window.
  in_window = files_h.select { |ts, _file| ts >= start_timestamp && ts <= end_timestamp }
  result_h[:merged_files] = in_window.values
  result_h[:merged_files_count] = result_h[:merged_files].count
end

puts JSON.pretty_generate(result_h)

# Example:
# {
#   "files_found": 3,
#   "start_files_count": 2,
#   "start_files": [
#     "capture03",
#     "capture04"
#   ],
#   "end_files_count": 1,
#   "end_files": [
#     "capture00"
#   ]