#!/usr/bin/env ruby
# frozen_string_literal: true

# Only use stdlib, no gems BS, no rails BS
require 'json'
require 'date'

# Usage:
#   ruby find_suitable_pcaps.rb pcaps_dir start_timestamp end_timestamp search_threshold [return_all_files]
#
# Every numeric argument goes through to_i, so a missing or non-numeric
# value silently becomes 0. return_all_files is compared against 1 further
# down; any other value leaves the all-files listing disabled.
dir_name, start_arg, end_arg, threshold_arg, all_files_arg = ARGV
start_timestamp = start_arg.to_i
end_timestamp = end_arg.to_i
search_threshold = threshold_arg.to_i
return_all_files = all_files_arg.to_i

# Set to 1 further down when a start/end file had to be guessed.
start_inacurate = 0
end_inacurate = 0

# Report printed as JSON at the end of the script.
result_h = {
  files_found: 0,        # start_files_count + end_files_count
  start_files_count: 0,
  start_files: [],       # file names selected for the start timestamp
  end_files_count: 0,
  end_files: [],         # file names selected for the end timestamp
  merged_files_count: 0,
  merged_files: []       # de-duplicated union, or the full range when requested
}

# Validate the capture directory. A missing/empty argument is reported the
# same way as a non-existent directory instead of crashing inside
# File.expand_path(nil).
if dir_name.nil? || dir_name.empty? || !File.directory?(File.expand_path(dir_name))
  warn "Directory #{dir_name} does not exits. Exiting."
  exit 1
end

# Work from inside the directory so that the bare entry names yielded below
# resolve for File.mtime. Use the same expanded path that was just validated.
Dir.chdir(File.expand_path(dir_name))

# timestamp (Integer) => entry name. Entries that share a timestamp overwrite
# one another; the last one yielded wins.
temp_h = {}

# Scan '.' rather than dir_name: after the chdir above a relative dir_name
# would be resolved against the new working directory and either fail with
# ENOENT or list a nested directory of the same name.
Dir.foreach('.') do |file|
  next if %w[. ..].include?(file)

  # Compressed file will have timestamp in filename (a run of 10+ digits).
  # Uncompressed file will not, so fall back to its modification time.
  timestamp = file =~ /(\d{10,})(?:\.pcap\.zst)?/ ? Regexp.last_match(1).to_i : File.mtime(file).to_i

  temp_h[timestamp] = file
end

# Nothing to search in an empty directory: leave with success and no output.
exit 0 if temp_h.empty?

# Same timestamp => filename mapping, re-keyed in ascending timestamp order:
#   123456789 => capture00,
#   223456788 => capture01
files_h = temp_h.sort_by(&:first).to_h

# Ascending list of the known timestamps
timestamps_arr = files_h.keys

# Slots filled in while searching (end_files uses the same layout):
#   :file0 / :timestamp0 - file just before the one we matched
#   :file  / :timestamp  - file we matched
#   :file1 / :timestamp1 - file right after the one we matched
start_files = {}
end_files = {}

# For each bound, remember the newest file whose timestamp is strictly
# below it. The slots stay empty when no such file exists.
[[start_files, start_timestamp], [end_files, end_timestamp]].each do |slots, bound|
  nearest_before = timestamps_arr.select { |ts| ts < bound }.last
  next if nearest_before.nil?

  slots[:file0] = files_h[nearest_before]
  slots[:timestamp0] = nearest_before
end

# p start_files

# Returns the element that directly follows +timestamp+ in +arr+.
# Yields nil when +timestamp+ is not present or is the last element.
def find_next(arr, timestamp)
  position = arr.index(timestamp)
  return nil if position.nil?

  # Indexing past the end of an Array already gives nil.
  arr[position + 1]
end

# Returns the element that directly precedes +timestamp+ in +arr+.
# Yields nil when +timestamp+ is not present or is the first element.
def find_prev(arr, timestamp)
  position = arr.index(timestamp)
  # Guard position 0 explicitly: arr[-1] would wrap around to the last element.
  return nil if position.nil? || position.zero?

  arr[position - 1]
end

# Stores the de-duplicated concatenation of the start and end file lists
# (start files first) under :merged_files, together with its size under
# :merged_files_count. Mutates +result_h+ and returns the count.
def merge_files(result_h)
  combined = [*result_h[:start_files], *result_h[:end_files]].uniq
  result_h[:merged_files] = combined
  result_h[:merged_files_count] = combined.count
end

# Simplified capture files example: say we are searching for a pcap with start_timestamp 1010 and end_timestamp 1022
# capture00 - 1000
# capture01 - 1008  <----- start_files[:file0]
# capture02 - 1012  <----- start_files[:file]
# capture03 - 1016  <----- start_files[:file1]
# capture04 - 1020  <----- end_files[:file0]
# capture05 - 1024  <----- end_files[:file]
# capture06 - 1028


# ---- Select the start file(s) ----
# Keep the value the caller asked for. start_timestamp is overwritten below
# with the timestamp of the file actually picked (the all-files range filter
# at the end of the script uses it), but the warnings and the threshold test
# must refer to the requested value.
requested_start = start_timestamp

if start_files[:timestamp0].nil?
  # No file is older than the requested start, i.e. every file is at or
  # after it. Fall back to the earliest file and flag the result.
  earliest = timestamps_arr.first
  result_h[:start_files_count] = 1
  result_h[:start_files] << files_h[earliest]
  start_timestamp = earliest
  start_inacurate = 1
  warn "No file found with modification timestamp earlier than start_timestamp #{requested_start} (#{Time.at(requested_start).to_datetime})"
  warn "Using earliest available file #{files_h[earliest]}. This can be inacurate."
else
  # The matched file is the one right after the newest file older than the start.
  start_files[:timestamp] = find_next(timestamps_arr, start_files[:timestamp0])

  if start_files[:timestamp].nil?
    # The newest older file is also the newest file overall, so the requested
    # start lies beyond everything available. No start file is reported.
    warn "Start file(s) not found, start_timestamp #{requested_start} is later than latest available file's timestamp #{timestamps_arr.last}"
  else
    start_files[:file] = files_h[start_files[:timestamp]]
    start_timestamp = start_files[:timestamp]
    result_h[:start_files_count] += 1
    result_h[:start_files] << start_files[:file]

    start_files[:timestamp1] = find_next(timestamps_arr, start_files[:timestamp])
    unless start_files[:timestamp1].nil?
      start_files[:file1] = files_h[start_files[:timestamp1]]

      # Also report the following file when the requested start plus the
      # threshold reaches past the matched file's timestamp. The requested
      # value is used on purpose: comparing the already-overwritten
      # start_timestamp would reduce this test to "search_threshold > 0".
      if requested_start + search_threshold > start_files[:timestamp]
        result_h[:start_files_count] += 1
        result_h[:start_files] << start_files[:file1]
      end
    end
  end
end

# ---- Select the end file(s) ----
# Keep the value the caller asked for. end_timestamp is overwritten below
# with the timestamp of the file actually picked, but the warnings and the
# threshold test must refer to the requested value.
requested_end = end_timestamp

if end_files[:timestamp0].nil?
  # No file is older than the requested end.
  if timestamps_arr.last < start_timestamp
    # The start lies beyond every available file, so there is nothing
    # sensible to pair an end file with. Diagnostics go to stderr so that
    # stdout carries only the JSON report.
    warn "End file(s) not found, start_timestamp #{start_timestamp} is later than latest available file's timestamp #{timestamps_arr.last}"
  else
    # NOTE(review): this falls back to the *latest* file even though the
    # requested end is not after any file - confirm this is intended.
    latest = timestamps_arr.last
    result_h[:end_files_count] = 1
    result_h[:end_files] << files_h[latest]
    end_timestamp = latest
    end_inacurate = 1
    warn "Using #{files_h[latest]} as end file. This can be inacurate."
  end
else
  # The matched file is the one right after the newest file older than the end.
  end_files[:timestamp] = find_next(timestamps_arr, end_files[:timestamp0])

  if end_files[:timestamp].nil?
    # The requested end lies beyond every available file. No end file is reported.
    warn "End file(s) not found, end_timestamp #{requested_end} is later than latest available file's timestamp #{timestamps_arr.last}"
  else
    end_files[:file] = files_h[end_files[:timestamp]]
    end_timestamp = end_files[:timestamp]
    result_h[:end_files_count] += 1
    result_h[:end_files] << end_files[:file]

    # Also report the preceding file (placed first) when the requested end
    # minus the threshold reaches back before that file's timestamp. The
    # requested value is used on purpose, not the overwritten end_timestamp.
    if requested_end - search_threshold < end_files[:timestamp0]
      result_h[:end_files_count] += 1
      result_h[:end_files].unshift(end_files[:file0])
    end
  end
end

# Totals and the default merged list (start files followed by end files).
result_h[:files_found] = result_h[:start_files_count] + result_h[:end_files_count]
merge_files(result_h)

# A guessed start or end makes the range unreliable, so the all-files
# listing is switched off in that case.
if [start_inacurate, end_inacurate].include?(1)
  warn "Start or End files are inacurate, will not search for all files"
  return_all_files = 0
end

# When requested, replace the merged list with every file whose timestamp
# lies between the selected start and end timestamps (both inclusive).
if return_all_files == 1 && !end_timestamp.nil? && !start_timestamp.nil?
  lower_bound = start_timestamp.to_i
  upper_bound = end_timestamp.to_i
  in_range = files_h.select { |timestamp, _file| timestamp.to_i.between?(lower_bound, upper_bound) }
  result_h[:merged_files] = in_range.values
  result_h[:merged_files_count] = result_h[:merged_files].count
end

# The JSON report is the only thing written to stdout by this section.
puts JSON.pretty_generate(result_h)

# Example:
# {
#  "files_found": 3,
#  "start_files_count": 2,
#  "start_files": [
#    "capture03",
#    "capture04"
#  ],
#  "end_files_count": 1,
#  "end_files": [
#    "capture00"
#  ]