module Exports
  # Exports case logs through the injected storage service, either as one CSV
  # file or as zipped XML archives accompanied by a CSV master manifest.
  class CaseLogExportService
    # Maps a zero-based quarter index, (month - 1) / 3, to its archive-name suffix.
    QUARTERS = {
      0 => "jan_mar",
      1 => "apr_jun",
      2 => "jul_sep",
      3 => "oct_dec",
    }.freeze

    # Added to a log's id to produce the exported "newform" value.
    LOG_ID_OFFSET = 300_000_000_000

    # Maximum number of case logs written into a single XML part of an archive.
    MAX_XML_RECORDS = 10_000

    # Attribute names that are never exported.
    OMITTED_ATTRIBUTES = %w[id old_form_id old_id ethnic_group].freeze

    # Matches the ageN_known helper attributes (unanchored, single digit).
    AGE_KNOWN_PATTERN = /age\d_known/

    # @param storage_service object responding to write_file(path, io)
    # @param logger logger to keep on the instance; defaults to Rails.logger
    def initialize(storage_service, logger = Rails.logger)
      @storage_service = storage_service
      @logger = logger
    end

    # Writes every case log updated up to now into "export_<date>.csv".
    def export_csv_case_logs
      # One timestamp drives both the file name and the query cut-off.
      current_time = Time.zone.now
      time_str = current_time.strftime("%F").underscore
      case_logs = retrieve_case_logs(current_time, true)
      csv_io = build_export_csv(case_logs)
      @storage_service.write_file("export_#{time_str}.csv", csv_io)
    end

    # Writes the XML archives and the master manifest, then saves the
    # LogsExport record describing this run.
    #
    # @param full_update [Boolean] when true, export all logs instead of only
    #   those updated since the most recent export started
    def export_xml_case_logs(full_update: false)
      start_time = Time.zone.now
      case_logs = retrieve_case_logs(start_time, full_update)
      export = build_export_run(start_time, full_update)
      daily_run = get_daily_run_number
      archive_datetimes = write_export_archive(export, case_logs)
      export.empty_export = archive_datetimes.empty?
      write_master_manifest(daily_run, archive_datetimes)
      export.save!
    end

    private

    # Number of LogsExport records created today, plus one for the current run.
    def get_daily_run_number
      today = Time.zone.today
      LogsExport.where(created_at: today.beginning_of_day..today.end_of_day).count + 1
    end

    # Builds (without saving) the LogsExport record for this run. Numbering is
    # derived only from previous exports that actually contained data.
    def build_export_run(current_time, full_update)
      previous_exports_with_data = LogsExport.where(empty_export: false)
      # NOTE(review): base_number / increment_number are left to LogsExport's
      # own defaults here - presumably defined on the model or schema; confirm.
      return LogsExport.new(started_at: current_time) if previous_exports_with_data.empty?

      base_number = previous_exports_with_data.maximum(:base_number)
      increment_number = previous_exports_with_data.where(base_number:).maximum(:increment_number)

      if full_update
        # A full update starts a new base and restarts the increment counter.
        base_number += 1
        increment_number = 1
      else
        increment_number += 1
      end

      LogsExport.new(started_at: current_time, base_number:, increment_number:)
    end

    # Writes "Manifest_<yyyy>_<mm>_<dd>_<run>.csv" listing the archives produced.
    def write_master_manifest(daily_run, archive_datetimes)
      today = Time.zone.today
      run_number = daily_run.to_s.rjust(4, "0")
      month = today.month.to_s.rjust(2, "0")
      day = today.day.to_s.rjust(2, "0")
      file_path = "Manifest_#{today.year}_#{month}_#{day}_#{run_number}.csv"
      string_io = build_manifest_csv_io(archive_datetimes)
      @storage_service.write_file(file_path, string_io)
    end

    # Returns the archive name for a case log, or nil when it has no startdate.
    def get_archive_name(case_log, base_number, increment)
      return unless case_log.startdate

      collection_start = case_log.collection_start_year
      quarter = QUARTERS[(case_log.startdate.month - 1) / 3]
      base_number_str = "f#{base_number.to_s.rjust(4, '0')}"
      increment_str = "inc#{increment.to_s.rjust(4, '0')}"
      "core_#{collection_start}_#{collection_start + 1}_#{quarter}_#{base_number_str}_#{increment_str}"
    end

    # Writes one zip archive per archive name.
    #
    # @return [Hash] archive name => time at which the archive was written
    def write_export_archive(export, case_logs)
      case_logs_per_archive = group_case_logs_by_archive(export, case_logs)

      case_logs_per_archive.each_with_object({}) do |(archive, case_logs_to_export), archive_datetimes|
        write_archive(archive, case_logs_to_export)
        archive_datetimes[archive] = Time.zone.now
      end
    end

    # Groups case logs by archive name, dropping logs that have no archive.
    def group_case_logs_by_archive(export, case_logs)
      grouped = case_logs.group_by do |case_log|
        get_archive_name(case_log, export.base_number, export.increment_number)
      end
      grouped.delete(nil) # get_archive_name returns nil for logs without a startdate
      grouped
    end

    # Builds the zip for a single archive (manifest.xml plus one XML part per
    # MAX_XML_RECORDS logs) and hands it to the storage service.
    def write_archive(archive, case_logs_to_export)
      temp_files = []
      zip_file = Zip::File.open_buffer(StringIO.new)

      manifest_xml = build_manifest_xml(case_logs_to_export.count)
      temp_files << manifest_xml
      zip_file.add("manifest.xml", manifest_xml)

      case_logs_to_export.each_slice(MAX_XML_RECORDS).with_index(1) do |case_logs_slice, part_number|
        data_xml = build_export_xml(case_logs_slice)
        temp_files << data_xml
        part_number_str = "pt#{part_number.to_s.rjust(3, '0')}"
        zip_file.add("#{archive}_#{part_number_str}.xml", data_xml)
      end

      zip_io = zip_file.write_buffer
      # Required by S3 to avoid Aws::S3::Errors::BadDigest
      zip_io.rewind
      @storage_service.write_file("#{archive}.zip", zip_io)
    ensure
      # The zip buffer has been written by now (or we are failing), so the
      # temporary XML files are no longer needed: close and unlink them.
      temp_files.each(&:close!)
    end

    # Case logs updated since the most recent export started and up to
    # start_time; all logs updated up to start_time when full_update is true
    # or no export has been recorded yet.
    def retrieve_case_logs(start_time, full_update)
      recent_export = LogsExport.order("started_at").last

      if !full_update && recent_export
        params = { from: recent_export.started_at, to: start_time }
        CaseLog.where("updated_at >= :from and updated_at <= :to", params)
      else
        params = { to: start_time }
        CaseLog.where("updated_at <= :to", params)
      end
    end

    # Builds the master manifest CSV (header plus one row per archive).
    def build_manifest_csv_io(archive_datetimes)
      headers = ["zip-name", "date-time zipped folder generated", "zip-file-uri"]
      csv_string = CSV.generate do |csv|
        csv << headers
        archive_datetimes.each do |archive, datetime|
          csv << [archive, datetime, "#{archive}.zip"]
        end
      end
      StringIO.new(csv_string)
    end

    # Serialises the XML document into a rewound Tempfile; the caller is
    # responsible for closing it.
    def xml_doc_to_temp_file(xml_doc)
      file = Tempfile.new
      xml_doc.write_xml_to(file, encoding: "UTF-8")
      file.rewind
      file
    end

    # Builds report/form-data-summary/records/count-of-records as a Tempfile.
    def build_manifest_xml(record_number)
      # The document needs a <report> root, otherwise doc.at("report") is nil.
      doc = Nokogiri::XML("<report/>")
      doc.at("report") << doc.create_element("form-data-summary")
      doc.at("form-data-summary") << doc.create_element("records")
      doc.at("records") << doc.create_element("count-of-records", record_number)
      xml_doc_to_temp_file(doc)
    end

    # Mutates attribute_hash in place with the export-specific values.
    def apply_cds_transformation!(attribute_hash)
      # OLD_CORE FORM ID support
      attribute_hash["form"] = attribute_hash["old_form_id"]
      attribute_hash["newform"] = attribute_hash["id"] + LOG_ID_OFFSET

      # Age refused
      (1..8).each do |index|
        attribute_hash["age#{index}"] = -9 if attribute_hash["age#{index}_known"] == 1
      end
    end

    # Removes, in place, every attribute name that must not be exported.
    def filter_keys!(attributes)
      attributes.reject! { |attribute| is_omitted_field?(attribute) }
    end

    # @return [Boolean] true when the field must be left out of the export
    def is_omitted_field?(field_name)
      field_name.start_with?("details_known_") ||
        AGE_KNOWN_PATTERN.match?(field_name) ||
        OMITTED_ATTRIBUTES.include?(field_name)
    end

    # Builds the CSV export; the header row is taken from the first log's
    # (filtered) attribute names, so no rows are written for an empty set.
    def build_export_csv(case_logs)
      csv_io = CSV.generate do |csv|
        attribute_keys = nil
        case_logs.each do |case_log|
          attribute_hash = case_log.attributes_before_type_cast
          apply_cds_transformation!(attribute_hash)
          if attribute_keys.nil?
            attribute_keys = attribute_hash.keys
            filter_keys!(attribute_keys)
            csv << attribute_keys
          end
          csv << attribute_keys.map { |attribute_key| attribute_hash[attribute_key] }
        end
      end

      StringIO.new(csv_io)
    end

    # Builds a <forms> document with one <form> element per case log and
    # returns it as a Tempfile.
    def build_export_xml(case_logs)
      # The document needs a <forms> root, otherwise doc.at("forms") is nil.
      doc = Nokogiri::XML("<forms/>")

      case_logs.each do |case_log|
        attribute_hash = case_log.attributes_before_type_cast
        apply_cds_transformation!(attribute_hash)
        form = doc.create_element("form")
        doc.at("forms") << form

        attribute_hash.each do |key, value|
          next if is_omitted_field?(key)

          form << doc.create_element(key, value)
        end

        provider_type = case_log.owning_organisation.read_attribute_before_type_cast(:provider_type)
        form << doc.create_element("providertype", provider_type)
      end

      xml_doc_to_temp_file(doc)
    end
  end
end