diff --git a/app/services/exports/case_log_export_service.rb b/app/services/exports/case_log_export_service.rb index e8aca786e..4844e85cb 100644 --- a/app/services/exports/case_log_export_service.rb +++ b/app/services/exports/case_log_export_service.rb @@ -1,16 +1,27 @@ module Exports class CaseLogExportService + QUARTERS = { + 0 => "jan_mar", + 1 => "apr_jun", + 2 => "jul_sep", + 3 => "oct_dec", + }.freeze + + LOG_ID_OFFSET = 300_000_000_000 + MAX_XML_RECORDS = 10_000 + def initialize(storage_service, logger = Rails.logger) @storage_service = storage_service @logger = logger end - def export_case_logs - current_time = Time.zone.now - case_logs = retrieve_case_logs(current_time) - export = save_export_run(current_time) - write_master_manifest(export) - write_export_data(case_logs) + def export_case_logs(full_update: false) + start_time = Time.zone.now + case_logs = retrieve_case_logs(start_time, full_update) + export = build_export_run(start_time, full_update) + daily_run = get_daily_run_number + archive_datetimes = write_export_archive(export, case_logs) + write_master_manifest(daily_run, archive_datetimes) export.save! end @@ -20,57 +31,127 @@ module Exports field_name.starts_with?("details_known_") || pattern_age.match(field_name) || omitted_attrs.include?(field_name) ? true : false end - LOG_ID_OFFSET = 300_000_000_000 - private - def save_export_run(current_time) + def get_daily_run_number today = Time.zone.today - last_daily_run_number = LogsExport.where(created_at: today.beginning_of_day..today.end_of_day).maximum(:daily_run_number) - last_daily_run_number = 0 if last_daily_run_number.nil? + LogsExport.where(created_at: today.beginning_of_day..today.end_of_day).count + 1 + end + + def build_export_run(current_time, full_update) + if LogsExport.count.zero? + return LogsExport.new(started_at: current_time) + end - export = LogsExport.new - export.daily_run_number = last_daily_run_number + 1 - export.started_at = current_time - export + base_number = LogsExport.maximum(:base_number) + increment_number = LogsExport.where(base_number:).maximum(:increment_number) + + if full_update + base_number += 1 + increment_number = 1 + else + increment_number += 1 + end + + LogsExport.new(started_at: current_time, base_number:, increment_number:) end - def write_master_manifest(export) + def write_master_manifest(daily_run, archive_datetimes) today = Time.zone.today - increment_number = export.daily_run_number.to_s.rjust(4, "0") + increment_number = daily_run.to_s.rjust(4, "0") month = today.month.to_s.rjust(2, "0") day = today.day.to_s.rjust(2, "0") file_path = "Manifest_#{today.year}_#{month}_#{day}_#{increment_number}.csv" - string_io = build_manifest_csv_io + string_io = build_manifest_csv_io(archive_datetimes) @storage_service.write_file(file_path, string_io) end - def write_export_data(case_logs) - string_io = build_export_xml_io(case_logs) - file_path = "#{get_folder_name}/#{get_file_name}.xml" - @storage_service.write_file(file_path, string_io) + def get_archive_name(case_log, base_number, increment) + return unless case_log.startdate + + collection_start = case_log.collection_start_year + month = case_log.startdate.month + quarter = QUARTERS[(month - 1) / 3] + base_number_str = "f#{base_number.to_s.rjust(4, '0')}" + increment_str = "inc#{increment.to_s.rjust(4, '0')}" + "core_#{collection_start}_#{collection_start + 1}_#{quarter}_#{base_number_str}_#{increment_str}" + end + + def write_export_archive(export, case_logs) + # Order case logs per archive + case_logs_per_archive = {} + case_logs.each do |case_log| + archive = get_archive_name(case_log, export.base_number, export.increment_number) + next unless archive + + if case_logs_per_archive.key?(archive) + case_logs_per_archive[archive] << case_log + else + case_logs_per_archive[archive] = [case_log] + end + end + + # Write all archives + archive_datetimes = {} + case_logs_per_archive.each do |archive, case_logs_to_export| + manifest_xml = build_manifest_xml(case_logs_to_export.count) + zip_io = Zip::File.open_buffer(StringIO.new) + zip_io.add("manifest.xml", manifest_xml) + + part_number = 1 + case_logs_to_export.each_slice(MAX_XML_RECORDS) do |case_logs_slice| + data_xml = build_export_xml(case_logs_slice) + part_number_str = "pt#{part_number.to_s.rjust(3, '0')}" + zip_io.add("#{archive}_#{part_number_str}.xml", data_xml) + part_number += 1 + end + + @storage_service.write_file("#{archive}.zip", zip_io.write_buffer) + archive_datetimes[archive] = Time.zone.now + end + + archive_datetimes end - def retrieve_case_logs(current_time) + def retrieve_case_logs(start_time, full_update) recent_export = LogsExport.order("started_at").last - if recent_export - params = { from: recent_export.started_at, to: current_time } + if !full_update && recent_export + params = { from: recent_export.started_at, to: start_time } CaseLog.where("updated_at >= :from and updated_at <= :to", params) else - params = { to: current_time } + params = { to: start_time } CaseLog.where("updated_at <= :to", params) end end - def build_manifest_csv_io + def build_manifest_csv_io(archive_datetimes) headers = ["zip-name", "date-time zipped folder generated", "zip-file-uri"] csv_string = CSV.generate do |csv| csv << headers + archive_datetimes.each do |archive, datetime| + csv << [archive, datetime, "#{archive}.zip"] + end end StringIO.new(csv_string) end - def build_export_xml_io(case_logs) + def xml_doc_to_temp_file(xml_doc) + file = Tempfile.new + xml_doc.write_xml_to(file, encoding: "UTF-8") + file.rewind + file + end + + def build_manifest_xml(record_number) + doc = Nokogiri::XML("") + doc.at("report") << doc.create_element("form-data-summary") + doc.at("form-data-summary") << doc.create_element("records") + doc.at("records") << doc.create_element("count-of-records", record_number) + + xml_doc_to_temp_file(doc) + end + + def build_export_xml(case_logs) doc = Nokogiri::XML("") case_logs.each do |case_log| @@ -87,23 +168,8 @@ module Exports end form << doc.create_element("providertype", case_log.owning_organisation.read_attribute_before_type_cast(:provider_type)) end - doc.write_xml_to(StringIO.new, encoding: "UTF-8") - end - - def get_folder_name - "core_#{day_as_string}" - end - - def get_file_name - "dat_core_#{day_as_string}_#{increment_as_string}" - end - - def day_as_string - Time.current.strftime("%Y_%m_%d") - end - def increment_as_string(increment = 1) - sprintf("%04d", increment) + xml_doc_to_temp_file(doc) end end end diff --git a/db/migrate/20220518115438_add_missing_export_fields.rb b/db/migrate/20220518115438_add_missing_export_fields.rb new file mode 100644 index 000000000..0b7203ac9 --- /dev/null +++ b/db/migrate/20220518115438_add_missing_export_fields.rb @@ -0,0 +1,10 @@ +class AddMissingExportFields < ActiveRecord::Migration[7.0] + def change + change_table :logs_exports, bulk: true do |t| + t.column :base_number, :integer, default: 1, null: false + t.column :increment_number, :integer, default: 1, null: false + t.remove :daily_run_number, type: :integer + t.change_null :started_at, false + end + end +end diff --git a/db/schema.rb b/db/schema.rb index be67fb5f8..a65c0df64 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema[7.0].define(version: 2022_05_16_111514) do +ActiveRecord::Schema[7.0].define(version: 2022_05_18_115438) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -259,9 +259,10 @@ ActiveRecord::Schema[7.0].define(version: 2022_05_16_111514) do end create_table "logs_exports", force: :cascade do |t| - t.integer "daily_run_number" t.datetime "created_at", default: -> { "CURRENT_TIMESTAMP" } - t.datetime "started_at" + t.datetime "started_at", precision: nil, null: false + t.integer "base_number", default: 1, null: false + t.integer "increment_number", default: 1, null: false end create_table "organisation_las", force: :cascade do |t| diff --git a/lib/tasks/data_export.rake b/lib/tasks/data_export.rake index 748a3009e..7e4f766c2 100644 --- a/lib/tasks/data_export.rake +++ b/lib/tasks/data_export.rake @@ -1,7 +1,8 @@ namespace :core do desc "Export data XMLs for import into Central Data System (CDS)" - task data_export: :environment do + task :data_export, %i[full_update] => :environment do |_task, args| storage_service = StorageService.new(PaasConfigurationService.new, ENV["EXPORT_PAAS_INSTANCE"]) - Exports::CaseLogExportService.new(storage_service).export_case_logs + full_update = args[:full_update].present? && args[:full_update] == "true" + Exports::CaseLogExportService.new(storage_service).export_case_logs(full_update:) end end diff --git a/spec/fixtures/exports/manifest.xml b/spec/fixtures/exports/manifest.xml new file mode 100644 index 000000000..8c171ae01 --- /dev/null +++ b/spec/fixtures/exports/manifest.xml @@ -0,0 +1,8 @@ + + + + + {recno} + + + diff --git a/spec/services/exports/case_log_export_service_spec.rb b/spec/services/exports/case_log_export_service_spec.rb index 3f4007c70..fd5c1704c 100644 --- a/spec/services/exports/case_log_export_service_spec.rb +++ b/spec/services/exports/case_log_export_service_spec.rb @@ -3,29 +3,32 @@ require "rails_helper" RSpec.describe Exports::CaseLogExportService do let(:storage_service) { instance_double(StorageService) } - let(:export_filepath) { "spec/fixtures/exports/case_logs.xml" } - let(:export_file) { File.open(export_filepath, "r:UTF-8") } + let(:export_file) { File.open("spec/fixtures/exports/case_logs.xml", "r:UTF-8") } + let(:local_manifest_file) { File.open("spec/fixtures/exports/manifest.xml", "r:UTF-8") } - let(:expected_data_filename) { "core_2022_02_08/dat_core_2022_02_08_0001.xml" } - let(:expected_master_manifest_filename) { "Manifest_2022_02_08_0001.csv" } - let(:expected_master_manifest_filename2) { "Manifest_2022_02_08_0002.csv" } + let(:expected_master_manifest_filename) { "Manifest_2022_05_01_0001.csv" } + let(:expected_master_manifest_rerun) { "Manifest_2022_05_01_0002.csv" } + let(:expected_zip_filename) { "core_2021_2022_jan_mar_f0001_inc0001.zip" } + let(:expected_manifest_filename) { "manifest.xml" } - let(:case_log) { FactoryBot.create(:case_log, :completed) } - - def replace_entity_ids(export_template) + def replace_entity_ids(case_log, export_template) export_template.sub!(/\{id\}/, (case_log["id"] + Exports::CaseLogExportService::LOG_ID_OFFSET).to_s) export_template.sub!(/\{owning_org_id\}/, case_log["owning_organisation_id"].to_s) export_template.sub!(/\{managing_org_id\}/, case_log["managing_organisation_id"].to_s) export_template.sub!(/\{created_by_id\}/, case_log["created_by_id"].to_s) end + def replace_record_number(export_template, record_number) + export_template.sub!(/\{recno\}/, record_number.to_s) + end + context "when exporting daily case logs" do subject(:export_service) { described_class.new(storage_service) } - let(:case_log) { FactoryBot.create(:case_log, :completed) } + let(:start_time) { Time.zone.local(2022, 5, 1) } before do - Timecop.freeze(case_log.updated_at) + Timecop.freeze(start_time) allow(storage_service).to receive(:write_file) end @@ -45,75 +48,168 @@ RSpec.describe Exports::CaseLogExportService do end end - context "and case logs are available for export" do - let(:time_now) { Time.zone.now } + context "and one case log is available for export" do + let!(:case_log) { FactoryBot.create(:case_log, :completed) } + let(:expected_data_filename) { "core_2021_2022_jan_mar_f0001_inc0001_pt001.xml" } - before do - Timecop.freeze(time_now) - case_log + it "generates a ZIP export file with the expected filename" do + expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) + export_service.export_case_logs end - after do - LogsExport.destroy_all + it "generates an XML manifest file with the expected filename within the ZIP file" do + expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content| + entry = Zip::File.open_buffer(content).find_entry(expected_manifest_filename) + expect(entry).not_to be_nil + expect(entry.name).to eq(expected_manifest_filename) + end + export_service.export_case_logs end - it "generates an XML export file with the expected filename" do - expect(storage_service).to receive(:write_file).with(expected_data_filename, any_args) + it "generates an XML export file with the expected filename within the ZIP file" do + expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content| + entry = Zip::File.open_buffer(content).find_entry(expected_data_filename) + expect(entry).not_to be_nil + expect(entry.name).to eq(expected_data_filename) + end export_service.export_case_logs end - it "generates an XML export file with the expected content" do - actual_content = nil - expected_content = replace_entity_ids(export_file.read) - allow(storage_service).to receive(:write_file).with(expected_data_filename, any_args) { |_, arg2| actual_content = arg2&.string } + it "generates an XML manifest file with the expected content within the ZIP file" do + expected_content = replace_record_number(local_manifest_file.read, 1) + expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content| + entry = Zip::File.open_buffer(content).find_entry(expected_manifest_filename) + expect(entry).not_to be_nil + expect(entry.get_input_stream.read).to eq(expected_content) + end export_service.export_case_logs - expect(actual_content).to eq(expected_content) end - it "creates a logs export record in a database with correct time" do + it "generates an XML export file with the expected content within the ZIP file" do + expected_content = replace_entity_ids(case_log, export_file.read) + expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content| + entry = Zip::File.open_buffer(content).find_entry(expected_data_filename) + expect(entry).not_to be_nil + expect(entry.get_input_stream.read).to eq(expected_content) + end + export_service.export_case_logs - records_from_db = ActiveRecord::Base.connection.execute("select started_at, id from logs_exports ").to_a - expect(records_from_db[0]["started_at"]).to eq(time_now) - expect(records_from_db.count).to eq(1) + end + end + + context "and multiple case logs are available for export on different periods" do + let(:expected_zip_filename2) { "core_2022_2023_apr_jun_f0001_inc0001.zip" } + + before do + FactoryBot.create(:case_log, startdate: Time.zone.local(2022, 2, 1)) + FactoryBot.create(:case_log, startdate: Time.zone.local(2022, 4, 1)) + end + + context "when case logs are across multiple quarters" do + it "generates multiple ZIP export files with the expected filenames" do + expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) + expect(storage_service).to receive(:write_file).with(expected_zip_filename2, any_args) + + export_service.export_case_logs + end + end + end + + context "and multiple case logs are available for export on same quarter" do + before do + FactoryBot.create(:case_log, startdate: Time.zone.local(2022, 2, 1)) + FactoryBot.create(:case_log, startdate: Time.zone.local(2022, 3, 20)) end - it "gets the logs for correct timeframe" do - start_time = Time.zone.local(2022, 4, 13, 2, 2, 2) - export = LogsExport.new(started_at: start_time, daily_run_number: 1) - export.save! - params = { from: start_time, to: time_now } - allow(CaseLog).to receive(:where).with("updated_at >= :from and updated_at <= :to", params).once.and_return([]) + it "generates an XML manifest file with the expected content within the ZIP file" do + expected_content = replace_record_number(local_manifest_file.read, 2) + expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content| + entry = Zip::File.open_buffer(content).find_entry(expected_manifest_filename) + expect(entry).not_to be_nil + expect(entry.get_input_stream.read).to eq(expected_content) + end + export_service.export_case_logs end - context "when this is the first export" do - it "gets the logs for the timeframe up until the current time" do - params = { to: time_now } - allow(CaseLog).to receive(:where).with("updated_at <= :to", params).once.and_return([]) + it "creates a logs export record in a database with correct time" do + expect { export_service.export_case_logs } + .to change(LogsExport, :count).by(1) + expect(LogsExport.last.started_at).to eq(start_time) + end + + context "when this is the first export (full)" do + it "records a ZIP archive in the master manifest (existing case logs)" do + expect(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args) do |_, csv_content| + csv = CSV.parse(csv_content, headers: true) + expect(csv&.count).to be > 0 + end + + export_service.export_case_logs + end + end + + context "when this is a second export (partial)" do + before do + start_time = Time.zone.local(2022, 4, 1) + LogsExport.new(started_at: start_time).save! + end + + it "does not add any entry in the master manifest (no case logs)" do + expect(storage_service).to receive(:write_file).with(expected_master_manifest_rerun, any_args) do |_, csv_content| + csv = CSV.parse(csv_content, headers: true) + expect(csv&.count).to eq(0) + end export_service.export_case_logs end end end - context "and a previous export has run the same day" do + context "and a previous export has run the same day with logs" do before do + FactoryBot.create(:case_log, startdate: Time.zone.local(2022, 2, 1)) export_service.export_case_logs end it "increments the master manifest number by 1" do - expect(storage_service).to receive(:write_file).with(expected_master_manifest_filename2, any_args) + expect(storage_service).to receive(:write_file).with(expected_master_manifest_rerun, any_args) export_service.export_case_logs end + + context "and we trigger another full update" do + it "increments the base number" do + export_service.export_case_logs(full_update: true) + expect(LogsExport.last.base_number).to eq(2) + end + + it "resets the increment number" do + export_service.export_case_logs(full_update: true) + expect(LogsExport.last.increment_number).to eq(1) + end + + it "records a ZIP archive in the master manifest (existing case logs)" do + expect(storage_service).to receive(:write_file).with(expected_master_manifest_rerun, any_args) do |_, csv_content| + csv = CSV.parse(csv_content, headers: true) + expect(csv&.count).to be > 0 + end + export_service.export_case_logs(full_update: true) + end + + it "generates a ZIP export file with the expected filename" do + expect(storage_service).to receive(:write_file).with("core_2021_2022_jan_mar_f0002_inc0001.zip", any_args) + export_service.export_case_logs(full_update: true) + end + end end - context "when export has an error" do + context "and the export has an error" do + before { allow(storage_service).to receive(:write_file).and_raise(StandardError.new("This is an exception")) } + it "does not save a record in the database" do - allow(storage_service).to receive(:write_file).and_raise(StandardError.new("This is an exception")) - export = LogsExport.new - allow(LogsExport).to receive(:new).and_return(export) - expect(export).not_to receive(:save!) - expect { export_service.export_case_logs }.to raise_error(StandardError) + expect { export_service.export_case_logs } + .to raise_error(StandardError) + .and(change(LogsExport, :count).by(0)) end end end