diff --git a/app/services/exports/case_log_export_service.rb b/app/services/exports/case_log_export_service.rb
index e8aca786e..4844e85cb 100644
--- a/app/services/exports/case_log_export_service.rb
+++ b/app/services/exports/case_log_export_service.rb
@@ -1,16 +1,27 @@
module Exports
class CaseLogExportService
+ QUARTERS = {
+ 0 => "jan_mar",
+ 1 => "apr_jun",
+ 2 => "jul_sep",
+ 3 => "oct_dec",
+ }.freeze
+
+ LOG_ID_OFFSET = 300_000_000_000
+ MAX_XML_RECORDS = 10_000
+
def initialize(storage_service, logger = Rails.logger)
@storage_service = storage_service
@logger = logger
end
- def export_case_logs
- current_time = Time.zone.now
- case_logs = retrieve_case_logs(current_time)
- export = save_export_run(current_time)
- write_master_manifest(export)
- write_export_data(case_logs)
+ def export_case_logs(full_update: false)
+ start_time = Time.zone.now
+ case_logs = retrieve_case_logs(start_time, full_update)
+ export = build_export_run(start_time, full_update)
+ daily_run = get_daily_run_number
+ archive_datetimes = write_export_archive(export, case_logs)
+ write_master_manifest(daily_run, archive_datetimes)
export.save!
end
@@ -20,57 +31,127 @@ module Exports
field_name.starts_with?("details_known_") || pattern_age.match(field_name) || omitted_attrs.include?(field_name) ? true : false
end
- LOG_ID_OFFSET = 300_000_000_000
-
private
- def save_export_run(current_time)
+ def get_daily_run_number
today = Time.zone.today
- last_daily_run_number = LogsExport.where(created_at: today.beginning_of_day..today.end_of_day).maximum(:daily_run_number)
- last_daily_run_number = 0 if last_daily_run_number.nil?
+ LogsExport.where(created_at: today.beginning_of_day..today.end_of_day).count + 1
+ end
+
+ def build_export_run(current_time, full_update)
+ if LogsExport.count.zero?
+ return LogsExport.new(started_at: current_time)
+ end
- export = LogsExport.new
- export.daily_run_number = last_daily_run_number + 1
- export.started_at = current_time
- export
+ base_number = LogsExport.maximum(:base_number)
+ increment_number = LogsExport.where(base_number:).maximum(:increment_number)
+
+ if full_update
+ base_number += 1
+ increment_number = 1
+ else
+ increment_number += 1
+ end
+
+ LogsExport.new(started_at: current_time, base_number:, increment_number:)
end
- def write_master_manifest(export)
+ def write_master_manifest(daily_run, archive_datetimes)
today = Time.zone.today
- increment_number = export.daily_run_number.to_s.rjust(4, "0")
+ increment_number = daily_run.to_s.rjust(4, "0")
month = today.month.to_s.rjust(2, "0")
day = today.day.to_s.rjust(2, "0")
file_path = "Manifest_#{today.year}_#{month}_#{day}_#{increment_number}.csv"
- string_io = build_manifest_csv_io
+ string_io = build_manifest_csv_io(archive_datetimes)
@storage_service.write_file(file_path, string_io)
end
- def write_export_data(case_logs)
- string_io = build_export_xml_io(case_logs)
- file_path = "#{get_folder_name}/#{get_file_name}.xml"
- @storage_service.write_file(file_path, string_io)
+ def get_archive_name(case_log, base_number, increment)
+ return unless case_log.startdate
+
+ collection_start = case_log.collection_start_year
+ month = case_log.startdate.month
+ quarter = QUARTERS[(month - 1) / 3]
+ base_number_str = "f#{base_number.to_s.rjust(4, '0')}"
+ increment_str = "inc#{increment.to_s.rjust(4, '0')}"
+ "core_#{collection_start}_#{collection_start + 1}_#{quarter}_#{base_number_str}_#{increment_str}"
+ end
+
+ def write_export_archive(export, case_logs)
+ # Order case logs per archive
+ case_logs_per_archive = {}
+ case_logs.each do |case_log|
+ archive = get_archive_name(case_log, export.base_number, export.increment_number)
+ next unless archive
+
+ if case_logs_per_archive.key?(archive)
+ case_logs_per_archive[archive] << case_log
+ else
+ case_logs_per_archive[archive] = [case_log]
+ end
+ end
+
+ # Write all archives
+ archive_datetimes = {}
+ case_logs_per_archive.each do |archive, case_logs_to_export|
+ manifest_xml = build_manifest_xml(case_logs_to_export.count)
+ zip_io = Zip::File.open_buffer(StringIO.new)
+ zip_io.add("manifest.xml", manifest_xml)
+
+ part_number = 1
+ case_logs_to_export.each_slice(MAX_XML_RECORDS) do |case_logs_slice|
+ data_xml = build_export_xml(case_logs_slice)
+ part_number_str = "pt#{part_number.to_s.rjust(3, '0')}"
+ zip_io.add("#{archive}_#{part_number_str}.xml", data_xml)
+ part_number += 1
+ end
+
+ @storage_service.write_file("#{archive}.zip", zip_io.write_buffer)
+ archive_datetimes[archive] = Time.zone.now
+ end
+
+ archive_datetimes
end
- def retrieve_case_logs(current_time)
+ def retrieve_case_logs(start_time, full_update)
recent_export = LogsExport.order("started_at").last
- if recent_export
- params = { from: recent_export.started_at, to: current_time }
+ if !full_update && recent_export
+ params = { from: recent_export.started_at, to: start_time }
CaseLog.where("updated_at >= :from and updated_at <= :to", params)
else
- params = { to: current_time }
+ params = { to: start_time }
CaseLog.where("updated_at <= :to", params)
end
end
- def build_manifest_csv_io
+ def build_manifest_csv_io(archive_datetimes)
headers = ["zip-name", "date-time zipped folder generated", "zip-file-uri"]
csv_string = CSV.generate do |csv|
csv << headers
+ archive_datetimes.each do |archive, datetime|
+ csv << [archive, datetime, "#{archive}.zip"]
+ end
end
StringIO.new(csv_string)
end
- def build_export_xml_io(case_logs)
+ def xml_doc_to_temp_file(xml_doc)
+ file = Tempfile.new
+ xml_doc.write_xml_to(file, encoding: "UTF-8")
+ file.rewind
+ file
+ end
+
+ def build_manifest_xml(record_number)
+ doc = Nokogiri::XML("<report/>")
+ doc.at("report") << doc.create_element("form-data-summary")
+ doc.at("form-data-summary") << doc.create_element("records")
+ doc.at("records") << doc.create_element("count-of-records", record_number)
+
+ xml_doc_to_temp_file(doc)
+ end
+
+ def build_export_xml(case_logs)
doc = Nokogiri::XML("<forms/>")
case_logs.each do |case_log|
@@ -87,23 +168,8 @@ module Exports
end
form << doc.create_element("providertype", case_log.owning_organisation.read_attribute_before_type_cast(:provider_type))
end
- doc.write_xml_to(StringIO.new, encoding: "UTF-8")
- end
-
- def get_folder_name
- "core_#{day_as_string}"
- end
-
- def get_file_name
- "dat_core_#{day_as_string}_#{increment_as_string}"
- end
-
- def day_as_string
- Time.current.strftime("%Y_%m_%d")
- end
- def increment_as_string(increment = 1)
- sprintf("%04d", increment)
+ xml_doc_to_temp_file(doc)
end
end
end
diff --git a/db/migrate/20220518115438_add_missing_export_fields.rb b/db/migrate/20220518115438_add_missing_export_fields.rb
new file mode 100644
index 000000000..0b7203ac9
--- /dev/null
+++ b/db/migrate/20220518115438_add_missing_export_fields.rb
@@ -0,0 +1,10 @@
+class AddMissingExportFields < ActiveRecord::Migration[7.0]
+ def change
+ change_table :logs_exports, bulk: true do |t|
+ t.column :base_number, :integer, default: 1, null: false
+ t.column :increment_number, :integer, default: 1, null: false
+ t.remove :daily_run_number, type: :integer
+ t.change_null :started_at, false
+ end
+ end
+end
diff --git a/db/schema.rb b/db/schema.rb
index be67fb5f8..a65c0df64 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
-ActiveRecord::Schema[7.0].define(version: 2022_05_16_111514) do
+ActiveRecord::Schema[7.0].define(version: 2022_05_18_115438) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
@@ -259,9 +259,10 @@ ActiveRecord::Schema[7.0].define(version: 2022_05_16_111514) do
end
create_table "logs_exports", force: :cascade do |t|
- t.integer "daily_run_number"
t.datetime "created_at", default: -> { "CURRENT_TIMESTAMP" }
- t.datetime "started_at"
+ t.datetime "started_at", precision: nil, null: false
+ t.integer "base_number", default: 1, null: false
+ t.integer "increment_number", default: 1, null: false
end
create_table "organisation_las", force: :cascade do |t|
diff --git a/lib/tasks/data_export.rake b/lib/tasks/data_export.rake
index 748a3009e..7e4f766c2 100644
--- a/lib/tasks/data_export.rake
+++ b/lib/tasks/data_export.rake
@@ -1,7 +1,8 @@
namespace :core do
desc "Export data XMLs for import into Central Data System (CDS)"
- task data_export: :environment do
+ task :data_export, %i[full_update] => :environment do |_task, args|
storage_service = StorageService.new(PaasConfigurationService.new, ENV["EXPORT_PAAS_INSTANCE"])
- Exports::CaseLogExportService.new(storage_service).export_case_logs
+ full_update = args[:full_update].present? && args[:full_update] == "true"
+ Exports::CaseLogExportService.new(storage_service).export_case_logs(full_update:)
end
end
diff --git a/spec/fixtures/exports/manifest.xml b/spec/fixtures/exports/manifest.xml
new file mode 100644
index 000000000..8c171ae01
--- /dev/null
+++ b/spec/fixtures/exports/manifest.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<report>
+  <form-data-summary>
+    <records>
+      <count-of-records>{recno}</count-of-records>
+    </records>
+  </form-data-summary>
+</report>
diff --git a/spec/services/exports/case_log_export_service_spec.rb b/spec/services/exports/case_log_export_service_spec.rb
index 3f4007c70..fd5c1704c 100644
--- a/spec/services/exports/case_log_export_service_spec.rb
+++ b/spec/services/exports/case_log_export_service_spec.rb
@@ -3,29 +3,32 @@ require "rails_helper"
RSpec.describe Exports::CaseLogExportService do
let(:storage_service) { instance_double(StorageService) }
- let(:export_filepath) { "spec/fixtures/exports/case_logs.xml" }
- let(:export_file) { File.open(export_filepath, "r:UTF-8") }
+ let(:export_file) { File.open("spec/fixtures/exports/case_logs.xml", "r:UTF-8") }
+ let(:local_manifest_file) { File.open("spec/fixtures/exports/manifest.xml", "r:UTF-8") }
- let(:expected_data_filename) { "core_2022_02_08/dat_core_2022_02_08_0001.xml" }
- let(:expected_master_manifest_filename) { "Manifest_2022_02_08_0001.csv" }
- let(:expected_master_manifest_filename2) { "Manifest_2022_02_08_0002.csv" }
+ let(:expected_master_manifest_filename) { "Manifest_2022_05_01_0001.csv" }
+ let(:expected_master_manifest_rerun) { "Manifest_2022_05_01_0002.csv" }
+ let(:expected_zip_filename) { "core_2021_2022_jan_mar_f0001_inc0001.zip" }
+ let(:expected_manifest_filename) { "manifest.xml" }
- let(:case_log) { FactoryBot.create(:case_log, :completed) }
-
- def replace_entity_ids(export_template)
+ def replace_entity_ids(case_log, export_template)
export_template.sub!(/\{id\}/, (case_log["id"] + Exports::CaseLogExportService::LOG_ID_OFFSET).to_s)
export_template.sub!(/\{owning_org_id\}/, case_log["owning_organisation_id"].to_s)
export_template.sub!(/\{managing_org_id\}/, case_log["managing_organisation_id"].to_s)
export_template.sub!(/\{created_by_id\}/, case_log["created_by_id"].to_s)
end
+ def replace_record_number(export_template, record_number)
+ export_template.sub!(/\{recno\}/, record_number.to_s)
+ end
+
context "when exporting daily case logs" do
subject(:export_service) { described_class.new(storage_service) }
- let(:case_log) { FactoryBot.create(:case_log, :completed) }
+ let(:start_time) { Time.zone.local(2022, 5, 1) }
before do
- Timecop.freeze(case_log.updated_at)
+ Timecop.freeze(start_time)
allow(storage_service).to receive(:write_file)
end
@@ -45,75 +48,168 @@ RSpec.describe Exports::CaseLogExportService do
end
end
- context "and case logs are available for export" do
- let(:time_now) { Time.zone.now }
+ context "and one case log is available for export" do
+ let!(:case_log) { FactoryBot.create(:case_log, :completed) }
+ let(:expected_data_filename) { "core_2021_2022_jan_mar_f0001_inc0001_pt001.xml" }
- before do
- Timecop.freeze(time_now)
- case_log
+ it "generates a ZIP export file with the expected filename" do
+ expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args)
+ export_service.export_case_logs
end
- after do
- LogsExport.destroy_all
+ it "generates an XML manifest file with the expected filename within the ZIP file" do
+ expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
+ entry = Zip::File.open_buffer(content).find_entry(expected_manifest_filename)
+ expect(entry).not_to be_nil
+ expect(entry.name).to eq(expected_manifest_filename)
+ end
+ export_service.export_case_logs
end
- it "generates an XML export file with the expected filename" do
- expect(storage_service).to receive(:write_file).with(expected_data_filename, any_args)
+ it "generates an XML export file with the expected filename within the ZIP file" do
+ expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
+ entry = Zip::File.open_buffer(content).find_entry(expected_data_filename)
+ expect(entry).not_to be_nil
+ expect(entry.name).to eq(expected_data_filename)
+ end
export_service.export_case_logs
end
- it "generates an XML export file with the expected content" do
- actual_content = nil
- expected_content = replace_entity_ids(export_file.read)
- allow(storage_service).to receive(:write_file).with(expected_data_filename, any_args) { |_, arg2| actual_content = arg2&.string }
+ it "generates an XML manifest file with the expected content within the ZIP file" do
+ expected_content = replace_record_number(local_manifest_file.read, 1)
+ expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
+ entry = Zip::File.open_buffer(content).find_entry(expected_manifest_filename)
+ expect(entry).not_to be_nil
+ expect(entry.get_input_stream.read).to eq(expected_content)
+ end
export_service.export_case_logs
- expect(actual_content).to eq(expected_content)
end
- it "creates a logs export record in a database with correct time" do
+ it "generates an XML export file with the expected content within the ZIP file" do
+ expected_content = replace_entity_ids(case_log, export_file.read)
+ expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
+ entry = Zip::File.open_buffer(content).find_entry(expected_data_filename)
+ expect(entry).not_to be_nil
+ expect(entry.get_input_stream.read).to eq(expected_content)
+ end
+
export_service.export_case_logs
- records_from_db = ActiveRecord::Base.connection.execute("select started_at, id from logs_exports ").to_a
- expect(records_from_db[0]["started_at"]).to eq(time_now)
- expect(records_from_db.count).to eq(1)
+ end
+ end
+
+ context "and multiple case logs are available for export on different periods" do
+ let(:expected_zip_filename2) { "core_2022_2023_apr_jun_f0001_inc0001.zip" }
+
+ before do
+ FactoryBot.create(:case_log, startdate: Time.zone.local(2022, 2, 1))
+ FactoryBot.create(:case_log, startdate: Time.zone.local(2022, 4, 1))
+ end
+
+ context "when case logs are across multiple quarters" do
+ it "generates multiple ZIP export files with the expected filenames" do
+ expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args)
+ expect(storage_service).to receive(:write_file).with(expected_zip_filename2, any_args)
+
+ export_service.export_case_logs
+ end
+ end
+ end
+
+ context "and multiple case logs are available for export on same quarter" do
+ before do
+ FactoryBot.create(:case_log, startdate: Time.zone.local(2022, 2, 1))
+ FactoryBot.create(:case_log, startdate: Time.zone.local(2022, 3, 20))
end
- it "gets the logs for correct timeframe" do
- start_time = Time.zone.local(2022, 4, 13, 2, 2, 2)
- export = LogsExport.new(started_at: start_time, daily_run_number: 1)
- export.save!
- params = { from: start_time, to: time_now }
- allow(CaseLog).to receive(:where).with("updated_at >= :from and updated_at <= :to", params).once.and_return([])
+ it "generates an XML manifest file with the expected content within the ZIP file" do
+ expected_content = replace_record_number(local_manifest_file.read, 2)
+ expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
+ entry = Zip::File.open_buffer(content).find_entry(expected_manifest_filename)
+ expect(entry).not_to be_nil
+ expect(entry.get_input_stream.read).to eq(expected_content)
+ end
+
export_service.export_case_logs
end
- context "when this is the first export" do
- it "gets the logs for the timeframe up until the current time" do
- params = { to: time_now }
- allow(CaseLog).to receive(:where).with("updated_at <= :to", params).once.and_return([])
+ it "creates a logs export record in a database with correct time" do
+ expect { export_service.export_case_logs }
+ .to change(LogsExport, :count).by(1)
+ expect(LogsExport.last.started_at).to eq(start_time)
+ end
+
+ context "when this is the first export (full)" do
+ it "records a ZIP archive in the master manifest (existing case logs)" do
+ expect(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args) do |_, csv_content|
+ csv = CSV.parse(csv_content, headers: true)
+ expect(csv&.count).to be > 0
+ end
+
+ export_service.export_case_logs
+ end
+ end
+
+ context "when this is a second export (partial)" do
+ before do
+ start_time = Time.zone.local(2022, 4, 1)
+ LogsExport.new(started_at: start_time).save!
+ end
+
+ it "does not add any entry in the master manifest (no case logs)" do
+ expect(storage_service).to receive(:write_file).with(expected_master_manifest_rerun, any_args) do |_, csv_content|
+ csv = CSV.parse(csv_content, headers: true)
+ expect(csv&.count).to eq(0)
+ end
export_service.export_case_logs
end
end
end
- context "and a previous export has run the same day" do
+ context "and a previous export has run the same day with logs" do
before do
+ FactoryBot.create(:case_log, startdate: Time.zone.local(2022, 2, 1))
export_service.export_case_logs
end
it "increments the master manifest number by 1" do
- expect(storage_service).to receive(:write_file).with(expected_master_manifest_filename2, any_args)
+ expect(storage_service).to receive(:write_file).with(expected_master_manifest_rerun, any_args)
export_service.export_case_logs
end
+
+ context "and we trigger another full update" do
+ it "increments the base number" do
+ export_service.export_case_logs(full_update: true)
+ expect(LogsExport.last.base_number).to eq(2)
+ end
+
+ it "resets the increment number" do
+ export_service.export_case_logs(full_update: true)
+ expect(LogsExport.last.increment_number).to eq(1)
+ end
+
+ it "records a ZIP archive in the master manifest (existing case logs)" do
+ expect(storage_service).to receive(:write_file).with(expected_master_manifest_rerun, any_args) do |_, csv_content|
+ csv = CSV.parse(csv_content, headers: true)
+ expect(csv&.count).to be > 0
+ end
+ export_service.export_case_logs(full_update: true)
+ end
+
+ it "generates a ZIP export file with the expected filename" do
+ expect(storage_service).to receive(:write_file).with("core_2021_2022_jan_mar_f0002_inc0001.zip", any_args)
+ export_service.export_case_logs(full_update: true)
+ end
+ end
end
- context "when export has an error" do
+ context "and the export has an error" do
+ before { allow(storage_service).to receive(:write_file).and_raise(StandardError.new("This is an exception")) }
+
it "does not save a record in the database" do
- allow(storage_service).to receive(:write_file).and_raise(StandardError.new("This is an exception"))
- export = LogsExport.new
- allow(LogsExport).to receive(:new).and_return(export)
- expect(export).not_to receive(:save!)
- expect { export_service.export_case_logs }.to raise_error(StandardError)
+ expect { export_service.export_case_logs }
+ .to raise_error(StandardError)
+ .and(change(LogsExport, :count).by(0))
end
end
end