Browse Source

CLDC-1118: Implement data export structure (CDS) (#587)

Co-authored-by: Dushan Despotovic <dushan@madetech.com>
pull/619/head
Stéphane Meny 3 years ago committed by baarkerlounger
parent
commit
f31c2be851
  1. 154
      app/services/exports/case_log_export_service.rb
  2. 10
      db/migrate/20220518115438_add_missing_export_fields.rb
  3. 7
      db/schema.rb
  4. 5
      lib/tasks/data_export.rake
  5. 8
      spec/fixtures/exports/manifest.xml
  6. 188
      spec/services/exports/case_log_export_service_spec.rb

154
app/services/exports/case_log_export_service.rb

@ -1,16 +1,27 @@
module Exports
class CaseLogExportService
QUARTERS = {
0 => "jan_mar",
1 => "apr_jun",
2 => "jul_sep",
3 => "oct_dec",
}.freeze
LOG_ID_OFFSET = 300_000_000_000
MAX_XML_RECORDS = 10_000
def initialize(storage_service, logger = Rails.logger)
@storage_service = storage_service
@logger = logger
end
def export_case_logs
current_time = Time.zone.now
case_logs = retrieve_case_logs(current_time)
export = save_export_run(current_time)
write_master_manifest(export)
write_export_data(case_logs)
def export_case_logs(full_update: false)
start_time = Time.zone.now
case_logs = retrieve_case_logs(start_time, full_update)
export = build_export_run(start_time, full_update)
daily_run = get_daily_run_number
archive_datetimes = write_export_archive(export, case_logs)
write_master_manifest(daily_run, archive_datetimes)
export.save!
end
@ -20,57 +31,127 @@ module Exports
field_name.starts_with?("details_known_") || pattern_age.match(field_name) || omitted_attrs.include?(field_name) ? true : false
end
LOG_ID_OFFSET = 300_000_000_000
private
def save_export_run(current_time)
def get_daily_run_number
today = Time.zone.today
last_daily_run_number = LogsExport.where(created_at: today.beginning_of_day..today.end_of_day).maximum(:daily_run_number)
last_daily_run_number = 0 if last_daily_run_number.nil?
LogsExport.where(created_at: today.beginning_of_day..today.end_of_day).count + 1
end
def build_export_run(current_time, full_update)
if LogsExport.count.zero?
return LogsExport.new(started_at: current_time)
end
export = LogsExport.new
export.daily_run_number = last_daily_run_number + 1
export.started_at = current_time
export
base_number = LogsExport.maximum(:base_number)
increment_number = LogsExport.where(base_number:).maximum(:increment_number)
if full_update
base_number += 1
increment_number = 1
else
increment_number += 1
end
LogsExport.new(started_at: current_time, base_number:, increment_number:)
end
def write_master_manifest(export)
def write_master_manifest(daily_run, archive_datetimes)
today = Time.zone.today
increment_number = export.daily_run_number.to_s.rjust(4, "0")
increment_number = daily_run.to_s.rjust(4, "0")
month = today.month.to_s.rjust(2, "0")
day = today.day.to_s.rjust(2, "0")
file_path = "Manifest_#{today.year}_#{month}_#{day}_#{increment_number}.csv"
string_io = build_manifest_csv_io
string_io = build_manifest_csv_io(archive_datetimes)
@storage_service.write_file(file_path, string_io)
end
def write_export_data(case_logs)
string_io = build_export_xml_io(case_logs)
file_path = "#{get_folder_name}/#{get_file_name}.xml"
@storage_service.write_file(file_path, string_io)
def get_archive_name(case_log, base_number, increment)
return unless case_log.startdate
collection_start = case_log.collection_start_year
month = case_log.startdate.month
quarter = QUARTERS[(month - 1) / 3]
base_number_str = "f#{base_number.to_s.rjust(4, '0')}"
increment_str = "inc#{increment.to_s.rjust(4, '0')}"
"core_#{collection_start}_#{collection_start + 1}_#{quarter}_#{base_number_str}_#{increment_str}"
end
def write_export_archive(export, case_logs)
# Order case logs per archive
case_logs_per_archive = {}
case_logs.each do |case_log|
archive = get_archive_name(case_log, export.base_number, export.increment_number)
next unless archive
if case_logs_per_archive.key?(archive)
case_logs_per_archive[archive] << case_log
else
case_logs_per_archive[archive] = [case_log]
end
end
# Write all archives
archive_datetimes = {}
case_logs_per_archive.each do |archive, case_logs_to_export|
manifest_xml = build_manifest_xml(case_logs_to_export.count)
zip_io = Zip::File.open_buffer(StringIO.new)
zip_io.add("manifest.xml", manifest_xml)
part_number = 1
case_logs_to_export.each_slice(MAX_XML_RECORDS) do |case_logs_slice|
data_xml = build_export_xml(case_logs_slice)
part_number_str = "pt#{part_number.to_s.rjust(3, '0')}"
zip_io.add("#{archive}_#{part_number_str}.xml", data_xml)
part_number += 1
end
@storage_service.write_file("#{archive}.zip", zip_io.write_buffer)
archive_datetimes[archive] = Time.zone.now
end
archive_datetimes
end
def retrieve_case_logs(current_time)
def retrieve_case_logs(start_time, full_update)
recent_export = LogsExport.order("started_at").last
if recent_export
params = { from: recent_export.started_at, to: current_time }
if !full_update && recent_export
params = { from: recent_export.started_at, to: start_time }
CaseLog.where("updated_at >= :from and updated_at <= :to", params)
else
params = { to: current_time }
params = { to: start_time }
CaseLog.where("updated_at <= :to", params)
end
end
def build_manifest_csv_io
def build_manifest_csv_io(archive_datetimes)
headers = ["zip-name", "date-time zipped folder generated", "zip-file-uri"]
csv_string = CSV.generate do |csv|
csv << headers
archive_datetimes.each do |archive, datetime|
csv << [archive, datetime, "#{archive}.zip"]
end
end
StringIO.new(csv_string)
end
def build_export_xml_io(case_logs)
def xml_doc_to_temp_file(xml_doc)
file = Tempfile.new
xml_doc.write_xml_to(file, encoding: "UTF-8")
file.rewind
file
end
def build_manifest_xml(record_number)
doc = Nokogiri::XML("<report/>")
doc.at("report") << doc.create_element("form-data-summary")
doc.at("form-data-summary") << doc.create_element("records")
doc.at("records") << doc.create_element("count-of-records", record_number)
xml_doc_to_temp_file(doc)
end
def build_export_xml(case_logs)
doc = Nokogiri::XML("<forms/>")
case_logs.each do |case_log|
@ -87,23 +168,8 @@ module Exports
end
form << doc.create_element("providertype", case_log.owning_organisation.read_attribute_before_type_cast(:provider_type))
end
doc.write_xml_to(StringIO.new, encoding: "UTF-8")
end
def get_folder_name
"core_#{day_as_string}"
end
def get_file_name
"dat_core_#{day_as_string}_#{increment_as_string}"
end
def day_as_string
Time.current.strftime("%Y_%m_%d")
end
def increment_as_string(increment = 1)
sprintf("%04d", increment)
xml_doc_to_temp_file(doc)
end
end
end

10
db/migrate/20220518115438_add_missing_export_fields.rb

@ -0,0 +1,10 @@
class AddMissingExportFields < ActiveRecord::Migration[7.0]
def change
change_table :logs_exports, bulk: true do |t|
t.column :base_number, :integer, default: 1, null: false
t.column :increment_number, :integer, default: 1, null: false
t.remove :daily_run_number, type: :integer
t.change_null :started_at, false
end
end
end

7
db/schema.rb

@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[7.0].define(version: 2022_05_16_111514) do
ActiveRecord::Schema[7.0].define(version: 2022_05_18_115438) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
@ -259,9 +259,10 @@ ActiveRecord::Schema[7.0].define(version: 2022_05_16_111514) do
end
create_table "logs_exports", force: :cascade do |t|
t.integer "daily_run_number"
t.datetime "created_at", default: -> { "CURRENT_TIMESTAMP" }
t.datetime "started_at"
t.datetime "started_at", precision: nil, null: false
t.integer "base_number", default: 1, null: false
t.integer "increment_number", default: 1, null: false
end
create_table "organisation_las", force: :cascade do |t|

5
lib/tasks/data_export.rake

@ -1,7 +1,8 @@
namespace :core do
desc "Export data XMLs for import into Central Data System (CDS)"
task data_export: :environment do
task :data_export, %i[full_update] => :environment do |_task, args|
storage_service = StorageService.new(PaasConfigurationService.new, ENV["EXPORT_PAAS_INSTANCE"])
Exports::CaseLogExportService.new(storage_service).export_case_logs
full_update = args[:full_update].present? && args[:full_update] == "true"
Exports::CaseLogExportService.new(storage_service).export_case_logs(full_update:)
end
end

8
spec/fixtures/exports/manifest.xml vendored

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<report>
<form-data-summary>
<records>
<count-of-records>{recno}</count-of-records>
</records>
</form-data-summary>
</report>

188
spec/services/exports/case_log_export_service_spec.rb

@ -3,29 +3,32 @@ require "rails_helper"
RSpec.describe Exports::CaseLogExportService do
let(:storage_service) { instance_double(StorageService) }
let(:export_filepath) { "spec/fixtures/exports/case_logs.xml" }
let(:export_file) { File.open(export_filepath, "r:UTF-8") }
let(:export_file) { File.open("spec/fixtures/exports/case_logs.xml", "r:UTF-8") }
let(:local_manifest_file) { File.open("spec/fixtures/exports/manifest.xml", "r:UTF-8") }
let(:expected_data_filename) { "core_2022_02_08/dat_core_2022_02_08_0001.xml" }
let(:expected_master_manifest_filename) { "Manifest_2022_02_08_0001.csv" }
let(:expected_master_manifest_filename2) { "Manifest_2022_02_08_0002.csv" }
let(:expected_master_manifest_filename) { "Manifest_2022_05_01_0001.csv" }
let(:expected_master_manifest_rerun) { "Manifest_2022_05_01_0002.csv" }
let(:expected_zip_filename) { "core_2021_2022_jan_mar_f0001_inc0001.zip" }
let(:expected_manifest_filename) { "manifest.xml" }
let(:case_log) { FactoryBot.create(:case_log, :completed) }
def replace_entity_ids(export_template)
def replace_entity_ids(case_log, export_template)
export_template.sub!(/\{id\}/, (case_log["id"] + Exports::CaseLogExportService::LOG_ID_OFFSET).to_s)
export_template.sub!(/\{owning_org_id\}/, case_log["owning_organisation_id"].to_s)
export_template.sub!(/\{managing_org_id\}/, case_log["managing_organisation_id"].to_s)
export_template.sub!(/\{created_by_id\}/, case_log["created_by_id"].to_s)
end
def replace_record_number(export_template, record_number)
export_template.sub!(/\{recno\}/, record_number.to_s)
end
context "when exporting daily case logs" do
subject(:export_service) { described_class.new(storage_service) }
let(:case_log) { FactoryBot.create(:case_log, :completed) }
let(:start_time) { Time.zone.local(2022, 5, 1) }
before do
Timecop.freeze(case_log.updated_at)
Timecop.freeze(start_time)
allow(storage_service).to receive(:write_file)
end
@ -45,75 +48,168 @@ RSpec.describe Exports::CaseLogExportService do
end
end
context "and case logs are available for export" do
let(:time_now) { Time.zone.now }
context "and one case log is available for export" do
let!(:case_log) { FactoryBot.create(:case_log, :completed) }
let(:expected_data_filename) { "core_2021_2022_jan_mar_f0001_inc0001_pt001.xml" }
before do
Timecop.freeze(time_now)
case_log
it "generates a ZIP export file with the expected filename" do
expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args)
export_service.export_case_logs
end
after do
LogsExport.destroy_all
it "generates an XML manifest file with the expected filename within the ZIP file" do
expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
entry = Zip::File.open_buffer(content).find_entry(expected_manifest_filename)
expect(entry).not_to be_nil
expect(entry.name).to eq(expected_manifest_filename)
end
export_service.export_case_logs
end
it "generates an XML export file with the expected filename" do
expect(storage_service).to receive(:write_file).with(expected_data_filename, any_args)
it "generates an XML export file with the expected filename within the ZIP file" do
expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
entry = Zip::File.open_buffer(content).find_entry(expected_data_filename)
expect(entry).not_to be_nil
expect(entry.name).to eq(expected_data_filename)
end
export_service.export_case_logs
end
it "generates an XML export file with the expected content" do
actual_content = nil
expected_content = replace_entity_ids(export_file.read)
allow(storage_service).to receive(:write_file).with(expected_data_filename, any_args) { |_, arg2| actual_content = arg2&.string }
it "generates an XML manifest file with the expected content within the ZIP file" do
expected_content = replace_record_number(local_manifest_file.read, 1)
expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
entry = Zip::File.open_buffer(content).find_entry(expected_manifest_filename)
expect(entry).not_to be_nil
expect(entry.get_input_stream.read).to eq(expected_content)
end
export_service.export_case_logs
expect(actual_content).to eq(expected_content)
end
it "creates a logs export record in a database with correct time" do
it "generates an XML export file with the expected content within the ZIP file" do
expected_content = replace_entity_ids(case_log, export_file.read)
expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
entry = Zip::File.open_buffer(content).find_entry(expected_data_filename)
expect(entry).not_to be_nil
expect(entry.get_input_stream.read).to eq(expected_content)
end
export_service.export_case_logs
records_from_db = ActiveRecord::Base.connection.execute("select started_at, id from logs_exports ").to_a
expect(records_from_db[0]["started_at"]).to eq(time_now)
expect(records_from_db.count).to eq(1)
end
end
context "and multiple case logs are available for export on different periods" do
let(:expected_zip_filename2) { "core_2022_2023_apr_jun_f0001_inc0001.zip" }
before do
FactoryBot.create(:case_log, startdate: Time.zone.local(2022, 2, 1))
FactoryBot.create(:case_log, startdate: Time.zone.local(2022, 4, 1))
end
context "when case logs are across multiple quarters" do
it "generates multiple ZIP export files with the expected filenames" do
expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args)
expect(storage_service).to receive(:write_file).with(expected_zip_filename2, any_args)
export_service.export_case_logs
end
end
end
context "and multiple case logs are available for export on same quarter" do
before do
FactoryBot.create(:case_log, startdate: Time.zone.local(2022, 2, 1))
FactoryBot.create(:case_log, startdate: Time.zone.local(2022, 3, 20))
end
it "gets the logs for correct timeframe" do
start_time = Time.zone.local(2022, 4, 13, 2, 2, 2)
export = LogsExport.new(started_at: start_time, daily_run_number: 1)
export.save!
params = { from: start_time, to: time_now }
allow(CaseLog).to receive(:where).with("updated_at >= :from and updated_at <= :to", params).once.and_return([])
it "generates an XML manifest file with the expected content within the ZIP file" do
expected_content = replace_record_number(local_manifest_file.read, 2)
expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args) do |_, content|
entry = Zip::File.open_buffer(content).find_entry(expected_manifest_filename)
expect(entry).not_to be_nil
expect(entry.get_input_stream.read).to eq(expected_content)
end
export_service.export_case_logs
end
context "when this is the first export" do
it "gets the logs for the timeframe up until the current time" do
params = { to: time_now }
allow(CaseLog).to receive(:where).with("updated_at <= :to", params).once.and_return([])
it "creates a logs export record in a database with correct time" do
expect { export_service.export_case_logs }
.to change(LogsExport, :count).by(1)
expect(LogsExport.last.started_at).to eq(start_time)
end
context "when this is the first export (full)" do
it "records a ZIP archive in the master manifest (existing case logs)" do
expect(storage_service).to receive(:write_file).with(expected_master_manifest_filename, any_args) do |_, csv_content|
csv = CSV.parse(csv_content, headers: true)
expect(csv&.count).to be > 0
end
export_service.export_case_logs
end
end
context "when this is a second export (partial)" do
before do
start_time = Time.zone.local(2022, 4, 1)
LogsExport.new(started_at: start_time).save!
end
it "does not add any entry in the master manifest (no case logs)" do
expect(storage_service).to receive(:write_file).with(expected_master_manifest_rerun, any_args) do |_, csv_content|
csv = CSV.parse(csv_content, headers: true)
expect(csv&.count).to eq(0)
end
export_service.export_case_logs
end
end
end
context "and a previous export has run the same day" do
context "and a previous export has run the same day with logs" do
before do
FactoryBot.create(:case_log, startdate: Time.zone.local(2022, 2, 1))
export_service.export_case_logs
end
it "increments the master manifest number by 1" do
expect(storage_service).to receive(:write_file).with(expected_master_manifest_filename2, any_args)
expect(storage_service).to receive(:write_file).with(expected_master_manifest_rerun, any_args)
export_service.export_case_logs
end
context "and we trigger another full update" do
it "increments the base number" do
export_service.export_case_logs(full_update: true)
expect(LogsExport.last.base_number).to eq(2)
end
it "resets the increment number" do
export_service.export_case_logs(full_update: true)
expect(LogsExport.last.increment_number).to eq(1)
end
it "records a ZIP archive in the master manifest (existing case logs)" do
expect(storage_service).to receive(:write_file).with(expected_master_manifest_rerun, any_args) do |_, csv_content|
csv = CSV.parse(csv_content, headers: true)
expect(csv&.count).to be > 0
end
export_service.export_case_logs(full_update: true)
end
it "generates a ZIP export file with the expected filename" do
expect(storage_service).to receive(:write_file).with("core_2021_2022_jan_mar_f0002_inc0001.zip", any_args)
export_service.export_case_logs(full_update: true)
end
end
end
context "when export has an error" do
context "and the export has an error" do
before { allow(storage_service).to receive(:write_file).and_raise(StandardError.new("This is an exception")) }
it "does not save a record in the database" do
allow(storage_service).to receive(:write_file).and_raise(StandardError.new("This is an exception"))
export = LogsExport.new
allow(LogsExport).to receive(:new).and_return(export)
expect(export).not_to receive(:save!)
expect { export_service.export_case_logs }.to raise_error(StandardError)
expect { export_service.export_case_logs }
.to raise_error(StandardError)
.and(change(LogsExport, :count).by(0))
end
end
end

Loading…
Cancel
Save