From 84177349f0d13b12463a07cfdf953c37d42df075 Mon Sep 17 00:00:00 2001
From: kosiakkatrina <54268893+kosiakkatrina@users.noreply.github.com>
Date: Wed, 4 Dec 2024 12:21:09 +0000
Subject: [PATCH] Refactor exports to have years (#2849)
* Refactor exports to have years
* Update existing exports
* Fix flaky test
* Remove a comment
---
app/models/export.rb | 3 ++
app/services/exports/export_service.rb | 10 ++---
.../exports/lettings_log_export_service.rb | 22 +++++-----
.../exports/organisation_export_service.rb | 12 +++---
app/services/exports/user_export_service.rb | 12 +++---
app/services/exports/xml_export_service.rb | 20 ++++-----
.../20241204100518_add_year_to_export.rb | 5 +++
db/schema.rb | 3 +-
lib/tasks/data_export.rake | 6 +--
lib/tasks/set_export_collection_years.rake | 8 ++++
spec/helpers/tab_nav_helper_spec.rb | 2 +-
spec/lib/tasks/data_export_spec.rb | 6 +--
.../tasks/set_export_collection_years_spec.rb | 41 +++++++++++++++++++
.../lettings_log_export_service_spec.rb | 18 ++++----
14 files changed, 111 insertions(+), 57 deletions(-)
create mode 100644 db/migrate/20241204100518_add_year_to_export.rb
create mode 100644 lib/tasks/set_export_collection_years.rake
create mode 100644 spec/lib/tasks/set_export_collection_years_spec.rb
diff --git a/app/models/export.rb b/app/models/export.rb
index 2ee04fe3d..27e574c72 100644
--- a/app/models/export.rb
+++ b/app/models/export.rb
@@ -1,2 +1,5 @@
class Export < ApplicationRecord
+ scope :lettings, -> { where(collection: "lettings") }
+ scope :organisations, -> { where(collection: "organisations") }
+ scope :users, -> { where(collection: "users") }
end
diff --git a/app/services/exports/export_service.rb b/app/services/exports/export_service.rb
index bf98a4edf..40c10055b 100644
--- a/app/services/exports/export_service.rb
+++ b/app/services/exports/export_service.rb
@@ -7,7 +7,7 @@ module Exports
@logger = logger
end
- def export_xml(full_update: false, collection: nil)
+ def export_xml(full_update: false, collection: nil, year: nil)
start_time = Time.zone.now
daily_run_number = get_daily_run_number
lettings_archives_for_manifest = {}
@@ -21,12 +21,12 @@ module Exports
when "organisations"
organisations_archives_for_manifest = get_organisation_archives(start_time, full_update)
else
- lettings_archives_for_manifest = get_lettings_archives(start_time, full_update, collection)
+ lettings_archives_for_manifest = get_lettings_archives(start_time, full_update, year)
end
else
users_archives_for_manifest = get_user_archives(start_time, full_update)
organisations_archives_for_manifest = get_organisation_archives(start_time, full_update)
- lettings_archives_for_manifest = get_lettings_archives(start_time, full_update, collection)
+ lettings_archives_for_manifest = get_lettings_archives(start_time, full_update, year)
end
write_master_manifest(daily_run_number, lettings_archives_for_manifest.merge(users_archives_for_manifest).merge(organisations_archives_for_manifest))
@@ -70,9 +70,9 @@ module Exports
organisations_export_service.export_xml_organisations(full_update:)
end
- def get_lettings_archives(start_time, full_update, collection)
+ def get_lettings_archives(start_time, full_update, collection_year)
lettings_export_service = Exports::LettingsLogExportService.new(@storage_service, start_time)
- lettings_export_service.export_xml_lettings_logs(full_update:, collection_year: collection)
+ lettings_export_service.export_xml_lettings_logs(full_update:, collection_year:)
end
end
end
diff --git a/app/services/exports/lettings_log_export_service.rb b/app/services/exports/lettings_log_export_service.rb
index 97f495e0c..a8877eac3 100644
--- a/app/services/exports/lettings_log_export_service.rb
+++ b/app/services/exports/lettings_log_export_service.rb
@@ -5,11 +5,11 @@ module Exports
def export_xml_lettings_logs(full_update: false, collection_year: nil)
archives_for_manifest = {}
- collection_years_to_export(collection_year).each do |collection|
- recent_export = Export.where(collection:).order("started_at").last
- base_number = Export.where(empty_export: false, collection:).maximum(:base_number) || 1
- export = build_export_run(collection, base_number, full_update)
- archives = write_export_archive(export, collection, recent_export, full_update)
+ collection_years_to_export(collection_year).each do |year|
+ recent_export = Export.lettings.where(year:).order("started_at").last
+ base_number = Export.lettings.where(empty_export: false, year:).maximum(:base_number) || 1
+ export = build_export_run("lettings", base_number, full_update, year)
+ archives = write_export_archive(export, year, recent_export, full_update)
archives_for_manifest.merge!(archives)
@@ -22,21 +22,21 @@ module Exports
private
- def get_archive_name(collection, base_number, increment)
- return unless collection
+ def get_archive_name(year, base_number, increment)
+ return unless year
base_number_str = "f#{base_number.to_s.rjust(4, '0')}"
increment_str = "inc#{increment.to_s.rjust(4, '0')}"
- "core_#{collection}_#{collection + 1}_apr_mar_#{base_number_str}_#{increment_str}".downcase
+ "core_#{year}_#{year + 1}_apr_mar_#{base_number_str}_#{increment_str}".downcase
end
- def retrieve_resources(recent_export, full_update, collection)
+ def retrieve_resources(recent_export, full_update, year)
if !full_update && recent_export
params = { from: recent_export.started_at, to: @start_time }
- LettingsLog.exportable.where("(updated_at >= :from AND updated_at <= :to) OR (values_updated_at IS NOT NULL AND values_updated_at >= :from AND values_updated_at <= :to)", params).filter_by_year(collection)
+ LettingsLog.exportable.where("(updated_at >= :from AND updated_at <= :to) OR (values_updated_at IS NOT NULL AND values_updated_at >= :from AND values_updated_at <= :to)", params).filter_by_year(year)
else
params = { to: @start_time }
- LettingsLog.exportable.where("updated_at <= :to", params).filter_by_year(collection)
+ LettingsLog.exportable.where("updated_at <= :to", params).filter_by_year(year)
end
end
diff --git a/app/services/exports/organisation_export_service.rb b/app/services/exports/organisation_export_service.rb
index 71eccf60a..2b2da71c8 100644
--- a/app/services/exports/organisation_export_service.rb
+++ b/app/services/exports/organisation_export_service.rb
@@ -5,9 +5,9 @@ module Exports
def export_xml_organisations(full_update: false)
collection = "organisations"
- recent_export = Export.where(collection:).order("started_at").last
+ recent_export = Export.organisations.order("started_at").last
- base_number = Export.where(empty_export: false, collection:).maximum(:base_number) || 1
+ base_number = Export.organisations.where(empty_export: false).maximum(:base_number) || 1
export = build_export_run(collection, base_number, full_update)
archives_for_manifest = write_export_archive(export, collection, recent_export, full_update)
@@ -19,15 +19,13 @@ module Exports
private
- def get_archive_name(collection, base_number, increment)
- return unless collection
-
+ def get_archive_name(_year, base_number, increment)
base_number_str = "f#{base_number.to_s.rjust(4, '0')}"
increment_str = "inc#{increment.to_s.rjust(4, '0')}"
- "#{collection}_2024_2025_apr_mar_#{base_number_str}_#{increment_str}".downcase
+ "organisations_2024_2025_apr_mar_#{base_number_str}_#{increment_str}".downcase
end
- def retrieve_resources(recent_export, full_update, _collection)
+ def retrieve_resources(recent_export, full_update, _year)
if !full_update && recent_export
params = { from: recent_export.started_at, to: @start_time }
Organisation.where("(updated_at >= :from AND updated_at <= :to)", params)
diff --git a/app/services/exports/user_export_service.rb b/app/services/exports/user_export_service.rb
index 907a1cc86..aaa30c424 100644
--- a/app/services/exports/user_export_service.rb
+++ b/app/services/exports/user_export_service.rb
@@ -5,9 +5,9 @@ module Exports
def export_xml_users(full_update: false)
collection = "users"
- recent_export = Export.where(collection:).order("started_at").last
+ recent_export = Export.users.order("started_at").last
- base_number = Export.where(empty_export: false, collection:).maximum(:base_number) || 1
+ base_number = Export.users.where(empty_export: false).maximum(:base_number) || 1
export = build_export_run(collection, base_number, full_update)
archives_for_manifest = write_export_archive(export, collection, recent_export, full_update)
@@ -19,15 +19,13 @@ module Exports
private
- def get_archive_name(collection, base_number, increment)
- return unless collection
-
+ def get_archive_name(_year, base_number, increment)
base_number_str = "f#{base_number.to_s.rjust(4, '0')}"
increment_str = "inc#{increment.to_s.rjust(4, '0')}"
- "#{collection}_2024_2025_apr_mar_#{base_number_str}_#{increment_str}".downcase
+ "users_2024_2025_apr_mar_#{base_number_str}_#{increment_str}".downcase
end
- def retrieve_resources(recent_export, full_update, _collection)
+ def retrieve_resources(recent_export, full_update, _year)
if !full_update && recent_export
params = { from: recent_export.started_at, to: @start_time }
User.where("(updated_at >= :from AND updated_at <= :to)", params)
diff --git a/app/services/exports/xml_export_service.rb b/app/services/exports/xml_export_service.rb
index 009a1b306..d499dd518 100644
--- a/app/services/exports/xml_export_service.rb
+++ b/app/services/exports/xml_export_service.rb
@@ -11,9 +11,9 @@ module Exports
private
- def build_export_run(collection, base_number, full_update)
- @logger.info("Building export run for #{collection}")
- previous_exports_with_data = Export.where(collection:, empty_export: false)
+ def build_export_run(collection, base_number, full_update, year = nil)
+ @logger.info("Building export run for #{[collection, year].join(' ')}")
+ previous_exports_with_data = Export.where(collection:, year:, empty_export: false)
increment_number = previous_exports_with_data.where(base_number:).maximum(:increment_number) || 1
@@ -25,16 +25,16 @@ module Exports
end
if previous_exports_with_data.empty?
- return Export.new(collection:, base_number:, started_at: @start_time)
+ return Export.new(collection:, year:, base_number:, started_at: @start_time)
end
- Export.new(collection:, started_at: @start_time, base_number:, increment_number:)
+ Export.new(collection:, year:, started_at: @start_time, base_number:, increment_number:)
end
- def write_export_archive(export, collection, recent_export, full_update)
- archive = get_archive_name(collection, export.base_number, export.increment_number) # archive name would be the same for all logs because they're already filtered by year (?)
+ def write_export_archive(export, year, recent_export, full_update)
+ archive = get_archive_name(year, export.base_number, export.increment_number)
- initial_count = retrieve_resources(recent_export, full_update, collection).count
+ initial_count = retrieve_resources(recent_export, full_update, year).count
@logger.info("Creating #{archive} - #{initial_count} resources")
return {} if initial_count.zero?
@@ -46,12 +46,12 @@ module Exports
loop do
slice = if last_processed_marker.present?
- retrieve_resources(recent_export, full_update, collection)
+ retrieve_resources(recent_export, full_update, year)
.where("created_at > ?", last_processed_marker)
.order(:created_at)
.limit(MAX_XML_RECORDS).to_a
else
- retrieve_resources(recent_export, full_update, collection)
+ retrieve_resources(recent_export, full_update, year)
.order(:created_at)
.limit(MAX_XML_RECORDS).to_a
end
diff --git a/db/migrate/20241204100518_add_year_to_export.rb b/db/migrate/20241204100518_add_year_to_export.rb
new file mode 100644
index 000000000..784754888
--- /dev/null
+++ b/db/migrate/20241204100518_add_year_to_export.rb
@@ -0,0 +1,5 @@
+class AddYearToExport < ActiveRecord::Migration[7.0]
+ def change
+ add_column :exports, :year, :integer
+ end
+end
diff --git a/db/schema.rb b/db/schema.rb
index 0cdc15e9f..d31a54da2 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
-ActiveRecord::Schema[7.0].define(version: 2024_11_22_154743) do
+ActiveRecord::Schema[7.0].define(version: 2024_12_04_100518) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
@@ -113,6 +113,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_11_22_154743) do
t.integer "increment_number", default: 1, null: false
t.boolean "empty_export", default: false, null: false
t.string "collection"
+ t.integer "year"
end
create_table "la_rent_ranges", force: :cascade do |t|
diff --git a/lib/tasks/data_export.rake b/lib/tasks/data_export.rake
index 7a9a90bc8..43ef06374 100644
--- a/lib/tasks/data_export.rake
+++ b/lib/tasks/data_export.rake
@@ -7,13 +7,13 @@ namespace :core do
end
desc "Export all data XMLs for import into Central Data System (CDS)"
- task :full_data_export_xml, %i[collection] => :environment do |_task, args|
+ task :full_data_export_xml, %i[collection year] => :environment do |_task, args|
collection = args[:collection].presence
- collection = collection.to_i if collection.present? && collection.scan(/\D/).empty?
+ year = args[:year]&.to_i
storage_service = Storage::S3Service.new(Configuration::EnvConfigurationService.new, ENV["EXPORT_BUCKET"])
export_service = Exports::ExportService.new(storage_service)
- export_service.export_xml(full_update: true, collection:)
+ export_service.export_xml(full_update: true, collection:, year:)
end
end
diff --git a/lib/tasks/set_export_collection_years.rake b/lib/tasks/set_export_collection_years.rake
new file mode 100644
index 000000000..badaab0eb
--- /dev/null
+++ b/lib/tasks/set_export_collection_years.rake
@@ -0,0 +1,8 @@
+desc "Set export collection years for lettings exports"
+task set_export_collection_years: :environment do
+ Export.where(collection: %w[2022 2023 2024 2025]).find_each do |export|
+ export.year = export.collection.to_i
+ export.collection = "lettings"
+ export.save!
+ end
+end
diff --git a/spec/helpers/tab_nav_helper_spec.rb b/spec/helpers/tab_nav_helper_spec.rb
index 89f867775..bd29f4c46 100644
--- a/spec/helpers/tab_nav_helper_spec.rb
+++ b/spec/helpers/tab_nav_helper_spec.rb
@@ -9,7 +9,7 @@ RSpec.describe TabNavHelper do
describe "#user_cell" do
it "returns user link and email separated by a newline character" do
expected_html = "#{current_user.name}\nUser #{current_user.email}"
- expect(user_cell(current_user)).to match(expected_html)
+ expect(CGI.unescapeHTML(user_cell(current_user))).to match(expected_html)
end
end
diff --git a/spec/lib/tasks/data_export_spec.rb b/spec/lib/tasks/data_export_spec.rb
index 8b5dd5fbe..2f6127512 100644
--- a/spec/lib/tasks/data_export_spec.rb
+++ b/spec/lib/tasks/data_export_spec.rb
@@ -30,7 +30,7 @@ describe "rake core:data_export", type: task do
context "with all available years" do
it "calls the export service" do
- expect(export_service).to receive(:export_xml).with(full_update: true, collection: nil)
+ expect(export_service).to receive(:export_xml).with(full_update: true, collection: nil, year: nil)
task.invoke
end
@@ -38,9 +38,9 @@ describe "rake core:data_export", type: task do
context "with a specific collection" do
it "calls the export service" do
- expect(export_service).to receive(:export_xml).with(full_update: true, collection: 2022)
+ expect(export_service).to receive(:export_xml).with(full_update: true, collection: "lettings", year: 2022)
- task.invoke("2022")
+ task.invoke("lettings", "2022")
end
end
end
diff --git a/spec/lib/tasks/set_export_collection_years_spec.rb b/spec/lib/tasks/set_export_collection_years_spec.rb
new file mode 100644
index 000000000..76ff779b0
--- /dev/null
+++ b/spec/lib/tasks/set_export_collection_years_spec.rb
@@ -0,0 +1,41 @@
+require "rails_helper"
+require "rake"
+
+RSpec.describe "set_export_collection_years" do
+ describe ":set_export_collection_years", type: :task do
+ subject(:task) { Rake::Task["set_export_collection_years"] }
+
+ before do
+ Rake.application.rake_require("tasks/set_export_collection_years")
+ Rake::Task.define_task(:environment)
+ task.reenable
+ end
+
+ context "when the rake task is run" do
+ let!(:lettings_export_2023) { Export.create(collection: "2023", year: nil, started_at: Time.zone.now) }
+ let!(:lettings_export_2024) { Export.create(collection: "2024", year: nil, started_at: Time.zone.now) }
+ let!(:updated_lettings_export) { Export.create(collection: "lettings", year: 2023, started_at: Time.zone.now) }
+ let!(:organisations_export) { Export.create(collection: "organisations", year: nil, started_at: Time.zone.now) }
+ let!(:users_export) { Export.create(collection: "users", year: nil, started_at: Time.zone.now) }
+
+ it "correctly updates collection years" do
+ task.invoke
+
+ expect(lettings_export_2023.reload.collection).to eq("lettings")
+ expect(lettings_export_2023.year).to eq(2023)
+
+ expect(lettings_export_2024.reload.collection).to eq("lettings")
+ expect(lettings_export_2024.year).to eq(2024)
+
+ expect(updated_lettings_export.reload.collection).to eq("lettings")
+ expect(updated_lettings_export.year).to eq(2023)
+
+ expect(organisations_export.reload.collection).to eq("organisations")
+ expect(organisations_export.year).to eq(nil)
+
+ expect(users_export.reload.collection).to eq("users")
+ expect(users_export.year).to eq(nil)
+ end
+ end
+ end
+end
diff --git a/spec/services/exports/lettings_log_export_service_spec.rb b/spec/services/exports/lettings_log_export_service_spec.rb
index 6a07af8dd..8c123b47e 100644
--- a/spec/services/exports/lettings_log_export_service_spec.rb
+++ b/spec/services/exports/lettings_log_export_service_spec.rb
@@ -207,15 +207,15 @@ RSpec.describe Exports::LettingsLogExportService do
it "generates multiple ZIP export files with the expected filenames" do
expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args)
expect(storage_service).to receive(:write_file).with(expected_zip_filename2, any_args)
- expect(Rails.logger).to receive(:info).with("Building export run for 2021")
+ expect(Rails.logger).to receive(:info).with("Building export run for lettings 2021")
expect(Rails.logger).to receive(:info).with("Creating core_2021_2022_apr_mar_f0001_inc0001 - 1 resources")
expect(Rails.logger).to receive(:info).with("Added core_2021_2022_apr_mar_f0001_inc0001_pt001.xml")
expect(Rails.logger).to receive(:info).with("Writing core_2021_2022_apr_mar_f0001_inc0001.zip")
- expect(Rails.logger).to receive(:info).with("Building export run for 2022")
+ expect(Rails.logger).to receive(:info).with("Building export run for lettings 2022")
expect(Rails.logger).to receive(:info).with("Creating core_2022_2023_apr_mar_f0001_inc0001 - 1 resources")
expect(Rails.logger).to receive(:info).with("Added core_2022_2023_apr_mar_f0001_inc0001_pt001.xml")
expect(Rails.logger).to receive(:info).with("Writing core_2022_2023_apr_mar_f0001_inc0001.zip")
- expect(Rails.logger).to receive(:info).with("Building export run for 2023")
+ expect(Rails.logger).to receive(:info).with("Building export run for lettings 2023")
expect(Rails.logger).to receive(:info).with("Creating core_2023_2024_apr_mar_f0001_inc0001 - 0 resources")
export_service.export_xml_lettings_logs
@@ -223,7 +223,7 @@ RSpec.describe Exports::LettingsLogExportService do
it "generates zip export files only for specified year" do
expect(storage_service).to receive(:write_file).with(expected_zip_filename2, any_args)
- expect(Rails.logger).to receive(:info).with("Building export run for 2022")
+ expect(Rails.logger).to receive(:info).with("Building export run for lettings 2022")
expect(Rails.logger).to receive(:info).with("Creating core_2022_2023_apr_mar_f0001_inc0001 - 1 resources")
expect(Rails.logger).to receive(:info).with("Added core_2022_2023_apr_mar_f0001_inc0001_pt001.xml")
expect(Rails.logger).to receive(:info).with("Writing core_2022_2023_apr_mar_f0001_inc0001.zip")
@@ -236,21 +236,21 @@ RSpec.describe Exports::LettingsLogExportService do
let(:expected_zip_filename2) { "core_2022_2023_apr_mar_f0001_inc0001.zip" }
before do
- Export.new(started_at: Time.zone.yesterday, base_number: 7, increment_number: 3, collection: 2021).save!
+ Export.new(started_at: Time.zone.yesterday, base_number: 7, increment_number: 3, collection: "lettings", year: 2021).save!
end
it "generates multiple ZIP export files with different base numbers in the filenames" do
expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args)
expect(storage_service).to receive(:write_file).with(expected_zip_filename2, any_args)
- expect(Rails.logger).to receive(:info).with("Building export run for 2021")
+ expect(Rails.logger).to receive(:info).with("Building export run for lettings 2021")
expect(Rails.logger).to receive(:info).with("Creating core_2021_2022_apr_mar_f0007_inc0004 - 1 resources")
expect(Rails.logger).to receive(:info).with("Added core_2021_2022_apr_mar_f0007_inc0004_pt001.xml")
expect(Rails.logger).to receive(:info).with("Writing core_2021_2022_apr_mar_f0007_inc0004.zip")
- expect(Rails.logger).to receive(:info).with("Building export run for 2022")
+ expect(Rails.logger).to receive(:info).with("Building export run for lettings 2022")
expect(Rails.logger).to receive(:info).with("Creating core_2022_2023_apr_mar_f0001_inc0001 - 1 resources")
expect(Rails.logger).to receive(:info).with("Added core_2022_2023_apr_mar_f0001_inc0001_pt001.xml")
expect(Rails.logger).to receive(:info).with("Writing core_2022_2023_apr_mar_f0001_inc0001.zip")
- expect(Rails.logger).to receive(:info).with("Building export run for 2023")
+ expect(Rails.logger).to receive(:info).with("Building export run for lettings 2023")
expect(Rails.logger).to receive(:info).with("Creating core_2023_2024_apr_mar_f0001_inc0001 - 0 resources")
export_service.export_xml_lettings_logs
@@ -329,7 +329,7 @@ RSpec.describe Exports::LettingsLogExportService do
context "when this is a second export (partial)" do
before do
start_time = Time.zone.local(2022, 6, 1)
- Export.new(started_at: start_time, collection: 2021).save!
+ Export.new(started_at: start_time, collection: "lettings", year: 2021).save!
end
it "does not add any entry for the master manifest (no lettings logs)" do