Browse Source

Refactor exports to have years (#2849)

* Refactor exports to have years

* Update existing exports

* Fix flaky test

* Remove a comment
pull/2850/head
kosiakkatrina 2 months ago committed by GitHub
parent
commit
84177349f0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 3
      app/models/export.rb
  2. 10
      app/services/exports/export_service.rb
  3. 22
      app/services/exports/lettings_log_export_service.rb
  4. 12
      app/services/exports/organisation_export_service.rb
  5. 12
      app/services/exports/user_export_service.rb
  6. 20
      app/services/exports/xml_export_service.rb
  7. 5
      db/migrate/20241204100518_add_year_to_export.rb
  8. 3
      db/schema.rb
  9. 6
      lib/tasks/data_export.rake
  10. 8
      lib/tasks/set_export_collection_years.rake
  11. 2
      spec/helpers/tab_nav_helper_spec.rb
  12. 6
      spec/lib/tasks/data_export_spec.rb
  13. 41
      spec/lib/tasks/set_export_collection_years_spec.rb
  14. 18
      spec/services/exports/lettings_log_export_service_spec.rb

3
app/models/export.rb

@ -1,2 +1,5 @@
class Export < ApplicationRecord
scope :lettings, -> { where(collection: "lettings") }
scope :organisations, -> { where(collection: "organisations") }
scope :users, -> { where(collection: "users") }
end

10
app/services/exports/export_service.rb

@ -7,7 +7,7 @@ module Exports
@logger = logger
end
def export_xml(full_update: false, collection: nil)
def export_xml(full_update: false, collection: nil, year: nil)
start_time = Time.zone.now
daily_run_number = get_daily_run_number
lettings_archives_for_manifest = {}
@ -21,12 +21,12 @@ module Exports
when "organisations"
organisations_archives_for_manifest = get_organisation_archives(start_time, full_update)
else
lettings_archives_for_manifest = get_lettings_archives(start_time, full_update, collection)
lettings_archives_for_manifest = get_lettings_archives(start_time, full_update, year)
end
else
users_archives_for_manifest = get_user_archives(start_time, full_update)
organisations_archives_for_manifest = get_organisation_archives(start_time, full_update)
lettings_archives_for_manifest = get_lettings_archives(start_time, full_update, collection)
lettings_archives_for_manifest = get_lettings_archives(start_time, full_update, year)
end
write_master_manifest(daily_run_number, lettings_archives_for_manifest.merge(users_archives_for_manifest).merge(organisations_archives_for_manifest))
@ -70,9 +70,9 @@ module Exports
organisations_export_service.export_xml_organisations(full_update:)
end
def get_lettings_archives(start_time, full_update, collection)
def get_lettings_archives(start_time, full_update, collection_year)
lettings_export_service = Exports::LettingsLogExportService.new(@storage_service, start_time)
lettings_export_service.export_xml_lettings_logs(full_update:, collection_year: collection)
lettings_export_service.export_xml_lettings_logs(full_update:, collection_year:)
end
end
end

22
app/services/exports/lettings_log_export_service.rb

@ -5,11 +5,11 @@ module Exports
def export_xml_lettings_logs(full_update: false, collection_year: nil)
archives_for_manifest = {}
collection_years_to_export(collection_year).each do |collection|
recent_export = Export.where(collection:).order("started_at").last
base_number = Export.where(empty_export: false, collection:).maximum(:base_number) || 1
export = build_export_run(collection, base_number, full_update)
archives = write_export_archive(export, collection, recent_export, full_update)
collection_years_to_export(collection_year).each do |year|
recent_export = Export.lettings.where(year:).order("started_at").last
base_number = Export.lettings.where(empty_export: false, year:).maximum(:base_number) || 1
export = build_export_run("lettings", base_number, full_update, year)
archives = write_export_archive(export, year, recent_export, full_update)
archives_for_manifest.merge!(archives)
@ -22,21 +22,21 @@ module Exports
private
def get_archive_name(collection, base_number, increment)
return unless collection
def get_archive_name(year, base_number, increment)
return unless year
base_number_str = "f#{base_number.to_s.rjust(4, '0')}"
increment_str = "inc#{increment.to_s.rjust(4, '0')}"
"core_#{collection}_#{collection + 1}_apr_mar_#{base_number_str}_#{increment_str}".downcase
"core_#{year}_#{year + 1}_apr_mar_#{base_number_str}_#{increment_str}".downcase
end
def retrieve_resources(recent_export, full_update, collection)
def retrieve_resources(recent_export, full_update, year)
if !full_update && recent_export
params = { from: recent_export.started_at, to: @start_time }
LettingsLog.exportable.where("(updated_at >= :from AND updated_at <= :to) OR (values_updated_at IS NOT NULL AND values_updated_at >= :from AND values_updated_at <= :to)", params).filter_by_year(collection)
LettingsLog.exportable.where("(updated_at >= :from AND updated_at <= :to) OR (values_updated_at IS NOT NULL AND values_updated_at >= :from AND values_updated_at <= :to)", params).filter_by_year(year)
else
params = { to: @start_time }
LettingsLog.exportable.where("updated_at <= :to", params).filter_by_year(collection)
LettingsLog.exportable.where("updated_at <= :to", params).filter_by_year(year)
end
end

12
app/services/exports/organisation_export_service.rb

@ -5,9 +5,9 @@ module Exports
def export_xml_organisations(full_update: false)
collection = "organisations"
recent_export = Export.where(collection:).order("started_at").last
recent_export = Export.organisations.order("started_at").last
base_number = Export.where(empty_export: false, collection:).maximum(:base_number) || 1
base_number = Export.organisations.where(empty_export: false).maximum(:base_number) || 1
export = build_export_run(collection, base_number, full_update)
archives_for_manifest = write_export_archive(export, collection, recent_export, full_update)
@ -19,15 +19,13 @@ module Exports
private
def get_archive_name(collection, base_number, increment)
return unless collection
def get_archive_name(_year, base_number, increment)
base_number_str = "f#{base_number.to_s.rjust(4, '0')}"
increment_str = "inc#{increment.to_s.rjust(4, '0')}"
"#{collection}_2024_2025_apr_mar_#{base_number_str}_#{increment_str}".downcase
"organisations_2024_2025_apr_mar_#{base_number_str}_#{increment_str}".downcase
end
def retrieve_resources(recent_export, full_update, _collection)
def retrieve_resources(recent_export, full_update, _year)
if !full_update && recent_export
params = { from: recent_export.started_at, to: @start_time }
Organisation.where("(updated_at >= :from AND updated_at <= :to)", params)

12
app/services/exports/user_export_service.rb

@ -5,9 +5,9 @@ module Exports
def export_xml_users(full_update: false)
collection = "users"
recent_export = Export.where(collection:).order("started_at").last
recent_export = Export.users.order("started_at").last
base_number = Export.where(empty_export: false, collection:).maximum(:base_number) || 1
base_number = Export.users.where(empty_export: false).maximum(:base_number) || 1
export = build_export_run(collection, base_number, full_update)
archives_for_manifest = write_export_archive(export, collection, recent_export, full_update)
@ -19,15 +19,13 @@ module Exports
private
def get_archive_name(collection, base_number, increment)
return unless collection
def get_archive_name(_year, base_number, increment)
base_number_str = "f#{base_number.to_s.rjust(4, '0')}"
increment_str = "inc#{increment.to_s.rjust(4, '0')}"
"#{collection}_2024_2025_apr_mar_#{base_number_str}_#{increment_str}".downcase
"users_2024_2025_apr_mar_#{base_number_str}_#{increment_str}".downcase
end
def retrieve_resources(recent_export, full_update, _collection)
def retrieve_resources(recent_export, full_update, _year)
if !full_update && recent_export
params = { from: recent_export.started_at, to: @start_time }
User.where("(updated_at >= :from AND updated_at <= :to)", params)

20
app/services/exports/xml_export_service.rb

@ -11,9 +11,9 @@ module Exports
private
def build_export_run(collection, base_number, full_update)
@logger.info("Building export run for #{collection}")
previous_exports_with_data = Export.where(collection:, empty_export: false)
def build_export_run(collection, base_number, full_update, year = nil)
@logger.info("Building export run for #{[collection, year].join(' ')}")
previous_exports_with_data = Export.where(collection:, year:, empty_export: false)
increment_number = previous_exports_with_data.where(base_number:).maximum(:increment_number) || 1
@ -25,16 +25,16 @@ module Exports
end
if previous_exports_with_data.empty?
return Export.new(collection:, base_number:, started_at: @start_time)
return Export.new(collection:, year:, base_number:, started_at: @start_time)
end
Export.new(collection:, started_at: @start_time, base_number:, increment_number:)
Export.new(collection:, year:, started_at: @start_time, base_number:, increment_number:)
end
def write_export_archive(export, collection, recent_export, full_update)
archive = get_archive_name(collection, export.base_number, export.increment_number) # archive name would be the same for all logs because they're already filtered by year (?)
def write_export_archive(export, year, recent_export, full_update)
archive = get_archive_name(year, export.base_number, export.increment_number)
initial_count = retrieve_resources(recent_export, full_update, collection).count
initial_count = retrieve_resources(recent_export, full_update, year).count
@logger.info("Creating #{archive} - #{initial_count} resources")
return {} if initial_count.zero?
@ -46,12 +46,12 @@ module Exports
loop do
slice = if last_processed_marker.present?
retrieve_resources(recent_export, full_update, collection)
retrieve_resources(recent_export, full_update, year)
.where("created_at > ?", last_processed_marker)
.order(:created_at)
.limit(MAX_XML_RECORDS).to_a
else
retrieve_resources(recent_export, full_update, collection)
retrieve_resources(recent_export, full_update, year)
.order(:created_at)
.limit(MAX_XML_RECORDS).to_a
end

5
db/migrate/20241204100518_add_year_to_export.rb

@ -0,0 +1,5 @@
class AddYearToExport < ActiveRecord::Migration[7.0]
def change
add_column :exports, :year, :integer
end
end

3
db/schema.rb

@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[7.0].define(version: 2024_11_22_154743) do
ActiveRecord::Schema[7.0].define(version: 2024_12_04_100518) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
@ -113,6 +113,7 @@ ActiveRecord::Schema[7.0].define(version: 2024_11_22_154743) do
t.integer "increment_number", default: 1, null: false
t.boolean "empty_export", default: false, null: false
t.string "collection"
t.integer "year"
end
create_table "la_rent_ranges", force: :cascade do |t|

6
lib/tasks/data_export.rake

@ -7,13 +7,13 @@ namespace :core do
end
desc "Export all data XMLs for import into Central Data System (CDS)"
task :full_data_export_xml, %i[collection] => :environment do |_task, args|
task :full_data_export_xml, %i[collection year] => :environment do |_task, args|
collection = args[:collection].presence
collection = collection.to_i if collection.present? && collection.scan(/\D/).empty?
year = args[:year]&.to_i
storage_service = Storage::S3Service.new(Configuration::EnvConfigurationService.new, ENV["EXPORT_BUCKET"])
export_service = Exports::ExportService.new(storage_service)
export_service.export_xml(full_update: true, collection:)
export_service.export_xml(full_update: true, collection:, year:)
end
end

8
lib/tasks/set_export_collection_years.rake

@ -0,0 +1,8 @@
desc "Set export collection years for lettings exports"
task set_export_collection_years: :environment do
Export.where(collection: %w[2022 2023 2024 2025]).find_each do |export|
export.year = export.collection.to_i
export.collection = "lettings"
export.save!
end
end

2
spec/helpers/tab_nav_helper_spec.rb

@ -9,7 +9,7 @@ RSpec.describe TabNavHelper do
describe "#user_cell" do
it "returns user link and email separated by a newline character" do
expected_html = "<a class=\"govuk-link\" href=\"/users\">#{current_user.name}</a>\n<span class=\"govuk-visually-hidden\">User </span><span class=\"govuk-!-font-weight-regular app-!-colour-muted\">#{current_user.email}</span>"
expect(user_cell(current_user)).to match(expected_html)
expect(CGI.unescapeHTML(user_cell(current_user))).to match(expected_html)
end
end

6
spec/lib/tasks/data_export_spec.rb

@ -30,7 +30,7 @@ describe "rake core:data_export", type: task do
context "with all available years" do
it "calls the export service" do
expect(export_service).to receive(:export_xml).with(full_update: true, collection: nil)
expect(export_service).to receive(:export_xml).with(full_update: true, collection: nil, year: nil)
task.invoke
end
@ -38,9 +38,9 @@ describe "rake core:data_export", type: task do
context "with a specific collection" do
it "calls the export service" do
expect(export_service).to receive(:export_xml).with(full_update: true, collection: 2022)
expect(export_service).to receive(:export_xml).with(full_update: true, collection: "lettings", year: 2022)
task.invoke("2022")
task.invoke("lettings", "2022")
end
end
end

41
spec/lib/tasks/set_export_collection_years_spec.rb

@ -0,0 +1,41 @@
require "rails_helper"
require "rake"
RSpec.describe "set_export_collection_years" do
describe ":set_export_collection_years", type: :task do
subject(:task) { Rake::Task["set_export_collection_years"] }
before do
Rake.application.rake_require("tasks/set_export_collection_years")
Rake::Task.define_task(:environment)
task.reenable
end
context "when the rake task is run" do
let!(:lettings_export_2023) { Export.create(collection: "2023", year: nil, started_at: Time.zone.now) }
let!(:lettings_export_2024) { Export.create(collection: "2024", year: nil, started_at: Time.zone.now) }
let!(:updated_lettings_export) { Export.create(collection: "lettings", year: 2023, started_at: Time.zone.now) }
let!(:organisations_export) { Export.create(collection: "organisations", year: nil, started_at: Time.zone.now) }
let!(:users_export) { Export.create(collection: "users", year: nil, started_at: Time.zone.now) }
it "correctly updates collection years" do
task.invoke
expect(lettings_export_2023.reload.collection).to eq("lettings")
expect(lettings_export_2023.year).to eq(2023)
expect(lettings_export_2024.reload.collection).to eq("lettings")
expect(lettings_export_2024.year).to eq(2024)
expect(updated_lettings_export.reload.collection).to eq("lettings")
expect(updated_lettings_export.year).to eq(2023)
expect(organisations_export.reload.collection).to eq("organisations")
expect(organisations_export.year).to eq(nil)
expect(users_export.reload.collection).to eq("users")
expect(users_export.year).to eq(nil)
end
end
end
end

18
spec/services/exports/lettings_log_export_service_spec.rb

@ -207,15 +207,15 @@ RSpec.describe Exports::LettingsLogExportService do
it "generates multiple ZIP export files with the expected filenames" do
expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args)
expect(storage_service).to receive(:write_file).with(expected_zip_filename2, any_args)
expect(Rails.logger).to receive(:info).with("Building export run for 2021")
expect(Rails.logger).to receive(:info).with("Building export run for lettings 2021")
expect(Rails.logger).to receive(:info).with("Creating core_2021_2022_apr_mar_f0001_inc0001 - 1 resources")
expect(Rails.logger).to receive(:info).with("Added core_2021_2022_apr_mar_f0001_inc0001_pt001.xml")
expect(Rails.logger).to receive(:info).with("Writing core_2021_2022_apr_mar_f0001_inc0001.zip")
expect(Rails.logger).to receive(:info).with("Building export run for 2022")
expect(Rails.logger).to receive(:info).with("Building export run for lettings 2022")
expect(Rails.logger).to receive(:info).with("Creating core_2022_2023_apr_mar_f0001_inc0001 - 1 resources")
expect(Rails.logger).to receive(:info).with("Added core_2022_2023_apr_mar_f0001_inc0001_pt001.xml")
expect(Rails.logger).to receive(:info).with("Writing core_2022_2023_apr_mar_f0001_inc0001.zip")
expect(Rails.logger).to receive(:info).with("Building export run for 2023")
expect(Rails.logger).to receive(:info).with("Building export run for lettings 2023")
expect(Rails.logger).to receive(:info).with("Creating core_2023_2024_apr_mar_f0001_inc0001 - 0 resources")
export_service.export_xml_lettings_logs
@ -223,7 +223,7 @@ RSpec.describe Exports::LettingsLogExportService do
it "generates zip export files only for specified year" do
expect(storage_service).to receive(:write_file).with(expected_zip_filename2, any_args)
expect(Rails.logger).to receive(:info).with("Building export run for 2022")
expect(Rails.logger).to receive(:info).with("Building export run for lettings 2022")
expect(Rails.logger).to receive(:info).with("Creating core_2022_2023_apr_mar_f0001_inc0001 - 1 resources")
expect(Rails.logger).to receive(:info).with("Added core_2022_2023_apr_mar_f0001_inc0001_pt001.xml")
expect(Rails.logger).to receive(:info).with("Writing core_2022_2023_apr_mar_f0001_inc0001.zip")
@ -236,21 +236,21 @@ RSpec.describe Exports::LettingsLogExportService do
let(:expected_zip_filename2) { "core_2022_2023_apr_mar_f0001_inc0001.zip" }
before do
Export.new(started_at: Time.zone.yesterday, base_number: 7, increment_number: 3, collection: 2021).save!
Export.new(started_at: Time.zone.yesterday, base_number: 7, increment_number: 3, collection: "lettings", year: 2021).save!
end
it "generates multiple ZIP export files with different base numbers in the filenames" do
expect(storage_service).to receive(:write_file).with(expected_zip_filename, any_args)
expect(storage_service).to receive(:write_file).with(expected_zip_filename2, any_args)
expect(Rails.logger).to receive(:info).with("Building export run for 2021")
expect(Rails.logger).to receive(:info).with("Building export run for lettings 2021")
expect(Rails.logger).to receive(:info).with("Creating core_2021_2022_apr_mar_f0007_inc0004 - 1 resources")
expect(Rails.logger).to receive(:info).with("Added core_2021_2022_apr_mar_f0007_inc0004_pt001.xml")
expect(Rails.logger).to receive(:info).with("Writing core_2021_2022_apr_mar_f0007_inc0004.zip")
expect(Rails.logger).to receive(:info).with("Building export run for 2022")
expect(Rails.logger).to receive(:info).with("Building export run for lettings 2022")
expect(Rails.logger).to receive(:info).with("Creating core_2022_2023_apr_mar_f0001_inc0001 - 1 resources")
expect(Rails.logger).to receive(:info).with("Added core_2022_2023_apr_mar_f0001_inc0001_pt001.xml")
expect(Rails.logger).to receive(:info).with("Writing core_2022_2023_apr_mar_f0001_inc0001.zip")
expect(Rails.logger).to receive(:info).with("Building export run for 2023")
expect(Rails.logger).to receive(:info).with("Building export run for lettings 2023")
expect(Rails.logger).to receive(:info).with("Creating core_2023_2024_apr_mar_f0001_inc0001 - 0 resources")
export_service.export_xml_lettings_logs
@ -329,7 +329,7 @@ RSpec.describe Exports::LettingsLogExportService do
context "when this is a second export (partial)" do
before do
start_time = Time.zone.local(2022, 6, 1)
Export.new(started_at: start_time, collection: 2021).save!
Export.new(started_at: start_time, collection: "lettings", year: 2021).save!
end
it "does not add any entry for the master manifest (no lettings logs)" do

Loading…
Cancel
Save