Browse Source

CLDC-2684 Keep fewer logs in memory when running exports (#1859)

* Keep fewer logs in memory

* Refactor write_export_archive to hold at most MAX_XML_RECORDS logs in memory at a time
pull/1861/head
kosiakkatrina 1 year ago committed by GitHub
parent
commit
d6d34a3063
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 97
      app/services/exports/lettings_log_export_service.rb

97
app/services/exports/lettings_log_export_service.rb

@ -10,14 +10,12 @@ module Exports
def export_xml_lettings_logs(full_update: false) def export_xml_lettings_logs(full_update: false)
start_time = Time.zone.now start_time = Time.zone.now
logs_by_collection = retrieve_lettings_logs(start_time, full_update).group_by(&:collection_start_year)
daily_run_number = get_daily_run_number daily_run_number = get_daily_run_number
archives_for_manifest = {} archives_for_manifest = {}
base_number = LogsExport.where(empty_export: false).maximum(:base_number) || 1 base_number = LogsExport.where(empty_export: false).maximum(:base_number) || 1
available_collection_years.each do |collection| available_collection_years.each do |collection|
lettings_logs = logs_by_collection.fetch(collection, LettingsLog.none)
export = build_export_run(collection, start_time, base_number, full_update) export = build_export_run(collection, start_time, base_number, full_update)
archives = write_export_archive(export, lettings_logs) archives = write_export_archive(export, collection, start_time, full_update)
archives_for_manifest.merge!(archives) archives_for_manifest.merge!(archives)
@ -65,63 +63,58 @@ module Exports
@storage_service.write_file(file_path, string_io) @storage_service.write_file(file_path, string_io)
end end
def get_archive_name(lettings_log, base_number, increment) def get_archive_name(collection, base_number, increment)
return unless lettings_log.startdate return unless collection
collection_start = lettings_log.collection_start_year
start_month = collection_start_date(lettings_log.startdate).strftime("%b")
end_month = collection_end_date(lettings_log.startdate).strftime("%b")
base_number_str = "f#{base_number.to_s.rjust(4, '0')}" base_number_str = "f#{base_number.to_s.rjust(4, '0')}"
increment_str = "inc#{increment.to_s.rjust(4, '0')}" increment_str = "inc#{increment.to_s.rjust(4, '0')}"
"core_#{collection_start}_#{collection_start + 1}_#{start_month}_#{end_month}_#{base_number_str}_#{increment_str}".downcase "core_#{collection}_#{collection + 1}_apr_mar_#{base_number_str}_#{increment_str}".downcase
end end
def write_export_archive(export, lettings_logs) def write_export_archive(export, collection, start_time, full_update)
@logger.info("Writing export archives") @logger.info("Writing export archives")
# Order lettings logs per archive archive = get_archive_name(collection, export.base_number, export.increment_number) # archive name would be the same for all logs because they're already filtered by year (?)
lettings_logs_per_archive = {}
lettings_logs.each do |lettings_log| # Write archive
archive = get_archive_name(lettings_log, export.base_number, export.increment_number) logs_count = retrieve_lettings_logs(start_time, full_update).filter_by_year(collection).count
next unless archive @logger.info("Writing #{archive} - #{logs_count} logs")
manifest_xml = build_manifest_xml(logs_count)
if lettings_logs_per_archive.key?(archive) return {} if logs_count.zero?
lettings_logs_per_archive[archive] << lettings_log
else zip_file = Zip::File.open_buffer(StringIO.new)
lettings_logs_per_archive[archive] = [lettings_log] zip_file.add("manifest.xml", manifest_xml)
end
end part_number = 1
last_processed_marker = nil
# Write all archives
archive_datetimes = {} loop do
@logger.info("Following archives to write:") lettings_logs_slice = if last_processed_marker.present?
# rubocop:disable Style/CombinableLoops retrieve_lettings_logs(start_time, full_update).filter_by_year(collection)
lettings_logs_per_archive.each do |archive, lettings_logs_to_export| .where("created_at > ?", last_processed_marker)
@logger.info("#{archive} - #{lettings_logs_to_export.count} logs") .order(:created_at)
end .limit(MAX_XML_RECORDS)
lettings_logs_per_archive.each do |archive, lettings_logs_to_export| else
manifest_xml = build_manifest_xml(lettings_logs_to_export.count) retrieve_lettings_logs(start_time, full_update).filter_by_year(collection)
zip_file = Zip::File.open_buffer(StringIO.new) .order(:created_at)
zip_file.add("manifest.xml", manifest_xml) .limit(MAX_XML_RECORDS)
end
part_number = 1
lettings_logs_to_export.each_slice(MAX_XML_RECORDS) do |lettings_logs_slice| break if lettings_logs_slice.empty?
data_xml = build_export_xml(lettings_logs_slice)
part_number_str = "pt#{part_number.to_s.rjust(3, '0')}" data_xml = build_export_xml(lettings_logs_slice)
@logger.info("Adding #{archive}_#{part_number_str}.xml") part_number_str = "pt#{part_number.to_s.rjust(3, '0')}"
zip_file.add("#{archive}_#{part_number_str}.xml", data_xml) @logger.info("Adding #{archive}_#{part_number_str}.xml")
part_number += 1 zip_file.add("#{archive}_#{part_number_str}.xml", data_xml)
end part_number += 1
last_processed_marker = lettings_logs_slice.last.created_at
# Required by S3 to avoid Aws::S3::Errors::BadDigest
zip_io = zip_file.write_buffer
zip_io.rewind
@logger.info("Writting #{archive}.zip")
@storage_service.write_file("#{archive}.zip", zip_io)
archive_datetimes[archive] = Time.zone.now
end end
# rubocop:enable Style/CombinableLoops
archive_datetimes # Required by S3 to avoid Aws::S3::Errors::BadDigest
zip_io = zip_file.write_buffer
zip_io.rewind
@logger.info("Writting #{archive}.zip")
@storage_service.write_file("#{archive}.zip", zip_io)
{ archive => Time.zone.now }
end end
def retrieve_lettings_logs(start_time, full_update) def retrieve_lettings_logs(start_time, full_update)

Loading…
Cancel
Save