Browse Source

CLDC-2684 Keep fewer logs in memory when running exports (#1859)

* Keep fewer logs in memory

* refactor write_export_archive to only hold MAX_XML_RECORDS in memory at a time
pull/1861/head
kosiakkatrina 1 year ago committed by GitHub
parent
commit
d6d34a3063
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 65
      app/services/exports/lettings_log_export_service.rb

65
app/services/exports/lettings_log_export_service.rb

@ -10,14 +10,12 @@ module Exports
def export_xml_lettings_logs(full_update: false)
start_time = Time.zone.now
logs_by_collection = retrieve_lettings_logs(start_time, full_update).group_by(&:collection_start_year)
daily_run_number = get_daily_run_number
archives_for_manifest = {}
base_number = LogsExport.where(empty_export: false).maximum(:base_number) || 1
available_collection_years.each do |collection|
lettings_logs = logs_by_collection.fetch(collection, LettingsLog.none)
export = build_export_run(collection, start_time, base_number, full_update)
archives = write_export_archive(export, lettings_logs)
archives = write_export_archive(export, collection, start_time, full_update)
archives_for_manifest.merge!(archives)
@ -65,51 +63,50 @@ module Exports
@storage_service.write_file(file_path, string_io)
end
def get_archive_name(lettings_log, base_number, increment)
return unless lettings_log.startdate
def get_archive_name(collection, base_number, increment)
return unless collection
collection_start = lettings_log.collection_start_year
start_month = collection_start_date(lettings_log.startdate).strftime("%b")
end_month = collection_end_date(lettings_log.startdate).strftime("%b")
base_number_str = "f#{base_number.to_s.rjust(4, '0')}"
increment_str = "inc#{increment.to_s.rjust(4, '0')}"
"core_#{collection_start}_#{collection_start + 1}_#{start_month}_#{end_month}_#{base_number_str}_#{increment_str}".downcase
"core_#{collection}_#{collection + 1}_apr_mar_#{base_number_str}_#{increment_str}".downcase
end
def write_export_archive(export, lettings_logs)
def write_export_archive(export, collection, start_time, full_update)
@logger.info("Writing export archives")
# Order lettings logs per archive
lettings_logs_per_archive = {}
lettings_logs.each do |lettings_log|
archive = get_archive_name(lettings_log, export.base_number, export.increment_number)
next unless archive
archive = get_archive_name(collection, export.base_number, export.increment_number) # archive name would be the same for all logs because they're already filtered by year (?)
if lettings_logs_per_archive.key?(archive)
lettings_logs_per_archive[archive] << lettings_log
else
lettings_logs_per_archive[archive] = [lettings_log]
end
end
# Write archive
logs_count = retrieve_lettings_logs(start_time, full_update).filter_by_year(collection).count
@logger.info("Writing #{archive} - #{logs_count} logs")
manifest_xml = build_manifest_xml(logs_count)
return {} if logs_count.zero?
# Write all archives
archive_datetimes = {}
@logger.info("Following archives to write:")
# rubocop:disable Style/CombinableLoops
lettings_logs_per_archive.each do |archive, lettings_logs_to_export|
@logger.info("#{archive} - #{lettings_logs_to_export.count} logs")
end
lettings_logs_per_archive.each do |archive, lettings_logs_to_export|
manifest_xml = build_manifest_xml(lettings_logs_to_export.count)
zip_file = Zip::File.open_buffer(StringIO.new)
zip_file.add("manifest.xml", manifest_xml)
part_number = 1
lettings_logs_to_export.each_slice(MAX_XML_RECORDS) do |lettings_logs_slice|
last_processed_marker = nil
loop do
lettings_logs_slice = if last_processed_marker.present?
retrieve_lettings_logs(start_time, full_update).filter_by_year(collection)
.where("created_at > ?", last_processed_marker)
.order(:created_at)
.limit(MAX_XML_RECORDS)
else
retrieve_lettings_logs(start_time, full_update).filter_by_year(collection)
.order(:created_at)
.limit(MAX_XML_RECORDS)
end
break if lettings_logs_slice.empty?
data_xml = build_export_xml(lettings_logs_slice)
part_number_str = "pt#{part_number.to_s.rjust(3, '0')}"
@logger.info("Adding #{archive}_#{part_number_str}.xml")
zip_file.add("#{archive}_#{part_number_str}.xml", data_xml)
part_number += 1
last_processed_marker = lettings_logs_slice.last.created_at
end
# Required by S3 to avoid Aws::S3::Errors::BadDigest
@ -117,11 +114,7 @@ module Exports
zip_io.rewind
@logger.info("Writting #{archive}.zip")
@storage_service.write_file("#{archive}.zip", zip_io)
archive_datetimes[archive] = Time.zone.now
end
# rubocop:enable Style/CombinableLoops
archive_datetimes
{ archive => Time.zone.now }
end
def retrieve_lettings_logs(start_time, full_update)

Loading…
Cancel
Save