@ -10,14 +10,12 @@ module Exports
def export_xml_lettings_logs ( full_update : false )
def export_xml_lettings_logs ( full_update : false )
start_time = Time . zone . now
start_time = Time . zone . now
logs_by_collection = retrieve_lettings_logs ( start_time , full_update ) . group_by ( & :collection_start_year )
daily_run_number = get_daily_run_number
daily_run_number = get_daily_run_number
archives_for_manifest = { }
archives_for_manifest = { }
base_number = LogsExport . where ( empty_export : false ) . maximum ( :base_number ) || 1
base_number = LogsExport . where ( empty_export : false ) . maximum ( :base_number ) || 1
available_collection_years . each do | collection |
available_collection_years . each do | collection |
lettings_logs = logs_by_collection . fetch ( collection , LettingsLog . none )
export = build_export_run ( collection , start_time , base_number , full_update )
export = build_export_run ( collection , start_time , base_number , full_update )
archives = write_export_archive ( export , lettings_logs )
archives = write_export_archive ( export , collection , start_time , full_update )
archives_for_manifest . merge! ( archives )
archives_for_manifest . merge! ( archives )
@ -65,63 +63,58 @@ module Exports
@storage_service . write_file ( file_path , string_io )
@storage_service . write_file ( file_path , string_io )
end
end
# Builds the archive name for an export: a "core_" prefix, the collection year,
# the following year, a month-range segment, and the base and increment numbers
# (each left-padded with '0' to four characters), all downcased.
#
# NOTE(review): two revisions of this method appear interleaved line by line
# in this span (two `def` lines, doubled `end`s). One revision takes a
# lettings_log and derives the year and the month names from it; the other
# takes the collection year directly and uses a fixed "apr_mar" segment.
# Confirm which revision is current before changing anything.
#
# Returns nil (bare `return`) when the guard value is missing.
def get_archive_name ( lettings_log , base_number , increment )
def get_archive_name ( collection , base_number , increment )
# Guard: no name can be built without a start date / collection year.
return unless lettings_log . startdate
return unless collection
collection_start = lettings_log . collection_start_year
# "%b" formats the date as an abbreviated month name.
start_month = collection_start_date ( lettings_log . startdate ) . strftime ( " %b " )
end_month = collection_end_date ( lettings_log . startdate ) . strftime ( " %b " )
base_number_str = " f #{ base_number . to_s . rjust ( 4 , '0' ) } "
base_number_str = " f #{ base_number . to_s . rjust ( 4 , '0' ) } "
increment_str = " inc #{ increment . to_s . rjust ( 4 , '0' ) } "
increment_str = " inc #{ increment . to_s . rjust ( 4 , '0' ) } "
" core_ #{ collection_start } _ #{ collection_start + 1 } _ #{ start_month } _ #{ end_month } _#{ base_number_str } _ #{ increment_str } " . downcase
" core_ #{ collection } _ #{ collection + 1 } _apr_mar_ #{ base_number_str } _ #{ increment_str } " . downcase
end
end
# Writes the export zip archive(s) through @storage_service and returns a hash
# that maps each archive name to the time it was written.
#
# NOTE(review): two revisions of this method appear interleaved line by line
# in this span (two `def` lines, doubled `end`s, pairs of near-identical
# statements), so it cannot be read as a single method body:
#   * (export, lettings_logs): buckets the given logs by archive name, then for
#     each bucket builds a manifest, adds one XML part per MAX_XML_RECORDS
#     slice, writes the zip and records Time.zone.now in archive_datetimes,
#     which is returned.
#   * (export, collection, start_time, full_update): derives one archive name
#     from the collection, counts the matching logs, returns {} when there are
#     none, then pages through the logs in created_at order (at most
#     MAX_XML_RECORDS per page), adds one XML part per page, writes the zip and
#     returns a single-entry hash of archive name to Time.zone.now.
# Confirm which revision is current before changing anything.
def write_export_archive ( export , lettings_logs )
def write_export_archive ( export , collection , start_time , full_update )
@logger . info ( " Writing export archives " )
@logger . info ( " Writing export archives " )
# Order lettings logs per archive
archive = get_archive_name ( collection , export . base_number , export . increment_number ) # archive name would be the same for all logs because they're already filtered by year (?)
lettings_logs_per_archive = { }
lettings_logs . each do | lettings_log |
# Write archive
archive = get_archive_name ( lettings_log , export . base_number , export . increment_number )
logs_count = retrieve_lettings_logs ( start_time , full_update ) . filter_by_year ( collection ) . count
next unless archive
@logger . info ( " Writing #{ archive } - #{ logs_count } logs " )
# NOTE(review): in the collection-based revision the manifest is built before
# the zero-count early return below, so it is computed even when unused.
manifest_xml = build_manifest_xml ( logs_count )
if lettings_logs_per_archive . key? ( archive )
return { } if logs_count . zero?
lettings_logs_per_archive [ archive ] << lettings_log
else
zip_file = Zip :: File . open_buffer ( StringIO . new )
lettings_logs_per_archive [ archive ] = [ lettings_log ]
zip_file . add ( " manifest.xml " , manifest_xml )
end
end
part_number = 1
last_processed_marker = nil
# Write all archives
archive_datetimes = { }
loop do
@logger . info ( " Following archives to write: " )
# NOTE(review): pages are selected with created_at strictly greater than the
# last record of the previous page; rows sharing that exact created_at across
# a page boundary would presumably be skipped — confirm.
lettings_logs_slice = if last_processed_marker . present?
# rubocop:disable Style/CombinableLoops
retrieve_lettings_logs ( start_time , full_update ) . filter_by_year ( collection )
lettings_logs_per_archive . each do | archive , lettings_logs_to_export |
. where ( " created_at > ? " , last_processed_marker )
@logger . info ( " #{ archive } - #{ lettings_logs_to_export . count } logs " )
. order ( :created_at )
end
. limit ( MAX_XML_RECORDS )
lettings_logs_per_archive . each do | archive , lettings_logs_to_export |
else
manifest_xml = build_manifest_xml ( lettings_logs_to_export . count )
retrieve_lettings_logs ( start_time , full_update ) . filter_by_year ( collection )
zip_file = Zip :: File . open_buffer ( StringIO . new )
. order ( :created_at )
zip_file . add ( " manifest.xml " , manifest_xml )
. limit ( MAX_XML_RECORDS )
end
part_number = 1
lettings_logs_to_export . each_slice ( MAX_XML_RECORDS ) do | lettings_logs_slice |
# An empty page ends the paging loop.
break if lettings_logs_slice . empty?
data_xml = build_export_xml ( lettings_logs_slice )
# Part suffix: "pt" plus the part number left-padded with '0' to three characters.
part_number_str = " pt #{ part_number . to_s . rjust ( 3 , '0' ) } "
data_xml = build_export_xml ( lettings_logs_slice )
@logger . info ( " Adding #{ archive } _ #{ part_number_str } .xml " )
part_number_str = " pt #{ part_number . to_s . rjust ( 3 , '0' ) } "
zip_file . add ( " #{ archive } _ #{ part_number_str } .xml " , data_xml )
@logger . info ( " Adding #{ archive } _ #{ part_number_str } .xml " )
part_number += 1
zip_file . add ( " #{ archive } _ #{ part_number_str } .xml " , data_xml )
end
part_number += 1
# Remember where this page ended so the next query can resume after it.
last_processed_marker = lettings_logs_slice . last . created_at
# Required by S3 to avoid Aws::S3::Errors::BadDigest
zip_io = zip_file . write_buffer
zip_io . rewind
# NOTE(review): "Writting" in the log message below looks like a typo for "Writing".
@logger . info ( " Writting #{ archive } .zip " )
@storage_service . write_file ( " #{ archive } .zip " , zip_io )
archive_datetimes [ archive ] = Time . zone . now
end
end
# rubocop:enable Style/CombinableLoops
archive_datetimes
# Required by S3 to avoid Aws::S3::Errors::BadDigest
zip_io = zip_file . write_buffer
zip_io . rewind
@logger . info ( " Writting #{ archive } .zip " )
@storage_service . write_file ( " #{ archive } .zip " , zip_io )
{ archive = > Time . zone . now }
end
end
def retrieve_lettings_logs ( start_time , full_update )
def retrieve_lettings_logs ( start_time , full_update )