diff --git a/app/helpers/invalid_logs_helper.rb b/app/helpers/invalid_logs_helper.rb index 4cd0e2976..e2b945151 100644 --- a/app/helpers/invalid_logs_helper.rb +++ b/app/helpers/invalid_logs_helper.rb @@ -1,29 +1,52 @@ module InvalidLogsHelper def count_and_display_invalid_logs(model, log_type, year) + Rails.logger.info "Starting to count invalid #{log_type} for year #{year}..." invalid_ids = [] total_logs_seen = 0 - model.filter_by_year(year).find_in_batches(batch_size: 1000) do |batch| + model.filter_by_year(year).find_in_batches(batch_size: 1000).with_index(1) do |batch, batch_index| + Rails.logger.info "Processing batch #{batch_index} with #{batch.size} logs..." batch.each do |log| + next unless log.status == "completed" + total_logs_seen += 1 - invalid_ids << log.id unless log.valid? + + next unless !log.valid? || log.incomplete_subsections.any? || log.incomplete_questions.any? + + invalid_ids << log.id end - Rails.logger.debug "Progress: #{invalid_ids.size} invalid logs found out of #{total_logs_seen} logs seen so far." + Rails.logger.info "Batch #{batch_index} complete. Progress: #{invalid_ids.size} invalid logs found out of #{total_logs_seen} logs seen so far." end - Rails.logger.info "Number of invalid #{log_type} for year #{year}: #{invalid_ids.size}" - Rails.logger.info "Invalid #{log_type} IDs: #{invalid_ids.join(', ')}" + Rails.logger.info "Counting complete for #{log_type}. Total invalid logs: #{invalid_ids.size}, Total logs seen: #{total_logs_seen}." + Rails.logger.info "Invalid #{log_type} IDs: #{invalid_ids.join(', ')}" if invalid_ids.any? + Rails.logger.info "--------------------------------" end def surface_invalid_logs(model, log_type, year) - model.filter_by_year(year).find_in_batches(batch_size: 1000) do |batch| + Rails.logger.info "Surfacing invalid #{log_type} for year #{year}..." + invalid_ids = [] + total_logs_seen = 0 + + model.filter_by_year(year).find_in_batches(batch_size: 1000).with_index(1) do |batch, batch_index| + Rails.logger.info "Processing batch #{batch_index} with #{batch.size} logs..." batch.each do |log| - next if log.valid? + next unless log.status == "completed" + + total_logs_seen += 1 + next unless !log.valid? || log.incomplete_subsections.any? || log.incomplete_questions.any? + + invalid_ids << log.id error_messages = log.errors.full_messages.join(";\n") Rails.logger.info "#{log_type} ID: #{log.id} \n Errors----\n #{error_messages}" end + Rails.logger.info "Batch #{batch_index} complete. Processed #{batch.size} logs." end + + Rails.logger.info "Surfacing complete for #{log_type}. Total invalid logs: #{invalid_ids.size}, Total logs seen: #{total_logs_seen}." + Rails.logger.info "Invalid #{log_type} IDs: #{invalid_ids.join(', ')}" if invalid_ids.any? + Rails.logger.info "--------------------------------" end end diff --git a/app/models/log.rb b/app/models/log.rb index 31072dc44..b3294ae32 100644 --- a/app/models/log.rb +++ b/app/models/log.rb @@ -317,6 +317,20 @@ class Log < ApplicationRecord !!public_send("age#{person_num}_known")&.zero? end + def incomplete_subsections + form.subsections.reject do |subsection| + subsection.complete?(self) || subsection.not_displayed_in_tasklist?(self) + end + end + + def incomplete_questions + incomplete_subsections.flat_map do |subsection| + subsection.questions.select do |question| + question.displayed_to_user?(self) && question.unanswered?(self) + end + end + end + private # Handle logs that are older than previous collection start date