From cae731ed6f277673ae7201326abc4a35bcf03a12 Mon Sep 17 00:00:00 2001 From: Phil Lee Date: Wed, 26 Apr 2023 12:01:03 +0100 Subject: [PATCH] CLDC-1888 Bulk upload handles duplicates (#1334) # Context - https://digital.dclg.gov.uk/jira/browse/CLDC-1888 - This is a continuation of https://github.com/communitiesuk/submit-social-housing-lettings-and-sales-data/pull/1277 - When bulk uploading we want to check users are not uploading data that already exists to prevent them submitting duplicate # Changes - This feature is behind a feature toggle. it has been disabled for staging for testing purposes but available in all other environments - If a log already exists based off certain fields add errors to the associated fields - We discount any hidden logs and only check "active" logs - Added memoization to `#valid?` as an optimisation --- .../bulk_upload/lettings/validator.rb | 16 +++- .../lettings/year2022/row_parser.rb | 58 +++++++++++--- .../lettings/year2023/row_parser.rb | 65 +++++++++++++--- app/services/feature_toggle.rb | 4 + spec/factories/bulk_upload_error.rb | 2 +- .../lettings/year2022/row_parser_spec.rb | 70 +++++++++++++++++ .../lettings/year2023/row_parser_spec.rb | 76 ++++++++++++++++++- spec/services/bulk_upload/processor_spec.rb | 1 + 8 files changed, 263 insertions(+), 29 deletions(-) diff --git a/app/services/bulk_upload/lettings/validator.rb b/app/services/bulk_upload/lettings/validator.rb index e78c1138b..ae272a0fe 100644 --- a/app/services/bulk_upload/lettings/validator.rb +++ b/app/services/bulk_upload/lettings/validator.rb @@ -43,8 +43,10 @@ class BulkUpload::Lettings::Validator def create_logs? return false if any_setup_errors? return false if row_parsers.any?(&:block_log_creation?) + return false if any_logs_already_exist? && FeatureToggle.bulk_upload_duplicate_log_check_enabled? + return false if any_logs_invalid? - row_parsers.all? { |row_parser| row_parser.log.valid? } + true end def self.question_for_field(field) @@ -59,8 +61,6 @@ class BulkUpload::Lettings::Validator .positive? end -private - def over_column_error_threshold? fields = ("field_1".."field_134").to_a percentage_threshold = (row_parsers.size * COLUMN_PERCENTAGE_ERROR_THRESHOLD).ceil @@ -74,6 +74,16 @@ private end end + def any_logs_already_exist? + row_parsers.any?(&:log_already_exists?) + end + +private + + def any_logs_invalid? + row_parsers.any? { |row_parser| row_parser.log.invalid? } + end + def csv_parser @csv_parser ||= case bulk_upload.year when 2022 diff --git a/app/services/bulk_upload/lettings/year2022/row_parser.rb b/app/services/bulk_upload/lettings/year2022/row_parser.rb index b6ba93dc5..9d273dab5 100644 --- a/app/services/bulk_upload/lettings/year2022/row_parser.rb +++ b/app/services/bulk_upload/lettings/year2022/row_parser.rb @@ -309,6 +309,7 @@ class BulkUpload::Lettings::Year2022::RowParser validate :validate_no_disabled_needs_conjunction, on: :after_log validate :validate_dont_know_disabled_needs_conjunction, on: :after_log validate :validate_no_and_dont_know_disabled_needs_conjunction, on: :after_log + validate :validate_if_log_already_exists, on: :after_log, if: -> { FeatureToggle.bulk_upload_duplicate_log_check_enabled? } validate :validate_owning_org_data_given, on: :after_log validate :validate_owning_org_exists, on: :after_log @@ -340,9 +341,11 @@ class BulkUpload::Lettings::Year2022::RowParser end def valid? + return @valid if @valid + errors.clear - return true if blank_row? + return @valid = true if blank_row? super(:before_log) before_errors = errors.dup @@ -362,7 +365,7 @@ class BulkUpload::Lettings::Year2022::RowParser end end - errors.blank? + @valid = errors.blank? end def blank_row? @@ -395,6 +398,12 @@ class BulkUpload::Lettings::Year2022::RowParser field_100 end + def log_already_exists? + @log_already_exists ||= LettingsLog + .where(status: %w[not_started in_progress completed]) + .exists?(duplicate_check_fields.index_with { |field| log.public_send(field) }) + end + private def validate_declaration_acceptance @@ -439,6 +448,20 @@ private @created_by ||= User.find_by(email: field_112) end + def duplicate_check_fields + %w[ + startdate + age1 + sex1 + ecstat1 + owning_organisation + tcharge + propcode + postcode_full + location + ] + end + def validate_location_related return if scheme.blank? || location.blank? @@ -697,6 +720,25 @@ private log.form.setup_sections[0].subsections[0].questions.include?(question) end + def validate_if_log_already_exists + if log_already_exists? + error_message = "This is a duplicate log" + + errors.add(:field_5, error_message) # location + errors.add(:field_12, error_message) # age1 + errors.add(:field_20, error_message) # sex1 + errors.add(:field_35, error_message) # ecstat1 + errors.add(:field_84, error_message) # tcharge + errors.add(:field_96, error_message) # startdate + errors.add(:field_97, error_message) # startdate + errors.add(:field_98, error_message) # startdate + errors.add(:field_100, error_message) # propcode + errors.add(:field_108, error_message) # postcode_full + errors.add(:field_109, error_message) # postcode_full + errors.add(:field_111, error_message) # owning_organisation + end + end + def field_mapping_for_errors { lettype: [:field_1], @@ -896,18 +938,10 @@ private Organisation.find_by_id_on_multiple_fields(field_111) end - def owning_organisation_id - owning_organisation&.id - end - def managing_organisation Organisation.find_by_id_on_multiple_fields(field_113) end - def managing_organisation_id - managing_organisation&.id - end - def attributes_for_log attributes = {} @@ -916,8 +950,8 @@ private attributes["la"] = field_107 attributes["postcode_known"] = postcode_known attributes["postcode_full"] = postcode_full - attributes["owning_organisation_id"] = owning_organisation_id - attributes["managing_organisation_id"] = managing_organisation_id + attributes["owning_organisation"] = owning_organisation + attributes["managing_organisation"] = managing_organisation attributes["renewal"] = renewal attributes["scheme"] = scheme attributes["location"] = location diff --git a/app/services/bulk_upload/lettings/year2023/row_parser.rb b/app/services/bulk_upload/lettings/year2023/row_parser.rb index 93b6d6ade..4393d1af2 100644 --- a/app/services/bulk_upload/lettings/year2023/row_parser.rb +++ b/app/services/bulk_upload/lettings/year2023/row_parser.rb @@ -311,6 +311,7 @@ class BulkUpload::Lettings::Year2023::RowParser validate :validate_no_disabled_needs_conjunction, on: :after_log validate :validate_dont_know_disabled_needs_conjunction, on: :after_log validate :validate_no_and_dont_know_disabled_needs_conjunction, on: :after_log + validate :validate_if_log_already_exists, on: :after_log, if: -> { FeatureToggle.bulk_upload_duplicate_log_check_enabled? } validate :validate_owning_org_data_given, on: :after_log validate :validate_owning_org_exists, on: :after_log @@ -343,9 +344,11 @@ class BulkUpload::Lettings::Year2023::RowParser end def valid? + return @valid if @valid + errors.clear - return true if blank_row? + return @valid = true if blank_row? super(:before_log) before_errors = errors.dup @@ -365,7 +368,7 @@ class BulkUpload::Lettings::Year2023::RowParser end end - errors.blank? + @valid = errors.blank? end def blank_row? @@ -398,6 +401,12 @@ class BulkUpload::Lettings::Year2023::RowParser field_14 end + def log_already_exists? + @log_already_exists ||= LettingsLog + .where(status: %w[not_started in_progress completed]) + .exists?(duplicate_check_fields.index_with { |field| log.public_send(field) }) + end + private def validate_declaration_acceptance @@ -448,6 +457,26 @@ private end end + def duplicate_check_fields + %w[ + startdate + age1 + sex1 + ecstat1 + owning_organisation + tcharge + propcode + postcode_full + location + ] + end + + def validate_needs_type_present + if field_4.blank? + errors.add(:field_4, I18n.t("validations.not_answered", question: "needs type")) + end + end + def start_date return if field_7.blank? || field_8.blank? || field_9.blank? @@ -679,6 +708,26 @@ private log.form.setup_sections[0].subsections[0].questions.include?(question) end + def validate_if_log_already_exists + if log_already_exists? + error_message = "This is a duplicate log" + + errors.add(:field_1, error_message) # owning_organisation + errors.add(:field_7, error_message) # startdate + errors.add(:field_8, error_message) # startdate + errors.add(:field_9, error_message) # startdate + errors.add(:field_14, error_message) # propcode + errors.add(:field_17, error_message) # location + errors.add(:field_23, error_message) # postcode_full + errors.add(:field_24, error_message) # postcode_full + errors.add(:field_25, error_message) # postcode_full + errors.add(:field_46, error_message) # age1 + errors.add(:field_47, error_message) # sex1 + errors.add(:field_50, error_message) # ecstat1 + errors.add(:field_132, error_message) # tcharge + end + end + def field_mapping_for_errors { lettype: [:field_5], @@ -855,8 +904,8 @@ private attributes["la"] = field_25 attributes["postcode_known"] = postcode_known attributes["postcode_full"] = postcode_full - attributes["owning_organisation_id"] = owning_organisation_id - attributes["managing_organisation_id"] = managing_organisation_id + attributes["owning_organisation"] = owning_organisation + attributes["managing_organisation"] = managing_organisation attributes["renewal"] = renewal attributes["scheme"] = scheme attributes["location"] = location @@ -1050,18 +1099,10 @@ private Organisation.find_by_id_on_multiple_fields(field_1) end - def owning_organisation_id - owning_organisation&.id - end - def managing_organisation Organisation.find_by_id_on_multiple_fields(field_2) end - def managing_organisation_id - managing_organisation&.id - end - def renewal case field_6 when 1 diff --git a/app/services/feature_toggle.rb b/app/services/feature_toggle.rb index a25d99b84..de706585a 100644 --- a/app/services/feature_toggle.rb +++ b/app/services/feature_toggle.rb @@ -40,6 +40,10 @@ class FeatureToggle !Rails.env.production? end + def self.bulk_upload_duplicate_log_check_enabled? + !Rails.env.staging? + end + def self.upload_enabled? !Rails.env.development? end diff --git a/spec/factories/bulk_upload_error.rb b/spec/factories/bulk_upload_error.rb index 885f27d9d..bd2b9038c 100644 --- a/spec/factories/bulk_upload_error.rb +++ b/spec/factories/bulk_upload_error.rb @@ -9,7 +9,7 @@ FactoryBot.define do tenant_code { SecureRandom.hex(4) } property_ref { SecureRandom.hex(4) } purchaser_code { SecureRandom.hex(4) } - field { "field_#{rand(134)}" } + field { "field_#{rand(1..134)}" } error { "some error" } end end diff --git a/spec/services/bulk_upload/lettings/year2022/row_parser_spec.rb b/spec/services/bulk_upload/lettings/year2022/row_parser_spec.rb index 43a4ad822..dd9bf4a5b 100644 --- a/spec/services/bulk_upload/lettings/year2022/row_parser_spec.rb +++ b/spec/services/bulk_upload/lettings/year2022/row_parser_spec.rb @@ -212,6 +212,10 @@ RSpec.describe BulkUpload::Lettings::Year2022::RowParser do end context "when valid row" do + before do + allow(FeatureToggle).to receive(:bulk_upload_duplicate_log_check_enabled?).and_return(true) + end + let(:attributes) { valid_attributes } it "returns true" do @@ -226,6 +230,72 @@ RSpec.describe BulkUpload::Lettings::Year2022::RowParser do expect(questions.map(&:id).size).to eq(0) expect(questions.map(&:id)).to eql([]) end + + context "when the log already exists in the db" do + before do + parser.log.save! + parser.instance_variable_set(:@valid, nil) + end + + it "is not a valid row" do + expect(parser).not_to be_valid + end + + it "adds an error to all (and only) the fields used to determine duplicates" do + parser.valid? + + error_message = "This is a duplicate log" + + expected_errors = { + field_5: [error_message], # location + field_12: [error_message], # age1 + field_20: [error_message], # sex1 + field_35: [error_message], # ecstat1 + field_84: [error_message], # tcharge + field_96: [error_message], # startdate + field_97: [error_message], # startdate + field_98: [error_message], # startdate + field_100: [error_message], # propcode + field_108: [error_message], # postcode_full + field_109: [error_message], # postcode_full + field_111: [error_message], # owning_organisation + } + expect(parser.errors.as_json).to eq(expected_errors) + end + end + + context "when a hidden log already exists in db" do + before do + parser.log.status = "pending" + parser.log.skip_update_status = true + parser.log.save! + end + + it "is a valid row" do + expect(parser).to be_valid + end + + it "does not add duplicate errors" do + parser.valid? + + [ + :field_5, # location + :field_12, # age1 + :field_20, # sex1 + :field_35, # ecstat1 + :field_84, # tcharge + :field_96, # startdate + :field_97, # startdate + :field_98, # startdate + :field_100, # propcode + :field_108, # postcode_full + :field_109, # postcode_full + :field_111, # owning_organisation + ].each do |field| + expect(parser.errors[field]).to be_blank + end + end + end end end diff --git a/spec/services/bulk_upload/lettings/year2023/row_parser_spec.rb b/spec/services/bulk_upload/lettings/year2023/row_parser_spec.rb index 17d6aa50f..58b216e43 100644 --- a/spec/services/bulk_upload/lettings/year2023/row_parser_spec.rb +++ b/spec/services/bulk_upload/lettings/year2023/row_parser_spec.rb @@ -80,6 +80,7 @@ RSpec.describe BulkUpload::Lettings::Year2023::RowParser do DPA: { "POSTCODE": "EC1N 2TD", "POST_TOWN": "Newcastle", + "ORGANISATION_NAME": "Some place", }, }, ], @@ -109,6 +110,10 @@ RSpec.describe BulkUpload::Lettings::Year2023::RowParser do end context "when valid row" do + before do + allow(FeatureToggle).to receive(:bulk_upload_duplicate_log_check_enabled?).and_return(true) + end + let(:attributes) do { bulk_upload:, @@ -234,7 +239,7 @@ RSpec.describe BulkUpload::Lettings::Year2023::RowParser do } end - xit "returns true" do + it "returns true" do expect(parser).to be_valid end @@ -246,6 +251,75 @@ RSpec.describe BulkUpload::Lettings::Year2023::RowParser do expect(questions.map(&:id).size).to eq(0) expect(questions.map(&:id)).to eql([]) end + + context "when the log already exists in the db" do + before do + parser.log.save! + parser.instance_variable_set(:@valid, nil) + end + + it "is not a valid row" do + expect(parser).not_to be_valid + end + + it "adds an error to all (and only) the fields used to determine duplicates" do + parser.valid? + + error_message = "This is a duplicate log" + + [ + :field_1, # owning_organisation + :field_7, # startdate + :field_8, # startdate + :field_9, # startdate + :field_14, # propcode + :field_17, # location + :field_23, # postcode_full + :field_24, # postcode_full + :field_25, # postcode_full + :field_46, # age1 + :field_47, # sex1 + :field_50, # ecstat1 + :field_132, # tcharge + ].each do |field| + expect(parser.errors[field]).to include(error_message) + end + end + end + + context "when a hidden log already exists in db" do + before do + parser.log.status = "pending" + parser.log.skip_update_status = true + parser.log.save! + end + + it "is a valid row" do + expect(parser).to be_valid + end + + it "does not add duplicate errors" do + parser.valid? + + [ + :field_1, # owning_organisation + :field_7, # startdate + :field_8, # startdate + :field_9, # startdate + :field_14, # propcode + :field_17, # location + :field_23, # postcode_full + :field_24, # postcode_full + :field_25, # postcode_full + :field_46, # age1 + :field_47, # sex1 + :field_50, # ecstat1 + :field_132, # tcharge + ].each do |field| + expect(parser.errors[field]).to be_blank + end + end + end end describe "#validate_nulls" do diff --git a/spec/services/bulk_upload/processor_spec.rb b/spec/services/bulk_upload/processor_spec.rb index 4eca8238a..64ee0e197 100644 --- a/spec/services/bulk_upload/processor_spec.rb +++ b/spec/services/bulk_upload/processor_spec.rb @@ -217,6 +217,7 @@ RSpec.describe BulkUpload::Processor do file.rewind allow(BulkUpload::Downloader).to receive(:new).with(bulk_upload:).and_return(mock_downloader) + allow(FeatureToggle).to receive(:bulk_upload_duplicate_log_check_enabled?).and_return(true) end it "creates pending log" do