diff --git a/app/services/bulk_upload/lettings/validator.rb b/app/services/bulk_upload/lettings/validator.rb
index a7c20aa1b..47600978b 100644
--- a/app/services/bulk_upload/lettings/validator.rb
+++ b/app/services/bulk_upload/lettings/validator.rb
@@ -1,5 +1,3 @@
-require "csv"
-
 class BulkUpload::Lettings::Validator
   include ActiveModel::Validations
 
@@ -16,9 +14,11 @@ class BulkUpload::Lettings::Validator
   end
 
   def call
-    row_parsers.each_with_index do |row_parser, index|
-      row_parser.valid?
+    row_parsers.each(&:valid?)
 
+    validate_duplicate_rows if FeatureToggle.bulk_upload_duplicate_log_check_enabled?
+
+    row_parsers.each_with_index do |row_parser, index|
       row = index + row_offset + 1
 
       row_parser.errors.each do |error|
@@ -69,25 +69,25 @@ class BulkUpload::Lettings::Validator
     errors.count == errors.where(category: "soft_validation").count && errors.count.positive?
   end
 
-  def over_column_error_threshold?
-    fields = ("field_1".."field_134").to_a
-    percentage_threshold = (row_parsers.size * COLUMN_PERCENTAGE_ERROR_THRESHOLD).ceil
-
-    fields.any? do |field|
-      count = row_parsers.count { |row_parser| row_parser.errors[field].present? }
-
-      next if count < COLUMN_ABSOLUTE_ERROR_THRESHOLD
-
-      count > percentage_threshold
-    end
-  end
-
   def any_logs_already_exist?
     row_parsers.any?(&:log_already_exists?)
   end
 
 private
 
+  # O(n^2): every row is compared against every other row
+  def validate_duplicate_rows
+    row_parsers.each do |rp|
+      dupe = row_parsers.reject { |r| r.object_id.equal?(rp.object_id) }.any? do |rp_counter|
+        rp.spreadsheet_duplicate_hash == rp_counter.spreadsheet_duplicate_hash
+      end
+
+      if dupe
+        rp.add_duplicate_found_in_spreadsheet_errors
+      end
+    end
+  end
+
   def any_logs_invalid?
     row_parsers.any? { |row_parser| row_parser.log.invalid? }
   end
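The comment on `validate_duplicate_rows` is candid about the cost: every row is checked against every other row, so a large file performs a quadratic number of hash comparisons. A minimal sketch of a linear-time alternative, assuming only the `#spreadsheet_duplicate_hash` and `#add_duplicate_found_in_spreadsheet_errors` methods this diff introduces:

```ruby
# Sketch only: bucket rows by their duplicate hash in one pass, then flag
# every member of any bucket holding more than one row. This matches the
# O(n^2) version's behaviour (all copies are flagged, not just later ones).
def validate_duplicate_rows
  row_parsers
    .group_by(&:spreadsheet_duplicate_hash)
    .each_value do |group|
      next if group.size < 2 # unique rows get no error

      group.each(&:add_duplicate_found_in_spreadsheet_errors)
    end
end
```

For the file sizes bulk upload typically handles the quadratic scan is likely fine; the grouping version only matters if uploads grow into the tens of thousands of rows.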
diff --git a/app/services/bulk_upload/lettings/year2022/row_parser.rb b/app/services/bulk_upload/lettings/year2022/row_parser.rb
index 00bdb2d7e..b79447b67 100644
--- a/app/services/bulk_upload/lettings/year2022/row_parser.rb
+++ b/app/services/bulk_upload/lettings/year2022/row_parser.rb
@@ -438,6 +438,29 @@ class BulkUpload::Lettings::Year2022::RowParser
       .exists?(duplicate_check_fields.index_with { |field| log.public_send(field) })
   end
 
+  def spreadsheet_duplicate_hash
+    attributes.slice(
+      "field_5", # location
+      "field_12", # age1
+      "field_20", # sex1
+      "field_35", # ecstat1
+      "field_84", # tcharge
+      "field_96", # startdate
+      "field_97", # startdate
+      "field_98", # startdate
+      "field_100", # propcode
+      "field_108", # postcode
+      "field_109", # postcode
+      "field_111", # owning org
+    )
+  end
+
+  def add_duplicate_found_in_spreadsheet_errors
+    spreadsheet_duplicate_hash.each_key do |field|
+      errors.add(field, :spreadsheet_dupe, category: :setup)
+    end
+  end
+
 private
 
   def validate_declaration_acceptance
diff --git a/app/services/bulk_upload/lettings/year2023/row_parser.rb b/app/services/bulk_upload/lettings/year2023/row_parser.rb
index 8c9de5f62..225c45b4f 100644
--- a/app/services/bulk_upload/lettings/year2023/row_parser.rb
+++ b/app/services/bulk_upload/lettings/year2023/row_parser.rb
@@ -455,6 +455,29 @@ class BulkUpload::Lettings::Year2023::RowParser
       .exists?(duplicate_check_fields.index_with { |field| log.public_send(field) })
   end
 
+  def spreadsheet_duplicate_hash
+    attributes.slice(
+      "field_1", # owning org
+      "field_7", # startdate
+      "field_8", # startdate
+      "field_9", # startdate
+      "field_14", # propcode
+      "field_17", # location
+      "field_23", # postcode
+      "field_24", # postcode
+      "field_46", # age1
+      "field_47", # sex1
+      "field_50", # ecstat1
+      "field_132", # tcharge
+    )
+  end
+
+  def add_duplicate_found_in_spreadsheet_errors
+    spreadsheet_duplicate_hash.each_key do |field|
+      errors.add(field, :spreadsheet_dupe, category: :setup)
+    end
+  end
+
 private
 
   def validate_declaration_acceptance
diff --git a/app/services/bulk_upload/sales/log_creator.rb b/app/services/bulk_upload/sales/log_creator.rb
index 69ef637fa..db389fc5c 100644
--- a/app/services/bulk_upload/sales/log_creator.rb
+++ b/app/services/bulk_upload/sales/log_creator.rb
@@ -16,7 +16,6 @@ class BulkUpload::Sales::LogCreator
       row_parser.log.bulk_upload = bulk_upload
       row_parser.log.skip_update_status = true
       row_parser.log.status = "pending"
-      row_parser.log.status_cache = row_parser.log.calculate_status
 
       begin
diff --git a/app/services/bulk_upload/sales/validator.rb b/app/services/bulk_upload/sales/validator.rb
index 4508a5e96..9552bbc69 100644
--- a/app/services/bulk_upload/sales/validator.rb
+++ b/app/services/bulk_upload/sales/validator.rb
@@ -13,9 +13,11 @@ class BulkUpload::Sales::Validator
   end
 
   def call
-    row_parsers.each_with_index do |row_parser, index|
-      row_parser.valid?
+    row_parsers.each(&:valid?)
+
+    validate_duplicate_rows if FeatureToggle.bulk_upload_duplicate_log_check_enabled?
 
+    row_parsers.each_with_index do |row_parser, index|
       row = index + row_offset + 1
 
       row_parser.errors.each do |error|
@@ -67,6 +69,19 @@ class BulkUpload::Sales::Validator
 
 private
 
+  # O(n^2): every row is compared against every other row
+  def validate_duplicate_rows
+    row_parsers.each do |rp|
+      dupe = row_parsers.reject { |r| r.object_id.equal?(rp.object_id) }.any? do |rp_counter|
+        rp.spreadsheet_duplicate_hash == rp_counter.spreadsheet_duplicate_hash
+      end
+
+      if dupe
+        rp.add_duplicate_found_in_spreadsheet_errors
+      end
+    end
+  end
+
   def any_logs_invalid?
     row_parsers.any? { |row_parser| row_parser.log.invalid? }
   end
diff --git a/app/services/bulk_upload/sales/year2022/row_parser.rb b/app/services/bulk_upload/sales/year2022/row_parser.rb
index 8224e0d7a..a4e60487f 100644
--- a/app/services/bulk_upload/sales/year2022/row_parser.rb
+++ b/app/services/bulk_upload/sales/year2022/row_parser.rb
@@ -411,6 +411,27 @@ class BulkUpload::Sales::Year2022::RowParser
     field_1
   end
 
+  def spreadsheet_duplicate_hash
+    attributes.slice(
+      "field_1", # purchaser code
+      "field_2", # saledate
+      "field_3", # saledate
+      "field_4", # saledate
+      "field_7", # age1
+      "field_13", # sex1
+      "field_24", # ecstat1
+      "field_54", # postcode
+      "field_55", # postcode
+      "field_92", # owning org
+    )
+  end
+
+  def add_duplicate_found_in_spreadsheet_errors
+    spreadsheet_duplicate_hash.each_key do |field|
+      errors.add(field, :spreadsheet_dupe, category: :setup)
+    end
+  end
+
 private
 
   def validate_data_protection_answered
diff --git a/app/services/bulk_upload/sales/year2023/row_parser.rb b/app/services/bulk_upload/sales/year2023/row_parser.rb
index b5185f7a0..e0f2b727f 100644
--- a/app/services/bulk_upload/sales/year2023/row_parser.rb
+++ b/app/services/bulk_upload/sales/year2023/row_parser.rb
@@ -533,6 +533,27 @@ class BulkUpload::Sales::Year2023::RowParser
     field_6
   end
 
+  def spreadsheet_duplicate_hash
+    attributes.slice(
+      "field_1", # owning org
+      "field_3", # saledate
+      "field_4", # saledate
+      "field_5", # saledate
+      "field_6", # purchaser_code
+      "field_24", # postcode
+      "field_25", # postcode
+      "field_30", # age1
+      "field_31", # sex1
+      "field_35", # ecstat1
+    )
+  end
+
+  def add_duplicate_found_in_spreadsheet_errors
+    spreadsheet_duplicate_hash.each_key do |field|
+      errors.add(field, :spreadsheet_dupe, category: :setup)
+    end
+  end
+
 private
 
   def validate_data_protection_answered
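All four `spreadsheet_duplicate_hash` implementations lean on plain `Hash` equality: `attributes.slice` returns a field-to-value mapping, and Ruby compares those mappings key by key and value by value. A self-contained illustration of the comparison semantics (the field values here are hypothetical, not taken from the parsers):

```ruby
row_a = { "field_1" => "ORG1", "field_7" => "12", "field_99" => "note A" }
row_b = { "field_1" => "ORG1", "field_7" => "12", "field_99" => "note B" }

keys = %w[field_1 field_7]

# Only the sliced keys participate, so these rows count as duplicates
# even though field_99 differs.
row_a.slice(*keys) == row_b.slice(*keys) # => true
```

One consequence worth noting: if the attribute values are raw cell strings, formatting differences such as "1" versus "01" would defeat the match; whether that can happen here depends on how each parser types its fields.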
diff --git a/config/locales/en.yml b/config/locales/en.yml
index 8bf58586f..6cba97954 100644
--- a/config/locales/en.yml
+++ b/config/locales/en.yml
@@ -41,10 +41,17 @@ en:
   activemodel:
     errors:
       models:
-        bulk_upload/sales/year2022/row_parser:
+        bulk_upload/row_parser: &bulk_upload__row_parser__base
           inclusion: Enter a valid value for %{question}
+          spreadsheet_dupe: This is a duplicate of a log in your file
+        bulk_upload/lettings/year2022/row_parser:
+          <<: *bulk_upload__row_parser__base
+        bulk_upload/lettings/year2023/row_parser:
+          <<: *bulk_upload__row_parser__base
+        bulk_upload/sales/year2022/row_parser:
+          <<: *bulk_upload__row_parser__base
         bulk_upload/sales/year2023/row_parser:
-          inclusion: Enter a valid value for %{question}
+          <<: *bulk_upload__row_parser__base
         bulk_upload/lettings/validator:
           attributes:
             base:
diff --git a/spec/factories/sales_log.rb b/spec/factories/sales_log.rb
index 90d5e0ddb..905298e5e 100644
--- a/spec/factories/sales_log.rb
+++ b/spec/factories/sales_log.rb
@@ -36,6 +36,7 @@ FactoryBot.define do
       jointpur { 2 }
     end
     trait :completed do
+      purchid { rand(999_999_999).to_s }
       ownershipsch { 2 }
       type { 8 }
       saledate { Time.utc(2023, 2, 2, 10, 36, 49) }
diff --git a/spec/services/bulk_upload/lettings/validator_spec.rb b/spec/services/bulk_upload/lettings/validator_spec.rb
index 192f2e0ed..41d42b25f 100644
--- a/spec/services/bulk_upload/lettings/validator_spec.rb
+++ b/spec/services/bulk_upload/lettings/validator_spec.rb
@@ -290,6 +290,23 @@ RSpec.describe BulkUpload::Lettings::Validator do
     end
   end
 
+  context "when duplicate rows present" do
+    let(:file) { Tempfile.new }
+    let(:path) { file.path }
+    let(:log) { build(:lettings_log, :completed) }
+
+    before do
+      file.write(BulkUpload::LettingsLogToCsv.new(log:, line_ending: "\r\n").default_2023_field_numbers_row)
+      file.write(BulkUpload::LettingsLogToCsv.new(log:, line_ending: "\r\n").to_2023_csv_row)
+      file.write(BulkUpload::LettingsLogToCsv.new(log:, line_ending: "\r\n").to_2023_csv_row)
+      file.close
+    end
+
+    it "creates errors" do
+      expect { validator.call }.to change(BulkUploadError.where(category: :setup, error: "This is a duplicate of a log in your file"), :count).by(24)
+    end
+  end
+
   context "with unix line endings" do
     let(:fixture_path) { file_fixture("2022_23_lettings_bulk_upload.csv") }
     let(:file) { Tempfile.new }
diff --git a/spec/services/bulk_upload/lettings/year2022/row_parser_spec.rb b/spec/services/bulk_upload/lettings/year2022/row_parser_spec.rb
index e107d1662..bac02dde6 100644
--- a/spec/services/bulk_upload/lettings/year2022/row_parser_spec.rb
+++ b/spec/services/bulk_upload/lettings/year2022/row_parser_spec.rb
@@ -1760,4 +1760,16 @@ RSpec.describe BulkUpload::Lettings::Year2022::RowParser do
       end
     end
   end
+
+  describe "#spreadsheet_duplicate_hash" do
+    it "returns a hash" do
+      expect(parser.spreadsheet_duplicate_hash).to be_a(Hash)
+    end
+  end
+
+  describe "#add_duplicate_found_in_spreadsheet_errors" do
+    it "adds errors" do
+      expect { parser.add_duplicate_found_in_spreadsheet_errors }.to change(parser.errors, :size)
+    end
+  end
 end
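The locale change introduces a YAML anchor (`&bulk_upload__row_parser__base`) and merge keys (`<<:`) so all four parser classes share one message table instead of repeating it. `errors.add(field, :spreadsheet_dupe, category: :setup)` then resolves the symbol through ActiveModel's per-model message lookup. A hedged sketch of that resolution, assuming Rails' standard lookup chain and a loaded console:

```ruby
# The i18n key for BulkUpload::Lettings::Year2023::RowParser is its
# model_name.i18n_key, i.e. "bulk_upload/lettings/year2023/row_parser".
I18n.t("activemodel.errors.models.bulk_upload/lettings/year2023/row_parser.spreadsheet_dupe")
# => "This is a duplicate of a log in your file"
```

Because the merge keys copy the whole base mapping into each model's entry, every parser class resolves `:spreadsheet_dupe` to the same string, which is what the validator specs below assert against.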
diff --git a/spec/services/bulk_upload/lettings/year2023/row_parser_spec.rb b/spec/services/bulk_upload/lettings/year2023/row_parser_spec.rb
index d0e18c915..0c885f11d 100644
--- a/spec/services/bulk_upload/lettings/year2023/row_parser_spec.rb
+++ b/spec/services/bulk_upload/lettings/year2023/row_parser_spec.rb
@@ -1830,4 +1830,16 @@ RSpec.describe BulkUpload::Lettings::Year2023::RowParser do
       end
     end
   end
+
+  describe "#spreadsheet_duplicate_hash" do
+    it "returns a hash" do
+      expect(parser.spreadsheet_duplicate_hash).to be_a(Hash)
+    end
+  end
+
+  describe "#add_duplicate_found_in_spreadsheet_errors" do
+    it "adds errors" do
+      expect { parser.add_duplicate_found_in_spreadsheet_errors }.to change(parser.errors, :size)
+    end
+  end
 end
diff --git a/spec/services/bulk_upload/sales/validator_spec.rb b/spec/services/bulk_upload/sales/validator_spec.rb
index cb2860352..79362b8e9 100644
--- a/spec/services/bulk_upload/sales/validator_spec.rb
+++ b/spec/services/bulk_upload/sales/validator_spec.rb
@@ -125,6 +125,22 @@ RSpec.describe BulkUpload::Sales::Validator do
         expect { validator.call }.to change(BulkUploadError, :count)
       end
     end
+
+    context "when duplicate rows present" do
+      let(:file) { Tempfile.new }
+      let(:path) { file.path }
+      let(:log) { build(:sales_log, :completed) }
+
+      before do
+        file.write(BulkUpload::SalesLogToCsv.new(log:, col_offset: 0).to_2022_csv_row)
+        file.write(BulkUpload::SalesLogToCsv.new(log:, col_offset: 0).to_2022_csv_row)
+        file.close
+      end
+
+      it "creates errors" do
+        expect { validator.call }.to change(BulkUploadError.where(category: :setup, error: "This is a duplicate of a log in your file"), :count).by(20)
+      end
+    end
   end
 
   describe "#create_logs?" do
diff --git a/spec/services/bulk_upload/sales/year2022/row_parser_spec.rb b/spec/services/bulk_upload/sales/year2022/row_parser_spec.rb
index 4a7b55300..d6594bd35 100644
--- a/spec/services/bulk_upload/sales/year2022/row_parser_spec.rb
+++ b/spec/services/bulk_upload/sales/year2022/row_parser_spec.rb
@@ -832,4 +832,16 @@ RSpec.describe BulkUpload::Sales::Year2022::RowParser do
       end
     end
   end
+
+  describe "#spreadsheet_duplicate_hash" do
+    it "returns a hash" do
+      expect(parser.spreadsheet_duplicate_hash).to be_a(Hash)
+    end
+  end
+
+  describe "#add_duplicate_found_in_spreadsheet_errors" do
+    it "adds errors" do
+      expect { parser.add_duplicate_found_in_spreadsheet_errors }.to change(parser.errors, :size)
+    end
+  end
 end
diff --git a/spec/services/bulk_upload/sales/year2023/row_parser_spec.rb b/spec/services/bulk_upload/sales/year2023/row_parser_spec.rb
index b2de40ad4..0f23dfbc0 100644
--- a/spec/services/bulk_upload/sales/year2023/row_parser_spec.rb
+++ b/spec/services/bulk_upload/sales/year2023/row_parser_spec.rb
@@ -1081,4 +1081,16 @@ RSpec.describe BulkUpload::Sales::Year2023::RowParser do
       end
     end
   end
+
+  describe "#spreadsheet_duplicate_hash" do
+    it "returns a hash" do
+      expect(parser.spreadsheet_duplicate_hash).to be_a(Hash)
+    end
+  end
+
+  describe "#add_duplicate_found_in_spreadsheet_errors" do
+    it "adds errors" do
+      expect { parser.add_duplicate_found_in_spreadsheet_errors }.to change(parser.errors, :size)
+    end
+  end
 end
diff --git a/spec/services/csv/sales_log_csv_service_spec.rb b/spec/services/csv/sales_log_csv_service_spec.rb
index 96099aa70..358e1d40a 100644
--- a/spec/services/csv/sales_log_csv_service_spec.rb
+++ b/spec/services/csv/sales_log_csv_service_spec.rb
@@ -3,7 +3,7 @@ require "rails_helper"
 RSpec.describe Csv::SalesLogCsvService do
   let(:form_handler_mock) { instance_double(FormHandler) }
   let(:organisation) { create(:organisation) }
-  let!(:log) { create(:sales_log, :completed, owning_organisation: organisation) }
+  let!(:log) { create(:sales_log, :completed, owning_organisation: organisation, purchid: nil) }
   let(:service) { described_class.new(export_type: "labels") }
   let(:csv) { CSV.parse(service.prepare_csv(SalesLog.all)) }
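The expected error counts in the validator specs fall straight out of the field lists: each row in a duplicate pair receives one `:spreadsheet_dupe` error per key in its `spreadsheet_duplicate_hash`. A quick check of the arithmetic, using the slice sizes from the parsers above:

```ruby
lettings_2023_fields = 12 # keys in the 2023 lettings spreadsheet_duplicate_hash
sales_2022_fields    = 10 # keys in the 2022 sales spreadsheet_duplicate_hash
duplicate_rows       = 2  # each spec writes the same CSV row twice

lettings_2023_fields * duplicate_rows # => 24, as asserted in the lettings spec
sales_2022_fields * duplicate_rows    # => 20, as asserted in the sales spec
```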