Browse Source

CLDC-2304 Deduplicate sales logs in bulk upload (#1612)

* Deduplicate 2022 sales logs

* Don't run deduplication on staging

* Do not create duplicate logs

* Add deduplication to the 23/24 sales
pull/1630/head
kosiakkatrina 2 years ago committed by GitHub
parent
commit
353facc893
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 5
      app/services/bulk_upload/sales/validator.rb
  2. 42
      app/services/bulk_upload/sales/year2022/row_parser.rb
  3. 42
      app/services/bulk_upload/sales/year2023/row_parser.rb
  4. 65
      spec/services/bulk_upload/sales/year2022/row_parser_spec.rb
  5. 65
      spec/services/bulk_upload/sales/year2023/row_parser_spec.rb

5
app/services/bulk_upload/sales/validator.rb

@ -36,6 +36,7 @@ class BulkUpload::Sales::Validator
def create_logs? def create_logs?
return false if any_setup_errors? return false if any_setup_errors?
return false if row_parsers.any?(&:block_log_creation?) return false if row_parsers.any?(&:block_log_creation?)
return false if any_logs_already_exist? && FeatureToggle.bulk_upload_duplicate_log_check_enabled?
row_parsers.each do |row_parser| row_parsers.each do |row_parser|
row_parser.log.blank_invalid_non_setup_fields! row_parser.log.blank_invalid_non_setup_fields!
@ -54,6 +55,10 @@ class BulkUpload::Sales::Validator
.positive? .positive?
end end
def any_logs_already_exist?
row_parsers.any?(&:log_already_exists?)
end
private private
def any_logs_invalid? def any_logs_invalid?

42
app/services/bulk_upload/sales/year2022/row_parser.rb

@ -285,6 +285,7 @@ class BulkUpload::Sales::Year2022::RowParser
validate :validate_created_by_related, on: :after_log validate :validate_created_by_related, on: :after_log
validate :validate_relevant_collection_window, on: :after_log validate :validate_relevant_collection_window, on: :after_log
validate :validate_incomplete_soft_validations, on: :after_log validate :validate_incomplete_soft_validations, on: :after_log
validate :validate_if_log_already_exists, on: :after_log, if: -> { FeatureToggle.bulk_upload_duplicate_log_check_enabled? }
def self.question_for_field(field) def self.question_for_field(field)
QUESTIONS[field] QUESTIONS[field]
@ -337,6 +338,12 @@ class BulkUpload::Sales::Year2022::RowParser
block_log_creation block_log_creation
end end
def log_already_exists?
@log_already_exists ||= SalesLog
.where(status: %w[not_started in_progress completed])
.exists?(duplicate_check_fields.index_with { |field| log.public_send(field) })
end
private private
def buyer_not_interviewed? def buyer_not_interviewed?
@ -591,7 +598,7 @@ private
attributes["othtype"] = field_85 attributes["othtype"] = field_85
attributes["owning_organisation_id"] = owning_organisation_id attributes["owning_organisation"] = owning_organisation
attributes["created_by"] = created_by || bulk_upload.user attributes["created_by"] = created_by || bulk_upload.user
attributes["hhregres"] = hhregres attributes["hhregres"] = hhregres
attributes["hhregresstill"] = hhregresstill attributes["hhregresstill"] = hhregresstill
@ -803,10 +810,6 @@ private
Organisation.find_by_id_on_multiple_fields(field_92) Organisation.find_by_id_on_multiple_fields(field_92)
end end
def owning_organisation_id
owning_organisation&.id
end
def created_by def created_by
@created_by ||= User.find_by(email: field_93) @created_by ||= User.find_by(email: field_93)
end end
@ -848,6 +851,18 @@ private
@questions ||= log.form.subsections.flat_map { |ss| ss.applicable_questions(log) } @questions ||= log.form.subsections.flat_map { |ss| ss.applicable_questions(log) }
end end
def duplicate_check_fields
%w[
saledate
age1
sex1
ecstat1
owning_organisation
postcode_full
purchid
]
end
def validate_owning_org_data_given def validate_owning_org_data_given
if field_92.blank? if field_92.blank?
block_log_creation! block_log_creation!
@ -985,4 +1000,21 @@ private
end end
end end
end end
def validate_if_log_already_exists
if log_already_exists?
error_message = "This is a duplicate log"
errors.add(:field_92, error_message) # Owning org
errors.add(:field_2, error_message) # Sale completion date
errors.add(:field_3, error_message) # Sale completion date
errors.add(:field_4, error_message) # Sale completion date
errors.add(:field_41, error_message) # Postcode
errors.add(:field_42, error_message) # Postcode
errors.add(:field_7, error_message) # Buyer 1 age
errors.add(:field_13, error_message) # Buyer 1 gender
errors.add(:field_24, error_message) # Buyer 1 working situation
errors.add(:field_1, error_message) # Purchaser code
end
end
end end

42
app/services/bulk_upload/sales/year2023/row_parser.rb

@ -410,6 +410,7 @@ class BulkUpload::Sales::Year2023::RowParser
validate :validate_address_line_1, on: :after_log validate :validate_address_line_1, on: :after_log
validate :validate_town_or_city, on: :after_log validate :validate_town_or_city, on: :after_log
validate :validate_postcode, on: :after_log validate :validate_postcode, on: :after_log
validate :validate_if_log_already_exists, on: :after_log, if: -> { FeatureToggle.bulk_upload_duplicate_log_check_enabled? }
def self.question_for_field(field) def self.question_for_field(field)
QUESTIONS[field] QUESTIONS[field]
@ -466,6 +467,12 @@ class BulkUpload::Sales::Year2023::RowParser
"#<BulkUpload::Sales::Year2023::RowParser:#{object_id}>" "#<BulkUpload::Sales::Year2023::RowParser:#{object_id}>"
end end
def log_already_exists?
@log_already_exists ||= SalesLog
.where(status: %w[not_started in_progress completed])
.exists?(duplicate_check_fields.index_with { |field| log.public_send(field) })
end
private private
def prevtenbuy2 def prevtenbuy2
@ -786,7 +793,7 @@ private
attributes["othtype"] = field_11 attributes["othtype"] = field_11
attributes["owning_organisation_id"] = owning_organisation_id attributes["owning_organisation"] = owning_organisation
attributes["created_by"] = created_by || bulk_upload.user attributes["created_by"] = created_by || bulk_upload.user
attributes["hhregres"] = hhregres attributes["hhregres"] = hhregres
attributes["hhregresstill"] = hhregresstill attributes["hhregresstill"] = hhregresstill
@ -1000,10 +1007,6 @@ private
Organisation.find_by_id_on_multiple_fields(field_1) Organisation.find_by_id_on_multiple_fields(field_1)
end end
def owning_organisation_id
owning_organisation&.id
end
def created_by def created_by
@created_by ||= User.find_by(email: field_2) @created_by ||= User.find_by(email: field_2)
end end
@ -1035,6 +1038,18 @@ private
@questions ||= log.form.subsections.flat_map { |ss| ss.applicable_questions(log) } @questions ||= log.form.subsections.flat_map { |ss| ss.applicable_questions(log) }
end end
def duplicate_check_fields
%w[
saledate
age1
sex1
ecstat1
owning_organisation
postcode_full
purchid
]
end
def validate_owning_org_data_given def validate_owning_org_data_given
if field_1.blank? if field_1.blank?
block_log_creation! block_log_creation!
@ -1156,4 +1171,21 @@ private
errors.add(:field_5, I18n.t("validations.date.outside_collection_window", year_combo: bulk_upload.year_combo, start_year: bulk_upload.year, end_year: bulk_upload.end_year), category: :setup) errors.add(:field_5, I18n.t("validations.date.outside_collection_window", year_combo: bulk_upload.year_combo, start_year: bulk_upload.year, end_year: bulk_upload.end_year), category: :setup)
end end
end end
def validate_if_log_already_exists
if log_already_exists?
error_message = "This is a duplicate log"
errors.add(:field_1, error_message) # Owning org
errors.add(:field_3, error_message) # Sale completion date
errors.add(:field_4, error_message) # Sale completion date
errors.add(:field_5, error_message) # Sale completion date
errors.add(:field_24, error_message) # Postcode
errors.add(:field_25, error_message) # Postcode
errors.add(:field_30, error_message) # Buyer 1 age
errors.add(:field_31, error_message) # Buyer 1 gender
errors.add(:field_35, error_message) # Buyer 1 working situation
errors.add(:field_6, error_message) # Purchaser code
end
end
end end

65
spec/services/bulk_upload/sales/year2022/row_parser_spec.rb

@ -539,6 +539,71 @@ RSpec.describe BulkUpload::Sales::Year2022::RowParser do
end end
end end
end end
context "when the log already exists in the db" do
let(:attributes) { valid_attributes }
before do
parser.log.save!
parser.instance_variable_set(:@valid, nil)
end
it "is not a valid row" do
expect(parser).not_to be_valid
end
it "adds an error to all (and only) the fields used to determine duplicates" do
parser.valid?
error_message = "This is a duplicate log"
[
:field_92, # Owning org
:field_2, # Sale completion date
:field_3, # Sale completion date
:field_4, # Sale completion date
:field_41, # Postcode
:field_42, # Postcode
:field_7, # Buyer 1 age
:field_13, # Buyer 1 gender
:field_24, # Buyer 1 working situation
:field_1, # Purchaser code
].each do |field|
expect(parser.errors[field]).to include(error_message)
end
end
end
context "when a hidden log already exists in db" do
before do
parser.log.status = "pending"
parser.log.skip_update_status = true
parser.log.save!
end
it "is a valid row" do
expect(parser).to be_valid
end
it "does not add duplicate errors" do
parser.valid?
[
:field_92, # Owning org
:field_2, # Sale completion date
:field_3, # Sale completion date
:field_4, # Sale completion date
:field_41, # Postcode
:field_42, # Postcode
:field_7, # Buyer 1 age
:field_13, # Buyer 1 gender
:field_24, # Buyer 1 working situation
:field_1, # Purchaser code
].each do |field|
expect(parser.errors[field]).to be_blank
end
end
end
end end
describe "inferences" do describe "inferences" do

65
spec/services/bulk_upload/sales/year2023/row_parser_spec.rb

@ -467,6 +467,71 @@ RSpec.describe BulkUpload::Sales::Year2023::RowParser do
end end
end end
context "when the log already exists in the db" do
let(:attributes) { valid_attributes }
before do
parser.log.save!
parser.instance_variable_set(:@valid, nil)
end
it "is not a valid row" do
expect(parser).not_to be_valid
end
it "adds an error to all (and only) the fields used to determine duplicates" do
parser.valid?
error_message = "This is a duplicate log"
[
:field_1, # Owning org
:field_3, # Sale completion date
:field_4, # Sale completion date
:field_5, # Sale completion date
:field_24, # Postcode
:field_25, # Postcode
:field_30, # Buyer 1 age
:field_31, # Buyer 1 gender
:field_35, # Buyer 1 working situation
:field_6, # Purchaser code
].each do |field|
expect(parser.errors[field]).to include(error_message)
end
end
end
context "when a hidden log already exists in db" do
before do
parser.log.status = "pending"
parser.log.skip_update_status = true
parser.log.save!
end
it "is a valid row" do
expect(parser).to be_valid
end
it "does not add duplicate errors" do
parser.valid?
[
:field_1, # Owning org
:field_3, # Sale completion date
:field_4, # Sale completion date
:field_5, # Sale completion date
:field_24, # Postcode
:field_25, # Postcode
:field_30, # Buyer 1 age
:field_31, # Buyer 1 gender
:field_35, # Buyer 1 working situation
:field_6, # Purchaser code
].each do |field|
expect(parser.errors[field]).to be_blank
end
end
end
describe "#field_19" do # UPRN describe "#field_19" do # UPRN
context "when UPRN known" do context "when UPRN known" do
let(:attributes) { setup_section_params.merge({ field_19: "100023336956" }) } let(:attributes) { setup_section_params.merge({ field_19: "100023336956" }) }

Loading…
Cancel
Save