Browse Source

CLDC-1888 Bulk upload handles duplicates (#1334)

# Context

- https://digital.dclg.gov.uk/jira/browse/CLDC-1888
- This is a continuation of https://github.com/communitiesuk/submit-social-housing-lettings-and-sales-data/pull/1277
- When bulk uploading we want to check users are not uploading data that already exists to prevent them submitting duplicate 

# Changes

- This feature is behind a feature toggle. it has been disabled for staging for testing purposes but available in all other environments
- If a log already exists based off certain fields add errors to the associated fields
- We discount any hidden logs and only check "active" logs
- Added memoization to `#valid?` as an optimisation
pull/1591/head
Phil Lee 2 years ago committed by GitHub
parent
commit
cae731ed6f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 16
      app/services/bulk_upload/lettings/validator.rb
  2. 58
      app/services/bulk_upload/lettings/year2022/row_parser.rb
  3. 65
      app/services/bulk_upload/lettings/year2023/row_parser.rb
  4. 4
      app/services/feature_toggle.rb
  5. 2
      spec/factories/bulk_upload_error.rb
  6. 70
      spec/services/bulk_upload/lettings/year2022/row_parser_spec.rb
  7. 76
      spec/services/bulk_upload/lettings/year2023/row_parser_spec.rb
  8. 1
      spec/services/bulk_upload/processor_spec.rb

16
app/services/bulk_upload/lettings/validator.rb

@ -43,8 +43,10 @@ class BulkUpload::Lettings::Validator
def create_logs? def create_logs?
return false if any_setup_errors? return false if any_setup_errors?
return false if row_parsers.any?(&:block_log_creation?) return false if row_parsers.any?(&:block_log_creation?)
return false if any_logs_already_exist? && FeatureToggle.bulk_upload_duplicate_log_check_enabled?
return false if any_logs_invalid?
row_parsers.all? { |row_parser| row_parser.log.valid? } true
end end
def self.question_for_field(field) def self.question_for_field(field)
@ -59,8 +61,6 @@ class BulkUpload::Lettings::Validator
.positive? .positive?
end end
private
def over_column_error_threshold? def over_column_error_threshold?
fields = ("field_1".."field_134").to_a fields = ("field_1".."field_134").to_a
percentage_threshold = (row_parsers.size * COLUMN_PERCENTAGE_ERROR_THRESHOLD).ceil percentage_threshold = (row_parsers.size * COLUMN_PERCENTAGE_ERROR_THRESHOLD).ceil
@ -74,6 +74,16 @@ private
end end
end end
def any_logs_already_exist?
row_parsers.any?(&:log_already_exists?)
end
private
def any_logs_invalid?
row_parsers.any? { |row_parser| row_parser.log.invalid? }
end
def csv_parser def csv_parser
@csv_parser ||= case bulk_upload.year @csv_parser ||= case bulk_upload.year
when 2022 when 2022

58
app/services/bulk_upload/lettings/year2022/row_parser.rb

@ -309,6 +309,7 @@ class BulkUpload::Lettings::Year2022::RowParser
validate :validate_no_disabled_needs_conjunction, on: :after_log validate :validate_no_disabled_needs_conjunction, on: :after_log
validate :validate_dont_know_disabled_needs_conjunction, on: :after_log validate :validate_dont_know_disabled_needs_conjunction, on: :after_log
validate :validate_no_and_dont_know_disabled_needs_conjunction, on: :after_log validate :validate_no_and_dont_know_disabled_needs_conjunction, on: :after_log
validate :validate_if_log_already_exists, on: :after_log, if: -> { FeatureToggle.bulk_upload_duplicate_log_check_enabled? }
validate :validate_owning_org_data_given, on: :after_log validate :validate_owning_org_data_given, on: :after_log
validate :validate_owning_org_exists, on: :after_log validate :validate_owning_org_exists, on: :after_log
@ -340,9 +341,11 @@ class BulkUpload::Lettings::Year2022::RowParser
end end
def valid? def valid?
return @valid if @valid
errors.clear errors.clear
return true if blank_row? return @valid = true if blank_row?
super(:before_log) super(:before_log)
before_errors = errors.dup before_errors = errors.dup
@ -362,7 +365,7 @@ class BulkUpload::Lettings::Year2022::RowParser
end end
end end
errors.blank? @valid = errors.blank?
end end
def blank_row? def blank_row?
@ -395,6 +398,12 @@ class BulkUpload::Lettings::Year2022::RowParser
field_100 field_100
end end
def log_already_exists?
@log_already_exists ||= LettingsLog
.where(status: %w[not_started in_progress completed])
.exists?(duplicate_check_fields.index_with { |field| log.public_send(field) })
end
private private
def validate_declaration_acceptance def validate_declaration_acceptance
@ -439,6 +448,20 @@ private
@created_by ||= User.find_by(email: field_112) @created_by ||= User.find_by(email: field_112)
end end
def duplicate_check_fields
%w[
startdate
age1
sex1
ecstat1
owning_organisation
tcharge
propcode
postcode_full
location
]
end
def validate_location_related def validate_location_related
return if scheme.blank? || location.blank? return if scheme.blank? || location.blank?
@ -697,6 +720,25 @@ private
log.form.setup_sections[0].subsections[0].questions.include?(question) log.form.setup_sections[0].subsections[0].questions.include?(question)
end end
def validate_if_log_already_exists
if log_already_exists?
error_message = "This is a duplicate log"
errors.add(:field_5, error_message) # location
errors.add(:field_12, error_message) # age1
errors.add(:field_20, error_message) # sex1
errors.add(:field_35, error_message) # ecstat1
errors.add(:field_84, error_message) # tcharge
errors.add(:field_96, error_message) # startdate
errors.add(:field_97, error_message) # startdate
errors.add(:field_98, error_message) # startdate
errors.add(:field_100, error_message) # propcode
errors.add(:field_108, error_message) # postcode_full
errors.add(:field_109, error_message) # postcode_full
errors.add(:field_111, error_message) # owning_organisation
end
end
def field_mapping_for_errors def field_mapping_for_errors
{ {
lettype: [:field_1], lettype: [:field_1],
@ -896,18 +938,10 @@ private
Organisation.find_by_id_on_multiple_fields(field_111) Organisation.find_by_id_on_multiple_fields(field_111)
end end
def owning_organisation_id
owning_organisation&.id
end
def managing_organisation def managing_organisation
Organisation.find_by_id_on_multiple_fields(field_113) Organisation.find_by_id_on_multiple_fields(field_113)
end end
def managing_organisation_id
managing_organisation&.id
end
def attributes_for_log def attributes_for_log
attributes = {} attributes = {}
@ -916,8 +950,8 @@ private
attributes["la"] = field_107 attributes["la"] = field_107
attributes["postcode_known"] = postcode_known attributes["postcode_known"] = postcode_known
attributes["postcode_full"] = postcode_full attributes["postcode_full"] = postcode_full
attributes["owning_organisation_id"] = owning_organisation_id attributes["owning_organisation"] = owning_organisation
attributes["managing_organisation_id"] = managing_organisation_id attributes["managing_organisation"] = managing_organisation
attributes["renewal"] = renewal attributes["renewal"] = renewal
attributes["scheme"] = scheme attributes["scheme"] = scheme
attributes["location"] = location attributes["location"] = location

65
app/services/bulk_upload/lettings/year2023/row_parser.rb

@ -311,6 +311,7 @@ class BulkUpload::Lettings::Year2023::RowParser
validate :validate_no_disabled_needs_conjunction, on: :after_log validate :validate_no_disabled_needs_conjunction, on: :after_log
validate :validate_dont_know_disabled_needs_conjunction, on: :after_log validate :validate_dont_know_disabled_needs_conjunction, on: :after_log
validate :validate_no_and_dont_know_disabled_needs_conjunction, on: :after_log validate :validate_no_and_dont_know_disabled_needs_conjunction, on: :after_log
validate :validate_if_log_already_exists, on: :after_log, if: -> { FeatureToggle.bulk_upload_duplicate_log_check_enabled? }
validate :validate_owning_org_data_given, on: :after_log validate :validate_owning_org_data_given, on: :after_log
validate :validate_owning_org_exists, on: :after_log validate :validate_owning_org_exists, on: :after_log
@ -343,9 +344,11 @@ class BulkUpload::Lettings::Year2023::RowParser
end end
def valid? def valid?
return @valid if @valid
errors.clear errors.clear
return true if blank_row? return @valid = true if blank_row?
super(:before_log) super(:before_log)
before_errors = errors.dup before_errors = errors.dup
@ -365,7 +368,7 @@ class BulkUpload::Lettings::Year2023::RowParser
end end
end end
errors.blank? @valid = errors.blank?
end end
def blank_row? def blank_row?
@ -398,6 +401,12 @@ class BulkUpload::Lettings::Year2023::RowParser
field_14 field_14
end end
def log_already_exists?
@log_already_exists ||= LettingsLog
.where(status: %w[not_started in_progress completed])
.exists?(duplicate_check_fields.index_with { |field| log.public_send(field) })
end
private private
def validate_declaration_acceptance def validate_declaration_acceptance
@ -448,6 +457,26 @@ private
end end
end end
def duplicate_check_fields
%w[
startdate
age1
sex1
ecstat1
owning_organisation
tcharge
propcode
postcode_full
location
]
end
def validate_needs_type_present
if field_4.blank?
errors.add(:field_4, I18n.t("validations.not_answered", question: "needs type"))
end
end
def start_date def start_date
return if field_7.blank? || field_8.blank? || field_9.blank? return if field_7.blank? || field_8.blank? || field_9.blank?
@ -679,6 +708,26 @@ private
log.form.setup_sections[0].subsections[0].questions.include?(question) log.form.setup_sections[0].subsections[0].questions.include?(question)
end end
def validate_if_log_already_exists
if log_already_exists?
error_message = "This is a duplicate log"
errors.add(:field_1, error_message) # owning_organisation
errors.add(:field_7, error_message) # startdate
errors.add(:field_8, error_message) # startdate
errors.add(:field_9, error_message) # startdate
errors.add(:field_14, error_message) # propcode
errors.add(:field_17, error_message) # location
errors.add(:field_23, error_message) # postcode_full
errors.add(:field_24, error_message) # postcode_full
errors.add(:field_25, error_message) # postcode_full
errors.add(:field_46, error_message) # age1
errors.add(:field_47, error_message) # sex1
errors.add(:field_50, error_message) # ecstat1
errors.add(:field_132, error_message) # tcharge
end
end
def field_mapping_for_errors def field_mapping_for_errors
{ {
lettype: [:field_5], lettype: [:field_5],
@ -855,8 +904,8 @@ private
attributes["la"] = field_25 attributes["la"] = field_25
attributes["postcode_known"] = postcode_known attributes["postcode_known"] = postcode_known
attributes["postcode_full"] = postcode_full attributes["postcode_full"] = postcode_full
attributes["owning_organisation_id"] = owning_organisation_id attributes["owning_organisation"] = owning_organisation
attributes["managing_organisation_id"] = managing_organisation_id attributes["managing_organisation"] = managing_organisation
attributes["renewal"] = renewal attributes["renewal"] = renewal
attributes["scheme"] = scheme attributes["scheme"] = scheme
attributes["location"] = location attributes["location"] = location
@ -1050,18 +1099,10 @@ private
Organisation.find_by_id_on_multiple_fields(field_1) Organisation.find_by_id_on_multiple_fields(field_1)
end end
def owning_organisation_id
owning_organisation&.id
end
def managing_organisation def managing_organisation
Organisation.find_by_id_on_multiple_fields(field_2) Organisation.find_by_id_on_multiple_fields(field_2)
end end
def managing_organisation_id
managing_organisation&.id
end
def renewal def renewal
case field_6 case field_6
when 1 when 1

4
app/services/feature_toggle.rb

@ -40,6 +40,10 @@ class FeatureToggle
!Rails.env.production? !Rails.env.production?
end end
def self.bulk_upload_duplicate_log_check_enabled?
!Rails.env.staging?
end
def self.upload_enabled? def self.upload_enabled?
!Rails.env.development? !Rails.env.development?
end end

2
spec/factories/bulk_upload_error.rb

@ -9,7 +9,7 @@ FactoryBot.define do
tenant_code { SecureRandom.hex(4) } tenant_code { SecureRandom.hex(4) }
property_ref { SecureRandom.hex(4) } property_ref { SecureRandom.hex(4) }
purchaser_code { SecureRandom.hex(4) } purchaser_code { SecureRandom.hex(4) }
field { "field_#{rand(134)}" } field { "field_#{rand(1..134)}" }
error { "some error" } error { "some error" }
end end
end end

70
spec/services/bulk_upload/lettings/year2022/row_parser_spec.rb

@ -212,6 +212,10 @@ RSpec.describe BulkUpload::Lettings::Year2022::RowParser do
end end
context "when valid row" do context "when valid row" do
before do
allow(FeatureToggle).to receive(:bulk_upload_duplicate_log_check_enabled?).and_return(true)
end
let(:attributes) { valid_attributes } let(:attributes) { valid_attributes }
it "returns true" do it "returns true" do
@ -226,6 +230,72 @@ RSpec.describe BulkUpload::Lettings::Year2022::RowParser do
expect(questions.map(&:id).size).to eq(0) expect(questions.map(&:id).size).to eq(0)
expect(questions.map(&:id)).to eql([]) expect(questions.map(&:id)).to eql([])
end end
context "when the log already exists in the db" do
before do
parser.log.save!
parser.instance_variable_set(:@valid, nil)
end
it "is not a valid row" do
expect(parser).not_to be_valid
end
it "adds an error to all (and only) the fields used to determine duplicates" do
parser.valid?
error_message = "This is a duplicate log"
expected_errors = {
field_5: [error_message], # location
field_12: [error_message], # age1
field_20: [error_message], # sex1
field_35: [error_message], # ecstat1
field_84: [error_message], # tcharge
field_96: [error_message], # startdate
field_97: [error_message], # startdate
field_98: [error_message], # startdate
field_100: [error_message], # propcode
field_108: [error_message], # postcode_full
field_109: [error_message], # postcode_full
field_111: [error_message], # owning_organisation
}
expect(parser.errors.as_json).to eq(expected_errors)
end
end
context "when a hidden log already exists in db" do
before do
parser.log.status = "pending"
parser.log.skip_update_status = true
parser.log.save!
end
it "is a valid row" do
expect(parser).to be_valid
end
it "does not add duplicate errors" do
parser.valid?
[
:field_5, # location
:field_12, # age1
:field_20, # sex1
:field_35, # ecstat1
:field_84, # tcharge
:field_96, # startdate
:field_97, # startdate
:field_98, # startdate
:field_100, # propcode
:field_108, # postcode_full
:field_109, # postcode_full
:field_111, # owning_organisation
].each do |field|
expect(parser.errors[field]).to be_blank
end
end
end
end end
end end

76
spec/services/bulk_upload/lettings/year2023/row_parser_spec.rb

@ -80,6 +80,7 @@ RSpec.describe BulkUpload::Lettings::Year2023::RowParser do
DPA: { DPA: {
"POSTCODE": "EC1N 2TD", "POSTCODE": "EC1N 2TD",
"POST_TOWN": "Newcastle", "POST_TOWN": "Newcastle",
"ORGANISATION_NAME": "Some place",
}, },
}, },
], ],
@ -109,6 +110,10 @@ RSpec.describe BulkUpload::Lettings::Year2023::RowParser do
end end
context "when valid row" do context "when valid row" do
before do
allow(FeatureToggle).to receive(:bulk_upload_duplicate_log_check_enabled?).and_return(true)
end
let(:attributes) do let(:attributes) do
{ {
bulk_upload:, bulk_upload:,
@ -234,7 +239,7 @@ RSpec.describe BulkUpload::Lettings::Year2023::RowParser do
} }
end end
xit "returns true" do it "returns true" do
expect(parser).to be_valid expect(parser).to be_valid
end end
@ -246,6 +251,75 @@ RSpec.describe BulkUpload::Lettings::Year2023::RowParser do
expect(questions.map(&:id).size).to eq(0) expect(questions.map(&:id).size).to eq(0)
expect(questions.map(&:id)).to eql([]) expect(questions.map(&:id)).to eql([])
end end
context "when the log already exists in the db" do
before do
parser.log.save!
parser.instance_variable_set(:@valid, nil)
end
it "is not a valid row" do
expect(parser).not_to be_valid
end
it "adds an error to all (and only) the fields used to determine duplicates" do
parser.valid?
error_message = "This is a duplicate log"
[
:field_1, # owning_organisation
:field_7, # startdate
:field_8, # startdate
:field_9, # startdate
:field_14, # propcode
:field_17, # location
:field_23, # postcode_full
:field_24, # postcode_full
:field_25, # postcode_full
:field_46, # age1
:field_47, # sex1
:field_50, # ecstat1
:field_132, # tcharge
].each do |field|
expect(parser.errors[field]).to include(error_message)
end
end
end
context "when a hidden log already exists in db" do
before do
parser.log.status = "pending"
parser.log.skip_update_status = true
parser.log.save!
end
it "is a valid row" do
expect(parser).to be_valid
end
it "does not add duplicate errors" do
parser.valid?
[
:field_1, # owning_organisation
:field_7, # startdate
:field_8, # startdate
:field_9, # startdate
:field_14, # propcode
:field_17, # location
:field_23, # postcode_full
:field_24, # postcode_full
:field_25, # postcode_full
:field_46, # age1
:field_47, # sex1
:field_50, # ecstat1
:field_132, # tcharge
].each do |field|
expect(parser.errors[field]).to be_blank
end
end
end
end end
describe "#validate_nulls" do describe "#validate_nulls" do

1
spec/services/bulk_upload/processor_spec.rb

@ -217,6 +217,7 @@ RSpec.describe BulkUpload::Processor do
file.rewind file.rewind
allow(BulkUpload::Downloader).to receive(:new).with(bulk_upload:).and_return(mock_downloader) allow(BulkUpload::Downloader).to receive(:new).with(bulk_upload:).and_return(mock_downloader)
allow(FeatureToggle).to receive(:bulk_upload_duplicate_log_check_enabled?).and_return(true)
end end
it "creates pending log" do it "creates pending log" do

Loading…
Cancel
Save