Browse Source

CLDC-3620 Count duplicate schemes and locations (#2645)

* Add duplicate sets scope to schemes

* Add rake task to write duplicate scheme sets

* Add duplicate sets scope to locations

* Add rake task to write duplicate locations

* lint

* Update location duplicate count

* Add scheme_id back to DUPLICATE_LOCATION_ATTRIBUTES
pull/2627/head^2
kosiakkatrina 4 months ago committed by GitHub
parent
commit
5a93df785a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 24
      app/models/location.rb
  2. 18
      app/models/scheme.rb
  3. 63
      lib/tasks/count_duplicates.rake
  4. 9
      spec/factories/scheme.rb
  5. 111
      spec/lib/tasks/count_duplicates_spec.rb
  6. 70
      spec/models/location_spec.rb
  7. 129
      spec/models/scheme_spec.rb

24
app/models/location.rb

@ -121,6 +121,30 @@ class Location < ApplicationRecord
scope :visible, -> { where(discarded_at: nil) } scope :visible, -> { where(discarded_at: nil) }
# Groups visible locations by DUPLICATE_LOCATION_ATTRIBUTES and keeps only the
# groups containing more than one row. Locations with a nil scheme_id,
# postcode or mobility_type are excluded before grouping.
# The query ends in `pluck`, so the result is a plain Array holding one array
# of location ids per duplicate group, not a chainable relation.
scope :duplicate_sets, lambda {
  visible
    .where.not(scheme_id: nil)
    .where.not(postcode: nil)
    .where.not(mobility_type: nil)
    .group(*DUPLICATE_LOCATION_ATTRIBUTES)
    .having("COUNT(*) > 1")
    .pluck("ARRAY_AGG(id)")
}
# Groups visible locations by DUPLICATE_LOCATION_ATTRIBUTES minus scheme_id, so
# locations are matched on the remaining attributes regardless of which scheme
# they belong to. Callers are expected to narrow the relation to the schemes of
# interest first. Locations with a nil postcode or mobility_type are excluded.
# The query ends in `pluck`, so the result is a plain Array holding one array
# of location ids per duplicate group.
scope :duplicate_sets_within_given_schemes, lambda {
  visible
    .where.not(postcode: nil)
    .where.not(mobility_type: nil)
    .group(*(DUPLICATE_LOCATION_ATTRIBUTES - %w[scheme_id]))
    .having("COUNT(*) > 1")
    .pluck("ARRAY_AGG(id)")
}
# Columns the duplicate_sets scopes group by: locations whose values match on all of these are reported as one duplicate set.
DUPLICATE_LOCATION_ATTRIBUTES = %w[scheme_id postcode mobility_type].freeze
LOCAL_AUTHORITIES = LocalAuthority.all.map { |la| [la.name, la.code] }.to_h LOCAL_AUTHORITIES = LocalAuthority.all.map { |la| [la.name, la.code] }.to_h
enum local_authorities: LOCAL_AUTHORITIES enum local_authorities: LOCAL_AUTHORITIES

18
app/models/scheme.rb

@ -103,6 +103,22 @@ class Scheme < ApplicationRecord
scope :visible, -> { where(discarded_at: nil) } scope :visible, -> { where(discarded_at: nil) }
# Groups visible schemes by DUPLICATE_SCHEME_ATTRIBUTES and keeps only the
# groups containing more than one row. Schemes with a nil value in any of the
# grouped attributes are excluded, except that secondary_client_group may be
# nil when has_other_client_group is 0.
# The query ends in `pluck`, so the result is a plain Array holding one array
# of scheme ids per duplicate group, not a chainable relation.
scope :duplicate_sets, lambda {
  # Build the OR on its own, before any other condition or the GROUP BY.
  # `or` combines the receiver's *entire* where clause with the argument's, so
  # appending it mid-chain would let every row with has_other_client_group = 0
  # skip the `visible` filter and the NOT NULL checks that preceded it. Keeping
  # both sides ungrouped also keeps them structurally compatible for `or`.
  where.not(secondary_client_group: nil).or(where(has_other_client_group: 0))
    .visible
    .where.not(scheme_type: nil)
    .where.not(registered_under_care_act: nil)
    .where.not(primary_client_group: nil)
    .where.not(has_other_client_group: nil)
    .where.not(support_type: nil)
    .where.not(intended_stay: nil)
    .group(*DUPLICATE_SCHEME_ATTRIBUTES)
    .having("COUNT(*) > 1")
    .pluck("ARRAY_AGG(id)")
}
validate :validate_confirmed validate :validate_confirmed
validate :validate_owning_organisation validate :validate_owning_organisation
@ -192,6 +208,8 @@ class Scheme < ApplicationRecord
"Missing": "X", "Missing": "X",
}.freeze }.freeze
# Columns the duplicate_sets scope groups by: schemes whose values match on all of these are reported as one duplicate set.
DUPLICATE_SCHEME_ATTRIBUTES = %w[scheme_type registered_under_care_act primary_client_group secondary_client_group has_other_client_group support_type intended_stay].freeze
enum arrangement_type: ARRANGEMENT_TYPE, _suffix: true enum arrangement_type: ARRANGEMENT_TYPE, _suffix: true
def self.find_by_id_on_multiple_fields(scheme_id, location_id) def self.find_by_id_on_multiple_fields(scheme_id, location_id)

63
lib/tasks/count_duplicates.rake

@ -0,0 +1,63 @@
namespace :count_duplicates do
  # Shared by both tasks: writes the CSV body to the bucket named by
  # ENV["BULK_UPLOAD_BUCKET"] under "<filename_prefix>-<current time>.csv",
  # then logs a presigned download URL (72.hours.to_i is passed as the second
  # argument, presumably the link lifetime in seconds).
  upload_duplicates_csv = lambda do |filename_prefix, csv_body|
    filename = "#{filename_prefix}-#{Time.zone.now}.csv"
    storage_service = Storage::S3Service.new(Configuration::EnvConfigurationService.new, ENV["BULK_UPLOAD_BUCKET"])
    # The task spec expects the uploaded body to start with a byte order mark.
    # Written as an escape so it stays visible and cannot be lost the way an
    # invisible literal character can.
    storage_service.write_file(filename, "\uFEFF#{csv_body}")
    url = storage_service.get_presigned_url(filename, 72.hours.to_i)
    Rails.logger.info("Download URL: #{url}")
  end

  desc "Count the number of duplicate schemes per organisation"
  task scheme_duplicates_per_org: :environment do
    duplicates_csv = CSV.generate(headers: true) do |csv|
      csv << ["Organisation id", "Number of duplicate sets", "Total duplicate schemes"]

      Organisation.visible.each do |organisation|
        # Run the grouped query once per organisation and reuse the result.
        duplicate_sets = organisation.owned_schemes.duplicate_sets
        next if duplicate_sets.empty?

        csv << [organisation.id, duplicate_sets.count, duplicate_sets.sum(&:size)]
      end
    end

    upload_duplicates_csv.call("scheme-duplicates", duplicates_csv)
  end

  desc "Count the number of duplicate locations per organisation"
  task location_duplicates_per_org: :environment do
    duplicates_csv = CSV.generate(headers: true) do |csv|
      csv << ["Organisation id", "Duplicate sets within individual schemes", "Duplicate locations within individual schemes", "All duplicate sets", "All duplicates"]

      Organisation.visible.each do |organisation|
        owned_schemes = organisation.owned_schemes

        # Duplicates found when each scheme is inspected in isolation.
        sets_within_individual_schemes = owned_schemes.flat_map { |scheme| scheme.locations.duplicate_sets }
        locations_within_individual_schemes = sets_within_individual_schemes.flatten

        # Duplicates found when the locations of each set of duplicate schemes
        # are pooled together and compared without regard to scheme.
        sets_across_duplicate_schemes = owned_schemes.duplicate_sets.flat_map do |duplicate_scheme_ids|
          Location.where(scheme_id: duplicate_scheme_ids).duplicate_sets_within_given_schemes
        end
        locations_across_duplicate_schemes = sets_across_duplicate_schemes.flatten

        # A per-scheme set that shares any location with a pooled set is already
        # represented by that pooled set, so it must not be counted twice.
        sets_only_within_individual_schemes = sets_within_individual_schemes.reject do |location_ids|
          (location_ids & locations_across_duplicate_schemes).any?
        end

        # With no duplicate schemes the pooled collections are empty and these
        # totals reduce to the per-scheme counts.
        all_duplicate_sets_count = sets_only_within_individual_schemes.count + sets_across_duplicate_schemes.count
        all_duplicate_locations_count = (locations_across_duplicate_schemes + locations_within_individual_schemes).uniq.count
        next unless all_duplicate_locations_count.positive?

        csv << [organisation.id, sets_within_individual_schemes.count, locations_within_individual_schemes.count, all_duplicate_sets_count, all_duplicate_locations_count]
      end
    end

    upload_duplicates_csv.call("location-duplicates", duplicates_csv)
  end
end

9
spec/factories/scheme.rb

@ -32,5 +32,14 @@ FactoryBot.define do
confirmed { false } confirmed { false }
support_type { nil } support_type { nil }
end end
# Pins every attribute listed in Scheme::DUPLICATE_SCHEME_ATTRIBUTES to a fixed value, so schemes built with this trait match one another.
trait :duplicate do
scheme_type { 4 }
registered_under_care_act { 1 }
primary_client_group { "O" }
secondary_client_group { "H" }
has_other_client_group { 1 }
support_type { 2 }
intended_stay { "M" }
end
end end
end end

111
spec/lib/tasks/count_duplicates_spec.rb

@ -0,0 +1,111 @@
require "rails_helper"
require "rake"

RSpec.describe "count_duplicates" do
  # Both tasks upload through the same storage service, so the double, the
  # stubbed URL and the rake bootstrapping are declared once for every example.
  let(:storage_service) { instance_double(Storage::S3Service) }
  let(:test_url) { "test_url" }

  before do
    allow(Storage::S3Service).to receive(:new).and_return(storage_service)
    allow(storage_service).to receive(:write_file)
    allow(storage_service).to receive(:get_presigned_url).and_return(test_url)

    Rake.application.rake_require("tasks/count_duplicates")
    Rake::Task.define_task(:environment)
    # `task` is the subject declared by each nested describe.
    task.reenable
  end

  describe "count_duplicates:scheme_duplicates_per_org", type: :task do
    subject(:task) { Rake::Task["count_duplicates:scheme_duplicates_per_org"] }

    context "when the rake task is run" do
      context "and there are no duplicate schemes" do
        before { create(:organisation) }

        it "creates a csv with headers only" do
          expect(storage_service).to receive(:write_file).with(/scheme-duplicates-.*\.csv/, "\uFEFFOrganisation id,Number of duplicate sets,Total duplicate schemes\n")
          expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")

          task.invoke
        end
      end

      context "and there are duplicate schemes" do
        let(:organisation) { create(:organisation) }
        let(:organisation2) { create(:organisation) }

        before do
          create_list(:scheme, 2, :duplicate, owning_organisation: organisation)
          create_list(:scheme, 3, :duplicate, primary_client_group: "I", owning_organisation: organisation)
          create_list(:scheme, 5, :duplicate, owning_organisation: organisation2)
        end

        it "creates a csv with correct duplicate numbers" do
          expect(storage_service).to receive(:write_file).with(/scheme-duplicates-.*\.csv/, "\uFEFFOrganisation id,Number of duplicate sets,Total duplicate schemes\n#{organisation.id},2,5\n#{organisation2.id},1,5\n")
          expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")

          task.invoke
        end
      end
    end
  end

  describe "count_duplicates:location_duplicates_per_org", type: :task do
    subject(:task) { Rake::Task["count_duplicates:location_duplicates_per_org"] }

    context "when the rake task is run" do
      context "and there are no duplicate locations" do
        before { create(:organisation) }

        it "creates a csv with headers only" do
          expect(storage_service).to receive(:write_file).with(/location-duplicates-.*\.csv/, "\uFEFFOrganisation id,Duplicate sets within individual schemes,Duplicate locations within individual schemes,All duplicate sets,All duplicates\n")
          expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")

          task.invoke
        end
      end

      context "and there are duplicate locations" do
        let(:organisation) { create(:organisation) }
        let(:scheme_a) { create(:scheme, :duplicate, owning_organisation: organisation) }
        let(:scheme_b) { create(:scheme, :duplicate, owning_organisation: organisation) }
        let(:scheme_c) { create(:scheme, owning_organisation: organisation) }
        let(:organisation2) { create(:organisation) }
        let(:scheme2) { create(:scheme, owning_organisation: organisation2) }
        let(:scheme3) { create(:scheme, owning_organisation: organisation2) }

        before do
          create_list(:location, 2, postcode: "A1 1AB", mobility_type: "M", scheme: scheme_a) # Location A
          create_list(:location, 1, postcode: "A1 1AB", mobility_type: "A", scheme: scheme_a) # Location B

          create_list(:location, 1, postcode: "A1 1AB", mobility_type: "M", scheme: scheme_b) # Location A
          create_list(:location, 1, postcode: "A1 1AB", mobility_type: "A", scheme: scheme_b) # Location B
          create_list(:location, 2, postcode: "A1 1AB", mobility_type: "N", scheme: scheme_b) # Location C

          create_list(:location, 2, postcode: "A1 1AB", mobility_type: "A", scheme: scheme_c) # Location B

          create_list(:location, 5, postcode: "A1 1AB", mobility_type: "M", scheme: scheme2)
          create_list(:location, 2, postcode: "A1 1AB", mobility_type: "M", scheme: scheme3)
        end

        it "creates a csv with correct duplicate numbers" do
          expect(storage_service).to receive(:write_file).with(/location-duplicates-.*\.csv/, "\uFEFFOrganisation id,Duplicate sets within individual schemes,Duplicate locations within individual schemes,All duplicate sets,All duplicates\n#{organisation.id},3,6,4,9\n#{organisation2.id},2,7,2,7\n")
          expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")

          task.invoke
        end
      end
    end
  end
end

70
spec/models/location_spec.rb

@ -831,6 +831,76 @@ RSpec.describe Location, type: :model do
expect(described_class.active.count).to eq(2) expect(described_class.active.count).to eq(2)
end end
end end
context "when getting list of duplicate locations" do
  let!(:scheme) { create(:scheme) }
  let!(:location) { create(:location, postcode: "AB1 2CD", mobility_type: "M", scheme:) }
  let!(:duplicate_location) { create(:location, postcode: "AB1 2CD", mobility_type: "M", scheme:) }
  let(:duplicate_sets) { described_class.duplicate_sets }

  # Asserts that the only duplicate set reported is the baseline pair created
  # above, whatever extra location the including context has added.
  shared_examples "only the original pair is reported" do |example_description|
    it example_description do
      expect(duplicate_sets.count).to eq(1)
      expect(duplicate_sets.first).to contain_exactly(location.id, duplicate_location.id)
    end
  end

  include_examples "only the original pair is reported", "returns a list of duplicates for the same scheme"

  context "when there is a deleted duplicate location" do
    before { create(:location, postcode: "AB1 2CD", mobility_type: "M", discarded_at: Time.zone.now, scheme:) }

    include_examples "only the original pair is reported", "does not return the deleted location as a duplicate"
  end

  context "when there is a location with a different postcode" do
    before { create(:location, postcode: "A1 1AB", mobility_type: "M", scheme:) }

    include_examples "only the original pair is reported", "does not return a location with a different postcode as a duplicate"
  end

  context "when there is a location with a different mobility_type" do
    before { create(:location, postcode: "AB1 2CD", mobility_type: "A", scheme:) }

    include_examples "only the original pair is reported", "does not return a location with a different mobility_type as a duplicate"
  end

  context "when there is a location with a different scheme" do
    # No scheme is passed, so the factory supplies its own.
    before { create(:location, postcode: "AB1 2CD", mobility_type: "M") }

    include_examples "only the original pair is reported", "does not return a location with a different scheme as a duplicate"
  end

  context "when there is a location with nil values for duplicate check fields" do
    before do
      [location, duplicate_location].each do |l|
        l.postcode = nil
        l.mobility_type = nil
        # Validations are skipped so the nil values can be persisted.
        l.save!(validate: false)
      end
    end

    it "does not return a location with nil values as a duplicate" do
      expect(duplicate_sets).to be_empty
    end
  end
end
end end
describe "status" do describe "status" do

129
spec/models/scheme_spec.rb

@ -208,6 +208,135 @@ RSpec.describe Scheme, type: :model do
end end
end end
end end
context "when getting list of duplicate schemes" do
  let(:organisation) { create(:organisation) }
  let!(:scheme) { create(:scheme, :duplicate, owning_organisation: organisation) }
  let!(:duplicate_scheme) { create(:scheme, :duplicate, owning_organisation: organisation) }
  let(:duplicate_sets) { described_class.duplicate_sets }

  it "returns a list of duplicates in the same organisation" do
    expect(duplicate_sets.count).to eq(1)
    expect(duplicate_sets.first).to contain_exactly(scheme.id, duplicate_scheme.id)
  end

  context "when there is a deleted duplicate scheme" do
    before do
      create(:scheme, :duplicate, discarded_at: Time.zone.now)
    end

    it "does not return the deleted scheme as a duplicate" do
      expect(duplicate_sets.count).to eq(1)
      expect(duplicate_sets.first).to contain_exactly(scheme.id, duplicate_scheme.id)
    end
  end

  # One value per duplicate-check attribute that differs from the :duplicate
  # trait. A scheme differing in any single attribute must not join the set.
  {
    scheme_type: 7,
    registered_under_care_act: 2,
    primary_client_group: "H",
    secondary_client_group: "O",
    has_other_client_group: 0,
    support_type: 4,
    intended_stay: "P",
  }.each do |attribute, non_matching_value|
    context "when there is a scheme with a different #{attribute}" do
      before do
        create(:scheme, :duplicate, attribute => non_matching_value)
      end

      it "does not return a scheme with a different #{attribute} as a duplicate" do
        expect(duplicate_sets.count).to eq(1)
        expect(duplicate_sets.first).to contain_exactly(scheme.id, duplicate_scheme.id)
      end
    end
  end

  context "when there is a scheme with nil values for duplicate check fields" do
    before do
      [scheme, duplicate_scheme].each do |s|
        s.scheme_type = nil
        s.registered_under_care_act = nil
        s.primary_client_group = nil
        s.secondary_client_group = nil
        s.has_other_client_group = nil
        s.support_type = nil
        s.intended_stay = nil
        # Validations are skipped so the nil values can be persisted.
        s.save!(validate: false)
      end
    end

    it "does not return a scheme with nil values as a duplicate" do
      expect(duplicate_sets).to be_empty
    end
  end

  context "when there are duplicate schemes without secondary client group" do
    let!(:scheme) { create(:scheme, :duplicate, owning_organisation: organisation, secondary_client_group: nil, has_other_client_group: 0) }
    let!(:duplicate_scheme) { create(:scheme, :duplicate, owning_organisation: organisation, secondary_client_group: nil, has_other_client_group: 0) }

    # The assertions expect the pair to be reported, so the description says so.
    it "returns the duplicates" do
      expect(duplicate_sets.count).to eq(1)
      expect(duplicate_sets.first).to contain_exactly(scheme.id, duplicate_scheme.id)
    end
  end
end
end end
end end

Loading…
Cancel
Save