Browse Source

Count active scheme and location duplicates (#2828)

pull/2842/head
kosiakkatrina 2 months ago committed by GitHub
parent
commit
56f70f9cd6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 23
      app/models/location.rb
  2. 16
      app/models/scheme.rb
  3. 62
      lib/tasks/count_duplicates.rake
  4. 5
      spec/factories/scheme.rb
  5. 110
      spec/lib/tasks/count_duplicates_spec.rb

23
app/models/location.rb

@ -144,6 +144,29 @@ class Location < ApplicationRecord
scope.pluck("ARRAY_AGG(id)") scope.pluck("ARRAY_AGG(id)")
} }
scope :duplicate_active_sets, lambda {
scope = active
.group(*DUPLICATE_LOCATION_ATTRIBUTES)
.where.not(scheme_id: nil)
.where.not(postcode: nil)
.where.not(mobility_type: nil)
.having(
"COUNT(*) > 1",
)
scope.pluck("ARRAY_AGG(id)")
}
scope :duplicate_active_sets_within_given_schemes, lambda {
scope = active
.group(*DUPLICATE_LOCATION_ATTRIBUTES - %w[scheme_id])
.where.not(postcode: nil)
.where.not(mobility_type: nil)
.having(
"COUNT(*) > 1",
)
scope.pluck("ARRAY_AGG(id)")
}
DUPLICATE_LOCATION_ATTRIBUTES = %w[scheme_id postcode mobility_type].freeze DUPLICATE_LOCATION_ATTRIBUTES = %w[scheme_id postcode mobility_type].freeze
LOCAL_AUTHORITIES = LocalAuthority.all.map { |la| [la.name, la.code] }.to_h LOCAL_AUTHORITIES = LocalAuthority.all.map { |la| [la.name, la.code] }.to_h

16
app/models/scheme.rb

@ -119,6 +119,22 @@ class Scheme < ApplicationRecord
scope.pluck("ARRAY_AGG(id)") scope.pluck("ARRAY_AGG(id)")
} }
scope :duplicate_active_sets, lambda {
scope = active
.group(*DUPLICATE_SCHEME_ATTRIBUTES)
.where.not(scheme_type: nil)
.where.not(registered_under_care_act: nil)
.where.not(primary_client_group: nil)
.where.not(has_other_client_group: nil)
.where.not(secondary_client_group: nil).or(where(has_other_client_group: 0))
.where.not(support_type: nil)
.where.not(intended_stay: nil)
.having(
"COUNT(*) > 1",
)
scope.pluck("ARRAY_AGG(id)")
}
validate :validate_confirmed validate :validate_confirmed
validate :validate_owning_organisation validate :validate_owning_organisation

62
lib/tasks/count_duplicates.rake

@ -60,4 +60,66 @@ namespace :count_duplicates do
url = storage_service.get_presigned_url(filename, 72.hours.to_i) url = storage_service.get_presigned_url(filename, 72.hours.to_i)
Rails.logger.info("Download URL: #{url}") Rails.logger.info("Download URL: #{url}")
end end
desc "Count the number of duplicate active schemes per organisation"
task active_scheme_duplicates_per_org: :environment do
duplicates_csv = CSV.generate(headers: true) do |csv|
csv << ["Organisation id", "Number of duplicate sets", "Total duplicate schemes"]
Organisation.visible.each do |organisation|
if organisation.owned_schemes.duplicate_active_sets.count.positive?
csv << [organisation.id, organisation.owned_schemes.duplicate_active_sets.count, organisation.owned_schemes.duplicate_active_sets.sum(&:size)]
end
end
end
filename = "active-scheme-duplicates-#{Time.zone.now}.csv"
storage_service = Storage::S3Service.new(Configuration::EnvConfigurationService.new, ENV["BULK_UPLOAD_BUCKET"])
storage_service.write_file(filename, "#{duplicates_csv}")
url = storage_service.get_presigned_url(filename, 72.hours.to_i)
Rails.logger.info("Download URL: #{url}")
end
desc "Count the number of duplicate active locations per organisation"
task active_location_duplicates_per_org: :environment do
duplicates_csv = CSV.generate(headers: true) do |csv|
csv << ["Organisation id", "Duplicate sets within individual schemes", "Duplicate locations within individual schemes", "All duplicate sets", "All duplicates"]
Organisation.visible.each do |organisation|
duplicate_sets_within_individual_schemes = []
organisation.owned_schemes.each do |scheme|
duplicate_sets_within_individual_schemes += scheme.locations.duplicate_active_sets
end
duplicate_locations_within_individual_schemes = duplicate_sets_within_individual_schemes.flatten
duplicate_sets_within_duplicate_schemes = []
if organisation.owned_schemes.duplicate_active_sets.count.positive?
organisation.owned_schemes.duplicate_active_sets.each do |duplicate_set|
duplicate_sets_within_duplicate_schemes += Location.where(scheme_id: duplicate_set).duplicate_active_sets_within_given_schemes
end
duplicate_locations_within_duplicate_schemes_ids = duplicate_sets_within_duplicate_schemes.flatten
duplicate_sets_within_individual_schemes_without_intersecting_sets = duplicate_sets_within_individual_schemes.reject { |set| set.any? { |id| duplicate_sets_within_duplicate_schemes.any? { |duplicate_set| duplicate_set.include?(id) } } }
all_duplicate_sets_count = (duplicate_sets_within_individual_schemes_without_intersecting_sets + duplicate_sets_within_duplicate_schemes).count
all_duplicate_locations_count = (duplicate_locations_within_duplicate_schemes_ids + duplicate_locations_within_individual_schemes).uniq.count
else
all_duplicate_sets_count = duplicate_sets_within_individual_schemes.count
all_duplicate_locations_count = duplicate_locations_within_individual_schemes.count
end
if all_duplicate_locations_count.positive?
csv << [organisation.id, duplicate_sets_within_individual_schemes.count, duplicate_locations_within_individual_schemes.count, all_duplicate_sets_count, all_duplicate_locations_count]
end
end
end
filename = "active-location-duplicates-#{Time.zone.now}.csv"
storage_service = Storage::S3Service.new(Configuration::EnvConfigurationService.new, ENV["BULK_UPLOAD_BUCKET"])
storage_service.write_file(filename, "#{duplicates_csv}")
url = storage_service.get_presigned_url(filename, 72.hours.to_i)
Rails.logger.info("Download URL: #{url}")
end
end end

5
spec/factories/scheme.rb

@ -44,5 +44,10 @@ FactoryBot.define do
trait :created_now do trait :created_now do
created_at { Time.zone.now } created_at { Time.zone.now }
end end
trait :with_location do
after(:create) do |scheme|
create(:location, scheme:)
end
end
end end
end end

110
spec/lib/tasks/count_duplicates_spec.rb

@ -108,4 +108,114 @@ RSpec.describe "count_duplicates" do
end end
end end
end end
describe "count_duplicates:active_scheme_duplicates_per_org", type: :task do
subject(:task) { Rake::Task["count_duplicates:active_scheme_duplicates_per_org"] }
let(:storage_service) { instance_double(Storage::S3Service) }
let(:test_url) { "test_url" }
before do
Rake.application.rake_require("tasks/count_duplicates")
Rake::Task.define_task(:environment)
task.reenable
end
context "when the rake task is run" do
context "and there are no duplicate schemes" do
before do
create(:organisation)
end
it "creates a csv with headers only" do
expect(storage_service).to receive(:write_file).with(/scheme-duplicates-.*\.csv/, "\uFEFFOrganisation id,Number of duplicate sets,Total duplicate schemes\n")
expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")
task.invoke
end
end
context "and there are duplicate schemes" do
let(:organisation) { create(:organisation) }
let(:organisation2) { create(:organisation) }
before do
create_list(:scheme, 2, :duplicate, :with_location, owning_organisation: organisation)
create_list(:scheme, 3, :duplicate, :with_location, primary_client_group: "I", owning_organisation: organisation)
create_list(:scheme, 5, :duplicate, :with_location, owning_organisation: organisation2)
deactivated_schemes = create_list(:scheme, 2, :duplicate, owning_organisation: organisation)
deactivated_schemes.each do |scheme|
create(:scheme_deactivation_period, deactivation_date: Time.zone.yesterday, reactivation_date: nil, scheme:)
end
end
it "creates a csv with correct duplicate numbers" do
expect(storage_service).to receive(:write_file).with(/scheme-duplicates-.*\.csv/, "\uFEFFOrganisation id,Number of duplicate sets,Total duplicate schemes\n#{organisation.id},2,5\n#{organisation2.id},1,5\n")
expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")
task.invoke
end
end
end
end
describe "count_duplicates:active_location_duplicates_per_org", type: :task do
subject(:task) { Rake::Task["count_duplicates:active_location_duplicates_per_org"] }
let(:storage_service) { instance_double(Storage::S3Service) }
let(:test_url) { "test_url" }
before do
Rake.application.rake_require("tasks/count_duplicates")
Rake::Task.define_task(:environment)
task.reenable
end
context "when the rake task is run" do
context "and there are no duplicate locations" do
before do
create(:organisation)
end
it "creates a csv with headers only" do
expect(storage_service).to receive(:write_file).with(/location-duplicates-.*\.csv/, "\uFEFFOrganisation id,Duplicate sets within individual schemes,Duplicate locations within individual schemes,All duplicate sets,All duplicates\n")
expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")
task.invoke
end
end
context "and there are duplicate locations" do
let(:organisation) { create(:organisation) }
let(:scheme_a) { create(:scheme, :duplicate, owning_organisation: organisation) }
let(:scheme_b) { create(:scheme, :duplicate, owning_organisation: organisation) }
let(:scheme_c) { create(:scheme, owning_organisation: organisation) }
let(:organisation2) { create(:organisation) }
let(:scheme2) { create(:scheme, owning_organisation: organisation2) }
let(:scheme3) { create(:scheme, owning_organisation: organisation2) }
before do
create_list(:location, 2, postcode: "A1 1AB", mobility_type: "M", scheme: scheme_a) # Location A
create_list(:location, 1, postcode: "A1 1AB", mobility_type: "A", scheme: scheme_a) # Location B
create_list(:location, 1, postcode: "A1 1AB", mobility_type: "M", scheme: scheme_b) # Location A
create_list(:location, 1, postcode: "A1 1AB", mobility_type: "A", scheme: scheme_b) # Location B
create_list(:location, 2, postcode: "A1 1AB", mobility_type: "N", scheme: scheme_b) # Location C
create_list(:location, 2, postcode: "A1 1AB", mobility_type: "A", scheme: scheme_c) # Location B
create_list(:location, 5, postcode: "A1 1AB", mobility_type: "M", scheme: scheme2)
create_list(:location, 2, postcode: "A1 1AB", mobility_type: "M", scheme: scheme3)
deactivated_locations = create_list(:location, 1, postcode: "A1 1AB", mobility_type: "M", scheme: scheme_b)
deactivated_locations.each do |location|
create(:location_deactivation_period, deactivation_date: Time.zone.yesterday, reactivation_date: nil, location:)
end
end
it "creates a csv with correct duplicate numbers" do
expect(storage_service).to receive(:write_file).with(/location-duplicates-.*\.csv/, "\uFEFFOrganisation id,Duplicate sets within individual schemes,Duplicate locations within individual schemes,All duplicate sets,All duplicates\n#{organisation.id},3,6,4,9\n#{organisation2.id},2,7,2,7\n")
expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")
task.invoke
end
end
end
end
end end

Loading…
Cancel
Save