diff --git a/app/models/location.rb b/app/models/location.rb
index 12c6f2fad..58afc76e8 100644
--- a/app/models/location.rb
+++ b/app/models/location.rb
@@ -144,6 +144,31 @@ class Location < ApplicationRecord
     scope.pluck("ARRAY_AGG(id)")
   }
 
+  # Groups active locations that share every DUPLICATE_LOCATION_ATTRIBUTES value and
+  # returns one array of ids per group holding more than one location.
+  # The result comes from pluck, so it is a plain Array and cannot be chained further.
+  scope :duplicate_active_sets, lambda {
+    active
+      .group(*DUPLICATE_LOCATION_ATTRIBUTES)
+      .where.not(scheme_id: nil)
+      .where.not(postcode: nil)
+      .where.not(mobility_type: nil)
+      .having("COUNT(*) > 1")
+      .pluck("ARRAY_AGG(id)")
+  }
+
+  # Same grouping as duplicate_active_sets but without scheme_id, so locations that
+  # belong to different schemes can land in the same set. The rake task in
+  # lib/tasks/count_duplicates.rake calls it on Location.where(scheme_id: ids).
+  scope :duplicate_active_sets_within_given_schemes, lambda {
+    active
+      .group(*DUPLICATE_LOCATION_ATTRIBUTES - %w[scheme_id])
+      .where.not(postcode: nil)
+      .where.not(mobility_type: nil)
+      .having("COUNT(*) > 1")
+      .pluck("ARRAY_AGG(id)")
+  }
+
   DUPLICATE_LOCATION_ATTRIBUTES = %w[scheme_id postcode mobility_type].freeze
 
   LOCAL_AUTHORITIES = LocalAuthority.all.map { |la| [la.name, la.code] }.to_h
diff --git a/app/models/scheme.rb b/app/models/scheme.rb
index 33f236374..1cd56ac7d 100644
--- a/app/models/scheme.rb
+++ b/app/models/scheme.rb
@@ -119,6 +119,25 @@ class Scheme < ApplicationRecord
     scope.pluck("ARRAY_AGG(id)")
   }
 
+  # Groups active schemes that share every DUPLICATE_SCHEME_ATTRIBUTES value and
+  # returns one array of ids per group holding more than one scheme.
+  # NOTE(review): `.or` is called on the whole relation built up to that point, not
+  # only on the secondary_client_group check; the predicates chained after it are
+  # added as usual. Confirm this is the intended combination.
+  scope :duplicate_active_sets, lambda {
+    active
+      .group(*DUPLICATE_SCHEME_ATTRIBUTES)
+      .where.not(scheme_type: nil)
+      .where.not(registered_under_care_act: nil)
+      .where.not(primary_client_group: nil)
+      .where.not(has_other_client_group: nil)
+      .where.not(secondary_client_group: nil).or(where(has_other_client_group: 0))
+      .where.not(support_type: nil)
+      .where.not(intended_stay: nil)
+      .having("COUNT(*) > 1")
+      .pluck("ARRAY_AGG(id)")
+  }
+
   validate :validate_confirmed
   validate :validate_owning_organisation
 
diff --git a/lib/tasks/count_duplicates.rake b/lib/tasks/count_duplicates.rake
index e65688b4d..76cd1d991 100644
--- a/lib/tasks/count_duplicates.rake
+++ b/lib/tasks/count_duplicates.rake
@@ -60,4 +60,71 @@ namespace :count_duplicates do
     url = storage_service.get_presigned_url(filename, 72.hours.to_i)
     Rails.logger.info("Download URL: #{url}")
   end
+
+  desc "Count the number of duplicate active schemes per organisation"
+  task active_scheme_duplicates_per_org: :environment do
+    duplicates_csv = CSV.generate(headers: true) do |csv|
+      csv << ["Organisation id", "Number of duplicate sets", "Total duplicate schemes"]
+
+      Organisation.visible.each do |organisation|
+        # Each call to duplicate_active_sets runs a query, so fetch the sets once.
+        duplicate_sets = organisation.owned_schemes.duplicate_active_sets
+        next if duplicate_sets.empty?
+
+        csv << [organisation.id, duplicate_sets.count, duplicate_sets.sum(&:size)]
+      end
+    end
+
+    filename = "active-scheme-duplicates-#{Time.zone.now}.csv"
+    storage_service = Storage::S3Service.new(Configuration::EnvConfigurationService.new, ENV["BULK_UPLOAD_BUCKET"])
+    # Leading byte-order mark, written as an escape so it is visible in the source.
+    storage_service.write_file(filename, "\uFEFF#{duplicates_csv}")
+
+    url = storage_service.get_presigned_url(filename, 72.hours.to_i)
+    Rails.logger.info("Download URL: #{url}")
+  end
+
+  desc "Count the number of duplicate active locations per organisation"
+  task active_location_duplicates_per_org: :environment do
+    duplicates_csv = CSV.generate(headers: true) do |csv|
+      csv << ["Organisation id", "Duplicate sets within individual schemes", "Duplicate locations within individual schemes", "All duplicate sets", "All duplicates"]
+
+      Organisation.visible.each do |organisation|
+        owned_schemes = organisation.owned_schemes
+
+        # Duplicate sets found inside each scheme taken on its own.
+        sets_within_schemes = owned_schemes.flat_map { |scheme| scheme.locations.duplicate_active_sets }
+        locations_within_schemes = sets_within_schemes.flatten
+
+        # Fetched once: each call to duplicate_active_sets runs a query.
+        duplicate_scheme_sets = owned_schemes.duplicate_active_sets
+        if duplicate_scheme_sets.empty?
+          all_duplicate_sets_count = sets_within_schemes.count
+          all_duplicate_locations_count = locations_within_schemes.count
+        else
+          # Duplicate sets found when the schemes of one duplicate scheme set are pooled together.
+          sets_across_schemes = duplicate_scheme_sets.flat_map do |scheme_ids|
+            Location.where(scheme_id: scheme_ids).duplicate_active_sets_within_given_schemes
+          end
+          locations_across_schemes = sets_across_schemes.flatten
+
+          # Leave out per-scheme sets that share a location with a pooled set so they are not counted twice.
+          standalone_sets = sets_within_schemes.reject { |set| (set & locations_across_schemes).any? }
+          all_duplicate_sets_count = (standalone_sets + sets_across_schemes).count
+          all_duplicate_locations_count = (locations_across_schemes + locations_within_schemes).uniq.count
+        end
+        next unless all_duplicate_locations_count.positive?
+
+        csv << [organisation.id, sets_within_schemes.count, locations_within_schemes.count, all_duplicate_sets_count, all_duplicate_locations_count]
+      end
+    end
+
+    filename = "active-location-duplicates-#{Time.zone.now}.csv"
+    storage_service = Storage::S3Service.new(Configuration::EnvConfigurationService.new, ENV["BULK_UPLOAD_BUCKET"])
+    # Leading byte-order mark, written as an escape so it is visible in the source.
+    storage_service.write_file(filename, "\uFEFF#{duplicates_csv}")
+
+    url = storage_service.get_presigned_url(filename, 72.hours.to_i)
+    Rails.logger.info("Download URL: #{url}")
+  end
 end
diff --git a/spec/factories/scheme.rb b/spec/factories/scheme.rb
index 34f98a8a6..e7ecc8b60 100644
--- a/spec/factories/scheme.rb
+++ b/spec/factories/scheme.rb
@@ -44,5 +44,11 @@ FactoryBot.define do
     trait :created_now do
       created_at { Time.zone.now }
     end
+    # Creates one location attached to the scheme once the scheme has been created.
+    trait :with_location do
+      after(:create) do |scheme|
+        create(:location, scheme:)
+      end
+    end
   end
 end
diff --git a/spec/lib/tasks/count_duplicates_spec.rb b/spec/lib/tasks/count_duplicates_spec.rb
index 99da5b2fb..b4f6a8db8 100644
--- a/spec/lib/tasks/count_duplicates_spec.rb
+++ b/spec/lib/tasks/count_duplicates_spec.rb
@@ -108,4 +108,119 @@ RSpec.describe "count_duplicates" do
       end
     end
   end
+
+  describe "count_duplicates:active_scheme_duplicates_per_org", type: :task do
+    subject(:task) { Rake::Task["count_duplicates:active_scheme_duplicates_per_org"] }
+
+    let(:storage_service) { instance_double(Storage::S3Service) }
+    let(:test_url) { "test_url" }
+
+    before do
+      Rake.application.rake_require("tasks/count_duplicates")
+      Rake::Task.define_task(:environment)
+      task.reenable
+    end
+
+    context "when the rake task is run" do
+      context "and there are no duplicate schemes" do
+        before do
+          create(:organisation)
+        end
+
+        it "creates a csv with headers only" do
+          expect(storage_service).to receive(:write_file).with(/scheme-duplicates-.*\.csv/, "\uFEFFOrganisation id,Number of duplicate sets,Total duplicate schemes\n")
+          expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")
+          task.invoke
+        end
+      end
+
+      context "and there are duplicate schemes" do
+        let(:organisation) { create(:organisation) }
+        let(:organisation2) { create(:organisation) }
+
+        before do
+          create_list(:scheme, 2, :duplicate, :with_location, owning_organisation: organisation)
+          create_list(:scheme, 3, :duplicate, :with_location, primary_client_group: "I", owning_organisation: organisation)
+          create_list(:scheme, 5, :duplicate, :with_location, owning_organisation: organisation2)
+          # Deactivated duplicates: the expected rows below leave them out of the counts.
+          deactivated_schemes = create_list(:scheme, 2, :duplicate, owning_organisation: organisation)
+          deactivated_schemes.each do |scheme|
+            create(:scheme_deactivation_period, deactivation_date: Time.zone.yesterday, reactivation_date: nil, scheme:)
+          end
+        end
+
+        it "creates a csv with correct duplicate numbers" do
+          # organisation: sets of 2 and 3 schemes (2 sets, 5 schemes); organisation2: one set of 5.
+          expect(storage_service).to receive(:write_file).with(/scheme-duplicates-.*\.csv/, "\uFEFFOrganisation id,Number of duplicate sets,Total duplicate schemes\n#{organisation.id},2,5\n#{organisation2.id},1,5\n")
+          expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")
+          task.invoke
+        end
+      end
+    end
+  end
+
+  describe "count_duplicates:active_location_duplicates_per_org", type: :task do
+    subject(:task) { Rake::Task["count_duplicates:active_location_duplicates_per_org"] }
+
+    let(:storage_service) { instance_double(Storage::S3Service) }
+    let(:test_url) { "test_url" }
+
+    before do
+      Rake.application.rake_require("tasks/count_duplicates")
+      Rake::Task.define_task(:environment)
+      task.reenable
+    end
+
+    context "when the rake task is run" do
+      context "and there are no duplicate locations" do
+        before do
+          create(:organisation)
+        end
+
+        it "creates a csv with headers only" do
+          expect(storage_service).to receive(:write_file).with(/location-duplicates-.*\.csv/, "\uFEFFOrganisation id,Duplicate sets within individual schemes,Duplicate locations within individual schemes,All duplicate sets,All duplicates\n")
+          expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")
+          task.invoke
+        end
+      end
+
+      context "and there are duplicate locations" do
+        let(:organisation) { create(:organisation) }
+        let(:scheme_a) { create(:scheme, :duplicate, owning_organisation: organisation) }
+        let(:scheme_b) { create(:scheme, :duplicate, owning_organisation: organisation) }
+        let(:scheme_c) { create(:scheme, owning_organisation: organisation) }
+        let(:organisation2) { create(:organisation) }
+        let(:scheme2) { create(:scheme, owning_organisation: organisation2) }
+        let(:scheme3) { create(:scheme, owning_organisation: organisation2) }
+
+        before do
+          create_list(:location, 2, postcode: "A1 1AB", mobility_type: "M", scheme: scheme_a) # Location A
+          create_list(:location, 1, postcode: "A1 1AB", mobility_type: "A", scheme: scheme_a) # Location B
+
+          create_list(:location, 1, postcode: "A1 1AB", mobility_type: "M", scheme: scheme_b) # Location A
+          create_list(:location, 1, postcode: "A1 1AB", mobility_type: "A", scheme: scheme_b) # Location B
+          create_list(:location, 2, postcode: "A1 1AB", mobility_type: "N", scheme: scheme_b) # Location C
+
+          create_list(:location, 2, postcode: "A1 1AB", mobility_type: "A", scheme: scheme_c) # Location B
+
+          create_list(:location, 5, postcode: "A1 1AB", mobility_type: "M", scheme: scheme2)
+          create_list(:location, 2, postcode: "A1 1AB", mobility_type: "M", scheme: scheme3)
+
+          deactivated_locations = create_list(:location, 1, postcode: "A1 1AB", mobility_type: "M", scheme: scheme_b)
+          deactivated_locations.each do |location|
+            create(:location_deactivation_period, deactivation_date: Time.zone.yesterday, reactivation_date: nil, location:)
+          end
+        end
+
+        it "creates a csv with correct duplicate numbers" do
+          # organisation: three per-scheme sets (M in scheme_a, N in scheme_b, A in scheme_c) holding 6 locations.
+          # Assuming scheme_a and scheme_b (both :duplicate) form one duplicate scheme set, pooling them gives
+          # three sets (M, A, N) holding 7 locations; with scheme_c's own set that is 4 sets and 9 locations.
+          expect(storage_service).to receive(:write_file).with(/location-duplicates-.*\.csv/, "\uFEFFOrganisation id,Duplicate sets within individual schemes,Duplicate locations within individual schemes,All duplicate sets,All duplicates\n#{organisation.id},3,6,4,9\n#{organisation2.id},2,7,2,7\n")
+          expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")
+          task.invoke
+        end
+      end
+    end
+  end
 end