Browse Source
* Add duplicate sets scope to schemes
* Add rake task to write duplicate scheme sets
* Add duplicate sets scope to locations
* Add rake task to write duplicate locations
* lint
* Update location duplicate count
* Add scheme_id back to DUPLICATE_LOCATION_ATTRIBUTES
pull/2627/head^2
kosiakkatrina
4 months ago
committed by
GitHub
7 changed files with 424 additions and 0 deletions
@ -0,0 +1,63 @@ |
|||||||
|
# Rake tasks that report, per visible organisation, how many duplicate schemes
# and duplicate locations exist. Each task builds a CSV in memory, writes it to
# the bucket named by ENV["BULK_UPLOAD_BUCKET"] and logs a presigned download URL.
namespace :count_duplicates do
  # Uploads +csv_content+ as "<prefix>-<current time>.csv" and logs a download
  # URL that is requested with a 72 hour expiry.
  upload_csv = lambda do |prefix, csv_content|
    filename = "#{prefix}-#{Time.zone.now}.csv"
    storage_service = Storage::S3Service.new(Configuration::EnvConfigurationService.new, ENV["BULK_UPLOAD_BUCKET"])
    # The byte order mark is written as an explicit escape so it is visible in
    # the source; the spec for these tasks expects the uploaded content to start
    # with it. NOTE(review): presumably there so spreadsheet tools detect UTF-8.
    storage_service.write_file(filename, "\uFEFF#{csv_content}")

    url = storage_service.get_presigned_url(filename, 72.hours.to_i)
    Rails.logger.info("Download URL: #{url}")
  end

  desc "Count the number of duplicate schemes per organisation"
  task scheme_duplicates_per_org: :environment do
    duplicates_csv = CSV.generate(headers: true) do |csv|
      csv << ["Organisation id", "Number of duplicate sets", "Total duplicate schemes"]

      Organisation.visible.each do |organisation|
        # Evaluate the scope once per organisation rather than once per use.
        duplicate_sets = organisation.owned_schemes.duplicate_sets
        next unless duplicate_sets.count.positive?

        csv << [organisation.id, duplicate_sets.count, duplicate_sets.sum(&:size)]
      end
    end

    upload_csv.call("scheme-duplicates", duplicates_csv)
  end

  desc "Count the number of duplicate locations per organisation"
  task location_duplicates_per_org: :environment do
    duplicates_csv = CSV.generate(headers: true) do |csv|
      csv << ["Organisation id", "Duplicate sets within individual schemes", "Duplicate locations within individual schemes", "All duplicate sets", "All duplicates"]

      Organisation.visible.each do |organisation|
        owned_schemes = organisation.owned_schemes

        # Duplicates found when each scheme's locations are compared only with
        # other locations of the same scheme.
        sets_within_individual_schemes = owned_schemes.flat_map { |scheme| scheme.locations.duplicate_sets }
        locations_within_individual_schemes = sets_within_individual_schemes.flatten

        duplicate_scheme_sets = owned_schemes.duplicate_sets

        if duplicate_scheme_sets.count.positive?
          # Duplicates found when the locations of all schemes in one duplicate
          # scheme set are compared with each other.
          sets_within_duplicate_schemes = duplicate_scheme_sets.flat_map do |duplicate_set|
            Location.where(scheme_id: duplicate_set).duplicate_sets_within_given_schemes
          end
          locations_within_duplicate_schemes = sets_within_duplicate_schemes.flatten

          # A per-scheme set that shares any location with a cross-scheme set is
          # dropped so that it is not counted a second time.
          standalone_sets = sets_within_individual_schemes.reject do |set|
            set.any? { |id| locations_within_duplicate_schemes.include?(id) }
          end

          all_duplicate_sets_count = standalone_sets.count + sets_within_duplicate_schemes.count
          all_duplicate_locations_count = (locations_within_duplicate_schemes + locations_within_individual_schemes).uniq.count
        else
          all_duplicate_sets_count = sets_within_individual_schemes.count
          all_duplicate_locations_count = locations_within_individual_schemes.count
        end

        # Organisations without any duplicate location get no row.
        next unless all_duplicate_locations_count.positive?

        csv << [organisation.id, sets_within_individual_schemes.count, locations_within_individual_schemes.count, all_duplicate_sets_count, all_duplicate_locations_count]
      end
    end

    upload_csv.call("location-duplicates", duplicates_csv)
  end
end
@ -0,0 +1,111 @@ |
|||||||
|
require "rails_helper" |
||||||
|
require "rake" |
||||||
|
|
||||||
|
# Specs for the count_duplicates rake tasks. The S3 service is replaced with a
# double in every example, so each example only checks the filename and CSV
# content handed to write_file and the download URL that gets logged.
RSpec.describe "count_duplicates" do
  let(:storage_service) { instance_double(Storage::S3Service) }
  let(:test_url) { "test_url" }

  before do
    allow(Storage::S3Service).to receive(:new).and_return(storage_service)
    allow(storage_service).to receive(:write_file)
    allow(storage_service).to receive(:get_presigned_url).and_return(test_url)
  end

  describe "count_duplicates:scheme_duplicates_per_org", type: :task do
    subject(:task) { Rake::Task["count_duplicates:scheme_duplicates_per_org"] }

    # Header row every generated file starts with, including the leading byte order mark.
    let(:csv_headers) { "\uFEFFOrganisation id,Number of duplicate sets,Total duplicate schemes\n" }

    before do
      Rake.application.rake_require("tasks/count_duplicates")
      Rake::Task.define_task(:environment)
      # Rake only runs a task once per process unless it is re-enabled.
      task.reenable
    end

    context "when the rake task is run" do
      context "and there are no duplicate schemes" do
        before do
          create(:organisation)
        end

        it "creates a csv with headers only" do
          expect(storage_service).to receive(:write_file).with(/scheme-duplicates-.*\.csv/, csv_headers)
          expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")

          task.invoke
        end
      end

      context "and there are duplicate schemes" do
        let(:organisation) { create(:organisation) }
        let(:organisation2) { create(:organisation) }

        before do
          # First organisation: two sets (2 + 3 schemes, the second set differing
          # in primary_client_group). Second organisation: one set of 5 schemes.
          create_list(:scheme, 2, :duplicate, owning_organisation: organisation)
          create_list(:scheme, 3, :duplicate, primary_client_group: "I", owning_organisation: organisation)
          create_list(:scheme, 5, :duplicate, owning_organisation: organisation2)
        end

        it "creates a csv with correct duplicate numbers" do
          expected_rows = "#{organisation.id},2,5\n#{organisation2.id},1,5\n"

          expect(storage_service).to receive(:write_file).with(/scheme-duplicates-.*\.csv/, "#{csv_headers}#{expected_rows}")
          expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")

          task.invoke
        end
      end
    end
  end

  describe "count_duplicates:location_duplicates_per_org", type: :task do
    subject(:task) { Rake::Task["count_duplicates:location_duplicates_per_org"] }

    # Header row every generated file starts with, including the leading byte order mark.
    let(:csv_headers) { "\uFEFFOrganisation id,Duplicate sets within individual schemes,Duplicate locations within individual schemes,All duplicate sets,All duplicates\n" }

    before do
      Rake.application.rake_require("tasks/count_duplicates")
      Rake::Task.define_task(:environment)
      # Rake only runs a task once per process unless it is re-enabled.
      task.reenable
    end

    context "when the rake task is run" do
      context "and there are no duplicate locations" do
        before do
          create(:organisation)
        end

        it "creates a csv with headers only" do
          expect(storage_service).to receive(:write_file).with(/location-duplicates-.*\.csv/, csv_headers)
          expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")

          task.invoke
        end
      end

      context "and there are duplicate locations" do
        # scheme_a and scheme_b use the :duplicate trait; scheme_c does not.
        let(:organisation) { create(:organisation) }
        let(:scheme_a) { create(:scheme, :duplicate, owning_organisation: organisation) }
        let(:scheme_b) { create(:scheme, :duplicate, owning_organisation: organisation) }
        let(:scheme_c) { create(:scheme, owning_organisation: organisation) }

        let(:organisation2) { create(:organisation) }
        let(:scheme2) { create(:scheme, owning_organisation: organisation2) }
        let(:scheme3) { create(:scheme, owning_organisation: organisation2) }

        before do
          create_list(:location, 2, postcode: "A1 1AB", mobility_type: "M", scheme: scheme_a) # Location A
          create_list(:location, 1, postcode: "A1 1AB", mobility_type: "A", scheme: scheme_a) # Location B

          create_list(:location, 1, postcode: "A1 1AB", mobility_type: "M", scheme: scheme_b) # Location A
          create_list(:location, 1, postcode: "A1 1AB", mobility_type: "A", scheme: scheme_b) # Location B
          create_list(:location, 2, postcode: "A1 1AB", mobility_type: "N", scheme: scheme_b) # Location C

          create_list(:location, 2, postcode: "A1 1AB", mobility_type: "A", scheme: scheme_c) # Location B

          create_list(:location, 5, postcode: "A1 1AB", mobility_type: "M", scheme: scheme2)
          create_list(:location, 2, postcode: "A1 1AB", mobility_type: "M", scheme: scheme3)
        end

        it "creates a csv with correct duplicate numbers" do
          expected_rows = "#{organisation.id},3,6,4,9\n#{organisation2.id},2,7,2,7\n"

          expect(storage_service).to receive(:write_file).with(/location-duplicates-.*\.csv/, "#{csv_headers}#{expected_rows}")
          expect(Rails.logger).to receive(:info).with("Download URL: #{test_url}")

          task.invoke
        end
      end
    end
  end
end
Loading…
Reference in new issue