From a41c5f2c19d0cecc968182d61b9de558959a49e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Meny?= Date: Thu, 3 Feb 2022 15:45:06 +0000 Subject: [PATCH] Setup rake task to import organisations from S3 (#267) Co-authored-by: Kat --- .github/workflows/pipeline.yml | 2 + app/services/import_service.rb | 56 ++++++++++++++++ app/services/storage_service.rb | 10 ++- ...4800_add_unique_index_to_old_visible_id.rb | 3 + db/schema.rb | 3 +- lib/tasks/data_import/organisations.rake | 38 +---------- .../tasks/data_import/organisations_spec.rb | 17 ++++- spec/services/import_service_spec.rb | 65 +++++++++++++++++++ spec/services/storage_service_spec.rb | 38 +++++++++-- 9 files changed, 187 insertions(+), 45 deletions(-) create mode 100644 app/services/import_service.rb create mode 100644 db/migrate/20220203104800_add_unique_index_to_old_visible_id.rb create mode 100644 spec/services/import_service_spec.rb diff --git a/.github/workflows/pipeline.yml b/.github/workflows/pipeline.yml index 64dd170fe..9177f349e 100644 --- a/.github/workflows/pipeline.yml +++ b/.github/workflows/pipeline.yml @@ -122,6 +122,7 @@ jobs: GOVUK_NOTIFY_API_KEY: ${{ secrets.GOVUK_NOTIFY_API_KEY }} APP_HOST: ${{ secrets.APP_HOST }} RAILS_MASTER_KEY: ${{ secrets.RAILS_MASTER_KEY }} + IMPORT_PAAS_INSTANCE: ${{ secrets.IMPORT_PAAS_INSTANCE }} run: | cf7 api $CF_API_ENDPOINT cf7 auth @@ -131,4 +132,5 @@ jobs: cf7 set-env $APP_NAME GOVUK_NOTIFY_API_KEY $GOVUK_NOTIFY_API_KEY cf7 set-env $APP_NAME APP_HOST $APP_HOST cf7 set-env $APP_NAME RAILS_MASTER_KEY $RAILS_MASTER_KEY + cf7 set-env $APP_NAME IMPORT_PAAS_INSTANCE $IMPORT_PAAS_INSTANCE cf7 push $APP_NAME --strategy rolling diff --git a/app/services/import_service.rb b/app/services/import_service.rb new file mode 100644 index 000000000..6d570e1c3 --- /dev/null +++ b/app/services/import_service.rb @@ -0,0 +1,56 @@ +class ImportService + def initialize(storage_service, logger = Rails.logger) + @storage_service = storage_service + @logger = logger + end + + def update_organisations(folder) + filenames = @storage_service.list_files(folder) + filenames.each do |filename| + file_io = @storage_service.get_file_io(filename) + create_organisation(file_io) + end + end + +private + + def create_organisation(file_io) + doc = Nokogiri::XML(file_io) + name = field_value(doc, "name") + old_visible_id = field_value(doc, "visible-id") + begin + Organisation.create!( + name: name, + providertype: field_value(doc, "institution-type"), + phone: field_value(doc, "telephone-number"), + holds_own_stock: to_boolean(field_value(doc, "holds-stock")), + active: to_boolean(field_value(doc, "active")), + old_association_type: field_value(doc, "old-association-type"), + software_supplier_id: field_value(doc, "software-supplier-id"), + housing_management_system: field_value(doc, "housing-management-system"), + choice_based_lettings: to_boolean(field_value(doc, "choice-based-lettings")), + common_housing_register: to_boolean(field_value(doc, "common-housing-register")), + choice_allocation_policy: to_boolean(field_value(doc, "choice-allocation-policy")), + cbl_proportion_percentage: field_value(doc, "cbl-proportion-percentage"), + enter_affordable_logs: to_boolean(field_value(doc, "enter-affordable-logs")), + owns_affordable_logs: to_boolean(field_value(doc, "owns-affordable-rent")), + housing_registration_no: field_value(doc, "housing-registration-no"), + general_needs_units: field_value(doc, "general-needs-units"), + supported_housing_units: field_value(doc, "supported-housing-units"), + unspecified_units: field_value(doc, "unspecified-units"), + old_org_id: field_value(doc, "id"), + old_visible_id: old_visible_id, + ) + rescue ActiveRecord::RecordNotUnique + @logger.warn("Organisation #{name} is already present with old visible ID #{old_visible_id}, skipping.") + end + end + + def field_value(doc, field) + doc.at_xpath("//institution:#{field}")&.text + end + + def to_boolean(input_string) + input_string == "true" + end +end diff --git a/app/services/storage_service.rb b/app/services/storage_service.rb index 7a65dda82..25889875a 100644 --- a/app/services/storage_service.rb +++ b/app/services/storage_service.rb @@ -8,10 +8,14 @@ class StorageService @client = create_client end + def list_files(folder) + @client.list_objects_v2(bucket: @configuration.bucket_name, prefix: folder) + .flat_map { |response| response.contents.map(&:key) } + end + def get_file_io(file_name) - file_response = - @client.get_object(bucket: @configuration.bucket_name, key: file_name) - file_response.body + @client.get_object(bucket: @configuration.bucket_name, key: file_name) + .body end def write_file(file_name, data) diff --git a/db/migrate/20220203104800_add_unique_index_to_old_visible_id.rb b/db/migrate/20220203104800_add_unique_index_to_old_visible_id.rb new file mode 100644 index 000000000..1805abbbd --- /dev/null +++ b/db/migrate/20220203104800_add_unique_index_to_old_visible_id.rb @@ -0,0 +1,3 @@ +class AddUniqueIndexToOldVisibleId < ActiveRecord::Migration[7.0] + add_index :organisations, :old_visible_id, unique: true +end diff --git a/db/schema.rb b/db/schema.rb index d93c8efed..b3f0fca4d 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2022_01_31_123638) do +ActiveRecord::Schema.define(version: 2022_02_03_104800) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -228,6 +228,7 @@ ActiveRecord::Schema.define(version: 2022_01_31_123638) do t.integer "unspecified_units" t.string "old_org_id" t.integer "old_visible_id" + t.index ["old_visible_id"], name: "index_organisations_on_old_visible_id", unique: true end create_table "users", force: :cascade do |t| diff --git a/lib/tasks/data_import/organisations.rake b/lib/tasks/data_import/organisations.rake index 5caf1ecef..72c59f6ce 100644 --- a/lib/tasks/data_import/organisations.rake +++ b/lib/tasks/data_import/organisations.rake @@ -6,40 +6,8 @@ namespace :data_import do # rake data_import:organisations['path/to/xml_files'] task :organisations, %i[path] => :environment do |_task, args| directory = args.path - Dir.glob("#{directory}/*.xml").each do |file| - doc = Nokogiri::XML(File.open(file)) - Organisation.create!( - name: field_value(doc, "name"), - providertype: field_value(doc, "institution-type"), - phone: field_value(doc, "telephone-number"), - holds_own_stock: to_boolean(field_value(doc, "holds-stock")), - active: to_boolean(field_value(doc, "active")), - old_association_type: field_value(doc, "old-association-type"), - software_supplier_id: field_value(doc, "software-supplier-id"), - housing_management_system: field_value(doc, "housing-management-system"), - choice_based_lettings: to_boolean(field_value(doc, "choice-based-lettings")), - common_housing_register: to_boolean(field_value(doc, "common-housing-register")), - choice_allocation_policy: to_boolean(field_value(doc, "choice-allocation-policy")), - cbl_proportion_percentage: field_value(doc, "cbl-proportion-percentage"), - enter_affordable_logs: to_boolean(field_value(doc, "enter-affordable-logs")), - owns_affordable_logs: to_boolean(field_value(doc, "owns-affordable-rent")), - housing_registration_no: field_value(doc, "housing-registration-no"), - general_needs_units: field_value(doc, "general-needs-units"), - supported_housing_units: field_value(doc, "supported-housing-units"), - unspecified_units: field_value(doc, "unspecified-units"), - old_org_id: field_value(doc, "id"), - old_visible_id: field_value(doc, "visible-id"), - ) - end + storage_service = StorageService.new(PaasConfigurationService.new, ENV["IMPORT_PAAS_INSTANCE"]) + import_service = ImportService.new(storage_service) + import_service.update_organisations(directory) end end - -private - -def field_value(doc, field) - doc.at_xpath("//institution:#{field}")&.text -end - -def to_boolean(input_string) - input_string == "true" -end diff --git a/spec/lib/tasks/data_import/organisations_spec.rb b/spec/lib/tasks/data_import/organisations_spec.rb index 34a1a6b4c..164b64218 100644 --- a/spec/lib/tasks/data_import/organisations_spec.rb +++ b/spec/lib/tasks/data_import/organisations_spec.rb @@ -5,15 +5,28 @@ describe "rake data_import:organisations", type: :task do subject(:task) { Rake::Task["data_import:organisations"] } let(:fixture_path) { "spec/fixtures/softwire_imports/organisations" } + let(:instance_name) { "my_instance" } + let(:storage_service) { instance_double(StorageService) } + let(:paas_config_service) { instance_double(PaasConfigurationService) } + let(:import_service) { instance_double(ImportService) } before do + allow(StorageService).to receive(:new).and_return(storage_service) + allow(PaasConfigurationService).to receive(:new).and_return(paas_config_service) + allow(ImportService).to receive(:new).and_return(import_service) + allow(ENV).to receive(:[]) + allow(ENV).to receive(:[]).with("IMPORT_PAAS_INSTANCE").and_return(instance_name) + Rake.application.rake_require("tasks/data_import/organisations") Rake::Task.define_task(:environment) task.reenable end it "creates an organisation from the given XML file" do - expect { task.invoke(fixture_path) }.to change(Organisation, :count).by(1) - expect(Organisation.find_by(old_visible_id: 1034).name).to eq("HA Ltd") + expect(StorageService).to receive(:new).with(paas_config_service, instance_name) + expect(ImportService).to receive(:new).with(storage_service) + expect(import_service).to receive(:update_organisations).with(fixture_path) + + task.invoke(fixture_path) end end diff --git a/spec/services/import_service_spec.rb b/spec/services/import_service_spec.rb new file mode 100644 index 000000000..a72f77326 --- /dev/null +++ b/spec/services/import_service_spec.rb @@ -0,0 +1,65 @@ +require "rails_helper" + +RSpec.describe ImportService do + let(:storage_service) { instance_double(StorageService) } + let(:logger) { instance_double(Rails::Rack::Logger) } + let(:folder_name) { "organisations" } + let(:filenames) { %w[my_folder/my_file1.xml my_folder/my_file2.xml] } + let(:fixture_directory) { "spec/fixtures/softwire_imports/organisations" } + + def create_organisation_file(fixture_directory, visible_id, name = "my_organisation") + file = File.open("#{fixture_directory}/7c5bd5fb549c09a2c55d7cb90d7ba84927e64618.xml") + doc = Nokogiri::XML(file) + doc.at_xpath("//institution:visible-id").content = visible_id + doc.at_xpath("//institution:name").content = name + StringIO.new(doc.to_xml) + end + + context "when importing organisations" do + subject(:import_service) { described_class.new(storage_service) } + + before do + allow(storage_service).to receive(:list_files) + .and_return(filenames) + allow(storage_service).to receive(:get_file_io) + .with(filenames[0]) + .and_return(create_organisation_file(fixture_directory, 1)) + allow(storage_service).to receive(:get_file_io) + .with(filenames[1]) + .and_return(create_organisation_file(fixture_directory, 2)) + end + + it "successfully create a new organisation if it does not exist" do + expect(storage_service).to receive(:list_files).with(folder_name) + expect(storage_service).to receive(:get_file_io).with(filenames[0]).ordered + expect(storage_service).to receive(:get_file_io).with(filenames[1]).ordered + + expect { import_service.update_organisations(folder_name) }.to change(Organisation, :count).by(2) + expect(Organisation).to exist(old_visible_id: 1) + expect(Organisation).to exist(old_visible_id: 2) + end + end + + context "when importing organisations twice" do + subject(:import_service) { described_class.new(storage_service, logger) } + + before do + allow(storage_service).to receive(:list_files).and_return([filenames[0]]) + allow(storage_service).to receive(:get_file_io).and_return( + create_organisation_file(fixture_directory, 1), + create_organisation_file(fixture_directory, 1, "my_new_organisation"), + ) + end + + it "successfully create an organisation the first time, and does not update it" do + expect(storage_service).to receive(:list_files).with(folder_name).twice + expect(storage_service).to receive(:get_file_io).with(filenames[0]).twice + expect(logger).to receive(:warn).once + + expect { import_service.update_organisations(folder_name) }.to change(Organisation, :count).by(1) + expect { import_service.update_organisations(folder_name) }.to change(Organisation, :count).by(0) + + expect(Organisation).to exist(old_visible_id: 1, name: "my_organisation") + end + end +end diff --git a/spec/services/storage_service_spec.rb b/spec/services/storage_service_spec.rb index f05c071d8..9a9256d46 100644 --- a/spec/services/storage_service_spec.rb +++ b/spec/services/storage_service_spec.rb @@ -19,7 +19,7 @@ RSpec.describe StorageService do JSON end - context "when we create an S3 Service with no PaaS Configuration present" do + context "when we create a storage service with no PaaS Configuration present" do subject(:storage_service) { described_class.new(PaasConfigurationService.new, "random_instance") } it "raises an exception" do @@ -27,7 +27,7 @@ RSpec.describe StorageService do end end - context "when we create an S3 Service with an unknown instance name" do + context "when we create a storage service with an unknown instance name" do subject(:storage_service) { described_class.new(PaasConfigurationService.new, "random_instance") } before do @@ -39,7 +39,7 @@ RSpec.describe StorageService do end end - context "when we create an storage service with a valid instance name" do + context "when we create a storage service with a valid instance name" do subject(:storage_service) { described_class.new(PaasConfigurationService.new, instance_name) } before do @@ -64,7 +64,7 @@ RSpec.describe StorageService do end end - context "when we create an storage service and write a stubbed object" do + context "when we create a storage service and write a stubbed object" do subject(:storage_service) { described_class.new(PaasConfigurationService.new, instance_name) } let(:filename) { "my_file" } @@ -98,4 +98,34 @@ RSpec.describe StorageService do storage_service.write_file(filename, content) end end + + context "when we create a storage service and list files" do + subject(:storage_service) { described_class.new(PaasConfigurationService.new, instance_name) } + + let(:s3_client_stub) { Aws::S3::Client.new(stub_responses: true) } + + before do + allow(ENV).to receive(:[]) + allow(ENV).to receive(:[]).with("VCAP_SERVICES").and_return(vcap_services) + allow(Aws::S3::Client).to receive(:new).and_return(s3_client_stub) + end + + it "returns a list with all present file names in a given folder" do + expected_filenames = %w[my_folder/my_file1.xml my_folder/my_file2.xml] + s3_client_stub.stub_responses(:list_objects_v2, { + contents: [ + { + key: expected_filenames[0], + }, + { + key: expected_filenames[1], + }, + ], + }) + + filenames = storage_service.list_files("my_folder") + + expect(filenames).to eq(expected_filenames) + end + end end