Browse Source

Setup rake task to import organisations from S3 (#267)

Co-authored-by: Kat <katrina@madetech.com>
pull/272/head
Stéphane Meny 3 years ago committed by GitHub
parent
commit
a41c5f2c19
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      .github/workflows/pipeline.yml
  2. 56
      app/services/import_service.rb
  3. 8
      app/services/storage_service.rb
  4. 3
      db/migrate/20220203104800_add_unique_index_to_old_visible_id.rb
  5. 3
      db/schema.rb
  6. 38
      lib/tasks/data_import/organisations.rake
  7. 17
      spec/lib/tasks/data_import/organisations_spec.rb
  8. 65
      spec/services/import_service_spec.rb
  9. 38
      spec/services/storage_service_spec.rb

2
.github/workflows/pipeline.yml

@ -122,6 +122,7 @@ jobs:
GOVUK_NOTIFY_API_KEY: ${{ secrets.GOVUK_NOTIFY_API_KEY }} GOVUK_NOTIFY_API_KEY: ${{ secrets.GOVUK_NOTIFY_API_KEY }}
APP_HOST: ${{ secrets.APP_HOST }} APP_HOST: ${{ secrets.APP_HOST }}
RAILS_MASTER_KEY: ${{ secrets.RAILS_MASTER_KEY }} RAILS_MASTER_KEY: ${{ secrets.RAILS_MASTER_KEY }}
IMPORT_PAAS_INSTANCE: ${{ secrets.IMPORT_PAAS_INSTANCE }}
run: | run: |
cf7 api $CF_API_ENDPOINT cf7 api $CF_API_ENDPOINT
cf7 auth cf7 auth
@ -131,4 +132,5 @@ jobs:
cf7 set-env $APP_NAME GOVUK_NOTIFY_API_KEY $GOVUK_NOTIFY_API_KEY cf7 set-env $APP_NAME GOVUK_NOTIFY_API_KEY $GOVUK_NOTIFY_API_KEY
cf7 set-env $APP_NAME APP_HOST $APP_HOST cf7 set-env $APP_NAME APP_HOST $APP_HOST
cf7 set-env $APP_NAME RAILS_MASTER_KEY $RAILS_MASTER_KEY cf7 set-env $APP_NAME RAILS_MASTER_KEY $RAILS_MASTER_KEY
cf7 set-env $APP_NAME IMPORT_PAAS_INSTANCE $IMPORT_PAAS_INSTANCE
cf7 push $APP_NAME --strategy rolling cf7 push $APP_NAME --strategy rolling

56
app/services/import_service.rb

@ -0,0 +1,56 @@
class ImportService
def initialize(storage_service, logger = Rails.logger)
@storage_service = storage_service
@logger = logger
end
def update_organisations(folder)
filenames = @storage_service.list_files(folder)
filenames.each do |filename|
file_io = @storage_service.get_file_io(filename)
create_organisation(file_io)
end
end
private
def create_organisation(file_io)
doc = Nokogiri::XML(file_io)
name = field_value(doc, "name")
old_visible_id = field_value(doc, "visible-id")
begin
Organisation.create!(
name: name,
providertype: field_value(doc, "institution-type"),
phone: field_value(doc, "telephone-number"),
holds_own_stock: to_boolean(field_value(doc, "holds-stock")),
active: to_boolean(field_value(doc, "active")),
old_association_type: field_value(doc, "old-association-type"),
software_supplier_id: field_value(doc, "software-supplier-id"),
housing_management_system: field_value(doc, "housing-management-system"),
choice_based_lettings: to_boolean(field_value(doc, "choice-based-lettings")),
common_housing_register: to_boolean(field_value(doc, "common-housing-register")),
choice_allocation_policy: to_boolean(field_value(doc, "choice-allocation-policy")),
cbl_proportion_percentage: field_value(doc, "cbl-proportion-percentage"),
enter_affordable_logs: to_boolean(field_value(doc, "enter-affordable-logs")),
owns_affordable_logs: to_boolean(field_value(doc, "owns-affordable-rent")),
housing_registration_no: field_value(doc, "housing-registration-no"),
general_needs_units: field_value(doc, "general-needs-units"),
supported_housing_units: field_value(doc, "supported-housing-units"),
unspecified_units: field_value(doc, "unspecified-units"),
old_org_id: field_value(doc, "id"),
old_visible_id: old_visible_id,
)
rescue ActiveRecord::RecordNotUnique
@logger.warn("Organisation #{name} is already present with old visible ID #{old_visible_id}, skipping.")
end
end
def field_value(doc, field)
doc.at_xpath("//institution:#{field}")&.text
end
def to_boolean(input_string)
input_string == "true"
end
end

8
app/services/storage_service.rb

@ -8,10 +8,14 @@ class StorageService
@client = create_client @client = create_client
end end
def list_files(folder)
@client.list_objects_v2(bucket: @configuration.bucket_name, prefix: folder)
.flat_map { |response| response.contents.map(&:key) }
end
def get_file_io(file_name) def get_file_io(file_name)
file_response =
@client.get_object(bucket: @configuration.bucket_name, key: file_name) @client.get_object(bucket: @configuration.bucket_name, key: file_name)
file_response.body .body
end end
def write_file(file_name, data) def write_file(file_name, data)

3
db/migrate/20220203104800_add_unique_index_to_old_visible_id.rb

@ -0,0 +1,3 @@
class AddUniqueIndexToOldVisibleId < ActiveRecord::Migration[7.0]
add_index :organisations, :old_visible_id, unique: true
end

3
db/schema.rb

@ -10,7 +10,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 2022_01_31_123638) do ActiveRecord::Schema.define(version: 2022_02_03_104800) do
# These are extensions that must be enabled in order to support this database # These are extensions that must be enabled in order to support this database
enable_extension "plpgsql" enable_extension "plpgsql"
@ -228,6 +228,7 @@ ActiveRecord::Schema.define(version: 2022_01_31_123638) do
t.integer "unspecified_units" t.integer "unspecified_units"
t.string "old_org_id" t.string "old_org_id"
t.integer "old_visible_id" t.integer "old_visible_id"
t.index ["old_visible_id"], name: "index_organisations_on_old_visible_id", unique: true
end end
create_table "users", force: :cascade do |t| create_table "users", force: :cascade do |t|

38
lib/tasks/data_import/organisations.rake

@ -6,40 +6,8 @@ namespace :data_import do
# rake data_import:organisations['path/to/xml_files'] # rake data_import:organisations['path/to/xml_files']
task :organisations, %i[path] => :environment do |_task, args| task :organisations, %i[path] => :environment do |_task, args|
directory = args.path directory = args.path
Dir.glob("#{directory}/*.xml").each do |file| storage_service = StorageService.new(PaasConfigurationService.new, ENV["IMPORT_PAAS_INSTANCE"])
doc = Nokogiri::XML(File.open(file)) import_service = ImportService.new(storage_service)
Organisation.create!( import_service.update_organisations(directory)
name: field_value(doc, "name"),
providertype: field_value(doc, "institution-type"),
phone: field_value(doc, "telephone-number"),
holds_own_stock: to_boolean(field_value(doc, "holds-stock")),
active: to_boolean(field_value(doc, "active")),
old_association_type: field_value(doc, "old-association-type"),
software_supplier_id: field_value(doc, "software-supplier-id"),
housing_management_system: field_value(doc, "housing-management-system"),
choice_based_lettings: to_boolean(field_value(doc, "choice-based-lettings")),
common_housing_register: to_boolean(field_value(doc, "common-housing-register")),
choice_allocation_policy: to_boolean(field_value(doc, "choice-allocation-policy")),
cbl_proportion_percentage: field_value(doc, "cbl-proportion-percentage"),
enter_affordable_logs: to_boolean(field_value(doc, "enter-affordable-logs")),
owns_affordable_logs: to_boolean(field_value(doc, "owns-affordable-rent")),
housing_registration_no: field_value(doc, "housing-registration-no"),
general_needs_units: field_value(doc, "general-needs-units"),
supported_housing_units: field_value(doc, "supported-housing-units"),
unspecified_units: field_value(doc, "unspecified-units"),
old_org_id: field_value(doc, "id"),
old_visible_id: field_value(doc, "visible-id"),
)
end end
end
end
private
def field_value(doc, field)
doc.at_xpath("//institution:#{field}")&.text
end
def to_boolean(input_string)
input_string == "true"
end end

17
spec/lib/tasks/data_import/organisations_spec.rb

@ -5,15 +5,28 @@ describe "rake data_import:organisations", type: :task do
subject(:task) { Rake::Task["data_import:organisations"] } subject(:task) { Rake::Task["data_import:organisations"] }
let(:fixture_path) { "spec/fixtures/softwire_imports/organisations" } let(:fixture_path) { "spec/fixtures/softwire_imports/organisations" }
let(:instance_name) { "my_instance" }
let(:storage_service) { instance_double(StorageService) }
let(:paas_config_service) { instance_double(PaasConfigurationService) }
let(:import_service) { instance_double(ImportService) }
before do before do
allow(StorageService).to receive(:new).and_return(storage_service)
allow(PaasConfigurationService).to receive(:new).and_return(paas_config_service)
allow(ImportService).to receive(:new).and_return(import_service)
allow(ENV).to receive(:[])
allow(ENV).to receive(:[]).with("IMPORT_PAAS_INSTANCE").and_return(instance_name)
Rake.application.rake_require("tasks/data_import/organisations") Rake.application.rake_require("tasks/data_import/organisations")
Rake::Task.define_task(:environment) Rake::Task.define_task(:environment)
task.reenable task.reenable
end end
it "creates an organisation from the given XML file" do it "creates an organisation from the given XML file" do
expect { task.invoke(fixture_path) }.to change(Organisation, :count).by(1) expect(StorageService).to receive(:new).with(paas_config_service, instance_name)
expect(Organisation.find_by(old_visible_id: 1034).name).to eq("HA Ltd") expect(ImportService).to receive(:new).with(storage_service)
expect(import_service).to receive(:update_organisations).with(fixture_path)
task.invoke(fixture_path)
end end
end end

65
spec/services/import_service_spec.rb

@ -0,0 +1,65 @@
require "rails_helper"
RSpec.describe ImportService do
let(:storage_service) { instance_double(StorageService) }
let(:logger) { instance_double(Rails::Rack::Logger) }
let(:folder_name) { "organisations" }
let(:filenames) { %w[my_folder/my_file1.xml my_folder/my_file2.xml] }
let(:fixture_directory) { "spec/fixtures/softwire_imports/organisations" }
def create_organisation_file(fixture_directory, visible_id, name = "my_organisation")
file = File.open("#{fixture_directory}/7c5bd5fb549c09a2c55d7cb90d7ba84927e64618.xml")
doc = Nokogiri::XML(file)
doc.at_xpath("//institution:visible-id").content = visible_id
doc.at_xpath("//institution:name").content = name
StringIO.new(doc.to_xml)
end
context "when importing organisations" do
subject(:import_service) { described_class.new(storage_service) }
before do
allow(storage_service).to receive(:list_files)
.and_return(filenames)
allow(storage_service).to receive(:get_file_io)
.with(filenames[0])
.and_return(create_organisation_file(fixture_directory, 1))
allow(storage_service).to receive(:get_file_io)
.with(filenames[1])
.and_return(create_organisation_file(fixture_directory, 2))
end
it "successfully create a new organisation if it does not exist" do
expect(storage_service).to receive(:list_files).with(folder_name)
expect(storage_service).to receive(:get_file_io).with(filenames[0]).ordered
expect(storage_service).to receive(:get_file_io).with(filenames[1]).ordered
expect { import_service.update_organisations(folder_name) }.to change(Organisation, :count).by(2)
expect(Organisation).to exist(old_visible_id: 1)
expect(Organisation).to exist(old_visible_id: 2)
end
end
context "when importing organisations twice" do
subject(:import_service) { described_class.new(storage_service, logger) }
before do
allow(storage_service).to receive(:list_files).and_return([filenames[0]])
allow(storage_service).to receive(:get_file_io).and_return(
create_organisation_file(fixture_directory, 1),
create_organisation_file(fixture_directory, 1, "my_new_organisation"),
)
end
it "successfully create an organisation the first time, and does not update it" do
expect(storage_service).to receive(:list_files).with(folder_name).twice
expect(storage_service).to receive(:get_file_io).with(filenames[0]).twice
expect(logger).to receive(:warn).once
expect { import_service.update_organisations(folder_name) }.to change(Organisation, :count).by(1)
expect { import_service.update_organisations(folder_name) }.to change(Organisation, :count).by(0)
expect(Organisation).to exist(old_visible_id: 1, name: "my_organisation")
end
end
end

38
spec/services/storage_service_spec.rb

@ -19,7 +19,7 @@ RSpec.describe StorageService do
JSON JSON
end end
context "when we create an S3 Service with no PaaS Configuration present" do context "when we create a storage service with no PaaS Configuration present" do
subject(:storage_service) { described_class.new(PaasConfigurationService.new, "random_instance") } subject(:storage_service) { described_class.new(PaasConfigurationService.new, "random_instance") }
it "raises an exception" do it "raises an exception" do
@ -27,7 +27,7 @@ RSpec.describe StorageService do
end end
end end
context "when we create an S3 Service with an unknown instance name" do context "when we create a storage service with an unknown instance name" do
subject(:storage_service) { described_class.new(PaasConfigurationService.new, "random_instance") } subject(:storage_service) { described_class.new(PaasConfigurationService.new, "random_instance") }
before do before do
@ -39,7 +39,7 @@ RSpec.describe StorageService do
end end
end end
context "when we create an storage service with a valid instance name" do context "when we create a storage service with a valid instance name" do
subject(:storage_service) { described_class.new(PaasConfigurationService.new, instance_name) } subject(:storage_service) { described_class.new(PaasConfigurationService.new, instance_name) }
before do before do
@ -64,7 +64,7 @@ RSpec.describe StorageService do
end end
end end
context "when we create an storage service and write a stubbed object" do context "when we create a storage service and write a stubbed object" do
subject(:storage_service) { described_class.new(PaasConfigurationService.new, instance_name) } subject(:storage_service) { described_class.new(PaasConfigurationService.new, instance_name) }
let(:filename) { "my_file" } let(:filename) { "my_file" }
@ -98,4 +98,34 @@ RSpec.describe StorageService do
storage_service.write_file(filename, content) storage_service.write_file(filename, content)
end end
end end
context "when we create a storage service and list files" do
subject(:storage_service) { described_class.new(PaasConfigurationService.new, instance_name) }
let(:s3_client_stub) { Aws::S3::Client.new(stub_responses: true) }
before do
allow(ENV).to receive(:[])
allow(ENV).to receive(:[]).with("VCAP_SERVICES").and_return(vcap_services)
allow(Aws::S3::Client).to receive(:new).and_return(s3_client_stub)
end
it "returns a list with all present file names in a given folder" do
expected_filenames = %w[my_folder/my_file1.xml my_folder/my_file2.xml]
s3_client_stub.stub_responses(:list_objects_v2, {
contents: [
{
key: expected_filenames[0],
},
{
key: expected_filenames[1],
},
],
})
filenames = storage_service.list_files("my_folder")
expect(filenames).to eq(expected_filenames)
end
end
end end

Loading…
Cancel
Save