Browse Source

Schedule data export tasks in sidekiq-cron (#952)

* Schedule data export tasks in sidekiq-cron

- Split combined XML and CSV data jobs into seperate background jobs
- Add sidekiq-cron to the project
- Schedule new XML and CSV jobs to run every day at 5am
- Adjust data export Rake tasks to use new job

* Add specs for DataExport{Xml,Csv}Job

* x

* Run bundle install

* Change review app export bucket name

---------

Co-authored-by: Kat <katrina@kosiak.co.uk>
pull/1256/head
James Rose 2 years ago committed by GitHub
parent
commit
68faee2efd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      .github/workflows/review_pipeline.yml
  2. 3
      Gemfile
  3. 17
      Gemfile.lock
  4. 1
      Procfile.dev
  5. 10
      app/jobs/data_export_csv_job.rb
  6. 10
      app/jobs/data_export_xml_job.rb
  7. 10
      config/initializers/sidekiq.rb
  8. 8
      config/sidekiq_cron_schedule.yml
  9. 17
      lib/tasks/data_export.rake
  10. 19
      spec/jobs/data_export_csv_job_spec.rb
  11. 27
      spec/jobs/data_export_xml_job_spec.rb
  12. 18
      spec/lib/tasks/data_export_spec.rb

2
.github/workflows/review_pipeline.yml

@ -118,7 +118,7 @@ jobs:
cf set-env $APP_NAME GOVUK_NOTIFY_API_KEY $GOVUK_NOTIFY_API_KEY cf set-env $APP_NAME GOVUK_NOTIFY_API_KEY $GOVUK_NOTIFY_API_KEY
cf set-env $APP_NAME RAILS_MASTER_KEY $RAILS_MASTER_KEY cf set-env $APP_NAME RAILS_MASTER_KEY $RAILS_MASTER_KEY
cf set-env $APP_NAME IMPORT_PAAS_INSTANCE $IMPORT_PAAS_INSTANCE cf set-env $APP_NAME IMPORT_PAAS_INSTANCE $IMPORT_PAAS_INSTANCE
cf set-env $APP_NAME EXPORT_PAAS_INSTANCE $EXPORT_PAAS_INSTANCE cf set-env $APP_NAME EXPORT_PAAS_INSTANCE "dluhc-core-review-export-bucket"
cf set-env $APP_NAME S3_CONFIG $S3_CONFIG cf set-env $APP_NAME S3_CONFIG $S3_CONFIG
cf set-env $APP_NAME CSV_DOWNLOAD_PAAS_INSTANCE "dluhc-core-review-csv-bucket" cf set-env $APP_NAME CSV_DOWNLOAD_PAAS_INSTANCE "dluhc-core-review-csv-bucket"
cf set-env $APP_NAME SENTRY_DSN $SENTRY_DSN cf set-env $APP_NAME SENTRY_DSN $SENTRY_DSN

3
Gemfile

@ -50,7 +50,7 @@ gem "paper_trail"
gem "paper_trail-globalid" gem "paper_trail-globalid"
# Request rate limiting # Request rate limiting
gem "rack-attack" gem "rack-attack"
gem "redis" gem "redis", "~> 4.8"
# Receive exceptions and configure alerts # Receive exceptions and configure alerts
gem "sentry-rails" gem "sentry-rails"
gem "sentry-ruby" gem "sentry-ruby"
@ -60,6 +60,7 @@ gem "possessive"
gem "auto_strip_attributes" gem "auto_strip_attributes"
# Use sidekiq for background processing # Use sidekiq for background processing
gem "sidekiq" gem "sidekiq"
gem "sidekiq-cron"
group :development, :test do group :development, :test do
# Check gems for known vulnerabilities # Check gems for known vulnerabilities

17
Gemfile.lock

@ -158,6 +158,8 @@ GEM
rubocop rubocop
smart_properties smart_properties
erubi (1.12.0) erubi (1.12.0)
et-orbi (1.2.7)
tzinfo
excon (0.92.5) excon (0.92.5)
factory_bot (6.2.1) factory_bot (6.2.1)
activesupport (>= 5.0.0) activesupport (>= 5.0.0)
@ -167,6 +169,9 @@ GEM
faker (2.23.0) faker (2.23.0)
i18n (>= 1.8.11, < 2) i18n (>= 1.8.11, < 2)
ffi (1.15.5) ffi (1.15.5)
fugit (1.8.1)
et-orbi (~> 1, >= 1.2.7)
raabro (~> 1.4)
globalid (1.0.1) globalid (1.0.1)
activesupport (>= 5.0) activesupport (>= 5.0)
govuk-components (3.2.1) govuk-components (3.2.1)
@ -267,6 +272,7 @@ GEM
public_suffix (5.0.0) public_suffix (5.0.0)
puma (5.6.5) puma (5.6.5)
nio4r (~> 2.0) nio4r (~> 2.0)
raabro (1.4.0)
racc (1.6.2) racc (1.6.2)
rack (2.2.6.2) rack (2.2.6.2)
rack-attack (6.6.1) rack-attack (6.6.1)
@ -308,10 +314,7 @@ GEM
rb-inotify (0.10.1) rb-inotify (0.10.1)
ffi (~> 1.0) ffi (~> 1.0)
redcarpet (3.5.1) redcarpet (3.5.1)
redis (5.0.5) redis (4.8.0)
redis-client (>= 0.9.0)
redis-client (0.9.0)
connection_pool
regexp_parser (2.5.0) regexp_parser (2.5.0)
request_store (1.5.1) request_store (1.5.1)
rack (>= 1.4) rack (>= 1.4)
@ -385,6 +388,9 @@ GEM
connection_pool (>= 2.2.2) connection_pool (>= 2.2.2)
rack (~> 2.0) rack (~> 2.0)
redis (>= 4.5.0) redis (>= 4.5.0)
sidekiq-cron (1.8.0)
fugit (~> 1)
sidekiq (>= 4.2.1)
simplecov (0.21.2) simplecov (0.21.2)
docile (~> 1.1) docile (~> 1.1)
simplecov-html (~> 0.11) simplecov-html (~> 0.11)
@ -467,7 +473,7 @@ DEPENDENCIES
rack-attack rack-attack
rack-mini-profiler (~> 2.0) rack-mini-profiler (~> 2.0)
rails (~> 7.0.2) rails (~> 7.0.2)
redis redis (~> 4.8)
roo roo
rspec-rails rspec-rails
rubocop-govuk (= 4.3.0) rubocop-govuk (= 4.3.0)
@ -477,6 +483,7 @@ DEPENDENCIES
sentry-rails sentry-rails
sentry-ruby sentry-ruby
sidekiq sidekiq
sidekiq-cron
simplecov simplecov
stimulus-rails stimulus-rails
timecop (~> 0.9.4) timecop (~> 0.9.4)

1
Procfile.dev

@ -1,2 +1,3 @@
web: bin/rails server -p 3000 web: bin/rails server -p 3000
redis: redis-server
js: yarn build --watch js: yarn build --watch

10
app/jobs/data_export_csv_job.rb

@ -0,0 +1,10 @@
class DataExportCsvJob < ApplicationJob
queue_as :default
def perform
storage_service = Storage::S3Service.new(Configuration::PaasConfigurationService.new, ENV["EXPORT_PAAS_INSTANCE"])
export_service = Exports::LettingsLogExportService.new(storage_service)
export_service.export_csv_lettings_logs
end
end

10
app/jobs/data_export_xml_job.rb

@ -0,0 +1,10 @@
class DataExportXmlJob < ApplicationJob
queue_as :default
def perform(full_update: false)
storage_service = Storage::S3Service.new(Configuration::PaasConfigurationService.new, ENV["EXPORT_PAAS_INSTANCE"])
export_service = Exports::LettingsLogExportService.new(storage_service)
export_service.export_xml_lettings_logs(full_update:)
end
end

10
config/initializers/sidekiq.rb

@ -1,4 +1,5 @@
require "sidekiq/web" require "sidekiq/web"
require "sidekiq/cron/web"
if Rails.env.staging? || Rails.env.production? if Rails.env.staging? || Rails.env.production?
redis_url = Configuration::PaasConfigurationService.new.redis_uris[:"dluhc-core-#{Rails.env}-redis"] redis_url = Configuration::PaasConfigurationService.new.redis_uris[:"dluhc-core-#{Rails.env}-redis"]
@ -23,3 +24,12 @@ if Rails.env.review?
config.redis = { url: redis_url } config.redis = { url: redis_url }
end end
end end
# Until https://github.com/sidekiq-cron/sidekiq-cron/issues/357 is fixed.
Redis.silence_deprecations = true
Sidekiq.configure_server do |config|
config.on(:startup) do
Sidekiq::Cron::Job.load_from_hash YAML.load_file("config/sidekiq_cron_schedule.yml")
end
end

8
config/sidekiq_cron_schedule.yml

@ -0,0 +1,8 @@
data_export_csv:
cron: "every day at 5am"
class: "DataExportCsvJob"
queue: default
data_export_xml:
cron: "every day at 5am"
class: "DataExportXmlJob"
queue: default

17
lib/tasks/data_export.rake

@ -1,17 +1,14 @@
namespace :core do namespace :core do
desc "Export data CSVs for import into Central Data System (CDS)"
task data_export_csv: :environment do |_task, _args|
DataExportCsvJob.perform_later
end
desc "Export data XMLs for import into Central Data System (CDS)" desc "Export data XMLs for import into Central Data System (CDS)"
task :data_export, %i[format full_update] => :environment do |_task, args| task :data_export_xml, %i[full_update] => :environment do |_task, args|
format = args[:format]
full_update = args[:full_update].present? && args[:full_update] == "true" full_update = args[:full_update].present? && args[:full_update] == "true"
storage_service = Storage::S3Service.new(Configuration::PaasConfigurationService.new, ENV["EXPORT_PAAS_INSTANCE"]) DataExportXmlJob.perform_later(full_update:)
export_service = Exports::LettingsLogExportService.new(storage_service)
if format.present? && format == "CSV"
export_service.export_csv_lettings_logs
else
export_service.export_xml_lettings_logs(full_update:)
end
end end
end end

19
spec/jobs/data_export_csv_job_spec.rb

@ -0,0 +1,19 @@
require "rails_helper"
describe DataExportCsvJob do
let(:storage_service) { instance_double(Storage::S3Service) }
let(:paas_config_service) { instance_double(Configuration::PaasConfigurationService) }
let(:export_service) { instance_double(Exports::LettingsLogExportService) }
before do
allow(Storage::S3Service).to receive(:new).and_return(storage_service)
allow(Configuration::PaasConfigurationService).to receive(:new).and_return(paas_config_service)
allow(Exports::LettingsLogExportService).to receive(:new).and_return(export_service)
end
it "calls the export service" do
expect(export_service).to receive(:export_csv_lettings_logs)
described_class.perform_now
end
end

27
spec/jobs/data_export_xml_job_spec.rb

@ -0,0 +1,27 @@
require "rails_helper"
describe DataExportXmlJob do
let(:storage_service) { instance_double(Storage::S3Service) }
let(:paas_config_service) { instance_double(Configuration::PaasConfigurationService) }
let(:export_service) { instance_double(Exports::LettingsLogExportService) }
before do
allow(Storage::S3Service).to receive(:new).and_return(storage_service)
allow(Configuration::PaasConfigurationService).to receive(:new).and_return(paas_config_service)
allow(Exports::LettingsLogExportService).to receive(:new).and_return(export_service)
end
it "calls the export service" do
expect(export_service).to receive(:export_xml_lettings_logs)
described_class.perform_now
end
context "with full update enabled" do
it "calls the export service" do
expect(export_service).to receive(:export_xml_lettings_logs).with(full_update: true)
described_class.perform_now(full_update: true)
end
end
end

18
spec/lib/tasks/data_export_spec.rb

@ -2,8 +2,6 @@ require "rails_helper"
require "rake" require "rake"
describe "rake core:data_export", type: task do describe "rake core:data_export", type: task do
subject(:task) { Rake::Task["core:data_export"] }
let(:paas_instance) { "paas_export_instance" } let(:paas_instance) { "paas_export_instance" }
let(:storage_service) { instance_double(Storage::S3Service) } let(:storage_service) { instance_double(Storage::S3Service) }
let(:paas_config_service) { instance_double(Configuration::PaasConfigurationService) } let(:paas_config_service) { instance_double(Configuration::PaasConfigurationService) }
@ -22,22 +20,18 @@ describe "rake core:data_export", type: task do
end end
context "when exporting lettings logs with no parameters" do context "when exporting lettings logs with no parameters" do
it "starts the XML export process" do let(:task) { Rake::Task["core:data_export_xml"] }
expect(Storage::S3Service).to receive(:new).with(paas_config_service, paas_instance)
expect(Exports::LettingsLogExportService).to receive(:new).with(storage_service)
expect(export_service).to receive(:export_xml_lettings_logs)
task.invoke it "starts the XML export process" do
expect { task.invoke }.to enqueue_job(DataExportXmlJob)
end end
end end
context "when exporting lettings logs with CSV format" do context "when exporting lettings logs with CSV format" do
it "starts the CSV export process" do let(:task) { Rake::Task["core:data_export_csv"] }
expect(Storage::S3Service).to receive(:new).with(paas_config_service, paas_instance)
expect(Exports::LettingsLogExportService).to receive(:new).with(storage_service)
expect(export_service).to receive(:export_csv_lettings_logs)
task.invoke("CSV", "false") it "starts the CSV export process" do
expect { task.invoke }.to enqueue_job(DataExportCsvJob)
end end
end end
end end

Loading…
Cancel
Save