
add 2025 sales bulk upload parser

pull/2945/head
Carolyn 3 weeks ago
parent
commit
a9a4904325
  1. .rake_tasks~ (0 lines changed)
  2. app/helpers/bulk_upload/sales_log_to_csv.rb (139 lines changed)
  3. app/models/bulk_upload.rb (2 lines changed)
  4. app/models/forms/bulk_upload_form/prepare_your_file.rb (2 lines changed)
  5. app/services/bulk_upload/sales/log_creator.rb (2 lines changed)
  6. app/services/bulk_upload/sales/validator.rb (2 lines changed)
  7. app/services/bulk_upload/sales/year2025/csv_parser.rb (124 lines changed)
  8. app/services/bulk_upload/sales/year2025/row_parser.rb (2037 lines changed)
  9. app/views/bulk_upload_sales_logs/forms/prepare_your_file_2025.html.erb (36 lines changed)
  10. config/locales/en.yml (2 lines changed)
  11. config/locales/validations/sales/2025/bulk_upload.en.yml (46 lines changed)
  12. session-manager-plugin.deb (binary)
  13. spec/models/bulk_upload_spec.rb (1 line changed)
  14. spec/services/bulk_upload/sales/log_creator_spec.rb (8 lines changed)
  15. spec/services/bulk_upload/sales/validator_spec.rb (16 lines changed)
  16. spec/services/bulk_upload/sales/year2025/csv_parser_spec.rb (191 lines changed)
  17. spec/services/bulk_upload/sales/year2025/row_parser_spec.rb (1912 lines changed)

0
.rake_tasks~

139
app/helpers/bulk_upload/sales_log_to_csv.rb

@@ -19,7 +19,7 @@ class BulkUpload::SalesLogToCsv
    case year
    when 2022
      to_2022_csv_row
    when 2023, 2024
    when 2023, 2024, 2025
      to_year_csv_row(year, seed:)
    else
      raise NotImplementedError "No mapping function implemented for year #{year}"
@@ -67,6 +67,8 @@ class BulkUpload::SalesLogToCsv
[6, 3, 4, 5, nil, 28, 30, 38, 47, 51, 55, 59, 31, 39, 48, 52, 56, 60, 37, 46, 50, 54, 58, 35, 43, 49, 53, 57, 61, 32, 33, 78, 80, 79, 81, 83, 84, nil, 62, 66, 64, 65, 63, 67, 69, 70, 68, 76, 77, 16, 17, 18, 26, 24, 25, 27, 8, 91, 95, 96, 97, 92, 93, 94, 98, 100, 101, 103, 104, 106, 110, 111, 112, 113, 114, 9, 116, 117, 118, 120, 124, 125, 126, 10, 11, nil, 127, 129, 133, 134, 135, 1, 2, nil, 73, nil, 75, 107, 108, 121, 122, 130, 131, 82, 109, 123, 132, 115, 15, 86, 87, 29, 7, 12, 13, 14, 36, 44, 45, 88, 89, 102, 105, 119, 128, 19, 20, 21, 22, 23, 34, 40, 41, 42, 71, 72, 74, 85, 90, 99]
    when 2024
      (1..131).to_a
    when 2025
      (1..121).to_a
    else
      raise NotImplementedError "No mapping function implemented for year #{year}"
    end
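
The specs further down build their header rows with default_2025_field_numbers_row. That wrapper is not part of this hunk, but given the (1..121) mapping above and the custom_field_numbers_row helper shown later, it is presumably a thin shim along these lines (a sketch, not the committed code):

  # Sketch: builds the header row, "Field number" label in column A followed by
  # field numbers 1..121 (shuffled when a seed is given).
  def default_2025_field_numbers_row(seed: nil)
    custom_field_numbers_row(seed:, field_numbers: (1..121).to_a)
  end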
@@ -395,6 +397,141 @@ class BulkUpload::SalesLogToCsv
    ]
  end

  def to_2025_row
    [
      overrides[:organisation_id] || log.owning_organisation&.old_visible_id,
      overrides[:managing_organisation_id] || log.managing_organisation&.old_visible_id,
      log.assigned_to&.email,
      log.saledate&.day,
      log.saledate&.month,
      log.saledate&.strftime("%y"),
      log.purchid,
      log.ownershipsch,
      log.ownershipsch == 1 ? log.type : "", # field_9: "What is the type of shared ownership sale?",
      log.staircase, # 10
      log.ownershipsch == 2 ? log.type : "", # field_11: "What is the type of discounted ownership sale?",
      log.jointpur,
      log.jointmore,
      log.noint,
      log.privacynotice,
      log.proptype,
      log.beds,
      log.builtype,
      log.uprn,
      log.address_line1&.tr(",", " "), # 20
      log.address_line2&.tr(",", " "),
      log.town_or_city&.tr(",", " "),
      log.county&.tr(",", " "),
      ((log.postcode_full || "").split(" ") || [""]).first,
      ((log.postcode_full || "").split(" ") || [""]).last,
      log.la,
      log.wchair,
      log.age1,
      log.sex1,
      log.ethnic, # 30
      log.nationality_all_group,
      log.ecstat1,
      log.buy1livein,
      log.relat2,
      log.age2,
      log.sex2,
      log.ethnic_group2,
      log.nationality_all_buyer2_group,
      log.ecstat2,
      log.buy2livein, # 40
      log.hholdcount,
      log.relat3,
      log.age3,
      log.sex3,
      log.ecstat3,
      log.relat4,
      log.age4,
      log.sex4,
      log.ecstat4,
      log.relat5, # 50
      log.age5,
      log.sex5,
      log.ecstat5,
      log.relat6,
      log.age6,
      log.sex6,
      log.ecstat6,
      log.prevten,
      log.ppcodenk,
      ((log.ppostcode_full || "").split(" ") || [""]).first, # 60
      ((log.ppostcode_full || "").split(" ") || [""]).last,
      log.prevloc,
      log.buy2living,
      log.prevtenbuy2,
      log.hhregres,
      log.hhregresstill,
      log.armedforcesspouse,
      log.disabled,
      log.wheel,
      log.income1, # 70
      log.inc1mort,
      log.income2,
      log.inc2mort,
      log.hb,
      log.savings.present? || "R",
      log.prevown,
      log.prevshared,
      log.resale,
      log.proplen,
      log.hodate&.day, # 80
      log.hodate&.month,
      log.hodate&.strftime("%y"),
      log.frombeds,
      log.fromprop,
      log.socprevten,
      log.value,
      log.equity,
      log.mortgageused,
      log.mortgage,
      log.mortlen, # 90
      log.deposit,
      log.cashdis,
      log.mrent,
      log.mscharge,
      log.management_fee,
      log.stairbought,
      log.stairowned,
      log.staircasesale,
      log.firststair,
      log.initialpurchase&.day, # 100
      log.initialpurchase&.month,
      log.initialpurchase&.strftime("%y"),
      log.numstair,
      log.lasttransaction&.day,
      log.lasttransaction&.month,
      log.lasttransaction&.strftime("%y"),
      log.value,
      log.equity,
      log.mortgageused,
      log.mrentprestaircasing, # 110
      log.mrent,
      log.proplen,
      log.value,
      log.grant,
      log.discount,
      log.mortgageused,
      log.mortgage,
      log.mortlen,
      log.extrabor,
      log.deposit, # 120
      log.mscharge,
    ]
  end
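
The specs call to_2025_csv_row on BulkUpload::SalesLogToCsv, but that wrapper is not visible in this hunk. Based on how the class is used (col_offset: 0 in the headerless spec, the "leave column A blank" rule in the template guidance), it most likely serialises the array above roughly as follows (a hedged sketch, not the committed implementation; line_ending is assumed to be an attribute of the class, and Array#to_csv assumes the csv library is loaded):

  # Sketch: prepends col_offset blank cells, shuffles with the seed when given,
  # and joins the result into one CSV line.
  def to_2025_csv_row(seed: nil)
    row = to_2025_row
    row = row.shuffle(random: Random.new(seed)) if seed
    (Array.new(col_offset) + row).flatten.to_csv(row_sep: line_ending)
  end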
  def custom_field_numbers_row(seed: nil, field_numbers: nil)
    if seed
      ["Field number"] + field_numbers.shuffle(random: Random.new(seed))

2
app/models/bulk_upload.rb

@@ -104,6 +104,8 @@ class BulkUpload < ApplicationRecord
    end

    year_class = case year
                 when 2025
                   "Year2025"
                 when 2024
                   "Year2024"
                 when 2023

2
app/models/forms/bulk_upload_form/prepare_your_file.rb

@@ -13,6 +13,8 @@ module Forms
        case year
        when 2024
          "bulk_upload_#{log_type}_logs/forms/prepare_your_file_2024"
        when 2025
          "bulk_upload_#{log_type}_logs/forms/prepare_your_file_2025"
        end
      end

2
app/services/bulk_upload/sales/log_creator.rb

@@ -33,6 +33,8 @@ private
      BulkUpload::Sales::Year2023::CsvParser.new(path:)
    when 2024
      BulkUpload::Sales::Year2024::CsvParser.new(path:)
    when 2025
      BulkUpload::Sales::Year2025::CsvParser.new(path:)
    else
      raise "csv parser not found"
    end

2
app/services/bulk_upload/sales/validator.rb

@@ -108,6 +108,8 @@ private
      BulkUpload::Sales::Year2023::CsvParser.new(path:)
    when 2024
      BulkUpload::Sales::Year2024::CsvParser.new(path:)
    when 2025
      BulkUpload::Sales::Year2025::CsvParser.new(path:)
    else
      raise "csv parser not found"
    end

124
app/services/bulk_upload/sales/year2025/csv_parser.rb

@@ -0,0 +1,124 @@
require "csv"

class BulkUpload::Sales::Year2025::CsvParser
  include CollectionTimeHelper

  FIELDS = 121
  MAX_COLUMNS = 142
  FORM_YEAR = 2025

  attr_reader :path

  def initialize(path:)
    @path = path
  end

  def row_offset
    if with_headers?
      rows.find_index { |row| row[0].present? && row[0].match(/field number/i) } + 1
    else
      0
    end
  end

  def col_offset
    with_headers? ? 1 : 0
  end

  def cols
    @cols ||= ("A".."DR").to_a
  end

  def row_parsers
    @row_parsers ||= body_rows.map { |row|
      next if row.empty?

      stripped_row = row[col_offset..]
      hash = Hash[field_numbers.zip(stripped_row)]

      BulkUpload::Sales::Year2025::RowParser.new(hash)
    }.compact
  end

  def body_rows
    rows[row_offset..]
  end

  def rows
    @rows ||= CSV.parse(normalised_string, row_sep:)
  end

  def column_for_field(field)
    cols[field_numbers.find_index(field) + col_offset]
  end

  def wrong_template_for_year?
    collection_start_year_for_date(first_record_start_date) != FORM_YEAR
  rescue Date::Error
    false
  end

  def missing_required_headers?
    !with_headers?
  end

  def correct_field_count?
    valid_field_numbers_count = field_numbers.count { |f| f != "field_blank" }
    valid_field_numbers_count == FIELDS
  end

private

  def default_field_numbers
    (1..FIELDS).map do |number|
      if number.to_s.match?(/^[0-9]+$/)
        "field_#{number}"
      else
        "field_blank"
      end
    end
  end

  def field_numbers
    @field_numbers ||= if with_headers?
                         rows[row_offset - 1][col_offset..].map { |number| number.to_s.match?(/^[0-9]+$/) ? "field_#{number}" : "field_blank" }
                       else
                         default_field_numbers
                       end
  end

  def headers
    @headers ||= ("field_1".."field_#{FIELDS}").to_a
  end

  def with_headers?
    # we will eventually want to validate that headers exist for this year
    rows.map { |r| r[0] }.any? { |cell| cell&.match?(/field number/i) }
  end

  def row_sep
    "\n"
  end

  def normalised_string
    return @normalised_string if @normalised_string

    @normalised_string = File.read(path, encoding: "bom|utf-8")
    @normalised_string.gsub!("\r\n", "\n")
    @normalised_string.scrub!("")
    @normalised_string.tr!("\r", "\n")

    @normalised_string
  end

  def first_record_start_date
    if with_headers?
      year = row_parsers.first.field_6.to_s.strip.length.between?(1, 2) ? row_parsers.first.field_6.to_i + 2000 : row_parsers.first.field_6.to_i
      Date.new(year, row_parsers.first.field_5.to_i, row_parsers.first.field_4.to_i)
    else
      year = rows.first[5].to_s.strip.length.between?(1, 2) ? rows.first[5].to_i + 2000 : rows.first[5].to_i
      Date.new(year, rows.first[4].to_i, rows.first[3].to_i)
    end
  end
end
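
Taken together, this class gives the validator and log creator a uniform interface for 2025 files. A minimal usage sketch based on the methods above (the file path is hypothetical, and the row-level validations are assumed to live in Year2025::RowParser as in earlier years):

  parser = BulkUpload::Sales::Year2025::CsvParser.new(path: "/tmp/sales_2025.csv") # hypothetical path

  parser.missing_required_headers?   # true if no "Field number" header row was found
  parser.wrong_template_for_year?    # true if the first sale date is outside the 2025 collection year
  parser.correct_field_count?        # true if exactly 121 numbered fields are present
  parser.column_for_field("field_1") # => "B" when the template headers (and blank column A) are present

  parser.row_parsers.each do |row|
    row.valid? # assumption: RowParser exposes ActiveModel-style validations like the 2023/2024 parsers
  end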

2037
app/services/bulk_upload/sales/year2025/row_parser.rb

File diff suppressed because it is too large.

36
app/views/bulk_upload_sales_logs/forms/prepare_your_file_2025.html.erb

@@ -0,0 +1,36 @@
<% content_for :before_content do %>
  <%= govuk_back_link href: @form.back_path %>
<% end %>

<div class="govuk-grid-row">
  <div class="govuk-grid-column-two-thirds">
    <%= form_with model: @form, scope: :form, url: bulk_upload_sales_log_path(id: "prepare-your-file"), method: :patch do |f| %>
      <%= f.hidden_field :year %>
      <%= f.hidden_field :organisation_id %>

      <span class="govuk-caption-l">Upload sales logs in bulk (<%= @form.year_combo %>)</span>
      <h1 class="govuk-heading-l">Prepare your file</h1>
      <p class="govuk-body govuk-!-margin-bottom-2"><%= govuk_link_to "Read the full guidance", bulk_upload_sales_log_path(id: "guidance", form: { year: @form.year }, referrer: "prepare-your-file") %> before you start if you have not used bulk upload before.</p>

      <h2 class="govuk-heading-s">Download template</h2>

      <p class="govuk-body govuk-!-margin-bottom-2">Use one of these templates to upload logs for 2025/26:</p>
      <p class="govuk-body govuk-!-margin-bottom-2"><%= govuk_link_to "Download the sales bulk upload template (2025 to 2026)", @form.template_path %>: In this template, the questions are in the same order as the 2025/26 paper form and web form.</p>
      <p class="govuk-body govuk-!-margin-bottom-2">There are 8 rows of content in the templates. These rows are called the ‘headers’. They contain the CORE form questions and guidance about which questions are required and how to format your answers.</p>

      <h2 class="govuk-heading-s">Create your file</h2>
      <%= govuk_list [
        "Fill in the template with data from your housing management system. Your data should go below the headers, with one row per log. The bulk upload fields start at column B. Leave column A blank.",
        "Make sure each column of your data aligns with the matching headers above. You may need to reorder your data.",
        "Use the #{govuk_link_to 'Sales bulk upload Specification (2025 to 2026)', @form.specification_path} to check your data is in the correct format.".html_safe,
        "<strong>Username field:</strong> To assign a log to someone else, enter the email address they use to log into CORE.".html_safe,
        "If you have reordered the headers, keep the headers in the file.",
      ], type: :bullet %>

      <h2 class="govuk-heading-s">Save your file</h2>
      <%= govuk_list ["Save your file as a CSV.", "Your file should now be ready to upload."], type: :bullet %>

      <%= f.govuk_submit %>
    <% end %>
  </div>
</div>
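
The "Create your file" steps above line up with what the parser expects. A hedged sketch of producing a well-formed file programmatically (the output path and field values are placeholders; only the "Field number" row and the blank column A actually matter to the parser, the other header rows are descriptive):

  require "csv"

  field_values = Array.new(121) { |i| "answer to field #{i + 1}" } # placeholder data

  CSV.open("sales_2025.csv", "w") do |csv|
    # Descriptive header rows; the parser only keys off the row starting "Field number".
    ["Question", "Additional info", "Values", "Can be empty?",
     "Type of letting the question applies to", "Duplicate check field?"].each { |heading| csv << [heading] }
    csv << ["Field number", *(1..121)]
    # Data rows: column A stays blank, fields 1 to 121 go in columns B onwards.
    csv << [nil, *field_values]
  end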

2
config/locales/en.yml

@@ -57,6 +57,8 @@ en:
          <<: *bulk_upload__row_parser__base
        bulk_upload/lettings/year2023/row_parser:
          <<: *bulk_upload__row_parser__base
        bulk_upload/sales/year2025/row_parser:
          <<: *bulk_upload__row_parser__base
        bulk_upload/sales/year2024/row_parser:
          <<: *bulk_upload__row_parser__base
        bulk_upload/sales/year2023/row_parser:

46
config/locales/validations/sales/2025/bulk_upload.en.yml

@@ -0,0 +1,46 @@
en:
  validations:
    sales:
      2025:
        bulk_upload:
          not_answered: "You must answer %{question}"
          invalid_option: "Enter a valid value for %{question}"
          spreadsheet_dupe: "This is a duplicate of a log in your file."
          duplicate: "This is a duplicate log."
          blank_file: "Template is blank - The template must be filled in for us to create the logs and check if data is correct."
          wrong_template:
            over_max_column_count: "Too many columns, please ensure you have used the correct template."
            no_headers: "Your file does not contain the required header rows. Add or check the header rows and upload your file again. [Read more about using the template headers](%{guidance_link})."
            wrong_field_numbers_count: "Incorrect number of fields, please ensure you have used the correct template."
            wrong_template: "Incorrect sale dates, please ensure you have used the correct template."
          numeric:
            within_range: "%{field} must be between %{min} and %{max}."
          owning_organisation:
            not_found: "The owning organisation code is incorrect."
            not_stock_owner: "The owning organisation code provided is for an organisation that does not own stock."
            not_permitted:
              support: "This owning organisation is not affiliated with %{name}."
              not_support: "You do not have permission to add logs for this owning organisation."
          assigned_to:
            not_found: "User with the specified email could not be found."
            organisation_not_related: "User must be related to owning organisation or managing organisation."
            managing_organisation_not_related: "This organisation does not have a relationship with the owning organisation."
          saledate:
            outside_collection_window: "Enter a date within the %{year_combo} collection year, which is between 1st April %{start_year} and 31st March %{end_year}."
            year_not_two_or_four_digits: "Sale completion year must be 2 or 4 digits."
          ecstat1:
            buyer_cannot_be_over_16_and_child: "Buyer 1's age cannot be 16 or over if their working situation is child under 16."
            buyer_cannot_be_child: "Buyer 1 cannot have a working situation of child under 16."
          age1:
            buyer_cannot_be_over_16_and_child: "Buyer 1's age cannot be 16 or over if their working situation is child under 16."
          ecstat2:
            buyer_cannot_be_over_16_and_child: "Buyer 2's age cannot be 16 or over if their working situation is child under 16."
            buyer_cannot_be_child: "Buyer 2 cannot have a working situation of child under 16."
          age2:
            buyer_cannot_be_over_16_and_child: "Buyer 2's age cannot be 16 or over if their working situation is child under 16."
          address:
            not_found: "We could not find this address. Check the address data in your CSV file is correct and complete, or select the correct address using the CORE site."
            not_determined: "There are multiple matches for this address. Either select the correct address manually or correct the UPRN in the CSV file."
            not_answered: "Enter either the UPRN or the full address."
          nationality:
            invalid: "Select a valid nationality."
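
These keys are resolved through Rails I18n by the row parser and validator; the validator spec below looks up the wrong_template message this way. For example (the interpolation arguments in the second call are illustrative):

  I18n.t("validations.sales.2025.bulk_upload.wrong_template.wrong_template")
  # => "Incorrect sale dates, please ensure you have used the correct template."

  I18n.t("validations.sales.2025.bulk_upload.numeric.within_range",
         field: "Percentage equity stake purchased", min: 1, max: 100)
  # => "Percentage equity stake purchased must be between 1 and 100."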

BIN
session-manager-plugin.deb

Binary file not shown.

1
spec/models/bulk_upload_spec.rb

@@ -41,6 +41,7 @@ RSpec.describe BulkUpload, type: :model do
    [
      { year: 2023, expected_value: "2023 to 2024" },
      { year: 2024, expected_value: "2024 to 2025" },
      { year: 2025, expected_value: "2025 to 2026" },
    ].each do |test_case|
      context "when the bulk upload year is #{test_case[:year]}" do
        let(:bulk_upload) { build(:bulk_upload, year: test_case[:year]) }

8
spec/services/bulk_upload/sales/log_creator_spec.rb

@@ -6,13 +6,13 @@ RSpec.describe BulkUpload::Sales::LogCreator do
  let(:owning_org) { create(:organisation, old_visible_id: 123) }
  let(:user) { create(:user, organisation: owning_org) }

  let(:bulk_upload) { create(:bulk_upload, :sales, user:, year: 2024) }
  let(:csv_parser) { instance_double(BulkUpload::Sales::Year2024::CsvParser) }
  let(:row_parser) { instance_double(BulkUpload::Sales::Year2024::RowParser) }
  let(:bulk_upload) { create(:bulk_upload, :sales, user:, year: 2025) }
  let(:csv_parser) { instance_double(BulkUpload::Sales::Year2025::CsvParser) }
  let(:row_parser) { instance_double(BulkUpload::Sales::Year2025::RowParser) }
  let(:log) { build(:sales_log, :completed, assigned_to: user, owning_organisation: owning_org, managing_organisation: owning_org) }

  before do
    allow(BulkUpload::Sales::Year2024::CsvParser).to receive(:new).and_return(csv_parser)
    allow(BulkUpload::Sales::Year2025::CsvParser).to receive(:new).and_return(csv_parser)
    allow(csv_parser).to receive(:row_parsers).and_return([row_parser])
    allow(row_parser).to receive(:log).and_return(log)
    allow(row_parser).to receive(:bulk_upload=).and_return(true)

16
spec/services/bulk_upload/sales/validator_spec.rb

@@ -58,6 +58,22 @@ RSpec.describe BulkUpload::Sales::Validator do
      end
    end

    context "when trying to upload 2024 logs for 2025 bulk upload" do
      let(:bulk_upload) { create(:bulk_upload, user:, year: 2025) }
      let(:log) { build(:sales_log, :completed, saledate: Time.zone.local(2024, 10, 10), assigned_to: user) }

      before do
        file.write(log_to_csv.default_2025_field_numbers_row)
        file.write(log_to_csv.to_2025_csv_row)
        file.rewind
      end

      it "is not valid" do
        expect(validator).not_to be_valid
        expect(validator.errors["base"]).to eql([I18n.t("validations.sales.2025.bulk_upload.wrong_template.wrong_template")])
      end
    end

    [
      { line_ending: "\n", name: "unix" },
      { line_ending: "\r\n", name: "windows" },
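
For context on why the new example above is rejected: 10 October 2024 falls in the 2024/25 collection year, so the 2025 parser's wrong_template_for_year? check fails the file. A rough restatement of that check, assuming collection years run 1 April to 31 March as the locale messages above state:

  require "date"

  saledate  = Date.new(2024, 10, 10)
  form_year = 2025

  collection_start_year = saledate.month >= 4 ? saledate.year : saledate.year - 1 # => 2024
  collection_start_year != form_year # => true, so the wrong_template error is added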

191
spec/services/bulk_upload/sales/year2025/csv_parser_spec.rb

@@ -0,0 +1,191 @@
require "rails_helper"

RSpec.describe BulkUpload::Sales::Year2025::CsvParser do
  subject(:service) { described_class.new(path:) }

  let(:file) { Tempfile.new }
  let(:path) { file.path }
  let(:log) { build(:sales_log, :completed, :with_uprn) }

  context "when parsing csv with headers" do
    before do
      file.write("Question\n")
      file.write("Additional info\n")
      file.write("Values\n")
      file.write("Can be empty?\n")
      file.write("Type of letting the question applies to\n")
      file.write("Duplicate check field?\n")
      file.write(BulkUpload::SalesLogToCsv.new(log:).default_2025_field_numbers_row)
      file.write(BulkUpload::SalesLogToCsv.new(log:).to_2025_csv_row)
      file.write("\n")
      file.rewind
    end

    it "returns correct offsets" do
      expect(service.row_offset).to eq(7)
      expect(service.col_offset).to eq(1)
    end

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_22).to eql(log.uprn)
    end

    it "counts the number of valid field numbers correctly" do
      expect(service).to be_correct_field_count
    end

    it "does not parse the last empty row" do
      expect(service.row_parsers.count).to eq(1)
    end
  end

  context "when some csv headers are empty (and we don't care about them)" do
    before do
      file.write("Question\n")
      file.write("Additional info\n")
      file.write("Values\n")
      file.write("\n")
      file.write("Type of letting the question applies to\n")
      file.write("Duplicate check field?\n")
      file.write(BulkUpload::SalesLogToCsv.new(log:).default_2025_field_numbers_row)
      file.write(BulkUpload::SalesLogToCsv.new(log:).to_2025_csv_row)
      file.write("\n")
      file.rewind
    end

    it "returns correct offsets" do
      expect(service.row_offset).to eq(7)
      expect(service.col_offset).to eq(1)
    end

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_22).to eql(log.uprn)
    end

    it "counts the number of valid field numbers correctly" do
      expect(service).to be_correct_field_count
    end

    it "does not parse the last empty row" do
      expect(service.row_parsers.count).to eq(1)
    end
  end

  context "when parsing csv with headers in arbitrary order" do
    let(:seed) { rand }

    before do
      file.write("Question\n")
      file.write("Additional info\n")
      file.write("Values\n")
      file.write("Can be empty?\n")
      file.write("Type of letting the question applies to\n")
      file.write("Duplicate check field?\n")
      file.write(BulkUpload::SalesLogToCsv.new(log:).default_2025_field_numbers_row(seed:))
      file.write(BulkUpload::SalesLogToCsv.new(log:).to_2025_csv_row(seed:))
      file.rewind
    end

    it "returns correct offsets" do
      expect(service.row_offset).to eq(7)
      expect(service.col_offset).to eq(1)
    end

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_22).to eql(log.uprn)
    end
  end

  context "when parsing csv without headers" do
    let(:file) { Tempfile.new }
    let(:path) { file.path }
    let(:log) { build(:sales_log, :completed, :with_uprn) }

    before do
      file.write(BulkUpload::SalesLogToCsv.new(log:, col_offset: 0).to_2025_csv_row)
      file.rewind
    end

    it "returns correct offsets" do
      expect(service.row_offset).to eq(0)
      expect(service.col_offset).to eq(0)
    end

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_22).to eql(log.uprn)
    end
  end

  context "when parsing with BOM aka byte order mark" do
    let(:file) { Tempfile.new }
    let(:path) { file.path }
    let(:log) { build(:sales_log, :completed, :with_uprn) }
    let(:bom) { "\uFEFF" }

    before do
      file.write(bom)
      file.write(BulkUpload::SalesLogToCsv.new(log:, col_offset: 0).to_2025_csv_row)
      file.close
    end

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_22).to eql(log.uprn)
    end
  end

  context "when an invalid byte sequence" do
    let(:file) { Tempfile.new }
    let(:path) { file.path }
    let(:log) { build(:sales_log, :completed, :with_uprn) }
    let(:invalid_sequence) { "\x81" }

    before do
      file.write(invalid_sequence)
      file.write(BulkUpload::SalesLogToCsv.new(log:, col_offset: 0).to_2025_csv_row)
      file.close
    end

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_22).to eql(log.uprn)
    end
  end

  describe "#column_for_field", aggregate_failures: true do
    context "when headers present" do
      before do
        file.write("Question\n")
        file.write("Additional info\n")
        file.write("Values\n")
        file.write("Can be empty?\n")
        file.write("Type of letting the question applies to\n")
        file.write("Duplicate check field?\n")
        file.write(BulkUpload::SalesLogToCsv.new(log:).default_2025_field_numbers_row)
        file.write(BulkUpload::SalesLogToCsv.new(log:).to_2025_csv_row)
        file.rewind
      end

      it "returns correct column" do
        expect(service.column_for_field("field_1")).to eql("B")
        expect(service.column_for_field("field_99")).to eql("CV")
      end
    end
  end

  context "when parsing csv with carriage returns" do
    before do
      file.write("Question\r\n")
      file.write("Additional info\r")
      file.write("Values\r\n")
      file.write("Can be empty?\r")
      file.write("Type of letting the question applies to\r\n")
      file.write("Duplicate check field?\r")
      file.write(BulkUpload::SalesLogToCsv.new(log:).default_2025_field_numbers_row)
      file.write(BulkUpload::SalesLogToCsv.new(log:).to_2025_csv_row)
      file.rewind
    end

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_22).to eql(log.uprn)
    end
  end
end

1912
spec/services/bulk_upload/sales/year2025/row_parser_spec.rb

File diff suppressed because it is too large.