Browse Source
* Copy 23 BU files to 24 * Renumber bulk upload fields for 2024 * Add prepare your file * Update max columns * Update fields in first_record_start_date * Update managing org * Rebase changes pull/2174/head
kosiakkatrina
11 months ago
committed by
GitHub
11 changed files with 3104 additions and 4 deletions
@ -0,0 +1,109 @@ |
|||||||
|
require "csv" |
||||||
|
|
||||||
|
# Reads a sales bulk upload CSV for the 2024 form year and exposes each data
# row as a BulkUpload::Sales::Year2024::RowParser.
#
# Two file layouts are handled:
# * with headers    - some row has a first cell matching "field number"
#                     (case-insensitive). That row supplies the field number
#                     of every column, the first column is skipped, and data
#                     starts on the row after it.
# * without headers - every row is data and the columns map, in order, to
#                     field_1..field_131.
class BulkUpload::Sales::Year2024::CsvParser
  include CollectionTimeHelper

  # NOTE(review): MAX_COLUMNS is not referenced inside this class; presumably
  # it is read by a validator elsewhere - confirm before changing.
  MAX_COLUMNS = 142
  FORM_YEAR = 2024

  attr_reader :path

  # @param path [String] location of the CSV file on disk
  def initialize(path:)
    @path = path
  end

  # @return [Integer] index (into #rows) of the first data row; 0 when the
  #   file has no header rows
  def row_offset
    return 0 unless with_headers?

    # Nil-safe match: rows above the "field number" row may be blank or have
    # an empty first cell, which CSV yields as nil.
    rows.find_index { |row| field_number_row?(row) } + 1
  end

  # @return [Integer] number of leading columns to skip in each row (1 when
  #   headers are present, otherwise 0)
  def col_offset
    with_headers? ? 1 : 0
  end

  # @return [Array<String>] spreadsheet-style column letters "A".."EK"
  def cols
    @cols ||= ("A".."EK").to_a
  end

  # @return [Array<BulkUpload::Sales::Year2024::RowParser>] one parser per
  #   data row, built from a { "field_N" => cell } hash
  def row_parsers
    @row_parsers ||= body_rows.map do |row|
      # Slicing an empty row from index 1 returns nil (blank line in a file
      # with headers); treat it as a row with no values instead of raising.
      values = row[col_offset..] || []

      BulkUpload::Sales::Year2024::RowParser.new(field_numbers.zip(values).to_h)
    end
  end

  # @return [Array<Array>] the rows from #row_offset onwards
  def body_rows
    rows[row_offset..]
  end

  # @return [Array<Array>] every parsed CSV row, header rows included
  def rows
    @rows ||= CSV.parse(normalised_string, row_sep:)
  end

  # @param field [String] a field name such as "field_1"
  # @return [String, nil] the column letter holding that field, or nil when
  #   the field is not present in this file
  def column_for_field(field)
    position = field_numbers.find_index(field)
    return if position.nil?

    cols[position + col_offset]
  end

  # @return [Boolean] true when the first record's date falls in a collection
  #   year other than FORM_YEAR; false when that date cannot be constructed
  def wrong_template_for_year?
    collection_start_year_for_date(first_record_start_date) != FORM_YEAR
  rescue Date::Error
    false
  end

  private

  # Field names used when the file carries no "field number" row.
  def default_field_numbers
    (1..131).map { |number| "field_#{number}" }
  end

  # Field name for each data column, taken from the "field number" row when
  # present. Cells that are not purely numeric map to "field_blank".
  def field_numbers
    @field_numbers ||= if with_headers?
                         rows[row_offset - 1][col_offset..].map { |cell| field_name_for(cell) }
                       else
                         default_field_numbers
                       end
  end

  # Converts one cell of the "field number" row into a field name.
  # \A and \z anchor the whole cell; ^ and $ would match per line.
  def field_name_for(cell)
    cell.to_s.match?(/\A[0-9]+\z/) ? "field_#{cell}" : "field_blank"
  end

  # NOTE(review): not called anywhere in this class, and its upper bound (135)
  # differs from default_field_numbers (131) - confirm which is intended.
  def headers
    @headers ||= ("field_1".."field_135").to_a
  end

  # True when any row's first cell matches "field number".
  def with_headers?
    # we will eventually want to validate that headers exist for this year
    rows.any? { |row| field_number_row?(row) }
  end

  # True when the row's first cell matches "field number" (case-insensitive).
  def field_number_row?(row)
    row[0]&.match?(/field number/i)
  end

  def row_sep
    "\n"
  end

  # File contents read as UTF-8 (a leading BOM is dropped by the "bom|utf-8"
  # encoding), with invalid byte sequences removed and every line ending
  # rewritten to "\n". "\r\n" is collapsed first so it yields one line break.
  def normalised_string
    return @normalised_string if @normalised_string

    @normalised_string = File.read(path, encoding: "bom|utf-8")
    @normalised_string.gsub!("\r\n", "\n")
    @normalised_string.scrub!("")
    @normalised_string.tr!("\r", "\n")

    @normalised_string
  end

  # Date built from the first record: fields 4, 5 and 6 supply day, month and
  # two-digit year. Without headers the same values are read positionally
  # from the first row.
  #
  # @raise [Date::Error] when the three values do not form a valid date
  def first_record_start_date
    day, month, year =
      if with_headers?
        first_record = row_parsers.first
        [first_record.field_4, first_record.field_5, first_record.field_6]
      else
        rows.first[3..5]
      end

    Date.new(year.to_i + 2000, month.to_i, day.to_i)
  end
end
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,41 @@ |
|||||||
|
<%# "Prepare your file" step of the sales bulk upload journey: links to the
    guidance, templates and specification exposed by @form, then submits the
    selected year on to the next step. %>
<% content_for :before_content do %>
  <%= govuk_back_link href: @form.back_path %>
<% end %>

<div class="govuk-grid-row">
  <div class="govuk-grid-column-two-thirds">
    <%= form_with model: @form, scope: :form, url: bulk_upload_sales_log_path(id: "prepare-your-file"), method: :patch do |f| %>
      <%= f.hidden_field :year %>

      <span class="govuk-caption-l">Upload sales logs in bulk (<%= @form.year_combo %>)</span>
      <h1 class="govuk-heading-l">Prepare your file</h1>
      <p class="govuk-body govuk-!-margin-bottom-2"><%= govuk_link_to "Read the full guidance", bulk_upload_sales_log_path(id: "guidance", form: { year: @form.year }) %> before you start if you have not used bulk upload before.</p>

      <h2 class="govuk-heading-s">Download template</h2>

      <%# The collection year comes from @form.year_combo, matching the caption and specification link. %>
      <p class="govuk-body govuk-!-margin-bottom-2">Use one of these templates to upload logs for <%= @form.year_combo %>:</p>
      <ul class="govuk-list govuk-list--bullet">
        <li><%= govuk_link_to "Download the new template", @form.template_path %>: In this template, the questions are in the same order as the <%= @form.year_combo %> paper form and web form.</li>
        <li><%= govuk_link_to "Download the legacy template", @form.legacy_template_path %>: In this template, the questions are in the same order as the 2022/23 template, with new questions added on to the end.</li>
      </ul>
      <p class="govuk-body govuk-!-margin-bottom-2">There are 7 or 8 rows of content in the templates. These rows are called the ‘headers’. They contain the CORE form questions and guidance about which questions are required and how to format your answers.</p>

      <h2 class="govuk-heading-s">Create your file</h2>
      <ul class="govuk-list govuk-list--bullet">
        <li>Fill in the template with data from your housing management system. Your data should go below the headers, with one row per log. The bulk upload fields start at column B. Leave column A blank.</li>
        <li>Make sure each column of your data aligns with the matching headers above. You may need to reorder your data.</li>
        <li>Use the <%= govuk_link_to "Sales #{@form.year_combo} Bulk Upload Specification", @form.specification_path %> to check your data is in the correct format.</li>
        <li><strong>Username field:</strong> To assign a log to someone else, enter the email address they use to log into CORE.</li>
        <li>If you are using the new template, keep the headers. If you are using the legacy template, you can either keep or remove the headers. If you remove the headers, you should also remove the blank column A.</li>
      </ul>

      <h2 class="govuk-heading-s">Save your file</h2>
      <ul class="govuk-list govuk-list--bullet">
        <li>Save your file as a CSV.</li>
        <li>Your file should now be ready to upload.</li>
      </ul>

      <%= f.govuk_submit %>
    <% end %>
  </div>
</div>
@ -0,0 +1,150 @@ |
|||||||
|
require "rails_helper" |
||||||
|
|
||||||
|
# Exercises the 2024 sales CSV parser against files with and without header
# rows, shuffled columns, a byte order mark, invalid bytes and mixed line
# endings.
RSpec.describe BulkUpload::Sales::Year2024::CsvParser do
  subject(:service) { described_class.new(path:) }

  let(:file) { Tempfile.new }
  let(:path) { file.path }
  let(:log) { build(:sales_log, :completed, :with_uprn) }

  # First-column labels of the six rows written before the field-number row.
  let(:header_labels) do
    [
      "Question",
      "Additional info",
      "Values",
      "Can be empty?",
      "Type of letting the question applies to",
      "Duplicate check field?",
    ]
  end

  # Remove the temp file after every example instead of leaving it on disk.
  # Closing a file that an example already closed is a no-op.
  after do
    file.close
    file.unlink
  end

  # Writes a template-style file: the six header rows, the field-number row
  # and a single data row for `log`.
  #
  # line_endings - terminators applied to the header rows in rotation
  # row_options  - forwarded to both SalesLogToCsv row builders (e.g. seed:)
  def write_file_with_headers(line_endings: ["\n"], **row_options)
    header_labels.each_with_index do |label, index|
      file.write("#{label}#{line_endings[index % line_endings.size]}")
    end

    file.write(BulkUpload::SalesLogToCsv.new(log:).default_2024_field_numbers_row(**row_options))
    file.write(BulkUpload::SalesLogToCsv.new(log:).to_2024_csv_row(**row_options))
    file.rewind
  end

  context "when parsing csv with headers" do
    before do
      write_file_with_headers
    end

    it "returns correct offsets" do
      expect(service.row_offset).to eq(7)
      expect(service.col_offset).to eq(1)
    end

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_22).to eql(log.uprn)
    end
  end

  context "when parsing csv with headers in arbitrary order" do
    let(:seed) { rand }

    before do
      write_file_with_headers(seed:)
    end

    it "returns correct offsets" do
      expect(service.row_offset).to eq(7)
      expect(service.col_offset).to eq(1)
    end

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_22).to eql(log.uprn)
    end
  end

  context "when parsing csv without headers" do
    before do
      file.write(BulkUpload::SalesLogToCsv.new(log:, col_offset: 0).to_2024_csv_row)
      file.rewind
    end

    it "returns correct offsets" do
      expect(service.row_offset).to eq(0)
      expect(service.col_offset).to eq(0)
    end

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_22).to eql(log.uprn)
    end
  end

  context "when parsing with BOM aka byte order mark" do
    let(:bom) { "\uFEFF" }

    before do
      file.write(bom)
      file.write(BulkUpload::SalesLogToCsv.new(log:, col_offset: 0).to_2024_csv_row)
      file.close
    end

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_22).to eql(log.uprn)
    end
  end

  context "when an invalid byte sequence" do
    let(:invalid_sequence) { "\x81" }

    before do
      file.write(invalid_sequence)
      file.write(BulkUpload::SalesLogToCsv.new(log:, col_offset: 0).to_2024_csv_row)
      file.close
    end

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_22).to eql(log.uprn)
    end
  end

  describe "#column_for_field", aggregate_failures: true do
    context "when headers present" do
      before do
        write_file_with_headers
      end

      it "returns correct column" do
        expect(service.column_for_field("field_1")).to eql("B")
        expect(service.column_for_field("field_99")).to eql("CV")
      end
    end
  end

  context "when parsing csv with carriage returns" do
    before do
      # Header rows alternate between "\r\n" and bare "\r" terminators.
      write_file_with_headers(line_endings: ["\r\n", "\r"])
    end

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_22).to eql(log.uprn)
    end
  end
end
File diff suppressed because it is too large
Load Diff
Loading…
Reference in new issue