17 changed files with 4515 additions and 5 deletions
@ -0,0 +1,124 @@
|
||||
require "csv" |
||||
|
||||
# Reads a 2025/26 sales bulk upload CSV from +path+ and exposes its rows,
# header layout and one RowParser per non-empty data row. Files may or may not
# contain the template header rows; a file is treated as having headers when
# any row's first cell contains "field number" (case-insensitive).
class BulkUpload::Sales::Year2025::CsvParser
  include CollectionTimeHelper

  FIELDS = 121
  MAX_COLUMNS = 142
  FORM_YEAR = 2025

  # First-column label identifying the header row that lists the field numbers.
  FIELD_NUMBER_ROW_LABEL = /field number/i
  # A header cell is a field number only if it is digits from start to end.
  # \A and \z are used instead of ^ and $ so a cell with an embedded newline
  # (e.g. "12\nfoo") is not accepted.
  FIELD_NUMBER_CELL = /\A[0-9]+\z/
  private_constant :FIELD_NUMBER_ROW_LABEL, :FIELD_NUMBER_CELL

  attr_reader :path

  # @param path [String] location of the CSV file on disk
  def initialize(path:)
    @path = path
  end

  # Index of the first data row: the row after the field-number header row,
  # or 0 when the file has no headers.
  def row_offset
    if with_headers?
      rows.find_index { |row| field_number_row?(row) } + 1
    else
      0
    end
  end

  # Number of leading columns to skip: 1 when headers are present (column A
  # holds the header labels), otherwise 0.
  def col_offset
    with_headers? ? 1 : 0
  end

  # Spreadsheet-style column letters from "A" to "DR".
  def cols
    @cols ||= ("A".."DR").to_a
  end

  # One RowParser per non-empty body row, keyed by the field numbers taken
  # from the header row (or the default ordering when there are no headers).
  def row_parsers
    @row_parsers ||= begin
      # Looked up once rather than once per row.
      offset = col_offset

      body_rows.filter_map do |row|
        next if row.empty?

        BulkUpload::Sales::Year2025::RowParser.new(field_numbers.zip(row[offset..]).to_h)
      end
    end
  end

  # All parsed rows from the first data row onwards.
  def body_rows
    rows[row_offset..]
  end

  # Every row of the normalised file as parsed by CSV.
  def rows
    @rows ||= CSV.parse(normalised_string, row_sep:)
  end

  # Column letter holding +field+ (e.g. "field_1"), or nil when the field does
  # not appear in the file's field numbers.
  def column_for_field(field)
    index = field_numbers.find_index(field)
    return if index.nil?

    cols[index + col_offset]
  end

  # True when the first record's date falls in a collection year other than
  # FORM_YEAR. Returns false when there is no first record or its date parts
  # do not form a valid date.
  def wrong_template_for_year?
    start_date = first_record_start_date
    return false if start_date.nil?

    collection_start_year_for_date(start_date) != FORM_YEAR
  rescue Date::Error
    false
  end

  def missing_required_headers?
    !with_headers?
  end

  # True when the number of recognised (non-blank) field numbers equals FIELDS.
  def correct_field_count?
    field_numbers.count { |field| field != "field_blank" } == FIELDS
  end

private

  # Field names in template order, used when the file has no header rows.
  def default_field_numbers
    (1..FIELDS).map { |number| "field_#{number}" }
  end

  # Field name for each data column. With headers, these come from the
  # field-number row; cells that are not purely numeric become "field_blank".
  def field_numbers
    @field_numbers ||= if with_headers?
                         rows[row_offset - 1][col_offset..].map do |cell|
                           cell.to_s.match?(FIELD_NUMBER_CELL) ? "field_#{cell}" : "field_blank"
                         end
                       else
                         default_field_numbers
                       end
  end

  def headers
    @headers ||= ("field_1".."field_#{FIELDS}").to_a
  end

  # Memoised because it is consulted by most public methods and the answer
  # cannot change once the file has been read.
  def with_headers?
    # we will eventually want to validate that headers exist for this year
    return @with_headers if defined?(@with_headers)

    @with_headers = rows.any? { |row| field_number_row?(row) }
  end

  # Whether +row+ is the header row listing the field numbers.
  def field_number_row?(row)
    row[0]&.match?(FIELD_NUMBER_ROW_LABEL) || false
  end

  def row_sep
    "\n"
  end

  # File contents read as UTF-8 (BOM stripped), with invalid byte sequences
  # removed and every "\r\n" or lone "\r" converted to "\n".
  def normalised_string
    return @normalised_string if @normalised_string

    @normalised_string = File.read(path, encoding: "bom|utf-8")
    @normalised_string.gsub!("\r\n", "\n")
    @normalised_string.scrub!("")
    @normalised_string.tr!("\r", "\n")

    @normalised_string
  end

  # Date built from the first record's day, month and year values, or nil when
  # the file has no first record. Raises Date::Error for an invalid date.
  def first_record_start_date
    parts = first_record_date_parts
    return if parts.nil?

    day, month, year = parts
    Date.new(expand_year(year), month.to_i, day.to_i)
  end

  # [day, month, year] raw values for the first record: fields 4-6 of the
  # first row parser with headers, otherwise cells 3-5 of the first row.
  # Returns nil when there is no such record.
  def first_record_date_parts
    if with_headers?
      record = row_parsers.first
      record && [record.field_4, record.field_5, record.field_6]
    else
      row = rows.first
      row && [row[3], row[4], row[5]]
    end
  end

  # Treats a 1- or 2-character year as an offset from 2000 (e.g. 25 -> 2025).
  def expand_year(value)
    value.to_s.strip.length.between?(1, 2) ? value.to_i + 2000 : value.to_i
  end
end
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,36 @@
|
||||
<%# "Prepare your file" step of the sales bulk upload flow. Renders a back link, guidance on downloading, filling in and saving the template, and a form that PATCHes the hidden year and organisation_id to the "prepare-your-file" step. %>
<% content_for :before_content do %> |
||||
<%= govuk_back_link href: @form.back_path %> |
||||
<% end %> |
||||
|
||||
<div class="govuk-grid-row"> |
||||
<div class="govuk-grid-column-two-thirds"> |
||||
<%= form_with model: @form, scope: :form, url: bulk_upload_sales_log_path(id: "prepare-your-file"), method: :patch do |f| %> |
||||
<%= f.hidden_field :year %> |
||||
<%= f.hidden_field :organisation_id %> |
||||
|
||||
<span class="govuk-caption-l">Upload sales logs in bulk (<%= @form.year_combo %>)</span> |
||||
<h1 class="govuk-heading-l">Prepare your file</h1> |
||||
<p class="govuk-body govuk-!-margin-bottom-2"><%= govuk_link_to "Read the full guidance", bulk_upload_sales_log_path(id: "guidance", form: { year: @form.year }, referrer: "prepare-your-file") %> before you start if you have not used bulk upload before.</p> |
||||
|
||||
<h2 class="govuk-heading-s">Download template</h2> |
||||
|
||||
<%# NOTE(review): the copy below says "one of these templates" / "the templates" but only one template link is rendered — confirm the wording is intended. %>
<p class="govuk-body govuk-!-margin-bottom-2">Use one of these templates to upload logs for 2025/26:</p> |
||||
<p class="govuk-body govuk-!-margin-bottom-2"><%= govuk_link_to "Download the sales bulk upload template (2025 to 2026)", @form.template_path %>: In this template, the questions are in the same order as the 2025/26 paper form and web form.</p> |
||||
<p class="govuk-body govuk-!-margin-bottom-2">There are 8 rows of content in the templates. These rows are called the ‘headers’. They contain the CORE form questions and guidance about which questions are required and how to format your answers.</p> |
||||
|
||||
<h2 class="govuk-heading-s">Create your file</h2> |
||||
<%# Items containing markup are marked html_safe so the link and <strong> tag render as HTML. %>
<%= govuk_list [ |
||||
"Fill in the template with data from your housing management system. Your data should go below the headers, with one row per log. The bulk upload fields start at column B. Leave column A blank.", |
||||
"Make sure each column of your data aligns with the matching headers above. You may need to reorder your data.", |
||||
"Use the #{govuk_link_to 'Sales bulk upload Specification (2025 to 2026)', @form.specification_path} to check your data is in the correct format.".html_safe, |
||||
"<strong>Username field:</strong> To assign a log to someone else, enter the email address they use to log into CORE.".html_safe, |
||||
"If you have reordered the headers, keep the headers in the file.", |
||||
], type: :bullet %> |
||||
|
||||
<h2 class="govuk-heading-s">Save your file</h2> |
||||
<%= govuk_list ["Save your file as a CSV.", "Your file should now be ready to upload."], type: :bullet %> |
||||
|
||||
<%= f.govuk_submit %> |
||||
<% end %> |
||||
</div> |
||||
</div> |
@ -0,0 +1,46 @@
|
||||
# English messages for sales bulk upload validation, grouped by the field or
# check they relate to. %{...} tokens are interpolation placeholders.
en: |
||||
validations: |
||||
sales: |
||||
# NOTE(review): messages are keyed under 2024 — confirm this is the intended collection year for this file.
2024: |
||||
bulk_upload: |
||||
not_answered: "You must answer %{question}" |
||||
invalid_option: "Enter a valid value for %{question}" |
||||
spreadsheet_dupe: "This is a duplicate of a log in your file." |
||||
duplicate: "This is a duplicate log." |
||||
blank_file: "Template is blank - The template must be filled in for us to create the logs and check if data is correct." |
||||
# File-level template problems (column count, headers, field count, dates).
wrong_template: |
||||
over_max_column_count: "Too many columns, please ensure you have used the correct template." |
||||
no_headers: "Your file does not contain the required header rows. Add or check the header rows and upload your file again. [Read more about using the template headers](%{guidance_link})." |
||||
wrong_field_numbers_count: "Incorrect number of fields, please ensure you have used the correct template." |
||||
wrong_template: "Incorrect sale dates, please ensure you have used the correct template." |
||||
numeric: |
||||
within_range: "%{field} must be between %{min} and %{max}." |
||||
owning_organisation: |
||||
not_found: "The owning organisation code is incorrect." |
||||
not_stock_owner: "The owning organisation code provided is for an organisation that does not own stock." |
||||
not_permitted: |
||||
support: "This owning organisation is not affiliated with %{name}." |
||||
not_support: "You do not have permission to add logs for this owning organisation." |
||||
assigned_to: |
||||
not_found: "User with the specified email could not be found." |
||||
organisation_not_related: "User must be related to owning organisation or managing organisation." |
||||
managing_organisation_not_related: "This organisation does not have a relationship with the owning organisation." |
||||
saledate: |
||||
outside_collection_window: "Enter a date within the %{year_combo} collection year, which is between 1st April %{start_year} and 31st March %{end_year}." |
||||
year_not_two_or_four_digits: "Sale completion year must be 2 or 4 digits." |
||||
ecstat1: |
||||
buyer_cannot_be_over_16_and_child: "Buyer 1's age cannot be 16 or over if their working situation is child under 16." |
||||
buyer_cannot_be_child: "Buyer 1 cannot have a working situation of child under 16." |
||||
age1: |
||||
buyer_cannot_be_over_16_and_child: "Buyer 1's age cannot be 16 or over if their working situation is child under 16." |
||||
ecstat2: |
||||
buyer_cannot_be_over_16_and_child: "Buyer 2's age cannot be 16 or over if their working situation is child under 16." |
||||
buyer_cannot_be_child: "Buyer 2 cannot have a working situation of child under 16." |
||||
age2: |
||||
buyer_cannot_be_over_16_and_child: "Buyer 2's age cannot be 16 or over if their working situation is child under 16." |
||||
address: |
||||
not_found: "We could not find this address. Check the address data in your CSV file is correct and complete, or select the correct address using the CORE site." |
||||
not_determined: "There are multiple matches for this address. Either select the correct address manually or correct the UPRN in the CSV file." |
||||
not_answered: "Enter either the UPRN or the full address." |
||||
nationality: |
||||
invalid: "Select a valid nationality." |
Binary file not shown.
@ -0,0 +1,191 @@
|
||||
require "rails_helper" |
||||
|
||||
RSpec.describe BulkUpload::Sales::Year2025::CsvParser do
  subject(:service) { described_class.new(path:) }

  let(:file) { Tempfile.new }
  let(:path) { file.path }
  let(:log) { build(:sales_log, :completed, :with_uprn) }

  # Writes each label to the temp file on its own "\n"-terminated line.
  def write_header_labels(*labels)
    labels.each { |label| file.write("#{label}\n") }
  end

  # Writes the field-number header row followed by the CSV row for `log`.
  # Any options (e.g. seed:) are passed to both row builders.
  def write_field_numbers_and_log_row(**options)
    file.write(BulkUpload::SalesLogToCsv.new(log:).default_2025_field_numbers_row(**options))
    file.write(BulkUpload::SalesLogToCsv.new(log:).to_2025_csv_row(**options))
  end

  # Writes the CSV row for `log` as it appears in a file without headers.
  def write_headerless_log_row
    file.write(BulkUpload::SalesLogToCsv.new(log:, col_offset: 0).to_2025_csv_row)
  end

  context "when parsing csv with headers" do
    before do
      write_header_labels(
        "Question",
        "Additional info",
        "Values",
        "Can be empty?",
        "Type of letting the question applies to",
        "Duplicate check field?",
      )
      write_field_numbers_and_log_row
      file.write("\n")
      file.rewind
    end

    it "returns correct offsets" do
      expect(service.row_offset).to eq(7)
      expect(service.col_offset).to eq(1)
    end

    it "parses csv correctly" do
      expect(service.row_parsers.first.field_22).to eql(log.uprn)
    end

    it "counts the number of valid field numbers correctly" do
      expect(service).to be_correct_field_count
    end

    it "does not parse the last empty row" do
      expect(service.row_parsers.count).to eq(1)
    end
  end

  context "when some csv headers are empty (and we don't care about them)" do
    before do
      # The fourth header line is deliberately blank.
      write_header_labels(
        "Question",
        "Additional info",
        "Values",
        "",
        "Type of letting the question applies to",
        "Duplicate check field?",
      )
      write_field_numbers_and_log_row
      file.write("\n")
      file.rewind
    end

    it "returns correct offsets" do
      expect(service.row_offset).to eq(7)
      expect(service.col_offset).to eq(1)
    end

    it "parses csv correctly" do
      expect(service.row_parsers.first.field_22).to eql(log.uprn)
    end

    it "counts the number of valid field numbers correctly" do
      expect(service).to be_correct_field_count
    end

    it "does not parse the last empty row" do
      expect(service.row_parsers.count).to eq(1)
    end
  end

  context "when parsing csv with headers in arbitrary order" do
    let(:seed) { rand }

    before do
      write_header_labels(
        "Question",
        "Additional info",
        "Values",
        "Can be empty?",
        "Type of letting the question applies to",
        "Duplicate check field?",
      )
      # The same seed is given to both rows so headers and values stay aligned.
      write_field_numbers_and_log_row(seed:)
      file.rewind
    end

    it "returns correct offsets" do
      expect(service.row_offset).to eq(7)
      expect(service.col_offset).to eq(1)
    end

    it "parses csv correctly" do
      expect(service.row_parsers.first.field_22).to eql(log.uprn)
    end
  end

  context "when parsing csv without headers" do
    before do
      write_headerless_log_row
      file.rewind
    end

    it "returns correct offsets" do
      expect(service.row_offset).to eq(0)
      expect(service.col_offset).to eq(0)
    end

    it "parses csv correctly" do
      expect(service.row_parsers.first.field_22).to eql(log.uprn)
    end
  end

  context "when parsing with BOM aka byte order mark" do
    let(:bom) { "\uFEFF" }

    before do
      file.write(bom)
      write_headerless_log_row
      file.close
    end

    it "parses csv correctly" do
      expect(service.row_parsers.first.field_22).to eql(log.uprn)
    end
  end

  context "when an invalid byte sequence" do
    let(:invalid_sequence) { "\x81" }

    before do
      file.write(invalid_sequence)
      write_headerless_log_row
      file.close
    end

    it "parses csv correctly" do
      expect(service.row_parsers.first.field_22).to eql(log.uprn)
    end
  end

  describe "#column_for_field", aggregate_failures: true do
    context "when headers present" do
      before do
        write_header_labels(
          "Question",
          "Additional info",
          "Values",
          "Can be empty?",
          "Type of letting the question applies to",
          "Duplicate check field?",
        )
        write_field_numbers_and_log_row
        file.rewind
      end

      it "returns correct column" do
        expect(service.column_for_field("field_1")).to eql("B")
        expect(service.column_for_field("field_99")).to eql("CV")
      end
    end
  end

  context "when parsing csv with carriage returns" do
    before do
      # Header lines alternate between "\r\n" and bare "\r" terminators.
      [
        "Question\r\n",
        "Additional info\r",
        "Values\r\n",
        "Can be empty?\r",
        "Type of letting the question applies to\r\n",
        "Duplicate check field?\r",
      ].each { |line| file.write(line) }
      write_field_numbers_and_log_row
      file.rewind
    end

    it "parses csv correctly" do
      expect(service.row_parsers.first.field_22).to eql(log.uprn)
    end
  end
end
end |
Loading…
Reference in new issue