
Copy row and csv parser for 2025 lettings

pull/2939/head
Kat · 3 months ago · commit d0bd05f166
  1. app/services/bulk_upload/lettings/year2025/csv_parser.rb (+122)
  2. app/services/bulk_upload/lettings/year2025/row_parser.rb (+1652)
  3. spec/services/bulk_upload/lettings/year2025/csv_parser_spec.rb (+254)
  4. spec/services/bulk_upload/lettings/year2025/row_parser_spec.rb (+3018)

app/services/bulk_upload/lettings/year2025/csv_parser.rb (+122)

@@ -0,0 +1,122 @@
require "csv"
class BulkUpload::Lettings::Year2025::CsvParser
include CollectionTimeHelper
FIELDS = 130
MAX_COLUMNS = 131
FORM_YEAR = 2024
attr_reader :path
def initialize(path:)
@path = path
end
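
  # Data rows start immediately after the row labelled "field number"; header-less files start at row 0.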
  def row_offset
    if with_headers?
      rows.find_index { |row| row[0].present? && row[0].match(/field number/i) } + 1
    else
      0
    end
  end

  def col_offset
    with_headers? ? 1 : 0
  end

  def cols
    @cols ||= ("A".."EA").to_a
  end
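
  # Builds one row parser per non-empty data row, pairing each field number with its cell value.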
  def row_parsers
    @row_parsers ||= body_rows.map { |row|
      next if row.empty?

      stripped_row = row[col_offset..]
      hash = Hash[field_numbers.zip(stripped_row)]
      BulkUpload::Lettings::Year2025::RowParser.new(hash)
    }.compact
  end

  def body_rows
    rows[row_offset..]
  end

  def rows
    @rows ||= CSV.parse(normalised_string, row_sep:)
  end
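
  # Maps a field name (e.g. "field_5") back to its spreadsheet column letter.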
  def column_for_field(field)
    cols[field_numbers.find_index(field) + col_offset]
  end

  def correct_field_count?
    valid_field_numbers_count = field_numbers.count { |f| f != "field_blank" }
    valid_field_numbers_count == FIELDS
  end
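
  # Only checked for files without headers: flags rows wider than the template allows.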
  def too_many_columns?
    return if with_headers?

    max_columns_count = body_rows.map(&:size).max - col_offset
    max_columns_count > MAX_COLUMNS
  end
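
  # Compares the collection year implied by the first record's start date against FORM_YEAR.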
  def wrong_template_for_year?
    collection_start_year_for_date(first_record_start_date) != FORM_YEAR
  rescue Date::Error
    false
  end

  def missing_required_headers?
    !with_headers?
  end

private

  def default_field_numbers
    (1..FIELDS).map { |h| h.present? && h.to_s.match?(/^[0-9]+$/) ? "field_#{h}" : "field_blank" }
  end
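
  # With headers, field numbers come from the field-number row itself; non-numeric cells map to "field_blank".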
  def field_numbers
    @field_numbers ||= if with_headers?
                         rows[row_offset - 1][col_offset..].map { |h| h.present? && h.match?(/^[0-9]+$/) ? "field_#{h}" : "field_blank" }
                       else
                         default_field_numbers
                       end
  end
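
  # Headers are detected by a "field number" label anywhere in the first column.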
  def with_headers?
    rows.map { |r| r[0] }.any? { |cell| cell&.match?(/field number/i) }
  end

  def row_sep
    "\n"
  end
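
  # Reads the file tolerating a UTF-8 BOM, normalises line endings to "\n" and scrubs invalid byte sequences.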
  def normalised_string
    return @normalised_string if @normalised_string

    @normalised_string = File.read(path, encoding: "bom|utf-8")
    @normalised_string.gsub!("\r\n", "\n")
    @normalised_string.scrub!("")
    @normalised_string.tr!("\r", "\n")
    @normalised_string
  end
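
  # Builds the first record's start date from its day/month/year fields, expanding two-digit years to 20xx.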
  def first_record_start_date
    if with_headers?
      year = row_parsers.first.field_10.to_s.strip.length.between?(1, 2) ? row_parsers.first.field_10.to_i + 2000 : row_parsers.first.field_10.to_i
      Date.new(year, row_parsers.first.field_9.to_i, row_parsers.first.field_8.to_i)
    else
      year = rows.first[9].to_s.strip.length.between?(1, 2) ? rows.first[9].to_i + 2000 : rows.first[9].to_i
      Date.new(year, rows.first[8].to_i, rows.first[7].to_i)
    end
  end
end

app/services/bulk_upload/lettings/year2025/row_parser.rb (+1652)

File diff suppressed because it is too large.

spec/services/bulk_upload/lettings/year2025/csv_parser_spec.rb (+254)

@@ -0,0 +1,254 @@
require "rails_helper"
RSpec.describe BulkUpload::Lettings::Year2025::CsvParser do
subject(:service) { described_class.new(path:) }
let(:file) { Tempfile.new }
let(:path) { file.path }
let(:log) { build(:lettings_log, :completed) }
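
  # The headed template has six descriptive rows, then the field-number row, then the data rows.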
context "when parsing csv with headers" do
before do
file.write("Question\n")
file.write("Additional info\n")
file.write("Values\n")
file.write("Can be empty?\n")
file.write("Type of letting the question applies to\n")
file.write("Duplicate check field?\n")
file.write(BulkUpload::LettingsLogToCsv.new(log:).default_2024_field_numbers_row)
file.write(BulkUpload::LettingsLogToCsv.new(log:).to_2024_csv_row)
file.rewind
end
it "returns correct offsets" do
expect(service.row_offset).to eq(7)
expect(service.col_offset).to eq(1)
end
it "parses csv correctly" do
expect(service.row_parsers[0].field_13).to eql(log.tenancycode)
end
end
context "when some csv headers are empty (and we don't care about them)" do
before do
file.write("Question\n")
file.write("Additional info\n")
file.write("Values\n")
file.write("\n")
file.write("Type of letting the question applies to\n")
file.write("Duplicate check field?\n")
file.write(BulkUpload::LettingsLogToCsv.new(log:).default_2024_field_numbers_row)
file.write(BulkUpload::LettingsLogToCsv.new(log:).to_2024_csv_row)
file.rewind
end
it "returns correct offsets" do
expect(service.row_offset).to eq(7)
expect(service.col_offset).to eq(1)
end
it "parses csv correctly" do
expect(service.row_parsers[0].field_13).to eql(log.tenancycode)
end
end
context "when parsing csv with headers with extra rows" do
before do
file.write("Section\n")
file.write("Question\n")
file.write("Additional info\n")
file.write("Values\n")
file.write("Can be empty?\n")
file.write("Type of letting the question applies to\n")
file.write("Duplicate check field?\n")
file.write(BulkUpload::LettingsLogToCsv.new(log:).default_2024_field_numbers_row)
file.write(BulkUpload::LettingsLogToCsv.new(log:).to_2024_csv_row)
file.write("\n")
file.rewind
end
it "returns correct offsets" do
expect(service.row_offset).to eq(8)
expect(service.col_offset).to eq(1)
end
it "parses csv correctly" do
expect(service.row_parsers[0].field_13).to eql(log.tenancycode)
end
it "does not parse the last empty row" do
expect(service.row_parsers.count).to eq(1)
end
end
context "when parsing csv with headers in arbitrary order" do
let(:seed) { rand }
before do
file.write("Question\n")
file.write("Additional info\n")
file.write("Values\n")
file.write("Can be empty?\n")
file.write("Type of letting the question applies to\n")
file.write("Duplicate check field?\n")
file.write(BulkUpload::LettingsLogToCsv.new(log:).default_2024_field_numbers_row(seed:))
file.write(BulkUpload::LettingsLogToCsv.new(log:).to_2024_csv_row(seed:))
file.rewind
end
it "returns correct offsets" do
expect(service.row_offset).to eq(7)
expect(service.col_offset).to eq(1)
end
it "parses csv correctly" do
expect(service.row_parsers[0].field_13).to eql(log.tenancycode)
end
end
context "when parsing csv with extra invalid headers" do
let(:seed) { rand }
let(:log_to_csv) { BulkUpload::LettingsLogToCsv.new(log:) }
let(:field_numbers) { log_to_csv.default_2024_field_numbers + %w[invalid_field_number] }
let(:field_values) { log_to_csv.to_2024_row + %w[value_for_invalid_field_number] }
before do
file.write("Question\n")
file.write("Additional info\n")
file.write("Values\n")
file.write("Can be empty?\n")
file.write("Type of letting the question applies to\n")
file.write("Duplicate check field?\n")
file.write(log_to_csv.custom_field_numbers_row(seed:, field_numbers:))
file.write(log_to_csv.to_custom_csv_row(seed:, field_values:))
file.rewind
end
it "parses csv correctly" do
expect(service.row_parsers[0].field_13).to eql(log.tenancycode)
end
it "counts the number of valid field numbers correctly" do
expect(service).to be_correct_field_count
end
end
context "when parsing csv without headers" do
before do
file.write(BulkUpload::LettingsLogToCsv.new(log:, col_offset: 0).to_2024_csv_row)
file.rewind
end
it "returns correct offsets" do
expect(service.row_offset).to eq(0)
expect(service.col_offset).to eq(0)
end
it "parses csv correctly" do
expect(service.row_parsers[0].field_13).to eql(log.tenancycode)
end
end
context "when parsing with BOM aka byte order mark" do
let(:bom) { "\uFEFF" }
before do
file.write(bom)
file.write(BulkUpload::LettingsLogToCsv.new(log:, col_offset: 0).to_2024_csv_row)
file.rewind
end
it "parses csv correctly" do
expect(service.row_parsers[0].field_13).to eql(log.tenancycode)
end
end
context "when an invalid byte sequence" do
let(:invalid_sequence) { "\x81" }
before do
file.write(invalid_sequence)
file.write(BulkUpload::LettingsLogToCsv.new(log:, col_offset: 0).to_2024_csv_row)
file.rewind
end
it "parses csv correctly" do
expect(service.row_parsers[0].field_13).to eql(log.tenancycode)
end
end
context "when parsing csv with carriage returns" do
before do
file.write("Question\r\n")
file.write("Additional info\r")
file.write("Values\r\n")
file.write("Can be empty?\r")
file.write("Type of letting the question applies to\r\n")
file.write("Duplicate check field?\r")
file.write(BulkUpload::LettingsLogToCsv.new(log:).default_2024_field_numbers_row)
file.write(BulkUpload::LettingsLogToCsv.new(log:).to_2024_csv_row)
file.rewind
end
it "parses csv correctly" do
expect(service.row_parsers[0].field_13).to eql(log.tenancycode)
end
end
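
  # With headers the data cells start in column B, so each field's column letter is shifted one to the right of the header-less layout.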
describe "#column_for_field", aggregate_failures: true do
context "when with headers using default ordering" do
before do
file.write("Question\n")
file.write("Additional info\n")
file.write("Values\n")
file.write("Can be empty?\n")
file.write("Type of letting the question applies to\n")
file.write("Duplicate check field?\n")
file.write(BulkUpload::LettingsLogToCsv.new(log:).default_2024_field_numbers_row)
file.write(BulkUpload::LettingsLogToCsv.new(log:).to_2024_csv_row)
file.rewind
end
it "returns correct column" do
expect(service.column_for_field("field_5")).to eql("F")
expect(service.column_for_field("field_22")).to eql("W")
end
end
context "when without headers using default ordering" do
before do
file.write(BulkUpload::LettingsLogToCsv.new(log:, col_offset: 0).to_2024_csv_row)
file.rewind
end
it "returns correct column" do
expect(service.column_for_field("field_5")).to eql("E")
expect(service.column_for_field("field_22")).to eql("V")
end
end
context "when with headers using custom ordering" do
let(:seed) { 123 }
before do
file.write("Question\n")
file.write("Additional info\n")
file.write("Values\n")
file.write("Can be empty?\n")
file.write("Type of letting the question applies to\n")
file.write("Duplicate check field?\n")
file.write(BulkUpload::LettingsLogToCsv.new(log:).default_2024_field_numbers_row(seed:))
file.write(BulkUpload::LettingsLogToCsv.new(log:).to_2024_csv_row(seed:))
file.rewind
end
it "returns correct column" do
expect(service.column_for_field("field_5")).to eql("B")
expect(service.column_for_field("field_22")).to eql("AS")
expect(service.column_for_field("field_26")).to eql("DH")
expect(service.column_for_field("field_25")).to eql("I")
end
end
end
end

spec/services/bulk_upload/lettings/year2025/row_parser_spec.rb (+3018)

File diff suppressed because it is too large.