4 changed files with 5046 additions and 0 deletions
@ -0,0 +1,122 @@
|
||||
require "csv" |
||||
|
||||
# Reads a lettings bulk-upload CSV from disk and exposes its rows, the
# mapping between spreadsheet columns and "field_N" identifiers, and a few
# sanity checks on the file's shape.
#
# A file is treated as having headers when the first cell of some row matches
# "field number" (case-insensitive). In that case the first column is a label
# column and data starts on the row after the field-number row. Otherwise
# every row is data and fields are assumed to be in default order
# (field_1..field_FIELDS).
class BulkUpload::Lettings::Year2025::CsvParser
  include CollectionTimeHelper

  # Number of valid (non-blank) field numbers a file is expected to declare.
  FIELDS = 130
  # Widest data row accepted by #too_many_columns? for header-less files.
  MAX_COLUMNS = 131
  # NOTE(review): this class lives in the Year2025 namespace, yet FORM_YEAR
  # and the row parser built in #row_parsers still reference 2024. Confirm
  # whether that is deliberate before relying on #wrong_template_for_year?.
  FORM_YEAR = 2024

  # First-cell text that identifies the row listing the field numbers.
  HEADER_MARKER = /field number/i
  # A header cell is a field number only if it is digits from start to end.
  # \A and \z are used (not ^ and $) so a multi-line cell cannot match.
  FIELD_NUMBER_FORMAT = /\A[0-9]+\z/
  private_constant :HEADER_MARKER, :FIELD_NUMBER_FORMAT

  attr_reader :path

  # @param path [String] location of the CSV file on disk
  def initialize(path:)
    @path = path
  end

  # @return [Integer] index of the first data row within #rows
  def row_offset
    with_headers? ? header_row_index + 1 : 0
  end

  # @return [Integer] number of leading label columns to skip in each row
  def col_offset
    with_headers? ? 1 : 0
  end

  # @return [Array<String>] spreadsheet column letters "A" through "EA"
  def cols
    @cols ||= ("A".."EA").to_a
  end

  # Builds one row parser per non-empty data row, keyed by field identifier.
  #
  # @return [Array] row parser instances, in file order
  def row_parsers
    @row_parsers ||= body_rows.filter_map do |row|
      next if row.empty?

      values = row[col_offset..]
      BulkUpload::Lettings::Year2024::RowParser.new(field_numbers.zip(values).to_h)
    end
  end

  # @return [Array<Array>] the rows from #row_offset onwards
  def body_rows
    rows[row_offset..]
  end

  # @return [Array<Array>] every parsed row of the file, headers included
  def rows
    @rows ||= CSV.parse(normalised_string, row_sep:)
  end

  # Translates a field identifier into the spreadsheet column holding it.
  #
  # @param field [String] identifier such as "field_5"
  # @return [String, nil] column letter, or nil when the file does not
  #   declare that field (previously this raised NoMethodError)
  def column_for_field(field)
    position = field_numbers.find_index(field)
    return if position.nil?

    cols[position + col_offset]
  end

  # @return [Boolean] whether exactly FIELDS non-blank field numbers are present
  def correct_field_count?
    field_numbers.count { |field| field != "field_blank" } == FIELDS
  end

  # Only header-less files are checked; files with headers always return false.
  #
  # @return [Boolean] whether any data row is wider than MAX_COLUMNS
  def too_many_columns?
    return false if with_headers?

    widest_row = body_rows.map(&:size).max
    # No rows at all (e.g. an empty file) means there is nothing too wide.
    return false if widest_row.nil?

    widest_row - col_offset > MAX_COLUMNS
  end

  # Compares the collection year of the first record's start date to FORM_YEAR.
  # Returns false when there is no first record or its date is invalid, since
  # there is then nothing to judge the template by.
  #
  # @return [Boolean]
  def wrong_template_for_year?
    start_date = first_record_start_date
    return false if start_date.nil?

    collection_start_year_for_date(start_date) != FORM_YEAR
  rescue Date::Error
    false
  end

  # @return [Boolean] true when the file has no field-number header row
  def missing_required_headers?
    !with_headers?
  end

private

  # @return [Array<String>] "field_1".."field_FIELDS", the default ordering
  def default_field_numbers
    (1..FIELDS).map { |number| "field_#{number}" }
  end

  # Field identifier for each data column. With headers these come from the
  # field-number row; cells that are not purely numeric become "field_blank".
  #
  # @return [Array<String>]
  def field_numbers
    @field_numbers ||= if with_headers?
                         rows[header_row_index][col_offset..].map do |cell|
                           cell.to_s.match?(FIELD_NUMBER_FORMAT) ? "field_#{cell}" : "field_blank"
                         end
                       else
                         default_field_numbers
                       end
  end

  # @return [Boolean] whether some row starts with the field-number marker
  def with_headers?
    !header_row_index.nil?
  end

  # Memoised because #col_offset consults it once per data row.
  #
  # @return [Integer, nil] index of the field-number row, nil if absent
  def header_row_index
    return @header_row_index if defined?(@header_row_index)

    @header_row_index = rows.find_index { |row| row[0]&.match?(HEADER_MARKER) }
  end

  def row_sep
    "\n"
  end

  # File contents with any BOM dropped, invalid bytes removed and every
  # line ending turned into "\n".
  #
  # @return [String]
  def normalised_string
    return @normalised_string if @normalised_string

    @normalised_string = File.read(path, encoding: "bom|utf-8")
    # Bang methods return nil when nothing changed, so they are not chained.
    @normalised_string.gsub!("\r\n", "\n")
    @normalised_string.scrub!("")
    @normalised_string.tr!("\r", "\n")

    @normalised_string
  end

  # Raw day, month and year cells of the first record.
  #
  # @return [Array, nil] [day, month, year], or nil when there is no record
  def first_record_date_parts
    if with_headers?
      parser = row_parsers.first
      parser && [parser.field_8, parser.field_9, parser.field_10]
    else
      # Without headers fields are in default order, so fields 8-10 sit at
      # indexes 7-9.
      rows.first&.values_at(7, 8, 9)
    end
  end

  # One- or two-character years are taken to be 20xx.
  def expand_year(value)
    year = value.to_i
    value.to_s.strip.length.between?(1, 2) ? year + 2000 : year
  end

  # @return [Date, nil] start date of the first record, nil if no record
  # @raise [Date::Error] when the cells do not form a valid date
  def first_record_start_date
    parts = first_record_date_parts
    return if parts.nil?

    day, month, year = parts
    Date.new(expand_year(year), month.to_i, day.to_i)
  end
end
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,254 @@
|
||||
require "rails_helper" |
||||
|
||||
# Exercises the CSV parser against temp files written in the layouts it has
# to cope with: with and without header rows, shuffled columns, BOMs,
# invalid bytes and mixed line endings.
RSpec.describe BulkUpload::Lettings::Year2025::CsvParser do
  subject(:service) { described_class.new(path:) }

  let(:file) { Tempfile.new }
  let(:path) { file.path }
  let(:log) { build(:lettings_log, :completed) }

  # The descriptive rows that precede the field-number row in a template.
  let(:preamble) do
    [
      "Question\n",
      "Additional info\n",
      "Values\n",
      "Can be empty?\n",
      "Type of letting the question applies to\n",
      "Duplicate check field?\n",
    ]
  end

  # A fresh CSV builder for the log under test.
  def csv_builder(**options)
    BulkUpload::LettingsLogToCsv.new(log:, **options)
  end

  # Field-number row followed by the data row, both in the same column order.
  def header_and_data_rows(**options)
    [
      csv_builder.default_2024_field_numbers_row(**options),
      csv_builder.to_2024_csv_row(**options),
    ]
  end

  # Writes the chunks to the temp file in order, then rewinds it.
  def populate_file(*chunks)
    chunks.flatten.each { |chunk| file.write(chunk) }
    file.rewind
  end

  context "when parsing csv with headers" do
    before { populate_file(preamble, header_and_data_rows) }

    it "returns correct offsets" do
      expect(service.row_offset).to eq(7)
      expect(service.col_offset).to eq(1)
    end

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_13).to eql(log.tenancycode)
    end
  end

  context "when some csv headers are empty (and we don't care about them)" do
    before do
      blanked = preamble.map { |line| line == "Can be empty?\n" ? "\n" : line }
      populate_file(blanked, header_and_data_rows)
    end

    it "returns correct offsets" do
      expect(service.row_offset).to eq(7)
      expect(service.col_offset).to eq(1)
    end

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_13).to eql(log.tenancycode)
    end
  end

  context "when parsing csv with headers with extra rows" do
    before { populate_file("Section\n", preamble, header_and_data_rows, "\n") }

    it "returns correct offsets" do
      expect(service.row_offset).to eq(8)
      expect(service.col_offset).to eq(1)
    end

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_13).to eql(log.tenancycode)
    end

    it "does not parse the last empty row" do
      expect(service.row_parsers.count).to eq(1)
    end
  end

  context "when parsing csv with headers in arbitrary order" do
    let(:seed) { rand }

    before { populate_file(preamble, header_and_data_rows(seed:)) }

    it "returns correct offsets" do
      expect(service.row_offset).to eq(7)
      expect(service.col_offset).to eq(1)
    end

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_13).to eql(log.tenancycode)
    end
  end

  context "when parsing csv with extra invalid headers" do
    let(:seed) { rand }
    let(:log_to_csv) { BulkUpload::LettingsLogToCsv.new(log:) }
    let(:field_numbers) { log_to_csv.default_2024_field_numbers + %w[invalid_field_number] }
    let(:field_values) { log_to_csv.to_2024_row + %w[value_for_invalid_field_number] }

    before do
      populate_file(
        preamble,
        log_to_csv.custom_field_numbers_row(seed:, field_numbers:),
        log_to_csv.to_custom_csv_row(seed:, field_values:),
      )
    end

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_13).to eql(log.tenancycode)
    end

    it "counts the number of valid field numbers correctly" do
      expect(service).to be_correct_field_count
    end
  end

  context "when parsing csv without headers" do
    before { populate_file(csv_builder(col_offset: 0).to_2024_csv_row) }

    it "returns correct offsets" do
      expect(service.row_offset).to eq(0)
      expect(service.col_offset).to eq(0)
    end

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_13).to eql(log.tenancycode)
    end
  end

  context "when parsing with BOM aka byte order mark" do
    let(:bom) { "\uFEFF" }

    before { populate_file(bom, csv_builder(col_offset: 0).to_2024_csv_row) }

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_13).to eql(log.tenancycode)
    end
  end

  context "when an invalid byte sequence" do
    let(:invalid_sequence) { "\x81" }

    before { populate_file(invalid_sequence, csv_builder(col_offset: 0).to_2024_csv_row) }

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_13).to eql(log.tenancycode)
    end
  end

  context "when parsing csv with carriage returns" do
    before do
      populate_file(
        "Question\r\n",
        "Additional info\r",
        "Values\r\n",
        "Can be empty?\r",
        "Type of letting the question applies to\r\n",
        "Duplicate check field?\r",
        header_and_data_rows,
      )
    end

    it "parses csv correctly" do
      expect(service.row_parsers[0].field_13).to eql(log.tenancycode)
    end
  end

  describe "#column_for_field", aggregate_failures: true do
    context "when with headers using default ordering" do
      before { populate_file(preamble, header_and_data_rows) }

      it "returns correct column" do
        expect(service.column_for_field("field_5")).to eql("F")
        expect(service.column_for_field("field_22")).to eql("W")
      end
    end

    context "when without headers using default ordering" do
      before { populate_file(csv_builder(col_offset: 0).to_2024_csv_row) }

      it "returns correct column" do
        expect(service.column_for_field("field_5")).to eql("E")
        expect(service.column_for_field("field_22")).to eql("V")
      end
    end

    context "when with headers using custom ordering" do
      let(:seed) { 123 }

      before { populate_file(preamble, header_and_data_rows(seed:)) }

      it "returns correct column" do
        expect(service.column_for_field("field_5")).to eql("B")
        expect(service.column_for_field("field_22")).to eql("AS")
        expect(service.column_for_field("field_26")).to eql("DH")
        expect(service.column_for_field("field_25")).to eql("I")
      end
    end
  end
end
Loading…
Reference in new issue