diff --git a/app/services/bulk_upload/lettings/validator.rb b/app/services/bulk_upload/lettings/validator.rb index 1a0e6ecf5..e78c1138b 100644 --- a/app/services/bulk_upload/lettings/validator.rb +++ b/app/services/bulk_upload/lettings/validator.rb @@ -9,8 +9,8 @@ class BulkUpload::Lettings::Validator attr_reader :bulk_upload, :path validate :validate_file_not_empty - validate :validate_min_columns - validate :validate_max_columns + validate :validate_field_numbers_count + validate :validate_max_columns_count_if_no_headers def initialize(bulk_upload:, path:) @bulk_upload = bulk_upload @@ -129,20 +129,16 @@ private end end - def validate_min_columns + def validate_field_numbers_count return if halt_validations? - column_count = rows.map(&:size).min - - errors.add(:base, :under_min_column_count) if column_count < csv_parser.class::MIN_COLUMNS + errors.add(:base, :wrong_field_numbers_count) if csv_parser.incorrect_field_count? end - def validate_max_columns + def validate_max_columns_count_if_no_headers return if halt_validations? - column_count = rows.map(&:size).max - - errors.add(:base, :over_max_column_count) if column_count > csv_parser.class::MAX_COLUMNS + errors.add(:base, :over_max_column_count) if csv_parser.too_many_columns? end def halt_validations! diff --git a/app/services/bulk_upload/lettings/year2022/csv_parser.rb b/app/services/bulk_upload/lettings/year2022/csv_parser.rb index c0d8d8900..3e6aaa640 100644 --- a/app/services/bulk_upload/lettings/year2022/csv_parser.rb +++ b/app/services/bulk_upload/lettings/year2022/csv_parser.rb @@ -1,8 +1,8 @@ require "csv" class BulkUpload::Lettings::Year2022::CsvParser - MIN_COLUMNS = 134 - MAX_COLUMNS = 136 + FIELDS = 134 + MAX_COLUMNS = 135 attr_reader :path @@ -11,7 +11,11 @@ class BulkUpload::Lettings::Year2022::CsvParser end def row_offset - with_headers? ? 5 : 0 + if with_headers? 
+ rows.find_index { |row| row[0].match(/field number/i) } + 1 + else + 0 + end end def col_offset @@ -25,8 +29,7 @@ class BulkUpload::Lettings::Year2022::CsvParser def row_parsers @row_parsers ||= body_rows.map do |row| stripped_row = row[col_offset..] - headers = ("field_1".."field_134").to_a - hash = Hash[headers.zip(stripped_row)] + hash = Hash[field_numbers.zip(stripped_row)] BulkUpload::Lettings::Year2022::RowParser.new(hash) end @@ -41,17 +44,39 @@ class BulkUpload::Lettings::Year2022::CsvParser end def column_for_field(field) - cols[headers.find_index(field) + col_offset] + cols[field_numbers.find_index(field) + col_offset] + end + + def incorrect_field_count? + valid_field_numbers_count = field_numbers.count { |f| f != "field_blank" } + + valid_field_numbers_count != FIELDS + end + + def too_many_columns? + return if with_headers? + + max_columns_count = body_rows.map(&:size).max - col_offset + + max_columns_count > MAX_COLUMNS end private - def headers - @headers ||= ("field_1".."field_134").to_a + def default_field_numbers + ("field_1".."field_#{FIELDS}").to_a + end + + def field_numbers + @field_numbers ||= if with_headers? + rows[row_offset - 1][col_offset..].map { |h| h.present? && h.match?(/^[0-9]+$/) ? "field_#{h}" : "field_blank" } + else + default_field_numbers + end end def with_headers? - rows[0][0]&.match?(/\D+/) + rows.map { |r| r[0] }.any? 
{ |cell| cell&.match?(/field number/i) } end def row_sep diff --git a/app/services/bulk_upload/lettings/year2022/row_parser.rb b/app/services/bulk_upload/lettings/year2022/row_parser.rb index 5b88ebdbe..b6ba93dc5 100644 --- a/app/services/bulk_upload/lettings/year2022/row_parser.rb +++ b/app/services/bulk_upload/lettings/year2022/row_parser.rb @@ -142,6 +142,8 @@ class BulkUpload::Lettings::Year2022::RowParser attribute :bulk_upload attribute :block_log_creation, :boolean, default: -> { false } + attribute :field_blank + attribute :field_1, :integer attribute :field_2 attribute :field_3 @@ -366,7 +368,7 @@ class BulkUpload::Lettings::Year2022::RowParser def blank_row? attribute_set .to_hash - .reject { |k, _| %w[bulk_upload block_log_creation].include?(k) } + .reject { |k, _| %w[bulk_upload block_log_creation field_blank].include?(k) } .values .reject(&:blank?) .compact diff --git a/app/services/bulk_upload/lettings/year2023/csv_parser.rb b/app/services/bulk_upload/lettings/year2023/csv_parser.rb index 6f661922c..cc351d603 100644 --- a/app/services/bulk_upload/lettings/year2023/csv_parser.rb +++ b/app/services/bulk_upload/lettings/year2023/csv_parser.rb @@ -1,8 +1,8 @@ require "csv" class BulkUpload::Lettings::Year2023::CsvParser - MIN_COLUMNS = 141 - MAX_COLUMNS = 143 + FIELDS = 134 + MAX_COLUMNS = 142 attr_reader :path @@ -47,15 +47,29 @@ class BulkUpload::Lettings::Year2023::CsvParser cols[field_numbers.find_index(field) + col_offset] end + def incorrect_field_count? + valid_field_numbers_count = field_numbers.count { |f| f != "field_blank" } + + valid_field_numbers_count != FIELDS + end + + def too_many_columns? + return if with_headers? 
+ + max_columns_count = body_rows.map(&:size).max - col_offset + + max_columns_count > MAX_COLUMNS + end + private def default_field_numbers - [5, nil, nil, 15, 16, nil, 13, 40, 41, 42, 43, 46, 52, 56, 60, 64, 68, 72, 76, 47, 53, 57, 61, 65, 69, 73, 77, 51, 55, 59, 63, 67, 71, 75, 50, 54, 58, 62, 66, 70, 74, 78, 48, 49, 79, 81, 82, 123, 124, 122, 120, 102, 103, nil, 83, 84, 85, 86, 87, 88, 104, 109, 107, 108, 106, 100, 101, 105, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 126, 128, 129, 130, 131, 132, 127, 125, 133, 134, 33, 34, 35, 36, 37, 38, nil, 7, 8, 9, 28, 14, 32, 29, 30, 31, 26, 27, 25, 23, 24, nil, 1, 3, 2, 80, nil, 121, 44, 89, 98, 92, 95, 90, 91, 93, 94, 97, 96, 99, 10, 11, 12, 45, 39, 6, 4, 17, 18, 19, 20, 21, 22].map { |h| h.present? ? "field_#{h}" : "field_blank" } + [5, nil, nil, 15, 16, nil, 13, 40, 41, 42, 43, 46, 52, 56, 60, 64, 68, 72, 76, 47, 53, 57, 61, 65, 69, 73, 77, 51, 55, 59, 63, 67, 71, 75, 50, 54, 58, 62, 66, 70, 74, 78, 48, 49, 79, 81, 82, 123, 124, 122, 120, 102, 103, nil, 83, 84, 85, 86, 87, 88, 104, 109, 107, 108, 106, 100, 101, 105, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 126, 128, 129, 130, 131, 132, 127, 125, 133, 134, 33, 34, 35, 36, 37, 38, nil, 7, 8, 9, 28, 14, 32, 29, 30, 31, 26, 27, 25, 23, 24, nil, 1, 3, 2, 80, nil, 121, 44, 89, 98, 92, 95, 90, 91, 93, 94, 97, 96, 99, 10, 11, 12, 45, 39, 6, 4, 17, 18, 19, 20, 21, 22].map { |h| h.present? && h.to_s.match?(/^[0-9]+$/) ? "field_#{h}" : "field_blank" } end def field_numbers @field_numbers ||= if with_headers? - rows[row_offset - 1][col_offset..].map { |h| h.present? ? "field_#{h}" : "field_blank" } + rows[row_offset - 1][col_offset..].map { |h| h.present? && h.match?(/^[0-9]+$/) ? 
"field_#{h}" : "field_blank" } else default_field_numbers end diff --git a/config/locales/en.yml b/config/locales/en.yml index 12738ad93..7bf8839a9 100644 --- a/config/locales/en.yml +++ b/config/locales/en.yml @@ -44,8 +44,8 @@ en: bulk_upload/lettings/validator: attributes: base: + wrong_field_numbers_count: "incorrect number of fields, please ensure you have used the correct template" over_max_column_count: "too many columns, please ensure you have used the correct template" - under_min_column_count: "too few columns, please ensure you have used the correct template" forms/bulk_upload_lettings/year: attributes: year: diff --git a/spec/services/bulk_upload/lettings/validator_spec.rb b/spec/services/bulk_upload/lettings/validator_spec.rb index 5eeb32c4a..679602ec0 100644 --- a/spec/services/bulk_upload/lettings/validator_spec.rb +++ b/spec/services/bulk_upload/lettings/validator_spec.rb @@ -5,6 +5,7 @@ RSpec.describe BulkUpload::Lettings::Validator do let(:organisation) { create(:organisation, old_visible_id: "3") } let(:user) { create(:user, organisation:) } + let(:log) { build(:lettings_log, :completed) } let(:bulk_upload) { create(:bulk_upload, user:) } let(:path) { file.path } let(:file) { Tempfile.new } @@ -16,31 +17,181 @@ RSpec.describe BulkUpload::Lettings::Validator do end end - context "when file has too many columns" do - before do - file.write("a," * 136) - file.write("\n") - file.rewind + context "when 2022" do + let(:bulk_upload) { create(:bulk_upload, user:, year: 2022) } + + context "when file has no headers" do + context "and too many columns" do + before do + file.write(("a" * 136).chars.join(",")) + file.write("\n") + file.rewind + end + + it "is not valid" do + expect(validator).not_to be_valid + expect(validator.errors["base"]).to eql(["too many columns, please ensure you have used the correct template"]) + end + end + + context "and doesn't have too many columns" do + before do + file.write(("a" * 135).chars.join(",")) + file.write("\n") + 
file.rewind + end + + it "is valid" do + expect(validator).to be_valid + end + end end - it "is not valid" do - expect(validator).not_to be_valid + context "when file has headers" do + context "and file has extra invalid headers" do + let(:seed) { rand } + let(:log_to_csv) { BulkUpload::LogToCsv.new(log:) } + let(:field_numbers) { log_to_csv.default_2022_field_numbers + %w[invalid_field_number] } + let(:field_values) { log_to_csv.to_2022_row + %w[value_for_invalid_field_number] } + + before do + file.write(log_to_csv.custom_field_numbers_row(seed:, field_numbers:)) + file.write(log_to_csv.to_custom_csv_row(seed:, field_values:)) + file.rewind + end + + it "is valid" do + expect(validator).to be_valid + end + end + + context "and file has too few valid headers" do + let(:seed) { rand } + let(:log_to_csv) { BulkUpload::LogToCsv.new(log:) } + let(:field_numbers) { log_to_csv.default_2022_field_numbers } + let(:field_values) { log_to_csv.to_2022_row } + + before do + field_numbers.delete_at(20) + field_values.delete_at(20) + file.write(log_to_csv.custom_field_numbers_row(seed:, field_numbers:)) + file.write(log_to_csv.to_custom_csv_row(seed:, field_values:)) + file.rewind + end + + it "is not valid" do + expect(validator).not_to be_valid + expect(validator.errors["base"]).to eql(["incorrect number of fields, please ensure you have used the correct template"]) + end + end + + context "and file has too many valid headers" do + let(:seed) { rand } + let(:log_to_csv) { BulkUpload::LogToCsv.new(log:) } + let(:field_numbers) { log_to_csv.default_2022_field_numbers + %w[23] } + let(:field_values) { log_to_csv.to_2022_row + %w[value] } + + before do + file.write(log_to_csv.custom_field_numbers_row(seed:, field_numbers:)) + file.write(log_to_csv.to_custom_csv_row(seed:, field_values:)) + file.rewind + end + + it "is not valid" do + expect(validator).not_to be_valid + expect(validator.errors["base"]).to eql(["incorrect number of fields, please ensure you have used the correct 
template"]) + end + end end end - context "when file has too few columns" do - before do - file.write("a," * 132) - file.write("\n") - file.rewind + context "when 2023" do + let(:bulk_upload) { create(:bulk_upload, user:, year: 2023) } + + context "when file has no headers" do + context "and too many columns" do + before do + file.write(("a" * 143).chars.join(",")) + file.write("\n") + file.rewind + end + + it "is not valid" do + expect(validator).not_to be_valid + expect(validator.errors["base"]).to eql(["too many columns, please ensure you have used the correct template"]) + end + end + + context "and doesn't have too many columns" do + before do + file.write(("a" * 142).chars.join(",")) + file.write("\n") + file.rewind + end + + it "is valid" do + expect(validator).to be_valid + end + end end - it "is not valid" do - expect(validator).not_to be_valid + context "when file has headers" do + context "and file has extra invalid headers" do + let(:seed) { rand } + let(:log_to_csv) { BulkUpload::LogToCsv.new(log:) } + let(:field_numbers) { log_to_csv.default_2023_field_numbers + %w[invalid_field_number] } + let(:field_values) { log_to_csv.to_2023_row + %w[value_for_invalid_field_number] } + + before do + file.write(log_to_csv.custom_field_numbers_row(seed:, field_numbers:)) + file.write(log_to_csv.to_custom_csv_row(seed:, field_values:)) + file.rewind + end + + it "is valid" do + expect(validator).to be_valid + end + end + + context "and file has too few valid headers" do + let(:seed) { rand } + let(:log_to_csv) { BulkUpload::LogToCsv.new(log:) } + let(:field_numbers) { log_to_csv.default_2023_field_numbers } + let(:field_values) { log_to_csv.to_2023_row } + + before do + field_numbers.delete_at(20) + field_values.delete_at(20) + file.write(log_to_csv.custom_field_numbers_row(seed:, field_numbers:)) + file.write(log_to_csv.to_custom_csv_row(seed:, field_values:)) + file.rewind + end + + it "is not valid" do + expect(validator).not_to be_valid + 
expect(validator.errors["base"]).to eql(["incorrect number of fields, please ensure you have used the correct template"]) + end + end + + context "and file has too many valid headers" do + let(:seed) { rand } + let(:log_to_csv) { BulkUpload::LogToCsv.new(log:) } + let(:field_numbers) { log_to_csv.default_2023_field_numbers + %w[23] } + let(:field_values) { log_to_csv.to_2023_row + %w[value] } + + before do + file.write(log_to_csv.custom_field_numbers_row(seed:, field_numbers:)) + file.write(log_to_csv.to_custom_csv_row(seed:, field_values:)) + file.rewind + end + + it "is not valid" do + expect(validator).not_to be_valid + expect(validator.errors["base"]).to eql(["incorrect number of fields, please ensure you have used the correct template"]) + end + end end end - - context "when incorrect headers" end describe "#call" do diff --git a/spec/services/bulk_upload/lettings/year2022/csv_parser_spec.rb b/spec/services/bulk_upload/lettings/year2022/csv_parser_spec.rb index d396ad78e..10e722e9a 100644 --- a/spec/services/bulk_upload/lettings/year2022/csv_parser_spec.rb +++ b/spec/services/bulk_upload/lettings/year2022/csv_parser_spec.rb @@ -3,24 +3,86 @@ require "rails_helper" RSpec.describe BulkUpload::Lettings::Year2022::CsvParser do subject(:service) { described_class.new(path:) } - let(:path) { file_fixture("2022_23_lettings_bulk_upload.csv") } + let(:file) { Tempfile.new } + let(:path) { file.path } + let(:log) { build(:lettings_log, :completed) } context "when parsing csv with headers" do + before do + file.write(BulkUpload::LogToCsv.new(log:).default_2022_field_numbers_row) + file.write(BulkUpload::LogToCsv.new(log:).to_2022_csv_row) + file.rewind + end + it "returns correct offsets" do - expect(service.row_offset).to eq(5) + expect(service.row_offset).to eq(1) expect(service.col_offset).to eq(1) end it "parses csv correctly" do - expect(service.row_parsers[0].field_12.to_i).to eq(55) + expect(service.row_parsers[0].field_12.to_i).to eq(35) end end - context "when 
parsing csv without headers" do - let(:file) { Tempfile.new } - let(:path) { file.path } - let(:log) { build(:lettings_log, :completed) } + context "when parsing csv with headers with extra rows" do + before do + file.write("Extra row\n") + file.write(BulkUpload::LogToCsv.new(log:).default_2022_field_numbers_row) + file.write(BulkUpload::LogToCsv.new(log:).to_2022_csv_row) + file.rewind + end + + it "returns correct offsets" do + expect(service.row_offset).to eq(2) + expect(service.col_offset).to eq(1) + end + + it "parses csv correctly" do + expect(service.row_parsers[0].field_12.to_i).to eq(35) + end + end + + context "when parsing csv with headers in arbitrary order" do + let(:seed) { rand } + before do + file.write(BulkUpload::LogToCsv.new(log:).default_2022_field_numbers_row(seed:)) + file.write(BulkUpload::LogToCsv.new(log:).to_2022_csv_row(seed:)) + file.rewind + end + + it "returns correct offsets" do + expect(service.row_offset).to eq(1) + expect(service.col_offset).to eq(1) + end + + it "parses csv correctly" do + expect(service.row_parsers[0].field_12.to_i).to eq(35) + end + end + + context "when parsing csv with extra invalid headers" do + let(:seed) { rand } + let(:log_to_csv) { BulkUpload::LogToCsv.new(log:) } + let(:field_numbers) { log_to_csv.default_2022_field_numbers + %w[invalid_field_number] } + let(:field_values) { log_to_csv.to_2022_row + %w[value_for_invalid_field_number] } + + before do + file.write(log_to_csv.custom_field_numbers_row(seed:, field_numbers:)) + file.write(log_to_csv.to_custom_csv_row(seed:, field_values:)) + file.rewind + end + + it "parses csv correctly" do + expect(service.row_parsers[0].field_12.to_i).to eq(35) + end + + it "counts the number of valid field numbers correctly" do + expect(service.incorrect_field_count?).to be false + end + end + + context "when parsing csv without headers" do before do file.write(BulkUpload::LogToCsv.new(log:, col_offset: 0).to_2022_csv_row) file.rewind @@ -32,59 +94,55 @@ RSpec.describe 
BulkUpload::Lettings::Year2022::CsvParser do end it "parses csv correctly" do - expect(service.row_parsers[0].field_12.to_i).to eql(log.age1) + expect(service.row_parsers[0].field_12.to_i).to be(35) end end context "when parsing with BOM aka byte order mark" do - let(:file) { Tempfile.new } - let(:path) { file.path } - let(:log) { build(:lettings_log, :completed) } let(:bom) { "\uFEFF" } before do file.write(bom) file.write(BulkUpload::LogToCsv.new(log:, col_offset: 0).to_2022_csv_row) - file.close + file.rewind end it "parses csv correctly" do - expect(service.row_parsers[0].field_12.to_i).to eql(log.age1) + expect(service.row_parsers[0].field_12.to_i).to be(35) end end context "when an invalid byte sequence" do - let(:file) { Tempfile.new } - let(:path) { file.path } - let(:log) { build(:lettings_log, :completed) } let(:invalid_sequence) { "\x81" } before do file.write(invalid_sequence) file.write(BulkUpload::LogToCsv.new(log:, col_offset: 0).to_2022_csv_row) - file.close + file.rewind end it "parses csv correctly" do - expect(service.row_parsers[0].field_12.to_i).to eql(log.age1) + expect(service.row_parsers[0].field_12.to_i).to be(35) end end describe "#column_for_field", aggregate_failures: true do - context "when headers present" do + context "when with headers using default ordering" do + before do + file.write(BulkUpload::LogToCsv.new(log:).default_2022_field_numbers_row) + file.write(BulkUpload::LogToCsv.new(log:).to_2022_csv_row) + file.rewind + end + it "returns correct column" do expect(service.column_for_field("field_1")).to eql("B") expect(service.column_for_field("field_134")).to eql("EE") end end - context "when no headers" do - let(:file) { Tempfile.new } - let(:path) { file.path } - let(:log) { build(:lettings_log, :completed) } - + context "when without headers using default ordering" do before do - file.write(BulkUpload::LogToCsv.new(log:, col_offset: 0).to_2022_csv_row) + file.write(BulkUpload::LogToCsv.new(log:).to_2022_csv_row) file.rewind 
end @@ -93,5 +151,22 @@ RSpec.describe BulkUpload::Lettings::Year2022::CsvParser do expect(service.column_for_field("field_134")).to eql("ED") end end + + context "when with headers using custom ordering" do + let(:seed) { 123 } + + before do + file.write(BulkUpload::LogToCsv.new(log:).default_2022_field_numbers_row(seed:)) + file.write(BulkUpload::LogToCsv.new(log:).to_2022_csv_row(seed:)) + file.rewind + end + + it "returns correct column" do + expect(service.column_for_field("field_1")).to eql("I") + expect(service.column_for_field("field_45")).to eql("BE") + expect(service.column_for_field("field_90")).to eql("AN") + expect(service.column_for_field("field_134")).to eql("BA") + end + end end end diff --git a/spec/services/bulk_upload/lettings/year2023/csv_parser_spec.rb b/spec/services/bulk_upload/lettings/year2023/csv_parser_spec.rb index a8cf227f4..fc82faa8d 100644 --- a/spec/services/bulk_upload/lettings/year2023/csv_parser_spec.rb +++ b/spec/services/bulk_upload/lettings/year2023/csv_parser_spec.rb @@ -79,6 +79,33 @@ RSpec.describe BulkUpload::Lettings::Year2023::CsvParser do end end + context "when parsing csv with extra invalid headers" do + let(:seed) { rand } + let(:log_to_csv) { BulkUpload::LogToCsv.new(log:) } + let(:field_numbers) { log_to_csv.default_2023_field_numbers + %w[invalid_field_number] } + let(:field_values) { log_to_csv.to_2023_row + %w[value_for_invalid_field_number] } + + before do + file.write("Question\n") + file.write("Additional info\n") + file.write("Values\n") + file.write("Can be empty?\n") + file.write("Type of letting the question applies to\n") + file.write("Duplicate check field?\n") + file.write(log_to_csv.custom_field_numbers_row(seed:, field_numbers:)) + file.write(log_to_csv.to_custom_csv_row(seed:, field_values:)) + file.rewind + end + + it "parses csv correctly" do + expect(service.row_parsers[0].field_13).to eql(log.tenancycode) + end + + it "counts the number of valid field numbers correctly" do + 
expect(service.incorrect_field_count?).to be false
+      end
+    end
+
   context "when parsing csv without headers" do
     before do
       file.write(BulkUpload::LogToCsv.new(log:, col_offset: 0).to_2023_csv_row)
diff --git a/spec/support/bulk_upload/log_to_csv.rb b/spec/support/bulk_upload/log_to_csv.rb
index d5c64a6ee..6064e57e3 100644
--- a/spec/support/bulk_upload/log_to_csv.rb
+++ b/spec/support/bulk_upload/log_to_csv.rb
@@ -12,14 +12,31 @@ class BulkUpload::LogToCsv
     [nil] * col_offset
   end
 
-  def to_2022_csv_row
-    (row_prefix + to_2022_row).flatten.join(",") + line_ending
+  def to_2022_csv_row(seed: nil)
+    if seed
+      row = to_2022_row.shuffle(random: Random.new(seed))
+      (row_prefix + row).flatten.join(",") + line_ending
+    else
+      (row_prefix + to_2022_row).flatten.join(",") + line_ending
+    end
   end
 
   def to_2022_sales_csv_row
     (row_prefix + to_2022_sales_row).flatten.join(",") + line_ending
   end
 
+  def default_2022_field_numbers
+    (1..134).to_a
+  end
+
+  def default_2022_field_numbers_row(seed: nil)
+    if seed
+      ["Bulk upload field number"] + default_2022_field_numbers.shuffle(random: Random.new(seed))
+    else
+      ["Bulk upload field number"] + default_2022_field_numbers
+    end.flatten.join(",") + line_ending
+  end
+
   def to_2023_csv_row(seed: nil)
     if seed
       row = to_2023_row.shuffle(random: Random.new(seed))
@@ -202,6 +219,21 @@ class BulkUpload::LogToCsv
     ]
   end
 
+  def custom_field_numbers_row(seed: nil, field_numbers: nil)
+    if seed
+      ["Bulk upload field number"] + field_numbers.shuffle(random: Random.new(seed))
+    else
+      ["Bulk upload field number"] + field_numbers
+    end.flatten.join(",") + line_ending
+  end
+
+  def to_custom_csv_row(seed: nil, field_values: nil)
+    if seed
+      field_values = field_values.shuffle(random: Random.new(seed))
+    end
+    (row_prefix + field_values).flatten.join(",") + line_ending
+  end
+
   def to_2022_sales_row
     [
       log.purchid, # 1