From a12d26864bdbc27cf6508152768f872e5192040d Mon Sep 17 00:00:00 2001
From: Phil Lee
Date: Mon, 27 Feb 2023 11:31:18 +0000
Subject: [PATCH] bulk upload handles invalid byte sequences (#1340)

- sometimes csv is not exported as utf8 therefore invalid byte sequences are removed
---
 app/services/bulk_upload/lettings/csv_parser.rb |  1 +
 .../bulk_upload/lettings/csv_parser_spec.rb     | 17 +++++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/app/services/bulk_upload/lettings/csv_parser.rb b/app/services/bulk_upload/lettings/csv_parser.rb
index ae29f4a6f..6cead61ab 100644
--- a/app/services/bulk_upload/lettings/csv_parser.rb
+++ b/app/services/bulk_upload/lettings/csv_parser.rb
@@ -52,6 +52,7 @@ private
 
     @normalised_string = File.read(path, encoding: "bom|utf-8")
     @normalised_string.gsub!("\r\n", "\n")
+    @normalised_string.scrub!("")
 
     @normalised_string
   end
diff --git a/spec/services/bulk_upload/lettings/csv_parser_spec.rb b/spec/services/bulk_upload/lettings/csv_parser_spec.rb
index 9d381a5a0..782782e04 100644
--- a/spec/services/bulk_upload/lettings/csv_parser_spec.rb
+++ b/spec/services/bulk_upload/lettings/csv_parser_spec.rb
@@ -52,4 +52,21 @@ RSpec.describe BulkUpload::Lettings::CsvParser do
       expect(service.row_parsers[0].field_12).to eql(log.age1)
     end
   end
+
+  context "when an invalid byte sequence" do
+    let(:file) { Tempfile.new }
+    let(:path) { file.path }
+    let(:log) { build(:lettings_log, :completed) }
+    let(:invalid_sequence) { "\x81" }
+
+    before do
+      file.write(invalid_sequence)
+      file.write(BulkUpload::LogToCsv.new(log:, col_offset: 0).to_csv_row)
+      file.close
+    end
+
+    it "parses csv correctly" do
+      expect(service.row_parsers[0].field_12).to eql(log.age1)
+    end
+  end
 end