All checks were successful
CI Pipeline / build (push) Successful in 49s
Strip invalid XML 1.0 control characters (0x00-0x08, 0x0B-0x0C, 0x0E-0x1F) from text to prevent corrupted docx files that fail to open in LibreOffice. Fixes SAXParseException 'PCData Invalid Char value' errors.
74 lines
2.1 KiB
Ruby
74 lines
2.1 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
require "test_helper"
|
|
|
|
# Tests for Notare::XmlSanitizer.sanitize.
#
# The sanitizer is expected to strip the control characters that are invalid
# in XML 1.0 (0x00-0x08, 0x0B-0x0C, 0x0E-0x1F) while leaving tab, newline,
# carriage return and all other text untouched, and to hand non-String
# values back unchanged.
class XmlSanitizerTest < Minitest::Test
  def test_removes_null_character
    assert_equal "hello", Notare::XmlSanitizer.sanitize("hel\x00lo")
  end

  def test_removes_control_characters_0x01_to_0x08
    input = "a\x01b\x02c\x03d\x04e\x05f\x06g\x07h\x08i"
    assert_equal "abcdefghi", Notare::XmlSanitizer.sanitize(input)
  end

  def test_removes_control_characters_0x0b_and_0x0c
    input = "hello\x0Bworld\x0Ctest"
    assert_equal "helloworldtest", Notare::XmlSanitizer.sanitize(input)
  end

  def test_removes_control_characters_0x0e_to_0x1f
    input = "a\x0Eb\x0Fc\x10d\x11e\x1Ff"
    assert_equal "abcdef", Notare::XmlSanitizer.sanitize(input)

    # The sample above only touches five code points; walk the whole range so
    # a gap anywhere between 0x0E and 0x1F is reported with the offending byte.
    (0x0E..0x1F).each do |code|
      wrapped = "a#{code.chr(Encoding::UTF_8)}b"
      assert_equal "ab", Notare::XmlSanitizer.sanitize(wrapped),
                   format("expected 0x%02X to be removed", code)
    end
  end

  def test_preserves_tab_character
    input = "hello\tworld"
    assert_equal "hello\tworld", Notare::XmlSanitizer.sanitize(input)
  end

  def test_preserves_newline_character
    input = "hello\nworld"
    assert_equal "hello\nworld", Notare::XmlSanitizer.sanitize(input)
  end

  def test_preserves_carriage_return_character
    input = "hello\rworld"
    assert_equal "hello\rworld", Notare::XmlSanitizer.sanitize(input)
  end

  def test_preserves_crlf
    input = "hello\r\nworld"
    assert_equal "hello\r\nworld", Notare::XmlSanitizer.sanitize(input)
  end

  def test_returns_nil_unchanged
    assert_nil Notare::XmlSanitizer.sanitize(nil)
  end

  def test_returns_non_string_unchanged
    assert_equal 123, Notare::XmlSanitizer.sanitize(123)
    assert_equal :symbol, Notare::XmlSanitizer.sanitize(:symbol)
  end

  def test_preserves_unicode_characters
    input = "café naïve 日本語 🎉"
    assert_equal "café naïve 日本語 🎉", Notare::XmlSanitizer.sanitize(input)
  end

  def test_preserves_regular_text
    input = "Hello, World! This is normal text."
    assert_equal input, Notare::XmlSanitizer.sanitize(input)
  end

  def test_handles_empty_string
    assert_equal "", Notare::XmlSanitizer.sanitize("")
  end

  def test_real_world_case_stx_character
    # The actual case from the failed.docx: 0x02 (STX) character
    input = "infrastruktur\x02bidrag"
    assert_equal "infrastrukturbidrag", Notare::XmlSanitizer.sanitize(input)
  end
end
|