Files
Notare/test/paragraph_test.rb
mathias234 64c8679044
All checks were successful
CI Pipeline / build (push) Successful in 49s
Sanitize invalid XML characters in text content
Strip invalid XML 1.0 control characters (0x00-0x08, 0x0B-0x0C, 0x0E-0x1F)
from text to prevent corrupted docx files that fail to open in LibreOffice.

Fixes SAXParseException 'PCData Invalid Char value' errors.
2026-01-22 09:10:33 +01:00

132 lines
3.5 KiB
Ruby

# frozen_string_literal: true
require "test_helper"
# Paragraph-level tests. Every test builds a document through
# +create_doc_and_read_xml+ (expected to come from NotareTestHelpers) and
# inspects the XML string that helper hands back.
class ParagraphTest < Minitest::Test
  include NotareTestHelpers

  # A one-line paragraph yields paragraph, run and text markup plus the text.
  def test_simple_paragraph
    markup = create_doc_and_read_xml do |builder|
      builder.p "Hello World"
    end

    ["<w:p>", "<w:r>", "<w:t", "Hello World"].each do |fragment|
      assert_includes markup, fragment
    end
  end

  # Three paragraph calls produce three paragraph elements, each with its text.
  def test_multiple_paragraphs
    markup = create_doc_and_read_xml do |builder|
      builder.p "First paragraph"
      builder.p "Second paragraph"
      builder.p "Third paragraph"
    end

    ["First paragraph", "Second paragraph", "Third paragraph"].each do |text|
      assert_includes markup, text
    end
    assert_equal 3, markup.scan("<w:p>").size
  end

  # Each text call inside a paragraph block becomes its own run.
  def test_paragraph_with_block_and_multiple_text_runs
    markup = create_doc_and_read_xml do |builder|
      builder.p do
        builder.text "Part one "
        builder.text "Part two "
        builder.text "Part three"
      end
    end

    ["Part one ", "Part two ", "Part three"].each do |text|
      assert_includes markup, text
    end
    assert_equal 3, markup.scan("<w:r>").size
  end

  # Leading and trailing spaces survive, flagged with xml:space="preserve".
  def test_paragraph_preserves_whitespace
    markup = create_doc_and_read_xml do |builder|
      builder.p do
        builder.text " leading spaces"
        builder.text "trailing spaces "
        builder.text " both "
      end
    end

    assert_includes markup, 'xml:space="preserve"'
    [" leading spaces", "trailing spaces ", " both "].each do |text|
      assert_includes markup, text
    end
  end

  # Markup-significant characters are escaped; non-ASCII text passes through.
  def test_paragraph_with_special_characters
    markup = create_doc_and_read_xml do |builder|
      builder.p "Special chars: <>&\"'"
      builder.p "Unicode: café, naïve, 日本語"
    end

    # The escaped entities must be present in the output
    ["&lt;", "&gt;", "&amp;"].each do |entity|
      assert_includes markup, entity
    end
    ["café", "日本語"].each do |text|
      assert_includes markup, text
    end
  end

  # A paragraph built from an empty block still emits a paragraph element.
  def test_empty_paragraph_with_block
    markup = create_doc_and_read_xml do |builder|
      builder.p {} # rubocop:disable Lint/EmptyBlock
    end

    # Either the open-tag form or the self-closing form is acceptable
    has_paragraph = ["<w:p>", "<w:p/>"].any? { |tag| markup.include?(tag) }
    assert(has_paragraph, "Should contain paragraph element")
  end

  # A 10,000-character string is written out in full.
  def test_long_text_content
    filler = "x" * 10_000
    markup = create_doc_and_read_xml { |builder| builder.p filler }

    assert_includes markup, filler
  end

  # Emoji and several non-Latin scripts appear unchanged in the output.
  def test_unicode_content
    markup = create_doc_and_read_xml do |builder|
      builder.p "Emoji: 🎉🚀💻"
      builder.p "Chinese: 你好世界"
      builder.p "Arabic: مرحبا بالعالم"
      builder.p "Russian: Привет мир"
    end

    ["🎉🚀💻", "你好世界", "مرحبا بالعالم", "Привет мир"].each do |text|
      assert_includes markup, text
    end
  end

  # Embedded newlines are kept verbatim in the text content.
  def test_newlines_in_text
    markup = create_doc_and_read_xml { |builder| builder.p "Line 1\nLine 2\nLine 3" }

    # The newline characters themselves must still be in the output
    assert_includes markup, "Line 1\nLine 2\nLine 3"
  end

  # Control characters that XML forbids are dropped and the result still parses.
  def test_invalid_xml_characters_are_stripped
    markup = create_doc_and_read_xml do |builder|
      builder.p "infrastruktur\x02bidrag"
      builder.p "hello\x00world"
      builder.p "test\x01\x03\x04value"
    end

    # Only the surrounding text should remain once the bad bytes are removed
    ["infrastrukturbidrag", "helloworld", "testvalue"].each do |cleaned|
      assert_includes markup, cleaned
    end

    # Strict parsing raises on malformed XML, so reaching the assert means it parsed
    parsed = Nokogiri::XML(markup, &:strict)
    assert parsed.errors.empty?, "XML should be valid: #{parsed.errors}"
  end
end