5 Commits

Author SHA1 Message Date
e937552913 Fix RuboCop Style/SelectByKind and Style/PredicateWithKind offenses
All checks were successful
CI Pipeline / build (pull_request) Successful in 1m8s
2026-03-05 13:43:15 +01:00
26e0d59cf1 Support configurable default font and size
Some checks failed
CI Pipeline / build (pull_request) Failing after 46s
Add default_font and default_size options to Document.create, rendered
as w:docDefaults in styles.xml. Defaults to Arial 12pt. Adjust heading
sizes for better visual hierarchy.

Styling based on uutilsynet guidance for accessible documents:
https://www.uutilsynet.no/veiledning/rettleiar-universelt-utforma-word-og-pdf-dokument/1636
2026-03-05 13:09:40 +01:00
64c8679044 Sanitize invalid XML characters in text content
All checks were successful
CI Pipeline / build (push) Successful in 49s
Strip invalid XML 1.0 control characters (0x00-0x08, 0x0B-0x0C, 0x0E-0x1F)
from text to prevent corrupted docx files that fail to open in LibreOffice.

Fixes SAXParseException 'PCData Invalid Char value' errors.
2026-01-22 09:10:33 +01:00
8b4f538cbb Update version
All checks were successful
CI Pipeline / build (push) Successful in 12s
2025-12-03 13:53:04 +01:00
bc69880c9b Merge pull request 'Support table and table column sizing' (#9) from feature/table-column-sizing into main
All checks were successful
CI Pipeline / build (push) Successful in 12s
Reviewed-on: #9
2025-12-03 12:52:10 +00:00
10 changed files with 156 additions and 14 deletions

View File

@@ -3,6 +3,7 @@
require "nokogiri"
require_relative "notare/version"
require_relative "notare/xml_sanitizer"
require_relative "notare/nodes/base"
require_relative "notare/nodes/break"
require_relative "notare/nodes/hyperlink"

View File

@@ -4,16 +4,21 @@ module Notare
class Document
include Builder
attr_reader :nodes, :styles, :table_styles, :hyperlinks
DEFAULT_FONT = "Arial"
DEFAULT_SIZE = 12
def self.create(path, &block)
doc = new
attr_reader :nodes, :styles, :table_styles, :hyperlinks, :default_font, :default_size
def self.create(path, default_font: DEFAULT_FONT, default_size: DEFAULT_SIZE, &block)
doc = new(default_font: default_font, default_size: default_size)
block.call(doc)
doc.save(path)
doc
end
def initialize
def initialize(default_font: DEFAULT_FONT, default_size: DEFAULT_SIZE)
@default_font = default_font
@default_size = default_size
@nodes = []
@format_stack = []
@current_target = nil
@@ -51,7 +56,7 @@ module Notare
end
def lists
@nodes.select { |n| n.is_a?(Nodes::List) }
@nodes.grep(Nodes::List)
end
def uses_lists?
@@ -101,12 +106,12 @@ module Notare
def register_built_in_styles
# Headings (spacing_before ensures they're rendered as paragraph styles)
define_style :heading1, size: 24, bold: true, spacing_before: 240, spacing_after: 120
define_style :heading2, size: 18, bold: true, spacing_before: 200, spacing_after: 100
define_style :heading1, size: 20, bold: true, spacing_before: 240, spacing_after: 120
define_style :heading2, size: 16, bold: true, spacing_before: 200, spacing_after: 100
define_style :heading3, size: 14, bold: true, spacing_before: 160, spacing_after: 80
define_style :heading4, size: 12, bold: true, spacing_before: 120, spacing_after: 60
define_style :heading5, size: 11, bold: true, italic: true, spacing_before: 100, spacing_after: 40
define_style :heading6, size: 10, bold: true, italic: true, spacing_before: 80, spacing_after: 40
define_style :heading5, size: 12, bold: true, italic: true, spacing_before: 100, spacing_after: 40
define_style :heading6, size: 12, italic: true, spacing_before: 80, spacing_after: 40
# Other built-in styles
define_style :title, size: 26, bold: true, align: :center

View File

@@ -8,7 +8,7 @@ module Notare
def initialize(text, bold: false, italic: false, underline: false,
strike: false, highlight: nil, color: nil, style: nil)
super()
@text = text
@text = XmlSanitizer.sanitize(text)
@bold = bold
@italic = italic
@underline = underline

View File

@@ -59,7 +59,12 @@ module Notare
end
def styles_xml
Xml::StylesXml.new(@document.styles, @document.table_styles).to_xml
Xml::StylesXml.new(
@document.styles,
@document.table_styles,
default_font: @document.default_font,
default_size: @document.default_size
).to_xml
end
def numbering_xml

View File

@@ -1,5 +1,5 @@
# frozen_string_literal: true
module Notare
VERSION = "0.0.4"
VERSION = "0.0.7"
end

View File

@@ -18,9 +18,11 @@ module Notare
right: "right"
}.freeze
def initialize(styles, table_styles = {})
def initialize(styles, table_styles = {}, default_font: nil, default_size: nil)
@styles = styles
@table_styles = table_styles
@default_font = default_font
@default_size = default_size
end
def to_xml
@@ -28,6 +30,8 @@ module Notare
xml.styles("xmlns:w" => NAMESPACE) do
xml.parent.namespace = xml.parent.namespace_definitions.find { |ns| ns.prefix == "w" }
render_doc_defaults(xml) if @default_font || @default_size
@styles.each_value do |style|
render_style(xml, style)
end
@@ -44,6 +48,28 @@ module Notare
private
# Writes the w:docDefaults element holding the default run properties.
#
# The font (when set) is applied to all four rFonts slots; the size (when
# set) is written to both w:sz and w:szCs as twice the configured value,
# truncated to an integer (the original code names this "half_points").
#
# @param xml [#[]] builder on which the "w"-prefixed elements are emitted
def render_doc_defaults(xml)
  xml["w"].docDefaults do
    xml["w"].rPrDefault do
      xml["w"].rPr do
        if @default_font
          # Same font for every script slot, in the order the attributes are emitted.
          font_slots = %w[w:ascii w:hAnsi w:eastAsia w:cs]
          xml["w"].rFonts(font_slots.to_h { |slot| [slot, @default_font] })
        end

        if @default_size
          size_val = (@default_size * 2).to_i.to_s
          xml["w"].sz("w:val" => size_val)
          xml["w"].szCs("w:val" => size_val)
        end
      end
    end
  end
end
def render_style(xml, style)
style_type = style.paragraph_properties? ? "paragraph" : "character"

View File

@@ -0,0 +1,15 @@
# frozen_string_literal: true
module Notare
  # Removes characters that are not allowed in XML 1.0 text.
  module XmlSanitizer
    # Forbidden C0 control characters: 0x00-0x08, 0x0B-0x0C, 0x0E-0x1F.
    # Valid whitespace is preserved: 0x09 (tab), 0x0A (LF), 0x0D (CR).
    INVALID_XML_CHARS = /[\x00-\x08\x0B\x0C\x0E-\x1F]/

    # U+FFFE and U+FFFF are also excluded from the XML 1.0 Char production
    # (the allowed BMP range ends at U+FFFD).
    INVALID_XML_NONCHARACTERS = /[\uFFFE\uFFFF]/

    # Strips characters that XML 1.0 forbids from +text+.
    #
    # @param text [Object] value to clean; anything that is not a String
    #   (nil, numbers, symbols, ...) is returned untouched
    # @return [Object] a new String without the forbidden characters, or
    #   +text+ itself when it is not a String
    def self.sanitize(text)
      return text unless text.is_a?(String)

      cleaned = text.gsub(INVALID_XML_CHARS, "")
      # The non-character pattern is a UTF-8 regexp; applying it to strings in
      # other encodings could raise Encoding::CompatibilityError, so those keep
      # the control-character pass only.
      return cleaned unless cleaned.encoding == Encoding::UTF_8

      cleaned.gsub(INVALID_XML_NONCHARACTERS, "")
    end
  end
end

View File

@@ -69,6 +69,6 @@ class DocumentTest < Minitest::Test
doc.table { doc.tr { doc.td "Cell" } }
assert_equal 2, doc.lists.count
assert(doc.lists.all? { |l| l.is_a?(Notare::Nodes::List) })
assert(doc.lists.all?(Notare::Nodes::List))
end
end

View File

@@ -111,4 +111,21 @@ class ParagraphTest < Minitest::Test
# Newlines should be preserved in the text
assert_includes xml, "Line 1\nLine 2\nLine 3"
end
# Paragraph text containing control characters must come out of the
# generated XML without them, and the result must still parse.
def test_invalid_xml_characters_are_stripped
  dirty_paragraphs = ["infrastruktur\x02bidrag", "hello\x00world", "test\x01\x03\x04value"]
  document_xml = create_doc_and_read_xml do |doc|
    dirty_paragraphs.each { |paragraph| doc.p paragraph }
  end

  # Invalid characters should be stripped
  %w[infrastrukturbidrag helloworld testvalue].each do |expected|
    assert_includes document_xml, expected
  end

  # Verify the XML is valid by parsing it (strict mode raises if invalid)
  parsed = Nokogiri::XML(document_xml, &:strict)
  assert parsed.errors.empty?, "XML should be valid: #{parsed.errors}"
end
end

View File

@@ -0,0 +1,73 @@
# frozen_string_literal: true
require "test_helper"
# Exercises Notare::XmlSanitizer.sanitize: forbidden control characters are
# dropped, while valid whitespace, Unicode text and non-String values pass
# through unchanged.
class XmlSanitizerTest < Minitest::Test
  def test_removes_null_character
    assert_equal "hello", sanitize("hel\x00lo")
  end

  def test_removes_control_characters_0x01_to_0x08
    assert_equal "abcdefghi", sanitize("a\x01b\x02c\x03d\x04e\x05f\x06g\x07h\x08i")
  end

  def test_removes_control_characters_0x0b_and_0x0c
    assert_equal "helloworldtest", sanitize("hello\x0Bworld\x0Ctest")
  end

  def test_removes_control_characters_0x0e_to_0x1f
    assert_equal "abcdef", sanitize("a\x0Eb\x0Fc\x10d\x11e\x1Ff")
  end

  def test_preserves_tab_character
    assert_equal "hello\tworld", sanitize("hello\tworld")
  end

  def test_preserves_newline_character
    assert_equal "hello\nworld", sanitize("hello\nworld")
  end

  def test_preserves_carriage_return_character
    assert_equal "hello\rworld", sanitize("hello\rworld")
  end

  def test_preserves_crlf
    assert_equal "hello\r\nworld", sanitize("hello\r\nworld")
  end

  def test_returns_nil_unchanged
    assert_nil sanitize(nil)
  end

  def test_returns_non_string_unchanged
    assert_equal 123, sanitize(123)
    assert_equal :symbol, sanitize(:symbol)
  end

  def test_preserves_unicode_characters
    assert_equal "café naïve 日本語 🎉", sanitize("café naïve 日本語 🎉")
  end

  def test_preserves_regular_text
    plain = "Hello, World! This is normal text."
    assert_equal plain, sanitize(plain)
  end

  def test_handles_empty_string
    assert_equal "", sanitize("")
  end

  def test_real_world_case_stx_character
    # The actual case from the failed.docx: 0x02 (STX) character
    assert_equal "infrastrukturbidrag", sanitize("infrastruktur\x02bidrag")
  end

  private

  # Shorthand for the call under test.
  def sanitize(value)
    Notare::XmlSanitizer.sanitize(value)
  end
end