Compare commits
5 Commits
feature/ta
...
feature/de
| Author | SHA1 | Date | |
|---|---|---|---|
| | e937552913 | | |
| | 26e0d59cf1 | | |
| | 64c8679044 | | |
| | 8b4f538cbb | | |
| | bc69880c9b | | |
@@ -3,6 +3,7 @@
|
||||
require "nokogiri"
|
||||
|
||||
require_relative "notare/version"
|
||||
require_relative "notare/xml_sanitizer"
|
||||
require_relative "notare/nodes/base"
|
||||
require_relative "notare/nodes/break"
|
||||
require_relative "notare/nodes/hyperlink"
|
||||
|
||||
@@ -4,16 +4,21 @@ module Notare
|
||||
class Document
|
||||
include Builder
|
||||
|
||||
attr_reader :nodes, :styles, :table_styles, :hyperlinks
|
||||
DEFAULT_FONT = "Arial"
|
||||
DEFAULT_SIZE = 12
|
||||
|
||||
def self.create(path, &block)
|
||||
doc = new
|
||||
attr_reader :nodes, :styles, :table_styles, :hyperlinks, :default_font, :default_size
|
||||
|
||||
# Builds a document, hands it to the caller's block, then saves it to +path+.
#
# @param path where the finished document is saved (passed to #save)
# @param default_font forwarded to the constructor; defaults to DEFAULT_FONT
# @param default_size forwarded to the constructor; defaults to DEFAULT_SIZE
# @yieldparam document the freshly constructed document to populate
# @return the document that was built and saved
def self.create(path, default_font: DEFAULT_FONT, default_size: DEFAULT_SIZE, &block)
  new(default_font: default_font, default_size: default_size).tap do |document|
    block.call(document)
    document.save(path)
  end
end
|
||||
|
||||
def initialize
|
||||
def initialize(default_font: DEFAULT_FONT, default_size: DEFAULT_SIZE)
|
||||
@default_font = default_font
|
||||
@default_size = default_size
|
||||
@nodes = []
|
||||
@format_stack = []
|
||||
@current_target = nil
|
||||
@@ -51,7 +56,7 @@ module Notare
|
||||
end
|
||||
|
||||
def lists
|
||||
@nodes.select { |n| n.is_a?(Nodes::List) }
|
||||
@nodes.grep(Nodes::List)
|
||||
end
|
||||
|
||||
def uses_lists?
|
||||
@@ -101,12 +106,12 @@ module Notare
|
||||
|
||||
def register_built_in_styles
|
||||
# Headings (spacing_before ensures they're rendered as paragraph styles)
|
||||
define_style :heading1, size: 24, bold: true, spacing_before: 240, spacing_after: 120
|
||||
define_style :heading2, size: 18, bold: true, spacing_before: 200, spacing_after: 100
|
||||
define_style :heading1, size: 20, bold: true, spacing_before: 240, spacing_after: 120
|
||||
define_style :heading2, size: 16, bold: true, spacing_before: 200, spacing_after: 100
|
||||
define_style :heading3, size: 14, bold: true, spacing_before: 160, spacing_after: 80
|
||||
define_style :heading4, size: 12, bold: true, spacing_before: 120, spacing_after: 60
|
||||
define_style :heading5, size: 11, bold: true, italic: true, spacing_before: 100, spacing_after: 40
|
||||
define_style :heading6, size: 10, bold: true, italic: true, spacing_before: 80, spacing_after: 40
|
||||
define_style :heading5, size: 12, bold: true, italic: true, spacing_before: 100, spacing_after: 40
|
||||
define_style :heading6, size: 12, italic: true, spacing_before: 80, spacing_after: 40
|
||||
|
||||
# Other built-in styles
|
||||
define_style :title, size: 26, bold: true, align: :center
|
||||
|
||||
@@ -8,7 +8,7 @@ module Notare
|
||||
def initialize(text, bold: false, italic: false, underline: false,
|
||||
strike: false, highlight: nil, color: nil, style: nil)
|
||||
super()
|
||||
@text = text
|
||||
@text = XmlSanitizer.sanitize(text)
|
||||
@bold = bold
|
||||
@italic = italic
|
||||
@underline = underline
|
||||
|
||||
@@ -59,7 +59,12 @@ module Notare
|
||||
end
|
||||
|
||||
def styles_xml
|
||||
Xml::StylesXml.new(@document.styles, @document.table_styles).to_xml
|
||||
Xml::StylesXml.new(
|
||||
@document.styles,
|
||||
@document.table_styles,
|
||||
default_font: @document.default_font,
|
||||
default_size: @document.default_size
|
||||
).to_xml
|
||||
end
|
||||
|
||||
def numbering_xml
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
module Notare
|
||||
VERSION = "0.0.4"
|
||||
VERSION = "0.0.7"
|
||||
end
|
||||
|
||||
@@ -18,9 +18,11 @@ module Notare
|
||||
right: "right"
|
||||
}.freeze
|
||||
|
||||
def initialize(styles, table_styles = {})
|
||||
def initialize(styles, table_styles = {}, default_font: nil, default_size: nil)
|
||||
@styles = styles
|
||||
@table_styles = table_styles
|
||||
@default_font = default_font
|
||||
@default_size = default_size
|
||||
end
|
||||
|
||||
def to_xml
|
||||
@@ -28,6 +30,8 @@ module Notare
|
||||
xml.styles("xmlns:w" => NAMESPACE) do
|
||||
xml.parent.namespace = xml.parent.namespace_definitions.find { |ns| ns.prefix == "w" }
|
||||
|
||||
render_doc_defaults(xml) if @default_font || @default_size
|
||||
|
||||
@styles.each_value do |style|
|
||||
render_style(xml, style)
|
||||
end
|
||||
@@ -44,6 +48,28 @@ module Notare
|
||||
|
||||
private
|
||||
|
||||
# Emits <w:docDefaults> / <w:rPrDefault> / <w:rPr> with the document-wide
# run defaults.
#
# When @default_font is set, <w:rFonts> gets the same font for the ascii,
# hAnsi, eastAsia and cs slots. When @default_size is set, <w:sz> and
# <w:szCs> both get twice that value, truncated to an integer.
#
# @param xml the XML builder the elements are appended to
def render_doc_defaults(xml)
  xml["w"].docDefaults do
    xml["w"].rPrDefault do
      xml["w"].rPr do
        if @default_font
          font_attributes = {
            "w:ascii" => @default_font,
            "w:hAnsi" => @default_font,
            "w:eastAsia" => @default_font,
            "w:cs" => @default_font
          }
          xml["w"].rFonts(font_attributes)
        end

        if @default_size
          # The stored size is doubled before being written out.
          doubled = (@default_size * 2).to_i.to_s
          xml["w"].sz("w:val" => doubled)
          xml["w"].szCs("w:val" => doubled)
        end
      end
    end
  end
end
|
||||
|
||||
def render_style(xml, style)
|
||||
style_type = style.paragraph_properties? ? "paragraph" : "character"
|
||||
|
||||
|
||||
15
lib/notare/xml_sanitizer.rb
Normal file
15
lib/notare/xml_sanitizer.rb
Normal file
@@ -0,0 +1,15 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
module Notare
  # Removes characters that are not legal in XML 1.0 from text before it is
  # placed into generated XML.
  module XmlSanitizer
    # C0 control characters that XML 1.0 forbids: 0x00-0x08, 0x0B-0x0C, 0x0E-0x1F.
    # Legal whitespace is preserved: 0x09 (tab), 0x0A (LF), 0x0D (CR).
    INVALID_XML_CHARS = /[\x00-\x08\x0B\x0C\x0E-\x1F]/

    # U+FFFE and U+FFFF can be encoded in UTF-8 but are outside the XML 1.0
    # Char production. This is a separate pattern because a regexp containing
    # \u escapes is pinned to UTF-8 and cannot be matched against binary strings.
    INVALID_XML_NONCHARACTERS = /[\uFFFE\uFFFF]/

    # Returns a copy of +text+ with every XML-1.0-illegal character removed.
    #
    # @param text [Object] the value to clean
    # @return [Object] a cleaned String when +text+ is a String; otherwise
    #   +text+ itself, unchanged (nil, numbers, symbols, ...)
    def self.sanitize(text)
      return text unless text.is_a?(String)

      cleaned = text.gsub(INVALID_XML_CHARS, "")
      # Only UTF-8 text gets the second pass; other encodings keep the
      # original control-character-only behaviour.
      return cleaned unless cleaned.encoding == Encoding::UTF_8

      cleaned.gsub(INVALID_XML_NONCHARACTERS, "")
    end
  end
end
|
||||
@@ -69,6 +69,6 @@ class DocumentTest < Minitest::Test
|
||||
doc.table { doc.tr { doc.td "Cell" } }
|
||||
|
||||
assert_equal 2, doc.lists.count
|
||||
assert(doc.lists.all? { |l| l.is_a?(Notare::Nodes::List) })
|
||||
assert(doc.lists.all?(Notare::Nodes::List))
|
||||
end
|
||||
end
|
||||
|
||||
@@ -111,4 +111,21 @@ class ParagraphTest < Minitest::Test
|
||||
# Newlines should be preserved in the text
|
||||
assert_includes xml, "Line 1\nLine 2\nLine 3"
|
||||
end
|
||||
|
||||
# Paragraph text containing XML-illegal control characters must come out
# with those characters removed, and the resulting XML must still parse.
def test_invalid_xml_characters_are_stripped
  dirty_paragraphs = [
    "infrastruktur\x02bidrag",
    "hello\x00world",
    "test\x01\x03\x04value"
  ]

  xml = create_doc_and_read_xml do |doc|
    dirty_paragraphs.each { |paragraph| doc.p paragraph }
  end

  # Each paragraph should appear with the control characters removed.
  %w[infrastrukturbidrag helloworld testvalue].each do |expected|
    assert_includes xml, expected
  end

  # Strict parsing raises on malformed XML, so reaching the assertion
  # already means the output parsed.
  parsed = Nokogiri::XML(xml, &:strict)
  assert parsed.errors.empty?, "XML should be valid: #{parsed.errors}"
end
|
||||
end
|
||||
|
||||
73
test/xml_sanitizer_test.rb
Normal file
73
test/xml_sanitizer_test.rb
Normal file
@@ -0,0 +1,73 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
require "test_helper"
|
||||
|
||||
# Unit tests for Notare::XmlSanitizer.sanitize: which characters are removed,
# which are kept, and how non-String input is treated.
class XmlSanitizerTest < Minitest::Test
  def test_removes_null_character
    assert_sanitized "hello", "hel\x00lo"
  end

  def test_removes_control_characters_0x01_to_0x08
    assert_sanitized "abcdefghi", "a\x01b\x02c\x03d\x04e\x05f\x06g\x07h\x08i"
  end

  def test_removes_control_characters_0x0b_and_0x0c
    assert_sanitized "helloworldtest", "hello\x0Bworld\x0Ctest"
  end

  def test_removes_control_characters_0x0e_to_0x1f
    assert_sanitized "abcdef", "a\x0Eb\x0Fc\x10d\x11e\x1Ff"
  end

  def test_preserves_tab_character
    assert_untouched "hello\tworld"
  end

  def test_preserves_newline_character
    assert_untouched "hello\nworld"
  end

  def test_preserves_carriage_return_character
    assert_untouched "hello\rworld"
  end

  def test_preserves_crlf
    assert_untouched "hello\r\nworld"
  end

  def test_returns_nil_unchanged
    assert_nil Notare::XmlSanitizer.sanitize(nil)
  end

  def test_returns_non_string_unchanged
    assert_untouched 123
    assert_untouched :symbol
  end

  def test_preserves_unicode_characters
    assert_untouched "café naïve 日本語 🎉"
  end

  def test_preserves_regular_text
    assert_untouched "Hello, World! This is normal text."
  end

  def test_handles_empty_string
    assert_untouched ""
  end

  def test_real_world_case_stx_character
    # The actual case from the failed.docx: 0x02 (STX) character
    assert_sanitized "infrastrukturbidrag", "infrastruktur\x02bidrag"
  end

  private

  # Asserts that sanitizing +raw+ yields +expected+.
  def assert_sanitized(expected, raw)
    assert_equal expected, Notare::XmlSanitizer.sanitize(raw)
  end

  # Asserts that sanitizing +value+ returns something equal to +value+.
  def assert_untouched(value)
    assert_equal value, Notare::XmlSanitizer.sanitize(value)
  end
end
|
||||
Reference in New Issue
Block a user