Compare commits
4 Commits
bc69880c9b
...
feature/de
| Author | SHA1 | Date | |
|---|---|---|---|
| e937552913 | |||
| 26e0d59cf1 | |||
| 64c8679044 | |||
| 8b4f538cbb |
@@ -3,6 +3,7 @@
|
|||||||
require "nokogiri"
|
require "nokogiri"
|
||||||
|
|
||||||
require_relative "notare/version"
|
require_relative "notare/version"
|
||||||
|
require_relative "notare/xml_sanitizer"
|
||||||
require_relative "notare/nodes/base"
|
require_relative "notare/nodes/base"
|
||||||
require_relative "notare/nodes/break"
|
require_relative "notare/nodes/break"
|
||||||
require_relative "notare/nodes/hyperlink"
|
require_relative "notare/nodes/hyperlink"
|
||||||
|
|||||||
@@ -4,16 +4,21 @@ module Notare
|
|||||||
class Document
|
class Document
|
||||||
include Builder
|
include Builder
|
||||||
|
|
||||||
attr_reader :nodes, :styles, :table_styles, :hyperlinks
|
DEFAULT_FONT = "Arial"
|
||||||
|
DEFAULT_SIZE = 12
|
||||||
|
|
||||||
def self.create(path, &block)
|
attr_reader :nodes, :styles, :table_styles, :hyperlinks, :default_font, :default_size
|
||||||
doc = new
|
|
||||||
|
def self.create(path, default_font: DEFAULT_FONT, default_size: DEFAULT_SIZE, &block)
|
||||||
|
doc = new(default_font: default_font, default_size: default_size)
|
||||||
block.call(doc)
|
block.call(doc)
|
||||||
doc.save(path)
|
doc.save(path)
|
||||||
doc
|
doc
|
||||||
end
|
end
|
||||||
|
|
||||||
def initialize
|
def initialize(default_font: DEFAULT_FONT, default_size: DEFAULT_SIZE)
|
||||||
|
@default_font = default_font
|
||||||
|
@default_size = default_size
|
||||||
@nodes = []
|
@nodes = []
|
||||||
@format_stack = []
|
@format_stack = []
|
||||||
@current_target = nil
|
@current_target = nil
|
||||||
@@ -51,7 +56,7 @@ module Notare
|
|||||||
end
|
end
|
||||||
|
|
||||||
def lists
|
def lists
|
||||||
@nodes.select { |n| n.is_a?(Nodes::List) }
|
@nodes.grep(Nodes::List)
|
||||||
end
|
end
|
||||||
|
|
||||||
def uses_lists?
|
def uses_lists?
|
||||||
@@ -101,12 +106,12 @@ module Notare
|
|||||||
|
|
||||||
def register_built_in_styles
|
def register_built_in_styles
|
||||||
# Headings (spacing_before ensures they're rendered as paragraph styles)
|
# Headings (spacing_before ensures they're rendered as paragraph styles)
|
||||||
define_style :heading1, size: 24, bold: true, spacing_before: 240, spacing_after: 120
|
define_style :heading1, size: 20, bold: true, spacing_before: 240, spacing_after: 120
|
||||||
define_style :heading2, size: 18, bold: true, spacing_before: 200, spacing_after: 100
|
define_style :heading2, size: 16, bold: true, spacing_before: 200, spacing_after: 100
|
||||||
define_style :heading3, size: 14, bold: true, spacing_before: 160, spacing_after: 80
|
define_style :heading3, size: 14, bold: true, spacing_before: 160, spacing_after: 80
|
||||||
define_style :heading4, size: 12, bold: true, spacing_before: 120, spacing_after: 60
|
define_style :heading4, size: 12, bold: true, spacing_before: 120, spacing_after: 60
|
||||||
define_style :heading5, size: 11, bold: true, italic: true, spacing_before: 100, spacing_after: 40
|
define_style :heading5, size: 12, bold: true, italic: true, spacing_before: 100, spacing_after: 40
|
||||||
define_style :heading6, size: 10, bold: true, italic: true, spacing_before: 80, spacing_after: 40
|
define_style :heading6, size: 12, italic: true, spacing_before: 80, spacing_after: 40
|
||||||
|
|
||||||
# Other built-in styles
|
# Other built-in styles
|
||||||
define_style :title, size: 26, bold: true, align: :center
|
define_style :title, size: 26, bold: true, align: :center
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ module Notare
|
|||||||
def initialize(text, bold: false, italic: false, underline: false,
|
def initialize(text, bold: false, italic: false, underline: false,
|
||||||
strike: false, highlight: nil, color: nil, style: nil)
|
strike: false, highlight: nil, color: nil, style: nil)
|
||||||
super()
|
super()
|
||||||
@text = text
|
@text = XmlSanitizer.sanitize(text)
|
||||||
@bold = bold
|
@bold = bold
|
||||||
@italic = italic
|
@italic = italic
|
||||||
@underline = underline
|
@underline = underline
|
||||||
|
|||||||
@@ -59,7 +59,12 @@ module Notare
|
|||||||
end
|
end
|
||||||
|
|
||||||
def styles_xml
|
def styles_xml
|
||||||
Xml::StylesXml.new(@document.styles, @document.table_styles).to_xml
|
Xml::StylesXml.new(
|
||||||
|
@document.styles,
|
||||||
|
@document.table_styles,
|
||||||
|
default_font: @document.default_font,
|
||||||
|
default_size: @document.default_size
|
||||||
|
).to_xml
|
||||||
end
|
end
|
||||||
|
|
||||||
def numbering_xml
|
def numbering_xml
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
|
|
||||||
module Notare
|
module Notare
|
||||||
VERSION = "0.0.4"
|
VERSION = "0.0.7"
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -18,9 +18,11 @@ module Notare
|
|||||||
right: "right"
|
right: "right"
|
||||||
}.freeze
|
}.freeze
|
||||||
|
|
||||||
def initialize(styles, table_styles = {})
|
def initialize(styles, table_styles = {}, default_font: nil, default_size: nil)
|
||||||
@styles = styles
|
@styles = styles
|
||||||
@table_styles = table_styles
|
@table_styles = table_styles
|
||||||
|
@default_font = default_font
|
||||||
|
@default_size = default_size
|
||||||
end
|
end
|
||||||
|
|
||||||
def to_xml
|
def to_xml
|
||||||
@@ -28,6 +30,8 @@ module Notare
|
|||||||
xml.styles("xmlns:w" => NAMESPACE) do
|
xml.styles("xmlns:w" => NAMESPACE) do
|
||||||
xml.parent.namespace = xml.parent.namespace_definitions.find { |ns| ns.prefix == "w" }
|
xml.parent.namespace = xml.parent.namespace_definitions.find { |ns| ns.prefix == "w" }
|
||||||
|
|
||||||
|
render_doc_defaults(xml) if @default_font || @default_size
|
||||||
|
|
||||||
@styles.each_value do |style|
|
@styles.each_value do |style|
|
||||||
render_style(xml, style)
|
render_style(xml, style)
|
||||||
end
|
end
|
||||||
@@ -44,6 +48,28 @@ module Notare
|
|||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
|
# Writes the w:docDefaults / w:rPrDefault / w:rPr element chain carrying the
# document-wide default run properties.
#
# A w:rFonts element is emitted only when @default_font is set, and the
# w:sz / w:szCs pair only when @default_size is set.
#
# @param xml the XML builder the elements are appended to
def render_doc_defaults(xml)
  xml["w"].docDefaults do
    xml["w"].rPrDefault do
      xml["w"].rPr do
        if @default_font
          # Same font for every script slot, in the order the attributes are written.
          font_attrs = %w[w:ascii w:hAnsi w:eastAsia w:cs].each_with_object({}) do |attr_name, attrs|
            attrs[attr_name] = @default_font
          end
          xml["w"].rFonts(font_attrs)
        end

        if @default_size
          # The value written is twice the configured size, truncated to an integer.
          half_point_value = (@default_size * 2).to_i.to_s
          xml["w"].sz("w:val" => half_point_value)
          xml["w"].szCs("w:val" => half_point_value)
        end
      end
    end
  end
end
|
||||||
|
|
||||||
def render_style(xml, style)
|
def render_style(xml, style)
|
||||||
style_type = style.paragraph_properties? ? "paragraph" : "character"
|
style_type = style.paragraph_properties? ? "paragraph" : "character"
|
||||||
|
|
||||||
|
|||||||
15
lib/notare/xml_sanitizer.rb
Normal file
15
lib/notare/xml_sanitizer.rb
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
module Notare
  # Removes characters that are not allowed in XML 1.0 documents from text
  # before it is stored for rendering.
  module XmlSanitizer
    # Invalid XML 1.0 control characters: 0x00, 0x01-0x08, 0x0B-0x0C, 0x0E-0x1F
    # Valid whitespace preserved: 0x09 (tab), 0x0A (LF), 0x0D (CR)
    INVALID_XML_CHARS = /[\x00-\x08\x0B\x0C\x0E-\x1F]/

    # U+FFFE and U+FFFF are also excluded from the XML 1.0 Char production.
    # Kept as a separate pattern: the \u escapes pin it to UTF-8, and a
    # UTF-8 pattern cannot be matched against binary strings that contain
    # high bytes, which INVALID_XML_CHARS still handles.
    INVALID_XML_NONCHARACTERS = /[\uFFFE\uFFFF]/

    # Strips characters that XML 1.0 forbids from +text+.
    #
    # @param text [Object] value to clean; only Strings are processed
    # @return [Object] a new String without the forbidden characters, or
    #   +text+ itself when it is not a String (nil, numbers, symbols, ...)
    def self.sanitize(text)
      return text unless text.is_a?(String)

      cleaned = text.gsub(INVALID_XML_CHARS, "")
      # The non-character pass only applies to UTF-8 text; other encodings
      # keep the previous behavior.
      return cleaned unless cleaned.encoding == Encoding::UTF_8

      cleaned.gsub(INVALID_XML_NONCHARACTERS, "")
    end
  end
end
|
||||||
@@ -69,6 +69,6 @@ class DocumentTest < Minitest::Test
|
|||||||
doc.table { doc.tr { doc.td "Cell" } }
|
doc.table { doc.tr { doc.td "Cell" } }
|
||||||
|
|
||||||
assert_equal 2, doc.lists.count
|
assert_equal 2, doc.lists.count
|
||||||
assert(doc.lists.all? { |l| l.is_a?(Notare::Nodes::List) })
|
assert(doc.lists.all?(Notare::Nodes::List))
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -111,4 +111,21 @@ class ParagraphTest < Minitest::Test
|
|||||||
# Newlines should be preserved in the text
|
# Newlines should be preserved in the text
|
||||||
assert_includes xml, "Line 1\nLine 2\nLine 3"
|
assert_includes xml, "Line 1\nLine 2\nLine 3"
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Paragraph text containing characters that XML forbids must come out of
# the generated document without them, and the document must still parse.
def test_invalid_xml_characters_are_stripped
  dirty_paragraphs = [
    "infrastruktur\x02bidrag",
    "hello\x00world",
    "test\x01\x03\x04value"
  ]

  document_xml = create_doc_and_read_xml do |doc|
    dirty_paragraphs.each { |paragraph_text| doc.p paragraph_text }
  end

  # Each paragraph should appear with the invalid characters removed
  %w[infrastrukturbidrag helloworld testvalue].each do |clean_text|
    assert_includes document_xml, clean_text
  end

  # Strict parsing raises on malformed XML, so reaching the assertion
  # already means the document was accepted by the parser
  parsed = Nokogiri::XML(document_xml) { |config| config.strict }
  assert parsed.errors.empty?, "XML should be valid: #{parsed.errors}"
end
|
||||||
end
|
end
|
||||||
|
|||||||
73
test/xml_sanitizer_test.rb
Normal file
73
test/xml_sanitizer_test.rb
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
require "test_helper"
|
||||||
|
|
||||||
|
# Unit tests for Notare::XmlSanitizer.sanitize: characters that must be
# removed, characters that must survive, and non-String pass-through.
class XmlSanitizerTest < Minitest::Test
  def test_removes_null_character
    assert_equal "hello", sanitize("hel\x00lo")
  end

  def test_removes_control_characters_0x01_to_0x08
    assert_equal "abcdefghi", sanitize("a\x01b\x02c\x03d\x04e\x05f\x06g\x07h\x08i")
  end

  def test_removes_control_characters_0x0b_and_0x0c
    assert_equal "helloworldtest", sanitize("hello\x0Bworld\x0Ctest")
  end

  def test_removes_control_characters_0x0e_to_0x1f
    assert_equal "abcdef", sanitize("a\x0Eb\x0Fc\x10d\x11e\x1Ff")
  end

  def test_preserves_tab_character
    assert_unchanged "hello\tworld"
  end

  def test_preserves_newline_character
    assert_unchanged "hello\nworld"
  end

  def test_preserves_carriage_return_character
    assert_unchanged "hello\rworld"
  end

  def test_preserves_crlf
    assert_unchanged "hello\r\nworld"
  end

  def test_returns_nil_unchanged
    assert_nil sanitize(nil)
  end

  def test_returns_non_string_unchanged
    [123, :symbol].each do |value|
      assert_equal value, sanitize(value)
    end
  end

  def test_preserves_unicode_characters
    assert_unchanged "café naïve 日本語 🎉"
  end

  def test_preserves_regular_text
    assert_unchanged "Hello, World! This is normal text."
  end

  def test_handles_empty_string
    assert_equal "", sanitize("")
  end

  def test_real_world_case_stx_character
    # The actual case from the failed.docx: 0x02 (STX) character
    assert_equal "infrastrukturbidrag", sanitize("infrastruktur\x02bidrag")
  end

  private

  # Shorthand for the method under test.
  def sanitize(value)
    Notare::XmlSanitizer.sanitize(value)
  end

  # Asserts that sanitizing +text+ yields an equal string.
  def assert_unchanged(text)
    assert_equal text, sanitize(text)
  end
end
|
||||||
Reference in New Issue
Block a user