2025-10-24 23:34:05 +03:00
|
|
|
package xmlparser
|
2025-10-14 19:40:21 +03:00
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"bufio"
|
|
|
|
|
"errors"
|
|
|
|
|
"fmt"
|
|
|
|
|
"io"
|
2025-11-25 22:34:02 +03:00
|
|
|
"iter"
|
2025-11-25 22:12:43 +03:00
|
|
|
"slices"
|
2025-10-21 20:52:16 +03:00
|
|
|
"strings"
|
2025-10-14 19:40:21 +03:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
type Node struct {
|
|
|
|
|
Parent *Node
|
2025-10-21 20:52:16 +03:00
|
|
|
Name Name
|
2025-11-21 21:31:12 +03:00
|
|
|
Attrs *Attributes
|
2025-10-14 19:40:21 +03:00
|
|
|
Children []any
|
|
|
|
|
}
|
|
|
|
|
|
2025-11-25 22:12:43 +03:00
|
|
|
// FilterChildren removes all child nodes that do not satisfy the given predicate function.
|
|
|
|
|
func (n *Node) FilterChildren(pred func(child any) bool) {
|
|
|
|
|
n.Children = slices.DeleteFunc(n.Children, func(child any) bool {
|
|
|
|
|
return !pred(child)
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
2025-11-25 22:34:02 +03:00
|
|
|
// ChildNodes returns an iterator over children of type *Node.
|
|
|
|
|
func (n *Node) ChildNodes() iter.Seq[*Node] {
|
|
|
|
|
return func(yield func(*Node) bool) {
|
|
|
|
|
for _, child := range n.Children {
|
|
|
|
|
cn, ok := child.(*Node)
|
|
|
|
|
if !ok {
|
|
|
|
|
continue
|
|
|
|
|
}
|
|
|
|
|
if !yield(cn) {
|
|
|
|
|
return
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2025-11-25 22:12:43 +03:00
|
|
|
// FilterChildNodes removes all child nodes of type *Node
|
|
|
|
|
// that do not satisfy the given predicate function.
|
|
|
|
|
func (n *Node) FilterChildNodes(pred func(child *Node) bool) {
|
|
|
|
|
n.Children = slices.DeleteFunc(n.Children, func(child any) bool {
|
|
|
|
|
cn, ok := child.(*Node)
|
|
|
|
|
if !ok {
|
|
|
|
|
return false
|
|
|
|
|
}
|
|
|
|
|
return !pred(cn)
|
|
|
|
|
})
|
|
|
|
|
}
|
|
|
|
|
|
2025-10-21 20:52:16 +03:00
|
|
|
func (n *Node) readFrom(r io.Reader) error {
|
2025-10-14 19:40:21 +03:00
|
|
|
if n.Parent != nil {
|
|
|
|
|
return errors.New("cannot read child node")
|
|
|
|
|
}
|
|
|
|
|
|
2025-10-21 20:52:16 +03:00
|
|
|
dec := NewDecoder(r)
|
|
|
|
|
defer dec.Close()
|
2025-10-14 19:40:21 +03:00
|
|
|
|
|
|
|
|
curNode := n
|
|
|
|
|
|
|
|
|
|
for {
|
|
|
|
|
// Read raw token so decoder doesn't mess with attributes and namespaces.
|
2025-10-21 20:52:16 +03:00
|
|
|
tok, err := dec.Token()
|
2025-10-14 19:40:21 +03:00
|
|
|
if err == io.EOF {
|
|
|
|
|
break
|
|
|
|
|
}
|
|
|
|
|
if err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch t := tok.(type) {
|
2025-11-14 18:37:50 +03:00
|
|
|
case *StartElement:
|
2025-10-14 19:40:21 +03:00
|
|
|
// An element is opened, create a node for it
|
|
|
|
|
el := &Node{
|
|
|
|
|
Parent: curNode,
|
|
|
|
|
Name: t.Name,
|
|
|
|
|
Attrs: t.Attr,
|
|
|
|
|
}
|
|
|
|
|
// Append the node to the current node's children and make it current
|
|
|
|
|
curNode.Children = append(curNode.Children, el)
|
2025-10-24 00:03:18 +03:00
|
|
|
|
|
|
|
|
if !t.SelfClosing {
|
|
|
|
|
curNode = el
|
|
|
|
|
}
|
2025-10-14 19:40:21 +03:00
|
|
|
|
2025-11-14 18:37:50 +03:00
|
|
|
case *EndElement:
|
2025-10-14 19:40:21 +03:00
|
|
|
// If the current node has no parent, then we are at the root,
|
|
|
|
|
// which can't be closed.
|
|
|
|
|
if curNode.Parent == nil {
|
|
|
|
|
return fmt.Errorf(
|
|
|
|
|
"malformed XML: unexpected closing tag </%s> while no elements are opened",
|
2025-11-21 21:31:12 +03:00
|
|
|
t.Name,
|
2025-10-14 19:40:21 +03:00
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
// Closing tag name should match opened node name (which is current)
|
2025-11-21 21:31:12 +03:00
|
|
|
if curNode.Name != t.Name {
|
2025-10-14 19:40:21 +03:00
|
|
|
return fmt.Errorf(
|
|
|
|
|
"malformed XML: unexpected closing tag </%s> for opened <%s> element",
|
2025-11-21 21:31:12 +03:00
|
|
|
t.Name,
|
|
|
|
|
curNode.Name,
|
2025-10-14 19:40:21 +03:00
|
|
|
)
|
|
|
|
|
}
|
|
|
|
|
// The node is closed, return to its parent
|
|
|
|
|
curNode = curNode.Parent
|
|
|
|
|
|
2025-11-14 18:37:50 +03:00
|
|
|
case *Text:
|
2025-10-21 20:52:16 +03:00
|
|
|
curNode.Children = append(curNode.Children, t.Clone())
|
2025-10-14 19:40:21 +03:00
|
|
|
|
2025-11-14 18:37:50 +03:00
|
|
|
case *Directive:
|
2025-10-21 20:52:16 +03:00
|
|
|
curNode.Children = append(curNode.Children, t.Clone())
|
2025-10-14 19:40:21 +03:00
|
|
|
|
2025-11-14 18:37:50 +03:00
|
|
|
case *Comment:
|
2025-10-21 20:52:16 +03:00
|
|
|
curNode.Children = append(curNode.Children, t.Clone())
|
2025-10-14 19:40:21 +03:00
|
|
|
|
2025-11-14 18:37:50 +03:00
|
|
|
case *ProcInst:
|
2025-10-21 20:52:16 +03:00
|
|
|
curNode.Children = append(curNode.Children, t.Clone())
|
2025-10-14 19:40:21 +03:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (n *Node) writeTo(w *bufio.Writer) error {
|
|
|
|
|
if err := w.WriteByte('<'); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
2025-11-21 21:31:12 +03:00
|
|
|
if _, err := w.WriteString(n.Name.String()); err != nil {
|
2025-10-14 19:40:21 +03:00
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
n.writeAttrsTo(w)
|
|
|
|
|
|
|
|
|
|
if len(n.Children) == 0 {
|
|
|
|
|
if _, err := w.WriteString("/>"); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if err := w.WriteByte('>'); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if err := n.writeChildrenTo(w); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if _, err := w.WriteString("</"); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
2025-11-21 21:31:12 +03:00
|
|
|
if _, err := w.WriteString(n.Name.String()); err != nil {
|
2025-10-14 19:40:21 +03:00
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
if err := w.WriteByte('>'); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (n *Node) writeAttrsTo(w *bufio.Writer) error {
|
2025-11-21 21:31:12 +03:00
|
|
|
for attr := range n.Attrs.Iter() {
|
2025-10-14 19:40:21 +03:00
|
|
|
if err := w.WriteByte(' '); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
2025-11-21 21:31:12 +03:00
|
|
|
if _, err := w.WriteString(attr.Name.String()); err != nil {
|
2025-10-14 19:40:21 +03:00
|
|
|
return err
|
|
|
|
|
}
|
2025-10-21 20:52:16 +03:00
|
|
|
if len(attr.Value) > 0 {
|
|
|
|
|
quote := byte('"')
|
|
|
|
|
if strings.IndexByte(attr.Value, quote) != -1 {
|
|
|
|
|
quote = '\''
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if err := w.WriteByte('='); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
if err := w.WriteByte(quote); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
2025-10-14 19:40:21 +03:00
|
|
|
if _, err := w.WriteString(attr.Value); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
2025-10-21 20:52:16 +03:00
|
|
|
if err := w.WriteByte(quote); err != nil {
|
2025-10-14 19:40:21 +03:00
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func (n *Node) writeChildrenTo(w *bufio.Writer) error {
|
|
|
|
|
for _, child := range n.Children {
|
|
|
|
|
switch c := child.(type) {
|
|
|
|
|
case *Node:
|
|
|
|
|
if err := c.writeTo(w); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
2025-11-14 18:37:50 +03:00
|
|
|
case *Text:
|
2025-11-15 19:23:35 +03:00
|
|
|
if c.CData {
|
|
|
|
|
if _, err := w.WriteString("<![CDATA["); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
2025-10-21 20:52:16 +03:00
|
|
|
}
|
2025-11-14 18:37:50 +03:00
|
|
|
if _, err := w.Write(c.Data); err != nil {
|
2025-10-14 19:40:21 +03:00
|
|
|
return err
|
|
|
|
|
}
|
2025-11-15 19:23:35 +03:00
|
|
|
if c.CData {
|
|
|
|
|
if _, err := w.WriteString("]]>"); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
2025-10-21 20:52:16 +03:00
|
|
|
}
|
2025-10-14 19:40:21 +03:00
|
|
|
|
2025-11-14 18:37:50 +03:00
|
|
|
case *Comment:
|
2025-10-14 19:40:21 +03:00
|
|
|
if _, err := w.WriteString("<!--"); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
2025-11-14 18:37:50 +03:00
|
|
|
if _, err := w.Write(c.Data); err != nil {
|
2025-10-14 19:40:21 +03:00
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
if _, err := w.WriteString("-->"); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
2025-11-14 18:37:50 +03:00
|
|
|
case *Directive:
|
2025-10-21 20:52:16 +03:00
|
|
|
if _, err := w.WriteString("<!DOCTYPE"); err != nil {
|
2025-10-14 19:40:21 +03:00
|
|
|
return err
|
|
|
|
|
}
|
2025-11-14 18:37:50 +03:00
|
|
|
if _, err := w.Write(c.Data); err != nil {
|
2025-10-14 19:40:21 +03:00
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
if err := w.WriteByte('>'); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
2025-11-14 18:37:50 +03:00
|
|
|
case *ProcInst:
|
2025-10-14 19:40:21 +03:00
|
|
|
if _, err := w.WriteString("<?"); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
2025-10-23 20:37:58 +03:00
|
|
|
if _, err := w.Write(c.Target); err != nil {
|
2025-10-14 19:40:21 +03:00
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
if len(c.Inst) > 0 {
|
2025-10-21 20:52:16 +03:00
|
|
|
if !isSpace(c.Inst[0]) {
|
|
|
|
|
if err := w.WriteByte(' '); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
2025-10-14 19:40:21 +03:00
|
|
|
}
|
|
|
|
|
if _, err := w.Write([]byte(c.Inst)); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if _, err := w.WriteString("?>"); err != nil {
|
|
|
|
|
return err
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
return fmt.Errorf("unknown child type: %T", c)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return nil
|
|
|
|
|
}
|
|
|
|
|
|
2025-11-14 19:53:21 +03:00
|
|
|
func (n *Node) replaceEntities(em map[string][]byte) {
|
|
|
|
|
// Replace in attributes
|
2025-11-21 21:31:12 +03:00
|
|
|
for attr := range n.Attrs.Iter() {
|
|
|
|
|
attr.Value = replaceEntitiesString(attr.Value, em)
|
2025-11-14 19:53:21 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Replace in children.
|
|
|
|
|
// Only Text nodes are processed, other Node children
|
|
|
|
|
// are processed recursively.
|
|
|
|
|
for _, child := range n.Children {
|
|
|
|
|
switch c := child.(type) {
|
|
|
|
|
case *Node:
|
|
|
|
|
c.replaceEntities(em)
|
|
|
|
|
|
|
|
|
|
case *Text:
|
2025-11-15 19:23:35 +03:00
|
|
|
if !c.CData {
|
|
|
|
|
c.Data = replaceEntitiesBytes(c.Data, em)
|
|
|
|
|
}
|
2025-11-14 19:53:21 +03:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|