Skip to content

Commit 0d489e9

Browse files
committed
a less buggy solution for avoiding <pre>
1 parent 00a2a55 commit 0d489e9

1 file changed

Lines changed: 63 additions & 7 deletions

File tree

document/html.go

Lines changed: 63 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ import (
1919
"golang.org/x/net/html/atom"
2020
)
2121

22+
// replacement pairs a compiled regular expression with the bytes that are
// substituted for each of its matches.
type replacement struct {
	// pattern is the compiled expression whose matches are rewritten.
	pattern *regexp.Regexp

	// new holds the substitution applied to every match of pattern.
	new []byte
}
26+
2227
var (
2328
// styleWrapper is used to surround any text that,
2429
// if the entire page is in a monospace font,
@@ -44,6 +49,7 @@ var (
4449
"⁓": styleWrapper, // Swung dash
4550

4651
}
52+
earlyReplacementRegexes = mustCompileReplacements(earlyReplacements)
4753
// lateReplacements are raw text replacements that will happen after HTML has been parsed.
4854
lateReplacements = map[string][]byte{
4955
"&#34;": []byte("\""),
@@ -52,6 +58,60 @@ var (
5258
}
5359
)
5460

61+
func mustCompileReplacements(repls map[string][]byte) []replacement {
62+
compiled := make([]replacement, 0, len(repls))
63+
for pattern, repl := range repls {
64+
re, err := regexp.Compile(pattern)
65+
if err != nil {
66+
panic(fmt.Sprintf("invalid replacement %q: %v", pattern, err))
67+
}
68+
compiled = append(compiled, replacement{pattern: re, new: repl})
69+
}
70+
return compiled
71+
}
72+
73+
// applyEarlyReplacements applies early replacements to the HTML node tree.
74+
// It parses each replacement fragment into HTML before inserting it.
75+
func applyEarlyReplacements(root *html.Node) error {
76+
var walk func(*html.Node, bool) error
77+
walk = func(n *html.Node, inPre bool) error {
78+
if n.Type == html.ElementNode && n.DataAtom == atom.Pre {
79+
inPre = true
80+
}
81+
if n.Type == html.TextNode && !inPre {
82+
if n.Parent == nil {
83+
return nil
84+
}
85+
original := n.Data
86+
updated := original
87+
for _, repl := range earlyReplacementRegexes {
88+
updated = repl.pattern.ReplaceAllString(updated, string(repl.new))
89+
}
90+
if updated != original {
91+
fragments, err := html.ParseFragment(strings.NewReader(updated), n.Parent)
92+
if err != nil {
93+
return err
94+
}
95+
parent := n.Parent
96+
for _, frag := range fragments {
97+
parent.InsertBefore(frag, n)
98+
}
99+
parent.RemoveChild(n)
100+
return nil
101+
}
102+
}
103+
for child := n.FirstChild; child != nil; {
104+
next := child.NextSibling
105+
if err := walk(child, inPre); err != nil {
106+
return err
107+
}
108+
child = next
109+
}
110+
return nil
111+
}
112+
return walk(root, false)
113+
}
114+
55115
// HTMLDocument represents the HTML for a source file.
56116
//
57117
// The document's source content may or may not be in HTML.
@@ -117,18 +177,14 @@ func (doc *HTMLDocument) Load(r io.Reader) error {
117177
if err != nil {
118178
return fmt.Errorf("cannot load template frontmatter for %q: %w", doc.meta.SourcePath, err)
119179
}
120-
for old, new := range earlyReplacements {
121-
re, err := regexp.Compile(old)
122-
if err != nil {
123-
return err
124-
}
125-
body = re.ReplaceAll(body, new)
126-
}
127180
root, err := html.Parse(bytes.NewReader(body))
128181
if err != nil {
129182
return err
130183
}
131184
doc.root = root
185+
if err := applyEarlyReplacements(doc.root); err != nil {
186+
return err
187+
}
132188
if err := doc.Massage(); err != nil {
133189
return err
134190
}

0 commit comments

Comments
 (0)