@@ -19,6 +19,11 @@ import (
1919 "golang.org/x/net/html/atom"
2020)
2121
// replacement couples a compiled regular expression with the bytes that
// are substituted for each of its matches.
type replacement struct {
	// pattern is the compiled expression that selects the text to rewrite.
	pattern *regexp.Regexp
	// new is the substitution applied to every match of pattern.
	new []byte
}
26+
2227var (
2328 // styleWrapper is used to surround any text that,
2429 // if the entire page is in a monospace font,
4449 "⁓" : styleWrapper , // Swung dash
4550
4651 }
52+ earlyReplacementRegexes = mustCompileReplacements (earlyReplacements )
4753 // lateReplacements are raw text replacements that will happen after HTML has been parsed.
4854 lateReplacements = map [string ][]byte {
4955 """ : []byte ("\" " ),
5258 }
5359)
5460
61+ func mustCompileReplacements (repls map [string ][]byte ) []replacement {
62+ compiled := make ([]replacement , 0 , len (repls ))
63+ for pattern , repl := range repls {
64+ re , err := regexp .Compile (pattern )
65+ if err != nil {
66+ panic (fmt .Sprintf ("invalid replacement %q: %v" , pattern , err ))
67+ }
68+ compiled = append (compiled , replacement {pattern : re , new : repl })
69+ }
70+ return compiled
71+ }
72+
73+ // applyEarlyReplacements applies early replacements to the HTML node tree.
74+ // It parses each replacement fragment into HTML before inserting it.
75+ func applyEarlyReplacements (root * html.Node ) error {
76+ var walk func (* html.Node , bool ) error
77+ walk = func (n * html.Node , inPre bool ) error {
78+ if n .Type == html .ElementNode && n .DataAtom == atom .Pre {
79+ inPre = true
80+ }
81+ if n .Type == html .TextNode && ! inPre {
82+ if n .Parent == nil {
83+ return nil
84+ }
85+ original := n .Data
86+ updated := original
87+ for _ , repl := range earlyReplacementRegexes {
88+ updated = repl .pattern .ReplaceAllString (updated , string (repl .new ))
89+ }
90+ if updated != original {
91+ fragments , err := html .ParseFragment (strings .NewReader (updated ), n .Parent )
92+ if err != nil {
93+ return err
94+ }
95+ parent := n .Parent
96+ for _ , frag := range fragments {
97+ parent .InsertBefore (frag , n )
98+ }
99+ parent .RemoveChild (n )
100+ return nil
101+ }
102+ }
103+ for child := n .FirstChild ; child != nil ; {
104+ next := child .NextSibling
105+ if err := walk (child , inPre ); err != nil {
106+ return err
107+ }
108+ child = next
109+ }
110+ return nil
111+ }
112+ return walk (root , false )
113+ }
114+
55115// HTMLDocument represents the HTML for a source file.
56116//
57117// The document's source content may or may not be in HTML.
@@ -117,18 +177,14 @@ func (doc *HTMLDocument) Load(r io.Reader) error {
117177 if err != nil {
118178 return fmt .Errorf ("cannot load template frontmatter for %q: %w" , doc .meta .SourcePath , err )
119179 }
120- for old , new := range earlyReplacements {
121- re , err := regexp .Compile (old )
122- if err != nil {
123- return err
124- }
125- body = re .ReplaceAll (body , new )
126- }
127180 root , err := html .Parse (bytes .NewReader (body ))
128181 if err != nil {
129182 return err
130183 }
131184 doc .root = root
185+ if err := applyEarlyReplacements (doc .root ); err != nil {
186+ return err
187+ }
132188 if err := doc .Massage (); err != nil {
133189 return err
134190 }
0 commit comments