Eric Bower
·
16 Sep 24
mdparser.go
1package shared
2
3import (
4 "bytes"
5 "fmt"
6 "strings"
7 "time"
8
9 "github.com/alecthomas/chroma/v2/formatters/html"
10 "github.com/araddon/dateparse"
11 "github.com/microcosm-cc/bluemonday"
12 "github.com/yuin/goldmark"
13 highlighting "github.com/yuin/goldmark-highlighting/v2"
14 meta "github.com/yuin/goldmark-meta"
15 "github.com/yuin/goldmark/ast"
16 "github.com/yuin/goldmark/extension"
17 "github.com/yuin/goldmark/parser"
18 ghtml "github.com/yuin/goldmark/renderer/html"
19 gtext "github.com/yuin/goldmark/text"
20 "go.abhg.dev/goldmark/anchor"
21 "go.abhg.dev/goldmark/hashtag"
22 "go.abhg.dev/goldmark/toc"
23 yaml "gopkg.in/yaml.v2"
24)
25
// Link is a single navigation entry taken from the `nav` front-matter key.
type Link struct {
	// URL is the link target (the value of a `text: href` nav entry).
	URL string
	// Text is the link label (the key of a `text: href` nav entry).
	Text string
}
30
31type MetaData struct {
32 PublishAt *time.Time
33 Title string
34 Description string
35 Nav []Link
36 Tags []string
37 Aliases []string
38 Layout string
39 Image string
40 ImageCard string
41 Favicon string
42 Hidden bool
43}
44
45type ParsedText struct {
46 Html string
47 *MetaData
48}
49
50func HtmlPolicy() *bluemonday.Policy {
51 policy := bluemonday.UGCPolicy()
52 policy.AllowStyling()
53 policy.AllowAttrs("rel").OnElements("a")
54 return policy
55}
56
57var policy = HtmlPolicy()
58
// toString converts a front-matter value to a string.
//
// A nil value yields "" with no error; any non-string value yields an error
// naming the offending type.
func toString(obj interface{}) (string, error) {
	if obj == nil {
		return "", nil
	}
	if s, ok := obj.(string); ok {
		return s, nil
	}
	return "", fmt.Errorf("incorrect type for value: %T, should be string", obj)
}
70
// toBool converts a front-matter value to a bool.
//
// A nil value yields false with no error; any non-bool value yields an error
// naming the offending type.
func toBool(obj interface{}) (bool, error) {
	if obj == nil {
		return false, nil
	}
	if b, ok := obj.(bool); ok {
		return b, nil
	}
	return false, fmt.Errorf("incorrect type for value: %T, should be bool", obj)
}
82
// toToc interprets the `toc` front-matter value, which may be a boolean or an
// integer, and returns the toc depth setting:
//
//   - -1 (also returned for nil, false, and any integer below -1) means
//     "do not generate a toc".
//   - 0 (also returned for true) means "generate a toc with no depth limit".
//   - a value >0 means "generate a toc with a depth limit of $value past title".
//
// Any other type yields -1 and an error naming the offending type.
func toToc(obj interface{}) (int, error) {
	switch v := obj.(type) {
	case nil:
		return -1, nil
	case bool:
		if !v {
			return -1, nil
		}
		return 0, nil
	case int:
		if v >= -1 {
			return v, nil
		}
		// anything below -1 is clamped to the "disabled" value
		return -1, nil
	}
	return -1, fmt.Errorf("incorrect type for value: %T, should be bool or int", obj)
}
107
108func toLinks(orderedMetaData yaml.MapSlice) ([]Link, error) {
109 var navData interface{}
110 for i := 0; i < len(orderedMetaData); i++ {
111 var item = orderedMetaData[i]
112 if item.Key == "nav" {
113 navData = item.Value
114 break
115 }
116 }
117
118 links := []Link{}
119 if navData == nil {
120 return links, nil
121 }
122
123 addLinks := func(raw yaml.MapSlice) {
124 for _, k := range raw {
125 links = append(links, Link{
126 Text: k.Key.(string),
127 URL: k.Value.(string),
128 })
129 }
130 }
131
132 switch raw := navData.(type) {
133 case yaml.MapSlice:
134 addLinks(raw)
135 case []interface{}:
136 for _, v := range raw {
137 switch linkRaw := v.(type) {
138 case yaml.MapSlice:
139 addLinks(v.(yaml.MapSlice))
140 default:
141 return links, fmt.Errorf("unsupported type for `nav` link item (%T), looking for map (`text: href`)", linkRaw)
142 }
143 }
144 default:
145 return links, fmt.Errorf("unsupported type for `nav` variable: %T", raw)
146 }
147
148 return links, nil
149}
150
// toAliases converts the `aliases` front-matter value into a list of aliases.
//
// The value may be a list of strings or a single space-separated string. Each
// alias has surrounding whitespace trimmed and a leading "/" removed. A nil
// value yields an empty, non-nil slice.
//
// An error is returned when the value, or an item of the list, has an
// unsupported type; the aliases collected so far are returned with it.
func toAliases(obj interface{}) ([]string, error) {
	arr := make([]string, 0)
	if obj == nil {
		return arr, nil
	}

	switch raw := obj.(type) {
	case []interface{}:
		for _, item := range raw {
			// Items come from user-written YAML; check the type instead of
			// asserting so a non-string item is an error, not a panic.
			alias, ok := item.(string)
			if !ok {
				return arr, fmt.Errorf("unsupported type for `aliases` item: %T, should be string", item)
			}
			arr = append(arr, strings.TrimPrefix(strings.TrimSpace(alias), "/"))
		}
	case string:
		for _, alias := range strings.Split(raw, " ") {
			als := strings.TrimSpace(alias)
			if als == "" {
				// repeated spaces (or an empty value) produce empty tokens
				continue
			}
			arr = append(arr, strings.TrimPrefix(als, "/"))
		}
	default:
		return arr, fmt.Errorf("unsupported type for `aliases` variable: %T", raw)
	}

	return arr, nil
}
175
// toTags converts the `tags` front-matter value into a list of tags.
//
// The value may be a list of strings (items are used as written) or a single
// space-separated string (each tag has surrounding whitespace trimmed). A nil
// value yields an empty, non-nil slice.
//
// An error is returned when the value, or an item of the list, has an
// unsupported type; the tags collected so far are returned with it.
func toTags(obj interface{}) ([]string, error) {
	arr := make([]string, 0)
	if obj == nil {
		return arr, nil
	}

	switch raw := obj.(type) {
	case []interface{}:
		for _, item := range raw {
			// Items come from user-written YAML; check the type instead of
			// asserting so a non-string item is an error, not a panic.
			tag, ok := item.(string)
			if !ok {
				return arr, fmt.Errorf("unsupported type for `tags` item: %T, should be string", item)
			}
			arr = append(arr, tag)
		}
	case string:
		for _, tag := range strings.Split(raw, " ") {
			trimmed := strings.TrimSpace(tag)
			if trimmed == "" {
				// repeated spaces (or an empty value) produce empty tokens
				continue
			}
			arr = append(arr, trimmed)
		}
	default:
		return arr, fmt.Errorf("unsupported type for `tags` variable: %T", raw)
	}

	return arr, nil
}
198
199func CreateGoldmark(extenders ...goldmark.Extender) goldmark.Markdown {
200 return goldmark.New(
201 goldmark.WithExtensions(
202 extenders...,
203 ),
204 goldmark.WithParserOptions(
205 parser.WithAutoHeadingID(),
206 ),
207 goldmark.WithRendererOptions(
208 ghtml.WithUnsafe(),
209 ),
210 )
211}
212
213func ParseText(text string) (*ParsedText, error) {
214 parsed := ParsedText{
215 MetaData: &MetaData{
216 Tags: []string{},
217 Aliases: []string{},
218 },
219 }
220 hili := highlighting.NewHighlighting(
221 highlighting.WithFormatOptions(
222 html.WithLineNumbers(true),
223 html.WithClasses(true),
224 ),
225 )
226 extenders := []goldmark.Extender{
227 extension.GFM,
228 extension.Footnote,
229 meta.Meta,
230 &hashtag.Extender{},
231 hili,
232 &anchor.Extender{
233 Position: anchor.After,
234 Texter: anchor.Text("#"),
235 },
236 }
237 md := CreateGoldmark(extenders...)
238 context := parser.NewContext()
239 // we do the Parse/Render steps manually to get a chance to examine the AST
240 btext := []byte(text)
241 doc := md.Parser().Parse(gtext.NewReader(btext), parser.WithContext(context))
242 metaData := meta.Get(context)
243
244 // title:
245 // 1. if specified in frontmatter, use that
246 title, err := toString(metaData["title"])
247 if err != nil {
248 return &parsed, fmt.Errorf("front-matter field (%s): %w", "title", err)
249 }
250 // 2. If an <h1> is found before a <p> or other heading is found, use that
251 if title == "" {
252 title = AstTitle(doc, btext, true)
253 }
254 // 3. else, set it to nothing (slug should get used later down the line)
255 // this is implicit since it's already ""
256 parsed.MetaData.Title = title
257
258 // only handle toc after the title is extracted (if it's getting extracted)
259 mtoc, err := toToc(metaData["toc"])
260 if err != nil {
261 return &parsed, fmt.Errorf("front-matter field (%s): %w", "toc", err)
262 }
263 if mtoc >= 0 {
264 err = AstToc(doc, btext, mtoc)
265 if err != nil {
266 return &parsed, fmt.Errorf("error generating toc: %w", err)
267 }
268 }
269
270 description, err := toString(metaData["description"])
271 if err != nil {
272 return &parsed, fmt.Errorf("front-matter field (%s): %w", "description", err)
273 }
274 parsed.MetaData.Description = description
275
276 layout, err := toString(metaData["layout"])
277 if err != nil {
278 return &parsed, fmt.Errorf("front-matter field (%s): %w", "layout", err)
279 }
280 parsed.MetaData.Layout = layout
281
282 image, err := toString(metaData["image"])
283 if err != nil {
284 return &parsed, fmt.Errorf("front-matter field (%s): %w", "image", err)
285 }
286 parsed.MetaData.Image = image
287
288 card, err := toString(metaData["card"])
289 if err != nil {
290 return &parsed, fmt.Errorf("front-matter field (%s): %w", "card", err)
291 }
292 parsed.MetaData.ImageCard = card
293
294 hidden, err := toBool(metaData["draft"])
295 if err != nil {
296 return &parsed, fmt.Errorf("front-matter field (%s): %w", "draft", err)
297 }
298 parsed.MetaData.Hidden = hidden
299
300 favicon, err := toString(metaData["favicon"])
301 if err != nil {
302 return &parsed, fmt.Errorf("front-matter field (%s): %w", "favicon", err)
303 }
304 parsed.MetaData.Favicon = favicon
305
306 var publishAt *time.Time = nil
307 date, err := toString(metaData["date"])
308 if err != nil {
309 return &parsed, fmt.Errorf("front-matter field (%s): %w", "date", err)
310 }
311
312 if date != "" {
313 nextDate, err := dateparse.ParseStrict(date)
314 if err != nil {
315 return &parsed, err
316 }
317 publishAt = &nextDate
318 }
319 parsed.MetaData.PublishAt = publishAt
320
321 orderedMetaData := meta.GetItems(context)
322
323 nav, err := toLinks(orderedMetaData)
324 if err != nil {
325 return &parsed, err
326 }
327 parsed.MetaData.Nav = nav
328
329 aliases, err := toAliases(metaData["aliases"])
330 if err != nil {
331 return &parsed, err
332 }
333 parsed.MetaData.Aliases = aliases
334
335 rtags := metaData["tags"]
336 tags, err := toTags(rtags)
337 if err != nil {
338 return &parsed, err
339 }
340 // fill from hashtag ASTs as fallback
341 if rtags == nil {
342 tags = AstTags(doc)
343 }
344 parsed.MetaData.Tags = tags
345
346 // Rendering happens last to allow any of the previous steps to manipulate
347 // the AST.
348 var buf bytes.Buffer
349 if err := md.Renderer().Render(&buf, btext, doc); err != nil {
350 return &parsed, err
351 }
352 parsed.Html = policy.Sanitize(buf.String())
353
354 return &parsed, nil
355}
356
357func AstTags(doc ast.Node) []string {
358 var tags []string
359 err := ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
360 switch n.Kind() {
361 // ignore hashtags inside of these sections
362 case ast.KindBlockquote, ast.KindCodeBlock, ast.KindCodeSpan:
363 return ast.WalkSkipChildren, nil
364 // register hashtags
365 case hashtag.Kind:
366 t := n.(*hashtag.Node)
367 if entering { // only add each tag once
368 tags = append(tags, string(t.Tag))
369 }
370 }
371 // out-of-switch default
372 return ast.WalkContinue, nil
373 })
374 if err != nil {
375 panic(err) // unreachable
376 }
377
378 // sort and deduplicate results
379 dedupe := removeDuplicateStr(tags)
380 return dedupe
381}
382
// removeDuplicateStr returns the distinct strings of strSlice in order of
// first appearance. The result is always non-nil, even for nil or empty input.
//
// Adapted from https://stackoverflow.com/a/66751055
func removeDuplicateStr(strSlice []string) []string {
	seen := make(map[string]struct{})
	unique := []string{}
	for _, s := range strSlice {
		if _, dup := seen[s]; dup {
			continue
		}
		seen[s] = struct{}{}
		unique = append(unique, s)
	}
	return unique
}
395
396// AstTitle extracts the title (if any) from a parsed markdown document.
397//
398// If "clean" is true, it will also remove the heading node from the AST.
399func AstTitle(doc ast.Node, src []byte, clean bool) string {
400 out := ""
401 err := ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
402 if n.Kind() == ast.KindHeading {
403 if h := n.(*ast.Heading); h.Level == 1 {
404 if clean {
405 p := h.Parent()
406 p.RemoveChild(p, n)
407 }
408 out = string(h.Text(src))
409 }
410 return ast.WalkStop, nil
411 }
412 if ast.IsParagraph(n) {
413 return ast.WalkStop, nil
414 }
415 return ast.WalkContinue, nil
416 })
417 if err != nil {
418 panic(err) // unreachable
419 }
420 return out
421}
422
423func AstToc(doc ast.Node, src []byte, mtoc int) error {
424 var tree *toc.TOC
425 if mtoc >= 0 {
426 var err error
427 if mtoc > 0 {
428 tree, err = toc.Inspect(doc, src, toc.Compact(true), toc.MinDepth(2), toc.MaxDepth(mtoc+1))
429 } else {
430 tree, err = toc.Inspect(doc, src, toc.Compact(true), toc.MinDepth(2))
431 }
432 if err != nil {
433 return err
434 }
435 if tree == nil {
436 return nil // no headings?
437 }
438 }
439 list := toc.RenderList(tree)
440 if list == nil {
441 return nil // no headings
442 }
443
444 list.SetAttributeString("id", []byte("toc-list"))
445
446 // generate # toc
447 heading := ast.NewHeading(2)
448 heading.SetAttributeString("id", []byte("toc"))
449 heading.AppendChild(heading, ast.NewString([]byte("Table of Contents")))
450
451 // insert
452 doc.InsertBefore(doc, doc.FirstChild(), list)
453 doc.InsertBefore(doc, doc.FirstChild(), heading)
454 return nil
455}