Antonio Mika
·
07 Nov 24
mdparser.go
1package shared
2
3import (
4 "bytes"
5 "fmt"
6 "strings"
7 "time"
8
9 "github.com/alecthomas/chroma/v2/formatters/html"
10 "github.com/araddon/dateparse"
11 "github.com/microcosm-cc/bluemonday"
12 "github.com/yuin/goldmark"
13 highlighting "github.com/yuin/goldmark-highlighting/v2"
14 meta "github.com/yuin/goldmark-meta"
15 "github.com/yuin/goldmark/ast"
16 "github.com/yuin/goldmark/extension"
17 "github.com/yuin/goldmark/parser"
18 ghtml "github.com/yuin/goldmark/renderer/html"
19 gtext "github.com/yuin/goldmark/text"
20 "go.abhg.dev/goldmark/anchor"
21 "go.abhg.dev/goldmark/hashtag"
22 "go.abhg.dev/goldmark/toc"
23 yaml "gopkg.in/yaml.v2"
24)
25
// Link is a single navigation entry built from the `nav` front-matter field.
type Link struct {
	// URL is the link target (the value side of a `text: href` nav entry).
	URL string
	// Text is the link label (the key side of a `text: href` nav entry).
	Text string
}
30
// MetaData holds the document-level information ParseText extracts from the
// YAML front matter (and, for Title and Tags, from the document body).
type MetaData struct {
	// PublishAt is parsed from the `date` field; nil when no date is given.
	PublishAt *time.Time
	// Title comes from the `title` field, falling back to a leading <h1>.
	Title string
	// Description comes from the `description` field.
	Description string
	// Nav is the ordered list of links from the `nav` field.
	Nav []Link
	// Tags comes from the `tags` field, falling back to hashtags in the body
	// when the field is absent.
	Tags []string
	// Aliases comes from the `aliases` field, with a leading "/" trimmed.
	Aliases []string
	// Layout comes from the `layout` field.
	Layout string
	// Image comes from the `image` field.
	Image string
	// ImageCard comes from the `card` field.
	ImageCard string
	// Favicon comes from the `favicon` field.
	Favicon string
	// Hidden comes from the `draft` field (default false).
	Hidden bool
	// WithStyles comes from the `with_styles` field (default true).
	WithStyles bool
}
45
// ParsedText is the result of ParseText: the rendered document plus its
// metadata.
type ParsedText struct {
	// Html is the rendered markdown after it has been run through the
	// package's sanitization policy.
	Html string
	// MetaData is embedded, so its fields are promoted onto ParsedText.
	*MetaData
}
50
51func HtmlPolicy() *bluemonday.Policy {
52 policy := bluemonday.UGCPolicy()
53 policy.AllowStyling()
54 policy.AllowAttrs("rel").OnElements("a")
55 return policy
56}
57
// policy is the shared sanitization policy, built once at package
// initialization and used by ParseText to sanitize rendered HTML.
var policy = HtmlPolicy()
59
// toString converts a raw front-matter value into a string.
//
// A nil value (field absent) yields "" with no error. Any non-string value
// yields "" and an error naming the offending type.
func toString(obj interface{}) (string, error) {
	if str, isString := obj.(string); isString {
		return str, nil
	}
	if obj == nil {
		return "", nil
	}
	return "", fmt.Errorf("incorrect type for value: %T, should be string", obj)
}
71
// toBool converts a raw front-matter value into a bool.
//
// A nil value (field absent) yields fallback with no error. Any non-bool
// value yields false and an error naming the offending type.
func toBool(obj interface{}, fallback bool) (bool, error) {
	if obj == nil {
		return fallback, nil
	}
	flag, isBool := obj.(bool)
	if !isBool {
		return false, fmt.Errorf("incorrect type for value: %T, should be bool", obj)
	}
	return flag, nil
}
83
// toToc interprets the `toc` front-matter value, which may be a boolean or an
// integer, and normalizes it to a depth setting:
//
//   - -1 (or false, or an absent field): do not generate a toc.
//   - 0 (or true): generate a toc with no depth limit.
//   - n > 0: generate a toc limited to n levels past the title.
//
// Integers below -1 are clamped to -1. Any other type yields -1 and an error.
func toToc(obj interface{}) (int, error) {
	if obj == nil {
		return -1, nil
	}

	if enabled, isBool := obj.(bool); isBool {
		if !enabled {
			return -1, nil
		}
		return 0, nil
	}

	depth, isInt := obj.(int)
	if !isInt {
		return -1, fmt.Errorf("incorrect type for value: %T, should be bool or int", obj)
	}
	if depth < -1 {
		return -1, nil
	}
	return depth, nil
}
108
109func toLinks(orderedMetaData yaml.MapSlice) ([]Link, error) {
110 var navData interface{}
111 for i := 0; i < len(orderedMetaData); i++ {
112 var item = orderedMetaData[i]
113 if item.Key == "nav" {
114 navData = item.Value
115 break
116 }
117 }
118
119 links := []Link{}
120 if navData == nil {
121 return links, nil
122 }
123
124 addLinks := func(raw yaml.MapSlice) {
125 for _, k := range raw {
126 links = append(links, Link{
127 Text: k.Key.(string),
128 URL: k.Value.(string),
129 })
130 }
131 }
132
133 switch raw := navData.(type) {
134 case yaml.MapSlice:
135 addLinks(raw)
136 case []interface{}:
137 for _, v := range raw {
138 switch linkRaw := v.(type) {
139 case yaml.MapSlice:
140 addLinks(v.(yaml.MapSlice))
141 default:
142 return links, fmt.Errorf("unsupported type for `nav` link item (%T), looking for map (`text: href`)", linkRaw)
143 }
144 }
145 default:
146 return links, fmt.Errorf("unsupported type for `nav` variable: %T", raw)
147 }
148
149 return links, nil
150}
151
// toAliases converts the raw `aliases` front-matter value into a list of
// alias paths.
//
// Accepted forms are a list of strings or a single whitespace-separated
// string. Each alias has surrounding whitespace and one leading "/" removed.
// A nil value (field absent) yields an empty, non-nil slice.
//
// An error is returned when the value, or any list item, is not a string; the
// aliases collected before the offending item are returned alongside it.
func toAliases(obj interface{}) ([]string, error) {
	arr := make([]string, 0)
	if obj == nil {
		return arr, nil
	}

	switch raw := obj.(type) {
	case []interface{}:
		for _, item := range raw {
			// Checked assertion: a non-string item (e.g. a number) in user
			// front matter must be reported, not panic.
			alias, ok := item.(string)
			if !ok {
				return arr, fmt.Errorf("unsupported type for `aliases` item (%T), looking for string", item)
			}
			arr = append(arr, strings.TrimPrefix(strings.TrimSpace(alias), "/"))
		}
	case string:
		// Fields (rather than Split on " ") so repeated spaces or an empty
		// string do not produce empty aliases.
		for _, alias := range strings.Fields(raw) {
			arr = append(arr, strings.TrimPrefix(alias, "/"))
		}
	default:
		return arr, fmt.Errorf("unsupported type for `aliases` variable: %T", raw)
	}

	return arr, nil
}
176
// toTags converts the raw `tags` front-matter value into a list of tags.
//
// Accepted forms are a list of strings (items are taken verbatim) or a single
// whitespace-separated string. A nil value (field absent) yields an empty,
// non-nil slice.
//
// An error is returned when the value, or any list item, is not a string; the
// tags collected before the offending item are returned alongside it.
func toTags(obj interface{}) ([]string, error) {
	arr := make([]string, 0)
	if obj == nil {
		return arr, nil
	}

	switch raw := obj.(type) {
	case []interface{}:
		for _, item := range raw {
			// Checked assertion: a non-string item (e.g. a number) in user
			// front matter must be reported, not panic.
			tag, ok := item.(string)
			if !ok {
				return arr, fmt.Errorf("unsupported type for `tags` item (%T), looking for string", item)
			}
			arr = append(arr, tag)
		}
	case string:
		// Fields (rather than Split on " ") so repeated spaces or an empty
		// string do not produce empty tags.
		arr = append(arr, strings.Fields(raw)...)
	default:
		return arr, fmt.Errorf("unsupported type for `tags` variable: %T", raw)
	}

	return arr, nil
}
199
200func CreateGoldmark(extenders ...goldmark.Extender) goldmark.Markdown {
201 return goldmark.New(
202 goldmark.WithExtensions(
203 extenders...,
204 ),
205 goldmark.WithParserOptions(
206 parser.WithAutoHeadingID(),
207 ),
208 goldmark.WithRendererOptions(
209 ghtml.WithUnsafe(),
210 ),
211 )
212}
213
214func ParseText(text string) (*ParsedText, error) {
215 parsed := ParsedText{
216 MetaData: &MetaData{
217 Tags: []string{},
218 Aliases: []string{},
219 WithStyles: true,
220 },
221 }
222 hili := highlighting.NewHighlighting(
223 highlighting.WithFormatOptions(
224 html.WithLineNumbers(true),
225 html.WithClasses(true),
226 ),
227 )
228 extenders := []goldmark.Extender{
229 extension.GFM,
230 extension.Footnote,
231 meta.Meta,
232 &hashtag.Extender{},
233 hili,
234 &anchor.Extender{
235 Position: anchor.After,
236 Texter: anchor.Text("#"),
237 },
238 }
239 md := CreateGoldmark(extenders...)
240 context := parser.NewContext()
241 // we do the Parse/Render steps manually to get a chance to examine the AST
242 btext := []byte(text)
243 doc := md.Parser().Parse(gtext.NewReader(btext), parser.WithContext(context))
244 metaData := meta.Get(context)
245
246 // title:
247 // 1. if specified in frontmatter, use that
248 title, err := toString(metaData["title"])
249 if err != nil {
250 return &parsed, fmt.Errorf("front-matter field (%s): %w", "title", err)
251 }
252 // 2. If an <h1> is found before a <p> or other heading is found, use that
253 if title == "" {
254 title = AstTitle(doc, btext, true)
255 }
256 // 3. else, set it to nothing (slug should get used later down the line)
257 // this is implicit since it's already ""
258 parsed.MetaData.Title = title
259
260 // only handle toc after the title is extracted (if it's getting extracted)
261 mtoc, err := toToc(metaData["toc"])
262 if err != nil {
263 return &parsed, fmt.Errorf("front-matter field (%s): %w", "toc", err)
264 }
265 if mtoc >= 0 {
266 err = AstToc(doc, btext, mtoc)
267 if err != nil {
268 return &parsed, fmt.Errorf("error generating toc: %w", err)
269 }
270 }
271
272 description, err := toString(metaData["description"])
273 if err != nil {
274 return &parsed, fmt.Errorf("front-matter field (%s): %w", "description", err)
275 }
276 parsed.MetaData.Description = description
277
278 layout, err := toString(metaData["layout"])
279 if err != nil {
280 return &parsed, fmt.Errorf("front-matter field (%s): %w", "layout", err)
281 }
282 parsed.MetaData.Layout = layout
283
284 image, err := toString(metaData["image"])
285 if err != nil {
286 return &parsed, fmt.Errorf("front-matter field (%s): %w", "image", err)
287 }
288 parsed.MetaData.Image = image
289
290 card, err := toString(metaData["card"])
291 if err != nil {
292 return &parsed, fmt.Errorf("front-matter field (%s): %w", "card", err)
293 }
294 parsed.MetaData.ImageCard = card
295
296 hidden, err := toBool(metaData["draft"], false)
297 if err != nil {
298 return &parsed, fmt.Errorf("front-matter field (%s): %w", "draft", err)
299 }
300 parsed.MetaData.Hidden = hidden
301
302 withStyles, err := toBool(metaData["with_styles"], true)
303 if err != nil {
304 return &parsed, fmt.Errorf("front-matter field (%s): %w", "with_style", err)
305 }
306 parsed.MetaData.WithStyles = withStyles
307
308 favicon, err := toString(metaData["favicon"])
309 if err != nil {
310 return &parsed, fmt.Errorf("front-matter field (%s): %w", "favicon", err)
311 }
312 parsed.MetaData.Favicon = favicon
313
314 var publishAt *time.Time = nil
315 date, err := toString(metaData["date"])
316 if err != nil {
317 return &parsed, fmt.Errorf("front-matter field (%s): %w", "date", err)
318 }
319
320 if date != "" {
321 nextDate, err := dateparse.ParseStrict(date)
322 if err != nil {
323 return &parsed, err
324 }
325 publishAt = &nextDate
326 }
327 parsed.MetaData.PublishAt = publishAt
328
329 orderedMetaData := meta.GetItems(context)
330
331 nav, err := toLinks(orderedMetaData)
332 if err != nil {
333 return &parsed, err
334 }
335 parsed.MetaData.Nav = nav
336
337 aliases, err := toAliases(metaData["aliases"])
338 if err != nil {
339 return &parsed, err
340 }
341 parsed.MetaData.Aliases = aliases
342
343 rtags := metaData["tags"]
344 tags, err := toTags(rtags)
345 if err != nil {
346 return &parsed, err
347 }
348 // fill from hashtag ASTs as fallback
349 if rtags == nil {
350 tags = AstTags(doc)
351 }
352 parsed.MetaData.Tags = tags
353
354 // Rendering happens last to allow any of the previous steps to manipulate
355 // the AST.
356 var buf bytes.Buffer
357 if err := md.Renderer().Render(&buf, btext, doc); err != nil {
358 return &parsed, err
359 }
360 parsed.Html = policy.Sanitize(buf.String())
361
362 return &parsed, nil
363}
364
365func AstTags(doc ast.Node) []string {
366 var tags []string
367 err := ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
368 switch n.Kind() {
369 // ignore hashtags inside of these sections
370 case ast.KindBlockquote, ast.KindCodeBlock, ast.KindCodeSpan:
371 return ast.WalkSkipChildren, nil
372 // register hashtags
373 case hashtag.Kind:
374 t := n.(*hashtag.Node)
375 if entering { // only add each tag once
376 tags = append(tags, string(t.Tag))
377 }
378 }
379 // out-of-switch default
380 return ast.WalkContinue, nil
381 })
382 if err != nil {
383 panic(err) // unreachable
384 }
385
386 // sort and deduplicate results
387 dedupe := removeDuplicateStr(tags)
388 return dedupe
389}
390
// removeDuplicateStr returns the distinct strings of strSlice in the order
// they first appear. The result is always non-nil, even for empty input.
//
// Adapted from https://stackoverflow.com/a/66751055
func removeDuplicateStr(strSlice []string) []string {
	seen := make(map[string]struct{}, len(strSlice))
	unique := make([]string, 0)
	for i := range strSlice {
		s := strSlice[i]
		if _, dup := seen[s]; dup {
			continue
		}
		seen[s] = struct{}{}
		unique = append(unique, s)
	}
	return unique
}
403
404// AstTitle extracts the title (if any) from a parsed markdown document.
405//
406// If "clean" is true, it will also remove the heading node from the AST.
407func AstTitle(doc ast.Node, src []byte, clean bool) string {
408 out := ""
409 err := ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
410 if n.Kind() == ast.KindHeading {
411 if h := n.(*ast.Heading); h.Level == 1 {
412 if clean {
413 p := h.Parent()
414 p.RemoveChild(p, n)
415 }
416 out = string(h.Lines().Value(src))
417 }
418 return ast.WalkStop, nil
419 }
420 if ast.IsParagraph(n) {
421 return ast.WalkStop, nil
422 }
423 return ast.WalkContinue, nil
424 })
425 if err != nil {
426 panic(err) // unreachable
427 }
428 return out
429}
430
431func AstToc(doc ast.Node, src []byte, mtoc int) error {
432 var tree *toc.TOC
433 if mtoc >= 0 {
434 var err error
435 if mtoc > 0 {
436 tree, err = toc.Inspect(doc, src, toc.Compact(true), toc.MinDepth(2), toc.MaxDepth(mtoc+1))
437 } else {
438 tree, err = toc.Inspect(doc, src, toc.Compact(true), toc.MinDepth(2))
439 }
440 if err != nil {
441 return err
442 }
443 if tree == nil {
444 return nil // no headings?
445 }
446 }
447 list := toc.RenderList(tree)
448 if list == nil {
449 return nil // no headings
450 }
451
452 list.SetAttributeString("id", []byte("toc-list"))
453
454 // generate # toc
455 heading := ast.NewHeading(2)
456 heading.SetAttributeString("id", []byte("toc"))
457 heading.AppendChild(heading, ast.NewString([]byte("Table of Contents")))
458
459 // insert
460 doc.InsertBefore(doc, doc.FirstChild(), list)
461 doc.InsertBefore(doc, doc.FirstChild(), heading)
462 return nil
463}