repos / pico

pico services - prose.sh, pastes.sh, imgs.sh, feeds.sh, pgs.sh
git clone https://github.com/picosh/pico.git

pico / shared
Eric Bower · 16 Sep 24

mdparser.go

  1package shared
  2
  3import (
  4	"bytes"
  5	"fmt"
  6	"strings"
  7	"time"
  8
  9	"github.com/alecthomas/chroma/v2/formatters/html"
 10	"github.com/araddon/dateparse"
 11	"github.com/microcosm-cc/bluemonday"
 12	"github.com/yuin/goldmark"
 13	highlighting "github.com/yuin/goldmark-highlighting/v2"
 14	meta "github.com/yuin/goldmark-meta"
 15	"github.com/yuin/goldmark/ast"
 16	"github.com/yuin/goldmark/extension"
 17	"github.com/yuin/goldmark/parser"
 18	ghtml "github.com/yuin/goldmark/renderer/html"
 19	gtext "github.com/yuin/goldmark/text"
 20	"go.abhg.dev/goldmark/anchor"
 21	"go.abhg.dev/goldmark/hashtag"
 22	"go.abhg.dev/goldmark/toc"
 23	yaml "gopkg.in/yaml.v2"
 24)
 25
// Link is a single navigation entry built from a `text: href` pair in the
// `nav` front-matter field.
type Link struct {
	URL  string // link target (the value of the `text: href` pair)
	Text string // link label (the key of the `text: href` pair)
}
 30
// MetaData holds the values ParseText extracts from a document's front matter
// (and, for the title and tags, from the markdown body as a fallback).
type MetaData struct {
	PublishAt   *time.Time // parsed `date` field; nil when no date is given
	Title       string     // `title` field, else a leading level-1 heading, else ""
	Description string     // `description` field
	Nav         []Link     // links from the `nav` field, in the order they are listed
	Tags        []string   // `tags` field, or hashtags found in the body when `tags` is absent
	Aliases     []string   // `aliases` field with a leading "/" removed from each entry
	Layout      string     // `layout` field
	Image       string     // `image` field
	ImageCard   string     // `card` field
	Favicon     string     // `favicon` field
	Hidden      bool       // `draft` field
}
 44
// ParsedText is the result of ParseText: the rendered HTML together with the
// metadata extracted from the same document.
type ParsedText struct {
	Html string // rendered markdown after it has been passed through the package sanitizer
	*MetaData
}
 49
 50func HtmlPolicy() *bluemonday.Policy {
 51	policy := bluemonday.UGCPolicy()
 52	policy.AllowStyling()
 53	policy.AllowAttrs("rel").OnElements("a")
 54	return policy
 55}
 56
// policy is the package-wide sanitizer, built once from HtmlPolicy, that
// ParseText applies to the rendered HTML.
var policy = HtmlPolicy()
 58
 59func toString(obj interface{}) (string, error) {
 60	if obj == nil {
 61		return "", nil
 62	}
 63	switch val := obj.(type) {
 64	case string:
 65		return val, nil
 66	default:
 67		return "", fmt.Errorf("incorrect type for value: %T, should be string", val)
 68	}
 69}
 70
 71func toBool(obj interface{}) (bool, error) {
 72	if obj == nil {
 73		return false, nil
 74	}
 75	switch val := obj.(type) {
 76	case bool:
 77		return val, nil
 78	default:
 79		return false, fmt.Errorf("incorrect type for value: %T, should be bool", val)
 80	}
 81}
 82
 83// The toc frontmatter can take a boolean or an integer.
 84//
 85// A value of -1 or false means "do not generate a toc".
 86// A value of 0 or true means "generate a toc with no depth limit".
 87// A value of >0 means "generate a toc with a depth limit of $value past title".
 88func toToc(obj interface{}) (int, error) {
 89	if obj == nil {
 90		return -1, nil
 91	}
 92	switch val := obj.(type) {
 93	case bool:
 94		if val {
 95			return 0, nil
 96		}
 97		return -1, nil
 98	case int:
 99		if val < -1 {
100			val = -1
101		}
102		return val, nil
103	default:
104		return -1, fmt.Errorf("incorrect type for value: %T, should be bool or int", val)
105	}
106}
107
108func toLinks(orderedMetaData yaml.MapSlice) ([]Link, error) {
109	var navData interface{}
110	for i := 0; i < len(orderedMetaData); i++ {
111		var item = orderedMetaData[i]
112		if item.Key == "nav" {
113			navData = item.Value
114			break
115		}
116	}
117
118	links := []Link{}
119	if navData == nil {
120		return links, nil
121	}
122
123	addLinks := func(raw yaml.MapSlice) {
124		for _, k := range raw {
125			links = append(links, Link{
126				Text: k.Key.(string),
127				URL:  k.Value.(string),
128			})
129		}
130	}
131
132	switch raw := navData.(type) {
133	case yaml.MapSlice:
134		addLinks(raw)
135	case []interface{}:
136		for _, v := range raw {
137			switch linkRaw := v.(type) {
138			case yaml.MapSlice:
139				addLinks(v.(yaml.MapSlice))
140			default:
141				return links, fmt.Errorf("unsupported type for `nav` link item (%T), looking for map (`text: href`)", linkRaw)
142			}
143		}
144	default:
145		return links, fmt.Errorf("unsupported type for `nav` variable: %T", raw)
146	}
147
148	return links, nil
149}
150
// toAliases converts the `aliases` front-matter value into a list of aliases.
//
// The value may be a list of strings or a single whitespace-separated string.
// Each alias is trimmed of surrounding whitespace and of one leading "/".
// A nil value (field absent) yields an empty, non-nil slice.
//
// A list item that is not a string, or a value of any other type, yields an
// error together with the aliases collected so far.
func toAliases(obj interface{}) ([]string, error) {
	arr := make([]string, 0)
	if obj == nil {
		return arr, nil
	}

	switch raw := obj.(type) {
	case []interface{}:
		for _, item := range raw {
			// items come from user-written YAML; check instead of asserting
			// so a number or nested value is an error rather than a panic
			alias, ok := item.(string)
			if !ok {
				return arr, fmt.Errorf("unsupported type for `aliases` item: %T, should be string", item)
			}
			arr = append(arr, strings.TrimPrefix(strings.TrimSpace(alias), "/"))
		}
	case string:
		// Fields splits on any run of whitespace, so repeated, leading or
		// trailing spaces do not produce empty aliases
		for _, alias := range strings.Fields(raw) {
			arr = append(arr, strings.TrimPrefix(alias, "/"))
		}
	default:
		return arr, fmt.Errorf("unsupported type for `aliases` variable: %T", raw)
	}

	return arr, nil
}
175
// toTags converts the `tags` front-matter value into a list of tags.
//
// The value may be a list of strings, which are used as written, or a single
// whitespace-separated string. A nil value (field absent) yields an empty,
// non-nil slice.
//
// A list item that is not a string, or a value of any other type, yields an
// error together with the tags collected so far.
func toTags(obj interface{}) ([]string, error) {
	arr := make([]string, 0)
	if obj == nil {
		return arr, nil
	}

	switch raw := obj.(type) {
	case []interface{}:
		for _, item := range raw {
			// items come from user-written YAML; check instead of asserting
			// so a number or nested value is an error rather than a panic
			tag, ok := item.(string)
			if !ok {
				return arr, fmt.Errorf("unsupported type for `tags` item: %T, should be string", item)
			}
			arr = append(arr, tag)
		}
	case string:
		// Fields splits on any run of whitespace, so repeated, leading or
		// trailing spaces do not produce empty tags
		arr = append(arr, strings.Fields(raw)...)
	default:
		return arr, fmt.Errorf("unsupported type for `tags` variable: %T", raw)
	}

	return arr, nil
}
198
199func CreateGoldmark(extenders ...goldmark.Extender) goldmark.Markdown {
200	return goldmark.New(
201		goldmark.WithExtensions(
202			extenders...,
203		),
204		goldmark.WithParserOptions(
205			parser.WithAutoHeadingID(),
206		),
207		goldmark.WithRendererOptions(
208			ghtml.WithUnsafe(),
209		),
210	)
211}
212
213func ParseText(text string) (*ParsedText, error) {
214	parsed := ParsedText{
215		MetaData: &MetaData{
216			Tags:    []string{},
217			Aliases: []string{},
218		},
219	}
220	hili := highlighting.NewHighlighting(
221		highlighting.WithFormatOptions(
222			html.WithLineNumbers(true),
223			html.WithClasses(true),
224		),
225	)
226	extenders := []goldmark.Extender{
227		extension.GFM,
228		extension.Footnote,
229		meta.Meta,
230		&hashtag.Extender{},
231		hili,
232		&anchor.Extender{
233			Position: anchor.After,
234			Texter:   anchor.Text("#"),
235		},
236	}
237	md := CreateGoldmark(extenders...)
238	context := parser.NewContext()
239	// we do the Parse/Render steps manually to get a chance to examine the AST
240	btext := []byte(text)
241	doc := md.Parser().Parse(gtext.NewReader(btext), parser.WithContext(context))
242	metaData := meta.Get(context)
243
244	// title:
245	// 1. if specified in frontmatter, use that
246	title, err := toString(metaData["title"])
247	if err != nil {
248		return &parsed, fmt.Errorf("front-matter field (%s): %w", "title", err)
249	}
250	// 2. If an <h1> is found before a <p> or other heading is found, use that
251	if title == "" {
252		title = AstTitle(doc, btext, true)
253	}
254	// 3. else, set it to nothing (slug should get used later down the line)
255	// this is implicit since it's already ""
256	parsed.MetaData.Title = title
257
258	// only handle toc after the title is extracted (if it's getting extracted)
259	mtoc, err := toToc(metaData["toc"])
260	if err != nil {
261		return &parsed, fmt.Errorf("front-matter field (%s): %w", "toc", err)
262	}
263	if mtoc >= 0 {
264		err = AstToc(doc, btext, mtoc)
265		if err != nil {
266			return &parsed, fmt.Errorf("error generating toc: %w", err)
267		}
268	}
269
270	description, err := toString(metaData["description"])
271	if err != nil {
272		return &parsed, fmt.Errorf("front-matter field (%s): %w", "description", err)
273	}
274	parsed.MetaData.Description = description
275
276	layout, err := toString(metaData["layout"])
277	if err != nil {
278		return &parsed, fmt.Errorf("front-matter field (%s): %w", "layout", err)
279	}
280	parsed.MetaData.Layout = layout
281
282	image, err := toString(metaData["image"])
283	if err != nil {
284		return &parsed, fmt.Errorf("front-matter field (%s): %w", "image", err)
285	}
286	parsed.MetaData.Image = image
287
288	card, err := toString(metaData["card"])
289	if err != nil {
290		return &parsed, fmt.Errorf("front-matter field (%s): %w", "card", err)
291	}
292	parsed.MetaData.ImageCard = card
293
294	hidden, err := toBool(metaData["draft"])
295	if err != nil {
296		return &parsed, fmt.Errorf("front-matter field (%s): %w", "draft", err)
297	}
298	parsed.MetaData.Hidden = hidden
299
300	favicon, err := toString(metaData["favicon"])
301	if err != nil {
302		return &parsed, fmt.Errorf("front-matter field (%s): %w", "favicon", err)
303	}
304	parsed.MetaData.Favicon = favicon
305
306	var publishAt *time.Time = nil
307	date, err := toString(metaData["date"])
308	if err != nil {
309		return &parsed, fmt.Errorf("front-matter field (%s): %w", "date", err)
310	}
311
312	if date != "" {
313		nextDate, err := dateparse.ParseStrict(date)
314		if err != nil {
315			return &parsed, err
316		}
317		publishAt = &nextDate
318	}
319	parsed.MetaData.PublishAt = publishAt
320
321	orderedMetaData := meta.GetItems(context)
322
323	nav, err := toLinks(orderedMetaData)
324	if err != nil {
325		return &parsed, err
326	}
327	parsed.MetaData.Nav = nav
328
329	aliases, err := toAliases(metaData["aliases"])
330	if err != nil {
331		return &parsed, err
332	}
333	parsed.MetaData.Aliases = aliases
334
335	rtags := metaData["tags"]
336	tags, err := toTags(rtags)
337	if err != nil {
338		return &parsed, err
339	}
340	// fill from hashtag ASTs as fallback
341	if rtags == nil {
342		tags = AstTags(doc)
343	}
344	parsed.MetaData.Tags = tags
345
346	// Rendering happens last to allow any of the previous steps to manipulate
347	// the AST.
348	var buf bytes.Buffer
349	if err := md.Renderer().Render(&buf, btext, doc); err != nil {
350		return &parsed, err
351	}
352	parsed.Html = policy.Sanitize(buf.String())
353
354	return &parsed, nil
355}
356
357func AstTags(doc ast.Node) []string {
358	var tags []string
359	err := ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
360		switch n.Kind() {
361		// ignore hashtags inside of these sections
362		case ast.KindBlockquote, ast.KindCodeBlock, ast.KindCodeSpan:
363			return ast.WalkSkipChildren, nil
364		// register hashtags
365		case hashtag.Kind:
366			t := n.(*hashtag.Node)
367			if entering { // only add each tag once
368				tags = append(tags, string(t.Tag))
369			}
370		}
371		// out-of-switch default
372		return ast.WalkContinue, nil
373	})
374	if err != nil {
375		panic(err) // unreachable
376	}
377
378	// sort and deduplicate results
379	dedupe := removeDuplicateStr(tags)
380	return dedupe
381}
382
// removeDuplicateStr returns the items of strSlice with repeats dropped,
// keeping the first occurrence of each and preserving order. The result is
// always a non-nil slice, even for empty input.
//
// Adapted from https://stackoverflow.com/a/66751055
func removeDuplicateStr(strSlice []string) []string {
	seen := make(map[string]struct{})
	unique := make([]string, 0)
	for i := range strSlice {
		candidate := strSlice[i]
		if _, dup := seen[candidate]; dup {
			continue
		}
		seen[candidate] = struct{}{}
		unique = append(unique, candidate)
	}
	return unique
}
395
396// AstTitle extracts the title (if any) from a parsed markdown document.
397//
398// If "clean" is true, it will also remove the heading node from the AST.
399func AstTitle(doc ast.Node, src []byte, clean bool) string {
400	out := ""
401	err := ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
402		if n.Kind() == ast.KindHeading {
403			if h := n.(*ast.Heading); h.Level == 1 {
404				if clean {
405					p := h.Parent()
406					p.RemoveChild(p, n)
407				}
408				out = string(h.Text(src))
409			}
410			return ast.WalkStop, nil
411		}
412		if ast.IsParagraph(n) {
413			return ast.WalkStop, nil
414		}
415		return ast.WalkContinue, nil
416	})
417	if err != nil {
418		panic(err) // unreachable
419	}
420	return out
421}
422
423func AstToc(doc ast.Node, src []byte, mtoc int) error {
424	var tree *toc.TOC
425	if mtoc >= 0 {
426		var err error
427		if mtoc > 0 {
428			tree, err = toc.Inspect(doc, src, toc.Compact(true), toc.MinDepth(2), toc.MaxDepth(mtoc+1))
429		} else {
430			tree, err = toc.Inspect(doc, src, toc.Compact(true), toc.MinDepth(2))
431		}
432		if err != nil {
433			return err
434		}
435		if tree == nil {
436			return nil // no headings?
437		}
438	}
439	list := toc.RenderList(tree)
440	if list == nil {
441		return nil // no headings
442	}
443
444	list.SetAttributeString("id", []byte("toc-list"))
445
446	// generate # toc
447	heading := ast.NewHeading(2)
448	heading.SetAttributeString("id", []byte("toc"))
449	heading.AppendChild(heading, ast.NewString([]byte("Table of Contents")))
450
451	// insert
452	doc.InsertBefore(doc, doc.FirstChild(), list)
453	doc.InsertBefore(doc, doc.FirstChild(), heading)
454	return nil
455}