repos / pico

pico services - prose.sh, pastes.sh, imgs.sh, feeds.sh, pgs.sh
git clone https://github.com/picosh/pico.git

pico / shared
Antonio Mika · 07 Nov 24

mdparser.go

  1package shared
  2
  3import (
  4	"bytes"
  5	"fmt"
  6	"strings"
  7	"time"
  8
  9	"github.com/alecthomas/chroma/v2/formatters/html"
 10	"github.com/araddon/dateparse"
 11	"github.com/microcosm-cc/bluemonday"
 12	"github.com/yuin/goldmark"
 13	highlighting "github.com/yuin/goldmark-highlighting/v2"
 14	meta "github.com/yuin/goldmark-meta"
 15	"github.com/yuin/goldmark/ast"
 16	"github.com/yuin/goldmark/extension"
 17	"github.com/yuin/goldmark/parser"
 18	ghtml "github.com/yuin/goldmark/renderer/html"
 19	gtext "github.com/yuin/goldmark/text"
 20	"go.abhg.dev/goldmark/anchor"
 21	"go.abhg.dev/goldmark/hashtag"
 22	"go.abhg.dev/goldmark/toc"
 23	yaml "gopkg.in/yaml.v2"
 24)
 25
 26type Link struct {
 27	URL  string
 28	Text string
 29}
 30
 31type MetaData struct {
 32	PublishAt   *time.Time
 33	Title       string
 34	Description string
 35	Nav         []Link
 36	Tags        []string
 37	Aliases     []string
 38	Layout      string
 39	Image       string
 40	ImageCard   string
 41	Favicon     string
 42	Hidden      bool
 43	WithStyles  bool
 44}
 45
 46type ParsedText struct {
 47	Html string
 48	*MetaData
 49}
 50
 51func HtmlPolicy() *bluemonday.Policy {
 52	policy := bluemonday.UGCPolicy()
 53	policy.AllowStyling()
 54	policy.AllowAttrs("rel").OnElements("a")
 55	return policy
 56}
 57
 58var policy = HtmlPolicy()
 59
 60func toString(obj interface{}) (string, error) {
 61	if obj == nil {
 62		return "", nil
 63	}
 64	switch val := obj.(type) {
 65	case string:
 66		return val, nil
 67	default:
 68		return "", fmt.Errorf("incorrect type for value: %T, should be string", val)
 69	}
 70}
 71
 72func toBool(obj interface{}, fallback bool) (bool, error) {
 73	if obj == nil {
 74		return fallback, nil
 75	}
 76	switch val := obj.(type) {
 77	case bool:
 78		return val, nil
 79	default:
 80		return false, fmt.Errorf("incorrect type for value: %T, should be bool", val)
 81	}
 82}
 83
 84// The toc frontmatter can take a boolean or an integer.
 85//
 86// A value of -1 or false means "do not generate a toc".
 87// A value of 0 or true means "generate a toc with no depth limit".
 88// A value of >0 means "generate a toc with a depth limit of $value past title".
 89func toToc(obj interface{}) (int, error) {
 90	if obj == nil {
 91		return -1, nil
 92	}
 93	switch val := obj.(type) {
 94	case bool:
 95		if val {
 96			return 0, nil
 97		}
 98		return -1, nil
 99	case int:
100		if val < -1 {
101			val = -1
102		}
103		return val, nil
104	default:
105		return -1, fmt.Errorf("incorrect type for value: %T, should be bool or int", val)
106	}
107}
108
109func toLinks(orderedMetaData yaml.MapSlice) ([]Link, error) {
110	var navData interface{}
111	for i := 0; i < len(orderedMetaData); i++ {
112		var item = orderedMetaData[i]
113		if item.Key == "nav" {
114			navData = item.Value
115			break
116		}
117	}
118
119	links := []Link{}
120	if navData == nil {
121		return links, nil
122	}
123
124	addLinks := func(raw yaml.MapSlice) {
125		for _, k := range raw {
126			links = append(links, Link{
127				Text: k.Key.(string),
128				URL:  k.Value.(string),
129			})
130		}
131	}
132
133	switch raw := navData.(type) {
134	case yaml.MapSlice:
135		addLinks(raw)
136	case []interface{}:
137		for _, v := range raw {
138			switch linkRaw := v.(type) {
139			case yaml.MapSlice:
140				addLinks(v.(yaml.MapSlice))
141			default:
142				return links, fmt.Errorf("unsupported type for `nav` link item (%T), looking for map (`text: href`)", linkRaw)
143			}
144		}
145	default:
146		return links, fmt.Errorf("unsupported type for `nav` variable: %T", raw)
147	}
148
149	return links, nil
150}
151
// toAliases normalizes the `aliases` front-matter value into a list of paths.
//
// Accepted forms are a list of strings or a single whitespace-separated
// string. Each alias has surrounding whitespace and one leading "/" removed.
// A nil value yields an empty, non-nil slice.
//
// An error is returned for any other type, including a list item that is not
// a string. The aliases collected before the failure are returned alongside
// the error.
func toAliases(obj interface{}) ([]string, error) {
	arr := make([]string, 0)
	if obj == nil {
		return arr, nil
	}

	switch raw := obj.(type) {
	case []interface{}:
		for _, item := range raw {
			// Front matter is user input: reject non-strings instead of
			// panicking on the type assertion.
			alias, ok := item.(string)
			if !ok {
				return arr, fmt.Errorf("unsupported type for `aliases` item: %T, should be string", item)
			}
			arr = append(arr, strings.TrimPrefix(strings.TrimSpace(alias), "/"))
		}
	case string:
		// Fields ignores repeated, leading and trailing whitespace, so no
		// empty aliases are produced (Split on " " would emit them).
		for _, alias := range strings.Fields(raw) {
			arr = append(arr, strings.TrimPrefix(alias, "/"))
		}
	default:
		return arr, fmt.Errorf("unsupported type for `aliases` variable: %T", raw)
	}

	return arr, nil
}
176
// toTags normalizes the `tags` front-matter value into a list of tags.
//
// Accepted forms are a list of strings, whose items are used as written, or a
// single whitespace-separated string. A nil value yields an empty, non-nil
// slice.
//
// An error is returned for any other type, including a list item that is not
// a string (e.g. the unquoted number in `tags: [2024, go]`). The tags
// collected before the failure are returned alongside the error.
func toTags(obj interface{}) ([]string, error) {
	arr := make([]string, 0)
	if obj == nil {
		return arr, nil
	}

	switch raw := obj.(type) {
	case []interface{}:
		for _, item := range raw {
			// Front matter is user input: reject non-strings instead of
			// panicking on the type assertion.
			tag, ok := item.(string)
			if !ok {
				return arr, fmt.Errorf("unsupported type for `tags` item: %T, should be string", item)
			}
			arr = append(arr, tag)
		}
	case string:
		// Fields ignores repeated, leading and trailing whitespace, so no
		// empty tags are produced (Split on " " would emit them).
		arr = append(arr, strings.Fields(raw)...)
	default:
		return arr, fmt.Errorf("unsupported type for `tags` variable: %T", raw)
	}

	return arr, nil
}
199
200func CreateGoldmark(extenders ...goldmark.Extender) goldmark.Markdown {
201	return goldmark.New(
202		goldmark.WithExtensions(
203			extenders...,
204		),
205		goldmark.WithParserOptions(
206			parser.WithAutoHeadingID(),
207		),
208		goldmark.WithRendererOptions(
209			ghtml.WithUnsafe(),
210		),
211	)
212}
213
214func ParseText(text string) (*ParsedText, error) {
215	parsed := ParsedText{
216		MetaData: &MetaData{
217			Tags:       []string{},
218			Aliases:    []string{},
219			WithStyles: true,
220		},
221	}
222	hili := highlighting.NewHighlighting(
223		highlighting.WithFormatOptions(
224			html.WithLineNumbers(true),
225			html.WithClasses(true),
226		),
227	)
228	extenders := []goldmark.Extender{
229		extension.GFM,
230		extension.Footnote,
231		meta.Meta,
232		&hashtag.Extender{},
233		hili,
234		&anchor.Extender{
235			Position: anchor.After,
236			Texter:   anchor.Text("#"),
237		},
238	}
239	md := CreateGoldmark(extenders...)
240	context := parser.NewContext()
241	// we do the Parse/Render steps manually to get a chance to examine the AST
242	btext := []byte(text)
243	doc := md.Parser().Parse(gtext.NewReader(btext), parser.WithContext(context))
244	metaData := meta.Get(context)
245
246	// title:
247	// 1. if specified in frontmatter, use that
248	title, err := toString(metaData["title"])
249	if err != nil {
250		return &parsed, fmt.Errorf("front-matter field (%s): %w", "title", err)
251	}
252	// 2. If an <h1> is found before a <p> or other heading is found, use that
253	if title == "" {
254		title = AstTitle(doc, btext, true)
255	}
256	// 3. else, set it to nothing (slug should get used later down the line)
257	// this is implicit since it's already ""
258	parsed.MetaData.Title = title
259
260	// only handle toc after the title is extracted (if it's getting extracted)
261	mtoc, err := toToc(metaData["toc"])
262	if err != nil {
263		return &parsed, fmt.Errorf("front-matter field (%s): %w", "toc", err)
264	}
265	if mtoc >= 0 {
266		err = AstToc(doc, btext, mtoc)
267		if err != nil {
268			return &parsed, fmt.Errorf("error generating toc: %w", err)
269		}
270	}
271
272	description, err := toString(metaData["description"])
273	if err != nil {
274		return &parsed, fmt.Errorf("front-matter field (%s): %w", "description", err)
275	}
276	parsed.MetaData.Description = description
277
278	layout, err := toString(metaData["layout"])
279	if err != nil {
280		return &parsed, fmt.Errorf("front-matter field (%s): %w", "layout", err)
281	}
282	parsed.MetaData.Layout = layout
283
284	image, err := toString(metaData["image"])
285	if err != nil {
286		return &parsed, fmt.Errorf("front-matter field (%s): %w", "image", err)
287	}
288	parsed.MetaData.Image = image
289
290	card, err := toString(metaData["card"])
291	if err != nil {
292		return &parsed, fmt.Errorf("front-matter field (%s): %w", "card", err)
293	}
294	parsed.MetaData.ImageCard = card
295
296	hidden, err := toBool(metaData["draft"], false)
297	if err != nil {
298		return &parsed, fmt.Errorf("front-matter field (%s): %w", "draft", err)
299	}
300	parsed.MetaData.Hidden = hidden
301
302	withStyles, err := toBool(metaData["with_styles"], true)
303	if err != nil {
304		return &parsed, fmt.Errorf("front-matter field (%s): %w", "with_style", err)
305	}
306	parsed.MetaData.WithStyles = withStyles
307
308	favicon, err := toString(metaData["favicon"])
309	if err != nil {
310		return &parsed, fmt.Errorf("front-matter field (%s): %w", "favicon", err)
311	}
312	parsed.MetaData.Favicon = favicon
313
314	var publishAt *time.Time = nil
315	date, err := toString(metaData["date"])
316	if err != nil {
317		return &parsed, fmt.Errorf("front-matter field (%s): %w", "date", err)
318	}
319
320	if date != "" {
321		nextDate, err := dateparse.ParseStrict(date)
322		if err != nil {
323			return &parsed, err
324		}
325		publishAt = &nextDate
326	}
327	parsed.MetaData.PublishAt = publishAt
328
329	orderedMetaData := meta.GetItems(context)
330
331	nav, err := toLinks(orderedMetaData)
332	if err != nil {
333		return &parsed, err
334	}
335	parsed.MetaData.Nav = nav
336
337	aliases, err := toAliases(metaData["aliases"])
338	if err != nil {
339		return &parsed, err
340	}
341	parsed.MetaData.Aliases = aliases
342
343	rtags := metaData["tags"]
344	tags, err := toTags(rtags)
345	if err != nil {
346		return &parsed, err
347	}
348	// fill from hashtag ASTs as fallback
349	if rtags == nil {
350		tags = AstTags(doc)
351	}
352	parsed.MetaData.Tags = tags
353
354	// Rendering happens last to allow any of the previous steps to manipulate
355	// the AST.
356	var buf bytes.Buffer
357	if err := md.Renderer().Render(&buf, btext, doc); err != nil {
358		return &parsed, err
359	}
360	parsed.Html = policy.Sanitize(buf.String())
361
362	return &parsed, nil
363}
364
365func AstTags(doc ast.Node) []string {
366	var tags []string
367	err := ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
368		switch n.Kind() {
369		// ignore hashtags inside of these sections
370		case ast.KindBlockquote, ast.KindCodeBlock, ast.KindCodeSpan:
371			return ast.WalkSkipChildren, nil
372		// register hashtags
373		case hashtag.Kind:
374			t := n.(*hashtag.Node)
375			if entering { // only add each tag once
376				tags = append(tags, string(t.Tag))
377			}
378		}
379		// out-of-switch default
380		return ast.WalkContinue, nil
381	})
382	if err != nil {
383		panic(err) // unreachable
384	}
385
386	// sort and deduplicate results
387	dedupe := removeDuplicateStr(tags)
388	return dedupe
389}
390
// removeDuplicateStr returns the distinct strings of strSlice in order of
// first occurrence. The result is always a non-nil slice, even for nil or
// empty input.
//
// Adapted from https://stackoverflow.com/a/66751055
func removeDuplicateStr(strSlice []string) []string {
	seen := make(map[string]struct{})
	out := []string{}
	for _, s := range strSlice {
		if _, dup := seen[s]; dup {
			continue
		}
		seen[s] = struct{}{}
		out = append(out, s)
	}
	return out
}
403
404// AstTitle extracts the title (if any) from a parsed markdown document.
405//
406// If "clean" is true, it will also remove the heading node from the AST.
407func AstTitle(doc ast.Node, src []byte, clean bool) string {
408	out := ""
409	err := ast.Walk(doc, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
410		if n.Kind() == ast.KindHeading {
411			if h := n.(*ast.Heading); h.Level == 1 {
412				if clean {
413					p := h.Parent()
414					p.RemoveChild(p, n)
415				}
416				out = string(h.Lines().Value(src))
417			}
418			return ast.WalkStop, nil
419		}
420		if ast.IsParagraph(n) {
421			return ast.WalkStop, nil
422		}
423		return ast.WalkContinue, nil
424	})
425	if err != nil {
426		panic(err) // unreachable
427	}
428	return out
429}
430
431func AstToc(doc ast.Node, src []byte, mtoc int) error {
432	var tree *toc.TOC
433	if mtoc >= 0 {
434		var err error
435		if mtoc > 0 {
436			tree, err = toc.Inspect(doc, src, toc.Compact(true), toc.MinDepth(2), toc.MaxDepth(mtoc+1))
437		} else {
438			tree, err = toc.Inspect(doc, src, toc.Compact(true), toc.MinDepth(2))
439		}
440		if err != nil {
441			return err
442		}
443		if tree == nil {
444			return nil // no headings?
445		}
446	}
447	list := toc.RenderList(tree)
448	if list == nil {
449		return nil // no headings
450	}
451
452	list.SetAttributeString("id", []byte("toc-list"))
453
454	// generate # toc
455	heading := ast.NewHeading(2)
456	heading.SetAttributeString("id", []byte("toc"))
457	heading.AppendChild(heading, ast.NewString([]byte("Table of Contents")))
458
459	// insert
460	doc.InsertBefore(doc, doc.FirstChild(), list)
461	doc.InsertBefore(doc, doc.FirstChild(), heading)
462	return nil
463}