Eric Bower
·
23 Sep 24
listparser.go
1package shared
2
3import (
4 "fmt"
5 "html/template"
6 "log"
7 "regexp"
8 "strconv"
9 "strings"
10 "time"
11
12 "slices"
13
14 "github.com/araddon/dateparse"
15)
16
var (
	// reIndent matches the run of blank characters (spaces and tabs) at the
	// start of a line.
	reIndent = regexp.MustCompile(`^[[:blank:]]+`)

	// DigestIntervalOpts lists the values accepted for the "digest_interval"
	// metadata variable.
	DigestIntervalOpts = []string{
		"10min",
		"1hour",
		"6hour",
		"12hour",
		"1day",
		"7day",
		"30day",
	}
)
27
// ListParsedText is the result of parsing a list document: the items that
// were kept, plus the metadata collected from its variable lines.
type ListParsedText struct {
	// Items holds the parsed lines in document order.
	Items []*ListItem
	// ListMetaData is embedded, so its fields are promoted onto ListParsedText.
	*ListMetaData
}
32
// ListItem is one parsed line of a list document. The Is* flags record which
// leading token, if any, the line started with.
type ListItem struct {
	// Value is the text of the line; for recognized line kinds the leading
	// token (and any leading blanks) has been removed.
	Value string
	// URL is the first word after the token on link ("=>") and image ("=<") lines.
	URL template.URL
	// Variable is not assigned by any code visible in this file.
	Variable string
	// IsURL is set for lines that start with "=>".
	IsURL bool
	// IsBlock is set for lines that start with ">".
	IsBlock bool
	// IsText is set for lines that match none of the known tokens.
	IsText bool
	// IsHeaderOne is set for lines that start with "#" but not "##".
	IsHeaderOne bool
	// IsHeaderTwo is set for lines that start with "##".
	IsHeaderTwo bool
	// IsImg is set for lines that start with "=<".
	IsImg bool
	// IsPre is set for the line that opens a "```" preformatted block; the
	// following lines of the block are appended to its Value.
	IsPre bool
	// Indent is the nesting level computed from the line's leading blanks;
	// it stays 0 for lines that do not start with blanks.
	Indent int
}
46
// ListMetaData holds the values collected from "=:" variable lines. Each field
// is filled from the variable key named in its comment.
type ListMetaData struct {
	// PublishAt comes from "publish_at"; it is only set when the date parses.
	PublishAt *time.Time
	// Title comes from "title".
	Title string
	// Description comes from "description".
	Description string
	// Layout comes from "layout"; ListParseText starts it at "default".
	Layout string
	// Tags comes from "tags", split on commas with each entry trimmed.
	Tags []string
	// ListType comes from "list_type"; ListParseText starts it at "disc".
	ListType string // https://developer.mozilla.org/en-US/docs/Web/CSS/list-style-type
	// DigestInterval comes from "digest_interval" and must be one of DigestIntervalOpts.
	DigestInterval string
	// Email comes from "email".
	Email string
	// InlineContent comes from "inline_content"; a value that does not parse
	// as a bool is treated as true.
	InlineContent bool // allows content inlining to be disabled in feeds.pico.sh emails
}
58
// Line-leading tokens recognized by the list parser.
var (
	urlToken       = "=>"  // link line
	blockToken     = ">"   // block quote line
	varToken       = "=:"  // metadata variable line
	imgToken       = "=<"  // image line
	headerOneToken = "#"   // first-level header line
	headerTwoToken = "##"  // second-level header line
	preToken       = "```" // fence that opens or closes a preformatted block
)
66
// SplitToken is a piece of text split at its first space: Key holds the first
// word and Value holds the remainder.
type SplitToken struct {
	Key   string
	Value string
}

// TextToSplitToken splits text at its first space character.
//
// Leading and trailing spaces are stripped first. Key receives everything
// before the first remaining space and Value receives everything after it,
// with surrounding spaces removed. When the stripped text contains no space,
// Key and Value are both set to the stripped text. Only the space character
// (' ') is treated as a separator; tabs and other whitespace are kept as-is.
//
// The returned token is never nil.
func TextToSplitToken(text string) *SplitToken {
	txt := strings.Trim(text, " ")

	// strings.Cut slices the input directly, so the key keeps its original
	// bytes (no rune re-encoding) and no per-rune concatenation is needed.
	key, rest, found := strings.Cut(txt, " ")
	if !found {
		return &SplitToken{Key: txt, Value: txt}
	}

	return &SplitToken{Key: key, Value: strings.Trim(rest, " ")}
}
93
// SplitByNewline breaks text into lines. Windows line endings ("\r\n") are
// converted to "\n" before splitting, so the returned lines carry no "\r"
// from such endings. An empty input yields a single empty line.
func SplitByNewline(text string) []string {
	normalized := strings.ReplaceAll(text, "\r\n", "\n")
	return strings.Split(normalized, "\n")
}
97
98func PublishAtDate(date string) (*time.Time, error) {
99 t, err := dateparse.ParseStrict(date)
100 return &t, err
101}
102
103func TokenToMetaField(meta *ListMetaData, token *SplitToken) error {
104 if token.Key == "publish_at" {
105 publishAt, err := PublishAtDate(token.Value)
106 if err == nil {
107 meta.PublishAt = publishAt
108 }
109 } else if token.Key == "title" {
110 meta.Title = token.Value
111 } else if token.Key == "description" {
112 meta.Description = token.Value
113 } else if token.Key == "list_type" {
114 meta.ListType = token.Value
115 } else if token.Key == "tags" {
116 tags := strings.Split(token.Value, ",")
117 meta.Tags = make([]string, 0)
118 for _, tag := range tags {
119 meta.Tags = append(meta.Tags, strings.TrimSpace(tag))
120 }
121 } else if token.Key == "layout" {
122 meta.Layout = token.Value
123 } else if token.Key == "digest_interval" {
124 if !slices.Contains(DigestIntervalOpts, token.Value) {
125 return fmt.Errorf(
126 "(%s) is not a valid option, choose from [%s]",
127 token.Value,
128 strings.Join(DigestIntervalOpts, ","),
129 )
130 }
131 meta.DigestInterval = token.Value
132 } else if token.Key == "email" {
133 meta.Email = token.Value
134 } else if token.Key == "inline_content" {
135 v, err := strconv.ParseBool(token.Value)
136 if err != nil {
137 // its empty or its improperly configured, just send the content
138 v = true
139 }
140 meta.InlineContent = v
141 }
142
143 return nil
144}
145
146func KeyAsValue(token *SplitToken) string {
147 if token.Value == "" {
148 return token.Key
149 }
150 return token.Value
151}
152
153func parseItem(meta *ListMetaData, li *ListItem, prevItem *ListItem, pre bool, mod int) (bool, bool, int) {
154 skip := false
155
156 if strings.HasPrefix(li.Value, preToken) {
157 pre = !pre
158 if pre {
159 nextValue := strings.Replace(li.Value, preToken, "", 1)
160 li.IsPre = true
161 li.Value = nextValue
162 } else {
163 skip = true
164 }
165 } else if pre {
166 nextValue := strings.Replace(li.Value, preToken, "", 1)
167 prevItem.Value = fmt.Sprintf("%s\n%s", prevItem.Value, nextValue)
168 skip = true
169 } else if strings.HasPrefix(li.Value, urlToken) {
170 li.IsURL = true
171 split := TextToSplitToken(strings.Replace(li.Value, urlToken, "", 1))
172 li.URL = template.URL(split.Key)
173 li.Value = KeyAsValue(split)
174 } else if strings.HasPrefix(li.Value, blockToken) {
175 li.IsBlock = true
176 li.Value = strings.Replace(li.Value, blockToken, "", 1)
177 } else if strings.HasPrefix(li.Value, imgToken) {
178 li.IsImg = true
179 split := TextToSplitToken(strings.Replace(li.Value, imgToken, "", 1))
180 key := split.Key
181 li.URL = template.URL(key)
182 li.Value = KeyAsValue(split)
183 } else if strings.HasPrefix(li.Value, varToken) {
184 split := TextToSplitToken(strings.Replace(li.Value, varToken, "", 1))
185 err := TokenToMetaField(meta, split)
186 if err != nil {
187 log.Println(err)
188 }
189 } else if strings.HasPrefix(li.Value, headerTwoToken) {
190 li.IsHeaderTwo = true
191 li.Value = strings.Replace(li.Value, headerTwoToken, "", 1)
192 } else if strings.HasPrefix(li.Value, headerOneToken) {
193 li.IsHeaderOne = true
194 li.Value = strings.Replace(li.Value, headerOneToken, "", 1)
195 } else if reIndent.MatchString(li.Value) {
196 trim := reIndent.ReplaceAllString(li.Value, "")
197 old := len(li.Value)
198 li.Value = trim
199
200 pre, skip, _ = parseItem(meta, li, prevItem, pre, mod)
201 if prevItem != nil && prevItem.Indent == 0 {
202 mod = old - len(trim)
203 li.Indent = 1
204 } else {
205 numerator := old - len(trim)
206 if mod == 0 {
207 li.Indent = 1
208 } else {
209 li.Indent = numerator / mod
210 }
211 }
212 } else {
213 li.IsText = true
214 }
215
216 return pre, skip, mod
217}
218
219func ListParseText(text string) *ListParsedText {
220 textItems := SplitByNewline(text)
221 items := []*ListItem{}
222 meta := ListMetaData{
223 ListType: "disc",
224 Tags: []string{},
225 Layout: "default",
226 }
227 pre := false
228 skip := false
229 mod := 0
230 var prevItem *ListItem
231
232 for _, t := range textItems {
233 if len(items) > 0 {
234 prevItem = items[len(items)-1]
235 }
236
237 li := ListItem{
238 Value: t,
239 }
240
241 pre, skip, mod = parseItem(&meta, &li, prevItem, pre, mod)
242
243 if li.IsText && li.Value == "" {
244 skip = true
245 }
246
247 if !skip {
248 items = append(items, &li)
249 }
250 }
251
252 return &ListParsedText{
253 Items: items,
254 ListMetaData: &meta,
255 }
256}