repos / pico

pico services - prose.sh, pastes.sh, imgs.sh, feeds.sh, pgs.sh
git clone https://github.com/picosh/pico.git

pico / feeds
Eric Bower · 13 Oct 24

cron.go

  1package feeds
  2
  3import (
  4	"crypto/tls"
  5	"errors"
  6	"fmt"
  7	html "html/template"
  8	"io"
  9	"log/slog"
 10	"math"
 11	"net/http"
 12	"strings"
 13	"text/template"
 14	"time"
 15
 16	"github.com/mmcdole/gofeed"
 17	"github.com/picosh/pico/db"
 18	"github.com/picosh/pico/shared"
 19	"github.com/sendgrid/sendgrid-go"
 20	"github.com/sendgrid/sendgrid-go/helpers/mail"
 21)
 22
 23var ErrNoRecentArticles = errors.New("no recent articles")
 24
 25type UserAgentTransport struct {
 26	http.RoundTripper
 27}
 28
 29func (c *UserAgentTransport) RoundTrip(r *http.Request) (*http.Response, error) {
 30	userAgent := "linux:feeds:v2 (by /u/pico-sh)"
 31	r.Header.Set("User-Agent", userAgent)
 32	r.Header.Set("Accept", "*/*")
 33	return c.RoundTripper.RoundTrip(r)
 34}
 35
 36var httpClient = http.Client{
 37	Transport: &UserAgentTransport{
 38		&http.Transport{
 39			TLSClientConfig: &tls.Config{},
 40		},
 41	},
 42}
 43
 44type FeedItemTmpl struct {
 45	GUID        string
 46	Title       string
 47	Link        string
 48	PublishedAt *time.Time
 49	Content     html.HTML
 50	Description html.HTML
 51}
 52
 53type Feed struct {
 54	Title       string
 55	Link        string
 56	Description string
 57	Items       []*FeedItemTmpl
 58	FeedItems   []*gofeed.Item
 59}
 60
 61type DigestFeed struct {
 62	Feeds        []*Feed
 63	Options      DigestOptions
 64	KeepAliveURL string
 65	DaysLeft     string
 66	ShowBanner   bool
 67}
 68
 69type DigestOptions struct {
 70	InlineContent bool
 71}
 72
 73func itemToTemplate(item *gofeed.Item) *FeedItemTmpl {
 74	return &FeedItemTmpl{
 75		Title:       item.Title,
 76		Link:        item.Link,
 77		PublishedAt: item.PublishedParsed,
 78		Description: html.HTML(item.Description),
 79		Content:     html.HTML(item.Content),
 80	}
 81}
 82
 83func digestOptionToTime(lastDigest time.Time, interval string) time.Time {
 84	day := 24 * time.Hour
 85	if interval == "10min" {
 86		return lastDigest.Add(10 * time.Minute)
 87	} else if interval == "1hour" {
 88		return lastDigest.Add(1 * time.Hour)
 89	} else if interval == "6hour" {
 90		return lastDigest.Add(6 * time.Hour)
 91	} else if interval == "12hour" {
 92		return lastDigest.Add(12 * time.Hour)
 93	} else if interval == "1day" || interval == "" {
 94		return lastDigest.Add(1 * day)
 95	} else if interval == "7day" {
 96		return lastDigest.Add(7 * day)
 97	} else if interval == "30day" {
 98		return lastDigest.Add(30 * day)
 99	} else {
100		return lastDigest
101	}
102}
103
104// see if this feed item should be emailed to user.
105func isValidItem(item *gofeed.Item, feedItems []*db.FeedItem) bool {
106	for _, feedItem := range feedItems {
107		if item.GUID == feedItem.GUID {
108			return false
109		}
110	}
111
112	return true
113}
114
115type Fetcher struct {
116	cfg *shared.ConfigSite
117	db  db.DB
118}
119
120func NewFetcher(dbpool db.DB, cfg *shared.ConfigSite) *Fetcher {
121	return &Fetcher{
122		db:  dbpool,
123		cfg: cfg,
124	}
125}
126
127func (f *Fetcher) Validate(post *db.Post, parsed *shared.ListParsedText) error {
128	lastDigest := post.Data.LastDigest
129	if lastDigest == nil {
130		return nil
131	}
132
133	now := time.Now().UTC()
134
135	expiresAt := post.ExpiresAt
136	if expiresAt != nil {
137		if post.ExpiresAt.Before(now) {
138			return fmt.Errorf("(%s) post has expired, skipping", post.ExpiresAt.Format(time.RFC3339))
139		}
140	}
141
142	digestAt := digestOptionToTime(*lastDigest, parsed.DigestInterval)
143	if digestAt.After(now) {
144		return fmt.Errorf("(%s) not time to digest, skipping", digestAt.Format(time.RFC3339))
145	}
146	return nil
147}
148
149func (f *Fetcher) RunPost(logger *slog.Logger, user *db.User, post *db.Post) error {
150	logger = logger.With("filename", post.Filename)
151	logger.Info("running feed post")
152
153	parsed := shared.ListParseText(post.Text)
154
155	logger.Info("last digest at", "lastDigest", post.Data.LastDigest.Format(time.RFC3339))
156	err := f.Validate(post, parsed)
157	if err != nil {
158		logger.Info("validation failed", "err", err)
159		return nil
160	}
161
162	urls := []string{}
163	for _, item := range parsed.Items {
164		url := ""
165		if item.IsText {
166			url = item.Value
167		} else if item.IsURL {
168			url = string(item.URL)
169		}
170
171		if url == "" {
172			continue
173		}
174
175		urls = append(urls, url)
176	}
177
178	msgBody, err := f.FetchAll(logger, urls, parsed.InlineContent, user.Name, post)
179	if err != nil {
180		return err
181	}
182
183	subject := fmt.Sprintf("%s feed digest", post.Title)
184	err = f.SendEmail(logger, user.Name, parsed.Email, subject, msgBody)
185	if err != nil {
186		return err
187	}
188
189	now := time.Now().UTC()
190	if post.ExpiresAt == nil {
191		expiresAt := time.Now().AddDate(0, 6, 0)
192		post.ExpiresAt = &expiresAt
193	}
194	post.Data.LastDigest = &now
195	_, err = f.db.UpdatePost(post)
196	return err
197}
198
199func (f *Fetcher) RunUser(user *db.User) error {
200	logger := shared.LoggerWithUser(f.cfg.Logger, user)
201	posts, err := f.db.FindPostsForUser(&db.Pager{Num: 100}, user.ID, "feeds")
202	if err != nil {
203		return err
204	}
205
206	if len(posts.Data) > 0 {
207		logger.Info("found feed posts", "len", len(posts.Data))
208	}
209
210	for _, post := range posts.Data {
211		err = f.RunPost(logger, user, post)
212		if err != nil {
213			logger.Error("run post failed", "err", err)
214		}
215	}
216
217	return nil
218}
219
220func (f *Fetcher) ParseURL(fp *gofeed.Parser, url string) (*gofeed.Feed, error) {
221	req, err := http.NewRequest("GET", url, nil)
222	if err != nil {
223		return nil, err
224	}
225
226	resp, err := httpClient.Do(req)
227	if err != nil {
228		return nil, err
229	}
230
231	defer resp.Body.Close()
232	body, err := io.ReadAll(resp.Body)
233	if err != nil {
234		return nil, err
235	}
236
237	if resp.StatusCode < 200 || resp.StatusCode > 300 {
238		return nil, fmt.Errorf("fetching feed resulted in an error: %s %s", resp.Status, body)
239	}
240
241	feed, err := fp.ParseString(string(body))
242	if err != nil {
243		return nil, err
244	}
245
246	return feed, nil
247}
248
249func (f *Fetcher) Fetch(logger *slog.Logger, fp *gofeed.Parser, url string, username string, feedItems []*db.FeedItem) (*Feed, error) {
250	logger.Info("fetching feed", "url", url)
251
252	feed, err := f.ParseURL(fp, url)
253	if err != nil {
254		return nil, err
255	}
256
257	feedTmpl := &Feed{
258		Title:       feed.Title,
259		Description: feed.Description,
260		Link:        feed.Link,
261	}
262
263	items := []*FeedItemTmpl{}
264	gofeedItems := []*gofeed.Item{}
265	// we only want to return feed items published since the last digest time we fetched
266	for _, item := range feed.Items {
267		if item == nil {
268			continue
269		}
270
271		if !isValidItem(item, feedItems) {
272			continue
273		}
274
275		gofeedItems = append(gofeedItems, item)
276		items = append(items, itemToTemplate(item))
277	}
278
279	if len(items) == 0 {
280		return nil, fmt.Errorf(
281			"%s %w, skipping",
282			url,
283			ErrNoRecentArticles,
284		)
285	}
286
287	feedTmpl.FeedItems = gofeedItems
288	feedTmpl.Items = items
289	return feedTmpl, nil
290}
291
292func (f *Fetcher) PrintText(feedTmpl *DigestFeed) (string, error) {
293	ts, err := template.ParseFiles(
294		f.cfg.StaticPath("html/digest_text.page.tmpl"),
295	)
296
297	if err != nil {
298		return "", err
299	}
300
301	w := new(strings.Builder)
302	err = ts.Execute(w, feedTmpl)
303	if err != nil {
304		return "", err
305	}
306
307	return w.String(), nil
308}
309
310func (f *Fetcher) PrintHtml(feedTmpl *DigestFeed) (string, error) {
311	ts, err := html.ParseFiles(
312		f.cfg.StaticPath("html/digest.page.tmpl"),
313	)
314
315	if err != nil {
316		return "", err
317	}
318
319	w := new(strings.Builder)
320	err = ts.Execute(w, feedTmpl)
321	if err != nil {
322		return "", err
323	}
324
325	return w.String(), nil
326}
327
// MsgBody holds the two renderings of a digest email: the HTML body and the
// plain-text body.
type MsgBody struct {
	Html, Text string
}
332
333func (f *Fetcher) FetchAll(logger *slog.Logger, urls []string, inlineContent bool, username string, post *db.Post) (*MsgBody, error) {
334	fp := gofeed.NewParser()
335	daysLeft := ""
336	showBanner := false
337	if post.ExpiresAt != nil {
338		diff := time.Until(*post.ExpiresAt)
339		daysLeftInt := int(math.Ceil(diff.Hours() / 24))
340		daysLeft = fmt.Sprintf("%d", daysLeftInt)
341		if daysLeftInt <= 30 {
342			showBanner = true
343		}
344	}
345	feeds := &DigestFeed{
346		KeepAliveURL: fmt.Sprintf("https://feeds.pico.sh/keep-alive/%s", post.ID),
347		DaysLeft:     daysLeft,
348		ShowBanner:   showBanner,
349		Options:      DigestOptions{InlineContent: inlineContent},
350	}
351	feedItems, err := f.db.FindFeedItemsByPostID(post.ID)
352	if err != nil {
353		return nil, err
354	}
355
356	for _, url := range urls {
357		feedTmpl, err := f.Fetch(logger, fp, url, username, feedItems)
358		if err != nil {
359			if errors.Is(err, ErrNoRecentArticles) {
360				logger.Info("no recent articles", "err", err)
361			} else {
362				logger.Error("fetch error", "err", err)
363			}
364			continue
365		}
366		feeds.Feeds = append(feeds.Feeds, feedTmpl)
367	}
368
369	if len(feeds.Feeds) == 0 {
370		return nil, fmt.Errorf("(%s) %w, skipping email", username, ErrNoRecentArticles)
371	}
372
373	fdi := []*db.FeedItem{}
374	for _, feed := range feeds.Feeds {
375		for _, item := range feed.FeedItems {
376			fdi = append(fdi, &db.FeedItem{
377				PostID: post.ID,
378				GUID:   item.GUID,
379				Data: db.FeedItemData{
380					Title:       item.Title,
381					Description: item.Description,
382					Content:     item.Content,
383					Link:        item.Link,
384					PublishedAt: item.PublishedParsed,
385				},
386			})
387		}
388	}
389	err = f.db.InsertFeedItems(post.ID, fdi)
390	if err != nil {
391		return nil, err
392	}
393
394	text, err := f.PrintText(feeds)
395	if err != nil {
396		return nil, err
397	}
398
399	html, err := f.PrintHtml(feeds)
400	if err != nil {
401		return nil, err
402	}
403
404	return &MsgBody{
405		Text: text,
406		Html: html,
407	}, nil
408}
409
410func (f *Fetcher) SendEmail(logger *slog.Logger, username, email string, subject string, msg *MsgBody) error {
411	if email == "" {
412		return fmt.Errorf("(%s) does not have an email associated with their feed post", username)
413	}
414
415	from := mail.NewEmail("team pico", shared.DefaultEmail)
416	to := mail.NewEmail(username, email)
417
418	// f.logger.Infof("message body (%s)", plainTextContent)
419
420	message := mail.NewSingleEmail(from, subject, to, msg.Text, msg.Html)
421	client := sendgrid.NewSendClient(f.cfg.SendgridKey)
422
423	logger.Info("sending email digest")
424	response, err := client.Send(message)
425	if err != nil {
426		return err
427	}
428
429	// f.logger.Infof("(%s) email digest response: %v", username, response)
430
431	if len(response.Headers["X-Message-Id"]) > 0 {
432		logger.Info(
433			"successfully sent email digest",
434			"email", email,
435			"x-message-id", response.Headers["X-Message-Id"][0],
436		)
437	} else {
438		logger.Error(
439			"could not find x-message-id, which means sending an email failed",
440			"email", email,
441		)
442	}
443
444	return nil
445}
446
447func (f *Fetcher) Run(logger *slog.Logger) error {
448	users, err := f.db.FindUsers()
449	if err != nil {
450		return err
451	}
452
453	for _, user := range users {
454		err := f.RunUser(user)
455		if err != nil {
456			logger.Error("run user failed", "err", err)
457			continue
458		}
459	}
460
461	return nil
462}
463
464func (f *Fetcher) Loop() {
465	logger := f.cfg.Logger
466	for {
467		logger.Info("running digest emailer")
468
469		err := f.Run(logger)
470		if err != nil {
471			logger.Error("run failed", "err", err)
472		}
473
474		logger.Info("digest emailer finished, waiting 10 mins")
475		time.Sleep(10 * time.Minute)
476	}
477}