repos / pico

pico services - prose.sh, pastes.sh, imgs.sh, feeds.sh, pgs.sh
git clone https://github.com/picosh/pico.git

pico / shared
Eric Bower · 14 May 24

analytics.go

  1package shared
  2
import (
	"crypto/hmac"
	"crypto/sha256"
	"encoding/hex"
	"errors"
	"fmt"
	"log/slog"
	"net"
	"net/http"
	"net/url"
	"strings"

	"github.com/picosh/pico/db"
	"github.com/simplesurance/go-ip-anonymizer/ipanonymizer"
	"github.com/x-way/crawlerdetect"
)
 18
 19func HmacString(secret, data string) string {
 20	hmacer := hmac.New(sha256.New, []byte(secret))
 21	hmacer.Write([]byte(data))
 22	dataHmac := hmacer.Sum(nil)
 23	return hex.EncodeToString(dataHmac)
 24}
 25
 26func trackableRequest(r *http.Request) error {
 27	agent := r.UserAgent()
 28	// dont store requests from bots
 29	if crawlerdetect.IsCrawler(agent) {
 30		return fmt.Errorf(
 31			"request is likely from a bot (User-Agent: %s)",
 32			cleanUserAgent(agent),
 33		)
 34	}
 35	return nil
 36}
 37
 38func cleanIpAddress(ip string) (string, error) {
 39	host, _, err := net.SplitHostPort(ip)
 40	if err != nil {
 41		host = ip
 42	}
 43	// /24 IPv4 subnet mask
 44	// /64 IPv6 subnet mask
 45	anonymizer := ipanonymizer.NewWithMask(
 46		net.CIDRMask(24, 32),
 47		net.CIDRMask(64, 128),
 48	)
 49	anonIp, err := anonymizer.IPString(host)
 50	return anonIp, err
 51}
 52
 53func cleanUrl(r *http.Request) (string, string) {
 54	host := r.Header.Get("x-forwarded-host")
 55	if host == "" {
 56		host = r.URL.Host
 57	}
 58	// we don't want query params in the url for security reasons
 59	return host, r.URL.Path
 60}
 61
 62func cleanUserAgent(ua string) string {
 63	// truncate user-agent because http headers have no text limit
 64	if len(ua) > 1000 {
 65		return ua[:1000]
 66	}
 67	return ua
 68}
 69
 70func cleanReferer(ref string) (string, error) {
 71	// we only want to store host for security reasons
 72	// https://developer.mozilla.org/en-US/docs/Web/Security/Referer_header:_privacy_and_security_concerns
 73	u, err := url.Parse(ref)
 74	if err != nil {
 75		return "", err
 76	}
 77	return u.Host, nil
 78}
 79
 80var ErrAnalyticsDisabled = errors.New("owner does not have site analytics enabled")
 81
 82func AnalyticsVisitFromRequest(r *http.Request, userID string, secret string) (*db.AnalyticsVisits, error) {
 83	dbpool := GetDB(r)
 84	if !dbpool.HasFeatureForUser(userID, "analytics") {
 85		return nil, ErrAnalyticsDisabled
 86	}
 87
 88	err := trackableRequest(r)
 89	if err != nil {
 90		return nil, err
 91	}
 92
 93	// https://caddyserver.com/docs/caddyfile/directives/reverse_proxy#defaults
 94	ipOrig := r.Header.Get("x-forwarded-for")
 95	if ipOrig == "" {
 96		ipOrig = r.RemoteAddr
 97	}
 98	// probably means this is a web tunnel
 99	if ipOrig == "" || ipOrig == "@" {
100		sshCtx, err := GetSshCtx(r)
101		if err == nil {
102			ipOrig = sshCtx.RemoteAddr().String()
103		}
104	}
105	ipAddress, err := cleanIpAddress(ipOrig)
106	if err != nil {
107		return nil, err
108	}
109	host, path := cleanUrl(r)
110
111	referer, err := cleanReferer(r.Referer())
112	if err != nil {
113		return nil, err
114	}
115
116	return &db.AnalyticsVisits{
117		UserID:    userID,
118		Host:      host,
119		Path:      path,
120		IpAddress: HmacString(secret, ipAddress),
121		UserAgent: cleanUserAgent(r.UserAgent()),
122		Referer:   referer,
123		Status:    http.StatusOK,
124	}, nil
125}
126
127func AnalyticsCollect(ch chan *db.AnalyticsVisits, dbpool db.DB, logger *slog.Logger) {
128	for view := range ch {
129		err := dbpool.InsertVisit(view)
130		if err != nil {
131			logger.Error("could not insert view record", "err", err)
132		}
133	}
134}