repos / pico

pico services - prose.sh, pastes.sh, imgs.sh, feeds.sh, pgs.sh
git clone https://github.com/picosh/pico.git

pico / shared
Antonio Mika · 18 Nov 24

analytics.go

  1package shared
  2
import (
	"context"
	"crypto/hmac"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"errors"
	"fmt"
	"log/slog"
	"net"
	"net/http"
	"net/url"
	"time"
	"unicode/utf8"

	"github.com/picosh/pico/db"
	"github.com/picosh/utils/pipe/metrics"
	"github.com/simplesurance/go-ip-anonymizer/ipanonymizer"
	"github.com/x-way/crawlerdetect"
)
 22
 23func HmacString(secret, data string) string {
 24	hmacer := hmac.New(sha256.New, []byte(secret))
 25	hmacer.Write([]byte(data))
 26	dataHmac := hmacer.Sum(nil)
 27	return hex.EncodeToString(dataHmac)
 28}
 29
 30func trackableUserAgent(agent string) error {
 31	// dont store requests from bots
 32	if crawlerdetect.IsCrawler(agent) {
 33		return fmt.Errorf(
 34			"request is likely from a bot (User-Agent: %s)",
 35			cleanUserAgent(agent),
 36		)
 37	}
 38	return nil
 39}
 40
 41func trackableRequest(r *http.Request) error {
 42	agent := r.UserAgent()
 43	return trackableUserAgent(agent)
 44}
 45
 46func cleanIpAddress(ip string) (string, error) {
 47	host, _, err := net.SplitHostPort(ip)
 48	if err != nil {
 49		host = ip
 50	}
 51	// /24 IPv4 subnet mask
 52	// /64 IPv6 subnet mask
 53	anonymizer := ipanonymizer.NewWithMask(
 54		net.CIDRMask(24, 32),
 55		net.CIDRMask(64, 128),
 56	)
 57	anonIp, err := anonymizer.IPString(host)
 58	return anonIp, err
 59}
 60
 61func cleanUrl(orig string) (string, string) {
 62	u, err := url.Parse(orig)
 63	if err != nil {
 64		return "", ""
 65	}
 66	return u.Host, u.Path
 67}
 68
 69func cleanUrlFromRequest(r *http.Request) (string, string) {
 70	host := r.Header.Get("x-forwarded-host")
 71	if host == "" {
 72		host = r.URL.Host
 73	}
 74	if host == "" {
 75		host = r.Host
 76	}
 77	// we don't want query params in the url for security reasons
 78	return host, r.URL.Path
 79}
 80
 81func cleanUserAgent(ua string) string {
 82	// truncate user-agent because http headers have no text limit
 83	if len(ua) > 1000 {
 84		return ua[:1000]
 85	}
 86	return ua
 87}
 88
 89func cleanReferer(ref string) (string, error) {
 90	// we only want to store host for security reasons
 91	// https://developer.mozilla.org/en-US/docs/Web/Security/Referer_header:_privacy_and_security_concerns
 92	u, err := url.Parse(ref)
 93	if err != nil {
 94		return "", err
 95	}
 96	return u.Host, nil
 97}
 98
 99var ErrAnalyticsDisabled = errors.New("owner does not have site analytics enabled")
100
101func AnalyticsVisitFromVisit(visit *db.AnalyticsVisits, dbpool db.DB, secret string) error {
102	if !dbpool.HasFeatureForUser(visit.UserID, "analytics") {
103		return ErrAnalyticsDisabled
104	}
105
106	err := trackableUserAgent(visit.UserAgent)
107	if err != nil {
108		return err
109	}
110
111	ipAddress, err := cleanIpAddress(visit.IpAddress)
112	if err != nil {
113		return err
114	}
115	visit.IpAddress = HmacString(secret, ipAddress)
116	_, path := cleanUrl(visit.Path)
117	visit.Path = path
118
119	referer, err := cleanReferer(visit.Referer)
120	if err != nil {
121		return err
122	}
123	visit.Referer = referer
124	visit.UserAgent = cleanUserAgent(visit.UserAgent)
125
126	return nil
127}
128
129func ipFromRequest(r *http.Request) string {
130	// https://caddyserver.com/docs/caddyfile/directives/reverse_proxy#defaults
131	ipOrig := r.Header.Get("x-forwarded-for")
132	if ipOrig == "" {
133		ipOrig = r.RemoteAddr
134	}
135	// probably means this is a web tunnel
136	if ipOrig == "" || ipOrig == "@" {
137		sshCtx, err := GetSshCtx(r)
138		if err == nil {
139			ipOrig = sshCtx.RemoteAddr().String()
140		}
141	}
142
143	return ipOrig
144}
145
146func AnalyticsVisitFromRequest(r *http.Request, dbpool db.DB, userID string) (*db.AnalyticsVisits, error) {
147	if !dbpool.HasFeatureForUser(userID, "analytics") {
148		return nil, ErrAnalyticsDisabled
149	}
150
151	err := trackableRequest(r)
152	if err != nil {
153		return nil, err
154	}
155
156	ipAddress := ipFromRequest(r)
157	host, path := cleanUrlFromRequest(r)
158
159	return &db.AnalyticsVisits{
160		UserID:    userID,
161		Host:      host,
162		Path:      path,
163		IpAddress: ipAddress,
164		UserAgent: r.UserAgent(),
165		Referer:   r.Referer(),
166		Status:    http.StatusOK,
167	}, nil
168}
169
170func AnalyticsCollect(ch chan *db.AnalyticsVisits, dbpool db.DB, logger *slog.Logger) {
171	drain := metrics.RegisterReconnectMetricRecorder(
172		context.Background(),
173		logger,
174		NewPicoPipeClient(),
175		100,
176		10*time.Millisecond,
177	)
178
179	for visit := range ch {
180		data, err := json.Marshal(visit)
181		if err != nil {
182			logger.Error("could not json marshall visit record", "err", err)
183			continue
184		}
185
186		data = append(data, '\n')
187
188		_, err = drain.Write(data)
189		if err != nil {
190			logger.Error("could not write to metric-drain", "err", err)
191		}
192	}
193}