repos / pico

pico services - prose.sh, pastes.sh, imgs.sh, feeds.sh, pgs.sh
git clone https://github.com/picosh/pico.git

pico / shared
Eric Bower · 29 Nov 24

analytics.go

  1package shared
  2
  3import (
  4	"context"
  5	"crypto/hmac"
  6	"crypto/sha256"
  7	"encoding/hex"
  8	"encoding/json"
  9	"errors"
 10	"fmt"
 11	"log/slog"
 12	"net"
 13	"net/http"
 14	"net/url"
 15	"strings"
 16	"time"
 17
 18	"github.com/picosh/pico/db"
 19	"github.com/picosh/utils/pipe/metrics"
 20	"github.com/simplesurance/go-ip-anonymizer/ipanonymizer"
 21	"github.com/x-way/crawlerdetect"
 22)
 23
 24func HmacString(secret, data string) string {
 25	hmacer := hmac.New(sha256.New, []byte(secret))
 26	hmacer.Write([]byte(data))
 27	dataHmac := hmacer.Sum(nil)
 28	return hex.EncodeToString(dataHmac)
 29}
 30
 31func trackableUserAgent(agent string) error {
 32	// dont store requests from bots
 33	if crawlerdetect.IsCrawler(agent) {
 34		return fmt.Errorf(
 35			"request is likely from a bot (User-Agent: %s)",
 36			CleanUserAgent(agent),
 37		)
 38	}
 39	return nil
 40}
 41
 42func trackableRequest(r *http.Request) error {
 43	agent := r.UserAgent()
 44	return trackableUserAgent(agent)
 45}
 46
 47func cleanIpAddress(ip string) (string, error) {
 48	host, _, err := net.SplitHostPort(ip)
 49	if err != nil {
 50		host = ip
 51	}
 52	// /24 IPv4 subnet mask
 53	// /64 IPv6 subnet mask
 54	anonymizer := ipanonymizer.NewWithMask(
 55		net.CIDRMask(24, 32),
 56		net.CIDRMask(64, 128),
 57	)
 58	anonIp, err := anonymizer.IPString(host)
 59	return anonIp, err
 60}
 61
 62func cleanUrl(orig string) (string, string) {
 63	u, err := url.Parse(orig)
 64	if err != nil {
 65		return "", ""
 66	}
 67	return u.Host, u.Path
 68}
 69
 70func cleanUrlFromRequest(r *http.Request) (string, string) {
 71	host := r.Header.Get("x-forwarded-host")
 72	if host == "" {
 73		host = r.URL.Host
 74	}
 75	if host == "" {
 76		host = r.Host
 77	}
 78	// we don't want query params in the url for security reasons
 79	return host, r.URL.Path
 80}
 81
 82func CleanUserAgent(ua string) string {
 83	// truncate user-agent because http headers have no text limit
 84	if len(ua) > 1000 {
 85		return ua[:1000]
 86	}
 87	return strings.TrimSpace(ua)
 88}
 89
 90func filterIp(host string) (string, error) {
 91	if host == "" {
 92		return "", nil
 93	}
 94	addr := net.ParseIP(host)
 95	if addr != nil {
 96		return "", fmt.Errorf("host is an ip")
 97	}
 98	return host, nil
 99}
100
101func CleanReferer(raw string) (string, error) {
102	ref := raw
103	if ref == "" {
104		return "", nil
105	}
106	// referer sometimes dont include scheme but we need it
107	if !strings.HasPrefix(ref, "http") {
108		ref = "https://" + ref
109	}
110	// we only want to store host for security reasons
111	// https://developer.mozilla.org/en-US/docs/Web/Security/Referer_header:_privacy_and_security_concerns
112	u, err := url.Parse(ref)
113	if err != nil {
114		return "", err
115	}
116	hostname := u.Hostname()
117	hostname, _ = filterIp(hostname)
118	hostname = strings.TrimSpace(strings.ToLower(hostname))
119	return hostname, err
120}
121
122func CleanHost(raw string) (string, error) {
123	prep := strings.TrimSpace(strings.ToLower(raw))
124	if prep == "" {
125		return "", fmt.Errorf("host is blank")
126	}
127	// hosts dont usually include scheme but we need it
128	if !strings.HasPrefix(prep, "http") {
129		prep = "https://" + prep
130	}
131	// no clue why but our prod data contains periods
132	prep = strings.Trim(prep, ".")
133	// we only want to store host for security reasons
134	// https://developer.mozilla.org/en-US/docs/Web/Security/Referer_header:_privacy_and_security_concerns
135	u, err := url.Parse(prep)
136	if err != nil {
137		return raw, err
138	}
139	host := u.Hostname()
140	host, err = filterIp(host)
141	return host, err
142}
143
// ErrAnalyticsDisabled is returned by AnalyticsVisitFromVisit and
// AnalyticsVisitFromRequest when HasFeatureForUser reports that the user does
// not have the "analytics" feature.
var ErrAnalyticsDisabled = errors.New("owner does not have site analytics enabled")
145
146func AnalyticsVisitFromVisit(visit *db.AnalyticsVisits, dbpool db.DB, secret string) error {
147	if !dbpool.HasFeatureForUser(visit.UserID, "analytics") {
148		return ErrAnalyticsDisabled
149	}
150
151	err := trackableUserAgent(visit.UserAgent)
152	if err != nil {
153		return err
154	}
155
156	ipAddress, err := cleanIpAddress(visit.IpAddress)
157	if err != nil {
158		return err
159	}
160	visit.IpAddress = HmacString(secret, ipAddress)
161	_, path := cleanUrl(visit.Path)
162	visit.Path = path
163
164	referer, err := CleanReferer(visit.Referer)
165	if err != nil {
166		return err
167	}
168	visit.Referer = referer
169
170	hostname, err := CleanHost(visit.Host)
171	if err != nil {
172		return err
173	}
174	visit.Host = hostname
175	visit.UserAgent = CleanUserAgent(visit.UserAgent)
176
177	return nil
178}
179
180func ipFromRequest(r *http.Request) string {
181	// https://caddyserver.com/docs/caddyfile/directives/reverse_proxy#defaults
182	ipOrig := r.Header.Get("x-forwarded-for")
183	if ipOrig == "" {
184		ipOrig = r.RemoteAddr
185	}
186	// probably means this is a web tunnel
187	if ipOrig == "" || ipOrig == "@" {
188		sshCtx, err := GetSshCtx(r)
189		if err == nil {
190			ipOrig = sshCtx.RemoteAddr().String()
191		}
192	}
193
194	return ipOrig
195}
196
197func AnalyticsVisitFromRequest(r *http.Request, dbpool db.DB, userID string) (*db.AnalyticsVisits, error) {
198	if !dbpool.HasFeatureForUser(userID, "analytics") {
199		return nil, ErrAnalyticsDisabled
200	}
201
202	err := trackableRequest(r)
203	if err != nil {
204		return nil, err
205	}
206
207	ipAddress := ipFromRequest(r)
208	host, path := cleanUrlFromRequest(r)
209
210	return &db.AnalyticsVisits{
211		UserID:    userID,
212		Host:      host,
213		Path:      path,
214		IpAddress: ipAddress,
215		UserAgent: r.UserAgent(),
216		Referer:   r.Referer(),
217		Status:    http.StatusOK,
218	}, nil
219}
220
221func AnalyticsCollect(ch chan *db.AnalyticsVisits, dbpool db.DB, logger *slog.Logger) {
222	drain := metrics.RegisterReconnectMetricRecorder(
223		context.Background(),
224		logger,
225		NewPicoPipeClient(),
226		100,
227		10*time.Millisecond,
228	)
229
230	for visit := range ch {
231		data, err := json.Marshal(visit)
232		if err != nil {
233			logger.Error("could not json marshall visit record", "err", err)
234			continue
235		}
236
237		data = append(data, '\n')
238
239		_, err = drain.Write(data)
240		if err != nil {
241			logger.Error("could not write to metric-drain", "err", err)
242		}
243	}
244}