Eric Bower
·
14 May 24
analytics.go
1package shared
2
import (
	"crypto/hmac"
	"crypto/sha256"
	"encoding/hex"
	"errors"
	"fmt"
	"log/slog"
	"net"
	"net/http"
	"net/url"
	"strings"

	"github.com/picosh/pico/db"
	"github.com/simplesurance/go-ip-anonymizer/ipanonymizer"
	"github.com/x-way/crawlerdetect"
)
18
// HmacString computes the HMAC-SHA256 of data keyed with secret and returns
// the digest as a lowercase hexadecimal string.
func HmacString(secret, data string) string {
	mac := hmac.New(sha256.New, []byte(secret))
	// hash.Hash documents that Write never returns an error.
	_, _ = mac.Write([]byte(data))
	return hex.EncodeToString(mac.Sum(nil))
}
25
26func trackableRequest(r *http.Request) error {
27 agent := r.UserAgent()
28 // dont store requests from bots
29 if crawlerdetect.IsCrawler(agent) {
30 return fmt.Errorf(
31 "request is likely from a bot (User-Agent: %s)",
32 cleanUserAgent(agent),
33 )
34 }
35 return nil
36}
37
38func cleanIpAddress(ip string) (string, error) {
39 host, _, err := net.SplitHostPort(ip)
40 if err != nil {
41 host = ip
42 }
43 // /24 IPv4 subnet mask
44 // /64 IPv6 subnet mask
45 anonymizer := ipanonymizer.NewWithMask(
46 net.CIDRMask(24, 32),
47 net.CIDRMask(64, 128),
48 )
49 anonIp, err := anonymizer.IPString(host)
50 return anonIp, err
51}
52
// cleanUrl extracts the host and path to record for a request.
//
// The host is taken from the first non-empty value of, in order: the
// x-forwarded-host header, r.URL.Host, and r.Host. The last fallback matters
// for requests received by a server, where net/http normally leaves
// r.URL.Host empty and exposes the Host header through r.Host instead.
//
// Only r.URL.Path is returned as the path; query parameters are deliberately
// dropped for security reasons.
func cleanUrl(r *http.Request) (string, string) {
	host := r.Header.Get("x-forwarded-host")
	if host == "" {
		host = r.URL.Host
	}
	if host == "" {
		host = r.Host
	}
	// we don't want query params in the url for security reasons
	return host, r.URL.Path
}
61
// cleanUserAgent bounds the size of a User-Agent value before it is stored
// or logged, because HTTP headers have no inherent text limit.
//
// Values of at most 1000 bytes are returned unchanged. Longer values are cut
// to at most 1000 bytes, backing off to the nearest rune boundary so that a
// multi-byte UTF-8 character is never split in half (which would leave
// invalid UTF-8 at the end of the result).
func cleanUserAgent(ua string) string {
	const maxLen = 1000
	if len(ua) <= maxLen {
		return ua
	}
	// Ranging over a string yields the byte offset at which each rune
	// starts; remember the last such offset that does not exceed maxLen.
	cut := 0
	for i := range ua {
		if i > maxLen {
			break
		}
		cut = i
	}
	return ua[:cut]
}
69
// cleanReferer reduces a Referer header value to its host component.
//
// Only the host is kept for security reasons; see
// https://developer.mozilla.org/en-US/docs/Web/Security/Referer_header:_privacy_and_security_concerns
//
// It returns an empty string and the parse error when ref is not a valid URL.
func cleanReferer(ref string) (string, error) {
	parsed, parseErr := url.Parse(ref)
	if parseErr != nil {
		return "", parseErr
	}
	return parsed.Host, nil
}
79
// Sentinel errors returned by the analytics helpers.
var (
	// ErrAnalyticsDisabled is returned by AnalyticsVisitFromRequest when the
	// user does not have the "analytics" feature.
	ErrAnalyticsDisabled = errors.New("owner does not have site analytics enabled")
)
81
82func AnalyticsVisitFromRequest(r *http.Request, userID string, secret string) (*db.AnalyticsVisits, error) {
83 dbpool := GetDB(r)
84 if !dbpool.HasFeatureForUser(userID, "analytics") {
85 return nil, ErrAnalyticsDisabled
86 }
87
88 err := trackableRequest(r)
89 if err != nil {
90 return nil, err
91 }
92
93 // https://caddyserver.com/docs/caddyfile/directives/reverse_proxy#defaults
94 ipOrig := r.Header.Get("x-forwarded-for")
95 if ipOrig == "" {
96 ipOrig = r.RemoteAddr
97 }
98 // probably means this is a web tunnel
99 if ipOrig == "" || ipOrig == "@" {
100 sshCtx, err := GetSshCtx(r)
101 if err == nil {
102 ipOrig = sshCtx.RemoteAddr().String()
103 }
104 }
105 ipAddress, err := cleanIpAddress(ipOrig)
106 if err != nil {
107 return nil, err
108 }
109 host, path := cleanUrl(r)
110
111 referer, err := cleanReferer(r.Referer())
112 if err != nil {
113 return nil, err
114 }
115
116 return &db.AnalyticsVisits{
117 UserID: userID,
118 Host: host,
119 Path: path,
120 IpAddress: HmacString(secret, ipAddress),
121 UserAgent: cleanUserAgent(r.UserAgent()),
122 Referer: referer,
123 Status: http.StatusOK,
124 }, nil
125}
126
127func AnalyticsCollect(ch chan *db.AnalyticsVisits, dbpool db.DB, logger *slog.Logger) {
128 for view := range ch {
129 err := dbpool.InsertVisit(view)
130 if err != nil {
131 logger.Error("could not insert view record", "err", err)
132 }
133 }
134}