Antonio Mika
·
18 Nov 24
analytics.go
1package shared
2
3import (
4 "context"
5 "crypto/hmac"
6 "crypto/sha256"
7 "encoding/hex"
8 "encoding/json"
9 "errors"
10 "fmt"
11 "log/slog"
12 "net"
13 "net/http"
14 "net/url"
15 "time"
16
17 "github.com/picosh/pico/db"
18 "github.com/picosh/utils/pipe/metrics"
19 "github.com/simplesurance/go-ip-anonymizer/ipanonymizer"
20 "github.com/x-way/crawlerdetect"
21)
22
// HmacString computes the HMAC-SHA256 of data keyed with secret and returns
// the digest as a lowercase hexadecimal string.
func HmacString(secret, data string) string {
	mac := hmac.New(sha256.New, []byte(secret))
	// hash.Hash documents that Write never returns an error.
	_, _ = mac.Write([]byte(data))
	return hex.EncodeToString(mac.Sum(nil))
}
29
30func trackableUserAgent(agent string) error {
31 // dont store requests from bots
32 if crawlerdetect.IsCrawler(agent) {
33 return fmt.Errorf(
34 "request is likely from a bot (User-Agent: %s)",
35 cleanUserAgent(agent),
36 )
37 }
38 return nil
39}
40
41func trackableRequest(r *http.Request) error {
42 agent := r.UserAgent()
43 return trackableUserAgent(agent)
44}
45
46func cleanIpAddress(ip string) (string, error) {
47 host, _, err := net.SplitHostPort(ip)
48 if err != nil {
49 host = ip
50 }
51 // /24 IPv4 subnet mask
52 // /64 IPv6 subnet mask
53 anonymizer := ipanonymizer.NewWithMask(
54 net.CIDRMask(24, 32),
55 net.CIDRMask(64, 128),
56 )
57 anonIp, err := anonymizer.IPString(host)
58 return anonIp, err
59}
60
// cleanUrl parses orig and returns its host and path components, dropping
// everything else (scheme, query string, fragment, user info). If orig cannot
// be parsed, both return values are empty strings.
func cleanUrl(orig string) (string, string) {
	parsed, parseErr := url.Parse(orig)
	if parseErr == nil {
		return parsed.Host, parsed.Path
	}
	return "", ""
}
68
// cleanUrlFromRequest returns the host and path that r was addressed to.
// The host is taken from the first non-empty value among the
// "x-forwarded-host" header, r.URL.Host and r.Host, in that order. Only the
// path of the URL is returned: query parameters are deliberately left out for
// security reasons.
func cleanUrlFromRequest(r *http.Request) (string, string) {
	host := r.Header.Get("x-forwarded-host")
	switch {
	case host != "":
		// forwarded host wins
	case r.URL.Host != "":
		host = r.URL.Host
	default:
		host = r.Host
	}
	return host, r.URL.Path
}
80
// cleanUserAgent caps ua at 1000 bytes, because HTTP headers have no length
// limit of their own. Strings of 1000 bytes or fewer are returned unchanged.
//
// Longer strings are cut at the last UTF-8 rune boundary at or before byte
// 1000, so a multi-byte character is never split in half. The result is
// therefore at most 1000 bytes long and, for valid UTF-8 input, is itself
// valid UTF-8.
func cleanUserAgent(ua string) string {
	const maxUserAgentLen = 1000
	if len(ua) <= maxUserAgentLen {
		return ua
	}

	cut := maxUserAgentLen
	// ua[cut] is the first byte that gets dropped. If it is a UTF-8
	// continuation byte (bit pattern 10xxxxxx) the rune it belongs to started
	// earlier and would be split, so move the cut back to that rune's first
	// byte. A rune has at most three continuation bytes, which bounds the
	// walk even when the input is not valid UTF-8.
	for cut > maxUserAgentLen-3 && ua[cut]&0xC0 == 0x80 {
		cut--
	}
	return ua[:cut]
}
88
// cleanReferer reduces a Referer header value to just its host. Only the host
// is kept for security reasons, see
// https://developer.mozilla.org/en-US/docs/Web/Security/Referer_header:_privacy_and_security_concerns
//
// If ref cannot be parsed as a URL, an empty string and the parse error are
// returned.
func cleanReferer(ref string) (string, error) {
	var host string
	parsed, err := url.Parse(ref)
	if err == nil {
		host = parsed.Host
	}
	return host, err
}
98
// ErrAnalyticsDisabled is returned by AnalyticsVisitFromVisit and
// AnalyticsVisitFromRequest when the "analytics" feature is not enabled for
// the user that owns the visit.
var ErrAnalyticsDisabled = errors.New("owner does not have site analytics enabled")
100
101func AnalyticsVisitFromVisit(visit *db.AnalyticsVisits, dbpool db.DB, secret string) error {
102 if !dbpool.HasFeatureForUser(visit.UserID, "analytics") {
103 return ErrAnalyticsDisabled
104 }
105
106 err := trackableUserAgent(visit.UserAgent)
107 if err != nil {
108 return err
109 }
110
111 ipAddress, err := cleanIpAddress(visit.IpAddress)
112 if err != nil {
113 return err
114 }
115 visit.IpAddress = HmacString(secret, ipAddress)
116 _, path := cleanUrl(visit.Path)
117 visit.Path = path
118
119 referer, err := cleanReferer(visit.Referer)
120 if err != nil {
121 return err
122 }
123 visit.Referer = referer
124 visit.UserAgent = cleanUserAgent(visit.UserAgent)
125
126 return nil
127}
128
129func ipFromRequest(r *http.Request) string {
130 // https://caddyserver.com/docs/caddyfile/directives/reverse_proxy#defaults
131 ipOrig := r.Header.Get("x-forwarded-for")
132 if ipOrig == "" {
133 ipOrig = r.RemoteAddr
134 }
135 // probably means this is a web tunnel
136 if ipOrig == "" || ipOrig == "@" {
137 sshCtx, err := GetSshCtx(r)
138 if err == nil {
139 ipOrig = sshCtx.RemoteAddr().String()
140 }
141 }
142
143 return ipOrig
144}
145
146func AnalyticsVisitFromRequest(r *http.Request, dbpool db.DB, userID string) (*db.AnalyticsVisits, error) {
147 if !dbpool.HasFeatureForUser(userID, "analytics") {
148 return nil, ErrAnalyticsDisabled
149 }
150
151 err := trackableRequest(r)
152 if err != nil {
153 return nil, err
154 }
155
156 ipAddress := ipFromRequest(r)
157 host, path := cleanUrlFromRequest(r)
158
159 return &db.AnalyticsVisits{
160 UserID: userID,
161 Host: host,
162 Path: path,
163 IpAddress: ipAddress,
164 UserAgent: r.UserAgent(),
165 Referer: r.Referer(),
166 Status: http.StatusOK,
167 }, nil
168}
169
170func AnalyticsCollect(ch chan *db.AnalyticsVisits, dbpool db.DB, logger *slog.Logger) {
171 drain := metrics.RegisterReconnectMetricRecorder(
172 context.Background(),
173 logger,
174 NewPicoPipeClient(),
175 100,
176 10*time.Millisecond,
177 )
178
179 for visit := range ch {
180 data, err := json.Marshal(visit)
181 if err != nil {
182 logger.Error("could not json marshall visit record", "err", err)
183 continue
184 }
185
186 data = append(data, '\n')
187
188 _, err = drain.Write(data)
189 if err != nil {
190 logger.Error("could not write to metric-drain", "err", err)
191 }
192 }
193}