Eric Bower
·
29 Nov 24
clean.go
1package main
2
3import (
4 "fmt"
5 "log/slog"
6 "os"
7
8 "github.com/picosh/pico/db/postgres"
9 "github.com/picosh/pico/shared"
10)
11
12func main() {
13 logger := slog.Default()
14 DbURL := os.Getenv("DATABASE_URL")
15 dbpool := postgres.NewDB(DbURL, logger)
16 batchSize := 100_000
17 offset := 0
18
19 var totalRows int
20 err := dbpool.Db.QueryRow("SELECT count(id) FROM analytics_visits").Scan(&totalRows)
21 if err != nil {
22 panic(err)
23 }
24
25 fmt.Println("TOTAL ROWS", totalRows)
26
27 for {
28 fmt.Println("===")
29 fmt.Println("offset", offset)
30 fmt.Println("===")
31 rows, err := dbpool.Db.Query("SELECT id, host, referer FROM analytics_visits ORDER BY created_at DESC LIMIT $1 OFFSET $2", batchSize, offset)
32 if err != nil {
33 panic(err)
34 }
35
36 // Process the rows
37 for rows.Next() {
38 var id, origHost, origRef string
39 err := rows.Scan(
40 &id,
41 &origHost,
42 &origRef,
43 )
44 if err != nil {
45 panic(err)
46 }
47
48 update := false
49
50 host, err := shared.CleanHost(origHost)
51 if err != nil {
52 fmt.Println(err)
53 }
54
55 if origHost != host {
56 update = true
57 fmt.Printf(
58 "HOST %s->%s\n",
59 origHost, host,
60 )
61 }
62
63 ref, err := shared.CleanReferer(origRef)
64 if err != nil {
65 fmt.Println(err)
66 }
67
68 if origRef != ref {
69 update = true
70 fmt.Printf(
71 "REF %s->%s\n",
72 origRef, ref,
73 )
74 }
75
76 if update {
77 fmt.Printf("Updating visit ID:%s\n", id)
78 _, err := dbpool.Db.Exec(
79 "UPDATE analytics_visits SET host=$1, referer=$2 WHERE id=$3",
80 host,
81 ref,
82 id,
83 )
84 if err != nil {
85 panic(err)
86 }
87 }
88 }
89
90 if rows.Err() != nil {
91 panic(rows.Err())
92 }
93
94 offset += batchSize
95 if offset >= totalRows {
96 break
97 }
98 }
99}