diff options
author | Till <2353100+S7evinK@users.noreply.github.com> | 2022-05-04 19:04:28 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-05-04 19:04:28 +0200 |
commit | 3c940c428d529476b6fa2cbf1ba28d53ec011584 (patch) | |
tree | a8963fc8b69c123947d126d0c64f12d1c3d48957 /userapi/storage/sqlite3 | |
parent | b0a9e85c4a02f39880682d9d682f9cc7af13a93c (diff) |
Add opt-in anonymous stats reporting (#2249)
* Initial phone home stats queries
* Add userAgent to UpdateDeviceLastSeen
Add new Table for tracking daily user vists
* Add user_daily_visits table
* Fix queries
* userapi stats tables & queries
* userapi interface and internal api
* sycnapi stats queries
* testing phone home stats
* Add complete config to syncapi
* add missing files
* Fix queries
* Send empty request
* Add version & monolith stats
* Add configuration for phone home stats
* Move WASM to its own file, add config and comments
* Add tracing methods
* Add total rooms
* Add more fields, actually send data somewhere
* Move stats to the userapi
* Move phone home stats to util package
* Cleanup
* Linter & parts of GH comments
* More GH comments changes
- Move comments to SQL statements
- Shrink interface, add struct for stats
- No fatal errors, use defaults
* Be more explicit when querying
* Fix wrong calculation & wrong query params
Add tests
* Add Windows stats
* ADd build constraint
* Use new testing structure
Fix issues with getting values when using SQLite
Fix wrong AddDate value
Export UpdateUserDailyVisits
* Fix query params
* Fix test
* Add comment about countR30UsersSQL and countR30UsersV2SQL; fix test
* Update config
* Also update example config file
* Use OS level proxy, update logging
Co-authored-by: kegsay <kegan@matrix.org>
Diffstat (limited to 'userapi/storage/sqlite3')
-rw-r--r-- | userapi/storage/sqlite3/devices_table.go | 6 | ||||
-rw-r--r-- | userapi/storage/sqlite3/stats_table.go | 452 | ||||
-rw-r--r-- | userapi/storage/sqlite3/storage.go | 5 |
3 files changed, 460 insertions, 3 deletions
diff --git a/userapi/storage/sqlite3/devices_table.go b/userapi/storage/sqlite3/devices_table.go index b86ed1cc..93291e6a 100644 --- a/userapi/storage/sqlite3/devices_table.go +++ b/userapi/storage/sqlite3/devices_table.go @@ -81,7 +81,7 @@ const selectDevicesByIDSQL = "" + "SELECT device_id, localpart, display_name, last_seen_ts FROM device_devices WHERE device_id IN ($1) ORDER BY last_seen_ts DESC" const updateDeviceLastSeen = "" + - "UPDATE device_devices SET last_seen_ts = $1, ip = $2 WHERE localpart = $3 AND device_id = $4" + "UPDATE device_devices SET last_seen_ts = $1, ip = $2, user_agent = $3 WHERE localpart = $4 AND device_id = $5" type devicesStatements struct { db *sql.DB @@ -306,9 +306,9 @@ func (s *devicesStatements) SelectDevicesByID(ctx context.Context, deviceIDs []s return devices, rows.Err() } -func (s *devicesStatements) UpdateDeviceLastSeen(ctx context.Context, txn *sql.Tx, localpart, deviceID, ipAddr string) error { +func (s *devicesStatements) UpdateDeviceLastSeen(ctx context.Context, txn *sql.Tx, localpart, deviceID, ipAddr, userAgent string) error { lastSeenTs := time.Now().UnixNano() / 1000000 stmt := sqlutil.TxStmt(txn, s.updateDeviceLastSeenStmt) - _, err := stmt.ExecContext(ctx, lastSeenTs, ipAddr, localpart, deviceID) + _, err := stmt.ExecContext(ctx, lastSeenTs, ipAddr, userAgent, localpart, deviceID) return err } diff --git a/userapi/storage/sqlite3/stats_table.go b/userapi/storage/sqlite3/stats_table.go new file mode 100644 index 00000000..af4c7ff9 --- /dev/null +++ b/userapi/storage/sqlite3/stats_table.go @@ -0,0 +1,452 @@ +// Copyright 2022 The Matrix.org Foundation C.I.C. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sqlite3 + +import ( + "context" + "database/sql" + "strings" + "time" + + "github.com/matrix-org/dendrite/internal" + "github.com/matrix-org/dendrite/internal/sqlutil" + "github.com/matrix-org/dendrite/userapi/api" + "github.com/matrix-org/dendrite/userapi/storage/tables" + "github.com/matrix-org/dendrite/userapi/types" + "github.com/matrix-org/gomatrixserverlib" + "github.com/sirupsen/logrus" +) + +const userDailyVisitsSchema = ` +CREATE TABLE IF NOT EXISTS user_daily_visits ( + localpart TEXT NOT NULL, + device_id TEXT NOT NULL, + timestamp BIGINT NOT NULL, + user_agent TEXT +); + +-- Device IDs and timestamp must be unique for a given user per day +CREATE UNIQUE INDEX IF NOT EXISTS localpart_device_timestamp_idx ON user_daily_visits(localpart, device_id, timestamp); +CREATE INDEX IF NOT EXISTS timestamp_idx ON user_daily_visits(timestamp); +CREATE INDEX IF NOT EXISTS localpart_timestamp_idx ON user_daily_visits(localpart, timestamp); +` + +const countUsersLastSeenAfterSQL = "" + + "SELECT COUNT(*) FROM (" + + " SELECT localpart FROM device_devices WHERE last_seen_ts > $1 " + + " GROUP BY localpart" + + " ) u" + +// Note on the following countR30UsersSQL and countR30UsersV2SQL: The different checks are intentional. +// This is to ensure the values reported by Dendrite are the same as by Synapse. +// Queries are taken from: https://github.com/matrix-org/synapse/blob/9ce51a47f6e37abd0a1275281806399d874eb026/synapse/storage/databases/main/stats.py + +/* +R30Users counts the number of 30 day retained users, defined as: +- Users who have created their accounts more than 30 days ago +- Where last seen at most 30 days ago +- Where account creation and last_seen are > 30 days apart +*/ +const countR30UsersSQL = ` +SELECT platform, COUNT(*) FROM ( + SELECT users.localpart, platform, users.created_ts, MAX(uip.last_seen_ts) + FROM account_accounts users + INNER JOIN + (SELECT + localpart, last_seen_ts, + CASE + WHEN user_agent LIKE '%%Android%%' THEN 'android' + WHEN user_agent LIKE '%%iOS%%' THEN 'ios' + WHEN user_agent LIKE '%%Electron%%' THEN 'electron' + WHEN user_agent LIKE '%%Mozilla%%' THEN 'web' + WHEN user_agent LIKE '%%Gecko%%' THEN 'web' + ELSE 'unknown' + END + AS platform + FROM device_devices + ) uip + ON users.localpart = uip.localpart + AND users.account_type <> 4 + AND users.created_ts < $1 + AND uip.last_seen_ts > $2 + AND (uip.last_seen_ts) - users.created_ts > $3 + GROUP BY users.localpart, platform, users.created_ts + ) u GROUP BY PLATFORM +` + +// Note on the following countR30UsersSQL and countR30UsersV2SQL: The different checks are intentional. +// This is to ensure the values reported are the same as Synapse reports. +// Queries are taken from: https://github.com/matrix-org/synapse/blob/9ce51a47f6e37abd0a1275281806399d874eb026/synapse/storage/databases/main/stats.py + +/* +R30UsersV2 counts the number of 30 day retained users, defined as users that: +- Appear more than once in the past 60 days +- Have more than 30 days between the most and least recent appearances that occurred in the past 60 days. +*/ +const countR30UsersV2SQL = ` +SELECT + client_type, + count(client_type) +FROM + ( + SELECT + localpart, + CASE + WHEN + LOWER(user_agent) LIKE '%%riot%%' OR + LOWER(user_agent) LIKE '%%element%%' + THEN CASE + WHEN LOWER(user_agent) LIKE '%%electron%%' THEN 'electron' + WHEN LOWER(user_agent) LIKE '%%android%%' THEN 'android' + WHEN LOWER(user_agent) LIKE '%%ios%%' THEN 'ios' + ELSE 'unknown' + END + WHEN LOWER(user_agent) LIKE '%%mozilla%%' OR LOWER(user_agent) LIKE '%%gecko%%' THEN 'web' + ELSE 'unknown' + END as client_type + FROM user_daily_visits + WHERE timestamp > $1 AND timestamp < $2 + GROUP BY localpart, client_type + HAVING max(timestamp) - min(timestamp) > $3 + ) AS temp +GROUP BY client_type +` + +const countUserByAccountTypeSQL = ` +SELECT COUNT(*) FROM account_accounts WHERE account_type IN ($1) +` + +// $1 = Guest AccountType +// $3 & $4 = All non guest AccountType IDs +const countRegisteredUserByTypeSQL = ` +SELECT user_type, COUNT(*) AS count FROM ( + SELECT + CASE + WHEN account_type IN ($1) AND appservice_id IS NULL THEN 'native' + WHEN account_type = $4 AND appservice_id IS NULL THEN 'guest' + WHEN account_type IN ($5) AND appservice_id IS NOT NULL THEN 'bridged' + END AS user_type + FROM account_accounts + WHERE created_ts > $8 +) AS t GROUP BY user_type +` + +// account_type 1 = users; 3 = admins +const updateUserDailyVisitsSQL = ` +INSERT INTO user_daily_visits(localpart, device_id, timestamp, user_agent) + SELECT u.localpart, u.device_id, $1, MAX(u.user_agent) + FROM device_devices AS u + LEFT JOIN ( + SELECT localpart, device_id, timestamp FROM user_daily_visits + WHERE timestamp = $1 + ) udv + ON u.localpart = udv.localpart AND u.device_id = udv.device_id + INNER JOIN device_devices d ON d.localpart = u.localpart + INNER JOIN account_accounts a ON a.localpart = u.localpart + WHERE $2 <= d.last_seen_ts AND d.last_seen_ts < $3 + AND a.account_type in (1, 3) + GROUP BY u.localpart, u.device_id +ON CONFLICT (localpart, device_id, timestamp) DO NOTHING +; +` + +const queryDBEngineVersion = "select sqlite_version();" + +type statsStatements struct { + serverName gomatrixserverlib.ServerName + db *sql.DB + lastUpdate time.Time + countUsersLastSeenAfterStmt *sql.Stmt + countR30UsersStmt *sql.Stmt + countR30UsersV2Stmt *sql.Stmt + updateUserDailyVisitsStmt *sql.Stmt + countUserByAccountTypeStmt *sql.Stmt + countRegisteredUserByTypeStmt *sql.Stmt + dbEngineVersionStmt *sql.Stmt +} + +func NewSQLiteStatsTable(db *sql.DB, serverName gomatrixserverlib.ServerName) (tables.StatsTable, error) { + s := &statsStatements{ + serverName: serverName, + lastUpdate: time.Now(), + db: db, + } + + _, err := db.Exec(userDailyVisitsSchema) + if err != nil { + return nil, err + } + go s.startTimers() + return s, sqlutil.StatementList{ + {&s.countUsersLastSeenAfterStmt, countUsersLastSeenAfterSQL}, + {&s.countR30UsersStmt, countR30UsersSQL}, + {&s.countR30UsersV2Stmt, countR30UsersV2SQL}, + {&s.updateUserDailyVisitsStmt, updateUserDailyVisitsSQL}, + {&s.countUserByAccountTypeStmt, countUserByAccountTypeSQL}, + {&s.countRegisteredUserByTypeStmt, countRegisteredUserByTypeSQL}, + {&s.dbEngineVersionStmt, queryDBEngineVersion}, + }.Prepare(db) +} + +func (s *statsStatements) startTimers() { + var updateStatsFunc func() + updateStatsFunc = func() { + logrus.Infof("Executing UpdateUserDailyVisits") + if err := s.UpdateUserDailyVisits(context.Background(), nil, time.Now(), s.lastUpdate); err != nil { + logrus.WithError(err).Error("failed to update daily user visits") + } + time.AfterFunc(time.Hour*3, updateStatsFunc) + } + time.AfterFunc(time.Minute*5, updateStatsFunc) +} + +func (s *statsStatements) allUsers(ctx context.Context, txn *sql.Tx) (result int64, err error) { + query := strings.Replace(countUserByAccountTypeSQL, "($1)", sqlutil.QueryVariadic(4), 1) + queryStmt, err := s.db.Prepare(query) + if err != nil { + return 0, err + } + stmt := sqlutil.TxStmt(txn, queryStmt) + err = stmt.QueryRowContext(ctx, + 1, 2, 3, 4, + ).Scan(&result) + return +} + +func (s *statsStatements) nonBridgedUsers(ctx context.Context, txn *sql.Tx) (result int64, err error) { + query := strings.Replace(countUserByAccountTypeSQL, "($1)", sqlutil.QueryVariadic(3), 1) + queryStmt, err := s.db.Prepare(query) + if err != nil { + return 0, err + } + stmt := sqlutil.TxStmt(txn, queryStmt) + err = stmt.QueryRowContext(ctx, + 1, 2, 3, + ).Scan(&result) + return +} + +func (s *statsStatements) registeredUserByType(ctx context.Context, txn *sql.Tx) (map[string]int64, error) { + // $1 = Guest AccountType; $2 = timestamp + // $3 & $4 = All non guest AccountType IDs + nonGuests := []api.AccountType{api.AccountTypeUser, api.AccountTypeAdmin, api.AccountTypeAppService} + countSQL := strings.Replace(countRegisteredUserByTypeSQL, "($1)", sqlutil.QueryVariadicOffset(len(nonGuests), 0), 1) + countSQL = strings.Replace(countSQL, "($5)", sqlutil.QueryVariadicOffset(len(nonGuests), 1+len(nonGuests)), 1) + queryStmt, err := s.db.Prepare(countSQL) + if err != nil { + return nil, err + } + stmt := sqlutil.TxStmt(txn, queryStmt) + registeredAfter := time.Now().AddDate(0, 0, -30) + + params := make([]interface{}, len(nonGuests)*2+2) + // nonGuests is used twice + for i, v := range nonGuests { + params[i] = v // i: 0 1 2 => ($1, $2, $3) + params[i+1+len(nonGuests)] = v // i: 4 5 6 => ($5, $6, $7) + } + params[3] = api.AccountTypeGuest // $4 + params[7] = gomatrixserverlib.AsTimestamp(registeredAfter) // $8 + + rows, err := stmt.QueryContext(ctx, params...) + if err != nil { + return nil, err + } + defer internal.CloseAndLogIfError(ctx, rows, "RegisteredUserByType: failed to close rows") + + var userType string + var count int64 + var result = make(map[string]int64) + for rows.Next() { + if err = rows.Scan(&userType, &count); err != nil { + return nil, err + } + result[userType] = count + } + + return result, rows.Err() +} + +func (s *statsStatements) dailyUsers(ctx context.Context, txn *sql.Tx) (result int64, err error) { + stmt := sqlutil.TxStmt(txn, s.countUsersLastSeenAfterStmt) + lastSeenAfter := time.Now().AddDate(0, 0, -1) + err = stmt.QueryRowContext(ctx, + gomatrixserverlib.AsTimestamp(lastSeenAfter), + ).Scan(&result) + return +} + +func (s *statsStatements) monthlyUsers(ctx context.Context, txn *sql.Tx) (result int64, err error) { + stmt := sqlutil.TxStmt(txn, s.countUsersLastSeenAfterStmt) + lastSeenAfter := time.Now().AddDate(0, 0, -30) + err = stmt.QueryRowContext(ctx, + gomatrixserverlib.AsTimestamp(lastSeenAfter), + ).Scan(&result) + return +} + +/* R30Users counts the number of 30 day retained users, defined as: +- Users who have created their accounts more than 30 days ago +- Where last seen at most 30 days ago +- Where account creation and last_seen are > 30 days apart +*/ +func (s *statsStatements) r30Users(ctx context.Context, txn *sql.Tx) (map[string]int64, error) { + stmt := sqlutil.TxStmt(txn, s.countR30UsersStmt) + lastSeenAfter := time.Now().AddDate(0, 0, -30) + diff := time.Hour * 24 * 30 + + rows, err := stmt.QueryContext(ctx, + gomatrixserverlib.AsTimestamp(lastSeenAfter), + gomatrixserverlib.AsTimestamp(lastSeenAfter), + diff.Milliseconds(), + ) + if err != nil { + return nil, err + } + defer internal.CloseAndLogIfError(ctx, rows, "R30Users: failed to close rows") + + var platform string + var count int64 + var result = make(map[string]int64) + for rows.Next() { + if err = rows.Scan(&platform, &count); err != nil { + return nil, err + } + if platform == "unknown" { + continue + } + result["all"] += count + result[platform] = count + } + + return result, rows.Err() +} + +/* R30UsersV2 counts the number of 30 day retained users, defined as users that: +- Appear more than once in the past 60 days +- Have more than 30 days between the most and least recent appearances that occurred in the past 60 days. +*/ +func (s *statsStatements) r30UsersV2(ctx context.Context, txn *sql.Tx) (map[string]int64, error) { + stmt := sqlutil.TxStmt(txn, s.countR30UsersV2Stmt) + sixtyDaysAgo := time.Now().AddDate(0, 0, -60) + diff := time.Hour * 24 * 30 + tomorrow := time.Now().Add(time.Hour * 24) + + rows, err := stmt.QueryContext(ctx, + gomatrixserverlib.AsTimestamp(sixtyDaysAgo), + gomatrixserverlib.AsTimestamp(tomorrow), + diff.Milliseconds(), + ) + if err != nil { + return nil, err + } + defer internal.CloseAndLogIfError(ctx, rows, "R30UsersV2: failed to close rows") + + var platform string + var count int64 + var result = map[string]int64{ + "ios": 0, + "android": 0, + "web": 0, + "electron": 0, + "all": 0, + } + for rows.Next() { + if err = rows.Scan(&platform, &count); err != nil { + return nil, err + } + if _, ok := result[platform]; !ok { + continue + } + result["all"] += count + result[platform] = count + } + return result, rows.Err() +} + +// UserStatistics collects some information about users on this instance. +// Returns the stats itself as well as the database engine version and type. +// On error, returns the stats collected up to the error. +func (s *statsStatements) UserStatistics(ctx context.Context, txn *sql.Tx) (*types.UserStatistics, *types.DatabaseEngine, error) { + var ( + stats = &types.UserStatistics{ + R30UsersV2: map[string]int64{ + "ios": 0, + "android": 0, + "web": 0, + "electron": 0, + "all": 0, + }, + R30Users: map[string]int64{}, + RegisteredUsersByType: map[string]int64{}, + } + dbEngine = &types.DatabaseEngine{Engine: "SQLite", Version: "unknown"} + err error + ) + stats.AllUsers, err = s.allUsers(ctx, txn) + if err != nil { + return stats, dbEngine, err + } + stats.DailyUsers, err = s.dailyUsers(ctx, txn) + if err != nil { + return stats, dbEngine, err + } + stats.MonthlyUsers, err = s.monthlyUsers(ctx, txn) + if err != nil { + return stats, dbEngine, err + } + stats.R30Users, err = s.r30Users(ctx, txn) + if err != nil { + return stats, dbEngine, err + } + stats.R30UsersV2, err = s.r30UsersV2(ctx, txn) + if err != nil { + return stats, dbEngine, err + } + stats.NonBridgedUsers, err = s.nonBridgedUsers(ctx, txn) + if err != nil { + return stats, dbEngine, err + } + stats.RegisteredUsersByType, err = s.registeredUserByType(ctx, txn) + if err != nil { + return stats, dbEngine, err + } + + stmt := sqlutil.TxStmt(txn, s.dbEngineVersionStmt) + err = stmt.QueryRowContext(ctx).Scan(&dbEngine.Version) + return stats, dbEngine, err +} + +func (s *statsStatements) UpdateUserDailyVisits( + ctx context.Context, txn *sql.Tx, + startTime, lastUpdate time.Time, +) error { + stmt := sqlutil.TxStmt(txn, s.updateUserDailyVisitsStmt) + startTime = startTime.Truncate(time.Hour * 24) + + // edge case + if startTime.After(s.lastUpdate) { + startTime = startTime.AddDate(0, 0, -1) + } + _, err := stmt.ExecContext(ctx, + gomatrixserverlib.AsTimestamp(startTime), + gomatrixserverlib.AsTimestamp(lastUpdate), + gomatrixserverlib.AsTimestamp(time.Now()), + ) + if err == nil { + s.lastUpdate = time.Now() + } + return err +} diff --git a/userapi/storage/sqlite3/storage.go b/userapi/storage/sqlite3/storage.go index 6858d3d1..a822f687 100644 --- a/userapi/storage/sqlite3/storage.go +++ b/userapi/storage/sqlite3/storage.go @@ -95,6 +95,10 @@ func NewDatabase(base *base.BaseDendrite, dbProperties *config.DatabaseOptions, if err != nil { return nil, fmt.Errorf("NewPostgresNotificationTable: %w", err) } + statsTable, err := NewSQLiteStatsTable(db, serverName) + if err != nil { + return nil, fmt.Errorf("NewSQLiteStatsTable: %w", err) + } return &shared.Database{ AccountDatas: accountDataTable, Accounts: accountsTable, @@ -107,6 +111,7 @@ func NewDatabase(base *base.BaseDendrite, dbProperties *config.DatabaseOptions, ThreePIDs: threePIDTable, Pushers: pusherTable, Notifications: notificationsTable, + Stats: statsTable, ServerName: serverName, DB: db, Writer: writer, |