twitterchiver/archiver: store mapping of tweet ID to user ID that saw it

This commit is contained in:
Luke Granger-Brown 2020-10-04 03:42:02 +01:00
parent 68d8fa6d42
commit 014d1c881a
2 changed files with 40 additions and 6 deletions

View file

@ -112,10 +112,21 @@ func fetchTweets(ctx context.Context, twitterOAuthConfig *oauth1.Config, user us
// userResult is the per-user summary sent on userResultCh once that user's
// tweets have been fetched; updateUser writes it to the database.
type userResult struct { type userResult struct {
// UserID is matched against user_accounts.userid when the result is stored.
UserID int64 UserID int64
// TweetID is written to user_accounts.latest_tweet for this user.
TweetID int64 TweetID int64
// SeenTweetIDs are the tweet IDs recorded against UserID in
// user_accounts_tweets.
SeenTweetIDs []int64
} }
func updateUser(ctx context.Context, conn *pgx.Conn, ur userResult) error { func updateUser(ctx context.Context, conn *pgx.Conn, ur userResult) error {
_, err := conn.Exec(ctx, "UPDATE user_accounts SET latest_tweet=$1 WHERE userid=$2", ur.TweetID, ur.UserID) var b pgx.Batch
b.Queue("UPDATE user_accounts SET latest_tweet=$1 WHERE userid=$2", ur.TweetID, ur.UserID)
for _, twid := range ur.SeenTweetIDs {
b.Queue("INSERT INTO user_accounts_tweets (userid, tweetid) VALUES ($1, $2) ON CONFLICT DO NOTHING", ur.UserID, twid)
}
log.Printf("sending batch of updates for %d...", ur.UserID)
start := time.Now()
br := conn.SendBatch(ctx, &b)
defer br.Close()
_, err := br.Exec()
log.Printf("batch of updates for %d done in %s", ur.UserID, time.Now().Sub(start))
return err return err
} }
@ -145,6 +156,7 @@ func tick(ctx context.Context, conn *pgx.Conn, twitterOAuthConfig *oauth1.Config
defer wg.Done() defer wg.Done()
var largestTweetID int64 var largestTweetID int64
var seenTweetIDs []int64
var tweetCount int var tweetCount int
for user := range userCh { for user := range userCh {
tweets, err := fetchTweets(ctx, twitterOAuthConfig, user) tweets, err := fetchTweets(ctx, twitterOAuthConfig, user)
@ -155,6 +167,7 @@ func tick(ctx context.Context, conn *pgx.Conn, twitterOAuthConfig *oauth1.Config
for _, tw := range tweets { for _, tw := range tweets {
tweetCount++ tweetCount++
resultCh <- tw resultCh <- tw
seenTweetIDs = append(seenTweetIDs, tw.ID)
if tw.ID > largestTweetID { if tw.ID > largestTweetID {
largestTweetID = tw.ID largestTweetID = tw.ID
} }
@ -164,6 +177,7 @@ func tick(ctx context.Context, conn *pgx.Conn, twitterOAuthConfig *oauth1.Config
userResultCh <- userResult{ userResultCh <- userResult{
UserID: user.UserID, UserID: user.UserID,
TweetID: largestTweetID, TweetID: largestTweetID,
SeenTweetIDs: seenTweetIDs,
} }
} }
} }

View file

@ -8,10 +8,30 @@ CREATE TABLE tweets (
object JSONB NOT NULL object JSONB NOT NULL
); );
-- Indexes on tweets:
--   tweets_retweet_text_idx: English full-text (GIN) index over the
--     retweeted status' full_text inside the object JSONB column.
--   tweets_retweet_user_idx: btree index on the retweeted status' author
--     screen_name inside object.
--   tweets_text_idx: English full-text (GIN) index over the text column.
--   tweets_user_idx: btree index on the tweet author's screen_name inside
--     object.
CREATE INDEX tweets_retweet_text_idx ON public.tweets USING gin (to_tsvector('english'::regconfig, ((object -> 'retweeted_status'::text) ->> 'full_text'::text)));
CREATE INDEX tweets_retweet_user_idx ON public.tweets USING btree (((((object -> 'retweeted_status'::text) -> 'user'::text) ->> 'screen_name'::text)));
CREATE INDEX tweets_text_idx ON public.tweets USING gin (to_tsvector('english'::regconfig, text));
CREATE INDEX tweets_user_idx ON public.tweets USING btree ((((object -> 'user'::text) ->> 'screen_name'::text)));
-- user_accounts: one row per account, keyed by username, holding the
-- account's access token/secret and latest_tweet (the value the archiver
-- writes with "UPDATE user_accounts SET latest_tweet=...").
-- userid carries its own UNIQUE constraint so that it can be the target of
-- the fk_userid foreign key on user_accounts_tweets.
CREATE TABLE user_accounts ( CREATE TABLE user_accounts (
username TEXT NOT NULL PRIMARY KEY, username TEXT NOT NULL PRIMARY KEY,
userid BIGINT NOT NULL, userid BIGINT NOT NULL,
access_token TEXT NOT NULL, access_token TEXT NOT NULL,
access_secret TEXT NOT NULL, access_secret TEXT NOT NULL,
latest_tweet BIGINT NOT NULL latest_tweet BIGINT NOT NULL,
CONSTRAINT unique_userid
UNIQUE (userid)
);
-- user_accounts_tweets maps each tweet to the user account(s) that saw it.
-- The primary key makes every (userid, tweetid) pair unique, which is what
-- the archiver's "INSERT ... ON CONFLICT DO NOTHING" relies on; the
-- additional UNIQUE (tweetid, userid) constraint provides an index led by
-- tweetid for lookups in the other direction.
CREATE TABLE public.user_accounts_tweets (
    userid bigint NOT NULL,
    tweetid bigint NOT NULL,
    PRIMARY KEY (userid, tweetid),
    UNIQUE (tweetid, userid),
    CONSTRAINT fk_userid
        FOREIGN KEY (userid)
            REFERENCES user_accounts(userid),
    CONSTRAINT fk_tweetid
        FOREIGN KEY (tweetid)
            REFERENCES tweets(id)
);