go: init twitternuke

This commit is contained in:
Luke Granger-Brown 2021-01-06 21:15:56 +00:00
parent d38601fabe
commit 576a45ae67
4 changed files with 275 additions and 0 deletions

View file

@ -4,6 +4,7 @@
args: { args: {
twitterchiver = import ./twitterchiver args; twitterchiver = import ./twitterchiver args;
twitternuke = import ./twitternuke args;
openshiftauth = import ./openshiftauth args; openshiftauth = import ./openshiftauth args;
minotarproxy = import ./minotarproxy args; minotarproxy = import ./minotarproxy args;
} }

View file

@ -189,6 +189,7 @@ golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58 h1:8gQV6CLnAEikrhgkHFbMAEhagSSnXWGV915qUMm9mrU=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=

View file

@ -0,0 +1,13 @@
# SPDX-FileCopyrightText: 2020 Luke Granger-Brown <depot@lukegb.com>
#
# SPDX-License-Identifier: Apache-2.0
# Build definition for the twitternuke Go program.
# Takes the depot attribute set and calls depot.third_party.buildGo.program.
{ depot, ... }:
depot.third_party.buildGo.program {
name = "twitternuke";
# The program consists of a single Go source file.
srcs = [ ./twitternuke.go ];
# Go package dependencies handed to buildGo.program, taken from depot.third_party.gopkgs.
deps = [
depot.third_party.gopkgs."github.com".dghubble.oauth1
depot.third_party.gopkgs."golang.org".x.sync.errgroup
];
}

View file

@ -0,0 +1,260 @@
package main
import (
	"bufio"
	"context"
	"encoding/json"
	"flag"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"net/http"
	"os"
	"strconv"
	"time"

	"github.com/dghubble/oauth1"
	"golang.org/x/sync/errgroup"
)
// Command-line flags.
var (
	// tweetArchiveFile is the path of the archive .js file that tweet IDs are
	// read from when tweetsFromAPI is false.
	tweetArchiveFile = flag.String("tweet_archive_file", "", "Tweet archive .js file")

	// tweetsFromAPI selects the API, rather than an archive file, as the
	// source of tweets.
	tweetsFromAPI = flag.Bool("tweets_from_api", false, "Fetch tweets from API")

	// tweetCutoff is how far back from the current time the cutoff lies; the
	// default is 90 days.
	tweetCutoff = flag.Duration("tweet_cutoff", 90*24*time.Hour, "Cutoff lookback (default is 3 'months')")

	// dryRun, which defaults to true, stops the program before any deletion.
	dryRun = flag.Bool("dryrun", true, "Dry run: don't delete anything")

	// deleteThreads is the number of concurrent deletion workers.
	deleteThreads = flag.Int("delete_threads", 64, "Concurrent threads to use for deleting tweets.")
)
// archivedTweetTime is a time.Time that can be decoded from the textual
// timestamp format handled by UnmarshalJSON.
type archivedTweetTime struct {
	time.Time
}

// UnmarshalJSON decodes a JSON string laid out as
// "Mon Jan 02 15:04:05 -0700 2006" into t.
//
// It returns the underlying error, unwrapped, when b is not a JSON string or
// when the string does not match the layout; t is left untouched in that case.
func (t *archivedTweetTime) UnmarshalJSON(b []byte) error {
	const layout = "Mon Jan 02 15:04:05 -0700 2006"

	var raw string
	err := json.Unmarshal(b, &raw)
	if err != nil {
		return err
	}

	parsed, err := time.Parse(layout, raw)
	if err == nil {
		t.Time = parsed
	}
	return err
}
// archivedTweet is one element of the JSON array stored in a tweet archive
// file. Only the fields needed to decide on deletion are decoded.
type archivedTweet struct {
	// Tweet is the nested "tweet" object of the archive entry.
	Tweet struct {
		// Id is the tweet ID as the decimal string found under "id_str".
		Id string `json:"id_str"`
		// CreatedAt is the creation timestamp found under "created_at".
		CreatedAt archivedTweetTime `json:"created_at"`
	} `json:"tweet"`
}
func loadTweetIDsFromFile(ctx context.Context, tweetIDsFile string, tweetCutoff time.Time) ([]int64, error) {
f, err := os.Open(tweetIDsFile)
if err != nil {
return nil, fmt.Errorf("os.Open(%q): %w", tweetIDsFile, err)
}
defer f.Close()
if _, err := io.CopyN(ioutil.Discard, f, int64(len("window.YTD.tweet.part0 = "))); err != nil {
return nil, fmt.Errorf("io.CopyN: %w", err)
}
var ts []archivedTweet
if err := json.NewDecoder(f).Decode(&ts); err != nil {
return nil, fmt.Errorf("json Decode: %w", err)
}
var out []int64
for _, t := range ts {
tID, err := strconv.ParseInt(t.Tweet.Id, 10, 64)
if err != nil {
return nil, fmt.Errorf("ParseInt tweet ID %q: %w", t.Tweet.Id, err)
}
if t.Tweet.CreatedAt.Before(tweetCutoff) {
out = append(out, tID)
}
}
return out, nil
}
// apiTweet is one element of the JSON array decoded from a timeline API
// response. Only the fields needed to decide on deletion are decoded.
type apiTweet struct {
	// Id is the tweet ID as the decimal string found under "id_str".
	Id string `json:"id_str"`
	// CreatedAt is the creation timestamp found under "created_at".
	CreatedAt archivedTweetTime `json:"created_at"`
}
// fetchTweets requests one page of up to 200 tweets from the user_timeline
// endpoint using httpClient and decodes the response.
//
// When maxID is non-zero the request carries max_id=maxID-1, so the tweet
// with ID maxID itself is not asked for again; a zero maxID requests the
// first page.
//
// An error is returned if the request cannot be built or sent, if the
// response status is not 200 OK (the status is included in the error), or if
// the body does not decode as a JSON array of apiTweet.
func fetchTweets(ctx context.Context, httpClient *http.Client, maxID int64) ([]apiTweet, error) {
	endpoint := "https://api.twitter.com/1.1/statuses/user_timeline.json?user_id=me&count=200"
	if maxID != 0 {
		endpoint += fmt.Sprintf("&max_id=%d", maxID-1)
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, fmt.Errorf("http.NewRequest for timeline max tweet %d: %w", maxID, err)
	}
	resp, err := httpClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("http.Do for maxID %d: %w", maxID, err)
	}
	defer resp.Body.Close()

	// err is nil here, so report the HTTP status rather than wrapping it.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("fetching timeline: got status %d %q", resp.StatusCode, resp.Status)
	}

	var tweets []apiTweet
	if err := json.NewDecoder(resp.Body).Decode(&tweets); err != nil {
		return nil, fmt.Errorf("decoding timeline JSON: %w", err)
	}
	return tweets, nil
}
// loadTweetIDsFromAPI pages through the timeline with fetchTweets until an
// empty page is returned, and returns the IDs of all tweets created before
// tweetCutoff.
//
// Each page is requested with the smallest tweet ID seen so far as maxID.
// The earliest tweet seen is logged at the end, or a note that nothing was
// returned when the timeline is empty.
//
// An error is returned if any page fails to fetch or contains a tweet ID
// that is not a base-10 int64.
func loadTweetIDsFromAPI(ctx context.Context, httpClient *http.Client, tweetCutoff time.Time) ([]int64, error) {
	var (
		maxID         int64
		earliestTweet *apiTweet
		out           []int64
	)
	for {
		tweets, err := fetchTweets(ctx, httpClient, maxID)
		if err != nil {
			return nil, fmt.Errorf("fetchTweets(%d): %w", maxID, err)
		}
		log.Printf("fetched tweets with max ID %d", maxID)
		if len(tweets) == 0 {
			break
		}

		// Index into the slice so that earliestTweet points at a stable
		// element rather than at a reused loop variable.
		for i := range tweets {
			t := &tweets[i]
			tID, err := strconv.ParseInt(t.Id, 10, 64)
			if err != nil {
				return nil, fmt.Errorf("ParseInt tweet ID %q: %w", t.Id, err)
			}
			if maxID == 0 || tID < maxID {
				maxID = tID
			}
			if t.CreatedAt.Before(tweetCutoff) {
				out = append(out, tID)
			}
			if earliestTweet == nil || earliestTweet.CreatedAt.Time.After(t.CreatedAt.Time) {
				earliestTweet = t
			}
		}
	}

	// The timeline may be empty, in which case there is no earliest tweet.
	if earliestTweet == nil {
		log.Printf("No tweets returned by the API.")
		return out, nil
	}
	log.Printf("Earliest tweet: %v %v (%v ago)", earliestTweet.Id, earliestTweet.CreatedAt.Time, time.Since(earliestTweet.CreatedAt.Time))
	return out, nil
}
// deleteTweet sends a POST to the statuses/destroy endpoint for tweet tid
// using httpClient.
//
// Status 200 and status 404 are both treated as success; the response body is
// then read to the end and discarded. Any other status yields an error that
// names the tweet ID and the status. Errors building or sending the request,
// or reading the body, are wrapped and returned.
func deleteTweet(ctx context.Context, httpClient *http.Client, tid int64) error {
	endpoint := fmt.Sprintf("https://api.twitter.com/1.1/statuses/destroy/%d.json?trim_user=true", tid)
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, nil)
	if err != nil {
		return fmt.Errorf("http.NewRequest for tid %d: %w", tid, err)
	}
	resp, err := httpClient.Do(req)
	if err != nil {
		return fmt.Errorf("http.Do for tid %d: %w", tid, err)
	}
	defer resp.Body.Close()

	switch resp.StatusCode {
	case http.StatusOK, http.StatusNotFound:
		// Both statuses count as success.
	default:
		return fmt.Errorf("deleting tid %d: got status %d %q", tid, resp.StatusCode, resp.Status)
	}

	if _, err := io.Copy(ioutil.Discard, resp.Body); err != nil {
		return fmt.Errorf("io.Copy for tid %d: %w", tid, err)
	}
	return nil
}
// deleteTweets deletes every tweet in tweetIDs by calling deleteTweet from
// deleteThreads concurrent workers.
//
// A producer goroutine feeds IDs to the workers over an unbuffered channel
// and logs progress every 100 IDs. The first worker error cancels the shared
// context, which stops the producer; that error is returned from Wait.
//
// An empty tweetIDs returns nil immediately. A deleteThreads below 1 returns
// an error, because with no workers the producer would block forever.
func deleteTweets(ctx context.Context, httpClient *http.Client, deleteThreads int, tweetIDs []int64) error {
	if len(tweetIDs) == 0 {
		return nil
	}
	if deleteThreads < 1 {
		return fmt.Errorf("deleteThreads must be at least 1, got %d", deleteThreads)
	}

	deleteQueue := make(chan int64)
	eg, egCtx := errgroup.WithContext(ctx)

	for n := 0; n < deleteThreads; n++ {
		eg.Go(func() error {
			for tid := range deleteQueue {
				if err := deleteTweet(egCtx, httpClient, tid); err != nil {
					return fmt.Errorf("deleting tweet %d: %w", tid, err)
				}
			}
			return nil
		})
	}

	eg.Go(func() error {
		// Closing the queue lets the workers' range loops finish.
		defer close(deleteQueue)
		for n, tid := range tweetIDs {
			if n%100 == 0 {
				log.Printf("Progress: %d / %d", n, len(tweetIDs))
			}
			select {
			case <-egCtx.Done():
				// A failing worker records its error before the context is
				// cancelled, so this only surfaces cancellation of ctx itself.
				return egCtx.Err()
			case deleteQueue <- tid:
			}
		}
		return nil
	})

	return eg.Wait()
}
// main parses flags, builds an OAuth1 HTTP client from credentials in the
// environment, loads the IDs of tweets older than the cutoff from the API or
// from an archive file, and deletes them unless the dry-run flag is set.
func main() {
	flag.Parse()
	ctx := context.Background()
	log.Printf("Started up.")

	// Consumer and access credentials both come from the environment.
	consumerKey := os.Getenv("TWITTER_OAUTH_CONSUMER_KEY")
	consumerSecret := os.Getenv("TWITTER_OAUTH_CONSUMER_SECRET")
	if consumerKey == "" || consumerSecret == "" {
		fmt.Fprintf(os.Stderr, "No TWITTER_OAUTH_CONSUMER_KEY or TWITTER_OAUTH_CONSUMER_SECRET\n")
		os.Exit(1)
	}
	accessToken := os.Getenv("TWITTER_OAUTH_ACCESS_TOKEN")
	accessSecret := os.Getenv("TWITTER_OAUTH_ACCESS_SECRET")
	if accessToken == "" || accessSecret == "" {
		fmt.Fprintf(os.Stderr, "No TWITTER_OAUTH_ACCESS_TOKEN or TWITTER_OAUTH_ACCESS_SECRET\n")
		os.Exit(1)
	}

	oauthConfig := oauth1.NewConfig(consumerKey, consumerSecret)
	httpClient := oauthConfig.Client(ctx, oauth1.NewToken(accessToken, accessSecret))
	log.Printf("Initialized OAuth config.")

	var (
		tweetIDs []int64
		err      error
	)
	switch {
	case *tweetsFromAPI:
		log.Printf("Loading tweets from API.")
		tweetIDs, err = loadTweetIDsFromAPI(ctx, httpClient, time.Now().Add(-*tweetCutoff))
		if err != nil {
			log.Fatalf("loadTweetIDsFromAPI(%q): %v", *tweetCutoff, err)
		}
	case *tweetArchiveFile == "":
		log.Fatalf("--tweet_archive_file must be set.")
	default:
		tweetIDs, err = loadTweetIDsFromFile(ctx, *tweetArchiveFile, time.Now().Add(-*tweetCutoff))
		if err != nil {
			log.Fatalf("loadTweetIDsFromFile(%q, %q): %v", *tweetArchiveFile, *tweetCutoff, err)
		}
	}
	log.Printf("Got %d tweets to delete.", len(tweetIDs))

	if *dryRun {
		log.Printf("Dry run: not doing anything.")
		return
	}

	err = deleteTweets(ctx, httpClient, *deleteThreads, tweetIDs)
	if err != nil {
		log.Fatalf("deleteTweets: %v", err)
	}
}