// 2020-10-18 00:02:46 +00:00
// SPDX-FileCopyrightText: 2020 Luke Granger-Brown <depot@lukegb.com>
//
// SPDX-License-Identifier: Apache-2.0
package main
import (
"context"
"encoding/json"
"flag"
"fmt"
"log"
"net/http"
"sort"
"strconv"
"strings"
"time"
"github.com/google/safehtml"
"github.com/google/safehtml/template"
"github.com/google/safehtml/uncheckedconversions"
"github.com/gorilla/mux"
"github.com/jackc/pgtype"
"github.com/jackc/pgx/v4/pgxpool"
"hg.lukegb.com/lukegb/depot/go/openshiftauth"
)
// Command-line flags and parsed templates shared by the HTTP handlers in main.
var (
	// databaseURL is the connection string passed to pgxpool.Connect.
	databaseURL = flag.String("database_url", "", "Database URL")

	// userMapping holds the -user_to_twitter flag value (registered in init):
	// authenticated username -> Twitter usernames that user may view.
	userMapping = make(userMap)

	// localDisableAuth, when non-empty, skips the openshiftauth router and
	// treats every request as coming from the named user.
	localDisableAuth = flag.String("local_auth_override_user", "", "Disable authn/authz - used for dev - set to username")

	// Templates are parsed at package initialisation; template.Must panics if
	// a file cannot be parsed.
	indexTmpl  = template.Must(template.ParseFiles("templates/index.html"))
	tweetsTmpl = template.Must(template.ParseFiles("templates/tweets.html"))
)
// init registers the -user_to_twitter flag, whose value is accumulated into
// userMapping through its Set method.
func init() {
	const usage = "Space-separated list of <username>:<comma-separated Twitter usernames>"
	flag.CommandLine.Var(userMapping, "user_to_twitter", usage)
}
// userMap maps a username to a list of Twitter usernames. Its String and Set
// methods give it the shape required of a flag value (it is passed to
// flag.Var in init).
type userMap map[string][]string

// String renders the map as space-separated "<user>:<t1>,<t2>" entries.
// Both the entries and each entry's Twitter usernames are sorted so the
// output is deterministic regardless of map iteration order. The stored
// slices are not modified.
func (um userMap) String() string {
	entries := make([]string, 0, len(um))
	for user, accounts := range um {
		// Sort a copy so the stored order is left untouched.
		sorted := append([]string(nil), accounts...)
		sort.Strings(sorted)
		entries = append(entries, user+":"+strings.Join(sorted, ","))
	}
	sort.Strings(entries)
	return strings.Join(entries, " ")
}

// Set parses v as a whitespace-separated list of
// "<username>:<comma-separated twitter usernames>" entries and appends the
// Twitter usernames to the existing entry for each username.
//
// Leading, trailing and repeated whitespace is ignored, so an empty or blank
// value is a no-op. Empty Twitter usernames (for example from "a:x,,y" or
// "a:") are skipped rather than stored as "". An entry that does not contain
// exactly one ":" yields an error; entries parsed before the bad one remain
// applied.
func (um userMap) Set(v string) error {
	for _, entry := range strings.Fields(v) {
		pair := strings.Split(entry, ":")
		if len(pair) != 2 {
			return fmt.Errorf("%v is not a <username>:<comma-separated twitter usernames> string", entry)
		}
		user := pair[0]
		accounts := um[user]
		for _, account := range strings.Split(pair[1], ",") {
			if account == "" {
				continue
			}
			accounts = append(accounts, account)
		}
		// Assign even when nothing was added so the username is registered.
		um[user] = accounts
	}
	return nil
}
func main ( ) {
flag . Parse ( )
ctx := context . Background ( )
pool , err := pgxpool . Connect ( ctx , * databaseURL )
if err != nil {
log . Fatalf ( "pgxpool.Connect: %v" , err )
}
defer pool . Close ( )
r := mux . NewRouter ( )
r . HandleFunc ( "/healthz" , func ( rw http . ResponseWriter , r * http . Request ) {
rw . Header ( ) . Set ( "Content-Type" , "text/plain" )
fmt . Fprintf ( rw , "ok" )
} )
var authR * mux . Router
if * localDisableAuth != "" {
authR = r
} else {
authR , err = openshiftauth . NewRouter ( r )
if err != nil {
log . Fatalf ( "openshiftauth.NewRouter: %v" , err )
}
}
userFromContext := func ( ctx context . Context ) string {
if * localDisableAuth != "" {
return * localDisableAuth
}
return openshiftauth . UserFromContext ( ctx ) . Metadata . Name
}
writeError := func ( rw http . ResponseWriter , status int , wrap string , err error ) {
log . Printf ( "Error in HTTP handler: %v: %v" , wrap , err )
rw . WriteHeader ( status )
fmt . Fprintf ( rw , "<h1>Oops. Something went wrong.</h1>" )
fmt . Fprintf ( rw , "<p>%s</p>" , wrap )
}
authR . HandleFunc ( "/" , func ( rw http . ResponseWriter , r * http . Request ) {
ctx := r . Context ( )
user := userFromContext ( ctx )
twitterAccounts := userMapping [ user ]
rows , err := pool . Query ( ctx , "SELECT ua.username, COUNT(uat.tweetid) tweet_count, (SELECT CAST(object->>'created_at' AS timestamp with time zone) FROM tweets WHERE id=MAX(uat.tweetid)) latest_tweet FROM user_accounts ua LEFT JOIN user_accounts_tweets uat ON uat.userid=ua.userid WHERE ua.username = ANY($1::text[]) GROUP BY 1 ORDER BY 1" , twitterAccounts )
if err != nil {
writeError ( rw , http . StatusInternalServerError , "querying database" , err )
return
}
defer rows . Close ( )
type twitterData struct {
Username string
TweetCount int
LatestTweet time . Time
}
var tds [ ] twitterData
for rows . Next ( ) {
var td twitterData
if err := rows . Scan ( & td . Username , & td . TweetCount , & td . LatestTweet ) ; err != nil {
writeError ( rw , http . StatusInternalServerError , "reading from database" , err )
return
}
tds = append ( tds , td )
}
rows . Close ( )
indexTmpl . Execute ( rw , struct {
Username string
TwitterAccounts [ ] twitterData
} {
Username : user ,
TwitterAccounts : tds ,
} )
} )
isAllowedToSee := func ( ctx context . Context , twitterUser string ) bool {
twitterAccounts := userMapping [ userFromContext ( ctx ) ]
for _ , a := range twitterAccounts {
if a == twitterUser {
return true
}
}
return false
}
toInt := func ( s string , def int ) int {
n , err := strconv . ParseInt ( s , 10 , 0 )
if err != nil {
return def
}
return int ( n )
}
clamp := func ( min , n , max int ) int {
if n < min {
return min
} else if n > max {
return max
}
return n
}
authR . HandleFunc ( "/view/{twitterUser}" , func ( rw http . ResponseWriter , r * http . Request ) {
ctx := r . Context ( )
vars := mux . Vars ( r )
twitterUser := vars [ "twitterUser" ]
if ! isAllowedToSee ( ctx , twitterUser ) {
writeError ( rw , http . StatusNotFound , "no such twitter user being archived" , fmt . Errorf ( "user %q attempted to access %q" , userFromContext ( ctx ) , twitterUser ) )
return
}
q := r . URL . Query ( )
pageSize := clamp ( 1 , toInt ( q . Get ( "page_size" ) , 20 ) , 200 )
startFrom := toInt ( q . Get ( "start_from" ) , 0 )
query := q . Get ( "q" )
rows , err := pool . Query ( ctx , `
SELECT
t . id ,
t . text ,
t . object ,
CAST ( COALESCE ( t . object - > ' retweeted_status ' - >> ' created_at ' , t . object - >> ' created_at ' ) AS timestamp with time zone ) created_at
FROM
user_accounts_tweets uat
INNER JOIN
user_accounts ua ON ua . userid = uat . userid
INNER JOIN tweets t ON t . id = uat . tweetid
WHERE 1 = 1
AND ua . username = $ 1
2020-10-18 20:07:55 +00:00
AND uat . on_timeline
2020-10-18 00:02:46 +00:00
AND ( $ 3 : : bigint = 0 OR t . id <= $ 3 : : bigint )
AND ( $ 4 = ' ' OR ( $ 4 < > ' ' AND ( to_tsvector ( ' english ' , text ) @ @ to_tsquery ( ' english ' , $ 4 ) OR to_tsvector ( ' english ' , object - > ' retweeted_status ' - >> ' full_text ' ) @ @ to_tsquery ( ' english ' , $ 4 ) OR object - > ' user ' - >> ' screen_name ' = $ 4 ) ) )
ORDER BY t . id DESC
LIMIT $ 2
` , twitterUser , pageSize + 1 , startFrom , query )
if err != nil {
writeError ( rw , http . StatusInternalServerError , "querying database" , err )
return
}
defer rows . Close ( )
type tweet struct {
ID int64
Text string
CreatedAt time . Time
CreatedAtFriendly string
Object interface { }
}
type twitterData struct {
TwitterUsername string
Query string
Tweets [ ] tweet
NextTweetID * int64
FormatTweetText func ( string , interface { } ) safehtml . HTML
}
pullIndices := func ( m map [ string ] interface { } ) [ 2 ] int {
midx := m [ "indices" ] . ( [ ] interface { } )
midx0 := int ( midx [ 0 ] . ( float64 ) )
midx1 := int ( midx [ 1 ] . ( float64 ) )
return [ 2 ] int { midx0 , midx1 }
}
td := twitterData {
TwitterUsername : twitterUser ,
Query : query ,
FormatTweetText : func ( t string , tw interface { } ) safehtml . HTML {
ltRep := string ( [ ] rune { 0xe000 } )
gtRep := string ( [ ] rune { 0xe001 } )
t = strings . ReplaceAll ( t , "<" , ltRep )
t = strings . ReplaceAll ( t , ">" , gtRep )
type span struct {
span [ 2 ] int
whatDo string // remove, link
linkTo string // link only
linkText string // link only
}
// Delete native media and add links.
var spans [ ] span
obj := tw . ( map [ string ] interface { } )
if ee , ok := obj [ "extended_entities" ] . ( map [ string ] interface { } ) ; ok {
ems := ee [ "media" ] . ( [ ] interface { } )
for _ , emi := range ems {
em := emi . ( map [ string ] interface { } )
spans = append ( spans , span {
span : pullIndices ( em ) ,
whatDo : "remove" ,
} )
}
}
if es , ok := obj [ "entities" ] . ( map [ string ] interface { } ) ; ok {
if hts , ok := es [ "hashtags" ] . ( [ ] interface { } ) ; ok {
for _ , hti := range hts {
ht := hti . ( map [ string ] interface { } )
htt := ht [ "text" ] . ( string )
spans = append ( spans , span {
span : pullIndices ( ht ) ,
whatDo : "link" ,
linkTo : fmt . Sprintf ( "https://twitter.com/hashtag/%s" , htt ) ,
} )
}
}
if urls , ok := es [ "urls" ] . ( [ ] interface { } ) ; ok {
for _ , urli := range urls {
url := urli . ( map [ string ] interface { } )
urldisp := url [ "display_url" ] . ( string )
urlexp := url [ "expanded_url" ] . ( string )
spans = append ( spans , span {
span : pullIndices ( url ) ,
whatDo : "link" ,
linkTo : urlexp ,
linkText : urldisp ,
} )
}
}
if mentions , ok := es [ "user_mentions" ] . ( [ ] interface { } ) ; ok {
for _ , mentioni := range mentions {
mention := mentioni . ( map [ string ] interface { } )
mentionuser := mention [ "screen_name" ] . ( string )
spans = append ( spans , span {
span : pullIndices ( mention ) ,
whatDo : "link" ,
linkTo : fmt . Sprintf ( "https://twitter.com/%s" , mentionuser ) ,
} )
}
}
if symbols , ok := es [ "symbols" ] . ( [ ] interface { } ) ; ok {
for _ , symboli := range symbols {
symbol := symboli . ( map [ string ] interface { } )
symbolname := symbol [ "text" ] . ( string )
spans = append ( spans , span {
span : pullIndices ( symbol ) ,
whatDo : "link" ,
linkTo : fmt . Sprintf ( "?q=$%s" , symbolname ) ,
} )
}
}
}
// Sort removeSpans from the end to the beginning.
sort . Slice ( spans , func ( a , b int ) bool {
return spans [ a ] . span [ 0 ] > spans [ b ] . span [ 0 ]
} )
// Expand overlapping remove spans.
newSpans := make ( [ ] span , 0 , len ( spans ) )
for i := 0 ; i < len ( spans ) - 1 ; i ++ {
span := spans [ i ]
prevSpan := spans [ i + 1 ]
if prevSpan . span [ 0 ] <= span . span [ 0 ] && prevSpan . span [ 1 ] >= span . span [ 1 ] {
// Spans overlap.
if span . whatDo != "remove" || prevSpan . whatDo != "remove" {
log . Printf ( "found overlapping non-remove spans!" )
}
if span . span [ 1 ] > prevSpan . span [ 1 ] {
prevSpan . span [ 1 ] = span . span [ 1 ]
}
continue
}
newSpans = append ( newSpans , span )
}
if len ( spans ) > 0 {
newSpans = append ( newSpans , spans [ len ( spans ) - 1 ] )
}
spans = newSpans
runed := [ ] rune ( t )
for _ , span := range spans {
switch span . whatDo {
case "remove" :
// Delete text from span[0] to span[1].
runed = append ( runed [ : span . span [ 0 ] ] , runed [ span . span [ 1 ] : ] ... )
case "link" :
// Add a link.
var text [ ] rune
if span . linkText == "" {
text = runed [ span . span [ 0 ] : span . span [ 1 ] ]
} else {
text = [ ] rune ( span . linkText )
}
runedBits := [ ] [ ] rune {
runed [ : span . span [ 0 ] ] ,
[ ] rune ( "<a href=\"" ) ,
[ ] rune ( span . linkTo ) ,
[ ] rune ( "\">" ) ,
[ ] rune ( text ) ,
[ ] rune ( "</a>" ) ,
runed [ span . span [ 1 ] : ] ,
}
finalLen := 0
for _ , s := range runedBits {
finalLen += len ( s )
}
runed = make ( [ ] rune , finalLen )
p := 0
for _ , s := range runedBits {
p += copy ( runed [ p : ] , s )
}
default :
log . Printf ( "unknown span operation %v" , span . whatDo )
}
}
t = string ( runed )
// HTML escape any <>
t = strings . ReplaceAll ( t , ltRep , "<" )
t = strings . ReplaceAll ( t , gtRep , ">" )
return uncheckedconversions . HTMLFromStringKnownToSatisfyTypeContract ( t )
} ,
}
now := time . Now ( )
for rows . Next ( ) {
var t tweet
var o pgtype . JSONB
if err := rows . Scan ( & t . ID , & t . Text , & o , & t . CreatedAt ) ; err != nil {
writeError ( rw , http . StatusInternalServerError , "reading from database" , err )
return
}
if err := json . Unmarshal ( o . Bytes , & t . Object ) ; err != nil {
writeError ( rw , http . StatusInternalServerError , "parsing JSON from database" , err )
return
}
ago := now . Sub ( t . CreatedAt )
switch {
case t . CreatedAt . Year ( ) != now . Year ( ) :
t . CreatedAtFriendly = t . CreatedAt . Format ( "Jan 2, 2006" )
case t . CreatedAt . YearDay ( ) != now . YearDay ( ) :
t . CreatedAtFriendly = t . CreatedAt . Format ( "Jan 2" )
case ago . Hours ( ) >= 1.0 :
t . CreatedAtFriendly = fmt . Sprintf ( "%dh" , int ( ago . Hours ( ) ) )
case ago . Minutes ( ) >= 1.0 :
t . CreatedAtFriendly = fmt . Sprintf ( "%dm" , int ( ago . Minutes ( ) ) )
case ago . Seconds ( ) >= 0.0 :
t . CreatedAtFriendly = fmt . Sprintf ( "%ds" , int ( ago . Seconds ( ) ) )
default :
t . CreatedAtFriendly = fmt . Sprintf ( "in %ds" , - int ( ago . Seconds ( ) ) )
}
td . Tweets = append ( td . Tweets , t )
}
rows . Close ( )
if len ( td . Tweets ) > pageSize {
td . NextTweetID = & td . Tweets [ pageSize ] . ID
td . Tweets = td . Tweets [ : pageSize ]
}
if err := tweetsTmpl . Execute ( rw , td ) ; err != nil {
log . Printf ( "tweets: executing template: %v" , err )
}
} )
log . Printf ( "now listening on :8080" )
log . Print ( http . ListenAndServe ( ":8080" , r ) )
}