twitterchiver: init viewer

This commit is contained in:
Luke Granger-Brown 2020-10-18 01:02:46 +01:00
parent 36682c25cf
commit c5a53d9334
7 changed files with 610 additions and 3 deletions

View file

@ -10,6 +10,7 @@ require (
github.com/dghubble/gologin/v2 v2.2.0
github.com/dghubble/oauth1 v0.6.0
github.com/dgrijalva/jwt-go v3.2.0+incompatible
github.com/google/safehtml v0.0.2 // indirect
github.com/gorilla/mux v1.8.0
github.com/gorilla/securecookie v1.1.1
github.com/gorilla/sessions v1.2.1

View file

@ -37,6 +37,8 @@ github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO
github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/google/safehtml v0.0.2 h1:ZOt2VXg4x24bW0m2jtzAOkhoXV0iM8vNKc0paByCZqM=
github.com/google/safehtml v0.0.2/go.mod h1:L4KWwDsUJdECRAEpZoBn3O64bQaywRscowZjJAzjHnU=
github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
github.com/gorilla/mux v1.8.0 h1:i40aqfkR1h2SlN9hojwV5ZA91wcXFOvkdNIeFDP5koI=
github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So=

View file

@ -11,4 +11,23 @@
depot.third_party.gopkgs."github.com".jackc.pgx.v4
];
};
# Go program "viewer", built from the single source file viewer/viewer.go.
viewer = depot.third_party.buildGo.program {
name = "viewer";
srcs = [ ./viewer/viewer.go ];
# Go packages viewer.go is built against.
deps = [
depot.third_party.gopkgs."github.com".google.safehtml
depot.third_party.gopkgs."github.com".google.safehtml.template
depot.third_party.gopkgs."github.com".google.safehtml.uncheckedconversions
depot.third_party.gopkgs."github.com".gorilla.mux
depot.third_party.gopkgs."github.com".jackc.pgtype
depot.third_party.gopkgs."github.com".jackc.pgx.v4.pgxpool
depot.go.openshiftauth.openshiftauth
];
# Extra data handed to the program builder: a copy of ./viewer filtered down
# to directories whose path ends in "/templates" plus every path containing
# "/templates/".
# NOTE(review): presumably this ends up as the Docker image contents so the
# binary can load templates/*.html at runtime - confirm against the
# buildGo.program wrapper.
dockerData = [ (
depot.pkgs.runCommand "source" {} ''
cp -R ${builtins.filterSource (path: type: (type == "directory" && depot.lib.hasSuffix "/templates" path) || (depot.lib.hasInfix "/templates/" path)) ./viewer} $out
''
) ];
};
}

View file

@ -0,0 +1,8 @@
<!DOCTYPE html>
{{/*
Index page: lists the Twitter accounts the signed-in user may view.
Expects .Username and .TwitterAccounts, where each account provides
.Username, .TweetCount and .LatestTweet.
*/}}
<h1>Twitterchiver</h1>
<h2>Accounts visible to {{.Username}}</h2>
<ul>
{{range .TwitterAccounts}}
{{/* An account with no archived tweets has no meaningful latest-tweet time, so it is only shown when there is at least one tweet. */}}
<li><strong><a href="/view/{{.Username}}">{{.Username}}</a></strong> ({{.TweetCount}} tweets{{if .TweetCount}}, latest tweet at {{.LatestTweet}}{{end}})</li>
{{end}}
</ul>

View file

@ -0,0 +1,158 @@
<!DOCTYPE html>
<style>
html, body {
background-color: rgb(21, 32, 43);
color: white;
font-family: sans-serif;
}
.tweet-list {
margin: 0;
padding: 0;
}
/* One row of the tweet list, separated from the next by a bottom border. */
.tweet-list-item {
  display: block;
  padding-top: 8px;
  padding-bottom: 8px;
  border-bottom: 1px solid rgb(136, 153, 166);
  /* Full width, clamped between 45ch and 75ch of text plus the 64px left
     margin used by .tweet. calc() requires whitespace around "+"; without
     it the whole declaration is invalid and ignored by the browser. */
  width: clamp(calc(45ch + 64px), 100%, calc(75ch + 64px));
}
.tweet-list-item:first-of-type {
margin-top: 8px;
border-top: 1px solid rgb(136, 153, 166);
}
.tweet {
margin-left: 64px;
}
.tweet::after {
clear: both;
content: "";
display: block;
}
a {
text-decoration: none;
color: inherit;
}
a:hover {
text-decoration: underline;
text-decoration-skip-ink: auto;
}
.retweeted-byline {
display: block;
}
.tweet-author-img {
float: left;
margin-left: -58px;
border-radius: 100%;
}
.byline-link:hover {
text-decoration: none;
}
.byline-link:hover .byline-name {
text-decoration: underline;
}
.byline-name {
color: white;
}
.byline, .retweeted-byline {
color: rgb(136, 153, 166);
}
.retweeted-byline {
font-weight: bold;
margin-bottom: 6px;
}
.retweeted-icon {
color: rgb(23, 191, 99);
width: 19px;
fill: currentcolor;
float: left;
margin-left: -24px;
}
.content {
white-space: pre-wrap;
overflow-wrap: break-word;
}
.content a {
color: rgb(27, 149, 224);
}
.media {
margin-top: 8px;
display: grid;
grid-template-columns: 50% 50%;
grid-template-rows: 50% 50%;
grid-auto-flow: dense;
}
.media-item:only-child {
grid-column-end: span 2;
}
.media-link {
position: relative;
display: block;
}
.media-link::before {
content: "";
display: block;
padding-bottom: calc(100% / (1920/1080));
}
.media-img {
position: absolute;
top: 0;
left: 0;
height: 100%;
width: 100%;
object-fit: cover;
}
</style>
<h1>Twitterchiver: {{.TwitterUsername}}</h1>
{{if .Query}}
<h2>Searching for: {{.Query}}</h2>
{{end}}
<form action="" method="GET">
<input type="search" value="{{.Query}}" name="q">
</form>
<ol class="tweet-list">
{{range .Tweets}}
<li class="tweet-list-item">
<div class="tweet">
{{$status := .Object}}
{{if .Object.retweeted_status}}
{{$status = .Object.retweeted_status}}
<svg viewBox="0 0 24 24" class="retweeted-icon"><g><path d="M23.615 15.477c-.47-.47-1.23-.47-1.697 0l-1.326 1.326V7.4c0-2.178-1.772-3.95-3.95-3.95h-5.2c-.663 0-1.2.538-1.2 1.2s.537 1.2 1.2 1.2h5.2c.854 0 1.55.695 1.55 1.55v9.403l-1.326-1.326c-.47-.47-1.23-.47-1.697 0s-.47 1.23 0 1.697l3.374 3.375c.234.233.542.35.85.35s.613-.116.848-.35l3.375-3.376c.467-.47.467-1.23-.002-1.697zM12.562 18.5h-5.2c-.854 0-1.55-.695-1.55-1.55V7.547l1.326 1.326c.234.235.542.352.848.352s.614-.117.85-.352c.468-.47.468-1.23 0-1.697L5.46 3.8c-.47-.468-1.23-.468-1.697 0L.388 7.177c-.47.47-.47 1.23 0 1.697s1.23.47 1.697 0L3.41 7.547v9.403c0 2.178 1.773 3.95 3.95 3.95h5.2c.664 0 1.2-.538 1.2-1.2s-.535-1.2-1.198-1.2z"></path></g></svg>
<a class="retweeted-byline" href="https://twitter.com/{{.Object.user.screen_name}}"><!--
-->{{.Object.user.name}} (@{{.Object.user.screen_name}}) retweeted<!--
--></a>
{{end}}
<a href="https://twitter.com/{{$status.user.screen_name}}"><img class="tweet-author-img" src="{{$status.user.profile_image_url_https}}"></a>
<div class="byline">
<a href="https://twitter.com/{{$status.user.screen_name}}" class="byline-link"><!--
--><strong class="byline-name">{{$status.user.name}}</strong>
<span class="byline-username">@{{$status.user.screen_name}}</span><!--
--></a>
&middot;
<a href="https://twitter.com/{{$status.user.screen_name}}/status/{{$status.id_str}}" class="timestamp"><time datetime="{{.CreatedAt.Format "2006-01-02T15:04:05-0700"}}">{{.CreatedAtFriendly}}</time></a>
</div>
<div class="content"><!--
-->{{call $.FormatTweetText $status.full_text $status}}<!--
--></div>
{{/*
Attached media. Read from $status (the retweeted status for retweets) so the
images shown belong to the same status whose text, and whose removed media
spans, are rendered in the content block.
*/}}
{{if $status.extended_entities}}
<div class="media">
{{range $entity := $status.extended_entities.media}}
<div class="media-item">
<a href="{{$entity.expanded_url}}" class="media-link">
<img src="{{$entity.media_url_https}}" class="media-img" alt="Attached media">
</a>
</div>
{{end}}
</div>
{{end}}
</div>
</li>
{{end}}
</ol>
{{if .NextTweetID}}
<a href="?start_from={{.NextTweetID}}&q={{.Query}}">...next</a>
{{end}}

View file

@ -0,0 +1,418 @@
// SPDX-FileCopyrightText: 2020 Luke Granger-Brown <depot@lukegb.com>
//
// SPDX-License-Identifier: Apache-2.0
package main
import (
"context"
"encoding/json"
"flag"
"fmt"
"log"
"net/http"
"sort"
"strconv"
"strings"
"time"
"github.com/google/safehtml"
"github.com/google/safehtml/template"
"github.com/google/safehtml/uncheckedconversions"
"github.com/gorilla/mux"
"github.com/jackc/pgtype"
"github.com/jackc/pgx/v4/pgxpool"
"hg.lukegb.com/lukegb/depot/go/openshiftauth"
)
// Command-line configuration and the page templates used by the handlers.
var (
// databaseURL is the connection string passed to pgxpool.Connect in main.
databaseURL = flag.String("database_url", "", "Database URL")
// userMapping maps a signed-in username to the Twitter usernames that user
// may view; it is filled in by the -user_to_twitter flag registered in init.
userMapping = userMap{}
// localDisableAuth, when non-empty, makes main skip the openshiftauth router
// and treat every request as coming from this username.
localDisableAuth = flag.String("local_auth_override_user", "", "Disable authn/authz - used for dev - set to username")
// indexTmpl and tweetsTmpl are parsed during package initialisation from
// paths relative to the working directory; template.Must panics if parsing
// fails.
indexTmpl = template.Must(template.ParseFiles("templates/index.html"))
tweetsTmpl = template.Must(template.ParseFiles("templates/tweets.html"))
)
// init registers the -user_to_twitter flag, whose values are parsed into
// userMapping through its flag.Value implementation.
func init() {
	const (
		name  = "user_to_twitter"
		usage = "Space-separated list of <username>:<comma-separated Twitter usernames>"
	)
	flag.Var(userMapping, name, usage)
}
// userMap maps a username to the Twitter usernames that user may view.
// It implements flag.Value so it can be populated from the command line.
type userMap map[string][]string

// String renders the mapping in the format accepted by Set:
// "<username>:<twitter1>,<twitter2> <username>:...". Users and their Twitter
// usernames are sorted so the output is deterministic; the map itself is not
// modified.
func (um userMap) String() string {
	bits := make([]string, 0, len(um))
	for u, ts := range um {
		sorted := make([]string, len(ts))
		copy(sorted, ts)
		sort.Strings(sorted)
		bits = append(bits, fmt.Sprintf("%s:%s", u, strings.Join(sorted, ",")))
	}
	sort.Strings(bits)
	return strings.Join(bits, " ")
}

// Set parses v as a whitespace-separated list of
// "<username>:<comma-separated Twitter usernames>" items and appends the
// Twitter usernames to any already recorded for that user.
//
// Items are split with strings.Fields, so leading, trailing or repeated
// whitespace (and an empty v) is tolerated instead of being rejected as an
// empty item. It returns an error for the first item that does not contain
// exactly one ":"; items before it have already been applied.
func (um userMap) Set(v string) error {
	for _, b := range strings.Fields(v) {
		utsPair := strings.Split(b, ":")
		if len(utsPair) != 2 {
			return fmt.Errorf("%v is not a <username>:<comma-separated twitter usernames> string", b)
		}
		u, ts := utsPair[0], utsPair[1]
		um[u] = append(um[u], strings.Split(ts, ",")...)
	}
	return nil
}
func main() {
flag.Parse()
ctx := context.Background()
pool, err := pgxpool.Connect(ctx, *databaseURL)
if err != nil {
log.Fatalf("pgxpool.Connect: %v", err)
}
defer pool.Close()
r := mux.NewRouter()
r.HandleFunc("/healthz", func(rw http.ResponseWriter, r *http.Request) {
rw.Header().Set("Content-Type", "text/plain")
fmt.Fprintf(rw, "ok")
})
var authR *mux.Router
if *localDisableAuth != "" {
authR = r
} else {
authR, err = openshiftauth.NewRouter(r)
if err != nil {
log.Fatalf("openshiftauth.NewRouter: %v", err)
}
}
userFromContext := func(ctx context.Context) string {
if *localDisableAuth != "" {
return *localDisableAuth
}
return openshiftauth.UserFromContext(ctx).Metadata.Name
}
writeError := func(rw http.ResponseWriter, status int, wrap string, err error) {
log.Printf("Error in HTTP handler: %v: %v", wrap, err)
rw.WriteHeader(status)
fmt.Fprintf(rw, "<h1>Oops. Something went wrong.</h1>")
fmt.Fprintf(rw, "<p>%s</p>", wrap)
}
authR.HandleFunc("/", func(rw http.ResponseWriter, r *http.Request) {
ctx := r.Context()
user := userFromContext(ctx)
twitterAccounts := userMapping[user]
rows, err := pool.Query(ctx, "SELECT ua.username, COUNT(uat.tweetid) tweet_count, (SELECT CAST(object->>'created_at' AS timestamp with time zone) FROM tweets WHERE id=MAX(uat.tweetid)) latest_tweet FROM user_accounts ua LEFT JOIN user_accounts_tweets uat ON uat.userid=ua.userid WHERE ua.username = ANY($1::text[]) GROUP BY 1 ORDER BY 1", twitterAccounts)
if err != nil {
writeError(rw, http.StatusInternalServerError, "querying database", err)
return
}
defer rows.Close()
type twitterData struct {
Username string
TweetCount int
LatestTweet time.Time
}
var tds []twitterData
for rows.Next() {
var td twitterData
if err := rows.Scan(&td.Username, &td.TweetCount, &td.LatestTweet); err != nil {
writeError(rw, http.StatusInternalServerError, "reading from database", err)
return
}
tds = append(tds, td)
}
rows.Close()
indexTmpl.Execute(rw, struct {
Username string
TwitterAccounts []twitterData
}{
Username: user,
TwitterAccounts: tds,
})
})
isAllowedToSee := func(ctx context.Context, twitterUser string) bool {
twitterAccounts := userMapping[userFromContext(ctx)]
for _, a := range twitterAccounts {
if a == twitterUser {
return true
}
}
return false
}
toInt := func(s string, def int) int {
n, err := strconv.ParseInt(s, 10, 0)
if err != nil {
return def
}
return int(n)
}
clamp := func(min, n, max int) int {
if n < min {
return min
} else if n > max {
return max
}
return n
}
authR.HandleFunc("/view/{twitterUser}", func(rw http.ResponseWriter, r *http.Request) {
ctx := r.Context()
vars := mux.Vars(r)
twitterUser := vars["twitterUser"]
if !isAllowedToSee(ctx, twitterUser) {
writeError(rw, http.StatusNotFound, "no such twitter user being archived", fmt.Errorf("user %q attempted to access %q", userFromContext(ctx), twitterUser))
return
}
q := r.URL.Query()
pageSize := clamp(1, toInt(q.Get("page_size"), 20), 200)
startFrom := toInt(q.Get("start_from"), 0)
query := q.Get("q")
rows, err := pool.Query(ctx, `
SELECT
t.id,
t.text,
t.object,
CAST(COALESCE(t.object->'retweeted_status'->>'created_at', t.object->>'created_at') AS timestamp with time zone) created_at
FROM
user_accounts_tweets uat
INNER JOIN
user_accounts ua ON ua.userid=uat.userid
INNER JOIN tweets t ON t.id=uat.tweetid
WHERE 1=1
AND ua.username=$1
AND ($3::bigint=0 OR t.id <= $3::bigint)
AND ($4='' OR ($4<>'' AND (to_tsvector('english', text) @@ to_tsquery('english', $4) OR to_tsvector('english', object->'retweeted_status'->>'full_text') @@ to_tsquery('english', $4) OR object->'user'->>'screen_name'=$4)))
ORDER BY t.id DESC
LIMIT $2
`, twitterUser, pageSize+1, startFrom, query)
if err != nil {
writeError(rw, http.StatusInternalServerError, "querying database", err)
return
}
defer rows.Close()
type tweet struct {
ID int64
Text string
CreatedAt time.Time
CreatedAtFriendly string
Object interface{}
}
type twitterData struct {
TwitterUsername string
Query string
Tweets []tweet
NextTweetID *int64
FormatTweetText func(string, interface{}) safehtml.HTML
}
pullIndices := func(m map[string]interface{}) [2]int {
midx := m["indices"].([]interface{})
midx0 := int(midx[0].(float64))
midx1 := int(midx[1].(float64))
return [2]int{midx0, midx1}
}
td := twitterData{
TwitterUsername: twitterUser,
Query: query,
FormatTweetText: func(t string, tw interface{}) safehtml.HTML {
ltRep := string([]rune{0xe000})
gtRep := string([]rune{0xe001})
t = strings.ReplaceAll(t, "<", ltRep)
t = strings.ReplaceAll(t, ">", gtRep)
type span struct {
span [2]int
whatDo string // remove, link
linkTo string // link only
linkText string // link only
}
// Delete native media and add links.
var spans []span
obj := tw.(map[string]interface{})
if ee, ok := obj["extended_entities"].(map[string]interface{}); ok {
ems := ee["media"].([]interface{})
for _, emi := range ems {
em := emi.(map[string]interface{})
spans = append(spans, span{
span: pullIndices(em),
whatDo: "remove",
})
}
}
if es, ok := obj["entities"].(map[string]interface{}); ok {
if hts, ok := es["hashtags"].([]interface{}); ok {
for _, hti := range hts {
ht := hti.(map[string]interface{})
htt := ht["text"].(string)
spans = append(spans, span{
span: pullIndices(ht),
whatDo: "link",
linkTo: fmt.Sprintf("https://twitter.com/hashtag/%s", htt),
})
}
}
if urls, ok := es["urls"].([]interface{}); ok {
for _, urli := range urls {
url := urli.(map[string]interface{})
urldisp := url["display_url"].(string)
urlexp := url["expanded_url"].(string)
spans = append(spans, span{
span: pullIndices(url),
whatDo: "link",
linkTo: urlexp,
linkText: urldisp,
})
}
}
if mentions, ok := es["user_mentions"].([]interface{}); ok {
for _, mentioni := range mentions {
mention := mentioni.(map[string]interface{})
mentionuser := mention["screen_name"].(string)
spans = append(spans, span{
span: pullIndices(mention),
whatDo: "link",
linkTo: fmt.Sprintf("https://twitter.com/%s", mentionuser),
})
}
}
if symbols, ok := es["symbols"].([]interface{}); ok {
for _, symboli := range symbols {
symbol := symboli.(map[string]interface{})
symbolname := symbol["text"].(string)
spans = append(spans, span{
span: pullIndices(symbol),
whatDo: "link",
linkTo: fmt.Sprintf("?q=$%s", symbolname),
})
}
}
}
// Sort removeSpans from the end to the beginning.
sort.Slice(spans, func(a, b int) bool {
return spans[a].span[0] > spans[b].span[0]
})
// Expand overlapping remove spans.
newSpans := make([]span, 0, len(spans))
for i := 0; i < len(spans)-1; i++ {
span := spans[i]
prevSpan := spans[i+1]
if prevSpan.span[0] <= span.span[0] && prevSpan.span[1] >= span.span[1] {
// Spans overlap.
if span.whatDo != "remove" || prevSpan.whatDo != "remove" {
log.Printf("found overlapping non-remove spans!")
}
if span.span[1] > prevSpan.span[1] {
prevSpan.span[1] = span.span[1]
}
continue
}
newSpans = append(newSpans, span)
}
if len(spans) > 0 {
newSpans = append(newSpans, spans[len(spans)-1])
}
spans = newSpans
runed := []rune(t)
for _, span := range spans {
switch span.whatDo {
case "remove":
// Delete text from span[0] to span[1].
runed = append(runed[:span.span[0]], runed[span.span[1]:]...)
case "link":
// Add a link.
var text []rune
if span.linkText == "" {
text = runed[span.span[0]:span.span[1]]
} else {
text = []rune(span.linkText)
}
runedBits := [][]rune{
runed[:span.span[0]],
[]rune("<a href=\""),
[]rune(span.linkTo),
[]rune("\">"),
[]rune(text),
[]rune("</a>"),
runed[span.span[1]:],
}
finalLen := 0
for _, s := range runedBits {
finalLen += len(s)
}
runed = make([]rune, finalLen)
p := 0
for _, s := range runedBits {
p += copy(runed[p:], s)
}
default:
log.Printf("unknown span operation %v", span.whatDo)
}
}
t = string(runed)
// HTML escape any <>
t = strings.ReplaceAll(t, ltRep, "&lt;")
t = strings.ReplaceAll(t, gtRep, "&gt;")
return uncheckedconversions.HTMLFromStringKnownToSatisfyTypeContract(t)
},
}
now := time.Now()
for rows.Next() {
var t tweet
var o pgtype.JSONB
if err := rows.Scan(&t.ID, &t.Text, &o, &t.CreatedAt); err != nil {
writeError(rw, http.StatusInternalServerError, "reading from database", err)
return
}
if err := json.Unmarshal(o.Bytes, &t.Object); err != nil {
writeError(rw, http.StatusInternalServerError, "parsing JSON from database", err)
return
}
ago := now.Sub(t.CreatedAt)
switch {
case t.CreatedAt.Year() != now.Year():
t.CreatedAtFriendly = t.CreatedAt.Format("Jan 2, 2006")
case t.CreatedAt.YearDay() != now.YearDay():
t.CreatedAtFriendly = t.CreatedAt.Format("Jan 2")
case ago.Hours() >= 1.0:
t.CreatedAtFriendly = fmt.Sprintf("%dh", int(ago.Hours()))
case ago.Minutes() >= 1.0:
t.CreatedAtFriendly = fmt.Sprintf("%dm", int(ago.Minutes()))
case ago.Seconds() >= 0.0:
t.CreatedAtFriendly = fmt.Sprintf("%ds", int(ago.Seconds()))
default:
t.CreatedAtFriendly = fmt.Sprintf("in %ds", -int(ago.Seconds()))
}
td.Tweets = append(td.Tweets, t)
}
rows.Close()
if len(td.Tweets) > pageSize {
td.NextTweetID = &td.Tweets[pageSize].ID
td.Tweets = td.Tweets[:pageSize]
}
if err := tweetsTmpl.Execute(rw, td); err != nil {
log.Printf("tweets: executing template: %v", err)
}
})
log.Printf("now listening on :8080")
log.Print(http.ListenAndServe(":8080", r))
}

View file

@ -19,14 +19,15 @@ rec {
buildGo =
let orig = import ./tvl/nix/buildGo { pkgs = nixpkgs; };
in orig // {
program = args:
program = { dockerData ? [], ... }@args:
let
origOut = orig.program args;
origOut = orig.program (nixpkgs.lib.filterAttrs (n: v: n != "dockerData") args);
in origOut // {
dockerImage = nixpkgs.dockerTools.buildImage {
name = args.name;
contents = dockerData;
config = {
Cmd = [ "${origOut}/bin/${args.name}" ];
Entrypoint = [ "${origOut}/bin/${args.name}" ];
Env = [
"SSL_CERT_FILE=${nixpkgs.cacert}/etc/ssl/certs/ca-bundle.crt"
];