From 2306915e2ca0651a9786bfb8616bf1b240a57571 Mon Sep 17 00:00:00 2001 From: Luke Granger-Brown Date: Sun, 21 Mar 2021 18:04:37 +0000 Subject: [PATCH] fup: implement file uploads! TODO: tests for this. --- web/fup/cmd/serve.go | 15 +- web/fup/default.nix | 2 +- web/fup/fuphttp/fuphttp.go | 28 ++++ web/fup/fuphttp/httpupload.go | 251 +++++++++++++++++++++++++++++ web/fup/fuphttp/httpupload_test.go | 46 ++++++ web/fup/fuphttp/metadata.go | 24 +++ web/fup/go.mod | 2 +- 7 files changed, 365 insertions(+), 3 deletions(-) create mode 100644 web/fup/fuphttp/httpupload.go create mode 100644 web/fup/fuphttp/httpupload_test.go diff --git a/web/fup/cmd/serve.go b/web/fup/cmd/serve.go index 1bc4ef5f95..ef4b7f5a79 100644 --- a/web/fup/cmd/serve.go +++ b/web/fup/cmd/serve.go @@ -9,6 +9,7 @@ import ( "fmt" "log" "net/http" + "strings" "github.com/google/safehtml" "github.com/spf13/cobra" @@ -19,23 +20,35 @@ import ( func init() { rootCmd.AddCommand(serveCmd) + serveCmd.Flags().StringVar(&serveRoot, "root", "http://localhost:8191/", "Application root address.") + serveCmd.Flags().StringVar(&serveStaticRoot, "static-root", "/static/", "Root address from which static assets should be referenced.") serveCmd.Flags().StringVarP(&serveBind, "listen", "l", ":8191", "Bind address for HTTP server.") serveCmd.Flags().BoolVar(&serveDirectOnly, "direct-only", false, "If set, all file serving will be proxied, even if the backend supports signed URLs.") } var ( serveBind string + serveRoot string + serveStaticRoot string serveDirectOnly bool serveCmd = &cobra.Command{ Use: "serve", Short: "Serve HTTP", RunE: func(cmd *cobra.Command, args []string) error { + if !strings.HasSuffix(serveRoot, "/") { + return fmt.Errorf("--root flag should end in / (value is %q)", serveRoot) + } + if !strings.HasSuffix(serveStaticRoot, "/") { + return fmt.Errorf("--static-root flag should end in / (value is %q)", serveStaticRoot) + } + ctx := context.Background() cfg := &fuphttp.Config{ Templates: 
fupstatic.Templates, Static: fupstatic.Static, - StaticRoot: safehtml.TrustedResourceURLFromConstant("/static/"), + StaticRoot: safehtml.TrustedResourceURLFromFlag(cmd.Flag("static-root").Value), + AppRoot: serveRoot, StorageURL: bucketURL, RedirectToBlobstore: !serveDirectOnly, } diff --git a/web/fup/default.nix b/web/fup/default.nix index a459500d6d..6ed930908e 100644 --- a/web/fup/default.nix +++ b/web/fup/default.nix @@ -12,7 +12,7 @@ pkgs.buildGoModule { src = ./.; - vendorSha256 = "sha256:01q2pqn5j34zsp1al6kidfxd9bj6s1wmz8klywp1mp4lh39ln4sl"; + vendorSha256 = "sha256:0myd1p61q777ybbwdz8k4nbchh2hv1yr8008061m3gc44s3gsphx"; meta = with pkgs.lib; { description = "Simple file upload manager."; diff --git a/web/fup/fuphttp/fuphttp.go b/web/fup/fuphttp/fuphttp.go index f921114c7a..620ffa1078 100644 --- a/web/fup/fuphttp/fuphttp.go +++ b/web/fup/fuphttp/fuphttp.go @@ -10,6 +10,7 @@ import ( "io/fs" "log" "net/http" + "strings" "time" "github.com/google/safehtml" @@ -31,6 +32,8 @@ type Config struct { Static fs.FS StaticRoot safehtml.TrustedResourceURL + AppRoot string + // If set, redirects to a signed URL if possible instead of serving directly. RedirectToBlobstore bool @@ -44,6 +47,9 @@ type Config struct { // FilenameGenerator returns a new filename based on the provided prefix and extension. FilenameGenerator fngen.FilenameGenerator + + // UseDirectDownload decides whether the "pretty" wrapped page or the direct download page is the most appropriate for a given set of parameters. 
+ UseDirectDownload func(fileExtension string, mimeType string) bool } type Application struct { @@ -51,10 +57,18 @@ type Application struct { notFoundTmpl *template.Template storageBackend *blob.Bucket + appRoot string + redirectToBlobstore bool redirectExpiry time.Duration filenameGenerator fngen.FilenameGenerator + useDirectDownload func(fileExtension string, mimeType string) bool +} + +func DefaultUseDirectDownload(fileExtension, mimeType string) bool { + // Only use the pretty page for text/*. + return !strings.HasPrefix(mimeType, "text/") } func (a *Application) Handler() http.Handler { @@ -71,6 +85,8 @@ func (a *Application) Handler() http.Handler { r.NotFoundHandler = http.HandlerFunc(a.notFound) r.HandleFunc("/", renderTemplate(a.indexTmpl)) r.HandleFunc("/raw/{filename}", a.rawDownload) + r.HandleFunc("/upload", a.upload).Methods("POST") + r.HandleFunc("/upload/{filename}", a.upload).Methods("PUT") return r } @@ -96,6 +112,10 @@ func (a *Application) badRequest(rw http.ResponseWriter, r *http.Request, err er fmt.Fprintf(rw, "bad request: %v\n", err.Error()) } +func (a *Application) appURL(s string) string { + return a.appRoot + s +} + func parseTemplate(t *template.Template, fsys fs.FS, name string) (*template.Template, error) { bs, err := fs.ReadFile(fsys, name) if err != nil { @@ -124,6 +144,8 @@ func New(ctx context.Context, cfg *Config) (*Application, error) { redirectToBlobstore: cfg.RedirectToBlobstore, redirectExpiry: cfg.RedirectExpiry, filenameGenerator: cfg.FilenameGenerator, + useDirectDownload: cfg.UseDirectDownload, + appRoot: cfg.AppRoot, } if a.redirectExpiry == 0 { a.redirectExpiry = defaultRedirectExpiry @@ -131,6 +153,9 @@ func New(ctx context.Context, cfg *Config) (*Application, error) { if a.filenameGenerator == nil { a.filenameGenerator = fngen.PetnameGenerator } + if a.useDirectDownload == nil { + a.useDirectDownload = DefaultUseDirectDownload + } bkt := cfg.StorageBackend if bkt == nil { @@ -150,6 +175,9 @@ func New(ctx 
// parseExpiry converts a user-supplied expiry specification into an absolute
// point in time.
//
// expStr is one of:
//  1. The empty string, meaning "no expiry": the zero time.Time is returned.
//  2. An absolute timestamp in RFC 3339 format, which is returned as parsed.
//  3. A string accepted by time.ParseDuration (e.g. "36h"), which is added to
//     the current time.
//
// Any other input yields the zero time.Time and a non-nil error. No range
// check is performed: timestamps in the past and negative durations are
// accepted as-is.
func parseExpiry(expStr string) (time.Time, error) {
	if expStr == "" {
		return time.Time{}, nil
	}
	if t, err := time.Parse(time.RFC3339, expStr); err == nil {
		return t, nil
	}
	if d, err := time.ParseDuration(expStr); err == nil {
		return time.Now().Add(d), nil
	}
	return time.Time{}, fmt.Errorf("unable to parse %q as RFC3339 or Go duration", expStr)
}

// metaField returns the metadata field named by name.
// It tries the following, in order, returning the first non-empty one:
//  1. HTTP header Fup-{name}
//  2. POST/PUT body parameter {name}
//  3. Query string parameter {name}
//
// Steps 2 and 3 are delegated to (*http.Request).FormValue, which gives body
// parameters precedence over query string parameters. Note that FormValue
// parses the request body when it is form-encoded or multipart.
func metaField(r *http.Request, name string) string {
	if v := r.Header.Get("Fup-" + name); v != "" {
		return v
	}

	// Look up the requested field; this previously always looked up "expiry"
	// regardless of name.
	return r.FormValue(name)
}
+ // This list is mostly taken from https://www.gnu.org/software/tar/manual/html_node/gzip.html. + knownArchives = map[string]bool{ + ".gz": true, + ".bz2": true, + ".Z": true, + ".lz": true, + ".lzma": true, + ".tlz": true, + ".lzo": true, + ".xz": true, + ".zst": true, + } +) + +func splitExtOnce(fn string) (string, string) { + ext := path.Ext(fn) + return fn[:len(fn)-len(ext)], ext +} + +// fileExt splits a filename into its "prefix" and its "file extension". +// It has special behaviour for known multipart extensions, like tar.gz. +func fileExt(fn string) (string, string) { + firstPre, firstExt := splitExtOnce(fn) + if !knownArchives[firstExt] { + return firstPre, firstExt + } + secondPre, secondExt := splitExtOnce(firstPre) + return secondPre, secondExt + firstExt +} + +// UploadResponse is the JSON object returned when an upload completes. +type UploadResponse struct { + URL string `json:"url"` + DirectURL string `json:"direct_url"` + DisplayURL string `json:"display_url"` + Filename string `json:"filename"` + Expiry *time.Time `json:"expiry"` +} + +func (a *Application) upload(rw http.ResponseWriter, r *http.Request) { + ctx := r.Context() + + meta := &Metadata{ + Attributes: &blob.Attributes{}, + } + var err error + meta.ExpiresAt, err = parseExpiry(metaField(r, "expiry")) + if err != nil { + log.Printf("upload: parsing expiry: %v", err) + a.badRequest(rw, r, fmt.Errorf("parsing expiry: %v", err)) + return + } + + var contentType string + if r.Header.Get("Content-Type") != "" { + contentType, _, err = mime.ParseMediaType(r.Header.Get("Content-Type")) + if err != nil { + log.Printf("upload: parsing content-type header: %v", err) + a.badRequest(rw, r, fmt.Errorf("parsing content-type %q: %v", r.Header.Get("Content-Type"), err)) + return + } + } + + var rdr io.Reader = r.Body + defer r.Body.Close() + + origContentType := contentType + var origFilename string + var origExt string + + if r.Method == "PUT" { + vars := mux.Vars(r) + origFilename, origExt = 
fileExt(vars["filename"]) + } else if contentType == "multipart/form-data" { + file, hdrs, err := r.FormFile("file") + if err != nil { + log.Printf("upload: multipart: failed to get file: %v", err) + a.badRequest(rw, r, fmt.Errorf("parsing multipart data: %v", err)) + return + } + rdr = file + defer file.Close() + + origFilename, origExt = fileExt(hdrs.Filename) + origContentType = hdrs.Header.Get("Content-Type") + } else { + if r.PostFormValue("content") == "" { + log.Printf("upload: empty content") + a.badRequest(rw, r, errors.New("empty content")) + return + } + + origExt := r.PostFormValue("extension") + if origExt == "" { + origExt = ".txt" + } else if origExt[0] != '.' { + origExt = "." + origExt + } + + rdr = strings.NewReader(r.PostFormValue("content")) + } + + // Find a mime type for the uploaded file. + mimeType := origContentType + fileExt := origExt + if mimeType == "" && origExt != "" { + // Try from the file extension instead... + mimeType = mime.TypeByExtension(origExt) + } + if mimeType == "" { + // We'll need to sniff it... + buf := make([]byte, 512) + if _, err := r.Body.Read(buf); err != nil { + log.Printf("upload: Read for MIME sniffing: %v", err) + a.internalError(rw, r) + return + } + + m := mimetype.Detect(buf) + mimeType = m.String() + if fileExt == "" { + fileExt = m.Extension() + } + + rdr = io.MultiReader(bytes.NewReader(buf), rdr) + } + + // Compute the new filename. + newName, err := fngen.UniqueName(ctx, a.storageBackend.Exists, origFilename, fileExt, a.filenameGenerator) + if err != nil { + log.Printf("upload: UniqueName: %v", err) + a.internalError(rw, r) + return + } + + // Now we build the upload options... 
+ meta.Attributes.ContentType = mimeType + + wctx, wcancel := context.WithCancel(ctx) + defer wcancel() + w, err := a.storageBackend.NewWriter(wctx, newName, meta.WriterOptions()) + if err != nil { + log.Printf("upload: NewWriter(%q): %v", newName, err) + return + } + + if _, err := io.Copy(w, rdr); err != nil { + log.Printf("upload: Copy for %q: %v", newName, err) + wcancel() + w.Close() + a.internalError(rw, r) + return + } + + if err := w.Close(); err != nil { + log.Printf("upload: Close(%q): %v", newName, err) + a.internalError(rw, r) + return + } + + resp := UploadResponse{ + URL: a.appURL(url.PathEscape(newName)), + DirectURL: a.appURL("raw/" + url.PathEscape(newName)), + } + if a.useDirectDownload(newName, mimeType) { + resp.DisplayURL = resp.DirectURL + } else { + resp.DisplayURL = resp.URL + } + resp.Filename = newName + if !meta.ExpiresAt.IsZero() { + resp.Expiry = &meta.ExpiresAt + } + + // This is technically wrong: we don't implement content negotiation properly. Oh well. + if strings.ToLower(r.Header.Get("Accept")) != "application/json" { + http.Redirect(rw, r, resp.DisplayURL, http.StatusSeeOther) + return + } + + rw.Header().Set("Content-Type", "application/json; charset=utf-8") + rw.WriteHeader(http.StatusCreated) + + if err := json.NewEncoder(rw).Encode(&resp); err != nil { + log.Printf("upload: writing JSON response: %v", err) + // It's too late to return a proper error :( + } +} diff --git a/web/fup/fuphttp/httpupload_test.go b/web/fup/fuphttp/httpupload_test.go new file mode 100644 index 0000000000..0dc3ed8ee0 --- /dev/null +++ b/web/fup/fuphttp/httpupload_test.go @@ -0,0 +1,46 @@ +// SPDX-FileCopyrightText: 2021 Luke Granger-Brown +// +// SPDX-License-Identifier: Apache-2.0 + +package fuphttp + +import "testing" + +func TestFileExt(t *testing.T) { + tcs := []struct { + inp string + wantPrefix string + wantSuffix string + }{{ + inp: "foo.txt", + wantPrefix: "foo", + wantSuffix: ".txt", + }, { + inp: "foo", + wantPrefix: "foo", + wantSuffix: "", 
+ }, { + inp: "foo.bar.tbz2", + wantPrefix: "foo.bar", + wantSuffix: ".tbz2", + }, { + inp: "foo.tar.bz2", + wantPrefix: "foo", + wantSuffix: ".tar.bz2", + }, { + inp: "foo.tar.ppp", + wantPrefix: "foo.tar", + wantSuffix: ".ppp", + }, { + inp: "my-github.deadbeef.zip", + wantPrefix: "my-github.deadbeef", + wantSuffix: ".zip", + }} + + for _, tc := range tcs { + gotPrefix, gotSuffix := fileExt(tc.inp) + if gotPrefix != tc.wantPrefix || gotSuffix != tc.wantSuffix { + t.Errorf("fileExt(%q) = (%q, %q); want (%q, %q)", tc.inp, gotPrefix, gotSuffix, tc.wantPrefix, tc.wantSuffix) + } + } +} diff --git a/web/fup/fuphttp/metadata.go b/web/fup/fuphttp/metadata.go index d029b52d8f..60dec65395 100644 --- a/web/fup/fuphttp/metadata.go +++ b/web/fup/fuphttp/metadata.go @@ -25,6 +25,30 @@ type Metadata struct { Attributes *blob.Attributes } +// WriterOptions returns a new WriterOptions based on the provided metadata. +func (m *Metadata) WriterOptions() *blob.WriterOptions { + attrs := m.Attributes + if attrs == nil { + attrs = &blob.Attributes{} + } + if attrs.Metadata == nil { + attrs.Metadata = make(map[string]string) + } + + if !m.ExpiresAt.IsZero() { + attrs.Metadata["expires-at"] = strconv.FormatInt(m.ExpiresAt.Unix(), 10) + } + + return &blob.WriterOptions{ + CacheControl: attrs.CacheControl, + ContentDisposition: attrs.ContentDisposition, + ContentEncoding: attrs.ContentEncoding, + ContentLanguage: attrs.ContentLanguage, + ContentType: attrs.ContentType, + Metadata: attrs.Metadata, + } +} + // metadata retrieves the Metadata for the object. // Note: if the object is expired, it will delete it. 
func metadata(ctx context.Context, bucket *blob.Bucket, filename string) (*Metadata, error) { diff --git a/web/fup/go.mod b/web/fup/go.mod index 81f69dd3eb..72206823c9 100644 --- a/web/fup/go.mod +++ b/web/fup/go.mod @@ -8,7 +8,7 @@ go 1.16 require ( github.com/dustinkirkland/golang-petname v0.0.0-20191129215211-8e5a1ed0cff0 - github.com/gabriel-vasile/mimetype v1.2.0 // indirect + github.com/gabriel-vasile/mimetype v1.2.0 github.com/google/safehtml v0.0.2 github.com/gorilla/mux v1.8.0 github.com/spf13/cobra v1.1.3