depot/go/trains/darwin/darwingest/darwingests3/darwingests3.go

214 lines
5.1 KiB
Go

package darwingests3
import (
"compress/gzip"
"context"
"encoding/xml"
"errors"
"fmt"
"io"
"log"
"regexp"
"sort"
"strconv"
"time"
"gocloud.dev/blob"
"hg.lukegb.com/lukegb/depot/go/trains/darwin"
)
var (
// blobRegex matches object names of the form
// <14-digit datetime>[_ref]_v<version>.xml.gz.
// Capture group 1 is the datetime, group 2 is the optional "_ref" marker
// and group 3 is the numeric version.
blobRegex = regexp.MustCompile(`^(?P<datetime>[0-9]{14})(_ref)?_v(?P<version>[0-9]+)\.xml\.gz$`)
)
const (
// understoodVersionTimetable is the only filename version accepted when
// loading timetable files; other versions are skipped or rejected.
understoodVersionTimetable = 8
// understoodVersionReference is the only filename version accepted when
// loading reference data files.
understoodVersionReference = 3
)
// fileType classifies a bucket object by the kind of data its filename
// indicates.
type fileType int

const (
	// fileTypeWantAny is a wildcard used only as a filter: every file type
	// satisfies it.
	fileTypeWantAny fileType = iota
	// fileTypeTimetable marks a timetable file (no "_ref" marker in the name).
	fileTypeTimetable
	// fileTypeReference marks a reference data file ("_ref" marker present).
	fileTypeReference
)

// IsA reports whether ft satisfies the filter wantFileType: either the filter
// is the fileTypeWantAny wildcard, or it is exactly ft.
func (ft fileType) IsA(wantFileType fileType) bool {
	return wantFileType == fileTypeWantAny || wantFileType == ft
}
// fileInfo holds the metadata that parseFilename extracts from an object name.
type fileInfo struct {
// filename is the object name exactly as it was passed to parseFilename.
filename string
// fileType is fileTypeReference when the name carries the "_ref" marker,
// otherwise fileTypeTimetable.
fileType fileType
// version is the integer following "_v" in the name.
version int
// filenameDate is the leading 14-digit timestamp, interpreted in
// darwin.London.
filenameDate time.Time
}
// parseFilename extracts the timestamp, file type and version encoded in a
// bucket object name matching blobRegex.
//
// The timestamp is interpreted in darwin.London. A name carrying the "_ref"
// marker is classified as fileTypeReference; any other matching name is
// fileTypeTimetable.
//
// An error is returned if fn does not match blobRegex or if its datetime or
// version component cannot be parsed. The underlying parse error is wrapped,
// so callers can inspect it with errors.Is / errors.As.
func parseFilename(fn string) (fileInfo, error) {
	m := blobRegex.FindStringSubmatch(fn)
	if m == nil {
		return fileInfo{}, fmt.Errorf("couldn't parse filename %q: regex didn't match", fn)
	}
	// Submatch indices follow the group order in blobRegex:
	// 1 = datetime, 2 = optional "_ref" marker, 3 = version.
	filenameDateStr, refMarker, versionStr := m[1], m[2], m[3]

	filenameDate, err := time.ParseInLocation("20060102150405", filenameDateStr, darwin.London)
	if err != nil {
		return fileInfo{}, fmt.Errorf("couldn't parse blob filename %q: datetime part %q unparsable: %w", fn, filenameDateStr, err)
	}

	// The regex guarantees digits only, but Atoi can still fail on overflow.
	version, err := strconv.Atoi(versionStr)
	if err != nil {
		return fileInfo{}, fmt.Errorf("couldn't parse blob filename %q: version part %q unparsable: %w", fn, versionStr, err)
	}

	ft := fileTypeTimetable
	if refMarker != "" {
		ft = fileTypeReference
	}

	return fileInfo{
		filename:     fn,
		fileType:     ft,
		version:      version,
		filenameDate: filenameDate,
	}, nil
}
// filesInBucket lists every object in bucket and returns the parsed fileInfo
// of each one whose type satisfies wantFileType.
//
// Objects whose names parseFilename rejects are logged and skipped rather than
// treated as fatal. A listing error aborts the scan and is returned wrapped.
func filesInBucket(ctx context.Context, bucket *blob.Bucket, wantFileType fileType) ([]fileInfo, error) {
	var matched []fileInfo

	it := bucket.List(nil)
	for {
		obj, err := it.Next(ctx)
		switch {
		case err == io.EOF:
			// End of listing: hand back whatever was collected.
			return matched, nil
		case err != nil:
			return nil, fmt.Errorf("listing objects in bucket: %w", err)
		}

		info, parseErr := parseFilename(obj.Key)
		if parseErr != nil {
			log.Print(parseErr)
			continue
		}
		if info.fileType.IsA(wantFileType) {
			matched = append(matched, info)
		}
	}
}
// LoadTimetableForDate loads a timetable whose filename date falls on the same
// calendar day as ts.
//
// It lists the timetable files in bucket, ignores those whose version is not
// understoodVersionTimetable, and loads the first remaining file (in listing
// order) whose filename year, month and day equal those of ts. This matching
// is a best-effort heuristic and may not always pick the ideal file.
//
// The day of ts is taken in ts's own location, while filename dates are
// parsed in darwin.London.
// NOTE(review): callers presumably need to pass ts in London time for the
// comparison to be meaningful around midnight — confirm.
//
// An error is returned if listing fails, if no file matches, or if the
// selected file cannot be loaded.
func LoadTimetableForDate(ctx context.Context, bucket *blob.Bucket, ts time.Time) (*darwin.PushPortTimetable, error) {
	fis, err := filesInBucket(ctx, bucket, fileTypeTimetable)
	if err != nil {
		return nil, fmt.Errorf("filesInBucket: %w", err)
	}

	wanty, wantm, wantd := ts.Date()
	for _, fi := range fis {
		if fi.version != understoodVersionTimetable {
			continue
		}
		fiy, fim, fid := fi.filenameDate.Date()
		if wanty == fiy && wantm == fim && wantd == fid {
			return LoadTimetable(ctx, bucket, fi.filename)
		}
	}

	// Include the requested time so the failure is actionable; the original
	// format string had a %v verb with no argument.
	return nil, fmt.Errorf("unable to find matching timetable for %v", ts)
}
// decodeFromBucket reads the object named filename from bucket, gunzips it and
// XML-decodes the result into out.
//
// out is handed straight to (*xml.Decoder).Decode, so it must be a value that
// decoder accepts (normally a pointer to the destination struct). Each failure
// is returned wrapped with the step that failed and the filename.
func decodeFromBucket(ctx context.Context, bucket *blob.Bucket, filename string, out interface{}) error {
	blobReader, err := bucket.NewReader(ctx, filename, nil)
	if err != nil {
		return fmt.Errorf("NewReader(%q): %w", filename, err)
	}
	defer blobReader.Close()

	gunzip, err := gzip.NewReader(blobReader)
	if err != nil {
		return fmt.Errorf("gzip.NewReader(fn=%q): %w", filename, err)
	}
	defer gunzip.Close()

	dec := xml.NewDecoder(gunzip)
	if decodeErr := dec.Decode(out); decodeErr != nil {
		return fmt.Errorf("decoding XML from %q: %w", filename, decodeErr)
	}
	return nil
}
// ErrBadVersion is returned by LoadTimetable and LoadReferenceData when the
// version encoded in the filename is not the one this package understands.
var ErrBadVersion = errors.New("darwingests3: bad version, ignoring request")
// LoadTimetable loads and decodes the timetable stored in bucket under
// filename.
//
// The filename must parse via parseFilename and carry version
// understoodVersionTimetable; otherwise the parse error or ErrBadVersion is
// returned without reading the object. Only the version is checked here, not
// the file type parsed from the name.
func LoadTimetable(ctx context.Context, bucket *blob.Bucket, filename string) (*darwin.PushPortTimetable, error) {
	log.Printf("loading timetable from S3 file %q", filename)

	info, err := parseFilename(filename)
	switch {
	case err != nil:
		return nil, err
	case info.version != understoodVersionTimetable:
		return nil, ErrBadVersion
	}

	timetable := new(darwin.PushPortTimetable)
	if err := decodeFromBucket(ctx, bucket, filename, timetable); err != nil {
		return nil, err
	}
	return timetable, nil
}
// LoadLatestReferenceData loads the most recent reference data file in bucket.
//
// Reference files are ranked so that those with version
// understoodVersionReference come first and, within the same group, newer
// filename dates come before older ones; the top-ranked file is then loaded.
// If no file has the understood version, the newest file of another version
// is selected and LoadReferenceData rejects it with ErrBadVersion.
//
// An error is returned if listing fails, if the bucket contains no reference
// data files at all, or if loading the selected file fails.
func LoadLatestReferenceData(ctx context.Context, bucket *blob.Bucket) (*darwin.PushPortReferenceData, error) {
	fis, err := filesInBucket(ctx, bucket, fileTypeReference)
	if err != nil {
		return nil, fmt.Errorf("filesInBucket: %w", err)
	}
	// Guard against an empty listing; indexing fis[0] below would panic.
	if len(fis) == 0 {
		return nil, errors.New("darwingests3: no reference data files found in bucket")
	}

	sort.Slice(fis, func(i, j int) bool {
		iUnderstood := fis[i].version == understoodVersionReference
		jUnderstood := fis[j].version == understoodVersionReference
		if iUnderstood != jUnderstood {
			// Files we can actually decode always rank ahead of ones we can't.
			return iUnderstood
		}
		// Newest first, so that fis[0] is the latest file.
		return fis[i].filenameDate.After(fis[j].filenameDate)
	})
	return LoadReferenceData(ctx, bucket, fis[0].filename)
}
// LoadReferenceData loads and decodes the reference data stored in bucket
// under filename.
//
// The filename must parse via parseFilename and carry version
// understoodVersionReference; otherwise the parse error or ErrBadVersion is
// returned without reading the object. Only the version is checked here, not
// the file type parsed from the name.
func LoadReferenceData(ctx context.Context, bucket *blob.Bucket, filename string) (*darwin.PushPortReferenceData, error) {
	log.Printf("loading reference data from S3 file %q", filename)

	info, err := parseFilename(filename)
	switch {
	case err != nil:
		return nil, err
	case info.version != understoodVersionReference:
		return nil, ErrBadVersion
	}

	refData := new(darwin.PushPortReferenceData)
	if err := decodeFromBucket(ctx, bucket, filename, refData); err != nil {
		return nil, err
	}
	return refData, nil
}