// Package darwingests3 loads Darwin timetable and reference data files from a
// blob storage bucket.
package darwingests3
|
|
|
|
import (
|
|
"compress/gzip"
|
|
"context"
|
|
"encoding/xml"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"log"
|
|
"regexp"
|
|
"sort"
|
|
"strconv"
|
|
"time"
|
|
|
|
"gocloud.dev/blob"
|
|
"git.lukegb.com/lukegb/depot/go/trains/darwin"
|
|
)
|
|
|
|
// blobRegex matches object keys of the form
// <14-digit datetime>[_ref]_v<version>.xml.gz.
//
// Submatch 1 is the datetime, submatch 2 is the optional "_ref" marker and
// submatch 3 is the numeric version.
var blobRegex = regexp.MustCompile(`^(?P<datetime>[0-9]{14})(_ref)?_v(?P<version>[0-9]+)\.xml\.gz$`)
|
|
|
|
// understoodVersionTimetable is the filename version a timetable file must
// carry for this package to load it.
const understoodVersionTimetable = 8

// understoodVersionReference is the filename version a reference data file
// must carry for this package to load it.
const understoodVersionReference = 3
|
|
|
|
// fileType classifies a bucket object by the kind of data it holds. It is
// also used as a filter value when listing files.
type fileType int

const (
	// fileTypeWantAny is only meaningful as a filter: it accepts every type.
	fileTypeWantAny fileType = iota
	// fileTypeTimetable marks a timetable file.
	fileTypeTimetable
	// fileTypeReference marks a reference data file.
	fileTypeReference
)

// IsA reports whether ft passes the filter wantFileType. The filter passes
// when it is fileTypeWantAny or when it equals ft.
func (ft fileType) IsA(wantFileType fileType) bool {
	return wantFileType == fileTypeWantAny || wantFileType == ft
}
|
|
|
|
type fileInfo struct {
|
|
filename string
|
|
fileType fileType
|
|
version int
|
|
filenameDate time.Time
|
|
}
|
|
|
|
func parseFilename(fn string) (fileInfo, error) {
|
|
m := blobRegex.FindStringSubmatch(fn)
|
|
if m == nil {
|
|
return fileInfo{}, fmt.Errorf("couldn't parse filename %q: regex didn't match", fn)
|
|
}
|
|
|
|
filenameDateStr := m[1]
|
|
filenameDate, err := time.ParseInLocation("20060102150405", filenameDateStr, darwin.London)
|
|
if err != nil {
|
|
return fileInfo{}, fmt.Errorf("couldn't parse blob filename %q: datetime part %q unparsable: %v", fn, filenameDateStr, err)
|
|
}
|
|
|
|
versionStr := m[3]
|
|
version, err := strconv.Atoi(versionStr)
|
|
if err != nil {
|
|
return fileInfo{}, fmt.Errorf("couldn't parse blob filename %q: version part %q unparsable: %v", fn, versionStr, err)
|
|
}
|
|
|
|
fileType := fileTypeTimetable
|
|
if m[2] != "" {
|
|
fileType = fileTypeReference
|
|
}
|
|
|
|
return fileInfo{
|
|
filename: fn,
|
|
fileType: fileType,
|
|
version: version,
|
|
filenameDate: filenameDate,
|
|
}, nil
|
|
}
|
|
|
|
func filesInBucket(ctx context.Context, bucket *blob.Bucket, wantFileType fileType) ([]fileInfo, error) {
|
|
var fis []fileInfo
|
|
|
|
iter := bucket.List(nil)
|
|
for {
|
|
obj, err := iter.Next(ctx)
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
if err != nil {
|
|
return nil, fmt.Errorf("listing objects in bucket: %w", err)
|
|
}
|
|
|
|
fi, err := parseFilename(obj.Key)
|
|
if err != nil {
|
|
log.Print(err)
|
|
continue
|
|
}
|
|
|
|
if !fi.fileType.IsA(wantFileType) {
|
|
continue
|
|
}
|
|
|
|
fis = append(fis, fi)
|
|
}
|
|
|
|
return fis, nil
|
|
}
|
|
|
|
func LoadTimetableForDate(ctx context.Context, bucket *blob.Bucket, ts time.Time) (*darwin.PushPortTimetable, error) {
|
|
// Find the latest filename that covers "today".
|
|
// This is a little tricky, and probably wrong, but we'll try and be good enough.
|
|
fis, err := filesInBucket(ctx, bucket, fileTypeTimetable)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("filesInBucket: %w", err)
|
|
}
|
|
|
|
// OK, find one where the date matches...
|
|
wanty, wantm, wantd := ts.Date()
|
|
var got *fileInfo
|
|
for _, fi := range fis {
|
|
if fi.version != understoodVersionTimetable {
|
|
continue
|
|
}
|
|
fiy, fim, fid := fi.filenameDate.Date()
|
|
if wanty == fiy && wantm == fim && wantd == fid {
|
|
fi := fi
|
|
got = &fi
|
|
break
|
|
}
|
|
}
|
|
if got == nil {
|
|
return nil, fmt.Errorf("unable to find matching timetable for %v")
|
|
}
|
|
|
|
return LoadTimetable(ctx, bucket, got.filename)
|
|
}
|
|
|
|
func decodeFromBucket(ctx context.Context, bucket *blob.Bucket, filename string, out interface{}) error {
|
|
r, err := bucket.NewReader(ctx, filename, nil)
|
|
if err != nil {
|
|
return fmt.Errorf("NewReader(%q): %w", filename, err)
|
|
}
|
|
defer r.Close()
|
|
|
|
gzr, err := gzip.NewReader(r)
|
|
if err != nil {
|
|
return fmt.Errorf("gzip.NewReader(fn=%q): %w", filename, err)
|
|
}
|
|
defer gzr.Close()
|
|
|
|
if err := xml.NewDecoder(gzr).Decode(out); err != nil {
|
|
return fmt.Errorf("decoding XML from %q: %w", filename, err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// ErrBadVersion is returned by LoadTimetable and LoadReferenceData when the
// version parsed from the filename is not the one this package understands.
var ErrBadVersion = errors.New("darwingests3: bad version, ignoring request")
|
|
|
|
func LoadTimetable(ctx context.Context, bucket *blob.Bucket, filename string) (*darwin.PushPortTimetable, error) {
|
|
log.Printf("loading timetable from S3 file %q", filename)
|
|
fi, err := parseFilename(filename)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if fi.version != understoodVersionTimetable {
|
|
return nil, ErrBadVersion
|
|
}
|
|
var ppt darwin.PushPortTimetable
|
|
if err := decodeFromBucket(ctx, bucket, filename, &ppt); err != nil {
|
|
return nil, err
|
|
}
|
|
return &ppt, nil
|
|
}
|
|
|
|
func LoadLatestReferenceData(ctx context.Context, bucket *blob.Bucket) (*darwin.PushPortReferenceData, error) {
|
|
fis, err := filesInBucket(ctx, bucket, fileTypeReference)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("filesInBucket: %w", err)
|
|
}
|
|
|
|
sort.Slice(fis, func(i, j int) bool {
|
|
if fis[i].version == understoodVersionReference && fis[j].version != understoodVersionReference {
|
|
return true
|
|
} else if fis[i].version != understoodVersionReference && fis[j].version == understoodVersionReference {
|
|
return false
|
|
}
|
|
return fis[i].filenameDate.Before(fis[j].filenameDate)
|
|
})
|
|
|
|
return LoadReferenceData(ctx, bucket, fis[0].filename)
|
|
}
|
|
|
|
func LoadReferenceData(ctx context.Context, bucket *blob.Bucket, filename string) (*darwin.PushPortReferenceData, error) {
|
|
log.Printf("loading reference data from S3 file %q", filename)
|
|
fi, err := parseFilename(filename)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if fi.version != understoodVersionReference {
|
|
return nil, ErrBadVersion
|
|
}
|
|
var pprd darwin.PushPortReferenceData
|
|
if err := decodeFromBucket(ctx, bucket, filename, &pprd); err != nil {
|
|
return nil, err
|
|
}
|
|
return &pprd, nil
|
|
}
|