// Package darwingests3 loads gzipped Darwin timetable and reference data XML
// snapshots from a blob bucket.
package darwingests3

import (
	"compress/gzip"
	"context"
	"encoding/xml"
	"errors"
	"fmt"
	"io"
	"log"
	"regexp"
	"sort"
	"strconv"
	"time"

	"gocloud.dev/blob"
	"hg.lukegb.com/lukegb/depot/go/trains/darwin"
)

var (
	// blobRegex matches snapshot object keys of the form
	// <YYYYMMDDhhmmss>[_ref]_v<version>.xml.gz.
	//
	// Capture groups: 1 = datetime, 2 = optional "_ref" marker, 3 = version.
	blobRegex = regexp.MustCompile(`^(?P<datetime>[0-9]{14})(_ref)?_v(?P<version>[0-9]+)\.xml\.gz$`)
)

const (
	// understoodVersionTimetable is the only timetable file version this
	// package will load.
	understoodVersionTimetable = 8
	// understoodVersionReference is the only reference data file version this
	// package will load.
	understoodVersionReference = 3
)

// fileType distinguishes the kinds of snapshot file found in the bucket.
type fileType int

const (
	// fileTypeWantAny is a filter value that matches every file type.
	fileTypeWantAny fileType = iota
	// fileTypeTimetable marks a timetable snapshot (no "_ref" in the name).
	fileTypeTimetable
	// fileTypeReference marks a reference data snapshot ("_ref" in the name).
	fileTypeReference
)

// IsA reports whether ft satisfies the filter wantFileType: either the filter
// is fileTypeWantAny, or it is equal to ft.
func (ft fileType) IsA(wantFileType fileType) bool {
	return wantFileType == fileTypeWantAny || wantFileType == ft
}

// fileInfo holds the metadata encoded in a snapshot object key.
type fileInfo struct {
	filename     string
	fileType     fileType
	version      int
	filenameDate time.Time // datetime from the key, interpreted in darwin.London
}

// parseFilename extracts the datetime, file type and version from an object
// key. It returns an error if the key does not match blobRegex or if any
// component cannot be parsed.
func parseFilename(fn string) (fileInfo, error) {
	m := blobRegex.FindStringSubmatch(fn)
	if m == nil {
		return fileInfo{}, fmt.Errorf("couldn't parse filename %q: regex didn't match", fn)
	}

	filenameDateStr := m[1]
	filenameDate, err := time.ParseInLocation("20060102150405", filenameDateStr, darwin.London)
	if err != nil {
		return fileInfo{}, fmt.Errorf("couldn't parse blob filename %q: datetime part %q unparsable: %w", fn, filenameDateStr, err)
	}

	versionStr := m[3]
	version, err := strconv.Atoi(versionStr)
	if err != nil {
		return fileInfo{}, fmt.Errorf("couldn't parse blob filename %q: version part %q unparsable: %w", fn, versionStr, err)
	}

	// The optional "_ref" group is empty for timetable files.
	ft := fileTypeTimetable
	if m[2] != "" {
		ft = fileTypeReference
	}

	return fileInfo{
		filename:     fn,
		fileType:     ft,
		version:      version,
		filenameDate: filenameDate,
	}, nil
}

// filesInBucket lists every object in bucket and returns the parsed metadata
// of those whose type satisfies wantFileType. Keys that cannot be parsed are
// logged and skipped rather than treated as fatal.
func filesInBucket(ctx context.Context, bucket *blob.Bucket, wantFileType fileType) ([]fileInfo, error) {
	var fis []fileInfo
	iter := bucket.List(nil)
	for {
		obj, err := iter.Next(ctx)
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, fmt.Errorf("listing objects in bucket: %w", err)
		}

		fi, err := parseFilename(obj.Key)
		if err != nil {
			log.Print(err)
			continue
		}
		if !fi.fileType.IsA(wantFileType) {
			continue
		}
		fis = append(fis, fi)
	}
	return fis, nil
}

// LoadTimetableForDate loads a timetable whose filename date falls on the same
// calendar day as ts and whose version is understoodVersionTimetable.
//
// ts is converted to darwin.London before its date is taken, because filename
// dates are parsed in that location. The first matching file in bucket listing
// order is used; this is a little tricky, and probably wrong, but aims to be
// good enough. An error is returned if no file matches.
func LoadTimetableForDate(ctx context.Context, bucket *blob.Bucket, ts time.Time) (*darwin.PushPortTimetable, error) {
	fis, err := filesInBucket(ctx, bucket, fileTypeTimetable)
	if err != nil {
		return nil, fmt.Errorf("filesInBucket: %w", err)
	}

	wantY, wantM, wantD := ts.In(darwin.London).Date()
	for _, fi := range fis {
		if fi.version != understoodVersionTimetable {
			continue
		}
		y, m, d := fi.filenameDate.Date()
		if y == wantY && m == wantM && d == wantD {
			return LoadTimetable(ctx, bucket, fi.filename)
		}
	}
	return nil, fmt.Errorf("unable to find matching timetable for %v", ts)
}

// decodeFromBucket reads the object named filename from bucket, gunzips it and
// decodes the XML into out, which must be a pointer as required by
// encoding/xml.
func decodeFromBucket(ctx context.Context, bucket *blob.Bucket, filename string, out interface{}) error {
	r, err := bucket.NewReader(ctx, filename, nil)
	if err != nil {
		return fmt.Errorf("NewReader(%q): %w", filename, err)
	}
	defer r.Close()

	gzr, err := gzip.NewReader(r)
	if err != nil {
		return fmt.Errorf("gzip.NewReader(fn=%q): %w", filename, err)
	}
	defer gzr.Close()

	if err := xml.NewDecoder(gzr).Decode(out); err != nil {
		return fmt.Errorf("decoding XML from %q: %w", filename, err)
	}
	return nil
}

// ErrBadVersion is returned when a file's version, as encoded in its name, is
// not one this package understands.
var ErrBadVersion = errors.New("darwingests3: bad version, ignoring request")

// LoadTimetable loads and decodes the timetable stored under filename in
// bucket. It returns ErrBadVersion if the version in the filename is not
// understoodVersionTimetable.
func LoadTimetable(ctx context.Context, bucket *blob.Bucket, filename string) (*darwin.PushPortTimetable, error) {
	log.Printf("loading timetable from S3 file %q", filename)

	fi, err := parseFilename(filename)
	if err != nil {
		return nil, err
	}
	if fi.version != understoodVersionTimetable {
		return nil, ErrBadVersion
	}

	var ppt darwin.PushPortTimetable
	if err := decodeFromBucket(ctx, bucket, filename, &ppt); err != nil {
		return nil, err
	}
	return &ppt, nil
}

// LoadLatestReferenceData loads the most recent reference data file in bucket
// whose version is understoodVersionReference.
//
// It returns an error if the bucket contains no reference data files. If files
// exist but none has the understood version, the newest of them is handed to
// LoadReferenceData, which reports ErrBadVersion.
func LoadLatestReferenceData(ctx context.Context, bucket *blob.Bucket) (*darwin.PushPortReferenceData, error) {
	fis, err := filesInBucket(ctx, bucket, fileTypeReference)
	if err != nil {
		return nil, fmt.Errorf("filesInBucket: %w", err)
	}
	if len(fis) == 0 {
		return nil, errors.New("no reference data files found in bucket")
	}

	// Order files with the understood version first, newest first within each
	// group, so that fis[0] is the best candidate.
	sort.Slice(fis, func(i, j int) bool {
		iUnderstood := fis[i].version == understoodVersionReference
		jUnderstood := fis[j].version == understoodVersionReference
		if iUnderstood != jUnderstood {
			return iUnderstood
		}
		return fis[i].filenameDate.After(fis[j].filenameDate)
	})

	return LoadReferenceData(ctx, bucket, fis[0].filename)
}

// LoadReferenceData loads and decodes the reference data stored under filename
// in bucket. It returns ErrBadVersion if the version in the filename is not
// understoodVersionReference.
func LoadReferenceData(ctx context.Context, bucket *blob.Bucket, filename string) (*darwin.PushPortReferenceData, error) {
	log.Printf("loading reference data from S3 file %q", filename)

	fi, err := parseFilename(filename)
	if err != nil {
		return nil, err
	}
	if fi.version != understoodVersionReference {
		return nil, ErrBadVersion
	}

	var pprd darwin.PushPortReferenceData
	if err := decodeFromBucket(ctx, bucket, filename, &pprd); err != nil {
		return nil, err
	}
	return &pprd, nil
}