@@ -45,6 +45,8 @@ import (
4545 database "cloud.google.com/go/spanner/admin/database/apiv1"
4646 adminpb "cloud.google.com/go/spanner/admin/database/apiv1/databasepb"
4747 "cloud.google.com/go/spanner/apiv1/spannerpb"
48+ "go.opentelemetry.io/otel/attribute"
49+ "go.opentelemetry.io/otel/metric"
4850
4951 gcs "cloud.google.com/go/storage"
5052 "github.com/google/go-cmp/cmp"
@@ -349,14 +351,9 @@ func (a *Appender) publishCheckpointJob(ctx context.Context, pubInterval, republ
349351 case <- a .cpUpdated :
350352 case <- t .C :
351353 }
352- func () {
353- ctx , span := tracer .Start (ctx , "tessera.storage.gcp.publishCheckpointJob" )
354- defer span .End ()
355-
356- if err := a .sequencer .publishCheckpoint (ctx , pubInterval , republishInterval , a .publishCheckpoint ); err != nil {
357- klog .Warningf ("publishCheckpoint failed: %v" , err )
358- }
359- }()
354+ if err := a .sequencer .publishCheckpoint (ctx , pubInterval , republishInterval , a .updateCheckpoint ); err != nil {
355+ klog .Warningf ("publishCheckpoint failed: %v" , err )
356+ }
360357 }
361358}
362359
@@ -427,8 +424,8 @@ func (a *Appender) init(ctx context.Context) error {
427424 return nil
428425}
429426
430- func (a * Appender ) publishCheckpoint (ctx context.Context , size uint64 , root []byte ) error {
431- ctx , span := tracer .Start (ctx , "tessera.storage.gcp.publishCheckpoint " )
427+ func (a * Appender ) updateCheckpoint (ctx context.Context , size uint64 , root []byte ) error {
428+ ctx , span := tracer .Start (ctx , "tessera.storage.gcp.updateCheckpoint " )
432429 defer span .End ()
433430 span .SetAttributes (treeSizeKey .Int64 (otel .Clamp64 (size )))
434431
@@ -441,7 +438,7 @@ func (a *Appender) publishCheckpoint(ctx context.Context, size uint64, root []by
441438 return fmt .Errorf ("writeCheckpoint: %v" , err )
442439 }
443440
444- klog .V (2 ).Infof ("Published latest checkpoint: %d, %x" , size , root )
441+ klog .V (2 ).Infof ("Created and stored latest checkpoint: %d, %x" , size , root )
445442
446443 return nil
447444
@@ -473,16 +470,23 @@ func (lrs *logResourceStore) getCheckpoint(ctx context.Context) ([]byte, error)
473470//
474471// The location to which the tile is written is defined by the tile layout spec.
475472func (s * logResourceStore ) setTile (ctx context.Context , level , index uint64 , partial uint8 , data []byte ) error {
473+ start := time .Now ()
474+
476475 tPath := layout .TilePath (level , index , partial )
477- return s .objStore .setObject (ctx , tPath , data , & gcs.Conditions {DoesNotExist : true }, logContType , logCacheControl )
476+ err := s .objStore .setObject (ctx , tPath , data , & gcs.Conditions {DoesNotExist : true }, logContType , logCacheControl )
477+ opsHistogram .Record (ctx , time .Since (start ).Milliseconds (), metric .WithAttributes (opNameKey .String ("writeTile" )))
478+ return err
478479}
479480
480481// getTile retrieves the raw tile from the provided location.
481482//
482483// The location to which the tile is written is defined by the tile layout spec.
483484func (s * logResourceStore ) getTile (ctx context.Context , level , index uint64 , partial uint8 ) ([]byte , error ) {
485+ start := time .Now ()
486+
484487 tPath := layout .TilePath (level , index , partial )
485488 d , _ , err := s .objStore .getObject (ctx , tPath )
489+ opsHistogram .Record (ctx , time .Since (start ).Milliseconds (), metric .WithAttributes (opNameKey .String ("readTile" )))
486490 return d , err
487491}
488492
@@ -641,6 +645,7 @@ func (a *Appender) updateEntryBundles(ctx context.Context, fromSeq uint64, entri
641645 return nil
642646 }
643647
648+ numAdded := uint64 (0 )
644649 bundleIndex , entriesInBundle := fromSeq / layout .EntryBundleWidth , fromSeq % layout .EntryBundleWidth
645650 bundleWriter := & bytes.Buffer {}
646651 if entriesInBundle > 0 {
@@ -674,6 +679,8 @@ func (a *Appender) updateEntryBundles(ctx context.Context, fromSeq uint64, entri
674679 return fmt .Errorf ("bundleWriter.Write: %v" , err )
675680 }
676681 entriesInBundle ++
682+ fromSeq ++
683+ numAdded ++
677684 if entriesInBundle == layout .EntryBundleWidth {
678685 // This bundle is full, so we need to write it out...
679686 klog .V (1 ).Infof ("In-memory bundle idx %d is full, attempting write to GCS" , bundleIndex )
@@ -1010,7 +1017,17 @@ func (s *spannerCoordinator) nextIndex(ctx context.Context) (uint64, error) {
10101017// This function uses PubCoord with an exclusive lock to guarantee that only one tessera instance can attempt to publish
10111018// a checkpoint at any given time.
10121019func (s * spannerCoordinator ) publishCheckpoint (ctx context.Context , minStaleActive , minStaleRepub time.Duration , f func (context.Context , uint64 , []byte ) error ) error {
1020+ ctx , span := tracer .Start (ctx , "tessera.storage.gcp.publishCheckpoint" )
1021+ defer span .End ()
1022+
1023+ // outcomeAttrs is used to track any attributes which need to be attached to metrics based on the outcome of the attempt to publish.
1024+ var outcomeAttrs []attribute.KeyValue
1025+ start := time .Now ()
1026+
10131027 if _ , err := s .dbPool .ReadWriteTransaction (ctx , func (ctx context.Context , txn * spanner.ReadWriteTransaction ) error {
1028+ // Reset outcome attributes from any prior transaction attempts.
1029+ outcomeAttrs = []attribute.KeyValue {}
1030+
10141031 pRow , err := txn .ReadRowWithOptions (ctx , "PubCoord" , spanner.Key {0 }, []string {"publishedAt" , "size" }, & spanner.ReadOptions {LockHint : spannerpb .ReadRequest_LOCK_HINT_EXCLUSIVE })
10151032 if err != nil {
10161033 return fmt .Errorf ("failed to read PubCoord: %w" , err )
@@ -1024,6 +1041,7 @@ func (s *spannerCoordinator) publishCheckpoint(ctx context.Context, minStaleActi
10241041 cpAge := time .Since (pubAt )
10251042 if cpAge < minStaleActive {
10261043 klog .V (1 ).Infof ("publishCheckpoint: last checkpoint published %s ago (< required %s), not publishing new checkpoint" , cpAge , minStaleActive )
1044+ outcomeAttrs = append (outcomeAttrs , errorTypeKey .String ("skipped" ))
10271045 return nil
10281046 }
10291047
@@ -1051,6 +1069,7 @@ func (s *spannerCoordinator) publishCheckpoint(ctx context.Context, minStaleActi
10511069
10521070 if ! shouldPublish {
10531071 klog .V (1 ).Infof ("publishCheckpoint: skipping publish because tree hasn't grown and previous checkpoint is too recent" )
1072+ outcomeAttrs = append (outcomeAttrs , errorTypeKey .String ("skipped_no_growth" ))
10541073 return nil
10551074 }
10561075
@@ -1065,8 +1084,11 @@ func (s *spannerCoordinator) publishCheckpoint(ctx context.Context, minStaleActi
10651084
10661085 return nil
10671086 }); err != nil {
1087+ publishCount .Add (ctx , 1 , metric .WithAttributes (errorTypeKey .String ("error" )))
10681088 return err
10691089 }
1090+ opsHistogram .Record (ctx , time .Since (start ).Milliseconds (), metric .WithAttributes (opNameKey .String ("publishCheckpoint" )))
1091+ publishCount .Add (ctx , 1 , metric .WithAttributes (outcomeAttrs ... ))
10701092 return nil
10711093}
10721094
0 commit comments