Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ class BigtableServiceFactory implements Serializable {
private static final String BIGTABLE_ENABLE_CLIENT_SIDE_METRICS =
"bigtable_enable_client_side_metrics";

private static final String BIGTABLE_ENABLE_SKIP_LARGE_ROWS = "bigtable_enable_skip_large_rows";

@AutoValue
abstract static class ConfigId implements Serializable {

Expand Down Expand Up @@ -133,7 +135,10 @@ BigtableServiceEntry getServiceForReading(
BigtableDataSettings.enableBuiltinMetrics();
}

BigtableService service = new BigtableServiceImpl(settings);
boolean skipLargeRows =
ExperimentalOptions.hasExperiment(pipelineOptions, BIGTABLE_ENABLE_SKIP_LARGE_ROWS);

BigtableService service = new BigtableServiceImpl(settings, skipLargeRows);
entry = BigtableServiceEntry.create(configId, service);
entries.put(configId.id(), entry);
refCounts.put(configId.id(), new AtomicInteger(1));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,17 @@ class BigtableServiceImpl implements BigtableService {
private static final double WATERMARK_PERCENTAGE = .1;
private static final long MIN_BYTE_BUFFER_SIZE = 100 * 1024 * 1024; // 100MB

private final boolean skipLargeRows;

/**
 * Creates a Bigtable service that reads rows in full; large rows are never skipped.
 *
 * <p>Delegates to the two-argument constructor with {@code skipLargeRows = false}.
 *
 * @param settings connection settings for the underlying Bigtable data client
 * @throws IOException if the data client cannot be created
 */
BigtableServiceImpl(BigtableDataSettings settings) throws IOException {
this(settings, false); // skipLargeRows disabled by default
}

/**
 * Creates a Bigtable service backed by a newly created data client.
 *
 * @param settings connection settings used to build the {@link BigtableDataClient}
 * @param skipLargeRows whether reads should use the skip-large-rows callable so that
 *     oversized rows are dropped instead of failing the read
 * @throws IOException if the data client cannot be created from {@code settings}
 */
BigtableServiceImpl(BigtableDataSettings settings, boolean skipLargeRows) throws IOException {
  this.skipLargeRows = skipLargeRows;
  this.instanceId = settings.getInstanceId();
  this.projectId = settings.getProjectId();
  // Client creation is the only step that can throw; if it does, the instance never escapes.
  this.client = BigtableDataClient.create(settings);
  LOG.info("Started Bigtable service with settings {}", settings);
}

Expand Down Expand Up @@ -142,6 +149,7 @@ static class BigtableReaderImpl implements Reader {
private ServerStream<Row> stream;

private boolean exhausted;
private final boolean skipLargeRows;

/**
 * Constructs a reader over the given key ranges of one table.
 *
 * @param client shared data client used to issue the read
 * @param projectId GCP project of the table
 * @param instanceId Bigtable instance of the table
 * @param tableId table to read from
 * @param ranges key ranges to scan
 * @param rowFilter optional server-side row filter; {@code null} for none
 * @param skipLargeRows whether oversized rows are skipped instead of failing the read
 */
@VisibleForTesting
BigtableReaderImpl(
    BigtableDataClient client,
    String projectId,
    String instanceId,
    String tableId,
    List<ByteKeyRange> ranges,
    @Nullable RowFilter rowFilter,
    boolean skipLargeRows) {
  this.skipLargeRows = skipLargeRows;
  this.rowFilter = rowFilter;
  this.ranges = ranges;
  this.tableId = tableId;
  this.instanceId = instanceId;
  this.projectId = projectId;
  this.client = client;
}

@Override
Expand All @@ -173,11 +183,19 @@ public boolean start() throws IOException {
if (rowFilter != null) {
query.filter(Filters.FILTERS.fromProto(rowFilter));
}

try {
stream =
client
.readRowsCallable(new BigtableRowProtoAdapter())
.call(query, GrpcCallContext.createDefault());
if (skipLargeRows) {
stream =
client
.skipLargeRowsCallable(new BigtableRowProtoAdapter())
.call(query, GrpcCallContext.createDefault());
} else {
stream =
client
.readRowsCallable(new BigtableRowProtoAdapter())
.call(query, GrpcCallContext.createDefault());
}
results = stream.iterator();
serviceCallMetric.call("ok");
} catch (StatusRuntimeException e) {
Expand Down Expand Up @@ -667,7 +685,8 @@ public Reader createReader(BigtableSource source) throws IOException {
instanceId,
source.getTableId().get(),
source.getRanges(),
source.getRowFilter());
source.getRowFilter(),
skipLargeRows);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,15 @@
import com.google.cloud.bigtable.data.v2.BigtableDataClient;
import com.google.cloud.bigtable.data.v2.BigtableDataSettings;
import com.google.cloud.bigtable.data.v2.models.RowMutation;
import com.google.cloud.bigtable.data.v2.models.TableId;
import java.io.IOException;
import java.util.Date;
import org.apache.beam.repackaged.core.org.apache.commons.lang3.StringUtils;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.extensions.gcp.options.GcpOptions;
import org.apache.beam.sdk.metrics.Lineage;
import org.apache.beam.sdk.options.ExperimentalOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.testing.TestPipeline;
Expand Down Expand Up @@ -148,6 +150,43 @@ public void testE2EBigtableSegmentRead() {
checkLineageSourceMetric(r, tableId);
}

/**
 * E2E test: with the {@code bigtable_enable_skip_large_rows} experiment enabled, rows larger
 * than the service limit are skipped and only the small rows are returned by the read.
 */
@Test
public void testE2EBigtableReadWithSkippingLargeRows() {
  // Write a few rows first
  int numRows = 20;
  int numLargeRows = 3;
  // Each mutation can't exceed 100 MB. Break each large row down into 3 columns written by
  // 3 separate ~90 MB mutations, so the row total (~270 MB) exceeds the large-row limit.
  String value = StringUtils.repeat("v", 90 * 1000 * 1000);
  for (int i = 0; i < numLargeRows; i++) {
    for (int j = 0; j < 3; j++) {
      client.mutateRow(
          RowMutation.create(TableId.of(tableId), "large_row-" + i)
              // Qualifier must vary with j (not i): otherwise all three mutations write the
              // same column and the row never grows to 3 distinct ~90 MB cells.
              .setCell(COLUMN_FAMILY_NAME, "q" + j, value));
    }
  }

  // Small rows that the pipeline is expected to return.
  for (int i = 0; i < numRows - numLargeRows; i++) {
    client.mutateRow(
        RowMutation.create(TableId.of(tableId), "row-" + i)
            .setCell(COLUMN_FAMILY_NAME, "q", "value"));
  }

  ExperimentalOptions.addExperiment(
      options.as(ExperimentalOptions.class), "bigtable_enable_skip_large_rows");

  Pipeline p = Pipeline.create(options);
  PCollection<Long> count =
      p.apply(
              BigtableIO.read()
                  .withProjectId(project)
                  .withInstanceId(options.getInstanceId())
                  .withTableId(tableId))
          .apply(Count.globally());
  // Only the small rows should survive; the large ones are skipped, not read.
  PAssert.thatSingleton(count).isEqualTo((long) numRows - numLargeRows);
  PipelineResult r = p.run();
  checkLineageSourceMetric(r, tableId);
}

private void checkLineageSourceMetric(PipelineResult r, String tableId) {
// TODO(https://github.com/apache/beam/issues/32071) test malformed,
// when pipeline.run() is non-blocking, the metrics are not available by the time of query
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,8 @@ public void testRead() throws IOException {
bigtableDataSettings.getInstanceId(),
mockBigtableSource.getTableId().get(),
mockBigtableSource.getRanges(),
null);
null,
false);

underTest.start();
Assert.assertEquals(expectedRow, underTest.getCurrentRow());
Expand Down
Loading