Skip to content

Commit 73c8da8

Browse files
fmeumcopybara-github
authored andcommitted
Fix disk cache failures on concurrent read-write access on Windows (bazelbuild#28417)
This applies the fix made to the download cache in 753dc97 to the disk cache. Work towards bazelbuild#28408 Closes bazelbuild#28417. PiperOrigin-RevId: 864861790 Change-Id: I6850fc27f3336f44f8d366daac63e4822cb94f73
1 parent a147aac commit 73c8da8

File tree

6 files changed

+220
-16
lines changed

6 files changed

+220
-16
lines changed

src/main/java/com/google/devtools/build/lib/bazel/repository/cache/DownloadCache.java

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
import com.google.common.hash.Hasher;
2323
import com.google.common.io.BaseEncoding;
2424
import com.google.devtools.build.lib.vfs.DigestHashFunction;
25-
import com.google.devtools.build.lib.vfs.FileAccessException;
2625
import com.google.devtools.build.lib.vfs.FileSystemUtils;
2726
import com.google.devtools.build.lib.vfs.Path;
2827
import java.io.IOException;
@@ -242,19 +241,7 @@ private void storeCacheValue(
242241
Path tmpName = cacheEntry.getRelative(TMP_PREFIX + UUID.randomUUID());
243242
cacheEntry.createDirectoryAndParents();
244243
fileWriter.writeTo(tmpName);
245-
try {
246-
tmpName.renameTo(cacheValue);
247-
} catch (FileAccessException e) {
248-
// On Windows, atomically replacing a file that is currently opened (e.g. due to a concurrent
249-
// get on the cache) results in renameTo throwing this exception, which wraps an
250-
// AccessDeniedException. This case is benign since if the target path already exists, we know
251-
// that another thread won the race to place the file in the cache. As the exception is rather
252-
// generic and could result from other failure types, we rethrow the exception if the cache
253-
// entry hasn't been created.
254-
if (!cacheValue.exists()) {
255-
throw e;
256-
}
257-
}
244+
FileSystemUtils.renameToleratingConcurrentCreation(tmpName, cacheValue);
258245

259246
if (!Strings.isNullOrEmpty(canonicalId)) {
260247
String idHash = keyType.newHasher().putBytes(canonicalId.getBytes(UTF_8)).hash().toString();

src/main/java/com/google/devtools/build/lib/remote/disk/DiskCacheClient.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
import com.google.devtools.build.lib.remote.util.DigestOutputStream;
3636
import com.google.devtools.build.lib.remote.util.DigestUtil;
3737
import com.google.devtools.build.lib.remote.util.Utils;
38+
import com.google.devtools.build.lib.vfs.FileSystemUtils;
3839
import com.google.devtools.build.lib.vfs.Path;
3940
import com.google.protobuf.ByteString;
4041
import com.google.protobuf.ExtensionRegistryLite;
@@ -142,7 +143,7 @@ public void captureFile(Path src, Digest digest, Store store) throws IOException
142143
}
143144

144145
target.getParentDirectory().createDirectoryAndParents();
145-
src.renameTo(target);
146+
FileSystemUtils.renameToleratingConcurrentCreation(src, target);
146147
}
147148

148149
private ListenableFuture<Void> download(Digest digest, OutputStream out, Store store) {
@@ -333,7 +334,7 @@ public void saveFile(Digest digest, Store store, InputStream in) throws IOExcept
333334
}
334335
}
335336
path.getParentDirectory().createDirectoryAndParents();
336-
temp.renameTo(path);
337+
FileSystemUtils.renameToleratingConcurrentCreation(temp, path);
337338
} catch (IOException e) {
338339
try {
339340
temp.delete();

src/main/java/com/google/devtools/build/lib/vfs/FileSystemUtils.java

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import com.google.common.io.ByteStreams;
2626
import com.google.devtools.build.lib.concurrent.ThreadSafety.ConditionallyThreadSafe;
2727
import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe;
28+
import com.google.devtools.build.lib.util.OS;
2829
import com.google.devtools.build.lib.util.StringEncoding;
2930
import java.io.FileNotFoundException;
3031
import java.io.IOException;
@@ -489,6 +490,40 @@ public static MoveResult moveFile(Path from, Path to) throws IOException {
489490
}
490491
}
491492

493+
/**
494+
* Atomically renames a source file to a target file, tolerating the case where another thread has
495+
* concurrently created the target file (e.g. because it is known to have the same content in a
496+
* CAS-like structure).
497+
*
498+
* <p>This handles a Windows-specific edge case: when the target file is being read by another
499+
* process (e.g., during a concurrent cache lookup), the rename operation fails with a {@link
500+
* FileAccessException}. If the target file already exists when this happens, it means another
501+
* thread won the race to create it, so we can safely delete the source file.
502+
*
503+
* <p>The parent directories of the target file must already exist.
504+
*
505+
* @param source the file to rename
506+
* @param target the destination path
507+
*/
508+
@ThreadSafe
509+
public static void renameToleratingConcurrentCreation(Path source, Path target)
510+
throws IOException {
511+
try {
512+
source.renameTo(target);
513+
} catch (FileAccessException e) {
514+
// On Windows, atomically replacing a file that is currently opened (e.g. due to a concurrent
515+
// get on the cache) results in renameTo throwing this exception, which wraps an
516+
// AccessDeniedException. This case is benign since if the target path already exists, we know
517+
// that another thread won the race to place the file in the cache. As the exception is rather
518+
// generic and could result from other failure types, we rethrow the exception if the cache
519+
// entry hasn't been created.
520+
if (OS.getCurrent() != OS.WINDOWS || !target.exists()) {
521+
throw e;
522+
}
523+
source.delete();
524+
}
525+
}
526+
492527
/* Directory tree operations. */
493528

494529
/**

src/test/java/com/google/devtools/build/lib/remote/disk/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ java_test(
3333
"//src/test/java/com/google/devtools/build/lib:test_runner",
3434
"//src/test/java/com/google/devtools/build/lib/testutil:TestUtils",
3535
"//src/test/java/com/google/devtools/build/lib/testutil:external_file_system_lock",
36+
"//src/test/java/com/google/devtools/build/lib/vfs/util",
3637
"//third_party:error_prone_annotations",
3738
"//third_party:guava",
3839
"//third_party:junit4",

src/test/java/com/google/devtools/build/lib/remote/disk/DiskCacheClientTest.java

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,25 @@
3232
import com.google.devtools.build.lib.remote.common.OutputDigestMismatchException;
3333
import com.google.devtools.build.lib.remote.common.RemoteCacheClient.ActionKey;
3434
import com.google.devtools.build.lib.remote.util.DigestUtil;
35+
import com.google.devtools.build.lib.testutil.TestUtils;
3536
import com.google.devtools.build.lib.vfs.DigestHashFunction;
3637
import com.google.devtools.build.lib.vfs.FileSystem;
3738
import com.google.devtools.build.lib.vfs.FileSystemUtils;
3839
import com.google.devtools.build.lib.vfs.Path;
3940
import com.google.devtools.build.lib.vfs.SyscallCache;
4041
import com.google.devtools.build.lib.vfs.bazel.BazelHashFunctions;
4142
import com.google.devtools.build.lib.vfs.inmemoryfs.InMemoryFileSystem;
43+
import com.google.devtools.build.lib.vfs.util.FileSystems;
4244
import com.google.errorprone.annotations.CanIgnoreReturnValue;
4345
import com.google.protobuf.ByteString;
4446
import com.google.protobuf.Message;
4547
import java.io.IOException;
48+
import java.io.OutputStream;
49+
import java.util.ArrayList;
50+
import java.util.concurrent.CountDownLatch;
51+
import java.util.concurrent.ExecutionException;
52+
import java.util.concurrent.Executors;
53+
import java.util.concurrent.Future;
4654
import org.junit.After;
4755
import org.junit.Before;
4856
import org.junit.Test;
@@ -346,6 +354,86 @@ public void downloadActionResult_withReferencedTreeFileMissing_returnsNull() thr
346354
assertThat(result).isNull();
347355
}
348356

357+
@Test
358+
public void concurrentUploadDownload()
359+
throws IOException, ExecutionException, InterruptedException {
360+
var nativeDiskCacheDir = TestUtils.createUniqueTmpDir(FileSystems.getNativeFileSystem());
361+
var nativeClient =
362+
new DiskCacheClient(nativeDiskCacheDir, DIGEST_UTIL, /* verifyDownloads= */ false);
363+
var tasks = new ArrayList<Future<?>>();
364+
// Use 1 MB blobs to increase the window for concurrent access during write/rename.
365+
var contentSize = 1024 * 1024;
366+
var numConcurrentOps = 10;
367+
try (var executor = Executors.newFixedThreadPool(numConcurrentOps)) {
368+
for (int attempt = 0; attempt < 100; attempt++) {
369+
var contentArray = new byte[contentSize];
370+
// Fill with a pattern based on the attempt number.
371+
for (int i = 0; i < contentSize; i++) {
372+
contentArray[i] = (byte) (attempt + i);
373+
}
374+
var contentBytes = ByteString.copyFrom(contentArray);
375+
var contentDigest = DIGEST_UTIL.compute(contentArray);
376+
// Use a latch to ensure all concurrent tasks start at roughly the same time.
377+
var startLatch = new CountDownLatch(numConcurrentOps);
378+
// Half the tasks do uploads, half do downloads with a slow OutputStream to keep the file
379+
// open longer. This maximizes the chance of a rename failing because a download has the
380+
// file open.
381+
for (int concurrentOp = 0; concurrentOp < numConcurrentOps; concurrentOp++) {
382+
boolean isUploader = concurrentOp % 2 == 0;
383+
tasks.add(
384+
executor.submit(
385+
() -> {
386+
// Signal ready and wait for all tasks to be ready.
387+
startLatch.countDown();
388+
startLatch.await();
389+
if (isUploader) {
390+
getFromFuture(nativeClient.uploadBlob(contentDigest, contentBytes));
391+
} else {
392+
// Use a slow OutputStream that pauses periodically to keep the file open
393+
// longer during download.
394+
var out =
395+
new OutputStream() {
396+
private int bytesWritten = 0;
397+
398+
@Override
399+
public void write(int b) throws IOException {
400+
bytesWritten++;
401+
maybeSleep();
402+
}
403+
404+
@Override
405+
public void write(byte[] b, int off, int len) throws IOException {
406+
bytesWritten += len;
407+
maybeSleep();
408+
}
409+
410+
private void maybeSleep() {
411+
// Sleep every 64KB to slow down the download.
412+
if (bytesWritten % (64 * 1024) < 100) {
413+
try {
414+
Thread.sleep(1);
415+
} catch (InterruptedException e) {
416+
Thread.currentThread().interrupt();
417+
}
418+
}
419+
}
420+
};
421+
try {
422+
getFromFuture(nativeClient.downloadBlob(contentDigest, out));
423+
} catch (CacheNotFoundException ignored) {
424+
// File not yet uploaded by another task.
425+
}
426+
}
427+
return null;
428+
}));
429+
}
430+
}
431+
for (var task : tasks) {
432+
task.get();
433+
}
434+
}
435+
}
436+
349437
private Tree getTreeWithFile(Digest fileDigest) {
350438
return Tree.newBuilder()
351439
.addChildren(Directory.newBuilder().addFiles(FileNode.newBuilder().setDigest(fileDigest)))

src/test/java/com/google/devtools/build/lib/vfs/FileSystemUtilsTest.java

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import static com.google.devtools.build.lib.vfs.FileSystemUtils.moveFile;
2121
import static com.google.devtools.build.lib.vfs.FileSystemUtils.relativePath;
2222
import static com.google.devtools.build.lib.vfs.FileSystemUtils.removeExtension;
23+
import static com.google.devtools.build.lib.vfs.FileSystemUtils.renameToleratingConcurrentCreation;
2324
import static com.google.devtools.build.lib.vfs.FileSystemUtils.touchFile;
2425
import static com.google.devtools.build.lib.vfs.FileSystemUtils.traverseTree;
2526
import static java.nio.charset.StandardCharsets.ISO_8859_1;
@@ -1016,6 +1017,97 @@ public void testCreateHardLinkForNonEmptyDirectory_success() throws Exception {
10161017
.isEqualTo(fileSystem.stat(originalPath3.asFragment(), false).getNodeId());
10171018
}
10181019

1020+
@Test
1021+
public void testRenameToleratingConcurrentCreation_success() throws Exception {
1022+
Path source = fileSystem.getPath("/source");
1023+
Path target = fileSystem.getPath("/target");
1024+
target.getParentDirectory().createDirectoryAndParents();
1025+
1026+
FileSystemUtils.writeContent(source, UTF_8, "hello world");
1027+
1028+
renameToleratingConcurrentCreation(source, target);
1029+
1030+
assertThat(source.exists()).isFalse();
1031+
assertThat(target.exists()).isTrue();
1032+
assertThat(FileSystemUtils.readContent(target, UTF_8)).isEqualTo("hello world");
1033+
}
1034+
1035+
@Test
1036+
public void testRenameToleratingConcurrentCreation_sourceDoesNotExist() throws Exception {
1037+
Path source = fileSystem.getPath("/source");
1038+
Path target = fileSystem.getPath("/target");
1039+
target.getParentDirectory().createDirectoryAndParents();
1040+
1041+
assertThrows(
1042+
FileNotFoundException.class, () -> renameToleratingConcurrentCreation(source, target));
1043+
}
1044+
1045+
@Test
1046+
public void testRenameToleratingConcurrentCreation_targetParentDoesNotExist() throws Exception {
1047+
Path source = fileSystem.getPath("/source");
1048+
Path target = fileSystem.getPath("/nonexistent/target");
1049+
1050+
FileSystemUtils.writeContent(source, UTF_8, "hello world");
1051+
1052+
assertThrows(
1053+
FileNotFoundException.class, () -> renameToleratingConcurrentCreation(source, target));
1054+
}
1055+
1056+
@Test
1057+
public void testRenameToleratingConcurrentCreation_toleratesFileAccessExceptionOnWindows()
1058+
throws Exception {
1059+
FileSystem fs = new FileAccessExceptionOnRenameFs();
1060+
Path source = fs.getPath("/source");
1061+
Path target = fs.getPath("/target");
1062+
source.getParentDirectory().createDirectoryAndParents();
1063+
target.getParentDirectory().createDirectoryAndParents();
1064+
1065+
FileSystemUtils.writeContent(source, UTF_8, "hello world");
1066+
FileSystemUtils.writeContent(target, UTF_8, "existing content");
1067+
1068+
if (OS.getCurrent() == OS.WINDOWS) {
1069+
renameToleratingConcurrentCreation(source, target);
1070+
1071+
assertThat(source.exists()).isFalse();
1072+
assertThat(target.exists()).isTrue();
1073+
assertThat(FileSystemUtils.readContent(target, UTF_8)).isEqualTo("existing content");
1074+
} else {
1075+
assertThrows(
1076+
FileAccessException.class, () -> renameToleratingConcurrentCreation(source, target));
1077+
1078+
assertThat(source.exists()).isTrue();
1079+
}
1080+
}
1081+
1082+
@Test
1083+
public void testRenameToleratingConcurrentCreation_throwsIfTargetDoesNotExistAfterException()
1084+
throws Exception {
1085+
FileSystem fs = new FileAccessExceptionOnRenameFs();
1086+
Path source = fs.getPath("/source");
1087+
Path target = fs.getPath("/target");
1088+
source.getParentDirectory().createDirectoryAndParents();
1089+
target.getParentDirectory().createDirectoryAndParents();
1090+
1091+
FileSystemUtils.writeContent(source, UTF_8, "hello world");
1092+
1093+
assertThrows(
1094+
FileAccessException.class, () -> renameToleratingConcurrentCreation(source, target));
1095+
1096+
assertThat(source.exists()).isTrue();
1097+
}
1098+
1099+
/** A file system that throws FileAccessException on rename, simulating Windows behavior. */
1100+
static class FileAccessExceptionOnRenameFs extends InMemoryFileSystem {
1101+
FileAccessExceptionOnRenameFs() {
1102+
super(DigestHashFunction.SHA256);
1103+
}
1104+
1105+
@Override
1106+
public void renameTo(PathFragment source, PathFragment target) throws IOException {
1107+
throw new FileAccessException("Access denied (simulated Windows behavior)");
1108+
}
1109+
}
1110+
10191111
static class MultipleDeviceFS extends InMemoryFileSystem {
10201112
MultipleDeviceFS() {
10211113
super(DigestHashFunction.SHA256);

0 commit comments

Comments
 (0)