diff --git a/hadoop-tools/hadoop-azure/pom.xml b/hadoop-tools/hadoop-azure/pom.xml index d0bfc6852befe..4262ebd196fec 100644 --- a/hadoop-tools/hadoop-azure/pom.xml +++ b/hadoop-tools/hadoop-azure/pom.xml @@ -26,7 +26,7 @@ Apache Hadoop Azure support This module contains code to support integration with Azure. - Currently this consists of a filesystem client to read data from + Currently, this consists of Azure Blob File System client to read data from and write data to Azure Storage. jar @@ -65,6 +65,7 @@ Max + org.apache.maven.plugins maven-checkstyle-plugin @@ -89,7 +90,7 @@ - + org.apache.maven.plugins maven-dependency-plugin @@ -109,7 +110,6 @@ - @@ -253,6 +252,7 @@ hadoop-mapreduce-client-core provided + org.apache.hadoop hadoop-mapreduce-client-app test + org.apache.hadoop hadoop-mapreduce-client-app test-jar test + org.apache.hadoop hadoop-mapreduce-client-jobclient test test-jar + org.apache.hadoop hadoop-distcp @@ -329,37 +336,44 @@ mockito-core test + org.apache.hadoop hadoop-minikdc test + org.bouncycastle bcprov-jdk18on test + org.bouncycastle bcpkix-jdk18on test + org.assertj assertj-core test + org.junit.jupiter junit-jupiter-api test + org.junit.jupiter junit-jupiter-engine test + org.junit.jupiter junit-jupiter-params @@ -368,169 +382,6 @@ - - parallel-tests-wasb - - - parallel-tests - wasb - - - - - - org.apache.hadoop - hadoop-maven-plugins - - - parallel-tests-createdir - - parallel-tests-createdir - - - - - - org.apache.maven.plugins - maven-surefire-plugin - - - default-test - - test - - - ${testsThreadCount} - false - ${maven-surefire-plugin.argLine} -DminiClusterDedicatedDirs=true - ${fs.azure.scale.test.timeout} - - ${test.build.data}/${surefire.forkNumber} - ${test.build.dir}/${surefire.forkNumber} - ${hadoop.tmp.dir}/${surefire.forkNumber} - fork-000${surefire.forkNumber} - ${fs.azure.scale.test.enabled} - ${fs.azure.scale.test.huge.filesize} - ${fs.azure.scale.test.huge.partitionsize} - ${fs.azure.scale.test.timeout} - ${fs.azure.scale.test.list.performance.threads} - ${fs.azure.scale.test.list.performance.files} - - ${http.maxConnections} - - **/azure/Test*.java - **/azure/**/Test*.java - - - **/azure/**/TestRollingWindowAverage*.java - - - - - serialized-test-wasb - - test - - - 1 - false - ${maven-surefire-plugin.argLine} -DminiClusterDedicatedDirs=true - ${fs.azure.scale.test.timeout} - - ${test.build.data}/${surefire.forkNumber} - ${test.build.dir}/${surefire.forkNumber} - ${hadoop.tmp.dir}/${surefire.forkNumber} - fork-000${surefire.forkNumber} - ${fs.azure.scale.test.enabled} - ${fs.azure.scale.test.huge.filesize} - ${fs.azure.scale.test.huge.partitionsize} - ${fs.azure.scale.test.timeout} - ${fs.azure.scale.test.list.performance.threads} - ${fs.azure.scale.test.list.performance.files} - - ${http.maxConnections} - - - **/azure/**/TestRollingWindowAverage*.java - - - - - - - org.apache.maven.plugins - maven-failsafe-plugin - - - default-integration-test-wasb - - integration-test - verify - - - ${testsThreadCount} - false - ${maven-surefire-plugin.argLine} -DminiClusterDedicatedDirs=true - ${fs.azure.scale.test.timeout} - - - true - ${test.build.data}/${surefire.forkNumber} - ${test.build.dir}/${surefire.forkNumber} - ${hadoop.tmp.dir}/${surefire.forkNumber} - - - - - - fork-000${surefire.forkNumber} - - ${fs.azure.scale.test.enabled} - ${fs.azure.scale.test.huge.filesize} - ${fs.azure.scale.test.huge.partitionsize} - ${fs.azure.scale.test.timeout} - ${fs.azure.scale.test.list.performance.threads} - ${fs.azure.scale.test.list.performance.files} - - - - 
**/azure/ITest*.java - **/azure/**/ITest*.java - - - **/azure/ITestNativeFileSystemStatistics.java - - - - - - - sequential-integration-tests-wasb - - integration-test - verify - - - ${fs.azure.scale.test.timeout} - - false - ${fs.azure.scale.test.enabled} - ${fs.azure.scale.test.huge.filesize} - ${fs.azure.scale.test.huge.partitionsize} - ${fs.azure.scale.test.timeout} - ${fs.azure.scale.test.list.performance.threads} - ${fs.azure.scale.test.list.performance.files} - - - **/azure/ITestNativeFileSystemStatistics.java - - - - - - - - parallel-tests-abfs @@ -764,36 +615,6 @@ **/Test*.java - - **/TestRollingWindowAverage*.java - - - - - serialized-test - - test - - - 1 - false - ${maven-surefire-plugin.argLine} -DminiClusterDedicatedDirs=true - ${fs.azure.scale.test.timeout} - - ${test.build.data}/${surefire.forkNumber} - ${test.build.dir}/${surefire.forkNumber} - ${hadoop.tmp.dir}/${surefire.forkNumber} - fork-000${surefire.forkNumber} - ${fs.azure.scale.test.enabled} - ${fs.azure.scale.test.huge.filesize} - ${fs.azure.scale.test.huge.partitionsize} - ${fs.azure.scale.test.timeout} - ${fs.azure.scale.test.list.performance.threads} - ${fs.azure.scale.test.list.performance.files} - - - **/TestRollingWindowAverage*.java - @@ -850,18 +671,7 @@ **/ITest*.java - **/ITestFileSystemOperationsExceptionHandlingMultiThreaded.java - **/ITestFileSystemOperationsWithThreads.java - **/ITestOutOfBandAzureBlobOperationsLive.java - **/ITestNativeAzureFileSystemAuthorizationWithOwner.java - **/ITestNativeAzureFileSystemConcurrencyLive.java - **/ITestNativeAzureFileSystemLive.java - **/ITestNativeAzureFSPageBlobLive.java **/ITestAzureBlobFileSystemRandomRead.java - **/ITestWasbRemoteCallHelper.java - **/ITestBlockBlobInputStream.java - **/ITestWasbAbfsCompatibility.java - **/ITestNativeFileSystemStatistics.java @@ -884,20 +694,10 @@ ${fs.azure.scale.test.list.performance.threads} ${fs.azure.scale.test.list.performance.files} - ${http.maxConnections} + ${http.maxConnections} + - **/ITestWasbAbfsCompatibility.java - **/ITestFileSystemOperationsExceptionHandlingMultiThreaded.java - **/ITestFileSystemOperationsWithThreads.java - **/ITestOutOfBandAzureBlobOperationsLive.java - **/ITestNativeAzureFileSystemAuthorizationWithOwner.java - **/ITestNativeAzureFileSystemConcurrencyLive.java - **/ITestNativeAzureFileSystemLive.java - **/ITestNativeAzureFSPageBlobLive.java **/ITestAzureBlobFileSystemRandomRead.java - **/ITestWasbRemoteCallHelper.java - **/ITestBlockBlobInputStream.java - **/ITestNativeFileSystemStatistics.java @@ -957,5 +757,6 @@ true + diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureException.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureException.java deleted file mode 100644 index 48ec064d5fa88..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureException.java +++ /dev/null @@ -1,44 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.IOException; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; - -/** - * Thrown if there is a problem communicating with Azure Storage service. - */ -@InterfaceAudience.Public -@InterfaceStability.Evolving -public class AzureException extends IOException { - private static final long serialVersionUID = 1L; - - public AzureException(String message) { - super(message); - } - - public AzureException(String message, Throwable cause) { - super(message, cause); - } - - public AzureException(Throwable t) { - super(t); - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureFileSystemThreadPoolExecutor.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureFileSystemThreadPoolExecutor.java deleted file mode 100644 index 98d28d1ea421a..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureFileSystemThreadPoolExecutor.java +++ /dev/null @@ -1,347 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.IOException; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.RejectedExecutionException; -import java.util.concurrent.ThreadFactory; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; - -import org.apache.hadoop.util.Time; -import org.apache.hadoop.util.concurrent.SubjectInheritingThread; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.hadoop.classification.VisibleForTesting; - -class AzureFileSystemThreadPoolExecutor { - - public static final Logger LOG = LoggerFactory.getLogger(AzureFileSystemThreadPoolExecutor.class); - - /* - * Number of threads to keep in the pool. - */ - private int threadCount; - - /* - * Prefix to be used for naming threads. - */ - private String threadNamePrefix; - - /* - * File system operation like delete/rename. Used for logging purpose. - */ - private String operation; - - /* - * Source blob key used for file operation. Used for logging purpose. - */ - private String key; - - /* - * Configuration name for recommendations. Used for logging purpose. 
- */ - private String config; - - /** - * Creates a new AzureFileSystemThreadPoolExecutor object. - * - * @param threadCount - * Number of threads to be used after reading user configuration. - * @param threadNamePrefix - * Prefix to be used to name threads for the file operation. - * @param operation - * File system operation like delete/rename. Used for logging purpose. - * @param key - * Source blob key used for file operation. Used for logging purpose. - * @param config - * Configuration name for recommendations. Used for logging purpose. - */ - public AzureFileSystemThreadPoolExecutor(int threadCount, String threadNamePrefix, - String operation, String key, String config) { - this.threadCount = threadCount; - this.threadNamePrefix = threadNamePrefix; - this.operation = operation; - this.key = key; - this.config = config; - } - - /** - * Gets a new thread pool - * @param threadCount - * Number of threads to keep in the pool. - * @param threadNamePrefix - * Prefix to be used for naming threads. - * - * @return - * Returns a new thread pool. - */ - @VisibleForTesting - ThreadPoolExecutor getThreadPool(int threadCount) throws Exception { - return new ThreadPoolExecutor(threadCount, threadCount, 2, TimeUnit.SECONDS, - new LinkedBlockingQueue(), new AzureFileSystemThreadFactory(this.threadNamePrefix)); - } - - /** - * Execute the file operation parallel using threads. All threads works on a - * single working set of files stored in input 'contents'. The synchronization - * between multiple threads is achieved through retrieving atomic index value - * from the array. Once thread gets the index, it retrieves the file and initiates - * the file operation. The advantage with this method is that file operations - * doesn't get serialized due to any thread. Also, the input copy is not changed - * such that caller can reuse the list for other purposes. - * - * This implementation also considers that failure of operation on single file - * is considered as overall operation failure. All threads bail out their execution - * as soon as they detect any single thread either got exception or operation is failed. - * - * @param contents - * List of blobs on which operation to be done. - * @param threadOperation - * The actual operation to be executed by each thread on a file. - * - * @param operationStatus - * Returns true if the operation is success, false if operation is failed. - * @throws IOException - * - */ - boolean executeParallel(FileMetadata[] contents, AzureFileSystemThreadTask threadOperation) throws IOException { - - boolean operationStatus = false; - boolean threadsEnabled = false; - int threadCount = this.threadCount; - ThreadPoolExecutor ioThreadPool = null; - - // Start time for file operation - long start = Time.monotonicNow(); - - // If number of files are less then reduce threads to file count. - threadCount = Math.min(contents.length, threadCount); - - if (threadCount > 1) { - try { - ioThreadPool = getThreadPool(threadCount); - threadsEnabled = true; - } catch(Exception e) { - // The possibility of this scenario is very remote. Added this code as safety net. - LOG.warn("Failed to create thread pool with threads {} for operation {} on blob {}." - + " Use config {} to set less number of threads. 
Setting config value to <= 1 will disable threads.", - threadCount, operation, key, config); - } - } else { - LOG.warn("Disabling threads for {} operation as thread count {} is <= 1", operation, threadCount); - } - - if (threadsEnabled) { - LOG.debug("Using thread pool for {} operation with threads {}", operation, threadCount); - boolean started = false; - AzureFileSystemThreadRunnable runnable = new AzureFileSystemThreadRunnable(contents, threadOperation, operation); - - // Don't start any new requests if there is an exception from any one thread. - for (int i = 0; i < threadCount && runnable.lastException == null && runnable.operationStatus; i++) - { - try { - ioThreadPool.execute(runnable); - started = true; - } catch (RejectedExecutionException ex) { - // If threads can't be scheduled then report error and move ahead with next thread. - // Don't fail operation due to this issue. - LOG.error("Rejected execution of thread for {} operation on blob {}." - + " Continuing with existing threads. Use config {} to set less number of threads" - + " to avoid this error", operation, key, config); - } - } - - // Stop accepting any new execute requests. - ioThreadPool.shutdown(); - - try { - // Wait for threads to terminate. Keep time out as large value - ioThreadPool.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS); - } catch(InterruptedException intrEx) { - // If current thread got interrupted then shutdown all threads now. - ioThreadPool.shutdownNow(); - - // Restore the interrupted status - Thread.currentThread().interrupt(); - LOG.error("Threads got interrupted {} blob operation for {} " - , operation, key); - } - - int threadsNotUsed = threadCount - runnable.threadsUsed.get(); - if (threadsNotUsed > 0) { - LOG.warn("{} threads not used for {} operation on blob {}", threadsNotUsed, operation, key); - } - - if (!started) { - // No threads started. Fall back to serial mode. - threadsEnabled = false; - LOG.info("Not able to schedule threads to {} blob {}. Fall back to {} blob serially." - , operation, key, operation); - } else { - IOException lastException = runnable.lastException; - - // There are no exceptions from threads and no operation failures. Consider this scenario - // as failure only if file operations are not done on all files. - if (lastException == null && runnable.operationStatus && runnable.filesProcessed.get() < contents.length) { - LOG.error("{} failed as operation on subfolders and files failed.", operation); - lastException = new IOException(operation + " failed as operation on subfolders and files failed."); - } - - if (lastException != null) { - // Threads started and executed. One or more threads seems to have hit exception. - // Raise the same exception. - throw lastException; - } - - operationStatus = runnable.operationStatus; - } - } - - if (!threadsEnabled) { - // No threads. Serialize the operation. Clear any last exceptions. - LOG.debug("Serializing the {} operation", operation); - for (int i = 0; i < contents.length; i++) { - if (!threadOperation.execute(contents[i])) { - LOG.warn("Failed to {} file {}", operation, contents[i]); - return false; - } - } - - // Operation is success - operationStatus = true; - } - - // Find the duration of time taken for file operation - long end = Time.monotonicNow(); - LOG.info("Time taken for {} operation is: {} ms with threads: {}", operation, (end - start), threadCount); - - return operationStatus; - } - - /** - * A ThreadFactory for Azure File operation threads with meaningful names helpful - * for debugging purposes. 
- */ - static class AzureFileSystemThreadFactory implements ThreadFactory { - - private String threadIdPrefix = "AzureFileSystemThread"; - - /** - * Atomic integer to provide thread id for thread names. - */ - private AtomicInteger threadSequenceNumber = new AtomicInteger(0); - - public AzureFileSystemThreadFactory(String prefix) { - threadIdPrefix = prefix; - } - - @Override - public Thread newThread(Runnable r) { - Thread t = new SubjectInheritingThread(r); - - // Use current thread name as part in naming thread such that use of - // same file system object will have unique names. - t.setName(String.format("%s-%s-%d", threadIdPrefix, Thread.currentThread().getName(), - threadSequenceNumber.getAndIncrement())); - return t; - } - - } - - static class AzureFileSystemThreadRunnable implements Runnable { - - // Tracks if any thread has raised exception. - private volatile IOException lastException = null; - - // Tracks if any thread has failed execution. - private volatile boolean operationStatus = true; - - // Atomic tracker to retrieve index of next file to be processed - private AtomicInteger fileIndex = new AtomicInteger(0); - - // Atomic tracker to count number of files successfully processed - private AtomicInteger filesProcessed = new AtomicInteger(0); - - // Atomic tracker to retrieve number of threads used to do at least one file operation. - private AtomicInteger threadsUsed = new AtomicInteger(0); - - // Type of file system operation - private String operation = "Unknown"; - - // List of files to be processed. - private final FileMetadata[] files; - - // Thread task which encapsulates the file system operation work on a file. - private AzureFileSystemThreadTask task; - - public AzureFileSystemThreadRunnable(final FileMetadata[] files, - AzureFileSystemThreadTask task, String operation) { - this.operation = operation; - this.files = files; - this.task = task; - } - - @Override - public void run() { - long start = Time.monotonicNow(); - int currentIndex; - int processedFilesCount = 0; - - while ((currentIndex = fileIndex.getAndIncrement()) < files.length) { - processedFilesCount++; - FileMetadata file = files[currentIndex]; - - try { - // Execute the file operation. - if (!task.execute(file)) { - LOG.error("{} operation failed for file {}", - this.operation, file.getKey()); - operationStatus = false; - } else { - filesProcessed.getAndIncrement(); - } - } catch (Exception e) { - LOG.error("Encountered Exception for {} operation for file {}", - this.operation, file.getKey()); - lastException = new IOException("Encountered Exception for " - + this.operation + " operation for file " + file.getKey(), e); - } - - // If any thread has seen exception or operation failed then we - // don't have to process further. 
- if (lastException != null || !operationStatus) { - LOG.warn("Terminating execution of {} operation now as some other thread" - + " already got exception or operation failed", this.operation, file.getKey()); - break; - } - } - - long end = Time.monotonicNow(); - LOG.debug("Time taken to process {} files count for {} operation: {} ms", - processedFilesCount, this.operation, (end - start)); - if (processedFilesCount > 0) { - threadsUsed.getAndIncrement(); - } - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureFileSystemThreadTask.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureFileSystemThreadTask.java deleted file mode 100644 index f5180900b9d57..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureFileSystemThreadTask.java +++ /dev/null @@ -1,29 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.IOException; - -/** - * Interface for executing the file operation by a thread. - */ -public interface AzureFileSystemThreadTask { - // Execute the operation on the file. - boolean execute(FileMetadata file) throws IOException; -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureLinkedStack.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureLinkedStack.java deleted file mode 100644 index 4c52ef0e931d8..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureLinkedStack.java +++ /dev/null @@ -1,217 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -/** - * A simple generic stack implementation using linked lists. The stack - * implementation has five main operations: - *
- * <ul>
- * <li>push -- adds an element to the top of the stack</li>
- * <li>pop -- removes an element from the top of the stack and returns a
- * reference to it</li>
- * <li>peek -- peek returns an element from the top of the stack without
- * removing it</li>
- * <li>isEmpty -- tests whether the stack is empty</li>
- * <li>size -- returns the size of the stack</li>
- * <li>toString -- returns a string representation of the stack.</li>
- * </ul>
- */ - -public class AzureLinkedStack { - /* - * Linked node for Azure stack collection. - */ - private static class AzureLinkedNode { - private E element; // Linked element on the list. - private AzureLinkedNode next;// Reference to the next linked element on - // list. - - /* - * The constructor builds the linked node with no successor - * - * @param element : The value of the element to be stored with this node. - */ - private AzureLinkedNode(E anElement) { - element = anElement; - next = null; - } - - /* - * Constructor builds a linked node with a specified successor. The - * successor may be null. - * - * @param anElement : new element to be created. - * - * @param nextElement: successor to the new element. - */ - private AzureLinkedNode(E anElement, AzureLinkedNode nextElement) { - element = anElement; - next = nextElement; - } - - /* - * Get the element stored in the linked node. - * - * @return E : element stored in linked node. - */ - private E getElement() { - return element; - } - - /* - * Get the successor node to the element. - * - * @return E : reference to the succeeding node on the list. - */ - private AzureLinkedNode getNext() { - return next; - } - } - - private int count; // The number of elements stored on the stack. - private AzureLinkedNode top; // Top of the stack. - - /* - * Constructor creating an empty stack. - */ - public AzureLinkedStack() { - // Simply initialize the member variables. - // - count = 0; - top = null; - } - - /* - * Adds an element to the top of the stack. - * - * @param element : element pushed to the top of the stack. - */ - public void push(E element) { - // Create a new node containing a reference to be placed on the stack. - // Set the next reference to the new node to point to the current top - // of the stack. Set the top reference to point to the new node. Finally - // increment the count of nodes on the stack. - // - AzureLinkedNode newNode = new AzureLinkedNode(element, top); - top = newNode; - count++; - } - - /* - * Removes the element at the top of the stack and returns a reference to it. - * - * @return E : element popped from the top of the stack. - * - * @throws Exception on pop from an empty stack. - */ - public E pop() throws Exception { - // Make sure the stack is not empty. If it is empty, throw a StackEmpty - // exception. - // - if (isEmpty()) { - throw new Exception("AzureStackEmpty"); - } - - // Set a temporary reference equal to the element at the top of the stack, - // decrement the count of elements and return reference to the temporary. - // - E element = top.getElement(); - top = top.getNext(); - count--; - - // Return the reference to the element that was at the top of the stack. - // - return element; - } - - /* - * Return the top element of the stack without removing it. - * - * @return E - * - * @throws Exception on peek into an empty stack. - */ - public E peek() throws Exception { - // Make sure the stack is not empty. If it is empty, throw a StackEmpty - // exception. - // - if (isEmpty()) { - throw new Exception("AzureStackEmpty"); - } - - // Set a temporary reference equal to the element at the top of the stack - // and return the temporary. - // - E element = top.getElement(); - return element; - } - - /* - * Determines whether the stack is empty - * - * @return boolean true if the stack is empty and false otherwise. - */ - public boolean isEmpty() { - if (0 == size()) { - // Zero-sized stack so the stack is empty. - // - return true; - } - - // The stack is not empty. 
- // - return false; - } - - /* - * Determines the size of the stack - * - * @return int: Count of the number of elements in the stack. - */ - public int size() { - return count; - } - - /* - * Returns a string representation of the stack. - * - * @return String String representation of all elements in the stack. - */ - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - - AzureLinkedNode current = top; - for (int i = 0; i < size(); i++) { - E element = current.getElement(); - sb.append(element.toString()); - current = current.getNext(); - - // Insert commas between strings except after the last string. - // - if (size() - 1 > i) { - sb.append(", "); - } - } - - // Return the string. - // - return sb.toString(); - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java deleted file mode 100644 index 6e50cfc627e05..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java +++ /dev/null @@ -1,3105 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure; -import static org.apache.hadoop.fs.azure.NativeAzureFileSystem.PATH_DELIMITER; - -import java.io.DataOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.io.UnsupportedEncodingException; -import java.net.HttpURLConnection; -import java.net.URI; -import java.net.URISyntaxException; -import java.net.URLDecoder; -import java.net.URLEncoder; -import java.nio.charset.Charset; -import java.nio.charset.StandardCharsets; -import java.security.InvalidKeyException; -import java.util.Calendar; -import java.util.Date; -import java.util.EnumSet; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Locale; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Optional; -import java.util.Set; - -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.azure.StorageInterface.CloudBlobContainerWrapper; -import org.apache.hadoop.fs.azure.StorageInterface.CloudBlobDirectoryWrapper; -import org.apache.hadoop.fs.azure.StorageInterface.CloudBlobWrapper; -import org.apache.hadoop.fs.azure.StorageInterface.CloudBlockBlobWrapper; -import org.apache.hadoop.fs.azure.StorageInterface.CloudPageBlobWrapper; -import org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation; -import org.apache.hadoop.fs.azure.metrics.BandwidthGaugeUpdater; -import org.apache.hadoop.fs.azure.metrics.ErrorMetricUpdater; -import org.apache.hadoop.fs.azure.metrics.ResponseReceivedMetricUpdater; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.fs.permission.PermissionStatus; -import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.util.VersionInfo; -import org.eclipse.jetty.util.ajax.JSON; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.hadoop.classification.VisibleForTesting; -import com.microsoft.azure.storage.CloudStorageAccount; -import com.microsoft.azure.storage.OperationContext; -import com.microsoft.azure.storage.RetryExponentialRetry; -import com.microsoft.azure.storage.RetryNoRetry; -import com.microsoft.azure.storage.StorageCredentials; -import com.microsoft.azure.storage.StorageCredentialsAccountAndKey; -import com.microsoft.azure.storage.StorageCredentialsSharedAccessSignature; -import com.microsoft.azure.storage.StorageErrorCodeStrings; -import com.microsoft.azure.storage.StorageException; -import com.microsoft.azure.storage.Constants; -import com.microsoft.azure.storage.StorageEvent; -import com.microsoft.azure.storage.core.BaseRequest; -import com.microsoft.azure.storage.SendingRequestEvent; -import com.microsoft.azure.storage.blob.BlobListingDetails; -import com.microsoft.azure.storage.blob.BlobProperties; -import com.microsoft.azure.storage.blob.BlobRequestOptions; -import com.microsoft.azure.storage.blob.BlobType; -import com.microsoft.azure.storage.blob.CloudBlob; -import com.microsoft.azure.storage.blob.CopyStatus; -import com.microsoft.azure.storage.blob.DeleteSnapshotsOption; -import com.microsoft.azure.storage.blob.ListBlobItem; -import com.microsoft.azure.storage.core.Utility; - -/** - * Core implementation of Windows Azure Filesystem for Hadoop. 
- * Provides the bridging logic between Hadoop's abstract filesystem and Azure Storage - * - */ -@InterfaceAudience.Private -@VisibleForTesting -public class AzureNativeFileSystemStore implements NativeFileSystemStore { - - /** - * Configuration knob on whether we do block-level MD5 validation on - * upload/download. - */ - static final String KEY_CHECK_BLOCK_MD5 = "fs.azure.check.block.md5"; - /** - * Configuration knob on whether we store blob-level MD5 on upload. - */ - static final String KEY_STORE_BLOB_MD5 = "fs.azure.store.blob.md5"; - static final String DEFAULT_STORAGE_EMULATOR_ACCOUNT_NAME = "storageemulator"; - static final String STORAGE_EMULATOR_ACCOUNT_NAME_PROPERTY_NAME = "fs.azure.storage.emulator.account.name"; - - /** - * Configuration for User-Agent field. - */ - static final String USER_AGENT_ID_KEY = "fs.azure.user.agent.prefix"; - static final String USER_AGENT_ID_DEFAULT = "unknown"; - - public static final Logger LOG = LoggerFactory.getLogger(AzureNativeFileSystemStore.class); - - private StorageInterface storageInteractionLayer; - private CloudBlobDirectoryWrapper rootDirectory; - private CloudBlobContainerWrapper container; - - // Constants local to this class. - // - private static final String KEY_ACCOUNT_KEYPROVIDER_PREFIX = "fs.azure.account.keyprovider."; - private static final String KEY_ACCOUNT_SAS_PREFIX = "fs.azure.sas."; - - // note: this value is not present in core-default.xml as our real default is - // computed as min(2*cpu,8) - private static final String KEY_CONCURRENT_CONNECTION_VALUE_OUT = "fs.azure.concurrentRequestCount.out"; - - private static final String HADOOP_BLOCK_SIZE_PROPERTY_NAME = "fs.azure.block.size"; - private static final String KEY_STREAM_MIN_READ_SIZE = "fs.azure.read.request.size"; - private static final String KEY_STORAGE_CONNECTION_TIMEOUT = "fs.azure.storage.timeout"; - private static final String KEY_WRITE_BLOCK_SIZE = "fs.azure.write.request.size"; - @VisibleForTesting - static final String KEY_INPUT_STREAM_VERSION = "fs.azure.input.stream.version.for.internal.use.only"; - - // Property controlling whether to allow reads on blob which are concurrently - // appended out-of-band. - private static final String KEY_READ_TOLERATE_CONCURRENT_APPEND = "fs.azure.io.read.tolerate.concurrent.append"; - - // Configurable throttling parameter properties. These properties are located - // in the core-site.xml configuration file. 
- private static final String KEY_MIN_BACKOFF_INTERVAL = "fs.azure.io.retry.min.backoff.interval"; - private static final String KEY_MAX_BACKOFF_INTERVAL = "fs.azure.io.retry.max.backoff.interval"; - private static final String KEY_BACKOFF_INTERVAL = "fs.azure.io.retry.backoff.interval"; - private static final String KEY_MAX_IO_RETRIES = "fs.azure.io.retry.max.retries"; - - private static final String KEY_COPYBLOB_MIN_BACKOFF_INTERVAL = - "fs.azure.io.copyblob.retry.min.backoff.interval"; - private static final String KEY_COPYBLOB_MAX_BACKOFF_INTERVAL = - "fs.azure.io.copyblob.retry.max.backoff.interval"; - private static final String KEY_COPYBLOB_BACKOFF_INTERVAL = - "fs.azure.io.copyblob.retry.backoff.interval"; - private static final String KEY_COPYBLOB_MAX_IO_RETRIES = - "fs.azure.io.copyblob.retry.max.retries"; - - private static final String KEY_SELF_THROTTLE_ENABLE = "fs.azure.selfthrottling.enable"; - private static final String KEY_SELF_THROTTLE_READ_FACTOR = "fs.azure.selfthrottling.read.factor"; - private static final String KEY_SELF_THROTTLE_WRITE_FACTOR = "fs.azure.selfthrottling.write.factor"; - - private static final String KEY_AUTO_THROTTLE_ENABLE = "fs.azure.autothrottling.enable"; - - private static final String KEY_ENABLE_STORAGE_CLIENT_LOGGING = "fs.azure.storage.client.logging"; - - /** - * Configuration keys to identify if WASB needs to run in Secure mode. In Secure mode - * all interactions with Azure storage is performed using SAS uris. There are two sub modes - * within the Secure mode , one is remote SAS key mode where the SAS keys are generated - * from a remote process and local mode where SAS keys are generated within WASB. - */ - @VisibleForTesting - public static final String KEY_USE_SECURE_MODE = "fs.azure.secure.mode"; - - /** - * By default the SAS Key mode is expected to run in Romote key mode. This flags sets it - * to run on the local mode. - */ - public static final String KEY_USE_LOCAL_SAS_KEY_MODE = "fs.azure.local.sas.key.mode"; - - /** - * Config to control case sensitive metadata key checks/retrieval. If this - * is false, blob metadata keys will be treated case insensitive. - */ - private static final String KEY_BLOB_METADATA_KEY_CASE_SENSITIVE = "fs.azure.blob.metadata.key.case.sensitive"; - private static final String PERMISSION_METADATA_KEY = "hdi_permission"; - private static final String OLD_PERMISSION_METADATA_KEY = "asv_permission"; - private static final String IS_FOLDER_METADATA_KEY = "hdi_isfolder"; - private static final String OLD_IS_FOLDER_METADATA_KEY = "asv_isfolder"; - static final String VERSION_METADATA_KEY = "hdi_version"; - static final String OLD_VERSION_METADATA_KEY = "asv_version"; - static final String FIRST_WASB_VERSION = "2013-01-01"; - static final String CURRENT_WASB_VERSION = "2013-09-01"; - static final String LINK_BACK_TO_UPLOAD_IN_PROGRESS_METADATA_KEY = "hdi_tmpupload"; - static final String OLD_LINK_BACK_TO_UPLOAD_IN_PROGRESS_METADATA_KEY = "asv_tmpupload"; - - /** - * Configuration key to indicate the set of directories in WASB where we - * should store files as page blobs instead of block blobs. - * - * Entries should be plain directory names (i.e. not URIs) with no leading or - * trailing slashes. Delimit the entries with commas. - */ - public static final String KEY_PAGE_BLOB_DIRECTORIES = - "fs.azure.page.blob.dir"; - /** - * The set of directories where we should store files as page blobs. 
- */ - private Set pageBlobDirs; - - /** - * Configuration key to indicate the set of directories in WASB where we - * should store files as block blobs with block compaction enabled. - * - * Entries can be directory paths relative to the container (e.g. "/path") or - * fully qualified wasb:// URIs (e.g. - * wasb://container@example.blob.core.windows.net/path) - */ - public static final String KEY_BLOCK_BLOB_WITH_COMPACTION_DIRECTORIES = - "fs.azure.block.blob.with.compaction.dir"; - - /** - * The set of directories where we should store files as block blobs with - * block compaction enabled. - */ - private Set blockBlobWithCompationDirs; - - /** - * Configuration key to indicate the set of directories in WASB where - * we should do atomic folder rename synchronized with createNonRecursive. - */ - public static final String KEY_ATOMIC_RENAME_DIRECTORIES = - "fs.azure.atomic.rename.dir"; - - /** - * Configuration key to enable flat listing of blobs. This config is useful - * only if listing depth is AZURE_UNBOUNDED_DEPTH. - */ - public static final String KEY_ENABLE_FLAT_LISTING = "fs.azure.flatlist.enable"; - - /** - * Optional config to enable a lock free pread which will bypass buffer in - * BlockBlobInputStream. - * This is not a config which can be set at cluster level. It can be used as - * an option on FutureDataInputStreamBuilder. - * @see FileSystem#openFile(org.apache.hadoop.fs.Path) - */ - public static final String FS_AZURE_BLOCK_BLOB_BUFFERED_PREAD_DISABLE = - "fs.azure.block.blob.buffered.pread.disable"; - - /** - * The set of directories where we should apply atomic folder rename - * synchronized with createNonRecursive. - */ - private Set atomicRenameDirs; - - private static final String HTTP_SCHEME = "http"; - private static final String HTTPS_SCHEME = "https"; - private static final String WASB_AUTHORITY_DELIMITER = "@"; - private static final char ASTERISK_SYMBOL = '*'; - private static final String AZURE_ROOT_CONTAINER = "$root"; - - private static final int DEFAULT_CONCURRENT_WRITES = 8; - - private static final Charset METADATA_ENCODING = StandardCharsets.UTF_8; - - // Concurrent reads reads of data written out of band are disable by default. - // - private static final boolean DEFAULT_READ_TOLERATE_CONCURRENT_APPEND = false; - - // Default block sizes - public static final int DEFAULT_DOWNLOAD_BLOCK_SIZE = 4 * 1024 * 1024; - public static final int DEFAULT_UPLOAD_BLOCK_SIZE = 4 * 1024 * 1024; - public static final long DEFAULT_HADOOP_BLOCK_SIZE = 512 * 1024 * 1024L; - - private static final int DEFAULT_INPUT_STREAM_VERSION = 2; - - // Retry parameter defaults. - // - - private static final int DEFAULT_MIN_BACKOFF_INTERVAL = 3 * 1000; // 3s - private static final int DEFAULT_MAX_BACKOFF_INTERVAL = 30 * 1000; // 30s - private static final int DEFAULT_BACKOFF_INTERVAL = 3 * 1000; // 3s - private static final int DEFAULT_MAX_RETRY_ATTEMPTS = 30; - - private static final int DEFAULT_COPYBLOB_MIN_BACKOFF_INTERVAL = 3 * 1000; - private static final int DEFAULT_COPYBLOB_MAX_BACKOFF_INTERVAL = 90 * 1000; - private static final int DEFAULT_COPYBLOB_BACKOFF_INTERVAL = 30 * 1000; - private static final int DEFAULT_COPYBLOB_MAX_RETRY_ATTEMPTS = 15; - - // Self-throttling defaults. Allowed range = (0,1.0] - // Value of 1.0 means no self-throttling. 
- // Value of x means process data at factor x of unrestricted rate - private static final boolean DEFAULT_SELF_THROTTLE_ENABLE = true; - private static final float DEFAULT_SELF_THROTTLE_READ_FACTOR = 1.0f; - private static final float DEFAULT_SELF_THROTTLE_WRITE_FACTOR = 1.0f; - - private static final boolean DEFAULT_AUTO_THROTTLE_ENABLE = false; - - private static final int STORAGE_CONNECTION_TIMEOUT_DEFAULT = 90; - - /** - * Default values to control SAS Key mode. - * By default we set the values to false. - */ - public static final boolean DEFAULT_USE_SECURE_MODE = false; - private static final boolean DEFAULT_USE_LOCAL_SAS_KEY_MODE = false; - - /** - * Enable flat listing of blobs as default option. This is useful only if - * listing depth is AZURE_UNBOUNDED_DEPTH. - */ - public static final boolean DEFAULT_ENABLE_FLAT_LISTING = false; - - /** - * MEMBER VARIABLES - */ - - private URI sessionUri; - private Configuration sessionConfiguration; - private int concurrentWrites = DEFAULT_CONCURRENT_WRITES; - private boolean isAnonymousCredentials = false; - // Set to true if we are connecting using shared access signatures. - private boolean connectingUsingSAS = false; - private AzureFileSystemInstrumentation instrumentation; - private BandwidthGaugeUpdater bandwidthGaugeUpdater; - private static final JSON PERMISSION_JSON_SERIALIZER = createPermissionJsonSerializer(); - - private boolean suppressRetryPolicy = false; - private boolean canCreateOrModifyContainer = false; - private ContainerState currentKnownContainerState = ContainerState.Unknown; - private final Object containerStateLock = new Object(); - - private boolean tolerateOobAppends = DEFAULT_READ_TOLERATE_CONCURRENT_APPEND; - - private long hadoopBlockSize = DEFAULT_HADOOP_BLOCK_SIZE; - private int downloadBlockSizeBytes = DEFAULT_DOWNLOAD_BLOCK_SIZE; - private int uploadBlockSizeBytes = DEFAULT_UPLOAD_BLOCK_SIZE; - private int inputStreamVersion = DEFAULT_INPUT_STREAM_VERSION; - - // Bandwidth throttling exponential back-off parameters - // - private int minBackoff; // the minimum back-off interval (ms) between retries. - private int maxBackoff; // the maximum back-off interval (ms) between retries. - private int deltaBackoff; // the back-off interval (ms) between retries. - private int maxRetries; // the maximum number of retry attempts. - - // Self-throttling parameters - private boolean selfThrottlingEnabled; - private float selfThrottlingReadFactor; - private float selfThrottlingWriteFactor; - - private boolean autoThrottlingEnabled; - - private TestHookOperationContext testHookOperationContext = null; - - // Set if we're running against a storage emulator.. - private boolean isStorageEmulator = false; - - // Configs controlling WASB SAS key mode. - private boolean useSecureMode = false; - private boolean useLocalSasKeyMode = false; - - // User-Agent - private String userAgentId; - - private String delegationToken; - - private boolean metadataKeyCaseSensitive; - - /** The error message template when container is not accessible. */ - public static final String NO_ACCESS_TO_CONTAINER_MSG = "No credentials found for " - + "account %s in the configuration, and its container %s is not " - + "accessible using anonymous credentials. Please check if the container " - + "exists first. If it is not publicly available, you have to provide " - + "account credentials."; - - /** - * A test hook interface that can modify the operation context we use for - * Azure Storage operations, e.g. to inject errors. 
- */ - @VisibleForTesting - interface TestHookOperationContext { - OperationContext modifyOperationContext(OperationContext original); - } - - /** - * Suppress the default retry policy for the Storage, useful in unit tests to - * test negative cases without waiting forever. - */ - @VisibleForTesting - void suppressRetryPolicy() { - suppressRetryPolicy = true; - } - - /** - * Add a test hook to modify the operation context we use for Azure Storage - * operations. - * - * @param testHook - * The test hook, or null to unset previous hooks. - */ - @VisibleForTesting - void addTestHookToOperationContext(TestHookOperationContext testHook) { - this.testHookOperationContext = testHook; - } - - /** - * If we're asked by unit tests to not retry, set the retry policy factory in - * the client accordingly. - */ - private void suppressRetryPolicyInClientIfNeeded() { - if (suppressRetryPolicy) { - storageInteractionLayer.setRetryPolicyFactory(new RetryNoRetry()); - } - } - - /** - * Creates a JSON serializer that can serialize a PermissionStatus object into - * the JSON string we want in the blob metadata. - * - * @return The JSON serializer. - */ - private static JSON createPermissionJsonSerializer() { - org.eclipse.jetty.util.log.Log.getProperties().setProperty("org.eclipse.jetty.util.log.announce", "false"); - JSON serializer = new JSON(); - serializer.addConvertor(PermissionStatus.class, - new PermissionStatusJsonSerializer()); - return serializer; - } - - /** - * A converter for PermissionStatus to/from JSON as we want it in the blob - * metadata. - */ - private static class PermissionStatusJsonSerializer implements JSON.Convertor { - private static final String OWNER_TAG = "owner"; - private static final String GROUP_TAG = "group"; - private static final String PERMISSIONS_TAG = "permissions"; - - @Override - public void toJSON(Object obj, JSON.Output out) { - PermissionStatus permissionStatus = (PermissionStatus) obj; - // Don't store group as null, just store it as empty string - // (which is FileStatus behavior). - String group = permissionStatus.getGroupName() == null ? "" - : permissionStatus.getGroupName(); - out.add(OWNER_TAG, permissionStatus.getUserName()); - out.add(GROUP_TAG, group); - out.add(PERMISSIONS_TAG, permissionStatus.getPermission().toString()); - } - - @Override - public Object fromJSON(@SuppressWarnings("rawtypes") Map object) { - return PermissionStatusJsonSerializer.fromJSONMap(object); - } - - @SuppressWarnings("rawtypes") - public static PermissionStatus fromJSONString(String jsonString) { - // The JSON class can only find out about an object's class (and call me) - // if we store the class name in the JSON string. Since I don't want to - // do that (it's an implementation detail), I just deserialize as a - // the default Map (JSON's default behavior) and parse that. - return fromJSONMap((Map) PERMISSION_JSON_SERIALIZER.fromJSON(jsonString)); - } - - private static PermissionStatus fromJSONMap( - @SuppressWarnings("rawtypes") Map object) { - return new PermissionStatus((String) object.get(OWNER_TAG), - (String) object.get(GROUP_TAG), - // The initial - below is the Unix file type, - // which FsPermission needs there but ignores. 
- FsPermission.valueOf("-" + (String) object.get(PERMISSIONS_TAG))); - } - } - - @VisibleForTesting - void setAzureStorageInteractionLayer(StorageInterface storageInteractionLayer) { - this.storageInteractionLayer = storageInteractionLayer; - } - - @VisibleForTesting - public BandwidthGaugeUpdater getBandwidthGaugeUpdater() { - return bandwidthGaugeUpdater; - } - - /** - * Check if concurrent reads and writes on the same blob are allowed. - * - * @return true if concurrent reads and OOB writes has been configured, false - * otherwise. - */ - private boolean isConcurrentOOBAppendAllowed() { - return tolerateOobAppends; - } - - /** - * Method for the URI and configuration object necessary to create a storage - * session with an Azure session. It parses the scheme to ensure it matches - * the storage protocol supported by this file system. - * - * @param uri - URI for target storage blob. - * @param conf - reference to configuration object. - * @param instrumentation - the metrics source that will keep track of operations here. - * - * @throws IllegalArgumentException if URI or job object is null, or invalid scheme. - */ - @Override - public void initialize(URI uri, Configuration conf, AzureFileSystemInstrumentation instrumentation) - throws IllegalArgumentException, AzureException, IOException { - - if (null == instrumentation) { - throw new IllegalArgumentException("Null instrumentation"); - } - this.instrumentation = instrumentation; - - // Check that URI exists. - // - if (null == uri) { - throw new IllegalArgumentException( - "Cannot initialize WASB file system, URI is null"); - } - - // Check that configuration object is non-null. - // - if (null == conf) { - throw new IllegalArgumentException( - "Cannot initialize WASB file system, conf is null"); - } - - if (!conf.getBoolean( - NativeAzureFileSystem.SKIP_AZURE_METRICS_PROPERTY_NAME, false)) { - //If not skip azure metrics, create bandwidthGaugeUpdater - this.bandwidthGaugeUpdater = new BandwidthGaugeUpdater(instrumentation); - } - - // Incoming parameters validated. Capture the URI and the job configuration - // object. - // - sessionUri = uri; - sessionConfiguration = conf; - - useSecureMode = conf.getBoolean(KEY_USE_SECURE_MODE, - DEFAULT_USE_SECURE_MODE); - useLocalSasKeyMode = conf.getBoolean(KEY_USE_LOCAL_SAS_KEY_MODE, - DEFAULT_USE_LOCAL_SAS_KEY_MODE); - - if (null == this.storageInteractionLayer) { - if (!useSecureMode) { - this.storageInteractionLayer = new StorageInterfaceImpl(); - } else { - this.storageInteractionLayer = new SecureStorageInterfaceImpl( - useLocalSasKeyMode, conf); - } - } - - // Configure Azure storage session. - configureAzureStorageSession(); - - // Start an Azure storage session. - // - createAzureStorageSession(); - - // Extract the directories that should contain page blobs - pageBlobDirs = getDirectorySet(KEY_PAGE_BLOB_DIRECTORIES); - LOG.debug("Page blob directories: {}", setToString(pageBlobDirs)); - - // User-agent - userAgentId = conf.get(USER_AGENT_ID_KEY, USER_AGENT_ID_DEFAULT); - - // Extract the directories that should contain block blobs with compaction - blockBlobWithCompationDirs = getDirectorySet( - KEY_BLOCK_BLOB_WITH_COMPACTION_DIRECTORIES); - LOG.debug("Block blobs with compaction directories: {}", - setToString(blockBlobWithCompationDirs)); - - // Extract directories that should have atomic rename applied. - atomicRenameDirs = getDirectorySet(KEY_ATOMIC_RENAME_DIRECTORIES); - String hbaseRoot; - try { - - // Add to this the hbase root directory, or /hbase is that is not set. 
- hbaseRoot = verifyAndConvertToStandardFormat( - sessionConfiguration.get("hbase.rootdir", "hbase")); - if (hbaseRoot != null) { - atomicRenameDirs.add(hbaseRoot); - } - } catch (URISyntaxException e) { - LOG.warn("Unable to initialize HBase root as an atomic rename directory."); - } - LOG.debug("Atomic rename directories: {} ", setToString(atomicRenameDirs)); - metadataKeyCaseSensitive = conf - .getBoolean(KEY_BLOB_METADATA_KEY_CASE_SENSITIVE, true); - if (!metadataKeyCaseSensitive) { - LOG.info("{} configured as false. Blob metadata will be treated case insensitive.", - KEY_BLOB_METADATA_KEY_CASE_SENSITIVE); - } - } - - /** - * Helper to format a string for log output from Set - */ - private String setToString(Set set) { - StringBuilder sb = new StringBuilder(); - int i = 1; - for (String s : set) { - sb.append("/" + s); - if (i != set.size()) { - sb.append(", "); - } - i++; - } - return sb.toString(); - } - - /** - * Method to extract the account name from an Azure URI. - * - * @param uri - * -- WASB blob URI - * @returns accountName -- the account name for the URI. - * @throws URISyntaxException - * if the URI does not have an authority it is badly formed. - */ - private String getAccountFromAuthority(URI uri) throws URISyntaxException { - - // Check to make sure that the authority is valid for the URI. - // - String authority = uri.getRawAuthority(); - if (null == authority) { - // Badly formed or illegal URI. - // - throw new URISyntaxException(uri.toString(), - "Expected URI with a valid authority"); - } - - // Check if authority container the delimiter separating the account name from the - // the container. - // - if (!authority.contains(WASB_AUTHORITY_DELIMITER)) { - return authority; - } - - // Split off the container name and the authority. - // - String[] authorityParts = authority.split(WASB_AUTHORITY_DELIMITER, 2); - - // Because the string contains an '@' delimiter, a container must be - // specified. - // - if (authorityParts.length < 2 || "".equals(authorityParts[0])) { - // Badly formed WASB authority since there is no container. - // - final String errMsg = String - .format( - "URI '%s' has a malformed WASB authority, expected container name. " - + "Authority takes the form wasb://[@]", - uri.toString()); - throw new IllegalArgumentException(errMsg); - } - - // Return with the account name. It is possible that this name is NULL. - // - return authorityParts[1]; - } - - /** - * Method to extract the container name from an Azure URI. - * - * @param uri - * -- WASB blob URI - * @returns containerName -- the container name for the URI. May be null. - * @throws URISyntaxException - * if the uri does not have an authority it is badly formed. - */ - private String getContainerFromAuthority(URI uri) throws URISyntaxException { - - // Check to make sure that the authority is valid for the URI. - // - String authority = uri.getRawAuthority(); - if (null == authority) { - // Badly formed or illegal URI. - // - throw new URISyntaxException(uri.toString(), - "Expected URI with a valid authority"); - } - - // The URI has a valid authority. Extract the container name. It is the - // second component of the WASB URI authority. - if (!authority.contains(WASB_AUTHORITY_DELIMITER)) { - // The authority does not have a container name. Use the default container by - // setting the container name to the default Azure root container. - // - return AZURE_ROOT_CONTAINER; - } - - // Split off the container name and the authority. 
- String[] authorityParts = authority.split(WASB_AUTHORITY_DELIMITER, 2); - - // Because the string contains an '@' delimiter, a container must be - // specified. - if (authorityParts.length < 2 || "".equals(authorityParts[0])) { - // Badly formed WASB authority since there is no container. - final String errMsg = String - .format( - "URI '%s' has a malformed WASB authority, expected container name." - + "Authority takes the form wasb://[@]", - uri.toString()); - throw new IllegalArgumentException(errMsg); - } - - // Set the container name from the first entry for the split parts of the - // authority. - return authorityParts[0]; - } - - /** - * Get the appropriate return the appropriate scheme for communicating with - * Azure depending on whether wasb or wasbs is specified in the target URI. - * - * return scheme - HTTPS or HTTP as appropriate. - */ - private String getHTTPScheme() { - String sessionScheme = sessionUri.getScheme(); - // Check if we're on a secure URI scheme: wasbs or the legacy asvs scheme. - if (sessionScheme != null - && (sessionScheme.equalsIgnoreCase("asvs") - || sessionScheme.equalsIgnoreCase("wasbs"))) { - return HTTPS_SCHEME; - } else { - // At this point the scheme should be either null or asv or wasb. - // Intentionally I'm not going to validate it though since I don't feel - // it's this method's job to ensure a valid URI scheme for this file - // system. - return HTTP_SCHEME; - } - } - - /** - * Set the configuration parameters for this client storage session with - * Azure. - * - * @throws AzureException - */ - private void configureAzureStorageSession() throws AzureException { - - // Assertion: Target session URI already should have been captured. - if (sessionUri == null) { - throw new AssertionError( - "Expected a non-null session URI when configuring storage session"); - } - - // Assertion: A client session already should have been established with - // Azure. - if (storageInteractionLayer == null) { - throw new AssertionError(String.format( - "Cannot configure storage session for URI '%s' " - + "if storage session has not been established.", - sessionUri.toString())); - } - - // Determine whether or not reads are allowed concurrent with OOB writes. - tolerateOobAppends = sessionConfiguration.getBoolean( - KEY_READ_TOLERATE_CONCURRENT_APPEND, - DEFAULT_READ_TOLERATE_CONCURRENT_APPEND); - - // Retrieve configuration for the minimum stream read and write block size. - // - this.downloadBlockSizeBytes = sessionConfiguration.getInt( - KEY_STREAM_MIN_READ_SIZE, DEFAULT_DOWNLOAD_BLOCK_SIZE); - this.uploadBlockSizeBytes = sessionConfiguration.getInt( - KEY_WRITE_BLOCK_SIZE, DEFAULT_UPLOAD_BLOCK_SIZE); - this.hadoopBlockSize = sessionConfiguration.getLong( - HADOOP_BLOCK_SIZE_PROPERTY_NAME, DEFAULT_HADOOP_BLOCK_SIZE); - - this.inputStreamVersion = sessionConfiguration.getInt( - KEY_INPUT_STREAM_VERSION, DEFAULT_INPUT_STREAM_VERSION); - - // The job may want to specify a timeout to use when engaging the - // storage service. The default is currently 90 seconds. It may - // be necessary to increase this value for long latencies in larger - // jobs. If the timeout specified is greater than zero seconds use - // it, otherwise use the default service client timeout. 
- int storageConnectionTimeout = sessionConfiguration.getInt( - KEY_STORAGE_CONNECTION_TIMEOUT, 0); - - if (0 < storageConnectionTimeout) { - storageInteractionLayer.setTimeoutInMs(storageConnectionTimeout * 1000); - } - - // Set the concurrency values equal to the that specified in the - // configuration file. If it does not exist, set it to the default - // value calculated as double the number of CPU cores on the client - // machine. The concurrency value is minimum of double the cores and - // the read/write property. - int cpuCores = 2 * Runtime.getRuntime().availableProcessors(); - - concurrentWrites = sessionConfiguration.getInt( - KEY_CONCURRENT_CONNECTION_VALUE_OUT, - Math.min(cpuCores, DEFAULT_CONCURRENT_WRITES)); - - // Set up the exponential retry policy. - // - minBackoff = sessionConfiguration.getInt( - KEY_MIN_BACKOFF_INTERVAL, DEFAULT_MIN_BACKOFF_INTERVAL); - - maxBackoff = sessionConfiguration.getInt( - KEY_MAX_BACKOFF_INTERVAL, DEFAULT_MAX_BACKOFF_INTERVAL); - - deltaBackoff = sessionConfiguration.getInt( - KEY_BACKOFF_INTERVAL, DEFAULT_BACKOFF_INTERVAL); - - maxRetries = sessionConfiguration.getInt( - KEY_MAX_IO_RETRIES, DEFAULT_MAX_RETRY_ATTEMPTS); - - storageInteractionLayer.setRetryPolicyFactory( - new RetryExponentialRetry(minBackoff, deltaBackoff, maxBackoff, maxRetries)); - - - // read the self-throttling config. - selfThrottlingEnabled = sessionConfiguration.getBoolean( - KEY_SELF_THROTTLE_ENABLE, DEFAULT_SELF_THROTTLE_ENABLE); - - selfThrottlingReadFactor = sessionConfiguration.getFloat( - KEY_SELF_THROTTLE_READ_FACTOR, DEFAULT_SELF_THROTTLE_READ_FACTOR); - - selfThrottlingWriteFactor = sessionConfiguration.getFloat( - KEY_SELF_THROTTLE_WRITE_FACTOR, DEFAULT_SELF_THROTTLE_WRITE_FACTOR); - - if (!selfThrottlingEnabled) { - autoThrottlingEnabled = sessionConfiguration.getBoolean( - KEY_AUTO_THROTTLE_ENABLE, - DEFAULT_AUTO_THROTTLE_ENABLE); - if (autoThrottlingEnabled) { - ClientThrottlingIntercept.initializeSingleton(); - } - } else { - // cannot enable both self-throttling and client-throttling - autoThrottlingEnabled = false; - } - - OperationContext.setLoggingEnabledByDefault(sessionConfiguration. - getBoolean(KEY_ENABLE_STORAGE_CLIENT_LOGGING, false)); - - LOG.debug( - "AzureNativeFileSystemStore init. Settings={},{},{},{{},{},{},{}},{{},{},{}}", - concurrentWrites, tolerateOobAppends, - ((storageConnectionTimeout > 0) ? storageConnectionTimeout - : STORAGE_CONNECTION_TIMEOUT_DEFAULT), minBackoff, - deltaBackoff, maxBackoff, maxRetries, selfThrottlingEnabled, - selfThrottlingReadFactor, selfThrottlingWriteFactor); - } - - /** - * Connect to Azure storage using anonymous credentials. - * - * @param uri - * - URI to target blob (R/O access to public blob) - * - * @throws StorageException - * raised on errors communicating with Azure storage. - * @throws IOException - * raised on errors performing I/O or setting up the session. - * @throws URISyntaxException - * raised on creating mal-formed URI's. - */ - private void connectUsingAnonymousCredentials(final URI uri) - throws StorageException, IOException, URISyntaxException { - // Use an HTTP scheme since the URI specifies a publicly accessible - // container. Explicitly create a storage URI corresponding to the URI - // parameter for use in creating the service client. - String accountName = getAccountFromAuthority(uri); - URI storageUri = new URI(getHTTPScheme() + ":" + PATH_DELIMITER - + PATH_DELIMITER + accountName); - - // Create the service client with anonymous credentials. 
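The backoff settings read in configureAzureStorageSession() above feed an exponential retry policy; roughly, the per-attempt delay grows as in the following sketch. This is not the SDK's exact formula, only an illustration of how the configured min/delta/max bounds and retry cap interact:

    // Rough model of an exponential retry delay bounded by maxBackoff.
    final class BackoffSketch {
      static long backoffMillis(int attempt, int minBackoff, int deltaBackoff,
                                int maxBackoff, int maxRetries) {
        if (attempt >= maxRetries) {
          return -1L;                                   // retries exhausted
        }
        long grown = (long) (deltaBackoff * Math.pow(2, attempt));
        return Math.min(maxBackoff, minBackoff + grown);
      }
    }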
- String containerName = getContainerFromAuthority(uri); - storageInteractionLayer.createBlobClient(storageUri); - suppressRetryPolicyInClientIfNeeded(); - - // Capture the container reference. - container = storageInteractionLayer.getContainerReference(containerName); - rootDirectory = container.getDirectoryReference(""); - - // Check for container existence, and our ability to access it. - boolean canAccess; - try { - canAccess = container.exists(getInstrumentedContext()); - } catch (StorageException ex) { - LOG.error("Service returned StorageException when checking existence " - + "of container {} in account {}", containerName, accountName, ex); - canAccess = false; - } - if (!canAccess) { - throw new AzureException(String.format(NO_ACCESS_TO_CONTAINER_MSG, - accountName, containerName)); - } - - // Accessing the storage server unauthenticated using - // anonymous credentials. - isAnonymousCredentials = true; - } - - private void connectUsingCredentials(String accountName, - StorageCredentials credentials, String containerName) - throws URISyntaxException, StorageException, AzureException { - - URI blobEndPoint; - if (isStorageEmulatorAccount(accountName)) { - isStorageEmulator = true; - CloudStorageAccount account = - CloudStorageAccount.getDevelopmentStorageAccount(); - storageInteractionLayer.createBlobClient(account); - } else { - blobEndPoint = new URI(getHTTPScheme() + "://" + accountName); - storageInteractionLayer.createBlobClient(blobEndPoint, credentials); - } - suppressRetryPolicyInClientIfNeeded(); - - // Capture the container reference for debugging purposes. - container = storageInteractionLayer.getContainerReference(containerName); - rootDirectory = container.getDirectoryReference(""); - - // Can only create container if using account key credentials - canCreateOrModifyContainer = credentials instanceof StorageCredentialsAccountAndKey; - } - - /** - * Method to set up the Storage Interaction layer in Secure mode. - * @param accountName - Storage account provided in the initializer - * @param containerName - Container name provided in the initializer - * @param sessionUri - URI provided in the initializer - */ - private void connectToAzureStorageInSecureMode(String accountName, - String containerName, URI sessionUri) - throws AzureException, StorageException, URISyntaxException { - - LOG.debug("Connecting to Azure storage in Secure Mode"); - // Assertion: storageInteractionLayer instance has to be a SecureStorageInterfaceImpl - if (!(this.storageInteractionLayer instanceof SecureStorageInterfaceImpl)) { - throw new AssertionError("connectToAzureStorageInSecureMode() should be called only" - + " for SecureStorageInterfaceImpl instances"); - } - - ((SecureStorageInterfaceImpl) this.storageInteractionLayer). - setStorageAccountName(accountName); - connectingUsingSAS = true; - container = storageInteractionLayer.getContainerReference(containerName); - rootDirectory = container.getDirectoryReference(""); - - canCreateOrModifyContainer = true; - } - - /** - * Connect to Azure storage using account key credentials. 
- */ - private void connectUsingConnectionStringCredentials( - final String accountName, final String containerName, - final String accountKey) throws InvalidKeyException, StorageException, - IOException, URISyntaxException { - // If the account name is "acc.blob.core.windows.net", then the - // rawAccountName is just "acc" - String rawAccountName = accountName.split("\\.")[0]; - StorageCredentials credentials = new StorageCredentialsAccountAndKey( - rawAccountName, accountKey); - connectUsingCredentials(accountName, credentials, containerName); - } - - /** - * Connect to Azure storage using shared access signature credentials. - */ - private void connectUsingSASCredentials(final String accountName, - final String containerName, final String sas) throws InvalidKeyException, - StorageException, IOException, URISyntaxException { - StorageCredentials credentials = new StorageCredentialsSharedAccessSignature( - sas); - connectingUsingSAS = true; - connectUsingCredentials(accountName, credentials, containerName); - } - - private boolean isStorageEmulatorAccount(final String accountName) { - return accountName.equalsIgnoreCase(sessionConfiguration.get( - STORAGE_EMULATOR_ACCOUNT_NAME_PROPERTY_NAME, - DEFAULT_STORAGE_EMULATOR_ACCOUNT_NAME)); - } - - @VisibleForTesting - public static String getAccountKeyFromConfiguration(String accountName, - Configuration conf) throws KeyProviderException { - String key = null; - String keyProviderClass = conf.get(KEY_ACCOUNT_KEYPROVIDER_PREFIX - + accountName); - KeyProvider keyProvider = null; - - if (keyProviderClass == null) { - // No key provider was provided so use the provided key as is. - keyProvider = new SimpleKeyProvider(); - } else { - // create an instance of the key provider class and verify it - // implements KeyProvider - Object keyProviderObject = null; - try { - Class clazz = conf.getClassByName(keyProviderClass); - keyProviderObject = clazz.newInstance(); - } catch (Exception e) { - throw new KeyProviderException("Unable to load key provider class.", e); - } - if (!(keyProviderObject instanceof KeyProvider)) { - throw new KeyProviderException(keyProviderClass - + " specified in config is not a valid KeyProvider class."); - } - keyProvider = (KeyProvider) keyProviderObject; - } - key = keyProvider.getStorageAccountKey(accountName, conf); - - return key; - } - - /** - * Establish a session with Azure blob storage based on the target URI. The - * method determines whether or not the URI target contains an explicit - * account or an implicit default cluster-wide account. - * - * @throws AzureException - * @throws IOException - */ - private void createAzureStorageSession() - throws AzureException, IOException { - - // Make sure this object was properly initialized with references to - // the sessionUri and sessionConfiguration. - if (null == sessionUri || null == sessionConfiguration) { - throw new AzureException("Filesystem object not initialized properly." - + "Unable to start session with Azure Storage server."); - } - - // File system object initialized, attempt to establish a session - // with the Azure storage service for the target URI string. - try { - // Inspect the URI authority to determine the account and use the account - // to start an Azure blob client session using an account key for the - // the account or anonymously. - // For all URI's do the following checks in order: - // 1. Validate that can be used with the current Hadoop - // cluster by checking it exists in the list of configured accounts - // for the cluster. - // 2. 
Look up the AccountKey in the list of configured accounts for the - // cluster. - // 3. If there is no AccountKey, assume anonymous public blob access - // when accessing the blob. - // - // If the URI does not specify a container use the default root container - // under the account name. - - // Assertion: Container name on the session Uri should be non-null. - if (getContainerFromAuthority(sessionUri) == null) { - throw new AssertionError(String.format( - "Non-null container expected from session URI: %s.", - sessionUri.toString())); - } - - // Get the account name. - String accountName = getAccountFromAuthority(sessionUri); - if (null == accountName) { - // Account name is not specified as part of the URI. Throw indicating - // an invalid account name. - final String errMsg = String.format( - "Cannot load WASB file system account name not" - + " specified in URI: %s.", sessionUri.toString()); - throw new AzureException(errMsg); - } - - instrumentation.setAccountName(accountName); - String containerName = getContainerFromAuthority(sessionUri); - instrumentation.setContainerName(containerName); - - // Check whether this is a storage emulator account. - if (isStorageEmulatorAccount(accountName)) { - // It is an emulator account, connect to it with no credentials. - connectUsingCredentials(accountName, null, containerName); - return; - } - - // If the securemode flag is set, WASB uses SecureStorageInterfaceImpl instance - // to communicate with Azure storage. In SecureStorageInterfaceImpl SAS keys - // are used to communicate with Azure storage, so connectToAzureStorageInSecureMode - // instantiates the default container using a SAS Key. - if (useSecureMode) { - connectToAzureStorageInSecureMode(accountName, containerName, sessionUri); - return; - } - - // Check whether we have a shared access signature for that container. - String propertyValue = sessionConfiguration.get(KEY_ACCOUNT_SAS_PREFIX - + containerName + "." + accountName); - if (propertyValue != null) { - // SAS was found. Connect using that. - connectUsingSASCredentials(accountName, containerName, propertyValue); - return; - } - - // Check whether the account is configured with an account key. - propertyValue = getAccountKeyFromConfiguration(accountName, - sessionConfiguration); - if (StringUtils.isNotEmpty(propertyValue)) { - // Account key was found. - // Create the Azure storage session using the account key and container. - connectUsingConnectionStringCredentials( - getAccountFromAuthority(sessionUri), - getContainerFromAuthority(sessionUri), propertyValue); - } else { - LOG.debug("The account access key is not configured for {}. " - + "Now try anonymous access.", sessionUri); - connectUsingAnonymousCredentials(sessionUri); - } - } catch (Exception e) { - // Caught exception while attempting to initialize the Azure File - // System store, re-throw the exception. - throw new AzureException(e); - } - } - - private enum ContainerState { - /** - * We haven't checked the container state yet. - */ - Unknown, - /** - * We checked and the container doesn't exist. - */ - DoesntExist, - /** - * The container exists and doesn't have an WASB version stamp on it. - */ - ExistsNoVersion, - /** - * The container exists and has an unsupported WASB version stamped on it. - */ - ExistsAtWrongVersion, - /** - * The container exists and has the proper WASB version stamped on it. - */ - ExistsAtRightVersion - } - - private enum ContainerAccessType { - /** - * We're accessing the container for a pure read operation, e.g. read a - * file. 
- */ - PureRead, - /** - * We're accessing the container purely to write something, e.g. write a - * file. - */ - PureWrite, - /** - * We're accessing the container to read something then write, e.g. rename a - * file. - */ - ReadThenWrite - } - - /** - * Trims a suffix/prefix from the given string. For example if - * s is given as "/xy" and toTrim is "/", this method returns "xy" - */ - private static String trim(String s, String toTrim) { - return StringUtils.removeEnd(StringUtils.removeStart(s, toTrim), - toTrim); - } - - /** - * Checks if the given rawDir belongs to this account/container, and - * if so returns the canonicalized path for it. Otherwise return null. - */ - private String verifyAndConvertToStandardFormat(String rawDir) throws URISyntaxException { - URI asUri = new URI(rawDir); - if (asUri.getAuthority() == null - || asUri.getAuthority().toLowerCase(Locale.ENGLISH).equalsIgnoreCase( - sessionUri.getAuthority().toLowerCase(Locale.ENGLISH))) { - // Applies to me. - return trim(asUri.getPath(), "/"); - } else { - // Doen't apply to me. - return null; - } - } - - /** - * Take a comma-separated list of directories from a configuration variable - * and transform it to a set of directories. - */ - private Set getDirectorySet(final String configVar) - throws AzureException { - String[] rawDirs = sessionConfiguration.getStrings(configVar, new String[0]); - Set directorySet = new HashSet(); - for (String currentDir : rawDirs) { - String myDir; - try { - myDir = verifyAndConvertToStandardFormat(currentDir.trim()); - } catch (URISyntaxException ex) { - throw new AzureException(String.format( - "The directory %s specified in the configuration entry %s is not" - + " a valid URI.", - currentDir, configVar)); - } - if (myDir != null) { - directorySet.add(myDir); - } - } - return directorySet; - } - - /** - * Checks if the given key in Azure Storage should be stored as a page - * blob instead of block blob. - */ - public boolean isPageBlobKey(String key) { - return isKeyForDirectorySet(key, pageBlobDirs); - } - - /** - * Checks if the given key in Azure Storage should be stored as a block blobs - * with compaction enabled instead of normal block blob. - * - * @param key blob name - * @return true, if the file is in directory with block compaction enabled. - */ - public boolean isBlockBlobWithCompactionKey(String key) { - return isKeyForDirectorySet(key, blockBlobWithCompationDirs); - } - - /** - * Checks if the given key in Azure storage should have synchronized - * atomic folder rename createNonRecursive implemented. - */ - @Override - public boolean isAtomicRenameKey(String key) { - return isKeyForDirectorySet(key, atomicRenameDirs); - } - - public boolean isKeyForDirectorySet(String key, Set dirSet) { - String defaultFS = FileSystem.getDefaultUri(sessionConfiguration).toString(); - for (String dir : dirSet) { - if (dir.isEmpty()) { - // dir is root - return true; - } - - if (matchAsteriskPattern(key, dir)) { - return true; - } - - // Allow for blob directories with paths relative to the default file - // system. - // - try { - URI uriPageBlobDir = new URI(dir); - if (null == uriPageBlobDir.getAuthority()) { - // Concatenate the default file system prefix with the relative - // page blob directory path. 
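The verifyAndConvertToStandardFormat()/getDirectorySet() pair above turns a comma-separated configuration value into container-relative paths. A hypothetical walk-through; the property name and URIs here are illustrative, not taken from this patch:

    import org.apache.hadoop.conf.Configuration;

    final class DirectorySetSketch {
      static void example() {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "wasb://data@acct.blob.core.windows.net");
        conf.set("fs.azure.atomic.rename.dir",          // key name assumed for illustration
            "/hbase,wasb://data@acct.blob.core.windows.net/tmp/staging,"
                + "wasb://other@acct.blob.core.windows.net/x");
        // Normalization yields {"hbase", "tmp/staging"}: relative entries are
        // trimmed of slashes, absolute entries whose authority matches the
        // session keep only their path, and entries for a different
        // container/account are dropped (the helper returns null for them).
      }
    }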
- // - String dirWithPrefix = trim(defaultFS, "/") + "/" + dir; - if (matchAsteriskPattern(key, dirWithPrefix)) { - return true; - } - } - } catch (URISyntaxException e) { - LOG.info("URI syntax error creating URI for {}", dir); - } - } - return false; - } - - private boolean matchAsteriskPattern(String pathName, String pattern) { - if (pathName == null || pathName.length() == 0) { - return false; - } - - int pathIndex = 0; - int patternIndex = 0; - - while (pathIndex < pathName.length() && patternIndex < pattern.length()) { - char charToMatch = pattern.charAt(patternIndex); - - // normal char: - if (charToMatch != ASTERISK_SYMBOL) { - if (charToMatch != pathName.charAt(pathIndex)) { - return false; - } - pathIndex++; - patternIndex++; - continue; - } - - // ASTERISK_SYMBOL - // 1. * is used in path name: *a/b,a*/b, a/*b, a/b* - if (patternIndex > 0 && pattern.charAt(patternIndex - 1) != Path.SEPARATOR_CHAR - || patternIndex + 1 < pattern.length() && pattern.charAt(patternIndex + 1) != Path.SEPARATOR_CHAR) { - if (ASTERISK_SYMBOL != pathName.charAt(pathIndex)) { - return false; - } - - pathIndex++; - patternIndex++; - continue; - } - - // 2. * is used as wildcard: */a, a/*/b, a/* - patternIndex++; - // find next path separator - while (pathIndex < pathName.length() && pathName.charAt(pathIndex) != Path.SEPARATOR_CHAR) { - pathIndex++; - } - } - - // Ensure it is not a file/dir which shares same prefix as pattern - // Eg: pattern: /A/B, pathName: /A/BBB should not match - return patternIndex == pattern.length() - && (pathIndex == pathName.length() || pathName.charAt(pathIndex) == Path.SEPARATOR_CHAR); - } - - /** - * Returns the file block size. This is a fake value used for integration - * of the Azure store with Hadoop. - */ - @Override - public long getHadoopBlockSize() { - return hadoopBlockSize; - } - - /** - * This should be called from any method that does any modifications to the - * underlying container: it makes sure to put the WASB current version in the - * container's metadata if it's not already there. - */ - private ContainerState checkContainer(ContainerAccessType accessType) - throws StorageException, AzureException { - synchronized (containerStateLock) { - if (isOkContainerState(accessType)) { - return currentKnownContainerState; - } - if (currentKnownContainerState == ContainerState.ExistsAtWrongVersion) { - String containerVersion = retrieveVersionAttribute(container); - throw wrongVersionException(containerVersion); - } - // This means I didn't check it before or it didn't exist or - // we need to stamp the version. Since things may have changed by - // other machines since then, do the check again and don't depend - // on past information. - - // Sanity check: we don't expect this at this point. - if (currentKnownContainerState == ContainerState.ExistsAtRightVersion) { - throw new AssertionError("Unexpected state: " - + currentKnownContainerState); - } - - // Download the attributes - doubles as an existence check with just - // one service call - try { - container.downloadAttributes(getInstrumentedContext()); - currentKnownContainerState = ContainerState.Unknown; - } catch (StorageException ex) { - if (StorageErrorCodeStrings.CONTAINER_NOT_FOUND.toString() - .equals(ex.getErrorCode())) { - currentKnownContainerState = ContainerState.DoesntExist; - } else { - throw ex; - } - } - - if (currentKnownContainerState == ContainerState.DoesntExist) { - // If the container doesn't exist and we intend to write to it, - // create it now. 
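The matchAsteriskPattern() method above gives the configured directory lists limited glob support. The following simplified re-implementation (not the removed code) captures the same observable rules and shows a few example inputs:

    // A bare "*" segment matches any one path segment, anything else must match
    // literally, and the pattern must end on a segment boundary (so "hbase"
    // matches "hbase/data" but not "hbasebackup").
    public final class WasbDirMatchSketch {
      static boolean matches(String key, String pattern) {
        String[] k = key.split("/");
        String[] p = pattern.split("/");
        if (p.length > k.length) {
          return false;
        }
        for (int i = 0; i < p.length; i++) {
          if (!p[i].equals("*") && !p[i].equals(k[i])) {
            return false;
          }
        }
        return true;
      }
      public static void main(String[] args) {
        System.out.println(matches("hbase/data/t1", "hbase"));                   // true
        System.out.println(matches("hbasebackup/t1", "hbase"));                  // false
        System.out.println(matches("user/alice/staging/p0", "user/*/staging"));  // true
      }
    }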
- if (needToCreateContainer(accessType)) { - storeVersionAttribute(container); - container.create(getInstrumentedContext()); - currentKnownContainerState = ContainerState.ExistsAtRightVersion; - } - } else { - // The container exists, check the version. - String containerVersion = retrieveVersionAttribute(container); - if (containerVersion != null) { - if (containerVersion.equals(FIRST_WASB_VERSION)) { - // It's the version from when WASB was called ASV, just - // fix the version attribute if needed and proceed. - // We should be good otherwise. - if (needToStampVersion(accessType)) { - storeVersionAttribute(container); - container.uploadMetadata(getInstrumentedContext()); - } - } else if (!containerVersion.equals(CURRENT_WASB_VERSION)) { - // Don't know this version - throw. - currentKnownContainerState = ContainerState.ExistsAtWrongVersion; - throw wrongVersionException(containerVersion); - } else { - // It's our correct version. - currentKnownContainerState = ContainerState.ExistsAtRightVersion; - } - } else { - // No version info exists. - currentKnownContainerState = ContainerState.ExistsNoVersion; - if (needToStampVersion(accessType)) { - // Need to stamp the version - storeVersionAttribute(container); - container.uploadMetadata(getInstrumentedContext()); - currentKnownContainerState = ContainerState.ExistsAtRightVersion; - } - } - } - return currentKnownContainerState; - } - } - - private AzureException wrongVersionException(String containerVersion) { - return new AzureException("The container " + container.getName() - + " is at an unsupported version: " + containerVersion - + ". Current supported version: " + FIRST_WASB_VERSION); - } - - private boolean needToStampVersion(ContainerAccessType accessType) { - // We need to stamp the version on the container any time we write to - // it and we have the correct credentials to be able to write container - // metadata. - return accessType != ContainerAccessType.PureRead - && canCreateOrModifyContainer; - } - - private static boolean needToCreateContainer(ContainerAccessType accessType) { - // We need to pro-actively create the container (if it doesn't exist) if - // we're doing a pure write. No need to create it for pure read or read- - // then-write access. - return accessType == ContainerAccessType.PureWrite; - } - - // Determines whether we have to pull the container information again - // or we can work based off what we already have. - private boolean isOkContainerState(ContainerAccessType accessType) { - switch (currentKnownContainerState) { - case Unknown: - // When using SAS, we can't discover container attributes - // so just live with Unknown state and fail later if it - // doesn't exist. - return connectingUsingSAS; - case DoesntExist: - return false; // the container could have been created - case ExistsAtRightVersion: - return true; // fine to optimize - case ExistsAtWrongVersion: - return false; - case ExistsNoVersion: - // If there's no version, it's OK if we don't need to stamp the version - // or we can't anyway even if we wanted to. 
- return !needToStampVersion(accessType); - default: - throw new AssertionError("Unknown access type: " + accessType); - } - } - - private boolean getUseTransactionalContentMD5() { - return sessionConfiguration.getBoolean(KEY_CHECK_BLOCK_MD5, true); - } - - private BlobRequestOptions getUploadOptions() { - BlobRequestOptions options = new BlobRequestOptions(); - options.setStoreBlobContentMD5(sessionConfiguration.getBoolean( - KEY_STORE_BLOB_MD5, false)); - options.setUseTransactionalContentMD5(getUseTransactionalContentMD5()); - options.setConcurrentRequestCount(concurrentWrites); - - options.setRetryPolicyFactory(new RetryExponentialRetry(minBackoff, - deltaBackoff, maxBackoff, maxRetries)); - - return options; - } - - private BlobRequestOptions getDownloadOptions() { - BlobRequestOptions options = new BlobRequestOptions(); - options.setRetryPolicyFactory( - new RetryExponentialRetry(minBackoff, deltaBackoff, maxBackoff, maxRetries)); - options.setUseTransactionalContentMD5(getUseTransactionalContentMD5()); - return options; - } - - @Override - public DataOutputStream storefile(String keyEncoded, - PermissionStatus permissionStatus, - String key) - throws AzureException { - try { - - // Check if a session exists, if not create a session with the - // Azure storage server. - if (null == storageInteractionLayer) { - final String errMsg = String.format( - "Storage session expected for URI '%s' but does not exist.", - sessionUri); - throw new AzureException(errMsg); - } - - // Check if there is an authenticated account associated with the - // file this instance of the WASB file system. If not the file system - // has not been authenticated and all access is anonymous. - if (!isAuthenticatedAccess()) { - // Preemptively raise an exception indicating no uploads are - // allowed to anonymous accounts. - throw new AzureException(new IOException( - "Uploads to public accounts using anonymous " - + "access is prohibited.")); - } - - checkContainer(ContainerAccessType.PureWrite); - - /** - * Note: Windows Azure Blob Storage does not allow the creation of arbitrary directory - * paths under the default $root directory. This is by design to eliminate - * ambiguity in specifying a implicit blob address. A blob in the $root conatiner - * cannot include a / in its name and must be careful not to include a trailing - * '/' when referencing blobs in the $root container. - * A '/; in the $root container permits ambiguous blob names as in the following - * example involving two containers $root and mycontainer: - * http://myaccount.blob.core.windows.net/$root - * http://myaccount.blob.core.windows.net/mycontainer - * If the URL "mycontainer/somefile.txt were allowed in $root then the URL: - * http://myaccount.blob.core.windows.net/mycontainer/myblob.txt - * could mean either: - * (1) container=mycontainer; blob=myblob.txt - * (2) container=$root; blob=mycontainer/myblob.txt - * - * To avoid this type of ambiguity the Azure blob storage prevents - * arbitrary path under $root. For a simple and more consistent user - * experience it was decided to eliminate the opportunity for creating - * such paths by making the $root container read-only under WASB. - */ - - // Check that no attempt is made to write to blobs on default - // $root containers. - if (AZURE_ROOT_CONTAINER.equals(getContainerFromAuthority(sessionUri))) { - // Azure containers are restricted to non-root containers. 
- final String errMsg = String.format( - "Writes to '%s' container for URI '%s' are prohibited, " - + "only updates on non-root containers permitted.", - AZURE_ROOT_CONTAINER, sessionUri.toString()); - throw new AzureException(errMsg); - } - - // Get the blob reference from the store's container and - // return it. - CloudBlobWrapper blob = getBlobReference(keyEncoded); - storePermissionStatus(blob, permissionStatus); - - // Create the output stream for the Azure blob. - // - OutputStream outputStream; - - if (isBlockBlobWithCompactionKey(key)) { - BlockBlobAppendStream blockBlobOutputStream = new BlockBlobAppendStream( - (CloudBlockBlobWrapper) blob, - keyEncoded, - this.uploadBlockSizeBytes, - true, - getInstrumentedContext()); - - outputStream = blockBlobOutputStream; - } else { - outputStream = openOutputStream(blob); - } - - DataOutputStream dataOutStream = new SyncableDataOutputStream(outputStream); - return dataOutStream; - } catch (Exception e) { - // Caught exception while attempting to open the blob output stream. - // Re-throw as an Azure storage exception. - throw new AzureException(e); - } - } - - /** - * Opens a new output stream to the given blob (page or block blob) - * to populate it from scratch with data. - */ - private OutputStream openOutputStream(final CloudBlobWrapper blob) - throws StorageException { - if (blob instanceof CloudPageBlobWrapper){ - return new PageBlobOutputStream( - (CloudPageBlobWrapper) blob, getInstrumentedContext(), sessionConfiguration); - } else { - - // Handle both ClouldBlockBlobWrapperImpl and (only for the test code path) - // MockCloudBlockBlobWrapper. - return ((CloudBlockBlobWrapper) blob).openOutputStream(getUploadOptions(), - getInstrumentedContext()); - } - } - - /** - * Opens a new input stream for the given blob (page or block blob) - * to read its data. - */ - private InputStream openInputStream(CloudBlobWrapper blob, - Optional options) throws StorageException, IOException { - if (blob instanceof CloudBlockBlobWrapper) { - LOG.debug("Using stream seek algorithm {}", inputStreamVersion); - switch(inputStreamVersion) { - case 1: - return blob.openInputStream(getDownloadOptions(), - getInstrumentedContext(isConcurrentOOBAppendAllowed())); - case 2: - boolean bufferedPreadDisabled = options.map(c -> c - .getBoolean(FS_AZURE_BLOCK_BLOB_BUFFERED_PREAD_DISABLE, false)) - .orElse(false); - return new BlockBlobInputStream((CloudBlockBlobWrapper) blob, - getDownloadOptions(), - getInstrumentedContext(isConcurrentOOBAppendAllowed()), - bufferedPreadDisabled); - default: - throw new IOException("Unknown seek algorithm: " + inputStreamVersion); - } - } else { - return new PageBlobInputStream( - (CloudPageBlobWrapper) blob, getInstrumentedContext( - isConcurrentOOBAppendAllowed())); - } - } - - /** - * Default permission to use when no permission metadata is found. - * - * @return The default permission to use. - */ - private static PermissionStatus defaultPermissionNoBlobMetadata() { - return new PermissionStatus("", "", FsPermission.getDefault()); - } - - private static void storeMetadataAttribute(CloudBlobWrapper blob, - String key, String value) { - HashMap metadata = blob.getMetadata(); - if (null == metadata) { - metadata = new HashMap(); - } - metadata.put(key, value); - blob.setMetadata(metadata); - } - - private String getMetadataAttribute(HashMap metadata, - String... 
keyAlternatives) { - if (null == metadata) { - return null; - } - for (String key : keyAlternatives) { - if (metadataKeyCaseSensitive) { - if (metadata.containsKey(key)) { - return metadata.get(key); - } - } else { - // See HADOOP-17643 for details on why this case insensitive metadata - // checks been added - for (Entry entry : metadata.entrySet()) { - if (key.equalsIgnoreCase(entry.getKey())) { - return entry.getValue(); - } - } - } - } - return null; - } - - private static void removeMetadataAttribute(CloudBlobWrapper blob, - String key) { - HashMap metadata = blob.getMetadata(); - if (metadata != null) { - metadata.remove(key); - blob.setMetadata(metadata); - } - } - - private static void storePermissionStatus(CloudBlobWrapper blob, - PermissionStatus permissionStatus) { - storeMetadataAttribute(blob, PERMISSION_METADATA_KEY, - PERMISSION_JSON_SERIALIZER.toJSON(permissionStatus)); - // Remove the old metadata key if present - removeMetadataAttribute(blob, OLD_PERMISSION_METADATA_KEY); - } - - private PermissionStatus getPermissionStatus(CloudBlobWrapper blob) { - String permissionMetadataValue = getMetadataAttribute(blob.getMetadata(), - PERMISSION_METADATA_KEY, OLD_PERMISSION_METADATA_KEY); - if (permissionMetadataValue != null) { - return PermissionStatusJsonSerializer.fromJSONString( - permissionMetadataValue); - } else { - return defaultPermissionNoBlobMetadata(); - } - } - - private static void storeFolderAttribute(CloudBlobWrapper blob) { - storeMetadataAttribute(blob, IS_FOLDER_METADATA_KEY, "true"); - // Remove the old metadata key if present - removeMetadataAttribute(blob, OLD_IS_FOLDER_METADATA_KEY); - } - - private static String encodeMetadataAttribute(String value) throws UnsupportedEncodingException { - // We have to URL encode the attribute as it could - // have URI special characters which unless encoded will result - // in 403 errors from the server. This is due to metadata properties - // being sent in the HTTP header of the request which is in turn used - // on the server side to authorize the request. - return value == null ? null : URLEncoder.encode(value, METADATA_ENCODING.name()); - } - - private static String decodeMetadataAttribute(String encoded) throws UnsupportedEncodingException { - return encoded == null ? null : URLDecoder.decode(encoded, METADATA_ENCODING.name()); - } - - private static String ensureValidAttributeName(String attribute) { - // Attribute names must be valid C# identifiers so we have to - // convert the namespace dots (e.g. "user.something") in the - // attribute names. 
Using underscores here to be consistent with - // the constant metadata keys defined earlier in the file - return attribute.replace('.', '_'); - } - - private static void storeLinkAttribute(CloudBlobWrapper blob, - String linkTarget) throws UnsupportedEncodingException { - String encodedLinkTarget = encodeMetadataAttribute(linkTarget); - storeMetadataAttribute(blob, - LINK_BACK_TO_UPLOAD_IN_PROGRESS_METADATA_KEY, - encodedLinkTarget); - // Remove the old metadata key if present - removeMetadataAttribute(blob, - OLD_LINK_BACK_TO_UPLOAD_IN_PROGRESS_METADATA_KEY); - } - - private String getLinkAttributeValue(CloudBlobWrapper blob) - throws UnsupportedEncodingException { - String encodedLinkTarget = getMetadataAttribute(blob.getMetadata(), - LINK_BACK_TO_UPLOAD_IN_PROGRESS_METADATA_KEY, - OLD_LINK_BACK_TO_UPLOAD_IN_PROGRESS_METADATA_KEY); - return decodeMetadataAttribute(encodedLinkTarget); - } - - private boolean retrieveFolderAttribute(CloudBlobWrapper blob) { - HashMap metadata = blob.getMetadata(); - if (null != metadata) { - if (metadataKeyCaseSensitive) { - return metadata.containsKey(IS_FOLDER_METADATA_KEY) - || metadata.containsKey(OLD_IS_FOLDER_METADATA_KEY); - } else { - // See HADOOP-17643 for details on why this case insensitive metadata - // checks been added - for (String key : metadata.keySet()) { - if (key.equalsIgnoreCase(IS_FOLDER_METADATA_KEY) - || key.equalsIgnoreCase(OLD_IS_FOLDER_METADATA_KEY)) { - return true; - } - } - } - } - return false; - } - - private static void storeVersionAttribute(CloudBlobContainerWrapper container) { - HashMap metadata = container.getMetadata(); - if (null == metadata) { - metadata = new HashMap(); - } - metadata.put(VERSION_METADATA_KEY, CURRENT_WASB_VERSION); - if (metadata.containsKey(OLD_VERSION_METADATA_KEY)) { - metadata.remove(OLD_VERSION_METADATA_KEY); - } - container.setMetadata(metadata); - } - - private String retrieveVersionAttribute(CloudBlobContainerWrapper container) { - return getMetadataAttribute(container.getMetadata(), VERSION_METADATA_KEY, - OLD_VERSION_METADATA_KEY); - } - - @Override - public void storeEmptyFolder(String key, PermissionStatus permissionStatus) - throws AzureException { - - if (null == storageInteractionLayer) { - final String errMsg = String.format( - "Storage session expected for URI '%s' but does not exist.", - sessionUri); - throw new AssertionError(errMsg); - } - - // Check if there is an authenticated account associated with the file - // this instance of the WASB file system. If not the file system has not - // been authenticated and all access is anonymous. - if (!isAuthenticatedAccess()) { - // Preemptively raise an exception indicating no uploads are - // allowed to anonymous accounts. - throw new AzureException( - "Uploads to to public accounts using anonymous access is prohibited."); - } - - try { - checkContainer(ContainerAccessType.PureWrite); - - CloudBlobWrapper blob = getBlobReference(key); - storePermissionStatus(blob, permissionStatus); - storeFolderAttribute(blob); - openOutputStream(blob).close(); - } catch (StorageException e) { - // Caught exception while attempting upload. Re-throw as an Azure - // storage exception. 
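The attribute helpers above map Hadoop xattr-style names onto Azure blob metadata, whose names must be valid identifiers and whose values travel in HTTP headers. A small sketch of the two transformations; the attribute name and value are made up:

    import java.net.URLDecoder;
    import java.net.URLEncoder;

    public final class MetadataEncodingSketch {
      public static void main(String[] args) throws Exception {
        String attribute = "user.origin";                    // hypothetical xattr name
        String metadataName = attribute.replace('.', '_');   // "user_origin": dots not allowed
        String stored = URLEncoder.encode("café & docs", "UTF-8");  // "caf%C3%A9+%26+docs"
        String restored = URLDecoder.decode(stored, "UTF-8");       // round-trips the original
        System.out.println(metadataName + " = " + stored + " (decodes to: " + restored + ")");
      }
    }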
- throw new AzureException(e); - } catch (URISyntaxException e) { - throw new AzureException(e); - } catch (IOException e) { - Throwable t = e.getCause(); - if (t instanceof StorageException) { - StorageException se = (StorageException) t; - // If we got this exception, the blob should have already been created - if (!"LeaseIdMissing".equals(se.getErrorCode())) { - throw new AzureException(e); - } - } else { - throw new AzureException(e); - } - } - } - - /** - * Stores an empty blob that's linking to the temporary file where're we're - * uploading the initial data. - */ - @Override - public void storeEmptyLinkFile(String key, String tempBlobKey, - PermissionStatus permissionStatus) throws AzureException { - if (null == storageInteractionLayer) { - final String errMsg = String.format( - "Storage session expected for URI '%s' but does not exist.", - sessionUri); - throw new AssertionError(errMsg); - } - // Check if there is an authenticated account associated with the file - // this instance of the WASB file system. If not the file system has not - // been authenticated and all access is anonymous. - if (!isAuthenticatedAccess()) { - // Preemptively raise an exception indicating no uploads are - // allowed to anonymous accounts. - throw new AzureException( - "Uploads to to public accounts using anonymous access is prohibited."); - } - - try { - checkContainer(ContainerAccessType.PureWrite); - - CloudBlobWrapper blob = getBlobReference(key); - storePermissionStatus(blob, permissionStatus); - storeLinkAttribute(blob, tempBlobKey); - openOutputStream(blob).close(); - } catch (Exception e) { - // Caught exception while attempting upload. Re-throw as an Azure - // storage exception. - throw new AzureException(e); - } - } - - /** - * If the blob with the given key exists and has a link in its metadata to a - * temporary file (see storeEmptyLinkFile), this method returns the key to - * that temporary file. Otherwise, returns null. - */ - @Override - public String getLinkInFileMetadata(String key) throws AzureException { - if (null == storageInteractionLayer) { - final String errMsg = String.format( - "Storage session expected for URI '%s' but does not exist.", - sessionUri); - throw new AssertionError(errMsg); - } - - try { - checkContainer(ContainerAccessType.PureRead); - - CloudBlobWrapper blob = getBlobReference(key); - blob.downloadAttributes(getInstrumentedContext()); - return getLinkAttributeValue(blob); - } catch (Exception e) { - // Caught exception while attempting download. Re-throw as an Azure - // storage exception. - throw new AzureException(e); - } - } - - /** - * Private method to check for authenticated access. - * - * @ returns boolean -- true if access is credentialed and authenticated and - * false otherwise. - */ - private boolean isAuthenticatedAccess() throws AzureException { - - if (isAnonymousCredentials) { - // Access to this storage account is unauthenticated. - return false; - } - // Access is authenticated. - return true; - } - - /** - * This private method uses the root directory or the original container to - * list blobs under the directory or container depending on whether the - * original file system object was constructed with a short- or long-form URI. - * If the root directory is non-null the URI in the file constructor was in - * the long form. - * - * @param includeMetadata - * if set, the listed items will have their metadata populated - * already. - * @param useFlatBlobListing - * if set the list is flat, otherwise it is hierarchical. 
- * - * @returns blobItems : iterable collection of blob items. - * @throws URISyntaxException - * - */ - private Iterable listRootBlobs(boolean includeMetadata, - boolean useFlatBlobListing) throws StorageException, URISyntaxException { - return rootDirectory.listBlobs( - null, - useFlatBlobListing, - includeMetadata - ? EnumSet.of(BlobListingDetails.METADATA) - : EnumSet.noneOf(BlobListingDetails.class), - null, - getInstrumentedContext()); - } - - /** - * This private method uses the root directory or the original container to - * list blobs under the directory or container given a specified prefix for - * the directory depending on whether the original file system object was - * constructed with a short- or long-form URI. If the root directory is - * non-null the URI in the file constructor was in the long form. - * - * @param aPrefix - * : string name representing the prefix of containing blobs. - * @param includeMetadata - * if set, the listed items will have their metadata populated - * already. - * @param useFlatBlobListing - * if set the list is flat, otherwise it is hierarchical. - * - * @returns blobItems : iterable collection of blob items. - * @throws URISyntaxException - * - */ - private Iterable listRootBlobs(String aPrefix, boolean includeMetadata, - boolean useFlatBlobListing) throws StorageException, URISyntaxException { - - Iterable list = rootDirectory.listBlobs(aPrefix, - useFlatBlobListing, - includeMetadata - ? EnumSet.of(BlobListingDetails.METADATA) - : EnumSet.noneOf(BlobListingDetails.class), - null, - getInstrumentedContext()); - return list; - } - - /** - * This private method uses the root directory or the original container to - * list blobs under the directory or container given a specified prefix for - * the directory depending on whether the original file system object was - * constructed with a short- or long-form URI. It also uses the specified flat - * or hierarchical option, listing details options, request options, and - * operation context. - * - * @param aPrefix - * string name representing the prefix of containing blobs. - * @param useFlatBlobListing - * - the list is flat if true, or hierarchical otherwise. - * @param listingDetails - * - determine whether snapshots, metadata, committed/uncommitted - * data - * @param options - * - object specifying additional options for the request. null = - * default options - * @param opContext - * - context of the current operation - * @returns blobItems : iterable collection of blob items. - * @throws URISyntaxException - * - */ - private Iterable listRootBlobs(String aPrefix, boolean useFlatBlobListing, - EnumSet listingDetails, BlobRequestOptions options, - OperationContext opContext) throws StorageException, URISyntaxException { - - CloudBlobDirectoryWrapper directory = this.container.getDirectoryReference(aPrefix); - return directory.listBlobs( - null, - useFlatBlobListing, - listingDetails, - options, - opContext); - } - - /** - * This private method uses the root directory or the original container to - * get the block blob reference depending on whether the original file system - * object was constructed with a short- or long-form URI. If the root - * directory is non-null the URI in the file constructor was in the long form. - * - * @param aKey - * : a key used to query Azure for the block blob. - * @returns blob : a reference to the Azure block blob corresponding to the - * key. 
- * @throws URISyntaxException - * - */ - private CloudBlobWrapper getBlobReference(String aKey) - throws StorageException, URISyntaxException { - - CloudBlobWrapper blob = null; - if (isPageBlobKey(aKey)) { - blob = this.container.getPageBlobReference(aKey); - } else { - blob = this.container.getBlockBlobReference(aKey); - blob.setStreamMinimumReadSizeInBytes(downloadBlockSizeBytes); - blob.setWriteBlockSizeInBytes(uploadBlockSizeBytes); - } - - return blob; - } - - /** - * This private method normalizes the key by stripping the container name from - * the path and returns a path relative to the root directory of the - * container. - * - * @param keyUri - * - adjust this key to a path relative to the root directory - * - * @returns normKey - */ - private String normalizeKey(URI keyUri) { - String normKey; - - // Strip the container name from the path and return the path - // relative to the root directory of the container. - int parts = isStorageEmulator ? 4 : 3; - normKey = keyUri.getPath().split("/", parts)[(parts - 1)]; - - // Return the fixed key. - return normKey; - } - - /** - * This private method normalizes the key by stripping the container name from - * the path and returns a path relative to the root directory of the - * container. - * - * @param blob - * - adjust the key to this blob to a path relative to the root - * directory - * - * @returns normKey - */ - private String normalizeKey(CloudBlobWrapper blob) { - return normalizeKey(blob.getUri()); - } - - /** - * This private method normalizes the key by stripping the container name from - * the path and returns a path relative to the root directory of the - * container. - * - * @param directory - * - adjust the key to this directory to a path relative to the root - * directory - * - * @returns normKey - */ - private String normalizeKey(CloudBlobDirectoryWrapper directory) { - String dirKey = normalizeKey(directory.getUri()); - // Strip the last delimiter - if (dirKey.endsWith(PATH_DELIMITER)) { - dirKey = dirKey.substring(0, dirKey.length() - 1); - } - return dirKey; - } - - /** - * Default method to creates a new OperationContext for the Azure Storage - * operation that has listeners hooked to it that will update the metrics for - * this file system. This method does not bind to receive send request - * callbacks by default. - * - * @return The OperationContext object to use. - */ - private OperationContext getInstrumentedContext() { - // Default is to not bind to receive send callback events. - return getInstrumentedContext(false); - } - - /** - * Creates a new OperationContext for the Azure Storage operation that has - * listeners hooked to it that will update the metrics for this file system. - * - * @param bindConcurrentOOBIo - * - bind to intercept send request call backs to handle OOB I/O. - * - * @return The OperationContext object to use. 
- */ - private OperationContext getInstrumentedContext(boolean bindConcurrentOOBIo) { - - OperationContext operationContext = new OperationContext(); - - // Set User-Agent - operationContext.getSendingRequestEventHandler().addListener(new StorageEvent() { - @Override - public void eventOccurred(SendingRequestEvent eventArg) { - HttpURLConnection connection = (HttpURLConnection) eventArg.getConnectionObject(); - String userAgentInfo = String.format(Utility.LOCALE_US, "WASB/%s (%s) %s", - VersionInfo.getVersion(), userAgentId, BaseRequest.getUserAgent()); - connection.setRequestProperty(Constants.HeaderConstants.USER_AGENT, userAgentInfo); - } - }); - - if (selfThrottlingEnabled) { - SelfThrottlingIntercept.hook(operationContext, selfThrottlingReadFactor, - selfThrottlingWriteFactor); - } else if (autoThrottlingEnabled) { - ClientThrottlingIntercept.hook(operationContext); - } - - if (bandwidthGaugeUpdater != null) { - //bandwidthGaugeUpdater is null when we config to skip azure metrics - ResponseReceivedMetricUpdater.hook( - operationContext, - instrumentation, - bandwidthGaugeUpdater); - } - - // Bind operation context to receive send request callbacks on this operation. - // If reads concurrent to OOB writes are allowed, the interception will reset - // the conditional header on all Azure blob storage read requests. - if (bindConcurrentOOBIo) { - SendRequestIntercept.bind(operationContext); - } - - if (testHookOperationContext != null) { - operationContext = - testHookOperationContext.modifyOperationContext(operationContext); - } - - ErrorMetricUpdater.hook(operationContext, instrumentation); - - // Return the operation context. - return operationContext; - } - - @Override - public FileMetadata retrieveMetadata(String key) throws IOException { - - // Attempts to check status may occur before opening any streams so first, - // check if a session exists, if not create a session with the Azure storage - // server. - if (null == storageInteractionLayer) { - final String errMsg = String.format( - "Storage session expected for URI '%s' but does not exist.", - sessionUri); - throw new AssertionError(errMsg); - } - - LOG.debug("Retrieving metadata for {}", key); - - try { - if (checkContainer(ContainerAccessType.PureRead) == ContainerState.DoesntExist) { - // The container doesn't exist, so spare some service calls and just - // return null now. - return null; - } - - // Handle the degenerate cases where the key does not exist or the - // key is a container. - if (key.equals("/")) { - // The key refers to root directory of container. - // Set the modification time for root to zero. - return new FileMetadata(key, 0, defaultPermissionNoBlobMetadata(), - BlobMaterialization.Implicit, hadoopBlockSize); - } - - CloudBlobWrapper blob = getBlobReference(key); - - // Download attributes and return file metadata only if the blob - // exists. - if (null != blob && blob.exists(getInstrumentedContext())) { - - LOG.debug("Found {} as an explicit blob. Checking if it's a file or folder.", key); - - try { - // The blob exists, so capture the metadata from the blob - // properties. 
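The normalizeKey() overloads above strip the container (and, for the storage emulator, the account) segment from a blob URI's path. A tiny worked fragment of the split logic:

    // Blob URI path for wasb://mycontainer@acct is "/mycontainer/dir/file.txt";
    // split("/", 3) gives ["", "mycontainer", "dir/file.txt"], and element 2 is
    // the key relative to the container root. The storage emulator exposes the
    // account as an extra leading path segment, hence the 4-way split in the
    // removed code.
    String path = "/mycontainer/dir/file.txt";
    String key = path.split("/", 3)[2];   // "dir/file.txt"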
- blob.downloadAttributes(getInstrumentedContext()); - BlobProperties properties = blob.getProperties(); - - if (retrieveFolderAttribute(blob)) { - LOG.debug("{} is a folder blob.", key); - return new FileMetadata(key, properties.getLastModified().getTime(), - getPermissionStatus(blob), BlobMaterialization.Explicit, hadoopBlockSize); - } else { - - LOG.debug("{} is a normal blob.", key); - - return new FileMetadata( - key, // Always return denormalized key with metadata. - getDataLength(blob, properties), - properties.getLastModified().getTime(), - getPermissionStatus(blob), hadoopBlockSize); - } - } catch(StorageException e){ - if (!NativeAzureFileSystemHelper.isFileNotFoundException(e)) { - throw e; - } - } - } - - // There is no file with that key name, but maybe it is a folder. - // Query the underlying folder/container to list the blobs stored - // there under that key. - // - Iterable objects = - listRootBlobs( - key, - true, - EnumSet.of(BlobListingDetails.METADATA), - null, - getInstrumentedContext()); - - // Check if the directory/container has the blob items. - for (ListBlobItem blobItem : objects) { - if (blobItem instanceof CloudBlockBlobWrapper - || blobItem instanceof CloudPageBlobWrapper) { - LOG.debug("Found blob as a directory-using this file under it to infer its properties {}", - blobItem.getUri()); - - blob = (CloudBlobWrapper) blobItem; - // The key specifies a directory. Create a FileMetadata object which - // specifies as such. - BlobProperties properties = blob.getProperties(); - - return new FileMetadata(key, properties.getLastModified().getTime(), - getPermissionStatus(blob), BlobMaterialization.Implicit, hadoopBlockSize); - } - } - - // Return to caller with a null metadata object. - return null; - - } catch (Exception e) { - // Re-throw the exception as an Azure storage exception. - throw new AzureException(e); - } - } - - @Override - public byte[] retrieveAttribute(String key, String attribute) throws IOException { - try { - checkContainer(ContainerAccessType.PureRead); - CloudBlobWrapper blob = getBlobReference(key); - blob.downloadAttributes(getInstrumentedContext()); - - String value = getMetadataAttribute(blob.getMetadata(), - ensureValidAttributeName(attribute)); - value = decodeMetadataAttribute(value); - return value == null ? null : value.getBytes(METADATA_ENCODING); - } catch (Exception e) { - throw new AzureException(e); - } - } - - @Override - public void storeAttribute(String key, String attribute, byte[] value) throws IOException { - try { - checkContainer(ContainerAccessType.ReadThenWrite); - CloudBlobWrapper blob = getBlobReference(key); - blob.downloadAttributes(getInstrumentedContext()); - - String encodedValue = encodeMetadataAttribute(new String(value, METADATA_ENCODING)); - storeMetadataAttribute(blob, ensureValidAttributeName(attribute), encodedValue); - blob.uploadMetadata(getInstrumentedContext()); - } catch (Exception e) { - throw new AzureException(e); - } - } - - @Override - public InputStream retrieve(String key) throws AzureException, IOException { - return retrieve(key, 0); - } - - @Override - public InputStream retrieve(String key, long startByteOffset) - throws AzureException, IOException { - return retrieve(key, startByteOffset, Optional.empty()); - } - - @Override - public InputStream retrieve(String key, long startByteOffset, - Optional options) throws AzureException, IOException { - try { - // Check if a session exists, if not create a session with the - // Azure storage server. 
- if (null == storageInteractionLayer) { - final String errMsg = String.format( - "Storage session expected for URI '%s' but does not exist.", - sessionUri); - throw new AssertionError(errMsg); - } - checkContainer(ContainerAccessType.PureRead); - - InputStream inputStream = openInputStream(getBlobReference(key), options); - if (startByteOffset > 0) { - // Skip bytes and ignore return value. This is okay - // because if you try to skip too far you will be positioned - // at the end and reads will not return data. - inputStream.skip(startByteOffset); - } - return inputStream; - } catch (IOException e) { - throw e; - } catch (Exception e) { - // Re-throw as an Azure storage exception. - throw new AzureException(e); - } - } - - @Override - public FileMetadata[] list(String prefix, final int maxListingCount, - final int maxListingDepth) throws IOException { - return listInternal(prefix, maxListingCount, maxListingDepth); - } - - private FileMetadata[] listInternal(String prefix, final int maxListingCount, - final int maxListingDepth) - throws IOException { - try { - checkContainer(ContainerAccessType.PureRead); - - if (0 < prefix.length() && !prefix.endsWith(PATH_DELIMITER)) { - prefix += PATH_DELIMITER; - } - - // Enable flat listing option only if depth is unbounded and config - // KEY_ENABLE_FLAT_LISTING is enabled. - boolean enableFlatListing = false; - if (maxListingDepth < 0 && sessionConfiguration.getBoolean( - KEY_ENABLE_FLAT_LISTING, DEFAULT_ENABLE_FLAT_LISTING)) { - enableFlatListing = true; - } - - Iterable objects; - if (prefix.equals("/")) { - objects = listRootBlobs(true, enableFlatListing); - } else { - objects = listRootBlobs(prefix, true, enableFlatListing); - } - - HashMap fileMetadata = new HashMap<>(256); - - for (ListBlobItem blobItem : objects) { - // Check that the maximum listing count is not exhausted. - // - if (0 < maxListingCount - && fileMetadata.size() >= maxListingCount) { - break; - } - - if (blobItem instanceof CloudBlockBlobWrapper || blobItem instanceof CloudPageBlobWrapper) { - String blobKey = null; - CloudBlobWrapper blob = (CloudBlobWrapper) blobItem; - BlobProperties properties = blob.getProperties(); - - // Determine format of the blob name depending on whether an absolute - // path is being used or not. - blobKey = normalizeKey(blob); - - FileMetadata metadata; - if (retrieveFolderAttribute(blob)) { - metadata = new FileMetadata(blobKey, - properties.getLastModified().getTime(), - getPermissionStatus(blob), - BlobMaterialization.Explicit, - hadoopBlockSize); - } else { - metadata = new FileMetadata( - blobKey, - getDataLength(blob, properties), - properties.getLastModified().getTime(), - getPermissionStatus(blob), - hadoopBlockSize); - } - // Add the metadata but remove duplicates. Note that the azure - // storage java SDK returns two types of entries: CloudBlobWrappter - // and CloudDirectoryWrapper. In the case where WASB generated the - // data, there will be an empty blob for each "directory", and we will - // receive a CloudBlobWrapper. If there are also files within this - // "directory", we will also receive a CloudDirectoryWrapper. To - // complicate matters, the data may not be generated by WASB, in - // which case we may not have an empty blob for each "directory". - // So, sometimes we receive both a CloudBlobWrapper and a - // CloudDirectoryWrapper for each directory, and sometimes we receive - // one or the other but not both. We remove duplicates, but - // prefer CloudBlobWrapper over CloudDirectoryWrapper. 
- // Furthermore, it is very unfortunate that the list results are not - // ordered, and it is a partial list which uses continuation. So - // the HashMap is the best structure to remove the duplicates, despite - // its potential large size. - fileMetadata.put(blobKey, metadata); - - } else if (blobItem instanceof CloudBlobDirectoryWrapper) { - CloudBlobDirectoryWrapper directory = (CloudBlobDirectoryWrapper) blobItem; - // Determine format of directory name depending on whether an absolute - // path is being used or not. - // - String dirKey = normalizeKey(directory); - // Strip the last / - if (dirKey.endsWith(PATH_DELIMITER)) { - dirKey = dirKey.substring(0, dirKey.length() - 1); - } - - // Reached the targeted listing depth. Return metadata for the - // directory using default permissions. - // - // Note: Something smarter should be done about permissions. Maybe - // inherit the permissions of the first non-directory blob. - // Also, getting a proper value for last-modified is tricky. - FileMetadata directoryMetadata = new FileMetadata(dirKey, 0, - defaultPermissionNoBlobMetadata(), BlobMaterialization.Implicit, - hadoopBlockSize); - - // Add the directory metadata to the list only if it's not already - // there. See earlier note, we prefer CloudBlobWrapper over - // CloudDirectoryWrapper because it may have additional metadata ( - // properties and ACLs). - if (!fileMetadata.containsKey(dirKey)) { - fileMetadata.put(dirKey, directoryMetadata); - } - - if (!enableFlatListing) { - // Currently at a depth of one, decrement the listing depth for - // sub-directories. - buildUpList(directory, fileMetadata, maxListingCount, - maxListingDepth - 1); - } - } - } - return fileMetadata.values().toArray(new FileMetadata[fileMetadata.size()]); - } catch (Exception e) { - // Re-throw as an Azure storage exception. - // - throw new AzureException(e); - } - } - - /** - * Build up a metadata list of blobs in an Azure blob directory. This method - * uses a in-order first traversal of blob directory structures to maintain - * the sorted order of the blob names. - * - * @param aCloudBlobDirectory Azure blob directory - * @param metadataHashMap a map of file metadata objects for each - * non-directory blob. - * @param maxListingCount maximum length of the built up list. - */ - private void buildUpList(CloudBlobDirectoryWrapper aCloudBlobDirectory, - HashMap metadataHashMap, final int maxListingCount, - final int maxListingDepth) throws Exception { - - // Push the blob directory onto the stack. - // - AzureLinkedStack> dirIteratorStack = - new AzureLinkedStack>(); - - Iterable blobItems = aCloudBlobDirectory.listBlobs(null, - false, EnumSet.of(BlobListingDetails.METADATA), null, - getInstrumentedContext()); - Iterator blobItemIterator = blobItems.iterator(); - - if (0 == maxListingDepth || 0 == maxListingCount) { - // Recurrence depth and listing count are already exhausted. Return - // immediately. - return; - } - - // The directory listing depth is unbounded if the maximum listing depth - // is negative. - final boolean isUnboundedDepth = (maxListingDepth < 0); - - // Reset the current directory listing depth. - int listingDepth = 1; - - // Loop until all directories have been traversed in-order. Loop only - // the following conditions are satisfied: - // (1) The stack is not empty, and - // (2) maxListingCount > 0 implies that the number of items in the - // metadata list is less than the max listing count. 
- while (null != blobItemIterator - && (maxListingCount <= 0 || metadataHashMap.size() < maxListingCount)) { - while (blobItemIterator.hasNext()) { - // Check if the count of items on the list exhausts the maximum - // listing count. - // - if (0 < maxListingCount && metadataHashMap.size() >= maxListingCount) { - break; - } - - ListBlobItem blobItem = blobItemIterator.next(); - - // Add the file metadata to the list if this is not a blob - // directory item. - // - if (blobItem instanceof CloudBlockBlobWrapper || blobItem instanceof CloudPageBlobWrapper) { - String blobKey = null; - CloudBlobWrapper blob = (CloudBlobWrapper) blobItem; - BlobProperties properties = blob.getProperties(); - - // Determine format of the blob name depending on whether an absolute - // path is being used or not. - blobKey = normalizeKey(blob); - - FileMetadata metadata; - if (retrieveFolderAttribute(blob)) { - metadata = new FileMetadata(blobKey, - properties.getLastModified().getTime(), - getPermissionStatus(blob), - BlobMaterialization.Explicit, - hadoopBlockSize); - } else { - metadata = new FileMetadata( - blobKey, - getDataLength(blob, properties), - properties.getLastModified().getTime(), - getPermissionStatus(blob), - hadoopBlockSize); - } - - // Add the metadata but remove duplicates. Note that the azure - // storage java SDK returns two types of entries: CloudBlobWrappter - // and CloudDirectoryWrapper. In the case where WASB generated the - // data, there will be an empty blob for each "directory", and we will - // receive a CloudBlobWrapper. If there are also files within this - // "directory", we will also receive a CloudDirectoryWrapper. To - // complicate matters, the data may not be generated by WASB, in - // which case we may not have an empty blob for each "directory". - // So, sometimes we receive both a CloudBlobWrapper and a - // CloudDirectoryWrapper for each directory, and sometimes we receive - // one or the other but not both. We remove duplicates, but - // prefer CloudBlobWrapper over CloudDirectoryWrapper. - // Furthermore, it is very unfortunate that the list results are not - // ordered, and it is a partial list which uses continuation. So - // the HashMap is the best structure to remove the duplicates, despite - // its potential large size. - metadataHashMap.put(blobKey, metadata); - } else if (blobItem instanceof CloudBlobDirectoryWrapper) { - CloudBlobDirectoryWrapper directory = (CloudBlobDirectoryWrapper) blobItem; - - // This is a directory blob, push the current iterator onto - // the stack of iterators and start iterating through the current - // directory. - if (isUnboundedDepth || maxListingDepth > listingDepth) { - // Push the current directory on the stack and increment the listing - // depth. - dirIteratorStack.push(blobItemIterator); - ++listingDepth; - - // The current blob item represents the new directory. Get - // an iterator for this directory and continue by iterating through - // this directory. - blobItems = directory.listBlobs(null, false, - EnumSet.noneOf(BlobListingDetails.class), null, - getInstrumentedContext()); - blobItemIterator = blobItems.iterator(); - } else { - // Determine format of directory name depending on whether an - // absolute path is being used or not. - String dirKey = normalizeKey(directory); - - // Add the directory metadata to the list only if it's not already - // there. See earlier note, we prefer CloudBlobWrapper over - // CloudDirectoryWrapper because it may have additional metadata ( - // properties and ACLs). 
- if (!metadataHashMap.containsKey(dirKey)) { - - // Reached the targeted listing depth. Return metadata for the - // directory using default permissions. - // - // Note: Something smarter should be done about permissions. Maybe - // inherit the permissions of the first non-directory blob. - // Also, getting a proper value for last-modified is tricky. - // - FileMetadata directoryMetadata = new FileMetadata(dirKey, - 0, - defaultPermissionNoBlobMetadata(), - BlobMaterialization.Implicit, - hadoopBlockSize); - - // Add the directory metadata to the list. - metadataHashMap.put(dirKey, directoryMetadata); - } - } - } - } - - // Traversal of directory tree - - // Check if the iterator stack is empty. If it is set the next blob - // iterator to null. This will act as a terminator for the for-loop. - // Otherwise pop the next iterator from the stack and continue looping. - // - if (dirIteratorStack.isEmpty()) { - blobItemIterator = null; - } else { - // Pop the next directory item from the stack and decrement the - // depth. - blobItemIterator = dirIteratorStack.pop(); - --listingDepth; - - // Assertion: Listing depth should not be less than zero. - if (listingDepth < 0) { - throw new AssertionError("Non-negative listing depth expected"); - } - } - } - } - - /** - * Return the actual data length of the blob with the specified properties. - * If it is a page blob, you can't rely on the length from the properties - * argument and you must get it from the file. Otherwise, you can. - */ - private long getDataLength(CloudBlobWrapper blob, BlobProperties properties) - throws AzureException { - if (blob instanceof CloudPageBlobWrapper) { - try { - return PageBlobInputStream.getPageBlobDataSize((CloudPageBlobWrapper) blob, - getInstrumentedContext( - isConcurrentOOBAppendAllowed())); - } catch (Exception e) { - throw new AzureException( - "Unexpected exception getting page blob actual data size.", e); - } - } - return properties.getLength(); - } - - /** - * Deletes the given blob, taking special care that if we get a - * blob-not-found exception upon retrying the operation, we just - * swallow the error since what most probably happened is that - * the first operation succeeded on the server. - * @param blob The blob to delete. - * @param lease Azure blob lease, or null if no lease is to be used. - * @throws StorageException - */ - private void safeDelete(CloudBlobWrapper blob, SelfRenewingLease lease) throws StorageException { - OperationContext operationContext = getInstrumentedContext(); - try { - blob.delete(operationContext, lease); - } catch (StorageException e) { - if (!NativeAzureFileSystemHelper.isFileNotFoundException(e)) { - LOG.error("Encountered Storage Exception for delete on Blob: {}" - + ", Exception Details: {} Error Code: {}", - blob.getUri(), e.getMessage(), e.getErrorCode()); - } - // On exception, check that if: - // 1. It's a BlobNotFound exception AND - // 2. It got there after one-or-more retries THEN - // we swallow the exception. - if (e.getErrorCode() != null - && "BlobNotFound".equals(e.getErrorCode()) - && operationContext.getRequestResults().size() > 1 - && operationContext.getRequestResults().get(0).getException() != null) { - LOG.debug("Swallowing delete exception on retry: {}", e.getMessage()); - return; - } else { - throw e; - } - } finally { - if (lease != null) { - lease.free(); - } - } - } - - /** - * API implementation to delete a blob in the back end azure storage. 
- */ - @Override - public boolean delete(String key, SelfRenewingLease lease) throws IOException { - try { - if (checkContainer(ContainerAccessType.ReadThenWrite) == ContainerState.DoesntExist) { - // Container doesn't exist, no need to do anything - return true; - } - // Get the blob reference and delete it. - CloudBlobWrapper blob = getBlobReference(key); - safeDelete(blob, lease); - return true; - } catch (Exception e) { - if (e instanceof StorageException - && NativeAzureFileSystemHelper.isFileNotFoundException( - (StorageException) e)) { - // the file or directory does not exist - return false; - } - throw new AzureException(e); - } - } - - /** - * API implementation to delete a blob in the back end azure storage. - */ - @Override - public boolean delete(String key) throws IOException { - try { - return delete(key, null); - } catch (IOException e) { - Throwable t = e.getCause(); - if (t instanceof StorageException) { - StorageException se = (StorageException) t; - if ("LeaseIdMissing".equals(se.getErrorCode())){ - SelfRenewingLease lease = null; - try { - lease = acquireLease(key); - return delete(key, lease); - } catch (AzureException e3) { - LOG.warn("Got unexpected exception trying to acquire lease on " - + key + "." + e3.getMessage()); - throw e3; - } finally { - try { - if (lease != null){ - lease.free(); - } - } catch (Exception e4){ - LOG.error("Unable to free lease on " + key, e4); - } - } - } else { - throw e; - } - } else { - throw e; - } - } - } - - @Override - public void rename(String srcKey, String dstKey) throws IOException { - rename(srcKey, dstKey, false, null, true); - } - - @Override - public void rename(String srcKey, String dstKey, boolean acquireLease, - SelfRenewingLease existingLease) throws IOException { - rename(srcKey, dstKey, acquireLease, existingLease, true); - } - - @Override - public void rename(String srcKey, String dstKey, boolean acquireLease, - SelfRenewingLease existingLease, boolean overwriteDestination) throws IOException { - - LOG.debug("Moving {} to {}", srcKey, dstKey); - - if (acquireLease && existingLease != null) { - throw new IOException("Cannot acquire new lease if one already exists."); - } - - CloudBlobWrapper srcBlob = null; - CloudBlobWrapper dstBlob = null; - SelfRenewingLease lease = null; - try { - // Attempts rename may occur before opening any streams so first, - // check if a session exists, if not create a session with the Azure - // storage server. - if (null == storageInteractionLayer) { - final String errMsg = String.format( - "Storage session expected for URI '%s' but does not exist.", - sessionUri); - throw new AssertionError(errMsg); - } - - checkContainer(ContainerAccessType.ReadThenWrite); - // Get the source blob and assert its existence. If the source key - // needs to be normalized then normalize it. - // - - srcBlob = getBlobReference(srcKey); - if (!srcBlob.exists(getInstrumentedContext())) { - throw new AzureException("Source blob " + srcKey + " does not exist."); - } - - /** - * Conditionally get a lease on the source blob to prevent other writers - * from changing it. This is used for correctness in HBase when log files - * are renamed. It generally should do no harm other than take a little - * more time for other rename scenarios. When the HBase master renames a - * log file folder, the lease locks out other writers. This - * prevents a region server that the master thinks is dead, but is still - * alive, from committing additional updates. 
This is different than - * when HBase runs on HDFS, where the region server recovers the lease - * on a log file, to gain exclusive access to it, before it splits it. - */ - if (acquireLease) { - lease = srcBlob.acquireLease(); - } else if (existingLease != null) { - lease = existingLease; - } - - // Get the destination blob. The destination key always needs to be - // normalized. - // - dstBlob = getBlobReference(dstKey); - - // Rename the source blob to the destination blob by copying it to - // the destination blob then deleting it. - // - // Copy blob operation in Azure storage is very costly. It will be highly - // likely throttled during Azure storage gc. Short term fix will be using - // a more intensive exponential retry policy when the cluster is getting - // throttled. - try { - dstBlob.startCopyFromBlob(srcBlob, null, - getInstrumentedContext(), overwriteDestination); - } catch (StorageException se) { - if (se.getHttpStatusCode() == HttpURLConnection.HTTP_UNAVAILABLE) { - int copyBlobMinBackoff = sessionConfiguration.getInt( - KEY_COPYBLOB_MIN_BACKOFF_INTERVAL, - DEFAULT_COPYBLOB_MIN_BACKOFF_INTERVAL); - - int copyBlobMaxBackoff = sessionConfiguration.getInt( - KEY_COPYBLOB_MAX_BACKOFF_INTERVAL, - DEFAULT_COPYBLOB_MAX_BACKOFF_INTERVAL); - - int copyBlobDeltaBackoff = sessionConfiguration.getInt( - KEY_COPYBLOB_BACKOFF_INTERVAL, - DEFAULT_COPYBLOB_BACKOFF_INTERVAL); - - int copyBlobMaxRetries = sessionConfiguration.getInt( - KEY_COPYBLOB_MAX_IO_RETRIES, - DEFAULT_COPYBLOB_MAX_RETRY_ATTEMPTS); - - BlobRequestOptions options = new BlobRequestOptions(); - options.setRetryPolicyFactory(new RetryExponentialRetry( - copyBlobMinBackoff, copyBlobDeltaBackoff, copyBlobMaxBackoff, - copyBlobMaxRetries)); - dstBlob.startCopyFromBlob(srcBlob, options, - getInstrumentedContext(), overwriteDestination); - } else { - throw se; - } - } - waitForCopyToComplete(dstBlob, getInstrumentedContext()); - safeDelete(srcBlob, lease); - } catch (StorageException e) { - if (e.getHttpStatusCode() == HttpURLConnection.HTTP_UNAVAILABLE) { - LOG.warn("Rename: CopyBlob: StorageException: ServerBusy: Retry complete, will attempt client side copy for page blob"); - InputStream ipStream = null; - OutputStream opStream = null; - try { - if (srcBlob.getProperties().getBlobType() == BlobType.PAGE_BLOB){ - ipStream = openInputStream(srcBlob, Optional.empty()); - opStream = openOutputStream(dstBlob); - byte[] buffer = new byte[PageBlobFormatHelpers.PAGE_SIZE]; - int len; - while ((len = ipStream.read(buffer)) != -1) { - opStream.write(buffer, 0, len); - } - opStream.flush(); - opStream.close(); - ipStream.close(); - } else { - throw new AzureException(e); - } - safeDelete(srcBlob, lease); - } catch(StorageException se) { - LOG.warn("Rename: CopyBlob: StorageException: Failed"); - throw new AzureException(se); - } finally { - IOUtils.closeStream(ipStream); - IOUtils.closeStream(opStream); - } - } else { - throw new AzureException(e); - } - } catch (URISyntaxException e) { - // Re-throw exception as an Azure storage exception. - throw new AzureException(e); - } - } - - private void waitForCopyToComplete(CloudBlobWrapper blob, OperationContext opContext){ - boolean copyInProgress = true; - while (copyInProgress) { - try { - blob.downloadAttributes(opContext); - } - catch (StorageException se){ - } - - // test for null because mocked filesystem doesn't know about copystates yet. 
- copyInProgress = (blob.getCopyState() != null && blob.getCopyState().getStatus() == CopyStatus.PENDING); - if (copyInProgress) { - try { - Thread.sleep(1000); - } - catch (InterruptedException ie){ - //ignore - } - } - } - } - - /** - * Checks whether an explicit file/folder exists. - * This is used by redo of atomic rename. - * There was a bug(apache jira HADOOP-12780) during atomic rename if - * process crashes after an inner directory has been renamed but still - * there are file under that directory to be renamed then after the - * process comes again it tries to redo the renames. It checks whether - * the directory exists or not by calling filesystem.exist. - * But filesystem.Exists will treat that directory as implicit directory - * and return true as file exists under that directory. So It will try - * try to rename that directory and will fail as the corresponding blob - * does not exist. So this method explicitly checks for the blob. - */ - @Override - public boolean explicitFileExists(String key) throws AzureException { - CloudBlobWrapper blob; - try { - blob = getBlobReference(key); - if (null != blob && blob.exists(getInstrumentedContext())) { - return true; - } - - return false; - } catch (StorageException e) { - throw new AzureException(e); - } catch (URISyntaxException e) { - throw new AzureException(e); - } - } - - /** - * Changes the permission status on the given key. - */ - @Override - public void changePermissionStatus(String key, PermissionStatus newPermission) - throws AzureException { - try { - checkContainer(ContainerAccessType.ReadThenWrite); - CloudBlobWrapper blob = getBlobReference(key); - blob.downloadAttributes(getInstrumentedContext()); - storePermissionStatus(blob, newPermission); - blob.uploadMetadata(getInstrumentedContext()); - } catch (Exception e) { - throw new AzureException(e); - } - } - - @Override - public void purge(String prefix) throws IOException { - try { - - // Attempts to purge may occur before opening any streams so first, - // check if a session exists, if not create a session with the Azure - // storage server. - if (null == storageInteractionLayer) { - final String errMsg = String.format( - "Storage session expected for URI '%s' but does not exist.", - sessionUri); - throw new AssertionError(errMsg); - } - - if (checkContainer(ContainerAccessType.ReadThenWrite) == ContainerState.DoesntExist) { - // Container doesn't exist, no need to do anything. - return; - } - // Get all blob items with the given prefix from the container and delete - // them. - Iterable objects = listRootBlobs(prefix, false, false); - for (ListBlobItem blobItem : objects) { - ((CloudBlob) blobItem).delete(DeleteSnapshotsOption.NONE, null, null, - getInstrumentedContext()); - } - } catch (Exception e) { - // Re-throw as an Azure storage exception. - // - throw new AzureException(e); - } - } - - /** - * Get a lease on the blob identified by key. This lease will be renewed - * indefinitely by a background thread. - */ - @Override - public SelfRenewingLease acquireLease(String key) throws AzureException { - LOG.debug("acquiring lease on {}", key); - try { - checkContainer(ContainerAccessType.ReadThenWrite); - CloudBlobWrapper blob = getBlobReference(key); - return blob.acquireLease(); - } - catch (Exception e) { - - // Caught exception while attempting to get lease. Re-throw as an - // Azure storage exception. 
- throw new AzureException(e); - } - } - - @Override - public void updateFolderLastModifiedTime(String key, Date lastModified, - SelfRenewingLease folderLease) - throws AzureException { - try { - checkContainer(ContainerAccessType.ReadThenWrite); - CloudBlobWrapper blob = getBlobReference(key); - //setLastModified function is not available in 2.0.0 version. blob.uploadProperties automatically updates last modified - //timestamp to current time - blob.uploadProperties(getInstrumentedContext(), folderLease); - } catch (Exception e) { - - // Caught exception while attempting to update the properties. Re-throw as an - // Azure storage exception. - throw new AzureException(e); - } - } - - @Override - public void updateFolderLastModifiedTime(String key, - SelfRenewingLease folderLease) throws AzureException { - final Calendar lastModifiedCalendar = Calendar - .getInstance(Utility.LOCALE_US); - lastModifiedCalendar.setTimeZone(Utility.UTC_ZONE); - Date lastModified = lastModifiedCalendar.getTime(); - updateFolderLastModifiedTime(key, lastModified, folderLease); - } - - @Override - public void dump() throws IOException { - } - - @Override - public void close() { - if (bandwidthGaugeUpdater != null) { - bandwidthGaugeUpdater.close(); - bandwidthGaugeUpdater = null; - } - } - - // Finalizer to ensure complete shutdown - @Override - protected void finalize() throws Throwable { - LOG.debug("finalize() called"); - close(); - super.finalize(); - } - - @Override - public DataOutputStream retrieveAppendStream(String key, int bufferSize) throws IOException { - - try { - - if (isPageBlobKey(key)) { - throw new UnsupportedOperationException("Append not supported for Page Blobs"); - } - - CloudBlobWrapper blob = this.container.getBlockBlobReference(key); - - OutputStream outputStream; - - BlockBlobAppendStream blockBlobOutputStream = new BlockBlobAppendStream( - (CloudBlockBlobWrapper) blob, - key, - bufferSize, - isBlockBlobWithCompactionKey(key), - getInstrumentedContext()); - - outputStream = blockBlobOutputStream; - - DataOutputStream dataOutStream = new SyncableDataOutputStream( - outputStream); - - return dataOutStream; - } catch(Exception ex) { - throw new AzureException(ex); - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/BlobMaterialization.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/BlobMaterialization.java deleted file mode 100644 index a1f82428b49e3..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/BlobMaterialization.java +++ /dev/null @@ -1,42 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure; - -import org.apache.hadoop.classification.InterfaceAudience; - -/** - * Indicates whether there are actual blobs indicating the existence of - * directories or whether we're inferring their existence from them having files - * in there. - */ -@InterfaceAudience.Private -enum BlobMaterialization { - /** - * Indicates a directory that isn't backed by an actual blob, but its - * existence is implied by the fact that there are files in there. For - * example, if the blob /a/b exists then it implies the existence of the /a - * directory if there's no /a blob indicating it. - */ - Implicit, - /** - * Indicates that the directory is backed by an actual blob that has the - * isFolder metadata on it. - */ - Explicit, -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/BlobOperationDescriptor.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/BlobOperationDescriptor.java deleted file mode 100644 index 6da64e124efd9..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/BlobOperationDescriptor.java +++ /dev/null @@ -1,222 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import com.microsoft.azure.storage.Constants.HeaderConstants; -import org.apache.hadoop.classification.InterfaceAudience; -import java.net.HttpURLConnection; -import java.net.URL; - -/** - * Determines the operation type (PutBlock, PutPage, GetBlob, etc) of Azure - * Storage operations. This is used by the handlers of the SendingRequestEvent - * and ResponseReceivedEvent exposed by the Azure Storage SDK to identify - * operation types (since the type of operation is not exposed by the SDK). - */ -@InterfaceAudience.Private -final class BlobOperationDescriptor { - - private BlobOperationDescriptor() { - // hide default constructor - } - - /** - * Gets the content length for the Azure Storage operation from the - * 'x-ms-range' header, if set. - * @param range the value of the 'x-ms-range' header. - * @return the content length, or zero if not set. - */ - private static long getContentLengthIfKnown(String range) { - long contentLength = 0; - // Format is "bytes=%d-%d" - if (range != null && range.startsWith("bytes=")) { - String[] offsets = range.substring("bytes=".length()).split("-"); - if (offsets.length == 2) { - contentLength = Long.parseLong(offsets[1]) - Long.parseLong(offsets[0]) - + 1; - } - } - return contentLength; - } - - /** - * Gets the content length for the Azure Storage operation, or returns zero if - * unknown. - * @param conn the connection object for the Azure Storage operation. - * @param operationType the Azure Storage operation type. 
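As a worked example of the "x-ms-range" parsing shown above (the values are illustrative), a header of "bytes=0-4194303" describes an inclusive byte range, so the recovered content length is 4194303 - 0 + 1 = 4194304 bytes (4 MiB):

    String range = "bytes=0-4194303";
    String[] offsets = range.substring("bytes=".length()).split("-");
    long contentLength =
        Long.parseLong(offsets[1]) - Long.parseLong(offsets[0]) + 1;
    System.out.println(contentLength);  // prints 4194304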
- * @return the content length, or zero if unknown. - */ - static long getContentLengthIfKnown(HttpURLConnection conn, - OperationType operationType) { - long contentLength = 0; - switch (operationType) { - case AppendBlock: - case PutBlock: - String lengthString = conn.getRequestProperty( - HeaderConstants.CONTENT_LENGTH); - contentLength = (lengthString != null) - ? Long.parseLong(lengthString) - : 0; - break; - case PutPage: - case GetBlob: - contentLength = BlobOperationDescriptor.getContentLengthIfKnown( - conn.getRequestProperty("x-ms-range")); - break; - default: - break; - } - return contentLength; - } - - /** - * Gets the operation type of an Azure Storage operation. - * - * @param conn the connection object for the Azure Storage operation. - * @return the operation type. - */ - static OperationType getOperationType(HttpURLConnection conn) { - OperationType operationType = OperationType.Unknown; - String method = conn.getRequestMethod(); - String compValue = getQueryParameter(conn.getURL(), - "comp"); - - if (method.equalsIgnoreCase("PUT")) { - if (compValue != null) { - switch (compValue) { - case "metadata": - operationType = OperationType.SetMetadata; - break; - case "properties": - operationType = OperationType.SetProperties; - break; - case "block": - operationType = OperationType.PutBlock; - break; - case "page": - String pageWrite = conn.getRequestProperty("x-ms-page-write"); - if (pageWrite != null && pageWrite.equalsIgnoreCase( - "UPDATE")) { - operationType = OperationType.PutPage; - } - break; - case "appendblock": - operationType = OperationType.AppendBlock; - break; - case "blocklist": - operationType = OperationType.PutBlockList; - break; - default: - break; - } - } else { - String blobType = conn.getRequestProperty("x-ms-blob-type"); - if (blobType != null - && (blobType.equalsIgnoreCase("PageBlob") - || blobType.equalsIgnoreCase("BlockBlob") - || blobType.equalsIgnoreCase("AppendBlob"))) { - operationType = OperationType.CreateBlob; - } else if (blobType == null) { - String resType = getQueryParameter(conn.getURL(), - "restype"); - if (resType != null - && resType.equalsIgnoreCase("container")) { - operationType = operationType.CreateContainer; - } - } - } - } else if (method.equalsIgnoreCase("GET")) { - if (compValue != null) { - switch (compValue) { - case "list": - operationType = OperationType.ListBlobs; - break; - - case "metadata": - operationType = OperationType.GetMetadata; - break; - case "blocklist": - operationType = OperationType.GetBlockList; - break; - case "pagelist": - operationType = OperationType.GetPageList; - break; - default: - break; - } - } else if (conn.getRequestProperty("x-ms-range") != null) { - operationType = OperationType.GetBlob; - } - } else if (method.equalsIgnoreCase("HEAD")) { - operationType = OperationType.GetProperties; - } else if (method.equalsIgnoreCase("DELETE")) { - String resType = getQueryParameter(conn.getURL(), - "restype"); - if (resType != null - && resType.equalsIgnoreCase("container")) { - operationType = operationType.DeleteContainer; - } else { - operationType = OperationType.DeleteBlob; - } - } - return operationType; - } - - private static String getQueryParameter(URL url, String queryParameterName) { - String query = (url != null) ? 
url.getQuery(): null; - - if (query == null) { - return null; - } - - String searchValue = queryParameterName + "="; - - int offset = query.indexOf(searchValue); - String value = null; - if (offset != -1) { - int beginIndex = offset + searchValue.length(); - int endIndex = query.indexOf('&', beginIndex); - value = (endIndex == -1) - ? query.substring(beginIndex) - : query.substring(beginIndex, endIndex); - } - return value; - } - - @InterfaceAudience.Private - enum OperationType { - AppendBlock, - CreateBlob, - CreateContainer, - DeleteBlob, - DeleteContainer, - GetBlob, - GetBlockList, - GetMetadata, - GetPageList, - GetProperties, - ListBlobs, - PutBlock, - PutBlockList, - PutPage, - SetMetadata, - SetProperties, - Unknown - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/BlockBlobAppendStream.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/BlockBlobAppendStream.java deleted file mode 100644 index c19bbe48fc3f6..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/BlockBlobAppendStream.java +++ /dev/null @@ -1,1141 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.UUID; -import java.util.Random; -import java.util.concurrent.ConcurrentLinkedDeque; -import java.util.concurrent.ConcurrentLinkedQueue; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.Semaphore; -import java.util.concurrent.ThreadFactory; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.atomic.AtomicReference; - -import org.apache.hadoop.fs.impl.StoreImplementationUtils; -import org.apache.hadoop.classification.VisibleForTesting; -import org.apache.hadoop.util.Preconditions; -import org.apache.hadoop.util.concurrent.SubjectInheritingThread; -import org.apache.commons.lang3.StringUtils; - -import org.apache.hadoop.fs.FSExceptionMessages; -import org.apache.commons.codec.binary.Base64; -import org.apache.hadoop.fs.StreamCapabilities; -import org.apache.hadoop.fs.Syncable; -import org.apache.hadoop.fs.azure.StorageInterface.CloudBlockBlobWrapper; -import org.apache.hadoop.io.ElasticByteBufferPool; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.microsoft.azure.storage.AccessCondition; -import com.microsoft.azure.storage.OperationContext; -import com.microsoft.azure.storage.StorageException; -import com.microsoft.azure.storage.StorageErrorCodeStrings; -import com.microsoft.azure.storage.blob.BlobRequestOptions; -import com.microsoft.azure.storage.blob.BlockEntry; -import com.microsoft.azure.storage.blob.BlockListingFilter; -import com.microsoft.azure.storage.blob.BlockSearchMode; - -/** - * Stream object that implements append for Block Blobs in WASB. - * - * The stream object implements hflush/hsync and block compaction. Block - * compaction is the process of replacing a sequence of small blocks with one - * big block. Azure Block blobs supports up to 50000 blocks and every - * hflush/hsync generates one block. When the number of blocks is above 32000, - * the process of compaction decreases the total number of blocks, if possible. - * If compaction is disabled, hflush/hsync are empty functions. - * - * The stream object uses background threads for uploading the blocks and the - * block blob list. Blocks can be uploaded concurrently. However, when the block - * list is uploaded, block uploading should stop. If a block is uploaded before - * the block list and the block id is not in the list, the block will be lost. - * If the block is uploaded after the block list and the block id is in the - * list, the block list upload will fail. The exclusive access for the block - * list upload is managed by uploadingSemaphore. - */ - -public class BlockBlobAppendStream extends OutputStream implements Syncable, - StreamCapabilities { - - /** - * The name of the blob/file. - */ - private final String key; - - /** - * This variable tracks if this is new blob or existing one. - */ - private boolean blobExist; - - /** - * When the blob exist, to to prevent concurrent write we take a lease. - * Taking a lease is not necessary for new blobs. 
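The exclusive block-list commit mentioned in the class comment can be sketched with a plain java.util.concurrent.Semaphore (the class and method names below are made up for illustration): every block upload takes one permit, and the block-list commit takes all of them, so it can only proceed once no block upload is in flight.

    import java.util.concurrent.Semaphore;

    public class CommitExclusionSketch {
      private static final int UPLOAD_THREADS = 4;
      private static final Semaphore permits = new Semaphore(UPLOAD_THREADS, true);

      static void uploadBlock(Runnable upload) throws InterruptedException {
        permits.acquire(1);                      // one permit per block upload
        try { upload.run(); } finally { permits.release(1); }
      }

      static void commitBlockList(Runnable commit) throws InterruptedException {
        permits.acquire(UPLOAD_THREADS);         // waits for in-flight uploads to drain
        try { commit.run(); } finally { permits.release(UPLOAD_THREADS); }
      }

      public static void main(String[] args) throws InterruptedException {
        uploadBlock(() -> System.out.println("block uploaded"));
        commitBlockList(() -> System.out.println("block list committed"));
      }
    }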
- */ - private SelfRenewingLease lease = null; - - /** - * The support for process of compaction is optional. - */ - private final boolean compactionEnabled; - - /** - * The number of blocks above each block compaction is triggered. - */ - private static final int DEFAULT_ACTIVATE_COMPACTION_BLOCK_COUNT = 32000; - - /** - * The number of blocks above each block compaction is triggered. - */ - private int activateCompactionBlockCount - = DEFAULT_ACTIVATE_COMPACTION_BLOCK_COUNT; - - /** - * The size of the output buffer. Writes store the data in outBuffer until - * either the size is above maxBlockSize or hflush/hsync is called. - */ - private final AtomicInteger maxBlockSize; - - /** - * The current buffer where writes are stored. - */ - private ByteBuffer outBuffer; - - /** - * The size of the blob that has been successfully stored in the Azure Blob - * service. - */ - private final AtomicLong committedBlobLength = new AtomicLong(0); - - /** - * Position of last block in the blob. - */ - private volatile long blobLength = 0; - - /** - * Minutes waiting before the close operation timed out. - */ - private static final int CLOSE_UPLOAD_DELAY = 10; - - /** - * Keep alive time for the threadpool. - */ - private static final int THREADPOOL_KEEP_ALIVE = 30; - /** - * Azure Block Blob used for the stream. - */ - private final CloudBlockBlobWrapper blob; - - /** - * Azure Storage operation context. - */ - private final OperationContext opContext; - - /** - * Commands send from client calls to the background thread pool. - */ - private abstract class UploadCommand { - - // the blob offset for the command - private final long commandBlobOffset; - - // command completion latch - private final CountDownLatch completed = new CountDownLatch(1); - - UploadCommand(long offset) { - this.commandBlobOffset = offset; - } - - long getCommandBlobOffset() { - return commandBlobOffset; - } - - void await() throws InterruptedException { - completed.await(); - } - - void awaitAsDependent() throws InterruptedException { - await(); - } - - void setCompleted() { - completed.countDown(); - } - - void execute() throws InterruptedException, IOException {} - - void dump() {} - } - - /** - * The list of recent commands. Before block list is committed, all the block - * listed in the list must be uploaded. activeBlockCommands is used for - * enumerating the blocks and waiting on the latch until the block is - * uploaded. - */ - private final ConcurrentLinkedQueue activeBlockCommands - = new ConcurrentLinkedQueue<>(); - - /** - * Variable to track if the stream has been closed. - */ - private volatile boolean closed = false; - - /** - * First IOException encountered. - */ - private final AtomicReference firstError - = new AtomicReference<>(); - - /** - * Flag set when the first error has been thrown. - */ - private boolean firstErrorThrown = false; - - /** - * Semaphore for serializing block uploads with NativeAzureFileSystem. - * - * The semaphore starts with number of permits equal to the number of block - * upload threads. Each block upload thread needs one permit to start the - * upload. The put block list acquires all the permits before the block list - * is committed. - */ - private final Semaphore uploadingSemaphore = new Semaphore( - MAX_NUMBER_THREADS_IN_THREAD_POOL, - true); - - /** - * Queue storing buffers with the size of the Azure block ready for - * reuse. The pool allows reusing the blocks instead of allocating new - * blocks. 
After the data is sent to the service, the buffer is returned - * back to the queue - */ - private final ElasticByteBufferPool poolReadyByteBuffers - = new ElasticByteBufferPool(); - - /** - * The blob's block list. - */ - private final List blockEntries = new ArrayList<>( - DEFAULT_CAPACITY_BLOCK_ENTRIES); - - private static final int DEFAULT_CAPACITY_BLOCK_ENTRIES = 1024; - - /** - * The uncommitted blob's block list. - */ - private final ConcurrentLinkedDeque uncommittedBlockEntries - = new ConcurrentLinkedDeque<>(); - - /** - * Variable to hold the next block id to be used for azure storage blocks. - */ - private static final int UNSET_BLOCKS_COUNT = -1; - private long nextBlockCount = UNSET_BLOCKS_COUNT; - - /** - * Variable to hold the block id prefix to be used for azure storage blocks. - */ - private String blockIdPrefix = null; - - /** - * Maximum number of threads in block upload thread pool. - */ - private static final int MAX_NUMBER_THREADS_IN_THREAD_POOL = 4; - - /** - * Number of times block upload needs is retried. - */ - private static final int MAX_BLOCK_UPLOAD_RETRIES = 3; - - /** - * Wait time between block upload retries in milliseconds. - */ - private static final int BLOCK_UPLOAD_RETRY_INTERVAL = 1000; - - /** - * Logger. - */ - private static final Logger LOG = - LoggerFactory.getLogger(BlockBlobAppendStream.class); - - /** - * The absolute maximum of blocks for a blob. It includes committed and - * temporary blocks. - */ - private static final int MAX_BLOCK_COUNT = 100000; - - /** - * The upload thread pool executor. - */ - private ThreadPoolExecutor ioThreadPool; - - /** - * Azure Storage access conditions for the blob. - */ - private final AccessCondition accessCondition = new AccessCondition(); - - /** - * Atomic integer to provide thread id for thread names for uploader threads. - */ - private final AtomicInteger threadSequenceNumber; - - /** - * Prefix to be used for thread names for uploader threads. - */ - private static final String THREAD_ID_PREFIX = "append-blockblob"; - - /** - * BlockBlobAppendStream constructor. - * - * @param blob - * Azure Block Blob - * @param aKey - * blob's name - * @param bufferSize - * the maximum size of a blob block. - * @param compactionEnabled - * is the compaction process enabled for this blob - * @param opContext - * Azure Store operation context for the blob - * @throws IOException - * if an I/O error occurs. In particular, an IOException may be - * thrown if the output stream cannot be used for append operations - */ - public BlockBlobAppendStream(final CloudBlockBlobWrapper blob, - final String aKey, - final int bufferSize, - final boolean compactionEnabled, - final OperationContext opContext) - throws IOException { - - Preconditions.checkArgument(StringUtils.isNotEmpty(aKey)); - Preconditions.checkArgument(bufferSize >= 0); - - this.blob = blob; - this.opContext = opContext; - this.key = aKey; - this.maxBlockSize = new AtomicInteger(bufferSize); - this.threadSequenceNumber = new AtomicInteger(0); - this.blockIdPrefix = null; - this.compactionEnabled = compactionEnabled; - this.blobExist = true; - this.outBuffer = poolReadyByteBuffers.getBuffer(false, maxBlockSize.get()); - - try { - // download the block list - blockEntries.addAll( - blob.downloadBlockList( - BlockListingFilter.COMMITTED, - new BlobRequestOptions(), - opContext)); - - blobLength = blob.getProperties().getLength(); - - committedBlobLength.set(blobLength); - - // Acquiring lease on the blob. 
- lease = new SelfRenewingLease(blob, true); - accessCondition.setLeaseID(lease.getLeaseID()); - - } catch (StorageException ex) { - if (ex.getErrorCode().equals(StorageErrorCodeStrings.BLOB_NOT_FOUND)) { - blobExist = false; - } - else if (ex.getErrorCode().equals( - StorageErrorCodeStrings.LEASE_ALREADY_PRESENT)) { - throw new AzureException( - "Unable to set Append lease on the Blob: " + ex, ex); - } - else { - LOG.debug( - "Encountered storage exception." - + " StorageException : {} ErrorCode : {}", - ex, - ex.getErrorCode()); - throw new AzureException(ex); - } - } - - setBlocksCountAndBlockIdPrefix(blockEntries); - - this.ioThreadPool = new ThreadPoolExecutor( - MAX_NUMBER_THREADS_IN_THREAD_POOL, - MAX_NUMBER_THREADS_IN_THREAD_POOL, - THREADPOOL_KEEP_ALIVE, - TimeUnit.SECONDS, - new LinkedBlockingQueue<>(), - new UploaderThreadFactory()); - } - - /** - * Set payload size of the stream. - * It is intended to be used for unit testing purposes only. - */ - @VisibleForTesting - synchronized void setMaxBlockSize(int size) { - maxBlockSize.set(size); - - // it is for testing only so we can abandon the previously allocated - // payload - this.outBuffer = ByteBuffer.allocate(maxBlockSize.get()); - } - - /** - * Set compaction parameters. - * It is intended to be used for unit testing purposes only. - */ - @VisibleForTesting - void setCompactionBlockCount(int activationCount) { - activateCompactionBlockCount = activationCount; - } - - /** - * Get the list of block entries. It is used for testing purposes only. - * @return List of block entries. - */ - @VisibleForTesting - List getBlockList() throws StorageException, IOException { - return blob.downloadBlockList( - BlockListingFilter.COMMITTED, - new BlobRequestOptions(), - opContext); - } - - /** - * Writes the specified byte to this output stream. The general contract for - * write is that one byte is written to the output stream. The byte to be - * written is the eight low-order bits of the argument b. The 24 high-order - * bits of b are ignored. - * - * @param byteVal - * the byteValue to write. - * @throws IOException - * if an I/O error occurs. In particular, an IOException may be - * thrown if the output stream has been closed. - */ - @Override - public void write(final int byteVal) throws IOException { - write(new byte[] { (byte) (byteVal & 0xFF) }); - } - - /** - * Writes length bytes from the specified byte array starting at offset to - * this output stream. - * - * @param data - * the byte array to write. - * @param offset - * the start offset in the data. - * @param length - * the number of bytes to write. - * @throws IOException - * if an I/O error occurs. In particular, an IOException may be - * thrown if the output stream has been closed. - */ - @Override - public synchronized void write(final byte[] data, int offset, int length) - throws IOException { - Preconditions.checkArgument(data != null, "null data"); - - if (offset < 0 || length < 0 || length > data.length - offset) { - throw new IndexOutOfBoundsException(); - } - - if (closed) { - throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); - } - - while (outBuffer.remaining() < length) { - - int remaining = outBuffer.remaining(); - outBuffer.put(data, offset, remaining); - - // upload payload to azure storage - addBlockUploadCommand(); - - offset += remaining; - length -= remaining; - } - - outBuffer.put(data, offset, length); - } - - - /** - * Flushes this output stream and forces any buffered output bytes to be - * written out. 
If any data remains in the payload it is committed to the - * service. Data is queued for writing and forced out to the service - * before the call returns. - */ - @Override - public void flush() throws IOException { - - if (closed) { - // calling close() after the stream is closed starts with call to flush() - return; - } - - addBlockUploadCommand(); - - if (committedBlobLength.get() < blobLength) { - try { - // wait until the block list is committed - addFlushCommand().await(); - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - } - } - } - - /** - * Force all data in the output stream to be written to Azure storage. - * Wait to return until this is complete. - */ - @Override - public void hsync() throws IOException { - // when block compaction is disabled, hsync is empty function - if (compactionEnabled) { - flush(); - } - } - - /** - * Force all data in the output stream to be written to Azure storage. - * Wait to return until this is complete. - */ - @Override - public void hflush() throws IOException { - // when block compaction is disabled, hflush is empty function - if (compactionEnabled) { - flush(); - } - } - - /** - * The Synchronization capabilities of this stream depend upon the compaction - * policy. - * @param capability string to query the stream support for. - * @return true for hsync and hflush when compaction is enabled. - */ - @Override - public boolean hasCapability(String capability) { - if (!compactionEnabled) { - return false; - } - return StoreImplementationUtils.isProbeForSyncable(capability); - } - - /** - * Force all data in the output stream to be written to Azure storage. - * Wait to return until this is complete. Close the access to the stream and - * shutdown the upload thread pool. - * If the blob was created, its lease will be released. - * Any error encountered caught in threads and stored will be rethrown here - * after cleanup. - */ - @Override - public synchronized void close() throws IOException { - - LOG.debug("close {} ", key); - - if (closed) { - return; - } - - // Upload the last block regardless of compactionEnabled flag - flush(); - - // Initiates an orderly shutdown in which previously submitted tasks are - // executed. - ioThreadPool.shutdown(); - - try { - // wait up to CLOSE_UPLOAD_DELAY minutes to upload all the blocks - if (!ioThreadPool.awaitTermination(CLOSE_UPLOAD_DELAY, TimeUnit.MINUTES)) { - LOG.error("Time out occurred while close() is waiting for IO request to" - + " finish in append" - + " for blob : {}", - key); - NativeAzureFileSystemHelper.logAllLiveStackTraces(); - throw new AzureException("Timed out waiting for IO requests to finish"); - } - } catch(InterruptedException ex) { - Thread.currentThread().interrupt(); - } - - // release the lease - if (firstError.get() == null && blobExist) { - try { - lease.free(); - } catch (StorageException ex) { - LOG.debug("Lease free update blob {} encountered Storage Exception:" - + " {} Error Code : {}", - key, - ex, - ex.getErrorCode()); - maybeSetFirstError(new AzureException(ex)); - } - } - - closed = true; - - // finally, throw the first exception raised if it has not - // been thrown elsewhere. - if (firstError.get() != null && !firstErrorThrown) { - throw firstError.get(); - } - } - - /** - * Helper method used to generate the blockIDs. The algorithm used is similar - * to the Azure storage SDK. 
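The newer (4.2.0-style) block id format generated just below boils down to Base64 of a per-stream UUID prefix plus a zero-padded sequence number; the fixed width keeps every id the same length, which the block blob service expects of ids within one blob. A minimal sketch using the JDK Base64 codec (the original code uses commons-codec; the printed values are random per run):

    import java.nio.charset.StandardCharsets;
    import java.util.Base64;
    import java.util.UUID;

    public class BlockIdSketch {
      public static void main(String[] args) {
        // One random prefix per stream; each block appends a zero-padded
        // sequence number so all ids for the blob have identical length.
        String prefix = UUID.randomUUID().toString() + "-";
        for (long seq = 0; seq < 3; seq++) {
          String raw = prefix + String.format("%06d", seq);
          System.out.println(Base64.getEncoder()
              .encodeToString(raw.getBytes(StandardCharsets.UTF_8)));
        }
      }
    }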
- */ - private void setBlocksCountAndBlockIdPrefix(List blockEntries) { - - if (nextBlockCount == UNSET_BLOCKS_COUNT && blockIdPrefix == null) { - - Random sequenceGenerator = new Random(); - - String blockZeroBlockId = (!blockEntries.isEmpty()) - ? blockEntries.get(0).getId() - : ""; - String prefix = UUID.randomUUID().toString() + "-"; - String sampleNewerVersionBlockId = generateNewerVersionBlockId(prefix, - 0); - - if (!blockEntries.isEmpty() - && blockZeroBlockId.length() < sampleNewerVersionBlockId.length()) { - - // If blob has already been created with 2.2.0, append subsequent blocks - // with older version (2.2.0) blockId compute nextBlockCount, the way it - // was done before; and don't use blockIdPrefix - this.blockIdPrefix = ""; - nextBlockCount = (long) (sequenceGenerator.nextInt(Integer.MAX_VALUE)) - + sequenceGenerator.nextInt( - Integer.MAX_VALUE - MAX_BLOCK_COUNT); - nextBlockCount += blockEntries.size(); - - } else { - - // If there are no existing blocks, create the first block with newer - // version (4.2.0) blockId. If blob has already been created with 4.2.0, - // append subsequent blocks with newer version (4.2.0) blockId - this.blockIdPrefix = prefix; - nextBlockCount = blockEntries.size(); - } - } - } - - /** - * Helper method that generates the next block id for uploading a block to - * azure storage. - * @return String representing the block ID generated. - * @throws IOException if the stream is in invalid state - */ - private String generateBlockId() throws IOException { - - if (nextBlockCount == UNSET_BLOCKS_COUNT || blockIdPrefix == null) { - throw new AzureException( - "Append Stream in invalid state. nextBlockCount not set correctly"); - } - - return (!blockIdPrefix.isEmpty()) - ? generateNewerVersionBlockId(blockIdPrefix, nextBlockCount++) - : generateOlderVersionBlockId(nextBlockCount++); - } - - /** - * Helper method that generates an older (2.2.0) version blockId. - * @return String representing the block ID generated. - */ - private String generateOlderVersionBlockId(long id) { - - byte[] blockIdInBytes = new byte[8]; - for (int m = 0; m < 8; m++) { - blockIdInBytes[7 - m] = (byte) ((id >> (8 * m)) & 0xFF); - } - - return new String( - Base64.encodeBase64(blockIdInBytes), - StandardCharsets.UTF_8); - } - - /** - * Helper method that generates an newer (4.2.0) version blockId. - * @return String representing the block ID generated. - */ - private String generateNewerVersionBlockId(String prefix, long id) { - - String blockIdSuffix = String.format("%06d", id); - byte[] blockIdInBytes = - (prefix + blockIdSuffix).getBytes(StandardCharsets.UTF_8); - return new String(Base64.encodeBase64(blockIdInBytes), StandardCharsets.UTF_8); - } - - /** - * This is shared between upload block Runnable and CommitBlockList. The - * method captures retry logic - * @param blockId block name - * @param dataPayload block content - */ - private void writeBlockRequestInternal(String blockId, - ByteBuffer dataPayload, - boolean bufferPoolBuffer) { - IOException lastLocalException = null; - - int uploadRetryAttempts = 0; - while (uploadRetryAttempts < MAX_BLOCK_UPLOAD_RETRIES) { - try { - long startTime = System.nanoTime(); - - blob.uploadBlock(blockId, accessCondition, new ByteArrayInputStream( - dataPayload.array()), dataPayload.position(), - new BlobRequestOptions(), opContext); - - LOG.debug("upload block finished for {} ms. 
block {} ", - TimeUnit.NANOSECONDS.toMillis( - System.nanoTime() - startTime), blockId); - break; - - } catch(Exception ioe) { - LOG.debug("Encountered exception during uploading block for Blob {}" - + " Exception : {}", key, ioe); - uploadRetryAttempts++; - lastLocalException = new AzureException( - "Encountered Exception while uploading block: " + ioe, ioe); - try { - Thread.sleep( - BLOCK_UPLOAD_RETRY_INTERVAL * (uploadRetryAttempts + 1)); - } catch(InterruptedException ie) { - Thread.currentThread().interrupt(); - break; - } - } - } - - if (bufferPoolBuffer) { - poolReadyByteBuffers.putBuffer(dataPayload); - } - - if (uploadRetryAttempts == MAX_BLOCK_UPLOAD_RETRIES) { - maybeSetFirstError(lastLocalException); - } - } - - /** - * Set {@link #firstError} to the exception if it is not already set. - * @param exception exception to save - */ - private void maybeSetFirstError(IOException exception) { - firstError.compareAndSet(null, exception); - } - - - /** - * Throw the first error caught if it has not been raised already - * @throws IOException if one is caught and needs to be thrown. - */ - private void maybeThrowFirstError() throws IOException { - if (firstError.get() != null) { - firstErrorThrown = true; - throw firstError.get(); - } - } - - /** - * Write block list. The method captures retry logic - */ - private void writeBlockListRequestInternal() { - - IOException lastLocalException = null; - - int uploadRetryAttempts = 0; - while (uploadRetryAttempts < MAX_BLOCK_UPLOAD_RETRIES) { - try { - - long startTime = System.nanoTime(); - - blob.commitBlockList(blockEntries, accessCondition, - new BlobRequestOptions(), opContext); - - LOG.debug("Upload block list took {} ms for blob {} ", - TimeUnit.NANOSECONDS.toMillis( - System.nanoTime() - startTime), key); - break; - - } catch(Exception ioe) { - LOG.debug("Encountered exception during uploading block for Blob {}" - + " Exception : {}", key, ioe); - uploadRetryAttempts++; - lastLocalException = new AzureException( - "Encountered Exception while uploading block: " + ioe, ioe); - try { - Thread.sleep( - BLOCK_UPLOAD_RETRY_INTERVAL * (uploadRetryAttempts + 1)); - } catch(InterruptedException ie) { - Thread.currentThread().interrupt(); - break; - } - } - } - - if (uploadRetryAttempts == MAX_BLOCK_UPLOAD_RETRIES) { - maybeSetFirstError(lastLocalException); - } - } - - /** - * A ThreadFactory that creates uploader thread with - * meaningful names helpful for debugging purposes. - */ - class UploaderThreadFactory implements ThreadFactory { - - @Override - public Thread newThread(Runnable r) { - Thread t = new SubjectInheritingThread(r); - t.setName(String.format("%s-%d", THREAD_ID_PREFIX, - threadSequenceNumber.getAndIncrement())); - return t; - } - } - - /** - * Upload block commands. - */ - private class UploadBlockCommand extends UploadCommand { - - // the block content for upload - private final ByteBuffer payload; - - // description of the block - private final BlockEntry entry; - - UploadBlockCommand(String blockId, ByteBuffer payload) { - - super(blobLength); - - BlockEntry blockEntry = new BlockEntry(blockId); - blockEntry.setSize(payload.position()); - blockEntry.setSearchMode(BlockSearchMode.LATEST); - - this.payload = payload; - this.entry = blockEntry; - - uncommittedBlockEntries.add(blockEntry); - } - - /** - * Execute command. 
- */ - void execute() throws InterruptedException { - - uploadingSemaphore.acquire(1); - writeBlockRequestInternal(entry.getId(), payload, true); - uploadingSemaphore.release(1); - - } - - void dump() { - LOG.debug("upload block {} size: {} for blob {}", - entry.getId(), - entry.getSize(), - key); - } - } - - /** - * Upload blob block list commands. - */ - private class UploadBlockListCommand extends UploadCommand { - - private BlockEntry lastBlock = null; - - UploadBlockListCommand() { - super(blobLength); - - if (!uncommittedBlockEntries.isEmpty()) { - lastBlock = uncommittedBlockEntries.getLast(); - } - } - - void awaitAsDependent() throws InterruptedException { - // empty. later commit block does not need to wait previous commit block - // lists. - } - - void dump() { - LOG.debug("commit block list with {} blocks for blob {}", - uncommittedBlockEntries.size(), key); - } - - /** - * Execute command. - */ - public void execute() throws InterruptedException, IOException { - - if (committedBlobLength.get() >= getCommandBlobOffset()) { - LOG.debug("commit already applied for {}", key); - return; - } - - if (lastBlock == null) { - LOG.debug("nothing to commit for {}", key); - return; - } - - LOG.debug("active commands: {} for {}", activeBlockCommands.size(), key); - - for (UploadCommand activeCommand : activeBlockCommands) { - if (activeCommand.getCommandBlobOffset() < getCommandBlobOffset()) { - activeCommand.dump(); - activeCommand.awaitAsDependent(); - } else { - break; - } - } - - // stop all uploads until the block list is committed - uploadingSemaphore.acquire(MAX_NUMBER_THREADS_IN_THREAD_POOL); - - BlockEntry uncommittedBlock; - do { - uncommittedBlock = uncommittedBlockEntries.poll(); - blockEntries.add(uncommittedBlock); - } while (uncommittedBlock != lastBlock); - - if (blockEntries.size() > activateCompactionBlockCount) { - LOG.debug("Block compaction: activated with {} blocks for {}", - blockEntries.size(), key); - - // Block compaction - long startCompaction = System.nanoTime(); - blockCompaction(); - LOG.debug("Block compaction finished for {} ms with {} blocks for {}", - TimeUnit.NANOSECONDS.toMillis( - System.nanoTime() - startCompaction), - blockEntries.size(), key); - } - - writeBlockListRequestInternal(); - - uploadingSemaphore.release(MAX_NUMBER_THREADS_IN_THREAD_POOL); - - // remove blocks previous commands - for (Iterator it = activeBlockCommands.iterator(); - it.hasNext();) { - UploadCommand activeCommand = it.next(); - if (activeCommand.getCommandBlobOffset() <= getCommandBlobOffset()) { - it.remove(); - } else { - break; - } - } - - committedBlobLength.set(getCommandBlobOffset()); - } - - /** - * Internal output stream with read access to the internal buffer. - */ - private class ByteArrayOutputStreamInternal extends ByteArrayOutputStream { - - ByteArrayOutputStreamInternal(int size) { - super(size); - } - - byte[] getByteArray() { - return buf; - } - } - - /** - * Block compaction process. - * - * Block compaction is only enabled when the number of blocks exceeds - * activateCompactionBlockCount. The algorithm searches for the longest - * segment [b..e) where (e-b) > 2 && |b| + |b+1| ... |e-1| < maxBlockSize - * such that size(b1) + size(b2) + ... + size(bn) < maximum-block-size. - * It then downloads the blocks in the sequence, concatenates the data to - * form a single block, uploads this new block, and updates the block - * list to replace the sequence of blocks with the new block. 
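The segment search described above amounts to finding the longest run of consecutive blocks whose combined size still fits within one block. A simplified, self-contained sliding-window sketch of that idea (the sizes are made up, and the original routine below tracks segment offsets somewhat differently):

    public class CompactionSegmentSketch {
      public static void main(String[] args) {
        long[] sizes = {100, 200, 300, 4_000_000, 150, 250};  // block sizes in bytes
        long maxBlockSize = 4_000_000;
        int bestBegin = 0, bestEnd = 0;
        int begin = 0;
        long runBytes = 0;
        for (int end = 0; end < sizes.length; end++) {
          runBytes += sizes[end];
          while (runBytes > maxBlockSize) {   // shrink the window from the left
            runBytes -= sizes[begin++];
          }
          if (end + 1 - begin > bestEnd - bestBegin) {
            bestBegin = begin;
            bestEnd = end + 1;
          }
        }
        // Blocks [bestBegin, bestEnd) would be downloaded, concatenated and
        // re-uploaded as a single replacement block.
        System.out.println("compact blocks [" + bestBegin + ", " + bestEnd + ")");
      }
    }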
- */ - private void blockCompaction() throws IOException { - //current segment [segmentBegin, segmentEnd) and file offset/size of the - // current segment - int segmentBegin = 0, segmentEnd = 0; - long segmentOffsetBegin = 0, segmentOffsetEnd = 0; - - //longest segment [maxSegmentBegin, maxSegmentEnd) and file offset/size of - // the longest segment - int maxSegmentBegin = 0, maxSegmentEnd = 0; - long maxSegmentOffsetBegin = 0, maxSegmentOffsetEnd = 0; - - for (BlockEntry block : blockEntries) { - segmentEnd++; - segmentOffsetEnd += block.getSize(); - if (segmentOffsetEnd - segmentOffsetBegin > maxBlockSize.get()) { - if (segmentEnd - segmentBegin > 2) { - if (maxSegmentEnd - maxSegmentBegin < segmentEnd - segmentBegin) { - maxSegmentBegin = segmentBegin; - maxSegmentEnd = segmentEnd; - maxSegmentOffsetBegin = segmentOffsetBegin; - maxSegmentOffsetEnd = segmentOffsetEnd - block.getSize(); - } - } - segmentBegin = segmentEnd - 1; - segmentOffsetBegin = segmentOffsetEnd - block.getSize(); - } - } - - if (maxSegmentEnd - maxSegmentBegin > 1) { - - LOG.debug("Block compaction: {} blocks for {}", - maxSegmentEnd - maxSegmentBegin, key); - - // download synchronously all the blocks from the azure storage - ByteArrayOutputStreamInternal blockOutputStream - = new ByteArrayOutputStreamInternal(maxBlockSize.get()); - - try { - long length = maxSegmentOffsetEnd - maxSegmentOffsetBegin; - blob.downloadRange(maxSegmentOffsetBegin, length, blockOutputStream, - new BlobRequestOptions(), opContext); - } catch(StorageException ex) { - LOG.error( - "Storage exception encountered during block compaction phase" - + " : {} Storage Exception : {} Error Code: {}", - key, ex, ex.getErrorCode()); - throw new AzureException( - "Encountered Exception while committing append blocks " + ex, ex); - } - - // upload synchronously new block to the azure storage - String blockId = generateBlockId(); - - ByteBuffer byteBuffer = ByteBuffer.wrap( - blockOutputStream.getByteArray()); - byteBuffer.position(blockOutputStream.size()); - - writeBlockRequestInternal(blockId, byteBuffer, false); - - // replace blocks from the longest segment with new block id - blockEntries.subList(maxSegmentBegin + 1, maxSegmentEnd - 1).clear(); - BlockEntry newBlock = blockEntries.get(maxSegmentBegin); - newBlock.setId(blockId); - newBlock.setSearchMode(BlockSearchMode.LATEST); - newBlock.setSize(maxSegmentOffsetEnd - maxSegmentOffsetBegin); - } - } - } - - /** - * Prepare block upload command and queue the command in thread pool executor. - */ - private synchronized void addBlockUploadCommand() throws IOException { - - maybeThrowFirstError(); - - if (blobExist && lease.isFreed()) { - throw new AzureException(String.format( - "Attempting to upload a block on blob : %s " - + " that does not have lease on the Blob. Failing upload", key)); - } - - int blockSize = outBuffer.position(); - if (blockSize > 0) { - UploadCommand command = new UploadBlockCommand(generateBlockId(), - outBuffer); - activeBlockCommands.add(command); - - blobLength += blockSize; - outBuffer = poolReadyByteBuffers.getBuffer(false, maxBlockSize.get()); - - ioThreadPool.execute(new WriteRequest(command)); - - } - } - - /** - * Prepare block list commit command and queue the command in thread pool - * executor. 
- */ - private synchronized UploadCommand addFlushCommand() throws IOException { - - maybeThrowFirstError(); - - if (blobExist && lease.isFreed()) { - throw new AzureException( - String.format("Attempting to upload block list on blob : %s" - + " that does not have lease on the Blob. Failing upload", key)); - } - - UploadCommand command = new UploadBlockListCommand(); - activeBlockCommands.add(command); - - ioThreadPool.execute(new WriteRequest(command)); - - return command; - } - - /** - * Runnable instance that uploads the block of data to azure storage. - */ - private class WriteRequest implements Runnable { - private final UploadCommand command; - - WriteRequest(UploadCommand command) { - this.command = command; - } - - @Override - public void run() { - - try { - command.dump(); - long startTime = System.nanoTime(); - command.execute(); - command.setCompleted(); - LOG.debug("command finished for {} ms", - TimeUnit.NANOSECONDS.toMillis( - System.nanoTime() - startTime)); - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - } catch (Exception ex) { - LOG.debug( - "Encountered exception during execution of command for Blob :" - + " {} Exception : {}", key, ex); - firstError.compareAndSet(null, new AzureException(ex)); - } - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/BlockBlobInputStream.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/BlockBlobInputStream.java deleted file mode 100644 index 02ee8d6eac62c..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/BlockBlobInputStream.java +++ /dev/null @@ -1,482 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.EOFException; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; - -import com.microsoft.azure.storage.OperationContext; -import com.microsoft.azure.storage.StorageException; -import com.microsoft.azure.storage.blob.BlobRequestOptions; - -import org.apache.hadoop.fs.FSExceptionMessages; -import org.apache.hadoop.fs.FSInputStream; -import org.apache.hadoop.fs.azure.StorageInterface.CloudBlockBlobWrapper; - -/** - * Encapsulates the BlobInputStream used by block blobs and adds support for - * random access and seek. Random access performance is improved by several - * orders of magnitude. 
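As context for why seek support matters here, the sketch below shows the random-access read pattern this stream is meant to serve. The URI, path, and offsets are hypothetical, and the calls are the standard Hadoop FileSystem API rather than anything specific to this class.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RandomReadSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Hypothetical container, account and file; any wasb:// path behaves the same way.
    FileSystem fs = FileSystem.get(
        URI.create("wasb://container@account.blob.core.windows.net/"), conf);
    try (FSDataInputStream in = fs.open(new Path("/data/part-00000"))) {
      byte[] buf = new byte[4096];
      in.seek(64L * 1024 * 1024);        // forward seek; data in between can be skipped rather than read
      in.readFully(buf, 0, buf.length);
      in.seek(in.getPos() - 1024);       // short backward seek within the region just read
      in.readFully(buf, 0, 1024);
    }
  }
}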
- */ -final class BlockBlobInputStream extends FSInputStream { - private final CloudBlockBlobWrapper blob; - private final BlobRequestOptions options; - private final OperationContext opContext; - private final boolean bufferedPreadDisabled; - private InputStream blobInputStream = null; - private int minimumReadSizeInBytes = 0; - private long streamPositionAfterLastRead = -1; - // position of next network read within stream - private long streamPosition = 0; - // length of stream - private long streamLength = 0; - private boolean closed = false; - // internal buffer, re-used for performance optimization - private byte[] streamBuffer; - // zero-based offset within streamBuffer of current read position - private int streamBufferPosition; - // length of data written to streamBuffer, streamBuffer may be larger - private int streamBufferLength; - - /** - * Creates a seek-able stream for reading from block blobs. - * @param blob a block blob reference. - * @param options the blob request options. - * @param opContext the blob operation context. - * @throws IOException IO failure - */ - BlockBlobInputStream(CloudBlockBlobWrapper blob, BlobRequestOptions options, - OperationContext opContext, boolean bufferedPreadDisabled) - throws IOException { - this.blob = blob; - this.options = options; - this.opContext = opContext; - this.bufferedPreadDisabled = bufferedPreadDisabled; - - this.minimumReadSizeInBytes = blob.getStreamMinimumReadSizeInBytes(); - - try { - this.blobInputStream = blob.openInputStream(options, opContext); - } catch (StorageException e) { - throw new IOException(e); - } - - this.streamLength = blob.getProperties().getLength(); - } - - private void checkState() throws IOException { - if (closed) { - throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); - } - } - - /** - * Reset the internal stream buffer but do not release the memory. - * The buffer can be reused to avoid frequent memory allocations of - * a large buffer. - */ - private void resetStreamBuffer() { - streamBufferPosition = 0; - streamBufferLength = 0; - } - - /** - * Gets the read position of the stream. - * @return the zero-based byte offset of the read position. - * @throws IOException IO failure - */ - @Override - public synchronized long getPos() throws IOException { - checkState(); - return (streamBuffer != null) - ? streamPosition - streamBufferLength + streamBufferPosition - : streamPosition; - } - - /** - * Sets the read position of the stream. - * @param pos a zero-based byte offset in the stream. 
- * @throws EOFException if read is out of range - */ - @Override - public synchronized void seek(long pos) throws IOException { - checkState(); - if (pos < 0) { - throw new EOFException(FSExceptionMessages.NEGATIVE_SEEK + " " + pos); - } - if (pos > streamLength) { - throw new EOFException( - FSExceptionMessages.CANNOT_SEEK_PAST_EOF + " " + pos); - } - - // calculate offset between the target and current position in the stream - long offset = pos - getPos(); - - if (offset == 0) { - // no=op, no state change - return; - } - - if (offset > 0) { - // forward seek, data can be skipped as an optimization - if (skip(offset) != offset) { - throw new EOFException(FSExceptionMessages.EOF_IN_READ_FULLY); - } - return; - } - - // reverse seek, offset is negative - if (streamBuffer != null) { - if (streamBufferPosition + offset >= 0) { - // target position is inside the stream buffer, - // only need to move backwards within the stream buffer - streamBufferPosition += offset; - } else { - // target position is outside the stream buffer, - // need to reset stream buffer and move position for next network read - resetStreamBuffer(); - streamPosition = pos; - } - } else { - streamPosition = pos; - } - - // close BlobInputStream after seek is invoked because BlobInputStream - // does not support seek - closeBlobInputStream(); - } - - /** - * Seeks an secondary copy of the data. This method is not supported. - * @param targetPos a zero-based byte offset in the stream. - * @return false - * @throws IOException IO failure - */ - @Override - public boolean seekToNewSource(long targetPos) throws IOException { - return false; - } - - /** - * Gets the number of bytes that can be read (or skipped over) without - * performing a network operation. - * @throws IOException IO failure - */ - @Override - public synchronized int available() throws IOException { - checkState(); - if (blobInputStream != null) { - return blobInputStream.available(); - } else { - return (streamBuffer == null) - ? 0 - : streamBufferLength - streamBufferPosition; - } - } - - private void closeBlobInputStream() throws IOException { - if (blobInputStream != null) { - try { - blobInputStream.close(); - } finally { - blobInputStream = null; - } - } - } - - /** - * Closes this stream and releases any system resources associated with it. 
- * @throws IOException IO failure - */ - @Override - public synchronized void close() throws IOException { - closed = true; - closeBlobInputStream(); - streamBuffer = null; - streamBufferPosition = 0; - streamBufferLength = 0; - } - - private int doNetworkRead(byte[] buffer, int offset, int len) - throws IOException { - MemoryOutputStream outputStream; - boolean needToCopy = false; - - if (streamPositionAfterLastRead == streamPosition) { - // caller is reading sequentially, so initialize the stream buffer - if (streamBuffer == null) { - streamBuffer = new byte[(int) Math.min(minimumReadSizeInBytes, - streamLength)]; - } - resetStreamBuffer(); - outputStream = new MemoryOutputStream(streamBuffer, streamBufferPosition, - streamBuffer.length); - needToCopy = true; - } else { - outputStream = new MemoryOutputStream(buffer, offset, len); - } - - long bytesToRead = Math.min( - minimumReadSizeInBytes, - Math.min( - outputStream.capacity(), - streamLength - streamPosition)); - - try { - blob.downloadRange(streamPosition, bytesToRead, outputStream, options, - opContext); - } catch (StorageException e) { - throw new IOException(e); - } - - int bytesRead = outputStream.size(); - if (bytesRead > 0) { - streamPosition += bytesRead; - streamPositionAfterLastRead = streamPosition; - int count = Math.min(bytesRead, len); - if (needToCopy) { - streamBufferLength = bytesRead; - System.arraycopy(streamBuffer, streamBufferPosition, buffer, offset, - count); - streamBufferPosition += count; - } - return count; - } else { - // This may happen if the blob was modified after the length was obtained. - throw new EOFException("End of stream reached unexpectedly."); - } - } - - @Override - public int read(long position, byte[] buffer, int offset, int length) - throws IOException { - synchronized (this) { - checkState(); - } - if (!bufferedPreadDisabled) { - // This will do a seek + read in which the streamBuffer will get used. - return super.read(position, buffer, offset, length); - } - validatePositionedReadArgs(position, buffer, offset, length); - if (length == 0) { - return 0; - } - if (position >= streamLength) { - throw new EOFException("position is beyond stream capacity"); - } - MemoryOutputStream os = new MemoryOutputStream(buffer, offset, length); - long bytesToRead = Math.min(minimumReadSizeInBytes, - Math.min(os.capacity(), streamLength - position)); - try { - blob.downloadRange(position, bytesToRead, os, options, opContext); - } catch (StorageException e) { - throw new IOException(e); - } - int bytesRead = os.size(); - if (bytesRead == 0) { - // This may happen if the blob was modified after the length was obtained. - throw new EOFException("End of stream reached unexpectedly."); - } - return bytesRead; - } - - /** - * Reads up to len bytes of data from the input stream into an - * array of bytes. - * @param b a buffer into which the data is written. - * @param offset a start offset into {@code buffer} where the data is written. - * @param len the maximum number of bytes to be read. - * @return the number of bytes written into {@code buffer}, or -1. 
- * @throws IOException IO failure - */ - @Override - public synchronized int read(byte[] b, int offset, int len) - throws IOException { - checkState(); - NativeAzureFileSystemHelper.validateReadArgs(b, offset, len); - if (blobInputStream != null) { - int numberOfBytesRead = blobInputStream.read(b, offset, len); - streamPosition += numberOfBytesRead; - return numberOfBytesRead; - } else { - if (offset < 0 || len < 0 || len > b.length - offset) { - throw new IndexOutOfBoundsException("read arguments out of range"); - } - if (len == 0) { - return 0; - } - - int bytesRead = 0; - int available = available(); - if (available > 0) { - bytesRead = Math.min(available, len); - System.arraycopy(streamBuffer, streamBufferPosition, b, offset, - bytesRead); - streamBufferPosition += bytesRead; - } - - if (len == bytesRead) { - return len; - } - if (streamPosition >= streamLength) { - return (bytesRead > 0) ? bytesRead : -1; - } - - offset += bytesRead; - len -= bytesRead; - - return bytesRead + doNetworkRead(b, offset, len); - } - } - - /** - * Reads the next byte of data from the stream. - * @return the next byte of data, or -1 - * @throws IOException IO failure - */ - @Override - public int read() throws IOException { - byte[] buffer = new byte[1]; - int numberOfBytesRead = read(buffer, 0, 1); - return (numberOfBytesRead < 1) ? -1 : buffer[0]; - } - - /** - * Skips over and discards n bytes of data from this input stream. - * @param n the number of bytes to be skipped. - * @return the actual number of bytes skipped. - * @throws IOException IO failure - * @throws IndexOutOfBoundsException if n is negative or if the sum of n - * and the current value of getPos() is greater than the length of the stream. - */ - @Override - public synchronized long skip(long n) throws IOException { - checkState(); - - if (blobInputStream != null) { - // blobInput stream is open; delegate the work to it - long skipped = blobInputStream.skip(n); - // update position to the actual skip value - streamPosition += skipped; - return skipped; - } - - // no blob stream; implement the skip logic directly - if (n < 0 || n > streamLength - getPos()) { - throw new IndexOutOfBoundsException("skip range"); - } - - if (streamBuffer != null) { - // there's a buffer, so seek with it - if (n < streamBufferLength - streamBufferPosition) { - // new range is in the buffer, so just update the buffer position - // skip within the buffer. - streamBufferPosition += (int) n; - } else { - // skip is out of range, so move position to ne value and reset - // the buffer ready for the next read() - streamPosition = getPos() + n; - resetStreamBuffer(); - } - } else { - // no stream buffer; increment the stream position ready for - // the next triggered connection & read - streamPosition += n; - } - return n; - } - - /** - * An OutputStream backed by a user-supplied buffer. - */ - static class MemoryOutputStream extends OutputStream { - private final byte[] buffer; - private final int offset; - private final int length; - private int writePosition; - - /** - * Creates a MemoryOutputStream from a user-supplied buffer. - * @param buffer an array of bytes. - * @param offset a starting offset in buffer where the data - * will be written. - * @param length the maximum number of bytes to be written to the stream. 
- */ - MemoryOutputStream(byte[] buffer, int offset, int length) { - if (buffer == null) { - throw new NullPointerException("buffer"); - } - if (offset < 0 || length < 0 || length > buffer.length - offset) { - throw new IndexOutOfBoundsException("offset out of range of buffer"); - } - this.buffer = buffer; - this.offset = offset; - this.length = length; - this.writePosition = offset; - } - - /** - * Gets the current size of the stream. - */ - public synchronized int size() { - return writePosition - offset; - } - - /** - * Gets the current capacity of the stream. - */ - public synchronized int capacity() { - return length; - } - - /** - * Writes the next byte to the stream. - * @param b the byte to be written. - * @throws IOException IO failure - */ - public synchronized void write(int b) throws IOException { - if (size() > length - 1) { - throw new IOException("No space for more writes"); - } - buffer[writePosition++] = (byte) b; - } - - /** - * Writes a range of bytes to the stream. - * @param b a byte array. - * @param off the start offset in buffer from which the data - * is read. - * @param length the number of bytes to be written. - * @throws IOException IO failure - */ - public synchronized void write(byte[] b, int off, int length) - throws IOException { - if (b == null) { - throw new NullPointerException("Null buffer argument"); - } - if (off < 0 || length < 0 || length > b.length - off) { - throw new IndexOutOfBoundsException("array write offset"); - } - System.arraycopy(b, off, buffer, writePosition, length); - writePosition += length; - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/CachingAuthorizer.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/CachingAuthorizer.java deleted file mode 100644 index 5fa0ad8ae34e8..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/CachingAuthorizer.java +++ /dev/null @@ -1,232 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure; -import org.apache.hadoop.thirdparty.com.google.common.cache.Cache; -import org.apache.hadoop.conf.Configuration; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.concurrent.TimeUnit; - -import org.apache.hadoop.thirdparty.com.google.common.cache.CacheBuilder; - -/** - * Class that provides caching for Authorize and getSasUri calls - * @param - The cache key type - * @param - The cached value type - */ -public class CachingAuthorizer { - - public static final Logger LOG = LoggerFactory - .getLogger(CachingAuthorizer.class); - - private Cache cache; - private boolean isEnabled = false; - private long cacheEntryExpiryPeriodInMinutes; - private String label; - - public static final String KEY_AUTH_SERVICE_CACHING_ENABLE = - "fs.azure.authorization.caching.enable"; - - public static final boolean KEY_AUTH_SERVICE_CACHING_ENABLE_DEFAULT = false; - - public static final String KEY_AUTH_SERVICE_CACHING_MAX_ENTRIES = - "fs.azure.authorization.caching.maxentries"; - - public static final int KEY_AUTH_SERVICE_CACHING_MAX_ENTRIES_DEFAULT = 512; - - public CachingAuthorizer(long ttlInMinutes, String label) { - cacheEntryExpiryPeriodInMinutes = ttlInMinutes; - this.label = label; - if (cacheEntryExpiryPeriodInMinutes <= 0) { - isEnabled = false; - } - } - - - public void init(Configuration conf) { - - isEnabled = conf.getBoolean(KEY_AUTH_SERVICE_CACHING_ENABLE, KEY_AUTH_SERVICE_CACHING_ENABLE_DEFAULT); - - if (isEnabled) { - LOG.debug("{} : Initializing CachingAuthorizer instance", label); - cache = CacheBuilder.newBuilder() - .maximumSize( - conf.getInt( - KEY_AUTH_SERVICE_CACHING_MAX_ENTRIES, - KEY_AUTH_SERVICE_CACHING_MAX_ENTRIES_DEFAULT - ) - ) - .expireAfterWrite(cacheEntryExpiryPeriodInMinutes, TimeUnit.MINUTES) - .build(); - } - } - - /** - * @param key - Cache key - * @return null on cache-miss. 
true/false on cache-hit - */ - public V get(K key) { - if (!isEnabled) { - return null; - } - - V result = cache.getIfPresent(key); - if (result == null) { - LOG.debug("{}: CACHE MISS: {}", label, key.toString()); - } - else { - LOG.debug("{}: CACHE HIT: {}, {}", label, key.toString(), result.toString()); - } - return result; - } - - public void put(K key, V value) { - if (isEnabled) { - LOG.debug("{}: CACHE PUT: {}, {}", label, key.toString(), value.toString()); - cache.put(key, value); - } - } - - public void clear() { - if (isEnabled) { - cache.invalidateAll(); - } - } -} - -/** - * POJO representing the cache key for authorization calls - */ -class CachedAuthorizerEntry { - - private String path; - private String accessType; - private String owner; - - CachedAuthorizerEntry(String path, String accessType, String owner) { - this.path = path; - this.accessType = accessType; - this.owner = owner; - } - - public String getPath() { - return path; - } - - public String getAccessType() { - return accessType; - } - - public String getOwner() { - return owner; - } - - @Override - public boolean equals(Object o) { - if (o == this) { - return true; - } - - if (o == null) { - return false; - } - - if (!(o instanceof CachedAuthorizerEntry)) { - return false; - } - - CachedAuthorizerEntry c = (CachedAuthorizerEntry) o; - return - this.getPath().equals(c.getPath()) - && this.getAccessType().equals(c.getAccessType()) - && this.getOwner().equals(c.getOwner()); - } - - @Override - public int hashCode() { - return this.toString().hashCode(); - } - - @Override - public String toString() { - return path + ":" + accessType + ":" + owner; - } - -} - - -/** - * POJO representing the cache key for sas-key calls - */ -class CachedSASKeyEntry { - - private String storageAccount; - private String container; - private String path; - - CachedSASKeyEntry(String storageAccount, String container, String path) { - this.storageAccount = storageAccount; - this.container = container; - this.path = path; - } - - public String getStorageAccount() { - return storageAccount; - } - - public String getContainer() { - return container; - } - - public String getPath() { - return path; - } - - @Override - public boolean equals(Object o) { - if (o == this) { - return true; - } - - if (o == null) { - return false; - } - - if (!(o instanceof CachedSASKeyEntry)) { - return false; - } - - CachedSASKeyEntry c = (CachedSASKeyEntry) o; - return - this.getStorageAccount().equals(c.getStorageAccount()) - && this.getContainer().equals(c.getContainer()) - && this.getPath().equals(c.getPath()); - } - - @Override - public int hashCode() { - return this.toString().hashCode(); - } - - @Override - public String toString() { - return storageAccount + ":" + container + ":" + path; - } -} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/ClientThrottlingAnalyzer.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/ClientThrottlingAnalyzer.java deleted file mode 100644 index 3c4803cd3df7a..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/ClientThrottlingAnalyzer.java +++ /dev/null @@ -1,284 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import org.apache.hadoop.classification.VisibleForTesting; -import org.apache.hadoop.util.Preconditions; -import org.apache.commons.lang3.StringUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.classification.InterfaceAudience; -import java.util.Timer; -import java.util.TimerTask; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicLong; -import java.util.concurrent.atomic.AtomicReference; - -/** - * Throttles storage operations to minimize errors and maximum throughput. This - * improves throughput by as much as 35% when the service throttles requests due - * to exceeding account level ingress or egress limits. - */ -@InterfaceAudience.Private -class ClientThrottlingAnalyzer { - private static final Logger LOG = LoggerFactory.getLogger( - ClientThrottlingAnalyzer.class); - private static final int DEFAULT_ANALYSIS_PERIOD_MS = 10 * 1000; - private static final int MIN_ANALYSIS_PERIOD_MS = 1000; - private static final int MAX_ANALYSIS_PERIOD_MS = 30000; - private static final double MIN_ACCEPTABLE_ERROR_PERCENTAGE = .1; - private static final double MAX_EQUILIBRIUM_ERROR_PERCENTAGE = 1; - private static final double RAPID_SLEEP_DECREASE_FACTOR = .75; - private static final double RAPID_SLEEP_DECREASE_TRANSITION_PERIOD_MS = 150 - * 1000; - private static final double SLEEP_DECREASE_FACTOR = .975; - private static final double SLEEP_INCREASE_FACTOR = 1.05; - private int analysisPeriodMs; - - private volatile int sleepDuration = 0; - private long consecutiveNoErrorCount = 0; - private String name = null; - private Timer timer = null; - private AtomicReference blobMetrics = null; - - private ClientThrottlingAnalyzer() { - // hide default constructor - } - - /** - * Creates an instance of the ClientThrottlingAnalyzer class with - * the specified name. - * - * @param name a name used to identify this instance. - * - * @throws IllegalArgumentException if name is null or empty. - */ - ClientThrottlingAnalyzer(String name) throws IllegalArgumentException { - this(name, DEFAULT_ANALYSIS_PERIOD_MS); - } - - /** - * Creates an instance of the ClientThrottlingAnalyzer class with - * the specified name and period. - * - * @param name A name used to identify this instance. - * - * @param period The frequency, in milliseconds, at which metrics are - * analyzed. - * - * @throws IllegalArgumentException - * If name is null or empty. - * If period is less than 1000 or greater than 30000 milliseconds. 
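To make the intended call pattern concrete, here is a minimal sketch of how a caller in the same package would drive the analyzer (the class and its callers are package-private); the byte count and failure flag are hypothetical.

package org.apache.hadoop.fs.azure;

public class ThrottlingAnalyzerSketch {
  public static void main(String[] args) {
    // Named analyzer with the default 10 second analysis period.
    ClientThrottlingAnalyzer writeThrottler = new ClientThrottlingAnalyzer("write");

    // Before issuing a request: sleep if the analyzer currently calls for throttling.
    writeThrottler.suspendIfNecessary();

    // After the response: record bytes moved and whether the operation failed,
    // so the next analysis period can adjust the sleep duration.
    long bytesSent = 4L * 1024 * 1024;   // hypothetical 4 MB block upload
    boolean failed = false;              // true for a throttled or failed response
    writeThrottler.addBytesTransferred(bytesSent, failed);
  }
}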
- */ - ClientThrottlingAnalyzer(String name, int period) - throws IllegalArgumentException { - Preconditions.checkArgument( - StringUtils.isNotEmpty(name), - "The argument 'name' cannot be null or empty."); - Preconditions.checkArgument( - period >= MIN_ANALYSIS_PERIOD_MS && period <= MAX_ANALYSIS_PERIOD_MS, - "The argument 'period' must be between 1000 and 30000."); - this.name = name; - this.analysisPeriodMs = period; - this.blobMetrics = new AtomicReference( - new BlobOperationMetrics(System.currentTimeMillis())); - this.timer = new Timer( - String.format("wasb-timer-client-throttling-analyzer-%s", name), true); - this.timer.schedule(new TimerTaskImpl(), - analysisPeriodMs, - analysisPeriodMs); - } - - /** - * Updates metrics with results from the current storage operation. - * - * @param count The count of bytes transferred. - * - * @param isFailedOperation True if the operation failed; otherwise false. - */ - public void addBytesTransferred(long count, boolean isFailedOperation) { - BlobOperationMetrics metrics = blobMetrics.get(); - if (isFailedOperation) { - metrics.bytesFailed.addAndGet(count); - metrics.operationsFailed.incrementAndGet(); - } else { - metrics.bytesSuccessful.addAndGet(count); - metrics.operationsSuccessful.incrementAndGet(); - } - } - - /** - * Suspends the current storage operation, as necessary, to reduce throughput. - */ - public void suspendIfNecessary() { - int duration = sleepDuration; - if (duration > 0) { - try { - Thread.sleep(duration); - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - } - } - } - - @VisibleForTesting - int getSleepDuration() { - return sleepDuration; - } - - private int analyzeMetricsAndUpdateSleepDuration(BlobOperationMetrics metrics, - int sleepDuration) { - final double percentageConversionFactor = 100; - double bytesFailed = metrics.bytesFailed.get(); - double bytesSuccessful = metrics.bytesSuccessful.get(); - double operationsFailed = metrics.operationsFailed.get(); - double operationsSuccessful = metrics.operationsSuccessful.get(); - double errorPercentage = (bytesFailed <= 0) - ? 0 - : percentageConversionFactor - * bytesFailed - / (bytesFailed + bytesSuccessful); - long periodMs = metrics.endTime - metrics.startTime; - - double newSleepDuration; - - if (errorPercentage < MIN_ACCEPTABLE_ERROR_PERCENTAGE) { - ++consecutiveNoErrorCount; - // Decrease sleepDuration in order to increase throughput. - double reductionFactor = - (consecutiveNoErrorCount * analysisPeriodMs - >= RAPID_SLEEP_DECREASE_TRANSITION_PERIOD_MS) - ? RAPID_SLEEP_DECREASE_FACTOR - : SLEEP_DECREASE_FACTOR; - - newSleepDuration = sleepDuration * reductionFactor; - } else if (errorPercentage < MAX_EQUILIBRIUM_ERROR_PERCENTAGE) { - // Do not modify sleepDuration in order to stabilize throughput. - newSleepDuration = sleepDuration; - } else { - // Increase sleepDuration in order to minimize error rate. - consecutiveNoErrorCount = 0; - - // Increase sleep duration in order to reduce throughput and error rate. - // First, calculate target throughput: bytesSuccessful / periodMs. - // Next, calculate time required to send *all* data (assuming next period - // is similar to previous) at the target throughput: (bytesSuccessful - // + bytesFailed) * periodMs / bytesSuccessful. Next, subtract periodMs to - // get the total additional delay needed. 
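As a worked illustration of that calculation (hypothetical figures): if one 10,000 ms analysis period carried 80 MB successfully and 20 MB in failed requests across 50 operations, the additional delay needed is (80 + 20) * 10,000 / 80 - 10,000 = 2,500 ms, and amortizing it over the 50 expected operations gives a per-request sleep of about 50 ms before the clamping applied just below.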
- double additionalDelayNeeded = 5 * analysisPeriodMs; - if (bytesSuccessful > 0) { - additionalDelayNeeded = (bytesSuccessful + bytesFailed) - * periodMs - / bytesSuccessful - - periodMs; - } - - // amortize the additional delay needed across the estimated number of - // requests during the next period - newSleepDuration = additionalDelayNeeded - / (operationsFailed + operationsSuccessful); - - final double maxSleepDuration = analysisPeriodMs; - final double minSleepDuration = sleepDuration * SLEEP_INCREASE_FACTOR; - - // Add 1 ms to avoid rounding down and to decrease proximity to the server - // side ingress/egress limit. Ensure that the new sleep duration is - // larger than the current one to more quickly reduce the number of - // errors. Don't allow the sleep duration to grow unbounded, after a - // certain point throttling won't help, for example, if there are far too - // many tasks/containers/nodes no amount of throttling will help. - newSleepDuration = Math.max(newSleepDuration, minSleepDuration) + 1; - newSleepDuration = Math.min(newSleepDuration, maxSleepDuration); - } - - if (LOG.isDebugEnabled()) { - LOG.debug(String.format( - "%5.5s, %10d, %10d, %10d, %10d, %6.2f, %5d, %5d, %5d", - name, - (int) bytesFailed, - (int) bytesSuccessful, - (int) operationsFailed, - (int) operationsSuccessful, - errorPercentage, - periodMs, - (int) sleepDuration, - (int) newSleepDuration)); - } - - return (int) newSleepDuration; - } - - /** - * Timer callback implementation for periodically analyzing metrics. - */ - class TimerTaskImpl extends TimerTask { - private AtomicInteger doingWork = new AtomicInteger(0); - - /** - * Periodically analyzes a snapshot of the blob storage metrics and updates - * the sleepDuration in order to appropriately throttle storage operations. - */ - @Override - public void run() { - boolean doWork = false; - try { - doWork = doingWork.compareAndSet(0, 1); - - // prevent concurrent execution of this task - if (!doWork) { - return; - } - - long now = System.currentTimeMillis(); - if (now - blobMetrics.get().startTime >= analysisPeriodMs) { - BlobOperationMetrics oldMetrics = blobMetrics.getAndSet( - new BlobOperationMetrics(now)); - oldMetrics.endTime = now; - sleepDuration = analyzeMetricsAndUpdateSleepDuration(oldMetrics, - sleepDuration); - } - } - finally { - if (doWork) { - doingWork.set(0); - } - } - } - } - - /** - * Stores blob operation metrics during each analysis period. - */ - static class BlobOperationMetrics { - private AtomicLong bytesFailed; - private AtomicLong bytesSuccessful; - private AtomicLong operationsFailed; - private AtomicLong operationsSuccessful; - private long endTime; - private long startTime; - - BlobOperationMetrics(long startTime) { - this.startTime = startTime; - this.bytesFailed = new AtomicLong(); - this.bytesSuccessful = new AtomicLong(); - this.operationsFailed = new AtomicLong(); - this.operationsSuccessful = new AtomicLong(); - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/ClientThrottlingIntercept.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/ClientThrottlingIntercept.java deleted file mode 100644 index 9da993bd2374c..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/ClientThrottlingIntercept.java +++ /dev/null @@ -1,221 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import com.microsoft.azure.storage.ErrorReceivingResponseEvent; -import com.microsoft.azure.storage.OperationContext; -import com.microsoft.azure.storage.RequestResult; -import com.microsoft.azure.storage.ResponseReceivedEvent; -import com.microsoft.azure.storage.SendingRequestEvent; -import com.microsoft.azure.storage.StorageEvent; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.classification.InterfaceAudience; - -import java.net.HttpURLConnection; - -/** - * Throttles Azure Storage read and write operations to achieve maximum - * throughput by minimizing errors. The errors occur when the account ingress - * or egress limits are exceeded and the server-side throttles requests. - * Server-side throttling causes the retry policy to be used, but the retry - * policy sleeps for long periods of time causing the total ingress or egress - * throughput to be as much as 35% lower than optimal. The retry policy is also - * after the fact, in that it applies after a request fails. On the other hand, - * the client-side throttling implemented here happens before requests are made - * and sleeps just enough to minimize errors, allowing optimal ingress and/or - * egress throughput. - */ -@InterfaceAudience.Private -final class ClientThrottlingIntercept { - private static final Logger LOG = LoggerFactory.getLogger( - ClientThrottlingIntercept.class); - private static ClientThrottlingIntercept singleton = null; - private ClientThrottlingAnalyzer readThrottler = null; - private ClientThrottlingAnalyzer writeThrottler = null; - - // Hide default constructor - private ClientThrottlingIntercept() { - readThrottler = new ClientThrottlingAnalyzer("read"); - writeThrottler = new ClientThrottlingAnalyzer("write"); - LOG.debug("Client-side throttling is enabled for the WASB file system."); - } - - static synchronized void initializeSingleton() { - if (singleton == null) { - singleton = new ClientThrottlingIntercept(); - } - } - - static void hook(OperationContext context) { - context.getErrorReceivingResponseEventHandler().addListener( - new ErrorReceivingResponseEventHandler()); - context.getSendingRequestEventHandler().addListener( - new SendingRequestEventHandler()); - context.getResponseReceivedEventHandler().addListener( - new ResponseReceivedEventHandler()); - } - - private static void updateMetrics(HttpURLConnection conn, - RequestResult result) { - BlobOperationDescriptor.OperationType operationType - = BlobOperationDescriptor.getOperationType(conn); - int status = result.getStatusCode(); - long contentLength = 0; - // If the socket is terminated prior to receiving a response, the HTTP - // status may be 0 or -1. A status less than 200 or greater than or equal - // to 500 is considered an error. 
- boolean isFailedOperation = (status < HttpURLConnection.HTTP_OK - || status >= java.net.HttpURLConnection.HTTP_INTERNAL_ERROR); - - switch (operationType) { - case AppendBlock: - case PutBlock: - case PutPage: - contentLength = BlobOperationDescriptor.getContentLengthIfKnown(conn, - operationType); - if (contentLength > 0) { - singleton.writeThrottler.addBytesTransferred(contentLength, - isFailedOperation); - } - break; - case GetBlob: - contentLength = BlobOperationDescriptor.getContentLengthIfKnown(conn, - operationType); - if (contentLength > 0) { - singleton.readThrottler.addBytesTransferred(contentLength, - isFailedOperation); - } - break; - default: - break; - } - } - - /** - * Called when a network error occurs before the HTTP status and response - * headers are received. Client-side throttling uses this to collect metrics. - * - * @param event The connection, operation, and request state. - */ - public static void errorReceivingResponse(ErrorReceivingResponseEvent event) { - updateMetrics((HttpURLConnection) event.getConnectionObject(), - event.getRequestResult()); - } - - /** - * Called before the Azure Storage SDK sends a request. Client-side throttling - * uses this to suspend the request, if necessary, to minimize errors and - * maximize throughput. - * - * @param event The connection, operation, and request state. - */ - public static void sendingRequest(SendingRequestEvent event) { - BlobOperationDescriptor.OperationType operationType - = BlobOperationDescriptor.getOperationType( - (HttpURLConnection) event.getConnectionObject()); - switch (operationType) { - case GetBlob: - singleton.readThrottler.suspendIfNecessary(); - break; - case AppendBlock: - case PutBlock: - case PutPage: - singleton.writeThrottler.suspendIfNecessary(); - break; - default: - break; - } - } - - /** - * Called after the Azure Storage SDK receives a response. Client-side - * throttling uses this to collect metrics. - * - * @param event The connection, operation, and request state. - */ - public static void responseReceived(ResponseReceivedEvent event) { - updateMetrics((HttpURLConnection) event.getConnectionObject(), - event.getRequestResult()); - } - - /** - * The ErrorReceivingResponseEvent is fired when the Azure Storage SDK - * encounters a network error before the HTTP status and response headers are - * received. - */ - @InterfaceAudience.Private - static class ErrorReceivingResponseEventHandler - extends StorageEvent { - - /** - * Called when a network error occurs before the HTTP status and response - * headers are received. Client-side throttling uses this to collect - * metrics. - * - * @param event The connection, operation, and request state. - */ - @Override - public void eventOccurred(ErrorReceivingResponseEvent event) { - singleton.errorReceivingResponse(event); - } - } - - /** - * The SendingRequestEvent is fired before the Azure Storage SDK sends a - * request. - */ - @InterfaceAudience.Private - static class SendingRequestEventHandler - extends StorageEvent { - - /** - * Called before the Azure Storage SDK sends a request. Client-side - * throttling uses this to suspend the request, if necessary, to minimize - * errors and maximize throughput. - * - * @param event The connection, operation, and request state. - */ - @Override - public void eventOccurred(SendingRequestEvent event) { - singleton.sendingRequest(event); - } - } - - /** - * The ResponseReceivedEvent is fired after the Azure Storage SDK receives a - * response. 
- */ - @InterfaceAudience.Private - static class ResponseReceivedEventHandler - extends StorageEvent { - - /** - * Called after the Azure Storage SDK receives a response. Client-side - * throttling uses this - * to collect metrics. - * - * @param event The connection, operation, and request state. - */ - @Override - public void eventOccurred(ResponseReceivedEvent event) { - singleton.responseReceived(event); - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/FileMetadata.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/FileMetadata.java deleted file mode 100644 index cbf3ab9616032..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/FileMetadata.java +++ /dev/null @@ -1,125 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.PermissionStatus; - -/** - *

- * Holds basic metadata for a file stored in a {@link NativeFileSystemStore}. - *

- */ -@InterfaceAudience.Private -class FileMetadata extends FileStatus { - // this is not final so that it can be cleared to save memory when not needed. - private String key; - private final BlobMaterialization blobMaterialization; - - /** - * Constructs a FileMetadata object for a file. - * - * @param key - * The key (path) to the file. - * @param length - * The length in bytes of the file. - * @param lastModified - * The last modified date (milliseconds since January 1, 1970 UTC.) - * @param permissionStatus - * The permission for the file. - * @param blockSize - * The Hadoop file block size. - */ - public FileMetadata(String key, long length, long lastModified, - PermissionStatus permissionStatus, final long blockSize) { - super(length, false, 1, blockSize, lastModified, 0, - permissionStatus.getPermission(), - permissionStatus.getUserName(), - permissionStatus.getGroupName(), - null); - this.key = key; - // Files are never implicit. - this.blobMaterialization = BlobMaterialization.Explicit; - } - - /** - * Constructs a FileMetadata object for a directory. - * - * @param key - * The key (path) to the directory. - * @param lastModified - * The last modified date (milliseconds since January 1, 1970 UTC.) - * @param permissionStatus - * The permission for the directory. - * @param blobMaterialization - * Whether this is an implicit (no real blob backing it) or explicit - * directory. - * @param blockSize - * The Hadoop file block size. - */ - public FileMetadata(String key, long lastModified, - PermissionStatus permissionStatus, BlobMaterialization blobMaterialization, - final long blockSize) { - super(0, true, 1, blockSize, lastModified, 0, - permissionStatus.getPermission(), - permissionStatus.getUserName(), - permissionStatus.getGroupName(), - null); - this.key = key; - this.blobMaterialization = blobMaterialization; - } - - @Override - public Path getPath() { - Path p = super.getPath(); - if (p == null) { - // Don't store this yet to reduce memory usage, as it will - // stay in the Eden Space and later we will update it - // with the full canonicalized path. - p = NativeAzureFileSystem.keyToPath(key); - } - return p; - } - - /** - * Returns the Azure storage key for the file. Used internally by the framework. - * - * @return The key for the file. - */ - public String getKey() { - return key; - } - - /** - * Indicates whether this is an implicit directory (no real blob backing it) - * or an explicit one. - * - * @return Implicit if this is an implicit directory, or Explicit if it's an - * explicit directory or a file. - */ - public BlobMaterialization getBlobMaterialization() { - return blobMaterialization; - } - - void removeKey() { - key = null; - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/KeyProvider.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/KeyProvider.java deleted file mode 100644 index ed510f67cad5f..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/KeyProvider.java +++ /dev/null @@ -1,44 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.conf.Configuration; - -/** - * The interface that every Azure file system key provider must implement. - */ -@InterfaceAudience.Private -public interface KeyProvider { - /** - * Key providers must implement this method. Given a list of configuration - * parameters for the specified Azure storage account, retrieve the plaintext - * storage account key. - * - * @param accountName - * the storage account name - * @param conf - * Hadoop configuration parameters - * @return the plaintext storage account key - * @throws KeyProviderException Thrown if there is a problem instantiating a - * KeyProvider or retrieving a key using a KeyProvider object. - */ - String getStorageAccountKey(String accountName, Configuration conf) - throws KeyProviderException; -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/KeyProviderException.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/KeyProviderException.java deleted file mode 100644 index b65b2e6f42fe2..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/KeyProviderException.java +++ /dev/null @@ -1,42 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import org.apache.hadoop.classification.InterfaceAudience; - -/** - * Thrown if there is a problem instantiating a KeyProvider or retrieving a key - * using a KeyProvider object. 
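To show what satisfying this contract looks like, here is a minimal hedged sketch of a custom provider; the configuration property name is hypothetical and this is not the provider the module actually ships.

package org.apache.hadoop.fs.azure;

import org.apache.hadoop.conf.Configuration;

public class ConfigKeyProviderSketch implements KeyProvider {
  @Override
  public String getStorageAccountKey(String accountName, Configuration conf)
      throws KeyProviderException {
    // Hypothetical property name; a real provider would read the key from
    // wherever it is actually stored (configuration, credential provider, etc.).
    String key = conf.get("example.azure.account.key." + accountName);
    if (key == null || key.isEmpty()) {
      throw new KeyProviderException(
          "No storage account key configured for " + accountName);
    }
    return key;
  }
}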
- */ -@InterfaceAudience.Private -public class KeyProviderException extends Exception { - private static final long serialVersionUID = 1L; - - public KeyProviderException(String message) { - super(message); - } - - public KeyProviderException(String message, Throwable cause) { - super(message, cause); - } - - public KeyProviderException(Throwable t) { - super(t); - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/LocalSASKeyGeneratorImpl.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/LocalSASKeyGeneratorImpl.java deleted file mode 100644 index b573457cd1779..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/LocalSASKeyGeneratorImpl.java +++ /dev/null @@ -1,295 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.net.URI; -import java.net.URISyntaxException; -import java.security.InvalidKeyException; -import java.util.Calendar; -import java.util.Date; -import java.util.EnumSet; -import java.util.GregorianCalendar; -import java.util.HashMap; -import java.util.Map; -import java.util.StringTokenizer; -import java.util.TimeZone; - -import org.apache.hadoop.conf.Configuration; - -import com.microsoft.azure.storage.CloudStorageAccount; -import com.microsoft.azure.storage.SharedAccessAccountPermissions; -import com.microsoft.azure.storage.SharedAccessAccountPolicy; -import com.microsoft.azure.storage.SharedAccessAccountResourceType; -import com.microsoft.azure.storage.SharedAccessAccountService; -import com.microsoft.azure.storage.StorageCredentialsAccountAndKey; -import com.microsoft.azure.storage.StorageCredentialsSharedAccessSignature; -import com.microsoft.azure.storage.StorageException; -import com.microsoft.azure.storage.blob.CloudBlobClient; -import com.microsoft.azure.storage.blob.CloudBlobContainer; -import com.microsoft.azure.storage.blob.CloudBlockBlob; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/*** - * Local SAS Key Generation implementation. This class resides in - * the same address space as the WASB driver. - * - * This class gets typically used for testing purposes. - * - */ - -public class LocalSASKeyGeneratorImpl extends SASKeyGeneratorImpl { - - public static final Logger LOG = LoggerFactory.getLogger( - LocalSASKeyGeneratorImpl.class); - - /** - * Map to cache CloudStorageAccount instances. 
- */ - private Map storageAccountMap; - private CachingAuthorizer cache; - private static final int HOURS_IN_DAY = 24; - - public LocalSASKeyGeneratorImpl(Configuration conf) { - super(conf); - storageAccountMap = new HashMap(); - cache = new CachingAuthorizer<>(getSasKeyExpiryPeriod(), "SASKEY"); - cache.init(conf); - } - - /** - * Implementation to generate SAS Key for a container - */ - @Override - public URI getContainerSASUri(String accountName, String container) - throws SASKeyGenerationException { - - LOG.debug("Retrieving Container SAS URI For {}@{}", container, accountName); - try { - - CachedSASKeyEntry cacheKey = new CachedSASKeyEntry(accountName, container, "/"); - URI cacheResult = cache.get(cacheKey); - if (cacheResult != null) { - return cacheResult; - } - - CloudStorageAccount account = - getSASKeyBasedStorageAccountInstance(accountName); - CloudBlobClient client = account.createCloudBlobClient(); - URI sasKey = client.getCredentials().transformUri( - client.getContainerReference(container).getUri()); - cache.put(cacheKey, sasKey); - return sasKey; - - } catch (StorageException stoEx) { - throw new SASKeyGenerationException("Encountered StorageException while" - + " generating SAS Key for container " + container + " inside " - + "storage account " + accountName, stoEx); - } catch (URISyntaxException uriSyntaxEx) { - throw new SASKeyGenerationException("Encountered URISyntaxException while" - + " generating SAS Key for container " + container + " inside storage" - + " account " + accountName, uriSyntaxEx); - } - } - - /** - * Helper method that creates a CloudStorageAccount instance based on - * SAS key for accountName - * - * @param accountName Storage Account Name - * @return CloudStorageAccount instance created using SAS key for - * the Storage Account. - * @throws SASKeyGenerationException - */ - private CloudStorageAccount getSASKeyBasedStorageAccountInstance( - String accountName) throws SASKeyGenerationException { - LOG.debug("Creating SAS key from account instance {}", accountName); - try { - - String accountNameWithoutDomain = - getAccountNameWithoutDomain(accountName); - - CloudStorageAccount account = - getStorageAccountInstance(accountNameWithoutDomain, - AzureNativeFileSystemStore.getAccountKeyFromConfiguration( - accountName, getConf())); - - return new CloudStorageAccount( - new StorageCredentialsSharedAccessSignature( - account.generateSharedAccessSignature( - getDefaultAccountAccessPolicy())), false, - account.getEndpointSuffix(), accountNameWithoutDomain); - - } catch (KeyProviderException keyProviderEx) { - throw new SASKeyGenerationException("Encountered KeyProviderException" - + " while retrieving Storage key from configuration for account " - + accountName, keyProviderEx); - } catch (InvalidKeyException invalidKeyEx) { - throw new SASKeyGenerationException("Encoutered InvalidKeyException " - + "while generating Account level SAS key for account" + accountName, - invalidKeyEx); - } catch(StorageException storeEx) { - throw new SASKeyGenerationException("Encoutered StorageException while " - + "generating Account level SAS key for account" + accountName, - storeEx); - } catch(URISyntaxException uriSyntaxEx) { - throw new SASKeyGenerationException("Encountered URISyntaxException for" - + " account " + accountName, uriSyntaxEx); - } - } - - /** - * Implementation for generation of Relative Path Blob SAS Uri. 
- */ - @Override - public URI getRelativeBlobSASUri(String accountName, String container, - String relativePath) throws SASKeyGenerationException { - - CloudBlobContainer sc = null; - CloudBlobClient client = null; - CachedSASKeyEntry cacheKey = null; - - try { - - cacheKey = new CachedSASKeyEntry(accountName, container, relativePath); - URI cacheResult = cache.get(cacheKey); - if (cacheResult != null) { - return cacheResult; - } - - CloudStorageAccount account = - getSASKeyBasedStorageAccountInstance(accountName); - client = account.createCloudBlobClient(); - sc = client.getContainerReference(container); - } catch (URISyntaxException uriSyntaxEx) { - throw new SASKeyGenerationException("Encountered URISyntaxException " - + "while getting container references for container " + container - + " inside storage account : " + accountName, uriSyntaxEx); - } catch (StorageException stoEx) { - throw new SASKeyGenerationException("Encountered StorageException while " - + "getting container references for container " + container - + " inside storage account : " + accountName, stoEx); - } - - CloudBlockBlob blob = null; - try { - blob = sc.getBlockBlobReference(relativePath); - } catch (URISyntaxException uriSyntaxEx) { - throw new SASKeyGenerationException("Encountered URISyntaxException while " - + "getting Block Blob references for container " + container - + " inside storage account : " + accountName, uriSyntaxEx); - } catch (StorageException stoEx) { - throw new SASKeyGenerationException("Encountered StorageException while " - + "getting Block Blob references for container " + container - + " inside storage account : " + accountName, stoEx); - } - - try { - URI sasKey = client.getCredentials().transformUri(blob.getUri()); - cache.put(cacheKey, sasKey); - return sasKey; - } catch (StorageException stoEx) { - throw new SASKeyGenerationException("Encountered StorageException while " - + "generating SAS key for Blob: " + relativePath + " inside " - + "container : " + container + " in Storage Account : " + accountName, - stoEx); - } catch (URISyntaxException uriSyntaxEx) { - throw new SASKeyGenerationException("Encountered URISyntaxException " - + "while generating SAS key for Blob: " + relativePath + " inside " - + "container: " + container + " in Storage Account : " + accountName, - uriSyntaxEx); - } - } - - /** - * Helper method that creates CloudStorageAccount Instance using the - * storage account key. - * @param accountName Name of the storage account - * @param accountKey Storage Account key - * @return CloudStorageAccount instance for the storage account. - * @throws SASKeyGenerationException - */ - private CloudStorageAccount getStorageAccountInstance(String accountName, - String accountKey) throws SASKeyGenerationException { - - if (!storageAccountMap.containsKey(accountName)) { - if (accountKey == null || accountKey.isEmpty()) { - throw new SASKeyGenerationException( - "No key for Storage account " + accountName); - } - - CloudStorageAccount account = null; - try { - account = - new CloudStorageAccount(new StorageCredentialsAccountAndKey( - accountName, accountKey)); - } catch (URISyntaxException uriSyntaxEx) { - throw new SASKeyGenerationException("Encountered URISyntaxException " - + "for account " + accountName, uriSyntaxEx); - } - - storageAccountMap.put(accountName, account); - } - - return storageAccountMap.get(accountName); - } - - /** - * Helper method that returns the Storage account name without - * the domain name suffix. 
- * @param fullAccountName Storage account name with domain name suffix - * @return String - */ - private String getAccountNameWithoutDomain(String fullAccountName) { - StringTokenizer tokenizer = new StringTokenizer(fullAccountName, "."); - return tokenizer.nextToken(); - } - - /** - * Helper method to generate Access Policy for the Storage Account SAS Key - * @return SharedAccessAccountPolicy - */ - private SharedAccessAccountPolicy getDefaultAccountAccessPolicy() { - - SharedAccessAccountPolicy ap = - new SharedAccessAccountPolicy(); - - Calendar cal = new GregorianCalendar(TimeZone.getTimeZone("UTC")); - cal.setTime(new Date()); - cal.add(Calendar.HOUR, (int) getSasKeyExpiryPeriod() * HOURS_IN_DAY); - - ap.setSharedAccessExpiryTime(cal.getTime()); - ap.setPermissions(getDefaultAccoutSASKeyPermissions()); - ap.setResourceTypes(EnumSet.of(SharedAccessAccountResourceType.CONTAINER, - SharedAccessAccountResourceType.OBJECT)); - ap.setServices(EnumSet.of(SharedAccessAccountService.BLOB)); - - return ap; - } - - private EnumSet getDefaultAccoutSASKeyPermissions() { - return EnumSet.of(SharedAccessAccountPermissions.ADD, - SharedAccessAccountPermissions.CREATE, - SharedAccessAccountPermissions.DELETE, - SharedAccessAccountPermissions.LIST, - SharedAccessAccountPermissions.READ, - SharedAccessAccountPermissions.UPDATE, - SharedAccessAccountPermissions.WRITE); - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java index 4e777da8b409f..36d732aeefc03 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java @@ -18,4039 +18,131 @@ package org.apache.hadoop.fs.azure; -import java.io.DataOutputStream; -import java.io.EOFException; -import java.io.FileNotFoundException; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; import java.net.URI; -import java.net.URISyntaxException; -import java.nio.charset.StandardCharsets; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Date; -import java.util.EnumSet; -import java.util.TimeZone; -import java.util.UUID; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.regex.Matcher; -import java.util.regex.Pattern; -import java.util.Arrays; -import java.util.List; -import java.util.Optional; -import java.util.Stack; -import java.util.HashMap; - -import com.fasterxml.jackson.core.JsonParseException; -import com.fasterxml.jackson.core.JsonParser; -import com.fasterxml.jackson.databind.JsonMappingException; -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectReader; -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.BufferedFSInputStream; -import org.apache.hadoop.fs.CommonPathCapabilities; -import org.apache.hadoop.fs.CreateFlag; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FSExceptionMessages; -import org.apache.hadoop.fs.FSInputStream; -import org.apache.hadoop.fs.FileAlreadyExistsException; -import 
org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PositionedReadable; -import org.apache.hadoop.fs.Seekable; -import org.apache.hadoop.fs.StreamCapabilities; -import org.apache.hadoop.fs.Syncable; -import org.apache.hadoop.fs.XAttrSetFlag; -import org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation; -import org.apache.hadoop.fs.azure.metrics.AzureFileSystemMetricsSystem; -import org.apache.hadoop.fs.azure.security.Constants; -import org.apache.hadoop.fs.azure.security.RemoteWasbDelegationTokenManager; -import org.apache.hadoop.fs.azure.security.WasbDelegationTokenManager; -import org.apache.hadoop.fs.impl.AbstractFSBuilderImpl; -import org.apache.hadoop.fs.impl.OpenFileParameters; -import org.apache.hadoop.fs.impl.StoreImplementationUtils; -import org.apache.hadoop.fs.permission.FsAction; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.fs.permission.PermissionStatus; -import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.security.AccessControlException; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.security.token.delegation.web.DelegationTokenAuthenticatedURL; -import org.apache.hadoop.util.LambdaUtils; -import org.apache.hadoop.util.Progressable; -import org.apache.hadoop.util.Time; - -import org.apache.hadoop.util.Preconditions; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_STANDARD_OPTIONS; -import static org.apache.hadoop.fs.azure.NativeAzureFileSystemHelper.*; -import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs; - -import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.hadoop.classification.VisibleForTesting; -import com.microsoft.azure.storage.StorageException; - -/** - * A {@link FileSystem} for reading and writing files stored on Windows Azure. This implementation is - * blob-based and stores files on Azure in their native form so they can be read - * by other Azure tools. - */ -@InterfaceAudience.Public -@InterfaceStability.Stable -public class NativeAzureFileSystem extends FileSystem { - private static final int USER_WX_PERMISION = 0300; - private static final String USER_HOME_DIR_PREFIX_DEFAULT = "/user"; - /** - * A description of a folder rename operation, including the source and - * destination keys, and descriptions of the files in the source folder. - */ - - public static class FolderRenamePending { - private SelfRenewingLease folderLease; - private String srcKey; - private String dstKey; - private FileMetadata[] fileMetadata = null; // descriptions of source files - private ArrayList fileStrings = null; - private NativeAzureFileSystem fs; - private static final int MAX_RENAME_PENDING_FILE_SIZE = 10000000; - private static final int FORMATTING_BUFFER = 10000; - private boolean committed; - public static final String SUFFIX = "-RenamePending.json"; - private static final ObjectReader READER = new ObjectMapper() - .configure(JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES, true) - .readerFor(JsonNode.class); - - // Prepare in-memory information needed to do or redo a folder rename. 
- public FolderRenamePending(String srcKey, String dstKey, SelfRenewingLease lease, - NativeAzureFileSystem fs) throws IOException { - this.srcKey = srcKey; - this.dstKey = dstKey; - this.folderLease = lease; - this.fs = fs; - - // List all the files in the folder. - long start = Time.monotonicNow(); - fileMetadata = fs.getStoreInterface().list(srcKey, AZURE_LIST_ALL, - AZURE_UNBOUNDED_DEPTH); - - long end = Time.monotonicNow(); - LOG.debug("Time taken to list {} blobs for rename operation is: {} ms", fileMetadata.length, (end - start)); - - this.committed = true; - } - - // Prepare in-memory information needed to do or redo folder rename from - // a -RenamePending.json file read from storage. This constructor is to use during - // redo processing. - public FolderRenamePending(Path redoFile, NativeAzureFileSystem fs) - throws IllegalArgumentException, IOException { - - this.fs = fs; - - // open redo file - Path f = redoFile; - int l; - byte[] bytes; - try (FSDataInputStream input = fs.open(f)) { - bytes = new byte[MAX_RENAME_PENDING_FILE_SIZE]; - l = input.read(bytes); - } - if (l <= 0) { - // Jira HADOOP-12678 -Handle empty rename pending metadata file during - // atomic rename in redo path. If during renamepending file is created - // but not written yet, then this means that rename operation - // has not started yet. So we should delete rename pending metadata file. - LOG.error("Deleting empty rename pending file " - + redoFile + " -- no data available"); - deleteRenamePendingFile(fs, redoFile); - return; - } - if (l == MAX_RENAME_PENDING_FILE_SIZE) { - throw new IOException( - "Error reading pending rename file contents -- " - + "maximum file size exceeded"); - } - String contents = new String(bytes, 0, l, StandardCharsets.UTF_8); - - // parse the JSON - JsonNode json = null; - try { - json = READER.readValue(contents); - this.committed = true; - } catch (JsonMappingException e) { - - // The -RedoPending.json file is corrupted, so we assume it was - // not completely written - // and the redo operation did not commit. 
- this.committed = false; - } catch (JsonParseException e) { - this.committed = false; - } catch (IOException e) { - this.committed = false; - } - - if (!this.committed) { - LOG.error("Deleting corruped rename pending file {} \n {}", - redoFile, contents); - - // delete the -RenamePending.json file - deleteRenamePendingFile(fs, redoFile); - return; - } - - // initialize this object's fields - ArrayList fileStrList = new ArrayList(); - JsonNode oldFolderName = json.get("OldFolderName"); - JsonNode newFolderName = json.get("NewFolderName"); - if (oldFolderName == null || newFolderName == null) { - this.committed = false; - } else { - this.srcKey = oldFolderName.textValue(); - this.dstKey = newFolderName.textValue(); - if (this.srcKey == null || this.dstKey == null) { - this.committed = false; - } else { - JsonNode fileList = json.get("FileList"); - if (fileList == null) { - this.committed = false; - } else { - for (int i = 0; i < fileList.size(); i++) { - fileStrList.add(fileList.get(i).textValue()); - } - } - } - } - this.fileStrings = fileStrList; - } - - public FileMetadata[] getFiles() { - return fileMetadata; - } - - public SelfRenewingLease getFolderLease() { - return folderLease; - } - - /** - * Deletes rename pending metadata file - * @param fs -- the file system - * @param redoFile - rename pending metadata file path - * @throws IOException - If deletion fails - */ - @VisibleForTesting - void deleteRenamePendingFile(FileSystem fs, Path redoFile) - throws IOException { - try { - fs.delete(redoFile, false); - } catch (IOException e) { - // If the rename metadata was not found then somebody probably - // raced with us and finished the delete first - Throwable t = e.getCause(); - if (t != null && t instanceof StorageException - && "BlobNotFound".equals(((StorageException) t).getErrorCode())) { - LOG.warn("rename pending file " + redoFile + " is already deleted"); - } else { - throw e; - } - } - } - - /** - * Write to disk the information needed to redo folder rename, - * in JSON format. The file name will be - * {@code wasb:///folderName-RenamePending.json} - * The file format will be: - *
{@code
-     * {
-     *   FormatVersion: "1.0",
-     *   OperationTime: "<YYYY-MM-DD HH:MM:SS.MMM>",
-     *   OldFolderName: "<key>",
-     *   NewFolderName: "<key>",
-     *   FileList: [ <string> , <string> , ... ]
-     * }
-     *
-     * Here's a sample:
-     * {
-     *  FormatVersion: "1.0",
-     *  OperationUTCTime: "2014-07-01 23:50:35.572",
-     *  OldFolderName: "user/ehans/folderToRename",
-     *  NewFolderName: "user/ehans/renamedFolder",
-     *  FileList: [
-     *    "innerFile",
-     *    "innerFile2"
-     *  ]
-     * } }
- * @param fs file system on which a file is written. - * @throws IOException Thrown when fail to write file. - */ - public void writeFile(NativeAzureFileSystem fs) throws IOException { - Path path = getRenamePendingFilePath(); - LOG.debug("Preparing to write atomic rename state to {}", path.toString()); - OutputStream output = null; - - String contents = makeRenamePendingFileContents(); - - // Write file. - try { - output = fs.createInternal(path, FsPermission.getFileDefault(), false, null); - output.write(contents.getBytes(StandardCharsets.UTF_8)); - } catch (IOException e) { - throw new IOException("Unable to write RenamePending file for folder rename from " - + srcKey + " to " + dstKey, e); - } finally { - NativeAzureFileSystemHelper.cleanup(LOG, output); - } - } - - /** - * Return the contents of the JSON file to represent the operations - * to be performed for a folder rename. - * - * @return JSON string which represents the operation. - */ - public String makeRenamePendingFileContents() { - SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS"); - sdf.setTimeZone(TimeZone.getTimeZone("UTC")); - String time = sdf.format(new Date()); - - // Make file list string - StringBuilder builder = new StringBuilder(); - builder.append("[\n"); - for (int i = 0; i != fileMetadata.length; i++) { - if (i > 0) { - builder.append(",\n"); - } - builder.append(" "); - String noPrefix = StringUtils.removeStart(fileMetadata[i].getKey(), srcKey + "/"); - - // Quote string file names, escaping any possible " characters or other - // necessary characters in the name. - builder.append(quote(noPrefix)); - if (builder.length() >= - MAX_RENAME_PENDING_FILE_SIZE - FORMATTING_BUFFER) { - - // Give up now to avoid using too much memory. - LOG.error("Internal error: Exceeded maximum rename pending file size of {} bytes.", - MAX_RENAME_PENDING_FILE_SIZE); - - // return some bad JSON with an error message to make it human readable - return "exceeded maximum rename pending file size"; - } - } - builder.append("\n ]"); - String fileList = builder.toString(); - - // Make file contents as a string. Again, quote file names, escaping - // characters as appropriate. - String contents = "{\n" - + " FormatVersion: \"1.0\",\n" - + " OperationUTCTime: \"" + time + "\",\n" - + " OldFolderName: " + quote(srcKey) + ",\n" - + " NewFolderName: " + quote(dstKey) + ",\n" - + " FileList: " + fileList + "\n" - + "}\n"; - - return contents; - } - - /** - * This is an exact copy of org.codehaus.jettison.json.JSONObject.quote - * method. - * - * Produce a string in double quotes with backslash sequences in all the - * right places. A backslash will be inserted within - * A {@link FileSystem} for reading and writing files stored on Windows Azure. This implementation is - * blob-based and stores files on Azure in their native form so they can be read - * by other Azure tools. This implementation uses HTTPS for secure network communication. - *
- */ - public static class Secure extends NativeAzureFileSystem { - @Override - public String getScheme() { - return "wasbs"; - } - } - - public static final Logger LOG = LoggerFactory.getLogger(NativeAzureFileSystem.class); - - /** - * The time span in seconds before which we consider a temp blob to be - * dangling (not being actively uploaded to) and up for reclamation. - * - * So e.g. if this is 60, then any temporary blobs more than a minute old - * would be considered dangling. - */ - static final String AZURE_TEMP_EXPIRY_PROPERTY_NAME = "fs.azure.fsck.temp.expiry.seconds"; - private static final int AZURE_TEMP_EXPIRY_DEFAULT = 3600; - static final String PATH_DELIMITER = Path.SEPARATOR; - static final String AZURE_TEMP_FOLDER = "_$azuretmpfolder$"; - - private static final int AZURE_LIST_ALL = -1; - private static final int AZURE_UNBOUNDED_DEPTH = -1; - - /** - * The configuration property that determines which group owns files created - * in WASB. - */ - private static final String AZURE_DEFAULT_GROUP_PROPERTY_NAME = "fs.azure.permissions.supergroup"; - /** - * The default value for fs.azure.permissions.supergroup. Chosen as the same - * default as DFS. - */ - static final String AZURE_DEFAULT_GROUP_DEFAULT = "supergroup"; - - /** - * Configuration property used to specify list of users that can perform - * chown operation when authorization is enabled in WASB. - */ - public static final String AZURE_CHOWN_USERLIST_PROPERTY_NAME = - "fs.azure.chown.allowed.userlist"; - - static final String AZURE_CHOWN_USERLIST_PROPERTY_DEFAULT_VALUE = "*"; - - /** - * Configuration property used to specify list of daemon users that can - * perform chmod operation when authorization is enabled in WASB. - */ - public static final String AZURE_DAEMON_USERLIST_PROPERTY_NAME = - "fs.azure.daemon.userlist"; - - static final String AZURE_DAEMON_USERLIST_PROPERTY_DEFAULT_VALUE = "*"; - - /** - * Configuration property used to specify list of users that can perform - * chmod operation when authorization is enabled in WASB. - */ - public static final String AZURE_CHMOD_USERLIST_PROPERTY_NAME = - "fs.azure.chmod.allowed.userlist"; - - static final String AZURE_CHMOD_USERLIST_PROPERTY_DEFAULT_VALUE = "*"; - - static final String AZURE_RINGBUFFER_CAPACITY_PROPERTY_NAME = - "fs.azure.ring.buffer.capacity"; - static final String AZURE_OUTPUT_STREAM_BUFFER_SIZE_PROPERTY_NAME = - "fs.azure.output.stream.buffer.size"; - - public static final String SKIP_AZURE_METRICS_PROPERTY_NAME = "fs.azure.skip.metrics"; - - /* - * Property to enable Append API. - */ - public static final String APPEND_SUPPORT_ENABLE_PROPERTY_NAME = "fs.azure.enable.append.support"; - - /* - * Property to override canonical service name with filesystem's URI. - */ - public static final String RETURN_URI_AS_CANONICAL_SERVICE_NAME_PROPERTY_NAME = "fs.azure.override.canonical.service.name"; - - /** - * The configuration property to set number of threads to be used for rename operation. - */ - public static final String AZURE_RENAME_THREADS = "fs.azure.rename.threads"; - - /** - * The default number of threads to be used for rename operation. - */ - public static final int DEFAULT_AZURE_RENAME_THREADS = 0; - - /** - * The configuration property to set number of threads to be used for delete operation. - */ - public static final String AZURE_DELETE_THREADS = "fs.azure.delete.threads"; - - /** - * The default number of threads to be used for delete operation. 
- */ - public static final int DEFAULT_AZURE_DELETE_THREADS = 0; - - /** - * The number of threads to be used for delete operation after reading user configuration. - */ - private int deleteThreadCount = 0; - - /** - * The number of threads to be used for rename operation after reading user configuration. - */ - private int renameThreadCount = 0; - - private class NativeAzureFsInputStream extends FSInputStream { - private InputStream in; - private final String key; - private long pos = 0; - private boolean closed = false; - private boolean isPageBlob; - - // File length, valid only for streams over block blobs. - private long fileLength; - - NativeAzureFsInputStream(InputStream in, String key, long fileLength) { - this.in = in; - this.key = key; - this.isPageBlob = store.isPageBlobKey(key); - this.fileLength = fileLength; - } - - /** - * Return the size of the remaining available bytes - * if the size is less than or equal to {@link Integer#MAX_VALUE}, - * otherwise, return {@link Integer#MAX_VALUE}. - * - * This is to match the behavior of DFSInputStream.available(), - * which some clients may rely on (HBase write-ahead log reading in - * particular). - */ - @Override - public synchronized int available() throws IOException { - if (isPageBlob) { - return in.available(); - } else { - if (closed) { - throw new IOException("Stream closed"); - } - final long remaining = this.fileLength - pos; - return remaining <= Integer.MAX_VALUE ? - (int) remaining : Integer.MAX_VALUE; - } - } - - /* - * Reads the next byte of data from the input stream. The value byte is - * returned as an integer in the range 0 to 255. If no byte is available - * because the end of the stream has been reached, the value -1 is returned. - * This method blocks until input data is available, the end of the stream - * is detected, or an exception is thrown. - * - * @returns int An integer corresponding to the byte read. - */ - @Override - public synchronized int read() throws FileNotFoundException, IOException { - try { - int result = 0; - result = in.read(); - if (result != -1) { - pos++; - if (statistics != null) { - statistics.incrementBytesRead(1); - } - } - // Return to the caller with the result. - // - return result; - } catch(EOFException e) { - return -1; - } catch(IOException e) { - - Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(e); - - if (innerException instanceof StorageException) { - - LOG.error("Encountered Storage Exception for read on Blob : {}" - + " Exception details: {} Error Code : {}", - key, e, ((StorageException) innerException).getErrorCode()); - - if (NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException)) { - throw new FileNotFoundException(String.format("%s is not found", key)); - } - } - - throw e; - } - } - - /* - * Reads up to len bytes of data from the input stream into an array of - * bytes. An attempt is made to read as many as len bytes, but a smaller - * number may be read. The number of bytes actually read is returned as an - * integer. This method blocks until input data is available, end of file is - * detected, or an exception is thrown. If len is zero, then no bytes are - * read and 0 is returned; otherwise, there is an attempt to read at least - * one byte. If no byte is available because the stream is at end of file, - * the value -1 is returned; otherwise, at least one byte is read and stored - * into b. 
- * - * @param b -- the buffer into which data is read - * - * @param off -- the start offset in the array b at which data is written - * - * @param len -- the maximum number of bytes read - * - * @ returns int The total number of byes read into the buffer, or -1 if - * there is no more data because the end of stream is reached. - */ - @Override - public synchronized int read(byte[] b, int off, int len) throws FileNotFoundException, IOException { - try { - int result = 0; - result = in.read(b, off, len); - if (result > 0) { - pos += result; - } - - if (null != statistics && result > 0) { - statistics.incrementBytesRead(result); - } - - // Return to the caller with the result. - return result; - } catch(IOException e) { - - Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(e); - - if (innerException instanceof StorageException) { - - LOG.error("Encountered Storage Exception for read on Blob : {}" - + " Exception details: {} Error Code : {}", - key, e, ((StorageException) innerException).getErrorCode()); - - if (NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException)) { - throw new FileNotFoundException(String.format("%s is not found", key)); - } - } - - throw e; - } - } - - @Override - public int read(long position, byte[] buffer, int offset, int length) - throws IOException { - // SpotBugs reports bug type IS2_INCONSISTENT_SYNC here. - // This report is not valid here. - // 'this.in' is instance of BlockBlobInputStream and read(long, byte[], int, int) - // calls it's Super class method when 'fs.azure.block.blob.buffered.pread.disable' - // is configured false. Super class FSInputStream's implementation is having - // proper synchronization. - // When 'fs.azure.block.blob.buffered.pread.disable' is true, we want a lock free - // implementation of blob read. Here we don't use any of the InputStream's - // shared resource (buffer) and also don't change any cursor position etc. - // So its safe to go with unsynchronized way of read. - if (in instanceof PositionedReadable) { - try { - int result = ((PositionedReadable) this.in).read(position, buffer, - offset, length); - if (null != statistics && result > 0) { - statistics.incrementBytesRead(result); - } - return result; - } catch (IOException e) { - Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(e); - if (innerException instanceof StorageException) { - LOG.error("Encountered Storage Exception for read on Blob : {}" - + " Exception details: {} Error Code : {}", - key, e, ((StorageException) innerException).getErrorCode()); - if (NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException)) { - throw new FileNotFoundException(String.format("%s is not found", key)); - } - } - throw e; - } - } - return super.read(position, buffer, offset, length); - } - - @Override - public synchronized void close() throws IOException { - if (!closed) { - closed = true; - IOUtils.closeStream(in); - in = null; - } - } - - @Override - public synchronized void seek(long pos) throws FileNotFoundException, EOFException, IOException { - try { - checkNotClosed(); - if (pos < 0) { - throw new EOFException(FSExceptionMessages.NEGATIVE_SEEK); - } - if (this.pos > pos) { - if (in instanceof Seekable) { - ((Seekable) in).seek(pos); - this.pos = pos; - } else { - IOUtils.closeStream(in); - in = store.retrieve(key); - this.pos = in.skip(pos); - } - } else { - this.pos += in.skip(pos - this.pos); - } - LOG.debug("Seek to position {}. 
Bytes skipped {}", pos, - this.pos); - } catch(IOException e) { - - Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(e); - - if (innerException instanceof StorageException - && NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException)) { - throw new FileNotFoundException(String.format("%s is not found", key)); - } - - throw e; - } catch(IndexOutOfBoundsException e) { - throw new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF); - } - } - - @Override - public synchronized long getPos() throws IOException { - return pos; - } - - @Override - public boolean seekToNewSource(long targetPos) throws IOException { - return false; - } - - - /* - * Helper method to check if a stream is closed. - */ - private void checkNotClosed() throws IOException { - if (closed) { - throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); - } - } - } - - /** - * Azure output stream; wraps an inner stream of different types. - */ - public class NativeAzureFsOutputStream extends OutputStream - implements Syncable, StreamCapabilities { - private String key; - private String keyEncoded; - private OutputStream out; - - public NativeAzureFsOutputStream(OutputStream out, String aKey, - String anEncodedKey) throws IOException { - // Check input arguments. The output stream should be non-null and the - // keys - // should be valid strings. - if (null == out) { - throw new IllegalArgumentException( - "Illegal argument: the output stream is null."); - } - - if (null == aKey || 0 == aKey.length()) { - throw new IllegalArgumentException( - "Illegal argument the key string is null or empty"); - } - - if (null == anEncodedKey || 0 == anEncodedKey.length()) { - throw new IllegalArgumentException( - "Illegal argument the encoded key string is null or empty"); - } - - // Initialize the member variables with the incoming parameters. - this.out = out; - - setKey(aKey); - setEncodedKey(anEncodedKey); - } - - /** - * Get a reference to the wrapped output stream. - * - * @return the underlying output stream - */ - @InterfaceAudience.LimitedPrivate({"HDFS"}) - public OutputStream getOutStream() { - return out; - } - - @Override // Syncable - public void hflush() throws IOException { - if (out instanceof Syncable) { - ((Syncable) out).hflush(); - } else { - flush(); - } - } - - @Override // Syncable - public void hsync() throws IOException { - if (out instanceof Syncable) { - ((Syncable) out).hsync(); - } else { - flush(); - } - } - - /** - * Propagate probe of stream capabilities to nested stream - * (if supported), else return false. - * @param capability string to query the stream support for. - * @return true if the nested stream supports the specific capability. - */ - @Override // StreamCapability - public boolean hasCapability(String capability) { - return StoreImplementationUtils.hasCapability(out, capability); - } - - @Override - public synchronized void close() throws IOException { - if (out != null) { - // Close the output stream and decode the key for the output stream - // before returning to the caller. - // - out.close(); - try { - restoreKey(); - } finally { - out = null; - } - } - } - - /** - * Writes the specified byte to this output stream. The general contract for - * write is that one byte is written to the output stream. The byte to be - * written is the eight low-order bits of the argument b. The 24 high-order - * bits of b are ignored. 
- * - * @param b - * 32-bit integer of block of 4 bytes - */ - @Override - public void write(int b) throws IOException { - checkOpen(); - try { - out.write(b); - } catch(IOException e) { - if (e.getCause() instanceof StorageException) { - StorageException storageExcp = (StorageException) e.getCause(); - LOG.error("Encountered Storage Exception for write on Blob : {}" - + " Exception details: {} Error Code : {}", - key, e.getMessage(), storageExcp.getErrorCode()); - } - throw e; - } - } - - /** - * Writes b.length bytes from the specified byte array to this output - * stream. The general contract for write(b) is that it should have exactly - * the same effect as the call write(b, 0, b.length). - * - * @param b - * Block of bytes to be written to the output stream. - */ - @Override - public void write(byte[] b) throws IOException { - checkOpen(); - try { - out.write(b); - } catch(IOException e) { - if (e.getCause() instanceof StorageException) { - StorageException storageExcp = (StorageException) e.getCause(); - LOG.error("Encountered Storage Exception for write on Blob : {}" - + " Exception details: {} Error Code : {}", - key, e.getMessage(), storageExcp.getErrorCode()); - } - throw e; - } - } - - /** - * Writes len from the specified byte array starting at offset - * off to the output stream. The general contract for write(b, - * off, len) is that some of the bytes in the array b - * are written to the output stream in order; element b[off] - * is the first byte written and b[off+len-1] is the last - * byte written by this operation. - * - * @param b - * Byte array to be written. - * @param off - * Write this offset in stream. - * @param len - * Number of bytes to be written. - */ - @Override - public void write(byte[] b, int off, int len) throws IOException { - checkOpen(); - try { - out.write(b, off, len); - } catch(IOException e) { - if (e.getCause() instanceof StorageException) { - StorageException storageExcp = (StorageException) e.getCause(); - LOG.error("Encountered Storage Exception for write on Blob : {}" - + " Exception details: {} Error Code : {}", - key, e.getMessage(), storageExcp.getErrorCode()); - } - throw e; - } - } - - /** - * Get the blob name. - * - * @return String Blob name. - */ - public String getKey() { - return key; - } - - /** - * Set the blob name. - * - * @param key - * Blob name. - */ - public void setKey(String key) { - this.key = key; - } - - /** - * Get the blob name. - * - * @return String Blob name. - */ - public String getEncodedKey() { - return keyEncoded; - } - - /** - * Set the blob name. - * - * @param anEncodedKey - * Blob name. - */ - public void setEncodedKey(String anEncodedKey) { - this.keyEncoded = anEncodedKey; - } - - /** - * Restore the original key name from the m_key member variable. Note: The - * output file stream is created with an encoded blob store key to guarantee - * load balancing on the front end of the Azure storage partition servers. - * The create also includes the name of the original key value which is - * stored in the m_key member variable. This method should only be called - * when the stream is closed. - */ - private void restoreKey() throws IOException { - store.rename(getEncodedKey(), getKey()); - } - - /** - * Check for the stream being open. - * @throws IOException if the stream is closed. 
- */ - private void checkOpen() throws IOException { - if (out == null) { - throw new IOException(FSExceptionMessages.STREAM_IS_CLOSED); - } - } - - } - - private URI uri; - private NativeFileSystemStore store; - private AzureNativeFileSystemStore actualStore; - private Path workingDir; - private AzureFileSystemInstrumentation instrumentation; - private String metricsSourceName; - private boolean isClosed = false; - private static boolean suppressRetryPolicy = false; - // A counter to create unique (within-process) names for my metrics sources. - private static AtomicInteger metricsSourceNameCounter = new AtomicInteger(); - private boolean appendSupportEnabled = false; - private boolean returnUriAsCanonicalServiceName = false; - private DelegationTokenAuthenticatedURL authURL; - private DelegationTokenAuthenticatedURL.Token authToken = new DelegationTokenAuthenticatedURL.Token(); - private String credServiceUrl; - private List chownAllowedUsers; - private List chmodAllowedUsers; - private List daemonUsers; - /** - * Configuration key to enable authorization support in WASB. - */ - public static final String KEY_AZURE_AUTHORIZATION = - "fs.azure.authorization"; - - /** - * Default value for the authorization support in WASB. - */ - private static final boolean DEFAULT_AZURE_AUTHORIZATION = false; - - /** - * Flag controlling authorization support in WASB. - */ - private boolean azureAuthorization = false; - - /** - * Flag controlling Kerberos support in WASB. - */ - private boolean kerberosSupportEnabled = false; - - /** - * Authorizer to use when authorization support is enabled in - * WASB. - */ - private WasbAuthorizerInterface authorizer = null; - - private UserGroupInformation ugi; - - private WasbDelegationTokenManager wasbDelegationTokenManager; - - public NativeAzureFileSystem() { - // set store in initialize() - } - - public NativeAzureFileSystem(NativeFileSystemStore store) { - this.store = store; - } - - /** - * Suppress the default retry policy for the Storage, useful in unit tests to - * test negative cases without waiting forever. - */ - @VisibleForTesting - static void suppressRetryPolicy() { - suppressRetryPolicy = true; - } - - /** - * Undo the effect of suppressRetryPolicy. - */ - @VisibleForTesting - static void resumeRetryPolicy() { - suppressRetryPolicy = false; - } - - /** - * Creates a new metrics source name that's unique within this process. - * @return metric source name - */ - @VisibleForTesting - public static String newMetricsSourceName() { - int number = metricsSourceNameCounter.incrementAndGet(); - final String baseName = "AzureFileSystemMetrics"; - if (number == 1) { // No need for a suffix for the first one - return baseName; - } else { - return baseName + number; - } - } - - /** - * Checks if the given URI scheme is a scheme that's affiliated with the Azure - * File System. - * - * @param scheme - * The URI scheme. - * @return true iff it's an Azure File System URI scheme. - */ - private static boolean isWasbScheme(String scheme) { - // The valid schemes are: asv (old name), asvs (old name over HTTPS), - // wasb (new name), wasbs (new name over HTTPS). - return scheme != null - && (scheme.equalsIgnoreCase("asv") || scheme.equalsIgnoreCase("asvs") - || scheme.equalsIgnoreCase("wasb") || scheme - .equalsIgnoreCase("wasbs")); - } - - /** - * Puts in the authority of the default file system if it is a WASB file - * system and the given URI's authority is null. - * - * @return The URI with reconstructed authority if necessary and possible. 
- */ - private static URI reconstructAuthorityIfNeeded(URI uri, Configuration conf) { - if (null == uri.getAuthority()) { - // If WASB is the default file system, get the authority from there - URI defaultUri = FileSystem.getDefaultUri(conf); - if (defaultUri != null && isWasbScheme(defaultUri.getScheme())) { - try { - // Reconstruct the URI with the authority from the default URI. - return new URI(uri.getScheme(), defaultUri.getAuthority(), - uri.getPath(), uri.getQuery(), uri.getFragment()); - } catch (URISyntaxException e) { - // This should never happen. - throw new Error("Bad URI construction", e); - } - } - } - return uri; - } - - @Override - protected void checkPath(Path path) { - // Make sure to reconstruct the path's authority if needed - super.checkPath(new Path(reconstructAuthorityIfNeeded(path.toUri(), - getConf()))); - } - - @Override - public void initialize(URI uri, Configuration conf) - throws IOException, IllegalArgumentException { - // Check authority for the URI to guarantee that it is non-null. - uri = reconstructAuthorityIfNeeded(uri, conf); - if (null == uri.getAuthority()) { - final String errMsg = String - .format("Cannot initialize WASB file system, URI authority not recognized."); - throw new IllegalArgumentException(errMsg); - } - super.initialize(uri, conf); - - if (store == null) { - store = createDefaultStore(conf); - } - - instrumentation = new AzureFileSystemInstrumentation(conf); - if(!conf.getBoolean(SKIP_AZURE_METRICS_PROPERTY_NAME, false)) { - // Make sure the metrics system is available before interacting with Azure - AzureFileSystemMetricsSystem.fileSystemStarted(); - metricsSourceName = newMetricsSourceName(); - String sourceDesc = "Azure Storage Volume File System metrics"; - AzureFileSystemMetricsSystem.registerSource(metricsSourceName, sourceDesc, - instrumentation); - } - - store.initialize(uri, conf, instrumentation); - setConf(conf); - this.ugi = UserGroupInformation.getCurrentUser(); - this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority()); - this.workingDir = new Path("/user", UserGroupInformation.getCurrentUser() - .getShortUserName()).makeQualified(getUri(), getWorkingDirectory()); - - this.appendSupportEnabled = conf.getBoolean(APPEND_SUPPORT_ENABLE_PROPERTY_NAME, false); - LOG.debug("NativeAzureFileSystem. 
Initializing."); - LOG.debug(" blockSize = {}", store.getHadoopBlockSize()); - - // Initialize thread counts from user configuration - deleteThreadCount = conf.getInt(AZURE_DELETE_THREADS, DEFAULT_AZURE_DELETE_THREADS); - renameThreadCount = conf.getInt(AZURE_RENAME_THREADS, DEFAULT_AZURE_RENAME_THREADS); - - boolean useSecureMode = conf.getBoolean(AzureNativeFileSystemStore.KEY_USE_SECURE_MODE, - AzureNativeFileSystemStore.DEFAULT_USE_SECURE_MODE); - - this.azureAuthorization = useSecureMode && - conf.getBoolean(KEY_AZURE_AUTHORIZATION, DEFAULT_AZURE_AUTHORIZATION); - this.kerberosSupportEnabled = - conf.getBoolean(Constants.AZURE_KERBEROS_SUPPORT_PROPERTY_NAME, false); - - if (this.azureAuthorization) { - - this.authorizer = - new RemoteWasbAuthorizerImpl(); - authorizer.init(conf); - - this.chmodAllowedUsers = - Arrays.asList(conf.getTrimmedStrings( - AZURE_CHMOD_USERLIST_PROPERTY_NAME, - AZURE_CHMOD_USERLIST_PROPERTY_DEFAULT_VALUE)); - this.chownAllowedUsers = - Arrays.asList(conf.getTrimmedStrings( - AZURE_CHOWN_USERLIST_PROPERTY_NAME, - AZURE_CHOWN_USERLIST_PROPERTY_DEFAULT_VALUE)); - this.daemonUsers = - Arrays.asList(conf.getTrimmedStrings( - AZURE_DAEMON_USERLIST_PROPERTY_NAME, - AZURE_DAEMON_USERLIST_PROPERTY_DEFAULT_VALUE)); - } - - if (UserGroupInformation.isSecurityEnabled() && kerberosSupportEnabled) { - this.wasbDelegationTokenManager = new RemoteWasbDelegationTokenManager(conf); - } - - this.returnUriAsCanonicalServiceName = conf.getBoolean(RETURN_URI_AS_CANONICAL_SERVICE_NAME_PROPERTY_NAME, false); - } - - @Override - public Path getHomeDirectory() { - return makeQualified(new Path( - USER_HOME_DIR_PREFIX_DEFAULT + "/" + this.ugi.getShortUserName())); - } - - @VisibleForTesting - public void updateWasbAuthorizer(WasbAuthorizerInterface authorizer) { - this.authorizer = authorizer; - } - - private NativeFileSystemStore createDefaultStore(Configuration conf) { - actualStore = new AzureNativeFileSystemStore(); - - if (suppressRetryPolicy) { - actualStore.suppressRetryPolicy(); - } - return actualStore; - } - - /** - * Azure Storage doesn't allow the blob names to end in a period, - * so encode this here to work around that limitation. - */ - private static String encodeTrailingPeriod(String toEncode) { - Matcher matcher = TRAILING_PERIOD_PATTERN.matcher(toEncode); - return matcher.replaceAll(TRAILING_PERIOD_PLACEHOLDER); - } - - /** - * Reverse the encoding done by encodeTrailingPeriod(). - */ - private static String decodeTrailingPeriod(String toDecode) { - Matcher matcher = TRAILING_PERIOD_PLACEHOLDER_PATTERN.matcher(toDecode); - return matcher.replaceAll("."); - } - - /** - * Convert the path to a key. By convention, any leading or trailing slash is - * removed, except for the special case of a single slash. - * @param path path converted to a key - * @return key string - */ - @VisibleForTesting - public String pathToKey(Path path) { - // Convert the path to a URI to parse the scheme, the authority, and the - // path from the path object. - URI tmpUri = path.toUri(); - String pathUri = tmpUri.getPath(); - - // The scheme and authority is valid. If the path does not exist add a "/" - // separator to list the root of the container. - Path newPath = path; - if ("".equals(pathUri)) { - newPath = new Path(tmpUri.toString() + Path.SEPARATOR); - } - - // Verify path is absolute if the path refers to a windows drive scheme. 
- if (!newPath.isAbsolute()) { - throw new IllegalArgumentException("Path must be absolute: " + path); - } - - String key = null; - key = newPath.toUri().getPath(); - key = removeTrailingSlash(key); - key = encodeTrailingPeriod(key); - if (key.length() == 1) { - return key; - } else { - return key.substring(1); // remove initial slash - } - } - - // Remove any trailing slash except for the case of a single slash. - private static String removeTrailingSlash(String key) { - if (key.length() == 0 || key.length() == 1) { - return key; - } - if (key.charAt(key.length() - 1) == '/') { - return key.substring(0, key.length() - 1); - } else { - return key; - } - } - - static Path keyToPath(String key) { - if (key.equals("/")) { - return new Path("/"); // container - } - return new Path("/" + decodeTrailingPeriod(key)); - } - - /** - * Get the absolute version of the path (fully qualified). - * This is public for testing purposes. - * - * @param path path to be absolute path. - * @return fully qualified path - */ - @VisibleForTesting - public Path makeAbsolute(Path path) { - if (path.isAbsolute()) { - return path; - } - return new Path(workingDir, path); - } - - /** - * For unit test purposes, retrieves the AzureNativeFileSystemStore store - * backing this file system. - * - * @return The store object. - */ - @VisibleForTesting - public AzureNativeFileSystemStore getStore() { - return actualStore; - } - - NativeFileSystemStore getStoreInterface() { - return store; - } - - /** - * @param requestingAccessForPath - The path to the ancestor/parent/subtree/file that needs to be - * checked before granting access to originalPath - * @param accessType - The type of access READ/WRITE being requested - * @param operation - A string describing the operation being performed ("delete", "create" etc.). - * @param originalPath - The originalPath that was being accessed - */ - private void performAuthCheck(Path requestingAccessForPath, WasbAuthorizationOperations accessType, - String operation, Path originalPath) throws WasbAuthorizationException, IOException { - - if (azureAuthorization && this.authorizer != null) { - - requestingAccessForPath = requestingAccessForPath.makeQualified(getUri(), getWorkingDirectory()); - originalPath = originalPath.makeQualified(getUri(), getWorkingDirectory()); - - String owner = getOwnerForPath(requestingAccessForPath); - - if (!this.authorizer.authorize(requestingAccessForPath.toString(), accessType.toString(), owner)) { - throw new WasbAuthorizationException(operation - + " operation for Path : " + originalPath.toString() + " not allowed"); - } - - } - } - - /** - * Gets the metrics source for this file system. - * This is mainly here for unit testing purposes. - * - * @return the metrics source. - */ - public AzureFileSystemInstrumentation getInstrumentation() { - return instrumentation; - } - - /** This optional operation is not yet supported. 
*/ - @Override - public FSDataOutputStream append(Path f, int bufferSize, Progressable progress) - throws IOException { - - if (!appendSupportEnabled) { - throw new UnsupportedOperationException("Append Support not enabled"); - } - - LOG.debug("Opening file: {} for append", f); - - Path absolutePath = makeAbsolute(f); - - performAuthCheck(absolutePath, WasbAuthorizationOperations.WRITE, "append", absolutePath); - - String key = pathToKey(absolutePath); - FileMetadata meta = null; - try { - meta = store.retrieveMetadata(key); - } catch(Exception ex) { - - Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(ex); - - if (innerException instanceof StorageException - && NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException)) { - - throw new FileNotFoundException(String.format("%s is not found", key)); - } else { - throw ex; - } - } - - if (meta == null) { - throw new FileNotFoundException(f.toString()); - } - - if (meta.isDirectory()) { - throw new FileNotFoundException(f.toString() - + " is a directory not a file."); - } - - if (store.isPageBlobKey(key)) { - throw new IOException("Append not supported for Page Blobs"); - } - - DataOutputStream appendStream = null; - - try { - appendStream = store.retrieveAppendStream(key, bufferSize); - } catch (Exception ex) { - - Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(ex); - - if (innerException instanceof StorageException - && NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException)) { - throw new FileNotFoundException(String.format("%s is not found", key)); - } else { - throw ex; - } - } - - return new FSDataOutputStream(appendStream, statistics); - } - - @Override - public FSDataOutputStream create(Path f, FsPermission permission, - boolean overwrite, int bufferSize, short replication, long blockSize, - Progressable progress) throws IOException { - return create(f, permission, overwrite, true, - bufferSize, replication, blockSize, progress, - (SelfRenewingLease) null); - } - - /** - * Get a self-renewing lease on the specified file. - * @param path path whose lease to be renewed. - * @return Lease - * @throws AzureException when not being able to acquire a lease on the path - */ - public SelfRenewingLease acquireLease(Path path) throws AzureException { - String fullKey = pathToKey(makeAbsolute(path)); - return getStore().acquireLease(fullKey); - } - - @Override - public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, - boolean overwrite, int bufferSize, short replication, long blockSize, - Progressable progress) throws IOException { - - Path parent = f.getParent(); - - // Get exclusive access to folder if this is a directory designated - // for atomic rename. The primary use case of for HBase write-ahead - // log file management. - SelfRenewingLease lease = null; - if (store.isAtomicRenameKey(pathToKey(f))) { - try { - lease = acquireLease(parent); - } catch (AzureException e) { - - String errorCode = ""; - try { - StorageException e2 = (StorageException) e.getCause(); - errorCode = e2.getErrorCode(); - } catch (Exception e3) { - // do nothing if cast fails - } - if (errorCode.equals("BlobNotFound")) { - throw new FileNotFoundException("Cannot create file " + - f.getName() + " because parent folder does not exist."); - } - - LOG.warn("Got unexpected exception trying to get lease on {} . {}", - pathToKey(parent), e.getMessage()); - throw e; - } - } - - // See if the parent folder exists. 
If not, throw error. - // The exists() check will push any pending rename operation forward, - // if there is one, and return false. - // - // At this point, we have exclusive access to the source folder - // via the lease, so we will not conflict with an active folder - // rename operation. - // - // In the secure case, the call to exists will happen in the context - // of the user that initiated the operation. In this case, we should - // do the auth-check against ranger for the path. - if (!exists(parent)) { - try { - - // This'll let the keep-alive thread exit as soon as it wakes up. - lease.free(); - } catch (Exception e) { - LOG.warn("Unable to free lease because: {}", e.getMessage()); - } - throw new FileNotFoundException("Cannot create file " + - f.getName() + " because parent folder does not exist."); - } - - // Create file inside folder. - FSDataOutputStream out = null; - try { - out = create(f, permission, overwrite, false, - bufferSize, replication, blockSize, progress, lease); - } finally { - // Release exclusive access to folder. - try { - if (lease != null) { - lease.free(); - } - } catch (Exception e) { - NativeAzureFileSystemHelper.cleanup(LOG, out); - String msg = "Unable to free lease on " + parent.toUri(); - LOG.error(msg); - throw new IOException(msg, e); - } - } - return out; - } - - @Override - public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, - EnumSet flags, int bufferSize, short replication, long blockSize, - Progressable progress) throws IOException { - - // Check if file should be appended or overwritten. Assume that the file - // is overwritten on if the CREATE and OVERWRITE create flags are set. Note - // that any other combinations of create flags will result in an open new or - // open with append. - final EnumSet createflags = - EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE); - boolean overwrite = flags.containsAll(createflags); - - // Delegate the create non-recursive call. - return this.createNonRecursive(f, permission, overwrite, - bufferSize, replication, blockSize, progress); - } - - @Override - public FSDataOutputStream createNonRecursive(Path f, - boolean overwrite, int bufferSize, short replication, long blockSize, - Progressable progress) throws IOException { - return this.createNonRecursive(f, FsPermission.getFileDefault(), - overwrite, bufferSize, replication, blockSize, progress); - } - - - /** - * Create an Azure blob and return an output stream to use - * to write data to it. - * - * @param f - * @param permission - * @param overwrite - * @param createParent - * @param bufferSize - * @param replication - * @param blockSize - * @param progress - * @param parentFolderLease Lease on parent folder (or null if - * no lease). 
- * @return - * @throws IOException - */ - private FSDataOutputStream create(Path f, FsPermission permission, - boolean overwrite, boolean createParent, int bufferSize, - short replication, long blockSize, Progressable progress, - SelfRenewingLease parentFolderLease) - throws FileAlreadyExistsException, IOException { - - LOG.debug("Creating file: {}", f.toString()); - - if (containsColon(f)) { - throw new IOException("Cannot create file " + f - + " through WASB that has colons in the name"); - } - - Path absolutePath = makeAbsolute(f); - Path ancestor = getAncestor(absolutePath); - - performAuthCheck(ancestor, WasbAuthorizationOperations.WRITE, "create", absolutePath); - - return createInternal(f, permission, overwrite, parentFolderLease); - } - - - /** - * This is the version of the create call that is meant for internal usage. - * This version is not public facing and does not perform authorization checks. - * It is used by the public facing create call and by FolderRenamePending to - * create the internal -RenamePending.json file. - * @param f the path to a file to be created. - * @param permission for the newly created file. - * @param overwrite specifies if the file should be overwritten. - * @param parentFolderLease lease on the parent folder. - * @return the output stream used to write data into the newly created file . - * @throws IOException if an IO error occurs while attempting to delete the - * path. - * - */ - protected FSDataOutputStream createInternal(Path f, FsPermission permission, - boolean overwrite, - SelfRenewingLease parentFolderLease) - throws FileAlreadyExistsException, IOException { - - Path absolutePath = makeAbsolute(f); - String key = pathToKey(absolutePath); - - FileMetadata existingMetadata = store.retrieveMetadata(key); - if (existingMetadata != null) { - if (existingMetadata.isDirectory()) { - throw new FileAlreadyExistsException("Cannot create file " + f - + "; already exists as a directory."); - } - if (!overwrite) { - throw new FileAlreadyExistsException("File already exists:" + f); - } - else { - performAuthCheck(absolutePath, WasbAuthorizationOperations.WRITE, "create", absolutePath); - } - } - - Path parentFolder = absolutePath.getParent(); - if (parentFolder != null && parentFolder.getParent() != null) { // skip root - // Update the parent folder last modified time if the parent folder - // already exists. - String parentKey = pathToKey(parentFolder); - FileMetadata parentMetadata = store.retrieveMetadata(parentKey); - if (parentMetadata != null && parentMetadata.isDirectory() && - parentMetadata.getBlobMaterialization() == BlobMaterialization.Explicit) { - if (parentFolderLease != null) { - store.updateFolderLastModifiedTime(parentKey, parentFolderLease); - } else { - updateParentFolderLastModifiedTime(key); - } - } else { - // Make sure that the parent folder exists. - // Create it using inherited permissions from the first existing directory going up the path - Path firstExisting = parentFolder.getParent(); - FileMetadata metadata = store.retrieveMetadata(pathToKey(firstExisting)); - while(metadata == null) { - // Guaranteed to terminate properly because we will eventually hit root, which will return non-null metadata - firstExisting = firstExisting.getParent(); - metadata = store.retrieveMetadata(pathToKey(firstExisting)); - } - mkdirs(parentFolder, metadata.getPermission(), true); - } - } - - // Mask the permission first (with the default permission mask as well). 
- FsPermission masked = applyUMask(permission, UMaskApplyMode.NewFile); - PermissionStatus permissionStatus = createPermissionStatus(masked); - - OutputStream bufOutStream; - if (store.isPageBlobKey(key)) { - // Store page blobs directly in-place without renames. - bufOutStream = store.storefile(key, permissionStatus, key); - } else { - // This is a block blob, so open the output blob stream based on the - // encoded key. - // - String keyEncoded = encodeKey(key); - - - // First create a blob at the real key, pointing back to the temporary file - // This accomplishes a few things: - // 1. Makes sure we can create a file there. - // 2. Makes it visible to other concurrent threads/processes/nodes what - // we're - // doing. - // 3. Makes it easier to restore/cleanup data in the event of us crashing. - store.storeEmptyLinkFile(key, keyEncoded, permissionStatus); - - // The key is encoded to point to a common container at the storage server. - // This reduces the number of splits on the server side when load balancing. - // Ingress to Azure storage can take advantage of earlier splits. We remove - // the root path to the key and prefix a random GUID to the tail (or leaf - // filename) of the key. Keys are thus broadly and randomly distributed over - // a single container to ease load balancing on the storage server. When the - // blob is committed it is renamed to its earlier key. Uncommitted blocks - // are not cleaned up and we leave it to Azure storage to garbage collect - // these - // blocks. - bufOutStream = new NativeAzureFsOutputStream(store.storefile( - keyEncoded, permissionStatus, key), key, keyEncoded); - } - // Construct the data output stream from the buffered output stream. - FSDataOutputStream fsOut = new FSDataOutputStream(bufOutStream, statistics); - - - // Increment the counter - instrumentation.fileCreated(); - - // Return data output stream to caller. - return fsOut; - } - - @Override - @Deprecated - public boolean delete(Path path) throws IOException { - return delete(path, true); - } - - @Override - public boolean delete(Path f, boolean recursive) throws IOException { - return delete(f, recursive, false); - } - - /** - * Delete file or folder with authorization checks. Most of the code - * is duplicate of the actual delete implementation and will be merged - * once the performance and funcional aspects are guaranteed not to - * regress existing delete semantics. - */ - private boolean deleteWithAuthEnabled(Path f, boolean recursive, - boolean skipParentFolderLastModifiedTimeUpdate) throws IOException { - - LOG.debug("Deleting file: {}", f); - - Path absolutePath = makeAbsolute(f); - Path parentPath = absolutePath.getParent(); - - // If delete is issued for 'root', parentPath will be null - // In that case, we perform auth check for root itself before - // proceeding for deleting contents under root. - if (parentPath != null) { - performAuthCheck(parentPath, WasbAuthorizationOperations.WRITE, "delete", absolutePath); - } else { - performAuthCheck(absolutePath, WasbAuthorizationOperations.WRITE, "delete", absolutePath); - } - - String key = pathToKey(absolutePath); - - // Capture the metadata for the path. 
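/*
 * [Editorial sketch, not part of this change.] The catch blocks below, and many others
 * in this class, repeat one unwrapping idiom: an IOException thrown by the store may
 * wrap an Azure StorageException whose error code means "blob not found", and in that
 * case the operation is reported as having nothing to do (return false) instead of
 * failing. The idiom, folded into one helper for illustration, using the same
 * NativeAzureFileSystemHelper methods this class already calls:
 */
// com.microsoft.azure.storage.StorageException
private static boolean wrapsBlobNotFound(IOException e) {
  Throwable inner = NativeAzureFileSystemHelper.checkForAzureStorageException(e);
  return inner instanceof StorageException
      && NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) inner);
}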
- FileMetadata metaFile = null; - try { - metaFile = store.retrieveMetadata(key); - } catch (IOException e) { - - Throwable innerException = checkForAzureStorageException(e); - - if (innerException instanceof StorageException - && isFileNotFoundException((StorageException) innerException)) { - - return false; - } - throw e; - } - - if (null == metaFile) { - // The path to be deleted does not exist. - return false; - } - - FileMetadata parentMetadata = null; - String parentKey = null; - if (parentPath != null) { - parentKey = pathToKey(parentPath); - - try { - parentMetadata = store.retrieveMetadata(parentKey); - } catch (IOException e) { - Throwable innerException = checkForAzureStorageException(e); - if (innerException instanceof StorageException) { - // Invalid State. - // A FileNotFoundException is not thrown here as the API returns false - // if the file not present. But not retrieving metadata here is an - // unrecoverable state and can only happen if there is a race condition - // hence throwing a IOException - if (isFileNotFoundException((StorageException) innerException)) { - throw new IOException("File " + f + " has a parent directory " - + parentPath + " whose metadata cannot be retrieved. Can't resolve"); - } - } - throw e; - } - - // Same case as unable to retrieve metadata - if (parentMetadata == null) { - throw new IOException("File " + f + " has a parent directory " - + parentPath + " whose metadata cannot be retrieved. Can't resolve"); - } - - if (!parentMetadata.isDirectory()) { - // Invalid state: the parent path is actually a file. Throw. - throw new AzureException("File " + f + " has a parent directory " - + parentPath + " which is also a file. Can't resolve."); - } - } - - // The path exists, determine if it is a folder containing objects, - // an empty folder, or a simple file and take the appropriate actions. - if (!metaFile.isDirectory()) { - // The path specifies a file. We need to check the parent path - // to make sure it's a proper materialized directory before we - // delete the file. Otherwise we may get into a situation where - // the file we were deleting was the last one in an implicit directory - // (e.g. the blob store only contains the blob a/b and there's no - // corresponding directory blob a) and that would implicitly delete - // the directory as well, which is not correct. - - if (parentPath != null && parentPath.getParent() != null) {// Not root - - if (parentMetadata.getBlobMaterialization() == BlobMaterialization.Implicit) { - LOG.debug("Found an implicit parent directory while trying to" - + " delete the file {}. Creating the directory blob for" - + " it in {}.", f, parentKey); - - store.storeEmptyFolder(parentKey, - createPermissionStatus(FsPermission.getDefault())); - } else { - if (!skipParentFolderLastModifiedTimeUpdate) { - updateParentFolderLastModifiedTime(key); - } - } - } - - // check if the file can be deleted based on sticky bit check - // This check will be performed only when authorization is enabled - if (isStickyBitCheckViolated(metaFile, parentMetadata)) { - throw new WasbAuthorizationException(String.format("%s has sticky bit set. 
" - + "File %s cannot be deleted.", parentPath, f)); - } - - try { - if (store.delete(key)) { - instrumentation.fileDeleted(); - } else { - return false; - } - } catch(IOException e) { - - Throwable innerException = checkForAzureStorageException(e); - - if (innerException instanceof StorageException - && isFileNotFoundException((StorageException) innerException)) { - return false; - } - - throw e; - } - } else { - // The path specifies a folder. Recursively delete all entries under the - // folder. - LOG.debug("Directory Delete encountered: {}", f); - if (parentPath != null && parentPath.getParent() != null) { - - if (parentMetadata.getBlobMaterialization() == BlobMaterialization.Implicit) { - LOG.debug("Found an implicit parent directory while trying to" - + " delete the directory {}. Creating the directory blob for" - + " it in {}. ", f, parentKey); - - store.storeEmptyFolder(parentKey, - createPermissionStatus(FsPermission.getDefault())); - } - } - - // check if the folder can be deleted based on sticky bit check on parent - // This check will be performed only when authorization is enabled. - if (!metaFile.getKey().equals("/") - && isStickyBitCheckViolated(metaFile, parentMetadata)) { - - throw new WasbAuthorizationException(String.format("%s has sticky bit set. " - + "File %s cannot be deleted.", parentPath, f)); - } - - // Iterate through folder contents and get the list of files - // and folders that can be deleted. We might encounter IOException - // while listing blobs. In such cases, we return false. - ArrayList fileMetadataList = new ArrayList<>(); - boolean isPartialDelete = false; - - // Start time for list operation - long start = Time.monotonicNow(); - - try { - // Get list of files/folders that can be deleted - // based on authorization checks and stickybit checks - isPartialDelete = getFolderContentsToDelete(metaFile, fileMetadataList); - } catch (IOException e) { - Throwable innerException = checkForAzureStorageException(e); - - if (innerException instanceof StorageException - && isFileNotFoundException((StorageException) innerException)) { - return false; - } - throw e; - } - - long end = Time.monotonicNow(); - LOG.debug("Time taken to list {} blobs for delete operation: {} ms", - fileMetadataList.size(), (end - start)); - - // Here contents holds the list of metadata of the files and folders that can be deleted - // under the path that is requested for delete(excluding itself). - final FileMetadata[] contents = fileMetadataList.toArray(new FileMetadata[fileMetadataList.size()]); - - if (contents.length > 0 && !recursive) { - // The folder is non-empty and recursive delete was not specified. - // Throw an exception indicating that a non-recursive delete was - // specified for a non-empty folder. - throw new IOException("Non-recursive delete of non-empty directory " - + f); - } - - // Delete all files / folders in current directory stored as list in 'contents'. - AzureFileSystemThreadTask task = new AzureFileSystemThreadTask() { - @Override - public boolean execute(FileMetadata file) throws IOException{ - if (!deleteFile(file.getKey(), file.isDirectory())) { - LOG.warn("Attempt to delete non-existent {} {}", - file.isDirectory() ? 
"directory" : "file", - file.getKey()); - } - return true; - } - }; - - AzureFileSystemThreadPoolExecutor executor = getThreadPoolExecutor(this.deleteThreadCount, - "AzureBlobDeleteThread", "Delete", key, AZURE_DELETE_THREADS); - - if (!executor.executeParallel(contents, task)) { - LOG.error("Failed to delete files / subfolders in blob {}", key); - return false; - } - - if (metaFile.getKey().equals("/")) { - LOG.error("Cannot delete root directory {}", f); - return false; - } - - // Delete the current directory if all underlying contents are deleted - if (isPartialDelete || (store.retrieveMetadata(metaFile.getKey()) != null - && !deleteFile(metaFile.getKey(), metaFile.isDirectory()))) { - LOG.error("Failed delete directory : {}", f); - return false; - } - - // Update parent directory last modified time - Path parent = absolutePath.getParent(); - if (parent != null && parent.getParent() != null) { // not root - if (!skipParentFolderLastModifiedTimeUpdate) { - updateParentFolderLastModifiedTime(key); - } - } - } - - // File or directory was successfully deleted. - LOG.debug("Delete Successful for : {}", f); - return true; - } - - private boolean deleteWithoutAuth(Path f, boolean recursive, - boolean skipParentFolderLastModifiedTimeUpdate) throws IOException { - - LOG.debug("Deleting file: {}", f); - - Path absolutePath = makeAbsolute(f); - Path parentPath = absolutePath.getParent(); - - String key = pathToKey(absolutePath); - - // Capture the metadata for the path. - // - FileMetadata metaFile = null; - try { - metaFile = store.retrieveMetadata(key); - } catch (IOException e) { - - Throwable innerException = checkForAzureStorageException(e); - - if (innerException instanceof StorageException - && isFileNotFoundException((StorageException) innerException)) { - - return false; - } - throw e; - } - - if (null == metaFile) { - // The path to be deleted does not exist. - return false; - } - - // The path exists, determine if it is a folder containing objects, - // an empty folder, or a simple file and take the appropriate actions. - if (!metaFile.isDirectory()) { - // The path specifies a file. We need to check the parent path - // to make sure it's a proper materialized directory before we - // delete the file. Otherwise we may get into a situation where - // the file we were deleting was the last one in an implicit directory - // (e.g. the blob store only contains the blob a/b and there's no - // corresponding directory blob a) and that would implicitly delete - // the directory as well, which is not correct. - - if (parentPath.getParent() != null) {// Not root - String parentKey = pathToKey(parentPath); - - FileMetadata parentMetadata = null; - try { - parentMetadata = store.retrieveMetadata(parentKey); - } catch (IOException e) { - - Throwable innerException = checkForAzureStorageException(e); - - if (innerException instanceof StorageException) { - // Invalid State. - // A FileNotFoundException is not thrown here as the API returns false - // if the file not present. But not retrieving metadata here is an - // unrecoverable state and can only happen if there is a race condition - // hence throwing a IOException - if (isFileNotFoundException((StorageException) innerException)) { - throw new IOException("File " + f + " has a parent directory " - + parentPath + " whose metadata cannot be retrieved. Can't resolve"); - } - } - throw e; - } - - // Invalid State. - // A FileNotFoundException is not thrown here as the API returns false - // if the file not present. 
But not retrieving metadata here is an - // unrecoverable state and can only happen if there is a race condition - // hence throwing a IOException - if (parentMetadata == null) { - throw new IOException("File " + f + " has a parent directory " - + parentPath + " whose metadata cannot be retrieved. Can't resolve"); - } - - if (!parentMetadata.isDirectory()) { - // Invalid state: the parent path is actually a file. Throw. - throw new AzureException("File " + f + " has a parent directory " - + parentPath + " which is also a file. Can't resolve."); - } - - if (parentMetadata.getBlobMaterialization() == BlobMaterialization.Implicit) { - LOG.debug("Found an implicit parent directory while trying to" - + " delete the file {}. Creating the directory blob for" - + " it in {}.", f, parentKey); - - store.storeEmptyFolder(parentKey, - createPermissionStatus(FsPermission.getDefault())); - } else { - if (!skipParentFolderLastModifiedTimeUpdate) { - updateParentFolderLastModifiedTime(key); - } - } - } - - try { - if (store.delete(key)) { - instrumentation.fileDeleted(); - } else { - return false; - } - } catch(IOException e) { - - Throwable innerException = checkForAzureStorageException(e); - - if (innerException instanceof StorageException - && isFileNotFoundException((StorageException) innerException)) { - return false; - } - - throw e; - } - } else { - // The path specifies a folder. Recursively delete all entries under the - // folder. - LOG.debug("Directory Delete encountered: {}", f); - if (parentPath.getParent() != null) { - String parentKey = pathToKey(parentPath); - FileMetadata parentMetadata = null; - - try { - parentMetadata = store.retrieveMetadata(parentKey); - } catch (IOException e) { - - Throwable innerException = checkForAzureStorageException(e); - - if (innerException instanceof StorageException) { - // Invalid State. - // A FileNotFoundException is not thrown here as the API returns false - // if the file not present. But not retrieving metadata here is an - // unrecoverable state and can only happen if there is a race condition - // hence throwing a IOException - if (isFileNotFoundException((StorageException) innerException)) { - throw new IOException("File " + f + " has a parent directory " - + parentPath + " whose metadata cannot be retrieved. Can't resolve"); - } - } - throw e; - } - - // Invalid State. - // A FileNotFoundException is not thrown here as the API returns false - // if the file not present. But not retrieving metadata here is an - // unrecoverable state and can only happen if there is a race condition - // hence throwing a IOException - if (parentMetadata == null) { - throw new IOException("File " + f + " has a parent directory " - + parentPath + " whose metadata cannot be retrieved. Can't resolve"); - } - - if (parentMetadata.getBlobMaterialization() == BlobMaterialization.Implicit) { - LOG.debug("Found an implicit parent directory while trying to" - + " delete the directory {}. Creating the directory blob for" - + " it in {}. ", f, parentKey); - - store.storeEmptyFolder(parentKey, - createPermissionStatus(FsPermission.getDefault())); - } - } - - // Start time for list operation - long start = Time.monotonicNow(); - final FileMetadata[] contents; - - // List all the files in the folder with AZURE_UNBOUNDED_DEPTH depth. 
- try { - contents = store.list(key, AZURE_LIST_ALL, - AZURE_UNBOUNDED_DEPTH); - } catch (IOException e) { - Throwable innerException = checkForAzureStorageException(e); - - if (innerException instanceof StorageException - && isFileNotFoundException((StorageException) innerException)) { - return false; - } - - throw e; - } - - long end = Time.monotonicNow(); - LOG.debug("Time taken to list {} blobs for delete operation: {} ms", contents.length, (end - start)); - - if (contents.length > 0) { - if (!recursive) { - // The folder is non-empty and recursive delete was not specified. - // Throw an exception indicating that a non-recursive delete was - // specified for a non-empty folder. - throw new IOException("Non-recursive delete of non-empty directory "+ f); - } - } - - // Delete all files / folders in current directory stored as list in 'contents'. - AzureFileSystemThreadTask task = new AzureFileSystemThreadTask() { - @Override - public boolean execute(FileMetadata file) throws IOException{ - if (!deleteFile(file.getKey(), file.isDirectory())) { - LOG.warn("Attempt to delete non-existent {} {}", - file.isDirectory() ? "directory" : "file", - file.getKey()); - } - return true; - } - }; - - AzureFileSystemThreadPoolExecutor executor = getThreadPoolExecutor(this.deleteThreadCount, - "AzureBlobDeleteThread", "Delete", key, AZURE_DELETE_THREADS); - - if (!executor.executeParallel(contents, task)) { - LOG.error("Failed to delete files / subfolders in blob {}", key); - return false; - } - - // Delete the current directory - if (store.retrieveMetadata(metaFile.getKey()) != null - && !deleteFile(metaFile.getKey(), metaFile.isDirectory())) { - LOG.error("Failed delete directory : {}", f); - return false; - } - - // Update parent directory last modified time - Path parent = absolutePath.getParent(); - if (parent != null && parent.getParent() != null) { // not root - if (!skipParentFolderLastModifiedTimeUpdate) { - updateParentFolderLastModifiedTime(key); - } - } - } - - // File or directory was successfully deleted. - LOG.debug("Delete Successful for : {}", f); - return true; - } - - /** - * Delete the specified file or folder. The parameter - * skipParentFolderLastModifiedTimeUpdate - * is used in the case of atomic folder rename redo. In that case, there is - * a lease on the parent folder, so (without reworking the code) modifying - * the parent folder update time will fail because of a conflict with the - * lease. Since we are going to delete the folder soon anyway so accurate - * modified time is not necessary, it's easier to just skip - * the modified time update. - * - * @param f file path to be deleted. - * @param recursive specify deleting recursively or not. - * @param skipParentFolderLastModifiedTimeUpdate If true, don't update the folder last - * modified time. - * @return true if and only if the file is deleted - * @throws IOException Thrown when fail to delete file or directory. 
- */ - public boolean delete(Path f, boolean recursive, - boolean skipParentFolderLastModifiedTimeUpdate) throws IOException { - - if (this.azureAuthorization) { - return deleteWithAuthEnabled(f, recursive, - skipParentFolderLastModifiedTimeUpdate); - } else { - return deleteWithoutAuth(f, recursive, - skipParentFolderLastModifiedTimeUpdate); - } - } - - public AzureFileSystemThreadPoolExecutor getThreadPoolExecutor(int threadCount, - String threadNamePrefix, String operation, String key, String config) { - return new AzureFileSystemThreadPoolExecutor(threadCount, threadNamePrefix, operation, key, config); - } - - /** - * Gets list of contents that can be deleted based on authorization check calls - * performed on the sub-tree for the folderToDelete. - * - * @param folderToDelete - metadata of the folder whose delete is requested. - * @param finalList - list of metadata of all files/folders that can be deleted . - * - * @return 'true' only if all the contents of the folderToDelete can be deleted - * @throws IOException Thrown when current user cannot be retrieved. - */ - private boolean getFolderContentsToDelete(FileMetadata folderToDelete, - ArrayList finalList) throws IOException { - - final int maxListingDepth = 1; - Stack foldersToProcess = new Stack(); - HashMap folderContentsMap = new HashMap(); - - boolean isPartialDelete = false; - - Path pathToDelete = makeAbsolute(folderToDelete.getPath()); - foldersToProcess.push(folderToDelete); - - while (!foldersToProcess.empty()) { - - FileMetadata currentFolder = foldersToProcess.pop(); - Path currentPath = makeAbsolute(currentFolder.getPath()); - boolean canDeleteChildren = true; - - // If authorization is enabled, check for 'write' permission on current folder - // This check maps to subfolders 'write' check for deleting contents recursively. - try { - performAuthCheck(currentPath, WasbAuthorizationOperations.WRITE, "delete", pathToDelete); - } catch (WasbAuthorizationException we) { - LOG.debug("Authorization check failed for {}", currentPath); - // We cannot delete the children of currentFolder since 'write' check on parent failed - canDeleteChildren = false; - } - - if (canDeleteChildren) { - - // get immediate children list - FileMetadata[] fileMetadataList = store.list(currentFolder.getKey(), - AZURE_LIST_ALL, maxListingDepth); - - // Process children of currentFolder and add them to list of contents - // that can be deleted. We Perform stickybit check on every file and - // folder under currentFolder in case stickybit is set on currentFolder. - for (FileMetadata childItem : fileMetadataList) { - if (isStickyBitCheckViolated(childItem, currentFolder, false)) { - // Stickybit check failed for the childItem that is being processed. - // This file/folder cannot be deleted and neither can the parent paths be deleted. - // Remove parent paths from list of contents that can be deleted. - canDeleteChildren = false; - Path filePath = makeAbsolute(childItem.getPath()); - LOG.error("User does not have permissions to delete {}. " - + "Parent directory has sticky bit set.", filePath); - } else { - // push the child directories to the stack to process their contents - if (childItem.isDirectory()) { - foldersToProcess.push(childItem); - } - // Add items to list of contents that can be deleted. - folderContentsMap.put(childItem.getKey(), childItem); - } - } - - } else { - // Cannot delete children since parent permission check has not passed and - // if there are files/folders under currentFolder they will not be deleted. 
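/*
 * [Editorial usage sketch, not part of this change; paths and conf are hypothetical.]
 * Observable contract of the delete paths above: a missing path, or a BlobNotFound
 * race while resolving it, surfaces as a plain "false"; a non-recursive delete of a
 * non-empty directory is the case that throws; a recursive delete removes the
 * contents in parallel (AzureBlobDeleteThread pool) and then the directory itself.
 */
// org.apache.hadoop.fs.{FileSystem, Path}; conf is a Configuration pointing at the store.
FileSystem fs = FileSystem.get(conf);
fs.delete(new Path("/data/missing"), true);       // false: nothing existed to delete
fs.delete(new Path("/data/nonEmptyDir"), false);  // IOException: "Non-recursive delete of non-empty directory ..."
fs.delete(new Path("/data/nonEmptyDir"), true);   // true once all children and the directory blob are gone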
- LOG.error("Authorization check failed. Files or folders under {} " - + "will not be processed for deletion.", currentPath); - } - - if (!canDeleteChildren) { - // We reach here if - // 1. cannot delete children since 'write' check on parent failed or - // 2. One of the files under the current folder cannot be deleted due to stickybit check. - // In this case we remove all the parent paths from the list of contents - // that can be deleted till we reach the original path of delete request - String pathToRemove = currentFolder.getKey(); - while (!pathToRemove.equals(folderToDelete.getKey())) { - if (folderContentsMap.containsKey(pathToRemove)) { - LOG.debug("Cannot delete {} since some of its contents " - + "cannot be deleted", pathToRemove); - folderContentsMap.remove(pathToRemove); - } - Path parentPath = keyToPath(pathToRemove).getParent(); - pathToRemove = pathToKey(parentPath); - } - // Since one or more files/folders cannot be deleted return value should indicate - // partial delete, so that the delete on the path requested by user is not performed - isPartialDelete = true; - } - } - - // final list of contents that can be deleted - for (HashMap.Entry entry : folderContentsMap.entrySet()) { - finalList.add(entry.getValue()); - } - - return isPartialDelete; - } - - private boolean isStickyBitCheckViolated(FileMetadata metaData, - FileMetadata parentMetadata, boolean throwOnException) throws IOException { - try { - return isStickyBitCheckViolated(metaData, parentMetadata); - } catch (FileNotFoundException ex) { - if (throwOnException) { - throw ex; - } else { - LOG.debug("Encountered FileNotFoundException while performing " - + "stickybit check operation for {}", metaData.getKey()); - // swallow exception and return that stickyBit check has been violated - return true; - } - } - } - - /** - * Checks if the Current user is not permitted access to a file/folder when - * sticky bit is set on parent path. Only the owner of parent path - * and owner of the file/folder itself are permitted to perform certain - * operations on file/folder based on sticky bit check. Sticky bit check will - * be performed only when authorization is enabled. - * - * @param metaData - metadata of the file/folder whose parent has sticky bit set. - * @param parentMetadata - metadata of the parent. - * - * @return true if Current user violates stickybit check - * @throws IOException Thrown when current user cannot be retrieved. - */ - private boolean isStickyBitCheckViolated(FileMetadata metaData, - FileMetadata parentMetadata) throws IOException { - - // In case stickybit check should not be performed, - // return value should indicate stickybit check is not violated. - if (!this.azureAuthorization) { - return false; - } - - // This should never happen when the sticky bit check is invoked. 
- if (parentMetadata == null) { - throw new FileNotFoundException( - String.format("Parent metadata for '%s' not found!", metaData.getKey())); - } - - // stickybit is not set on parent and hence cannot be violated - if (!parentMetadata.getPermission().getStickyBit()) { - return false; - } - - String currentUser = UserGroupInformation.getCurrentUser().getShortUserName(); - String parentDirectoryOwner = parentMetadata.getOwner(); - String currentFileOwner = metaData.getOwner(); - - // Files/Folders with no owner set will not pass stickybit check - if ((parentDirectoryOwner.equalsIgnoreCase(currentUser)) - || currentFileOwner.equalsIgnoreCase(currentUser)) { - - return false; - } - return true; - } - - /** - * Delete the specified file or directory and increment metrics. - * If the file or directory does not exist, the operation returns false. - * @param path the path to a file or directory. - * @param isDir true if the path is a directory; otherwise false. - * @return true if delete is successful; otherwise false. - * @throws IOException if an IO error occurs while attempting to delete the - * path. - * - */ - @VisibleForTesting - boolean deleteFile(String path, boolean isDir) throws IOException { - if (!store.delete(path)) { - return false; - } - - if (isDir) { - instrumentation.directoryDeleted(); - } else { - instrumentation.fileDeleted(); - } - return true; - } - - @Override - public FileStatus getFileStatus(Path f) throws FileNotFoundException, IOException { - - LOG.debug("Getting the file status for {}", f.toString()); - return getFileStatusInternal(f); - } - - /** - * Checks if a given path exists in the filesystem. - * Calls getFileStatusInternal and has the same costs - * as the public facing exists call. - * This internal version of the exists call does not perform - * authorization checks, and is used internally by various filesystem - * operations that need to check if the parent/ancestor/path exist. - * The idea is to avoid having to configure authorization policies for - * these internal calls. - * @param f the path to a file or directory. - * @return true if path exists; otherwise false. - * @throws IOException if an IO error occurs while attempting to check - * for existence of the path. - * - */ - protected boolean existsInternal(Path f) throws IOException { - try { - this.getFileStatusInternal(f); - return true; - } catch (FileNotFoundException fnfe) { - return false; - } - } - - /** - * Inner implementation of {@link #getFileStatus(Path)}. - * Return a file status object that represents the path. - * @param f The path we want information from - * @return a FileStatus object - * @throws FileNotFoundException when the path does not exist - * @throws IOException Other failure - */ - private FileStatus getFileStatusInternal(Path f) throws FileNotFoundException, IOException { - - Path absolutePath = makeAbsolute(f); - String key = pathToKey(absolutePath); - if (key.length() == 0) { // root always exists - return new FileStatus( - 0, - true, - 1, - store.getHadoopBlockSize(), - 0, - 0, - FsPermission.getDefault(), "", "", - absolutePath.makeQualified(getUri(), getWorkingDirectory())); - } - - // The path is either a folder or a file. Retrieve metadata to - // determine if it is a directory or file. 
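/*
 * [Editorial worked example, not part of this change; user and path names are made up.]
 * The sticky-bit rule implemented above mirrors POSIX /tmp semantics and only runs when
 * fs.azure.authorization is enabled. Given a directory with mode 1777 owned by "hbase"
 * containing a file owned by "alice":
 *
 *   current user "alice"  -> owns the file       -> no violation, delete/rename allowed
 *   current user "hbase"  -> owns the directory  -> no violation, delete/rename allowed
 *   current user "bob"    -> owns neither        -> violation, WasbAuthorizationException
 *
 * For reference, a sticky-bit permission can be written with the standard API, e.g.
 * new FsPermission((short) 01777), whose getStickyBit() is true on current Hadoop lines.
 */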
- FileMetadata meta = null; - try { - meta = store.retrieveMetadata(key); - } catch(Exception ex) { - - Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(ex); - - if (innerException instanceof StorageException - && NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException)) { - - throw new FileNotFoundException(String.format("%s is not found", key)); - } - - throw ex; - } - - if (meta != null) { - if (meta.isDirectory()) { - // The path is a folder with files in it. - // - - LOG.debug("Path {} is a folder.", f.toString()); - - // If a rename operation for the folder was pending, redo it. - // Then the file does not exist, so signal that. - if (conditionalRedoFolderRename(f)) { - throw new FileNotFoundException( - absolutePath + ": No such file or directory."); - } - - // Return reference to the directory object. - return updateFileStatusPath(meta, absolutePath); - } - - // The path is a file. - LOG.debug("Found the path: {} as a file.", f.toString()); - - // Return with reference to a file object. - return updateFileStatusPath(meta, absolutePath); - } - - // File not found. Throw exception no such file or directory. - // - throw new FileNotFoundException( - absolutePath + ": No such file or directory."); - } - - // Return true if there is a rename pending and we redo it, otherwise false. - private boolean conditionalRedoFolderRename(Path f) throws IOException { - - // Can't rename /, so return immediately in that case. - if (f.getName().equals("")) { - return false; - } - - // Check if there is a -RenamePending.json file for this folder, and if so, - // redo the rename. - Path absoluteRenamePendingFile = renamePendingFilePath(f); - if (existsInternal(absoluteRenamePendingFile)) { - FolderRenamePending pending = - new FolderRenamePending(absoluteRenamePendingFile, this); - pending.redo(); - return true; - } else { - return false; - } - } - - // Return the path name that would be used for rename of folder with path f. - private Path renamePendingFilePath(Path f) { - Path absPath = makeAbsolute(f); - String key = pathToKey(absPath); - key += "-RenamePending.json"; - return keyToPath(key); - } - - @Override - public URI getUri() { - return uri; - } - - /** - * Retrieve the status of a given path if it is a file, or of all the - * contained files if it is a directory. - */ - @Override - public FileStatus[] listStatus(Path f) throws FileNotFoundException, IOException { - - LOG.debug("Listing status for {}", f.toString()); - - Path absolutePath = makeAbsolute(f); - - performAuthCheck(absolutePath, WasbAuthorizationOperations.READ, "liststatus", absolutePath); - - String key = pathToKey(absolutePath); - - FileMetadata meta = null; - try { - meta = store.retrieveMetadata(key); - } catch (IOException ex) { - - Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(ex); - - if (innerException instanceof StorageException - && NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException)) { - - throw new FileNotFoundException(String.format("%s is not found", f)); - } - - throw ex; - } - - if (meta == null) { - // There is no metadata found for the path. 
- LOG.debug("Did not find any metadata for path: {}", key); - throw new FileNotFoundException(f + " is not found"); - } - - if (!meta.isDirectory()) { - LOG.debug("Found path as a file"); - return new FileStatus[] { updateFileStatusPath(meta, absolutePath) }; - } - - FileMetadata[] listing; - - listing = listWithErrorHandling(key, AZURE_LIST_ALL, 1); - - // NOTE: We don't check for Null condition as the Store API should return - // an empty list if there are not listing. - - // For any -RenamePending.json files in the listing, - // push the rename forward. - boolean renamed = conditionalRedoFolderRenames(listing); - - // If any renames were redone, get another listing, - // since the current one may have changed due to the redo. - if (renamed) { - listing = listWithErrorHandling(key, AZURE_LIST_ALL, 1); - } - - // We only need to check for AZURE_TEMP_FOLDER if the key is the root, - // and if it is not the root we also know the exact size of the array - // of FileStatus. - - FileMetadata[] result = null; - - if (key.equals("/")) { - ArrayList status = new ArrayList<>(listing.length); - - for (FileMetadata fileMetadata : listing) { - if (fileMetadata.isDirectory()) { - // Make sure we hide the temp upload folder - if (fileMetadata.getKey().equals(AZURE_TEMP_FOLDER)) { - // Don't expose that. - continue; - } - status.add(updateFileStatusPath(fileMetadata, fileMetadata.getPath())); - } else { - status.add(updateFileStatusPath(fileMetadata, fileMetadata.getPath())); - } - } - result = status.toArray(new FileMetadata[0]); - } else { - for (int i = 0; i < listing.length; i++) { - FileMetadata fileMetadata = listing[i]; - listing[i] = updateFileStatusPath(fileMetadata, fileMetadata.getPath()); - } - result = listing; - } - - LOG.debug("Found path as a directory with {}" - + " files in it.", result.length); - - return result; - } - - private FileMetadata[] listWithErrorHandling(String prefix, final int maxListingCount, - final int maxListingDepth) throws IOException { - try { - return store.list(prefix, maxListingCount, maxListingDepth); - } catch (IOException ex) { - Throwable innerException - = NativeAzureFileSystemHelper.checkForAzureStorageException(ex); - if (innerException instanceof StorageException - && NativeAzureFileSystemHelper.isFileNotFoundException( - (StorageException) innerException)) { - throw new FileNotFoundException(String.format("%s is not found", prefix)); - } - throw ex; - } - } - - // Redo any folder renames needed if there are rename pending files in the - // directory listing. Return true if one or more redo operations were done. - private boolean conditionalRedoFolderRenames(FileMetadata[] listing) - throws IllegalArgumentException, IOException { - boolean renamed = false; - for (FileMetadata fileMetadata : listing) { - Path subpath = fileMetadata.getPath(); - if (isRenamePendingFile(subpath)) { - FolderRenamePending pending = - new FolderRenamePending(subpath, this); - pending.redo(); - renamed = true; - } - } - return renamed; - } - - // True if this is a folder rename pending file, else false. 
- private boolean isRenamePendingFile(Path path) { - return path.toString().endsWith(FolderRenamePending.SUFFIX); - } - - private FileMetadata updateFileStatusPath(FileMetadata meta, Path path) { - meta.setPath(path.makeQualified(getUri(), getWorkingDirectory())); - // reduce memory use by setting the internal-only key to null - meta.removeKey(); - return meta; - } - - private static enum UMaskApplyMode { - NewFile, - NewDirectory, - NewDirectoryNoUmask, - ChangeExistingFile, - ChangeExistingDirectory, - } - - /** - * Applies the applicable UMASK's on the given permission. - * - * @param permission - * The permission to mask. - * @param applyMode - * Whether to also apply the default umask. - * @return The masked persmission. - */ - private FsPermission applyUMask(final FsPermission permission, - final UMaskApplyMode applyMode) { - FsPermission newPermission = new FsPermission(permission); - // Apply the default umask - this applies for new files or directories. - if (applyMode == UMaskApplyMode.NewFile - || applyMode == UMaskApplyMode.NewDirectory) { - newPermission = newPermission - .applyUMask(FsPermission.getUMask(getConf())); - } - return newPermission; - } - - /** - * Creates the PermissionStatus object to use for the given permission, based - * on the current user in context. - * - * @param permission - * The permission for the file. - * @return The permission status object to use. - * @throws IOException - * If login fails in getCurrentUser - */ - @VisibleForTesting - PermissionStatus createPermissionStatus(FsPermission permission) - throws IOException { - // Create the permission status for this file based on current user - return new PermissionStatus( - UserGroupInformation.getCurrentUser().getShortUserName(), - getConf().get(AZURE_DEFAULT_GROUP_PROPERTY_NAME, - AZURE_DEFAULT_GROUP_DEFAULT), - permission); - } - - private Path getAncestor(Path f) throws IOException { - - for (Path current = f, parent = current.getParent(); - parent != null; // Stop when you get to the root - current = parent, parent = current.getParent()) { - - String currentKey = pathToKey(current); - FileMetadata currentMetadata = store.retrieveMetadata(currentKey); - if (currentMetadata != null && currentMetadata.isDirectory()) { - Path ancestor = currentMetadata.getPath(); - LOG.debug("Found ancestor {}, for path: {}", ancestor.toString(), f.toString()); - return ancestor; - } - } - - return new Path("/"); - } - - @Override - public boolean mkdirs(Path f, FsPermission permission) throws IOException { - return mkdirs(f, permission, false); - } - - public boolean mkdirs(Path f, FsPermission permission, boolean noUmask) throws IOException { - - LOG.debug("Creating directory: {}", f.toString()); - - if (containsColon(f)) { - throw new IOException("Cannot create directory " + f - + " through WASB that has colons in the name"); - } - - Path absolutePath = makeAbsolute(f); - Path ancestor = getAncestor(absolutePath); - - if (absolutePath.equals(ancestor)) { - return true; - } - - performAuthCheck(ancestor, WasbAuthorizationOperations.WRITE, "mkdirs", absolutePath); - - PermissionStatus permissionStatus = null; - if(noUmask) { - // ensure owner still has wx permissions at the minimum - permissionStatus = createPermissionStatus( - applyUMask(FsPermission.createImmutable((short) (permission.toShort() | USER_WX_PERMISION)), - UMaskApplyMode.NewDirectoryNoUmask)); - } else { - permissionStatus = createPermissionStatus( - applyUMask(permission, UMaskApplyMode.NewDirectory)); - } - - - ArrayList keysToCreateAsFolder = new 
ArrayList(); - // Check that there is no file in the parent chain of the given path. - for (Path current = absolutePath, parent = current.getParent(); - parent != null; // Stop when you get to the root - current = parent, parent = current.getParent()) { - String currentKey = pathToKey(current); - FileMetadata currentMetadata = store.retrieveMetadata(currentKey); - if (currentMetadata != null && !currentMetadata.isDirectory()) { - throw new FileAlreadyExistsException("Cannot create directory " + f + " because " - + current + " is an existing file."); - } else if (currentMetadata == null) { - keysToCreateAsFolder.add(currentKey); - } - } - - for (String currentKey : keysToCreateAsFolder) { - store.storeEmptyFolder(currentKey, permissionStatus); - } - - instrumentation.directoryCreated(); - - // otherwise throws exception - return true; - } - - @Override - public FSDataInputStream open(Path f, int bufferSize) throws FileNotFoundException, IOException { - return open(f, bufferSize, Optional.empty()); - } - - private FSDataInputStream open(Path f, int bufferSize, - Optional options) - throws FileNotFoundException, IOException { - - LOG.debug("Opening file: {}", f.toString()); - - Path absolutePath = makeAbsolute(f); - - performAuthCheck(absolutePath, WasbAuthorizationOperations.READ, "read", absolutePath); - - String key = pathToKey(absolutePath); - FileMetadata meta = null; - try { - meta = store.retrieveMetadata(key); - } catch(Exception ex) { - - Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(ex); - - if (innerException instanceof StorageException - && NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException)) { - - throw new FileNotFoundException(String.format("%s is not found", key)); - } - - throw ex; - } - - if (meta == null) { - throw new FileNotFoundException(f.toString()); - } - if (meta.isDirectory()) { - throw new FileNotFoundException(f.toString() - + " is a directory not a file."); - } - - InputStream inputStream; - try { - inputStream = store.retrieve(key, 0, options); - } catch(Exception ex) { - Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(ex); - - if (innerException instanceof StorageException - && NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException)) { - - throw new FileNotFoundException(String.format("%s is not found", key)); - } - - throw ex; - } - - return new FSDataInputStream(new BufferedFSInputStream( - new NativeAzureFsInputStream(inputStream, key, meta.getLen()), bufferSize)); - } - - @Override - protected CompletableFuture openFileWithOptions(Path path, - OpenFileParameters parameters) throws IOException { - AbstractFSBuilderImpl.rejectUnknownMandatoryKeys( - parameters.getMandatoryKeys(), - FS_OPTION_OPENFILE_STANDARD_OPTIONS, - "for " + path); - return LambdaUtils.eval( - new CompletableFuture<>(), () -> - open(path, parameters.getBufferSize(), Optional.of(parameters.getOptions()))); - } - - @Override - public boolean rename(Path src, Path dst) throws FileNotFoundException, IOException { - - FolderRenamePending renamePending = null; - - LOG.debug("Moving {} to {}", src, dst); - - if (containsColon(dst)) { - throw new IOException("Cannot rename to file " + dst - + " through WASB that has colons in the name"); - } - - Path absoluteSrcPath = makeAbsolute(src); - Path srcParentFolder = absoluteSrcPath.getParent(); - - if (srcParentFolder == null) { - // Cannot rename root of file system - return false; - } - - String srcKey = 
pathToKey(absoluteSrcPath); - - if (srcKey.length() == 0) { - // Cannot rename root of file system - return false; - } - - performAuthCheck(srcParentFolder, WasbAuthorizationOperations.WRITE, "rename", - absoluteSrcPath); - - if (this.azureAuthorization) { - try { - performStickyBitCheckForRenameOperation(absoluteSrcPath, srcParentFolder); - } catch (FileNotFoundException ex) { - return false; - } catch (IOException ex) { - Throwable innerException = checkForAzureStorageException(ex); - if (innerException instanceof StorageException - && isFileNotFoundException((StorageException) innerException)) { - LOG.debug("Encountered FileNotFound Exception when performing sticky bit check " - + "on {}. Failing rename", srcKey); - return false; - } - throw ex; - } - } - - // Figure out the final destination - Path absoluteDstPath = makeAbsolute(dst); - Path dstParentFolder = absoluteDstPath.getParent(); - - String dstKey = pathToKey(absoluteDstPath); - FileMetadata dstMetadata = null; - try { - dstMetadata = store.retrieveMetadata(dstKey); - } catch (IOException ex) { - - Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(ex); - - // A BlobNotFound storage exception in only thrown from retrieveMetadata API when - // there is a race condition. If there is another thread which deletes the destination - // file or folder, then this thread calling rename should be able to continue with - // rename gracefully. Hence the StorageException is swallowed here. - if (innerException instanceof StorageException) { - if (NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException)) { - LOG.debug("BlobNotFound exception encountered for Destination key : {}. " - + "Swallowing the exception to handle race condition gracefully", dstKey); - } - } else { - throw ex; - } - } - - if (dstMetadata != null && dstMetadata.isDirectory()) { - // It's an existing directory. - performAuthCheck(absoluteDstPath, WasbAuthorizationOperations.WRITE, "rename", - absoluteDstPath); - - dstKey = pathToKey(makeAbsolute(new Path(dst, src.getName()))); - LOG.debug("Destination {} " - + " is a directory, adjusted the destination to be {}", dst, dstKey); - } else if (dstMetadata != null) { - // Attempting to overwrite a file using rename() - LOG.debug("Destination {}" - + " is an already existing file, failing the rename.", dst); - return false; - } else { - // Check that the parent directory exists. - FileMetadata parentOfDestMetadata = null; - try { - parentOfDestMetadata = store.retrieveMetadata(pathToKey(absoluteDstPath.getParent())); - } catch (IOException ex) { - - Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(ex); - - if (innerException instanceof StorageException - && NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException)) { - - LOG.debug("Parent of destination {} doesn't exists. 
Failing rename", dst); - return false; - } - - throw ex; - } - - if (parentOfDestMetadata == null) { - LOG.debug("Parent of the destination {}" - + " doesn't exist, failing the rename.", dst); - return false; - } else if (!parentOfDestMetadata.isDirectory()) { - LOG.debug("Parent of the destination {}" - + " is a file, failing the rename.", dst); - return false; - } else { - performAuthCheck(dstParentFolder, WasbAuthorizationOperations.WRITE, - "rename", absoluteDstPath); - } - } - FileMetadata srcMetadata = null; - try { - srcMetadata = store.retrieveMetadata(srcKey); - } catch (IOException ex) { - Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(ex); - - if (innerException instanceof StorageException - && NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException)) { - - LOG.debug("Source {} doesn't exists. Failing rename", src); - return false; - } - - throw ex; - } - - if (srcMetadata == null) { - // Source doesn't exist - LOG.debug("Source {} doesn't exist, failing the rename.", src); - return false; - } else if (!srcMetadata.isDirectory()) { - LOG.debug("Source {} found as a file, renaming.", src); - try { - // HADOOP-15086 - file rename must ensure that the destination does - // not exist. The fix is targeted to this call only to avoid - // regressions. Other call sites are attempting to rename temporary - // files, redo a failed rename operation, or rename a directory - // recursively; for these cases the destination may exist. - store.rename(srcKey, dstKey, false, null, - false); - } catch(IOException ex) { - Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(ex); - - if (innerException instanceof StorageException) { - if (NativeAzureFileSystemHelper.isFileNotFoundException( - (StorageException) innerException)) { - LOG.debug("BlobNotFoundException encountered. Failing rename", src); - return false; - } - if (NativeAzureFileSystemHelper.isBlobAlreadyExistsConflict( - (StorageException) innerException)) { - LOG.debug("Destination BlobAlreadyExists. Failing rename", src); - return false; - } - } - - throw ex; - } - } else { - - // Prepare for, execute and clean up after of all files in folder, and - // the root file, and update the last modified time of the source and - // target parent folders. The operation can be redone if it fails part - // way through, by applying the "Rename Pending" file. - - // The following code (internally) only does atomic rename preparation - // and lease management for page blob folders, limiting the scope of the - // operation to HBase log file folders, where atomic rename is required. - // In the future, we could generalize it easily to all folders. - renamePending = prepareAtomicFolderRename(srcKey, dstKey); - renamePending.execute(); - - LOG.debug("Renamed {} to {} successfully.", src, dst); - renamePending.cleanup(); - return true; - } - - // Update the last-modified time of the parent folders of both source - // and destination. 
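/*
 * [Editorial usage sketch, not part of this change; paths and conf are hypothetical.]
 * The rename path above reports most precondition failures by returning false rather
 * than throwing: a missing source, an existing destination *file* (HADOOP-15086), or a
 * missing / non-directory parent of the destination. Renaming onto an existing
 * destination *directory* moves the source under it, and directory renames are staged
 * through a "<src>-RenamePending.json" file so an interrupted rename of an
 * atomic-rename folder (e.g. HBase WALs) can be redone later.
 */
FileSystem fs = FileSystem.get(conf);
fs.rename(new Path("/logs/absent"), new Path("/logs/x"));           // false: source does not exist
fs.rename(new Path("/logs/a.log"), new Path("/logs/existing.log")); // false: destination file already exists
fs.rename(new Path("/logs/a.log"), new Path("/logs/archive"));      // true when /logs/archive is a directory:
                                                                     // the file lands at /logs/archive/a.log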
- updateParentFolderLastModifiedTime(srcKey); - updateParentFolderLastModifiedTime(dstKey); +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.util.Progressable; - LOG.debug("Renamed {} to {} successfully.", src, dst); - return true; - } +/** + * A {@link FileSystem} for reading and writing files stored on Windows Azure. This implementation is + * blob-based and stores files on Azure in their native form so they can be read + * by other Azure tools. + */ +@InterfaceAudience.Public +@InterfaceStability.Stable +@Deprecated +public class NativeAzureFileSystem extends FileSystem { - /** - * Update the last-modified time of the parent folder of the file - * identified by key. - * @param key - * @throws IOException - */ - private void updateParentFolderLastModifiedTime(String key) - throws IOException { - Path parent = makeAbsolute(keyToPath(key)).getParent(); - if (parent != null && parent.getParent() != null) { // not root - String parentKey = pathToKey(parent); + private static final String SCHEME = "wasb"; + private static final String SECURE_SCHEME = "wasbs"; - // ensure the parent is a materialized folder - FileMetadata parentMetadata = store.retrieveMetadata(parentKey); - // The metadata could be null if the implicit folder only contains a - // single file. In this case, the parent folder no longer exists if the - // file is renamed; so we can safely ignore the null pointer case. - if (parentMetadata != null) { - if (parentMetadata.isDirectory() - && parentMetadata.getBlobMaterialization() == BlobMaterialization.Implicit) { - store.storeEmptyFolder(parentKey, - createPermissionStatus(FsPermission.getDefault())); - } + public static final String WASB_INIT_ERROR_MESSAGE = + "WASB Driver using wasb(s) schema is no longer supported. " + + "Instead use ABFS Driver for FNS account by changing the scheme to abfs(s)." + + "For more details contact askabfs@microsoft.com"; - if (store.isAtomicRenameKey(parentKey)) { - SelfRenewingLease lease = null; - try { - lease = leaseSourceFolder(parentKey); - store.updateFolderLastModifiedTime(parentKey, lease); - } catch (AzureException e) { - String errorCode = ""; - try { - StorageException e2 = (StorageException) e.getCause(); - errorCode = e2.getErrorCode(); - } catch (Exception e3) { - // do nothing if cast fails - } - if (errorCode.equals("BlobNotFound")) { - throw new FileNotFoundException("Folder does not exist: " + parentKey); - } - LOG.warn("Got unexpected exception trying to get lease on {}. {}", - parentKey, e.getMessage()); - throw e; - } finally { - try { - if (lease != null) { - lease.free(); - } - } catch (Exception e) { - LOG.error("Unable to free lease on {}", parentKey, e); - } - } - } else { - store.updateFolderLastModifiedTime(parentKey, null); - } - } - } + public NativeAzureFileSystem() { } - /** - * If the source is a page blob folder, - * prepare to rename this folder atomically. This means to get exclusive - * access to the source folder, and record the actions to be performed for - * this rename in a "Rename Pending" file. 
This code was designed to - * meet the needs of HBase, which requires atomic rename of write-ahead log - * (WAL) folders for correctness. - * - * Before calling this method, the caller must ensure that the source is a - * folder. - * - * For non-page-blob directories, prepare the in-memory information needed, - * but don't take the lease or write the redo file. This is done to limit the - * scope of atomic folder rename to HBase, at least at the time of writing - * this code. - * - * @param srcKey Source folder name. - * @param dstKey Destination folder name. - * @throws IOException - */ - @VisibleForTesting - FolderRenamePending prepareAtomicFolderRename( - String srcKey, String dstKey) throws IOException { - - if (store.isAtomicRenameKey(srcKey)) { - - // Block unwanted concurrent access to source folder. - SelfRenewingLease lease = leaseSourceFolder(srcKey); - - // Prepare in-memory information needed to do or redo a folder rename. - FolderRenamePending renamePending = - new FolderRenamePending(srcKey, dstKey, lease, this); - - // Save it to persistent storage to help recover if the operation fails. - renamePending.writeFile(this); - return renamePending; - } else { - FolderRenamePending renamePending = - new FolderRenamePending(srcKey, dstKey, null, this); - return renamePending; + public static class Secure extends NativeAzureFileSystem { + @Override + public String getScheme() { + return SECURE_SCHEME; } } /** - * Get a self-renewing Azure blob lease on the source folder zero-byte file. - */ - private SelfRenewingLease leaseSourceFolder(String srcKey) - throws AzureException { - return store.acquireLease(srcKey); - } - - /** - * Performs sticky bit check on source folder for rename operation. + * Fails Any Attempt to use WASB FileSystem Implementation. * - * @param srcPath - path which is to be renamed. - * @param srcParentPath - parent to srcPath to check for stickybit check. - * @throws FileNotFoundException if srcPath or srcParentPath do not exist. - * @throws WasbAuthorizationException if stickybit check is violated. - * @throws IOException when retrieving metadata operation fails. - */ - private void performStickyBitCheckForRenameOperation(Path srcPath, - Path srcParentPath) - throws FileNotFoundException, WasbAuthorizationException, IOException { - - String srcKey = pathToKey(srcPath); - FileMetadata srcMetadata = null; - srcMetadata = store.retrieveMetadata(srcKey); - if (srcMetadata == null) { - LOG.debug("Source {} doesn't exist. Failing rename.", srcPath); - throw new FileNotFoundException( - String.format("%s does not exist.", srcPath)); - } - - String parentkey = pathToKey(srcParentPath); - FileMetadata parentMetadata = store.retrieveMetadata(parentkey); - if (parentMetadata == null) { - LOG.debug("Path {} doesn't exist, failing rename.", srcParentPath); - throw new FileNotFoundException( - String.format("%s does not exist.", parentkey)); - } - - if (isStickyBitCheckViolated(srcMetadata, parentMetadata)) { - throw new WasbAuthorizationException( - String.format("Rename operation for %s is not permitted." - + " Details : Stickybit check failed.", srcPath)); - } - } - - /** - * Set the working directory to the given directory. 
+ * @param uri the URI of the file system + * @param conf the configuration + * @throws IOException on IO problems + * @throws UnsupportedOperationException if the URI is invalid */ @Override - public void setWorkingDirectory(Path newDir) { - workingDir = makeAbsolute(newDir); + public void initialize(URI uri, Configuration conf) + throws IOException, UnsupportedOperationException { + throw new UnsupportedOperationException(WASB_INIT_ERROR_MESSAGE); } @Override - public Path getWorkingDirectory() { - return workingDir; + public String getScheme() { + return SCHEME; } @Override - public void setPermission(Path p, FsPermission permission) throws FileNotFoundException, IOException { - Path absolutePath = makeAbsolute(p); - - String key = pathToKey(absolutePath); - FileMetadata metadata = null; - try { - metadata = store.retrieveMetadata(key); - } catch (IOException ex) { - Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(ex); - - if (innerException instanceof StorageException - && NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException)) { - - throw new FileNotFoundException(String.format("File %s doesn't exists.", p)); - } - - throw ex; - } - - if (metadata == null) { - throw new FileNotFoundException("File doesn't exist: " + p); - } - - // If authorization is enabled, check if the user is - // part of chmod allowed list or a daemon user or owner of the file/folder - if (azureAuthorization) { - UserGroupInformation currentUgi = UserGroupInformation.getCurrentUser(); - - // Check if the user is part of chown allowed list or a daemon user. - if (!isAllowedUser(currentUgi.getShortUserName(), chmodAllowedUsers) - && !isAllowedUser(currentUgi.getShortUserName(), daemonUsers)) { - - //Check if the user is the owner of the file. - String owner = metadata.getOwner(); - if (!currentUgi.getShortUserName().equals(owner)) { - throw new WasbAuthorizationException( - String.format("user '%s' does not have the privilege to " - + "change the permission of files/folders.", - currentUgi.getShortUserName())); - } - } - } - - permission = applyUMask(permission, - metadata.isDirectory() ? UMaskApplyMode.ChangeExistingDirectory - : UMaskApplyMode.ChangeExistingFile); - if (metadata.getBlobMaterialization() == BlobMaterialization.Implicit) { - // It's an implicit folder, need to materialize it. - store.storeEmptyFolder(key, createPermissionStatus(permission)); - } else if (!metadata.getPermission(). 
- equals(permission)) { - store.changePermissionStatus(key, new PermissionStatus( - metadata.getOwner(), - metadata.getGroup(), - permission)); - } + public URI getUri() { + return null; } @Override - public void setOwner(Path p, String username, String groupname) + public FSDataInputStream open(final Path path, final int i) throws IOException { - Path absolutePath = makeAbsolute(p); - - String key = pathToKey(absolutePath); - FileMetadata metadata = null; - - try { - metadata = store.retrieveMetadata(key); - } catch (IOException ex) { - Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(ex); - - if (innerException instanceof StorageException - && NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException)) { - - throw new FileNotFoundException(String.format("File %s doesn't exists.", p)); - } - - throw ex; - } - - if (metadata == null) { - throw new FileNotFoundException("File doesn't exist: " + p); - } - - /* If authorization is enabled, check if the user has privileges - * to change the ownership of file/folder - */ - if (this.azureAuthorization && username != null) { - UserGroupInformation currentUgi = UserGroupInformation.getCurrentUser(); - - if (!isAllowedUser(currentUgi.getShortUserName(), - chownAllowedUsers)) { - throw new WasbAuthorizationException( - String.format("user '%s' does not have the privilege to change " - + "the ownership of files/folders.", - currentUgi.getShortUserName())); - } - } - - PermissionStatus newPermissionStatus = new PermissionStatus( - username == null ? - metadata.getOwner() : username, - groupname == null ? - metadata.getGroup() : groupname, - metadata.getPermission()); - if (metadata.getBlobMaterialization() == BlobMaterialization.Implicit) { - // It's an implicit folder, need to materialize it. - store.storeEmptyFolder(key, newPermissionStatus); - } else { - store.changePermissionStatus(key, newPermissionStatus); - } + throw new UnsupportedOperationException(WASB_INIT_ERROR_MESSAGE); } - /** - * Set the value of an attribute for a path. 
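To make the effect of the new stub concrete (this snippet is not part of the patch, and it assumes the wasb scheme still resolves to this class): every FileSystem entry point now throws UnsupportedOperationException carrying WASB_INIT_ERROR_MESSAGE, so a client touching a wasb:// URI fails as soon as initialize() runs. The account and container names below are made up.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WasbStubDemo {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    try {
      FileSystem fs = FileSystem.get(
          URI.create("wasb://container@account.blob.core.windows.net/"), conf);
      fs.getFileStatus(new Path("/data"));   // never reached: initialize() throws first
    } catch (UnsupportedOperationException e) {
      // The stub rejects WASB outright; callers are expected to migrate to abfs://.
      System.err.println("WASB is no longer supported: " + e.getMessage());
    }
  }
}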
- * - * @param path The path on which to set the attribute - * @param xAttrName The attribute to set - * @param value The byte value of the attribute to set (encoded in utf-8) - * @param flag The mode in which to set the attribute - * @throws IOException If there was an issue setting the attribute on Azure - */ @Override - public void setXAttr(Path path, String xAttrName, byte[] value, EnumSet flag) throws IOException { - Path absolutePath = makeAbsolute(path); - performAuthCheck(absolutePath, WasbAuthorizationOperations.WRITE, "setXAttr", absolutePath); - - String key = pathToKey(absolutePath); - FileMetadata metadata; - try { - metadata = store.retrieveMetadata(key); - } catch (IOException ex) { - Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(ex); - if (innerException instanceof StorageException - && NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException)) { - throw new FileNotFoundException("File " + path + " doesn't exists."); - } - throw ex; - } - - if (metadata == null) { - throw new FileNotFoundException("File doesn't exist: " + path); - } - - boolean xAttrExists = store.retrieveAttribute(key, xAttrName) != null; - XAttrSetFlag.validate(xAttrName, xAttrExists, flag); - store.storeAttribute(key, xAttrName, value); + public FSDataOutputStream create(final Path path, + final FsPermission fsPermission, + final boolean b, + final int i, + final short i1, + final long l, + final Progressable progressable) throws IOException { + throw new UnsupportedOperationException(WASB_INIT_ERROR_MESSAGE); } - /** - * Get the value of an attribute for a path. - * - * @param path The path on which to get the attribute - * @param xAttrName The attribute to get - * @return The bytes of the attribute's value (encoded in utf-8) - * or null if the attribute does not exist - * @throws IOException If there was an issue getting the attribute from Azure - */ @Override - public byte[] getXAttr(Path path, String xAttrName) throws IOException { - Path absolutePath = makeAbsolute(path); - performAuthCheck(absolutePath, WasbAuthorizationOperations.READ, "getXAttr", absolutePath); - - String key = pathToKey(absolutePath); - FileMetadata metadata; - try { - metadata = store.retrieveMetadata(key); - } catch (IOException ex) { - Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(ex); - if (innerException instanceof StorageException - && NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException)) { - throw new FileNotFoundException("File " + path + " doesn't exists."); - } - throw ex; - } - - if (metadata == null) { - throw new FileNotFoundException("File doesn't exist: " + path); - } - - return store.retrieveAttribute(key, xAttrName); - } - - /** - * Is the user allowed? - *
- * <ol>
- *   <li>No user: false</li>
- *   <li>Empty list: false</li>
- *   <li>List == ["*"]: true</li>
- *   <li>Otherwise: is the user in the list?</li>
- * </ol>
- * @param username user to check; may be null - * @param userList list of users; may be null or empty - * @return - * @throws IllegalArgumentException if the userList is invalid. - */ - private boolean isAllowedUser(String username, List userList) { - - if (null == userList || userList.isEmpty()) { - return false; - } - - boolean shouldSkipUserCheck = userList.size() == 1 - && userList.get(0).equals("*"); - - // skip the check if the allowed users config value is set as '*' - if (!shouldSkipUserCheck) { - Preconditions.checkArgument(!userList.contains("*"), - "User list must contain either '*' or a list of user names," - + " but not both."); - return userList.contains(username); - } - return true; + public FSDataOutputStream append(final Path path, + final int i, + final Progressable progressable) + throws IOException { + throw new UnsupportedOperationException(WASB_INIT_ERROR_MESSAGE); } @Override - public synchronized void close() throws IOException { - if (isClosed) { - return; - } - - // Call the base close() to close any resources there. - super.close(); - // Close the store to close any resources there - e.g. the bandwidth - // updater thread would be stopped at this time. - store.close(); - // Notify the metrics system that this file system is closed, which may - // trigger one final metrics push to get the accurate final file system - // metrics out. - - long startTime = System.currentTimeMillis(); - - if(!getConf().getBoolean(SKIP_AZURE_METRICS_PROPERTY_NAME, false)) { - AzureFileSystemMetricsSystem.unregisterSource(metricsSourceName); - AzureFileSystemMetricsSystem.fileSystemClosed(); - } - - LOG.debug("Submitting metrics when file system closed took {} ms.", - (System.currentTimeMillis() - startTime)); - isClosed = true; + public boolean rename(final Path path, final Path path1) throws IOException { + throw new UnsupportedOperationException(WASB_INIT_ERROR_MESSAGE); } - /** - * Get a delegation token from remote service endpoint if - * 'fs.azure.enable.kerberos.support' is set to 'true'. - * @param renewer the account name that is allowed to renew the token. - * @return delegation token - * @throws IOException thrown when getting the current user. - */ @Override - public synchronized Token getDelegationToken(final String renewer) throws IOException { - if (kerberosSupportEnabled) { - return wasbDelegationTokenManager.getDelegationToken(renewer); - } else { - return super.getDelegationToken(renewer); - } + public boolean delete(final Path path, final boolean b) throws IOException { + throw new UnsupportedOperationException(WASB_INIT_ERROR_MESSAGE); } @Override - public void access(Path path, FsAction mode) throws IOException { - if (azureAuthorization && authorizer != null) { - try { - // Required to check the existence of the path. - getFileStatus(path); - switch (mode) { - case READ: - case READ_EXECUTE: - performAuthCheck(path, WasbAuthorizationOperations.READ, "access", path); - break; - case WRITE: - case WRITE_EXECUTE: - performAuthCheck(path, WasbAuthorizationOperations.WRITE, "access", - path); - break; - case READ_WRITE: - case ALL: - performAuthCheck(path, WasbAuthorizationOperations.READ, "access", path); - performAuthCheck(path, WasbAuthorizationOperations.WRITE, "access", - path); - break; - case EXECUTE: - case NONE: - default: - break; - } - } catch (WasbAuthorizationException wae){ - throw new AccessControlException(wae); - } - } else { - super.access(path, mode); - } - } - - /** - * A handler that defines what to do with blobs whose upload was - * interrupted. 
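For reference, the allow-list rule that the removed isAllowedUser() applied to the chmod/chown/daemon user lists can be restated as a standalone sketch (illustrative only; the class name is invented, the logic mirrors the deleted method above):

import java.util.List;

final class AllowListRuleSketch {
  static boolean isAllowed(String username, List<String> userList) {
    if (userList == null || userList.isEmpty()) {
      return false;                       // no list configured: deny
    }
    if (userList.size() == 1 && "*".equals(userList.get(0))) {
      return true;                        // a lone "*" allows everyone
    }
    if (userList.contains("*")) {
      // "*" mixed with explicit names is rejected, as in the original check.
      throw new IllegalArgumentException(
          "User list must contain either '*' or a list of user names,"
              + " but not both.");
    }
    return userList.contains(username);   // otherwise: membership test
  }
}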
- */ - private abstract class DanglingFileHandler { - abstract void handleFile(FileMetadata file, FileMetadata tempFile) - throws IOException; - } - - /** - * Handler implementation for just deleting dangling files and cleaning - * them up. - */ - private class DanglingFileDeleter extends DanglingFileHandler { - @Override - void handleFile(FileMetadata file, FileMetadata tempFile) - throws IOException { - - LOG.debug("Deleting dangling file {}", file.getKey()); - // Not handling delete return type as false return essentially - // means its a no-op for the caller - store.delete(file.getKey()); - store.delete(tempFile.getKey()); - } - } - - /** - * Handler implementation for just moving dangling files to recovery - * location (/lost+found). - */ - private class DanglingFileRecoverer extends DanglingFileHandler { - private final Path destination; - - DanglingFileRecoverer(Path destination) { - this.destination = destination; - } - - @Override - void handleFile(FileMetadata file, FileMetadata tempFile) - throws IOException { - - LOG.debug("Recovering {}", file.getKey()); - // Move to the final destination - String finalDestinationKey = - pathToKey(new Path(destination, file.getKey())); - store.rename(tempFile.getKey(), finalDestinationKey); - if (!finalDestinationKey.equals(file.getKey())) { - // Delete the empty link file now that we've restored it. - store.delete(file.getKey()); - } - } - } - - /** - * Check if a path has colons in its name - */ - private boolean containsColon(Path p) { - return p.toUri().getPath().toString().contains(":"); - } - - /** - * Implements recover and delete (-move and -delete) behaviors for handling - * dangling files (blobs whose upload was interrupted). - * - * @param root - * The root path to check from. - * @param handler - * The handler that deals with dangling files. - */ - private void handleFilesWithDanglingTempData(Path root, - DanglingFileHandler handler) throws IOException { - // Calculate the cut-off for when to consider a blob to be dangling. - long cutoffForDangling = new Date().getTime() - - getConf().getInt(AZURE_TEMP_EXPIRY_PROPERTY_NAME, - AZURE_TEMP_EXPIRY_DEFAULT) * 1000; - // Go over all the blobs under the given root and look for blobs to - // recover. - FileMetadata[] listing = store.list(pathToKey(root), AZURE_LIST_ALL, - AZURE_UNBOUNDED_DEPTH); - - for (FileMetadata file : listing) { - if (!file.isDirectory()) { // We don't recover directory blobs - // See if this blob has a link in it (meaning it's a place-holder - // blob for when the upload to the temp blob is complete). - String link = store.getLinkInFileMetadata(file.getKey()); - if (link != null) { - // It has a link, see if the temp blob it is pointing to is - // existent and old enough to be considered dangling. - FileMetadata linkMetadata = store.retrieveMetadata(link); - if (linkMetadata != null - && linkMetadata.getModificationTime() >= cutoffForDangling) { - // Found one! - handler.handleFile(file, linkMetadata); - } - } - } - } - } - - /** - * Looks under the given root path for any blob that are left "dangling", - * meaning that they are place-holder blobs that we created while we upload - * the data to a temporary blob, but for some reason we crashed in the middle - * of the upload and left them there. If any are found, we move them to the - * destination given. - * - * @param root - * The root path to consider. - * @param destination - * The destination path to move any recovered files to. - * @throws IOException Thrown when fail to recover files. 
- */ - public void recoverFilesWithDanglingTempData(Path root, Path destination) + public FileStatus[] listStatus(final Path path) throws IOException { - - LOG.debug("Recovering files with dangling temp data in {}", root); - handleFilesWithDanglingTempData(root, - new DanglingFileRecoverer(destination)); - } - - /** - * Looks under the given root path for any blob that are left "dangling", - * meaning that they are place-holder blobs that we created while we upload - * the data to a temporary blob, but for some reason we crashed in the middle - * of the upload and left them there. If any are found, we delete them. - * - * @param root - * The root path to consider. - * @throws IOException Thrown when fail to delete. - */ - public void deleteFilesWithDanglingTempData(Path root) throws IOException { - - LOG.debug("Deleting files with dangling temp data in {}", root); - handleFilesWithDanglingTempData(root, new DanglingFileDeleter()); + throw new UnsupportedOperationException(WASB_INIT_ERROR_MESSAGE); } @Override - protected void finalize() throws Throwable { - LOG.debug("finalize() called."); - close(); - super.finalize(); - } - - /** - * Encode the key with a random prefix for load balancing in Azure storage. - * Upload data to a random temporary file then do storage side renaming to - * recover the original key. - * - * @param aKey a key to be encoded. - * @return Encoded version of the original key. - */ - private static String encodeKey(String aKey) { - // Get the tail end of the key name. - // - String fileName = aKey.substring(aKey.lastIndexOf(Path.SEPARATOR) + 1, - aKey.length()); - - // Construct the randomized prefix of the file name. The prefix ensures the - // file always drops into the same folder but with a varying tail key name. - String filePrefix = AZURE_TEMP_FOLDER + Path.SEPARATOR - + UUID.randomUUID().toString(); - - // Concatenate the randomized prefix with the tail of the key name. - String randomizedKey = filePrefix + fileName; - - // Return to the caller with the randomized key. - return randomizedKey; - } - - /* - * Helper method to retrieve owner information for a given path. - * The method returns empty string in case the file is not found or the metadata does not contain owner information - */ - @VisibleForTesting - public String getOwnerForPath(Path absolutePath) throws IOException { - String owner = ""; - FileMetadata meta = null; - String key = pathToKey(absolutePath); - try { - - meta = store.retrieveMetadata(key); - - if (meta != null) { - owner = meta.getOwner(); - LOG.debug("Retrieved '{}' as owner for path - {}", owner, absolutePath); - } else { - // meta will be null if file/folder doen not exist - LOG.debug("Cannot find file/folder - '{}'. Returning owner as empty string", absolutePath); - } - } catch(IOException ex) { - - Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(ex); - boolean isfileNotFoundException = innerException instanceof StorageException - && NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException); - - // should not throw when the exception is related to blob/container/file/folder not found - if (!isfileNotFoundException) { - String errorMsg = "Could not retrieve owner information for path - " + absolutePath; - LOG.error(errorMsg); - throw new IOException(errorMsg, ex); - } - } - return owner; - } - - /** - * Helper method to update the chownAllowedUsers in tests. 
- * @param chownAllowedUsers list of chown allowed users - */ - @VisibleForTesting - void updateChownAllowedUsers(List chownAllowedUsers) { - this.chownAllowedUsers = chownAllowedUsers; - } - - /** - * Helper method to update the chmodAllowedUsers in tests. - * @param chmodAllowedUsers list of chmod allowed users - */ - @VisibleForTesting - void updateChmodAllowedUsers(List chmodAllowedUsers) { - this.chmodAllowedUsers = chmodAllowedUsers; + public void setWorkingDirectory(final Path path) { + throw new UnsupportedOperationException(WASB_INIT_ERROR_MESSAGE); } - /** - * Helper method to update the daemonUsers in tests. - * @param daemonUsers list of daemon users - */ - @VisibleForTesting - void updateDaemonUsers(List daemonUsers) { - this.daemonUsers = daemonUsers; + @Override + public Path getWorkingDirectory() { + throw new UnsupportedOperationException(WASB_INIT_ERROR_MESSAGE); } @Override - public boolean hasPathCapability(final Path path, final String capability) + public boolean mkdirs(final Path path, final FsPermission fsPermission) throws IOException { - switch (validatePathCapabilityArgs(path, capability)) { + throw new UnsupportedOperationException(WASB_INIT_ERROR_MESSAGE); + } - case CommonPathCapabilities.FS_PERMISSIONS: - return true; - // Append support is dynamic - case CommonPathCapabilities.FS_APPEND: - return appendSupportEnabled; - default: - return super.hasPathCapability(path, capability); - } + @Override + public FileStatus getFileStatus(final Path path) throws IOException { + throw new UnsupportedOperationException(WASB_INIT_ERROR_MESSAGE); } } diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemHelper.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemHelper.java deleted file mode 100644 index 75bc99b2cf8d8..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemHelper.java +++ /dev/null @@ -1,155 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.EOFException; -import java.io.IOException; -import java.net.HttpURLConnection; -import java.util.Map; - -import org.apache.hadoop.util.Preconditions; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.microsoft.azure.storage.StorageErrorCode; -import com.microsoft.azure.storage.StorageErrorCodeStrings; -import com.microsoft.azure.storage.StorageException; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.fs.FSExceptionMessages; - -/** - * Utility class that has helper methods. 
- * - */ - -@InterfaceAudience.Private -final class NativeAzureFileSystemHelper { - - private NativeAzureFileSystemHelper() { - // Hiding the cosnstructor as this is a utility class. - } - - private static final Logger LOG = LoggerFactory.getLogger(NativeAzureFileSystemHelper.class); - - public static void cleanup(Logger log, java.io.Closeable closeable) { - if (closeable != null) { - try { - closeable.close(); - } catch(IOException e) { - if (log != null) { - log.debug("Exception in closing {}", closeable, e); - } - } - } - } - - /* - * Helper method to recursively check if the cause of the exception is - * a Azure storage exception. - */ - public static Throwable checkForAzureStorageException(Exception e) { - - Throwable innerException = e.getCause(); - - while (innerException != null - && !(innerException instanceof StorageException)) { - - innerException = innerException.getCause(); - } - - return innerException; - } - - /* - * Helper method to check if the AzureStorageException is - * because backing blob was not found. - */ - public static boolean isFileNotFoundException(StorageException e) { - - String errorCode = e.getErrorCode(); - if (errorCode != null - && (errorCode.equals(StorageErrorCodeStrings.BLOB_NOT_FOUND) - || errorCode.equals(StorageErrorCodeStrings.RESOURCE_NOT_FOUND) - || errorCode.equals(StorageErrorCodeStrings.CONTAINER_NOT_FOUND) - || errorCode.equals(StorageErrorCode.BLOB_NOT_FOUND.toString()) - || errorCode.equals(StorageErrorCode.RESOURCE_NOT_FOUND.toString()) - || errorCode.equals(StorageErrorCode.CONTAINER_NOT_FOUND.toString()))) { - - return true; - } - - return false; - } - - /* - * Determines if a conditional request failed because the blob already - * exists. - * - * @param e - the storage exception thrown by the failed operation. - * - * @return true if a conditional request failed because the blob already - * exists; otherwise, returns false. - */ - static boolean isBlobAlreadyExistsConflict(StorageException e) { - if (e.getHttpStatusCode() == HttpURLConnection.HTTP_CONFLICT - && StorageErrorCodeStrings.BLOB_ALREADY_EXISTS.equals(e.getErrorCode())) { - return true; - } - return false; - } - - /* - * Helper method that logs stack traces from all live threads. - */ - public static void logAllLiveStackTraces() { - - for (Map.Entry entry : Thread.getAllStackTraces().entrySet()) { - LOG.debug("Thread " + entry.getKey().getName()); - StackTraceElement[] trace = entry.getValue(); - for (int j = 0; j < trace.length; j++) { - LOG.debug("\tat " + trace[j]); - } - } - } - - /** - * Validation code, based on - * {@code FSInputStream.validatePositionedReadArgs()}. - * @param buffer destination buffer - * @param offset offset within the buffer - * @param length length of bytes to read - * @throws EOFException if the position is negative - * @throws IndexOutOfBoundsException if there isn't space for the amount of - * data requested. - * @throws IllegalArgumentException other arguments are invalid. 
- */ - static void validateReadArgs(byte[] buffer, int offset, int length) - throws EOFException { - Preconditions.checkArgument(length >= 0, "length is negative"); - Preconditions.checkArgument(buffer != null, "Null buffer"); - if (buffer.length - offset < length) { - throw new IndexOutOfBoundsException( - FSExceptionMessages.TOO_MANY_BYTES_FOR_DEST_BUFFER - + ": request length=" + length - + ", with offset =" + offset - + "; buffer capacity =" + (buffer.length - offset)); - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeFileSystemStore.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeFileSystemStore.java deleted file mode 100644 index 3ea1687f000ef..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeFileSystemStore.java +++ /dev/null @@ -1,148 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.DataOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.net.URI; -import java.util.Date; -import java.util.Optional; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation; -import org.apache.hadoop.fs.permission.PermissionStatus; - -import org.apache.hadoop.classification.VisibleForTesting; - -/** - *

- * <p>
- * An abstraction for a key-based {@link File} store.
- * </p>
- */ -@InterfaceAudience.Private -interface NativeFileSystemStore { - - void initialize(URI uri, Configuration conf, AzureFileSystemInstrumentation instrumentation) throws IOException; - - void storeEmptyFolder(String key, PermissionStatus permissionStatus) - throws AzureException; - - FileMetadata retrieveMetadata(String key) throws IOException; - - InputStream retrieve(String key) throws IOException; - - InputStream retrieve(String key, long byteRangeStart) throws IOException; - - InputStream retrieve(String key, long byteRangeStart, - Optional options) throws IOException; - - DataOutputStream storefile(String keyEncoded, - PermissionStatus permissionStatus, - String key) throws AzureException; - - boolean isPageBlobKey(String key); - - boolean isAtomicRenameKey(String key); - - /** - * Returns the file block size. This is a fake value used for integration - * of the Azure store with Hadoop. - * @return The file block size. - */ - long getHadoopBlockSize(); - - void storeEmptyLinkFile(String key, String tempBlobKey, - PermissionStatus permissionStatus) throws AzureException; - - String getLinkInFileMetadata(String key) throws AzureException; - - FileMetadata[] list(String prefix, final int maxListingCount, - final int maxListingDepth) throws IOException; - - void changePermissionStatus(String key, PermissionStatus newPermission) - throws AzureException; - - byte[] retrieveAttribute(String key, String attribute) throws IOException; - - void storeAttribute(String key, String attribute, byte[] value) throws IOException; - - /** - * API to delete a blob in the back end azure storage. - * @param key - key to the blob being deleted. - * @return return true when delete is successful, false if - * blob cannot be found or delete is not possible without - * exception. - * @throws IOException Exception encountered while deleting in - * azure storage. - */ - boolean delete(String key) throws IOException; - - void rename(String srcKey, String dstKey) throws IOException; - - void rename(String srcKey, String dstKey, boolean acquireLease, SelfRenewingLease existingLease) - throws IOException; - - void rename(String srcKey, String dstKey, boolean acquireLease, - SelfRenewingLease existingLease, boolean overwriteDestination) - throws IOException; - - /** - * Delete all keys with the given prefix. Used for testing. - * - * @param prefix prefix of objects to be deleted. - * @throws IOException Exception encountered while deleting keys. - */ - @VisibleForTesting - void purge(String prefix) throws IOException; - - /** - * Diagnostic method to dump state to the console. - * - * @throws IOException Exception encountered while dumping to console. - */ - void dump() throws IOException; - - void close(); - - void updateFolderLastModifiedTime(String key, SelfRenewingLease folderLease) - throws AzureException; - - void updateFolderLastModifiedTime(String key, Date lastModified, - SelfRenewingLease folderLease) throws AzureException; - - /** - * API to delete a blob in the back end azure storage. - * @param key - key to the blob being deleted. - * @param lease - Active lease on the blob. - * @return return true when delete is successful, false if - * blob cannot be found or delete is not possible without - * exception. - * @throws IOException Exception encountered while deleting in - * azure storage. 
- */ - boolean delete(String key, SelfRenewingLease lease) throws IOException; - - SelfRenewingLease acquireLease(String key) throws AzureException; - - DataOutputStream retrieveAppendStream(String key, int bufferSize) throws IOException; - - boolean explicitFileExists(String key) throws AzureException; -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/PageBlobFormatHelpers.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/PageBlobFormatHelpers.java deleted file mode 100644 index 9a316a51bd26e..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/PageBlobFormatHelpers.java +++ /dev/null @@ -1,58 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.nio.ByteBuffer; - -import com.microsoft.azure.storage.blob.BlobRequestOptions; - -/** - * Constants and helper methods for ASV's custom data format in page blobs. - */ -final class PageBlobFormatHelpers { - public static final short PAGE_SIZE = 512; - public static final short PAGE_HEADER_SIZE = 2; - public static final short PAGE_DATA_SIZE = PAGE_SIZE - PAGE_HEADER_SIZE; - - // Hide constructor for utility class. - private PageBlobFormatHelpers() { - - } - - /** - * Stores the given short as a two-byte array. - */ - public static byte[] fromShort(short s) { - return ByteBuffer.allocate(2).putShort(s).array(); - } - - /** - * Retrieves a short from the given two bytes. - */ - public static short toShort(byte firstByte, byte secondByte) { - return ByteBuffer.wrap(new byte[] { firstByte, secondByte }) - .getShort(); - } - - public static BlobRequestOptions withMD5Checking() { - BlobRequestOptions options = new BlobRequestOptions(); - options.setUseTransactionalContentMD5(true); - return options; - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/PageBlobInputStream.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/PageBlobInputStream.java deleted file mode 100644 index 0d8936582e9d3..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/PageBlobInputStream.java +++ /dev/null @@ -1,506 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import static org.apache.hadoop.fs.azure.PageBlobFormatHelpers.PAGE_DATA_SIZE; -import static org.apache.hadoop.fs.azure.PageBlobFormatHelpers.PAGE_HEADER_SIZE; -import static org.apache.hadoop.fs.azure.PageBlobFormatHelpers.PAGE_SIZE; -import static org.apache.hadoop.fs.azure.PageBlobFormatHelpers.toShort; -import static org.apache.hadoop.fs.azure.PageBlobFormatHelpers.withMD5Checking; - -import java.io.ByteArrayOutputStream; -import java.io.EOFException; -import java.io.IOException; -import java.io.InputStream; -import java.util.ArrayList; - -import org.apache.hadoop.fs.FSExceptionMessages; -import org.apache.hadoop.fs.azure.StorageInterface.CloudPageBlobWrapper; - -import com.microsoft.azure.storage.OperationContext; -import com.microsoft.azure.storage.StorageException; -import com.microsoft.azure.storage.blob.BlobRequestOptions; -import com.microsoft.azure.storage.blob.PageRange; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * An input stream that reads file data from a page blob stored - * using ASV's custom format. - */ - -final class PageBlobInputStream extends InputStream { - private static final Logger LOG = LoggerFactory.getLogger(PageBlobInputStream.class); - - // The blob we're reading from. - private final CloudPageBlobWrapper blob; - // The operation context to use for storage requests. - private final OperationContext opContext; - // The number of pages remaining to be read from the server. - private long numberOfPagesRemaining; - // The current byte offset to start reading from the server next, - // equivalent to (total number of pages we've read) * (page size). - private long currentOffsetInBlob; - // The buffer holding the current data we last read from the server. - private byte[] currentBuffer; - // The current byte offset we're at in the buffer. - private int currentBufferOffset; - // The current buffer length - private int currentBufferLength; - // Maximum number of pages to get per any one request. - private static final int MAX_PAGES_PER_DOWNLOAD = - 4 * 1024 * 1024 / PAGE_SIZE; - // Whether the stream has been closed. - private boolean closed = false; - // Total stream size, or -1 if not initialized. - long pageBlobSize = -1; - // Current position in stream of valid data. - long filePosition = 0; - - /** - * Helper method to extract the actual data size of a page blob. - * This typically involves 2 service requests (one for page ranges, another - * for the last page's data). - * - * @param blob The blob to get the size from. - * @param opContext The operation context to use for the requests. - * @return The total data size of the blob in bytes. - * @throws IOException If the format is corrupt. - * @throws StorageException If anything goes wrong in the requests. - */ - public static long getPageBlobDataSize(CloudPageBlobWrapper blob, - OperationContext opContext) throws IOException, StorageException { - // Get the page ranges for the blob. There should be one range starting - // at byte 0, but we tolerate (and ignore) ranges after the first one. 
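// Illustrative note (not from the original source): a worked example of the
// size calculation in this method. With PAGE_SIZE = 512 and PAGE_HEADER_SIZE
// = 2, each page carries PAGE_DATA_SIZE = 510 data bytes. If the first page
// range ends at byte 4095, the raw size is 4096 bytes, i.e. 8 pages; if the
// last page's header reports 100 data bytes, the logical blob size is
// (8 - 1) * 510 + 100 = 3670 bytes.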
- ArrayList pageRanges = - blob.downloadPageRanges(new BlobRequestOptions(), opContext); - if (pageRanges.size() == 0) { - return 0; - } - if (pageRanges.get(0).getStartOffset() != 0) { - // Not expected: we always upload our page blobs as a contiguous range - // starting at byte 0. - throw badStartRangeException(blob, pageRanges.get(0)); - } - long totalRawBlobSize = pageRanges.get(0).getEndOffset() + 1; - - // Get the last page. - long lastPageStart = totalRawBlobSize - PAGE_SIZE; - ByteArrayOutputStream baos = - new ByteArrayOutputStream(PageBlobFormatHelpers.PAGE_SIZE); - blob.downloadRange(lastPageStart, PAGE_SIZE, baos, - new BlobRequestOptions(), opContext); - - byte[] lastPage = baos.toByteArray(); - short lastPageSize = getPageSize(blob, lastPage, 0); - long totalNumberOfPages = totalRawBlobSize / PAGE_SIZE; - return (totalNumberOfPages - 1) * PAGE_DATA_SIZE + lastPageSize; - } - - /** - * Constructs a stream over the given page blob. - */ - public PageBlobInputStream(CloudPageBlobWrapper blob, - OperationContext opContext) - throws IOException { - this.blob = blob; - this.opContext = opContext; - ArrayList allRanges; - try { - allRanges = - blob.downloadPageRanges(new BlobRequestOptions(), opContext); - } catch (StorageException e) { - throw new IOException(e); - } - if (allRanges.size() > 0) { - if (allRanges.get(0).getStartOffset() != 0) { - throw badStartRangeException(blob, allRanges.get(0)); - } - if (allRanges.size() > 1) { - LOG.warn(String.format( - "Blob %s has %d page ranges beyond the first range. " - + "Only reading the first range.", - blob.getUri(), allRanges.size() - 1)); - } - numberOfPagesRemaining = - (allRanges.get(0).getEndOffset() + 1) / PAGE_SIZE; - } else { - numberOfPagesRemaining = 0; - } - } - - /** Return the size of the remaining available bytes - * if the size is less than or equal to {@link Integer#MAX_VALUE}, - * otherwise, return {@link Integer#MAX_VALUE}. - * - * This is to match the behavior of DFSInputStream.available(), - * which some clients may rely on (HBase write-ahead log reading in - * particular). - */ - @Override - public synchronized int available() throws IOException { - if (closed) { - throw new IOException("Stream closed"); - } - if (pageBlobSize == -1) { - try { - pageBlobSize = getPageBlobDataSize(blob, opContext); - } catch (StorageException e) { - throw new IOException("Unable to get page blob size.", e); - } - } - - final long remaining = pageBlobSize - filePosition; - return remaining <= Integer.MAX_VALUE ? - (int) remaining : Integer.MAX_VALUE; - } - - @Override - public synchronized void close() throws IOException { - closed = true; - } - - private boolean dataAvailableInBuffer() { - return currentBuffer != null - && currentBufferOffset < currentBufferLength; - } - - /** - * Check our buffer and download more from the server if needed. - * If data is not available in the buffer, method downloads maximum - * page blob download size (4MB) or if there is less then 4MB left, - * all remaining pages. - * If we are on the last page, method will return true even if - * we reached the end of stream. - * @return true if there's more data in the buffer, false if buffer is empty - * and we reached the end of the blob. - * @throws IOException - */ - private synchronized boolean ensureDataInBuffer() throws IOException { - if (dataAvailableInBuffer()) { - // We still have some data in our buffer. 
- return true; - } - currentBuffer = null; - currentBufferOffset = 0; - currentBufferLength = 0; - if (numberOfPagesRemaining == 0) { - // No more data to read. - return false; - } - final long pagesToRead = Math.min(MAX_PAGES_PER_DOWNLOAD, - numberOfPagesRemaining); - final int bufferSize = (int) (pagesToRead * PAGE_SIZE); - - // Download page to current buffer. - try { - // Create a byte array output stream to capture the results of the - // download. - ByteArrayOutputStream baos = new ByteArrayOutputStream(bufferSize); - blob.downloadRange(currentOffsetInBlob, bufferSize, baos, - withMD5Checking(), opContext); - validateDataIntegrity(baos.toByteArray()); - } catch (StorageException e) { - throw new IOException(e); - } - numberOfPagesRemaining -= pagesToRead; - currentOffsetInBlob += bufferSize; - - return true; - } - - private void validateDataIntegrity(byte[] buffer) - throws IOException { - - if (buffer.length % PAGE_SIZE != 0) { - throw new AssertionError("Unexpected buffer size: " - + buffer.length); - } - - int bufferLength = 0; - int numberOfPages = buffer.length / PAGE_SIZE; - long totalPagesAfterCurrent = numberOfPagesRemaining; - - for (int page = 0; page < numberOfPages; page++) { - // Calculate the number of pages that exist in the blob after this one - totalPagesAfterCurrent--; - - short currentPageSize = getPageSize(blob, buffer, page * PAGE_SIZE); - - // Only the last page can be partially filled. - if (currentPageSize < PAGE_DATA_SIZE - && totalPagesAfterCurrent > 0) { - throw fileCorruptException(blob, String.format( - "Page with partial data found in the middle (%d pages from the" - + " end) that only has %d bytes of data.", - totalPagesAfterCurrent, currentPageSize)); - } - bufferLength += currentPageSize + PAGE_HEADER_SIZE; - } - - currentBufferOffset = PAGE_HEADER_SIZE; - currentBufferLength = bufferLength; - currentBuffer = buffer; - } - - // Reads the page size from the page header at the given offset. - private static short getPageSize(CloudPageBlobWrapper blob, - byte[] data, int offset) throws IOException { - short pageSize = toShort(data[offset], data[offset + 1]); - if (pageSize < 0 || pageSize > PAGE_DATA_SIZE) { - throw fileCorruptException(blob, String.format( - "Unexpected page size in the header: %d.", - pageSize)); - } - return pageSize; - } - - @Override - public synchronized int read(byte[] outputBuffer, int offset, int len) - throws IOException { - // If len is zero return 0 per the InputStream contract - if (len == 0) { - return 0; - } - - int numberOfBytesRead = 0; - while (len > 0) { - if (!ensureDataInBuffer()) { - break; - } - int bytesRemainingInCurrentPage = getBytesRemainingInCurrentPage(); - int numBytesToRead = Math.min(len, bytesRemainingInCurrentPage); - System.arraycopy(currentBuffer, currentBufferOffset, outputBuffer, - offset, numBytesToRead); - numberOfBytesRead += numBytesToRead; - offset += numBytesToRead; - len -= numBytesToRead; - if (numBytesToRead == bytesRemainingInCurrentPage) { - // We've finished this page, move on to the next. 
- advancePagesInBuffer(1); - } else { - currentBufferOffset += numBytesToRead; - } - } - - // if outputBuffer len is > 0 and zero bytes were read, we reached - // an EOF - if (numberOfBytesRead == 0) { - return -1; - } - - filePosition += numberOfBytesRead; - return numberOfBytesRead; - } - - @Override - public int read() throws IOException { - byte[] oneByte = new byte[1]; - int result = read(oneByte); - if (result < 0) { - return result; - } - return oneByte[0]; - } - - /** - * Skips over and discards n bytes of data from this input - * stream. The skip method may, for a variety of reasons, end - * up skipping over some smaller number of bytes, possibly 0. - * This may result from any of a number of conditions; reaching end of file - * before n bytes have been skipped is only one possibility. - * The actual number of bytes skipped is returned. If {@code n} is - * negative, the {@code skip} method for class {@code InputStream} always - * returns 0, and no bytes are skipped. Subclasses may handle the negative - * value differently. - * - *

The skip method of this class creates a - * byte array and then repeatedly reads into it until n bytes - * have been read or the end of the stream has been reached. Subclasses are - * encouraged to provide a more efficient implementation of this method. - * For instance, the implementation may depend on the ability to seek. - * - * @param n the number of bytes to be skipped. - * @return the actual number of bytes skipped. - * @exception IOException if the stream does not support seek, - * or if some other I/O error occurs. - */ - @Override - public synchronized long skip(long n) throws IOException { - long skipped = skipImpl(n); - filePosition += skipped; // track the position in the stream - return skipped; - } - - private long skipImpl(long n) throws IOException { - - if (n == 0) { - return 0; - } - - // First skip within the current buffer as much as possible. - long skippedWithinBuffer = skipWithinBuffer(n); - if (skippedWithinBuffer > n) { - // TO CONSIDER: Using a contracts framework such as Google's cofoja for - // these post-conditions. - throw new AssertionError(String.format( - "Bug in skipWithinBuffer: it skipped over %d bytes when asked to " - + "skip %d bytes.", skippedWithinBuffer, n)); - } - n -= skippedWithinBuffer; - long skipped = skippedWithinBuffer; - - if (n == 0) { - return skipped; - } - - if (numberOfPagesRemaining == 0) { - throw new EOFException(FSExceptionMessages.CANNOT_SEEK_PAST_EOF); - } else if (numberOfPagesRemaining > 1) { - // skip over as many pages as we can, but we must read the last - // page as it may not be full - long pagesToSkipOver = Math.min(n / PAGE_DATA_SIZE, - numberOfPagesRemaining - 1); - numberOfPagesRemaining -= pagesToSkipOver; - currentOffsetInBlob += pagesToSkipOver * PAGE_SIZE; - skipped += pagesToSkipOver * PAGE_DATA_SIZE; - n -= pagesToSkipOver * PAGE_DATA_SIZE; - } - - if (n == 0) { - return skipped; - } - - // Now read in at the current position, and skip within current buffer. - if (!ensureDataInBuffer()) { - return skipped; - } - return skipped + skipWithinBuffer(n); - } - - /** - * Skip over n bytes within the current buffer or just over skip the whole - * buffer if n is greater than the bytes remaining in the buffer. - * @param n The number of data bytes to skip. - * @return The number of bytes actually skipped. - * @throws IOException if data corruption found in the buffer. - */ - private long skipWithinBuffer(long n) throws IOException { - if (!dataAvailableInBuffer()) { - return 0; - } - long skipped = 0; - // First skip within the current page. - skipped = skipWithinCurrentPage(n); - if (skipped > n) { - throw new AssertionError(String.format( - "Bug in skipWithinCurrentPage: it skipped over %d bytes when asked" - + " to skip %d bytes.", skipped, n)); - } - n -= skipped; - if (n == 0 || !dataAvailableInBuffer()) { - return skipped; - } - - // Calculate how many whole pages (pages before the possibly partially - // filled last page) remain. - int currentPageIndex = currentBufferOffset / PAGE_SIZE; - int numberOfPagesInBuffer = currentBuffer.length / PAGE_SIZE; - int wholePagesRemaining = numberOfPagesInBuffer - currentPageIndex - 1; - - if (n < (PAGE_DATA_SIZE * wholePagesRemaining)) { - // I'm within one of the whole pages remaining, skip in there. - advancePagesInBuffer((int) (n / PAGE_DATA_SIZE)); - currentBufferOffset += n % PAGE_DATA_SIZE; - return n + skipped; - } - - // Skip over the whole pages. 
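// Illustrative note (not from the original source): each whole data page
// skipped here accounts for PAGE_DATA_SIZE = 510 logical bytes while the raw
// buffer offset advances by PAGE_SIZE = 512 bytes, so skipping 3 whole pages
// consumes 1530 data bytes spread across 1536 raw buffer bytes.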
- advancePagesInBuffer(wholePagesRemaining); - skipped += wholePagesRemaining * PAGE_DATA_SIZE; - n -= wholePagesRemaining * PAGE_DATA_SIZE; - - // At this point we know we need to skip to somewhere in the last page, - // or just go to the end. - return skipWithinCurrentPage(n) + skipped; - } - - /** - * Skip over n bytes within the current page or just over skip the whole - * page if n is greater than the bytes remaining in the page. - * @param n The number of data bytes to skip. - * @return The number of bytes actually skipped. - * @throws IOException if data corruption found in the buffer. - */ - private long skipWithinCurrentPage(long n) throws IOException { - int remainingBytesInCurrentPage = getBytesRemainingInCurrentPage(); - if (n <= remainingBytesInCurrentPage) { - currentBufferOffset += n; - return n; - } else { - advancePagesInBuffer(1); - return remainingBytesInCurrentPage; - } - } - - /** - * Gets the number of bytes remaining within the current page in the buffer. - * @return The number of bytes remaining. - * @throws IOException if data corruption found in the buffer. - */ - private int getBytesRemainingInCurrentPage() throws IOException { - if (!dataAvailableInBuffer()) { - return 0; - } - // Calculate our current position relative to the start of the current - // page. - int currentDataOffsetInPage = - (currentBufferOffset % PAGE_SIZE) - PAGE_HEADER_SIZE; - int pageBoundary = getCurrentPageStartInBuffer(); - // Get the data size of the current page from the header. - short sizeOfCurrentPage = getPageSize(blob, currentBuffer, pageBoundary); - return sizeOfCurrentPage - currentDataOffsetInPage; - } - - private static IOException badStartRangeException(CloudPageBlobWrapper blob, - PageRange startRange) { - return fileCorruptException(blob, String.format( - "Page blobs for ASV should always use a page range starting at byte 0. " - + "This starts at byte %d.", - startRange.getStartOffset())); - } - - private void advancePagesInBuffer(int numberOfPages) { - currentBufferOffset = - getCurrentPageStartInBuffer() - + (numberOfPages * PAGE_SIZE) - + PAGE_HEADER_SIZE; - } - - private int getCurrentPageStartInBuffer() { - return PAGE_SIZE * (currentBufferOffset / PAGE_SIZE); - } - - private static IOException fileCorruptException(CloudPageBlobWrapper blob, - String reason) { - return new IOException(String.format( - "The page blob: '%s' is corrupt or has an unexpected format: %s.", - blob.getUri(), reason)); - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/PageBlobOutputStream.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/PageBlobOutputStream.java deleted file mode 100644 index f77a6b805140d..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/PageBlobOutputStream.java +++ /dev/null @@ -1,607 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import static org.apache.hadoop.fs.azure.PageBlobFormatHelpers.PAGE_DATA_SIZE; -import static org.apache.hadoop.fs.azure.PageBlobFormatHelpers.PAGE_HEADER_SIZE; -import static org.apache.hadoop.fs.azure.PageBlobFormatHelpers.PAGE_SIZE; -import static org.apache.hadoop.fs.azure.PageBlobFormatHelpers.fromShort; -import static org.apache.hadoop.fs.azure.PageBlobFormatHelpers.withMD5Checking; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.Arrays; -import java.util.Locale; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; - -import org.apache.hadoop.fs.StreamCapabilities; -import org.apache.hadoop.fs.Syncable; -import org.apache.hadoop.fs.azure.StorageInterface.CloudPageBlobWrapper; -import org.apache.commons.lang3.exception.ExceptionUtils; -import org.apache.hadoop.conf.Configuration; - -import org.apache.hadoop.classification.VisibleForTesting; -import com.microsoft.azure.storage.OperationContext; -import com.microsoft.azure.storage.StorageException; -import com.microsoft.azure.storage.blob.BlobRequestOptions; -import com.microsoft.azure.storage.blob.CloudPageBlob; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * An output stream that write file data to a page blob stored using ASV's - * custom format. - */ -final class PageBlobOutputStream extends OutputStream implements Syncable, StreamCapabilities { - /** - * The maximum number of raw bytes Azure Storage allows us to upload in a - * single request (4 MB). - */ - private static final int MAX_RAW_BYTES_PER_REQUEST = 4 * 1024 * 1024; - /** - * The maximum number of pages Azure Storage allows us to upload in a - * single request. - */ - private static final int MAX_PAGES_IN_REQUEST = - MAX_RAW_BYTES_PER_REQUEST / PAGE_SIZE; - /** - * The maximum number of data bytes (header not included) we can upload - * in a single request. I'm limiting it to (N - 1) pages to account for - * the possibility that we may have to rewrite the previous request's - * last page. - */ - private static final int MAX_DATA_BYTES_PER_REQUEST = - PAGE_DATA_SIZE * (MAX_PAGES_IN_REQUEST - 1); - - private final CloudPageBlobWrapper blob; - private final OperationContext opContext; - - /** - * If the IO thread encounters an error, it'll store it here. - */ - private volatile IOException lastError; - - /** - * Current size of the page blob in bytes. It may be extended if the file - * gets full. - */ - private long currentBlobSize; - /** - * The current byte offset we're at in the blob (how many bytes we've - * uploaded to the server). - */ - private long currentBlobOffset; - /** - * The data in the last page that we wrote to the server, in case we have to - * overwrite it in the new request. - */ - private byte[] previousLastPageDataWritten = new byte[0]; - /** - * The current buffer we're writing to before sending to the server. 
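// Illustrative note (not from the original source): the request-size
// constants above work out to MAX_RAW_BYTES_PER_REQUEST = 4 MiB = 4,194,304
// bytes = 8,192 pages of 512 bytes, hence MAX_DATA_BYTES_PER_REQUEST =
// 510 * (8,192 - 1) = 4,177,410 data bytes; one page is held back in case the
// previous request's partial last page has to be rewritten.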
- */ - private ByteArrayOutputStream outBuffer; - /** - * The task queue for writing to the server. - */ - private final LinkedBlockingQueue ioQueue; - /** - * The thread pool we're using for writing to the server. Note that the IO - * write is NOT designed for parallelism, so there can only be one thread - * in that pool (I'm using the thread pool mainly for the lifetime management - * capabilities, otherwise I'd have just used a simple Thread). - */ - private final ThreadPoolExecutor ioThreadPool; - - // The last task given to the ioThreadPool to execute, to allow - // waiting until it's done. - private WriteRequest lastQueuedTask; - // Whether the stream has been closed. - private boolean closed = false; - - public static final Logger LOG = LoggerFactory.getLogger(AzureNativeFileSystemStore.class); - - // Set the minimum page blob file size to 128MB, which is >> the default - // block size of 32MB. This default block size is often used as the - // hbase.regionserver.hlog.blocksize. - // The goal is to have a safe minimum size for HBase log files to allow them - // to be filled and rolled without exceeding the minimum size. A larger size - // can be used by setting the fs.azure.page.blob.size configuration variable. - public static final long PAGE_BLOB_MIN_SIZE = 128L * 1024L * 1024L; - - // The default and minimum amount to extend a page blob by if it starts - // to get full. - public static final long - PAGE_BLOB_DEFAULT_EXTENSION_SIZE = 128L * 1024L * 1024L; - - // The configured page blob extension size (either the default, or if greater, - // the value configured in fs.azure.page.blob.extension.size - private long configuredPageBlobExtensionSize; - - /** - * Constructs an output stream over the given page blob. - * - * @param blob the blob that this stream is associated with. - * @param opContext an object used to track the execution of the operation - * @throws StorageException if anything goes wrong creating the blob. - */ - public PageBlobOutputStream(final CloudPageBlobWrapper blob, - final OperationContext opContext, - final Configuration conf) throws StorageException { - this.blob = blob; - this.outBuffer = new ByteArrayOutputStream(); - this.opContext = opContext; - this.lastQueuedTask = null; - this.ioQueue = new LinkedBlockingQueue(); - - // As explained above: the IO writes are not designed for parallelism, - // so we only have one thread in this thread pool. - this.ioThreadPool = new ThreadPoolExecutor(1, 1, 2, TimeUnit.SECONDS, - ioQueue); - - - - // Make page blob files have a size that is the greater of a - // minimum size, or the value of fs.azure.page.blob.size from configuration. - long pageBlobConfigSize = conf.getLong("fs.azure.page.blob.size", 0); - LOG.debug("Read value of fs.azure.page.blob.size as " + pageBlobConfigSize - + " from configuration (0 if not present)."); - long pageBlobSize = Math.max(PAGE_BLOB_MIN_SIZE, pageBlobConfigSize); - - // Ensure that the pageBlobSize is a multiple of page size. - if (pageBlobSize % PAGE_SIZE != 0) { - pageBlobSize += PAGE_SIZE - pageBlobSize % PAGE_SIZE; - } - blob.create(pageBlobSize, new BlobRequestOptions(), opContext); - currentBlobSize = pageBlobSize; - - // Set the page blob extension size. It must be a minimum of the default - // value. 
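// Illustrative note (not from the original source): examples of the clamping
// and rounding applied below. An unset or small
// fs.azure.page.blob.extension.size becomes the 128 MiB default (134,217,728
// bytes, already a multiple of 512); a configured value of 200,000,001 bytes
// exceeds the default, so it is kept and rounded up to the next page
// boundary, 200,000,512 bytes.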
- configuredPageBlobExtensionSize = - conf.getLong("fs.azure.page.blob.extension.size", 0); - if (configuredPageBlobExtensionSize < PAGE_BLOB_DEFAULT_EXTENSION_SIZE) { - configuredPageBlobExtensionSize = PAGE_BLOB_DEFAULT_EXTENSION_SIZE; - } - - // make sure it is a multiple of the page size - if (configuredPageBlobExtensionSize % PAGE_SIZE != 0) { - configuredPageBlobExtensionSize += - PAGE_SIZE - configuredPageBlobExtensionSize % PAGE_SIZE; - } - } - - private void checkStreamState() throws IOException { - if (lastError != null) { - throw lastError; - } - } - - /** - * Query the stream for a specific capability. - * - * @param capability string to query the stream support for. - * @return true for hsync and hflush. - */ - @Override - public boolean hasCapability(String capability) { - switch (capability.toLowerCase(Locale.ENGLISH)) { - case StreamCapabilities.HSYNC: - case StreamCapabilities.HFLUSH: - return true; - default: - return false; - } - } - - /** - * Closes this output stream and releases any system resources associated with - * this stream. If any data remains in the buffer it is committed to the - * service. - */ - @Override - public synchronized void close() throws IOException { - if (closed) { - return; - } - - LOG.debug("Closing page blob output stream."); - flush(); - checkStreamState(); - ioThreadPool.shutdown(); - try { - LOG.debug(ioThreadPool.toString()); - if (!ioThreadPool.awaitTermination(10, TimeUnit.MINUTES)) { - LOG.debug("Timed out after 10 minutes waiting for IO requests to finish"); - NativeAzureFileSystemHelper.logAllLiveStackTraces(); - LOG.debug(ioThreadPool.toString()); - throw new IOException("Timed out waiting for IO requests to finish"); - } - } catch (InterruptedException e) { - LOG.debug("Caught InterruptedException"); - - // Restore the interrupted status - Thread.currentThread().interrupt(); - } - - closed = true; - } - - - - /** - * A single write request for data to write to Azure storage. - */ - private class WriteRequest implements Runnable { - private final byte[] dataPayload; - private final CountDownLatch doneSignal = new CountDownLatch(1); - - public WriteRequest(byte[] dataPayload) { - this.dataPayload = dataPayload; - } - - public void waitTillDone() throws InterruptedException { - doneSignal.await(); - } - - @Override - public void run() { - try { - LOG.debug("before runInternal()"); - runInternal(); - LOG.debug("after runInternal()"); - } finally { - doneSignal.countDown(); - } - } - - private void runInternal() { - if (lastError != null) { - // We're already in an error state, no point doing anything. - return; - } - if (dataPayload.length == 0) { - // Nothing to do. - return; - } - - // Since we have to rewrite the last request's last page's data - // (may be empty), total data size is our data plus whatever was - // left from there. - final int totalDataBytes = dataPayload.length - + previousLastPageDataWritten.length; - // Calculate the total number of pages we're writing to the server. - final int numberOfPages = (totalDataBytes / PAGE_DATA_SIZE) - + (totalDataBytes % PAGE_DATA_SIZE == 0 ? 0 : 1); - // Fill up the raw bytes we're writing. - byte[] rawPayload = new byte[numberOfPages * PAGE_SIZE]; - // Keep track of the size of the last page we uploaded. - int currentLastPageDataSize = -1; - for (int page = 0; page < numberOfPages; page++) { - // Our current byte offset in the data. - int dataOffset = page * PAGE_DATA_SIZE; - // Our current byte offset in the raw buffer. 
- int rawOffset = page * PAGE_SIZE; - // The size of the data in the current page. - final short currentPageDataSize = (short) Math.min(PAGE_DATA_SIZE, - totalDataBytes - dataOffset); - // Save off this page's size as the potential last page's size. - currentLastPageDataSize = currentPageDataSize; - - // Write out the page size in the header. - final byte[] header = fromShort(currentPageDataSize); - System.arraycopy(header, 0, rawPayload, rawOffset, header.length); - rawOffset += header.length; - - int bytesToCopyFromDataPayload = currentPageDataSize; - if (dataOffset < previousLastPageDataWritten.length) { - // First write out the last page's data. - final int bytesToCopyFromLastPage = Math.min(currentPageDataSize, - previousLastPageDataWritten.length - dataOffset); - System.arraycopy(previousLastPageDataWritten, dataOffset, - rawPayload, rawOffset, bytesToCopyFromLastPage); - bytesToCopyFromDataPayload -= bytesToCopyFromLastPage; - rawOffset += bytesToCopyFromLastPage; - dataOffset += bytesToCopyFromLastPage; - } - - if (dataOffset >= previousLastPageDataWritten.length) { - // Then write the current payload's data. - System.arraycopy(dataPayload, - dataOffset - previousLastPageDataWritten.length, - rawPayload, rawOffset, bytesToCopyFromDataPayload); - } - } - - // Raw payload constructed, ship it off to the server. - writePayloadToServer(rawPayload); - - // Post-send bookkeeping. - currentBlobOffset += rawPayload.length; - if (currentLastPageDataSize < PAGE_DATA_SIZE) { - // Partial page, save it off so it's overwritten in the next request. - final int startOffset = (numberOfPages - 1) * PAGE_SIZE + PAGE_HEADER_SIZE; - previousLastPageDataWritten = Arrays.copyOfRange(rawPayload, - startOffset, - startOffset + currentLastPageDataSize); - // Since we're rewriting this page, set our current offset in the server - // to that page's beginning. - currentBlobOffset -= PAGE_SIZE; - } else { - // It wasn't a partial page, we won't need to rewrite it. - previousLastPageDataWritten = new byte[0]; - } - - // Extend the file if we need more room in the file. This typically takes - // less than 200 milliseconds if it has to actually be done, - // so it is okay to include it in a write and won't cause a long pause. - // Other writes can be queued behind this write in any case. - conditionalExtendFile(); - } - - /** - * Writes the given raw payload to Azure Storage at the current blob - * offset. 
- */ - private void writePayloadToServer(byte[] rawPayload) { - final ByteArrayInputStream wrapperStream = - new ByteArrayInputStream(rawPayload); - LOG.debug("writing payload of " + rawPayload.length + " bytes to Azure page blob"); - try { - long start = System.currentTimeMillis(); - blob.uploadPages(wrapperStream, currentBlobOffset, rawPayload.length, - withMD5Checking(), PageBlobOutputStream.this.opContext); - long end = System.currentTimeMillis(); - LOG.trace("Azure uploadPages time for " + rawPayload.length + " bytes = " + (end - start)); - } catch (IOException ex) { - LOG.debug(ExceptionUtils.getStackTrace(ex)); - lastError = ex; - } catch (StorageException ex) { - LOG.debug(ExceptionUtils.getStackTrace(ex)); - lastError = new IOException(ex); - } - if (lastError != null) { - LOG.debug("Caught error in PageBlobOutputStream#writePayloadToServer()"); - } - } - } - - private synchronized void flushIOBuffers() { - if (outBuffer.size() == 0) { - return; - } - lastQueuedTask = new WriteRequest(outBuffer.toByteArray()); - ioThreadPool.execute(lastQueuedTask); - outBuffer = new ByteArrayOutputStream(); - } - - @VisibleForTesting - synchronized void waitForLastFlushCompletion() throws IOException { - try { - if (lastQueuedTask != null) { - lastQueuedTask.waitTillDone(); - } - } catch (InterruptedException e1) { - // Restore the interrupted status - Thread.currentThread().interrupt(); - } - } - - /** - * Extend the page blob file if we are close to the end. - */ - private void conditionalExtendFile() { - - // maximum allowed size of an Azure page blob (1 terabyte) - final long MAX_PAGE_BLOB_SIZE = 1024L * 1024L * 1024L * 1024L; - - // If blob is already at the maximum size, then don't try to extend it. - if (currentBlobSize == MAX_PAGE_BLOB_SIZE) { - return; - } - - // If we are within the maximum write size of the end of the file, - if (currentBlobSize - currentBlobOffset <= MAX_RAW_BYTES_PER_REQUEST) { - - // Extend the file. Retry up to 3 times with back-off. - CloudPageBlob cloudPageBlob = (CloudPageBlob) blob.getBlob(); - long newSize = currentBlobSize + configuredPageBlobExtensionSize; - - // Make sure we don't exceed maximum blob size. - if (newSize > MAX_PAGE_BLOB_SIZE) { - newSize = MAX_PAGE_BLOB_SIZE; - } - final int MAX_RETRIES = 3; - int retries = 1; - boolean resizeDone = false; - while(!resizeDone && retries <= MAX_RETRIES) { - try { - cloudPageBlob.resize(newSize); - resizeDone = true; - currentBlobSize = newSize; - } catch (StorageException e) { - LOG.warn("Failed to extend size of " + cloudPageBlob.getUri()); - try { - - // sleep 2, 8, 18 seconds for up to 3 retries - Thread.sleep(2000 * retries * retries); - } catch (InterruptedException e1) { - - // Restore the interrupted status - Thread.currentThread().interrupt(); - } - } finally { - retries++; - } - } - } - } - - /** - * Flushes this output stream and forces any buffered output bytes to be - * written out. If any data remains in the buffer it is committed to the - * service. Data is queued for writing but not forced out to the service - * before the call returns. - */ - @Override - public void flush() throws IOException { - checkStreamState(); - flushIOBuffers(); - } - - /** - * Writes b.length bytes from the specified byte array to this output stream. - * - * @param data - * the byte array to write. - * - * @throws IOException - * if an I/O error occurs. In particular, an IOException may be - * thrown if the output stream has been closed. 
- */ - @Override - public void write(final byte[] data) throws IOException { - write(data, 0, data.length); - } - - /** - * Writes length bytes from the specified byte array starting at offset to - * this output stream. - * - * @param data - * the byte array to write. - * @param offset - * the start offset in the data. - * @param length - * the number of bytes to write. - * @throws IOException - * if an I/O error occurs. In particular, an IOException may be - * thrown if the output stream has been closed. - */ - @Override - public void write(final byte[] data, final int offset, final int length) - throws IOException { - if (offset < 0 || length < 0 || length > data.length - offset) { - throw new IndexOutOfBoundsException(); - } - - writeInternal(data, offset, length); - } - - /** - * Writes the specified byte to this output stream. The general contract for - * write is that one byte is written to the output stream. The byte to be - * written is the eight low-order bits of the argument b. The 24 high-order - * bits of b are ignored. - * - * @param byteVal - * the byteValue to write. - * @throws IOException - * if an I/O error occurs. In particular, an IOException may be - * thrown if the output stream has been closed. - */ - @Override - public void write(final int byteVal) throws IOException { - write(new byte[] { (byte) (byteVal & 0xFF) }); - } - - /** - * Writes the data to the buffer and triggers writes to the service as needed. - * - * @param data - * the byte array to write. - * @param offset - * the start offset in the data. - * @param length - * the number of bytes to write. - * @throws IOException - * if an I/O error occurs. In particular, an IOException may be - * thrown if the output stream has been closed. - */ - private synchronized void writeInternal(final byte[] data, int offset, - int length) throws IOException { - while (length > 0) { - checkStreamState(); - final int availableBufferBytes = MAX_DATA_BYTES_PER_REQUEST - - this.outBuffer.size(); - final int nextWrite = Math.min(availableBufferBytes, length); - - outBuffer.write(data, offset, nextWrite); - offset += nextWrite; - length -= nextWrite; - - if (outBuffer.size() > MAX_DATA_BYTES_PER_REQUEST) { - throw new RuntimeException("Internal error: maximum write size " + - Integer.toString(MAX_DATA_BYTES_PER_REQUEST) + "exceeded."); - } - - if (outBuffer.size() == MAX_DATA_BYTES_PER_REQUEST) { - flushIOBuffers(); - } - } - } - - /** - * Force all data in the output stream to be written to Azure storage. - * Wait to return until this is complete. - */ - @Override - public synchronized void hsync() throws IOException { - LOG.debug("Entering PageBlobOutputStream#hsync()."); - long start = System.currentTimeMillis(); - flush(); - LOG.debug(ioThreadPool.toString()); - try { - if (lastQueuedTask != null) { - lastQueuedTask.waitTillDone(); - } - } catch (InterruptedException e1) { - - // Restore the interrupted status - Thread.currentThread().interrupt(); - } - checkStreamState(); - LOG.debug("Leaving PageBlobOutputStream#hsync(). Total hsync duration = " - + (System.currentTimeMillis() - start) + " msec."); - } - - @Override - public void hflush() throws IOException { - - // hflush is required to force data to storage, so call hsync, - // which does that. - hsync(); - } - - @Deprecated - public void sync() throws IOException { - - // Sync has been deprecated in favor of hflush. - hflush(); - } - - // For unit testing purposes: kill the IO threads. 
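For reference, the page framing performed in WriteRequest#runInternal above can be sketched on its own. This is a minimal illustration, not the deleted implementation: the 512-byte page and 2-byte length header are assumptions about PAGE_SIZE and PAGE_HEADER_SIZE (their values are not shown in this hunk), and the sketch ignores the rewrite of the previous partial page that the real code performs.

import java.nio.ByteBuffer;

/** Illustrative only: frame a buffer into fixed-size pages, each prefixed
 *  with a short header recording how many data bytes the page holds. */
public final class PageFramingSketch {
  static final int PAGE_SIZE = 512;        // assumed Azure page size
  static final int PAGE_HEADER_SIZE = 2;   // assumed length-header size
  static final int PAGE_DATA_SIZE = PAGE_SIZE - PAGE_HEADER_SIZE;

  static byte[] frame(byte[] data) {
    int pages = (data.length + PAGE_DATA_SIZE - 1) / PAGE_DATA_SIZE;
    byte[] raw = new byte[pages * PAGE_SIZE];
    for (int page = 0; page < pages; page++) {
      int dataOffset = page * PAGE_DATA_SIZE;
      int rawOffset = page * PAGE_SIZE;
      short chunk = (short) Math.min(PAGE_DATA_SIZE, data.length - dataOffset);
      // Two-byte header, then the page's data bytes; the tail of the last
      // page is left zeroed, mirroring the zero padding of a partial page.
      ByteBuffer.wrap(raw, rawOffset, PAGE_HEADER_SIZE).putShort(chunk);
      System.arraycopy(data, dataOffset, raw, rawOffset + PAGE_HEADER_SIZE, chunk);
    }
    return raw;
  }
}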
- @VisibleForTesting - void killIoThreads() { - ioThreadPool.shutdownNow(); - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteSASKeyGeneratorImpl.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteSASKeyGeneratorImpl.java deleted file mode 100644 index 473fa54f97c83..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteSASKeyGeneratorImpl.java +++ /dev/null @@ -1,337 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.List; -import java.util.concurrent.TimeUnit; - -import com.fasterxml.jackson.databind.ObjectReader; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.azure.security.Constants; -import org.apache.hadoop.io.retry.RetryPolicy; -import org.apache.hadoop.io.retry.RetryUtils; -import org.apache.hadoop.security.UserGroupInformation; - -import org.apache.http.NameValuePair; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.client.utils.URIBuilder; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.fasterxml.jackson.core.JsonParseException; -import com.fasterxml.jackson.databind.JsonMappingException; -import com.fasterxml.jackson.databind.ObjectMapper; - -import static org.apache.hadoop.fs.azure.WasbRemoteCallHelper.REMOTE_CALL_SUCCESS_CODE; - -/** - * Class implementing a RemoteSASKeyGenerator. This class - * uses the url passed in via the Configuration to make a - * rest call to generate the required SAS Key. - */ -public class RemoteSASKeyGeneratorImpl extends SASKeyGeneratorImpl { - - public static final Logger LOG = - LoggerFactory.getLogger(AzureNativeFileSystemStore.class); - private static final ObjectReader RESPONSE_READER = new ObjectMapper() - .readerFor(RemoteSASKeyGenerationResponse.class); - - /** - * Configuration parameter name expected in the Configuration - * object to provide the url of the remote service {@value} - */ - public static final String KEY_CRED_SERVICE_URLS = - "fs.azure.cred.service.urls"; - /** - * Configuration key to enable http retry policy for SAS Key generation. {@value} - */ - public static final String - SAS_KEY_GENERATOR_HTTP_CLIENT_RETRY_POLICY_ENABLED_KEY = - "fs.azure.saskeygenerator.http.retry.policy.enabled"; - /** - * Configuration key for SAS Key Generation http retry policy spec. {@value} - */ - public static final String - SAS_KEY_GENERATOR_HTTP_CLIENT_RETRY_POLICY_SPEC_KEY = - "fs.azure.saskeygenerator.http.retry.policy.spec"; - /** - * Container SAS Key generation OP name. 
{@value} - */ - private static final String CONTAINER_SAS_OP = "GET_CONTAINER_SAS"; - /** - * Relative Blob SAS Key generation OP name. {@value} - */ - private static final String BLOB_SAS_OP = "GET_RELATIVE_BLOB_SAS"; - /** - * Query parameter specifying the expiry period to be used for sas key - * {@value} - */ - private static final String SAS_EXPIRY_QUERY_PARAM_NAME = "sas_expiry"; - /** - * Query parameter name for the storage account. {@value} - */ - private static final String STORAGE_ACCOUNT_QUERY_PARAM_NAME = - "storage_account"; - /** - * Query parameter name for the storage account container. {@value} - */ - private static final String CONTAINER_QUERY_PARAM_NAME = "container"; - /** - * Query parameter name for the relative path inside the storage - * account container. {@value} - */ - private static final String RELATIVE_PATH_QUERY_PARAM_NAME = "relative_path"; - /** - * SAS Key Generation Remote http client retry policy spec. {@value} - */ - private static final String - SAS_KEY_GENERATOR_HTTP_CLIENT_RETRY_POLICY_SPEC_DEFAULT = - "10,3,100,2"; - /** - * Saskey caching period - */ - private static final String SASKEY_CACHEENTRY_EXPIRY_PERIOD = - "fs.azure.saskey.cacheentry.expiry.period"; - - private WasbRemoteCallHelper remoteCallHelper = null; - private boolean isKerberosSupportEnabled; - private boolean isSpnegoTokenCacheEnabled; - private RetryPolicy retryPolicy; - private String[] commaSeparatedUrls; - private CachingAuthorizer cache; - - private static final int HOURS_IN_DAY = 24; - private static final int MINUTES_IN_HOUR = 60; - - public RemoteSASKeyGeneratorImpl(Configuration conf) { - super(conf); - } - - public void initialize(Configuration conf) throws IOException { - - LOG.debug("Initializing RemoteSASKeyGeneratorImpl instance"); - - this.retryPolicy = RetryUtils.getMultipleLinearRandomRetry(conf, - SAS_KEY_GENERATOR_HTTP_CLIENT_RETRY_POLICY_ENABLED_KEY, true, - SAS_KEY_GENERATOR_HTTP_CLIENT_RETRY_POLICY_SPEC_KEY, - SAS_KEY_GENERATOR_HTTP_CLIENT_RETRY_POLICY_SPEC_DEFAULT); - - this.isKerberosSupportEnabled = - conf.getBoolean(Constants.AZURE_KERBEROS_SUPPORT_PROPERTY_NAME, false); - this.isSpnegoTokenCacheEnabled = - conf.getBoolean(Constants.AZURE_ENABLE_SPNEGO_TOKEN_CACHE, true); - this.commaSeparatedUrls = conf.getTrimmedStrings(KEY_CRED_SERVICE_URLS); - if (this.commaSeparatedUrls == null || this.commaSeparatedUrls.length <= 0) { - throw new IOException( - KEY_CRED_SERVICE_URLS + " config not set" + " in configuration."); - } - if (isKerberosSupportEnabled && UserGroupInformation.isSecurityEnabled()) { - this.remoteCallHelper = new SecureWasbRemoteCallHelper(retryPolicy, false, - isSpnegoTokenCacheEnabled); - } else { - this.remoteCallHelper = new WasbRemoteCallHelper(retryPolicy); - } - - /* Expire the cache entry five minutes before the actual saskey expiry, so that we never encounter a case - * where a stale sas-key-entry is picked up from the cache; which is expired on use. - */ - long sasKeyExpiryPeriodInMinutes = getSasKeyExpiryPeriod() * HOURS_IN_DAY * MINUTES_IN_HOUR; // sas-expiry is in days, convert into mins - long cacheEntryDurationInMinutes = - conf.getTimeDuration(SASKEY_CACHEENTRY_EXPIRY_PERIOD, sasKeyExpiryPeriodInMinutes, TimeUnit.MINUTES); - cacheEntryDurationInMinutes = (cacheEntryDurationInMinutes > (sasKeyExpiryPeriodInMinutes - 5)) - ? 
(sasKeyExpiryPeriodInMinutes - 5) - : cacheEntryDurationInMinutes; - this.cache = new CachingAuthorizer<>(cacheEntryDurationInMinutes, "SASKEY"); - this.cache.init(conf); - LOG.debug("Initialization of RemoteSASKeyGenerator instance successful"); - } - - @Override - public URI getContainerSASUri(String storageAccount, - String container) throws SASKeyGenerationException { - RemoteSASKeyGenerationResponse sasKeyResponse = null; - try { - CachedSASKeyEntry cacheKey = new CachedSASKeyEntry(storageAccount, container, "/"); - URI cacheResult = cache.get(cacheKey); - if (cacheResult != null) { - return cacheResult; - } - - LOG.debug("Generating Container SAS Key: Storage Account {}, Container {}", storageAccount, container); - URIBuilder uriBuilder = new URIBuilder(); - uriBuilder.setPath("/" + CONTAINER_SAS_OP); - uriBuilder.addParameter(STORAGE_ACCOUNT_QUERY_PARAM_NAME, storageAccount); - uriBuilder.addParameter(CONTAINER_QUERY_PARAM_NAME, container); - uriBuilder.addParameter(SAS_EXPIRY_QUERY_PARAM_NAME, - "" + getSasKeyExpiryPeriod()); - - sasKeyResponse = makeRemoteRequest(commaSeparatedUrls, uriBuilder.getPath(), - uriBuilder.getQueryParams()); - - if (sasKeyResponse.getResponseCode() == REMOTE_CALL_SUCCESS_CODE) { - URI sasKey = new URI(sasKeyResponse.getSasKey()); - cache.put(cacheKey, sasKey); - return sasKey; - } else { - throw new SASKeyGenerationException( - "Remote Service encountered error in SAS Key generation : " - + sasKeyResponse.getResponseMessage()); - } - } catch (URISyntaxException uriSyntaxEx) { - throw new SASKeyGenerationException("Encountered URISyntaxException" - + " while building the HttpGetRequest to remote service for ", - uriSyntaxEx); - } - } - - @Override - public URI getRelativeBlobSASUri(String storageAccount, - String container, String relativePath) throws SASKeyGenerationException { - - try { - CachedSASKeyEntry cacheKey = new CachedSASKeyEntry(storageAccount, container, relativePath); - URI cacheResult = cache.get(cacheKey); - if (cacheResult != null) { - return cacheResult; - } - - LOG.debug("Generating RelativePath SAS Key for relativePath {} inside Container {} inside Storage Account {}", - relativePath, container, storageAccount); - - URIBuilder uriBuilder = new URIBuilder(); - uriBuilder.setPath("/" + BLOB_SAS_OP); - uriBuilder.addParameter(STORAGE_ACCOUNT_QUERY_PARAM_NAME, storageAccount); - uriBuilder.addParameter(CONTAINER_QUERY_PARAM_NAME, container); - uriBuilder.addParameter(RELATIVE_PATH_QUERY_PARAM_NAME, relativePath); - uriBuilder.addParameter(SAS_EXPIRY_QUERY_PARAM_NAME, - "" + getSasKeyExpiryPeriod()); - - RemoteSASKeyGenerationResponse sasKeyResponse = - makeRemoteRequest(commaSeparatedUrls, uriBuilder.getPath(), - uriBuilder.getQueryParams()); - if (sasKeyResponse.getResponseCode() == REMOTE_CALL_SUCCESS_CODE) { - URI sasKey = new URI(sasKeyResponse.getSasKey()); - cache.put(cacheKey, sasKey); - return sasKey; - } else { - throw new SASKeyGenerationException( - "Remote Service encountered error in SAS Key generation : " - + sasKeyResponse.getResponseMessage()); - } - } catch (URISyntaxException uriSyntaxEx) { - throw new SASKeyGenerationException("Encountered URISyntaxException" - + " while building the HttpGetRequest to " + " remote service", - uriSyntaxEx); - } - } - - /** - * Helper method to make a remote request. - * - * @param urls - Urls to use for the remote request - * @param path - hadoop.auth token for the remote request - * @param queryParams - queryParams to be used. 
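As a point of reference, the request that getContainerSASUri sends to the credential service can be reconstructed from the constants above (GET_CONTAINER_SAS, storage_account, container, sas_expiry). A minimal sketch using Apache HttpClient's URIBuilder follows; the serviceUrl parameter is a stand-in, since the deleted code only passes the path and query parameters and lets WasbRemoteCallHelper pick the configured service URL.

import java.net.URI;
import java.net.URISyntaxException;
import org.apache.http.client.utils.URIBuilder;

/** Illustrative only: assemble the container-SAS request URI. */
public final class SasRequestUriSketch {
  public static URI containerSasRequest(String serviceUrl, String account,
      String container, long expiryDays) throws URISyntaxException {
    return new URIBuilder(serviceUrl)
        .setPath("/GET_CONTAINER_SAS")
        .addParameter("storage_account", account)
        .addParameter("container", container)
        .addParameter("sas_expiry", String.valueOf(expiryDays))
        .build();
  }
}

The blob-level variant is the same shape, with the path /GET_RELATIVE_BLOB_SAS and an extra relative_path parameter.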
- * @return RemoteSASKeyGenerationResponse - */ - private RemoteSASKeyGenerationResponse makeRemoteRequest(String[] urls, - String path, List queryParams) - throws SASKeyGenerationException { - - try { - String responseBody = remoteCallHelper - .makeRemoteRequest(urls, path, queryParams, HttpGet.METHOD_NAME); - return RESPONSE_READER.readValue(responseBody); - - } catch (WasbRemoteCallException remoteCallEx) { - throw new SASKeyGenerationException("Encountered RemoteCallException" - + " while retrieving SAS key from remote service", remoteCallEx); - } catch (JsonParseException jsonParserEx) { - throw new SASKeyGenerationException("Encountered JsonParseException " - + "while parsing the response from remote" - + " service into RemoteSASKeyGenerationResponse object", - jsonParserEx); - } catch (JsonMappingException jsonMappingEx) { - throw new SASKeyGenerationException("Encountered JsonMappingException" - + " while mapping the response from remote service into " - + "RemoteSASKeyGenerationResponse object", jsonMappingEx); - } catch (IOException ioEx) { - throw new SASKeyGenerationException("Encountered IOException while " - + "accessing remote service to retrieve SAS Key", ioEx); - } - } -} - -/** - * POJO representing the response expected from a Remote - * SAS Key generation service. - * The remote SAS Key generation service is expected to - * return SAS key in json format: - * { - * "responseCode" : 0 or non-zero , - * "responseMessage" : relavant message on failure , - * "sasKey" : Requested SAS Key - * } - */ -class RemoteSASKeyGenerationResponse { - - /** - * Response code for the call. - */ - private int responseCode; - - /** - * An intelligent message corresponding to - * result. Specifically in case of failure - * the reason for failure. - */ - private String responseMessage; - - /** - * SAS Key corresponding to the request. - */ - private String sasKey; - - public int getResponseCode() { - return responseCode; - } - - public void setResponseCode(int responseCode) { - this.responseCode = responseCode; - } - - public String getResponseMessage() { - return responseMessage; - } - - public void setResponseMessage(String responseMessage) { - this.responseMessage = responseMessage; - } - - public String getSasKey() { - return sasKey; - } - - public void setSasKey(String sasKey) { - this.sasKey = sasKey; - } -} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteWasbAuthorizerImpl.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteWasbAuthorizerImpl.java deleted file mode 100644 index eca8443b6c587..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/RemoteWasbAuthorizerImpl.java +++ /dev/null @@ -1,250 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import com.fasterxml.jackson.core.JsonParseException; -import com.fasterxml.jackson.databind.JsonMappingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.ObjectReader; -import org.apache.hadoop.classification.VisibleForTesting; -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.azure.security.Constants; -import org.apache.hadoop.io.retry.RetryPolicy; -import org.apache.hadoop.io.retry.RetryUtils; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.client.utils.URIBuilder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import java.util.concurrent.TimeUnit; - -import java.io.IOException; - -import static org.apache.hadoop.fs.azure.WasbRemoteCallHelper.REMOTE_CALL_SUCCESS_CODE; - -/** - * Class implementing WasbAuthorizerInterface using a remote - * service that implements the authorization operation. This - * class expects the url of the remote service to be passed - * via config. - */ -public class RemoteWasbAuthorizerImpl implements WasbAuthorizerInterface { - - public static final Logger LOG = LoggerFactory - .getLogger(RemoteWasbAuthorizerImpl.class); - private static final ObjectReader RESPONSE_READER = new ObjectMapper() - .readerFor(RemoteWasbAuthorizerResponse.class); - - /** - * Configuration parameter name expected in the Configuration object to - * provide the urls of the remote service instances. {@value} - */ - public static final String KEY_REMOTE_AUTH_SERVICE_URLS = - "fs.azure.authorization.remote.service.urls"; - /** - * Authorization operation OP name in the remote service {@value} - */ - private static final String CHECK_AUTHORIZATION_OP = "CHECK_AUTHORIZATION"; - /** - * Query parameter specifying the access operation type. {@value} - */ - private static final String ACCESS_OPERATION_QUERY_PARAM_NAME = - "operation_type"; - /** - * Query parameter specifying the wasb absolute path. {@value} - */ - private static final String WASB_ABSOLUTE_PATH_QUERY_PARAM_NAME = - "wasb_absolute_path"; - /** - * Query parameter name for sending owner of the specific resource {@value} - */ - private static final String WASB_RESOURCE_OWNER_QUERY_PARAM_NAME = - "wasb_resource_owner"; - - /** - * Authorization Remote http client retry policy enabled configuration key. {@value} - */ - private static final String AUTHORIZER_HTTP_CLIENT_RETRY_POLICY_ENABLED_KEY = - "fs.azure.authorizer.http.retry.policy.enabled"; - - /** - * Authorization Remote http client retry policy spec. {@value} - */ - private static final String AUTHORIZER_HTTP_CLIENT_RETRY_POLICY_SPEC_SPEC = - "fs.azure.authorizer.http.retry.policy.spec"; - - /** - * Authorization Remote http client retry policy spec default value. 
{@value} - */ - private static final String AUTHORIZER_HTTP_CLIENT_RETRY_POLICY_SPEC_DEFAULT = - "10,3,100,2"; - - /** - * Authorization caching period - */ - private static final String AUTHORIZATION_CACHEENTRY_EXPIRY_PERIOD = - "fs.azure.authorization.cacheentry.expiry.period"; - - private WasbRemoteCallHelper remoteCallHelper = null; - private boolean isKerberosSupportEnabled; - private boolean isSpnegoTokenCacheEnabled; - private RetryPolicy retryPolicy; - private String[] commaSeparatedUrls = null; - private CachingAuthorizer cache; - - @VisibleForTesting public void updateWasbRemoteCallHelper( - WasbRemoteCallHelper helper) { - this.remoteCallHelper = helper; - } - - @Override - public void init(Configuration conf) - throws IOException { - LOG.debug("Initializing RemoteWasbAuthorizerImpl instance"); - this.isKerberosSupportEnabled = - conf.getBoolean(Constants.AZURE_KERBEROS_SUPPORT_PROPERTY_NAME, false); - this.isSpnegoTokenCacheEnabled = - conf.getBoolean(Constants.AZURE_ENABLE_SPNEGO_TOKEN_CACHE, true); - this.commaSeparatedUrls = - conf.getTrimmedStrings(KEY_REMOTE_AUTH_SERVICE_URLS); - if (this.commaSeparatedUrls == null - || this.commaSeparatedUrls.length <= 0) { - throw new IOException(KEY_REMOTE_AUTH_SERVICE_URLS + " config not set" - + " in configuration."); - } - this.retryPolicy = RetryUtils.getMultipleLinearRandomRetry(conf, - AUTHORIZER_HTTP_CLIENT_RETRY_POLICY_ENABLED_KEY, true, - AUTHORIZER_HTTP_CLIENT_RETRY_POLICY_SPEC_SPEC, - AUTHORIZER_HTTP_CLIENT_RETRY_POLICY_SPEC_DEFAULT); - if (isKerberosSupportEnabled && UserGroupInformation.isSecurityEnabled()) { - this.remoteCallHelper = new SecureWasbRemoteCallHelper(retryPolicy, false, - isSpnegoTokenCacheEnabled); - } else { - this.remoteCallHelper = new WasbRemoteCallHelper(retryPolicy); - } - - this.cache = new CachingAuthorizer<>( - conf.getTimeDuration(AUTHORIZATION_CACHEENTRY_EXPIRY_PERIOD, 5L, TimeUnit.MINUTES), "AUTHORIZATION" - ); - this.cache.init(conf); - } - - @Override - public boolean authorize(String wasbAbsolutePath, String accessType, String resourceOwner) - throws IOException { - - /* Make an exception for the internal -RenamePending files */ - if (wasbAbsolutePath.endsWith(NativeAzureFileSystem.FolderRenamePending.SUFFIX)) { - return true; - } - - CachedAuthorizerEntry cacheKey = new CachedAuthorizerEntry(wasbAbsolutePath, accessType, resourceOwner); - Boolean cacheresult = cache.get(cacheKey); - if (cacheresult != null) { - return cacheresult; - } - - boolean authorizeresult = authorizeInternal(wasbAbsolutePath, accessType, resourceOwner); - cache.put(cacheKey, authorizeresult); - - return authorizeresult; - } - - private boolean authorizeInternal(String wasbAbsolutePath, String accessType, String resourceOwner) - throws IOException { - - try { - final URIBuilder uriBuilder = new URIBuilder(); - uriBuilder.setPath("/" + CHECK_AUTHORIZATION_OP); - uriBuilder - .addParameter(WASB_ABSOLUTE_PATH_QUERY_PARAM_NAME, wasbAbsolutePath); - uriBuilder.addParameter(ACCESS_OPERATION_QUERY_PARAM_NAME, accessType); - if (resourceOwner != null && StringUtils.isNotEmpty(resourceOwner)) { - uriBuilder.addParameter(WASB_RESOURCE_OWNER_QUERY_PARAM_NAME, - resourceOwner); - } - - String responseBody = remoteCallHelper - .makeRemoteRequest(commaSeparatedUrls, uriBuilder.getPath(), - uriBuilder.getQueryParams(), HttpGet.METHOD_NAME); - - RemoteWasbAuthorizerResponse authorizerResponse = RESPONSE_READER - .readValue(responseBody); - - if (authorizerResponse == null) { - throw new WasbAuthorizationException( - 
"RemoteWasbAuthorizerResponse object null from remote call"); - } else if (authorizerResponse.getResponseCode() - == REMOTE_CALL_SUCCESS_CODE) { - return authorizerResponse.getAuthorizationResult(); - } else { - throw new WasbAuthorizationException( - "Remote authorization" + " service encountered an error " - + authorizerResponse.getResponseMessage()); - } - } catch (WasbRemoteCallException | JsonParseException | JsonMappingException ex) { - throw new WasbAuthorizationException(ex); - } - } -} - -/** - * POJO representing the response expected from a remote - * authorization service. - * The remote service is expected to return the authorization - * response in the following JSON format - * { - * "responseCode" : 0 or non-zero , - * "responseMessage" : relevant message of failure - * "authorizationResult" : authorization result - * true - if auhorization allowed - * false - otherwise. - * } - */ -class RemoteWasbAuthorizerResponse { - - private int responseCode; - private boolean authorizationResult; - private String responseMessage; - - public int getResponseCode() { - return responseCode; - } - - public void setResponseCode(int responseCode) { - this.responseCode = responseCode; - } - - public boolean getAuthorizationResult() { - return authorizationResult; - } - - public void setAuthorizationResult(boolean authorizationResult) { - this.authorizationResult = authorizationResult; - } - - public String getResponseMessage() { - return responseMessage; - } - - public void setResponseMessage(String message) { - this.responseMessage = message; - } -} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SASKeyGenerationException.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SASKeyGenerationException.java deleted file mode 100644 index 7cfafc3ed9406..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SASKeyGenerationException.java +++ /dev/null @@ -1,40 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -/** - * Exception that gets thrown during generation of SAS Key. 
- * - */ -public class SASKeyGenerationException extends AzureException { - - private static final long serialVersionUID = 1L; - - public SASKeyGenerationException(String message) { - super(message); - } - - public SASKeyGenerationException(String message, Throwable cause) { - super(message, cause); - } - - public SASKeyGenerationException(Throwable t) { - super(t); - } -} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SASKeyGeneratorImpl.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SASKeyGeneratorImpl.java deleted file mode 100644 index 1a8e7541fc86c..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SASKeyGeneratorImpl.java +++ /dev/null @@ -1,61 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.util.concurrent.TimeUnit; - -import org.apache.hadoop.conf.Configuration; - -/** - * Abstract base class for the SAS Key Generator implementation - * - */ -public abstract class SASKeyGeneratorImpl implements SASKeyGeneratorInterface { - - /** - * Configuration key to be used to specify the expiry period for SAS keys - * This value currently is specified in days. {@value} - */ - public static final String KEY_SAS_KEY_EXPIRY_PERIOD = - "fs.azure.sas.expiry.period"; - - /** - * Default value for the SAS key expiry period in days. {@value} - */ - public static final long DEFAULT_CONTAINER_SAS_KEY_PERIOD = 90; - - private long sasKeyExpiryPeriod; - - private Configuration conf; - - public SASKeyGeneratorImpl(Configuration conf) { - this.conf = conf; - this.sasKeyExpiryPeriod = conf.getTimeDuration( - KEY_SAS_KEY_EXPIRY_PERIOD, DEFAULT_CONTAINER_SAS_KEY_PERIOD, - TimeUnit.DAYS); - } - - public long getSasKeyExpiryPeriod() { - return sasKeyExpiryPeriod; - } - - public Configuration getConf() { - return conf; - } -} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SASKeyGeneratorInterface.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SASKeyGeneratorInterface.java deleted file mode 100644 index 3067c1096df84..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SASKeyGeneratorInterface.java +++ /dev/null @@ -1,66 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.net.URI; - -/** - * Iterface used by AzureNativeFileSysteStore to retrieve SAS Keys for the - * respective azure storage entity. This interface is expected to be - * implemented in two modes: - * 1) Local Mode: In this mode SAS Keys are generated - * in same address space as the WASB. This will be primarily used for - * testing purposes. - * 2) Remote Mode: In this mode SAS Keys are generated in a sepearte process - * other than WASB and will be communicated via client. - */ -public interface SASKeyGeneratorInterface { - - /** - * Interface method to retrieve SAS Key for a container within the storage - * account. - * - * @param accountName - * - Storage account name - * @param container - * - Container name within the storage account. - * @return SAS URI for the container. - * @throws SASKeyGenerationException Exception that gets thrown during - * generation of SAS Key. - */ - URI getContainerSASUri(String accountName, String container) - throws SASKeyGenerationException; - - /** - * Interface method to retrieve SAS Key for a blob within the container of the - * storage account. - * - * @param accountName - * - Storage account name - * @param container - * - Container name within the storage account. - * @param relativePath - * - Relative path within the container - * @return SAS URI for the relative path blob. - * @throws SASKeyGenerationException Exception that gets thrown during - * generation of SAS Key. - */ - URI getRelativeBlobSASUri(String accountName, String container, - String relativePath) throws SASKeyGenerationException; -} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SecureModeException.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SecureModeException.java deleted file mode 100644 index 5bec77d165015..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SecureModeException.java +++ /dev/null @@ -1,40 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -/** - * Exception that is thrown when any error is encountered - * is SAS Mode operation of WASB. 
- */ -public class SecureModeException extends AzureException { - - private static final long serialVersionUID = 1L; - - public SecureModeException(String message) { - super(message); - } - - public SecureModeException(String message, Throwable cause) { - super(message, cause); - } - - public SecureModeException(Throwable t) { - super(t); - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SecureStorageInterfaceImpl.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SecureStorageInterfaceImpl.java deleted file mode 100644 index f6eb75cad59c8..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SecureStorageInterfaceImpl.java +++ /dev/null @@ -1,606 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.EnumSet; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; - -import org.apache.hadoop.conf.Configuration; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import com.microsoft.azure.storage.AccessCondition; -import com.microsoft.azure.storage.CloudStorageAccount; -import com.microsoft.azure.storage.OperationContext; -import com.microsoft.azure.storage.RetryPolicyFactory; -import com.microsoft.azure.storage.StorageCredentials; -import com.microsoft.azure.storage.StorageException; -import com.microsoft.azure.storage.StorageUri; -import com.microsoft.azure.storage.blob.BlobProperties; -import com.microsoft.azure.storage.blob.BlobRequestOptions; -import com.microsoft.azure.storage.blob.BlockListingFilter; -import com.microsoft.azure.storage.blob.CloudBlob; -import com.microsoft.azure.storage.blob.CloudBlobContainer; -import com.microsoft.azure.storage.blob.CloudBlobDirectory; -import com.microsoft.azure.storage.blob.CloudBlockBlob; -import com.microsoft.azure.storage.blob.CloudPageBlob; -import com.microsoft.azure.storage.blob.CopyState; -import com.microsoft.azure.storage.blob.DeleteSnapshotsOption; -import com.microsoft.azure.storage.blob.ListBlobItem; -import com.microsoft.azure.storage.blob.BlobListingDetails; -import com.microsoft.azure.storage.blob.PageRange; -import com.microsoft.azure.storage.blob.BlockEntry; - -import org.apache.hadoop.classification.InterfaceAudience; - -/*** - * An implementation of the StorageInterface for SAS Key mode. 
- * - */ - -public class SecureStorageInterfaceImpl extends StorageInterface { - - public static final Logger LOG = LoggerFactory.getLogger( - SecureStorageInterfaceImpl.class); - public static final String SAS_ERROR_CODE = "SAS Error"; - private SASKeyGeneratorInterface sasKeyGenerator; - private String storageAccount; - private RetryPolicyFactory retryPolicy; - private int timeoutIntervalInMs; - private boolean useContainerSasKeyForAllAccess; - - /** - * Configuration key to specify if containerSasKey should be used for all accesses - */ - public static final String KEY_USE_CONTAINER_SASKEY_FOR_ALL_ACCESS = - "fs.azure.saskey.usecontainersaskeyforallaccess"; - - public SecureStorageInterfaceImpl(boolean useLocalSASKeyMode, - Configuration conf) throws SecureModeException { - - if (useLocalSASKeyMode) { - LOG.debug("Authenticating with SecureStorage and local SAS key"); - this.sasKeyGenerator = new LocalSASKeyGeneratorImpl(conf); - } else { - LOG.debug("Authenticating with SecureStorage and remote SAS key generation"); - RemoteSASKeyGeneratorImpl remoteSasKeyGenerator = - new RemoteSASKeyGeneratorImpl(conf); - try { - remoteSasKeyGenerator.initialize(conf); - } catch (IOException ioe) { - throw new SecureModeException("Remote SAS Key mode could" - + " not be initialized", ioe); - } - this.sasKeyGenerator = remoteSasKeyGenerator; - } - this.useContainerSasKeyForAllAccess = conf.getBoolean(KEY_USE_CONTAINER_SASKEY_FOR_ALL_ACCESS, true); - LOG.debug("Container SAS key {} be used for all access", - useContainerSasKeyForAllAccess ? "will" : "will not"); - } - - @Override - public void setTimeoutInMs(int timeoutInMs) { - timeoutIntervalInMs = timeoutInMs; - } - - @Override - public void setRetryPolicyFactory(RetryPolicyFactory retryPolicyFactory) { - retryPolicy = retryPolicyFactory; - } - - @Override - public void createBlobClient(CloudStorageAccount account) { - String errorMsg = "createBlobClient is an invalid operation in" - + " SAS Key Mode"; - LOG.error(errorMsg); - throw new UnsupportedOperationException(errorMsg); - } - - @Override - public void createBlobClient(URI baseUri) { - String errorMsg = "createBlobClient is an invalid operation in " - + "SAS Key Mode"; - LOG.error(errorMsg); - throw new UnsupportedOperationException(errorMsg); - } - - @Override - public void createBlobClient(URI baseUri, StorageCredentials credentials) { - String errorMsg = "createBlobClient is an invalid operation in SAS " - + "Key Mode"; - LOG.error(errorMsg); - throw new UnsupportedOperationException(errorMsg); - } - - @Override - public StorageCredentials getCredentials() { - String errorMsg = "getCredentials is an invalid operation in SAS " - + "Key Mode"; - LOG.error(errorMsg); - throw new UnsupportedOperationException(errorMsg); - } - - @Override - public CloudBlobContainerWrapper getContainerReference(String name) - throws URISyntaxException, StorageException { - - try { - CloudBlobContainer container = new CloudBlobContainer(sasKeyGenerator.getContainerSASUri( - storageAccount, name)); - if (retryPolicy != null) { - container.getServiceClient().getDefaultRequestOptions().setRetryPolicyFactory(retryPolicy); - } - if (timeoutIntervalInMs > 0) { - container.getServiceClient().getDefaultRequestOptions().setTimeoutIntervalInMs(timeoutIntervalInMs); - } - return (useContainerSasKeyForAllAccess) - ? 
new SASCloudBlobContainerWrapperImpl(storageAccount, container, null) - : new SASCloudBlobContainerWrapperImpl(storageAccount, container, sasKeyGenerator); - } catch (SASKeyGenerationException sasEx) { - String errorMsg = "Encountered SASKeyGeneration exception while " - + "generating SAS Key for container : " + name - + " inside Storage account : " + storageAccount; - LOG.error(errorMsg); - throw new StorageException(SAS_ERROR_CODE, errorMsg, sasEx); - } - } - - public void setStorageAccountName(String storageAccount) { - this.storageAccount = storageAccount; - } - - @InterfaceAudience.Private - static class SASCloudBlobContainerWrapperImpl - extends CloudBlobContainerWrapper { - - private final CloudBlobContainer container; - private String storageAccount; - private SASKeyGeneratorInterface sasKeyGenerator; - - public SASCloudBlobContainerWrapperImpl(String storageAccount, - CloudBlobContainer container, SASKeyGeneratorInterface sasKeyGenerator) { - this.storageAccount = storageAccount; - this.container = container; - this.sasKeyGenerator = sasKeyGenerator; - } - - @Override - public String getName() { - return container.getName(); - } - - @Override - public boolean exists(OperationContext opContext) throws StorageException { - return container.exists(AccessCondition.generateEmptyCondition(), null, - opContext); - } - - @Override - public void create(OperationContext opContext) throws StorageException { - container.create(null, opContext); - } - - @Override - public HashMap getMetadata() { - return container.getMetadata(); - } - - @Override - public void setMetadata(HashMap metadata) { - container.setMetadata(metadata); - } - - @Override - public void downloadAttributes(OperationContext opContext) - throws StorageException { - container.downloadAttributes(AccessCondition.generateEmptyCondition(), - null, opContext); - } - - @Override - public void uploadMetadata(OperationContext opContext) - throws StorageException { - container.uploadMetadata(AccessCondition.generateEmptyCondition(), null, - opContext); - } - - @Override - public CloudBlobDirectoryWrapper getDirectoryReference(String relativePath) - throws URISyntaxException, StorageException { - - CloudBlobDirectory dir = container.getDirectoryReference(relativePath); - return new SASCloudBlobDirectoryWrapperImpl(dir); - } - - @Override - public CloudBlobWrapper getBlockBlobReference(String relativePath) - throws URISyntaxException, StorageException { - try { - CloudBlockBlob blob = (sasKeyGenerator!=null) - ? new CloudBlockBlob(sasKeyGenerator.getRelativeBlobSASUri(storageAccount, getName(), relativePath)) - : container.getBlockBlobReference(relativePath); - blob.getServiceClient().setDefaultRequestOptions( - container.getServiceClient().getDefaultRequestOptions()); - return new SASCloudBlockBlobWrapperImpl(blob); - } catch (SASKeyGenerationException sasEx) { - String errorMsg = "Encountered SASKeyGeneration exception while " - + "generating SAS Key for relativePath : " + relativePath - + " inside container : " + getName() + " Storage account : " + storageAccount; - LOG.error(errorMsg); - throw new StorageException(SAS_ERROR_CODE, errorMsg, sasEx); - } - } - - @Override - public CloudBlobWrapper getPageBlobReference(String relativePath) - throws URISyntaxException, StorageException { - try { - CloudPageBlob blob = (sasKeyGenerator!=null) - ? 
new CloudPageBlob(sasKeyGenerator.getRelativeBlobSASUri(storageAccount, getName(), relativePath)) - : container.getPageBlobReference(relativePath); - - blob.getServiceClient().setDefaultRequestOptions( - container.getServiceClient().getDefaultRequestOptions()); - return new SASCloudPageBlobWrapperImpl(blob); - } catch (SASKeyGenerationException sasEx) { - String errorMsg = "Encountered SASKeyGeneration exception while " - + "generating SAS Key for relativePath : " + relativePath - + " inside container : " + getName() - + " Storage account : " + storageAccount; - LOG.error(errorMsg); - throw new StorageException(SAS_ERROR_CODE, errorMsg, sasEx); - } - } - } - - // - // WrappingIterator - // - - /** - * This iterator wraps every ListBlobItem as they come from the listBlobs() - * calls to their proper wrapping objects. - */ - private static class SASWrappingIterator implements Iterator { - private final Iterator present; - - public SASWrappingIterator(Iterator present) { - this.present = present; - } - - public static Iterable wrap( - final Iterable present) { - return new Iterable() { - @Override - public Iterator iterator() { - return new SASWrappingIterator(present.iterator()); - } - }; - } - - @Override - public boolean hasNext() { - return present.hasNext(); - } - - @Override - public ListBlobItem next() { - ListBlobItem unwrapped = present.next(); - if (unwrapped instanceof CloudBlobDirectory) { - return new SASCloudBlobDirectoryWrapperImpl((CloudBlobDirectory) unwrapped); - } else if (unwrapped instanceof CloudBlockBlob) { - return new SASCloudBlockBlobWrapperImpl((CloudBlockBlob) unwrapped); - } else if (unwrapped instanceof CloudPageBlob) { - return new SASCloudPageBlobWrapperImpl((CloudPageBlob) unwrapped); - } else { - return unwrapped; - } - } - - @Override - public void remove() { - present.remove(); - } - } - - // - // CloudBlobDirectoryWrapperImpl - // - @InterfaceAudience.Private - static class SASCloudBlobDirectoryWrapperImpl extends CloudBlobDirectoryWrapper { - private final CloudBlobDirectory directory; - - public SASCloudBlobDirectoryWrapperImpl(CloudBlobDirectory directory) { - this.directory = directory; - } - - @Override - public URI getUri() { - return directory.getUri(); - } - - @Override - public Iterable listBlobs(String prefix, - boolean useFlatBlobListing, EnumSet listingDetails, - BlobRequestOptions options, OperationContext opContext) - throws URISyntaxException, StorageException { - return SASWrappingIterator.wrap(directory.listBlobs(prefix, - useFlatBlobListing, listingDetails, options, opContext)); - } - - @Override - public CloudBlobContainer getContainer() throws URISyntaxException, - StorageException { - return directory.getContainer(); - } - - @Override - public CloudBlobDirectory getParent() throws URISyntaxException, - StorageException { - return directory.getParent(); - } - - @Override - public StorageUri getStorageUri() { - return directory.getStorageUri(); - } - } - - abstract static class SASCloudBlobWrapperImpl implements CloudBlobWrapper { - private final CloudBlob blob; - @Override - public CloudBlob getBlob() { - return blob; - } - - public URI getUri() { - return getBlob().getUri(); - } - - protected SASCloudBlobWrapperImpl(CloudBlob blob) { - this.blob = blob; - } - - @Override - public HashMap getMetadata() { - return getBlob().getMetadata(); - } - - @Override - public void delete(OperationContext opContext, SelfRenewingLease lease) - throws StorageException { - getBlob().delete(DeleteSnapshotsOption.NONE, getLeaseCondition(lease), - null, 
opContext); - } - - /** - * Return and access condition for this lease, or else null if - * there's no lease. - */ - private AccessCondition getLeaseCondition(SelfRenewingLease lease) { - AccessCondition leaseCondition = null; - if (lease != null) { - leaseCondition = AccessCondition.generateLeaseCondition(lease.getLeaseID()); - } - return leaseCondition; - } - - @Override - public boolean exists(OperationContext opContext) - throws StorageException { - return getBlob().exists(null, null, opContext); - } - - @Override - public void downloadAttributes( - OperationContext opContext) throws StorageException { - getBlob().downloadAttributes(null, null, opContext); - } - - @Override - public BlobProperties getProperties() { - return getBlob().getProperties(); - } - - @Override - public void setMetadata(HashMap metadata) { - getBlob().setMetadata(metadata); - } - - @Override - public InputStream openInputStream( - BlobRequestOptions options, - OperationContext opContext) throws StorageException { - return getBlob().openInputStream(null, options, opContext); - } - - public OutputStream openOutputStream( - BlobRequestOptions options, - OperationContext opContext) throws StorageException { - return ((CloudBlockBlob) getBlob()).openOutputStream(null, options, opContext); - } - - public void upload(InputStream sourceStream, OperationContext opContext) - throws StorageException, IOException { - getBlob().upload(sourceStream, 0, null, null, opContext); - } - - @Override - public CloudBlobContainer getContainer() throws URISyntaxException, - StorageException { - return getBlob().getContainer(); - } - - @Override - public CloudBlobDirectory getParent() throws URISyntaxException, - StorageException { - return getBlob().getParent(); - } - - @Override - public void uploadMetadata(OperationContext opContext) - throws StorageException { - uploadMetadata(null, null, opContext); - } - - @Override - public void uploadMetadata(AccessCondition accessConditions, BlobRequestOptions options, - OperationContext opContext) throws StorageException{ - getBlob().uploadMetadata(accessConditions, options, opContext); - } - - public void uploadProperties(OperationContext opContext, SelfRenewingLease lease) - throws StorageException { - - // Include lease in request if lease not null. - getBlob().uploadProperties(getLeaseCondition(lease), null, opContext); - } - - @Override - public int getStreamMinimumReadSizeInBytes() { - return getBlob().getStreamMinimumReadSizeInBytes(); - } - - @Override - public void setStreamMinimumReadSizeInBytes(int minimumReadSizeBytes) { - getBlob().setStreamMinimumReadSizeInBytes(minimumReadSizeBytes); - } - - @Override - public void setWriteBlockSizeInBytes(int writeBlockSizeBytes) { - getBlob().setStreamWriteSizeInBytes(writeBlockSizeBytes); - } - - @Override - public StorageUri getStorageUri() { - return getBlob().getStorageUri(); - } - - @Override - public CopyState getCopyState() { - return getBlob().getCopyState(); - } - - @Override - public void startCopyFromBlob(CloudBlobWrapper sourceBlob, BlobRequestOptions options, - OperationContext opContext, boolean overwriteDestination) - throws StorageException, URISyntaxException { - AccessCondition dstAccessCondition = - overwriteDestination - ? 
null - : AccessCondition.generateIfNotExistsCondition(); - getBlob().startCopy(sourceBlob.getBlob().getQualifiedUri(), - null, dstAccessCondition, options, opContext); - } - - @Override - public void downloadRange(long offset, long length, OutputStream outStream, - BlobRequestOptions options, OperationContext opContext) - throws StorageException, IOException { - - getBlob().downloadRange(offset, length, outStream, null, options, opContext); - } - - @Override - public SelfRenewingLease acquireLease() throws StorageException { - return new SelfRenewingLease(this, false); - } - } - - // - // CloudBlockBlobWrapperImpl - // - - static class SASCloudBlockBlobWrapperImpl extends SASCloudBlobWrapperImpl implements CloudBlockBlobWrapper { - - public SASCloudBlockBlobWrapperImpl(CloudBlockBlob blob) { - super(blob); - } - - public OutputStream openOutputStream( - BlobRequestOptions options, - OperationContext opContext) throws StorageException { - return ((CloudBlockBlob) getBlob()).openOutputStream(null, options, opContext); - } - - public void upload(InputStream sourceStream, OperationContext opContext) - throws StorageException, IOException { - getBlob().upload(sourceStream, 0, null, null, opContext); - } - - public void uploadProperties(OperationContext opContext) - throws StorageException { - getBlob().uploadProperties(null, null, opContext); - } - - @Override - public List downloadBlockList(BlockListingFilter filter, BlobRequestOptions options, - OperationContext opContext) throws IOException, StorageException { - return ((CloudBlockBlob) getBlob()).downloadBlockList(filter, null, options, opContext); - - } - - @Override - public void uploadBlock(String blockId, AccessCondition accessCondition, - InputStream sourceStream, - long length, BlobRequestOptions options, - OperationContext opContext) throws IOException, StorageException { - ((CloudBlockBlob) getBlob()).uploadBlock(blockId, sourceStream, length, - accessCondition, options, opContext); - } - - @Override - public void commitBlockList(List blockList, AccessCondition accessCondition, BlobRequestOptions options, - OperationContext opContext) throws IOException, StorageException { - ((CloudBlockBlob) getBlob()).commitBlockList(blockList, accessCondition, options, opContext); - } - } - - static class SASCloudPageBlobWrapperImpl extends SASCloudBlobWrapperImpl implements CloudPageBlobWrapper { - public SASCloudPageBlobWrapperImpl(CloudPageBlob blob) { - super(blob); - } - - public void create(final long length, BlobRequestOptions options, - OperationContext opContext) throws StorageException { - ((CloudPageBlob) getBlob()).create(length, null, options, opContext); - } - - public void uploadPages(final InputStream sourceStream, final long offset, - final long length, BlobRequestOptions options, OperationContext opContext) - throws StorageException, IOException { - ((CloudPageBlob) getBlob()).uploadPages(sourceStream, offset, length, null, - options, opContext); - } - - public ArrayList downloadPageRanges(BlobRequestOptions options, - OperationContext opContext) throws StorageException { - return ((CloudPageBlob) getBlob()).downloadPageRanges( - null, options, opContext); - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SecureWasbRemoteCallHelper.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SecureWasbRemoteCallHelper.java deleted file mode 100644 index f4ec1721ec490..0000000000000 --- 
a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SecureWasbRemoteCallHelper.java +++ /dev/null @@ -1,230 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import org.apache.commons.lang3.Validate; -import org.apache.hadoop.fs.azure.security.Constants; -import org.apache.hadoop.fs.azure.security.SpnegoToken; -import org.apache.hadoop.fs.azure.security.WasbDelegationTokenIdentifier; -import org.apache.hadoop.io.retry.RetryPolicy; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.security.authentication.client.AuthenticatedURL; -import org.apache.hadoop.security.authentication.client.AuthenticationException; -import org.apache.hadoop.security.authentication.client.Authenticator; -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.security.token.TokenIdentifier; -import org.apache.hadoop.security.token.delegation.web.KerberosDelegationTokenAuthenticator; -import org.apache.http.NameValuePair; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.client.methods.HttpPost; -import org.apache.http.client.methods.HttpPut; -import org.apache.http.client.methods.HttpUriRequest; -import org.apache.http.client.utils.URIBuilder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.net.InetAddress; -import java.net.URISyntaxException; -import java.security.PrivilegedExceptionAction; -import java.util.List; - -/** - * Helper class the has constants and helper methods - * used in WASB when integrating with a remote http cred - * service which uses Kerberos and delegation tokens. - * Currently, remote service will be used to generate - * SAS keys, authorization and delegation token operations. - */ -public class SecureWasbRemoteCallHelper extends WasbRemoteCallHelper { - - public static final Logger LOG = - LoggerFactory.getLogger(SecureWasbRemoteCallHelper.class); - /** - * Delegation token query parameter to be used when making rest call. - */ - private static final String DELEGATION_TOKEN_QUERY_PARAM_NAME = "delegation"; - - /** - * Delegation token to be used for making the remote call. - */ - private Token delegationToken = null; - - /** - * Does Remote Http Call requires Kerberos Authentication always, even if the delegation token is present. - */ - private boolean alwaysRequiresKerberosAuth; - - /** - * Enable caching of Spnego token. - */ - private boolean isSpnegoTokenCachingEnabled; - - /** - * Cached SPNEGO token. 
- */ - private SpnegoToken spnegoToken; - - public SecureWasbRemoteCallHelper(RetryPolicy retryPolicy, - boolean alwaysRequiresKerberosAuth, boolean isSpnegoTokenCachingEnabled) { - super(retryPolicy); - this.alwaysRequiresKerberosAuth = alwaysRequiresKerberosAuth; - this.isSpnegoTokenCachingEnabled = isSpnegoTokenCachingEnabled; - } - - @Override - public String makeRemoteRequest(final String[] urls, - final String path, final List queryParams, - final String httpMethod) throws IOException { - final UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); - UserGroupInformation connectUgi = ugi.getRealUser(); - if (connectUgi != null) { - queryParams.add(new NameValuePair() { - @Override public String getName() { - return Constants.DOAS_PARAM; - } - - @Override public String getValue() { - return ugi.getShortUserName(); - } - }); - } else { - connectUgi = ugi; - } - - final Token delegationToken = getDelegationToken(ugi); - if (!alwaysRequiresKerberosAuth && delegationToken != null) { - final String delegationTokenEncodedUrlString = - delegationToken.encodeToUrlString(); - queryParams.add(new NameValuePair() { - @Override public String getName() { - return DELEGATION_TOKEN_QUERY_PARAM_NAME; - } - - @Override public String getValue() { - return delegationTokenEncodedUrlString; - } - }); - } - - if (delegationToken == null) { - connectUgi.checkTGTAndReloginFromKeytab(); - } - String s = null; - try { - s = connectUgi.doAs(new PrivilegedExceptionAction() { - @Override public String run() throws Exception { - return retryableRequest(urls, path, queryParams, httpMethod); - } - }); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw new IOException(e.getMessage(), e); - } - return s; - } - - @Override - public HttpUriRequest getHttpRequest(String[] urls, String path, - List queryParams, int urlIndex, String httpMethod, - boolean requiresNewAuth) throws URISyntaxException, IOException { - URIBuilder uriBuilder = - new URIBuilder(urls[urlIndex]).setPath(path).setParameters(queryParams); - if (uriBuilder.getHost().equals("localhost")) { - uriBuilder.setHost(InetAddress.getLocalHost().getCanonicalHostName()); - } - HttpUriRequest httpUriRequest = null; - switch (httpMethod) { - case HttpPut.METHOD_NAME: - httpUriRequest = new HttpPut(uriBuilder.build()); - break; - case HttpPost.METHOD_NAME: - httpUriRequest = new HttpPost(uriBuilder.build()); - break; - default: - httpUriRequest = new HttpGet(uriBuilder.build()); - break; - } - - LOG.debug("SecureWasbRemoteCallHelper#getHttpRequest() {}", - uriBuilder.build().toURL()); - if (alwaysRequiresKerberosAuth || delegationToken == null) { - AuthenticatedURL.Token token = null; - final Authenticator kerberosAuthenticator = - new KerberosDelegationTokenAuthenticator(); - try { - if (isSpnegoTokenCachingEnabled && !requiresNewAuth - && spnegoToken != null && spnegoToken.isTokenValid()){ - token = spnegoToken.getToken(); - } else { - token = new AuthenticatedURL.Token(); - kerberosAuthenticator.authenticate(uriBuilder.build().toURL(), token); - spnegoToken = new SpnegoToken(token); - } - } catch (AuthenticationException e) { - throw new WasbRemoteCallException( - Constants.AUTHENTICATION_FAILED_ERROR_MESSAGE, e); - } - Validate.isTrue(token.isSet(), - "Authenticated Token is NOT present. 
The request cannot proceed."); - - httpUriRequest.setHeader("Cookie", - AuthenticatedURL.AUTH_COOKIE + "=" + token); - } - return httpUriRequest; - } - - private Token getDelegationToken( - UserGroupInformation userGroupInformation) throws IOException { - if (this.delegationToken == null) { - Token token = null; - for (Token iterToken : userGroupInformation.getTokens()) { - if (iterToken.getKind() - .equals(WasbDelegationTokenIdentifier.TOKEN_KIND)) { - token = iterToken; - LOG.debug("{} token found in cache : {}", - WasbDelegationTokenIdentifier.TOKEN_KIND, iterToken); - break; - } - } - LOG.debug("UGI Information: {}", userGroupInformation.toString()); - - // ugi tokens are usually indicative of a task which can't - // refetch tokens. even if ugi has credentials, don't attempt - // to get another token to match hdfs/rpc behavior - if (token != null) { - LOG.debug("Using UGI token: {}", token); - setDelegationToken(token); - } - } - if (LOG.isDebugEnabled()) { - LOG.debug("Delegation token from cache - {}", delegationToken != null - ? delegationToken.encodeToUrlString() - : "null"); - } - return this.delegationToken; - } - - private void setDelegationToken( - final Token token) { - synchronized (this) { - this.delegationToken = token; - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SelfRenewingLease.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SelfRenewingLease.java deleted file mode 100644 index 8ab568fdc3bfc..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SelfRenewingLease.java +++ /dev/null @@ -1,213 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import org.apache.hadoop.fs.azure.StorageInterface.CloudBlobWrapper; - -import org.apache.hadoop.classification.VisibleForTesting; -import org.apache.hadoop.util.concurrent.SubjectInheritingThread; - -import com.microsoft.azure.storage.AccessCondition; -import com.microsoft.azure.storage.StorageException; -import com.microsoft.azure.storage.blob.CloudBlob; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.concurrent.atomic.AtomicInteger; - -import static com.microsoft.azure.storage.StorageErrorCodeStrings.LEASE_ALREADY_PRESENT; - -/** - * An Azure blob lease that automatically renews itself indefinitely - * using a background thread. Use it to synchronize distributed processes, - * or to prevent writes to the blob by other processes that don't - * have the lease. - * - * Creating a new Lease object blocks the caller until the Azure blob lease is - * acquired. - * - * Attempting to get a lease on a non-existent blob throws StorageException. - * - * Call free() to release the Lease. 
- * - * You can use this Lease like a distributed lock. If the holder process - * dies, the lease will time out since it won't be renewed. - */ -public class SelfRenewingLease { - - private CloudBlobWrapper blobWrapper; - private Thread renewer; - private volatile boolean leaseFreed; - private String leaseID = null; - private static final int LEASE_TIMEOUT = 60; // Lease timeout in seconds - - // Time to wait to renew lease in milliseconds - public static final int LEASE_RENEWAL_PERIOD = 40000; - private static final Logger LOG = LoggerFactory.getLogger(SelfRenewingLease.class); - - // Used to allocate thread serial numbers in thread name - private static AtomicInteger threadNumber = new AtomicInteger(0); - - - // Time to wait to retry getting the lease in milliseconds - @VisibleForTesting - static final int LEASE_ACQUIRE_RETRY_INTERVAL = 2000; - - public SelfRenewingLease(CloudBlobWrapper blobWrapper, boolean throwIfPresent) - throws StorageException { - - this.leaseFreed = false; - this.blobWrapper = blobWrapper; - - // Keep trying to get the lease until you get it. - CloudBlob blob = blobWrapper.getBlob(); - while(leaseID == null) { - try { - leaseID = blob.acquireLease(LEASE_TIMEOUT, null); - } catch (StorageException e) { - - if (throwIfPresent && e.getErrorCode().equals(LEASE_ALREADY_PRESENT)) { - throw e; - } - - // Throw again if we don't want to keep waiting. - // We expect it to be that the lease is already present, - // or in some cases that the blob does not exist. - if (!LEASE_ALREADY_PRESENT.equals(e.getErrorCode())) { - LOG.info( - "Caught exception when trying to get lease on blob " - + blobWrapper.getUri().toString() + ". " + e.getMessage()); - throw e; - } - } - if (leaseID == null) { - try { - Thread.sleep(LEASE_ACQUIRE_RETRY_INTERVAL); - } catch (InterruptedException e) { - - // Restore the interrupted status - Thread.currentThread().interrupt(); - } - } - } - renewer = new SubjectInheritingThread(new Renewer()); - - // A Renewer running should not keep JVM from exiting, so make it a daemon. - renewer.setDaemon(true); - renewer.setName("AzureLeaseRenewer-" + threadNumber.getAndIncrement()); - renewer.start(); - LOG.debug("Acquired lease " + leaseID + " on " + blob.getUri() - + " managed by thread " + renewer.getName()); - } - - /** - * Free the lease and stop the keep-alive thread. - * @throws StorageException Thrown when fail to free the lease. - */ - public void free() throws StorageException { - AccessCondition accessCondition = AccessCondition.generateEmptyCondition(); - accessCondition.setLeaseID(leaseID); - try { - blobWrapper.getBlob().releaseLease(accessCondition); - } catch (StorageException e) { - if ("BlobNotFound".equals(e.getErrorCode())) { - - // Don't do anything -- it's okay to free a lease - // on a deleted file. The delete freed the lease - // implicitly. - } else { - - // This error is not anticipated, so re-throw it. - LOG.warn("Unanticipated exception when trying to free lease " + leaseID - + " on " + blobWrapper.getStorageUri()); - throw(e); - } - } finally { - - // Even if releasing the lease fails (e.g. because the file was deleted), - // make sure to record that we freed the lease, to terminate the - // keep-alive thread. 
- leaseFreed = true; - LOG.debug("Freed lease " + leaseID + " on " + blobWrapper.getUri() - + " managed by thread " + renewer.getName()); - } - } - - public boolean isFreed() { - return leaseFreed; - } - - public String getLeaseID() { - return leaseID; - } - - public CloudBlob getCloudBlob() { - return blobWrapper.getBlob(); - } - - private class Renewer implements Runnable { - - /** - * Start a keep-alive thread that will continue to renew - * the lease until it is freed or the process dies. - */ - @Override - public void run() { - LOG.debug("Starting lease keep-alive thread."); - AccessCondition accessCondition = - AccessCondition.generateEmptyCondition(); - accessCondition.setLeaseID(leaseID); - - while(!leaseFreed) { - try { - Thread.sleep(LEASE_RENEWAL_PERIOD); - } catch (InterruptedException e) { - LOG.debug("Keep-alive thread for lease " + leaseID + - " interrupted."); - - // Restore the interrupted status - Thread.currentThread().interrupt(); - } - try { - if (!leaseFreed) { - blobWrapper.getBlob().renewLease(accessCondition); - - // It'll be very rare to renew the lease (most will be short) - // so log that we did it, to help with system debugging. - LOG.info("Renewed lease " + leaseID + " on " - + getCloudBlob().getUri()); - } - } catch (StorageException e) { - if (!leaseFreed) { - - // Free the lease so we don't leave this thread running forever. - leaseFreed = true; - - // Normally leases should be freed and there should be no - // exceptions, so log a warning. - LOG.warn("Attempt to renew lease " + leaseID + " on " - + getCloudBlob().getUri() - + " failed, but lease not yet freed. Reason: " + - e.getMessage()); - } - } - } - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SelfThrottlingIntercept.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SelfThrottlingIntercept.java deleted file mode 100644 index ad71016a745e7..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SelfThrottlingIntercept.java +++ /dev/null @@ -1,195 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure; - -import java.net.HttpURLConnection; -import java.util.Date; - -import org.apache.hadoop.classification.InterfaceAudience; - -import com.microsoft.azure.storage.OperationContext; -import com.microsoft.azure.storage.RequestResult; -import com.microsoft.azure.storage.ResponseReceivedEvent; -import com.microsoft.azure.storage.SendingRequestEvent; -import com.microsoft.azure.storage.StorageEvent; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/* - * Self throttling is implemented by hooking into send & response callbacks - * One instance of this class is created per operationContext so each blobUpload/blobDownload/etc. - * - * Self throttling only applies to 2nd and subsequent packets of an operation. This is a simple way to - * ensure it only affects bulk transfers and not every tiny request. - * - * A blobDownload will involve sequential packet transmissions and so there are no concurrency concerns - * A blobUpload will generally involve concurrent upload worker threads that share one operationContext and one throttling instance. - * -- we do not track the latencies for each worker thread as they are doing similar work and will rarely collide in practice. - * -- concurrent access to lastE2Edelay must be protected. - * -- volatile is necessary and should be sufficient to protect simple access to primitive values (java 1.5 onwards) - * -- synchronized{} blocks are also used to be conservative and for easier maintenance. - * - * If an operation were to perform concurrent GETs and PUTs there is the possibility of getting confused regarding - * whether lastE2Edelay was a read or write measurement. This scenario does not occur. - * - * readFactor = target read throughput as factor of unrestricted throughput. - * writeFactor = target write throughput as factor of unrestricted throughput. - * - * As we introduce delays it is important to only measure the actual E2E latency and not the augmented latency - * To achieve this, we fiddle the 'startDate' of the transfer tracking object. - */ - - -/** - * - * Introduces delays in our Azure traffic to prevent overrunning the server-side throttling limits. 
- * - */ -@InterfaceAudience.Private -public class SelfThrottlingIntercept { - public static final Logger LOG = LoggerFactory.getLogger(SelfThrottlingIntercept.class); - - private final float readFactor; - private final float writeFactor; - private final OperationContext operationContext; - - // Concurrency: access to non-final members must be thread-safe - private long lastE2Elatency; - - public SelfThrottlingIntercept(OperationContext operationContext, - float readFactor, float writeFactor) { - this.operationContext = operationContext; - this.readFactor = readFactor; - this.writeFactor = writeFactor; - } - - public static void hook(OperationContext operationContext, float readFactor, - float writeFactor) { - - SelfThrottlingIntercept throttler = new SelfThrottlingIntercept( - operationContext, readFactor, writeFactor); - ResponseReceivedListener responseListener = throttler.new ResponseReceivedListener(); - SendingRequestListener sendingListener = throttler.new SendingRequestListener(); - - operationContext.getResponseReceivedEventHandler().addListener( - responseListener); - operationContext.getSendingRequestEventHandler().addListener( - sendingListener); - } - - public void responseReceived(ResponseReceivedEvent event) { - RequestResult result = event.getRequestResult(); - Date startDate = result.getStartDate(); - Date stopDate = result.getStopDate(); - long elapsed = stopDate.getTime() - startDate.getTime(); - - synchronized (this) { - this.lastE2Elatency = elapsed; - } - - if (LOG.isDebugEnabled()) { - int statusCode = result.getStatusCode(); - String etag = result.getEtag(); - HttpURLConnection urlConnection = (HttpURLConnection) event - .getConnectionObject(); - int contentLength = urlConnection.getContentLength(); - String requestMethod = urlConnection.getRequestMethod(); - long threadId = Thread.currentThread().getId(); - LOG.debug(String - .format( - "SelfThrottlingIntercept:: ResponseReceived: threadId=%d, Status=%d, Elapsed(ms)=%d, ETAG=%s, contentLength=%d, requestMethod=%s", - threadId, statusCode, elapsed, etag, contentLength, requestMethod)); - } - } - - public void sendingRequest(SendingRequestEvent sendEvent) { - long lastLatency; - boolean operationIsRead; // for logging - synchronized (this) { - - lastLatency = this.lastE2Elatency; - } - - float sleepMultiple; - HttpURLConnection urlConnection = (HttpURLConnection) sendEvent - .getConnectionObject(); - - // Azure REST API never uses POST, so PUT is a sufficient test for an - // upload. - if (urlConnection.getRequestMethod().equalsIgnoreCase("PUT")) { - operationIsRead = false; - sleepMultiple = (1 / writeFactor) - 1; - } else { - operationIsRead = true; - sleepMultiple = (1 / readFactor) - 1; - } - - long sleepDuration = (long) (sleepMultiple * lastLatency); - if (sleepDuration < 0) { - sleepDuration = 0; - } - - if (sleepDuration > 0) { - try { - // Thread.sleep() is not exact but it seems sufficiently accurate for - // our needs. If needed this could become a loop of small waits that - // tracks actual - // elapsed time. 
- Thread.sleep(sleepDuration); - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - } - - // reset to avoid counting the sleep against request latency - sendEvent.getRequestResult().setStartDate(new Date()); - } - - if (LOG.isDebugEnabled()) { - boolean isFirstRequest = (lastLatency == 0); - long threadId = Thread.currentThread().getId(); - LOG.debug(String - .format( - " SelfThrottlingIntercept:: SendingRequest: threadId=%d, requestType=%s, isFirstRequest=%b, sleepDuration=%d", - threadId, operationIsRead ? "read " : "write", isFirstRequest, - sleepDuration)); - } - } - - // simply forwards back to the main class. - // this is necessary as our main class cannot implement two base-classes. - @InterfaceAudience.Private - class SendingRequestListener extends StorageEvent { - - @Override - public void eventOccurred(SendingRequestEvent event) { - sendingRequest(event); - } - } - - // simply forwards back to the main class. - // this is necessary as our main class cannot implement two base-classes. - @InterfaceAudience.Private - class ResponseReceivedListener extends StorageEvent { - - @Override - public void eventOccurred(ResponseReceivedEvent event) { - responseReceived(event); - } - } -} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SendRequestIntercept.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SendRequestIntercept.java deleted file mode 100644 index 98f9de7bff554..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SendRequestIntercept.java +++ /dev/null @@ -1,92 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.net.HttpURLConnection; -import java.security.InvalidKeyException; - -import org.apache.hadoop.classification.InterfaceAudience; - -import com.microsoft.azure.storage.Constants.HeaderConstants; -import com.microsoft.azure.storage.core.StorageCredentialsHelper; -import com.microsoft.azure.storage.OperationContext; -import com.microsoft.azure.storage.SendingRequestEvent; -import com.microsoft.azure.storage.StorageCredentials; -import com.microsoft.azure.storage.StorageEvent; -import com.microsoft.azure.storage.StorageException; - -/** - * Manages the lifetime of binding on the operation contexts to intercept send - * request events to Azure storage and allow concurrent OOB I/Os. - */ -@InterfaceAudience.Private -public final class SendRequestIntercept extends StorageEvent { - - private static final String ALLOW_ALL_REQUEST_PRECONDITIONS = "*"; - - /** - * Hidden default constructor for SendRequestIntercept. 
- */ - private SendRequestIntercept() { - } - - /** - * Binds a new lister to the operation context so the WASB file system can - * appropriately intercept sends and allow concurrent OOB I/Os. This - * by-passes the blob immutability check when reading streams. - * - * @param opContext the operation context assocated with this request. - */ - public static void bind(OperationContext opContext) { - opContext.getSendingRequestEventHandler().addListener(new SendRequestIntercept()); - } - - /** - * Handler which processes the sending request event from Azure SDK. The - * handler simply sets reset the conditional header to make all read requests - * unconditional if reads with concurrent OOB writes are allowed. - * - * @param sendEvent - * - send event context from Windows Azure SDK. - */ - @Override - public void eventOccurred(SendingRequestEvent sendEvent) { - - if (!(sendEvent.getConnectionObject() instanceof HttpURLConnection)) { - // Pass if there is no HTTP connection associated with this send - // request. - return; - } - - // Capture the HTTP URL connection object and get size of the payload for - // the request. - HttpURLConnection urlConnection = (HttpURLConnection) sendEvent - .getConnectionObject(); - - // Determine whether this is a download request by checking that the request - // method - // is a "GET" operation. - if (urlConnection.getRequestMethod().equalsIgnoreCase("GET")) { - // If concurrent reads on OOB writes are allowed, reset the if-match - // condition on the conditional header. - urlConnection.setRequestProperty(HeaderConstants.IF_MATCH, - ALLOW_ALL_REQUEST_PRECONDITIONS); - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/ShellDecryptionKeyProvider.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/ShellDecryptionKeyProvider.java deleted file mode 100644 index d9d6fc3cb4812..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/ShellDecryptionKeyProvider.java +++ /dev/null @@ -1,62 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.IOException; -import java.util.Arrays; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.util.Shell; - -/** - * Shell decryption key provider which invokes an external script that will - * perform the key decryption. 
- */ -@InterfaceAudience.Private -public class ShellDecryptionKeyProvider extends SimpleKeyProvider { - static final String KEY_ACCOUNT_SHELLKEYPROVIDER_SCRIPT = - "fs.azure.shellkeyprovider.script"; - - @Override - public String getStorageAccountKey(String accountName, Configuration conf) - throws KeyProviderException { - String envelope = super.getStorageAccountKey(accountName, conf); - - final String command = conf.get(KEY_ACCOUNT_SHELLKEYPROVIDER_SCRIPT); - if (command == null) { - throw new KeyProviderException( - "Script path is not specified via fs.azure.shellkeyprovider.script"); - } - - String[] cmd = command.split(" "); - String[] cmdWithEnvelope = Arrays.copyOf(cmd, cmd.length + 1); - cmdWithEnvelope[cmdWithEnvelope.length - 1] = envelope; - - String decryptedKey = null; - try { - decryptedKey = Shell.execCommand(cmdWithEnvelope); - } catch (IOException ex) { - throw new KeyProviderException(ex); - } - - // trim any whitespace - return decryptedKey.trim(); - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SimpleKeyProvider.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SimpleKeyProvider.java deleted file mode 100644 index 64811e13ee9eb..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SimpleKeyProvider.java +++ /dev/null @@ -1,61 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.IOException; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.security.ProviderUtils; - -/** - * Key provider that simply returns the storage account key from the - * configuration as plaintext. 
- */ -@InterfaceAudience.Private -public class SimpleKeyProvider implements KeyProvider { - private static final Logger LOG = LoggerFactory.getLogger(SimpleKeyProvider.class); - - protected static final String KEY_ACCOUNT_KEY_PREFIX = - "fs.azure.account.key."; - - @Override - public String getStorageAccountKey(String accountName, Configuration conf) - throws KeyProviderException { - String key = null; - try { - Configuration c = ProviderUtils.excludeIncompatibleCredentialProviders( - conf, NativeAzureFileSystem.class); - char[] keyChars = c.getPassword(getStorageAccountKeyName(accountName)); - if (keyChars != null) { - key = new String(keyChars); - } - } catch(IOException ioe) { - LOG.warn("Unable to get key from credential providers.", ioe); - } - return key; - } - - protected String getStorageAccountKeyName(String accountName) { - return KEY_ACCOUNT_KEY_PREFIX + accountName; - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/StorageInterface.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/StorageInterface.java deleted file mode 100644 index dbb38491d7f55..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/StorageInterface.java +++ /dev/null @@ -1,793 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.List; -import java.util.EnumSet; -import java.util.HashMap; - -import org.apache.hadoop.classification.InterfaceAudience; - -import com.microsoft.azure.storage.AccessCondition; -import com.microsoft.azure.storage.CloudStorageAccount; -import com.microsoft.azure.storage.OperationContext; -import com.microsoft.azure.storage.RetryPolicyFactory; -import com.microsoft.azure.storage.StorageCredentials; -import com.microsoft.azure.storage.StorageException; -import com.microsoft.azure.storage.blob.BlobListingDetails; -import com.microsoft.azure.storage.blob.BlobProperties; -import com.microsoft.azure.storage.blob.BlockEntry; -import com.microsoft.azure.storage.blob.BlockListingFilter; -import com.microsoft.azure.storage.blob.BlobRequestOptions; -import com.microsoft.azure.storage.blob.CloudBlob; -import com.microsoft.azure.storage.blob.CopyState; -import com.microsoft.azure.storage.blob.ListBlobItem; -import com.microsoft.azure.storage.blob.PageRange; - -/** - * This is a very thin layer over the methods exposed by the Windows Azure - * Storage SDK that we need for WASB implementation. 
This base class has a real - * implementation that just simply redirects to the SDK, and a memory-backed one - * that's used for unit tests. - * - * IMPORTANT: all the methods here must remain very simple redirects since code - * written here can't be properly unit tested. - */ -@InterfaceAudience.Private -abstract class StorageInterface { - - /** - * Sets the timeout to use when making requests to the storage service. - *
- * The server timeout interval begins at the time that the complete request - * has been received by the service, and the server begins processing the - * response. If the timeout interval elapses before the response is returned - * to the client, the operation times out. The timeout interval resets with - * each retry, if the request is retried. - * - * The default timeout interval for a request made via the service client is - * 90 seconds. You can change this value on the service client by setting this - * property, so that all subsequent requests made via the service client will - * use the new timeout interval. You can also change this value for an - * individual request, by setting the - * {@link com.microsoft.azure.storage.RequestOptions#timeoutIntervalInMs} - * property. - * - * If you are downloading a large blob, you should increase the value of the - * timeout beyond the default value. - * - * @param timeoutInMs - * The timeout, in milliseconds, to use when making requests to the - * storage service. - */ - public abstract void setTimeoutInMs(int timeoutInMs); - - /** - * Sets the RetryPolicyFactory object to use when making service requests. - * - * @param retryPolicyFactory - * the RetryPolicyFactory object to use when making service requests. - */ - public abstract void setRetryPolicyFactory( - final RetryPolicyFactory retryPolicyFactory); - - /** - * Creates a new Blob service client. - * - * @param account cloud storage account. - */ - public abstract void createBlobClient(CloudStorageAccount account); - - /** - * Creates an instance of the CloudBlobClient class using the - * specified Blob service endpoint. - * - * @param baseUri - * A java.net.URI object that represents the Blob - * service endpoint used to create the client. - */ - public abstract void createBlobClient(URI baseUri); - - /** - * Creates an instance of the CloudBlobClient class using the - * specified Blob service endpoint and account credentials. - * - * @param baseUri - * A java.net.URI object that represents the Blob - * service endpoint used to create the client. - * @param credentials - * A {@link StorageCredentials} object that represents the account - * credentials. - */ - public abstract void createBlobClient(URI baseUri, - StorageCredentials credentials); - - /** - * Returns the credentials for the Blob service, as configured for the storage - * account. - * - * @return A {@link StorageCredentials} object that represents the credentials - * for this storage account. - */ - public abstract StorageCredentials getCredentials(); - - /** - * Returns a reference to a {@link CloudBlobContainerWrapper} object that - * represents the cloud blob container for the specified address. - * - * @param name - * A String that represents the name of the container. - * @return A {@link CloudBlobContainerWrapper} object that represents a - * reference to the cloud blob container. - * - * @throws URISyntaxException - * If the resource URI is invalid. - * @throws StorageException - * If a storage service error occurred. - */ - public abstract CloudBlobContainerWrapper getContainerReference(String name) - throws URISyntaxException, StorageException; - - /** - * A thin wrapper over the - * {@link com.microsoft.azure.storage.blob.CloudBlobDirectory} class - * that simply redirects calls to the real object except in unit tests. - */ - @InterfaceAudience.Private - public abstract static class CloudBlobDirectoryWrapper implements - ListBlobItem { - /** - * Returns the URI for this directory. 
- * - * @return A java.net.URI object that represents the URI for - * this directory. - */ - public abstract URI getUri(); - - /** - * Returns an enumerable collection of blob items whose names begin with the - * specified prefix, using the specified flat or hierarchical option, - * listing details options, request options, and operation context. - * - * @param prefix - * A String that represents the prefix of the blob - * name. - * @param useFlatBlobListing - * true to indicate that the returned list will be - * flat; false to indicate that the returned list will - * be hierarchical. - * @param listingDetails - * A java.util.EnumSet object that contains - * {@link BlobListingDetails} values that indicate whether - * snapshots, metadata, and/or uncommitted blocks are returned. - * Committed blocks are always returned. - * @param options - * A {@link BlobRequestOptions} object that specifies any - * additional options for the request. Specifying null - * will use the default request options from the associated service - * client ({@link com.microsoft.azure.storage.blob.CloudBlobClient}). - * @param opContext - * An {@link OperationContext} object that represents the context - * for the current operation. This object is used to track requests - * to the storage service, and to provide additional runtime - * information about the operation. - * - * @return An enumerable collection of {@link ListBlobItem} objects that - * represent the block items whose names begin with the specified - * prefix in this directory. - * - * @throws StorageException - * If a storage service error occurred. - * @throws URISyntaxException - * If the resource URI is invalid. - */ - public abstract Iterable listBlobs(String prefix, - boolean useFlatBlobListing, EnumSet listingDetails, - BlobRequestOptions options, OperationContext opContext) - throws URISyntaxException, StorageException; - } - - /** - * A thin wrapper over the - * {@link com.microsoft.azure.storage.blob.CloudBlobContainer} class - * that simply redirects calls to the real object except in unit tests. - */ - @InterfaceAudience.Private - public abstract static class CloudBlobContainerWrapper { - /** - * Returns the name of the container. - * - * @return A String that represents the name of the container. - */ - public abstract String getName(); - - /** - * Returns a value that indicates whether the container exists, using the - * specified operation context. - * - * @param opContext - * An {@link OperationContext} object that represents the context - * for the current operation. This object is used to track requests - * to the storage service, and to provide additional runtime - * information about the operation. - * - * @return true if the container exists, otherwise - * false. - * - * @throws StorageException - * If a storage service error occurred. - */ - public abstract boolean exists(OperationContext opContext) - throws StorageException; - - /** - * Returns the metadata for the container. - * - * @return A java.util.HashMap object that represents the - * metadata for the container. - */ - public abstract HashMap getMetadata(); - - /** - * Sets the metadata for the container. - * - * @param metadata - * A java.util.HashMap object that represents the - * metadata being assigned to the container. - */ - public abstract void setMetadata(HashMap metadata); - - /** - * Downloads the container's attributes, which consist of metadata and - * properties, using the specified operation context. 
- * - * @param opContext - * An {@link OperationContext} object that represents the context - * for the current operation. This object is used to track requests - * to the storage service, and to provide additional runtime - * information about the operation. - * - * @throws StorageException - * If a storage service error occurred. - */ - public abstract void downloadAttributes(OperationContext opContext) - throws StorageException; - - /** - * Uploads the container's metadata using the specified operation context. - * - * @param opContext - * An {@link OperationContext} object that represents the context - * for the current operation. This object is used to track requests - * to the storage service, and to provide additional runtime - * information about the operation. - * - * @throws StorageException - * If a storage service error occurred. - */ - public abstract void uploadMetadata(OperationContext opContext) - throws StorageException; - - /** - * Creates the container using the specified operation context. - * - * @param opContext - * An {@link OperationContext} object that represents the context - * for the current operation. This object is used to track requests - * to the storage service, and to provide additional runtime - * information about the operation. - * - * @throws StorageException - * If a storage service error occurred. - */ - public abstract void create(OperationContext opContext) - throws StorageException; - - /** - * Returns a wrapper for a CloudBlobDirectory. - * - * @param relativePath - * A String that represents the name of the directory, - * relative to the container - * - * @throws StorageException - * If a storage service error occurred. - * - * @throws URISyntaxException - * If URI syntax exception occurred. - */ - public abstract CloudBlobDirectoryWrapper getDirectoryReference( - String relativePath) throws URISyntaxException, StorageException; - - /** - * Returns a wrapper for a CloudBlockBlob. - * - * @param relativePath - * A String that represents the name of the blob, - * relative to the container - * - * @throws StorageException - * If a storage service error occurred. - * - * @throws URISyntaxException - * If URI syntax exception occurred. - */ - public abstract CloudBlobWrapper getBlockBlobReference( - String relativePath) throws URISyntaxException, StorageException; - - /** - * Returns a wrapper for a CloudPageBlob. - * - * @param relativePath - * A String that represents the name of the blob, relative to the container - * - * @throws StorageException - * If a storage service error occurred. - * - * @throws URISyntaxException - * If URI syntax exception occurred. - */ - public abstract CloudBlobWrapper getPageBlobReference(String relativePath) - throws URISyntaxException, StorageException; - } - - - /** - * A thin wrapper over the {@link CloudBlob} class that simply redirects calls - * to the real object except in unit tests. - */ - @InterfaceAudience.Private - public interface CloudBlobWrapper extends ListBlobItem { - /** - * Returns the URI for this blob. - * - * @return A java.net.URI object that represents the URI for - * the blob. - */ - URI getUri(); - - /** - * Returns the metadata for the blob. - * - * @return A java.util.HashMap object that represents the - * metadata for the blob. - */ - HashMap getMetadata(); - - /** - * Sets the metadata for the blob. - * - * @param metadata - * A java.util.HashMap object that contains the - * metadata being assigned to the blob. 
- */ - void setMetadata(HashMap metadata); - - /** - * Copies an existing blob's contents, properties, and metadata to this instance of the CloudBlob - * class, using the specified operation context. - * - * @param sourceBlob - * A CloudBlob object that represents the source blob to copy. - * @param options - * A {@link BlobRequestOptions} object that specifies any additional options for the request. Specifying - * null will use the default request options from the associated service client ( - * {@link CloudBlobClient}). - * @param opContext - * An {@link OperationContext} object that represents the context for the current operation. This object - * is used to track requests to the storage service, and to provide additional runtime information about - * the operation. - * - * @throws StorageException - * If a storage service error occurred. - * @throws URISyntaxException - * - */ - public abstract void startCopyFromBlob(CloudBlobWrapper sourceBlob, - BlobRequestOptions options, OperationContext opContext, boolean overwriteDestination) - throws StorageException, URISyntaxException; - - /** - * Returns the blob's copy state. - * - * @return A {@link CopyState} object that represents the copy state of the - * blob. - */ - CopyState getCopyState(); - - /** - * Downloads a range of bytes from the blob to the given byte buffer, using the specified request options and - * operation context. - * - * @param offset - * The byte offset to use as the starting point for the source. - * @param length - * The number of bytes to read. - * @param buffer - * The byte buffer, as an array of bytes, to which the blob bytes are downloaded. - * @param bufferOffset - * The byte offset to use as the starting point for the target. - * @param options - * A {@link BlobRequestOptions} object that specifies any additional options for the request. Specifying - * null will use the default request options from the associated service client ( - * {@link CloudBlobClient}). - * @param opContext - * An {@link OperationContext} object that represents the context for the current operation. This object - * is used to track requests to the storage service, and to provide additional runtime information about - * the operation. - * - * @throws StorageException - * If a storage service error occurred. - */ - void downloadRange(final long offset, final long length, - final OutputStream outStream, final BlobRequestOptions options, - final OperationContext opContext) - throws StorageException, IOException; - - /** - * Deletes the blob using the specified operation context. - *
- * A blob that has snapshots cannot be deleted unless the snapshots are also - * deleted. If a blob has snapshots, use the - * {@link DeleteSnapshotsOption#DELETE_SNAPSHOTS_ONLY} or - * {@link DeleteSnapshotsOption#INCLUDE_SNAPSHOTS} value in the - * deleteSnapshotsOption parameter to specify how the snapshots - * should be handled when the blob is deleted. - * - * @param opContext - * An {@link OperationContext} object that represents the context - * for the current operation. This object is used to track requests - * to the storage service, and to provide additional runtime - * information about the operation. - * - * @throws StorageException - * If a storage service error occurred. - */ - void delete(OperationContext opContext, SelfRenewingLease lease) - throws StorageException; - - /** - * Checks to see if the blob exists, using the specified operation context. - * - * @param opContext - * An {@link OperationContext} object that represents the context - * for the current operation. This object is used to track requests - * to the storage service, and to provide additional runtime - * information about the operation. - * - * @return true if the blob exists, otherwise - * false. - * - * @throws StorageException - * If a storage service error occurred. - */ - boolean exists(OperationContext opContext) - throws StorageException; - - /** - * Populates a blob's properties and metadata using the specified operation - * context. - *
- * This method populates the blob's system properties and user-defined - * metadata. Before reading a blob's properties or metadata, call this - * method or its overload to retrieve the latest values for the blob's - * properties and metadata from the Windows Azure storage service. - * - * @param opContext - * An {@link OperationContext} object that represents the context - * for the current operation. This object is used to track requests - * to the storage service, and to provide additional runtime - * information about the operation. - * - * @throws StorageException - * If a storage service error occurred. - */ - void downloadAttributes(OperationContext opContext) - throws StorageException; - - /** - * Returns the blob's properties. - * - * @return A {@link BlobProperties} object that represents the properties of - * the blob. - */ - BlobProperties getProperties(); - - /** - * Opens a blob input stream to download the blob using the specified - * operation context. - *
- * Use {@link CloudBlobClient#setStreamMinimumReadSizeInBytes} to configure - * the read size. - * - * @param opContext - * An {@link OperationContext} object that represents the context - * for the current operation. This object is used to track requests - * to the storage service, and to provide additional runtime - * information about the operation. - * - * @return An InputStream object that represents the stream to - * use for reading from the blob. - * - * @throws StorageException - * If a storage service error occurred. - */ - InputStream openInputStream(BlobRequestOptions options, - OperationContext opContext) throws StorageException; - - /** - * Uploads the blob's metadata to the storage service using the specified - * lease ID, request options, and operation context. - * - * @param opContext - * An {@link OperationContext} object that represents the context - * for the current operation. This object is used to track requests - * to the storage service, and to provide additional runtime - * information about the operation. - * - * @throws StorageException - * If a storage service error occurred. - */ - void uploadMetadata(OperationContext opContext) - throws StorageException; - - /** - * Uploads the blob's metadata to the storage service using the specified - * lease ID, request options, and operation context. - * - * @param accessCondition - * A {@link AccessCondition} object that represents the access conditions for the blob. - * - * @param options - * A {@link BlobRequestOptions} object that specifies any additional options for the request. Specifying - * null will use the default request options from the associated service client ( - * {@link CloudBlobClient}). - * - * @param opContext - * An {@link OperationContext} object that represents the context - * for the current operation. This object is used to track requests - * to the storage service, and to provide additional runtime - * information about the operation. - * - * @throws StorageException - * If a storage service error occurred. - */ - void uploadMetadata(AccessCondition accessCondition, BlobRequestOptions options, - OperationContext opContext) throws StorageException; - - void uploadProperties(OperationContext opContext, - SelfRenewingLease lease) - throws StorageException; - - SelfRenewingLease acquireLease() throws StorageException; - - /** - * Gets the minimum read block size to use with this Blob. - * - * @return The minimum block size, in bytes, for reading from a block blob. - */ - int getStreamMinimumReadSizeInBytes(); - - /** - * Sets the minimum read block size to use with this Blob. - * - * @param minimumReadSizeBytes - * The maximum block size, in bytes, for reading from a block blob - * while using a {@link BlobInputStream} object, ranging from 512 - * bytes to 64 MB, inclusive. - */ - void setStreamMinimumReadSizeInBytes( - int minimumReadSizeBytes); - - /** - * Sets the write block size to use with this Blob. - * - * @param writeBlockSizeBytes - * The maximum block size, in bytes, for writing to a block blob - * while using a {@link BlobOutputStream} object, ranging from 1 MB - * to 4 MB, inclusive. - * - * @throws IllegalArgumentException - * If writeBlockSizeInBytes is less than 1 MB or - * greater than 4 MB. - */ - void setWriteBlockSizeInBytes(int writeBlockSizeBytes); - - CloudBlob getBlob(); - } - - /** - * A thin wrapper over the - * {@link com.microsoft.azure.storage.blob.CloudBlockBlob} class - * that simply redirects calls to the real object except in unit tests. 
- */ - public abstract interface CloudBlockBlobWrapper - extends CloudBlobWrapper { - /** - * Creates and opens an output stream to write data to the block blob using the specified - * operation context. - * - * @param opContext - * An {@link OperationContext} object that represents the context for the current operation. This object - * is used to track requests to the storage service, and to provide additional runtime information about - * the operation. - * - * @return A {@link BlobOutputStream} object used to write data to the blob. - * - * @throws StorageException - * If a storage service error occurred. - */ - OutputStream openOutputStream( - BlobRequestOptions options, - OperationContext opContext) throws StorageException; - - /** - * - * @param filter A {@link BlockListingFilter} value that specifies whether to download - * committed blocks, uncommitted blocks, or all blocks. - * @param options A {@link BlobRequestOptions} object that specifies any additional options for - * the request. Specifying null will use the default request options from - * the associated service client ( CloudBlobClient). - * @param opContext An {@link OperationContext} object that represents the context for the current - * operation. This object is used to track requests to the storage service, - * and to provide additional runtime information about the operation. - * @return An ArrayList object of {@link BlockEntry} objects that represent the list - * block items downloaded from the block blob. - * @throws IOException If an I/O error occurred. - * @throws StorageException If a storage service error occurred. - */ - List downloadBlockList(BlockListingFilter filter, BlobRequestOptions options, - OperationContext opContext) throws IOException, StorageException; - - /** - * - * @param blockId A String that represents the Base-64 encoded block ID. Note for a given blob - * the length of all Block IDs must be identical. - * @param accessCondition An {@link AccessCondition} object that represents the access conditions for the blob. - * @param sourceStream An {@link InputStream} object that represents the input stream to write to the - * block blob. - * @param length A long which represents the length, in bytes, of the stream data, - * or -1 if unknown. - * @param options A {@link BlobRequestOptions} object that specifies any additional options for the - * request. Specifying null will use the default request options from the - * associated service client ( CloudBlobClient). - * @param opContext An {@link OperationContext} object that represents the context for the current operation. - * This object is used to track requests to the storage service, and to provide - * additional runtime information about the operation. - * @throws IOException If an I/O error occurred. - * @throws StorageException If a storage service error occurred. - */ - void uploadBlock(String blockId, AccessCondition accessCondition, InputStream sourceStream, - long length, BlobRequestOptions options, - OperationContext opContext) throws IOException, StorageException; - - /** - * - * @param blockList An enumerable collection of {@link BlockEntry} objects that represents the list - * block items being committed. The size field is ignored. - * @param accessCondition An {@link AccessCondition} object that represents the access conditions for the blob. - * @param options A {@link BlobRequestOptions} object that specifies any additional options for the - * request. 
Specifying null will use the default request options from the associated - * service client ( CloudBlobClient). - * @param opContext An {@link OperationContext} object that represents the context for the current operation. - * This object is used to track requests to the storage service, and to provide additional - * runtime information about the operation. - * @throws IOException If an I/O error occurred. - * @throws StorageException If a storage service error occurred. - */ - void commitBlockList(List blockList, AccessCondition accessCondition, BlobRequestOptions options, - OperationContext opContext) throws IOException, StorageException; - - } - - /** - * A thin wrapper over the - * {@link com.microsoft.azure.storage.blob.CloudPageBlob} - * class that simply redirects calls to the real object except in unit tests. - */ - public abstract interface CloudPageBlobWrapper - extends CloudBlobWrapper { - /** - * Creates a page blob using the specified request options and operation context. - * - * @param length - * The size, in bytes, of the page blob. - * @param options - * A {@link BlobRequestOptions} object that specifies any additional options for the request. Specifying - * null will use the default request options from the associated service client ( - * {@link CloudBlobClient}). - * @param opContext - * An {@link OperationContext} object that represents the context for the current operation. This object - * is used to track requests to the storage service, and to provide additional runtime information about - * the operation. - * - * @throws IllegalArgumentException - * If the length is not a multiple of 512. - * - * @throws StorageException - * If a storage service error occurred. - */ - void create(final long length, BlobRequestOptions options, - OperationContext opContext) throws StorageException; - - - /** - * Uploads a range of contiguous pages, up to 4 MB in size, at the specified offset in the page blob, using the - * specified lease ID, request options, and operation context. - * - * @param sourceStream - * An InputStream object that represents the input stream to write to the page blob. - * @param offset - * The offset, in number of bytes, at which to begin writing the data. This value must be a multiple of - * 512. - * @param length - * The length, in bytes, of the data to write. This value must be a multiple of 512. - * @param options - * A {@link BlobRequestOptions} object that specifies any additional options for the request. Specifying - * null will use the default request options from the associated service client ( - * {@link CloudBlobClient}). - * @param opContext - * An {@link OperationContext} object that represents the context for the current operation. This object - * is used to track requests to the storage service, and to provide additional runtime information about - * the operation. - * - * @throws IllegalArgumentException - * If the offset or length are not multiples of 512, or if the length is greater than 4 MB. - * @throws IOException - * If an I/O exception occurred. - * @throws StorageException - * If a storage service error occurred. - */ - void uploadPages(final InputStream sourceStream, final long offset, - final long length, BlobRequestOptions options, - OperationContext opContext) throws StorageException, IOException; - - /** - * Returns a collection of page ranges and their starting and ending byte offsets using the specified request - * options and operation context. 
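- * For example, a caller can read back just the regions that were actually
- * written (a rough sketch only; {@code pageBlob}, {@code out} and
- * {@code opContext} are assumed to exist, and end offsets are inclusive):
- * <pre>
- *   ArrayList ranges = pageBlob.downloadPageRanges(null, opContext);
- *   for (Object item : ranges) {
- *     PageRange range = (PageRange) item;
- *     long length = range.getEndOffset() - range.getStartOffset() + 1;
- *     pageBlob.downloadRange(range.getStartOffset(), length, out, null, opContext);
- *   }
- * </pre>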
- * - * @param options - * A {@link BlobRequestOptions} object that specifies any additional options for the request. Specifying - * null will use the default request options from the associated service client ( - * {@link CloudBlobClient}). - * @param opContext - * An {@link OperationContext} object that represents the context for the current operation. This object - * is used to track requests to the storage service, and to provide additional runtime information about - * the operation. - * - * @return An ArrayList object that represents the set of page ranges and their starting and ending - * byte offsets. - * - * @throws StorageException - * If a storage service error occurred. - */ - - ArrayList downloadPageRanges(BlobRequestOptions options, - OperationContext opContext) throws StorageException; - - void uploadMetadata(OperationContext opContext) - throws StorageException; - } -} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/StorageInterfaceImpl.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/StorageInterfaceImpl.java deleted file mode 100644 index e600f9e59da3f..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/StorageInterfaceImpl.java +++ /dev/null @@ -1,522 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.EnumSet; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import org.apache.hadoop.classification.InterfaceAudience; - -import com.microsoft.azure.storage.AccessCondition; -import com.microsoft.azure.storage.CloudStorageAccount; -import com.microsoft.azure.storage.OperationContext; -import com.microsoft.azure.storage.RetryPolicyFactory; -import com.microsoft.azure.storage.StorageCredentials; -import com.microsoft.azure.storage.StorageException; -import com.microsoft.azure.storage.StorageUri; -import com.microsoft.azure.storage.blob.BlobListingDetails; -import com.microsoft.azure.storage.blob.BlobProperties; -import com.microsoft.azure.storage.blob.BlobRequestOptions; -import com.microsoft.azure.storage.blob.BlockEntry; -import com.microsoft.azure.storage.blob.BlockListingFilter; -import com.microsoft.azure.storage.blob.CloudBlob; -import com.microsoft.azure.storage.blob.CloudBlobClient; -import com.microsoft.azure.storage.blob.CloudBlobContainer; -import com.microsoft.azure.storage.blob.CloudBlobDirectory; -import com.microsoft.azure.storage.blob.CloudBlockBlob; -import com.microsoft.azure.storage.blob.CloudPageBlob; -import com.microsoft.azure.storage.blob.CopyState; -import com.microsoft.azure.storage.blob.DeleteSnapshotsOption; -import com.microsoft.azure.storage.blob.ListBlobItem; -import com.microsoft.azure.storage.blob.PageRange; - -/** - * A real implementation of the Azure interaction layer that just redirects - * calls to the Windows Azure storage SDK. - */ -@InterfaceAudience.Private -class StorageInterfaceImpl extends StorageInterface { - private CloudBlobClient serviceClient; - private RetryPolicyFactory retryPolicyFactory; - private int timeoutIntervalInMs; - - private void updateRetryPolicy() { - if (serviceClient != null && retryPolicyFactory != null) { - serviceClient.getDefaultRequestOptions().setRetryPolicyFactory(retryPolicyFactory); - } - } - - private void updateTimeoutInMs() { - if (serviceClient != null && timeoutIntervalInMs > 0) { - serviceClient.getDefaultRequestOptions().setTimeoutIntervalInMs(timeoutIntervalInMs); - } - } - - @Override - public void setRetryPolicyFactory(final RetryPolicyFactory retryPolicyFactory) { - this.retryPolicyFactory = retryPolicyFactory; - updateRetryPolicy(); - } - - @Override - public void setTimeoutInMs(int timeoutInMs) { - timeoutIntervalInMs = timeoutInMs; - updateTimeoutInMs(); - } - - @Override - public void createBlobClient(CloudStorageAccount account) { - serviceClient = account.createCloudBlobClient(); - updateRetryPolicy(); - updateTimeoutInMs(); - } - - @Override - public void createBlobClient(URI baseUri) { - createBlobClient(baseUri, (StorageCredentials)null); - } - - @Override - public void createBlobClient(URI baseUri, StorageCredentials credentials) { - serviceClient = new CloudBlobClient(baseUri, credentials); - updateRetryPolicy(); - updateTimeoutInMs(); - } - - @Override - public StorageCredentials getCredentials() { - return serviceClient.getCredentials(); - } - - @Override - public CloudBlobContainerWrapper getContainerReference(String uri) - throws URISyntaxException, StorageException { - return new CloudBlobContainerWrapperImpl( - serviceClient.getContainerReference(uri)); - } - - // - // WrappingIterator - // - - /** - * This iterator wraps 
every ListBlobItem as they come from the listBlobs() - * calls to their proper wrapping objects. - */ - private static class WrappingIterator implements Iterator { - private final Iterator present; - - public WrappingIterator(Iterator present) { - this.present = present; - } - - public static Iterable wrap( - final Iterable present) { - return new Iterable() { - @Override - public Iterator iterator() { - return new WrappingIterator(present.iterator()); - } - }; - } - - @Override - public boolean hasNext() { - return present.hasNext(); - } - - @Override - public ListBlobItem next() { - ListBlobItem unwrapped = present.next(); - if (unwrapped instanceof CloudBlobDirectory) { - return new CloudBlobDirectoryWrapperImpl((CloudBlobDirectory) unwrapped); - } else if (unwrapped instanceof CloudBlockBlob) { - return new CloudBlockBlobWrapperImpl((CloudBlockBlob) unwrapped); - } else if (unwrapped instanceof CloudPageBlob) { - return new CloudPageBlobWrapperImpl((CloudPageBlob) unwrapped); - } else { - return unwrapped; - } - } - - @Override - public void remove() { - present.remove(); - } - } - - // - // CloudBlobDirectoryWrapperImpl - // - @InterfaceAudience.Private - static class CloudBlobDirectoryWrapperImpl extends CloudBlobDirectoryWrapper { - private final CloudBlobDirectory directory; - - public CloudBlobDirectoryWrapperImpl(CloudBlobDirectory directory) { - this.directory = directory; - } - - @Override - public URI getUri() { - return directory.getUri(); - } - - @Override - public Iterable listBlobs(String prefix, - boolean useFlatBlobListing, EnumSet listingDetails, - BlobRequestOptions options, OperationContext opContext) - throws URISyntaxException, StorageException { - return WrappingIterator.wrap(directory.listBlobs(prefix, - useFlatBlobListing, listingDetails, options, opContext)); - } - - @Override - public CloudBlobContainer getContainer() throws URISyntaxException, - StorageException { - return directory.getContainer(); - } - - @Override - public CloudBlobDirectory getParent() throws URISyntaxException, - StorageException { - return directory.getParent(); - } - - @Override - public StorageUri getStorageUri() { - return directory.getStorageUri(); - } - - } - - // - // CloudBlobContainerWrapperImpl - // - @InterfaceAudience.Private - static class CloudBlobContainerWrapperImpl extends CloudBlobContainerWrapper { - private final CloudBlobContainer container; - - public CloudBlobContainerWrapperImpl(CloudBlobContainer container) { - this.container = container; - } - - @Override - public String getName() { - return container.getName(); - } - - @Override - public boolean exists(OperationContext opContext) throws StorageException { - return container.exists(AccessCondition.generateEmptyCondition(), null, - opContext); - } - - @Override - public void create(OperationContext opContext) throws StorageException { - container.create(null, opContext); - } - - @Override - public HashMap getMetadata() { - return container.getMetadata(); - } - - @Override - public void setMetadata(HashMap metadata) { - container.setMetadata(metadata); - } - - @Override - public void downloadAttributes(OperationContext opContext) - throws StorageException { - container.downloadAttributes(AccessCondition.generateEmptyCondition(), - null, opContext); - } - - @Override - public void uploadMetadata(OperationContext opContext) - throws StorageException { - container.uploadMetadata(AccessCondition.generateEmptyCondition(), null, - opContext); - } - - @Override - public CloudBlobDirectoryWrapper 
getDirectoryReference(String relativePath) - throws URISyntaxException, StorageException { - - CloudBlobDirectory dir = container.getDirectoryReference(relativePath); - return new CloudBlobDirectoryWrapperImpl(dir); - } - - @Override - public CloudBlobWrapper getBlockBlobReference(String relativePath) - throws URISyntaxException, StorageException { - - return new CloudBlockBlobWrapperImpl(container.getBlockBlobReference(relativePath)); - } - - @Override - public CloudBlobWrapper getPageBlobReference(String relativePath) - throws URISyntaxException, StorageException { - return new CloudPageBlobWrapperImpl( - container.getPageBlobReference(relativePath)); - } - - } - - abstract static class CloudBlobWrapperImpl implements CloudBlobWrapper { - private final CloudBlob blob; - - @Override - public CloudBlob getBlob() { - return blob; - } - - public URI getUri() { - return getBlob().getUri(); - } - - protected CloudBlobWrapperImpl(CloudBlob blob) { - this.blob = blob; - } - - @Override - public HashMap getMetadata() { - return getBlob().getMetadata(); - } - - @Override - public void delete(OperationContext opContext, SelfRenewingLease lease) - throws StorageException { - getBlob().delete(DeleteSnapshotsOption.NONE, getLeaseCondition(lease), - null, opContext); - } - - /** - * Return and access condition for this lease, or else null if - * there's no lease. - */ - private AccessCondition getLeaseCondition(SelfRenewingLease lease) { - AccessCondition leaseCondition = null; - if (lease != null) { - leaseCondition = AccessCondition.generateLeaseCondition(lease.getLeaseID()); - } - return leaseCondition; - } - - @Override - public boolean exists(OperationContext opContext) - throws StorageException { - return getBlob().exists(null, null, opContext); - } - - @Override - public void downloadAttributes( - OperationContext opContext) throws StorageException { - getBlob().downloadAttributes(null, null, opContext); - } - - @Override - public BlobProperties getProperties() { - return getBlob().getProperties(); - } - - @Override - public void setMetadata(HashMap metadata) { - getBlob().setMetadata(metadata); - } - - @Override - public InputStream openInputStream( - BlobRequestOptions options, - OperationContext opContext) throws StorageException { - return getBlob().openInputStream(null, options, opContext); - } - - public OutputStream openOutputStream( - BlobRequestOptions options, - OperationContext opContext) throws StorageException { - return ((CloudBlockBlob) getBlob()).openOutputStream(null, options, opContext); - } - - public void upload(InputStream sourceStream, OperationContext opContext) - throws StorageException, IOException { - getBlob().upload(sourceStream, 0, null, null, opContext); - } - - @Override - public CloudBlobContainer getContainer() throws URISyntaxException, - StorageException { - return getBlob().getContainer(); - } - - @Override - public CloudBlobDirectory getParent() throws URISyntaxException, - StorageException { - return getBlob().getParent(); - } - - @Override - public void uploadMetadata(OperationContext opContext) - throws StorageException { - uploadMetadata(null, null, opContext); - } - - @Override - public void uploadMetadata(AccessCondition accessConditions, BlobRequestOptions options, - OperationContext opContext) throws StorageException{ - getBlob().uploadMetadata(accessConditions, options, opContext); - } - - public void uploadProperties(OperationContext opContext, SelfRenewingLease lease) - throws StorageException { - - // Include lease in request if lease not null. 
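- // A lease-scoped AccessCondition carries the lease ID with the request, so
- // the service rejects the update if this client no longer holds the lease,
- // guarding the blob's properties against concurrent writers.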
- getBlob().uploadProperties(getLeaseCondition(lease), null, opContext); - } - - @Override - public int getStreamMinimumReadSizeInBytes() { - return getBlob().getStreamMinimumReadSizeInBytes(); - } - - @Override - public void setStreamMinimumReadSizeInBytes(int minimumReadSizeBytes) { - getBlob().setStreamMinimumReadSizeInBytes(minimumReadSizeBytes); - } - - @Override - public void setWriteBlockSizeInBytes(int writeBlockSizeBytes) { - getBlob().setStreamWriteSizeInBytes(writeBlockSizeBytes); - } - - @Override - public StorageUri getStorageUri() { - return getBlob().getStorageUri(); - } - - @Override - public CopyState getCopyState() { - return getBlob().getCopyState(); - } - - @Override - public void startCopyFromBlob(CloudBlobWrapper sourceBlob, BlobRequestOptions options, - OperationContext opContext, boolean overwriteDestination) - throws StorageException, URISyntaxException { - AccessCondition dstAccessCondition = - overwriteDestination - ? null - : AccessCondition.generateIfNotExistsCondition(); - getBlob().startCopy(sourceBlob.getBlob().getQualifiedUri(), - null, dstAccessCondition, options, opContext); - } - - @Override - public void downloadRange(long offset, long length, OutputStream outStream, - BlobRequestOptions options, OperationContext opContext) - throws StorageException, IOException { - - getBlob().downloadRange(offset, length, outStream, null, options, opContext); - } - - @Override - public SelfRenewingLease acquireLease() throws StorageException { - return new SelfRenewingLease(this, false); - } - } - - - // - // CloudBlockBlobWrapperImpl - // - - static class CloudBlockBlobWrapperImpl extends CloudBlobWrapperImpl implements CloudBlockBlobWrapper { - public CloudBlockBlobWrapperImpl(CloudBlockBlob blob) { - super(blob); - } - - public OutputStream openOutputStream( - BlobRequestOptions options, - OperationContext opContext) throws StorageException { - return ((CloudBlockBlob) getBlob()).openOutputStream(null, options, opContext); - } - - public void upload(InputStream sourceStream, OperationContext opContext) - throws StorageException, IOException { - getBlob().upload(sourceStream, 0, null, null, opContext); - } - - public void uploadProperties(OperationContext opContext) - throws StorageException { - getBlob().uploadProperties(null, null, opContext); - } - - @Override - public List downloadBlockList(BlockListingFilter filter, BlobRequestOptions options, - OperationContext opContext) throws IOException, StorageException { - return ((CloudBlockBlob) getBlob()).downloadBlockList(filter, null, options, opContext); - - } - - @Override - public void uploadBlock(String blockId, AccessCondition accessCondition, InputStream sourceStream, - long length, BlobRequestOptions options, - OperationContext opContext) throws IOException, StorageException { - ((CloudBlockBlob) getBlob()).uploadBlock(blockId, sourceStream, length, accessCondition, options, opContext); - } - - @Override - public void commitBlockList(List blockList, AccessCondition accessCondition, BlobRequestOptions options, - OperationContext opContext) throws IOException, StorageException { - ((CloudBlockBlob) getBlob()).commitBlockList(blockList, accessCondition, options, opContext); - } - } - - static class CloudPageBlobWrapperImpl extends CloudBlobWrapperImpl implements CloudPageBlobWrapper { - public CloudPageBlobWrapperImpl(CloudPageBlob blob) { - super(blob); - } - - public void create(final long length, BlobRequestOptions options, - OperationContext opContext) throws StorageException { - ((CloudPageBlob) 
getBlob()).create(length, null, options, opContext); - } - - public void uploadPages(final InputStream sourceStream, final long offset, - final long length, BlobRequestOptions options, OperationContext opContext) - throws StorageException, IOException { - ((CloudPageBlob) getBlob()).uploadPages(sourceStream, offset, length, null, - options, opContext); - } - - public ArrayList downloadPageRanges(BlobRequestOptions options, - OperationContext opContext) throws StorageException { - return ((CloudPageBlob) getBlob()).downloadPageRanges( - null, options, opContext); - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SyncableDataOutputStream.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SyncableDataOutputStream.java deleted file mode 100644 index f8aed2612a857..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/SyncableDataOutputStream.java +++ /dev/null @@ -1,106 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.DataOutputStream; -import java.io.IOException; -import java.io.OutputStream; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.fs.StreamCapabilities; -import org.apache.hadoop.fs.Syncable; -import org.apache.hadoop.fs.impl.StoreImplementationUtils; - -/** - * Support the Syncable interface on top of a DataOutputStream. - * This allows passing the sync/hflush/hsync calls through to the - * wrapped stream passed in to the constructor. This is required - * for HBase when wrapping a PageBlobOutputStream used as a write-ahead log. - */ -public class SyncableDataOutputStream extends DataOutputStream - implements Syncable, StreamCapabilities { - - private static final Logger LOG = LoggerFactory.getLogger(SyncableDataOutputStream.class); - - public SyncableDataOutputStream(OutputStream out) { - super(out); - } - - /** - * Get a reference to the wrapped output stream. 
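- * This is the stream that was passed to the constructor; the
- * {@code hflush()} and {@code hsync()} calls below are forwarded to it
- * whenever it is itself {@link Syncable}.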
- * - * @return the underlying output stream - */ - @InterfaceAudience.LimitedPrivate({"HDFS"}) - public OutputStream getOutStream() { - return out; - } - - @Override - public boolean hasCapability(String capability) { - return StoreImplementationUtils.hasCapability(out, capability); - } - - @Override - public void hflush() throws IOException { - if (out instanceof Syncable) { - ((Syncable) out).hflush(); - } - } - - @Override - public void hsync() throws IOException { - if (out instanceof Syncable) { - ((Syncable) out).hsync(); - } - } - - @Override - public void close() throws IOException { - IOException ioeFromFlush = null; - try { - flush(); - } catch (IOException e) { - ioeFromFlush = e; - throw e; - } finally { - try { - this.out.close(); - } catch (IOException e) { - // If there was an Exception during flush(), the Azure SDK will throw back the - // same when we call close on the same stream. When try and finally both throw - // Exception, Java will use Throwable#addSuppressed for one of the Exception so - // that the caller will get one exception back. When within this, if both - // Exceptions are equal, it will throw back IllegalStateException. This makes us - // to throw back a non IOE. The below special handling is to avoid this. - if (ioeFromFlush == e) { - // Do nothing.. - // The close() call gave back the same IOE which flush() gave. Just swallow it - LOG.debug("flush() and close() throwing back same Exception. Just swallowing the latter", e); - } else { - // Let Java handle 2 different Exceptions been thrown from try and finally. - throw e; - } - } - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/Wasb.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/Wasb.java deleted file mode 100644 index dd354d78e77f3..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/Wasb.java +++ /dev/null @@ -1,47 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.DelegateToFileSystem; - -/** - * WASB implementation of AbstractFileSystem. 
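- * Hadoop normally resolves this binding through the standard per-scheme
- * lookup key, e.g. {@code conf.set("fs.AbstractFileSystem.wasb.impl",
- * "org.apache.hadoop.fs.azure.Wasb")} when it is not already provided by
- * the default configuration.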
- * This impl delegates to the old FileSystem - */ -@InterfaceAudience.Public -@InterfaceStability.Evolving -public class Wasb extends DelegateToFileSystem { - - Wasb(final URI theUri, final Configuration conf) throws IOException, - URISyntaxException { - super(theUri, new NativeAzureFileSystem(), conf, "wasb", false); - } - - @Override - public int getUriDefaultPort() { - return -1; - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbAuthorizationException.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbAuthorizationException.java deleted file mode 100644 index eff9248dffe42..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbAuthorizationException.java +++ /dev/null @@ -1,40 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -/** - * Exception that gets thrown during the authorization failures - * in WASB. - */ -public class WasbAuthorizationException extends AzureException { - - private static final long serialVersionUID = 1L; - - public WasbAuthorizationException(String message) { - super(message); - } - - public WasbAuthorizationException(String message, Throwable cause) { - super(message, cause); - } - - public WasbAuthorizationException(Throwable t) { - super(t); - } -} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbAuthorizationOperations.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbAuthorizationOperations.java deleted file mode 100644 index 7c63d4b8e3c39..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbAuthorizationOperations.java +++ /dev/null @@ -1,42 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -/** - * Different authorization operations supported - * in WASB. 
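- * {@code toString()} maps each constant to the lower-case access-type string
- * ("read" or "write") sent to the authorizer; EXECUTE is declared but has no
- * string form and is rejected.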
- */ - -public enum WasbAuthorizationOperations { - - READ, WRITE, EXECUTE; - - @Override - public String toString() { - switch(this) { - case READ: - return "read"; - case WRITE: - return "write"; - default: - throw new IllegalArgumentException( - "Invalid Authorization Operation"); - } - } -} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbAuthorizerInterface.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbAuthorizerInterface.java deleted file mode 100644 index af0e95483302d..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbAuthorizerInterface.java +++ /dev/null @@ -1,53 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.IOException; - -import org.apache.hadoop.conf.Configuration; - -/** - * Interface to implement authorization support in WASB. - * API's of this interface will be implemented in the - * StorageInterface Layer before making calls to Azure - * Storage. - */ -public interface WasbAuthorizerInterface { - /** - * Initializer method - * @param conf - Configuration object - * @throws WasbAuthorizationException - On authorization exceptions - * @throws IOException - When not able to reach the authorizer - */ - public void init(Configuration conf) - throws WasbAuthorizationException, IOException; - - /** - * Authorizer API to authorize access in WASB. - - * @param wasbAbolutePath : Absolute WASB Path used for access. - * @param accessType : Type of access - * @param owner : owner of the file/folder specified in the wasb path - * @return : true - If access allowed false - If access is not allowed. - * @throws WasbAuthorizationException - On authorization exceptions - * @throws IOException - When not able to reach the authorizer - */ - boolean authorize(String wasbAbolutePath, String accessType, String owner) - throws WasbAuthorizationException, IOException; -} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbFsck.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbFsck.java deleted file mode 100644 index 578586166f40d..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbFsck.java +++ /dev/null @@ -1,200 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.Arrays; -import java.util.List; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.conf.Configured; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.util.Tool; -import org.apache.hadoop.util.ToolRunner; - -import org.apache.hadoop.classification.VisibleForTesting; - -/** - * An fsck tool implementation for WASB that does various admin/cleanup/recovery - * tasks on the WASB file system. - */ -@InterfaceAudience.Public -@InterfaceStability.Evolving -public class WasbFsck extends Configured implements Tool { - private FileSystem mockFileSystemForTesting = null; - private static final String LOST_AND_FOUND_PATH = "/lost+found"; - private boolean pathNameWarning = false; - - public WasbFsck(Configuration conf) { - super(conf); - } - - /** - * For testing purposes, set the file system to use here instead of relying on - * getting it from the FileSystem class based on the URI. - * - * @param fileSystem - * The file system to use. - */ - @VisibleForTesting - public void setMockFileSystemForTesting(FileSystem fileSystem) { - this.mockFileSystemForTesting = fileSystem; - } - - @Override - public int run(String[] args) throws Exception { - if (doPrintUsage(Arrays.asList(args))) { - printUsage(); - return -1; - } - Path pathToCheck = null; - boolean doRecover = false; - boolean doDelete = false; - for (String arg : args) { - if (!arg.startsWith("-")) { - if (pathToCheck != null) { - System.err - .println("Can't specify multiple paths to check on the command-line"); - return 1; - } - pathToCheck = new Path(arg); - } else if (arg.equals("-move")) { - doRecover = true; - } else if (arg.equals("-delete")) { - doDelete = true; - } - } - if (doRecover && doDelete) { - System.err - .println("Conflicting options: can't specify both -move and -delete."); - return 1; - } - if (pathToCheck == null) { - pathToCheck = new Path("/"); // Check everything. - } - FileSystem fs; - if (mockFileSystemForTesting == null) { - fs = FileSystem.get(pathToCheck.toUri(), getConf()); - } else { - fs = mockFileSystemForTesting; - } - - if (!recursiveCheckChildPathName(fs, fs.makeQualified(pathToCheck))) { - pathNameWarning = true; - } - - if (!(fs instanceof NativeAzureFileSystem)) { - System.err - .println("Can only check WASB file system. 
Instead I'm asked to" - + " check: " + fs.getUri()); - return 2; - } - NativeAzureFileSystem wasbFs = (NativeAzureFileSystem) fs; - if (doRecover) { - System.out.println("Recovering files with dangling data under: " - + pathToCheck); - wasbFs.recoverFilesWithDanglingTempData(pathToCheck, new Path( - LOST_AND_FOUND_PATH)); - } else if (doDelete) { - System.out.println("Deleting temp files with dangling data under: " - + pathToCheck); - wasbFs.deleteFilesWithDanglingTempData(pathToCheck); - } else { - System.out.println("Please specify -move or -delete"); - } - return 0; - } - - public boolean getPathNameWarning() { - return pathNameWarning; - } - - /** - * Recursively check if a given path and its child paths have colons in their - * names. It returns true if none of them has a colon or this path does not - * exist, and false otherwise. - */ - private boolean recursiveCheckChildPathName(FileSystem fs, Path p) - throws IOException { - if (p == null) { - return true; - } - FileStatus status; - try { - status = fs.getFileStatus(p); - } catch (FileNotFoundException e) { - System.out.println("Path " + p + " does not exist!"); - return true; - } - - if (status.isFile()) { - if (containsColon(p)) { - System.out.println("Warning: file " + p + " has a colon in its name."); - return false; - } else { - return true; - } - } else { - boolean flag; - if (containsColon(p)) { - System.out.println("Warning: directory " + p - + " has a colon in its name."); - flag = false; - } else { - flag = true; - } - FileStatus[] listed = fs.listStatus(p); - for (FileStatus l : listed) { - if (!recursiveCheckChildPathName(fs, l.getPath())) { - flag = false; - } - } - return flag; - } - } - - private boolean containsColon(Path p) { - return p.toUri().getPath().toString().contains(":"); - } - - private static void printUsage() { - System.out.println("Usage: WasbFSck [] [-move | -delete]"); - System.out.println("\t\tstart checking from this path"); - System.out.println("\t-move\tmove any files whose upload was interrupted" - + " mid-stream to " + LOST_AND_FOUND_PATH); - System.out - .println("\t-delete\tdelete any files whose upload was interrupted" - + " mid-stream"); - ToolRunner.printGenericCommandUsage(System.out); - } - - private boolean doPrintUsage(List args) { - return args.contains("-H"); - } - - public static void main(String[] args) throws Exception { - int res = ToolRunner.run(new WasbFsck(new Configuration()), args); - System.exit(res); - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbRemoteCallException.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbRemoteCallException.java deleted file mode 100644 index 43c1b618362b0..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbRemoteCallException.java +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -/** - * Exception that gets thrown when a remote call - * made from WASB to external cred service fails. - */ - -public class WasbRemoteCallException extends AzureException { - - private static final long serialVersionUID = 1L; - - public WasbRemoteCallException(String message) { - super(message); - } - - public WasbRemoteCallException(String message, Throwable cause) { - super(message, cause); - } - - public WasbRemoteCallException(Throwable t) { - super(t); - } -} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbRemoteCallHelper.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbRemoteCallHelper.java deleted file mode 100644 index e6e0c9379ef37..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/WasbRemoteCallHelper.java +++ /dev/null @@ -1,297 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import org.apache.hadoop.classification.VisibleForTesting; -import org.apache.hadoop.fs.azure.security.Constants; -import org.apache.hadoop.io.retry.RetryPolicy; -import org.apache.http.Header; -import org.apache.http.HttpResponse; -import org.apache.http.HttpStatus; -import org.apache.http.NameValuePair; -import org.apache.http.StatusLine; -import org.apache.http.client.HttpClient; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.client.methods.HttpPost; -import org.apache.http.client.methods.HttpPut; -import org.apache.http.client.methods.HttpUriRequest; -import org.apache.http.client.utils.URIBuilder; -import org.apache.http.impl.client.HttpClientBuilder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.InterruptedIOException; -import java.net.InetAddress; -import java.net.URISyntaxException; -import java.nio.charset.StandardCharsets; -import java.util.List; -import java.util.Random; - -/** - * Helper class the has constants and helper methods - * used in WASB when integrating with a remote http cred - * service. Currently, remote service will be used to generate - * SAS keys. 
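- *
- * A rough calling sketch; the service URL, path and query parameter below
- * are purely illustrative, not a documented endpoint:
- * <pre>
- *   WasbRemoteCallHelper helper = new WasbRemoteCallHelper(retryPolicy);
- *   List queryParams = new ArrayList();
- *   queryParams.add(new BasicNameValuePair("op", "GET_SAS"));
- *   String jsonResponse = helper.makeRemoteRequest(
- *       new String[] {"http://localhost:8080"}, "/sas", queryParams,
- *       HttpGet.METHOD_NAME);
- * </pre>
- * The JSON body is returned verbatim; the caller parses it itself.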
- */ -public class WasbRemoteCallHelper { - - public static final Logger LOG = - LoggerFactory.getLogger(WasbRemoteCallHelper.class); - /** - * Return code when the remote call is successful. {@value} - */ - public static final int REMOTE_CALL_SUCCESS_CODE = 0; - - /** - * Application Json content type. - */ - private static final String APPLICATION_JSON = "application/json"; - - /** - * Max content length of the response. - */ - private static final int MAX_CONTENT_LENGTH = 1024; - - /** - * Client instance to be used for making the remote call. - */ - private HttpClient client = null; - - private Random random = new Random(); - - private RetryPolicy retryPolicy = null; - - public WasbRemoteCallHelper(RetryPolicy retryPolicy) { - this.client = HttpClientBuilder.create().build(); - this.retryPolicy = retryPolicy; - } - - @VisibleForTesting public void updateHttpClient(HttpClient client) { - this.client = client; - } - - /** - * Helper method to make remote HTTP Get request. - * - * @param urls - Service urls to be used, if one fails try another. - * @param path - URL endpoint for the resource. - * @param queryParams - list of query parameters - * @param httpMethod - http Method to be used. - * @return Http Response body returned as a string. The caller - * is expected to semantically understand the response. - * @throws IOException when there an error in executing the remote http request. - */ - public String makeRemoteRequest(String[] urls, String path, - List queryParams, String httpMethod) throws IOException { - - return retryableRequest(urls, path, queryParams, httpMethod); - } - - protected String retryableRequest(String[] urls, String path, - List queryParams, String httpMethod) throws IOException { - HttpResponse response = null; - HttpUriRequest httpRequest = null; - - /** - * Get the index of local url if any. If list of urls contains strings like - * "https://localhost:" or "http://localhost", consider it as local url and - * give it affinity more than other urls in the list. - */ - - int indexOfLocalUrl = -1; - for (int i = 0; i < urls.length; i++) { - if (urls[i].toLowerCase().startsWith("https://localhost:") || urls[i] - .toLowerCase().startsWith("http://localhost:")) { - indexOfLocalUrl = i; - } - } - - boolean requiresNewAuth = false; - for (int retry = 0, index = (indexOfLocalUrl != -1) - ? indexOfLocalUrl - : random - .nextInt(urls.length);; retry++, index++) { - if (index >= urls.length) { - index = index % urls.length; - } - /** - * If the first request fails to localhost, then randomly pick the next url - * from the remaining urls in the list, so that load can be balanced. - */ - if (indexOfLocalUrl != -1 && retry == 1) { - index = (index + random.nextInt(urls.length)) % urls.length; - if (index == indexOfLocalUrl) { - index = (index + 1) % urls.length; - } - } - try { - httpRequest = - getHttpRequest(urls, path, queryParams, index, httpMethod, - requiresNewAuth); - httpRequest.setHeader("Accept", APPLICATION_JSON); - response = client.execute(httpRequest); - StatusLine statusLine = response.getStatusLine(); - if (statusLine == null - || statusLine.getStatusCode() != HttpStatus.SC_OK) { - requiresNewAuth = - (statusLine == null) - || (statusLine.getStatusCode() == HttpStatus.SC_UNAUTHORIZED); - - throw new WasbRemoteCallException( - httpRequest.getURI().toString() + ":" + ((statusLine != null) - ? 
statusLine.toString() - : "NULL")); - } else { - requiresNewAuth = false; - } - - Header contentTypeHeader = response.getFirstHeader("Content-Type"); - if (contentTypeHeader == null || !APPLICATION_JSON - .equals(contentTypeHeader.getValue())) { - throw new WasbRemoteCallException( - httpRequest.getURI().toString() + ":" - + "Content-Type mismatch: expected: " + APPLICATION_JSON - + ", got " + ((contentTypeHeader != null) ? contentTypeHeader - .getValue() : "NULL")); - } - - Header contentLengthHeader = response.getFirstHeader("Content-Length"); - if (contentLengthHeader == null) { - throw new WasbRemoteCallException( - httpRequest.getURI().toString() + ":" - + "Content-Length header missing"); - } - - try { - if (Integer.parseInt(contentLengthHeader.getValue()) - > MAX_CONTENT_LENGTH) { - throw new WasbRemoteCallException( - httpRequest.getURI().toString() + ":" + "Content-Length:" - + contentLengthHeader.getValue() + "exceeded max:" - + MAX_CONTENT_LENGTH); - } - } catch (NumberFormatException nfe) { - throw new WasbRemoteCallException( - httpRequest.getURI().toString() + ":" - + "Invalid Content-Length value :" + contentLengthHeader - .getValue()); - } - - BufferedReader rd = null; - StringBuilder responseBody = new StringBuilder(); - try { - rd = new BufferedReader( - new InputStreamReader(response.getEntity().getContent(), - StandardCharsets.UTF_8)); - String responseLine = ""; - while ((responseLine = rd.readLine()) != null) { - responseBody.append(responseLine); - } - } finally { - rd.close(); - } - return responseBody.toString(); - } catch (URISyntaxException uriSyntaxEx) { - throw new WasbRemoteCallException("Encountered URISyntaxException " - + "while building the HttpGetRequest to remote service", - uriSyntaxEx); - } catch (IOException e) { - LOG.debug(e.getMessage(), e); - try { - shouldRetry(e, retry, (httpRequest != null) - ? httpRequest.getURI().toString() - : urls[index]); - } catch (IOException ioex) { - String message = - "Encountered error while making remote call to " + String - .join(",", urls) + " retried " + retry + " time(s)."; - LOG.error(message, ioex); - throw new WasbRemoteCallException(message, ioex); - } - } - } - } - - protected HttpUriRequest getHttpRequest(String[] urls, String path, - List queryParams, int urlIndex, String httpMethod, - boolean requiresNewAuth) throws URISyntaxException, IOException { - URIBuilder uriBuilder = null; - uriBuilder = - new URIBuilder(urls[urlIndex]).setPath(path).setParameters(queryParams); - if (uriBuilder.getHost().equals("localhost")) { - uriBuilder.setHost(InetAddress.getLocalHost().getCanonicalHostName()); - } - HttpUriRequest httpUriRequest = null; - switch (httpMethod) { - case HttpPut.METHOD_NAME: - httpUriRequest = new HttpPut(uriBuilder.build()); - break; - case HttpPost.METHOD_NAME: - httpUriRequest = new HttpPost(uriBuilder.build()); - break; - default: - httpUriRequest = new HttpGet(uriBuilder.build()); - break; - } - return httpUriRequest; - } - - private void shouldRetry(final IOException ioe, final int retry, - final String url) throws IOException { - CharSequence authenticationExceptionMessage = - Constants.AUTHENTICATION_FAILED_ERROR_MESSAGE; - if (ioe instanceof WasbRemoteCallException && ioe.getMessage() - .equals(authenticationExceptionMessage)) { - throw ioe; - } - try { - final RetryPolicy.RetryAction a = (retryPolicy != null) - ? 
retryPolicy - .shouldRetry(ioe, retry, 0, true) - : RetryPolicy.RetryAction.FAIL; - - boolean isRetry = a.action == RetryPolicy.RetryAction.RetryDecision.RETRY; - boolean isFailoverAndRetry = - a.action == RetryPolicy.RetryAction.RetryDecision.FAILOVER_AND_RETRY; - - if (isRetry || isFailoverAndRetry) { - LOG.debug("Retrying connect to Remote service:{}. Already tried {}" - + " time(s); retry policy is {}, " + "delay {}ms.", url, retry, - retryPolicy, a.delayMillis); - - Thread.sleep(a.delayMillis); - return; - } - } catch (InterruptedIOException e) { - LOG.warn(e.getMessage(), e); - Thread.currentThread().interrupt(); - return; - } catch (Exception e) { - LOG.warn("Original exception is ", ioe); - throw new WasbRemoteCallException(e.getMessage(), e); - } - LOG.debug("Not retrying anymore, already retried the urls {} time(s)", - retry); - throw new WasbRemoteCallException( - url + ":" + "Encountered IOException while making remote call", ioe); - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/Wasbs.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/Wasbs.java deleted file mode 100644 index 0b4a7824b58c9..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/Wasbs.java +++ /dev/null @@ -1,47 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.DelegateToFileSystem; - -/** - * WASB implementation of AbstractFileSystem for wasbs scheme. - * This impl delegates to the old FileSystem - */ -@InterfaceAudience.Public -@InterfaceStability.Evolving -public class Wasbs extends DelegateToFileSystem { - - Wasbs(final URI theUri, final Configuration conf) throws IOException, - URISyntaxException { - super(theUri, new NativeAzureFileSystem(), conf, "wasbs", false); - } - - @Override - public int getUriDefaultPort() { - return -1; - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/metrics/AzureFileSystemInstrumentation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/metrics/AzureFileSystemInstrumentation.java deleted file mode 100644 index 6cce271b227d0..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/metrics/AzureFileSystemInstrumentation.java +++ /dev/null @@ -1,396 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.metrics; - -import java.util.UUID; -import java.util.concurrent.atomic.AtomicLong; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.metrics2.MetricsCollector; -import org.apache.hadoop.metrics2.MetricsInfo; -import org.apache.hadoop.metrics2.MetricsSource; -import org.apache.hadoop.metrics2.annotation.Metrics; -import org.apache.hadoop.metrics2.lib.MetricsRegistry; -import org.apache.hadoop.metrics2.lib.MutableCounterLong; -import org.apache.hadoop.metrics2.lib.MutableGaugeLong; - -/** - * A metrics source for the WASB file system to track all the metrics we care - * about for getting a clear picture of the performance/reliability/interaction - * of the Hadoop cluster with Azure Storage. - */ -@Metrics(about="Metrics for WASB", context="azureFileSystem") -@InterfaceAudience.Public -@InterfaceStability.Evolving -public final class AzureFileSystemInstrumentation implements MetricsSource { - - public static final String METRIC_TAG_FILESYSTEM_ID = "wasbFileSystemId"; - public static final String METRIC_TAG_ACCOUNT_NAME = "accountName"; - public static final String METRIC_TAG_CONTAINTER_NAME = "containerName"; - - public static final String WASB_WEB_RESPONSES = "wasb_web_responses"; - public static final String WASB_BYTES_WRITTEN = - "wasb_bytes_written_last_second"; - public static final String WASB_BYTES_READ = - "wasb_bytes_read_last_second"; - public static final String WASB_RAW_BYTES_UPLOADED = - "wasb_raw_bytes_uploaded"; - public static final String WASB_RAW_BYTES_DOWNLOADED = - "wasb_raw_bytes_downloaded"; - public static final String WASB_FILES_CREATED = "wasb_files_created"; - public static final String WASB_FILES_DELETED = "wasb_files_deleted"; - public static final String WASB_DIRECTORIES_CREATED = "wasb_directories_created"; - public static final String WASB_DIRECTORIES_DELETED = "wasb_directories_deleted"; - public static final String WASB_UPLOAD_RATE = - "wasb_maximum_upload_bytes_per_second"; - public static final String WASB_DOWNLOAD_RATE = - "wasb_maximum_download_bytes_per_second"; - public static final String WASB_UPLOAD_LATENCY = - "wasb_average_block_upload_latency_ms"; - public static final String WASB_DOWNLOAD_LATENCY = - "wasb_average_block_download_latency_ms"; - public static final String WASB_CLIENT_ERRORS = "wasb_client_errors"; - public static final String WASB_SERVER_ERRORS = "wasb_server_errors"; - - /** - * Config key for how big the rolling window size for latency metrics should - * be (in seconds). 
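- * For example, setting {@code fs.azure.metrics.rolling.window.size} to 60
- * averages block upload/download latencies over the last minute; when unset,
- * the 5-second default defined below is used.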
- */ - private static final String KEY_ROLLING_WINDOW_SIZE = "fs.azure.metrics.rolling.window.size"; - - private final MetricsRegistry registry = - new MetricsRegistry("azureFileSystem") - .setContext("azureFileSystem"); - private final MutableCounterLong numberOfWebResponses = - registry.newCounter( - WASB_WEB_RESPONSES, - "Total number of web responses obtained from Azure Storage", - 0L); - private AtomicLong inMemoryNumberOfWebResponses = new AtomicLong(0); - private final MutableCounterLong numberOfFilesCreated = - registry.newCounter( - WASB_FILES_CREATED, - "Total number of files created through the WASB file system.", - 0L); - private final MutableCounterLong numberOfFilesDeleted = - registry.newCounter( - WASB_FILES_DELETED, - "Total number of files deleted through the WASB file system.", - 0L); - private final MutableCounterLong numberOfDirectoriesCreated = - registry.newCounter( - WASB_DIRECTORIES_CREATED, - "Total number of directories created through the WASB file system.", - 0L); - private final MutableCounterLong numberOfDirectoriesDeleted = - registry.newCounter( - WASB_DIRECTORIES_DELETED, - "Total number of directories deleted through the WASB file system.", - 0L); - private final MutableGaugeLong bytesWrittenInLastSecond = - registry.newGauge( - WASB_BYTES_WRITTEN, - "Total number of bytes written to Azure Storage during the last second.", - 0L); - private final MutableGaugeLong bytesReadInLastSecond = - registry.newGauge( - WASB_BYTES_READ, - "Total number of bytes read from Azure Storage during the last second.", - 0L); - private final MutableGaugeLong maximumUploadBytesPerSecond = - registry.newGauge( - WASB_UPLOAD_RATE, - "The maximum upload rate encountered to Azure Storage in bytes/second.", - 0L); - private final MutableGaugeLong maximumDownloadBytesPerSecond = - registry.newGauge( - WASB_DOWNLOAD_RATE, - "The maximum download rate encountered to Azure Storage in bytes/second.", - 0L); - private final MutableCounterLong rawBytesUploaded = - registry.newCounter( - WASB_RAW_BYTES_UPLOADED, - "Total number of raw bytes (including overhead) uploaded to Azure" - + " Storage.", - 0L); - private final MutableCounterLong rawBytesDownloaded = - registry.newCounter( - WASB_RAW_BYTES_DOWNLOADED, - "Total number of raw bytes (including overhead) downloaded from Azure" - + " Storage.", - 0L); - private final MutableCounterLong clientErrors = - registry.newCounter( - WASB_CLIENT_ERRORS, - "Total number of client-side errors by WASB (excluding 404).", - 0L); - private final MutableCounterLong serverErrors = - registry.newCounter( - WASB_SERVER_ERRORS, - "Total number of server-caused errors by WASB.", - 0L); - private final MutableGaugeLong averageBlockUploadLatencyMs; - private final MutableGaugeLong averageBlockDownloadLatencyMs; - private long currentMaximumUploadBytesPerSecond; - private long currentMaximumDownloadBytesPerSecond; - private static final int DEFAULT_LATENCY_ROLLING_AVERAGE_WINDOW = - 5; // seconds - private final RollingWindowAverage currentBlockUploadLatency; - private final RollingWindowAverage currentBlockDownloadLatency; - private UUID fileSystemInstanceId; - - public AzureFileSystemInstrumentation(Configuration conf) { - fileSystemInstanceId = UUID.randomUUID(); - registry.tag("wasbFileSystemId", - "A unique identifier for the file ", - fileSystemInstanceId.toString()); - final int rollingWindowSizeInSeconds = - conf.getInt(KEY_ROLLING_WINDOW_SIZE, - DEFAULT_LATENCY_ROLLING_AVERAGE_WINDOW); - averageBlockUploadLatencyMs = - registry.newGauge( - 
WASB_UPLOAD_LATENCY, - String.format("The average latency in milliseconds of uploading a single block" - + ". The average latency is calculated over a %d-second rolling" - + " window.", rollingWindowSizeInSeconds), - 0L); - averageBlockDownloadLatencyMs = - registry.newGauge( - WASB_DOWNLOAD_LATENCY, - String.format("The average latency in milliseconds of downloading a single block" - + ". The average latency is calculated over a %d-second rolling" - + " window.", rollingWindowSizeInSeconds), - 0L); - currentBlockUploadLatency = - new RollingWindowAverage(rollingWindowSizeInSeconds * 1000); - currentBlockDownloadLatency = - new RollingWindowAverage(rollingWindowSizeInSeconds * 1000); - } - - /** - * The unique identifier for this file system in the metrics. - * @return The unique identifier. - */ - public UUID getFileSystemInstanceId() { - return fileSystemInstanceId; - } - - /** - * Get the metrics registry information. - * @return The metrics registry information. - */ - public MetricsInfo getMetricsRegistryInfo() { - return registry.info(); - } - - /** - * Sets the account name to tag all the metrics with. - * @param accountName The account name. - */ - public void setAccountName(String accountName) { - registry.tag("accountName", - "Name of the Azure Storage account that these metrics are going against", - accountName); - } - - /** - * Sets the container name to tag all the metrics with. - * @param containerName The container name. - */ - public void setContainerName(String containerName) { - registry.tag("containerName", - "Name of the Azure Storage container that these metrics are going against", - containerName); - } - - /** - * Indicate that we just got a web response from Azure Storage. This should - * be called for every web request/response we do (to get accurate metrics - * of how we're hitting the storage service). - */ - public void webResponse() { - numberOfWebResponses.incr(); - inMemoryNumberOfWebResponses.incrementAndGet(); - } - - /** - * Gets the current number of web responses obtained from Azure Storage. - * @return The number of web responses. - */ - public long getCurrentWebResponses() { - return inMemoryNumberOfWebResponses.get(); - } - - /** - * Indicate that we just created a file through WASB. - */ - public void fileCreated() { - numberOfFilesCreated.incr(); - } - - /** - * Indicate that we just deleted a file through WASB. - */ - public void fileDeleted() { - numberOfFilesDeleted.incr(); - } - - /** - * Indicate that we just created a directory through WASB. - */ - public void directoryCreated() { - numberOfDirectoriesCreated.incr(); - } - - /** - * Indicate that we just deleted a directory through WASB. - */ - public void directoryDeleted() { - numberOfDirectoriesDeleted.incr(); - } - - /** - * Sets the current gauge value for how many bytes were written in the last - * second. - * @param currentBytesWritten The number of bytes. - */ - public void updateBytesWrittenInLastSecond(long currentBytesWritten) { - bytesWrittenInLastSecond.set(currentBytesWritten); - } - - /** - * Sets the current gauge value for how many bytes were read in the last - * second. - * @param currentBytesRead The number of bytes. - */ - public void updateBytesReadInLastSecond(long currentBytesRead) { - bytesReadInLastSecond.set(currentBytesRead); - } - - /** - * Record the current bytes-per-second upload rate seen. - * @param bytesPerSecond The bytes per second. 
- */ - public synchronized void currentUploadBytesPerSecond(long bytesPerSecond) { - if (bytesPerSecond > currentMaximumUploadBytesPerSecond) { - currentMaximumUploadBytesPerSecond = bytesPerSecond; - maximumUploadBytesPerSecond.set(bytesPerSecond); - } - } - - /** - * Record the current bytes-per-second download rate seen. - * @param bytesPerSecond The bytes per second. - */ - public synchronized void currentDownloadBytesPerSecond(long bytesPerSecond) { - if (bytesPerSecond > currentMaximumDownloadBytesPerSecond) { - currentMaximumDownloadBytesPerSecond = bytesPerSecond; - maximumDownloadBytesPerSecond.set(bytesPerSecond); - } - } - - /** - * Indicate that we just uploaded some data to Azure storage. - * @param numberOfBytes The raw number of bytes uploaded (including overhead). - */ - public void rawBytesUploaded(long numberOfBytes) { - rawBytesUploaded.incr(numberOfBytes); - } - - /** - * Indicate that we just downloaded some data to Azure storage. - * @param numberOfBytes The raw number of bytes downloaded (including overhead). - */ - public void rawBytesDownloaded(long numberOfBytes) { - rawBytesDownloaded.incr(numberOfBytes); - } - - /** - * Indicate that we just uploaded a block and record its latency. - * @param latency The latency in milliseconds. - */ - public void blockUploaded(long latency) { - currentBlockUploadLatency.addPoint(latency); - } - - /** - * Indicate that we just downloaded a block and record its latency. - * @param latency The latency in milliseconds. - */ - public void blockDownloaded(long latency) { - currentBlockDownloadLatency.addPoint(latency); - } - - /** - * Indicate that we just encountered a client-side error. - */ - public void clientErrorEncountered() { - clientErrors.incr(); - } - - /** - * Indicate that we just encountered a server-caused error. - */ - public void serverErrorEncountered() { - serverErrors.incr(); - } - - /** - * Get the current rolling average of the upload latency. - * @return rolling average of upload latency in milliseconds. - */ - public long getBlockUploadLatency() { - return currentBlockUploadLatency.getCurrentAverage(); - } - - /** - * Get the current rolling average of the download latency. - * @return rolling average of download latency in milliseconds. - */ - public long getBlockDownloadLatency() { - return currentBlockDownloadLatency.getCurrentAverage(); - } - - /** - * Get the current maximum upload bandwidth. - * @return maximum upload bandwidth in bytes per second. - */ - public long getCurrentMaximumUploadBandwidth() { - return currentMaximumUploadBytesPerSecond; - } - - /** - * Get the current maximum download bandwidth. - * @return maximum download bandwidth in bytes per second. 
- */ - public long getCurrentMaximumDownloadBandwidth() { - return currentMaximumDownloadBytesPerSecond; - } - - @Override - public void getMetrics(MetricsCollector builder, boolean all) { - averageBlockDownloadLatencyMs.set( - currentBlockDownloadLatency.getCurrentAverage()); - averageBlockUploadLatencyMs.set( - currentBlockUploadLatency.getCurrentAverage()); - registry.snapshot(builder.addRecord(registry.info().name()), true); - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/metrics/AzureFileSystemMetricsSystem.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/metrics/AzureFileSystemMetricsSystem.java deleted file mode 100644 index 322795ab82712..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/metrics/AzureFileSystemMetricsSystem.java +++ /dev/null @@ -1,69 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.metrics; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.metrics2.MetricsSource; -import org.apache.hadoop.metrics2.impl.MetricsSystemImpl; - -/** - * AzureFileSystemMetricsSystem - */ -@InterfaceAudience.Private -public final class AzureFileSystemMetricsSystem { - private static MetricsSystemImpl instance; - private static int numFileSystems; - - //private ctor - private AzureFileSystemMetricsSystem(){ - - } - - public static synchronized void fileSystemStarted() { - if (numFileSystems == 0) { - instance = new MetricsSystemImpl(); - instance.init("azure-file-system"); - } - numFileSystems++; - } - - public static synchronized void fileSystemClosed() { - if (numFileSystems == 1) { - instance.publishMetricsNow(); - instance.stop(); - instance.shutdown(); - instance = null; - } - numFileSystems--; - } - - public static void registerSource(String name, String desc, - MetricsSource source) { - //caller has to use unique name to register source - instance.register(name, desc, source); - } - - public static synchronized void unregisterSource(String name) { - if (instance != null) { - //publish metrics before unregister a metrics source - instance.publishMetricsNow(); - instance.unregisterSource(name); - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/metrics/BandwidthGaugeUpdater.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/metrics/BandwidthGaugeUpdater.java deleted file mode 100644 index d0a1bd0e7fb63..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/metrics/BandwidthGaugeUpdater.java +++ /dev/null @@ -1,286 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.metrics; - -import java.util.ArrayList; -import java.util.Date; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.util.concurrent.SubjectInheritingThread; - -/** - * Internal implementation class to help calculate the current bytes - * uploaded/downloaded and the maximum bandwidth gauges. - */ -@InterfaceAudience.Private -public final class BandwidthGaugeUpdater { - - public static final String THREAD_NAME = "AzureNativeFilesystemStore-UploadBandwidthUpdater"; - - private static final int DEFAULT_WINDOW_SIZE_MS = 1000; - private static final int PROCESS_QUEUE_INITIAL_CAPACITY = 1000; - private int windowSizeMs; - private ArrayList allBlocksWritten = - createNewToProcessQueue(); - private ArrayList allBlocksRead = - createNewToProcessQueue(); - private final Object blocksWrittenLock = new Object(); - private final Object blocksReadLock = new Object(); - private final AzureFileSystemInstrumentation instrumentation; - private Thread uploadBandwidthUpdater; - private volatile boolean suppressAutoUpdate = false; - - /** - * Create a new updater object with default values. - * @param instrumentation The metrics source to update. - */ - public BandwidthGaugeUpdater(AzureFileSystemInstrumentation instrumentation) { - this(instrumentation, DEFAULT_WINDOW_SIZE_MS, false); - } - - /** - * Create a new updater object with some overrides (used in unit tests). - * @param instrumentation The metrics source to update. - * @param windowSizeMs The window size to use for calculating bandwidth - * (in milliseconds). - * @param manualUpdateTrigger If true, then this object won't create the - * auto-update threads, and will wait for manual - * calls to triggerUpdate to occur. - */ - public BandwidthGaugeUpdater(AzureFileSystemInstrumentation instrumentation, - int windowSizeMs, boolean manualUpdateTrigger) { - this.windowSizeMs = windowSizeMs; - this.instrumentation = instrumentation; - if (!manualUpdateTrigger) { - uploadBandwidthUpdater = new SubjectInheritingThread(new UploadBandwidthUpdater(), THREAD_NAME); - uploadBandwidthUpdater.setDaemon(true); - uploadBandwidthUpdater.start(); - } - } - - /** - * Indicate that a block has been uploaded. - * @param startDate The exact time the upload started. - * @param endDate The exact time the upload ended. - * @param length The number of bytes uploaded in the block. - */ - public void blockUploaded(Date startDate, Date endDate, long length) { - synchronized (blocksWrittenLock) { - allBlocksWritten.add(new BlockTransferWindow(startDate, endDate, length)); - } - } - - /** - * Indicate that a block has been downloaded. - * @param startDate The exact time the download started. - * @param endDate The exact time the download ended. - * @param length The number of bytes downloaded in the block. 
- */ - public void blockDownloaded(Date startDate, Date endDate, long length) { - synchronized (blocksReadLock) { - allBlocksRead.add(new BlockTransferWindow(startDate, endDate, length)); - } - } - - /** - * Creates a new ArrayList to hold incoming block transfer notifications - * before they're processed. - * @return The newly created ArrayList. - */ - private static ArrayList createNewToProcessQueue() { - return new ArrayList(PROCESS_QUEUE_INITIAL_CAPACITY); - } - - /** - * Update the metrics source gauge for how many bytes were transferred - * during the last time window. - * @param updateWrite If true, update the write (upload) counter. - * Otherwise update the read (download) counter. - * @param bytes The number of bytes transferred. - */ - private void updateBytesTransferred(boolean updateWrite, long bytes) { - if (updateWrite) { - instrumentation.updateBytesWrittenInLastSecond(bytes); - } - else { - instrumentation.updateBytesReadInLastSecond(bytes); - } - } - - /** - * Update the metrics source gauge for what the current transfer rate - * is. - * @param updateWrite If true, update the write (upload) counter. - * Otherwise update the read (download) counter. - * @param bytesPerSecond The number of bytes per second we're seeing. - */ - private void updateBytesTransferRate(boolean updateWrite, long bytesPerSecond) { - if (updateWrite) { - instrumentation.currentUploadBytesPerSecond(bytesPerSecond); - } - else { - instrumentation.currentDownloadBytesPerSecond(bytesPerSecond); - } - } - - /** - * For unit test purposes, suppresses auto-update of the metrics - * from the dedicated thread. - */ - public void suppressAutoUpdate() { - suppressAutoUpdate = true; - } - - /** - * Resumes auto-update (undo suppressAutoUpdate). - */ - public void resumeAutoUpdate() { - suppressAutoUpdate = false; - } - - /** - * Triggers the update of the metrics gauge based on all the blocks - * uploaded/downloaded so far. This is typically done periodically in a - * dedicated update thread, but exposing as public for unit test purposes. - * - * @param updateWrite If true, we'll update the write (upload) metrics. - * Otherwise we'll update the read (download) ones. - */ - public void triggerUpdate(boolean updateWrite) { - ArrayList toProcess = null; - synchronized (updateWrite ? blocksWrittenLock : blocksReadLock) { - if (updateWrite && !allBlocksWritten.isEmpty()) { - toProcess = allBlocksWritten; - allBlocksWritten = createNewToProcessQueue(); - } else if (!updateWrite && !allBlocksRead.isEmpty()) { - toProcess = allBlocksRead; - allBlocksRead = createNewToProcessQueue(); - } - } - - // Check to see if we have any blocks to process. - if (toProcess == null) { - // Nothing to process, set the current bytes and rate to zero. - updateBytesTransferred(updateWrite, 0); - updateBytesTransferRate(updateWrite, 0); - return; - } - - // The cut-off time for when we want to calculate rates is one - // window size ago from now. - long cutoffTime = new Date().getTime() - windowSizeMs; - - // Go through all the blocks we're processing, and calculate the - // total number of bytes processed as well as the maximum transfer - // rate we experienced for any single block during our time window. 
- long maxSingleBlockTransferRate = 0; - long bytesInLastSecond = 0; - for (BlockTransferWindow currentWindow : toProcess) { - long windowDuration = currentWindow.getEndDate().getTime() - - currentWindow.getStartDate().getTime(); - if (windowDuration == 0) { - // Edge case, assume it took 1 ms but we were too fast - windowDuration = 1; - } - if (currentWindow.getStartDate().getTime() > cutoffTime) { - // This block was transferred fully within our time window, - // just add its bytes to the total. - bytesInLastSecond += currentWindow.bytesTransferred; - } else if (currentWindow.getEndDate().getTime() > cutoffTime) { - // This block started its transfer before our time window, - // interpolate to estimate how many bytes from that block - // were actually transferred during our time window. - long adjustedBytes = (currentWindow.getBytesTransferred() - * (currentWindow.getEndDate().getTime() - cutoffTime)) - / windowDuration; - bytesInLastSecond += adjustedBytes; - } - // Calculate the transfer rate for this block. - long currentBlockTransferRate = - (currentWindow.getBytesTransferred() * 1000) / windowDuration; - maxSingleBlockTransferRate = - Math.max(maxSingleBlockTransferRate, currentBlockTransferRate); - } - updateBytesTransferred(updateWrite, bytesInLastSecond); - // The transfer rate we saw in the last second is a tricky concept to - // define: If we saw two blocks, one 2 MB block transferred in 0.2 seconds, - // and one 4 MB block transferred in 0.2 seconds, then the maximum rate - // is 20 MB/s (the 4 MB block), the average of the two blocks is 15 MB/s, - // and the aggregate rate is 6 MB/s (total of 6 MB transferred in one - // second). As a first cut, I'm taking the definition to be the maximum - // of aggregate or of any single block's rate (so in the example case it's - // 6 MB/s). - long aggregateTransferRate = bytesInLastSecond; - long maxObservedTransferRate = - Math.max(aggregateTransferRate, maxSingleBlockTransferRate); - updateBytesTransferRate(updateWrite, maxObservedTransferRate); - } - - /** - * A single block transfer. - */ - private static final class BlockTransferWindow { - private final Date startDate; - private final Date endDate; - private final long bytesTransferred; - - public BlockTransferWindow(Date startDate, Date endDate, - long bytesTransferred) { - this.startDate = startDate; - this.endDate = endDate; - this.bytesTransferred = bytesTransferred; - } - - public Date getStartDate() { return startDate; } - public Date getEndDate() { return endDate; } - public long getBytesTransferred() { return bytesTransferred; } - } - - /** - * The auto-update thread. - */ - private final class UploadBandwidthUpdater implements Runnable { - @Override - public void run() { - try { - while (true) { - Thread.sleep(windowSizeMs); - if (!suppressAutoUpdate) { - triggerUpdate(true); - triggerUpdate(false); - } - } - } catch (InterruptedException e) { - } - } - } - - public void close() { - if (uploadBandwidthUpdater != null) { - // Interrupt and join the updater thread in death. 
- uploadBandwidthUpdater.interrupt(); - try { - uploadBandwidthUpdater.join(); - } catch (InterruptedException e) { - } - uploadBandwidthUpdater = null; - } - } - -} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/metrics/ErrorMetricUpdater.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/metrics/ErrorMetricUpdater.java deleted file mode 100644 index dc23354e7b253..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/metrics/ErrorMetricUpdater.java +++ /dev/null @@ -1,82 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.metrics; - -import static java.net.HttpURLConnection.HTTP_NOT_FOUND; //404 -import static java.net.HttpURLConnection.HTTP_BAD_REQUEST; //400 -import static java.net.HttpURLConnection.HTTP_INTERNAL_ERROR; //500 - -import org.apache.hadoop.classification.InterfaceAudience; - -import com.microsoft.azure.storage.OperationContext; -import com.microsoft.azure.storage.RequestResult; -import com.microsoft.azure.storage.ResponseReceivedEvent; -import com.microsoft.azure.storage.StorageEvent; - - -/** - * An event listener to the ResponseReceived event from Azure Storage that will - * update error metrics appropriately when it gets that event. - */ -@InterfaceAudience.Private -public final class ErrorMetricUpdater extends StorageEvent { - private final AzureFileSystemInstrumentation instrumentation; - private final OperationContext operationContext; - - private ErrorMetricUpdater(OperationContext operationContext, - AzureFileSystemInstrumentation instrumentation) { - this.instrumentation = instrumentation; - this.operationContext = operationContext; - } - - /** - * Hooks a new listener to the given operationContext that will update the - * error metrics for the WASB file system appropriately in response to - * ResponseReceived events. - * - * @param operationContext The operationContext to hook. - * @param instrumentation The metrics source to update. - */ - public static void hook( - OperationContext operationContext, - AzureFileSystemInstrumentation instrumentation) { - ErrorMetricUpdater listener = - new ErrorMetricUpdater(operationContext, - instrumentation); - operationContext.getResponseReceivedEventHandler().addListener(listener); - } - - @Override - public void eventOccurred(ResponseReceivedEvent eventArg) { - RequestResult currentResult = operationContext.getLastResult(); - int statusCode = currentResult.getStatusCode(); - // Check if it's a client-side error: a 4xx status - // We exclude 404 because it happens frequently during the normal - // course of operation (each call to exists() would generate that - // if it's not found). 
- if (statusCode >= HTTP_BAD_REQUEST && statusCode < HTTP_INTERNAL_ERROR - && statusCode != HTTP_NOT_FOUND) { - instrumentation.clientErrorEncountered(); - } else if (statusCode >= HTTP_INTERNAL_ERROR) { - // It's a server error: a 5xx status. Could be an Azure Storage - // bug or (more likely) throttling. - instrumentation.serverErrorEncountered(); - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/metrics/ResponseReceivedMetricUpdater.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/metrics/ResponseReceivedMetricUpdater.java deleted file mode 100644 index 4c61f6817cf75..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/metrics/ResponseReceivedMetricUpdater.java +++ /dev/null @@ -1,142 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.metrics; - -import java.net.HttpURLConnection; - -import org.apache.hadoop.classification.InterfaceAudience; - -import com.microsoft.azure.storage.Constants.HeaderConstants; -import com.microsoft.azure.storage.OperationContext; -import com.microsoft.azure.storage.RequestResult; -import com.microsoft.azure.storage.ResponseReceivedEvent; -import com.microsoft.azure.storage.StorageEvent; - - -/** - * An event listener to the ResponseReceived event from Azure Storage that will - * update metrics appropriately when it gets that event. - */ -@InterfaceAudience.Private -public final class ResponseReceivedMetricUpdater extends StorageEvent { - - private final AzureFileSystemInstrumentation instrumentation; - private final BandwidthGaugeUpdater blockUploadGaugeUpdater; - - private ResponseReceivedMetricUpdater(OperationContext operationContext, - AzureFileSystemInstrumentation instrumentation, - BandwidthGaugeUpdater blockUploadGaugeUpdater) { - this.instrumentation = instrumentation; - this.blockUploadGaugeUpdater = blockUploadGaugeUpdater; - } - - /** - * Hooks a new listener to the given operationContext that will update the - * metrics for the WASB file system appropriately in response to - * ResponseReceived events. - * - * @param operationContext The operationContext to hook. - * @param instrumentation The metrics source to update. - * @param blockUploadGaugeUpdater The blockUploadGaugeUpdater to use. - */ - public static void hook( - OperationContext operationContext, - AzureFileSystemInstrumentation instrumentation, - BandwidthGaugeUpdater blockUploadGaugeUpdater) { - ResponseReceivedMetricUpdater listener = - new ResponseReceivedMetricUpdater(operationContext, - instrumentation, blockUploadGaugeUpdater); - operationContext.getResponseReceivedEventHandler().addListener(listener); - } - - /** - * Get the content length of the request in the given HTTP connection. 
- * @param connection The connection. - * @return The content length, or zero if not found. - */ - private long getRequestContentLength(HttpURLConnection connection) { - String lengthString = connection.getRequestProperty( - HeaderConstants.CONTENT_LENGTH); - if (lengthString != null){ - return Long.parseLong(lengthString); - } - else{ - return 0; - } - } - - /** - * Gets the content length of the response in the given HTTP connection. - * @param connection The connection. - * @return The content length. - */ - private long getResponseContentLength(HttpURLConnection connection) { - return connection.getContentLength(); - } - - /** - * Handle the response-received event from Azure SDK. - */ - @Override - public void eventOccurred(ResponseReceivedEvent eventArg) { - instrumentation.webResponse(); - if (!(eventArg.getConnectionObject() instanceof HttpURLConnection)) { - // Typically this shouldn't happen, but just let it pass - return; - } - HttpURLConnection connection = - (HttpURLConnection) eventArg.getConnectionObject(); - RequestResult currentResult = eventArg.getRequestResult(); - if (currentResult == null) { - // Again, typically shouldn't happen, but let it pass - return; - } - - long requestLatency = currentResult.getStopDate().getTime() - - currentResult.getStartDate().getTime(); - - if (currentResult.getStatusCode() == HttpURLConnection.HTTP_CREATED - && connection.getRequestMethod().equalsIgnoreCase("PUT")) { - // If it's a PUT with an HTTP_CREATED status then it's a successful - // block upload. - long length = getRequestContentLength(connection); - if (length > 0) { - blockUploadGaugeUpdater.blockUploaded( - currentResult.getStartDate(), - currentResult.getStopDate(), - length); - instrumentation.rawBytesUploaded(length); - instrumentation.blockUploaded(requestLatency); - } - } else if (currentResult.getStatusCode() == HttpURLConnection.HTTP_PARTIAL - && connection.getRequestMethod().equalsIgnoreCase("GET")) { - // If it's a GET with an HTTP_PARTIAL status then it's a successful - // block download. - long length = getResponseContentLength(connection); - if (length > 0) { - blockUploadGaugeUpdater.blockDownloaded( - currentResult.getStartDate(), - currentResult.getStopDate(), - length); - instrumentation.rawBytesDownloaded(length); - instrumentation.blockDownloaded(requestLatency); - } - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/metrics/RollingWindowAverage.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/metrics/RollingWindowAverage.java deleted file mode 100644 index 184907a9e7bf7..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/metrics/RollingWindowAverage.java +++ /dev/null @@ -1,103 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.metrics; - -import java.util.ArrayDeque; -import java.util.Date; - -import org.apache.hadoop.classification.InterfaceAudience; - -/** - * Helper class to calculate rolling-window averages. - * Used to calculate rolling-window metrics in AzureNativeFileSystem. - */ -@InterfaceAudience.Private -final class RollingWindowAverage { - private final ArrayDeque currentPoints = - new ArrayDeque(); - private final long windowSizeMs; - - /** - * Create a new rolling-window average for the given window size. - * @param windowSizeMs The size of the window in milliseconds. - */ - public RollingWindowAverage(long windowSizeMs) { - this.windowSizeMs = windowSizeMs; - } - - /** - * Add a new data point that just happened. - * @param value The value of the data point. - */ - public synchronized void addPoint(long value) { - currentPoints.offer(new DataPoint(new Date(), value)); - cleanupOldPoints(); - } - - /** - * Get the current average. - * @return The current average. - */ - public synchronized long getCurrentAverage() { - cleanupOldPoints(); - if (currentPoints.isEmpty()) { - return 0; - } - long sum = 0; - for (DataPoint current : currentPoints) { - sum += current.getValue(); - } - return sum / currentPoints.size(); - } - - /** - * Clean up points that don't count any more (are before our - * rolling window) from our current queue of points. - */ - private void cleanupOldPoints() { - Date cutoffTime = new Date(new Date().getTime() - windowSizeMs); - while (!currentPoints.isEmpty() - && currentPoints.peekFirst().getEventTime().before(cutoffTime)) { - currentPoints.removeFirst(); - } - } - - /** - * A single data point. - */ - private static class DataPoint { - private final Date eventTime; - private final long value; - - public DataPoint(Date eventTime, long value) { - this.eventTime = eventTime; - this.value = value; - } - - public Date getEventTime() { - return eventTime; - } - - public long getValue() { - return value; - } - - - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/metrics/package.html b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/metrics/package.html deleted file mode 100644 index 5e8d6a84693e6..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/metrics/package.html +++ /dev/null @@ -1,28 +0,0 @@ - - - - - - -

-Infrastructure for a Metrics2 source that provides information on Windows -Azure Filesystem for Hadoop instances. -
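The removed package description above only names the Metrics2 source. As a rough, non-authoritative sketch of how the removed classes were meant to fit together, based only on the public methods visible earlier in this diff (and with made-up source, account and container names), the wiring looks roughly like this:

```java
// Non-authoritative wiring sketch: it relies only on the public methods of the
// removed classes shown earlier in this diff, and every name/value below
// (source name, account, container) is made up for illustration.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation;
import org.apache.hadoop.fs.azure.metrics.AzureFileSystemMetricsSystem;

public class WasbMetricsWiringSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Rolling window (in seconds) read by the AzureFileSystemInstrumentation
    // constructor for the block upload/download latency gauges.
    conf.setInt("fs.azure.metrics.rolling.window.size", 5);

    // The metrics system is shared and reference-counted across file systems.
    AzureFileSystemMetricsSystem.fileSystemStarted();
    AzureFileSystemInstrumentation metrics =
        new AzureFileSystemInstrumentation(conf);
    metrics.setAccountName("exampleaccount");      // hypothetical value
    metrics.setContainerName("examplecontainer");  // hypothetical value
    String sourceName =
        "AzureFileSystemMetrics-" + metrics.getFileSystemInstanceId();
    AzureFileSystemMetricsSystem.registerSource(
        sourceName, "WASB file system metrics", metrics);

    // Individual operations then update the counters and gauges.
    metrics.webResponse();
    metrics.fileCreated();
    System.out.println("Web responses so far: "
        + metrics.getCurrentWebResponses());

    // Tear down in reverse order; closing the last file system publishes the
    // remaining metrics and stops the shared metrics system.
    AzureFileSystemMetricsSystem.unregisterSource(sourceName);
    AzureFileSystemMetricsSystem.fileSystemClosed();
  }
}
```

The reference counting in `AzureFileSystemMetricsSystem` (the `numFileSystems` counter in the deleted source) is what allows many WASB file system instances to share one `MetricsSystemImpl` while still publishing everything on the final close.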

- - - diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/package.html b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/package.html deleted file mode 100644 index de01683995d8f..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/package.html +++ /dev/null @@ -1,31 +0,0 @@ - - - - - - -

-A distributed implementation of {@link -org.apache.hadoop.fs.FileSystem} for reading and writing files on -Azure Blob Storage. -This implementation is blob-based and stores files on Azure in their native form for -interoperability with other Azure tools. -
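For orientation only, the following is a minimal, hypothetical sketch of driving this (now removed) blob-based file system through the standard Hadoop `FileSystem` API. It assumes only the `wasb` URL scheme described in the deprecated documentation further down in this diff; the account, container and key values are placeholders and would normally come from a credential provider rather than plain configuration:

```java
// Minimal sketch, not taken from the removed sources: it only assumes the
// wasb:// URL scheme and the standard Hadoop FileSystem API. Account,
// container and key values are placeholders.
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WasbUsageSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Access key for the storage account (kept here only for illustration).
    conf.set("fs.azure.account.key.youraccount.blob.core.windows.net",
        "YOUR ACCESS KEY");

    // wasb URLs typically take the form
    // wasb://<container>@<account>.blob.core.windows.net/<path>.
    URI uri = new URI("wasb://yourcontainer@youraccount.blob.core.windows.net/");
    try (FileSystem fs = FileSystem.newInstance(uri, conf)) {
      Path file = new Path("/example/hello.txt");
      try (FSDataOutputStream out = fs.create(file, true)) {
        out.writeUTF("hello wasb");
      }
      System.out.println("exists: " + fs.exists(file));
    }
  }
}
```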

- - - diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/Constants.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/Constants.java deleted file mode 100644 index 792fe0a65b679..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/Constants.java +++ /dev/null @@ -1,50 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.security; - -/** - * Constants for used with WASB security implementation. - */ -public final class Constants { - - /** - * The configuration property to enable Kerberos support. - */ - - public static final String AZURE_KERBEROS_SUPPORT_PROPERTY_NAME = - "fs.azure.enable.kerberos.support"; - /** - * The configuration property to enable SPNEGO token cache. - */ - public static final String AZURE_ENABLE_SPNEGO_TOKEN_CACHE = - "fs.azure.enable.spnego.token.cache"; - - /** - * Parameter to be used for impersonation. - */ - public static final String DOAS_PARAM = "doas"; - /** - * Error message for Authentication failures. - */ - public static final String AUTHENTICATION_FAILED_ERROR_MESSAGE = - "Authentication Failed "; - - private Constants() { - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/JsonUtils.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/JsonUtils.java deleted file mode 100644 index 9c40325e217e7..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/JsonUtils.java +++ /dev/null @@ -1,52 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *

- * http://www.apache.org/licenses/LICENSE-2.0 - *

- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.security; - -import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.hadoop.util.JsonSerialization; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.util.Locale; -import java.util.Map; - -/** - * Utility class to parse JSON. - */ -public final class JsonUtils { - public static final Logger LOG = LoggerFactory.getLogger(JsonUtils.class); - - private JsonUtils() { - } - - public static Map parse(final String jsonString) throws IOException { - try { - return JsonSerialization.mapReader().readValue(jsonString); - } catch (Exception e) { - LOG.debug("JSON Parsing exception: {} while parsing {}", e.getMessage(), - jsonString); - if (jsonString.toLowerCase(Locale.ENGLISH).contains("server error")) { - LOG.error( - "Internal Server Error was encountered while making a request"); - } - throw new IOException("JSON Parsing Error: " + e.getMessage(), e); - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/RemoteWasbDelegationTokenManager.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/RemoteWasbDelegationTokenManager.java deleted file mode 100644 index 36381dc472540..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/RemoteWasbDelegationTokenManager.java +++ /dev/null @@ -1,169 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.security; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.azure.SecureWasbRemoteCallHelper; -import org.apache.hadoop.fs.azure.WasbRemoteCallHelper; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.io.retry.RetryPolicy; -import org.apache.hadoop.io.retry.RetryUtils; -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.security.token.delegation.web.DelegationTokenIdentifier; -import org.apache.http.client.methods.HttpGet; -import org.apache.http.client.methods.HttpPut; -import org.apache.http.client.utils.URIBuilder; - -import java.io.IOException; -import java.util.Map; - -/** - * Class to manage delegation token operations by making rest call to remote service. 
- */ -public class RemoteWasbDelegationTokenManager - implements WasbDelegationTokenManager { - - /** - * Configuration parameter name expected in the configuration - * object to provide the url of the delegation token service to fetch the delegation tokens. - */ - public static final String KEY_DELEGATION_TOKEN_SERVICE_URLS = - "fs.azure.delegation.token.service.urls"; - /** - * Configuration key to enable http retry policy for delegation token service calls. - */ - public static final String DT_MANAGER_HTTP_CLIENT_RETRY_POLICY_ENABLED_KEY = - "fs.azure.delegationtokenservice.http.retry.policy.enabled"; - /** - * Configuration key for delegation token service http retry policy spec. - */ - public static final String DT_MANAGER_HTTP_CLIENT_RETRY_POLICY_SPEC_KEY = - "fs.azure.delegationtokenservice.http.retry.policy.spec"; - /** - * Default remote delegation token manager endpoint. - */ - private static final String DEFAULT_DELEGATION_TOKEN_MANAGER_ENDPOINT = - "/tokenmanager/v1"; - /** - * Default for delegation token service http retry policy spec. - */ - private static final String DT_MANAGER_HTTP_CLIENT_RETRY_POLICY_SPEC_DEFAULT = - "10,3,100,2"; - - private static final boolean - DT_MANAGER_HTTP_CLIENT_RETRY_POLICY_ENABLED_DEFAULT = true; - - private static final Text WASB_DT_SERVICE_NAME = new Text("WASB_DT_SERVICE"); - /** - * Query parameter value for Getting delegation token http request - */ - private static final String GET_DELEGATION_TOKEN_OP = "GETDELEGATIONTOKEN"; - /** - * Query parameter value for renewing delegation token http request - */ - private static final String RENEW_DELEGATION_TOKEN_OP = - "RENEWDELEGATIONTOKEN"; - /** - * Query parameter value for canceling the delegation token http request - */ - private static final String CANCEL_DELEGATION_TOKEN_OP = - "CANCELDELEGATIONTOKEN"; - /** - * op parameter to represent the operation. - */ - private static final String OP_PARAM_KEY_NAME = "op"; - /** - * renewer parameter to represent the renewer of the delegation token. - */ - private static final String RENEWER_PARAM_KEY_NAME = "renewer"; - /** - * service parameter to represent the service which returns delegation tokens. - */ - private static final String SERVICE_PARAM_KEY_NAME = "service"; - /** - * token parameter to represent the delegation token. 
- */ - private static final String TOKEN_PARAM_KEY_NAME = "token"; - private WasbRemoteCallHelper remoteCallHelper; - private String[] dtServiceUrls; - private boolean isSpnegoTokenCacheEnabled; - - public RemoteWasbDelegationTokenManager(Configuration conf) - throws IOException { - RetryPolicy retryPolicy = RetryUtils.getMultipleLinearRandomRetry(conf, - DT_MANAGER_HTTP_CLIENT_RETRY_POLICY_ENABLED_KEY, - DT_MANAGER_HTTP_CLIENT_RETRY_POLICY_ENABLED_DEFAULT, - DT_MANAGER_HTTP_CLIENT_RETRY_POLICY_SPEC_KEY, - DT_MANAGER_HTTP_CLIENT_RETRY_POLICY_SPEC_DEFAULT); - this.isSpnegoTokenCacheEnabled = - conf.getBoolean(Constants.AZURE_ENABLE_SPNEGO_TOKEN_CACHE, true); - - remoteCallHelper = new SecureWasbRemoteCallHelper(retryPolicy, true, - isSpnegoTokenCacheEnabled); - this.dtServiceUrls = - conf.getTrimmedStrings(KEY_DELEGATION_TOKEN_SERVICE_URLS); - if (this.dtServiceUrls == null || this.dtServiceUrls.length <= 0) { - throw new IOException( - KEY_DELEGATION_TOKEN_SERVICE_URLS + " config not set" - + " in configuration."); - } - } - - @Override - public Token getDelegationToken( - String renewer) throws IOException { - URIBuilder uriBuilder = - new URIBuilder().setPath(DEFAULT_DELEGATION_TOKEN_MANAGER_ENDPOINT) - .addParameter(OP_PARAM_KEY_NAME, GET_DELEGATION_TOKEN_OP) - .addParameter(RENEWER_PARAM_KEY_NAME, renewer) - .addParameter(SERVICE_PARAM_KEY_NAME, - WASB_DT_SERVICE_NAME.toString()); - String responseBody = remoteCallHelper - .makeRemoteRequest(dtServiceUrls, uriBuilder.getPath(), - uriBuilder.getQueryParams(), HttpGet.METHOD_NAME); - return TokenUtils.toDelegationToken(JsonUtils.parse(responseBody)); - } - - @Override - public long renewDelegationToken(Token token) - throws IOException { - URIBuilder uriBuilder = - new URIBuilder().setPath(DEFAULT_DELEGATION_TOKEN_MANAGER_ENDPOINT) - .addParameter(OP_PARAM_KEY_NAME, RENEW_DELEGATION_TOKEN_OP) - .addParameter(TOKEN_PARAM_KEY_NAME, token.encodeToUrlString()); - - String responseBody = remoteCallHelper - .makeRemoteRequest(dtServiceUrls, uriBuilder.getPath(), - uriBuilder.getQueryParams(), HttpPut.METHOD_NAME); - - Map parsedResp = JsonUtils.parse(responseBody); - return ((Number) parsedResp.get("long")).longValue(); - } - - @Override - public void cancelDelegationToken(Token token) - throws IOException { - URIBuilder uriBuilder = - new URIBuilder().setPath(DEFAULT_DELEGATION_TOKEN_MANAGER_ENDPOINT) - .addParameter(OP_PARAM_KEY_NAME, CANCEL_DELEGATION_TOKEN_OP) - .addParameter(TOKEN_PARAM_KEY_NAME, token.encodeToUrlString()); - remoteCallHelper.makeRemoteRequest(dtServiceUrls, uriBuilder.getPath(), - uriBuilder.getQueryParams(), HttpPut.METHOD_NAME); - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/SpnegoToken.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/SpnegoToken.java deleted file mode 100644 index fba4e4142f59a..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/SpnegoToken.java +++ /dev/null @@ -1,49 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.security; - -import org.apache.hadoop.security.authentication.client.AuthenticatedURL; - -/** - * Class to represent SPNEGO token. - */ -public class SpnegoToken { - private AuthenticatedURL.Token token; - private long expiryTime; - private static final long TOKEN_VALIDITY_TIME_IN_MS = 60 * 60 * 1000L; - - public SpnegoToken(AuthenticatedURL.Token token) { - this.token = token; - //set the expiry time of the token to be 60 minutes, - // actual token will be valid for more than few hours and treating token as opaque. - this.expiryTime = System.currentTimeMillis() + TOKEN_VALIDITY_TIME_IN_MS; - } - - public AuthenticatedURL.Token getToken() { - return token; - } - - public long getExpiryTime() { - return expiryTime; - } - - public boolean isTokenValid() { - return (expiryTime >= System.currentTimeMillis()); - } -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/TokenUtils.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/TokenUtils.java deleted file mode 100644 index 90b9082eb7cef..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/TokenUtils.java +++ /dev/null @@ -1,60 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *

- * http://www.apache.org/licenses/LICENSE-2.0 - *

- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.security; - -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.security.token.TokenIdentifier; -import org.apache.hadoop.security.token.delegation.web.DelegationTokenIdentifier; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.util.Map; - -/** - * Utility methods common for token management - */ -public final class TokenUtils { - public static final Logger LOG = LoggerFactory.getLogger(TokenUtils.class); - public static final String URL_STRING = "urlString"; - - private TokenUtils() { - } - - public static Token toDelegationToken( - final Map inputMap) throws IOException { - final Map m = (Map) inputMap.get(Token.class.getSimpleName()); - return (Token) toToken(m); - } - - public static Token toToken(final Map m) - throws IOException { - if (m == null) { - return null; - } - String urlString = (String) m.get(URL_STRING); - if (urlString != null) { - final Token token = new Token<>(); - LOG.debug("Read url string param - {}", urlString); - token.decodeFromUrlString(urlString); - return token; - } - return null; - } -} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/WasbDelegationTokenIdentifier.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/WasbDelegationTokenIdentifier.java deleted file mode 100644 index 530e04572e2b0..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/WasbDelegationTokenIdentifier.java +++ /dev/null @@ -1,48 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.security; - -import org.apache.hadoop.io.Text; -import org.apache.hadoop.security.token.delegation.web.DelegationTokenIdentifier; - -/** - * Delegation token Identifier for WASB delegation tokens. 
- */ -public class WasbDelegationTokenIdentifier extends DelegationTokenIdentifier { - public static final Text TOKEN_KIND = new Text("WASB delegation"); - - public WasbDelegationTokenIdentifier(){ - super(TOKEN_KIND); - } - - public WasbDelegationTokenIdentifier(Text kind) { - super(kind); - } - - public WasbDelegationTokenIdentifier(Text kind, Text owner, Text renewer, - Text realUser) { - super(kind, owner, renewer, realUser); - } - - @Override - public Text getKind() { - return TOKEN_KIND; - } - -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/WasbDelegationTokenManager.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/WasbDelegationTokenManager.java deleted file mode 100644 index 1d7341600718d..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/WasbDelegationTokenManager.java +++ /dev/null @@ -1,54 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *

- * http://www.apache.org/licenses/LICENSE-2.0 - *

- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.security; - -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.security.token.delegation.web.DelegationTokenIdentifier; - -import java.io.IOException; - -/** - * Interface for Managing the Delegation tokens. - */ -public interface WasbDelegationTokenManager { - - /** - * Get Delegation token - * @param renewer delegation token renewer - * @return delegation token - * @throws IOException when error in getting the delegation token - */ - Token getDelegationToken(String renewer) - throws IOException; - - /** - * Renew the delegation token - * @param token delegation token. - * @return renewed time. - * @throws IOException when error in renewing the delegation token - */ - long renewDelegationToken(Token token) throws IOException; - - /** - * Cancel the delegation token - * @param token delegation token. - * @throws IOException when error in cancelling the delegation token. - */ - void cancelDelegationToken(Token token) throws IOException; -} diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/WasbTokenRenewer.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/WasbTokenRenewer.java deleted file mode 100644 index 6df76475f5594..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/WasbTokenRenewer.java +++ /dev/null @@ -1,91 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - *

- * http://www.apache.org/licenses/LICENSE-2.0 - *

- * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.security; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.security.token.TokenRenewer; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; - -/** - * Token Renewer for renewing WASB delegation tokens with remote service. - */ -public class WasbTokenRenewer extends TokenRenewer { - public static final Logger LOG = - LoggerFactory.getLogger(WasbTokenRenewer.class); - - /** - * Checks if this particular object handles the Kind of token passed. - * @param kind the kind of the token - * @return true if it handles passed token kind false otherwise. - */ - @Override - public boolean handleKind(Text kind) { - return WasbDelegationTokenIdentifier.TOKEN_KIND.equals(kind); - } - - /** - * Checks if passed token is managed. - * @param token the token being checked - * @return true if it is managed. - * @throws IOException thrown when evaluating if token is managed. - */ - @Override - public boolean isManaged(Token token) throws IOException { - return true; - } - - /** - * Renew the delegation token. - * @param token token to renew. - * @param conf configuration object. - * @return extended expiry time of the token. - * @throws IOException thrown when trying get current user. - * @throws InterruptedException thrown when thread is interrupted - */ - @Override - public long renew(final Token token, Configuration conf) - throws IOException, InterruptedException { - LOG.debug("Renewing the delegation token"); - return getInstance(conf).renewDelegationToken(token); - } - - /** - * Cancel the delegation token. - * @param token token to cancel. - * @param conf configuration object. - * @throws IOException thrown when trying get current user. - * @throws InterruptedException thrown when thread is interrupted. - */ - @Override - public void cancel(final Token token, Configuration conf) - throws IOException, InterruptedException { - LOG.debug("Cancelling the delegation token"); - getInstance(conf).cancelDelegationToken(token); - } - - private WasbDelegationTokenManager getInstance(Configuration conf) - throws IOException { - return new RemoteWasbDelegationTokenManager(conf); - } -} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/package-info.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/package-info.java deleted file mode 100644 index 1e1bfbe3aea2f..0000000000000 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/security/package-info.java +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -@InterfaceAudience.Private -@InterfaceStability.Unstable -package org.apache.hadoop.fs.azure.security; -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenIdentifier b/hadoop-tools/hadoop-azure/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenIdentifier index 90169185863a6..702ad8014f0c4 100644 --- a/hadoop-tools/hadoop-azure/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenIdentifier +++ b/hadoop-tools/hadoop-azure/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenIdentifier @@ -14,4 +14,3 @@ # limitations under the License. org.apache.hadoop.fs.azurebfs.security.AbfsDelegationTokenIdentifier -org.apache.hadoop.fs.azure.security.WasbDelegationTokenIdentifier \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenRenewer b/hadoop-tools/hadoop-azure/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenRenewer index d889534c73cd8..159e6296457e7 100644 --- a/hadoop-tools/hadoop-azure/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenRenewer +++ b/hadoop-tools/hadoop-azure/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenRenewer @@ -14,4 +14,3 @@ # limitations under the License. org.apache.hadoop.fs.azurebfs.security.AbfsTokenRenewer -org.apache.hadoop.fs.azure.security.WasbTokenRenewer \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/deprecated_wasb.md b/hadoop-tools/hadoop-azure/src/site/markdown/deprecated_wasb.md deleted file mode 100644 index b020296412312..0000000000000 --- a/hadoop-tools/hadoop-azure/src/site/markdown/deprecated_wasb.md +++ /dev/null @@ -1,564 +0,0 @@ - - -# Azure Blob Storage Support by Deprecated WASB Driver - - - -See also: - -* [WASB Deprecation](./wasb.html) -* [ABFS](./index.html) -* [Namespace Disabled Accounts on ABFS](./fns_blob.html) -* [Testing](./testing_azure.html) - -## Introduction - -The `hadoop-azure` module provides support for integration with -[Azure Blob Storage](http://azure.microsoft.com/en-us/documentation/services/storage/). -The built jar file, named `hadoop-azure.jar`, also declares transitive dependencies -on the additional artifacts it requires, notably the -[Azure Storage SDK for Java](https://github.com/Azure/azure-storage-java). - -To make it part of Apache Hadoop's default classpath, simply make sure that -`HADOOP_OPTIONAL_TOOLS` in `hadoop-env.sh` has `'hadoop-azure` in the list. -Example: - -```bash -export HADOOP_OPTIONAL_TOOLS="hadoop-azure,hadoop-azure-datalake" -``` -## Features - -* Read and write data stored in an Azure Blob Storage account. -* Present a hierarchical file system view by implementing the standard [`Hadoop - FileSystem`](../api/org/apache/hadoop/fs/FileSystem.html) interface. 
-* Supports configuration of multiple Azure Blob Storage accounts. -* Supports both block blobs (suitable for most use cases, such as MapReduce) and - page blobs (suitable for continuous write use cases, such as an HBase - write-ahead log). -* Reference file system paths using URLs using the `wasb` scheme. -* Also reference file system paths using URLs with the `wasbs` scheme for SSL - encrypted access. -* Can act as a source of data in a MapReduce job, or a sink. -* Tested on both Linux and Windows. -* Tested at scale. - -## Limitations - -* File owner and group are persisted, but the permissions model is not enforced. - Authorization occurs at the level of the entire Azure Blob Storage account. -* File last access time is not tracked. - -## Usage - -### Concepts - -The Azure Blob Storage data model presents 3 core concepts: - -* **Storage Account**: All access is done through a storage account. -* **Container**: A container is a grouping of multiple blobs. A storage account - may have multiple containers. In Hadoop, an entire file system hierarchy is - stored in a single container. It is also possible to configure multiple - containers, effectively presenting multiple file systems that can be referenced - using distinct URLs. -* **Blob**: A file of any type and size. In Hadoop, files are stored in blobs. - The internal implementation also uses blobs to persist the file system - hierarchy and other metadata. - -### Configuring Credentials - -Usage of Azure Blob Storage requires configuration of credentials. Typically -this is set in core-site.xml. The configuration property name is of the form -`fs.azure.account.key..blob.core.windows.net` and the value is the -access key. **The access key is a secret that protects access to your storage -account. Do not share the access key (or the core-site.xml file) with an -untrusted party.** - -For example: - -```xml - - fs.azure.account.key.youraccount.blob.core.windows.net - YOUR ACCESS KEY - -``` -In many Hadoop clusters, the core-site.xml file is world-readable. It is possible to -protect the access key within a credential provider as well. This provides an encrypted -file format along with protection with file permissions. - -#### Protecting the Azure Credentials for WASB with Credential Providers - -To protect these credentials from prying eyes, it is recommended that you use -the credential provider framework to securely store them and access them -through configuration. The following describes its use for Azure credentials -in WASB FileSystem. - -For additional reading on the credential provider API see: -[Credential Provider API](../hadoop-project-dist/hadoop-common/CredentialProviderAPI.html). - -##### End to End Steps for Distcp and WASB with Credential Providers - -###### provision - -```bash -% hadoop credential create fs.azure.account.key.youraccount.blob.core.windows.net -value 123 - -provider localjceks://file/home/lmccay/wasb.jceks -``` - -###### configure core-site.xml or command line system property - -```xml - - hadoop.security.credential.provider.path - localjceks://file/home/lmccay/wasb.jceks - Path to interrogate for protected credentials. 
- -``` - -###### distcp - -```bash -% hadoop distcp - [-D hadoop.security.credential.provider.path=localjceks://file/home/lmccay/wasb.jceks] - hdfs://hostname:9001/user/lmccay/007020615 wasb://yourcontainer@youraccount.blob.core.windows.net/testDir/ -``` - -NOTE: You may optionally add the provider path property to the distcp command line instead of -added job specific configuration to a generic core-site.xml. The square brackets above illustrate -this capability. - -#### Protecting the Azure Credentials for WASB within an Encrypted File - -In addition to using the credential provider framework to protect your credentials, it's -also possible to configure it in encrypted form. An additional configuration property -specifies an external program to be invoked by Hadoop processes to decrypt the -key. The encrypted key value is passed to this external program as a command -line argument: - -```xml - - fs.azure.account.keyprovider.youraccount - org.apache.hadoop.fs.azure.ShellDecryptionKeyProvider - - - - fs.azure.account.key.youraccount.blob.core.windows.net - YOUR ENCRYPTED ACCESS KEY - - - - fs.azure.shellkeyprovider.script - PATH TO DECRYPTION PROGRAM - - -``` - -### Block Blob with Compaction Support and Configuration - -Block blobs are the default kind of blob and are good for most big-data use -cases. However, block blobs have strict limit of 50,000 blocks per blob. -To prevent reaching the limit WASB, by default, does not upload new block to -the service after every `hflush()` or `hsync()`. - -For most of the cases, combining data from multiple `write()` calls in -blocks of 4Mb is a good optimization. But, in others cases, like HBase log files, -every call to `hflush()` or `hsync()` must upload the data to the service. - -Block blobs with compaction upload the data to the cloud service after every -`hflush()`/`hsync()`. To mitigate the limit of 50000 blocks, `hflush() -`/`hsync()` runs once compaction process, if number of blocks in the blob -is above 32,000. - -Block compaction search and replaces a sequence of small blocks with one big -block. That means there is associated cost with block compaction: reading -small blocks back to the client and writing it again as one big block. - -In order to have the files you create be block blobs with block compaction -enabled, the client must set the configuration variable -`fs.azure.block.blob.with.compaction.dir` to a comma-separated list of -folder names. - -For example: - -```xml - - fs.azure.block.blob.with.compaction.dir - /hbase/WALs,/data/myblobfiles - -``` - -### Page Blob Support and Configuration - -The Azure Blob Storage interface for Hadoop supports two kinds of blobs, -[block blobs and page blobs](http://msdn.microsoft.com/en-us/library/azure/ee691964.aspx). -Block blobs are the default kind of blob and are good for most big-data use -cases, like input data for Hive, Pig, analytical map-reduce jobs etc. Page blob -handling in hadoop-azure was introduced to support HBase log files. Page blobs -can be written any number of times, whereas block blobs can only be appended to -50,000 times before you run out of blocks and your writes will fail. That won't -work for HBase logs, so page blob support was introduced to overcome this -limitation. - -Page blobs can be up to 1TB in size, larger than the maximum 200GB size for block -blobs. -You should stick to block blobs for most usage, and page blobs are only tested in context of HBase write-ahead logs. 
- -In order to have the files you create be page blobs, you must set the -configuration variable `fs.azure.page.blob.dir` to a comma-separated list of -folder names. - -For example: - -```xml - - fs.azure.page.blob.dir - /hbase/WALs,/hbase/oldWALs,/data/mypageblobfiles - -``` - -You can set this to simply / to make all files page blobs. - -The configuration option `fs.azure.page.blob.size` is the default initial -size for a page blob. It must be 128MB or greater, and no more than 1TB, -specified as an integer number of bytes. - -The configuration option `fs.azure.page.blob.extension.size` is the page blob -extension size. This defines the amount to extend a page blob if it starts to -get full. It must be 128MB or greater, specified as an integer number of bytes. - -### Custom User-Agent -WASB passes User-Agent header to the Azure back-end. The default value -contains WASB version, Java Runtime version, Azure Client library version, and the -value of the configuration option `fs.azure.user.agent.prefix`. Customized User-Agent -header enables better troubleshooting and analysis by Azure service. - -```xml - - fs.azure.user.agent.prefix - Identifier - -``` - -### Atomic Folder Rename - -Azure storage stores files as a flat key/value store without formal support -for folders. The hadoop-azure file system layer simulates folders on top -of Azure storage. By default, folder rename in the hadoop-azure file system -layer is not atomic. That means that a failure during a folder rename -could, for example, leave some folders in the original directory and -some in the new one. - -HBase depends on atomic folder rename. Hence, a configuration setting was -introduced called `fs.azure.atomic.rename.dir` that allows you to specify a -comma-separated list of directories to receive special treatment so that -folder rename is made atomic. The default value of this setting is just -`/hbase`. Redo will be applied to finish a folder rename that fails. A file -`-renamePending.json` may appear temporarily and is the record of -the intention of the rename operation, to allow redo in event of a failure. - -For example: - -```xml - - fs.azure.atomic.rename.dir - /hbase,/data - -``` - -### Accessing wasb URLs - -After credentials are configured in core-site.xml, any Hadoop component may -reference files in that Azure Blob Storage account by using URLs of the following -format: - - wasb[s]://@.blob.core.windows.net/ - -The schemes `wasb` and `wasbs` identify a URL on a file system backed by Azure -Blob Storage. `wasb` utilizes unencrypted HTTP access for all interaction with -the Azure Blob Storage API. `wasbs` utilizes SSL encrypted HTTPS access. - -For example, the following -[FileSystem Shell](../hadoop-project-dist/hadoop-common/FileSystemShell.html) -commands demonstrate access to a storage account named `youraccount` and a -container named `yourcontainer`. - -```bash -% hadoop fs -mkdir wasb://yourcontainer@youraccount.blob.core.windows.net/testDir - -% hadoop fs -put testFile wasb://yourcontainer@youraccount.blob.core.windows.net/testDir/testFile - -% hadoop fs -cat wasbs://yourcontainer@youraccount.blob.core.windows.net/testDir/testFile -test file content -``` - -It's also possible to configure `fs.defaultFS` to use a `wasb` or `wasbs` URL. -This causes all bare paths, such as `/testDir/testFile` to resolve automatically -to that file system. 
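A minimal `core-site.xml` entry for this is sketched below, reusing the illustrative `youraccount` and `yourcontainer` names from the earlier examples; substitute your own storage account and container:

```xml
<!-- Illustrative only: make wasb the default filesystem so bare paths resolve to it. -->
<property>
  <name>fs.defaultFS</name>
  <value>wasb://yourcontainer@youraccount.blob.core.windows.net</value>
</property>
```

With this in place, a bare path such as `/testDir/testFile` resolves against the configured container.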
- -### Append API Support and Configuration - -The Azure Blob Storage interface for Hadoop has optional support for Append API for -single writer by setting the configuration `fs.azure.enable.append.support` to true. - -For Example: - -```xml - - fs.azure.enable.append.support - true - -``` - -It must be noted Append support in Azure Blob Storage interface DIFFERS FROM HDFS SEMANTICS. Append -support does not enforce single writer internally but requires applications to guarantee this semantic. -It becomes a responsibility of the application either to ensure single-threaded handling for a particular -file path, or rely on some external locking mechanism of its own. Failure to do so will result in -unexpected behavior. - -### Multithread Support - -Rename and Delete blob operations on directories with large number of files and sub directories currently is very slow as these operations are done one blob at a time serially. These files and sub folders can be deleted or renamed parallel. Following configurations can be used to enable threads to do parallel processing - -To enable 10 threads for Delete operation. Set configuration value to 0 or 1 to disable threads. The default behavior is threads disabled. - -```xml - - fs.azure.delete.threads - 10 - -``` - -To enable 20 threads for Rename operation. Set configuration value to 0 or 1 to disable threads. The default behavior is threads disabled. - -```xml - - fs.azure.rename.threads - 20 - -``` - -### WASB Secure mode and configuration - -WASB can operate in secure mode where the Storage access keys required to communicate with Azure storage does not have to -be in the same address space as the process using WASB. In this mode all interactions with Azure storage is performed using -SAS uris. There are two sub modes within the Secure mode, one is remote SAS key mode where the SAS keys are generated from -a remote process and local mode where SAS keys are generated within WASB. By default the SAS Key mode is expected to run in -Romote mode, however for testing purposes the local mode can be enabled to generate SAS keys in the same process as WASB. - -To enable Secure mode following property needs to be set to true. - -```xml - - fs.azure.secure.mode - true - -``` - -To enable SAS key generation locally following property needs to be set to true. - -```xml - - fs.azure.local.sas.key.mode - true - -``` - -To use the remote SAS key generation mode, comma separated external REST services are expected to provided required SAS keys. -Following property can used to provide the end point to use for remote SAS Key generation: - -```xml - - fs.azure.cred.service.urls - {URL} - -``` - -The remote service is expected to provide support for two REST calls ```{URL}/GET_CONTAINER_SAS``` and ```{URL}/GET_RELATIVE_BLOB_SAS```, for generating -container and relative blob sas keys. 
An example requests - -```{URL}/GET_CONTAINER_SAS?storage_account=&container=&sas_expiry=&delegation_token=``` -```{URL}/GET_CONTAINER_SAS?storage_account=&container=&relative_path=&sas_expiry=&delegation_token=``` - -The service is expected to return a response in JSON format: - -```json -{ - "responseCode" : 0 or non-zero , - "responseMessage" : relavant message on failure , - "sasKey" : Requested SAS Key -} -``` - -### Authorization Support in WASB - -Authorization support can be enabled in WASB using the following configuration: - -```xml - - fs.azure.authorization - true - -``` - -The current implementation of authorization relies on the presence of an external service that can enforce -the authorization. The service is expected to be running on comma separated URLs provided by the following config. - -```xml - - fs.azure.authorization.remote.service.urls - {URL} - -``` - -The remote service is expected to provide support for the following REST call: ```{URL}/CHECK_AUTHORIZATION``` -An example request: - ```{URL}/CHECK_AUTHORIZATION?wasb_absolute_path=&operation_type=&delegation_token=``` - -The service is expected to return a response in JSON format: - -```json -{ - "responseCode" : 0 or non-zero , - "responseMessage" : relevant message on failure , - "authorizationResult" : true/false -} -``` - -### Delegation token support in WASB - -Delegation token support can be enabled in WASB using the following configuration: - -```xml - - fs.azure.enable.kerberos.support - true - -``` - -The current implementation of delegation token implementation relies on the presence of an external service instances that can generate and manage delegation tokens. The service is expected to be running on comma separated URLs provided by the following config. - -```xml - - fs.azure.delegation.token.service.urls - {URL} - -``` - -The remote service is expected to provide support for the following REST call: ```{URL}?op=GETDELEGATIONTOKEN```, ```{URL}?op=RENEWDELEGATIONTOKEN``` and ```{URL}?op=CANCELDELEGATIONTOKEN``` -An example request: - ```{URL}?op=GETDELEGATIONTOKEN&renewer=``` - ```{URL}?op=RENEWDELEGATIONTOKEN&token=``` - ```{URL}?op=CANCELDELEGATIONTOKEN&token=``` - -The service is expected to return a response in JSON format for GETDELEGATIONTOKEN request: - -```json -{ - "Token" : { - "urlString": URL string of delegation token. - } -} -``` -### chown behaviour when authorization is enabled in WASB - -When authorization is enabled, only the users listed in the following configuration -are allowed to change the owning user of files/folders in WASB. The configuration -value takes a comma separated list of user names who are allowed to perform chown. - -```xml - - fs.azure.chown.allowed.userlist - user1,user2 - -``` -### chmod behaviour when authorization is enabled in WASB - -When authorization is enabled, only the owner and the users listed in the -following configurations are allowed to change the permissions of files/folders in WASB. -The configuration value takes a comma separated list of user names who are allowed to perform chmod. - -```xml - - fs.azure.daemon.userlist - user1,user2 - - - fs.azure.chmod.allowed.userlist - userA,userB - -``` - -Caching of both SAS keys and Authorization responses can be enabled using the following setting: -The cache settings are applicable only when fs.azure.authorization is enabled. -The cache is maintained at a filesystem object level. 
-``` - - fs.azure.authorization.caching.enable - true - -``` - -The maximum number of entries that the cache can hold can be customized using the following setting: -``` - - fs.azure.authorization.caching.maxentries - 512 - -``` - - The validity of an authorization cache-entry can be controlled using the following setting: - Setting the value to zero disables authorization-caching. - If the key is not specified, a default expiry duration of 5m takes effect. - ``` - - fs.azure.authorization.cacheentry.expiry.period - 5m - -``` - - The validity of a SASKey cache-entry can be controlled using the following setting. - Setting the value to zero disables SASKey-caching. - If the key is not specified, the default expiry duration specified in the sas-key request takes effect. - ``` - - fs.azure.saskey.cacheentry.expiry.period - 90d - -``` - - Use container saskey for access to all blobs within the container. - Blob-specific saskeys are not used when this setting is enabled. - This setting provides better performance compared to blob-specific saskeys. - ``` - - fs.azure.saskey.usecontainersaskeyforallaccess - true - -``` - -### Performance optimization configurations - -`fs.azure.block.blob.buffered.pread.disable`: By default the positional read API will do a -seek and read on input stream. This read will fill the buffer cache in -BlockBlobInputStream. If this configuration is true it will skip usage of buffer and do a -lock free call for reading from blob. This optimization is very much helpful for HBase kind -of short random read over a shared InputStream instance. -Note: This is not a config which can be set at cluster level. It can be used as -an option on FutureDataInputStreamBuilder. -See FileSystem#openFile(Path path) - -## Further Reading - -* [Testing the Azure WASB client](testing_azure.html). -* MSDN article, [Understanding Block Blobs, Append Blobs, and Page Blobs](https://docs.microsoft.com/en-us/rest/api/storageservices/understanding-block-blobs--append-blobs--and-page-blobs) diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/fns_blob.md b/hadoop-tools/hadoop-azure/src/site/markdown/fns_blob.md index 9850bc34170df..44fed0d33f41a 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/fns_blob.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/fns_blob.md @@ -25,15 +25,15 @@ Refer to [WASB Deprecation](./wasb.html) for more details. ## Azure Service Endpoints Used by ABFS Driver Azure Services offers two set of endpoints for interacting with storage accounts: -1. [Azure Blob Storage](./blobEndpoint.md) referred as Blob Endpoint +1. [Azure Blob Storage](./blobEndpoint.html) referred as Blob Endpoint 2. [Azure Data Lake Storage](https://learn.microsoft.com/en-us/rest/api/storageservices/datalakestoragegen2/operation-groups) referred as DFS Endpoint The ABFS Driver by default is designed to work with DFS Endpoint only which primarily supports HNS Enabled Accounts only. -To enable ABFS Driver to work with FNS Accounts, support for Blob Endpoint is being added. +To enable ABFS Driver to work with FNS Accounts, support for Blob Endpoint is added. This is because Azure services do not recommend using DFS Endpoint for FNS Accounts. -FNS over DFS endpoint is **REMOVED**. All requests will be switched to Blob endpoint internally if +FNS over DFS endpoint is therefore **REMOVED**. All requests will be switched to Blob endpoint internally if account is detected as FNS. ABFS Driver will only allow FNS Accounts to be accessed using Blob Endpoint. @@ -90,11 +90,11 @@ configured service type. 
Choosing a separate ingress service is **only supported ``` - How to configure Shared Key - auth: [Shared Key](./index.md#a-nameshared-key-autha-default-shared-key) + auth: [Shared Key](./index.html#a-nameshared-key-autha-default-shared-key) - How to configure - OAuth: [OAuth](./index.md#a-nameoauth-client-credentialsa-oauth-20-client-credentials) + OAuth: [OAuth](./index.html#a-nameoauth-client-credentialsa-oauth-20-client-credentials) - How to configure fixed - SAS: [Fixed SAS](./index.md#using-accountservice-sas-with-abfs) + SAS: [Fixed SAS](./index.html#using-accountservice-sas-with-abfs) OAuth is recommended auth type as it is more secure and flexible. diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/index.md b/hadoop-tools/hadoop-azure/src/site/markdown/index.md index 98ee99256b39f..5a90265c81220 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/index.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/index.md @@ -33,9 +33,9 @@ You can set this locally in your `.profile`/`.bashrc`, but note it won't propagate to jobs running in-cluster. See also: -* [FNS (non-HNS)](./fns_blob.html) -* [Legacy-Deprecated-WASB](./wasb.html) -* [Testing](./testing_azure.html) +* [ABFS Driver for FNS (non-HNS) Accounts](./fns_blob.html) +* [Deprecated WASB Driver for FNS (non-HNS) Accounts](./wasb.html) +* [Testing of ABFS Driver](./testing_azure.html) * [WASB Migration Config Support](./wasbToAbfsMigration.html) ## Features of the ABFS connector. diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md b/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md index f8e4dde3e86e8..fd2445ab550e6 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/testing_azure.md @@ -12,7 +12,15 @@ limitations under the License. See accompanying LICENSE file. --> -# Testing the Azure WASB client +# Testing the Azure ABFS client + +Azure Data Lake Storage Gen 2 (ADLS Gen 2) is a set of capabilities dedicated to +big data analytics, built on top of Azure Blob Storage. The ABFS and ABFSS +schemes target the ADLS Gen 2 REST API now having support for both HNS and FNS Accounts. +ADLS Gen 2 with HNS Enabled using DFS Endpoint offers better performance and +scalability. ADLS Gen 2 also offers authentication and authorization compatible +with the Hadoop Distributed File System permissions model when hierarchical +namespace is enabled for the storage account. @@ -24,7 +32,7 @@ convention `Test*.java`. Integration tests follow the naming convention ## Policy for submitting patches which affect the `hadoop-azure` module. -The Apache Jenkins infrastucture does not run any cloud integration tests, +The Apache Jenkins infrastructure does not run any cloud integration tests, due to the need to keep credentials secure. ### The submitter of any patch is required to run all the integration tests and declare which Azure region they used. @@ -90,17 +98,17 @@ For example: - fs.azure.wasb.account.name - {ACCOUNTNAME}.blob.core.windows.net + fs.azure.abfs.account.name + {ACCOUNTNAME}.dfs.core.windows.net - fs.azure.account.key.{ACCOUNTNAME}.blob.core.windows.net + fs.azure.account.key.{ACCOUNTNAME}.dfs.core.windows.net {ACCOUNT ACCESS KEY} ``` -To run contract tests, set the WASB file system URI in `src/test/resources/azure-auth-keys.xml` +To run contract tests, set the ABFS file system URI in `src/test/resources/azure-auth-keys.xml` and the account access key. For example: ```xml @@ -108,12 +116,12 @@ and the account access key. 
For example: - fs.contract.test.fs.wasb - wasb://{CONTAINERNAME}@{ACCOUNTNAME}.blob.core.windows.net + fs.contract.test.fs.abfs + wasb://{CONTAINERNAME}@{ACCOUNTNAME}.dfs.core.windows.net The name of the azure file system for testing. - fs.azure.account.key.{ACCOUNTNAME}.blob.core.windows.net + fs.azure.account.key.{ACCOUNTNAME}.dfs.core.windows.net {ACCOUNT ACCESS KEY} @@ -126,21 +134,21 @@ Overall, to run all the tests using `mvn test`, a sample `azure-auth-keys.xml` - fs.azure.wasb.account.name - {ACCOUNTNAME}.blob.core.windows.net + fs.azure.abfs.account.name + {ACCOUNTNAME}.dfs.core.windows.net - fs.azure.account.key.{ACCOUNTNAME}.blob.core.windows.net + fs.azure.account.key.{ACCOUNTNAME}.dfs.core.windows.net {ACCOUNT ACCESS KEY} - fs.contract.test.fs.wasb - wasb://{CONTAINERNAME}@{ACCOUNTNAME}.blob.core.windows.net + fs.contract.test.fs.abfs + wasb://{CONTAINERNAME}@{ACCOUNTNAME}.dfs.core.windows.net ``` -DO NOT ADD `azure-auth-keys.xml` TO REVISION CONTROL. The keys to your Azure +DO NOT ADD `azure-auth-keys.xml` TO VERSION CONTROL. The keys to your Azure Storage account are a secret and must not be shared. @@ -153,35 +161,29 @@ mvn -T 1C clean verify ``` It's also possible to execute multiple test suites in parallel by passing the -`parallel-tests=wasb|abfs|both` property on the command line. The tests spend most of their +`parallel-tests=abfs` property on the command line. The tests spend most of their time blocked on network I/O, so running in parallel tends to complete full test runs faster. ```bash -mvn -T 1C -Dparallel-tests=both clean verify -mvn -T 1C -Dparallel-tests=wasb clean verify mvn -T 1C -Dparallel-tests=abfs clean verify ``` -`-Dparallel-tests=wasb` runs the WASB related integration tests from azure directory
`-Dparallel-tests=abfs` runs the ABFS-related integration tests from the azurebfs directory
-`-Dparallel-tests=both` runs all the integration tests from both azure and azurebfs directory
Some tests must run with exclusive access to the storage container, so even with the `parallel-tests` property, several test suites will run in serial in a separate Maven execution step after the parallel tests. -By default, `parallel-tests` runs 4 test suites concurrently. This can be tuned +By default, `parallel-tests` runs 8 test suites concurrently. This can be tuned by passing the `testsThreadCount` property. ```bash mvn -T 1C -Dparallel-tests -DtestsThreadCount=8 clean verify ``` - ```bash mvn -T 1C clean test @@ -266,37 +268,6 @@ The most bandwidth intensive tests (those which upload data) always run sequentially; those which are slow due to HTTPS setup costs or server-side actions are included in the set of parallelized tests. - -### Scale test tuning options - - -Some of the tests can be tuned from the maven build or from the -configuration file used to run the tests. - -```bash -mvn -T 1C verify -Dparallel-tests -Dscale -DtestsThreadCount=8 -Dfs.azure.scale.test.huge.filesize=128M -``` - -The algorithm is - -1. The value is queried from the configuration file, using a default value if -it is not set. -1. The value is queried from the JVM System Properties, where it is passed -down by maven. -1. If the system property is null, an empty string, or it has the value `unset`, -then the configuration value is used. The `unset` option is used to -[work round a quirk in maven property propagation](http://stackoverflow.com/questions/7773134/null-versus-empty-arguments-in-maven). - -Only a few properties can be set this way; more will be added. - -| Property | Meaninging | -|-----------|-------------| -| `fs.azure.scale.test.huge.filesize`| Size for huge file uploads | -| `fs.azure.scale.test.huge.huge.partitionsize`| Size for partitions in huge file uploads | - -The file and partition sizes are numeric values with a k/m/g/t/p suffix depending -on the desired size. For example: 128M, 128m, 2G, 2G, 4T or even 1P. - #### Scale test configuration options Some scale tests perform multiple operations (such as creating many directories). @@ -340,23 +311,6 @@ smaller to achieve faster test runs. ``` -Azure-specific scale test properties are - -##### `fs.azure.scale.test.huge.filesize`: size in MB for "Huge file tests". - -The Huge File tests validate Azure storages's ability to handle large files —the property -`fs.azure.scale.test.huge.filesize` declares the file size to use. - -```xml - - fs.azure.scale.test.huge.filesize - 200M - -``` - -Tests at this scale are slow: they are best executed from hosts running in -the cloud infrastructure where the storage endpoint is based. - ## Using the emulator A selection of tests can run against the @@ -389,7 +343,7 @@ Logging at debug level is the standard way to provide more diagnostics output; after setting this rerun the tests ```properties -log4j.logger.org.apache.hadoop.fs.azure=DEBUG +log4j.logger.org.apache.hadoop.fs.azurebfs=DEBUG ``` ## Adding new tests @@ -415,18 +369,6 @@ call to `exists()`, `isFile()`, etc. on a failure. Using `org.apache.hadoop.fs.contract.ContractTestUtils` to make assertions about the state of a filesystem helps here. -*Isolating Scale tests*. Any test doing large amounts of IO MUST extend the -class `AbstractAzureScaleTest`, so only running if `scale` is defined on a build, -supporting test timeouts configurable by the user. Scale tests should also -support configurability as to the actual size of objects/number of operations, -so that behavior at different scale can be verified. - -*Designed for parallel execution*. 
A key need here is for each test suite to work -on isolated parts of the filesystem. Subclasses of `AbstractWasbTestBase` -SHOULD use the `path()`, `methodpath()` and `blobpath()` methods, -to build isolated paths. Tests MUST NOT assume that they have exclusive access -to a bucket. - *Extending existing tests where appropriate*. This recommendation goes against normal testing best practise of "test one thing per method". Because it is so slow to create directory trees or upload large files, we do @@ -453,31 +395,8 @@ is critical. There are a set of base classes which should be extended for Azure tests and integration tests. -##### `org.apache.hadoop.fs.azure.AbstractWasbTestWithTimeout` - -This extends the junit `Assert` class with thread names and timeouts, -the default timeout being set in `AzureTestConstants.AZURE_TEST_TIMEOUT` to -ten minutes. The thread names are set to aid analyzing the stack trace of -a test: a `jstack` call can be used to - -##### `org.apache.hadoop.fs.azure.AbstractWasbTestBase` - -The base class for tests which use `AzureBlobStorageTestAccount` to create -mock or live Azure clients; in test teardown it tries to clean up store state. - -1. This class requires subclasses to implement `createTestAccount()` to create -a mock or real test account. - -1. The configuration used to create a test account *should* be that from -`createConfiguration()`; this can be extended in subclasses to tune the settings. - - -##### `org.apache.hadoop.fs.azure.integration.AbstractAzureScaleTest` - -This extends `AbstractWasbTestBase` for scale tests; those test which -only run when `-Dscale` is used to select the "scale" profile. -These tests have a timeout of 30 minutes, so as to support slow test runs. - +##### `org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest` +This is the base class for all ABFS integration tests. Having shared base classes help reduces future maintenance. Please use them. @@ -491,31 +410,17 @@ not provide meaningful logs or assertion messages precisely to avoid this. This means efficient in test setup/teardown, and, ideally, making use of existing public datasets to save setup time and tester cost. - -The reference example is `ITestAzureHugeFiles`:. This marks the test suite as -`@FixMethodOrder(MethodSorters.NAME_ASCENDING)` then orders the test cases such -that each test case expects the previous test to have completed (here: uploaded a file, -renamed a file, ...). This provides for independent tests in the reports, yet still -permits an ordered sequence of operations. Do note the use of `Assume.assume()` -to detect when the preconditions for a single test case are not met, hence, -the tests become skipped, rather than fail with a trace which is really a false alarm. - - ### Works Over Long-haul Links -As well as making file size and operation counts scaleable, this includes -making test timeouts adequate. The Scale tests make this configurable; it's -hard coded to ten minutes in `AbstractAzureIntegrationTest()`; subclasses can -change this by overriding `getTestTimeoutMillis()`. +As well as making file size and operation counts scalable, this includes +making test timeouts adequate. -Equally importantly: support proxies, as some testers need them. +Equally, importantly: support proxies, as some testers need them. ### Provides Diagnostics and timing information 1. Create logs, log things. -1. you can use `AbstractWasbTestBase.describe(format-string, args)` here; it -adds some newlines so as to be easier to spot. 1. 
Use `ContractTestUtils.NanoTimer` to measure the duration of operations, and log the output. @@ -535,7 +440,7 @@ including error messages*. Keeps costs down. -1. Do not only cleanup if a test case completes successfully; test suite +1. Do not only clean up if a test case completes successfully; test suite teardown must do it. 1. That teardown code must check for the filesystem and other fields being null before the cleanup. Why? If test setup fails, the teardown methods still @@ -550,7 +455,7 @@ We really appreciate this — you will too. ### How to keep your credentials really safe -Although the `auth-keys.xml` file is marged as ignored in git and subversion, +Although the `auth-keys.xml` file is marked as ignored in git and subversion, it is still in your source tree, and there's always that risk that it may creep out. @@ -568,7 +473,7 @@ using an absolute XInclude reference to it. ### Cleaning up Containers -The Azure tests create containers with the prefix `"wasbtests-"` and delete +The Azure tests create containers with the prefix `"abfs-testcontainer-"` and delete them after the test runs. If a test run is interrupted, these containers may not get deleted. There is a special test case which can be manually invoked to list and delete these, `CleanupTestContainers` @@ -581,17 +486,6 @@ This will delete the containers; the output log of the test run will provide the details and summary of the operation. -# Testing the Azure ABFS Client - -Azure Data Lake Storage Gen 2 (ADLS Gen 2) is a set of capabilities dedicated to -big data analytics, built on top of Azure Blob Storage. The ABFS and ABFSS -schemes target the ADLS Gen 2 REST API, and the WASB and WASBS schemes target -the Azure Blob Storage REST API. ADLS Gen 2 offers better performance and -scalability. ADLS Gen 2 also offers authentication and authorization compatible -with the Hadoop Distributed File System permissions model when hierarchical -namespace is enabled for the storage account. Furthermore, the metadata and data -produced by ADLS Gen 2 REST API can be consumed by Blob REST API, and vice versa. - ## Generating test run configurations and test triggers over various config combinations To simplify the testing across various authentication and features combinations diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/wasb.md b/hadoop-tools/hadoop-azure/src/site/markdown/wasb.md index 270fd14da4c44..571d3e8cdce00 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/wasb.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/wasb.md @@ -12,17 +12,19 @@ limitations under the License. See accompanying LICENSE file. --> -# Hadoop Azure Support: WASB Driver +# Deprecated WASB Driver + +### Note: WASB Driver is removed and won't be a part of official hadoop releases starting from hadoop-3.5.0 ## Introduction -WASB Driver is a legacy Hadoop File System driver that was developed to support +WASB Driver was a legacy Hadoop File System driver that was developed to support [FNS(FlatNameSpace) Azure Storage accounts](https://learn.microsoft.com/en-us/azure/storage/blobs/storage-blobs-introduction) that do not honor File-Folder syntax. -HDFS Folder operations hence are mimicked at client side by WASB driver and -certain folder operations like Rename and Delete can lead to a lot of IOPs with +HDFS Folder operations hence were mimicked at client side by WASB driver and +certain folder operations like Rename and Delete could lead to a lot of IOPs with client-side enumeration and orchestration of rename/delete operation blob by blob. 
It was not ideal for other APIs too as initial checks for path is a file or folder -needs to be done over multiple metadata calls. These led to a degraded performance. +needed to be done over multiple metadata calls. These led to a degraded performance. To provide better service to Analytics users, Microsoft released [ADLS Gen2](https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction) which are HNS (Hierarchical Namespace) enabled, i.e. File-Folder aware storage accounts. @@ -40,19 +42,19 @@ transition situation. workloads to migrate to the ABFS driver, which is available only on HNS enabled accounts in a fully tested and supported scenario. -## Deprecation plans for WASB Driver -We are introducing a new feature that will enable the ABFS driver to support +## Deprecation of WASB Driver +We have introduced a new feature that will enable the [ABFS](./fns_blob.html) driver to support FNS accounts (over BlobEndpoint that WASB Driver uses) using the ABFS scheme. This feature will enable us to use the ABFS driver to interact with data stored in GPv2 (General Purpose v2) storage accounts. -With this feature, the users who still use the legacy WASB driver will be able -to migrate to the ABFS driver without much re-work on their workloads. They will +With this feature, the WASB users are now required to migrate to the ABFS driver +without much re-work on their workloads. They will however need to change the URIs from the WASB scheme to the ABFS scheme. +Refer to [Wasb To Abfs Migration Guide](./wasbToAbfsMigration.html) for more details. -Once ABFS driver has built FNS support capability to migrate WASB users, WASB -driver will be marked for removal in next major release. This will remove any ambiguity -for new users onboards as there will be only one Microsoft driver for Azure Storage +With removal of WASB Driver our aim is to remove any ambiguity +for new user onboards as there will be only one Microsoft driver for Azure Storage and migrating users will get SLA bound support for driver and service, which was not guaranteed over WASB. @@ -61,7 +63,7 @@ move to HNS enabled accounts with the ABFS driver, which is our recommended stac for big data analytics on ADLS Gen2. ### Impact for existing ABFS users using ADLS Gen2 (HNS enabled account) -This feature does not impact the existing users who are using ADLS Gen2 Accounts +Removal of WASB Driver does not impact the existing users who are using ADLS Gen2 Accounts (HNS enabled account) with ABFS driver. They do not need to make any changes to their workloads or configurations. They @@ -76,7 +78,7 @@ users to transition to a supported scenario immediately, while they plan to ultimately move to ADLS Gen2 (HNS enabled account). ### New Authentication Options for a migrating user -Below auth types that WASB provides will continue to work on the new FNS over +Below auth types that WASB provided will continue to work on the new FNS over ABFS Driver over configuration that accepts these SAS types (similar to WASB): 1. SharedKey 2. Account SAS diff --git a/hadoop-tools/hadoop-azure/src/site/markdown/wasbToAbfsMigration.md b/hadoop-tools/hadoop-azure/src/site/markdown/wasbToAbfsMigration.md index 87bb56083a6fd..9613696ce2cf0 100644 --- a/hadoop-tools/hadoop-azure/src/site/markdown/wasbToAbfsMigration.md +++ b/hadoop-tools/hadoop-azure/src/site/markdown/wasbToAbfsMigration.md @@ -21,13 +21,13 @@ for the same. 
## Introduction ABFS driver has now built support for -FNS accounts (over BlobEndpoint that WASB Driver uses) using the ABFS scheme. +FNS accounts (over BlobEndpoint that WASB Driver used to have) using the ABFS scheme. Refer to: [ABFS Driver for Namespace Disabled Accounts](./fns_blob.html) for more details. -The legacy WASB driver has been **deprecated** and is no longer recommended for -use. Refer to: [WASB Deprecation](./wasb.html) for more details. +The legacy WASB driver has been **removed** and is no longer part of official hadoop releases. +Refer to: [WASB Deprecation](./wasb.html) for more details. It's highly recommended for current WASB Driver users to migrate to ABFS driver, -the only Microsoft driver for Azure Storage. +the only Hadoop driver for Azure Storage from Microsoft. Microsoft recommends all Big Data and Analytics users to use Azure Data Lake Gen2 (ADLS Gen2) using the ABFS driver. It is thus preferred to diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AbstractWasbTestBase.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AbstractWasbTestBase.java deleted file mode 100644 index d963b14d5a01b..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AbstractWasbTestBase.java +++ /dev/null @@ -1,175 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.IOException; - -import org.apache.hadoop.conf.Configuration; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.azure.integration.AzureTestConstants; -import org.apache.hadoop.io.IOUtils; - -import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.*; - -/** - * Abstract test class that provides basic setup and teardown of testing Azure - * Storage account. Each subclass defines a different set of test cases to run - * and overrides {@link #createTestAccount()} to set up the testing account used - * to run those tests. The returned account might integrate with Azure Storage - * directly or it might be a mock implementation. 
- */ -public abstract class AbstractWasbTestBase extends AbstractWasbTestWithTimeout - implements AzureTestConstants { - - protected static final Logger LOG = - LoggerFactory.getLogger(AbstractWasbTestBase.class); - - protected NativeAzureFileSystem fs; - protected AzureBlobStorageTestAccount testAccount; - - @BeforeEach - public void setUp() throws Exception { - AzureBlobStorageTestAccount account = createTestAccount(); - assumeNotNull(account, "test account"); - bindToTestAccount(account); - } - - @AfterEach - public void tearDown() throws Exception { - describe("closing test account and filesystem"); - testAccount = cleanupTestAccount(testAccount); - IOUtils.closeStream(fs); - fs = null; - } - - /** - * Create the configuration to use when creating a test account. - * Subclasses can override this to tune the test account configuration. - * @return a configuration. - */ - public Configuration createConfiguration() { - return AzureBlobStorageTestAccount.createTestConfiguration(); - } - - /** - * Create the test account. - * Subclasses must implement this. - * @return the test account. - * @throws Exception - */ - protected abstract AzureBlobStorageTestAccount createTestAccount() - throws Exception; - - /** - * Get the test account. - * @return the current test account. - */ - protected AzureBlobStorageTestAccount getTestAccount() { - return testAccount; - } - - /** - * Get the filesystem - * @return the current filesystem. - */ - protected NativeAzureFileSystem getFileSystem() { - return fs; - } - - /** - * Get the configuration used to create the filesystem - * @return the configuration of the test FS - */ - protected Configuration getConfiguration() { - return getFileSystem().getConf(); - } - - /** - * Bind to a new test account; closing any existing one. - * This updates the test account returned in {@link #getTestAccount()} - * and the filesystem in {@link #getFileSystem()}. - * @param account new test account - */ - protected void bindToTestAccount(AzureBlobStorageTestAccount account) { - // clean any existing test account - cleanupTestAccount(testAccount); - IOUtils.closeStream(fs); - testAccount = account; - if (testAccount != null) { - fs = testAccount.getFileSystem(); - } - } - - /** - * Return a path to a blob which will be unique for this fork. - * @param filepath filepath - * @return a path under the default blob directory - * @throws IOException - */ - protected Path blobPath(String filepath) throws IOException { - return blobPathForTests(getFileSystem(), filepath); - } - - /** - * Create a path under the test path provided by - * the FS contract. - * @param filepath path string in - * @return a path qualified by the test filesystem - * @throws IOException IO problems - */ - protected Path path(String filepath) throws IOException { - return pathForTests(getFileSystem(), filepath); - } - - /** - * Return a path bonded to this method name, unique to this fork during - * parallel execution. - * @return a method name unique to (fork, method). - * @throws IOException IO problems - */ - protected Path methodPath() throws IOException { - return path(methodName.getMethodName()); - } - - /** - * Return a blob path bonded to this method name, unique to this fork during - * parallel execution. - * @return a method name unique to (fork, method). - * @throws IOException IO problems - */ - protected Path methodBlobPath() throws IOException { - return blobPath(methodName.getMethodName()); - } - - /** - * Describe a test in the logs. 
- * @param text text to print - * @param args arguments to format in the printing - */ - protected void describe(String text, Object... args) { - LOG.info("\n\n{}: {}\n", - methodName.getMethodName(), - String.format(text, args)); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AbstractWasbTestWithTimeout.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AbstractWasbTestWithTimeout.java deleted file mode 100644 index d6624e8c0c178..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AbstractWasbTestWithTimeout.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Timeout; -import org.apache.hadoop.fs.azure.integration.AzureTestConstants; -import org.junit.jupiter.api.extension.RegisterExtension; -import org.apache.hadoop.test.TestName; - -import java.util.concurrent.TimeUnit; - -import static org.assertj.core.api.Assumptions.assumeThat; - -/** - * Base class for any Wasb test with timeouts & named threads. - * This class does not attempt to bind to Azure. - */ -@Timeout(value = AzureTestConstants.AZURE_TEST_TIMEOUT, unit = TimeUnit.MILLISECONDS) -public class AbstractWasbTestWithTimeout extends Assertions { - - /** - * The name of the current method. - */ - @RegisterExtension - public TestName methodName = new TestName(); - - /** - * Name the junit thread for the class. This will overridden - * before the individual test methods are run. - */ - @BeforeAll - public static void nameTestThread() { - Thread.currentThread().setName("JUnit"); - } - - /** - * Name the thread to the current test method. - */ - @BeforeEach - public void nameThread() { - Thread.currentThread().setName("JUnit-" + methodName.getMethodName()); - } - - /** - * Override point: the test timeout in milliseconds. 
- * @return a timeout in milliseconds - */ - protected int getTestTimeoutMillis() { - return AzureTestConstants.AZURE_TEST_TIMEOUT; - } - - public static void assumeNotNull(Object objects) { - assumeThat(objects).isNotNull(); - } - - public static void assumeNotNull(Object objects, String message) { - assumeThat(objects).as(message).isNotNull(); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AzureBlobStorageTestAccount.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AzureBlobStorageTestAccount.java deleted file mode 100644 index c5f6cb762a337..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/AzureBlobStorageTestAccount.java +++ /dev/null @@ -1,947 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - **/ - -package org.apache.hadoop.fs.azure; - -import com.microsoft.azure.storage.*; -import com.microsoft.azure.storage.blob.*; -import com.microsoft.azure.storage.core.Base64; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.commons.configuration2.SubsetConfiguration; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.azure.integration.AzureTestConstants; -import org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation; -import org.apache.hadoop.fs.azure.metrics.AzureFileSystemMetricsSystem; -import org.apache.hadoop.fs.azure.integration.AzureTestUtils; -import org.apache.hadoop.metrics2.AbstractMetric; -import org.apache.hadoop.metrics2.MetricsRecord; -import org.apache.hadoop.metrics2.MetricsSink; -import org.apache.hadoop.metrics2.MetricsTag; -import org.apache.hadoop.metrics2.impl.TestMetricsConfig; - -import java.io.File; -import java.io.IOException; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.*; -import java.util.concurrent.ConcurrentLinkedQueue; - -import static org.apache.hadoop.fs.azure.AzureNativeFileSystemStore.DEFAULT_STORAGE_EMULATOR_ACCOUNT_NAME; -import static org.apache.hadoop.fs.azure.AzureNativeFileSystemStore.KEY_USE_LOCAL_SAS_KEY_MODE; -import static org.apache.hadoop.fs.azure.AzureNativeFileSystemStore.KEY_USE_SECURE_MODE; -import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.verifyWasbAccountNameInConfig; -import static org.junit.jupiter.api.Assertions.assertNotNull; - -/** - * Helper class to create WASB file systems backed by either a mock in-memory - * implementation or a real Azure Storage account. 
- */ -public final class AzureBlobStorageTestAccount implements AutoCloseable, - AzureTestConstants { - private static final Logger LOG = LoggerFactory.getLogger( - AzureBlobStorageTestAccount.class); - - private static final String SAS_PROPERTY_NAME = "fs.azure.sas."; - private static final String TEST_CONFIGURATION_FILE_NAME = "azure-test.xml"; - public static final String ACCOUNT_KEY_PROPERTY_NAME = "fs.azure.account.key."; - public static final String TEST_ACCOUNT_NAME_PROPERTY_NAME = "fs.azure.account.name"; - public static final String WASB_TEST_ACCOUNT_NAME_WITH_DOMAIN = "fs.azure.wasb.account.name"; - public static final String MOCK_ACCOUNT_NAME = "mockAccount-c01112a3-2a23-433e-af2a-e808ea385136.blob.core.windows.net"; - public static final String WASB_ACCOUNT_NAME_DOMAIN_SUFFIX = ".blob.core.windows.net"; - public static final String WASB_ACCOUNT_NAME_DOMAIN_SUFFIX_REGEX = "\\.blob(\\.preprod)?\\.core\\.windows\\.net"; - public static final String MOCK_CONTAINER_NAME = "mockContainer"; - public static final String WASB_AUTHORITY_DELIMITER = "@"; - public static final String WASB_SCHEME = "wasb"; - public static final String PATH_DELIMITER = "/"; - public static final String AZURE_ROOT_CONTAINER = "$root"; - public static final String MOCK_WASB_URI = "wasb://" + MOCK_CONTAINER_NAME - + WASB_AUTHORITY_DELIMITER + MOCK_ACCOUNT_NAME + "/"; - private static final String USE_EMULATOR_PROPERTY_NAME = "fs.azure.test.emulator"; - - private static final String KEY_DISABLE_THROTTLING = "fs.azure.disable.bandwidth.throttling"; - private static final String KEY_READ_TOLERATE_CONCURRENT_APPEND = "fs.azure.io.read.tolerate.concurrent.append"; - public static final String DEFAULT_PAGE_BLOB_DIRECTORY = "pageBlobs"; - public static final String DEFAULT_ATOMIC_RENAME_DIRECTORIES = "/atomicRenameDir1,/atomicRenameDir2"; - - private CloudStorageAccount account; - private CloudBlobContainer container; - private CloudBlockBlob blob; - private NativeAzureFileSystem fs; - private AzureNativeFileSystemStore storage; - private MockStorageInterface mockStorage; - private String pageBlobDirectory; - private static final ConcurrentLinkedQueue allMetrics = - new ConcurrentLinkedQueue(); - private static boolean metricsConfigSaved = false; - private boolean skipContainerDelete = false; - - private AzureBlobStorageTestAccount(NativeAzureFileSystem fs, - CloudStorageAccount account, - CloudBlobContainer container) { - this(fs, account, container, false); - } - - private AzureBlobStorageTestAccount(NativeAzureFileSystem fs, - CloudStorageAccount account, - CloudBlobContainer container, - boolean skipContainerDelete) { - this.account = account; - this.container = container; - this.fs = fs; - this.skipContainerDelete = skipContainerDelete; - } - - /** - * Create a test account with an initialized storage reference. - * - * @param storage - * -- store to be accessed by the account - * @param account - * -- Windows Azure account object - * @param container - * -- Windows Azure container object - */ - private AzureBlobStorageTestAccount(AzureNativeFileSystemStore storage, - CloudStorageAccount account, CloudBlobContainer container) { - this.account = account; - this.container = container; - this.storage = storage; - } - - /** - * Create a test account sessions with the default root container. 
- * - * @param fs - * - file system, namely WASB file system - * @param account - * - Windows Azure account object - * @param blob - * - block blob reference - */ - private AzureBlobStorageTestAccount(NativeAzureFileSystem fs, - CloudStorageAccount account, CloudBlockBlob blob) { - - this.account = account; - this.blob = blob; - this.fs = fs; - } - - private AzureBlobStorageTestAccount(NativeAzureFileSystem fs, - MockStorageInterface mockStorage) { - this.fs = fs; - this.mockStorage = mockStorage; - } - - private static void addRecord(MetricsRecord record) { - allMetrics.add(record); - } - - public static String getMockContainerUri() { - return String.format("http://%s/%s", - AzureBlobStorageTestAccount.MOCK_ACCOUNT_NAME, - AzureBlobStorageTestAccount.MOCK_CONTAINER_NAME); - } - - public static String toMockUri(String path) { - return String.format("http://%s/%s/%s", - AzureBlobStorageTestAccount.MOCK_ACCOUNT_NAME, - AzureBlobStorageTestAccount.MOCK_CONTAINER_NAME, path); - } - - public static String toMockUri(Path path) { - // Remove the first SEPARATOR - return toMockUri(path.toUri().getRawPath().substring(1)); - } - - public static Path pageBlobPath() { - return new Path("/" + DEFAULT_PAGE_BLOB_DIRECTORY); - } - - @Deprecated - public static Path pageBlobPath(String fileName) { - return new Path(pageBlobPath(), fileName); - } - - public Number getLatestMetricValue(String metricName, Number defaultValue) - throws IndexOutOfBoundsException{ - boolean found = false; - Number ret = null; - for (MetricsRecord currentRecord : allMetrics) { - // First check if this record is coming for my file system. - if (wasGeneratedByMe(currentRecord)) { - for (AbstractMetric currentMetric : currentRecord.metrics()) { - if (currentMetric.name().equalsIgnoreCase(metricName)) { - found = true; - ret = currentMetric.value(); - break; - } - } - } - } - if (!found) { - if (defaultValue != null) { - return defaultValue; - } - throw new IndexOutOfBoundsException(metricName); - } - return ret; - } - - /** - * Checks if the given record was generated by my WASB file system instance. - * @param currentRecord The metrics record to check. - * @return - */ - private boolean wasGeneratedByMe(MetricsRecord currentRecord) { - assertNotNull(fs, "null filesystem"); - assertNotNull(fs.getInstrumentation().getFileSystemInstanceId(), - "null filesystemn instance ID"); - String myFsId = fs.getInstrumentation().getFileSystemInstanceId().toString(); - for (MetricsTag currentTag : currentRecord.tags()) { - if (currentTag.name().equalsIgnoreCase("wasbFileSystemId")) { - return currentTag.value().equals(myFsId); - } - } - return false; - } - - - /** - * Gets the blob reference to the given blob key. - * - * @param blobKey - * The blob key (no initial slash). - * @return The blob reference. - */ - public CloudBlockBlob getBlobReference(String blobKey) - throws Exception { - return container.getBlockBlobReference( - String.format(blobKey)); - } - - /** - * Acquires a short lease on the given blob in this test account. - * - * @param blobKey - * The key to the blob (no initial slash). - * @return The lease ID. - */ - public String acquireShortLease(String blobKey) throws Exception { - return getBlobReference(blobKey).acquireLease(60, null); - } - - /** - * Releases the lease on the container. - * - * @param leaseID - * The lease ID. 
- */ - public void releaseLease(String leaseID, String blobKey) throws Exception { - AccessCondition accessCondition = new AccessCondition(); - accessCondition.setLeaseID(leaseID); - getBlobReference(blobKey).releaseLease(accessCondition); - } - - private static void saveMetricsConfigFile() throws IOException { - if (!metricsConfigSaved) { - String testFilename = TestMetricsConfig.getTestFilename( - "hadoop-metrics2-azure-file-system"); - File dest = new File(testFilename).getCanonicalFile(); - dest.getParentFile().mkdirs(); - new org.apache.hadoop.metrics2.impl.ConfigBuilder() - .add("azure-file-system.sink.azuretestcollector.class", - StandardCollector.class.getName()) - .save(testFilename); - metricsConfigSaved = true; - } - } - - public static AzureBlobStorageTestAccount createMock() throws Exception { - return createMock(new Configuration()); - } - - public static AzureBlobStorageTestAccount createMock(Configuration conf) throws Exception { - saveMetricsConfigFile(); - configurePageBlobDir(conf); - configureAtomicRenameDir(conf); - AzureNativeFileSystemStore store = new AzureNativeFileSystemStore(); - MockStorageInterface mockStorage = new MockStorageInterface(); - store.setAzureStorageInteractionLayer(mockStorage); - NativeAzureFileSystem fs = new NativeAzureFileSystem(store); - setMockAccountKey(conf); - configureSecureModeTestSettings(conf); - // register the fs provider. - - fs.initialize(new URI(MOCK_WASB_URI), conf); - AzureBlobStorageTestAccount testAcct = - new AzureBlobStorageTestAccount(fs, mockStorage); - return testAcct; - } - - /** - * Set the page blob directories configuration to the default if it is not - * already set. Some tests may set it differently (e.g. the page blob - * tests in TestNativeAzureFSPageBlobLive). - * @param conf The configuration to conditionally update. - */ - private static void configurePageBlobDir(Configuration conf) { - if (conf.get(AzureNativeFileSystemStore.KEY_PAGE_BLOB_DIRECTORIES) == null) { - conf.set(AzureNativeFileSystemStore.KEY_PAGE_BLOB_DIRECTORIES, - "/" + DEFAULT_PAGE_BLOB_DIRECTORY); - } - } - - /** Do the same for the atomic rename directories configuration */ - private static void configureAtomicRenameDir(Configuration conf) { - if (conf.get(AzureNativeFileSystemStore.KEY_ATOMIC_RENAME_DIRECTORIES) == null) { - conf.set(AzureNativeFileSystemStore.KEY_ATOMIC_RENAME_DIRECTORIES, - DEFAULT_ATOMIC_RENAME_DIRECTORIES); - } - } - - /** - * Creates a test account that goes against the storage emulator. - * - * @return The test account, or null if the emulator isn't setup. - */ - public static AzureBlobStorageTestAccount createForEmulator() - throws Exception { - saveMetricsConfigFile(); - NativeAzureFileSystem fs = null; - CloudBlobContainer container = null; - Configuration conf = createTestConfiguration(); - if (!conf.getBoolean(USE_EMULATOR_PROPERTY_NAME, false)) { - // Not configured to test against the storage emulator. - LOG.warn("Skipping emulator Azure test because configuration " - + "doesn't indicate that it's running."); - return null; - } - CloudStorageAccount account = - CloudStorageAccount.getDevelopmentStorageAccount(); - fs = new NativeAzureFileSystem(); - String containerName = String.format("wasbtests-%s-%tQ", - System.getProperty("user.name"), new Date()); - container = account.createCloudBlobClient().getContainerReference( - containerName); - container.create(); - - // Set account URI and initialize Azure file system. 
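/*
 * Editor's note: an illustrative sketch, not part of this patch. Assuming the
 * factory methods shown above keep their signatures, this is how a WASB test
 * could exercise the mock-backed account from createMock() without a live
 * Azure Storage account. The class name and path are arbitrary.
 */
package org.apache.hadoop.fs.azure;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;

public final class MockWasbAccountSketch {
  private MockWasbAccountSketch() {
  }

  public static void main(String[] args) throws Exception {
    // createMock() wires AzureNativeFileSystemStore to MockStorageInterface,
    // so no credentials or network access are required.
    AzureBlobStorageTestAccount account = AzureBlobStorageTestAccount.createMock();
    NativeAzureFileSystem fs = account.getFileSystem();

    Path file = new Path("/sketch/example.txt");
    try (FSDataOutputStream out = fs.create(file)) {
      out.write(new byte[] {1, 2, 3});
    }
    System.out.println("mock store has file: " + fs.exists(file));

    // cleanup() closes the file system; with a mock there is no real
    // container to delete.
    account.cleanup();
  }
}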
- URI accountUri = createAccountUri(DEFAULT_STORAGE_EMULATOR_ACCOUNT_NAME, - containerName); - configureSecureModeTestSettings(conf); - - fs.initialize(accountUri, conf); - - // Create test account initializing the appropriate member variables. - // - AzureBlobStorageTestAccount testAcct = - new AzureBlobStorageTestAccount(fs, account, container); - - return testAcct; - } - - public static AzureBlobStorageTestAccount createOutOfBandStore( - int uploadBlockSize, int downloadBlockSize) throws Exception { - return createOutOfBandStore(uploadBlockSize, downloadBlockSize, false); - } - - public static AzureBlobStorageTestAccount createOutOfBandStore( - int uploadBlockSize, int downloadBlockSize, boolean enableSecureMode) throws Exception { - - saveMetricsConfigFile(); - - CloudBlobContainer container = null; - Configuration conf = createTestConfiguration(); - CloudStorageAccount account = createTestAccount(conf); - if (null == account) { - return null; - } - - String containerName = String.format("wasbtests-%s-%tQ", - System.getProperty("user.name"), new Date()); - - // Create the container. - container = account.createCloudBlobClient().getContainerReference( - containerName); - container.create(); - - String accountName = verifyWasbAccountNameInConfig(conf); - - // Ensure that custom throttling is disabled and tolerate concurrent - // out-of-band appends. - conf.setBoolean(KEY_DISABLE_THROTTLING, true); - conf.setBoolean(KEY_READ_TOLERATE_CONCURRENT_APPEND, true); - conf.setBoolean(KEY_USE_SECURE_MODE, enableSecureMode); - configureSecureModeTestSettings(conf); - - // Set account URI and initialize Azure file system. - URI accountUri = createAccountUri(accountName, containerName); - - // Set up instrumentation. - // - AzureFileSystemMetricsSystem.fileSystemStarted(); - String sourceName = NativeAzureFileSystem.newMetricsSourceName(); - String sourceDesc = "Azure Storage Volume File System metrics"; - - AzureFileSystemInstrumentation instrumentation = new AzureFileSystemInstrumentation(conf); - - AzureFileSystemMetricsSystem.registerSource( - sourceName, sourceDesc, instrumentation); - - - // Create a new AzureNativeFileSystemStore object. - AzureNativeFileSystemStore testStorage = new AzureNativeFileSystemStore(); - - // Initialize the store with the throttling feedback interfaces. - testStorage.initialize(accountUri, conf, instrumentation); - - // Create test account initializing the appropriate member variables. - // - AzureBlobStorageTestAccount testAcct = - new AzureBlobStorageTestAccount(testStorage, account, container); - - return testAcct; - } - - /** - * Sets the mock account key in the given configuration. - * - * @param conf - * The configuration. - */ - public static void setMockAccountKey(Configuration conf) { - setMockAccountKey(conf, MOCK_ACCOUNT_NAME); - } - - /** - * Configure default values for Secure Mode testing. - * These values are relevant only when testing in Secure Mode. - * - * @param conf - * The configuration. - */ - public static void configureSecureModeTestSettings(Configuration conf) { - conf.set(KEY_USE_LOCAL_SAS_KEY_MODE, "true"); // always use local sas-key mode for testing - } - - /** - * Sets the mock account key in the given configuration. - * - * @param conf - * The configuration. 
- */ - public static void setMockAccountKey(Configuration conf, String accountName) { - conf.set(ACCOUNT_KEY_PROPERTY_NAME + accountName, - Base64.encode(new byte[] { 1, 2, 3 })); - } - - private static URI createAccountUri(String accountName) - throws URISyntaxException { - return new URI(WASB_SCHEME + ":" + PATH_DELIMITER + PATH_DELIMITER - + accountName); - } - - private static URI createAccountUri(String accountName, String containerName) - throws URISyntaxException { - return new URI(WASB_SCHEME + ":" + PATH_DELIMITER + PATH_DELIMITER - + containerName + WASB_AUTHORITY_DELIMITER + accountName); - } - - public static AzureBlobStorageTestAccount create() throws Exception { - return create(""); - } - - public static AzureBlobStorageTestAccount create(String containerNameSuffix) - throws Exception { - return create(containerNameSuffix, - EnumSet.of(CreateOptions.CreateContainer)); - } - - // Create a test account which uses throttling. - public static AzureBlobStorageTestAccount createThrottled() throws Exception { - return create("", - EnumSet.of(CreateOptions.useThrottling, CreateOptions.CreateContainer)); - } - - public static AzureBlobStorageTestAccount create(Configuration conf) - throws Exception { - return create("", EnumSet.of(CreateOptions.CreateContainer), conf); - } - - static CloudStorageAccount createStorageAccount(String accountName, - Configuration conf, boolean allowAnonymous) throws URISyntaxException, - KeyProviderException { - String accountKey = AzureNativeFileSystemStore - .getAccountKeyFromConfiguration(accountName, conf); - final StorageCredentials credentials; - if (accountKey == null) { - if (allowAnonymous) { - credentials = StorageCredentialsAnonymous.ANONYMOUS; - } else { - LOG.warn("Skipping live Azure test because of missing key for" - + " account '" + accountName + "'."); - return null; - } - } else { - credentials = new StorageCredentialsAccountAndKey( - accountName.split("\\.")[0], accountKey); - } - - return new CloudStorageAccount(credentials); - } - - public static Configuration createTestConfiguration() { - return createTestConfiguration(null); - } - - private static Configuration createTestConfiguration(Configuration conf) { - if (conf == null) { - conf = new Configuration(); - } - - conf.addResource(TEST_CONFIGURATION_FILE_NAME); - return conf; - } - - public static CloudStorageAccount createTestAccount() - throws URISyntaxException, KeyProviderException - { - return createTestAccount(createTestConfiguration()); - } - - static CloudStorageAccount createTestAccount(Configuration conf) - throws URISyntaxException, KeyProviderException { - AzureTestUtils.assumeNamespaceDisabled(conf); - - String testAccountName = verifyWasbAccountNameInConfig(conf); - if (testAccountName == null) { - LOG.warn("Skipping live Azure test because of missing test account"); - return null; - } - return createStorageAccount(testAccountName, conf, false); - } - - public static enum CreateOptions { - UseSas, Readonly, CreateContainer, useThrottling - } - - public static AzureBlobStorageTestAccount create(String containerNameSuffix, - EnumSet createOptions) throws Exception { - return create(containerNameSuffix, createOptions, null); - } - - public static AzureBlobStorageTestAccount create( - String containerNameSuffix, - EnumSet createOptions, - Configuration initialConfiguration) - throws Exception { - return create(containerNameSuffix, createOptions, initialConfiguration, false); - } - - public static AzureBlobStorageTestAccount create( - String containerNameSuffix, - EnumSet 
createOptions, - Configuration initialConfiguration, - boolean useContainerSuffixAsContainerName) - throws Exception { - saveMetricsConfigFile(); - NativeAzureFileSystem fs = null; - CloudBlobContainer container = null; - Configuration conf = createTestConfiguration(initialConfiguration); - configurePageBlobDir(conf); - configureAtomicRenameDir(conf); - CloudStorageAccount account = createTestAccount(conf); - if (account == null) { - return null; - } - fs = new NativeAzureFileSystem(); - String containerName = useContainerSuffixAsContainerName - ? containerNameSuffix - : String.format( - "wasbtests-%s-%s%s", - System.getProperty("user.name"), - UUID.randomUUID().toString(), - containerNameSuffix); - container = account.createCloudBlobClient().getContainerReference( - containerName); - if (createOptions.contains(CreateOptions.CreateContainer)) { - container.createIfNotExists(); - } - String accountName = verifyWasbAccountNameInConfig(conf); - if (createOptions.contains(CreateOptions.UseSas)) { - String sas = generateSAS(container, - createOptions.contains(CreateOptions.Readonly)); - if (!createOptions.contains(CreateOptions.CreateContainer)) { - // The caller doesn't want the container to be pre-created, - // so delete it now that we have generated the SAS. - container.delete(); - } - // Remove the account key from the configuration to make sure we don't - // cheat and use that. - // but only if not in secure mode, which requires that login - if (!conf.getBoolean(AzureNativeFileSystemStore.KEY_USE_SECURE_MODE, false)) { - conf.set(ACCOUNT_KEY_PROPERTY_NAME + accountName, ""); - } - // Set the SAS key. - conf.set(SAS_PROPERTY_NAME + containerName + "." + accountName, sas); - } - - // Check if throttling is turned on and set throttling parameters - // appropriately. - if (createOptions.contains(CreateOptions.useThrottling)) { - conf.setBoolean(KEY_DISABLE_THROTTLING, false); - } else { - conf.setBoolean(KEY_DISABLE_THROTTLING, true); - } - - configureSecureModeTestSettings(conf); - - // Set account URI and initialize Azure file system. - URI accountUri = createAccountUri(accountName, containerName); - fs.initialize(accountUri, conf); - - // Create test account initializing the appropriate member variables. - // - AzureBlobStorageTestAccount testAcct = - new AzureBlobStorageTestAccount(fs, account, container, - useContainerSuffixAsContainerName); - - return testAcct; - } - - private static String generateContainerName() throws Exception { - String containerName = - String.format ("wasbtests-%s-%tQ", - System.getProperty("user.name"), - new Date()); - return containerName; - } - - private static String generateSAS(CloudBlobContainer container, - boolean readonly) throws Exception { - - // Create a container if it does not exist. - container.createIfNotExists(); - - // Create a new shared access policy. - SharedAccessBlobPolicy sasPolicy = new SharedAccessBlobPolicy(); - - // Create a UTC Gregorian calendar value. - GregorianCalendar calendar = new GregorianCalendar( - TimeZone.getTimeZone("UTC")); - - // Specify the current time as the start time for the shared access - // signature. - // - calendar.setTime(new Date()); - sasPolicy.setSharedAccessStartTime(calendar.getTime()); - - // Use the start time delta one hour as the end time for the shared - // access signature. 
- calendar.add(Calendar.HOUR, 10); - sasPolicy.setSharedAccessExpiryTime(calendar.getTime()); - - if (readonly) { - // Set READ permissions - sasPolicy.setPermissions(EnumSet.of( - SharedAccessBlobPermissions.READ, - SharedAccessBlobPermissions.LIST)); - } else { - // Set READ and WRITE permissions. - // - sasPolicy.setPermissions(EnumSet.of( - SharedAccessBlobPermissions.READ, - SharedAccessBlobPermissions.WRITE, - SharedAccessBlobPermissions.LIST)); - } - - // Create the container permissions. - BlobContainerPermissions containerPermissions = new BlobContainerPermissions(); - - // Turn public access to the container off. - containerPermissions.setPublicAccess(BlobContainerPublicAccessType.OFF); - - container.uploadPermissions(containerPermissions); - - // Create a shared access signature for the container. - String sas = container.generateSharedAccessSignature(sasPolicy, null); - // HACK: when the just generated SAS is used straight away, we get an - // authorization error intermittently. Sleeping for 1.5 seconds fixes that - // on my box. - Thread.sleep(1500); - - // Return to caller with the shared access signature. - return sas; - } - - public static void primePublicContainer(CloudBlobClient blobClient, - String accountName, String containerName, String blobName, int fileSize) - throws Exception { - - // Create a container if it does not exist. The container name - // must be lower case. - CloudBlobContainer container = blobClient - .getContainerReference(containerName); - - container.createIfNotExists(); - - // Create a new shared access policy. - SharedAccessBlobPolicy sasPolicy = new SharedAccessBlobPolicy(); - - // Set READ and WRITE permissions. - // - sasPolicy.setPermissions(EnumSet.of( - SharedAccessBlobPermissions.READ, - SharedAccessBlobPermissions.WRITE, - SharedAccessBlobPermissions.LIST, - SharedAccessBlobPermissions.DELETE)); - - // Create the container permissions. - BlobContainerPermissions containerPermissions = new BlobContainerPermissions(); - - // Turn public access to the container off. - containerPermissions - .setPublicAccess(BlobContainerPublicAccessType.CONTAINER); - - // Set the policy using the values set above. - containerPermissions.getSharedAccessPolicies().put("testwasbpolicy", - sasPolicy); - container.uploadPermissions(containerPermissions); - - // Create a blob output stream. - CloudBlockBlob blob = container.getBlockBlobReference(blobName); - BlobOutputStream outputStream = blob.openOutputStream(); - - outputStream.write(new byte[fileSize]); - outputStream.close(); - } - - public static AzureBlobStorageTestAccount createAnonymous( - final String blobName, final int fileSize) throws Exception { - - NativeAzureFileSystem fs = null; - CloudBlobContainer container = null; - Configuration conf = createTestConfiguration(), noTestAccountConf = new Configuration(); - - // Set up a session with the cloud blob client to generate SAS and check the - // existence of a container and capture the container object. - CloudStorageAccount account = createTestAccount(conf); - if (account == null) { - return null; - } - CloudBlobClient blobClient = account.createCloudBlobClient(); - - // Capture the account URL and the account name. - String accountName = verifyWasbAccountNameInConfig(conf); - - configureSecureModeTestSettings(conf); - - // Generate a container name and create a shared access signature string for - // it. - // - String containerName = generateContainerName(); - - // Set up public container with the specified blob name. 
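/*
 * Editor's note: an illustrative sketch, not part of this patch. Assuming the
 * create(...) overloads and CreateOptions shown above, this is how a test
 * could obtain a SAS-authenticated account: with UseSas the factory calls
 * generateSAS(), blanks the account key (outside secure mode) and stores the
 * SAS under fs.azure.sas.<container>.<account>. The suffix and path below are
 * arbitrary.
 */
package org.apache.hadoop.fs.azure;

import java.util.EnumSet;

import org.apache.hadoop.fs.Path;

public final class SasWasbAccountSketch {
  private SasWasbAccountSketch() {
  }

  public static void main(String[] args) throws Exception {
    AzureBlobStorageTestAccount account = AzureBlobStorageTestAccount.create(
        "-sas-sketch",
        EnumSet.of(AzureBlobStorageTestAccount.CreateOptions.CreateContainer,
            AzureBlobStorageTestAccount.CreateOptions.UseSas));
    if (account == null) {
      // No live test account configured in azure-test.xml; nothing to do.
      return;
    }
    NativeAzureFileSystem fs = account.getFileSystem();
    fs.mkdirs(new Path("/sas-sketch"));
    System.out.println("created /sas-sketch over a SAS-authenticated account");
    // cleanup() closes the file system and deletes the test container.
    account.cleanup();
  }
}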
- primePublicContainer(blobClient, accountName, containerName, blobName, - fileSize); - - // Capture the blob container object. It should exist after generating the - // shared access signature. - container = blobClient.getContainerReference(containerName); - if (null == container || !container.exists()) { - final String errMsg = String - .format("Container '%s' expected but not found while creating SAS account."); - throw new Exception(errMsg); - } - - // Set the account URI. - URI accountUri = createAccountUri(accountName, containerName); - - // Initialize the Native Azure file system with anonymous credentials. - fs = new NativeAzureFileSystem(); - fs.initialize(accountUri, noTestAccountConf); - - // Create test account initializing the appropriate member variables. - AzureBlobStorageTestAccount testAcct = new AzureBlobStorageTestAccount(fs, - account, container); - - // Return to caller with test account. - return testAcct; - } - - private static CloudBlockBlob primeRootContainer(CloudBlobClient blobClient, - String accountName, String blobName, int fileSize) throws Exception { - - // Create a container if it does not exist. The container name - // must be lower case. - CloudBlobContainer container = blobClient.getContainerReference("https://" - + accountName + "/" + "$root"); - container.createIfNotExists(); - - // Create a blob output stream. - CloudBlockBlob blob = container.getBlockBlobReference(blobName); - BlobOutputStream outputStream = blob.openOutputStream(); - - outputStream.write(new byte[fileSize]); - outputStream.close(); - - // Return a reference to the block blob object. - return blob; - } - - public static AzureBlobStorageTestAccount createRoot(final String blobName, - final int fileSize) throws Exception { - - NativeAzureFileSystem fs = null; - CloudBlobContainer container = null; - Configuration conf = createTestConfiguration(); - - // Set up a session with the cloud blob client to generate SAS and check the - // existence of a container and capture the container object. - CloudStorageAccount account = createTestAccount(conf); - if (account == null) { - return null; - } - CloudBlobClient blobClient = account.createCloudBlobClient(); - - // Capture the account URL and the account name. - String accountName = verifyWasbAccountNameInConfig(conf); - - configureSecureModeTestSettings(conf); - - // Set up public container with the specified blob name. - CloudBlockBlob blobRoot = primeRootContainer(blobClient, accountName, - blobName, fileSize); - - // Capture the blob container object. It should exist after generating the - // shared access signature. - container = blobClient.getContainerReference(AZURE_ROOT_CONTAINER); - if (null == container || !container.exists()) { - final String errMsg = String - .format("Container '%s' expected but not found while creating SAS account."); - throw new Exception(errMsg); - } - - // Set the account URI without a container name. - URI accountUri = createAccountUri(accountName); - - // Initialize the Native Azure file system with anonymous credentials. - fs = new NativeAzureFileSystem(); - fs.initialize(accountUri, conf); - - // Create test account initializing the appropriate member variables. - // Set the container value to null for the default root container. - // - AzureBlobStorageTestAccount testAcct = new AzureBlobStorageTestAccount( - fs, account, blobRoot); - - // Return to caller with test account. 
- return testAcct; - } - - public void closeFileSystem() throws Exception { - if (fs != null) { - fs.close(); - } - } - - public void cleanup() throws Exception { - if (fs != null) { - fs.close(); - fs = null; - } - if (!skipContainerDelete && container != null) { - container.deleteIfExists(); - container = null; - } - if (blob != null) { - // The blob member variable is set for blobs under root containers. - // Delete blob objects created for root container tests when cleaning - // up the test account. - blob.delete(); - blob = null; - } - } - - @Override - public void close() throws Exception { - cleanup(); - } - - public NativeAzureFileSystem getFileSystem() { - return fs; - } - - public AzureNativeFileSystemStore getStore() { - return this.storage; - } - - /** - * Gets the real blob container backing this account if it's not a mock. - * - * @return A container, or null if it's a mock. - */ - public CloudBlobContainer getRealContainer() { - return container; - } - - /** - * Gets the real blob account backing this account if it's not a mock. - * - * @return An account, or null if it's a mock. - */ - public CloudStorageAccount getRealAccount() { - return account; - } - - /** - * Gets the mock storage interface if this account is backed by a mock. - * - * @return The mock storage, or null if it's backed by a real account. - */ - public MockStorageInterface getMockStorage() { - return mockStorage; - } - - public static class StandardCollector implements MetricsSink { - @Override - public void init(SubsetConfiguration conf) { - } - - @Override - public void putMetrics(MetricsRecord record) { - addRecord(record); - } - - @Override - public void flush() { - } - } - - public void setPageBlobDirectory(String directory) { - this.pageBlobDirectory = directory; - } - - public String getPageBlobDirectory() { - return pageBlobDirectory; - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ExceptionHandlingTestHelper.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ExceptionHandlingTestHelper.java deleted file mode 100644 index bea1c76d48e0d..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ExceptionHandlingTestHelper.java +++ /dev/null @@ -1,67 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.conf.Configuration; - -public class ExceptionHandlingTestHelper { - - /* - * Helper method to create a PageBlob test storage account. 
- */ - public static AzureBlobStorageTestAccount getPageBlobTestStorageAccount() - throws Exception { - - Configuration conf = new Configuration(); - - // Configure the page blob directories key so every file created is a page blob. - conf.set(AzureNativeFileSystemStore.KEY_PAGE_BLOB_DIRECTORIES, "/"); - - // Configure the atomic rename directories key so every folder will have - // atomic rename applied. - conf.set(AzureNativeFileSystemStore.KEY_ATOMIC_RENAME_DIRECTORIES, "/"); - return AzureBlobStorageTestAccount.create(conf); - } - - /* - * Helper method to create an empty file - */ - public static void createEmptyFile(AzureBlobStorageTestAccount testAccount, Path testPath) throws Exception { - FileSystem fs = testAccount.getFileSystem(); - FSDataOutputStream inputStream = fs.create(testPath); - inputStream.close(); - } - - /* - * Helper method to create an folder and files inside it. - */ - public static void createTestFolder(AzureBlobStorageTestAccount testAccount, Path testFolderPath) throws Exception { - FileSystem fs = testAccount.getFileSystem(); - fs.mkdirs(testFolderPath); - String testFolderFilePathBase = "test"; - - for (int i = 0; i < 10; i++) { - Path p = new Path(testFolderPath.toString() + "/" + testFolderFilePathBase + i + ".dat"); - fs.create(p).close(); - } - } -} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestAzureConcurrentOutOfBandIo.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestAzureConcurrentOutOfBandIo.java deleted file mode 100644 index c008d64386bf8..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestAzureConcurrentOutOfBandIo.java +++ /dev/null @@ -1,180 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.Arrays; - -import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.hadoop.fs.azure.integration.AzureTestUtils; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.fs.permission.PermissionStatus; -import org.apache.hadoop.util.concurrent.SubjectInheritingThread; - -/** - * Handle OOB IO into a shared container. - */ -public class ITestAzureConcurrentOutOfBandIo extends AbstractWasbTestBase { - - private static final Logger LOG = - LoggerFactory.getLogger(ITestAzureConcurrentOutOfBandIo.class); - - // Class constants. - static final int DOWNLOAD_BLOCK_SIZE = 8 * 1024 * 1024; - static final int UPLOAD_BLOCK_SIZE = 4 * 1024 * 1024; - static final int BLOB_SIZE = 32 * 1024 * 1024; - - // Number of blocks to be written before flush. 
- static final int NUMBER_OF_BLOCKS = 2; - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.createOutOfBandStore( - UPLOAD_BLOCK_SIZE, DOWNLOAD_BLOCK_SIZE); - } - - class DataBlockWriter implements Runnable { - - Thread runner; - AzureBlobStorageTestAccount writerStorageAccount; - String key; - boolean done = false; - - /** - * Constructor captures the test account. - * - * @param testAccount - */ - public DataBlockWriter(AzureBlobStorageTestAccount testAccount, String key) { - writerStorageAccount = testAccount; - this.key = key; - } - - /** - * Start writing blocks to Azure storage. - */ - public void startWriting() { - runner = new SubjectInheritingThread(this); // Create the block writer thread. - runner.start(); // Start the block writer thread. - } - - /** - * Stop writing blocks to Azure storage. - */ - public void stopWriting() { - done = true; - } - - /** - * Implementation of the runnable interface. The run method is a tight loop - * which repeatedly updates the blob with a 4 MB block. - */ - public void run() { - byte[] dataBlockWrite = new byte[UPLOAD_BLOCK_SIZE]; - - OutputStream outputStream = null; - - try { - for (int i = 0; !done; i++) { - // Write two 4 MB blocks to the blob. - // - outputStream = writerStorageAccount.getStore().storefile( - key, - new PermissionStatus("", "", FsPermission.getDefault()), - key); - - Arrays.fill(dataBlockWrite, (byte) (i % 256)); - for (int j = 0; j < NUMBER_OF_BLOCKS; j++) { - outputStream.write(dataBlockWrite); - } - - outputStream.flush(); - outputStream.close(); - } - } catch (AzureException e) { - LOG.error("DatablockWriter thread encountered a storage exception." - + e.getMessage(), e); - } catch (IOException e) { - LOG.error("DatablockWriter thread encountered an I/O exception." - + e.getMessage(), e); - } - } - } - - @Test - public void testReadOOBWrites() throws Exception { - - byte[] dataBlockWrite = new byte[UPLOAD_BLOCK_SIZE]; - byte[] dataBlockRead = new byte[UPLOAD_BLOCK_SIZE]; - - // Write to blob to make sure it exists. - // - // Write five 4 MB blocks to the blob. To ensure there is data in the blob before - // reading. This eliminates the race between the reader and writer threads. - String key = "WASB_String" + AzureTestUtils.getForkID() + ".txt"; - OutputStream outputStream = testAccount.getStore().storefile( - key, - new PermissionStatus("", "", FsPermission.getDefault()), - key); - Arrays.fill(dataBlockWrite, (byte) 255); - for (int i = 0; i < NUMBER_OF_BLOCKS; i++) { - outputStream.write(dataBlockWrite); - } - - outputStream.flush(); - outputStream.close(); - - // Start writing blocks to Azure store using the DataBlockWriter thread. - DataBlockWriter writeBlockTask = new DataBlockWriter(testAccount, key); - writeBlockTask.startWriting(); - int count = 0; - - for (int i = 0; i < 5; i++) { - try(InputStream inputStream = testAccount.getStore().retrieve(key)) { - count = 0; - int c = 0; - - while (c >= 0) { - c = inputStream.read(dataBlockRead, 0, UPLOAD_BLOCK_SIZE); - if (c < 0) { - break; - } - - // Counting the number of bytes. - count += c; - } - } catch (IOException e) { - System.out.println(e.getCause().toString()); - e.printStackTrace(); - fail(); - } - } - - // Stop writing blocks. - writeBlockTask.stopWriting(); - - // Validate that a block was read. 
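/*
 * Editor's note: an illustrative sketch, not part of this patch. Assuming the
 * out-of-band store factory and the storefile()/retrieve() calls used in the
 * test above, this shows a minimal write-then-read round trip against the raw
 * AzureNativeFileSystemStore, with no NativeAzureFileSystem in the loop.
 * Block sizes and the blob key are arbitrary.
 */
package org.apache.hadoop.fs.azure;

import java.io.InputStream;
import java.io.OutputStream;

import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.permission.PermissionStatus;

public final class OutOfBandStoreSketch {
  private OutOfBandStoreSketch() {
  }

  public static void main(String[] args) throws Exception {
    AzureBlobStorageTestAccount account =
        AzureBlobStorageTestAccount.createOutOfBandStore(
            4 * 1024 * 1024, 8 * 1024 * 1024);
    if (account == null) {
      return; // no live test account configured
    }
    String key = "sketch-oob-blob.dat";
    // Write the blob directly through the store.
    try (OutputStream out = account.getStore().storefile(
        key, new PermissionStatus("", "", FsPermission.getDefault()), key)) {
      out.write(new byte[1024]);
    }
    // Read it back and count the bytes.
    int total = 0;
    try (InputStream in = account.getStore().retrieve(key)) {
      byte[] buffer = new byte[512];
      int n;
      while ((n = in.read(buffer)) > 0) {
        total += n;
      }
    }
    System.out.println("read back " + total + " bytes");
    account.cleanup();
  }
}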
- assertEquals(NUMBER_OF_BLOCKS * UPLOAD_BLOCK_SIZE, count); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestAzureConcurrentOutOfBandIoWithSecureMode.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestAzureConcurrentOutOfBandIoWithSecureMode.java deleted file mode 100644 index 2b0ea56821c9e..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestAzureConcurrentOutOfBandIoWithSecureMode.java +++ /dev/null @@ -1,33 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -/** - * Extends ITestAzureConcurrentOutOfBandIo in order to run testReadOOBWrites with secure mode - * (fs.azure.secure.mode) both enabled and disabled. - */ -public class ITestAzureConcurrentOutOfBandIoWithSecureMode - extends ITestAzureConcurrentOutOfBandIo { - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.createOutOfBandStore( - UPLOAD_BLOCK_SIZE, DOWNLOAD_BLOCK_SIZE, true); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestAzureFileSystemErrorConditions.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestAzureFileSystemErrorConditions.java deleted file mode 100644 index a4baab22dad04..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestAzureFileSystemErrorConditions.java +++ /dev/null @@ -1,242 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure; - -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.net.HttpURLConnection; -import java.net.URI; -import java.util.Arrays; -import java.util.HashMap; -import java.util.concurrent.Callable; - -import com.microsoft.azure.storage.OperationContext; -import com.microsoft.azure.storage.SendingRequestEvent; -import com.microsoft.azure.storage.StorageEvent; -import org.junit.jupiter.api.Test; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.azure.AzureNativeFileSystemStore.TestHookOperationContext; -import org.apache.hadoop.test.GenericTestUtils; - -import static org.apache.hadoop.fs.azure.AzureNativeFileSystemStore.NO_ACCESS_TO_CONTAINER_MSG; -import static org.apache.hadoop.test.LambdaTestUtils.intercept; - -/** - * Error handling. - */ -public class ITestAzureFileSystemErrorConditions extends - AbstractWasbTestWithTimeout { - private static final int ALL_THREE_FILE_SIZE = 1024; - - @Test - public void testNoInitialize() throws Exception { - intercept(AssertionError.class, - new Callable() { - @Override - public FileMetadata call() throws Exception { - return new AzureNativeFileSystemStore() - .retrieveMetadata("foo"); - } - }); - } - - /** - * Try accessing an unauthorized or non-existent (treated the same) container - * from WASB. - */ - @Test - public void testAccessUnauthorizedPublicContainer() throws Exception { - final String container = "nonExistentContainer"; - final String account = "hopefullyNonExistentAccount"; - Path noAccessPath = new Path( - "wasb://" + container + "@" + account + "/someFile"); - NativeAzureFileSystem.suppressRetryPolicy(); - try { - FileSystem.get(noAccessPath.toUri(), new Configuration()) - .open(noAccessPath); - assertTrue(false, "Should've thrown."); - } catch (AzureException ex) { - GenericTestUtils.assertExceptionContains( - String.format(NO_ACCESS_TO_CONTAINER_MSG, account, container), ex); - } finally { - NativeAzureFileSystem.resumeRetryPolicy(); - } - } - - @Test - public void testAccessContainerWithWrongVersion() throws Exception { - AzureNativeFileSystemStore store = new AzureNativeFileSystemStore(); - MockStorageInterface mockStorage = new MockStorageInterface(); - store.setAzureStorageInteractionLayer(mockStorage); - try (FileSystem fs = new NativeAzureFileSystem(store)) { - Configuration conf = new Configuration(); - AzureBlobStorageTestAccount.setMockAccountKey(conf); - HashMap metadata = new HashMap(); - metadata.put(AzureNativeFileSystemStore.VERSION_METADATA_KEY, - "2090-04-05"); // It's from the future! 
- mockStorage.addPreExistingContainer( - AzureBlobStorageTestAccount.getMockContainerUri(), metadata); - - AzureException ex = intercept(AzureException.class, - new Callable() { - @Override - public FileStatus[] call() throws Exception { - fs.initialize(new URI(AzureBlobStorageTestAccount.MOCK_WASB_URI), - conf); - return fs.listStatus(new Path("/")); - } - }); - GenericTestUtils.assertExceptionContains( - "unsupported version: 2090-04-05.", ex); - } - } - - private interface ConnectionRecognizer { - boolean isTargetConnection(HttpURLConnection connection); - } - - private class TransientErrorInjector extends StorageEvent { - private final ConnectionRecognizer connectionRecognizer; - private boolean injectedErrorOnce = false; - - public TransientErrorInjector(ConnectionRecognizer connectionRecognizer) { - this.connectionRecognizer = connectionRecognizer; - } - - @Override - public void eventOccurred(SendingRequestEvent eventArg) { - HttpURLConnection connection - = (HttpURLConnection) eventArg.getConnectionObject(); - if (!connectionRecognizer.isTargetConnection(connection)) { - return; - } - if (!injectedErrorOnce) { - connection.setReadTimeout(1); - connection.disconnect(); - injectedErrorOnce = true; - } - } - } - - private void injectTransientError(NativeAzureFileSystem fs, - final ConnectionRecognizer connectionRecognizer) { - fs.getStore().addTestHookToOperationContext(new TestHookOperationContext() { - @Override - public OperationContext modifyOperationContext(OperationContext original) { - original.getSendingRequestEventHandler().addListener( - new TransientErrorInjector(connectionRecognizer)); - return original; - } - }); - } - - @Test - public void testTransientErrorOnDelete() throws Exception { - // Need to do this test against a live storage account - AzureBlobStorageTestAccount testAccount = - AzureBlobStorageTestAccount.create(); - assumeNotNull(testAccount); - try { - NativeAzureFileSystem fs = testAccount.getFileSystem(); - injectTransientError(fs, new ConnectionRecognizer() { - @Override - public boolean isTargetConnection(HttpURLConnection connection) { - return connection.getRequestMethod().equals("DELETE"); - } - }); - Path testFile = new Path("/a/b"); - assertTrue(fs.createNewFile(testFile)); - assertTrue(fs.rename(testFile, new Path("/x"))); - } finally { - testAccount.cleanup(); - } - } - - private void writeAllThreeFile(NativeAzureFileSystem fs, Path testFile) - throws IOException { - byte[] buffer = new byte[ALL_THREE_FILE_SIZE]; - Arrays.fill(buffer, (byte) 3); - try(OutputStream stream = fs.create(testFile)) { - stream.write(buffer); - } - } - - private void readAllThreeFile(NativeAzureFileSystem fs, Path testFile) - throws IOException { - byte[] buffer = new byte[ALL_THREE_FILE_SIZE]; - InputStream inStream = fs.open(testFile); - assertEquals(buffer.length, - inStream.read(buffer, 0, buffer.length)); - inStream.close(); - for (int i = 0; i < buffer.length; i++) { - assertEquals(3, buffer[i]); - } - } - - @Test - public void testTransientErrorOnCommitBlockList() throws Exception { - // Need to do this test against a live storage account - AzureBlobStorageTestAccount testAccount = - AzureBlobStorageTestAccount.create(); - assumeNotNull(testAccount); - try { - NativeAzureFileSystem fs = testAccount.getFileSystem(); - injectTransientError(fs, new ConnectionRecognizer() { - @Override - public boolean isTargetConnection(HttpURLConnection connection) { - return connection.getRequestMethod().equals("PUT") - && connection.getURL().getQuery() != null - && 
connection.getURL().getQuery().contains("blocklist"); - } - }); - Path testFile = new Path("/a/b"); - writeAllThreeFile(fs, testFile); - readAllThreeFile(fs, testFile); - } finally { - testAccount.cleanup(); - } - } - - @Test - public void testTransientErrorOnRead() throws Exception { - // Need to do this test against a live storage account - AzureBlobStorageTestAccount testAccount = - AzureBlobStorageTestAccount.create(); - assumeNotNull(testAccount); - try { - NativeAzureFileSystem fs = testAccount.getFileSystem(); - Path testFile = new Path("/a/b"); - writeAllThreeFile(fs, testFile); - injectTransientError(fs, new ConnectionRecognizer() { - @Override - public boolean isTargetConnection(HttpURLConnection connection) { - return connection.getRequestMethod().equals("GET"); - } - }); - readAllThreeFile(fs, testFile); - } finally { - testAccount.cleanup(); - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestBlobDataValidation.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestBlobDataValidation.java deleted file mode 100644 index 6f266c418a414..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestBlobDataValidation.java +++ /dev/null @@ -1,242 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import static org.apache.hadoop.fs.azure.AzureNativeFileSystemStore.KEY_CHECK_BLOCK_MD5; -import static org.apache.hadoop.fs.azure.AzureNativeFileSystemStore.KEY_STORE_BLOB_MD5; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.net.HttpURLConnection; -import java.util.Arrays; - -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.azure.AzureNativeFileSystemStore.TestHookOperationContext; -import org.apache.hadoop.fs.azure.integration.AzureTestUtils; - -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.Test; - -import com.microsoft.azure.storage.Constants; -import com.microsoft.azure.storage.OperationContext; -import com.microsoft.azure.storage.ResponseReceivedEvent; -import com.microsoft.azure.storage.StorageErrorCodeStrings; -import com.microsoft.azure.storage.StorageEvent; -import com.microsoft.azure.storage.StorageException; -import com.microsoft.azure.storage.blob.BlockEntry; -import com.microsoft.azure.storage.blob.BlockSearchMode; -import com.microsoft.azure.storage.blob.CloudBlockBlob; -import com.microsoft.azure.storage.core.Base64; - -/** - * Test that we do proper data integrity validation with MD5 checks as - * configured. 
- */ -public class ITestBlobDataValidation extends AbstractWasbTestWithTimeout { - private AzureBlobStorageTestAccount testAccount; - - @AfterEach - public void tearDown() throws Exception { - testAccount = AzureTestUtils.cleanupTestAccount(testAccount); - } - - /** - * Test that by default we don't store the blob-level MD5. - */ - @Test - public void testBlobMd5StoreOffByDefault() throws Exception { - testAccount = AzureBlobStorageTestAccount.create(); - testStoreBlobMd5(false); - } - - /** - * Test that we get blob-level MD5 storage and validation if we specify that - * in the configuration. - */ - @Test - public void testStoreBlobMd5() throws Exception { - Configuration conf = new Configuration(); - conf.setBoolean(KEY_STORE_BLOB_MD5, true); - testAccount = AzureBlobStorageTestAccount.create(conf); - testStoreBlobMd5(true); - } - - /** - * Trims a suffix/prefix from the given string. For example if - * s is given as "/xy" and toTrim is "/", this method returns "xy" - */ - private static String trim(String s, String toTrim) { - return StringUtils.removeEnd(StringUtils.removeStart(s, toTrim), - toTrim); - } - - private void testStoreBlobMd5(boolean expectMd5Stored) throws Exception { - assumeNotNull(testAccount); - // Write a test file. - NativeAzureFileSystem fs = testAccount.getFileSystem(); - Path testFilePath = AzureTestUtils.pathForTests(fs, methodName.getMethodName()); - String testFileKey = trim(testFilePath.toUri().getPath(), "/"); - OutputStream outStream = fs.create(testFilePath); - outStream.write(new byte[] { 5, 15 }); - outStream.close(); - - // Check that we stored/didn't store the MD5 field as configured. - CloudBlockBlob blob = testAccount.getBlobReference(testFileKey); - blob.downloadAttributes(); - String obtainedMd5 = blob.getProperties().getContentMD5(); - if (expectMd5Stored) { - assertNotNull(obtainedMd5); - } else { - assertNull(obtainedMd5, "Expected no MD5, found: " + obtainedMd5); - } - - // Mess with the content so it doesn't match the MD5. - String newBlockId = Base64.encode(new byte[] { 55, 44, 33, 22 }); - blob.uploadBlock(newBlockId, - new ByteArrayInputStream(new byte[] { 6, 45 }), 2); - blob.commitBlockList(Arrays.asList(new BlockEntry[] { new BlockEntry( - newBlockId, BlockSearchMode.UNCOMMITTED) })); - - // Now read back the content. If we stored the MD5 for the blob content - // we should get a data corruption error. - InputStream inStream = fs.open(testFilePath); - try { - byte[] inBuf = new byte[100]; - while (inStream.read(inBuf) > 0){ - //nothing; - } - inStream.close(); - if (expectMd5Stored) { - fail("Should've thrown because of data corruption."); - } - } catch (IOException ex) { - if (!expectMd5Stored) { - throw ex; - } - StorageException cause = (StorageException)ex.getCause(); - assertNotNull(cause); - assertEquals(StorageErrorCodeStrings.INVALID_MD5, cause.getErrorCode(), - "Unexpected cause: " + cause); - } - } - - /** - * Test that by default we check block-level MD5. - */ - @Test - public void testCheckBlockMd5() throws Exception { - testAccount = AzureBlobStorageTestAccount.create(); - testCheckBlockMd5(true); - } - - /** - * Test that we don't check block-level MD5 if we specify that in the - * configuration. 
- */ - @Test - public void testDontCheckBlockMd5() throws Exception { - Configuration conf = new Configuration(); - conf.setBoolean(KEY_CHECK_BLOCK_MD5, false); - testAccount = AzureBlobStorageTestAccount.create(conf); - testCheckBlockMd5(false); - } - - /** - * Connection inspector to check that MD5 fields for content is set/not set as - * expected. - */ - private static class ContentMD5Checker extends - StorageEvent { - private final boolean expectMd5; - - public ContentMD5Checker(boolean expectMd5) { - this.expectMd5 = expectMd5; - } - - @Override - public void eventOccurred(ResponseReceivedEvent eventArg) { - HttpURLConnection connection = (HttpURLConnection) eventArg - .getConnectionObject(); - if (isGetRange(connection)) { - checkObtainedMd5(connection - .getHeaderField(Constants.HeaderConstants.CONTENT_MD5)); - } else if (isPutBlock(connection)) { - checkObtainedMd5(connection - .getRequestProperty(Constants.HeaderConstants.CONTENT_MD5)); - } - } - - private void checkObtainedMd5(String obtainedMd5) { - if (expectMd5) { - assertNotNull(obtainedMd5); - } else { - assertNull(obtainedMd5, "Expected no MD5, found: " + obtainedMd5); - } - } - - private static boolean isPutBlock(HttpURLConnection connection) { - return connection.getRequestMethod().equals("PUT") - && connection.getURL().getQuery() != null - && connection.getURL().getQuery().contains("blockid"); - } - - private static boolean isGetRange(HttpURLConnection connection) { - return connection.getRequestMethod().equals("GET") - && connection - .getHeaderField(Constants.HeaderConstants.STORAGE_RANGE_HEADER) != null; - } - } - - private void testCheckBlockMd5(final boolean expectMd5Checked) - throws Exception { - assumeNotNull(testAccount); - Path testFilePath = new Path("/testFile"); - - // Add a hook to check that for GET/PUT requests we set/don't set - // the block-level MD5 field as configured. I tried to do clever - // testing by also messing with the raw data to see if we actually - // validate the data as expected, but the HttpURLConnection wasn't - // pluggable enough for me to do that. - testAccount.getFileSystem().getStore() - .addTestHookToOperationContext(new TestHookOperationContext() { - @Override - public OperationContext modifyOperationContext( - OperationContext original) { - original.getResponseReceivedEventHandler().addListener( - new ContentMD5Checker(expectMd5Checked)); - return original; - } - }); - - OutputStream outStream = testAccount.getFileSystem().create(testFilePath); - outStream.write(new byte[] { 5, 15 }); - outStream.close(); - - InputStream inStream = testAccount.getFileSystem().open(testFilePath); - byte[] inBuf = new byte[100]; - while (inStream.read(inBuf) > 0){ - //nothing; - } - inStream.close(); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestBlobTypeSpeedDifference.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestBlobTypeSpeedDifference.java deleted file mode 100644 index 5c3f156e304c2..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestBlobTypeSpeedDifference.java +++ /dev/null @@ -1,163 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.IOException; -import java.io.OutputStream; -import java.util.Arrays; -import java.util.Date; - -import org.junit.jupiter.api.Test; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.azure.integration.AzureTestUtils; -import org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation; - - -/** - * A simple benchmark to find out the difference in speed between block - * and page blobs. - */ -public class ITestBlobTypeSpeedDifference extends AbstractWasbTestBase { - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.create(); - } - - /** - * Writes data to the given stream of the given size, flushing every - * x bytes. - */ - private static void writeTestFile(OutputStream writeStream, - long size, long flushInterval) throws IOException { - int bufferSize = (int) Math.min(1000, flushInterval); - byte[] buffer = new byte[bufferSize]; - Arrays.fill(buffer, (byte) 7); - int bytesWritten = 0; - int bytesUnflushed = 0; - while (bytesWritten < size) { - int numberToWrite = (int) Math.min(bufferSize, size - bytesWritten); - writeStream.write(buffer, 0, numberToWrite); - bytesWritten += numberToWrite; - bytesUnflushed += numberToWrite; - if (bytesUnflushed >= flushInterval) { - writeStream.flush(); - bytesUnflushed = 0; - } - } - } - - private static class TestResult { - final long timeTakenInMs; - final long totalNumberOfRequests; - - TestResult(long timeTakenInMs, long totalNumberOfRequests) { - this.timeTakenInMs = timeTakenInMs; - this.totalNumberOfRequests = totalNumberOfRequests; - } - } - - /** - * Writes data to the given file of the given size, flushing every - * x bytes. Measure performance of that and return it. - */ - private static TestResult writeTestFile(NativeAzureFileSystem fs, Path path, - long size, long flushInterval) throws IOException { - AzureFileSystemInstrumentation instrumentation = - fs.getInstrumentation(); - long initialRequests = instrumentation.getCurrentWebResponses(); - Date start = new Date(); - OutputStream output = fs.create(path); - writeTestFile(output, size, flushInterval); - output.close(); - long finalRequests = instrumentation.getCurrentWebResponses(); - return new TestResult(new Date().getTime() - start.getTime(), - finalRequests - initialRequests); - } - - /** - * Writes data to a block blob of the given size, flushing every - * x bytes. Measure performance of that and return it. - */ - private static TestResult writeBlockBlobTestFile(NativeAzureFileSystem fs, - long size, long flushInterval) throws IOException { - return writeTestFile(fs, new Path("/blockBlob"), size, flushInterval); - } - - /** - * Writes data to a page blob of the given size, flushing every - * x bytes. Measure performance of that and return it. 
- */ - private static TestResult writePageBlobTestFile(NativeAzureFileSystem fs, - long size, long flushInterval) throws IOException { - Path testFile = AzureTestUtils.blobPathForTests(fs, - "writePageBlobTestFile"); - return writeTestFile(fs, - testFile, - size, flushInterval); - } - - /** - * Runs the benchmark over a small 10 KB file, flushing every 500 bytes. - */ - @Test - public void testTenKbFileFrequentFlush() throws Exception { - testForSizeAndFlushInterval(getFileSystem(), 10 * 1000, 500); - } - - /** - * Runs the benchmark for the given file size and flush frequency. - */ - private static void testForSizeAndFlushInterval(NativeAzureFileSystem fs, - final long size, final long flushInterval) throws IOException { - for (int i = 0; i < 5; i++) { - TestResult pageBlobResults = writePageBlobTestFile(fs, size, flushInterval); - System.out.printf( - "Page blob upload took %d ms. Total number of requests: %d.\n", - pageBlobResults.timeTakenInMs, pageBlobResults.totalNumberOfRequests); - TestResult blockBlobResults = writeBlockBlobTestFile(fs, size, flushInterval); - System.out.printf( - "Block blob upload took %d ms. Total number of requests: %d.\n", - blockBlobResults.timeTakenInMs, blockBlobResults.totalNumberOfRequests); - } - } - - /** - * Runs the benchmark for the given file size and flush frequency from the - * command line. - */ - public static void main(String[] argv) throws Exception { - Configuration conf = new Configuration(); - long size = 10 * 1000 * 1000; - long flushInterval = 2000; - if (argv.length > 0) { - size = Long.parseLong(argv[0]); - } - if (argv.length > 1) { - flushInterval = Long.parseLong(argv[1]); - } - testForSizeAndFlushInterval( - (NativeAzureFileSystem) FileSystem.get(conf), - size, - flushInterval); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestBlockBlobInputStream.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestBlockBlobInputStream.java deleted file mode 100644 index 778bbb8849b70..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestBlockBlobInputStream.java +++ /dev/null @@ -1,927 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure; - -import java.io.EOFException; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.EnumSet; -import java.util.Random; -import java.util.concurrent.Callable; - -import org.junit.jupiter.api.MethodOrderer; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestMethodOrder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FSExceptionMessages; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.FutureDataInputStreamBuilder; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.azure.integration.AbstractAzureScaleTest; -import org.apache.hadoop.fs.azure.integration.AzureTestUtils; -import org.apache.hadoop.fs.contract.ContractTestUtils; -import org.apache.hadoop.fs.contract.ContractTestUtils.NanoTimer; - -import static org.apache.hadoop.test.LambdaTestUtils.*; - -/** - * Test semantics and performance of the original block blob input stream - * (KEY_INPUT_STREAM_VERSION=1) and the new - * BlockBlobInputStream (KEY_INPUT_STREAM_VERSION=2). - */ -@TestMethodOrder(MethodOrderer.Alphanumeric.class) - -public class ITestBlockBlobInputStream extends AbstractAzureScaleTest { - private static final Logger LOG = LoggerFactory.getLogger( - ITestBlockBlobInputStream.class); - private static final int KILOBYTE = 1024; - private static final int MEGABYTE = KILOBYTE * KILOBYTE; - private static final int TEST_FILE_SIZE = 6 * MEGABYTE; - private static final Path TEST_FILE_PATH = new Path( - "TestBlockBlobInputStream.txt"); - - private AzureBlobStorageTestAccount accountUsingInputStreamV1; - private AzureBlobStorageTestAccount accountUsingInputStreamV2; - private long testFileLength; - - - - private FileStatus testFileStatus; - private Path hugefile; - - @BeforeEach - @Override - public void setUp() throws Exception { - super.setUp(); - Configuration conf = new Configuration(); - conf.setInt(AzureNativeFileSystemStore.KEY_INPUT_STREAM_VERSION, 1); - - accountUsingInputStreamV1 = AzureBlobStorageTestAccount.create( - "testblockblobinputstream", - EnumSet.of(AzureBlobStorageTestAccount.CreateOptions.CreateContainer), - conf, - true); - - accountUsingInputStreamV2 = AzureBlobStorageTestAccount.create( - "testblockblobinputstream", - EnumSet.noneOf(AzureBlobStorageTestAccount.CreateOptions.class), - null, - true); - - assumeNotNull(accountUsingInputStreamV1); - assumeNotNull(accountUsingInputStreamV2); - hugefile = fs.makeQualified(TEST_FILE_PATH); - try { - testFileStatus = fs.getFileStatus(TEST_FILE_PATH); - testFileLength = testFileStatus.getLen(); - } catch (FileNotFoundException e) { - // file doesn't exist - testFileLength = 0; - } - } - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - Configuration conf = new Configuration(); - conf.setInt(AzureNativeFileSystemStore.KEY_INPUT_STREAM_VERSION, 1); - - accountUsingInputStreamV1 = AzureBlobStorageTestAccount.create( - "testblockblobinputstream", - EnumSet.of(AzureBlobStorageTestAccount.CreateOptions.CreateContainer), - conf, - true); - - accountUsingInputStreamV2 = AzureBlobStorageTestAccount.create( - "testblockblobinputstream", - EnumSet.noneOf(AzureBlobStorageTestAccount.CreateOptions.class), - null, - true); - - 
assumeNotNull(accountUsingInputStreamV1); - assumeNotNull(accountUsingInputStreamV2); - return accountUsingInputStreamV1; - } - - /** - * Create a test file by repeating the characters in the alphabet. - * @throws IOException - */ - private void createTestFileAndSetLength() throws IOException { - FileSystem fs = accountUsingInputStreamV1.getFileSystem(); - - // To reduce test run time, the test file can be reused. - if (fs.exists(TEST_FILE_PATH)) { - testFileStatus = fs.getFileStatus(TEST_FILE_PATH); - testFileLength = testFileStatus.getLen(); - LOG.info("Reusing test file: {}", testFileStatus); - return; - } - - int sizeOfAlphabet = ('z' - 'a' + 1); - byte[] buffer = new byte[26 * KILOBYTE]; - char character = 'a'; - for (int i = 0; i < buffer.length; i++) { - buffer[i] = (byte) character; - character = (character == 'z') ? 'a' : (char) ((int) character + 1); - } - - LOG.info("Creating test file {} of size: {}", TEST_FILE_PATH, - TEST_FILE_SIZE); - ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); - - try(FSDataOutputStream outputStream = fs.create(TEST_FILE_PATH)) { - int bytesWritten = 0; - while (bytesWritten < TEST_FILE_SIZE) { - outputStream.write(buffer); - bytesWritten += buffer.length; - } - LOG.info("Closing stream {}", outputStream); - ContractTestUtils.NanoTimer closeTimer - = new ContractTestUtils.NanoTimer(); - outputStream.close(); - closeTimer.end("time to close() output stream"); - } - timer.end("time to write %d KB", TEST_FILE_SIZE / 1024); - testFileLength = fs.getFileStatus(TEST_FILE_PATH).getLen(); - } - - void assumeHugeFileExists() throws IOException { - ContractTestUtils.assertPathExists(fs, "huge file not created", hugefile); - FileStatus status = fs.getFileStatus(hugefile); - ContractTestUtils.assertIsFile(hugefile, status); - assertTrue(status.getLen() > 0, "File " + hugefile + " is empty"); - } - - /** - * Calculate megabits per second from the specified values for bytes and - * milliseconds. - * @param bytes The number of bytes. - * @param milliseconds The number of milliseconds. - * @return The number of megabits per second. 
- */ - private static double toMbps(long bytes, long milliseconds) { - return bytes / 1000.0 * 8 / milliseconds; - } - - @Test - public void test_0100_CreateHugeFile() throws IOException { - createTestFileAndSetLength(); - } - - @Test - public void test_0200_BasicReadTest() throws Exception { - assumeHugeFileExists(); - - try ( - FSDataInputStream inputStreamV1 - = accountUsingInputStreamV1.getFileSystem().open(TEST_FILE_PATH); - - FSDataInputStream inputStreamV2 - = accountUsingInputStreamV2.getFileSystem().open(TEST_FILE_PATH); - ) { - byte[] bufferV1 = new byte[3 * MEGABYTE]; - byte[] bufferV2 = new byte[bufferV1.length]; - - // v1 forward seek and read a kilobyte into first kilobyte of bufferV1 - inputStreamV1.seek(5 * MEGABYTE); - int numBytesReadV1 = inputStreamV1.read(bufferV1, 0, KILOBYTE); - assertEquals(KILOBYTE, numBytesReadV1); - - // v2 forward seek and read a kilobyte into first kilobyte of bufferV2 - inputStreamV2.seek(5 * MEGABYTE); - int numBytesReadV2 = inputStreamV2.read(bufferV2, 0, KILOBYTE); - assertEquals(KILOBYTE, numBytesReadV2); - - assertArrayEquals(bufferV1, bufferV2); - - int len = MEGABYTE; - int offset = bufferV1.length - len; - - // v1 reverse seek and read a megabyte into last megabyte of bufferV1 - inputStreamV1.seek(3 * MEGABYTE); - numBytesReadV1 = inputStreamV1.read(bufferV1, offset, len); - assertEquals(len, numBytesReadV1); - - // v2 reverse seek and read a megabyte into last megabyte of bufferV2 - inputStreamV2.seek(3 * MEGABYTE); - numBytesReadV2 = inputStreamV2.read(bufferV2, offset, len); - assertEquals(len, numBytesReadV2); - - assertArrayEquals(bufferV1, bufferV2); - } - } - - @Test - public void test_0201_RandomReadTest() throws Exception { - assumeHugeFileExists(); - - try ( - FSDataInputStream inputStreamV1 - = accountUsingInputStreamV1.getFileSystem().open(TEST_FILE_PATH); - - FSDataInputStream inputStreamV2 - = accountUsingInputStreamV2.getFileSystem().open(TEST_FILE_PATH); - ) { - final int bufferSize = 4 * KILOBYTE; - byte[] bufferV1 = new byte[bufferSize]; - byte[] bufferV2 = new byte[bufferV1.length]; - - verifyConsistentReads(inputStreamV1, inputStreamV2, bufferV1, bufferV2); - - inputStreamV1.seek(0); - inputStreamV2.seek(0); - - verifyConsistentReads(inputStreamV1, inputStreamV2, bufferV1, bufferV2); - - verifyConsistentReads(inputStreamV1, inputStreamV2, bufferV1, bufferV2); - - int seekPosition = 2 * KILOBYTE; - inputStreamV1.seek(seekPosition); - inputStreamV2.seek(seekPosition); - - inputStreamV1.seek(0); - inputStreamV2.seek(0); - - verifyConsistentReads(inputStreamV1, inputStreamV2, bufferV1, bufferV2); - - verifyConsistentReads(inputStreamV1, inputStreamV2, bufferV1, bufferV2); - - verifyConsistentReads(inputStreamV1, inputStreamV2, bufferV1, bufferV2); - - seekPosition = 5 * KILOBYTE; - inputStreamV1.seek(seekPosition); - inputStreamV2.seek(seekPosition); - - verifyConsistentReads(inputStreamV1, inputStreamV2, bufferV1, bufferV2); - - seekPosition = 10 * KILOBYTE; - inputStreamV1.seek(seekPosition); - inputStreamV2.seek(seekPosition); - - verifyConsistentReads(inputStreamV1, inputStreamV2, bufferV1, bufferV2); - - verifyConsistentReads(inputStreamV1, inputStreamV2, bufferV1, bufferV2); - - seekPosition = 4100 * KILOBYTE; - inputStreamV1.seek(seekPosition); - inputStreamV2.seek(seekPosition); - - verifyConsistentReads(inputStreamV1, inputStreamV2, bufferV1, bufferV2); - } - } - - private void verifyConsistentReads(FSDataInputStream inputStreamV1, - FSDataInputStream inputStreamV2, - byte[] bufferV1, - byte[] bufferV2) throws 
IOException { - int size = bufferV1.length; - final int numBytesReadV1 = inputStreamV1.read(bufferV1, 0, size); - assertEquals(size, numBytesReadV1, "Bytes read from V1 stream"); - - final int numBytesReadV2 = inputStreamV2.read(bufferV2, 0, size); - assertEquals(size, numBytesReadV2, "Bytes read from V2 stream"); - - assertArrayEquals(bufferV1, bufferV2, "Mismatch in read data"); - } - - @Test - public void test_202_PosReadTest() throws Exception { - assumeHugeFileExists(); - FutureDataInputStreamBuilder builder = accountUsingInputStreamV2 - .getFileSystem().openFile(TEST_FILE_PATH); - builder.opt(AzureNativeFileSystemStore.FS_AZURE_BLOCK_BLOB_BUFFERED_PREAD_DISABLE, true); - try ( - FSDataInputStream inputStreamV1 - = accountUsingInputStreamV1.getFileSystem().open(TEST_FILE_PATH); - FSDataInputStream inputStreamV2 - = accountUsingInputStreamV2.getFileSystem().open(TEST_FILE_PATH); - FSDataInputStream inputStreamV2NoBuffer = builder.build().get(); - ) { - final int bufferSize = 4 * KILOBYTE; - byte[] bufferV1 = new byte[bufferSize]; - byte[] bufferV2 = new byte[bufferSize]; - byte[] bufferV2NoBuffer = new byte[bufferSize]; - - verifyConsistentReads(inputStreamV1, inputStreamV2, inputStreamV2NoBuffer, 0, - bufferV1, bufferV2, bufferV2NoBuffer); - - int pos = 2 * KILOBYTE; - verifyConsistentReads(inputStreamV1, inputStreamV2, inputStreamV2NoBuffer, pos, - bufferV1, bufferV2, bufferV2NoBuffer); - - pos = 10 * KILOBYTE; - verifyConsistentReads(inputStreamV1, inputStreamV2, inputStreamV2NoBuffer, pos, - bufferV1, bufferV2, bufferV2NoBuffer); - - pos = 4100 * KILOBYTE; - verifyConsistentReads(inputStreamV1, inputStreamV2, inputStreamV2NoBuffer, pos, - bufferV1, bufferV2, bufferV2NoBuffer); - } - } - - private void verifyConsistentReads(FSDataInputStream inputStreamV1, - FSDataInputStream inputStreamV2, FSDataInputStream inputStreamV2NoBuffer, - int pos, byte[] bufferV1, byte[] bufferV2, byte[] bufferV2NoBuffer) - throws IOException { - int size = bufferV1.length; - int numBytesReadV1 = inputStreamV1.read(pos, bufferV1, 0, size); - assertEquals(size, numBytesReadV1, "Bytes read from V1 stream"); - - int numBytesReadV2 = inputStreamV2.read(pos, bufferV2, 0, size); - assertEquals(size, numBytesReadV2, "Bytes read from V2 stream"); - - int numBytesReadV2NoBuffer = inputStreamV2NoBuffer.read(pos, - bufferV2NoBuffer, 0, size); - assertEquals(size, numBytesReadV2NoBuffer, - "Bytes read from V2 stream (buffered pread disabled)"); - - assertArrayEquals(bufferV1, bufferV2, "Mismatch in read data"); - assertArrayEquals(bufferV2, bufferV2NoBuffer, "Mismatch in read data"); - } - - /** - * Validates the implementation of InputStream.markSupported. - * @throws IOException - */ - @Test - public void test_0301_MarkSupportedV1() throws IOException { - validateMarkSupported(accountUsingInputStreamV1.getFileSystem()); - } - - /** - * Validates the implementation of InputStream.markSupported. - * @throws IOException - */ - @Test - public void test_0302_MarkSupportedV2() throws IOException { - validateMarkSupported(accountUsingInputStreamV1.getFileSystem()); - } - - private void validateMarkSupported(FileSystem fs) throws IOException { - assumeHugeFileExists(); - try (FSDataInputStream inputStream = fs.open(TEST_FILE_PATH)) { - assertTrue(inputStream.markSupported(), "mark is not supported"); - } - } - - /** - * Validates the implementation of InputStream.mark and reset - * for version 1 of the block blob input stream. 
- * @throws Exception - */ - @Test - public void test_0303_MarkAndResetV1() throws Exception { - validateMarkAndReset(accountUsingInputStreamV1.getFileSystem()); - } - - /** - * Validates the implementation of InputStream.mark and reset - * for version 2 of the block blob input stream. - * @throws Exception - */ - @Test - public void test_0304_MarkAndResetV2() throws Exception { - validateMarkAndReset(accountUsingInputStreamV2.getFileSystem()); - } - - private void validateMarkAndReset(FileSystem fs) throws Exception { - assumeHugeFileExists(); - try (FSDataInputStream inputStream = fs.open(TEST_FILE_PATH)) { - inputStream.mark(KILOBYTE - 1); - - byte[] buffer = new byte[KILOBYTE]; - int bytesRead = inputStream.read(buffer); - assertEquals(buffer.length, bytesRead); - - inputStream.reset(); - assertEquals(0, inputStream.getPos(), "rest -> pos 0"); - - inputStream.mark(8 * KILOBYTE - 1); - - buffer = new byte[8 * KILOBYTE]; - bytesRead = inputStream.read(buffer); - assertEquals(buffer.length, bytesRead); - - intercept(IOException.class, - "Resetting to invalid mark", - new Callable() { - @Override - public FSDataInputStream call() throws Exception { - inputStream.reset(); - return inputStream; - } - } - ); - } - } - - /** - * Validates the implementation of Seekable.seekToNewSource, which should - * return false for version 1 of the block blob input stream. - * @throws IOException - */ - @Test - public void test_0305_SeekToNewSourceV1() throws IOException { - validateSeekToNewSource(accountUsingInputStreamV1.getFileSystem()); - } - - /** - * Validates the implementation of Seekable.seekToNewSource, which should - * return false for version 2 of the block blob input stream. - * @throws IOException - */ - @Test - public void test_0306_SeekToNewSourceV2() throws IOException { - validateSeekToNewSource(accountUsingInputStreamV2.getFileSystem()); - } - - private void validateSeekToNewSource(FileSystem fs) throws IOException { - assumeHugeFileExists(); - try (FSDataInputStream inputStream = fs.open(TEST_FILE_PATH)) { - assertFalse(inputStream.seekToNewSource(0)); - } - } - - /** - * Validates the implementation of InputStream.skip and ensures there is no - * network I/O for version 1 of the block blob input stream. - * @throws Exception - */ - @Test - public void test_0307_SkipBoundsV1() throws Exception { - validateSkipBounds(accountUsingInputStreamV1.getFileSystem()); - } - - /** - * Validates the implementation of InputStream.skip and ensures there is no - * network I/O for version 2 of the block blob input stream. 
- * @throws Exception - */ - @Test - public void test_0308_SkipBoundsV2() throws Exception { - validateSkipBounds(accountUsingInputStreamV2.getFileSystem()); - } - - private void validateSkipBounds(FileSystem fs) throws Exception { - assumeHugeFileExists(); - try (FSDataInputStream inputStream = fs.open(TEST_FILE_PATH)) { - NanoTimer timer = new NanoTimer(); - - long skipped = inputStream.skip(-1); - assertEquals(0, skipped); - - skipped = inputStream.skip(0); - assertEquals(0, skipped); - - assertTrue(testFileLength > 0); - - skipped = inputStream.skip(testFileLength); - assertEquals(testFileLength, skipped); - - intercept(EOFException.class, - new Callable() { - @Override - public Long call() throws Exception { - return inputStream.skip(1); - } - } - ); - long elapsedTimeMs = timer.elapsedTimeMs(); - assertTrue(elapsedTimeMs < 20, String.format( - "There should not be any network I/O (elapsedTimeMs=%1$d).", - elapsedTimeMs)); - } - } - - /** - * Validates the implementation of Seekable.seek and ensures there is no - * network I/O for forward seek. - * @throws Exception - */ - @Test - public void test_0309_SeekBoundsV1() throws Exception { - validateSeekBounds(accountUsingInputStreamV1.getFileSystem()); - } - - /** - * Validates the implementation of Seekable.seek and ensures there is no - * network I/O for forward seek. - * @throws Exception - */ - @Test - public void test_0310_SeekBoundsV2() throws Exception { - validateSeekBounds(accountUsingInputStreamV2.getFileSystem()); - } - - private void validateSeekBounds(FileSystem fs) throws Exception { - assumeHugeFileExists(); - try ( - FSDataInputStream inputStream = fs.open(TEST_FILE_PATH); - ) { - NanoTimer timer = new NanoTimer(); - - inputStream.seek(0); - assertEquals(0, inputStream.getPos()); - - intercept(EOFException.class, - FSExceptionMessages.NEGATIVE_SEEK, - new Callable() { - @Override - public FSDataInputStream call() throws Exception { - inputStream.seek(-1); - return inputStream; - } - } - ); - - assertTrue(testFileLength > 0, "Test file length only " + testFileLength); - inputStream.seek(testFileLength); - assertEquals(testFileLength, inputStream.getPos()); - - intercept(EOFException.class, - FSExceptionMessages.CANNOT_SEEK_PAST_EOF, - new Callable() { - @Override - public FSDataInputStream call() throws Exception { - inputStream.seek(testFileLength + 1); - return inputStream; - } - } - ); - - long elapsedTimeMs = timer.elapsedTimeMs(); - assertTrue( - elapsedTimeMs < 20, String.format( - "There should not be any network I/O (elapsedTimeMs=%1$d).", - elapsedTimeMs)); - } - } - - /** - * Validates the implementation of Seekable.seek, Seekable.getPos, - * and InputStream.available. - * @throws Exception - */ - @Test - public void test_0311_SeekAndAvailableAndPositionV1() throws Exception { - validateSeekAndAvailableAndPosition( - accountUsingInputStreamV1.getFileSystem()); - } - - /** - * Validates the implementation of Seekable.seek, Seekable.getPos, - * and InputStream.available. 
- * @throws Exception - */ - @Test - public void test_0312_SeekAndAvailableAndPositionV2() throws Exception { - validateSeekAndAvailableAndPosition( - accountUsingInputStreamV2.getFileSystem()); - } - - private void validateSeekAndAvailableAndPosition(FileSystem fs) - throws Exception { - assumeHugeFileExists(); - try (FSDataInputStream inputStream = fs.open(TEST_FILE_PATH)) { - byte[] expected1 = {(byte) 'a', (byte) 'b', (byte) 'c'}; - byte[] expected2 = {(byte) 'd', (byte) 'e', (byte) 'f'}; - byte[] expected3 = {(byte) 'b', (byte) 'c', (byte) 'd'}; - byte[] expected4 = {(byte) 'g', (byte) 'h', (byte) 'i'}; - byte[] buffer = new byte[3]; - - int bytesRead = inputStream.read(buffer); - assertEquals(buffer.length, bytesRead); - assertArrayEquals(expected1, buffer); - assertEquals(buffer.length, inputStream.getPos()); - assertEquals(testFileLength - inputStream.getPos(), - inputStream.available()); - - bytesRead = inputStream.read(buffer); - assertEquals(buffer.length, bytesRead); - assertArrayEquals(expected2, buffer); - assertEquals(2 * buffer.length, inputStream.getPos()); - assertEquals(testFileLength - inputStream.getPos(), - inputStream.available()); - - // reverse seek - int seekPos = 0; - inputStream.seek(seekPos); - - bytesRead = inputStream.read(buffer); - assertEquals(buffer.length, bytesRead); - assertArrayEquals(expected1, buffer); - assertEquals(buffer.length + seekPos, inputStream.getPos()); - assertEquals(testFileLength - inputStream.getPos(), - inputStream.available()); - - // reverse seek - seekPos = 1; - inputStream.seek(seekPos); - - bytesRead = inputStream.read(buffer); - assertEquals(buffer.length, bytesRead); - assertArrayEquals(expected3, buffer); - assertEquals(buffer.length + seekPos, inputStream.getPos()); - assertEquals(testFileLength - inputStream.getPos(), - inputStream.available()); - - // forward seek - seekPos = 6; - inputStream.seek(seekPos); - - bytesRead = inputStream.read(buffer); - assertEquals(buffer.length, bytesRead); - assertArrayEquals(expected4, buffer); - assertEquals(buffer.length + seekPos, inputStream.getPos()); - assertEquals(testFileLength - inputStream.getPos(), - inputStream.available()); - } - } - - /** - * Validates the implementation of InputStream.skip, Seekable.getPos, - * and InputStream.available. - * @throws IOException - */ - @Test - public void test_0313_SkipAndAvailableAndPositionV1() throws IOException { - validateSkipAndAvailableAndPosition( - accountUsingInputStreamV1.getFileSystem()); - } - - /** - * Validates the implementation of InputStream.skip, Seekable.getPos, - * and InputStream.available. 
- * @throws IOException - */ - @Test - public void test_0314_SkipAndAvailableAndPositionV2() throws IOException { - validateSkipAndAvailableAndPosition( - accountUsingInputStreamV1.getFileSystem()); - } - - private void validateSkipAndAvailableAndPosition(FileSystem fs) - throws IOException { - assumeHugeFileExists(); - try ( - FSDataInputStream inputStream = fs.open(TEST_FILE_PATH); - ) { - byte[] expected1 = {(byte) 'a', (byte) 'b', (byte) 'c'}; - byte[] expected2 = {(byte) 'd', (byte) 'e', (byte) 'f'}; - byte[] expected3 = {(byte) 'b', (byte) 'c', (byte) 'd'}; - byte[] expected4 = {(byte) 'g', (byte) 'h', (byte) 'i'}; - - assertEquals(testFileLength, inputStream.available()); - assertEquals(0, inputStream.getPos()); - - int n = 3; - long skipped = inputStream.skip(n); - - assertEquals(skipped, inputStream.getPos()); - assertEquals(testFileLength - inputStream.getPos(), - inputStream.available()); - assertEquals(skipped, n); - - byte[] buffer = new byte[3]; - int bytesRead = inputStream.read(buffer); - assertEquals(buffer.length, bytesRead); - assertArrayEquals(expected2, buffer); - assertEquals(buffer.length + skipped, inputStream.getPos()); - assertEquals(testFileLength - inputStream.getPos(), - inputStream.available()); - - // does skip still work after seek? - int seekPos = 1; - inputStream.seek(seekPos); - - bytesRead = inputStream.read(buffer); - assertEquals(buffer.length, bytesRead); - assertArrayEquals(expected3, buffer); - assertEquals(buffer.length + seekPos, inputStream.getPos()); - assertEquals(testFileLength - inputStream.getPos(), - inputStream.available()); - - long currentPosition = inputStream.getPos(); - n = 2; - skipped = inputStream.skip(n); - - assertEquals(currentPosition + skipped, inputStream.getPos()); - assertEquals(testFileLength - inputStream.getPos(), - inputStream.available()); - assertEquals(skipped, n); - - bytesRead = inputStream.read(buffer); - assertEquals(buffer.length, bytesRead); - assertArrayEquals(expected4, buffer); - assertEquals(buffer.length + skipped + currentPosition, - inputStream.getPos()); - assertEquals(testFileLength - inputStream.getPos(), - inputStream.available()); - } - } - - /** - * Ensures parity in the performance of sequential read for - * version 1 and version 2 of the block blob input stream. - * @throws IOException - */ - @Test - public void test_0315_SequentialReadPerformance() throws IOException { - assumeHugeFileExists(); - final int maxAttempts = 10; - final double maxAcceptableRatio = 1.01; - double v1ElapsedMs = 0, v2ElapsedMs = 0; - double ratio = Double.MAX_VALUE; - for (int i = 0; i < maxAttempts && ratio >= maxAcceptableRatio; i++) { - v1ElapsedMs = sequentialRead(1, - accountUsingInputStreamV1.getFileSystem(), false); - v2ElapsedMs = sequentialRead(2, - accountUsingInputStreamV2.getFileSystem(), false); - ratio = v2ElapsedMs / v1ElapsedMs; - LOG.info(String.format( - "v1ElapsedMs=%1$d, v2ElapsedMs=%2$d, ratio=%3$.2f", - (long) v1ElapsedMs, - (long) v2ElapsedMs, - ratio)); - } - assertTrue( - ratio < maxAcceptableRatio, String.format( - "Performance of version 2 is not acceptable: v1ElapsedMs=%1$d," - + " v2ElapsedMs=%2$d, ratio=%3$.2f", - (long) v1ElapsedMs, - (long) v2ElapsedMs, - ratio)); - } - - /** - * Ensures parity in the performance of sequential read after reverse seek for - * version 2 of the block blob input stream. 
- * @throws IOException - */ - @Test - public void test_0316_SequentialReadAfterReverseSeekPerformanceV2() - throws IOException { - assumeHugeFileExists(); - final int maxAttempts = 10; - final double maxAcceptableRatio = 1.01; - double beforeSeekElapsedMs = 0, afterSeekElapsedMs = 0; - double ratio = Double.MAX_VALUE; - for (int i = 0; i < maxAttempts && ratio >= maxAcceptableRatio; i++) { - beforeSeekElapsedMs = sequentialRead(2, - accountUsingInputStreamV2.getFileSystem(), false); - afterSeekElapsedMs = sequentialRead(2, - accountUsingInputStreamV2.getFileSystem(), true); - ratio = afterSeekElapsedMs / beforeSeekElapsedMs; - LOG.info(String.format( - "beforeSeekElapsedMs=%1$d, afterSeekElapsedMs=%2$d, ratio=%3$.2f", - (long) beforeSeekElapsedMs, - (long) afterSeekElapsedMs, - ratio)); - } - assertTrue( - ratio < maxAcceptableRatio, String.format( - "Performance of version 2 after reverse seek is not acceptable:" - + " beforeSeekElapsedMs=%1$d, afterSeekElapsedMs=%2$d," - + " ratio=%3$.2f", - (long) beforeSeekElapsedMs, - (long) afterSeekElapsedMs, - ratio)); - } - - private long sequentialRead(int version, - FileSystem fs, - boolean afterReverseSeek) throws IOException { - byte[] buffer = new byte[16 * KILOBYTE]; - long totalBytesRead = 0; - long bytesRead = 0; - - try(FSDataInputStream inputStream = fs.open(TEST_FILE_PATH)) { - if (afterReverseSeek) { - while (bytesRead > 0 && totalBytesRead < 4 * MEGABYTE) { - bytesRead = inputStream.read(buffer); - totalBytesRead += bytesRead; - } - totalBytesRead = 0; - inputStream.seek(0); - } - - NanoTimer timer = new NanoTimer(); - while ((bytesRead = inputStream.read(buffer)) > 0) { - totalBytesRead += bytesRead; - } - long elapsedTimeMs = timer.elapsedTimeMs(); - - LOG.info(String.format( - "v%1$d: bytesRead=%2$d, elapsedMs=%3$d, Mbps=%4$.2f," - + " afterReverseSeek=%5$s", - version, - totalBytesRead, - elapsedTimeMs, - toMbps(totalBytesRead, elapsedTimeMs), - afterReverseSeek)); - - assertEquals(testFileLength, totalBytesRead); - inputStream.close(); - return elapsedTimeMs; - } - } - - @Test - public void test_0317_RandomReadPerformance() throws IOException { - assumeHugeFileExists(); - final int maxAttempts = 10; - final double maxAcceptableRatio = 0.10; - double v1ElapsedMs = 0, v2ElapsedMs = 0; - double ratio = Double.MAX_VALUE; - for (int i = 0; i < maxAttempts && ratio >= maxAcceptableRatio; i++) { - v1ElapsedMs = randomRead(1, - accountUsingInputStreamV1.getFileSystem()); - v2ElapsedMs = randomRead(2, - accountUsingInputStreamV2.getFileSystem()); - ratio = v2ElapsedMs / v1ElapsedMs; - LOG.info(String.format( - "v1ElapsedMs=%1$d, v2ElapsedMs=%2$d, ratio=%3$.2f", - (long) v1ElapsedMs, - (long) v2ElapsedMs, - ratio)); - } - assertTrue( - ratio < maxAcceptableRatio, String.format( - "Performance of version 2 is not acceptable: v1ElapsedMs=%1$d," - + " v2ElapsedMs=%2$d, ratio=%3$.2f", - (long) v1ElapsedMs, - (long) v2ElapsedMs, - ratio)); - } - - private long randomRead(int version, FileSystem fs) throws IOException { - assumeHugeFileExists(); - final int minBytesToRead = 2 * MEGABYTE; - Random random = new Random(); - byte[] buffer = new byte[8 * KILOBYTE]; - long totalBytesRead = 0; - long bytesRead = 0; - try(FSDataInputStream inputStream = fs.open(TEST_FILE_PATH)) { - NanoTimer timer = new NanoTimer(); - - do { - bytesRead = inputStream.read(buffer); - totalBytesRead += bytesRead; - inputStream.seek(random.nextInt( - (int) (testFileLength - buffer.length))); - } while (bytesRead > 0 && totalBytesRead < minBytesToRead); - - long 
elapsedTimeMs = timer.elapsedTimeMs(); - - inputStream.close(); - - LOG.info(String.format( - "v%1$d: totalBytesRead=%2$d, elapsedTimeMs=%3$d, Mbps=%4$.2f", - version, - totalBytesRead, - elapsedTimeMs, - toMbps(totalBytesRead, elapsedTimeMs))); - - assertTrue(minBytesToRead <= totalBytesRead); - - return elapsedTimeMs; - } - } - - @Test - public void test_999_DeleteHugeFiles() throws IOException { - try { - NanoTimer timer = new NanoTimer(); - NativeAzureFileSystem fs = getFileSystem(); - fs.delete(TEST_FILE_PATH, false); - timer.end("time to delete %s", TEST_FILE_PATH); - } finally { - // clean up the test account - AzureTestUtils.cleanupTestAccount(accountUsingInputStreamV1); - } - } - -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestContainerChecks.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestContainerChecks.java deleted file mode 100644 index 8110b6d6ae8e3..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestContainerChecks.java +++ /dev/null @@ -1,190 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import static org.assertj.core.api.Assumptions.assumeThat; - -import java.io.FileNotFoundException; -import java.util.EnumSet; -import java.util.concurrent.Callable; - -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.azure.AzureBlobStorageTestAccount.CreateOptions; -import org.apache.hadoop.fs.azure.integration.AzureTestUtils; -import org.apache.hadoop.test.LambdaTestUtils; - -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import com.microsoft.azure.storage.blob.BlobOutputStream; -import com.microsoft.azure.storage.blob.CloudBlobContainer; -import com.microsoft.azure.storage.blob.CloudBlockBlob; - -/** - * Tests that WASB creates containers only if needed. - */ -public class ITestContainerChecks extends AbstractWasbTestWithTimeout { - private AzureBlobStorageTestAccount testAccount; - private boolean runningInSASMode = false; - - @AfterEach - public void tearDown() throws Exception { - testAccount = AzureTestUtils.cleanup(testAccount); - } - - @BeforeEach - public void setMode() { - runningInSASMode = AzureBlobStorageTestAccount.createTestConfiguration(). 
- getBoolean(AzureNativeFileSystemStore.KEY_USE_SECURE_MODE, false); - } - - @Test - public void testContainerExistAfterDoesNotExist() throws Exception { - testAccount = blobStorageTestAccount(); - assumeNotNull(testAccount); - CloudBlobContainer container = testAccount.getRealContainer(); - FileSystem fs = testAccount.getFileSystem(); - - // Starting off with the container not there - assertFalse(container.exists()); - - // A list shouldn't create the container and will set file system store - // state to DoesNotExist - try { - fs.listStatus(new Path("/")); - assertTrue(false, "Should've thrown."); - } catch (FileNotFoundException ex) { - assertTrue(ex.getMessage().contains("is not found"), - "Unexpected exception: " + ex); - } - assertFalse(container.exists()); - - // Create a container outside of the WASB FileSystem - container.create(); - // Add a file to the container outside of the WASB FileSystem - CloudBlockBlob blob = testAccount.getBlobReference("foo"); - BlobOutputStream outputStream = blob.openOutputStream(); - outputStream.write(new byte[10]); - outputStream.close(); - - // Make sure the file is visible - assertTrue(fs.exists(new Path("/foo"))); - assertTrue(container.exists()); - } - - protected AzureBlobStorageTestAccount blobStorageTestAccount() - throws Exception { - return AzureBlobStorageTestAccount.create("", - EnumSet.noneOf(CreateOptions.class)); - } - - @Test - public void testContainerCreateAfterDoesNotExist() throws Exception { - testAccount = blobStorageTestAccount(); - assumeNotNull(testAccount); - CloudBlobContainer container = testAccount.getRealContainer(); - FileSystem fs = testAccount.getFileSystem(); - - // Starting off with the container not there - assertFalse(container.exists()); - - // A list shouldn't create the container and will set file system store - // state to DoesNotExist - try { - assertNull(fs.listStatus(new Path("/"))); - assertTrue(false, "Should've thrown."); - } catch (FileNotFoundException ex) { - assertTrue(ex.getMessage().contains("is not found"), - "Unexpected exception: " + ex); - } - assertFalse(container.exists()); - - // Write should succeed - assertTrue(fs.createNewFile(new Path("/foo"))); - assertTrue(container.exists()); - } - - @Test - public void testContainerCreateOnWrite() throws Exception { - testAccount = blobStorageTestAccount(); - assumeNotNull(testAccount); - CloudBlobContainer container = testAccount.getRealContainer(); - FileSystem fs = testAccount.getFileSystem(); - - // Starting off with the container not there - assertFalse(container.exists()); - - // A list shouldn't create the container. - try { - fs.listStatus(new Path("/")); - assertTrue(false, "Should've thrown."); - } catch (FileNotFoundException ex) { - assertTrue(ex.getMessage().contains("is not found"), - "Unexpected exception: " + ex); - } - assertFalse(container.exists()); - - // Neither should a read. - Path foo = new Path("/testContainerCreateOnWrite-foo"); - Path bar = new Path("/testContainerCreateOnWrite-bar"); - LambdaTestUtils.intercept(FileNotFoundException.class, - new Callable() { - @Override - public String call() throws Exception { - fs.open(foo).close(); - return "Stream to " + foo; - } - } - ); - assertFalse(container.exists()); - - // Neither should a rename - assertFalse(fs.rename(foo, bar)); - assertFalse(container.exists()); - - // But a write should. 
- assertTrue(fs.createNewFile(foo)); - assertTrue(container.exists()); - } - - @Test - public void testContainerChecksWithSas() throws Exception { - - assumeThat(runningInSASMode).isFalse(); - testAccount = AzureBlobStorageTestAccount.create("", - EnumSet.of(CreateOptions.UseSas)); - assumeNotNull(testAccount); - CloudBlobContainer container = testAccount.getRealContainer(); - FileSystem fs = testAccount.getFileSystem(); - - // The container shouldn't be there - assertFalse(container.exists()); - - // A write should just fail - try { - fs.createNewFile(new Path("/testContainerChecksWithSas-foo")); - assertFalse(true, "Should've thrown."); - } catch (AzureException ex) { - } - assertFalse(container.exists()); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestFileSystemOperationExceptionHandling.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestFileSystemOperationExceptionHandling.java deleted file mode 100644 index d8293e45c813f..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestFileSystemOperationExceptionHandling.java +++ /dev/null @@ -1,319 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.FileNotFoundException; -import java.io.IOException; - -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.contract.ContractTestUtils; -import org.apache.hadoop.fs.permission.FsAction; -import org.apache.hadoop.fs.permission.FsPermission; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import static org.apache.hadoop.fs.FSExceptionMessages.STREAM_IS_CLOSED; -import static org.apache.hadoop.fs.azure.ExceptionHandlingTestHelper.*; -import static org.apache.hadoop.test.LambdaTestUtils.intercept; - -/** - * Single threaded exception handling. - */ -public class ITestFileSystemOperationExceptionHandling - extends AbstractWasbTestBase { - - private FSDataInputStream inputStream = null; - - private Path testPath; - private Path testFolderPath; - - @BeforeEach - @Override - public void setUp() throws Exception { - super.setUp(); - testPath = path("testfile.dat"); - testFolderPath = path("testfolder"); - } - - /** - * Helper method that creates a InputStream to validate exceptions - * for various scenarios. - */ - private void setupInputStreamToTest(AzureBlobStorageTestAccount testAccount) - throws Exception { - - FileSystem fs = testAccount.getFileSystem(); - - // Step 1: Create a file and write dummy data. 
- Path base = methodPath(); - Path testFilePath1 = new Path(base, "test1.dat"); - Path testFilePath2 = new Path(base, "test2.dat"); - FSDataOutputStream outputStream = fs.create(testFilePath1); - String testString = "This is a test string"; - outputStream.write(testString.getBytes()); - outputStream.close(); - - // Step 2: Open a read stream on the file. - inputStream = fs.open(testFilePath1); - - // Step 3: Rename the file - fs.rename(testFilePath1, testFilePath2); - } - - /** - * Tests a basic single threaded read scenario for Page blobs. - */ - @Test - public void testSingleThreadedPageBlobReadScenario() throws Throwable { - assertThrows(FileNotFoundException.class, () -> { - AzureBlobStorageTestAccount testAccount = getPageBlobTestStorageAccount(); - setupInputStreamToTest(testAccount); - byte[] readBuffer = new byte[512]; - inputStream.read(readBuffer); - }); - } - - /** - * Tests a basic single threaded seek scenario for Page blobs. - */ - @Test - public void testSingleThreadedPageBlobSeekScenario() throws Throwable { - assertThrows(FileNotFoundException.class, () -> { - AzureBlobStorageTestAccount testAccount = getPageBlobTestStorageAccount(); - setupInputStreamToTest(testAccount); - inputStream.seek(5); - }); - } - - /** - * Test a basic single thread seek scenario for Block blobs. - */ - @Test - public void testSingleThreadBlockBlobSeekScenario() throws Throwable { - assertThrows(FileNotFoundException.class, () -> { - AzureBlobStorageTestAccount testAccount = createTestAccount(); - setupInputStreamToTest(testAccount); - inputStream.seek(5); - inputStream.read(); - }); - } - - /** - * Tests a basic single threaded read scenario for Block blobs. - */ - @Test - public void testSingledThreadBlockBlobReadScenario() throws Throwable { - assertThrows(FileNotFoundException.class, () -> { - AzureBlobStorageTestAccount testAccount = createTestAccount(); - setupInputStreamToTest(testAccount); - byte[] readBuffer = new byte[512]; - inputStream.read(readBuffer); - }); - } - - /** - * Tests basic single threaded setPermission scenario. - */ - @Test - public void testSingleThreadedBlockBlobSetPermissionScenario() throws Throwable { - assertThrows(FileNotFoundException.class, () -> { - createEmptyFile(createTestAccount(), testPath); - fs.delete(testPath, true); - fs.setPermission(testPath, - new FsPermission(FsAction.EXECUTE, FsAction.READ, FsAction.READ)); - }); - } - - /** - * Tests basic single threaded setPermission scenario. - */ - @Test - public void testSingleThreadedPageBlobSetPermissionScenario() - throws Throwable { - assertThrows(FileNotFoundException.class, () -> { - createEmptyFile(getPageBlobTestStorageAccount(), testPath); - fs.delete(testPath, true); - fs.setOwner(testPath, "testowner", "testgroup"); - }); - } - - /** - * Tests basic single threaded setPermission scenario. - */ - @Test - public void testSingleThreadedBlockBlobSetOwnerScenario() throws Throwable { - assertThrows(FileNotFoundException.class, () -> { - createEmptyFile(createTestAccount(), testPath); - fs.delete(testPath, true); - fs.setOwner(testPath, "testowner", "testgroup"); - }); - } - - /** - * Tests basic single threaded setPermission scenario. 
- */ - @Test - public void testSingleThreadedPageBlobSetOwnerScenario() throws Throwable { - assertThrows(FileNotFoundException.class, ()->{ - createEmptyFile(getPageBlobTestStorageAccount(), testPath); - fs.delete(testPath, true); - fs.setPermission(testPath, - new FsPermission(FsAction.EXECUTE, FsAction.READ, FsAction.READ)); - }); - } - - /** - * Test basic single threaded listStatus scenario. - */ - @Test - public void testSingleThreadedBlockBlobListStatusScenario() throws Throwable { - assertThrows(FileNotFoundException.class, () -> { - createTestFolder(createTestAccount(), testFolderPath); - fs.delete(testFolderPath, true); - fs.listStatus(testFolderPath); - }); - } - - /** - * Test basic single threaded listStatus scenario. - */ - @Test - public void testSingleThreadedPageBlobListStatusScenario() throws Throwable { - assertThrows(FileNotFoundException.class, () -> { - createTestFolder(getPageBlobTestStorageAccount(), testFolderPath); - fs.delete(testFolderPath, true); - fs.listStatus(testFolderPath); - }); - } - - /** - * Test basic single threaded listStatus scenario. - */ - @Test - public void testSingleThreadedBlockBlobRenameScenario() throws Throwable { - - createEmptyFile(createTestAccount(), - testPath); - Path dstPath = new Path("dstFile.dat"); - fs.delete(testPath, true); - boolean renameResult = fs.rename(testPath, dstPath); - assertFalse(renameResult); - } - - /** - * Test basic single threaded listStatus scenario. - */ - @Test - public void testSingleThreadedPageBlobRenameScenario() throws Throwable { - - createEmptyFile(getPageBlobTestStorageAccount(), - testPath); - Path dstPath = new Path("dstFile.dat"); - fs.delete(testPath, true); - boolean renameResult = fs.rename(testPath, dstPath); - assertFalse(renameResult); - } - - /** - * Test basic single threaded listStatus scenario. - */ - @Test - public void testSingleThreadedBlockBlobDeleteScenario() throws Throwable { - - createEmptyFile(createTestAccount(), - testPath); - fs.delete(testPath, true); - boolean deleteResult = fs.delete(testPath, true); - assertFalse(deleteResult); - } - - /** - * Test basic single threaded listStatus scenario. - */ - @Test - public void testSingleThreadedPageBlobDeleteScenario() throws Throwable { - - createEmptyFile(getPageBlobTestStorageAccount(), - testPath); - fs.delete(testPath, true); - boolean deleteResult = fs.delete(testPath, true); - assertFalse(deleteResult); - } - - /** - * Test basic single threaded listStatus scenario. - */ - @Test - public void testSingleThreadedBlockBlobOpenScenario() throws Throwable { - assertThrows(FileNotFoundException.class, () -> { - createEmptyFile(createTestAccount(), testPath); - fs.delete(testPath, true); - inputStream = fs.open(testPath); - }); - } - - /** - * Test delete then open a file. - */ - @Test - public void testSingleThreadedPageBlobOpenScenario() throws Throwable { - assertThrows(FileNotFoundException.class, ()->{ - createEmptyFile(getPageBlobTestStorageAccount(), testPath); - fs.delete(testPath, true); - inputStream = fs.open(testPath); - }); - } - - /** - * Attempts to write to the azure stream after it is closed will raise - * an IOException. 
- */ - @Test - public void testWriteAfterClose() throws Throwable { - FSDataOutputStream out = fs.create(testPath); - out.close(); - intercept(IOException.class, STREAM_IS_CLOSED, - () -> out.write('a')); - intercept(IOException.class, STREAM_IS_CLOSED, - () -> out.write(new byte[]{'a'})); - out.hsync(); - out.flush(); - out.close(); - } - - @AfterEach - public void tearDown() throws Exception { - if (inputStream != null) { - inputStream.close(); - } - - ContractTestUtils.rm(fs, testPath, true, true); - super.tearDown(); - } - - @Override - protected AzureBlobStorageTestAccount createTestAccount() - throws Exception { - return AzureBlobStorageTestAccount.create(); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestFileSystemOperationExceptionMessage.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestFileSystemOperationExceptionMessage.java deleted file mode 100644 index a5ff76c2fa4b7..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestFileSystemOperationExceptionMessage.java +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.net.URI; -import java.util.UUID; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.azure.integration.AzureTestUtils; -import org.apache.hadoop.test.GenericTestUtils; - -import com.microsoft.azure.storage.CloudStorageAccount; -import org.junit.jupiter.api.Test; - -import static org.apache.hadoop.fs.azure.AzureNativeFileSystemStore.NO_ACCESS_TO_CONTAINER_MSG; -import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.verifyWasbAccountNameInConfig; - -/** - * Test for error messages coming from SDK. 
- */ -public class ITestFileSystemOperationExceptionMessage - extends AbstractWasbTestWithTimeout { - - - - @Test - public void testAnonymouseCredentialExceptionMessage() throws Throwable { - - Configuration conf = AzureBlobStorageTestAccount.createTestConfiguration(); - CloudStorageAccount account = - AzureBlobStorageTestAccount.createTestAccount(conf); - AzureTestUtils.assume("No test account", account != null); - - String testStorageAccount = verifyWasbAccountNameInConfig(conf); - conf = new Configuration(); - conf.set("fs.AbstractFileSystem.wasb.impl", - "org.apache.hadoop.fs.azure.Wasb"); - conf.set("fs.azure.skip.metrics", "true"); - - String testContainer = UUID.randomUUID().toString(); - String wasbUri = String.format("wasb://%s@%s", - testContainer, testStorageAccount); - - try(NativeAzureFileSystem filesystem = new NativeAzureFileSystem()) { - filesystem.initialize(new URI(wasbUri), conf); - fail("Expected an exception, got " + filesystem); - } catch (Exception ex) { - - Throwable innerException = ex.getCause(); - while (innerException != null - && !(innerException instanceof AzureException)) { - innerException = innerException.getCause(); - } - - if (innerException != null) { - GenericTestUtils.assertExceptionContains(String.format( - NO_ACCESS_TO_CONTAINER_MSG, testStorageAccount, testContainer), - ex); - } else { - fail("No inner azure exception"); - } - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestFileSystemOperationsExceptionHandlingMultiThreaded.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestFileSystemOperationsExceptionHandlingMultiThreaded.java deleted file mode 100644 index 19080d031b6c9..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestFileSystemOperationsExceptionHandlingMultiThreaded.java +++ /dev/null @@ -1,381 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.FileNotFoundException; - -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.contract.ContractTestUtils; -import org.apache.hadoop.fs.permission.FsAction; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.util.concurrent.SubjectInheritingThread; - -import static org.apache.hadoop.fs.azure.ExceptionHandlingTestHelper.*; - -/** - * Multithreaded operations on FS, verify failures are as expected. 
- */ -public class ITestFileSystemOperationsExceptionHandlingMultiThreaded - extends AbstractWasbTestBase { - - FSDataInputStream inputStream = null; - - private Path testPath; - private Path testFolderPath; - - @BeforeEach - @Override - public void setUp() throws Exception { - super.setUp(); - testPath = path("testfile.dat"); - testFolderPath = path("testfolder"); - } - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.create(); - } - - @Override - public void tearDown() throws Exception { - - IOUtils.closeStream(inputStream); - ContractTestUtils.rm(fs, testPath, true, false); - ContractTestUtils.rm(fs, testFolderPath, true, false); - super.tearDown(); - } - - /** - * Helper method to creates an input stream to test various scenarios. - */ - private void getInputStreamToTest(FileSystem fs, Path testPath) - throws Throwable { - - FSDataOutputStream outputStream = fs.create(testPath); - String testString = "This is a test string"; - outputStream.write(testString.getBytes()); - outputStream.close(); - - inputStream = fs.open(testPath); - } - - /** - * Test to validate correct exception is thrown for Multithreaded read - * scenario for block blobs. - */ - @Test - public void testMultiThreadedBlockBlobReadScenario() throws Throwable { - assertThrows(FileNotFoundException.class, () -> { - AzureBlobStorageTestAccount testAccount = createTestAccount(); - NativeAzureFileSystem fs = testAccount.getFileSystem(); - Path base = methodPath(); - Path testFilePath1 = new Path(base, "test1.dat"); - Path renamePath = new Path(base, "test2.dat"); - getInputStreamToTest(fs, testFilePath1); - Thread renameThread = new SubjectInheritingThread( - new RenameThread(fs, testFilePath1, renamePath)); - renameThread.start(); - - renameThread.join(); - - byte[] readBuffer = new byte[512]; - inputStream.read(readBuffer); - }); - } - - /** - * Test to validate correct exception is thrown for Multithreaded seek - * scenario for block blobs. - */ - @Test - public void testMultiThreadBlockBlobSeekScenario() throws Throwable { - assertThrows(FileNotFoundException.class, () -> { - /* - * AzureBlobStorageTestAccount testAccount = createTestAccount(); - * fs = testAccount.getFileSystem(); - */ - Path base = methodPath(); - Path testFilePath1 = new Path(base, "test1.dat"); - Path renamePath = new Path(base, "test2.dat"); - - getInputStreamToTest(fs, testFilePath1); - Thread renameThread = new SubjectInheritingThread( - new RenameThread(fs, testFilePath1, renamePath)); - renameThread.start(); - - renameThread.join(); - - inputStream.seek(5); - inputStream.read(); - }); - } - - /** - * Tests basic multi threaded setPermission scenario. - */ - @Test - public void testMultiThreadedPageBlobSetPermissionScenario() - throws Throwable { - assertThrows(FileNotFoundException.class, () -> { - createEmptyFile( - getPageBlobTestStorageAccount(), - testPath); - Thread t = new SubjectInheritingThread(new DeleteThread(fs, testPath)); - t.start(); - while (t.isAlive()) { - fs.setPermission(testPath, - new FsPermission(FsAction.EXECUTE, FsAction.READ, FsAction.READ)); - } - fs.setPermission(testPath, - new FsPermission(FsAction.EXECUTE, FsAction.READ, FsAction.READ)); - }); - } - - /** - * Tests basic multi threaded setPermission scenario. 
- */ - @Test - public void testMultiThreadedBlockBlobSetPermissionScenario() - throws Throwable { - assertThrows(FileNotFoundException.class, () -> { - createEmptyFile(createTestAccount(), testPath); - Thread t = new SubjectInheritingThread(new DeleteThread(fs, testPath)); - t.start(); - while (t.isAlive()) { - fs.setPermission(testPath, - new FsPermission(FsAction.EXECUTE, FsAction.READ, FsAction.READ)); - } - fs.setPermission(testPath, - new FsPermission(FsAction.EXECUTE, FsAction.READ, FsAction.READ)); - }); - } - - /** - * Tests basic multi threaded setPermission scenario. - */ - @Test - public void testMultiThreadedPageBlobOpenScenario() throws Throwable { - assertThrows(FileNotFoundException.class, () -> { - createEmptyFile(createTestAccount(), testPath); - Thread t = new SubjectInheritingThread(new DeleteThread(fs, testPath)); - t.start(); - while (t.isAlive()) { - inputStream = fs.open(testPath); - inputStream.close(); - } - - inputStream = fs.open(testPath); - inputStream.close(); - }); - } - - /** - * Tests basic multi threaded setPermission scenario. - */ - @Test - public void testMultiThreadedBlockBlobOpenScenario() throws Throwable { - assertThrows(FileNotFoundException.class, () -> { - createEmptyFile( - getPageBlobTestStorageAccount(), - testPath); - Thread t = new SubjectInheritingThread(new DeleteThread(fs, testPath)); - t.start(); - - while (t.isAlive()) { - inputStream = fs.open(testPath); - inputStream.close(); - } - inputStream = fs.open(testPath); - inputStream.close(); - }); - } - - /** - * Tests basic multi threaded setOwner scenario. - */ - @Test - public void testMultiThreadedBlockBlobSetOwnerScenario() throws Throwable { - assertThrows(FileNotFoundException.class, () -> { - createEmptyFile(createTestAccount(), testPath); - Thread t = new SubjectInheritingThread(new DeleteThread(fs, testPath)); - t.start(); - while (t.isAlive()) { - fs.setOwner(testPath, "testowner", "testgroup"); - } - fs.setOwner(testPath, "testowner", "testgroup"); - }); - } - - /** - * Tests basic multi threaded setOwner scenario. - */ - @Test - public void testMultiThreadedPageBlobSetOwnerScenario() throws Throwable { - assertThrows(FileNotFoundException.class, () -> { - createEmptyFile( - getPageBlobTestStorageAccount(), - testPath); - Thread t = new SubjectInheritingThread(new DeleteThread(fs, testPath)); - t.start(); - while (t.isAlive()) { - fs.setOwner(testPath, "testowner", "testgroup"); - } - fs.setOwner(testPath, "testowner", "testgroup"); - }); - } - - /** - * Tests basic multi threaded listStatus scenario. - */ - @Test - public void testMultiThreadedBlockBlobListStatusScenario() throws Throwable { - assertThrows(FileNotFoundException.class, () -> { - createTestFolder(createTestAccount(), testFolderPath); - Thread t = new SubjectInheritingThread(new DeleteThread(fs, testFolderPath)); - t.start(); - while (t.isAlive()) { - fs.listStatus(testFolderPath); - } - fs.listStatus(testFolderPath); - }); - } - - /** - * Tests basic multi threaded listStatus scenario. - */ - @Test - public void testMultiThreadedPageBlobListStatusScenario() throws Throwable { - assertThrows(FileNotFoundException.class, () -> { - createTestFolder( - getPageBlobTestStorageAccount(), - testFolderPath); - Thread t = new SubjectInheritingThread(new DeleteThread(fs, testFolderPath)); - t.start(); - while (t.isAlive()) { - fs.listStatus(testFolderPath); - } - fs.listStatus(testFolderPath); - }); - } - - /** - * Test to validate correct exception is thrown for Multithreaded read - * scenario for page blobs. 
- */ - @Test - public void testMultiThreadedPageBlobReadScenario() throws Throwable { - assertThrows(FileNotFoundException.class, () -> { - bindToTestAccount(getPageBlobTestStorageAccount()); - Path base = methodPath(); - Path testFilePath1 = new Path(base, "test1.dat"); - Path renamePath = new Path(base, "test2.dat"); - - getInputStreamToTest(fs, testFilePath1); - Thread renameThread = new SubjectInheritingThread( - new RenameThread(fs, testFilePath1, renamePath)); - renameThread.start(); - - renameThread.join(); - byte[] readBuffer = new byte[512]; - inputStream.read(readBuffer); - }); - } - - /** - * Test to validate correct exception is thrown for Multithreaded seek - * scenario for page blobs. - */ - - @Test - public void testMultiThreadedPageBlobSeekScenario() throws Throwable { - assertThrows(FileNotFoundException.class, () -> { - bindToTestAccount(getPageBlobTestStorageAccount()); - - Path base = methodPath(); - Path testFilePath1 = new Path(base, "test1.dat"); - Path renamePath = new Path(base, "test2.dat"); - - getInputStreamToTest(fs, testFilePath1); - Thread renameThread = new SubjectInheritingThread( - new RenameThread(fs, testFilePath1, renamePath)); - renameThread.start(); - - renameThread.join(); - inputStream.seek(5); - }); - } - - - /** - * Helper thread that just renames the test file. - */ - private static class RenameThread implements Runnable { - - private final FileSystem fs; - private final Path testPath; - private final Path renamePath; - - RenameThread(FileSystem fs, - Path testPath, - Path renamePath) { - this.fs = fs; - this.testPath = testPath; - this.renamePath = renamePath; - } - - @Override - public void run() { - try { - fs.rename(testPath, renamePath); - } catch (Exception e) { - // Swallowing the exception as the - // correctness of the test is controlled - // by the other thread - } - } - } - - private static class DeleteThread implements Runnable { - private final FileSystem fs; - private final Path testPath; - - DeleteThread(FileSystem fs, Path testPath) { - this.fs = fs; - this.testPath = testPath; - } - - @Override - public void run() { - try { - fs.delete(testPath, true); - } catch (Exception e) { - // Swallowing the exception as the - // correctness of the test is controlled - // by the other thread - } - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestFileSystemOperationsWithThreads.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestFileSystemOperationsWithThreads.java deleted file mode 100644 index 5a17a07e1a074..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestFileSystemOperationsWithThreads.java +++ /dev/null @@ -1,812 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure; - -import java.io.IOException; -import java.net.URI; -import java.util.concurrent.RejectedExecutionException; -import java.util.concurrent.ThreadPoolExecutor; -import java.util.concurrent.TimeUnit; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.azure.NativeAzureFileSystem.FolderRenamePending; -import org.apache.hadoop.test.GenericTestUtils.LogCapturer; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.mockito.Mockito; -import org.mockito.invocation.InvocationOnMock; -import org.mockito.stubbing.Answer; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Tests the Native Azure file system (WASB) using parallel threads for rename and delete operations. - */ -public class ITestFileSystemOperationsWithThreads extends AbstractWasbTestBase { - - private final int renameThreads = 10; - private final int deleteThreads = 20; - private int iterations = 1; - private LogCapturer logs = null; - - @BeforeEach - public void setUp() throws Exception { - super.setUp(); - Configuration conf = fs.getConf(); - - // By default enable parallel threads for rename and delete operations. - // Also enable flat listing of blobs for these operations. - conf.setInt(NativeAzureFileSystem.AZURE_RENAME_THREADS, renameThreads); - conf.setInt(NativeAzureFileSystem.AZURE_DELETE_THREADS, deleteThreads); - conf.setBoolean(AzureNativeFileSystemStore.KEY_ENABLE_FLAT_LISTING, true); - - URI uri = fs.getUri(); - fs.initialize(uri, conf); - - // Capture logs - logs = LogCapturer.captureLogs(LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME)); - } - - /* - * Helper method to create sub directory and different types of files - * for multiple iterations. - */ - private void createFolder(FileSystem fs, String root) throws Exception { - fs.mkdirs(new Path(root)); - for (int i = 0; i < this.iterations; i++) { - fs.mkdirs(new Path(root + "/" + i)); - fs.createNewFile(new Path(root + "/" + i + "/fileToRename")); - fs.createNewFile(new Path(root + "/" + i + "/file/to/rename")); - fs.createNewFile(new Path(root + "/" + i + "/file+to%rename")); - fs.createNewFile(new Path(root + "/fileToRename" + i)); - } - } - - /* - * Helper method to do rename operation and validate all files in source folder - * doesn't exists and similar files exists in new folder. - */ - private void validateRenameFolder(FileSystem fs, String source, String dest) throws Exception { - // Create source folder with files. - createFolder(fs, source); - Path sourceFolder = new Path(source); - Path destFolder = new Path(dest); - - // rename operation - assertTrue(fs.rename(sourceFolder, destFolder)); - assertTrue(fs.exists(destFolder)); - - for (int i = 0; i < this.iterations; i++) { - // Check destination folder and files exists. - assertTrue(fs.exists(new Path(dest + "/" + i))); - assertTrue(fs.exists(new Path(dest + "/" + i + "/fileToRename"))); - assertTrue(fs.exists(new Path(dest + "/" + i + "/file/to/rename"))); - assertTrue(fs.exists(new Path(dest + "/" + i + "/file+to%rename"))); - assertTrue(fs.exists(new Path(dest + "/fileToRename" + i))); - - // Check source folder and files doesn't exists. 
- assertFalse(fs.exists(new Path(source + "/" + i))); - assertFalse(fs.exists(new Path(source + "/" + i + "/fileToRename"))); - assertFalse(fs.exists(new Path(source + "/" + i + "/file/to/rename"))); - assertFalse(fs.exists(new Path(source + "/" + i + "/file+to%rename"))); - assertFalse(fs.exists(new Path(source + "/fileToRename" + i))); - } - } - - /* - * Test case for rename operation with multiple threads and flat listing enabled. - */ - @Test - public void testRenameSmallFolderWithThreads() throws Exception { - - validateRenameFolder(fs, "root", "rootnew"); - - // With single iteration, we would have created 7 blobs. - int expectedThreadsCreated = Math.min(7, renameThreads); - - // Validate from logs that threads are created. - String content = logs.getOutput(); - assertInLog(content, "ms with threads: " + expectedThreadsCreated); - - // Validate thread executions - for (int i = 0; i < expectedThreadsCreated; i++) { - assertInLog(content, - "AzureBlobRenameThread-" + Thread.currentThread().getName() + "-" + i); - } - - // Also ensure that we haven't spawned extra threads. - if (expectedThreadsCreated < renameThreads) { - for (int i = expectedThreadsCreated; i < renameThreads; i++) { - assertNotInLog(content, - "AzureBlobRenameThread-" + Thread.currentThread().getName() + "-" + i); - } - } - } - - /* - * Test case for rename operation with multiple threads and flat listing enabled. - */ - @Test - public void testRenameLargeFolderWithThreads() throws Exception { - - // Populate source folder with large number of files and directories. - this.iterations = 10; - validateRenameFolder(fs, "root", "rootnew"); - - // Validate from logs that threads are created. - String content = logs.getOutput(); - assertInLog(content, "ms with threads: " + renameThreads); - - // Validate thread executions - for (int i = 0; i < renameThreads; i++) { - assertInLog(content, - "AzureBlobRenameThread-" + Thread.currentThread().getName() + "-" + i); - } - } - - /* - * Test case for rename operation with threads disabled and flat listing enabled. - */ - @Test - public void testRenameLargeFolderDisableThreads() throws Exception { - Configuration conf = fs.getConf(); - - // Number of threads set to 0 or 1 disables threads. - conf.setInt(NativeAzureFileSystem.AZURE_RENAME_THREADS, 0); - URI uri = fs.getUri(); - fs.initialize(uri, conf); - - // Populate source folder with large number of files and directories. - this.iterations = 10; - validateRenameFolder(fs, "root", "rootnew"); - - // Validate from logs that threads are disabled. - String content = logs.getOutput(); - assertInLog(content, - "Disabling threads for Rename operation as thread count 0"); - - // Validate no thread executions - for (int i = 0; i < renameThreads; i++) { - String term = "AzureBlobRenameThread-" - + Thread.currentThread().getName() - + "-" + i; - assertNotInLog(content, term); - } - } - - /** - * Assert that a log contains the given term. - * @param content log output - * @param term search term - */ - protected void assertInLog(String content, String term) { - assertTrue(!content.isEmpty(), "Empty log"); - if (!content.contains(term)) { - String message = "No " + term + " found in logs"; - LOG.error(message); - System.err.println(content); - fail(message); - } - } - - /** - * Assert that a log does not contain the given term. 
- * @param content log output - * @param term search term - */ - protected void assertNotInLog(String content, String term) { - assertTrue(!content.isEmpty(), "Empty log"); - if (content.contains(term)) { - String message = term + " found in logs"; - LOG.error(message); - System.err.println(content); - fail(message); - } - } - - /* - * Test case for rename operation with threads and flat listing disabled. - */ - @Test - public void testRenameSmallFolderDisableThreadsDisableFlatListing() throws Exception { - Configuration conf = fs.getConf(); - conf = fs.getConf(); - - // Number of threads set to 0 or 1 disables threads. - conf.setInt(NativeAzureFileSystem.AZURE_RENAME_THREADS, 1); - conf.setBoolean(AzureNativeFileSystemStore.KEY_ENABLE_FLAT_LISTING, false); - URI uri = fs.getUri(); - fs.initialize(uri, conf); - - validateRenameFolder(fs, "root", "rootnew"); - - // Validate from logs that threads are disabled. - String content = logs.getOutput(); - assertInLog(content, - "Disabling threads for Rename operation as thread count 1"); - - // Validate no thread executions - for (int i = 0; i < renameThreads; i++) { - assertNotInLog(content, - "AzureBlobRenameThread-" + Thread.currentThread().getName() + "-" + i); - } - } - - /* - * Helper method to do delete operation and validate all files in source folder - * doesn't exists after delete operation. - */ - private void validateDeleteFolder(FileSystem fs, String source) throws Exception { - // Create folder with files. - createFolder(fs, "root"); - Path sourceFolder = new Path(source); - - // Delete operation - assertTrue(fs.delete(sourceFolder, true)); - assertFalse(fs.exists(sourceFolder)); - - for (int i = 0; i < this.iterations; i++) { - // check that source folder and files doesn't exists - assertFalse(fs.exists(new Path(source + "/" + i))); - assertFalse(fs.exists(new Path(source + "/" + i + "/fileToRename"))); - assertFalse(fs.exists(new Path(source + "/" + i + "/file/to/rename"))); - assertFalse(fs.exists(new Path(source + "/" + i + "/file+to%rename"))); - assertFalse(fs.exists(new Path(source + "/fileToRename" + i))); - } - } - - /* - * Test case for delete operation with multiple threads and flat listing enabled. - */ - @Test - public void testDeleteSmallFolderWithThreads() throws Exception { - - validateDeleteFolder(fs, "root"); - - // With single iteration, we would have created 7 blobs. - int expectedThreadsCreated = Math.min(7, deleteThreads); - - // Validate from logs that threads are enabled. - String content = logs.getOutput(); - assertInLog(content, "ms with threads: " + expectedThreadsCreated); - - // Validate thread executions - for (int i = 0; i < expectedThreadsCreated; i++) { - assertInLog(content, - "AzureBlobDeleteThread-" + Thread.currentThread().getName() + "-" + i); - } - - // Also ensure that we haven't spawned extra threads. - if (expectedThreadsCreated < deleteThreads) { - for (int i = expectedThreadsCreated; i < deleteThreads; i++) { - assertNotInLog(content, - "AzureBlobDeleteThread-" + Thread.currentThread().getName() + "-" + i); - } - } - } - - /* - * Test case for delete operation with multiple threads and flat listing enabled. - */ - @Test - public void testDeleteLargeFolderWithThreads() throws Exception { - // Populate source folder with large number of files and directories. - this.iterations = 10; - validateDeleteFolder(fs, "root"); - - // Validate from logs that threads are enabled. 
- String content = logs.getOutput(); - assertInLog(content, "ms with threads: " + deleteThreads); - - // Validate thread executions - for (int i = 0; i < deleteThreads; i++) { - assertInLog(content, - "AzureBlobDeleteThread-" + Thread.currentThread().getName() + "-" + i); - } - } - - /* - * Test case for delete operation with threads disabled and flat listing enabled. - */ - @Test - public void testDeleteLargeFolderDisableThreads() throws Exception { - Configuration conf = fs.getConf(); - conf.setInt(NativeAzureFileSystem.AZURE_DELETE_THREADS, 0); - URI uri = fs.getUri(); - fs.initialize(uri, conf); - - // Populate source folder with large number of files and directories. - this.iterations = 10; - validateDeleteFolder(fs, "root"); - - // Validate from logs that threads are disabled. - String content = logs.getOutput(); - assertInLog(content, - "Disabling threads for Delete operation as thread count 0"); - - // Validate no thread executions - for (int i = 0; i < deleteThreads; i++) { - assertNotInLog(content, - "AzureBlobDeleteThread-" + Thread.currentThread().getName() + "-" + i); - } - } - - /* - * Test case for rename operation with threads and flat listing disabled. - */ - @Test - public void testDeleteSmallFolderDisableThreadsDisableFlatListing() throws Exception { - Configuration conf = fs.getConf(); - - // Number of threads set to 0 or 1 disables threads. - conf.setInt(NativeAzureFileSystem.AZURE_DELETE_THREADS, 1); - conf.setBoolean(AzureNativeFileSystemStore.KEY_ENABLE_FLAT_LISTING, false); - URI uri = fs.getUri(); - fs.initialize(uri, conf); - - validateDeleteFolder(fs, "root"); - - // Validate from logs that threads are disabled. - String content = logs.getOutput(); - assertInLog(content, - "Disabling threads for Delete operation as thread count 1"); - - // Validate no thread executions - for (int i = 0; i < deleteThreads; i++) { - assertNotInLog(content, - "AzureBlobDeleteThread-" + Thread.currentThread().getName() + "-" + i); - } - } - - /* - * Test case for delete operation with multiple threads and flat listing enabled. - */ - @Test - public void testDeleteThreadPoolExceptionFailure() throws Exception { - - // Spy azure file system object and raise exception for new thread pool - NativeAzureFileSystem mockFs = Mockito.spy((NativeAzureFileSystem) fs); - - String path = mockFs.pathToKey(mockFs.makeAbsolute(new Path("root"))); - - AzureFileSystemThreadPoolExecutor mockThreadPoolExecutor = Mockito.spy( - mockFs.getThreadPoolExecutor(deleteThreads, "AzureBlobDeleteThread", "Delete", - path, NativeAzureFileSystem.AZURE_DELETE_THREADS)); - Mockito.when(mockThreadPoolExecutor.getThreadPool(7)).thenThrow(new Exception()); - - // With single iteration, we would have created 7 blobs resulting 7 threads. - Mockito.when(mockFs.getThreadPoolExecutor(deleteThreads, "AzureBlobDeleteThread", "Delete", - path, NativeAzureFileSystem.AZURE_DELETE_THREADS)).thenReturn(mockThreadPoolExecutor); - - validateDeleteFolder(mockFs, "root"); - - // Validate from logs that threads are disabled. - String content = logs.getOutput(); - assertInLog(content, "Failed to create thread pool with threads"); - assertInLog(content, "Serializing the Delete operation"); - } - - /* - * Test case for delete operation with multiple threads and flat listing enabled. - */ - @Test - public void testDeleteThreadPoolExecuteFailure() throws Exception { - - // Mock thread pool executor to throw exception for all requests. 
- ThreadPoolExecutor mockThreadExecutor = Mockito.mock(ThreadPoolExecutor.class); - Mockito.doThrow(new RejectedExecutionException()).when(mockThreadExecutor).execute(Mockito.any(Runnable.class)); - - // Spy azure file system object and return mocked thread pool - NativeAzureFileSystem mockFs = Mockito.spy((NativeAzureFileSystem) fs); - - String path = mockFs.pathToKey(mockFs.makeAbsolute(new Path("root"))); - - AzureFileSystemThreadPoolExecutor mockThreadPoolExecutor = Mockito.spy( - mockFs.getThreadPoolExecutor(deleteThreads, "AzureBlobDeleteThread", "Delete", - path, NativeAzureFileSystem.AZURE_DELETE_THREADS)); - Mockito.when(mockThreadPoolExecutor.getThreadPool(7)).thenReturn(mockThreadExecutor); - - // With single iteration, we would have created 7 blobs resulting 7 threads. - Mockito.when(mockFs.getThreadPoolExecutor(deleteThreads, "AzureBlobDeleteThread", "Delete", - path, NativeAzureFileSystem.AZURE_DELETE_THREADS)).thenReturn(mockThreadPoolExecutor); - - validateDeleteFolder(mockFs, "root"); - - // Validate from logs that threads are disabled. - String content = logs.getOutput(); - assertInLog(content, - "Rejected execution of thread for Delete operation on blob"); - assertInLog(content, "Serializing the Delete operation"); - } - - /* - * Test case for delete operation with multiple threads and flat listing enabled. - */ - @Test - public void testDeleteThreadPoolExecuteSingleThreadFailure() throws Exception { - - // Spy azure file system object and return mocked thread pool - NativeAzureFileSystem mockFs = Mockito.spy((NativeAzureFileSystem) fs); - - // Spy a thread pool executor and link it to azure file system object. - String path = mockFs.pathToKey(mockFs.makeAbsolute(new Path("root"))); - AzureFileSystemThreadPoolExecutor mockThreadPoolExecutor = Mockito.spy( - mockFs.getThreadPoolExecutor(deleteThreads, "AzureBlobDeleteThread", "Delete", - path, NativeAzureFileSystem.AZURE_DELETE_THREADS)); - - // With single iteration, we would have created 7 blobs resulting 7 threads. - Mockito.when(mockFs.getThreadPoolExecutor(deleteThreads, "AzureBlobDeleteThread", "Delete", - path, NativeAzureFileSystem.AZURE_DELETE_THREADS)).thenReturn(mockThreadPoolExecutor); - - // Create a thread executor and link it to mocked thread pool executor object. - ThreadPoolExecutor mockThreadExecutor = Mockito.spy(mockThreadPoolExecutor.getThreadPool(7)); - Mockito.when(mockThreadPoolExecutor.getThreadPool(7)).thenReturn(mockThreadExecutor); - - // Mock thread executor to throw exception for all requests. - Mockito.doCallRealMethod().doThrow(new RejectedExecutionException()).when(mockThreadExecutor).execute(Mockito.any(Runnable.class)); - - validateDeleteFolder(mockFs, "root"); - - // Validate from logs that threads are enabled and unused threads. - String content = logs.getOutput(); - assertInLog(content, - "Using thread pool for Delete operation with threads 7"); - assertInLog(content, - "6 threads not used for Delete operation on blob"); - } - - /* - * Test case for delete operation with multiple threads and flat listing enabled. - */ - @Test - public void testDeleteThreadPoolTerminationFailure() throws Exception { - - // Spy azure file system object and return mocked thread pool - NativeAzureFileSystem mockFs = Mockito.spy((NativeAzureFileSystem) fs); - - // Spy a thread pool executor and link it to azure file system object. 
- String path = mockFs.pathToKey(mockFs.makeAbsolute(new Path("root"))); - AzureFileSystemThreadPoolExecutor mockThreadPoolExecutor = Mockito.spy( - ((NativeAzureFileSystem) fs).getThreadPoolExecutor(deleteThreads, "AzureBlobDeleteThread", "Delete", - path, NativeAzureFileSystem.AZURE_DELETE_THREADS)); - - // Create a thread executor and link it to mocked thread pool executor object. - // Mock thread executor to throw exception for terminating threads. - ThreadPoolExecutor mockThreadExecutor = Mockito.mock(ThreadPoolExecutor.class); - Mockito.doNothing().when(mockThreadExecutor).execute(Mockito.any(Runnable.class)); - Mockito.when(mockThreadExecutor.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS)).thenThrow(new InterruptedException()); - - Mockito.when(mockThreadPoolExecutor.getThreadPool(7)).thenReturn(mockThreadExecutor); - - // With single iteration, we would have created 7 blobs resulting 7 threads. - Mockito.when(mockFs.getThreadPoolExecutor(deleteThreads, "AzureBlobDeleteThread", "Delete", - path, NativeAzureFileSystem.AZURE_DELETE_THREADS)).thenReturn(mockThreadPoolExecutor); - - createFolder(mockFs, "root"); - Path sourceFolder = new Path("root"); - boolean exception = false; - try { - mockFs.delete(sourceFolder, true); - } catch (IOException e){ - exception = true; - } - - assertTrue(exception); - assertTrue(mockFs.exists(sourceFolder)); - - // Validate from logs that threads are enabled and delete operation is failed. - String content = logs.getOutput(); - assertInLog(content, - "Using thread pool for Delete operation with threads"); - assertInLog(content, "Threads got interrupted Delete blob operation"); - assertInLog(content, - "Delete failed as operation on subfolders and files failed."); - } - - /* - * Validate that when a directory is deleted recursively, the operation succeeds - * even if a child directory delete fails because the directory does not exist. - * This can happen if a child directory is deleted by an external agent while - * the parent is in progress of being deleted recursively. - */ - @Test - public void testRecursiveDirectoryDeleteWhenChildDirectoryDeleted() - throws Exception { - testRecusiveDirectoryDelete(true); - } - - /* - * Validate that when a directory is deleted recursively, the operation succeeds - * even if a file delete fails because it does not exist. - * This can happen if a file is deleted by an external agent while - * the parent directory is in progress of being deleted. - */ - @Test - public void testRecursiveDirectoryDeleteWhenDeletingChildFileReturnsFalse() - throws Exception { - testRecusiveDirectoryDelete(false); - } - - private void testRecusiveDirectoryDelete(boolean useDir) throws Exception { - String childPathToBeDeletedByExternalAgent = (useDir) - ? 
"root/0" - : "root/0/fileToRename"; - // Spy azure file system object and return false for deleting one file - NativeAzureFileSystem mockFs = Mockito.spy((NativeAzureFileSystem) fs); - String path = mockFs.pathToKey(mockFs.makeAbsolute(new Path( - childPathToBeDeletedByExternalAgent))); - - Answer answer = new Answer() { - public Boolean answer(InvocationOnMock invocation) throws Throwable { - String path = (String) invocation.getArguments()[0]; - boolean isDir = (boolean) invocation.getArguments()[1]; - boolean realResult = fs.deleteFile(path, isDir); - assertTrue(realResult); - boolean fakeResult = false; - return fakeResult; - } - }; - - Mockito.when(mockFs.deleteFile(path, useDir)).thenAnswer(answer); - - createFolder(mockFs, "root"); - Path sourceFolder = new Path("root"); - - assertTrue(mockFs.delete(sourceFolder, true)); - assertFalse(mockFs.exists(sourceFolder)); - - // Validate from logs that threads are enabled, that a child directory was - // deleted by an external caller, and the parent delete operation still - // succeeds. - String content = logs.getOutput(); - assertInLog(content, - "Using thread pool for Delete operation with threads"); - assertInLog(content, String.format("Attempt to delete non-existent %s %s", - useDir ? "directory" : "file", path)); - } - - /* - * Test case for delete operation with multiple threads and flat listing enabled. - */ - @Test - public void testDeleteSingleDeleteException() throws Exception { - - // Spy azure file system object and raise exception for deleting one file - NativeAzureFileSystem mockFs = Mockito.spy((NativeAzureFileSystem) fs); - String path = mockFs.pathToKey(mockFs.makeAbsolute(new Path("root/0"))); - Mockito.doThrow(new IOException()).when(mockFs).deleteFile(path, true); - - createFolder(mockFs, "root"); - Path sourceFolder = new Path("root"); - - boolean exception = false; - try { - mockFs.delete(sourceFolder, true); - } catch (IOException e){ - exception = true; - } - - assertTrue(exception); - assertTrue(mockFs.exists(sourceFolder)); - - // Validate from logs that threads are enabled and delete operation failed. - String content = logs.getOutput(); - assertInLog(content, - "Using thread pool for Delete operation with threads"); - assertInLog(content, - "Encountered Exception for Delete operation for file " + path); - assertInLog(content, - "Terminating execution of Delete operation now as some other thread already got exception or operation failed"); - } - - /* - * Test case for rename operation with multiple threads and flat listing enabled. - */ - @Test - public void testRenameThreadPoolExceptionFailure() throws Exception { - - // Spy azure file system object and raise exception for new thread pool - NativeAzureFileSystem mockFs = Mockito.spy((NativeAzureFileSystem) fs); - - String path = mockFs.pathToKey(mockFs.makeAbsolute(new Path("root"))); - AzureFileSystemThreadPoolExecutor mockThreadPoolExecutor = Mockito.spy( - ((NativeAzureFileSystem) fs).getThreadPoolExecutor(renameThreads, "AzureBlobRenameThread", "Rename", - path, NativeAzureFileSystem.AZURE_RENAME_THREADS)); - Mockito.when(mockThreadPoolExecutor.getThreadPool(7)).thenThrow(new Exception()); - - // With single iteration, we would have created 7 blobs resulting 7 threads. - Mockito.doReturn(mockThreadPoolExecutor).when(mockFs).getThreadPoolExecutor(renameThreads, "AzureBlobRenameThread", "Rename", - path, NativeAzureFileSystem.AZURE_RENAME_THREADS); - - validateRenameFolder(mockFs, "root", "rootnew"); - - // Validate from logs that threads are disabled. 
- String content = logs.getOutput(); - assertInLog(content, "Failed to create thread pool with threads"); - assertInLog(content, "Serializing the Rename operation"); - } - - /* - * Test case for rename operation with multiple threads and flat listing enabled. - */ - @Test - public void testRenameThreadPoolExecuteFailure() throws Exception { - - // Mock thread pool executor to throw exception for all requests. - ThreadPoolExecutor mockThreadExecutor = Mockito.mock(ThreadPoolExecutor.class); - Mockito.doThrow(new RejectedExecutionException()).when(mockThreadExecutor).execute(Mockito.any(Runnable.class)); - - // Spy azure file system object and return mocked thread pool - NativeAzureFileSystem mockFs = Mockito.spy((NativeAzureFileSystem) fs); - - String path = mockFs.pathToKey(mockFs.makeAbsolute(new Path("root"))); - AzureFileSystemThreadPoolExecutor mockThreadPoolExecutor = Mockito.spy( - mockFs.getThreadPoolExecutor(renameThreads, "AzureBlobRenameThread", "Rename", - path, NativeAzureFileSystem.AZURE_RENAME_THREADS)); - Mockito.when(mockThreadPoolExecutor.getThreadPool(7)).thenReturn(mockThreadExecutor); - - // With single iteration, we would have created 7 blobs resulting 7 threads. - Mockito.when(mockFs.getThreadPoolExecutor(renameThreads, "AzureBlobRenameThread", "Rename", - path, NativeAzureFileSystem.AZURE_RENAME_THREADS)).thenReturn(mockThreadPoolExecutor); - - validateRenameFolder(mockFs, "root", "rootnew"); - - // Validate from logs that threads are disabled. - String content = logs.getOutput(); - assertInLog(content, - "Rejected execution of thread for Rename operation on blob"); - assertInLog(content, "Serializing the Rename operation"); - } - - /* - * Test case for rename operation with multiple threads and flat listing enabled. - */ - @Test - public void testRenameThreadPoolExecuteSingleThreadFailure() throws Exception { - - // Spy azure file system object and return mocked thread pool - NativeAzureFileSystem mockFs = Mockito.spy((NativeAzureFileSystem) fs); - - // Spy a thread pool executor and link it to azure file system object. - String path = mockFs.pathToKey(mockFs.makeAbsolute(new Path("root"))); - AzureFileSystemThreadPoolExecutor mockThreadPoolExecutor = Mockito.spy( - mockFs.getThreadPoolExecutor(renameThreads, "AzureBlobRenameThread", "Rename", - path, NativeAzureFileSystem.AZURE_RENAME_THREADS)); - - // With single iteration, we would have created 7 blobs resulting 7 threads. - Mockito.when(mockFs.getThreadPoolExecutor(renameThreads, "AzureBlobRenameThread", "Rename", - path, NativeAzureFileSystem.AZURE_RENAME_THREADS)).thenReturn(mockThreadPoolExecutor); - - // Create a thread executor and link it to mocked thread pool executor object. - ThreadPoolExecutor mockThreadExecutor = Mockito.spy(mockThreadPoolExecutor.getThreadPool(7)); - Mockito.when(mockThreadPoolExecutor.getThreadPool(7)).thenReturn(mockThreadExecutor); - - // Mock thread executor to throw exception for all requests. - Mockito.doCallRealMethod().doThrow(new RejectedExecutionException()).when(mockThreadExecutor).execute(Mockito.any(Runnable.class)); - - validateRenameFolder(mockFs, "root", "rootnew"); - - // Validate from logs that threads are enabled and unused threads exists. - String content = logs.getOutput(); - assertInLog(content, - "Using thread pool for Rename operation with threads 7"); - assertInLog(content, - "6 threads not used for Rename operation on blob"); - } - - /* - * Test case for rename operation with multiple threads and flat listing enabled. 
- */ - @Test - public void testRenameThreadPoolTerminationFailure() throws Exception { - - // Spy azure file system object and return mocked thread pool - NativeAzureFileSystem mockFs = Mockito.spy((NativeAzureFileSystem) fs); - - // Spy a thread pool executor and link it to azure file system object. - String path = mockFs.pathToKey(mockFs.makeAbsolute(new Path("root"))); - AzureFileSystemThreadPoolExecutor mockThreadPoolExecutor = Mockito.spy( - mockFs.getThreadPoolExecutor(renameThreads, "AzureBlobRenameThread", "Rename", - path, NativeAzureFileSystem.AZURE_RENAME_THREADS)); - - // With single iteration, we would have created 7 blobs resulting 7 threads. - Mockito.when(mockFs.getThreadPoolExecutor(renameThreads, "AzureBlobRenameThread", "Rename", - path, NativeAzureFileSystem.AZURE_RENAME_THREADS)).thenReturn(mockThreadPoolExecutor); - - // Mock thread executor to throw exception for all requests. - ThreadPoolExecutor mockThreadExecutor = Mockito.mock(ThreadPoolExecutor.class); - Mockito.doNothing().when(mockThreadExecutor).execute(Mockito.any(Runnable.class)); - Mockito.when(mockThreadExecutor.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS)).thenThrow(new InterruptedException()); - Mockito.when(mockThreadPoolExecutor.getThreadPool(7)).thenReturn(mockThreadExecutor); - - - createFolder(mockFs, "root"); - Path sourceFolder = new Path("root"); - Path destFolder = new Path("rootnew"); - boolean exception = false; - try { - mockFs.rename(sourceFolder, destFolder); - } catch (IOException e){ - exception = true; - } - - assertTrue(exception); - assertTrue(mockFs.exists(sourceFolder)); - - // Validate from logs that threads are enabled and rename operation is failed. - String content = logs.getOutput(); - assertInLog(content, - "Using thread pool for Rename operation with threads"); - assertInLog(content, "Threads got interrupted Rename blob operation"); - assertInLog(content, - "Rename failed as operation on subfolders and files failed."); - } - - /* - * Test case for rename operation with multiple threads and flat listing enabled. - */ - @Test - public void testRenameSingleRenameException() throws Exception { - - // Spy azure file system object and raise exception for deleting one file - Path sourceFolder = new Path("root"); - Path destFolder = new Path("rootnew"); - - // Spy azure file system object and populate rename pending spy object. - NativeAzureFileSystem mockFs = Mockito.spy((NativeAzureFileSystem) fs); - - // Populate data now only such that rename pending spy object would see this data. - createFolder(mockFs, "root"); - - String srcKey = mockFs.pathToKey(mockFs.makeAbsolute(sourceFolder)); - String dstKey = mockFs.pathToKey(mockFs.makeAbsolute(destFolder)); - - FolderRenamePending mockRenameFs = Mockito.spy(mockFs.prepareAtomicFolderRename(srcKey, dstKey)); - Mockito.when(mockFs.prepareAtomicFolderRename(srcKey, dstKey)).thenReturn(mockRenameFs); - String path = mockFs.pathToKey(mockFs.makeAbsolute(new Path("root/0"))); - Mockito.doThrow(new IOException()).when(mockRenameFs).renameFile(Mockito.any(FileMetadata.class)); - - boolean exception = false; - try { - mockFs.rename(sourceFolder, destFolder); - } catch (IOException e){ - exception = true; - } - - assertTrue(exception); - assertTrue(mockFs.exists(sourceFolder)); - - // Validate from logs that threads are enabled and delete operation failed. 
- String content = logs.getOutput(); - assertInLog(content, - "Using thread pool for Rename operation with threads"); - assertInLog(content, - "Encountered Exception for Rename operation for file " + path); - assertInLog(content, - "Terminating execution of Rename operation now as some other thread already got exception or operation failed"); - } - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.create(); - } - -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestListPerformance.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestListPerformance.java deleted file mode 100644 index 44d57d1b19f83..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestListPerformance.java +++ /dev/null @@ -1,198 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.util.ArrayList; -import java.util.EnumSet; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.UUID; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; - -import com.microsoft.azure.storage.blob.CloudBlobContainer; -import com.microsoft.azure.storage.blob.CloudBlockBlob; -import org.junit.jupiter.api.MethodOrderer; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestMethodOrder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.LocatedFileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.RemoteIterator; -import org.apache.hadoop.fs.azure.integration.AbstractAzureScaleTest; -import org.apache.hadoop.fs.azure.integration.AzureTestUtils; -import org.apache.hadoop.fs.contract.ContractTestUtils; - -import static org.assertj.core.api.Assumptions.assumeThat; - -/** - * Test list performance. 
- */
-@TestMethodOrder(MethodOrderer.Alphanumeric.class)
-public class ITestListPerformance extends AbstractAzureScaleTest {
-  private static final Logger LOG = LoggerFactory.getLogger(
-      ITestListPerformance.class);
-
-  private static final Path TEST_DIR_PATH = new Path(
-      "DirectoryWithManyFiles");
-
-  private static final int NUMBER_OF_THREADS = 10;
-  private static final int NUMBER_OF_FILES_PER_THREAD = 1000;
-
-  private int threads;
-
-  private int filesPerThread;
-
-  private int expectedFileCount;
-
-  @BeforeEach
-  @Override
-  public void setUp() throws Exception {
-    super.setUp();
-    Configuration conf = getConfiguration();
-    // fail fast
-    threads = AzureTestUtils.getTestPropertyInt(conf,
-        "fs.azure.scale.test.list.performance.threads", NUMBER_OF_THREADS);
-    filesPerThread = AzureTestUtils.getTestPropertyInt(conf,
-        "fs.azure.scale.test.list.performance.files", NUMBER_OF_FILES_PER_THREAD);
-    expectedFileCount = threads * filesPerThread;
-    LOG.info("Thread = {}, Files per Thread = {}, expected files = {}",
-        threads, filesPerThread, expectedFileCount);
-    conf.set("fs.azure.io.retry.max.retries", "1");
-    conf.set("fs.azure.delete.threads", "16");
-    createTestAccount();
-  }
-
-  @Override
-  protected AzureBlobStorageTestAccount createTestAccount() throws Exception {
-    return AzureBlobStorageTestAccount.create(
-        "itestlistperformance",
-        EnumSet.of(AzureBlobStorageTestAccount.CreateOptions.CreateContainer),
-        null,
-        true);
-  }
-
-  @Test
-  public void test_0101_CreateDirectoryWithFiles() throws Exception {
-    assumeThat(fs.exists(TEST_DIR_PATH)).as("Test path exists; skipping").isFalse();
-
-    ExecutorService executorService = Executors.newFixedThreadPool(threads);
-    CloudBlobContainer container = testAccount.getRealContainer();
-
-    final String basePath = (fs.getWorkingDirectory().toUri().getPath() + "/" + TEST_DIR_PATH + "/").substring(1);
-
-    ArrayList<Callable<Integer>> tasks = new ArrayList<>(threads);
-    fs.mkdirs(TEST_DIR_PATH);
-    ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer();
-    for (int i = 0; i < threads; i++) {
-      tasks.add(
-          new Callable<Integer>() {
-            public Integer call() {
-              int written = 0;
-              for (int j = 0; j < filesPerThread; j++) {
-                String blobName = basePath + UUID.randomUUID().toString();
-                try {
-                  CloudBlockBlob blob = container.getBlockBlobReference(
-                      blobName);
-                  blob.uploadText("");
-                  written ++;
-                } catch (Exception e) {
-                  LOG.error("Filed to write {}", blobName, e);
-                  break;
-                }
-              }
-              LOG.info("Thread completed with {} files written", written);
-              return written;
-            }
-          }
-      );
-    }
-
-    List<Future<Integer>> futures = executorService.invokeAll(tasks,
-        getTestTimeoutMillis(), TimeUnit.MILLISECONDS);
-    long elapsedMs = timer.elapsedTimeMs();
-    LOG.info("time to create files: {} millis", elapsedMs);
-
-    for (Future<Integer> future : futures) {
-      assertTrue(future.isDone(), "Future timed out");
-      assertEquals(filesPerThread, future.get().intValue(),
-          "Future did not write all files timed out");
-    }
-  }
-
-  @Test
-  public void test_0200_ListStatusPerformance() throws Exception {
-    ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer();
-    FileStatus[] fileList = fs.listStatus(TEST_DIR_PATH);
-    long elapsedMs = timer.elapsedTimeMs();
-    LOG.info(String.format(
-        "files=%1$d, elapsedMs=%2$d",
-        fileList.length,
-        elapsedMs));
-    Map<Path, FileStatus> foundInList =new HashMap<>(expectedFileCount);
-
-    for (FileStatus fileStatus : fileList) {
-      foundInList.put(fileStatus.getPath(), fileStatus);
-      LOG.info("{}: {}", fileStatus.getPath(),
-          fileStatus.isDirectory() ? "dir" : "file");
"dir" : "file"); - } - assertEquals(expectedFileCount, fileList.length, - "Mismatch between expected files and actual"); - - - // now do a listFiles() recursive - ContractTestUtils.NanoTimer initialStatusCallTimer - = new ContractTestUtils.NanoTimer(); - RemoteIterator listing - = fs.listFiles(TEST_DIR_PATH, true); - long initialListTime = initialStatusCallTimer.elapsedTimeMs(); - timer = new ContractTestUtils.NanoTimer(); - while (listing.hasNext()) { - FileStatus fileStatus = listing.next(); - Path path = fileStatus.getPath(); - FileStatus removed = foundInList.remove(path); - assertNotNull(removed, - "Did not find " + path + "{} in the previous listing"); - } - elapsedMs = timer.elapsedTimeMs(); - LOG.info("time for listFiles() initial call: {} millis;" - + " time to iterate: {} millis", initialListTime, elapsedMs); - assertEquals(0, foundInList.size(), - "Not all files from listStatus() were found in listFiles()"); - - } - - @Test - public void test_0300_BulkDeletePerformance() throws Exception { - ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); - fs.delete(TEST_DIR_PATH,true); - long elapsedMs = timer.elapsedTimeMs(); - LOG.info("time for delete(): {} millis; {} nanoS per file", - elapsedMs, timer.nanosPerOperation(expectedFileCount)); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFSAuthWithBlobSpecificKeys.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFSAuthWithBlobSpecificKeys.java deleted file mode 100644 index 0f3d1271641e6..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFSAuthWithBlobSpecificKeys.java +++ /dev/null @@ -1,40 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import org.apache.hadoop.conf.Configuration; - -import static org.apache.hadoop.fs.azure.SecureStorageInterfaceImpl.KEY_USE_CONTAINER_SASKEY_FOR_ALL_ACCESS; - -/** - * Test class to hold all WASB authorization tests that use blob-specific keys - * to access storage. 
- */
-public class ITestNativeAzureFSAuthWithBlobSpecificKeys
-    extends TestNativeAzureFileSystemAuthorization {
-
-
-  @Override
-  public Configuration createConfiguration() {
-    Configuration conf = super.createConfiguration();
-    conf.set(KEY_USE_CONTAINER_SASKEY_FOR_ALL_ACCESS, "false");
-    return conf;
-  }
-
-}
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFSAuthorizationCaching.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFSAuthorizationCaching.java
deleted file mode 100644
index 511f08f2ad179..0000000000000
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFSAuthorizationCaching.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.fs.azure;
-
-import org.apache.hadoop.conf.Configuration;
-import org.junit.jupiter.api.Test;
-
-import static org.apache.hadoop.fs.azure.CachingAuthorizer.KEY_AUTH_SERVICE_CACHING_ENABLE;
-
-/**
- * Test class to hold all WASB authorization caching related tests.
- */
-public class ITestNativeAzureFSAuthorizationCaching
-    extends TestNativeAzureFileSystemAuthorization {
-
-  private static final int DUMMY_TTL_VALUE = 5000;
-
-  @Override
-  public Configuration createConfiguration() {
-    Configuration conf = super.createConfiguration();
-    conf.set(KEY_AUTH_SERVICE_CACHING_ENABLE, "true");
-    return conf;
-  }
-
-  /**
-   * Test to verify cache behavior -- assert that PUT overwrites value if present
-   */
-  @Test
-  public void testCachePut() throws Throwable {
-    CachingAuthorizer<String, Integer> cache = new CachingAuthorizer<>(DUMMY_TTL_VALUE, "TEST");
-    cache.init(createConfiguration());
-    cache.put("TEST", 1);
-    cache.put("TEST", 3);
-    int result = cache.get("TEST");
-    assertEquals(3, result, "Cache returned unexpected result");
-  }
-}
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFSPageBlobLive.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFSPageBlobLive.java
deleted file mode 100644
index a4d8729a6804e..0000000000000
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFSPageBlobLive.java
+++ /dev/null
@@ -1,43 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.fs.azure; - -import org.apache.hadoop.conf.Configuration; - -/** - * Run the base Azure file system tests strictly on page blobs to make sure fundamental - * operations on page blob files and folders work as expected. - * These operations include create, delete, rename, list, and so on. - */ -public class ITestNativeAzureFSPageBlobLive extends - NativeAzureFileSystemBaseTest { - - @Override - protected AzureBlobStorageTestAccount createTestAccount() - throws Exception { - Configuration conf = new Configuration(); - - // Configure the page blob directories key so every file created is a page blob. - conf.set(AzureNativeFileSystemStore.KEY_PAGE_BLOB_DIRECTORIES, "/"); - - // Configure the atomic rename directories key so every folder will have - // atomic rename applied. - conf.set(AzureNativeFileSystemStore.KEY_ATOMIC_RENAME_DIRECTORIES, "/"); - return AzureBlobStorageTestAccount.create(conf); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemAppend.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemAppend.java deleted file mode 100644 index d1ba65c5cb1fb..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemAppend.java +++ /dev/null @@ -1,354 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.IOException; -import java.net.URI; -import java.util.Arrays; - -import org.apache.commons.lang3.RandomStringUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.test.GenericTestUtils; - -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** - * Test append operations. 
- */ -public class ITestNativeAzureFileSystemAppend extends AbstractWasbTestBase { - - private Path testPath; - - @Override - public Configuration createConfiguration() { - Configuration conf = super.createConfiguration(); - conf.setBoolean(NativeAzureFileSystem.APPEND_SUPPORT_ENABLE_PROPERTY_NAME, - true); - return conf; - } - - @BeforeEach - @Override - public void setUp() throws Exception { - super.setUp(); - testPath = methodPath(); - } - - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.create(createConfiguration()); - } - - /* - * Helper method that creates test data of size provided by the - * "size" parameter. - */ - private static byte[] getTestData(int size) { - byte[] testData = new byte[size]; - System.arraycopy(RandomStringUtils.randomAlphabetic(size).getBytes(), 0, testData, 0, size); - return testData; - } - - // Helper method to create file and write fileSize bytes of data on it. - private byte[] createBaseFileWithData(int fileSize, Path testPath) throws Throwable { - - try(FSDataOutputStream createStream = fs.create(testPath)) { - byte[] fileData = null; - - if (fileSize != 0) { - fileData = getTestData(fileSize); - createStream.write(fileData); - } - return fileData; - } - } - - /* - * Helper method to verify a file data equal to "dataLength" parameter - */ - private boolean verifyFileData(int dataLength, byte[] testData, int testDataIndex, - FSDataInputStream srcStream) { - - try { - - byte[] fileBuffer = new byte[dataLength]; - byte[] testDataBuffer = new byte[dataLength]; - - int fileBytesRead = srcStream.read(fileBuffer); - - if (fileBytesRead < dataLength) { - return false; - } - - System.arraycopy(testData, testDataIndex, testDataBuffer, 0, dataLength); - - if (!Arrays.equals(fileBuffer, testDataBuffer)) { - return false; - } - - return true; - - } catch (Exception ex) { - return false; - } - - } - - /* - * Helper method to verify Append on a testFile. - */ - private boolean verifyAppend(byte[] testData, Path testFile) { - - try(FSDataInputStream srcStream = fs.open(testFile)) { - - int baseBufferSize = 2048; - int testDataSize = testData.length; - int testDataIndex = 0; - - while (testDataSize > baseBufferSize) { - - if (!verifyFileData(baseBufferSize, testData, testDataIndex, srcStream)) { - return false; - } - testDataIndex += baseBufferSize; - testDataSize -= baseBufferSize; - } - - if (!verifyFileData(testDataSize, testData, testDataIndex, srcStream)) { - return false; - } - - return true; - } catch(Exception ex) { - return false; - } - } - - /* - * Test case to verify if an append on small size data works. This tests - * append E2E - */ - @Test - public void testSingleAppend() throws Throwable{ - - FSDataOutputStream appendStream = null; - try { - int baseDataSize = 50; - byte[] baseDataBuffer = createBaseFileWithData(baseDataSize, testPath); - - int appendDataSize = 20; - byte[] appendDataBuffer = getTestData(appendDataSize); - appendStream = fs.append(testPath, 10); - appendStream.write(appendDataBuffer); - appendStream.close(); - byte[] testData = new byte[baseDataSize + appendDataSize]; - System.arraycopy(baseDataBuffer, 0, testData, 0, baseDataSize); - System.arraycopy(appendDataBuffer, 0, testData, baseDataSize, appendDataSize); - - assertTrue(verifyAppend(testData, testPath)); - } finally { - if (appendStream != null) { - appendStream.close(); - } - } - } - - /* - * Test case to verify append to an empty file. 
- */ - @Test - public void testSingleAppendOnEmptyFile() throws Throwable { - - FSDataOutputStream appendStream = null; - - try { - createBaseFileWithData(0, testPath); - - int appendDataSize = 20; - byte[] appendDataBuffer = getTestData(appendDataSize); - appendStream = fs.append(testPath, 10); - appendStream.write(appendDataBuffer); - appendStream.close(); - - assertTrue(verifyAppend(appendDataBuffer, testPath)); - } finally { - if (appendStream != null) { - appendStream.close(); - } - } - } - - /* - * Test to verify that we can open only one Append stream on a File. - */ - @Test - public void testSingleAppenderScenario() throws Throwable { - - FSDataOutputStream appendStream1 = null; - FSDataOutputStream appendStream2 = null; - IOException ioe = null; - try { - createBaseFileWithData(0, testPath); - appendStream1 = fs.append(testPath, 10); - boolean encounteredException = false; - try { - appendStream2 = fs.append(testPath, 10); - } catch(IOException ex) { - encounteredException = true; - ioe = ex; - } - - appendStream1.close(); - - assertTrue(encounteredException); - GenericTestUtils.assertExceptionContains("Unable to set Append lease on the Blob", ioe); - } finally { - if (appendStream1 != null) { - appendStream1.close(); - } - - if (appendStream2 != null) { - appendStream2.close(); - } - } - } - - /* - * Tests to verify multiple appends on a Blob. - */ - @Test - public void testMultipleAppends() throws Throwable { - - int baseDataSize = 50; - byte[] baseDataBuffer = createBaseFileWithData(baseDataSize, testPath); - - int appendDataSize = 100; - int targetAppendCount = 50; - byte[] testData = new byte[baseDataSize + (appendDataSize*targetAppendCount)]; - int testDataIndex = 0; - System.arraycopy(baseDataBuffer, 0, testData, testDataIndex, baseDataSize); - testDataIndex += baseDataSize; - - int appendCount = 0; - - FSDataOutputStream appendStream = null; - - try { - while (appendCount < targetAppendCount) { - - byte[] appendDataBuffer = getTestData(appendDataSize); - appendStream = fs.append(testPath, 30); - appendStream.write(appendDataBuffer); - appendStream.close(); - - System.arraycopy(appendDataBuffer, 0, testData, testDataIndex, appendDataSize); - testDataIndex += appendDataSize; - appendCount++; - } - - assertTrue(verifyAppend(testData, testPath)); - - } finally { - if (appendStream != null) { - appendStream.close(); - } - } - } - - /* - * Test to verify we multiple appends on the same stream. 
- */ - @Test - public void testMultipleAppendsOnSameStream() throws Throwable { - - int baseDataSize = 50; - byte[] baseDataBuffer = createBaseFileWithData(baseDataSize, testPath); - int appendDataSize = 100; - int targetAppendCount = 50; - byte[] testData = new byte[baseDataSize + (appendDataSize*targetAppendCount)]; - int testDataIndex = 0; - System.arraycopy(baseDataBuffer, 0, testData, testDataIndex, baseDataSize); - testDataIndex += baseDataSize; - int appendCount = 0; - - FSDataOutputStream appendStream = null; - - try { - - while (appendCount < targetAppendCount) { - - appendStream = fs.append(testPath, 50); - - int singleAppendChunkSize = 20; - int appendRunSize = 0; - while (appendRunSize < appendDataSize) { - - byte[] appendDataBuffer = getTestData(singleAppendChunkSize); - appendStream.write(appendDataBuffer); - System.arraycopy(appendDataBuffer, 0, testData, - testDataIndex + appendRunSize, singleAppendChunkSize); - - appendRunSize += singleAppendChunkSize; - } - - appendStream.close(); - testDataIndex += appendDataSize; - appendCount++; - } - - assertTrue(verifyAppend(testData, testPath)); - } finally { - if (appendStream != null) { - appendStream.close(); - } - } - } - - @Test - /* - * Test to verify the behavior when Append Support configuration flag is set to false - */ - public void testFalseConfigurationFlagBehavior() throws Throwable { - assertThrows(UnsupportedOperationException.class, ()->{ - fs = testAccount.getFileSystem(); - Configuration conf = fs.getConf(); - conf.setBoolean(NativeAzureFileSystem.APPEND_SUPPORT_ENABLE_PROPERTY_NAME, false); - URI uri = fs.getUri(); - fs.initialize(uri, conf); - - FSDataOutputStream appendStream = null; - - try { - createBaseFileWithData(0, testPath); - appendStream = fs.append(testPath, 10); - } finally { - if (appendStream != null) { - appendStream.close(); - } - } - - }); - } - -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemAtomicRenameDirList.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemAtomicRenameDirList.java deleted file mode 100644 index 75116944da450..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemAtomicRenameDirList.java +++ /dev/null @@ -1,55 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.IOException; -import java.net.URI; - -import org.apache.hadoop.conf.Configuration; - -import org.junit.jupiter.api.Test; - -/** - * Test atomic renaming. 
- */ -public class ITestNativeAzureFileSystemAtomicRenameDirList - extends AbstractWasbTestBase { - - // HBase-site config controlling HBase root dir - private static final String HBASE_ROOT_DIR_CONF_STRING = "hbase.rootdir"; - private static final String HBASE_ROOT_DIR_VALUE_ON_DIFFERENT_FS = - "wasb://somedifferentfilesystem.blob.core.windows.net/hbase"; - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.create(); - } - - @Test - public void testAtomicRenameKeyDoesntNPEOnInitializingWithNonDefaultURI() - throws IOException { - NativeAzureFileSystem azureFs = fs; - AzureNativeFileSystemStore azureStore = azureFs.getStore(); - Configuration conf = fs.getConf(); - conf.set(HBASE_ROOT_DIR_CONF_STRING, HBASE_ROOT_DIR_VALUE_ON_DIFFERENT_FS); - URI uri = fs.getUri(); - fs.initialize(uri, conf); - azureStore.isAtomicRenameKey("anyrandomkey"); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemClientLogging.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemClientLogging.java deleted file mode 100644 index 7363373bc4bd0..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemClientLogging.java +++ /dev/null @@ -1,136 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.net.URI; -import java.util.StringTokenizer; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.test.GenericTestUtils.LogCapturer; - -import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Test to validate Azure storage client side logging. Tests works only when - * testing with Live Azure storage because Emulator does not have support for - * client-side logging. - * - * Important: Do not attempt to move off commons-logging. - * The tests will fail. - */ -public class ITestNativeAzureFileSystemClientLogging - extends AbstractWasbTestBase { - - // Core-site config controlling Azure Storage Client logging - private static final String KEY_LOGGING_CONF_STRING = "fs.azure.storage.client.logging"; - - // Temporary directory created using WASB. - private static final String TEMP_DIR = "tempDir"; - - /* - * Helper method to verify the client logging is working. This check primarily - * checks to make sure we see a line in the logs corresponding to the entity - * that is created during test run. 
- */ - private boolean verifyStorageClientLogs(String capturedLogs, String entity) - throws Exception { - - URI uri = testAccount.getRealAccount().getBlobEndpoint(); - String container = testAccount.getRealContainer().getName(); - String validateString = uri + Path.SEPARATOR + container + Path.SEPARATOR - + entity; - boolean entityFound = false; - - StringTokenizer tokenizer = new StringTokenizer(capturedLogs, "\n"); - - while (tokenizer.hasMoreTokens()) { - String token = tokenizer.nextToken(); - if (token.contains(validateString)) { - entityFound = true; - break; - } - } - return entityFound; - } - - /* - * Helper method that updates the core-site config to enable/disable logging. - */ - private void updateFileSystemConfiguration(Boolean loggingFlag) - throws Exception { - - Configuration conf = fs.getConf(); - conf.set(KEY_LOGGING_CONF_STRING, loggingFlag.toString()); - URI uri = fs.getUri(); - fs.initialize(uri, conf); - } - - // Using WASB code to communicate with Azure Storage. - private void performWASBOperations() throws Exception { - - Path tempDir = new Path(Path.SEPARATOR + TEMP_DIR); - fs.mkdirs(tempDir); - fs.delete(tempDir, true); - } - - @Test - public void testLoggingEnabled() throws Exception { - - LogCapturer logs = - LogCapturer.captureLogs(LoggerFactory.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME)); - - // Update configuration based on the Test. - updateFileSystemConfiguration(true); - - performWASBOperations(); - - String output = getLogOutput(logs); - assertTrue(verifyStorageClientLogs(output, TEMP_DIR), - "Log entry " + TEMP_DIR + " not found in " + output); - } - - protected String getLogOutput(LogCapturer logs) { - String output = logs.getOutput(); - assertTrue(!output.isEmpty(), "No log created/captured"); - return output; - } - - @Test - public void testLoggingDisabled() throws Exception { - - LogCapturer logs = LogCapturer.captureLogs(LoggerFactory.getLogger(Logger.ROOT_LOGGER_NAME)); - - // Update configuration based on the Test. - updateFileSystemConfiguration(false); - - performWASBOperations(); - String output = getLogOutput(logs); - - assertFalse(verifyStorageClientLogs(output, TEMP_DIR), - "Log entry " + TEMP_DIR + " found in " + output); - } - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.create(); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemConcurrencyLive.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemConcurrencyLive.java deleted file mode 100644 index d8c15f4ee4829..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemConcurrencyLive.java +++ /dev/null @@ -1,241 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - - -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.Timeout; - -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; - -/*** - * Test class to hold all Live Azure storage concurrency tests. - */ -public class ITestNativeAzureFileSystemConcurrencyLive - extends AbstractWasbTestBase { - - private static final int THREAD_COUNT = 102; - private static final int TEST_EXECUTION_TIMEOUT = 30; - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.create(); - } - - /** - * Validate contract for FileSystem.create when overwrite is true and there - * are concurrent callers of FileSystem.delete. An existing file should be - * overwritten, even if the original destination exists but is deleted by an - * external agent during the create operation. - */ - @Test - @Timeout(TEST_EXECUTION_TIMEOUT) - public void testConcurrentCreateDeleteFile() throws Exception { - Path testFile = methodPath(); - - List tasks = new ArrayList<>(THREAD_COUNT); - - for (int i = 0; i < THREAD_COUNT; i++) { - tasks.add(new CreateFileTask(fs, testFile)); - } - - ExecutorService es = null; - - try { - es = Executors.newFixedThreadPool(THREAD_COUNT); - - List> futures = es.invokeAll(tasks); - - for (Future future : futures) { - assertTrue(future.isDone()); - - // we are using Callable, so if an exception - // occurred during the operation, it will be thrown - // when we call get - assertEquals(null, future.get()); - } - } finally { - if (es != null) { - es.shutdownNow(); - } - } - } - - /** - * Validate contract for FileSystem.delete when invoked concurrently. - * One of the threads should successfully delete the file and return true; - * all other threads should return false. - */ - @Test - @Timeout(TEST_EXECUTION_TIMEOUT) - public void testConcurrentDeleteFile() throws Exception { - Path testFile = new Path("test.dat"); - fs.create(testFile).close(); - - List tasks = new ArrayList<>(THREAD_COUNT); - - for (int i = 0; i < THREAD_COUNT; i++) { - tasks.add(new DeleteFileTask(fs, testFile)); - } - - ExecutorService es = null; - try { - es = Executors.newFixedThreadPool(THREAD_COUNT); - - List> futures = es.invokeAll(tasks); - - int successCount = 0; - for (Future future : futures) { - assertTrue(future.isDone()); - - // we are using Callable, so if an exception - // occurred during the operation, it will be thrown - // when we call get - Boolean success = future.get(); - if (success) { - successCount++; - } - } - - assertEquals(1, successCount, - "Exactly one delete operation should return true."); - } finally { - if (es != null) { - es.shutdownNow(); - } - } - } - - /** - * Validate the bug fix for HADOOP-17089. 
Please note that we were never - * able to reproduce this except during a Spark job that ran for multiple days - * and in a hacked-up azure-storage SDK that added sleep before and after - * the call to factory.setNamespaceAware(true) as shown in the description of - * - * @see https://github.com/Azure/azure-storage-java/pull/546 - */ - @Test - @Timeout(TEST_EXECUTION_TIMEOUT) - public void testConcurrentList() throws Exception { - final Path testDir = new Path("/tmp/data-loss/11230174258112/_temporary/0/_temporary/attempt_20200624190514_0006_m_0"); - final Path testFile = new Path(testDir, "part-00004-15ea87b1-312c-4fdf-1820-95afb3dfc1c3-a010.snappy.parquet"); - fs.create(testFile).close(); - List tasks = new ArrayList<>(THREAD_COUNT); - - for (int i = 0; i < THREAD_COUNT; i++) { - tasks.add(new ListTask(fs, testDir)); - } - - ExecutorService es = null; - try { - es = Executors.newFixedThreadPool(THREAD_COUNT); - - List> futures = es.invokeAll(tasks); - - for (Future future : futures) { - assertTrue(future.isDone()); - - // we are using Callable, so if an exception - // occurred during the operation, it will be thrown - // when we call get - long fileCount = future.get(); - assertEquals(1, fileCount, "The list should always contain 1 file."); - } - } finally { - if (es != null) { - es.shutdownNow(); - } - } - } - - abstract class FileSystemTask implements Callable { - private final FileSystem fileSystem; - private final Path path; - - FileSystem getFileSystem() { - return this.fileSystem; - } - - Path getFilePath() { - return this.path; - } - - FileSystemTask(FileSystem fs, Path p) { - this.fileSystem = fs; - this.path = p; - } - - public abstract V call() throws Exception; - } - - class DeleteFileTask extends FileSystemTask { - - DeleteFileTask(FileSystem fs, Path p) { - super(fs, p); - } - - @Override - public Boolean call() throws Exception { - return this.getFileSystem().delete(this.getFilePath(), false); - } - } - - class CreateFileTask extends FileSystemTask { - CreateFileTask(FileSystem fs, Path p) { - super(fs, p); - } - - public Void call() throws Exception { - FileSystem fs = getFileSystem(); - Path p = getFilePath(); - - // Create an empty file and close the stream. - FSDataOutputStream stream = fs.create(p, true); - stream.close(); - - // Delete the file. We don't care if delete returns true or false. - // We just want to ensure the file does not exist. - this.getFileSystem().delete(this.getFilePath(), false); - - return null; - } - } - - class ListTask extends FileSystemTask { - ListTask(FileSystem fs, Path p) { - super(fs, p); - } - - public Integer call() throws Exception { - FileSystem fs = getFileSystem(); - Path p = getFilePath(); - FileStatus[] files = fs.listStatus(p); - return files.length; - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemContractEmulator.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemContractEmulator.java deleted file mode 100644 index 2cc7592dc1f9b..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemContractEmulator.java +++ /dev/null @@ -1,67 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import static org.assertj.core.api.Assumptions.assumeThat; - -import org.apache.hadoop.fs.FileSystemContractBaseTest; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.azure.integration.AzureTestUtils; -import org.apache.hadoop.test.TestName; - -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.extension.RegisterExtension; - -/** - * Run the {@code FileSystemContractBaseTest} tests against the emulator - */ -public class ITestNativeAzureFileSystemContractEmulator extends - FileSystemContractBaseTest { - private AzureBlobStorageTestAccount testAccount; - private Path basePath; - - @RegisterExtension - private TestName methodName = new TestName(); - - private void nameThread() { - Thread.currentThread().setName("JUnit-" + methodName.getMethodName()); - } - - @BeforeEach - public void setUp() throws Exception { - nameThread(); - testAccount = AzureBlobStorageTestAccount.createForEmulator(); - if (testAccount != null) { - fs = testAccount.getFileSystem(); - } - assumeThat(fs) - .as("FileSystem must not be null for this test") - .isNotNull(); - basePath = fs.makeQualified( - AzureTestUtils.createTestPath( - new Path("ITestNativeAzureFileSystemContractEmulator"))); - } - - @Override - public void tearDown() throws Exception { - super.tearDown(); - testAccount = AzureTestUtils.cleanup(testAccount); - fs = null; - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemContractLive.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemContractLive.java deleted file mode 100644 index 702b9c6375ce6..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemContractLive.java +++ /dev/null @@ -1,107 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure; - -import static org.assertj.core.api.Assumptions.assumeThat; - -import org.apache.hadoop.fs.FileSystemContractBaseTest; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.azure.integration.AzureTestConstants; -import org.apache.hadoop.fs.azure.integration.AzureTestUtils; -import org.apache.hadoop.test.TestName; - -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -/** - * Run the {@link FileSystemContractBaseTest} test suite against azure storage. - */ -public class ITestNativeAzureFileSystemContractLive extends - FileSystemContractBaseTest { - private AzureBlobStorageTestAccount testAccount; - private Path basePath; - - @RegisterExtension - private TestName methodName = new TestName(); - - private void nameThread() { - Thread.currentThread().setName("JUnit-" + methodName.getMethodName()); - } - - @BeforeEach - public void setUp() throws Exception { - nameThread(); - testAccount = AzureBlobStorageTestAccount.create(); - if (testAccount != null) { - fs = testAccount.getFileSystem(); - } - assumeThat(fs).isNotNull(); - basePath = fs.makeQualified( - AzureTestUtils.createTestPath( - new Path("NativeAzureFileSystemContractLive"))); - } - - @Override - public void tearDown() throws Exception { - super.tearDown(); - testAccount = AzureTestUtils.cleanup(testAccount); - fs = null; - } - - @Override - public Path getTestBaseDir() { - return basePath; - } - - protected int getGlobalTimeout() { - return AzureTestConstants.AZURE_TEST_TIMEOUT; - } - - /** - * The following tests are failing on Azure and the Azure - * file system code needs to be modified to make them pass. - * A separate work item has been opened for this. - */ - @Disabled - @Test - public void testMoveFileUnderParent() throws Throwable { - } - - @Disabled - @Test - public void testRenameFileToSelf() throws Throwable { - } - - @Disabled - @Test - public void testRenameChildDirForbidden() throws Exception { - } - - @Disabled - @Test - public void testMoveDirUnderParent() throws Throwable { - } - - @Disabled - @Test - public void testRenameDirToSelf() throws Throwable { - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemContractPageBlobLive.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemContractPageBlobLive.java deleted file mode 100644 index 5f0e951142b7c..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemContractPageBlobLive.java +++ /dev/null @@ -1,115 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure; - -import static org.assertj.core.api.Assumptions.assumeThat; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystemContractBaseTest; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.azure.integration.AzureTestConstants; -import org.apache.hadoop.fs.azure.integration.AzureTestUtils; - -import org.apache.hadoop.test.TestName; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.extension.RegisterExtension; - -/** - * Run the {@link FileSystemContractBaseTest} test suite against azure - * storage, after switching the FS using page blobs everywhere. - */ -public class ITestNativeAzureFileSystemContractPageBlobLive extends - FileSystemContractBaseTest { - private AzureBlobStorageTestAccount testAccount; - private Path basePath; - @RegisterExtension - private TestName methodName = new TestName(); - - private void nameThread() { - Thread.currentThread().setName("JUnit-" + methodName.getMethodName()); - } - - private AzureBlobStorageTestAccount createTestAccount() - throws Exception { - Configuration conf = new Configuration(); - - // Configure the page blob directories key so every file created is a page blob. - conf.set(AzureNativeFileSystemStore.KEY_PAGE_BLOB_DIRECTORIES, "/"); - - // Configure the atomic rename directories key so every folder will have - // atomic rename applied. - conf.set(AzureNativeFileSystemStore.KEY_ATOMIC_RENAME_DIRECTORIES, "/"); - return AzureBlobStorageTestAccount.create(conf); - } - - @BeforeEach - public void setUp() throws Exception { - testAccount = createTestAccount(); - assumeThat(testAccount).isNotNull(); - fs = testAccount.getFileSystem(); - basePath = AzureTestUtils.pathForTests(fs, "filesystemcontractpageblob"); - } - - @Override - public void tearDown() throws Exception { - testAccount = AzureTestUtils.cleanup(testAccount); - fs = null; - } - - protected int getGlobalTimeout() { - return AzureTestConstants.AZURE_TEST_TIMEOUT; - } - - @Override - public Path getTestBaseDir() { - return basePath; - } - - /** - * The following tests are failing on Azure and the Azure - * file system code needs to be modified to make them pass. - * A separate work item has been opened for this. - */ - @Disabled - @Test - public void testMoveFileUnderParent() throws Throwable { - } - - @Disabled - @Test - public void testRenameFileToSelf() throws Throwable { - } - - @Disabled - @Test - public void testRenameChildDirForbidden() throws Exception { - } - - @Disabled - @Test - public void testMoveDirUnderParent() throws Throwable { - } - - @Disabled - @Test - public void testRenameDirToSelf() throws Throwable { - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemLive.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemLive.java deleted file mode 100644 index fd2ba05a35ac4..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeAzureFileSystemLive.java +++ /dev/null @@ -1,309 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.IOException; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.List; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicReference; - -import org.apache.hadoop.fs.contract.ContractTestUtils; -import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.util.concurrent.SubjectInheritingThread; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsPermission; - -import org.junit.jupiter.api.Test; - -import com.microsoft.azure.storage.StorageException; - -/** - * Tests the Native Azure file system (WASB) against an actual blob store. - */ -public class ITestNativeAzureFileSystemLive extends - NativeAzureFileSystemBaseTest { - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.create(); - } - - /** - * Tests the rename file operation to ensure that when there are multiple - * attempts to rename a file to the same destination, only one rename - * operation is successful (HADOOP-15086). 
-   */
-  @Test
-  public void testMultipleRenameFileOperationsToSameDestination()
-      throws IOException, InterruptedException {
-    final CountDownLatch latch = new CountDownLatch(1);
-    final AtomicInteger successfulRenameCount = new AtomicInteger(0);
-    final AtomicReference<IOException> unexpectedError = new AtomicReference<IOException>();
-    final Path dest = path("dest");
-
-    // Run 10 threads to rename multiple files to the same target path
-    List<Thread> threads = new ArrayList<>();
-
-    for (int i = 0; i < 10; i++) {
-      final int threadNumber = i;
-      Path src = path("test" + threadNumber);
-      threads.add(new SubjectInheritingThread(() -> {
-        try {
-          latch.await(Long.MAX_VALUE, TimeUnit.SECONDS);
-        } catch (InterruptedException e) {
-        }
-        try {
-          try (OutputStream output = fs.create(src)) {
-            output.write(("Source file number " + threadNumber).getBytes());
-          }
-
-          if (fs.rename(src, dest)) {
-            LOG.info("rename succeeded for thread " + threadNumber);
-            successfulRenameCount.incrementAndGet();
-          }
-        } catch (IOException e) {
-          unexpectedError.compareAndSet(null, e);
-          ContractTestUtils.fail("Exception unexpected", e);
-        }
-      }));
-    }
-
-    // Start each thread
-    threads.forEach(t -> t.start());
-
-    // Wait for threads to start and wait on latch
-    Thread.sleep(2000);
-
-    // Now start to rename
-    latch.countDown();
-
-    // Wait for all threads to complete
-    threads.forEach(t -> {
-      try {
-        t.join();
-      } catch (InterruptedException e) {
-      }
-    });
-
-    if (unexpectedError.get() != null) {
-      throw unexpectedError.get();
-    }
-    assertEquals(1, successfulRenameCount.get());
-    LOG.info("Success, only one rename operation succeeded!");
-  }
-
-  @Test
-  public void testLazyRenamePendingCanOverwriteExistingFile()
-      throws Exception {
-    final String srcFile = "srcFile";
-    final String dstFile = "dstFile";
-    Path srcPath = path(srcFile);
-    FSDataOutputStream srcStream = fs.create(srcPath);
-    assertTrue(fs.exists(srcPath));
-    Path dstPath = path(dstFile);
-    FSDataOutputStream dstStream = fs.create(dstPath);
-    assertTrue(fs.exists(dstPath));
-    NativeAzureFileSystem nfs = fs;
-    final String fullSrcKey = nfs.pathToKey(nfs.makeAbsolute(srcPath));
-    final String fullDstKey = nfs.pathToKey(nfs.makeAbsolute(dstPath));
-    nfs.getStoreInterface().rename(fullSrcKey, fullDstKey, true, null);
-    assertTrue(fs.exists(dstPath));
-    assertFalse(fs.exists(srcPath));
-    IOUtils.cleanupWithLogger(null, srcStream);
-    IOUtils.cleanupWithLogger(null, dstStream);
-  }
-  /**
-   * Tests fs.delete() on a blob while another client is holding a
-   * lease on it.
-   * Delete, if called without a lease, should fail when another process
-   * is holding a lease, and should throw an appropriate exception.
-   * This is a scenario that would happen in HMaster startup when it tries to
-   * clean up the temp dirs while the HMaster process which was killed earlier
-   * held a lease on the blob while doing some DDL operation.
-   */
-  @Test
-  public void testDeleteThrowsExceptionWithLeaseExistsErrorMessage()
-      throws Exception {
-    LOG.info("Starting test");
-    // Create the file
-    Path path = methodPath();
-    fs.create(path);
-    assertPathExists("test file", path);
-    NativeAzureFileSystem nfs = fs;
-    final String fullKey = nfs.pathToKey(nfs.makeAbsolute(path));
-    final AzureNativeFileSystemStore store = nfs.getStore();
-
-    // Acquire the lease on the file in a background thread
-    final CountDownLatch leaseAttemptComplete = new CountDownLatch(1);
-    final CountDownLatch beginningDeleteAttempt = new CountDownLatch(1);
-    SubjectInheritingThread t = new SubjectInheritingThread() {
-      @Override
-      public void work() {
-        // Acquire the lease and then signal the main test thread.
-        SelfRenewingLease lease = null;
-        try {
-          lease = store.acquireLease(fullKey);
-          LOG.info("Lease acquired: " + lease.getLeaseID());
-        } catch (AzureException e) {
-          LOG.warn("Lease acquisition thread unable to acquire lease", e);
-        } finally {
-          leaseAttemptComplete.countDown();
-        }
-
-        // Wait for the main test thread to signal it will attempt the delete.
-        try {
-          beginningDeleteAttempt.await();
-        } catch (InterruptedException e) {
-          Thread.currentThread().interrupt();
-        }
-
-        // Keep holding the lease past the lease acquisition retry interval, so
-        // the test covers the case of delete retrying to acquire the lease.
-        try {
-          Thread.sleep(SelfRenewingLease.LEASE_ACQUIRE_RETRY_INTERVAL * 3);
-        } catch (InterruptedException ex) {
-          Thread.currentThread().interrupt();
-        }
-
-        try {
-          if (lease != null) {
-            LOG.info("Freeing lease");
-            lease.free();
-          }
-        } catch (StorageException se) {
-          LOG.warn("Unable to free lease.", se);
-        }
-      }
-    };
-
-    // Start the background thread and wait for it to signal the lease is held.
-    t.start();
-    try {
-      leaseAttemptComplete.await();
-    } catch (InterruptedException ex) {
-      Thread.currentThread().interrupt();
-    }
-
-    // Try to delete the same file
-    beginningDeleteAttempt.countDown();
-    store.delete(fullKey);
-
-    // At this point the file SHOULD BE DELETED
-    assertPathDoesNotExist("Leased path", path);
-  }
-
-  /**
-   * Check that isPageBlobKey works as expected. This assumes that
-   * in the test configuration, the list of supported page blob directories
-   * only includes "pageBlobs". That's why this test is made specific
-   * to this subclass.
-   */
-  @Test
-  public void testIsPageBlobKey() {
-    AzureNativeFileSystemStore store = fs.getStore();
-
-    // Use literal strings so it's easier to understand the tests.
-    // In case the constant changes, we want to know about it so we can update this test.
-    assertEquals(AzureBlobStorageTestAccount.DEFAULT_PAGE_BLOB_DIRECTORY, "pageBlobs");
-
-    // URI prefix for test environment.
-    String uriPrefix = "file:///";
-
-    // negative tests
-    String[] negativeKeys = { "", "/", "bar", "bar/", "bar/pageBlobs", "bar/pageBlobs/foo",
-        "bar/pageBlobs/foo/", "/pageBlobs/", "/pageBlobs", "pageBlobsxyz/" };
-    for (String s : negativeKeys) {
-      assertFalse(store.isPageBlobKey(s));
-      assertFalse(store.isPageBlobKey(uriPrefix + s));
-    }
-
-    // positive tests
-    String[] positiveKeys = { "pageBlobs/", "pageBlobs/foo/", "pageBlobs/foo/bar/" };
-    for (String s : positiveKeys) {
-      assertTrue(store.isPageBlobKey(s));
-      assertTrue(store.isPageBlobKey(uriPrefix + s));
-    }
-  }
-
-  /**
-   * Test that isAtomicRenameKey() works as expected.
-   */
-  @Test
-  public void testIsAtomicRenameKey() {
-
-    AzureNativeFileSystemStore store = fs.getStore();
-
-    // We want to know if the default configuration changes so we can fix
-    // this test.
-    assertEquals(AzureBlobStorageTestAccount.DEFAULT_ATOMIC_RENAME_DIRECTORIES,
-        "/atomicRenameDir1,/atomicRenameDir2");
-
-    // URI prefix for test environment.
-    String uriPrefix = "file:///";
-
-    // negative tests
-    String[] negativeKeys = { "", "/", "bar", "bar/", "bar/hbase",
-        "bar/hbase/foo", "bar/hbase/foo/", "/hbase/", "/hbase",
-        "hbasexyz/", "foo/atomicRenameDir1/" };
-    for (String s : negativeKeys) {
-      assertFalse(store.isAtomicRenameKey(s));
-      assertFalse(store.isAtomicRenameKey(uriPrefix + s));
-    }
-
-    // Positive tests. The directories for atomic rename are /hbase
-    // plus the ones in the configuration (DEFAULT_ATOMIC_RENAME_DIRECTORIES
-    // for this test).
-    String[] positiveKeys = { "hbase/", "hbase/foo/", "hbase/foo/bar/",
-        "atomicRenameDir1/foo/", "atomicRenameDir2/bar/" };
-    for (String s : positiveKeys) {
-      assertTrue(store.isAtomicRenameKey(s));
-      assertTrue(store.isAtomicRenameKey(uriPrefix + s));
-    }
-  }
-
-  /**
-   * Tests fs.mkdir() function to create a target blob while another thread
-   * is holding the lease on the blob. mkdir should not fail since the blob
-   * already exists.
-   * This is a scenario that would happen in HBase distributed log splitting.
-   * Multiple threads will try to create and update the "recovered.edits" folder
-   * under the same path.
-   */
-  @Test
-  public void testMkdirOnExistingFolderWithLease() throws Exception {
-    SelfRenewingLease lease;
-    // Create the folder
-    Path path = methodPath();
-    fs.mkdirs(path);
-    NativeAzureFileSystem nfs = fs;
-    String fullKey = nfs.pathToKey(nfs.makeAbsolute(path));
-    AzureNativeFileSystemStore store = nfs.getStore();
-    // Acquire the lease on the folder
-    lease = store.acquireLease(fullKey);
-    assertNotNull(lease.getLeaseID(), "lease ID");
-    // Try to create the same folder
-    store.storeEmptyFolder(fullKey,
-        nfs.createPermissionStatus(FsPermission.getDirDefault()));
-    lease.free();
-  }
-}
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeFileSystemStatistics.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeFileSystemStatistics.java
deleted file mode 100644
index 9248507b7055e..0000000000000
--- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestNativeFileSystemStatistics.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.
- * The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.fs.azure;
-
-import org.junit.jupiter.api.MethodOrderer;
-import org.junit.jupiter.api.Test;
-import org.junit.jupiter.api.TestMethodOrder;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-
-import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.cleanupTestAccount;
-import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.readStringFromFile;
-import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.writeStringToFile;
-
-@TestMethodOrder(MethodOrderer.Alphanumeric.class)
-/**
- * Because FileSystem.Statistics is tracked per FileSystem class, these
- * statistics cannot be gathered in parallel; this test class therefore
- * forces its cases to run sequentially.
- */
-public class ITestNativeFileSystemStatistics extends AbstractWasbTestWithTimeout {
-
-  @Test
-  public void test_001_NativeAzureFileSystemMocked() throws Exception {
-    AzureBlobStorageTestAccount testAccount = AzureBlobStorageTestAccount.createMock();
-    assumeNotNull(testAccount);
-    testStatisticsWithAccount(testAccount);
-  }
-
-  @Test
-  public void test_002_NativeAzureFileSystemPageBlobLive() throws Exception {
-    Configuration conf = new Configuration();
-    // Configure the page blob directories key so every file created is a page blob.
-    conf.set(AzureNativeFileSystemStore.KEY_PAGE_BLOB_DIRECTORIES, "/");
-
-    // Configure the atomic rename directories key so every folder will have
-    // atomic rename applied.
-    conf.set(AzureNativeFileSystemStore.KEY_ATOMIC_RENAME_DIRECTORIES, "/");
-    AzureBlobStorageTestAccount testAccount = AzureBlobStorageTestAccount.create(conf);
-    assumeNotNull(testAccount);
-    testStatisticsWithAccount(testAccount);
-  }
-
-  @Test
-  public void test_003_NativeAzureFileSystem() throws Exception {
-    AzureBlobStorageTestAccount testAccount = AzureBlobStorageTestAccount.create();
-    assumeNotNull(testAccount);
-    testStatisticsWithAccount(testAccount);
-  }
-
-  private void testStatisticsWithAccount(AzureBlobStorageTestAccount testAccount) throws Exception {
-    assumeNotNull(testAccount);
-    NativeAzureFileSystem fs = testAccount.getFileSystem();
-    testStatistics(fs);
-    cleanupTestAccount(testAccount);
-  }
-
-  /**
-   * When tests are run in parallel, this test will fail because
-   * FileSystem.Statistics is per FileSystem class.
- */ - @SuppressWarnings("deprecation") - private void testStatistics(NativeAzureFileSystem fs) throws Exception { - FileSystem.clearStatistics(); - FileSystem.Statistics stats = FileSystem.getStatistics("wasb", - NativeAzureFileSystem.class); - assertEquals(0, stats.getBytesRead()); - assertEquals(0, stats.getBytesWritten()); - Path newFile = new Path("testStats"); - writeStringToFile(fs, newFile, "12345678"); - assertEquals(8, stats.getBytesWritten()); - assertEquals(0, stats.getBytesRead()); - String readBack = readStringFromFile(fs, newFile); - assertEquals("12345678", readBack); - assertEquals(8, stats.getBytesRead()); - assertEquals(8, stats.getBytesWritten()); - assertTrue(fs.delete(newFile, true)); - assertEquals(8, stats.getBytesRead()); - assertEquals(8, stats.getBytesWritten()); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestOutOfBandAzureBlobOperationsLive.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestOutOfBandAzureBlobOperationsLive.java deleted file mode 100644 index 9b14da71f1dbc..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestOutOfBandAzureBlobOperationsLive.java +++ /dev/null @@ -1,185 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.security.UserGroupInformation; -import org.junit.jupiter.api.Test; - -import com.microsoft.azure.storage.blob.BlobOutputStream; -import com.microsoft.azure.storage.blob.CloudBlockBlob; - -/** - * Live blob operations. - */ -public class ITestOutOfBandAzureBlobOperationsLive extends AbstractWasbTestBase { - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.create(); - } - - // scenario for this particular test described at MONARCH-HADOOP-764 - // creating a file out-of-band would confuse mkdirs("") - // eg oob creation of "user//testFolder/a/input/file" - // Then wasb creation of "user//testFolder/a/output" fails - @Test - public void outOfBandFolder_uncleMkdirs() throws Exception { - - // NOTE: manual use of CloubBlockBlob targets working directory explicitly. - // WASB driver methods prepend working directory implicitly. 
- String workingDir = "user/" - + UserGroupInformation.getCurrentUser().getShortUserName() + "/"; - - CloudBlockBlob blob = testAccount.getBlobReference(workingDir - + "testFolder1/a/input/file"); - BlobOutputStream s = blob.openOutputStream(); - s.close(); - assertTrue(fs.exists(new Path("testFolder1/a/input/file"))); - - Path targetFolder = new Path("testFolder1/a/output"); - assertTrue(fs.mkdirs(targetFolder)); - } - - // scenario for this particular test described at MONARCH-HADOOP-764 - @Test - public void outOfBandFolder_parentDelete() throws Exception { - - // NOTE: manual use of CloubBlockBlob targets working directory explicitly. - // WASB driver methods prepend working directory implicitly. - String workingDir = "user/" - + UserGroupInformation.getCurrentUser().getShortUserName() + "/"; - CloudBlockBlob blob = testAccount.getBlobReference(workingDir - + "testFolder2/a/input/file"); - BlobOutputStream s = blob.openOutputStream(); - s.close(); - assertTrue(fs.exists(new Path("testFolder2/a/input/file"))); - - Path targetFolder = new Path("testFolder2/a/input"); - assertTrue(fs.delete(targetFolder, true)); - } - - @Test - public void outOfBandFolder_rootFileDelete() throws Exception { - - CloudBlockBlob blob = testAccount.getBlobReference("fileY"); - BlobOutputStream s = blob.openOutputStream(); - s.close(); - assertTrue(fs.exists(new Path("/fileY"))); - assertTrue(fs.delete(new Path("/fileY"), true)); - } - - @Test - public void outOfBandFolder_firstLevelFolderDelete() throws Exception { - - CloudBlockBlob blob = testAccount.getBlobReference("folderW/file"); - BlobOutputStream s = blob.openOutputStream(); - s.close(); - assertTrue(fs.exists(new Path("/folderW"))); - assertTrue(fs.exists(new Path("/folderW/file"))); - assertTrue(fs.delete(new Path("/folderW"), true)); - } - - // scenario for this particular test described at MONARCH-HADOOP-764 - @Test - public void outOfBandFolder_siblingCreate() throws Exception { - - // NOTE: manual use of CloubBlockBlob targets working directory explicitly. - // WASB driver methods prepend working directory implicitly. - String workingDir = "user/" - + UserGroupInformation.getCurrentUser().getShortUserName() + "/"; - CloudBlockBlob blob = testAccount.getBlobReference(workingDir - + "testFolder3/a/input/file"); - BlobOutputStream s = blob.openOutputStream(); - s.close(); - assertTrue(fs.exists(new Path("testFolder3/a/input/file"))); - - Path targetFile = new Path("testFolder3/a/input/file2"); - FSDataOutputStream s2 = fs.create(targetFile); - s2.close(); - } - - // scenario for this particular test described at MONARCH-HADOOP-764 - // creating a new file in the root folder - @Test - public void outOfBandFolder_create_rootDir() throws Exception { - Path targetFile = new Path("/newInRoot"); - FSDataOutputStream s2 = fs.create(targetFile); - s2.close(); - } - - // scenario for this particular test described at MONARCH-HADOOP-764 - @Test - public void outOfBandFolder_rename() throws Exception { - - // NOTE: manual use of CloubBlockBlob targets working directory explicitly. - // WASB driver methods prepend working directory implicitly. 
- String workingDir = "user/" - + UserGroupInformation.getCurrentUser().getShortUserName() + "/"; - CloudBlockBlob blob = testAccount.getBlobReference(workingDir - + "testFolder4/a/input/file"); - BlobOutputStream s = blob.openOutputStream(); - s.close(); - - Path srcFilePath = new Path("testFolder4/a/input/file"); - assertTrue(fs.exists(srcFilePath)); - - Path destFilePath = new Path("testFolder4/a/input/file2"); - fs.rename(srcFilePath, destFilePath); - } - - // Verify that you can rename a file which is the only file in an implicit folder in the - // WASB file system. - // scenario for this particular test described at MONARCH-HADOOP-892 - @Test - public void outOfBandSingleFile_rename() throws Exception { - - //NOTE: manual use of CloubBlockBlob targets working directory explicitly. - // WASB driver methods prepend working directory implicitly. - String workingDir = "user/" + UserGroupInformation.getCurrentUser().getShortUserName() + "/"; - CloudBlockBlob blob = testAccount.getBlobReference(workingDir + "testFolder5/a/input/file"); - BlobOutputStream s = blob.openOutputStream(); - s.close(); - - Path srcFilePath = new Path("testFolder5/a/input/file"); - assertTrue(fs.exists(srcFilePath)); - - Path destFilePath = new Path("testFolder5/file2"); - fs.rename(srcFilePath, destFilePath); - } - - // WASB must force explicit parent directories in create, delete, mkdirs, rename. - // scenario for this particular test described at MONARCH-HADOOP-764 - @Test - public void outOfBandFolder_rename_rootLevelFiles() throws Exception { - - // NOTE: manual use of CloubBlockBlob targets working directory explicitly. - // WASB driver methods prepend working directory implicitly. - CloudBlockBlob blob = testAccount.getBlobReference("fileX"); - BlobOutputStream s = blob.openOutputStream(); - s.close(); - - Path srcFilePath = new Path("/fileX"); - assertTrue(fs.exists(srcFilePath)); - - Path destFilePath = new Path("/fileXrename"); - fs.rename(srcFilePath, destFilePath); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestOutputStreamSemantics.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestOutputStreamSemantics.java deleted file mode 100644 index d7f968d71e7d2..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestOutputStreamSemantics.java +++ /dev/null @@ -1,424 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.EnumSet; -import java.util.Random; - -import com.microsoft.azure.storage.blob.BlockEntry; -import com.microsoft.azure.storage.blob.BlockListingFilter; -import com.microsoft.azure.storage.blob.CloudBlockBlob; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.Path; - -import org.apache.hadoop.fs.StreamCapabilities; -import org.junit.jupiter.api.Test; - -import static org.apache.hadoop.fs.contract.ContractTestUtils.assertHasStreamCapabilities; -import static org.apache.hadoop.fs.contract.ContractTestUtils.assertLacksStreamCapabilities; -import static org.assertj.core.api.Assertions.assertThat; - -/** - * Test semantics of functions flush, hflush, hsync, and close for block blobs, - * block blobs with compaction, and page blobs. - */ -public class ITestOutputStreamSemantics extends AbstractWasbTestBase { - - private static final String PAGE_BLOB_DIR = "/pageblob"; - private static final String BLOCK_BLOB_DIR = "/blockblob"; - private static final String BLOCK_BLOB_COMPACTION_DIR = "/compaction"; - - private byte[] getRandomBytes() { - byte[] buffer = new byte[PageBlobFormatHelpers.PAGE_SIZE - - PageBlobFormatHelpers.PAGE_HEADER_SIZE]; - Random rand = new Random(); - rand.nextBytes(buffer); - return buffer; - } - - private Path getBlobPathWithTestName(String parentDir, String name) { - return new Path(parentDir + "/" + name); - } - - private void validate(Path path, byte[] writeBuffer, boolean isEqual) - throws IOException { - String blobPath = path.toUri().getPath(); - try (FSDataInputStream inputStream = fs.open(path)) { - byte[] readBuffer = new byte[PageBlobFormatHelpers.PAGE_SIZE - - PageBlobFormatHelpers.PAGE_HEADER_SIZE]; - int numBytesRead = inputStream.read(readBuffer, 0, readBuffer.length); - - if (isEqual) { - assertArrayEquals(writeBuffer, readBuffer, - String.format("Bytes read do not match bytes written to %1$s", blobPath)); - } else { - assertThat(readBuffer).isNotEqualTo(writeBuffer).as( - String.format("Bytes read unexpectedly match bytes written to %1$s", blobPath)); - } - } - } - - private boolean isBlockBlobAppendStreamWrapper(FSDataOutputStream stream) { - return - ((SyncableDataOutputStream) - ((NativeAzureFileSystem.NativeAzureFsOutputStream) - stream.getWrappedStream()) - .getOutStream()) - .getOutStream() - instanceof BlockBlobAppendStream; - } - - private boolean isPageBlobStreamWrapper(FSDataOutputStream stream) { - return - ((SyncableDataOutputStream) stream.getWrappedStream()) - .getOutStream() - instanceof PageBlobOutputStream; - } - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - Configuration conf = new Configuration(); - - // Configure the page blob directories - conf.set(AzureNativeFileSystemStore.KEY_PAGE_BLOB_DIRECTORIES, PAGE_BLOB_DIR); - - // Configure the block blob with compaction directories - conf.set(AzureNativeFileSystemStore.KEY_BLOCK_BLOB_WITH_COMPACTION_DIRECTORIES, - BLOCK_BLOB_COMPACTION_DIR); - - return AzureBlobStorageTestAccount.create( - "", - EnumSet.of(AzureBlobStorageTestAccount.CreateOptions.CreateContainer), - conf); - } - - // Verify flush writes data to storage for Page Blobs - @Test - public void testPageBlobFlush() throws IOException { - Path path = getBlobPathWithTestName(PAGE_BLOB_DIR, methodName.getMethodName()); - - try (FSDataOutputStream 
stream = fs.create(path)) { - byte[] buffer = getRandomBytes(); - stream.write(buffer); - stream.flush(); - - // flush is asynchronous for Page Blob, so we need to - // wait for it to complete - SyncableDataOutputStream syncStream = - (SyncableDataOutputStream) stream.getWrappedStream(); - PageBlobOutputStream pageBlobStream = - (PageBlobOutputStream)syncStream.getOutStream(); - pageBlobStream.waitForLastFlushCompletion(); - - validate(path, buffer, true); - } - } - - - // Verify hflush writes data to storage for Page Blobs - @Test - public void testPageBlobHFlush() throws IOException { - Path path = getBlobPathWithTestName(PAGE_BLOB_DIR, methodName.getMethodName()); - - try (FSDataOutputStream stream = fs.create(path)) { - assertTrue(isPageBlobStreamWrapper(stream)); - byte[] buffer = getRandomBytes(); - stream.write(buffer); - stream.hflush(); - validate(path, buffer, true); - } - } - - // HSync must write data to storage for Page Blobs - @Test - public void testPageBlobHSync() throws IOException { - Path path = getBlobPathWithTestName(PAGE_BLOB_DIR, methodName.getMethodName()); - - try (FSDataOutputStream stream = fs.create(path)) { - assertTrue(isPageBlobStreamWrapper(stream)); - byte[] buffer = getRandomBytes(); - stream.write(buffer); - stream.hsync(); - validate(path, buffer, true); - } - } - - // Close must write data to storage for Page Blobs - @Test - public void testPageBlobClose() throws IOException { - Path path = getBlobPathWithTestName(PAGE_BLOB_DIR, methodName.getMethodName()); - - try (FSDataOutputStream stream = fs.create(path)) { - assertTrue(isPageBlobStreamWrapper(stream)); - byte[] buffer = getRandomBytes(); - stream.write(buffer); - stream.close(); - validate(path, buffer, true); - } - } - - // Page Blobs have StreamCapabilities.HFLUSH and StreamCapabilities.HSYNC. 
- @Test - public void testPageBlobCapabilities() throws IOException { - Path path = getBlobPathWithTestName(PAGE_BLOB_DIR, methodName.getMethodName()); - try (FSDataOutputStream stream = fs.create(path)) { - assertHasStreamCapabilities(stream, - StreamCapabilities.HFLUSH, - StreamCapabilities.HSYNC); - assertLacksStreamCapabilities(stream, - StreamCapabilities.DROPBEHIND, - StreamCapabilities.READAHEAD, - StreamCapabilities.UNBUFFER); - stream.write(getRandomBytes()); - } - } - - // Verify flush does not write data to storage for Block Blobs - @Test - public void testBlockBlobFlush() throws Exception { - Path path = getBlobPathWithTestName(BLOCK_BLOB_DIR, methodName.getMethodName()); - byte[] buffer = getRandomBytes(); - - try (FSDataOutputStream stream = fs.create(path)) { - for (int i = 0; i < 10; i++) { - stream.write(buffer); - stream.flush(); - } - } - String blobPath = path.toUri().getPath(); - // Create a blob reference to read and validate the block list - CloudBlockBlob blob = testAccount.getBlobReference(blobPath.substring(1)); - // after the stream is closed, the block list should be non-empty - ArrayList blockList = blob.downloadBlockList( - BlockListingFilter.COMMITTED, - null,null, null); - assertEquals(1, blockList.size()); - } - - // Verify hflush does not write data to storage for Block Blobs - @Test - public void testBlockBlobHFlush() throws Exception { - Path path = getBlobPathWithTestName(BLOCK_BLOB_DIR, methodName.getMethodName()); - byte[] buffer = getRandomBytes(); - - try (FSDataOutputStream stream = fs.create(path)) { - for (int i = 0; i < 10; i++) { - stream.write(buffer); - stream.hflush(); - } - } - String blobPath = path.toUri().getPath(); - // Create a blob reference to read and validate the block list - CloudBlockBlob blob = testAccount.getBlobReference(blobPath.substring(1)); - // after the stream is closed, the block list should be non-empty - ArrayList blockList = blob.downloadBlockList( - BlockListingFilter.COMMITTED, - null,null, null); - assertEquals(1, blockList.size()); - } - - // Verify hsync does not write data to storage for Block Blobs - @Test - public void testBlockBlobHSync() throws Exception { - Path path = getBlobPathWithTestName(BLOCK_BLOB_DIR, methodName.getMethodName()); - byte[] buffer = getRandomBytes(); - - try (FSDataOutputStream stream = fs.create(path)) { - for (int i = 0; i < 10; i++) { - stream.write(buffer); - stream.hsync(); - } - } - String blobPath = path.toUri().getPath(); - // Create a blob reference to read and validate the block list - CloudBlockBlob blob = testAccount.getBlobReference(blobPath.substring(1)); - // after the stream is closed, the block list should be non-empty - ArrayList blockList = blob.downloadBlockList( - BlockListingFilter.COMMITTED, - null,null, null); - assertEquals(1, blockList.size()); - } - - // Close must write data to storage for Block Blobs - @Test - public void testBlockBlobClose() throws IOException { - Path path = getBlobPathWithTestName(BLOCK_BLOB_DIR, methodName.getMethodName()); - - try (FSDataOutputStream stream = fs.create(path)) { - byte[] buffer = getRandomBytes(); - stream.write(buffer); - stream.close(); - validate(path, buffer, true); - } - } - - // Block Blobs do not have any StreamCapabilities. 
- @Test - public void testBlockBlobCapabilities() throws IOException { - Path path = getBlobPathWithTestName(BLOCK_BLOB_DIR, methodName.getMethodName()); - try (FSDataOutputStream stream = fs.create(path)) { - assertLacksStreamCapabilities(stream, - StreamCapabilities.HFLUSH, - StreamCapabilities.HSYNC, - StreamCapabilities.DROPBEHIND, - StreamCapabilities.READAHEAD, - StreamCapabilities.UNBUFFER); - stream.write(getRandomBytes()); - } - } - - // Verify flush writes data to storage for Block Blobs with compaction - @Test - public void testBlockBlobCompactionFlush() throws Exception { - Path path = getBlobPathWithTestName(BLOCK_BLOB_COMPACTION_DIR, methodName.getMethodName()); - byte[] buffer = getRandomBytes(); - - try (FSDataOutputStream stream = fs.create(path)) { - assertTrue(isBlockBlobAppendStreamWrapper(stream)); - for (int i = 0; i < 10; i++) { - stream.write(buffer); - stream.flush(); - } - } - String blobPath = path.toUri().getPath(); - // Create a blob reference to read and validate the block list - CloudBlockBlob blob = testAccount.getBlobReference(blobPath.substring(1)); - // after the stream is closed, the block list should be non-empty - ArrayList blockList = blob.downloadBlockList( - BlockListingFilter.COMMITTED, - null,null, null); - assertEquals(1, blockList.size()); - } - - // Verify hflush writes data to storage for Block Blobs with Compaction - @Test - public void testBlockBlobCompactionHFlush() throws Exception { - Path path = getBlobPathWithTestName(BLOCK_BLOB_COMPACTION_DIR, methodName.getMethodName()); - byte[] buffer = getRandomBytes(); - - try (FSDataOutputStream stream = fs.create(path)) { - assertTrue(isBlockBlobAppendStreamWrapper(stream)); - for (int i = 0; i < 10; i++) { - stream.write(buffer); - stream.hflush(); - } - } - String blobPath = path.toUri().getPath(); - // Create a blob reference to read and validate the block list - CloudBlockBlob blob = testAccount.getBlobReference(blobPath.substring(1)); - // after the stream is closed, the block list should be non-empty - ArrayList blockList = blob.downloadBlockList( - BlockListingFilter.COMMITTED, - null,null, null); - assertEquals(10, blockList.size()); - } - - // Verify hsync writes data to storage for Block Blobs with compaction - @Test - public void testBlockBlobCompactionHSync() throws Exception { - Path path = getBlobPathWithTestName(BLOCK_BLOB_COMPACTION_DIR, methodName.getMethodName()); - byte[] buffer = getRandomBytes(); - - try (FSDataOutputStream stream = fs.create(path)) { - assertTrue(isBlockBlobAppendStreamWrapper(stream)); - for (int i = 0; i < 10; i++) { - stream.write(buffer); - stream.hsync(); - } - } - String blobPath = path.toUri().getPath(); - // Create a blob reference to read and validate the block list - CloudBlockBlob blob = testAccount.getBlobReference(blobPath.substring(1)); - // after the stream is closed, the block list should be non-empty - ArrayList blockList = blob.downloadBlockList( - BlockListingFilter.COMMITTED, - null,null, null); - assertEquals(10, blockList.size()); - } - - // Close must write data to storage for Block Blobs with compaction - @Test - public void testBlockBlobCompactionClose() throws IOException { - Path path = getBlobPathWithTestName(BLOCK_BLOB_COMPACTION_DIR, methodName.getMethodName()); - try (FSDataOutputStream stream = fs.create(path)) { - assertTrue(isBlockBlobAppendStreamWrapper(stream)); - byte[] buffer = getRandomBytes(); - stream.write(buffer); - stream.close(); - validate(path, buffer, true); - } - } - - // Block Blobs with Compaction have 
StreamCapabilities.HFLUSH and HSYNC. - @Test - public void testBlockBlobCompactionCapabilities() throws IOException { - Path path = getBlobPathWithTestName(BLOCK_BLOB_COMPACTION_DIR, methodName.getMethodName()); - try (FSDataOutputStream stream = fs.create(path)) { - assertHasStreamCapabilities(stream, - StreamCapabilities.HFLUSH, - StreamCapabilities.HSYNC); - assertLacksStreamCapabilities(stream, - StreamCapabilities.DROPBEHIND, - StreamCapabilities.READAHEAD, - StreamCapabilities.UNBUFFER); - stream.write(getRandomBytes()); - } - } - - // A small write does not write data to storage for Page Blobs - @Test - public void testPageBlobSmallWrite() throws IOException { - Path path = getBlobPathWithTestName(PAGE_BLOB_DIR, methodName.getMethodName()); - try (FSDataOutputStream stream = fs.create(path)) { - assertTrue(isPageBlobStreamWrapper(stream)); - byte[] buffer = getRandomBytes(); - stream.write(buffer); - validate(path, buffer, false); - } - } - - // A small write does not write data to storage for Block Blobs - @Test - public void testBlockBlobSmallWrite() throws IOException { - Path path = getBlobPathWithTestName(BLOCK_BLOB_DIR, methodName.getMethodName()); - try (FSDataOutputStream stream = fs.create(path)) { - byte[] buffer = getRandomBytes(); - stream.write(buffer); - validate(path, buffer, false); - } - } - - // A small write does not write data to storage for Block Blobs - // with Compaction - @Test - public void testBlockBlobCompactionSmallWrite() throws IOException { - Path path = getBlobPathWithTestName(BLOCK_BLOB_COMPACTION_DIR, methodName.getMethodName()); - try (FSDataOutputStream stream = fs.create(path)) { - assertTrue(isBlockBlobAppendStreamWrapper(stream)); - byte[] buffer = getRandomBytes(); - stream.write(buffer); - validate(path, buffer, false); - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestPageBlobInputStream.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestPageBlobInputStream.java deleted file mode 100644 index e74570b3775be..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestPageBlobInputStream.java +++ /dev/null @@ -1,523 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure; - -import java.io.EOFException; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.EnumSet; -import java.util.concurrent.Callable; - -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.MethodOrderer; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestMethodOrder; -import org.junit.jupiter.api.Timeout; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FSExceptionMessages; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.contract.ContractTestUtils; - -import static org.apache.hadoop.test.LambdaTestUtils.intercept; - -/** - * Test semantics of the page blob input stream - */ -@TestMethodOrder(MethodOrderer.Alphanumeric.class) -@Timeout(600) -public class ITestPageBlobInputStream extends AbstractWasbTestBase { - private static final Logger LOG = LoggerFactory.getLogger( - ITestPageBlobInputStream.class); - private static final int KILOBYTE = 1024; - private static final int MEGABYTE = KILOBYTE * KILOBYTE; - private static final int TEST_FILE_SIZE = 6 * MEGABYTE; - private static final Path TEST_FILE_PATH = new Path( - "TestPageBlobInputStream.txt"); - - private long testFileLength; - - private FileStatus testFileStatus; - private Path hugefile; - - @BeforeEach - @Override - public void setUp() throws Exception { - super.setUp(); - createTestAccount(); - - hugefile = fs.makeQualified(TEST_FILE_PATH); - try { - testFileStatus = fs.getFileStatus(TEST_FILE_PATH); - testFileLength = testFileStatus.getLen(); - } catch (FileNotFoundException e) { - // file doesn't exist - testFileLength = 0; - } - } - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - Configuration conf = new Configuration(); - - // Configure the page blob directories key so every file created is a page blob. - conf.set(AzureNativeFileSystemStore.KEY_PAGE_BLOB_DIRECTORIES, "/"); - - return AzureBlobStorageTestAccount.create( - "testpageblobinputstream", - EnumSet.of(AzureBlobStorageTestAccount.CreateOptions.CreateContainer), - conf, - true); - } - - /** - * Create a test file by repeating the characters in the alphabet. - * @throws IOException - */ - private void createTestFileAndSetLength() throws IOException { - // To reduce test run time, the test file can be reused. 
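The test account above turns every path into a page blob by setting the page-blob-directories key to "/". A hedged sketch of the equivalent client-side configuration; the literal key string is an assumption here and is expected to match AzureNativeFileSystemStore.KEY_PAGE_BLOB_DIRECTORIES:

import org.apache.hadoop.conf.Configuration;

public final class PageBlobDirConfig {
  // Returns a configuration under which the WASB client creates every file as a
  // page blob, mirroring the account setup in the test above.
  public static Configuration pageBlobsEverywhere() {
    Configuration conf = new Configuration();
    // Comma-separated directories whose files become page blobs; "/" covers all.
    conf.set("fs.azure.page.blob.dir", "/");   // assumed value of KEY_PAGE_BLOB_DIRECTORIES
    return conf;
  }
}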
- if (fs.exists(TEST_FILE_PATH)) { - testFileStatus = fs.getFileStatus(TEST_FILE_PATH); - testFileLength = testFileStatus.getLen(); - LOG.info("Reusing test file: {}", testFileStatus); - return; - } - - byte[] buffer = new byte[256]; - for (int i = 0; i < buffer.length; i++) { - buffer[i] = (byte) i; - } - - LOG.info("Creating test file {} of size: {}", TEST_FILE_PATH, - TEST_FILE_SIZE); - - try(FSDataOutputStream outputStream = fs.create(TEST_FILE_PATH)) { - int bytesWritten = 0; - while (bytesWritten < TEST_FILE_SIZE) { - outputStream.write(buffer); - bytesWritten += buffer.length; - } - LOG.info("Closing stream {}", outputStream); - outputStream.close(); - } - testFileLength = fs.getFileStatus(TEST_FILE_PATH).getLen(); - } - - void assumeHugeFileExists() throws IOException { - ContractTestUtils.assertPathExists(fs, "huge file not created", hugefile); - FileStatus status = fs.getFileStatus(hugefile); - ContractTestUtils.assertIsFile(hugefile, status); - assertTrue(status.getLen() > 0, "File " + hugefile + " is empty"); - } - - @Test - public void test_0100_CreateHugeFile() throws IOException { - createTestFileAndSetLength(); - } - - @Test - public void test_0200_BasicReadTest() throws Exception { - assumeHugeFileExists(); - - try ( - FSDataInputStream inputStream = fs.open(TEST_FILE_PATH); - ) { - byte[] buffer = new byte[3 * MEGABYTE]; - - // v1 forward seek and read a kilobyte into first kilobyte of buffer - long position = 5 * MEGABYTE; - inputStream.seek(position); - int numBytesRead = inputStream.read(buffer, 0, KILOBYTE); - assertEquals(KILOBYTE, numBytesRead); - - byte[] expected = new byte[3 * MEGABYTE]; - - for (int i = 0; i < KILOBYTE; i++) { - expected[i] = (byte) ((i + position) % 256); - } - - assertArrayEquals(expected, buffer); - - int len = MEGABYTE; - int offset = buffer.length - len; - - // v1 reverse seek and read a megabyte into last megabyte of buffer - position = 3 * MEGABYTE; - inputStream.seek(position); - numBytesRead = inputStream.read(buffer, offset, len); - assertEquals(len, numBytesRead); - - for (int i = offset; i < offset + len; i++) { - expected[i] = (byte) ((i + position) % 256); - } - - assertArrayEquals(expected, buffer); - } - } - - @Test - public void test_0201_RandomReadTest() throws Exception { - assumeHugeFileExists(); - - try ( - FSDataInputStream inputStream = fs.open(TEST_FILE_PATH); - ) { - final int bufferSize = 4 * KILOBYTE; - byte[] buffer = new byte[bufferSize]; - long position = 0; - - verifyConsistentReads(inputStream, buffer, position); - - inputStream.seek(0); - - verifyConsistentReads(inputStream, buffer, position); - - int seekPosition = 2 * KILOBYTE; - inputStream.seek(seekPosition); - position = seekPosition; - verifyConsistentReads(inputStream, buffer, position); - - inputStream.seek(0); - position = 0; - verifyConsistentReads(inputStream, buffer, position); - - seekPosition = 5 * KILOBYTE; - inputStream.seek(seekPosition); - position = seekPosition; - verifyConsistentReads(inputStream, buffer, position); - - seekPosition = 10 * KILOBYTE; - inputStream.seek(seekPosition); - position = seekPosition; - verifyConsistentReads(inputStream, buffer, position); - - seekPosition = 4100 * KILOBYTE; - inputStream.seek(seekPosition); - position = seekPosition; - verifyConsistentReads(inputStream, buffer, position); - - for (int i = 4 * 1024 * 1023; i < 5000; i++) { - seekPosition = i; - inputStream.seek(seekPosition); - position = seekPosition; - verifyConsistentReads(inputStream, buffer, position); - } - - inputStream.seek(0); - position = 0; 
- buffer = new byte[1]; - - for (int i = 0; i < 5000; i++) { - assertEquals(1, inputStream.skip(1)); - position++; - verifyConsistentReads(inputStream, buffer, position); - position++; - } - } - } - - private void verifyConsistentReads(FSDataInputStream inputStream, - byte[] buffer, - long position) throws IOException { - int size = buffer.length; - final int numBytesRead = inputStream.read(buffer, 0, size); - assertEquals(size, numBytesRead, "Bytes read from stream"); - - byte[] expected = new byte[size]; - for (int i = 0; i < expected.length; i++) { - expected[i] = (byte) ((position + i) % 256); - } - - assertArrayEquals(expected, buffer, "Mismatch"); - } - - /** - * Validates the implementation of InputStream.markSupported. - * @throws IOException - */ - @Test - public void test_0301_MarkSupported() throws IOException { - assumeHugeFileExists(); - try (FSDataInputStream inputStream = fs.open(TEST_FILE_PATH)) { - assertTrue(inputStream.markSupported(), "mark is not supported"); - } - } - - /** - * Validates the implementation of InputStream.mark and reset - * for version 1 of the block blob input stream. - * @throws Exception - */ - @Test - public void test_0303_MarkAndResetV1() throws Exception { - assumeHugeFileExists(); - try (FSDataInputStream inputStream = fs.open(TEST_FILE_PATH)) { - inputStream.mark(KILOBYTE - 1); - - byte[] buffer = new byte[KILOBYTE]; - int bytesRead = inputStream.read(buffer); - assertEquals(buffer.length, bytesRead); - - inputStream.reset(); - assertEquals(0, inputStream.getPos(), "rest -> pos 0"); - - inputStream.mark(8 * KILOBYTE - 1); - - buffer = new byte[8 * KILOBYTE]; - bytesRead = inputStream.read(buffer); - assertEquals(buffer.length, bytesRead); - - intercept(IOException.class, - "Resetting to invalid mark", - new Callable() { - @Override - public FSDataInputStream call() throws Exception { - inputStream.reset(); - return inputStream; - } - } - ); - } - } - - /** - * Validates the implementation of Seekable.seekToNewSource, which should - * return false for version 1 of the block blob input stream. - * @throws IOException - */ - @Test - public void test_0305_SeekToNewSourceV1() throws IOException { - assumeHugeFileExists(); - try (FSDataInputStream inputStream = fs.open(TEST_FILE_PATH)) { - assertFalse(inputStream.seekToNewSource(0)); - } - } - - /** - * Validates the implementation of InputStream.skip and ensures there is no - * network I/O for version 1 of the block blob input stream. - * @throws Exception - */ - @Test - public void test_0307_SkipBounds() throws Exception { - assumeHugeFileExists(); - try (FSDataInputStream inputStream = fs.open(TEST_FILE_PATH)) { - long skipped = inputStream.skip(-1); - assertEquals(0, skipped); - - skipped = inputStream.skip(0); - assertEquals(0, skipped); - - assertTrue(testFileLength > 0); - - skipped = inputStream.skip(testFileLength); - assertEquals(testFileLength, skipped); - - intercept(EOFException.class, - new Callable() { - @Override - public Long call() throws Exception { - return inputStream.skip(1); - } - } - ); - } - } - - /** - * Validates the implementation of Seekable.seek and ensures there is no - * network I/O for forward seek. 
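The mark/reset case above checks a contract that is easiest to see with a plain JDK stream standing in for the page blob stream: once more bytes are read than the readlimit passed to mark(), the mark may be invalidated and reset() fails. A small stand-in demo, not the WASB implementation:

import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;

public final class MarkResetDemo {
  public static void main(String[] args) throws IOException {
    // 16 bytes of data behind a 4-byte buffer.
    InputStream in = new BufferedInputStream(new ByteArrayInputStream(new byte[16]), 4);
    in.mark(3);               // mark is only guaranteed for the next 3 bytes
    in.read(new byte[8]);     // read well past the readlimit
    try {
      in.reset();             // the mark has been discarded by now
    } catch (IOException expected) {
      System.out.println("reset failed as expected: " + expected.getMessage());
    }
  }
}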
- * @throws Exception - */ - @Test - public void test_0309_SeekBounds() throws Exception { - assumeHugeFileExists(); - try ( - FSDataInputStream inputStream = fs.open(TEST_FILE_PATH); - ) { - inputStream.seek(0); - assertEquals(0, inputStream.getPos()); - - intercept(EOFException.class, - FSExceptionMessages.NEGATIVE_SEEK, - new Callable() { - @Override - public FSDataInputStream call() throws Exception { - inputStream.seek(-1); - return inputStream; - } - } - ); - - assertTrue(testFileLength > 0, "Test file length only " + testFileLength); - inputStream.seek(testFileLength); - assertEquals(testFileLength, inputStream.getPos()); - - intercept(EOFException.class, - FSExceptionMessages.CANNOT_SEEK_PAST_EOF, - new Callable() { - @Override - public FSDataInputStream call() throws Exception { - inputStream.seek(testFileLength + 1); - return inputStream; - } - } - ); - } - } - - /** - * Validates the implementation of Seekable.seek, Seekable.getPos, - * and InputStream.available. - * @throws Exception - */ - @Test - public void test_0311_SeekAndAvailableAndPosition() throws Exception { - assumeHugeFileExists(); - try (FSDataInputStream inputStream = fs.open(TEST_FILE_PATH)) { - byte[] expected1 = {0, 1, 2}; - byte[] expected2 = {3, 4, 5}; - byte[] expected3 = {1, 2, 3}; - byte[] expected4 = {6, 7, 8}; - byte[] buffer = new byte[3]; - - int bytesRead = inputStream.read(buffer); - assertEquals(buffer.length, bytesRead); - assertArrayEquals(expected1, buffer); - assertEquals(buffer.length, inputStream.getPos()); - assertEquals(testFileLength - inputStream.getPos(), - inputStream.available()); - - bytesRead = inputStream.read(buffer); - assertEquals(buffer.length, bytesRead); - assertArrayEquals(expected2, buffer); - assertEquals(2 * buffer.length, inputStream.getPos()); - assertEquals(testFileLength - inputStream.getPos(), - inputStream.available()); - - // reverse seek - int seekPos = 0; - inputStream.seek(seekPos); - - bytesRead = inputStream.read(buffer); - assertEquals(buffer.length, bytesRead); - assertArrayEquals(expected1, buffer); - assertEquals(buffer.length + seekPos, inputStream.getPos()); - assertEquals(testFileLength - inputStream.getPos(), - inputStream.available()); - - // reverse seek - seekPos = 1; - inputStream.seek(seekPos); - - bytesRead = inputStream.read(buffer); - assertEquals(buffer.length, bytesRead); - assertArrayEquals(expected3, buffer); - assertEquals(buffer.length + seekPos, inputStream.getPos()); - assertEquals(testFileLength - inputStream.getPos(), - inputStream.available()); - - // forward seek - seekPos = 6; - inputStream.seek(seekPos); - - bytesRead = inputStream.read(buffer); - assertEquals(buffer.length, bytesRead); - assertArrayEquals(expected4, buffer); - assertEquals(buffer.length + seekPos, inputStream.getPos()); - assertEquals(testFileLength - inputStream.getPos(), - inputStream.available()); - } - } - - /** - * Validates the implementation of InputStream.skip, Seekable.getPos, - * and InputStream.available. 
- * @throws IOException - */ - @Test - public void test_0313_SkipAndAvailableAndPosition() throws IOException { - assumeHugeFileExists(); - try ( - FSDataInputStream inputStream = fs.open(TEST_FILE_PATH); - ) { - byte[] expected1 = {0, 1, 2}; - byte[] expected2 = {3, 4, 5}; - byte[] expected3 = {1, 2, 3}; - byte[] expected4 = {6, 7, 8}; - assertEquals(testFileLength, inputStream.available()); - assertEquals(0, inputStream.getPos()); - - int n = 3; - long skipped = inputStream.skip(n); - - assertEquals(skipped, inputStream.getPos()); - assertEquals(testFileLength - inputStream.getPos(), - inputStream.available()); - assertEquals(skipped, n); - - byte[] buffer = new byte[3]; - int bytesRead = inputStream.read(buffer); - assertEquals(buffer.length, bytesRead); - assertArrayEquals(expected2, buffer); - assertEquals(buffer.length + skipped, inputStream.getPos()); - assertEquals(testFileLength - inputStream.getPos(), - inputStream.available()); - - // does skip still work after seek? - int seekPos = 1; - inputStream.seek(seekPos); - - bytesRead = inputStream.read(buffer); - assertEquals(buffer.length, bytesRead); - assertArrayEquals(expected3, buffer); - assertEquals(buffer.length + seekPos, inputStream.getPos()); - assertEquals(testFileLength - inputStream.getPos(), - inputStream.available()); - - long currentPosition = inputStream.getPos(); - n = 2; - skipped = inputStream.skip(n); - - assertEquals(currentPosition + skipped, inputStream.getPos()); - assertEquals(testFileLength - inputStream.getPos(), - inputStream.available()); - assertEquals(skipped, n); - - bytesRead = inputStream.read(buffer); - assertEquals(buffer.length, bytesRead); - assertArrayEquals(expected4, buffer); - assertEquals(buffer.length + skipped + currentPosition, - inputStream.getPos()); - assertEquals(testFileLength - inputStream.getPos(), - inputStream.available()); - } - } - - @Test - public void test_999_DeleteHugeFiles() throws IOException { - fs.delete(TEST_FILE_PATH, false); - } - -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestPageBlobOutputStream.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestPageBlobOutputStream.java deleted file mode 100644 index b99f891f7ac34..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestPageBlobOutputStream.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.fs.azure; - -import java.io.IOException; -import java.util.EnumSet; - -import org.junit.jupiter.api.Test; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.test.LambdaTestUtils; - -public class ITestPageBlobOutputStream extends AbstractWasbTestBase { - - private static final Path TEST_FILE_PATH = new Path( - "TestPageBlobOutputStream.txt"); - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - Configuration conf = new Configuration(); - // Configure the page blob directories key so every file created is a page - // blob. - conf.set(AzureNativeFileSystemStore.KEY_PAGE_BLOB_DIRECTORIES, "/"); - return AzureBlobStorageTestAccount.create("testpagebloboutputstream", - EnumSet.of(AzureBlobStorageTestAccount.CreateOptions.CreateContainer), - conf, true); - } - - @Test - public void testHflush() throws Exception { - Path path = fs.makeQualified(TEST_FILE_PATH); - FSDataOutputStream os = fs.create(path); - os.write(1); - os.hflush(); - // Delete the blob so that Azure call will fail. - fs.delete(path, false); - os.write(2); - LambdaTestUtils.intercept(IOException.class, - "The specified blob does not exist", () -> { - os.hflush(); - }); - LambdaTestUtils.intercept(IOException.class, - "The specified blob does not exist", () -> { - os.close(); - }); - } - - @Test - public void testHsync() throws Exception { - Path path = fs.makeQualified(TEST_FILE_PATH); - FSDataOutputStream os = fs.create(path); - os.write(1); - os.hsync(); - // Delete the blob so that Azure call will fail. - fs.delete(path, false); - os.write(2); - LambdaTestUtils.intercept(IOException.class, - "The specified blob does not exist", () -> { - os.hsync(); - }); - LambdaTestUtils.intercept(IOException.class, - "The specified blob does not exist", () -> { - os.close(); - }); - } -} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestReadAndSeekPageBlobAfterWrite.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestReadAndSeekPageBlobAfterWrite.java deleted file mode 100644 index 7197f07c94565..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestReadAndSeekPageBlobAfterWrite.java +++ /dev/null @@ -1,342 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure; - -import java.io.IOException; -import java.io.OutputStream; -import java.util.Arrays; -import java.util.Random; - -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.azure.integration.AbstractAzureScaleTest; -import org.apache.hadoop.util.Time; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import static org.apache.hadoop.fs.azure.integration.AzureTestUtils .*; - -/** - * Write data into a page blob and verify you can read back all of it - * or just a part of it. - */ -public class ITestReadAndSeekPageBlobAfterWrite extends AbstractAzureScaleTest { - private static final Logger LOG = - LoggerFactory.getLogger(ITestReadAndSeekPageBlobAfterWrite.class); - - private FileSystem fs; - private byte[] randomData; - - // Page blob physical page size - private static final int PAGE_SIZE = PageBlobFormatHelpers.PAGE_SIZE; - - // Size of data on page (excluding header) - private static final int PAGE_DATA_SIZE = PAGE_SIZE - PageBlobFormatHelpers.PAGE_HEADER_SIZE; - private static final int MAX_BYTES = 33554432; // maximum bytes in a file that we'll test - private static final int MAX_PAGES = MAX_BYTES / PAGE_SIZE; // maximum number of pages we'll test - private Random rand = new Random(); - - // A key with a prefix under /pageBlobs, which for the test file system will - // force use of a page blob. - private static final String KEY = "/pageBlobs/file.dat"; - - // path of page blob file to read and write - private Path blobPath; - - @BeforeEach - @Override - public void setUp() throws Exception { - super.setUp(); - fs = getTestAccount().getFileSystem(); - // Make sure we are using an integral number of pages. - assertEquals(0, MAX_BYTES % PAGE_SIZE); - - // load an in-memory array of random data - randomData = new byte[PAGE_SIZE * MAX_PAGES]; - rand.nextBytes(randomData); - - blobPath = blobPath("ITestReadAndSeekPageBlobAfterWrite"); - } - - @Override - public void tearDown() throws Exception { - deleteQuietly(fs, blobPath, true); - super.tearDown(); - } - - /** - * Make sure the file name (key) is a page blob file name. If anybody changes that, - * we need to come back and update this test class. - */ - @Test - public void testIsPageBlobFileName() { - AzureNativeFileSystemStore store = ((NativeAzureFileSystem) fs).getStore(); - String[] a = blobPath.toUri().getPath().split("/"); - String key2 = a[1] + "/"; - assertTrue(store.isPageBlobKey(key2), "Not a page blob: " + blobPath); - } - - /** - * For a set of different file sizes, write some random data to a page blob, - * read it back, and compare that what was read is the same as what was written. - */ - @Test - public void testReadAfterWriteRandomData() throws IOException { - - // local shorthand - final int pds = PAGE_DATA_SIZE; - - // Test for sizes at and near page boundaries - int[] dataSizes = { - - // on first page - 0, 1, 2, 3, - - // Near first physical page boundary (because the implementation - // stores PDS + the page header size bytes on each page). 
- pds - 1, pds, pds + 1, pds + 2, pds + 3, - - // near second physical page boundary - (2 * pds) - 1, (2 * pds), (2 * pds) + 1, (2 * pds) + 2, (2 * pds) + 3, - - // near tenth physical page boundary - (10 * pds) - 1, (10 * pds), (10 * pds) + 1, (10 * pds) + 2, (10 * pds) + 3, - - // test one big size, >> 4MB (an internal buffer size in the code) - MAX_BYTES - }; - - for (int i : dataSizes) { - testReadAfterWriteRandomData(i); - } - } - - private void testReadAfterWriteRandomData(int size) throws IOException { - writeRandomData(size); - readRandomDataAndVerify(size); - } - - /** - * Read "size" bytes of data and verify that what was read and what was written - * are the same. - */ - private void readRandomDataAndVerify(int size) throws AzureException, IOException { - byte[] b = new byte[size]; - FSDataInputStream stream = fs.open(blobPath); - int bytesRead = stream.read(b); - stream.close(); - assertEquals(bytesRead, size); - - // compare the data read to the data written - assertTrue(comparePrefix(randomData, b, size)); - } - - // return true if the beginning "size" values of the arrays are the same - private boolean comparePrefix(byte[] a, byte[] b, int size) { - if (a.length < size || b.length < size) { - return false; - } - for (int i = 0; i < size; i++) { - if (a[i] != b[i]) { - return false; - } - } - return true; - } - - // Write a specified amount of random data to the file path for this test class. - private void writeRandomData(int size) throws IOException { - OutputStream output = fs.create(blobPath); - output.write(randomData, 0, size); - output.close(); - } - - /** - * Write data to a page blob, open it, seek, and then read a range of data. - * Then compare that the data read from that range is the same as the data originally written. - */ - @Test - public void testPageBlobSeekAndReadAfterWrite() throws IOException { - writeRandomData(PAGE_SIZE * MAX_PAGES); - int recordSize = 100; - byte[] b = new byte[recordSize]; - - - try(FSDataInputStream stream = fs.open(blobPath)) { - // Seek to a boundary around the middle of the 6th page - int seekPosition = 5 * PAGE_SIZE + 250; - stream.seek(seekPosition); - - // Read a record's worth of bytes and verify results - int bytesRead = stream.read(b); - verifyReadRandomData(b, bytesRead, seekPosition, recordSize); - - // Seek to another spot and read a record greater than a page - seekPosition = 10 * PAGE_SIZE + 250; - stream.seek(seekPosition); - recordSize = 1000; - b = new byte[recordSize]; - bytesRead = stream.read(b); - verifyReadRandomData(b, bytesRead, seekPosition, recordSize); - - // Read the last 100 bytes of the file - recordSize = 100; - seekPosition = PAGE_SIZE * MAX_PAGES - recordSize; - stream.seek(seekPosition); - b = new byte[recordSize]; - bytesRead = stream.read(b); - verifyReadRandomData(b, bytesRead, seekPosition, recordSize); - - // Read past the end of the file and we should get only partial data. - recordSize = 100; - seekPosition = PAGE_SIZE * MAX_PAGES - recordSize + 50; - stream.seek(seekPosition); - b = new byte[recordSize]; - bytesRead = stream.read(b); - assertEquals(50, bytesRead); - - // compare last 50 bytes written with those read - byte[] tail = Arrays.copyOfRange(randomData, seekPosition, randomData.length); - assertTrue(comparePrefix(tail, b, 50)); - } - } - - // Verify that reading a record of data after seeking gives the expected data. 
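The read-back helper above relies on a single read() call filling the whole buffer, which the general InputStream contract does not guarantee; a positioned readFully() avoids short reads entirely. A self-contained sketch against the local filesystem, with an illustrative path and sizes:

import java.util.Arrays;
import java.util.Random;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public final class WriteReadBackCheck {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.getLocal(new Configuration());  // stand-in for WASB
    Path path = new Path("/tmp/write-read-back.dat");

    byte[] expected = new byte[64 * 1024];
    new Random(0).nextBytes(expected);

    try (FSDataOutputStream out = fs.create(path, true)) {
      out.write(expected);
    }

    byte[] actual = new byte[expected.length];
    try (FSDataInputStream in = fs.open(path)) {
      in.readFully(0, actual);            // positioned read of the whole file
    }
    System.out.println(Arrays.equals(expected, actual) ? "match" : "mismatch");
    fs.delete(path, false);
  }
}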
- private void verifyReadRandomData(byte[] b, int bytesRead, int seekPosition, int recordSize) { - byte[] originalRecordData = - Arrays.copyOfRange(randomData, seekPosition, seekPosition + recordSize + 1); - assertEquals(recordSize, bytesRead); - assertTrue(comparePrefix(originalRecordData, b, recordSize)); - } - - // Test many small flushed writes interspersed with periodic hflush calls. - // For manual testing, increase NUM_WRITES to a large number. - // The goal for a long-running manual test is to make sure that it finishes - // and the close() call does not time out. It also facilitates debugging into - // hflush/hsync. - @Test - public void testManySmallWritesWithHFlush() throws IOException { - writeAndReadOneFile(50, 100, 20); - } - - /** - * Write a total of numWrites * recordLength data to a file, read it back, - * and check to make sure what was read is the same as what was written. - * The syncInterval is the number of writes after which to call hflush to - * force the data to storage. - */ - private void writeAndReadOneFile(int numWrites, - int recordLength, int syncInterval) throws IOException { - - // A lower bound on the minimum time we think it will take to do - // a write to Azure storage. - final long MINIMUM_EXPECTED_TIME = 20; - LOG.info("Writing " + numWrites * recordLength + " bytes to " + blobPath.getName()); - FSDataOutputStream output = fs.create(blobPath); - int writesSinceHFlush = 0; - try { - - // Do a flush and hflush to exercise case for empty write queue in PageBlobOutputStream, - // to test concurrent execution gates. - output.flush(); - output.hflush(); - for (int i = 0; i < numWrites; i++) { - output.write(randomData, i * recordLength, recordLength); - writesSinceHFlush++; - output.flush(); - if ((i % syncInterval) == 0) { - output.hflush(); - writesSinceHFlush = 0; - } - } - } finally { - long start = Time.monotonicNow(); - output.close(); - long end = Time.monotonicNow(); - LOG.debug("close duration = " + (end - start) + " msec."); - if (writesSinceHFlush > 0) { - assertTrue(end - start >= MINIMUM_EXPECTED_TIME, String.format( - "close duration with >= 1 pending write is %d, less than minimum expected of %d", - end - start, MINIMUM_EXPECTED_TIME)); - } - } - - // Read the data back and check it. - FSDataInputStream stream = fs.open(blobPath); - int SIZE = numWrites * recordLength; - byte[] b = new byte[SIZE]; - try { - stream.seek(0); - stream.read(b, 0, SIZE); - verifyReadRandomData(b, SIZE, 0, SIZE); - } finally { - stream.close(); - } - - // delete the file - fs.delete(blobPath, false); - } - - // Test writing to a large file repeatedly as a stress test. - // Set the repetitions to a larger number for manual testing - // for a longer stress run. - @Test - public void testLargeFileStress() throws IOException { - int numWrites = 32; - int recordSize = 1024 * 1024; - int syncInterval = 10; - int repetitions = 1; - for (int i = 0; i < repetitions; i++) { - writeAndReadOneFile(numWrites, recordSize, syncInterval); - } - } - - // Write to a file repeatedly to verify that it extends. - // The page blob file should start out at 128MB and finish at 256MB. 
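writeAndReadOneFile() above drives many small writes with an hflush() every syncInterval records and then times close(). Stripped of the test plumbing, the write pattern looks roughly like this; the filesystem, path and sizes are placeholders:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public final class PeriodicHflushWriter {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.getLocal(new Configuration());  // stand-in for WASB
    Path path = new Path("/tmp/periodic-hflush.dat");
    byte[] record = new byte[100];
    int numWrites = 50;
    int syncInterval = 20;

    try (FSDataOutputStream out = fs.create(path, true)) {
      for (int i = 0; i < numWrites; i++) {
        out.write(record);
        out.flush();                 // cheap; may be a no-op for the remote store
        if (i % syncInterval == 0) {
          out.hflush();              // force buffered data out to the service
        }
      }
    }                                // close() commits anything still pending
    fs.delete(path, false);
  }
}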
- public void testFileSizeExtension() throws IOException { - final int writeSize = 1024 * 1024; - final int numWrites = 129; - final byte dataByte = 5; - byte[] data = new byte[writeSize]; - Arrays.fill(data, dataByte); - try (FSDataOutputStream output = fs.create(blobPath)) { - for (int i = 0; i < numWrites; i++) { - output.write(data); - output.hflush(); - LOG.debug("total writes = " + (i + 1)); - } - } - - // Show that we wrote more than the default page blob file size. - assertTrue(numWrites * writeSize > PageBlobOutputStream.PAGE_BLOB_MIN_SIZE); - - // Verify we can list the new size. That will prove we expanded the file. - FileStatus[] status = fs.listStatus(blobPath); - assertEquals(numWrites * writeSize, status[0].getLen(), - "File size hasn't changed " + status); - LOG.debug("Total bytes written to " + blobPath + " = " + status[0].getLen()); - fs.delete(blobPath, false); - } - -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestWasbRemoteCallHelper.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestWasbRemoteCallHelper.java deleted file mode 100644 index f1e8fdd181399..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestWasbRemoteCallHelper.java +++ /dev/null @@ -1,517 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.contract.ContractTestUtils; -import org.apache.hadoop.io.retry.RetryUtils; -import org.apache.http.Header; -import org.apache.http.HttpResponse; -import org.apache.http.HttpEntity; -import org.apache.http.HttpStatus; -import org.apache.http.StatusLine; -import org.apache.http.ProtocolVersion; -import org.apache.http.ParseException; -import org.apache.http.HeaderElement; -import org.apache.http.client.HttpClient; -import org.apache.http.client.methods.HttpGet; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.mockito.ArgumentMatcher; -import org.mockito.Mockito; - -import java.io.ByteArrayInputStream; -import java.net.InetAddress; -import java.net.UnknownHostException; -import java.nio.charset.StandardCharsets; - -import static org.apache.hadoop.fs.azure.AzureNativeFileSystemStore.KEY_USE_SECURE_MODE; -import static org.mockito.ArgumentMatchers.argThat; -import static org.mockito.Mockito.atLeast; -import static org.mockito.Mockito.times; - -import static org.assertj.core.api.Assumptions.assumeThat; - -/** - * Test class to hold all WasbRemoteCallHelper tests. 
- */ -public class ITestWasbRemoteCallHelper - extends AbstractWasbTestBase { - public static final String EMPTY_STRING = ""; - private static final int INVALID_HTTP_STATUS_CODE_999 = 999; - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - Configuration conf = new Configuration(); - conf.set(NativeAzureFileSystem.KEY_AZURE_AUTHORIZATION, "true"); - conf.set(RemoteWasbAuthorizerImpl.KEY_REMOTE_AUTH_SERVICE_URLS, "http://localhost1/,http://localhost2/,http://localhost:8080"); - return AzureBlobStorageTestAccount.create(conf); - } - - @BeforeEach - @Override - public void setUp() throws Exception { - super.setUp(); - boolean useSecureMode = fs.getConf().getBoolean(KEY_USE_SECURE_MODE, false); - boolean useAuthorization = fs.getConf() - .getBoolean(NativeAzureFileSystem.KEY_AZURE_AUTHORIZATION, false); - assumeThat(useSecureMode && useAuthorization) - .as("Test valid when both SecureMode and Authorization are enabled .. skipping") - .isTrue(); - } - - /** - * Test invalid status-code. - * @throws Throwable - */ - @Test // (expected = WasbAuthorizationException.class) - public void testInvalidStatusCode() throws Throwable { - - assertThrows(WasbAuthorizationException.class, () -> { - // set up mocks - HttpClient mockHttpClient = Mockito.mock(HttpClient.class); - HttpResponse mockHttpResponse = Mockito.mock(HttpResponse.class); - Mockito.when(mockHttpClient.execute(Mockito.any())) - .thenReturn(mockHttpResponse); - Mockito.when(mockHttpResponse.getStatusLine()) - .thenReturn(newStatusLine(INVALID_HTTP_STATUS_CODE_999)); - // finished setting up mocks - - performop(mockHttpClient); - }); - } - - /** - * Test invalid Content-Type. - * @throws Throwable - */ - @Test // (expected = WasbAuthorizationException.class) - public void testInvalidContentType() throws Throwable { - assertThrows(WasbAuthorizationException.class, () -> { - // set up mocks - HttpClient mockHttpClient = Mockito.mock(HttpClient.class); - HttpResponse mockHttpResponse = Mockito.mock(HttpResponse.class); - Mockito.when(mockHttpClient.execute(Mockito.any())).thenReturn(mockHttpResponse); - Mockito.when(mockHttpResponse.getStatusLine()).thenReturn(newStatusLine(HttpStatus.SC_OK)); - Mockito.when(mockHttpResponse.getFirstHeader("Content-Type")) - .thenReturn(newHeader("Content-Type", "text/plain")); - // finished setting up mocks - performop(mockHttpClient); - }); - } - - /** - * Test missing Content-Length. - * @throws Throwable - */ - @Test // (expected = WasbAuthorizationException.class) - public void testMissingContentLength() throws Throwable { - - assertThrows(WasbAuthorizationException.class, () -> { - // set up mocks - HttpClient mockHttpClient = Mockito.mock(HttpClient.class); - HttpResponse mockHttpResponse = Mockito.mock(HttpResponse.class); - Mockito.when(mockHttpClient.execute(Mockito.any())).thenReturn(mockHttpResponse); - Mockito.when(mockHttpResponse.getStatusLine()).thenReturn(newStatusLine(HttpStatus.SC_OK)); - Mockito.when(mockHttpResponse.getFirstHeader("Content-Type")) - .thenReturn(newHeader("Content-Type", "application/json")); - // finished setting up mocks - - performop(mockHttpClient); - }); - } - - /** - * Test Content-Length exceeds max. 
- * @throws Throwable - */ - @Test // (expected = WasbAuthorizationException.class) - public void testContentLengthExceedsMax() throws Throwable { - - assertThrows(WasbAuthorizationException.class, () -> { - // set up mocks - HttpClient mockHttpClient = Mockito.mock(HttpClient.class); - HttpResponse mockHttpResponse = Mockito.mock(HttpResponse.class); - Mockito.when(mockHttpClient.execute(Mockito.any())).thenReturn(mockHttpResponse); - Mockito.when(mockHttpResponse.getStatusLine()).thenReturn(newStatusLine(HttpStatus.SC_OK)); - Mockito.when(mockHttpResponse.getFirstHeader("Content-Type")) - .thenReturn(newHeader("Content-Type", "application/json")); - Mockito.when(mockHttpResponse.getFirstHeader("Content-Length")) - .thenReturn(newHeader("Content-Length", "2048")); - // finished setting up mocks - performop(mockHttpClient); - }); - } - - /** - * Test invalid Content-Length value - * @throws Throwable - */ - @Test // (expected = WasbAuthorizationException.class) - public void testInvalidContentLengthValue() throws Throwable { - - assertThrows(WasbAuthorizationException.class, () -> { - // set up mocks - HttpClient mockHttpClient = Mockito.mock(HttpClient.class); - HttpResponse mockHttpResponse = Mockito.mock(HttpResponse.class); - Mockito.when(mockHttpClient.execute(Mockito.any())).thenReturn(mockHttpResponse); - Mockito.when(mockHttpResponse.getStatusLine()).thenReturn(newStatusLine(HttpStatus.SC_OK)); - Mockito.when(mockHttpResponse.getFirstHeader("Content-Type")) - .thenReturn(newHeader("Content-Type", "application/json")); - Mockito.when(mockHttpResponse.getFirstHeader("Content-Length")) - .thenReturn(newHeader("Content-Length", "20abc48")); - // finished setting up mocks - - performop(mockHttpClient); - }); - } - - /** - * Test valid JSON response. - * @throws Throwable - */ - @Test - public void testValidJSONResponse() throws Throwable { - - // set up mocks - HttpClient mockHttpClient = Mockito.mock(HttpClient.class); - - HttpResponse mockHttpResponse = Mockito.mock(HttpResponse.class); - HttpEntity mockHttpEntity = Mockito.mock(HttpEntity.class); - - Mockito.when(mockHttpClient.execute(Mockito.any())).thenReturn(mockHttpResponse); - Mockito.when(mockHttpResponse.getStatusLine()).thenReturn(newStatusLine(HttpStatus.SC_OK)); - Mockito.when(mockHttpResponse.getFirstHeader("Content-Type")) - .thenReturn(newHeader("Content-Type", "application/json")); - Mockito.when(mockHttpResponse.getFirstHeader("Content-Length")) - .thenReturn(newHeader("Content-Length", "1024")); - Mockito.when(mockHttpResponse.getEntity()).thenReturn(mockHttpEntity); - Mockito.when(mockHttpEntity.getContent()) - .thenReturn(new ByteArrayInputStream(validJsonResponse().getBytes(StandardCharsets.UTF_8))) - .thenReturn(new ByteArrayInputStream(validJsonResponse().getBytes(StandardCharsets.UTF_8))) - .thenReturn(new ByteArrayInputStream(validJsonResponse().getBytes(StandardCharsets.UTF_8))); - // finished setting up mocks - - performop(mockHttpClient); - } - - /** - * Test malformed JSON response. 
- * @throws Throwable - */ - @Test // (expected = WasbAuthorizationException.class) - public void testMalFormedJSONResponse() throws Throwable { - - String errorMsg = - "com.fasterxml.jackson.core.JsonParseException: Unexpected end-of-input in FIELD_NAME"; - assertThrows(WasbAuthorizationException.class, () -> { - // set up mocks - HttpClient mockHttpClient = Mockito.mock(HttpClient.class); - - HttpResponse mockHttpResponse = Mockito.mock(HttpResponse.class); - HttpEntity mockHttpEntity = Mockito.mock(HttpEntity.class); - - Mockito.when(mockHttpClient.execute(Mockito.any())).thenReturn(mockHttpResponse); - Mockito.when(mockHttpResponse.getStatusLine()).thenReturn(newStatusLine(HttpStatus.SC_OK)); - Mockito.when(mockHttpResponse.getFirstHeader("Content-Type")) - .thenReturn(newHeader("Content-Type", "application/json")); - Mockito.when(mockHttpResponse.getFirstHeader("Content-Length")) - .thenReturn(newHeader("Content-Length", "1024")); - Mockito.when(mockHttpResponse.getEntity()).thenReturn(mockHttpEntity); - Mockito.when(mockHttpEntity.getContent()) - .thenReturn(new ByteArrayInputStream(malformedJsonResponse().getBytes(StandardCharsets.UTF_8))); - // finished setting up mocks - - performop(mockHttpClient); - }, errorMsg); - - } - - /** - * Test valid JSON response failure response code. - * @throws Throwable - */ - @Test // (expected = WasbAuthorizationException.class) - public void testFailureCodeJSONResponse() throws Throwable { - String errorMsg = "Remote authorization service encountered an error Unauthorized"; - - assertThrows(WasbAuthorizationException.class, () -> { - // set up mocks - HttpClient mockHttpClient = Mockito.mock(HttpClient.class); - - HttpResponse mockHttpResponse = Mockito.mock(HttpResponse.class); - HttpEntity mockHttpEntity = Mockito.mock(HttpEntity.class); - - Mockito.when(mockHttpClient.execute(Mockito.any())).thenReturn(mockHttpResponse); - Mockito.when(mockHttpResponse.getStatusLine()).thenReturn(newStatusLine(HttpStatus.SC_OK)); - Mockito.when(mockHttpResponse.getFirstHeader("Content-Type")) - .thenReturn(newHeader("Content-Type", "application/json")); - Mockito.when(mockHttpResponse.getFirstHeader("Content-Length")) - .thenReturn(newHeader("Content-Length", "1024")); - Mockito.when(mockHttpResponse.getEntity()).thenReturn(mockHttpEntity); - Mockito.when(mockHttpEntity.getContent()) - .thenReturn(new ByteArrayInputStream(failureCodeJsonResponse() - .getBytes(StandardCharsets.UTF_8))); - // finished setting up mocks - performop(mockHttpClient); - }, errorMsg); - } - - @Test - public void testWhenOneInstanceIsDown() throws Throwable { - - boolean isAuthorizationCachingEnabled = fs.getConf().getBoolean(CachingAuthorizer.KEY_AUTH_SERVICE_CACHING_ENABLE, false); - - // set up mocks - HttpClient mockHttpClient = Mockito.mock(HttpClient.class); - HttpEntity mockHttpEntity = Mockito.mock(HttpEntity.class); - - HttpResponse mockHttpResponseService1 = Mockito.mock(HttpResponse.class); - Mockito.when(mockHttpResponseService1.getStatusLine()) - .thenReturn(newStatusLine(HttpStatus.SC_INTERNAL_SERVER_ERROR)); - Mockito.when(mockHttpResponseService1.getFirstHeader("Content-Type")) - .thenReturn(newHeader("Content-Type", "application/json")); - Mockito.when(mockHttpResponseService1.getFirstHeader("Content-Length")) - .thenReturn(newHeader("Content-Length", "1024")); - Mockito.when(mockHttpResponseService1.getEntity()) - .thenReturn(mockHttpEntity); - - HttpResponse mockHttpResponseService2 = Mockito.mock(HttpResponse.class); - 
Mockito.when(mockHttpResponseService2.getStatusLine()) - .thenReturn(newStatusLine(HttpStatus.SC_OK)); - Mockito.when(mockHttpResponseService2.getFirstHeader("Content-Type")) - .thenReturn(newHeader("Content-Type", "application/json")); - Mockito.when(mockHttpResponseService2.getFirstHeader("Content-Length")) - .thenReturn(newHeader("Content-Length", "1024")); - Mockito.when(mockHttpResponseService2.getEntity()) - .thenReturn(mockHttpEntity); - - HttpResponse mockHttpResponseServiceLocal = Mockito.mock(HttpResponse.class); - Mockito.when(mockHttpResponseServiceLocal.getStatusLine()) - .thenReturn(newStatusLine(HttpStatus.SC_INTERNAL_SERVER_ERROR)); - Mockito.when(mockHttpResponseServiceLocal.getFirstHeader("Content-Type")) - .thenReturn(newHeader("Content-Type", "application/json")); - Mockito.when(mockHttpResponseServiceLocal.getFirstHeader("Content-Length")) - .thenReturn(newHeader("Content-Length", "1024")); - Mockito.when(mockHttpResponseServiceLocal.getEntity()) - .thenReturn(mockHttpEntity); - - Mockito.when(mockHttpClient.execute(argThat(new HttpGetForService1()))) - .thenReturn(mockHttpResponseService1); - Mockito.when(mockHttpClient.execute(argThat(new HttpGetForService2()))) - .thenReturn(mockHttpResponseService2); - Mockito.when(mockHttpClient.execute(argThat(new HttpGetForServiceLocal()))) - .thenReturn(mockHttpResponseServiceLocal); - - //Need 2 times because performop() does 2 fs operations. - Mockito.when(mockHttpEntity.getContent()) - .thenReturn(new ByteArrayInputStream(validJsonResponse() - .getBytes(StandardCharsets.UTF_8))) - .thenReturn(new ByteArrayInputStream(validJsonResponse() - .getBytes(StandardCharsets.UTF_8))) - .thenReturn(new ByteArrayInputStream(validJsonResponse() - .getBytes(StandardCharsets.UTF_8))); - // finished setting up mocks - - performop(mockHttpClient); - - int expectedNumberOfInvocations = isAuthorizationCachingEnabled ? 
2 : 3; - Mockito.verify(mockHttpClient, times(expectedNumberOfInvocations)).execute(Mockito.argThat(new HttpGetForServiceLocal())); - Mockito.verify(mockHttpClient, times(expectedNumberOfInvocations)).execute(Mockito.argThat(new HttpGetForService2())); - } - - @Test - public void testWhenServiceInstancesAreDown() throws Throwable { - //expectedEx.expect(WasbAuthorizationException.class); - // set up mocks - HttpClient mockHttpClient = Mockito.mock(HttpClient.class); - HttpEntity mockHttpEntity = Mockito.mock(HttpEntity.class); - - HttpResponse mockHttpResponseService1 = Mockito.mock(HttpResponse.class); - Mockito.when(mockHttpResponseService1.getStatusLine()) - .thenReturn(newStatusLine(HttpStatus.SC_INTERNAL_SERVER_ERROR)); - Mockito.when(mockHttpResponseService1.getFirstHeader("Content-Type")) - .thenReturn(newHeader("Content-Type", "application/json")); - Mockito.when(mockHttpResponseService1.getFirstHeader("Content-Length")) - .thenReturn(newHeader("Content-Length", "1024")); - Mockito.when(mockHttpResponseService1.getEntity()) - .thenReturn(mockHttpEntity); - - HttpResponse mockHttpResponseService2 = Mockito.mock(HttpResponse.class); - Mockito.when(mockHttpResponseService2.getStatusLine()) - .thenReturn(newStatusLine( - HttpStatus.SC_INTERNAL_SERVER_ERROR)); - Mockito.when(mockHttpResponseService2.getFirstHeader("Content-Type")) - .thenReturn(newHeader("Content-Type", "application/json")); - Mockito.when(mockHttpResponseService2.getFirstHeader("Content-Length")) - .thenReturn(newHeader("Content-Length", "1024")); - Mockito.when(mockHttpResponseService2.getEntity()) - .thenReturn(mockHttpEntity); - - HttpResponse mockHttpResponseService3 = Mockito.mock(HttpResponse.class); - Mockito.when(mockHttpResponseService3.getStatusLine()) - .thenReturn(newStatusLine( - HttpStatus.SC_INTERNAL_SERVER_ERROR)); - Mockito.when(mockHttpResponseService3.getFirstHeader("Content-Type")) - .thenReturn(newHeader("Content-Type", "application/json")); - Mockito.when(mockHttpResponseService3.getFirstHeader("Content-Length")) - .thenReturn(newHeader("Content-Length", "1024")); - Mockito.when(mockHttpResponseService3.getEntity()) - .thenReturn(mockHttpEntity); - - Mockito.when(mockHttpClient.execute(argThat(new HttpGetForService1()))) - .thenReturn(mockHttpResponseService1); - Mockito.when(mockHttpClient.execute(argThat(new HttpGetForService2()))) - .thenReturn(mockHttpResponseService2); - Mockito.when(mockHttpClient.execute(argThat(new HttpGetForServiceLocal()))) - .thenReturn(mockHttpResponseService3); - - //Need 3 times because performop() does 3 fs operations. 
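These mocked-authorization tests lean on two Mockito idioms: routing stubs by request host with argThat(...), and handing back a different response on each consecutive call by chaining thenReturn(...). A compact, hypothetical sketch of both; the host name and responses are placeholders:

import static org.mockito.ArgumentMatchers.argThat;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;

public final class HttpClientStubSketch {
  // Stub an HttpClient so that requests to "localhost1" get one response on the
  // first call and another on every call after that.
  static HttpClient stub(HttpResponse first, HttpResponse later) throws Exception {
    HttpClient client = mock(HttpClient.class);
    when(client.execute(argThat((HttpGet g) ->
            g != null && "localhost1".equals(g.getURI().getHost()))))
        .thenReturn(first)     // first matching call
        .thenReturn(later);    // every subsequent matching call
    return client;
  }
}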
- Mockito.when(mockHttpEntity.getContent()) - .thenReturn(new ByteArrayInputStream( - validJsonResponse().getBytes(StandardCharsets.UTF_8))) - .thenReturn(new ByteArrayInputStream( - validJsonResponse().getBytes(StandardCharsets.UTF_8))) - .thenReturn(new ByteArrayInputStream( - validJsonResponse().getBytes(StandardCharsets.UTF_8))); - // finished setting up mocks - try { - performop(mockHttpClient); - }catch (WasbAuthorizationException e){ - e.printStackTrace(); - Mockito.verify(mockHttpClient, atLeast(2)) - .execute(argThat(new HttpGetForService1())); - Mockito.verify(mockHttpClient, atLeast(2)) - .execute(argThat(new HttpGetForService2())); - Mockito.verify(mockHttpClient, atLeast(3)) - .execute(argThat(new HttpGetForServiceLocal())); - Mockito.verify(mockHttpClient, times(7)).execute(Mockito.any()); - } - } - - private void performop(HttpClient mockHttpClient) throws Throwable { - - Path testPath = new Path("/", "test.dat"); - - RemoteWasbAuthorizerImpl authorizer = new RemoteWasbAuthorizerImpl(); - authorizer.init(fs.getConf()); - WasbRemoteCallHelper mockWasbRemoteCallHelper = new WasbRemoteCallHelper( - RetryUtils.getMultipleLinearRandomRetry(new Configuration(), - EMPTY_STRING, true, - EMPTY_STRING, "1000,3,10000,2")); - mockWasbRemoteCallHelper.updateHttpClient(mockHttpClient); - authorizer.updateWasbRemoteCallHelper(mockWasbRemoteCallHelper); - fs.updateWasbAuthorizer(authorizer); - - fs.create(testPath); - ContractTestUtils.assertPathExists(fs, "testPath was not created", testPath); - fs.delete(testPath, false); - } - - private String validJsonResponse() { - return "{" - + "\"responseCode\": 0," - + "\"authorizationResult\": true," - + "\"responseMessage\": \"Authorized\"" - + "}"; - } - - private String malformedJsonResponse() { - return "{" - + "\"responseCode\": 0," - + "\"authorizationResult\": true," - + "\"responseMessage\":"; - } - - private String failureCodeJsonResponse() { - return "{" - + "\"responseCode\": 1," - + "\"authorizationResult\": false," - + "\"responseMessage\": \"Unauthorized\"" - + "}"; - } - - private StatusLine newStatusLine(int statusCode) { - return new StatusLine() { - @Override - public ProtocolVersion getProtocolVersion() { - return new ProtocolVersion("HTTP", 1, 1); - } - - @Override - public int getStatusCode() { - return statusCode; - } - - @Override - public String getReasonPhrase() { - return "Reason Phrase"; - } - }; - } - - private Header newHeader(String name, String value) { - return new Header() { - @Override - public String getName() { - return name; - } - - @Override - public String getValue() { - return value; - } - - @Override - public HeaderElement[] getElements() throws ParseException { - return new HeaderElement[0]; - } - }; - } - - /** Check that a HttpGet request is with given remote host. 
*/ - private static boolean checkHttpGetMatchHost(HttpGet g, String h) { - return g != null && g.getURI().getHost().equals(h); - } - - private class HttpGetForService1 implements ArgumentMatcher{ - @Override - public boolean matches(HttpGet httpGet) { - return checkHttpGetMatchHost(httpGet, "localhost1"); - } - } - private class HttpGetForService2 implements ArgumentMatcher{ - @Override - public boolean matches(HttpGet httpGet) { - return checkHttpGetMatchHost(httpGet, "localhost2"); - } - } - private class HttpGetForServiceLocal implements ArgumentMatcher{ - @Override - public boolean matches(HttpGet httpGet) { - try { - return checkHttpGetMatchHost(httpGet, - InetAddress.getLocalHost().getCanonicalHostName()); - } catch (UnknownHostException e) { - return checkHttpGetMatchHost(httpGet, "localhost"); - } - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestWasbUriAndConfiguration.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestWasbUriAndConfiguration.java deleted file mode 100644 index 08122f966b905..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestWasbUriAndConfiguration.java +++ /dev/null @@ -1,664 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure; - -import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY; -import static org.apache.hadoop.fs.azure.NativeAzureFileSystem.RETURN_URI_AS_CANONICAL_SERVICE_NAME_PROPERTY_NAME; -import static org.apache.hadoop.test.LambdaTestUtils.intercept; -import static org.assertj.core.api.Assumptions.assumeThat; - -import java.io.ByteArrayInputStream; -import java.io.DataInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.net.URI; -import java.util.Date; -import java.util.EnumSet; -import java.io.File; -import java.util.NoSuchElementException; - -import org.apache.hadoop.fs.azure.integration.AzureTestUtils; -import org.apache.hadoop.fs.contract.ContractTestUtils; -import org.apache.hadoop.security.ProviderUtils; -import org.apache.hadoop.security.alias.CredentialProvider; -import org.apache.hadoop.security.alias.CredentialProviderFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.AbstractFileSystem; -import org.apache.hadoop.fs.FileContext; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.azure.AzureBlobStorageTestAccount.CreateOptions; -import org.apache.hadoop.test.GenericTestUtils; - -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.io.TempDir; - -import com.microsoft.azure.storage.StorageException; -import com.microsoft.azure.storage.blob.CloudBlobContainer; -import com.microsoft.azure.storage.blob.CloudBlockBlob; -import com.microsoft.azure.storage.core.SR; - -public class ITestWasbUriAndConfiguration extends AbstractWasbTestWithTimeout { - - private static final int FILE_SIZE = 4096; - private static final String PATH_DELIMITER = "/"; - - protected String accountName; - protected String accountKey; - protected static Configuration conf = null; - private boolean runningInSASMode = false; - - private AzureBlobStorageTestAccount testAccount; - - @AfterEach - public void tearDown() throws Exception { - testAccount = AzureTestUtils.cleanupTestAccount(testAccount); - } - - @BeforeEach - public void setMode() { - runningInSASMode = AzureBlobStorageTestAccount.createTestConfiguration(). - getBoolean(AzureNativeFileSystemStore.KEY_USE_SECURE_MODE, false); - } - - private boolean validateIOStreams(Path filePath) throws IOException { - // Capture the file system from the test account. - FileSystem fs = testAccount.getFileSystem(); - return validateIOStreams(fs, filePath); - } - - private boolean validateIOStreams(FileSystem fs, Path filePath) - throws IOException { - - // Create and write a file - OutputStream outputStream = fs.create(filePath); - outputStream.write(new byte[FILE_SIZE]); - outputStream.close(); - - // Return true if the the count is equivalent to the file size. - return (FILE_SIZE == readInputStream(fs, filePath)); - } - - private int readInputStream(Path filePath) throws IOException { - // Capture the file system from the test account. - FileSystem fs = testAccount.getFileSystem(); - return readInputStream(fs, filePath); - } - - private int readInputStream(FileSystem fs, Path filePath) throws IOException { - // Read the file - InputStream inputStream = fs.open(filePath); - int count = 0; - while (inputStream.read() >= 0) { - count++; - } - inputStream.close(); - - // Return true if the the count is equivalent to the file size. 
- return count; - } - - // Positive tests to exercise making a connection with to Azure account using - // account key. - @Test - public void testConnectUsingKey() throws Exception { - - testAccount = AzureBlobStorageTestAccount.create(); - assumeNotNull(testAccount); - - // Validate input and output on the connection. - assertTrue(validateIOStreams(new Path("/wasb_scheme"))); - } - - @Test - public void testConnectUsingSAS() throws Exception { - - assumeThat(runningInSASMode).isFalse(); - // Create the test account with SAS credentials. - testAccount = AzureBlobStorageTestAccount.create("", - EnumSet.of(CreateOptions.UseSas, CreateOptions.CreateContainer)); - assumeNotNull(testAccount); - // Validate input and output on the connection. - // NOTE: As of 4/15/2013, Azure Storage has a deficiency that prevents the - // full scenario from working (CopyFromBlob doesn't work with SAS), so - // just do a minor check until that is corrected. - assertFalse(testAccount.getFileSystem().exists(new Path("/IDontExist"))); - //assertTrue(validateIOStreams(new Path("/sastest.txt"))); - } - - @Test - public void testConnectUsingSASReadonly() throws Exception { - - assumeThat(runningInSASMode).isFalse(); - // Create the test account with SAS credentials. - testAccount = AzureBlobStorageTestAccount.create("", EnumSet.of( - CreateOptions.UseSas, CreateOptions.CreateContainer, - CreateOptions.Readonly)); - assumeNotNull(testAccount); - - // Create a blob in there - final String blobKey = "blobForReadonly"; - CloudBlobContainer container = testAccount.getRealContainer(); - CloudBlockBlob blob = container.getBlockBlobReference(blobKey); - ByteArrayInputStream inputStream = new ByteArrayInputStream(new byte[] { 1, - 2, 3 }); - blob.upload(inputStream, 3); - inputStream.close(); - - // Make sure we can read it from the file system - Path filePath = new Path("/" + blobKey); - FileSystem fs = testAccount.getFileSystem(); - assertTrue(fs.exists(filePath)); - byte[] obtained = new byte[3]; - DataInputStream obtainedInputStream = fs.open(filePath); - obtainedInputStream.readFully(obtained); - obtainedInputStream.close(); - assertEquals(3, obtained[2]); - } - - /** - * Use secure mode, which will automatically switch to SAS, - */ - @Test - public void testConnectUsingSecureSAS() throws Exception { - // Create the test account with SAS credentials. - Configuration conf = new Configuration(); - conf.setBoolean(AzureNativeFileSystemStore.KEY_USE_SECURE_MODE, true); - testAccount = AzureBlobStorageTestAccount.create("", - EnumSet.of(CreateOptions.UseSas), - conf); - assumeNotNull(testAccount); - NativeAzureFileSystem fs = testAccount.getFileSystem(); - - AzureException ex = intercept(AzureException.class, - SR.ENUMERATION_ERROR, - () -> ContractTestUtils.writeTextFile(fs, - new Path("/testConnectUsingSecureSAS"), - "testConnectUsingSecureSAS", - true)); - - StorageException cause = getCause(StorageException.class, - getCause(NoSuchElementException.class, ex)); - GenericTestUtils.assertExceptionContains( - "The specified container does not exist", cause); - } - - /** - * Get an inner cause of an exception; require it to be of the given - * type. - * If there is a problem, an AssertionError is thrown, containing the - * outer or inner exception. - * @param clazz required class - * @param t exception - * @param type of required exception - * @return the retrieved exception - * @throws AssertionError if there is no cause or it is of the wrong type. 
- */ - private E getCause( - Class clazz, Throwable t) { - Throwable e = t.getCause(); - if (e == null) { - throw new AssertionError("No cause", t); - } - if (!clazz.isAssignableFrom(e.getClass())) { - throw new AssertionError("Wrong inner class", e); - } else { - return (E) e; - } - } - - @Test - public void testConnectUsingAnonymous() throws Exception { - - // Create test account with anonymous credentials - testAccount = AzureBlobStorageTestAccount.createAnonymous("testWasb.txt", - FILE_SIZE); - assumeNotNull(testAccount); - - // Read the file from the public folder using anonymous credentials. - assertEquals(FILE_SIZE, readInputStream(new Path("/testWasb.txt"))); - } - - @Test - public void testConnectToEmulator() throws Exception { - testAccount = AzureBlobStorageTestAccount.createForEmulator(); - assumeNotNull(testAccount); - assertTrue(validateIOStreams(new Path("/testFile"))); - } - - /** - * Tests that we can connect to fully qualified accounts outside of - * blob.core.windows.net - */ - @Test - public void testConnectToFullyQualifiedAccountMock() throws Exception { - Configuration conf = new Configuration(); - AzureBlobStorageTestAccount.setMockAccountKey(conf, - "mockAccount.mock.authority.net"); - AzureNativeFileSystemStore store = new AzureNativeFileSystemStore(); - MockStorageInterface mockStorage = new MockStorageInterface(); - store.setAzureStorageInteractionLayer(mockStorage); - NativeAzureFileSystem fs = new NativeAzureFileSystem(store); - fs.initialize( - new URI("wasb://mockContainer@mockAccount.mock.authority.net"), conf); - fs.createNewFile(new Path("/x")); - assertTrue(mockStorage.getBackingStore().exists( - "http://mockAccount.mock.authority.net/mockContainer/x")); - fs.close(); - } - - public void testConnectToRoot() throws Exception { - - // Set up blob names. - final String blobPrefix = String.format("wasbtests-%s-%tQ-blob", - System.getProperty("user.name"), new Date()); - final String inblobName = blobPrefix + "_In" + ".txt"; - final String outblobName = blobPrefix + "_Out" + ".txt"; - - // Create test account with default root access. - testAccount = AzureBlobStorageTestAccount.createRoot(inblobName, FILE_SIZE); - assumeNotNull(testAccount); - - // Read the file from the default container. - assertEquals(FILE_SIZE, readInputStream(new Path(PATH_DELIMITER - + inblobName))); - - try { - // Capture file system. - FileSystem fs = testAccount.getFileSystem(); - - // Create output path and open an output stream to the root folder. - Path outputPath = new Path(PATH_DELIMITER + outblobName); - OutputStream outputStream = fs.create(outputPath); - fail("Expected an AzureException when writing to root folder."); - outputStream.write(new byte[FILE_SIZE]); - outputStream.close(); - } catch (AzureException e) { - assertTrue(true); - } catch (Exception e) { - String errMsg = String.format( - "Expected AzureException but got %s instead.", e); - assertTrue(false, errMsg); - } - } - - // Positive tests to exercise throttling I/O path. Connections are made to an - // Azure account using account key. - // - public void testConnectWithThrottling() throws Exception { - - testAccount = AzureBlobStorageTestAccount.createThrottled(); - - // Validate input and output on the connection. - assertTrue(validateIOStreams(new Path("/wasb_scheme"))); - } - - /** - * Creates a file and writes a single byte with the given value in it. 
- */ - private static void writeSingleByte(FileSystem fs, Path testFile, int toWrite) - throws Exception { - OutputStream outputStream = fs.create(testFile); - outputStream.write(toWrite); - outputStream.close(); - } - - /** - * Reads the file given and makes sure that it's a single-byte file with the - * given value in it. - */ - private static void assertSingleByteValue(FileSystem fs, Path testFile, - int expectedValue) throws Exception { - InputStream inputStream = fs.open(testFile); - int byteRead = inputStream.read(); - assertTrue(byteRead >= 0, "File unexpectedly empty: " + testFile); - assertTrue( - inputStream.read() < 0, "File has more than a single byte: " + testFile); - inputStream.close(); - assertEquals(expectedValue, byteRead, "Unxpected content in: " + testFile); - } - - @Test - public void testMultipleContainers() throws Exception { - AzureBlobStorageTestAccount firstAccount = AzureBlobStorageTestAccount - .create("first"), secondAccount = AzureBlobStorageTestAccount - .create("second"); - assumeNotNull(firstAccount); - assumeNotNull(secondAccount); - try { - FileSystem firstFs = firstAccount.getFileSystem(), - secondFs = secondAccount.getFileSystem(); - Path testFile = new Path("/testWasb"); - assertTrue(validateIOStreams(firstFs, testFile)); - assertTrue(validateIOStreams(secondFs, testFile)); - // Make sure that we're really dealing with two file systems here. - writeSingleByte(firstFs, testFile, 5); - writeSingleByte(secondFs, testFile, 7); - assertSingleByteValue(firstFs, testFile, 5); - assertSingleByteValue(secondFs, testFile, 7); - } finally { - firstAccount.cleanup(); - secondAccount.cleanup(); - } - } - - @Test - public void testDefaultKeyProvider() throws Exception { - Configuration conf = new Configuration(); - String account = "testacct"; - String key = "testkey"; - - conf.set(SimpleKeyProvider.KEY_ACCOUNT_KEY_PREFIX + account, key); - - String result = AzureNativeFileSystemStore.getAccountKeyFromConfiguration( - account, conf); - assertEquals(key, result); - } - - @Test - public void testCredsFromCredentialProvider(@TempDir java.nio.file.Path tempDir) - throws Exception { - - assumeThat(runningInSASMode).isFalse(); - String account = "testacct"; - String key = "testkey"; - // set up conf to have a cred provider - final Configuration conf = new Configuration(); - final File file = new File(tempDir.toFile(), "myfile.txt"); - final URI jks = ProviderUtils.nestURIForLocalJavaKeyStoreProvider( - file.toURI()); - conf.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, - jks.toString()); - - provisionAccountKey(conf, account, key); - - // also add to configuration as clear text that should be overridden - conf.set(SimpleKeyProvider.KEY_ACCOUNT_KEY_PREFIX + account, - key + "cleartext"); - - String result = AzureNativeFileSystemStore.getAccountKeyFromConfiguration( - account, conf); - // result should contain the credential provider key not the config key - assertEquals(key, result, "AccountKey incorrect."); - } - - void provisionAccountKey( - final Configuration conf, String account, String key) throws Exception { - // add our creds to the provider - final CredentialProvider provider = - CredentialProviderFactory.getProviders(conf).get(0); - provider.createCredentialEntry( - SimpleKeyProvider.KEY_ACCOUNT_KEY_PREFIX + account, key.toCharArray()); - provider.flush(); - } - - @Test - public void testValidKeyProvider() throws Exception { - Configuration conf = new Configuration(); - String account = "testacct"; - String key = "testkey"; - - 
conf.set(SimpleKeyProvider.KEY_ACCOUNT_KEY_PREFIX + account, key); - conf.setClass("fs.azure.account.keyprovider." + account, - SimpleKeyProvider.class, KeyProvider.class); - String result = AzureNativeFileSystemStore.getAccountKeyFromConfiguration( - account, conf); - assertEquals(key, result); - } - - @Test - public void testInvalidKeyProviderNonexistantClass() throws Exception { - Configuration conf = new Configuration(); - String account = "testacct"; - - conf.set("fs.azure.account.keyprovider." + account, - "org.apache.Nonexistant.Class"); - try { - AzureNativeFileSystemStore.getAccountKeyFromConfiguration(account, conf); - fail("Nonexistant key provider class should have thrown a " - + "KeyProviderException"); - } catch (KeyProviderException e) { - } - } - - @Test - public void testInvalidKeyProviderWrongClass() throws Exception { - Configuration conf = new Configuration(); - String account = "testacct"; - - conf.set("fs.azure.account.keyprovider." + account, "java.lang.String"); - try { - AzureNativeFileSystemStore.getAccountKeyFromConfiguration(account, conf); - fail("Key provider class that doesn't implement KeyProvider " - + "should have thrown a KeyProviderException"); - } catch (KeyProviderException e) { - } - } - - /** - * Tests the cases when the URI is specified with no authority, i.e. - * wasb:///path/to/file. - */ - @Test - public void testNoUriAuthority() throws Exception { - // For any combination of default FS being asv(s)/wasb(s)://c@a/ and - // the actual URI being asv(s)/wasb(s):///, it should work. - - String[] wasbAliases = new String[] { "wasb", "wasbs" }; - for (String defaultScheme : wasbAliases) { - for (String wantedScheme : wasbAliases) { - testAccount = AzureBlobStorageTestAccount.createMock(); - Configuration conf = testAccount.getFileSystem().getConf(); - String authority = testAccount.getFileSystem().getUri().getAuthority(); - URI defaultUri = new URI(defaultScheme, authority, null, null, null); - conf.set(FS_DEFAULT_NAME_KEY, defaultUri.toString()); - // Add references to file system implementations for wasb and wasbs. - conf.addResource("azure-test.xml"); - URI wantedUri = new URI(wantedScheme + ":///random/path"); - NativeAzureFileSystem obtained = (NativeAzureFileSystem) FileSystem - .get(wantedUri, conf); - assertNotNull(obtained); - assertEquals(new URI(wantedScheme, authority, null, null, null), - obtained.getUri()); - // Make sure makeQualified works as expected - Path qualified = obtained.makeQualified(new Path(wantedUri)); - assertEquals(new URI(wantedScheme, authority, wantedUri.getPath(), - null, null), qualified.toUri()); - // Cleanup for the next iteration to not cache anything in FS - testAccount.cleanup(); - FileSystem.closeAll(); - } - } - // If the default FS is not a WASB FS, then specifying a URI without - // authority for the Azure file system should throw. 
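A minimal sketch of the authority fallback the deleted testNoUriAuthority case exercises; the account and container names below are placeholders, and running it would require the hadoop-azure client and real credentials on the classpath. When a wasb/wasbs URI carries no authority, FileSystem.get() resolves it against the authority of fs.defaultFS:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class WasbNoAuthoritySketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // The default FS supplies the container@account authority.
    conf.set("fs.defaultFS", "wasb://mycontainer@myaccount.blob.core.windows.net");
    // "wasb:///" has no authority, so it is resolved against fs.defaultFS above.
    FileSystem fs = FileSystem.get(new URI("wasb:///random/path"), conf);
    System.out.println(fs.getUri()); // wasb://mycontainer@myaccount.blob.core.windows.net
  }
}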
- testAccount = AzureBlobStorageTestAccount.createMock(); - Configuration conf = testAccount.getFileSystem().getConf(); - conf.set(FS_DEFAULT_NAME_KEY, "file:///"); - try { - FileSystem.get(new URI("wasb:///random/path"), conf); - fail("Should've thrown."); - } catch (IllegalArgumentException e) { - } - } - - @Test - public void testWasbAsDefaultFileSystemHasNoPort() throws Exception { - try { - testAccount = AzureBlobStorageTestAccount.createMock(); - Configuration conf = testAccount.getFileSystem().getConf(); - String authority = testAccount.getFileSystem().getUri().getAuthority(); - URI defaultUri = new URI("wasb", authority, null, null, null); - conf.set(FS_DEFAULT_NAME_KEY, defaultUri.toString()); - conf.addResource("azure-test.xml"); - - FileSystem fs = FileSystem.get(conf); - assertTrue(fs instanceof NativeAzureFileSystem); - assertEquals(-1, fs.getUri().getPort()); - - AbstractFileSystem afs = FileContext.getFileContext(conf) - .getDefaultFileSystem(); - assertTrue(afs instanceof Wasb); - assertEquals(-1, afs.getUri().getPort()); - } finally { - testAccount.cleanup(); - FileSystem.closeAll(); - } - } - - /** - * Tests the cases when the scheme specified is 'wasbs'. - */ - @Test - public void testAbstractFileSystemImplementationForWasbsScheme() throws Exception { - try { - testAccount = AzureBlobStorageTestAccount.createMock(); - Configuration conf = testAccount.getFileSystem().getConf(); - String authority = testAccount.getFileSystem().getUri().getAuthority(); - URI defaultUri = new URI("wasbs", authority, null, null, null); - conf.set(FS_DEFAULT_NAME_KEY, defaultUri.toString()); - conf.set("fs.AbstractFileSystem.wasbs.impl", "org.apache.hadoop.fs.azure.Wasbs"); - conf.addResource("azure-test.xml"); - - FileSystem fs = FileSystem.get(conf); - assertTrue(fs instanceof NativeAzureFileSystem); - assertEquals("wasbs", fs.getScheme()); - - AbstractFileSystem afs = FileContext.getFileContext(conf) - .getDefaultFileSystem(); - assertTrue(afs instanceof Wasbs); - assertEquals(-1, afs.getUri().getPort()); - assertEquals("wasbs", afs.getUri().getScheme()); - } finally { - testAccount.cleanup(); - FileSystem.closeAll(); - } - } - - @Test - public void testCredentialProviderPathExclusions() throws Exception { - String providerPath = - "user:///,jceks://wasb/user/hrt_qa/sqoopdbpasswd.jceks," + - "jceks://hdfs@nn1.example.com/my/path/test.jceks"; - Configuration config = new Configuration(); - config.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, - providerPath); - String newPath = "user:///,jceks://hdfs@nn1.example.com/my/path/test.jceks"; - - excludeAndTestExpectations(config, newPath); - } - - @Test - public void testExcludeAllProviderTypesFromConfig() throws Exception { - String providerPath = - "jceks://wasb/tmp/test.jceks," + - "jceks://wasb@/my/path/test.jceks"; - Configuration config = new Configuration(); - config.set(CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, - providerPath); - String newPath = null; - - excludeAndTestExpectations(config, newPath); - } - - void excludeAndTestExpectations(Configuration config, String newPath) - throws Exception { - Configuration conf = ProviderUtils.excludeIncompatibleCredentialProviders( - config, NativeAzureFileSystem.class); - String effectivePath = conf.get( - CredentialProviderFactory.CREDENTIAL_PROVIDER_PATH, null); - assertEquals(newPath, effectivePath); - } - - @Test - public void testUserAgentConfig() throws Exception { - // Set the user agent - try { - testAccount = AzureBlobStorageTestAccount.createMock(); - Configuration 
conf = testAccount.getFileSystem().getConf(); - String authority = testAccount.getFileSystem().getUri().getAuthority(); - URI defaultUri = new URI("wasbs", authority, null, null, null); - conf.set(FS_DEFAULT_NAME_KEY, defaultUri.toString()); - conf.set("fs.AbstractFileSystem.wasbs.impl", "org.apache.hadoop.fs.azure.Wasbs"); - - conf.set(AzureNativeFileSystemStore.USER_AGENT_ID_KEY, "TestClient"); - - FileSystem fs = FileSystem.get(conf); - AbstractFileSystem afs = FileContext.getFileContext(conf).getDefaultFileSystem(); - - assertTrue(afs instanceof Wasbs); - assertEquals(-1, afs.getUri().getPort()); - assertEquals("wasbs", afs.getUri().getScheme()); - - } finally { - testAccount.cleanup(); - FileSystem.closeAll(); - } - - // Unset the user agent - try { - testAccount = AzureBlobStorageTestAccount.createMock(); - Configuration conf = testAccount.getFileSystem().getConf(); - String authority = testAccount.getFileSystem().getUri().getAuthority(); - URI defaultUri = new URI("wasbs", authority, null, null, null); - conf.set(FS_DEFAULT_NAME_KEY, defaultUri.toString()); - conf.set("fs.AbstractFileSystem.wasbs.impl", "org.apache.hadoop.fs.azure.Wasbs"); - - conf.unset(AzureNativeFileSystemStore.USER_AGENT_ID_KEY); - - FileSystem fs = FileSystem.get(conf); - AbstractFileSystem afs = FileContext.getFileContext(conf).getDefaultFileSystem(); - assertTrue(afs instanceof Wasbs); - assertEquals(-1, afs.getUri().getPort()); - assertEquals("wasbs", afs.getUri().getScheme()); - - } finally { - testAccount.cleanup(); - FileSystem.closeAll(); - } - } - - @Test - public void testCanonicalServiceName() throws Exception { - AzureBlobStorageTestAccount testAccount = AzureBlobStorageTestAccount.createMock(); - Configuration conf = testAccount.getFileSystem().getConf(); - String authority = testAccount.getFileSystem().getUri().getAuthority(); - URI defaultUri = new URI("wasbs", authority, null, null, null); - conf.set(FS_DEFAULT_NAME_KEY, defaultUri.toString()); - - try { - FileSystem fs0 = FileSystem.get(conf); - // Default getCanonicalServiceName() will try to resolve the host to IP, - // because the mock container does not exist, this call is expected to fail. - intercept(IllegalArgumentException.class, - "java.net.UnknownHostException", - () -> fs0.getCanonicalServiceName()); - - conf.setBoolean(RETURN_URI_AS_CANONICAL_SERVICE_NAME_PROPERTY_NAME, true); - FileSystem fs1 = FileSystem.newInstance(defaultUri, conf); - assertEquals(fs1.getUri().toString(), fs1.getCanonicalServiceName(), - "getCanonicalServiceName() should return URI"); - } finally { - testAccount.cleanup(); - FileSystem.closeAll(); - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/InMemoryBlockBlobStore.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/InMemoryBlockBlobStore.java deleted file mode 100644 index 7ddeabe242ef6..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/InMemoryBlockBlobStore.java +++ /dev/null @@ -1,197 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Map; - -import static java.util.Objects.requireNonNull; - -/** - * A simple memory key-value store to help mock the Windows Azure Storage - * implementation for unit testing. - */ -public class InMemoryBlockBlobStore { - private final HashMap blobs = new HashMap(); - private HashMap containerMetadata; - - public synchronized Iterable getKeys() { - return new ArrayList(blobs.keySet()); - } - - public static class ListBlobEntry { - private final String key; - private final HashMap metadata; - private final int contentLength; - private final boolean isPageBlob; - - - ListBlobEntry(String key, HashMap metadata, - int contentLength, boolean isPageBlob) { - this.key = key; - this.metadata = metadata; - this.contentLength = contentLength; - this.isPageBlob = isPageBlob; - } - - public String getKey() { - return key; - } - - public HashMap getMetadata() { - return metadata; - } - - public int getContentLength() { - return contentLength; - } - - public boolean isPageBlob() { - return isPageBlob; - } - } - - /** - * List all the blobs whose key starts with the given prefix. - * - * @param prefix - * The prefix to check. - * @param includeMetadata - * If set, the metadata in the returned listing will be populated; - * otherwise it'll be null. - * @return The listing. - */ - public synchronized Iterable listBlobs(String prefix, - boolean includeMetadata) { - ArrayList list = new ArrayList(); - for (Map.Entry entry : blobs.entrySet()) { - if (entry.getKey().startsWith(prefix)) { - list.add(new ListBlobEntry( - entry.getKey(), - includeMetadata ? 
- new HashMap(entry.getValue().metadata) : - null, - entry.getValue().content.length, - entry.getValue().isPageBlob)); - } - } - return list; - } - - public synchronized byte[] getContent(String key) { - return blobs.get(key).content; - } - - @SuppressWarnings("unchecked") - public synchronized void setContent(String key, byte[] value, - HashMap metadata, boolean isPageBlob, - long length) { - blobs.put(key, new Entry(value, (HashMap)metadata.clone(), - isPageBlob, length)); - } - - @SuppressWarnings("unchecked") - public synchronized void setMetadata(String key, - HashMap metadata) { - blobs.get(key).metadata = (HashMap) metadata.clone(); - } - - public OutputStream uploadBlockBlob(final String key, - final HashMap metadata) { - setContent(key, new byte[0], metadata, false, 0); - return new ByteArrayOutputStream() { - @Override - public void flush() - throws IOException { - super.flush(); - byte[] tempBytes = toByteArray(); - setContent(key, tempBytes, metadata, false, tempBytes.length); - } - @Override - public void close() - throws IOException { - super.close(); - byte[] tempBytes = toByteArray(); - setContent(key, tempBytes, metadata, false, tempBytes.length); - } - }; - } - - public OutputStream uploadPageBlob(final String key, - final HashMap metadata, - final long length) { - setContent(key, new byte[0], metadata, true, length); - return new ByteArrayOutputStream() { - @Override - public void flush() - throws IOException { - super.flush(); - setContent(key, toByteArray(), metadata, true, length); - } - }; - } - - public synchronized void copy(String sourceKey, String destKey) { - blobs.put(destKey, blobs.get(sourceKey)); - } - - public synchronized void delete(String key) { - blobs.remove(key); - } - - public synchronized boolean exists(String key) { - return blobs.containsKey(key); - } - - @SuppressWarnings("unchecked") - public synchronized HashMap getMetadata(String key) { - Entry entry = requireNonNull(blobs.get(key), "entry for " + key); - return (HashMap) requireNonNull(entry.metadata, - "metadata for " + key) - .clone(); - } - - public synchronized HashMap getContainerMetadata() { - return containerMetadata; - } - - public synchronized void setContainerMetadata(HashMap metadata) { - containerMetadata = metadata; - } - - private static class Entry { - private byte[] content; - private HashMap metadata; - private boolean isPageBlob; - @SuppressWarnings("unused") // TODO: use it - private long length; - - public Entry(byte[] content, HashMap metadata, - boolean isPageBlob, long length) { - this.content = content; - this.metadata = metadata; - this.isPageBlob = isPageBlob; - this.length = length; - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/MockStorageInterface.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/MockStorageInterface.java deleted file mode 100644 index 6d11207c479b5..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/MockStorageInterface.java +++ /dev/null @@ -1,661 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.lang.reflect.Method; -import java.net.HttpURLConnection; -import java.net.URI; -import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.Calendar; -import java.util.Date; -import java.util.EnumSet; -import java.util.HashMap; -import java.util.HashSet; -import java.util.TimeZone; -import java.util.List; -import org.apache.commons.codec.DecoderException; -import org.apache.commons.codec.net.URLCodec; -import org.apache.commons.lang3.NotImplementedException; -import org.apache.hadoop.fs.Path; -import org.apache.http.client.utils.URIBuilder; - -import com.microsoft.azure.storage.AccessCondition; -import com.microsoft.azure.storage.CloudStorageAccount; -import com.microsoft.azure.storage.OperationContext; -import com.microsoft.azure.storage.RetryPolicyFactory; -import com.microsoft.azure.storage.StorageCredentials; -import com.microsoft.azure.storage.StorageException; -import com.microsoft.azure.storage.StorageUri; -import com.microsoft.azure.storage.blob.BlobListingDetails; -import com.microsoft.azure.storage.blob.BlobProperties; -import com.microsoft.azure.storage.blob.BlobRequestOptions; -import com.microsoft.azure.storage.blob.BlockEntry; -import com.microsoft.azure.storage.blob.BlockListingFilter; -import com.microsoft.azure.storage.blob.CloudBlob; -import com.microsoft.azure.storage.blob.CloudBlobContainer; -import com.microsoft.azure.storage.blob.CloudBlobDirectory; -import com.microsoft.azure.storage.blob.CopyState; -import com.microsoft.azure.storage.blob.ListBlobItem; -import com.microsoft.azure.storage.blob.PageRange; - -import javax.ws.rs.core.UriBuilder; -import javax.ws.rs.core.UriBuilderException; - -/** - * A mock implementation of the Azure Storage interaction layer for unit tests. - * Just does in-memory storage. - */ -public class MockStorageInterface extends StorageInterface { - private InMemoryBlockBlobStore backingStore; - private final ArrayList preExistingContainers = - new ArrayList(); - private String baseUriString; - private static final URLCodec codec = new URLCodec(); - - public InMemoryBlockBlobStore getBackingStore() { - return backingStore; - } - - /** - * Mocks the situation where a container already exists before WASB comes in, - * i.e. the situation where a user creates a container then mounts WASB on the - * pre-existing container. - * - * @param uri - * The URI of the container. - * @param metadata - * The metadata on the container. 
- */ - public void addPreExistingContainer(String uri, - HashMap metadata) { - preExistingContainers.add(new PreExistingContainer(uri, metadata)); - } - - @Override - public void setRetryPolicyFactory(final RetryPolicyFactory retryPolicyFactory) { - } - - @Override - public void setTimeoutInMs(int timeoutInMs) { - } - - @Override - public void createBlobClient(CloudStorageAccount account) { - backingStore = new InMemoryBlockBlobStore(); - } - - @Override - public void createBlobClient(URI baseUri) { - backingStore = new InMemoryBlockBlobStore(); - } - - @Override - public void createBlobClient(URI baseUri, StorageCredentials credentials) { - this.baseUriString = baseUri.toString(); - backingStore = new InMemoryBlockBlobStore(); - } - - @Override - public StorageCredentials getCredentials() { - // Not implemented for mock interface. - return null; - } - - /** - * Utility function used to convert a given URI to a decoded string - * representation sent to the backing store. URIs coming as input - * to this class will be encoded by the URI class, and we want - * the underlying storage to store keys in their original UTF-8 form. - */ - private static String convertUriToDecodedString(URI uri) { - try { - return codec.decode(uri.toString()); - } catch (DecoderException e) { - throw new AssertionError("Failed to decode URI: " + uri.toString()); - } - } - - private static URI convertKeyToEncodedUri(String key) { - try { - Path p = new Path(key); - URI unEncodedURI = p.toUri(); - return new URIBuilder().setPath(unEncodedURI.getPath()) - .setScheme(unEncodedURI.getScheme()).build(); - } catch (URISyntaxException e) { - int i = e.getIndex(); - String details; - if (i >= 0) { - details = " -- \"" + e.getInput().charAt(i) + "\""; - } else { - details = ""; - } - throw new AssertionError("Failed to encode key: " + key - + ": " + e + details); - } - } - - @Override - public CloudBlobContainerWrapper getContainerReference(String name) - throws URISyntaxException, StorageException { - String fullUri; - URIBuilder builder = new URIBuilder(baseUriString); - String path = builder.getPath() == null ? "" : builder.getPath() + "/"; - fullUri = builder.setPath(path + name).toString(); - MockCloudBlobContainerWrapper container = new MockCloudBlobContainerWrapper( - fullUri, name); - // Check if we have a pre-existing container with that name, and prime - // the wrapper with that knowledge if it's found. - for (PreExistingContainer existing : preExistingContainers) { - if (fullUri.equalsIgnoreCase(existing.containerUri)) { - // We have a pre-existing container. Mark the wrapper as created and - // make sure we use the metadata for it. 
- container.created = true; - backingStore.setContainerMetadata(existing.containerMetadata); - break; - } - } - return container; - } - - class MockCloudBlobContainerWrapper extends CloudBlobContainerWrapper { - private boolean created = false; - private HashMap metadata; - private final String baseUri; - private final String name; - - public MockCloudBlobContainerWrapper(String baseUri, String name) { - this.baseUri = baseUri; - this.name = name; - } - - @Override - public String getName() { - return name; - } - - @Override - public boolean exists(OperationContext opContext) throws StorageException { - return created; - } - - @Override - public void create(OperationContext opContext) throws StorageException { - created = true; - backingStore.setContainerMetadata(metadata); - } - - @Override - public HashMap getMetadata() { - return metadata; - } - - @Override - public void setMetadata(HashMap metadata) { - this.metadata = metadata; - } - - @Override - public void downloadAttributes(OperationContext opContext) - throws StorageException { - metadata = backingStore.getContainerMetadata(); - } - - @Override - public void uploadMetadata(OperationContext opContext) - throws StorageException { - backingStore.setContainerMetadata(metadata); - } - - @Override - public CloudBlobDirectoryWrapper getDirectoryReference(String relativePath) - throws URISyntaxException, StorageException { - return new MockCloudBlobDirectoryWrapper(new URI(fullUriString( - relativePath, true))); - } - - @Override - public CloudBlockBlobWrapper getBlockBlobReference(String relativePath) - throws URISyntaxException, StorageException { - return new MockCloudBlockBlobWrapper(new URI(fullUriString(relativePath, - false)), null, 0); - } - - @Override - public CloudPageBlobWrapper getPageBlobReference(String blobAddressUri) - throws URISyntaxException, StorageException { - return new MockCloudPageBlobWrapper(new URI(blobAddressUri), null, 0); - } - - // helper to create full URIs for directory and blob. - // use withTrailingSlash=true to get a good path for a directory. 
- private String fullUriString(String relativePath, boolean withTrailingSlash) { - String baseUri = this.baseUri; - if (!baseUri.endsWith("/")) { - baseUri += "/"; - } - if (withTrailingSlash && !relativePath.equals("") - && !relativePath.endsWith("/")) { - relativePath += "/"; - } - - try { - URIBuilder builder = new URIBuilder(baseUri); - return builder.setPath(builder.getPath() + relativePath).toString(); - } catch (URISyntaxException e) { - throw new RuntimeException("problem encoding fullUri", e); - } - } - } - - private static class PreExistingContainer { - final String containerUri; - final HashMap containerMetadata; - - public PreExistingContainer(String uri, HashMap metadata) { - this.containerUri = uri; - this.containerMetadata = metadata; - } - } - - class MockCloudBlobDirectoryWrapper extends CloudBlobDirectoryWrapper { - private URI uri; - - public MockCloudBlobDirectoryWrapper(URI uri) { - this.uri = uri; - } - - @Override - public CloudBlobContainer getContainer() throws URISyntaxException, - StorageException { - return null; - } - - @Override - public CloudBlobDirectory getParent() throws URISyntaxException, - StorageException { - return null; - } - - @Override - public URI getUri() { - return uri; - } - - @Override - public Iterable listBlobs(String prefix, - boolean useFlatBlobListing, EnumSet listingDetails, - BlobRequestOptions options, OperationContext opContext) - throws URISyntaxException, StorageException { - ArrayList ret = new ArrayList(); - URI searchUri = null; - if (prefix == null) { - searchUri = uri; - } else { - try { - searchUri = UriBuilder.fromUri(uri).path(prefix).build(); - } catch (UriBuilderException e) { - throw new AssertionError("Failed to encode path: " + prefix); - } - } - - String fullPrefix = convertUriToDecodedString(searchUri); - boolean includeMetadata = listingDetails.contains(BlobListingDetails.METADATA); - HashSet addedDirectories = new HashSet(); - for (InMemoryBlockBlobStore.ListBlobEntry current : backingStore.listBlobs( - fullPrefix, includeMetadata)) { - int indexOfSlash = current.getKey().indexOf('/', fullPrefix.length()); - if (useFlatBlobListing || indexOfSlash < 0) { - if (current.isPageBlob()) { - ret.add(new MockCloudPageBlobWrapper( - convertKeyToEncodedUri(current.getKey()), - current.getMetadata(), - current.getContentLength())); - } else { - ret.add(new MockCloudBlockBlobWrapper( - convertKeyToEncodedUri(current.getKey()), - current.getMetadata(), - current.getContentLength())); - } - } else { - String directoryName = current.getKey().substring(0, indexOfSlash); - if (!addedDirectories.contains(directoryName)) { - addedDirectories.add(current.getKey()); - ret.add(new MockCloudBlobDirectoryWrapper(new URI( - directoryName + "/"))); - } - } - } - return ret; - } - - @Override - public StorageUri getStorageUri() { - throw new NotImplementedException("Code is not implemented"); - } - } - - abstract class MockCloudBlobWrapper implements CloudBlobWrapper { - protected final URI uri; - protected HashMap metadata = - new HashMap(); - protected BlobProperties properties; - - protected MockCloudBlobWrapper(URI uri, HashMap metadata, - int length) { - this.uri = uri; - this.metadata = metadata; - this.properties = new BlobProperties(); - - this.properties=updateLastModifed(this.properties); - this.properties=updateLength(this.properties,length); - } - - protected BlobProperties updateLastModifed(BlobProperties properties){ - try{ - Method setLastModified =properties.getClass(). 
- getDeclaredMethod("setLastModified", Date.class); - setLastModified.setAccessible(true); - setLastModified.invoke(this.properties, - Calendar.getInstance(TimeZone.getTimeZone("UTC")).getTime()); - }catch(Exception e){ - throw new RuntimeException(e); - } - return properties; - } - - protected BlobProperties updateLength(BlobProperties properties,int length) { - try{ - Method setLength =properties.getClass(). - getDeclaredMethod("setLength", long.class); - setLength.setAccessible(true); - setLength.invoke(this.properties, length); - }catch (Exception e){ - throw new RuntimeException(e); - } - return properties; - } - - protected void refreshProperties(boolean getMetadata) { - if (backingStore.exists(convertUriToDecodedString(uri))) { - byte[] content = backingStore.getContent(convertUriToDecodedString(uri)); - properties = new BlobProperties(); - this.properties=updateLastModifed(this.properties); - this.properties=updateLength(this.properties, content.length); - if (getMetadata) { - metadata = backingStore.getMetadata(convertUriToDecodedString(uri)); - } - } - } - - @Override - public CloudBlobContainer getContainer() throws URISyntaxException, - StorageException { - return null; - } - - @Override - public CloudBlobDirectory getParent() throws URISyntaxException, - StorageException { - return null; - } - - @Override - public URI getUri() { - return uri; - } - - @Override - public HashMap getMetadata() { - return metadata; - } - - @Override - public void setMetadata(HashMap metadata) { - this.metadata = metadata; - } - - @Override - public void startCopyFromBlob(CloudBlobWrapper sourceBlob, BlobRequestOptions options, - OperationContext opContext, boolean overwriteDestination) throws StorageException, URISyntaxException { - if (!overwriteDestination && backingStore.exists(convertUriToDecodedString(uri))) { - throw new StorageException("BlobAlreadyExists", - "The blob already exists.", - HttpURLConnection.HTTP_CONFLICT, - null, - null); - } - backingStore.copy(convertUriToDecodedString(sourceBlob.getUri()), convertUriToDecodedString(uri)); - //TODO: set the backingStore.properties.CopyState and - // update azureNativeFileSystemStore.waitForCopyToComplete - } - - @Override - public CopyState getCopyState() { - return this.properties.getCopyState(); - } - - @Override - public void delete(OperationContext opContext, SelfRenewingLease lease) - throws StorageException { - backingStore.delete(convertUriToDecodedString(uri)); - } - - @Override - public boolean exists(OperationContext opContext) throws StorageException { - return backingStore.exists(convertUriToDecodedString(uri)); - } - - @Override - public void downloadAttributes(OperationContext opContext) - throws StorageException { - refreshProperties(true); - } - - @Override - public BlobProperties getProperties() { - return properties; - } - - @Override - public InputStream openInputStream(BlobRequestOptions options, - OperationContext opContext) throws StorageException { - return new ByteArrayInputStream( - backingStore.getContent(convertUriToDecodedString(uri))); - } - - @Override - public void uploadMetadata(OperationContext opContext) - throws StorageException { - backingStore.setMetadata(convertUriToDecodedString(uri), metadata); - } - - @Override - public void downloadRange(long offset, long length, OutputStream os, - BlobRequestOptions options, OperationContext opContext) - throws StorageException { - if (offset < 0 || length <= 0) { - throw new IndexOutOfBoundsException(); - } - if 
(!backingStore.exists(convertUriToDecodedString(uri))) { - throw new StorageException("BlobNotFound", - "Resource does not exist.", - HttpURLConnection.HTTP_NOT_FOUND, - null, - null); - } - byte[] content = backingStore.getContent(convertUriToDecodedString(uri)); - try { - os.write(content, (int) offset, (int) length); - } catch (IOException e) { - throw new StorageException("Unknown error", "Unexpected error", e); - } - } - } - - class MockCloudBlockBlobWrapper extends MockCloudBlobWrapper - implements CloudBlockBlobWrapper { - - int minimumReadSize = AzureNativeFileSystemStore.DEFAULT_DOWNLOAD_BLOCK_SIZE; - - public MockCloudBlockBlobWrapper(URI uri, HashMap metadata, - int length) { - super(uri, metadata, length); - } - - @Override - public OutputStream openOutputStream(BlobRequestOptions options, - OperationContext opContext) throws StorageException { - return backingStore.uploadBlockBlob(convertUriToDecodedString(uri), - metadata); - } - - @Override - public int getStreamMinimumReadSizeInBytes() { - return this.minimumReadSize; - } - - @Override - public void setStreamMinimumReadSizeInBytes(int minimumReadSizeBytes) { - this.minimumReadSize = minimumReadSizeBytes; - } - - @Override - public void setWriteBlockSizeInBytes(int writeBlockSizeBytes) { - } - - @Override - public StorageUri getStorageUri() { - return null; - } - - @Override - public void uploadProperties(OperationContext context, SelfRenewingLease lease) { - } - - @Override - public SelfRenewingLease acquireLease() { - return null; - } - - @Override - public CloudBlob getBlob() { - return null; - } - - @Override - public List downloadBlockList(BlockListingFilter filter, BlobRequestOptions options, - OperationContext opContext) throws IOException, StorageException { - - throw new UnsupportedOperationException("downloadBlockList not used in Mock Tests"); - } - @Override - public void uploadBlock(String blockId, AccessCondition accessCondition, - InputStream sourceStream, - long length, BlobRequestOptions options, - OperationContext opContext) throws IOException, StorageException { - throw new UnsupportedOperationException("uploadBlock not used in Mock Tests"); - } - - @Override - public void commitBlockList(List blockList, AccessCondition accessCondition, - BlobRequestOptions options, OperationContext opContext) throws IOException, StorageException { - throw new UnsupportedOperationException("commitBlockList not used in Mock Tests"); - } - - public void uploadMetadata(AccessCondition accessCondition, BlobRequestOptions options, - OperationContext opContext) throws StorageException { - throw new UnsupportedOperationException("uploadMetadata not used in Mock Tests"); - } - } - - class MockCloudPageBlobWrapper extends MockCloudBlobWrapper - implements CloudPageBlobWrapper { - - int minimumReadSize = AzureNativeFileSystemStore.DEFAULT_DOWNLOAD_BLOCK_SIZE; - - public MockCloudPageBlobWrapper(URI uri, HashMap metadata, - int length) { - super(uri, metadata, length); - } - - @Override - public void create(long length, BlobRequestOptions options, - OperationContext opContext) throws StorageException { - throw new NotImplementedException("Code is not implemented"); - } - - @Override - public void uploadPages(InputStream sourceStream, long offset, long length, - BlobRequestOptions options, OperationContext opContext) - throws StorageException, IOException { - throw new NotImplementedException("Code is not implemented"); - } - - @Override - public ArrayList downloadPageRanges(BlobRequestOptions options, - OperationContext opContext) 
throws StorageException { - throw new NotImplementedException("Code is not implemented"); - } - - @Override - public int getStreamMinimumReadSizeInBytes() { - return this.minimumReadSize; - } - - @Override - public void setStreamMinimumReadSizeInBytes(int minimumReadSize) { - this.minimumReadSize = minimumReadSize; - } - - @Override - public void setWriteBlockSizeInBytes(int writeBlockSizeInBytes) { - } - - @Override - public StorageUri getStorageUri() { - throw new NotImplementedException("Code is not implemented"); - } - - @Override - public void uploadProperties(OperationContext opContext, - SelfRenewingLease lease) - throws StorageException { - } - - @Override - public SelfRenewingLease acquireLease() { - return null; - } - - @Override - public CloudBlob getBlob() { - return null; - } - - public void uploadMetadata(AccessCondition accessCondition, BlobRequestOptions options, - OperationContext opContext) throws StorageException { - throw new UnsupportedOperationException("uploadMetadata not used in Mock Tests"); - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/MockWasbAuthorizerImpl.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/MockWasbAuthorizerImpl.java deleted file mode 100644 index d6f26d702c885..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/MockWasbAuthorizerImpl.java +++ /dev/null @@ -1,215 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.util.HashMap; -import java.util.Map; -import java.util.concurrent.TimeUnit; -import java.util.regex.Pattern; - -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.util.StringUtils; -import org.apache.hadoop.fs.Path; - -/** - * A mock wasb authorizer implementation. 
- */ - -public class MockWasbAuthorizerImpl implements WasbAuthorizerInterface { - - private Map authRules; - private CachingAuthorizer cache; - - // The full qualified URL to the root directory - private String qualifiedPrefixUrl; - - public MockWasbAuthorizerImpl(NativeAzureFileSystem fs) { - qualifiedPrefixUrl = new Path("/").makeQualified(fs.getUri(), - fs.getWorkingDirectory()) - .toString().replaceAll("/$", ""); - cache = new CachingAuthorizer<>(TimeUnit.MINUTES.convert(5L, TimeUnit.MINUTES), "AUTHORIZATION"); - } - - @Override - public void init(Configuration conf) { - cache.init(conf); - authRules = new HashMap<>(); - } - - public void addAuthRuleForOwner(String wasbAbsolutePath, - String accessType, boolean access) { - addAuthRule(wasbAbsolutePath, accessType, "owner", access); - } - - public void addAuthRule(String wasbAbsolutePath, - String accessType, String user, boolean access) { - wasbAbsolutePath = qualifiedPrefixUrl + wasbAbsolutePath; - AuthorizationComponent component = wasbAbsolutePath.endsWith("*") - ? new AuthorizationComponent("^" + wasbAbsolutePath.replace("*", ".*"), - accessType, user) - : new AuthorizationComponent(wasbAbsolutePath, accessType, user); - - this.authRules.put(component, access); - } - - @Override - public boolean authorize(String wasbAbsolutePath, - String accessType, - String owner) - throws WasbAuthorizationException { - - if (wasbAbsolutePath.endsWith( - NativeAzureFileSystem.FolderRenamePending.SUFFIX)) { - return true; - } - - CachedAuthorizerEntry cacheKey = new CachedAuthorizerEntry(wasbAbsolutePath, accessType, owner); - Boolean cacheresult = cache.get(cacheKey); - if (cacheresult != null) { - return cacheresult; - } - - boolean authorizeresult = authorizeInternal(wasbAbsolutePath, accessType, owner); - cache.put(cacheKey, authorizeresult); - - return authorizeresult; - } - - private boolean authorizeInternal(String wasbAbsolutePath, String accessType, String owner) - throws WasbAuthorizationException { - - String currentUserShortName = ""; - try { - UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); - currentUserShortName = ugi.getShortUserName(); - } catch (Exception e) { - //no op - } - - // In case of root("/"), owner match does not happen - // because owner is returned as empty string. - // we try to force owner match just for purpose of tests - // to make sure all operations work seemlessly with owner. 
- if (StringUtils.equalsIgnoreCase(wasbAbsolutePath, qualifiedPrefixUrl + "/")) { - owner = currentUserShortName; - } - - AuthorizationComponent component = new AuthorizationComponent(wasbAbsolutePath, - accessType, currentUserShortName); - - return processRules(authRules, component, owner); - } - - private boolean processRules(Map authRules, - AuthorizationComponent component, String owner) { - - // Direct match of rules and access request - if (authRules.containsKey(component)) { - return authRules.get(component); - } else { - // Regex-pattern match if we don't have a straight match for path - // Current user match if we don't have a owner match - for (Map.Entry entry : authRules.entrySet()) { - AuthorizationComponent key = entry.getKey(); - String keyPath = key.getWasbAbsolutePath(); - String keyAccess = key.getAccessType(); - String keyUser = key.getUser(); - - boolean foundMatchingOwnerRule = keyPath.equals(component.getWasbAbsolutePath()) - && keyAccess.equals(component.getAccessType()) - && keyUser.equalsIgnoreCase("owner") - && owner.equals(component.getUser()); - - boolean foundMatchingPatternRule = keyPath.endsWith("*") - && Pattern.matches(keyPath, component.getWasbAbsolutePath()) - && keyAccess.equals(component.getAccessType()) - && keyUser.equalsIgnoreCase(component.getUser()); - - boolean foundMatchingPatternOwnerRule = keyPath.endsWith("*") - && Pattern.matches(keyPath, component.getWasbAbsolutePath()) - && keyAccess.equals(component.getAccessType()) - && keyUser.equalsIgnoreCase("owner") - && owner.equals(component.getUser()); - - if (foundMatchingOwnerRule - || foundMatchingPatternRule - || foundMatchingPatternOwnerRule) { - return entry.getValue(); - } - } - return false; - } - } - - public void deleteAllAuthRules() { - authRules.clear(); - cache.clear(); - } - - private static class AuthorizationComponent { - - private final String wasbAbsolutePath; - private final String accessType; - private final String user; - - AuthorizationComponent(String wasbAbsolutePath, - String accessType, String user) { - this.wasbAbsolutePath = wasbAbsolutePath; - this.accessType = accessType; - this.user = user; - } - - @Override - public int hashCode() { - return this.wasbAbsolutePath.hashCode() ^ this.accessType.hashCode(); - } - - @Override - public boolean equals(Object obj) { - - if (obj == this) { - return true; - } - - if (!(obj instanceof AuthorizationComponent)) { - return false; - } - - return ((AuthorizationComponent) obj). - getWasbAbsolutePath().equals(this.wasbAbsolutePath) - && ((AuthorizationComponent) obj). - getAccessType().equals(this.accessType) - && ((AuthorizationComponent) obj). - getUser().equals(this.user); - } - - public String getWasbAbsolutePath() { - return this.wasbAbsolutePath; - } - - public String getAccessType() { - return accessType; - } - - public String getUser() { - return user; - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemBaseTest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemBaseTest.java deleted file mode 100644 index 7730d5283d7c1..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/NativeAzureFileSystemBaseTest.java +++ /dev/null @@ -1,1731 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Calendar; -import java.util.Date; -import java.util.EnumSet; -import java.util.TimeZone; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.FileUtil; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.XAttrSetFlag; -import org.apache.hadoop.fs.contract.ContractTestUtils; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.util.concurrent.SubjectInheritingThread; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.apache.hadoop.fs.azure.NativeAzureFileSystem.FolderRenamePending; - -import com.microsoft.azure.storage.AccessCondition; -import com.microsoft.azure.storage.StorageException; -import com.microsoft.azure.storage.blob.CloudBlob; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.readStringFromFile; -import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.writeStringToFile; -import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.writeStringToStream; -import static org.apache.hadoop.test.GenericTestUtils.*; -import static org.apache.hadoop.test.LambdaTestUtils.intercept; - -/* - * Tests the Native Azure file system (WASB) against an actual blob store if - * provided in the environment. - * Subclasses implement createTestAccount() to hit local&mock storage with the same test code. - * - * For hand-testing: remove "abstract" keyword and copy in an implementation of createTestAccount - * from one of the subclasses - */ -public abstract class NativeAzureFileSystemBaseTest - extends AbstractWasbTestBase { - - private final long modifiedTimeErrorMargin = 5 * 1000; // Give it +/-5 seconds - - private static final short READ_WRITE_PERMISSIONS = 644; - private static final EnumSet CREATE_FLAG = EnumSet.of(XAttrSetFlag.CREATE); - private static final EnumSet REPLACE_FLAG = EnumSet.of(XAttrSetFlag.REPLACE); - - public static final Logger LOG = LoggerFactory.getLogger(NativeAzureFileSystemBaseTest.class); - protected NativeAzureFileSystem fs; - - @BeforeEach - @Override - public void setUp() throws Exception { - super.setUp(); - fs = getFileSystem(); - } - - /** - * Assert that a path does not exist. 
- * - * @param message message to include in the assertion failure message - * @param path path in the filesystem - * @throws IOException IO problems - */ - public void assertPathDoesNotExist(String message, - Path path) throws IOException { - ContractTestUtils.assertPathDoesNotExist(fs, message, path); - } - - /** - * Assert that a path exists. - * - * @param message message to include in the assertion failure message - * @param path path in the filesystem - * @throws IOException IO problems - */ - public void assertPathExists(String message, - Path path) throws IOException { - ContractTestUtils.assertPathExists(fs, message, path); - } - - @Test - public void testCheckingNonExistentOneLetterFile() throws Exception { - assertPathDoesNotExist("one letter file", new Path("/a")); - } - - @Test - public void testStoreRetrieveFile() throws Exception { - Path testFile = methodPath(); - writeString(testFile, "Testing"); - assertTrue(fs.exists(testFile)); - FileStatus status = fs.getFileStatus(testFile); - assertNotNull(status); - // By default, files should be have masked permissions - // that grant RW to user, and R to group/other - assertEquals(new FsPermission((short) 0644), status.getPermission()); - assertEquals("Testing", readString(testFile)); - fs.delete(testFile, true); - } - - @Test - public void testSetGetXAttr() throws Exception { - byte[] attributeValue1 = "hi".getBytes(StandardCharsets.UTF_8); - byte[] attributeValue2 = "你好".getBytes(StandardCharsets.UTF_8); - String attributeName1 = "user.asciiAttribute"; - String attributeName2 = "user.unicodeAttribute"; - Path testFile = methodPath(); - - // after creating a file, the xAttr should not be present - createEmptyFile(testFile, FsPermission.createImmutable(READ_WRITE_PERMISSIONS)); - assertNull(fs.getXAttr(testFile, attributeName1)); - - // after setting the xAttr on the file, the value should be retrievable - fs.setXAttr(testFile, attributeName1, attributeValue1); - assertArrayEquals(attributeValue1, fs.getXAttr(testFile, attributeName1)); - - // after setting a second xAttr on the file, the first xAttr values should not be overwritten - fs.setXAttr(testFile, attributeName2, attributeValue2); - assertArrayEquals(attributeValue1, fs.getXAttr(testFile, attributeName1)); - assertArrayEquals(attributeValue2, fs.getXAttr(testFile, attributeName2)); - } - - @Test - public void testSetGetXAttrCreateReplace() throws Exception { - byte[] attributeValue = "one".getBytes(StandardCharsets.UTF_8); - String attributeName = "user.someAttribute"; - Path testFile = methodPath(); - - // after creating a file, it must be possible to create a new xAttr - createEmptyFile(testFile, FsPermission.createImmutable(READ_WRITE_PERMISSIONS)); - fs.setXAttr(testFile, attributeName, attributeValue, CREATE_FLAG); - assertArrayEquals(attributeValue, fs.getXAttr(testFile, attributeName)); - - // however after the xAttr is created, creating it again must fail - intercept(IOException.class, () -> fs.setXAttr(testFile, attributeName, attributeValue, CREATE_FLAG)); - } - - @Test - public void testSetGetXAttrReplace() throws Exception { - byte[] attributeValue1 = "one".getBytes(StandardCharsets.UTF_8); - byte[] attributeValue2 = "two".getBytes(StandardCharsets.UTF_8); - String attributeName = "user.someAttribute"; - Path testFile = methodPath(); - - // after creating a file, it must not be possible to replace an xAttr - createEmptyFile(testFile, FsPermission.createImmutable(READ_WRITE_PERMISSIONS)); - intercept(IOException.class, () -> fs.setXAttr(testFile, attributeName, 
attributeValue1, REPLACE_FLAG)); - - // however after the xAttr is created, replacing it must succeed - fs.setXAttr(testFile, attributeName, attributeValue1, CREATE_FLAG); - fs.setXAttr(testFile, attributeName, attributeValue2, REPLACE_FLAG); - assertArrayEquals(attributeValue2, fs.getXAttr(testFile, attributeName)); - } - - @Test - public void testStoreDeleteFolder() throws Exception { - Path testFolder = methodPath(); - assertFalse(fs.exists(testFolder)); - assertTrue(fs.mkdirs(testFolder)); - assertTrue(fs.exists(testFolder)); - FileStatus status = fs.getFileStatus(testFolder); - assertNotNull(status); - assertTrue(status.isDirectory()); - // By default, directories should be have masked permissions - // that grant RWX to user, and RX to group/other - assertEquals(new FsPermission((short) 0755), status.getPermission()); - Path innerFile = new Path(testFolder, "innerFile"); - assertTrue(fs.createNewFile(innerFile)); - assertPathExists("inner file", innerFile); - assertTrue(fs.delete(testFolder, true)); - assertPathDoesNotExist("inner file", innerFile); - assertPathDoesNotExist("testFolder", testFolder); - } - - @Test - public void testFileOwnership() throws Exception { - Path testFile = methodPath(); - writeString(testFile, "Testing"); - testOwnership(testFile); - } - - @Test - public void testFolderOwnership() throws Exception { - Path testFolder = methodPath(); - fs.mkdirs(testFolder); - testOwnership(testFolder); - } - - private void testOwnership(Path pathUnderTest) throws IOException { - FileStatus ret = fs.getFileStatus(pathUnderTest); - UserGroupInformation currentUser = UserGroupInformation.getCurrentUser(); - assertTrue(ret.getOwner().equals(currentUser.getShortUserName())); - fs.delete(pathUnderTest, true); - } - - private static FsPermission ignoreStickyBit(FsPermission original) { - return new FsPermission(original.getUserAction(), - original.getGroupAction(), original.getOtherAction()); - } - - // When FsPermission applies a UMask, it loses sticky bit information. - // And since we always apply UMask, we should ignore whether the sticky - // bit is equal or not. 
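    // Illustrative aside (not part of the original patch): a minimal sketch of the
    // sticky-bit loss described in the comment above, assuming FsPermission.applyUMask()
    // rebuilds the permission from the user/group/other actions only.
    FsPermission withSticky = new FsPermission((short) 01777);
    FsPermission afterUmask = withSticky.applyUMask(new FsPermission((short) 0));
    // Even with a no-op umask the sticky bit is dropped, so comparisons ignore it.
    assert withSticky.getStickyBit() && !afterUmask.getStickyBit();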
- private static void assertEqualsIgnoreStickyBit(FsPermission expected, - FsPermission actual) { - assertEquals(ignoreStickyBit(expected), ignoreStickyBit(actual)); - } - - @Test - public void testFilePermissions() throws Exception { - Path testFile = methodPath(); - FsPermission permission = FsPermission.createImmutable((short) 644); - createEmptyFile(testFile, permission); - FileStatus ret = fs.getFileStatus(testFile); - assertEqualsIgnoreStickyBit(permission, ret.getPermission()); - fs.delete(testFile, true); - } - - @Test - public void testFolderPermissions() throws Exception { - Path testFolder = methodPath(); - FsPermission permission = FsPermission.createImmutable((short) 644); - fs.mkdirs(testFolder, permission); - FileStatus ret = fs.getFileStatus(testFolder); - assertEqualsIgnoreStickyBit(permission, ret.getPermission()); - fs.delete(testFolder, true); - } - - void testDeepFileCreationBase(String testFilePath, String firstDirPath, String middleDirPath, - short permissionShort, short umaskedPermissionShort) throws Exception { - Path testFile = new Path(testFilePath); - Path firstDir = new Path(firstDirPath); - Path middleDir = new Path(middleDirPath); - FsPermission permission = FsPermission.createImmutable(permissionShort); - FsPermission umaskedPermission = FsPermission.createImmutable(umaskedPermissionShort); - - createEmptyFile(testFile, permission); - FsPermission rootPerm = fs.getFileStatus(firstDir.getParent()).getPermission(); - FsPermission inheritPerm = FsPermission.createImmutable((short)(rootPerm.toShort() | 0300)); - assertPathExists("test file", testFile); - assertPathExists("firstDir", firstDir); - assertPathExists("middleDir", middleDir); - // verify that the indirectly created directory inherited its permissions from the root directory - FileStatus directoryStatus = fs.getFileStatus(middleDir); - assertTrue(directoryStatus.isDirectory()); - assertEqualsIgnoreStickyBit(inheritPerm, directoryStatus.getPermission()); - // verify that the file itself has the permissions as specified - FileStatus fileStatus = fs.getFileStatus(testFile); - assertFalse(fileStatus.isDirectory()); - assertEqualsIgnoreStickyBit(umaskedPermission, fileStatus.getPermission()); - assertTrue(fs.delete(firstDir, true)); - assertPathDoesNotExist("deleted file", testFile); - - // An alternative test scenario would've been to delete the file first, - // and then check for the existence of the upper folders still. But that - // doesn't actually work as expected right now. - } - - @Test - public void testDeepFileCreation() throws Exception { - // normal permissions in user home - testDeepFileCreationBase("deep/file/creation/test", "deep", "deep/file/creation", (short)0644, (short)0644); - // extra permissions in user home. umask will change the actual permissions. 
- testDeepFileCreationBase("deep/file/creation/test", "deep", "deep/file/creation", (short)0777, (short)0755); - // normal permissions in root - testDeepFileCreationBase("/deep/file/creation/test", "/deep", "/deep/file/creation", (short)0644, (short)0644); - // less permissions in root - testDeepFileCreationBase("/deep/file/creation/test", "/deep", "/deep/file/creation", (short)0700, (short)0700); - // one indirectly created directory in root - testDeepFileCreationBase("/deep/file", "/deep", "/deep", (short)0644, (short)0644); - // one indirectly created directory in user home - testDeepFileCreationBase("deep/file", "deep", "deep", (short)0644, (short)0644); - } - - private static enum RenameVariation { - NormalFileName, SourceInAFolder, SourceWithSpace, SourceWithPlusAndPercent - } - - @Test - public void testRename() throws Exception { - for (RenameVariation variation : RenameVariation.values()) { - System.out.printf("Rename variation: %s\n", variation); - Path originalFile; - switch (variation) { - case NormalFileName: - originalFile = new Path("fileToRename"); - break; - case SourceInAFolder: - originalFile = new Path("file/to/rename"); - break; - case SourceWithSpace: - originalFile = new Path("file to rename"); - break; - case SourceWithPlusAndPercent: - originalFile = new Path("file+to%rename"); - break; - default: - throw new Exception("Unknown variation"); - } - Path destinationFile = new Path("file/resting/destination"); - assertTrue(fs.createNewFile(originalFile)); - assertTrue(fs.exists(originalFile)); - assertFalse(fs.rename(originalFile, destinationFile)); // Parent directory - // doesn't exist - assertTrue(fs.mkdirs(destinationFile.getParent())); - boolean result = fs.rename(originalFile, destinationFile); - assertTrue(result); - assertTrue(fs.exists(destinationFile)); - assertFalse(fs.exists(originalFile)); - fs.delete(destinationFile.getParent(), true); - } - } - - @Test - public void testRenameImplicitFolder() throws Exception { - Path testFile = new Path("deep/file/rename/test"); - FsPermission permission = FsPermission.createImmutable((short) 644); - createEmptyFile(testFile, permission); - boolean renameResult = fs.rename(new Path("deep/file"), new Path("deep/renamed")); - assertTrue(renameResult); - assertFalse(fs.exists(testFile)); - FileStatus newStatus = fs.getFileStatus(new Path("deep/renamed/rename/test")); - assertNotNull(newStatus); - assertEqualsIgnoreStickyBit(permission, newStatus.getPermission()); - assertTrue(fs.delete(new Path("deep"), true)); - } - - private enum RenameFolderVariation { - CreateFolderAndInnerFile, CreateJustInnerFile, CreateJustFolder - } - - @Test - public void testRenameFolder() throws Exception { - for (RenameFolderVariation variation : RenameFolderVariation.values()) { - Path originalFolder = new Path("folderToRename"); - if (variation != RenameFolderVariation.CreateJustInnerFile) { - assertTrue(fs.mkdirs(originalFolder)); - } - Path innerFile = new Path(originalFolder, "innerFile"); - Path innerFile2 = new Path(originalFolder, "innerFile2"); - if (variation != RenameFolderVariation.CreateJustFolder) { - assertTrue(fs.createNewFile(innerFile)); - assertTrue(fs.createNewFile(innerFile2)); - } - Path destination = new Path("renamedFolder"); - assertTrue(fs.rename(originalFolder, destination)); - assertTrue(fs.exists(destination)); - if (variation != RenameFolderVariation.CreateJustFolder) { - assertTrue(fs.exists(new Path(destination, innerFile.getName()))); - assertTrue(fs.exists(new Path(destination, innerFile2.getName()))); - } - 
assertFalse(fs.exists(originalFolder)); - assertFalse(fs.exists(innerFile)); - assertFalse(fs.exists(innerFile2)); - fs.delete(destination, true); - } - } - - @Test - public void testCopyFromLocalFileSystem() throws Exception { - Path localFilePath = new Path(System.getProperty("test.build.data", - "azure_test")); - FileSystem localFs = FileSystem.get(new Configuration()); - localFs.delete(localFilePath, true); - try { - writeStringToFile(localFs, localFilePath, "Testing"); - Path dstPath = methodPath(); - assertTrue(FileUtil.copy(localFs, localFilePath, fs, dstPath, false, - fs.getConf())); - assertPathExists("coied from local", dstPath); - assertEquals("Testing", readStringFromFile(fs, dstPath)); - fs.delete(dstPath, true); - } finally { - localFs.delete(localFilePath, true); - } - } - - @Test - public void testListDirectory() throws Exception { - Path rootFolder = new Path("testingList"); - assertTrue(fs.mkdirs(rootFolder)); - FileStatus[] listed = fs.listStatus(rootFolder); - assertEquals(0, listed.length); - Path innerFolder = new Path(rootFolder, "inner"); - assertTrue(fs.mkdirs(innerFolder)); - listed = fs.listStatus(rootFolder); - assertEquals(1, listed.length); - assertTrue(listed[0].isDirectory()); - Path innerFile = new Path(innerFolder, "innerFile"); - writeString(innerFile, "testing"); - listed = fs.listStatus(rootFolder); - assertEquals(1, listed.length); - assertTrue(listed[0].isDirectory()); - listed = fs.listStatus(innerFolder); - assertEquals(1, listed.length); - assertFalse(listed[0].isDirectory()); - assertTrue(fs.delete(rootFolder, true)); - } - - @Test - public void testUriEncoding() throws Exception { - fs.create(new Path("p/t%5Fe")).close(); - FileStatus[] listing = fs.listStatus(new Path("p")); - assertEquals(1, listing.length); - assertEquals("t%5Fe", listing[0].getPath().getName()); - assertTrue(fs.rename(new Path("p"), new Path("q"))); - assertTrue(fs.delete(new Path("q"), true)); - } - - @Test - public void testUriEncodingMoreComplexCharacters() throws Exception { - // Create a file name with URI reserved characters, plus the percent - String fileName = "!#$'()*;=[]%"; - String directoryName = "*;=[]%!#$'()"; - fs.create(new Path(directoryName, fileName)).close(); - FileStatus[] listing = fs.listStatus(new Path(directoryName)); - assertEquals(1, listing.length); - assertEquals(fileName, listing[0].getPath().getName()); - FileStatus status = fs.getFileStatus(new Path(directoryName, fileName)); - assertEquals(fileName, status.getPath().getName()); - InputStream stream = fs.open(new Path(directoryName, fileName)); - assertNotNull(stream); - stream.close(); - assertTrue(fs.delete(new Path(directoryName, fileName), true)); - assertTrue(fs.delete(new Path(directoryName), true)); - } - - @Test - public void testChineseCharacters() throws Exception { - // Create a file and a folder with Chinese (non-ASCI) characters - String chinese = "" + '\u963f' + '\u4db5'; - String fileName = "filename" + chinese; - String directoryName = chinese; - fs.create(new Path(directoryName, fileName)).close(); - FileStatus[] listing = fs.listStatus(new Path(directoryName)); - assertEquals(1, listing.length); - assertEquals(fileName, listing[0].getPath().getName()); - FileStatus status = fs.getFileStatus(new Path(directoryName, fileName)); - assertEquals(fileName, status.getPath().getName()); - InputStream stream = fs.open(new Path(directoryName, fileName)); - assertNotNull(stream); - stream.close(); - assertTrue(fs.delete(new Path(directoryName, fileName), true)); - 
assertTrue(fs.delete(new Path(directoryName), true)); - } - - @Test - public void testChineseCharactersFolderRename() throws Exception { - // Create a file and a folder with Chinese (non-ASCI) characters - String chinese = "" + '\u963f' + '\u4db5'; - String fileName = "filename" + chinese; - String srcDirectoryName = chinese; - String targetDirectoryName = "target" + chinese; - fs.create(new Path(srcDirectoryName, fileName)).close(); - fs.rename(new Path(srcDirectoryName), new Path(targetDirectoryName)); - FileStatus[] listing = fs.listStatus(new Path(targetDirectoryName)); - assertEquals(1, listing.length); - assertEquals(fileName, listing[0].getPath().getName()); - FileStatus status = fs.getFileStatus(new Path(targetDirectoryName, fileName)); - assertEquals(fileName, status.getPath().getName()); - assertTrue(fs.delete(new Path(targetDirectoryName, fileName), true)); - assertTrue(fs.delete(new Path(targetDirectoryName), true)); - } - - @Test - public void testReadingDirectoryAsFile() throws Exception { - Path dir = methodPath(); - assertTrue(fs.mkdirs(dir)); - try { - fs.open(dir).close(); - assertTrue(false, "Should've thrown"); - } catch (FileNotFoundException ex) { - assertExceptionContains("a directory not a file.", ex); - } - } - - @Test - public void testCreatingFileOverDirectory() throws Exception { - Path dir = methodPath(); - assertTrue(fs.mkdirs(dir)); - try { - fs.create(dir).close(); - assertTrue(false, "Should've thrown"); - } catch (IOException ex) { - assertExceptionContains("Cannot create file", ex); - assertExceptionContains("already exists as a directory", ex); - } - } - - @Test - public void testInputStreamReadWithZeroSizeBuffer() throws Exception { - Path newFile = methodPath(); - OutputStream output = fs.create(newFile); - output.write(10); - output.close(); - - InputStream input = fs.open(newFile); - int result = input.read(new byte[2], 0, 0); - assertEquals(0, result); - } - - @Test - public void testInputStreamReadWithBufferReturnsMinusOneOnEof() - throws Exception { - Path newFile = methodPath(); - OutputStream output = fs.create(newFile); - output.write(10); - output.close(); - - // Read first byte back - InputStream input = fs.open(newFile); - byte[] buff = new byte[1]; - int result = input.read(buff, 0, 1); - assertEquals(1, result); - assertEquals(10, buff[0]); - - // Issue another read and make sure it returns -1 - buff[0] = 2; - result = input.read(buff, 0, 1); - assertEquals(-1, result); - // Buffer is intact - assertEquals(2, buff[0]); - } - - @Test - public void testInputStreamReadWithBufferReturnsMinusOneOnEofForLargeBuffer() - throws Exception { - Path newFile = methodPath(); - OutputStream output = fs.create(newFile); - byte[] outputBuff = new byte[97331]; - for(int i = 0; i < outputBuff.length; ++i) { - outputBuff[i] = (byte)(Math.random() * 255); - } - output.write(outputBuff); - output.close(); - - // Read the content of the file - InputStream input = fs.open(newFile); - byte[] buff = new byte[131072]; - int result = input.read(buff, 0, buff.length); - assertEquals(outputBuff.length, result); - for(int i = 0; i < outputBuff.length; ++i) { - assertEquals(outputBuff[i], buff[i]); - } - - // Issue another read and make sure it returns -1 - buff = new byte[131072]; - result = input.read(buff, 0, buff.length); - assertEquals(-1, result); - } - - @Test - public void testInputStreamReadIntReturnsMinusOneOnEof() throws Exception { - Path newFile = methodPath(); - OutputStream output = fs.create(newFile); - output.write(10); - output.close(); - - // Read 
first byte back - InputStream input = fs.open(newFile); - int value = input.read(); - assertEquals(10, value); - - // Issue another read and make sure it returns -1 - value = input.read(); - assertEquals(-1, value); - } - - @Test - public void testSetPermissionOnFile() throws Exception { - Path newFile = methodPath(); - OutputStream output = fs.create(newFile); - output.write(13); - output.close(); - FsPermission newPermission = new FsPermission((short) 0700); - fs.setPermission(newFile, newPermission); - FileStatus newStatus = fs.getFileStatus(newFile); - assertNotNull(newStatus); - assertEquals(newPermission, newStatus.getPermission()); - assertEquals("supergroup", newStatus.getGroup()); - assertEquals(UserGroupInformation.getCurrentUser().getShortUserName(), - newStatus.getOwner()); - - // Don't check the file length for page blobs. Only block blobs - // provide the actual length of bytes written. - if (!(this instanceof ITestNativeAzureFSPageBlobLive)) { - assertEquals(1, newStatus.getLen()); - } - } - - @Test - public void testSetPermissionOnFolder() throws Exception { - Path newFolder = methodPath(); - assertTrue(fs.mkdirs(newFolder)); - FsPermission newPermission = new FsPermission((short) 0600); - fs.setPermission(newFolder, newPermission); - FileStatus newStatus = fs.getFileStatus(newFolder); - assertNotNull(newStatus); - assertEquals(newPermission, newStatus.getPermission()); - assertTrue(newStatus.isDirectory()); - } - - @Test - public void testSetOwnerOnFile() throws Exception { - Path newFile = methodPath(); - OutputStream output = fs.create(newFile); - output.write(13); - output.close(); - fs.setOwner(newFile, "newUser", null); - FileStatus newStatus = fs.getFileStatus(newFile); - assertNotNull(newStatus); - assertEquals("newUser", newStatus.getOwner()); - assertEquals("supergroup", newStatus.getGroup()); - - // File length is only reported to be the size of bytes written to the file for block blobs. - // So only check it for block blobs, not page blobs. - if (!(this instanceof ITestNativeAzureFSPageBlobLive)) { - assertEquals(1, newStatus.getLen()); - } - fs.setOwner(newFile, null, "newGroup"); - newStatus = fs.getFileStatus(newFile); - assertNotNull(newStatus); - assertEquals("newUser", newStatus.getOwner()); - assertEquals("newGroup", newStatus.getGroup()); - } - - @Test - public void testSetOwnerOnFolder() throws Exception { - Path newFolder = methodPath(); - assertTrue(fs.mkdirs(newFolder)); - fs.setOwner(newFolder, "newUser", null); - FileStatus newStatus = fs.getFileStatus(newFolder); - assertNotNull(newStatus); - assertEquals("newUser", newStatus.getOwner()); - assertTrue(newStatus.isDirectory()); - } - - @Test - public void testModifiedTimeForFile() throws Exception { - Path testFile = methodPath(); - fs.create(testFile).close(); - testModifiedTime(testFile); - } - - @Test - public void testModifiedTimeForFolder() throws Exception { - Path testFolder = methodPath(); - assertTrue(fs.mkdirs(testFolder)); - testModifiedTime(testFolder); - } - - @Test - public void testFolderLastModifiedTime() throws Exception { - Path parentFolder = methodPath(); - Path innerFile = new Path(parentFolder, "innerfile"); - assertTrue(fs.mkdirs(parentFolder)); - - // Create file - long lastModifiedTime = fs.getFileStatus(parentFolder) - .getModificationTime(); - // Wait at least the error margin - Thread.sleep(modifiedTimeErrorMargin + 1); - assertTrue(fs.createNewFile(innerFile)); - // The parent folder last modified time should have changed because we - // create an inner file. 
- assertFalse(testModifiedTime(parentFolder, lastModifiedTime)); - testModifiedTime(parentFolder); - - // Rename file - lastModifiedTime = fs.getFileStatus(parentFolder).getModificationTime(); - Path destFolder = new Path("testDestFolder"); - assertTrue(fs.mkdirs(destFolder)); - long destLastModifiedTime = fs.getFileStatus(destFolder) - .getModificationTime(); - Thread.sleep(modifiedTimeErrorMargin + 1); - Path destFile = new Path(destFolder, "innerfile"); - assertTrue(fs.rename(innerFile, destFile)); - // Both source and destination folder last modified time should have changed - // because of renaming. - assertFalse(testModifiedTime(parentFolder, lastModifiedTime)); - assertFalse(testModifiedTime(destFolder, destLastModifiedTime)); - testModifiedTime(parentFolder); - testModifiedTime(destFolder); - - // Delete file - destLastModifiedTime = fs.getFileStatus(destFolder).getModificationTime(); - // Wait at least the error margin - Thread.sleep(modifiedTimeErrorMargin + 1); - fs.delete(destFile, false); - // The parent folder last modified time should have changed because we - // delete an inner file. - assertFalse(testModifiedTime(destFolder, destLastModifiedTime)); - testModifiedTime(destFolder); - } - - /** - * Verify we can get file status of a directory with various forms of - * the directory file name, including the nonstandard but legal form - * ending in "/.". Check that we're getting status for a directory. - */ - @Test - public void testListSlash() throws Exception { - Path testFolder = new Path("/testFolder"); - Path testFile = new Path(testFolder, "testFile"); - assertTrue(fs.mkdirs(testFolder)); - assertTrue(fs.createNewFile(testFile)); - FileStatus status; - status = fs.getFileStatus(new Path("/testFolder")); - assertTrue(status.isDirectory()); - status = fs.getFileStatus(new Path("/testFolder/")); - assertTrue(status.isDirectory()); - status = fs.getFileStatus(new Path("/testFolder/.")); - assertTrue(status.isDirectory()); - } - - @Test - public void testCannotCreatePageBlobByDefault() throws Exception { - - // Verify that the page blob directory list configuration setting - // is not set in the default configuration. - Configuration conf = new Configuration(); - String[] rawPageBlobDirs = - conf.getStrings(AzureNativeFileSystemStore.KEY_PAGE_BLOB_DIRECTORIES); - assertTrue(rawPageBlobDirs == null); - } - - /* - * Set up a situation where a folder rename is partway finished. - * Then apply redo to finish the rename. 
- * - * The original source folder *would* have had contents - * folderToRename (0 byte dummy file for directory) - * folderToRename/innerFile - * folderToRename/innerFile2 - * - * The actual source folder (after partial rename and failure) - * - * folderToRename - * folderToRename/innerFile2 - * - * The actual target folder (after partial rename and failure) - * - * renamedFolder - * renamedFolder/innerFile - */ - @Test - public void testRedoRenameFolder() throws IOException { - // create original folder - String srcKey = "folderToRename"; - Path originalFolder = new Path(srcKey); - assertTrue(fs.mkdirs(originalFolder)); - Path innerFile = new Path(originalFolder, "innerFile"); - assertTrue(fs.createNewFile(innerFile)); - Path innerFile2 = new Path(originalFolder, "innerFile2"); - assertTrue(fs.createNewFile(innerFile2)); - - String dstKey = "renamedFolder"; - - // propose (but don't do) the rename - Path home = fs.getHomeDirectory(); - String relativeHomeDir = getRelativePath(home.toString()); - NativeAzureFileSystem.FolderRenamePending pending = - new NativeAzureFileSystem.FolderRenamePending( - relativeHomeDir + "/" + srcKey, - relativeHomeDir + "/" + dstKey, null, - (NativeAzureFileSystem) fs); - - // get the rename pending file contents - String renameDescription = pending.makeRenamePendingFileContents(); - - // Remove one file from source folder to simulate a partially done - // rename operation. - assertTrue(fs.delete(innerFile, false)); - - // Create the destination folder with just one file in it, again - // to simulate a partially done rename. - Path destination = new Path(dstKey); - Path innerDest = new Path(destination, "innerFile"); - assertTrue(fs.createNewFile(innerDest)); - - // Create a rename-pending file and write rename information to it. - final String renamePendingStr = "folderToRename-RenamePending.json"; - Path renamePendingFile = new Path(renamePendingStr); - FSDataOutputStream out = fs.create(renamePendingFile, true); - assertTrue(out != null); - writeStringToStream(out, renameDescription); - - // Redo the rename operation based on the contents of the -RenamePending.json file. - // Trigger the redo by checking for existence of the original folder. It must appear - // to not exist. - assertFalse(fs.exists(originalFolder)); - - // Verify that the target is there, and the source is gone. - assertTrue(fs.exists(destination)); - assertTrue(fs.exists(new Path(destination, innerFile.getName()))); - assertTrue(fs.exists(new Path(destination, innerFile2.getName()))); - assertFalse(fs.exists(originalFolder)); - assertFalse(fs.exists(innerFile)); - assertFalse(fs.exists(innerFile2)); - - // Verify that there's no RenamePending file left. - assertFalse(fs.exists(renamePendingFile)); - - // Verify that we can list the target directory. - FileStatus[] listed = fs.listStatus(destination); - assertEquals(2, listed.length); - - // List the home directory and show the contents is a directory. - Path root = fs.getHomeDirectory(); - listed = fs.listStatus(root); - assertEquals(1, listed.length); - assertTrue(listed[0].isDirectory()); - } - - /** - * If there is a folder to be renamed inside a parent folder, - * then when you list the parent folder, you should only see - * the final result, after the rename. 
- */ - @Test - public void testRedoRenameFolderInFolderListing() throws IOException { - - // create original folder - String parent = "parent"; - Path parentFolder = new Path(parent); - assertTrue(fs.mkdirs(parentFolder)); - Path inner = new Path(parentFolder, "innerFolder"); - assertTrue(fs.mkdirs(inner)); - Path inner2 = new Path(parentFolder, "innerFolder2"); - assertTrue(fs.mkdirs(inner2)); - Path innerFile = new Path(inner2, "file"); - assertTrue(fs.createNewFile(innerFile)); - - Path inner2renamed = new Path(parentFolder, "innerFolder2Renamed"); - - // propose (but don't do) the rename of innerFolder2 - Path home = fs.getHomeDirectory(); - String relativeHomeDir = getRelativePath(home.toString()); - NativeAzureFileSystem.FolderRenamePending pending = - new NativeAzureFileSystem.FolderRenamePending( - relativeHomeDir + "/" + inner2, - relativeHomeDir + "/" + inner2renamed, null, - (NativeAzureFileSystem) fs); - - // Create a rename-pending file and write rename information to it. - final String renamePendingStr = inner2 + FolderRenamePending.SUFFIX; - Path renamePendingFile = new Path(renamePendingStr); - FSDataOutputStream out = fs.create(renamePendingFile, true); - assertTrue(out != null); - writeStringToStream(out, pending.makeRenamePendingFileContents()); - - // Redo the rename operation based on the contents of the - // -RenamePending.json file. Trigger the redo by checking for existence of - // the original folder. It must appear to not exist. - FileStatus[] listed = fs.listStatus(parentFolder); - assertEquals(2, listed.length); - assertTrue(listed[0].isDirectory()); - assertTrue(listed[1].isDirectory()); - - // The rename pending file is not a directory, so at this point we know the - // redo has been done. - assertFalse(fs.exists(inner2)); // verify original folder is gone - assertTrue(fs.exists(inner2renamed)); // verify the target is there - assertTrue(fs.exists(new Path(inner2renamed, "file"))); - } - - /** - * There is a nested folder and file under the folder to be renamed - * and the process crashes after the nested folder has been renamed but not the file. - * then when you list the parent folder, pending renames should be redone - * Apache jira HADOOP-12780 - */ - @Test - public void testRedoRenameFolderRenameInProgress() throws IOException { - - // create original folder - String parent = "parent"; - Path parentFolder = new Path(parent); - assertTrue(fs.mkdirs(parentFolder)); - Path folderToBeRenamed = new Path(parentFolder, "folderToBeRenamed"); - assertTrue(fs.mkdirs(folderToBeRenamed)); - String innerFolderName = "innerFolder"; - Path inner = new Path(folderToBeRenamed, innerFolderName); - assertTrue(fs.mkdirs(inner)); - String innerFileName = "file"; - Path innerFile = new Path(inner, innerFileName); - assertTrue(fs.createNewFile(innerFile)); - - Path renamedFolder = new Path(parentFolder, "renamedFolder"); - - // propose (but don't do) the rename of innerFolder2 - Path home = fs.getHomeDirectory(); - String relativeHomeDir = getRelativePath(home.toString()); - NativeAzureFileSystem.FolderRenamePending pending = - new NativeAzureFileSystem.FolderRenamePending( - relativeHomeDir + "/" + folderToBeRenamed, - relativeHomeDir + "/" + renamedFolder, null, - (NativeAzureFileSystem) fs); - - // Create a rename-pending file and write rename information to it. 
- final String renamePendingStr = folderToBeRenamed + FolderRenamePending.SUFFIX; - Path renamePendingFile = new Path(renamePendingStr); - FSDataOutputStream out = fs.create(renamePendingFile, true); - assertTrue(out != null); - writeStringToStream(out, pending.makeRenamePendingFileContents()); - - // Rename inner folder to simulate the scenario where rename has started and - // only one directory has been renamed but not the files under it - ((NativeAzureFileSystem) fs).getStoreInterface().rename( - relativeHomeDir + "/" +inner, relativeHomeDir + "/" +renamedFolder + "/" + innerFolderName , true, null); - - // Instead of using fs.exist use store.explicitFileExists because fs.exist will return true - // even if directory has been renamed, but there are still file under that directory - assertFalse(((NativeAzureFileSystem) fs).getStoreInterface(). - explicitFileExists(relativeHomeDir + "/" + inner)); // verify the explicit inner folder is gone - assertTrue(((NativeAzureFileSystem) fs).getStoreInterface(). - explicitFileExists(relativeHomeDir + "/" + innerFile)); // verify inner file is present - - // Redo the rename operation based on the contents of the - // -RenamePending.json file. Trigger the redo by checking for existence of - // the original folder. It must appear to not exist. - FileStatus[] listed = fs.listStatus(parentFolder); - assertEquals(1, listed.length); - assertTrue(listed[0].isDirectory()); - - // The rename pending file is not a directory, so at this point we know the - // redo has been done. - assertFalse(fs.exists(inner)); // verify original folder is gone - assertFalse(fs.exists(innerFile)); // verify original file is gone - assertTrue(fs.exists(renamedFolder)); // verify the target is there - assertTrue(fs.exists(new Path(renamedFolder, innerFolderName + "/" + innerFileName))); - } - - /** - * Test the situation when the rename metadata file is empty - * i.e. it is created but not written yet. In that case in next rename - * this empty file should be deleted. As zero byte metadata file means - * rename has not started yet. This is to emulate the scenario where - * the process crashes just after creating rename metadata file. - * We had a bug (HADOOP-12678) that in that case listing used to fail and - * hbase master did not use to come up - */ - @Test - public void testRedoRenameFolderInFolderListingWithZeroByteRenameMetadata() - throws IOException { - // create original folder - String parent = "parent"; - Path parentFolder = new Path(parent); - assertTrue(fs.mkdirs(parentFolder)); - Path inner = new Path(parentFolder, "innerFolder"); - assertTrue(fs.mkdirs(inner)); - Path inner2 = new Path(parentFolder, "innerFolder2"); - assertTrue(fs.mkdirs(inner2)); - Path innerFile = new Path(inner2, "file"); - assertTrue(fs.createNewFile(innerFile)); - - Path inner2renamed = new Path(parentFolder, "innerFolder2Renamed"); - - // Create an empty rename-pending file - final String renamePendingStr = inner2 + FolderRenamePending.SUFFIX; - Path renamePendingFile = new Path(renamePendingStr); - FSDataOutputStream out = fs.create(renamePendingFile, true); - assertTrue(out != null); - out.close(); - - // Redo the rename operation based on the contents of the - // -RenamePending.json file. Trigger the redo by listing - // the parent folder. 
It should not throw and it should - // delete empty rename pending file - FileStatus[] listed = fs.listStatus(parentFolder); - assertEquals(2, listed.length); - assertTrue(listed[0].isDirectory()); - assertTrue(listed[1].isDirectory()); - assertFalse(fs.exists(renamePendingFile)); - - // Verify that even if rename pending file is deleted, - // deletion should handle that - Path home = fs.getHomeDirectory(); - String relativeHomeDir = getRelativePath(home.toString()); - NativeAzureFileSystem.FolderRenamePending pending = - new NativeAzureFileSystem.FolderRenamePending( - relativeHomeDir + "/" + inner2, - relativeHomeDir + "/" + inner2renamed, null, - (NativeAzureFileSystem) fs); - pending.deleteRenamePendingFile(fs, renamePendingFile); - - assertTrue(fs.exists(inner2)); // verify original folder is there - assertFalse(fs.exists(inner2renamed)); // verify the target is not there - } - - /** - * Test the situation where a rename pending file exists but the rename - * is really done. This could happen if the rename process died just - * before deleting the rename pending file. It exercises a non-standard - * code path in redo(). - */ - @Test - public void testRenameRedoFolderAlreadyDone() throws IOException { - // create only destination folder - String orig = "originalFolder"; - String dest = "renamedFolder"; - Path destPath = new Path(dest); - assertTrue(fs.mkdirs(destPath)); - - // propose (but don't do) the rename of innerFolder2 - Path home = fs.getHomeDirectory(); - String relativeHomeDir = getRelativePath(home.toString()); - NativeAzureFileSystem.FolderRenamePending pending = - new NativeAzureFileSystem.FolderRenamePending( - relativeHomeDir + "/" + orig, - relativeHomeDir + "/" + dest, null, - (NativeAzureFileSystem) fs); - - // Create a rename-pending file and write rename information to it. - final String renamePendingStr = orig + FolderRenamePending.SUFFIX; - Path renamePendingFile = new Path(renamePendingStr); - FSDataOutputStream out = fs.create(renamePendingFile, true); - assertTrue(out != null); - writeStringToStream(out, pending.makeRenamePendingFileContents()); - - try { - pending.redo(); - } catch (Exception e) { - fail(); - } - - // Make sure rename pending file is gone. - FileStatus[] listed = fs.listStatus(new Path("/")); - assertEquals(1, listed.length, "Pending directory still found"); - assertTrue(listed[0].isDirectory()); - } - - @Test - public void testRedoFolderRenameAll() throws IllegalArgumentException, IOException { - { - FileFolder original = new FileFolder("folderToRename"); - original.add("innerFile").add("innerFile2"); - FileFolder partialSrc = original.copy(); - FileFolder partialDst = original.copy(); - partialDst.setName("renamedFolder"); - partialSrc.setPresent(0, false); - partialDst.setPresent(1, false); - - testRenameRedoFolderSituation(original, partialSrc, partialDst); - } - { - FileFolder original = new FileFolder("folderToRename"); - original.add("file1").add("file2").add("file3"); - FileFolder partialSrc = original.copy(); - FileFolder partialDst = original.copy(); - partialDst.setName("renamedFolder"); - - // Set up this state before the redo: - // folderToRename: file1 file3 - // renamedFolder: file1 file2 - // This gives code coverage for all 3 expected cases for individual file - // redo. - partialSrc.setPresent(1, false); - partialDst.setPresent(2, false); - - testRenameRedoFolderSituation(original, partialSrc, partialDst); - } - { - // Simulate a situation with folder with a large number of files in it. 
- // For the first half of the files, they will be in the destination - // but not the source. For the second half, they will be in the source - // but not the destination. There will be one file in the middle that is - // in both source and destination. Then trigger redo and verify. - // For testing larger folder sizes, manually change this, temporarily, and - // edit the SIZE value. - final int SIZE = 5; - assertTrue(SIZE >= 3); - // Try a lot of files in the folder. - FileFolder original = new FileFolder("folderToRename"); - for (int i = 0; i < SIZE; i++) { - original.add("file" + Integer.toString(i)); - } - FileFolder partialSrc = original.copy(); - FileFolder partialDst = original.copy(); - partialDst.setName("renamedFolder"); - for (int i = 0; i < SIZE; i++) { - partialSrc.setPresent(i, i >= SIZE / 2); - partialDst.setPresent(i, i <= SIZE / 2); - } - - testRenameRedoFolderSituation(original, partialSrc, partialDst); - } - { - // Do a nested folder, like so: - // folderToRename: - // nestedFolder: a, b, c - // p - // q - // - // Then delete file 'a' from the source and add it to destination. - // Then trigger redo. - - FileFolder original = new FileFolder("folderToRename"); - FileFolder nested = new FileFolder("nestedFolder"); - nested.add("a").add("b").add("c"); - original.add(nested).add("p").add("q"); - - FileFolder partialSrc = original.copy(); - FileFolder partialDst = original.copy(); - partialDst.setName("renamedFolder"); - - // logically remove 'a' from source - partialSrc.getMember(0).setPresent(0, false); - - // logically eliminate b, c from destination - partialDst.getMember(0).setPresent(1, false); - partialDst.getMember(0).setPresent(2, false); - - testRenameRedoFolderSituation(original, partialSrc, partialDst); - } - } - - private void testRenameRedoFolderSituation( - FileFolder fullSrc, - FileFolder partialSrc, - FileFolder partialDst) throws IllegalArgumentException, IOException { - - // make file folder tree for source - fullSrc.create(); - - // set up rename pending file - fullSrc.makeRenamePending(partialDst); - - // prune away some files (as marked) from source to simulate partial rename - partialSrc.prune(); - - // Create only the files indicated for the destination to indicate a partial rename. - partialDst.create(); - - // trigger redo - assertFalse(fullSrc.exists()); - - // verify correct results - partialDst.verifyExists(); - fullSrc.verifyGone(); - - // delete the new folder to leave no garbage behind - fs.delete(new Path(partialDst.getName()), true); - } - - // Mock up of a generalized folder (which can also be a leaf-level file) - // for rename redo testing. - private class FileFolder { - private String name; - - // For rename testing, indicates whether an expected - // file is present in the source or target folder. - private boolean present; - ArrayList members; // Null if a leaf file, otherwise not null. - - // Make a new, empty folder (not a regular leaf file). - public FileFolder(String name) { - this.name = name; - this.present = true; - members = new ArrayList(); - } - - public FileFolder getMember(int i) { - return members.get(i); - } - - // Verify a folder and all its contents are gone. This is only to - // be called on the root of a FileFolder. 
- public void verifyGone() throws IllegalArgumentException, IOException { - assertFalse(fs.exists(new Path(name))); - assertTrue(isFolder()); - verifyGone(new Path(name), members); - } - - private void verifyGone(Path prefix, ArrayList members2) throws IOException { - for (FileFolder f : members2) { - f.verifyGone(prefix); - } - } - - private void verifyGone(Path prefix) throws IOException { - assertFalse(fs.exists(new Path(prefix, name))); - if (isLeaf()) { - return; - } - for (FileFolder f : members) { - f.verifyGone(new Path(prefix, name)); - } - } - - public void verifyExists() throws IllegalArgumentException, IOException { - - // verify the root is present - assertTrue(fs.exists(new Path(name))); - assertTrue(isFolder()); - - // check the members - verifyExists(new Path(name), members); - } - - private void verifyExists(Path prefix, ArrayList members2) throws IOException { - for (FileFolder f : members2) { - f.verifyExists(prefix); - } - } - - private void verifyExists(Path prefix) throws IOException { - - // verify this file/folder is present - assertTrue(fs.exists(new Path(prefix, name))); - - // verify members are present - if (isLeaf()) { - return; - } - - for (FileFolder f : members) { - f.verifyExists(new Path(prefix, name)); - } - } - - public boolean exists() throws IOException { - return fs.exists(new Path(name)); - } - - // Make a rename pending file for the situation where we rename - // this object (the source) to the specified destination. - public void makeRenamePending(FileFolder dst) throws IOException { - - // Propose (but don't do) the rename. - Path home = fs.getHomeDirectory(); - String relativeHomeDir = getRelativePath(home.toString()); - NativeAzureFileSystem.FolderRenamePending pending = - new NativeAzureFileSystem.FolderRenamePending( - relativeHomeDir + "/" + this.getName(), - relativeHomeDir + "/" + dst.getName(), null, - (NativeAzureFileSystem) fs); - - // Get the rename pending file contents. - String renameDescription = pending.makeRenamePendingFileContents(); - - // Create a rename-pending file and write rename information to it. - final String renamePendingStr = this.getName() + "-RenamePending.json"; - Path renamePendingFile = new Path(renamePendingStr); - FSDataOutputStream out = fs.create(renamePendingFile, true); - assertTrue(out != null); - writeStringToStream(out, renameDescription); - } - - // set whether a child is present or not - public void setPresent(int i, boolean b) { - members.get(i).setPresent(b); - } - - // Make an uninitialized folder - private FileFolder() { - this.present = true; - } - - public void setPresent(boolean value) { - present = value; - } - - public FileFolder makeLeaf(String name) { - FileFolder f = new FileFolder(); - f.setName(name); - return f; - } - - void setName(String name) { - this.name = name; - } - - public String getName() { - return name; - } - - public boolean isLeaf() { - return members == null; - } - - public boolean isFolder() { - return members != null; - } - - FileFolder add(FileFolder folder) { - members.add(folder); - return this; - } - - // Add a leaf file (by convention, if you pass a string argument, you get a leaf). - FileFolder add(String file) { - FileFolder leaf = makeLeaf(file); - members.add(leaf); - return this; - } - - public FileFolder copy() { - if (isLeaf()) { - return makeLeaf(name); - } else { - FileFolder f = new FileFolder(name); - for (FileFolder member : members) { - f.add(member.copy()); - } - return f; - } - } - - // Create the folder structure. 
Return true on success, or else false. - public void create() throws IllegalArgumentException, IOException { - create(null); - } - - private void create(Path prefix) throws IllegalArgumentException, IOException { - if (isFolder()) { - if (present) { - assertTrue(fs.mkdirs(makePath(prefix, name))); - } - create(makePath(prefix, name), members); - } else if (isLeaf()) { - if (present) { - assertTrue(fs.createNewFile(makePath(prefix, name))); - } - } else { - assertTrue(false, "The object must be a (leaf) file or a folder."); - } - } - - private void create(Path prefix, ArrayList members2) throws IllegalArgumentException, IOException { - for (FileFolder f : members2) { - f.create(prefix); - } - } - - private Path makePath(Path prefix, String name) { - if (prefix == null) { - return new Path(name); - } else { - return new Path(prefix, name); - } - } - - // Remove the files marked as not present. - public void prune() throws IOException { - prune(null); - } - - private void prune(Path prefix) throws IOException { - Path path = null; - if (prefix == null) { - path = new Path(name); - } else { - path = new Path(prefix, name); - } - if (isLeaf() && !present) { - assertTrue(fs.delete(path, false)); - } else if (isFolder() && !present) { - assertTrue(fs.delete(path, true)); - } else if (isFolder()) { - for (FileFolder f : members) { - f.prune(path); - } - } - } - } - - private String getRelativePath(String path) { - // example input: wasb://wasbtests-ehans-1404322046279@ehans9.blob.core.windows.net/user/ehans/folderToRename - // example result: user/ehans/folderToRename - - // Find the third / position and return input substring after that. - int slashCount = 0; // number of slashes so far - int i; - for (i = 0; i < path.length(); i++) { - if (path.charAt(i) == '/') { - slashCount++; - if (slashCount == 3) { - return path.substring(i + 1, path.length()); - } - } - } - throw new RuntimeException("Incorrect path prefix -- expected wasb://.../..."); - } - - @Test - public void testCloseFileSystemTwice() throws Exception { - //make sure close() can be called multiple times without doing any harm - fs.close(); - fs.close(); - } - - // Test the available() method for the input stream returned by fs.open(). - // This works for both page and block blobs. - int FILE_SIZE = 4 * 1024 * 1024 + 1; // Make this 1 bigger than internal - // buffer used in BlobInputStream - // to exercise that case. - int MAX_STRIDE = FILE_SIZE + 1; - Path PATH = new Path("/available.dat"); - @Test - public void testAvailable() throws IOException { - - // write FILE_SIZE bytes to page blob - FSDataOutputStream out = fs.create(PATH); - byte[] data = new byte[FILE_SIZE]; - Arrays.fill(data, (byte) 5); - out.write(data, 0, FILE_SIZE); - out.close(); - - // Test available() for different read sizes - verifyAvailable(1); - verifyAvailable(100); - verifyAvailable(5000); - verifyAvailable(FILE_SIZE); - verifyAvailable(MAX_STRIDE); - - fs.delete(PATH, false); - } - - // Verify that available() for the input stream is always >= 1 unless we've - // consumed all the input, and then it is 0. This is to match expectations by - // HBase which were set based on behavior of DFSInputStream.available(). 
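    // Illustrative aside (not part of the original patch): a minimal sketch of the
    // contract described in the comment above, assuming a caller in the HBase style
    // treats available() == 0 as end of input, as DFSInputStream permits.
    try (FSDataInputStream in = fs.open(PATH)) {
      byte[] buf = new byte[4096];
      while (in.available() > 0) {
        int n = in.read(buf);   // must make progress while available() > 0
        if (n < 0) {
          break;                // defensive: also stop on an explicit EOF
        }
      }
    }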
- private void verifyAvailable(int readStride) throws IOException { - FSDataInputStream in = fs.open(PATH); - try { - byte[] inputBuffer = new byte[MAX_STRIDE]; - int position = 0; - int bytesRead = 0; - while(bytesRead != FILE_SIZE) { - bytesRead += in.read(inputBuffer, position, readStride); - int available = in.available(); - if (bytesRead < FILE_SIZE) { - if (available < 1) { - fail(String.format( - "expected available > 0 but got: " - + "position = %d, bytesRead = %d, in.available() = %d", - position, bytesRead, available)); - } - } - } - int available = in.available(); - assertTrue(available == 0); - } finally { - in.close(); - } - } - - @Test - public void testGetFileSizeFromListing() throws IOException { - Path path = new Path("file.dat"); - final int PAGE_SIZE = 512; - final int FILE_SIZE = PAGE_SIZE + 1; - - // write FILE_SIZE bytes to page blob - FSDataOutputStream out = fs.create(path); - byte[] data = new byte[FILE_SIZE]; - Arrays.fill(data, (byte) 5); - out.write(data, 0, FILE_SIZE); - out.close(); - - // list the file to get its properties - FileStatus[] status = fs.listStatus(path); - assertEquals(1, status.length); - - // The file length should report the number of bytes - // written for either page or block blobs (subclasses - // of this test class will exercise both). - assertEquals(FILE_SIZE, status[0].getLen()); - } - - private boolean testModifiedTime(Path testPath, long time) throws Exception { - FileStatus fileStatus = fs.getFileStatus(testPath); - final long errorMargin = modifiedTimeErrorMargin; - long lastModified = fileStatus.getModificationTime(); - return (lastModified > (time - errorMargin) && lastModified < (time + errorMargin)); - } - - @Test - public void testCreateNonRecursive() throws Exception { - Path testFolder = new Path("/testFolder"); - Path testFile = new Path(testFolder, "testFile"); - try { - fs.createNonRecursive(testFile, true, 1024, (short)1, 1024, null); - assertTrue(false, "Should've thrown"); - } catch (FileNotFoundException e) { - } - fs.mkdirs(testFolder); - fs.createNonRecursive(testFile, true, 1024, (short)1, 1024, null) - .close(); - assertTrue(fs.exists(testFile)); - } - - public void testFileEndingInDot() throws Exception { - Path testFolder = new Path("/testFolder."); - Path testFile = new Path(testFolder, "testFile."); - assertTrue(fs.mkdirs(testFolder)); - assertTrue(fs.createNewFile(testFile)); - assertTrue(fs.exists(testFile)); - FileStatus[] listed = fs.listStatus(testFolder); - assertEquals(1, listed.length); - assertEquals("testFile.", listed[0].getPath().getName()); - } - private void testModifiedTime(Path testPath) throws Exception { - Calendar utc = Calendar.getInstance(TimeZone.getTimeZone("UTC")); - long currentUtcTime = utc.getTime().getTime(); - FileStatus fileStatus = fs.getFileStatus(testPath); - final long errorMargin = 60 * 1000; // Give it +/-60 seconds - assertTrue( - fileStatus.getModificationTime() > (currentUtcTime - errorMargin) && - fileStatus.getModificationTime() < (currentUtcTime + errorMargin), - "Modification time " + new Date(fileStatus.getModificationTime()) - + " is not close to now: " + utc.getTime()); - } - - private void createEmptyFile(Path testFile, FsPermission permission) - throws IOException { - FSDataOutputStream outputStream = fs.create(testFile, permission, true, - 4096, (short) 1, 1024, null); - outputStream.close(); - } - - private String readString(Path testFile) throws IOException { - return readStringFromFile(fs, testFile); - } - - - private void writeString(Path path, String value) 
throws IOException { - writeStringToFile(fs, path, value); - } - - @Test - // Acquire and free a Lease object. Wait for more than the lease - // timeout, to make sure the lease renews itself. - public void testSelfRenewingLease() throws IllegalArgumentException, IOException, - InterruptedException, StorageException { - - SelfRenewingLease lease; - final String FILE_KEY = "file"; - fs.create(new Path(FILE_KEY)); - NativeAzureFileSystem nfs = (NativeAzureFileSystem) fs; - String fullKey = nfs.pathToKey(nfs.makeAbsolute(new Path(FILE_KEY))); - AzureNativeFileSystemStore store = nfs.getStore(); - lease = store.acquireLease(fullKey); - assertTrue(lease.getLeaseID() != null); - - // The sleep time for the keep-alive thread is 40 seconds, so sleep just - // a little beyond that, to make sure the keep-alive thread wakes up - // and renews the lease. - Thread.sleep(42000); - lease.free(); - - // Check that the lease is really freed. - CloudBlob blob = lease.getCloudBlob(); - - // Try to acquire it again, using direct Azure blob access. - // If that succeeds, then the lease was already freed. - String differentLeaseID = null; - try { - differentLeaseID = blob.acquireLease(15, null); - } catch (Exception e) { - e.printStackTrace(); - fail("Caught exception trying to directly re-acquire lease from Azure"); - } finally { - assertTrue(differentLeaseID != null); - AccessCondition accessCondition = AccessCondition.generateEmptyCondition(); - accessCondition.setLeaseID(differentLeaseID); - blob.releaseLease(accessCondition); - } - } - - @Test - // Acquire a SelfRenewingLease object. Wait for more than the lease - // timeout, to make sure the lease renews itself. Delete the file. - // That will automatically free the lease. - // (that should work without any failures). - public void testSelfRenewingLeaseFileDelete() - throws IllegalArgumentException, IOException, - InterruptedException, StorageException { - - SelfRenewingLease lease; - final String FILE_KEY = "file"; - final Path path = new Path(FILE_KEY); - fs.create(path); - NativeAzureFileSystem nfs = (NativeAzureFileSystem) fs; - String fullKey = nfs.pathToKey(nfs.makeAbsolute(path)); - lease = nfs.getStore().acquireLease(fullKey); - assertTrue(lease.getLeaseID() != null); - - // The sleep time for the keep-alive thread is 40 seconds, so sleep just - // a little beyond that, to make sure the keep-alive thread wakes up - // and renews the lease. - Thread.sleep(42000); - - nfs.getStore().delete(fullKey, lease); - - // Check that the file is really gone and the lease is freed. - assertTrue(!fs.exists(path)); - assertTrue(lease.isFreed()); - } - - // Variables to check assertions in next test. - private long firstEndTime; - private long secondStartTime; - - // Create two threads. One will get a lease on a file. - // The second one will try to get the lease and thus block. - // Then the first one will free the lease and the second - // one will get it and proceed. - @Test - public void testLeaseAsDistributedLock() throws IllegalArgumentException, - IOException { - final String LEASE_LOCK_FILE_KEY = "file"; - fs.create(new Path(LEASE_LOCK_FILE_KEY)); - NativeAzureFileSystem nfs = (NativeAzureFileSystem) fs; - String fullKey = nfs.pathToKey(nfs.makeAbsolute(new Path(LEASE_LOCK_FILE_KEY))); - - Thread first = new SubjectInheritingThread(new LeaseLockAction("first-thread", fullKey)); - first.start(); - Thread second = new SubjectInheritingThread(new LeaseLockAction("second-thread", fullKey)); - second.start(); - try { - - // Wait for the two threads to finish. 
- first.join(); - second.join(); - assertTrue(firstEndTime < secondStartTime); - } catch (InterruptedException e) { - fail("Unable to wait for threads to finish"); - Thread.currentThread().interrupt(); - } - } - - private class LeaseLockAction implements Runnable { - private String name; - private String key; - - LeaseLockAction(String name, String key) { - this.name = name; - this.key = key; - } - - @Override - public void run() { - LOG.info("starting thread " + name); - SelfRenewingLease lease = null; - NativeAzureFileSystem nfs = (NativeAzureFileSystem) fs; - - if (name.equals("first-thread")) { - try { - lease = nfs.getStore().acquireLease(key); - LOG.info(name + " acquired lease " + lease.getLeaseID()); - } catch (AzureException e) { - assertTrue(false, "Unanticipated exception"); - } - assertTrue(lease != null); - try { - - // Sleep long enough for the lease to renew once. - Thread.sleep(SelfRenewingLease.LEASE_RENEWAL_PERIOD + 2000); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } - try { - firstEndTime = System.currentTimeMillis(); - lease.free(); - LOG.info(name + " freed lease " + lease.getLeaseID()); - } catch (StorageException e) { - fail("Unanticipated exception"); - } - } else if (name.equals("second-thread")) { - try { - - // sleep 2 sec to let first thread get ahead of this one - Thread.sleep(2000); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } - try { - LOG.info(name + " before getting lease"); - lease = nfs.getStore().acquireLease(key); - secondStartTime = System.currentTimeMillis(); - LOG.info(name + " acquired lease " + lease.getLeaseID()); - } catch (AzureException e) { - assertTrue(false, "Unanticipated exception"); - } - assertTrue(lease != null); - try { - lease.free(); - LOG.info(name + " freed lease " + lease.getLeaseID()); - } catch (StorageException e) { - assertTrue(false, "Unanticipated exception"); - } - } else { - fail("Unknown thread name"); - } - - LOG.info(name + " is exiting."); - } - - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlobMetadata.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlobMetadata.java deleted file mode 100644 index 853163e086db3..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlobMetadata.java +++ /dev/null @@ -1,269 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure; - -import java.io.Closeable; -import java.io.IOException; -import java.net.URI; -import java.util.HashMap; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsAction; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.security.UserGroupInformation; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** - * Tests that we put the correct metadata on blobs created through WASB. - */ -public class TestBlobMetadata extends AbstractWasbTestWithTimeout { - private AzureBlobStorageTestAccount testAccount; - private FileSystem fs; - private InMemoryBlockBlobStore backingStore; - - @BeforeEach - public void setUp() throws Exception { - testAccount = AzureBlobStorageTestAccount.createMock(); - fs = testAccount.getFileSystem(); - backingStore = testAccount.getMockStorage().getBackingStore(); - } - - @AfterEach - public void tearDown() throws Exception { - testAccount.cleanup(); - fs = null; - backingStore = null; - } - - private static String getExpectedOwner() throws Exception { - return UserGroupInformation.getCurrentUser().getShortUserName(); - } - - private static String getExpectedPermissionString(String permissionString) - throws Exception { - return String.format( - "{\"owner\":\"%s\",\"group\":\"%s\",\"permissions\":\"%s\"}", - getExpectedOwner(), - NativeAzureFileSystem.AZURE_DEFAULT_GROUP_DEFAULT, - permissionString); - } - - /** - * Tests that WASB stamped the version in the container metadata. - */ - @Test - public void testContainerVersionMetadata() throws Exception { - // Do a write operation to trigger version stamp - fs.createNewFile(new Path("/foo")); - HashMap containerMetadata = - backingStore.getContainerMetadata(); - assertNotNull(containerMetadata); - assertEquals(AzureNativeFileSystemStore.CURRENT_WASB_VERSION, - containerMetadata.get(AzureNativeFileSystemStore.VERSION_METADATA_KEY)); - } - - private static final class FsWithPreExistingContainer implements Closeable { - private final MockStorageInterface mockStorage; - private final NativeAzureFileSystem fs; - - private FsWithPreExistingContainer(MockStorageInterface mockStorage, - NativeAzureFileSystem fs) { - this.mockStorage = mockStorage; - this.fs = fs; - } - - public NativeAzureFileSystem getFs() { - return fs; - } - - public HashMap getContainerMetadata() { - return mockStorage.getBackingStore().getContainerMetadata(); - } - - public static FsWithPreExistingContainer create() throws Exception { - return create(null); - } - - public static FsWithPreExistingContainer create( - HashMap containerMetadata) throws Exception { - AzureNativeFileSystemStore store = new AzureNativeFileSystemStore(); - MockStorageInterface mockStorage = new MockStorageInterface(); - store.setAzureStorageInteractionLayer(mockStorage); - NativeAzureFileSystem fs = new NativeAzureFileSystem(store); - Configuration conf = new Configuration(); - AzureBlobStorageTestAccount.setMockAccountKey(conf); - mockStorage.addPreExistingContainer( - AzureBlobStorageTestAccount.getMockContainerUri(), containerMetadata); - fs.initialize(new URI(AzureBlobStorageTestAccount.MOCK_WASB_URI), conf); - return new FsWithPreExistingContainer(mockStorage, fs); - } - - @Override - public void close() throws IOException { - fs.close(); - } - } - - /** - * Tests that WASB stamped the 
version in the container metadata if it does a - * write operation to a pre-existing container. - */ - @Test - public void testPreExistingContainerVersionMetadata() throws Exception { - // Create a mock storage with a pre-existing container that has no - // WASB version metadata on it. - FsWithPreExistingContainer fsWithContainer = FsWithPreExistingContainer - .create(); - - // Now, do some read operations (should touch the metadata) - assertFalse(fsWithContainer.getFs().exists(new Path("/IDontExist"))); - assertEquals(0, fsWithContainer.getFs().listStatus(new Path("/")).length); - - // Check that no container metadata exists yet - assertNull(fsWithContainer.getContainerMetadata()); - - // Now do a write operation - should stamp the version - fsWithContainer.getFs().mkdirs(new Path("/dir")); - - // Check that now we have the version stamp - assertNotNull(fsWithContainer.getContainerMetadata()); - assertEquals( - AzureNativeFileSystemStore.CURRENT_WASB_VERSION, - fsWithContainer.getContainerMetadata().get( - AzureNativeFileSystemStore.VERSION_METADATA_KEY)); - fsWithContainer.close(); - } - - /** - * Tests that WASB works well with an older version container with ASV-era - * version and metadata. - */ - @Test - public void testFirstContainerVersionMetadata() throws Exception { - // Create a mock storage with a pre-existing container that has - // ASV version metadata on it. - HashMap<String, String> containerMetadata = new HashMap<String, String>(); - containerMetadata.put(AzureNativeFileSystemStore.OLD_VERSION_METADATA_KEY, - AzureNativeFileSystemStore.FIRST_WASB_VERSION); - FsWithPreExistingContainer fsWithContainer = FsWithPreExistingContainer - .create(containerMetadata); - - // Now, do some read operations (should touch the metadata) - assertFalse(fsWithContainer.getFs().exists(new Path("/IDontExist"))); - assertEquals(0, fsWithContainer.getFs().listStatus(new Path("/")).length); - - // Check that no container metadata exists yet - assertEquals( - AzureNativeFileSystemStore.FIRST_WASB_VERSION, - fsWithContainer.getContainerMetadata().get( - AzureNativeFileSystemStore.OLD_VERSION_METADATA_KEY)); - assertNull(fsWithContainer.getContainerMetadata().get( - AzureNativeFileSystemStore.VERSION_METADATA_KEY)); - - // Now do a write operation - should stamp the version - fsWithContainer.getFs().mkdirs(new Path("/dir")); - - // Check that now we have the version stamp - assertEquals( - AzureNativeFileSystemStore.CURRENT_WASB_VERSION, - fsWithContainer.getContainerMetadata().get( - AzureNativeFileSystemStore.VERSION_METADATA_KEY)); - assertNull(fsWithContainer.getContainerMetadata().get( - AzureNativeFileSystemStore.OLD_VERSION_METADATA_KEY)); - fsWithContainer.close(); - } - - @SuppressWarnings("deprecation") - @Test - public void testPermissionMetadata() throws Exception { - FsPermission justMe = new FsPermission(FsAction.READ_WRITE, FsAction.NONE, - FsAction.NONE); - Path selfishFile = new Path("/noOneElse"); - fs.create(selfishFile, justMe, true, 4096, fs.getDefaultReplication(), - fs.getDefaultBlockSize(), null).close(); - String mockUri = AzureBlobStorageTestAccount.toMockUri(selfishFile); - assertNotNull(mockUri, "converted URI"); - HashMap<String, String> metadata = backingStore - .getMetadata(mockUri); - assertNotNull(metadata); - String storedPermission = metadata.get("hdi_permission"); - assertEquals(getExpectedPermissionString("rw-------"), storedPermission); - FileStatus retrievedStatus = fs.getFileStatus(selfishFile); - assertNotNull(retrievedStatus); - assertEquals(justMe, retrievedStatus.getPermission()); -
assertEquals(getExpectedOwner(), retrievedStatus.getOwner()); - assertEquals(NativeAzureFileSystem.AZURE_DEFAULT_GROUP_DEFAULT, - retrievedStatus.getGroup()); - } - - /** - * Tests that WASB understands the old-style ASV metadata and changes it when - * it gets the chance. - */ - @Test - public void testOldPermissionMetadata() throws Exception { - Path selfishFile = new Path("/noOneElse"); - HashMap<String, String> metadata = - new HashMap<String, String>(); - metadata.put("asv_permission", - getExpectedPermissionString("rw-------")); - backingStore.setContent( - AzureBlobStorageTestAccount.toMockUri(selfishFile), - new byte[] { }, - metadata, false, 0); - FsPermission justMe = new FsPermission( - FsAction.READ_WRITE, FsAction.NONE, FsAction.NONE); - FileStatus retrievedStatus = fs.getFileStatus(selfishFile); - assertNotNull(retrievedStatus); - assertEquals(justMe, retrievedStatus.getPermission()); - assertEquals(getExpectedOwner(), retrievedStatus.getOwner()); - assertEquals(NativeAzureFileSystem.AZURE_DEFAULT_GROUP_DEFAULT, - retrievedStatus.getGroup()); - FsPermission meAndYou = new FsPermission( - FsAction.READ_WRITE, FsAction.READ_WRITE, FsAction.NONE); - fs.setPermission(selfishFile, meAndYou); - metadata = - backingStore.getMetadata( - AzureBlobStorageTestAccount.toMockUri(selfishFile)); - assertNotNull(metadata); - String storedPermission = metadata.get("hdi_permission"); - assertEquals(getExpectedPermissionString("rw-rw----"), - storedPermission); - assertNull(metadata.get("asv_permission")); - } - - @Test - public void testFolderMetadata() throws Exception { - Path folder = new Path("/folder"); - FsPermission justRead = new FsPermission(FsAction.READ, FsAction.READ, - FsAction.READ); - fs.mkdirs(folder, justRead); - HashMap<String, String> metadata = backingStore - .getMetadata(AzureBlobStorageTestAccount.toMockUri(folder)); - assertNotNull(metadata); - assertEquals("true", metadata.get("hdi_isfolder")); - assertEquals(getExpectedPermissionString("r--r--r--"), - metadata.get("hdi_permission")); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlobOperationDescriptor.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlobOperationDescriptor.java deleted file mode 100644 index ef08640c4eee4..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestBlobOperationDescriptor.java +++ /dev/null @@ -1,303 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -package org.apache.hadoop.fs.azure; - -import com.microsoft.azure.storage.OperationContext; -import com.microsoft.azure.storage.ResponseReceivedEvent; -import com.microsoft.azure.storage.SendingRequestEvent; -import com.microsoft.azure.storage.StorageEvent; -import com.microsoft.azure.storage.blob.BlobInputStream; -import com.microsoft.azure.storage.blob.BlobOutputStream; -import com.microsoft.azure.storage.blob.CloudAppendBlob; -import com.microsoft.azure.storage.blob.CloudBlobContainer; -import com.microsoft.azure.storage.blob.CloudBlockBlob; -import com.microsoft.azure.storage.blob.CloudPageBlob; -import org.apache.hadoop.classification.InterfaceAudience; -import org.junit.jupiter.api.Test; - -import java.net.HttpURLConnection; -import java.nio.charset.StandardCharsets; - -/** - * Tests for BlobOperationDescriptor. - */ -public class TestBlobOperationDescriptor extends AbstractWasbTestBase { - private BlobOperationDescriptor.OperationType lastOperationTypeReceived; - private BlobOperationDescriptor.OperationType lastOperationTypeSent; - private long lastContentLengthReceived; - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.create(); - } - - @Test - public void testAppendBlockOperations() throws Exception { - CloudBlobContainer container = getTestAccount().getRealContainer(); - - OperationContext context = new OperationContext(); - context.getResponseReceivedEventHandler().addListener( - new ResponseReceivedEventHandler()); - context.getSendingRequestEventHandler().addListener( - new SendingRequestEventHandler()); - - CloudAppendBlob appendBlob = container.getAppendBlobReference( - "testAppendBlockOperations"); - assertNull(lastOperationTypeSent); - assertNull(lastOperationTypeReceived); - assertEquals(0, lastContentLengthReceived); - - try ( - BlobOutputStream output - = appendBlob.openWriteNew(null, null, context); - ) { - assertEquals(BlobOperationDescriptor.OperationType.CreateBlob, - lastOperationTypeReceived); - assertEquals(0, lastContentLengthReceived); - - String message = "this is a test"; - output.write(message.getBytes(StandardCharsets.UTF_8)); - output.flush(); - assertEquals(BlobOperationDescriptor.OperationType.AppendBlock, - lastOperationTypeSent); - assertEquals(BlobOperationDescriptor.OperationType.AppendBlock, - lastOperationTypeReceived); - assertEquals(message.length(), lastContentLengthReceived); - } - } - - @Test - public void testPutBlockOperations() throws Exception { - CloudBlobContainer container = getTestAccount().getRealContainer(); - - OperationContext context = new OperationContext(); - context.getResponseReceivedEventHandler().addListener( - new ResponseReceivedEventHandler()); - context.getSendingRequestEventHandler().addListener( - new SendingRequestEventHandler()); - - CloudBlockBlob blockBlob = container.getBlockBlobReference( - "testPutBlockOperations"); - assertNull(lastOperationTypeSent); - assertNull(lastOperationTypeReceived); - assertEquals(0, lastContentLengthReceived); - - try ( - BlobOutputStream output - = blockBlob.openOutputStream(null, - null, - context); - ) { - assertNull(lastOperationTypeReceived); - assertEquals(0, lastContentLengthReceived); - - String message = "this is a test"; - output.write(message.getBytes(StandardCharsets.UTF_8)); - output.flush(); - assertEquals(BlobOperationDescriptor.OperationType.PutBlock, - lastOperationTypeSent); - assertEquals(BlobOperationDescriptor.OperationType.PutBlock, - lastOperationTypeReceived); - 
assertEquals(message.length(), lastContentLengthReceived); - } - assertEquals(BlobOperationDescriptor.OperationType.PutBlockList, - lastOperationTypeSent); - assertEquals(BlobOperationDescriptor.OperationType.PutBlockList, - lastOperationTypeReceived); - assertEquals(0, lastContentLengthReceived); - } - - @Test - public void testPutPageOperations() throws Exception { - CloudBlobContainer container = getTestAccount().getRealContainer(); - - OperationContext context = new OperationContext(); - context.getResponseReceivedEventHandler().addListener( - new ResponseReceivedEventHandler()); - context.getSendingRequestEventHandler().addListener( - new SendingRequestEventHandler()); - - CloudPageBlob pageBlob = container.getPageBlobReference( - "testPutPageOperations"); - assertNull(lastOperationTypeSent); - assertNull(lastOperationTypeReceived); - assertEquals(0, lastContentLengthReceived); - - try ( - BlobOutputStream output = pageBlob.openWriteNew(1024, - null, - null, - context); - ) { - assertEquals(BlobOperationDescriptor.OperationType.CreateBlob, - lastOperationTypeReceived); - assertEquals(0, lastContentLengthReceived); - - final int pageSize = 512; - byte[] buffer = new byte[pageSize]; - output.write(buffer); - output.flush(); - assertEquals(BlobOperationDescriptor.OperationType.PutPage, - lastOperationTypeSent); - assertEquals(BlobOperationDescriptor.OperationType.PutPage, - lastOperationTypeReceived); - assertEquals(buffer.length, lastContentLengthReceived); - } - } - - @Test - public void testGetBlobOperations() throws Exception { - CloudBlobContainer container = getTestAccount().getRealContainer(); - - OperationContext context = new OperationContext(); - context.getResponseReceivedEventHandler().addListener( - new ResponseReceivedEventHandler()); - context.getSendingRequestEventHandler().addListener( - new SendingRequestEventHandler()); - - CloudBlockBlob blockBlob = container.getBlockBlobReference( - "testGetBlobOperations"); - assertNull(lastOperationTypeSent); - assertNull(lastOperationTypeReceived); - assertEquals(0, lastContentLengthReceived); - - String message = "this is a test"; - - try ( - BlobOutputStream output = blockBlob.openOutputStream(null, - null, - context); - ) { - assertNull(lastOperationTypeReceived); - assertEquals(0, lastContentLengthReceived); - - output.write(message.getBytes(StandardCharsets.UTF_8)); - output.flush(); - assertEquals(BlobOperationDescriptor.OperationType.PutBlock, - lastOperationTypeSent); - assertEquals(BlobOperationDescriptor.OperationType.PutBlock, - lastOperationTypeReceived); - assertEquals(message.length(), lastContentLengthReceived); - } - assertEquals(BlobOperationDescriptor.OperationType.PutBlockList, - lastOperationTypeSent); - assertEquals(BlobOperationDescriptor.OperationType.PutBlockList, - lastOperationTypeReceived); - assertEquals(0, lastContentLengthReceived); - - try ( - BlobInputStream input = blockBlob.openInputStream(null, - null, - context); - ) { - assertEquals(BlobOperationDescriptor.OperationType.GetProperties, - lastOperationTypeSent); - assertEquals(BlobOperationDescriptor.OperationType.GetProperties, - lastOperationTypeReceived); - assertEquals(0, lastContentLengthReceived); - - byte[] buffer = new byte[1024]; - int numBytesRead = input.read(buffer); - assertEquals(BlobOperationDescriptor.OperationType.GetBlob, - lastOperationTypeSent); - assertEquals(BlobOperationDescriptor.OperationType.GetBlob, - lastOperationTypeReceived); - assertEquals(message.length(), lastContentLengthReceived); - assertEquals(numBytesRead, 
lastContentLengthReceived); - } - } - - /** - * Called after the Azure Storage SDK receives a response. - * - * @param event The connection, operation, and request state. - */ - private void responseReceived(ResponseReceivedEvent event) { - HttpURLConnection conn = (HttpURLConnection) event.getConnectionObject(); - BlobOperationDescriptor.OperationType operationType - = BlobOperationDescriptor.getOperationType(conn); - lastOperationTypeReceived = operationType; - - switch (operationType) { - case AppendBlock: - case PutBlock: - case PutPage: - lastContentLengthReceived - = BlobOperationDescriptor.getContentLengthIfKnown(conn, - operationType); - break; - case GetBlob: - lastContentLengthReceived - = BlobOperationDescriptor.getContentLengthIfKnown(conn, - operationType); - break; - default: - lastContentLengthReceived = 0; - break; - } - } - - /** - * Called before the Azure Storage SDK sends a request. - * - * @param event The connection, operation, and request state. - */ - private void sendingRequest(SendingRequestEvent event) { - this.lastOperationTypeSent - = BlobOperationDescriptor.getOperationType( - (HttpURLConnection) event.getConnectionObject()); - } - - /** - * The ResponseReceivedEvent is fired after the Azure Storage SDK receives a - * response. - */ - @InterfaceAudience.Private - class ResponseReceivedEventHandler - extends StorageEvent<ResponseReceivedEvent> { - - /** - * Called after the Azure Storage SDK receives a response. - * - * @param event The connection, operation, and request state. - */ - @Override - public void eventOccurred(ResponseReceivedEvent event) { - responseReceived(event); - } - } - - /** - * The SendingRequestEvent is fired before the Azure Storage SDK sends a - * request. - */ - @InterfaceAudience.Private - class SendingRequestEventHandler extends StorageEvent<SendingRequestEvent> { - - /** - * Called before the Azure Storage SDK sends a request. - * - * @param event The connection, operation, and request state. - */ - @Override - public void eventOccurred(SendingRequestEvent event) { - sendingRequest(event); - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestClientThrottlingAnalyzer.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestClientThrottlingAnalyzer.java deleted file mode 100644 index 2bb7b2390dd45..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestClientThrottlingAnalyzer.java +++ /dev/null @@ -1,167 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import org.apache.hadoop.fs.contract.ContractTestUtils.NanoTimer; -import org.junit.jupiter.api.Test; - -/** - * Tests for ClientThrottlingAnalyzer.
- */ -public class TestClientThrottlingAnalyzer extends AbstractWasbTestWithTimeout { - private static final int ANALYSIS_PERIOD = 1000; - private static final int ANALYSIS_PERIOD_PLUS_10_PERCENT = ANALYSIS_PERIOD - + ANALYSIS_PERIOD / 10; - private static final long MEGABYTE = 1024 * 1024; - private static final int MAX_ACCEPTABLE_PERCENT_DIFFERENCE = 20; - - private void sleep(long milliseconds) { - try { - Thread.sleep(milliseconds); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } - } - - private void fuzzyValidate(long expected, long actual, double percentage) { - final double lowerBound = Math.max(expected - percentage / 100 * expected, 0); - final double upperBound = expected + percentage / 100 * expected; - - assertTrue(actual >= lowerBound && actual <= upperBound, String.format( - "The actual value %1$d is not within the expected range: " - + "[%2$.2f, %3$.2f].", - actual, - lowerBound, - upperBound)); - } - - private void validate(long expected, long actual) { - assertEquals(expected, actual, - String.format("The actual value %1$d is not the expected value %2$d.", actual, expected)); - } - - private void validateLessThanOrEqual(long maxExpected, long actual) { - assertTrue(actual < maxExpected, String.format( - "The actual value %1$d is not less than or equal to the maximum" - + " expected value %2$d.", - actual, - maxExpected)); - } - - /** - * Ensure that there is no waiting (sleepDuration = 0) if the metrics have - * never been updated. This validates proper initialization of - * ClientThrottlingAnalyzer. - */ - @Test - public void testNoMetricUpdatesThenNoWaiting() { - ClientThrottlingAnalyzer analyzer = new ClientThrottlingAnalyzer( - "test", - ANALYSIS_PERIOD); - validate(0, analyzer.getSleepDuration()); - sleep(ANALYSIS_PERIOD_PLUS_10_PERCENT); - validate(0, analyzer.getSleepDuration()); - } - - /** - * Ensure that there is no waiting (sleepDuration = 0) if the metrics have - * only been updated with successful requests. - */ - @Test - public void testOnlySuccessThenNoWaiting() { - ClientThrottlingAnalyzer analyzer = new ClientThrottlingAnalyzer( - "test", - ANALYSIS_PERIOD); - analyzer.addBytesTransferred(8 * MEGABYTE, false); - validate(0, analyzer.getSleepDuration()); - sleep(ANALYSIS_PERIOD_PLUS_10_PERCENT); - validate(0, analyzer.getSleepDuration()); - } - - /** - * Ensure that there is waiting (sleepDuration != 0) if the metrics have - * only been updated with failed requests. Also ensure that the - * sleepDuration decreases over time. - */ - @Test - public void testOnlyErrorsAndWaiting() { - ClientThrottlingAnalyzer analyzer = new ClientThrottlingAnalyzer( - "test", - ANALYSIS_PERIOD); - validate(0, analyzer.getSleepDuration()); - analyzer.addBytesTransferred(4 * MEGABYTE, true); - sleep(ANALYSIS_PERIOD_PLUS_10_PERCENT); - final int expectedSleepDuration1 = 1100; - validateLessThanOrEqual(expectedSleepDuration1, analyzer.getSleepDuration()); - sleep(10 * ANALYSIS_PERIOD); - final int expectedSleepDuration2 = 900; - validateLessThanOrEqual(expectedSleepDuration2, analyzer.getSleepDuration()); - } - - /** - * Ensure that there is waiting (sleepDuration != 0) if the metrics have - * only been updated with both successful and failed requests. Also ensure - * that the sleepDuration decreases over time. 
- */ - @Test - public void testSuccessAndErrorsAndWaiting() { - ClientThrottlingAnalyzer analyzer = new ClientThrottlingAnalyzer( - "test", - ANALYSIS_PERIOD); - validate(0, analyzer.getSleepDuration()); - analyzer.addBytesTransferred(8 * MEGABYTE, false); - analyzer.addBytesTransferred(2 * MEGABYTE, true); - sleep(ANALYSIS_PERIOD_PLUS_10_PERCENT); - NanoTimer timer = new NanoTimer(); - analyzer.suspendIfNecessary(); - final int expectedElapsedTime = 126; - fuzzyValidate(expectedElapsedTime, - timer.elapsedTimeMs(), - MAX_ACCEPTABLE_PERCENT_DIFFERENCE); - sleep(10 * ANALYSIS_PERIOD); - final int expectedSleepDuration = 110; - validateLessThanOrEqual(expectedSleepDuration, analyzer.getSleepDuration()); - } - - /** - * Ensure that there is waiting (sleepDuration != 0) if the metrics have - * only been updated with many successful and failed requests. Also ensure - * that the sleepDuration decreases to zero over time. - */ - @Test - public void testManySuccessAndErrorsAndWaiting() { - ClientThrottlingAnalyzer analyzer = new ClientThrottlingAnalyzer( - "test", - ANALYSIS_PERIOD); - validate(0, analyzer.getSleepDuration()); - final int numberOfRequests = 20; - for (int i = 0; i < numberOfRequests; i++) { - analyzer.addBytesTransferred(8 * MEGABYTE, false); - analyzer.addBytesTransferred(2 * MEGABYTE, true); - } - sleep(ANALYSIS_PERIOD_PLUS_10_PERCENT); - NanoTimer timer = new NanoTimer(); - analyzer.suspendIfNecessary(); - fuzzyValidate(7, - timer.elapsedTimeMs(), - MAX_ACCEPTABLE_PERCENT_DIFFERENCE); - sleep(10 * ANALYSIS_PERIOD); - validate(0, analyzer.getSleepDuration()); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestKeyPageBlobDirectories.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestKeyPageBlobDirectories.java deleted file mode 100644 index 327a3b8fdfa08..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestKeyPageBlobDirectories.java +++ /dev/null @@ -1,169 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.IOException; -import java.net.URI; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.junit.jupiter.api.Test; - -/** - * Test config property KEY_PAGE_BLOB_DIRECTORIES. 
- */ -public class TestKeyPageBlobDirectories extends AbstractWasbTestBase{ - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.create(); - } - - public void expectPageBlobKey(boolean expectedOutcome, AzureNativeFileSystemStore store, String path) { - assertEquals(expectedOutcome, store.isPageBlobKey(path), - "Unexpected result for isPageBlobKey(" + path + ")"); - } - - @Test - public void testKeySetWithoutAsterisk() throws Exception { - NativeAzureFileSystem azureFs = fs; - AzureNativeFileSystemStore store = azureFs.getStore(); - Configuration conf = fs.getConf(); - String dirList = "/service/WALs,/data/mypageblobfiles"; - conf.set(AzureNativeFileSystemStore.KEY_PAGE_BLOB_DIRECTORIES, dirList); - URI uri = fs.getUri(); - fs.initialize(uri, conf); - - expectPageBlobKey(false, store, "/"); - expectPageBlobKey(false, store, "service"); - - expectPageBlobKey(false, store, "service/dir/recovered.edits"); - expectPageBlobKey(true, store, "service/WALs/recovered.edits"); - - expectPageBlobKey(false, store, "data/dir/recovered.txt"); - expectPageBlobKey(true, store, "data/mypageblobfiles/recovered.txt"); - } - - @Test - public void testKeySetWithAsterisk() throws Exception { - NativeAzureFileSystem azureFs = fs; - AzureNativeFileSystemStore store = azureFs.getStore(); - Configuration conf = fs.getConf(); - String dirList = "/service/*/*/*/recovered.edits,/*/recovered.edits,/*/*/*/WALs, /*/*/oldWALs/*/*"; - conf.set(AzureNativeFileSystemStore.KEY_PAGE_BLOB_DIRECTORIES, dirList); - URI uri = fs.getUri(); - fs.initialize(uri, conf); - - expectPageBlobKey(false, store, "/"); - expectPageBlobKey(false, store, "service"); - - expectPageBlobKey(false, store, "service/dir/recovered.edits"); - expectPageBlobKey(true, store, "service/dir1/dir2/dir3/recovered.edits"); - - expectPageBlobKey(false, store, "data/dir/recovered.edits"); - expectPageBlobKey(true, store, "data/recovered.edits"); - - expectPageBlobKey(false, store, "dir1/dir2/WALs/data"); - expectPageBlobKey(true, store, "dir1/dir2/dir3/WALs/data1"); - expectPageBlobKey(true, store, "dir1/dir2/dir3/WALs/data2"); - - expectPageBlobKey(false, store, "dir1/oldWALs/data"); - expectPageBlobKey(false, store, "dir1/dir2/oldWALs/data"); - expectPageBlobKey(true, store, "dir1/dir2/oldWALs/dir3/dir4/data"); - } - - - - @Test - public void testKeySetUsingFullName() throws Exception { - NativeAzureFileSystem azureFs = fs; - AzureNativeFileSystemStore store = azureFs.getStore(); - Configuration conf = fs.getConf(); - String dirList = "/service/WALs,/data/mypageblobfiles,/*/*/WALs,/*/*/recover.edits"; - conf.set(AzureNativeFileSystemStore.KEY_PAGE_BLOB_DIRECTORIES, dirList); - URI uri = fs.getUri(); - fs.initialize(uri, conf); - - final String defaultFS = FileSystem.getDefaultUri(conf).toString(); - - expectPageBlobKey(false, store, defaultFS + "service/recover.edits"); - expectPageBlobKey(true, store, defaultFS + "service/WALs/recover.edits"); - - expectPageBlobKey(false, store, defaultFS + "data/mismatch/mypageblobfiles/data"); - expectPageBlobKey(true, store, defaultFS + "data/mypageblobfiles/data"); - - expectPageBlobKey(false, store, defaultFS + "dir1/dir2/dir3/WALs/data"); - expectPageBlobKey(true, store, defaultFS + "dir1/dir2/WALs/data"); - - expectPageBlobKey(false, store, defaultFS + "dir1/dir2/dir3/recover.edits"); - expectPageBlobKey(true, store, defaultFS + "dir1/dir2/recover.edits"); - - } - - @Test - public void testKeyContainsAsterisk() throws IOException { - 
NativeAzureFileSystem azureFs = fs; - AzureNativeFileSystemStore store = azureFs.getStore(); - Configuration conf = fs.getConf(); - // Test dir name which contains * - String dirList = "/service/*/*/*/d*ir,/*/fi**le.data,/*/*/*/WALs*, /*/*/oldWALs"; - conf.set(AzureNativeFileSystemStore.KEY_PAGE_BLOB_DIRECTORIES, dirList); - URI uri = fs.getUri(); - fs.initialize(uri, conf); - - expectPageBlobKey(false, store, "/"); - expectPageBlobKey(false, store, "service"); - - expectPageBlobKey(false, store, "service/d*ir/data"); - expectPageBlobKey(true, store, "service/dir1/dir2/dir3/d*ir/data"); - - expectPageBlobKey(false, store, "dir/fi*le.data"); - expectPageBlobKey(true, store, "dir/fi**le.data"); - - expectPageBlobKey(false, store, "dir1/dir2/WALs/data"); - expectPageBlobKey(false, store, "dir1/dir2/dir3/WALs/data"); - expectPageBlobKey(true, store, "dir1/dir2/dir3/WALs*/data1"); - expectPageBlobKey(true, store, "dir1/dir2/dir3/WALs*/data2"); - - expectPageBlobKey(false, store, "dir1/oldWALs/data"); - expectPageBlobKey(true, store, "dir1/dir2/oldWALs/data1"); - expectPageBlobKey(true, store, "dir1/dir2/oldWALs/data2"); - } - - @Test - public void testKeyWithCommonPrefix() throws IOException { - NativeAzureFileSystem azureFs = fs; - AzureNativeFileSystemStore store = azureFs.getStore(); - Configuration conf = fs.getConf(); - // Test dir name which contains * - String dirList = "/service/WALs,/*/*/WALs"; - conf.set(AzureNativeFileSystemStore.KEY_PAGE_BLOB_DIRECTORIES, dirList); - URI uri = fs.getUri(); - fs.initialize(uri, conf); - - expectPageBlobKey(false, store, "/"); - expectPageBlobKey(false, store, "service"); - - expectPageBlobKey(false, store, "service/WALsssssss/dir"); - expectPageBlobKey(true, store, "service/WALs/dir"); - - expectPageBlobKey(false, store, "service/dir/WALsss/data"); - expectPageBlobKey(true, store, "service/dir/WALs/data"); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemAuthorization.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemAuthorization.java deleted file mode 100644 index 6cc6903d4930d..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemAuthorization.java +++ /dev/null @@ -1,2113 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure; - -import java.io.FileNotFoundException; -import java.security.PrivilegedExceptionAction; -import java.io.IOException; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.concurrent.Callable; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.contract.ContractTestUtils; -import org.apache.hadoop.fs.permission.FsAction; -import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.security.AccessControlException; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.test.GenericTestUtils; -import org.apache.hadoop.test.LambdaTestUtils; - -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import org.apache.hadoop.classification.VisibleForTesting; - -import static org.apache.hadoop.fs.azure.AzureNativeFileSystemStore.KEY_USE_SECURE_MODE; -import static org.apache.hadoop.fs.azure.CachingAuthorizer.KEY_AUTH_SERVICE_CACHING_ENABLE; -import static org.apache.hadoop.fs.contract.ContractTestUtils.*; -import static org.assertj.core.api.Assumptions.assumeThat; - -/** - * Test class to hold all WASB authorization tests. - */ -public class TestNativeAzureFileSystemAuthorization - extends AbstractWasbTestBase { - - private static final short FULL_PERMISSION_WITH_STICKYBIT = 1777; - - @VisibleForTesting - protected MockWasbAuthorizerImpl authorizer; - - @VisibleForTesting - protected static final short STICKYBIT_PERMISSION_CONSTANT = 1700; - @VisibleForTesting - protected static final String READ = WasbAuthorizationOperations.READ.toString(); - @VisibleForTesting - protected static final String WRITE = WasbAuthorizationOperations.WRITE.toString(); - - @Override - public Configuration createConfiguration() { - Configuration conf = super.createConfiguration(); - conf.set(NativeAzureFileSystem.KEY_AZURE_AUTHORIZATION, "true"); - conf.set(KEY_USE_SECURE_MODE, "true"); - conf.set(RemoteWasbAuthorizerImpl.KEY_REMOTE_AUTH_SERVICE_URLS, "http://localhost/"); - conf.set(NativeAzureFileSystem.AZURE_CHOWN_USERLIST_PROPERTY_NAME, "user1 , user2"); - conf.set(KEY_AUTH_SERVICE_CACHING_ENABLE, "false"); - conf.set(NativeAzureFileSystem.AZURE_CHMOD_USERLIST_PROPERTY_NAME, "user1 , user2"); - conf.set(NativeAzureFileSystem.AZURE_DAEMON_USERLIST_PROPERTY_NAME, "hive , hcs , yarn"); - return conf; - } - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.create(createConfiguration()); - } - - @Override - @BeforeEach - public void setUp() throws Exception { - super.setUp(); - boolean useSecureMode = fs.getConf().getBoolean(KEY_USE_SECURE_MODE, false); - boolean useAuthorization = fs.getConf().getBoolean(NativeAzureFileSystem.KEY_AZURE_AUTHORIZATION, false); - assumeThat((useSecureMode && useAuthorization)) - .as("Test valid when both SecureMode and Authorization are enabled .. skipping") - .isTrue(); - - authorizer = new MockWasbAuthorizerImpl(fs); - authorizer.init(fs.getConf()); - fs.updateWasbAuthorizer(authorizer); - } - - /** - * Setup up permissions to allow a recursive delete for cleanup purposes. - */ - protected void allowRecursiveDelete(NativeAzureFileSystem fs, String path) - throws IOException { - - int index = path.lastIndexOf('/'); - String parent = (index == 0) ? 
"/" : path.substring(0, index); - - authorizer.deleteAllAuthRules(); - authorizer.addAuthRule(parent, WRITE, getCurrentUserShortName(), true); - authorizer.addAuthRule((path.endsWith("*") ? path : path+"*"), WRITE, - getCurrentUserShortName(), true); - fs.updateWasbAuthorizer(authorizer); - } - - /** - * Setup the expected exception class, and exception message that the test is supposed to fail with. - */ - protected String setExpectedFailureMessage(String operation, Path path) { - return String.format("%s operation for Path : %s not allowed", - operation, path.makeQualified(fs.getUri(), fs.getWorkingDirectory())); - } - - /** - * get current user short name for user context - */ - protected String getCurrentUserShortName() throws IOException { - return UserGroupInformation.getCurrentUser().getShortUserName(); - } - - /** - * Positive test to verify Create access check. - * The file is created directly under an existing folder. - * No intermediate folders need to be created. - * @throws Throwable - */ - @Test - public void testCreateAccessWithoutCreateIntermediateFoldersCheckPositive() throws Throwable { - - Path parentDir = new Path("/"); - Path testPath = new Path(parentDir, "test.dat"); - - authorizer.addAuthRuleForOwner("/", WRITE, true); - fs.updateWasbAuthorizer(authorizer); - - try { - fs.create(testPath); - ContractTestUtils.assertPathExists(fs, "testPath was not created", testPath); - } - finally { - fs.delete(testPath, false); - } - } - - /** - * Positive test to verify Create access check. - * The test tries to create a file whose parent is non-existent to ensure that - * the intermediate folders between ancestor and direct parent are being created - * when proper ranger policies are configured. - * @throws Throwable - */ - @Test - public void testCreateAccessWithCreateIntermediateFoldersCheckPositive() throws Throwable { - - Path parentDir = new Path("/testCreateAccessCheckPositive/1/2/3"); - Path testPath = new Path(parentDir, "test.dat"); - - authorizer.addAuthRuleForOwner("/", WRITE, true); - fs.updateWasbAuthorizer(authorizer); - - try { - fs.create(testPath); - ContractTestUtils.assertPathExists(fs, "testPath was not created", testPath); - } - finally { - allowRecursiveDelete(fs, "/testCreateAccessCheckPositive"); - fs.delete(new Path("/testCreateAccessCheckPositive"), true); - } - } - - - /** - * Negative test to verify that create fails when trying to overwrite an existing file. - * without proper write permissions on the file being overwritten. - * @throws Throwable - */ - @Test // (expected=WasbAuthorizationException.class) - public void testCreateAccessWithOverwriteCheckNegative() throws Throwable { - - Path parentDir = new Path("/"); - Path testPath = new Path(parentDir, "test.dat"); - - String errorMsg = setExpectedFailureMessage("create", testPath); - - assertThrows(WasbAuthorizationException.class, () -> { - authorizer.addAuthRuleForOwner("/", WRITE, true); - fs.updateWasbAuthorizer(authorizer); - try { - fs.create(testPath); - ContractTestUtils.assertPathExists(fs, "testPath was not created", testPath); - fs.create(testPath, true); - } finally { - fs.delete(testPath, false); - } - }, errorMsg); - } - - /** - * Positive test to verify that create succeeds when trying to overwrite an existing file. - * when proper write permissions on the file being overwritten are provided. 
- * @throws Throwable - */ - @Test - public void testCreateAccessWithOverwriteCheckPositive() throws Throwable { - - Path parentDir = new Path("/"); - Path testPath = new Path(parentDir, "test.dat"); - - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner(testPath.toString(), WRITE, true); - fs.updateWasbAuthorizer(authorizer); - - try { - fs.create(testPath); - ContractTestUtils.assertPathExists(fs, "testPath was not created", testPath); - fs.create(testPath, true); - } - finally { - fs.delete(testPath, false); - } - } - - /** - * Negative test to verify that Create fails when appropriate permissions are not provided. - * @throws Throwable - */ - - @Test // (expected=WasbAuthorizationException.class) - public void testCreateAccessCheckNegative() throws Throwable { - - Path parentDir = new Path("/testCreateAccessCheckNegative"); - Path testPath = new Path(parentDir, "test.dat"); - - String errorMsg = setExpectedFailureMessage("create", testPath); - - assertThrows(WasbAuthorizationException.class, () -> { - authorizer.addAuthRuleForOwner("/", WRITE, false); - fs.updateWasbAuthorizer(authorizer); - - try { - fs.create(testPath); - } - finally { - /* Provide permissions to cleanup in case the file got created */ - allowRecursiveDelete(fs, parentDir.toString()); - fs.delete(parentDir, true); - } - }, errorMsg); - } - - /** - * Positive test to verify listStatus access check. - * @throws Throwable - */ - @Test - public void testListAccessCheckPositive() throws Throwable { - - Path parentDir = new Path("/testListAccessCheckPositive"); - Path intermediateFolders = new Path(parentDir, "1/2/3/"); - Path testPath = new Path(intermediateFolders, "test.dat"); - - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner(testPath.toString(), READ, true); - fs.updateWasbAuthorizer(authorizer); - - try { - fs.create(testPath); - fs.listStatus(testPath); - } - finally { - allowRecursiveDelete(fs, parentDir.toString()); - fs.delete(parentDir, true); - } - } - - /** - * Negative test to verify listStatus access check. - * @throws Throwable - */ - - @Test //(expected=WasbAuthorizationException.class) - public void testListAccessCheckNegative() throws Throwable { - - Path parentDir = new Path("/testListAccessCheckNegative"); - Path testPath = new Path(parentDir, "test.dat"); - - String errorMsg = setExpectedFailureMessage("liststatus", testPath); - assertThrows(WasbAuthorizationException.class, () -> { - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner(testPath.toString(), READ, false); - fs.updateWasbAuthorizer(authorizer); - try { - fs.create(testPath); - fs.listStatus(testPath); - } finally { - allowRecursiveDelete(fs, parentDir.toString()); - fs.delete(parentDir, true); - } - }, errorMsg); - } - - /** - * Positive test to verify rename access check. 
- * @throws Throwable - */ - @Test - public void testRenameAccessCheckPositive() throws Throwable { - - Path parentDir = new Path("/testRenameAccessCheckPositive"); - Path srcPath = new Path(parentDir, "test1.dat"); - Path dstPath = new Path(parentDir, "test2.dat"); - - /* to create parentDir */ - authorizer.addAuthRuleForOwner("/", WRITE, true); - /* for rename */ - authorizer.addAuthRuleForOwner(parentDir.toString(), WRITE, true); - fs.updateWasbAuthorizer(authorizer); - - try { - touch(fs, srcPath); - assertPathExists(fs, "sourcePath does not exist", srcPath); - assertRenameOutcome(fs, srcPath, dstPath, true); - assertPathExists(fs, "destPath does not exist", dstPath); - assertPathDoesNotExist(fs, "sourcePath exists after rename!", srcPath); - } - finally { - recursiveDelete(parentDir); - } - } - - /** - * Negative test to verify rename access check. - * @throws Throwable - */ - @Test //(expected=WasbAuthorizationException.class) - public void testRenameAccessCheckNegative() throws Throwable { - - Path parentDir = new Path("/testRenameAccessCheckNegative"); - Path srcPath = new Path(parentDir, "test1.dat"); - Path dstPath = new Path(parentDir, "test2.dat"); - - String errorMsg = setExpectedFailureMessage("rename", srcPath); - - assertThrows(WasbAuthorizationException.class, () -> { - /* to create parent dir */ - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner(parentDir.toString(), WRITE, false); - fs.updateWasbAuthorizer(authorizer); - - try { - fs.create(srcPath); - ContractTestUtils.assertPathExists(fs, "sourcePath does not exist", srcPath); - fs.rename(srcPath, dstPath); - ContractTestUtils.assertPathExists(fs, "destPath does not exist", dstPath); - } finally { - ContractTestUtils.assertPathExists(fs, - "sourcePath does not exist after rename failure!", srcPath); - allowRecursiveDelete(fs, parentDir.toString()); - fs.delete(parentDir, true); - } - }, errorMsg); - } - - /** - * Negative test to verify rename access check - the dstFolder disallows rename. - * @throws Throwable - */ - @Test //(expected=WasbAuthorizationException.class) - public void testRenameAccessCheckNegativeOnDstFolder() throws Throwable { - - Path parentSrcDir = new Path("/testRenameAccessCheckNegativeSrc"); - Path srcPath = new Path(parentSrcDir, "test1.dat"); - Path parentDstDir = new Path("/testRenameAccessCheckNegativeDst"); - Path dstPath = new Path(parentDstDir, "test2.dat"); - - String errorMsg = setExpectedFailureMessage("rename", dstPath); - - assertThrows(WasbAuthorizationException.class, () -> { - authorizer.addAuthRuleForOwner("/", WRITE, true); /* to create parent dir */ - authorizer.addAuthRuleForOwner(parentSrcDir.toString(), WRITE, true); - authorizer.addAuthRuleForOwner(parentDstDir.toString(), WRITE, false); - fs.updateWasbAuthorizer(authorizer); - - try { - touch(fs, srcPath); - ContractTestUtils.assertPathExists(fs, "sourcePath does not exist", srcPath); - fs.mkdirs(parentDstDir); - fs.rename(srcPath, dstPath); - ContractTestUtils.assertPathDoesNotExist(fs, "destPath does not exist", dstPath); - } finally { - ContractTestUtils.assertPathExists(fs, - "sourcePath does not exist after rename !", srcPath); - recursiveDelete(parentSrcDir); - } - }, errorMsg); - } - - /** - * Positive test to verify rename access check - the dstFolder allows rename. 
- * @throws Throwable - */ - @Test - public void testRenameAccessCheckPositiveOnDstFolder() throws Throwable { - - Path parentSrcDir = new Path("/testRenameAccessCheckPositiveSrc"); - Path srcPath = new Path(parentSrcDir, "test1.dat"); - Path parentDstDir = new Path("/testRenameAccessCheckPositiveDst"); - Path dstPath = new Path(parentDstDir, "test2.dat"); - - authorizer.addAuthRuleForOwner("/", WRITE, true); /* to create parent dirs */ - authorizer.addAuthRuleForOwner(parentSrcDir.toString(), WRITE, true); - authorizer.addAuthRuleForOwner(parentDstDir.toString(), WRITE, true); - fs.updateWasbAuthorizer(authorizer); - - try { - touch(fs, srcPath); - ContractTestUtils.assertPathExists(fs, "sourcePath does not exist", srcPath); - fs.mkdirs(parentDstDir); - assertRenameOutcome(fs, srcPath, dstPath, true); - ContractTestUtils.assertPathDoesNotExist(fs, "sourcePath does not exist", srcPath); - ContractTestUtils.assertPathExists(fs, "destPath does not exist", dstPath); - } finally { - recursiveDelete(parentSrcDir); - recursiveDelete(parentDstDir); - } - } - - /** - * Recursive delete for teardown/finally operations, setting the permissions - * to do the delete before invoking FileSystem.delete. - * Exceptions are caught and logged at ERROR. - * @param path path to delete - */ - private void recursiveDelete(Path path) { - try { - allowRecursiveDelete(fs, path.toString()); - fs.delete(path, true); - } catch (IOException e) { - LOG.error("Failed to delete {}", path, e); - } - } - - /** - * Positive test to check rename succeeds for hierarchy of - * files and folders under a src directory when destination - * folder already exists. - */ - @Test - public void testRenamePositiveWhenDestinationFolderExists() throws Throwable { - - Path parentSrcDir = new Path("/testRenamePositiveForFolderSrc"); - Path srcFilePath = new Path(parentSrcDir, "test1.dat"); - Path srcFolderPath = new Path(parentSrcDir, "testFolder"); - Path dstDir = new Path("/testRenamePositiveForFolderDst"); - Path finalDstDir = new Path(dstDir, "testRenamePositiveForFolderSrc"); - Path dstFilePath = new Path(finalDstDir, "test1.dat"); - Path dstFolderPath = new Path(finalDstDir, "testFolder"); - - /* to create parent dirs */ - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner(parentSrcDir.toString(), WRITE, true); - authorizer.addAuthRuleForOwner(dstDir.toString(), WRITE, true); - /* Required for assertPathExists calls */ - authorizer.addAuthRuleForOwner("/", READ, true); - authorizer.addAuthRuleForOwner(parentSrcDir.toString(), READ, true); - authorizer.addAuthRuleForOwner(finalDstDir.toString(), READ, true); - fs.updateWasbAuthorizer(authorizer); - - try { - touch(fs, srcFilePath); - assertPathExists(fs, "srcFilePath does not exist", srcFilePath); - fs.mkdirs(srcFolderPath); - assertIsDirectory(fs, srcFolderPath); - fs.mkdirs(dstDir); - assertIsDirectory(fs, dstDir); - assertRenameOutcome(fs, parentSrcDir, dstDir, true); - assertPathDoesNotExist(fs, "parentSrcDir exists", parentSrcDir); - assertPathDoesNotExist(fs, "srcFilePath exists", srcFilePath); - assertPathDoesNotExist(fs, "srcFolderPath exists", srcFolderPath); - assertPathExists(fs, "destPath does not exist", dstDir); - assertPathExists(fs, "dstFilePath does not exist", dstFilePath); - assertPathExists(fs, "dstFolderPath does not exist", dstFolderPath); - } finally { - recursiveDelete(parentSrcDir); - recursiveDelete(dstDir); - } - } - - /** - * Positive test to check rename succeeds for hierarchy of - * files and folders under a src directory 
and when the destination - * folder does not exist. - */ - @Test - public void testRenamePositiveWhenDestinationFolderDoesNotExist() throws Throwable { - Path srcParentDir = new Path("/testRenamePositiveWhenDestinationFolderDoesNotExist"); - Path srcDir = new Path(srcParentDir, "srcDir"); - Path srcFilePath = new Path(srcDir, "test1.dat"); - Path srcSubDirPath = new Path(srcDir, "testFolder"); - Path srcSubDirFilePath = new Path(srcSubDirPath, "test2.dat"); - Path dstDir = new Path(srcParentDir, "dstDir"); - Path dstFilePath = new Path(dstDir, "test1.dat"); - Path dstSubDirPath = new Path(dstDir, "testFolder"); - Path dstSubDirFilePath = new Path(dstSubDirPath, "test2.dat"); - - /* to create parent dirs */ - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner(srcParentDir.toString(), WRITE, true); - authorizer.addAuthRuleForOwner(srcDir.toString(), WRITE, true); - authorizer.addAuthRuleForOwner(srcSubDirPath.toString(), WRITE, true); - /* Required for asserPathExists calls */ - authorizer.addAuthRuleForOwner("/", READ, true); - authorizer.addAuthRuleForOwner(srcParentDir.toString(), READ, true); - authorizer.addAuthRuleForOwner(srcDir.toString(), READ, true); - authorizer.addAuthRuleForOwner(srcSubDirPath.toString(), READ, true); - authorizer.addAuthRuleForOwner(dstDir.toString(), READ, true); - authorizer.addAuthRuleForOwner(dstSubDirPath.toString(), READ, true); - fs.updateWasbAuthorizer(authorizer); - - try { - touch(fs, srcFilePath); - assertPathExists(fs, "srcFilePath does not exist", srcFilePath); - fs.mkdirs(srcSubDirPath); - assertIsDirectory(fs, srcSubDirPath); - touch(fs, srcSubDirFilePath); - assertPathExists(fs, "srcSubDirFilePath does not exist", srcSubDirFilePath); - assertRenameOutcome(fs, srcDir, dstDir, true); - assertPathDoesNotExist(fs, "srcDir exists", srcDir); - assertPathDoesNotExist(fs, "srcFilePath exists", srcFilePath); - assertPathDoesNotExist(fs, "srcSubDirPath exists", srcSubDirPath); - assertPathDoesNotExist(fs, "srcSubDirFilePath exists", srcSubDirFilePath); - assertPathExists(fs, "destPath does not exist", dstDir); - assertPathExists(fs, "dstFilePath does not exist", dstFilePath); - assertPathExists(fs, "dstSubDirPath does not exist", dstSubDirPath); - assertPathExists(fs, "dstSubDirFilePath does not exist", dstSubDirFilePath); - } finally { - recursiveDelete(srcParentDir); - } - } - - /** - * Test to verify rename fails and returns false when - * the source to be renamed does not exist. 
- */ - @Test - public void testRenameOnNonExistentSource() throws Throwable { - - Path parentSrcDir = new Path("/testRenameOnNonExistentSourceFolderSrc"); - Path srcPath = new Path(parentSrcDir, "test1.dat"); - Path parentDstDir = new Path("/testRenameOnNonExistentSourceFolderDst"); - Path dstPath = new Path(parentDstDir, "test2.dat"); - - authorizer.addAuthRuleForOwner("/", WRITE, true); /* to create parent dirs */ - authorizer.addAuthRuleForOwner(parentSrcDir.toString(), WRITE, true); - authorizer.addAuthRuleForOwner(parentDstDir.toString(), WRITE, true); - // required for assertpathExists calls - authorizer.addAuthRuleForOwner("/", READ, true); - authorizer.addAuthRuleForOwner(parentDstDir.toString(), READ, true); - fs.updateWasbAuthorizer(authorizer); - - try { - fs.mkdirs(parentSrcDir); - assertIsDirectory(fs, parentSrcDir); - fs.mkdirs(parentDstDir); - // should return false - assertRenameOutcome(fs, srcPath, dstPath, false); - assertPathDoesNotExist(fs, "destPath exists!", dstPath); - } finally { - recursiveDelete(parentSrcDir); - recursiveDelete(parentDstDir); - } - } - - /** - * Positive test to check rename succeeds when sticky bit is set on - * source parent directory and user owns the source directory. - */ - @Test - public void testRenameWithStickyBitPositive() throws Throwable { - - Path parentSrcDir = new Path("/testRenameWithStickyBitPositiveSrc"); - Path srcPath = new Path(parentSrcDir, "test1.dat"); - Path parentDstDir = new Path("/testRenameWithStickyBitPositiveDst"); - Path dstPath = new Path(parentDstDir, "test2.dat"); - - authorizer.addAuthRuleForOwner("/", WRITE, true); /* to create parent dirs */ - authorizer.addAuthRuleForOwner(parentSrcDir.toString(), WRITE, true); - authorizer.addAuthRuleForOwner(parentDstDir.toString(), WRITE, true); - /* Required for asserPathExists calls */ - fs.updateWasbAuthorizer(authorizer); - - try { - touch(fs, srcPath); - assertPathExists(fs, "sourcePath does not exist", srcPath); - fs.mkdirs(parentDstDir); - assertIsDirectory(fs, parentDstDir); - // set stickybit on parent directory - fs.setPermission(parentSrcDir, new FsPermission(STICKYBIT_PERMISSION_CONSTANT)); - assertRenameOutcome(fs, srcPath, dstPath, true); - assertPathDoesNotExist(fs, "sourcePath exists", srcPath); - assertPathExists(fs, "destPath does not exist", dstPath); - } finally { - recursiveDelete(parentSrcDir); - recursiveDelete(parentDstDir); - } - } - - /** - * Test to check rename fails when sticky bit is set on - * parent of source directory and the user is not owner - * of parent or the source directory. - */ - @Test - public void testRenameWithStickyBitNegative() throws Throwable { - - final Path parentSrcDir = new Path("/testRenameWithStickyBitNegativeSrc"); - final Path srcPath = new Path(parentSrcDir, "test1.dat"); - final Path parentDstDir = new Path("/testRenameWithStickyBitNegativeDst"); - final Path dstPath = new Path(parentDstDir, "test2.dat"); - - String errorMsg = String.format("Rename operation for %s is not permitted." 
- + " Details : Stickybit check failed.", srcPath.toString()); - - assertThrows(WasbAuthorizationException.class, () -> { - /* to create parent dirs */ - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner(parentSrcDir.toString(), - WRITE, true); - /* Required for asserPathExists calls */ - fs.updateWasbAuthorizer(authorizer); - - try { - touch(fs, srcPath); - assertPathExists(fs, "sourcePath does not exist", srcPath); - fs.mkdirs(parentDstDir); - assertIsDirectory(fs, parentDstDir); - // set stickybit on parent of source folder - fs.setPermission(parentSrcDir, new FsPermission(STICKYBIT_PERMISSION_CONSTANT)); - - UserGroupInformation dummyUser = UserGroupInformation.createUserForTesting( - "dummyUser", new String[] {"dummygroup"}); - - dummyUser.doAs(new PrivilegedExceptionAction<Void>() { - @Override - public Void run() throws Exception { - // Add auth rules for dummyuser - authorizer.addAuthRule(parentSrcDir.toString(), - WRITE, getCurrentUserShortName(), true); - authorizer.addAuthRule(parentDstDir.toString(), - WRITE, getCurrentUserShortName(), true); - - try { - fs.rename(srcPath, dstPath); - } catch (WasbAuthorizationException wae) { - assertPathExists(fs, "sourcePath does not exist", srcPath); - assertPathDoesNotExist(fs, "destPath exists", dstPath); - throw wae; - } - - return null; - } - }); - } finally { - recursiveDelete(parentSrcDir); - recursiveDelete(parentDstDir); - } - }, errorMsg); - } - - /** - * Test to check rename returns false when sticky bit is set on - * parent of source parent directory and the source does not exist - */ - @Test - public void testRenameOnNonExistentSourceWithStickyBit() throws Throwable { - - final Path parentSrcDir = new Path("/testRenameOnNonExistentSourceWithStickyBitSrc"); - final Path srcPath = new Path(parentSrcDir, "test1.dat"); - final Path parentDstDir = new Path("/testRenameOnNonExistentSourceWithStickyBitDest"); - final Path dstPath = new Path(parentDstDir, "test2.dat"); - - /* to create parent dirs */ - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner(parentSrcDir.toString(), - WRITE, true); - /* Required for asserPathExists calls */ - fs.updateWasbAuthorizer(authorizer); - - try { - fs.mkdirs(parentSrcDir); - assertIsDirectory(fs, parentSrcDir); - fs.mkdirs(parentDstDir); - assertIsDirectory(fs, parentDstDir); - // set stickybit on parent of source folder - fs.setPermission(parentSrcDir, new FsPermission(STICKYBIT_PERMISSION_CONSTANT)); - - UserGroupInformation dummyUser = UserGroupInformation.createUserForTesting( - "dummyUser", new String[] {"dummygroup"}); - - dummyUser.doAs(new PrivilegedExceptionAction<Void>() { - @Override - public Void run() throws Exception { - // Add auth rules for dummyuser - authorizer.addAuthRule(parentSrcDir.toString(), - WRITE, getCurrentUserShortName(), true); - authorizer.addAuthRule(parentDstDir.toString(), - WRITE, getCurrentUserShortName(), true); - // should return false since srcPath does not exist. - assertRenameOutcome(fs, srcPath, dstPath, false); - assertPathDoesNotExist(fs, "destPath exists", dstPath); - return null; - } - }); - } finally { - recursiveDelete(parentSrcDir); - recursiveDelete(parentDstDir); - } - } - - /** - * Positive test for read access check.
- * @throws Throwable - */ - @Test - public void testReadAccessCheckPositive() throws Throwable { - - Path parentDir = new Path("/testReadAccessCheckPositive"); - Path testPath = new Path(parentDir, "test.dat"); - - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner(testPath.toString(), READ, true); - fs.updateWasbAuthorizer(authorizer); - - FSDataInputStream inputStream = null; - FSDataOutputStream fso = null; - - try { - fso = fs.create(testPath); - String data = "Hello World"; - fso.writeBytes(data); - fso.close(); - - inputStream = fs.open(testPath); - ContractTestUtils.verifyRead(inputStream, data.getBytes(), 0, data.length()); - } - finally { - if (fso != null) { - fso.close(); - } - if(inputStream != null) { - inputStream.close(); - } - allowRecursiveDelete(fs, parentDir.toString()); - fs.delete(parentDir, true); - } - } - - /** - * Negative test to verify read access check. - * @throws Throwable - */ - - @Test //(expected=WasbAuthorizationException.class) - public void testReadAccessCheckNegative() throws Throwable { - - Path parentDir = new Path("/testReadAccessCheckNegative"); - Path testPath = new Path(parentDir, "test.dat"); - - String errorMsg = setExpectedFailureMessage("read", testPath); - - assertThrows(WasbAuthorizationException.class, () -> { - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner(testPath.toString(), READ, false); - fs.updateWasbAuthorizer(authorizer); - - FSDataInputStream inputStream = null; - FSDataOutputStream fso = null; - - try { - fso = fs.create(testPath); - String data = "Hello World"; - fso.writeBytes(data); - fso.close(); - - inputStream = fs.open(testPath); - ContractTestUtils.verifyRead(inputStream, data.getBytes(), 0, data.length()); - } finally { - if (fso != null) { - fso.close(); - } - if (inputStream != null) { - inputStream.close(); - } - allowRecursiveDelete(fs, parentDir.toString()); - fs.delete(parentDir, true); - } - }, errorMsg); - } - - /** - * Positive test to verify file delete access check. - * @throws Throwable - */ - @Test - public void testFileDeleteAccessCheckPositive() throws Throwable { - - Path parentDir = new Path("/"); - Path testPath = new Path(parentDir, "test.dat"); - - authorizer.addAuthRuleForOwner("/", WRITE, true); - fs.updateWasbAuthorizer(authorizer); - try { - fs.create(testPath); - ContractTestUtils.assertPathExists(fs, "testPath was not created", testPath); - } - finally { - fs.delete(testPath, false); - ContractTestUtils.assertPathDoesNotExist(fs, "testPath exists after deletion!", testPath); - } - } - - /** - * Negative test to verify file delete access check. 
- * @throws Throwable - */ - @Test //(expected=WasbAuthorizationException.class) - public void testFileDeleteAccessCheckNegative() throws Throwable { - - Path parentDir = new Path("/"); - Path testPath = new Path(parentDir, "test.dat"); - - String errorMsg = setExpectedFailureMessage("delete", testPath); - - assertThrows(WasbAuthorizationException.class, () -> { - authorizer.addAuthRuleForOwner("/", WRITE, true); - fs.updateWasbAuthorizer(authorizer); - try { - fs.create(testPath); - ContractTestUtils.assertPathExists(fs, "testPath was not created", testPath); - - - /* Remove permissions for delete to force failure */ - authorizer.deleteAllAuthRules(); - authorizer.addAuthRuleForOwner("/", WRITE, false); - fs.updateWasbAuthorizer(authorizer); - - fs.delete(testPath, false); - } - finally { - /* Restore permissions to force a successful delete */ - authorizer.deleteAllAuthRules(); - authorizer.addAuthRuleForOwner("/", WRITE, true); - fs.updateWasbAuthorizer(authorizer); - - fs.delete(testPath, false); - ContractTestUtils.assertPathDoesNotExist(fs, "testPath exists after deletion!", testPath); - } - }, errorMsg); - } - - /** - * Positive test to verify file delete access check, with intermediate folders - * Uses wildcard recursive permissions. - * @throws Throwable - */ - @Test - public void testFileDeleteAccessWithIntermediateFoldersCheckPositive() throws Throwable { - - Path parentDir = new Path("/testDeleteIntermediateFolder"); - Path childPath = new Path(parentDir, "1/2"); - Path testPath = new Path(childPath, "test.dat"); - - authorizer.addAuthRuleForOwner("/", WRITE, true); // for create and delete - authorizer.addAuthRuleForOwner("/testDeleteIntermediateFolder*", - WRITE, true); // for recursive delete - fs.updateWasbAuthorizer(authorizer); - - try { - fs.create(testPath); - ContractTestUtils.assertPathExists(fs, "testPath was not created", testPath); - fs.delete(parentDir, true); - ContractTestUtils.assertPathDoesNotExist(fs, "testPath exists after deletion!", parentDir); - } - finally { - allowRecursiveDelete(fs, parentDir.toString()); - fs.delete(parentDir, true); - } - } - - /** - * Test to verify access check failure leaves intermediate folders undeleted. 
- * @throws Throwable - */ - @Test - public void testDeleteAuthCheckFailureLeavesFilesUndeleted() throws Throwable { - - Path parentDir = new Path("/testDeleteAuthCheckFailureLeavesFilesUndeleted"); - Path childPath1 = new Path(parentDir, "child1"); - Path childPath2 = new Path(parentDir, "child2"); - Path testPath1 = new Path(childPath1, "test.dat"); - Path testPath2 = new Path(childPath2, "test.dat"); - - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner("/testDeleteAuthCheckFailureLeavesFilesUndeleted*", - WRITE, true); - fs.updateWasbAuthorizer(authorizer); - - try { - fs.create(testPath1); - fs.create(testPath2); - ContractTestUtils.assertPathExists(fs, "testPath1 was not created", testPath1); - ContractTestUtils.assertPathExists(fs, "testPath2 was not created", testPath2); - - // revoke write on one of the child folders - authorizer.deleteAllAuthRules(); - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner(parentDir.toString(), WRITE, true); - authorizer.addAuthRuleForOwner(childPath2.toString(), WRITE, true); - authorizer.addAuthRuleForOwner(childPath1.toString(), WRITE, false); - - assertFalse(fs.delete(parentDir, true)); - - // Assert that only child2 contents are deleted - ContractTestUtils.assertPathExists(fs, "child1 is deleted!", testPath1); - ContractTestUtils.assertPathDoesNotExist(fs, "child2 exists after deletion!", testPath2); - ContractTestUtils.assertPathDoesNotExist(fs, "child2 exists after deletion!", childPath2); - ContractTestUtils.assertPathExists(fs, "parentDir is deleted!", parentDir); - - } - finally { - allowRecursiveDelete(fs, parentDir.toString()); - fs.delete(parentDir, true); - } - } - - /** - * Positive test to verify file delete with sticky bit on parent. - * @throws Throwable - */ - @Test - public void testSingleFileDeleteWithStickyBitPositive() throws Throwable { - - Path parentDir = new Path("/testSingleFileDeleteWithStickyBitPositive"); - Path testPath = new Path(parentDir, "test.dat"); - - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner(parentDir.toString(), WRITE, true); - fs.updateWasbAuthorizer(authorizer); - - try { - fs.create(testPath); - ContractTestUtils.assertPathExists(fs, "testPath was not created", testPath); - - // set stickybit on parent directory - fs.setPermission(parentDir, new FsPermission(STICKYBIT_PERMISSION_CONSTANT)); - - assertTrue(fs.delete(testPath, true)); - ContractTestUtils.assertPathDoesNotExist(fs, - "testPath exists after deletion!", testPath); - } - finally { - allowRecursiveDelete(fs, parentDir.toString()); - fs.delete(parentDir, true); - } - } - - /** - * Negative test to verify file delete fails when sticky bit is set on parent - * and non-owner user performs delete - * @throws Throwable - */ - @Test - public void testSingleFileDeleteWithStickyBitNegative() throws Throwable { - - Path parentDir = new Path("/testSingleFileDeleteWithStickyBitNegative"); - Path testPath = new Path(parentDir, "test.dat"); - - String errorMsg = String.format("%s has sticky bit set. 
File %s cannot be deleted.", - parentDir.toString(), testPath.toString()); - - assertThrows(WasbAuthorizationException.class, () -> { - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner(parentDir.toString(), WRITE, true); - fs.updateWasbAuthorizer(authorizer); - - try { - fs.create(testPath); - ContractTestUtils.assertPathExists(fs, "testPath was not created", testPath); - // set stickybit on parent directory - fs.setPermission(parentDir, new FsPermission(STICKYBIT_PERMISSION_CONSTANT)); - - UserGroupInformation dummyUser = UserGroupInformation.createUserForTesting( - "dummyUser", new String[] {"dummygroup"}); - - dummyUser.doAs(new PrivilegedExceptionAction() { - @Override - public Void run() throws Exception { - try { - authorizer.addAuthRule(parentDir.toString(), WRITE, - getCurrentUserShortName(), true); - fs.delete(testPath, true); - return null; - } - catch (WasbAuthorizationException wae) { - ContractTestUtils.assertPathExists(fs, "testPath should not be deleted!", testPath); - throw wae; - } - } - }); - } - finally { - allowRecursiveDelete(fs, parentDir.toString()); - fs.delete(parentDir, true); - } - }, errorMsg); - } - - /** - * Positive test to verify file and folder delete succeeds with stickybit - * when the owner of the files deletes the file. - * @throws Throwable - */ - @Test - public void testRecursiveDeleteSucceedsWithStickybit() throws Throwable { - - Path parentDir = new Path("/testRecursiveDeleteSucceedsWithStickybit"); - Path childDir = new Path(parentDir, "child"); - Path testFilePath = new Path(childDir, "test.dat"); - Path testFolderPath = new Path(childDir, "testDirectory"); - - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner("/testRecursiveDeleteSucceedsWithStickybit*", - WRITE, true); - fs.updateWasbAuthorizer(authorizer); - - try { - fs.create(testFilePath); - ContractTestUtils.assertPathExists(fs, "file was not created", testFilePath); - fs.mkdirs(testFolderPath); - ContractTestUtils.assertPathExists(fs, "folder was not created", testFolderPath); - // set stickybit on child directory - fs.setPermission(new Path(parentDir, "child"), - new FsPermission(STICKYBIT_PERMISSION_CONSTANT)); - // perform delete as owner of the files - assertTrue(fs.delete(parentDir, true)); - ContractTestUtils.assertPathDoesNotExist(fs, "parentDir exists after deletion!", parentDir); - } - finally { - allowRecursiveDelete(fs, parentDir.toString()); - fs.delete(parentDir, true); - } - } - - /** - * Test to verify delete fails for child files and folders when - * non-owner user performs delete and stickybit is set on parent - * @throws Throwable - */ - @Test - public void testRecursiveDeleteFailsWithStickybit() throws Throwable { - - Path parentDir = new Path("/testRecursiveDeleteFailsWithStickybit"); - Path childDir = new Path(parentDir, "child"); - Path testFilePath = new Path(childDir, "test.dat"); - Path testFolderPath = new Path(childDir, "testDirectory"); - - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner("/testRecursiveDeleteFailsWithStickybit*", - WRITE, true); - fs.updateWasbAuthorizer(authorizer); - - try { - fs.create(testFilePath); - ContractTestUtils.assertPathExists(fs, "file was not created", testFilePath); - fs.mkdirs(testFolderPath); - ContractTestUtils.assertPathExists(fs, "folder was not created", testFolderPath); - - // set stickybit on child directory - fs.setPermission(new Path(parentDir, "child"), - new FsPermission(STICKYBIT_PERMISSION_CONSTANT)); - - 
UserGroupInformation dummyUser = UserGroupInformation.createUserForTesting( - "dummyUser", new String[] {"dummygroup"}); - - dummyUser.doAs(new PrivilegedExceptionAction() { - @Override - public Void run() throws Exception { - // Add auth rules for dummyuser - authorizer.addAuthRule("/", WRITE, getCurrentUserShortName(), true); - authorizer.addAuthRule("/testRecursiveDeleteFailsWithStickybit*", - WRITE, getCurrentUserShortName(), true); - - assertFalse(fs.delete(parentDir, true)); - return null; - } - }); - - ContractTestUtils.assertPathExists(fs, "parentDir is deleted!", parentDir); - ContractTestUtils.assertPathExists(fs, "file is deleted!", testFilePath); - ContractTestUtils.assertPathExists(fs, "folder is deleted!", testFolderPath); - } - finally { - allowRecursiveDelete(fs, parentDir.toString()); - fs.delete(parentDir, true); - } - } - - /** - * Test delete scenario where sticky bit check leaves files/folders not owned - * by a specific user intact and the files owned by him/her are deleted - * @throws Throwable - */ - @Test - public void testDeleteSucceedsForOnlyFilesOwnedByUserWithStickybitSet() - throws Throwable { - - Path parentDir = new Path("/testDeleteSucceedsForOnlyFilesOwnedByUserWithStickybitSet"); - Path testFilePath = new Path(parentDir, "test.dat"); - Path testFolderPath = new Path(parentDir, "testDirectory"); - - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner( - "/testDeleteSucceedsForOnlyFilesOwnedByUserWithStickybitSet*", - WRITE, true); - fs.updateWasbAuthorizer(authorizer); - - try { - fs.create(testFilePath); - ContractTestUtils.assertPathExists(fs, "file was not created", testFilePath); - - fs.setPermission(parentDir, new FsPermission(STICKYBIT_PERMISSION_CONSTANT)); - - UserGroupInformation dummyUser = UserGroupInformation.createUserForTesting( - "dummyuser", new String[] {"dummygroup"}); - dummyUser.doAs(new PrivilegedExceptionAction() { - @Override - public Void run() throws Exception { - authorizer.addAuthRule("/", WRITE, getCurrentUserShortName(), true); - authorizer.addAuthRule("/testDeleteSucceedsForOnlyFilesOwnedByUserWithStickybitSet*", - WRITE, getCurrentUserShortName(), true); - - fs.create(testFolderPath); // the folder will have owner as dummyuser - ContractTestUtils.assertPathExists(fs, "folder was not created", testFolderPath); - assertFalse(fs.delete(parentDir, true)); - - ContractTestUtils.assertPathDoesNotExist(fs, "folder should have been deleted!", - testFolderPath); - ContractTestUtils.assertPathExists(fs, "parentDir is deleted!", parentDir); - ContractTestUtils.assertPathExists(fs, "file is deleted!", testFilePath); - return null; - } - }); - } - finally { - allowRecursiveDelete(fs, parentDir.toString()); - fs.delete(parentDir, true); - } - } - - /** - * Test delete scenario where sticky bit is set and the owner of parent - * directory can delete child files/folders which he does not own. 
- * This is according to the sticky bit behaviour specified in hdfs permission - * guide which is as follows - The sticky bit can be set on directories, - * preventing anyone except the superuser, directory owner or file owner - * from deleting or moving the files within the directory - * @throws Throwable - */ - @Test - public void testDeleteSucceedsForParentDirectoryOwnerUserWithStickybit() throws Throwable { - - Path parentDir = new Path("/testDeleteSucceedsForParentDirectoryOwnerUserWithStickybit"); - Path testFilePath = new Path(parentDir, "test.dat"); - - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner( - "/testDeleteSucceedsForParentDirectoryOwnerUserWithStickybit*", - WRITE, true); - fs.updateWasbAuthorizer(authorizer); - - try { - // create folder with owner as current user - fs.mkdirs(parentDir); - ContractTestUtils.assertPathExists(fs, "folder was not created", parentDir); - - // create child with owner as dummyUser - UserGroupInformation dummyUser = UserGroupInformation.createUserForTesting( - "user1", new String[] {"dummygroup"}); - dummyUser.doAs(new PrivilegedExceptionAction() { - @Override - public Void run() throws Exception { - authorizer.addAuthRule(parentDir.toString(), WRITE, getCurrentUserShortName(), true); - fs.create(testFilePath); - ContractTestUtils.assertPathExists(fs, "file was not created", testFilePath); - - fs.setPermission(parentDir, - new FsPermission(STICKYBIT_PERMISSION_CONSTANT)); - return null; - } - }); - - // invoke delete as current user - assertTrue(fs.delete(parentDir, true)); - ContractTestUtils.assertPathDoesNotExist(fs, "parentDir is not deleted!", parentDir); - ContractTestUtils.assertPathDoesNotExist(fs, "file is not deleted!", testFilePath); - } - finally { - allowRecursiveDelete(fs, parentDir.toString()); - fs.delete(parentDir, true); - } - } - - /** - * Test to verify delete of root succeeds with proper permissions and - * leaves root after delete. - * @throws Throwable - */ - @Test - public void testDeleteScenarioForRoot() throws Throwable { - Path rootPath = new Path("/"); - Path parentDir = new Path("/testDeleteScenarioForRoot"); - Path childPath1 = new Path(parentDir, "child1"); - Path childPath2 = new Path(parentDir, "child2"); - Path testPath1 = new Path(childPath1, "test.dat"); - Path testPath2 = new Path(childPath2, "testFolder"); - - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner("/testDeleteScenarioForRoot*", - WRITE, true); - fs.updateWasbAuthorizer(authorizer); - - try { - fs.create(testPath1); - fs.create(testPath2); - ContractTestUtils.assertPathExists(fs, "testPath1 was not created", testPath1); - ContractTestUtils.assertPathExists(fs, "testPath2 was not created", testPath2); - - assertFalse(fs.delete(rootPath, true)); - - ContractTestUtils.assertPathDoesNotExist(fs, "file exists after deletion!", testPath1); - ContractTestUtils.assertPathDoesNotExist(fs, "folder exists after deletion!", testPath2); - ContractTestUtils.assertPathDoesNotExist(fs, "parentDir exists after deletion!", parentDir); - ContractTestUtils.assertPathExists(fs, "Root should not have been deleted!", rootPath); - } - finally { - allowRecursiveDelete(fs, parentDir.toString()); - fs.delete(parentDir, true); - } - } - - /** - * Positive test for getFileStatus. 
- * @throws Throwable - */ - @Test - public void testGetFileStatusPositive() throws Throwable { - - Path testPath = new Path("/"); - authorizer.addAuthRuleForOwner("/", READ, true); - ContractTestUtils.assertIsDirectory(fs, testPath); - } - - /** - * Positive test for mkdirs access check. - * @throws Throwable - */ - @Test - public void testMkdirsCheckPositive() throws Throwable { - - Path testPath = new Path("/testMkdirsAccessCheckPositive/1/2/3"); - - authorizer.addAuthRuleForOwner("/", WRITE, true); - fs.updateWasbAuthorizer(authorizer); - - try { - fs.mkdirs(testPath); - ContractTestUtils.assertIsDirectory(fs, testPath); - } - finally { - allowRecursiveDelete(fs, "/testMkdirsAccessCheckPositive"); - fs.delete(new Path("/testMkdirsAccessCheckPositive"), true); - } - } - - /** - * Positive test for mkdirs -p with existing hierarchy - * @throws Throwable - */ - @Test - public void testMkdirsWithExistingHierarchyCheckPositive1() throws Throwable { - - Path testPath = new Path("/testMkdirsWithExistingHierarchyCheckPositive1"); - - authorizer.addAuthRuleForOwner("/", WRITE, true); - fs.updateWasbAuthorizer(authorizer); - - try { - fs.mkdirs(testPath); - ContractTestUtils.assertIsDirectory(fs, testPath); - - /* Don't need permissions to create a directory that already exists */ - authorizer.deleteAllAuthRules(); - authorizer.addAuthRuleForOwner(testPath.getParent().toString(), READ, true); // for assert - - fs.mkdirs(testPath); - ContractTestUtils.assertIsDirectory(fs, testPath); - } - finally { - allowRecursiveDelete(fs, testPath.toString()); - fs.delete(testPath, true); - } - } - - @Test - public void testMkdirsWithExistingHierarchyCheckPositive2() throws Throwable { - - Path testPath = new Path("/testMkdirsWithExistingHierarchyCheckPositive2"); - Path childPath1 = new Path(testPath, "1"); - Path childPath2 = new Path(childPath1, "2"); - Path childPath3 = new Path(childPath2, "3"); - - authorizer.addAuthRuleForOwner("/", - WRITE, true); - - authorizer.addAuthRuleForOwner(childPath1.toString(), - WRITE, true); - - fs.updateWasbAuthorizer(authorizer); - - try { - fs.mkdirs(childPath1); - ContractTestUtils.assertIsDirectory(fs, childPath1); - - // Path already exists => no-op. - fs.mkdirs(testPath); - ContractTestUtils.assertIsDirectory(fs, testPath); - - // Path already exists => no-op. - fs.mkdirs(childPath1); - ContractTestUtils.assertIsDirectory(fs, childPath1); - - // Check permissions against existing ancestor childPath1 - fs.mkdirs(childPath3); - ContractTestUtils.assertIsDirectory(fs, childPath3); - } finally { - allowRecursiveDelete(fs, testPath.toString()); - fs.delete(testPath, true); - } - } - /** - * Negative test for mkdirs access check. - * @throws Throwable - */ - @Test //(expected=WasbAuthorizationException.class) - public void testMkdirsCheckNegative() throws Throwable { - - Path testPath = new Path("/testMkdirsAccessCheckNegative/1/2/3"); - - String errorMsg = setExpectedFailureMessage("mkdirs", testPath); - - assertThrows(WasbAuthorizationException.class, () -> { - authorizer.addAuthRuleForOwner("/", WRITE, false); - fs.updateWasbAuthorizer(authorizer); - - try { - fs.mkdirs(testPath); - ContractTestUtils.assertPathDoesNotExist(fs, "testPath was not created", testPath); - } - finally { - allowRecursiveDelete(fs, "/testMkdirsAccessCheckNegative"); - fs.delete(new Path("/testMkdirsAccessCheckNegative"), true); - } - }, errorMsg); - } - - /** - * Positive test triple slash format (wasb:///) access check. 
- * @throws Throwable - */ - @Test - public void testListStatusWithTripleSlashCheckPositive() throws Throwable { - - Path testPath = new Path("/"); - - authorizer.addAuthRuleForOwner(testPath.toString(), READ, true); - fs.updateWasbAuthorizer(authorizer); - - Path testPathWithTripleSlash = new Path("wasb:///" + testPath); - fs.listStatus(testPathWithTripleSlash); - } - - /** - * Test case when owner matches current user - */ - @Test - public void testOwnerPermissionPositive() throws Throwable { - - Path parentDir = new Path("/testOwnerPermissionPositive"); - Path testPath = new Path(parentDir, "test.data"); - - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner(parentDir.toString(), WRITE, true); - // additional rule used for assertPathExists - fs.updateWasbAuthorizer(authorizer); - - try { - // creates parentDir with owner as current user - fs.mkdirs(parentDir); - ContractTestUtils.assertPathExists(fs, "parentDir does not exist", parentDir); - - fs.create(testPath); - fs.getFileStatus(testPath); - ContractTestUtils.assertPathExists(fs, "testPath does not exist", testPath); - - fs.delete(parentDir, true); - ContractTestUtils.assertPathDoesNotExist(fs, "testPath does not exist", testPath); - - } finally { - allowRecursiveDelete(fs, parentDir.toString()); - fs.delete(parentDir, true); - } - } - - /** - * Negative test case for owner does not match current user - */ - @Test - public void testOwnerPermissionNegative() throws Throwable { - - Path parentDir = new Path("/testOwnerPermissionNegative"); - Path childDir = new Path(parentDir, "childDir"); - - String errorMsg = setExpectedFailureMessage("mkdirs", childDir); - - assertThrows(WasbAuthorizationException.class, () -> { - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner(parentDir.toString(), WRITE, true); - - fs.updateWasbAuthorizer(authorizer); - - try { - fs.mkdirs(parentDir); - UserGroupInformation ugiSuperUser = UserGroupInformation.createUserForTesting( - "testuser", new String[] {}); - - ugiSuperUser.doAs(new PrivilegedExceptionAction() { - @Override - public Void run() throws Exception { - fs.mkdirs(childDir); - return null; - } - }); - - } finally { - allowRecursiveDelete(fs, parentDir.toString()); - fs.delete(parentDir, true); - } - }, errorMsg); - } - - /** - * Test to verify that retrieving owner information does not - * throw when file/folder does not exist - */ - @Test - public void testRetrievingOwnerDoesNotFailWhenFileDoesNotExist() - throws Throwable { - - Path testdirectory = new Path("/testDirectory123454565"); - - String owner = fs.getOwnerForPath(testdirectory); - assertEquals("", owner); - } - - /** - * Negative test for setOwner when Authorization is enabled. 
- */ - @Test - public void testSetOwnerThrowsForUnauthorisedUsers() throws Throwable { - Path testPath = new Path("/testSetOwnerNegative"); - - authorizer.addAuthRuleForOwner("/", WRITE, true); - fs.updateWasbAuthorizer(authorizer); - - final String owner; - UserGroupInformation unauthorisedUser = UserGroupInformation.createUserForTesting( - "unauthoriseduser", new String[] {"group1"}); - try { - fs.mkdirs(testPath); - ContractTestUtils.assertPathExists(fs, "test path does not exist", testPath); - owner = fs.getFileStatus(testPath).getOwner(); - - unauthorisedUser.doAs(new PrivilegedExceptionAction() { - @Override - public Void run() throws Exception { - try { - fs.setOwner(testPath, "newowner", null); - fail("Failing test because setOwner call was expected to throw"); - } catch (WasbAuthorizationException wex) { - // check that the owner is not modified - assertOwnerEquals(testPath, owner); - } - return null; - } - }); - } finally { - fs.delete(testPath, false); - } - } - - /** - * Test for setOwner when Authorization is enabled and - * the user is specified in chown allowed user list. - * */ - @Test - public void testSetOwnerSucceedsForAuthorisedUsers() throws Throwable { - - Path testPath = new Path("/testSetOwnerPositive"); - - authorizer.addAuthRuleForOwner("/", WRITE, true); - fs.updateWasbAuthorizer(authorizer); - - String newOwner = "user2"; - String newGroup = "newgroup"; - - UserGroupInformation authorisedUser = UserGroupInformation.createUserForTesting( - "user2", new String[]{"group1"}); - try { - - fs.mkdirs(testPath); - ContractTestUtils.assertPathExists(fs, "test path does not exist", testPath); - - String owner = fs.getFileStatus(testPath).getOwner(); - assumeThat(owner) - .as("changing owner requires original and new owner to be different") - .isNotEqualToIgnoringCase(newOwner); - - authorisedUser.doAs(new PrivilegedExceptionAction() { - @Override - public Void run() throws Exception { - fs.setOwner(testPath, newOwner, newGroup); - assertOwnerEquals(testPath, newOwner); - assertEquals(newGroup, fs.getFileStatus(testPath).getGroup()); - return null; - } - }); - - } finally { - fs.delete(testPath, false); - } - } - - /** - * Test for setOwner when Authorization is enabled and - * the userlist is specified as '*'. 
- * */ - @Test - public void testSetOwnerSucceedsForAnyUserWhenWildCardIsSpecified() throws Throwable { - fs.updateChownAllowedUsers(Collections.singletonList("*")); - final Path testPath = new Path("/testSetOwnerPositiveWildcard"); - - Configuration conf = fs.getConf(); - authorizer.init(conf); - authorizer.addAuthRuleForOwner("/", WRITE, true); - fs.updateWasbAuthorizer(authorizer); - - final String newOwner = "newowner"; - final String newGroup = "newgroup"; - - UserGroupInformation user = UserGroupInformation.createUserForTesting( - "anyuser", new String[]{"group1"}); - try { - - fs.mkdirs(testPath); - ContractTestUtils.assertPathExists(fs, "test path does not exist", testPath); - - String owner = fs.getFileStatus(testPath).getOwner(); - assumeThat(owner) - .as("changing owner requires original and new owner to be different") - .isNotEqualToIgnoringCase(newOwner); - - user.doAs(new PrivilegedExceptionAction() { - @Override - public Void run() throws Exception { - fs.setOwner(testPath, newOwner, newGroup); - assertOwnerEquals(testPath, newOwner); - assertEquals(newGroup, fs.getFileStatus(testPath).getGroup()); - return null; - } - }); - - } finally { - fs.delete(testPath, false); - } - } - - /** Test for setOwner throws for illegal setup of chown - * allowed testSetOwnerSucceedsForAuthorisedUsers. - */ - @Test - public void testSetOwnerFailsForIllegalSetup() throws Throwable { - fs.updateChownAllowedUsers(Arrays.asList("user1", "*")); - - final Path testPath = new Path("/testSetOwnerFailsForIllegalSetup"); - - Configuration conf = fs.getConf(); - authorizer.init(conf); - authorizer.addAuthRuleForOwner("/", WRITE, true); - fs.updateWasbAuthorizer(authorizer); - - UserGroupInformation user = UserGroupInformation.createUserForTesting( - "anyuser", new String[]{"group1"}); - try { - - fs.mkdirs(testPath); - ContractTestUtils.assertPathExists(fs, "test path does not exist", testPath); - - final String owner = fs.getFileStatus(testPath).getOwner(); - - user.doAs(new PrivilegedExceptionAction() { - @Override - public Void run() throws Exception { - try { - fs.setOwner(testPath, "newowner", null); - fail("Failing test because setOwner call was expected to throw"); - } catch (IllegalArgumentException iex) { - // check that the owner is not modified - assertOwnerEquals(testPath, owner); - } - return null; - } - }); - } finally { - fs.delete(testPath, false); - } - } - - /** Test to ensure that the internal RenamePending mechanism - * does not make authorization calls. 
- */ - @Test - public void testRenamePendingAuthorizationCalls() throws Throwable { - Path testPath = new Path("/testRenamePendingAuthorizationCalls"); - Path srcPath = new Path(testPath, "srcPath"); - Path dstPath = new Path(testPath, "dstPath"); - Path srcFilePath = new Path(srcPath, "file.txt"); - Path dstFilePath = new Path(dstPath, "file.txt"); - - authorizer.addAuthRuleForOwner("/", WRITE, true); - /* Remove nextline after fixing createInternal from FolderRenamePending */ - authorizer.addAuthRuleForOwner(testPath.toString(), WRITE, true); - fs.updateWasbAuthorizer(authorizer); - - try { - fs.create(srcFilePath); - - String srcKey = fs.pathToKey(srcPath); - String dstKey = fs.pathToKey(dstPath); - - // Create a -RenamePendingFile - NativeAzureFileSystem.FolderRenamePending renamePending = - new NativeAzureFileSystem.FolderRenamePending(srcKey, dstKey, null, fs); - renamePending.writeFile(fs); - - // Initiate the pending-rename - fs.getFileStatus(srcPath); - } catch (FileNotFoundException fnfe) { - // This is expected because getFileStatus would complete the pending "rename" - // represented by the -RenamePending file. - GenericTestUtils.assertExceptionContains( - srcPath.toString() + ": No such file or directory.", fnfe - ); - - // The pending rename should have completed - ContractTestUtils.assertPathExists(fs, - "dstFilePath does not exist -- pending rename failed", dstFilePath); - } finally { - allowRecursiveDelete(fs, testPath.toString()); - fs.delete(testPath, true); - } - } - - /** - * Negative test for setPermission when Authorization is enabled. - */ - @Test - public void testSetPermissionThrowsForUnauthorisedUsers() throws Throwable { - //setPermission is called by a user who is not a daemon user - //and not chmodAllowedUsers and not owner of the file/folder. - //This test validates a authorization exception during setPermission call - testSetPermission("/testSetPermissionNegative", null, null, "unauthorizeduser", - true, false); - } - - /** - * Positive test for setPermission when Authorization is enabled. - */ - @Test - public void testSetPermissionForAuthorisedUsers() throws Throwable { - //user1 is already part of chmodAllowedUsers. - //This test validates the allowed user can do setPermission - testSetPermission("/testSetPermissionPositive", null, null, "user1", - false, false); - } - - /** - * Positive test for setPermission as owner when Authorization is enabled. - */ - @Test - public void testSetPermissionForOwner() throws Throwable { - //setPermission is called by the owner and expect a success - //during setPermission call - testSetPermission("/testSetPermissionPositiveOwner", - null, null, null, false, false); - } - - /** - * Test setPermission when wildcard is specified in allowed user list. - */ - @Test - public void testSetPermissionWhenWildCardInAllowedUserList() throws Throwable { - //Allow all to setPermission and expect a success - //during setPermission call - List chmodAllowedUsers = Collections.singletonList("*"); - - testSetPermission("/testSetPermissionWhenWildCardInAllowedUserList", - chmodAllowedUsers, null, "testuser", false, false); - } - - /** - * Test setPermission when invalid configuration value for allowed user list - * i.e. wildcard character and a username. 
- */ - @Test - public void testSetPermissionForInvalidAllowedUserList() throws Throwable { - //Setting up an invalid chmodAllowedUsers and expects a failure - //during setPermission call - List chmodAllowedUsers = Arrays.asList("*", "testuser"); - - testSetPermission("/testSetPermissionForInvalidAllowedUserList", - chmodAllowedUsers, null, "testuser", true, true); - } - - /** - * Test setPermission for a daemon user. - */ - @Test - public void testSetPermissionForDaemonUser() throws Throwable { - //hive user is already setup as daemon user. - //This test validates the daemon user can do setPermission - testSetPermission("/testSetPermissionForDaemonUser", null, - null, "hive", false, false); - } - - /** - * Test setPermission when invalid configuration value for daemon user list - * i.e. wildcard character and a daemon username. - */ - @Test - public void testSetPermissionForInvalidDaemonUserList() throws Throwable { - - List daemonUsers = Arrays.asList("*", "hive"); - - testSetPermission("/testSetPermissionForInvalidDaemonUserList", null, - daemonUsers, "testuser", true, true); - - } - - /** - * Test access when requested permissions match the existing permissions. - */ - @Test - public void testAccessWhenPermissionsMatchForAllAndReadWrite() throws Throwable { - Configuration conf = fs.getConf(); - fs.setConf(conf); - final Path testPath = new Path("/testAccessWhenPermissionsMatchForAllAndReadWrite"); - - // For All and Read-Write FsAction. - authorizer.init(conf); - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner(testPath.toString(), WRITE, true); - authorizer.addAuthRuleForOwner(testPath.toString(), READ, true); - fs.updateWasbAuthorizer(authorizer); - try { - fs.mkdirs(testPath); - assertPathExists(fs, "test path does not exist", testPath); - fs.access(testPath, FsAction.ALL); - fs.access(testPath, FsAction.READ_WRITE); - } finally { - recursiveDelete(testPath); - } - - } - - /** - * Test access when Write and Write-Execute match the existing permissions. - * @throws Throwable - */ - @Test - public void testAccessWhenPermissionsMatchForWriteAndWriteExecute() throws Throwable { - Configuration conf = fs.getConf(); - fs.setConf(conf); - final Path testPath = new Path("/testAccessWhenPermissionsMatchForWriteAndWriteExecute"); - // For Write and Write-Execute FsAction. - authorizer.init(conf); - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner(testPath.toString(), WRITE, true); - fs.updateWasbAuthorizer(authorizer); - try { - fs.mkdirs(testPath); - assertPathExists(fs, "test path does not exist", testPath); - fs.access(testPath, FsAction.WRITE); - fs.access(testPath, FsAction.WRITE_EXECUTE); - } finally { - recursiveDelete(testPath); - } - } - - /** - * Test access when Read and Read-Execute match the existing permissions. - * @throws Throwable - */ - @Test - public void testAccessWhenPermissionsMatchForReadAndReadExecute() throws Throwable { - Configuration conf = fs.getConf(); - fs.setConf(conf); - final Path testPath = new Path("/testAccessWhenPermissionsMatchForReadAndReadExecute"); - // For Read and Read-Execute FsAction. 
- authorizer.init(conf); - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner(testPath.toString(), READ, true); - fs.updateWasbAuthorizer(authorizer); - try { - fs.mkdirs(testPath); - assertPathExists(fs, "test path does not exist", testPath); - fs.access(testPath, FsAction.READ); - fs.access(testPath, FsAction.READ_EXECUTE); - } finally { - recursiveDelete(testPath); - } - } - - /** - * Test access when Execute and None match the existing permissions. - * @throws Throwable - */ - @Test - public void testAccessWhenPermissionsMatchForExecuteAndNone() throws Throwable { - Configuration conf = fs.getConf(); - fs.setConf(conf); - final Path testPath = new Path("/testAccessWhenPermissionsMatchForExecuteAndNone"); - // For Execute and None FsAction. - authorizer.init(conf); - authorizer.addAuthRuleForOwner("/", WRITE, true); - fs.updateWasbAuthorizer(authorizer); - try { - fs.mkdirs(testPath); - assertPathExists(fs, "test path does not exist", testPath); - fs.access(testPath, FsAction.EXECUTE); - fs.access(testPath, FsAction.NONE); - } finally { - recursiveDelete(testPath); - } - } - - /** - * Test access when requested permissions do not match existing permissions. - */ - @Test - public void testAccessWhenPermissionsDoNotMatch() throws Throwable{ - Configuration conf = fs.getConf(); - fs.setConf(conf); - final Path testPath = new Path("/testAccessWhenPermissionsDoNotMatch"); - - authorizer.init(conf); - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner(testPath.toString(), READ, true); - fs.updateWasbAuthorizer(authorizer); - try { - fs.mkdirs(testPath); - assertPathExists(fs, "test path does not exist", testPath); - assertNoAccess(testPath, FsAction.ALL); - assertNoAccess(testPath, FsAction.WRITE); - assertNoAccess(testPath, FsAction.WRITE_EXECUTE); - } finally { - recursiveDelete(testPath); - } - } - - /** - * Test access when file does not exist and permissions match. - */ - @Test - public void testAccessFileDoesNotExist() throws Throwable{ - assertThrows(FileNotFoundException.class, () -> { - Configuration conf = fs.getConf(); - fs.setConf(conf); - final Path testPath = new Path("/testAccessFileDoesNotExist"); - - authorizer.init(conf); - authorizer.addAuthRuleForOwner(testPath.toString(), READ, true); - authorizer.addAuthRuleForOwner(testPath.toString(), WRITE, true); - fs.updateWasbAuthorizer(authorizer); - assertPathDoesNotExist(fs, "test path exists", testPath); - fs.access(testPath, FsAction.ALL); - }); - } - - /** - * Test access when file does not exist and permissions do not match. - */ - @Test - public void testAccessFileDoesNotExistWhenNoAccessPermission() throws Throwable { - assertThrows(FileNotFoundException.class, () -> { - Configuration conf = fs.getConf(); - fs.setConf(conf); - final Path testPath = new Path("/testAccessFileDoesNotExistWhenNoAccessPermission"); - - authorizer.init(conf); - fs.updateWasbAuthorizer(authorizer); - assertPathDoesNotExist(fs, "test path exists", testPath); - fs.access(testPath, FsAction.ALL); - }); - } - - /** - * Test access for file and intermediate directory after creating - * file with intermediate directory. 
- */ - @Test - public void testAccessForFileAndIntermediateDirectoryCreated() throws Throwable { - Path parentDir = new Path("/testAccessDirectory"); - Path intermediateDir = new Path(parentDir, "intermediateDir"); - Path testPath = new Path(intermediateDir, "test.dat"); - - authorizer.addAuthRuleForOwner("/", WRITE, true); - authorizer.addAuthRuleForOwner(parentDir.toString(), WRITE, true); - - // Recursive owner Rule is to determine the permission on intermediate dirs. - authorizer.addAuthRuleForOwner(parentDir.toString()+"/*", WRITE, true); - authorizer.addAuthRuleForOwner(parentDir.toString()+"/*", READ, true); - - fs.updateWasbAuthorizer(authorizer); - - try { - fs.mkdirs(parentDir); - fs.create(testPath); - assertPathExists(fs, "testPath was not created", testPath); - fs.access(parentDir, FsAction.WRITE); - fs.access(parentDir, FsAction.WRITE_EXECUTE); - - fs.access(intermediateDir, FsAction.ALL); - fs.access(intermediateDir, FsAction.READ_WRITE); - - fs.access(testPath, FsAction.ALL); - fs.access(testPath, FsAction.READ_WRITE); - } finally { - recursiveDelete(testPath); - } - } - - /** - * Helper method to test setPermission scenarios. This method handles both positive - * and negative scenarios of setPermission tests - */ - private void testSetPermission(String path, - List chmodAllowedUsers, - List daemonUsers, - String user, - boolean isSetPermissionFailureCase, - boolean isInvalidSetup) throws Throwable { - - final FsPermission filePermission; - - final Path testPath = new Path(path); - final FsPermission newPermission = new FsPermission(FULL_PERMISSION_WITH_STICKYBIT); - authorizer.addAuthRule("/", WRITE, getCurrentUserShortName(), true); - fs.updateWasbAuthorizer(authorizer); - - if (chmodAllowedUsers != null && !chmodAllowedUsers.isEmpty()) { - fs.updateChmodAllowedUsers(chmodAllowedUsers); - } - - if (daemonUsers != null && !daemonUsers.isEmpty()) { - fs.updateDaemonUsers(daemonUsers); - } - - UserGroupInformation testUser = (user != null) ? UserGroupInformation.createUserForTesting( - user, new String[] {"testgrp"}) : null; - try { - fs.mkdirs(testPath); - ContractTestUtils.assertPathExists(fs, "test path does not exist", - testPath); - filePermission = fs.getFileStatus(testPath).getPermission(); - - if (isSetPermissionFailureCase) { - executeSetPermissionFailure(testUser, testPath, filePermission, - newPermission, isInvalidSetup); - } else { - executeSetPermissionSuccess(testUser, testPath, filePermission, - newPermission); - } - - } finally { - fs.delete(testPath, false); - } - } - - /** - * This method expects a failure while invoking setPermission call - * and validates whether the failure is as expected - * - */ - private void executeSetPermissionFailure(UserGroupInformation testUser, - Path testPath, FsPermission oldPermission, FsPermission newPermission, - boolean isInvalidSetup) - throws Throwable { - testUser.doAs(new PrivilegedExceptionAction() { - @Override - public Void run() throws Exception { - try { - //READ access required for getFileStatus - fs.setPermission(testPath, newPermission); - fail("Failing test because setPermission was expected to throw"); - - } catch (IllegalArgumentException iex) { - if (!isInvalidSetup) { - //fail if IllegalArgumentException is not expected - fail("Failing test because IllegalArgumentException" - + " is not expected to throw"); - } - // check that the file permission is not modified. 
- assertPermissionEquals(testPath, oldPermission); - } catch (WasbAuthorizationException wex) { - if (isInvalidSetup) { - //fail if WasbAuthorizationException is not expected - fail("Failing test because WasbAuthorizationException" - + " is not expected to throw"); - } - // check that the file permission is not modified. - assertPermissionEquals(testPath, oldPermission); - } - return null; - } - }); - } - - /** - * This method expects a success while invoking setPermission call - * and validates whether the new permissions are set - * - */ - private void executeSetPermissionSuccess(UserGroupInformation testUser, - Path testPath, FsPermission oldPermission, FsPermission newPermission) - throws Throwable { - //If user is given, then use doAs - if (testUser != null) { - testUser.doAs(new PrivilegedExceptionAction() { - @Override - public Void run() throws Exception { - fs.setPermission(testPath, newPermission); - return null; - } - }); - } else { - //If user is not given, then run in current user context - fs.setPermission(testPath, newPermission); - } - - // check that the file permission is modified - assertPermissionEquals(testPath, newPermission); - // check old permission is not equals to new permission - assertNotEquals(newPermission, oldPermission); - } - - private void assertPermissionEquals(Path path, FsPermission newPermission) - throws IOException { - FileStatus status = fs.getFileStatus(path); - assertEquals(newPermission, status.getPermission(), - "Wrong permissions in " + status); - } - - private void assertOwnerEquals(Path path, String owner) throws IOException { - FileStatus status = fs.getFileStatus(path); - assertEquals(owner, status.getOwner(), "Wrong owner in " + status); - } - - private void assertNoAccess(final Path path, final FsAction action) - throws Exception { - LambdaTestUtils.intercept(AccessControlException.class, - new Callable() { - @Override - public String call() throws Exception { - fs.access(path, action); - return "Access granted to " + path + " for action " + action; - } - } - ); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemBlockCompaction.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemBlockCompaction.java deleted file mode 100644 index 408e850a372bc..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemBlockCompaction.java +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure; - -import com.microsoft.azure.storage.blob.BlockEntry; -import org.apache.commons.lang3.RandomStringUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.contract.ContractTestUtils; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -import java.io.ByteArrayOutputStream; -import java.io.OutputStream; -import java.net.URI; -import java.util.List; - -/** - * Test class that runs WASB block compaction process for block blobs. - */ - -public class TestNativeAzureFileSystemBlockCompaction extends AbstractWasbTestBase { - - private static final String TEST_FILE = "/user/active/test.dat"; - private static final Path TEST_PATH = new Path(TEST_FILE); - - private static final String TEST_FILE_NORMAL = "/user/normal/test.dat"; - private static final Path TEST_PATH_NORMAL = new Path(TEST_FILE_NORMAL); - - private AzureBlobStorageTestAccount testAccount = null; - - @BeforeEach - public void setUp() throws Exception { - super.setUp(); - testAccount = createTestAccount(); - fs = testAccount.getFileSystem(); - Configuration conf = fs.getConf(); - conf.setBoolean(NativeAzureFileSystem.APPEND_SUPPORT_ENABLE_PROPERTY_NAME, true); - conf.set(AzureNativeFileSystemStore.KEY_BLOCK_BLOB_WITH_COMPACTION_DIRECTORIES, "/user/active"); - URI uri = fs.getUri(); - fs.initialize(uri, conf); - } - - /* - * Helper method that creates test data of size provided by the - * "size" parameter. - */ - private static byte[] getTestData(int size) { - byte[] testData = new byte[size]; - System.arraycopy(RandomStringUtils.randomAlphabetic(size).getBytes(), 0, testData, 0, size); - return testData; - } - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.create(); - } - - private BlockBlobAppendStream getBlockBlobAppendStream(FSDataOutputStream appendStream) { - SyncableDataOutputStream dataOutputStream = null; - - if (appendStream.getWrappedStream() instanceof NativeAzureFileSystem.NativeAzureFsOutputStream) { - NativeAzureFileSystem.NativeAzureFsOutputStream fsOutputStream = - (NativeAzureFileSystem.NativeAzureFsOutputStream) appendStream.getWrappedStream(); - - dataOutputStream = (SyncableDataOutputStream) fsOutputStream.getOutStream(); - } - - if (appendStream.getWrappedStream() instanceof SyncableDataOutputStream) { - dataOutputStream = (SyncableDataOutputStream) appendStream.getWrappedStream(); - } - - assertNotNull( - dataOutputStream, "Did not recognize " + dataOutputStream); - - return (BlockBlobAppendStream) dataOutputStream.getOutStream(); - } - - private void verifyBlockList(BlockBlobAppendStream blockBlobStream, - int[] testData) throws Throwable { - List blockList = blockBlobStream.getBlockList(); - assertEquals(testData.length, blockList.size(), "Block list length"); - - int i = 0; - for (BlockEntry block: blockList) { - assertTrue(block.getSize() == testData[i++]); - } - } - - private void appendBlockList(FSDataOutputStream fsStream, - ByteArrayOutputStream memStream, - int[] testData) throws Throwable { - - for (int d: testData) { - byte[] data = getTestData(d); - memStream.write(data); - fsStream.write(data); - } - fsStream.hflush(); - } - - @Test - public void testCompactionDisabled() throws Throwable { - - try (FSDataOutputStream appendStream = fs.create(TEST_PATH_NORMAL)) { - - // testing new file - - SyncableDataOutputStream dataOutputStream = null; - - 
OutputStream wrappedStream = appendStream.getWrappedStream(); - if (wrappedStream instanceof NativeAzureFileSystem.NativeAzureFsOutputStream) { - NativeAzureFileSystem.NativeAzureFsOutputStream fsOutputStream = - (NativeAzureFileSystem.NativeAzureFsOutputStream) wrappedStream; - - dataOutputStream = (SyncableDataOutputStream) fsOutputStream.getOutStream(); - } else if (wrappedStream instanceof SyncableDataOutputStream) { - dataOutputStream = (SyncableDataOutputStream) wrappedStream; - } else { - fail("Unable to determine type of " + wrappedStream - + " class of " + wrappedStream.getClass()); - } - - assertFalse(dataOutputStream.getOutStream() instanceof BlockBlobAppendStream, - "Data output stream is a BlockBlobAppendStream: " - + dataOutputStream); - - } - } - - @Test - public void testCompaction() throws Throwable { - - final int n2 = 2; - final int n4 = 4; - final int n10 = 10; - final int n12 = 12; - final int n14 = 14; - final int n16 = 16; - - final int maxBlockSize = 16; - final int compactionBlockCount = 4; - - ByteArrayOutputStream memStream = new ByteArrayOutputStream(); - - try (FSDataOutputStream appendStream = fs.create(TEST_PATH)) { - - // test new file - - BlockBlobAppendStream blockBlobStream = getBlockBlobAppendStream(appendStream); - blockBlobStream.setMaxBlockSize(maxBlockSize); - blockBlobStream.setCompactionBlockCount(compactionBlockCount); - - appendBlockList(appendStream, memStream, new int[]{n2}); - verifyBlockList(blockBlobStream, new int[]{n2}); - - appendStream.hflush(); - verifyBlockList(blockBlobStream, new int[]{n2}); - - appendBlockList(appendStream, memStream, new int[]{n4}); - verifyBlockList(blockBlobStream, new int[]{n2, n4}); - - appendStream.hsync(); - verifyBlockList(blockBlobStream, new int[]{n2, n4}); - - appendBlockList(appendStream, memStream, new int[]{n4}); - verifyBlockList(blockBlobStream, new int[]{n2, n4, n4}); - - appendBlockList(appendStream, memStream, new int[]{n4}); - verifyBlockList(blockBlobStream, new int[]{n2, n4, n4, n4}); - - appendBlockList(appendStream, memStream, new int[]{n4}); - verifyBlockList(blockBlobStream, new int[]{n14, n4}); - - appendBlockList(appendStream, memStream, new int[]{n4}); - verifyBlockList(blockBlobStream, new int[]{n14, n4, n4}); - - appendBlockList(appendStream, memStream, new int[]{n4}); - verifyBlockList(blockBlobStream, new int[]{n14, n4, n4, n4}); - - appendBlockList(appendStream, memStream, new int[]{n2, n4, n4}); - verifyBlockList(blockBlobStream, new int[]{n14, n12, n10}); - - appendBlockList(appendStream, memStream, new int[]{n4}); - verifyBlockList(blockBlobStream, new int[]{n14, n12, n10, n4}); - - appendBlockList(appendStream, memStream, - new int[]{n4, n4, n4, n4}); - verifyBlockList(blockBlobStream, - new int[]{n14, n12, n14, n16}); - - appendBlockList(appendStream, memStream, - new int[]{n4, n4, n4, n4, n4}); - verifyBlockList(blockBlobStream, - new int[]{n14, n12, n14, n16, n16, n4}); - - appendBlockList(appendStream, memStream, - new int[]{n4}); - verifyBlockList(blockBlobStream, - new int[]{n14, n12, n14, n16, n16, n4, n4}); - - appendBlockList(appendStream, memStream, - new int[]{n4}); - verifyBlockList(blockBlobStream, - new int[]{n14, n12, n14, n16, n16, n4, n4, n4}); - - appendBlockList(appendStream, memStream, - new int[]{n4}); - verifyBlockList(blockBlobStream, - new int[]{n14, n12, n14, n16, n16, n4, n4, n4, n4}); - - appendBlockList(appendStream, memStream, new int[]{n4}); - - appendStream.close(); - - ContractTestUtils.verifyFileContents(fs, TEST_PATH, memStream.toByteArray()); - 
} - - try (FSDataOutputStream appendStream = fs.append(TEST_PATH)) { - - // test existing file - - BlockBlobAppendStream blockBlobStream = getBlockBlobAppendStream(appendStream); - blockBlobStream.setMaxBlockSize(maxBlockSize); - blockBlobStream.setCompactionBlockCount(compactionBlockCount); - - appendBlockList(appendStream, memStream, new int[]{n4}); - verifyBlockList(blockBlobStream, - new int[]{n14, n12, n14, n16, n16, n16, n4, n4}); - - appendBlockList(appendStream, memStream, new int[]{n4}); - verifyBlockList(blockBlobStream, - new int[]{n14, n12, n14, n16, n16, n16, n4, n4, n4}); - - appendBlockList(appendStream, memStream, new int[]{n4}); - verifyBlockList(blockBlobStream, - new int[]{n14, n12, n14, n16, n16, n16, n4, n4, n4, n4}); - - appendBlockList(appendStream, memStream, new int[]{n4}); - verifyBlockList(blockBlobStream, - new int[]{n14, n12, n14, n16, n16, n16, n16, n4}); - - appendStream.close(); - - ContractTestUtils.verifyFileContents(fs, TEST_PATH, memStream.toByteArray()); - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemConcurrency.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemConcurrency.java deleted file mode 100644 index a5bd553839786..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemConcurrency.java +++ /dev/null @@ -1,181 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure; - -import java.io.OutputStream; -import java.io.PrintWriter; -import java.io.StringWriter; -import java.net.URLDecoder; -import java.util.HashMap; -import java.util.Iterator; -import java.util.concurrent.ConcurrentLinkedQueue; - -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.util.StringUtils; -import org.apache.hadoop.util.concurrent.SubjectInheritingThread; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class TestNativeAzureFileSystemConcurrency extends AbstractWasbTestBase { - private InMemoryBlockBlobStore backingStore; - - @Override - @BeforeEach - public void setUp() throws Exception { - super.setUp(); - backingStore = getTestAccount().getMockStorage().getBackingStore(); - } - - @Override - public void tearDown() throws Exception { - super.tearDown(); - backingStore = null; - } - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.createMock(); - } - - @Test - public void testLinkBlobs() throws Exception { - Path filePath = new Path("/inProgress"); - FSDataOutputStream outputStream = fs.create(filePath); - // Since the stream is still open, we should see an empty link - // blob in the backing store linking to the temporary file. - HashMap metadata = backingStore - .getMetadata(AzureBlobStorageTestAccount.toMockUri(filePath)); - assertNotNull(metadata); - String linkValue = metadata.get(AzureNativeFileSystemStore.LINK_BACK_TO_UPLOAD_IN_PROGRESS_METADATA_KEY); - linkValue = URLDecoder.decode(linkValue, "UTF-8"); - assertNotNull(linkValue); - assertTrue(backingStore.exists(AzureBlobStorageTestAccount - .toMockUri(linkValue))); - // Also, WASB should say the file exists now even before we close the - // stream. - assertTrue(fs.exists(filePath)); - outputStream.close(); - // Now there should be no link metadata on the final file. - metadata = backingStore.getMetadata(AzureBlobStorageTestAccount - .toMockUri(filePath)); - assertNull(metadata - .get(AzureNativeFileSystemStore.LINK_BACK_TO_UPLOAD_IN_PROGRESS_METADATA_KEY)); - } - - private static String toString(FileStatus[] list) { - String[] asStrings = new String[list.length]; - for (int i = 0; i < list.length; i++) { - asStrings[i] = list[i].getPath().toString(); - } - return StringUtils.join(",", asStrings); - } - - /** - * Test to make sure that we don't expose the temporary upload folder when - * listing at the root. - */ - @Test - public void testNoTempBlobsVisible() throws Exception { - Path filePath = new Path("/inProgress"); - FSDataOutputStream outputStream = fs.create(filePath); - // Make sure I can't see the temporary blob if I ask for a listing - FileStatus[] listOfRoot = fs.listStatus(new Path("/")); - assertEquals(1, listOfRoot.length, "Expected one file listed, instead got: " - + toString(listOfRoot)); - assertEquals(fs.makeQualified(filePath), listOfRoot[0].getPath()); - outputStream.close(); - } - - /** - * Converts a collection of exceptions to a collection of strings by getting - * the stack trace on every exception. 
- */ - private static Iterable selectToString( - final Iterable collection) { - return new Iterable() { - @Override - public Iterator iterator() { - final Iterator exceptionIterator = collection.iterator(); - return new Iterator() { - @Override - public boolean hasNext() { - return exceptionIterator.hasNext(); - } - - @Override - public String next() { - StringWriter stringWriter = new StringWriter(); - PrintWriter printWriter = new PrintWriter(stringWriter); - exceptionIterator.next().printStackTrace(printWriter); - printWriter.close(); - return stringWriter.toString(); - } - - @Override - public void remove() { - exceptionIterator.remove(); - } - }; - } - }; - } - - /** - * Tests running starting multiple threads all doing various File system - * operations against the same FS. - */ - @Test - public void testMultiThreadedOperation() throws Exception { - for (int iter = 0; iter < 10; iter++) { - final int numThreads = 20; - Thread[] threads = new Thread[numThreads]; - final ConcurrentLinkedQueue exceptionsEncountered = new ConcurrentLinkedQueue(); - for (int i = 0; i < numThreads; i++) { - final Path threadLocalFile = new Path("/myFile" + i); - threads[i] = new SubjectInheritingThread(new Runnable() { - @Override - public void run() { - try { - assertTrue(!fs.exists(threadLocalFile)); - OutputStream output = fs.create(threadLocalFile); - output.write(5); - output.close(); - assertTrue(fs.exists(threadLocalFile)); - assertTrue(fs.listStatus(new Path("/")).length > 0); - } catch (Throwable ex) { - exceptionsEncountered.add(ex); - } - } - }); - } - for (Thread t : threads) { - t.start(); - } - for (Thread t : threads) { - t.join(); - } - assertTrue(exceptionsEncountered.isEmpty(), "Encountered exceptions: " - + StringUtils.join("\r\n", selectToString(exceptionsEncountered))); - tearDown(); - setUp(); - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemContractMocked.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemContractMocked.java deleted file mode 100644 index b773379e9572c..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemContractMocked.java +++ /dev/null @@ -1,66 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import org.apache.hadoop.fs.FileSystemContractBaseTest; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -/** - * Mocked testing of FileSystemContractBaseTest. 
- */ -public class TestNativeAzureFileSystemContractMocked extends - FileSystemContractBaseTest { - - @BeforeEach - public void setUp() throws Exception { - fs = AzureBlobStorageTestAccount.createMock().getFileSystem(); - } - - /** - * The following tests are failing on Azure and the Azure - * file system code needs to be modified to make them pass. - * A separate work item has been opened for this. - */ - @Disabled - @Test - public void testMoveFileUnderParent() throws Throwable { - } - - @Disabled - @Test - public void testRenameFileToSelf() throws Throwable { - } - - @Disabled - @Test - public void testRenameChildDirForbidden() throws Exception { - } - - @Disabled - @Test - public void testMoveDirUnderParent() throws Throwable { - } - - @Disabled - @Test - public void testRenameDirToSelf() throws Throwable { - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemFileNameCheck.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemFileNameCheck.java deleted file mode 100644 index 9b0df856cf018..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemFileNameCheck.java +++ /dev/null @@ -1,131 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.io.IOException; -import java.util.HashMap; - -import org.apache.hadoop.fs.Path; - -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** - * Tests the scenario where a colon is included in the file/directory name. - * - * NativeAzureFileSystem#create(), #mkdir(), and #rename() disallow the - * creation/rename of files/directories through WASB that have colons in the - * names. 
- */ -public class TestNativeAzureFileSystemFileNameCheck extends AbstractWasbTestBase { - private String root = null; - - @BeforeEach - @Override - public void setUp() throws Exception { - super.setUp(); - root = fs.getUri().toString(); - } - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.createMock(); - } - - @Test - public void testCreate() throws Exception { - // positive test - Path testFile1 = new Path(root + "/testFile1"); - assertTrue(fs.createNewFile(testFile1)); - - // negative test - Path testFile2 = new Path(root + "/testFile2:2"); - try { - fs.createNewFile(testFile2); - fail("Should've thrown."); - } catch (IOException e) { // ignore - } - } - - @Test - public void testRename() throws Exception { - // positive test - Path testFile1 = new Path(root + "/testFile1"); - assertTrue(fs.createNewFile(testFile1)); - Path testFile2 = new Path(root + "/testFile2"); - fs.rename(testFile1, testFile2); - assertTrue(!fs.exists(testFile1) && fs.exists(testFile2)); - - // negative test - Path testFile3 = new Path(root + "/testFile3:3"); - try { - fs.rename(testFile2, testFile3); - fail("Should've thrown."); - } catch (IOException e) { // ignore - } - assertTrue(fs.exists(testFile2)); - } - - @Test - public void testMkdirs() throws Exception { - // positive test - Path testFolder1 = new Path(root + "/testFolder1"); - assertTrue(fs.mkdirs(testFolder1)); - - // negative test - Path testFolder2 = new Path(root + "/testFolder2:2"); - try { - assertTrue(fs.mkdirs(testFolder2)); - fail("Should've thrown."); - } catch (IOException e) { // ignore - } - } - - @Test - public void testWasbFsck() throws Exception { - // positive test - Path testFolder1 = new Path(root + "/testFolder1"); - assertTrue(fs.mkdirs(testFolder1)); - Path testFolder2 = new Path(testFolder1, "testFolder2"); - assertTrue(fs.mkdirs(testFolder2)); - Path testFolder3 = new Path(testFolder1, "testFolder3"); - assertTrue(fs.mkdirs(testFolder3)); - Path testFile1 = new Path(testFolder2, "testFile1"); - assertTrue(fs.createNewFile(testFile1)); - Path testFile2 = new Path(testFolder1, "testFile2"); - assertTrue(fs.createNewFile(testFile2)); - assertFalse(runWasbFsck(testFolder1)); - - // negative test - InMemoryBlockBlobStore backingStore - = testAccount.getMockStorage().getBackingStore(); - backingStore.setContent( - AzureBlobStorageTestAccount.toMockUri("testFolder1/testFolder2/test2:2"), - new byte[] { 1, 2 }, - new HashMap(), false, 0); - assertTrue(runWasbFsck(testFolder1)); - } - - private boolean runWasbFsck(Path p) throws Exception { - WasbFsck fsck = new WasbFsck(fs.getConf()); - fsck.setMockFileSystemForTesting(fs); - fsck.run(new String[] { p.toString() }); - return fsck.getPathNameWarning(); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemMocked.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemMocked.java deleted file mode 100644 index a8f9a661f591d..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemMocked.java +++ /dev/null @@ -1,67 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import org.junit.jupiter.api.Disabled; -import java.io.IOException; - -/** - * Run {@link NativeAzureFileSystemBaseTest} tests against a mocked store, - * skipping tests of unsupported features - */ -public class TestNativeAzureFileSystemMocked extends - NativeAzureFileSystemBaseTest { - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.createMock(); - } - - // Ignore the following tests because taking a lease requires a real - // (not mock) file system store. These tests don't work on the mock. - @Override - @Disabled - public void testLeaseAsDistributedLock() { - } - - @Override - @Disabled - public void testSelfRenewingLease() { - } - - @Override - @Disabled - public void testRedoFolderRenameAll() { - } - - @Override - @Disabled - public void testCreateNonRecursive() { - } - - @Override - @Disabled - public void testSelfRenewingLeaseFileDelete() { - } - - @Override - @Disabled - public void testRenameRedoFolderAlreadyDone() throws IOException{ - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemOperationsMocked.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemOperationsMocked.java deleted file mode 100644 index f70787e366442..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemOperationsMocked.java +++ /dev/null @@ -1,89 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure; - -import static org.apache.hadoop.test.PlatformAssumptions.assumeNotWindows; - -import org.apache.hadoop.fs.FSMainOperationsBaseTest; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.junit.jupiter.api.BeforeEach; - -public class TestNativeAzureFileSystemOperationsMocked extends - FSMainOperationsBaseTest { - - private static final String TEST_ROOT_DIR = - "/tmp/TestNativeAzureFileSystemOperationsMocked"; - - public TestNativeAzureFileSystemOperationsMocked (){ - super(TEST_ROOT_DIR); - } - - @BeforeEach - @Override - public void setUp() throws Exception { - fSys = AzureBlobStorageTestAccount.createMock().getFileSystem(); - } - - @Override - protected FileSystem createFileSystem() throws Exception { - return AzureBlobStorageTestAccount.createMock().getFileSystem(); - } - - public void testListStatusThrowsExceptionForUnreadableDir() throws Exception { - System.out - .println("Skipping testListStatusThrowsExceptionForUnreadableDir since WASB" - + " doesn't honor directory permissions."); - assumeNotWindows(); - } - - @Override - public void testGlobStatusThrowsExceptionForUnreadableDir() - throws Exception { - System.out.println( - "Skipping testGlobStatusThrowsExceptionForUnreadableDir since WASB" - + " doesn't honor directory permissions."); - assumeNotWindows(); - } - - @Override - public String getTestRootDir() { - return TEST_ROOT_DIR; - } - - @Override - public Path getTestRootPath(FileSystem fSys) { - return fSys.makeQualified(new Path(TEST_ROOT_DIR)); - } - - @Override - public Path getTestRootPath(FileSystem fSys, String pathString) { - return fSys.makeQualified(new Path(TEST_ROOT_DIR, pathString)); - } - - @Override - public Path getAbsoluteTestRootPath(FileSystem fSys) { - Path testRootPath = new Path(TEST_ROOT_DIR); - if (testRootPath.isAbsolute()) { - return testRootPath; - } else { - return new Path(fSys.getWorkingDirectory(), TEST_ROOT_DIR); - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemUploadLogic.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemUploadLogic.java deleted file mode 100644 index 8ff14c0b5a45d..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestNativeAzureFileSystemUploadLogic.java +++ /dev/null @@ -1,181 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure; - -import java.io.ByteArrayInputStream; -import java.io.InputStream; -import java.io.OutputStream; - -import org.apache.hadoop.fs.Path; - -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -/** - * Tests for the upload, buffering and flush logic in WASB. - */ -public class TestNativeAzureFileSystemUploadLogic extends AbstractWasbTestBase { - - // Just an arbitrary number so that the values I write have a predictable - // pattern: 0, 1, 2, .. , 45, 46, 0, 1, 2, ... - static final int byteValuePeriod = 47; - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.createMock(); - } - - /** - * Various scenarios to test in how often we flush data while uploading. - */ - private enum FlushFrequencyVariation { - /** - * Flush before even a single in-memory buffer is full. - */ - BeforeSingleBufferFull, - /** - * Flush after a single in-memory buffer is full. - */ - AfterSingleBufferFull, - /** - * Flush after all the in-memory buffers got full and were - * automatically flushed to the backing store. - */ - AfterAllRingBufferFull, - } - - /** - * Tests that we upload consistently if we flush after every little - * bit of data. - */ - @Test - @Disabled - /* flush() no longer does anything. @@TODO: implement a force-flush and reinstate this test */ - public void testConsistencyAfterSmallFlushes() throws Exception { - testConsistencyAfterManyFlushes(FlushFrequencyVariation.BeforeSingleBufferFull); - } - - /** - * Tests that we upload consistently if we flush after every medium-sized - * bit of data. - */ - @Test - @Disabled - /* flush() no longer does anything. @@TODO: implement a force-flush and reinstate this test */ - public void testConsistencyAfterMediumFlushes() throws Exception { - testConsistencyAfterManyFlushes(FlushFrequencyVariation.AfterSingleBufferFull); - } - - /** - * Tests that we upload consistently if we flush after every large chunk - * of data. - */ - @Test - @Disabled - /* flush() no longer does anything. @@TODO: implement a force-flush and reinstate this test */ - public void testConsistencyAfterLargeFlushes() throws Exception { - testConsistencyAfterManyFlushes(FlushFrequencyVariation.AfterAllRingBufferFull); - } - - /** - * Makes sure the data in the given input is what I'd expect. - * @param inStream The input stream. - * @param expectedSize The expected size of the data in there. - */ - private void assertDataInStream(InputStream inStream, int expectedSize) - throws Exception { - int byteRead; - int countBytes = 0; - while ((byteRead = inStream.read()) != -1) { - assertEquals(countBytes % byteValuePeriod, byteRead); - countBytes++; - } - assertEquals(expectedSize, countBytes); - } - - /** - * Checks that the data in the given file is what I'd expect. - * @param file The file to check. - * @param expectedSize The expected size of the data in there. - */ - private void assertDataInFile(Path file, int expectedSize) throws Exception { - try(InputStream inStream = getFileSystem().open(file)) { - assertDataInStream(inStream, expectedSize); - } - } - - /** - * Checks that the data in the current temporary upload blob - * is what I'd expect. - * @param expectedSize The expected size of the data in there. - */ - private void assertDataInTempBlob(int expectedSize) throws Exception { - // Look for the temporary upload blob in the backing store. 
- InMemoryBlockBlobStore backingStore = - getTestAccount().getMockStorage().getBackingStore(); - String tempKey = null; - for (String key : backingStore.getKeys()) { - if (key.contains(NativeAzureFileSystem.AZURE_TEMP_FOLDER)) { - // Assume this is the one we're looking for. - tempKey = key; - break; - } - } - assertNotNull(tempKey); - try (InputStream inStream = new ByteArrayInputStream( - backingStore.getContent(tempKey))) { - assertDataInStream(inStream, expectedSize); - } - } - - /** - * Tests the given scenario for uploading a file while flushing - * periodically and making sure the data is always consistent - * with what I'd expect. - * @param variation The variation/scenario to test. - */ - private void testConsistencyAfterManyFlushes(FlushFrequencyVariation variation) - throws Exception { - Path uploadedFile = methodPath(); - try { - OutputStream outStream = getFileSystem().create(uploadedFile); - final int totalSize = 9123; - int flushPeriod; - switch (variation) { - case BeforeSingleBufferFull: flushPeriod = 300; break; - case AfterSingleBufferFull: flushPeriod = 600; break; - case AfterAllRingBufferFull: flushPeriod = 1600; break; - default: - throw new IllegalArgumentException("Unknown variation: " + variation); - } - for (int i = 0; i < totalSize; i++) { - outStream.write(i % byteValuePeriod); - if ((i + 1) % flushPeriod == 0) { - outStream.flush(); - assertDataInTempBlob(i + 1); - } - } - outStream.close(); - assertDataInFile(uploadedFile, totalSize); - } finally { - getFileSystem().delete(uploadedFile, false); - - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestOutOfBandAzureBlobOperations.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestOutOfBandAzureBlobOperations.java deleted file mode 100644 index bc6c70553f6b2..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestOutOfBandAzureBlobOperations.java +++ /dev/null @@ -1,170 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import java.util.HashMap; - -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.permission.FsPermission; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -/** - * Tests that WASB handles things gracefully when users add blobs to the Azure - * Storage container from outside WASB's control. 
- */ -public class TestOutOfBandAzureBlobOperations - extends AbstractWasbTestWithTimeout { - private AzureBlobStorageTestAccount testAccount; - private FileSystem fs; - private InMemoryBlockBlobStore backingStore; - - @BeforeEach - public void setUp() throws Exception { - testAccount = AzureBlobStorageTestAccount.createMock(); - fs = testAccount.getFileSystem(); - backingStore = testAccount.getMockStorage().getBackingStore(); - } - - @AfterEach - public void tearDown() throws Exception { - testAccount.cleanup(); - fs = null; - backingStore = null; - } - - private void createEmptyBlobOutOfBand(String path) { - backingStore.setContent( - AzureBlobStorageTestAccount.toMockUri(path), - new byte[] { 1, 2 }, - new HashMap(), - false, 0); - } - - @SuppressWarnings("deprecation") - @Test - public void testImplicitFolderListed() throws Exception { - createEmptyBlobOutOfBand("root/b"); - - // List the blob itself. - FileStatus[] obtained = fs.listStatus(new Path("/root/b")); - assertNotNull(obtained); - assertEquals(1, obtained.length); - assertFalse(obtained[0].isDirectory()); - assertEquals("/root/b", obtained[0].getPath().toUri().getPath()); - - // List the directory - obtained = fs.listStatus(new Path("/root")); - assertNotNull(obtained); - assertEquals(1, obtained.length); - assertFalse(obtained[0].isDirectory()); - assertEquals("/root/b", obtained[0].getPath().toUri().getPath()); - - // Get the directory's file status - FileStatus dirStatus = fs.getFileStatus(new Path("/root")); - assertNotNull(dirStatus); - assertTrue(dirStatus.isDirectory()); - assertEquals("/root", dirStatus.getPath().toUri().getPath()); - } - - @Test - public void testImplicitFolderDeleted() throws Exception { - createEmptyBlobOutOfBand("root/b"); - assertTrue(fs.exists(new Path("/root"))); - assertTrue(fs.delete(new Path("/root"), true)); - assertFalse(fs.exists(new Path("/root"))); - } - - @Test - public void testFileInImplicitFolderDeleted() throws Exception { - createEmptyBlobOutOfBand("root/b"); - assertTrue(fs.exists(new Path("/root"))); - assertTrue(fs.delete(new Path("/root/b"), true)); - assertTrue(fs.exists(new Path("/root"))); - } - - @SuppressWarnings("deprecation") - @Test - public void testFileAndImplicitFolderSameName() throws Exception { - createEmptyBlobOutOfBand("root/b"); - createEmptyBlobOutOfBand("root/b/c"); - FileStatus[] listResult = fs.listStatus(new Path("/root/b")); - // File should win. - assertEquals(1, listResult.length); - assertFalse(listResult[0].isDirectory()); - try { - // Trying to delete root/b/c would cause a dilemma for WASB, so - // it should throw. - fs.delete(new Path("/root/b/c"), true); - assertTrue(false, "Should've thrown."); - } catch (AzureException e) { - assertEquals("File /root/b/c has a parent directory /root/b" - + " which is also a file. 
Can't resolve.", e.getMessage()); - } - } - - private static enum DeepCreateTestVariation { - File, Folder - }; - - /** - * Tests that when we create the file (or folder) x/y/z, we also create - * explicit folder blobs for x and x/y - */ - @Test - public void testCreatingDeepFileCreatesExplicitFolder() throws Exception { - for (DeepCreateTestVariation variation : DeepCreateTestVariation.values()) { - switch (variation) { - case File: - assertTrue(fs.createNewFile(new Path("/x/y/z"))); - break; - case Folder: - assertTrue(fs.mkdirs(new Path("/x/y/z"))); - break; - } - assertTrue(backingStore - .exists(AzureBlobStorageTestAccount.toMockUri("x"))); - assertTrue(backingStore.exists(AzureBlobStorageTestAccount - .toMockUri("x/y"))); - fs.delete(new Path("/x"), true); - } - } - - @Test - public void testSetPermissionOnImplicitFolder() throws Exception { - createEmptyBlobOutOfBand("root/b"); - FsPermission newPermission = new FsPermission((short) 0600); - fs.setPermission(new Path("/root"), newPermission); - FileStatus newStatus = fs.getFileStatus(new Path("/root")); - assertNotNull(newStatus); - assertEquals(newPermission, newStatus.getPermission()); - } - - @Test - public void testSetOwnerOnImplicitFolder() throws Exception { - createEmptyBlobOutOfBand("root/b"); - fs.setOwner(new Path("/root"), "newOwner", null); - FileStatus newStatus = fs.getFileStatus(new Path("/root")); - assertNotNull(newStatus); - assertEquals("newOwner", newStatus.getOwner()); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestShellDecryptionKeyProvider.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestShellDecryptionKeyProvider.java deleted file mode 100644 index 6cc2da18c64ef..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestShellDecryptionKeyProvider.java +++ /dev/null @@ -1,81 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import static org.apache.hadoop.test.PlatformAssumptions.assumeWindows; - -import java.io.File; -import java.nio.charset.StandardCharsets; - -import org.apache.commons.io.FileUtils; -import org.apache.hadoop.conf.Configuration; -import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Windows only tests of shell scripts to provide decryption keys. 
- */ -public class TestShellDecryptionKeyProvider - extends AbstractWasbTestWithTimeout { - public static final Logger LOG = LoggerFactory - .getLogger(TestShellDecryptionKeyProvider.class); - private static File TEST_ROOT_DIR = new File(System.getProperty( - "test.build.data", "/tmp"), "TestShellDecryptionKeyProvider"); - - @Test - public void testScriptPathNotSpecified() throws Exception { - assumeWindows(); - ShellDecryptionKeyProvider provider = new ShellDecryptionKeyProvider(); - Configuration conf = new Configuration(); - String account = "testacct"; - String key = "key"; - - conf.set(SimpleKeyProvider.KEY_ACCOUNT_KEY_PREFIX + account, key); - try { - provider.getStorageAccountKey(account, conf); - fail("fs.azure.shellkeyprovider.script is not specified, we should throw"); - } catch (KeyProviderException e) { - LOG.info("Received an expected exception: " + e.getMessage()); - } - } - - @Test - public void testValidScript() throws Exception { - assumeWindows(); - String expectedResult = "decretedKey"; - - // Create a simple script which echoes the given key plus the given - // expected result (so that we validate both script input and output) - File scriptFile = new File(TEST_ROOT_DIR, "testScript.cmd"); - FileUtils.writeStringToFile(scriptFile, "@echo %1 " + expectedResult, - StandardCharsets.UTF_8); - - ShellDecryptionKeyProvider provider = new ShellDecryptionKeyProvider(); - Configuration conf = new Configuration(); - String account = "testacct"; - String key = "key1"; - conf.set(SimpleKeyProvider.KEY_ACCOUNT_KEY_PREFIX + account, key); - conf.set(ShellDecryptionKeyProvider.KEY_ACCOUNT_SHELLKEYPROVIDER_SCRIPT, - "cmd /c " + scriptFile.getAbsolutePath()); - - String result = provider.getStorageAccountKey(account, conf); - assertEquals(key + " " + expectedResult, result); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestSyncableDataOutputStream.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestSyncableDataOutputStream.java deleted file mode 100644 index b594ce955ec18..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestSyncableDataOutputStream.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.fs.azure; - -import java.io.IOException; -import java.io.OutputStream; - -import org.junit.jupiter.api.Test; - -import org.apache.hadoop.test.LambdaTestUtils; - -public class TestSyncableDataOutputStream { - - @Test - public void testCloseWhenFlushThrowingIOException() throws Exception { - MockOutputStream out = new MockOutputStream(); - SyncableDataOutputStream sdos = new SyncableDataOutputStream(out); - out.flushThrowIOE = true; - LambdaTestUtils.intercept(IOException.class, "An IOE from flush", () -> sdos.close()); - MockOutputStream out2 = new MockOutputStream(); - out2.flushThrowIOE = true; - LambdaTestUtils.intercept(IOException.class, "An IOE from flush", () -> { - try (SyncableDataOutputStream sdos2 = new SyncableDataOutputStream(out2)) { - } - }); - } - - private static class MockOutputStream extends OutputStream { - - private boolean flushThrowIOE = false; - private IOException lastException = null; - - @Override - public void write(int arg0) throws IOException { - - } - - @Override - public void flush() throws IOException { - if (this.flushThrowIOE) { - this.lastException = new IOException("An IOE from flush"); - throw this.lastException; - } - } - - @Override - public void close() throws IOException { - if (this.lastException != null) { - throw this.lastException; - } - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestWasbFsck.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestWasbFsck.java deleted file mode 100644 index fab77bc3e9982..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestWasbFsck.java +++ /dev/null @@ -1,134 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; - -/** - * Tests which look at fsck recovery. 
- */ -public class TestWasbFsck extends AbstractWasbTestWithTimeout { - private AzureBlobStorageTestAccount testAccount; - private FileSystem fs; - private InMemoryBlockBlobStore backingStore; - - @BeforeEach - public void setUp() throws Exception { - testAccount = AzureBlobStorageTestAccount.createMock(); - fs = testAccount.getFileSystem(); - backingStore = testAccount.getMockStorage().getBackingStore(); - } - - @AfterEach - public void tearDown() throws Exception { - testAccount.cleanup(); - fs = null; - backingStore = null; - } - - /** - * Counts the number of temporary blobs in the backing store. - */ - private int getNumTempBlobs() { - int count = 0; - for (String key : backingStore.getKeys()) { - if (key.contains(NativeAzureFileSystem.AZURE_TEMP_FOLDER)) { - count++; - } - } - return count; - } - - /** - * Tests that we recover files properly - */ - @Test - @Disabled - /* flush() no longer does anything @@TODO: reinstate an appropriate test of fsck recovery*/ - public void testRecover() throws Exception { - Path danglingFile = new Path("/crashedInTheMiddle"); - - // Create a file and leave it dangling and try to recover it. - FSDataOutputStream stream = fs.create(danglingFile); - stream.write(new byte[] { 1, 2, 3 }); - stream.flush(); - - // Now we should still only see a zero-byte file in this place - FileStatus fileStatus = fs.getFileStatus(danglingFile); - assertNotNull(fileStatus); - assertEquals(0, fileStatus.getLen()); - assertEquals(1, getNumTempBlobs()); - - // Run WasbFsck -move to recover the file. - runFsck("-move"); - - // Now we should the see the file in lost+found with the data there. - fileStatus = fs.getFileStatus(new Path("/lost+found", - danglingFile.getName())); - assertNotNull(fileStatus); - assertEquals(3, fileStatus.getLen()); - assertEquals(0, getNumTempBlobs()); - // But not in its original location - assertFalse(fs.exists(danglingFile)); - } - - private void runFsck(String command) throws Exception { - Configuration conf = fs.getConf(); - // Set the dangling cutoff to zero, so every temp blob is considered - // dangling. - conf.setInt(NativeAzureFileSystem.AZURE_TEMP_EXPIRY_PROPERTY_NAME, 0); - WasbFsck fsck = new WasbFsck(conf); - fsck.setMockFileSystemForTesting(fs); - fsck.run(new String[] { AzureBlobStorageTestAccount.MOCK_WASB_URI, command }); - } - - /** - * Tests that we delete dangling files properly - */ - @Test - public void testDelete() throws Exception { - Path danglingFile = new Path("/crashedInTheMiddle"); - - // Create a file and leave it dangling and try to delete it. - FSDataOutputStream stream = fs.create(danglingFile); - stream.write(new byte[] { 1, 2, 3 }); - stream.flush(); - - // Now we should still only see a zero-byte file in this place - FileStatus fileStatus = fs.getFileStatus(danglingFile); - assertNotNull(fileStatus); - assertEquals(0, fileStatus.getLen()); - assertEquals(1, getNumTempBlobs()); - - // Run WasbFsck -delete to delete the file. - runFsck("-delete"); - - // Now we should see no trace of the file. 
- assertEquals(0, getNumTempBlobs()); - assertFalse(fs.exists(danglingFile)); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestWasbInitFailure.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestWasbInitFailure.java new file mode 100644 index 0000000000000..29fb7ff586832 --- /dev/null +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/TestWasbInitFailure.java @@ -0,0 +1,53 @@ +package org.apache.hadoop.fs.azure; + +import java.net.URI; + +import org.assertj.core.api.Assertions; +import org.junit.jupiter.api.Test; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; + +import static org.apache.hadoop.fs.azure.NativeAzureFileSystem.WASB_INIT_ERROR_MESSAGE; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +/** + * Test to verify WASB initialization fails as expected. + */ +public class TestWasbInitFailure { + + /** + * Test that initialization of Non-secure WASB FileSystem fails as expected. + * @throws Exception on any failure + */ + @Test + public void testWasbInitFails() throws Exception { + URI wasbUri = URI.create("wasb://container@account.blob.core.windows.net"); + assertFailure(wasbUri); + } + + /** + * Test that initialization of Secure WASB FileSystem fails as expected. + * @throws Exception on any failure + */ + @Test + public void testSecureWasbInitFails() throws Exception { + URI wasbUri = URI.create("wasbs://container@account.blob.core.windows.net"); + assertFailure(wasbUri); + } + + private void assertFailure(URI uri) throws Exception { + Configuration conf = new Configuration(); + UnsupportedOperationException ex = intercept(UnsupportedOperationException.class, () -> { + FileSystem.newInstance(uri, conf).close(); + }); + Assertions.assertThat(ex.getMessage()) + .contains(WASB_INIT_ERROR_MESSAGE); + + ex = intercept(UnsupportedOperationException.class, () -> { + FileSystem.get(uri, conf).close(); + }); + Assertions.assertThat(ex.getMessage()) + .contains(WASB_INIT_ERROR_MESSAGE); + } +} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractAppend.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractAppend.java deleted file mode 100644 index fd21bd20b2e67..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractAppend.java +++ /dev/null @@ -1,41 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure.contract; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.contract.AbstractContractAppendTest; -import org.apache.hadoop.fs.contract.AbstractFSContract; -import static org.apache.hadoop.fs.contract.ContractTestUtils.skip; - -/** - * Append test, skipping one of them. - */ - -public class ITestAzureNativeContractAppend extends AbstractContractAppendTest { - - @Override - protected AbstractFSContract createContract(Configuration conf) { - return new NativeAzureFileSystemContract(conf); - } - - @Override - public void testRenameFileBeingAppended() throws Throwable { - skip("Skipping as renaming an opened file is not supported"); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractCreate.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractCreate.java deleted file mode 100644 index 0ac046a302610..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractCreate.java +++ /dev/null @@ -1,34 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.contract; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.contract.AbstractContractCreateTest; -import org.apache.hadoop.fs.contract.AbstractFSContract; - -/** - * Contract test. - */ -public class ITestAzureNativeContractCreate extends AbstractContractCreateTest { - - @Override - protected AbstractFSContract createContract(Configuration conf) { - return new NativeAzureFileSystemContract(conf); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractDelete.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractDelete.java deleted file mode 100644 index 4c6dd484a5db4..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractDelete.java +++ /dev/null @@ -1,33 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.contract; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.contract.AbstractContractDeleteTest; -import org.apache.hadoop.fs.contract.AbstractFSContract; - -/** - * Contract test. - */ -public class ITestAzureNativeContractDelete extends AbstractContractDeleteTest { - @Override - protected AbstractFSContract createContract(Configuration conf) { - return new NativeAzureFileSystemContract(conf); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractDistCp.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractDistCp.java deleted file mode 100644 index 4d07886cf18f8..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractDistCp.java +++ /dev/null @@ -1,49 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.contract; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.azure.integration.AzureTestConstants; -import org.apache.hadoop.tools.contract.AbstractContractDistCpTest; -import org.junit.jupiter.api.BeforeEach; - -import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.assumeScaleTestsEnabled; - -/** - * Contract test suite covering WASB integration with DistCp. 
- */ -public class ITestAzureNativeContractDistCp extends AbstractContractDistCpTest { - - @Override - protected int getTestTimeoutMillis() { - return AzureTestConstants.SCALE_TEST_TIMEOUT_MILLIS; - } - - @Override - protected NativeAzureFileSystemContract createContract(Configuration conf) { - return new NativeAzureFileSystemContract(conf); - } - - @BeforeEach - @Override - public void setup() throws Exception { - super.setup(); - assumeScaleTestsEnabled(getContract().getConf()); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractGetFileStatus.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractGetFileStatus.java deleted file mode 100644 index 9c09c0d8e7b71..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractGetFileStatus.java +++ /dev/null @@ -1,35 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.contract; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.contract.AbstractContractGetFileStatusTest; -import org.apache.hadoop.fs.contract.AbstractFSContract; - -/** - * Contract test. - */ -public class ITestAzureNativeContractGetFileStatus - extends AbstractContractGetFileStatusTest { - - @Override - protected AbstractFSContract createContract(Configuration conf) { - return new NativeAzureFileSystemContract(conf); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractMkdir.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractMkdir.java deleted file mode 100644 index 71654b8eca8fd..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractMkdir.java +++ /dev/null @@ -1,33 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure.contract; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.contract.AbstractContractMkdirTest; -import org.apache.hadoop.fs.contract.AbstractFSContract; - -/** - * Contract test. - */ -public class ITestAzureNativeContractMkdir extends AbstractContractMkdirTest { - @Override - protected AbstractFSContract createContract(Configuration conf) { - return new NativeAzureFileSystemContract(conf); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractOpen.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractOpen.java deleted file mode 100644 index 0b174e606fa6a..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractOpen.java +++ /dev/null @@ -1,34 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.contract; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.contract.AbstractContractOpenTest; -import org.apache.hadoop.fs.contract.AbstractFSContract; - -/** - * Contract test. - */ -public class ITestAzureNativeContractOpen extends AbstractContractOpenTest { - - @Override - protected AbstractFSContract createContract(Configuration conf) { - return new NativeAzureFileSystemContract(conf); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractRename.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractRename.java deleted file mode 100644 index 474b874e30500..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractRename.java +++ /dev/null @@ -1,34 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure.contract; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.contract.AbstractContractRenameTest; -import org.apache.hadoop.fs.contract.AbstractFSContract; - -/** - * Contract test. - */ -public class ITestAzureNativeContractRename extends AbstractContractRenameTest { - - @Override - protected AbstractFSContract createContract(Configuration conf) { - return new NativeAzureFileSystemContract(conf); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractSeek.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractSeek.java deleted file mode 100644 index 673d5f89544bf..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/ITestAzureNativeContractSeek.java +++ /dev/null @@ -1,34 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.contract; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.contract.AbstractContractSeekTest; -import org.apache.hadoop.fs.contract.AbstractFSContract; - -/** - * Contract test. - */ -public class ITestAzureNativeContractSeek extends AbstractContractSeekTest{ - - @Override - protected AbstractFSContract createContract(Configuration conf) { - return new NativeAzureFileSystemContract(conf); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/NativeAzureFileSystemContract.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/NativeAzureFileSystemContract.java deleted file mode 100644 index ea90a86dc05ef..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/contract/NativeAzureFileSystemContract.java +++ /dev/null @@ -1,49 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure.contract; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.azure.integration.AzureTestUtils; -import org.apache.hadoop.fs.contract.AbstractBondedFSContract; - -/** - * Azure Contract. Test paths are created using any maven fork - * identifier, if defined. This guarantees paths unique to tests - * running in parallel. - */ -public class NativeAzureFileSystemContract extends AbstractBondedFSContract { - - public static final String CONTRACT_XML = "wasb.xml"; - - public NativeAzureFileSystemContract(Configuration conf) { - super(conf); //insert the base features - addConfResource(CONTRACT_XML); - AzureTestUtils.assumeNamespaceDisabled(conf); - } - - @Override - public String getScheme() { - return "wasb"; - } - - @Override - public Path getTestPath() { - return AzureTestUtils.createTestPath(super.getTestPath()); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/AbstractAzureScaleTest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/AbstractAzureScaleTest.java deleted file mode 100644 index 7e37a44fe0e0c..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/AbstractAzureScaleTest.java +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.integration; - -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Timeout; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.hadoop.fs.azure.AbstractWasbTestBase; -import org.apache.hadoop.fs.azure.AzureBlobStorageTestAccount; - -import java.util.concurrent.TimeUnit; - -import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.*; - -/** - * Scale tests are only executed if the scale profile - * is set; the setup method will check this and skip - * tests if not. - * - */ -@Timeout(value = AzureTestConstants.SCALE_TEST_TIMEOUT_MILLIS, unit = TimeUnit.MILLISECONDS) -public abstract class AbstractAzureScaleTest - extends AbstractWasbTestBase implements Sizes { - - protected static final Logger LOG = - LoggerFactory.getLogger(AbstractAzureScaleTest.class); - - protected int getTestTimeoutMillis() { - return AzureTestConstants.SCALE_TEST_TIMEOUT_MILLIS; - } - - @BeforeEach - @Override - public void setUp() throws Exception { - super.setUp(); - LOG.debug("Scale test operation count = {}", getOperationCount()); - assumeScaleTestsEnabled(getConfiguration()); - } - - /** - * Create the test account. - * @return a test account - * @throws Exception on any failure to create the account. 
- */ - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.create(createConfiguration()); - } - - protected long getOperationCount() { - return getConfiguration().getLong(KEY_OPERATION_COUNT, - DEFAULT_OPERATION_COUNT); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/AzureTestConstants.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/AzureTestConstants.java deleted file mode 100644 index 231c54825f229..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/AzureTestConstants.java +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.integration; - -import org.apache.hadoop.fs.Path; - -import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_WRITE_BUFFER_SIZE; - -/** - * Constants for the Azure tests. - */ -public interface AzureTestConstants { - - /** - * Prefix for any cross-filesystem scale test options. - */ - String SCALE_TEST = "scale.test."; - - /** - * Prefix for wasb-specific scale tests. - */ - String AZURE_SCALE_TEST = "fs.azure.scale.test."; - - /** - * Prefix for FS wasb tests. - */ - String TEST_FS_WASB = "test.fs.azure."; - - /** - * Name of the test filesystem. - */ - String TEST_FS_WASB_NAME = TEST_FS_WASB + "name"; - - /** - * Tell tests that they are being executed in parallel: {@value}. - */ - String KEY_PARALLEL_TEST_EXECUTION = "test.parallel.execution"; - - /** - * A property set to true in maven if scale tests are enabled: {@value}. - */ - String KEY_SCALE_TESTS_ENABLED = AZURE_SCALE_TEST + "enabled"; - - /** - * The number of operations to perform: {@value}. - */ - String KEY_OPERATION_COUNT = SCALE_TEST + "operation.count"; - - /** - * The number of directory operations to perform: {@value}. - */ - String KEY_DIRECTORY_COUNT = SCALE_TEST + "directory.count"; - - /** - * The readahead buffer: {@value}. - */ - String KEY_READ_BUFFER_SIZE = AZURE_SCALE_TEST + "read.buffer.size"; - - int DEFAULT_READ_BUFFER_SIZE = 16384; - - /** - * Key for a multi MB test file: {@value}. - */ - String KEY_CSVTEST_FILE = AZURE_SCALE_TEST + "csvfile"; - - /** - * Default path for the multi MB test file: {@value}. - */ - String DEFAULT_CSVTEST_FILE = "wasb://datasets@azuremlsampleexperiments.blob.core.windows.net/network_intrusion_detection.csv"; - - /** - * Name of the property to define the timeout for scale tests: {@value}. - * Measured in seconds. - */ - String KEY_TEST_TIMEOUT = AZURE_SCALE_TEST + "timeout"; - - /** - * Name of the property to define the file size for the huge file - * tests: {@value}. 
- * Measured in KB; a suffix like "M", or "G" will change the unit. - */ - String KEY_HUGE_FILESIZE = AZURE_SCALE_TEST + "huge.filesize"; - - /** - * Name of the property to define the partition size for the huge file - * tests: {@value}. - * Measured in KB; a suffix like "M", or "G" will change the unit. - */ - String KEY_HUGE_PARTITION_SIZE = AZURE_SCALE_TEST + "huge.partitionsize"; - - /** - * The default huge size is small —full 5GB+ scale tests are something - * to run in long test runs on EC2 VMs. {@value}. - */ - String DEFAULT_HUGE_FILESIZE = "10M"; - - /** - * The default number of operations to perform: {@value}. - */ - long DEFAULT_OPERATION_COUNT = 2005; - - /** - * Default number of directories to create when performing - * directory performance/scale tests. - */ - int DEFAULT_DIRECTORY_COUNT = 2; - - /** - * Default policy on scale tests: {@value}. - */ - boolean DEFAULT_SCALE_TESTS_ENABLED = false; - - /** - * Fork ID passed down from maven if the test is running in parallel. - */ - String TEST_UNIQUE_FORK_ID = "test.unique.fork.id"; - - /** - * Timeout in Milliseconds for standard tests: {@value}. - */ - int AZURE_TEST_TIMEOUT = 10 * 60 * 1000; - - /** - * Timeout in Seconds for Scale Tests: {@value}. - */ - int SCALE_TEST_TIMEOUT_SECONDS = 30 * 60; - - int SCALE_TEST_TIMEOUT_MILLIS = SCALE_TEST_TIMEOUT_SECONDS * 1000; - - - - String ACCOUNT_KEY_PROPERTY_NAME = "fs.azure.account.key."; - String ACCOUNT_NAME_PROPERTY_NAME = "fs.azure.account.name"; - String SAS_PROPERTY_NAME = "fs.azure.sas."; - String TEST_CONFIGURATION_FILE_NAME = "azure-test.xml"; - String MOCK_ACCOUNT_NAME - = "mockAccount.blob.core.windows.net"; - String MOCK_CONTAINER_NAME = "mockContainer"; - String WASB_AUTHORITY_DELIMITER = "@"; - String WASB_SCHEME = "wasb"; - String PATH_DELIMITER = "/"; - String AZURE_ROOT_CONTAINER = "$root"; - String MOCK_WASB_URI = "wasb://" + MOCK_CONTAINER_NAME - + WASB_AUTHORITY_DELIMITER + MOCK_ACCOUNT_NAME + "/"; - String USE_EMULATOR_PROPERTY_NAME - = "fs.azure.test.emulator"; - - String KEY_DISABLE_THROTTLING - = "fs.azure.disable.bandwidth.throttling"; - String KEY_READ_TOLERATE_CONCURRENT_APPEND - = "fs.azure.io.read.tolerate.concurrent.append"; - /** - * Path for page blobs: {@value}. - */ - String DEFAULT_PAGE_BLOB_DIRECTORY = "pageBlobs"; - - String DEFAULT_ATOMIC_RENAME_DIRECTORIES - = "/atomicRenameDir1,/atomicRenameDir2"; - - /** - * Base directory for page blobs. - */ - Path PAGE_BLOB_DIR = new Path("/" + DEFAULT_PAGE_BLOB_DIRECTORY); - - /** - * Huge file for testing AbfsOutputStream uploads: {@value} - */ - String AZURE_SCALE_HUGE_FILE_UPLOAD = AZURE_SCALE_TEST + "huge.upload"; - - /** - * Default value for Huge file to be tested for AbfsOutputStream uploads: - * {@value} - */ - int AZURE_SCALE_HUGE_FILE_UPLOAD_DEFAULT = 2 * DEFAULT_WRITE_BUFFER_SIZE; -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/AzureTestUtils.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/AzureTestUtils.java deleted file mode 100644 index 4a71c78a9813f..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/AzureTestUtils.java +++ /dev/null @@ -1,557 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.integration; - -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.OutputStreamWriter; -import java.net.URI; -import java.util.List; - -import org.junit.jupiter.api.Assertions; -import org.opentest4j.TestAbortedException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileContext; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.azure.AzureBlobStorageTestAccount; -import org.apache.hadoop.fs.azure.NativeAzureFileSystem; - -import static org.apache.hadoop.fs.azure.AzureBlobStorageTestAccount.WASB_ACCOUNT_NAME_DOMAIN_SUFFIX_REGEX; -import static org.apache.hadoop.fs.azure.AzureBlobStorageTestAccount.WASB_TEST_ACCOUNT_NAME_WITH_DOMAIN; -import static org.apache.hadoop.fs.azure.integration.AzureTestConstants.*; -import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT; -import static org.apache.hadoop.test.MetricsAsserts.getLongCounter; -import static org.apache.hadoop.test.MetricsAsserts.getLongGauge; -import static org.apache.hadoop.test.MetricsAsserts.getMetrics; -import static org.assertj.core.api.Assumptions.assumeThat; - -/** - * Utilities for the Azure tests. Based on {@code S3ATestUtils}, so - * (initially) has unused method. - */ -public final class AzureTestUtils extends Assertions { - private static final Logger LOG = LoggerFactory.getLogger( - AzureTestUtils.class); - - /** - * Value to set a system property to (in maven) to declare that - * a property has been unset. - */ - public static final String UNSET_PROPERTY = "unset"; - - /** - * Create the test filesystem. - * - * If the test.fs.wasb.name property is not set, this will - * raise a JUnit assumption exception - * - * @param conf configuration - * @return the FS - * @throws IOException IO Problems - * @throws TestAbortedException if the FS is not named - */ - public static NativeAzureFileSystem createTestFileSystem(Configuration conf) - throws IOException { - - String fsname = conf.getTrimmed(TEST_FS_WASB_NAME, ""); - - boolean liveTest = !StringUtils.isEmpty(fsname); - URI testURI = null; - if (liveTest) { - testURI = URI.create(fsname); - liveTest = testURI.getScheme().equals(WASB_SCHEME); - } - if (!liveTest) { - // Skip the test - throw new TestAbortedException( - "No test filesystem in " + TEST_FS_WASB_NAME); - } - NativeAzureFileSystem fs1 = new NativeAzureFileSystem(); - fs1.initialize(testURI, conf); - return fs1; - } - - /** - * Create a file context for tests. - * - * If the test.fs.wasb.name property is not set, this will - * trigger a JUnit failure. 
- * - * Multipart purging is enabled. - * @param conf configuration - * @return the FS - * @throws IOException IO Problems - * @throws TestAbortedException if the FS is not named - */ - public static FileContext createTestFileContext(Configuration conf) - throws IOException { - String fsname = conf.getTrimmed(TEST_FS_WASB_NAME, ""); - - boolean liveTest = !StringUtils.isEmpty(fsname); - URI testURI = null; - if (liveTest) { - testURI = URI.create(fsname); - liveTest = testURI.getScheme().equals(WASB_SCHEME); - } - if (!liveTest) { - // This doesn't work with our JUnit 3 style test cases, so instead we'll - // make this whole class not run by default - throw new TestAbortedException("No test filesystem in " - + TEST_FS_WASB_NAME); - } - FileContext fc = FileContext.getFileContext(testURI, conf); - return fc; - } - - /** - * Get a long test property. - *

    - *
- *   1. Look up configuration value (which can pick up core-default.xml),
- *      using {@code defVal} as the default value (if conf != null).
- *   2. Fetch the system property.
- *   3. If the system property is not empty or "(unset)":
- *      it overrides the conf value.
- * This puts the build properties in charge of everything. It's not a - * perfect design; having maven set properties based on a file, as ant let - * you do, is better for customization. - * - * As to why there's a special (unset) value, see - * {@link http://stackoverflow.com/questions/7773134/null-versus-empty-arguments-in-maven} - * @param conf config: may be null - * @param key key to look up - * @param defVal default value - * @return the evaluated test property. - */ - public static long getTestPropertyLong(Configuration conf, - String key, long defVal) { - return Long.valueOf( - getTestProperty(conf, key, Long.toString(defVal))); - } - /** - * Get a test property value in bytes, using k, m, g, t, p, e suffixes. - * {@link org.apache.hadoop.util.StringUtils.TraditionalBinaryPrefix#string2long(String)} - *
    - *
- *   1. Look up configuration value (which can pick up core-default.xml),
- *      using {@code defVal} as the default value (if conf != null).
- *   2. Fetch the system property.
- *   3. If the system property is not empty or "(unset)":
- *      it overrides the conf value.
- * This puts the build properties in charge of everything. It's not a - * perfect design; having maven set properties based on a file, as ant let - * you do, is better for customization. - * - * As to why there's a special (unset) value, see - * {@link http://stackoverflow.com/questions/7773134/null-versus-empty-arguments-in-maven} - * @param conf config: may be null - * @param key key to look up - * @param defVal default value - * @return the evaluated test property. - */ - public static long getTestPropertyBytes(Configuration conf, - String key, String defVal) { - return org.apache.hadoop.util.StringUtils.TraditionalBinaryPrefix - .string2long(getTestProperty(conf, key, defVal)); - } - - /** - * Get an integer test property; algorithm described in - * {@link #getTestPropertyLong(Configuration, String, long)}. - * @param key key to look up - * @param defVal default value - * @return the evaluated test property. - */ - public static int getTestPropertyInt(Configuration conf, - String key, int defVal) { - return (int) getTestPropertyLong(conf, key, defVal); - } - - /** - * Get a boolean test property; algorithm described in - * {@link #getTestPropertyLong(Configuration, String, long)}. - * @param key key to look up - * @param defVal default value - * @return the evaluated test property. - */ - public static boolean getTestPropertyBool(Configuration conf, - String key, - boolean defVal) { - return Boolean.valueOf( - getTestProperty(conf, key, Boolean.toString(defVal))); - } - - /** - * Get a string test property. - *
    - *
- *   1. Look up configuration value (which can pick up core-default.xml),
- *      using {@code defVal} as the default value (if conf != null).
- *   2. Fetch the system property.
- *   3. If the system property is not empty or "(unset)":
- *      it overrides the conf value.
- * This puts the build properties in charge of everything. It's not a - * perfect design; having maven set properties based on a file, as ant let - * you do, is better for customization. - * - * As to why there's a special (unset) value, see - * @see - * Stack Overflow - * @param conf config: may be null - * @param key key to look up - * @param defVal default value - * @return the evaluated test property. - */ - - public static String getTestProperty(Configuration conf, - String key, - String defVal) { - String confVal = conf != null - ? conf.getTrimmed(key, defVal) - : defVal; - String propval = System.getProperty(key); - return StringUtils.isNotEmpty(propval) && !UNSET_PROPERTY.equals(propval) - ? propval : confVal; - } - - /** - * Verify the class of an exception. If it is not as expected, rethrow it. - * Comparison is on the exact class, not subclass-of inference as - * offered by {@code instanceof}. - * @param clazz the expected exception class - * @param ex the exception caught - * @return the exception, if it is of the expected class - * @throws Exception the exception passed in. - */ - public static Exception verifyExceptionClass(Class clazz, - Exception ex) - throws Exception { - if (!(ex.getClass().equals(clazz))) { - throw ex; - } - return ex; - } - - /** - * Turn off FS Caching: use if a filesystem with different options from - * the default is required. - * @param conf configuration to patch - */ - public static void disableFilesystemCaching(Configuration conf) { - conf.setBoolean("fs.wasb.impl.disable.cache", true); - } - - /** - * Create a test path, using the value of - * {@link AzureTestUtils#TEST_UNIQUE_FORK_ID} if it is set. - * @param defVal default value - * @return a path - */ - public static Path createTestPath(Path defVal) { - String testUniqueForkId = System.getProperty( - AzureTestConstants.TEST_UNIQUE_FORK_ID); - return testUniqueForkId == null - ? defVal - : new Path("/" + testUniqueForkId, "test"); - } - - /** - * Create a test page blob path using the value of - * {@link AzureTestConstants#TEST_UNIQUE_FORK_ID} if it is set. - * @param filename filename at the end of the path - * @return an absolute path - */ - public static Path blobPathForTests(FileSystem fs, String filename) { - String testUniqueForkId = System.getProperty( - AzureTestConstants.TEST_UNIQUE_FORK_ID); - return fs.makeQualified(new Path(PAGE_BLOB_DIR, - testUniqueForkId == null - ? filename - : (testUniqueForkId + "/" + filename))); - } - - /** - * Create a test path using the value of - * {@link AzureTestConstants#TEST_UNIQUE_FORK_ID} if it is set. - * @param filename filename at the end of the path - * @return an absolute path - */ - public static Path pathForTests(FileSystem fs, String filename) { - String testUniqueForkId = System.getProperty( - AzureTestConstants.TEST_UNIQUE_FORK_ID); - return fs.makeQualified(new Path( - testUniqueForkId == null - ? ("/test/" + filename) - : (testUniqueForkId + "/" + filename))); - } - - /** - * Get a unique fork ID. - * Returns a default value for non-parallel tests. - * @return a string unique for all test VMs running in this maven build. - */ - public static String getForkID() { - return System.getProperty( - AzureTestConstants.TEST_UNIQUE_FORK_ID, "fork-1"); - } - - /** - * Flag to indicate that this test is being executed in parallel. - * This is used by some of the scale tests to validate test time expectations. - * @return true if the build indicates this test is being run in parallel. 
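- * <p>
- * Illustrative sketch only: the flag is read from the
- * {@code test.parallel.execution} JVM system property
- * ({@code KEY_PARALLEL_TEST_EXECUTION}), so it is true only when the build
- * passes that property to the test JVM (for example via
- * {@code -Dtest.parallel.execution=true}):
- * <pre>{@code
- *   // equivalent to what this helper evaluates
- *   boolean parallel = Boolean.getBoolean("test.parallel.execution");
- * }</pre>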
- */ - public static boolean isParallelExecution() { - return Boolean.getBoolean(KEY_PARALLEL_TEST_EXECUTION); - } - - /** - * Asserts that {@code obj} is an instance of {@code expectedClass} using a - * descriptive assertion message. - * @param expectedClass class - * @param obj object to check - */ - public static void assertInstanceOf2(Class expectedClass, Object obj) { - Assertions.assertTrue( - expectedClass.isAssignableFrom(obj.getClass()), String.format("Expected instance of class %s, but is %s.", - expectedClass, obj.getClass())); - } - - /** - * Builds a comma-separated list of class names. - * @param classes list of classes - * @return comma-separated list of class names - */ - public static > String buildClassListString( - List classes) { - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < classes.size(); ++i) { - if (i > 0) { - sb.append(','); - } - sb.append(classes.get(i).getName()); - } - return sb.toString(); - } - - /** - * This class should not be instantiated. - */ - private AzureTestUtils() { - } - - /** - * Assert that a configuration option matches the expected value. - * @param conf configuration - * @param key option key - * @param expected expected value - */ - public static void assertOptionEquals(Configuration conf, - String key, - String expected) { - assertEquals(expected, conf.get(key), "Value of " + key); - } - - /** - * Assume that a condition is met. If not: log at WARN and - * then throw an {@link TestAbortedException}. - * @param message message in an assumption - * @param condition condition to probe - */ - public static void assume(String message, boolean condition) { - if (!condition) { - LOG.warn(message); - } - assumeThat(condition).as(message).isTrue(); - } - - /** - * Gets the current value of the given gauge. - * @param fs filesystem - * @param gaugeName gauge name - * @return the gauge value - */ - public static long getLongGaugeValue(NativeAzureFileSystem fs, - String gaugeName) { - return getLongGauge(gaugeName, getMetrics(fs.getInstrumentation())); - } - - /** - * Gets the current value of the given counter. - * @param fs filesystem - * @param counterName counter name - * @return the counter value - */ - public static long getLongCounterValue(NativeAzureFileSystem fs, - String counterName) { - return getLongCounter(counterName, getMetrics(fs.getInstrumentation())); - } - - - /** - * Delete a path, catching any exception and downgrading to a log message. - * @param fs filesystem - * @param path path to delete - * @param recursive recursive delete? - * @throws IOException IO failure. - */ - public static void deleteQuietly(FileSystem fs, - Path path, - boolean recursive) throws IOException { - if (fs != null && path != null) { - try { - fs.delete(path, recursive); - } catch (IOException e) { - LOG.warn("When deleting {}", path, e); - } - } - } - - - /** - * Clean up the test account if non-null; return null to put in the - * field. - * @param testAccount test account to clean up - * @return null - */ - public static AzureBlobStorageTestAccount cleanup( - AzureBlobStorageTestAccount testAccount) throws Exception { - if (testAccount != null) { - testAccount.cleanup(); - testAccount = null; - } - return null; - } - - - /** - * Clean up the test account; any thrown exceptions are caught and - * logged. - * @param testAccount test account - * @return null, so that any fields can be reset. 
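- * <p>
- * A minimal usage sketch (assuming the enclosing test base keeps the
- * account in a {@code testAccount} field, as the WASB test bases here do):
- * <pre>{@code
- *   // in a JUnit 5 teardown: best-effort cleanup, then clear the field
- *   testAccount = cleanupTestAccount(testAccount);
- * }</pre>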
- */ - public static AzureBlobStorageTestAccount cleanupTestAccount( - AzureBlobStorageTestAccount testAccount) { - if (testAccount != null) { - try { - testAccount.cleanup(); - } catch (Exception e) { - LOG.error("While cleaning up test account: ", e); - } - } - return null; - } - - /** - * Assume that the scale tests are enabled by the relevant system property. - */ - public static void assumeScaleTestsEnabled(Configuration conf) { - boolean enabled = getTestPropertyBool( - conf, - KEY_SCALE_TESTS_ENABLED, - DEFAULT_SCALE_TESTS_ENABLED); - assume("Scale test disabled: to enable set property " - + KEY_SCALE_TESTS_ENABLED, - enabled); - } - - /** - * Check the account name for WASB tests is set correctly and return. - */ - public static String verifyWasbAccountNameInConfig(Configuration conf) { - String accountName = conf.get(ACCOUNT_NAME_PROPERTY_NAME); - if (accountName == null) { - accountName = conf.get(WASB_TEST_ACCOUNT_NAME_WITH_DOMAIN); - } - assumeThat(accountName) - .as("Account for WASB is missing or it is not in correct format") - .isNotNull() - .doesNotEndWith(WASB_ACCOUNT_NAME_DOMAIN_SUFFIX_REGEX); - return accountName; - } - - /** - * Write string into a file. - */ - public static void writeStringToFile(FileSystem fs, Path path, String value) - throws IOException { - FSDataOutputStream outputStream = fs.create(path, true); - writeStringToStream(outputStream, value); - } - - /** - * Write string into a file. - */ - public static void writeStringToStream(FSDataOutputStream outputStream, String value) - throws IOException { - BufferedWriter writer = new BufferedWriter(new OutputStreamWriter( - outputStream)); - writer.write(value); - writer.close(); - } - - /** - * Read string from a file. - */ - public static String readStringFromFile(FileSystem fs, Path testFile) throws IOException { - FSDataInputStream inputStream = fs.open(testFile); - String ret = readStringFromStream(inputStream); - inputStream.close(); - return ret; - } - - /** - * Read string from stream. - */ - public static String readStringFromStream(FSDataInputStream inputStream) throws IOException { - BufferedReader reader = new BufferedReader(new InputStreamReader( - inputStream)); - final int BUFFER_SIZE = 1024; - char[] buffer = new char[BUFFER_SIZE]; - int count = reader.read(buffer, 0, BUFFER_SIZE); - if (count > BUFFER_SIZE) { - throw new IOException("Exceeded buffer size"); - } - inputStream.close(); - return new String(buffer, 0, count); - } - - /** - * Assume hierarchical namespace is disabled for test account. - */ - public static void assumeNamespaceDisabled(Configuration conf) { - assumeThat(conf.getBoolean(FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT, false)) - .as("Hierarchical namespace is enabled for test account.") - .isFalse(); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/CleanupTestContainers.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/CleanupTestContainers.java deleted file mode 100644 index 490bd31d0ee13..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/CleanupTestContainers.java +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.integration; - -import java.util.EnumSet; - -import com.microsoft.azure.storage.CloudStorageAccount; -import com.microsoft.azure.storage.blob.CloudBlobClient; -import com.microsoft.azure.storage.blob.CloudBlobContainer; -import org.junit.jupiter.api.Test; - -import org.apache.hadoop.fs.azure.AbstractWasbTestBase; -import org.apache.hadoop.fs.azure.AzureBlobStorageTestAccount; - -/** - * This looks like a test, but it is really a command to invoke to - * clean up containers created in other test runs. - * - */ -public class CleanupTestContainers extends AbstractWasbTestBase { - - private static final String CONTAINER_PREFIX = "wasbtests-"; - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.create( - "CleanupTestContainers", - EnumSet.noneOf(AzureBlobStorageTestAccount.CreateOptions.class), - createConfiguration(), - true); - } - - @Test - public void testEnumContainers() throws Throwable { - describe("Enumerating all the WASB test containers"); - - int count = 0; - CloudStorageAccount storageAccount = getTestAccount().getRealAccount(); - CloudBlobClient blobClient = storageAccount.createCloudBlobClient(); - Iterable containers - = blobClient.listContainers(CONTAINER_PREFIX); - for (CloudBlobContainer container : containers) { - count++; - LOG.info("Container {} URI {}", - container.getName(), - container.getUri()); - } - LOG.info("Found {} test containers", count); - } - - @Test - public void testDeleteContainers() throws Throwable { - describe("Delete all the WASB test containers"); - int count = 0; - CloudStorageAccount storageAccount = getTestAccount().getRealAccount(); - CloudBlobClient blobClient = storageAccount.createCloudBlobClient(); - Iterable containers - = blobClient.listContainers(CONTAINER_PREFIX); - for (CloudBlobContainer container : containers) { - LOG.info("Container {} URI {}", - container.getName(), - container.getUri()); - if (container.deleteIfExists()) { - count++; - } - } - LOG.info("Deleted {} test containers", count); - } - - -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/ITestAzureHugeFiles.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/ITestAzureHugeFiles.java deleted file mode 100644 index d70412aca1f83..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/ITestAzureHugeFiles.java +++ /dev/null @@ -1,459 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.integration; - -import java.io.FileNotFoundException; -import java.io.IOException; -import java.util.EnumSet; -import java.util.Iterator; - -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestMethodOrder; -import org.junit.jupiter.api.MethodOrderer; -import org.junit.jupiter.api.TestInfo; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.StorageStatistics; -import org.apache.hadoop.fs.azure.AzureBlobStorageTestAccount; -import org.apache.hadoop.fs.azure.NativeAzureFileSystem; -import org.apache.hadoop.fs.contract.ContractTestUtils; -import org.apache.hadoop.io.IOUtils; - -import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.*; -import static org.apache.hadoop.fs.contract.ContractTestUtils.*; -import static org.assertj.core.api.Assumptions.assumeThat; - - -/** - * Scale test which creates a huge file. - * - * Important: the order in which these tests execute is fixed to - * alphabetical order. Test cases are numbered {@code test_123_} to impose - * an ordering based on the numbers. - * - * Having this ordering allows the tests to assume that the huge file - * exists. Even so: they should all have a {@link #assumeHugeFileExists()} - * check at the start, in case an individual test is executed. - * - * Ignore checkstyle complaints about naming: we need a scheme with visible - * ordering. - */ - -@TestMethodOrder(MethodOrderer.Alphanumeric.class) -public class ITestAzureHugeFiles extends AbstractAzureScaleTest { - - private static final Logger LOG = LoggerFactory.getLogger( - ITestAzureHugeFiles.class); - - private Path scaleTestDir; - private Path hugefile; - private Path hugefileRenamed; - private AzureBlobStorageTestAccount testAccountForCleanup; - - private static final int UPLOAD_BLOCKSIZE = 64 * S_1K; - private static final byte[] SOURCE_DATA; - - static { - SOURCE_DATA = dataset(UPLOAD_BLOCKSIZE, 0, S_256); - } - - private Path testPath; - - @BeforeEach - @Override - public void setUp() throws Exception { - super.setUp(); - testPath = path("ITestAzureHugeFiles"); - scaleTestDir = new Path(testPath, "scale"); - hugefile = new Path(scaleTestDir, "hugefile"); - hugefileRenamed = new Path(scaleTestDir, "hugefileRenamed"); - } - - /** - * Only clean up the test account (and delete the container) if the account - * is set in the field {@code testAccountForCleanup}. 
- * @throws Exception - */ - @Override - public void tearDown() throws Exception { - testAccount = null; - super.tearDown(); - if (testAccountForCleanup != null) { - cleanupTestAccount(testAccount); - } - } - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.create( - "testazurehugefiles", - EnumSet.of(AzureBlobStorageTestAccount.CreateOptions.CreateContainer), - createConfiguration(), - true); - } - - /** - * Stop the test-case teardown from deleting the test path. - * @throws IOException never - */ - protected void deleteTestDirInTeardown() throws IOException { - // this is a no-op, so the test file is preserved. - // the last test in the suite does the teardown - } - - protected void deleteHugeFile() throws IOException { - describe("Deleting %s", hugefile); - ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); - getFileSystem().delete(hugefile, false); - timer.end("time to delete %s", hugefile); - } - - /** - * Log how long an IOP took, by dividing the total time by the - * count of operations, printing in a human-readable form. - * @param operation operation being measured - * @param timer timing data - * @param count IOP count. - */ - protected void logTimePerIOP(String operation, - ContractTestUtils.NanoTimer timer, - long count) { - LOG.info("Time per {}: {} nS", - operation, toHuman(timer.duration() / count)); - } - - /** - * Assume that the huge file exists, skip if not/empty. - * @return the file status - * @throws IOException IO failure - */ - FileStatus assumeHugeFileExists() throws IOException { - assertPathExists(getFileSystem(), "huge file not created", hugefile); - try { - FileStatus status = getFileSystem().getFileStatus(hugefile); - assumeThat(status.isFile()).as("Not a file: " + status).isTrue(); - assumeThat(status.getLen()) - .as("File " + hugefile + " is empty") - .isPositive(); - return status; - } catch (FileNotFoundException e) { - skip("huge file not created: " + hugefile); - } - return null; - } - - /** - * If/when {@link NativeAzureFileSystem#getStorageStatistics()} returns - * statistics, this will be interesting. - */ - private void logFSState() { - StorageStatistics statistics = getFileSystem().getStorageStatistics(); - Iterator longStatistics - = statistics.getLongStatistics(); - while (longStatistics.hasNext()) { - StorageStatistics.LongStatistic next = longStatistics.next(); - LOG.info("{} = {}", next.getName(), next.getValue()); - } - } - - @Test - public void test_010_CreateHugeFile() throws IOException { - long filesize = getTestPropertyBytes(getConfiguration(), - KEY_HUGE_FILESIZE, - DEFAULT_HUGE_FILESIZE); - long filesizeMB = filesize / S_1M; - - // clean up from any previous attempts - deleteHugeFile(); - - describe("Creating file %s of size %d MB", hugefile, filesizeMB); - - // now do a check of available upload time, with a pessimistic bandwidth - // (that of remote upload tests). If the test times out then not only is - // the test outcome lost, as the follow-on tests continue, they will - // overlap with the ongoing upload test, for much confusion. -/* - int timeout = getTestTimeoutSeconds(); - // assume 1 MB/s upload bandwidth - int bandwidth = _1MB; - long uploadTime = filesize / bandwidth; - assertTrue(String.format("Timeout set in %s seconds is too low;" + - " estimating upload time of %d seconds at 1 MB/s." 
+ - " Rerun tests with -D%s=%d", - timeout, uploadTime, KEY_TEST_TIMEOUT, uploadTime * 2), - uploadTime < timeout); -*/ - assertEquals(0, filesize % UPLOAD_BLOCKSIZE, - "File size set in " + KEY_HUGE_FILESIZE + " = " + filesize - + " is not a multiple of " + UPLOAD_BLOCKSIZE); - - byte[] data = SOURCE_DATA; - - long blocks = filesize / UPLOAD_BLOCKSIZE; - long blocksPerMB = S_1M / UPLOAD_BLOCKSIZE; - - // perform the upload. - // there's lots of logging here, so that a tail -f on the output log - // can give a view of what is happening. - NativeAzureFileSystem fs = getFileSystem(); - - ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); - long blocksPer10MB = blocksPerMB * 10; - fs.mkdirs(hugefile.getParent()); - try (FSDataOutputStream out = fs.create(hugefile, - true, - UPLOAD_BLOCKSIZE, - null)) { - for (long block = 1; block <= blocks; block++) { - out.write(data); - long written = block * UPLOAD_BLOCKSIZE; - // every 10 MB and on file upload @ 100%, print some stats - if (block % blocksPer10MB == 0 || written == filesize) { - long percentage = written * 100 / filesize; - double elapsedTime = timer.elapsedTime() / NANOSEC; - double writtenMB = 1.0 * written / S_1M; - LOG.info(String.format("[%02d%%] Buffered %.2f MB out of %d MB;" - + " elapsedTime=%.2fs; write to buffer bandwidth=%.2f MB/s", - percentage, - writtenMB, - filesizeMB, - elapsedTime, - writtenMB / elapsedTime)); - } - } - // now close the file - LOG.info("Closing stream {}", out); - ContractTestUtils.NanoTimer closeTimer - = new ContractTestUtils.NanoTimer(); - out.close(); - closeTimer.end("time to close() output stream"); - } - - timer.end("time to write %d MB in blocks of %d", - filesizeMB, UPLOAD_BLOCKSIZE); - logFSState(); - bandwidth(timer, filesize); - ContractTestUtils.assertPathExists(fs, "Huge file", hugefile); - FileStatus status = fs.getFileStatus(hugefile); - ContractTestUtils.assertIsFile(hugefile, status); - assertEquals(filesize, status.getLen(), "File size in " + status); - } - - @Test - public void test_040_PositionedReadHugeFile() throws Throwable { - assumeHugeFileExists(); - describe("Positioned reads of file %s", hugefile); - NativeAzureFileSystem fs = getFileSystem(); - FileStatus status = fs.getFileStatus(hugefile); - long filesize = status.getLen(); - int ops = 0; - final int bufferSize = 8192; - byte[] buffer = new byte[bufferSize]; - long eof = filesize - 1; - - ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); - ContractTestUtils.NanoTimer readAtByte0, readAtByte0Again, readAtEOF; - try (FSDataInputStream in = openDataFile()) { - readAtByte0 = new ContractTestUtils.NanoTimer(); - in.readFully(0, buffer); - readAtByte0.end("time to read data at start of file"); - ops++; - - readAtEOF = new ContractTestUtils.NanoTimer(); - in.readFully(eof - bufferSize, buffer); - readAtEOF.end("time to read data at end of file"); - ops++; - - readAtByte0Again = new ContractTestUtils.NanoTimer(); - in.readFully(0, buffer); - readAtByte0Again.end("time to read data at start of file again"); - ops++; - LOG.info("Final stream state: {}", in); - } - long mb = Math.max(filesize / S_1M, 1); - - logFSState(); - timer.end("time to performed positioned reads of %d MB ", mb); - LOG.info("Time per positioned read = {} nS", - toHuman(timer.nanosPerOperation(ops))); - } - - protected FSDataInputStream openDataFile() throws IOException { - NanoTimer openTimer = new NanoTimer(); - FSDataInputStream inputStream = getFileSystem().open(hugefile, - UPLOAD_BLOCKSIZE); - openTimer.end("open 
data file"); - return inputStream; - } - - - /** - * Work out the bandwidth in bytes/second. - * @param timer timer measuring the duration - * @param bytes bytes - * @return the number of bytes/second of the recorded operation - */ - public static double bandwidthInBytes(NanoTimer timer, long bytes) { - return bytes * NANOSEC / timer.duration(); - } - - @Test - public void test_050_readHugeFile() throws Throwable { - assumeHugeFileExists(); - describe("Reading %s", hugefile); - NativeAzureFileSystem fs = getFileSystem(); - FileStatus status = fs.getFileStatus(hugefile); - long filesize = status.getLen(); - long blocks = filesize / UPLOAD_BLOCKSIZE; - byte[] data = new byte[UPLOAD_BLOCKSIZE]; - - ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); - try (FSDataInputStream in = openDataFile()) { - for (long block = 0; block < blocks; block++) { - in.readFully(data); - } - LOG.info("Final stream state: {}", in); - } - - long mb = Math.max(filesize / S_1M, 1); - timer.end("time to read file of %d MB ", mb); - LOG.info("Time per MB to read = {} nS", - toHuman(timer.nanosPerOperation(mb))); - bandwidth(timer, filesize); - logFSState(); - } - - @Test - public void test_060_openAndReadWholeFileBlocks() throws Throwable { - FileStatus status = assumeHugeFileExists(); - int blockSize = S_1M; - describe("Open the test file and read it in blocks of size %d", blockSize); - long len = status.getLen(); - FSDataInputStream in = openDataFile(); - NanoTimer timer2 = null; - long blockCount = 0; - long totalToRead = 0; - int resetCount = 0; - try { - byte[] block = new byte[blockSize]; - timer2 = new NanoTimer(); - long count = 0; - // implicitly rounding down here - blockCount = len / blockSize; - totalToRead = blockCount * blockSize; - long minimumBandwidth = S_128K; - int maxResetCount = 4; - resetCount = 0; - for (long i = 0; i < blockCount; i++) { - int offset = 0; - int remaining = blockSize; - long blockId = i + 1; - NanoTimer blockTimer = new NanoTimer(); - int reads = 0; - while (remaining > 0) { - NanoTimer readTimer = new NanoTimer(); - int bytesRead = in.read(block, offset, remaining); - reads++; - if (bytesRead == 1) { - break; - } - remaining -= bytesRead; - offset += bytesRead; - count += bytesRead; - readTimer.end(); - if (bytesRead != 0) { - LOG.debug("Bytes in read #{}: {} , block bytes: {}," - + " remaining in block: {}" - + " duration={} nS; ns/byte: {}, bandwidth={} MB/s", - reads, bytesRead, blockSize - remaining, remaining, - readTimer.duration(), - readTimer.nanosPerOperation(bytesRead), - readTimer.bandwidthDescription(bytesRead)); - } else { - LOG.warn("0 bytes returned by read() operation #{}", reads); - } - } - blockTimer.end("Reading block %d in %d reads", blockId, reads); - String bw = blockTimer.bandwidthDescription(blockSize); - LOG.info("Bandwidth of block {}: {} MB/s: ", blockId, bw); - if (bandwidthInBytes(blockTimer, blockSize) < minimumBandwidth) { - LOG.warn("Bandwidth {} too low on block {}: resetting connection", - bw, blockId); - assertTrue(resetCount <= maxResetCount, "Bandwidth of " + bw + " too low after " - + resetCount + " attempts"); - resetCount++; - // reset the connection - } - } - } finally { - IOUtils.closeStream(in); - } - timer2.end("Time to read %d bytes in %d blocks", totalToRead, blockCount); - LOG.info("Overall Bandwidth {} MB/s; reset connections {}", - timer2.bandwidth(totalToRead), resetCount); - } - - @Test - public void test_100_renameHugeFile() throws Throwable { - assumeHugeFileExists(); - describe("renaming %s to %s", 
hugefile, hugefileRenamed); - NativeAzureFileSystem fs = getFileSystem(); - FileStatus status = fs.getFileStatus(hugefile); - long filesize = status.getLen(); - fs.delete(hugefileRenamed, false); - ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer(); - fs.rename(hugefile, hugefileRenamed); - long mb = Math.max(filesize / S_1M, 1); - timer.end("time to rename file of %d MB", mb); - LOG.info("Time per MB to rename = {} nS", - toHuman(timer.nanosPerOperation(mb))); - bandwidth(timer, filesize); - logFSState(); - FileStatus destFileStatus = fs.getFileStatus(hugefileRenamed); - assertEquals(filesize, destFileStatus.getLen()); - - // rename back - ContractTestUtils.NanoTimer timer2 = new ContractTestUtils.NanoTimer(); - fs.rename(hugefileRenamed, hugefile); - timer2.end("Renaming back"); - LOG.info("Time per MB to rename = {} nS", - toHuman(timer2.nanosPerOperation(mb))); - bandwidth(timer2, filesize); - } - - @Test - public void test_999_deleteHugeFiles() throws IOException { - // mark the test account for cleanup after this test - testAccountForCleanup = testAccount; - deleteHugeFile(); - ContractTestUtils.NanoTimer timer2 = new ContractTestUtils.NanoTimer(); - NativeAzureFileSystem fs = getFileSystem(); - fs.delete(hugefileRenamed, false); - timer2.end("time to delete %s", hugefileRenamed); - rm(fs, testPath, true, false); - assertPathDoesNotExist(fs, "deleted huge file", testPath); - } - -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/Sizes.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/Sizes.java deleted file mode 100644 index 92b10cfeddd58..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/integration/Sizes.java +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.integration; - -/** - * Sizes of data. - * Checkstyle doesn't like the naming scheme or the fact its an interface. 
- */ -public interface Sizes { - - int S_256 = 256; - int S_512 = 512; - int S_1K = 1024; - int S_4K = 4 * S_1K; - int S_8K = 8 * S_1K; - int S_16K = 16 * S_1K; - int S_32K = 32 * S_1K; - int S_64K = 64 * S_1K; - int S_128K = 128 * S_1K; - int S_256K = 256 * S_1K; - int S_1M = S_1K * S_1K; - int S_2M = 2 * S_1M; - int S_5M = 5 * S_1M; - int S_10M = 10* S_1M; - double NANOSEC = 1.0e9; - -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/metrics/AzureMetricsTestUtil.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/metrics/AzureMetricsTestUtil.java deleted file mode 100644 index 4c706ce71330f..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/metrics/AzureMetricsTestUtil.java +++ /dev/null @@ -1,83 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.metrics; - -import static org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation.WASB_BYTES_READ; -import static org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation.WASB_BYTES_WRITTEN; -import static org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation.WASB_RAW_BYTES_DOWNLOADED; -import static org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation.WASB_RAW_BYTES_UPLOADED; -import static org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation.WASB_WEB_RESPONSES; -import static org.apache.hadoop.test.MetricsAsserts.getLongCounter; -import static org.apache.hadoop.test.MetricsAsserts.getLongGauge; -import static org.apache.hadoop.test.MetricsAsserts.getMetrics; - -public final class AzureMetricsTestUtil { - public static long getLongGaugeValue(AzureFileSystemInstrumentation instrumentation, - String gaugeName) { - return getLongGauge(gaugeName, getMetrics(instrumentation)); - } - - /** - * Gets the current value of the given counter. - */ - public static long getLongCounterValue(AzureFileSystemInstrumentation instrumentation, - String counterName) { - return getLongCounter(counterName, getMetrics(instrumentation)); - } - - - - /** - * Gets the current value of the wasb_bytes_written_last_second counter. - */ - public static long getCurrentBytesWritten(AzureFileSystemInstrumentation instrumentation) { - return getLongGaugeValue(instrumentation, WASB_BYTES_WRITTEN); - } - - /** - * Gets the current value of the wasb_bytes_read_last_second counter. - */ - public static long getCurrentBytesRead(AzureFileSystemInstrumentation instrumentation) { - return getLongGaugeValue(instrumentation, WASB_BYTES_READ); - } - - /** - * Gets the current value of the wasb_raw_bytes_uploaded counter. 
- */ - public static long getCurrentTotalBytesWritten( - AzureFileSystemInstrumentation instrumentation) { - return getLongCounterValue(instrumentation, WASB_RAW_BYTES_UPLOADED); - } - - /** - * Gets the current value of the wasb_raw_bytes_downloaded counter. - */ - public static long getCurrentTotalBytesRead( - AzureFileSystemInstrumentation instrumentation) { - return getLongCounterValue(instrumentation, WASB_RAW_BYTES_DOWNLOADED); - } - - /** - * Gets the current value of the asv_web_responses counter. - */ - public static long getCurrentWebResponses( - AzureFileSystemInstrumentation instrumentation) { - return getLongCounter(WASB_WEB_RESPONSES, getMetrics(instrumentation)); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/metrics/ITestAzureFileSystemInstrumentation.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/metrics/ITestAzureFileSystemInstrumentation.java deleted file mode 100644 index 0ffdad7b15303..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/metrics/ITestAzureFileSystemInstrumentation.java +++ /dev/null @@ -1,546 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.azure.metrics; - -import static org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation.WASB_CLIENT_ERRORS; -import static org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation.WASB_DIRECTORIES_CREATED; -import static org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation.WASB_DOWNLOAD_LATENCY; -import static org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation.WASB_DOWNLOAD_RATE; -import static org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation.WASB_FILES_CREATED; -import static org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation.WASB_FILES_DELETED; -import static org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation.WASB_SERVER_ERRORS; -import static org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation.WASB_UPLOAD_LATENCY; -import static org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation.WASB_UPLOAD_RATE; -import static org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation.WASB_WEB_RESPONSES; -import static org.apache.hadoop.test.MetricsAsserts.assertCounter; -import static org.apache.hadoop.test.MetricsAsserts.getMetrics; -import static org.mockito.ArgumentMatchers.argThat; -import static org.mockito.Mockito.verify; - -import java.io.InputStream; -import java.io.OutputStream; -import java.util.Arrays; -import java.util.Date; - -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.azure.AbstractWasbTestBase; -import org.apache.hadoop.fs.azure.AzureBlobStorageTestAccount; -import org.apache.hadoop.fs.azure.AzureException; -import org.apache.hadoop.fs.azure.AzureNativeFileSystemStore; -import org.apache.hadoop.fs.azure.NativeAzureFileSystem; -import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.metrics2.MetricsRecordBuilder; -import org.apache.hadoop.metrics2.MetricsTag; -import org.junit.jupiter.api.Test; -import org.mockito.ArgumentMatcher; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * Instrumentation test, changing state of time and verifying metrics are - * consistent. 
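- * <p>
- * Sketch of the pattern the tests below follow (helpers are those defined
- * in this class and in {@code AzureMetricsTestUtil}): perform a filesystem
- * operation, then read the instrumentation counters it should have moved,
- * for example:
- * <pre>{@code
- *   long created = AzureMetricsTestUtil.getLongCounterValue(
- *       getInstrumentation(), WASB_FILES_CREATED);
- * }</pre>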
- */ -public class ITestAzureFileSystemInstrumentation extends AbstractWasbTestBase { - - protected static final Logger LOG = - LoggerFactory.getLogger(ITestAzureFileSystemInstrumentation.class); - - @Override - protected AzureBlobStorageTestAccount createTestAccount() throws Exception { - return AzureBlobStorageTestAccount.create(); - } - - @Test - public void testMetricTags() throws Exception { - String accountName = - getTestAccount().getRealAccount().getBlobEndpoint() - .getAuthority(); - String containerName = - getTestAccount().getRealContainer().getName(); - MetricsRecordBuilder myMetrics = getMyMetrics(); - verify(myMetrics).add(argThat( - new TagMatcher("accountName", accountName) - )); - verify(myMetrics).add(argThat( - new TagMatcher("containerName", containerName) - )); - verify(myMetrics).add(argThat( - new TagMatcher("Context", "azureFileSystem") - )); - verify(myMetrics).add(argThat( - new TagExistsMatcher("wasbFileSystemId") - )); - } - - - @Test - public void testMetricsOnMkdirList() throws Exception { - long base = getBaseWebResponses(); - - // Create a directory - assertTrue(fs.mkdirs(new Path("a"))); - // At the time of writing - // getAncestor uses 2 calls for each folder level /user//a - // plus 1 call made by checkContainer - // mkdir checks the hierarchy with 2 calls per level - // mkdirs calls storeEmptyDir to create the empty folder, which makes 5 calls - // For a total of 7 + 6 + 5 = 18 web responses - base = assertWebResponsesInRange(base, 1, 18); - assertEquals(1, - AzureMetricsTestUtil.getLongCounterValue(getInstrumentation(), WASB_DIRECTORIES_CREATED)); - - // List the root contents - assertEquals(1, getFileSystem().listStatus(new Path("/")).length); - base = assertWebResponsesEquals(base, 1); - - assertNoErrors(); - } - - private BandwidthGaugeUpdater getBandwidthGaugeUpdater() { - NativeAzureFileSystem azureFs = (NativeAzureFileSystem) getFileSystem(); - AzureNativeFileSystemStore azureStore = azureFs.getStore(); - return azureStore.getBandwidthGaugeUpdater(); - } - - private static byte[] nonZeroByteArray(int size) { - byte[] data = new byte[size]; - Arrays.fill(data, (byte)5); - return data; - } - - @Test - public void testMetricsOnFileCreateRead() throws Exception { - long base = getBaseWebResponses(); - - assertEquals(0, AzureMetricsTestUtil.getCurrentBytesWritten(getInstrumentation())); - - Path filePath = new Path("/metricsTest_webResponses"); - final int FILE_SIZE = 1000; - - // Suppress auto-update of bandwidth metrics so we get - // to update them exactly when we want to. - getBandwidthGaugeUpdater().suppressAutoUpdate(); - - // Create a file - Date start = new Date(); - OutputStream outputStream = getFileSystem().create(filePath); - outputStream.write(nonZeroByteArray(FILE_SIZE)); - outputStream.close(); - long uploadDurationMs = new Date().getTime() - start.getTime(); - - // The exact number of requests/responses that happen to create a file - // can vary - at the time of writing this code it takes 10 - // requests/responses for the 1000 byte file (33 for 100 MB), - // plus the initial container-check request but that - // can very easily change in the future. Just assert that we do roughly - // more than 2 but less than 15. 
- logOpResponseCount("Creating a 1K file", base); - base = assertWebResponsesInRange(base, 2, 15); - getBandwidthGaugeUpdater().triggerUpdate(true); - long bytesWritten = AzureMetricsTestUtil.getCurrentBytesWritten(getInstrumentation()); - assertTrue(bytesWritten > (FILE_SIZE / 2) && bytesWritten < (FILE_SIZE * 2), - "The bytes written in the last second " + bytesWritten - + " is pretty far from the expected range of around " + FILE_SIZE - + " bytes plus a little overhead."); - long totalBytesWritten = AzureMetricsTestUtil.getCurrentTotalBytesWritten(getInstrumentation()); - assertTrue(totalBytesWritten >= FILE_SIZE && totalBytesWritten < (FILE_SIZE * 2), - "The total bytes written " + totalBytesWritten - + " is pretty far from the expected range of around " + FILE_SIZE - + " bytes plus a little overhead."); - long uploadRate = AzureMetricsTestUtil.getLongGaugeValue(getInstrumentation(), WASB_UPLOAD_RATE); - LOG.info("Upload rate: " + uploadRate + " bytes/second."); - long expectedRate = (FILE_SIZE * 1000L) / uploadDurationMs; - assertTrue(uploadRate >= expectedRate, "The upload rate " + uploadRate - + " is below the expected range of around " + expectedRate - + " bytes/second that the unit test observed. This should never be" - + " the case since the test underestimates the rate by looking at " - + " end-to-end time instead of just block upload time."); - long uploadLatency = AzureMetricsTestUtil.getLongGaugeValue(getInstrumentation(), - WASB_UPLOAD_LATENCY); - LOG.info("Upload latency: {}", uploadLatency); - long expectedLatency = uploadDurationMs; // We're uploading less than a block. - assertTrue(uploadLatency > 0, - "The upload latency " + uploadLatency - + " should be greater than zero now that I've just uploaded a file."); - assertTrue(uploadLatency <= expectedLatency, - "The upload latency " + uploadLatency - + " is more than the expected range of around " + expectedLatency - + " milliseconds that the unit test observed. This should never be" - + " the case since the test overestimates the latency by looking at " - + " end-to-end time instead of just block upload time."); - - // Read the file - start = new Date(); - InputStream inputStream = getFileSystem().open(filePath); - int count = 0; - while (inputStream.read() >= 0) { - count++; - } - inputStream.close(); - long downloadDurationMs = new Date().getTime() - start.getTime(); - assertEquals(FILE_SIZE, count); - - // Again, exact number varies. At the time of writing this code - // it takes 4 request/responses, so just assert a rough range between - // 1 and 10. 
- logOpResponseCount("Reading a 1K file", base); - base = assertWebResponsesInRange(base, 1, 10); - getBandwidthGaugeUpdater().triggerUpdate(false); - long totalBytesRead = AzureMetricsTestUtil.getCurrentTotalBytesRead(getInstrumentation()); - assertEquals(FILE_SIZE, totalBytesRead); - long bytesRead = AzureMetricsTestUtil.getCurrentBytesRead(getInstrumentation()); - assertTrue(bytesRead > (FILE_SIZE / 2) && bytesRead < (FILE_SIZE * 2), - "The bytes read in the last second " + bytesRead - + " is pretty far from the expected range of around " + FILE_SIZE - + " bytes plus a little overhead."); - long downloadRate = AzureMetricsTestUtil.getLongGaugeValue(getInstrumentation(), WASB_DOWNLOAD_RATE); - LOG.info("Download rate: " + downloadRate + " bytes/second."); - expectedRate = (FILE_SIZE * 1000L) / downloadDurationMs; - assertTrue(downloadRate >= expectedRate, - "The download rate " + downloadRate - + " is below the expected range of around " + expectedRate - + " bytes/second that the unit test observed. This should never be" - + " the case since the test underestimates the rate by looking at " - + " end-to-end time instead of just block download time."); - long downloadLatency = AzureMetricsTestUtil.getLongGaugeValue(getInstrumentation(), - WASB_DOWNLOAD_LATENCY); - LOG.info("Download latency: " + downloadLatency); - expectedLatency = downloadDurationMs; // We're downloading less than a block. - assertTrue(downloadLatency > 0, - "The download latency " + downloadLatency - + " should be greater than zero now that I've just downloaded a file."); - assertTrue(downloadLatency <= expectedLatency, - "The download latency " + downloadLatency - + " is more than the expected range of around " + expectedLatency - + " milliseconds that the unit test observed. This should never be" - + " the case since the test overestimates the latency by looking at " - + " end-to-end time instead of just block download time."); - - assertNoErrors(); - } - - @Test - public void testMetricsOnBigFileCreateRead() throws Exception { - long base = getBaseWebResponses(); - - assertEquals(0, AzureMetricsTestUtil.getCurrentBytesWritten(getInstrumentation())); - - Path filePath = new Path("/metricsTest_webResponses"); - final int FILE_SIZE = 100 * 1024 * 1024; - - // Suppress auto-update of bandwidth metrics so we get - // to update them exactly when we want to. - getBandwidthGaugeUpdater().suppressAutoUpdate(); - - // Create a file - OutputStream outputStream = getFileSystem().create(filePath); - outputStream.write(new byte[FILE_SIZE]); - outputStream.close(); - - // The exact number of requests/responses that happen to create a file - // can vary - at the time of writing this code it takes 34 - // requests/responses for the 100 MB file, - // plus the initial container check request, but that - // can very easily change in the future. Just assert that we do roughly - // more than 20 but less than 50. 
- logOpResponseCount("Creating a 100 MB file", base); - base = assertWebResponsesInRange(base, 20, 50); - getBandwidthGaugeUpdater().triggerUpdate(true); - long totalBytesWritten = AzureMetricsTestUtil.getCurrentTotalBytesWritten(getInstrumentation()); - assertTrue(totalBytesWritten >= FILE_SIZE && totalBytesWritten < (FILE_SIZE * 2), - "The total bytes written " + totalBytesWritten - + " is pretty far from the expected range of around " + FILE_SIZE - + " bytes plus a little overhead."); - long uploadRate = AzureMetricsTestUtil.getLongGaugeValue(getInstrumentation(), WASB_UPLOAD_RATE); - LOG.info("Upload rate: " + uploadRate + " bytes/second."); - long uploadLatency = AzureMetricsTestUtil.getLongGaugeValue(getInstrumentation(), - WASB_UPLOAD_LATENCY); - LOG.info("Upload latency: " + uploadLatency); - assertTrue(uploadLatency > 0, - "The upload latency " + uploadLatency - + " should be greater than zero now that I've just uploaded a file."); - - // Read the file - InputStream inputStream = getFileSystem().open(filePath); - int count = 0; - while (inputStream.read() >= 0) { - count++; - } - inputStream.close(); - assertEquals(FILE_SIZE, count); - - // Again, exact number varies. At the time of writing this code - // it takes 27 request/responses, so just assert a rough range between - // 20 and 40. - logOpResponseCount("Reading a 100 MB file", base); - base = assertWebResponsesInRange(base, 20, 40); - getBandwidthGaugeUpdater().triggerUpdate(false); - long totalBytesRead = AzureMetricsTestUtil.getCurrentTotalBytesRead(getInstrumentation()); - assertEquals(FILE_SIZE, totalBytesRead); - long downloadRate = AzureMetricsTestUtil.getLongGaugeValue(getInstrumentation(), WASB_DOWNLOAD_RATE); - LOG.info("Download rate: " + downloadRate + " bytes/second."); - long downloadLatency = AzureMetricsTestUtil.getLongGaugeValue(getInstrumentation(), - WASB_DOWNLOAD_LATENCY); - LOG.info("Download latency: " + downloadLatency); - assertTrue(downloadLatency > 0, - "The download latency " + downloadLatency - + " should be greater than zero now that I've just downloaded a file."); - } - - @Test - public void testMetricsOnFileRename() throws Exception { - long base = getBaseWebResponses(); - - Path originalPath = new Path("/metricsTest_RenameStart"); - Path destinationPath = new Path("/metricsTest_RenameFinal"); - - // Create an empty file - assertEquals(0, AzureMetricsTestUtil.getLongCounterValue(getInstrumentation(), WASB_FILES_CREATED)); - assertTrue(getFileSystem().createNewFile(originalPath)); - logOpResponseCount("Creating an empty file", base); - base = assertWebResponsesInRange(base, 2, 20); - assertEquals(1, AzureMetricsTestUtil.getLongCounterValue(getInstrumentation(), WASB_FILES_CREATED)); - - // Rename the file - assertTrue( - ((FileSystem) getFileSystem()).rename(originalPath, destinationPath)); - // Varies: at the time of writing this code it takes 7 requests/responses. - logOpResponseCount("Renaming a file", base); - base = assertWebResponsesInRange(base, 2, 15); - - assertNoErrors(); - } - - @Test - public void testMetricsOnFileExistsDelete() throws Exception { - long base = getBaseWebResponses(); - - Path filePath = new Path("/metricsTest_delete"); - - // Check existence - assertFalse(getFileSystem().exists(filePath)); - // At the time of writing this code it takes 2 requests/responses to - // check existence, which seems excessive, plus initial request for - // container check, plus 2 ancestor checks only in the secure case. 
- logOpResponseCount("Checking file existence for non-existent file", base); - base = assertWebResponsesInRange(base, 1, 5); - - // Create an empty file - assertTrue(getFileSystem().createNewFile(filePath)); - base = getCurrentWebResponses(); - - // Check existence again - assertTrue(getFileSystem().exists(filePath)); - logOpResponseCount("Checking file existence for existent file", base); - base = assertWebResponsesInRange(base, 1, 4); - - // Delete the file - assertEquals(0, AzureMetricsTestUtil.getLongCounterValue(getInstrumentation(), WASB_FILES_DELETED)); - assertTrue(getFileSystem().delete(filePath, false)); - // At the time of writing this code it takes 4 requests/responses to - // delete, which seems excessive. Check for range 1-4 for now. - logOpResponseCount("Deleting a file", base); - base = assertWebResponsesInRange(base, 1, 4); - assertEquals(1, AzureMetricsTestUtil.getLongCounterValue(getInstrumentation(), WASB_FILES_DELETED)); - - assertNoErrors(); - } - - @Test - public void testMetricsOnDirRename() throws Exception { - long base = getBaseWebResponses(); - - Path originalDirName = new Path("/metricsTestDirectory_RenameStart"); - Path innerFileName = new Path(originalDirName, "innerFile"); - Path destDirName = new Path("/metricsTestDirectory_RenameFinal"); - - // Create an empty directory - assertTrue(getFileSystem().mkdirs(originalDirName)); - base = getCurrentWebResponses(); - - // Create an inner file - assertTrue(getFileSystem().createNewFile(innerFileName)); - base = getCurrentWebResponses(); - - // Rename the directory - assertTrue(getFileSystem().rename(originalDirName, destDirName)); - - // At the time of writing this code it takes 11 requests/responses - // to rename the directory with one file. Check for range 1-20 for now. - logOpResponseCount("Renaming a directory", base); - base = assertWebResponsesInRange(base, 1, 20); - - assertNoErrors(); - } - - /** - * Recursive discovery of path depth - * @param path path to measure. - * @return depth, where "/" == 0. - */ - int depth(Path path) { - if (path.isRoot()) { - return 0; - } else { - return 1 + depth(path.getParent()); - } - } - - @Test - public void testClientErrorMetrics() throws Exception { - String fileName = "metricsTestFile_ClientError"; - Path filePath = new Path("/"+fileName); - final int FILE_SIZE = 100; - OutputStream outputStream = null; - String leaseID = null; - try { - // Create a file - outputStream = getFileSystem().create(filePath); - leaseID = getTestAccount().acquireShortLease(fileName); - try { - outputStream.write(new byte[FILE_SIZE]); - outputStream.close(); - assertTrue(false, "Should've thrown"); - } catch (AzureException ex) { - assertTrue( - ex.getMessage().contains("lease"), "Unexpected exception: " + ex); - } - assertEquals(1, AzureMetricsTestUtil.getLongCounterValue(getInstrumentation(), WASB_CLIENT_ERRORS)); - assertEquals(0, AzureMetricsTestUtil.getLongCounterValue(getInstrumentation(), WASB_SERVER_ERRORS)); - } finally { - if(leaseID != null){ - getTestAccount().releaseLease(leaseID, fileName); - } - IOUtils.closeStream(outputStream); - } - } - - private void logOpResponseCount(String opName, long base) { - LOG.info("{} took {} web responses to complete.", - opName, getCurrentWebResponses() - base); - } - - /** - * Gets (and asserts) the value of the wasb_web_responses counter just - * after the creation of the file system object. 
- */ - private long getBaseWebResponses() { - // The number of requests should start at 0 - return assertWebResponsesEquals(0, 0); - } - - /** - * Gets the current value of the wasb_web_responses counter. - */ - private long getCurrentWebResponses() { - return AzureMetricsTestUtil.getCurrentWebResponses(getInstrumentation()); - } - - /** - * Checks that the wasb_web_responses counter is at the given value. - * @param base The base value (before the operation of interest). - * @param expected The expected value for the operation of interest. - * @return The new base value now. - */ - private long assertWebResponsesEquals(long base, long expected) { - assertCounter(WASB_WEB_RESPONSES, base + expected, getMyMetrics()); - return base + expected; - } - - private void assertNoErrors() { - assertEquals(0, AzureMetricsTestUtil.getLongCounterValue(getInstrumentation(), WASB_CLIENT_ERRORS)); - assertEquals(0, AzureMetricsTestUtil.getLongCounterValue(getInstrumentation(), WASB_SERVER_ERRORS)); - } - - /** - * Checks that the wasb_web_responses counter is in the given range. - * @param base The base value (before the operation of interest). - * @param inclusiveLowerLimit The lower limit for what it should increase by. - * @param inclusiveUpperLimit The upper limit for what it should increase by. - * @return The new base value now. - */ - private long assertWebResponsesInRange(long base, - long inclusiveLowerLimit, - long inclusiveUpperLimit) { - long currentResponses = getCurrentWebResponses(); - long justOperation = currentResponses - base; - assertTrue(justOperation >= inclusiveLowerLimit && justOperation <= inclusiveUpperLimit, - String.format("Web responses expected in range [%d, %d], but was %d.", - inclusiveLowerLimit, inclusiveUpperLimit, justOperation)); - return currentResponses; - } - - /** - * Gets the metrics for the file system object. - * @return The metrics record. - */ - private MetricsRecordBuilder getMyMetrics() { - return getMetrics(getInstrumentation()); - } - - private AzureFileSystemInstrumentation getInstrumentation() { - return getFileSystem().getInstrumentation(); - } - - /** - * A matcher class for asserting that we got a tag with a given - * value. - */ - private static class TagMatcher extends TagExistsMatcher { - private final String tagValue; - - public TagMatcher(String tagName, String tagValue) { - super(tagName); - this.tagValue = tagValue; - } - - @Override - public boolean matches(MetricsTag toMatch) { - return toMatch.value().equals(tagValue); - } - - @Override - public String toString() { - return super.toString() + " with value " + tagValue; - } - } - - /** - * A matcher class for asserting that we got a tag with any value. 
- */ - private static class TagExistsMatcher - implements ArgumentMatcher { - private final String tagName; - - public TagExistsMatcher(String tagName) { - this.tagName = tagName; - } - - @Override - public boolean matches(MetricsTag asTag) { - return asTag.name().equals(tagName); - } - - @Override - public String toString() { - return "Has tag " + tagName; - } - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/metrics/TestBandwidthGaugeUpdater.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/metrics/TestBandwidthGaugeUpdater.java deleted file mode 100644 index 2ecc2592b47be..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/metrics/TestBandwidthGaugeUpdater.java +++ /dev/null @@ -1,78 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.metrics; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import java.util.Date; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.util.concurrent.SubjectInheritingThread; -import org.junit.jupiter.api.Test; - -public class TestBandwidthGaugeUpdater { - @Test - public void testSingleThreaded() throws Exception { - AzureFileSystemInstrumentation instrumentation = - new AzureFileSystemInstrumentation(new Configuration()); - BandwidthGaugeUpdater updater = - new BandwidthGaugeUpdater(instrumentation, 1000, true); - updater.triggerUpdate(true); - assertEquals(0, AzureMetricsTestUtil.getCurrentBytesWritten(instrumentation)); - updater.blockUploaded(new Date(), new Date(), 150); - updater.triggerUpdate(true); - assertEquals(150, AzureMetricsTestUtil.getCurrentBytesWritten(instrumentation)); - updater.blockUploaded(new Date(new Date().getTime() - 10000), - new Date(), 200); - updater.triggerUpdate(true); - long currentBytes = AzureMetricsTestUtil.getCurrentBytesWritten(instrumentation); - assertTrue(currentBytes > 18 && currentBytes < 22, - "We expect around (200/10 = 20) bytes written as the gauge value." 
+ - "Got " + currentBytes); - updater.close(); - } - - @Test - public void testMultiThreaded() throws Exception { - final AzureFileSystemInstrumentation instrumentation = - new AzureFileSystemInstrumentation(new Configuration()); - final BandwidthGaugeUpdater updater = - new BandwidthGaugeUpdater(instrumentation, 1000, true); - Thread[] threads = new Thread[10]; - for (int i = 0; i < threads.length; i++) { - threads[i] = new SubjectInheritingThread(new Runnable() { - @Override - public void run() { - updater.blockDownloaded(new Date(), new Date(), 10); - updater.blockDownloaded(new Date(0), new Date(0), 10); - } - }); - } - for (Thread t : threads) { - t.start(); - } - for (Thread t : threads) { - t.join(); - } - updater.triggerUpdate(false); - assertEquals(10 * threads.length, AzureMetricsTestUtil.getCurrentBytesRead(instrumentation)); - updater.close(); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/metrics/TestNativeAzureFileSystemMetricsSystem.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/metrics/TestNativeAzureFileSystemMetricsSystem.java deleted file mode 100644 index db46c9579af79..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/metrics/TestNativeAzureFileSystemMetricsSystem.java +++ /dev/null @@ -1,100 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.metrics; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.junit.jupiter.api.Assertions.assertTrue; - -import org.apache.hadoop.fs.*; -import org.apache.hadoop.fs.azure.AzureBlobStorageTestAccount; -import org.apache.hadoop.fs.azure.NativeAzureFileSystem; -import org.junit.jupiter.api.Test; - -/** - * Tests that the WASB-specific metrics system is working correctly. - */ -public class TestNativeAzureFileSystemMetricsSystem { - private static final String WASB_FILES_CREATED = "wasb_files_created"; - - private static int getFilesCreated(AzureBlobStorageTestAccount testAccount) { - return testAccount.getLatestMetricValue(WASB_FILES_CREATED, 0).intValue(); - } - - /** - * Tests that when we have multiple file systems created/destroyed - * metrics from each are published correctly. 
- * @throws Exception on a failure - */ - @Test - public void testMetricsAcrossFileSystems() - throws Exception { - AzureBlobStorageTestAccount a1, a2, a3; - - a1 = AzureBlobStorageTestAccount.createMock(); - assertFilesCreated(a1, "a1", 0); - a2 = AzureBlobStorageTestAccount.createMock(); - assertFilesCreated(a2, "a2", 0); - a1.getFileSystem().create(new Path("/foo")).close(); - a1.getFileSystem().create(new Path("/bar")).close(); - a2.getFileSystem().create(new Path("/baz")).close(); - assertFilesCreated(a1, "a1", 0); - assertFilesCreated(a2, "a2", 0); - a1.closeFileSystem(); // Causes the file system to close, which publishes metrics - a2.closeFileSystem(); - - assertFilesCreated(a1, "a1", 2); - assertFilesCreated(a2, "a2", 1); - a3 = AzureBlobStorageTestAccount.createMock(); - assertFilesCreated(a3, "a3", 0); - a3.closeFileSystem(); - assertFilesCreated(a3, "a3", 0); - } - - /** - * Assert that a specific number of files were created. - * @param account account to examine - * @param name account name (for exception text) - * @param expected expected value - */ - private void assertFilesCreated(AzureBlobStorageTestAccount account, - String name, int expected) { - assertEquals(expected, getFilesCreated(account), - "Files created in account " + name); - } - - @Test - public void testMetricsSourceNames() { - String name1 = NativeAzureFileSystem.newMetricsSourceName(); - String name2 = NativeAzureFileSystem.newMetricsSourceName(); - assertTrue(name1.startsWith("AzureFileSystemMetrics")); - assertTrue(name2.startsWith("AzureFileSystemMetrics")); - assertTrue(!name1.equals(name2)); - } - - @Test - public void testSkipMetricsCollection() throws Exception { - AzureBlobStorageTestAccount a; - a = AzureBlobStorageTestAccount.createMock(); - a.getFileSystem().getConf().setBoolean( - NativeAzureFileSystem.SKIP_AZURE_METRICS_PROPERTY_NAME, true); - a.getFileSystem().create(new Path("/foo")).close(); - a.closeFileSystem(); // Causes the file system to close, which publishes metrics - assertFilesCreated(a, "a", 0); - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/metrics/TestRollingWindowAverage.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/metrics/TestRollingWindowAverage.java deleted file mode 100644 index 2f7e3ea7f62fe..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/metrics/TestRollingWindowAverage.java +++ /dev/null @@ -1,42 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.azure.metrics; - -import static org.junit.jupiter.api.Assertions.assertEquals; -import org.junit.jupiter.api.Test; - -public class TestRollingWindowAverage { - /** - * Tests the basic functionality of the class. 
- */ - @Test - public void testBasicFunctionality() throws Exception { - RollingWindowAverage average = new RollingWindowAverage(100); - assertEquals(0, average.getCurrentAverage()); // Nothing there yet. - average.addPoint(5); - assertEquals(5, average.getCurrentAverage()); // One point in there. - Thread.sleep(50); - average.addPoint(15); - assertEquals(10, average.getCurrentAverage()); // Two points in there. - Thread.sleep(60); - assertEquals(15, average.getCurrentAverage()); // One point retired. - Thread.sleep(50); - assertEquals(0, average.getCurrentAverage()); // Both points retired. - } -} diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java index c7762172f3e75..2d0f188585568 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsIntegrationTest.java @@ -50,9 +50,6 @@ import org.apache.hadoop.fs.azurebfs.services.AbfsOutputStream; import org.apache.hadoop.fs.azurebfs.services.AuthType; import org.apache.hadoop.fs.azurebfs.services.ITestAbfsClient; -import org.apache.hadoop.fs.azure.AzureNativeFileSystemStore; -import org.apache.hadoop.fs.azure.NativeAzureFileSystem; -import org.apache.hadoop.fs.azure.metrics.AzureFileSystemInstrumentation; import org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes; import org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants; import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; @@ -64,11 +61,8 @@ import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.io.IOUtils; -import static org.apache.hadoop.fs.azure.AzureBlobStorageTestAccount.WASB_ACCOUNT_NAME_DOMAIN_SUFFIX; -import static org.apache.hadoop.fs.azure.NativeAzureFileSystem.APPEND_SUPPORT_ENABLE_PROPERTY_NAME; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.COLON; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FORWARD_SLASH; -import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.TRUE; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.*; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.ABFS_BLOB_DOMAIN_NAME; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.ABFS_DFS_DOMAIN_NAME; @@ -90,7 +84,6 @@ public abstract class AbstractAbfsIntegrationTest extends LoggerFactory.getLogger(AbstractAbfsIntegrationTest.class); private boolean isIPAddress; - private NativeAzureFileSystem wasb; private AzureBlobFileSystem abfs; private String abfsScheme; @@ -196,44 +189,11 @@ public TracingContext getTestTracingContext(AzureBlobFileSystem fs, public void setup() throws Exception { //Create filesystem first to make sure getWasbFileSystem() can return an existing filesystem. 
createFileSystem(); - - // Only live account without namespace support can run ABFS&WASB - // compatibility tests - if (!isIPAddress && (abfsConfig.getAuthType(accountName) != AuthType.SAS) - && (abfsConfig.getAuthType(accountName) - != AuthType.UserboundSASWithOAuth) - && !abfs.getIsNamespaceEnabled(getTestTracingContext( - getFileSystem(), false))) { - final URI wasbUri = new URI( - abfsUrlToWasbUrl(getTestUrl(), abfsConfig.isHttpsAlwaysUsed())); - final AzureNativeFileSystemStore azureNativeFileSystemStore = - new AzureNativeFileSystemStore(); - - // update configuration with wasb credentials - String accountNameWithoutDomain = accountName.split("\\.")[0]; - String wasbAccountName = accountNameWithoutDomain + WASB_ACCOUNT_NAME_DOMAIN_SUFFIX; - String keyProperty = FS_AZURE_ACCOUNT_KEY + "." + wasbAccountName; - if (rawConfig.get(keyProperty) == null) { - rawConfig.set(keyProperty, getAccountKey()); - } - rawConfig.set(APPEND_SUPPORT_ENABLE_PROPERTY_NAME, TRUE); - - azureNativeFileSystemStore.initialize( - wasbUri, - rawConfig, - new AzureFileSystemInstrumentation(rawConfig)); - - wasb = new NativeAzureFileSystem(azureNativeFileSystemStore); - wasb.initialize(wasbUri, rawConfig); - } } @AfterEach public void teardown() throws Exception { try { - IOUtils.closeStream(wasb); - wasb = null; - if (abfs == null) { return; } @@ -369,11 +329,6 @@ public AzureBlobFileSystem createFileSystem() throws IOException { return abfs; } - - protected NativeAzureFileSystem getWasbFileSystem() { - return wasb; - } - protected String getHostName() { // READ FROM ENDPOINT, THIS IS CALLED ONLY WHEN TESTING AGAINST DEV-FABRIC String endPoint = abfsConfig.get(AZURE_ABFS_ENDPOINT); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsScaleTest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsScaleTest.java index d8286ecac2e34..8e0827da62a85 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsScaleTest.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/AbstractAbfsScaleTest.java @@ -23,9 +23,10 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.azure.integration.AzureTestConstants; -import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.assumeScaleTestsEnabled; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.SCALE_TEST_OPERATION_COUNT; +import static org.apache.hadoop.fs.azurebfs.utils.AbfsTestUtils.SCALE_TEST_TIMEOUT_MILLIS; +import static org.apache.hadoop.fs.azurebfs.utils.AbfsTestUtils.assumeScaleTestsEnabled; /** * Integration tests at bigger scale; configurable as to @@ -35,6 +36,7 @@ public class AbstractAbfsScaleTest extends AbstractAbfsIntegrationTest { protected static final Logger LOG = LoggerFactory.getLogger(AbstractAbfsScaleTest.class); + private static final long DEFAULT_OPERATION_COUNT = 2005; public AbstractAbfsScaleTest() throws Exception { super(); @@ -42,7 +44,7 @@ public AbstractAbfsScaleTest() throws Exception { @Override protected int getTestTimeoutMillis() { - return AzureTestConstants.SCALE_TEST_TIMEOUT_MILLIS; + return SCALE_TEST_TIMEOUT_MILLIS; } @BeforeEach @@ -55,7 +57,7 @@ public void setup() throws Exception { } protected long getOperationCount() { - return getConfiguration().getLong(AzureTestConstants.KEY_OPERATION_COUNT, - AzureTestConstants.DEFAULT_OPERATION_COUNT); + return getConfiguration().getLong(SCALE_TEST_OPERATION_COUNT, + 
DEFAULT_OPERATION_COUNT); } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsHugeFiles.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsHugeFiles.java index 0c7cc0d9a739b..085e63bc53887 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsHugeFiles.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAbfsHugeFiles.java @@ -34,10 +34,9 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.store.DataBlocks; -import static org.apache.hadoop.fs.azure.integration.AzureTestConstants.AZURE_SCALE_HUGE_FILE_UPLOAD; -import static org.apache.hadoop.fs.azure.integration.AzureTestConstants.AZURE_SCALE_HUGE_FILE_UPLOAD_DEFAULT; -import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.assume; -import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.getTestPropertyInt; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_SCALE_TEST_HUGE_UPLOAD; +import static org.apache.hadoop.fs.azurebfs.utils.AbfsTestUtils.assume; +import static org.apache.hadoop.fs.azurebfs.utils.AbfsTestUtils.getTestPropertyInt; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.DATA_BLOCKS_BUFFER; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemConfigurations.DEFAULT_WRITE_BUFFER_SIZE; @@ -55,8 +54,9 @@ public class ITestAbfsHugeFiles extends AbstractAbfsScaleTest { // Set the HUGE_FILE. static { + int defaultValue = 2 * DEFAULT_WRITE_BUFFER_SIZE; HUGE_FILE = getTestPropertyInt(new Configuration(), - AZURE_SCALE_HUGE_FILE_UPLOAD, AZURE_SCALE_HUGE_FILE_UPLOAD_DEFAULT); + FS_AZURE_SCALE_TEST_HUGE_UPLOAD, defaultValue); } // Writing block size to be used in this test. 
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRandomRead.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRandomRead.java index 1887fa3537884..2161f2f5b3788 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRandomRead.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestAzureBlobFileSystemRandomRead.java @@ -107,69 +107,6 @@ public void testBasicRead() throws Exception { } } - /** - * Validates the implementation of random read in ABFS - * @throws IOException - */ - @Test - public void testRandomRead() throws Exception { - assumeThat(getIsNamespaceEnabled(getFileSystem())) - .as("This test does not support namespace enabled account") - .isFalse(); - assumeThat(isAppendBlobEnabled()).as("Not valid for APPEND BLOB").isFalse(); - Path testPath = path(TEST_FILE_PREFIX + "_testRandomRead"); - assumeHugeFileExists(testPath); - - try ( - FSDataInputStream inputStreamV1 - = this.getFileSystem().open(testPath); - FSDataInputStream inputStreamV2 - = this.getWasbFileSystem().open(testPath); - ) { - final int bufferSize = 4 * KILOBYTE; - byte[] bufferV1 = new byte[bufferSize]; - byte[] bufferV2 = new byte[bufferV1.length]; - - verifyConsistentReads(inputStreamV1, inputStreamV2, bufferV1, bufferV2); - - inputStreamV1.seek(0); - inputStreamV2.seek(0); - - verifyConsistentReads(inputStreamV1, inputStreamV2, bufferV1, bufferV2); - - verifyConsistentReads(inputStreamV1, inputStreamV2, bufferV1, bufferV2); - - inputStreamV1.seek(SEEK_POSITION_ONE); - inputStreamV2.seek(SEEK_POSITION_ONE); - - inputStreamV1.seek(0); - inputStreamV2.seek(0); - - verifyConsistentReads(inputStreamV1, inputStreamV2, bufferV1, bufferV2); - - verifyConsistentReads(inputStreamV1, inputStreamV2, bufferV1, bufferV2); - - verifyConsistentReads(inputStreamV1, inputStreamV2, bufferV1, bufferV2); - - inputStreamV1.seek(SEEK_POSITION_TWO); - inputStreamV2.seek(SEEK_POSITION_TWO); - - verifyConsistentReads(inputStreamV1, inputStreamV2, bufferV1, bufferV2); - - inputStreamV1.seek(SEEK_POSITION_THREE); - inputStreamV2.seek(SEEK_POSITION_THREE); - - verifyConsistentReads(inputStreamV1, inputStreamV2, bufferV1, bufferV2); - - verifyConsistentReads(inputStreamV1, inputStreamV2, bufferV1, bufferV2); - - inputStreamV1.seek(SEEK_POSITION_FOUR); - inputStreamV2.seek(SEEK_POSITION_FOUR); - - verifyConsistentReads(inputStreamV1, inputStreamV2, bufferV1, bufferV2); - } - } - /** * Validates the implementation of Seekable.seekToNewSource * @throws IOException @@ -429,43 +366,6 @@ public void testSequentialReadAfterReverseSeekPerformance() ratio)); } - @Test - @Disabled("HADOOP-16915") - public void testRandomReadPerformance() throws Exception { - assumeThat(getIsNamespaceEnabled(getFileSystem())) - .as("This test does not support namespace enabled account") - .isFalse(); - Path testPath = path(TEST_FILE_PREFIX + "_testRandomReadPerformance"); - assumeHugeFileExists(testPath); - - final AzureBlobFileSystem abFs = this.getFileSystem(); - final NativeAzureFileSystem wasbFs = this.getWasbFileSystem(); - - final int maxAttempts = 10; - final double maxAcceptableRatio = 1.025; - double v1ElapsedMs = 0, v2ElapsedMs = 0; - double ratio = Double.MAX_VALUE; - for (int i = 0; i < maxAttempts && ratio >= maxAcceptableRatio; i++) { - v1ElapsedMs = randomRead(1, testPath, wasbFs); - v2ElapsedMs = randomRead(2, testPath, abFs); - - ratio = v2ElapsedMs / v1ElapsedMs; - 
- LOG.info(String.format( - "v1ElapsedMs=%1$d, v2ElapsedMs=%2$d, ratio=%3$.2f", - (long) v1ElapsedMs, - (long) v2ElapsedMs, - ratio)); - } - assertTrue( - ratio < maxAcceptableRatio, String.format( - "Performance of version 2 is not acceptable: v1ElapsedMs=%1$d," - + " v2ElapsedMs=%2$d, ratio=%3$.2f", - (long) v1ElapsedMs, - (long) v2ElapsedMs, - ratio)); - } - /** * With this test we should see a full buffer read being triggered in case * alwaysReadBufferSize is on, else only the requested buffer size. diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestWasbAbfsCompatibility.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestWasbAbfsCompatibility.java deleted file mode 100644 index d1ab0b3a1f8c7..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/ITestWasbAbfsCompatibility.java +++ /dev/null @@ -1,2044 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.fs.azurebfs; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.nio.charset.StandardCharsets; -import java.util.EnumSet; -import java.util.UUID; - -import org.assertj.core.api.Assertions; -import org.junit.jupiter.api.Test; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileStatus; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.RemoteIterator; -import org.apache.hadoop.fs.XAttrSetFlag; -import org.apache.hadoop.fs.azure.NativeAzureFileSystem; -import org.apache.hadoop.fs.azurebfs.contracts.exceptions.AbfsRestOperationException; -import org.apache.hadoop.fs.contract.ContractTestUtils; - -import static java.net.HttpURLConnection.HTTP_CONFLICT; -import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION; -import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ENABLE_FULL_BLOB_CHECKSUM_VALIDATION; -import static org.apache.hadoop.fs.contract.ContractTestUtils.assertDeleted; -import static org.apache.hadoop.fs.contract.ContractTestUtils.assertIsDirectory; -import static org.apache.hadoop.fs.contract.ContractTestUtils.assertMkdirs; -import static org.apache.hadoop.fs.contract.ContractTestUtils.assertPathExists; -import static org.assertj.core.api.Assumptions.assumeThat; - -/** - * Test compatibility between ABFS client and WASB client. 
- */ -public class ITestWasbAbfsCompatibility extends AbstractAbfsIntegrationTest { - private static final String WASB_TEST_CONTEXT = "wasb test file"; - private static final String ABFS_TEST_CONTEXT = "abfs test file"; - private static final String TEST_CONTEXT = "THIS IS FOR TEST"; - private static final String TEST_CONTEXT1 = "THIS IS FOR TEST1"; - private static final byte[] ATTRIBUTE_VALUE_1 = "one".getBytes( - StandardCharsets.UTF_8); - private static final byte[] ATTRIBUTE_VALUE_2 = "two".getBytes( - StandardCharsets.UTF_8); - private static final String ATTRIBUTE_NAME_1 = "user_someAttribute"; - private static final String ATTRIBUTE_NAME_2 = "user_someAttribute1"; - private static final EnumSet CREATE_FLAG = EnumSet.of( - XAttrSetFlag.CREATE); - private static final Logger LOG = - LoggerFactory.getLogger(ITestWasbAbfsCompatibility.class); - - public ITestWasbAbfsCompatibility() throws Exception { - // To ensure the wasb and abfs filesystem are initialized. - super.setup(); - assumeThat(isIPAddress()).as("Emulator is not supported").isFalse(); - assumeHnsDisabled(); - assumeBlobServiceType(); - } - - @Test - public void testListFileStatus() throws Exception { - // crate file using abfs - assumeThat(isAppendBlobEnabled()).as("Not valid for APPEND BLOB").isFalse(); - AzureBlobFileSystem fs = getFileSystem(); - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFiles = path("/testfiles"); - Path path1 = new Path(testFiles + "/~12/!008/3/abFsTestfile"); - try (FSDataOutputStream abfsStream = fs.create(path1, true)) { - abfsStream.write(ABFS_TEST_CONTEXT.getBytes()); - abfsStream.flush(); - abfsStream.hsync(); - } - - // create file using wasb - Path path2 = new Path(testFiles + "/~12/!008/3/nativeFsTestfile"); - LOG.info("{}", wasb.getUri()); - try (FSDataOutputStream nativeFsStream = wasb.create(path2, true)) { - nativeFsStream.write(WASB_TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - // list file using abfs and wasb - FileStatus[] abfsFileStatus = fs.listStatus( - new Path(testFiles + "/~12/!008/3/")); - FileStatus[] nativeFsFileStatus = wasb.listStatus( - new Path(testFiles + "/~12/!008/3/")); - - assertEquals(2, abfsFileStatus.length); - assertEquals(2, nativeFsFileStatus.length); - } - - @Test - public void testReadFile() throws Exception { - assumeThat(isAppendBlobEnabled()).as("Not valid for APPEND BLOB").isFalse(); - boolean[] createFileWithAbfs = new boolean[]{false, true, false, true}; - boolean[] readFileWithAbfs = new boolean[]{false, true, true, false}; - - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ENABLE_FULL_BLOB_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - for (int i = 0; i < 4; i++) { - Path path = new Path(testFile + "/~12/!008/testfile" + i); - final FileSystem createFs = createFileWithAbfs[i] ? abfs : wasb; - // Read - final FileSystem readFs = readFileWithAbfs[i] ? 
abfs : wasb; - // Write - try (FSDataOutputStream nativeFsStream = createFs.create(path, true)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - - // Check file status - ContractTestUtils.assertIsFile(createFs, path); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(readFs.open(path)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + readFs); - } - - // Remove file - assertDeleted(readFs, path, true); - } - } - } - - /** - * Flow: Create and write a file using WASB, then read and append to it using ABFS. Finally, delete the file via ABFS after verifying content consistency. - * Expected: WASB successfully creates the file and writes content. ABFS reads, appends, and deletes the file without data loss or errors. - */ - @Test - public void testwriteFile() throws Exception { - assumeThat(isAppendBlobEnabled()).as("Not valid for APPEND BLOB").isFalse(); - AzureBlobFileSystem abfs = getFileSystem(); - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path( - testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - // Write - try (FSDataOutputStream nativeFsStream = wasb.create(path, true)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - - // Check file status - ContractTestUtils.assertIsFile(wasb, path); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(abfs.open(path)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + abfs); - } - try (FSDataOutputStream abfsOutputStream = abfs.append(path)) { - abfsOutputStream.write(TEST_CONTEXT.getBytes()); - abfsOutputStream.flush(); - abfsOutputStream.hsync(); - } - // Remove file - assertDeleted(abfs, path, true); - } - - /** - * Flow: Create and write a file using ABFS, append to the file using WASB, then write again using ABFS. - * Expected: File is created and written correctly by ABFS, appended by WASB, and final ABFS write reflects all updates without errors. - */ - - @Test - public void testwriteFile1() throws Exception { - assumeThat(isAppendBlobEnabled()).as("Not valid for APPEND BLOB").isFalse(); - AzureBlobFileSystem abfs = getFileSystem(); - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path( - testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - // Write - try (FSDataOutputStream nativeFsStream = abfs.create(path, true)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - - // Check file status - ContractTestUtils.assertIsFile(abfs, path); - - try (FSDataOutputStream nativeFsStream = wasb.append(path)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - - try (FSDataOutputStream nativeFsStream = abfs.append(path)) { - nativeFsStream.write(TEST_CONTEXT1.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - // Remove file - assertDeleted(abfs, path, true); - } - - /** - * Flow: Create the file using AzCopy, then append to the file using ABFS. - * Expected: ABFS append succeeds and final file reflects both AzCopy and appended data correctly. 
- */ - @Test - public void testazcopywasbcompatibility() throws Exception { - assumeThat(isAppendBlobEnabled()).as("Not valid for APPEND BLOB").isFalse(); - AzureBlobFileSystem abfs = getFileSystem(); - Path testFile = path("/testReadFile"); - Path path = new Path( - testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - createAzCopyFile(path); - - try (FSDataOutputStream nativeFsStream = abfs.append(path)) { - nativeFsStream.write(TEST_CONTEXT1.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - // Remove file - assertDeleted(abfs, path, true); - } - - - @Test - public void testDir() throws Exception { - boolean[] createDirWithAbfs = new boolean[]{false, true, false, true}; - boolean[] readDirWithAbfs = new boolean[]{false, true, true, false}; - - AzureBlobFileSystem abfs = getFileSystem(); - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testDir = path("/testDir"); - for (int i = 0; i < 4; i++) { - Path path = new Path(testDir + "/t" + i); - //create - final FileSystem createFs = createDirWithAbfs[i] ? abfs : wasb; - assertTrue(createFs.mkdirs(path)); - //check - assertPathExists(createFs, "Created dir not found with " + createFs, - path); - //read - final FileSystem readFs = readDirWithAbfs[i] ? abfs : wasb; - assertPathExists(readFs, "Created dir not found with " + readFs, - path); - assertIsDirectory(readFs, path); - assertDeleted(readFs, path, true); - } - } - - - @Test - public void testUrlConversion() { - String abfsUrl - = "abfs://abcde-1111-1111-1111-1111@xxxx.dfs.xxx.xxx.xxxx.xxxx"; - String wabsUrl - = "wasb://abcde-1111-1111-1111-1111@xxxx.blob.xxx.xxx.xxxx.xxxx"; - assertEquals(abfsUrl, wasbUrlToAbfsUrl(wabsUrl)); - assertEquals(wabsUrl, abfsUrlToWasbUrl(abfsUrl, false)); - } - - @Test - public void testSetWorkingDirectory() throws Exception { - //create folders - AzureBlobFileSystem abfs = getFileSystem(); - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path d1 = path("/d1"); - Path d1d4 = new Path(d1 + "/d2/d3/d4"); - assertMkdirs(abfs, d1d4); - - //set working directory to path1 - Path path1 = new Path(d1 + "/d2"); - wasb.setWorkingDirectory(path1); - abfs.setWorkingDirectory(path1); - assertEquals(path1, wasb.getWorkingDirectory()); - assertEquals(path1, abfs.getWorkingDirectory()); - - //set working directory to path2 - Path path2 = new Path("d3/d4"); - wasb.setWorkingDirectory(path2); - abfs.setWorkingDirectory(path2); - - Path path3 = d1d4; - assertEquals(path3, wasb.getWorkingDirectory()); - assertEquals(path3, abfs.getWorkingDirectory()); - } - - // Scenario wise testing - - /** - * Scenario 1: Create and write a file using WASB, then read the file using ABFS. - * Expected Outcome: ABFS should correctly read the content written by WASB. 
- */ - @Test - public void testScenario1() throws Exception { - AzureBlobFileSystem abfs = getFileSystem(); - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path( - testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - try (FSDataOutputStream nativeFsStream = wasb.create(path, true)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - // Check file status - ContractTestUtils.assertIsFile(wasb, path); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(abfs.open(path)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + abfs); - } - - // Remove file - assertDeleted(abfs, path, true); - } - - /** - * Scenario 2: Create and write a file using WASB, read it using ABFS, then write to the same file using ABFS. - * Expected Outcome: ABFS should read the WASB-written content and successfully write new content to the same file. - */ - @Test - public void testScenario2() throws Exception { - assumeThat(isAppendBlobEnabled()).as("Not valid for APPEND BLOB").isFalse(); - AzureBlobFileSystem abfs = getFileSystem(); - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path( - testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - try (FSDataOutputStream nativeFsStream = wasb.create(path, true)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - // Check file status - ContractTestUtils.assertIsFile(wasb, path); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(abfs.open(path)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + wasb); - } - - // Write - try (FSDataOutputStream abfsOutputStream = abfs.append(path)) { - abfsOutputStream.write(TEST_CONTEXT1.getBytes()); - abfsOutputStream.flush(); - abfsOutputStream.hsync(); - } - - // Remove file - assertDeleted(abfs, path, true); - } - - /** - * Scenario 3: Create and write a file using ABFS, then read it using WASB. - * Expected Outcome: WASB should be able to read the content written by ABFS without any data mismatch or error. - */ - @Test - public void testScenario3() throws Exception { - assumeThat(isAppendBlobEnabled()).as("Not valid for APPEND BLOB").isFalse(); - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ENABLE_FULL_BLOB_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path( - testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - try (FSDataOutputStream abfsOutputStream = abfs.create(path, true)) { - abfsOutputStream.write(TEST_CONTEXT.getBytes()); - abfsOutputStream.flush(); - abfsOutputStream.hsync(); - } - - // Check file status - ContractTestUtils.assertIsFile(abfs, path); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(wasb.open(path)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + wasb); - } - // Remove file - assertDeleted(abfs, path, true); - } - } - - /** - * Scenario 4: Create a file using WASB, write to it using ABFS, and then write again using WASB. 
- * Expected Outcome: All writes should succeed and the final content should reflect changes from both ABFS and WASB. - */ - @Test - public void testScenario4() throws Exception { - assumeThat(isAppendBlobEnabled()).as("Not valid for APPEND BLOB").isFalse(); - AzureBlobFileSystem abfs = getFileSystem(); - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path( - testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - wasb.create(path, true); - try (FSDataOutputStream abfsOutputStream = abfs.append(path)) { - abfsOutputStream.write(TEST_CONTEXT.getBytes()); - abfsOutputStream.flush(); - abfsOutputStream.hsync(); - } - - try (FSDataOutputStream nativeFsStream = wasb.append(path)) { - nativeFsStream.write(TEST_CONTEXT1.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - - // Check file status - ContractTestUtils.assertIsFile(abfs, path); - // Remove file - assertDeleted(abfs, path, true); - } - - /** - * Scenario 5: Create a file using ABFS, write to it using WASB, and read it back using ABFS with checksum validation disabled. - * Expected Outcome: The read operation should succeed and reflect the data written via WASB despite checksum validation being off. - */ - @Test - public void testScenario5() throws Exception { - assumeThat(isAppendBlobEnabled()).as("Not valid for APPEND BLOB").isFalse(); - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, false); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - abfs.create(path, true); - try (FSDataOutputStream nativeFsStream = wasb.append(path)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - - // Check file status - ContractTestUtils.assertIsFile(abfs, path); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(abfs.open(path)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + abfs); - } - - // Remove file - assertDeleted(abfs, path, true); - } - } - - /** - * Scenario 6: Create a file using ABFS, write to it using WASB, and read it via ABFS with checksum validation enabled. - * Expected Outcome: Read should fail due to checksum mismatch caused by WASB write, verifying integrity enforcement. 
- */ - @Test - public void testScenario6() throws Exception { - assumeThat(isAppendBlobEnabled()).as("Not valid for APPEND BLOB").isFalse(); - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path( - testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - abfs.create(path, true); - try (FSDataOutputStream nativeFsStream = wasb.append(path)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - - // Check file status - ContractTestUtils.assertIsFile(abfs, path); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(abfs.open(path)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + abfs); - } - - // Remove file - assertDeleted(abfs, path, true); - } - } - - /** - * Scenario 7: Create a file using WASB and then overwrite it using ABFS with overwrite=true. - * Expected Outcome: ABFS should successfully overwrite the existing file created by WASB without error. - */ - @Test - public void testScenario7() throws Exception { - assumeThat(isAppendBlobEnabled()).as("Not valid for APPEND BLOB").isFalse(); - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - Path testFile = path("/testReadFile"); - Path path = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - try (FSDataOutputStream nativeFsStream = wasb.create(path, true)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - - // Check file status - ContractTestUtils.assertIsFile(abfs, path); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(abfs.open(path)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + abfs); - } - abfs.create(path, true); - FileStatus fileStatus = abfs.getFileStatus(path); - Assertions.assertThat(fileStatus.getLen()) - .as("Expected file length to be 0 after overwrite") - .isEqualTo(0L); - - // Remove file - assertDeleted(abfs, path, true); - } - } - - /** - * Scenario 8: Create a file using WASB and then attempt to create the same file using ABFS with overwrite=false. - * Expected Outcome: ABFS should fail to create the file due to the file already existing. 
- */ - @Test - public void testScenario8() throws Exception { - assumeThat(isAppendBlobEnabled()).as("Not valid for APPEND BLOB").isFalse(); - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - try (FSDataOutputStream nativeFsStream = wasb.create(path, true)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - - // Check file status - ContractTestUtils.assertIsFile(abfs, path); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(abfs.open(path)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + abfs); - } - try { - abfs.create(path, false); - } catch (IOException e) { - AbfsRestOperationException restEx = (AbfsRestOperationException) e.getCause(); - if (restEx != null) { - Assertions.assertThat(restEx.getStatusCode()) - .as("Expected HTTP status code 409 (Conflict) when file already exists") - .isEqualTo(HTTP_CONFLICT); - } - Assertions.assertThat(e.getMessage()) - .as("Expected error message to contain 'Exists'") - .containsIgnoringCase("Exists"); - } - - // Remove file - assertDeleted(abfs, path, true); - } - } - - /** - * Scenario 9: Create a file using ABFS and then attempt to create the same file using WASB with overwrite=true. - * Expected Outcome: WASB should successfully overwrite the existing file. - */ - @Test - public void testScenario9() throws Exception { - assumeThat(isAppendBlobEnabled()).as("Not valid for APPEND BLOB").isFalse(); - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - try (FSDataOutputStream abfsOutputStream = abfs.create(path, true)) { - abfsOutputStream.write(TEST_CONTEXT.getBytes()); - abfsOutputStream.flush(); - abfsOutputStream.hsync(); - } - - // Check file status - ContractTestUtils.assertIsFile(abfs, path); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(abfs.open(path)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + abfs); - } - wasb.create(path, true); - FileStatus fileStatus = abfs.getFileStatus(path); - Assertions.assertThat(fileStatus.getLen()) - .as("Expected file length to be 0 after overwrite") - .isEqualTo(0L); - - // Remove file - assertDeleted(abfs, path, true); - } - } - - /** - * Scenario 10: Create a file using ABFS and then attempt to create the same file using WASB with overwrite=false. - * Expected Outcome: WASB should fail to create the file as it already exists. The exception should indicate - * an "AlreadyExists" error with HTTP status code 409 (Conflict). 
- */ - @Test - public void testScenario10() throws Exception { - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - Path testFile = path("/testReadFile"); - Path path = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - try (FSDataOutputStream abfsOutputStream = abfs.create(path, true)) { - abfsOutputStream.write(TEST_CONTEXT.getBytes()); - abfsOutputStream.flush(); - abfsOutputStream.hsync(); - } - - // Check file status - ContractTestUtils.assertIsFile(abfs, path); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(abfs.open(path)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + abfs); - } - try { - wasb.create(path, false); - } catch (IOException e) { - AbfsRestOperationException restEx - = (AbfsRestOperationException) e.getCause(); - if (restEx != null) { - Assertions.assertThat(restEx.getStatusCode()) - .as("Expected HTTP status code 409 (Conflict) when file already exists") - .isEqualTo(HTTP_CONFLICT); - } - Assertions.assertThat(e.getMessage()) - .as("Expected error message to contain 'exists'") - .contains("exists"); - } - // Remove file - assertDeleted(abfs, path, true); - } - } - - /** - * Scenario 11: Create a file using ABFS, write data to it using WASB, and then delete the file using ABFS. - * Expected Outcome: File should be created via ABFS and writable by WASB. - * ABFS delete should succeed, and the file should no longer exist. - */ - @Test - public void testScenario11() throws Exception { - assumeThat(isAppendBlobEnabled()).as("Not valid for APPEND BLOB").isFalse(); - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - abfs.create(path, true); - try (FSDataOutputStream nativeFsStream = wasb.append(path)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - - // Check file status - ContractTestUtils.assertIsFile(abfs, path); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(abfs.open(path)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + abfs); - } - abfs.delete(path, true); - } - } - - /** - * Scenario 12: Create and write a file using ABFS, and then delete the same file using WASB. - * Expected Outcome: File should be created and written successfully via ABFS. - * WASB should be able to delete the file without errors. 
- */ - @Test - public void testScenario12() throws Exception { - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - try (FSDataOutputStream abfsOutputStream = abfs.create(path, true)) { - abfsOutputStream.write(TEST_CONTEXT.getBytes()); - abfsOutputStream.flush(); - abfsOutputStream.hsync(); - } - - // Check file status - ContractTestUtils.assertIsFile(abfs, path); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(abfs.open(path)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + abfs); - } - wasb.delete(path, true); - } - } - - /** - * Scenario 13: Create a file using ABFS, write data to it using WASB, and then read the file using WASB. - * Expected Outcome: The read operation via WASB should return the correct content written via WASB. - */ - @Test - public void testScenario13() throws Exception { - assumeThat(isAppendBlobEnabled()).as("Not valid for APPEND BLOB").isFalse(); - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - abfs.create(path, true); - try (FSDataOutputStream nativeFsStream = wasb.append(path)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - - // Check file status - ContractTestUtils.assertIsFile(abfs, path); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(wasb.open(path)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + wasb); - } - abfs.delete(path, true); - } - } - - /** - * Scenario 14: Create a file using ABFS, write data to it using WASB, and delete the file using WASB. - * Expected Outcome: Write via WASB should succeed and data should be persisted; delete via WASB should succeed without errors. 
- */ - @Test - public void testScenario14() throws Exception { - assumeThat(isAppendBlobEnabled()).as("Not valid for APPEND BLOB").isFalse(); - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - abfs.create(path, true); - try (FSDataOutputStream nativeFsStream = wasb.append(path)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - - // Check file status - ContractTestUtils.assertIsFile(abfs, path); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(wasb.open(path)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + wasb); - } - wasb.delete(path, true); - } - } - - /** - * Scenario 15: Create and write a file using WASB, then delete the file using ABFS. - * Expected Outcome: Write via WASB should succeed and data should be persisted; delete via ABFS should succeed without errors. - */ - @Test - public void testScenario15() throws Exception { - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - try (FSDataOutputStream nativeFsStream = wasb.create(path, true)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - - // Check file status - ContractTestUtils.assertIsFile(abfs, path); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(wasb.open(path)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + wasb); - } - abfs.delete(path, true); - } - } - - /** - * Scenario 16: Create a file using WASB, write data to it using ABFS, and then delete the file using WASB. - * Expected Outcome: Write via ABFS should succeed and persist data; delete via WASB should succeed without errors. 
- */ - @Test - public void testScenario16() throws Exception { - assumeThat(isAppendBlobEnabled()).as("Not valid for APPEND BLOB").isFalse(); - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - wasb.create(path, true); - try (FSDataOutputStream abfsOutputStream = abfs.append(path)) { - abfsOutputStream.write(TEST_CONTEXT.getBytes()); - abfsOutputStream.flush(); - abfsOutputStream.hsync(); - } - - // Check file status - ContractTestUtils.assertIsFile(abfs, path); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(abfs.open(path)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + abfs); - } - wasb.delete(path, true); - } - } - - /** - * Scenario 17: Create a file using ABFS, set attribute (xAttr), and retrieve it using ABFS. - * Expected Outcome: setXAttr and getXAttr operations via ABFS should succeed and return the correct value. - */ - @Test - public void testScenario17() throws Exception { - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - Path testFile = path("/testReadFile"); - Path path = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - try (FSDataOutputStream abfsOutputStream = abfs.create(path, true)) { - abfsOutputStream.write(TEST_CONTEXT.getBytes()); - abfsOutputStream.flush(); - abfsOutputStream.hsync(); - } - // --- VALIDATE FILE --- - FileStatus status = abfs.getFileStatus(path); - assertIsFile(path, status); - - // --- SET XATTR #1 --- - abfs.setXAttr(path, ATTRIBUTE_NAME_1, ATTRIBUTE_VALUE_1); - byte[] readValue = abfs.getXAttr(path, ATTRIBUTE_NAME_1); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_1, "one"); - - // --- SET XATTR #2 --- - abfs.setXAttr(path, ATTRIBUTE_NAME_2, ATTRIBUTE_VALUE_2); - readValue = abfs.getXAttr(path, ATTRIBUTE_NAME_2); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_2, "two"); - - // --- VERIFY XATTR #1 AGAIN --- - readValue = abfs.getXAttr(path, ATTRIBUTE_NAME_1); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_1, "one"); - - abfs.delete(path, true); - } - } - - /** - * Scenario 18: Create a file using WASB, set an attribute (xAttr), and retrieve it using WASB. - * Expected Outcome: setXAttr and getXAttr operations via WASB should succeed and return the correct value. 
- */ - @Test - public void testScenario18() throws Exception { - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - try (FSDataOutputStream nativeFsStream = wasb.create(path, true)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - // --- VALIDATE FILE --- - FileStatus status = wasb.getFileStatus(path); - assertIsFile(path, status); - - // --- SET XATTR #1 --- - wasb.setXAttr(path, ATTRIBUTE_NAME_1, ATTRIBUTE_VALUE_1); - byte[] readValue = wasb.getXAttr(path, ATTRIBUTE_NAME_1); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_1, "one"); - - // --- SET XATTR #2 --- - wasb.setXAttr(path, ATTRIBUTE_NAME_2, ATTRIBUTE_VALUE_2); - readValue = wasb.getXAttr(path, ATTRIBUTE_NAME_2); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_2, "two"); - - // --- VERIFY XATTR #1 AGAIN --- - readValue = wasb.getXAttr(path, ATTRIBUTE_NAME_1); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_1, "one"); - - wasb.delete(path, true); - } - } - - /** - * Scenario 19: Create a file using WASB, set an attribute using WASB, and retrieve it using ABFS. - * Expected Outcome: Attribute set via WASB should be retrievable via ABFS and should match the original value. - */ - @Test - public void testScenario19() throws Exception { - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - try (FSDataOutputStream nativeFsStream = wasb.create(path, true)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - // --- VALIDATE FILE --- - FileStatus status = wasb.getFileStatus(path); - assertIsFile(path, status); - - // --- SET XATTR #1 --- - wasb.setXAttr(path, ATTRIBUTE_NAME_1, ATTRIBUTE_VALUE_1); - byte[] readValue = abfs.getXAttr(path, ATTRIBUTE_NAME_1); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_1, "one"); - - // --- SET XATTR #2 --- - wasb.setXAttr(path, ATTRIBUTE_NAME_2, ATTRIBUTE_VALUE_2); - readValue = abfs.getXAttr(path, ATTRIBUTE_NAME_2); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_2, "two"); - - // --- VERIFY XATTR #1 AGAIN --- - readValue = abfs.getXAttr(path, ATTRIBUTE_NAME_1); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_1, "one"); - - wasb.delete(path, true); - } - } - - /** - * Scenario 20: Create a file using WASB, set an attribute via WASB, retrieve the attribute via ABFS, - * and then create the file again using ABFS with overwrite=true. - * Expected Outcome: Attribute set via WASB should be retrievable via ABFS before overwrite. - * After overwrite via ABFS, the attribute should no longer exist. 
- */ - @Test - public void testScenario20() throws Exception { - assumeThat(isAppendBlobEnabled()).as("Not valid for APPEND BLOB").isFalse(); - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - try (FSDataOutputStream nativeFsStream = wasb.create(path, true)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - // --- VALIDATE FILE --- - FileStatus status = wasb.getFileStatus(path); - assertIsFile(path, status); - - // --- SET XATTR #1 --- - wasb.setXAttr(path, ATTRIBUTE_NAME_1, ATTRIBUTE_VALUE_1); - byte[] readValue = abfs.getXAttr(path, ATTRIBUTE_NAME_1); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_1, "one"); - - // --- SET XATTR #2 --- - wasb.setXAttr(path, ATTRIBUTE_NAME_2, ATTRIBUTE_VALUE_2); - readValue = abfs.getXAttr(path, ATTRIBUTE_NAME_2); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_2, "two"); - - // --- VERIFY XATTR #1 AGAIN --- - readValue = abfs.getXAttr(path, ATTRIBUTE_NAME_1); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_1, "one"); - - abfs.create(path, true); - FileStatus fileStatus = abfs.getFileStatus(path); - Assertions.assertThat(fileStatus.getLen()) - .as("Expected file length to be 0 after overwrite") - .isEqualTo(0L); - wasb.delete(path, true); - } - } - - /** - * Scenario 21: Create a file using ABFS, set an attribute via ABFS, retrieve the attribute via WASB, - * and then create the file again using WASB with overwrite=true. - * Expected Outcome: Attribute set via ABFS should be retrievable via WASB before overwrite. - * After overwrite via WASB, the attribute should no longer exist. 
- */ - @Test - public void testScenario21() throws Exception { - assumeThat(isAppendBlobEnabled()).as("Not valid for APPEND BLOB").isFalse(); - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - try (FSDataOutputStream abfsOutputStream = abfs.create(path, true)) { - abfsOutputStream.write(TEST_CONTEXT.getBytes()); - abfsOutputStream.flush(); - abfsOutputStream.hsync(); - } - // --- VALIDATE FILE --- - FileStatus status = wasb.getFileStatus(path); - assertIsFile(path, status); - - // --- SET XATTR #1 --- - abfs.setXAttr(path, ATTRIBUTE_NAME_1, ATTRIBUTE_VALUE_1); - byte[] readValue = wasb.getXAttr(path, ATTRIBUTE_NAME_1); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_1, "one"); - - // --- SET XATTR #2 --- - abfs.setXAttr(path, ATTRIBUTE_NAME_2, ATTRIBUTE_VALUE_2); - readValue = wasb.getXAttr(path, ATTRIBUTE_NAME_2); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_2, "two"); - - // --- VERIFY XATTR #1 AGAIN --- - readValue = wasb.getXAttr(path, ATTRIBUTE_NAME_1); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_1, "one"); - - wasb.create(path, true); - FileStatus fileStatus = abfs.getFileStatus(path); - Assertions.assertThat(fileStatus.getLen()) - .as("Expected file length to be 0 after overwrite") - .isEqualTo(0L); - wasb.delete(path, true); - } - } - - /** - * Scenario 22: Create a file using WASB, set an attribute via ABFS, - * retrieve the attribute via WASB, and then create the file again using WASB with overwrite=true. - * Expected Outcome: Attribute set via ABFS should be retrievable via WASB before overwrite. - * After overwrite via WASB, the attribute should be removed. 
- */ - @Test - public void testScenario22() throws Exception { - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - try (FSDataOutputStream nativeFsStream = wasb.create(path, true)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - // --- VALIDATE FILE --- - FileStatus status = wasb.getFileStatus(path); - assertIsFile(path, status); - - // --- SET XATTR #1 --- - abfs.setXAttr(path, ATTRIBUTE_NAME_1, ATTRIBUTE_VALUE_1); - byte[] readValue = wasb.getXAttr(path, ATTRIBUTE_NAME_1); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_1, "one"); - - // --- SET XATTR #2 --- - abfs.setXAttr(path, ATTRIBUTE_NAME_2, ATTRIBUTE_VALUE_2); - readValue = wasb.getXAttr(path, ATTRIBUTE_NAME_2); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_2, "two"); - - // --- VERIFY XATTR #1 AGAIN --- - readValue = wasb.getXAttr(path, ATTRIBUTE_NAME_1); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_1, "one"); - - wasb.create(path, true); - FileStatus fileStatus = abfs.getFileStatus(path); - Assertions.assertThat(fileStatus.getLen()) - .as("Expected file length to be 0 after overwrite") - .isEqualTo(0L); - wasb.delete(path, true); - } - } - - /** - * Scenario 23: Create a file using WASB, set an attribute via ABFS, - * then set another attribute via WASB, and retrieve attributes via ABFS. - * Expected Outcome: Both attributes should be retrievable via ABFS, - * confirming that updates from both ABFS and WASB are visible. 
- */ - @Test - public void testScenario23() throws Exception { - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - // Write - try (FSDataOutputStream nativeFsStream = wasb.create(path, true)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - // --- VALIDATE FILE --- - FileStatus status = wasb.getFileStatus(path); - assertIsFile(path, status); - - // --- SET XATTR #1 --- - abfs.setXAttr(path, ATTRIBUTE_NAME_1, ATTRIBUTE_VALUE_1); - byte[] readValue = abfs.getXAttr(path, ATTRIBUTE_NAME_1); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_1, "one"); - - // --- SET XATTR #2 --- - wasb.setXAttr(path, ATTRIBUTE_NAME_2, ATTRIBUTE_VALUE_2, CREATE_FLAG); - readValue = abfs.getXAttr(path, ATTRIBUTE_NAME_2); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_2, "two"); - - // --- VERIFY XATTR #1 AGAIN --- - readValue = abfs.getXAttr(path, ATTRIBUTE_NAME_1); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_1, "one"); - - wasb.delete(path, true); - } - } - - /** - * Scenario 24: Create a file using ABFS, then set an attribute via WASB, - * and retrieve the attribute via ABFS. - * Expected Outcome: Attribute set via WASB should be retrievable via ABFS, - * verifying cross-compatibility of attribute operations. - */ - @Test - public void testScenario24() throws Exception { - assumeThat(isAppendBlobEnabled()).as("Not valid for APPEND BLOB").isFalse(); - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - try (FSDataOutputStream abfsOutputStream = abfs.create(path, true)) { - abfsOutputStream.write(TEST_CONTEXT.getBytes()); - abfsOutputStream.flush(); - abfsOutputStream.hsync(); - } - // --- VALIDATE FILE --- - FileStatus status = wasb.getFileStatus(path); - assertIsFile(path, status); - - // --- SET XATTR #1 --- - wasb.setXAttr(path, ATTRIBUTE_NAME_1, ATTRIBUTE_VALUE_1); - byte[] readValue = abfs.getXAttr(path, ATTRIBUTE_NAME_1); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_1, "one"); - - // --- SET XATTR #2 --- - wasb.setXAttr(path, ATTRIBUTE_NAME_2, ATTRIBUTE_VALUE_2, CREATE_FLAG); - readValue = abfs.getXAttr(path, ATTRIBUTE_NAME_2); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_2, "two"); - - // --- VERIFY XATTR #1 AGAIN --- - readValue = abfs.getXAttr(path, ATTRIBUTE_NAME_1); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_1, "one"); - - wasb.delete(path, true); - } - } - - /** - * Scenario 25: Create a file using WASB, then set and retrieve an attribute via ABFS, - * and finally delete the file using WASB. 
- * Expected Outcome: Attribute set via ABFS should be retrievable via ABFS, - * and file deletion via WASB should succeed. - */ - @Test - public void testScenario25() throws Exception { - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - try (FSDataOutputStream nativeFsStream = wasb.create(path, true)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - // --- VALIDATE FILE --- - FileStatus status = wasb.getFileStatus(path); - assertIsFile(path, status); - - // --- SET XATTR #1 --- - abfs.setXAttr(path, ATTRIBUTE_NAME_1, ATTRIBUTE_VALUE_1); - byte[] readValue = abfs.getXAttr(path, ATTRIBUTE_NAME_1); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_1, "one"); - - // --- SET XATTR #2 --- - abfs.setXAttr(path, ATTRIBUTE_NAME_2, ATTRIBUTE_VALUE_2, CREATE_FLAG); - readValue = abfs.getXAttr(path, ATTRIBUTE_NAME_2); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_2, "two"); - - // --- VERIFY XATTR #1 AGAIN --- - readValue = abfs.getXAttr(path, ATTRIBUTE_NAME_1); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_1, "one"); - - wasb.delete(path, true); - } - } - - /** - * Scenario 26: Create a file using ABFS, then set and retrieve an attribute via WASB, - * and finally delete the file using WASB. - * Expected Outcome: Attribute set via WASB should be retrievable via WASB, - * and file deletion via WASB should succeed. 
- */ - @Test - public void testScenario26() throws Exception { - assumeThat(isAppendBlobEnabled()).as("Not valid for APPEND BLOB").isFalse(); - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - try (FSDataOutputStream abfsOutputStream = abfs.create(path, true)) { - abfsOutputStream.write(TEST_CONTEXT.getBytes()); - abfsOutputStream.flush(); - abfsOutputStream.hsync(); - } - // --- VALIDATE FILE --- - FileStatus status = abfs.getFileStatus(path); - assertIsFile(path, status); - - // --- SET XATTR #1 --- - wasb.setXAttr(path, ATTRIBUTE_NAME_1, ATTRIBUTE_VALUE_1); - byte[] readValue = wasb.getXAttr(path, ATTRIBUTE_NAME_1); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_1, "one"); - - // --- SET XATTR #2 --- - wasb.setXAttr(path, ATTRIBUTE_NAME_2, ATTRIBUTE_VALUE_2, CREATE_FLAG); - readValue = wasb.getXAttr(path, ATTRIBUTE_NAME_2); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_2, "two"); - - // --- VERIFY XATTR #1 AGAIN --- - readValue = wasb.getXAttr(path, ATTRIBUTE_NAME_1); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_1, "one"); - - wasb.delete(path, true); - } - } - - /** - * Scenario 27: Create and write a file using ABFS, then rename the file using WASB. - * Expected Outcome: WASB should successfully rename the file created and written by ABFS. - */ - @Test - public void testScenario27() throws Exception { - assumeThat(isAppendBlobEnabled()).as("Not valid for APPEND BLOB").isFalse(); - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path testPath1 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - Path testPath2 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - try (FSDataOutputStream abfsOutputStream = abfs.create(testPath1, true)) { - abfsOutputStream.write(TEST_CONTEXT.getBytes()); - abfsOutputStream.flush(); - abfsOutputStream.hsync(); - } - - // Check file status - ContractTestUtils.assertIsFile(abfs, testPath1); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(abfs.open(testPath1)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + abfs); - } - // --- RENAME FILE --- - boolean renamed = wasb.rename(testPath1, testPath2); - Assertions.assertThat(renamed) - .as("Rename failed") - .isTrue(); - - // --- LIST FILES IN DIRECTORY --- - Path parentDir = new Path(testFile + "/~12/!008"); - int noOfFiles = listAllFilesAndDirs(wasb, parentDir); - Assertions.assertThat(noOfFiles) - .as("Expected only 1 file or directory under path: %s", parentDir) - .isEqualTo(1); - wasb.delete(testPath2, true); - } - } - - /** - * Scenario 28: Create and write a file using WASB, rename the file using ABFS, and list files using ABFS. 
- * Expected Outcome: ABFS should successfully rename the file created by WASB, and the renamed file should appear in listings. - */ - @Test - public void testScenario28() throws Exception { - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path testPath1 = new Path( - testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - Path testPath2 = new Path( - testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - try (FSDataOutputStream nativeFsStream = wasb.create(testPath1, true)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - - // Check file status - ContractTestUtils.assertIsFile(abfs, testPath1); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(abfs.open(testPath1)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + abfs); - } - // --- RENAME FILE --- - boolean renamed = abfs.rename(testPath1, testPath2); - Assertions.assertThat(renamed) - .as("Rename failed") - .isTrue(); - - // --- LIST FILES IN DIRECTORY --- - Path parentDir = new Path(testFile + "/~12/!008"); - int noOfFiles = listAllFilesAndDirs(abfs, parentDir); - Assertions.assertThat(noOfFiles) - .as("Expected only 1 file or directory under path: %s", parentDir) - .isEqualTo(1); - wasb.delete(testPath2, true); - } - } - - /** - * Scenario 29: Create a file using WASB, write data to it via ABFS, rename the file using ABFS, and list files using ABFS. - * Expected Outcome: ABFS should successfully rename the file and list the renamed file accurately. 
- */ - @Test - public void testScenario29() throws Exception { - assumeThat(isAppendBlobEnabled()).as("Not valid for APPEND BLOB").isFalse(); - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path testPath1 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - Path testPath2 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - wasb.create(testPath1, true); - try (FSDataOutputStream abfsOutputStream = abfs.append(testPath1)) { - abfsOutputStream.write(TEST_CONTEXT.getBytes()); - abfsOutputStream.flush(); - abfsOutputStream.hsync(); - } - - // Check file status - ContractTestUtils.assertIsFile(abfs, testPath1); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(abfs.open(testPath1)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + abfs); - } - // --- RENAME FILE --- - boolean renamed = abfs.rename(testPath1, testPath2); - Assertions.assertThat(renamed) - .as("Rename failed") - .isTrue(); - - // --- LIST FILES IN DIRECTORY --- - Path parentDir = new Path(testFile + "/~12/!008"); - int noOfFiles = listAllFilesAndDirs(abfs, parentDir); - Assertions.assertThat(noOfFiles) - .as("Expected only 1 file or directory under path: %s", parentDir) - .isEqualTo(1); - wasb.delete(testPath2, true); - } - } - - /** - * Scenario 30: Create and write a file using WASB, rename it via WASB, rename again via ABFS, and list files using ABFS. - * Expected Outcome: Both renames should succeed, and ABFS listing should reflect the latest filename. 
- */ - @Test - public void testScenario30() throws Exception { - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path testPath1 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - Path testPath2 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - Path testPath3 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - try (FSDataOutputStream nativeFsStream = wasb.create(testPath1, true)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - - // Check file status - ContractTestUtils.assertIsFile(abfs, testPath1); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(abfs.open(testPath1)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + abfs); - } - // --- RENAME FILE --- - boolean renamed = wasb.rename(testPath1, testPath2); - Assertions.assertThat(renamed) - .as("Rename failed") - .isTrue(); - - // --- RENAME FILE --- - boolean renamed1 = abfs.rename(testPath2, testPath3); - Assertions.assertThat(renamed1) - .as("Rename failed") - .isTrue(); - - // --- LIST FILES IN DIRECTORY --- - Path parentDir = new Path(testFile + "/~12/!008"); - int noOfFiles = listAllFilesAndDirs(abfs, parentDir); - Assertions.assertThat(noOfFiles) - .as("Expected only 1 file or directory under path: %s", parentDir) - .isEqualTo(1); - wasb.delete(testPath3, true); - } - } - - /** - * Scenario 31: Create and write a file using WASB, delete it via WASB, then attempt to rename the deleted file via ABFS. - * Expected Outcome: Rename should fail since the file was deleted, ensuring proper error handling. - */ - @Test - public void testScenario31() throws Exception { - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path testPath1 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - Path testPath2 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - try (FSDataOutputStream nativeFsStream = wasb.create(testPath1, true)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - - // Check file status - ContractTestUtils.assertIsFile(abfs, testPath1); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(abfs.open(testPath1)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + abfs); - } - wasb.delete(testPath1, true); - - // --- RENAME FILE --- - boolean renamed = abfs.rename(testPath1, testPath2); - Assertions.assertThat(renamed) - .as("Rename operation should have failed but returned true") - .isFalse(); - } - } - - /** - * Scenario 32: Create a directory and file using WASB, rename the directory using ABFS, and list files using ABFS. - * Expected Outcome: ABFS should successfully rename the directory, and listing should reflect the updated directory name. 
- */ - @Test - public void testScenario32() throws Exception { - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path testFile1 = path("/testReadFile1"); - Path testPath1 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - Path testPath2 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - Path testPath3 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - wasb.mkdirs(testFile); - try (FSDataOutputStream nativeFsStream = wasb.create(testPath1, true)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - wasb.create(testPath2, true); - wasb.create(testPath3, true); - - // Check file status - ContractTestUtils.assertIsFile(abfs, testPath1); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(abfs.open(testPath1)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + abfs); - } - // --- RENAME DIR --- - boolean renamed = abfs.rename(testFile, testFile1); - Assertions.assertThat(renamed) - .as("Rename failed") - .isTrue(); - // --- LIST FILES IN DIRECTORY --- - int listResult = listAllFilesAndDirs(abfs, testFile1); - Assertions.assertThat(listResult) - .as("Expected only 5 entries under path: %s", testFile1) - .isEqualTo(5); - } - } - - /** - * Scenario 33: Create a directory and file using ABFS, rename the directory using WASB, and list files using WASB. - * Expected Outcome: WASB should successfully rename the directory, and listing should reflect the updated directory name. 
- */ - @Test - public void testScenario33() throws Exception { - assumeThat(isAppendBlobEnabled()).as("Not valid for APPEND BLOB").isFalse(); - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path testFile1 = path("/testReadFile1"); - Path testPath1 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - Path testPath2 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - Path testPath3 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - abfs.mkdirs(testFile); - try (FSDataOutputStream abfsOutputStream = abfs.create(testPath1, true)) { - abfsOutputStream.write(TEST_CONTEXT.getBytes()); - abfsOutputStream.flush(); - abfsOutputStream.hsync(); - } - abfs.create(testPath2, true); - abfs.create(testPath3, true); - - // Check file status - ContractTestUtils.assertIsFile(abfs, testPath1); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(abfs.open(testPath1)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + abfs); - } - // --- RENAME DIR --- - boolean renamed = wasb.rename(testFile, testFile1); - Assertions.assertThat(renamed) - .as("Rename failed") - .isTrue(); - // --- LIST FILES IN DIRECTORY --- - int listResult = listAllFilesAndDirs(wasb, testFile1); - Assertions.assertThat(listResult) - .as("Expected only 5 entries under path: %s", testFile1) - .isEqualTo(5); - } - } - - /** - * Scenario 34: Create a directory via ABFS, rename a file inside the directory using WASB, and list files via ABFS. - * Expected Outcome: WASB should successfully rename the file, and ABFS listing should reflect the updated filename. 
- */ - @Test - public void testScenario34() throws Exception { - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path testPath1 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - Path testPath2 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - Path testPath3 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - abfs.mkdirs(testFile); - try (FSDataOutputStream abfsOutputStream = abfs.create(testPath1, true)) { - abfsOutputStream.write(TEST_CONTEXT.getBytes()); - abfsOutputStream.flush(); - abfsOutputStream.hsync(); - } - abfs.create(testPath3, true); - - // Check file status - ContractTestUtils.assertIsFile(abfs, testPath1); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(abfs.open(testPath1)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + abfs); - } - // --- RENAME DIR --- - boolean renamed = wasb.rename(testPath1, testPath2); - Assertions.assertThat(renamed) - .as("Rename failed") - .isTrue(); - // --- LIST FILES IN DIRECTORY --- - int listResult = listAllFilesAndDirs(abfs, testFile); - Assertions.assertThat(listResult) - .as("Expected only 4 entries under path: %s", testFile) - .isEqualTo(4); - } - } - - /** - * Scenario 35: Create a directory via WASB, rename a file inside the directory using ABFS, and list files via WASB. - * Expected Outcome: ABFS should successfully rename the file, and WASB listing should reflect the updated filename. - */ - @Test - public void testScenario35() throws Exception { - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path testPath1 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - Path testPath2 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - Path testPath3 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - wasb.mkdirs(testFile); - try (FSDataOutputStream nativeFsStream = wasb.create(testPath1, true)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - wasb.create(testPath3, true); - - // Check file status - ContractTestUtils.assertIsFile(abfs, testPath1); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(abfs.open(testPath1)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + abfs); - } - // --- RENAME DIR --- - boolean renamed = abfs.rename(testPath1, testPath2); - Assertions.assertThat(renamed) - .as("Rename failed") - .isTrue(); - // --- LIST FILES IN DIRECTORY --- - int listResult = listAllFilesAndDirs(wasb, testFile); - Assertions.assertThat(listResult) - .as("Expected only 4 entries under path: %s", testFile) - .isEqualTo(4); - } - } - - /** - * Scenario 36: Create a file via WASB, attempt to rename it to an existing filename using ABFS, and list files via WASB. - * Expected Outcome: Rename should fail due to existing target name, and WASB listing should remain unchanged. 
- */ - - @Test - public void testScenario36() throws Exception { - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path testPath1 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - Path testPath3 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - wasb.mkdirs(testFile); - try (FSDataOutputStream nativeFsStream = wasb.create(testPath1, true)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - wasb.create(testPath3, true); - - // Check file status - ContractTestUtils.assertIsFile(abfs, testPath1); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(abfs.open(testPath1)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + abfs); - } - // --- RENAME DIR --- - boolean renamed = abfs.rename(testFile, testFile); - Assertions.assertThat(renamed) - .as("Rename operation should have failed but returned true") - .isFalse(); - } - } - - /** - * Scenario 37: Attempt to rename a non-existent file using WASB. - * Expected Outcome: Rename operation should fail with an appropriate error indicating the file does not exist. - */ - @Test - public void testScenario37() throws Exception { - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path testPath1 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - Path testPath2 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - Path testPath3 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - abfs.mkdirs(testFile); - try (FSDataOutputStream abfsOutputStream = abfs.create(testPath1, true)) { - abfsOutputStream.write(TEST_CONTEXT.getBytes()); - abfsOutputStream.flush(); - abfsOutputStream.hsync(); - } - abfs.create(testPath3, true); - - // Check file status - ContractTestUtils.assertIsFile(abfs, testPath1); - - try (BufferedReader br = new BufferedReader( - new InputStreamReader(abfs.open(testPath1)))) { - String line = br.readLine(); - assertEquals(TEST_CONTEXT, line, "Wrong text from " + abfs); - } - // --- RENAME NON EXISTENT FILE --- - boolean renamed = wasb.rename(testPath2, testPath3); - Assertions.assertThat(renamed) - .as("Rename operation should have failed but returned true") - .isFalse(); - } - } - - /** - * Scenario 38: Create a file using WASB, set and get an attribute via WASB, then create the file again with overwrite=true using WASB. - * Expected Outcome: Attribute operations should succeed before overwrite, and after overwrite, the file should be replaced with no prior attributes. 
- */ - @Test - public void testScenario38() throws Exception { - Configuration conf = getRawConfiguration(); - conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true); - FileSystem fileSystem = FileSystem.newInstance(conf); - try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) { - NativeAzureFileSystem wasb = getWasbFileSystem(); - - Path testFile = path("/testReadFile"); - Path path = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID()); - - // Write - try (FSDataOutputStream nativeFsStream = wasb.create(path, true)) { - nativeFsStream.write(TEST_CONTEXT.getBytes()); - nativeFsStream.flush(); - nativeFsStream.hsync(); - } - // --- VALIDATE FILE --- - FileStatus status = wasb.getFileStatus(path); - assertIsFile(path, status); - - // --- SET XATTR #1 --- - wasb.setXAttr(path, ATTRIBUTE_NAME_1, ATTRIBUTE_VALUE_1); - byte[] readValue = wasb.getXAttr(path, ATTRIBUTE_NAME_1); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_1, "one"); - - // --- SET XATTR #2 --- - wasb.setXAttr(path, ATTRIBUTE_NAME_2, ATTRIBUTE_VALUE_2); - readValue = wasb.getXAttr(path, ATTRIBUTE_NAME_2); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_2, "two"); - - // --- VERIFY XATTR #1 AGAIN --- - readValue = wasb.getXAttr(path, ATTRIBUTE_NAME_1); - ITestAzureBlobFileSystemAttributes.assertAttributeEqual(abfs, readValue, ATTRIBUTE_VALUE_1, "one"); - - wasb.create(path, true); - FileStatus fileStatus = abfs.getFileStatus(path); - Assertions.assertThat(fileStatus.getLen()) - .as("Expected file length to be 0 after overwrite") - .isEqualTo(0L); - wasb.delete(path, true); - } - } - - /** - * Scenario 39: Create and write a file using WASB, rename the file using WASB, and list files using WASB. - * Expected Outcome: WASB should successfully rename the file, and the renamed file should appear in the listing. 
- */
-  @Test
-  public void testScenario39() throws Exception {
-    Configuration conf = getRawConfiguration();
-    conf.setBoolean(FS_AZURE_ABFS_ENABLE_CHECKSUM_VALIDATION, true);
-    FileSystem fileSystem = FileSystem.newInstance(conf);
-    try (AzureBlobFileSystem abfs = (AzureBlobFileSystem) fileSystem) {
-      NativeAzureFileSystem wasb = getWasbFileSystem();
-
-      String testRunId = UUID.randomUUID().toString();
-      Path baseDir = path("/testScenario39_" + testRunId);
-      Path testFile = new Path(baseDir, "testReadFile");
-      Path testPath1 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID());
-      Path testPath2 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID());
-      Path testPath3 = new Path(testFile + "/~12/!008/testfile_" + UUID.randomUUID());
-
-      // Write
-      wasb.mkdirs(testFile);
-      try (FSDataOutputStream nativeFsStream = wasb.create(testPath1, true)) {
-        nativeFsStream.write(TEST_CONTEXT.getBytes());
-        nativeFsStream.flush();
-        nativeFsStream.hsync();
-      }
-      wasb.create(testPath3, true);
-
-      // Check file status
-      ContractTestUtils.assertIsFile(abfs, testPath1);
-
-      try (BufferedReader br = new BufferedReader(
-          new InputStreamReader(abfs.open(testPath1)))) {
-        String line = br.readLine();
-        assertEquals(TEST_CONTEXT, line, "Wrong text from " + abfs);
-      }
-      // --- RENAME DIR ---
-      boolean renamed = wasb.rename(testPath1, testPath2);
-      Assertions.assertThat(renamed)
-          .as("Rename failed")
-          .isTrue();
-      // --- LIST FILES IN DIRECTORY ---
-      int listResult = listAllFilesAndDirs(wasb, testFile);
-      Assertions.assertThat(listResult)
-          .as("Expected only 4 entries under path: %s", testFile)
-          .isEqualTo(4);
-    }
-  }
-
-  /**
-   * Recursively counts all files and directories under the given path.
-   *
-   * @param fs The file system to use.
-   * @param path The starting path.
-   * @return Total number of files and directories.
-   * @throws IOException If an error occurs while accessing the file system.
-   */
-  public static int listAllFilesAndDirs(FileSystem fs, Path path) throws IOException {
-    int count = 0;
-    RemoteIterator<FileStatus> iter = fs.listStatusIterator(path);
-
-    while (iter.hasNext()) {
-      FileStatus status = iter.next();
-      count++; // Count this file or directory
-
-      if (status.isDirectory()) {
-        count += listAllFilesAndDirs(fs, status.getPath()); // Recurse into directory
-      }
-    }
-
-    return count;
-  }
-
-  /**
-   * Checks that the given path is a regular file (not a directory or symlink).
-   *
-   * @param path The file path.
-   * @param status The file status.
-   * @throws AssertionError If the path is a directory or a symlink.
- */ - private static void assertIsFile(Path path, FileStatus status) { - Assertions.assertThat(status.isDirectory()) - .as("Expected a regular file, but was a directory: %s %s", path, status) - .isFalse(); - - Assertions.assertThat(status.isSymlink()) - .as("Expected a regular file, but was a symlink: %s %s", path, status) - .isFalse(); - } -} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/AbstractAbfsClusterITest.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/AbstractAbfsClusterITest.java index d906333b0b27b..60cdf1d78dde5 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/AbstractAbfsClusterITest.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/AbstractAbfsClusterITest.java @@ -27,7 +27,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.azure.integration.AzureTestConstants; import org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes; import org.apache.hadoop.fs.azurebfs.contract.ABFSContractTestBinding; import org.apache.hadoop.fs.azurebfs.contract.AbfsFileSystemContract; @@ -40,7 +39,8 @@ import org.apache.hadoop.util.DurationInfo; import static java.util.Objects.requireNonNull; -import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.assumeScaleTestsEnabled; +import static org.apache.hadoop.fs.azurebfs.utils.AbfsTestUtils.SCALE_TEST_TIMEOUT_MILLIS; +import static org.apache.hadoop.fs.azurebfs.utils.AbfsTestUtils.assumeScaleTestsEnabled; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_AUTH_TYPE_PROPERTY_NAME; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_OVERRIDE_OWNER_SP; import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_OVERRIDE_OWNER_SP_LIST; @@ -76,7 +76,7 @@ protected AbstractAbfsClusterITest() throws Exception { @Override protected int getTestTimeoutMillis() { - return AzureTestConstants.SCALE_TEST_TIMEOUT_MILLIS; + return SCALE_TEST_TIMEOUT_MILLIS; } @BeforeEach diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsLoadManifestsStage.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsLoadManifestsStage.java index 2979afc79e5ce..960b83826d510 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsLoadManifestsStage.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/commit/ITestAbfsLoadManifestsStage.java @@ -19,7 +19,6 @@ package org.apache.hadoop.fs.azurebfs.commit; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.azure.integration.AzureTestConstants; import org.apache.hadoop.fs.azurebfs.contract.ABFSContractTestBinding; import org.apache.hadoop.fs.azurebfs.contract.AbfsFileSystemContract; import org.apache.hadoop.fs.contract.AbstractFSContract; @@ -27,6 +26,8 @@ import org.apache.hadoop.mapreduce.lib.output.committer.manifest.TestLoadManifestsStage; import org.junit.jupiter.api.BeforeEach; +import static org.apache.hadoop.fs.azurebfs.utils.AbfsTestUtils.SCALE_TEST_TIMEOUT_MILLIS; + /** * ABFS storage test of saving and loading a large number * of manifests. 
@@ -58,7 +59,7 @@ protected AbstractFSContract createContract(final Configuration conf) { @Override protected int getTestTimeoutMillis() { - return AzureTestConstants.SCALE_TEST_TIMEOUT_MILLIS; + return SCALE_TEST_TIMEOUT_MILLIS; } /** diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/constants/TestConfigurationKeys.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/constants/TestConfigurationKeys.java index d82a4d2879b93..e46140d401801 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/constants/TestConfigurationKeys.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/constants/TestConfigurationKeys.java @@ -32,6 +32,9 @@ public final class TestConfigurationKeys { public static final String FS_AZURE_TEST_NAMESPACE_ENABLED_ACCOUNT = "fs.azure.test.namespace.enabled"; public static final String FS_AZURE_TEST_APPENDBLOB_ENABLED = "fs.azure.test.appendblob.enabled"; public static final String FS_AZURE_TEST_CPK_ENABLED = "fs.azure.test.cpk.enabled"; + public static final String FS_AZURE_SCALE_TEST_HUGE_UPLOAD = "fs.azure.scale.test.huge.upload"; + public static final String FS_AZURE_SCALE_TEST_ENABLED = "fs.azure.scale.test.enabled"; + public static final String SCALE_TEST_OPERATION_COUNT = "scale.test.operation.count"; public static final String FS_AZURE_BLOB_DATA_CONTRIBUTOR_CLIENT_ID = "fs.azure.account.oauth2.contributor.client.id"; public static final String FS_AZURE_BLOB_DATA_CONTRIBUTOR_CLIENT_SECRET = "fs.azure.account.oauth2.contributor.client.secret"; diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractDistCp.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractDistCp.java index b02c99af2b799..013366fa75757 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractDistCp.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractDistCp.java @@ -19,12 +19,12 @@ package org.apache.hadoop.fs.azurebfs.contract; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.azure.integration.AzureTestConstants; import org.apache.hadoop.fs.azurebfs.services.AuthType; import org.apache.hadoop.tools.contract.AbstractContractDistCpTest; import org.junit.jupiter.api.BeforeEach; -import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.assumeScaleTestsEnabled; +import static org.apache.hadoop.fs.azurebfs.utils.AbfsTestUtils.SCALE_TEST_TIMEOUT_MILLIS; +import static org.apache.hadoop.fs.azurebfs.utils.AbfsTestUtils.assumeScaleTestsEnabled; import static org.assertj.core.api.Assumptions.assumeThat; /** @@ -35,7 +35,7 @@ public class ITestAbfsFileSystemContractDistCp extends AbstractContractDistCpTes @Override protected int getTestTimeoutMillis() { - return AzureTestConstants.SCALE_TEST_TIMEOUT_MILLIS; + return SCALE_TEST_TIMEOUT_MILLIS; } public ITestAbfsFileSystemContractDistCp() throws Exception { diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/AbfsTestUtils.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/AbfsTestUtils.java index 1752dafd8d950..c6de9476e6ed4 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/AbfsTestUtils.java +++ 
b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/utils/AbfsTestUtils.java @@ -24,90 +24,53 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; -import com.microsoft.azure.storage.CloudStorageAccount; -import com.microsoft.azure.storage.blob.CloudBlobClient; -import com.microsoft.azure.storage.blob.CloudBlobContainer; +import org.opentest4j.TestAbortedException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.azure.AzureBlobStorageTestAccount; import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; import org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem; -import org.apache.hadoop.fs.azurebfs.services.AuthType; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.FILE; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.ABFS_SCHEME; import static org.apache.hadoop.fs.azurebfs.constants.FileSystemUriSchemes.ABFS_SECURE_SCHEME; -import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.TEST_CONTAINER_PREFIX; +import static org.apache.hadoop.fs.azurebfs.constants.TestConfigurationKeys.FS_AZURE_SCALE_TEST_ENABLED; import static org.assertj.core.api.Assumptions.assumeThat; /** * Some Utils for ABFS tests. */ public final class AbfsTestUtils extends AbstractAbfsIntegrationTest { - private static final Logger LOG = - LoggerFactory.getLogger(AbfsTestUtils.class); + + private static final Logger LOG = LoggerFactory.getLogger(AbfsTestUtils.class); private static final int TOTAL_THREADS_IN_POOL = 5; + private static final String UNSET_PROPERTY = "unset"; + private static final int SCALE_TEST_TIMEOUT_SECONDS = 30 * 60; + private static final boolean DEFAULT_SCALE_TESTS_ENABLED = false; + public static final int SCALE_TEST_TIMEOUT_MILLIS = SCALE_TEST_TIMEOUT_SECONDS + * 1000; public AbfsTestUtils() throws Exception { super(); } /** - * If unit tests were interrupted and crushed accidentally, the test containers won't be deleted. - * In that case, dev can use this tool to list and delete all test containers. - * By default, all test container used in E2E tests sharing same prefix: "abfs-testcontainer-" + * Turn off FS Caching: use if a filesystem with different options from + * the default is required. 
+ * @param conf configuration to patch */ - - public void checkContainers() throws Throwable { - assumeThat(this.getAuthType()).isEqualTo(AuthType.SharedKey); - int count = 0; - CloudStorageAccount storageAccount = AzureBlobStorageTestAccount.createTestAccount(); - CloudBlobClient blobClient = storageAccount.createCloudBlobClient(); - Iterable containers - = blobClient.listContainers(TEST_CONTAINER_PREFIX); - for (CloudBlobContainer container : containers) { - count++; - LOG.info("Container {}, URI {}", - container.getName(), - container.getUri()); - } - LOG.info("Found {} test containers", count); - } - - - public void deleteContainers() throws Throwable { - assumeThat(this.getAuthType()).isEqualTo(AuthType.SharedKey); - int count = 0; - CloudStorageAccount storageAccount = AzureBlobStorageTestAccount.createTestAccount(); - CloudBlobClient blobClient = storageAccount.createCloudBlobClient(); - Iterable containers - = blobClient.listContainers(TEST_CONTAINER_PREFIX); - for (CloudBlobContainer container : containers) { - LOG.info("Container {} URI {}", - container.getName(), - container.getUri()); - if (container.deleteIfExists()) { - count++; - } - } - LOG.info("Deleted {} test containers", count); + public static void disableFilesystemCaching(Configuration conf) { + // Disabling cache to make sure new configs are picked up. + conf.setBoolean(String.format("fs.%s.impl.disable.cache", ABFS_SCHEME), + true); + conf.setBoolean( + String.format("fs.%s.impl.disable.cache", ABFS_SECURE_SCHEME), true); } - /** - * Turn off FS Caching: use if a filesystem with different options from - * the default is required. - * @param conf configuration to patch - */ - public static void disableFilesystemCaching(Configuration conf) { - // Disabling cache to make sure new configs are picked up. - conf.setBoolean(String.format("fs.%s.impl.disable.cache", ABFS_SCHEME), true); - conf.setBoolean(String.format("fs.%s.impl.disable.cache", ABFS_SECURE_SCHEME), true); - } - /** * Helper method to create files in the given directory. * @@ -116,7 +79,9 @@ public static void disableFilesystemCaching(Configuration conf) { * @param numFiles The number of files to create. * @throws ExecutionException, InterruptedException If an error occurs during file creation. */ - public static void createFiles(AzureBlobFileSystem fs, Path path, int numFiles) + public static void createFiles(AzureBlobFileSystem fs, + Path path, + int numFiles) throws ExecutionException, InterruptedException { ExecutorService executorService = Executors.newFixedThreadPool(TOTAL_THREADS_IN_POOL); @@ -132,4 +97,119 @@ public static void createFiles(AzureBlobFileSystem fs, Path path, int numFiles) } executorService.shutdown(); } + + /** + * Assume that a condition is met. If not: log at WARN and + * then throw an {@link TestAbortedException}. + * @param message message in an assumption + * @param condition condition to probe + */ + public static void assume(String message, boolean condition) { + if (!condition) { + LOG.warn(message); + } + assumeThat(condition).as(message).isTrue(); + } + + /** + * Get a string test property. + *
+ * <ol> + *   <li>Look up configuration value (which can pick up core-default.xml), + *   using {@code defVal} as the default value (if conf != null).</li> + *   <li>Fetch the system property.</li> + *   <li>If the system property is not empty or "(unset)": + *   it overrides the conf value.</li> + * </ol> + * + * This puts the build properties in charge of everything. It's not a + * perfect design; having maven set properties based on a file, as ant let + * you do, is better for customization. + * + * As to why there's a special (unset) value, see + * @see + * Stack Overflow + * @param conf config: may be null + * @param key key to look up + * @param defVal default value + * @return the evaluated test property. + */ + 
+ public static String getTestProperty(Configuration conf, + String key, + String defVal) { + String confVal = conf != null + ? conf.getTrimmed(key, defVal) + : defVal; + String propval = System.getProperty(key); + return StringUtils.isNotEmpty(propval) && !UNSET_PROPERTY.equals(propval) + ? propval : confVal; + } + + /** + * Get a long test property.
+ * <ol> + *   <li>Look up configuration value (which can pick up core-default.xml), + *   using {@code defVal} as the default value (if conf != null).</li> + *   <li>Fetch the system property.</li> + *   <li>If the system property is not empty or "(unset)": + *   it overrides the conf value.</li> + * </ol> + *
+ * This puts the build properties in charge of everything. It's not a + * perfect design; having maven set properties based on a file, as ant let + * you do, is better for customization. + * + * As to why there's a special (unset) value, see + * {@link ...} + * @param conf config: may be null + * @param key key to look up + * @param defVal default value + * @return the evaluated test property. + */ + public static long getTestPropertyLong(Configuration conf, + String key, long defVal) { + return Long.valueOf( + getTestProperty(conf, key, Long.toString(defVal))); + } + + /** + * Get an integer test property; algorithm described in + * {@link #getTestPropertyLong(Configuration, String, long)}. + * @param key key to look up + * @param defVal default value + * @return the evaluated test property. + */ + public static int getTestPropertyInt(Configuration conf, + String key, int defVal) { + return (int) getTestPropertyLong(conf, key, defVal); + } + + /** + * Get a boolean test property; algorithm described in + * {@link #getTestPropertyLong(Configuration, String, long)}. + * @param key key to look up + * @param defVal default value + * @return the evaluated test property. + */ + public static boolean getTestPropertyBool(Configuration conf, + String key, + boolean defVal) { + return Boolean.valueOf( + getTestProperty(conf, key, Boolean.toString(defVal))); + } + + /** + * Assume that the scale tests are enabled by the relevant system property. + */ + public static void assumeScaleTestsEnabled(Configuration conf) { + boolean enabled = getTestPropertyBool( + conf, + FS_AZURE_SCALE_TEST_ENABLED, + DEFAULT_SCALE_TESTS_ENABLED); + assume("Scale test disabled: to enable set property " + + FS_AZURE_SCALE_TEST_ENABLED, + enabled); + } } diff --git a/hadoop-tools/hadoop-azure/src/test/resources/wasb.xml b/hadoop-tools/hadoop-azure/src/test/resources/wasb.xml deleted file mode 100644 index d7cd1a5409688..0000000000000 --- a/hadoop-tools/hadoop-azure/src/test/resources/wasb.xml +++ /dev/null @@ -1,169 +0,0 @@ - - - - - - - - - - - - - - - fs.contract.test.root-tests-enabled - false - - - - fs.contract.test.random-seek-count - 10 - - - - fs.contract.is-blobstore - true - - - - fs.contract.is-case-sensitive - true - - - - fs.contract.rename-returns-false-if-dest-exists - true - - - - fs.contract.rename-returns-false-if-source-missing - true - - - - fs.contract.rename-creates-dest-dirs - false - - - - fs.contract.rename-remove-dest-if-empty-dir - false - - - - fs.contract.supports-settimes - false - - - - fs.contract.supports-append - true - - - - fs.contract.supports-atomic-directory-delete - false - - - - fs.contract.supports-atomic-rename - true - - - - fs.contract.supports-block-locality - true - - - - fs.contract.supports-concat - false - - - - fs.contract.supports-seek - true - - - - fs.contract.supports-seek-on-closed-file - true - - - - fs.contract.rejects-seek-past-eof - true - - - - fs.contract.supports-available-on-closed-file - false - - - - fs.contract.supports-strict-exceptions - true - - - - fs.contract.supports-unix-permissions - false - - - - fs.contract.rename-overwrites-dest - false - - - - fs.contract.supports-append - true - - - - fs.azure.enable.append.support - true - - - - fs.contract.supports-getfilestatus - true - - - - fs.azure.test.namespace.enabled - false - - -
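Illustrative sketch, not part of the patch: how a test might consume the static helpers added to AbfsTestUtils above. The class and package names, the raw "fs.azure.scale.test.huge.filesize" key, and the 128 MB default are assumptions made purely for the example.

package org.apache.hadoop.fs.azurebfs;

import org.apache.hadoop.conf.Configuration;
import org.junit.jupiter.api.Test;

import static org.apache.hadoop.fs.azurebfs.utils.AbfsTestUtils.assumeScaleTestsEnabled;
import static org.apache.hadoop.fs.azurebfs.utils.AbfsTestUtils.disableFilesystemCaching;
import static org.apache.hadoop.fs.azurebfs.utils.AbfsTestUtils.getTestPropertyLong;

/** Illustrative only -- not part of this patch. */
public class TestAbfsScaleSetupExample {

  // Hypothetical key used for the example; a real test would normally
  // reference a constant from TestConfigurationKeys.
  private static final String HUGE_FILESIZE_KEY =
      "fs.azure.scale.test.huge.filesize";

  @Test
  public void testScaleSetup() {
    Configuration conf = new Configuration();

    // Abort (skip) unless fs.azure.scale.test.enabled resolves to true,
    // either in the configuration or as a -D system property.
    assumeScaleTestsEnabled(conf);

    // System properties override the XML configuration; the "unset"
    // marker is ignored, exactly as getTestProperty() implements it.
    long hugeFileSize =
        getTestPropertyLong(conf, HUGE_FILESIZE_KEY, 128L * 1024 * 1024);

    // Ensure a FileSystem built from this conf sees the new settings
    // rather than a cached instance.
    disableFilesystemCaching(conf);

    // ... a real scale test would upload a file of hugeFileSize bytes here.
  }
}

When the scale switch is off, assumeScaleTestsEnabled() logs a warning and throws a TestAbortedException via assume(), so JUnit 5 reports the test as skipped rather than failed.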