# Create a workspace directory, fetch the notebooks into it, and start the
# jupyter/pyspark-notebook container with that workspace mounted.
#
# -p makes mkdir succeed when the directory already exists, so these steps
# can be re-run. The && chain stops before cloning if the workspace could
# not be created or entered, instead of cloning into an unrelated directory.
mkdir -p ~/tbd_ml/ &&
  cd ~/tbd_ml/ &&
  git clone git@github.com:bdg-tbd/tbd-notebooks.git
# Mount the current directory at /home/jovyan/work/ and publish port 8888.
# The command substitution is quoted so a path containing spaces stays a
# single -v argument.
docker run -v "$(pwd)":/home/jovyan/work/ -p 8888:8888 jupyter/pyspark-notebook
-
Run load_data.ipynb to download sample CSV data and read it using Spark SQL.
-
Run ml.ipynb to learn how to build a machine learning pipeline with Spark ML.