Kitware/spark-mpi-experimentation

Name: spark-mpi-experimentation

Owner: Kitware, Inc.

Description: Experimentations with Spark, MPI, ParaView, Web visualization

Created: 2017-06-14 20:08:52.0

Updated: 2017-06-15 16:36:01.0

Pushed: 2017-12-01 21:30:25.0

Homepage:

Size: 55162

Language: Python

GitHub Committers

UserMost Recent Commit# Commits

Other Committers

UserEmailMost Recent Commit# Commits

README

Spark-MPI experimentation

This repository gathers various experiments made with Spark along with MPI parallel processing and ParaView.

Configuration of the test machine
Working directory
mkdir -p /data/sebastien/SparkMPI
cd /data/sebastien/SparkMPI
Get ParaView
mkdir pv-build pv-install mpi
git clone https://gitlab.kitware.com/paraview/paraview.git
cd paraview
git submodule update --init
Get CMake
mkdir cmake
cd cmake
curl -O https://cmake.org/files/v3.8/cmake-3.8.1-Linux-x86_64.sh
chmod +x cmake-3.8.1-Linux-x86_64.sh
./cmake-3.8.1-Linux-x86_64.sh
Get MPI library to use
cd mpi
curl -O http://mvapich.cse.ohio-state.edu/download/mvapich/mv2/mvapich2-2.2.tar.gz
tar xvfz mvapich2-2.2.tar.gz
mkdir mvapich
cd mvapich2-2.2
./configure --disable-libxml2 --disable-fortran --prefix=/data/sebastien/SparkMPI/mpi/mvapich  --disable-mcast  --without-cma
make
make install
Build ParaView with MPI
cd /data/sebastien/SparkMPI/pv-build
/data/sebastien/SparkMPI/cmake/cmake-3.8.1-Linux-x86_64/bin/ccmake ../paraview

CMAKE_INSTALL_PREFIX            */data/sebastien/SparkMPI/pv-install/
PARAVIEW_BUILD_QT_GUI           *OFF
PARAVIEW_ENABLE_PYTHON          *ON
PARAVIEW_USE_MPI                *ON



MPI_C_INCLUDE_PATH              */data/sebastien/SparkMPI/mpi/mvapich/include
MPI_C_LIBRARIES                 */data/sebastien/SparkMPI/mpi/mvapich/lib/libmpi.so




make -j20
make install
Install Spark-MPI
export MPI_SRC=/data/sebastien/SparkMPI/mpi/mvapich2-2.2/src/

mkdir -p /data/sebastien/SparkMPI/spark-mpi
cd /data/sebastien/SparkMPI/spark-mpi/
git clone git://github.com/SciDriver/spark-mpi.git
mkdir build install
cd build
/data/sebastien/SparkMPI/cmake/cmake-3.8.1-Linux-x86_64/bin/cmake ../spark-mpi

CMAKE_INSTALL_PREFIX            */data/sebastien/SparkMPI/spark-mpi/install
MPI_EXTRA_LIBRARY               */data/sebastien/SparkMPI/mpi/mvapich/lib/libmpi.so
MPI_LIBRARY                     */data/sebastien/SparkMPI/mpi/mvapich/lib/libmpicxx.so

MPIEXEC                         */data/sebastien/SparkMPI/mpi/mvapich/bin/mpiexec
MPIEXEC_MAX_NUMPROCS            *2
MPIEXEC_NUMPROC_FLAG            *-np
MPIEXEC_POSTFLAGS               *
MPIEXEC_PREFLAGS                *
MPI_CXX_COMPILER                */data/sebastien/SparkMPI/mpi/mvapich/bin/mpicxx
MPI_CXX_COMPILE_FLAGS           *
MPI_CXX_INCLUDE_PATH            */data/sebastien/SparkMPI/mpi/mvapich/include
MPI_CXX_LIBRARIES               */data/sebastien/SparkMPI/mpi/mvapich/lib/libmpicxx.so;/data/sebastien/SparkMPI/mpi/mvapich/lib/libmpi.so
MPI_CXX_LINK_FLAGS              *-Wl,-rpath -Wl,/data/sebastien/SparkMPI/mpi/mvapich/lib -Wl,--enable-new-dtags
MPI_C_COMPILER                  */data/sebastien/SparkMPI/mpi/mvapich/bin/mpicc
MPI_C_COMPILE_FLAGS             *
MPI_C_INCLUDE_PATH              */data/sebastien/SparkMPI/mpi/mvapich/include
MPI_C_LIBRARIES                 */data/sebastien/SparkMPI/mpi/mvapich/lib/libmpi.so
MPI_C_LINK_FLAGS                *-Wl,-rpath -Wl,/usr/lib/openmpi/lib -Wl,--enable-new-dtags

CMAKE_BUILD_TYPE                 Release

[c][g]

make
make install
Install Spark
curl -O http://d3kbcqa49mib13.cloudfront.net/spark-2.1.1-bin-hadoop2.7.tgz
tar xvfz spark-2.1.1-bin-hadoop2.7.tgz

export SPARK_HOME=/data/sebastien/SparkMPI/spark-2.1.1-bin-hadoop2.7
cd $SPARK_HOME/conf
cp spark-defaults.conf.template spark-defaults.conf
vi spark-defaults.conf

spark.driver.memory    5g

cp slaves.template slaves

vi spark-env.sh

export PYTHONPATH="${PYTHONPATH}:/data/sebastien/SparkMPI/pv-install/lib/paraview-5.4/site-packages/vtk"
export PYTHONPATH="${PYTHONPATH}:/data/sebastien/SparkMPI/pv-install/lib/paraview-5.4/site-packages"
export PYTHONPATH="${PYTHONPATH}:/data/sebastien/SparkMPI/pv-install/lib/paraview-5.4"
export PYTHONPATH="${PYTHONPATH}:/data/sebastien/SparkMPI/pv-install/bin"
export LD_LIBRARY_PATH=/data/sebastien/SparkMPI/pv-install/lib/paraview-5.4:/data/sebastien/SparkMPI/pv-install/lib/paraview-5.4/site-packages/vtk

Start spark

cd /data/sebastien/SparkMPI/spark-2.1.1-bin-hadoop2.7/sbin
./start-all.sh
Install SciPy

Atlas library

sudo apt-get install libatlas-base-dev

BLAS library

sudo apt-get install libopenblas-dev

MKL library

Register on Intel web site => https://software.intel.com/en-us/performance-libraries

Then download MKL (following link may not work for you)

wget http://registrationcenter-download.intel.com/akdlm/irc_nas/tec/11544/l_mkl_2017.3.196.tgz
tar xvfz l_mkl_2017.3.196.tgz
cd l_mkl_2017.3.196/
sudo ./install.sh

Install location:
/opt/intel

Component(s) selected:
Intel(R) Math Kernel Library 2017 Update 3 for C/C++                   2.0GB
    Intel MKL core libraries for C/C++
    Intel TBB threading support
    GNU* C/C++ compiler support

Intel(R) Math Kernel Library 2017 Update 3 for Fortran                 2.1GB
    Intel MKL core libraries for Fortran
    GNU* Fortran compiler support
    Fortran 95 interfaces for BLAS and LAPACK

Install space required:  2.3GB

(from: http://tzutalin.blogspot.com/2015/06/blas-atlas-openblas-and-mkl.html)

=> create file ~/.numpy-site.cfg

[DEFAULT]
library_dirs = /usr/lib:/usr/local/lib
include_dirs = /usr/include:/usr/local/include

[mkl]
library_dirs = /opt/intel/mkl/lib/intel64/
include_dirs = /opt/intel/mkl/include/
mkl_libs = mkl_intel_ilp64, mkl_intel_thread, mkl_core, mkl_rt
lapack_libs =

[amd]
amd_libs = amd

[umfpack]
umfpack_libs = umfpack

[djbfft]
include_dirs = /usr/local/djbfft/include
library_dirs = /usr/local/djbfft/lib

scipy

mkdir scipy
cd scipy
wget https://github.com/scipy/scipy/releases/download/v0.19.0/scipy-0.19.0.tar.gz
tar xvfz scipy-0.19.0.tar.gz
cd scipy-0.19.0

export PYTHONPATH=/data/sebastien/SparkMPI/scipy/install/lib/python2.7/site-packages
python setup.py install --prefix=/data/sebastien/SparkMPI/scipy/install

cp -r /data/sebastien/SparkMPI/scipy/install/lib/python2.7/site-packages/scipy-0.19.0-py2.7-linux-x86_64.egg/scipy /data/sebastien/SparkMPI/pv-install/lib/paraview-5.4/site-packages
Patch ParaView Python server

Edit ~/SparkMPI/pv-install/lib/paraview-5.4/site-packages/vtk/web/server.py and remove all occurrences involving the “testing” module.

Running experimentations
cd /data/sebastien/SparkMPI
git clone https://github.com/Kitware/spark-mpi-experimentation.git
cd spark-mpi-experimentation/experimentations

Choose the example to run

cd 11-recon-to-volume
./start.sh

gdal

sudo add-apt-repository ppa:ubuntugis/ppa && sudo apt-get update
sudo apt-get install gdal-bin libgdal-dev

export CPLUS_INCLUDE_PATH=/usr/include/gdal
export C_INCLUDE_PATH=/usr/include/gdal

virtualenv tmp-gdal
pip install gdal==2.1.0

cd tmp-gdal/lib/python2.7/site-packages
cp -r gdal* /data/sebastien/SparkMPI/pv-install/lib/paraview-5.4/site-packages
cp -r skimage /data/sebastien/SparkMPI/pv-install/lib/paraview-5.4/site-packages
cp -r osgeo /data/sebastien/SparkMPI/pv-install/lib/paraview-5.4/site-packages

This work is supported by the National Institutes of Health's National Center for Advancing Translational Sciences, Grant Number U24TR002306. This work is solely the responsibility of the creators and does not necessarily represent the official views of the National Institutes of Health.