11Steps to run the COCO dataset for Image Captioning
22==================================================
33##### (1) Env setup
4- Initial Setup: https://github.com/yahoo/CaffeOnSpark/wiki/GetStarted_standalone
4+ Set up both CAFFE_ON_SPARK and SPARK_HOME per https://github.com/yahoo/CaffeOnSpark/wiki/GetStarted_standalone
55 export DYLD_LIBRARY_PATH=${CAFFE_ON_SPARK}/caffe-public/distribute/lib:${CAFFE_ON_SPARK}/caffe-distri/distribute/lib:/usr/local/cuda/lib:/usr/local/mkl/lib/intel64/:Python2.7.10/lib:/usr/local/cuda/lib:caffe_on_grid_archive/lib64/mkl/intel64/
66 export LD_LIBRARY_PATH=${DYLD_LIBRARY_PATH}
77 export SPARK_HOME=/Users/mridul/bigml/spark-1.6.0-bin-hadoop2.6
88 export PATH=${SPARK_HOME}/bin:${PATH}
99 export PYSPARK_PYTHON=Python2.7.10/bin/python
1010 export PYTHONPATH=$PYTHONPATH:caffeonsparkpythonapi.zip:caffe_on_grid_archive/lib64:/usr/local/cuda-7.5/lib64
1111 export IPYTHON_ROOT=~/Python2.7.10
12- unset SPARK_CONF_DIR
1312
1413##### (2) Download the COCO dataset if required
1514
@@ -81,7 +80,8 @@ Steps to run the COCO dataset for Image Captioning
8180 -model /tmp/coco/parquet/lrcn_coco.model
8281 popd
8382
84- ##### (6) Submit the data for inference
83+ #### Run one of the two steps below, depending on whether you want to run a script or a notebook
84+ ##### (6 a) Submit the data for inference
8585 Note that the files below also need to be shipped as shown
8686 #-model the image-lstm pretrained model to ship
8787 #-imagenet the image network definition
@@ -93,25 +93,23 @@ Steps to run the COCO dataset for Image Captioning
9393 pushd ${CAFFE_ON_SPARK}/data/
9494 ln -s ~/Python2.7.10 Python2.7.10
9595 unzip ${CAFFE_ON_SPARK}/caffe-grid/target/caffeonsparkpythonapi.zip
96- cat /tmp/coco/parquet/vocab/part* > vocab.txt
9796 rm -rf /tmp/coco/parquet/df_caption_results_train2014
9897 spark-submit --master ${MASTER_URL} \
9998 --conf spark.cores.max=${TOTAL_CORES} \
10099 --conf spark.task.cpus=${CORES_PER_WORKER} \
101100 --conf spark.driver.extraLibraryPath="${DYLD_LIBRARY_PATH}:Python2.7.10/lib" \
102101 --conf spark.executorEnv.LD_LIBRARY_PATH="${DYLD_LIBRARY_PATH}:Python2.7.10/lib" \
103- --conf spark.pythonargs="-model /tmp/coco/parquet/lrcn_coco.model -imagenet lstm_deploy.prototxt -lstmnet lrcn_word_to_preds.deploy.prototxt -vocab vocab.txt -input /tmp/coco/parquet/df_embedded_train2014 -output /tmp/coco/parquet/df_caption_results_train2014" examples/ImageCaption.py
102+ --conf spark.pythonargs="-model /tmp/coco/parquet/lrcn_coco.model -imagenet lstm_deploy.prototxt -lstmnet lrcn_word_to_preds.deploy.prototxt -vocab /tmp/coco/parquet/vocab/part-00000 -input /tmp/coco/parquet/df_embedded_train2014 -output /tmp/coco/parquet/df_caption_results_train2014" examples/ImageCaption.py
104103 popd
105- ##### (7 ) Launch IPython Notebook
104+ ##### (6 b) Launch IPython Notebook
106105 export IPYTHON_OPTS="notebook --no-browser --ip=127.0.0.1"
107106 pushd ${CAFFE_ON_SPARK}/data/
108107 ln -s ~/Python2.7.10 Python2.7.10
109108 unzip ${CAFFE_ON_SPARK}/caffe-grid/target/caffeonsparkpythonapi.zip
110- cat /tmp/coco/parquet/vocab/part* > vocab.txt
111109 pyspark --master ${MASTER_URL} --deploy-mode client \
112110 --conf spark.driver.extraLibraryPath="${DYLD_LIBRARY_PATH}:Python2.7.10/lib" \
113111 --conf spark.executorEnv.LD_LIBRARY_PATH="${DYLD_LIBRARY_PATH}:Python2.7.10/lib" \
114- --files "${CAFFE_ON_SPARK}/data/lstm_deploy.prototxt,${CAFFE_ON_SPARK}/data/vocab.txt,${CAFFE_ON_SPARK}/data/lrcn_word_to_preds.deploy.prototxt,${CAFFE_ON_SPARK}/data/caffe/_caffe.so,${CAFFE_ON_SPARK}/data/bvlc_reference_net.prototxt,${CAFFE_ON_SPARK}/data/bvlc_reference_solver.prototxt,${CAFFE_ON_SPARK}/data/lrcn_cos.prototxt,${CAFFE_ON_SPARK}/data/lrcn_solver.prototxt" \
112+ --files "${CAFFE_ON_SPARK}/data/lstm_deploy.prototxt,${CAFFE_ON_SPARK}/data/vocab.txt/part-00000,${CAFFE_ON_SPARK}/data/lrcn_word_to_preds.deploy.prototxt,${CAFFE_ON_SPARK}/data/caffe/_caffe.so,${CAFFE_ON_SPARK}/data/bvlc_reference_net.prototxt,${CAFFE_ON_SPARK}/data/bvlc_reference_solver.prototxt,${CAFFE_ON_SPARK}/data/lrcn_cos.prototxt,${CAFFE_ON_SPARK}/data/lrcn_solver.prototxt" \
115113 --py-files "${CAFFE_ON_SPARK}/caffe-grid/target/caffeonsparkpythonapi.zip" \
116114 --jars "${CAFFE_ON_SPARK}/caffe-grid/target/caffe-grid-0.1-SNAPSHOT-jar-with-dependencies.jar" \
117115 --driver-library-path "${CAFFE_ON_SPARK}/caffe-grid/target/caffe-grid-0.1-SNAPSHOT-jar-with-dependencies.jar" \
0 commit comments