diff --git a/README.md b/README.md
index 0185f93..7ede4e7 100644
--- a/README.md
+++ b/README.md
@@ -71,6 +71,10 @@ A collection of delicious docker recipes.
 - [x] hubot :octocat:
 - [x] jenkins-arm :beetle:
 
+## Machine Learning
+
+- [x] ludwig
+
 ## Cluster
 
 - [x] ggr
diff --git a/ludwig/Dockerfile b/ludwig/Dockerfile
new file mode 100644
index 0000000..c2daee9
--- /dev/null
+++ b/ludwig/Dockerfile
@@ -0,0 +1,24 @@
+# Ludwig (https://github.com/uber/ludwig) on top of the official TensorFlow image.
+# NOTE(review): "latest-py3" is a moving tag; consider pinning a TensorFlow
+# release that the cloned Ludwig revision supports to keep builds reproducible.
+FROM tensorflow/tensorflow:latest-py3
+
+# Refresh the package index in the same layer as the install so a stale or
+# empty index cannot break the build, then drop the lists to keep the layer small.
+RUN apt-get update \
+    && apt-get install -y git \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install Ludwig from source together with all optional requirement sets.
+RUN git clone --depth=1 https://github.com/uber/ludwig.git \
+    && cd ludwig/ \
+    && pip install -r requirements.txt -r requirements_text.txt \
+                   -r requirements_image.txt -r requirements_audio.txt \
+                   -r requirements_serve.txt -r requirements_viz.txt \
+    && python setup.py install
+
+# docker-compose.yml bind-mounts ./data here; relative paths resolve against it.
+WORKDIR /data
+
+# Container arguments are passed straight to the ludwig CLI.
+ENTRYPOINT ["ludwig"]
diff --git a/ludwig/README.md b/ludwig/README.md
new file mode 100644
index 0000000..8d28555
--- /dev/null
+++ b/ludwig/README.md
@@ -0,0 +1,70 @@
+ludwig
+======
+
+[Ludwig][1] is a toolbox that allows you to train and test deep learning models
+without the need to write code.
+
+## up and running
+
+```bash
+$ mkdir -p data
+$ vim data/model.yaml
+
+$ wget http://boston.lti.cs.cmu.edu/classes/95-865-K/HW/HW2/epinions.zip
+$ unzip epinions.zip
+$ mv epinions/epinions-1.csv data/train.csv
+$ mv epinions/epinions-2.csv data/predict.csv
+
+$ tree data
+├── model.yaml
+├── predict.csv
+└── train.csv
+
+$ docker-compose run --rm train
+$ docker-compose run --rm visualize
+$ docker-compose run --rm predict
+$ docker-compose up -d serve
+
+$ curl http://127.0.0.1:8000/predict -X POST -F 'text=taking photos and recording videos'
+{
+  "class_predictions": "Camera",
+  "class_probabilities_": 9.438252263072044e-11,
+  "class_probabilities_Auto": 0.32920214533805847,
+  "class_probabilities_Camera": 0.6707978248596191,
+  "class_probability": 0.6707978248596191
+}
+
+$ curl http://127.0.0.1:8000/predict -X POST -F 'text=looking to buy a new sports car'
+{
+  "class_predictions": "Auto",
+  "class_probabilities_": 1.900043131457165e-15,
+  "class_probabilities_Auto": 0.9999126195907593,
+  "class_probabilities_Camera": 8.738834003452212e-05,
+  "class_probability": 0.9999126195907593
+}
+
+$ tree -L 3 data
+├── model.yaml
+├── predict.csv
+├── train.csv
+├── results
+│   └── experiment_run
+│       ├── description.json
+│       ├── model
+│       └── training_statistics.json
+├── results_0
+│   ├── class_predictions.csv
+│   ├── class_predictions.npy
+│   ├── class_probabilities.csv
+│   ├── class_probabilities.npy
+│   ├── class_probability.csv
+│   └── class_probability.npy
+└── visualize
+    ├── learning_curves_class_accuracy.png
+    ├── learning_curves_class_hits_at_k.png
+    ├── learning_curves_class_loss.png
+    ├── learning_curves_combined_accuracy.png
+    └── learning_curves_combined_loss.png
+```
+
+[1]: https://uber.github.io/ludwig/
diff --git a/ludwig/data/model.yaml b/ludwig/data/model.yaml
new file mode 100644
index 0000000..f129f9f
--- /dev/null
+++ b/ludwig/data/model.yaml
@@ -0,0 +1,11 @@
+input_features:
+  -
+    name: text
+    type: text
+    level: word
+    encoder: 
parallel_cnn
+
+output_features:
+  -
+    name: class
+    type: category
diff --git a/ludwig/docker-compose.yml b/ludwig/docker-compose.yml
new file mode 100644
index 0000000..5b08239
--- /dev/null
+++ b/ludwig/docker-compose.yml
@@ -0,0 +1,26 @@
+# Ludwig tasks; every service shares ./data (mounted at /data) as its workspace.
+
+# Train a model on train.csv using the definition in model.yaml.
+train: &ludwig
+  image: vimagick/ludwig
+  volumes:
+    - ./data:/data
+  command: train --data_csv train.csv -mdf model.yaml
+
+# Render learning-curve PNGs from the training statistics into visualize/.
+visualize:
+  <<: *ludwig
+  command: visualize -v learning_curves -trs results/experiment_run/training_statistics.json -od visualize -ff png
+
+# Run predictions for predict.csv with the trained model.
+predict:
+  <<: *ludwig
+  command: predict --data_csv predict.csv -m results/experiment_run/model
+
+# Serve the trained model on port 8000.
+serve:
+  <<: *ludwig
+  command: serve -m results/experiment_run/model -p 8000
+  ports:
+    - "8000:8000"
+  restart: unless-stopped