diff --git a/openrefine/reconcile-csv/Dockerfile b/openrefine/reconcile-csv/Dockerfile
new file mode 100644
index 0000000..80c5260
--- /dev/null
+++ b/openrefine/reconcile-csv/Dockerfile
@@ -0,0 +1,19 @@
+#
+# Dockerfile for openrefine-reconcile-csv
+#
+
+FROM java:8-jre-alpine
+
+LABEL maintainer="kev"
+
+ADD http://okfnlabs.org/reconcile-csv/dist/reconcile-csv-0.1.2.jar /app/
+
+WORKDIR /data
+
+EXPOSE 8000
+
+# Shell form is intentional: the variables below are expanded when the
+# container starts. JAVA_OPTS is unquoted so that multiple JVM flags split
+# into separate arguments (the JVM does not read JAVA_OPTS on its own), and
+# `exec` makes java replace the shell so it receives signals directly.
+CMD exec java ${JAVA_OPTS} -jar /app/reconcile-csv-0.1.2.jar "${CSV_FILE}" "${SEARCH_COLUMN}" "${ID_COLUMN}"
diff --git a/openrefine/reconcile-csv/README.md b/openrefine/reconcile-csv/README.md
new file mode 100644
index 0000000..d18c083
--- /dev/null
+++ b/openrefine/reconcile-csv/README.md
@@ -0,0 +1,47 @@
+reconcile-csv
+=============
+
+[Reconcile-csv][1] is a reconciliation service for [OpenRefine][2] running from a
+CSV file. It uses fuzzy matching to match entries in one dataset to entries in
+another dataset, helping to introduce unique IDs into the system - so they can
+be used to join your data painlessly.
+
+## docker-compose.yml
+
+```yaml
+reconcile-csv:
+  image: vimagick/openrefine-reconcile-csv
+  ports:
+    - "8000:8000"
+  volumes:
+    - ./data:/data
+  environment:
+    - JAVA_OPTS=-Xmx2g
+    - CSV_FILE=input.csv
+    - SEARCH_COLUMN=name
+    - ID_COLUMN=id
+  restart: unless-stopped
+```
+
+## input.csv
+
+```csv
+id,name
+1,kevin
+2,tom
+3,sarah
+4,mike
+5,lucy
+```
+
+## up and running
+
+```bash
+$ docker-compose up -d
+$ curl 'http://localhost:8000/reconcile?query=kev'
+$ curl 'http://localhost:8000/reconcile?query=%7B%22query%22:%22kev%22,%22limit%22:1%7D'
+$ curl http://localhost:8000/view/1
+```
+
+[1]: http://okfnlabs.org/reconcile-csv/
+[2]: https://github.com/OpenRefine/OpenRefine/wiki
diff --git a/openrefine/reconcile-csv/data/input.csv b/openrefine/reconcile-csv/data/input.csv
new file mode 100644
index 0000000..8878ada
--- /dev/null
+++ b/openrefine/reconcile-csv/data/input.csv
@@ -0,0 +1,6 @@
+id,name
+1,kevin
+2,tom
+3,sarah
+4,mike
+5,lucy
diff --git a/openrefine/reconcile-csv/docker-compose.yml b/openrefine/reconcile-csv/docker-compose.yml
new file mode 100644
index 0000000..0abbf1d
--- /dev/null
+++ b/openrefine/reconcile-csv/docker-compose.yml
@@ -0,0 +1,12 @@
+reconcile-csv:
+  image: vimagick/openrefine-reconcile-csv
+  ports:
+    - "8000:8000"
+  volumes:
+    - ./data:/data
+  environment:
+    - JAVA_OPTS=-Xmx2g
+    - CSV_FILE=input.csv
+    - SEARCH_COLUMN=name
+    - ID_COLUMN=id
+  restart: unless-stopped