Commit 47b82e2a authored by Swaroop Vattam

Merge branch 'version4' into 'master'

Version4

See merge request !1
parents 2b0c227f b71865e6
Pipeline #9 passed with stage
in 22 minutes
......@@ -11,7 +11,7 @@ test:
- git lfs fetch --all
- pip3 install cerberus==1.3.1 deep_dircmp==0.1.0
- git clone --recursive https://gitlab.com/datadrivendiscovery/data-supply.git
- git -C data-supply checkout 51efe8f74ae2ec223a1540782945beee1f05bf00
- git -C data-supply checkout 4d67a8acee3fe5236900137a528bc48cf05731a3
script:
- |
......@@ -25,7 +25,7 @@ test:
if [ "${CI_COMMIT_REF_NAME}" = master ]; then
if [ -n "${GIT_ACCESS_USER}" -a -n "${GIT_ACCESS_TOKEN}" ]; then
echo "Pushing updated digests."
git remote set-url --push origin "https://${GIT_ACCESS_USER}:${GIT_ACCESS_TOKEN}@datasets.datadrivendiscovery.org/${CI_PROJECT_PATH}.git"
git remote set-url --push origin "https://${GIT_ACCESS_USER}:${GIT_ACCESS_TOKEN}@gitlab.datadrivendiscovery.org/${CI_PROJECT_PATH}.git"
git config --local user.email noreply@datadrivendiscovery.org
git config --local user.name "D3M CI"
if ! git diff --quiet ; then
......
# Public D3M datasets
# Private D3M datasets
This repository contains public D3M datasets.
This repository contains private D3M datasets. **Do not distribute them.**
**Public D3M datasets are available [here](https://datasets.datadrivendiscovery.org/d3m/datasets).**
Please report any issues with private datasets in [data-supply repository](https://gitlab.com/datadrivendiscovery/data-supply/issues).
Dataset schemas and related documentation are available in the [data-supply repository](https://gitlab.com/datadrivendiscovery/data-supply).
......@@ -9,15 +13,15 @@ Datasets schemas and related documentation is available in [data-supply reposito
Download datasets using [git LFS](https://git-lfs.github.com/):
```
$ git lfs clone git@datasets.datadrivendiscovery.org:d3m/datasets.git
$ git lfs clone git@gitlab.datadrivendiscovery.org:d3m/datasets.git
```
Note: use `git lfs clone` instead of `git clone` because the former
is faster.
This will take time and, above all, disk space. Currently all
datasets are around 46 GB, but the whole directory with cloned
repository and git metadata is around 65 GB. Running
datasets are around 54 GB, but the whole directory with cloned
repository and git metadata is around 84 GB. Running
`git lfs prune` might help by removing old and unreferenced files.
The repository is organized so that all files larger than 100 KB are
......@@ -31,7 +35,7 @@ It is possible to download only part of the repository. First clone
without downloading files managed by git LFS:
```
$ git lfs clone git@datasets.datadrivendiscovery.org:d3m/datasets.git -X "*"
$ git lfs clone git@gitlab.datadrivendiscovery.org:d3m/datasets.git -X "*"
```
This will download and checkout all files smaller than 100 KB.
......
......@@ -7,28 +7,32 @@
"license": "open",
"source": "USPS",
"sourceURI": "http://www.cad.zju.edu.cn/home/dengcai/Data/MLData.html",
"datasetSchemaVersion": "3.2.0",
"datasetSchemaVersion": "4.0.0",
"redacted": false,
"datasetVersion": "2.0",
"digest": "c75da4fea7a7d4e4c67b9ff8175ab646e2fab606e2006a2d929e3cc8de6d7012"
"datasetVersion": "4.0.0",
"digest": "dc64b78bce3f4a88dfdb0ebd834bbce42b81bb137c8bd4cd152db6777be5d62d"
},
"dataResources": [
{
"resID": "0",
"resPath": "media/",
"resType": "image",
"resFormat": [
"image/png"
],
"resFormat": {
"image/png": [
"png"
]
},
"isCollection": true
},
{
"resID": "learningData",
"resPath": "tables/learningData.csv",
"resType": "table",
"resFormat": [
"text/csv"
],
"resFormat": {
"text/csv": [
"csv"
]
},
"isCollection": false,
"columns": [
{
......
......@@ -3,10 +3,13 @@
"problemID": "124_188_usps_problem",
"problemName": "usps_problem",
"problemDescription": "Multiclass image classification problem. Each image belongs to one of 10 classes.",
"taskType": "classification",
"taskSubType": "multiClass",
"problemSchemaVersion": "3.2.0",
"problemVersion": "2.0"
"problemSchemaVersion": "4.0.0",
"problemVersion": "4.0.0",
"taskKeywords": [
"classification",
"multiClass",
"image"
]
},
"inputs": {
"data": [
......@@ -27,7 +30,27 @@
"testSize": 0.216,
"stratified": false,
"numRepeats": 0,
"splitsFile": "dataSplits.csv"
"splitsFile": "dataSplits.csv",
"datasetViewMaps": {
"train": [
{
"from": "124_188_usps_dataset",
"to": "124_188_usps_dataset_TRAIN"
}
],
"test": [
{
"from": "124_188_usps_dataset",
"to": "124_188_usps_dataset_TEST"
}
],
"score": [
{
"from": "124_188_usps_dataset",
"to": "124_188_usps_dataset_SCORE"
}
]
}
},
"performanceMetrics": [
{
......@@ -38,4 +61,4 @@
"expectedOutputs": {
"predictionsFile": "predictions.csv"
}
}
}
\ No newline at end of file
......@@ -7,28 +7,32 @@
"license": "open",
"source": "USPS",
"sourceURI": "http://www.cad.zju.edu.cn/home/dengcai/Data/MLData.html",
"datasetSchemaVersion": "3.2.0",
"datasetSchemaVersion": "4.0.0",
"redacted": false,
"datasetVersion": "2.0",
"digest": "a8f27ab2e3cb1443d8cfbe2b3a286774ef8012b0c6ba4e41e029f4eda06f20f7"
"datasetVersion": "4.0.0",
"digest": "73d76f80d119e04a0aef61cd014d312df5d786fae76a592ef8c0932f0a509914"
},
"dataResources": [
{
"resID": "0",
"resPath": "media/",
"resType": "image",
"resFormat": [
"image/png"
],
"resFormat": {
"image/png": [
"png"
]
},
"isCollection": true
},
{
"resID": "learningData",
"resPath": "tables/learningData.csv",
"resType": "table",
"resFormat": [
"text/csv"
],
"resFormat": {
"text/csv": [
"csv"
]
},
"isCollection": false,
"columns": [
{
......
{
"about": {
"problemID": "124_188_usps_problem_SCORE",
"problemID": "124_188_usps_problem",
"problemName": "usps_problem",
"problemDescription": "Multiclass image classification problem. Each image belongs to one of 10 classes.",
"taskType": "classification",
"taskSubType": "multiClass",
"problemSchemaVersion": "3.2.0",
"problemVersion": "2.0"
"problemSchemaVersion": "4.0.0",
"problemVersion": "4.0.0",
"taskKeywords": [
"classification",
"multiClass",
"image"
]
},
"inputs": {
"data": [
{
"datasetID": "124_188_usps_dataset_SCORE",
"datasetID": "124_188_usps_dataset",
"targets": [
{
"targetIndex": 0,
......@@ -27,7 +30,27 @@
"testSize": 0.216,
"stratified": false,
"numRepeats": 0,
"splitsFile": "dataSplits.csv"
"splitsFile": "dataSplits.csv",
"datasetViewMaps": {
"train": [
{
"from": "124_188_usps_dataset",
"to": "124_188_usps_dataset_TRAIN"
}
],
"test": [
{
"from": "124_188_usps_dataset",
"to": "124_188_usps_dataset_TEST"
}
],
"score": [
{
"from": "124_188_usps_dataset",
"to": "124_188_usps_dataset_SCORE"
}
]
}
},
"performanceMetrics": [
{
......
......@@ -7,28 +7,32 @@
"license": "open",
"source": "USPS",
"sourceURI": "http://www.cad.zju.edu.cn/home/dengcai/Data/MLData.html",
"datasetSchemaVersion": "3.2.0",
"datasetSchemaVersion": "4.0.0",
"redacted": false,
"datasetVersion": "2.0",
"digest": "6861672270a24f3f199f7175b4904e6a588e1ed7433ed7cb1bdbbdd0225f8274"
"datasetVersion": "4.0.0",
"digest": "2c26e78b65ae0ae3ee6dc2ad6f28eaec138c6c37a23552bd33b3db2461a0652b"
},
"dataResources": [
{
"resID": "0",
"resPath": "media/",
"resType": "image",
"resFormat": [
"image/png"
],
"resFormat": {
"image/png": [
"png"
]
},
"isCollection": true
},
{
"resID": "learningData",
"resPath": "tables/learningData.csv",
"resType": "table",
"resFormat": [
"text/csv"
],
"resFormat": {
"text/csv": [
"csv"
]
},
"isCollection": false,
"columns": [
{
......
{
"about": {
"problemID": "124_188_usps_problem_TEST",
"problemID": "124_188_usps_problem",
"problemName": "usps_problem",
"problemDescription": "Multiclass image classification problem. Each image belongs to one of 10 classes.",
"taskType": "classification",
"taskSubType": "multiClass",
"problemSchemaVersion": "3.2.0",
"problemVersion": "2.0"
"problemSchemaVersion": "4.0.0",
"problemVersion": "4.0.0",
"taskKeywords": [
"classification",
"multiClass",
"image"
]
},
"inputs": {
"data": [
{
"datasetID": "124_188_usps_dataset_TEST",
"datasetID": "124_188_usps_dataset",
"targets": [
{
"targetIndex": 0,
......@@ -27,7 +30,27 @@
"testSize": 0.216,
"stratified": false,
"numRepeats": 0,
"splitsFile": "dataSplits.csv"
"splitsFile": "dataSplits.csv",
"datasetViewMaps": {
"train": [
{
"from": "124_188_usps_dataset",
"to": "124_188_usps_dataset_TRAIN"
}
],
"test": [
{
"from": "124_188_usps_dataset",
"to": "124_188_usps_dataset_TEST"
}
],
"score": [
{
"from": "124_188_usps_dataset",
"to": "124_188_usps_dataset_SCORE"
}
]
}
},
"performanceMetrics": [
{
......
......@@ -7,28 +7,32 @@
"license": "open",
"source": "USPS",
"sourceURI": "http://www.cad.zju.edu.cn/home/dengcai/Data/MLData.html",
"datasetSchemaVersion": "3.2.0",
"datasetSchemaVersion": "4.0.0",
"redacted": false,
"datasetVersion": "2.0",
"digest": "4fdd5af8fca34e57aeaf7fe085c174c1050efeb45f0646670a4134e341fea6c1"
"datasetVersion": "4.0.0",
"digest": "33b71f121dcd1466895e4f83ff20272ae57397582277359a862a7cbade2c4cf4"
},
"dataResources": [
{
"resID": "0",
"resPath": "media/",
"resType": "image",
"resFormat": [
"image/png"
],
"resFormat": {
"image/png": [
"png"
]
},
"isCollection": true
},
{
"resID": "learningData",
"resPath": "tables/learningData.csv",
"resType": "table",
"resFormat": [
"text/csv"
],
"resFormat": {
"text/csv": [
"csv"
]
},
"isCollection": false,
"columns": [
{
......
{
"about": {
"problemID": "124_188_usps_problem_TRAIN",
"problemID": "124_188_usps_problem",
"problemName": "usps_problem",
"problemDescription": "Multiclass image classification problem. Each image belongs to one of 10 classes.",
"taskType": "classification",
"taskSubType": "multiClass",
"problemSchemaVersion": "3.2.0",
"problemVersion": "2.0"
"problemSchemaVersion": "4.0.0",
"problemVersion": "4.0.0",
"taskKeywords": [
"classification",
"multiClass",
"image"
]
},
"inputs": {
"data": [
{
"datasetID": "124_188_usps_dataset_TRAIN",
"datasetID": "124_188_usps_dataset",
"targets": [
{
"targetIndex": 0,
......@@ -27,7 +30,27 @@
"testSize": 0.216,
"stratified": false,
"numRepeats": 0,
"splitsFile": "dataSplits.csv"
"splitsFile": "dataSplits.csv",
"datasetViewMaps": {
"train": [
{
"from": "124_188_usps_dataset",
"to": "124_188_usps_dataset_TRAIN"
}
],
"test": [
{
"from": "124_188_usps_dataset",
"to": "124_188_usps_dataset_TEST"
}
],
"score": [
{
"from": "124_188_usps_dataset",
"to": "124_188_usps_dataset_SCORE"
}
]
}
},
"performanceMetrics": [
{
......
......@@ -8,19 +8,21 @@
"source": "OpenML",
"sourceURI": "http://www.openml.org/d/1491",
"approximateSize": "",
"datasetVersion": "2.0",
"datasetSchemaVersion": "3.2.0",
"datasetVersion": "4.0.0",
"datasetSchemaVersion": "4.0.0",
"redacted": false,
"digest": "f0a796e02c16e16e4edf199e7062eeb7f93abc80b1ca92b0cb00828feb5b5ed7"
"digest": "eacc121b204bc03038e24cc691fe1dfe05ca76b99e84867f6240e31a29bf3d57"
},
"dataResources": [
{
"resID": "learningData",
"resPath": "tables/learningData.csv",
"resType": "table",
"resFormat": [
"text/csv"
],
"resFormat": {
"text/csv": [
"csv"
]
},