{
  "_id": "6a1ed620b401979e7340e911",
  "Package": "dataPreparation",
  "Title": "Automated Data Preparation",
  "Version": "1.1.2",
  "Authors@R": "person(\"Emmanuel-Lin\", \"Toulemonde\", email = \"el.toulemonde@protonmail.com\", role = c(\"aut\", \"cre\"))",
  "Description": "Do most of the painful data preparation for a data science\nproject with a minimum amount of code; Take advantages of\n'data.table' efficiency and use some algorithmic trick in order\nto perform data preparation in a time and RAM efficient way.",
  "License": "GPL-3 | file LICENSE",
  "LazyData": "true",
  "Encoding": "UTF-8",
  "RoxygenNote": "7.3.2",
  "BugReports": "https://github.com/ELToulemonde/dataPreparation/issues",
  "Config/pak/sysreqs": "libicu-dev",
  "Repository": "https://eltoulemonde.r-universe.dev",
  "Date/Publication": "2025-09-02 11:41:20 UTC",
  "RemoteUrl": "https://github.com/eltoulemonde/datapreparation",
  "RemoteRef": "HEAD",
  "RemoteSha": "5748e2dd552e449302ada05d35e20eb6b8ac4ad8",
  "NeedsCompilation": "no",
  "Packaged": {
    "Date": "2026-05-20 10:26:41 UTC",
    "User": "root"
  },
  "Author": "Emmanuel-Lin Toulemonde [aut, cre]",
  "Maintainer": "Emmanuel-Lin Toulemonde <el.toulemonde@protonmail.com>",
  "MD5sum": "8004721783b943a2181b404b04af2056",
  "_user": "eltoulemonde",
  "_type": "src",
  "_file": "dataPreparation_1.1.2.tar.gz",
  "_fileid": "6aa86d78cf733db8b40b84df69c2794b819998a73e705b1571904526999cce4f",
  "_filesize": 1490806,
  "_sha256": "6aa86d78cf733db8b40b84df69c2794b819998a73e705b1571904526999cce4f",
  "_created": "2026-05-20T10:26:41.000Z",
  "_published": "2026-06-02T13:09:52.056Z",
  "_distro": "noble",
  "_jobs": [
    {
      "job": 79077989882,
      "time": 146,
      "config": "linux-devel-x86_64",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7107533640"
    },
    {
      "job": 79077990149,
      "time": 163,
      "config": "linux-release-x86_64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7107538710"
    },
    {
      "job": 79077989875,
      "time": 182,
      "config": "macos-oldrel-arm64",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7107554362"
    },
    {
      "job": 79077989877,
      "time": 137,
      "config": "macos-release-arm64",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7107537177"
    },
    {
      "job": 79077988768,
      "time": 175,
      "config": "source",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7107485034"
    },
    {
      "job": 79077989253,
      "time": 110,
      "config": "wasm-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7358566887"
    },
    {
      "job": 79077989601,
      "time": 122,
      "config": "windows-devel",
      "r": "4.7.0",
      "check": "OK",
      "artifact": "7107525526"
    },
    {
      "job": 79077990323,
      "time": 108,
      "config": "windows-oldrel",
      "r": "4.5.3",
      "check": "OK",
      "artifact": "7107521173"
    },
    {
      "job": 79077989991,
      "time": 111,
      "config": "windows-release",
      "r": "4.6.0",
      "check": "OK",
      "artifact": "7107522595"
    }
  ],
  "_buildurl": "https://github.com/r-universe/eltoulemonde/actions/runs/26156499395",
  "_status": "success",
  "_host": "GitHub-Actions",
  "_upstream": "https://github.com/eltoulemonde/datapreparation",
  "_commit": {
    "id": "5748e2dd552e449302ada05d35e20eb6b8ac4ad8",
    "author": "Emmanuel-Lin Toulemonde <toul@octo.com>",
    "committer": "Emmanuel-Lin Toulemonde <toul@octo.com>",
    "message": "Finish fixing doc\n",
    "time": 1756813280
  },
  "_maintainer": {
    "name": "Emmanuel-Lin Toulemonde",
    "email": "el.toulemonde@protonmail.com",
    "login": "eltoulemonde",
    "description": "",
    "uuid": 27816336
  },
  "_registered": true,
  "_dependencies": [
    {
      "package": "R",
      "version": ">= 3.6.0",
      "role": "Depends"
    },
    {
      "package": "data.table",
      "role": "Imports"
    },
    {
      "package": "lubridate",
      "role": "Imports"
    },
    {
      "package": "stringr",
      "role": "Imports"
    },
    {
      "package": "Matrix",
      "role": "Imports"
    },
    {
      "package": "progress",
      "role": "Imports"
    },
    {
      "package": "testthat",
      "version": ">= 2.0.0",
      "role": "Suggests"
    }
  ],
  "_owner": "eltoulemonde",
  "_selfowned": true,
  "_usedby": 0,
  "_updates": [
    {
      "week": "2025-36",
      "n": 3
    }
  ],
  "_tags": [],
  "_topics": [
    "data-preparation",
    "data-preprocessing",
    "data-science",
    "date-conversion",
    "speed",
    "variable-elimination",
    "variable-selection"
  ],
  "_stars": 33,
  "_contributors": [
    {
      "user": "eltoulemonde",
      "count": 82,
      "uuid": 27816336
    },
    {
      "user": "xavierfontaine",
      "count": 2,
      "uuid": 12429906
    },
    {
      "user": "earino",
      "count": 1,
      "uuid": 3258
    }
  ],
  "_userbio": {
    "uuid": 27816336,
    "type": "user",
    "name": "ELToulemonde"
  },
  "_downloads": {
    "count": 707,
    "source": "https://cranlogs.r-pkg.org/downloads/total/last-month/dataPreparation"
  },
  "_devurl": "https://github.com/eltoulemonde/datapreparation",
  "_searchresults": 147,
  "_rbuild": "4.6.0",
  "_assets": [
    "extra/citation.cff",
    "extra/citation.html",
    "extra/citation.json",
    "extra/citation.txt",
    "extra/contents.json",
    "extra/dataPreparation.html",
    "extra/NEWS.html",
    "extra/NEWS.txt",
    "extra/readme.html",
    "extra/readme.md",
    "manual.pdf"
  ],
  "_homeurl": "https://github.com/eltoulemonde/datapreparation",
  "_realowner": "eltoulemonde",
  "_cranurl": true,
  "_releases": [
    {
      "version": "0.1",
      "date": "2017-07-07"
    },
    {
      "version": "0.2",
      "date": "2017-08-18"
    },
    {
      "version": "0.3.2",
      "date": "2017-10-25"
    },
    {
      "version": "0.3.4",
      "date": "2017-12-20"
    },
    {
      "version": "0.3.5",
      "date": "2018-02-16"
    },
    {
      "version": "0.3.6",
      "date": "2018-05-11"
    },
    {
      "version": "0.3.7",
      "date": "2018-08-20"
    },
    {
      "version": "0.3.8",
      "date": "2018-10-17"
    },
    {
      "version": "0.3.9",
      "date": "2019-01-02"
    },
    {
      "version": "0.4.0",
      "date": "2019-03-25"
    },
    {
      "version": "0.4.1",
      "date": "2019-07-19"
    },
    {
      "version": "0.4.2",
      "date": "2019-11-14"
    },
    {
      "version": "0.4.3",
      "date": "2020-02-12"
    },
    {
      "version": "1.0.0",
      "date": "2020-11-13"
    },
    {
      "version": "1.0.1",
      "date": "2020-12-16"
    },
    {
      "version": "1.0.2",
      "date": "2021-09-20"
    },
    {
      "version": "1.0.3",
      "date": "2021-11-19"
    },
    {
      "version": "1.0.4",
      "date": "2021-12-21"
    },
    {
      "version": "1.0.5",
      "date": "2022-07-15"
    },
    {
      "version": "1.1.1",
      "date": "2023-07-04"
    },
    {
      "version": "1.1.2",
      "date": "2025-09-02"
    }
  ],
  "_exports": [
    "aggregate_by_key",
    "as.POSIXct_fast",
    "build_bins",
    "build_date_factor",
    "build_encoding",
    "build_scales",
    "build_target_encoding",
    "compute_probability_ratio",
    "compute_weight_of_evidence",
    "data_preparation_news",
    "date_format_unifier",
    "description",
    "fast_discretization",
    "fast_filter_variables",
    "fast_handle_na",
    "fast_is_equal",
    "fast_round",
    "fast_scale",
    "find_and_transform_dates",
    "find_and_transform_numerics",
    "generate_date_diffs",
    "generate_factor_from_date",
    "generate_from_character",
    "generate_from_factor",
    "get_most_frequent_element",
    "identify_dates",
    "one_hot_encoder",
    "prepare_set",
    "remove_percentile_outlier",
    "remove_rare_categorical",
    "remove_sd_outlier",
    "same_shape",
    "set_as_numeric_matrix",
    "set_col_as_character",
    "set_col_as_date",
    "set_col_as_factor",
    "set_col_as_numeric",
    "shape_set",
    "target_encode",
    "un_factor",
    "which_are_bijection",
    "which_are_constant",
    "which_are_in_double",
    "which_are_included"
  ],
  "_datasets": [
    {
      "name": "adult",
      "title": "Adult for UCI repository",
      "object": "adult",
      "class": [
        "data.frame"
      ],
      "fields": [
        "age",
        "type_employer",
        "fnlwgt",
        "education",
        "education_num",
        "marital",
        "occupation",
        "relationship",
        "race",
        "sex",
        "capital_gain",
        "capital_loss",
        "hr_per_week",
        "country",
        "income"
      ],
      "rows": 32561,
      "table": true,
      "tojson": true
    },
    {
      "name": "messy_adult",
      "title": "Adult with some ugly columns added",
      "object": "messy_adult",
      "class": [
        "data.table",
        "data.frame"
      ],
      "fields": [
        "date1",
        "date2",
        "date3",
        "date4",
        "num1",
        "num2",
        "constant",
        "mail",
        "num3",
        "age",
        "type_employer",
        "fnlwgt",
        "education",
        "education_num",
        "marital",
        "occupation",
        "relationship",
        "race",
        "sex",
        "capital_gain",
        "capital_loss",
        "hr_per_week",
        "country",
        "income"
      ],
      "rows": 32561,
      "table": true,
      "tojson": true
    },
    {
      "name": "tiny_messy_adult",
      "title": "First 500 rows of 'messy_adult'",
      "object": "tiny_messy_adult",
      "class": [
        "data.table",
        "data.frame"
      ],
      "fields": [
        "date1",
        "date2",
        "date3",
        "date4",
        "num1",
        "num2",
        "constant",
        "mail",
        "num3",
        "age",
        "type_employer",
        "fnlwgt",
        "education",
        "education_num",
        "marital",
        "occupation",
        "relationship",
        "race",
        "sex",
        "capital_gain",
        "capital_loss",
        "hr_per_week",
        "country",
        "income"
      ],
      "rows": 500,
      "table": true,
      "tojson": true
    }
  ],
  "_help": [
    {
      "page": "adult",
      "title": "Adult for UCI repository",
      "topics": [
        "adult"
      ]
    },
    {
      "page": "aggregate_by_key",
      "title": "Automatic data_set aggregation by key",
      "topics": [
        "aggregate_by_key"
      ]
    },
    {
      "page": "as.POSIXct_fast",
      "title": "Faster date transformation",
      "topics": [
        "as.POSIXct_fast"
      ]
    },
    {
      "page": "build_bins",
      "title": "Compute bins",
      "topics": [
        "build_bins"
      ]
    },
    {
      "page": "build_date_factor",
      "title": "Date Factor",
      "topics": [
        "build_date_factor"
      ]
    },
    {
      "page": "build_encoding",
      "title": "Compute encoding",
      "topics": [
        "build_encoding"
      ]
    },
    {
      "page": "build_scales",
      "title": "Compute scales",
      "topics": [
        "build_scales"
      ]
    },
    {
      "page": "build_target_encoding",
      "title": "Build target encoding",
      "topics": [
        "build_target_encoding"
      ]
    },
    {
      "page": "compute_probability_ratio",
      "title": "Compute probability ratio",
      "topics": [
        "compute_probability_ratio"
      ]
    },
    {
      "page": "compute_weight_of_evidence",
      "title": "Compute weight of evidence",
      "topics": [
        "compute_weight_of_evidence"
      ]
    },
    {
      "page": "data_preparation_news",
      "title": "Show the NEWS file",
      "topics": [
        "data_preparation_news"
      ]
    },
    {
      "page": "date_format_unifier",
      "title": "Unify dates format",
      "topics": [
        "date_format_unifier"
      ]
    },
    {
      "page": "description",
      "title": "Describe data set",
      "topics": [
        "description"
      ]
    },
    {
      "page": "fast_discretization",
      "title": "Discretization",
      "topics": [
        "fast_discretization"
      ]
    },
    {
      "page": "fast_filter_variables",
      "title": "Filtering useless variables",
      "topics": [
        "fast_filter_variables"
      ]
    },
    {
      "page": "fast_handle_na",
      "title": "Handle NA values",
      "topics": [
        "fast_handle_na"
      ]
    },
    {
      "page": "fast_is_equal",
      "title": "Fast checks of equality",
      "topics": [
        "fast_is_equal"
      ]
    },
    {
      "page": "fast_round",
      "title": "Fast round",
      "topics": [
        "fast_round"
      ]
    },
    {
      "page": "fast_scale",
      "title": "scale",
      "topics": [
        "fast_scale"
      ]
    },
    {
      "page": "find_and_transform_dates",
      "title": "Identify date columns",
      "topics": [
        "find_and_transform_dates"
      ]
    },
    {
      "page": "find_and_transform_numerics",
      "title": "Identify numeric columns in a data_set set",
      "topics": [
        "find_and_transform_numerics"
      ]
    },
    {
      "page": "generate_date_diffs",
      "title": "Date difference",
      "topics": [
        "generate_date_diffs"
      ]
    },
    {
      "page": "generate_factor_from_date",
      "title": "Generate factor from dates",
      "topics": [
        "generate_factor_from_date"
      ]
    },
    {
      "page": "generate_from_character",
      "title": "Recode character",
      "topics": [
        "generate_from_character"
      ]
    },
    {
      "page": "generate_from_factor",
      "title": "Recode factor",
      "topics": [
        "generate_from_factor"
      ]
    },
    {
      "page": "get_most_frequent_element",
      "title": "Get most frequent element",
      "topics": [
        "get_most_frequent_element"
      ]
    },
    {
      "page": "identify_dates",
      "title": "Identify date columns",
      "topics": [
        "identify_dates"
      ]
    },
    {
      "page": "messy_adult",
      "title": "Adult with some ugly columns added",
      "topics": [
        "messy_adult"
      ]
    },
    {
      "page": "one_hot_encoder",
      "title": "One hot encoder",
      "topics": [
        "one_hot_encoder"
      ]
    },
    {
      "page": "prepare_set",
      "title": "Preparation pipeline",
      "topics": [
        "prepare_set"
      ]
    },
    {
      "page": "remove_percentile_outlier",
      "title": "Percentile outlier filtering",
      "topics": [
        "remove_percentile_outlier"
      ]
    },
    {
      "page": "remove_rare_categorical",
      "title": "Filter rare categories",
      "topics": [
        "remove_rare_categorical"
      ]
    },
    {
      "page": "remove_sd_outlier",
      "title": "Standard deviation outlier filtering",
      "topics": [
        "remove_sd_outlier"
      ]
    },
    {
      "page": "same_shape",
      "title": "Give same shape",
      "topics": [
        "same_shape"
      ]
    },
    {
      "page": "set_as_numeric_matrix",
      "title": "Numeric matrix preparation for Machine Learning.",
      "topics": [
        "set_as_numeric_matrix"
      ]
    },
    {
      "page": "set_col_as_character",
      "title": "Set columns as character",
      "topics": [
        "set_col_as_character"
      ]
    },
    {
      "page": "set_col_as_date",
      "title": "Set columns as POSIXct",
      "topics": [
        "set_col_as_date"
      ]
    },
    {
      "page": "set_col_as_factor",
      "title": "Set columns as factor",
      "topics": [
        "set_col_as_factor"
      ]
    },
    {
      "page": "set_col_as_numeric",
      "title": "Set columns as numeric",
      "topics": [
        "set_col_as_numeric"
      ]
    },
    {
      "page": "shape_set",
      "title": "Final preparation before ML algorithm",
      "topics": [
        "shape_set"
      ]
    },
    {
      "page": "target_encode",
      "title": "Target encode",
      "topics": [
        "target_encode"
      ]
    },
    {
      "page": "tiny_messy_adult",
      "title": "First 500 rows of 'messy_adult'",
      "topics": [
        "tiny_messy_adult"
      ]
    },
    {
      "page": "un_factor",
      "title": "Unfactor factor with too many values",
      "topics": [
        "un_factor"
      ]
    },
    {
      "page": "which_are_bijection",
      "title": "Identify bijections",
      "topics": [
        "which_are_bijection"
      ]
    },
    {
      "page": "which_are_constant",
      "title": "Identify constant columns",
      "topics": [
        "which_are_constant"
      ]
    },
    {
      "page": "which_are_in_double",
      "title": "Identify double columns",
      "topics": [
        "which_are_in_double"
      ]
    },
    {
      "page": "which_are_included",
      "title": "Identify columns that are included in others",
      "topics": [
        "which_are_included"
      ]
    }
  ],
  "_readme": "https://github.com/eltoulemonde/datapreparation/raw/HEAD/README.md",
  "_rundeps": [
    "cli",
    "cpp11",
    "crayon",
    "data.table",
    "generics",
    "glue",
    "hms",
    "lattice",
    "lifecycle",
    "lubridate",
    "magrittr",
    "Matrix",
    "pkgconfig",
    "prettyunits",
    "progress",
    "R6",
    "rlang",
    "stringi",
    "stringr",
    "timechange",
    "vctrs"
  ],
  "_score": 5.6858312746260635,
  "_indexed": true,
  "_nocasepkg": "datapreparation",
  "_universes": [
    "eltoulemonde"
  ],
  "_binaries": [
    {
      "r": "4.7.0",
      "os": "linux",
      "version": "1.1.2",
      "date": "2026-05-20T10:28:45.000Z",
      "distro": "noble",
      "commit": "5748e2dd552e449302ada05d35e20eb6b8ac4ad8",
      "fileid": "895a3affb50419fda5cea90f5b8cbb762989a6545212ae1af17f868fea0ab3b7",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/eltoulemonde/actions/runs/26156499395"
    },
    {
      "r": "4.6.0",
      "os": "linux",
      "version": "1.1.2",
      "date": "2026-05-20T10:29:00.000Z",
      "distro": "noble",
      "commit": "5748e2dd552e449302ada05d35e20eb6b8ac4ad8",
      "fileid": "c86dac8776d1f004743013ee6889d8751a6655d8efe10ddfe3716a070a6c9405",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/eltoulemonde/actions/runs/26156499395"
    },
    {
      "r": "4.5.3",
      "os": "mac",
      "version": "1.1.2",
      "date": "2026-05-20T10:30:04.000Z",
      "commit": "5748e2dd552e449302ada05d35e20eb6b8ac4ad8",
      "fileid": "c9c530c9428a56576bc401b0b3dab65663541514a9408d8b9293f90fa8aac1d0",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/eltoulemonde/actions/runs/26156499395"
    },
    {
      "r": "4.6.0",
      "os": "mac",
      "version": "1.1.2",
      "date": "2026-05-20T10:29:14.000Z",
      "commit": "5748e2dd552e449302ada05d35e20eb6b8ac4ad8",
      "fileid": "88e11a5c87da7676d8bfca2efe16744ac786bf2ee0a2687b51cc44d2808b16bf",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/eltoulemonde/actions/runs/26156499395"
    },
    {
      "r": "4.7.0",
      "os": "win",
      "version": "1.1.2",
      "date": "2026-05-20T10:28:03.000Z",
      "commit": "5748e2dd552e449302ada05d35e20eb6b8ac4ad8",
      "fileid": "cde3e06e1e85240ff41c324e9fa0dbc7698407f8d4be66051327de4a1b4c2439",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/eltoulemonde/actions/runs/26156499395"
    },
    {
      "r": "4.5.3",
      "os": "win",
      "version": "1.1.2",
      "date": "2026-05-20T10:27:58.000Z",
      "commit": "5748e2dd552e449302ada05d35e20eb6b8ac4ad8",
      "fileid": "c4afb84bd64f1be3beccc2281f41440474668e9ce16444780da9ced739ffe6e2",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/eltoulemonde/actions/runs/26156499395"
    },
    {
      "r": "4.6.0",
      "os": "win",
      "version": "1.1.2",
      "date": "2026-05-20T10:27:59.000Z",
      "commit": "5748e2dd552e449302ada05d35e20eb6b8ac4ad8",
      "fileid": "86d721563152f74e9aa8ef688c1fba3b89a8da7345c8a680c4b01e7135a66425",
      "status": "success",
      "check": "OK",
      "buildurl": "https://github.com/r-universe/eltoulemonde/actions/runs/26156499395"
    },
    {
      "r": "4.6.0",
      "os": "wasm",
      "version": "1.1.2",
      "date": "2026-06-02T13:09:24.000Z",
      "commit": "5748e2dd552e449302ada05d35e20eb6b8ac4ad8",
      "fileid": "36ebdb4626e4b26465bf7b61d1f7d57a91ea1f7eba4a2c52fba366aafe3f832e",
      "status": "success",
      "buildurl": "https://github.com/r-universe/eltoulemonde/actions/runs/26156499395"
    }
  ]
}