|
28 | 28 | #' |
29 | 29 | #' @section Parameters: |
30 | 30 | #' The parameters are the parameters inherited from [`PipeOpTaskPreproc`], as well as: |
31 | | -#' * `renaming` :: named `character`\cr |
32 | | -#' Named `character` vector. The names of the vector specify the old column names that should be |
33 | | -#' changed to the new column names as given by the elements of the vector. Initialized to the empty |
34 | | -#' character vector. |
| 31 | +#' * `renaming` :: named `character` | `function`\cr |
| 32 | +#' Takes the form of either a named `character` or a `function`. |
| 33 | +#' For a named `character` vector, the names of the vector elements specify the |
| 34 | +#' old column names and the corresponding element values give the new column names. |
| 35 | +#' A `function` specifies how the old column names should be changed to the new column names. |
| 36 | +#' The function must return a `character` vector with one entry per input column name so that each selected column receives a new name. |
| 37 | +#' To choose columns use the `affect_columns` parameter. |
| 38 | +#' Initialized to `character(0)`. |
35 | 39 | #' * `ignore_missing` :: `logical(1)`\cr |
36 | 40 | #' Only used when `renaming` is a named `character` vector. If `TRUE`, names in `renaming` that are not found in the input [`Task`][mlr3::Task] are skipped. If this is |
37 | 41 | #' `FALSE`, then names in `renaming` that are not found in the [`Task`][mlr3::Task] cause an error. |
38 | 42 | #' Initialized to `FALSE`. |
39 | 43 | #' |
40 | 44 | #' @section Internals: |
41 | | -#' Uses the `$rename()` mutator of the [`Task`][mlr3::Task] to set the new column names. |
| 45 | +#' Uses the `$rename()` mutator of the [`Task`][mlr3::Task] to set new column names. |
42 | 46 | #' |
43 | 47 | #' @section Fields: |
44 | 48 | #' Only fields inherited from [`PipeOp`]. |
|
56 | 60 | #' task = tsk("iris") |
57 | 61 | #' pop = po("renamecolumns", param_vals = list(renaming = c("Petal.Length" = "PL"))) |
58 | 62 | #' pop$train(list(task)) |
| 63 | +#' |
| 64 | +#' pof = po("renamecolumns", param_vals = list(renaming = function(colnames) { |
| 65 | +#' sub("Petal", "P", colnames) |
| 66 | +#' })) |
| 67 | +#' pof$train(list(task)) |
| 68 | +#' |
# PipeOp that renames columns of a Task.
#
# The `renaming` hyperparameter is either a named character vector (names are the old column
# names, values are the new ones) or a function that maps the vector of feature names to a
# character vector of new names of the same length.
PipeOpRenameColumns = R6Class("PipeOpRenameColumns",
  inherit = PipeOpTaskPreprocSimple,
  public = list(
    # Set up the parameter set and forward to the parent constructor.
    #
    # id: identifier of the PipeOp, defaults to "renamecolumns".
    # param_vals: list of hyperparameter values that override the initial values set below.
    initialize = function(id = "renamecolumns", param_vals = list()) {
      ps = ps(
        renaming = p_uty(
          # Valid values: a character vector without missing values that has strict names and
          # whose elements are themselves strict names, OR any function.
          custom_check = crate(function(x) {
            (check_character(x, any.missing = FALSE, names = "strict") %check&&% check_names(x, type = "strict")) %check||%
              check_function(x)
          }),
          tags = c("train", "predict", "required")
        ),
        ignore_missing = p_lgl(tags = c("train", "predict", "required"))
      )
      ps$values = list(renaming = character(0), ignore_missing = FALSE)
      # NOTE(review): `can_subset_cols = TRUE` presumably enables the inherited `affect_columns`
      # parameter; confirm against PipeOpTaskPreproc.
      super$initialize(id, ps, param_vals = param_vals, can_subset_cols = TRUE)
    }
  ),
  private = list(
    # Compute the renaming to apply to `task`.
    #
    # Returns a list with `old_names` (columns to rename) and `new_names` (their replacements,
    # named by the old names). `.transform()` reads this list from `self$state`.
    .get_state = function(task) {
      # Read the hyperparameters once, the same way for both kinds of `renaming`.
      pv = self$param_set$get_values(tags = "train")

      if (is.function(pv$renaming)) {
        # Function case: feed it the feature names and require one new name per feature.
        old_names = task$feature_names
        new_names = pv$renaming(old_names)
        assert_character(new_names, any.missing = FALSE, len = length(old_names), .var.name = "the value returned by `renaming` function")
        names(new_names) = old_names
        return(list(old_names = old_names, new_names = new_names))
      }

      # Character case: only columns that are present in a non-target role may be renamed.
      new_names = pv$renaming
      innames = names(new_names)
      nontargets = task$col_roles
      nontargets$target = NULL
      takenames = innames %in% unlist(nontargets)
      if (!pv$ignore_missing && !all(takenames)) {
        # we can't rely on task$rename because it could also change the target name, which we don't want.
        stopf("The names %s from `renaming` parameter were not found in the Task.", str_collapse(innames[!takenames]))
      }
      list(old_names = innames[takenames], new_names = new_names[takenames])
    },

    # Apply the renaming stored in `self$state` to `task` and return the task.
    .transform = function(task) {
      if (!length(self$state$new_names)) {
        return(task)  # early exit: nothing to rename
      }
      task$rename(old = self$state$old_names, new = self$state$new_names)
    }
  )
)
|
0 commit comments