I have a couple of features that are scaled between 0 and 1. For every one of them, calling `pdp_plot` raises "ValueError: cannot reindex from a duplicate axis". I assume that when the columns for the different values of a feature are created, their names are rounded, so several columns end up with the same name, although I couldn't trace the error back to the exact place in the code. Multiplying the column by 10 works around the problem, but is of course not an intended solution.
The error message is shown below.
Thanks for this beautiful package.
/home/cdsw/.local/lib/python3.6/site-packages/pdpbox/pdp.py in pdp_plot(pdp_isolate_out, feature_name, center, plot_org_pts, plot_lines, frac_to_plot, cluster, n_cluster_centers, cluster_method, x_quantile, figsize, ncols, plot_params, multi_flag, which_class)
546 _pdp_plot(pdp_isolate_out=pdp_isolate_out, feature_name=feature_name, center=center, plot_org_pts=plot_org_pts, plot_lines=plot_lines,
547 frac_to_plot=frac_to_plot, cluster=cluster, n_cluster_centers=n_cluster_centers, cluster_method=cluster_method, x_quantile=x_quantile,
--> 548 ax=ax2, plot_params=plot_params)
549
550
/home/cdsw/.local/lib/python3.6/site-packages/pdpbox/pdp.py in _pdp_plot(pdp_isolate_out, feature_name, center, plot_org_pts, plot_lines, frac_to_plot, cluster, n_cluster_centers, cluster_method, x_quantile, ax, plot_params)
616 pdp_y -= pdp_y[0]
617 for col in display_columns[1:]:
--> 618 ice_lines[col] -= ice_lines[display_columns[0]]
619 ice_lines['actual_preds'] -= ice_lines[display_columns[0]]
620 ice_lines[display_columns[0]] = 0
/home/cdsw/.local/lib/python3.6/site-packages/pandas/core/ops.py in f(self, other)
895
896 def f(self, other):
--> 897 result = method(self, other)
898
899 # this makes sure that we are aligned like the input
/home/cdsw/.local/lib/python3.6/site-packages/pandas/core/ops.py in f(self, other, axis, level, fill_value)
1552 return _combine_series_frame(self, other, na_op,
1553 fill_value=fill_value, axis=axis,
-> 1554 level=level, try_cast=True)
1555 else:
1556 if fill_value is not None:
/home/cdsw/.local/lib/python3.6/site-packages/pandas/core/ops.py in _combine_series_frame(self, other, func, fill_value, axis, level, try_cast)
1437 # default axis is columns
1438 return self._combine_match_columns(other, func, level=level,
-> 1439 try_cast=try_cast)
1440
1441
/home/cdsw/.local/lib/python3.6/site-packages/pandas/core/frame.py in _combine_match_columns(self, other, func, level, try_cast)
4767 def _combine_match_columns(self, other, func, level=None, try_cast=True):
4768 left, right = self.align(other, join='outer', axis=1, level=level,
-> 4769 copy=False)
4770
4771 new_data = left._data.eval(func=func, other=right,
/home/cdsw/.local/lib/python3.6/site-packages/pandas/core/frame.py in align(self, other, join, axis, level, copy, fill_value, method, limit, fill_axis, broadcast_axis)
3548 method=method, limit=limit,
3549 fill_axis=fill_axis,
-> 3550 broadcast_axis=broadcast_axis)
3551
3552 @Appender(_shared_docs['reindex'] % _shared_doc_kwargs)
/home/cdsw/.local/lib/python3.6/site-packages/pandas/core/generic.py in align(self, other, join, axis, level, copy, fill_value, method, limit, fill_axis, broadcast_axis)
7364 copy=copy, fill_value=fill_value,
7365 method=method, limit=limit,
-> 7366 fill_axis=fill_axis)
7367 else: # pragma: no cover
7368 raise TypeError('unsupported type: %s' % type(other))
/home/cdsw/.local/lib/python3.6/site-packages/pandas/core/generic.py in _align_series(self, other, join, axis, level, copy, fill_value, method, limit, fill_axis)
7461
7462 if lidx is not None:
-> 7463 fdata = fdata.reindex_indexer(join_index, lidx, axis=0)
7464 else:
7465 raise ValueError('Must specify axis=0 or 1')
/home/cdsw/.local/lib/python3.6/site-packages/pandas/core/internals.py in reindex_indexer(self, new_axis, indexer, axis, fill_value, allow_dups, copy)
4412 # some axes don't allow reindexing with dups
4413 if not allow_dups:
-> 4414 self.axes[axis]._can_reindex(indexer)
4415
4416 if axis >= self.ndim:
/home/cdsw/.local/lib/python3.6/site-packages/pandas/core/indexes/base.py in _can_reindex(self, indexer)
3558 # trying to reindex on an axis with duplicates
3559 if not self.is_unique and len(indexer):
-> 3560 raise ValueError("cannot reindex from a duplicate axis")
3561
3562 def reindex(self, target, method=None, level=None, limit=None,
ValueError: cannot reindex from a duplicate axis