6969
7070
7171def _single_replace (self , to_replace , method , inplace , limit ):
72+ """
73+ Replaces values in a Series using the fill method specified when no
74+ replacement value is given in the replace method
75+ """
7276 if self .ndim != 1 :
7377 raise TypeError ('cannot replace {0} with method {1} on a {2}'
7478 .format (to_replace , method , type (self ).__name__ ))
@@ -4787,94 +4791,111 @@ def bfill(self, axis=None, inplace=False, limit=None, downcast=None):
47874791 return self .fillna (method = 'bfill' , axis = axis , inplace = inplace ,
47884792 limit = limit , downcast = downcast )
47894793
4790- def replace (self , to_replace = None , value = None , inplace = False , limit = None ,
4791- regex = False , method = 'pad' , axis = None ):
4792- """
4794+ _shared_docs ['replace' ] = ("""
47934795 Replace values given in 'to_replace' with 'value'.
47944796
47954797 Parameters
47964798 ----------
47974799 to_replace : str, regex, list, dict, Series, numeric, or None
47984800
4799- * str or regex:
4801+ * numeric, str or regex:
48004802
4801- - str: string exactly matching `to_replace` will be replaced
4802- with `value`
4803- - regex: regexs matching `to_replace` will be replaced with
4804- `value`
4803+ - numeric: numeric values equal to ``to_replace`` will be
4804+ replaced with ``value``
4805+ - str: string exactly matching ``to_replace`` will be replaced
4806+ with ``value``
4807+ - regex: regexes matching ``to_replace`` will be replaced with
4808+ ``value``
48054809
48064810 * list of str, regex, or numeric:
48074811
4808- - First, if `to_replace` and `value` are both lists, they
4812+ - First, if ``to_replace`` and ``value`` are both lists, they
48094813 **must** be the same length.
48104814 - Second, if ``regex=True`` then all of the strings in **both**
48114815 lists will be interpreted as regexs otherwise they will match
4812- directly. This doesn't matter much for `value` since there
4816+ directly. This doesn't matter much for ``value`` since there
48134817 are only a few possible substitution regexes you can use.
4814- - str and regex rules apply as above.
4818+ - str, regex and numeric rules apply as above.
48154819
48164820 * dict:
48174821
4818- - Nested dictionaries, e.g., {'a': {'b': nan}}, are read as
4819- follows: look in column 'a' for the value 'b' and replace it
4820- with nan. You can nest regular expressions as well. Note that
4822+ - Dicts can be used to specify different replacement values
4823+ for different existing values. For example,
4824+ {'a': 'b', 'y': 'z'} replaces the value 'a' with 'b' and
4825+ 'y' with 'z'. To use a dict in this way the ``value``
4826+ parameter should be ``None``.
4827+ - For a DataFrame a dict can specify that different values
4828+ should be replaced in different columns. For example,
4829+ {'a': 1, 'b': 'z'} looks for the value 1 in column 'a' and
4830+ the value 'z' in column 'b' and replaces these values with
4831+ whatever is specified in ``value``. The ``value`` parameter
4832+ should not be ``None`` in this case. You can treat this as a
4833+ special case of passing two lists except that you are
4834+ specifying the column to search in.
4835+ - For a DataFrame nested dictionaries, e.g.,
4836+ {'a': {'b': np.nan}}, are read as follows: look in column 'a'
4837+ for the value 'b' and replace it with NaN. The ``value``
4838+ parameter should be ``None`` to use a nested dict in this
4839+ way. You can nest regular expressions as well. Note that
48214840 column names (the top-level dictionary keys in a nested
48224841 dictionary) **cannot** be regular expressions.
4823- - Keys map to column names and values map to substitution
4824- values. You can treat this as a special case of passing two
4825- lists except that you are specifying the column to search in.
48264842
48274843 * None:
48284844
48294845 - This means that the ``regex`` argument must be a string,
48304846 compiled regular expression, or list, dict, ndarray or Series
4831- of such elements. If `value` is also ``None`` then this
4847+ of such elements. If ``value`` is also ``None`` then this
48324848 **must** be a nested dictionary or ``Series``.
48334849
48344850 See the examples section for examples of each of these.
48354851 value : scalar, dict, list, str, regex, default None
4836- Value to use to fill holes (e.g. 0), alternately a dict of values
4837- specifying which value to use for each column (columns not in the
4838- dict will not be filled). Regular expressions, strings and lists or
4839- dicts of such objects are also allowed.
4852+ Value to replace any values matching ``to_replace`` with.
4853+ For a DataFrame a dict of values can be used to specify which
4854+ value to use for each column (columns not in the dict will not be
4855+ filled). Regular expressions, strings and lists or dicts of such
4856+ objects are also allowed.
48404857 inplace : boolean, default False
48414858 If True, in place. Note: this will modify any
48424859 other views on this object (e.g. a column from a DataFrame).
48434860 Returns the caller if this is True.
48444861 limit : int, default None
48454862 Maximum size gap to forward or backward fill
4846- regex : bool or same types as `to_replace`, default False
4847- Whether to interpret `to_replace` and/or `value` as regular
4848- expressions. If this is ``True`` then `to_replace` *must* be a
4849- string. Otherwise, `to_replace` must be ``None`` because this
4850- parameter will be interpreted as a regular expression or a list,
4851- dict, or array of regular expressions .
4863+ regex : bool or same types as ``to_replace``, default False
4864+ Whether to interpret ``to_replace`` and/or ``value`` as regular
4865+ expressions. If this is ``True`` then ``to_replace`` *must* be a
4866+ string. Alternatively, this could be a regular expression or a
4867+ list, dict, or array of regular expressions in which case
4868+ ``to_replace`` must be ``None``.
48524869 method : string, optional, {'pad', 'ffill', 'bfill'}
48534870 The method to use for replacement, when ``to_replace`` is a
48544871 ``list``.
48554872
48564873 See Also
48574874 --------
4858- NDFrame.reindex
4859- NDFrame.asfreq
4860- NDFrame.fillna
4875+ %(klass)s.fillna : Fill NA/NaN values
4876+ %(klass)s.where : Replace values based on boolean condition
48614877
48624878 Returns
48634879 -------
4864- filled : NDFrame
4880+ filled : %(klass)s
48654881
48664882 Raises
48674883 ------
48684884 AssertionError
4869- * If `regex` is not a ``bool`` and `to_replace` is not ``None``.
4885+ * If ``regex`` is not a ``bool`` and ``to_replace`` is not
4886+ ``None``.
48704887 TypeError
4871- * If `to_replace` is a ``dict`` and `value` is not a ``list``,
4888+ * If ``to_replace`` is a ``dict`` and ``value`` is not a ``list``,
48724889 ``dict``, ``ndarray``, or ``Series``
4873- * If `to_replace` is ``None`` and `regex` is not compilable into a
4874- regular expression or is a list, dict, ndarray, or Series.
4890+ * If ``to_replace`` is ``None`` and ``regex`` is not compilable
4891+ into a regular expression or is a list, dict, ndarray, or
4892+ Series.
4893+ * When replacing multiple ``bool`` or ``datetime64`` objects and
4894+ the arguments to ``to_replace`` do not match the type of the
4895+ value being replaced
48754896 ValueError
4876- * If `to_replace` and `value` are ``list `` s or ``ndarray `` s, but
4877- they are not the same length.
4897+ * If a ``list`` or an ``ndarray`` is passed to ``to_replace`` and
4898+ ``value`` but they are not the same length.
48784899
48794900 Notes
48804901 -----
@@ -4883,12 +4904,121 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
48834904 * Regular expressions will only substitute on strings, meaning you
48844905 cannot provide, for example, a regular expression matching floating
48854906 point numbers and expect the columns in your frame that have a
4886- numeric dtype to be matched. However, if those floating point numbers
4887- *are* strings, then you can do this.
4907+ numeric dtype to be matched. However, if those floating point
4908+ numbers *are* strings, then you can do this.
48884909 * This method has *a lot* of options. You are encouraged to experiment
48894910 and play with this method to gain intuition about how it works.
48904911
4891- """
4912+ Examples
4913+ --------
4914+
4915+ >>> s = pd.Series([0, 1, 2, 3, 4])
4916+ >>> s.replace(0, 5)
4917+ 0 5
4918+ 1 1
4919+ 2 2
4920+ 3 3
4921+ 4 4
4922+ dtype: int64
4923+ >>> df = pd.DataFrame({'A': [0, 1, 2, 3, 4],
4924+ ... 'B': [5, 6, 7, 8, 9],
4925+ ... 'C': ['a', 'b', 'c', 'd', 'e']})
4926+ >>> df.replace(0, 5)
4927+ A B C
4928+ 0 5 5 a
4929+ 1 1 6 b
4930+ 2 2 7 c
4931+ 3 3 8 d
4932+ 4 4 9 e
4933+
4934+ >>> df.replace([0, 1, 2, 3], 4)
4935+ A B C
4936+ 0 4 5 a
4937+ 1 4 6 b
4938+ 2 4 7 c
4939+ 3 4 8 d
4940+ 4 4 9 e
4941+ >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1])
4942+ A B C
4943+ 0 4 5 a
4944+ 1 3 6 b
4945+ 2 2 7 c
4946+ 3 1 8 d
4947+ 4 4 9 e
4948+ >>> s.replace([1, 2], method='bfill')
4949+ 0 0
4950+ 1 3
4951+ 2 3
4952+ 3 3
4953+ 4 4
4954+ dtype: int64
4955+
4956+ >>> df.replace({0: 10, 1: 100})
4957+ A B C
4958+ 0 10 5 a
4959+ 1 100 6 b
4960+ 2 2 7 c
4961+ 3 3 8 d
4962+ 4 4 9 e
4963+ >>> df.replace({'A': 0, 'B': 5}, 100)
4964+ A B C
4965+ 0 100 100 a
4966+ 1 1 6 b
4967+ 2 2 7 c
4968+ 3 3 8 d
4969+ 4 4 9 e
4970+ >>> df.replace({'A': {0: 100, 4: 400}})
4971+ A B C
4972+ 0 100 5 a
4973+ 1 1 6 b
4974+ 2 2 7 c
4975+ 3 3 8 d
4976+ 4 400 9 e
4977+
4978+ >>> df = pd.DataFrame({'A': ['bat', 'foo', 'bait'],
4979+ ... 'B': ['abc', 'bar', 'xyz']})
4980+ >>> df.replace(to_replace=r'^ba.$', value='new', regex=True)
4981+ A B
4982+ 0 new abc
4983+ 1 foo new
4984+ 2 bait xyz
4985+ >>> df.replace({'A': r'^ba.$'}, {'A': 'new'}, regex=True)
4986+ A B
4987+ 0 new abc
4988+ 1 foo bar
4989+ 2 bait xyz
4990+ >>> df.replace(regex=r'^ba.$', value='new')
4991+ A B
4992+ 0 new abc
4993+ 1 foo new
4994+ 2 bait xyz
4995+ >>> df.replace(regex={r'^ba.$':'new', 'foo':'xyz'})
4996+ A B
4997+ 0 new abc
4998+ 1 xyz new
4999+ 2 bait xyz
5000+ >>> df.replace(regex=[r'^ba.$', 'foo'], value='new')
5001+ A B
5002+ 0 new abc
5003+ 1 new new
5004+ 2 bait xyz
5005+
5006+ Note that when replacing multiple ``bool`` or ``datetime64`` objects,
5007+ the data types in the ``to_replace`` parameter must match the data
5008+ type of the value being replaced:
5009+
5010+ >>> df = pd.DataFrame({'A': [True, False, True],
5011+ ... 'B': [False, True, False]})
5012+ >>> df.replace({'a string': 'new value', True: False}) # raises
5013+ TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str'
5014+
5015+ This raises a ``TypeError`` because one of the ``dict`` keys is not of
5016+ the correct type for replacement.
5017+ """ )
5018+
5019+ @Appender (_shared_docs ['replace' ] % _shared_doc_kwargs )
5020+ def replace (self , to_replace = None , value = None , inplace = False , limit = None ,
5021+ regex = False , method = 'pad' , axis = None ):
48925022 inplace = validate_bool_kwarg (inplace , 'inplace' )
48935023 if not is_bool (regex ) and to_replace is not None :
48945024 raise AssertionError ("'to_replace' must be 'None' if 'regex' is "
0 commit comments