"""
Data structure for 1-dimensional cross-sectional and time series data
"""
from __future__ import annotations

import sys
from textwrap import dedent
from typing import (
    IO,
    TYPE_CHECKING,
    Any,
    Callable,
    Hashable,
    Iterable,
    Literal,
    Mapping,
    Sequence,
    Union,
    cast,
    overload,
)
import warnings
import weakref

import numpy as np

from pandas._config import (
    get_option,
    using_copy_on_write,
)

from pandas._libs import (
    lib,
    properties,
    reshape,
)
from pandas._libs.lib import (
    is_range_indexer,
    no_default,
)
from pandas._typing import (
    AggFuncType,
    AlignJoin,
    AnyAll,
    AnyArrayLike,
    ArrayLike,
    Axis,
    AxisInt,
    CorrelationMethod,
    DropKeep,
    Dtype,
    DtypeObj,
    FilePath,
    FillnaOptions,
    Frequency,
    IgnoreRaise,
    IndexKeyFunc,
    IndexLabel,
    Level,
    NaPosition,
    QuantileInterpolation,
    Renamer,
    Scalar,
    SingleManager,
    SortKind,
    StorageOptions,
    TimedeltaConvertibleTypes,
    TimestampConvertibleTypes,
    ValueKeyFunc,
    WriteBuffer,
    npt,
)
from pandas.compat import PYPY
from pandas.compat.numpy import function as nv
from pandas.errors import (
    ChainedAssignmentError,
    InvalidIndexError,
    _chained_assignment_msg,
)
from pandas.util._decorators import (
    Appender,
    Substitution,
    doc,
)
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import (
    validate_ascending,
    validate_bool_kwarg,
    validate_percentile,
)

from pandas.core.dtypes.cast import (
    LossySetitemError,
    convert_dtypes,
    maybe_box_native,
    maybe_cast_pointwise_result,
)
from pandas.core.dtypes.common import (
    ensure_platform_int,
    is_dict_like,
    is_extension_array_dtype,
    is_integer,
    is_iterator,
    is_list_like,
    is_numeric_dtype,
    is_object_dtype,
    is_scalar,
    pandas_dtype,
    validate_all_hashable,
)
from pandas.core.dtypes.generic import ABCDataFrame
from pandas.core.dtypes.inference import is_hashable
from pandas.core.dtypes.missing import (
    isna,
    na_value_for_dtype,
    notna,
    remove_na_arraylike,
)

from pandas.core import (
    algorithms,
    base,
    common as com,
    missing,
    nanops,
    ops,
)
from pandas.core.accessor import CachedAccessor
from pandas.core.apply import SeriesApply
from pandas.core.arrays import ExtensionArray
from pandas.core.arrays.categorical import CategoricalAccessor
from pandas.core.arrays.sparse import SparseAccessor
from pandas.core.construction import (
    extract_array,
    sanitize_array,
)
from pandas.core.generic import NDFrame
from pandas.core.indexers import (
    disallow_ndim_indexing,
    unpack_1tuple,
)
from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
from pandas.core.indexes.api import (
    DatetimeIndex,
    Index,
    MultiIndex,
    PeriodIndex,
    default_index,
    ensure_index,
)
import pandas.core.indexes.base as ibase
from pandas.core.indexes.multi import maybe_droplevels
from pandas.core.indexing import (
    check_bool_indexer,
    check_dict_or_set_indexers,
)
from pandas.core.internals import (
    SingleArrayManager,
    SingleBlockManager,
)
from pandas.core.shared_docs import _shared_docs
from pandas.core.sorting import (
    ensure_key_mapped,
    nargsort,
)
from pandas.core.strings import StringMethods
from pandas.core.tools.datetimes import to_datetime

import pandas.io.formats.format as fmt
from pandas.io.formats.info import (
    INFO_DOCSTRING,
    SeriesInfo,
    series_sub_kwargs,
)
import pandas.plotting

if TYPE_CHECKING:
    from pandas._typing import (
        NumpySorter,
        NumpyValueArrayLike,
        Suffixes,
    )

    from pandas.core.frame import DataFrame
    from pandas.core.groupby.generic import SeriesGroupBy
    from pandas.core.resample import Resampler

__all__ = ["Series"]

_shared_doc_kwargs = {
    "axes": "index",
    "klass": "Series",
    "axes_single_arg": "{0 or 'index'}",
    "axis": """axis : {0 or 'index'}
        Unused. Parameter needed for compatibility with DataFrame.""",
    "inplace": """inplace : bool, default False
        If True, performs operation inplace and returns None.""",
    "unique": "np.ndarray",
    "duplicated": "Series",
    "optional_by": "",
    "optional_mapper": "",
    "optional_reindex": """
index : array-like, optional
    New labels for the index. Preferably an Index object to avoid
    duplicating data.
axis : int or str, optional
    Unused.""",
    "replace_iloc": """
    This differs from updating with ``.loc`` or ``.iloc``, which require
    you to specify a location to update with some value.""",
}


def _coerce_method(converter):
    """
    Install the scalar coercion methods.
    """

    def wrapper(self):
        if len(self) == 1:
            warnings.warn(
                f"Calling {converter.__name__} on a single element Series is "
                "deprecated and will raise a TypeError in the future. "
                f"Use {converter.__name__}(ser.iloc[0]) instead",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            return converter(self.iloc[0])
        raise TypeError(f"cannot convert the series to {converter}")

    wrapper.__name__ = f"__{converter.__name__}__"
    return wrapper


# ----------------------------------------------------------------------
# Series class


# error: Definition of "max" in base class "IndexOpsMixin" is incompatible with
# definition in base class "NDFrame"
# error: Definition of "min" in base class "IndexOpsMixin" is incompatible with
# definition in base class "NDFrame"
class Series(base.IndexOpsMixin, NDFrame):  # type: ignore[misc]
    """
    One-dimensional ndarray with axis labels (including time series).

    Labels need not be unique but must be a hashable type. The object
    supports both integer- and label-based indexing and provides a host of
    methods for performing operations involving the index. Statistical
    methods from ndarray have been overridden to automatically exclude
    missing data (currently represented as NaN).

    Operations between Series (+, -, /, \*, \*\*) align values based on their
    associated index values -- they need not be the same length. The result
    index will be the sorted union of the two indexes.

    Parameters
    ----------
    data : array-like, Iterable, dict, or scalar value
        Contains data stored in Series. If data is a dict, argument order is
        maintained.
    index : array-like or Index (1d)
        Values must be hashable and have the same length as `data`.
        Non-unique index values are allowed. Will default to
        RangeIndex (0, 1, 2, ..., n) if not provided. If data is dict-like
        and index is None, then the keys in the data are used as the index. If the
        index is not None, the resulting Series is reindexed with the index values.
    dtype : str, numpy.dtype, or ExtensionDtype, optional
        Data type for the output Series. If not specified, this will be
        inferred from `data`.
        See the :ref:`user guide <basics.dtypes>` for more usages.
    name : str, optional
        The name to give to the Series.
    copy : bool, default False
        Copy input data. Only affects Series or 1d ndarray input. See examples.

    Notes
    -----
    Please reference the :ref:`User Guide <basics.series>` for more information.

    Examples
    --------
    Constructing Series from a dictionary with an Index specified

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> ser = pd.Series(data=d, index=['a', 'b', 'c'])
    >>> ser
    a    1
    b    2
    c    3
    dtype: int64

    The keys of the dictionary match with the Index values, hence the Index
    values have no effect.

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> ser = pd.Series(data=d, index=['x', 'y', 'z'])
    >>> ser
    x   NaN
    y   NaN
    z   NaN
    dtype: float64

    Note that the Index is first built with the keys from the dictionary.
    After this the Series is reindexed with the given Index values, hence we
    get all NaN as a result.

    Constructing Series from a list with `copy=False`.

    >>> r = [1, 2]
    >>> ser = pd.Series(r, copy=False)
    >>> ser.iloc[0] = 999
    >>> r
    [1, 2]
    >>> ser
    0    999
    1      2
    dtype: int64

    Due to input data type the Series has a `copy` of
    the original data even though `copy=False`, so
    the data is unchanged.

    Constructing Series from a 1d ndarray with `copy=False`.

    >>> r = np.array([1, 2])
    >>> ser = pd.Series(r, copy=False)
    >>> ser.iloc[0] = 999
    >>> r
    array([999,   2])
    >>> ser
    0    999
    1      2
    dtype: int64

    Due to input data type the Series has a `view` on
    the original data, so
    the data is changed as well.
    """

    _typ = "series"
    _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray)

    _name: Hashable
    _metadata: list[str] = ["name"]
    _internal_names_set = {"index"} | NDFrame._internal_names_set
    _accessors = {"dt", "cat", "str", "sparse"}
    _hidden_attrs = (
        base.IndexOpsMixin._hidden_attrs
        | NDFrame._hidden_attrs
        | frozenset(["compress", "ptp"])
    )

    # Override cache_readonly bc Series is mutable
    # error: Incompatible types in assignment (expression has type "property",
    # base class "IndexOpsMixin" defined the type as "Callable[[IndexOpsMixin], bool]")
    hasnans = property(  # type: ignore[assignment]
        # error: "Callable[[IndexOpsMixin], bool]" has no attribute "fget"
        base.IndexOpsMixin.hasnans.fget,  # type: ignore[attr-defined]
        doc=base.IndexOpsMixin.hasnans.__doc__,
    )
    _mgr: SingleManager
    div: Callable[[Series, Any], Series]
    rdiv: Callable[[Series, Any], Series]

    # ----------------------------------------------------------------------
    # Constructors

    def __init__(
        self,
        data=None,
        index=None,
        dtype: Dtype | None = None,
        name=None,
        copy: bool = False,
        fastpath: bool = False,
    ) -> None:
        if (
            isinstance(data, (SingleBlockManager, SingleArrayManager))
            and index is None
            and dtype is None
            and copy is False
        ):
            # GH#33357 called with just the SingleBlockManager
            NDFrame.__init__(self, data)
            if fastpath:
                # e.g. from _box_col_values, skip validation of name
                object.__setattr__(self, "_name", name)
            else:
                self.name = name
            return

        # we are called internally, so short-circuit
        if fastpath:
            # data is a ndarray, index is defined
            if not isinstance(data, (SingleBlockManager, SingleArrayManager)):
                manager = get_option("mode.data_manager")
                if manager == "block":
                    data = SingleBlockManager.from_array(data, index)
                elif manager == "array":
                    data = SingleArrayManager.from_array(data, index)
            if copy:
                data = data.copy()
            # skips validation of the name
            object.__setattr__(self, "_name", name)
            NDFrame.__init__(self, data)
            return

        name = ibase.maybe_extract_name(name, data, type(self))

        if index is not None:
            index = ensure_index(index)

        if dtype is not None:
            dtype = self._validate_dtype(dtype)

        if data is None:
            index = index if index is not None else default_index(0)
            if len(index) or dtype is not None:
                data = na_value_for_dtype(pandas_dtype(dtype), compat=False)
            else:
                data = []

        if isinstance(data, MultiIndex):
            raise NotImplementedError(
                "initializing a Series from a MultiIndex is not supported"
            )
        if isinstance(data, Index):
            if dtype is not None:
                # astype copies
                data = data.astype(dtype)
            else:
                # GH#24096 we need to ensure the index remains immutable
                data = data._values.copy()
            copy = False

        elif isinstance(data, np.ndarray):
            if len(data.dtype):
                # GH#13296 we are dealing with a compound dtype, which
                # should be treated as 2D
                raise ValueError(
                    "Cannot construct a Series from an ndarray with "
                    "compound dtype. Use DataFrame instead."
                )
        elif isinstance(data, Series):
            if index is None:
                index = data.index
                if using_copy_on_write():
                    data = data._mgr.copy(deep=False)
                else:
                    data = data._mgr
            else:
                data = data.reindex(index, copy=copy)
                copy = False
                data = data._mgr
        elif is_dict_like(data):
            data, index = self._init_dict(data, index, dtype)
            dtype = None
            copy = False
        elif isinstance(data, (SingleBlockManager, SingleArrayManager)):
            if index is None:
                index = data.index
            elif not data.index.equals(index) or copy:
                # GH#19275 SingleBlockManager input should only be called
                # internally
                raise AssertionError(
                    "Cannot pass both SingleBlockManager "
                    "`data` argument and a different "
                    "`index` argument. `copy` must be False."
                )

        elif isinstance(data, ExtensionArray):
            pass
        else:
            data = com.maybe_iterable_to_list(data)
            if is_list_like(data) and not len(data) and dtype is None:
                # GH 29405: Pre-2.0, this defaulted to float.
                dtype = np.dtype(object)

        if index is None:
            if not is_list_like(data):
                data = [data]
            index = default_index(len(data))
        elif is_list_like(data):
            com.require_length_match(data, index)

        # create/copy the manager
        if isinstance(data, (SingleBlockManager, SingleArrayManager)):
            if dtype is not None:
                data = data.astype(dtype=dtype, errors="ignore", copy=copy)
            elif copy:
                data = data.copy()
        else:
            data = sanitize_array(data, index, dtype, copy)

            manager = get_option("mode.data_manager")
            if manager == "block":
                data = SingleBlockManager.from_array(data, index)
            elif manager == "array":
                data = SingleArrayManager.from_array(data, index)

        NDFrame.__init__(self, data)
        self.name = name
        self._set_axis(0, index)

    def _init_dict(
        self, data, index: Index | None = None, dtype: DtypeObj | None = None
    ):
        """
        Derive the "_mgr" and "index" attributes of a new Series from a
        dictionary input.

        Parameters
        ----------
        data : dict or dict-like
            Data used to populate the new Series.
        index : Index or None, default None
            Index for the new Series: if None, use dict keys.
        dtype : np.dtype, ExtensionDtype, or None, default None
            The dtype for the new Series: if None, infer from data.

        Returns
        -------
        _data : BlockManager for the new Series
        index : index for the new Series
        """
        keys: Index | tuple

        # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')]
        # raises KeyError), so we iterate the entire dict, and align
        if data:
            # GH:34717, issue was using zip to extract key and values from data.
            # using generators here would hurt performance.
            # Below is the new way of extracting the keys and values

            keys = tuple(data.keys())
            values = list(data.values())  # Generating list of values- faster way
        elif index is not None:
            # fastpath for Series(data=None). Just use broadcasting a scalar
            # instead of reindexing.
            if len(index) or dtype is not None:
                values = na_value_for_dtype(pandas_dtype(dtype), compat=False)
            else:
                values = []
            keys = index
        else:
            keys, values = (), []

        # Input is now list-like, so rely on "standard" construction:

        s = self._constructor(
            values,
            index=keys,
            dtype=dtype,
        )

        # Now we just make sure the order is respected, if any
        if data and index is not None:
            s = s.reindex(index, copy=False)
        return s._mgr, s.index

    # ----------------------------------------------------------------------

    @property
    def _constructor(self) -> Callable[..., Series]:
        return Series

    @property
    def _constructor_expanddim(self) -> Callable[..., DataFrame]:
        """
        Used when a manipulation result has one higher dimension than the
        original, such as Series.to_frame()
        """
        from pandas.core.frame import DataFrame

        return DataFrame

    # types
    @property
    def _can_hold_na(self) -> bool:
        return self._mgr._can_hold_na

    # ndarray compatibility
    @property
    def dtype(self) -> DtypeObj:
        """
        Return the dtype object of the underlying data.
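
        Examples
        --------
        >>> s = pd.Series([1, 2, 3])
        >>> s.dtype
        dtype('int64')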
        """
        return self._mgr.dtype

    @property
    def dtypes(self) -> DtypeObj:
        """
        Return the dtype object of the underlying data.
        """
        # DataFrame compatibility
        return self.dtype

    @property
    def name(self) -> Hashable:
        """
        Return the name of the Series.

        The name of a Series becomes its index or column name if it is used
        to form a DataFrame. It is also used whenever displaying the Series
        using the interpreter.

        Returns
        -------
        label (hashable object)
            The name of the Series, also the column name if part of a DataFrame.

        See Also
        --------
        Series.rename : Sets the Series name when given a scalar input.
        Index.name : Corresponding Index property.

        Examples
        --------
        The Series name can be set initially when calling the constructor.

        >>> s = pd.Series([1, 2, 3], dtype=np.int64, name='Numbers')
        >>> s
        0    1
        1    2
        2    3
        Name: Numbers, dtype: int64
        >>> s.name = "Integers"
        >>> s
        0    1
        1    2
        2    3
        Name: Integers, dtype: int64

        The name of a Series within a DataFrame is its column name.

        >>> df = pd.DataFrame([[1, 2], [3, 4], [5, 6]],
        ...                   columns=["Odd Numbers", "Even Numbers"])
        >>> df
           Odd Numbers  Even Numbers
        0            1             2
        1            3             4
        2            5             6
        >>> df["Even Numbers"].name
        'Even Numbers'
        """
        return self._name

    @name.setter
    def name(self, value: Hashable) -> None:
        validate_all_hashable(value, error_name=f"{type(self).__name__}.name")
        object.__setattr__(self, "_name", value)

    @property
    def values(self):
        """
        Return Series as ndarray or ndarray-like depending on the dtype.

        .. warning::

           We recommend using :attr:`Series.array` or
           :meth:`Series.to_numpy`, depending on whether you need
           a reference to the underlying data or a NumPy array.

        Returns
        -------
        numpy.ndarray or ndarray-like

        See Also
        --------
        Series.array : Reference to the underlying data.
        Series.to_numpy : A NumPy array representing the underlying data.

        Examples
        --------
        >>> pd.Series([1, 2, 3]).values
        array([1, 2, 3])

        >>> pd.Series(list('aabc')).values
        array(['a', 'a', 'b', 'c'], dtype=object)

        >>> pd.Series(list('aabc')).astype('category').values
        ['a', 'a', 'b', 'c']
        Categories (3, object): ['a', 'b', 'c']

        Timezone aware datetime data is converted to UTC:

        >>> pd.Series(pd.date_range('20130101', periods=3,
        ...                         tz='US/Eastern')).values
        array(['2013-01-01T05:00:00.000000000',
               '2013-01-02T05:00:00.000000000',
               '2013-01-03T05:00:00.000000000'], dtype='datetime64[ns]')
        """
        return self._mgr.external_values()

    @property
    def _values(self):
        """
        Return the internal repr of this data (defined by Block.interval_values).
        These are the values as stored in the Block (ndarray or ExtensionArray
        depending on the Block class), with datetime64[ns] and timedelta64[ns]
        wrapped in ExtensionArrays to match Index._values behavior.

        Differs from the public ``.values`` for certain data types, because of
        historical backwards compatibility of the public attribute (e.g. period
        returns object ndarray and datetimetz a datetime64[ns] ndarray for
        ``.values`` while it returns an ExtensionArray for ``._values`` in those
        cases).

        Differs from ``.array`` in that this still returns the numpy array if
        the Block is backed by a numpy array (except for datetime64 and
        timedelta64 dtypes), while ``.array`` ensures to always return an
        ExtensionArray.

        Overview:

        dtype       | values        | _values       | array         |
        ----------- | ------------- | ------------- | ------------- |
        Numeric     | ndarray       | ndarray       | PandasArray   |
        Category    | Categorical   | Categorical   | Categorical   |
        dt64[ns]    | ndarray[M8ns] | DatetimeArray | DatetimeArray |
        dt64[ns tz] | ndarray[M8ns] | DatetimeArray | DatetimeArray |
        td64[ns]    | ndarray[m8ns] | TimedeltaArray| ndarray[m8ns] |
        Period      | ndarray[obj]  | PeriodArray   | PeriodArray   |
        Nullable    | EA            | EA            | EA            |

        """
        return self._mgr.internal_values()

    # error: Decorated property not supported
    @Appender(base.IndexOpsMixin.array.__doc__)  # type: ignore[misc]
    @property
    def array(self) -> ExtensionArray:
        return self._mgr.array_values()

    # ops
    def ravel(self, order: str = "C") -> ArrayLike:
        """
        Return the flattened underlying data as an ndarray or ExtensionArray.

        Returns
        -------
        numpy.ndarray or ExtensionArray
            Flattened data of the Series.

        See Also
        --------
        numpy.ndarray.ravel : Return a flattened array.
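
        Examples
        --------
        >>> s = pd.Series([1, 2, 3])
        >>> s.ravel()
        array([1, 2, 3])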
        """
        return self._values.ravel(order=order)

    def __len__(self) -> int:
        """
        Return the length of the Series.
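
        Examples
        --------
        >>> s = pd.Series([1, 2, 3])
        >>> len(s)
        3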
        """
        return len(self._mgr)

    def view(self, dtype: Dtype | None = None) -> Series:
        """
        Create a new view of the Series.

        This function will return a new Series with a view of the same
        underlying values in memory, optionally reinterpreted with a new data
        type. The new data type must preserve the same size in bytes as to not
        cause index misalignment.

        Parameters
        ----------
        dtype : data type
            Data type object or one of their string representations.

        Returns
        -------
        Series
            A new Series object as a view of the same data in memory.

        See Also
        --------
        numpy.ndarray.view : Equivalent numpy function to create a new view of
            the same data in memory.

        Notes
        -----
        Series are instantiated with ``dtype=float64`` by default. While
        ``numpy.ndarray.view()`` will return a view with the same data type as
        the original array, ``Series.view()`` (without specified dtype)
        will try using ``float64`` and may fail if the original data type size
        in bytes is not the same.

        Examples
        --------
        >>> s = pd.Series([-2, -1, 0, 1, 2], dtype='int8')
        >>> s
        0   -2
        1   -1
        2    0
        3    1
        4    2
        dtype: int8

        The 8 bit signed integer representation of `-1` is `0b11111111`, but
        the same bytes represent 255 if read as an 8 bit unsigned integer:

        >>> us = s.view('uint8')
        >>> us
        0    254
        1    255
        2      0
        3      1
        4      2
        dtype: uint8

        The views share the same underlying values:

        >>> us[0] = 128
        >>> s
        0   -128
        1     -1
        2      0
        3      1
        4      2
        dtype: int8
        """
        # self.array instead of self._values so we piggyback on PandasArray
        # implementation
        res_values = self.array.view(dtype)
        res_ser = self._constructor(res_values, index=self.index)
        return res_ser.__finalize__(self, method="view")

    # ----------------------------------------------------------------------
    # NDArray Compat
    _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray)

    def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
        """
        Return the values as a NumPy array.

        Users should not call this directly. Rather, it is invoked by
        :func:`numpy.array` and :func:`numpy.asarray`.

        Parameters
        ----------
        dtype : str or numpy.dtype, optional
            The dtype to use for the resulting NumPy array. By default,
            the dtype is inferred from the data.

        Returns
        -------
        numpy.ndarray
            The values in the series converted to a :class:`numpy.ndarray`
            with the specified `dtype`.

        See Also
        --------
        array : Create a new array from data.
        Series.array : Zero-copy view to the array backing the Series.
        Series.to_numpy : Series method for similar behavior.

        Examples
        --------
        >>> ser = pd.Series([1, 2, 3])
        >>> np.asarray(ser)
        array([1, 2, 3])

        For timezone-aware data, the timezones may be retained with
        ``dtype='object'``

        >>> tzser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))
        >>> np.asarray(tzser, dtype="object")
        array([Timestamp('2000-01-01 00:00:00+0100', tz='CET'),
               Timestamp('2000-01-02 00:00:00+0100', tz='CET')],
              dtype=object)

        Or the values may be localized to UTC and the tzinfo discarded with
        ``dtype='datetime64[ns]'``

        >>> np.asarray(tzser, dtype="datetime64[ns]")  # doctest: +ELLIPSIS
        array(['1999-12-31T23:00:00.000000000', ...],
              dtype='datetime64[ns]')
        """
        return np.asarray(self._values, dtype)

    # ----------------------------------------------------------------------
    # Unary Methods

    # coercion
    __float__ = _coerce_method(float)
    __int__ = _coerce_method(int)

    # ----------------------------------------------------------------------

    # indexers
    @property
    def axes(self) -> list[Index]:
        """
        Return a list of the row axis labels.
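
        Examples
        --------
        >>> s = pd.Series([1, 2, 3])
        >>> s.axes
        [RangeIndex(start=0, stop=3, step=1)]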
        """
        return [self.index]

    # ----------------------------------------------------------------------
    # Indexing Methods

    @Appender(NDFrame.take.__doc__)
    def take(self, indices, axis: Axis = 0, **kwargs) -> Series:
        nv.validate_take((), kwargs)

        indices = ensure_platform_int(indices)

        if (
            indices.ndim == 1
            and using_copy_on_write()
            and is_range_indexer(indices, len(self))
        ):
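            # with copy-on-write, taking every element in its original order
            # is a no-op, so a (lazy) shallow copy of self can be returned
            # instead of materializing the take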
            return self.copy(deep=None)

        new_index = self.index.take(indices)
        new_values = self._values.take(indices)

        result = self._constructor(new_values, index=new_index, fastpath=True)
        return result.__finalize__(self, method="take")

    def _take_with_is_copy(self, indices, axis: Axis = 0) -> Series:
        """
        Internal version of the `take` method that sets the `_is_copy`
        attribute to keep track of the parent dataframe (used in indexing
        for the SettingWithCopyWarning). For Series this does the same
        as the public take (it never sets `_is_copy`).

        See the docstring of `take` for full explanation of the parameters.
        """
        return self.take(indices=indices, axis=axis)

    def _ixs(self, i: int, axis: AxisInt = 0) -> Any:
        """
        Return the i-th value or values in the Series by location.

        Parameters
        ----------
        i : int

        Returns
        -------
        scalar (int) or Series (slice, sequence)
        """
        return self._values[i]

    def _slice(self, slobj: slice | np.ndarray, axis: Axis = 0) -> Series:
        # axis kwarg is retained for compat with NDFrame method
        # _slice is *always* positional
        return self._get_values(slobj)

    def __getitem__(self, key):
        check_dict_or_set_indexers(key)
        key = com.apply_if_callable(key, self)

        if key is Ellipsis:
            return self

        key_is_scalar = is_scalar(key)
        if isinstance(key, (list, tuple)):
            key = unpack_1tuple(key)

        if is_integer(key) and self.index._should_fallback_to_positional:
            return self._values[key]

        elif key_is_scalar:
            return self._get_value(key)

        if is_hashable(key):
            # Otherwise index.get_value will raise InvalidIndexError
            try:
                # For labels that don't resolve as scalars like tuples and frozensets
                result = self._get_value(key)

                return result

            except (KeyError, TypeError, InvalidIndexError):
                # InvalidIndexError for e.g. generator
                # see test_series_getitem_corner_generator
                if isinstance(key, tuple) and isinstance(self.index, MultiIndex):
                    # We still have the corner case where a tuple is a key
                    # in the first level of our MultiIndex
                    return self._get_values_tuple(key)

        if is_iterator(key):
            key = list(key)

        if com.is_bool_indexer(key):
            key = check_bool_indexer(self.index, key)
            key = np.asarray(key, dtype=bool)
            return self._get_values(key)

        return self._get_with(key)

    def _get_with(self, key):
        # other: fancy integer or otherwise
        if isinstance(key, slice):
            # _convert_slice_indexer to determine if this slice is positional
            # or label based, and if the latter, convert to positional
            slobj = self.index._convert_slice_indexer(key, kind="getitem")
            return self._slice(slobj)
        elif isinstance(key, ABCDataFrame):
            raise TypeError(
                "Indexing a Series with DataFrame is not "
                "supported, use the appropriate DataFrame column"
            )
        elif isinstance(key, tuple):
            return self._get_values_tuple(key)

        elif not is_list_like(key):
            # e.g. scalars that aren't recognized by lib.is_scalar, GH#32684
            return self.loc[key]

        if not isinstance(key, (list, np.ndarray, ExtensionArray, Series, Index)):
            key = list(key)

        if isinstance(key, Index):
            key_type = key.inferred_type
        else:
            key_type = lib.infer_dtype(key, skipna=False)

        # Note: The key_type == "boolean" case should be caught by the
        # com.is_bool_indexer check in __getitem__
        if key_type == "integer":
            # We need to decide whether to treat this as a positional indexer
            # (i.e. self.iloc) or label-based (i.e. self.loc)
            if not self.index._should_fallback_to_positional:
                return self.loc[key]
            else:
                return self.iloc[key]

        # handle the dup indexing case GH#4246
        return self.loc[key]

    def _get_values_tuple(self, key: tuple):
        # mpl hackaround
        if com.any_none(*key):
            # mpl compat if we look up e.g. ser[:, np.newaxis];
            # see tests.series.timeseries.test_mpl_compat_hack
            # the asarray is needed to avoid returning a 2D DatetimeArray
            result = np.asarray(self._values[key])
            disallow_ndim_indexing(result)
            return result

        if not isinstance(self.index, MultiIndex):
            raise KeyError("key of type tuple not found and not a MultiIndex")

        # If key is contained, would have returned by now
        indexer, new_index = self.index.get_loc_level(key)
        return self._constructor(self._values[indexer], index=new_index).__finalize__(
            self
        )

    def _get_values(self, indexer: slice | npt.NDArray[np.bool_]) -> Series:
        new_mgr = self._mgr.getitem_mgr(indexer)
        return self._constructor(new_mgr).__finalize__(self)

    def _get_value(self, label, takeable: bool = False):
        """
        Quickly retrieve single value at passed index label.

        Parameters
        ----------
        label : object
        takeable : interpret the index as indexers, default False

        Returns
        -------
        scalar value
        """
        if takeable:
            return self._values[label]

        # Similar to Index.get_value, but we do not fall back to positional
        loc = self.index.get_loc(label)

        if is_integer(loc):
            return self._values[loc]

        if isinstance(self.index, MultiIndex):
            mi = self.index
            new_values = self._values[loc]
            if len(new_values) == 1 and mi.nlevels == 1:
                # If more than one level left, we can not return a scalar
                return new_values[0]

            new_index = mi[loc]
            new_index = maybe_droplevels(new_index, label)
            new_ser = self._constructor(new_values, index=new_index, name=self.name)
            return new_ser.__finalize__(self)

        else:
            return self.iloc[loc]

    def __setitem__(self, key, value) -> None:
        if not PYPY and using_copy_on_write():
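            # the refcount threshold distinguishes a temporary object (as
            # created by chained indexing like df["a"][0] = value) from a
            # Series the user still holds a reference to; only the former
            # is flagged as a chained assignment here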
            if sys.getrefcount(self) <= 3:
                raise ChainedAssignmentError(_chained_assignment_msg)

        check_dict_or_set_indexers(key)
        key = com.apply_if_callable(key, self)
        cacher_needs_updating = self._check_is_chained_assignment_possible()

        if key is Ellipsis:
            key = slice(None)

        if isinstance(key, slice):
            indexer = self.index._convert_slice_indexer(key, kind="getitem")
            return self._set_values(indexer, value)

        try:
            self._set_with_engine(key, value)
        except KeyError:
            # We have a scalar (or for MultiIndex or object-dtype, scalar-like)
            # key that is not present in self.index.
            if is_integer(key) and self.index.inferred_type != "integer":
                if not self.index._should_fallback_to_positional:
                    # GH#33469
                    self.loc[key] = value
                else:
                    # positional setter
                    # can't use _mgr.setitem_inplace yet bc could have *both*
                    # KeyError and then ValueError, xref GH#45070
                    self._set_values(key, value)
            else:
                # GH#12862 adding a new key to the Series
                self.loc[key] = value

        except (TypeError, ValueError, LossySetitemError):
            # The key was OK, but we cannot set the value losslessly
            indexer = self.index.get_loc(key)
            self._set_values(indexer, value)

        except InvalidIndexError as err:
            if isinstance(key, tuple) and not isinstance(self.index, MultiIndex):
                # cases with MultiIndex don't get here bc they raise KeyError
                # e.g. test_basic_getitem_setitem_corner
                raise KeyError(
                    "key of type tuple not found and not a MultiIndex"
                ) from err

            if com.is_bool_indexer(key):
                key = check_bool_indexer(self.index, key)
                key = np.asarray(key, dtype=bool)

                if (
                    is_list_like(value)
                    and len(value) != len(self)
                    and not isinstance(value, Series)
                    and not is_object_dtype(self.dtype)
                ):
                    # Series will be reindexed to have matching length inside
                    # _where call below
                    # GH#44265
                    indexer = key.nonzero()[0]
                    self._set_values(indexer, value)
                    return

                # otherwise with listlike other we interpret series[mask] = other
                # as series[mask] = other[mask]
                try:
                    self._where(~key, value, inplace=True)
                except InvalidIndexError:
                    # test_where_dups
                    self.iloc[key] = value
                return

            else:
                self._set_with(key, value)

        if cacher_needs_updating:
            self._maybe_update_cacher(inplace=True)

    def _set_with_engine(self, key, value) -> None:
        loc = self.index.get_loc(key)

        # this is equivalent to self._values[key] = value
        self._mgr.setitem_inplace(loc, value)

    def _set_with(self, key, value) -> None:
        # We got here via exception-handling off of InvalidIndexError, so
        # key should always be listlike at this point.
        assert not isinstance(key, tuple)

        if is_iterator(key):
            # Without this, the call to infer_dtype will consume the generator
            key = list(key)

        if not self.index._should_fallback_to_positional:
            # Regardless of the key type, we're treating it as labels
            self._set_labels(key, value)

        else:
            # Note: key_type == "boolean" should not occur because that
            # should be caught by the is_bool_indexer check in __setitem__
            key_type = lib.infer_dtype(key, skipna=False)

            if key_type == "integer":
                self._set_values(key, value)
            else:
                self._set_labels(key, value)

    def _set_labels(self, key, value) -> None:
        key = com.asarray_tuplesafe(key)
        indexer: np.ndarray = self.index.get_indexer(key)
        mask = indexer == -1
        if mask.any():
            raise KeyError(f"{key[mask]} not in index")
        self._set_values(indexer, value)

    def _set_values(self, key, value) -> None:
        if isinstance(key, (Index, Series)):
            key = key._values

        self._mgr = self._mgr.setitem(indexer=key, value=value)
        self._maybe_update_cacher()

    def _set_value(self, label, value, takeable: bool = False) -> None:
        """
        Quickly set single value at passed label.

        If label is not contained, a new object is created with the label
        placed at the end of the result index.

        Parameters
        ----------
        label : object
            Partial indexing with MultiIndex not allowed.
        value : object
            Scalar value.
        takeable : interpret the index as indexers, default False
        """
        if not takeable:
            try:
                loc = self.index.get_loc(label)
            except KeyError:
                # set using a non-recursive method
                self.loc[label] = value
                return
        else:
            loc = label

        self._set_values(loc, value)

    # ----------------------------------------------------------------------
    # Lookup Caching

    @property
    def _is_cached(self) -> bool:
        """Return boolean indicating if self is cached or not."""
        return getattr(self, "_cacher", None) is not None

    def _get_cacher(self):
        """return my cacher or None"""
        cacher = getattr(self, "_cacher", None)
        if cacher is not None:
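            # _cacher is a (item, weakref) tuple; calling the stored weakref
            # resolves it to the parent object, or None if the parent has
            # been garbage collected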
            cacher = cacher[1]()
        return cacher

    def _reset_cacher(self) -> None:
        """
        Reset the cacher.
        """
        if hasattr(self, "_cacher"):
            del self._cacher

    def _set_as_cached(self, item, cacher) -> None:
        """
        Set the _cacher attribute on the calling object with a weakref to
        cacher.
        """
        if using_copy_on_write():
            return
        self._cacher = (item, weakref.ref(cacher))

    def _clear_item_cache(self) -> None:
        # no-op for Series
        pass

    def _check_is_chained_assignment_possible(self) -> bool:
        """
        See NDFrame._check_is_chained_assignment_possible.__doc__
        """
        if self._is_view and self._is_cached:
            ref = self._get_cacher()
            if ref is not None and ref._is_mixed_type:
                self._check_setitem_copy(t="referent", force=True)
            return True
        return super()._check_is_chained_assignment_possible()

    def _maybe_update_cacher(
        self, clear: bool = False, verify_is_copy: bool = True, inplace: bool = False
    ) -> None:
        """
        See NDFrame._maybe_update_cacher.__doc__
        """
        # for CoW, we never want to update the parent DataFrame cache
        # if the Series changed, but don't keep track of any cacher
        if using_copy_on_write():
            return
        cacher = getattr(self, "_cacher", None)
        if cacher is not None:
            assert self.ndim == 1
            ref: DataFrame = cacher[1]()

            # we are trying to reference a dead referent, hence
            # a copy
            if ref is None:
                del self._cacher
            elif len(self) == len(ref) and self.name in ref.columns:
                # GH#42530 self.name must be in ref.columns
                # to ensure column still in dataframe
                # otherwise, either self or ref has swapped in new arrays
                ref._maybe_cache_changed(cacher[0], self, inplace=inplace)
            else:
                # GH#33675 we have swapped in a new array, so parent
                # reference to self is now invalid
                ref._item_cache.pop(cacher[0], None)

        super()._maybe_update_cacher(
            clear=clear, verify_is_copy=verify_is_copy, inplace=inplace
        )

    # ----------------------------------------------------------------------
    # Unsorted

    @property
    def _is_mixed_type(self) -> bool:
        return False

    def repeat(self, repeats: int | Sequence[int], axis: None = None) -> Series:
        """
        Repeat elements of a Series.

        Returns a new Series where each element of the current Series
        is repeated consecutively a given number of times.

        Parameters
        ----------
        repeats : int or array of ints
            The number of repetitions for each element. This should be a
            non-negative integer. Repeating 0 times will return an empty
            Series.
        axis : None
            Unused. Parameter needed for compatibility with DataFrame.

        Returns
        -------
        Series
            Newly created Series with repeated elements.

        See Also
        --------
        Index.repeat : Equivalent function for Index.
        numpy.repeat : Similar method for :class:`numpy.ndarray`.

        Examples
        --------
        >>> s = pd.Series(['a', 'b', 'c'])
        >>> s
        0    a
        1    b
        2    c
        dtype: object
        >>> s.repeat(2)
        0    a
        0    a
        1    b
        1    b
        2    c
        2    c
        dtype: object
        >>> s.repeat([1, 2, 3])
        0    a
        1    b
        1    b
        2    c
        2    c
        2    c
        dtype: object
        """
        nv.validate_repeat((), {"axis": axis})
        new_index = self.index.repeat(repeats)
        new_values = self._values.repeat(repeats)
        return self._constructor(new_values, index=new_index).__finalize__(
            self, method="repeat"
        )

    @overload
    def reset_index(
        self,
        level: IndexLabel = ...,
        *,
        drop: Literal[False] = ...,
        name: Level = ...,
        inplace: Literal[False] = ...,
        allow_duplicates: bool = ...,
    ) -> DataFrame:
        ...

    @overload
    def reset_index(
        self,
        level: IndexLabel = ...,
        *,
        drop: Literal[True],
        name: Level = ...,
        inplace: Literal[False] = ...,
        allow_duplicates: bool = ...,
    ) -> Series:
        ...

    @overload
    def reset_index(
        self,
        level: IndexLabel = ...,
        *,
        drop: bool = ...,
        name: Level = ...,
        inplace: Literal[True],
        allow_duplicates: bool = ...,
    ) -> None:
        ...

    def reset_index(
        self,
        level: IndexLabel = None,
        *,
        drop: bool = False,
        name: Level = lib.no_default,
        inplace: bool = False,
        allow_duplicates: bool = False,
    ) -> DataFrame | Series | None:
        """
        Generate a new DataFrame or Series with the index reset.

        This is useful when the index needs to be treated as a column, or
        when the index is meaningless and needs to be reset to the default
        before another operation.

        Parameters
        ----------
        level : int, str, tuple, or list, default optional
            For a Series with a MultiIndex, only remove the specified levels
            from the index. Removes all levels by default.
        drop : bool, default False
            Just reset the index, without inserting it as a column in
            the new DataFrame.
        name : object, optional
            The name to use for the column containing the original Series
            values. Uses ``self.name`` by default. This argument is ignored
            when `drop` is True.
        inplace : bool, default False
            Modify the Series in place (do not create a new object).
        allow_duplicates : bool, default False
            Allow duplicate column labels to be created.

            .. versionadded:: 1.5.0

        Returns
        -------
        Series or DataFrame or None
            When `drop` is False (the default), a DataFrame is returned.
            The newly created columns will come first in the DataFrame,
            followed by the original Series values.
            When `drop` is True, a `Series` is returned.
            In either case, if ``inplace=True``, no value is returned.

        See Also
        --------
        DataFrame.reset_index: Analogous function for DataFrame.

        Examples
        --------
        >>> s = pd.Series([1, 2, 3, 4], name='foo',
        ...               index=pd.Index(['a', 'b', 'c', 'd'], name='idx'))

        Generate a DataFrame with default index.

        >>> s.reset_index()
          idx  foo
        0   a    1
        1   b    2
        2   c    3
        3   d    4

        To specify the name of the new column use `name`.

        >>> s.reset_index(name='values')
          idx  values
        0   a       1
        1   b       2
        2   c       3
        3   d       4

        To generate a new Series with the default index, set `drop` to True.

        >>> s.reset_index(drop=True)
        0    1
        1    2
        2    3
        3    4
        Name: foo, dtype: int64

        The `level` parameter is interesting for Series with a multi-level
        index.

        >>> arrays = [np.array(['bar', 'bar', 'baz', 'baz']),
        ...           np.array(['one', 'two', 'one', 'two'])]
        >>> s2 = pd.Series(
        ...     range(4), name='foo',
        ...     index=pd.MultiIndex.from_arrays(arrays,
        ...                                     names=['a', 'b']))

        To remove a specific level from the Index, use `level`.

        >>> s2.reset_index(level='a')
               a  foo
        b
        one  bar    0
        two  bar    1
        one  baz    2
        two  baz    3

        If `level` is not set, all levels are removed from the Index.

        >>> s2.reset_index()
             a    b  foo
        0  bar  one    0
        1  bar  two    1
        2  baz  one    2
        3  baz  two    3
        """
        inplace = validate_bool_kwarg(inplace, "inplace")
        if drop:
            new_index = default_index(len(self))
            if level is not None:
                level_list: Sequence[Hashable]
                if not isinstance(level, (tuple, list)):
                    level_list = [level]
                else:
                    level_list = level
                level_list = [self.index._get_level_number(lev) for lev in level_list]
                if len(level_list) < self.index.nlevels:
                    new_index = self.index.droplevel(level_list)

            if inplace:
                self.index = new_index
            else:
                return self._constructor(
                    self._values.copy(), index=new_index
                ).__finalize__(self, method="reset_index")
        elif inplace:
            raise TypeError(
                "Cannot reset_index inplace on a Series to create a DataFrame"
            )
        else:
            if name is lib.no_default:
                # For backwards compatibility, keep columns as [0] instead of
                # [None] when self.name is None
                if self.name is None:
                    name = 0
                else:
                    name = self.name

            df = self.to_frame(name)
            return df.reset_index(
                level=level, drop=drop, allow_duplicates=allow_duplicates
            )
        return None

    # ----------------------------------------------------------------------
    # Rendering Methods

    def __repr__(self) -> str:
        """
        Return a string representation for a particular Series.
        """
        # pylint: disable=invalid-repr-returned
        repr_params = fmt.get_series_repr_params()
        return self.to_string(**repr_params)

    @overload
    def to_string(
        self,
        buf: None = ...,
        na_rep: str = ...,
        float_format: str | None = ...,
        header: bool = ...,
        index: bool = ...,
        length=...,
        dtype=...,
        name=...,
        max_rows: int | None = ...,
        min_rows: int | None = ...,
    ) -> str:
        ...

    @overload
    def to_string(
        self,
        buf: FilePath | WriteBuffer[str],
        na_rep: str = ...,
        float_format: str | None = ...,
        header: bool = ...,
        index: bool = ...,
        length=...,
        dtype=...,
        name=...,
        max_rows: int | None = ...,
        min_rows: int | None = ...,
    ) -> None:
        ...

    def to_string(
        self,
        buf: FilePath | WriteBuffer[str] | None = None,
        na_rep: str = "NaN",
        float_format: str | None = None,
        header: bool = True,
        index: bool = True,
        length: bool = False,
        dtype: bool = False,
        name: bool = False,
        max_rows: int | None = None,
        min_rows: int | None = None,
    ) -> str | None:
        """
        Render a string representation of the Series.

        Parameters
        ----------
        buf : StringIO-like, optional
            Buffer to write to.
        na_rep : str, optional
            String representation of NaN to use, default 'NaN'.
        float_format : one-parameter function, optional
            Formatter function to apply to columns' elements if they are
            floats, default None.
        header : bool, default True
            Add the Series header (index name).
        index : bool, optional
            Add index (row) labels, default True.
        length : bool, default False
            Add the Series length.
        dtype : bool, default False
            Add the Series dtype.
        name : bool, default False
            Add the Series name if not None.
        max_rows : int, optional
            Maximum number of rows to show before truncating. If None, show
            all.
        min_rows : int, optional
            The number of rows to display in a truncated repr (when number
            of rows is above `max_rows`).

        Returns
        -------
        str or None
            String representation of Series if ``buf=None``, otherwise None.
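
        Examples
        --------
        >>> ser = pd.Series([1, 2, 3])
        >>> print(ser.to_string())
        0    1
        1    2
        2    3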
        """
        formatter = fmt.SeriesFormatter(
            self,
            name=name,
            length=length,
            header=header,
            index=index,
            dtype=dtype,
            na_rep=na_rep,
            float_format=float_format,
            min_rows=min_rows,
            max_rows=max_rows,
        )
        result = formatter.to_string()

        # catch contract violations
        if not isinstance(result, str):
            raise AssertionError(
                "result must be of type str, type "
                f"of result is {repr(type(result).__name__)}"
            )

        if buf is None:
            return result
        else:
            if hasattr(buf, "write"):
                buf.write(result)
            else:
                with open(buf, "w") as f:
                    f.write(result)
        return None

    @doc(
        klass=_shared_doc_kwargs["klass"],
        storage_options=_shared_docs["storage_options"],
        examples=dedent(
            """Examples
            --------
            >>> s = pd.Series(["elk", "pig", "dog", "quetzal"], name="animal")
            >>> print(s.to_markdown())
            |    | animal   |
            |---:|:---------|
            |  0 | elk      |
            |  1 | pig      |
            |  2 | dog      |
            |  3 | quetzal  |

            Output markdown with a tabulate option.

            >>> print(s.to_markdown(tablefmt="grid"))
            +----+----------+
            |    | animal   |
            +====+==========+
            |  0 | elk      |
            +----+----------+
            |  1 | pig      |
            +----+----------+
            |  2 | dog      |
            +----+----------+
            |  3 | quetzal  |
            +----+----------+"""
        ),
    )
    def to_markdown(
        self,
        buf: IO[str] | None = None,
        mode: str = "wt",
        index: bool = True,
        storage_options: StorageOptions = None,
        **kwargs,
    ) -> str | None:
        """
        Print {klass} in Markdown-friendly format.

        .. versionadded:: 1.0.0

        Parameters
        ----------
        buf : str, Path or StringIO-like, optional, default None
            Buffer to write to. If None, the output is returned as a string.
        mode : str, optional
            Mode in which file is opened, "wt" by default.
        index : bool, optional, default True
            Add index (row) labels.

            .. versionadded:: 1.1.0
        {storage_options}

            .. versionadded:: 1.2.0

        **kwargs
            These parameters will be passed to `tabulate
            <https://pypi.org/project/tabulate>`_.

        Returns
        -------
        str
            {klass} in Markdown-friendly format.

        Notes
        -----
        Requires the `tabulate <https://pypi.org/project/tabulate>`_ package.

        {examples}
        """
        return self.to_frame().to_markdown(
            buf, mode, index, storage_options=storage_options, **kwargs
        )

    # ----------------------------------------------------------------------

    def items(self) -> Iterable[tuple[Hashable, Any]]:
        """
        Lazily iterate over (index, value) tuples.

        This method returns an iterable tuple (index, value). This is
        convenient if you want to create a lazy iterator.

        Returns
        -------
        iterable
            Iterable of tuples containing the (index, value) pairs from a
            Series.

        See Also
        --------
        DataFrame.items : Iterate over (column name, Series) pairs.
        DataFrame.iterrows : Iterate over DataFrame rows as (index, Series) pairs.

        Examples
        --------
        >>> s = pd.Series(['A', 'B', 'C'])
        >>> for index, value in s.items():
        ...     print(f"Index : {index}, Value : {value}")
        Index : 0, Value : A
        Index : 1, Value : B
        Index : 2, Value : C
        """
        return zip(iter(self.index), iter(self))

    # ----------------------------------------------------------------------
    # Misc public methods

    def keys(self) -> Index:
        """
        Return alias for index.

        Returns
        -------
        Index
            Index of the Series.
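
        Examples
        --------
        >>> s = pd.Series([1, 2, 3], index=[0, 1, 2])
        >>> s.keys()
        Index([0, 1, 2], dtype='int64')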
        """
        return self.index

    def to_dict(self, into: type[dict] = dict) -> dict:
        """
        Convert Series to {label -> value} dict or dict-like object.

        Parameters
        ----------
        into : class, default dict
            The collections.abc.Mapping subclass to use as the return
            object. Can be the actual class or an empty
            instance of the mapping type you want. If you want a
            collections.defaultdict, you must pass it initialized.

        Returns
        -------
        collections.abc.Mapping
            Key-value representation of Series.

        Examples
        --------
        >>> s = pd.Series([1, 2, 3, 4])
        >>> s.to_dict()
        {0: 1, 1: 2, 2: 3, 3: 4}
        >>> from collections import OrderedDict, defaultdict
        >>> s.to_dict(OrderedDict)
        OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
        >>> dd = defaultdict(list)
        >>> s.to_dict(dd)
        defaultdict(<class 'list'>, {0: 1, 1: 2, 2: 3, 3: 4})
        """
        # GH16122
        into_c = com.standardize_mapping(into)

        if is_object_dtype(self) or is_extension_array_dtype(self):
            return into_c((k, maybe_box_native(v)) for k, v in self.items())
        else:
            # Not an object dtype => all types will be the same so let the default
            # indexer return native python type
            return into_c(self.items())

    def to_frame(self, name: Hashable = lib.no_default) -> DataFrame:
        """
        Convert Series to DataFrame.

        Parameters
        ----------
        name : object, optional
            The passed name should substitute for the series name (if it has
            one).

        Returns
        -------
        DataFrame
            DataFrame representation of Series.

        Examples
        --------
        >>> s = pd.Series(["a", "b", "c"],
        ...               name="vals")
        >>> s.to_frame()
          vals
        0    a
        1    b
        2    c
        """
        columns: Index
        if name is lib.no_default:
            name = self.name
            if name is None:
                # default to [0], same as we would get with DataFrame(self)
                columns = default_index(1)
            else:
                columns = Index([name])
        else:
            columns = Index([name])

        mgr = self._mgr.to_2d_mgr(columns)
        df = self._constructor_expanddim(mgr)
        return df.__finalize__(self, method="to_frame")

    def _set_name(self, name, inplace: bool = False) -> Series:
        """
        Set the Series name.

        Parameters
        ----------
        name : str
        inplace : bool
            Whether to modify `self` directly or return a copy.
        """
        inplace = validate_bool_kwarg(inplace, "inplace")
        ser = self if inplace else self.copy()
        ser.name = name
        return ser

    @Appender(
        """
        Examples
        --------
        >>> ser = pd.Series([390., 350., 30., 20.],
        ...                 index=['Falcon', 'Falcon', 'Parrot', 'Parrot'], name="Max Speed")
        >>> ser
        Falcon    390.0
        Falcon    350.0
        Parrot     30.0
        Parrot     20.0
        Name: Max Speed, dtype: float64
        >>> ser.groupby(["a", "b", "a", "b"]).mean()
        a    210.0
        b    185.0
        Name: Max Speed, dtype: float64
        >>> ser.groupby(level=0).mean()
        Falcon    370.0
        Parrot     25.0
        Name: Max Speed, dtype: float64
        >>> ser.groupby(ser > 100).mean()
        Max Speed
        False     25.0
        True     370.0
        Name: Max Speed, dtype: float64

        **Grouping by Indexes**

        We can groupby different levels of a hierarchical index
        using the `level` parameter:

        >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
        ...           ['Captive', 'Wild', 'Captive', 'Wild']]
        >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
        >>> ser = pd.Series([390., 350., 30., 20.], index=index, name="Max Speed")
        >>> ser
        Animal  Type
        Falcon  Captive    390.0
                Wild       350.0
        Parrot  Captive     30.0
                Wild        20.0
        Name: Max Speed, dtype: float64
        >>> ser.groupby(level=0).mean()
        Animal
        Falcon    370.0
        Parrot     25.0
        Name: Max Speed, dtype: float64
        >>> ser.groupby(level="Type").mean()
        Type
        Captive    210.0
        Wild       185.0
        Name: Max Speed, dtype: float64

        We can also choose to include `NA` in group keys or not by defining
        `dropna` parameter, the default setting is `True`.

        >>> ser = pd.Series([1, 2, 3, 3], index=["a", 'a', 'b', np.nan])
        >>> ser.groupby(level=0).sum()
        a    3
        b    3
        dtype: int64

        >>> ser.groupby(level=0, dropna=False).sum()
        a      3
        b      3
        NaN    3
        dtype: int64

        >>> arrays = ['Falcon', 'Falcon', 'Parrot', 'Parrot']
        >>> ser = pd.Series([390., 350., 30., 20.], index=arrays, name="Max Speed")
        >>> ser.groupby(["a", "b", "a", np.nan]).mean()
        a    210.0
        b    350.0
        Name: Max Speed, dtype: float64

        >>> ser.groupby(["a", "b", "a", np.nan], dropna=False).mean()
        a      210.0
        b      350.0
        NaN     20.0
        Name: Max Speed, dtype: float64
        """
    )
    @Appender(_shared_docs["groupby"] % _shared_doc_kwargs)
    def groupby(
        self,
        by=None,
        axis: Axis = 0,
        level: IndexLabel = None,
        as_index: bool = True,
        sort: bool = True,
        group_keys: bool | lib.NoDefault = no_default,
        observed: bool = False,
        dropna: bool = True,
    ) -> SeriesGroupBy:
        from pandas.core.groupby.generic import SeriesGroupBy

        if level is None and by is None:
            raise TypeError("You have to supply one of 'by' and 'level'")
        if not as_index:
            raise TypeError("as_index=False only valid with DataFrame")
        axis = self._get_axis_number(axis)

        return SeriesGroupBy(
            obj=self,
            keys=by,
            axis=axis,
            level=level,
            as_index=as_index,
            sort=sort,
            group_keys=group_keys,
            observed=observed,
            dropna=dropna,
        )

    # ----------------------------------------------------------------------
    # Statistics, overridden ndarray methods

    # TODO: integrate bottleneck
    def count(self):
        """
        Return number of non-NA/null observations in the Series.

        Returns
        -------
        int or Series (if level specified)
            Number of non-null values in the Series.

        See Also
        --------
        DataFrame.count : Count non-NA cells for each column or row.

        Examples
        --------
        >>> s = pd.Series([0.0, 1.0, np.nan])
        >>> s.count()
        2
        """
        return notna(self._values).sum().astype("int64")

    def mode(self, dropna: bool = True) -> Series:
        """
        Return the mode(s) of the Series.

        The mode is the value that appears most often. There can be multiple modes.

        Always returns Series even if only one value is returned.

        Parameters
        ----------
        dropna : bool, default True
            Don't consider counts of NaN/NaT.

        Returns
        -------
        Series
            Modes of the Series in sorted order.
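
        Examples
        --------
        >>> s = pd.Series([2, 4, 2, 2, 4, None])
        >>> s.mode()
        0    2.0
        dtype: float64

        With ``dropna=False``, missing values are counted too:

        >>> s = pd.Series([2, 4, None, None])
        >>> s.mode(dropna=False)
        0   NaN
        dtype: float64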
|
«»» |
|
# TODO: Add option for bins like value_counts() |
|
values = self._values |
|
if isinstance(values, np.ndarray): |
|
res_values = algorithms.mode(values, dropna=dropna) |
|
else: |
|
res_values = values._mode(dropna=dropna) |
|
|
|
# Ensure index is type stable (should always use int index) |
|
return self._constructor( |
|
res_values, index=range(len(res_values)), name=self.name |
|
) |
|
|
|
def unique(self) -> ArrayLike: # pylint: disable=useless-parent-delegation |
|
"""
Return unique values of Series object.

Uniques are returned in order of appearance. Hash table-based unique,
therefore does NOT sort.

Returns
-------
ndarray or ExtensionArray
The unique values returned as a NumPy array. See Notes.

See Also
--------
Series.drop_duplicates : Return Series with duplicate values removed.
unique : Top-level unique method for any 1-d array-like object.
Index.unique : Return Index with unique values from an Index object.

Notes
-----
Returns the unique values as a NumPy array. In case of an
extension-array backed Series, a new
:class:`~api.extensions.ExtensionArray` of that type with just
the unique values is returned. This includes

* Categorical
* Period
* Datetime with Timezone
* Datetime without Timezone
* Timedelta
* Interval
* Sparse
* IntegerNA

See Examples section.

Examples
--------
>>> pd.Series([2, 1, 3, 3], name='A').unique()
array([2, 1, 3])

>>> pd.Series([pd.Timestamp('2016-01-01') for _ in range(3)]).unique()
<DatetimeArray>
['2016-01-01 00:00:00']
Length: 1, dtype: datetime64[ns]

>>> pd.Series([pd.Timestamp('2016-01-01', tz='US/Eastern')
... for _ in range(3)]).unique()
<DatetimeArray>
['2016-01-01 00:00:00-05:00']
Length: 1, dtype: datetime64[ns, US/Eastern]

A Categorical will return categories in the order of
appearance and with the same dtype.

>>> pd.Series(pd.Categorical(list('baabc'))).unique()
['b', 'a', 'c']
Categories (3, object): ['a', 'b', 'c']
>>> pd.Series(pd.Categorical(list('baabc'), categories=list('abc'),
... ordered=True)).unique()
['b', 'a', 'c']
Categories (3, object): ['a' < 'b' < 'c']
"""
|
return super().unique() |
|
|
|
@overload
def drop_duplicates(
self,
*,
keep: DropKeep = ...,
inplace: Literal[False] = ...,
ignore_index: bool = ...,
) -> Series:
...

@overload
def drop_duplicates(
self, *, keep: DropKeep = ..., inplace: Literal[True], ignore_index: bool = ...
) -> None:
...

@overload
def drop_duplicates(
self, *, keep: DropKeep = ..., inplace: bool = ..., ignore_index: bool = ...
) -> Series | None:
...

def drop_duplicates(
self,
*,
keep: DropKeep = "first",
inplace: bool = False,
ignore_index: bool = False,
) -> Series | None:
|
"""
Return Series with duplicate values removed.

Parameters
----------
keep : {'first', 'last', ``False``}, default 'first'
Method to handle dropping duplicates:

- 'first' : Drop duplicates except for the first occurrence.
- 'last' : Drop duplicates except for the last occurrence.
- ``False`` : Drop all duplicates.

inplace : bool, default ``False``
If ``True``, performs operation inplace and returns None.

ignore_index : bool, default ``False``
If ``True``, the resulting axis will be labeled 0, 1, ..., n - 1.

.. versionadded:: 2.0.0

Returns
-------
Series or None
Series with duplicates dropped or None if ``inplace=True``.

See Also
--------
Index.drop_duplicates : Equivalent method on Index.
DataFrame.drop_duplicates : Equivalent method on DataFrame.
Series.duplicated : Related method on Series, indicating duplicate
Series values.
Series.unique : Return unique values as an array.

Examples
--------
Generate a Series with duplicated entries.

>>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'],
... name='animal')
>>> s
0 lama
1 cow
2 lama
3 beetle
4 lama
5 hippo
Name: animal, dtype: object

With the 'keep' parameter, the selection behaviour of duplicated values
can be changed. The value 'first' keeps the first occurrence for each
set of duplicated entries. The default value of keep is 'first'.

>>> s.drop_duplicates()
0 lama
1 cow
3 beetle
5 hippo
Name: animal, dtype: object

The value 'last' for parameter 'keep' keeps the last occurrence for
each set of duplicated entries.

>>> s.drop_duplicates(keep='last')
1 cow
3 beetle
4 lama
5 hippo
Name: animal, dtype: object

The value ``False`` for parameter 'keep' discards all sets of
duplicated entries.

>>> s.drop_duplicates(keep=False)
1 cow
3 beetle
5 hippo
Name: animal, dtype: object
"""
|
inplace = validate_bool_kwarg(inplace, "inplace")
|
result = super().drop_duplicates(keep=keep) |
|
|
|
if ignore_index: |
|
result.index = default_index(len(result)) |
|
|
|
if inplace: |
|
self._update_inplace(result) |
|
return None |
|
else: |
|
return result |
|
|
|
def duplicated(self, keep: DropKeep = "first") -> Series:
|
"""
Indicate duplicate Series values.

Duplicated values are indicated as ``True`` values in the resulting
Series. Either all duplicates, all except the first or all except the
last occurrence of duplicates can be indicated.

Parameters
----------
keep : {'first', 'last', False}, default 'first'
Method to handle dropping duplicates:

- 'first' : Mark duplicates as ``True`` except for the first
occurrence.
- 'last' : Mark duplicates as ``True`` except for the last
occurrence.
- ``False`` : Mark all duplicates as ``True``.

Returns
-------
Series[bool]
Series indicating whether each value has occurred in the
preceding values.

See Also
--------
Index.duplicated : Equivalent method on pandas.Index.
DataFrame.duplicated : Equivalent method on pandas.DataFrame.
Series.drop_duplicates : Remove duplicate values from Series.

Examples
--------
By default, for each set of duplicated values, the first occurrence is
set to False and all others to True:

>>> animals = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama'])
>>> animals.duplicated()
0 False
1 False
2 True
3 False
4 True
dtype: bool

which is equivalent to

>>> animals.duplicated(keep='first')
0 False
1 False
2 True
3 False
4 True
dtype: bool

By using 'last', the last occurrence of each set of duplicated values
is set to False and all others to True:

>>> animals.duplicated(keep='last')
0 True
1 False
2 True
3 False
4 False
dtype: bool

By setting keep to ``False``, all duplicates are True:

>>> animals.duplicated(keep=False)
0 True
1 False
2 True
3 False
4 True
dtype: bool
"""
res = self._duplicated(keep=keep)
result = self._constructor(res, index=self.index)
return result.__finalize__(self, method="duplicated")
|
|
|
def idxmin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashable: |
|
"""
Return the row label of the minimum value.

If multiple values equal the minimum, the first row label with that
value is returned.

Parameters
----------
axis : {0 or 'index'}
Unused. Parameter needed for compatibility with DataFrame.
skipna : bool, default True
Exclude NA/null values. If the entire Series is NA, the result
will be NA.
*args, **kwargs
Additional arguments and keywords have no effect but might be
accepted for compatibility with NumPy.

Returns
-------
Index
Label of the minimum value.

Raises
------
ValueError
If the Series is empty.

See Also
--------
numpy.argmin : Return indices of the minimum values
along the given axis.
DataFrame.idxmin : Return index of first occurrence of minimum
over requested axis.
Series.idxmax : Return index *label* of the first occurrence
of maximum of values.

Notes
-----
This method is the Series version of ``ndarray.argmin``. This method
returns the label of the minimum, while ``ndarray.argmin`` returns
the position. To get the position, use ``series.values.argmin()``.

Examples
--------
>>> s = pd.Series(data=[1, None, 4, 1],
... index=['A', 'B', 'C', 'D'])
>>> s
A 1.0
B NaN
C 4.0
D 1.0
dtype: float64

>>> s.idxmin()
'A'

If `skipna` is False and there is an NA value in the data,
the function returns ``nan``.

>>> s.idxmin(skipna=False)
nan
"""
# error: Argument 1 to "argmin" of "IndexOpsMixin" has incompatible type "Union
# [int, Literal['index', 'columns']]"; expected "Optional[int]"
i = self.argmin(axis, skipna, *args, **kwargs)  # type: ignore[arg-type]
if i == -1:
return np.nan
return self.index[i]
|
|
|
def idxmax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashable: |
|
"""
Return the row label of the maximum value.

If multiple values equal the maximum, the first row label with that
value is returned.

Parameters
----------
axis : {0 or 'index'}
Unused. Parameter needed for compatibility with DataFrame.
skipna : bool, default True
Exclude NA/null values. If the entire Series is NA, the result
will be NA.
*args, **kwargs
Additional arguments and keywords have no effect but might be
accepted for compatibility with NumPy.

Returns
-------
Index
Label of the maximum value.

Raises
------
ValueError
If the Series is empty.

See Also
--------
numpy.argmax : Return indices of the maximum values
along the given axis.
DataFrame.idxmax : Return index of first occurrence of maximum
over requested axis.
Series.idxmin : Return index *label* of the first occurrence
of minimum of values.

Notes
-----
This method is the Series version of ``ndarray.argmax``. This method
returns the label of the maximum, while ``ndarray.argmax`` returns
the position. To get the position, use ``series.values.argmax()``.

Examples
--------
>>> s = pd.Series(data=[1, None, 4, 3, 4],
... index=['A', 'B', 'C', 'D', 'E'])
>>> s
A 1.0
B NaN
C 4.0
D 3.0
E 4.0
dtype: float64

>>> s.idxmax()
'C'

If `skipna` is False and there is an NA value in the data,
the function returns ``nan``.

>>> s.idxmax(skipna=False)
nan
"""
# error: Argument 1 to "argmax" of "IndexOpsMixin" has incompatible type
# "Union[int, Literal['index', 'columns']]"; expected "Optional[int]"
i = self.argmax(axis, skipna, *args, **kwargs)  # type: ignore[arg-type]
if i == -1:
return np.nan
return self.index[i]
|
|
|
def round(self, decimals: int = 0, *args, **kwargs) -> Series: |
|
"""
Round each value in a Series to the given number of decimals.

Parameters
----------
decimals : int, default 0
Number of decimal places to round to. If decimals is negative,
it specifies the number of positions to the left of the decimal point.
*args, **kwargs
Additional arguments and keywords have no effect but might be
accepted for compatibility with NumPy.

Returns
-------
Series
Rounded values of the Series.

See Also
--------
numpy.around : Round values of an np.array.
DataFrame.round : Round values of a DataFrame.

Examples
--------
>>> s = pd.Series([0.1, 1.3, 2.7])
>>> s.round()
0 0.0
1 1.0
2 3.0
dtype: float64
"""
nv.validate_round(args, kwargs)
result = self._values.round(decimals)
result = self._constructor(result, index=self.index).__finalize__(
self, method="round"
)
|
|
|
return result |
|
|
|
@overload
def quantile(
self, q: float = ..., interpolation: QuantileInterpolation = ...
) -> float:
...

@overload
def quantile(
self,
q: Sequence[float] | AnyArrayLike,
interpolation: QuantileInterpolation = ...,
) -> Series:
...

@overload
def quantile(
self,
q: float | Sequence[float] | AnyArrayLike = ...,
interpolation: QuantileInterpolation = ...,
) -> float | Series:
...

def quantile(
self,
q: float | Sequence[float] | AnyArrayLike = 0.5,
interpolation: QuantileInterpolation = "linear",
|
) -> float | Series: |
|
"""
Return value at the given quantile.

Parameters
----------
q : float or array-like, default 0.5 (50% quantile)
The quantile(s) to compute, which can lie in range: 0 <= q <= 1.
interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
This optional parameter specifies the interpolation method to use,
when the desired quantile lies between two data points `i` and `j`:

* linear: `i + (j - i) * fraction`, where `fraction` is the
fractional part of the index surrounded by `i` and `j`.
* lower: `i`.
* higher: `j`.
* nearest: `i` or `j` whichever is nearest.
* midpoint: (`i` + `j`) / 2.

Returns
-------
float or Series
If ``q`` is an array, a Series will be returned where the
index is ``q`` and the values are the quantiles, otherwise
a float will be returned.

See Also
--------
core.window.Rolling.quantile : Calculate the rolling quantile.
numpy.percentile : Returns the q-th percentile(s) of the array elements.

Examples
--------
>>> s = pd.Series([1, 2, 3, 4])
>>> s.quantile(.5)
2.5
>>> s.quantile([.25, .5, .75])
0.25 1.75
0.50 2.50
0.75 3.25
dtype: float64
"""
|
validate_percentile(q) |
|
|
|
# We dispatch to DataFrame so that core.internals only has to worry |
|
# about 2D cases. |
|
df = self.to_frame() |
|
|
|
result = df.quantile(q=q, interpolation=interpolation, numeric_only=False) |
|
if result.ndim == 2: |
|
result = result.iloc[:, 0] |
|
|
|
if is_list_like(q): |
|
result.name = self.name |
|
idx = Index(q, dtype=np.float64) |
|
return self._constructor(result, index=idx, name=self.name) |
|
else: |
|
# scalar |
|
return result.iloc[0] |
|
|
|
def corr( |
|
self, |
|
other: Series, |
|
method: CorrelationMethod = "pearson",
|
min_periods: int | None = None, |
|
) -> float: |
|
"""
Compute correlation with `other` Series, excluding missing values.

The two `Series` objects are not required to be the same length and will be
aligned internally before the correlation function is applied.

Parameters
----------
other : Series
Series with which to compute the correlation.
method : {'pearson', 'kendall', 'spearman'} or callable
Method used to compute correlation:

- pearson : Standard correlation coefficient
- kendall : Kendall Tau correlation coefficient
- spearman : Spearman rank correlation
- callable: Callable with input two 1d ndarrays and returning a float.

.. warning::
Note that the returned matrix from corr will have 1 along the
diagonals and will be symmetric regardless of the callable's
behavior.
min_periods : int, optional
Minimum number of observations needed to have a valid result.

Returns
-------
float
Correlation with other.

See Also
--------
DataFrame.corr : Compute pairwise correlation between columns.
DataFrame.corrwith : Compute pairwise correlation with another
DataFrame or Series.

Notes
-----
Pearson, Kendall and Spearman correlation are currently computed using pairwise complete observations.

* `Pearson correlation coefficient <https://en.wikipedia.org/wiki/Pearson_correlation_coefficient>`_
* `Kendall rank correlation coefficient <https://en.wikipedia.org/wiki/Kendall_rank_correlation_coefficient>`_
* `Spearman's rank correlation coefficient <https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient>`_

Examples
--------
>>> def histogram_intersection(a, b):
...     v = np.minimum(a, b).sum().round(decimals=1)
...     return v
>>> s1 = pd.Series([.2, .0, .6, .2])
>>> s2 = pd.Series([.3, .6, .0, .1])
>>> s1.corr(s2, method=histogram_intersection)
0.3
"""  # noqa:E501
this, other = self.align(other, join="inner", copy=False)
if len(this) == 0:
return np.nan

if method in ["pearson", "spearman", "kendall"] or callable(method):
return nanops.nancorr(
this.values, other.values, method=method, min_periods=min_periods
)

raise ValueError(
"method must be either 'pearson', "
"'spearman', 'kendall', or a callable, "
f"'{method}' was supplied"
)
|
) |
|
|
|
def cov( |
|
self, |
|
other: Series, |
|
min_periods: int | None = None, |
|
ddof: int | None = 1, |
|
) -> float: |
|
"""
Compute covariance with Series, excluding missing values.

The two `Series` objects are not required to be the same length and
will be aligned internally before the covariance is calculated.

Parameters
----------
other : Series
Series with which to compute the covariance.
min_periods : int, optional
Minimum number of observations needed to have a valid result.
ddof : int, default 1
Delta degrees of freedom. The divisor used in calculations
is ``N - ddof``, where ``N`` represents the number of elements.

.. versionadded:: 1.1.0

Returns
-------
float
Covariance between Series and other normalized by N-1
(unbiased estimator).

See Also
--------
DataFrame.cov : Compute pairwise covariance of columns.

Examples
--------
>>> s1 = pd.Series([0.90010907, 0.13484424, 0.62036035])
>>> s2 = pd.Series([0.12528585, 0.26962463, 0.51111198])
>>> s1.cov(s2)
-0.01685762652715874
"""
this, other = self.align(other, join="inner", copy=False)
if len(this) == 0:
return np.nan
return nanops.nancov(
this.values, other.values, min_periods=min_periods, ddof=ddof
|
) |
|
|
|
@doc(
klass="Series",
extra_params="",
other_klass="DataFrame",
examples=dedent(
"""
|
Difference with previous row |
|
|
|
>>> s = pd.Series([1, 1, 2, 3, 5, 8]) |
|
>>> s.diff() |
|
0 NaN |
|
1 0.0 |
|
2 1.0 |
|
3 1.0 |
|
4 2.0 |
|
5 3.0 |
|
dtype: float64 |
|
|
|
Difference with 3rd previous row |
|
|
|
>>> s.diff(periods=3) |
|
0 NaN |
|
1 NaN |
|
2 NaN |
|
3 2.0 |
|
4 4.0 |
|
5 6.0 |
|
dtype: float64 |
|
|
|
Difference with following row |
|
|
|
>>> s.diff(periods=-1) |
|
0 0.0 |
|
1 -1.0 |
|
2 -1.0 |
|
3 -2.0 |
|
4 -3.0 |
|
5 NaN |
|
dtype: float64 |
|
|
|
Overflow in input dtype |
|
|
|
>>> s = pd.Series([1, 0], dtype=np.uint8) |
|
>>> s.diff() |
|
0 NaN |
|
1 255.0 |
|
dtype: float64"""
|
), |
|
) |
|
def diff(self, periods: int = 1) -> Series: |
|
"""
First discrete difference of element.

Calculates the difference of a {klass} element compared with another
element in the {klass} (default is element in previous row).

Parameters
----------
periods : int, default 1
Periods to shift for calculating difference, accepts negative
values.
{extra_params}
Returns
-------
{klass}
First differences of the Series.

See Also
--------
{klass}.pct_change: Percent change over given number of periods.
{klass}.shift: Shift index by desired number of periods with an
optional time freq.
{other_klass}.diff: First discrete difference of object.

Notes
-----
For boolean dtypes, this uses :meth:`operator.xor` rather than
:meth:`operator.sub`.
The result is calculated according to the current dtype in {klass};
however, the dtype of the result is always float64.

Examples
--------
{examples}
"""
result = algorithms.diff(self._values, periods)
return self._constructor(result, index=self.index).__finalize__(
self, method="diff"
)
|
|
|
def autocorr(self, lag: int = 1) -> float: |
|
"""
Compute the lag-N autocorrelation.

This method computes the Pearson correlation between
the Series and its shifted self.

Parameters
----------
lag : int, default 1
Number of lags to apply before performing autocorrelation.

Returns
-------
float
The Pearson correlation between self and self.shift(lag).

See Also
--------
Series.corr : Compute the correlation between two Series.
Series.shift : Shift index by desired number of periods.
DataFrame.corr : Compute pairwise correlation of columns.
DataFrame.corrwith : Compute pairwise correlation between rows or
columns of two DataFrame objects.

Notes
-----
If the Pearson correlation is not well defined, return 'NaN'.

Examples
--------
>>> s = pd.Series([0.25, 0.5, 0.2, -0.05])
>>> s.autocorr()  # doctest: +ELLIPSIS
0.10355...
>>> s.autocorr(lag=2)  # doctest: +ELLIPSIS
-0.99999...

If the Pearson correlation is not well defined, then 'NaN' is returned.

>>> s = pd.Series([1, 0, 0, 0])
>>> s.autocorr()
nan
"""
|
return self.corr(self.shift(lag)) |
|
|
|
def dot(self, other: AnyArrayLike) -> Series | np.ndarray: |
|
"""
Compute the dot product between the Series and the columns of other.

This method computes the dot product between the Series and another
one, or the Series and each column of a DataFrame, or the Series and
each column of an array.

It can also be called using `self @ other` in Python >= 3.5.

Parameters
----------
other : Series, DataFrame or array-like
The other object to compute the dot product with its columns.

Returns
-------
scalar, Series or numpy.ndarray
Return the dot product of the Series and other if other is a
Series, the Series of the dot product of Series and each column of
other if other is a DataFrame, or a numpy.ndarray of the dot
product between the Series and each column of the numpy array.

See Also
--------
DataFrame.dot: Compute the matrix product with the DataFrame.
Series.mul: Multiplication of series and other, element-wise.

Notes
-----
The Series and other have to share the same index if other is a Series
or a DataFrame.

Examples
--------
>>> s = pd.Series([0, 1, 2, 3])
>>> other = pd.Series([-1, 2, -3, 4])
>>> s.dot(other)
8
>>> s @ other
8
>>> df = pd.DataFrame([[0, 1], [-2, 3], [4, -5], [6, 7]])
>>> s.dot(df)
0 24
1 14
dtype: int64
>>> arr = np.array([[0, 1], [-2, 3], [4, -5], [6, 7]])
>>> s.dot(arr)
array([24, 14])
"""
|
if isinstance(other, (Series, ABCDataFrame)): |
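# The dot product is only defined when both operands share the same
# index labels: if the union is strictly larger than either index,
# at least one label is missing on one side.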
|
common = self.index.union(other.index) |
|
if len(common) > len(self.index) or len(common) > len(other.index): |
|
raise ValueError("matrices are not aligned")
|
|
|
left = self.reindex(index=common, copy=False) |
|
right = other.reindex(index=common, copy=False) |
|
lvals = left.values |
|
rvals = right.values |
|
else: |
|
lvals = self.values |
|
rvals = np.asarray(other) |
|
if lvals.shape[0] != rvals.shape[0]: |
|
raise Exception( |
|
f"Dot product shape mismatch, {lvals.shape} vs {rvals.shape}"
|
) |
|
|
|
if isinstance(other, ABCDataFrame): |
|
return self._constructor( |
|
np.dot(lvals, rvals), index=other.columns |
|
).__finalize__(self, method="dot")
|
elif isinstance(other, Series): |
|
return np.dot(lvals, rvals) |
|
elif isinstance(rvals, np.ndarray): |
|
return np.dot(lvals, rvals) |
|
else: # pragma: no cover |
|
raise TypeError(f"unsupported type: {type(other)}")
|
|
|
def __matmul__(self, other): |
|
"""
|
Matrix multiplication using binary `@` operator in Python>=3.5. |
|
"""
|
return self.dot(other) |
|
|
|
def __rmatmul__(self, other): |
|
"""
|
Matrix multiplication using binary `@` operator in Python>=3.5. |
|
"""
|
return self.dot(np.transpose(other)) |
|
|
|
@doc(base.IndexOpsMixin.searchsorted, klass="Series")
|
# Signature of «searchsorted» incompatible with supertype «IndexOpsMixin» |
|
def searchsorted( # type: ignore[override] |
|
self, |
|
value: NumpyValueArrayLike | ExtensionArray, |
|
side: Literal["left", "right"] = "left",
|
sorter: NumpySorter = None, |
|
) -> npt.NDArray[np.intp] | np.intp: |
|
return base.IndexOpsMixin.searchsorted(self, value, side=side, sorter=sorter) |
|
|
|
# -------------------------------------------------------------------
|
# Combination |
|
|
|
def _append( |
|
self, to_append, ignore_index: bool = False, verify_integrity: bool = False |
|
): |
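"""
Concatenate one or more Series to this one and return the result.

Internal helper: `to_append` may be a single Series or a list/tuple
of Series; a DataFrame anywhere in the input raises a TypeError below.
"""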
|
from pandas.core.reshape.concat import concat |
|
|
|
if isinstance(to_append, (list, tuple)): |
|
to_concat = [self] |
|
to_concat.extend(to_append) |
|
else: |
|
to_concat = [self, to_append] |
|
if any(isinstance(x, (ABCDataFrame,)) for x in to_concat[1:]): |
|
msg = "to_append should be a Series or list/tuple of Series, got DataFrame"
|
raise TypeError(msg) |
|
return concat( |
|
to_concat, ignore_index=ignore_index, verify_integrity=verify_integrity |
|
) |
|
|
|
def _binop(self, other: Series, func, level=None, fill_value=None): |
|
"""
Perform generic binary operation with optional fill value.

Parameters
----------
other : Series
func : binary operator
fill_value : float or object
Value to substitute for NA/null values. If both Series are NA in a
location, the result will be NA regardless of the passed fill value.
level : int or level name, default None
Broadcast across a level, matching Index values on the
passed MultiIndex level.

Returns
-------
Series
"""
|
if not isinstance(other, Series): |
|
raise AssertionError("Other operand must be Series")
|
|
|
this = self |
|
|
|
if not self.index.equals(other.index): |
|
this, other = self.align(other, level=level, join="outer", copy=False)
|
|
|
this_vals, other_vals = ops.fill_binop(this._values, other._values, fill_value) |
|
|
|
with np.errstate(all="ignore"):
|
result = func(this_vals, other_vals) |
|
|
|
name = ops.get_op_result_name(self, other) |
|
return this._construct_result(result, name) |
|
|
|
def _construct_result( |
|
self, result: ArrayLike | tuple[ArrayLike, ArrayLike], name: Hashable |
|
) -> Series | tuple[Series, Series]: |
|
"""
Construct an appropriately-labelled Series from the result of an op.

Parameters
----------
result : ndarray or ExtensionArray
name : Label

Returns
-------
Series
In the case of __divmod__ or __rdivmod__, a 2-tuple of Series.
"""
|
if isinstance(result, tuple): |
|
# produced by divmod or rdivmod |
|
|
|
res1 = self._construct_result(result[0], name=name) |
|
res2 = self._construct_result(result[1], name=name) |
|
|
|
# GH#33427 assertions to keep mypy happy |
|
assert isinstance(res1, Series) |
|
assert isinstance(res2, Series) |
|
return (res1, res2) |
|
|
|
# TODO: result should always be ArrayLike, but this fails for some |
|
# JSONArray tests |
|
dtype = getattr(result, "dtype", None)
|
out = self._constructor(result, index=self.index, dtype=dtype) |
|
out = out.__finalize__(self) |
|
|
|
# Set the result's name after __finalize__ is called because __finalize__
|
# would set it back to self.name |
|
out.name = name |
|
return out |
|
|
|
@doc(
_shared_docs["compare"],
"""
|
Returns
-------
Series or DataFrame
If axis is 0 or 'index' the result will be a Series.
The resulting index will be a MultiIndex with 'self' and 'other'
stacked alternately at the inner level.

If axis is 1 or 'columns' the result will be a DataFrame.
It will have two columns namely 'self' and 'other'.

See Also
--------
DataFrame.compare : Compare with another DataFrame and show differences.

Notes
-----
Matching NaNs will not appear as a difference.

Examples
--------
>>> s1 = pd.Series(["a", "b", "c", "d", "e"])
>>> s2 = pd.Series(["a", "a", "c", "b", "e"])
|
|
|
Align the differences on columns |
|
|
|
>>> s1.compare(s2) |
|
self other |
|
1 b a |
|
3 d b |
|
|
|
Stack the differences on indices |
|
|
|
>>> s1.compare(s2, align_axis=0) |
|
1 self b |
|
other a |
|
3 self d |
|
other b |
|
dtype: object |
|
|
|
Keep all original rows |
|
|
|
>>> s1.compare(s2, keep_shape=True) |
|
self other |
|
0 NaN NaN |
|
1 b a |
|
2 NaN NaN |
|
3 d b |
|
4 NaN NaN |
|
|
|
Keep all original rows and also all original values |
|
|
|
>>> s1.compare(s2, keep_shape=True, keep_equal=True) |
|
self other |
|
0 a a |
|
1 b a |
|
2 c c |
|
3 d b |
|
4 e e |
|
""",
klass=_shared_doc_kwargs["klass"],
|
) |
|
def compare( |
|
self, |
|
other: Series, |
|
align_axis: Axis = 1, |
|
keep_shape: bool = False, |
|
keep_equal: bool = False, |
|
result_names: Suffixes = ("self", "other"),
|
) -> DataFrame | Series: |
|
return super().compare( |
|
other=other, |
|
align_axis=align_axis, |
|
keep_shape=keep_shape, |
|
keep_equal=keep_equal, |
|
result_names=result_names, |
|
) |
|
|
|
def combine( |
|
self, |
|
other: Series | Hashable, |
|
func: Callable[[Hashable, Hashable], Hashable], |
|
fill_value: Hashable = None, |
|
) -> Series: |
|
"""
Combine the Series with a Series or scalar according to `func`.

Combine the Series and `other` using `func` to perform elementwise
selection for combined Series.
`fill_value` is assumed when value is missing at some index
from one of the two objects being combined.

Parameters
----------
other : Series or scalar
The value(s) to be combined with the `Series`.
func : function
Function that takes two scalars as inputs and returns an element.
fill_value : scalar, optional
The value to assume when an index is missing from
one Series or the other. The default specifies to use the
appropriate NaN value for the underlying dtype of the Series.

Returns
-------
Series
The result of combining the Series with the other object.

See Also
--------
Series.combine_first : Combine Series values, choosing the calling
Series' values first.

Examples
--------
Consider 2 Datasets ``s1`` and ``s2`` containing
highest clocked speeds of different birds.

>>> s1 = pd.Series({'falcon': 330.0, 'eagle': 160.0})
>>> s1
falcon 330.0
eagle 160.0
dtype: float64
>>> s2 = pd.Series({'falcon': 345.0, 'eagle': 200.0, 'duck': 30.0})
>>> s2
falcon 345.0
eagle 200.0
duck 30.0
dtype: float64

Now, to combine the two datasets and view the highest speeds
of the birds across the two datasets

>>> s1.combine(s2, max)
duck NaN
eagle 200.0
falcon 345.0
dtype: float64

In the previous example, the resulting value for duck is missing,
because the maximum of a NaN and a float is a NaN.
So, in the example, we set ``fill_value=0``,
so the maximum value returned will be the value from some dataset.

>>> s1.combine(s2, max, fill_value=0)
duck 30.0
eagle 200.0
falcon 345.0
dtype: float64
"""
|
if fill_value is None: |
|
fill_value = na_value_for_dtype(self.dtype, compat=False) |
|
|
|
if isinstance(other, Series): |
|
# If other is a Series, result is based on union of Series, |
|
# so do this element by element |
|
new_index = self.index.union(other.index) |
|
new_name = ops.get_op_result_name(self, other) |
|
new_values = np.empty(len(new_index), dtype=object) |
|
for i, idx in enumerate(new_index): |
|
lv = self.get(idx, fill_value) |
|
rv = other.get(idx, fill_value) |
|
with np.errstate(all="ignore"):
|
new_values[i] = func(lv, rv) |
|
else: |
|
# Assume that other is a scalar, so apply the function for |
|
# each element in the Series |
|
new_index = self.index |
|
new_values = np.empty(len(new_index), dtype=object) |
|
with np.errstate(all="ignore"):
|
new_values[:] = [func(lv, other) for lv in self._values] |
|
new_name = self.name |
|
|
|
# try_float=False is to match agg_series |
|
npvalues = lib.maybe_convert_objects(new_values, try_float=False) |
|
res_values = maybe_cast_pointwise_result(npvalues, self.dtype, same_dtype=False) |
|
return self._constructor(res_values, index=new_index, name=new_name) |
|
|
|
def combine_first(self, other) -> Series: |
|
"""
Update null elements with value in the same location in 'other'.

Combine two Series objects by filling null values in one Series with
non-null values from the other Series. Result index will be the union
of the two indexes.

Parameters
----------
other : Series
The value(s) to be used for filling null values.

Returns
-------
Series
The result of combining the provided Series with the other object.

See Also
--------
Series.combine : Perform element-wise operation on two Series
using a given function.

Examples
--------
>>> s1 = pd.Series([1, np.nan])
>>> s2 = pd.Series([3, 4, 5])
>>> s1.combine_first(s2)
0 1.0
1 4.0
2 5.0
dtype: float64

Null values still persist if the location of that null value
does not exist in `other`

>>> s1 = pd.Series({'falcon': np.nan, 'eagle': 160.0})
>>> s2 = pd.Series({'eagle': 200.0, 'duck': 30.0})
>>> s1.combine_first(s2)
duck 30.0
eagle 160.0
falcon NaN
dtype: float64
"""
|
new_index = self.index.union(other.index) |
|
this = self.reindex(new_index, copy=False) |
|
other = other.reindex(new_index, copy=False) |
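# If self is datetime-like but other is not, coerce other to datetime
# so the where() below keeps a consistent dtype.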
|
if this.dtype.kind == "M" and other.dtype.kind != "M":
|
other = to_datetime(other) |
|
|
|
return this.where(notna(this), other) |
|
|
|
def update(self, other: Series | Sequence | Mapping) -> None: |
|
"""
Modify Series in place using values from passed Series.

Uses non-NA values from passed Series to make updates. Aligns
on index.

Parameters
----------
other : Series, or object coercible into Series

Examples
--------
>>> s = pd.Series([1, 2, 3])
>>> s.update(pd.Series([4, 5, 6]))
>>> s
0 4
1 5
2 6
dtype: int64

>>> s = pd.Series(['a', 'b', 'c'])
>>> s.update(pd.Series(['d', 'e'], index=[0, 2]))
>>> s
0 d
1 b
2 e
dtype: object

>>> s = pd.Series([1, 2, 3])
>>> s.update(pd.Series([4, 5, 6, 7, 8]))
>>> s
0 4
1 5
2 6
dtype: int64

If ``other`` contains NaNs the corresponding values are not updated
in the original Series.

>>> s = pd.Series([1, 2, 3])
>>> s.update(pd.Series([4, np.nan, 6]))
>>> s
0 4
1 2
2 6
dtype: int64

``other`` can also be a non-Series object type
that is coercible into a Series

>>> s = pd.Series([1, 2, 3])
>>> s.update([4, np.nan, 6])
>>> s
0 4
1 2
2 6
dtype: int64

>>> s = pd.Series([1, 2, 3])
>>> s.update({1: 9})
>>> s
0 1
1 9
2 3
dtype: int64
"""
|
|
|
if not isinstance(other, Series): |
|
other = Series(other) |
|
|
|
other = other.reindex_like(self) |
|
mask = notna(other) |
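# Only positions where other has a non-NA value are overwritten;
# all remaining values of self are left untouched.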
|
|
|
self._mgr = self._mgr.putmask(mask=mask, new=other) |
|
self._maybe_update_cacher() |
|
|
|
# ----------------------------------------------------------------------
|
# Reindexing, sorting |
|
|
|
@overload
def sort_values(
self,
*,
axis: Axis = ...,
ascending: bool | int | Sequence[bool] | Sequence[int] = ...,
inplace: Literal[False] = ...,
kind: str = ...,
na_position: str = ...,
ignore_index: bool = ...,
key: ValueKeyFunc = ...,
) -> Series:
...

@overload
def sort_values(
self,
*,
axis: Axis = ...,
ascending: bool | int | Sequence[bool] | Sequence[int] = ...,
inplace: Literal[True],
kind: str = ...,
na_position: str = ...,
ignore_index: bool = ...,
key: ValueKeyFunc = ...,
) -> None:
...
|
|
|
def sort_values( |
|
self, |
|
*, |
|
axis: Axis = 0, |
|
ascending: bool | int | Sequence[bool] | Sequence[int] = True, |
|
inplace: bool = False, |
|
kind: str = "quicksort",
|
na_position: str = "last",
|
ignore_index: bool = False, |
|
key: ValueKeyFunc = None, |
|
) -> Series | None: |
|
"""
|
Sort by the values. |
|
|
|
Sort a Series in ascending or descending order by some |
|
criterion. |
|
|
|
Parameters |
|
----------
axis : {0 or 'index'}
|
Unused. Parameter needed for compatibility with DataFrame. |
|
ascending : bool or list of bools, default True |
|
If True, sort values in ascending order, otherwise descending. |
|
inplace : bool, default False |
|
If True, perform operation in-place. |
|
kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort'
Choice of sorting algorithm. See also :func:`numpy.sort` for more
information. 'mergesort' and 'stable' are the only stable algorithms.
na_position : {'first' or 'last'}, default 'last'
Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
the end.
ignore_index : bool, default False
If True, the resulting axis will be labeled 0, 1, ..., n - 1.
|
|
|
.. versionadded:: 1.0.0 |
|
|
|
key : callable, optional |
|
If not None, apply the key function to the series values |
|
before sorting. This is similar to the `key` argument in the |
|
builtin :meth:`sorted` function, with the notable difference that |
|
this `key` function should be *vectorized*. It should expect a |
|
``Series`` and return an array-like.
|
|
|
.. versionadded:: 1.1.0 |
|
|
|
Returns
-------
Series or None
Series ordered by values or None if ``inplace=True``.

See Also
--------
|
Series.sort_index : Sort by the Series indices. |
|
DataFrame.sort_values : Sort DataFrame by the values along either axis. |
|
DataFrame.sort_index : Sort DataFrame by indices. |
|
|
|
Examples |
|
--------
|
>>> s = pd.Series([np.nan, 1, 3, 10, 5]) |
|
>>> s |
|
0 NaN |
|
1 1.0 |
|
2 3.0 |
|
3 10.0 |
|
4 5.0 |
|
dtype: float64 |
|
|
|
Sort values ascending order (default behaviour) |
|
|
|
>>> s.sort_values(ascending=True) |
|
1 1.0 |
|
2 3.0 |
|
4 5.0 |
|
3 10.0 |
|
0 NaN |
|
dtype: float64 |
|
|
|
Sort values descending order |
|
|
|
>>> s.sort_values(ascending=False) |
|
3 10.0 |
|
4 5.0 |
|
2 3.0 |
|
1 1.0 |
|
0 NaN |
|
dtype: float64 |
|
|
|
Sort values putting NAs first |
|
|
|
>>> s.sort_values(na_position='first')
|
0 NaN |
|
1 1.0 |
|
2 3.0 |
|
4 5.0 |
|
3 10.0 |
|
dtype: float64 |
|
|
|
Sort a series of strings |
|
|
|
>>> s = pd.Series(['z', 'b', 'd', 'a', 'c'])
|
>>> s |
|
0 z |
|
1 b |
|
2 d |
|
3 a |
|
4 c |
|
dtype: object |
|
|
|
>>> s.sort_values() |
|
3 a |
|
1 b |
|
4 c |
|
2 d |
|
0 z |
|
dtype: object |
|
|
|
Sort using a key function. Your `key` function will be
given the ``Series`` of values and should return an array-like.
|
|
|
>>> s = pd.Series(['a', 'B', 'c', 'D', 'e'])
|
>>> s.sort_values() |
|
1 B |
|
3 D |
|
0 a |
|
2 c |
|
4 e |
|
dtype: object |
|
>>> s.sort_values(key=lambda x: x.str.lower()) |
|
0 a |
|
1 B |
|
2 c |
|
3 D |
|
4 e |
|
dtype: object |
|
|
|
NumPy ufuncs work well here. For example, we can |
|
sort by the ``sin`` of the value
|
|
|
>>> s = pd.Series([-4, -2, 0, 2, 4]) |
|
>>> s.sort_values(key=np.sin) |
|
1 -2 |
|
4 4 |
|
2 0 |
|
0 -4 |
|
3 2 |
|
dtype: int64 |
|
|
|
More complicated user-defined functions can be used, |
|
as long as they expect a Series and return an array-like |
|
|
|
>>> s.sort_values(key=lambda x: (np.tan(x.cumsum()))) |
|
0 -4 |
|
3 2 |
|
4 4 |
|
1 -2 |
|
2 0 |
|
dtype: int64 |
|
"""
|
inplace = validate_bool_kwarg(inplace, "inplace")
|
# Validate the axis parameter |
|
self._get_axis_number(axis) |
|
|
|
# GH 5856/5853 |
|
if inplace and self._is_cached: |
|
raise ValueError( |
|
"This Series is a view of some other array, to "
"sort in-place you must create a copy"
|
) |
|
|
|
if is_list_like(ascending): |
|
ascending = cast(Sequence[Union[bool, int]], ascending) |
|
if len(ascending) != 1: |
|
raise ValueError( |
|
f"Length of ascending ({len(ascending)}) must be 1 for Series"
|
) |
|
ascending = ascending[0] |
|
|
|
ascending = validate_ascending(ascending) |
|
|
|
if na_position not in [«first», «last»]: |
|
raise ValueError(f"invalid na_position: {na_position}")
|
|
|
# GH 35922. Make sorting stable by leveraging nargsort |
|
values_to_sort = ensure_key_mapped(self, key)._values if key else self._values |
|
sorted_index = nargsort(values_to_sort, kind, bool(ascending), na_position) |
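# Fast path: if the computed order is already 0..n-1, the Series is
# sorted as-is, so skip the take and return a cheap copy.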
|
|
|
if is_range_indexer(sorted_index, len(sorted_index)): |
|
if inplace: |
|
return self._update_inplace(self) |
|
return self.copy(deep=None) |
|
|
|
result = self._constructor( |
|
self._values[sorted_index], index=self.index[sorted_index] |
|
) |
|
|
|
if ignore_index: |
|
result.index = default_index(len(sorted_index)) |
|
|
|
if not inplace: |
|
return result.__finalize__(self, method="sort_values")
|
self._update_inplace(result) |
|
return None |
|
|
|
@overload
def sort_index(
self,
*,
axis: Axis = ...,
level: IndexLabel = ...,
ascending: bool | Sequence[bool] = ...,
inplace: Literal[True],
kind: SortKind = ...,
na_position: NaPosition = ...,
sort_remaining: bool = ...,
ignore_index: bool = ...,
key: IndexKeyFunc = ...,
) -> None:
...

@overload
def sort_index(
self,
*,
axis: Axis = ...,
level: IndexLabel = ...,
ascending: bool | Sequence[bool] = ...,
inplace: Literal[False] = ...,
kind: SortKind = ...,
na_position: NaPosition = ...,
sort_remaining: bool = ...,
ignore_index: bool = ...,
key: IndexKeyFunc = ...,
) -> Series:
...

@overload
def sort_index(
self,
*,
axis: Axis = ...,
level: IndexLabel = ...,
ascending: bool | Sequence[bool] = ...,
inplace: bool = ...,
kind: SortKind = ...,
na_position: NaPosition = ...,
sort_remaining: bool = ...,
ignore_index: bool = ...,
key: IndexKeyFunc = ...,
) -> Series | None:
...
|
|
|
def sort_index( |
|
self, |
|
*, |
|
axis: Axis = 0, |
|
level: IndexLabel = None, |
|
ascending: bool | Sequence[bool] = True, |
|
inplace: bool = False, |
|
kind: SortKind = "quicksort",
|
na_position: NaPosition = "last",
|
sort_remaining: bool = True, |
|
ignore_index: bool = False, |
|
key: IndexKeyFunc = None, |
|
) -> Series | None: |
|
"""
Sort Series by index labels.

Returns a new Series sorted by label if `inplace` argument is
``False``, otherwise updates the original series and returns None.

Parameters
----------
axis : {0 or 'index'}
|
Unused. Parameter needed for compatibility with DataFrame. |
|
level : int, optional |
|
If not None, sort on values in specified index level(s). |
|
ascending : bool or list-like of bools, default True |
|
Sort ascending vs. descending. When the index is a MultiIndex the |
|
sort direction can be controlled for each level individually. |
|
inplace : bool, default False |
|
If True, perform operation in-place. |
|
kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort'
Choice of sorting algorithm. See also :func:`numpy.sort` for more
information. 'mergesort' and 'stable' are the only stable algorithms. For
DataFrames, this option is only applied when sorting on a single
column or label.
na_position : {'first', 'last'}, default 'last'
If 'first' puts NaNs at the beginning, 'last' puts NaNs at the end.
Not implemented for MultiIndex.
|
sort_remaining : bool, default True |
|
If True and sorting by level and index is multilevel, sort by other |
|
levels too (in order) after sorting by specified level. |
|
ignore_index : bool, default False |
|
If True, the resulting axis will be labeled 0, 1, ..., n - 1.
|
|
|
.. versionadded:: 1.0.0 |
|
|
|
key : callable, optional |
|
If not None, apply the key function to the index values |
|
before sorting. This is similar to the `key` argument in the |
|
builtin :meth:`sorted` function, with the notable difference that |
|
this `key` function should be *vectorized*. It should expect an |
|
``Index`` and return an ``Index`` of the same shape.
|
|
|
.. versionadded:: 1.1.0 |
|
|
|
Returns
-------
Series or None
The original Series sorted by the labels or None if ``inplace=True``.

See Also
--------
|
DataFrame.sort_index: Sort DataFrame by the index. |
|
DataFrame.sort_values: Sort DataFrame by the value. |
|
Series.sort_values : Sort Series by the value. |
|
|
|
Examples |
|
--------
|
>>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, 4])
|
>>> s.sort_index() |
|
1 c |
|
2 b |
|
3 a |
|
4 d |
|
dtype: object |
|
|
|
Sort Descending |
|
|
|
>>> s.sort_index(ascending=False) |
|
4 d |
|
3 a |
|
2 b |
|
1 c |
|
dtype: object |
|
|
|
By default NaNs are put at the end, but use `na_position` to place |
|
them at the beginning |
|
|
|
>>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, np.nan])
>>> s.sort_index(na_position='first')
|
NaN d |
|
1.0 c |
|
2.0 b |
|
3.0 a |
|
dtype: object |
|
|
|
Specify index level to sort |
|
|
|
>>> arrays = [np.array(['qux', 'qux', 'foo', 'foo',
... 'baz', 'baz', 'bar', 'bar']),
... np.array(['two', 'one', 'two', 'one',
... 'two', 'one', 'two', 'one'])]
|
>>> s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=arrays) |
|
>>> s.sort_index(level=1) |
|
bar one 8 |
|
baz one 6 |
|
foo one 4 |
|
qux one 2 |
|
bar two 7 |
|
baz two 5 |
|
foo two 3 |
|
qux two 1 |
|
dtype: int64 |
|
|
|
Does not sort by remaining levels when sorting by levels |
|
|
|
>>> s.sort_index(level=1, sort_remaining=False) |
|
qux one 2 |
|
foo one 4 |
|
baz one 6 |
|
bar one 8 |
|
qux two 1 |
|
foo two 3 |
|
baz two 5 |
|
bar two 7 |
|
dtype: int64 |
|
|
|
Apply a key function before sorting |
|
|
|
>>> s = pd.Series([1, 2, 3, 4], index=['A', 'b', 'C', 'd'])
>>> s.sort_index(key=lambda x: x.str.lower())
|
A 1 |
|
b 2 |
|
C 3 |
|
d 4 |
|
dtype: int64 |
|
"""
|
|
|
return super().sort_index( |
|
axis=axis, |
|
level=level, |
|
ascending=ascending, |
|
inplace=inplace, |
|
kind=kind, |
|
na_position=na_position, |
|
sort_remaining=sort_remaining, |
|
ignore_index=ignore_index, |
|
key=key, |
|
) |
|
|
|
def argsort( |
|
self, |
|
axis: Axis = 0, |
|
kind: SortKind = "quicksort",
|
order: None = None, |
|
) -> Series: |
|
"""
Return the integer indices that would sort the Series values.

Override ndarray.argsort. Argsorts the value, omitting NA/null values,
and places the result in the same locations as the non-NA values.

Parameters
----------
axis : {0 or 'index'}
Unused. Parameter needed for compatibility with DataFrame.
kind : {'mergesort', 'quicksort', 'heapsort', 'stable'}, default 'quicksort'
Choice of sorting algorithm. See :func:`numpy.sort` for more
information. 'mergesort' and 'stable' are the only stable algorithms.
order : None
Has no effect but is accepted for compatibility with numpy.

Returns
-------
Series[np.intp]
Positions of values within the sort order with -1 indicating
nan values.

See Also
--------
numpy.ndarray.argsort : Returns the indices that would sort this array.
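
Examples
--------
The result holds positions into the original values, not labels:

>>> s = pd.Series([3, 1, 2])
>>> s.argsort()
0    1
1    2
2    0
dtype: int64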
|
"""
|
values = self._values |
|
mask = isna(values) |
|
|
|
if mask.any(): |
|
result = np.full(len(self), -1, dtype=np.intp)
|
notmask = ~mask |
|
result[notmask] = np.argsort(values[notmask], kind=kind) |
|
else: |
|
result = np.argsort(values, kind=kind) |
|
|
|
res = self._constructor(result, index=self.index, name=self.name, dtype=np.intp) |
|
return res.__finalize__(self, method="argsort")
|
|
|
def nlargest( |
|
self, n: int = 5, keep: Literal["first", "last", "all"] = "first"
|
) -> Series: |
|
"""
Return the largest `n` elements.

Parameters
----------
n : int, default 5
Return this many descending sorted values.
keep : {'first', 'last', 'all'}, default 'first'
When there are duplicate values that cannot all fit in a
Series of `n` elements:

- ``first`` : return the first `n` occurrences in order
of appearance.
- ``last`` : return the last `n` occurrences in reverse
order of appearance.
- ``all`` : keep all occurrences. This can result in a Series of
size larger than `n`.
|
|
|
Returns
-------
Series
The `n` largest values in the Series, sorted in decreasing order.

See Also
--------
Series.nsmallest: Get the `n` smallest elements.
Series.sort_values: Sort Series by values.
Series.head: Return the first `n` rows.

Notes
-----
Faster than ``.sort_values(ascending=False).head(n)`` for small `n`
relative to the size of the ``Series`` object.

Examples
--------
>>> countries_population = {"Italy": 59000000, "France": 65000000,
... "Malta": 434000, "Maldives": 434000,
... "Brunei": 434000, "Iceland": 337000,
... "Nauru": 11300, "Tuvalu": 11300,
... "Anguilla": 11300, "Montserrat": 5200}
|
>>> s = pd.Series(countries_population) |
|
>>> s |
|
Italy 59000000 |
|
France 65000000 |
|
Malta 434000 |
|
Maldives 434000 |
|
Brunei 434000 |
|
Iceland 337000 |
|
Nauru 11300 |
|
Tuvalu 11300 |
|
Anguilla 11300 |
|
Montserrat 5200 |
|
dtype: int64 |
|
|
|
The `n` largest elements where ``n=5`` by default.
|
|
|
>>> s.nlargest() |
|
France 65000000 |
|
Italy 59000000 |
|
Malta 434000 |
|
Maldives 434000 |
|
Brunei 434000 |
|
dtype: int64 |
|
|
|
The `n` largest elements where ``n=3``. Default `keep` value is 'first'
|
so Malta will be kept. |
|
|
|
>>> s.nlargest(3) |
|
France 65000000 |
|
Italy 59000000 |
|
Malta 434000 |
|
dtype: int64 |
|
|
|
The `n` largest elements where ``n=3`` and keeping the last duplicates.
|
Brunei will be kept since it is the last with value 434000 based on |
|
the index order. |
|
|
|
>>> s.nlargest(3, keep='last')
|
France 65000000 |
|
Italy 59000000 |
|
Brunei 434000 |
|
dtype: int64 |
|
|
|
The `n` largest elements where ``n=3`` with all duplicates kept. Note
|
that the returned Series has five elements due to the three duplicates. |
|
|
|
>>> s.nlargest(3, keep='all')
|
France 65000000 |
|
Italy 59000000 |
|
Malta 434000 |
|
Maldives 434000 |
|
Brunei 434000 |
|
dtype: int64 |
|
"""
|
return algorithms.SelectNSeries(self, n=n, keep=keep).nlargest() |
|
|
|
def nsmallest(self, n: int = 5, keep: str = "first") -> Series:
|
"""
Return the smallest `n` elements.

Parameters
----------
n : int, default 5
Return this many ascending sorted values.
keep : {'first', 'last', 'all'}, default 'first'
When there are duplicate values that cannot all fit in a
Series of `n` elements:

- ``first`` : return the first `n` occurrences in order
of appearance.
- ``last`` : return the last `n` occurrences in reverse
order of appearance.
- ``all`` : keep all occurrences. This can result in a Series of
size larger than `n`.
|
|
|
Returns
-------
Series
The `n` smallest values in the Series, sorted in increasing order.

See Also
--------
Series.nlargest: Get the `n` largest elements.
Series.sort_values: Sort Series by values.
Series.head: Return the first `n` rows.

Notes
-----
Faster than ``.sort_values().head(n)`` for small `n` relative to
the size of the ``Series`` object.

Examples
--------
>>> countries_population = {"Italy": 59000000, "France": 65000000,
... "Brunei": 434000, "Malta": 434000,
... "Maldives": 434000, "Iceland": 337000,
... "Nauru": 11300, "Tuvalu": 11300,
... "Anguilla": 11300, "Montserrat": 5200}
|
>>> s = pd.Series(countries_population) |
|
>>> s |
|
Italy 59000000 |
|
France 65000000 |
|
Brunei 434000 |
|
Malta 434000 |
|
Maldives 434000 |
|
Iceland 337000 |
|
Nauru 11300 |
|
Tuvalu 11300 |
|
Anguilla 11300 |
|
Montserrat 5200 |
|
dtype: int64 |
|
|
|
The `n` smallest elements where ``n=5`` by default.
|
|
|
>>> s.nsmallest() |
|
Montserrat 5200 |
|
Nauru 11300 |
|
Tuvalu 11300 |
|
Anguilla 11300 |
|
Iceland 337000 |
|
dtype: int64 |
|
|
|
The `n` smallest elements where ``n=3``. Default `keep` value is
'first' so Nauru and Tuvalu will be kept.
|
|
|
>>> s.nsmallest(3) |
|
Montserrat 5200 |
|
Nauru 11300 |
|
Tuvalu 11300 |
|
dtype: int64 |
|
|
|
The `n` smallest elements where ``n=3`` and keeping the last
|
duplicates. Anguilla and Tuvalu will be kept since they are the last |
|
with value 11300 based on the index order. |
|
|
|
>>> s.nsmallest(3, keep='last')
|
Montserrat 5200 |
|
Anguilla 11300 |
|
Tuvalu 11300 |
|
dtype: int64 |
|
|
|
The `n` smallest elements where ``n=3`` with all duplicates kept. Note
|
that the returned Series has four elements due to the three duplicates. |
|
|
|
>>> s.nsmallest(3, keep='all')
|
Montserrat 5200 |
|
Nauru 11300 |
|
Tuvalu 11300 |
|
Anguilla 11300 |
|
dtype: int64 |
|
"""
|
return algorithms.SelectNSeries(self, n=n, keep=keep).nsmallest() |
|
|
|
@doc(
klass=_shared_doc_kwargs["klass"],
extra_params=dedent(
"""copy : bool, default True
Whether to copy underlying data."""
),
examples=dedent(
"""
|
Examples |
|
——— |
|
>>> s = pd.Series(
... ["A", "B", "A", "C"],
... index=[
... ["Final exam", "Final exam", "Coursework", "Coursework"],
... ["History", "Geography", "History", "Geography"],
... ["January", "February", "March", "April"],
... ],
... )
|
>>> s |
|
Final exam History January A |
|
Geography February B |
|
Coursework History March A |
|
Geography April C |
|
dtype: object |
|
|
|
In the following example, we will swap the levels of the indices. |
|
Here, we will swap the levels column-wise, but levels can be swapped row-wise |
|
in a similar manner. Note that column-wise is the default behaviour. |
|
By not supplying any arguments for i and j, we swap the last and second to |
|
last indices. |
|
|
|
>>> s.swaplevel() |
|
Final exam January History A |
|
February Geography B |
|
Coursework March History A |
|
April Geography C |
|
dtype: object |
|
|
|
By supplying one argument, we can choose which index to swap the last |
|
index with. We can for example swap the first index with the last one as |
|
follows. |
|
|
|
>>> s.swaplevel(0) |
|
January History Final exam A |
|
February Geography Final exam B |
|
March History Coursework A |
|
April Geography Coursework C |
|
dtype: object |
|
|
|
We can also define explicitly which indices we want to swap by supplying values |
|
for both i and j. Here, we for example swap the first and second indices. |
|
|
|
>>> s.swaplevel(0, 1) |
|
History Final exam January A |
|
Geography Final exam February B |
|
History Coursework March A |
|
Geography Coursework April C |
|
dtype: object"""
|
), |
|
) |
|
def swaplevel( |
|
self, i: Level = -2, j: Level = -1, copy: bool | None = None
|
) -> Series: |
|
"""
Swap levels i and j in a :class:`MultiIndex`.

Default is to swap the two innermost levels of the index.

Parameters
----------
i, j : int or str
Levels of the indices to be swapped. Can pass level name as string.
{extra_params}

Returns
-------
{klass}
{klass} with levels swapped in MultiIndex.

{examples}
"""
|
assert isinstance(self.index, MultiIndex) |
|
result = self.copy(deep=copy) |
|
result.index = self.index.swaplevel(i, j) |
|
return result |
|
|
|
def reorder_levels(self, order: Sequence[Level]) -> Series: |
|
"""
Rearrange index levels using input order.

May not drop or duplicate levels.

Parameters
----------
order : list of int representing new level order
Reference level by number or key.

Returns
-------
type of caller (new object)
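
Examples
--------
A small sketch moving the second level in front of the first:

>>> arrays = [["a", "a", "b"], ["x", "y", "x"]]
>>> s = pd.Series([1, 2, 3], index=arrays)
>>> s.reorder_levels([1, 0])
x  a    1
y  a    2
x  b    3
dtype: int64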
|
"""
|
if not isinstance(self.index, MultiIndex): # pragma: no cover |
|
raise Exception("Can only reorder levels on a hierarchical axis.")
|
|
|
result = self.copy(deep=None) |
|
assert isinstance(result.index, MultiIndex) |
|
result.index = result.index.reorder_levels(order) |
|
return result |
|
|
|
def explode(self, ignore_index: bool = False) -> Series: |
|
"""
Transform each element of a list-like to a row.

Parameters
----------
ignore_index : bool, default False
If True, the resulting index will be labeled 0, 1, ..., n - 1.

.. versionadded:: 1.1.0

Returns
-------
|
Series |
|
Exploded lists to rows; index will be duplicated for these rows. |
|
|
|
See Also |
|
--------
|
Series.str.split : Split string values on specified separator. |
|
Series.unstack : Unstack, a.k.a. pivot, Series with MultiIndex |
|
to produce DataFrame. |
|
DataFrame.melt : Unpivot a DataFrame from wide format to long format. |
|
DataFrame.explode : Explode a DataFrame from list-like |
|
columns to long format. |
|
|
|
Notes |
|
-----
|
This routine will explode list-likes including lists, tuples, sets, |
|
Series, and np.ndarray. The result dtype of the subset rows will |
|
be object. Scalars will be returned unchanged, and empty list-likes will |
|
result in a np.nan for that row. In addition, the ordering of elements in |
|
the output will be non-deterministic when exploding sets. |
|
|
|
Reference :ref:`the user guide <reshaping.explode>` for more examples. |
|
|
|
Examples |
|
--------
|
>>> s = pd.Series([[1, 2, 3], 'foo', [], [3, 4]])
|
>>> s |
|
0 [1, 2, 3] |
|
1 foo |
|
2 [] |
|
3 [3, 4] |
|
dtype: object |
|
|
|
>>> s.explode() |
|
0 1 |
|
0 2 |
|
0 3 |
|
1 foo |
|
2 NaN |
|
3 3 |
|
3 4 |
|
dtype: object |
|
"""
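# Fast path: an empty Series or a non-object dtype cannot hold
# list-likes, so there is nothing to explode.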
|
if not len(self) or not is_object_dtype(self): |
|
result = self.copy() |
|
return result.reset_index(drop=True) if ignore_index else result |
|
|
|
values, counts = reshape.explode(np.asarray(self._values)) |
|
|
|
if ignore_index: |
|
index = default_index(len(values)) |
|
else: |
|
index = self.index.repeat(counts) |
|
|
|
return self._constructor(values, index=index, name=self.name) |
|
|
|
def unstack(self, level: IndexLabel = -1, fill_value: Hashable = None) -> DataFrame:
|
"""
|
Unstack, also known as pivot, Series with MultiIndex to produce DataFrame. |
|
|
|
Parameters |
|
----------
|
level : int, str, or list of these, default last level |
|
Level(s) to unstack, can pass level name. |
|
fill_value : scalar value, default None |
|
Value to use when replacing NaN values. |
|
|
|
Returns |
|
-------
|
DataFrame |
|
Unstacked Series. |
|
|
|
Notes |
|
-----
|
Reference :ref:`the user guide <reshaping.stacking>` for more examples. |
|
|
|
Examples |
|
--------
|
>>> s = pd.Series([1, 2, 3, 4], |
|
...               index=pd.MultiIndex.from_product([['one', 'two'],

...                                                 ['a', 'b']]))
|
>>> s |
|
one a 1 |
|
b 2 |
|
two a 3 |
|
b 4 |
|
dtype: int64 |
|
|
|
>>> s.unstack(level=-1) |
|
a b |
|
one 1 2 |
|
two 3 4 |
|
|
|
>>> s.unstack(level=0) |
|
one two |
|
a 1 3 |
|
b 2 4 |
|
"""
|
from pandas.core.reshape.reshape import unstack |
|
|
|
return unstack(self, level, fill_value) |
|
|
|
# ----------------------------------------------------------------------
|
# function application |
|
|
|
def map( |
|
self, |
|
arg: Callable | Mapping | Series, |
|
na_action: Literal["ignore"] | None = None,
|
) -> Series: |
|
"""
|
Map values of Series according to an input mapping or function. |
|
|
|
Used for substituting each value in a Series with another value that

may be derived from a function, a ``dict`` or a :class:`Series`.
|
|
|
Parameters |
|
----------
|
arg : function, collections.abc.Mapping subclass or Series |
|
Mapping correspondence. |
|
na_action : {None, 'ignore'}, default None
|
If 'ignore', propagate NaN values, without passing them to the
|
mapping correspondence. |
|
|
|
Returns |
|
-------
|
Series |
|
Same index as caller. |
|
|
|
See Also |
|
--------
|
Series.apply : For applying more complex functions on a Series. |
|
DataFrame.apply : Apply a function row-/column-wise. |
|
DataFrame.applymap : Apply a function elementwise on a whole DataFrame. |
|
|
|
Notes |
|
-----
|
When ``arg`` is a dictionary, values in Series that are not in the

dictionary (as keys) are converted to ``NaN``. However, if the

dictionary is a ``dict`` subclass that defines ``__missing__`` (i.e.

provides a method for default values), then this default is used

rather than ``NaN``.
|
|
|
Examples |
|
--------
|
>>> s = pd.Series(['cat', 'dog', np.nan, 'rabbit'])
|
>>> s |
|
0 cat |
|
1 dog |
|
2 NaN |
|
3 rabbit |
|
dtype: object |
|
|
|
``map`` accepts a ``dict`` or a ``Series``. Values that are not found

in the ``dict`` are converted to ``NaN``, unless the dict has a default

value (e.g. ``defaultdict``):
|
|
|
>>> s.map({'cat': 'kitten', 'dog': 'puppy'})
|
0 kitten |
|
1 puppy |
|
2 NaN |
|
3 NaN |
|
dtype: object |
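
A ``defaultdict`` (here with an illustrative default of ``'unknown'``)

supplies its default instead of ``NaN``:

>>> from collections import defaultdict

>>> s.map(defaultdict(lambda: 'unknown', {'cat': 'kitten'}))

0     kitten

1    unknown

2    unknown

3    unknown

dtype: object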
|
|
|
It also accepts a function: |
|
|
|
>>> s.map('I am a {}'.format)
|
0 I am a cat |
|
1 I am a dog |
|
2 I am a nan |
|
3 I am a rabbit |
|
dtype: object |
|
|
|
To avoid applying the function to missing values (and keep them as |
|
``NaN``) ``na_action='ignore'`` can be used:
|
|
|
>>> s.map('I am a {}'.format, na_action='ignore')
|
0 I am a cat |
|
1 I am a dog |
|
2 NaN |
|
3 I am a rabbit |
|
dtype: object |
|
"""
|
new_values = self._map_values(arg, na_action=na_action) |
|
return self._constructor(new_values, index=self.index).__finalize__( |
|
self, method="map"
|
) |
|
|
|
def _gotitem(self, key, ndim, subset=None) -> Series: |
|
"""
|
Sub-classes to define. Return a sliced object. |
|
|
|
Parameters |
|
----------
|
key : string / list of selections |
|
ndim : {1, 2} |
|
Requested ndim of result. |
|
subset : object, default None |
|
Subset to act on. |
|
"""
|
return self |
|
|
|
_agg_see_also_doc = dedent( |
|
"""
|
See Also |
|
--------
|
Series.apply : Invoke function on a Series. |
|
Series.transform : Transform function producing a Series with like indexes. |
|
"""
|
) |
|
|
|
_agg_examples_doc = dedent( |
|
"""
|
Examples |
|
--------
|
>>> s = pd.Series([1, 2, 3, 4]) |
|
>>> s |
|
0 1 |
|
1 2 |
|
2 3 |
|
3 4 |
|
dtype: int64 |
|
|
|
>>> s.agg('min')
|
1 |
|
|
|
>>> s.agg(['min', 'max'])
|
min 1 |
|
max 4 |
|
dtype: int64 |
|
"""
|
) |
|
|
|
@doc( |
|
_shared_docs["aggregate"],

klass=_shared_doc_kwargs["klass"],

axis=_shared_doc_kwargs["axis"],
|
see_also=_agg_see_also_doc, |
|
examples=_agg_examples_doc, |
|
) |
|
def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs): |
|
# Validate the axis parameter |
|
self._get_axis_number(axis) |
|
|
|
# if func is None, will switch to user-provided "named aggregation" kwargs
|
if func is None: |
|
func = dict(kwargs.items()) |
|
|
|
op = SeriesApply(self, func, convert_dtype=False, args=args, kwargs=kwargs) |
|
result = op.agg() |
|
return result |
|
|
|
agg = aggregate |
|
|
|
# error: Signature of "any" incompatible with supertype "NDFrame" [override]
|
@overload # type: ignore[override] |
|
def any( |
|
self, |
|
*, |
|
axis: Axis = ...,

bool_only: bool | None = ...,

skipna: bool = ...,

level: None = ...,

**kwargs,

) -> bool:

...
|
|
|
@overload |
|
def any( |
|
self, |
|
*, |
|
axis: Axis = ...,

bool_only: bool | None = ...,

skipna: bool = ...,

level: Level,

**kwargs,

) -> Series | bool:

...
|
|
|
# error: Missing return statement |
|
@doc(NDFrame.any, **_shared_doc_kwargs) |
|
def any( # type: ignore[empty-body] |
|
self, |
|
axis: Axis = 0, |
|
bool_only: bool | None = None, |
|
skipna: bool = True, |
|
level: Level | None = None, |
|
**kwargs, |
|
) -> Series | bool: |
|
...
|
|
|
@doc( |
|
_shared_docs["transform"],

klass=_shared_doc_kwargs["klass"],

axis=_shared_doc_kwargs["axis"],
|
) |
|
def transform( |
|
self, func: AggFuncType, axis: Axis = 0, *args, **kwargs |
|
) -> DataFrame | Series: |
|
# Validate axis argument |
|
self._get_axis_number(axis) |
|
result = SeriesApply( |
|
self, func=func, convert_dtype=True, args=args, kwargs=kwargs |
|
).transform() |
|
return result |
|
|
|
def apply( |
|
self, |
|
func: AggFuncType, |
|
convert_dtype: bool = True, |
|
args: tuple[Any, ...] = (),
|
**kwargs, |
|
) -> DataFrame | Series: |
|
"""
|
Invoke function on values of Series. |
|
|
|
Can be ufunc (a NumPy function that applies to the entire Series) |
|
or a Python function that only works on single values. |
|
|
|
Parameters |
|
----------
|
func : function |
|
Python function or NumPy ufunc to apply. |
|
convert_dtype : bool, default True |
|
Try to find better dtype for elementwise function results. If |
|
False, leave as dtype=object. Note that the dtype is always |
|
preserved for some extension array dtypes, such as Categorical. |
|
args : tuple |
|
Positional arguments passed to func after the series value. |
|
**kwargs |
|
Additional keyword arguments passed to func. |
|
|
|
Returns |
|
-------
|
Series or DataFrame |
|
If func returns a Series object the result will be a DataFrame. |
|
|
|
See Also |
|
--------
|
Series.map: For element-wise operations. |
|
Series.agg: Only perform aggregating type operations. |
|
Series.transform: Only perform transforming type operations. |
|
|
|
Notes |
|
-----
|
Functions that mutate the passed object can produce unexpected |
|
behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` |
|
for more details. |
|
|
|
Examples |
|
--------
|
Create a series with typical summer temperatures for each city. |
|
|
|
>>> s = pd.Series([20, 21, 12], |
|
...               index=['London', 'New York', 'Helsinki'])
|
>>> s |
|
London 20 |
|
New York 21 |
|
Helsinki 12 |
|
dtype: int64 |
|
|
|
Square the values by defining a function and passing it as an |
|
argument to ``apply()``.
|
|
|
>>> def square(x): |
|
...     return x ** 2
|
>>> s.apply(square) |
|
London 400 |
|
New York 441 |
|
Helsinki 144 |
|
dtype: int64 |
|
|
|
Square the values by passing an anonymous function as an |
|
argument to ``apply()``.
|
|
|
>>> s.apply(lambda x: x ** 2) |
|
London 400 |
|
New York 441 |
|
Helsinki 144 |
|
dtype: int64 |
|
|
|
Define a custom function that needs additional positional |
|
arguments and pass these additional arguments using the |
|
``args`` keyword.
|
|
|
>>> def subtract_custom_value(x, custom_value): |
|
...     return x - custom_value
|
|
|
>>> s.apply(subtract_custom_value, args=(5,)) |
|
London 15 |
|
New York 16 |
|
Helsinki 7 |
|
dtype: int64 |
|
|
|
Define a custom function that takes keyword arguments |
|
and pass these arguments to ``apply``.
|
|
|
>>> def add_custom_values(x, **kwargs): |
|
...     for month in kwargs:

...         x += kwargs[month]

...     return x
|
|
|
>>> s.apply(add_custom_values, june=30, july=20, august=25) |
|
London 95 |
|
New York 96 |
|
Helsinki 87 |
|
dtype: int64 |
|
|
|
Use a function from the NumPy library.
|
|
|
>>> s.apply(np.log) |
|
London 2.995732 |
|
New York 3.044522 |
|
Helsinki 2.484907 |
|
dtype: float64 |
|
"""
|
return SeriesApply(self, func, convert_dtype, args, kwargs).apply() |
|
|
|
def _reduce( |
|
self, |
|
op, |
|
name: str, |
|
*, |
|
axis: Axis = 0, |
|
skipna: bool = True, |
|
numeric_only: bool = False, |
|
filter_type=None, |
|
**kwds, |
|
): |
|
"""
|
Perform a reduction operation. |
|
|
|
If we have an ndarray as a value, then simply perform the operation, |
|
otherwise delegate to the object. |
|
"""
|
delegate = self._values |
|
|
|
if axis is not None: |
|
self._get_axis_number(axis) |
|
|
|
if isinstance(delegate, ExtensionArray): |
|
# dispatch to ExtensionArray interface |
|
return delegate._reduce(name, skipna=skipna, **kwds) |
|
|
|
else: |
|
# dispatch to numpy arrays |
|
if numeric_only and not is_numeric_dtype(self.dtype): |
|
kwd_name = "numeric_only"

if name in ["any", "all"]:

kwd_name = "bool_only"
|
# GH#47500 - change to TypeError to match other methods
|
raise TypeError( |
|
f"Series.{name} does not allow {kwd_name}={numeric_only} "

"with non-numeric dtypes."
|
) |
|
with np.errstate(all="ignore"):
|
return op(delegate, skipna=skipna, **kwds) |
|
|
|
def _reindex_indexer( |
|
self, |
|
new_index: Index | None, |
|
indexer: npt.NDArray[np.intp] | None, |
|
copy: bool | None, |
|
) -> Series: |
|
# Note: new_index is None iff indexer is None |
|
# if not None, indexer is np.intp |
|
if indexer is None and ( |
|
new_index is None or new_index.names == self.index.names |
|
): |
|
if using_copy_on_write(): |
|
return self.copy(deep=copy) |
|
if copy or copy is None: |
|
return self.copy(deep=copy) |
|
return self |
|
|
|
new_values = algorithms.take_nd( |
|
self._values, indexer, allow_fill=True, fill_value=None |
|
) |
|
return self._constructor(new_values, index=new_index) |
|
|
|
def _needs_reindex_multi(self, axes, method, level) -> bool: |
|
"""
|
Check if we do need a multi reindex; this is for compat with |
|
higher dims. |
|
"""
|
return False |
|
|
|
# error: Cannot determine type of 'align'
|
@doc( |
|
NDFrame.align, # type: ignore[has-type] |
|
klass=_shared_doc_kwargs["klass"],

axes_single_arg=_shared_doc_kwargs["axes_single_arg"],
|
) |
|
def align( |
|
self, |
|
other: Series, |
|
join: AlignJoin = "outer",
|
axis: Axis | None = None, |
|
level: Level = None, |
|
copy: bool | None = None, |
|
fill_value: Hashable = None, |
|
method: FillnaOptions | None = None, |
|
limit: int | None = None, |
|
fill_axis: Axis = 0, |
|
broadcast_axis: Axis | None = None, |
|
) -> Series: |
|
return super().align( |
|
other, |
|
join=join, |
|
axis=axis, |
|
level=level, |
|
copy=copy, |
|
fill_value=fill_value, |
|
method=method, |
|
limit=limit, |
|
fill_axis=fill_axis, |
|
broadcast_axis=broadcast_axis, |
|
) |
|
|
|
@overload |
|
def rename( |
|
self, |
|
index: Renamer | Hashable | None = ...,

*,

axis: Axis | None = ...,

copy: bool = ...,

inplace: Literal[True],

level: Level | None = ...,

errors: IgnoreRaise = ...,

) -> None:

...
|
|
|
@overload |
|
def rename( |
|
self, |
|
index: Renamer | Hashable | None = ...,

*,

axis: Axis | None = ...,

copy: bool = ...,

inplace: Literal[False] = ...,

level: Level | None = ...,

errors: IgnoreRaise = ...,

) -> Series:

...
|
|
|
@overload |
|
def rename( |
|
self, |
|
index: Renamer | Hashable | None = ...,

*,

axis: Axis | None = ...,

copy: bool = ...,

inplace: bool = ...,

level: Level | None = ...,

errors: IgnoreRaise = ...,

) -> Series | None:

...
|
|
|
def rename( |
|
self, |
|
index: Renamer | Hashable | None = None, |
|
*, |
|
axis: Axis | None = None, |
|
copy: bool = True, |
|
inplace: bool = False, |
|
level: Level | None = None, |
|
errors: IgnoreRaise = "ignore",
|
) -> Series | None: |
|
"""
|
Alter Series index labels or name. |
|
|
|
Function / dict values must be unique (1-to-1). Labels not contained in |
|
a dict / Series will be left as-is. Extra labels listed don't throw an
|
error. |
|
|
|
Alternatively, change ``Series.name`` with a scalar value.
|
|
|
See the :ref:`user guide <basics.rename>` for more. |
|
|
|
Parameters |
|
----------
|
index : scalar, hashable sequence, dict-like or function, optional
|
Functions or dict-like are transformations to apply to |
|
the index. |
|
Scalar or hashable sequence-like will alter the ``Series.name``
|
attribute. |
|
axis : {0 or 'index'}
|
Unused. Parameter needed for compatibility with DataFrame. |
|
copy : bool, default True |
|
Also copy underlying data. |
|
inplace : bool, default False |
|
Whether to return a new Series. If True the value of copy is ignored. |
|
level : int or level name, default None |
|
In case of MultiIndex, only rename labels in the specified level. |
|
errors : {'ignore', 'raise'}, default 'ignore'

If 'raise', raise `KeyError` when a `dict-like mapper` or

`index` contains labels that are not present in the index being transformed.

If 'ignore', existing keys will be renamed and extra keys will be ignored.
|
|
|
Returns |
|
-------
|
Series or None |
|
Series with index labels or name altered or None if ``inplace=True``.
|
|
|
See Also |
|
--------
|
DataFrame.rename : Corresponding DataFrame method. |
|
Series.rename_axis : Set the name of the axis. |
|
|
|
Examples |
|
--------
|
>>> s = pd.Series([1, 2, 3]) |
|
>>> s |
|
0 1 |
|
1 2 |
|
2 3 |
|
dtype: int64 |
|
>>> s.rename("my_name")  # scalar, changes Series.name
|
0 1 |
|
1 2 |
|
2 3 |
|
Name: my_name, dtype: int64 |
|
>>> s.rename(lambda x: x ** 2) # function, changes labels |
|
0 1 |
|
1 2 |
|
4 3 |
|
dtype: int64 |
|
>>> s.rename({1: 3, 2: 5}) # mapping, changes labels |
|
0 1 |
|
3 2 |
|
5 3 |
|
dtype: int64 |
|
"""
|
if axis is not None: |
|
# Make sure we raise if an invalid 'axis' is passed.
|
axis = self._get_axis_number(axis) |
|
|
|
if callable(index) or is_dict_like(index): |
|
# error: Argument 1 to "_rename" of "NDFrame" has incompatible

# type "Union[Union[Mapping[Any, Hashable], Callable[[Any],

# Hashable]], Hashable, None]"; expected "Union[Mapping[Any,

# Hashable], Callable[[Any], Hashable], None]"
|
return super()._rename( |
|
index, # type: ignore[arg-type] |
|
copy=copy, |
|
inplace=inplace, |
|
level=level, |
|
errors=errors, |
|
) |
|
else: |
|
return self._set_name(index, inplace=inplace) |
|
|
|
@Appender( |
|
"""
|
Examples |
|
--------
|
>>> s = pd.Series([1, 2, 3]) |
|
>>> s |
|
0 1 |
|
1 2 |
|
2 3 |
|
dtype: int64 |
|
|
|
>>> s.set_axis(['a', 'b', 'c'], axis=0)
|
a 1 |
|
b 2 |
|
c 3 |
|
dtype: int64 |
|
"""
|
) |
|
@Substitution( |
|
**_shared_doc_kwargs, |
|
extended_summary_sub="",

axis_description_sub="",

see_also_sub="",
|
) |
|
@Appender(NDFrame.set_axis.__doc__) |
|
def set_axis( |
|
self, |
|
labels, |
|
*, |
|
axis: Axis = 0, |
|
copy: bool | None = None, |
|
) -> Series: |
|
return super().set_axis(labels, axis=axis, copy=copy) |
|
|
|
# error: Cannot determine type of 'reindex'
|
@doc( |
|
NDFrame.reindex, # type: ignore[has-type] |
|
klass=_shared_doc_kwargs["klass"],

optional_reindex=_shared_doc_kwargs["optional_reindex"],
|
) |
|
def reindex( # type: ignore[override] |
|
self, |
|
index=None, |
|
*, |
|
axis: Axis | None = None, |
|
method: str | None = None, |
|
copy: bool | None = None, |
|
level: Level | None = None, |
|
fill_value: Scalar | None = None, |
|
limit: int | None = None, |
|
tolerance=None, |
|
) -> Series: |
|
return super().reindex( |
|
index=index, |
|
method=method, |
|
copy=copy, |
|
level=level, |
|
fill_value=fill_value, |
|
limit=limit, |
|
tolerance=tolerance, |
|
) |
|
|
|
@doc(NDFrame.rename_axis) |
|
def rename_axis( # type: ignore[override] |
|
self: Series, |
|
mapper: IndexLabel | lib.NoDefault = lib.no_default, |
|
*, |
|
index=lib.no_default, |
|
axis: Axis = 0, |
|
copy: bool = True, |
|
inplace: bool = False, |
|
) -> Series | None: |
|
return super().rename_axis( |
|
mapper=mapper, |
|
index=index, |
|
axis=axis, |
|
copy=copy, |
|
inplace=inplace, |
|
) |
|
|
|
@overload |
|
def drop( |
|
self, |
|
labels: IndexLabel = ...,

*,

axis: Axis = ...,

index: IndexLabel = ...,

columns: IndexLabel = ...,

level: Level | None = ...,

inplace: Literal[True],

errors: IgnoreRaise = ...,

) -> None:

...
|
|
|
@overload |
|
def drop( |
|
self, |
|
labels: IndexLabel = ...,

*,

axis: Axis = ...,

index: IndexLabel = ...,

columns: IndexLabel = ...,

level: Level | None = ...,

inplace: Literal[False] = ...,

errors: IgnoreRaise = ...,

) -> Series:

...
|
|
|
@overload |
|
def drop( |
|
self, |
|
labels: IndexLabel = ...,

*,

axis: Axis = ...,

index: IndexLabel = ...,

columns: IndexLabel = ...,

level: Level | None = ...,

inplace: bool = ...,

errors: IgnoreRaise = ...,

) -> Series | None:

...
|
|
|
def drop( |
|
self, |
|
labels: IndexLabel = None, |
|
*, |
|
axis: Axis = 0, |
|
index: IndexLabel = None, |
|
columns: IndexLabel = None, |
|
level: Level | None = None, |
|
inplace: bool = False, |
|
errors: IgnoreRaise = "raise",
|
) -> Series | None: |
|
"""
|
Return Series with specified index labels removed. |
|
|
|
Remove elements of a Series based on specifying the index labels. |
|
When using a multi-index, labels on different levels can be removed |
|
by specifying the level. |
|
|
|
Parameters |
|
----------
|
labels : single label or list-like |
|
Index labels to drop. |
|
axis : {0 or 'index'}
|
Unused. Parameter needed for compatibility with DataFrame. |
|
index : single label or list-like |
|
Redundant for application on Series, but 'index' can be used instead

of 'labels'.
|
columns : single label or list-like |
|
No change is made to the Series; use 'index' or 'labels' instead.
|
level : int or level name, optional |
|
For MultiIndex, level for which the labels will be removed. |
|
inplace : bool, default False |
|
If True, do operation inplace and return None. |
|
errors : {'ignore', 'raise'}, default 'raise'

If 'ignore', suppress error and only existing labels are dropped.
|
|
|
Returns |
|
-------
|
Series or None |
|
Series with specified index labels removed or None if ``inplace=True``.
|
|
|
Raises |
|
------
|
KeyError |
|
If none of the labels are found in the index. |
|
|
|
See Also |
|
--------
|
Series.reindex : Return only specified index labels of Series. |
|
Series.dropna : Return series without null values. |
|
Series.drop_duplicates : Return Series with duplicate values removed. |
|
DataFrame.drop : Drop specified labels from rows or columns. |
|
|
|
Examples |
|
--------
|
>>> s = pd.Series(data=np.arange(3), index=['A', 'B', 'C'])
|
>>> s |
|
A 0 |
|
B 1 |
|
C 2 |
|
dtype: int64 |
|
|
|
Drop labels B and C
|
|
|
>>> s.drop(labels=['B', 'C'])
|
A 0 |
|
dtype: int64 |
|
|
|
Drop 2nd level label in MultiIndex Series |
|
|
|
>>> midx = pd.MultiIndex(levels=[['lama', 'cow', 'falcon'],

...                              ['speed', 'weight', 'length']],

...                      codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2],

...                             [0, 1, 2, 0, 1, 2, 0, 1, 2]])
|
>>> s = pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3], |
|
...               index=midx)
|
>>> s |
|
lama speed 45.0 |
|
weight 200.0 |
|
length 1.2 |
|
cow speed 30.0 |
|
weight 250.0 |
|
length 1.5 |
|
falcon speed 320.0 |
|
weight 1.0 |
|
length 0.3 |
|
dtype: float64 |
|
|
|
>>> s.drop(labels='weight', level=1)
|
lama speed 45.0 |
|
length 1.2 |
|
cow speed 30.0 |
|
length 1.5 |
|
falcon speed 320.0 |
|
length 0.3 |
|
dtype: float64 |
|
"""
|
return super().drop( |
|
labels=labels, |
|
axis=axis, |
|
index=index, |
|
columns=columns, |
|
level=level, |
|
inplace=inplace, |
|
errors=errors, |
|
) |
|
|
|
@overload |
|
def fillna( |
|
self, |
|
value: Hashable | Mapping | Series | DataFrame = ...,

*,

method: FillnaOptions | None = ...,

axis: Axis | None = ...,

inplace: Literal[False] = ...,

limit: int | None = ...,

downcast: dict | None = ...,

) -> Series:

...
|
|
|
@overload |
|
def fillna( |
|
self, |
|
value: Hashable | Mapping | Series | DataFrame = ...,

*,

method: FillnaOptions | None = ...,

axis: Axis | None = ...,

inplace: Literal[True],

limit: int | None = ...,

downcast: dict | None = ...,

) -> None:

...
|
|
|
@overload |
|
def fillna( |
|
self, |
|
value: Hashable | Mapping | Series | DataFrame = ...,

*,

method: FillnaOptions | None = ...,

axis: Axis | None = ...,

inplace: bool = ...,

limit: int | None = ...,

downcast: dict | None = ...,

) -> Series | None:

...
|
|
|
@doc(NDFrame.fillna, **_shared_doc_kwargs) |
|
def fillna( |
|
self, |
|
value: Hashable | Mapping | Series | DataFrame = None, |
|
*, |
|
method: FillnaOptions | None = None, |
|
axis: Axis | None = None, |
|
inplace: bool = False, |
|
limit: int | None = None, |
|
downcast: dict | None = None, |
|
) -> Series | None: |
|
return super().fillna( |
|
value=value, |
|
method=method, |
|
axis=axis, |
|
inplace=inplace, |
|
limit=limit, |
|
downcast=downcast, |
|
) |
|
|
|
def pop(self, item: Hashable) -> Any: |
|
"""
|
Return item and drop from series. Raise KeyError if not found.
|
|
|
Parameters |
|
----------
|
item : label |
|
Index of the element that needs to be removed. |
|
|
|
Returns |
|
-------
|
Value that is popped from series. |
|
|
|
Examples |
|
--------
|
>>> ser = pd.Series([1, 2, 3])
|
|
|
>>> ser.pop(0) |
|
1 |
|
|
|
>>> ser |
|
1 2 |
|
2 3 |
|
dtype: int64 |
|
"""
|
return super().pop(item=item) |
|
|
|
@overload |
|
def replace( |
|
self, |
|
to_replace=...,

value=...,

*,

inplace: Literal[False] = ...,

limit: int | None = ...,

regex: bool = ...,

method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ...,

) -> Series:

...
|
|
|
@overload |
|
def replace( |
|
self, |
|
to_replace=...,

value=...,

*,

inplace: Literal[True],

limit: int | None = ...,

regex: bool = ...,

method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ...,

) -> None:

...
|
|
|
@doc( |
|
NDFrame.replace, |
|
klass=_shared_doc_kwargs["klass"],

inplace=_shared_doc_kwargs["inplace"],

replace_iloc=_shared_doc_kwargs["replace_iloc"],
|
) |
|
def replace( |
|
self, |
|
to_replace=None, |
|
value=lib.no_default, |
|
*, |
|
inplace: bool = False, |
|
limit: int | None = None, |
|
regex: bool = False, |
|
method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = lib.no_default,
|
) -> Series | None: |
|
return super().replace( |
|
to_replace=to_replace, |
|
value=value, |
|
inplace=inplace, |
|
limit=limit, |
|
regex=regex, |
|
method=method, |
|
) |
|
|
|
@doc(INFO_DOCSTRING, **series_sub_kwargs) |
|
def info( |
|
self, |
|
verbose: bool | None = None, |
|
buf: IO[str] | None = None, |
|
max_cols: int | None = None, |
|
memory_usage: bool | str | None = None, |
|
show_counts: bool = True, |
|
) -> None: |
|
return SeriesInfo(self, memory_usage).render( |
|
buf=buf, |
|
max_cols=max_cols, |
|
verbose=verbose, |
|
show_counts=show_counts, |
|
) |
|
|
|
def _replace_single(self, to_replace, method: str, inplace: bool, limit): |
|
"""

Replaces values in a Series using the fill method specified when no

replacement value is given in the replace method.

"""
|
|
|
result = self if inplace else self.copy() |
|
|
|
values = result._values |
|
mask = missing.mask_missing(values, to_replace) |
|
|
|
if isinstance(values, ExtensionArray): |
|
# dispatch to the EA's _pad_mask_inplace method
|
values._fill_mask_inplace(method, limit, mask) |
|
else: |
|
fill_f = missing.get_fill_func(method) |
|
fill_f(values, limit=limit, mask=mask) |
|
|
|
if inplace: |
|
return |
|
return result |
|
|
|
# error: Cannot determine type of 'shift'

@doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"])  # type: ignore[has-type]
|
def shift( |
|
self, periods: int = 1, freq=None, axis: Axis = 0, fill_value: Hashable = None |
|
) -> Series: |
|
return super().shift( |
|
periods=periods, freq=freq, axis=axis, fill_value=fill_value |
|
) |
|
|
|
def memory_usage(self, index: bool = True, deep: bool = False) -> int: |
|
"""
|
Return the memory usage of the Series. |
|
|
|
The memory usage can optionally include the contribution of |
|
the index and of elements of `object` dtype. |
|
|
|
Parameters |
|
----------
|
index : bool, default True |
|
Specifies whether to include the memory usage of the Series index. |
|
deep : bool, default False |
|
If True, introspect the data deeply by interrogating |
|
`object` dtypes for system-level memory consumption, and include |
|
it in the returned value. |
|
|
|
Returns |
|
-------
|
int |
|
Bytes of memory consumed. |
|
|
|
See Also |
|
--------
|
numpy.ndarray.nbytes : Total bytes consumed by the elements of the |
|
array. |
|
DataFrame.memory_usage : Bytes consumed by a DataFrame. |
|
|
|
Examples |
|
--------
|
>>> s = pd.Series(range(3)) |
|
>>> s.memory_usage() |
|
152 |
|
|
|
Not including the index gives the size of the rest of the data, which |
|
is necessarily smaller: |
|
|
|
>>> s.memory_usage(index=False) |
|
24 |
|
|
|
The memory footprint of `object` values is ignored by default: |
|
|
|
>>> s = pd.Series(["a", "b"])
|
>>> s.values |
|
array(['a', 'b'], dtype=object)
|
>>> s.memory_usage() |
|
144 |
|
>>> s.memory_usage(deep=True) |
|
244 |
|
"""
|
v = self._memory_usage(deep=deep) |
|
if index: |
|
v += self.index.memory_usage(deep=deep) |
|
return v |
|
|
|
def isin(self, values) -> Series: |
|
"""
|
Whether elements in Series are contained in `values`. |
|
|
|
Return a boolean Series showing whether each element in the Series |
|
matches an element in the passed sequence of `values` exactly. |
|
|
|
Parameters |
|
----------
|
values : set or list-like |
|
The sequence of values to test. Passing in a single string will |
|
raise a ``TypeError``. Instead, turn a single string into a
|
list of one element. |
|
|
|
Returns |
|
-------
|
Series |
|
Series of booleans indicating if each element is in values. |
|
|
|
Raises |
|
------
|
TypeError |
|
* If `values` is a string |
|
|
|
See Also |
|
--------
|
DataFrame.isin : Equivalent method on DataFrame. |
|
|
|
Examples |
|
--------
|
>>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama',

...                'hippo'], name='animal')
|
>>> s.isin(['cow', 'lama'])
|
0 True |
|
1 True |
|
2 True |
|
3 False |
|
4 True |
|
5 False |
|
Name: animal, dtype: bool |
|
|
|
To invert the boolean values, use the ``~`` operator:
|
|
|
>>> ~s.isin(['cow', 'lama'])
|
0 False |
|
1 False |
|
2 False |
|
3 True |
|
4 False |
|
5 True |
|
Name: animal, dtype: bool |
|
|
|
Passing a single string as ``s.isin('lama')`` will raise an error. Use
|
a list of one element instead: |
|
|
|
>>> s.isin(['lama'])
|
0 True |
|
1 False |
|
2 True |
|
3 False |
|
4 True |
|
5 False |
|
Name: animal, dtype: bool |
|
|
|
Strings and integers are distinct and are therefore not comparable: |
|
|
|
>>> pd.Series([1]).isin(['1'])
|
0 False |
|
dtype: bool |
|
>>> pd.Series([1.1]).isin(['1.1'])
|
0 False |
|
dtype: bool |
|
"""
|
result = algorithms.isin(self._values, values) |
|
return self._constructor(result, index=self.index).__finalize__( |
|
self, method="isin"
|
) |
|
|
|
def between( |
|
self, |
|
left, |
|
right, |
|
inclusive: Literal["both", "neither", "left", "right"] = "both",
|
) -> Series: |
|
"""
|
Return boolean Series equivalent to left <= series <= right. |
|
|
|
This function returns a boolean vector containing `True` wherever the |
|
corresponding Series element is between the boundary values `left` and |
|
`right`. NA values are treated as `False`. |
|
|
|
Parameters |
|
----------
|
left : scalar or list-like |
|
Left boundary. |
|
right : scalar or list-like |
|
Right boundary. |
|
inclusive : {"both", "neither", "left", "right"}
|
Include boundaries. Whether to set each bound as closed or open. |
|
|
|
.. versionchanged:: 1.3.0 |
|
|
|
Returns |
|
-------
|
Series |
|
Series representing whether each element is between left and |
|
right (inclusive). |
|
|
|
See Also |
|
--------
|
Series.gt : Greater than of series and other. |
|
Series.lt : Less than of series and other. |
|
|
|
Notes |
|
-----
|
This function is equivalent to ``(left <= ser) & (ser <= right)``.
|
|
|
Examples |
|
--------
|
>>> s = pd.Series([2, 0, 4, 8, np.nan]) |
|
|
|
Boundary values are included by default: |
|
|
|
>>> s.between(1, 4) |
|
0 True |
|
1 False |
|
2 True |
|
3 False |
|
4 False |
|
dtype: bool |
|
|
|
With `inclusive` set to ``"neither"`` boundary values are excluded:
|
|
|
>>> s.between(1, 4, inclusive="neither")
|
0 True |
|
1 False |
|
2 False |
|
3 False |
|
4 False |
|
dtype: bool |
|
|
|
`left` and `right` can be any scalar value: |
|
|
|
>>> s = pd.Series(['Alice', 'Bob', 'Carol', 'Eve'])

>>> s.between('Anna', 'Daniel')
|
0 False |
|
1 True |
|
2 True |
|
3 False |
|
dtype: bool |
|
"""
|
if inclusive == "both":
|
lmask = self >= left |
|
rmask = self <= right |
|
elif inclusive == "left":
|
lmask = self >= left |
|
rmask = self < right |
|
elif inclusive == "right":
|
lmask = self > left |
|
rmask = self <= right |
|
elif inclusive == "neither":
|
lmask = self > left |
|
rmask = self < right |
|
else: |
|
raise ValueError( |
|
"Inclusive has to be either string of 'both', "

"'left', 'right', or 'neither'."
|
) |
|
|
|
return lmask & rmask |
|
|
|
# ----------------------------------------------------------------------
|
# Convert to types that support pd.NA |
|
|
|
def _convert_dtypes( |
|
self, |
|
infer_objects: bool = True, |
|
convert_string: bool = True, |
|
convert_integer: bool = True, |
|
convert_boolean: bool = True, |
|
convert_floating: bool = True, |
|
) -> Series: |
|
input_series = self |
|
if infer_objects: |
|
input_series = input_series.infer_objects() |
|
if is_object_dtype(input_series): |
|
input_series = input_series.copy() |
|
|
|
if convert_string or convert_integer or convert_boolean or convert_floating: |
|
dtype_backend = get_option("mode.dtype_backend")
|
inferred_dtype = convert_dtypes( |
|
input_series._values, |
|
convert_string, |
|
convert_integer, |
|
convert_boolean, |
|
convert_floating, |
|
infer_objects, |
|
dtype_backend, |
|
) |
|
result = input_series.astype(inferred_dtype) |
|
else: |
|
result = input_series.copy() |
|
return result |
|
|
|
# error: Cannot determine type of 'isna'

# error: Return type "Series" of "isna" incompatible with return type "ndarray

# [Any, dtype[bool_]]" in supertype "IndexOpsMixin"

@doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"])  # type: ignore[has-type]
|
def isna(self) -> Series: # type: ignore[override] |
|
return NDFrame.isna(self) |
|
|
|
# error: Cannot determine type of 'isna'

@doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"])  # type: ignore[has-type]
|
def isnull(self) -> Series: |
|
"""
|
Series.isnull is an alias for Series.isna. |
|
"""
|
return super().isnull() |
|
|
|
# error: Cannot determine type of 'notna'

@doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"])  # type: ignore[has-type]
|
def notna(self) -> Series: |
|
return super().notna() |
|
|
|
# error: Cannot determine type of 'notna'

@doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"])  # type: ignore[has-type]
|
def notnull(self) -> Series: |
|
"""
|
Series.notnull is an alias for Series.notna. |
|
"""
|
return super().notnull() |
|
|
|
@overload |
|
def dropna( |
|
self, |
|
*, |
|
axis: Axis = ...,

inplace: Literal[False] = ...,

how: AnyAll | None = ...,

ignore_index: bool = ...,

) -> Series:

...
|
|
|
@overload |
|
def dropna( |
|
self, |
|
*, |
|
axis: Axis = ...,

inplace: Literal[True],

how: AnyAll | None = ...,

ignore_index: bool = ...,

) -> None:

...
|
|
|
def dropna( |
|
self, |
|
*, |
|
axis: Axis = 0, |
|
inplace: bool = False, |
|
how: AnyAll | None = None, |
|
ignore_index: bool = False, |
|
) -> Series | None: |
|
"""
|
Return a new Series with missing values removed. |
|
|
|
See the :ref:`User Guide <missing_data>` for more on which values are |
|
considered missing, and how to work with missing data. |
|
|
|
Parameters |
|
----------
|
axis : {0 or 'index'}
|
Unused. Parameter needed for compatibility with DataFrame. |
|
inplace : bool, default False |
|
If True, do operation inplace and return None. |
|
how : str, optional |
|
Not in use. Kept for compatibility. |
|
ignore_index : bool, default ``False``

If ``True``, the resulting axis will be labeled 0, 1, ..., n - 1.
|
|
|
.. versionadded:: 2.0.0 |
|
|
|
Returns |
|
-------
|
Series or None |
|
Series with NA entries dropped from it or None if ``inplace=True``.
|
|
|
See Also |
|
--------
|
Series.isna: Indicate missing values. |
|
Series.notna : Indicate existing (non-missing) values. |
|
Series.fillna : Replace missing values. |
|
DataFrame.dropna : Drop rows or columns which contain NA values. |
|
Index.dropna : Drop missing indices. |
|
|
|
Examples |
|
--------
|
>>> ser = pd.Series([1., 2., np.nan]) |
|
>>> ser |
|
0 1.0 |
|
1 2.0 |
|
2 NaN |
|
dtype: float64 |
|
|
|
Drop NA values from a Series. |
|
|
|
>>> ser.dropna() |
|
0 1.0 |
|
1 2.0 |
|
dtype: float64 |
|
|
|
Empty strings are not considered NA values. ``None`` is considered an
|
NA value. |
|
|
|
>>> ser = pd.Series([np.NaN, 2, pd.NaT, '', None, 'I stay'])
|
>>> ser |
|
0 NaN |
|
1 2 |
|
2 NaT |
|
3 |
|
4 None |
|
5 I stay |
|
dtype: object |
|
>>> ser.dropna() |
|
1 2 |
|
3 |
|
5 I stay |
|
dtype: object |
|
"""
|
inplace = validate_bool_kwarg(inplace, "inplace")

ignore_index = validate_bool_kwarg(ignore_index, "ignore_index")
|
# Validate the axis parameter |
|
self._get_axis_number(axis or 0) |
|
|
|
if self._can_hold_na: |
|
result = remove_na_arraylike(self) |
|
else: |
|
if not inplace: |
|
result = self.copy(deep=None) |
|
else: |
|
result = self |
|
|
|
if ignore_index: |
|
result.index = default_index(len(result)) |
|
|
|
if inplace: |
|
return self._update_inplace(result) |
|
else: |
|
return result |
|
|
|
# ----------------------------------------------------------------------
|
# Time series-oriented methods |
|
|
|
# error: Cannot determine type of 'asfreq'
|
@doc(NDFrame.asfreq, **_shared_doc_kwargs) # type: ignore[has-type] |
|
def asfreq( |
|
self, |
|
freq: Frequency, |
|
method: FillnaOptions | None = None, |
|
how: str | None = None, |
|
normalize: bool = False, |
|
fill_value: Hashable = None, |
|
) -> Series: |
|
return super().asfreq( |
|
freq=freq, |
|
method=method, |
|
how=how, |
|
normalize=normalize, |
|
fill_value=fill_value, |
|
) |
|
|
|
# error: Cannot determine type of 'resample'
|
@doc(NDFrame.resample, **_shared_doc_kwargs) # type: ignore[has-type] |
|
def resample( |
|
self, |
|
rule, |
|
axis: Axis = 0, |
|
closed: str | None = None, |
|
label: str | None = None, |
|
convention: str = "start",
|
kind: str | None = None, |
|
on: Level = None, |
|
level: Level = None, |
|
origin: str | TimestampConvertibleTypes = "start_day",
|
offset: TimedeltaConvertibleTypes | None = None, |
|
group_keys: bool | lib.NoDefault = no_default, |
|
) -> Resampler: |
|
return super().resample( |
|
rule=rule, |
|
axis=axis, |
|
closed=closed, |
|
label=label, |
|
convention=convention, |
|
kind=kind, |
|
on=on, |
|
level=level, |
|
origin=origin, |
|
offset=offset, |
|
group_keys=group_keys, |
|
) |
|
|
|
def to_timestamp( |
|
self, |
|
freq=None, |
|
how: Literal["s", "e", "start", "end"] = "start",
|
copy: bool | None = None, |
|
) -> Series: |
|
"""
|
Cast to DatetimeIndex of Timestamps, at *beginning* of period. |
|
|
|
Parameters |
|
----------
|
freq : str, default frequency of PeriodIndex |
|
Desired frequency. |
|
how : {'s', 'e', 'start', 'end'}
|
Convention for converting period to timestamp; start of period |
|
vs. end. |
|
copy : bool, default True |
|
Whether or not to return a copy. |
|
|
|
Returns |
|
-------
|
Series with DatetimeIndex |
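
Examples

--------

A minimal sketch using a monthly ``PeriodIndex``:

>>> idx = pd.period_range('2023-01', periods=2, freq='M')

>>> s = pd.Series([1, 2], index=idx)

>>> s.to_timestamp().index.tolist()

[Timestamp('2023-01-01 00:00:00'), Timestamp('2023-02-01 00:00:00')]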
|
"""
|
if not isinstance(self.index, PeriodIndex): |
|
raise TypeError(f"unsupported Type {type(self.index).__name__}")
|
|
|
new_obj = self.copy(deep=copy) |
|
new_index = self.index.to_timestamp(freq=freq, how=how) |
|
setattr(new_obj, "index", new_index)
|
return new_obj |
|
|
|
def to_period(self, freq: str | None = None, copy: bool | None = None) -> Series: |
|
"""
|
Convert Series from DatetimeIndex to PeriodIndex. |
|
|
|
Parameters |
|
----------
|
freq : str, default None |
|
Frequency associated with the PeriodIndex. |
|
copy : bool, default True |
|
Whether or not to return a copy. |
|
|
|
Returns |
|
-------
|
Series |
|
Series with index converted to PeriodIndex. |
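
Examples

--------

A minimal sketch using a monthly ``DatetimeIndex``:

>>> idx = pd.date_range('2023-01-01', periods=2, freq='MS')

>>> s = pd.Series([1, 2], index=idx)

>>> s.to_period('M').index.tolist()

[Period('2023-01', 'M'), Period('2023-02', 'M')]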
|
"""
|
if not isinstance(self.index, DatetimeIndex): |
|
raise TypeError(f"unsupported Type {type(self.index).__name__}")
|
|
|
new_obj = self.copy(deep=copy) |
|
new_index = self.index.to_period(freq=freq) |
|
setattr(new_obj, "index", new_index)
|
return new_obj |
|
|
|
@overload |
|
def ffill( |
|
self, |
|
*, |
|
axis: None | Axis = ...,

inplace: Literal[False] = ...,

limit: None | int = ...,

downcast: dict | None = ...,

) -> Series:

...
|
|
|
@overload |
|
def ffill( |
|
self, |
|
*, |
|
axis: None | Axis = ...,

inplace: Literal[True],

limit: None | int = ...,

downcast: dict | None = ...,

) -> None:

...
|
|
|
@overload |
|
def ffill( |
|
self, |
|
*, |
|
axis: None | Axis = ...,

inplace: bool = ...,

limit: None | int = ...,

downcast: dict | None = ...,

) -> Series | None:

...
|
|
|
def ffill( |
|
self, |
|
*, |
|
axis: None | Axis = None, |
|
inplace: bool = False, |
|
limit: None | int = None, |
|
downcast: dict | None = None, |
|
) -> Series | None: |
|
return super().ffill(axis=axis, inplace=inplace, limit=limit, downcast=downcast) |
|
|
|
@overload |
|
def bfill( |
|
self, |
|
*, |
|
axis: None | Axis = ...,

inplace: Literal[False] = ...,

limit: None | int = ...,

downcast: dict | None = ...,

) -> Series:

...
|
|
|
@overload |
|
def bfill( |
|
self, |
|
*, |
|
axis: None | Axis = ...,

inplace: Literal[True],

limit: None | int = ...,

downcast: dict | None = ...,

) -> None:

...
|
|
|
@overload |
|
def bfill( |
|
self, |
|
*, |
|
axis: None | Axis = ...,

inplace: bool = ...,

limit: None | int = ...,

downcast: dict | None = ...,

) -> Series | None:

...
|
|
|
def bfill( |
|
self, |
|
*, |
|
axis: None | Axis = None, |
|
inplace: bool = False, |
|
limit: None | int = None, |
|
downcast: dict | None = None, |
|
) -> Series | None: |
|
return super().bfill(axis=axis, inplace=inplace, limit=limit, downcast=downcast) |
|
|
|
def clip( |
|
self: Series, |
|
lower=None, |
|
upper=None, |
|
*, |
|
axis: Axis | None = None, |
|
inplace: bool = False, |
|
**kwargs, |
|
) -> Series | None: |
|
return super().clip(lower, upper, axis=axis, inplace=inplace, **kwargs) |
|
|
|
def interpolate( |
|
self: Series, |
|
method: str = «linear», |
|
*, |
|
axis: Axis = 0, |
|
limit: int | None = None, |
|
inplace: bool = False, |
|
limit_direction: str | None = None, |
|
limit_area: str | None = None, |
|
downcast: str | None = None, |
|
**kwargs, |
|
) -> Series | None: |
|
return super().interpolate( |
|
method=method, |
|
axis=axis, |
|
limit=limit, |
|
inplace=inplace, |
|
limit_direction=limit_direction, |
|
limit_area=limit_area, |
|
downcast=downcast, |
|
**kwargs, |
|
) |
|
|
|
@overload |
|
def where( |
|
self, |
|
cond, |
|
other=...,

*,

inplace: Literal[False] = ...,

axis: Axis | None = ...,

level: Level = ...,

) -> Series:

...
|
|
|
@overload |
|
def where( |
|
self, |
|
cond, |
|
other=...,

*,

inplace: Literal[True],

axis: Axis | None = ...,

level: Level = ...,

) -> None:

...
|
|
|
@overload |
|
def where( |
|
self, |
|
cond, |
|
other=...,

*,

inplace: bool = ...,

axis: Axis | None = ...,

level: Level = ...,

) -> Series | None:

...
|
|
|
def where( |
|
self, |
|
cond, |
|
other=lib.no_default, |
|
*, |
|
inplace: bool = False, |
|
axis: Axis | None = None, |
|
level: Level = None, |
|
) -> Series | None: |
|
return super().where( |
|
cond, |
|
other, |
|
inplace=inplace, |
|
axis=axis, |
|
level=level, |
|
) |
|
|
|
@overload |
|
def mask( |
|
self, |
|
cond, |
|
other=...,

*,

inplace: Literal[False] = ...,

axis: Axis | None = ...,

level: Level = ...,

) -> Series:

...
|
|
|
@overload |
|
def mask( |
|
self, |
|
cond, |
|
other=...,

*,

inplace: Literal[True],

axis: Axis | None = ...,

level: Level = ...,

) -> None:

...
|
|
|
@overload |
|
def mask( |
|
self, |
|
cond, |
|
other=...,

*,

inplace: bool = ...,

axis: Axis | None = ...,

level: Level = ...,

) -> Series | None:

...
|
|
|
def mask( |
|
self, |
|
cond, |
|
other=lib.no_default, |
|
*, |
|
inplace: bool = False, |
|
axis: Axis | None = None, |
|
level: Level = None, |
|
) -> Series | None: |
|
return super().mask( |
|
cond, |
|
other, |
|
inplace=inplace, |
|
axis=axis, |
|
level=level, |
|
) |
|
|
|
# ----------------------------------------------------------------------
|
# Add index |
|
_AXIS_ORDERS: list[Literal["index", "columns"]] = ["index"]
|
_AXIS_LEN = len(_AXIS_ORDERS) |
|
_info_axis_number: Literal[0] = 0 |
|
_info_axis_name: Literal["index"] = "index"
|
|
|
index = properties.AxisProperty( |
|
axis=0, doc="The index (axis labels) of the Series."
|
) |
|
|
|
# ----------------------------------------------------------------------
|
# Accessor Methods |
|
# ----------------------------------------------------------------------
|
str = CachedAccessor("str", StringMethods)

dt = CachedAccessor("dt", CombinedDatetimelikeProperties)

cat = CachedAccessor("cat", CategoricalAccessor)

plot = CachedAccessor("plot", pandas.plotting.PlotAccessor)

sparse = CachedAccessor("sparse", SparseAccessor)
|
|
|
# ----------------------------------------------------------------------
|
# Add plotting methods to Series |
|
hist = pandas.plotting.hist_series |
|
|
|
# ----------------------------------------------------------------------
|
# Template-Based Arithmetic/Comparison Methods |
|
|
|
def _cmp_method(self, other, op): |
|
res_name = ops.get_op_result_name(self, other) |
|
|
|
if isinstance(other, Series) and not self._indexed_same(other): |
|
raise ValueError("Can only compare identically-labeled Series objects")
|
|
|
lvalues = self._values |
|
rvalues = extract_array(other, extract_numpy=True, extract_range=True) |
|
|
|
with np.errstate(all="ignore"):
|
res_values = ops.comparison_op(lvalues, rvalues, op) |
|
|
|
return self._construct_result(res_values, name=res_name) |
|
|
|
def _logical_method(self, other, op): |
|
res_name = ops.get_op_result_name(self, other) |
|
self, other = ops.align_method_SERIES(self, other, align_asobject=True) |
|
|
|
lvalues = self._values |
|
rvalues = extract_array(other, extract_numpy=True, extract_range=True) |
|
|
|
res_values = ops.logical_op(lvalues, rvalues, op) |
|
return self._construct_result(res_values, name=res_name) |
|
|
|
def _arith_method(self, other, op): |
|
self, other = ops.align_method_SERIES(self, other) |
|
return base.IndexOpsMixin._arith_method(self, other, op) |
|
|
|
|
|
Series._add_numeric_operations() |
|
|
|
# Add arithmetic! |
|
ops.add_flex_arithmetic_methods(Series) |