Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions bigframes/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -2033,6 +2033,22 @@ def nsmallest(
column_ids = self._sql_names(columns)
return DataFrame(block_ops.nsmallest(self._block, n, column_ids, keep=keep))

def squeeze(self, axis: typing.Optional[typing.Union[int, str]] = None):
    """Collapse length-1 axes of this DataFrame.

    Args:
        axis:
            Axis to squeeze, resolved through ``utils.get_axis_number``.
            When ``None`` both the row and the column axis are candidates.

    Returns:
        * the single cell value (read through ``to_pandas().iloc[0, 0]``)
          when both axes have length one and both may be squeezed;
        * a ``bigframes.series.Series`` over this frame's block when the
          column axis has length one and may be squeezed;
        * a ``bigframes.series.Series`` over the transposed block when the
          row axis has length one and may be squeezed;
        * ``self`` unchanged otherwise.
    """
    row_count, col_count = self.shape

    if axis is None:
        # No axis requested: either axis may be collapsed.
        rows_allowed = cols_allowed = True
    else:
        resolved_axis = utils.get_axis_number(axis)
        cols_allowed = resolved_axis == 1
        rows_allowed = resolved_axis == 0

    collapse_cols = col_count == 1 and cols_allowed
    collapse_rows = row_count == 1 and rows_allowed

    if collapse_cols and collapse_rows:
        # A 1x1 frame with both axes eligible reduces to its only value.
        return self.to_pandas().iloc[0, 0]
    if collapse_cols:
        return bigframes.series.Series(self._block)
    if collapse_rows:
        # The single row becomes the values of the resulting Series.
        return bigframes.series.Series(self._block.transpose(single_row_mode=True))
    return self

def insert(
self,
loc: int,
Expand Down
5 changes: 5 additions & 0 deletions bigframes/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1133,6 +1133,11 @@ def nsmallest(self, n: int = 5, keep: str = "first") -> Series:
block_ops.nsmallest(self._block, n, [self._value_column], keep=keep)
)

def squeeze(self, axis=None):
    """Reduce a one-element Series to its scalar value.

    Args:
        axis: Accepted but not consulted by this method.

    Returns:
        The first element of ``self.to_pandas()`` (taken positionally) when
        the Series has exactly one element; otherwise ``self`` unchanged.
    """
    return self.to_pandas().iloc[0] if len(self) == 1 else self

def isin(self, values) -> "Series":
if isinstance(values, Series):
return Series(self._block.isin(values._block))
Expand Down
51 changes: 51 additions & 0 deletions tests/unit/test_dataframe_polars.py
Original file line number Diff line number Diff line change
Expand Up @@ -1785,6 +1785,57 @@ def test_dataframe_sort_index_inplace(scalars_dfs):
pandas.testing.assert_frame_equal(bf_result, pd_result)


@pytest.mark.parametrize(
    "axis",
    [0, "columns", None],
)
def test_dataframe_squeeze_noop(scalars_dfs, axis):
    """Squeezing the full scalars frame should match pandas and stay a frame.

    The result is compared with ``assert_frame_equal``, so both libraries
    are expected to hand back a DataFrame for every parametrized axis.
    """
    bf_df, pd_df = scalars_dfs

    expected = pd_df.squeeze(axis=axis)
    actual = bf_df.squeeze(axis=axis).to_pandas()

    pandas.testing.assert_frame_equal(actual, expected)


@pytest.mark.parametrize(
    "axis",
    [1, None],
)
def test_dataframe_squeeze_cols(scalars_dfs, axis):
    """A single-column frame squeezed on the column axis matches pandas.

    The result is compared with ``assert_series_equal``, so a Series is
    expected from both libraries.
    """
    bf_df, pd_df = scalars_dfs
    single_column = ["int64_col"]

    expected = pd_df[single_column].squeeze(axis)
    actual = bf_df[single_column].squeeze(axis).to_pandas()

    pandas.testing.assert_series_equal(actual, expected)


@pytest.mark.parametrize(
    "axis",
    [0, None],
)
def test_dataframe_squeeze_rows(scalars_dfs, axis):
    """A one-row frame squeezed on the row axis matches pandas.

    Index dtype is excluded from the comparison (``check_index_type=False``).
    """
    bf_df, pd_df = scalars_dfs

    # Squeezing rows implicitly transposes the frame, so the selected
    # columns need compatible types.
    same_typed_columns = ["int64_col", "int64_too"]

    expected = pd_df[same_typed_columns].head(1).squeeze(axis)
    actual = bf_df[same_typed_columns].head(1).squeeze(axis).to_pandas()

    pandas.testing.assert_series_equal(actual, expected, check_index_type=False)


def test_dataframe_squeeze_both_axes(
    scalars_dfs,
):
    """A 1x1 frame squeezed with no axis yields the same scalar as pandas."""
    bf_df, pd_df = scalars_dfs
    single_column = ["int64_col"]

    expected = pd_df[single_column].head(1).squeeze()
    actual = bf_df[single_column].head(1).squeeze()

    assert expected == actual


def test_df_abs(scalars_dfs):
scalars_df, scalars_pandas_df = scalars_dfs
columns = ["int64_col", "int64_too", "float64_col"]
Expand Down
19 changes: 19 additions & 0 deletions tests/unit/test_series_polars.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,25 @@ def test_series_equals_different_values(scalars_df_index, scalars_pandas_df_inde
assert pd_result == bf_result


def test_series_squeeze_noop(scalars_dfs):
    """Squeezing the full ``int64_too`` column should match pandas.

    The result is compared with ``assert_series_equal``, so a Series is
    expected from both libraries.
    """
    bf_df, pd_df = scalars_dfs
    column = "int64_too"

    expected = pd_df[column].squeeze()
    actual = bf_df[column].squeeze().to_pandas()

    assert_series_equal(actual, expected)


def test_series_squeeze_squeezes(scalars_dfs):
    """A one-element Series squeezes to the same scalar as in pandas."""
    bf_df, pd_df = scalars_dfs
    column = "int64_too"

    # head(1) leaves a single element, so squeeze() is expected to return a
    # scalar that can be compared directly.
    expected = pd_df[column].head(1).squeeze()
    actual = bf_df[column].head(1).squeeze()

    assert expected == actual


def test_series_get_with_default_index(scalars_dfs):
col_name = "float64_col"
key = 2
Expand Down
102 changes: 102 additions & 0 deletions third_party/bigframes_vendored/pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,108 @@ def __iter__(self) -> Iterator:
"""
raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

def squeeze(self, axis=None):
    """
    Squeeze length-1 axes, reducing the object to a Series or a scalar.

    Series or DataFrames with a single element are squeezed to a scalar.
    DataFrames with a single column or a single row are squeezed to a
    Series. Otherwise the object is unchanged.

    This method is most useful when you don't know if your
    object is a Series or DataFrame, but you do know it has just a single
    column. In that case you can safely call `squeeze` to ensure you have a
    Series.

    **Examples:**
        >>> primes = bpd.Series([2, 3, 5, 7])

    Slicing might produce a Series with a single value:

        >>> even_primes = primes[primes % 2 == 0]
        >>> even_primes
        0    2
        dtype: Int64

        >>> even_primes.squeeze()
        np.int64(2)

    Squeezing objects with more than one value in every axis does nothing:

        >>> odd_primes = primes[primes % 2 == 1]
        >>> odd_primes
        1    3
        2    5
        3    7
        dtype: Int64

        >>> odd_primes.squeeze()
        1    3
        2    5
        3    7
        dtype: Int64

    Squeezing is even more effective when used with DataFrames.

        >>> df = bpd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])
        >>> df
           a  b
        0  1  2
        1  3  4
        <BLANKLINE>
        [2 rows x 2 columns]

    Selecting a single column with a list produces a DataFrame whose
    column axis has length one:

        >>> df_a = df[['a']]
        >>> df_a
           a
        0  1
        1  3
        <BLANKLINE>
        [2 rows x 1 columns]

    So the columns can be squeezed down, resulting in a Series:

        >>> df_a.squeeze('columns')
        0    1
        1    3
        Name: a, dtype: Int64

    Selecting a single row from a single column produces a DataFrame
    that holds a single scalar:

        >>> df_0a = df.loc[[0], ['a']]
        >>> df_0a
           a
        0  1
        <BLANKLINE>
        [1 rows x 1 columns]

    Squeezing the rows produces a single scalar Series:

        >>> df_0a.squeeze('rows')
        a    1
        Name: 0, dtype: Int64

    Squeezing all axes will project directly into a scalar:

        >>> df_0a.squeeze()
        np.int64(1)

    Args:
        axis ({0 or 'index', 1 or 'columns', None}, default None):
            A specific axis to squeeze. By default, all length-1 axes are
            squeezed. For `Series` this parameter is unused and defaults to `None`.

    Returns:
        DataFrame, Series, or scalar:
            The projection after squeezing `axis` or all the axes.

    """
    # This stub carries the documentation only; its body always raises.
    raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)

# -------------------------------------------------------------------------
# Unary Methods

Expand Down