@@ -1811,6 +1811,28 @@ def to_numpy(
18111811
18121812 return result
18131813
1814+ def _create_data_for_split_and_tight_to_dict (
1815+ self , are_all_object_dtype_cols : bool , object_dtype_indices : list [int ]
1816+ ) -> list :
1817+ """
1818+ Simple helper method to create data for to ``to_dict(orient="split")`` and
1819+ ``to_dict(orient="tight")`` to create the main output data
1820+ """
1821+ if are_all_object_dtype_cols :
1822+ data = [
1823+ list (map (maybe_box_native , t ))
1824+ for t in self .itertuples (index = False , name = None )
1825+ ]
1826+ else :
1827+ data = [list (t ) for t in self .itertuples (index = False , name = None )]
1828+ if object_dtype_indices :
1829+ # If we have object_dtype_cols, apply maybe_box_naive after list
1830+ # comprehension for perf
1831+ for row in data :
1832+ for i in object_dtype_indices :
1833+ row [i ] = maybe_box_native (row [i ])
1834+ return data
1835+
18141836 @overload
18151837 def to_dict (
18161838 self ,
@@ -1950,30 +1972,50 @@ def to_dict(
19501972 "'index=False' is only valid when 'orient' is 'split' or 'tight'"
19511973 )
19521974
1975+ if orient == "series" :
1976+ # GH46470 Return quickly if orient series to avoid creating dtype objects
1977+ return into_c ((k , v ) for k , v in self .items ())
1978+
1979+ object_dtype_indices = [
1980+ i
1981+ for i , col_dtype in enumerate (self .dtypes .values )
1982+ if is_object_dtype (col_dtype )
1983+ ]
1984+ are_all_object_dtype_cols = len (object_dtype_indices ) == len (self .dtypes )
1985+
19531986 if orient == "dict" :
19541987 return into_c ((k , v .to_dict (into )) for k , v in self .items ())
19551988
19561989 elif orient == "list" :
1990+ object_dtype_indices_as_set = set (object_dtype_indices )
19571991 return into_c (
1958- (k , list (map (maybe_box_native , v .tolist ()))) for k , v in self .items ()
1992+ (
1993+ k ,
1994+ list (map (maybe_box_native , v .tolist ()))
1995+ if i in object_dtype_indices_as_set
1996+ else v .tolist (),
1997+ )
1998+ for i , (k , v ) in enumerate (self .items ())
19591999 )
19602000
19612001 elif orient == "split" :
2002+ data = self ._create_data_for_split_and_tight_to_dict (
2003+ are_all_object_dtype_cols , object_dtype_indices
2004+ )
2005+
19622006 return into_c (
19632007 ((("index" , self .index .tolist ()),) if index else ())
19642008 + (
19652009 ("columns" , self .columns .tolist ()),
1966- (
1967- "data" ,
1968- [
1969- list (map (maybe_box_native , t ))
1970- for t in self .itertuples (index = False , name = None )
1971- ],
1972- ),
2010+ ("data" , data ),
19732011 )
19742012 )
19752013
19762014 elif orient == "tight" :
2015+ data = self ._create_data_for_split_and_tight_to_dict (
2016+ are_all_object_dtype_cols , object_dtype_indices
2017+ )
2018+
19772019 return into_c (
19782020 ((("index" , self .index .tolist ()),) if index else ())
19792021 + (
@@ -1990,26 +2032,65 @@ def to_dict(
19902032 + (("column_names" , list (self .columns .names )),)
19912033 )
19922034
1993- elif orient == "series" :
1994- return into_c ((k , v ) for k , v in self .items ())
1995-
19962035 elif orient == "records" :
19972036 columns = self .columns .tolist ()
1998- rows = (
1999- dict (zip (columns , row ))
2000- for row in self .itertuples (index = False , name = None )
2001- )
2002- return [
2003- into_c ((k , maybe_box_native (v )) for k , v in row .items ()) for row in rows
2004- ]
2037+ if are_all_object_dtype_cols :
2038+ rows = (
2039+ dict (zip (columns , row ))
2040+ for row in self .itertuples (index = False , name = None )
2041+ )
2042+ return [
2043+ into_c ((k , maybe_box_native (v )) for k , v in row .items ())
2044+ for row in rows
2045+ ]
2046+ else :
2047+ data = [
2048+ into_c (zip (columns , t ))
2049+ for t in self .itertuples (index = False , name = None )
2050+ ]
2051+ if object_dtype_indices :
2052+ object_dtype_indices_as_set = set (object_dtype_indices )
2053+ object_dtype_cols = {
2054+ col
2055+ for i , col in enumerate (self .columns )
2056+ if i in object_dtype_indices_as_set
2057+ }
2058+ for row in data :
2059+ for col in object_dtype_cols :
2060+ row [col ] = maybe_box_native (row [col ])
2061+ return data
20052062
20062063 elif orient == "index" :
20072064 if not self .index .is_unique :
20082065 raise ValueError ("DataFrame index must be unique for orient='index'." )
2009- return into_c (
2010- (t [0 ], dict (zip (self .columns , map (maybe_box_native , t [1 :]))))
2011- for t in self .itertuples (name = None )
2012- )
2066+ columns = self .columns .tolist ()
2067+ if are_all_object_dtype_cols :
2068+ return into_c (
2069+ (t [0 ], dict (zip (self .columns , map (maybe_box_native , t [1 :]))))
2070+ for t in self .itertuples (name = None )
2071+ )
2072+ elif object_dtype_indices :
2073+ object_dtype_indices_as_set = set (object_dtype_indices )
2074+ is_object_dtype_by_index = [
2075+ i in object_dtype_indices_as_set for i in range (len (self .columns ))
2076+ ]
2077+ return into_c (
2078+ (
2079+ t [0 ],
2080+ {
2081+ columns [i ]: maybe_box_native (v )
2082+ if is_object_dtype_by_index [i ]
2083+ else v
2084+ for i , v in enumerate (t [1 :])
2085+ },
2086+ )
2087+ for t in self .itertuples (name = None )
2088+ )
2089+ else :
2090+ return into_c (
2091+ (t [0 ], dict (zip (self .columns , t [1 :])))
2092+ for t in self .itertuples (name = None )
2093+ )
20132094
20142095 else :
20152096 raise ValueError (f"orient '{ orient } ' not understood" )
0 commit comments