@@ -1775,6 +1775,128 @@ def to_numpy(
17751775
17761776 return result
17771777
def _to_dict_helper(self, orient, into_c, into):
    """
    Build the ``to_dict`` result for an already-normalised ``orient``.

    As part of GH46470 this only calls ``maybe_box_native`` on values that
    come from object-dtype columns: boxing is comparatively slow and the
    values of every other column are passed through unchanged.

    Parameters
    ----------
    orient : str
        One of ``"dict"``, ``"list"``, ``"split"``, ``"series"``,
        ``"records"``, ``"index"`` or ``"tight"``.
    into_c : callable
        Called with an iterable of ``(key, value)`` pairs to build every
        mapping that is returned.
    into : type or instance
        Forwarded to ``Series.to_dict`` for ``orient="dict"``.

    Returns
    -------
    mapping, or list of mappings for ``orient="records"``

    Raises
    ------
    ValueError
        If ``orient`` is not one of the names above, or if
        ``orient="index"`` and the index is not unique.
    """
    # These two orients hand whole Series on (or delegate to
    # Series.to_dict), so no per-column dtype information is needed.
    if orient == "dict":
        return into_c((k, v.to_dict(into=into)) for k, v in self.items())
    if orient == "series":
        return into_c((k, v) for k, v in self.items())

    # Flag object-dtype columns by *position*, not by label: labels may be
    # duplicated or may not compare equal to themselves (NaN), either of
    # which makes a label-based lookup pick the wrong columns.
    needs_boxing = [is_object_dtype(dtype) for dtype in self.dtypes]

    if any(needs_boxing):

        def _row_values(values):
            # Box only the cells that sit in object-dtype columns.
            return [
                maybe_box_native(v) if box else v
                for box, v in zip(needs_boxing, values)
            ]

    else:
        # Nothing to box: a plain list copy of the row is all that is needed.
        _row_values = list

    if orient == "list":
        return into_c(
            (
                k,
                list(map(maybe_box_native, v.tolist())) if box else v.tolist(),
            )
            for box, (k, v) in zip(needs_boxing, self.items())
        )

    if orient in ("split", "tight"):
        data = [_row_values(t) for t in self.itertuples(index=False, name=None)]
        pairs = [
            ("index", self.index.tolist()),
            ("columns", self.columns.tolist()),
            ("data", data),
        ]
        if orient == "tight":
            # "tight" is "split" plus the names of both axes.
            pairs.append(("index_names", list(self.index.names)))
            pairs.append(("column_names", list(self.columns.names)))
        return into_c(tuple(pairs))

    if orient == "records":
        columns = self.columns.tolist()
        return [
            into_c(zip(columns, _row_values(t)))
            for t in self.itertuples(index=False, name=None)
        ]

    if orient == "index":
        if not self.index.is_unique:
            raise ValueError("DataFrame index must be unique for orient='index'.")
        columns = self.columns.tolist()
        # itertuples() puts the index label first; the rest are the cells.
        return into_c(
            (t[0], dict(zip(columns, _row_values(t[1:]))))
            for t in self.itertuples(name=None)
        )

    raise ValueError(f"orient '{orient}' not understood")
1899+
17781900 def to_dict (self , orient : str = "dict" , into = dict ):
17791901 """
17801902 Convert the DataFrame to a dictionary.
@@ -1913,67 +2035,7 @@ def to_dict(self, orient: str = "dict", into=dict):
19132035 elif orient .startswith ("i" ):
19142036 orient = "index"
19152037
1916- if orient == "dict" :
1917- return into_c ((k , v .to_dict (into )) for k , v in self .items ())
1918-
1919- elif orient == "list" :
1920- return into_c ((k , v .tolist ()) for k , v in self .items ())
1921-
1922- elif orient == "split" :
1923- return into_c (
1924- (
1925- ("index" , self .index .tolist ()),
1926- ("columns" , self .columns .tolist ()),
1927- (
1928- "data" ,
1929- [
1930- list (map (maybe_box_native , t ))
1931- for t in self .itertuples (index = False , name = None )
1932- ],
1933- ),
1934- )
1935- )
1936-
1937- elif orient == "tight" :
1938- return into_c (
1939- (
1940- ("index" , self .index .tolist ()),
1941- ("columns" , self .columns .tolist ()),
1942- (
1943- "data" ,
1944- [
1945- list (map (maybe_box_native , t ))
1946- for t in self .itertuples (index = False , name = None )
1947- ],
1948- ),
1949- ("index_names" , list (self .index .names )),
1950- ("column_names" , list (self .columns .names )),
1951- )
1952- )
1953-
1954- elif orient == "series" :
1955- return into_c ((k , v ) for k , v in self .items ())
1956-
1957- elif orient == "records" :
1958- columns = self .columns .tolist ()
1959- rows = (
1960- dict (zip (columns , row ))
1961- for row in self .itertuples (index = False , name = None )
1962- )
1963- return [
1964- into_c ((k , maybe_box_native (v )) for k , v in row .items ()) for row in rows
1965- ]
1966-
1967- elif orient == "index" :
1968- if not self .index .is_unique :
1969- raise ValueError ("DataFrame index must be unique for orient='index'." )
1970- return into_c (
1971- (t [0 ], dict (zip (self .columns , t [1 :])))
1972- for t in self .itertuples (name = None )
1973- )
1974-
1975- else :
1976- raise ValueError (f"orient '{ orient } ' not understood" )
2038+ return self ._to_dict_helper (orient , into_c , into )
19772039
19782040 def to_gbq (
19792041 self ,
0 commit comments