Skip to content

Commit 8a2acaa

Browse files
authored
ARROW-15430: [Python] Address docstrings in Filesystems (Interface) (apache#13564)
Authored-by: Alenka Frim <frim.alenka@gmail.com> Signed-off-by: Alenka Frim <frim.alenka@gmail.com>
1 parent 1ae8dc7 commit 8a2acaa

2 files changed

Lines changed: 197 additions & 9 deletions

File tree

python/pyarrow/_fs.pyx

Lines changed: 194 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,48 @@ cdef class FileInfo(_Weakrefable):
100100
If given, the filesystem entry size in bytes. This should only
101101
be given if `type` is `FileType.File`.
102102
103+
Examples
104+
--------
105+
Generate a file:
106+
107+
>>> from pyarrow import fs
108+
>>> local = fs.LocalFileSystem()
109+
>>> path_fs = local_path + '/pyarrow-fs-example.dat'
110+
>>> with local.open_output_stream(path_fs) as stream:
111+
... stream.write(b'data')
112+
4
113+
114+
Get FileInfo object using ``get_file_info()``:
115+
116+
>>> file_info = local.get_file_info(path_fs)
117+
>>> file_info
118+
<FileInfo for '.../pyarrow-fs-example.dat': type=FileType.File, size=4>
119+
120+
Inspect FileInfo attributes:
121+
122+
>>> file_info.type
123+
<FileType.File: 2>
124+
125+
>>> file_info.is_file
126+
True
127+
128+
>>> file_info.path
129+
'/.../pyarrow-fs-example.dat'
130+
131+
>>> file_info.base_name
132+
'pyarrow-fs-example.dat'
133+
134+
>>> file_info.size
135+
4
136+
137+
>>> file_info.extension
138+
'dat'
139+
140+
>>> file_info.mtime # doctest: +SKIP
141+
datetime.datetime(2022, 6, 29, 7, 56, 10, 873922, tzinfo=datetime.timezone.utc)
142+
143+
>>> file_info.mtime_ns # doctest: +SKIP
144+
1656489370873922073
103145
"""
104146

105147
def __init__(self, path, FileType type=FileType.Unknown, *,
@@ -179,6 +221,12 @@ cdef class FileInfo(_Weakrefable):
179221
def path(self):
180222
"""
181223
The full file path in the filesystem.
224+
225+
Examples
226+
--------
227+
>>> file_info = local.get_file_info(path)
228+
>>> file_info.path
229+
'/.../pyarrow-fs-example.dat'
182230
"""
183231
return frombytes(self.info.path())
184232

@@ -188,6 +236,12 @@ cdef class FileInfo(_Weakrefable):
188236
The file base name.
189237
190238
Component after the last directory separator.
239+
240+
Examples
241+
--------
242+
>>> file_info = local.get_file_info(path)
243+
>>> file_info.base_name
244+
'pyarrow-fs-example.dat'
191245
"""
192246
return frombytes(self.info.base_name())
193247

@@ -210,6 +264,12 @@ cdef class FileInfo(_Weakrefable):
210264
def extension(self):
211265
"""
212266
The file extension.
267+
268+
Examples
269+
--------
270+
>>> file_info = local.get_file_info(path)
271+
>>> file_info.extension
272+
'dat'
213273
"""
214274
return frombytes(self.info.extension())
215275

@@ -221,6 +281,12 @@ cdef class FileInfo(_Weakrefable):
221281
Returns
222282
-------
223283
mtime : datetime.datetime or None
284+
285+
Examples
286+
--------
287+
>>> file_info = local.get_file_info(path)
288+
>>> file_info.mtime # doctest: +SKIP
289+
datetime.datetime(2022, 6, 29, 7, 56, 10, 873922, tzinfo=datetime.timezone.utc)
224290
"""
225291
cdef int64_t nanoseconds
226292
nanoseconds = TimePoint_to_ns(self.info.mtime())
@@ -236,6 +302,12 @@ cdef class FileInfo(_Weakrefable):
236302
Returns
237303
-------
238304
mtime_ns : int or None
305+
306+
Examples
307+
--------
308+
>>> file_info = local.get_file_info(path)
309+
>>> file_info.mtime_ns # doctest: +SKIP
310+
1656489370873922073
239311
"""
240312
cdef int64_t nanoseconds
241313
nanoseconds = TimePoint_to_ns(self.info.mtime())
@@ -260,6 +332,31 @@ cdef class FileSelector(_Weakrefable):
260332
If true, an empty selection is returned.
261333
recursive : bool, default False
262334
Whether to recurse into subdirectories.
335+
336+
Examples
337+
--------
338+
List the contents of a directory and subdirectories:
339+
340+
>>> selector_1 = fs.FileSelector(local_path, recursive=True)
341+
>>> local.get_file_info(selector_1) # doctest: +SKIP
342+
[<FileInfo for 'tmp/alphabet/example.dat': type=FileType.File, size=4>,
343+
<FileInfo for 'tmp/alphabet/subdir': type=FileType.Directory>,
344+
<FileInfo for 'tmp/alphabet/subdir/example_copy.dat': type=FileType.File, size=4>]
345+
346+
List only the contents of the base directory:
347+
348+
>>> selector_2 = fs.FileSelector(local_path)
349+
>>> local.get_file_info(selector_2) # doctest: +SKIP
350+
[<FileInfo for 'tmp/alphabet/example.dat': type=FileType.File, size=4>,
351+
<FileInfo for 'tmp/alphabet/subdir': type=FileType.Directory>]
352+
353+
Return an empty selection if the directory doesn't exist:
354+
355+
>>> selector_not_found = fs.FileSelector(local_path + '/missing',
356+
... recursive=True,
357+
... allow_not_found=True)
358+
>>> local.get_file_info(selector_not_found)
359+
[]
263360
"""
264361

265362
def __init__(self, base_dir, bint allow_not_found=False,
@@ -335,6 +432,22 @@ cdef class FileSystem(_Weakrefable):
335432
tuple of (FileSystem, str path)
336433
With (filesystem, path) tuple where path is the abstract path
337434
inside the FileSystem instance.
435+
436+
Examples
437+
--------
438+
Create a new FileSystem subclass from a URI:
439+
440+
>>> uri = 'file:///{}/pyarrow-fs-example.dat'.format(local_path)
441+
>>> local_new, path_new = fs.FileSystem.from_uri(uri)
442+
>>> local_new
443+
<pyarrow._fs.LocalFileSystem object at ...>
444+
>>> path_new
445+
'/.../pyarrow-fs-example.dat'
446+
447+
Or from an S3 bucket:
448+
449+
>>> fs.FileSystem.from_uri("s3://usgs-landsat/collection02/")
450+
(<pyarrow._s3fs.S3FileSystem object at ...>, 'usgs-landsat/collection02')
338451
"""
339452
cdef:
340453
c_string c_path
@@ -422,6 +535,13 @@ cdef class FileSystem(_Weakrefable):
422535
FileInfo or list of FileInfo
423536
Single FileInfo object is returned for a single path, otherwise
424537
a list of FileInfo objects is returned.
538+
539+
Examples
540+
--------
541+
>>> local
542+
<pyarrow._fs.LocalFileSystem object at ...>
543+
>>> local.get_file_info("/{}/pyarrow-fs-example.dat".format(local_path))
544+
<FileInfo for '/.../pyarrow-fs-example.dat': type=FileType.File, size=4>
425545
"""
426546
cdef:
427547
CFileInfo info
@@ -521,6 +641,28 @@ cdef class FileSystem(_Weakrefable):
521641
The path of the file or the directory to be moved.
522642
dest : str
523643
The destination path where the file or directory is moved to.
644+
645+
Examples
646+
--------
647+
Create a new folder with a file:
648+
649+
>>> local.create_dir('/tmp/other_dir')
650+
>>> local.copy_file(path,'/tmp/move_example.dat')
651+
652+
Move the file:
653+
654+
>>> local.move('/tmp/move_example.dat',
655+
... '/tmp/other_dir/move_example_2.dat')
656+
657+
Inspect the file info:
658+
659+
>>> local.get_file_info('/tmp/other_dir/move_example_2.dat')
660+
<FileInfo for '/tmp/other_dir/move_example_2.dat': type=FileType.File, size=4>
661+
>>> local.get_file_info('/tmp/move_example.dat')
662+
<FileInfo for '/tmp/move_example.dat': type=FileType.NotFound>
663+
664+
Delete the folder:
665+
>>> local.delete_dir('/tmp/other_dir')
524666
"""
525667
cdef:
526668
c_string source = _path_as_bytes(src)
@@ -541,6 +683,18 @@ cdef class FileSystem(_Weakrefable):
541683
The path of the file to be copied from.
542684
dest : str
543685
The destination path where the file is copied to.
686+
687+
Examples
688+
--------
689+
>>> local.copy_file(path,
690+
... local_path + '/pyarrow-fs-example_copy.dat')
691+
692+
Inspect the file info:
693+
694+
>>> local.get_file_info(local_path + '/pyarrow-fs-example_copy.dat')
695+
<FileInfo for '/.../pyarrow-fs-example_copy.dat': type=FileType.File, size=4>
696+
>>> local.get_file_info(path)
697+
<FileInfo for '/.../pyarrow-fs-example.dat': type=FileType.File, size=4>
544698
"""
545699
cdef:
546700
c_string source = _path_as_bytes(src)
@@ -591,6 +745,14 @@ cdef class FileSystem(_Weakrefable):
591745
Returns
592746
-------
593747
stream : NativeFile
748+
749+
Examples
750+
--------
751+
Print the data from the file with ``open_input_file()``:
752+
753+
>>> with local.open_input_file(path) as f:
754+
... print(f.readall())
755+
b'data'
594756
"""
595757
cdef:
596758
c_string pathstr = _path_as_bytes(path)
@@ -625,6 +787,14 @@ cdef class FileSystem(_Weakrefable):
625787
Returns
626788
-------
627789
stream : NativeFile
790+
791+
Examples
792+
--------
793+
Print the data from the file with ``open_input_stream()``:
794+
795+
>>> with local.open_input_stream(path) as f:
796+
... print(f.readall())
797+
b'data'
628798
"""
629799
cdef:
630800
c_string pathstr = _path_as_bytes(path)
@@ -670,6 +840,13 @@ cdef class FileSystem(_Weakrefable):
670840
Returns
671841
-------
672842
stream : NativeFile
843+
844+
Examples
845+
--------
846+
>>> local = fs.LocalFileSystem()
847+
>>> with local.open_output_stream(path) as stream:
848+
... stream.write(b'data')
849+
4
673850
"""
674851
cdef:
675852
c_string pathstr = _path_as_bytes(path)
@@ -727,6 +904,20 @@ cdef class FileSystem(_Weakrefable):
727904
Returns
728905
-------
729906
stream : NativeFile
907+
908+
Examples
909+
--------
910+
Append new data to a nonempty file using a FileSystem subclass:
911+
912+
>>> with local.open_append_stream(path) as f:
913+
... f.write(b'+newly added')
914+
12
915+
916+
Print out the content of the file:
917+
918+
>>> with local.open_input_file(path) as f:
919+
... print(f.readall())
920+
b'data+newly added'
730921
"""
731922
cdef:
732923
c_string pathstr = _path_as_bytes(path)
@@ -796,11 +987,9 @@ cdef class LocalFileSystem(FileSystem):
796987
797988
>>> with local.open_output_stream('/tmp/local_fs.dat') as stream:
798989
... stream.write(b'data')
799-
...
800990
4
801991
>>> with local.open_input_stream('/tmp/local_fs.dat') as stream:
802992
... print(stream.readall())
803-
...
804993
b'data'
805994
806995
Create a FileSystem object inferred from a URI of the saved file:
@@ -834,11 +1023,10 @@ cdef class LocalFileSystem(FileSystem):
8341023
8351024
>>> with local.open_append_stream('/tmp/local_fs-copy.dat') as f:
8361025
... f.write(b'+newly added')
837-
...
8381026
12
1027+
8391028
>>> with local.open_input_stream('/tmp/local_fs-copy.dat') as f:
8401029
... print(f.readall())
841-
...
8421030
b'data+newly added'
8431031
8441032
Create a directory, copy a file into it and then delete the whole directory:
@@ -939,8 +1127,7 @@ cdef class SubTreeFileSystem(FileSystem):
9391127
>>> from pyarrow import fs
9401128
>>> local = fs.LocalFileSystem()
9411129
>>> with local.open_output_stream('/tmp/local_fs.dat') as stream:
942-
... stream.write(b'data')
943-
...
1130+
... stream.write(b'data')
9441131
4
9451132
9461133
Create a directory and a SubTreeFileSystem instance:
@@ -952,7 +1139,6 @@ cdef class SubTreeFileSystem(FileSystem):
9521139
9531140
>>> with subtree.open_append_stream('sub_tree_fs.dat') as f:
9541141
... f.write(b'+newly added')
955-
...
9561142
12
9571143
9581144
Print out the attributes:
@@ -971,6 +1157,7 @@ cdef class SubTreeFileSystem(FileSystem):
9711157
9721158
Delete the file and directory:
9731159
1160+
>>> subtree.delete_file('sub_tree_fs.dat')
9741161
>>> local.delete_dir('/tmp/sub_tree')
9751162
>>> local.delete_file('/tmp/local_fs.dat')
9761163

python/pyarrow/conftest.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,9 +258,10 @@ def add_fs(doctest_namespace, request, tmp_path):
258258

259259
# Creation of an object and file with data
260260
local = fs.LocalFileSystem()
261-
path = tmp_path / 'fileinfo.dat'
261+
path = tmp_path / 'pyarrow-fs-example.dat'
262262
with local.open_output_stream(str(path)) as stream:
263263
stream.write(b'data')
264264
doctest_namespace["local"] = local
265-
doctest_namespace["local_path"] = tmp_path
265+
doctest_namespace["local_path"] = str(tmp_path)
266+
doctest_namespace["path"] = str(path)
266267
yield

0 commit comments

Comments
 (0)