docarray · hanxiao · Jan 5, 2022 · Jan 5, 2022
diff --git a/docarray/array/mixins/io/binary.py b/docarray/array/mixins/io/binary.py
@@ -18,7 +18,7 @@ class BinaryIOMixin:
     def load_binary(
         cls: Type['T'],
         file: Union[str, BinaryIO, bytes],
-        protocol: str = 'pickle-once',
+        protocol: str = 'pickle-array',
         compress: Optional[str] = None,
     ) -> 'T':
         """Load array elements from a LZ4-compressed binary file.
@@ -45,14 +45,14 @@ def load_binary(
                 d = decompress_bytes(d, algorithm=compress)
                 compress = None
 
-            if protocol == 'protobuf-once':
+            if protocol == 'protobuf-array':
                 from ....proto.docarray_pb2 import DocumentArrayProto
 
                 dap = DocumentArrayProto()
                 dap.ParseFromString(d)
 
                 return cls.from_protobuf(dap)
-            elif protocol == 'pickle-once':
+            elif protocol == 'pickle-array':
                 return pickle.loads(d)
             else:
                 _len = len(random_uuid().bytes)
@@ -66,15 +66,15 @@ def load_binary(
     def from_bytes(
         cls: Type['T'],
         data: bytes,
-        protocol: str = 'pickle-once',
+        protocol: str = 'pickle-array',
         compress: Optional[str] = None,
     ) -> 'T':
         return cls.load_binary(data, protocol=protocol, compress=compress)
 
     def save_binary(
         self,
         file: Union[str, BinaryIO],
-        protocol: str = 'pickle-once',
+        protocol: str = 'pickle-array',
         compress: Optional[str] = None,
     ) -> None:
         """Save array elements into a LZ4 compressed binary file.
@@ -98,7 +98,7 @@ def save_binary(
 
     def to_bytes(
         self,
-        protocol: str = 'pickle-once',
+        protocol: str = 'pickle-array',
         compress: Optional[str] = None,
         _file_ctx: Optional[BinaryIO] = None,
     ) -> bytes:
@@ -121,9 +121,9 @@ def to_bytes(
                 fc = f
                 compress = None
             with fc:
-                if protocol == 'protobuf-once':
+                if protocol == 'protobuf-array':
                     f.write(self.to_protobuf().SerializePartialToString())
-                elif protocol == 'pickle-once':
+                elif protocol == 'pickle-array':
                     f.write(pickle.dumps(self))
                 else:
                     for d in self:

diff --git a/docs/fundamentals/document/serialization.md b/docs/fundamentals/document/serialization.md
@@ -4,7 +4,7 @@
 DocArray is designed to be "ready-to-wire": it assumes you always want to send/receive Document over network across microservices. Hence, serialization of Document is important. This chapter introduces multiple serialization methods of a single Document. 
 
 ```{tip}
-One should use DocumentArray for serializing multiple Documents, instead of looping over Documents one by one. The former is much faster and yield more compact serialization. 
+One should use {ref}`DocumentArray for serializing multiple Documents<docarray-serialization>`, instead of looping over Documents one by one. The former is much faster and yields a more compact serialization. 
 ```
 
 
@@ -47,38 +47,7 @@ print(d_as_json, d)
 <Document ('id', 'mime_type', 'text', 'embedding') at 27d4fa4c6d5711ec8c831e008a366d49>
 ```
 
-
-## From/to dict
-
-```{important}
-This feature requires `protobuf` dependency. You can do `pip install docarray[full]` to install it.
-```
-
-You can serialize a Document as a Python `dict` via {meth}`~docarray.document.mixins.porting.PortingMixin.to_dict`, and then read from it via {meth}`~docarray.document.mixins.porting.PortingMixin.from_dict`.
-
-```python
-from docarray import Document
-import numpy as np
-
-d_as_dict = Document(text='hello, world', embedding=np.array([1, 2, 3])).to_dict()
-
-d = Document.from_dict(d_as_dict)
-
-print(d_as_dict, d)
-```
-
-```text
-{'id': 'b29d39066d5611ec87661e008a366d49', 'text': 'hello, world', 'mime_type': 'text/plain', 'embedding': {'dense': {'buffer': 'AQAAAAAAAAACAAAAAAAAAAMAAAAAAAAA', 'shape': [3], 'dtype': '<i8'}, 'cls_name': 'numpy'}} 
-
-<Document ('id', 'mime_type', 'text', 'embedding') at b29d39066d5611ec87661e008a366d49>
-```
-
-```{note}
-Note that the result dict is very "stricted" in the sense that all fields and values boil down to very basic data type such as `int`, `float`, `string`. This behavior is designed due to the "serialization to `dict`" is often an intermediate step of serializing into JSON/YAML. Hence all values in `dict` must be schema-friendly. After all, a Python `dict` object means nothing if you are not working in Python. 
-
-You can use `to_dict(strict=False)` to override this behavior. This will preserve the original Python data type of every value, which may not be JSON-friendly. But hey, you want it.   
-```
-
+(doc-in-bytes)=
 ## From/to bytes
 
 ```{important}
@@ -127,6 +96,43 @@ Note that when deserializing from a non-default binary serialization, you need t
 d = Document.from_bytes(d_bytes, protocol='protobuf', compress='gzip')
 ```
 
+```{tip}
+If you go with the default `protocol` and `compress` settings, you can simply use `bytes(d)`, which is more Pythonic.
+```
+
+
+## From/to dict
+
+```{important}
+This feature requires `protobuf` dependency. You can do `pip install docarray[full]` to install it.
+```
+
+You can serialize a Document as a Python `dict` via {meth}`~docarray.document.mixins.porting.PortingMixin.to_dict`, and then read from it via {meth}`~docarray.document.mixins.porting.PortingMixin.from_dict`.
+
+```python
+from docarray import Document
+import numpy as np
+
+d_as_dict = Document(text='hello, world', embedding=np.array([1, 2, 3])).to_dict()
+
+d = Document.from_dict(d_as_dict)
+
+print(d_as_dict, d)
+```
+
+```text
+{'id': 'b29d39066d5611ec87661e008a366d49', 'text': 'hello, world', 'mime_type': 'text/plain', 'embedding': {'dense': {'buffer': 'AQAAAAAAAAACAAAAAAAAAAMAAAAAAAAA', 'shape': [3], 'dtype': '<i8'}, 'cls_name': 'numpy'}} 
+
+<Document ('id', 'mime_type', 'text', 'embedding') at b29d39066d5611ec87661e008a366d49>
+```
+
+(strict-arg-explain)=
+```{note}
+Note that the resulting dict is very "strict" in the sense that all fields and values boil down to very basic data types such as `int`, `float`, `string`. This behavior is by design: serialization to `dict` is often an intermediate step of serializing into JSON/YAML, hence all values in the `dict` must be schema-friendly. After all, a Python `dict` object means nothing if you are not working in Python. 
+
+You can use `to_dict(strict=False)` to override this behavior. This will preserve the original Python data type of every value, which may not be JSON-friendly. But hey, you want it.   
+```
+
 ## From/to Protobuf
 
 ```{important}

diff --git a/docs/fundamentals/documentarray/construct.md b/docs/fundamentals/documentarray/construct.md
@@ -6,11 +6,62 @@
 ```python
 from docarray import DocumentArray
 
+da = DocumentArray()
+```
+
+```text
+<DocumentArray (length=0) at 4453362704>
+```
+
+Now you can use list-like interfaces such as `.append()` and `.extend()`, just as you would to add elements to a Python list.
+
+```python
+da.append(Document(text='hello world!'))
+da.extend([Document(text='hello'), Document(text='world!')])
+```
+
+```text
+<DocumentArray (length=3) at 4446140816>
+```
+
+Directly printing a DocumentArray does not show much useful information; for a more informative overview, you can use {meth}`~docarray.array.mixins.plot.PlotMixin.summary`.
+
+```{important}
+This feature requires `rich` dependency. You can do `pip install docarray[full]` to install it.
+```
+
+```python
+da.summary()
+```
+
+```text
+                  Documents Summary                   
+
+  Length                 3                            
+  Homogenous Documents   True                         
+  Common Attributes      ('id', 'mime_type', 'text')  
+
+                     Attributes Summary                     
+
+  Attribute   Data type   #Unique values   Has empty value  
+ ────────────────────────────────────────────────────────── 
+  id          ('str',)    3                False            
+  mime_type   ('str',)    1                False            
+  text        ('str',)    3                False    
+```
+
+## Construct with empty Documents
+
+Like `numpy.zeros()`, you can quickly build a DocumentArray with only empty Documents:
+
+```python
+from docarray import DocumentArray
+
 da = DocumentArray.empty(10)
 ```
 
 ```text
-<DocumentArray (length=10) at 4456123280>
+<DocumentArray (length=10) at 4453362704>
 ```
 
 ## Construct from list-like objects
@@ -41,6 +92,7 @@ da = DocumentArray((Document() for _ in range(10)))
 ```
 ````
 
+
 As DocumentArray itself is also a "list-like object that yields `Document`", you can also construct DocumentArray from another DocumentArray:
 
 ```python

diff --git a/docs/fundamentals/documentarray/images/benchmark-size.svg b/docs/fundamentals/documentarray/images/benchmark-size.svg
diff --git a/docs/fundamentals/documentarray/images/benchmark-time.svg b/docs/fundamentals/documentarray/images/benchmark-time.svg
diff --git a/docs/fundamentals/documentarray/images/da-push.png b/docs/fundamentals/documentarray/images/da-push.png