docarray · hanxiao · Jan 3, 2022 · Jan 3, 2022 · Jan 3, 2022 · Jan 3, 2022
diff --git a/.github/requirements-cicd.txt b/.github/requirements-cicd.txt
@@ -12,4 +12,5 @@ matplotlib
 rich
 Pillow
 lz4
-fastapi
+fastapi
+jupyterlab
diff --git a/README.md b/README.md
@@ -9,11 +9,13 @@
 
 <!-- start elevator-pitch -->
 
-DocArray is a library for nested, unstructured data such as text, image, audio, video, 3D mesh. Its Pythonic interface allows deep learning engineers to easily preprocess, embed, search, recommend and transfer the data.
+DocArray is a library for nested, unstructured data such as text, image, audio, video, 3D mesh. It allows deep learning engineers to easily preprocess, embed, search, recommend and transfer the data.
 
 🌌 **All data types**: super-expressive data structure for representing complicated/mixed/nested text, image, video, audio, 3D mesh data.
 
-🧑‍🔬 **Data science powerhouse**: easy-to-use functions for facilitating data scientists work on embedding, matching, visualizing, evaluating via Torch/Tensorflow/ONNX/PaddlePaddle.
+🐍 **Pythonic API**: easy-to-use idioms and interfaces just as the native Python List. If you know how to Python, you know how to DocArray.
+
+🧑‍🔬 **Data science powerhouse**: greatly facilitates data scientists' work on embedding, matching, visualizing and evaluating via Torch/Tensorflow/ONNX/PaddlePaddle.
 
 🚡 **Portable**: ready to wire at anytime with efficient and compact serialization from/to Protobuf, binary, JSON, CSV, dataframe.
 
@@ -196,6 +198,31 @@ recall@5 0.0573470744680851
 
 More metrics can be used such as `precision_at_k`, `ndcg_at_k`, `hit_at_k`. 
 
+
+
+### Save results
+
+You can save a DocumentArray to binary, JSON, dict, dataframe, CSV or Protobuf message. In its simplest form,
+
+```python
+left_da.save('left_da.bin')
+```
+
+To reuse it, do `left_da = DocumentArray.load('left_da.bin')`.
+
+If you want to transfer a DocumentArray from one machine to another or share it with your colleagues, you can do:
+
+```python
+left_da.push(token='my_shared_da')
+```
+
+```python
+left_da = DocumentArray.pull(token='my_shared_da')
+```
+
+Anyone who knows the token `my_shared_da` can pull and work on it.
+
+
 Intrigued? That's only scratching the surface of what DocArray is capable of. [Read our docs to learn more](https://docarray.jina.ai).
 
 <!-- start support-pitch -->

diff --git a/docarray/array/mixins/io/common.py b/docarray/array/mixins/io/common.py
@@ -8,7 +8,7 @@ class CommonIOMixin:
     """The common IO helper function for arrays. """
 
     def save(
-        self, file: Union[str, TextIO, BinaryIO], file_format: str = 'json'
+        self, file: Union[str, TextIO, BinaryIO], file_format: str = 'binary'
     ) -> None:
         """Save array elements into a JSON, a binary file or a CSV file.
 
@@ -28,7 +28,7 @@ def save(
 
     @classmethod
     def load(
-        cls: Type['T'], file: Union[str, TextIO, BinaryIO], file_format: str = 'json'
+        cls: Type['T'], file: Union[str, TextIO, BinaryIO], file_format: str = 'binary'
     ) -> 'T':
         """Load array elements from a JSON or a binary file, or a CSV file.
 

diff --git a/docarray/array/mixins/io/pushpull.py b/docarray/array/mixins/io/pushpull.py
@@ -1,6 +1,6 @@
 import io
 from contextlib import nullcontext
-from typing import Type, TYPE_CHECKING
+from typing import Type, TYPE_CHECKING, Optional
 
 from ....helper import get_request_header
 
@@ -13,7 +13,9 @@ class PushPullMixin:
 
     _service_url = 'https://apihubble.jina.ai/v2/rpc/da.'
 
-    def push(self, token: str, show_progress: bool = False) -> None:
+    def push(
+        self, token: str, show_progress: bool = False, compress: Optional[str] = None
+    ) -> None:
         """Push this DocumentArray object to Jina Cloud which can be later retrieved via :meth:`.push`
 
         .. note::
@@ -53,7 +55,7 @@ def read(self, n=-1):
         dict_data = {
             'file': (
                 'DocumentArray',
-                self.to_bytes(protocol='protobuf', compress='gzip'),
+                self.to_bytes(protocol='protobuf', compress=compress),
             ),
             'token': token,
         }
@@ -69,7 +71,12 @@ def read(self, n=-1):
             requests.post(self._service_url + 'push', data=body, headers=headers)
 
     @classmethod
-    def pull(cls: Type['T'], token: str, show_progress: bool = False) -> 'T':
+    def pull(
+        cls: Type['T'],
+        token: str,
+        show_progress: bool = False,
+        compress: Optional[str] = None,
+    ) -> 'T':
         """Pulling a :class:`DocumentArray` from Jina Cloud Service to local.
 
         :param token: the upload token set during :meth:`.push`
@@ -103,7 +110,9 @@ def pull(cls: Type['T'], token: str, show_progress: bool = False) -> 'T':
                     if show_progress:
                         progress.update(task_id, advance=len(chunk))
 
-                return cls.from_bytes(f.getvalue(), protocol='protobuf', compress='lz4')
+                return cls.from_bytes(
+                    f.getvalue(), protocol='protobuf', compress=compress
+                )
 
 
 def _get_progressbar(show_progress):

diff --git a/docarray/document/mixins/plot.py b/docarray/document/mixins/plot.py
@@ -54,10 +54,10 @@ def _mermaid_to_url(self, img_type: str) -> str:
         """
         mermaid_str = (
             """
-                                                                            %%{init: {'theme': 'base', 'themeVariables': { 'primaryColor': '#FFC666'}}}%%
-                                                                            classDiagram
-
-                                                                                    """
+                                                                                %%{init: {'theme': 'base', 'themeVariables': { 'primaryColor': '#FFC666'}}}%%
+                                                                                classDiagram
+                            
+                                                                                        """
             + self.__mermaid_str__()
         )
 
@@ -69,7 +69,41 @@ def _mermaid_to_url(self, img_type: str) -> str:
 
     def _ipython_display_(self):
         """Displays the object in IPython as a side effect"""
-        self.plot(inline_display=True)
+        self.summary()
+
+    def summary(self) -> None:
+        """ Print non-empty fields and nested structure of this Document object."""
+        _str_list = []
+        self._plot_recursion(_str_list, indent=0)
+        print('\n'.join(_str_list))
+
+    def _plot_recursion(self, _str_list, indent, box_char='├─'):
+        prefix = (' ' * indent + box_char) if indent else ''
+        _str_list.append(f'{prefix} {self}')
+
+        for a in ('matches', 'chunks'):
+            if getattr(self, a):
+                prefix = ' ' * (indent + 4) + '└─'
+                _str_list.append(f'{prefix} {a}')
+
+                for d in getattr(self, a)[:-1]:
+                    d._plot_recursion(_str_list, indent=len(prefix) + 4)
+                getattr(self, a)[-1]._plot_recursion(
+                    _str_list, indent=len(prefix) + 4, box_char='└─'
+                )
+
+    def plot_image(self):
+        """ Plot image data from :attr:`.blob` or :attr:`.uri`. """
+        from IPython.display import Image, display
+
+        if self.blob is not None:
+            import PIL.Image
+
+            display(PIL.Image.fromarray(self.blob))
+        elif self.uri:
+            display(Image(self.uri))
+        else:
+            raise ValueError('`uri` and `blob` is empty')
 
     def plot(self, output: Optional[str] = None, inline_display: bool = False) -> None:
         """

diff --git a/docs/index.md b/docs/index.md
@@ -7,6 +7,10 @@
 
 ## Install
 
+```{tip}
+Jina 3.x users do not need to install `docarray` separately; it is shipped with Jina. To check your Jina version, type `jina -vf` in the console.
+```
+
 Make sure you have Python 3.7+ and `numpy` installed on Linux/Mac/Windows:
 
 ````{tab} Basic install
@@ -41,6 +45,17 @@ The following dependencies will be installed to enable additional features:
 Alternatively, you can first do basic installation and then install missing dependencies on-demand. 
 ````
 
+```pycon
+>>> import docarray
+>>> docarray.__version__
+'0.1.0'
+```
+
+```{attention}
+If the printed version is lower than `0.1.0`, say `0.0.x`, then `docarray` is
+not installed correctly. You are probably still using an old `docarray` shipped with Jina 2.x.
+```
+
 
 
 

diff --git a/tests/unit/document/test_summary.py b/tests/unit/document/test_summary.py
@@ -0,0 +1,29 @@
+import os
+
+from docarray import Document
+
+cur_dir = os.path.dirname(os.path.abspath(__file__))
+
+
+def test_single_doc_summary():
+    # empty doc
+    Document().summary()
+    # nested doc
+    Document(
+        chunks=[
+            Document(),
+            Document(chunks=[Document()]),
+            Document(),
+        ],
+        matches=[Document(), Document()],
+    ).summary()
+
+
+def test_plot_image():
+    d = Document(uri=os.path.join(cur_dir, 'toydata/test.png'))
+    d.plot_image()
+
+    d.load_uri_to_image_blob()
+    d.uri = None
+
+    d.plot_image()
-Original file line number
+Diff line change
@@ Expand Up / @@ -12,4 +12,5 @@ matplotlib @@
     rich
     Pillow
     lz4
-    fastapi
+    fastapi
+    jupyterlab