78 changes: 41 additions & 37 deletions notebooks/multimodal/multimodal_dataframe.ipynb
@@ -92,7 +92,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
@@ -128,6 +128,38 @@
"import bigframes.bigquery as bbq"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import bigframes.bigquery as bbq\n",
"\n",
"def get_runtime_json_str(series, mode=\"R\", with_metadata=False):\n",
" \"\"\"\n",
" Get the runtime (contains signed URL to access gcs data) and apply the\n",
" ToJSONSTring transformation.\n",
" \n",
" Args:\n",
" series: bigframes.series.Series to operate on.\n",
" mode: \"R\" for read, \"RW\" for read/write.\n",
" with_metadata: Whether to fetch and include blob metadata.\n",
" \"\"\"\n",
" # 1. Optionally fetch metadata\n",
" s = (\n",
" bbq.obj.fetch_metadata(series)\n",
" if with_metadata\n",
" else series\n",
" )\n",
" \n",
" # 2. Retrieve the access URL runtime object\n",
" runtime = bbq.obj.get_access_url(s, mode=mode)\n",
" \n",
" # 3. Convert the runtime object to a JSON string\n",
" return bbq.to_json_string(runtime)"
]
},
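Note (sketch, not part of the diff): the new `get_runtime_json_str` helper stands in for the `Series.blob.get_runtime_json_str` accessor that the old code at the bottom of this diff called. A minimal usage sketch, assuming the notebook's BigQuery session and GCS connection are already configured and reusing the cymbal-pets sample bucket that appears later in the notebook:

```python
import bigframes.pandas as bpd

# Build a blob Series from a GCS glob (same sample bucket as the PDF cell below).
docs = bpd.from_glob_path(
    "gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*", name="pdf"
)

# JSON strings carrying signed read URLs (and, optionally, blob metadata),
# suitable for passing into a BigQuery Python UDF or remote function.
# get_runtime_json_str is the helper defined in the cell above.
runtime_json = get_runtime_json_str(docs["pdf"], mode="R", with_metadata=True)
runtime_json.head()
```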
{
"cell_type": "markdown",
"metadata": {
@@ -1290,22 +1322,11 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 3,
"metadata": {
"id": "oDDuYtUm5Yiy"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/google/home/shuowei/src/github.com/googleapis/python-bigquery-dataframes/bigframes/dtypes.py:959: JSONDtypeWarning: JSON columns will be represented as pandas.ArrowDtype(pyarrow.json_())\n",
"instead of using `db_dtypes` in the future when available in pandas\n",
"(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n",
" warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n"
]
}
],
"outputs": [],
"source": [
"df_pdf = bpd.from_glob_path(\"gs://cloud-samples-data/bigquery/tutorials/cymbal-pets/documents/*\", name=\"pdf\")"
]
@@ -1464,7 +1485,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@@ -1474,7 +1495,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -1486,26 +1507,9 @@
"(https://github.com/pandas-dev/pandas/issues/60958) and pyarrow.\n",
" warnings.warn(msg, bigframes.exceptions.JSONDtypeWarning)\n"
]
},
{
"data": {
"text/html": [
"<pre>0 Now, as all books, not primarily intended as p...</pre>"
],
"text/plain": [
"0 Now, as all books, not primarily intended as p...\n",
"Name: transcribed_content, dtype: string"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import bigframes.bigquery as bbq\n",
"import bigframes.operations as ops\n",
"\n",
"# The audio_transcribe function is a convenience wrapper around bigframes.bigquery.ai.generate.\n",
"# Here's how to perform the same operation directly:\n",
"\n",
@@ -1519,8 +1523,8 @@
"\n",
"# Convert the audio series to the runtime representation required by the model.\n",
"# This involves fetching metadata and getting a signed access URL.\n",
"audio_metadata = audio_series._apply_unary_op(ops.obj_fetch_metadata_op)\n",
"audio_runtime = audio_metadata._apply_unary_op(ops.ObjGetAccessUrl(mode=\"R\"))\n",
"audio_metadata = bbq.obj.fetch_metadata(audio_series)\n",
"audio_runtime = bbq.obj.get_access_url(audio_metadata, mode=\"R\")\n",
"\n",
"transcribed_results = bbq.ai.generate(\n",
" prompt=(prompt_text, audio_runtime),\n",
@@ -1534,7 +1538,7 @@
},
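Note (sketch, not part of the diff): `bbq.ai.generate` returns a struct-typed Series. Assuming the AI.GENERATE output schema (`result`, `full_response`, `status`), the transcript text can be pulled out with the struct accessor, which is presumably what the folded remainder of this cell does (its old output shows a `transcribed_content` string Series):

```python
# Extract the text result from the struct returned by bbq.ai.generate().
# The field name "result" assumes the AI.GENERATE output schema.
transcribed_content = transcribed_results.struct.field("result").rename("transcribed_content")
transcribed_content.head()
```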
{
"cell_type": "code",
"execution_count": 12,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -1638,7 +1642,7 @@
"\n",
"# Generate a JSON string containing the runtime information (including signed read URLs)\n",
"# This allows the UDF to download the images from Google Cloud Storage\n",
"access_urls = exif_image_df[\"blob_col\"].blob.get_runtime_json_str(mode=\"R\")\n",
"access_urls = get_runtime_json_str(exif_image_df[\"blob_col\"], mode=\"R\")\n",
"\n",
"# Apply the BigQuery Python UDF to the runtime JSON strings\n",
"# We cast to string to ensure the input matches the UDF's signature\n",