46 changes: 45 additions & 1 deletion gcloud/datastore/__init__.py
@@ -69,6 +69,48 @@
_DATASET_ENV_VAR_NAME = 'GCLOUD_DATASET_ID'


def _find_true_dataset_id(dataset_id, connection=None):
"""Find the true (unaliased) dataset ID.

If the given ID already has an 's~' or 'e~' prefix, it is returned unchanged.
Otherwise, looks up a bogus Key('__MissingLookupKind', 1) and reads the
true prefixed dataset ID from the response (either from found or from
missing).

For some context, see:
github.com/GoogleCloudPlatform/gcloud-python/pull/528
github.com/GoogleCloudPlatform/google-cloud-datastore/issues/59

:type dataset_id: string
:param dataset_id: The dataset ID to un-alias / prefix.

:type connection: :class:`gcloud.datastore.connection.Connection`
:param connection: Optional. The connection used for the lookup; falls back
to the implicit default connection.

:rtype: string
:returns: The true / prefixed / un-aliased dataset ID.
"""
if dataset_id.startswith('s~') or dataset_id.startswith('e~'):
return dataset_id

connection = connection or _implicit_environ.CONNECTION

# Create the bogus Key protobuf to be looked up and remove
# the dataset ID so the backend won't complain.
bogus_key_pb = Key('__MissingLookupKind', 1,
dataset_id=dataset_id).to_protobuf()
bogus_key_pb.partition_id.ClearField('dataset_id')

found_pbs, missing_pbs, _ = connection.lookup(dataset_id, [bogus_key_pb])
# By not passing in `deferred`, lookup will continue until
# all results are `found` or `missing`.
all_pbs = missing_pbs + found_pbs
# We only asked for one, so should only receive one.
returned_pb, = all_pbs

return returned_pb.key.partition_id.dataset_id
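
A minimal usage sketch of the new helper (hypothetical dataset ID 'foo'; assumes credentials and an implicit connection can be established from the environment):

```python
from gcloud import datastore
from gcloud.datastore import _find_true_dataset_id

# Assumption: set_default_connection() can build a connection implicitly
# from the environment; credentials handling is out of scope here.
datastore.set_default_connection()

# An unprefixed (aliased) ID triggers the bogus-key lookup and comes back
# carrying the backend's 's~' or 'e~' prefix.
print(_find_true_dataset_id('foo'))    # e.g. 's~foo'

# An already-prefixed ID is returned as-is; no lookup is performed.
print(_find_true_dataset_id('s~foo'))  # 's~foo'
```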


def set_default_dataset_id(dataset_id=None):
"""Set default dataset ID either explicitly or implicitly as fall-back.

@@ -91,6 +133,7 @@ def set_default_dataset_id(dataset_id=None):
dataset_id = _implicit_environ.compute_engine_id()

if dataset_id is not None:
dataset_id = _find_true_dataset_id(dataset_id)
_implicit_environ.DATASET_ID = dataset_id


@@ -120,8 +163,9 @@ def set_defaults(dataset_id=None, connection=None):
:type connection: :class:`gcloud.datastore.connection.Connection`
:param connection: A connection provided to be the default.
"""
set_default_dataset_id(dataset_id=dataset_id)
# Set CONNECTION first in case _find_true_dataset_id needs it.
set_default_connection(connection=connection)
set_default_dataset_id(dataset_id=dataset_id)
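
For callers wiring defaults up by hand instead of through set_defaults(), the same ordering applies. A short sketch (placeholder dataset ID; the bare Connection() is an assumption and would still need credentials in practice):

```python
from gcloud import datastore
from gcloud.datastore.connection import Connection

# The connection must be registered before the dataset ID, because
# set_default_dataset_id() now calls _find_true_dataset_id(), which may
# fall back to the implicit default connection for its lookup.
connection = Connection()  # assumes credentials are supplied elsewhere
datastore.set_default_connection(connection)
datastore.set_default_dataset_id('foo')  # 'foo' is a placeholder alias
```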


def get_connection():
7 changes: 2 additions & 5 deletions gcloud/datastore/batch.py
@@ -216,8 +216,7 @@ def put(self, entity):
if entity.key is None:
raise ValueError("Entity must have a key")

if not helpers._dataset_ids_equal(self._dataset_id,
entity.key.dataset_id):
if self._dataset_id != entity.key.dataset_id:
raise ValueError("Key must be from same dataset as batch")

_assign_entity_to_mutation(
@@ -235,8 +234,7 @@ def delete(self, key):
if key.is_partial:
raise ValueError("Key must be complete")

if not helpers._dataset_ids_equal(self._dataset_id,
key.dataset_id):
if self._dataset_id != key.dataset_id:
raise ValueError("Key must be from same dataset as batch")

key_pb = key.to_protobuf()
@@ -309,7 +307,6 @@ def _assign_entity_to_mutation(mutation_pb, entity, auto_id_entities):
auto_id = entity.key.is_partial

key_pb = entity.key.to_protobuf()
key_pb = helpers._prepare_key_for_request(key_pb)

if auto_id:
insert = mutation_pb.insert_auto_id.add()

81 changes: 0 additions & 81 deletions gcloud/datastore/helpers.py
@@ -26,7 +26,6 @@
import pytz
import six

from gcloud.datastore import _datastore_v1_pb2 as datastore_pb
from gcloud.datastore.entity import Entity
from gcloud.datastore.key import Key

@@ -280,33 +279,6 @@ def _set_protobuf_value(value_pb, val):
setattr(value_pb, attr, val)


def _prepare_key_for_request(key_pb):
"""Add protobuf keys to a request object.

:type key_pb: :class:`gcloud.datastore._datastore_v1_pb2.Key`
:param key_pb: A key to be added to a request.

:rtype: :class:`gcloud.datastore._datastore_v1_pb2.Key`
:returns: A key which will be added to a request. It will be the
original if nothing needs to be changed.
"""
if key_pb.partition_id.HasField('dataset_id'):
# We remove the dataset_id from the protobuf. This is because
# the backend fails a request if the key contains un-prefixed
# dataset ID. The backend fails because requests to
# /datastore/.../datasets/foo/...
# and
# /datastore/.../datasets/s~foo/...
# both go to the datastore given by 's~foo'. So if the key
# protobuf in the request body has dataset_id='foo', the
# backend will reject since 'foo' != 's~foo'.
new_key_pb = datastore_pb.Key()
new_key_pb.CopyFrom(key_pb)
new_key_pb.partition_id.ClearField('dataset_id')
key_pb = new_key_pb
return key_pb


def _add_keys_to_request(request_field_pb, key_pbs):
"""Add protobuf keys to a request object.

@@ -317,57 +289,4 @@ def _add_keys_to_request(request_field_pb, key_pbs):
:param key_pbs: The keys to add to a request.
"""
for key_pb in key_pbs:
key_pb = _prepare_key_for_request(key_pb)
request_field_pb.add().CopyFrom(key_pb)


def _dataset_ids_equal(dataset_id1, dataset_id2):
"""Compares two dataset IDs for fuzzy equality.

Each may be prefixed or unprefixed (but not null, since dataset ID
is required on a key). The only allowed prefixes are 's~' and 'e~'.

Two identical prefixed IDs match:

>>> 's~foo' == 's~foo'
>>> 'e~bar' == 'e~bar'

while non-identical prefixed IDs don't:

>>> 's~foo' != 's~bar'
>>> 's~foo' != 'e~foo'

As for non-prefixed, they can match other non-prefixed or
prefixed:

>>> 'foo' == 'foo'
>>> 'foo' == 's~foo'
>>> 'foo' == 'e~foo'
>>> 'foo' != 'bar'
>>> 'foo' != 's~bar'

(Ties are resolved since 'foo' can only be an alias for one of
s~foo or e~foo in the backend.)

:type dataset_id1: string
:param dataset_id1: A dataset ID.

:type dataset_id2: string
:param dataset_id2: A dataset ID.

:rtype: boolean
:returns: Boolean indicating if the IDs are the same.
"""
if dataset_id1 == dataset_id2:
return True

if dataset_id1.startswith('s~') or dataset_id1.startswith('e~'):
# If `dataset_id1` is prefixed and not matching, then the only way
# they can match is if `dataset_id2` is unprefixed.
return dataset_id1[2:] == dataset_id2
elif dataset_id2.startswith('s~') or dataset_id2.startswith('e~'):
# Here we know `dataset_id1` is unprefixed and `dataset_id2`
# is prefixed.
return dataset_id1 == dataset_id2[2:]

return False
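
For reference, here is the removed fuzzy comparison consolidated into a self-contained sketch, with the docstring's examples turned into runnable asserts (a standalone reimplementation, not part of the library):

```python
def dataset_ids_equal(dataset_id1, dataset_id2):
    """Fuzzy equality: an unprefixed ID may match its 's~'/'e~' prefixed form."""
    if dataset_id1 == dataset_id2:
        return True
    if dataset_id1.startswith(('s~', 'e~')):
        # A prefixed ID can only match its own unprefixed alias.
        return dataset_id1[2:] == dataset_id2
    if dataset_id2.startswith(('s~', 'e~')):
        # Unprefixed vs. prefixed: compare against the stripped form.
        return dataset_id1 == dataset_id2[2:]
    return False

assert dataset_ids_equal('s~foo', 's~foo')
assert dataset_ids_equal('foo', 's~foo')
assert dataset_ids_equal('foo', 'e~foo')
assert not dataset_ids_equal('s~foo', 'e~foo')
assert not dataset_ids_equal('foo', 's~bar')
```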
6 changes: 2 additions & 4 deletions gcloud/datastore/query.py
@@ -449,8 +449,7 @@ def _pb_from_query(query):
composite_filter.operator = datastore_pb.CompositeFilter.AND

if query.ancestor:
ancestor_pb = helpers._prepare_key_for_request(
query.ancestor.to_protobuf())
ancestor_pb = query.ancestor.to_protobuf()

# Filter on __key__ HAS_ANCESTOR == ancestor.
ancestor_filter = composite_filter.filter.add().property_filter
@@ -469,8 +468,7 @@
# Set the value to filter on based on the type.
if property_name == '__key__':
key_pb = value.to_protobuf()
property_filter.value.key_value.CopyFrom(
helpers._prepare_key_for_request(key_pb))
property_filter.value.key_value.CopyFrom(key_pb)
else:
helpers._set_protobuf_value(property_filter.value, value)
