I am trying to prevent unnecessary downloading of large datasets by reading the publicly available files directly from their online location. Surprisingly I cannot find an answer to my question on StackOverflow already.
I use JupyterLab, and have tried the following:
import xarray as xr
url="https://thredds.met.no/thredds/catalog/metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/catalog.html?dataset=metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231.nc"
data = xr.open_dataset(url)
This produces the following error message:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/file_manager.py:211, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
210 try:
--> 211 file = self._cache[self._key]
212 except KeyError:
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/lru_cache.py:56, in LRUCache.__getitem__(self, key)
55 with self._lock:
---> 56 value = self._cache[key]
57 self._cache.move_to_end(key)
KeyError: [<class 'netCDF4._netCDF4.Dataset'>, ('https://thredds.met.no/thredds/catalog/metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/catalog.html?dataset=metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231.nc',), 'r', (('clobber', True), ('diskless', False), ('format', 'NETCDF4'), ('persist', False)), 'd2d8feab-7dab-434f-ae9c-a79c655b259b']
During handling of the above exception, another exception occurred:
OSError Traceback (most recent call last)
Cell In[4], line 3
1 #read in dataset from website
2 url="https://thredds.met.no/thredds/catalog/metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/catalog.html?dataset=metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231.nc"
----> 3 data = xr.open_dataset(url)
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/api.py:611, in open_dataset(filename_or_obj, engine, chunks, cache, decode_cf, mask_and_scale, decode_times, decode_timedelta, use_cftime, concat_characters, decode_coords, drop_variables, inline_array, chunked_array_type, from_array_kwargs, backend_kwargs, **kwargs)
599 decoders = _resolve_decoders_kwargs(
600 decode_cf,
601 open_backend_dataset_parameters=backend.open_dataset_parameters,
(...)
607 decode_coords=decode_coords,
608 )
610 overwrite_encoded_chunks = kwargs.pop("overwrite_encoded_chunks", None)
--> 611 backend_ds = backend.open_dataset(
612 filename_or_obj,
613 drop_variables=drop_variables,
614 **decoders,
615 **kwargs,
616 )
617 ds = _dataset_from_backend_dataset(
618 backend_ds,
619 filename_or_obj,
(...)
629 **kwargs,
630 )
631 return ds
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/netCDF4_.py:649, in NetCDF4BackendEntrypoint.open_dataset(self, filename_or_obj, mask_and_scale, decode_times, concat_characters, decode_coords, drop_variables, use_cftime, decode_timedelta, group, mode, format, clobber, diskless, persist, lock, autoclose)
628 def open_dataset( # type: ignore[override] # allow LSP violation, not supporting **kwargs
629 self,
630 filename_or_obj: str | os.PathLike[Any] | BufferedIOBase | AbstractDataStore,
(...)
646 autoclose=False,
647 ) -> Dataset:
648 filename_or_obj = _normalize_path(filename_or_obj)
--> 649 store = NetCDF4DataStore.open(
650 filename_or_obj,
651 mode=mode,
652 format=format,
653 group=group,
654 clobber=clobber,
655 diskless=diskless,
656 persist=persist,
657 lock=lock,
658 autoclose=autoclose,
659 )
661 store_entrypoint = StoreBackendEntrypoint()
662 with close_on_error(store):
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/netCDF4_.py:410, in NetCDF4DataStore.open(cls, filename, mode, format, group, clobber, diskless, persist, lock, lock_maker, autoclose)
404 kwargs = dict(
405 clobber=clobber, diskless=diskless, persist=persist, format=format
406 )
407 manager = CachingFileManager(
408 netCDF4.Dataset, filename, mode=mode, kwargs=kwargs
409 )
--> 410 return cls(manager, group=group, mode=mode, lock=lock, autoclose=autoclose)
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/netCDF4_.py:357, in NetCDF4DataStore.__init__(self, manager, group, mode, lock, autoclose)
355 self._group = group
356 self._mode = mode
--> 357 self.format = self.ds.data_model
358 self._filename = self.ds.filepath()
359 self.is_remote = is_remote_uri(self._filename)
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/netCDF4_.py:419, in NetCDF4DataStore.ds(self)
417 @property
418 def ds(self):
--> 419 return self._acquire()
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/netCDF4_.py:413, in NetCDF4DataStore._acquire(self, needs_lock)
412 def _acquire(self, needs_lock=True):
--> 413 with self._manager.acquire_context(needs_lock) as root:
414 ds = _nc4_require_group(root, self._group, self._mode)
415 return ds
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/contextlib.py:137, in _GeneratorContextManager.__enter__(self)
135 del self.args, self.kwds, self.func
136 try:
--> 137 return next(self.gen)
138 except StopIteration:
139 raise RuntimeError("generator didn't yield") from None
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/file_manager.py:199, in CachingFileManager.acquire_context(self, needs_lock)
196 @contextlib.contextmanager
197 def acquire_context(self, needs_lock=True):
198 """Context manager for acquiring a file."""
--> 199 file, cached = self._acquire_with_cache_info(needs_lock)
200 try:
201 yield file
File /usr/local/apps/python3/3.11.10-01/lib/python3.11/site-packages/xarray/backends/file_manager.py:217, in CachingFileManager._acquire_with_cache_info(self, needs_lock)
215 kwargs = kwargs.copy()
216 kwargs["mode"] = self._mode
--> 217 file = self._opener(*self._args, **kwargs)
218 if self._mode == "w":
219 # ensure file doesn't get overridden when opened again
220 self._mode = "a"
File src/netCDF4/_netCDF4.pyx:2470, in netCDF4._netCDF4.Dataset.__init__()
File src/netCDF4/_netCDF4.pyx:2107, in netCDF4._netCDF4._ensure_nc_success()
OSError: [Errno -75] NetCDF: Malformed or unexpected Constraint: 'https://thredds.met.no/thredds/catalog/metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/catalog.html?dataset=metusers/oskaral/PolarRES/ARC11_ALADIN43_v1_CNRMESM21_r1i1p1f2_hist/day/tas/tas_ARC11_CNRM-ESM2-1_historical_r1i1p1f2_HCLIMcom-METNO_ALADIN43_v1-r1_day_20140101-20141231.nc'
Is it in any way possible to read these files into an xarray dataset directly from their online location?
Thank you!