# Source code for lib.Qhdf5

import json
from typing import Any, Dict

import h5py
import numpy as np


[docs] class HDF5Dict: def __init__(self, compression: str | None = "gzip", compression_opts: int | None = 4): self.compression = compression self.compression_opts = compression_opts self._file: h5py.File | None = None def __enter__(self): return self def __exit__(self, exc_type, exc_val, exc_tb): self.close() # Utilities functions
[docs] def open(self, filename: str, mode: str = "r"): if self._file is not None and not self._file.id.valid: self._file = None if self._file is not None: self._file.close() self._file = h5py.File(filename, mode)
[docs] def close(self): if self._file is not None: self._file.close() self._file = None
[docs] def save_dict(self, filename: str, data: Dict[str, Any], mode: str = "w") -> None: with h5py.File(filename, mode) as h5f: self._write_group(h5f, data)
[docs] def save_array(self, filename: str, data: np.ndarray, mode: str = "w") -> None: with h5py.File(filename, mode) as h5f: self._write_item(h5f, "data", data)
[docs] def load_dict(self, filename: str) -> Dict[str, Any]: with h5py.File(filename, "r") as h5f: return self._read_group(h5f)
[docs] def load_array(self, filename: str) -> np.ndarray: with h5py.File(filename, "r") as h5f: obj = h5f["data"] if not isinstance(obj, h5py.Dataset): raise ValueError("The key 'data' does not correspond to a dataset.") return self._decode_dataset(obj)
[docs] def load_item(self, filename: str | None, path: str) -> Any: if self._file is not None: h5f = self._file else: if filename is None: raise ValueError("filename must be provided if no file is open") h5f = h5py.File(filename, "r") try: if "::" in path: h5_path, attr_name = path.split("::", 1) return self._decode_attribute(h5f[h5_path].attrs[attr_name]) obj = h5f[path] if isinstance(obj, h5py.Dataset): return self._decode_dataset(obj) if isinstance(obj, h5py.Group): if "__json__" in obj.attrs: try: raw = obj.attrs["__json__"] if isinstance(raw, bytes): raw = raw.decode("utf-8") return json.loads(raw) except Exception: pass return self._read_group(obj) raise ValueError(f"Unknown object type at path: {path}") finally: if self._file is None: h5f.close()
# Internal helpers def _write_group(self, h5group: h5py.Group, data: Dict[str, Any]) -> None: for key, value in data.items(): self._write_item(h5group, key, value) def _write_item(self, h5group: h5py.Group, name: str, value: Any) -> None: if isinstance(value, np.ndarray): h5group.create_dataset(name, data=value, compression=self.compression, compression_opts=self.compression_opts) return if isinstance(value, (int, float, bool)): h5group.attrs[name] = value return if isinstance(value, (list, tuple)): if all(isinstance(x, np.ndarray) for x in value): subgroup = h5group.create_group(name) subgroup.attrs["__kind__"] = "sequence" subgroup.attrs["__sequence_type__"] = "tuple" if isinstance(value, tuple) else "list" subgroup.attrs["__length__"] = len(value) for i, arr in enumerate(value): if isinstance(arr, np.ndarray) and arr.shape == (): # scalar dataset → NO compression allowed subgroup.create_dataset(str(i), data=arr) else: subgroup.create_dataset( str(i), data=arr, compression=self.compression, compression_opts=self.compression_opts, ) return if len(value) == 0: h5group.attrs[name] = json.dumps(value) return if all(isinstance(x, (int, float, np.integer, np.floating)) for x in value): arr = np.asarray(value) h5group.create_dataset(name, data=arr, compression=self.compression, compression_opts=self.compression_opts) return if all(isinstance(x, str) for x in value): dt = h5py.string_dtype(encoding="utf-8") h5group.create_dataset(name, data=np.array(value, dtype=object), dtype=dt, compression=self.compression, compression_opts=self.compression_opts) return h5group.attrs[name] = json.dumps(value, default=repr) return if isinstance(value, dict): subgroup = h5group.create_group(name) try: subgroup.attrs["__json__"] = json.dumps(value) except TypeError: subgroup.attrs["__json__"] = json.dumps(self._safe_dict_for_json(value)) for k, v in value.items(): self._write_item(subgroup, self._get_key(k), v) return h5group.attrs[name] = json.dumps(value, default=repr) def 
_read_group(self, h5group: h5py.Group) -> Dict[str, Any]: out: Dict[str, Any] = {} for k, v in h5group.attrs.items(): if k == "__json__": continue out[k] = self._decode_attribute(v) for key in h5group: obj = h5group[key] if isinstance(obj, h5py.Dataset): out[key] = self._decode_dataset(obj) continue if isinstance(obj, h5py.Group): if obj.attrs.get("__kind__") == "sequence": length = obj.attrs["__length__"] seq = [] for i in range(length): val = obj[str(i)][()] if isinstance(val, np.ndarray) and val.shape == (): val = val.tolist() seq.append(val) out[key] = tuple(seq) if obj.attrs["__sequence_type__"] == "tuple" else seq continue if "__json__" in obj.attrs: try: raw = obj.attrs["__json__"] if isinstance(raw, bytes): raw = raw.decode("utf-8") out[key] = json.loads(raw) continue except Exception: pass out[key] = self._read_group(obj) return out def _decode_attribute(self, val): if isinstance(val, bytes): try: val = val.decode("utf-8") except Exception: pass if isinstance(val, str): try: return json.loads(val) except Exception: return val return val def _decode_dataset(self, ds): val = ds[()] if isinstance(val, bytes): try: val = val.decode("utf-8") except Exception: pass if isinstance(val, np.ndarray) and val.shape == (): return val.tolist() return val @staticmethod def _get_key(key: str) -> str: return str(key).replace("/", "_").strip() @staticmethod def _safe_dict_for_json(d: Dict[str, Any]) -> Dict[str, Any]: out = {} for k, v in d.items(): try: json.dumps({k: v}) out[k] = v except TypeError: if isinstance(v, (np.integer, np.floating)): out[k] = v.item() else: out[k] = repr(v) return out