Note: this is documentation for an old release. View the latest documentation at docs.fenicsproject.org/dolfinx/v0.9.0/cpp/doxygen/d9/d1f/HDF5Interface_8h_source.html
DOLFINx  0.4.1
DOLFINx C++ interface
HDF5Interface.h
1 // Copyright (C) 2012 Chris N. Richardson and Garth N. Wells
2 //
3 // This file is part of DOLFINx (https://www.fenicsproject.org)
4 //
5 // SPDX-License-Identifier: LGPL-3.0-or-later
6 
7 #pragma once
8 
9 #include <array>
10 #include <cassert>
11 #include <chrono>
12 #include <cstdint>
13 #include <dolfinx/common/log.h>
14 #include <filesystem>
15 #include <hdf5.h>
16 #include <mpi.h>
17 #include <string>
18 #include <vector>
19 
20 namespace dolfinx::io
21 {
22 
24 
26 {
27 #define HDF5_FAIL -1
28 public:
34  static hid_t open_file(MPI_Comm comm, const std::filesystem::path& filename,
35  const std::string& mode, const bool use_mpi_io);
36 
39  static void close_file(const hid_t handle);
40 
43  static void flush_file(const hid_t handle);
44 
48  static std::filesystem::path get_filename(hid_t handle);
49 
60  template <typename T>
61  static void write_dataset(const hid_t handle, const std::string& dataset_path,
62  const T* data,
63  const std::array<std::int64_t, 2>& range,
64  const std::vector<std::int64_t>& global_size,
65  bool use_mpi_io, bool use_chunking);
66 
75  template <typename T>
76  static std::vector<T> read_dataset(const hid_t handle,
77  const std::string& dataset_path,
78  const std::array<std::int64_t, 2>& range);
79 
84  static bool has_dataset(const hid_t handle, const std::string& dataset_path);
85 
90  static std::vector<std::int64_t>
91  get_dataset_shape(const hid_t handle, const std::string& dataset_path);
92 
99  static void set_mpi_atomicity(const hid_t handle, const bool atomic);
100 
105  static bool get_mpi_atomicity(const hid_t handle);
106 
107 private:
111  static void add_group(const hid_t handle, const std::string& dataset_path);
112 
113  // Return HDF5 data type
114  template <typename T>
115  static hid_t hdf5_type()
116  {
117  throw std::runtime_error("Cannot get HDF5 primitive data type. "
118  "No specialised function for this data type");
119  }
120 };
122 //---------------------------------------------------------------------------
123 template <>
124 inline hid_t HDF5Interface::hdf5_type<float>()
125 {
126  return H5T_NATIVE_FLOAT;
127 }
128 //---------------------------------------------------------------------------
129 template <>
130 inline hid_t HDF5Interface::hdf5_type<double>()
131 {
132  return H5T_NATIVE_DOUBLE;
133 }
134 //---------------------------------------------------------------------------
135 template <>
136 inline hid_t HDF5Interface::hdf5_type<int>()
137 {
138  return H5T_NATIVE_INT;
139 }
140 //---------------------------------------------------------------------------
141 template <>
142 inline hid_t HDF5Interface::hdf5_type<std::int64_t>()
143 {
144  return H5T_NATIVE_INT64;
145 }
146 //---------------------------------------------------------------------------
147 template <>
148 inline hid_t HDF5Interface::hdf5_type<std::size_t>()
149 {
150  if (sizeof(std::size_t) == sizeof(unsigned long))
151  return H5T_NATIVE_ULONG;
152  else if (sizeof(std::size_t) == sizeof(unsigned int))
153  return H5T_NATIVE_UINT;
154 
155  throw std::runtime_error("Cannot determine size of std::size_t. "
156  "std::size_t is not the same size as long or int");
157 }
158 //---------------------------------------------------------------------------
159 template <typename T>
160 inline void HDF5Interface::write_dataset(
161  const hid_t file_handle, const std::string& dataset_path, const T* data,
162  const std::array<std::int64_t, 2>& range,
163  const std::vector<int64_t>& global_size, bool use_mpi_io, bool use_chunking)
164 {
165  // Data rank
166  const int rank = global_size.size();
167  assert(rank != 0);
168  if (rank > 2)
169  {
170  throw std::runtime_error("Cannot write dataset to HDF5 file"
171  "Only rank 1 and rank 2 dataset are supported");
172  }
173 
174  // Get HDF5 data type
175  const hid_t h5type = hdf5_type<T>();
176 
177  // Hyperslab selection parameters
178  std::vector<hsize_t> count(global_size.begin(), global_size.end());
179  count[0] = range[1] - range[0];
180 
181  // Data offsets
182  std::vector<hsize_t> offset(rank, 0);
183  offset[0] = range[0];
184 
185  // Dataset dimensions
186  const std::vector<hsize_t> dimsf(global_size.begin(), global_size.end());
187 
188  // Create a global data space
189  const hid_t filespace0 = H5Screate_simple(rank, dimsf.data(), nullptr);
190  if (filespace0 == HDF5_FAIL)
191  throw std::runtime_error("Failed to create HDF5 data space");
192 
193  // Set chunking parameters
194  hid_t chunking_properties;
195  if (use_chunking)
196  {
197  // Set chunk size and limit to 1kB min/1MB max
198  hsize_t chunk_size = dimsf[0] / 2;
199  if (chunk_size > 1048576)
200  chunk_size = 1048576;
201  if (chunk_size < 1024)
202  chunk_size = 1024;
203 
204  hsize_t chunk_dims[2] = {chunk_size, dimsf[1]};
205  chunking_properties = H5Pcreate(H5P_DATASET_CREATE);
206  H5Pset_chunk(chunking_properties, rank, chunk_dims);
207  }
208  else
209  chunking_properties = H5P_DEFAULT;
210 
211  // Check that group exists and recursively create if required
212  const std::string group_name(dataset_path, 0, dataset_path.rfind('/'));
213  add_group(file_handle, group_name);
214 
215  // Create global dataset (using dataset_path)
216  const hid_t dset_id
217  = H5Dcreate2(file_handle, dataset_path.c_str(), h5type, filespace0,
218  H5P_DEFAULT, chunking_properties, H5P_DEFAULT);
219  if (dset_id == HDF5_FAIL)
220  throw std::runtime_error("Failed to create HDF5 global dataset.");
221 
222  // Generic status report
223  herr_t status;
224 
225  // Close global data space
226  status = H5Sclose(filespace0);
227  if (status == HDF5_FAIL)
228  throw std::runtime_error("Failed to close HDF5 global data space.");
229 
230  // Create a local data space
231  const hid_t memspace = H5Screate_simple(rank, count.data(), nullptr);
232  if (memspace == HDF5_FAIL)
233  throw std::runtime_error("Failed to create HDF5 local data space.");
234 
235  // Create a file dataspace within the global space - a hyperslab
236  const hid_t filespace1 = H5Dget_space(dset_id);
237  status = H5Sselect_hyperslab(filespace1, H5S_SELECT_SET, offset.data(),
238  nullptr, count.data(), nullptr);
239  if (status == HDF5_FAIL)
240  throw std::runtime_error("Failed to create HDF5 dataspace.");
241 
242  // Set parallel access
243  const hid_t plist_id = H5Pcreate(H5P_DATASET_XFER);
244  if (use_mpi_io)
245  {
246 #ifdef H5_HAVE_PARALLEL
247  status = H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);
248  if (status == HDF5_FAIL)
249  {
250  throw std::runtime_error(
251  "Failed to set HDF5 data transfer property list.");
252  }
253 
254 #else
255  throw std::runtime_error("HDF5 library has not been configured with MPI");
256 #endif
257  }
258 
259  // Write local dataset into selected hyperslab
260  status = H5Dwrite(dset_id, h5type, memspace, filespace1, plist_id, data);
261  if (status == HDF5_FAIL)
262  {
263  throw std::runtime_error(
264  "Failed to write HDF5 local dataset into hyperslab.");
265  }
266 
267  if (use_chunking)
268  {
269  // Close chunking properties
270  status = H5Pclose(chunking_properties);
271  if (status == HDF5_FAIL)
272  throw std::runtime_error("Failed to close HDF5 chunking properties.");
273  }
274 
275  // Close dataset collectively
276  status = H5Dclose(dset_id);
277  if (status == HDF5_FAIL)
278  throw std::runtime_error("Failed to close HDF5 dataset.");
279 
280  // Close hyperslab
281  status = H5Sclose(filespace1);
282  if (status == HDF5_FAIL)
283  throw std::runtime_error("Failed to close HDF5 hyperslab.");
284 
285  // Close local dataset
286  status = H5Sclose(memspace);
287  if (status == HDF5_FAIL)
288  throw std::runtime_error("Failed to close local HDF5 dataset.");
289 
290  // Release file-access template
291  status = H5Pclose(plist_id);
292  if (status == HDF5_FAIL)
293  throw std::runtime_error("Failed to release HDF5 file-access template.");
294 }
295 //---------------------------------------------------------------------------
296 template <typename T>
297 inline std::vector<T>
298 HDF5Interface::read_dataset(const hid_t file_handle,
299  const std::string& dataset_path,
300  const std::array<std::int64_t, 2>& range)
301 {
302  auto timer_start = std::chrono::system_clock::now();
303 
304  // Open the dataset
305  const hid_t dset_id
306  = H5Dopen2(file_handle, dataset_path.c_str(), H5P_DEFAULT);
307  if (dset_id == HDF5_FAIL)
308  throw std::runtime_error("Failed to open HDF5 global dataset.");
309 
310  // Open dataspace
311  const hid_t dataspace = H5Dget_space(dset_id);
312  if (dataspace == HDF5_FAIL)
313  throw std::runtime_error("Failed to open HDF5 data space.");
314 
315  // Get rank of data set
316  const int rank = H5Sget_simple_extent_ndims(dataspace);
317  assert(rank >= 0);
318  if (rank > 2)
319  LOG(WARNING) << "HDF5Interface::read_dataset untested for rank > 2.";
320 
321  // Allocate data for shape
322  std::vector<hsize_t> shape(rank);
323 
324  // Get size in each dimension
325  const int ndims = H5Sget_simple_extent_dims(dataspace, shape.data(), nullptr);
326  if (ndims != rank)
327  throw std::runtime_error("Failed to get dimensionality of dataspace");
328 
329  // Hyperslab selection
330  std::vector<hsize_t> offset(rank, 0);
331  std::vector<hsize_t> count = shape;
332  if (range[0] != -1 and range[1] != -1)
333  {
334  offset[0] = range[0];
335  count[0] = range[1] - range[0];
336  }
337  else
338  offset[0] = 0;
339 
340  // Select a block in the dataset beginning at offset[], with
341  // size=count[]
342  [[maybe_unused]] herr_t status = H5Sselect_hyperslab(
343  dataspace, H5S_SELECT_SET, offset.data(), nullptr, count.data(), nullptr);
344  if (status == HDF5_FAIL)
345  throw std::runtime_error("Failed to select HDF5 hyperslab.");
346 
347  // Create a memory dataspace
348  const hid_t memspace = H5Screate_simple(rank, count.data(), nullptr);
349  if (memspace == HDF5_FAIL)
350  throw std::runtime_error("Failed to create HDF5 dataspace.");
351 
352  // Create local data to read into
353  std::size_t data_size = 1;
354  for (std::size_t i = 0; i < count.size(); ++i)
355  data_size *= count[i];
356  std::vector<T> data(data_size);
357 
358  // Read data on each process
359  const hid_t h5type = hdf5_type<T>();
360  status
361  = H5Dread(dset_id, h5type, memspace, dataspace, H5P_DEFAULT, data.data());
362  if (status == HDF5_FAIL)
363  throw std::runtime_error("Failed to read HDF5 data.");
364 
365  // Close dataspace
366  status = H5Sclose(dataspace);
367  if (status == HDF5_FAIL)
368  throw std::runtime_error("Failed to close HDF5 dataspace.");
369 
370  // Close memspace
371  status = H5Sclose(memspace);
372  if (status == HDF5_FAIL)
373  throw std::runtime_error("Failed to close HDF5 memory space.");
374 
375  // Close dataset
376  status = H5Dclose(dset_id);
377  if (status == HDF5_FAIL)
378  throw std::runtime_error("Failed to close HDF5 dataset.");
379 
380  auto timer_end = std::chrono::system_clock::now();
381  std::chrono::duration<double> dt = (timer_end - timer_start);
382  double data_rate = data.size() * sizeof(T) / (1e6 * dt.count());
383 
384  LOG(INFO) << "HDF5 Read data rate: " << data_rate << "MB/s";
385 
386  return data;
387 }
388 //---------------------------------------------------------------------------
390 } // namespace dolfinx::io
This class provides an interface to some HDF5 functionality.
Definition: HDF5Interface.h:26
static void close_file(const hid_t handle)
Close HDF5 file.
Definition: HDF5Interface.cpp:124
static bool has_dataset(const hid_t handle, const std::string &dataset_path)
Check for existence of dataset in HDF5 file.
Definition: HDF5Interface.cpp:153
static std::filesystem::path get_filename(hid_t handle)
Get filename.
Definition: HDF5Interface.cpp:136
static std::vector< std::int64_t > get_dataset_shape(const hid_t handle, const std::string &dataset_path)
Get dataset shape (size of each dimension)
Definition: HDF5Interface.cpp:203
static hid_t open_file(MPI_Comm comm, const std::filesystem::path &filename, const std::string &mode, const bool use_mpi_io)
Open HDF5 and return file descriptor.
Definition: HDF5Interface.cpp:58
static void write_dataset(const hid_t handle, const std::string &dataset_path, const T *data, const std::array< std::int64_t, 2 > &range, const std::vector< std::int64_t > &global_size, bool use_mpi_io, bool use_chunking)
Write data to existing HDF file as defined by range blocks on each process.
static std::vector< T > read_dataset(const hid_t handle, const std::string &dataset_path, const std::array< std::int64_t, 2 > &range)
Read data from a HDF5 dataset "dataset_path" as defined by range blocks on each process.
static void flush_file(const hid_t handle)
Flush data to file to improve data integrity after interruption.
Definition: HDF5Interface.cpp:130
static bool get_mpi_atomicity(const hid_t handle)
Get MPI atomicity. See https://support.hdfgroup.org/HDF5/doc/RM/RM_H5F.html#File-GetMpiAtomicity and ...
Definition: HDF5Interface.cpp:244
static void set_mpi_atomicity(const hid_t handle, const bool atomic)
Set MPI atomicity. See https://support.hdfgroup.org/HDF5/doc/RM/RM_H5F.html#File-SetMpiAtomicity and ...
Definition: HDF5Interface.cpp:236
int rank(MPI_Comm comm)
Return process rank for the communicator.
Definition: MPI.cpp:75
Support for file IO.
Definition: ADIOS2Writers.h:39