DOLFINx 0.6.0
DOLFINx C++ interface
HDF5Interface.h
// Copyright (C) 2012 Chris N. Richardson and Garth N. Wells
//
// This file is part of DOLFINx (https://www.fenicsproject.org)
//
// SPDX-License-Identifier: LGPL-3.0-or-later

#pragma once

#include <array>
#include <cassert>
#include <chrono>
#include <cstdint>
#include <dolfinx/common/log.h>
#include <filesystem>
#include <hdf5.h>
#include <mpi.h>
#include <numeric>
#include <stdexcept>
#include <string>
#include <vector>

namespace dolfinx::io
{

/// This class provides an interface to some HDF5 functionality.
class HDF5Interface
{
#define HDF5_FAIL -1
public:
  /// Open HDF5 file and return file descriptor.
  static hid_t open_file(MPI_Comm comm, const std::filesystem::path& filename,
                         const std::string& mode, const bool use_mpi_io);

  /// Close HDF5 file.
  static void close_file(const hid_t handle);

  /// Flush data to file to improve data integrity after interruption.
  static void flush_file(const hid_t handle);

  /// Get filename.
  static std::filesystem::path get_filename(hid_t handle);

  /// Write data to an existing HDF5 file as defined by range blocks on
  /// each process.
  template <typename T>
  static void write_dataset(const hid_t handle, const std::string& dataset_path,
                            const T* data,
                            const std::array<std::int64_t, 2>& range,
                            const std::vector<std::int64_t>& global_size,
                            bool use_mpi_io, bool use_chunking);

  /// Read data from an HDF5 dataset "dataset_path" as defined by range
  /// blocks on each process.
  template <typename T>
  static std::vector<T> read_dataset(const hid_t handle,
                                     const std::string& dataset_path,
                                     const std::array<std::int64_t, 2>& range);

  /// Check for existence of dataset in HDF5 file.
  static bool has_dataset(const hid_t handle, const std::string& dataset_path);

  /// Get dataset shape (size of each dimension).
  static std::vector<std::int64_t>
  get_dataset_shape(const hid_t handle, const std::string& dataset_path);

  /// Set MPI atomicity.
  static void set_mpi_atomicity(const hid_t handle, const bool atomic);

  /// Get MPI atomicity.
  static bool get_mpi_atomicity(const hid_t handle);

private:
  // Add group to HDF5 file (groups are created recursively if they do
  // not already exist)
  static void add_group(const hid_t handle, const std::string& dataset_path);

  // Return HDF5 data type
  template <typename T>
  static hid_t hdf5_type()
  {
    throw std::runtime_error("Cannot get HDF5 primitive data type. "
                             "No specialised function for this data type");
  }
};
//---------------------------------------------------------------------------
template <>
inline hid_t HDF5Interface::hdf5_type<float>()
{
  return H5T_NATIVE_FLOAT;
}
//---------------------------------------------------------------------------
template <>
inline hid_t HDF5Interface::hdf5_type<double>()
{
  return H5T_NATIVE_DOUBLE;
}
//---------------------------------------------------------------------------
template <>
inline hid_t HDF5Interface::hdf5_type<int>()
{
  return H5T_NATIVE_INT;
}
//---------------------------------------------------------------------------
template <>
inline hid_t HDF5Interface::hdf5_type<std::int64_t>()
{
  return H5T_NATIVE_INT64;
}
//---------------------------------------------------------------------------
template <>
inline hid_t HDF5Interface::hdf5_type<std::size_t>()
{
  if (sizeof(std::size_t) == sizeof(unsigned long))
    return H5T_NATIVE_ULONG;
  else if (sizeof(std::size_t) == sizeof(unsigned int))
    return H5T_NATIVE_UINT;

  throw std::runtime_error("Cannot determine size of std::size_t. "
                           "std::size_t is not the same size as long or int");
}
//---------------------------------------------------------------------------
template <typename T>
inline void HDF5Interface::write_dataset(
    const hid_t file_handle, const std::string& dataset_path, const T* data,
    const std::array<std::int64_t, 2>& range,
    const std::vector<std::int64_t>& global_size, bool use_mpi_io,
    bool use_chunking)
{
  // Data rank
  const int rank = global_size.size();
  assert(rank != 0);
  if (rank > 2)
  {
    throw std::runtime_error("Cannot write dataset to HDF5 file. "
                             "Only rank 1 and rank 2 datasets are supported");
  }

  // Get HDF5 data type
  const hid_t h5type = hdf5_type<T>();

  // Hyperslab selection parameters
  std::vector<hsize_t> count(global_size.begin(), global_size.end());
  count[0] = range[1] - range[0];

  // Data offsets
  std::vector<hsize_t> offset(rank, 0);
  offset[0] = range[0];

  // Dataset dimensions
  const std::vector<hsize_t> dimsf(global_size.begin(), global_size.end());

  // Create a global data space
  const hid_t filespace0 = H5Screate_simple(rank, dimsf.data(), nullptr);
  if (filespace0 == HDF5_FAIL)
    throw std::runtime_error("Failed to create HDF5 data space");

  // Set chunking parameters
  hid_t chunking_properties;
  if (use_chunking)
  {
    // Set chunk size and limit to 1kB min/1MB max
    hsize_t chunk_size = dimsf[0] / 2;
    if (chunk_size > 1048576)
      chunk_size = 1048576;
    if (chunk_size < 1024)
      chunk_size = 1024;

    hsize_t chunk_dims[2] = {chunk_size, dimsf[1]};
    chunking_properties = H5Pcreate(H5P_DATASET_CREATE);
    H5Pset_chunk(chunking_properties, rank, chunk_dims);
  }
  else
    chunking_properties = H5P_DEFAULT;

  // Check that group exists and recursively create if required
  const std::string group_name(dataset_path, 0, dataset_path.rfind('/'));
  add_group(file_handle, group_name);

  // Create global dataset (using dataset_path)
  const hid_t dset_id
      = H5Dcreate2(file_handle, dataset_path.c_str(), h5type, filespace0,
                   H5P_DEFAULT, chunking_properties, H5P_DEFAULT);
  if (dset_id == HDF5_FAIL)
    throw std::runtime_error("Failed to create HDF5 global dataset.");

  // Generic status report
  herr_t status;

  // Close global data space
  status = H5Sclose(filespace0);
  if (status == HDF5_FAIL)
    throw std::runtime_error("Failed to close HDF5 global data space.");

  // Create a local data space
  const hid_t memspace = H5Screate_simple(rank, count.data(), nullptr);
  if (memspace == HDF5_FAIL)
    throw std::runtime_error("Failed to create HDF5 local data space.");

  // Create a file dataspace within the global space - a hyperslab
  const hid_t filespace1 = H5Dget_space(dset_id);
  status = H5Sselect_hyperslab(filespace1, H5S_SELECT_SET, offset.data(),
                               nullptr, count.data(), nullptr);
  if (status == HDF5_FAIL)
    throw std::runtime_error("Failed to create HDF5 dataspace.");

  // Set parallel access
  const hid_t plist_id = H5Pcreate(H5P_DATASET_XFER);
  if (use_mpi_io)
  {
#ifdef H5_HAVE_PARALLEL
    status = H5Pset_dxpl_mpio(plist_id, H5FD_MPIO_COLLECTIVE);
    if (status == HDF5_FAIL)
    {
      throw std::runtime_error(
          "Failed to set HDF5 data transfer property list.");
    }

#else
    throw std::runtime_error("HDF5 library has not been configured with MPI");
#endif
  }

  // Write local dataset into selected hyperslab
  status = H5Dwrite(dset_id, h5type, memspace, filespace1, plist_id, data);
  if (status == HDF5_FAIL)
  {
    throw std::runtime_error(
        "Failed to write HDF5 local dataset into hyperslab.");
  }

  if (use_chunking)
  {
    // Close chunking properties
    status = H5Pclose(chunking_properties);
    if (status == HDF5_FAIL)
      throw std::runtime_error("Failed to close HDF5 chunking properties.");
  }

  // Close dataset collectively
  status = H5Dclose(dset_id);
  if (status == HDF5_FAIL)
    throw std::runtime_error("Failed to close HDF5 dataset.");

  // Close hyperslab
  status = H5Sclose(filespace1);
  if (status == HDF5_FAIL)
    throw std::runtime_error("Failed to close HDF5 hyperslab.");

  // Close local dataset
  status = H5Sclose(memspace);
  if (status == HDF5_FAIL)
    throw std::runtime_error("Failed to close local HDF5 dataset.");

  // Release file-access template
  status = H5Pclose(plist_id);
  if (status == HDF5_FAIL)
    throw std::runtime_error("Failed to release HDF5 file-access template.");
}
//---------------------------------------------------------------------------
template <typename T>
inline std::vector<T>
HDF5Interface::read_dataset(const hid_t file_handle,
                            const std::string& dataset_path,
                            const std::array<std::int64_t, 2>& range)
{
  auto timer_start = std::chrono::system_clock::now();

  // Open the dataset
  const hid_t dset_id
      = H5Dopen2(file_handle, dataset_path.c_str(), H5P_DEFAULT);
  if (dset_id == HDF5_FAIL)
    throw std::runtime_error("Failed to open HDF5 global dataset.");

  // Open dataspace
  const hid_t dataspace = H5Dget_space(dset_id);
  if (dataspace == HDF5_FAIL)
    throw std::runtime_error("Failed to open HDF5 data space.");

  // Get rank of data set
  const int rank = H5Sget_simple_extent_ndims(dataspace);
  assert(rank >= 0);
  if (rank > 2)
    LOG(WARNING) << "HDF5Interface::read_dataset untested for rank > 2.";

  // Allocate data for shape
  std::vector<hsize_t> shape(rank);

  // Get size in each dimension
  const int ndims = H5Sget_simple_extent_dims(dataspace, shape.data(), nullptr);
  if (ndims != rank)
    throw std::runtime_error("Failed to get dimensionality of dataspace");

  // Hyperslab selection
  std::vector<hsize_t> offset(rank, 0);
  std::vector<hsize_t> count = shape;
  if (range[0] != -1 and range[1] != -1)
  {
    offset[0] = range[0];
    count[0] = range[1] - range[0];
  }
  else
    offset[0] = 0;

  // Select a block in the dataset beginning at offset[], with
  // size=count[]
  [[maybe_unused]] herr_t status = H5Sselect_hyperslab(
      dataspace, H5S_SELECT_SET, offset.data(), nullptr, count.data(), nullptr);
  if (status == HDF5_FAIL)
    throw std::runtime_error("Failed to select HDF5 hyperslab.");

  // Create a memory dataspace
  const hid_t memspace = H5Screate_simple(rank, count.data(), nullptr);
  if (memspace == HDF5_FAIL)
    throw std::runtime_error("Failed to create HDF5 dataspace.");

  // Create local data to read into
  std::vector<T> data(
      std::reduce(count.begin(), count.end(), 1, std::multiplies{}));

  // Read data on each process
  const hid_t h5type = hdf5_type<T>();
  status
      = H5Dread(dset_id, h5type, memspace, dataspace, H5P_DEFAULT, data.data());
  if (status == HDF5_FAIL)
    throw std::runtime_error("Failed to read HDF5 data.");

  // Close dataspace
  status = H5Sclose(dataspace);
  if (status == HDF5_FAIL)
    throw std::runtime_error("Failed to close HDF5 dataspace.");

  // Close memspace
  status = H5Sclose(memspace);
  if (status == HDF5_FAIL)
    throw std::runtime_error("Failed to close HDF5 memory space.");

  // Close dataset
  status = H5Dclose(dset_id);
  if (status == HDF5_FAIL)
    throw std::runtime_error("Failed to close HDF5 dataset.");

  auto timer_end = std::chrono::system_clock::now();
  std::chrono::duration<double> dt = (timer_end - timer_start);
  double data_rate = data.size() * sizeof(T) / (1e6 * dt.count());

  LOG(INFO) << "HDF5 Read data rate: " << data_rate << "MB/s";

  return data;
}
//---------------------------------------------------------------------------
} // namespace dolfinx::io
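A minimal usage sketch (not part of the header above), assuming MPI has been initialised by the caller; the file name, dataset path, per-rank block size and the "w" mode string are illustrative assumptions rather than requirements of the API. Each rank writes its contiguous block of rows with collective MPI-IO and then reads the same block back.

#include <dolfinx/io/HDF5Interface.h>
#include <array>
#include <cstdint>
#include <mpi.h>
#include <vector>

void write_and_read(MPI_Comm comm)
{
  using dolfinx::io::HDF5Interface;

  int rank = 0, size = 1;
  MPI_Comm_rank(comm, &rank);
  MPI_Comm_size(comm, &size);

  // Each rank owns 10 contiguous rows of a (10 * size) x 3 dataset
  const std::int64_t nlocal = 10;
  std::vector<double> x(nlocal * 3, static_cast<double>(rank));
  const std::array<std::int64_t, 2> range{rank * nlocal, (rank + 1) * nlocal};
  const std::vector<std::int64_t> global_size{nlocal * size, 3};

  // Create the file with MPI-IO, write the local block, read it back, close
  const hid_t h = HDF5Interface::open_file(comm, "example.h5", "w", true);
  HDF5Interface::write_dataset(h, "/data/x", x.data(), range, global_size,
                               true, false);
  const std::vector<double> y
      = HDF5Interface::read_dataset<double>(h, "/data/x", range);
  HDF5Interface::close_file(h);
}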
This class provides an interface to some HDF5 functionality.
Definition: HDF5Interface.h:27
static std::vector< T > read_dataset(const hid_t handle, const std::string &dataset_path, const std::array< std::int64_t, 2 > &range)
Read data from an HDF5 dataset "dataset_path" as defined by range blocks on each process.
static void close_file(const hid_t handle)
Close HDF5 file.
Definition: HDF5Interface.cpp:124
static bool has_dataset(const hid_t handle, const std::string &dataset_path)
Check for existence of dataset in HDF5 file.
Definition: HDF5Interface.cpp:153
static std::filesystem::path get_filename(hid_t handle)
Get filename.
Definition: HDF5Interface.cpp:136
static std::vector< std::int64_t > get_dataset_shape(const hid_t handle, const std::string &dataset_path)
Get dataset shape (size of each dimension)
Definition: HDF5Interface.cpp:203
static hid_t open_file(MPI_Comm comm, const std::filesystem::path &filename, const std::string &mode, const bool use_mpi_io)
Open HDF5 and return file descriptor.
Definition: HDF5Interface.cpp:58
static void write_dataset(const hid_t handle, const std::string &dataset_path, const T *data, const std::array< std::int64_t, 2 > &range, const std::vector< std::int64_t > &global_size, bool use_mpi_io, bool use_chunking)
Write data to an existing HDF5 file as defined by range blocks on each process.
static void flush_file(const hid_t handle)
Flush data to file to improve data integrity after interruption.
Definition: HDF5Interface.cpp:130
static bool get_mpi_atomicity(const hid_t handle)
Get MPI atomicity. See https://support.hdfgroup.org/HDF5/doc/RM/RM_H5F.html#File-GetMpiAtomicity and ...
Definition: HDF5Interface.cpp:244
static void set_mpi_atomicity(const hid_t handle, const bool atomic)
Set MPI atomicity. See https://support.hdfgroup.org/HDF5/doc/RM/RM_H5F.html#File-SetMpiAtomicity and ... (a usage sketch is given at the end of this page).
Definition: HDF5Interface.cpp:236
int rank(MPI_Comm comm)
Return process rank for the communicator.
Definition: MPI.cpp:63
Support for file IO.
Definition: ADIOS2Writers.h:39
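As referenced in the set_mpi_atomicity entry above, a short sketch of the MPI atomicity helpers; the file name and the "a" mode string are illustrative assumptions, and the file is assumed to be opened with use_mpi_io = true, since atomicity applies to MPI-IO file access.

#include <dolfinx/io/HDF5Interface.h>
#include <cassert>
#include <hdf5.h>
#include <mpi.h>

void atomic_section(MPI_Comm comm)
{
  using dolfinx::io::HDF5Interface;

  // Open an existing file collectively with MPI-IO
  const hid_t h = HDF5Interface::open_file(comm, "example.h5", "a", true);

  // Enforce atomicity for the following writes
  HDF5Interface::set_mpi_atomicity(h, true);
  assert(HDF5Interface::get_mpi_atomicity(h));

  // ... calls to HDF5Interface::write_dataset that must become visible to
  // all ranks in a well-defined order ...

  HDF5Interface::set_mpi_atomicity(h, false);
  HDF5Interface::close_file(h);
}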