/usr/include/shark/Data/HDF5.h is in libshark-dev 3.0.1+ds1-2ubuntu1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
//===========================================================================
/*!
*
*
* \brief Support for importing data from HDF5 files
*
*
* \par
* The most important application of the methods provided in this
* file is the import of data from HDF5 files into Shark data
* containers.
*
*
*
*
* \author B. Li
* \date 2012
*
*
* \par Copyright 1995-2015 Shark Development Team
*
* <BR><HR>
* This file is part of Shark.
* <http://image.diku.dk/shark/>
*
* Shark is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Shark is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Shark. If not, see <http://www.gnu.org/licenses/>.
*
*/
//===========================================================================
#ifndef SHARK_DATA_HDF5_H
#define SHARK_DATA_HDF5_H
#include "shark/Core/utility/ScopedHandle.h"
#include "shark/Data/Dataset.h"
#include <hdf5.h> // This must come before #include <hdf5_hl.h>
#include <hdf5_hl.h>
#include <boost/array.hpp>
#include <boost/foreach.hpp>
#include <boost/range/algorithm/fill.hpp>
#include <boost/range/algorithm/max_element.hpp>
#include <boost/smart_ptr/scoped_array.hpp>
#include <boost/type_traits.hpp>
namespace shark {
namespace detail {
/// Overloaded functions so that the compiler is able to automatically detect which function to call
/// @note
/// Basically there are two ways to add support for other data types:
/// (a) use the corresponding high-level API in H5LTpublic.h if the type is supported there (luckily)
/// (b) use H5LTread_dataset() but pass in the type_id explicitly; the predefined type ids are listed at:
/// http://www.hdfgroup.org/HDF5/doc/RM/PredefDTypes.html
/// In that case, pay special attention to endianness.
///@{
herr_t readHDF5Dataset( hid_t loc_id, const char *dset_name, int *buffer )
{
return H5LTread_dataset_int( loc_id, dset_name, buffer );
}
herr_t readHDF5Dataset( hid_t loc_id, const char *dset_name, long *buffer )
{
return H5LTread_dataset_long( loc_id, dset_name, buffer );
}
herr_t readHDF5Dataset( hid_t loc_id, const char *dset_name, float *buffer )
{
return H5LTread_dataset_float( loc_id, dset_name, buffer );
}
herr_t readHDF5Dataset( hid_t loc_id, const char *dset_name, double *buffer )
{
return H5LTread_dataset_double( loc_id, dset_name, buffer );
}
///@}
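// Overload resolution picks the wrapper that matches the buffer's element type.
// Illustrative sketch only; the open file handle fileId and the dataset name
// "my_dataset" below are hypothetical:
//
//   double buffer[16];
//   detail::readHDF5Dataset(fileId, "my_dataset", buffer); // dispatches to H5LTread_dataset_double
//
// An int* buffer would dispatch to H5LTread_dataset_int instead.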
/// Check whether typeClass and typeSize are supported by the current implementation
template<typename RawValueType>
bool isSupported(H5T_class_t typeClass, size_t typeSize)
{
if (H5T_FLOAT == typeClass && 8 == typeSize && boost::is_floating_point < RawValueType > ::value
&& sizeof(RawValueType) == 8) {
// double
return true;
} else if (H5T_FLOAT == typeClass && 4 == typeSize && boost::is_floating_point < RawValueType > ::value
&& sizeof(RawValueType) == 4) {
// float
return true;
} else if (H5T_INTEGER == typeClass && 4 == typeSize && boost::is_integral < RawValueType > ::value
&& sizeof(RawValueType) == 4) {
// int
return true;
} else if (H5T_INTEGER == typeClass && 8 == typeSize && boost::is_integral < RawValueType > ::value
&& sizeof(RawValueType) == 8) {
// long
return true;
}
return false;
}
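// Example (illustrative, derived from the checks above): on a platform where
// sizeof(double) == 8 and sizeof(int) == 4,
//   isSupported<double>(H5T_FLOAT, 8)  -> true   (type class and widths match)
//   isSupported<float>(H5T_FLOAT, 8)   -> false  (stored width 8 vs. in-memory width 4)
//   isSupported<int>(H5T_INTEGER, 4)   -> true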
/// @brief Load a dataset in an HDF5 file into a matrix
///
/// @param data
/// the output container in vector-of-vectors format; its element type must support element assignment
/// @param fileName
/// The name of the HDF5 file to read from
/// @param dataSetName
/// the HDF5 dataset name to access in the HDF5 file
///
/// @tparam MatrixType
/// The type of container that accepts the read-in data; it is treated as a two-dimensional matrix
template<typename MatrixType>
void loadIntoMatrix(MatrixType& data, const std::string& fileName, const std::string& dataSetName)
{
typedef typename MatrixType::value_type VectorType; // e.g., std::vector<double>
typedef typename VectorType::value_type RawValueType; // e.g., double
// Disable HDF5 diagnostic messages; comment this line out when debugging HDF5-related issues
H5Eset_auto1(0, 0);
// 64 is big enough for HDF5, which currently supports no more than 32 dimensions
const size_t MAX_DIMENSIONS = 64u;
// Open the file, then retrieve the dataset dimensions
hid_t open = H5Fopen(fileName.c_str(), H5F_ACC_RDONLY, H5P_DEFAULT);
if(open < 0)
throw SHARKEXCEPTION((boost::format("[loadIntoMatrix] open file name: %1% (FAILED)") % fileName).str());
const ScopedHandle<hid_t> fileId(
open,
H5Fclose
);
boost::array<hsize_t, MAX_DIMENSIONS> dims;
dims.assign(0);
H5T_class_t typeClass;
size_t typeSize;
THROW_IF(
H5LTget_dataset_info(*fileId, dataSetName.c_str(), dims.c_array(), &typeClass, &typeSize) < 0,
(boost::format("[importHDF5] Get data set(%1%) info from file(%2%).") % dataSetName % fileName).str());
if (0 == dims[0])
return;
// Support 1 or 2 dimensions only at the moment
THROW_IF(
0 != dims[2],
(boost::format(
"[loadIntoMatrix][%1%][%2%] Support 1 or 2 dimensions, but this dataset has at least 3 dimensions.") % fileName % dataSetName).str());
const hsize_t dim0 = dims[0];
const hsize_t dim1 = (0 == dims[1]) ? 1 : dims[1]; // treat a one-dimensional dataset as a two-dimensional N x 1 matrix
THROW_IF(
!detail::isSupported<RawValueType>(typeClass, typeSize),
(boost::format(
"[loadIntoMatrix] DataType doesn't match. HDF5 data type in dataset(%3%::%4%): %1%, size: %2%")
% typeClass
% typeSize
% fileName
% dataSetName).str());
// Read data into a buffer
const boost::scoped_array<RawValueType> dataBuffer(new RawValueType[dim0 * dim1]);
THROW_IF(detail::readHDF5Dataset(*fileId, dataSetName.c_str(), dataBuffer.get()) < 0, "[loadIntoMatrix] Read data set.");
// dims[0] = M, dims[1] = N: each sample vector has M elements, and there are N of them (one per column)
for (size_t i = 0; i < dim1; ++i) {
VectorType sample(dim0);
for (size_t j = 0; j < dim0; ++j)
sample[j] = dataBuffer[i + j * dim1]; // elements in memory are in row-major order
data.push_back(sample);
}
}
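// Usage sketch (illustrative only): the file name "samples.h5" and dataset name
// "features" are hypothetical, and the dataset must hold 8-byte floats to match
// the double element type. One inner vector is produced per column of the dataset.
//
//   std::vector<std::vector<double> > samples;
//   detail::loadIntoMatrix(samples, "samples.h5", "features");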
/// @brief Load a matrix from an HDF5 file stored in compressed sparse column (CSC) format
///
/// @param data the container which will hold the output matrix
/// @param fileName the name of the HDF5 file
/// @param cscDatasetName the three dataset names describing the CSC matrix: values, row indices and column pointers, in that order
template<typename MatrixType>
void loadHDF5Csc(MatrixType& data, const std::string& fileName, const std::vector<std::string>& cscDatasetName)
{
typedef typename MatrixType::value_type VectorType; // e.g., std::vector<double>
THROW_IF(
3 != cscDatasetName.size(),
"[importHDF5] Must provide 3 dataset names for importing Compressed Sparse Column format.");
std::vector<VectorType> valBuf;
std::vector<std::vector<boost::int32_t> > indicesBuf;
std::vector<std::vector<boost::int32_t> > indexPtrBuf;
detail::loadIntoMatrix(valBuf, fileName, cscDatasetName[0]);
detail::loadIntoMatrix(indicesBuf, fileName, cscDatasetName[1]);
detail::loadIntoMatrix(indexPtrBuf, fileName, cscDatasetName[2]);
THROW_IF(1u != valBuf.size() || 1u != indicesBuf.size() || 1u != indexPtrBuf.size(), "All datasets should be of one dimension.");
const VectorType& val = valBuf.front();
const std::vector<boost::int32_t>& indices = indicesBuf.front(); // WARNING: Not all indices are of int32 type
const std::vector<boost::int32_t>& indexPtr = indexPtrBuf.front();
THROW_IF(val.size() != indices.size(), "Size of value and indices should be the same.");
THROW_IF(indexPtr.back() != (boost::int32_t)val.size(), "Last element of index pointer should equal to size of value.");
// Figure out dimensions of dense matrix
const boost::uint32_t columnCount = indexPtr.size() - 1; // the last entry marks the end of the final column
const boost::uint32_t rowCount = *boost::max_element(indices) + 1; // max index plus 1
data.resize(columnCount);
boost::fill(data, VectorType(rowCount, 0)); // pre-fill zero
size_t valIdx = 0;
for (size_t i = 0; i < columnCount; ++i) {
for (boost::int32_t j = indexPtr[i]; j < indexPtr[i + 1]; ++j) {
data[i][indices[j]] = val[valIdx++];
}
}
}
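// Worked example of the CSC decoding above (illustrative numbers only): with
//   val      = {10, 20, 30},  indices = {0, 2, 1},  indexPtr = {0, 2, 3}
// columnCount = 2 and rowCount = 3, and the loop reconstructs two dense
// column vectors:
//   data[0] = (10, 0, 20)   // non-zeros at rows 0 and 2
//   data[1] = (0, 30, 0)    // non-zero at row 1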
/// @brief Construct labeled data from the passed-in data and labels
///
/// @param labeledData
/// Container storing the loaded data
/// @param dataBuffer
/// The data the labeled container will hold
/// @param labelBuffer
/// The labels for the data inside @a dataBuffer
template<typename VectorType, typename LabelType>
void constructLabeledData(
LabeledData<VectorType, LabelType>& labeledData,
const std::vector<VectorType>& dataBuffer,
const std::vector<std::vector<LabelType> >& labelBuffer)
{
THROW_IF(
1 != labelBuffer.size(),
(boost::format("[importHDF5] Expect only one label vector, but get %1%.") % labelBuffer.size()).str());
THROW_IF(
dataBuffer.size() != labelBuffer.front().size(),
boost::format("[importHDF5] Dimensions of data and label don't match.").str());
labeledData = createLabeledDataFromRange(dataBuffer, labelBuffer.front());
}
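// Shape sketch (follows directly from the checks above): for N samples,
//   dataBuffer.size() == N, labelBuffer.size() == 1, labelBuffer.front().size() == N,
// i.e. the label dataset is expected to be a single vector with one entry per sample.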
} // namespace detail
/// @brief Import data from an HDF5 file.
///
/// @param data Container storing the loaded data
/// @param fileName The name of the HDF5 file to read from
/// @param datasetName the HDF5 dataset name to access in the HDF5 file
///
/// @tparam VectorType Type of object stored in the Shark data container
template<typename VectorType>
void importHDF5(
Data<VectorType>& data,
const std::string& fileName,
const std::string& datasetName)
{
std::vector<VectorType> readinBuffer;
detail::loadIntoMatrix(readinBuffer, fileName, datasetName);
data = createDataFromRange(readinBuffer);
}
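// Usage sketch (illustrative only): the file name "samples.h5" and dataset name
// "features" are hypothetical, and the dataset is assumed to hold 8-byte floats
// so that it maps onto RealVector's double elements.
//
//   shark::Data<shark::RealVector> data;
//   shark::importHDF5(data, "samples.h5", "features");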
/// @brief Import data into a LabeledData object from an HDF5 file.
///
/// @param labeledData
/// Container storing the loaded data
/// @param fileName
/// The name of the HDF5 file to read from
/// @param data
/// the HDF5 dataset name for the data
/// @param label
/// the HDF5 dataset name for the labels
///
/// @tparam VectorType
/// Type of object stored in the Shark data container
/// @tparam LabelType
/// Type of label
template<typename VectorType, typename LabelType>
void importHDF5(
LabeledData<VectorType, LabelType>& labeledData,
const std::string& fileName,
const std::string& data,
const std::string& label)
{
std::vector<VectorType> readinData;
std::vector < std::vector<LabelType> > readinLabel;
detail::loadIntoMatrix(readinData, fileName, data);
detail::loadIntoMatrix(readinLabel, fileName, label);
detail::constructLabeledData(labeledData, readinData, readinLabel);
}
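// Usage sketch (illustrative only): the file and dataset names are hypothetical.
// LabelType must be one of the types readHDF5Dataset() is overloaded for
// (int, long, float or double); int is used here, assuming a 32-bit integer
// label dataset.
//
//   shark::LabeledData<shark::RealVector, int> dataset;
//   shark::importHDF5(dataset, "samples.h5", "features", "labels");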
/// @brief Import data from an HDF5 dataset in compressed sparse column (CSC) format.
///
/// @param data Container storing the loaded data
/// @param fileName The name of the HDF5 file to read from
/// @param cscDatasetName
/// the three CSC dataset names (values, row indices, column pointers) used to construct the matrix
///
/// @tparam VectorType Type of object stored in the Shark data container
template<typename VectorType>
void importHDF5(
Data<VectorType>& data,
const std::string& fileName,
const std::vector<std::string>& cscDatasetName)
{
std::vector<VectorType> readinBuffer;
detail::loadHDF5Csc(readinBuffer, fileName, cscDatasetName);
data = createDataFromRange(readinBuffer);
}
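// Usage sketch (illustrative only): the file name and the three dataset names are
// hypothetical; they must name the CSC values, row indices and column pointers,
// in that order. The index datasets are assumed to hold 32-bit integers and the
// values 8-byte floats.
//
//   std::vector<std::string> cscNames;
//   cscNames.push_back("values");
//   cscNames.push_back("indices");
//   cscNames.push_back("indptr");
//   shark::Data<shark::RealVector> data;
//   shark::importHDF5(data, "sparse.h5", cscNames);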
/// @brief Import labeled data from an HDF5 dataset in compressed sparse column (CSC) format.
///
/// @param labeledData
/// Container storing the loaded data
/// @param fileName
/// The name of the HDF5 file to read from
/// @param cscDatasetName
/// the three CSC dataset names (values, row indices, column pointers) used to construct the matrix
/// @param label
/// the HDF5 dataset name for the labels
///
/// @tparam VectorType
/// Type of object stored in the Shark data container
/// @tparam LabelType
/// Type of label
template<typename VectorType, typename LabelType>
void importHDF5(
LabeledData<VectorType, LabelType>& labeledData,
const std::string& fileName,
const std::vector<std::string>& cscDatasetName,
const std::string& label)
{
std::vector<VectorType> readinData;
std::vector < std::vector<LabelType> > readinLabel;
detail::loadHDF5Csc(readinData, fileName, cscDatasetName);
detail::loadIntoMatrix(readinLabel, fileName, label);
detail::constructLabeledData(labeledData, readinData, readinLabel);
}
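// Usage sketch (illustrative only): all file and dataset names are hypothetical,
// and LabelType must be one of the types readHDF5Dataset() is overloaded for
// (int, long, float or double).
//
//   std::vector<std::string> cscNames;
//   cscNames.push_back("values");
//   cscNames.push_back("indices");
//   cscNames.push_back("indptr");
//   shark::LabeledData<shark::RealVector, int> dataset;
//   shark::importHDF5(dataset, "sparse.h5", cscNames, "labels");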
} // namespace shark
#endif // SHARK_DATA_HDF5_H