/usr/include/caffe/parallel.hpp is part of the libcaffe-cpu-dev 1.0.0~rc4-1 package.
This file is owned by root:root, with mode 0o644.
The actual contents of the file are shown below.
#ifndef CAFFE_PARALLEL_HPP_
#define CAFFE_PARALLEL_HPP_
#ifdef USE_NCCL
#include <boost/thread.hpp>
#include <string>
#include <vector>
#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/internal_thread.hpp"
#include "caffe/layer.hpp"
#include "caffe/proto/caffe.pb.h"
#include "caffe/solver.hpp"
#include "caffe/syncedmem.hpp"
#include "caffe/util/blocking_queue.hpp"
#include "caffe/util/nccl.hpp"
namespace caffe {
// Represents a net's parameters. Once a net is created, its parameter buffers can
// be replaced by ones from Params, to allow parallelization. Params ensures
// parameters are allocated in one consecutive array.
template<typename Dtype>
class Params {
public:
// Gathers the root solver's learnable parameters into the single
// contiguous data_/diff_ buffers (allocation happens in the .cpp).
explicit Params(shared_ptr<Solver<Dtype> > root_solver);
virtual ~Params() {
}
// Total element count of all learnable parameters combined.
inline size_t size() const {
return size_;
}
// Contiguous buffer of parameter values (length size_).
inline Dtype* data() const {
return data_;
}
// Contiguous buffer of parameter gradients (length size_).
inline Dtype* diff() const {
return diff_;
}
protected:
const size_t size_; // Size of buffers
Dtype* data_; // Network parameters
Dtype* diff_; // Gradient
DISABLE_COPY_AND_ASSIGN(Params);
};
// Params stored in GPU memory.
template<typename Dtype>
class GPUParams : public Params<Dtype> {
public:
// Builds the consecutive parameter buffers on the given GPU device.
// NOTE(review): allocation details live in the .cpp — presumably a
// device allocation on `device`; confirm there.
GPUParams(shared_ptr<Solver<Dtype> > root_solver, int device);
virtual ~GPUParams();
// Points the given solver's net at this instance's shared
// data_/diff_ buffers so all solvers operate on the same memory.
void Configure(Solver<Dtype>* solver) const;
protected:
// Expose the base-class buffer members to this template's scope.
using Params<Dtype>::size_;
using Params<Dtype>::data_;
using Params<Dtype>::diff_;
};
// Synchronizes solvers across GPUs/processes using NVIDIA's NCCL library.
// Acts both as the per-solver GPU parameter store (GPUParams) and as the
// callback object the solver and net invoke during training iterations.
template<typename Dtype>
class NCCL : public GPUParams<Dtype>,
public Solver<Dtype>::Callback,
public Net<Dtype>::Callback {
public:
/**
 * Single process version.
 */
explicit NCCL(shared_ptr<Solver<Dtype> > solver);
/**
 * In multi-process settings, first create a NCCL id (new_uid), then
 * pass it to each process to create connected instances.
 */
NCCL(shared_ptr<Solver<Dtype> > solver, const string& uid);
~NCCL();
// Accessors for the inter-thread barrier (see barrier_ below).
boost::barrier* barrier();
void set_barrier(boost::barrier* value);
/**
 * In single process settings, create instances without uids and
 * call this to connect them.
 */
static void InitSingleProcess(vector<NCCL<Dtype>*>* nccls);
// Generates a fresh NCCL unique id, serialized as a string, for
// distributing to peer processes before construction.
static string new_uid();
/**
 * Broadcast weights from rank 0 to other solvers.
 */
void Broadcast();
/**
 * Single process multi-GPU.
 */
void Run(const vector<int>& gpus, const char* restore);
protected:
void Init();
void on_start() {}  // Solver callback: nothing to do at iteration start.
void run(int layer); // Net callback
void on_gradients_ready();  // Solver callback: reduce gradients via NCCL.
ncclComm_t comm_;     // NCCL communicator for this rank.
cudaStream_t stream_; // CUDA stream used for NCCL operations.
shared_ptr<Solver<Dtype> > solver_;  // Solver this instance synchronizes.
// Should not be necessary, https://github.com/NVIDIA/nccl/issues/37
boost::barrier* barrier_;
// Expose the base-class buffer members to this template's scope.
using Params<Dtype>::size_;
using Params<Dtype>::data_;
using Params<Dtype>::diff_;
};
} // namespace caffe
#endif // USE_NCCL
#endif // header