/usr/include/movit/effect_chain.h is in libmovit-dev 1.4.0-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#ifndef _MOVIT_EFFECT_CHAIN_H
#define _MOVIT_EFFECT_CHAIN_H 1
// An EffectChain is the largest basic entity in Movit; it contains everything
// needed to connect a series of effects, from inputs to outputs, and render
// them. Generally you set up your effect chain once and then call its render
// functions once per frame; setting one up can be relatively expensive,
// but rendering is fast.
//
// Threading considerations: EffectChain is “thread-compatible”; you can use
// different EffectChains in multiple threads at the same time (assuming the
// threads do not use the same OpenGL context, but this is a good idea anyway),
// but you may not use one EffectChain from multiple threads simultaneously.
// You _are_ allowed to use one EffectChain from multiple threads as long as
// you only use it from one at a time (possibly by doing your own locking),
// but if so, the threads' contexts need to be set up to share resources, since
// the EffectChain holds textures and other OpenGL objects that are tied to the
// context.
//
// Memory management (only relevant if you use multiple contexts):
// See corresponding comment in resource_pool.h. This holds even if you don't
// allocate your own ResourcePool, but let EffectChain hold its own.
#include <epoxy/gl.h>
#include <stdio.h>
#include <list>
#include <map>
#include <set>
#include <string>
#include <vector>
#include <Eigen/Core>
#include "effect.h"
#include "image_format.h"
#include "ycbcr.h"
namespace movit {
class Effect;
class Input;
struct Phase;
class ResourcePool;
// For internal use within Node.
enum AlphaType {
	// Sentinel value; the only negative enumerator.
	ALPHA_INVALID = -1,

	// The remaining states are numbered consecutively from zero.
	ALPHA_BLANK = 0,
	ALPHA_PREMULTIPLIED = 1,
	ALPHA_POSTMULTIPLIED = 2,
};
// Whether you want pre- or postmultiplied alpha in the output
// (see effect.h for a discussion of pre- versus postmultiplied alpha).
enum OutputAlphaFormat {
	// Output with premultiplied alpha.
	OUTPUT_ALPHA_FORMAT_PREMULTIPLIED = 0,

	// Output with postmultiplied alpha.
	OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED = 1,
};
// RGBA output is nearly always packed; Y'CbCr, however, is often planar
// due to chroma subsampling. This enum controls how add_ycbcr_output()
// distributes the color channels between the fragment shader outputs.
// Obviously, anything except YCBCR_OUTPUT_INTERLEAVED will be meaningless
// unless you use render_to_fbo() and have an FBO with multiple render
// targets attached (the other outputs will be discarded).
enum YCbCrOutputSplitting {
	// A single output: Y', Cb and Cr go into the first three channels
	// (in that order), with alpha in the fourth. Also known as
	// “chunked” or “packed” mode.
	YCBCR_OUTPUT_INTERLEAVED = 0,

	// Two outputs: Y' and alpha go into the red and alpha channels of
	// the first output (the effect on its other channels is undefined),
	// while Cb and Cr go into the first two channels of the second output.
	// Particularly useful when the final target is a format like NV12,
	// where all the Y' samples come first and the interleaved Cb/Cr
	// samples follow. Note that you still have to do the chroma
	// subsampling yourself to actually arrive at NV12.
	YCBCR_OUTPUT_SPLIT_Y_AND_CBCR = 1,

	// Three outputs: Y' and alpha go into the first output, Cb into the
	// first channel of the second output, and Cr into the first channel
	// of the third output. (The effect on the other channels is
	// undefined.) Essentially gives you 4:4:4 planar, or “yuv444p”.
	YCBCR_OUTPUT_PLANAR = 2,
};
// Where (0,0) is taken to be in the output. If you want to render to an
// OpenGL screen, you should keep the default of bottom-left, as that is
// OpenGL's natural coordinate system. However, there are cases, such as if you
// render to an FBO and read the pixels back into some other system, where
// you'd want a top-left origin; if so, an additional flip step will be added
// at the very end (but done in a vertex shader, so it will have zero extra
// cost).
//
// Note that Movit's coordinate system in general consistently puts (0,0) in
// the top left for _input_, no matter what you set as output origin.
enum OutputOrigin {
	// (0,0) is the bottom-left corner of the output; this is the default.
	OUTPUT_ORIGIN_BOTTOM_LEFT = 0,

	// (0,0) is the top-left corner of the output.
	OUTPUT_ORIGIN_TOP_LEFT = 1,
};
// Transformation to apply (if any) to pixel data in temporary buffers.
// See set_intermediate_format() below for more information.
enum FramebufferTransformation {
	// Store values unmodified. This is the default, and usually
	// what you want.
	NO_FRAMEBUFFER_TRANSFORMATION = 0,

	// For values in linear light, write sqrt(x) to the framebuffer
	// rather than x, and undo it with x² when reading back; this is
	// a rough approximation to the sRGB curve. Values that are not
	// in linear light are stored as-is.
	SQUARE_ROOT_FRAMEBUFFER_TRANSFORMATION = 1,
};
// A node in the graph; basically an effect and some associated information.
class Node {
public:
	Effect *effect;
	bool disabled;

	// Graph edges, in the forward (outgoing) and backward (incoming)
	// direction respectively.
	std::vector<Node *> outgoing_links;
	std::vector<Node *> incoming_links;

	// Only meant for unit tests; other code must not rely on it.
	// If the node belongs to several phases, which one is stored
	// here is arbitrary.
	Phase *containing_phase;

private:
	// Logical output size of this effect, i.e. the resolution you would
	// see if you sampled its output as a texture. 0x0 means undefined
	// (the inputs differ in resolution). Whenever both this and
	// output_texture_{width,height} are set, they are equal.
	unsigned output_width;
	unsigned output_height;

	// Sampler number, stored here if the effect has is_single_texture(),
	// or if the output went to RTT and that texture has been bound
	// to a sampler.
	//
	// TODO: Can an RTT texture be used as inputs to multiple effects
	// within the same phase? If so, we have a problem with modifying
	// sampler state here.
	int bound_sampler_num;

	// State used while the effect chain is being built.
	Colorspace output_color_space;
	GammaCurve output_gamma_curve;
	AlphaType output_alpha_type;
	bool needs_mipmaps;  // Directly or indirectly.

	// True if this effect, and every effect consuming this node's output
	// within the same phase, has one_to_one_sampling() set.
	bool one_to_one_sampling;

	// EffectChain is the one maintaining the private state above.
	friend class EffectChain;
};
// A rendering phase; a single GLSL program rendering a single quad.
struct Phase {
	// The node whose output is the output of this phase.
	Node *output_node;

	GLuint glsl_program_num;  // Owned by the resource_pool.

	// Attribute indexes for position and texcoord. Which one is which
	// does not matter, since both carry the same data.
	std::set<GLint> attribute_indexes;

	bool input_needs_mipmaps;

	// Other phases feeding into this one (i.e., inputs that come from RTT).
	// Input textures are not listed here; they count as part of <effects>.
	std::vector<Phase *> inputs;

	// Bound sampler number for each entry in <inputs>. In a sense this is
	// redundant (the value always corresponds to the index), but the
	// uniform needs somewhere to point at.
	std::vector<int> input_samplers;

	std::vector<Node *> effects;  // In order.

	unsigned output_width;
	unsigned output_height;
	unsigned virtual_output_width;
	unsigned virtual_output_height;

	// Identifiers used for creating unique variable names in GLSL.
	// They are unique per phase, to increase cacheability of
	// compiled shaders.
	std::map<Node *, std::string> effect_ids;

	// All uniforms of this phase, gathered from every effect in it.
	std::vector<Uniform<int> > uniforms_sampler2d;
	std::vector<Uniform<bool> > uniforms_bool;
	std::vector<Uniform<int> > uniforms_int;
	std::vector<Uniform<float> > uniforms_float;
	std::vector<Uniform<float> > uniforms_vec2;
	std::vector<Uniform<float> > uniforms_vec3;
	std::vector<Uniform<float> > uniforms_vec4;
	std::vector<Uniform<Eigen::Matrix3d> > uniforms_mat3;

	// Bookkeeping for measuring the GPU time used.
	std::list<GLuint> timer_query_objects_running;
	std::list<GLuint> timer_query_objects_free;
	uint64_t time_elapsed_ns;
	uint64_t num_measured_iterations;
};
class EffectChain {
public:
	// Aspect: e.g. 16.0f, 9.0f for 16:9.
	// resource_pool is a pointer to a ResourcePool with which to share shaders
	// and other resources (see resource_pool.h). If NULL (the default),
	// will create its own that is not shared with anything else. Does not take
	// ownership of the passed-in ResourcePool, but will naturally take ownership
	// of its own internal one if created.
	//
	// NOTE(review): the chain takes ownership of raw pointers (see add_input()
	// and add_effect()) and no copy constructor or assignment operator is
	// declared here, so copying an EffectChain is presumably unsafe --
	// confirm against the destructor before relying on it.
	EffectChain(float aspect_nom, float aspect_denom, ResourcePool *resource_pool = NULL);
	~EffectChain();

	// User API:
	// input, effects, output, finalize need to come in that specific order.

	// EffectChain takes ownership of the given input.
	// input is returned back for convenience.
	Input *add_input(Input *input);

	// EffectChain takes ownership of the given effect.
	// effect is returned back for convenience.
	//
	// The overload without explicit inputs connects the new effect to
	// last_added_effect(). The remaining convenience overloads simply
	// collect their one to five inputs, in order, into a vector and
	// forward to the std::vector overload below.
	Effect *add_effect(Effect *effect) {
		return add_effect(effect, last_added_effect());
	}
	Effect *add_effect(Effect *effect, Effect *input) {
		std::vector<Effect *> inputs;
		inputs.push_back(input);
		return add_effect(effect, inputs);
	}
	Effect *add_effect(Effect *effect, Effect *input1, Effect *input2) {
		std::vector<Effect *> inputs;
		inputs.push_back(input1);
		inputs.push_back(input2);
		return add_effect(effect, inputs);
	}
	Effect *add_effect(Effect *effect, Effect *input1, Effect *input2, Effect *input3) {
		std::vector<Effect *> inputs;
		inputs.push_back(input1);
		inputs.push_back(input2);
		inputs.push_back(input3);
		return add_effect(effect, inputs);
	}
	Effect *add_effect(Effect *effect, Effect *input1, Effect *input2, Effect *input3, Effect *input4) {
		std::vector<Effect *> inputs;
		inputs.push_back(input1);
		inputs.push_back(input2);
		inputs.push_back(input3);
		inputs.push_back(input4);
		return add_effect(effect, inputs);
	}
	Effect *add_effect(Effect *effect, Effect *input1, Effect *input2, Effect *input3, Effect *input4, Effect *input5) {
		std::vector<Effect *> inputs;
		inputs.push_back(input1);
		inputs.push_back(input2);
		inputs.push_back(input3);
		inputs.push_back(input4);
		inputs.push_back(input5);
		return add_effect(effect, inputs);
	}
	Effect *add_effect(Effect *effect, const std::vector<Effect *> &inputs);

	// Adds an RGBA output. Note that you can have at most one RGBA output and one
	// Y'CbCr output (see below for details).
	void add_output(const ImageFormat &format, OutputAlphaFormat alpha_format);

	// Adds a Y'CbCr output. Note that you can have at most one Y'CbCr output
	// (in addition to at most one RGBA output; see add_output()). How the
	// color channels are distributed between the fragment shader outputs is
	// selected by <output_splitting>; see YCbCrOutputSplitting. Only 4:4:4
	// is supported (so chroma_subsampling_x and chroma_subsampling_y must
	// both be 1).
	//
	// If you have both RGBA and Y'CbCr output, the RGBA output will come
	// in the last draw buffer. Also, <format> and <alpha_format> must be
	// identical between the two.
	void add_ycbcr_output(const ImageFormat &format, OutputAlphaFormat alpha_format,
	                      const YCbCrFormat &ycbcr_format,
	                      YCbCrOutputSplitting output_splitting = YCBCR_OUTPUT_INTERLEAVED);

	// Set number of output bits, to scale the dither.
	// 8 is the right value for most outputs.
	// The default, 0, is a special value that means no dither.
	void set_dither_bits(unsigned num_bits)
	{
		this->num_dither_bits = num_bits;
	}

	// Set where (0,0) is taken to be in the output. The default is
	// OUTPUT_ORIGIN_BOTTOM_LEFT, which is usually what you want
	// (see OutputOrigin above for more details).
	void set_output_origin(OutputOrigin output_origin)
	{
		this->output_origin = output_origin;
	}

	// Set intermediate format for framebuffers used when we need to bounce
	// to a temporary texture. The default, GL_RGBA16F, is good for most uses;
	// it is precise, has good range, and is relatively efficient. However,
	// if you need even more speed and your chain can do with some loss of
	// accuracy, you can change the format here (before calling finalize).
	// Calculations between bounce buffers are still in 32-bit floating-point
	// no matter what you specify.
	//
	// Of special interest is GL_SRGB8_ALPHA8, which stores sRGB-encoded RGB
	// and linear alpha; this is half the memory bandwidth of GL_RGBA16F,
	// while retaining reasonable precision for typical image data. It will,
	// however, cause some gamut clipping if your colorspace is far from sRGB,
	// as it cannot represent values outside [0,1]. NOTE: If you construct
	// a chain where you end up bouncing pixels in non-linear light
	// (gamma different from GAMMA_LINEAR), this will be the wrong thing.
	// However, it's hard to see how this could happen in a non-contrived
	// chain; few effects ever need texture bounce or resizing without also
	// combining multiple pixels, which really needs linear light and thus
	// triggers a conversion before the bounce.
	//
	// If you don't need alpha (or can do with very little of it), GL_RGB10_A2
	// is even better, as it has two more bits for each color component. There
	// is no GL_SRGB10, unfortunately, so on its own, it is somewhat worse than
	// GL_SRGB8, but you can set <transformation> to SQUARE_ROOT_FRAMEBUFFER_TRANSFORMATION,
	// and sqrt(x) will be stored instead of x. This is a rough approximation to
	// the sRGB curve, and reduces maximum error (in sRGB distance) by almost an
	// order of magnitude, well below what you can get from 8-bit true sRGB.
	// (Note that this strategy avoids the problem with bounced non-linear data
	// above, since the square root is turned off in that case.) However, texture
	// filtering will happen on the transformed values, so if you have heavy
	// downscaling or the like (e.g. mipmaps), you could get subtly bad results.
	// You'll need to see which of the two works best for you in practice.
	void set_intermediate_format(
		GLenum intermediate_format,
		FramebufferTransformation transformation = NO_FRAMEBUFFER_TRANSFORMATION)
	{
		this->intermediate_format = intermediate_format;
		this->intermediate_transformation = transformation;
	}

	void finalize();

	// Measure the GPU time used for each actual phase during rendering.
	// Note that this is only available if GL_ARB_timer_query
	// (or, equivalently, OpenGL 3.3) is available. Also note that measurement
	// will incur a performance cost, as we wait for the measurements to
	// complete at the end of rendering.
	void enable_phase_timing(bool enable);
	void reset_phase_timing();
	void print_phase_timing();

	// Render to FBO 0 with width=height=0, i.e. keeping the current
	// viewport (see render_to_fbo()).
	void render_to_screen()
	{
		render_to_fbo(0, 0, 0);
	}

	// Render the effect chain to the given FBO. If width=height=0, keeps
	// the current viewport.
	void render_to_fbo(GLuint fbo, unsigned width, unsigned height);

	// Returns the effect of the most recently added node, or NULL if
	// no node has been added yet.
	Effect *last_added_effect() {
		if (nodes.empty()) {
			return NULL;
		} else {
			return nodes.back()->effect;
		}
	}

	// API for manipulating the graph directly. Intended to be used from
	// effects and by EffectChain itself.
	//
	// Note that for nodes with multiple inputs, the order of calls to
	// connect_nodes() will matter.
	Node *add_node(Effect *effect);
	void connect_nodes(Node *sender, Node *receiver);
	void replace_receiver(Node *old_receiver, Node *new_receiver);
	void replace_sender(Node *new_sender, Node *receiver);
	void insert_node_between(Node *sender, Node *middle, Node *receiver);

	// Returns the node registered for <effect> in node_map, or NULL if
	// there is none. This is a pure lookup: find() is used instead of
	// operator[] so that asking about an unknown effect does not insert
	// a NULL entry into node_map as a side effect.
	Node *find_node_for_effect(Effect *effect) {
		std::map<Effect *, Node *>::const_iterator it = node_map.find(effect);
		if (it == node_map.end()) {
			return NULL;
		}
		return it->second;
	}

	// Get the OpenGL sampler (GL_TEXTURE0, GL_TEXTURE1, etc.) for the
	// input of the given node, so that one can modify the sampler state
	// directly. Only valid to call during set_gl_state().
	//
	// Also, for this to be allowed, <node>'s effect must have
	// needs_texture_bounce() set, so that it samples directly from a
	// single-sampler input, or from an RTT texture.
	GLenum get_input_sampler(Node *node, unsigned input_num) const;

	// Whether input <input_num> of <node> corresponds to a single sampler
	// (see get_input_sampler()). Normally, you should not need to call this;
	// however, if the input Effect has set override_texture_bounce(),
	// this will return false, and you could be flexible and check it first
	// if you want.
	//
	// NOTE(review): the result is a yes/no answer, yet the declared return
	// type is GLenum. It is left as-is here because the definition lives
	// outside this header; treat the value as a boolean.
	GLenum has_input_sampler(Node *node, unsigned input_num) const;

	// Get the current resource pool assigned to this EffectChain.
	// Primarily to let effects allocate textures as needed.
	// Any resources you get from the pool must be returned to the pool
	// no later than in the Effect's destructor.
	ResourcePool *get_resource_pool() { return resource_pool; }

private:
	// Make sure the output rectangle is at least large enough to hold
	// the given input rectangle in both dimensions, and is of the
	// current aspect ratio (aspect_nom/aspect_denom).
	void size_rectangle_to_fit(unsigned width, unsigned height, unsigned *output_width, unsigned *output_height);

	// Compute the input sizes for all inputs for all effects in a given phase,
	// and inform the effects about the results.
	void inform_input_sizes(Phase *phase);

	// Determine the preferred output size of a given phase.
	// Requires that all input phases (if any) already have output sizes set.
	void find_output_size(Phase *phase);

	// Find all inputs eventually feeding into this effect that have
	// output gamma different from GAMMA_LINEAR.
	void find_all_nonlinear_inputs(Node *effect, std::vector<Node *> *nonlinear_inputs);

	// Create a GLSL program computing the effects for this phase in order.
	void compile_glsl_program(Phase *phase);

	// Create all GLSL programs needed to compute the given effect, and all outputs
	// that depend on it (whenever possible). Returns the phase that has <output>
	// as the last effect. Also pushes all phases in order onto <phases>.
	Phase *construct_phase(Node *output, std::map<Node *, Phase *> *completed_effects);

	// Execute one phase, i.e. set up all inputs, effects and outputs, and render the quad.
	// (The third parameter used to be spelled with a double underscore, which
	// is a reserved identifier pattern; only the declaration's parameter name
	// changed, so the definition is unaffected.)
	void execute_phase(Phase *phase, bool last_phase,
	                   std::set<GLint> *bound_attribute_indices,
	                   std::map<Phase *, GLuint> *output_textures,
	                   std::set<Phase *> *generated_mipmaps);

	// Set up uniforms for one phase. The program must already be bound.
	void setup_uniforms(Phase *phase);

	// Set up the given sampler number for sampling from an RTT texture.
	void setup_rtt_sampler(int sampler_num, bool use_mipmaps);

	// Output the current graph to the given file in a Graphviz-compatible format;
	// only useful for debugging.
	void output_dot(const char *filename);
	std::vector<std::string> get_labels_for_edge(const Node *from, const Node *to);
	void output_dot_edge(FILE *fp,
	                     const std::string &from_node_id,
	                     const std::string &to_node_id,
	                     const std::vector<std::string> &labels);

	// Some of the graph algorithms assume that the nodes array is sorted
	// topologically (inputs are always before outputs), but some operations
	// (like graph rewriting) can change that. This function restores that order.
	void sort_all_nodes_topologically();

	// Do the actual topological sort. <nodes> must be a connected, acyclic subgraph;
	// links that go to nodes not in the set will be ignored.
	std::vector<Node *> topological_sort(const std::vector<Node *> &nodes);

	// Utility function used by topological_sort() to do a depth-first search.
	// The reason why we store nodes left to visit instead of a more conventional
	// list of nodes to visit is that we want to be able to limit ourselves to
	// a subgraph instead of all nodes. The set thus serves a dual purpose.
	void topological_sort_visit_node(Node *node, std::set<Node *> *nodes_left_to_visit, std::vector<Node *> *sorted_list);

	// Used during finalize().
	void find_color_spaces_for_inputs();
	void propagate_alpha();
	void propagate_gamma_and_color_space();
	Node *find_output_node();

	bool node_needs_colorspace_fix(Node *node);
	void fix_internal_color_spaces();
	void fix_output_color_space();

	bool node_needs_alpha_fix(Node *node);
	void fix_internal_alpha(unsigned step);
	void fix_output_alpha();

	bool node_needs_gamma_fix(Node *node);
	void fix_internal_gamma_by_asking_inputs(unsigned step);
	void fix_internal_gamma_by_inserting_nodes(unsigned step);
	void fix_output_gamma();
	void add_ycbcr_conversion_if_needed();
	void add_dither_if_needed();

	// Data members. Their order is kept exactly as before so that the
	// object layout does not change.
	float aspect_nom, aspect_denom;
	ImageFormat output_format;
	OutputAlphaFormat output_alpha_format;

	bool output_color_rgba, output_color_ycbcr;
	YCbCrFormat output_ycbcr_format;              // If output_color_ycbcr is true.
	YCbCrOutputSplitting output_ycbcr_splitting;  // If output_color_ycbcr is true.

	std::vector<Node *> nodes;
	std::map<Effect *, Node *> node_map;
	Effect *dither_effect;

	std::vector<Input *> inputs;  // Also contained in nodes.
	std::vector<Phase *> phases;

	GLenum intermediate_format;
	FramebufferTransformation intermediate_transformation;
	unsigned num_dither_bits;
	OutputOrigin output_origin;
	bool finalized;
	GLuint vbo;  // Contains vertex and texture coordinate data.

	ResourcePool *resource_pool;
	bool owns_resource_pool;

	bool do_phase_timing;
};
} // namespace movit
#endif // !defined(_MOVIT_EFFECT_CHAIN_H)
|