/usr/share/movit/deinterlace_effect.comp is in libmovit8 1.6.1-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

// Implicit uniforms:
// uniform int PREFIX(current_field_position);
// uniform float PREFIX(inv_width);
// uniform float PREFIX(inv_height);
// uniform float PREFIX(current_field_vertical_offset);

// Compute shader implementation of DeinterlaceEffect. See the fragment
// shader implementation (deinterlace_effect.frag) for comments about the
// algorithm; comments here will mainly be about issues specific to the
// compute shader implementation.

#define DIFF(s1, s2) dot((s1) - (s2), (s1) - (s2))

// In input pixels (so output will be 8x32). Corresponds to get_compute_dimensions()
// in the C++ code. It seems illogical that 8x32 would be better than e.g. 32x8,
// since a wider group would reuse more data horizontally, but Intel cards in
// particular are much happier with this shape for whatever reason.
#define GROUP_W 8
#define GROUP_H 16
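// With GROUP_W = 8 and GROUP_H = 16, each workgroup has 8 * 16 = 128 threads;
// every thread writes two vertically adjacent output rows, so one workgroup
// covers an 8x32 output tile.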

// When sampling from the current field (spatial interpolation below), we have
// a fringe of three pixels on the left and right sides, so we need to load
// more. We also have one pixel above and below, although our destination pixel
// is squeezed in the middle of them (they don't overlap), so we only need one
// extra pixel.
#define GROUP_W_FRINGE (GROUP_W + 6)
#define GROUP_H_FRINGE (GROUP_H + 1)

layout(local_size_x = GROUP_W, local_size_y = GROUP_H) in;

#if (GROUP_W_FRINGE * GROUP_H_FRINGE) > (GROUP_W * (GROUP_H + 2))
#define TEMP_NUM_ELEM (GROUP_W_FRINGE * GROUP_H_FRINGE)
#else
#define TEMP_NUM_ELEM (GROUP_W * (GROUP_H + 2))
#endif

shared vec4 temp[TEMP_NUM_ELEM];

#if TEMP_NUM_ELEM > (GROUP_W * GROUP_H * 2)
#error Not enough threads to load all data in two loads
#endif
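// With the default sizes: 14 * 17 = 238 elements for the fringed block versus
// 8 * (16 + 2) = 144 for the temporal blocks, so temp[] holds 238 vec4s.
// Two load passes with 128 threads cover up to 256 elements, so the #error
// above does not trigger.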

// Load a WxH block of samples. We need to do this in two phases,
// since we have more input samples than we have output samples (threads);
// in the second phase, some threads will be idle.
#define LOAD_PIXEL_BLOCK(base_tc, block_width, block_height, func) \
{ \
	memoryBarrierShared(); \
	barrier(); \
	int thread_id = int(gl_LocalInvocationID.y) * GROUP_W + int(gl_LocalInvocationID.x); \
	{ \
		int x = thread_id % (block_width); \
		int y = thread_id / (block_width); \
		temp[thread_id] = func(vec2((base_tc).x + x * PREFIX(inv_width), \
		                            (base_tc).y + y * PREFIX(inv_height))); \
	} \
	const int num_threads = GROUP_W * GROUP_H; \
	if (thread_id + num_threads < (block_width) * (block_height)) { \
		int x = (thread_id + num_threads) % (block_width); \
		int y = (thread_id + num_threads) / (block_width); \
		temp[thread_id + num_threads] = \
			func(vec2((base_tc).x + x * PREFIX(inv_width), \
		                  (base_tc).y + y * PREFIX(inv_height))); \
	} \
	memoryBarrierShared(); \
	barrier(); \
}
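// For the 14x17 current-field block (238 samples) and 128 threads, the first
// pass fills elements 0..127 and the second pass fills 128..237 using threads
// 0..109; the remaining 18 threads are idle in the second pass.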

void FUNCNAME() {
	// The current thread is responsible for output of two pixels, namely (x,2y)
	// and (x,2y+1). One will be an unmodified one, the other one will be the
	// pixel we are trying to interpolate. If TFF (current_field_position==0),
	// the unmodified one is 2y+1 (remember OpenGL's bottom-left convention),
	// and if BFF, the unmodified one is 2y. So we need to invert current_field_position
	// to figure out which value to add.
	int yi = int(gl_GlobalInvocationID.y) * 2 + (PREFIX(current_field_position) ^ 1);
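	// For example, with TFF this gives yi = 2y + 1: row 2y + 1 is copied through
	// unchanged just below, and row 2y (yi ^ 1, written at the very end) receives
	// the interpolated value. With BFF it is the other way around.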

	// Load in data for the current field. current_field_vertical_offset signals
	// where the block starts vertically; see set_gl_state() in the C++ code.
	vec2 base_tc = vec2((gl_WorkGroupID.x * uint(GROUP_W) + (0.5f - 3.0f)) * PREFIX(inv_width),
	                    (gl_WorkGroupID.y * uint(GROUP_H) + 0.5f) * PREFIX(inv_height) + PREFIX(current_field_vertical_offset));
	LOAD_PIXEL_BLOCK(base_tc, GROUP_W_FRINGE, GROUP_H_FRINGE, INPUT3);

	int lx = int(gl_LocalInvocationID.x) + 3;
	int ly = int(gl_LocalInvocationID.y);

	// Output the unmodified pixel. For TFF (current_field_position == 0),
	// we have an extra pixel on the bottom that we're only using for interpolation
	// (it's being output by another workgroup), so we have to add 1.
	vec4 val = temp[(ly + (PREFIX(current_field_position) ^ 1)) * GROUP_W_FRINGE + lx];
	OUTPUT(ivec2(gl_GlobalInvocationID.x, yi), val);

	// a b c d e f g     ↑ y
	//       x           |
	// h i j k l m n     +--> x

	vec4 a = temp[(ly + 1) * GROUP_W_FRINGE + lx - 3];
	vec4 b = temp[(ly + 1) * GROUP_W_FRINGE + lx - 2];
	vec4 c = temp[(ly + 1) * GROUP_W_FRINGE + lx - 1];
	vec4 d = temp[(ly + 1) * GROUP_W_FRINGE + lx];
	vec4 e = temp[(ly + 1) * GROUP_W_FRINGE + lx + 1];
	vec4 f = temp[(ly + 1) * GROUP_W_FRINGE + lx + 2];
	vec4 g = temp[(ly + 1) * GROUP_W_FRINGE + lx + 3];

	vec4 h = temp[ly * GROUP_W_FRINGE + lx - 3];
	vec4 i = temp[ly * GROUP_W_FRINGE + lx - 2];
	vec4 j = temp[ly * GROUP_W_FRINGE + lx - 1];
	vec4 k = temp[ly * GROUP_W_FRINGE + lx];
	vec4 l = temp[ly * GROUP_W_FRINGE + lx + 1];
	vec4 m = temp[ly * GROUP_W_FRINGE + lx + 2];
	vec4 n = temp[ly * GROUP_W_FRINGE + lx + 3];

	// 0 degrees.
	vec4 pred = d + k;
	float score;
	float best_score = DIFF(c, j) + DIFF(d, k) + DIFF(e, l) - 1e-4;
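	// The small bias means a diagonal direction must beat the vertical one by
	// more than 1e-4 to be chosen, so near-ties fall back to plain vertical
	// interpolation.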

	// -45 degrees.
	score = DIFF(b, k) + DIFF(c, l) + DIFF(d, m);
	if (score < best_score) {
		pred = c + l;
		best_score = score;
	}

	// -63 degrees.
	score = DIFF(a, l) + DIFF(b, m) + DIFF(c, n);
	if (score < best_score) {
		pred = b + m;
		best_score = score;
	}

	// +45 degrees.
	score = DIFF(d, i) + DIFF(e, j) + DIFF(f, k);
	if (score < best_score) {
		pred = e + j;
		best_score = score;
	}

	// +63 degrees.
	score = DIFF(e, h) + DIFF(f, i) + DIFF(g, j);
	if (score < best_score) {
		pred = f + i;
		// best_score isn't used anymore.
	}

	pred *= 0.5f;
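	// pred is now the average of the two samples straddling the target pixel
	// along the winning direction, e.g. 0.5 * (d + k) if 0 degrees scored best.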

	// Temporal prediction (p2) of this pixel based on the previous and next fields.
	//
	//                ↑ y
	//     C   H      |
	//   A   F   K    |
	//     D x I      |
	//   B   G   L    |
	//     E   J      |
	//                +-----> time
	//
	// x is obviously aligned with D and I, so we don't need texcoord
	// adjustment for top/bottom field here, unlike earlier. However, we need
	// to start the block one pixel below since we need E/J, thus the -1 in
	// the y coordinate.
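	// Columns in the diagram map to the five input fields in temporal order:
	// A/B come from INPUT1, C/D/E from INPUT2, F/G from the current field
	// (INPUT3, already loaded above), H/I/J from INPUT4 and K/L from INPUT5.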
	base_tc = vec2((gl_WorkGroupID.x * uint(GROUP_W) + 0.5f) * PREFIX(inv_width),
	               (gl_WorkGroupID.y * uint(GROUP_H) + (0.5f - 1.0f)) * PREFIX(inv_height));
	lx = int(gl_LocalInvocationID.x);
#if YADIF_ENABLE_SPATIAL_INTERLACING_CHECK
	LOAD_PIXEL_BLOCK(base_tc, GROUP_W, GROUP_H + 2, INPUT2);
	vec4 C = temp[(ly + 2) * GROUP_W + lx];
	vec4 D = temp[(ly + 1) * GROUP_W + lx];
	vec4 E = temp[ ly      * GROUP_W + lx];

	LOAD_PIXEL_BLOCK(base_tc, GROUP_W, GROUP_H + 2, INPUT4);
	vec4 H = temp[(ly + 2) * GROUP_W + lx];
	vec4 I = temp[(ly + 1) * GROUP_W + lx];
	vec4 J = temp[ ly      * GROUP_W + lx];
#else
	// Since the spatial interlacing check is not enabled, we only need D
	// and I from the previous and next fields; since they are not shared
	// between neighboring pixels, they can be straight-up loads.
	vec2 DI_pos = vec2((gl_GlobalInvocationID.x + 0.5f) * PREFIX(inv_width),
	                   (gl_GlobalInvocationID.y + 0.5f) * PREFIX(inv_height));
	vec4 D = INPUT2(DI_pos);
	vec4 I = INPUT4(DI_pos);
#endif

	// Load what we need from the previous field into shared memory,
	// since A/B can be reused between neighboring pixels. We need one
	// line above/below, but we don't need the horizontal fringe.
	LOAD_PIXEL_BLOCK(base_tc, GROUP_W, GROUP_H + 1, INPUT1);
	vec4 A = temp[(ly + 1) * GROUP_W + lx];
	vec4 B = temp[ ly      * GROUP_W + lx];

	// What we need from the current field was loaded earlier.
	vec4 F = d;
	vec4 G = k;

	// Next field.
	LOAD_PIXEL_BLOCK(base_tc, GROUP_W, GROUP_H + 1, INPUT5);
	vec4 K = temp[(ly + 1) * GROUP_W + lx];
	vec4 L = temp[ ly      * GROUP_W + lx];

	// Find temporal differences around this line.
	vec4 tdiff0 = abs(D - I);
	vec4 tdiff1 = abs(A - F) + abs(B - G);  // Actually twice tdiff1.
	vec4 tdiff2 = abs(K - F) + abs(L - G);  // Actually twice tdiff2.
	vec4 diff = max(tdiff0, 0.5f * max(tdiff1, tdiff2));
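	// tdiff1 and tdiff2 each sum two per-row differences, so the 0.5f factor
	// turns them back into averages before taking the max; diff becomes the
	// half-width of the clamp range applied around p2 at the very end.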

#if YADIF_ENABLE_SPATIAL_INTERLACING_CHECK
	// Spatial interlacing check.
	// We start by temporally interpolating the current vertical line (p0–p4):
	//
	//     C p0 H      ↑ y
	//       p1        |
	//     D p2 I      |
	//       p3        |
	//     E p4 J      +-----> time
	//
	vec4 p0 = 0.5f * (C + H);
	vec4 p1 = F;
	vec4 p2 = 0.5f * (D + I);
	vec4 p3 = G;
	vec4 p4 = 0.5f * (E + J);

	vec4 max_ = max(max(p2 - p3, p2 - p1), min(p0 - p1, p4 - p3));
	vec4 min_ = min(min(p2 - p3, p2 - p1), max(p0 - p1, p4 - p3));
	diff = max(diff, max(min_, -max_));
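	// min_ is positive only when p2 lies above both in-field neighbors p1 and p3
	// and at least one of p0 > p1 or p4 > p3 also holds (-max_ is the mirrored
	// "below" case); when that happens, diff widens and the clamp on the spatial
	// prediction below is relaxed.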
#else
	vec4 p2 = 0.5f * (D + I);
#endif

	val = clamp(pred, p2 - diff, p2 + diff);
	OUTPUT(ivec2(gl_GlobalInvocationID.x, yi ^ 1), val);
}

#undef LOAD_PIXEL_BLOCK
#undef DIFF
#undef YADIF_ENABLE_SPATIAL_INTERLACING_CHECK