This file is indexed.

/usr/bin/apertium-gen-stopwords-lextor is in apertium 3.1.0-2.

This file is owned by root:root, with mode 0o755.

The actual contents of the file can be viewed below.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/bin/bash
#
# Writes the <n> most frequent entries of a preprocessed corpus to an
# output file, one entry per line.
#
# Arguments:
#   $1 - <n>, the desired number of stopwords
#   $2 - <input_file>, the corpus to read
#   $3 - <output_file>, the file the list is written to
#
# Exits with status 1 (message on stderr) when the argument count is wrong
# or the input file does not exist.

# Exactly three positional arguments are required; compare numerically.
if [ "$#" -ne 3 ]; then
  echo "USAGE: $(basename "$0") <n> <input_file> <output_file>" 1>&2
  echo "where <n> is the desired number of stopwords" 1>&2
  echo "      <input_file> contains a large preprocessed corpus" 1>&2
  echo "      <output_file> is the file to which the list of stopwords is written" 1>&2
  exit 1
fi

N=$1
INFILE=$2
OUTFILE=$3

# The expansion is quoted so that a path containing blanks or glob
# characters is tested as a single word; unquoted, such a path makes the
# test itself fail and the missing-file branch is silently skipped.
if [ ! -e "$INFILE" ]; then
  echo "ERROR: '$INFILE' file not found" 1>&2
  exit 1
fi

# Build the stopword list from $INFILE and write it to $OUTFILE.
#
# Pipeline stages, in order:
#   1. sed: insert a line break after every token of the form ^...$ whose
#      body consists only of characters listed in the bracket expression.
#   2. sed: trim leading and trailing blanks/tabs, then remove the leading
#      '^' and the trailing '$' delimiter. The four substitutions run in
#      this order on each line inside a single sed process. Stage 1 must
#      stay a separate process because the line breaks it inserts have to
#      become real line boundaries before '^' and '$' anchors are applied.
#   3. awk: drop empty lines and lowercase everything else.
#   4. awk: keep the text up to and including the first '>'; lines without
#      a '>' pass through unchanged.
#   5. sort | uniq -c | sort -n -r: count identical lines and order them by
#      descending count.
#   6. head: keep the first $N lines.
#   7. awk: drop the count column produced by uniq -c and re-join the
#      remaining fields with single spaces.
#
# INFILE, N and OUTFILE are expanded inside double quotes so that values
# containing blanks or glob characters are passed on verbatim.
sed -r -e "s/(\^[0-9·ÀÁÂÄÇÈÉÊËÌÍÎÏÑÒÓÔÖÙÚÛÜàáâäçèéêëìíîïñòóôöùúûüABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz <>ç+.,;:_'#*%()?¿!¡-]+\\$)/\1\n/g" < "$INFILE" |
sed -r -e "s/^[ \t]+//g" \
       -e "s/[ \t]+$//g" \
       -e "s/^\^//g" \
       -e "s/\\\$$//g" |
awk '{if (length($0)>0) print tolower($0)}' |
awk '{ #Only lemma and first tag; rest of tags, if present, are ignored
  if (index($0,">")>0)
    print substr($0,1,index($0,">"));
  else
    print $0;
}' |
sort | uniq -c | sort -n -r |
head -n "$N" |
awk 'BEGIN{FS=" "}
{
  c="";
  for(i=2; i<=NF; i++) {
    if (length(c)>0)
      c= c " "
    c = c $i  
  }
  print c;
}' > "$OUTFILE" || {
  # The pipeline status is that of its last stage, which also fails when
  # the output redirection cannot be opened; report it instead of
  # unconditionally exiting with success.
  echo "ERROR: could not write the list of stopwords to '$OUTFILE'" 1>&2
  exit 1
}

exit 0