This file is indexed.

/usr/share/octave/packages/io-2.4.5/private/__OCT_xlsx2oct__.m is in octave-io 2.4.5-1.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
## Copyright (C) 2013,2014 Markus Bergholz
## Parts Copyright (C) 2013-2016 Philip Nienhuis
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*- 
## @deftypefn {Function File} [ @var{raw}, @var{xls}, @var rstatus} ] = __OCT_xlsx2oct__ (@var{xlsx}, @var{wsh}, @var{range}, @spsh_opts)
## Internal function for reading data from an xlsx worksheet
##
## @seealso{}
## @end deftypefn

## Author: Markus Bergholz <markuman+xlsread@gmail.com>
## Created: 2013-10-04

function [ raw, xls, rstatus ] = __OCT_xlsx2oct__ (xls, wsh, crange='', spsh_opts)

  ## spsh_opts is guaranteed to be filled by caller

  ## If a worksheet if given, check if it's given by a name (string) or a number
  if (ischar (wsh))
    ## Search for requested sheet name
    id = find (strcmp (xls.sheets.sh_names, wsh));
    if (isempty (id))
      error ("xls2oct: cannot find sheet '%s' in file %s", wsh, xls.filename);
    else
      wsh = id;
    endif
  elseif (wsh > numel (xls.sheets.sh_names))
    error ("xls2oct: worksheet number %d > number of worksheets in file (%d)", wsh, numel (xls.sheets.sh_names));
  elseif (wsh < 1)
    warning ("xls2oct: illegal worksheet number (%d) - worksheet #1 assumed\n", wsh);
  endif

  ## Prepare to open requested worksheet file in subdir xl/ . 
  ## Note: Windows accepts forward slashes
  rawsheet = fopen (sprintf ('%s/xl/worksheets/sheet%d.xml', xls.workbook, wsh));
  if (rawsheet <= 0)
    # Try to open sheet from r:id in worksheets.rels.xml
    wsh       = xls.sheets.rels( xls.sheets.rels(:,1) == id ,2);
    rawsheet  = fopen (sprintf ('%s/xl/worksheets/sheet%d.xml', xls.workbook, wsh));
    if (rawsheet <= 0)
      error ("Couldn't open worksheet xml file sheet%d.xml\n", wsh);
    endif
  else
    ## Get data
    rawdata = fread (rawsheet, "char=>char").';
    fclose (rawsheet);
    ## Strings
    try
      fid = fopen (sprintf ("%s/xl/sharedStrings.xml", xls.workbook));
      strings = fread (fid, "char=>char").';
      fclose (fid);
    catch
      ## No sharedStrings.xml; implies no "fixed" strings (computed strings can
      ## still be present)
      strings = "";
    end_try_catch
  endif
  rstatus = 0;

  ## General note for tuning: '"([^"]*)"' (w/o single quotes) could be faster
  ## than '"(.*?)"'
  ## (http://stackoverflow.com/questions/2503413/regular-expression-to-stop-at-first-match comment #7)

  ## As to requested subranges: it's complicated to extract just part of a sheet;
  ## either way the entire sheet would need to be scanned for cell addresses
  ## before one can know what part of the sheet XML the requested range lives.
  ## In addition the endpoint cells of that range may not exist in the sheet XML
  ## (e.g., if they're empty).
  ## So we read *all* data and in the end just return the requested rectangle.

  ## 'val' are the actual values. 'valraw' are the corresponding(!) cell
  ## positions (e.g. B3).  They're extracted separately to avoid (rare cases of)
  ## out-of-phase regexp results that would screw up reshape().

  ## Below are loads of nested IFs. They're needed to catch empty previous
  ## results, even empty sheets.

  ## In below regexps, we ignore "cm"  and "ph" tags immediately after <c and
  ## a "vm" tag immediately after <t> tag. As soon as we hit them in the wild
  ## these can be added (at the cost of speed performance).

  ## 1. Get pure numbers, including booleans, double and boolean formula
  ## results, from cells w/o 's=""' tag
  val = cell2mat (regexp (rawdata, '<c r="\w+"(?: t="[bn]+")?>(?:<f.+?(?:</f>|/>))?<v(?:.*?)>(.*?)</v>', "tokens"));
  if (! isempty (val))
    valraw = cell2mat (regexp (rawdata, '<c r="(\w+)"(?: t="[bn]+")?>(?:<f.+?(?:</f>|/>))?<v(?:.*?)>.*?</v>', "tokens"));
  endif

  ## If val is still empty, try another regexpression (PRN: will this ever
  ## work? haven't seen such cells)
  if (numel (val) == 0)
    val = cell2mat (regexp (rawdata, '<c r="\w+" s="\d+"(?! t="s")><v(?:.*?)>(.*?)</v>', "tokens"));
    if (! isempty (val))
      valraw = cell2mat (regexp (rawdata, '<c r="(\w+)" s="\d+"(?! t="s")><v(?:.*?)>.*?</v>', "tokens"));
    endif
  endif

  ## If 'val' exist, check if there are tagged s="NUMBERS" values
  if (numel (regexp (rawdata, ' s="', "once")) > 0)
    ## Time/date values. Exclude formulas (having <f> </f>  of <f  /> tags),
    ## strings  ('t="s"') and error results ('t="e"')
    valp =            cell2mat (regexp (rawdata, '<c r="\w+" s="\d+"(?: t="[^se]*")?><v(?:.*?)>(.*?)</v>', "tokens"));
    if (! isempty (valp))
      valrawp = cell2mat(regexp (rawdata, '<c r="(\w+)" s="\d+"(?: t="[^se]*")?><v(?:.*?)>.*?</v>', "tokens"));
      if (! isempty (val))
        val =    [val    valp];
        valraw = [valraw valrawp];
      else
        val = valp;
        valraw = valrawp;
        clear valp valrawp ;
      endif
    endif
  endif
  ## Turn strings into numbers
  if (! isempty (val))
    val = num2cell (str2double (val));
  endif

  ## 2. String / text formulas (cached results are in this sheet; fixed strings
  ## in <sharedStrings.xml>)
  ## 2.A Formulas
  if (spsh_opts.formulas_as_text)

    ## Get formulas themselves as text strings. Formulas have no 't="s"' attribute. Keep starting '>' for next line
    valf1 = cell2mat (regexp (rawdata,     '<c r="\w+"(?: s="\d+")?(?: t="\w+")?><f(?: .*?)*(>.*?)</f>(?:<v(?:.*?)>.*?</v>)?', "tokens"));
    if (! isempty (valf1))
      valf1 = regexprep (valf1, '^>', '=');
      valrawf1 = cell2mat(regexp (rawdata, '<c r="(\w+)"(?: s="\d+")?(?: t="\w+")?><f(?: .*?)*>.*?</f>(?:<v(?:.*?)>.*?</v>)?', "tokens"));
      if (isempty (val))
        val = valf1;
      else
        ##  Formulas start with '=' so:
        val = [val valf1];
        valraw = [valraw valrawf1];
      endif
    endif
    clear valf1 valrawf1 ;
  else
    ## Get (cached) formula results. Watch out! as soon as a "t" attibute equals "b" or is missing it is a number
    ## First the non-numeric formula results
    valf2 = cell2mat (regexp (rawdata, '<c r="\w+" s="\d+" t="(?:[^sb]?|str)">(?:<f.+?(?:</f>|/>))<v(?:.*?)>(.*?)</v>', "tokens"));
    if (! isempty (valf2))
      valrawf2 = cell2mat(regexp (rawdata, '<c r="(\w+)" s="\d+" t="(?:[^sb]?|str)">(?:<f.+?(?:</f>|/>))<v(?:.*?)>.*?</v>', "tokens"));
      if (isempty (val))
        val = valf2;
        valraw = valrawf2;
      else
        val = [val valf2];
        valraw = [valraw valrawf2];
      endif
    endif
    clear valf2 valrawf2 ;
    ## Next the numeric formula results. These need additional conversion
    valf3 = cell2mat (regexp (rawdata, '<c r="\w+" s="\d+"(?: t="b")?>(?:<f.+?(?:</f>|/>))<v(?:.*?)>(.*?)</v>', "tokens"));
    if (! isempty (valf3))
      valrawf3 = cell2mat(regexp (rawdata, '<c r="(\w+)" s="\d+"(?: t="b")?>(?:<f.+?(?:</f>|/>))<v(?:.*?)>.*?</v>', "tokens"));
      if (isempty (val))
        val = num2cell(str2double (valf3));
        valraw = valrawf3;
      else
        val = [val num2cell(str2double (valf3))];
        valraw = [valraw valrawf3];
      endif
    endif
    clear valf3 valrawf3 ;
  endif

  ## 2.B. Strings
  if (! isempty (strings))
    ## Extract string values. May be much more than present in current sheet
    strings = regexp (strings, '<si[^>]*>.*?</si>', "match");
    if (! isempty (strings))
      for n = 1:columns(strings)
        ctext{1,n} = cell2mat (cell2mat (regexp (strings{1,n}, '<t[^>]*>(.*?)</t>', "tokens")));
      end
      ## Pointers into sharedStrings.xml. "Hard" (fixed) strings have 't="s"' attribute
      ## For reasons known only to M$ those pointers are zero-based, so:
      vals = str2double (cell2mat (regexp (rawdata, '<c r="\w+"(?: s="\d+")? t="s"><v(?:.*?)>(\d+)</v>', "tokens"))) + 1;
      if (! isempty (vals) && isfinite (vals))
        ## Get actual values. Watch out for empty strings
        vals = ctext(vals);
        ids = cellfun (@isempty, vals);
        if (any (ids))
          vals {find (ids)} = "";
        endif
        ## Cell addresses
        valraws = cell2mat (regexp (rawdata, '<c r="(\w+)"(?: s="\d+")? t="s"><v(?:.*?)>\d+</v>', "tokens"));
        if (isempty (val))
          val = vals;
          valraw = valraws;
        else
          val = [val vals];
          valraw = [valraw valraws];
        endif
      endif
    endif
    clear vals valraws ;
  endif

  ## If val is empty, sheet is empty
  if (isempty (val))
    xls.limits = [];
    raw = {};
    return
  endif

  ## 3. Prepare for assigning extracted values to output cell array
  ## Get the row numbers (currently supported from 1 to 999999)
  vi.row = str2double (cell2mat (regexp (valraw, '(\d+|\d+\d+|\d+\d+\d+|\d+\d+\d+\d+|\d+\d+\d+\d+\+d|\d+\d+\d+\d+\d+\d+)?', "match"))')';

  ## Get the column characters (A to ZZZ) (that are more than 18k supported cols)
  vi.alph = cell2mat (regexp (valraw, '([A-Za-z]+|[A-Za-z]+[A-Za-z]+|[A-Za-z]+[A-Za-z]+[A-Za-z]+)?', "match"));

  ## Free memory; might be useful while reading huge files
  clear valraw ;

  ## If missed formular indices
  idx.all = cell2mat (regexp (rawdata, '<c r="(\w+)"[^>]*><f', "tokens"));
  if (0 < numel (idx.all))
    idx.num = str2double (cell2mat (regexp (idx.all, '(\d+|\d+\d+|\d+\d+\d+|\d+\d+\d+\d+|\d+\d+\d+\d+\+d|\d+\d+\d+\d+\d+\d+)?', "match"))')';
    idx.alph = cell2mat (regexp (idx.all, '([A-Za-z]+|[A-Za-z]+[A-Za-z]+|[A-Za-z]+[A-Za-z]+[A-Za-z]+)?', "match"));
    idx.alph = double (cell2mat (cellfun (@col2num, vi.alph, "UniformOutput", 0)));
  else
    ## To prevent warnings or errors while calculating corresponding NaN matrix
    idx.num = [];
    idx.alph = [];
  end
  ## Transform column character to column number
  ## A -> 1; C -> 3, AB -> 28 ...
  vi.col = double (cell2mat (cellfun (@col2num, vi.alph, "UniformOutput", 0)));

  ## Find data rectangle limits
  idx.mincol = min ([idx.alph vi.col]);
  idx.minrow = min ([idx.num  vi.row]);
  idx.maxrow = max ([idx.num  vi.row]);
  idx.maxcol = max ([idx.alph vi.col]);

  ## Convey limits of data rectangle to xls2oct. Must be done here as first start
  xls.limits = [idx.mincol, idx.maxcol; idx.minrow, idx.maxrow];

  ## column adjustment when first number or formula don't begin in first column
  if (1 < idx.mincol)
        vi.col = vi.col - (idx.mincol - 1);
  endif
  ## row adjustment when first number or formular don't begin in first row
  if (1 < idx.minrow)
        vi.row = vi.row - (idx.minrow - 1);
  endif
  ## Initialize output cell array
  raw = cell (idx.maxrow - idx.minrow + 1, idx.maxcol - idx.mincol + 1);

  ## get logical indices for 'val' from 'valraw' positions in NaN matrix
  vi.idx = sub2ind (size (raw), (vi.row), (vi.col));
  ## set values to the corresponding indizes in final cell matrix
  raw(vi.idx) = val;

  ## Process requested cell range argument
  if (! isempty (crange))
    ## Extract only the requested cell rectangle (see comments higher up)
    [~, nr, nc, tr, lc] = parse_sp_range (crange);
    xls.limits = [max(idx.mincol, lc), min(idx.maxcol, lc+nc-1); ...
                  max(idx.minrow, tr), min(idx.maxrow, tr+nr-1)];
    ## Correct spreadsheet locations for lower right shift of raw
    rc = idx.minrow - 1;
    cc = idx.mincol - 1;
    raw = raw(xls.limits(2, 1)-rc : xls.limits(2, 2)-rc, ...
              xls.limits(1, 1)-cc : xls.limits(1, 2)-cc);
  endif

  if (! isempty (val))
    rstatus = 1;
  endif

endfunction