/usr/share/octave/packages/io-2.4.5/private/__OCT_xlsx2oct__.m is in octave-io 2.4.5-1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 | ## Copyright (C) 2013,2014 Markus Bergholz
## Parts Copyright (C) 2013-2016 Philip Nienhuis
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING. If not, see
## <http://www.gnu.org/licenses/>.
## -*- texinfo -*-
## @deftypefn {Function File} [ @var{raw}, @var{xls}, @var rstatus} ] = __OCT_xlsx2oct__ (@var{xlsx}, @var{wsh}, @var{range}, @spsh_opts)
## Internal function for reading data from an xlsx worksheet
##
## @seealso{}
## @end deftypefn
## Author: Markus Bergholz <markuman+xlsread@gmail.com>
## Created: 2013-10-04
function [ raw, xls, rstatus ] = __OCT_xlsx2oct__ (xls, wsh, crange='', spsh_opts)
## spsh_opts is guaranteed to be filled by caller
## If a worksheet if given, check if it's given by a name (string) or a number
if (ischar (wsh))
## Search for requested sheet name
id = find (strcmp (xls.sheets.sh_names, wsh));
if (isempty (id))
error ("xls2oct: cannot find sheet '%s' in file %s", wsh, xls.filename);
else
wsh = id;
endif
elseif (wsh > numel (xls.sheets.sh_names))
error ("xls2oct: worksheet number %d > number of worksheets in file (%d)", wsh, numel (xls.sheets.sh_names));
elseif (wsh < 1)
warning ("xls2oct: illegal worksheet number (%d) - worksheet #1 assumed\n", wsh);
endif
## Prepare to open requested worksheet file in subdir xl/ .
## Note: Windows accepts forward slashes
rawsheet = fopen (sprintf ('%s/xl/worksheets/sheet%d.xml', xls.workbook, wsh));
if (rawsheet <= 0)
# Try to open sheet from r:id in worksheets.rels.xml
wsh = xls.sheets.rels( xls.sheets.rels(:,1) == id ,2);
rawsheet = fopen (sprintf ('%s/xl/worksheets/sheet%d.xml', xls.workbook, wsh));
if (rawsheet <= 0)
error ("Couldn't open worksheet xml file sheet%d.xml\n", wsh);
endif
else
## Get data
rawdata = fread (rawsheet, "char=>char").';
fclose (rawsheet);
## Strings
try
fid = fopen (sprintf ("%s/xl/sharedStrings.xml", xls.workbook));
strings = fread (fid, "char=>char").';
fclose (fid);
catch
## No sharedStrings.xml; implies no "fixed" strings (computed strings can
## still be present)
strings = "";
end_try_catch
endif
rstatus = 0;
## General note for tuning: '"([^"]*)"' (w/o single quotes) could be faster
## than '"(.*?)"'
## (http://stackoverflow.com/questions/2503413/regular-expression-to-stop-at-first-match comment #7)
## As to requested subranges: it's complicated to extract just part of a sheet;
## either way the entire sheet would need to be scanned for cell addresses
## before one can know what part of the sheet XML the requested range lives.
## In addition the endpoint cells of that range may not exist in the sheet XML
## (e.g., if they're empty).
## So we read *all* data and in the end just return the requested rectangle.
## 'val' are the actual values. 'valraw' are the corresponding(!) cell
## positions (e.g. B3). They're extracted separately to avoid (rare cases of)
## out-of-phase regexp results that would screw up reshape().
## Below are loads of nested IFs. They're needed to catch empty previous
## results, even empty sheets.
## In below regexps, we ignore "cm" and "ph" tags immediately after <c and
## a "vm" tag immediately after <t> tag. As soon as we hit them in the wild
## these can be added (at the cost of speed performance).
## 1. Get pure numbers, including booleans, double and boolean formula
## results, from cells w/o 's=""' tag
val = cell2mat (regexp (rawdata, '<c r="\w+"(?: t="[bn]+")?>(?:<f.+?(?:</f>|/>))?<v(?:.*?)>(.*?)</v>', "tokens"));
if (! isempty (val))
valraw = cell2mat (regexp (rawdata, '<c r="(\w+)"(?: t="[bn]+")?>(?:<f.+?(?:</f>|/>))?<v(?:.*?)>.*?</v>', "tokens"));
endif
## If val is still empty, try another regexpression (PRN: will this ever
## work? haven't seen such cells)
if (numel (val) == 0)
val = cell2mat (regexp (rawdata, '<c r="\w+" s="\d+"(?! t="s")><v(?:.*?)>(.*?)</v>', "tokens"));
if (! isempty (val))
valraw = cell2mat (regexp (rawdata, '<c r="(\w+)" s="\d+"(?! t="s")><v(?:.*?)>.*?</v>', "tokens"));
endif
endif
## If 'val' exist, check if there are tagged s="NUMBERS" values
if (numel (regexp (rawdata, ' s="', "once")) > 0)
## Time/date values. Exclude formulas (having <f> </f> of <f /> tags),
## strings ('t="s"') and error results ('t="e"')
valp = cell2mat (regexp (rawdata, '<c r="\w+" s="\d+"(?: t="[^se]*")?><v(?:.*?)>(.*?)</v>', "tokens"));
if (! isempty (valp))
valrawp = cell2mat(regexp (rawdata, '<c r="(\w+)" s="\d+"(?: t="[^se]*")?><v(?:.*?)>.*?</v>', "tokens"));
if (! isempty (val))
val = [val valp];
valraw = [valraw valrawp];
else
val = valp;
valraw = valrawp;
clear valp valrawp ;
endif
endif
endif
## Turn strings into numbers
if (! isempty (val))
val = num2cell (str2double (val));
endif
## 2. String / text formulas (cached results are in this sheet; fixed strings
## in <sharedStrings.xml>)
## 2.A Formulas
if (spsh_opts.formulas_as_text)
## Get formulas themselves as text strings. Formulas have no 't="s"' attribute. Keep starting '>' for next line
valf1 = cell2mat (regexp (rawdata, '<c r="\w+"(?: s="\d+")?(?: t="\w+")?><f(?: .*?)*(>.*?)</f>(?:<v(?:.*?)>.*?</v>)?', "tokens"));
if (! isempty (valf1))
valf1 = regexprep (valf1, '^>', '=');
valrawf1 = cell2mat(regexp (rawdata, '<c r="(\w+)"(?: s="\d+")?(?: t="\w+")?><f(?: .*?)*>.*?</f>(?:<v(?:.*?)>.*?</v>)?', "tokens"));
if (isempty (val))
val = valf1;
else
## Formulas start with '=' so:
val = [val valf1];
valraw = [valraw valrawf1];
endif
endif
clear valf1 valrawf1 ;
else
## Get (cached) formula results. Watch out! as soon as a "t" attibute equals "b" or is missing it is a number
## First the non-numeric formula results
valf2 = cell2mat (regexp (rawdata, '<c r="\w+" s="\d+" t="(?:[^sb]?|str)">(?:<f.+?(?:</f>|/>))<v(?:.*?)>(.*?)</v>', "tokens"));
if (! isempty (valf2))
valrawf2 = cell2mat(regexp (rawdata, '<c r="(\w+)" s="\d+" t="(?:[^sb]?|str)">(?:<f.+?(?:</f>|/>))<v(?:.*?)>.*?</v>', "tokens"));
if (isempty (val))
val = valf2;
valraw = valrawf2;
else
val = [val valf2];
valraw = [valraw valrawf2];
endif
endif
clear valf2 valrawf2 ;
## Next the numeric formula results. These need additional conversion
valf3 = cell2mat (regexp (rawdata, '<c r="\w+" s="\d+"(?: t="b")?>(?:<f.+?(?:</f>|/>))<v(?:.*?)>(.*?)</v>', "tokens"));
if (! isempty (valf3))
valrawf3 = cell2mat(regexp (rawdata, '<c r="(\w+)" s="\d+"(?: t="b")?>(?:<f.+?(?:</f>|/>))<v(?:.*?)>.*?</v>', "tokens"));
if (isempty (val))
val = num2cell(str2double (valf3));
valraw = valrawf3;
else
val = [val num2cell(str2double (valf3))];
valraw = [valraw valrawf3];
endif
endif
clear valf3 valrawf3 ;
endif
## 2.B. Strings
if (! isempty (strings))
## Extract string values. May be much more than present in current sheet
strings = regexp (strings, '<si[^>]*>.*?</si>', "match");
if (! isempty (strings))
for n = 1:columns(strings)
ctext{1,n} = cell2mat (cell2mat (regexp (strings{1,n}, '<t[^>]*>(.*?)</t>', "tokens")));
end
## Pointers into sharedStrings.xml. "Hard" (fixed) strings have 't="s"' attribute
## For reasons known only to M$ those pointers are zero-based, so:
vals = str2double (cell2mat (regexp (rawdata, '<c r="\w+"(?: s="\d+")? t="s"><v(?:.*?)>(\d+)</v>', "tokens"))) + 1;
if (! isempty (vals) && isfinite (vals))
## Get actual values. Watch out for empty strings
vals = ctext(vals);
ids = cellfun (@isempty, vals);
if (any (ids))
vals {find (ids)} = "";
endif
## Cell addresses
valraws = cell2mat (regexp (rawdata, '<c r="(\w+)"(?: s="\d+")? t="s"><v(?:.*?)>\d+</v>', "tokens"));
if (isempty (val))
val = vals;
valraw = valraws;
else
val = [val vals];
valraw = [valraw valraws];
endif
endif
endif
clear vals valraws ;
endif
## If val is empty, sheet is empty
if (isempty (val))
xls.limits = [];
raw = {};
return
endif
## 3. Prepare for assigning extracted values to output cell array
## Get the row numbers (currently supported from 1 to 999999)
vi.row = str2double (cell2mat (regexp (valraw, '(\d+|\d+\d+|\d+\d+\d+|\d+\d+\d+\d+|\d+\d+\d+\d+\+d|\d+\d+\d+\d+\d+\d+)?', "match"))')';
## Get the column characters (A to ZZZ) (that are more than 18k supported cols)
vi.alph = cell2mat (regexp (valraw, '([A-Za-z]+|[A-Za-z]+[A-Za-z]+|[A-Za-z]+[A-Za-z]+[A-Za-z]+)?', "match"));
## Free memory; might be useful while reading huge files
clear valraw ;
## If missed formular indices
idx.all = cell2mat (regexp (rawdata, '<c r="(\w+)"[^>]*><f', "tokens"));
if (0 < numel (idx.all))
idx.num = str2double (cell2mat (regexp (idx.all, '(\d+|\d+\d+|\d+\d+\d+|\d+\d+\d+\d+|\d+\d+\d+\d+\+d|\d+\d+\d+\d+\d+\d+)?', "match"))')';
idx.alph = cell2mat (regexp (idx.all, '([A-Za-z]+|[A-Za-z]+[A-Za-z]+|[A-Za-z]+[A-Za-z]+[A-Za-z]+)?', "match"));
idx.alph = double (cell2mat (cellfun (@col2num, vi.alph, "UniformOutput", 0)));
else
## To prevent warnings or errors while calculating corresponding NaN matrix
idx.num = [];
idx.alph = [];
end
## Transform column character to column number
## A -> 1; C -> 3, AB -> 28 ...
vi.col = double (cell2mat (cellfun (@col2num, vi.alph, "UniformOutput", 0)));
## Find data rectangle limits
idx.mincol = min ([idx.alph vi.col]);
idx.minrow = min ([idx.num vi.row]);
idx.maxrow = max ([idx.num vi.row]);
idx.maxcol = max ([idx.alph vi.col]);
## Convey limits of data rectangle to xls2oct. Must be done here as first start
xls.limits = [idx.mincol, idx.maxcol; idx.minrow, idx.maxrow];
## column adjustment when first number or formula don't begin in first column
if (1 < idx.mincol)
vi.col = vi.col - (idx.mincol - 1);
endif
## row adjustment when first number or formular don't begin in first row
if (1 < idx.minrow)
vi.row = vi.row - (idx.minrow - 1);
endif
## Initialize output cell array
raw = cell (idx.maxrow - idx.minrow + 1, idx.maxcol - idx.mincol + 1);
## get logical indices for 'val' from 'valraw' positions in NaN matrix
vi.idx = sub2ind (size (raw), (vi.row), (vi.col));
## set values to the corresponding indizes in final cell matrix
raw(vi.idx) = val;
## Process requested cell range argument
if (! isempty (crange))
## Extract only the requested cell rectangle (see comments higher up)
[~, nr, nc, tr, lc] = parse_sp_range (crange);
xls.limits = [max(idx.mincol, lc), min(idx.maxcol, lc+nc-1); ...
max(idx.minrow, tr), min(idx.maxrow, tr+nr-1)];
## Correct spreadsheet locations for lower right shift of raw
rc = idx.minrow - 1;
cc = idx.mincol - 1;
raw = raw(xls.limits(2, 1)-rc : xls.limits(2, 2)-rc, ...
xls.limits(1, 1)-cc : xls.limits(1, 2)-cc);
endif
if (! isempty (val))
rstatus = 1;
endif
endfunction
|