/usr/share/idzebra-2.0/tab/soif.flt is in idzebra-2.0-common 2.0.59-1ubuntu1.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# Crude input-filter for SOIF records -- one record per file.
# Author: Peter Valkenburg / TERENA (valkenburg@terena.nl)
# Version 0.2 (09/09/1998).
# This sort of follows the Nordic Web Index convention of GILS attribute use.
# Modified by Kang-Jin Lee (lee@arco.de)
# 07/10/1999
# Before any input is matched, open a structured record of type "gils";
# the rules in this file emit their elements into that record.
BEGIN {
    begin record gils
  }
# SOIF template header line, e.g. "@FILE { http://host/path".
# Everything after the opening brace up to end of line (BODY, $1) is the
# document URL; it is stored as GILS availability/linkage.
# Keep the action lines indented: the regx reader is understood to continue
# a rule only across lines that begin with whitespace.
/^@[A-Za-z](-|[.A-Za-z_])* { / BODY /$/ {
    begin element availability
        data -element linkage $1
    end element
  }
# "Type{n}:<TAB>value" -- the rest of the line (BODY, $1) is stored as
# GILS availability/linkageType.
# Keep the action lines indented: the regx reader is understood to continue
# a rule only across lines that begin with whitespace.
/^[tT]ype{[0-9]+}:\t/ BODY /$/ {
    begin element availability
        data -element linkageType $1
    end element
  }
# "Last-Modification-Time{n}:<TAB>value" -- the rest of the line (BODY, $1)
# is stored as dateOfLastModification (Bib-1 Use Attribute 1012).
# Keep the action lines indented: the regx reader is understood to continue
# a rule only across lines that begin with whitespace.
/^[lL]ast-[mM]odification-[tT]ime{[0-9]+}:\t/ BODY /$/ {
    data -element dateOfLastModification $1
  }
# "MD5{n}:<TAB>value" -- the checksum on the rest of the line (BODY, $1)
# serves as the record's unique identifier and is stored as
# controlIdentifier (Bib-1 Use Attribute 1007).
/^[mM][dD]5{[0-9]+}:\t/ BODY /$/ {
    data -element controlIdentifier $1
  }
# "Description{n}:<TAB>value" -- possibly spanning several lines.
# BODY ($1) runs until a line that is either the next attribute header
# ("name{n}:<TAB>...") or a lone "}" closing the template; it is stored as
# abstract (Bib-1 Use Attribute 62).
# "unread 2" rewinds the input to the start of that terminating line so it
# can be matched again by its own rule.
# Keep the action lines indented: the regx reader is understood to continue
# a rule only across lines that begin with whitespace.
/^[dD]escription{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
    data -element abstract $1
    unread 2
  }
# "Author{n}:<TAB>value" -- possibly spanning several lines.
# BODY ($1) runs until the next attribute header line or a lone "}" and is
# stored in element author. Per the original author's note this corresponds
# to Bib-1 Use Attribute 1003 only if gils.abs maps originator to it.
# "unread 2" rewinds the input to the start of the terminating line so it
# can be matched again by its own rule.
# Keep the action lines indented: the regx reader is understood to continue
# a rule only across lines that begin with whitespace.
/^[aA]uthor{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
    data -element author $1
    unread 2
  }
# "Keywords{n}:<TAB>value" -- possibly spanning several lines.
# BODY ($1) runs until the next attribute header line or a lone "}" and is
# stored as GILS localSubjectIndex/localSubjectTerm.
# "unread 2" rewinds the input to the start of the terminating line so it
# can be matched again by its own rule.
# Keep the action lines indented: the regx reader is understood to continue
# a rule only across lines that begin with whitespace.
/^[kK]eywords{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
    begin element localSubjectIndex
        data -element localSubjectTerm $1
        unread 2
    end element
  }
# "File-Size{n}:<TAB>value" -- the rest of the line (BODY, $1) is stored as
# GILS supplementalInformation/bytes.
# "unread 2" rewinds the input to where the end-of-line pattern matched.
# Keep the action lines indented: the regx reader is understood to continue
# a rule only across lines that begin with whitespace.
/^[fF]ile-[sS]ize{[0-9]+}:\t/ BODY /$/ {
    begin element supplementalInformation
        data -element bytes $1
        unread 2
    end element
  }
# "Update-Time{n}:<TAB>value" -- the rest of the line (BODY, $1) is stored as
# GILS supplementalInformation/lastChecked.
# "unread 2" rewinds the input to where the end-of-line pattern matched.
# Keep the action lines indented: the regx reader is understood to continue
# a rule only across lines that begin with whitespace.
/^[uU]pdate-[tT]ime{[0-9]+}:\t/ BODY /$/ {
    begin element supplementalInformation
        data -element lastChecked $1
        unread 2
    end element
  }
# "Url-References{n}:<TAB>value" -- possibly spanning several lines.
# BODY ($1) runs until the next attribute header line or a lone "}" and is
# stored as GILS crossReference/linkage.
# "unread 2" rewinds the input to the start of the terminating line so it
# can be matched again by its own rule.
# Keep the action lines indented: the regx reader is understood to continue
# a rule only across lines that begin with whitespace.
/^[uU]rl-[rR]eferences{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
    begin element crossReference
        data -element linkage $1
        unread 2
    end element
  }
# "Title{n}:<TAB>value" -- possibly spanning several lines.
# BODY ($1) runs until the next attribute header line or a lone "}" and is
# stored in element Title (Bib-1 Use Attribute 4).
# "unread 2" rewinds the input to the start of the terminating line so it
# can be matched again by its own rule.
# Keep the action lines indented: the regx reader is understood to continue
# a rule only across lines that begin with whitespace.
/^[tT]itle{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
    data -element Title $1
    unread 2
  }
# Full-Text (here) and Partial-Text are stored as sampleText
# (Bib-1 Use Attribute 1010).
# Is Body really commonly used in SOIF? Anyway, Full-Text is used by Harvest,
# so the Body rule is kept below only as a disabled reference:
#/^[bB]ody{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
#    data -element sampleText $1
#    unread 2
#  }
# "Full-Text{n}:<TAB>value" -- possibly spanning several lines.
# BODY ($1) runs until the next attribute header line or a lone "}".
# "unread 2" rewinds the input to the start of the terminating line so it
# can be matched again by its own rule.
# Keep the action lines indented: the regx reader is understood to continue
# a rule only across lines that begin with whitespace.
/^[fF]ull-[tT]ext{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
    data -element sampleText $1
    unread 2
  }
# "Partial-Text{n}:<TAB>value" -- possibly spanning several lines.
# BODY ($1) runs until the next attribute header line or a lone "}" and is
# stored as sampleText.
# "unread 2" rewinds the input to the start of the terminating line so it
# can be matched again by its own rule.
# Keep the action lines indented: the regx reader is understood to continue
# a rule only across lines that begin with whitespace.
/^[pP]artial-[tT]ext{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
    data -element sampleText $1
    unread 2
  }
# Catch-all for any other "name{n}:<TAB>value" attribute: its value is
# consumed up to the next attribute header line or a lone "}" and nothing is
# emitted, so unmapped attributes do not end up in the record.
# The name character classes accept "." and "_" in both patterns, matching
# the terminating pattern used by the other multi-line rules in this file;
# otherwise an attribute whose name contains those characters would end
# another rule's BODY without being picked up here.
# "unread 2" rewinds the input to the start of the terminating line so it
# can be matched again by its own rule.
# Keep the action lines indented: the regx reader is understood to continue
# a rule only across lines that begin with whitespace.
/^(-|[._a-zA-Z0-9])+{[0-9]+}:\t/ BODY /^((-|[._A-Za-z0-9])+{[0-9]+}:\t.*|})$/ {
    unread 2
  }
# Once the input is exhausted, close the record opened by the BEGIN rule.
END {
    end record
  }
|