/etc/spamassassin/FuzzyOcr.cf.real is in fuzzyocr 3.6.0-9.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
# Syntax:
# loadplugin <Plugin_Name> <Location>
# <Location> path where Plugin resides.
# Load the FuzzyOcr plugin. The directives between "ifplugin" and the
# matching "endif" are only processed when the plugin is loaded.
loadplugin FuzzyOcr
ifplugin FuzzyOcr
# Main rule: bound to the plugin's fuzzyocr_check() eval function.
body FUZZY_OCR eval:fuzzyocr_check()
# Auxiliary rules, all bound to dummy_check().
# NOTE(review): presumably these are placeholders that the plugin fires
# itself, using the focr_*_score options further down -- confirm against
# the plugin source.
body FUZZY_OCR_WRONG_CTYPE eval:dummy_check()
body FUZZY_OCR_CORRUPT_IMG eval:dummy_check()
body FUZZY_OCR_WRONG_EXTENSION eval:dummy_check()
body FUZZY_OCR_KNOWN_HASH eval:dummy_check()
# Human-readable descriptions shown in reports for each rule above.
describe FUZZY_OCR Mail contains an image with common spam text inside
describe FUZZY_OCR_WRONG_CTYPE Mail contains an image with wrong content-type set
describe FUZZY_OCR_WRONG_EXTENSION Mail contains an image with wrong file extension
describe FUZZY_OCR_CORRUPT_IMG Mail contains a corrupted image
describe FUZZY_OCR_KNOWN_HASH Mail contains an image with known hash
# Rules with higher priority values run later in SpamAssassin.
# NOTE(review): 900 presumably lets the focr_autodisable_* options see the
# score accumulated by other rules first -- confirm.
priority FUZZY_OCR 900
###
### Plugin Configuration
###
###
### Logging options
###
# Verbosity level (see manual)
# Level 0 - Errors only
# Level 1 - Errors and Warnings
# Level 2 - Errors, Warnings and Info Messages
# Level 3 - Full debug output
# Default value: 1
#focr_verbose 3
# Log Message-Id, From, To
# Default: 1
#focr_log_pmsinfo 0
# Send logging output to stderr.
# Default value: 1
#focr_log_stderr 0
# Logfile (make sure it is writable by the plugin)
# Default value: none
#focr_logfile /tmp/FuzzyOcr.log
###
### Wordlists
###
# Here we define the words to scan for
# Default value: /etc/spamassassin/FuzzyOcr.words
#focr_global_wordlist /etc/spamassassin/FuzzyOcr.words
#
# This is the path RELATIVE to the respective home directory
# for the personalized list. This list is merged with the global
# word list on execution.
# Default value: ~/.spamassassin/fuzzyocr.words
# If value begins with '/', it is treated as fixed path.
#focr_personal_wordlist fuzzyocr.words
#
# This option allows you to disable the whole personalization stuff,
# i.e. FuzzyOcr will not call functions in SA that require home
# directories for your users. This is only required if you are running
# an environment where the users don't have home directories at all.
# Default value: 0
#
#focr_no_homedirs 1
#
## Optionally, disable this option if you want to scan for numbers
## Setting this to 0 will cause FuzzyOcr not to strip numbers from
## both the wordlist and the OCR results
#
#focr_strip_numbers 1
###
### Helper Applications
###
# These parameters can be used to change other detection settings
# If you leave these commented out, the defaults will be used.
# Do not use " " around any parameters!
###
### Step 1:
### Inform the plugin which helper apps are required.
###
# The following are already included by default:
#
#focr_bin_helper gifsicle, giffix, giftext, gifinter, giftopnm
#focr_bin_helper jpegtopnm, pngtopnm, bmptopnm, tifftopnm, ppmhist
#focr_bin_helper gocr, ocrad
# Include additional scanner/preprocessor commands here:
#
focr_bin_helper pnmnorm, pnminvert, ppmtopgm
#not available in Debian: pamthreshold,pamtopnm
focr_bin_helper tesseract
# These helpers must be defined before enabling PDF scanning
#focr_bin_helper pdfinfo, pdftops, pstopnm
###
### Step 2:
### Inform the plugin of the search path to find all helper apps.
### Only the first match will be considered, so the order is important.
###
# Search path for locating helper applications
#focr_path_bin /usr/local/netpbm/bin:/usr/local/bin:/usr/bin
###
### Step 3:
### You can optionally define a helper application location, bypassing
### the search path algorithm. Please note that if the helper app is not
### previously defined, it will generate an error:
#focr_bin_gifsicle /usr/bin/gifsicle
#focr_bin_giffix /usr/bin/giffix
#focr_bin_giftext /usr/bin/giftext
#focr_bin_gifinter /usr/bin/gifinter
#focr_bin_giftopnm /usr/bin/giftopnm
#focr_bin_jpegtopnm /usr/bin/jpegtopnm
#focr_bin_pngtopnm /usr/bin/pngtopnm
#focr_bin_bmptopnm /usr/bin/bmptopnm
#focr_bin_tifftopnm /usr/bin/tifftopnm
#focr_bin_ppmhist /usr/bin/ppmhist
#focr_bin_gocr /usr/bin/gocr
#focr_bin_ocrad /usr/bin/ocrad
#focr_bin_pnmnorm /usr/bin/pnmnorm
#focr_bin_pnminvert /usr/bin/pnminvert
#focr_bin_pdfinfo /usr/bin/pdfinfo
#focr_bin_pdftops /usr/bin/pdftops
#focr_bin_pstopnm /usr/bin/pstopnm
###
### Scansets
###
# Paths to the files containing Scansets and Preprocessors definitions
#
#focr_preprocessor_file /etc/spamassassin/FuzzyOcr.preps
#focr_scanset_file /etc/spamassassin/FuzzyOcr.scansets
# Setting this to 1 will cause FuzzyOcr to skip all other scansets,
# if a scanset has reached the amount of hits specified in
# focr_counts_required. (i.e. if the image is detected as spam).
# This saves resources, but may lower the scores, because the first scanset
# that reaches the required hits is taken as the result, not the best one.
# Default value: 1
#focr_minimal_scanset 0
# This option is only used when focr_minimal_scanset is enabled.
# Basically, this counts the effectiveness of a scanset on the current
# mail traffic and resorts the scansets with the most effective first.
# This saves unnecessary scanner passes and saves resources.
# Default value: 1.
#focr_autosort_scanset 0
# This is a parameter for the focr_autosort_scanset function, and specifies
# the maximum value of the effectiveness counter used in each scanset. If you
# increase this, it will take longer until the autosort function adapts to new
# types of spam, setting it too low will lower the effectiveness of the
# function.
# Default value: 10
#focr_autosort_buffer 10
###
### Scan Settings
###
# Timeout for the plugin, in seconds. (Maximum runtime of the plugin)
# Default value: 10
#focr_timeout 15
# Use a global timeout value instead of per helper application.
# Default value: 0
#focr_global_timeout 1
# Minimum image size to scan. Images with dimensions smaller than the
# ones specified here will be skipped:
# (This parameter does not apply to PDF files)
# Default: Height:4 Width:4
#
#focr_min_height 4
#focr_min_width 4
# Maximum image size to scan. Images with dimensions bigger than the
# ones specified here will be skipped:
# (This parameter does not apply to PDF files)
# Default: Height:800 Width:800
#
#focr_max_height 800
#focr_max_width 800
# Maximum file size for different formats, in bytes; bigger pictures
# will not be scanned
# Default values: unlimited
#focr_max_size_gif 80000
#focr_max_size_jpeg 100000
#focr_max_size_png 80000
#focr_max_size_bmp 500000
#focr_max_size_tiff 500000
# Skip checking the following image types
# Default value: 0 (check image type)
#focr_skip_gif 1
#focr_skip_jpeg 1
#focr_skip_png 1
#focr_skip_bmp 1
#focr_skip_tiff 1
#
# PDF specific options
# WARNING: Enable this at your own risk, this might lead to false positives and classify
# important documents as spam. YOU HAVE BEEN WARNED.
#focr_scan_pdfs 0
# PDFs having more pages than this value will be skipped
#focr_pdf_maxpages 1
# Default detection threshold (see manual)
# Default value: 0.25 (Can be changed on a per word basis in the wordlist).
#focr_threshold 0.20
# Number of minimum matches before the rule scores (Default value: 2)
#focr_counts_required 3
# Setting this will cause every word to be matched only once per image (Default value: 0)
#focr_unique_matches 1
# This is the score for a hit after focr_counts_required matches
# Default value: 5
#focr_base_score 5
# This is the additional score for every additional match after
# focr_counts_required matches
# Default value: 1
#focr_add_score 0.375
# This option defines the factor, which is multiplied with the number
# of matches, that were made without stripping spaces. FuzzyOcr does two
# matching attempts on OCR results, one without space strippings and one with.
# To weight the first match type more, this factor is applied.
# Default value: 1.5
#focr_twopass_scoring_factor 1.5
# This is the score to give for a wrong content-type.
# e.g. JPEG image but content type says GIF
# Default value: 1.5
#focr_wrongctype_score 1.5
# This is the score to give for a wrong file extension.
# e.g. JPEG image but file extension says GIF
# Default value: 1.5
#focr_wrongext_score 1.5
# This is the score to give for a corrupted image.
# This currently affects only GIF images
# Default value: 2.5
#focr_corrupt_score 2.5
# This is the score to give for a corrupted unfixable image.
# This currently affects only GIF images.
# Default value: 5
#focr_corrupt_unfixable_score 5
# This is used to disable the OCR engine if the message already has
# more points than this value
# Default value: 10
#focr_autodisable_score 30
# This is used to disable the OCR engine if the message already has
# fewer points than this value
# Default value: -5
#focr_autodisable_negative_score -5
###
### Hashing Options (Optional)
###
# Select which type of image hashing to use:
# Default value: 0 (disabled)
# Allowed values:
# 1 ... use digest_hash only (deprecated)
# 2 ... use digest_db w/digest_hash import (see requirements, recommended)
# 3 ... use mysql database (see requirements, experimental)
#--
# The score is saved with the hash in the database, allowing the plugin to
# skip the scans when the image is found in the database, using the score
# from the previous scans.
#--
#focr_enable_image_hashing 3
# Set this to skip updating the hashing database at startup
# Default value: 0 (update at startup)
#focr_skip_updates 1
# Automatically add hashes of spam images recognized by OCR to the Image
# Hash database, to disable, set to 0
# Default value: 1 (learn)
#focr_hashing_learn_scanned 1
# Score images whose global word count is below focr_counts_required using
# the following formula: (focr_add_score * word count) as score.
# Default value: 0 (ignore images)
#focr_score_ham 1
# If the image hash database feature is enabled (Type 1 Hashing),
# specify the file to use as database
# Default value: /etc/spamassassin/FuzzyOcr.hashdb
#focr_digest_db /etc/spamassassin/FuzzyOcr.hashdb
# If the image hash db feature is enabled (Type 2 Hashing),
# specify the file to use as the SPAM database
# Default value: /etc/spamassassin/FuzzyOcr.db
#focr_db_hash /etc/spamassassin/FuzzyOcr.db
# If the image hash db feature is enabled (Type 2 Hashing),
# specify the file to use as the HAM database
# Default value: /etc/spamassassin/FuzzyOcr.safe.db
#focr_db_safe /etc/spamassassin/FuzzyOcr.safe.db
# Auto-prune: Expire records from hashing databases after this many days
# Default value: 35
#focr_db_max_days 15
###
### MySQL options (Type 3 Hashing)
###
#focr_mysql_db FuzzyOcr
#focr_mysql_hash Hash
#focr_mysql_safe Safe
#focr_mysql_user fuzzyocr
#focr_mysql_pass fuzzyocr
#focr_mysql_host localhost
#focr_mysql_port 3306
#focr_mysql_socket /tmp/mysql.sock
# If set, the database table is updated with different data from one of
# the following:
# + filename,
# + image-params,
# + content-type,
# + file-type,
# + score,
# + word-info
# Default value: 0
#focr_mysql_update_hash 1
###
### Miscellaneous Options
###
# The plugin uses a temporary directory to store intermediate information.
# In order to keep these files for debugging purposes use any of these
# values:
# 0 = always cleanup (default value)
# 1 = keep only if error
# 2 = always keep
#--
# Keeping these intermediate files could fill your HDD _very_ fast!
# Make sure you periodically empty your temp dir (usually: /tmp) or
# suffer the consequences. You've been warned!!
#--
#focr_keep_bad_images 1
#################################################################
# DO NOT REMOVE THIS LINE, IT IS REQUIRED UNDER ALL CIRCUMSTANCES
focr_end_config
endif
|